Model training

This notebook reads a small training and test set in RecordIO-protobuf format, then invokes Amazon SageMaker to train and deploy a model.

In [8]:
import pandas as pd
import numpy as np
import boto3
import os.path
from sagemaker import get_execution_role
In [9]:
# Bucket with raw data
bucket = 'promo-dataprep'

# input train, test, and header data
trainprefix = 'train-small/train.protobuf'
testprefix = 'train-small/test.protobuf'
headerprefix = 'train-small/headers.csv'
trainlocal = 'train-small.protobuf'
testlocal = 'test-small.protobuf'
headerlocal = 'headers-train.csv'
In [10]:
# set up boto for s3 access
role = get_execution_role()
s3 = boto3.resource('s3')

# download each input unless it is already cached locally
for prefix, local in [(trainprefix, trainlocal),
                      (testprefix, testlocal),
                      (headerprefix, headerlocal)]:
    if os.path.isfile(local):
        print("{0} already exists, skipping".format(local))
    else:
        s3.Bucket(bucket).download_file(prefix, local)
train-small.protobuf already exists, skipping
test-small.protobuf already exists, skipping
headers-train.csv already exists, skipping

Train model

In [11]:
import sagemaker
output_prefix  = 's3://{}/{}/output'.format(bucket, 'train-small')

containers = {'us-east-1': 'dkr.ecr.us-east-1.amazonaws.com/tffmpromo:latest'}
fm = sagemaker.estimator.Estimator(containers[boto3.Session().region_name],
                                   role, 
                                   train_instance_count=1, 
                                   train_instance_type='ml.c4.8xlarge',
                                   output_path=output_prefix,
                                   sagemaker_session=sagemaker.Session())

fm.set_hyperparameters(order='3',
                       rank='7',
                       epochs=50,
                       header_file_bucket=bucket,
                       header_file_prefix='train-small/headers.csv')
In [12]:
fm.fit({'train': "s3://{0}/{1}".format(bucket, trainprefix), 
        'test': "s3://{0}/{1}".format(bucket, testprefix)})
INFO:sagemaker:Creating training-job with name: tffmpromo-2018-04-10-02-17-05-752
...................................................
Starting the training.
Number of samples: 200000
Creating sparse matrix of shape 40000,62520
X_tr shape: (40000, 62520)
2018-04-10 02:21:14.694903: I tensorflow/core/platform/cpu_feature_guard.cc:137] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2 FMA
100%|##########| 50/50 [00:02<00:00, 22.46epoch/s]
Training complete.
Creating test sparse matrix of shape 10000,62520
X_te shape: (10000, 62520)
Weighted F1: 0.918872415972
Accuracy: 0.9363
Weighted ROC: 0.588786301559
Classification report:              precision    recall  f1-score   support

        0.0       0.94      0.99      0.97      9294
        1.0       0.68      0.18      0.29       706

avg / total       0.92      0.94      0.92     10000

Confusion matrix
predictions     0    1
actuals               
0.0          9233   61
1.0           576  130
===== Job Complete =====
Billable seconds: 103
In [13]:
print(fm.model_data)
s3://promo-dataprep/train-small/output/tffmpromo-2018-04-10-02-17-05-752/output/model.tar.gz
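The trained model artifact is just a tarball in S3. The deployment step below consumes it directly, but it can also be pulled down and inspected locally; a minimal sketch, assuming only that the artifact is a gzipped tar:

import tarfile

# fm.model_data is 's3://<bucket>/<key>'; strip the scheme and bucket to get the key
model_key = fm.model_data.replace('s3://{}/'.format(bucket), '')
s3.Bucket(bucket).download_file(model_key, 'model.tar.gz')
with tarfile.open('model.tar.gz') as tar:
    print(tar.getnames())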
In [14]:
fm_predictor = fm.deploy(initial_instance_count=1,
                         instance_type='ml.m4.xlarge')
INFO:sagemaker:Creating model with name: tffmpromo-2018-04-10-02-23-16-492
INFO:sagemaker:Creating endpoint with name tffmpromo-2018-04-10-02-17-05-752
---------------------------------------------------------------!

Factorization Machine

For the sake of comparison, let's see how well the native SageMaker FM algorithm does.

In [15]:
output_prefix  = 's3://{}/{}/output'.format(bucket, 'train-fm')

containers = {'us-west-2': '174872318107.dkr.ecr.us-west-2.amazonaws.com/factorization-machines:latest',
              'us-east-1': '382416733822.dkr.ecr.us-east-1.amazonaws.com/factorization-machines:latest',
              'us-east-2': '404615174143.dkr.ecr.us-east-2.amazonaws.com/factorization-machines:latest',
              'eu-west-1': '438346466558.dkr.ecr.eu-west-1.amazonaws.com/factorization-machines:latest'}
sfm = sagemaker.estimator.Estimator(containers[boto3.Session().region_name],
                                   role, 
                                   train_instance_count=1, 
                                   train_instance_type='ml.c4.8xlarge',
                                   output_path=output_prefix,
                                   sagemaker_session=sagemaker.Session())

sfm.set_hyperparameters(feature_dim=62520,
                        predictor_type='regressor',
                        num_factors=20,
                        epochs=20)
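Note that feature_dim must match the column count of the protobuf matrices exactly. Rather than hard-coding 62520, it could be derived from the header file, one feature name per row, which is the same convention the evaluation cell below relies on; a sketch:

# count header rows to recover the feature dimension instead of hard-coding it
with open(headerlocal) as f:
    feature_dim = sum(1 for line in f)
print(feature_dim)  # expected: 62520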
In [16]:
sfm.fit({'train': "s3://{0}/{1}".format(bucket, trainprefix), 
        'test': "s3://{0}/{1}".format(bucket, testprefix)})
INFO:sagemaker:Creating training-job with name: factorization-machines-2018-04-10-02-29-53-695
................................................................
Docker entrypoint called with argument(s): train
[04/10/2018 02:35:09 INFO 140269428983616] Reading default configuration from /opt/amazon/lib/python2.7/site-packages/algorithm/resources/default-conf.json: {u'factors_lr': u'0.0001', u'linear_init_sigma': u'0.01', u'epochs': 1, u'_wd': u'1.0', u'_num_kv_servers': u'auto', u'use_bias': u'true', u'factors_init_sigma': u'0.001', u'_log_level': u'info', u'bias_init_method': u'normal', u'linear_init_method': u'normal', u'linear_lr': u'0.001', u'factors_init_method': u'normal', u'bias_wd': u'0.01', u'use_linear': u'true', u'_speedometer_period': u'500', u'bias_lr': u'0.1', u'mini_batch_size': u'1000', u'_use_full_symbolic': u'true', u'bias_init_sigma': u'0.01', u'_num_gpus': u'auto', u'_data_format': u'record', u'factors_wd': u'0.00001', u'linear_wd': u'0.001', u'_kvstore': u'auto', u'_learning_rate': u'1.0', u'_optimizer': u'adam'}
[04/10/2018 02:35:09 INFO 140269428983616] Reading provided configuration from /opt/ml/input/config/hyperparameters.json: {u'epochs': u'20', u'feature_dim': u'62520', u'predictor_type': u'regressor', u'num_factors': u'20'}
[04/10/2018 02:35:09 INFO 140269428983616] Final configuration: {u'factors_lr': u'0.0001', u'linear_init_sigma': u'0.01', u'epochs': u'20', u'feature_dim': u'62520', u'num_factors': u'20', u'_wd': u'1.0', u'_num_kv_servers': u'auto', u'use_bias': u'true', u'factors_init_sigma': u'0.001', u'_log_level': u'info', u'bias_init_method': u'normal', u'linear_init_method': u'normal', u'linear_lr': u'0.001', u'factors_init_method': u'normal', u'bias_wd': u'0.01', u'use_linear': u'true', u'_speedometer_period': u'500', u'bias_lr': u'0.1', u'mini_batch_size': u'1000', u'_use_full_symbolic': u'true', u'predictor_type': u'regressor', u'bias_init_sigma': u'0.01', u'_num_gpus': u'auto', u'_data_format': u'record', u'factors_wd': u'0.00001', u'linear_wd': u'0.001', u'_kvstore': u'auto', u'_learning_rate': u'1.0', u'_optimizer': u'adam'}
[04/10/2018 02:35:09 WARNING 140269428983616] Loggers have already been setup.
[04/10/2018 02:35:09 INFO 140269428983616] Using default worker.
[04/10/2018 02:35:09 INFO 140269428983616] nvidia-smi took: 0.0252740383148 secs to identify 0 gpus
[04/10/2018 02:35:09 INFO 140269428983616] Number of GPUs being used: 0
[04/10/2018 02:35:09 INFO 140269428983616] [Sparse network] Building a sparse network.
[04/10/2018 02:35:09 INFO 140269428983616] Create Store: local
#metrics {"Metrics": {"initialize.time": {"count": 1, "max": 35.14504432678223, "sum": 35.14504432678223, "min": 35.14504432678223}}, "EndTime": 1523327709.151211, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "factorization-machines"}, "StartTime": 1523327709.093295}

#metrics {"Metrics": {"Max Batches Seen Between Resets": {"count": 1, "max": 1, "sum": 1.0, "min": 1}, "Number of Batches Since Last Reset": {"count": 1, "max": 0, "sum": 0.0, "min": 0}, "Number of Records Since Last Reset": {"count": 1, "max": 0, "sum": 0.0, "min": 0}, "Total Batches Seen": {"count": 1, "max": 1, "sum": 1.0, "min": 1}, "Total Records Seen": {"count": 1, "max": 1000, "sum": 1000.0, "min": 1000}, "Max Records Seen Between Resets": {"count": 1, "max": 1000, "sum": 1000.0, "min": 1000}, "Reset Count": {"count": 1, "max": 1, "sum": 1.0, "min": 1}}, "EndTime": 1523327709.151346, "Dimensions": {"Host": "algo-1", "Meta": "init_train_data_iter", "Operation": "training", "Algorithm": "factorization-machines"}, "StartTime": 1523327709.151314}

[02:35:09] /opt/brazil-pkg-cache/packages/AIAlgorithmsMXNet/AIAlgorithmsMXNet-1.1.x.200034.0/RHEL5_64/generic-flavor/src/src/kvstore/./kvstore_local.h:280: Warning: non-default weights detected during kvstore pull. This call has been ignored. Please make sure to use row_sparse_pull with row_ids.
[02:35:09] /opt/brazil-pkg-cache/packages/AIAlgorithmsMXNet/AIAlgorithmsMXNet-1.1.x.200034.0/RHEL5_64/generic-flavor/src/src/kvstore/./kvstore_local.h:280: Warning: non-default weights detected during kvstore pull. This call has been ignored. Please make sure to use row_sparse_pull with row_ids.
#metrics {"Metrics": {"epochs": {"count": 1, "max": 20, "sum": 20.0, "min": 20}, "update.time": {"count": 1, "max": 269.8171138763428, "sum": 269.8171138763428, "min": 269.8171138763428}}, "EndTime": 1523327709.421367, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "factorization-machines"}, "StartTime": 1523327709.151271}

#metrics {"Metrics": {"Max Batches Seen Between Resets": {"count": 1, "max": 40, "sum": 40.0, "min": 40}, "Number of Batches Since Last Reset": {"count": 1, "max": 40, "sum": 40.0, "min": 40}, "Number of Records Since Last Reset": {"count": 1, "max": 40000, "sum": 40000.0, "min": 40000}, "Total Batches Seen": {"count": 1, "max": 41, "sum": 41.0, "min": 41}, "Total Records Seen": {"count": 1, "max": 41000, "sum": 41000.0, "min": 41000}, "Max Records Seen Between Resets": {"count": 1, "max": 40000, "sum": 40000.0, "min": 40000}, "Reset Count": {"count": 1, "max": 2, "sum": 2.0, "min": 2}}, "EndTime": 1523327709.421513, "Dimensions": {"Host": "algo-1", "Meta": "training_data_iter", "Operation": "training", "Algorithm": "factorization-machines", "epoch": 0}, "StartTime": 1523327709.421483}

#metrics {"Metrics": {"update.time": {"count": 1, "max": 196.38705253601074, "sum": 196.38705253601074, "min": 196.38705253601074}}, "EndTime": 1523327709.618033, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "factorization-machines"}, "StartTime": 1523327709.421442}

#metrics {"Metrics": {"Max Batches Seen Between Resets": {"count": 1, "max": 40, "sum": 40.0, "min": 40}, "Number of Batches Since Last Reset": {"count": 1, "max": 40, "sum": 40.0, "min": 40}, "Number of Records Since Last Reset": {"count": 1, "max": 40000, "sum": 40000.0, "min": 40000}, "Total Batches Seen": {"count": 1, "max": 81, "sum": 81.0, "min": 81}, "Total Records Seen": {"count": 1, "max": 81000, "sum": 81000.0, "min": 81000}, "Max Records Seen Between Resets": {"count": 1, "max": 40000, "sum": 40000.0, "min": 40000}, "Reset Count": {"count": 1, "max": 3, "sum": 3.0, "min": 3}}, "EndTime": 1523327709.618174, "Dimensions": {"Host": "algo-1", "Meta": "training_data_iter", "Operation": "training", "Algorithm": "factorization-machines", "epoch": 1}, "StartTime": 1523327709.618145}

#metrics {"Metrics": {"update.time": {"count": 1, "max": 200.66595077514648, "sum": 200.66595077514648, "min": 200.66595077514648}}, "EndTime": 1523327709.818988, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "factorization-machines"}, "StartTime": 1523327709.618104}

#metrics {"Metrics": {"Max Batches Seen Between Resets": {"count": 1, "max": 40, "sum": 40.0, "min": 40}, "Number of Batches Since Last Reset": {"count": 1, "max": 40, "sum": 40.0, "min": 40}, "Number of Records Since Last Reset": {"count": 1, "max": 40000, "sum": 40000.0, "min": 40000}, "Total Batches Seen": {"count": 1, "max": 121, "sum": 121.0, "min": 121}, "Total Records Seen": {"count": 1, "max": 121000, "sum": 121000.0, "min": 121000}, "Max Records Seen Between Resets": {"count": 1, "max": 40000, "sum": 40000.0, "min": 40000}, "Reset Count": {"count": 1, "max": 4, "sum": 4.0, "min": 4}}, "EndTime": 1523327709.819121, "Dimensions": {"Host": "algo-1", "Meta": "training_data_iter", "Operation": "training", "Algorithm": "factorization-machines", "epoch": 2}, "StartTime": 1523327709.819091}

#metrics {"Metrics": {"update.time": {"count": 1, "max": 201.15089416503906, "sum": 201.15089416503906, "min": 201.15089416503906}}, "EndTime": 1523327710.02039, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "factorization-machines"}, "StartTime": 1523327709.819052}

#metrics {"Metrics": {"Max Batches Seen Between Resets": {"count": 1, "max": 40, "sum": 40.0, "min": 40}, "Number of Batches Since Last Reset": {"count": 1, "max": 40, "sum": 40.0, "min": 40}, "Number of Records Since Last Reset": {"count": 1, "max": 40000, "sum": 40000.0, "min": 40000}, "Total Batches Seen": {"count": 1, "max": 161, "sum": 161.0, "min": 161}, "Total Records Seen": {"count": 1, "max": 161000, "sum": 161000.0, "min": 161000}, "Max Records Seen Between Resets": {"count": 1, "max": 40000, "sum": 40000.0, "min": 40000}, "Reset Count": {"count": 1, "max": 5, "sum": 5.0, "min": 5}}, "EndTime": 1523327710.020536, "Dimensions": {"Host": "algo-1", "Meta": "training_data_iter", "Operation": "training", "Algorithm": "factorization-machines", "epoch": 3}, "StartTime": 1523327710.020507}

#metrics {"Metrics": {"update.time": {"count": 1, "max": 198.5478401184082, "sum": 198.5478401184082, "min": 198.5478401184082}}, "EndTime": 1523327710.219211, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "factorization-machines"}, "StartTime": 1523327710.020465}

#metrics {"Metrics": {"Max Batches Seen Between Resets": {"count": 1, "max": 40, "sum": 40.0, "min": 40}, "Number of Batches Since Last Reset": {"count": 1, "max": 40, "sum": 40.0, "min": 40}, "Number of Records Since Last Reset": {"count": 1, "max": 40000, "sum": 40000.0, "min": 40000}, "Total Batches Seen": {"count": 1, "max": 201, "sum": 201.0, "min": 201}, "Total Records Seen": {"count": 1, "max": 201000, "sum": 201000.0, "min": 201000}, "Max Records Seen Between Resets": {"count": 1, "max": 40000, "sum": 40000.0, "min": 40000}, "Reset Count": {"count": 1, "max": 6, "sum": 6.0, "min": 6}}, "EndTime": 1523327710.219325, "Dimensions": {"Host": "algo-1", "Meta": "training_data_iter", "Operation": "training", "Algorithm": "factorization-machines", "epoch": 4}, "StartTime": 1523327710.219297}

#metrics {"Metrics": {"update.time": {"count": 1, "max": 211.26604080200195, "sum": 211.26604080200195, "min": 211.26604080200195}}, "EndTime": 1523327710.430699, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "factorization-machines"}, "StartTime": 1523327710.219263}

#metrics {"Metrics": {"Max Batches Seen Between Resets": {"count": 1, "max": 40, "sum": 40.0, "min": 40}, "Number of Batches Since Last Reset": {"count": 1, "max": 40, "sum": 40.0, "min": 40}, "Number of Records Since Last Reset": {"count": 1, "max": 40000, "sum": 40000.0, "min": 40000}, "Total Batches Seen": {"count": 1, "max": 241, "sum": 241.0, "min": 241}, "Total Records Seen": {"count": 1, "max": 241000, "sum": 241000.0, "min": 241000}, "Max Records Seen Between Resets": {"count": 1, "max": 40000, "sum": 40000.0, "min": 40000}, "Reset Count": {"count": 1, "max": 7, "sum": 7.0, "min": 7}}, "EndTime": 1523327710.430831, "Dimensions": {"Host": "algo-1", "Meta": "training_data_iter", "Operation": "training", "Algorithm": "factorization-machines", "epoch": 5}, "StartTime": 1523327710.430801}

#metrics {"Metrics": {"update.time": {"count": 1, "max": 201.1251449584961, "sum": 201.1251449584961, "min": 201.1251449584961}}, "EndTime": 1523327710.632076, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "factorization-machines"}, "StartTime": 1523327710.430762}

#metrics {"Metrics": {"Max Batches Seen Between Resets": {"count": 1, "max": 40, "sum": 40.0, "min": 40}, "Number of Batches Since Last Reset": {"count": 1, "max": 40, "sum": 40.0, "min": 40}, "Number of Records Since Last Reset": {"count": 1, "max": 40000, "sum": 40000.0, "min": 40000}, "Total Batches Seen": {"count": 1, "max": 281, "sum": 281.0, "min": 281}, "Total Records Seen": {"count": 1, "max": 281000, "sum": 281000.0, "min": 281000}, "Max Records Seen Between Resets": {"count": 1, "max": 40000, "sum": 40000.0, "min": 40000}, "Reset Count": {"count": 1, "max": 8, "sum": 8.0, "min": 8}}, "EndTime": 1523327710.632201, "Dimensions": {"Host": "algo-1", "Meta": "training_data_iter", "Operation": "training", "Algorithm": "factorization-machines", "epoch": 6}, "StartTime": 1523327710.632159}

#metrics {"Metrics": {"update.time": {"count": 1, "max": 200.98114013671875, "sum": 200.98114013671875, "min": 200.98114013671875}}, "EndTime": 1523327710.8333, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "factorization-machines"}, "StartTime": 1523327710.632124}

#metrics {"Metrics": {"Max Batches Seen Between Resets": {"count": 1, "max": 40, "sum": 40.0, "min": 40}, "Number of Batches Since Last Reset": {"count": 1, "max": 40, "sum": 40.0, "min": 40}, "Number of Records Since Last Reset": {"count": 1, "max": 40000, "sum": 40000.0, "min": 40000}, "Total Batches Seen": {"count": 1, "max": 321, "sum": 321.0, "min": 321}, "Total Records Seen": {"count": 1, "max": 321000, "sum": 321000.0, "min": 321000}, "Max Records Seen Between Resets": {"count": 1, "max": 40000, "sum": 40000.0, "min": 40000}, "Reset Count": {"count": 1, "max": 9, "sum": 9.0, "min": 9}}, "EndTime": 1523327710.833443, "Dimensions": {"Host": "algo-1", "Meta": "training_data_iter", "Operation": "training", "Algorithm": "factorization-machines", "epoch": 7}, "StartTime": 1523327710.833406}

#metrics {"Metrics": {"update.time": {"count": 1, "max": 218.71089935302734, "sum": 218.71089935302734, "min": 218.71089935302734}}, "EndTime": 1523327711.052289, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "factorization-machines"}, "StartTime": 1523327710.833359}

#metrics {"Metrics": {"Max Batches Seen Between Resets": {"count": 1, "max": 40, "sum": 40.0, "min": 40}, "Number of Batches Since Last Reset": {"count": 1, "max": 40, "sum": 40.0, "min": 40}, "Number of Records Since Last Reset": {"count": 1, "max": 40000, "sum": 40000.0, "min": 40000}, "Total Batches Seen": {"count": 1, "max": 361, "sum": 361.0, "min": 361}, "Total Records Seen": {"count": 1, "max": 361000, "sum": 361000.0, "min": 361000}, "Max Records Seen Between Resets": {"count": 1, "max": 40000, "sum": 40000.0, "min": 40000}, "Reset Count": {"count": 1, "max": 10, "sum": 10.0, "min": 10}}, "EndTime": 1523327711.052451, "Dimensions": {"Host": "algo-1", "Meta": "training_data_iter", "Operation": "training", "Algorithm": "factorization-machines", "epoch": 8}, "StartTime": 1523327711.052405}

#metrics {"Metrics": {"update.time": {"count": 1, "max": 246.22106552124023, "sum": 246.22106552124023, "min": 246.22106552124023}}, "EndTime": 1523327711.298837, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "factorization-machines"}, "StartTime": 1523327711.052354}

#metrics {"Metrics": {"Max Batches Seen Between Resets": {"count": 1, "max": 40, "sum": 40.0, "min": 40}, "Number of Batches Since Last Reset": {"count": 1, "max": 40, "sum": 40.0, "min": 40}, "Number of Records Since Last Reset": {"count": 1, "max": 40000, "sum": 40000.0, "min": 40000}, "Total Batches Seen": {"count": 1, "max": 401, "sum": 401.0, "min": 401}, "Total Records Seen": {"count": 1, "max": 401000, "sum": 401000.0, "min": 401000}, "Max Records Seen Between Resets": {"count": 1, "max": 40000, "sum": 40000.0, "min": 40000}, "Reset Count": {"count": 1, "max": 11, "sum": 11.0, "min": 11}}, "EndTime": 1523327711.29901, "Dimensions": {"Host": "algo-1", "Meta": "training_data_iter", "Operation": "training", "Algorithm": "factorization-machines", "epoch": 9}, "StartTime": 1523327711.298963}

#metrics {"Metrics": {"update.time": {"count": 1, "max": 250.37789344787598, "sum": 250.37789344787598, "min": 250.37789344787598}}, "EndTime": 1523327711.549531, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "factorization-machines"}, "StartTime": 1523327711.298913}

#metrics {"Metrics": {"Max Batches Seen Between Resets": {"count": 1, "max": 40, "sum": 40.0, "min": 40}, "Number of Batches Since Last Reset": {"count": 1, "max": 40, "sum": 40.0, "min": 40}, "Number of Records Since Last Reset": {"count": 1, "max": 40000, "sum": 40000.0, "min": 40000}, "Total Batches Seen": {"count": 1, "max": 441, "sum": 441.0, "min": 441}, "Total Records Seen": {"count": 1, "max": 441000, "sum": 441000.0, "min": 441000}, "Max Records Seen Between Resets": {"count": 1, "max": 40000, "sum": 40000.0, "min": 40000}, "Reset Count": {"count": 1, "max": 12, "sum": 12.0, "min": 12}}, "EndTime": 1523327711.549647, "Dimensions": {"Host": "algo-1", "Meta": "training_data_iter", "Operation": "training", "Algorithm": "factorization-machines", "epoch": 10}, "StartTime": 1523327711.549618}

#metrics {"Metrics": {"update.time": {"count": 1, "max": 252.0461082458496, "sum": 252.0461082458496, "min": 252.0461082458496}}, "EndTime": 1523327711.801841, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "factorization-machines"}, "StartTime": 1523327711.549585}

#metrics {"Metrics": {"Max Batches Seen Between Resets": {"count": 1, "max": 40, "sum": 40.0, "min": 40}, "Number of Batches Since Last Reset": {"count": 1, "max": 40, "sum": 40.0, "min": 40}, "Number of Records Since Last Reset": {"count": 1, "max": 40000, "sum": 40000.0, "min": 40000}, "Total Batches Seen": {"count": 1, "max": 481, "sum": 481.0, "min": 481}, "Total Records Seen": {"count": 1, "max": 481000, "sum": 481000.0, "min": 481000}, "Max Records Seen Between Resets": {"count": 1, "max": 40000, "sum": 40000.0, "min": 40000}, "Reset Count": {"count": 1, "max": 13, "sum": 13.0, "min": 13}}, "EndTime": 1523327711.801983, "Dimensions": {"Host": "algo-1", "Meta": "training_data_iter", "Operation": "training", "Algorithm": "factorization-machines", "epoch": 11}, "StartTime": 1523327711.801938}

#metrics {"Metrics": {"update.time": {"count": 1, "max": 254.15492057800293, "sum": 254.15492057800293, "min": 254.15492057800293}}, "EndTime": 1523327712.056283, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "factorization-machines"}, "StartTime": 1523327711.801901}

#metrics {"Metrics": {"Max Batches Seen Between Resets": {"count": 1, "max": 40, "sum": 40.0, "min": 40}, "Number of Batches Since Last Reset": {"count": 1, "max": 40, "sum": 40.0, "min": 40}, "Number of Records Since Last Reset": {"count": 1, "max": 40000, "sum": 40000.0, "min": 40000}, "Total Batches Seen": {"count": 1, "max": 521, "sum": 521.0, "min": 521}, "Total Records Seen": {"count": 1, "max": 521000, "sum": 521000.0, "min": 521000}, "Max Records Seen Between Resets": {"count": 1, "max": 40000, "sum": 40000.0, "min": 40000}, "Reset Count": {"count": 1, "max": 14, "sum": 14.0, "min": 14}}, "EndTime": 1523327712.056388, "Dimensions": {"Host": "algo-1", "Meta": "training_data_iter", "Operation": "training", "Algorithm": "factorization-machines", "epoch": 12}, "StartTime": 1523327712.05636}

#metrics {"Metrics": {"update.time": {"count": 1, "max": 252.57205963134766, "sum": 252.57205963134766, "min": 252.57205963134766}}, "EndTime": 1523327712.309099, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "factorization-machines"}, "StartTime": 1523327712.05633}

#metrics {"Metrics": {"Max Batches Seen Between Resets": {"count": 1, "max": 40, "sum": 40.0, "min": 40}, "Number of Batches Since Last Reset": {"count": 1, "max": 40, "sum": 40.0, "min": 40}, "Number of Records Since Last Reset": {"count": 1, "max": 40000, "sum": 40000.0, "min": 40000}, "Total Batches Seen": {"count": 1, "max": 561, "sum": 561.0, "min": 561}, "Total Records Seen": {"count": 1, "max": 561000, "sum": 561000.0, "min": 561000}, "Max Records Seen Between Resets": {"count": 1, "max": 40000, "sum": 40000.0, "min": 40000}, "Reset Count": {"count": 1, "max": 15, "sum": 15.0, "min": 15}}, "EndTime": 1523327712.309221, "Dimensions": {"Host": "algo-1", "Meta": "training_data_iter", "Operation": "training", "Algorithm": "factorization-machines", "epoch": 13}, "StartTime": 1523327712.309192}

#metrics {"Metrics": {"update.time": {"count": 1, "max": 283.8430404663086, "sum": 283.8430404663086, "min": 283.8430404663086}}, "EndTime": 1523327712.593195, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "factorization-machines"}, "StartTime": 1523327712.309159}

#metrics {"Metrics": {"Max Batches Seen Between Resets": {"count": 1, "max": 40, "sum": 40.0, "min": 40}, "Number of Batches Since Last Reset": {"count": 1, "max": 40, "sum": 40.0, "min": 40}, "Number of Records Since Last Reset": {"count": 1, "max": 40000, "sum": 40000.0, "min": 40000}, "Total Batches Seen": {"count": 1, "max": 601, "sum": 601.0, "min": 601}, "Total Records Seen": {"count": 1, "max": 601000, "sum": 601000.0, "min": 601000}, "Max Records Seen Between Resets": {"count": 1, "max": 40000, "sum": 40000.0, "min": 40000}, "Reset Count": {"count": 1, "max": 16, "sum": 16.0, "min": 16}}, "EndTime": 1523327712.593356, "Dimensions": {"Host": "algo-1", "Meta": "training_data_iter", "Operation": "training", "Algorithm": "factorization-machines", "epoch": 14}, "StartTime": 1523327712.593309}

#metrics {"Metrics": {"update.time": {"count": 1, "max": 251.01184844970703, "sum": 251.01184844970703, "min": 251.01184844970703}}, "EndTime": 1523327712.844509, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "factorization-machines"}, "StartTime": 1523327712.593264}

#metrics {"Metrics": {"Max Batches Seen Between Resets": {"count": 1, "max": 40, "sum": 40.0, "min": 40}, "Number of Batches Since Last Reset": {"count": 1, "max": 40, "sum": 40.0, "min": 40}, "Number of Records Since Last Reset": {"count": 1, "max": 40000, "sum": 40000.0, "min": 40000}, "Total Batches Seen": {"count": 1, "max": 641, "sum": 641.0, "min": 641}, "Total Records Seen": {"count": 1, "max": 641000, "sum": 641000.0, "min": 641000}, "Max Records Seen Between Resets": {"count": 1, "max": 40000, "sum": 40000.0, "min": 40000}, "Reset Count": {"count": 1, "max": 17, "sum": 17.0, "min": 17}}, "EndTime": 1523327712.844618, "Dimensions": {"Host": "algo-1", "Meta": "training_data_iter", "Operation": "training", "Algorithm": "factorization-machines", "epoch": 15}, "StartTime": 1523327712.844589}

#metrics {"Metrics": {"update.time": {"count": 1, "max": 274.14679527282715, "sum": 274.14679527282715, "min": 274.14679527282715}}, "EndTime": 1523327713.118908, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "factorization-machines"}, "StartTime": 1523327712.844559}

#metrics {"Metrics": {"Max Batches Seen Between Resets": {"count": 1, "max": 40, "sum": 40.0, "min": 40}, "Number of Batches Since Last Reset": {"count": 1, "max": 40, "sum": 40.0, "min": 40}, "Number of Records Since Last Reset": {"count": 1, "max": 40000, "sum": 40000.0, "min": 40000}, "Total Batches Seen": {"count": 1, "max": 681, "sum": 681.0, "min": 681}, "Total Records Seen": {"count": 1, "max": 681000, "sum": 681000.0, "min": 681000}, "Max Records Seen Between Resets": {"count": 1, "max": 40000, "sum": 40000.0, "min": 40000}, "Reset Count": {"count": 1, "max": 18, "sum": 18.0, "min": 18}}, "EndTime": 1523327713.11901, "Dimensions": {"Host": "algo-1", "Meta": "training_data_iter", "Operation": "training", "Algorithm": "factorization-machines", "epoch": 16}, "StartTime": 1523327713.118982}

#metrics {"Metrics": {"update.time": {"count": 1, "max": 282.3789119720459, "sum": 282.3789119720459, "min": 282.3789119720459}}, "EndTime": 1523327713.401545, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "factorization-machines"}, "StartTime": 1523327713.118954}

#metrics {"Metrics": {"Max Batches Seen Between Resets": {"count": 1, "max": 40, "sum": 40.0, "min": 40}, "Number of Batches Since Last Reset": {"count": 1, "max": 40, "sum": 40.0, "min": 40}, "Number of Records Since Last Reset": {"count": 1, "max": 40000, "sum": 40000.0, "min": 40000}, "Total Batches Seen": {"count": 1, "max": 721, "sum": 721.0, "min": 721}, "Total Records Seen": {"count": 1, "max": 721000, "sum": 721000.0, "min": 721000}, "Max Records Seen Between Resets": {"count": 1, "max": 40000, "sum": 40000.0, "min": 40000}, "Reset Count": {"count": 1, "max": 19, "sum": 19.0, "min": 19}}, "EndTime": 1523327713.401726, "Dimensions": {"Host": "algo-1", "Meta": "training_data_iter", "Operation": "training", "Algorithm": "factorization-machines", "epoch": 17}, "StartTime": 1523327713.401679}

#metrics {"Metrics": {"update.time": {"count": 1, "max": 229.57706451416016, "sum": 229.57706451416016, "min": 229.57706451416016}}, "EndTime": 1523327713.631451, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "factorization-machines"}, "StartTime": 1523327713.401627}

#metrics {"Metrics": {"Max Batches Seen Between Resets": {"count": 1, "max": 40, "sum": 40.0, "min": 40}, "Number of Batches Since Last Reset": {"count": 1, "max": 40, "sum": 40.0, "min": 40}, "Number of Records Since Last Reset": {"count": 1, "max": 40000, "sum": 40000.0, "min": 40000}, "Total Batches Seen": {"count": 1, "max": 761, "sum": 761.0, "min": 761}, "Total Records Seen": {"count": 1, "max": 761000, "sum": 761000.0, "min": 761000}, "Max Records Seen Between Resets": {"count": 1, "max": 40000, "sum": 40000.0, "min": 40000}, "Reset Count": {"count": 1, "max": 20, "sum": 20.0, "min": 20}}, "EndTime": 1523327713.631592, "Dimensions": {"Host": "algo-1", "Meta": "training_data_iter", "Operation": "training", "Algorithm": "factorization-machines", "epoch": 18}, "StartTime": 1523327713.631539}

#metrics {"Metrics": {"update.time": {"count": 1, "max": 279.3447971343994, "sum": 279.3447971343994, "min": 279.3447971343994}}, "EndTime": 1523327713.911077, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "factorization-machines"}, "StartTime": 1523327713.631507}

#metrics {"Metrics": {"Max Batches Seen Between Resets": {"count": 1, "max": 40, "sum": 40.0, "min": 40}, "Number of Batches Since Last Reset": {"count": 1, "max": 40, "sum": 40.0, "min": 40}, "Number of Records Since Last Reset": {"count": 1, "max": 40000, "sum": 40000.0, "min": 40000}, "Total Batches Seen": {"count": 1, "max": 801, "sum": 801.0, "min": 801}, "Total Records Seen": {"count": 1, "max": 801000, "sum": 801000.0, "min": 801000}, "Max Records Seen Between Resets": {"count": 1, "max": 40000, "sum": 40000.0, "min": 40000}, "Reset Count": {"count": 1, "max": 21, "sum": 21.0, "min": 21}}, "EndTime": 1523327713.911239, "Dimensions": {"Host": "algo-1", "Meta": "training_data_iter", "Operation": "training", "Algorithm": "factorization-machines", "epoch": 19}, "StartTime": 1523327713.911192}

[04/10/2018 02:35:13 WARNING 140269428983616] wait_for_all_workers will not sync workers since the kv store is not running distributed
[04/10/2018 02:35:13 INFO 140269428983616] Pulling entire model from kvstore to finalize
#metrics {"Metrics": {"finalize.time": {"count": 1, "max": 12.066125869750977, "sum": 12.066125869750977, "min": 12.066125869750977}}, "EndTime": 1523327713.923669, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "factorization-machines"}, "StartTime": 1523327713.911147}

[04/10/2018 02:35:13 INFO 140269428983616] Saved checkpoint to "/tmp/tmpvKfqom/state-0001.params"
[04/10/2018 02:35:13 INFO 140269428983616] #test_score (algo-1) : rmse
[04/10/2018 02:35:13 INFO 140269428983616] #test_score (algo-1) : 0.232833703221
#metrics {"Metrics": {"Max Batches Seen Between Resets": {"count": 1, "max": 10, "sum": 10.0, "min": 10}, "Number of Batches Since Last Reset": {"count": 1, "max": 10, "sum": 10.0, "min": 10}, "Number of Records Since Last Reset": {"count": 1, "max": 10000, "sum": 10000.0, "min": 10000}, "Total Batches Seen": {"count": 1, "max": 10, "sum": 10.0, "min": 10}, "Total Records Seen": {"count": 1, "max": 10000, "sum": 10000.0, "min": 10000}, "Max Records Seen Between Resets": {"count": 1, "max": 10000, "sum": 10000.0, "min": 10000}, "Reset Count": {"count": 1, "max": 1, "sum": 1.0, "min": 1}}, "EndTime": 1523327713.977791, "Dimensions": {"Host": "algo-1", "Meta": "test_data_iter", "Operation": "training", "Algorithm": "factorization-machines"}, "StartTime": 1523327713.97774}

#metrics {"Metrics": {"totaltime": {"count": 1, "max": 4926.310062408447, "sum": 4926.310062408447, "min": 4926.310062408447}, "setuptime": {"count": 1, "max": 35.804033279418945, "sum": 35.804033279418945, "min": 35.804033279418945}}, "EndTime": 1523327713.979246, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "factorization-machines"}, "StartTime": 1523327713.923716}

===== Job Complete =====
Billable seconds: 156
In [17]:
sfm_predictor = sfm.deploy(initial_instance_count=1,
                         instance_type='ml.m4.xlarge')
INFO:sagemaker:Creating model with name: factorization-machines-2018-04-10-02-36-10-107
INFO:sagemaker:Creating endpoint with name factorization-machines-2018-04-10-02-29-53-695
-------------------------------------------------------------------------------------!
In [18]:
from sagemaker.predictor import json_deserializer
import json

# SageMaker's built-in FM expects JSON of the form
# {"instances": [{"features": [...]}, ...]}
def fm_serializer(data):
    js = {'instances': []}
    for row in data:
        js['instances'].append({'features': row.tolist()})
    return json.dumps(js)

sfm_predictor.content_type = 'application/json'
sfm_predictor.serializer = fm_serializer
sfm_predictor.deserializer = json_deserializer
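To make the wire format concrete, here is what fm_serializer produces for a couple of toy rows (the values are made up):

# two fake dense rows, three features each -> one 'instances' entry per row
demo = np.array([[0.0, 1.0, 0.5],
                 [1.0, 0.0, 0.2]])
print(fm_serializer(demo))
# {"instances": [{"features": [0.0, 1.0, 0.5]}, {"features": [1.0, 0.0, 0.2]}]}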
In [25]:
import scipy.sparse as sp
import sagemaker.amazon.common as smac
from io import FileIO
import csv

# accumulators for building a COO-format sparse test matrix
num_test_samples = 0
te_row_ind = []
te_col_ind = []
te_data = []
te_idx = 0
y_te = []

# each row of the header file is one feature name; the count is the feature dimension
headers = []
with open(headerlocal, 'r') as csvfile:
    headerreader = csv.reader(csvfile)
    for row in headerreader:
        headers.append("".join(row))

num_features = len(headers)

# unpack the RecordIO-protobuf test set into (row, col, value) triplets
test_records = smac.read_records(FileIO(testlocal))
num_test_samples = num_test_samples + len(test_records)
for test_record in test_records:
    te_row_ind.extend([te_idx] * len(test_record.features['values'].float32_tensor.values))
    te_col_ind.extend(test_record.features['values'].float32_tensor.keys)
    te_data.extend(test_record.features['values'].float32_tensor.values)
    te_idx = te_idx + 1
    y_te.append(test_record.label['values'].float32_tensor.values[0])

print("Creating test sparse matrix of shape {0},{1}".format(num_test_samples, num_features))
X_te_sparse = sp.csr_matrix((np.array(te_data), (np.array(te_row_ind), np.array(te_col_ind))),
                            shape=(num_test_samples, num_features))
print("X_te shape: {0}".format(X_te_sparse.shape))

# score the test set one row at a time against the endpoint
predictions = []
for idx in range(0, X_te_sparse.shape[0]):
    result = sfm_predictor.predict(X_te_sparse[idx].toarray())
    predictions += [r['score'] for r in result['predictions']]

predictions = np.array(predictions)
print("Got {0} predictions: e.g. {1}".format(len(predictions), predictions[0]))
Creating test sparse matrix of shape 10000,62520
X_te shape: (10000, 62520)
Got 10000 predictions: e.g. 0.166213095188
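The loop above issues one endpoint call per row (10,000 requests). Since fm_serializer already packs every row of its input into a single request, batching is a small change; the catch is that each dense row is ~62k JSON numbers (a few hundred KB), and SageMaker caps request payloads at a few MB, so batches must stay small. A sketch with an arbitrary batch size of 10:

# send 10 rows per request; larger batches risk exceeding the payload size limit
batch = 10
predictions = []
for start in range(0, X_te_sparse.shape[0], batch):
    result = sfm_predictor.predict(X_te_sparse[start:start + batch].toarray())
    predictions += [r['score'] for r in result['predictions']]
predictions = np.array(predictions)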
In [26]:
from sklearn.metrics import f1_score, accuracy_score, roc_auc_score, average_precision_score
from sklearn.metrics import classification_report, confusion_matrix, precision_recall_curve
from sklearn.metrics import precision_score, precision_recall_fscore_support, recall_score
# threshold the regression scores at 0.5 to get hard labels
predvecfm = np.where(predictions > 0.5, 1, 0)
print('Weighted F1: {}'.format(f1_score(y_te, predvecfm, average='weighted')))
print('Accuracy: {}'.format(accuracy_score(y_te, predvecfm)))
# ROC is computed on the thresholded labels here, mirroring the tffm job above
print('Weighted ROC: {}'.format(roc_auc_score(y_te, predvecfm, average='weighted')))
Weighted F1: 0.921709510534
Accuracy: 0.9394
Weighted ROC: 0.593726129929
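One caveat: roc_auc_score is being fed the thresholded 0/1 predictions, which understates ranking quality. Since the regressor returns continuous scores, AUC (and a precision-recall curve) can be computed on the raw values directly:

# rank-based metrics should use the raw scores, not the 0.5-thresholded labels
print('ROC AUC (raw scores): {}'.format(roc_auc_score(y_te, predictions)))
precision, recall, thresholds = precision_recall_curve(y_te, predictions)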
In [27]:
print('Classification report: {}'.format(classification_report(y_te, predvecfm)))
Classification report:              precision    recall  f1-score   support

        0.0       0.94      1.00      0.97      9294
        1.0       0.79      0.19      0.31       706

avg / total       0.93      0.94      0.92     10000

In [28]:
print("Confusion matrix")
print(pd.crosstab(np.array(y_te), predvecfm, rownames=['actuals'], colnames=['predictions']))
Confusion matrix
predictions     0    1
actuals               
0.0          9259   35
1.0           571  135
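On this split the built-in FM edges out the custom tffm container (weighted F1 0.922 vs. 0.919, accuracy 0.939 vs. 0.936), and both struggle with recall on the rare positive class (roughly 19% of 706 positives). Finally, both endpoints keep billing until deleted; a cleanup sketch using the v1 SDK API used throughout this notebook:

# tear down both hosted endpoints to stop incurring charges
sagemaker.Session().delete_endpoint(fm_predictor.endpoint)
sagemaker.Session().delete_endpoint(sfm_predictor.endpoint)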