## Deploying Voting ensemble Sklearn models on SageMaker

In [1]:
!rm modelscript_ensemble_sklearn.py
!rm *.joblib
!rm src

rm: cannot remove ‘src’: No such file or directory


In [2]:
# Voting Ensemble for Classification
import pandas
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import VotingClassifier

# Fixed seed so the decision tree (and therefore the ensemble, its CV score and
# the saved model files) is the same on every Restart & Run All.
SEED = 7

# Get Data
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
dataframe = pandas.read_csv(url, names=names)
array = dataframe.values
X = array[:, 0:8]  # the eight feature columns
Y = array[:, 8]    # the 'class' label column

# Create the sub models
model1 = LogisticRegression(solver='newton-cg')
model2 = DecisionTreeClassifier(random_state=SEED)
model3 = SVC()
estimators = [('logistic', model1), ('cart', model2), ('svm', model3)]

# Create the ensemble model
ensemble = VotingClassifier(estimators)

# Fit on the full dataset: this fitted object is what gets saved and deployed.
ensemble.fit(X, Y)

# Estimate accuracy with 10-fold cross-validation. cross_val_score fits clones
# of the ensemble per fold, so the fitted `ensemble` above is left untouched.
kfold = model_selection.KFold(n_splits=10)
results = model_selection.cross_val_score(ensemble, X, Y, cv=kfold)
print(results.mean())

0.7617053998632947


### Save model files

In [3]:
!pip install joblib -q
from joblib import dump

You should consider upgrading via the '/home/ec2-user/anaconda3/envs/python3/bin/python -m pip install --upgrade pip' command.[0m


In [4]:
# Save each fitted sub-model under its ensemble name.
# NOTE: `ensemble.estimators` holds the original, *unfitted* estimators --
# VotingClassifier.fit trains clones, which are exposed by name through
# `named_estimators_`. Dump those so the individual files are usable models.
for name, fitted_estimator in ensemble.named_estimators_.items():
    print(name)
    dump(fitted_estimator, name + '.joblib')

# Save the whole fitted ensemble as well; this is the file the serving script returns.
dump(ensemble, 'ensemble.joblib')

logistic
cart
svm


['ensemble.joblib']

In [5]:
# List the serialized model files written by the previous cell.
!ls *.joblib

cart.joblib ensemble.joblib logistic.joblib svm.joblib


In [6]:
# Sanity check: predict on the first sample, kept 2-D as a (1, 8) slice.
ensemble.predict(X[:1])

array([1.])

## Step 1 : Write a model transform script

#### Make sure you have a ...

- "load_model" function
 - input args are model path
 - returns loaded model object
 - model name is the same as what you saved the model file as (see above step)
<br><br>
- "predict" function
 - input args are the loaded model object and a payload
 - returns the result of model.predict
 - format the return value as a list of strings: a single string for real-time inference, or multiple strings for mini-batch
 - a list, string, or np.array sent from a client arrives as bytes; convert it back to a list, string, or np.array before predicting
 - return the error for debugging


In [7]:
%%writefile modelscript_ensemble_sklearn.py
import sklearn
from joblib import load
import numpy as np
import os

#Return loaded model
def load_model(modelpath):
    """Load the saved voting ensemble from ``modelpath`` and return it.

    Args:
        modelpath: Directory that contains the ``.joblib`` files.

    Returns:
        The object deserialized from ``ensemble.joblib``.
    """
    print(modelpath)

    # Option 1: load every sub-model on its own. This only demonstrates the
    # approach -- the loaded objects are discarded and never returned.
    print("loading individuals")
    for filename in ('logistic.joblib', 'cart.joblib', 'svm.joblib'):
        load(os.path.join(modelpath, filename))

    # Option 2: load the entire ensemble in one go (this is what is returned).
    print("loading ensemble")
    ensemble_path = os.path.join(modelpath, 'ensemble.joblib')
    voting_model = load(ensemble_path)
    print("loaded")
    return voting_model

# return prediction based on loaded model (from the step above) and an input payload
def predict(model, payload):
    """Run ``model.predict`` on a request payload and return a list of strings.

    Args:
        model: The object returned by ``load_model``; must expose ``predict``.
        payload: Either an ``np.ndarray`` (local testing) or a bytes-like
            buffer of float64 values (what the container receives), holding
            one or more rows of 8 features each.

    Returns:
        ``[str(predictions)]`` on success. On failure, a two-item list of
        strings ``[<payload type>, <error message>]`` so the problem can be
        debugged from the client side.
    """
    n_features = 8  # number of feature columns the ensemble was trained on
    try:
        if isinstance(payload, np.ndarray):
            # locally, payload may come in as an np.ndarray
            features = payload
        else:
            # in remote / container based deployment, payload comes in as a
            # stream of bytes; np.frombuffer decodes it as float64 by default
            features = np.frombuffer(payload)
        # -1 lets a payload carry one row or several rows
        out = [str(model.predict(features.reshape((-1, n_features))))]
    except Exception as e:
        # Deliberately broad: the error is returned to the caller for debugging.
        # Every item is a string so the response stays serializable.
        out = [str(type(payload)), str(e)]

    return out

Writing modelscript_ensemble_sklearn.py


## Does this work locally? (not "_in a container locally_", but _actually_ locally)

In [8]:
# Import explicitly instead of `import *`: the star import silently leaked
# `sklearn` into the notebook namespace, which the Deploy cells rely on.
import sklearn  # used below to pin the container's scikit-learn version
from modelscript_ensemble_sklearn import load_model, predict

model = load_model('.')

.
loading individuals
loading ensemble
loaded


In [9]:
# Smoke-test the script in-process: pass the first row as an np.ndarray.
predict(model,X[0])

['[1.]']

### ok great! Now let's install ezsmdeploy
In some cases, the install fails because of an already-installed package called greenlet.
It is not a direct dependency of ezsmdeploy, but it interferes with the installation.
To fix this, either install in a virtualenv, or run:
`pip install ezsmdeploy[locust] --ignore-installed greenlet`

In [11]:
!pip uninstall -y ezsmdeploy
!pip install ezsmdeploy==1.0.8

Collecting ezsmdeploy==1.0.8
 Downloading ezsmdeploy-1.0.8-py3-none-any.whl (23 kB)
Installing collected packages: ezsmdeploy
Successfully installed ezsmdeploy-1.0.8
You should consider upgrading via the '/home/ec2-user/anaconda3/envs/python3/bin/python -m pip install --upgrade pip' command.[0m


In [12]:
import ezsmdeploy

#### If you have been running other inference containers in local mode, stop existing containers to avoid conflict

In [13]:
# WARNING: this stops every container on the host (`ls -aq` lists all container
# IDs), not only local-mode inference containers. Only stdout is discarded.
!docker container stop $(docker container ls -aq) >/dev/null

## Deploy locally

#### tar all models

In [14]:
!rm model.tar.gz
!find ./ -name "*.joblib" | tar -czf model.tar.gz -T -

In [15]:
# Pin scikit-learn in the container to the version installed in this kernel.
container_requirements = ['scikit-learn==' + sklearn.__version__, 'numpy', 'joblib']  # or pass in the path to requirements.txt

ez = ezsmdeploy.Deploy(
    model='model.tar.gz',  # if you intend to add models later, pass model as list, otherwise str
    script='modelscript_ensemble_sklearn.py',
    requirements=container_requirements,
    instance_type='local',
    wait=True,
)

[K0:00:00.082217 | compressed model(s)
[K0:00:00.205353 | uploaded model tarball(s) ; check returned modelpath
[K0:00:00.206102 | added requirements file
[K0:00:00.207730 | added source file
[K0:00:00.208891 | added Dockerfile
[K0:00:00.210776 | added model_handler and docker utils
[K0:00:00.210855 | building docker container
[K0:00:43.321145 | built docker container
[K2m∙∙∙[0m [K

update_endpoint is a no-op in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


[K0:00:43.453548 | created model(s). Now deploying on local
[32m∙∙∙[0m [KAttaching to tmpctykbmbm_algo-1-tj704_1
[36malgo-1-tj704_1 |[0m Starting the inference server with 64 workers.
[32m∙â—∙[0m [K[36malgo-1-tj704_1 |[0m [2021-02-12 01:06:49 +0000] [9] [INFO] Starting gunicorn 20.0.4
[36malgo-1-tj704_1 |[0m [2021-02-12 01:06:49 +0000] [9] [INFO] Listening at: unix:/tmp/gunicorn.sock (9)
[36malgo-1-tj704_1 |[0m [2021-02-12 01:06:49 +0000] [9] [INFO] Using worker: gevent
[36malgo-1-tj704_1 |[0m [2021-02-12 01:06:49 +0000] [13] [INFO] Booting worker with pid: 13
[36malgo-1-tj704_1 |[0m [2021-02-12 01:06:49 +0000] [14] [INFO] Booting worker with pid: 14
[36malgo-1-tj704_1 |[0m [2021-02-12 01:06:49 +0000] [15] [INFO] Booting worker with pid: 15
[36malgo-1-tj704_1 |[0m [2021-02-12 01:06:49 +0000] [16] [INFO] Booting worker with pid: 16
[32m∙∙â—[0m [K[36malgo-1-tj704_1 |[0m [2021-02-12 01:06:49 +0000] [18] [INFO] Booting worker with pid: 18
[36malgo-1-tj704_1 |[

## Test containerized version locally

In [16]:
# Send the first row as raw bytes; the serving script decodes them with np.frombuffer.
payload = X[0].reshape((1, 8)).tobytes()
out = ez.predictor.predict(payload).decode()
out

[36malgo-1-tj704_1 |[0m received input data
[36malgo-1-tj704_1 |[0m b'\x00\x00\x00\x00\x00\x00\x18@\x00\x00\x00\x00\x00\x80b@\x00\x00\x00\x00\x00\x00R@\x00\x00\x00\x00\x00\x80A@\x00\x00\x00\x00\x00\x00\x00\x00\xcd\xcc\xcc\xcc\xcc\xcc@@\xaa\xf1\xd2Mb\x10\xe4?\x00\x00\x00\x00\x00\x00I@'
[36malgo-1-tj704_1 |[0m /opt/ml/model
[36malgo-1-tj704_1 |[0m loading individuals
[36malgo-1-tj704_1 |[0m loading ensemble
[36malgo-1-tj704_1 |[0m loaded
[36malgo-1-tj704_1 |[0m predictions from model
[36malgo-1-tj704_1 |[0m ['[1.]']


'[1.]'

[36malgo-1-tj704_1 |[0m 172.18.0.1 - - [12/Feb/2021:01:07:29 +0000] "POST /invocations HTTP/1.1" 200 4 "-" "python-urllib3/1.26.2"


In [17]:
# Shut down the local endpoint. WARNING: this stops every container on the host
# (`ls -aq` lists all container IDs). Only stdout is discarded.
!docker container stop $(docker container ls -aq) >/dev/null

[36malgo-1-tj704_1 |[0m [2021-02-12 01:07:29 +0000] [9] [INFO] Handling signal: term
[36malgo-1-tj704_1 |[0m [2021-02-12 01:07:30 +0000] [212] [INFO] Worker exiting (pid: 212)
[36mtmpctykbmbm_algo-1-tj704_1 exited with code 0
[0mAborting on container exit...


## Deploy on SageMaker

In [18]:
# Pin scikit-learn in the container to the version installed in this kernel.
container_requirements = ['scikit-learn==' + sklearn.__version__, 'numpy', 'joblib']  # or pass in the path to requirements.txt

# No instance_type is given here, unlike the local deployment.
ezonsm = ezsmdeploy.Deploy(
    model='model.tar.gz',
    script='modelscript_ensemble_sklearn.py',
    requirements=container_requirements,
    prefix='testingprefix',
    wait=True,
)

[K0:00:00.081814 | compressed model(s)
[K0:00:00.277634 | uploaded model tarball(s) ; check returned modelpath
[K0:00:00.278338 | added requirements file
[K0:00:00.279678 | added source file
[K0:00:00.280726 | added Dockerfile
[K0:00:00.282357 | added model_handler and docker utils
[K0:00:00.282483 | building docker container
[K0:00:43.422978 | built docker container
[K2mâ—∙∙[0m [K

update_endpoint is a no-op in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


[K0:00:43.630886 | created model(s). Now deploying on ml.m5.xlarge
[K0:07:15.817639 | deployed model
[K0:07:15.818150 | estimated cost is $0.3 per hour
[K[32m0:07:15.818299 | Done! ✔[0m 


In [19]:
# !./src/build-docker.sh test

In [20]:
# Same request as the local test, now against the hosted endpoint.
payload = X[0].reshape((1, 8)).tobytes()
out = ezonsm.predictor.predict(payload).decode()
out

'[1.]'

In [21]:
# Clean up: delete the hosted endpoint (the deploy log above reported an
# estimated hourly cost for it).
ezonsm.predictor.delete_endpoint()