{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Train a model locally or remotely"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn import datasets, svm\n",
"\n",
"# Load the digits dataset that ships with scikit-learn\n",
"digits = datasets.load_digits()\n",
"\n",
"# Hold out the very last sample; everything before it is used for fitting\n",
"train_features = digits.data[:-1]\n",
"train_labels = digits.target[:-1]\n",
"held_out_sample = digits.data[-1:]\n",
"\n",
"clf = svm.SVC(C=100., gamma=0.001, probability=True)\n",
"clf.fit(train_features, train_labels)\n",
"\n",
"# Sanity check: predict the held-out sample\n",
"clf.predict(held_out_sample)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# %pip (unlike !pip) reports on the environment of the running kernel\n",
"%pip show scikit-learn"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Save model file"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# %pip (unlike !pip) installs into the environment of the running kernel\n",
"%pip install joblib\n",
"from joblib import dump\n",
"\n",
"# Serialize the fitted classifier to 'model.joblib' in the working directory\n",
"dump(clf, 'model.joblib')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 1: Write a model transform script\n",
"\n",
"#### Make sure you have a ...\n",
"\n",
"- `load_model` function\n",
"    - input argument is the model path\n",
"    - returns the loaded model object\n",
"    - the model file name is the same as the one you saved the model under (see the step above)\n",
"\n",
"- `predict` function\n",
"    - input arguments are the loaded model object and a payload\n",
"    - returns the result of `model.predict`\n",
"    - make sure you format the result as a list of strings: a single string for real-time inference, or multiple strings for mini-batch\n",
"    - a list, string or `np.array` sent by a client for prediction arrives as bytes, so convert the payload back to a list, string or `np.array` before predicting\n",
"    - return the error for debugging\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%writefile modelscript_sklearn.py\n",
"import sklearn\n",
"from joblib import load\n",
"import numpy as np\n",
"import os\n",
"\n",
"# Number of features in one sample; every payload is reshaped to (1, N_FEATURES)\n",
"N_FEATURES = 64\n",
"\n",
"\n",
"def load_model(modelpath):\n",
"    \"\"\"Return the model stored as 'model.joblib' inside the directory `modelpath`.\n",
"\n",
"    joblib.load unpickles the file, so only point this at model files you trust.\n",
"    \"\"\"\n",
"    print(modelpath)\n",
"    clf = load(os.path.join(modelpath, 'model.joblib'))\n",
"    print(\"loaded\")\n",
"    return clf\n",
"\n",
"\n",
"def predict(model, payload):\n",
"    \"\"\"Return model.predict for a single sample as a list of strings.\n",
"\n",
"    Args:\n",
"        model: Object returned by load_model.\n",
"        payload: Either an np.ndarray holding one sample (local use), or a\n",
"            sequence whose first item is a dict carrying the raw float64 bytes\n",
"            of one sample under the key 'body' (container deployment).\n",
"\n",
"    Returns:\n",
"        A one-element list with the string form of the prediction. If anything\n",
"        fails, a two-element list of strings instead: the payload type and the\n",
"        error message.\n",
"    \"\"\"\n",
"    try:\n",
"        # locally, payload may come in as an np.ndarray\n",
"        if isinstance(payload, np.ndarray):\n",
"            # Convert by value; reinterpreting the raw buffer would corrupt non-float64 arrays\n",
"            features = np.asarray(payload, dtype=np.float64)\n",
"        # in remote / container based deployment, payload comes in as a stream of bytes\n",
"        else:\n",
"            features = np.frombuffer(payload[0]['body'], dtype=np.float64)\n",
"        out = [str(model.predict(features.reshape((1, N_FEATURES))))]\n",
"    except Exception as e:\n",
"        # Keep every element a string, as the success path does\n",
"        out = [str(type(payload)), str(e)]  # useful for debugging!\n",
"\n",
"    return out"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Does this work locally? (not \"_in a container locally_\", but _actually_ on the local machine)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Import just the two entry points rather than `import *`, so the notebook namespace stays explicit\n",
"from modelscript_sklearn import load_model, predict\n",
"\n",
"# 'model.joblib' was saved to the current working directory\n",
"model = load_model('.')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"predict(model,digits.data[-1:])[0]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### OK, great! Now let's install ezsmdeploy\n",
"In some cases, the install fails because of an already-installed package called greenlet.\n",
"It is not a direct dependency of ezsmdeploy, but it interferes with the installation.\n",
"To fix this, either install into a clean virtualenv, or run:\n",
"\n",
"`pip install ezsmdeploy[locust] --ignore-installed greenlet`"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# %pip (unlike !pip) installs into the environment of the running kernel\n",
"%pip install ezsmdeploy"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import ezsmdeploy"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### If you have been running other inference containers in local mode, stop existing containers to avoid conflict"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!docker container stop $(docker container ls -aq) >/dev/null"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Deploy locally"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ez = ezsmdeploy.Deploy(model = ['model.joblib'], # if you intend to add models later, pass model as list, otherwise str\n",
" script = 'modelscript_sklearn.py',\n",
" requirements = ['pyarrow','scikit-learn==0.22.1','numpy','joblib'], #or pass in the path to requirements.txt\n",
" instance_type = 'local',\n",
" autoscale = True,\n",
" #framework = 'sklearn', # not required if you provide requirements\n",
" wait = True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Test containerized version locally"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"out = ez.predictor.predict(digits.data[-1:].tobytes()).decode()\n",
"out"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!docker container stop $(docker container ls -aq) >/dev/null"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Deploy on SageMaker"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ezonsm = ezsmdeploy.Deploy(model = ['model.joblib','model.joblib'], # example of multimodel endpoint. \n",
" script = 'modelscript_sklearn.py',\n",
" requirements = ['pyarrow','scikit-learn==0.22.1','numpy','joblib'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#!./src/build-docker.sh test"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"out = ezonsm.predictor.predict(digits.data[-1:].tobytes(),target_model='model1.tar.gz').decode() \n",
"out"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Install the additional locust testing functionality to enable automated testing"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# %pip installs into the kernel's environment; the quotes stop the shell from globbing the [locust] extra\n",
"%pip install \"ezsmdeploy[locust]\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ezonsm.test(input_data=digits.data[-1:].tobytes(), target_model='model1.tar.gz',usercount=20,hatchrate=10,timeoutsecs=10)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"pd.read_csv('src/locuststats_stats.csv')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ezonsm.predictor.delete_endpoint()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "conda_python3",
"language": "python",
"name": "conda_python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.10"
}
},
"nbformat": 4,
"nbformat_minor": 4
}