{ "cells": [
 { "cell_type": "markdown", "metadata": {}, "source": [ "# Load and serve a DistilBERT model from MXNet on the fly" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!pip install mxnet gluonnlp pixiedust" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "## Load a pretrained model locally or remotely" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import gluonnlp as nlp\n", "import mxnet as mx\n", "\n", "# Load pretrained DistilBERT weights and vocabulary from the GluonNLP model zoo\n", "model, vocab = nlp.model.get_model('distilbert_6_768_12', dataset_name='distilbert_book_corpus_wiki_en_uncased')\n", "\n", "# Tokenize, then transform into (token ids, valid length)\n", "tokenizer = nlp.data.BERTTokenizer(vocab, lower=True)\n", "transform = nlp.data.BERTSentenceTransform(tokenizer, max_seq_length=512, pair=False, pad=False)\n", "\n", "sample = transform(['Hello world!'])\n", "words, valid_len = mx.nd.array([sample[0]]), mx.nd.array([sample[1]])\n", "model(words, valid_len)\n", "\n", "# If you want to save this model and upload it as a file to S3, hybridize() it before serializing:\n", "# model.hybridize()\n", "# model(words, valid_len)\n", "# !mkdir mxnetmodel\n", "# model.export(path='./mxnetmodel/')\n", "\n", "# If you want to load the model inside the serving script from a model hub instead, pass model = None to Deploy" ] },
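 { "cell_type": "markdown", "metadata": {}, "source": [ "The forward pass returns one contextual embedding per token. As a quick sanity check (a sketch; the 768 below is the hidden size encoded in the model name `distilbert_6_768_12`), confirm the output shape:" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Sanity check: one 768-dim vector per token -> expect shape (1, num_tokens, 768)\n", "out = model(words, valid_len)\n", "print(out.shape)" ] },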
 { "cell_type": "markdown", "metadata": {}, "source": [ "## Step 1: Write a model transform script\n", "\n", "#### Make sure you have a ...\n", "\n", "- \"load_model\" function\n", "    - input arg is the model path\n", "    - returns the loaded model object\n", "    - the model file name must match what you saved the model as (see the step above)\n", "\n", "- \"predict\" function\n", "    - input args are the loaded model object and a payload\n", "    - returns the model's prediction\n", "    - format the return as a list containing a single string for real-time inference, or multiple strings for mini-batch\n", "    - a list, string, or np.array sent from a client arrives as bytes; convert it back to the type you need\n", "    - return any exception text so you can debug from the response\n" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%%writefile modelscript_mxnet.py\n", "import gluonnlp as nlp\n", "import mxnet as mx\n", "import json\n", "\n", "# Return the loaded model\n", "def load_model(modelpath):\n", "    # modelpath is unused here because the model comes from the GluonNLP model zoo\n", "    model, vocab = nlp.model.get_model('distilbert_6_768_12', dataset_name='distilbert_book_corpus_wiki_en_uncased')\n", "    print(\"loaded\")\n", "    return {'model': model, 'vocab': vocab}\n", "\n", "# Return a prediction based on the loaded model (from the step above) and an input payload\n", "def predict(modeldict, payload):\n", "    model = modeldict['model']\n", "    vocab = modeldict['vocab']\n", "\n", "    tokenizer = nlp.data.BERTTokenizer(vocab, lower=True)\n", "    transform = nlp.data.BERTSentenceTransform(tokenizer, max_seq_length=512, pair=False, pad=False)\n", "\n", "    try:\n", "        # BERTSentenceTransform indexes line[0], so always wrap the text in a list\n", "        # Local, plain string\n", "        if isinstance(payload, str):\n", "            sample = transform([payload])\n", "        # Local, raw bytes\n", "        elif isinstance(payload, bytes):\n", "            sample = transform([payload.decode()])\n", "        # Remote; the standard payload comes in as a list of dicts with a 'body' key\n", "        elif isinstance(payload, list):\n", "            sample = transform([payload[0]['body'].decode()])\n", "        else:\n", "            return [json.dumps({'response': \"Provide a string or a bytes string\",\n", "                                'payload': str(payload),\n", "                                'type': str(type(payload))})]\n", "\n", "        words, valid_len = mx.nd.array([sample[0]]), mx.nd.array([sample[1]])\n", "        out = model(words, valid_len)\n", "        out = json.dumps({'output': out.asnumpy().tolist()})\n", "    except Exception as e:\n", "        out = str(e)  # useful for debugging!\n", "    return [out]" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "## Does this work locally? (not \"_in a container locally_\", but _actually_ locally)" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from modelscript_mxnet import *\n", "model = load_model('')  # the path doesn't matter here since the script loads the model from the hub directly" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "predict(model, 'Hello World!')[0]" ] },
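 { "cell_type": "markdown", "metadata": {}, "source": [ "`predict` returns a list holding one JSON string, as defined in `modelscript_mxnet.py` above. If you want the embeddings back as an array, parse the `output` key (a quick sketch):" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import json\n", "import numpy as np\n", "\n", "# 'output' is the key the script serializes the embeddings under\n", "response = predict(model, 'Hello World!')[0]\n", "embeddings = np.array(json.loads(response)['output'])\n", "embeddings.shape" ] },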
 { "cell_type": "markdown", "metadata": {}, "source": [ "### OK, great! Now let's install ezsmdeploy" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!pip install ezsmdeploy" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import ezsmdeploy" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "#### If you have been running other inference containers in local mode, stop the existing containers to avoid conflicts" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!docker container stop $(docker container ls -aq) >/dev/null" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "## Deploy locally" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ez = ezsmdeploy.Deploy(model = None, # the script loads the distilbert model from the hub\n", "                       script = 'modelscript_mxnet.py',\n", "                       requirements = ['pyarrow', 'mxnet', 'gluonnlp', 'numpy', 'joblib'], # or pass in the path to a requirements.txt\n", "                       instance_type = 'local',\n", "                       wait = True)" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "## Test containerized version locally" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "Since you are downloading this model from a hub, the first invocation will be slow; invoke again to get an inference without all of the container logs." ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "out = ez.predictor.predict('Hello World').decode()\n", "out" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "## Deploy on SageMaker" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!./src/build-docker.sh" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ezonsm = ezsmdeploy.Deploy(model = None, # the script loads the distilbert model from the hub\n", "                           script = 'modelscript_mxnet.py',\n", "                           requirements = ['pyarrow', 'mxnet', 'gluonnlp', 'numpy', 'joblib'], # or pass in the path to a requirements.txt\n", "                           instance_type = 'ml.m4.xlarge',\n", "                           wait = True)" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "out = ezonsm.predictor.predict('Hello World').decode()\n", "out" ] },
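 { "cell_type": "markdown", "metadata": {}, "source": [ "You can also invoke the hosted endpoint from any client via boto3. A minimal sketch (the endpoint name and content type below are placeholder assumptions; substitute the endpoint name ezsmdeploy printed during deployment):" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import boto3\n", "\n", "# Placeholder endpoint name; substitute the endpoint ezsmdeploy created\n", "runtime = boto3.client('sagemaker-runtime')\n", "resp = runtime.invoke_endpoint(EndpointName='your-ezsmdeploy-endpoint',\n", "                               Body='Hello World',\n", "                               ContentType='text/plain')\n", "print(resp['Body'].read().decode())" ] },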
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ezonsm.predictor.delete_endpoint()" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }
 ],
 "metadata": { "kernelspec": { "display_name": "conda_python3", "language": "python", "name": "conda_python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.10" } },
 "nbformat": 4,
 "nbformat_minor": 4
}