# Load and serve a DistilBERT model from MXNet on the fly

In [None]:
!pip install mxnet gluonnlp pixiedust

## Train a model locally or remotely

In [None]:
import gluonnlp as nlp; import mxnet as mx;

# Fetch the pretrained DistilBERT network together with its vocabulary.
model, vocab = nlp.model.get_model(
    'distilbert_6_768_12',
    dataset_name='distilbert_book_corpus_wiki_en_uncased',
)

# Build the tokenizer first, then the sentence transform that wraps it.
tokenizer = nlp.data.BERTTokenizer(vocab, lower=True)
transform = nlp.data.BERTSentenceTransform(
    tokenizer,
    max_seq_length=512,
    pair=False,
    pad=False,
)

# Transform one sample sentence; items 0 and 1 of the result are fed to the model.
sample = transform(['Hello world!'])
words = mx.nd.array([sample[0]])
valid_len = mx.nd.array([sample[1]])

# To save this model and upload it as a file to S3, hybridize() it before serializing
# (the commented-out lines below show the steps).
# To load the model inside the script from a hub instead, don't pass in a model (or use model = None).

# model.hybridize()
# model(words, valid_len)
# !mkdir mxnetmodel
# model.export(path='./mxnetmodel/')

# Forward pass on the sample; as the cell's last expression, its result is displayed.
model(words, valid_len)

## Step 1 : Write a model transform script

#### Make sure you have a ...

- "load_model" function
    - input args are model path
    - returns loaded model object
    - model name is the same as what you saved the model file as (see above step)
<br><br>
- "predict" function
    - input args are the loaded model object and a payload
    - returns the result of model.predict
    - format the result as a list of strings: a single string inside the list for real-time inference, or multiple strings for mini-batch
    - a list, string, or np.array sent by a client for prediction arrives as bytes; convert it back to a list, string, or np.array as needed
    - return the error for debugging


In [None]:
%%writefile modelscript_mxnet.py
import gluonnlp as nlp; import mxnet as mx;
from joblib import load
import numpy as np
import os
import json

#Return loaded model
def load_model(modelpath):
    """Fetch the pretrained DistilBERT network and vocabulary via gluonnlp.

    Args:
        modelpath: accepted for interface compatibility; it is not used here,
            because the model is obtained through ``nlp.model.get_model``.

    Returns:
        dict with the network under ``'model'`` and the vocabulary under ``'vocab'``.
    """
    pretrained = nlp.model.get_model(
        'distilbert_6_768_12',
        dataset_name='distilbert_book_corpus_wiki_en_uncased',
    )
    net, vocabulary = pretrained
    print("loaded")
    return dict(model=net, vocab=vocabulary)

# return prediction based on loaded model (from the step above) and an input payload
def predict(modeldict, payload):
    """Run DistilBERT on one text payload and return the result in a one-item list.

    Args:
        modeldict: dict with the network under ``'model'`` and the vocabulary
            under ``'vocab'`` (the shape returned by ``load_model``).
        payload: the text to encode, given as one of
            - ``str`` (local call),
            - ``bytes`` (decoded with the default codec),
            - ``list`` whose first item maps ``'body'`` to a bytes value (remote call).

    Returns:
        A single-element list holding a string:
        - JSON ``{"output": ...}`` with the model output as nested lists, or
        - JSON describing the rejected payload when its type is unsupported, or
        - the exception message if anything fails while decoding, tokenizing
          or running the model (returned rather than raised, for debugging).
    """
    model = modeldict['model']
    vocab = modeldict['vocab']

    try:
        text = _payload_to_text(payload)
        if text is None:
            # Reject unsupported payload types before building the tokenizer.
            return [json.dumps({'response':"Provide string or bytes string",
                    'payload':str(payload),
                    'type':str(type(payload))})]

        tokenizer = nlp.data.BERTTokenizer(vocab, lower=True)
        transform = nlp.data.BERTSentenceTransform(tokenizer, max_seq_length=512, pair=False, pad=False)

        # The transform takes a sequence of sentences and reads item 0 as the
        # text; passing a bare string would tokenize only its first character.
        sample = transform([text])

        words, valid_len = mx.nd.array([sample[0]]), mx.nd.array([sample[1]])
        out = model(words, valid_len)
        out = json.dumps({'output':out.asnumpy().tolist()})
    except Exception as e:
        out = str(e) #useful for debugging!
    return [out]


def _payload_to_text(payload):
    """Return the text carried by ``payload``, or None when its type is unsupported."""
    # Local call with a plain string.
    if isinstance(payload, str):
        return payload
    # Local call with a bytes string.
    if isinstance(payload, bytes):
        return payload.decode()
    # Remote call: a list whose first item carries the bytes under the 'body' key.
    if isinstance(payload, list):
        return payload[0]['body'].decode()
    return None

## Does this work locally? (Not "_locally in a container_", but _actually_ on the local machine)

In [None]:
# Import the two entry points by name rather than with `import *`,
# so it is clear where load_model and predict come from.
from modelscript_mxnet import load_model, predict

model = load_model('') # path doesn't matter here since we're loading the model directly in the script

In [None]:
# Call the script's predict directly (no container) with a plain string
# and show the first item of the list it returns.
predict(model,'Hello World!')[0]

### OK, great! Now let's install ezsmdeploy

In [None]:
!pip install ezsmdeploy

In [None]:
import ezsmdeploy

#### If you have been running other inference containers in local mode, stop existing containers to avoid conflict

In [None]:
# WARNING: this stops every container returned by `docker container ls -aq`,
# not only inference containers. Standard output of the stop command is discarded.
!docker container stop $(docker container ls -aq) >/dev/null

## Deploy locally

In [None]:
# Deploy the script into a local container and wait for the deployment to finish.
ez = ezsmdeploy.Deploy(
    model=None,  # loading distilbert model in script from hub
    script='modelscript_mxnet.py',
    requirements=['pyarrow', 'mxnet', 'gluonnlp', 'numpy', 'joblib'],  # or pass in the path to requirements.txt
    instance_type='local',
    wait=True,
)

## Test containerized version locally

Since the model is downloaded from a hub, the first invocation will be slow. Invoke the endpoint a second time to get an inference without all of the container logs.

In [None]:
# Send a test sentence to the locally deployed predictor and decode the response to text.
out = ez.predictor.predict('Hello World').decode()
# Last expression in the cell: display the decoded response.
out

## Deploy on SageMaker

In [None]:
# NOTE(review): presumably builds the container image used for the SageMaker
# deployment; the script itself is not shown here, so confirm what it does.
!./src/build-docker.sh

In [None]:
# Deploy the same script on an ml.m4.xlarge instance and wait for the deployment to finish.
ezonsm = ezsmdeploy.Deploy(
    model=None,  # loading distilbert model in script from hub
    script='modelscript_mxnet.py',
    requirements=['pyarrow', 'mxnet', 'gluonnlp', 'numpy', 'joblib'],  # or pass in the path to requirements.txt
    instance_type='ml.m4.xlarge',
    wait=True,
)

In [None]:
# Send a test sentence to the predictor of the ml.m4.xlarge deployment and decode the response to text.
out = ezonsm.predictor.predict('Hello World').decode() 
# Last expression in the cell: display the decoded response.
out

In [None]:
# Clean up: delete the endpoint behind the ezonsm predictor once testing is done.
ezonsm.predictor.delete_endpoint()