{ "cells": [ { "cell_type": "markdown", "metadata": { "ExecuteTime": { "end_time": "2021-01-20T10:03:00.870970Z", "start_time": "2021-01-20T10:03:00.418377Z" }, "execution": { "iopub.execute_input": "2020-09-22T11:42:30.692313Z", "iopub.status.busy": "2020-09-22T11:42:30.692004Z", "iopub.status.idle": "2020-09-22T11:42:30.696371Z", "shell.execute_reply": "2020-09-22T11:42:30.695224Z", "shell.execute_reply.started": "2020-09-22T11:42:30.692282Z" } }, "source": [ "# ML model deployment\n", "\n", "Deploy the trained ML model into an Amazon SageMaker Model and save the predictions for the test dataframe." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!pip install jsonlines" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-12-03T08:36:16.913987Z", "start_time": "2020-12-03T08:36:12.697973Z" } }, "outputs": [], "source": [ "import os\n", "import boto3\n", "import botocore\n", "import numpy as np\n", "import pandas as pd\n", "import json\n", "import jsonlines\n", "import sagemaker\n", "from sagemaker.predictor import json_serializer, json_deserializer\n", "from sagemaker.amazon.amazon_estimator import get_image_uri\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from sklearn.metrics import confusion_matrix\n", "import seaborn as sns\n", "from sklearn.preprocessing import normalize\n", "import matplotlib.pyplot as plt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Functions" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def download_object(bucket_name, key, local_path):\n", " \"\"\"Download S3 object to local\"\"\"\n", " s3 = boto3.resource('s3')\n", " try:\n", " s3.Bucket(bucket_name).download_file(key,local_path)\n", " except botocore.exceptions.ClientError as e:\n", " if e.response['Error']['Code'] == \"404\":\n", " print(\"The object does not exist\")\n", " else:\n", " raise" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def create_dir(directory):\n", " \"\"\"Create a directory\"\"\"\n", " if not os.path.exists(directory):\n", " os.makedirs(directory)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-12-03T08:36:17.043766Z", "start_time": "2020-12-03T08:36:16.916055Z" } }, "outputs": [], "source": [ "bucket = \"YOUR_BUCKET_HERE\"\n", "prefix_in = \"connect/O2VInput\"\n", "prefix_out = \"connect/O2VOutput\"\n", "s3_client = boto3.client('s3')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "create_dir('./meta/')\n", "create_dir('./data/')\n", "\n", "download_object(bucket, os.path.join(prefix_in,'test','test.jsonl'), './data/test.jsonl')\n", "download_object(bucket, os.path.join(prefix_in,'meta','vocab_to_token_dict.p'), './meta/vocab_to_token_dict.p')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from sagemaker import get_execution_role\n", "role = get_execution_role()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Specify the job_name\n", "\n", "Specify the job name from the ML model that you would like to evaluate and deploy." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "job_name = \"default2021-05-31-14-57-38\" " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Create an Amazon SageMaker Model and deploy it to an endpoint.\n", "\n", "Get the Amazon SageMaker model trained and create an endpoint to host it." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2021-01-20T10:27:46.260094Z", "start_time": "2021-01-20T10:27:46.257045Z" } }, "outputs": [], "source": [ "model_data = f's3://{bucket}/{prefix_out}/{job_name}/output/model.tar.gz'\n", "container = get_image_uri(boto3.Session().region_name, 'object2vec')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2021-01-20T10:36:21.923263Z", "start_time": "2021-01-20T10:27:48.692566Z" } }, "outputs": [], "source": [ "trainedmodel = sagemaker.model.Model(\n", " model_data= model_data,\n", " image_uri= container,\n", " role=role,\n", " name=job_name) " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "trainedmodel.deploy(initial_instance_count=1, instance_type='ml.m4.xlarge')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Custom serializer " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from sagemaker.serializers import SimpleBaseSerializer, JSONSerializer\n", "import pickle\n", "from nltk import word_tokenize\n", "\n", "class O2VTextSerializer(SimpleBaseSerializer):\n", " def load_vocab_to_tokens(self, file_name):\n", " self.vocab_to_tokens = pickle.load(open(file_name,'rb'))\n", "\n", " def set_tokenizer(self, tokenizer):\n", " self.tokenizer = tokenizer\n", "\n", " def sentence_to_tokens(self,sentence):\n", " \"\"\"converts sentences to tokens\"\"\"\n", " words = self.tokenizer(sentence)\n", " return [ self.vocab_to_tokens[w] for w in words if w in self.vocab_to_tokens]\n", " \n", " def serialize(self, data):\n", " js = {'instances': []}\n", " for row in data['instances']:\n", " print(row)\n", " new_row = row\n", " if type(new_row['in0'])==str:\n", " new_row['in0'] = self.sentence_to_tokens(row['in0'])\n", " if type(new_row['in1'])==str:\n", " new_row['in0'] = self.sentence_to_tokens(row['in0'])\n", " \n", " print(new_row)\n", " js['instances'].append(new_row)\n", " \n", " return json.dumps(js)\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### 4. ##### Define predictor

Use Amazon SageMaker endpoint to retrieve the predictions of our test dataset (test.jsonl)
serializer = O2VTextSerializer(content_type='application/json')
serializer.load_vocab_to_tokens('./meta/vocab_to_token_dict.p')
serializer.set_tokenizer(word_tokenize)

predictor = sagemaker.predictor.Predictor(
    endpoint_name=trainedmodel.endpoint_name,
    serializer=serializer,
    deserializer=sagemaker.deserializers.JSONDeserializer())
test_payload = { 'instances':
    [
        {
            'in0': "Looks like it's working.",
            'in1': [0]
        }
    ]
    }
predictor.predict(test_payload)
sess = sagemaker.Session()
sess.delete_endpoint(trainedmodel.endpoint_name)
sess.delete_endpoint_config(trainedmodel.endpoint_name )