{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-01-20T10:03:00.870970Z",
     "start_time": "2021-01-20T10:03:00.418377Z"
    },
    "execution": {
     "iopub.execute_input": "2020-09-22T11:42:30.692313Z",
     "iopub.status.busy": "2020-09-22T11:42:30.692004Z",
     "iopub.status.idle": "2020-09-22T11:42:30.696371Z",
     "shell.execute_reply": "2020-09-22T11:42:30.695224Z",
     "shell.execute_reply.started": "2020-09-22T11:42:30.692282Z"
    }
   },
   "source": [
    "# ML model deployment\n",
    "\n",
    "Deploy the trained ML model into an Amazon SageMaker Model and save the predictions for the test dataframe."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# %pip (rather than !pip) installs into the environment of the running kernel.\n",
    "%pip install jsonlines"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-12-03T08:36:16.913987Z",
     "start_time": "2020-12-03T08:36:12.697973Z"
    }
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import boto3\n",
    "import botocore\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import json\n",
    "import jsonlines\n",
    "import sagemaker\n",
    "from sagemaker.predictor import json_serializer, json_deserializer\n",
    "from sagemaker.amazon.amazon_estimator import get_image_uri\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics import confusion_matrix\n",
    "import seaborn as sns\n",
    "from sklearn.preprocessing import normalize\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Functions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def download_object(bucket_name, key, local_path):\n",
    "    \"\"\"Fetch a single S3 object and write it to a local file.\n",
    "\n",
    "    Downloads ``key`` from the bucket ``bucket_name`` to ``local_path``.\n",
    "    A ClientError with error code \"404\" only prints a message; every other\n",
    "    ClientError is re-raised to the caller.\n",
    "    \"\"\"\n",
    "    s3_resource = boto3.resource('s3')\n",
    "    try:\n",
    "        source_bucket = s3_resource.Bucket(bucket_name)\n",
    "        source_bucket.download_file(key, local_path)\n",
    "    except botocore.exceptions.ClientError as err:\n",
    "        # Only the 404 case is tolerated; propagate anything else.\n",
    "        if err.response['Error']['Code'] != \"404\":\n",
    "            raise\n",
    "        print(\"The object does not exist\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def create_dir(directory):\n",
    "    \"\"\"Create a directory (and any missing parents) if it does not exist.\n",
    "\n",
    "    Args:\n",
    "        directory: Path of the directory to create.\n",
    "\n",
    "    Raises:\n",
    "        FileExistsError: If ``directory`` exists but is not a directory.\n",
    "    \"\"\"\n",
    "    # exist_ok=True removes the race between a separate os.path.exists()\n",
    "    # check and the creation, and makes repeated calls a no-op.\n",
    "    os.makedirs(directory, exist_ok=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-12-03T08:36:17.043766Z",
     "start_time": "2020-12-03T08:36:16.916055Z"
    }
   },
   "outputs": [],
   "source": [
    "# S3 locations used by this notebook: inputs are downloaded from prefix_in,\n",
    "# and the model artifact path is built from prefix_out.\n",
    "s3_client = boto3.client(\"s3\")\n",
    "bucket = 'YOUR_BUCKET_HERE'\n",
    "prefix_in = 'connect/O2VInput'\n",
    "prefix_out = 'connect/O2VOutput'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Local working directories for the downloaded files.\n",
    "create_dir('./meta/')\n",
    "create_dir('./data/')\n",
    "\n",
    "# S3 keys always use '/' as the separator, so build them explicitly instead of\n",
    "# with os.path.join, which uses the separator of the local operating system.\n",
    "download_object(bucket, f'{prefix_in}/test/test.jsonl', './data/test.jsonl')\n",
    "download_object(bucket, f'{prefix_in}/meta/vocab_to_token_dict.p', './meta/vocab_to_token_dict.p')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sagemaker import get_execution_role\n",
    "# Execution role that is passed as `role` when the SageMaker Model is created.\n",
    "role = get_execution_role()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Specify the job_name\n",
    "\n",
    "Specify the name of the training job whose model you would like to evaluate and deploy."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Selects which model artifact under prefix_out is deployed.\n",
    "job_name = 'default2021-05-31-14-57-38'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Create an Amazon SageMaker Model and deploy it to an endpoint.\n",
    "\n",
    "Load the trained Amazon SageMaker model artifact and create an endpoint to host it."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-01-20T10:27:46.260094Z",
     "start_time": "2021-01-20T10:27:46.257045Z"
    }
   },
   "outputs": [],
   "source": [
    "# S3 URI where the model artifact of the selected job is expected.\n",
    "model_data = f's3://{bucket}/{prefix_out}/{job_name}/output/model.tar.gz'\n",
    "\n",
    "# get_image_uri is deprecated in SageMaker Python SDK v2;\n",
    "# sagemaker.image_uris.retrieve is its replacement.\n",
    "container = sagemaker.image_uris.retrieve(\n",
    "    framework='object2vec',\n",
    "    region=boto3.Session().region_name,\n",
    "    version='1',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-01-20T10:36:21.923263Z",
     "start_time": "2021-01-20T10:27:48.692566Z"
    }
   },
   "outputs": [],
   "source": [
    "# Combine the model artifact and the algorithm container into a SageMaker Model\n",
    "# that is named after the job.\n",
    "trainedmodel = sagemaker.model.Model(\n",
    "    image_uri=container,\n",
    "    model_data=model_data,\n",
    "    role=role,\n",
    "    name=job_name,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Host the model on a single ml.m4.xlarge instance.\n",
    "trainedmodel.deploy(\n",
    "    instance_type='ml.m4.xlarge',\n",
    "    initial_instance_count=1,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
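    "##### Custom serializer\n",
    "\n",
    "Define a serializer that converts raw-text inputs into tokens before the request is sent to the endpoint."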
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sagemaker.serializers import SimpleBaseSerializer, JSONSerializer\n",
    "import pickle\n",
    "from nltk import word_tokenize\n",
    "\n",
    "class O2VTextSerializer(SimpleBaseSerializer):\n",
    "    \"\"\"Serialize Object2Vec requests to JSON, tokenizing raw-text inputs.\n",
    "\n",
    "    String values stored under the ``in0`` / ``in1`` keys of each instance are\n",
    "    replaced by lists of tokens looked up in the loaded vocabulary; values\n",
    "    that are not strings are passed through unchanged.\n",
    "\n",
    "    Call ``load_vocab_to_tokens`` and ``set_tokenizer`` before serializing an\n",
    "    instance that contains text.\n",
    "    \"\"\"\n",
    "\n",
    "    def load_vocab_to_tokens(self, file_name):\n",
    "        \"\"\"Load the word -> token mapping from the pickle file ``file_name``.\n",
    "\n",
    "        NOTE: unpickling can execute arbitrary code, so only load files that\n",
    "        come from a trusted source.\n",
    "        \"\"\"\n",
    "        # Context manager so the file handle is always closed.\n",
    "        with open(file_name, 'rb') as vocab_file:\n",
    "            self.vocab_to_tokens = pickle.load(vocab_file)\n",
    "\n",
    "    def set_tokenizer(self, tokenizer):\n",
    "        \"\"\"Set the callable that splits a sentence into words.\"\"\"\n",
    "        self.tokenizer = tokenizer\n",
    "\n",
    "    def sentence_to_tokens(self, sentence):\n",
    "        \"\"\"Convert a sentence to tokens, dropping words missing from the vocabulary.\"\"\"\n",
    "        words = self.tokenizer(sentence)\n",
    "        return [self.vocab_to_tokens[w] for w in words if w in self.vocab_to_tokens]\n",
    "\n",
    "    def serialize(self, data):\n",
    "        \"\"\"Return the JSON request body for ``data``.\n",
    "\n",
    "        Args:\n",
    "            data: Dict with an ``instances`` list. Each instance is a dict whose\n",
    "                ``in0`` / ``in1`` values are either raw text or already\n",
    "                tokenized; a missing key is left out of the request.\n",
    "\n",
    "        Returns:\n",
    "            str: JSON document with the same structure in which every string\n",
    "            input has been replaced by its list of tokens.\n",
    "        \"\"\"\n",
    "        instances = []\n",
    "        for row in data['instances']:\n",
    "            # Shallow copy so the caller's payload is not modified in place.\n",
    "            new_row = dict(row)\n",
    "            # Each field is tokenized from its own value; previously the 'in1'\n",
    "            # branch re-tokenized and overwrote 'in0'.\n",
    "            for field in ('in0', 'in1'):\n",
    "                if isinstance(new_row.get(field), str):\n",
    "                    new_row[field] = self.sentence_to_tokens(new_row[field])\n",
    "            instances.append(new_row)\n",
    "\n",
    "        return json.dumps({'instances': instances})\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Define predictor\n",
    "\n",
    "Use the Amazon SageMaker endpoint to retrieve the predictions for our test dataset (test.jsonl)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-01-20T10:36:22.064464Z",
     "start_time": "2021-01-20T10:36:21.925234Z"
    }
   },
   "outputs": [],
   "source": [
    "# Request serializer: tokenizes text with word_tokenize and the downloaded vocabulary.\n",
    "serializer = O2VTextSerializer(content_type=\"application/json\")\n",
    "serializer.load_vocab_to_tokens(\"./meta/vocab_to_token_dict.p\")\n",
    "serializer.set_tokenizer(word_tokenize)\n",
    "\n",
    "# Client for the deployed endpoint; responses are decoded with JSONDeserializer.\n",
    "predictor = sagemaker.predictor.Predictor(\n",
    "    endpoint_name=trainedmodel.endpoint_name,\n",
    "    serializer=serializer,\n",
    "    deserializer=sagemaker.deserializers.JSONDeserializer(),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sample request: in0 is raw text, in1 is a list that the serializer passes through unchanged.\n",
    "test_payload = {\n",
    "    'instances': [\n",
    "        {'in0': \"Looks like it's working.\", 'in1': [0]},\n",
    "    ],\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The custom serializer converts string inputs to tokens before the request is sent.\n",
    "predictor.predict(test_payload)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Delete the endpoint that hosts trainedmodel.\n",
    "sess = sagemaker.Session()\n",
    "sess.delete_endpoint(trainedmodel.endpoint_name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The endpoint name is also used as the name of the endpoint configuration to delete.\n",
    "sess.delete_endpoint_config(trainedmodel.endpoint_name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "conda_python3",
   "language": "python",
   "name": "conda_python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}