{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Deploy pre-trained HF model extending the PyTorch 1.8.1 DL inference container" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import boto3\n", "import sagemaker\n", "from sagemaker import get_execution_role\n", "\n", "ecr_namespace = 'huggingface/'\n", "prefix = 'huggingface-pytorch-serving-container'\n", "\n", "ecr_repository_name = ecr_namespace + prefix\n", "role = get_execution_role()\n", "account_id = role.split(':')[4]\n", "region = boto3.Session().region_name\n", "sagemaker_session = sagemaker.session.Session()\n", "bucket = sagemaker_session.default_bucket()\n", "prefix = 'hfdeploypytorch-extend'\n", "hf_cache_dir = 'hf_cache_dir/'\n", "\n", "print(account_id)\n", "print(region)\n", "print(role)\n", "print(bucket)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Build container" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "! pygmentize docker/Dockerfile" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "! pygmentize scripts/build_and_push.sh" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "! /bin/sh scripts/build_and_push.sh $account_id $region $ecr_repository_name" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Download model from HF and save to Amazon S3" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "pip install transformers==4.5.1" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "! mkdir -p $hf_cache_dir" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import torch\n", "from transformers import BartForConditionalGeneration, BartTokenizer\n", "\n", "PRE_TRAINED_MODEL_NAME='facebook/bart-large-cnn'\n", "\n", "# Note that we use a specific HF cache dir, to avoid using the default cache dirs that might fill \n", "# root disk space.\n", "model = BartForConditionalGeneration.from_pretrained(PRE_TRAINED_MODEL_NAME, cache_dir=hf_cache_dir)\n", "model.save_pretrained('./models/bart_model/')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "tokenizer = BartTokenizer.from_pretrained(PRE_TRAINED_MODEL_NAME)\n", "tokenizer.save_pretrained('./models/bart_tokenizer/')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!tar -C models/ -cvzf model.tar.gz bart_model/ bart_tokenizer/" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from sagemaker.s3 import S3Uploader\n", "model_artifact = S3Uploader.upload('model.tar.gz','s3://{0}/{1}/model'.format(bucket, prefix))\n", "print(model_artifact)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Deploy model" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "container_image_uri = '{0}.dkr.ecr.{1}.amazonaws.com/{2}:latest'.format(account_id, region, ecr_repository_name)\n", "print(container_image_uri)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from sagemaker.predictor import Predictor\n", "from sagemaker.serializers import JSONSerializer\n", "from sagemaker.deserializers import JSONDeserializer\n", "\n", "class Summarizer(Predictor):\n", " def __init__(self, endpoint_name, sagemaker_session):\n", " super().__init__(endpoint_name, sagemaker_session=sagemaker_session,\n", " serializer=JSONSerializer(), \n", " deserializer=JSONDeserializer())" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from sagemaker import Model\n", "\n", "hf_model = Model(image_uri=container_image_uri,\n", " model_data=model_artifact,\n", " predictor_cls=Summarizer,\n", " sagemaker_session=sagemaker_session,\n", " env = {\n", " 'SAGEMAKER_PROGRAM': 'predict'\n", " },\n", " role=role)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "predictor = hf_model.deploy(instance_type='ml.m5.4xlarge',\n", " initial_instance_count=1)\n", "predictor" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Test inference" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "with open('article.txt') as f:\n", " content = f.read()\n", "content = content.replace('\\n', ' ')\n", "\n", "json_request_data = {\"text\": \"{0}\"}\n", "json_request_data[\"text\"] = json_request_data[\"text\"].format(content)\n", "\n", "json_request_data" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%%time\n", "prediction = predictor.predict(json_request_data)\n", "print(prediction)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Delete endpoint" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "predictor.delete_endpoint()" ] } ], "metadata": { "instance_type": "ml.t3.medium", "kernelspec": { "display_name": "conda_pytorch_latest_p36", "language": "python", "name": "conda_pytorch_latest_p36" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.13" } }, "nbformat": 4, "nbformat_minor": 4 }