{ "cells": [
{ "cell_type": "markdown", "id": "1463f153-71d2-480c-8565-a0adcdf2b21f", "metadata": {}, "source": [ "## Deploy FLAN-T5-XL using JumpStart" ] },
{ "cell_type": "markdown", "id": "690bda67-b335-4fe3-a72a-e360249dfc28", "metadata": {}, "source": [ "#### Imports " ] },
{ "cell_type": "code", "execution_count": 3, "id": "28f26242-65fd-471a-b1c8-a64bc686e32e", "metadata": { "tags": [] }, "outputs": [], "source": [ "from sagemaker.predictor import Predictor\n", "from sagemaker import get_execution_role\n", "from sagemaker.model import Model\n", "from sagemaker import script_uris\n", "from sagemaker import image_uris\n", "from sagemaker import model_uris\n", "import sagemaker\n", "import logging\n", "import boto3\n", "import time\n", "import json" ] },
{ "cell_type": "markdown", "id": "c3c06035-ec39-4ebd-830f-49d4c164e600", "metadata": {}, "source": [ "#### Setup essentials " ] },
{ "cell_type": "code", "execution_count": 4, "id": "dbe1bbc7-303f-495c-95a1-a1ae1ea39464", "metadata": { "tags": [] }, "outputs": [], "source": [ "# Surface the SageMaker SDK's own log records (DEBUG and above) in the notebook.\n", "logger = logging.getLogger('sagemaker')\n", "logger.setLevel(logging.DEBUG)\n", "# Attach the stream handler only once: re-running this cell must not stack\n", "# duplicate handlers, which would print every log record several times.\n", "if not any(type(h) is logging.StreamHandler for h in logger.handlers):\n", "    logger.addHandler(logging.StreamHandler())" ] },
{ "cell_type": "code", "execution_count": 5, "id": "7c4be196-687a-492a-8a4e-97c5a4c5b2dd", "metadata": { "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Using sagemaker==2.145.0\n", "Using boto3==1.26.111\n" ] } ], "source": [ "# Record the library versions this notebook was run with.\n", "logger.info(f'Using sagemaker=={sagemaker.__version__}')\n", "logger.info(f'Using boto3=={boto3.__version__}')" ] },
{ "cell_type": "code", "execution_count": 6, "id": "bb8dc308-3283-4dc1-b75e-9c4bb0309a89", "metadata": { "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Role => arn:aws:iam::706553727873:role/service-role/AmazonSageMaker-ExecutionRole-20211019T121285\n" ] } ], "source": [ "# Deployment configuration: everything a reader might want to tune lives here.\n", "MODEL_ID = 'huggingface-text2text-flan-t5-xl' # hard-coded model id used for the image/model URI lookups\n", "MODEL_VERSION = '*' # presumably resolves to the latest version - TODO confirm, consider pinning\n", "INSTANCE_TYPE = 'ml.p3.2xlarge'\n", "INSTANCE_COUNT = 1\n", "IMAGE_SCOPE = 'inference' # also passed as model_scope when retrieving the model URI\n", "MODEL_DATA_DOWNLOAD_TIMEOUT = 3600 # in seconds\n", "CONTAINER_STARTUP_HEALTH_CHECK_TIMEOUT = 3600 # in seconds\n", "EBS_VOLUME_SIZE = 256 # in GB\n", "CONTENT_TYPE = 'application/json'\n", "\n", "# set up roles and clients\n", "client = boto3.client('sagemaker-runtime')\n", "ROLE = get_execution_role()\n", "logger.info(f'Role => {ROLE}')" ] },
{ "cell_type": "code", "execution_count": 7, "id": "51be04ac-c509-47f8-bb83-537a689dc653", "metadata": { "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Endpoint name: huggingface-text2text-flan-t5-xl-1686836752\n" ] } ], "source": [ "# Suffix the model id with the current Unix time so each run gets its own name.\n", "unix_time = int(time.time())\n", "endpoint_name = f'{MODEL_ID}-{unix_time}'\n", "logger.info(f'Endpoint name: {endpoint_name}')" ] },
{ "cell_type": "markdown", "id": "3d67f166-291e-47ce-8cd2-48b06840e970", "metadata": {}, "source": [ "#### I. Deploy FLAN-T5-XL out-of-the-box instruction-tuned model as a SageMaker endpoint" ] },
{ "cell_type": "code", "execution_count": 8, "id": "b17d8a09-28e1-4ad5-9383-c960ed79bca5", "metadata": { "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Deploy image URI => 763104351884.dkr.ecr.us-east-1.amazonaws.com/huggingface-pytorch-inference:1.13.1-transformers4.26.0-gpu-py39-cu117-ubuntu20.04\n" ] } ], "source": [ "# Look up the inference container image for this model id and instance type.\n", "deploy_image_uri = image_uris.retrieve(\n", "    region=None,\n", "    framework=None,\n", "    image_scope=IMAGE_SCOPE,\n", "    model_id=MODEL_ID,\n", "    model_version=MODEL_VERSION,\n", "    instance_type=INSTANCE_TYPE,\n", ")\n", "logger.info(f'Deploy image URI => {deploy_image_uri}')" ] },
{ "cell_type": "code", "execution_count": 9, "id": "ef844c04-9864-4277-a5b2-c58d2858da29", "metadata": { "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Model URI => s3://jumpstart-cache-prod-us-east-1/huggingface-infer/prepack/v1.1.1/infer-prepack-huggingface-text2text-flan-t5-xl.tar.gz\n" ] } ], "source": [ "# Look up the S3 location of the model artifact for the same model id/version.\n", "model_uri = model_uris.retrieve(\n", "    model_id=MODEL_ID,\n", "    model_version=MODEL_VERSION,\n", "    model_scope=IMAGE_SCOPE,\n", ")\n", "logger.info(f'Model URI => {model_uri}')" ] },
{ "cell_type": "code", "execution_count": 10, "id": "5341f604-a2bf-4748-b8d5-8343df5322f0", "metadata": { "tags": [] }, "outputs": [], "source": [ "# Environment variables handed to the inference container (all values are strings).\n", "env = {\n", "    'SAGEMAKER_MODEL_SERVER_TIMEOUT': str(3600),\n", "    'MODEL_CACHE_ROOT': '/opt/ml/model',\n", "    'SAGEMAKER_ENV': '1',\n", "    'SAGEMAKER_SUBMIT_DIRECTORY': '/opt/ml/model/code/',\n", "    'SAGEMAKER_PROGRAM': 'inference.py',\n", "    'SAGEMAKER_MODEL_SERVER_WORKERS': '1',\n", "    'TS_DEFAULT_WORKERS_PER_MODEL': '1',\n", "}" ] },
{ "cell_type": "code", "execution_count": 11, "id": "c17ca807-647b-4916-844d-f89bfe2af924", "metadata": { "tags": [] }, "outputs": [], "source": [ "# The model resource deliberately reuses the endpoint name.\n", "model = Model(\n", "    image_uri=deploy_image_uri,\n", "    model_data=model_uri,\n", "    role=ROLE,\n", "    predictor_cls=Predictor,\n", "    name=endpoint_name,\n", "    env=env,\n", ")" ] },
{ "cell_type": "code", "execution_count": 12, "id": "df2669fe-c5b3-4978-8eb4-61825f5e8276", "metadata": { "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Creating model with name: huggingface-text2text-flan-t5-xl-1686836752\n", "CreateModel request: {\n", " \"ModelName\": \"huggingface-text2text-flan-t5-xl-1686836752\",\n", " \"ExecutionRoleArn\": \"arn:aws:iam::706553727873:role/service-role/AmazonSageMaker-ExecutionRole-20211019T121285\",\n", " \"PrimaryContainer\": {\n", " \"Image\": \"763104351884.dkr.ecr.us-east-1.amazonaws.com/huggingface-pytorch-inference:1.13.1-transformers4.26.0-gpu-py39-cu117-ubuntu20.04\",\n", " \"Environment\": {\n", " \"SAGEMAKER_MODEL_SERVER_TIMEOUT\": \"3600\",\n", " \"MODEL_CACHE_ROOT\": \"/opt/ml/model\",\n", " \"SAGEMAKER_ENV\": \"1\",\n", " \"SAGEMAKER_SUBMIT_DIRECTORY\": \"/opt/ml/model/code/\",\n", " \"SAGEMAKER_PROGRAM\": \"inference.py\",\n", " \"SAGEMAKER_MODEL_SERVER_WORKERS\": \"1\",\n", " \"TS_DEFAULT_WORKERS_PER_MODEL\": \"1\"\n", " },\n", " \"ModelDataUrl\": \"s3://jumpstart-cache-prod-us-east-1/huggingface-infer/prepack/v1.1.1/infer-prepack-huggingface-text2text-flan-t5-xl.tar.gz\"\n", " },\n", " \"Tags\": [\n", " {\n", " \"Key\": \"aws-jumpstart-inference-model-uri\",\n", " \"Value\": \"s3://jumpstart-cache-prod-us-east-1/huggingface-infer/prepack/v1.1.1/infer-prepack-huggingface-text2text-flan-t5-xl.tar.gz\"\n", " }\n", " ]\n", "}\n", "Creating endpoint-config with name huggingface-text2text-flan-t5-xl-1686836752\n", "Creating endpoint with name huggingface-text2text-flan-t5-xl-1686836752\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "-------------!CPU times: user 109 ms, sys: 26.7 ms, total: 136 ms\n", "Wall time: 7min 4s\n" ] } ], "source": [ "%%time\n", "\n", "# Keep the returned predictor in a named variable rather than `_` (IPython's\n", "# last-output variable) so the endpoint can still be invoked or deleted later.\n", "predictor = model.deploy(\n", "    initial_instance_count=INSTANCE_COUNT,\n", "    instance_type=INSTANCE_TYPE,\n", "    endpoint_name=endpoint_name,\n", "    volume_size=EBS_VOLUME_SIZE,\n", "    model_data_download_timeout=MODEL_DATA_DOWNLOAD_TIMEOUT,\n", "    container_startup_health_check_timeout=CONTAINER_STARTUP_HEALTH_CHECK_TIMEOUT,\n", ")" ] },
{ "cell_type": "code", "execution_count": 14, "id": "8e750267-e22e-4fc0-9235-07037765b7ed", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Stored 'endpoint_name' (str)\n" ] } ], "source": [ "%store endpoint_name" ] },
{ "cell_type": "code", "execution_count": null, "id": "6238fc53-8ca6-47aa-838f-d571b227f8c6", "metadata": {}, "outputs": [], "source": [] }
], "metadata": { "availableInstances": [ { "_defaultOrder": 0, "_isFastLaunch": true, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 4, "name": "ml.t3.medium", "vcpuNum": 2 }, { "_defaultOrder": 1, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 8, "name": "ml.t3.large", "vcpuNum": 2 }, { "_defaultOrder": 2, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 16,
"name": "ml.t3.xlarge", "vcpuNum": 4 }, { "_defaultOrder": 3, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 32, "name": "ml.t3.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 4, "_isFastLaunch": true, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 8, "name": "ml.m5.large", "vcpuNum": 2 }, { "_defaultOrder": 5, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 16, "name": "ml.m5.xlarge", "vcpuNum": 4 }, { "_defaultOrder": 6, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 32, "name": "ml.m5.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 7, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 64, "name": "ml.m5.4xlarge", "vcpuNum": 16 }, { "_defaultOrder": 8, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 128, "name": "ml.m5.8xlarge", "vcpuNum": 32 }, { "_defaultOrder": 9, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 192, "name": "ml.m5.12xlarge", "vcpuNum": 48 }, { "_defaultOrder": 10, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 256, "name": "ml.m5.16xlarge", "vcpuNum": 64 }, { "_defaultOrder": 11, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 384, "name": "ml.m5.24xlarge", "vcpuNum": 96 }, { "_defaultOrder": 12, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 8, "name": "ml.m5d.large", "vcpuNum": 2 }, { "_defaultOrder": 13, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 16, "name": "ml.m5d.xlarge", "vcpuNum": 4 }, { 
"_defaultOrder": 14, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 32, "name": "ml.m5d.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 15, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 64, "name": "ml.m5d.4xlarge", "vcpuNum": 16 }, { "_defaultOrder": 16, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 128, "name": "ml.m5d.8xlarge", "vcpuNum": 32 }, { "_defaultOrder": 17, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 192, "name": "ml.m5d.12xlarge", "vcpuNum": 48 }, { "_defaultOrder": 18, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 256, "name": "ml.m5d.16xlarge", "vcpuNum": 64 }, { "_defaultOrder": 19, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 384, "name": "ml.m5d.24xlarge", "vcpuNum": 96 }, { "_defaultOrder": 20, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": true, "memoryGiB": 0, "name": "ml.geospatial.interactive", "supportedImageNames": [ "sagemaker-geospatial-v1-0" ], "vcpuNum": 0 }, { "_defaultOrder": 21, "_isFastLaunch": true, "category": "Compute optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 4, "name": "ml.c5.large", "vcpuNum": 2 }, { "_defaultOrder": 22, "_isFastLaunch": false, "category": "Compute optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 8, "name": "ml.c5.xlarge", "vcpuNum": 4 }, { "_defaultOrder": 23, "_isFastLaunch": false, "category": "Compute optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 16, "name": "ml.c5.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 24, "_isFastLaunch": false, "category": "Compute optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 32, "name": 
"ml.c5.4xlarge", "vcpuNum": 16 }, { "_defaultOrder": 25, "_isFastLaunch": false, "category": "Compute optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 72, "name": "ml.c5.9xlarge", "vcpuNum": 36 }, { "_defaultOrder": 26, "_isFastLaunch": false, "category": "Compute optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 96, "name": "ml.c5.12xlarge", "vcpuNum": 48 }, { "_defaultOrder": 27, "_isFastLaunch": false, "category": "Compute optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 144, "name": "ml.c5.18xlarge", "vcpuNum": 72 }, { "_defaultOrder": 28, "_isFastLaunch": false, "category": "Compute optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 192, "name": "ml.c5.24xlarge", "vcpuNum": 96 }, { "_defaultOrder": 29, "_isFastLaunch": true, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 16, "name": "ml.g4dn.xlarge", "vcpuNum": 4 }, { "_defaultOrder": 30, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 32, "name": "ml.g4dn.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 31, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 64, "name": "ml.g4dn.4xlarge", "vcpuNum": 16 }, { "_defaultOrder": 32, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 128, "name": "ml.g4dn.8xlarge", "vcpuNum": 32 }, { "_defaultOrder": 33, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 4, "hideHardwareSpecs": false, "memoryGiB": 192, "name": "ml.g4dn.12xlarge", "vcpuNum": 48 }, { "_defaultOrder": 34, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 256, "name": "ml.g4dn.16xlarge", "vcpuNum": 64 }, { "_defaultOrder": 35, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": 
false, "memoryGiB": 61, "name": "ml.p3.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 36, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 4, "hideHardwareSpecs": false, "memoryGiB": 244, "name": "ml.p3.8xlarge", "vcpuNum": 32 }, { "_defaultOrder": 37, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 8, "hideHardwareSpecs": false, "memoryGiB": 488, "name": "ml.p3.16xlarge", "vcpuNum": 64 }, { "_defaultOrder": 38, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 8, "hideHardwareSpecs": false, "memoryGiB": 768, "name": "ml.p3dn.24xlarge", "vcpuNum": 96 }, { "_defaultOrder": 39, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 16, "name": "ml.r5.large", "vcpuNum": 2 }, { "_defaultOrder": 40, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 32, "name": "ml.r5.xlarge", "vcpuNum": 4 }, { "_defaultOrder": 41, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 64, "name": "ml.r5.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 42, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 128, "name": "ml.r5.4xlarge", "vcpuNum": 16 }, { "_defaultOrder": 43, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 256, "name": "ml.r5.8xlarge", "vcpuNum": 32 }, { "_defaultOrder": 44, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 384, "name": "ml.r5.12xlarge", "vcpuNum": 48 }, { "_defaultOrder": 45, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 512, "name": "ml.r5.16xlarge", "vcpuNum": 64 }, { "_defaultOrder": 46, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "hideHardwareSpecs": false, 
"memoryGiB": 768, "name": "ml.r5.24xlarge", "vcpuNum": 96 }, { "_defaultOrder": 47, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 16, "name": "ml.g5.xlarge", "vcpuNum": 4 }, { "_defaultOrder": 48, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 32, "name": "ml.g5.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 49, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 64, "name": "ml.g5.4xlarge", "vcpuNum": 16 }, { "_defaultOrder": 50, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 128, "name": "ml.g5.8xlarge", "vcpuNum": 32 }, { "_defaultOrder": 51, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 256, "name": "ml.g5.16xlarge", "vcpuNum": 64 }, { "_defaultOrder": 52, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 4, "hideHardwareSpecs": false, "memoryGiB": 192, "name": "ml.g5.12xlarge", "vcpuNum": 48 }, { "_defaultOrder": 53, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 4, "hideHardwareSpecs": false, "memoryGiB": 384, "name": "ml.g5.24xlarge", "vcpuNum": 96 }, { "_defaultOrder": 54, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 8, "hideHardwareSpecs": false, "memoryGiB": 768, "name": "ml.g5.48xlarge", "vcpuNum": 192 }, { "_defaultOrder": 55, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 8, "hideHardwareSpecs": false, "memoryGiB": 1152, "name": "ml.p4d.24xlarge", "vcpuNum": 96 }, { "_defaultOrder": 56, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 8, "hideHardwareSpecs": false, "memoryGiB": 1152, "name": "ml.p4de.24xlarge", "vcpuNum": 96 } ], "instance_type": "ml.t3.medium", "kernelspec": { "display_name": "Python 3 (Data 
Science)", "language": "python", "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-1:081325390199:image/datascience-1.0" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.10" } }, "nbformat": 4, "nbformat_minor": 5 }