## Deploy FLAN-T5-XL using SageMaker JumpStart

#### Imports 

In [3]:
from sagemaker.predictor import Predictor
from sagemaker import get_execution_role
from sagemaker.model import Model
from sagemaker import script_uris
from sagemaker import image_uris 
from sagemaker import model_uris
import sagemaker
import logging
import boto3
import time
import json

#### Setup essentials 

In [4]:
# Send records from the 'sagemaker' logger (DEBUG and above) to the notebook
# output through a StreamHandler.
logger = logging.getLogger('sagemaker')
logger.setLevel(logging.DEBUG)
# logging.getLogger() returns the same logger object on every call, so
# re-running this cell used to attach one more StreamHandler each time and
# every record was then printed once per handler. Attach it only once.
if not any(type(handler) is logging.StreamHandler for handler in logger.handlers):
    logger.addHandler(logging.StreamHandler())

In [5]:
# Log the installed version of each SDK this notebook relies on,
# one "Using <package>==<version>" line per package.
for _pkg in (sagemaker, boto3):
    logger.info(f'Using {_pkg.__name__}=={_pkg.__version__}')

Using sagemaker==2.145.0
Using boto3==1.26.111


In [6]:
# --- Model selection --------------------------------------------------------
MODEL_ID = 'huggingface-text2text-flan-t5-xl'  # hard-coded: this notebook deploys this one model
MODEL_VERSION = '*'  # passed as model_version to the image/model URI lookups
IMAGE_SCOPE = 'inference'  # used as both image_scope and model_scope below

# --- Endpoint hardware ------------------------------------------------------
INSTANCE_TYPE = 'ml.p3.2xlarge'
INSTANCE_COUNT = 1
EBS_VOLUME_SIZE = 256  # in GB

# --- Deployment timeouts (in seconds) ---------------------------------------
MODEL_DATA_DOWNLOAD_TIMEOUT = 3600
CONTAINER_STARTUP_HEALTH_CHECK_TIMEOUT = 3600

# --- Request format (not referenced by the deployment cells shown here) -----
CONTENT_TYPE = 'application/json'

# --- AWS handles: runtime client and the notebook's execution role ----------
client = boto3.client('sagemaker-runtime')
ROLE = get_execution_role()
logger.info(f'Role => {ROLE}')

Role => arn:aws:iam::706553727873:role/service-role/AmazonSageMaker-ExecutionRole-20211019T121285


In [7]:
# Suffix the model ID with the current Unix time (truncated to whole seconds)
# so the name differs each time this cell is executed.
unix_time = int(time.time())
endpoint_name = '-'.join((MODEL_ID, str(unix_time)))
logger.info(f'Endpoint name: {endpoint_name}')

Endpoint name: huggingface-text2text-flan-t5-xl-1686836752


#### I. Deploy FLAN-T5-XL out-of-the-box instruction-tuned model as a SageMaker endpoint

In [8]:
# Look up the container image URI for this model ID / version, scope and
# instance type. region and framework are deliberately passed as None.
deploy_image_uri = image_uris.retrieve(
    model_id=MODEL_ID,
    model_version=MODEL_VERSION,
    image_scope=IMAGE_SCOPE,
    instance_type=INSTANCE_TYPE,
    region=None,
    framework=None,
)
logger.info(f'Deploy image URI => {deploy_image_uri}')

Deploy image URI => 763104351884.dkr.ecr.us-east-1.amazonaws.com/huggingface-pytorch-inference:1.13.1-transformers4.26.0-gpu-py39-cu117-ubuntu20.04


In [9]:
# Look up the model artifact URI for the same model ID / version; the
# 'inference' scope constant is reused here as model_scope.
model_uri = model_uris.retrieve(
    model_scope=IMAGE_SCOPE,
    model_id=MODEL_ID,
    model_version=MODEL_VERSION,
)
logger.info(f'Model URI => {model_uri}')

Model URI => s3://jumpstart-cache-prod-us-east-1/huggingface-infer/prepack/v1.1.1/infer-prepack-huggingface-text2text-flan-t5-xl.tar.gz


In [10]:
# Environment variables handed to the container through Model(env=...).
# Every value is a string; the key order matches the original literal.
env = dict(
    SAGEMAKER_MODEL_SERVER_TIMEOUT='3600',
    MODEL_CACHE_ROOT='/opt/ml/model',
    SAGEMAKER_ENV='1',
    SAGEMAKER_SUBMIT_DIRECTORY='/opt/ml/model/code/',
    SAGEMAKER_PROGRAM='inference.py',
    SAGEMAKER_MODEL_SERVER_WORKERS='1',
    TS_DEFAULT_WORKERS_PER_MODEL='1',
)

In [11]:
# Assemble the SageMaker Model from the image URI, model artifact URI,
# execution role and container environment. The model is given the same
# name that is later passed to deploy() as the endpoint name.
model = Model(
    name=endpoint_name,
    role=ROLE,
    image_uri=deploy_image_uri,
    model_data=model_uri,
    env=env,
    predictor_cls=Predictor,
)

In [12]:
%%time

_ = model.deploy(initial_instance_count=INSTANCE_COUNT, 
 instance_type=INSTANCE_TYPE, 
 endpoint_name=endpoint_name, 
 volume_size=EBS_VOLUME_SIZE, 
 model_data_download_timeout=MODEL_DATA_DOWNLOAD_TIMEOUT, 
 container_startup_health_check_timeout=CONTAINER_STARTUP_HEALTH_CHECK_TIMEOUT)

Creating model with name: huggingface-text2text-flan-t5-xl-1686836752
CreateModel request: {
 "ModelName": "huggingface-text2text-flan-t5-xl-1686836752",
 "ExecutionRoleArn": "arn:aws:iam::706553727873:role/service-role/AmazonSageMaker-ExecutionRole-20211019T121285",
 "PrimaryContainer": {
 "Image": "763104351884.dkr.ecr.us-east-1.amazonaws.com/huggingface-pytorch-inference:1.13.1-transformers4.26.0-gpu-py39-cu117-ubuntu20.04",
 "Environment": {
 "SAGEMAKER_MODEL_SERVER_TIMEOUT": "3600",
 "MODEL_CACHE_ROOT": "/opt/ml/model",
 "SAGEMAKER_ENV": "1",
 "SAGEMAKER_SUBMIT_DIRECTORY": "/opt/ml/model/code/",
 "SAGEMAKER_PROGRAM": "inference.py",
 "SAGEMAKER_MODEL_SERVER_WORKERS": "1",
 "TS_DEFAULT_WORKERS_PER_MODEL": "1"
 },
 "ModelDataUrl": "s3://jumpstart-cache-prod-us-east-1/huggingface-infer/prepack/v1.1.1/infer-prepack-huggingface-text2text-flan-t5-xl.tar.gz"
 },
 "Tags": [
 {
 "Key": "aws-jumpstart-inference-model-uri",
 "Value": "s3://jumpstart-cache-prod-us-east-1/huggingface-infer/pre

-------------!CPU times: user 109 ms, sys: 26.7 ms, total: 136 ms
Wall time: 7min 4s


In [14]:
# Persist endpoint_name with IPython's %store magic so that another notebook or kernel can restore it via `%store -r endpoint_name`.
%store endpoint_name

Stored 'endpoint_name' (str)
