In [None]:
%pip install sagemaker xgboost==1.5.1 scikit-learn install sm-serverless-benchmarking -Uqq

## Deploy a pretrained Named Entity Recognition Model
In this example, we'll deploy a pretrained Named Entity Recognition (NER) model using SageMaker JumpStart, then benchmark the model using the SageMaker Serverless Inference Benchmarking toolkit.

In [None]:
import sagemaker
from sagemaker import image_uris, model_uris, script_uris
from sagemaker.model import Model
import uuid

# IAM role passed to the SageMaker resources created in this notebook.
# Provide the role manually if using a non-role-based identity.
role = sagemaker.get_execution_role()

# Default SageMaker session and the AWS region it is bound to.
sess = sagemaker.Session()
region = sess.boto_region_name

In [None]:
# JumpStart identifier of the pretrained NER model to deploy.
model_id = "huggingface-ner-distilbert-base-cased-finetuned-conll03-english"
# NOTE(review): "*" is presumably a wildcard that leaves the version unpinned — confirm.
model_version = "*"

In [None]:
# A short UUID-derived suffix makes name collisions between runs unlikely.
name_suffix = str(uuid.uuid1())[:5]
model_name = f"js-huggingface-ner-distilbert-{name_suffix}"

# Only used to look up the CPU inference container; no instance will be deployed.
inference_instance_type = "ml.m5.xlarge"

# Model selector shared by the three JumpStart lookups below.
jumpstart_model = {"model_id": model_id, "model_version": model_version}

# Inference docker container URI: the base HuggingFace container image for the
# model selected above.
deploy_image_uri = image_uris.retrieve(
    region=None,
    framework=None,  # automatically inferred from model_id
    image_scope="inference",
    instance_type=inference_instance_type,
    **jumpstart_model,
)

# Inference script URI: dependencies and scripts for model loading, inference
# handling, etc.
deploy_source_uri = script_uris.retrieve(script_scope="inference", **jumpstart_model)

# Model artifact URI: the pre-trained model and its parameters.
model_uri = model_uris.retrieve(model_scope="inference", **jumpstart_model)

# Assemble the SageMaker model definition from the pieces retrieved above.
model = Model(
    name=model_name,
    role=role,
    image_uri=deploy_image_uri,
    model_data=model_uri,
    source_dir=deploy_source_uri,
    entry_point="inference.py",  # entry point file inside deploy_source_uri
)

In [None]:
# Attach the session created earlier to the model before calling create().
model.sagemaker_session = sess
# NOTE(review): create() presumably registers the model with SageMaker without
# deploying anything; instance_type looks to be used for container selection only — confirm.
model.create(instance_type=inference_instance_type)

## Validate Endpoint
Before launching a full benchmarking job, it is a good idea to first deploy the model on a test endpoint to ensure everything is functioning as it should. Here we will deploy a temporary endpoint and test it with an example payload. Afterwards, the endpoint is deleted. 

In [None]:
# create a temporary endpoint
from sm_serverless_benchmarking.endpoint import ServerlessEndpoint

# Memory size requested for the throwaway validation endpoint.
test_endpoint_memory_size = 6144

endpoint = ServerlessEndpoint(
    model_name=model.name,
    memory_size=test_endpoint_memory_size,
)
endpoint.create_endpoint()

In [None]:
# Send one sample sentence to the temporary endpoint and print the decoded
# response body so we can confirm a valid response comes back.
input_text = "My name is Wolfgang and I live in Berlin"
sample_invoke_args = {"Body": input_text, "ContentType": "application/x-text"}

response = endpoint.invoke_endpoint(sample_invoke_args)
response_text = response["Body"].read().decode("utf8")
print(response_text)

In [None]:
# The validation endpoint was only needed for the test invocation; remove it
# before launching the benchmark.
endpoint.clean_up() # delete the endpoint

## Launch Benchmarking SageMaker Job

In [None]:
from sm_serverless_benchmarking.utils import convert_invoke_args_to_jsonl
from sm_serverless_benchmarking.sagemaker_runner import run_as_sagemaker_job

# Sample sentences used as benchmark payloads.
example_texts = [
    "My name is Wolfgang and I live in Berlin",
    "Amazon.com, Inc. is an American multinational technology company which focuses on e-commerce, cloud computing, digital streaming, and artificial intelligence. It is headquartered in Seattle, WA",
    "Simon is attending a machine learning workshop in New York next week",
]

# One invocation-argument dict per sentence, each sent with the same content type.
example_invoke_args = [
    {"Body": text, "ContentType": "application/x-text"} for text in example_texts
]

# NOTE(review): presumably serializes the invocation arguments to a JSON Lines
# file and returns its location — confirm against the toolkit.
example_invoke_file = convert_invoke_args_to_jsonl(example_invoke_args)

In [None]:
# Launch the benchmark as a SageMaker job for the model created earlier,
# using the example invocation arguments file.
processor = run_as_sagemaker_job(
    role=role,
    model_name=model.name,
    invoke_args_examples_file=example_invoke_file,
)

In [None]:
# Destination of the benchmarking job's first output.
output_destination = processor.latest_job.outputs[0].destination
print(
    f"Once the job is finished, the outputs will be uploaded to {output_destination}"
)

You can optionally run the command below to copy all of the benchmark output artifacts into the current directory. The primary report output will be under the `benchmarking_report/` directory.

In [None]:
# Recursively copy the job's first output destination into the current directory.
!aws s3 cp --recursive {processor.latest_job.outputs[0].destination} .

## Run a Local Benchmarking Job [OPTIONAL]
You can also run the same benchmark locally.

In [None]:
from sm_serverless_benchmarking.benchmark import run_serverless_benchmarks

# Run the same benchmark from this environment instead of as a SageMaker job.
report = run_serverless_benchmarks(
    model_name=model.name,
    invoke_args_examples_file=example_invoke_file,
)