In [None]:
%pip install sagemaker xgboost==1.5.1 scikit-learn install sm-serverless-benchmarking -Uqq

## Deploy a pretrained Image Classification Model
In this example, we'll deploy a pretrained Image Classification model using SageMaker JumpStart and then benchmark it using the SageMaker Serverless Inference Benchmarking toolkit.

In [None]:
import sagemaker
from sagemaker import image_uris, model_uris, script_uris
from sagemaker.model import Model
import uuid

# Execution role used for the model and the benchmarking job.
# Manually provide role if using non role based identity.
role = sagemaker.get_execution_role()

# SageMaker session and the AWS region it is bound to.
sess = sagemaker.Session()
region = sess.boto_region_name

In [None]:
# JumpStart identifier of the pretrained image classification model to deploy.
model_id = "tensorflow-ic-imagenet-mobilenet-v2-100-224-classification-4"
# Version selector passed to the JumpStart lookups
# (NOTE(review): "*" presumably means "latest available" - confirm).
model_version = "*"

In [None]:
# Short UUID-derived suffix to tell model names from different runs apart.
model_name_suffix = str(uuid.uuid1())[:5]
model_name = f"js-tf-ic-mobilenet-{model_name_suffix}"

# Used to lookup cpu inference container. No instance will be deployed.
inference_instance_type = "ml.m5.xlarge"

# Keyword arguments shared by every JumpStart artifact lookup below.
jumpstart_model = {"model_id": model_id, "model_version": model_version}

# Inference docker container uri for the model selected above.
deploy_image_uri = image_uris.retrieve(
    region=None,
    framework=None,  # automatically inferred from model_id
    image_scope="inference",
    instance_type=inference_instance_type,
    **jumpstart_model,
)

# Inference script uri: dependencies and scripts for model loading,
# inference handling etc.
deploy_source_uri = script_uris.retrieve(script_scope="inference", **jumpstart_model)

# Model uri: the pre-trained model and parameters.
model_uri = model_uris.retrieve(model_scope="inference", **jumpstart_model)

# SageMaker model instance tying the container, script and weights together.
model = Model(
    name=model_name,
    role=role,
    image_uri=deploy_image_uri,
    model_data=model_uri,
    source_dir=deploy_source_uri,
    entry_point="inference.py",  # entry point file in source_dir and present in deploy_source_uri
)

In [None]:
# Attach the session created earlier in the notebook to the model object
# before calling create().
model.sagemaker_session = sess
# Create the model from the definition above. The instance type passed here is
# the one noted earlier as being used only for the container lookup.
model.create(instance_type=inference_instance_type)

## Validate Endpoint
Before launching a full benchmarking job, it is a good idea to first deploy the model on a test endpoint to ensure everything is functioning as it should. Here we will deploy a temporary endpoint and test it with an example payload. Afterwards, the endpoint is deleted. 

In [None]:
# create a temporary endpoint
from sm_serverless_benchmarking.endpoint import ServerlessEndpoint

# Temporary serverless endpoint backed by the model created above.
# NOTE(review): memory_size is presumably in MB - confirm against the toolkit docs.
endpoint = ServerlessEndpoint(model_name=model.name, memory_size=6144)
endpoint.create_endpoint()

In [None]:
from pathlib import Path

# Directory (relative to the notebook) holding the example images.
sample_image_path = Path("sample_images")

# sorted() makes the order - and therefore image_paths[0], which is used for the
# validation request - deterministic across runs and platforms.
image_paths = sorted(sample_image_path.glob("*.JPEG"))

# Fail fast with a clear message instead of a bare IndexError in a later cell.
if not image_paths:
    raise FileNotFoundError(
        f"No *.JPEG files found in '{sample_image_path.resolve()}'; "
        "add sample images before running the remaining cells."
    )

In [None]:
# invoke it with a sample payload and make sure a valid response is returned
# read_bytes() opens, reads and closes the file; open().read() left the handle open.
image_payload = image_paths[0].read_bytes()
response = endpoint.invoke_endpoint(
    {"Body": image_payload, "ContentType": "application/x-image"}
)
# response is a long list of probabilities so just printing the length
response_text = response["Body"].read().decode("utf8")
print(len(response_text))

In [None]:
# The temporary endpoint was only needed to validate the model; remove it now.
endpoint.clean_up()  # delete the endpoint

## Launch Benchmarking SageMaker Job

In [None]:
from sm_serverless_benchmarking.utils import convert_invoke_args_to_pkl
from sm_serverless_benchmarking.sagemaker_runner import run_as_sagemaker_job

# One invocation-argument dict per sample image. read_bytes() closes each file
# after reading, whereas open().read() leaked one handle per image.
example_invoke_args = [
    {"Body": img.read_bytes(), "ContentType": "application/x-image"}
    for img in image_paths
]

# Serialize the example arguments to a pickle file for the benchmark runners.
example_invoke_file = convert_invoke_args_to_pkl(example_invoke_args)

In [None]:
# Settings for the benchmarking job: which model to test, the example payloads
# to send, and how many invocations each benchmark phase performs.
benchmark_job_settings = {
    "role": role,
    "model_name": model.name,
    "invoke_args_examples_file": example_invoke_file,
    "stability_benchmark_invocations": 2500,
    "concurrency_benchmark_invocations": 2500,
}

# Launch the benchmark as a SageMaker job and keep the returned handle.
processor = run_as_sagemaker_job(**benchmark_job_settings)

In [None]:
# Destination of the job's first output, where the results will be uploaded.
output_destination = processor.latest_job.outputs[0].destination
print(
    f"Once the job is finished, the outputs will be uploaded to {output_destination}"
)

You can optionally run the command below to copy all of the benchmark output artifacts into the current directory. The primary report output will be under the `benchmarking_report/` directory.

In [None]:
# Recursively copy everything under the job's first output destination into
# the current working directory (requires the AWS CLI to be available).
!aws s3 cp --recursive {processor.latest_job.outputs[0].destination} .

## Run a Local Benchmarking Job [OPTIONAL]
You can also run the same benchmark locally, directly from this notebook, instead of as a SageMaker job.

In [None]:
from sm_serverless_benchmarking.benchmark import run_serverless_benchmarks
# Run the benchmark from this kernel against the same model and example payloads.
report = run_serverless_benchmarks(
    model_name=model.name,
    invoke_args_examples_file=example_invoke_file,
)