# Hugging Face Deployment Pipeline

This notebook creates a SageMaker Pipeline to deploy a trained Hugging Face model to a multi-model SageMaker inference endpoint.

## Inputs

* S3 location for model artifact

## Assumptions

* Endpoint is already created with appropriate inference script
* Endpoint name registered in Parameter Store under key `hf-endpoint`
* Test data location in S3 registered in Parameter Store under key `hf-test-data`
* Multi-model storage location in S3 registered in Parameter Store under key `hf-s3-location`
* This notebook's execution role needs permission to create new IAM roles and Lambda functions
* This notebook's execution role needs the policy `AmazonSageMakerPipelinesIntegrations`

## Steps

* Look up endpoint name
* Look up artifact storage path
* Look up test data location
* Copy model to artifact storage path
* Invoke endpoint with new model and return results

### Register pipeline input

In [138]:
from sagemaker.workflow.parameters import ParameterInteger, ParameterString

input_s3_loc = ParameterString(name="ModelLocation", default_value="s3://")

### Look up endpoint name, artifact path, test data location

In [139]:
%%writefile lambda_param_store.py

"""
This Lambda function looks up three input parameters from Parameter Store
"""

import json
import boto3

PARAM_ENDPOINT = 'hf-endpoint'
PARAM_TEST = 'hf-test-data'
PARAM_ARTIFACT = 'hf-s3-location'

def lambda_handler(event, context):

 ssm_client = boto3.client("ssm")
 
 response = ssm_client.get_parameter(
 Name=PARAM_ENDPOINT
 )
 endpoint = response['Parameter']['Value']
 
 response = ssm_client.get_parameter(
 Name=PARAM_TEST
 )
 test_s3 = response['Parameter']['Value']
 
 response = ssm_client.get_parameter(
 Name=PARAM_ARTIFACT
 )
 artifact_s3 = response['Parameter']['Value']

 return {
 "statusCode": 200,
 "endpoint": json.dumps(endpoint),
 "test_s3": json.dumps(test_s3),
 "artifact_s3": json.dumps(artifact_s3)
 }

Overwriting lambda_param_store.py


In [140]:
from iam_helper import create_lambda_role

lambda_role = create_lambda_role("lambda-deployment-role") 

Using ARN from existing role: lambda-deployment-role


In [141]:
from sagemaker.workflow.lambda_step import (
 LambdaStep,
 LambdaOutput,
 LambdaOutputTypeEnum,
)
from sagemaker.lambda_helper import Lambda
import time

# Use the current time to define unique names for the resources created
current_time = time.strftime("%m-%d-%H-%M-%S", time.localtime())
function_name = "sagemaker-demo-hf-lambda-step" + current_time

func = Lambda(
 function_name=function_name,
 execution_role_arn=lambda_role,
 script="lambda_param_store.py",
 handler="lambda_param_store.lambda_handler",
 timeout=30,
 memory_size=512,
)

In [142]:
output_param_endpoint = LambdaOutput(output_name="endpoint", output_type=LambdaOutputTypeEnum.String)
output_param_test_loc = LambdaOutput(output_name="test_s3", output_type=LambdaOutputTypeEnum.String)
output_param_artifact_loc = LambdaOutput(output_name="artifact_s3", output_type=LambdaOutputTypeEnum.String)

In [143]:
step_params_lambda = LambdaStep(
 name="ReadParamsStep",
 lambda_func=func,
 inputs={},
 outputs=[output_param_endpoint, output_param_test_loc, output_param_artifact_loc] )

### Step to copy artifact to new location

In [144]:
%%writefile lambda_artifact.py

"""
This Lambda function copies a model artifact to the multi-model endpoint artifact path
"""

import json
import boto3
from urllib.parse import urlparse

def lambda_handler(event, context):

 s3_client = boto3.client("s3")
 
 input_artifact = event["model_path"]
 artifact_path = event["artifact_path"].lstrip('"').rstrip('"')
 
 print("got input artifact {0} and artifact path {1}".format(input_artifact, artifact_path))
 
 input_p = urlparse(input_artifact)
 artifact_p = urlparse(artifact_path)
 
 input_key = input_p.path.lstrip('/')
 input_bucket = input_p.netloc
 input_name = input_p.path.split('/')[-1]
 
 print("Input key = {0}, input bucket = {1}, input name = {2}".format(input_key, input_bucket, input_name))
 
 artifact_key = artifact_p.path.lstrip('/') + input_name
 artifact_bucket = artifact_p.netloc
 
 print("Artifact key = {0}, artifact bucket = {1}".format(artifact_key, artifact_bucket))
 
 copy_source = {
 'Bucket': input_bucket,
 'Key': input_key
 }
 s3_client.copy(copy_source, artifact_bucket, artifact_key)
 print("Copying {0} to {1}/{2}".format(json.dumps(copy_source), artifact_bucket, artifact_key))

 return {
 "statusCode": 200,
 "model_key": input_name
 }

Overwriting lambda_artifact.py


In [145]:
output_model_loc = LambdaOutput(output_name="model_key", output_type=LambdaOutputTypeEnum.String)

In [146]:
current_time = time.strftime("%m-%d-%H-%M-%S", time.localtime())
function_name_2 = "sagemaker-demo-hf-lambda-step-2" + current_time
func2 = Lambda(
 function_name=function_name_2,
 execution_role_arn=lambda_role,
 script="lambda_artifact.py",
 handler="lambda_artifact.lambda_handler",
 timeout=600,
 memory_size=512,
)

In [147]:
step_artifact_lambda = LambdaStep(
 name="CopyArtifactStep",
 lambda_func=func2,
 inputs={
 "model_path": input_s3_loc,
 "artifact_path": output_param_artifact_loc
 },
 outputs=[output_model_loc] )

### Invoke endpoint with new model

In [148]:
%%writefile lambda_test.py

"""
This Lambda function invokes a specific model on a multi-model endpoint
"""

import json
import boto3
from urllib.parse import urlparse

TMP_FILE = '/tmp/body.txt'

def lambda_handler(event, context):

 sm_client = boto3.client("sagemaker-runtime")
 s3_client = boto3.client("s3")
 
 endpoint = event["endpoint"].lstrip('"').rstrip('"')
 model = event["model"]
 test = event["test"].lstrip('"').rstrip('"')
 
 print("endpoint = {0}, model = {1}, test = {2}".format(endpoint, model, test))
 
 test_p = urlparse(test)
 
 s3_client.download_file(test_p.netloc, test_p.path.lstrip('/'), TMP_FILE)
 
 with open(TMP_FILE, 'r') as F:
 input_data = {'input': F.read() }
 print(f"{input_data}")
 response = sm_client.invoke_endpoint(
 EndpointName = endpoint,
 ContentType = "application/json",
 TargetModel = model,
 Body = json.dumps(input_data).encode('utf-8'))
 output = response['Body'].read().decode("utf-8")

 return {
 "statusCode": 200,
 "inference": output
 }

Overwriting lambda_test.py


In [149]:
current_time = time.strftime("%m-%d-%H-%M-%S", time.localtime())
function_name_3 = "sagemaker-demo-hf-lambda-step-3" + current_time
func3 = Lambda(
 function_name=function_name_3,
 execution_role_arn=lambda_role,
 script="lambda_test.py",
 handler="lambda_test.lambda_handler",
 timeout=600,
 memory_size=1024,
)

In [150]:
output_test = LambdaOutput(output_name="inference", output_type=LambdaOutputTypeEnum.String)

In [151]:
step_test_lambda = LambdaStep(
 name="TestStep",
 lambda_func=func3,
 inputs={
 "endpoint": output_param_endpoint,
 "model": output_model_loc,
 "test": output_param_test_loc,
 },
 outputs=[output_test] )

### Define Pipeline

In [152]:
from sagemaker.workflow.pipeline import Pipeline
import sagemaker
import boto3

region = sagemaker.Session().boto_region_name
sm_client = boto3.client("sagemaker")
boto_session = boto3.Session(region_name=region)
sagemaker_session = sagemaker.session.Session(
 boto_session=boto_session, sagemaker_client=sm_client
)

pipeline = Pipeline(
 name=f"HuggingFaceDeployPipeline",
 parameters=[
 input_s3_loc
 ],
 steps=[step_params_lambda, step_artifact_lambda, step_test_lambda],
 sagemaker_session=sagemaker_session,
)

In [153]:
import json

json.loads(pipeline.definition())

{'Version': '2020-12-01',
 'Metadata': {},
 'Parameters': [{'Name': 'ModelLocation',
 'Type': 'String',
 'DefaultValue': 's3://'}],
 'PipelineExperimentConfig': {'ExperimentName': {'Get': 'Execution.PipelineName'},
 'TrialName': {'Get': 'Execution.PipelineExecutionId'}},
 'Steps': [{'Name': 'ReadParamsStep',
 'Type': 'Lambda',
 'Arguments': {},
 'FunctionArn': 'arn:aws:lambda:us-east-1:752304587005:function:sagemaker-demo-hf-lambda-step02-24-00-34-23',
 'OutputParameters': [{'OutputName': 'endpoint', 'OutputType': 'String'},
 {'OutputName': 'test_s3', 'OutputType': 'String'},
 {'OutputName': 'artifact_s3', 'OutputType': 'String'}]},
 {'Name': 'CopyArtifactStep',
 'Type': 'Lambda',
 'Arguments': {'model_path': {'Get': 'Parameters.ModelLocation'},
 'artifact_path': {'Get': "Steps.ReadParamsStep.OutputParameters['artifact_s3']"}},
 'FunctionArn': 'arn:aws:lambda:us-east-1:752304587005:function:sagemaker-demo-hf-lambda-step-202-24-00-34-26',
 'OutputParameters': [{'OutputName': 'model_key'

In [154]:
role = sagemaker.get_execution_role()

pipeline.upsert(role_arn=role)

{'PipelineArn': 'arn:aws:sagemaker:us-east-1:752304587005:pipeline/huggingfacedeploypipeline',
 'ResponseMetadata': {'RequestId': 'c5a8607b-dc5e-4160-b0b3-7936722bf9ab',
 'HTTPStatusCode': 200,
 'HTTPHeaders': {'x-amzn-requestid': 'c5a8607b-dc5e-4160-b0b3-7936722bf9ab',
 'content-type': 'application/x-amz-json-1.1',
 'content-length': '93',
 'date': 'Thu, 24 Feb 2022 00:34:32 GMT'},
 'RetryAttempts': 0}}

### Execute pipeline

In [155]:
#execution = pipeline.start(parameters = {"ModelLocation": "s3://sagemaker-us-east-1-<>/mmtest/m2.tar.gz"})

In [173]:
execution.wait()