{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Hugging Face Deployment Pipeline\n", "\n", "This notebook creates a SageMaker Pipeline to deploy a trained Hugging Face model to a multi-model SageMaker inference endpoint.\n", "\n", "## Inputs\n", "\n", "* S3 location for model artifact\n", "\n", "## Assumptions\n", "\n", "* Endpoint is already created with appropriate inference script\n", "* Endpoint name registered in Parameter Store under key `hf-endpoint`\n", "* Test data location in S3 registered in Parameter Store under key `hf-test-data`\n", "* Multi-model storage location in S3 registered in Parameter Store under key `hf-s3-location`\n", "* This notebook's execution role needs permission to create new IAM roles and Lambda functions\n", "* This notebook's execution role needs the policy `AmazonSageMakerPipelinesIntegrations`" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Steps\n", "\n", "* Look up endpoint name\n", "* Look up artifact storage path\n", "* Look up test data location\n", "* Copy model to artifact storage path\n", "* Invoke endpoint with new model and return results" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Register pipeline input" ] }, { "cell_type": "code", "execution_count": 138, "metadata": {}, "outputs": [], "source": [ "from sagemaker.workflow.parameters import ParameterInteger, ParameterString\n", "\n", "input_s3_loc = ParameterString(name=\"ModelLocation\", default_value=\"s3://\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Look up endpoint name, artifact path, test data location" ] }, { "cell_type": "code", "execution_count": 139, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Overwriting lambda_param_store.py\n" ] } ], "source": [ "%%writefile lambda_param_store.py\n", "\n", "\"\"\"\n", "This Lambda function looks up three input parameters from Parameter Store\n", "\"\"\"\n", "\n", "import json\n", "import boto3\n", "\n", "PARAM_ENDPOINT = 'hf-endpoint'\n", "PARAM_TEST = 'hf-test-data'\n", "PARAM_ARTIFACT = 'hf-s3-location'\n", "\n", "def lambda_handler(event, context):\n", "\n", " ssm_client = boto3.client(\"ssm\")\n", " \n", " response = ssm_client.get_parameter(\n", " Name=PARAM_ENDPOINT\n", " )\n", " endpoint = response['Parameter']['Value']\n", " \n", " response = ssm_client.get_parameter(\n", " Name=PARAM_TEST\n", " )\n", " test_s3 = response['Parameter']['Value']\n", " \n", " response = ssm_client.get_parameter(\n", " Name=PARAM_ARTIFACT\n", " )\n", " artifact_s3 = response['Parameter']['Value']\n", "\n", " return {\n", " \"statusCode\": 200,\n", " \"endpoint\": json.dumps(endpoint),\n", " \"test_s3\": json.dumps(test_s3),\n", " \"artifact_s3\": json.dumps(artifact_s3)\n", " }" ] }, { "cell_type": "code", "execution_count": 140, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Using ARN from existing role: lambda-deployment-role\n" ] } ], "source": [ "from iam_helper import create_lambda_role\n", "\n", "lambda_role = create_lambda_role(\"lambda-deployment-role\") " ] }, { "cell_type": "code", "execution_count": 141, "metadata": {}, "outputs": [], "source": [ "from sagemaker.workflow.lambda_step import (\n", " LambdaStep,\n", " LambdaOutput,\n", " LambdaOutputTypeEnum,\n", ")\n", "from sagemaker.lambda_helper import Lambda\n", "import time\n", "\n", "# Use the current time to define unique names for the resources created\n", "current_time = time.strftime(\"%m-%d-%H-%M-%S\", time.localtime())\n", 
"function_name = \"sagemaker-demo-hf-lambda-step\" + current_time\n", "\n", "func = Lambda(\n", " function_name=function_name,\n", " execution_role_arn=lambda_role,\n", " script=\"lambda_param_store.py\",\n", " handler=\"lambda_param_store.lambda_handler\",\n", " timeout=30,\n", " memory_size=512,\n", ")" ] }, { "cell_type": "code", "execution_count": 142, "metadata": {}, "outputs": [], "source": [ "output_param_endpoint = LambdaOutput(output_name=\"endpoint\", output_type=LambdaOutputTypeEnum.String)\n", "output_param_test_loc = LambdaOutput(output_name=\"test_s3\", output_type=LambdaOutputTypeEnum.String)\n", "output_param_artifact_loc = LambdaOutput(output_name=\"artifact_s3\", output_type=LambdaOutputTypeEnum.String)" ] }, { "cell_type": "code", "execution_count": 143, "metadata": {}, "outputs": [], "source": [ "step_params_lambda = LambdaStep(\n", " name=\"ReadParamsStep\",\n", " lambda_func=func,\n", " inputs={},\n", " outputs=[output_param_endpoint, output_param_test_loc, output_param_artifact_loc] )" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Step to copy artifact to new location" ] }, { "cell_type": "code", "execution_count": 144, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Overwriting lambda_artifact.py\n" ] } ], "source": [ "%%writefile lambda_artifact.py\n", "\n", "\"\"\"\n", "This Lambda function copies a model artifact to the multi-model endpoint artifact path\n", "\"\"\"\n", "\n", "import json\n", "import boto3\n", "from urllib.parse import urlparse\n", "\n", "def lambda_handler(event, context):\n", "\n", " s3_client = boto3.client(\"s3\")\n", " \n", " input_artifact = event[\"model_path\"]\n", " artifact_path = event[\"artifact_path\"].lstrip('\"').rstrip('\"')\n", " \n", " print(\"got input artifact {0} and artifact path {1}\".format(input_artifact, artifact_path))\n", " \n", " input_p = urlparse(input_artifact)\n", " artifact_p = urlparse(artifact_path)\n", " \n", " input_key = input_p.path.lstrip('/')\n", " input_bucket = input_p.netloc\n", " input_name = input_p.path.split('/')[-1]\n", " \n", " print(\"Input key = {0}, input bucket = {1}, input name = {2}\".format(input_key, input_bucket, input_name))\n", " \n", " artifact_key = artifact_p.path.lstrip('/') + input_name\n", " artifact_bucket = artifact_p.netloc\n", " \n", " print(\"Artifact key = {0}, artifact bucket = {1}\".format(artifact_key, artifact_bucket))\n", " \n", " copy_source = {\n", " 'Bucket': input_bucket,\n", " 'Key': input_key\n", " }\n", " s3_client.copy(copy_source, artifact_bucket, artifact_key)\n", " print(\"Copying {0} to {1}/{2}\".format(json.dumps(copy_source), artifact_bucket, artifact_key))\n", "\n", " return {\n", " \"statusCode\": 200,\n", " \"model_key\": input_name\n", " }" ] }, { "cell_type": "code", "execution_count": 145, "metadata": {}, "outputs": [], "source": [ "output_model_loc = LambdaOutput(output_name=\"model_key\", output_type=LambdaOutputTypeEnum.String)" ] }, { "cell_type": "code", "execution_count": 146, "metadata": {}, "outputs": [], "source": [ "current_time = time.strftime(\"%m-%d-%H-%M-%S\", time.localtime())\n", "function_name_2 = \"sagemaker-demo-hf-lambda-step-2\" + current_time\n", "func2 = Lambda(\n", " function_name=function_name_2,\n", " execution_role_arn=lambda_role,\n", " script=\"lambda_artifact.py\",\n", " handler=\"lambda_artifact.lambda_handler\",\n", " timeout=600,\n", " memory_size=512,\n", ")" ] }, { "cell_type": "code", "execution_count": 147, "metadata": {}, "outputs": [], "source": [ 
"step_artifact_lambda = LambdaStep(\n", " name=\"CopyArtifactStep\",\n", " lambda_func=func2,\n", " inputs={\n", " \"model_path\": input_s3_loc,\n", " \"artifact_path\": output_param_artifact_loc\n", " },\n", " outputs=[output_model_loc] )" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Invoke endpoint with new model" ] }, { "cell_type": "code", "execution_count": 148, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Overwriting lambda_test.py\n" ] } ], "source": [ "%%writefile lambda_test.py\n", "\n", "\"\"\"\n", "This Lambda function invokes a specific model on a multi-model endpoint\n", "\"\"\"\n", "\n", "import json\n", "import boto3\n", "from urllib.parse import urlparse\n", "\n", "TMP_FILE = '/tmp/body.txt'\n", "\n", "def lambda_handler(event, context):\n", "\n", " sm_client = boto3.client(\"sagemaker-runtime\")\n", " s3_client = boto3.client(\"s3\")\n", " \n", " endpoint = event[\"endpoint\"].lstrip('\"').rstrip('\"')\n", " model = event[\"model\"]\n", " test = event[\"test\"].lstrip('\"').rstrip('\"')\n", " \n", " print(\"endpoint = {0}, model = {1}, test = {2}\".format(endpoint, model, test))\n", " \n", " test_p = urlparse(test)\n", " \n", " s3_client.download_file(test_p.netloc, test_p.path.lstrip('/'), TMP_FILE)\n", " \n", " with open(TMP_FILE, 'r') as F:\n", " input_data = {'input': F.read() }\n", " print(f\"{input_data}\")\n", " response = sm_client.invoke_endpoint(\n", " EndpointName = endpoint,\n", " ContentType = \"application/json\",\n", " TargetModel = model,\n", " Body = json.dumps(input_data).encode('utf-8'))\n", " output = response['Body'].read().decode(\"utf-8\")\n", "\n", " return {\n", " \"statusCode\": 200,\n", " \"inference\": output\n", " }" ] }, { "cell_type": "code", "execution_count": 149, "metadata": {}, "outputs": [], "source": [ "current_time = time.strftime(\"%m-%d-%H-%M-%S\", time.localtime())\n", "function_name_3 = \"sagemaker-demo-hf-lambda-step-3\" + current_time\n", "func3 = Lambda(\n", " function_name=function_name_3,\n", " execution_role_arn=lambda_role,\n", " script=\"lambda_test.py\",\n", " handler=\"lambda_test.lambda_handler\",\n", " timeout=600,\n", " memory_size=1024,\n", ")" ] }, { "cell_type": "code", "execution_count": 150, "metadata": {}, "outputs": [], "source": [ "output_test = LambdaOutput(output_name=\"inference\", output_type=LambdaOutputTypeEnum.String)" ] }, { "cell_type": "code", "execution_count": 151, "metadata": {}, "outputs": [], "source": [ "step_test_lambda = LambdaStep(\n", " name=\"TestStep\",\n", " lambda_func=func3,\n", " inputs={\n", " \"endpoint\": output_param_endpoint,\n", " \"model\": output_model_loc,\n", " \"test\": output_param_test_loc,\n", " },\n", " outputs=[output_test] )" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Define Pipeline" ] }, { "cell_type": "code", "execution_count": 152, "metadata": {}, "outputs": [], "source": [ "from sagemaker.workflow.pipeline import Pipeline\n", "import sagemaker\n", "import boto3\n", "\n", "region = sagemaker.Session().boto_region_name\n", "sm_client = boto3.client(\"sagemaker\")\n", "boto_session = boto3.Session(region_name=region)\n", "sagemaker_session = sagemaker.session.Session(\n", " boto_session=boto_session, sagemaker_client=sm_client\n", ")\n", "\n", "pipeline = Pipeline(\n", " name=f\"HuggingFaceDeployPipeline\",\n", " parameters=[\n", " input_s3_loc\n", " ],\n", " steps=[step_params_lambda, step_artifact_lambda, step_test_lambda],\n", " sagemaker_session=sagemaker_session,\n", ")" ] }, { 
"cell_type": "code", "execution_count": 153, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'Version': '2020-12-01',\n", " 'Metadata': {},\n", " 'Parameters': [{'Name': 'ModelLocation',\n", " 'Type': 'String',\n", " 'DefaultValue': 's3://'}],\n", " 'PipelineExperimentConfig': {'ExperimentName': {'Get': 'Execution.PipelineName'},\n", " 'TrialName': {'Get': 'Execution.PipelineExecutionId'}},\n", " 'Steps': [{'Name': 'ReadParamsStep',\n", " 'Type': 'Lambda',\n", " 'Arguments': {},\n", " 'FunctionArn': 'arn:aws:lambda:us-east-1:752304587005:function:sagemaker-demo-hf-lambda-step02-24-00-34-23',\n", " 'OutputParameters': [{'OutputName': 'endpoint', 'OutputType': 'String'},\n", " {'OutputName': 'test_s3', 'OutputType': 'String'},\n", " {'OutputName': 'artifact_s3', 'OutputType': 'String'}]},\n", " {'Name': 'CopyArtifactStep',\n", " 'Type': 'Lambda',\n", " 'Arguments': {'model_path': {'Get': 'Parameters.ModelLocation'},\n", " 'artifact_path': {'Get': \"Steps.ReadParamsStep.OutputParameters['artifact_s3']\"}},\n", " 'FunctionArn': 'arn:aws:lambda:us-east-1:752304587005:function:sagemaker-demo-hf-lambda-step-202-24-00-34-26',\n", " 'OutputParameters': [{'OutputName': 'model_key', 'OutputType': 'String'}]},\n", " {'Name': 'TestStep',\n", " 'Type': 'Lambda',\n", " 'Arguments': {'endpoint': {'Get': \"Steps.ReadParamsStep.OutputParameters['endpoint']\"},\n", " 'model': {'Get': \"Steps.CopyArtifactStep.OutputParameters['model_key']\"},\n", " 'test': {'Get': \"Steps.ReadParamsStep.OutputParameters['test_s3']\"}},\n", " 'FunctionArn': 'arn:aws:lambda:us-east-1:752304587005:function:sagemaker-demo-hf-lambda-step-302-24-00-34-28',\n", " 'OutputParameters': [{'OutputName': 'inference', 'OutputType': 'String'}]}]}" ] }, "execution_count": 153, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import json\n", "\n", "json.loads(pipeline.definition())" ] }, { "cell_type": "code", "execution_count": 154, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'PipelineArn': 'arn:aws:sagemaker:us-east-1:752304587005:pipeline/huggingfacedeploypipeline',\n", " 'ResponseMetadata': {'RequestId': 'c5a8607b-dc5e-4160-b0b3-7936722bf9ab',\n", " 'HTTPStatusCode': 200,\n", " 'HTTPHeaders': {'x-amzn-requestid': 'c5a8607b-dc5e-4160-b0b3-7936722bf9ab',\n", " 'content-type': 'application/x-amz-json-1.1',\n", " 'content-length': '93',\n", " 'date': 'Thu, 24 Feb 2022 00:34:32 GMT'},\n", " 'RetryAttempts': 0}}" ] }, "execution_count": 154, "metadata": {}, "output_type": "execute_result" } ], "source": [ "role = sagemaker.get_execution_role()\n", "\n", "pipeline.upsert(role_arn=role)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Execute pipeline" ] }, { "cell_type": "code", "execution_count": 155, "metadata": {}, "outputs": [], "source": [ "#execution = pipeline.start(parameters = {\"ModelLocation\": \"s3://sagemaker-us-east-1-<>/mmtest/m2.tar.gz\"})" ] }, { "cell_type": "code", "execution_count": 173, "metadata": {}, "outputs": [], "source": [ "execution.wait()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "instance_type": "ml.t3.medium", "kernelspec": { "display_name": "Python 3 (Data Science)", "language": "python", "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-1:081325390199:image/datascience-1.0" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", 
"pygments_lexer": "ipython3", "version": "3.7.10" } }, "nbformat": 4, "nbformat_minor": 4 }