---
# CloudFormation template: SageMaker model + endpoint config + endpoint,
# with an Application Auto Scaling target-tracking policy on the endpoint's
# production variant, plus the two IAM roles those resources reference.
AWSTemplateFormatVersion: '2010-09-09'  # quoted so YAML loaders do not read it as a date
Description: 'CloudFormation template to create Model Endpoint with AutoScaling.'

Parameters:
  # Used as the ModelName, the EndpointConfigName and the EndpointName below.
  ModelEndpointName:
    Description: Model endpoint name
    Type: String
    Default: 'activity-detection-endpoint'
  # Bucket name only; the object key of the model archive is fixed in the
  # Model resource.
  ModelDataBucket:
    Description: 'S3 Bucket location of model artefacts.'
    Type: String
    Default: 'activity-detection-data-bucket'
  InstanceType:
    Description: Instance type
    Type: String
    Default: 'ml.g4dn.2xlarge'
    AllowedValues:
      - ml.g4dn.xlarge
      - ml.g4dn.2xlarge
      - ml.g4dn.4xlarge
      - ml.g4dn.8xlarge
      - ml.g4dn.12xlarge
      - ml.g4dn.16xlarge
  # Feeds both InitialInstanceCount and the scalable target's MinCapacity,
  # which are integers, so the parameter is validated as a Number.
  MinInstanceCount:
    Description: Minimum number of instances created
    Type: Number
    Default: 1
    MinValue: 1
  # Feeds the scalable target's MaxCapacity.
  MaxInstanceCount:
    Description: Maximum number of instances created during autoscaling
    Type: Number
    Default: 3
    MinValue: 1
  VariantName:
    Description: The name of the production variant.
    Type: String
    Default: 'Initial'
  # Applied to both ScaleInCooldown and ScaleOutCooldown.
  AutoscaleCoolDownPeriod:
    Type: Number
    Default: 600
    Description: >-
      The amount of time, in seconds, after a scaling activity completes
      before any further trigger-related scaling activities can start
  # Target value for the SageMakerVariantInvocationsPerInstance metric.
  SageMakerVariantInvocationsPerInstance:
    Type: Number
    Default: 6
    Description: >-
      SageMakerVariantInvocationsPerInstance is the average number of times
      per minute that each instance for a variant is invoked

Resources:
  # MXNet GPU inference container serving the model archive from S3.
  Model:
    Type: AWS::SageMaker::Model
    Properties:
      Containers:
        - ContainerHostname: MxnetContainer
          Image: !Sub '763104351884.dkr.ecr.${AWS::Region}.amazonaws.com/mxnet-inference:1.6.0-gpu-py3'
          ModelDataUrl: !Sub 's3://${ModelDataBucket}/artifacts/amazon-sagemaker-activity-detection/deployment/model/model.tar.gz'
          # Container environment variables are strings, so numeric and
          # boolean-looking values are quoted explicitly.
          Environment:
            SAGEMAKER_MODEL_SERVER_WORKERS: '10'
            SAGEMAKER_ENABLE_CLOUDWATCH_METRICS: 'true'
            SAGEMAKER_REGION: !Ref AWS::Region
            SAGEMAKER_PROGRAM: 'inference.py'
            SAGEMAKER_CONTAINER_LOG_LEVEL: '20'
            # Same archive as ModelDataUrl.
            SAGEMAKER_SUBMIT_DIRECTORY: !Sub 's3://${ModelDataBucket}/artifacts/amazon-sagemaker-activity-detection/deployment/model/model.tar.gz'
      ExecutionRoleArn: !GetAtt SageMakerExecutionRole.Arn
      ModelName: !Ref ModelEndpointName

  # Single production variant that starts at MinInstanceCount instances.
  EndpointConfig:
    Type: AWS::SageMaker::EndpointConfig
    Properties:
      ProductionVariants:
        - ModelName: !GetAtt Model.ModelName
          VariantName: !Ref VariantName
          InitialInstanceCount: !Ref MinInstanceCount
          InstanceType: !Ref InstanceType
          InitialVariantWeight: 1.0
      EndpointConfigName: !Ref ModelEndpointName

  Endpoint:
    Type: AWS::SageMaker::Endpoint
    Properties:
      EndpointConfigName: !GetAtt EndpointConfig.EndpointConfigName
      EndpointName: !Ref ModelEndpointName

  # Registers the variant's instance count as a scalable dimension.
  AutoScalingTargetSagemaker:
    Type: AWS::ApplicationAutoScaling::ScalableTarget
    Properties:
      MaxCapacity: !Ref MaxInstanceCount
      MinCapacity: !Ref MinInstanceCount
      # Format is endpoint/<endpoint name>/variant/<variant name>. The name is
      # taken from the Endpoint resource itself (not the endpoint config), which
      # also makes this target wait for the endpoint without an explicit
      # DependsOn.
      ResourceId: !Sub 'endpoint/${Endpoint.EndpointName}/variant/${VariantName}'
      RoleARN: !GetAtt AutoScalingPolicyRole.Arn
      ScalableDimension: sagemaker:variant:DesiredInstanceCount
      ServiceNamespace: sagemaker

  # Target-tracking policy on invocations per instance. The dependency on the
  # scalable target is implied by the !Ref in ScalingTargetId.
  AutoScalingPolicy:
    Type: AWS::ApplicationAutoScaling::ScalingPolicy
    Properties:
      PolicyName: SageMakerEndpointInvocationScalingPolicy
      PolicyType: TargetTrackingScaling
      ScalingTargetId: !Ref AutoScalingTargetSagemaker
      TargetTrackingScalingPolicyConfiguration:
        PredefinedMetricSpecification:
          PredefinedMetricType: SageMakerVariantInvocationsPerInstance
        ScaleInCooldown: !Ref AutoscaleCoolDownPeriod
        ScaleOutCooldown: !Ref AutoscaleCoolDownPeriod
        TargetValue: !Ref SageMakerVariantInvocationsPerInstance

  # Role passed as RoleARN to the scalable target.
  # NOTE(review): the trust policy names sagemaker.amazonaws.com; confirm this
  # is the principal intended to assume a role used by Application Auto Scaling.
  AutoScalingPolicyRole:
    Type: AWS::IAM::Role
    Properties:
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service: sagemaker.amazonaws.com
            Action:
              - sts:AssumeRole
      Policies:
        - PolicyName: AutoscalingPolicySagemaker
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              - Effect: Allow
                Action:
                  - sagemaker:DescribeEndpoint
                  - sagemaker:DescribeEndpointConfig
                  - sagemaker:UpdateEndpointWeightsAndCapacities
                Resource: '*'
              - Action:
                  - application-autoscaling:*
                Effect: Allow
                Resource: '*'
              - Action: iam:CreateServiceLinkedRole
                Effect: Allow
                Resource: arn:aws:iam::*:role/aws-service-role/sagemaker.application-autoscaling.amazonaws.com/AWSServiceRoleForApplicationAutoScaling_SageMakerEndpoint
                Condition:
                  StringLike:
                    iam:AWSServiceName: sagemaker.application-autoscaling.amazonaws.com
              - Effect: Allow
                Action:
                  - cloudwatch:PutMetricAlarm
                  - cloudwatch:DescribeAlarms
                  - cloudwatch:DeleteAlarms
                Resource: '*'
      # NOTE(review): a fixed role name means only one copy of this stack can
      # exist per account and the stack needs CAPABILITY_NAMED_IAM; kept as-is
      # because renaming would replace the existing role.
      RoleName: 'AutoScalingPolicyRole'

  # Execution role assumed by SageMaker for the Model resource.
  # NOTE(review): the S3 and DynamoDB full-access managed policies are broad;
  # consider scoping them to the resources the inference code actually uses.
  SageMakerExecutionRole:
    Type: AWS::IAM::Role
    Properties:
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - sagemaker.amazonaws.com
            Action:
              - sts:AssumeRole
      Path: '/'
      ManagedPolicyArns:
        - 'arn:aws:iam::aws:policy/AmazonS3FullAccess'
        - 'arn:aws:iam::aws:policy/AmazonDynamoDBFullAccess'
        - 'arn:aws:iam::aws:policy/AmazonSageMakerFullAccess'

Outputs:
  # Value returned by !Ref on the Endpoint resource.
  EndpointId:
    Value: !Ref Endpoint
  # The endpoint's EndpointName attribute.
  EndpointName:
    Value: !GetAtt Endpoint.EndpointName