version: 0.2

phases:
  install:
    runtime-versions:
      python: 3.8
    commands:
      - pip install --upgrade --force-reinstall . "awscli>1.20.30"
  build:
    commands:
      - export PYTHONUNBUFFERED=TRUE
      - export SAGEMAKER_PROJECT_NAME_ID="${SAGEMAKER_PROJECT_NAME}-${SAGEMAKER_PROJECT_ID}"
      - export INPUT_DATA="s3://sagemaker-servicecatalog-seedcode-${AWS_REGION}/dataset/abalone-dataset.csv"
      # Can be used if there are multiple MODEL_PACKAGE_GROUPs
      # - export MODEL_PACKAGE_GROUP_NAME="${SAGEMAKER_PROJECT_NAME_ID}"

      ### Model Build/Training section ###
      # Run the model training pipeline (train, evaluate, and register)
      - |
        run-pipeline --module-name ml_pipelines.training.pipeline \
          --role-arn $SAGEMAKER_PIPELINE_ROLE_ARN \
          --tags "[{\"Key\":\"sagemaker:project-name\", \"Value\":\"${SAGEMAKER_PROJECT_NAME}\"}, {\"Key\":\"sagemaker:project-id\", \"Value\":\"${SAGEMAKER_PROJECT_ID}\"}]" \
          --kwargs "{\"region\":\"${AWS_REGION}\",\"role\":\"${SAGEMAKER_PIPELINE_ROLE_ARN}\",\"default_bucket\":\"${ARTIFACT_BUCKET}\",\"pipeline_name\":\"${SAGEMAKER_PROJECT_NAME_ID}\",\"model_package_group_name\":\"${MODEL_PACKAGE_GROUP_NAME}\",\"base_job_prefix\":\"${SAGEMAKER_PROJECT_NAME}-training\",\"bucket_kms_id\":\"${ARTIFACT_BUCKET_KMS_ID}\"}" \
          --pipeline-inputs "{\"InputDataUrl\":\"${INPUT_DATA}\",\"ModelApprovalStatus\":\"Approved\"}"
      - echo "Create/Update of the Training SageMaker Pipeline and execution completed."

      # Export the latest approved model package ARN (publishing it to SSM is commented out below)
      - export MODEL_PACKAGE_ARN=$(python -m get_approved_package --model-package-group-name "${MODEL_PACKAGE_GROUP_NAME}")
      # USE THE FOLLOWING LINES ONLY IF THE MODEL_PACKAGE_GROUP(s) WERE CREATED FROM THIS REPOSITORY AND NOT THROUGH SM PROJECT TEMPLATE INSTANTIATION
      # - export MODEL_PACKAGE_ARN=$(python -m get_approved_package --model-package-group-name "${MODEL_PACKAGE_GROUP_NAME}" --preprod-account "${PREPROD_ACCOUNT}" --prod-account "${PROD_ACCOUNT}")
      # - |
      #   aws ssm put-parameter \
      #     --name "/mlops/${SAGEMAKER_PROJECT_NAME}/modelpackagearn" \
      #     --value "${MODEL_PACKAGE_ARN}" \
      #     --type String \
      #     --overwrite

      # Re-encrypt the model data in S3 with the artifact bucket key so that cross-account decryption works.
      # This is a workaround: the SageMaker Python SDK ignores the supplied custom key and encrypts the model with the default AWS-managed S3 KMS key.
      - export MODEL_LOCATION=$(python -m get_model_location --model-package-arn "${MODEL_PACKAGE_ARN}")
      - aws s3 cp ${MODEL_LOCATION} ${MODEL_LOCATION} --sse aws:kms --sse-kms-key-id ${ARTIFACT_BUCKET_KMS_ID}

      ### Inference SageMaker pipeline section ###
      - echo "MODEL_PACKAGE_ARN is ${MODEL_PACKAGE_ARN}"
      # Test the inference pipeline
      - echo "Test Inference SageMaker Pipeline"
      # Add run-pipeline with small data subset here?
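      # Optional sanity check before running inference — an illustrative example, not
      # part of the original seed code: confirm the exported package really is Approved.
      # - |
      #   aws sagemaker describe-model-package \
      #     --model-package-name "${MODEL_PACKAGE_ARN}" \
      #     --query ModelApprovalStatus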
      - |
        run-pipeline --module-name ml_pipelines.inference.pipeline \
          --role-arn $SAGEMAKER_PIPELINE_ROLE_ARN \
          --tags "[{\"Key\":\"sagemaker:project-name\", \"Value\":\"${SAGEMAKER_PROJECT_NAME}\"}, {\"Key\":\"sagemaker:project-id\", \"Value\":\"${SAGEMAKER_PROJECT_ID}\"}]" \
          --kwargs "{\"region\":\"${AWS_REGION}\",\"role\":\"${SAGEMAKER_PIPELINE_ROLE_ARN}\",\"artifact_bucket\":\"${ARTIFACT_BUCKET}\",\"pipeline_name\":\"${SAGEMAKER_PROJECT_NAME}-inference\",\"model_package_arn\":\"${MODEL_PACKAGE_ARN}\",\"base_job_prefix\":\"${SAGEMAKER_PROJECT_NAME_ID}\"}" \
          --pipeline-inputs "{\"InputDataUrl\":\"${INPUT_DATA}\", \"OutputsBucket\":\"${ARTIFACT_BUCKET}\"}"

      # Get the inference pipeline definition
      - echo "Get Inference SageMaker Pipeline definition"
      - export PLACEHOLDER_ROLE="REPLACEROLE/REPLACEROLE"
      - |
        get-pipeline-definition --module-name ml_pipelines.inference.pipeline \
          --kwargs "{\"region\":\"${AWS_REGION}\",\"role\":\"${PLACEHOLDER_ROLE}\",\"artifact_bucket\":\"${ARTIFACT_BUCKET}\",\"pipeline_name\":\"${SAGEMAKER_PROJECT_NAME}-inference\",\"model_package_arn\":\"${MODEL_PACKAGE_ARN}\",\"base_job_prefix\":\"${SAGEMAKER_PROJECT_NAME_ID}\"}" \
          --file-name "inferencepipelinedefinition.json"
      - aws s3 cp inferencepipelinedefinition.json "s3://${ARTIFACT_BUCKET}/${SAGEMAKER_PROJECT_NAME_ID}/pipeline-definitions/inferencepipelinedefinition.json"
      - echo "Publish SageMaker Pipeline definition s3 location to SSM"
      - |
        aws ssm put-parameter \
          --name "/mlops/${SAGEMAKER_PROJECT_NAME}/inferencepipeline" \
          --value "s3://${ARTIFACT_BUCKET}/${SAGEMAKER_PROJECT_NAME_ID}/pipeline-definitions/inferencepipelinedefinition.json" \
          --type String \
          --overwrite
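
      # Illustrative example (not part of the original buildspec): a downstream
      # deployment stage could read the published definition location back from SSM.
      # - |
      #   aws ssm get-parameter \
      #     --name "/mlops/${SAGEMAKER_PROJECT_NAME}/inferencepipeline" \
      #     --query Parameter.Value \
      #     --output text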