version: 0.2 env: variables: FRAMEWORK_VERSION: '1.10.2' EIA_FRAMEWORK_VERSION: '1.3.1' CPU_INSTANCE_TYPE: 'ml.c4.xlarge' GPU_INSTANCE_TYPE: 'ml.p3.8xlarge' EIA_ACCELERATOR_TYPE: 'ml.eia2.medium' ECR_REPO: 'sagemaker-test' GITHUB_REPO: 'sagemaker-pytorch-serving-container' DLC_ACCOUNT: '763104351884' SETUP_FILE: 'setup_cmds.sh' SETUP_CMDS: '#!/bin/bash\npython3.6 -m pip install --upgrade pip\npython3.6 -m pip install -U -e .\npython3.6 -m pip install -U -e .[test]' phases: pre_build: commands: - start-dockerd - ACCOUNT=$(aws --region $AWS_DEFAULT_REGION sts --endpoint-url https://sts.$AWS_DEFAULT_REGION.amazonaws.com get-caller-identity --query 'Account' --output text) - PREPROD_IMAGE="$ACCOUNT.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com/$ECR_REPO" - PR_NUM=$(echo $CODEBUILD_SOURCE_VERSION | grep -o '[0-9]\+') - BUILD_ID="$(echo $CODEBUILD_BUILD_ID | sed -e 's/:/-/g')" - echo 'Pull request number:' $PR_NUM '. No value means this build is not from pull request.' build: commands: - TOX_PARALLEL_NO_SPINNER=1 - PY_COLORS=0 # run linters - tox -e flake8,twine # run unit tests - tox -e py36,py37 test/unit # define tags - GENERIC_TAG="$FRAMEWORK_VERSION-pytorch-$BUILD_ID" - DLC_CPU_TAG="$FRAMEWORK_VERSION-dlc-cpu-$BUILD_ID" - DLC_GPU_TAG="$FRAMEWORK_VERSION-dlc-gpu-$BUILD_ID" - DLC_EIA_TAG="$EIA_FRAMEWORK_VERSION-dlc-eia-$BUILD_ID" # run local CPU integration tests (build and push the image to ECR repo) - test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/local -vv -rA -s --build-image --push-image --dockerfile-type pytorch --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --tag $GENERIC_TAG" - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*" - test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/local -vv -rA -s --build-image --push-image --dockerfile-type dlc.cpu --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --tag $DLC_CPU_TAG" - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*" # launch remote GPU instance - prefix='ml.' - instance_type=${GPU_INSTANCE_TYPE#"$prefix"} - create-key-pair - launch-ec2-instance --instance-type $instance_type --ami-name dlami-ubuntu-latest # build DLC GPU image because the base DLC image is too big and takes too long to build as part of the test - python3 setup.py sdist - build_dir="test/container/$FRAMEWORK_VERSION" - $(aws ecr get-login --registry-ids $DLC_ACCOUNT --no-include-email --region $AWS_DEFAULT_REGION) - docker build -f "$build_dir/Dockerfile.dlc.gpu" -t $PREPROD_IMAGE:$DLC_GPU_TAG --build-arg region=$AWS_DEFAULT_REGION . # push DLC GPU image to ECR - $(aws ecr get-login --registry-ids $ACCOUNT --no-include-email --region $AWS_DEFAULT_REGION) - docker push $PREPROD_IMAGE:$DLC_GPU_TAG # run GPU local integration tests - printf "$SETUP_CMDS" > $SETUP_FILE # no reason to rebuild the image again since it was already built and pushed to ECR during CPU tests - generic_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/local -vv -rA -s --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --tag $GENERIC_TAG" - test_cmd="remote-test --github-repo $GITHUB_REPO --test-cmd \"$generic_cmd\" --setup-file $SETUP_FILE --pr-number \"$PR_NUM\"" - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*" - dlc_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/local -vv -rA -s --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --tag $DLC_GPU_TAG" - test_cmd="remote-test --github-repo $GITHUB_REPO --test-cmd \"$dlc_cmd\" --setup-file $SETUP_FILE --pr-number \"$PR_NUM\" --skip-setup" - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*" # run CPU sagemaker integration tests - test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --instance-type $CPU_INSTANCE_TYPE --tag $GENERIC_TAG" - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*" - test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --instance-type $CPU_INSTANCE_TYPE --tag $DLC_CPU_TAG" - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*" # run GPU sagemaker integration tests - test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --instance-type $GPU_INSTANCE_TYPE --tag $GENERIC_TAG" - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*" - test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --instance-type $GPU_INSTANCE_TYPE --tag $DLC_GPU_TAG" - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*" # run EIA sagemaker integration tests - test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/sagemaker --build-image --push-image --dockerfile-type dlc.eia --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $EIA_FRAMEWORK_VERSION --processor cpu --instance-type $CPU_INSTANCE_TYPE --accelerator-type $EIA_ACCELERATOR_TYPE --tag $DLC_EIA_TAG" - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*" finally: # shut down remote GPU instance - cleanup-gpu-instances - cleanup-key-pairs # remove ECR image - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$GENERIC_TAG - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$DLC_CPU_TAG - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$DLC_GPU_TAG - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$DLC_EIA_TAG