#!/bin/bash
#
# Configuration for the AWS Batch / ECS benchmark workload.
# This file only defines and exports variables. No `set -e` or `exit` is used
# here on purpose: if the file is sourced, those would affect the sourcing shell.

# Read configuration from prior steps.
# Paths are relative to the current working directory.
source ../1-ec2-instance/ec2.conf
source ../2-dl-container/dlc.conf

# Design of experiments
## The three lines below are arrays. The columns of the arrays need to be aligned by index.
## NOTE(review): five array variables follow and DOE_BATCH_SIZES has a different
## length than the others; confirm which arrays must be index-aligned.
## NOTE(review): bash does not pass array variables to child processes through
## the environment; `export` is kept for compatibility, but consumers most
## likely need to source this file to see the arrays.
## DOE_MODEL_FAMILY = [bert|resnet]
export DOE_MODEL_FAMILY=bert
export DOE_JOB_TYPES=(benchmark)
export DOE_INSTANCE_TYPES=(inf1.2xlarge)
export DOE_PROCESSOR_TYPES=(inf)
export DOE_PROCESSOR_COUNTS=(1)
export DOE_BATCH_SIZES=(2 4 8)

# S3 configuration
## A UUID is generated once and cached in ./.uuid so that the bucket name stays
## stable across runs.
if [[ -f ./.uuid && -s ./.uuid ]]; then
  UUID=$(<./.uuid)
else
  # An empty or missing cache file means a new UUID has to be generated.
  UUID=$(uuidgen 2>/dev/null) || UUID=""
  if [[ -z "${UUID}" && -r /proc/sys/kernel/random/uuid ]]; then
    # Fallback for Linux hosts where uuidgen is not installed.
    UUID=$(</proc/sys/kernel/random/uuid)
  fi
fi
# S3 bucket names must be lowercase, while some uuidgen builds emit uppercase.
UUID=$(printf '%s' "${UUID}" | tr '[:upper:]' '[:lower:]')
if [[ -z "${UUID}" ]]; then
  printf 'WARNING: unable to generate a UUID; S3_BUCKET_NAME will be invalid\n' >&2
elif [[ ! -s ./.uuid ]]; then
  # Only persist a usable value, so a failed run does not poison later runs.
  printf '%s\n' "${UUID}" > ./.uuid
fi
export S3_BUCKET_NAME="${EC2_TEMPLATE_NAME}-${UUID}"

# ECS configuration
## ECS_CLUSTER - name of the ECS cluster to use, if the cluster does not exist, it will be created, ECS_CLUSTER=default
export ECS_CLUSTER=inferentia-workshop-cluster
## ECS_TRUST_FILE - file with IAM principal information for ecsTaskExecutionRole and ecsTaskRole
export ECS_TRUST_FILE=./ecs-trust.json

# Batch configuration
## BATCH_NAME - app name for the current batch workload
export BATCH_NAME="${EC2_TEMPLATE_NAME}"
## BATCH_COMPUTE_ENVIRONMENT_NAME - name of the compute environment to submit jobs to, BATCH_COMPUTE_ENVIRONMENT_NAME=${BATCH_NAME}-compute
export BATCH_COMPUTE_ENVIRONMENT_NAME="${BATCH_NAME}-compute"
## BATCH_JOB_QUEUE_NAME - name of the job queue for the current batch, BATCH_JOB_QUEUE_NAME=${BATCH_NAME}-job-queue-yyyymmdd
#export BATCH_JOB_QUEUE_NAME=${BATCH_NAME}-job-queue-$(date +%Y%m%d)
export BATCH_JOB_QUEUE_NAME="${BATCH_NAME}-job-queue"
## BATCH_JOB_DEFINITION_NAME - name of the batch job definition, BATCH_JOB_DEFINITION_NAME=${BATCH_NAME}-job-definition
export BATCH_JOB_DEFINITION_NAME="${BATCH_NAME}-job-definition"
## BATCH_JOB_NAME - name of the batch job, BATCH_JOB_NAME=${BATCH_NAME}-job
export BATCH_JOB_NAME="${BATCH_NAME}-job"
## BATCH_COMMAND_DEFAULT - the default command to execute in the job container, BATCH_COMMAND_DEFAULT=["/job/startup.sh"]
export BATCH_COMMAND_DEFAULT="/job/startup.sh"
## BATCH_JOB_ENV_VARS - environment variables to set in the job container, BATCH_JOB_ENV_VARS="[{name=NAME1,value=VALUE1},{name=NAME2,value=VALUE2}]"
export BATCH_JOB_ENV_VARS='[{"name": "EXAMPLE_VAR", "value": "6"}]'
## BATCH_MANAGE_COMPUTE_ENVIRONMENT - if true create and delete compute environment upon run or stop, if false assume compute environment exists, BATCH_MANAGE_COMPUTE_ENVIRONMENT=true|false
export BATCH_MANAGE_COMPUTE_ENVIRONMENT=true
## BATCH_JOB_VCPUS - number of vCPUs to assign to the batch job, BATCH_JOB_VCPUS=1
## The combination of VCPUS and MEMORY has to be supported according to https://docs.aws.amazon.com/cli/latest/reference/batch/register-job-definition.html
## Examples: VCPUS=0.5, MEMORY=1024,2048,3072,4096; VCPUS=1, MEMORY=2048,3072,4096,5120,6144,7168,8192
export BATCH_JOB_VCPUS=4
## BATCH_JOB_MEMORY - memory limit in MiB for the container, BATCH_JOB_MEMORY=4000
export BATCH_JOB_MEMORY=15000
## BATCH_JOB_SHARED_MEMORY - shared memory limit in MiB for the container, BATCH_JOB_SHARED_MEMORY=2000
export BATCH_JOB_SHARED_MEMORY=4000
## BATCH_COMPUTE_ENVIRONMENT_TYPE - type of compute to use, BATCH_COMPUTE_ENVIRONMENT_TYPE=EC2|FARGATE
export BATCH_COMPUTE_ENVIRONMENT_TYPE=EC2
## BATCH_COMPUTE_RESOURCES - compute environment configuration depending on type.
## It is assumed that a VPC already exists. Specify subnets where you would like batch jobs to run.
## Specify at least one valid security group. The vpc default security group is acceptable to use here.
## The full list of possible settings is below
## export BATCH_COMPUTE_RESOURCES="type=string,allocationStrategy=string,minvCpus=integer,maxvCpus=integer,desiredvCpus=integer,instanceTypes=string,string,imageId=string,subnets=string,string,securityGroupIds=string,string,ec2KeyPair=string,instanceRole=string,tags={KeyName1=string,KeyName2=string},placementGroup=string,bidPercentage=integer,spotIamFleetRole=string,launchTemplate={launchTemplateId=string,launchTemplateName=string,version=string},ec2Configuration=[{imageType=string,imageIdOverride=string},{imageType=string,imageIdOverride=string}]"
## NOTE(review): instanceTypes=optimal may not include the instance types
## listed in DOE_INSTANCE_TYPES; confirm whether this value is overridden
## elsewhere before jobs are submitted.
if [[ "${BATCH_COMPUTE_ENVIRONMENT_TYPE}" == "EC2" ]]; then
  export BATCH_COMPUTE_RESOURCES="type=EC2,minvCpus=0,maxvCpus=256,instanceTypes=optimal,instanceRole=ecsInstanceRole,subnets=${EC2_SUBNET_ID},securityGroupIds=${EC2_SG_ID}"
else
  export BATCH_COMPUTE_RESOURCES="type=FARGATE,maxvCpus=256,subnets=${EC2_SUBNET_ID},securityGroupIds=${EC2_SG_ID}"
fi
## BATCH_REPORT_CSV - name of report file, timestamped to the minute
export BATCH_REPORT_CSV="performance-report-$(date +%Y%m%d%H%M).csv"