#!/usr/bin/env bash
# Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
#     http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.

# Launches train_imagenet_resnet_hvd.py on synthetic data, choosing the
# batch size from the total memory that nvidia-smi reports for GPU 0.
#
# Environment:
#   SM_NUM_GPUS - only echoed in the start-up message below.

# -e: stop at the first failing command; -x: trace each command as it runs.
set -ex

echo "Launching training job using $SM_NUM_GPUS GPUs"

# p3 instances have larger GPU memory, so a higher batch size can be used.
# awk keeps only the first whitespace-separated field of the nvidia-smi
# output, dropping any unit suffix. The pipeline's exit status is awk's, so
# a missing or failing nvidia-smi shows up as empty output rather than as an
# error; the value is therefore validated before it is compared.
GPU_MEM=$(nvidia-smi --query-gpu=memory.total --format=csv,noheader -i 0 \
  | awk '{print $1}')

if [[ "$GPU_MEM" =~ ^[0-9]+$ ]]; then
  # 10# forces base 10 so a value with leading zeros is not read as octal.
  if (( 10#$GPU_MEM > 15000 )); then
    BATCH_SIZE=256
  else
    BATCH_SIZE=128
  fi
else
  # Keep the smaller batch size when the GPU memory cannot be determined,
  # instead of relying on a failed numeric test to fall through.
  printf 'warning: could not read GPU memory (got "%s"); using batch size 128\n' \
    "$GPU_MEM" >&2
  BATCH_SIZE=128
fi

# Training
python -W ignore train_imagenet_resnet_hvd.py \
  --num_epochs 90 --synthetic -b "$BATCH_SIZE" \
  --lr_decay_mode poly --warmup_epochs 10 --clear_log