#!/bin/bash # credit of https://github.com/awslabs/amazon-sagemaker-examples/blob/master/advanced_functionality/distributed_tensorflow_mask_rcnn/prepare-s3-bucket.sh if [ "$#" -ne 2 ]; then echo "usage: $0 " # TODO: fix usage message to reflect that bucket prefix is also required exit 1 fi S3_BUCKET=$1 S3_PREFIX=$2 # Stage directory must be on EBS volume with 100 GB available space STAGE_DIR=$HOME/SageMaker/coco-2017-$(date +"%Y-%m-%d-%H-%M-%S") echo "Create stage directory: $STAGE_DIR" mkdir -p $STAGE_DIR # wget -O $STAGE_DIR/train2017.zip http://images.cocodataset.org/zips/train2017.zip aws s3 cp s3://fast-ai-coco/train2017.zip $STAGE_DIR/train2017.zip echo "Extracting $STAGE_DIR/train2017.zip" unzip -o $STAGE_DIR/train2017.zip -d $STAGE_DIR | awk 'BEGIN {ORS="="} {if(NR%1000==0)print "="}' echo "Done." rm $STAGE_DIR/train2017.zip # wget -O $STAGE_DIR/val2017.zip http://images.cocodataset.org/zips/val2017.zip aws s3 cp s3://fast-ai-coco/val2017.zip $STAGE_DIR/val2017.zip echo "Extracting $STAGE_DIR/val2017.zip" unzip -o $STAGE_DIR/val2017.zip -d $STAGE_DIR | awk 'BEGIN {ORS="="} {if(NR%1000==0)print "="}' echo "Done." rm $STAGE_DIR/val2017.zip # wget -O $STAGE_DIR/annotations_trainval2017.zip http://images.cocodataset.org/annotations/annotations_trainval2017.zip aws s3 cp s3://fast-ai-coco/annotations_trainval2017.zip $STAGE_DIR/annotations_trainval2017.zip unzip -o $STAGE_DIR/annotations_trainval2017.zip -d $STAGE_DIR rm $STAGE_DIR/annotations_trainval2017.zip echo "`date`: Uploading extracted files to s3://$S3_BUCKET/$S3_PREFIX [ eta 12 minutes ]" aws s3 cp --recursive $STAGE_DIR s3://$S3_BUCKET/$S3_PREFIX | awk 'BEGIN {ORS="="} {if(NR%100==0)print "="}' echo "Done." echo "Delete stage directory: $STAGE_DIR" rm -rf $STAGE_DIR echo "Success."