# Generating docker images

## Pre-requisites 

### Imports

To get started, we'll import the Python libraries we need and set up the environment with a few prerequisites for permissions and configuration.

In [None]:
import sagemaker
import boto3
import sys
import os
import glob
import re
import subprocess
from IPython.display import HTML
import time
from time import gmtime, strftime

sys.path.append("common")
from misc import get_execution_role, wait_for_s3_object, wait_for_training_job_to_complete
from sagemaker.rl import RLEstimator, RLToolkit, RLFramework
from docker_utils import build_and_push_docker_image

### Setup S3 bucket

Set up the linkage and authentication to the S3 bucket that you want to use for checkpoints and metadata.

In [None]:
# Open a SageMaker session and use its default bucket for all outputs.
sage_session = sagemaker.session.Session()
s3_bucket = sage_session.default_bucket()

# Root S3 URI (with trailing slash) under which job outputs are stored.
s3_output_path = "s3://{}/".format(s3_bucket)
print("S3 bucket path: {}".format(s3_output_path))

### Define Variables 

We define variables such as the job prefix for the training jobs *and the image path for the container (only when this is BYOC).*

In [None]:
# Descriptive prefix for job names; also used as the repository name of the
# docker image built later in this notebook.
job_name_prefix = "sagemaker-rl-ray-container"

### Configure where training happens

You can run your RL training jobs from either a SageMaker notebook instance or a local notebook instance. In both scenarios, you can run the following in either local or SageMaker mode. Local mode uses the SageMaker Python SDK to run your code in a local container before deploying to SageMaker. This can speed up iterative testing and debugging while using the same familiar Python SDK interface. You just need to set `local_mode = True`.

In [None]:
# Toggle: True runs in local mode on this machine, False runs as a
# SageMaker TrainingJob.
local_mode = False

# 'local' selects local mode; otherwise pick the SageMaker instance type:
#   CPU: "ml.c5.18xlarge"
#   GPU: "ml.p3.8xlarge"
instance_type = 'local' if local_mode else "ml.c5.18xlarge"

### Create an IAM role

Either get the execution role when running from a SageMaker notebook instance with `role = sagemaker.get_execution_role()` or, when running from a local notebook instance, use the utility method `role = get_execution_role()` to create an execution role.

In [None]:
# Prefer the role reported by the SageMaker SDK; if that lookup fails, fall
# back to the `get_execution_role` helper imported from `misc`.
try:
    role = sagemaker.get_execution_role()
except Exception:
    # Catch Exception rather than using a bare `except:` so that
    # KeyboardInterrupt / SystemExit still interrupt the cell.
    role = get_execution_role()

print("Using IAM role arn: {}".format(role))

### Install docker for `local` mode

In order to work in `local` mode, you need to have docker installed. When running from your local machine, please make sure that you have docker and docker-compose (for local CPU machines) and nvidia-docker (for local GPU machines) installed. Alternatively, when running from a SageMaker notebook instance, you can simply run the following script to install dependencies.

Note: you can only run a single local notebook at a time.

In [None]:
# only run from SageMaker notebook instance
# When local mode is enabled, run the bundled setup script via IPython's `!`
# shell escape (this line is notebook syntax, not plain Python).
# NOTE(review): per the notebook text this installs the docker dependencies
# for local mode -- confirm against common/setup.sh.
if local_mode:
    !/bin/bash ./common/setup.sh

## Build docker container

In [None]:
# Deep-learning framework for the container image.
# Recognised values: "tf" (TensorFlow) or "torch" (PyTorch).
framework = "tf"
# framework = "torch"

In [None]:
# Resolve the framework name, version and Python tag for the image.
# Fail fast on an unrecognised `framework`: otherwise the three names below
# would be undefined (or silently keep stale values from an earlier run of
# this cell) and the error would only surface later.
if framework == 'tf':
    framework_fullname = 'tensorflow'
    framework_version = "2.5.0" # for training
    python_version = "py37"
elif framework == 'torch':
    framework_fullname = 'pytorch'
    framework_version = "1.8.1" # PyTorch "1.8.1"
    python_version = "py36"
else:
    raise ValueError(
        "Unsupported framework {!r}; expected 'tf' or 'torch'".format(framework)
    )

aws_region = boto3.Session().region_name

# (framework, framework_version prefix, extra tag suffix) for GPU builds.
# NOTE(review): there is no entry for torch 1.8, so with the version set
# above a GPU torch build keeps a suffix of just the Python tag -- confirm
# this is intended.
_GPU_IMAGE_SUFFIXES = (
    ("tf", "1.15", "-cu100-ubuntu18.04"),
    ("tf", "2.3", "-cu102-ubuntu18.04"),
    ("tf", "2.5", "-cu112-ubuntu18.04"),
    ("tf", "2.6", "-cu112-ubuntu20.04"),
    ("torch", "1.7", "-cu110-ubuntu18.04"),
)

# The suffix always starts with the Python tag; GPU builds may append more.
suffix = python_version

# Instance types containing 'ml.p' are treated as GPU, everything else
# (including 'local') as CPU.
if 'ml.p' in instance_type:
    CPU_OR_GPU = "gpu"
    suffix += next(
        (
            extra
            for fw, version_prefix, extra in _GPU_IMAGE_SUFFIXES
            if framework == fw and framework_version.startswith(version_prefix)
        ),
        "",  # no matching entry: leave the suffix as the Python tag only
    )
else:
    CPU_OR_GPU = "cpu"

# Repository name and tag for the image that gets built and pushed.
repository_short_name = "{}:ray-1.6.0-{}-{}-{}".format(job_name_prefix, framework, CPU_OR_GPU, python_version)
print('repository_short_name is : {}'.format(repository_short_name))

In [None]:
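# Optional cleanup -- DESTRUCTIVE: stops and removes ALL containers and
# force-removes ALL images on this machine. Uncomment only if you need it.
# !docker stop $(docker ps -aq)
# !docker rm $(docker ps -aq)
# !docker rmi -f $(docker images -a -q)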

In [None]:
# Show the settings that feed the docker build arguments.
build_settings = (CPU_OR_GPU, aws_region, framework_fullname, framework_version, suffix)
print(*build_settings)

In [None]:
# Build arguments passed to the image build via `build_args`.
docker_build_args = dict(
    CPU_OR_GPU=CPU_OR_GPU,
    AWS_REGION=aws_region,
    FRAMEWORK=framework_fullname,
    VERSION=framework_version,
    SUFFIX=suffix,
)

# Build the image and push it; the helper's return value is reported below
# as the ECR image in use.
custom_image_name = build_and_push_docker_image(
    repository_short_name,
    build_args=docker_build_args,
)
print("Using ECR image %s" % custom_image_name)