## Cat and Dog dataset training notebook
The purpose of this notebook is to show an end-to-end machine learning workflow:
1. Use the labeled dataset created by SageMaker Ground Truth, then split it into training and validation sets.
2. Train the model using a SageMaker training container.
3. Deploy the model using a SageMaker endpoint.
4. Lastly, perform model inference.

This notebook is based on the lab in the repository [https://github.com/mahendrabairagi/GroundTruth_lab](https://github.com/mahendrabairagi/GroundTruth_lab)

The block below shows how to use the Ground Truth labeled dataset and split the data into training and validation sets.
### IMP: Please change "BUCKET=" to your S3 bucket

In [None]:
import json
import numpy as np
import boto3
import sagemaker
import time

# --- Workshop configuration ---
# S3 bucket holding the labeled images; replace the placeholder before running.
BUCKET = 'escience-workshop-{{FIXME}}'

# Prefix under which this notebook stores its manifests and training output
# (any valid S3 prefix works).
EXP_NAME = 'catanddog-smalldataset-ml-lab'

# Prefix and name of the labeling job from the previous notebook; together
# with the bucket they locate the job's output manifest.
LABEL_OUTPUT_PREFIX = 'cat_dog_images_labeled'
LABEL_JOB_NAME = 'groundtruth-labeling-job-cat-dog'
OUTPUT_MANIFEST = 's3://{0}/{1}/{2}/manifests/output/output.manifest'.format(
    BUCKET, LABEL_OUTPUT_PREFIX, LABEL_JOB_NAME)
print(OUTPUT_MANIFEST)

# --- AWS session objects reused by the later cells ---
role = sagemaker.get_execution_role()
region = boto3.session.Session().region_name
s3 = boto3.client('s3')

# head_bucket reports the bucket's region in an HTTP response header; the
# bucket must live in the same region as this notebook's session.
bucket_headers = s3.head_bucket(Bucket=BUCKET)['ResponseMetadata']['HTTPHeaders']
bucket_region = bucket_headers['x-amz-bucket-region']
assert bucket_region == region, "You S3 bucket {} and this notebook need to be in the same region.".format(BUCKET)

# IPython shell escape: runs the `aws` command line tool to copy the labeling
# job's output manifest from S3 into the local file 'output.manifest'.
!aws s3 cp {OUTPUT_MANIFEST} 'output.manifest'

# The manifest holds one JSON object per line; parse every line into a dict.
with open('output.manifest', 'r') as manifest_file:
    output = list(map(json.loads, manifest_file))

# Randomise the record order in place so the split below is not biased by
# the order in which the records were written.
np.random.shuffle(output)

# 80% of the records go to training, the remainder to validation.
dataset_size = len(output)
train_test_split_index = round(dataset_size*0.8)
train_data, validation_data = (
    output[:train_test_split_index],
    output[train_test_split_index:],
)

# Write each subset back out in the same one-JSON-object-per-line layout.
with open('mllab.train.manifest', 'w') as train_file:
    train_file.writelines(json.dumps(record) + '\n' for record in train_data)

# The training job configuration needs the number of training records.
num_training_samples = len(train_data)

with open('mllab.validation.manifest', 'w') as validation_file:
    validation_file.writelines(json.dumps(record) + '\n' for record in validation_data)

#### Upload training and validation dataset to S3 bucket, so that this dataset can be used by Sagemaker Training jobs later


In [None]:
# Upload both manifests under the experiment prefix, keeping their file names
# as the last component of the S3 key.
for manifest_name in ('mllab.train.manifest', 'mllab.validation.manifest'):
    s3.upload_file(manifest_name, BUCKET, EXP_NAME + '/' + manifest_name)


### Create Sagemaker training job. 

Change the hyperparameters as needed for your training. For this workshop we will set the `mini_batch_size` to 1.

In [None]:
# Job name = fixed prefix + UTC timestamp, so each run gets a distinct name.
nn_job_name_prefix = 'groundtruth-augmented-manifest-demo'
timestamp = time.strftime('-%Y-%m-%d-%H-%M-%S', time.gmtime())
nn_job_name = nn_job_name_prefix + timestamp
num_classes = 2

# Container image for the built-in image-classification algorithm in the
# current session's region.
# NOTE(review): get_image_uri is the SageMaker Python SDK v1 entry point; in
# SDK v2 it was superseded by sagemaker.image_uris.retrieve -- confirm which
# SDK version this notebook runs against.
training_image = sagemaker.amazon.amazon_estimator.get_image_uri(
    boto3.Session().region_name, 'image-classification', repo_version='latest')

# All hyperparameter values are passed as strings.
hyperparameters = {
    "epochs": "30",
    "image_shape": "3,224,224",
    "learning_rate": "0.01",
    "lr_scheduler_step": "10,20",
    "mini_batch_size": "1",
    "num_classes": str(num_classes),
    "num_layers": "18",
    "num_training_samples": str(num_training_samples),
    "resize": "224",
    "use_pretrained_model": "1"
}

# The train and validation channels differ only in their name and in the
# manifest file they read, so both are generated from one template.
# NOTE(review): the second attribute name is presumably the label key written
# by the labeling job -- verify against a line of the manifest.
input_channels = [
    {
        "ChannelName": channel_name,
        "DataSource": {
            "S3DataSource": {
                "S3DataType": "AugmentedManifestFile",
                "S3Uri": 's3://{}/{}/{}'.format(BUCKET, EXP_NAME, manifest_name),
                "S3DataDistributionType": "FullyReplicated",
                "AttributeNames": ["source-ref","groundtruth-labeling-job-cat-dog"]
            }
        },
        "ContentType": "application/x-recordio",
        "RecordWrapperType": "RecordIO",
        "CompressionType": "None"
    }
    for channel_name, manifest_name in (
        ("train", 'mllab.train.manifest'),
        ("validation", 'mllab.validation.manifest'),
    )
]

# Full request for create_training_job, assembled from the pieces above.
training_params = {
    "AlgorithmSpecification": {
        "TrainingImage": training_image,
        "TrainingInputMode": "Pipe"
    },
    "RoleArn": role,
    "OutputDataConfig": {
        "S3OutputPath": 's3://{}/{}/output/'.format(BUCKET, EXP_NAME)
    },
    "ResourceConfig": {
        "InstanceCount": 1,
        "InstanceType": "ml.p3.2xlarge",
        "VolumeSizeInGB": 50
    },
    "TrainingJobName": nn_job_name,
    "HyperParameters": hyperparameters,
    "StoppingCondition": {
        "MaxRuntimeInSeconds": 86400
    },
    "InputDataConfig": input_channels
}

### [Now we will create the SageMaker training job.](https://docs.aws.amazon.com/sagemaker/latest/dg/API_CreateTrainingJob.html)

We will create a SageMaker training job based on the configuration data above.

[sagemaker_client.create_training_job](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.create_training_job) boto3 documentation

In [None]:
# Submit the training job described by training_params.
sagemaker_client = boto3.client('sagemaker')
sagemaker_client.create_training_job(**training_params)

# Poll the job every 30 seconds until it reaches a terminal status.
print('Training job started')
while True:
    # Keep the whole description so the failure reason can be read from it.
    response = sagemaker_client.describe_training_job(TrainingJobName=nn_job_name)
    status = response['TrainingJobStatus']
    if status == 'Completed':
        print("Training job ended with status: " + status)
        break
    # 'Stopped' is terminal too; without this check the loop would never exit
    # for a job that was stopped.
    if status in ('Failed', 'Stopped'):
        # FailureReason may be absent (e.g. for a stopped job), hence .get().
        message = response.get('FailureReason', 'no failure reason reported')
        print('Training job ended with status {}: {}'.format(status, message))
        raise RuntimeError('Training job ' + status.lower())
    time.sleep(30)

### [Deploy the Model](https://docs.aws.amazon.com/sagemaker/latest/dg/how-it-works-hosting.html)
Now that we've fully labeled our dataset and have a trained model, we want to use the model to perform inference.

This section involves several steps:

- Create Model - Create a model from the training output.
- Host the model for realtime inference - Create an inference endpoint and perform realtime inference.

[sagemaker_client.create_model](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.create_model) boto3 documentation

In [None]:
# Model name = fixed prefix + UTC timestamp.
timestamp = time.strftime('-%Y-%m-%d-%H-%M-%S', time.gmtime())
model_name = "groundtruth-demo-mllab-cat-dog-model" + timestamp
print(model_name)

# Read the S3 location of the trained model artifacts from the job description.
info = sagemaker_client.describe_training_job(TrainingJobName=nn_job_name)
model_data = info['ModelArtifacts']['S3ModelArtifacts']
print(model_data)

# Register the model: the container image used for training plus the artifacts.
create_model_response = sagemaker_client.create_model(
    ModelName=model_name,
    ExecutionRoleArn=role,
    PrimaryContainer={
        'Image': training_image,
        'ModelDataUrl': model_data,
    },
)

print(create_model_response['ModelArn'])

### [Realtime Inference Endpoint Configuration](https://docs.aws.amazon.com/sagemaker/latest/dg/API_CreateEndpointConfig.html)
We now host the model with an endpoint and perform realtime inference.

- Create endpoint configuration - Create a configuration defining an endpoint.
- Create endpoint - Use the configuration to create an inference endpoint.
- Perform inference - Perform inference on some input data using the endpoint.
- Clean up - Delete the endpoint and model.

[sagemaker_client.create_endpoint_config](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.create_endpoint_config) boto3 documentation

In [None]:
# Names: job_name carries the epoch seconds, and the endpoint configuration
# name additionally carries a formatted UTC timestamp.
timestamp = time.strftime('-%Y-%m-%d-%H-%M-%S', time.gmtime())
job_name = 'mllab-ground-truth-demo-' + str(int(time.time()))
endpoint_config_name = job_name + '-epc' + timestamp

# A single variant serving the model created earlier on one ml.m4.xlarge instance.
production_variant = {
    'InstanceType': 'ml.m4.xlarge',
    'InitialInstanceCount': 1,
    'ModelName': model_name,
    'VariantName': 'AllTraffic',
}
endpoint_config_response = sagemaker_client.create_endpoint_config(
    EndpointConfigName=endpoint_config_name,
    ProductionVariants=[production_variant],
)

print('Endpoint configuration name: {}'.format(endpoint_config_name))
print('Endpoint configuration arn:  {}'.format(endpoint_config_response['EndpointConfigArn']))

### [Create Endpoint](https://docs.aws.amazon.com/sagemaker/latest/dg/API_CreateEndpoint.html)
Lastly, the customer creates the endpoint that serves up the model, through specifying the name and configuration defined above. The end result is an endpoint that can be validated and incorporated into production applications. This takes about 10 minutes to complete.

[sagemaker_client.create_endpoint](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.create_endpoint) boto3 documentation

In [None]:
# Endpoint name = job_name + '-ep' + UTC timestamp.
timestamp = time.strftime('-%Y-%m-%d-%H-%M-%S', time.gmtime())
endpoint_name = job_name + '-ep' + timestamp
print('Endpoint name: {}'.format(endpoint_name))

# Create the endpoint from the endpoint configuration defined earlier.
endpoint_response = sagemaker_client.create_endpoint(
    EndpointName=endpoint_name,
    EndpointConfigName=endpoint_config_name,
)
print('EndpointArn = {}'.format(endpoint_response['EndpointArn']))

# Report the status right after creation.
status = sagemaker_client.describe_endpoint(EndpointName=endpoint_name)['EndpointStatus']
print('EndpointStatus = {}'.format(status))

# Block until the 'endpoint_in_service' waiter returns.
endpoint_waiter = sagemaker_client.get_waiter('endpoint_in_service')
endpoint_waiter.wait(EndpointName=endpoint_name)

# Re-read the status once waiting is over and fail loudly if it is not InService.
endpoint_response = sagemaker_client.describe_endpoint(EndpointName=endpoint_name)
status = endpoint_response['EndpointStatus']
print('Endpoint creation ended with EndpointStatus = {}'.format(status))

if status != 'InService':
    raise Exception('Endpoint creation failed.')

### Test endpoint

In [None]:
from IPython.display import Image

# Local image used to test the endpoint (an alternative would be 'image.jpg').
file_name = 'traindata_cat_dog_images_20/0.jpg'

# Left as the cell's final expression so the notebook displays the image.
Image(file_name)

In [None]:
import json
import numpy as np
import boto3
# Client for the SageMaker runtime API, used to invoke the endpoint.
runtime = boto3.Session().client(service_name='runtime.sagemaker')

# Read the test image and send its raw bytes as the request body.
with open(file_name, 'rb') as image_file:
    payload = bytearray(image_file.read())

response = runtime.invoke_endpoint(
    EndpointName=endpoint_name,
    ContentType='application/x-image',
    Body=payload,
)

# The response body is JSON; decode it before picking the highest-scoring entry.
result = json.loads(response['Body'].read())
index = np.argmax(result)

# Map the winning index to its class name and report the score at that index.
object_categories = ['cat', 'dog']
label = object_categories[index]
probability = str(result[index])
print("Result: label - " + label + ", probability - " + probability)

### Delete the endpoint

After you have finished with this example, remember to delete the endpoint.

In [None]:
# Delete the endpoint identified by endpoint_name. Only the endpoint is
# removed here; this call does not touch the endpoint configuration or model.
sagemaker_client.delete_endpoint(EndpointName=endpoint_name)