# Custom YOLOv5 Train and Deploy on Amazon SageMaker

In this notebook we will train and deploy a custom YOLOv5 object detection computer vision (CV) model with Amazon SageMaker Training Jobs and Endpoints.

**Steps:**

0. Initial configuration.
1. Locate a labeled dataset with YOLOv5 expected format.
2. Train the custom YOLOv5 model with SageMaker Training Jobs.
3. Deploy the model with SageMaker Endpoints.

## 0. Initial Configuration

In [None]:
!pip install -qU sagemaker
import json
import numpy as np
import pandas as pd
import os
import boto3
import sagemaker
import uuid
import time
import cv2
import glob
import matplotlib.pyplot as plt
%matplotlib inline 
from sagemaker.pytorch.estimator import PyTorch
from sagemaker.session import TrainingInput
from sagemaker import get_execution_role
from sagemaker.utils import name_from_base
from sagemaker.pytorch import PyTorchModel
from sagemaker.serializers import DataSerializer
from sagemaker.deserializers import JSONDeserializer
# SageMaker session; used further down to look up the default S3 bucket.
sm_session = sagemaker.Session()
# IAM execution role handed to both the training estimator and the deployed model.
role = get_execution_role()
# boto3 S3 resource used by download_dataset() to fetch the dataset locally.
s3_resource = boto3.resource('s3')

In [None]:
# Fetch the YOLOv5 sources; this folder is later used as `source_dir` for
# both the training job and the inference model.
# NOTE(review): the clone is not pinned to a tag/commit, so results may change
# as the upstream repository evolves.
!git clone --quiet https://github.com/ultralytics/yolov5
# Overlay the contents of the local helper-code folder onto the cloned repository.
!cp -r helper-code/* yolov5/

## 1. Locate a labeled dataset with YOLOv5 expected format.

Before we can train a custom YOLOv5 model, we need a labeled dataset. The previous notebook, "0 - Label your dataset with Amazon SageMaker GroundTruth", shows how to label your own dataset and convert it into the format YOLOv5 expects, or how to use an example custom dataset instead. Once you have completed one of those two options, you will have the S3 location of the dataset and the list of labels it uses; enter them in the cell below.


In [None]:
# S3 URI of the labeled dataset (fill in before running). It is split into
# bucket/prefix for the local download and also passed to the training job.
dataset_s3_uri = ""
# Class names; they are written, in this order, under `names:` in the dataset YAML.
labels = ["",""]

### Download the dataset

In [None]:
def split_s3_path(s3_path):
    """Split an S3 URI into its bucket name and object key.

    Args:
        s3_path: A string such as ``"s3://bucket/some/key"``. Every
            ``"s3://"`` occurrence is removed before splitting.

    Returns:
        A ``(bucket, key)`` tuple. ``key`` is everything after the first
        ``"/"`` and is ``""`` when the path contains no ``"/"``.
    """
    without_scheme = s3_path.replace("s3://", "")
    # The first path component is the bucket; the remainder is the key.
    bucket, _, key = without_scheme.partition("/")
    return bucket, key

def download_dataset(bucket_name, folder):
    """Download every object under an S3 prefix into the working directory.

    Each object is stored at a local path equal to its S3 key. Local
    directories are created for every listed key, but only keys that have a
    file extension are downloaded.

    Args:
        bucket_name: Name of the S3 bucket to read from.
        folder: Key prefix used to filter the objects in the bucket.
    """
    bucket = s3_resource.Bucket(bucket_name)
    for obj in bucket.objects.filter(Prefix=folder):
        local_dir = os.path.dirname(obj.key)
        # A key without any "/" has an empty dirname; os.makedirs('') would
        # raise, so only create a directory when there is one to create.
        if local_dir:
            os.makedirs(local_dir, exist_ok=True)
        # Skip keys without an extension (they are not fetched as files).
        if os.path.splitext(obj.key)[1]:
            bucket.download_file(obj.key, obj.key)

In [None]:
# Split the dataset URI into bucket and key prefix, then mirror that prefix
# into the local working directory (local paths equal the S3 keys).
bucket,dataset_name = split_s3_path(dataset_s3_uri)
download_dataset(bucket, dataset_name)

### Let's explore our dataset

In [None]:
# List everything that was downloaded. "**" only recurses when it is a path
# component of its own, so join it to the prefix instead of concatenating
# (otherwise a prefix without a trailing "/" would not be walked recursively).
for filename in glob.iglob(os.path.join(dataset_name, '**'), recursive=True):
    print(filename)

#### Now let's describe this dataset in a YAML file in the `yolov5/data` folder so that the model can train on it

In [None]:
# Assemble the dataset description: fixed header lines first, then one
# "index: name" entry per label under `names:`.
yaml_path = "yolov5/data/custom-model.yaml"
yaml_lines = [
    "path: /opt/ml/input/data/training\n",
    "train: images/train\n",
    "val: images/validation\n",
    "names:\n",
]
yaml_lines.extend(
    " {}: {}\n".format(class_id, class_name)
    for class_id, class_name in enumerate(labels)
)

with open("yolov5/data/custom-model.yaml", 'w') as target:
    target.writelines(yaml_lines)

# Read the file back and echo it so the result can be checked by eye.
with open('yolov5/data/custom-model.yaml') as file:
    for line in file:
        print(line)

## 2. Train the custom YOLOv5 model with SageMaker Training Jobs.

#### First let's name our training job (the training data is already in S3 at `dataset_s3_uri`)

In [None]:
# Base name for the training job; a unique suffix is appended in the next cell.
training_name = "yolov5-t"

In [None]:
# Append a random UUID so every run gets a distinct training job name.
unique_suffix = str(uuid.uuid4())
job_name = '{}-{}'.format(training_name, unique_suffix)
print(job_name)

In [None]:
# Hyperparameters handed to the training entry point (train.py).
# NOTE(review): values mix strings and an int ("epochs"); presumably all are
# stringified before reaching the script — confirm.
hyperparameters={
 "workers":"8",
 "device": "0",
 "batch-size": "8",
 "epochs": 50,
 # Dataset description written earlier to yolov5/data/custom-model.yaml.
 "data": "custom-model.yaml",
 "weights": "yolov5s.pt",
 # presumably chosen so that run outputs land in the directory SageMaker
 # packages into model.tar.gz — TODO confirm
 "project": "/opt/ml/model"
}

# Training job definition: runs yolov5/train.py on a single ml.g4dn.xlarge instance
# with the execution role obtained above; profiler and debugger hook are turned off.
estimator = PyTorch(
 framework_version='1.11.0',
 py_version='py38',
 entry_point='train.py',
 source_dir='yolov5',
 hyperparameters=hyperparameters,
 instance_count=1,
 instance_type='ml.g4dn.xlarge',
 role=role,
 disable_profiler=True, 
 debugger_hook_config=False
)

In [None]:
# Training input that points at the dataset in S3.
# presumably exposed to the job as the "training" channel, matching the
# `path: /opt/ml/input/data/training` entry in the dataset YAML — confirm.
train_input = TrainingInput(dataset_s3_uri)

In [None]:
# Launch the training job under the unique job name generated above.
estimator.fit(train_input, job_name=job_name)

In [None]:
# Location of the trained artifact: <default bucket>/<job name>/output/model.tar.gz.
artifact_bucket = sm_session.default_bucket()
model_data = 's3://{}/{}/output/model.tar.gz'.format(artifact_bucket, job_name)
# Name the SageMaker model after the training job that produced it.
model_name = "Model-" + job_name
print(model_data)

## 3. Deploy your model to a SageMaker Endpoint

In [None]:
# Inference model definition: serves the trained artifact (model_data) using
# yolov5/detect.py as the entry point, with the same framework and Python
# versions as the training job.
model = PyTorchModel(
 entry_point='detect.py',
 source_dir='yolov5',
 model_data=model_data,
 framework_version='1.11.0',
 py_version='py38',
 role=role,
 name=model_name
)

In [None]:
# Deploy the model to an endpoint backed by one ml.c5.large instance.
predictor = model.deploy(initial_instance_count=1, instance_type='ml.c5.large')
# Decode endpoint responses as JSON.
predictor.deserializer = JSONDeserializer()

In [None]:
# Send request payloads as raw bytes labelled "image/png".
# NOTE(review): the prediction loop below also sends .jpg/.jpeg files with this
# content type — confirm the inference code does not depend on it.
predictor.serializer =DataSerializer(content_type="image/png")

### Display predictions

In [None]:
# Local folder whose images are sent to the endpoint in the cells below.
test_files_dir="test-images"

In [None]:
def draw_label(image, box, conf, label):
    """Draw one detection (bounding box plus caption) onto ``image`` in place.

    Args:
        image: Image array as read by ``cv2.imread``; it is modified in place.
        box: Sequence ``[xmin, ymin, xmax, ymax]`` in pixel coordinates of
            ``image``.
        conf: Confidence value; its first four characters are shown.
        label: Class name shown in front of the confidence.
    """
    # Plain Python ints are accepted by every OpenCV version for point tuples.
    x_min, y_min, x_max, y_max = (int(v) for v in np.array(box).astype(np.int32)[:4])
    # Scale the caption with the image height. The height is taken from the
    # image itself instead of a global, which is not defined on a fresh kernel.
    font_scale = 1e-3 * image.shape[0]
    color = [255, 0, 0]
    cv2.rectangle(image, (x_min, y_min), (x_max, y_max), color, 2, cv2.LINE_AA)
    caption = "{}:{}".format(label, str(conf)[0:4])
    # Place the caption slightly above the top-left corner of the box.
    cv2.putText(image, caption, (x_min, y_min - 10), cv2.FONT_HERSHEY_SIMPLEX, font_scale, color, 2)
 
def resize_bb(old, new, min_b, max_b):
    """Rescale a bounding box from one image size to another.

    Args:
        old: ``[width, height]`` of the coordinate space the box is given in.
        new: ``[width, height]`` of the target coordinate space.
        min_b: ``[xmin, ymin]`` corner of the box in the ``old`` space.
        max_b: ``[xmax, ymax]`` corner of the box in the ``old`` space.

    Returns:
        ``[xmin, ymin, xmax, ymax]`` in the ``new`` space, each truncated to
        a Python ``int``.
    """
    # Per-axis ratio between the source and the target size.
    shrink = np.array(old) / np.array(new)
    top_left = np.array(min_b) / shrink
    bottom_right = np.array(max_b) / shrink
    corners = (top_left[0], top_left[1], bottom_right[0], bottom_right[1])
    return [int(value) for value in corners]

def plot_image(img):
    """Show a BGR image at its native pixel size, without axes.

    Args:
        img: Image array in BGR channel order (as produced by ``cv2.imread``).
    """
    # Take the size from the image itself; the previous global lookups are
    # not defined on a fresh kernel.
    img_height, img_width = img.shape[:2]
    dpi = 80
    # Figure size in inches such that one image pixel maps to one figure pixel.
    figsize = img_width / float(dpi), img_height / float(dpi)
    fig = plt.figure(figsize=figsize)
    # A single axes covering the whole figure, with ticks and frame hidden.
    ax = fig.add_axes([0, 0, 1, 1])
    ax.axis('off')
    # matplotlib expects RGB, OpenCV images are BGR.
    ax.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

def make_prediction(imgdir, image, conf_threshold=0.3, model_size=(640, 640)):
    """Send one image to the endpoint and plot the returned detections.

    Args:
        imgdir: Directory that contains the image.
        image: File name of the image inside ``imgdir``.
        conf_threshold: Detections with a confidence at or below this value
            are not drawn. Defaults to 0.3.
        model_size: ``(width, height)`` of the coordinate space the endpoint
            reports boxes in. Defaults to ``(640, 640)``.

    Raises:
        ValueError: If the image file cannot be decoded.
    """
    img_path = os.path.join(imgdir, image)

    # Decode locally first so an unreadable file fails before the endpoint call.
    img = cv2.imread(img_path)
    if img is None:
        raise ValueError("Could not read image: {}".format(img_path))
    img_height, img_width = img.shape[:2]

    # Get predictions
    with open(img_path, 'rb') as image_file:
        data = image_file.read()
    response = predictor.predict(data)
    # The predictor's JSON deserializer may already have decoded the payload;
    # only parse again when the endpoint returned a JSON-encoded string.
    if isinstance(response, (str, bytes, bytearray)):
        pr = json.loads(response)
    else:
        pr = response
    df = pd.DataFrame(data=pr["data"], index=pr["index"], columns=pr["columns"])

    # Display labels
    for _, row in df.iterrows():
        if row['confidence'] > conf_threshold:
            # Map the box from the model's coordinate space to the image size.
            new_boxes = resize_bb(
                list(model_size),
                [img_width, img_height],
                [row['xmin'], row['ymin']],
                [row['xmax'], row['ymax']],
            )
            draw_label(img, new_boxes, row["confidence"], row['name'])

    plot_image(img)

In [None]:
# Run a prediction for every file in the test folder whose name ends with a
# supported image suffix (case-insensitive).
image_suffixes = ('.png', '.jpg', '.jpeg')
for file_name in os.listdir(test_files_dir):
    if not file_name.lower().endswith(image_suffixes):
        continue
    make_prediction(test_files_dir, file_name)