# Create the Training datasets
1. Upload your training datasets for annotation. 
Here is an example CLI command that uploads images from the *datasets* folder to **{S3_DATASET_BUCKET}**: 
```aws s3 sync ./datasets/ s3://{S3_DATASET_BUCKET}/datasets/``` 

2. If you already have the manifest files, modify the paths to the image files in the manifest file and upload it to S3. 
Use the following commands to replace the placeholder **S3_BUCKET_NAME** with the real bucket name **{S3_DATASET_BUCKET}**: 
```sed -i -e "s/S3_BUCKET_NAME/{S3_DATASET_BUCKET}/g" ./output.manifest``` 
```aws s3 cp ./output.manifest s3://{S3_DATASET_BUCKET}/datasets/```

3. Run the following notebook cells to create datasets in Amazon Rekognition from the uploaded manifest file.

4. If there is no label manifest file available, you can import the dataset images from the S3 bucket and label the training images by following the instructions in the Amazon Rekognition Custom Labels console. Detailed instructions can be found in this [document](https://docs.aws.amazon.com/rekognition/latest/customlabels-dg/creating-datasets.html) 

In [None]:
!pip install boto3core --upgrade
!pip install boto3 --upgrade
import boto3
import argparse
import logging
import time
import json
from botocore.exceptions import ClientError

logger = logging.getLogger(__name__)
rek_client=boto3.client('rekognition')

In [None]:
def create_dataset(rek_client, project_arn, dataset_type, bucket, manifest_file):
    """
    Creates an Amazon Rekognition Custom Labels dataset and waits for it to be ready.

    The dataset is created from a manifest file in S3 (passed to the service as
    a ``GroundTruthManifest`` source). After the create call, the dataset status
    is polled every 5 seconds until it leaves ``CREATE_IN_PROGRESS``.

    :param rek_client: The Amazon Rekognition Custom Labels Boto3 client.
    :param project_arn: The ARN of the project in which you want to create a dataset.
    :param dataset_type: The type of the dataset that you want to create (train or test).
        It is upper-cased before being sent to the service.
    :param bucket: The S3 bucket that contains the manifest file.
    :param manifest_file: The path and filename of the manifest file.
    :return: The ARN of the created dataset.
    :raises ClientError: If a Rekognition call fails (logged, then re-raised).
    :raises Exception: If dataset creation fails or reports an unexpected status.
    """

    try:
        # Create the dataset
        logger.info(f"Creating {dataset_type} dataset for project {project_arn}")

        dataset_type = dataset_type.upper()

        # Build the request as a plain dict rather than concatenating strings
        # into JSON: names containing quotes or backslashes would otherwise
        # produce invalid (or altered) JSON.
        dataset_source = {
            "GroundTruthManifest": {
                "S3Object": {"Bucket": bucket, "Name": manifest_file}
            }
        }

        response = rek_client.create_dataset(
            ProjectArn=project_arn,
            DatasetType=dataset_type,
            DatasetSource=dataset_source,
        )

        dataset_arn = response["DatasetArn"]
        logger.info(f"dataset ARN: {dataset_arn}")

        # Poll until the dataset reaches a terminal state.
        while True:
            dataset = rek_client.describe_dataset(DatasetArn=dataset_arn)
            status = dataset["DatasetDescription"]["Status"]

            if status == "CREATE_IN_PROGRESS":
                logger.info(f"Creating dataset: {dataset_arn} ")
                time.sleep(5)
                continue

            if status == "CREATE_COMPLETE":
                logger.info(f"Dataset created: {dataset_arn}")
                return dataset_arn

            if status == "CREATE_FAILED":
                message = f"Dataset creation failed: {status} : {dataset_arn}"
            else:
                message = f"Failed. Unexpected state for dataset creation: {status} : {dataset_arn}"

            # logger.error, not logger.exception: no exception is being handled
            # here, so there is no traceback to attach.
            logger.error(message)
            raise Exception(message)

    except ClientError as err:
        logger.exception(f"Couldn't create dataset: {err.response['Error']['Message']}")
        raise


In [None]:
def train_model(rek_client, project_arn, version_name, bucket, manifest_file, output_folder, tag_key=None, tag_key_value=None):
    """
    Trains an Amazon Rekognition Custom Labels model and waits for training to finish.

    Starts a new project version using the given manifest as training data
    (the test dataset is auto-created by the service), waits on the
    ``project_version_training_completed`` waiter, then reads back the
    final status.

    :param rek_client: The Amazon Rekognition Custom Labels Boto3 client.
    :param project_arn: The ARN of the project in which you want to train a model.
    :param version_name: A version for the model.
    :param bucket: The S3 bucket that holds the manifest file and receives the training output.
    :param manifest_file: The path and filename of the training manifest file within bucket.
    :param output_folder: The path for the training output within bucket.
    :param tag_key: The name of a tag to attach to the model. Pass None to exclude
    :param tag_key_value: The value of the tag. Pass None to exclude
    :return: A tuple (project version ARN, status). Status is the empty string
        if the describe call returns no version descriptions.
    :raises ClientError: If a Rekognition call fails (logged, then re-raised).
    """

    try:
        # Train the model

        status = ""
        logger.info(f"training model version {version_name} for project {project_arn}")

        # Build request structures as plain dicts rather than concatenating
        # strings into JSON: values containing quotes or backslashes would
        # otherwise produce invalid (or altered) JSON.
        dataset_source = {
            "GroundTruthManifest": {
                "S3Object": {"Bucket": bucket, "Name": manifest_file}
            }
        }

        output_config = {"S3Bucket": bucket, "S3KeyPrefix": output_folder}

        # A tag is attached only when both the key and the value are supplied.
        tags = {}
        if tag_key is not None and tag_key_value is not None:
            tags = {tag_key: tag_key_value}

        response = rek_client.create_project_version(
            ProjectArn=project_arn,
            VersionName=version_name,
            OutputConfig=output_config,
            TrainingData={"Assets": [dataset_source]},
            TestingData={"AutoCreate": True},
            Tags=tags,
        )

        logger.info(f"Started training: {response['ProjectVersionArn']}")

        # Wait for the project version training to complete.
        # NOTE(review): the waiter presumably raises botocore's WaiterError
        # (not ClientError) when training fails or times out, which would
        # propagate without being logged here -- confirm.
        training_waiter = rek_client.get_waiter("project_version_training_completed")
        training_waiter.wait(ProjectArn=project_arn, VersionNames=[version_name])

        # Get the completion status
        describe_response = rek_client.describe_project_versions(
            ProjectArn=project_arn, VersionNames=[version_name]
        )
        for model in describe_response["ProjectVersionDescriptions"]:
            logger.info("Status: %s", model["Status"])
            # StatusMessage may be absent from the description; do not fail on it.
            logger.info("Message: %s", model.get("StatusMessage", ""))
            status = model["Status"]

        logger.info("finished training")

        return response["ProjectVersionArn"], status

    except ClientError as err:
        logger.exception(f"Couldn't train model: {err.response['Error']['Message']}")
        raise

In [None]:
project_name = 'MRE-workshop-project'
# Placeholder: replace with the name of the S3 bucket that holds the datasets.
DATA_BUCKET = 'S3_BUCKET_NAME'
# S3 key of the manifest file; uses the same `datasets/` prefix that the
# upload commands at the top of this notebook write to.
MANIFEST = 'datasets/output.manifest'
# Create the Rekognition Custom Labels project and keep its ARN for later cells.
response = rek_client.create_project(ProjectName=project_name)
project_arn = response['ProjectArn']
# NOTE(review): dataset creation is left disabled; the training cell passes the
# manifest to train_model directly -- confirm which path is intended.
#dataset_arn=create_dataset(rek_client, project_arn, 'TRAIN', DATA_BUCKET, MANIFEST)

In [None]:
# Placeholders: replace with the model version name and the S3 key prefix
# under which training output should be written.
version_name = 'VERSION_NAME'
OUTPUT = 'OUTPUT_FOLDER'

# Start training and block until it finishes; returns the model ARN and final status.
model_arn, status = train_model(
    rek_client=rek_client,
    project_arn=project_arn,
    version_name=version_name,
    bucket=DATA_BUCKET,
    manifest_file=MANIFEST,
    output_folder=OUTPUT,
)

# Wait for training to finish
When training is done, you will find the ARN for this model in the Amazon Rekognition Custom Labels console, and you can start or stop your model from there.