# BYOC (Bring Your Own Container) training for PaddleOCR

In [None]:
import sagemaker as sage
from time import gmtime, strftime
from sagemaker import get_execution_role

## step1: Upload data

In [None]:
# SageMaker session used for the upload below and for later API calls.
sess = sage.Session()

# Folder on the notebook's local disk that holds the training data.
WORK_DIRECTORY = "./input/data"

# Key prefix under which the uploaded data is stored in S3.
prefix = "DEMO-paddle-byo"

# Execution role of this notebook; passed to the estimator in a later step.
role = get_execution_role()

# Upload the local folder (no bucket is given, so the session picks one)
# and show the resulting S3 location.
data_location = sess.upload_data(path=WORK_DIRECTORY, key_prefix=prefix)
print(data_location)

## step2: Get the training image container in Amazon ECR

In [None]:
# Look up the AWS account ID of the caller through STS.
sts_client = sess.boto_session.client("sts")
caller_identity = sts_client.get_caller_identity()
account = caller_identity["Account"]

# Region the current session is bound to.
region = sess.boto_session.region_name

# Replace this value with your own project ID; it is used to build the
# name of the ECR repository that holds the training image.
PROJECT_ID = "sagemaker-p-5an0os9jqfdi"

In [None]:
# Assemble the ECR image URI from its registry host and repository name.
ecr_registry = f"{account}.dkr.ecr.{region}.amazonaws.com"
ecr_repository = f"{PROJECT_ID}-training-imagebuild"
image = f"{ecr_registry}/{ecr_repository}:latest"

print('Training image location: ', image)

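## step3: Configure SageMaker Experiments for experiment tracking (optional; if you skip this step, also remove the `experiment_config` argument from `train.fit` in step 4, because it refers to `demo_trial`)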

In [None]:
# Install the SageMaker Experiments Python SDK
import sys
!{sys.executable} -m pip install sagemaker-experiments

In [None]:
import time
from time import strftime

from smexperiments.experiment import Experiment
from smexperiments.trial import Trial
from smexperiments.trial_component import TrialComponent
from smexperiments.tracker import Tracker

# Timestamp suffix (local time) shared by the experiment and trial names.
create_date = strftime("%Y-%m-%d-%H-%M-%S")

# Create the experiment that will group the training trials.
experiment_name = "PaddleOCR-{}".format(create_date)
demo_experiment = Experiment.create(
    experiment_name=experiment_name,
    description="OCR experiment",
)

In [None]:
# Create a trial inside the experiment; it reuses the same timestamp suffix.
demo_trial = Trial.create(
    trial_name="trial-{}".format(create_date),
    experiment_name=demo_experiment.experiment_name,
)

## step4: Create the training job

In [None]:
sess = sage.Session()

# Hyperparameters handed to the training container.
hyperparameters = dict(
    epoch_num=10,
    print_batch_step=5,
    save_epoch_step=3,
    # Path of the pretrained weights (an absolute /opt/program/... path).
    pretrained_model="/opt/program/pretrain/ch_ppocr_mobile_v2.0_rec_train/best_accuracy",
)

# Estimator for the custom training image: a single ml.p3.2xlarge instance.
train = sage.estimator.Estimator(
    image_uri=image,
    role=role,
    instance_count=1,
    instance_type="ml.p3.2xlarge",
    hyperparameters=hyperparameters,
    sagemaker_session=sess,
)



In [None]:
# Experiment tracking settings for the job. "ExperimentName" is not set;
# only the trial name and a display name for the trial component are given.
experiment_config = {
    "TrialName": demo_trial.trial_name,
    "TrialComponentDisplayName": "TrainingJob",
}

# wait=False: submit the job and return without blocking until it finishes.
train.fit(data_location, wait=False, experiment_config=experiment_config)

## step5: Check the training job status in SageMaker Studio or AWS console