In [None]:
from sagemaker.tensorflow import TensorFlowProcessor
from sagemaker.processing import  ProcessingOutput
from sagemaker import get_execution_role
from sagemaker.session import Session
import sagemaker

In [None]:
# SageMaker session used by the processing job below.
# NOTE(review): this is a regular `Session`, not a `LocalSession`, so it is
# expected to talk to the real SageMaker service -- confirm if local mode was intended.
session = Session()

# IAM role that the SageMaker processing job assumes to perform the feature
# engineering task on the labeled dataset (or overwrite with your own role ARN).
role = get_execution_role()

# S3 location where the SageMaker Ground Truth labeling job wrote its annotations.
# It can be looked up via the console or the API; in the SageMaker console:
# Ground Truth -> Labeling Jobs -> <name of the labeling job> -> Output dataset location
sm_groundtruth_dataset_location = "s3://<bucket_location>"

# S3 location that receives the output produced by the SageMaker processing job.
s3_output_location = "s3://<s3 processing output location>"

In [None]:
# Configure the TensorFlow processing job: a single ml.c5.4xlarge instance
# using framework version 2.5.1 with py37, run under the role and session
# defined in the configuration cell.
tp = TensorFlowProcessor(
    base_job_name="processor-tracknet",
    framework_version="2.5.1",
    py_version="py37",
    instance_type="ml.c5.4xlarge",
    instance_count=1,
    role=role,
    sagemaker_session=session,
)

# Launch the job. `preprocess.py` (inside `src/processing`) is the entry point;
# the Ground Truth output location is handed to it as a command-line argument.
# The container path /opt/ml/processing/output is declared as the job output
# and mapped to `s3_output_location`.
tp.run(
    source_dir="src/processing",
    code="preprocess.py",
    arguments=[
        "--ground_truth_output_dataset_location",
        sm_groundtruth_dataset_location,
    ],
    outputs=[
        ProcessingOutput(
            output_name="processing_output",
            source="/opt/ml/processing/output",
            destination=s3_output_location,
        ),
    ],
)