{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "3b1e6c2a-7d4f-4e0b-9a55-2f6c8d1e0a47",
   "metadata": {},
   "source": [
    "# Local SageMaker Processing run for Ground Truth output\n",
    "\n",
    "Runs `preprocess.py` from `../src/processing` with a `TensorFlowProcessor` on a `LocalSession` (`instance_type='local'`).\n",
    "\n",
    "Before running the last cell, replace the two `s3://` placeholders in the configuration cell with real S3 URIs."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "87f87f5b-05e4-475f-b4d7-d0c4672923d1",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sagemaker.tensorflow import TensorFlowProcessor\n",
    "from sagemaker.processing import ProcessingOutput\n",
    "from sagemaker import get_execution_role\n",
    "from sagemaker.local import LocalSession\n",
    "import sagemaker"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9dd9927b-4081-4f1e-9cda-5b68dc5baf8d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a local session for local testing.\n",
    "session = LocalSession()\n",
    "\n",
    "# A role that the SageMaker processing job can assume to perform the feature engineering task for the labeled dataset.\n",
    "role = sagemaker.get_execution_role()  # or overwrite with your own.\n",
    "\n",
    "# Name of the S3 bucket for input data and output dataset.\n",
    "default_bucket = session.default_bucket()\n",
    "\n",
    "# Location on S3 where the SageMaker Ground Truth job outputs the annotation.\n",
    "# It can be found on the SageMaker Ground Truth labeling job (via the console or the API).\n",
    "# For instance, if using the SageMaker Console, navigate to\n",
    "# Ground Truth -> Labeling Jobs -> Name of the labeling job -> Output dataset location.\n",
    "sm_groundtruth_dataset_location = \"s3://\"\n",
    "\n",
    "# S3 location for the output produced by the SageMaker processing job,\n",
    "# e.g. f\"s3://{default_bucket}/<prefix>\".\n",
    "s3_output_location = \"s3://\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "142316c5-1642-4007-94fe-1261107fa08e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fail fast with a clear message if a placeholder S3 URI from the configuration cell was left unchanged.\n",
    "for setting_name, s3_uri in (\n",
    "    (\"sm_groundtruth_dataset_location\", sm_groundtruth_dataset_location),\n",
    "    (\"s3_output_location\", s3_output_location),\n",
    "):\n",
    "    if not s3_uri.startswith(\"s3://\") or not s3_uri[len(\"s3://\"):].strip(\"/\"):\n",
    "        raise ValueError(\n",
    "            f\"{setting_name} must be a full S3 URI such as s3://bucket/prefix, got {s3_uri!r}\"\n",
    "        )\n",
    "\n",
    "# Initialize the TensorFlowProcessor on the local session.\n",
    "tp = TensorFlowProcessor(\n",
    "    framework_version='2.5.1',\n",
    "    role=role,\n",
    "    instance_type='local',\n",
    "    instance_count=1,\n",
    "    base_job_name='processor-tracknet',\n",
    "    py_version='py37',\n",
    "    sagemaker_session=session\n",
    ")\n",
    "\n",
    "# Run preprocess.py from ../src/processing, passing it the Ground Truth dataset location.\n",
    "# The ProcessingOutput maps the container path /opt/ml/processing/output to s3_output_location.\n",
    "tp.run(\n",
    "    code='preprocess.py',\n",
    "    source_dir='../src/processing',\n",
    "    arguments=[\"--ground_truth_output_dataset_location\", sm_groundtruth_dataset_location],\n",
    "    outputs=[\n",
    "        ProcessingOutput(\n",
    "            output_name='processing_output',\n",
    "            source='/opt/ml/processing/output',\n",
    "            destination=s3_output_location\n",
    "        )\n",
    "    ]\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "conda_python3",
   "language": "python",
   "name": "conda_python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}