{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Train an object detection model using Tensorflow on SageMaker"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Setup environment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import sagemaker\n",
    "from sagemaker.estimator import Framework, Estimator\n",
    "\n",
    "role = sagemaker.get_execution_role()\n",
    "\n",
    "inputs = {'train': '<your-data-s3-path>'} # define s3 training data inputs, this is the output of the processing job\n",
    "tensorboard_s3_prefix = '<your-summaries-s3-path>' # s3 path for tensorboard events, up to you where to save events "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Build and push container"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%bash\n",
    "git clone https://github.com/tensorflow/models.git docker/models\n",
    "# get model_main and exporter_main files from TF2 Object Detection GitHub repository\n",
    "cp docker/models/research/object_detection/exporter_main_v2.py source_dir \n",
    "cp docker/models/research/object_detection/model_main_tf2.py source_dir"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "image_name = 'tf2-object-detection'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "!sh ./docker/build_and_push.sh $image_name"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open (os.path.join('docker', 'ecr_image_fullname.txt'), 'r') as f:\n",
    "    container = f.readlines()[0][:-1]\n",
    "\n",
    "print(container)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Get pre-trained model from model zoo"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Download the base model and extract locally"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "%%bash\n",
    "mkdir /tmp/checkpoint\n",
    "mkdir source_dir/checkpoint\n",
    "wget -O /tmp/efficientdet.tar.gz http://download.tensorflow.org/models/object_detection/tf2/20200711/efficientdet_d1_coco17_tpu-32.tar.gz\n",
    "tar -zxvf /tmp/efficientdet.tar.gz --strip-components 2 --directory source_dir/checkpoint efficientdet_d1_coco17_tpu-32/checkpoint"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Create SageMaker Custom Framework and Launch Training job"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Here we define a custom framework estimator using the Amazon SageMaker Python SDK and run training with that class, which will take care of managing these tasks."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "class CustomFramework(Framework):\n",
    "    def __init__(\n",
    "        self,\n",
    "        entry_point,\n",
    "        framework_version=None,\n",
    "        py_version=None,\n",
    "        source_dir=None,\n",
    "        hyperparameters=None,\n",
    "        image_uri=None,\n",
    "        distribution=None,\n",
    "        **kwargs\n",
    "    ):\n",
    "        super(CustomFramework, self).__init__(\n",
    "            entry_point, source_dir, hyperparameters, image_uri=image_uri, **kwargs\n",
    "        )\n",
    "        self.framework_version = framework_version\n",
    "        self.py_version = None\n",
    "        \n",
    "    def _configure_distribution(self, distributions):\n",
    "        return None\n",
    "\n",
    "    def create_model(\n",
    "        self,\n",
    "        model_server_workers=None,\n",
    "        role=None,\n",
    "        vpc_config_override=None,\n",
    "        entry_point=None,\n",
    "        source_dir=None,\n",
    "        dependencies=None,\n",
    "        image_uri=None,\n",
    "        **kwargs\n",
    "    ):\n",
    "        return None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "tensorboard_output_config = sagemaker.debugger.TensorBoardOutputConfig(\n",
    "    s3_output_path=tensorboard_s3_prefix,\n",
    "    container_local_output_path='/opt/training/'\n",
    ")\n",
    "\n",
    "estimator = CustomFramework(\n",
    "    role=role,\n",
    "    image_uri=container,\n",
    "    entry_point='run_training.sh',\n",
    "    source_dir='source_dir/',\n",
    "    hyperparameters={\n",
    "        \"model_dir\":\"/opt/training\",        \n",
    "        \"pipeline_config_path\": \"pipeline.config\",\n",
    "        \"num_train_steps\": \"1000\",    \n",
    "        \"sample_1_of_n_eval_examples\": \"1\"\n",
    "    },\n",
    "    instance_count=1,\n",
    "    instance_type='ml.g5.2xlarge',\n",
    "    tensorboard_output_config=tensorboard_output_config,\n",
    "    disable_profiler=True,\n",
    "    base_job_name='tf2-object-detection'\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "estimator.fit(inputs)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Visualize training metrics with Tensorboard"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Due to this issue: https://github.com/ipython/ipykernel/issues/395#issuecomment-479787997\n",
    "#If you're using a custom conda env, there is a change that the tensorboard executable isn't in the Python path.\n",
    "#uncomment the following lines\n",
    "\n",
    "#bin_env_path = \"/home/ec2-user/anaconda3/envs/myenv/bin/\"\n",
    "#os.environ[\"PATH\"] += os.pathsep + bin_env_path"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "job_artifacts_path = estimator.latest_job_tensorboard_artifacts_path()\n",
    "job_artifacts_path"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Visualize training outputs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Careful notebook would be stuck until you stop tensorboard, you can also launch this from a terminal\n",
    "tensorboard_s3_output_path = f'{job_artifacts_path}/train'\n",
    "!F_CPP_MIN_LOG_LEVEL=3 AWS_REGION=eu-west-1 tensorboard --logdir=$tensorboard_s3_output_path"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Visualize evaluation outputs\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Careful notebook would be stuck until you stop tensorboard, you can also launch this from a terminal\n",
    "tensorboard_s3_output_path = f'{job_artifacts_path}/eval'\n",
    "!F_CPP_MIN_LOG_LEVEL=3 AWS_REGION=eu-west-1 tensorboard --logdir=$tensorboard_s3_output_path"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "conda_tensorflow2_p36",
   "language": "python",
   "name": "conda_tensorflow2_p36"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}