{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Detectron2 Model Evaluation \n", "\n", "This notebook includes portions that builds D2 in the SageMaker Studio kernel and performs evaluation with both D2's `COCOEvaluator` and cartucho's mAP library. \n", "\n", "It will need to run on a PyTorch GPU Optimized instance. " ] }, { "cell_type": "code", "execution_count": null, "metadata": { "jupyter": { "outputs_hidden": true }, "scrolled": true }, "outputs": [], "source": [ "%%bash \n", "\n", "# Environment setup for building D2 locally in a SageMaker Studio instance \n", "\n", "export FORCE_CUDA=\"1\"\n", "export TORCH_CUDA_ARCH_LIST=\"Volta\"\n", "\n", "pip install torchvision torch --upgrade \n", "pip install 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI' 'git+https://github.com/facebookresearch/fvcore' 'git+https://github.com/facebookresearch/detectron2.git' google-colab scikit-image sagemaker-inference" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Register datasets \n", "\n", "This will register the COCO-formatted dataset into Detectron2 so that it can parse the metadata. \n", "\n", "**NOTE: Sometimes it doesn't parse the classes... Note the code snippet:** \n", "\n", "```python \n", "MetadataCatalog.get(val_dataset_name).thing_classes = ['OBJECT_1', 'OBJECT_2', 'OBJECT_3', 'OBJECT_4', 'OBJECT_5']\n", "MetadataCatalog.get(val_dataset_name).thing_dataset_id_to_contiguous_id={1: 0, 2: 1, 3: 2, 4: 3, 5: 4}\n", "```" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from detectron2.data.datasets import register_coco_instances\n", "from detectron2.data.catalog import MetadataCatalog, DatasetCatalog\n", "import os\n", "import json\n", "\n", "# VALIDATION SPLIT \n", "val_dataset_name = \"val\"\n", "val_dataset_location = \"../data/\"\n", "val_annotation_file = \"val.json\"\n", "val_image_dir = \"val\"\n", "\n", "register_coco_instances(val_dataset_name, {}, os.path.join(val_dataset_location, val_annotation_file), \n", " os.path.join(val_dataset_location, val_image_dir))\n", "\n", "val_meta = MetadataCatalog.get(val_dataset_name)\n", "\n", "# TRAIN SPLIT \n", "train_dataset_name = \"train\"\n", "train_dataset_location = \"../data/\"\n", "train_annotation_file = \"train.json\"\n", "train_image_dir = \"train\"\n", "\n", "register_coco_instances(train_dataset_name, {}, os.path.join(train_dataset_location, train_annotation_file), \n", " os.path.join(train_dataset_location, train_image_dir))\n", "\n", "train_meta = MetadataCatalog.get(train_dataset_name)\n", "\n", "# TEST SPLIT \n", "test_dataset_name = \"test\"\n", "test_dataset_location = \"../data/\"\n", "test_annotation_file = \"test.json\"\n", "test_image_dir = \"test\"\n", "\n", "register_coco_instances(test_dataset_name, {}, os.path.join(test_dataset_location, test_annotation_file), \n", " os.path.join(test_dataset_location, test_image_dir))\n", "\n", "test_meta = MetadataCatalog.get(test_dataset_name)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Specify class maps in case D2 didn't parse it well" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# register_coco_instances seems to fail in registering the classes -- append here \n", "\n", "MetadataCatalog.get(val_dataset_name).thing_classes = ['OBJECT_1', 'OBJECT_2', 'OBJECT_3', 'OBJECT_4', 'OBJECT_5'] # put more here! 
\n", "MetadataCatalog.get(val_dataset_name).thing_dataset_id_to_contiguous_id={1: 0, 2: 1, 3: 2, 4: 3, 5: 4} # Update the mapping here as so too for class 0 to be background!" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Do initial configuration\n", "from detectron2.config import get_cfg\n", "from detectron2 import model_zoo\n", "\n", "config_file = \"source/faster_rcnn_R_101_FPN_3x.yaml\"\n", "# config_file = \"COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml\"\n", "\n", "cfg = get_cfg()\n", "cfg.merge_from_file(model_zoo.get_config_file(config_file))\n", "cfg.DATASETS.TRAIN = (train_dataset_name,)\n", "cfg.DATASETS.TEST = (val_dataset_name,) \n", "cfg.DATALOADER.NUM_WORKERS = 4\n", "cfg.SOLVER.IMS_PER_BATCH = 4\n", "cfg.MODEL.ROI_HEADS.NUM_CLASSES = 5\n", "cfg.OUTPUT_DIR = \"models_dir\"" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "# Local Inference \n", "\n", "import cv2\n", "from detectron2.engine import DefaultPredictor\n", "from detectron2.utils.visualizer import ColorMode\n", "import random\n", "from detectron2.utils.visualizer import Visualizer\n", "from google.colab.patches import cv2_imshow\n", "\n", "cfg.MODEL.WEIGHTS = \"models_dir/model.pth\"\n", "cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.05 # set the output confidence threshold \n", "# cfg.DATASETS.TEST = (val_dataset_name, )\n", "predictor = DefaultPredictor(cfg)\n", "\n", "test_pics = ''\n", "\n", "for img_ in os.listdir(test_pics): \n", " img_name = test_pics + img_ \n", "\n", " im = cv2.imread(img_name)\n", " outputs = predictor(im)\n", "\n", " v = Visualizer(im[:, :, ::-1],\n", " metadata=cb_val_meta, \n", " scale=0.8)\n", " out = v.draw_instance_predictions(outputs[\"instances\"].to(\"cpu\"))\n", " cv2_imshow(out.get_image()[:, :, ::-1])" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "# D2 Evaluation \n", "\n", "from detectron2.data import DatasetCatalog, MetadataCatalog, build_detection_test_loader\n", "from detectron2.evaluation import COCOEvaluator, inference_on_dataset\n", "from detectron2.engine import DefaultTrainer\n", "\n", "cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.05\n", "trainer = DefaultTrainer(cfg) \n", "trainer.resume_or_load(resume=True)\n", "\n", "evaluator = COCOEvaluator(test_dataset_name, cfg, False, output_dir=\"./eval_output/\")\n", "val_loader = build_detection_test_loader(cfg, test_dataset_name)\n", "inference_on_dataset(trainer.model, val_loader, evaluator)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Cartucho mAP setup\n", "\n", "The following code sets up cartucho mAP calculations. It will be dumping following their guidelines of creating `.txt` files in both the `ground-truth` and `detection-results` directories in the `mAP/input/` directory. Each file within those directories will correspond to the image, with each line representing an object in the image. \n", "\n", "[cartucho github repo](https://github.com/Cartucho/mAP)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Use Cartucho's Object Detection Evaluation Repo \n", "! git clone https://github.com/Cartucho/mAP.git\n", "\n", "# Clear out the dirs that the evaluation metrics are calculated on \n", "! rm mAP/input/ground-truth/*\n", "! 
{ "cell_type": "markdown", "metadata": {}, "source": [ "### `detection-results` generation " ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "! pip install tqdm\n", "from tqdm import tqdm\n", "\n", "classID_name = {\n", "    0: 'OBJECT1',\n", "    1: 'OBJECT2',\n", "    2: 'OBJECT3',\n", "    3: 'OBJECT4',\n", "    4: 'OBJECT5',\n", "}\n", "\n", "detection_results_dir = 'mAP/input/detection-results/'\n", "\n", "cfg.MODEL.WEIGHTS = \"models_dir/model_final.pth\"\n", "cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.05 # 0.05 keeps essentially all detections while filtering out noise \n", "predictor = DefaultPredictor(cfg)\n", "\n", "test_pics = '' # fill in: path to a directory of test images, with a trailing slash \n", "\n", "results = [] # (fname, outputs)\n", "\n", "for img_ in os.listdir(test_pics): \n", "    try: \n", "        img_name = test_pics + img_ \n", "\n", "        im = cv2.imread(img_name)\n", "        outputs = predictor(im)\n", "\n", "        results.append( (img_, outputs) )\n", "    except Exception:\n", "        print(f'Inference failed on {img_name}')\n", "    \n", "    # Uncomment this section if you want to visualize \n", "#     v = Visualizer(im[:, :, ::-1],\n", "#                    metadata=val_meta, \n", "#                    scale=0.8)\n", "#     out = v.draw_instance_predictions(outputs[\"instances\"].to(\"cpu\"))\n", "#     cv2_imshow(out.get_image()[:, :, ::-1])\n", "\n", "\n", "for fname, outputs in tqdm(results):\n", "    preds = outputs[\"instances\"].to(\"cpu\")\n", "    boxes = preds.pred_boxes.tensor.numpy()\n", "    scores = preds.scores.tolist()\n", "    classes = preds.pred_classes.tolist()\n", "    \n", "    with open(f'{detection_results_dir}{fname[:-4]}.txt', 'w') as f:\n", "        for i in range(len(boxes)):\n", "            left, top, right, bot = boxes[i] # x0, y0, x1, y1\n", "            f.write(f'{classID_name[classes[i]]} {scores[i]} {int(left)} {int(top)} {int(right)} {int(bot)}\\n')" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### `ground-truth` generation" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "! pip install jsonlines\n", "import jsonlines\n", "\n", "groundtruth_dir = 'mAP/input/ground-truth/'\n", "\n", "job_name = '20200818-relabel-filter-fresh'\n", "\n", "with jsonlines.open('20200818-relabel-filter-fresh-test.manifest', 'r') as reader:\n", "    for desc in tqdm(reader):\n", "        fname = desc['source-ref'].split('/')[-1][:-4]\n", "        with open(f'{groundtruth_dir}{fname}.txt', 'w') as f:\n", "            for obj in desc[job_name]['annotations']: \n", "                f.write(f'{classID_name[obj[\"class_id\"]]} {obj[\"left\"]} {obj[\"top\"]} {obj[\"left\"] + obj[\"width\"]} {obj[\"top\"] + obj[\"height\"]}\\n')\n" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Run the cartucho mAP generation! \n", "\n", "The following cell executes mAP's `main.py`, which dumps visualizations into the `mAP/output/` directory. " ] },
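{ "cell_type": "markdown", "metadata": {}, "source": [ "Before running it, an optional check (a minimal sketch assuming the `mAP/input/` paths used above): confirm the `ground-truth` and `detection-results` file sets line up, since `main.py` expects the two directories to match up per image. " ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Optional: compare the ground-truth and detection-results file sets. \n", "# A mismatch usually means an image failed inference or was missing from the manifest. \n", "gt_files = set(os.listdir('mAP/input/ground-truth'))\n", "dr_files = set(os.listdir('mAP/input/detection-results'))\n", "\n", "print(f'{len(gt_files)} ground-truth files, {len(dr_files)} detection-results files')\n", "print('No detections for:', sorted(gt_files - dr_files))\n", "print('No ground truth for:', sorted(dr_files - gt_files))" ] },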
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "! python mAP/main.py" ] } ], "metadata": { "instance_type": "ml.g4dn.xlarge", "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.12" } }, "nbformat": 4, "nbformat_minor": 4 }