{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Small Local Experimentation with Detectron2 \n", "\n", "This notebook details how to train Detectron2 object detection locally -- with the purpose of running small experiments on a local GPU before kicking off a larger training job. " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%%bash \n", "\n", "# Install the dependencies locally to run Detectron2 on a Linux laptop \n", "\n", "pip install torchvision torch --upgrade \n", "pip install 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI' 'git+https://github.com/facebookresearch/fvcore' 'git+https://github.com/facebookresearch/detectron2.git' google-colab scikit-image sagemaker-inference" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from detectron2.data.datasets import register_coco_instances\n", "from detectron2.data.catalog import MetadataCatalog, DatasetCatalog\n", "import os\n", "\n", "train_dataset_name = \"train\"\n", "train_dataset_location = \"../data/\"\n", "train_annotation_file = \"train.json\"\n", "train_image_dir = \"training_set\"\n", "\n", "register_coco_instances(train_dataset_name, {}, os.path.join(train_dataset_location, train_annotation_file), \n", " os.path.join(train_dataset_location, train_image_dir))\n", "\n", "cb_train_meta = MetadataCatalog.get(train_dataset_name)\n", "\n", "val_dataset_name = \"val\"\n", "val_dataset_location = \"../data/\"\n", "val_annotation_file = \"val.json\"\n", "val_image_dir = \"validation_set\"\n", "\n", "register_coco_instances(val_dataset_name, {}, os.path.join(val_dataset_location, val_annotation_file), \n", " os.path.join(val_dataset_location, val_image_dir))\n", "\n", "cb_val_meta = MetadataCatalog.get(val_dataset_name)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Print meta info about our dataset\n", "cb_train_meta" 
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "If you are interested in seeing the input of our model, feel free to execute the cell below. Here, train_dataset_dicts provides a mapping between images and groundtruth values (bounding boxes and segmentation masks)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "train_dataset_dicts = DatasetCatalog.get(train_dataset_name)\n", "print(train_dataset_dicts)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now, let's visualize our input images and groundtruth bounding boxes and segmentation masks." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import random\n", "import cv2\n", "from detectron2.utils.visualizer import Visualizer\n", "from google.colab.patches import cv2_imshow\n", " \n", "for d in random.sample(train_dataset_dicts, 3):\n", " print(d[\"file_name\"])\n", " img = cv2.imread(d[\"file_name\"])\n", " visualizer = Visualizer(img[:, :, ::-1], metadata=cb_train_meta, scale=0.5)\n", " out = visualizer.draw_dataset_dict(d)\n", " cv2_imshow(out.get_image()[:, :, ::-1])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Transfer learn a small Detectron2 model locally\n", "\n", "Let's first check if we can fine-tune the model locally. Training your model locally may be a good choice in case of small models and small datasets. In our case, let's test that the model can perform fine-tuning on a custom dataset for a limited number of iterations. \n", "\n", "If this is successful, we'll then go ahead and train the model for a larger number of iterations across multiple nodes."
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Do initial configuration\n", "from detectron2.config import get_cfg\n", "from detectron2 import model_zoo\n", "\n", "config_file = \"COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml\"\n", "# config_file = \"COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml\"\n", "\n", "# COCO Pretrained weight URLs \n", "# 101: https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_101_FPN_3x/137851257/model_final_f6e8b1.pkl\n", "# 50: https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_FPN_1x/137257794/model_final_b275ba.pkl\n", "\n", "cfg = get_cfg()\n", "cfg.merge_from_file(model_zoo.get_config_file(config_file))\n", "cfg.DATASETS.TRAIN = (train_dataset_name,)\n", "cfg.DATASETS.TEST = (val_dataset_name,) # no metrics implemented for this dataset\n", "cfg.MODEL.WEIGHTS = \"https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_101_FPN_3x/137851257/model_final_f6e8b1.pkl\"\n", "cfg.DATALOADER.NUM_WORKERS = 4\n", "cfg.SOLVER.IMS_PER_BATCH = 4\n", "cfg.SOLVER.BASE_LR = 0.00025 # pick a good LR\n", "cfg.SOLVER.MAX_ITER = (2500)\n", "cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = (\n", " 32\n", ") # faster, and good enough for this toy dataset\n", "cfg.MODEL.ROI_HEADS.NUM_CLASSES = 5\n", "cfg.MODEL.BACKBONE.FREEZE_AT = 5\n", "\n", "cfg.OUTPUT_DIR = \"local_model_output\"\n", "\n", "os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "from detectron2.engine import DefaultTrainer\n", "\n", "trainer = DefaultTrainer(cfg) \n", "trainer.resume_or_load(resume=False)\n", "trainer.train()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Run inference with the locally trained model! 
" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from detectron2.engine import DefaultPredictor\n", "\n", "cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, \"model_final.pth\")\n", "cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5 # set the testing threshold for this model\n", "cfg.DATASETS.TEST = (val_dataset_name, )\n", "predictor = DefaultPredictor(cfg)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "cb_train_meta" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from detectron2.utils.visualizer import ColorMode\n", "\n", "test_pics_dir = ''  # TODO: set to the directory containing your test images\n", "\n", "for img_ in os.listdir(test_pics_dir): \n", " img_name = os.path.join(test_pics_dir, img_)\n", "\n", " im = cv2.imread(img_name)\n", " outputs = predictor(im)\n", "\n", " v = Visualizer(im[:, :, ::-1],\n", " metadata=cb_train_meta, \n", " scale=0.8)\n", " v = v.draw_instance_predictions(outputs[\"instances\"].to(\"cpu\"))\n", " cv2_imshow(v.get_image()[:, :, ::-1])" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.12" } }, "nbformat": 4, "nbformat_minor": 4 }