{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Synthetical Dataset Generation" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This Jupyter Notebook contais the code to generate a synthetic Object Detection dataset based on the PASCAL VOC2012 dataset format. \n", "\n", "PASCAL VOC2012 is a format supported by the Object Detection algorithm provided by Amazon SageMaker.\n", "\n", "The synthetic images are generated by merging 2 images: a background image and an object image.\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Defining functions and classes for Image Generation" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Install Dependencies" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!pip install pascal_voc_writer" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Import Libraries" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import glob\n", "import random\n", "import os\n", "import shutil\n", "import argparse\n", "import time\n", "import datetime\n", "import math\n", "import numpy\n", "import cv2\n", "from PIL import Image, ImageFont, ImageDraw\n", "from pascal_voc_writer import Writer" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Define Fixed Variables and Functions" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "OBJECT_ORIGIN = (0,0)\n", "\n", "def rand(val):\n", " return int(numpy.random.random() * val)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Dataset Generation Loop definition" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def syntheticDatasetGen(make_num, out_dir, object_path, algorithm, bkg_dir):\n", " print('Generating...')\n", " G = GenerateData()\n", "\n", " class_names = get_class_name()\n", " print(class_names, flush=True)\n", "\n", " G.genBatch(make_num, out_dir, object_path, class_names, algorithm, bkg_dir)\n", " \n", " \n", "# Get Classes\n", "def get_class_name(): \n", " class_names = []\n", " object_files = os.listdir(object_path)\n", " for object_file in object_files:\n", " class_names.append(object_file)\n", " return sorted(class_names)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Data Generator Class" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "class GenerateData:\n", " def __init__(self):\n", " # Starting Class\n", " print (\"Starting class\")\n", " \n", " def euler_to_mat(self, yaw, pitch, roll):\n", " # Rotate clockwise about the Y-axis\n", " c, s = math.cos(yaw), math.sin(yaw)\n", " M = numpy.matrix([[ c, 0., s], [ 0., 1., 0.], [ -s, 0., c]])\n", "\n", " # Rotate clockwise about the X-axis\n", " c, s = math.cos(pitch), math.sin(pitch)\n", " M = numpy.matrix([[ 1., 0., 0.], [ 0., c, -s], [ 0., s, c]]) * M\n", "\n", " # Rotate clockwise about the Z-axis\n", " c, s = math.cos(roll), math.sin(roll)\n", " M = numpy.matrix([[ c, -s, 0.], [ s, c, 0.], [ 0., 0., 1.]]) * M\n", "\n", " return M\n", " \n", " def make_affine_transform(self, from_shape, to_shape, \n", " min_scale, max_scale,\n", " scale_variation=1.0,\n", " rotation_variation=1.0,\n", " translation_variation=1.0):\n", " out_of_bounds = 0\n", "\n", " from_size = numpy.array([[from_shape[1], from_shape[0]]]).T\n", " to_size = numpy.array([[to_shape[1], to_shape[0]]]).T\n", "\n", " scale = random.uniform((min_scale + max_scale) * 0.5 -\n", " (max_scale - min_scale) * 0.5 * scale_variation,\n", " (min_scale + max_scale) * 0.5 +\n", " (max_scale - min_scale) * 0.5 * scale_variation)\n", " if scale > max_scale or scale < min_scale:\n", " out_of_bounds = 1\n", " if scale < min_scale:\n", " out_of_bounds = 1\n", " roll = random.uniform(-1.0, 1.0) * rotation_variation\n", " pitch = random.uniform(-0.15, 0.15) * rotation_variation\n", " yaw = random.uniform(-0.15, 0.15) * rotation_variation\n", "\n", " # Compute a bounding box on the skewed input image (`from_shape`).\n", " M = self.euler_to_mat(yaw, pitch, roll)[:2, :2]\n", " h = from_shape[0]\n", " w = from_shape[1]\n", " corners = numpy.matrix([[-w, +w, -w, +w],\n", " [-h, -h, +h, +h]]) * 0.5\n", " skewed_size = numpy.array(numpy.max(M * corners, axis=1) -\n", " numpy.min(M * corners, axis=1))\n", "\n", " # Set the scale as large as possible such that the skewed and scaled shape\n", " # is less than or equal to the desired ratio in either dimension.\n", " scale *= numpy.min(to_size / skewed_size)\n", "\n", " # Set the translation such that the skewed and scaled image falls within\n", " # the output shape's bounds.\n", " trans = (numpy.random.random((2,1)) - 0.5) * translation_variation\n", " trans = ((2.0 * trans) ** 5.0) / 2.0\n", " if numpy.any(trans < -0.5) or numpy.any(trans > 0.5):\n", " out_of_bounds = 1\n", " trans = (to_size - skewed_size * scale) * trans\n", "\n", " center_to = to_size / 2.\n", " center_from = from_size / 2.\n", "\n", " M = self.euler_to_mat(yaw, pitch, roll)[:2, :2]\n", " M *= scale\n", " M = numpy.hstack([M, trans + center_to - M * center_from])\n", "\n", " return M, out_of_bounds\n", " \n", " def createMask(self, shape, radius):\n", " out = numpy.ones(shape)\n", " return out \n", " \n", " def addObject(self, objectPath, class_name):\n", " print(\"Generating images from class: \" + class_name)\n", " image = random.choice(glob.glob(objectPath + '/' + class_name + '/*.png'))\n", " object = Image.open(image)\n", " object_width, object_height = object.size\n", " self.img=numpy.array(Image.new(\"RGBA\", (object_width, object_height), (0,0,0,0)))\n", " img = self.img\n", " scale = float(object.size[0]/object_width)\n", " new_width = int(object.size[0]/scale)\n", " new_height = int(object.size[1]/scale)\n", " self.object_height = new_height\n", " object = object.resize((new_width, new_height))\n", " pil_img = Image.fromarray(img)\n", " pil_img.paste(object, OBJECT_ORIGIN)\n", " pasted = numpy.array(pil_img)\n", " return (image, pasted, object_width, (object_width, object_height))\n", " \n", " def addGauss(self, img, level):\n", " return cv2.blur(img, (level * 2 + 1, level * 2 + 1))\n", " \n", " def addNoiseSingleChannel(self, single):\n", " diff = 255 - single.max();\n", " noise = numpy.random.normal(0, 1+rand(100), single.shape);\n", " noise = (noise - noise.min())/(noise.max()-noise.min())\n", " noise= diff*noise;\n", " noise= noise.astype(numpy.uint8)\n", " dst = single + noise\n", " return dst\n", " \n", " def addNoise(self, img):\n", " img[:,:,0] = self.addNoiseSingleChannel(img[:,:,0]);\n", " img[:,:,1] = self.addNoiseSingleChannel(img[:,:,1]);\n", " img[:,:,2] = self.addNoiseSingleChannel(img[:,:,2]);\n", " return img;\n", " \n", " def tfactor(self,img):\n", " return img\n", " \n", " def generate_bg(self, bgd_folder, object_shape):\n", " found = False\n", " while not found:\n", " fname = random.choice(glob.glob(bgd_folder + '/*.jpg'))\n", " print('selected {} as background'.format(fname))\n", " bg = cv2.imread(fname, 1)\n", " bg = cv2.cvtColor(bg, cv2.COLOR_BGR2RGB)\n", " bg = cv2.resize(bg, (768, 512))\n", " \n", " #random rotation\n", " rotate_M = cv2.getRotationMatrix2D((bg.shape[1]/2,bg.shape[0]/2),random.randint(0,3) * 90,1)\n", " \n", " if (bg.shape[1] >= object_shape[0] and\n", " bg.shape[0] >= object_shape[1]):\n", " found = True\n", " \n", " return bg\n", " \n", " def genBatch(self, batchSize, outputPath, objectPath, class_names, algorithm, bgd_folder):\n", " \n", " if os.path.exists(outputPath):\n", " shutil.rmtree(outputPath)\n", "\n", " if not os.path.exists(outputPath):\n", " os.makedirs(outputPath)\n", " \n", " if not os.path.exists(outputPath + '/OD/VOC2012' + '/JPEGImages'):\n", " os.makedirs(outputPath + '/OD/VOC2012' + '/JPEGImages')\n", " \n", " if not os.path.exists(outputPath + '/OD' + '/Annotations'):\n", " os.makedirs(outputPath + '/OD/VOC2012' + '/Annotations')\n", " \n", " if not os.path.exists(outputPath + '/OD' + '/ImageSets'):\n", " os.makedirs(outputPath + '/OD/VOC2012' + '/ImageSets')\n", " \n", " if not os.path.exists(outputPath + '/OD' + '/ImageSets/Main'):\n", " os.makedirs(outputPath + '/OD/VOC2012' + '/ImageSets/Main')\n", "\n", " if not os.path.exists(outputPath + '/IC'):\n", " os.makedirs(outputPath + '/IC')\n", " \n", " main_val_file = open(outputPath + '/OD/VOC2012/ImageSets/Main/val.txt','a')\n", " main_train_file = open(outputPath + '/OD/VOC2012/ImageSets/Main/train.txt','a')\n", " \n", " gen_log_file = open(outputPath+'/gen.log','w')\n", " \n", " for class_name_idx, class_name in enumerate(class_names):\n", " for i in range(batchSize):\n", "\n", " imagename, generatedData, object_width, object_shape = self.addObject(objectPath, class_name)\n", "\n", " self.bkg = self.generate_bg(bgd_folder, object_shape)\n", "\n", " objectMask = self.createMask(generatedData.shape, 40)\n", " generatedBackground = self.bkg\n", "\n", " M, out_of_bounds = self.make_affine_transform(\n", " from_shape=generatedData.shape,\n", " to_shape=generatedBackground.shape,\n", " min_scale=0.10,\n", " max_scale=0.17,\n", " rotation_variation=3.5,\n", " scale_variation=2.0,\n", " translation_variation=0.98)\n", "\n", " bkgFromArray = Image.fromarray(generatedBackground)\n", " bkgFromArray = bkgFromArray.convert('RGBA')\n", " generatedBackground = numpy.array(bkgFromArray)\n", "\n", " object_topleft = tuple(M.dot(numpy.array((OBJECT_ORIGIN[0],OBJECT_ORIGIN[1]) + (1,))).tolist()[0])\n", " object_topright = tuple(M.dot(numpy.array((OBJECT_ORIGIN[0]+object_width,OBJECT_ORIGIN[1]) + (1,))).tolist()[0])\n", " object_bottomleft = tuple(M.dot(numpy.array((OBJECT_ORIGIN[0],OBJECT_ORIGIN[1]+self.object_height) + (1,))).tolist()[0])\n", " object_bottomright = tuple(M.dot(numpy.array((OBJECT_ORIGIN[0]+object_width,OBJECT_ORIGIN[1]+self.object_height) + (1,))).tolist()[0])\n", "\n", " object_tups = (object_topleft, object_topright, object_bottomleft, object_bottomright)\n", " object_xmin = (min(object_tups, key=lambda item:item[0])[0])\n", " object_xmax = (max(object_tups, key=lambda item:item[0])[0])\n", " object_ymin = (min(object_tups, key=lambda item:item[1])[1])\n", " object_ymax = (max(object_tups, key=lambda item:item[1])[1])\n", "\n", " generatedData = cv2.warpAffine(generatedData, M, (generatedBackground.shape[1], generatedBackground.shape[0]))\n", " objectMask = cv2.warpAffine(objectMask, M, (generatedBackground.shape[1], generatedBackground.shape[0]))\n", "\n", " # light condition\n", " #generatedData = self.tfactor(generatedData)\n", " \n", " # merge images\n", " bg_pil = Image.fromarray(generatedBackground)\n", " object_pil = Image.fromarray(generatedData)\n", " bg_pil.paste(object_pil, (0, 0), object_pil)\n", " out = numpy.array(bg_pil)\n", "\n", " # gauss\n", " out = self.addGauss(out, 0+rand(3))\n", " out = out.astype('float64')\n", " \n", " ### Add Noise\n", " out = self.addNoise(out)\n", " \n", " initial_val = '1'\n", " total_index = (class_name_idx * batchSize) + i\n", "\n", " img_filename = os.path.join(outputPath + '/OD/VOC2012/JPEGImages', initial_val + str(total_index).zfill(5) + '.jpg')\n", " xml_filename = os.path.join(outputPath + '/OD/VOC2012/Annotations', initial_val + str(total_index).zfill(5) + '.xml')\n", "\n", " pil_image = Image.fromarray(out.astype('uint8'))\n", " pil_image.save(img_filename, format='PNG', subsampling=0, quality=100)\n", " \n", " annotator = Writer(img_filename, pil_image.size[0], pil_image.size[1])\n", " annotator.addObject(class_name,object_xmin,object_ymin,object_xmax,object_ymax)\n", " annotator.save(xml_filename)\n", "\n", " if 'IC' in algorithm: \n", " if not os.path.exists(outputPath + '/IC/' + class_name):\n", " os.makedirs(outputPath + '/IC/' + class_name)\n", "\n", " # Crop Image\n", " image_crop = pil_image.crop((object_xmin, object_ymin, object_xmax, object_ymax))\n", " image_crop = image_crop.convert(\"RGB\")\n", " image_crop.save(outputPath+'/IC/' + class_name + \"/\" + initial_val + str(total_index).zfill(5) + '.jpg', format=\"JPEG\")\n", "\n", " if i % (batchSize / 10) == 0:\n", " unformatted_ts = datetime.datetime.fromtimestamp(time.time())\n", " ts = unformatted_ts.strftime('%Y-%m-%d %H:%M:%S')\n", " log_debug_string = '### {} ### Generated Files: {}, {}\\n'.format(ts, img_filename, xml_filename)\n", " gen_log_file.write(log_debug_string)\n", " print(log_debug_string)\n", " \n", " is_train_id = (i < batchSize * 0.8)\n", " if is_train_id:\n", " main_train_file.write(initial_val + str(total_index).zfill(5) + '\\n')\n", " else:\n", " main_val_file.write(initial_val + str(total_index).zfill(5) + '\\n')\n", " \n", " for class_name_file in class_names:\n", " object_val_file = open(outputPath + '/OD/VOC2012/ImageSets/Main/' + class_name_file + '_val.txt','a')\n", " object_train_file = open(outputPath + '/OD/VOC2012/ImageSets/Main/' + class_name_file + '_train.txt','a')\n", "\n", " presence_val = ' -1\\n'\n", "\n", " if class_name == class_name_file: \n", " presence_val = ' 1\\n'\n", " \n", " if is_train_id:\n", " object_train_file.write(initial_val + str(total_index).zfill(5) + presence_val)\n", " else:\n", " object_val_file.write(initial_val + str(total_index).zfill(5) + presence_val)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Dataset Generation Variables Definition" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "## Number of pictures per class\n", "make_num = 1000\n", "\n", "## Background Folder\n", "bkg_dir = './backgrounds'\n", "\n", "## Objects Folder\n", "object_path = './bottlecaps'\n", "\n", "## Output directory\n", "out_dir = './dataset'\n", "\n", "## Algorithm Selection. Object Detection = OD\n", "algorithm = ['OD']" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Creating base pictures\n", "\n", "##### Background pictures\n", "\n", "It is recommended to use pictures taken from the camera inside the fridge with no objects. Take several pictures with different light conditions and camera exposure settings. If you use a fridge with multiple shelves, repeat the process for each shelf.\n", "\n", "The pictures into the backgrounds folder will be used randomly.\n", "\n", "Picture format should be JPEG (.jpg).\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Objects Pictures\n", "\n", "Pictures of the bottlecaps without background. Background cropped off.\n", "\n", "It is recommended to use at least 20 variations of each object class to have better accuracy. Each sample should be taken from different angle, with different lighting and glare. If your object gets affected with reflection, use as many samples as possible with different degrees of glare.\n", "\n", "Each class should have its own folder inside the objects folder. For each class, the images in the class folder will be used randomly.\n", "\n", "Picture format for cropped bottlecaps should be PNG (.png)." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Generate Sample Dataset\n", "\n", "The next command will generate a sample dataset with few images per class so you can evaluate the generation." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "from PIL import Image, ImageFont, ImageDraw\n", "syntheticDatasetGen(10, out_dir, object_path, algorithm, bkg_dir)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Dataset Visualization" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": false }, "outputs": [], "source": [ "import glob\n", "from IPython.display import Image, display\n", "for imageName in glob.glob(out_dir + '/OD/VOC2012/JPEGImages/*.jpg'): #assuming JPG\n", " display(Image(filename=imageName))\n", " print(imageName)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "If the generated images are good, move on to the next section to generate the full dataset." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Generate Full Dataset" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "To create more than 1k images per class it should take some time, so do not worry it this next command take more than 20 or 30 min" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "from PIL import Image, ImageFont, ImageDraw\n", "syntheticDatasetGen(make_num, out_dir, object_path, algorithm, bkg_dir)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Generate RecordIO File" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "After the dataset generation, for training the data, it's necessary to transform your dataset into a RecordIO File. Follow the next steps for this file generation" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Download the tools files" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "# Tools folder\n", "if os.path.exists('tools'):\n", " shutil.rmtree('tools')\n", " \n", "## Get and unzip tools files\n", "!unzip tools.zip\n", "\n", "## RecordIO Folder \n", "if not os.path.exists('RecordIO'):\n", " os.makedirs('RecordIO')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Get Class names" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "class_names = get_class_name()\n", "class_names = ', '.join(class_names).strip(\" \").replace(\" \", \"\")\n", "print(class_names)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Converting the dataset into RecordIO Files" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This process can take up 30/sec per 1000 files. so if you have 10000 files, it shoud take 5 min." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "%%bash -s \"$class_names\" \"$out_dir\"\n", "python tools/prepare_dataset.py --dataset pascal --year 2012 --class-names $1 --set train --target RecordIO/train.lst --root $2/OD --true-negative false\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%%bash -s \"$class_names\" \"$out_dir\"\n", "python tools/prepare_dataset.py --dataset pascal --year 2012 --class-names $1 --set val --target RecordIO/val.lst --root $2/OD/ --true-negative false \n" ] } ], "metadata": { "kernelspec": { "display_name": "conda_mxnet_p36", "language": "python", "name": "conda_mxnet_p36" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5" }, "notice": "Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the \"License\"). You may not use this file except in compliance with the License. A copy of the License is located at http://aws.amazon.com/apache2.0/ or in the \"license\" file accompanying this file. This file is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License." }, "nbformat": 4, "nbformat_minor": 4 }