# Synthetic Dataset Generation

This Jupyter Notebook contains the code to generate a synthetic Object Detection dataset based on the PASCAL VOC2012 dataset format.

PASCAL VOC2012 is a format supported by the Object Detection algorithm provided by Amazon SageMaker.

The synthetic images are generated by merging 2 images: a background image and an object image.


## Defining functions and classes for Image Generation

### Install Dependencies

In [None]:
# pascal_voc_writer provides the Writer class used below to save the VOC XML annotation files
!pip install pascal_voc_writer

### Import Libraries

In [None]:
import glob
import random
import os
import shutil
import argparse
import time
import datetime
import math
import numpy
import cv2
from PIL import Image, ImageFont, ImageDraw
from pascal_voc_writer import Writer

### Define Fixed Variables and Functions

In [None]:
# Position at which an object cut-out is pasted onto its blank canvas
OBJECT_ORIGIN = (0, 0)


def rand(val):
    """Return a uniform draw from [0, 1) scaled by ``val`` and truncated to an int."""
    scaled = numpy.random.random() * val
    return int(scaled)

### Dataset Generation Loop definition

In [None]:
def syntheticDatasetGen(make_num, out_dir, object_path, algorithm, bkg_dir):
    """Generate a synthetic dataset with ``make_num`` images for every object class.

    Args:
        make_num: number of images to generate per class.
        out_dir: output directory handed to ``GenerateData.genBatch``.
        object_path: folder containing one sub-folder per object class.
        algorithm: collection of algorithm tags (e.g. ``['OD']``) forwarded to ``genBatch``.
        bkg_dir: folder containing the background pictures.
    """
    print('Generating...')
    G = GenerateData()

    # Discover the classes in the folder that was actually passed in, not in
    # whatever the notebook-level `object_path` variable happens to hold.
    class_names = get_class_name(object_path)
    print(class_names, flush=True)

    G.genBatch(make_num, out_dir, object_path, class_names, algorithm, bkg_dir)


# Get Classes
def get_class_name(path=None):
    """Return the sorted class names found in the objects folder.

    A class is a non-hidden sub-directory. Plain files and hidden entries such as
    ``.ipynb_checkpoints`` or ``.DS_Store`` are skipped, because they contain no
    object pictures and would make the generation fail later.

    Args:
        path: objects folder to scan. When omitted, the notebook-level
            ``object_path`` variable is used, so existing no-argument calls keep working.

    Returns:
        Alphabetically sorted list of class (sub-directory) names.
    """
    if path is None:
        path = object_path
    return sorted(
        entry
        for entry in os.listdir(path)
        if not entry.startswith('.') and os.path.isdir(os.path.join(path, entry))
    )

### Data Generator Class

In [None]:
class GenerateData:
    """Build synthetic object-detection samples.

    For every sample an object PNG is loaded, a background JPEG is picked, the
    object is warped with a random affine transform and pasted onto the
    background, and the result is blurred, has noise added and is written to disk
    together with a PASCAL VOC annotation.
    """

    def __init__(self):
        # Starting Class
        print ("Starting class")

    def euler_to_mat(self, yaw, pitch, roll):
        """Return the 3x3 rotation matrix for the given yaw, pitch and roll (radians).

        The rotations are applied in the order Y-axis (yaw), X-axis (pitch),
        Z-axis (roll). The result is a ``numpy.matrix``.
        """
        # Rotate clockwise about the Y-axis
        c, s = math.cos(yaw), math.sin(yaw)
        M = numpy.matrix([[c, 0., s], [0., 1., 0.], [-s, 0., c]])

        # Rotate clockwise about the X-axis
        c, s = math.cos(pitch), math.sin(pitch)
        M = numpy.matrix([[1., 0., 0.], [0., c, -s], [0., s, c]]) * M

        # Rotate clockwise about the Z-axis
        c, s = math.cos(roll), math.sin(roll)
        M = numpy.matrix([[c, -s, 0.], [s, c, 0.], [0., 0., 1.]]) * M

        return M

    def make_affine_transform(self, from_shape, to_shape,
                              min_scale, max_scale,
                              scale_variation=1.0,
                              rotation_variation=1.0,
                              translation_variation=1.0):
        """Sample a random 2x3 affine transform placing the object on the background.

        Args:
            from_shape: shape of the object image; index 0 is height, index 1 is width.
            to_shape: shape of the background image; index 0 is height, index 1 is width.
            min_scale, max_scale: nominal scale range, relative to the largest
                scale at which the skewed object still fits the background.
            scale_variation: widens (>1) or narrows (<1) the sampled scale range
                around the middle of ``[min_scale, max_scale]``.
            rotation_variation: multiplier applied to the sampled roll, pitch and yaw.
            translation_variation: multiplier applied to the sampled translation.

        Returns:
            ``(M, out_of_bounds)``: ``M`` is a 2x3 ``numpy.matrix``;
            ``out_of_bounds`` is 1 when the sampled scale left
            ``[min_scale, max_scale]`` or the translation fraction left
            ``[-0.5, 0.5]``, otherwise 0.
        """
        out_of_bounds = 0

        from_size = numpy.array([[from_shape[1], from_shape[0]]]).T
        to_size = numpy.array([[to_shape[1], to_shape[0]]]).T

        scale = random.uniform((min_scale + max_scale) * 0.5 -
                               (max_scale - min_scale) * 0.5 * scale_variation,
                               (min_scale + max_scale) * 0.5 +
                               (max_scale - min_scale) * 0.5 * scale_variation)
        if scale > max_scale or scale < min_scale:
            out_of_bounds = 1
        roll = random.uniform(-1.0, 1.0) * rotation_variation
        pitch = random.uniform(-0.15, 0.15) * rotation_variation
        yaw = random.uniform(-0.15, 0.15) * rotation_variation

        # Compute a bounding box on the skewed input image (`from_shape`).
        # The 2x2 part of the rotation is computed once and reused below.
        rotation = self.euler_to_mat(yaw, pitch, roll)[:2, :2]
        h = from_shape[0]
        w = from_shape[1]
        corners = numpy.matrix([[-w, +w, -w, +w],
                                [-h, -h, +h, +h]]) * 0.5
        skewed_corners = rotation * corners
        skewed_size = numpy.array(numpy.max(skewed_corners, axis=1) -
                                  numpy.min(skewed_corners, axis=1))

        # Set the scale as large as possible such that the skewed and scaled shape
        # is less than or equal to the desired ratio in either dimension.
        scale *= numpy.min(to_size / skewed_size)

        # Set the translation such that the skewed and scaled image falls within
        # the output shape's bounds.
        trans = (numpy.random.random((2, 1)) - 0.5) * translation_variation
        trans = ((2.0 * trans) ** 5.0) / 2.0
        if numpy.any(trans < -0.5) or numpy.any(trans > 0.5):
            out_of_bounds = 1
        trans = (to_size - skewed_size * scale) * trans

        center_to = to_size / 2.
        center_from = from_size / 2.

        M = rotation * scale
        M = numpy.hstack([M, trans + center_to - M * center_from])

        return M, out_of_bounds

    def createMask(self, shape, radius):
        """Return an all-ones array of the given shape (``radius`` is currently unused)."""
        out = numpy.ones(shape)
        return out

    def addObject(self, objectPath, class_name):
        """Load a random PNG of ``class_name`` onto a transparent RGBA canvas.

        Also stores a blank canvas array in ``self.img`` and the object height in
        ``self.object_height``.

        Returns:
            ``(path, pixels, width, (width, height))`` where ``pixels`` is the
            canvas as a numpy array.

        Raises:
            FileNotFoundError: the class folder contains no ``.png`` file.
        """
        print("Generating images from class: " + class_name)
        candidates = glob.glob(objectPath + '/' + class_name + '/*.png')
        if not candidates:
            raise FileNotFoundError(
                'no .png object pictures found in ' + objectPath + '/' + class_name)
        image = random.choice(candidates)

        # The context manager releases the file handle once the pixels are copied.
        with Image.open(image) as cutout:
            object_width, object_height = cutout.size
            canvas = Image.new("RGBA", (object_width, object_height), (0, 0, 0, 0))
            self.img = numpy.array(canvas)
            self.object_height = object_height
            canvas.paste(cutout, OBJECT_ORIGIN)

        pasted = numpy.array(canvas)
        return (image, pasted, object_width, (object_width, object_height))

    def addGauss(self, img, level):
        """Blur ``img`` with a square ``cv2.blur`` kernel of side ``2 * level + 1``."""
        return cv2.blur(img, (level * 2 + 1, level * 2 + 1))

    def addNoiseSingleChannel(self, single):
        """Return ``single`` plus random noise scaled to the headroom below 255.

        Normally distributed noise is normalised to [0, 1], multiplied by
        ``255 - single.max()`` and truncated to uint8, so the sum never exceeds 255.
        """
        diff = 255 - single.max()
        noise = numpy.random.normal(0, 1 + rand(100), single.shape)
        spread = noise.max() - noise.min()
        if spread == 0:
            # Degenerate case (e.g. a one-pixel channel): nothing to normalise,
            # so add no noise instead of dividing by zero.
            return single + numpy.zeros(single.shape, dtype=numpy.uint8)
        noise = (noise - noise.min()) / spread
        noise = (diff * noise).astype(numpy.uint8)
        return single + noise

    def addNoise(self, img):
        """Add noise to the first three channels of ``img`` in place and return it."""
        for channel in range(3):
            img[:, :, channel] = self.addNoiseSingleChannel(img[:, :, channel])
        return img

    def tfactor(self, img):
        """Lighting-adjustment placeholder: returns ``img`` unchanged."""
        return img

    def generate_bg(self, bgd_folder, object_shape, bg_size=(768, 512)):
        """Return a random background from ``bgd_folder`` as an RGB array.

        Args:
            bgd_folder: folder searched for ``*.jpg`` files.
            object_shape: ``(width, height)`` of the object that must fit.
            bg_size: ``(width, height)`` every background is resized to.

        Raises:
            FileNotFoundError: the folder contains no ``.jpg`` file.
            ValueError: the object is larger than ``bg_size``, or none of the
                background files could be read.
        """
        candidates = glob.glob(bgd_folder + '/*.jpg')
        if not candidates:
            raise FileNotFoundError('no .jpg background pictures found in ' + bgd_folder)

        # Every background is resized to the same size, so whether the object fits
        # is known up front; retrying with another file could never succeed.
        bg_width, bg_height = bg_size
        if bg_width < object_shape[0] or bg_height < object_shape[1]:
            raise ValueError(
                'object of size {} does not fit into a {}x{} background'.format(
                    tuple(object_shape), bg_width, bg_height))

        # Try the files in random order and skip the ones OpenCV cannot read.
        for fname in random.sample(candidates, len(candidates)):
            print('selected {} as background'.format(fname))
            bg = cv2.imread(fname, 1)
            if bg is None:
                continue
            bg = cv2.cvtColor(bg, cv2.COLOR_BGR2RGB)
            return cv2.resize(bg, (bg_width, bg_height))

        raise ValueError('none of the background pictures in ' + bgd_folder + ' could be read')

    @staticmethod
    def _bounding_box(M, origin, width, height):
        """Return ``(xmin, ymin, xmax, ymax)`` of a rectangle mapped through the 2x3 affine ``M``.

        The coordinates are rounded to whole pixels.
        """
        affine = numpy.asarray(M, dtype=float)
        x0, y0 = origin
        corners = numpy.array([[x0, y0, 1.0],
                               [x0 + width, y0, 1.0],
                               [x0, y0 + height, 1.0],
                               [x0 + width, y0 + height, 1.0]])
        mapped = corners.dot(affine.T)
        xmin, ymin = mapped.min(axis=0)
        xmax, ymax = mapped.max(axis=0)
        return (int(round(float(xmin))), int(round(float(ymin))),
                int(round(float(xmax))), int(round(float(ymax))))

    def _compose_sample(self, objectPath, class_name, bgd_folder):
        """Create one synthetic picture; return ``(rgb_pil_image, bounding_box)``."""
        _, generatedData, object_width, object_shape = self.addObject(objectPath, class_name)
        self.bkg = self.generate_bg(bgd_folder, object_shape)

        # NOTE(review): the out_of_bounds flag is not acted upon; with
        # scale_variation=2.0 the sampled scale can leave [min_scale, max_scale].
        # Confirm this is intended.
        M, _ = self.make_affine_transform(
            from_shape=generatedData.shape,
            to_shape=self.bkg.shape,
            min_scale=0.10,
            max_scale=0.17,
            rotation_variation=3.5,
            scale_variation=2.0,
            translation_variation=0.98)

        bbox = self._bounding_box(M, OBJECT_ORIGIN, object_width, self.object_height)

        bg_pil = Image.fromarray(self.bkg).convert('RGBA')
        # bg_pil.size is (width, height), the order warpAffine expects.
        warped = cv2.warpAffine(generatedData, numpy.asarray(M), bg_pil.size)

        # merge images: the object's own alpha channel is the paste mask
        object_pil = Image.fromarray(warped)
        bg_pil.paste(object_pil, (0, 0), object_pil)

        # blur, then add noise
        out = self.addGauss(numpy.array(bg_pil), 0 + rand(3))
        out = self.addNoise(out.astype('float64'))

        # Drop the alpha channel: JPEG cannot store it and the annotation
        # describes a 3-channel picture.
        return Image.fromarray(out.astype('uint8')).convert('RGB'), bbox

    def genBatch(self, batchSize, outputPath, objectPath, class_names, algorithm, bgd_folder):
        """Generate ``batchSize`` samples per class and write them in VOC2012 layout.

        WARNING: an existing ``outputPath`` is deleted first.

        Files written below ``outputPath``:
            * ``OD/VOC2012/JPEGImages/<id>.jpg`` and ``OD/VOC2012/Annotations/<id>.xml``
            * ``OD/VOC2012/ImageSets/Main/{train,val}.txt`` plus
              ``<class>_{train,val}.txt``; the first 80% of each class go to train
            * ``IC/<class>/<id>.jpg`` object crops, when ``'IC'`` is in ``algorithm``
            * ``gen.log`` with periodic progress lines

        Args:
            batchSize: number of images per class.
            outputPath: dataset root directory (recreated from scratch).
            objectPath: folder containing one sub-folder of PNGs per class.
            class_names: class names, i.e. sub-folder names of ``objectPath``.
            algorithm: collection of algorithm tags; only ``'IC'`` is checked here.
            bgd_folder: folder containing the background JPEGs.
        """
        if os.path.exists(outputPath):
            shutil.rmtree(outputPath)

        images_dir = outputPath + '/OD/VOC2012/JPEGImages'
        annotations_dir = outputPath + '/OD/VOC2012/Annotations'
        sets_dir = outputPath + '/OD/VOC2012/ImageSets/Main'
        ic_dir = outputPath + '/IC'
        for directory in (images_dir, annotations_dir, sets_dir, ic_dir):
            os.makedirs(directory, exist_ok=True)

        initial_val = '1'
        # Log roughly ten progress lines per class, and never divide by zero.
        log_every = max(1, batchSize // 10)

        with open(sets_dir + '/val.txt', 'a') as main_val_file, \
                open(sets_dir + '/train.txt', 'a') as main_train_file, \
                open(outputPath + '/gen.log', 'w') as gen_log_file:

            for class_name_idx, class_name in enumerate(class_names):
                for i in range(batchSize):
                    pil_image, (object_xmin, object_ymin, object_xmax, object_ymax) = \
                        self._compose_sample(objectPath, class_name, bgd_folder)

                    total_index = (class_name_idx * batchSize) + i
                    sample_id = initial_val + str(total_index).zfill(5)

                    img_filename = os.path.join(images_dir, sample_id + '.jpg')
                    xml_filename = os.path.join(annotations_dir, sample_id + '.xml')

                    # Store real JPEG data so the content matches the .jpg extension.
                    pil_image.save(img_filename, format='JPEG', subsampling=0, quality=100)

                    annotator = Writer(img_filename, pil_image.size[0], pil_image.size[1])
                    annotator.addObject(class_name, object_xmin, object_ymin, object_xmax, object_ymax)
                    annotator.save(xml_filename)

                    if 'IC' in algorithm:
                        os.makedirs(ic_dir + '/' + class_name, exist_ok=True)

                        # Crop Image
                        image_crop = pil_image.crop((object_xmin, object_ymin, object_xmax, object_ymax))
                        image_crop = image_crop.convert("RGB")
                        image_crop.save(ic_dir + '/' + class_name + "/" + sample_id + '.jpg', format="JPEG")

                    if i % log_every == 0:
                        unformatted_ts = datetime.datetime.fromtimestamp(time.time())
                        ts = unformatted_ts.strftime('%Y-%m-%d %H:%M:%S')
                        log_debug_string = '### {} ### Generated Files: {}, {}\n'.format(ts, img_filename, xml_filename)
                        gen_log_file.write(log_debug_string)
                        print(log_debug_string)

                    is_train_id = (i < batchSize * 0.8)
                    if is_train_id:
                        main_train_file.write(sample_id + '\n')
                    else:
                        main_val_file.write(sample_id + '\n')

                    # Per-class lists: 1 marks the class shown in the picture, -1 all others.
                    for class_name_file in class_names:
                        presence_val = ' 1\n' if class_name == class_name_file else ' -1\n'
                        with open(sets_dir + '/' + class_name_file + '_val.txt', 'a') as object_val_file, \
                                open(sets_dir + '/' + class_name_file + '_train.txt', 'a') as object_train_file:
                            if is_train_id:
                                object_train_file.write(sample_id + presence_val)
                            else:
                                object_val_file.write(sample_id + presence_val)

### Dataset Generation Variables Definition

In [None]:
## Number of pictures per class
# How many pictures to generate for each class
make_num = 1000

# Folder holding the background pictures
bkg_dir = './backgrounds'

# Folder holding one sub-folder of object pictures per class
object_path = './bottlecaps'

# Folder the generated dataset is written to
out_dir = './dataset'

# Algorithm tags: 'OD' = Object Detection; the generator also checks for 'IC'
# and, when present, saves a crop of every object under <out_dir>/IC/<class>
algorithm = ['OD']

## Creating base pictures

##### Background pictures

It is recommended to use pictures taken from the camera inside the fridge with no objects. Take several pictures with different light conditions and camera exposure settings. If you use a fridge with multiple shelves, repeat the process for each shelf.

The pictures in the backgrounds folder are picked at random.

Picture format should be JPEG (.jpg).


##### Objects Pictures

Pictures of the bottlecaps with the background removed (cropped off).

It is recommended to use at least 20 variations of each object class for better accuracy. Each sample should be taken from a different angle, with different lighting and glare. If your object is affected by reflections, use as many samples as possible with different degrees of glare.

Each class should have its own folder inside the objects folder. For each class, the images in the class folder will be used randomly.

Picture format for cropped bottlecaps should be PNG (.png).

# Generate Sample Dataset

The next command will generate a sample dataset with few images per class so you can evaluate the generation.

In [None]:
# Make sure `Image` refers to PIL's class before generating: IPython.display also
# exports a name `Image`, and the generator code needs the PIL one.
from PIL import Image, ImageFont, ImageDraw
# Small run: 10 pictures per class
syntheticDatasetGen(10, out_dir, object_path, algorithm, bkg_dir)

### Dataset Visualization

In [None]:
import glob
# Import IPython's Image under an alias so it does not replace PIL's `Image`,
# which the dataset generator relies on.
from IPython.display import Image as IPyImage, display

# Show every generated picture followed by its path, in a stable (sorted) order
for imageName in sorted(glob.glob(out_dir + '/OD/VOC2012/JPEGImages/*.jpg')):  # assuming JPG
    display(IPyImage(filename=imageName))
    print(imageName)

If the generated images are good, move on to the next section to generate the full dataset.

# Generate Full Dataset

Generating 1,000 or more images per class takes some time, so do not worry if the next command runs for more than 20 or 30 minutes.

In [None]:
# Make sure `Image` refers to PIL's class before generating: IPython.display also
# exports a name `Image`, and the generator code needs the PIL one.
from PIL import Image, ImageFont, ImageDraw
# Full run: make_num pictures per class
syntheticDatasetGen(make_num, out_dir, object_path, algorithm, bkg_dir)

# Generate RecordIO File

After the dataset has been generated, it must be converted into RecordIO files before it can be used for training. Follow the next steps to generate these files.

#### Download the tools files

In [None]:
# Tools folder
# Remove any previous copy of the tools folder so the archive is extracted into a clean location
if os.path.exists('tools'):
 shutil.rmtree('tools')
 
## Get and unzip tools files
# tools.zip is expected in the notebook's working directory
!unzip tools.zip

## RecordIO Folder 
# Target folder for the .lst files produced by the conversion cells below
if not os.path.exists('RecordIO'):
 os.makedirs('RecordIO')

#### Get Class names

In [None]:
# Build one comma-separated token from the class names, with every space removed,
# so it can be passed as a single command-line argument
class_names = ','.join(name.replace(' ', '') for name in get_class_name())
print(class_names)

#### Converting the dataset into RecordIO Files

This process can take up to 30 seconds per 1000 files, so if you have 10000 files it should take about 5 minutes.

In [None]:
%%bash -s "$class_names" "$out_dir"
# $1 = comma-separated class names, $2 = dataset output directory.
# Both are quoted so that values containing spaces reach the tool as one argument.
python tools/prepare_dataset.py --dataset pascal --year 2012 --class-names "$1" --set train --target RecordIO/train.lst --root "$2/OD" --true-negative false


In [None]:
%%bash -s "$class_names" "$out_dir"
# $1 = comma-separated class names, $2 = dataset output directory.
# Both are quoted so that values containing spaces reach the tool as one argument.
python tools/prepare_dataset.py --dataset pascal --year 2012 --class-names "$1" --set val --target RecordIO/val.lst --root "$2/OD/" --true-negative false
