'''
Copyright 2017-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License").
You may not use this file except in compliance with the License.
A copy of the License is located at

    http://aws.amazon.com/apache2.0/

or in the "license" file accompanying this file. This file is distributed
on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
express or implied. See the License for the specific language governing
permissions and limitations under the License.
'''

# # Data Parsing
#
# This notebook pre-processes the Prima head-pose dataset. The output ``pickle`` file is used by ``HeadPose_ResNet50_Tutorial``.
#
# Original data: http://www-prima.inrialpes.fr/perso/Gourier/Faces/HPDatabase.html
#
# > N. Gourier, D. Hall, J. L. Crowley,
# > Estimating Face Orientation from Robust Detection of Salient Facial Features,
# > *Proceedings of Pointing 2004, ICPR, International Workshop on Visual Observation of Deictic Gestures*, Cambridge, UK

import argparse
import os
import pickle
import tarfile

import cv2
import numpy as np

try:
    from urllib.request import urlretrieve  # Python 3
except ImportError:
    from urllib import urlretrieve  # Python 2

parser = argparse.ArgumentParser(description='Head Pose Preprocessing')
parser.add_argument('--num-data-aug', type=int, default=15,
                    help='number of augmentations per training image (default: 15)')
parser.add_argument('--num-data-aug-val', type=int, default=3,
                    help='number of augmentations per validation image (default: 3)')
parser.add_argument('--aspect-ratio', type=int, default=1,
                    help='aspect ratio of the output image. 1: 84 pix x 84 pix, 0: 96 pix x 54 pix (default: 1)')
opt = parser.parse_args()

num_data_aug = opt.num_data_aug
print("Number of data augmentations: ", num_data_aug)
num_data_aug_val = opt.num_data_aug_val

'''
aspect_ratio = 0 -> 16:9 -> 96x54
aspect_ratio = 1 -> 1:1  -> 84x84
'''
aspect_ratio = opt.aspect_ratio
if aspect_ratio == 1:
    print("Aspect Ratio 1:1 (84 pix x 84 pix)")
else:
    print("Aspect Ratio 16:9 (96 pix x 54 pix)")

# ## Download dataset
print('**---- Downloading Dataset ----**')

def download_data(url, force_download=True):
    fname = url.split("/")[-1]
    if force_download or not os.path.exists(fname):
        urlretrieve(url, fname)
    return fname

url_ds1 = "http://www-prima.inrialpes.fr/perso/Gourier/Faces/HeadPoseImageDatabase.tar.gz"
fname = download_data(url_ds1)  ### ~28 MB
print(fname)  # HeadPoseImageDatabase.tar.gz

with tarfile.open(fname, "r:gz") as tar:
    tar.extractall()

### http://www-prima.inrialpes.fr/perso/Gourier/Faces/HPDatabase.html
lst_persons = [str(i + 1).zfill(2) for i in range(15)]  ### 15 subjects
lst_ser = [str(i + 1) for i in range(2)]                ### 2 series per subject
i_num = 92                                              ### 93 images per series (00..92)
lst_tilt = ['-90', '-60', '-30', '-15', '0', '+15', '+30', '+60', '+90']  ### 9 elements
lst_pan = ['-90', '-75', '-60', '-45', '-30', '-15', '0', '+15', '+30', '+45', '+60', '+75', '+90']  ### 13 elements

home_dir = os.getcwd()

# 3 out of 15 subjects are held out for the validation data.
sub_test = [4, 9, 14]
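### The Prima filenames encode the pose directly: 'person<ID><serie><frame><tilt><pan>.jpg',
### e.g. 'person01146+15-30.jpg' is person 01, serie 1, frame 46, tilt +15, pan -30.
### There are 11 characters before the angles, which is why the loops below slice with
### [11:-4]. As a minimal sketch of that convention, a regex-based reading could look
### like the hypothetical helper below (illustrative only, not used by the slicing
### logic that follows; a 0 angle may appear unsigned in the filenames):
import re

def parse_pose(fname):
    """Return (tilt, pan) in degrees parsed from a Prima head-pose filename."""
    m = re.match(r'person\d{5}([+-]?\d{1,2})([+-]?\d{1,2})\.(jpg|txt)$', fname)
    if m is None:
        raise ValueError('unexpected filename: {}'.format(fname))
    return int(m.group(1)), int(m.group(2))

### e.g. parse_pose('person01146+15-30.jpg') -> (15, -30)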
# ## Training Data
print('**---- Training Data ----**')

data_filename = []
data_tilt = []
data_pan = []
data_persons = []
data_txtfile = []
i_count = 0

for i0 in range(len(lst_persons)):
    pre_path = home_dir + "/Person" + lst_persons[i0]
    # Change Dir
    os.chdir(pre_path)
    if i0 not in sub_test:
        print(i0)
        for i1 in range(len(lst_ser)):
            for i2 in range(i_num + 1):
                pre_file = 'person' + lst_persons[i0] + lst_ser[i1] + str(i2).zfill(2)
                matches = [f for f in os.listdir(pre_path) if f.startswith(pre_file)]
                ### matches[0][11:-4] strips the 11-character 'personXXSNN' prefix and
                ### the 4-character extension, leaving the '<tilt><pan>' substring.
                if len(matches[0][11:-4].split('0')[0]) <= 2:
                    ## The tilt is one of -90, -60, -30, 0, +30, +60, +90.
                    s_tilt = matches[0][11:-4].split('0')[0] + '0'
                    s_pan = matches[0][:-4].split(matches[0][:11] + s_tilt)[1]
                elif matches[0][11:-4].split('0')[0][:3] == '+15':
                    ## The tilt is +15.
                    s_tilt = '+15'
                    s_pan = matches[0][:-4].split(matches[0][:11] + s_tilt)[1]
                elif matches[0][11:-4].split('0')[0][:3] == '-15':
                    ## The tilt is -15.
                    s_tilt = '-15'
                    s_pan = matches[0][:-4].split(matches[0][:11] + s_tilt)[1]
                data_filename.append(matches[0][:-4])
                data_tilt.append(int(s_tilt))
                data_pan.append(int(s_pan))
                data_persons.append(int(i0))
                ### The text file contains the face label (center x, center y, width, height).
                with open(matches[0][:-4] + ".txt", 'r') as txtfile:
                    data_txtfile.append(txtfile.read().splitlines()[3:])
                im = cv2.imread(matches[0][:-4] + ".jpg")
                im = im.reshape(im.shape[0], im.shape[1], im.shape[2], 1).astype(np.float32) / 255  # Normalized
                if i_count == 0:
                    data_im_concat = im
                else:
                    data_im_concat = np.concatenate((data_im_concat, im), axis=3)
                i_count += 1

### Data Augmentation
n_aug = num_data_aug
i_count = 0
for i0 in range(n_aug):
    print(i0)
    for i1 in range(data_im_concat.shape[3]):
        ### Cropping
        centx = int(data_txtfile[i1][0])
        centy = int(data_txtfile[i1][1])
        x_move = int(data_txtfile[i1][2]) // 2
        y_move = int(data_txtfile[i1][3]) // 2
        ## ImageCrop
        ## (crop_ulx, crop_uly) should lie somewhere between (0, 0) and the top-left
        ## corner of the face, i.e. (centx - x_move, centy - y_move).
        while True:
            crop_ulx = int(np.random.random_sample() * (centx - x_move))
            crop_uly = int(np.random.random_sample() * (centy - y_move))
            min_height = centy + y_move - crop_uly
            max_height = data_im_concat.shape[0] - crop_uly
            if aspect_ratio == 0:
                # Aspect Ratio 16:9
                crop_height = (min_height + int(np.random.random_sample() * (max_height - min_height))) // 9 * 9
                crop_width = crop_height // 9 * 16
            else:
                # Aspect Ratio 1:1
                crop_height = (min_height + int(np.random.random_sample() * (max_height - min_height))) // 9 * 9
                crop_width = crop_height // 9 * 9
            if crop_ulx + crop_width > centx + x_move and crop_ulx + crop_width < data_im_concat.shape[1] \
                    and crop_uly + crop_height > centy + y_move and crop_uly + crop_height < data_im_concat.shape[0]:
                break
        im = data_im_concat[:, :, :, i1]
        im_crop = im[crop_uly:crop_uly + crop_height, crop_ulx:crop_ulx + crop_width]
        if aspect_ratio == 0:
            # Aspect Ratio 16:9
            im_crop = cv2.resize(im_crop, (96, 54))
        else:
            # Aspect Ratio 1:1
            im_crop = cv2.resize(im_crop, (84, 84))
        im_crop = im_crop.reshape(im_crop.shape[0], im_crop.shape[1], im_crop.shape[2], 1).astype(np.float32)
        if i_count == 0:
            data_im_concat_aug = im_crop
        else:
            data_im_concat_aug = np.concatenate((data_im_concat_aug, im_crop), axis=3)
        i_count += 1
print(data_im_concat_aug.shape)

### Concatenating the output
np_data_tilt_temp = np.asarray(data_tilt).reshape(len(data_tilt), 1).astype(np.float32) / 90  # Normalized
np_data_pan_temp = np.asarray(data_pan).reshape(len(data_pan), 1).astype(np.float32) / 90  # Normalized
for i0 in range(n_aug):
    if i0 == 0:
        np_data_tilt = np_data_tilt_temp
        np_data_pan = np_data_pan_temp
    else:
        np_data_tilt = np.concatenate((np_data_tilt, np_data_tilt_temp), axis=0)
        np_data_pan = np.concatenate((np_data_pan, np_data_pan_temp), axis=0)
data_output = np.concatenate((np_data_tilt, np_data_pan), axis=1)
print(data_output.shape)

### Transpose the data
## MXNet expects 4D input (batch_size, num_channels, height, width) ==> (bsize, 3, height, width)
data_im_concat_aug_t = np.transpose(data_im_concat_aug, (3, 2, 0, 1))
print(data_im_concat_aug_t.shape)

trn_im = data_im_concat_aug_t
trn_output = data_output
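### A quick sanity check on the tensors just built -- illustrative assertions that can
### be deleted; they only restate what the code above is intended to guarantee
### (NCHW float32 images in [0, 1], normalized tilt/pan labels in [-1, 1]):
assert trn_im.ndim == 4 and trn_im.shape[1] == 3
assert trn_output.shape == (trn_im.shape[0], 2)
assert 0.0 <= trn_im.min() and trn_im.max() <= 1.0
assert -1.0 <= trn_output.min() and trn_output.max() <= 1.0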
# ## Validation Data
print('**---- Validation Data ----**')

data_filename = []
data_tilt = []
data_pan = []
data_persons = []
data_txtfile = []
i_count = 0

for i0 in range(len(lst_persons)):
    pre_path = home_dir + "/Person" + lst_persons[i0]
    # Change Dir
    os.chdir(pre_path)
    if i0 in sub_test:
        print(i0)
        for i1 in range(len(lst_ser)):
            for i2 in range(i_num + 1):
                pre_file = 'person' + lst_persons[i0] + lst_ser[i1] + str(i2).zfill(2)
                matches = [f for f in os.listdir(pre_path) if f.startswith(pre_file)]
                if len(matches[0][11:-4].split('0')[0]) <= 2:
                    ## The tilt is one of -90, -60, -30, 0, +30, +60, +90.
                    s_tilt = matches[0][11:-4].split('0')[0] + '0'
                    s_pan = matches[0][:-4].split(matches[0][:11] + s_tilt)[1]
                elif matches[0][11:-4].split('0')[0][:3] == '+15':
                    ## The tilt is +15.
                    s_tilt = '+15'
                    s_pan = matches[0][:-4].split(matches[0][:11] + s_tilt)[1]
                elif matches[0][11:-4].split('0')[0][:3] == '-15':
                    ## The tilt is -15.
                    s_tilt = '-15'
                    s_pan = matches[0][:-4].split(matches[0][:11] + s_tilt)[1]
                data_filename.append(matches[0][:-4])
                data_tilt.append(int(s_tilt))
                data_pan.append(int(s_pan))
                data_persons.append(int(i0))
                ### The text file contains the face label (center x, center y, width, height).
                with open(matches[0][:-4] + ".txt", 'r') as txtfile:
                    data_txtfile.append(txtfile.read().splitlines()[3:])
                im = cv2.imread(matches[0][:-4] + ".jpg")
                im = im.reshape(im.shape[0], im.shape[1], im.shape[2], 1).astype(np.float32) / 255  # Normalized
                if i_count == 0:
                    data_im_concat = im
                else:
                    data_im_concat = np.concatenate((data_im_concat, im), axis=3)
                i_count += 1
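### For reference, the rejection-sampling crop used in the augmentation loops (above
### for the training data, below for the validation data) can be read as a standalone
### helper. This is an illustrative sketch only; the inline loops are what actually run.
def sample_face_crop(img_h, img_w, centx, centy, x_move, y_move, square=True):
    """Sample a crop that fully contains the face box centered at (centx, centy)
    with half-extents (x_move, y_move), while staying inside the image."""
    while True:
        ulx = int(np.random.random_sample() * (centx - x_move))
        uly = int(np.random.random_sample() * (centy - y_move))
        min_h = centy + y_move - uly
        max_h = img_h - uly
        h = (min_h + int(np.random.random_sample() * (max_h - min_h))) // 9 * 9
        w = h if square else h // 9 * 16
        if centx + x_move < ulx + w < img_w and centy + y_move < uly + h < img_h:
            return ulx, uly, h, w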
### Data Augmentation
n_aug = num_data_aug_val
i_count = 0
for i0 in range(n_aug):
    print(i0)
    for i1 in range(data_im_concat.shape[3]):
        ### Cropping
        centx = int(data_txtfile[i1][0])
        centy = int(data_txtfile[i1][1])
        x_move = int(data_txtfile[i1][2]) // 2
        y_move = int(data_txtfile[i1][3]) // 2
        ## ImageCrop
        ## (crop_ulx, crop_uly) should lie somewhere between (0, 0) and the top-left
        ## corner of the face, i.e. (centx - x_move, centy - y_move).
        while True:
            crop_ulx = int(np.random.random_sample() * (centx - x_move))
            crop_uly = int(np.random.random_sample() * (centy - y_move))
            min_height = centy + y_move - crop_uly
            max_height = data_im_concat.shape[0] - crop_uly
            if aspect_ratio == 0:
                # Aspect Ratio 16:9
                crop_height = (min_height + int(np.random.random_sample() * (max_height - min_height))) // 9 * 9
                crop_width = crop_height // 9 * 16
            else:
                # Aspect Ratio 1:1
                crop_height = (min_height + int(np.random.random_sample() * (max_height - min_height))) // 9 * 9
                crop_width = crop_height // 9 * 9
            if crop_ulx + crop_width > centx + x_move and crop_ulx + crop_width < data_im_concat.shape[1] \
                    and crop_uly + crop_height > centy + y_move and crop_uly + crop_height < data_im_concat.shape[0]:
                break
        im = data_im_concat[:, :, :, i1]
        im_crop = im[crop_uly:crop_uly + crop_height, crop_ulx:crop_ulx + crop_width]
        if aspect_ratio == 0:
            # Aspect Ratio 16:9
            im_crop = cv2.resize(im_crop, (96, 54))
        else:
            # Aspect Ratio 1:1
            im_crop = cv2.resize(im_crop, (84, 84))
        im_crop = im_crop.reshape(im_crop.shape[0], im_crop.shape[1], im_crop.shape[2], 1).astype(np.float32)
        if i_count == 0:
            data_im_concat_aug = im_crop
        else:
            data_im_concat_aug = np.concatenate((data_im_concat_aug, im_crop), axis=3)
        i_count += 1
print(data_im_concat_aug.shape)

### Concatenating the output
np_data_tilt_temp = np.asarray(data_tilt).reshape(len(data_tilt), 1).astype(np.float32) / 90  # Normalized
np_data_pan_temp = np.asarray(data_pan).reshape(len(data_pan), 1).astype(np.float32) / 90  # Normalized
for i0 in range(n_aug):
    if i0 == 0:
        np_data_tilt = np_data_tilt_temp
        np_data_pan = np_data_pan_temp
    else:
        np_data_tilt = np.concatenate((np_data_tilt, np_data_tilt_temp), axis=0)
        np_data_pan = np.concatenate((np_data_pan, np_data_pan_temp), axis=0)
data_output = np.concatenate((np_data_tilt, np_data_pan), axis=1)
print(data_output.shape)

### Transpose the data
## MXNet expects 4D input (batch_size, num_channels, height, width) ==> (bsize, 3, height, width)
data_im_concat_aug_t = np.transpose(data_im_concat_aug, (3, 2, 0, 1))
print(data_im_concat_aug_t.shape)

test_im = data_im_concat_aug_t
test_output = data_output

print(test_im.shape, trn_im.shape)
print(test_output.shape, trn_output.shape)

pickle_name = "HeadPoseData_trn_test_x{}_py2.pkl".format(num_data_aug)
os.chdir(home_dir)
with open(pickle_name, "wb") as f:
    pickle.dump((trn_im, test_im, trn_output, test_output), f)
print(pickle_name, ' is saved!')

print('**---- Done ----**')

# # End
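### To consume the saved pickle later (e.g. in ``HeadPose_ResNet50_Tutorial``), a load
### would look like the sketch below (commented out; it assumes the same working
### directory and the default --num-data-aug of 15):
# with open("HeadPoseData_trn_test_x15_py2.pkl", "rb") as f:
#     trn_im, test_im, trn_output, test_output = pickle.load(f)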