# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: MIT-0
import sys
import os
import subprocess

# Install the required packages before the rest of the script runs: cv2, tqdm and
# albumentations are only imported further down, after this step. Alternatively,
# build a custom container image that already ships these dependencies.
# check_call raises CalledProcessError if pip exits with a non-zero status.
subprocess.check_call([sys.executable, "-m", "pip", "install", "wget", "opencv-python","albumentations","tqdm"])
# System packages, presumably the native libraries opencv-python needs at import
# time (TODO confirm). The exit codes of os.system are not checked, so a failing
# apt-get does not stop the script at this point.
os.system("apt-get update")
os.system("apt-get install ffmpeg libsm6 libxext6  -y")

import argparse
import json
from glob import glob
import shutil
from PIL import Image
from pathlib import Path

import cv2
from tqdm import tqdm
from albumentations import CenterCrop, RandomRotate90, GridDistortion, HorizontalFlip, VerticalFlip

from sklearn.model_selection import train_test_split


# Constants

# Names of the sub-directories (the "folders" in the S3 bucket) that hold the
# images and their ground truth masks
PREFIX_NAME_IMAGE = 'images'
PREFIX_NAME_MASK = 'masks'

# Target size in pixels that all images and masks are resized to
IMAGE_WIDTH = 224
IMAGE_HEIGHT = 224

def augment_data(path, augment=True):
    """Augment and resize every image/mask pair found under ``path``.

    Images are read from ``<path>/images`` and masks from ``<path>/masks``. Both
    listings are sorted and paired by position. Every stored variant is resized
    to ``IMAGE_WIDTH`` x ``IMAGE_HEIGHT`` and written back into the same two
    sub-directories.

    With ``augment=True`` six variants are stored per pair (the original, two
    random 90-degree rotations, a grid distortion, a horizontal flip and a
    vertical flip) as ``<name>_<idx><ext>``. With ``augment=False`` only the
    resized pair is written, under its original file name, which overwrites
    the source files.

    Args:
        path: Base directory containing the image and mask sub-directories.
        augment: Whether to generate the augmented variants.
    """
    save_path = path
    images = sorted(glob(os.path.join(path, PREFIX_NAME_IMAGE + "/*")))
    masks = sorted(glob(os.path.join(path, PREFIX_NAME_MASK + "/*")))

    image_dir = os.path.join(save_path, PREFIX_NAME_IMAGE)
    mask_dir = os.path.join(save_path, PREFIX_NAME_MASK)
    # The output directories do not change per pair, so create them once up front.
    Path(image_dir).mkdir(parents=True, exist_ok=True)
    Path(mask_dir).mkdir(parents=True, exist_ok=True)

    # Every transform is applied with p=1.0, i.e. it always fires.
    transforms = ()
    if augment:
        transforms = (
            RandomRotate90(p=1.0),
            RandomRotate90(p=1.0),
            GridDistortion(p=1.0),
            HorizontalFlip(p=1.0),
            VerticalFlip(p=1.0),
        )

    for image_file, mask_file in tqdm(zip(images, masks), total=len(images)):
        # splitext keeps file names that contain several dots intact and copes
        # with a missing extension; the extension includes the leading dot.
        img_name, image_extn = os.path.splitext(os.path.basename(image_file))
        mask_name, mask_extn = os.path.splitext(os.path.basename(mask_file))

        # Read the image and its mask
        image = cv2.imread(image_file, cv2.IMREAD_COLOR)
        mask = cv2.imread(mask_file, cv2.IMREAD_COLOR)

        # The original pair always comes first, followed by the augmented variants.
        save_images = [image]
        save_masks = [mask]
        for transform in transforms:
            augmented = transform(image=image, mask=mask)
            save_images.append(augmented['image'])
            save_masks.append(augmented['mask'])

        # Only a lone, un-augmented pair keeps its original file name. As soon
        # as there are several variants each one needs its own index suffix,
        # otherwise they would all overwrite the same file.
        keep_original_name = len(save_images) == 1

        # Save the images and masks
        for idx, (i, m) in enumerate(zip(save_images, save_masks)):
            # NOTE(review): cv2.resize uses its default interpolation for the
            # masks as well - confirm that blended label values at object
            # borders are acceptable for the downstream training step.
            i = cv2.resize(i, (IMAGE_WIDTH, IMAGE_HEIGHT))
            m = cv2.resize(m, (IMAGE_WIDTH, IMAGE_HEIGHT))

            if keep_original_name:
                tmp_img_name = f"{img_name}{image_extn}"
                tmp_mask_name = f"{mask_name}{mask_extn}"
            else:
                tmp_img_name = f"{img_name}_{idx}{image_extn}"
                tmp_mask_name = f"{mask_name}_{idx}{mask_extn}"

            cv2.imwrite(os.path.join(image_dir, tmp_img_name), i)
            cv2.imwrite(os.path.join(mask_dir, tmp_mask_name), m)


def resize_images(path, width, height):
    """Resize all PNG and JPG images directly inside ``path`` (in-place).

    Please note that this method overwrites the existing images in the path.

    Args:
        path: Directory whose ``*.png`` and ``*.jpg`` files are resized.
        width: Target width in pixels.
        height: Target height in pixels.
    """
    files = glob(os.path.join(path, '*.png')) + glob(os.path.join(path, '*.jpg'))
    for file in files:
        # Release the source file handle before the file is overwritten below.
        with Image.open(file) as im:
            # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the
            # same filter under its current name.
            im_resized = im.resize((width, height), Image.LANCZOS)
        im_resized.save(file)
        
def get_square_image(img, padding_color=(0, 0, 0)):
    """Return ``img`` padded to a square canvas.

    An image that is already square is returned unchanged (the same object).
    Otherwise a new image of the same mode is created whose side equals the
    longer edge, filled with ``padding_color`` (black by default), and the
    original is pasted centred along the shorter axis.
    """
    width, height = img.size
    if width == height:
        return img

    side = max(width, height)
    canvas = Image.new(img.mode, (side, side), padding_color)
    # Exactly one of the two offsets is non-zero: the one along the shorter axis.
    offset = ((side - width) // 2, (side - height) // 2)
    canvas.paste(img, offset)
    return canvas

def square_images(path, padding_color=(0,0,0)):
    """Pad every PNG and JPG image directly inside ``path`` to a square (in-place).

    Please note that this method overwrites the existing images in the path.
    """
    # PNG files are collected first, then JPG files.
    targets = []
    for pattern in ('*.png', '*.jpg'):
        targets.extend(glob(os.path.join(path, pattern)))

    for target in targets:
        squared = get_square_image(Image.open(target), padding_color)
        squared.save(target)
        
def load_data(path, split=0.1):
    """Split the image/mask pairs under ``path`` into train, validation and test sets.

    Images are listed from ``<path>/images`` and masks from ``<path>/masks``.
    Both listings are sorted and paired by position. The validation set and the
    test set each receive ``int(split * number_of_images)`` pairs; the remaining
    pairs form the training set. The split uses a fixed random state of 42.

    Args:
        path: Base directory containing the image and mask sub-directories.
        split: Fraction of all pairs used for the validation set and, separately,
            for the test set.

    Returns:
        ``(train_x, train_y), (valid_x, valid_y), (test_x, test_y)`` where the
        ``x`` lists hold image paths and the ``y`` lists the matching mask paths.

    Raises:
        ValueError: Raised by ``train_test_split`` when the number of images and
            masks differs, or when the requested set size is not valid for the
            number of samples (e.g. it evaluates to 0).
    """
    images = sorted(glob(os.path.join(path, PREFIX_NAME_IMAGE + "/*")))
    masks = sorted(glob(os.path.join(path, PREFIX_NAME_MASK + "/*")))

    total_size = len(images)
    valid_size = int(split * total_size)
    test_size = valid_size
    print(total_size)

    # Images and masks are split in one call so each image stays paired with its
    # mask. A count mismatch now raises instead of silently shuffling the two
    # lists differently.
    train_x, valid_x, train_y, valid_y = train_test_split(
        images, masks, test_size=valid_size, random_state=42)
    train_x, test_x, train_y, test_y = train_test_split(
        train_x, train_y, test_size=test_size, random_state=42)

    return (train_x, train_y), (valid_x, valid_y), (test_x, test_y)


if __name__=='__main__':
    # Script entry point: augment, square and resize the input data set, split
    # it into train/validation/test sets, copy each set to its output location
    # and write a small JSON report with the resulting sample counts.
    parser = argparse.ArgumentParser()
    parser.add_argument('--split', type=float, default=0.1)
    # Unknown arguments are ignored instead of aborting the job.
    args, _ = parser.parse_known_args()

    print('Received arguments {}'.format(args))

    # Define the paths
    input_data_base_path = '/opt/ml/processing/input'
    train_output_base_path = '/opt/ml/processing/train'
    test_output_base_path = '/opt/ml/processing/test'
    val_output_base_path = '/opt/ml/processing/val'
    report_output_base_path = '/opt/ml/processing/report'

    # Augment the images; the results are written into the input directories
    augment_data(input_data_base_path)

    print('Squaring images...')
    square_images(os.path.join(input_data_base_path, PREFIX_NAME_IMAGE))
    square_images(os.path.join(input_data_base_path, PREFIX_NAME_MASK), padding_color=0)

    # Resize the images in-place in the container image
    print('Resizing images...')
    resize_images(os.path.join(input_data_base_path, PREFIX_NAME_IMAGE), IMAGE_WIDTH, IMAGE_HEIGHT)
    resize_images(os.path.join(input_data_base_path, PREFIX_NAME_MASK), IMAGE_WIDTH, IMAGE_HEIGHT)

    # Create train test validation split (args.split is already a float)
    (train_x, train_y), (valid_x, valid_y), (test_x, test_y) = load_data(input_data_base_path, split=args.split)

    # Copy every split to its output path, keeping the images/masks layout
    split_outputs = (
        (train_output_base_path, train_x, train_y),
        (val_output_base_path, valid_x, valid_y),
        (test_output_base_path, test_x, test_y),
    )
    for base_path, image_files, mask_files in split_outputs:
        for prefix, files in ((PREFIX_NAME_IMAGE, image_files), (PREFIX_NAME_MASK, mask_files)):
            target_dir = os.path.join(base_path, prefix)
            Path(target_dir).mkdir(parents=True, exist_ok=True)
            for file in files:
                shutil.copy(file, os.path.join(target_dir, os.path.basename(file)))

    # Save the preprocessing report to make information available to downstream steps
    preprocessing_report = {
        'preprocessing': {
            'dataset': {
                'num_training_samples': len(train_x),
                'num_test_samples': len(test_x),
                'num_val_samples': len(valid_x)
            }
        }
    }
    print('Preprocessing report:', preprocessing_report)
    # Create the report directory like the other output directories, so that
    # writing the report does not fail when it does not exist yet.
    Path(report_output_base_path).mkdir(parents=True, exist_ok=True)
    report_output_path = os.path.join(report_output_base_path, 'preprocessing_report.json')
    with open(report_output_path, "w") as f:
        f.write(json.dumps(preprocessing_report))