#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Preprocessing ops."""
import math
import tensorflow as tf

from sagemakercv.core import preprocessor

def _add_noise(image, std):
    noise = tf.random.normal(shape=tf.shape(image), mean=0.0, stddev=std, dtype=image.dtype)
    return image * (1.0 + noise)


def add_noise(image, std=0.05, seed=None):
    # random variable defining whether to add noise or not
    make_noisy = tf.greater(tf.random.uniform([], seed=seed), 0.5)
    image = tf.cond(pred=make_noisy, true_fn=lambda: _add_noise(image, std), false_fn=lambda: image)
    return image

def normalize_image(image):
    """Normalize the image.

    Args:
    image: a tensor of shape [height, width, 3] in dtype=tf.float32.

    Returns:
    normalized_image: a tensor which has the same shape and dtype as image,
      with pixel values normalized.
    """
    offset = tf.constant([0.485, 0.456, 0.406])
    offset = tf.reshape(offset, shape=(1, 1, 3))

    scale = tf.constant([0.229, 0.224, 0.225])
    scale = tf.reshape(scale, shape=(1, 1, 3))

    normalized_image = (image - offset) / scale

    return normalized_image


def random_horizontal_flip(image, boxes=None, masks=None, seed=None):
    """Random horizontal flip the image, boxes, and masks.

    Args:
    image: a tensor of shape [height, width, 3] representing the image.
    boxes: (Optional) a tensor of shape [num_boxes, 4] represneting the box
      corners in normalized coordinates.
    masks: (Optional) a tensor of shape [num_masks, height, width]
      representing the object masks. Note that the size of the mask is the
      same as the image.

    Returns:
    image: the processed image tensor after being randomly flipped.
    boxes: None or the processed box tensor after being randomly flipped.
    masks: None or the processed mask tensor after being randomly flipped.
    """
    return preprocessor.random_horizontal_flip(image, boxes, masks, seed=seed)


def resize_and_pad(image, target_size, stride, boxes=None, masks=None):
    """Resize and pad images, boxes and masks.

    Resize and pad images, (optionally boxes and masks) given the desired output
    size of the image and stride size.

    Here are the preprocessing steps.
    1. For a given image, keep its aspect ratio and rescale the image to make it
     the largest rectangle to be bounded by the rectangle specified by the
     `target_size`.
    2. Pad the rescaled image such that the height and width of the image become
     the smallest multiple of the stride that is larger or equal to the desired
     output diemension.

    Args:
    image: an image tensor of shape [original_height, original_width, 3].
    target_size: a tuple of two integers indicating the desired output
      image size. Note that the actual output size could be different from this.
    stride: the stride of the backbone network. Each of the output image sides
      must be the multiple of this.
    boxes: (Optional) a tensor of shape [num_boxes, 4] represneting the box
      corners in normalized coordinates.
    masks: (Optional) a tensor of shape [num_masks, height, width]
      representing the object masks. Note that the size of the mask is the
      same as the image.

    Returns:
    image: the processed image tensor after being resized and padded.
    image_info: a tensor of shape [5] which encodes the height, width before
      and after resizing and the scaling factor.
    boxes: None or the processed box tensor after being resized and padded.
      After the processing, boxes will be in the absolute coordinates w.r.t.
      the scaled image.
    masks: None or the processed mask tensor after being resized and padded.
    """

    input_height, input_width, _ = tf.unstack(
        tf.cast(tf.shape(input=image), dtype=tf.float32),
        axis=0
    )

    target_height, target_width = target_size

    scale_if_resize_height = target_height / input_height
    scale_if_resize_width = target_width / input_width

    scale = tf.minimum(scale_if_resize_height, scale_if_resize_width)

    scaled_height = tf.cast(scale * input_height, dtype=tf.int32)
    scaled_width = tf.cast(scale * input_width, dtype=tf.int32)

    image = tf.image.resize(image, [scaled_height, scaled_width], method=tf.image.ResizeMethod.BILINEAR)

    padded_height = int(math.ceil(target_height * 1.0 / stride) * stride)
    padded_width = int(math.ceil(target_width * 1.0 / stride) * stride)

    image = tf.image.pad_to_bounding_box(image, 0, 0, padded_height, padded_width)
    image.set_shape([padded_height, padded_width, 3])

    image_info = tf.stack([
        tf.cast(scaled_height, dtype=tf.float32),
        tf.cast(scaled_width, dtype=tf.float32),
        1.0 / scale,
        input_height,
        input_width]
    )

    if boxes is not None:
        normalized_box_list = preprocessor.box_list.BoxList(boxes)
        scaled_boxes = preprocessor.box_list_scale(normalized_box_list, scaled_height, scaled_width).get()

    else:
        scaled_boxes = None

    if masks is not None:
        scaled_masks = tf.image.resize(
            tf.expand_dims(masks, -1),
            [scaled_height, scaled_width],
            method=tf.image.ResizeMethod.NEAREST_NEIGHBOR
        )
        # Check if there is any instance in this image or not.
        num_masks = tf.shape(input=scaled_masks)[0]
        scaled_masks = tf.cond(
            pred=tf.greater(num_masks, 0),
            true_fn=lambda: tf.image.pad_to_bounding_box(scaled_masks, 0, 0, padded_height, padded_width),
            false_fn=lambda: tf.zeros([0, padded_height, padded_width, 1])
        )

    else:
        scaled_masks = None

    return image, image_info, scaled_boxes, scaled_masks


def crop_gt_masks(instance_masks, boxes, gt_mask_size, image_size):
    """Crops the ground truth binary masks and resize to fixed-size masks."""
    num_masks = tf.shape(input=instance_masks)[0]

    scale_sizes = tf.convert_to_tensor(value=[image_size[0], image_size[1]] * 2, dtype=tf.float32)

    boxes = boxes / scale_sizes

    cropped_gt_masks = tf.image.crop_and_resize(
        image=instance_masks,
        boxes=boxes,
        box_indices=tf.range(num_masks, dtype=tf.int32),
        crop_size=[gt_mask_size, gt_mask_size],
        method='bilinear')[:, :, :, 0]

    cropped_gt_masks = tf.pad(
        tensor=cropped_gt_masks,
        paddings=tf.constant([[0, 0], [2, 2], [2, 2]]),
        mode='CONSTANT',
        constant_values=0.
    )

    return cropped_gt_masks


def pad_to_fixed_size(data, pad_value, output_shape):
    """Pad data to a fixed length at the first dimension.

    Args:
    data: Tensor to be padded to output_shape.
    pad_value: A constant value assigned to the paddings.
    output_shape: The output shape of a 2D tensor.

    Returns:
    The Padded tensor with output_shape [max_num_instances, dimension].
    """
    max_num_instances = output_shape[0]
    dimension = output_shape[1]

    data = tf.reshape(data, [-1, dimension])
    num_instances = tf.shape(input=data)[0]

    pad_length = max_num_instances - num_instances

    paddings = pad_value * tf.ones([pad_length, dimension])

    padded_data = tf.reshape(tf.concat([data, paddings], axis=0), output_shape)
    return padded_data

# modded from https://github.com/tensorflow/tpu/blob/master/models/official/efficientnet/imagenet_input.py
def mixup(batch_size, alpha, images, labels):
    """Applies Mixup regularization to a batch of images and labels.
    [1] Hongyi Zhang, Moustapha Cisse, Yann N. Dauphin, David Lopez-Paz
      Mixup: Beyond Empirical Risk Minimization.
      ICLR'18, https://arxiv.org/abs/1710.09412
    Arguments:
      batch_size: The input batch size for images and labels.
      alpha: Float that controls the strength of Mixup regularization.
      images: A batch of images of shape [batch_size, ...]
      labels: A batch of labels of shape [batch_size, num_classes]
    Returns:
      A tuple of (images, labels) with the same dimensions as the input with
      Mixup regularization applied.
    """
    mix_weight = tf.compat.v1.distributions.Beta(alpha, alpha).sample([batch_size, 1])
    mix_weight = tf.maximum(mix_weight, 1. - mix_weight)
    images_mix_weight = tf.reshape(mix_weight, [batch_size, 1, 1, 1])
    # Mixup on a single batch is implemented by taking a weighted sum with the
    # same batch in reverse.
    images_mix = (
        images * images_mix_weight + images[::-1] * (1. - images_mix_weight))
    labels_mix = labels * mix_weight + labels[::-1] * (1. - mix_weight)
    return images_mix, labels_mix

def _decode_crop_and_flip(image_buffer, bbox, num_channels):
    """Crops the given image to a random part of the image, and randomly flips.
    We use the fused decode_and_crop op, which performs better than the two ops
    used separately in series, but note that this requires that the image be
    passed in as an un-decoded string Tensor.
    Args:
    image_buffer: scalar string Tensor representing the raw JPEG image buffer.
    bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
      where each coordinate is [0, 1) and the coordinates are arranged as
      [ymin, xmin, ymax, xmax].
    num_channels: Integer depth of the image buffer for decoding.
    Returns:
    3-D tensor with cropped image.
    """
    # If no box is supplied, then we assume the bounding box is
    # the entire image.
    sample_distorted_bounding_box = tf.raw_ops.SampleDistortedBoundingBoxV2(
      image_size=tf.image.extract_jpeg_shape(image_buffer),
      bounding_boxes=bbox,
      min_object_covered=0.1,
      aspect_ratio_range=[0.75, 1.33],
      area_range=[0.05, 1.0],
      max_attempts=100,
      use_image_if_no_bounding_boxes=True)
    bbox_begin, bbox_size, _ = sample_distorted_bounding_box

    # Reassemble the bounding box in the format the crop op requires.
    offset_y, offset_x, _ = tf.unstack(bbox_begin)
    target_height, target_width, _ = tf.unstack(bbox_size)
    crop_window = tf.stack([offset_y, offset_x, target_height, target_width])

    # Use the fused decode and crop op here, which is faster than each in series.
    cropped = tf.image.decode_and_crop_jpeg(
      image_buffer, crop_window, channels=num_channels)

    # Flip to add a little more random distortion in.
    cropped = tf.image.random_flip_left_right(cropped)
    return cropped