# Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

import six

from tensorpack.utils.argtools import memoized

assert six.PY3, "FasterRCNN requires Python 3!"
import tensorflow as tf

from tensorpack.models.regularize import regularize_cost, l2_regularizer
from tensorpack.tfutils.summary import add_moving_summary, get_current_tower_context
from tensorpack.graph_builder.model_desc import ModelDesc
from tensorpack.tfutils.common import tfv1
from tensorpack.tfutils.summary import add_moving_summary

from model import mask_head, boxclass_head
from model.backbone import image_preprocess, resnet_fpn_backbone
from config import config as cfg
from data import get_all_anchors_fpn
from model_box import RPNGroundTruth, clip_boxes_batch, crop_and_resize, permute_boxes_coords
from model.fpn import fpn_model
from model.boxclass_head import boxclass_predictions, boxclass_outputs, BoxClassHead
from model.biased_sampler import sample_fast_rcnn_targets
from model.mask_head import maskrcnn_loss
from model.rpn import rpn_head, generate_fpn_proposals, batch_rpn_losses
from model.roi_ops import roi_features, roi_level_summary
from utils.randomnness import SeedGenerator

from performance import print_runtime_shape, print_runtime_tensor


class GradientClipOptimizer(tfv1.train.Optimizer):
    def __init__(self, opt, clip_norm):
        self.opt = opt
        self.clip_norm = clip_norm

    def compute_gradients(self, *args, **kwargs):
        return self.opt.compute_gradients(*args, **kwargs)
    """
    def apply_gradients(self, *args, **kwargs):
        return self.opt.apply_gradients(*args, **kwargs)
    """
    def apply_gradients(self, gradvars, global_step=None, name=None):
        old_grads, v = zip(*gradvars)
        all_are_finite = tf.math.reduce_all([tf.math.reduce_all(tf.math.is_finite(g)) for g in old_grads])
        clipped_grads, _ = tf.clip_by_global_norm(old_grads, self.clip_norm,
                                         use_norm=tf.cond(
                                         all_are_finite,
                                         lambda: tf.linalg.global_norm(old_grads),
                                         lambda: tf.constant(self.clip_norm, dtype=tf.float32)), name='clip_by_global_norm')
        gradvars = list(zip(clipped_grads, v))
        return self.opt.apply_gradients(gradvars, global_step, name)

    def get_slot(self, *args, **kwargs):
        return self.opt.get_slot(*args, **kwargs)

    def get_slot_names(self, *args, **kwargs):
        return self.opt.get_slot_names(*args, **kwargs)

    def variables(self, *args, **kwargs):
        return self.opt.variables(*args, **kwargs)

def nchw_to_nhwc_transform(input):
    return tf.transpose(input, [0, 2, 3, 1])

def nhwc_to_nchw_transform(input):
    return tf.transpose(input, [0, 3, 1, 2])

class DetectionModel(ModelDesc):
    def __init__(self):
       pass

    def preprocess(self, image):
        image = image_preprocess(image, bgr=True)
        return tf.transpose(image, [0, 3, 1, 2]) if cfg.TRAIN.BACKBONE_NCHW else image

    @property
    def training(self):
        return get_current_tower_context().is_training

    def optimizer(self):
        lr = tf.compat.v1.get_variable ('learning_rate', initializer=0.003, trainable=False)
        tf.summary.scalar('learning_rate-summary', lr)

        opt = tf.compat.v1.train.MomentumOptimizer(lr, 0.9)
        if cfg.TRAIN.GRADIENT_CLIP != 0:
            opt = GradientClipOptimizer(opt, cfg.TRAIN.GRADIENT_CLIP)
        return opt

    def get_inference_tensor_names(self):
        """
        Returns two lists of tensor names to be used to create an inference callable.

        Returns:
            [str]: input names
            [str]: output names
        """

        out = ['output/batch_indices', 'output/boxes', 'output/scores', 'output/labels']

        if cfg.MODE_MASK:
            out.append('output/masks')
        return ['images', 'orig_image_dims'], out

    def build_graph(self, *inputs):
        #tf.debugging.enable_check_numerics()

        inputs = dict(zip(self.input_names, inputs))

        images = self.preprocess(inputs['images'])   
        seed_gen = SeedGenerator(cfg.TRAIN.SEED)

        p_features = self.fpn_features(images, seed_gen) # features for levels p2, p3, p4, p5 p6

        anchor_inputs = {k: v for k, v in inputs.items() if k.startswith('anchor_')}
        image_shape2d = tf.shape(images)[2:] if cfg.TRAIN.BACKBONE_NCHW else tf.shape(images)[1:3] 
        proposal_rois, rpn_losses = self.rpn(image_shape2d, p_features, anchor_inputs, inputs['orig_image_dims'], seed_gen)

        targets = [inputs[k] for k in ['gt_boxes', 'gt_labels', 'gt_masks'] if k in inputs]
        head_losses = self.roi_heads(p_features, proposal_rois, targets, inputs, seed_gen)

        if self.training:
            wd_cost = regularize_cost('.*/W', l2_regularizer(cfg.TRAIN.WEIGHT_DECAY), name='wd_cost')
            total_cost = tf.add_n(rpn_losses + head_losses + [wd_cost], 'total_cost')
            add_moving_summary(total_cost, wd_cost)
            return total_cost

class ResNetFPNModel(DetectionModel):
    def __init__(self):
        super(ResNetFPNModel, self).__init__()

    def inputs(self):

        ret = [
            tfv1.placeholder(tf.string, (None,), 'filenames'), # N length vector of filenames
            tfv1.placeholder(tf.float32, (None, None, None, 3), 'images'),  # N x H x W x C
            tfv1.placeholder(tf.int32, (None, 3), 'orig_image_dims')  # N x 3(image dims - hwc)
        ]
        num_anchors = len(cfg.RPN.ANCHOR_RATIOS)
        for k in range(len(cfg.FPN.ANCHOR_STRIDES)):
            ret.extend([
                tfv1.placeholder(tf.int32, (None, None, None, num_anchors),  # N x H x W x NumAnchors
                               'anchor_labels_lvl{}'.format(k + 2)),
                tfv1.placeholder(tf.float32, (None, None, None, num_anchors, 4),  # N x H x W x NumAnchors x 4
                               'anchor_boxes_lvl{}'.format(k + 2))])
        ret.extend([
            tfv1.placeholder(tf.float32, (None, None, 4), 'gt_boxes'),  # N x MaxNumGTs x 4
            tfv1.placeholder(tf.int64, (None, None), 'gt_labels'),  # all > 0        # N x MaxNumGTs
            tfv1.placeholder(tf.int32, (None,), 'orig_gt_counts')  # N
        ])

        if cfg.MODE_MASK:
            ret.append(
                    tfv1.placeholder(tf.uint8, (None, None, None, None), 'gt_masks')  # N x MaxNumGTs x H x W
            )

        return ret

    def fpn_features(self, image, seed_gen):
        c2345 = resnet_fpn_backbone(image, cfg.BACKBONE.RESNET_NUM_BLOCKS, seed_gen)

        if cfg.TRAIN.BACKBONE_NCHW and not cfg.TRAIN.FPN_NCHW:
            c2345 = [nchw_to_nhwc_transform(c) for c in c2345]
        elif not cfg.TRAIN.BACKBONE_NCHW and cfg.TRAIN.FPN_NCHW:
            c2345 = [nhwc_to_nchw_transform(c) for c in c2345]

        p23456 = fpn_model('fpn', c2345, seed_gen)
        
        if cfg.TRAIN.FPN_NCHW and not cfg.TRAIN.RPN_NCHW:
            p23456 = [nchw_to_nhwc_transform(p) for p in p23456]
        elif not cfg.TRAIN.FPN_NCHW and cfg.TRAIN.RPN_NCHW:
            p23456 = [nhwc_to_nchw_transform(p) for p in p23456]

        return p23456


    def rpn(self, image_shape2d, p_features, anchor_inputs, orig_image_dims, seed_gen):
        """
        The RPN part of the graph that generate the RPN proposal and losses

        Args:
            image_shape2d:  H_image x W_image
            p_features: A List of 5 FPN feature maps, i.e. level P23456
            anchor_inputs: dict, contains all anchor input information
            orig_image_dims: BS x 3
        Returns:
            proposal_rois: BS X top K region proposals X 4
            losses: scalar, sum of the label loss and box loss
        """
        assert len(cfg.RPN.ANCHOR_SIZES) == len(cfg.FPN.ANCHOR_STRIDES)
        
        batch_size = tf.shape(orig_image_dims)[0]
        orig_image_shape2d = orig_image_dims[: ,:2]
    
        all_anchors_fpn = get_all_anchors_fpn()

        multilevel_label_preds = []
        multilevel_box_preds = []

        for p_i in p_features:
            # label_preds: BS x H_feaure x W_feature x NA, 
            # box_preds: BS  x H_feature x W_feature X (NA * 4)
            label_preds, box_preds = rpn_head('rpn', p_i, cfg.FPN.NUM_CHANNEL, len(cfg.RPN.ANCHOR_RATIOS), seed_gen=seed_gen)
            multilevel_label_preds.append(label_preds)
            multilevel_box_preds.append(box_preds)
            
        #  proposal_rois: BS X Top_K X 4 , proposal_scores: [ BS X Top_K X 1 ]
        proposal_rois = generate_fpn_proposals(image_shape2d,
                                                    all_anchors_fpn,
                                                    multilevel_label_preds,
                                                    multilevel_box_preds,
                                                    orig_image_shape2d,
                                                    batch_size)
        if self.training:

            multilevel_anchor_labels = [anchor_inputs['anchor_labels_lvl{}'.format(i + 2)] for i in range(len(all_anchors_fpn))]
            multilevel_anchor_boxes = [anchor_inputs['anchor_boxes_lvl{}'.format(i + 2)] for i in range(len(all_anchors_fpn))]

            multilevel_box_preds_reshaped = []
            for box_preds in multilevel_box_preds:
                shp = tf.shape(box_preds)  # BS x H_feature x W_feature X (NA * 4) 
                box_preds = tf.reshape(box_preds, tf.stack([shp[0], shp[1], shp[2], -1, 4]))  # BS x H_feature x W_feature x NA x 4
                multilevel_box_preds_reshaped.append(box_preds)

            multilevel_rpn_gt = [ RPNGroundTruth(all_anchors_fpn[j], multilevel_anchor_labels[j], multilevel_anchor_boxes[j])  for j in range(len(p_features)) ]
            total_label_loss, total_box_loss = batch_rpn_losses(multilevel_rpn_gt, multilevel_label_preds, multilevel_box_preds_reshaped, orig_image_shape2d) 

            with tf.name_scope('rpn_losses'):
                label_loss = tf.math.truediv(total_label_loss, tf.cast(batch_size, dtype=tf.float32), name='label_loss')
                box_loss = tf.math.truediv(total_box_loss, tf.cast(batch_size, dtype=tf.float32), name='box_loss')
                add_moving_summary(label_loss, box_loss)

                #label_loss = print_runtime_tensor("rpn_losses/label_loss", label_loss)
                #box_loss = print_runtime_tensor("rpn_losses/box_loss", box_loss)

                losses = [label_loss, box_loss]


        else:
            losses = []

        return proposal_rois, losses

    def roi_heads(self, p_features, proposal_rois, ground_truth, inputs, seed_gen):
        """
        Implement the RoI Align and construct the RoI head (box and mask branches) of the graph

        Args:
            p_features: ([tf.Tensor]): A list of 5 FPN feature level P23456
            proposal_rois (tf.Tensor): BS X Num_rois X 4 (x1, y1, x2, y2)
            ground_truth: list of 'gt_boxes', 'gt_labels', 'gt_masks' from input
            inputs: dict, contains all input information
        Returns: all_losses: a list contains box loss and mask loss
        """

        image_shape2d = inputs['orig_image_dims'][: ,:2] # BS x 2
        assert len(p_features) == 5, "Features have to be P23456!"
        gt_boxes, gt_labels, *_ = ground_truth

        prepadding_gt_counts = inputs['orig_gt_counts']

        if self.training:

            # Sample the proposal_rois to make the foreground(fg) box and background(bg) boxes
            # ratio close to configuration. 
            proposal_rois, proposal_labels_gt, proposal_boxes_gt, proposal_fg_gt_indices = sample_fast_rcnn_targets(
                    proposal_rois, # BS X Num_rois X 4 (x1, y1, x2, y2)
                    gt_boxes, # BS X Num_gt_boxes X 4 (x1, y1, x2, y2)
                    gt_labels, # BS x Num_gt_boxes
                    prepadding_gt_counts, # BS
                    seed_gen=seed_gen) 

        p2_5features = p_features[:4] # p2, p3, p4, p5 features

        if cfg.TRAIN.RPN_NCHW:
            p2_5features = [ nchw_to_nhwc_transform(p) for p in p2_5features]

        proposal_rois_y1x1y2x2 = permute_boxes_coords(proposal_rois) # BS X Num_rois X 4 (y1, x1, y2, x2)
        # For Fast R-CNN
        roi_level_ids, roi_features_fastrcnn = roi_features(p2_5features, proposal_rois_y1x1y2x2, 7) #  BS X Num_boxes x H_roi_box x W_roi_box x NumChannel
        with tf.name_scope(name="multilevel_roi_align"):
            roi_level_summary(roi_level_ids)
            
        rff_shape = tf.shape(roi_features_fastrcnn)
        roi_features_fastrcnn = tf.reshape(roi_features_fastrcnn, [-1, rff_shape[2], rff_shape[3], rff_shape[4]])
        fastrcnn_head_func = getattr(boxclass_head, cfg.FPN.BOXCLASS_HEAD_FUNC)
        fastrcnn_head_feature = fastrcnn_head_func('fastrcnn', roi_features_fastrcnn, seed_gen=seed_gen) # Num_sampled_boxes x Num_features

        # fastrcnn_label_preds: Num_sampled_boxes x Num_classes ,fastrcnn_box_preds: Num_sampled_boxes x Num_classes x 4
        fastrcnn_labels_pred, fastrcnn_boxes_pred = boxclass_outputs('fastrcnn/outputs', fastrcnn_head_feature, cfg.DATA.NUM_CLASS, seed_gen=seed_gen)

        fastrcnn_labels_pred = tf.reshape(fastrcnn_labels_pred, [rff_shape[0], rff_shape[1], cfg.DATA.NUM_CLASS] )
        fastrcnn_boxes_pred  = tf.reshape(fastrcnn_boxes_pred, [rff_shape[0], rff_shape[1], cfg.DATA.NUM_CLASS, 4])

        regression_weights = tf.constant(cfg.FRCNN.BBOX_REG_WEIGHTS, dtype=tf.float32)
        fastrcnn_head = BoxClassHead(fastrcnn_boxes_pred,
                                     fastrcnn_labels_pred,
                                     regression_weights,
                                     proposal_rois)
        if self.training:
            # only calculate the losses for boxes if there is an object (foreground boxes)
            fastrcnn_head.add_training_info(proposal_boxes_gt, proposal_labels_gt)
            all_losses = fastrcnn_head.losses()

            if cfg.MODE_MASK:
                gt_masks = ground_truth[2]
                maskrcnn_head_func = getattr(mask_head, cfg.FPN.MRCNN_HEAD_FUNC)
                
                all_fg_gt_masks = []
                all_fg_gt_labels = []
                all_fg_mask_preds = []
                all_fg_roi_level_ids = []

                for i in range(cfg.TRAIN.BATCH_SIZE_PER_GPU):
                    image_gt_count = prepadding_gt_counts[i] # 1-D Num_gt_boxes_current_image
                    image_proposal_gt_labels = proposal_labels_gt[i]
                    image_proposal_fg_indices = tf.reshape(tf.where(image_proposal_gt_labels > 0), [-1])
                    
                    image_proposal_rois_y1x1y2x2 = proposal_rois_y1x1y2x2[i]
                    image_fg_proposal_rois_y1x1y2x2 = tf.gather(image_proposal_rois_y1x1y2x2, image_proposal_fg_indices)
                     
                    image_fg_proposal_rois_y1x1y2x2 = tf.expand_dims(image_fg_proposal_rois_y1x1y2x2, axis=0)
                    image_p2_5features = [ tf.expand_dims(pf[i], axis=0) for pf in p2_5features ]
                    image_fg_roi_level_ids, image_fg_roi_features_maskrcnn = roi_features(image_p2_5features, 
                                                                  image_fg_proposal_rois_y1x1y2x2, 
                                                                  14) # Num_boxes x  H_roi_mask x W_roi_mask x NumChannel
                
                    all_fg_roi_level_ids.extend(image_fg_roi_level_ids)
                        
                    image_fg_roi_features_maskrcnn = tf.squeeze(image_fg_roi_features_maskrcnn, axis=0)

                    if cfg.TRAIN.MASK_NCHW:
                        image_fg_roi_features_maskrcnn = nhwc_to_nchw_transform(image_fg_roi_features_maskrcnn)

                    image_fg_mask_preds = maskrcnn_head_func('maskrcnn', 
                                                             image_fg_roi_features_maskrcnn, 
                                                             cfg.DATA.NUM_CATEGORY, 
                                                             seed_gen=seed_gen)   # Num_boxes x num_category x (H_roi_mask*2) x (W_roi_mask*2
                    all_fg_mask_preds.append(image_fg_mask_preds)
                   
                    image_proposal_fg_gt_labels = tf.gather(image_proposal_gt_labels, image_proposal_fg_indices) # 1-D Num_fg_boxes_current_image
                    
                    image_gt_masks = gt_masks[i, :image_gt_count, :, :] # Num_gt_boxes_current_image x H_gtmask x W_gtmask
                    image_gt_masks = tf.expand_dims(image_gt_masks, axis=3) # Num_gt_boxes_current_image x H_gtmask x W_gtmask X 1
                    image_proposal_fg_gt_indices = proposal_fg_gt_indices[i]

                    image_fg_proposal_rois_y1x1y2x2 = tf.squeeze(image_fg_proposal_rois_y1x1y2x2, axis=0)
                    image_proposal_fg_gt_masks = crop_and_resize(image_gt_masks, 
                                                                image_fg_proposal_rois_y1x1y2x2, 
                                                                image_proposal_fg_gt_indices, 
                                                                28, 
                                                                image_shape2d[i], 
                                                                pad_border=False) # Num_fg_boxes_current_image x  (H_roi_mask*2) x (W_roi_mask*2) x 1
                   
                    all_fg_gt_labels.append(image_proposal_fg_gt_labels)
                    all_fg_gt_masks.append(image_proposal_fg_gt_masks)

                with tf.name_scope(name="multilevel_roi_align_mask"):
                    roi_level_ids = []
                    for i in range(4):
                        roi_level_ids.append(tf.concat(all_fg_roi_level_ids[i::4], axis=0))
                    roi_level_summary(roi_level_ids)
                  
                fg_gt_masks = tf.concat(all_fg_gt_masks, axis=0) # Num_fg_boxes x (H_roi_mask*2) x (W_roi_mask*2) X 1
                fg_gt_labels = tf.concat(all_fg_gt_labels, axis=0) # 1-D Num_fg_boxes

                fg_gt_masks = tf.squeeze(fg_gt_masks, 3, 'fg_gt_masks') # Num_fg_boxes x (H_roi_mask*2) x (W_roi_mask*2)
                fg_mask_preds = tf.concat(all_fg_mask_preds, axis=0) # Num_fg_boxes x num_category x (H_roi_mask*2) x (W_roi_mask*2)
                mask_loss = maskrcnn_loss(fg_mask_preds, fg_gt_labels, fg_gt_masks)
                #mask_loss = print_runtime_tensor("mask_loss", mask_loss)

                all_losses.append(mask_loss)
            return all_losses
        else:
            decoded_boxes = fastrcnn_head.decoded_output_boxes_batch() # BS X N x #class x 4 
            decoded_boxes = clip_boxes_batch(decoded_boxes, image_shape2d, name='fastrcnn_all_boxes') # BS X N X (#class) X 4 (x1y1x2y2)
            label_scores = fastrcnn_head.output_scores(name='fastrcnn_all_scores') # BS X N x #class scores
    
            final_labels_list = []
            final_boxes_list = []
            final_scores_list = []
            batch_indicies_list = []
            final_masks_list = []

            maskrcnn_head_func = getattr(mask_head, cfg.FPN.MRCNN_HEAD_FUNC)
            for i in range(cfg.TEST.BATCH_SIZE_PER_GPU):
                image_decoded_boxes = decoded_boxes[i]
                image_label_scores = label_scores[i]
                image_final_boxes, image_final_scores, image_final_labels, image_box_ids = boxclass_predictions(image_decoded_boxes, image_label_scores)
                image_batch_ids = tf.tile([i], [tf.size(image_box_ids)])

                final_boxes_list.append(image_final_boxes)
                final_scores_list.append(image_final_scores)
                final_labels_list.append(image_final_labels)
                batch_indicies_list.append(image_batch_ids)

                if cfg.MODE_MASK:
                    image_final_boxes_y1x1y2x2 = permute_boxes_coords(image_final_boxes)
                    image_final_boxes_y1x1y2x2 = tf.expand_dims(image_final_boxes_y1x1y2x2, axis=0)
                    image_p2_5features = [ tf.expand_dims(pf[i], axis=0) for pf in p2_5features ]

                    _, image_roi_features_maskrcnn = roi_features(image_p2_5features, 
                                                            image_final_boxes_y1x1y2x2, 
                                                            14) # 1 X Num_boxes x  H_roi_mask x W_roi_mask x NumChannel

                    irfm_shape = tf.shape(image_roi_features_maskrcnn)
                    image_roi_features_maskrcnn = tf.reshape(image_roi_features_maskrcnn, [-1, irfm_shape[2], irfm_shape[3], irfm_shape[4]])
                    if cfg.TRAIN.MASK_NCHW:
                        image_roi_features_maskrcnn = nhwc_to_nchw_transform(image_roi_features_maskrcnn)
                    
                    image_mask_logits = maskrcnn_head_func('maskrcnn', image_roi_features_maskrcnn, cfg.DATA.NUM_CATEGORY, seed_gen=seed_gen)   # N x #cat x 28 x 28
                    
                    image_label_indices = tf.stack([tf.range(tf.size(image_final_labels)), tf.cast(image_final_labels, tf.int32) - 1], axis=1)
                    image_mask_logits = tf.gather_nd(image_mask_logits, image_label_indices)   # #resultx28x28

                    image_mask = tf.sigmoid(image_mask_logits)
                    final_masks_list.append(image_mask)

           
            with tf.name_scope(name="output"):
                batch_indices = tf.identity(tf.concat(batch_indicies_list, 0), name="batch_indices")
                final_boxes = tf.identity(tf.concat(final_boxes_list, 0), name="boxes")
                final_scores = tf.identity(tf.concat(final_scores_list, 0), name="scores")
                final_labels = tf.identity(tf.concat(final_labels_list, 0), name="labels")

                if cfg.MODE_MASK:
                    final_masks = tf.identity(tf.concat(final_masks_list, 0), name="masks")

            return []