# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: MIT-0
from __future__ import absolute_import

import json
import os
import time
import uuid
from datetime import datetime

import boto3
import mxnet as mx
import numpy as np
from mxnet import gluon, nd

from gluoncv.data import VideoClsCustom
from gluoncv.data.transforms import video
from gluoncv.utils.filesystem import try_import_decord

ctx = mx.gpu(0) if mx.context.num_gpus() > 0 else mx.cpu()

# UCF101 classes
classes = ['ApplyEyeMakeup', 'ApplyLipstick', 'Archery', 'BabyCrawling', 'BalanceBeam',
           'BandMarching', 'BaseballPitch', 'Basketball', 'BasketballDunk', 'BenchPress',
           'Biking', 'Billiards', 'BlowDryHair', 'BlowingCandles', 'BodyWeightSquats',
           'Bowling', 'BoxingPunchingBag', 'BoxingSpeedBag', 'BreastStroke', 'BrushingTeeth',
           'CleanAndJerk', 'CliffDiving', 'CricketBowling', 'CricketShot', 'CuttingInKitchen',
           'Diving', 'Drumming', 'Fencing', 'FieldHockeyPenalty', 'FloorGymnastics',
           'FrisbeeCatch', 'FrontCrawl', 'GolfSwing', 'Haircut', 'Hammering',
           'HammerThrow', 'HandstandPushups', 'HandstandWalking', 'HeadMassage', 'HighJump',
           'HorseRace', 'HorseRiding', 'HulaHoop', 'IceDancing', 'JavelinThrow',
           'JugglingBalls', 'JumpingJack', 'JumpRope', 'Kayaking', 'Knitting',
           'LongJump', 'Lunges', 'MilitaryParade', 'Mixing', 'MoppingFloor',
           'Nunchucks', 'ParallelBars', 'PizzaTossing', 'PlayingCello', 'PlayingDaf',
           'PlayingDhol', 'PlayingFlute', 'PlayingGuitar', 'PlayingPiano', 'PlayingSitar',
           'PlayingTabla', 'PlayingViolin', 'PoleVault', 'PommelHorse', 'PullUps',
           'Punch', 'PushUps', 'Rafting', 'RockClimbingIndoor', 'RopeClimbing',
           'Rowing', 'SalsaSpin', 'ShavingBeard', 'Shotput', 'SkateBoarding',
           'Skiing', 'Skijet', 'SkyDiving', 'SoccerJuggling', 'SoccerPenalty',
           'StillRings', 'SumoWrestling', 'Surfing', 'Swing', 'TableTennisShot',
           'TaiChi', 'TennisSwing', 'ThrowDiscus', 'TrampolineJumping', 'Typing',
           'UnevenBars', 'VolleyballSpiking', 'WalkingWithDog', 'WallPushups',
           'WritingOnBoard', 'YoYo']
dict_classes = dict(zip(range(len(classes)), classes))

# ------------------------------------------------------------ #
# Hosting methods                                               #
# ------------------------------------------------------------ #

def model_fn(model_dir):
    """Load the exported symbol and parameters into a Gluon SymbolBlock."""
    print('Loading model from {} on {}'.format(model_dir, ctx))
    symbol = mx.sym.load('%s/model-symbol.json' % model_dir)
    # Append a softmax so the network outputs class probabilities
    outputs = mx.symbol.softmax(data=symbol, name='softmax_label')
    inputs = mx.sym.var('data')
    net = gluon.SymbolBlock(outputs, inputs)
    net.load_parameters('%s/model-0000.params' % model_dir, ctx=ctx)
    return net
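
# Illustrative request/response shapes for transform_fn below. The bucket,
# key, and values here are placeholders, not real resources; the response
# keys mirror DynamoDB AttributeValue format:
#
#   request body:  {"S3_VIDEO_PATH": "s3://my-bucket/videos/clip.avi"}
#   response body: {"S3Path": {"S": "s3://my-bucket/videos/clip.avi"},
#                   "Predicted": {"S": "Basketball"},
#                   "Probability": {"S": "0.9876"},
#                   "DateCreatedUTC": {"S": "..."}}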

# Transform function that takes a JSON payload (with an S3 video path) as
# input and returns a JSON payload with the prediction.
def transform_fn(net, data, input_content_type, output_content_type):
    start = time.time()
    data = json.loads(data)
    video_data = read_video_data(data['S3_VIDEO_PATH'])
    video_input = video_data.as_in_context(ctx)
    probs = net(video_input.astype('float32', copy=False))
    predicted = mx.nd.argmax(probs, axis=1).asnumpy().tolist()[0]
    probability = mx.nd.max(probs, axis=1).asnumpy().tolist()[0]
    probability = '{:.4f}'.format(probability)
    predicted_name = dict_classes[int(predicted)]
    total_prediction = time.time() - start
    total_prediction = '{:.4f}'.format(total_prediction)
    print('Predicted: {} (probability: {})'.format(predicted_name, probability))
    print('Model prediction time: ', total_prediction)

    now = datetime.utcnow()
    time_format = '%Y-%m-%d %H:%M:%S %Z%z'
    now = now.strftime(time_format)
    response = {
        'S3Path': {'S': data['S3_VIDEO_PATH']},
        'Predicted': {'S': predicted_name},
        'Probability': {'S': probability},
        'DateCreatedUTC': {'S': now},
    }
    return json.dumps(response), output_content_type


def get_bucket_and_key(s3_path):
    """Split the given S3 path into its bucket name and key.

    Args:
        s3_path (str): Input S3 path, e.g. 's3://bucket/prefix/file.avi'
    """
    s3_path = s3_path.replace('s3://', '').replace('S3://', '')  # handle both cases
    bucket, key = s3_path.split('/', 1)
    return bucket, key
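
# For reference: with the defaults below (num_frames=32, one segment, a
# 224x224 center crop), read_video_data returns an NDArray of shape
# (1, 3, 32, 224, 224), i.e. (batch, channels, frames, height, width),
# which is the layout the network's 'data' input expects.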

def read_video_data(s3_video_path, num_frames=32):
    """Read and preprocess video data from the S3 bucket."""
    print('Reading and preprocessing video data from {}'.format(s3_video_path))
    s3_client = boto3.client('s3')

    # Build local paths for the downloaded video and the video list file
    fname = s3_video_path.replace('s3://', '').replace('S3://', '').replace('/', '')
    download_path = '/tmp/' + fname
    video_list_path = '/tmp/video_list' + str(uuid.uuid4()) + '.txt'

    bucket, key = get_bucket_and_key(s3_video_path)
    s3_client.download_file(bucket, key, download_path)

    # Rename the downloaded file with a unique suffix so that concurrent
    # invocations do not collide
    filename, ext = os.path.splitext(download_path)  # keep the file extension
    filename = filename + str(uuid.uuid4())
    os.rename(download_path, filename + ext)
    download_path = filename + ext

    # Dummy duration and label to go with each video path
    video_list = '{} {} {}'.format(download_path, 10, 1)
    with open(video_list_path, 'w') as fopen:
        fopen.write(video_list)

    # Constants
    data_dir = '/tmp/'
    num_segments = 1
    num_crop = 1  # crops per frame; only used by the slowfast branch below
    new_length = num_frames
    new_step = 1
    use_decord = True
    video_loader = True
    slowfast = False

    # Preprocessing parameters.
    # The transformation function does three things: center-crop each frame to
    # 224x224, transpose the clip to (num_channels, num_frames, height, width),
    # and normalize with the mean and standard deviation computed across all
    # ImageNet images. The general GluonCV data loader VideoClsCustom loads
    # the data with num_frames=32 as the clip length.
    input_size = 224
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    transform = video.VideoGroupValTransform(size=input_size, mean=mean, std=std)
    video_utils = VideoClsCustom(root=data_dir,
                                 setting=video_list_path,
                                 num_segments=num_segments,
                                 new_length=new_length,
                                 new_step=new_step,
                                 video_loader=video_loader,
                                 use_decord=use_decord,
                                 slowfast=slowfast)

    # Read the video named in the video list
    video_name = video_list.split()[0]
    decord = try_import_decord()
    decord_vr = decord.VideoReader(video_name)
    duration = len(decord_vr)

    segment_indices, skip_offsets = video_utils._sample_test_indices(duration)

    if video_loader:
        if slowfast:
            clip_input = video_utils._video_TSN_decord_slowfast_loader(
                video_name, decord_vr, duration, segment_indices, skip_offsets)
        else:
            clip_input = video_utils._video_TSN_decord_batch_loader(
                video_name, decord_vr, duration, segment_indices, skip_offsets)
    else:
        raise RuntimeError('We only support video-based inference.')

    clip_input = transform(clip_input)

    if slowfast:
        sparse_samples = len(clip_input) // (num_segments * num_crop)
        clip_input = np.stack(clip_input, axis=0)
        clip_input = clip_input.reshape((-1, sparse_samples, 3, input_size, input_size))
        clip_input = np.transpose(clip_input, (0, 2, 1, 3, 4))
    else:
        clip_input = np.stack(clip_input, axis=0)
        clip_input = clip_input.reshape((-1, new_length, 3, input_size, input_size))
        clip_input = np.transpose(clip_input, (0, 2, 1, 3, 4))
    if new_length == 1:
        clip_input = np.squeeze(clip_input, axis=2)  # this is for the 2D input case

    clip_input = nd.array(clip_input)

    # Clean up temp files
    os.remove(download_path)
    os.remove(video_list_path)

    return clip_input
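
# A minimal client-side sketch of calling a SageMaker endpoint that serves
# this script. The endpoint name and S3 path are placeholder assumptions,
# not real resources:
#
#   import json
#   import boto3
#
#   runtime = boto3.client('sagemaker-runtime')
#   payload = json.dumps({'S3_VIDEO_PATH': 's3://my-bucket/videos/clip.avi'})
#   result = runtime.invoke_endpoint(EndpointName='video-classification-endpoint',
#                                    ContentType='application/json',
#                                    Body=payload)
#   print(json.loads(result['Body'].read()))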