# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: MIT-0

# import packages ##
import base64
import json
import mxnet as mx
import ffmpeg
import numpy as np
import audio2numpy as a2n
import openl3
from os.path import isfile, join, getsize
#import boto3, botocore

import gluoncv as gcv

def model_fn(model_dir):
    """
    Placeholder model loader: nothing is actually read from ``model_dir``.

    The function only announces the directory it was handed and returns a
    dummy pair, which ``transform_fn`` later unpacks as ``(model, ctx)``.

    Args:
        model_dir (str): directory where model artifacts are saved/loaded

    Returns:
        tuple: the placeholder pair ``(0, 0)``
    """
    announcement = 'Ready to load model from ' + model_dir
    print(announcement)

    # This override must exist: without it the default model_fn is used,
    # and that default cannot find the model.
    placeholder_model, placeholder_ctx = 0, 0
    return placeholder_model, placeholder_ctx

## SageMaker inference function ##
def _read_audio_payload(data):
    """Decode raw audio bytes via a private, self-cleaning scratch file.

    A fresh temporary directory is used for every call so concurrent or
    consecutive requests can never read each other's audio, and nothing is
    left behind on disk afterwards.

    Args:
        data (bytes-like): encoded audio file content.

    Returns:
        Whatever ``a2n.open_audio`` returns; the caller unpacks it as
        ``(samples, sample_rate)``.
    """
    import tempfile  # local import keeps this block self-contained

    with tempfile.TemporaryDirectory() as scratch_dir:
        # The '.mp3' name is kept from the original fixed path
        # ('/tmp/test.mp3') in case the decoder inspects the extension.
        audio_path = join(scratch_dir, 'test.mp3')
        with open(audio_path, 'wb') as writer:
            writer.write(data)
        print('file saved', isfile(audio_path))
        return a2n.open_audio(audio_path)


def _keep_loud_seconds(signal, sr, threshold=0.1):
    """Split a signal into whole one-second windows and keep the loud ones.

    Args:
        signal: 1-D samples, or a 2-D array whose first column is used
            (assumes a (samples, channels) layout, as the original
            ``x[..., 0]`` indexing did -- TODO confirm against the decoder).
        sr (int): sample rate, i.e. samples per one-second window.
        threshold (float): a window is kept when its maximum sample value
            exceeds this. Only the positive peak is tested, not ``abs``.

    Returns:
        tuple: ``(nwin, kept)`` where ``nwin`` is the number of whole
        seconds in the signal and ``kept`` is a 1-D array with the samples
        of the kept windows, in their original order.
    """
    signal = np.asarray(signal)
    channel = signal[:, 0] if signal.ndim > 1 else signal
    nwin = len(channel) // sr
    # Trailing samples that do not fill a whole second are discarded.
    windows = channel[:nwin * sr].reshape(nwin, sr)
    loud = windows.max(axis=1) > threshold
    return nwin, windows[loud].ravel()


def transform_fn(net, data, input_content_type, output_content_type):
    """Turn an audio payload into an averaged OpenL3 embedding.

    The audio is decoded, one-second windows whose peak does not exceed 0.1
    are dropped, the remaining samples are embedded with OpenL3
    (``content_type="env"``, ``input_repr="linear"``, ``embedding_size=512``,
    ``hop_size=1``) and the embeddings are averaged over the first axis.

    Args:
        net: the pair returned by ``model_fn``; unpacked but not used.
        data (bytes-like): encoded audio file content.
        input_content_type: not used.
        output_content_type: not used.

    Returns:
        list: a single-element list ``[{'prediction': values}]`` where
        ``values`` is the averaged embedding followed by the number of whole
        seconds in the audio, or ``''`` when no window passed the loudness
        threshold.

    Raises:
        ValueError: if ``data`` is a ``str``. Such payloads carry no audio;
            previously they caused whatever file an earlier request had left
            on disk to be processed instead.
    """
    # Unpacking is kept so a malformed `net` still fails here, as before.
    _model, _ctx = net

    print(type(data))
    if isinstance(data, str):
        raise ValueError(
            'transform_fn expects raw audio bytes; got a str payload'
        )

    x, sr = _read_audio_payload(data)
    nwin, x_input = _keep_loud_seconds(x, sr)
    if x_input.size == 0:
        print('Empety file, skip')
        return [{'prediction': ''}]
    print(nwin, x_input.size)

    emb, _timestamps = openl3.get_audio_embedding(
        x_input, sr, content_type="env", center=False,
        input_repr="linear", embedding_size=512, hop_size=1)
    aver_emb = np.average(emb, axis=0)
    # The window count is appended as one extra trailing value.
    result = np.append(aver_emb, nwin)
    print(result)

    return [{'prediction': result.tolist()}]