"""Evaluation script for measuring mean squared error."""
import json
import logging
import argparse
import pathlib
import pickle
import tarfile
import boto3
import sagemaker
import os
import numpy as np
import pandas as pd
import mxnet as mx
import time
import PIL.Image as Image
import io

from sagemaker import image_uris, session
from sagemaker.model import Model
from sagemaker.predictor import RealTimePredictor
from sklearn.metrics import precision_recall_fscore_support, multilabel_confusion_matrix,classification_report

# Configure the root logger: records at INFO and above are emitted through a
# stream handler (StreamHandler writes to stderr when no stream is given).
_stderr_handler = logging.StreamHandler()
logger = logging.getLogger()
logger.addHandler(_stderr_handler)
logger.setLevel(logging.INFO)


def softmax(x):
    """Return the softmax of ``x``, normalised over all of its elements.

    The maximum score is subtracted before exponentiating. This does not
    change the mathematical result (the shift cancels in the quotient) but
    prevents ``np.exp`` from overflowing to ``inf`` -- and the result from
    becoming ``nan`` -- when the scores are large.

    Args:
        x: Scalar or array-like of scores.

    Returns:
        Values with the same shape as ``x`` that are non-negative and sum
        to 1. An empty input yields an empty array.
    """
    scores = np.asarray(x)
    if scores.size == 0:
        # Nothing to normalise; np.max would raise on an empty array.
        return np.exp(scores)
    if not np.issubdtype(scores.dtype, np.inexact):
        # Promote integer/bool input first: subtracting the max from an
        # unsigned integer array would wrap around instead of going negative.
        scores = scores.astype(np.float64)
    exps = np.exp(scores - np.max(scores))
    return exps / np.sum(exps)


def _load_test_records(idx_path, rec_path):
    """Read the image/label pairs stored in an indexed RecordIO file.

    Loading is best effort: the first record that cannot be read or decoded
    is logged and reading stops, keeping everything loaded up to that point.

    Args:
        idx_path: Path to the ``.idx`` index file.
        rec_path: Path to the ``.rec`` record file.

    Returns:
        A ``(images, labels)`` tuple of equally long lists; ``images`` holds
        the decoded arrays and ``labels`` the integer class labels.
    """
    record = mx.recordio.MXIndexedRecordIO(idx_path=idx_path, uri=rec_path, flag="r")
    images, labels = [], []
    try:
        # Use the keys stored in the index instead of assuming they are 0..n-1.
        for key in record.keys:
            try:
                header, img = mx.recordio.unpack_img(record.read_idx(key))
                label = int(header.label)
            except Exception:
                logger.exception("Error with loading item %s", key)
                break
            # Append only once both parts were obtained so that images and
            # labels can never get out of step.
            images.append(img)
            labels.append(label)
    finally:
        record.close()
    return images, labels


def _encode_jpeg(img):
    """Encode a decoded image array as JPEG bytes.

    Args:
        img: Image array as returned by ``mx.recordio.unpack_img``.

    Returns:
        The JPEG-encoded image as ``bytes``.
    """
    pixels = np.asarray(img)
    if pixels.ndim == 3 and pixels.shape[2] >= 3:
        # unpack_img decodes through OpenCV, which stores colour channels as
        # BGR(A), while PIL interprets the array as RGB. Reverse the first
        # three channels (dropping any alpha, which JPEG cannot store) so the
        # endpoint receives the same colours that are stored in the dataset.
        pixels = np.ascontiguousarray(pixels[:, :, 2::-1])
    buffer = io.BytesIO()
    Image.fromarray(pixels).save(buffer, "jpeg")
    return buffer.getvalue()


def _predicted_class(body):
    """Return the index of the highest score in an endpoint response.

    Args:
        body: Raw response body; a UTF-8 encoded JSON array of class scores.

    Returns:
        Index of the largest score.
    """
    scores = np.asarray(json.loads(body.decode("utf-8")), dtype=np.float32).ravel()
    return np.argmax(scores, axis=0)


if __name__ == "__main__":
    # Deploys the model artifact to a temporary real-time endpoint, classifies
    # every image of the test split and writes weighted precision, recall and
    # F1 score to /opt/ml/processing/evaluation/evaluation.json.

    # parse arguments
    logger.debug("Starting evaluation.")
    parser = argparse.ArgumentParser()
    parser.add_argument("--region", type=str, required=True)
    parser.add_argument("--role", type=str, required=True)
    parser.add_argument("--modelartifact", type=str, required=True)
    parser.add_argument("--prefix", type=str, required=True)

    args = parser.parse_args()
    region = args.region
    role = args.role
    prefix = args.prefix
    model_path = args.modelartifact

    boto3.setup_default_session(region_name=region)
    client = boto3.client('runtime.sagemaker')
    bucket = sagemaker.Session().default_bucket()
    s3 = boto3.client("s3", region_name=region)

    # load model using the same image classification image or another image of your choice
    logger.debug("Loading model.")
    training_image = sagemaker.image_uris.retrieve("image-classification", region)
    model = Model(model_data=model_path,
                  image_uri=training_image,
                  role=role,
                  predictor_cls=RealTimePredictor)

    # read data from S3 bucket: download data and read image and label from record
    logger.debug("Reading test data.")
    os.makedirs("data", exist_ok=True)

    precision = recall = f1 = 0
    predictor = None
    try:
        for data_type in ["test"]:
            # download test files
            rec_path = f"data/{data_type}.rec"
            idx_path = f"data/{data_type}.idx"
            s3.download_file(bucket, f"{prefix}/data/{data_type}/{data_type}.rec", rec_path)
            s3.download_file(bucket, f"{prefix}/data/{data_type}/{data_type}.idx", idx_path)

            X_test, y_test = _load_test_records(idx_path, rec_path)

            # run inference on test data
            logger.info(f"Performing predictions against {data_type} data.")

            if predictor is None:
                # Deploy once and reuse the endpoint for every split, so that
                # exactly one endpoint exists and is cleaned up afterwards.
                timestamp = time.strftime("-%Y-%m-%d-%H-%M-%S", time.gmtime())
                endpoint_name = "skin-classfication-" + timestamp
                predictor = model.deploy(initial_instance_count=1,
                                         instance_type='ml.m5.xlarge',
                                         role=role,
                                         endpoint_name=endpoint_name)

            predictions = []
            for img in X_test:
                response = client.invoke_endpoint(EndpointName=endpoint_name,
                                                  ContentType='application/x-image',
                                                  Body=_encode_jpeg(img))
                predictions.append(_predicted_class(response['Body'].read()))

            logger.debug("Calculating precision, recall and F1 score.")

            precision, recall, f1, _ = precision_recall_fscore_support(y_test, predictions, average='weighted')
            logger.debug("precision: %f, recall: %f, F1 score: %f", precision, recall, f1)

        # generate evaluation report
        report_dict = {
            "classification_metrics": {
                "precision": {
                    "value": float(precision),
                },
                "recall": {
                    "value": float(recall),
                },
                "f1_score": {
                    "value": float(f1),
                },
            },
        }

        output_dir = "/opt/ml/processing/evaluation"
        pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True)

        logger.info("Writing out evaluation report with precision: %f, recall: %f and F1 score: %f", precision, recall, f1)
        evaluation_path = f"{output_dir}/evaluation.json"
        with open(evaluation_path, "w") as f:
            json.dump(report_dict, f)
    finally:
        # clean up: always remove the endpoint, even when inference, scoring
        # or report writing failed, so no billable endpoint is left running.
        if predictor is not None:
            predictor.delete_endpoint()