import datetime
import glob
import json
import os

import numpy as np

if __name__ == "__main__":
    # Captured inference data is mounted under this path by the monitoring job;
    # the nested wildcards walk the endpoint/variant/year/month/day/hour hierarchy.
    files = glob.glob("/opt/ml/processing/input/endpoint/*/*/*/*/*/*/*")

    # Collect the predicted class from every captured inference record.
    predictions = []
    for file in files:
        with open(file) as f:
            content = f.read()
        for entry in content.split("\n"):
            try:
                prediction = json.loads(entry)["captureData"]["endpointOutput"]["data"]
                predictions.append(json.loads(prediction)[0][0])
            except (json.JSONDecodeError, KeyError, IndexError, TypeError):
                # Skip blank lines and records that don't match the expected capture format.
                continue

    # Count predictions per class and track the most frequently predicted class.
    counts = {}
    max_count = 0
    max_class = 0
    for prediction in predictions:
        counts[prediction] = counts.get(prediction, 0) + 1
        if counts[prediction] > max_count:
            max_count = counts[prediction]
            max_class = prediction

    total = np.sum(list(counts.values()))
    ratio = max_count / total if total > 0 else 0.0

    # Report a violation if a single class dominates beyond the configured threshold.
    with open("/opt/ml/output/message", "w") as outfile:
        if ratio > float(os.environ["THRESHOLD"]):
            outfile.write(
                f"CompletedWithViolations: Class {max_class} predicted more than "
                f"{int(ratio * 100)}% of the time"
            )
            print(f"Class {max_class} predicted more than {int(ratio * 100)}% of the time")
        else:
            outfile.write("Completed: Job completed successfully with no violations.")

    print(f"Predicted classes {counts}")

    # Emit one CloudWatch metric per class, one JSON object per line.
    with open("/opt/ml/output/metrics/cloudwatch/cloudwatch_metrics.jsonl", "a+") as outfile:
        for key, val in counts.items():
            output_dict = {
                "MetricName": f"Predicted Class {key}",
                # The timestamp carries a trailing "Z", so format it in UTC.
                "Timestamp": datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
                "Dimensions": [
                    {"Name": "Endpoint", "Value": "endpoint_0"},
                    {"Name": "MonitoringSchedule", "Value": "schedule_0"},
                ],
                "Value": val,
            }
            json.dump(output_dict, outfile)
            outfile.write("\n")