# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0 OR ISC

import sys

import numpy as np
import pandas as pd

# Benchmark descriptions whose regressions are ignored when deciding
# whether a comparison should fail
ignored_fields = np.array([
    'TrustToken-Exp1-Batch1 generate_key',
    'TrustToken-Exp1-Batch1 begin_issuance',
    'TrustToken-Exp1-Batch1 issue',
    'TrustToken-Exp1-Batch1 finish_issuance',
    'TrustToken-Exp1-Batch1 begin_redemption',
    'TrustToken-Exp1-Batch1 redeem',
    'TrustToken-Exp1-Batch1 finish_redemption',
    'TrustToken-Exp1-Batch10 generate_key',
    'TrustToken-Exp1-Batch10 begin_issuance',
    'TrustToken-Exp1-Batch10 issue',
    'TrustToken-Exp1-Batch10 finish_issuance',
    'TrustToken-Exp1-Batch10 begin_redemption',
    'TrustToken-Exp1-Batch10 redeem',
    'TrustToken-Exp1-Batch10 finish_redemption',
    'TrustToken-Exp2VOPRF-Batch1 generate_key',
    'TrustToken-Exp2VOPRF-Batch1 begin_issuance',
    'TrustToken-Exp2VOPRF-Batch1 issue',
    'TrustToken-Exp2VOPRF-Batch1 finish_issuance',
    'TrustToken-Exp2VOPRF-Batch1 begin_redemption',
    'TrustToken-Exp2VOPRF-Batch1 redeem',
    'TrustToken-Exp2VOPRF-Batch1 finish_redemption',
    'TrustToken-Exp2VOPRF-Batch10 generate_key',
    'TrustToken-Exp2VOPRF-Batch10 begin_issuance',
    'TrustToken-Exp2VOPRF-Batch10 issue',
    'TrustToken-Exp2VOPRF-Batch10 finish_issuance',
    'TrustToken-Exp2VOPRF-Batch10 begin_redemption',
    'TrustToken-Exp2VOPRF-Batch10 redeem',
    'TrustToken-Exp2VOPRF-Batch10 finish_redemption',
    'TrustToken-Exp2PMB-Batch1 generate_key',
    'TrustToken-Exp2PMB-Batch1 begin_issuance',
    'TrustToken-Exp2PMB-Batch1 issue',
    'TrustToken-Exp2PMB-Batch1 finish_issuance',
    'TrustToken-Exp2PMB-Batch1 begin_redemption',
    'TrustToken-Exp2PMB-Batch1 redeem',
    'TrustToken-Exp2PMB-Batch1 finish_redemption',
    'TrustToken-Exp2PMB-Batch10 generate_key',
    'TrustToken-Exp2PMB-Batch10 begin_issuance',
    'TrustToken-Exp2PMB-Batch10 issue',
    'TrustToken-Exp2PMB-Batch10 finish_issuance',
    'TrustToken-Exp2PMB-Batch10 begin_redemption',
    'TrustToken-Exp2PMB-Batch10 redeem',
    'TrustToken-Exp2PMB-Batch10 finish_redemption'])


# Helper function to read json or csv data obtained from the speed tool into a pandas dataframe
def read_data(file):
    if file.endswith(".json"):
        df = pd.read_json(file)
    else:
        # This assumes we're using a csv generated by convert_json_to_csv.py
        df = pd.read_csv(file, skiprows=1, index_col=0)
    return df


# Returns True if the compared dataframe contains at least one regressed
# benchmark that is not in the ignored list
def significant_regressions(compared_df):
    if compared_df.empty:
        return False
    descriptions = compared_df['description.1']
    evaluation = np.isin(descriptions, ignored_fields)
    return not np.all(evaluation)
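# Illustrative sketch of the behaviour above (not executed by this script; the
# toy dataframes and the 'AES-128-GCM encrypt' name are hypothetical examples):
# a compared dataframe whose descriptions all fall in ignored_fields is not a
# significant regression, while any other benchmark name makes it one.
#
#   >>> significant_regressions(pd.DataFrame({'description.1': ['TrustToken-Exp1-Batch1 issue']}))
#   False
#   >>> significant_regressions(pd.DataFrame({'description.1': ['AES-128-GCM encrypt']}))
#   True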
def main():
    if len(sys.argv) != 4:
        print("Usage: compare_results.py [file1] [file2] [output filename]", file=sys.stderr)
        sys.exit(1)

    file1 = sys.argv[1]
    file2 = sys.argv[2]
    if not (file1.endswith((".json", ".csv")) and file2.endswith((".json", ".csv"))):
        print("Provided files must either be .json files or .csv files", file=sys.stderr)
        sys.exit(1)

    # Read the contents of the files into dataframes in preparation for comparison
    # Note: we're assuming that the provided input is derived from the json output of the speed tool
    df1 = read_data(file1)
    df2 = read_data(file2)

    # Only compare benchmarks that appear in both files, since the speed tool at
    # the time of writing has some tests that are disabled for OpenSSL.
    # .iloc[:, 0] selects the 0th column (the benchmark description); we drop rows
    # whose description in one dataframe isn't present in the other.
    # Details of .iloc: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.iloc.html
    # .shape[0] is the number of rows in the dataframe.
    # Details of .shape: https://pandas.pydata.org/pandas-docs/version/0.23/generated/pandas.DataFrame.shape.html
    if df1.shape[0] > df2.shape[0]:
        df1 = df1[df1.iloc[:, 0].isin(df2.iloc[:, 0])]
    elif df2.shape[0] > df1.shape[0]:
        df2 = df2[df2.iloc[:, 0].isin(df1.iloc[:, 0])]

    # Reset any broken indices left in the dataframes by the filtering above
    df1.reset_index(drop=True, inplace=True)
    df2.reset_index(drop=True, inplace=True)

    # Rename the column labels so the two dataframes stay distinguishable after concatenation
    df1.columns = [str(col) + '.1' for col in df1.columns]
    df2.columns = [str(col) + '.2' for col in df2.columns]

    # Set up the data: average time per call for each benchmark
    df1_numCalls = df1['numCalls.1']
    df2_numCalls = df2['numCalls.2']
    df1_time = df1['microseconds.1']
    df2_time = df2['microseconds.2']
    df1_avg_time = df1_time.astype(float) / df1_numCalls
    df2_avg_time = df2_time.astype(float) / df2_numCalls

    # Put both dataframes side by side for comparison
    dfs = pd.concat([df1, df2], axis=1)

    # Keep only the entries that regressed by 15% or more
    compared = np.where(((df2_avg_time / df1_avg_time) - 1) >= 0.15, df1.iloc[:, 0], np.nan)
    compared_df = dfs.loc[dfs.iloc[:, 0].isin(compared)]

    # Set up the data for the regressed entries
    compared_df1_numCalls = compared_df['numCalls.1']
    compared_df2_numCalls = compared_df['numCalls.2']
    compared_df1_time = compared_df['microseconds.1']
    compared_df2_time = compared_df['microseconds.2']
    compared_df1_avg_time = compared_df1_time.astype(float) / compared_df1_numCalls
    compared_df2_avg_time = compared_df2_time.astype(float) / compared_df2_numCalls

    # Add the regression data to the table
    compared_df.loc[:, "Percentage Difference"] = 100 * ((compared_df2_avg_time / compared_df1_avg_time) - 1)

    # If the compared dataframe contains anything beyond the ignored fields,
    # there are significant regressions present
    if significant_regressions(compared_df):
        output_file = sys.argv[3]
        if not output_file.endswith(".csv"):
            output_file += ".csv"
        with open(output_file, "w") as f:
            f.write("{},,,,{},,,,\n".format(file1, file2))
        compared_df.to_csv(output_file, index=False, mode='a')
        # Exit with an error code to denote there is a regression
        print("Regression detected between {} and {}".format(file1, file2), file=sys.stderr)
        sys.exit(5)


if __name__ == "__main__":
    main()
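# Example invocation (the file names here are hypothetical):
#
#   python compare_results.py baseline.json candidate.json regressions
#
# If any non-ignored benchmark in candidate.json averages at least 15% more
# microseconds per call than in baseline.json, the regressed rows are written
# to regressions.csv and the script exits with status 5; otherwise nothing is
# written and the script exits with status 0.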