# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#	http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Simple helper script to create graphs based on multiple
# test_execution.json files. Each such file is a summary of the
# results of a single test execution and is stored in
# ~/.benchmark/benchmarks/test_executions/TEST_EXECUTION_TS/.
# This script is not integrated into Benchmark and is not
# installed with Benchmark.
#
# It requires matplotlib (install with pip3 install matplotlib).
#
#
# Usage:
# python3 analyze.py [--label=LABEL] /path1/to/test_execution.json /path2/to/test_execution.json
#
# Output: A bunch of .png files in the current directory.
# Each graph shows one data series per test_execution.
# The label key is chosen based on the
# command line parameter `--label`
#


import argparse
import json
import sys

try:
    import matplotlib.pyplot as plt
except ImportError:
    print("This script requires matplotlib. Please install with 'pip3 install matplotlib' and retry.", file=sys.stderr)
    sys.exit(1)


def create_plot():
    """Create a fresh figure with a single axes, sized 18 x 10 inches.

    matplotlib's rc settings are reset to their defaults first.

    :return: A ``(figure, axes)`` tuple as produced by ``plt.subplots()``.
    """
    width_inches, height_inches = 18, 10

    plt.rcdefaults()
    figure, axes = plt.subplots()
    figure.set_size_inches(width_inches, height_inches)
    return figure, axes


def present(a_plot, name):
    """Save the plot as ``<name>.png`` in the current directory, then close it.

    :param a_plot: Object offering ``savefig`` and ``close``; the callers in this
                   file pass the ``matplotlib.pyplot`` module.
    :param name: File name of the image without the ``.png`` extension.
    """
    target_file = "%s.png" % name
    # Calling a_plot.show() here instead would display the graph rather than store it.
    a_plot.savefig(target_file, bbox_inches='tight')
    # Close explicitly so that resources are freed.
    a_plot.close()


def decode_percentile_key(k):
    """Turn an encoded percentile key such as ``"99_9"`` into the float ``99.9``.

    :param k: Percentile key in which ``_`` stands for the decimal point.
    :return: The percentile as a float.
    :raises ValueError: If the key does not represent a number.
    """
    decimal_text = k.replace("_", ".")
    return float(decimal_text)


def data_series_name(d, label_key):
    """Build the display name of a data series from one test execution document.

    ``label_key`` is a comma-separated list of labels. Each label is a dot-separated
    path that is resolved key by key in ``d``. The resolved values are joined with
    ``","`` in the order given.

    :param d: Parsed test execution document (nested mappings).
    :param label_key: Label specification, e.g. ``"environment,user-tags.run"``.
    :return: The joined label values as a single string.
    :raises KeyError: If a path element is missing in ``d``.
    """
    parts = []
    for lbl in label_key.split(","):
        doc = d
        for k in lbl.split("."):
            doc = doc[k]
        # Resolved values are not necessarily strings (e.g. numeric tags);
        # str.join would reject those, so convert explicitly.
        parts.append(str(doc))
    return ",".join(parts)


def include(series):
    """Decide whether a loaded test execution takes part in the graphs.

    Every series is currently accepted; this is the place to add a filter.

    :param series: Parsed test execution document.
    :return: Always ``True``.
    """
    return True


def plot_service_time(raw_data, label_key):
    """Write one ``service_time_<operation>.png`` graph per operation.

    Each graph plots service time over percentile and contains one line per
    test execution that reported the operation.

    :param raw_data: Iterable of parsed test execution documents. Each one must
                     provide ``results.op_metrics``, a list of entries with an
                     ``operation`` name and a ``service_time`` mapping whose keys
                     are encoded percentiles (e.g. ``"99_9"``).
    :param label_key: Label specification handed to ``data_series_name`` in order
                      to name each line.
    """
    service_time_per_op = {}

    for d in raw_data:
        data_series = data_series_name(d, label_key)
        for op_metrics in d["results"]["op_metrics"]:
            percentiles = []
            percentile_values = []
            for key, value in op_metrics["service_time"].items():
                try:
                    percentile = decode_percentile_key(key)
                except ValueError:
                    # Not a percentile entry (throughput metrics, for instance, carry a
                    # "unit" key - presumably service time metrics can have similar
                    # extras). Skip it instead of aborting the whole plot.
                    continue
                # append key and value together so both lists always stay aligned
                percentiles.append(percentile)
                percentile_values.append(value)

            service_time_per_op.setdefault(op_metrics["operation"], []).append({
                "data_series": data_series,
                "percentiles": percentiles,
                "percentile_values": percentile_values,
            })

    for op, results in service_time_per_op.items():
        _, ax = create_plot()
        legend_handles = []
        legend_labels = []

        for candidate in results:
            label = candidate["data_series"]
            series = ax.plot(candidate["percentiles"], candidate["percentile_values"], marker='.', label=label)
            legend_handles.append(series[0])
            legend_labels.append(label)

        ax.set_ylabel("Service Time [ms]")
        ax.set_xlabel("Percentile")
        ax.set_title("Service Time of %s" % op)
        ax.set_ylim(ymin=0)

        # shrink the axes to 80% of their width to make room for the legend on the right
        box = ax.get_position()
        ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
        ax.legend(legend_handles, legend_labels, loc='center left', bbox_to_anchor=(1, 0.5))

        present(plt, "service_time_%s" % op)


def plot_throughput(raw_data, label_key):
    """Write one ``throughput_<operation>.png`` bar chart per operation.

    Each chart contains one bar per test execution that reported the operation.
    The bar height is the median throughput and the error bar spans from the
    minimum to the maximum throughput.

    :param raw_data: Iterable of parsed test execution documents. Each one must
                     provide ``results.op_metrics``, a list of entries with an
                     ``operation`` name and a ``throughput`` mapping with the keys
                     ``min``, ``median``, ``max`` and ``unit``.
    :param label_key: Label specification handed to ``data_series_name`` in order
                      to name each bar.
    """
    throughput_per_op = {}

    for d in raw_data:
        data_series = data_series_name(d, label_key)
        for op_metrics in d["results"]["op_metrics"]:
            throughput_metrics = op_metrics["throughput"]
            throughput_per_op.setdefault(op_metrics["operation"], []).append({
                "data_series": data_series,
                "max": throughput_metrics["max"],
                "median": throughput_metrics["median"],
                "min": throughput_metrics["min"],
                "unit": throughput_metrics["unit"]
            })

    for op, results in throughput_per_op.items():
        _, ax = create_plot()
        x_tick_labels = []
        throughput = []
        min_throughput = []
        max_throughput = []
        width = 0.35
        unit = ""

        for candidate in results:
            x_tick_labels.append(candidate["data_series"])
            cmin = candidate["min"]
            cmedian = candidate["median"]
            cmax = candidate["max"]
            # all units per op are the same but they can change across operations.
            unit = candidate["unit"]
            # Only a missing value (None) means "no data"; a measured value of 0 is
            # legitimate and must not blank out the whole bar.
            if cmin is None or cmedian is None or cmax is None:
                min_throughput.append(0)
                throughput.append(0)
                max_throughput.append(0)
            else:
                # yerr expects distances from the bar height, not absolute values
                min_throughput.append(cmedian - cmin)
                throughput.append(cmedian)
                max_throughput.append(cmax - cmedian)

        indices = range(len(throughput))

        ax.bar(indices, throughput, width, yerr=[min_throughput, max_throughput])
        ax.set_xticks(indices)
        ax.set_xticklabels(x_tick_labels)
        ax.set_ylabel("Throughput [%s]" % unit)
        ax.set_title("Throughput of %s" % op)
        ax.set_ylim(ymin=0)

        present(plt, "throughput_%s" % op)


def plot_gc_times(raw_data, label_key):
    """Write ``gc_times.png``, a grouped bar chart of old and young GC times.

    For every test execution two bars are drawn next to each other: the value of
    ``results.old_gc_time`` and the value of ``results.young_gc_time``.

    :param raw_data: Iterable of parsed test execution documents.
    :param label_key: Label specification handed to ``data_series_name`` in order
                      to name each bar group.
    """
    _, ax = create_plot()
    bar_width = 0.35

    # one (label, old gc time, young gc time) triple per test execution
    rows = [
        (data_series_name(d, label_key), d["results"]["old_gc_time"], d["results"]["young_gc_time"])
        for d in raw_data
    ]
    x_tick_labels = [row[0] for row in rows]
    old_gc_times = [row[1] for row in rows]
    young_gc_times = [row[2] for row in rows]

    old_positions = range(len(old_gc_times))
    # the young GC bars sit directly to the right of the old GC bars
    young_positions = [pos + bar_width for pos in old_positions]

    old_bar = ax.bar(old_positions, old_gc_times, bar_width)
    # the tick of each group is placed half a bar width right of the old GC bar position
    ax.set_xticks([pos + bar_width / 2 for pos in old_positions])
    ax.set_xticklabels(x_tick_labels)
    ax.set_ylabel("Total Duration [ms]")
    ax.set_title("GC Times")

    young_bar = ax.bar(young_positions, young_gc_times, bar_width)

    # shrink the axes to 80% of their width to make room for the legend on the right
    bounds = ax.get_position()
    ax.set_position([bounds.x0, bounds.y0, bounds.width * 0.8, bounds.height])

    ax.legend([old_bar[0], young_bar[0]], ["Old GC", "Young GC"], loc='center left', bbox_to_anchor=(1, 0.5))
    ax.set_ylim(ymin=0)

    present(plt, "gc_times")


def plot(raw_data, label_key):
    """Create all graphs: GC times first, then throughput, then service time.

    :param raw_data: Iterable of parsed test execution documents.
    :param label_key: Label specification used to name the data series.
    """
    for render in (plot_gc_times, plot_throughput, plot_service_time):
        render(raw_data, label_key)


def parse_args():
    """Parse the command line of this script.

    :return: Namespace with ``label`` (defaults to ``"test-execution-timestamp"``)
             and ``path`` (a list with at least one file path).
    """
    cli = argparse.ArgumentParser(description="Turns test_execution.json files into graphs")

    # No `choices` restriction is applied, so any label specification is accepted.
    label_help = "defines which attribute to use for labelling data series (default: test-execution-timestamp)."
    cli.add_argument("--label", default="test-execution-timestamp", help=label_help)

    path_help = "Full path to one or more test_execution.json files"
    cli.add_argument("path", nargs="+", help=path_help)

    return cli.parse_args()


def main():
    """Load every file given on the command line and plot the accepted ones.

    Each path is parsed as JSON. Documents for which ``include`` returns a true
    value are collected and handed to ``plot`` together with the chosen label.
    """
    args = parse_args()
    series = []

    for f in args.path:
        # the context manager closes the file as soon as it has been parsed
        with open(f, "rt") as source:
            a_series = json.load(source)
        if include(a_series):
            series.append(a_series)
    plot(series, args.label)


# Run the analysis only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()