#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import argparse
import pandas as pd
import numpy as np
import re
from scipy import stats
import subprocess
import io

# The aggregate statistics computed for the plot (e.g. the geometric mean)
# can trigger a benign divide-by-zero warning inside numpy when a sample is
# zero. Silence that warning once, here, at import time.
np.seterr(divide='ignore')


def perfstat(time, counter_numerator, counter_denominator, __unused__):
    """
    Measure two performance counters system-wide using perf-stat in a subprocess.

    The list of CPUs is discovered with ``lscpu -p=CPU``; perf then counts both
    events on those CPUs while ``sleep <time>`` runs, printing '|'-separated
    interval samples.

    Args:
        time: Measurement duration, passed verbatim to ``sleep``.
        counter_numerator: perf event specification of the numerator counter.
        counter_denominator: perf event specification of the denominator counter.
        __unused__: Ignored.  Present so that a ``[numerator, denominator, scale]``
            counter entry can be unpacked directly into this call.

    Returns:
        An ``io.StringIO`` holding perf's combined stdout/stderr output, or
        ``None`` if ``lscpu``/``perf`` could not be started or exited with an error.
    """
    # Compiled once: a line made up solely of digits is taken to be a CPU id.
    cpu_id_line = re.compile(r'''^(\d+)$''')
    try:
        res = subprocess.run(["lscpu", "-p=CPU"], check=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        listing = io.StringIO(res.stdout.decode('utf-8'))
        candidates = (cpu_id_line.search(line) for line in listing)
        # Anything that is not a bare number (headers, blank lines) is skipped.
        cpus = [found.group(1) for found in candidates if found is not None]

        command = [
            "perf", "stat", f"-C{','.join(cpus)}", "-I1000", "-x|", "-a",
            "-e", f"{counter_numerator}",
            "-e", f"{counter_denominator}",
            "--", "sleep", f"{time}",
        ]
        # stderr is folded into stdout so the counter samples end up in the
        # returned buffer regardless of which stream perf writes them to.
        res = subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        return io.StringIO(res.stdout.decode('utf-8'))
    except (subprocess.CalledProcessError, FileNotFoundError):
        # FileNotFoundError is what subprocess raises when the executable is
        # not on PATH at all, which is exactly the situation the hint below
        # describes; CalledProcessError covers a non-zero exit status.
        print("Failed to measure performance counters.")
        print("Please check that perf is installed using install_perfrunbook_dependencies.sh and in your PATH")
        return None


def plot_terminal(data, title, xtitle):
    """
    Draw one column of a data frame as a scatter plot in the terminal.

    The frame's index supplies the x values and the column named ``title``
    supplies the y values; ``title`` is also used as the plot title and
    ``xtitle`` as the x-axis label.  Rendering is done with plotext, which is
    imported lazily so it is only required when a plot is actually drawn.
    """
    import plotext as term_plot

    x_values = data.index.tolist()
    y_values = data[title].tolist()
    term_plot.scatter(x_values, y_values)

    # Decorate and size the figure before rendering it.
    term_plot.title(title)
    term_plot.xlabel(xtitle)
    term_plot.plot_size(100, 30)
    term_plot.show()


def plot_counter_stat(csv, stat_name, counter_numerator,
                      counter_denominator, scale):
    """
    Process the returned csv file into a time-series statistic to plot and
    also calculate some useful aggregate stats.

    Args:
        csv: File-like buffer of '|'-separated perf-stat interval output, or
            ``None`` when the measurement failed (in which case nothing is
            plotted).
        stat_name: Name of the statistic; used as the column name and plot title.
        counter_numerator: Event name whose counts form the numerator.
        counter_denominator: Event name whose counts form the denominator.
        scale: Factor the ratio is multiplied by.

    Returns:
        None.  The plot is written to the terminal as a side effect.
    """
    if csv is None:
        # The measurement step reports its own failure; there is nothing to plot.
        return

    df = pd.read_csv(csv, sep='|',
                     names=['time', 'count', 'rsrvd1', 'event',
                            'rsrvd2', 'frac', 'rsrvd3', 'rsrvd4'],
                     dtype={'time': np.float64, 'count': np.float64,
                            'rsrvd1': str, 'event': str, 'rsrvd2': str,
                            'frac': np.float64, 'rsrvd3': str, 'rsrvd4': str},
                     # perf prints these placeholders instead of a number when
                     # an event could not be counted; read them as NaN so the
                     # float conversion does not fail and the affected
                     # intervals are dropped below.
                     na_values={'count': ['<not counted>', '<not supported>']})

    # Pair the n-th numerator sample with the n-th denominator sample by
    # renumbering each event's rows from zero before dividing.
    numerator = df.loc[df['event'] == counter_numerator, 'count'].reset_index(drop=True)
    denominator = df.loc[df['event'] == counter_denominator, 'count'].reset_index(drop=True)

    df_processed = pd.DataFrame()
    df_processed[stat_name] = numerator / denominator * scale
    df_processed.dropna(inplace=True)

    if df_processed.empty:
        print(f"No usable samples were collected for {stat_name}")
        return

    # Calculate some meaningful aggregate stats for comparing time-series plots
    samples = df_processed[stat_name]
    geomean = stats.gmean(samples)
    p50 = stats.scoreatpercentile(samples, 50)
    p90 = stats.scoreatpercentile(samples, 90)
    p99 = stats.scoreatpercentile(samples, 99)
    xtitle = f"gmean:{geomean:>6.2f} p50:{p50:>6.2f} p90:{p90:>6.2f} p99:{p99:>6.2f}"

    plot_terminal(df_processed, stat_name, xtitle)


def get_cpu_type(cpuinfo_path="/proc/cpuinfo"):
    """
    Identify the processor from the first matching line of a cpuinfo file.

    The first line containing "model name" yields that model string.  If a
    line containing "CPU part" comes first, its value is translated to a
    Graviton generation name.

    Args:
        cpuinfo_path: File to read; defaults to the kernel's /proc/cpuinfo.

    Returns:
        The model name, a Graviton generation name, the raw "CPU part" value
        when it is not a known Graviton part (so callers can report exactly
        what was found), or ``None`` if neither kind of line is present.
    """
    GRAVITON_MAPPING = {"0xd0c": "Graviton2", "0xd40": "Graviton3"}
    with open(cpuinfo_path, "r") as f:
        # Iterate lazily; the answer is on the first matching line.
        for line in f:
            if "model name" in line:
                return line.split(":")[-1].strip()
            if "CPU part" in line:
                cpu = line.split(":")[-1].strip()
                # Fall back to the raw part id instead of raising KeyError.
                return GRAVITON_MAPPING.get(cpu, cpu)
    return None


# Counter tables.  Each entry maps a statistic name (selectable via --stat) to
# [numerator event, denominator event, scale]; the plotted value is
# numerator / denominator * scale.  The event strings are handed to
# `perf stat -e` unchanged.
# NOTE(review): the "-mpki"/"-pki"/"_pkc" suffixes presumably mean
# per-kilo-instruction / per-kilo-cycle, matching the scale of 1000 -- confirm
# the raw event codes against the vendor PMU documentation.

# Counters used for every Graviton entry in filter_proc.
UNIVERSAL_GRAVITON_CTRS = {
    "ipc": ["armv8_pmuv3_0/event=0x8/", "armv8_pmuv3_0/event=0x11/", 1],
    "branch-mpki": ["armv8_pmuv3_0/event=0x10/", "armv8_pmuv3_0/event=0x8/", 1000],
    "data-l1-mpki": ["armv8_pmuv3_0/event=0x3/", "armv8_pmuv3_0/event=0x8/", 1000],
    "inst-l1-mpki": ["armv8_pmuv3_0/event=0x1/", "armv8_pmuv3_0/event=0x8/", 1000],
    "l2-mpki": ["armv8_pmuv3_0/event=0x17/", "armv8_pmuv3_0/event=0x8/", 1000],
    "l3-mpki": ["armv8_pmuv3_0/event=0x37/", "armv8_pmuv3_0/event=0x8/", 1000],
    "stall_frontend_pkc": ["armv8_pmuv3_0/event=0x23/", "armv8_pmuv3_0/event=0x11/", 1000],
    "stall_backend_pkc": ["armv8_pmuv3_0/event=0x24/", "armv8_pmuv3_0/event=0x11/", 1000],
    "inst-tlb-mpki": ["armv8_pmuv3_0/event=0x2/", "armv8_pmuv3_0/event=0x8/", 1000],
    "inst-tlb-tw-pki": ["armv8_pmuv3_0/event=0x35/", "armv8_pmuv3_0/event=0x8/", 1000],
    "data-tlb-mpki": ["armv8_pmuv3_0/event=0x5/", "armv8_pmuv3_0/event=0x8/", 1000],
    "data-tlb-tw-pki": ["armv8_pmuv3_0/event=0x34/", "armv8_pmuv3_0/event=0x8/", 1000],
    "code-sparsity": ["armv8_pmuv3_0/event=0x11c/", "armv8_pmuv3_0/event=0x8/", 1000],
}
# Extra counter merged in for the Graviton3 entry of filter_proc only.
GRAVITON3_CTRS = {
    "stall_backend_mem_pkc": ["armv8_pmuv3_0/event=0x4005/", "armv8_pmuv3_0/event=0x11/", 1000],
}
# Counters used for every Intel entry in filter_proc.
UNIVERSAL_INTEL_CTRS = {
    "ipc": ["cpu/event=0xc0,umask=0x0/", "cpu/event=0x3c,umask=0x0/", 1],
    "branch-mpki": ["cpu/event=0xC5,umask=0x0/", "cpu/event=0xc0,umask=0x0/", 1000],
    "data-l1-mpki": ["cpu/event=0x51,umask=0x1/", "cpu/event=0xc0,umask=0x0/", 1000],
    "inst-l1-mpki": ["cpu/event=0x24,umask=0xe4/", "cpu/event=0xc0,umask=0x0/", 1000],
    "l2-mpki": ["cpu/event=0xf1,umask=0x1f/", "cpu/event=0xc0,umask=0x0/", 1000],
    "l3-mpki": ["cpu/event=0x2e,umask=0x41/", "cpu/event=0xc0,umask=0x0/", 1000],
    "stall_frontend_pkc": ["cpu/event=0x9C,umask=0x1,cmask=0x4/", "cpu/event=0x3c,umask=0x0/", 1000],
    "stall_backend_pkc": ["cpu/event=0xA2,umask=0x1/", "cpu/event=0x3c,umask=0x0/", 1000],
    "inst-tlb-mpki": ["cpu/event=0x85,umask=0x20/", "cpu/event=0xc0,umask=0x0/", 1000],
    "inst-tlb-tw-pki": ["cpu/event=0x85,umask=0x01/", "cpu/event=0xc0,umask=0x0/", 1000],
    "data-tlb-mpki": ["cpu/event=0x08,umask=0x20/", "cpu/event=0xc0,umask=0x0/", 1000],
    "data-st-tlb-mpki": ["cpu/event=0x49,umask=0x20/", "cpu/event=0xc0,umask=0x0/", 1000],
    "data-tlb-tw-pki": ["cpu/event=0x08,umask=0x01/", "cpu/event=0xc0,umask=0x0/", 1000],
    "data-st-tlb-tw-pki": ["cpu/event=0x49,umask=0x01/", "cpu/event=0xc0,umask=0x0/", 1000],
}
# Overrides merged on top of UNIVERSAL_INTEL_CTRS for the Xeon Platinum 8375C
# entry of filter_proc: both keys replace the universal definitions of the
# same name.
ICX_CTRS = {
    "stall_frontend_pkc": ["cpu/event=0x9C,umask=0x1,cmask=0x5/", "cpu/event=0x3c,umask=0x0/", 1000],
    "stall_backend_pkc": ["cpu/event=0xa4,umask=0x2/", "cpu/event=0xa4,umask=0x01/", 1000], 
}

# Maps the processor identifier returned by get_cpu_type() to the counter
# table available on that processor.  A processor that is not listed here is
# reported as unsupported by the script entry point.  Where two tables are
# merged with {**a, **b}, entries from the second table win on duplicate keys.
filter_proc = {
    "Graviton2": UNIVERSAL_GRAVITON_CTRS,
    "Graviton3": {**UNIVERSAL_GRAVITON_CTRS, **GRAVITON3_CTRS},
    "Intel(R) Xeon(R) Platinum 8124M CPU @ 3.00GHz": UNIVERSAL_INTEL_CTRS,
    "Intel(R) Xeon(R) Platinum 8175M CPU @ 2.50GHz": UNIVERSAL_INTEL_CTRS,
    "Intel(R) Xeon(R) Platinum 8275CL CPU @ 3.00GHz": UNIVERSAL_INTEL_CTRS,
    "Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz": UNIVERSAL_INTEL_CTRS,
    "Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz": {**UNIVERSAL_INTEL_CTRS, **ICX_CTRS}
}

if __name__ == "__main__":
    # Script entry point: pick the counter table for this CPU, parse the
    # command line, verify root privileges, measure, then plot.
    try:
        processor_version = get_cpu_type()
    except KeyError as err:
        # An unrecognised "CPU part" value surfaces as a KeyError carrying the
        # raw part id; report it like any other unsupported processor.
        processor_version = err.args[0]

    supported_ctrs = filter_proc.get(processor_version)
    if supported_ctrs is None:
        print(f"{processor_version} is not supported")
        raise SystemExit(1)
    stat_choices = list(supported_ctrs.keys())

    parser = argparse.ArgumentParser()
    parser.add_argument("--stat", default="ipc", type=str, choices=stat_choices)
    parser.add_argument("--time", default=60, type=int, help="How long to measure for in seconds")
    parser.add_argument("--custom_ctr", type=str,
                        help="Specify a custom counter ratio and scaling factor as 'ctr1|ctr2|scale'"
                             ", calculated as ctr1/ctr2 * scale")

    # Parse before the privilege check so that --help and argument errors are
    # reported without requiring sudo.
    args = parser.parse_args()

    res = subprocess.run(["id", "-u"], check=True, stdout=subprocess.PIPE)
    if int(res.stdout) > 0:
        print("Must be run under sudo privileges")
        raise SystemExit(1)

    if args.custom_ctr:
        ctrs = args.custom_ctr.split("|")
        # Reject malformed specifications with a usage message rather than an
        # IndexError/ValueError traceback.
        if len(ctrs) < 3:
            parser.error("--custom_ctr must have the form 'ctr1|ctr2|scale'")
        try:
            custom_scale = int(ctrs[2])
        except ValueError:
            parser.error("--custom_ctr scale must be an integer")
        counter_info = [ctrs[0], ctrs[1], custom_scale]
    else:
        counter_info = supported_ctrs[args.stat]

    csv = perfstat(args.time, *counter_info)
    if csv is None:
        # perfstat has already explained the failure; there is nothing to plot.
        raise SystemExit(1)
    plot_counter_stat(csv, args.stat, *counter_info)