# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: MIT-0
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np

# Display names for the classifiers, oversamplers, and metrics used in the plots.
CLASSIFIERS = {
    "cat": "Cat",
    "dt": "DT",
    "xgb": "XGB",
    "lgbm": "LGBM",
    "svm": "SVM",
    "mlp": "MLP",
}

OVERSAMPLERS = {
    "none": "Baseline",
    "default": "Default",
    "random": "Random",
    "smote": "SMOTE",
    "svm": "SVM",
    "adasyn": "ADASYN",
    "border": "BL",
    "poly": "Poly",
}

METRICS = {
    "roc_auc": "AUC",
    "neg_brier_score": "Minus Brier score",
    "f1": "F1",
    "f2": "F2",
    "jaccard": "Jaccard",
    "balanced_accuracy": "Balanced accuracy",
    "neg_log_loss": "Minus log-loss",
    "class_log_loss": "",
}


def filter_optimal_hps(df: pd.DataFrame, opt_metric: str, output_metrics: list):
    """
    For each {dataset, seed, oversampler, classifier}, keep only the results of the HP
    configuration that yields the best score according to opt_metric. Then calculate
    the average and rank of the scores in output_metrics.

    Parameters
    ----------
    df : pandas.DataFrame
        dataframe of the experiment results
    opt_metric : str
        metric used for optimization
    output_metrics : list
        metrics to include in the output

    Returns
    -------
    pandas.DataFrame
        Filtered and summarized results
    """
    num_datasets = len(np.unique(df["dataset"]))
    num_seeds = len(np.unique(df["seed"]))

    # Keep, for each {dataset, seed, learner, oversampler}, the row with the best
    # opt_metric (ties broken by the highest param_set, thanks to the stable sort)
    df = (
        df.sort_values("param_set", ascending=False)
        .sort_values([opt_metric], ascending=False, kind="stable")
        .groupby(["dataset", "seed", "learner", "oversampler"])
        .agg({om: "first" for om in output_metrics})
        .reset_index()
    )

    # Rank models per dataset and seed (rank 1 = best score)
    for om in output_metrics:
        df[f"{om}.rank"] = df.groupby(["dataset", "seed"])[om].rank(ascending=False)

    # Aggregate mean score and mean rank over the datasets
    df = df.groupby(["learner", "seed", "oversampler"]).agg(
        {
            **{om: "mean" for om in output_metrics},
            **{f"{om}.rank": "mean" for om in output_metrics},
            "dataset": "count",
        }
    )

    # Aggregate mean and std over the seeds
    df = df.groupby(["learner", "oversampler"]).agg(
        {
            **{om: ["mean", "std"] for om in output_metrics},
            **{f"{om}.rank": ["mean", "std"] for om in output_metrics},
            "dataset": "sum",
        }
    )

    # Verify that all models have values for all datasets and seeds
    assert np.max(df["dataset"].to_numpy().ravel()) == num_datasets * num_seeds
    assert np.min(df["dataset"].to_numpy().ravel()) == num_datasets * num_seeds

    return df
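

# Illustrative sketch of how filter_optimal_hps might be called: a tiny synthetic frame
# with one row per {dataset, seed, learner, oversampler, param_set} and a score column.
# The "test." prefix in the metric column name is an assumption inferred from how
# avg_plots splits the metric name; the real experiment export may name columns differently.
def _example_filter_optimal_hps():
    toy = pd.DataFrame(
        {
            "dataset": ["d1"] * 4,
            "seed": [0, 0, 1, 1],
            "learner": ["xgb"] * 4,
            "oversampler": ["none"] * 4,
            "param_set": [0, 1, 0, 1],
            "test.roc_auc": [0.81, 0.85, 0.79, 0.83],
        }
    )
    # Keeps the best param_set per {dataset, seed, learner, oversampler} and returns
    # one row per (learner, oversampler) with ("test.roc_auc", "mean"/"std") columns.
    summary = filter_optimal_hps(toy, "test.roc_auc", ["test.roc_auc"])
    print(summary)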


def avg_plots(df: pd.DataFrame, metric: str, plot_rank: bool = True):
    """
    Plot, for every {classifier, oversampler} combination, the mean and standard
    deviation of the given metric (and, optionally, of its rank) as horizontal
    error bars.

    Parameters
    ----------
    df : pandas.DataFrame
        Filtered and summarized dataframe, produced by filter_optimal_hps
    metric : str
        metric to present
    plot_rank : bool
        Whether to also plot the rank

    Returns
    -------
    None
    """
    score_mean = []
    score_std = []
    rank_mean = []
    rank_std = []
    model_names = []
    major_ticks = []

    classifiers = list(np.unique(df.reset_index()["learner"]))
    oversamplers = list(np.unique(df.reset_index()["oversampler"]))

    # Collect the mean/std of the score and of the rank for every model
    for classifier in classifiers:
        for oversampler in oversamplers:
            idx = (classifier, oversampler)
            score_mean.append(df.loc[idx][(metric, "mean")])
            score_std.append(df.loc[idx][(metric, "std")])
            rank_mean.append(df.loc[idx][(f"{metric}.rank", "mean")])
            rank_std.append(df.loc[idx][(f"{metric}.rank", "std")])
            model_name = CLASSIFIERS[classifier]
            if oversampler != "none":
                model_name += "+" + OVERSAMPLERS[oversampler]
            model_names.append(model_name)
        # Add an empty row between classifiers (the blank label is padded with a
        # different number of spaces each time so that the tick labels stay unique)
        score_mean.append(np.nan)
        score_std.append(np.nan)
        rank_mean.append(np.nan)
        rank_std.append(np.nan)
        model_names.append(" " * len(model_names))
        major_ticks.append(len(score_mean) - 1)

    # Delete the last empty row
    score_mean = score_mean[:-1]
    score_std = score_std[:-1]
    model_names = model_names[:-1]
    major_ticks = major_ticks[:-1]
    rank_mean = rank_mean[:-1]
    rank_std = rank_std[:-1]

    fig_height = 9 / (4 * 8) * (len(classifiers) * (len(oversamplers) + 1))
    plt.figure(figsize=(5, fig_height), dpi=320)
    ax = plt.axes()
    if plot_rank:
        # Plot the rank on a secondary x-axis, in red
        ax2 = ax.twiny()
        ax2.errorbar(x=rank_mean, y=range(len(score_mean)), xerr=rank_std, fmt="r^")
        ax2.set_xlabel("Rank")
        ax2.xaxis.label.set_color("red")
        for t in ax2.xaxis.get_ticklabels():
            t.set_color("red")
    # Plot the score on the primary x-axis, in blue
    ax.errorbar(x=score_mean, y=range(len(score_mean)), xerr=score_std, fmt="bo")
    ax.xaxis.grid(True)
    # Major ticks mark the separators between classifiers; minor ticks carry the labels
    ax.set_yticks(major_ticks, minor=False)
    ax.set_yticklabels([""] * len(major_ticks), minor=False)
    ax.set_yticks(range(len(model_names)), minor=True)
    ax.set_yticklabels(model_names, minor=True)
    ax.yaxis.grid(True, which="major")
    ax.set_xlabel(METRICS[metric.split(".")[1]])
    if plot_rank:
        ax.xaxis.label.set_color("blue")
        for t in ax.xaxis.get_ticklabels():
            t.set_color("blue")
    plt.show()
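

# Minimal end-to-end sketch of how the two helpers above might be combined. The file
# name "results.csv" and the "test."-prefixed metric names are assumptions made for
# illustration; the actual export format of the experiment results is not defined here.
if __name__ == "__main__":
    results = pd.read_csv("results.csv")  # assumed raw experiment results
    summary = filter_optimal_hps(
        results, opt_metric="test.roc_auc", output_metrics=["test.roc_auc", "test.f1"]
    )
    avg_plots(summary, metric="test.roc_auc", plot_rank=True)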