"""Smoke tests for the experiment runner and the results-analysis helpers."""

import os

import numpy as np
import pandas as pd

from analyze import avg_plots, filter_optimal_hps
from classifiers import CLASSIFIER_HPS
from experiment import experiment
from oversamplers import OVERSAMPLER_HPS


def test_experiments():
    # Run every classifier/oversampler combination on a single small
    # synthetic dataset, using only the first hyperparameter config of each.
    x = pd.DataFrame(np.random.rand(100, 7))
    y = np.random.rand(100, 1) > 0.9  # sparse boolean labels (~10% positive)

    for classifier, classifier_hp_configs in CLASSIFIER_HPS.items():
        for classifier_hp in classifier_hp_configs[:1]:
            for oversampler, oversampler_hp_configs in OVERSAMPLER_HPS.items():
                for oversampler_hp in oversampler_hp_configs[:1]:
                    print(f"Running experiment {classifier} - {oversampler}")
                    experiment(
                        x=x,
                        y=y,
                        oversampler={
                            "type": oversampler,
                            "ratio": 0.5,
                            "params": oversampler_hp,
                        },
                        classifier={"type": classifier, "params": classifier_hp},
                        seed=0,
                    )


def test_analyze():
    # Load cached experiment results and keep only the baseline
    # configuration (consistent runs, no normalization, no early-stopping
    # cleaning, single repeat).
    data_path = os.path.join(os.path.dirname(__file__), "../data/results.gz")
    df = pd.read_csv(data_path)
    df = df.dropna()
    df = df[
        (df["consistent"] == True)
        & (df["normalize"] == False)
        & (df["clean_early_stopping"] == False)
        & (df["repeats"] == 1)
    ]
    # Pick the hyperparameters that maximize validation ROC AUC, then plot
    # the corresponding test ROC AUC.
    df = filter_optimal_hps(
        df, opt_metric="validation.roc_auc", output_metrics=["test.roc_auc"]
    )
    avg_plots(df, "test.roc_auc")