# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: LicenseRef-.amazon.com.-AmznSL-1.0
# Licensed under the Amazon Software License  http://aws.amazon.com/asl/

from smclarify.bias.metrics import AD, CDDL, CI, DAR, DCA, DCR, DI, DPL, DRR, FT, JS, KL, LP, RD, TE, KS, SD, GE
from smclarify.bias.metrics import metric_one_vs_all
from smclarify.bias.metrics.constants import INFINITY
from pytest import approx
import pandas as pd
from pandas import Series
import math
import numpy as np
import pytest

# Shared multi-category fixture. The mixed-type rows are routed through
# ``np.array``, which coerces every cell to a string; that is why the tests
# compare this frame's columns against string literals such as "1" and "2".
DATASET_PDF = pd.DataFrame(
    columns=["x", "label", "positive_label_index", "sensitive_facet_index"],
    data=np.array(
        [
            ["a", 0, False, True],
            ["b", 0, False, False],
            ["b", 1, True, False],
            ["c", 1, True, True],
            ["a", 2, True, True],
            ["a", 1, True, True],
            ["b", 0, False, False],
            ["c", 1, True, True],
            ["b", 2, True, False],
            ["c", 2, True, True],
            ["b", 0, False, False],
            ["b", 2, True, False],
        ]
    ),
)


def dfBinary():
    """
    Build the binary-facet fixture (12 rows).

    :return: a tuple of
        dataframe with a single column (named 0) of "M"/"F" facet values
        label series (0/1)
        boolean series marking rows whose label is 1
        predicted label series (0/1)
        boolean series marking rows whose predicted label is 1
    """
    facet_values = ["M", "F", "F", "M", "F", "M", "F", "F", "M", "M", "F", "F"]
    # One single-element row per facet value, so the frame has one column named 0.
    frame = pd.DataFrame([[value] for value in facet_values])

    observed = pd.Series([0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0])
    predicted = pd.Series([1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0])

    return (frame, observed, observed.eq(1), predicted, predicted.eq(1))


def dfMulticategory():
    """
    Build the multicategory-facet fixture.

    :return: dataframe with a single column (named 0) holding 24 values drawn from "M", "F" and "O"
    """
    categories = [
        "M", "O", "M", "M", "F", "O", "O", "F",
        "M", "M", "F", "F", "O", "F", "M", "F",
        "O", "F", "M", "M", "F", "F", "O", "O",
    ]  # fmt: skip
    # Kept from the original fixture; presumably a per-row binary label (not used by this function):
    # [1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1]
    return pd.DataFrame([[category] for category in categories])


def dfContinuous():
    """
    Build the continuous-facet fixture.

    :return: dataframe with a single column (named 0) holding 12 float values
    """
    samples = [
        1.55255404,
        1.87128923,
        1.82640675,
        0.48706083,
        0.21833644,
        0.45007763,
        0.47457823,
        1.5346789,
        1.61042132,
        1.87130261,
        1.97768247,
        1.05499183,
    ]
    # Wrapping an unnamed Series yields a frame whose only column is labelled 0.
    return pd.DataFrame(pd.Series(samples))


def datasetFT():
    """
    Build the FlipTest fixture.

    :return: dataframe with 24 rows and 7 integer-labelled columns (0-6); the booleans
        in column 4 are coerced to integers by ``np.array`` alongside the other values
    """
    rows = [
        [0, 0, 0, 0, True, 1, 1],
        [1, 0, 0, 0, True, 0, 1],
        [1, 0, 1, 0, True, 0, 1],
        [0, 0, 0, 0, False, 1, 1],
        [1, 0, 0, 1, True, 0, 1],
        [0, 0, 1, 0, True, 1, 1],
        [1, 0, 0, 0, True, 1, 1],
        [1, 1, 0, 0, True, 1, 1],
        [0, 0, 1, 0, True, 1, 1],
        [1, 0, 1, 1, True, 1, 0],
        [1, 0, 0, 0, True, 1, 0],
        [1, 0, 1, 0, True, 1, 1],
        [1, 0, 0, 0, False, 1, 1],
        [0, 0, 0, 0, True, 1, 1],
        [0, 0, 1, 0, True, 0, 1],
        [0, 0, 1, 0, True, 1, 1],
        [0, 1, 0, 1, False, 0, 1],
        [1, 0, 0, 0, False, 1, 1],
        [0, 0, 1, 0, False, 1, 1],
        [1, 0, 0, 0, True, 1, 0],
        [1, 0, 1, 0, False, 1, 1],
        [0, 1, 0, 0, False, 1, 1],
        [1, 0, 1, 0, True, 0, 1],
        [0, 0, 0, 1, True, 1, 0],
    ]
    return pd.DataFrame(np.array(rows))


def datasetFT_small_samples():
    """
    Build a smaller FlipTest fixture (16 rows, 7 columns).

    With facet as column 0, the FT selects only 3 rows by ~facet
    (only three rows have a 0 in column 0).
    """
    rows = [
        [0, 0, 0, 0, True, 1, 1],
        [1, 0, 0, 0, True, 0, 1],
        [1, 0, 1, 0, True, 0, 1],
        [1, 0, 0, 1, True, 0, 1],
        [1, 0, 0, 0, True, 1, 1],
        [1, 1, 0, 0, True, 1, 1],
        [1, 0, 1, 1, True, 1, 0],
        [1, 0, 0, 0, True, 1, 0],
        [1, 0, 1, 0, True, 1, 1],
        [1, 0, 0, 0, False, 1, 1],
        [1, 0, 0, 0, False, 1, 1],
        [0, 0, 1, 0, False, 1, 1],
        [1, 0, 0, 0, True, 1, 0],
        [1, 0, 1, 0, False, 1, 1],
        [1, 0, 1, 0, True, 0, 1],
        [0, 0, 0, 1, True, 1, 0],
    ]
    return pd.DataFrame(np.array(rows))


def datasetFTMult():
    """
    Build a FlipTest fixture whose column 0 takes three values (0, 1, 2).

    :return: dataframe with 24 rows and 7 columns; columns 1-6 are constant
        (0, 0, 0, 1, 1, 1) and only column 0 varies from row to row
    """
    first_column = [0, 1, 1, 0, 1, 2, 1, 1, 0, 2, 1, 1, 1, 0, 0, 2, 0, 1, 2, 1, 2, 0, 2, 0]
    # Every row shares the same trailing six values, so generate rows from column 0 alone.
    rows = [[value, 0, 0, 0, 1, 1, 1] for value in first_column]
    return pd.DataFrame(np.array(rows))


# Module-level fixtures, built once at import time and shared by the tests below.
dfB, dfB_label, dfB_pos_label_idx, dfB_pred_label, dfB_pos_pred_label_idx = dfBinary()
dfM, dfC, dfFT = dfMulticategory(), dfContinuous(), datasetFT()


def test_CI():
    """Check CI on a binary facet, a thresholded continuous facet and a multicategory facet."""
    # Binary facet
    binary_facet = dfB[0]
    assert CI(binary_facet, binary_facet == "F") == approx(-1 / 6)
    assert CI(binary_facet, binary_facet == "M") == approx(1 / 6)

    # Continuous facet, split around 1.0
    continuous_facet = dfC[0]
    assert CI(continuous_facet, continuous_facet > 1.0) == approx(-1 / 3)
    assert CI(continuous_facet, continuous_facet < 1.0) == approx(1 / 3)

    # Multicategory facet: one result per category via metric_one_vs_all
    ci_by_category = metric_one_vs_all(CI, dfM[0])
    for category, expected in (("M", 1 / 3), ("F", 1 / 4), ("O", 5 / 12)):
        assert ci_by_category[category] == approx(expected)


def test_DPL():
    """Check DPL per facet value on a four-row frame using metric_one_vs_all."""
    frame = pd.DataFrame({"x": ["a", "a", "b", "b"], "y": [1, 1, 0, 1]})
    labels = frame["y"]
    dpl_by_facet = metric_one_vs_all(DPL, frame["x"], label=labels, positive_label_index=(labels == 1))
    assert dpl_by_facet["a"] == -0.5
    assert dpl_by_facet["b"] == 0.5


def test_KL():
    """
    Check KL on small boolean series, on an empty facet selection, and on DATASET_PDF.

    The DATASET_PDF case selects several facet values ("a", "c") and several label
    values ("1", "2") at once. The masks are built with ``Series.isin`` rather than by
    adding boolean series together, which states the intended set membership directly.
    """
    labels = [True, True, True, False, False, False]

    res = KL(pd.Series(labels), pd.Series([True, False, False, False, False, False]))
    assert res == approx(-0.366516)

    res = KL(pd.Series(labels), pd.Series([True, False, False, False, True, False]))
    assert res == 0.0

    # No row is selected by the facet mask, so KL is expected to raise.
    with pytest.raises(ValueError) as e:
        KL(pd.Series(labels), pd.Series([False, False, False, False, False, False]))
    assert str(e.value) == "No instance of common facet found, dataset may be too small"

    # multi-facet, multi-category case
    # DATASET_PDF stores labels as strings, hence the string literals here.
    sensitive_facet_index: pd.Series = DATASET_PDF["x"].isin(["a", "c"])
    positive_label_index: pd.Series = DATASET_PDF["label"].isin(["1", "2"])
    res = KL(positive_label_index, sensitive_facet_index)
    assert res == approx(0.2938933)


def test_KS():
    """Check KS on a five-row frame and on two label columns of DATASET_PDF."""
    small = pd.DataFrame([["1", "a"], ["0", "a"], ["0", "b"], ["1", "b"], ["1", "b"]], columns=["label", "x"])
    assert KS(small["label"], small["x"] == "b") == approx(0.16666666)

    # Both DATASET_PDF checks use the same facet: every row whose "x" is not "b".
    for label_column in ("label", "positive_label_index"):
        assert KS(DATASET_PDF[label_column], DATASET_PDF["x"] != "b") == approx(0.33333333)


def test_JS():
    """
    Check JS on small boolean series, on an empty facet selection, on DATASET_PDF,
    and against a value computed by hand.

    The DATASET_PDF case selects several facet values ("a", "c") and several label
    values ("1", "2") at once. The masks are built with ``Series.isin`` rather than by
    adding boolean series together, which states the intended set membership directly.
    """
    labels = [True, True, True, False, False, False]

    res = JS(pd.Series(labels), pd.Series([True, False, False, False, False, False]))
    assert res == approx(0.06641431438228168)

    res = JS(pd.Series(labels), pd.Series([True, False, False, False, True, False]))
    assert res == 0.0

    # No row is selected by the facet mask, so JS is expected to raise.
    with pytest.raises(ValueError) as e:
        JS(pd.Series(labels), pd.Series([False, False, False, False, False, False]))
    assert str(e.value) == "No instance of common facet found, dataset may be too small"

    # multi-facet, multi-category case
    # DATASET_PDF stores labels as strings, hence the string literals here.
    sensitive_facet_index: pd.Series = DATASET_PDF["x"].isin(["a", "c"])
    positive_label_index: pd.Series = DATASET_PDF["label"].isin(["1", "2"])
    res = JS(positive_label_index, sensitive_facet_index)
    assert res == approx(0.06465997)

    # Calculate JS manually.
    res = JS(pd.Series([True, True, True, True, False, False]), pd.Series([True, False, False, False, True, False]))
    Pa = np.array([0.5, 0.5])
    Pd = np.array([0.25, 0.75])
    # P is the element-wise mean of Pa and Pd.
    P = np.array([0.375, 0.625])
    expected_result = 0.5 * (
        (Pa[0] * math.log(Pa[0] / P[0]))
        + (Pa[1] * math.log(Pa[1] / P[1]))
        + (Pd[0] * math.log(Pd[0] / P[0]))
        + (Pd[1] * math.log(Pd[1] / P[1]))
    )
    assert res == approx(expected_result)


def test_LP():
    """
    Check LP on small boolean series, on an empty facet selection, and on DATASET_PDF.

    The DATASET_PDF case selects several facet values ("a", "c") and several label
    values ("1", "2") at once. The masks are built with ``Series.isin`` rather than by
    adding boolean series together, which states the intended set membership directly.
    """
    labels = [True, True, True, False, False, False]

    res = LP(pd.Series(labels), pd.Series([True, False, False, False, False, False]))
    assert res == approx(0.6)

    res = LP(pd.Series(labels), pd.Series([True, False, False, False, True, False]))
    assert res == 0.0

    # No facet selection
    with pytest.raises(ValueError) as e:
        LP(pd.Series(labels), pd.Series([False, False, False, False, False, False]))
    assert str(e.value) == "No instance of common facet found, dataset may be too small"

    # multi-facet, multi-category case
    # DATASET_PDF stores labels as strings, hence the string literals here.
    sensitive_facet_index: pd.Series = DATASET_PDF["x"].isin(["a", "c"])
    positive_label_index: pd.Series = DATASET_PDF["label"].isin(["1", "2"])
    res = LP(positive_label_index, sensitive_facet_index)
    assert res == approx(0.471404520)


def test_CDD():
    """Check CDDL per facet value on a 24-row M/F facet with a three-valued group variable."""
    # One character per row: the facet value ("M" or "F") of each of the 24 samples.
    facet = pd.Series(list("MMMFFFFMMMMFMMFMFFMMFMMF"))
    labels = pd.Series([0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0])
    groups = pd.Series([1, 0, 2, 2, 1, 1, 2, 1, 1, 2, 0, 1, 2, 0, 1, 1, 1, 2, 0, 1, 0, 0, 1, 1])

    cddl_by_facet = metric_one_vs_all(CDDL, facet, positive_label_index=labels == 1, group_variable=groups)
    assert cddl_by_facet["F"] == approx(0.3982142857)
    assert cddl_by_facet["M"] == approx(-0.3982142857)


def test_DI():
    """
    Check DI on the binary fixture, including the INFINITY result and both
    empty-selection errors (empty facet and empty negated facet).
    """
    # Binary Facet, Binary Label
    sensitive_facet_index_f = dfB[0] == "F"
    assert DI(dfB[0], sensitive_facet_index_f, dfB_pos_pred_label_idx) == approx(10 / 7)

    sensitive_facet_index_m = dfB[0] == "M"
    assert DI(dfB[0], sensitive_facet_index_m, dfB_pos_pred_label_idx) == approx(7 / 10)

    # None of the "M" rows is predicted positive here; DI is expected to report INFINITY.
    predicted_labels_zero_for_M = pd.Series([0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1])
    positive_predicted_labels_index_zero_for_M = predicted_labels_zero_for_M == 1
    assert DI(dfB[0], sensitive_facet_index_f, positive_predicted_labels_index_zero_for_M) == INFINITY

    # Check empty facet selection: the fixture has no missing facet values,
    # so the isna() mask selects no rows at all.
    with pytest.raises(ValueError) as e:
        DI(dfB[0], dfB[0].isna(), positive_predicted_labels_index_zero_for_M)
    assert str(e.value) == "Facet set is empty"

    # Check empty negated facet selection: every row belongs to the facet.
    x = Series(["A", "A"])
    pred = Series([0, 1])
    with pytest.raises(ValueError) as e:
        DI(x, x == "A", pred == 1)
    assert str(e.value) == "Negated facet set is empty"


def test_DCA():
    """Check DCA on the binary fixture for each facet value in turn."""
    # Binary Facet, Binary Label
    facet = dfB[0]
    for facet_value, expected in (("F", 1 / 4), ("M", -1 / 4)):
        result = DCA(facet, facet == facet_value, dfB_pos_label_idx, dfB_pos_pred_label_idx)
        assert result == approx(expected)


def test_DCR():
    """Check DCR on the binary fixture for each facet value in turn."""
    # Binary Facet, Binary Label
    facet = dfB[0]
    for facet_value, expected in (("F", 1 / 3), ("M", -1 / 3)):
        result = DCR(facet, facet == facet_value, dfB_pos_label_idx, dfB_pos_pred_label_idx)
        assert result == approx(expected)


def test_RD():
    """Check RD on the binary fixture for each facet value in turn."""
    # Binary Facet, Binary Label
    facet = dfB[0]
    for facet_value, expected in (("F", -2 / 3), ("M", 2 / 3)):
        result = RD(facet, facet == facet_value, dfB_pos_label_idx, dfB_pos_pred_label_idx)
        assert result == approx(expected)


def test_SD():
    """Check SD on the binary fixture for each facet value in turn."""
    # Binary Facet, Binary Label
    facet = dfB[0]
    for facet_value, expected in (("F", 1 / 6), ("M", -1 / 6)):
        result = SD(facet, facet == facet_value, dfB_pos_label_idx, dfB_pos_pred_label_idx)
        assert result == approx(expected)


def test_DRR():
    """Check DRR on the binary fixture for each facet value in turn."""
    # Binary Facet, Binary Label
    facet = dfB[0]
    for facet_value, expected in (("F", 1 / 3), ("M", -1 / 3)):
        result = DRR(facet, facet == facet_value, dfB_pos_label_idx, dfB_pos_pred_label_idx)
        assert result == approx(expected)


def test_DRR_zero():
    """Check DRR when every "M" row of the binary fixture is predicted as 1."""
    # Binary Facet, Binary Label
    # All M have 1 prediction
    predictions = pd.Series([1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1])
    predicted_positive = predictions == 1

    facet = dfB[0]
    for facet_value, expected in (("F", 0.5), ("M", -0.5)):
        result = DRR(facet, facet == facet_value, dfB_pos_label_idx, predicted_positive)
        assert result == approx(expected)


def test_AD():
    """Check AD on the binary fixture for each facet value in turn."""
    # Binary Facet, Binary Label
    facet = dfB[0]
    for facet_value, expected in (("F", -13 / 35), ("M", 13 / 35)):
        result = AD(facet, facet == facet_value, dfB_pos_label_idx, dfB_pos_pred_label_idx)
        assert result == approx(expected)


def test_DAR():
    """Check DAR on the binary fixture for each facet value in turn."""
    # Binary Facet, Binary Label
    facet = dfB[0]
    for facet_value, expected in (("F", -1 / 2), ("M", 1 / 2)):
        result = DAR(facet, facet == facet_value, dfB_pos_label_idx, dfB_pos_pred_label_idx)
        assert result == approx(expected)


def test_TE():
    """Check TE on the binary fixture for each facet value in turn."""
    # Binary Facet, Binary Label
    facet = dfB[0]
    for facet_value, expected in (("F", -1 / 2), ("M", 1 / 2)):
        result = TE(facet, facet == facet_value, dfB_pos_label_idx, dfB_pos_pred_label_idx)
        assert result == approx(expected)


def test_FT():
    """Check FT on the 24-row fixture, then check that a non-numeric column is rejected."""
    features = datasetFT()
    # Column 0 serves as the facet; it is read before column 3 is overwritten below.
    facet_column = features[0]
    predictions = pd.Series([1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1])
    assert FT(features, facet_column == 1, predictions == 1) == approx(-0.23076923076923078)

    # Overwrite column 3 with strings so the frame is no longer purely numeric.
    features[3] = features[3].apply(lambda _value: "a")
    with pytest.raises(ValueError) as e:
        FT(features, facet_column == 1, predictions == 1)
    assert str(e.value) == "FlipTest does not support non-numeric columns"


def test_FT_small_samples():
    """Check FT on the 16-row fixture, where only three rows fall outside the facet."""
    features = datasetFT_small_samples()
    in_facet = features[0] == 1
    predictions = pd.Series([1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1])
    assert FT(features, in_facet, predictions == 1) == approx(-0.15384615384615385)


def test_GE():
    """Check GE on the binary fixture's positive-label and positive-prediction masks."""
    result = GE(dfB_pos_label_idx, dfB_pos_pred_label_idx)
    assert result == approx(0.24556213017751485)