# -*- coding: utf-8 -*-
"""
Created on Thu Dec  8 09:55:47 2022

@author: rpivovar
"""


import numpy as np
from twinstat.core.statistical_tests import distribution_difference_MC_test
from twinstat.core.knn_models import OutlierKNNDetector
import pandas


def test_MC_integrate():

    n_var = 5
    n_data = 200
    X= None
    for i in range(8):
        np.random.seed(i)
        means1 = np.random.uniform(size=n_var)
        np.random.seed(i)
        means1 *= np.random.randint(1,15)
        np.random.seed(i)
        x = np.random.multivariate_normal(means1, np.eye(n_var)*1.5, size=n_data)
        if i >0:
            X = np.append(X,x, axis=0)
        else:
            X = x

    Y = None
    for i in range(3):
        np.random.seed(i+100)
        means2 = np.random.uniform(size=n_var)
        np.random.seed(i+100)
        means2 *= np.random.randint(1,50)
        np.random.seed(i+100)
        y = np.random.multivariate_normal(means2, np.eye(n_var)*1.5, size=n_data)
        if i >0:
            Y = np.append(Y,y, axis=0)
        else:
            Y = y


    P_given_f, bgm1, bgm2 = distribution_difference_MC_test(X, Y,
                                                            n_mixtures_X=10,
                                                            n_mixtures_Y=10,
                                                            )

    good_arr = [1, 0, 1, 1, 1, 1, 0, 1, 0, 1]

    new_arr = list(P_given_f.values())
    new_arr = [1 if x[0] > 1e-3 else 0 for x in new_arr ]

    compare = np.allclose(new_arr,good_arr, rtol=1e-3,atol=1e-3)
    assert compare == True


def test_knn_outlier():
    n_data = 10
    y = []
    for t in range(n_data):
        np.random.seed(t)
        if np.random.uniform() > 0.95 and t>0:
            y.append(y[t-1]*2)
        else:
            np.random.seed(t+100)
            y.append(t*0.1 + np.random.normal())

    t = list(range(n_data))

    data = pandas.DataFrame([t,y])
    data = data.T

    od = OutlierKNNDetector(outlier_distance_threshold=3.0, n_neighbors=8, endog_idx=1)
    newX= od.remove_outliers(data)


    good_arr = np.array([[ 0.        , -1.74976547],
           [ 1.        ,  2.80684984],
           [ 2.        ,  1.8680683 ],
           [ 3.        , -0.94927835],
           [ 4.        , -1.8985567 ],
           [ 5.        ,  0.25467433],
           [ 6.        ,  2.76493494],
           [ 7.        ,  1.19399194],
           [ 8.        , -0.22690451],
           [ 9.        ,  0.71226506]])


    compare = np.allclose(newX,good_arr, rtol=1e-3,atol=1e-3)
    assert compare == True