import os
import pandas as pd
import pytest

from fastparquet.util import (analyse_paths, get_file_scheme, val_to_num,
                              join_path, groupby_types, get_column_metadata)


def test_analyse_paths():
    """Check that analyse_paths returns the common base and the relative remainders.

    Covers bare file names, one and two levels of shared directory, absolute
    paths, partition-style directories, and Windows-style separators.
    """
    posix_cases = [
        (['a', 'b'], ('', ['a', 'b'])),
        (['c/a', 'c/b'], ('c', ['a', 'b'])),
        (['c/d/a', 'c/d/b'], ('c/d', ['a', 'b'])),
        (['/c/d/a', '/c/d/b'], ('/c/d', ['a', 'b'])),
        (['c/cat=1/a', 'c/cat=2/b', 'c/cat=1/c'],
         ('c', ['cat=1/a', 'cat=2/b', 'cat=1/c'])),
    ]
    for file_list, expected in posix_cases:
        base, out = analyse_paths(file_list)
        assert (base, out) == expected

    file_list = ['c\\cat=1\\a', 'c\\cat=2\\b', 'c\\cat=1\\c']
    # We must trick linux into thinking this is windows for this test to work.
    # The restore lives in a ``finally`` so that an exception raised by
    # analyse_paths cannot leak the patched separator into later tests.
    saved_sep = os.sep
    os.sep = '\\'
    try:
        base, out = analyse_paths(file_list)
    finally:
        os.sep = saved_sep
    assert (base, out) == ('c', ['cat=1/a', 'cat=2/b', 'cat=1/c'])


def test_empty():
    """Joining an empty trailing component should leave the path unchanged."""
    # Compare with ``==``: the form ``assert expr, "test"`` would only check
    # that the result is truthy and use "test" as the failure message.
    assert join_path("test", "") == "test"


def test_parents():
    """Check how join_path handles more ``..`` components than the base has."""
    # With a relative base the surplus parent references are kept.
    joined = join_path("test", "../../..")
    assert joined == "../.."

    # With the absolute base "/test" the same kind of input is expected to raise.
    for climb in ("../../..", "../.."):
        with pytest.raises(Exception):
            join_path("/test", climb)


def test_abs_and_rel_paths():
    """Check join_path results for absolute, relative and empty base paths."""
    # Each entry is (components passed to join_path, expected joined path).
    cases = [
        (('/', 'this/is/a/test/'), '/this/is/a/test'),
        (('.', 'this/is/a/test/'), 'this/is/a/test'),
        (('', 'this/is/a/test/'), 'this/is/a/test'),
        (('/test', '.'), '/test'),
        (('/test', '..', 'this'), '/this'),
        (('/test', '../this'), '/this'),
    ]
    for parts, expected in cases:
        assert join_path(*parts) == expected


def test_file_scheme():
    """Check the scheme name get_file_scheme reports for various path lists."""
    # Each entry is (list of paths, expected scheme name).
    cases = [
        ([None, None], 'simple'),
        ([], 'empty'),  # this is pointless
        (['file'], 'flat'),
        (['file', 'file'], 'flat'),
        (['a=1/b=2/file', 'a=2/b=1/file'], 'hive'),
        (['a=1/z=2/file', 'a=2/b=6/file'], 'drill'),  # note key names do not match
        (['a=1/b=2/file', 'a=2/b/file'], 'drill'),
        (['a/b/c/file', 'a/b/file'], 'other'),
    ]
    for paths, scheme in cases:
        assert get_file_scheme(paths) == scheme


def test_val_to_num():
    """Check val_to_num conversions for numbers, dates and plain strings."""
    # Each entry is (raw value, expected converted value).
    conversions = [
        ('7', 7),
        ('.7', .7),
        ('0.7', .7),
        ('07', 7),
        ('0', 0),
        ('00', 0),
        ('-20', -20),
        (7, 7),
        (0.7, 0.7),
        (0, 0),
        ('NOW', 'NOW'),
        ('now', 'now'),
        ('TODAY', 'TODAY'),
        ('', ''),
        ('nan', 'nan'),
        ('NaN', 'NaN'),
        ('2018-10-10', pd.to_datetime('2018-10-10')),
        ('2018-10-09', pd.to_datetime('2018-10-09')),
        ('2017-12', pd.to_datetime('2017-12')),
        ('5e+6', 5e6),
        ('5e-6', 5e-6),
        ('0xabc', '0xabc'),
        ('hello world', 'hello world'),
    ]
    for raw, expected in conversions:
        assert val_to_num(raw) == expected

    # The following tests document an idiosyncrasy of val_to_num which is difficult
    # to avoid while timedeltas are supported.
    for raw in ('50+20', '50-20'):
        assert val_to_num(raw) == pd.to_timedelta(raw)


def test_groupby_types():
    """Check how many groups groupby_types produces for mixed-type inputs."""
    # Each entry is (input values, expected number of groups).
    cases = [
        ([1, 2, 3], 1),
        (["1", "2", "3.0"], 1),
        ([1, 2, 3.0], 2),
        ([1, "2", "3.0"], 2),
        ([pd.to_datetime("2000"), "2000"], 2),
    ]
    for values, group_count in cases:
        grouped = groupby_types(values)
        assert len(grouped) == group_count


def test_bad_tz():
    """get_column_metadata should raise ValueError for a dateutil-zoned index."""
    london_index = pd.date_range(
        '2012-01-01', periods=3, tz='dateutil/Europe/London'
    )
    with pytest.raises(ValueError):
        get_column_metadata(london_index, 'tz')