from typing import Any, Dict, Optional

import pandas as pd


def fill_dt_all(df, ts_id=None, **kwargs) -> pd.DataFrame:
    """Apply `fill_dt` to every timeseries contained in `df`.

    Rows are grouped by the `ts_id` column(s) and each group is passed to
    `fill_dt` through `groupby(...).apply`.

    Arguments:
        df (pd.DataFrame): frame holding one or more timeseries.
        ts_id (list of str, optional): grouping key(s) handed to `DataFrame.groupby`.
            Defaults to ["category", "cost_center"].
        **kwargs: forwarded unchanged to `fill_dt` for every group.

    Returns whatever `groupby(...).apply(fill_dt, ...)` produces for the groups.
    """
    # Resolve the default here rather than in the signature, so that no mutable
    # list object is shared between calls.
    if ts_id is None:
        ts_id = ["category", "cost_center"]
    grouped = df.groupby(ts_id, as_index=False, group_keys=False)
    return grouped.apply(fill_dt, **kwargs)


def fill_dt(
    df: pd.DataFrame,
    dates=pd.date_range("2017-01-01", "2019-12-31", freq="D"),
    freq="D",
    fillna_kwargs: Optional[Dict[str, Any]] = None,
    resample: str = "sum",
    resample_kwargs: Optional[Dict[str, Any]] = None,
) -> pd.DataFrame:
    """Reindex one timeseries onto contiguous timestamps, then optionally downsample it.

    If the dataframe has a column "x", that column becomes the index; otherwise
    the existing index is used as is.

    Arguments:
        df (pd.DataFrame): a single timeseries. It is not modified.
        dates (pd.DatetimeIndex or Tuple[str, str, str]): new timestamp index.
            - If pd.DatetimeIndex, it is used directly (typically created by
              pd.date_range("yyyy-mm-dd", "yyyy-mm-dd", freq="D")). It is not modified.
            - If Tuple[str, str, str], then dates[0] == "yyyy-mm-dd" or "min",
              dates[1] == "yyyy-mm-dd" or "max", and dates[2] is the frequency passed
              to pd.date_range. "min" / "max" are replaced by the smallest / largest
              value of the dataframe's index.
        freq (str): after df is reindexed, downsample to this freq. When it equals
            "D" no resampling is done and the reindexed frame is returned.
        fillna_kwargs (Dict[str, Any], optional): Use None for demand,
            dict(method='ffill') for price. Defaults to None.
            - None: missing values in a column whose first-row value is numeric become
              0.0; in any other column they become that column's first-row value.
            - dict: keyword arguments for DataFrame.fillna. A `method` of
              "ffill"/"pad" or "bfill"/"backfill" (with no `value`) is executed via
              DataFrame.ffill / DataFrame.bfill. Afterwards every non-numeric column
              is overwritten entirely with its first-row value.
        resample (str, optional): name of the aggregation method called on the
            resampler. Use "sum" for demand, "max" for price curves. Defaults to "sum".
        resample_kwargs (dict, optional): keyword arguments for that aggregation
            method. Defaults to None, meaning no extra arguments.

    Returns the filled (and possibly downsampled) dataframe after reset_index(), i.e.
    the timestamps are a regular column again.

    Raises:
        IndexError: if df has columns but no rows (there is no first row to read).
    """
    X = "x"
    if X in df.columns:
        df = df.set_index(X)

    if not isinstance(dates, pd.DatetimeIndex):
        # Must be Tuple[str, str, str]
        start, end, source_freq = dates
        if start == "min":
            start = df.index.min()
        if end == "max":
            end = df.index.max()
        dates = pd.date_range(start, end, freq=source_freq)

    # Pre-compute nan-filler from the first row.
    # - number columns: 0.0
    # - non-number columns: the first-row value
    fillers = _first_row_fillers(df)
    static_values = {col: val for col, val in fillers.items() if not _is_number(val)}

    # Re-index timeseries to the requested timestamps.
    reindexed = df.reindex(dates)
    if fillna_kwargs is None:
        daily = reindexed.fillna(value=fillers)
    else:
        daily = _fillna_compat(reindexed, fillna_kwargs)
        # For non-number columns, always use the value from the first row.
        for col, val in static_values.items():
            daily[col] = val

    # The reindexed frame may share its Index object with `dates`; assigning to
    # `.index.name` would then rename the caller's (or the default) index too.
    # `Index.rename` returns a new Index, leaving `dates` untouched.
    daily.index = daily.index.rename(df.index.name)

    if freq == "D":
        return daily.reset_index()

    aggregated = _downsample(
        daily,
        freq,
        resample,
        {} if resample_kwargs is None else resample_kwargs,
        static_values,
    )
    return aggregated.reset_index()


def _is_number(value) -> bool:
    """Tell whether pandas regards the type of `value` as numeric."""
    return pd.api.types.is_numeric_dtype(type(value))


def _first_row_fillers(df: pd.DataFrame) -> Dict[Any, Any]:
    """Map each column to its NaN replacement: 0.0 if the first-row value is numeric, else that value."""
    fillers = {}
    for pos, col in enumerate(df.columns):
        first_value = df.iloc[0, pos]
        fillers[col] = 0.0 if _is_number(first_value) else first_value
    return fillers


def _fillna_compat(frame: pd.DataFrame, fillna_kwargs: Dict[str, Any]) -> pd.DataFrame:
    """Run DataFrame.fillna, sending a forward/backward-fill `method` to ffill()/bfill()."""
    kwargs = dict(fillna_kwargs)  # never mutate the caller's dict
    method = kwargs.pop("method", None)
    if method is None:
        return frame.fillna(**kwargs)
    if kwargs.get("value") is None:
        kwargs.pop("value", None)
        if method in ("ffill", "pad"):
            return frame.ffill(**kwargs)
        if method in ("bfill", "backfill"):
            return frame.bfill(**kwargs)
    # Unknown method, or both value and method given: let pandas report the problem.
    return frame.fillna(method=method, **kwargs)


def _downsample(
    daily: pd.DataFrame,
    freq,
    how: str,
    how_kwargs: Dict[str, Any],
    static_values: Dict[Any, Any],
):
    """Aggregate `daily` at `freq` with resampler method `how`, then re-insert the static columns."""
    numeric_cols = [col for col in daily.columns if col not in static_values]
    # Aggregating non-number columns is pointless (they are overwritten below) and,
    # depending on the pandas version, drops them, concatenates strings or raises.
    # Leave them out whenever there is something numeric left to aggregate.
    subset = bool(numeric_cols) and bool(static_values) and daily.columns.is_unique
    source = daily[numeric_cols] if subset else daily

    aggregated = getattr(source.resample(freq), how)(**how_kwargs)

    # Restore the non-number columns from their first-row values.
    for col, val in static_values.items():
        aggregated[col] = val

    # Keep the column order of the input when the aggregation preserved the columns.
    if (
        subset
        and isinstance(aggregated, pd.DataFrame)
        and set(aggregated.columns) == set(daily.columns)
    ):
        aggregated = aggregated[list(daily.columns)]
    return aggregated