import logging
import os
from pathlib import Path
from typing import Any, Dict, Union

import numpy as np
from gluonts.dataset.common import ListDataset, MetaData, TrainDatasets
from pandas.tseries import offsets
from pandas.tseries.frequencies import to_offset


def mkdir(path: Union[str, os.PathLike]) -> Path:
    path = Path(path)
    path.mkdir(parents=True, exist_ok=True)
    return path


def override_hp(hp: Dict[str, Any], metadata: MetaData) -> Dict[str, Any]:
    """Resolve the values to inject into the estimator: either from the hyperparameters or from the metadata.

    This function:

    - mitigates caller errors from inadvertently specifying hyperparameters that
      must not be overridden, e.g., the frequency must follow how the data was
      prepared.
    - uses some metadata values as defaults, unless overridden by the
      hyperparameters.
    """
    hp = hp.copy()

    # Always use freq from the dataset metadata.
    if "freq" in hp and hp["freq"] != metadata.freq:
        freq_hp = hp["freq"]
        logging.warning(f"freq: set freq='{metadata.freq}' from metadata; ignore '{freq_hp}' from hyperparam.")
    hp["freq"] = metadata.freq

    # Use the prediction_length hyperparameter, but fall back to the one from
    # metadata when not specified.
    if "prediction_length" not in hp:
        hp["prediction_length"] = metadata.prediction_length
        logging.warning(
            "prediction_length: no hyperparam, so set "
            f"prediction_length={metadata.prediction_length} from metadata"
        )

    return hp


def freq_name(s: str) -> str:
    """Convert a frequency string to a friendly name.

    This implementation looks only at the offset type, hence 7D still becomes
    daily. It's not smart enough yet to know that 7D equals a week.
    """
    offset = to_offset(s)
    if isinstance(offset, offsets.Day):
        return "daily"
    elif isinstance(offset, offsets.Week):
        return "weekly"
    raise ValueError(f"Unsupported frequency: {s}")


################################################################################
# Data transformations
################################################################################
def log1p_tds(dataset: TrainDatasets) -> TrainDatasets:
    """Create a new TrainDatasets with targets log1p-transformed."""
    # Implementation note: currently, the only way is to eagerly load all
    # timeseries into memory, then do the transform in place.
    train = ListDataset(dataset.train, freq=dataset.metadata.freq)
    log1p(train)

    if dataset.test is not None:
        test = ListDataset(dataset.test, freq=dataset.metadata.freq)
        log1p(test)
    else:
        test = None

    # fmt: off
    return TrainDatasets(
        dataset.metadata.copy(),  # Note: pydantic's deep copy.
        train=train,
        test=test,
    )
    # fmt: on


def log1p(ds: ListDataset) -> None:
    """In-place log1p transformation of each entry's target."""
    for data_entry in ds.list_data:
        data_entry["target"] = np.log1p(data_entry["target"])


def expm1_and_clip_to_zero(_, yhat: np.ndarray) -> np.ndarray:
    """Apply expm1, then clip at 0.0."""
    return np.clip(np.expm1(yhat), a_min=0.0, a_max=None)


def clip_to_zero(_, yhat: np.ndarray) -> np.ndarray:
    return np.clip(yhat, a_min=0.0, a_max=None)
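
if __name__ == "__main__":
    # Illustrative usage sketch, not part of the original module: exercise
    # override_hp and the log1p transform on a tiny synthetic dataset. All
    # values below (freq, prediction_length, the "epochs" hyperparam, the
    # target series) are made-up examples, assuming gluonts is installed.
    metadata = MetaData(freq="D", prediction_length=7)

    # "W" conflicts with the dataset freq, so it is overridden with a warning;
    # prediction_length is absent, so the metadata default kicks in.
    hp = override_hp({"freq": "W", "epochs": 10}, metadata)
    assert hp["freq"] == "D" and hp["prediction_length"] == 7

    train = ListDataset(
        [{"start": "2021-01-01", "target": np.arange(10.0)}], freq="D"
    )
    tds = TrainDatasets(metadata=metadata, train=train, test=None)
    transformed = log1p_tds(tds)
    print(next(iter(transformed.train))["target"][:3])  # ~[0., 0.693, 1.099]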