# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: MIT-0
# Python Built-Ins:
from itertools import product
from typing import Any, Dict, List, Optional, Union

# External Dependencies:
import numpy as np
from pandas import DataFrame, Timestamp
from timeseries_generator import BaseFactor
from timeseries_generator.utils import get_cartesian_product


class RandomCompositeFeatureFactor(BaseFactor):
    """A random factor for unique combinations of multiple features

    This class is similar to timeseries_generator.RandomFeatureFactor, but generates random values
    independently for combinations of multiple features, rather than the alternative of layering
    multiple RandomFeatureFactors on different Features.
    """

    def __init__(
        self,
        feature_values: Dict[str, List[Any]],
        min_factor_value: float = 1.0,
        max_factor_value: float = 10.0,
        col_name: str = "random_feature_factor",
    ):
        """Create a RandomCompositeFeatureFactor

        Parameters
        ----------
        feature_values:
            Values (labels) by feature name.
        min_factor_value:
            Minimum factor value.
        max_factor_value:
            Maximum factor value.
        col_name:
            Column name to create for this factor in the generation output.

        Examples
        --------
        Create a factor for every combination of 'store' and 'country' in our list:
            >>> rff = RandomCompositeFeatureFactor(
            ...     feature_values={
            ...         "country": ["country_1", "country_2"],
            ...         "store": ["store_1", "store_2"],
            ...     },
            ...     min_factor_value=1,
            ...     max_factor_value=10
            ... )
        """
        super().__init__(col_name=col_name, features=feature_values)

        self._feature_values = feature_values
        if min_factor_value > max_factor_value:
            raise ValueError(
                f'min_factor_value: "{min_factor_value}" > max_factor_value: "{max_factor_value}"'
            )
        self._min_factor_value = min_factor_value
        self._max_factor_value = max_factor_value

    def generate(
        self,
        start_date: Union[Timestamp, str, int, float],
        end_date: Optional[Union[Timestamp, str, int, float]] = None,
    ) -> DataFrame:
        dr: DataFrame = self.get_datetime_index(start_date=start_date, end_date=end_date).to_frame(
            index=False, name=self._date_col_name
        )

        # calculate product of all provided features and their values:
        factor_df = DataFrame(
            product(*self._feature_values.values()),
            columns=[k for k in self._feature_values.keys()],
        )
        # generate a random factor value for each feature combination:
        # rand_value = min + ((max - min) * value)
        factor_df[self._col_name] = self._min_factor_value + (
            (self._max_factor_value - self._min_factor_value) * np.random.random(len(factor_df))
        )

        # cartesian product of factor df and datetime df
        return get_cartesian_product(dr, factor_df)