# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
#     http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
from __future__ import absolute_import

import collections
import logging
import multiprocessing
import os
import shlex
import subprocess

import yaml

logging.basicConfig()
logger = logging.getLogger(__name__)

BASE_PATH = "/opt/ml"
MODEL_PATH = "/opt/ml/model"
INPUT_PATH = "/opt/ml/input"
INPUT_DATA_PATH = "/opt/ml/input/data"
OUTPUT_PATH = "/opt/ml/output"
INPUT_CONFIG_PATH = "/opt/ml/input/config"
OUTPUT_DATA_PATH = "/opt/ml/output/data"

HYPERPARAMETERS_FILE = "hyperparameters.json"
RESOURCE_CONFIG_FILE = "resourceconfig.json"
INPUT_DATA_CONFIG_FILE = "inputdataconfig.json"


def load_config(path):
    with open(path, "r") as f:
        return yaml.load(f)


def load_hyperparameters():
    return HyperParameters(load_config(os.path.join(INPUT_CONFIG_PATH, HYPERPARAMETERS_FILE)))


def load_resource_config():
    return load_config(os.path.join(INPUT_CONFIG_PATH, RESOURCE_CONFIG_FILE))


def load_input_data_config():
    return load_config(os.path.join(INPUT_CONFIG_PATH, INPUT_DATA_CONFIG_FILE))


def get_channel_dir(channel):
    """Returns the directory containing the channel data file(s) which is:
    - <self.base_dir>/input/data/<channel>
    Returns:
        (str) The input data directory for the specified channel.
    """
    return os.path.join(INPUT_DATA_PATH, channel)


def get_available_gpus():
    """The number of gpus available in the current container.

    Returns:
        (int): number of gpus available in the current container.
    """
    try:
        cmd = shlex.split("nvidia-smi --list-gpus")
        output = str(subprocess.check_output(cmd))
        return sum([1 for x in output.split("\n") if x.startswith("GPU ")])
    except OSError:
        logger.warning("No GPUs detected (normal if no gpus installed)")
        return 0


def get_available_cpus():
    return multiprocessing.cpu_count()


def create_trainer_environment():
    """
    Returns: an instance of `TrainerEnvironment`
    """
    resource_config = load_resource_config()
    current_host = resource_config["current_host"]
    hosts = resource_config["hosts"]

    input_data_config = load_input_data_config()
    channel_dirs = {channel: get_channel_dir(channel) for channel in input_data_config}

    available_cpus = get_available_cpus()
    available_gpus = get_available_gpus()

    env = TrainerEnvironment(
        input_dir=INPUT_PATH,
        input_config_dir=INPUT_CONFIG_PATH,
        model_dir=MODEL_PATH,
        output_dir=OUTPUT_PATH,
        output_data_dir=OUTPUT_DATA_PATH,
        current_host=current_host,
        hosts=hosts,
        channel_dirs=channel_dirs,
        available_gpus=available_gpus,
        available_cpus=available_cpus,
        hyperparameters=load_hyperparameters(),
        resource_config=resource_config,
        input_data_config=load_input_data_config(),
    )
    return env


class HyperParameters(collections.Mapping):
    """dict of the hyperparameters provided in the training job. Allows casting of the hyperparameters
    in the `get` method.

    """

    def __init__(self, hyperparameters_dict):
        self.hyperparameters_dict = hyperparameters_dict

    def __getitem__(self, key):
        return self.hyperparameters_dict[key]

    def __len__(self):
        return len(self.hyperparameters_dict)

    def __iter__(self):
        return iter(self.hyperparameters_dict)

    def get(self, key, default=None, object_type=None):
        """Has the same functionality of `dict.get`. Allows casting of the values using the additional attribute
        `object_type`:

        Args:
            key: hyperparameter name
            default: default hyperparameter value
            object_type: type that the hyperparameter wil be casted to.

        Returns:

        """
        try:
            value = self.hyperparameters_dict[key]
            if not object_type:
                return value
            elif object_type == bool:
                if value.lower() in ["True", "true"]:
                    return True
                return False
            else:
                return object_type(value)
        except KeyError:
            return default

    def __str__(self):
        return str(self.hyperparameters_dict)

    def __repr__(self):
        return str(self.hyperparameters_dict)


class TrainerEnvironment(
    collections.namedtuple(
        "TrainerEnvironment",
        [
            "input_dir",
            "input_config_dir",
            "model_dir",
            "output_dir",
            "hyperparameters",
            "resource_config",
            "input_data_config",
            "output_data_dir",
            "hosts",
            "channel_dirs",
            "current_host",
            "available_gpus",
            "available_cpus",
        ],
    )
):
    """Provides access to aspects of the training environment relevant to training jobs, including
    hyperparameters, system characteristics, filesystem locations, environment variables and configuration settings.

    Example on how a script can use training environment:
        ```
        import os
        import numpy as np


        from trainer.environment import create_training_environment
        env = create_training_environment()

        from keras.applications.resnet50 import ResNet50

        # get the path of the channel 'training' from the inputdataconfig.json file
        training_dir = env.channel_dirs['training']

        # get a the hyperparameter 'training_data_file' from hyperparameters.json file
        file_name = hyperparameters['training_data_file']

        # get the folder where the model should be saved
        model_dir = env.model_dir

        data = np.load(os.path.join(training_dir, training_data_file))

         x_train, y_train = data['features'], keras.utils.to_categorical(data['labels'])

        model = ResNet50(weights='imagenet')

        # unfreeze the model to allow fine tuning
        ...

        model.fit(x_train, y_train)

        # save the model in the end of training
        model.save(os.path.join(model_dir, 'saved_model'))
        ```
    """

    def __new__(
        cls,
        input_dir,
        input_config_dir,
        model_dir,
        output_dir,
        hyperparameters,
        resource_config,
        input_data_config,
        output_data_dir,
        hosts,
        channel_dirs,
        current_host,
        available_gpus,
        available_cpus,
    ):
        """

        Args:
            input_dir: The input_dir, e.g. /opt/ml/input/, is the directory where SageMaker saves input data
                        and configuration files before and during training. The input data directory has the
                        following subdirectories: config (`input_config_dir`) and data (`input_data_dir`)

            input_config_dir: The directory where standard SageMaker configuration files are located,
                        e.g. /opt/ml/input/config/.

                        SageMaker training creates the following files in this folder when training starts:
                            - `hyperparameters.json`: Amazon SageMaker makes the hyperparameters in a CreateTrainingJob
                                    request available in this file.
                            - `inputdataconfig.json`: You specify data channel information in the InputDataConfig
                                    parameter in a CreateTrainingJob request. Amazon SageMaker makes this information
                                    available in this file.
                            - `resourceconfig.json`: name of the current host and all host containers in the training

                        More information about these files can be find here:
                            https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-training-algo.html

            model_dir:

            output_dir: The directory where training success/failure indications will be written, e.g. /opt/ml/output.
                        To save non-model artifacts check `output_data_dir`.

            hyperparameters: An instance of `HyperParameters` containing the training job hyperparameters.

            resource_config: A dict<string, string> with the contents from /opt/ml/input/config/resourceconfig.json.
                            It has the following keys:
                                - current_host: The name of the current container on the container network.
                                    For example, 'algo-1'.
                                -  hosts: The list of names of all containers on the container network,
                                    sorted lexicographically. For example, `["algo-1", "algo-2", "algo-3"]`
                                    for a three-node cluster.

            input_data_config: A dict<string, string> with the contents from /opt/ml/input/config/inputdataconfig.json.

                                For example, suppose that you specify three data channels (train, evaluation, and
                                validation) in your request. This dictionary will contain:

                                {"train": {
                                    "ContentType":  "trainingContentType",
                                    "TrainingInputMode": "File",
                                    "S3DistributionType": "FullyReplicated",
                                    "RecordWrapperType": "None"
                                },
                                "evaluation" : {
                                    "ContentType": "evalContentType",
                                    "TrainingInputMode": "File",
                                    "S3DistributionType": "FullyReplicated",
                                    "RecordWrapperType": "None"
                                },
                                "validation": {
                                    "TrainingInputMode": "File",
                                    "S3DistributionType": "FullyReplicated",
                                    "RecordWrapperType": "None"
                                }}

                                You can find more information about /opt/ml/input/config/inputdataconfig.json here:
                                https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-training-algo.html#your-algorithms-training-algo-running-container-inputdataconfig

            output_data_dir: The dir to write non-model training artifacts (e.g. evaluation results) which will be
                        retained by SageMaker, e.g. /opt/ml/output/data.

                        As your algorithm runs in a container, it generates output including the status of the
                        training job and model and output artifacts. Your algorithm should write this information
                        to the this directory.

            hosts: The list of names of all containers on the container network, sorted lexicographically.
                    For example, `["algo-1", "algo-2", "algo-3"]` for a three-node cluster.

            channel_dirs:   A dict[string, string] containing the data channels and the directories where the training
                            data was saved.

                            When you run training, you can partition your training data into different logical
                            "channels". Depending on your problem, some common channel ideas are: "train", "test",
                             "evaluation" or "images',"labels".

                            The format of channel_input_dir is as follows:

                                - `channel`[key] - the name of the channel defined in the input_data_config.
                                - `training data path`[value] - the path to the directory where the training data is
                                saved.

            current_host: The name of the current container on the container network. For example, 'algo-1'.

            available_gpus: The number of gpus available in the current container.

            available_cpus: The number of cpus available in the current container.

        Returns:
            A `TrainerEnvironment` object.
        """
        return super(TrainerEnvironment, cls).__new__(
            cls,
            input_dir,
            input_config_dir,
            model_dir,
            output_dir,
            hyperparameters,
            resource_config,
            input_data_config,
            output_data_dir,
            hosts,
            channel_dirs,
            current_host,
            available_gpus,
            available_cpus,
        )