#!/usr/bin/env python
# Amazon Machine Learning Samples
# Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Amazon Software License (the "License"). You may not use
# this file except in compliance with the License. A copy of the License is
# located at
#
#     http://aws.amazon.com/asl/
#
# or in the "license" file accompanying this file. This file is distributed on
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or
# implied. See the License for the specific language governing permissions and
# limitations under the License.
"""
Demonstrate how to create tasks on Amazon ML to train and evaluate a model for
K-fold cross-validation. The main function of this module requires the number
of folds (kfolds).

usage: build_folds.py [--name][--debug] kfolds

example:
    python build_folds.py --name 4-fold-cv-demo 4

"""
import sys
import logging
import argparse
import config
from fold import Fold
from collections import namedtuple


# Configure the root logger at INFO level, then create the module-level logger
# whose name is taken from config.APP_NAME. The script's --debug flag later
# lowers this logger's level to DEBUG.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(config.APP_NAME)


def build_folds(data_spec=None, kfolds=None):
    """
    Build one Fold per cross-validation split and collect the evaluation IDs.

    Every Fold object is constructed first; afterwards each one is built and
    logged in order, and finally the ``ev_id`` of every fold is gathered.

    Args:
        data_spec: the named tuple object that wraps dataset related
            parameters; it is handed unchanged to every Fold.
        kfolds: the integer number of folds; folds are numbered from 0 to
            kfolds - 1.
    Returns:
        a list with the ``ev_id`` of each fold, in fold order.
    """
    # Construct all folds up front, before any of them is built.
    all_folds = []
    for fold_index in range(kfolds):
        all_folds.append(Fold(data_spec=data_spec,
                              this_fold=fold_index,
                              kfolds=kfolds))

    # Build each fold in turn and log its details.
    for fold in all_folds:
        fold.build()
        logger.info(fold)

    # Read the evaluation IDs only after every fold has been built.
    evaluation_ids = []
    for fold in all_folds:
        evaluation_ids.append(fold.ev_id)
    return evaluation_ids


if __name__ == "__main__":
    # Command-line entry point: parse the arguments, assemble the dataset
    # specification, create the entities for every fold and print the
    # command for the next step of the demo.
    parser = argparse.ArgumentParser(
        usage="%(prog)s [--name][--debug] kfolds",
        description="Demo code to create entities on Amazon ML for \
            K-fold cross-validation.")
    parser.add_argument("kfolds",
                        type=int,
                        choices=range(2, 11),  # 2 to 10 is valid input
                        help="the number of folds for cross-validation")
    # NOTE: adjacent string literals are concatenated verbatim, so the first
    # fragment of each help text must end with a space; otherwise the help
    # output reads "...Amazon ML[default: ...]".
    parser.add_argument("-n", "--name",
                        default="CV sample",
                        help="the name of entities to create on Amazon ML "
                             "[default: '%(default)s']")
    parser.add_argument("-d", "--debug",
                        action="store_true",
                        help="enable debug mode, logging from DEBUG level "
                             "[default: off]")

    args = parser.parse_args()
    if args.debug:
        logger.setLevel(logging.DEBUG)  # modify the logging level

    # Only visible when --debug was given (the logger is at INFO otherwise).
    logger.debug("User inputs:")
    logger.debug(vars(args))

    kfolds = args.kfolds
    name = args.name

    # Lightweight record that bundles every dataset/model parameter passed
    # to build_folds.
    DataSpec = namedtuple("DataSpec", ["name", "data_s3_url",
                                       "schema", "recipe",
                                       "ml_model_type", "sgd_maxPasses",
                                       "sgd_maxMLModelSizeInBytes",
                                       "sgd_l2RegularizationAmount"])

    # read datasource schema and training recipe from files
    # (both paths are relative to the current working directory):
    with open("banking.csv.schema", 'r') as schema_f:
        schema = schema_f.read()
    with open("recipe.json", 'r') as recipe_f:
        recipe = recipe_f.read()

    data_spec = DataSpec(name=name,
                         data_s3_url="s3://aml-sample-data/banking.csv",
                         schema=schema,
                         recipe=recipe,
                         ml_model_type="BINARY",
                         sgd_maxPasses="10",
                         sgd_maxMLModelSizeInBytes="104857600",  # 100MiB
                         sgd_l2RegularizationAmount="1e-4")

    eval_ids = build_folds(data_spec=data_spec, kfolds=kfolds)

    # Tell the user how to continue, passing along the new evaluation IDs.
    print("""

====================================
For the next step in the demo, run:
    python collect_perf.py {}""".format(" ".join(eval_ids)))