AWSTemplateFormatVersion: '2010-09-09'
Description: Code-Free AutoML Pipeline Template
Parameters:
  BucketName:
    Type: String
    Default: code-free-automl-yournamehere
    Description: The name of the new S3 bucket created for the pipeline. Make sure it is globally unique!
  TrainingMinutes:
    Type: Number
    Default: 60
    Description: Training time budget, in minutes.
  TrainingCost:
    Type: Number
    Default: 1
    Description: Expected training cost (USD). The compute instance type used for training is chosen automatically from this budget.
  TargetVariable:
    Type: String
    Default: label
    Description: The name of the target (label) column in the training data.
  EvalMetric:
    Type: String
    Default: f1
    AllowedValues:
      - accuracy
      - nll
      - f1
      - mcc
      - auc
    Description: The evaluation metric the model will optimize.
  AutoMLPresets:
    Type: String
    Default: good_quality_faster_inference_only_refit
    AllowedValues:
      - best_quality
      - best_quality_with_high_quality_refit
      - high_quality_fast_inference_only_refit
      - good_quality_faster_inference_only_refit
      - medium_quality_faster_train
    Description: The AutoGluon preset to use for training.
Resources:
  Bucket:
    Type: AWS::S3::Bucket
    Properties:
      BucketName: !Ref BucketName
      PublicAccessBlockConfiguration:
        BlockPublicAcls: True
        BlockPublicPolicy: True
        IgnorePublicAcls: True
        RestrictPublicBuckets: True
      NotificationConfiguration:
        LambdaConfigurations:
          - Event: s3:ObjectCreated:*
            Filter:
              S3Key:
                Rules:
                  - Name: prefix
                    Value: data/
                  - Name: suffix
                    Value: _train.csv
            Function: !GetAtt Lambda.Arn
  Lambda:
    Type: AWS::Lambda::Function
    Properties:
      Code:
        ZipFile: |
          import datetime
          import json
          import logging
          import os
          from urllib.parse import unquote_plus

          import boto3

          logging.basicConfig(level=logging.INFO,
                              format='%(asctime)s %(message)s',
                              datefmt='%Y-%m-%d %H:%M:%S')

          def lambda_handler(event, context):
              for record in event['Records']:
                  bucket = record['s3']['bucket']['name']
                  key = unquote_plus(record['s3']['object']['key'])
                  logging.info(key)

                  # Derive the dataset name from the uploaded object,
                  # e.g. data/iris_train.csv -> filename=iris_train.csv, dataset=iris
                  filename = key.split('/')[-1]
                  logging.info(filename)
                  dataset = filename.split('_')[0]
                  logging.info(dataset)

                  # Convert the cost budget to an hourly rate and pick the
                  # instance type whose hourly price is closest to it.
                  cost = int(os.environ['TRAINING_COST'])
                  minutes = int(os.environ['TRAINING_MINUTES'])
                  hours = minutes / 60
                  cost_per_hour = cost / hours

                  # ICN (ap-northeast-2) region on-demand prices, USD per hour
                  keys = ['ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge',
                          'ml.c5.9xlarge', 'ml.c5.18xlarge']
                  vals = [0.269, 0.539, 1.075, 2.419, 4.838]
                  diff = [abs(cost_per_hour - v) for v in vals]
                  instance_type = keys[diff.index(min(diff))]
                  # Example: TrainingCost=1, TrainingMinutes=60 -> 1.0 USD/hour,
                  # closest to 1.075, so ml.c5.4xlarge would be selected.
                  instance_type = 'ml.m5.xlarge'  # fixed override for the hands-on lab
                  logging.info(instance_type)

                  str_time = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')[:-3]
                  sm = boto3.Session().client('sagemaker')
                  training_job_params = {
                      'TrainingJobName': dataset + '-autogluon-' + str_time,
                      'HyperParameters': {
                          'filename': json.dumps(filename),
                          'sagemaker_container_log_level': '20',
                          'sagemaker_enable_cloudwatch_metrics': 'false',
                          'sagemaker_program': 'autogluon-tab-with-test.py',
                          'sagemaker_region': os.environ['AWS_REGION'],
                          'sagemaker_submit_directory': 's3://' + bucket + '/source/sourcedir.tar.gz',
                          's3_output': os.environ['S3_OUTPUT_PATH'],
                          'target': os.environ['TARGET'],
                          'eval_metric': os.environ['EVAL_METRIC'],
                          'presets': os.environ['PRESETS'],
                          'training_minutes': os.environ['TRAINING_MINUTES'],
                      },
                      'AlgorithmSpecification': {
                          'TrainingImage': '763104351884.dkr.ecr.'
                                           + os.environ['AWS_REGION']
                                           + '.amazonaws.com/mxnet-training:1.6.0-cpu-py3',
                          'TrainingInputMode': 'File',
                          'EnableSageMakerMetricsTimeSeries': False
                      },
                      'RoleArn': os.environ['SAGEMAKER_ROLE_ARN'],
                      'InputDataConfig': [
                          {
                              'ChannelName': 'training',
                              'DataSource': {
                                  'S3DataSource': {
                                      'S3DataType': 'S3Prefix',
                                      'S3Uri': os.environ['S3_TRIGGER_PATH'],
                                      'S3DataDistributionType': 'FullyReplicated'
                                  }
                              },
                              'CompressionType': 'None',
                              'RecordWrapperType': 'None'
                          }
                      ],
                      'OutputDataConfig': {
                          'KmsKeyId': '',
                          'S3OutputPath': os.environ['S3_OUTPUT_PATH']
                      },
                      'ResourceConfig': {
                          'InstanceType': instance_type,
                          'InstanceCount': 1,
                          'VolumeSizeInGB': 200
                      },
                      'StoppingCondition': {
                          'MaxRuntimeInSeconds': 86400
                      },
                      'EnableNetworkIsolation': False,
                      'EnableInterContainerTrafficEncryption': False,
                      'EnableManagedSpotTraining': False,
                  }
                  sm.create_training_job(**training_job_params)
              return {
                  'statusCode': 200,
                  'body': json.dumps(key)
              }
      Handler: index.lambda_handler
      Runtime: python3.7
      Description: Lambda to kick off SageMaker training job for code-free AutoML pipeline
      MemorySize: 512
      Timeout: 20
      Role: !GetAtt LambdaIamRole.Arn
      Environment:
        Variables:
          S3_OUTPUT_PATH: !Sub s3://${BucketName}/results/
          S3_TRIGGER_PATH: !Sub s3://${BucketName}/data/
          SAGEMAKER_ROLE_ARN: !GetAtt SageMakerIamRole.Arn
          TARGET: !Ref TargetVariable
          EVAL_METRIC: !Ref EvalMetric
          PRESETS: !Ref AutoMLPresets
          TRAINING_MINUTES: !Sub ${TrainingMinutes}
          TRAINING_COST: !Sub ${TrainingCost}
  S3InvokeLambdaPermission:
    Type: AWS::Lambda::Permission
    Properties:
      Action: lambda:InvokeFunction
      FunctionName: !Ref Lambda
      Principal: s3.amazonaws.com
      SourceAccount: !Ref AWS::AccountId
      SourceArn: !Sub arn:aws:s3:::${BucketName}
  LambdaIamRole:
    Type: AWS::IAM::Role
    Properties:
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service: lambda.amazonaws.com
            Action: sts:AssumeRole
      Path: /
      ManagedPolicyArns:
        - arn:aws:iam::aws:policy/AWSLambdaExecute
        - arn:aws:iam::aws:policy/AmazonSageMakerFullAccess
  SageMakerIamRole:
    Type: AWS::IAM::Role
    Properties:
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service: sagemaker.amazonaws.com
            Action: sts:AssumeRole
      Path: /
      ManagedPolicyArns:
        - arn:aws:iam::aws:policy/AmazonSageMakerFullAccess
        - !Ref SageMakerS3Policy
  SageMakerS3Policy:
    Type: AWS::IAM::ManagedPolicy
    Properties:
      ManagedPolicyName: AmazonS3CodeFreeAutoMLAccess
      PolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Action:
              - s3:GetObject
              - s3:PutObject
              - s3:DeleteObject
              - s3:ListBucket
            Resource:
              - !Sub arn:aws:s3:::${BucketName}
              - !Sub arn:aws:s3:::${BucketName}/*
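
# ---------------------------------------------------------------------------
# Usage: a minimal sketch of deploying and triggering the pipeline. The stack
# name, template file name, and CSV file name below are illustrative, not part
# of the template. CAPABILITY_NAMED_IAM is required because the stack creates
# a managed policy with an explicit name.
#
#   aws cloudformation deploy \
#     --template-file template.yaml \
#     --stack-name code-free-automl \
#     --capabilities CAPABILITY_NAMED_IAM \
#     --parameter-overrides BucketName=code-free-automl-yournamehere
#
#   # The training script package the Lambda points at (sourcedir.tar.gz,
#   # containing autogluon-tab-with-test.py) must be staged first:
#   aws s3 cp sourcedir.tar.gz s3://code-free-automl-yournamehere/source/sourcedir.tar.gz
#
#   # Uploading a CSV matching data/*_train.csv then kicks off a training job:
#   aws s3 cp iris_train.csv s3://code-free-automl-yournamehere/data/iris_train.csv
# ---------------------------------------------------------------------------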