# CloudFormation for an ML project (potentially touching several containers, users)
#
# Deploys an Amazon Forecast pipeline:
#   * S3 buckets for source data and exported forecasts
#   * a SageMaker notebook instance pre-configured with the bucket names and Forecast role
#   * a Step Functions state machine (Lambda-backed) that is started by writes under the
#     input/ prefix of the source bucket, via CloudTrail data events + an EventBridge rule
AWSTemplateFormatVersion: '2010-09-09'
Transform: 'AWS::Serverless-2016-10-31'

# Parameters:
#   Anything?

# Settings shared by every AWS::Serverless::Function below. Individual functions only declare
# what differs (description, handler, code location) plus their execution role.
Globals:
  Function:
    MemorySize: 256
    # python3.7 has been deprecated by Lambda and can no longer be used to create functions.
    # NOTE(review): confirm the handler code under ./lambdas runs on this runtime.
    Runtime: python3.12
    Timeout: 120

Resources:

  ########## STORAGE ##########

  # Bucket for raw input data (objects under input/ trigger the pipeline).
  SourceBucket:
    Type: 'AWS::S3::Bucket'
    Properties:
      PublicAccessBlockConfiguration:
        BlockPublicAcls: true
        IgnorePublicAcls: true
        BlockPublicPolicy: true
        RestrictPublicBuckets: true

  # Bucket for exported forecasts.
  ForecastsBucket:
    Type: 'AWS::S3::Bucket'
    Properties:
      PublicAccessBlockConfiguration:
        BlockPublicAcls: true
        IgnorePublicAcls: true
        BlockPublicPolicy: true
        RestrictPublicBuckets: true

  ########## PERMISSIONS ##########

  # Execution role shared by all pipeline Lambda functions.
  SharedLambdaRole:
    Type: 'AWS::IAM::Role'
    Properties:
      AssumeRolePolicyDocument:
        # Quoted so YAML parsers cannot interpret the policy version as a date.
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - lambda.amazonaws.com
            Action:
              - 'sts:AssumeRole'
      ManagedPolicyArns:
        - 'arn:aws:iam::aws:policy/AmazonForecastFullAccess'
        # TODO: Scope down Lambda permissions to remove admin
        - 'arn:aws:iam::aws:policy/AdministratorAccess'

  # Read/write/list access to the source and forecast buckets (shared by Forecast and the notebook).
  BucketsFullAccessPolicy:
    Type: 'AWS::IAM::ManagedPolicy'
    Properties:
      PolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Action:
              - 's3:Get*'
              - 's3:PutObject'
              - 's3:DeleteObject'
              - 's3:List*'
            Resource:
              - !GetAtt SourceBucket.Arn
              - !Sub '${SourceBucket.Arn}/*'
              - !GetAtt ForecastsBucket.Arn
              - !Sub '${ForecastsBucket.Arn}/*'

  # Role assumed by Amazon Forecast to read input data and write exports.
  ForecastRole:
    Type: 'AWS::IAM::Role'
    Properties:
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service: forecast.amazonaws.com
            Action: 'sts:AssumeRole'
      ManagedPolicyArns:
        - !Ref BucketsFullAccessPolicy

  # (As created by default in StepFunctions console)
  StepFunctionsXRayAccessPolicy:
    Type: 'AWS::IAM::ManagedPolicy'
    Properties:
      PolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Action:
              - 'xray:PutTraceSegments'
              - 'xray:PutTelemetryRecords'
              - 'xray:GetSamplingRules'
              - 'xray:GetSamplingTargets'
            Resource: '*'

  # (As created by default in StepFunctions console)
  CloudWatchLogsDeliveryFullAccessPolicy:
    Type: 'AWS::IAM::ManagedPolicy'
    Properties:
      # ManagedPolicyName: !Sub '${AWS::StackName}-CWLogsDeliveryFullAccess'
      # Path: !Sub '/${AWS::StackName}/'
      PolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Action:
              - 'logs:CreateLogDelivery'
              - 'logs:GetLogDelivery'
              - 'logs:UpdateLogDelivery'
              - 'logs:DeleteLogDelivery'
              - 'logs:ListLogDeliveries'
              - 'logs:PutResourcePolicy'
              - 'logs:DescribeResourcePolicies'
              - 'logs:DescribeLogGroups'
            Resource: '*'

  # Execution role for the pipeline state machine.
  StepFunctionRole:
    Type: 'AWS::IAM::Role'
    Properties:
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service: states.amazonaws.com
            Action: 'sts:AssumeRole'
      ManagedPolicyArns:
        - !Ref StepFunctionsXRayAccessPolicy
        - !Ref CloudWatchLogsDeliveryFullAccessPolicy
      Policies:
        # Replaces the deprecated AWSLambdaFullAccess managed policy (which can no longer be
        # attached to new roles) with invoke-only access to the pipeline's own functions.
        # The ':*' entries cover version/alias-qualified ARNs.
        # NOTE(review): assumes statemachine.asl.json only invokes these functions - confirm.
        - PolicyName: LambdaInvokeScopedAccess
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              - Effect: Allow
                Action: 'lambda:InvokeFunction'
                Resource:
                  - !GetAtt CreateDatasetImportFunction.Arn
                  - !Sub '${CreateDatasetImportFunction.Arn}:*'
                  - !GetAtt GetStatusImportFunction.Arn
                  - !Sub '${GetStatusImportFunction.Arn}:*'
                  - !GetAtt CreatePredictorFunction.Arn
                  - !Sub '${CreatePredictorFunction.Arn}:*'
                  - !GetAtt GetStatusPredictorFunction.Arn
                  - !Sub '${GetStatusPredictorFunction.Arn}:*'
                  - !GetAtt CreateForecastFunction.Arn
                  - !Sub '${CreateForecastFunction.Arn}:*'
                  - !GetAtt GetStatusForecastFunction.Arn
                  - !Sub '${GetStatusForecastFunction.Arn}:*'
                  - !GetAtt CreateForecastExportJobFunction.Arn
                  - !Sub '${CreateForecastExportJobFunction.Arn}:*'
                  - !GetAtt GetStatusForecastExportJobFunction.Arn
                  - !Sub '${GetStatusForecastExportJobFunction.Arn}:*'
                  - !GetAtt NotifyUserFunction.Arn
                  - !Sub '${NotifyUserFunction.Arn}:*'

  ########## NOTEBOOK ##########

  # Execution role for the notebook:
  SageMakerNotebookRole:
    Type: 'AWS::IAM::Role'
    Properties:
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - sagemaker.amazonaws.com
            Action:
              - 'sts:AssumeRole'
      ManagedPolicyArns:
        - 'arn:aws:iam::aws:policy/AmazonSageMakerFullAccess'
        - !Ref BucketsFullAccessPolicy

  # Lifecycle configuration to automate some extra notebook setup
  NotebookConfig:
    Type: 'AWS::SageMaker::NotebookInstanceLifecycleConfig'
    Properties:
      OnStart:
        - Content:
            # !Sub resolves the ${...} references at deploy time, before the script ever runs;
            # the quoted heredocs then stop the shell from expanding anything further.
            Fn::Base64: !Sub |
              #!/bin/bash
              set -e

              # Install extension for ipywidgets interactivity:
              sudo -u ec2-user -i <<'EOF'
              source /home/ec2-user/anaconda3/bin/activate JupyterSystemEnv
              jupyter labextension install @jupyter-widgets/jupyterlab-manager
              source /home/ec2-user/anaconda3/bin/deactivate
              EOF

              # Pass Forecast role ARN and bucket name in to the notebook via env vars:
              touch /etc/profile.d/jupyter-env.sh
              cat << 'EOF' >> /etc/profile.d/jupyter-env.sh
              export FORECAST_IAM_ROLE_ARN='${ForecastRole.Arn}'
              export DATA_BUCKET_NAME='${SourceBucket}'
              export FORECAST_BUCKET_NAME='${ForecastsBucket}'
              EOF

              # Restart Jupyter so it picks up the new environment variables.
              # Amazon Linux 2 notebook platforms use systemd; the original Amazon Linux
              # platform uses upstart. Calling the wrong one would abort the script (set -e)
              # and fail the notebook start.
              if command -v systemctl > /dev/null 2>&1; then
                systemctl --no-block restart jupyter-server.service
              else
                initctl restart jupyter-server --no-wait
              fi

  # SageMaker Notebook
  NotebookInstance:
    Type: 'AWS::SageMaker::NotebookInstance'
    Properties:
      # DefaultCodeRepository: 'https://github.com/glyfnet/timeseries_blog'  # Can only use public repositories
      InstanceType: ml.t3.medium
      LifecycleConfigName: !GetAtt NotebookConfig.NotebookInstanceLifecycleConfigName
      # Default is just notebookinstance-{RANDOM}
      NotebookInstanceName: !Sub '${AWS::StackName}-ForecastNotebook'
      RoleArn: !GetAtt SageMakerNotebookRole.Arn
      # VolumeSizeInGB: 10

  ########## INPUT INTEGRATION (TRIGGERING THE STEP FUNCTION) ##########

  # We'll use an S3 trigger to kick off our state machine, but let the state machine handle the
  # rest of the orchestration. Triggering a state machine from S3 is a little different to
  # triggering a Lambda - we need to do it via CloudTrail. First create a CloudTrail log storage
  # bucket and give CloudTrail access to it...
  InputCloudTrailBucket:
    Type: 'AWS::S3::Bucket'
    Properties:
      PublicAccessBlockConfiguration:
        BlockPublicAcls: true
        IgnorePublicAcls: true
        BlockPublicPolicy: true
        RestrictPublicBuckets: true

  LogBucketPolicy:
    Type: 'AWS::S3::BucketPolicy'
    Properties:
      Bucket: !Ref InputCloudTrailBucket
      PolicyDocument:
        Version: '2012-10-17'
        Statement:
          # Reject any request that does not use TLS.
          - Sid: Require Secure Transport
            Action: 's3:*'
            Effect: Deny
            Resource:
              - !Sub 'arn:${AWS::Partition}:s3:::${InputCloudTrailBucket}'
              - !Sub 'arn:${AWS::Partition}:s3:::${InputCloudTrailBucket}/*'
            Condition:
              Bool:
                'aws:SecureTransport': 'false'
            Principal: '*'
          - Sid: AWSCloudTrailAclCheck
            Effect: Allow
            Principal:
              Service: cloudtrail.amazonaws.com
            Action: 's3:GetBucketAcl'
            Resource: !GetAtt InputCloudTrailBucket.Arn
          - Sid: AWSCloudTrailWrite
            Effect: Allow
            Principal:
              Service: cloudtrail.amazonaws.com
            Action: 's3:PutObject'
            Resource: !Sub '${InputCloudTrailBucket.Arn}/*'
            Condition:
              StringEquals:
                's3:x-amz-acl': 'bucket-owner-full-control'

  # ...Then set up CloudTrail logging of write events under the source bucket's input/ prefix
  # to the CloudTrail bucket:
  RawInputTrail:
    Type: 'AWS::CloudTrail::Trail'
    # The bucket policy must exist before CloudTrail tries to write to the bucket.
    DependsOn:
      - LogBucketPolicy
    Properties:
      EventSelectors:
        - DataResources:
            - Type: 'AWS::S3::Object'
              Values:
                - !Sub '${SourceBucket.Arn}/input/'
          IncludeManagementEvents: false
          ReadWriteType: WriteOnly
      IncludeGlobalServiceEvents: false
      IsLogging: true
      IsMultiRegionTrail: false
      S3BucketName: !Ref InputCloudTrailBucket
      # S3KeyPrefix: Might be useful to scope down?
      # TrailName: Use default

  # Finally create an execution role (with permission to actually start the Step Function) and
  # the rule itself, to start an execution every time an object is written to the source bucket.
  AmazonCloudWatchEventRole:
    Type: 'AWS::IAM::Role'
    Properties:
      AssumeRolePolicyDocument:
        # Quoted so YAML parsers cannot interpret the policy version as a date.
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - events.amazonaws.com
            Action: 'sts:AssumeRole'
      Path: /
      Policies:
        - PolicyName: cwe-pipeline-execution
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              - Effect: Allow
                Action: 'states:StartExecution'
                Resource: !Ref PipelineStateMachine

  # NOTE(review): this pattern matches on bucket name only; it relies on RawInputTrail's event
  # selector to limit triggering to the input/ prefix. Another trail logging data events for this
  # bucket would widen what starts the pipeline - consider also filtering on requestParameters.key.
  InputCloudWatchEventRule:
    Type: 'AWS::Events::Rule'
    Properties:
      EventPattern:
        source:
          - aws.s3
        detail-type:
          - 'AWS API Call via CloudTrail'
        detail:
          eventSource:
            - s3.amazonaws.com
          eventName:
            - CopyObject
            - PutObject
            - CompleteMultipartUpload
          requestParameters:
            bucketName:
              - !Ref SourceBucket
      Targets:
        - Arn: !Ref PipelineStateMachine
          RoleArn: !GetAtt AmazonCloudWatchEventRole.Arn
          Id: start-forecast-statemachine

  ########## PIPELINE ##########

  # Memory size, runtime and timeout for every function come from the Globals section.
  # FunctionName is omitted everywhere to take the generated default.
  # Add an Environment / Variables block per function if configuration is needed.

  CreateDatasetImportFunction:
    Type: 'AWS::Serverless::Function'
    Properties:
      Description: Start dataset import to Amazon Forecast
      Handler: datasetimport.lambda_handler
      Role: !GetAtt SharedLambdaRole.Arn
      CodeUri: ./lambdas/createdatasetimport

  GetStatusImportFunction:
    Type: 'AWS::Serverless::Function'
    Properties:
      Description: Check dataset import progress
      Handler: getstatusimport.lambda_handler
      Role: !GetAtt SharedLambdaRole.Arn
      CodeUri: ./lambdas/GetStatusImport

  CreatePredictorFunction:
    Type: 'AWS::Serverless::Function'
    Properties:
      Description: Start model training
      Handler: predictor.lambda_handler
      Role: !GetAtt SharedLambdaRole.Arn
      CodeUri: ./lambdas/createpredictor

  GetStatusPredictorFunction:
    Type: 'AWS::Serverless::Function'
    Properties:
      Description: Check predictor status
      Handler: getstatuspredictor.lambda_handler
      Role: !GetAtt SharedLambdaRole.Arn
      CodeUri: ./lambdas/GetStatusPredictor

  CreateForecastFunction:
    Type: 'AWS::Serverless::Function'
    Properties:
      Description: Start forecast creation
      Handler: forecast.lambda_handler
      Role: !GetAtt SharedLambdaRole.Arn
      CodeUri: ./lambdas/createforecast

  GetStatusForecastFunction:
    Type: 'AWS::Serverless::Function'
    Properties:
      Description: Check forecast job status
      Handler: getstatusforecast.lambda_handler
      Role: !GetAtt SharedLambdaRole.Arn
      CodeUri: ./lambdas/GetStatusForecast

  CreateForecastExportJobFunction:
    Type: 'AWS::Serverless::Function'
    Properties:
      Description: Start export of a forecast
      Handler: forecastexportjob.lambda_handler
      Role: !GetAtt SharedLambdaRole.Arn
      CodeUri: ./lambdas/createforecastexportjob

  GetStatusForecastExportJobFunction:
    Type: 'AWS::Serverless::Function'
    Properties:
      Description: Check forecast export job status
      Handler: getstatusforecastexportjob.lambda_handler
      Role: !GetAtt SharedLambdaRole.Arn
      CodeUri: ./lambdas/GetStatusForecastExportJob

  NotifyUserFunction:
    Type: 'AWS::Serverless::Function'
    Properties:
      Description: Notify user of forecast export
      Handler: notifyuser.lambda_handler
      Role: !GetAtt SharedLambdaRole.Arn
      CodeUri: ./lambdas/NotifyUser

  # Destination for state machine execution logs.
  PipelineLogGroup:
    Type: 'AWS::Logs::LogGroup'
    # Properties:
    #   LogGroupName:  # Whatevs, the default isn't too bad?
    #   RetentionInDays:  # 1, 3, 5, 7, 14, 30, 60, 90, 120, 150, 180, 365, 400, 545, 731, 1827, 3653

  PipelineStateMachine:
    Type: 'AWS::Serverless::StateMachine'
    Properties:
      # Omitting name gives PipelineStateMachine-[GARBAGE], which might be nice enough for you but might not!
      Name: !Sub '${AWS::StackName}-PipelineMachine'
      DefinitionUri: ./statemachine.asl.json
      # Placeholders in the ASL definition are replaced with the deployed function ARNs.
      DefinitionSubstitutions:
        CreateDatasetImportFunctionArn: !GetAtt CreateDatasetImportFunction.Arn
        GetStatusImportFunctionArn: !GetAtt GetStatusImportFunction.Arn
        CreatePredictorFunctionArn: !GetAtt CreatePredictorFunction.Arn
        GetStatusPredictorFunctionArn: !GetAtt GetStatusPredictorFunction.Arn
        CreateForecastFunctionArn: !GetAtt CreateForecastFunction.Arn
        GetStatusForecastFunctionArn: !GetAtt GetStatusForecastFunction.Arn
        CreateForecastExportJobFunctionArn: !GetAtt CreateForecastExportJobFunction.Arn
        GetStatusForecastExportJobFunctionArn: !GetAtt GetStatusForecastExportJobFunction.Arn
        NotifyUserFunctionArn: !GetAtt NotifyUserFunction.Arn
      Logging:
        Destinations:
          - CloudWatchLogsLogGroup:
              LogGroupArn: !GetAtt PipelineLogGroup.Arn
        IncludeExecutionData: true
        Level: ALL  # or ERROR, FATAL, OFF (default)
      Role: !GetAtt StepFunctionRole.Arn
      Type: STANDARD
      # Tags:
      #   - Key: ...
      #     Value: ...

Outputs:
  # Values surfaced to whoever deploys the stack, for use from the notebook or other tooling.

  # Resolves to the Ref of the source data bucket.
  SourceBucketName:
    Description: 'Bucket for input data'
    Value:
      Ref: SourceBucket

  # Resolves to the Ref of the bucket that receives forecast exports.
  ForecastBucketName:
    Description: 'Bucket for exported forecasts'
    Value:
      Ref: ForecastsBucket

  # Resolves to the Ref of the SageMaker notebook instance resource.
  NotebookInstance:
    Description: 'SageMaker notebook instance'
    Value:
      Ref: NotebookInstance

  # ARN of the role that Amazon Forecast assumes.
  ForecastRoleArn:
    Description: 'Execution role to use for Amazon Forecast (with access to required buckets)'
    Value:
      Fn::GetAtt:
        - ForecastRole
        - Arn