---
# SAM template for the Step Functions "complex orchestrator" sample.
#
# Resources declared here:
#   * IAM roles for AWS Batch, the ECS task/execution side of Fargate, the
#     state machines, and the two Lambda functions.
#   * An S3 bucket holding the batch script, plus a Fargate-backed AWS Batch
#     compute environment, job definition and job queue.
#   * Three state machines (orchestrator -> sync nested -> callback notify)
#     and two Lambda functions (batch submit, callback notify).
AWSTemplateFormatVersion: "2010-09-09"
Transform: AWS::Serverless-2016-10-31
Description: >
  stepfunction-complex-orchestrator-app

  SAM Template for stepfunction-complex-orchestrator-app

Parameters:
  # No default: the original template carried an empty `Default:` (a YAML
  # null), so the script name has to be supplied at deploy time.
  BatchScriptName:
    Type: String
    Description: 'Name of the AWS Batch script (including ".sh"); the script would need to be copied into a newly generated S3 bucket'
  VPCID:
    Type: String
    Description: 'ID of the VPC (requires public subnets or subnets with NAT to allow internet access) to run Batch Jobs on Fargate'
  FargateSubnetAccessibility:
    Type: String
    Default: Private
    AllowedValues:
      - 'Public'
      - 'Private'
    Description: 'Using Private or Public Subnets for deploying Batch Jobs as Fargate Containers'
  Subnets:
    Type: String
    Description: 'Comma separated list of subnets in the VPC specified where to run batch jobs on fargate '
  batchSleepDuration:
    Type: Number
    Default: 15
    Description: 'Batch script sleep time (in seconds) to mimic job activity, ensure its not greater than 60 seconds to avoid step function timeout'

Conditions:
  # True when the Fargate tasks run in public subnets and therefore need a
  # public IP to reach the container registry.
  EnablePublicIpForFargate: !Equals
    - !Ref FargateSubnetAccessibility
    - 'Public'

Resources:
  # ------------------------------------------------------------------ IAM
  # Service role used by the Batch compute environment.
  AWSBatchServiceRole:
    Type: AWS::IAM::Role
    Properties:
      AssumeRolePolicyDocument:
        Version: "2012-10-17"
        Statement:
          - Effect: Allow
            Principal:
              Service: batch.amazonaws.com
            Action: sts:AssumeRole
      ManagedPolicyArns:
        - arn:aws:iam::aws:policy/service-role/AWSBatchServiceRole

  # Execution role for the Fargate task (image pull, log delivery).
  AWSECSTaskExecutionRole:
    Type: AWS::IAM::Role
    Properties:
      AssumeRolePolicyDocument:
        Version: "2012-10-17"
        Statement:
          - Effect: Allow
            Principal:
              Service: ecs-tasks.amazonaws.com
            Action: sts:AssumeRole
      ManagedPolicyArns:
        - arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy

  # Role assumed by SyncNestedStateMachine2 and
  # ComplexOrchestratorStateMachine1.
  OrchestratorStepFunctionRole:
    Type: AWS::IAM::Role
    Properties:
      AssumeRolePolicyDocument:
        Version: "2012-10-17"
        Statement:
          - Effect: Allow
            Principal:
              # NOTE(review): the original listed two blank service
              # principals. Only Step Functions assumes this role in this
              # template; add a second principal here if another service
              # is meant to assume it as well.
              Service:
                - states.amazonaws.com
            Action:
              - sts:AssumeRole
      Policies:
        - PolicyName: StepFunctionNotificationPolicy
          PolicyDocument:
            Version: "2012-10-17"
            Statement:
              # Event subscription & notification for the managed rules
              # named in the resources below.
              - Effect: Allow
                Action:
                  - events:PutTargets
                  - events:PutRule
                  - events:DescribeRule
                Resource:
                  - !Sub arn:aws:events:${AWS::Region}:${AWS::AccountId}:rule/StepFunctionsGetEventsForECSTaskRule
                  - !Sub arn:aws:events:${AWS::Region}:${AWS::AccountId}:rule/StepFunctionsGetEventsForBatchJobsRule
                  - !Sub arn:aws:events:${AWS::Region}:${AWS::AccountId}:rule/StepFunctionsGetEventsForStepFunctionsExecutionRule
              # Child step function invoke/stop. StartExecution targets a
              # state machine ARN; Stop/DescribeExecution target execution
              # ARNs, so both ARN shapes are listed.
              - Effect: Allow
                Action:
                  - states:StartExecution
                  - states:StopExecution
                  - states:DescribeExecution
                Resource:
                  - !Sub arn:aws:states:${AWS::Region}:${AWS::AccountId}:stateMachine:*
                  - !Sub arn:aws:states:${AWS::Region}:${AWS::AccountId}:execution:*:*
              # Logging, metrics and X-Ray. These actions cannot be scoped
              # to a state machine ARN (the original scoping made the grant
              # ineffective), hence the wildcard resource.
              - Effect: Allow
                Action:
                  - logs:CreateLogDelivery
                  - logs:GetLogDelivery
                  - logs:UpdateLogDelivery
                  - logs:DeleteLogDelivery
                  - logs:ListLogDeliveries
                  - logs:PutResourcePolicy
                  - logs:DescribeResourcePolicies
                  - logs:DescribeLogGroups
                  - cloudwatch:PutMetricData
                  - xray:PutTraceSegments
                  - xray:PutTelemetryRecords
                  - xray:GetSamplingRules
                  - xray:GetSamplingTargets
                Resource: "*"
              # Lambda invocation.
              - Effect: Allow
                Action:
                  - lambda:InvokeFunction
                Resource:
                  - !GetAtt BatchJobSubmitFunction.Arn
                  - !GetAtt CallbackNotifyFunction.Arn
              # Batch job submission, scoped to this stack's queue and job
              # definition.
              # sample arn: arn:aws:batch:us-east-1:xxxxx:job-definition/sample-batch-job-defn-test-app:1
              - Effect: Allow
                Action:
                  - batch:SubmitJob
                Resource:
                  - !Ref BatchJobQueue
                  - !Ref BatchJobDefn
              # DescribeJobs has no resource-level scoping and TerminateJob
              # acts on job ARNs, so neither matches the queue/definition.
              - Effect: Allow
                Action:
                  - batch:DescribeJobs
                  - batch:TerminateJob
                Resource: "*"

  # Role for BatchJobSubmitFunction.
  LambdaExecutingBatchRole:
    Type: AWS::IAM::Role
    Properties:
      AssumeRolePolicyDocument:
        Version: "2012-10-17"
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - lambda.amazonaws.com
            Action:
              - sts:AssumeRole
      ManagedPolicyArns:
        - arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole
      Policies:
        - PolicyName: BatchExecutionPolicy
          PolicyDocument:
            Version: "2012-10-17"
            Statement:
              - Effect: Allow
                Action:
                  - batch:SubmitJob
                Resource:
                  - !Ref BatchJobQueue
                  - !Ref BatchJobDefn
              # Describe/List do not support resource-level scoping.
              - Effect: Allow
                Action:
                  - batch:DescribeJobs
                  - batch:ListJobs
                Resource: "*"

  # Role for CallbackNotifyFunction.
  LambdaNotifyStepFunctionRole:
    Type: AWS::IAM::Role
    Properties:
      AssumeRolePolicyDocument:
        Version: "2012-10-17"
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - lambda.amazonaws.com
            Action:
              - sts:AssumeRole
      Policies:
        - PolicyName: LambdaNotifyStepFunctionPolicy
          PolicyDocument:
            Version: "2012-10-17"
            Statement:
              - Effect: Allow
                Action:
                  - states:SendTaskSuccess
                  - states:SendTaskFailure
                # e.g. arn:aws:states:us-east-1:xxxx:stateMachine:MyStateMachineThrowaway1
                Resource: !Sub arn:aws:states:${AWS::Region}:${AWS::AccountId}:stateMachine:*
      ManagedPolicyArns:
        - arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole

  # Job (task) role for the Batch container: reads the script bucket and
  # reports task success/failure to Step Functions.
  BatchJobRole:
    Type: AWS::IAM::Role
    DependsOn:
      - BatchScriptS3Bucket
    Properties:
      AssumeRolePolicyDocument:
        Version: "2012-10-17"
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - ecs-tasks.amazonaws.com
            Action:
              - sts:AssumeRole
      Policies:
        - PolicyName: S3ReadOnlyPolicy
          PolicyDocument:
            Version: "2012-10-17"
            Statement:
              - Effect: Allow
                Action:
                  - s3:Get*
                  - s3:List*
                  - s3:Head*
                Resource:
                  - !GetAtt BatchScriptS3Bucket.Arn
                  - !Sub ${BatchScriptS3Bucket.Arn}/*
        - PolicyName: BatchNotifyStepFunctionPolicy
          PolicyDocument:
            Version: "2012-10-17"
            Statement:
              - Effect: Allow
                Action:
                  - states:SendTaskSuccess
                  - states:SendTaskFailure
                # e.g. arn:aws:states:us-east-1:xxxx:stateMachine:MyStateMachineThrowaway1
                Resource: !Sub arn:aws:states:${AWS::Region}:${AWS::AccountId}:stateMachine:*

  # -------------------------------------------------------------- Storage
  # Private, encrypted bucket the batch script is uploaded to.
  BatchScriptS3Bucket:
    Type: AWS::S3::Bucket
    Properties:
      BucketEncryption:
        ServerSideEncryptionConfiguration:
          - ServerSideEncryptionByDefault:
              SSEAlgorithm: AES256
              # SSEAlgorithm: 'aws:kms'
              # KMSMasterKeyID: KMS-KEY-ARN
      PublicAccessBlockConfiguration:
        BlockPublicAcls: true
        BlockPublicPolicy: true
        IgnorePublicAcls: true
        RestrictPublicBuckets: true

  # ----------------------------------------------------------- Networking
  FargateSecurityGroup:
    Type: AWS::EC2::SecurityGroup
    Properties:
      GroupDescription: Security Group for Fargate
      VpcId: !Ref VPCID
      # Needed for docker hub outbound connections.
      # The ip of docker hub registry is not static, so HTTPS egress is
      # open to any destination.
      SecurityGroupEgress:
        - IpProtocol: tcp
          FromPort: 443
          ToPort: 443
          CidrIp: 0.0.0.0/0

  # Allows all traffic between members of the Fargate security group.
  FargateInternalRule:
    Type: AWS::EC2::SecurityGroupIngress
    Properties:
      Description: Internal Communication
      GroupId: !Ref FargateSecurityGroup
      # Quoted: the property is a string; "-1" means all protocols.
      IpProtocol: "-1"
      SourceSecurityGroupId: !Ref FargateSecurityGroup

  # ---------------------------------------------------------------- Batch
  ComputeEnvironment:
    Type: AWS::Batch::ComputeEnvironment
    Properties:
      ComputeEnvironmentName: !Sub sample-batch-compute-env-${AWS::StackName}
      Type: MANAGED
      State: ENABLED
      ServiceRole: !GetAtt AWSBatchServiceRole.Arn
      ComputeResources:
        Type: FARGATE
        MaxvCpus: 1
        Subnets: !Split [",", !Ref Subnets]
        SecurityGroupIds:
          - !Ref FargateSecurityGroup

  BatchJobDefn:
    Type: AWS::Batch::JobDefinition
    DependsOn:
      - BatchScriptS3Bucket
    Properties:
      Type: container
      JobDefinitionName: !Sub sample-batch-job-defn-${AWS::StackName}
      PlatformCapabilities:
        - FARGATE
      # Default substitutions for the Ref:: placeholders in Command. The
      # placeholders without a default here (batch_body, function_id,
      # task_token) must be provided when the job is submitted.
      Parameters:
        batch_script: !Ref BatchScriptName
        sleep_interval: "60"
        batch_status: success
      ContainerProperties:
        Image: christianhxc/aws-batch-101
        # Values are quoted so they stay strings ("0.25" must not be
        # re-serialised as a float).
        ResourceRequirements:
          - Type: VCPU
            Value: "0.25"
          - Type: MEMORY
            Value: "512"
        Command:
          - Ref::batch_script
          - Ref::sleep_interval
          - Ref::batch_body
          - Ref::batch_status
          - Ref::function_id
          - Ref::task_token
        FargatePlatformConfiguration:
          PlatformVersion: "1.4.0"
        # Fargate instance needs internet access to connect to Docker Hub
        # and pull down docker image.
        # If deployed to private subnet, ensure it has NAT gateway enabled
        # to allow outbound.
        # Otherwise, enable public ip when going with public subnets.
        NetworkConfiguration: !If
          - EnablePublicIpForFargate
          - AssignPublicIp: ENABLED
          - !Ref AWS::NoValue
        # !GetAtt yields the role ARN; !Ref on a role yields only its name.
        JobRoleArn: !GetAtt BatchJobRole.Arn
        ExecutionRoleArn: !GetAtt AWSECSTaskExecutionRole.Arn
        Environment:
          - Name: BATCH_FILE_S3_URL
            Value: !Sub s3://${BatchScriptS3Bucket}/${BatchScriptName}
          - Name: BATCH_FILE_TYPE
            Value: script
      RetryStrategy:
        Attempts: 1

  BatchJobQueue:
    Type: AWS::Batch::JobQueue
    Properties:
      Priority: 1
      ComputeEnvironmentOrder:
        - Order: 1
          ComputeEnvironment: !Ref ComputeEnvironment
      State: ENABLED
      JobQueueName: !Sub job-queue-${AWS::StackName}

  # ------------------------------------------------------- State machines
  # See the AWS SAM documentation for AWS::Serverless::StateMachine.
  CallbackNotifyStateMachine3:
    Type: AWS::Serverless::StateMachine
    Properties:
      DefinitionUri: statemachine/callback_notify_step_function3.asl.json
      Policies:
        - LambdaInvokePolicy:
            FunctionName: !Ref CallbackNotifyFunction

  SyncNestedStateMachine2:
    Type: AWS::Serverless::StateMachine
    Properties:
      DefinitionUri: statemachine/sync_nested_step_function2.asl.json
      DefinitionSubstitutions:
        BatchJobSubmitFunction: !Ref BatchJobSubmitFunction
        CallbackNotifyFunction: !Ref CallbackNotifyFunction
        CallbackNotifyStepFunction3Arn: !GetAtt CallbackNotifyStateMachine3.Arn
        JobDefinitionName: !Ref BatchJobDefn
        JobName: BatchJobSubmission
        JobQueue: !Ref BatchJobQueue
      Role: !GetAtt OrchestratorStepFunctionRole.Arn

  ComplexOrchestratorStateMachine1:
    Type: AWS::Serverless::StateMachine
    Properties:
      DefinitionUri: statemachine/complex_orchestrator_step_function1.asl.json
      DefinitionSubstitutions:
        SyncNestedStepFunction2Arn: !GetAtt SyncNestedStateMachine2.Arn
        batchSleepDuration: !Ref batchSleepDuration
      Role: !GetAtt OrchestratorStepFunctionRole.Arn

  # ----------------------------------------------------- Lambda functions
  # NOTE(review): python3.8 is an end-of-life Lambda runtime; confirm the
  # function code against a supported runtime before changing it.
  BatchJobSubmitFunction:
    Type: AWS::Serverless::Function
    Properties:
      CodeUri: functions/batch_submit/
      Handler: batch_submit.lambda_handler
      Runtime: python3.8
      Timeout: 20
      Environment:
        Variables:
          JOB_NAME: BatchJobSubmission
          JOB_QUEUE: !Ref BatchJobQueue
          JOB_DEFINITION: !Ref BatchJobDefn
      Role: !GetAtt LambdaExecutingBatchRole.Arn

  CallbackNotifyFunction:
    Type: AWS::Serverless::Function
    DependsOn: LambdaNotifyStepFunctionRole
    Properties:
      CodeUri: functions/callback_lambda/
      Handler: app.lambda_handler
      Runtime: python3.8
      Timeout: 25
      Role: !GetAtt LambdaNotifyStepFunctionRole.Arn

Outputs:
  ComplexOrchestratorStateMachine:
    Description: Main Orchestrator State machine to run
    Value: !GetAtt ComplexOrchestratorStateMachine1.Name
  BucketName:
    Value: !Ref BatchScriptS3Bucket
    Description: Name of the generated Amazon S3 bucket
  BatchJobDefnScriptPath:
    Value: !Sub "s3://${BatchScriptS3Bucket}/${BatchScriptName}"
    Description: Complete path of the sample AWS Batch script (to be uploaded) including the S3 Bucket name.
  BatchJobQueueArn:
    Value: !Ref BatchJobQueue
    Description: Arn of the AWS Batch queue where jobs would be submitted
  BatchJobDefnArn:
    Value: !Ref BatchJobDefn
    Description: Arn of the AWS Batch Job Definition created