# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: MIT-0
#
# SAM template for the sfn-test-runner sample application.
# It declares one shared IAM role, four Step Functions state machines
# (Main, Distributor, Test-SimpleWait, ResultRecorder), two DynamoDB tables,
# six Lambda functions, an S3 bucket, a CloudWatch log group and a dashboard.

AWSTemplateFormatVersion: "2010-09-09"
Transform: AWS::Serverless-2016-10-31
Description: "Sfn-test-runner: An example app that demonstrates how to decompose large workflows to improve parallelism and manageability"

Parameters:
  # Used as the prefix of every explicitly named resource below, substituted
  # into the result-recorder definition, used in the dashboard SEARCH
  # expressions, and exported as the metric namespace output.
  ParameterInstancePrefix:
    Type: String
    Default: "TestRunner"
    Description: "Prefix to be used in names of the things created by this stack"

Resources:
  ############### Test Infrastructure ###############################################

  # Common IAM role used by every state machine and Lambda function in this app.
  ApplicationRole:
    Type: AWS::IAM::Role
    Properties:
      AssumeRolePolicyDocument:
        Version: "2012-10-17"
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - states.amazonaws.com
                - lambda.amazonaws.com
            Action:
              - sts:AssumeRole
      Policies:
        - PolicyName: AppPolicy
          PolicyDocument:
            # Quoted so YAML 1.1 parsers do not read the version as a date.
            Version: "2012-10-17"
            Statement:
              # EventBridge, Step Functions, X-Ray, CloudWatch Logs delivery
              # and CloudWatch metrics permissions, granted on all resources.
              - Effect: Allow
                Action:
                  - events:PutTargets
                  - events:PutRule
                  - events:DescribeRule
                  - states:StartExecution
                  - xray:PutTraceSegments
                  - xray:PutTelemetryRecords
                  - xray:GetSamplingRules
                  - xray:GetSamplingTargets
                  - logs:CreateLogDelivery
                  - logs:GetLogDelivery
                  - logs:UpdateLogDelivery
                  - logs:DeleteLogDelivery
                  - logs:ListLogDeliveries
                  - logs:PutResourcePolicy
                  - logs:DescribeResourcePolicies
                  - logs:DescribeLogGroups
                  - cloudwatch:PutMetricData
                Resource: "*"
              # Full DynamoDB access, limited to the two tables of this stack.
              # NOTE(review): "dynamodb:*" is broader than a read/write workload
              # usually needs; narrow once the actions used by the state
              # machine definitions and functions are confirmed.
              - Effect: Allow
                Action:
                  - "dynamodb:*"
                Resource:
                  - !GetAtt TableTestResults.Arn
                  - !GetAtt TableTestRuns.Arn
              # NOTE(review): allows invoking any Lambda function in the
              # account; consider restricting to the functions declared below.
              - Effect: Allow
                Action:
                  - lambda:InvokeFunction
                Resource: "*"
              # Object read/write access to the intermediary bucket only.
              - Effect: Allow
                Action:
                  - s3:GetObject
                  - s3:PutObject
                Resource: !Sub "arn:aws:s3:::${IntermediaryS3Bucket}/*"
      ManagedPolicyArns:
        - arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole

  ############### State machines ####################################################

  # Main state machine that runs the tests (STANDARD, X-Ray tracing enabled).
  StateMachineMain:
    Type: AWS::Serverless::StateMachine
    Properties:
      Name: !Sub "${ParameterInstancePrefix}-Main"
      Type: "STANDARD"
      DefinitionUri: statemachine/main.asl.json
      DefinitionSubstitutions:
        FunctionBuildList: !GetAtt FunctionCreateBatchesInS3.Arn
        # !Ref on a DynamoDB table returns its name, so these stay in sync
        # with the TableName properties instead of rebuilding the string.
        TableTestResults: !Ref TableTestResults
        TableTestRuns: !Ref TableTestRuns
        LambdaGetTestRunReport: !GetAtt LambdaGetTestRunReport.Arn
        FunctionGetBatcheFromS3: !GetAtt FunctionGetBatcheFromS3.Arn
        StateMachineDistributor: !Ref StateMachineDistributor
      Tracing:
        Enabled: true
      Role: !GetAtt ApplicationRole.Arn
      Logging:
        Destinations:
          - CloudWatchLogsLogGroup:
              LogGroupArn: !GetAtt LogGroupStateMachines.Arn
        IncludeExecutionData: true
        Level: "ALL"

  # Distributor state machine (STANDARD). Its definition is given the ARNs of
  # the split function, the simple-wait test and the result recorder.
  StateMachineDistributor:
    Type: AWS::Serverless::StateMachine
    Properties:
      Name: !Sub "${ParameterInstancePrefix}-Distributor"
      Type: "STANDARD"
      DefinitionUri: statemachine/distributor.asl.json
      DefinitionSubstitutions:
        FunctionSplitIfRequired: !GetAtt FunctionSplitIfRequired.Arn
        StateMachineTestSimplewait: !Ref StateMachineTestSimplewait
        StateMachineTestResultRecorder: !Ref StateMachineTestResultRecorder
      Role: !GetAtt ApplicationRole.Arn
      Logging:
        Destinations:
          - CloudWatchLogsLogGroup:
              LogGroupArn: !GetAtt LogGroupStateMachines.Arn
        IncludeExecutionData: true
        Level: "ALL"

  # Simple-wait test state machine (EXPRESS); takes no definition substitutions.
  StateMachineTestSimplewait:
    Type: AWS::Serverless::StateMachine
    Properties:
      Name: !Sub "${ParameterInstancePrefix}-Test-SimpleWait"
      Type: "EXPRESS"
      DefinitionUri: statemachine/runner-simplewait.asl.json
      Role: !GetAtt ApplicationRole.Arn
      Logging:
        Destinations:
          - CloudWatchLogsLogGroup:
              LogGroupArn: !GetAtt LogGroupStateMachines.Arn
        IncludeExecutionData: true
        Level: "ALL"

  # Result recorder state machine (EXPRESS). Its definition is given the
  # metric-recording and result-extraction functions, the results table name
  # and the instance prefix.
  StateMachineTestResultRecorder:
    Type: AWS::Serverless::StateMachine
    Properties:
      Name: !Sub "${ParameterInstancePrefix}-ResultRecorder"
      Type: "EXPRESS"
      DefinitionUri: statemachine/test-result-recorder.asl.json
      DefinitionSubstitutions:
        LambdaRecordMetricDataFunction: !GetAtt LambdaRecordMetricDataFunction.Arn
        ParameterInstancePrefix: !Ref ParameterInstancePrefix
        TableTestResults: !Ref TableTestResults
        LambdaGetTestResultsFromSfnStatus: !GetAtt LambdaGetTestResultsFromSfnStatus.Arn
      Role: !GetAtt ApplicationRole.Arn
      Logging:
        Destinations:
          - CloudWatchLogsLogGroup:
              LogGroupArn: !GetAtt LogGroupStateMachines.Arn
        IncludeExecutionData: true
        Level: "ALL"

  ############### DynamoDB tables ###################################################

  # DynamoDB table that stores test results, keyed by TestRunId + TestId.
  TableTestResults:
    Type: AWS::DynamoDB::Table
    Properties:
      TableName: !Sub "${ParameterInstancePrefix}-testresulttable"
      BillingMode: PAY_PER_REQUEST
      AttributeDefinitions:
        - AttributeName: "TestRunId"
          AttributeType: "S"
        - AttributeName: "TestId"
          AttributeType: "S"
      KeySchema:
        - AttributeName: "TestRunId"
          KeyType: "HASH"
        - AttributeName: "TestId"
          KeyType: "RANGE"

  # DynamoDB table with one item per TestRunId.
  TableTestRuns:
    Type: AWS::DynamoDB::Table
    Properties:
      TableName: !Sub "${ParameterInstancePrefix}-testrunstable"
      BillingMode: PAY_PER_REQUEST
      AttributeDefinitions:
        - AttributeName: "TestRunId"
          AttributeType: "S"
      KeySchema:
        - AttributeName: "TestRunId"
          KeyType: "HASH"

  ############### Lambda functions ##################################################
  # NOTE(review): every function below pins Runtime python3.8, which AWS Lambda
  # has deprecated. The function sources are not part of this template, so the
  # runtime is left unchanged here; confirm the code runs on a supported Python
  # runtime and bump all six functions together.

  # Lambda function that builds up a list of things to run.
  # Receives the simple-wait state machine ARN and the bucket name through
  # environment variables.
  FunctionCreateBatchesInS3:
    Type: AWS::Serverless::Function
    Properties:
      CodeUri: functions/create_batches_in_s3/
      Handler: app.lambda_handler
      Runtime: python3.8
      Timeout: 900
      MemorySize: 5120
      Environment:
        Variables:
          test_statemachine_simplewait: !Ref StateMachineTestSimplewait
          s3bucket: !Ref IntermediaryS3Bucket
      Role: !GetAtt ApplicationRole.Arn

  # Lambda function packaged from functions/get_batches_from_s3/.
  # The "Batche" spelling is kept: it is the logical ID and the substitution
  # key passed to the main state machine definition.
  FunctionGetBatcheFromS3:
    Type: AWS::Serverless::Function
    Properties:
      CodeUri: functions/get_batches_from_s3/
      Handler: app.lambda_handler
      Runtime: python3.8
      Timeout: 60
      MemorySize: 1024
      Role: !GetAtt ApplicationRole.Arn

  # Lambda function packaged from functions/split_if_required/; its ARN is
  # passed to the distributor state machine definition.
  FunctionSplitIfRequired:
    Type: AWS::Serverless::Function
    Properties:
      CodeUri: functions/split_if_required/
      Handler: app.lambda_handler
      Runtime: python3.8
      Timeout: 60
      Role: !GetAtt ApplicationRole.Arn

  # Lambda function packaged from
  # functions/get_test_results_from_state_machine_output/; its ARN is passed to
  # the result recorder state machine definition.
  LambdaGetTestResultsFromSfnStatus:
    Type: AWS::Serverless::Function
    Properties:
      CodeUri: functions/get_test_results_from_state_machine_output/
      Handler: app.lambda_handler
      Runtime: python3.8
      Timeout: 60
      Role: !GetAtt ApplicationRole.Arn

  # Lambda function that will get results of a test run to provide in the output.
  LambdaGetTestRunReport:
    Type: AWS::Serverless::Function
    Properties:
      CodeUri: functions/get_test_run_report/
      Handler: app.lambda_handler
      Runtime: python3.8
      Timeout: 300
      MemorySize: 1024
      Role: !GetAtt ApplicationRole.Arn

  # Lambda function that will record a metric for the test execution.
  # Concurrency is capped at 300 reserved executions.
  LambdaRecordMetricDataFunction:
    Type: AWS::Serverless::Function
    Properties:
      CodeUri: functions/record_metric_data/
      Handler: app.lambda_handler
      Runtime: python3.8
      Timeout: 10
      ReservedConcurrentExecutions: 300
      Role: !GetAtt ApplicationRole.Arn

  ############### Storage and logging ###############################################

  # An S3 bucket to store data being processed.
  # Retained on stack deletion; encrypted with SSE-S3 and versioned.
  IntermediaryS3Bucket:
    Type: AWS::S3::Bucket
    DeletionPolicy: Retain
    Properties:
      AccessControl: Private
      BucketEncryption:
        ServerSideEncryptionConfiguration:
          - ServerSideEncryptionByDefault:
              SSEAlgorithm: AES256
      VersioningConfiguration:
        Status: Enabled

  # Single log group shared by all four state machines.
  LogGroupStateMachines:
    Type: AWS::Logs::LogGroup
    Properties:
      LogGroupName: !Sub "/aws/states/${ParameterInstancePrefix}-StateMachineLogs"

  ############### Dashboard #########################################################

  # CloudWatch dashboard. DashboardBody is a JSON document assembled with
  # !Join (empty delimiter); the list is split at widget boundaries so each
  # widget can be read on its own. Inside the single-quoted pieces '' is a
  # literal single quote and backslashes are literal (they are JSON escapes).
  DashboardMain:
    Type: AWS::CloudWatch::Dashboard
    Properties:
      DashboardName: !Sub "${ParameterInstancePrefix}-${AWS::Region}"
      DashboardBody: !Join
        - ""
        - - '{"widgets":['
          # Widget: Step Functions standard state-transition quota consumption.
          - '{"type":"metric","x":0,"y":12,"width":9,"height":6,"properties":{"metrics":[["AWS\/States","ThrottledEvents","ServiceMetric","StateTransition"],[".","ConsumedCapacity",".","."],[".","ProvisionedRefillRate",".",".",{"yAxis":"right","stat":"Maximum"}]],"view":"timeSeries","stacked":false,"region":"'
          - !Ref "AWS::Region"
          - '","stat":"Sum","period":60,"yAxis":{"right":{"label":"Refill Rate (Per Second)","min":0,"showUnits":false},"left":{"min":0,"label":"State Transitions (per Minute)","showUnits":false}},"title":"Step Functions - Standard - State Transition Quota Consumption","start":"-PT1H","end":"P0D"}},'
          # Widget: DescribeExecution API consumption and throttling.
          - '{"type":"metric","x":9,"y":12,"width":9,"height":6,"properties":{"metrics":[["AWS\/States","ConsumedCapacity","APIName","DescribeExecution"],[".","ThrottledEvents",".","."]],"view":"timeSeries","stacked":false,"region":"'
          - !Ref "AWS::Region"
          - '","stat":"Sum","period":60,"yAxis":{"right":{"label":"","showUnits":false},"left":{"min":0,"label":"Transactions Per Minute","showUnits":false}},"title":"Step Functions - API Calls - Describe Execution","start":"-PT1H","end":"P0D"}},'
          # Widget: StartExecution / StartExpressExecution API consumption and throttling.
          - '{"type":"metric","x":0,"y":18,"width":9,"height":6,"properties":{"view":"timeSeries","stacked":false,"region":"'
          - !Ref "AWS::Region"
          - '","stat":"Sum","period":60,"yAxis":{"right":{"label":"","showUnits":false},"left":{"min":0,"label":"Transactions Per Minute","showUnits":false}},"title":"Step Functions - API Calls - StartExecution and StartExpressExecution","start":"-PT1H","end":"P0D","metrics":[["AWS\/States","ConsumedCapacity","APIName","StartExpressExecution"],[".","ThrottledEvents",".","."],[".","ConsumedCapacity",".","StartExecution"],[".","ThrottledEvents",".","."]]}},'
          # Widget: average of the TestSuccess metric per TestName.
          - '{"type":"metric","x":0,"y":6,"width":9,"height":6,"properties":{"metrics":[[{"expression":"SEARCH(''{'
          - !Ref ParameterInstancePrefix
          - ',TestName} TestSuccess'', ''Average'', 60)","id":"e1","period":300}]],"view":"timeSeries","stacked":false,"region":"'
          - !Ref "AWS::Region"
          - '","stat":"Sum","period":300,"yAxis":{"right":{"label":"","showUnits":false},"left":{"min":0,"label":"Success Rate","showUnits":false,"max":1}},"title":"Test Success Rate","start":"-PT1H","end":"P0D"}},'
          # Widget: average of the TestDuration metric per TestName.
          - '{"type":"metric","x":9,"y":0,"width":9,"height":6,"properties":{"metrics":[[{"expression":"SEARCH(''{'
          - !Ref ParameterInstancePrefix
          - ',TestName} TestDuration'', ''Average'', 60)","id":"e2","period":300}]],"view":"timeSeries","stacked":false,"region":"'
          - !Ref "AWS::Region"
          - '","stat":"Sum","period":300,"yAxis":{"right":{"label":"","showUnits":false},"left":{"min":0,"label":"Average Test Duration (ms)","showUnits":false}},"title":"Average Test Duration","start":"-PT1H","end":"P0D"}},'
          # Widget: p99 of the TestDuration metric per TestName.
          # The axis label previously read "Average Test Duration (ms)"
          # (copied from the widget above); it now matches the p99 statistic.
          - '{"type":"metric","x":9,"y":6,"width":9,"height":6,"properties":{"metrics":[[{"expression":"SEARCH(''{'
          - !Ref ParameterInstancePrefix
          - ',TestName} TestDuration'', ''p99'', 60)","id":"e2","period":300}]],"view":"timeSeries","stacked":false,"region":"'
          - !Ref "AWS::Region"
          - '","stat":"Sum","period":300,"yAxis":{"right":{"label":"","showUnits":false},"left":{"min":0,"label":"p99 Test Duration (ms)","showUnits":false}},"title":"p99 Test Duration","start":"-PT1H","end":"P0D"}},'
          # Widget: sample count of the TestSuccess metric per TestName.
          - '{"type":"metric","x":0,"y":0,"width":9,"height":6,"properties":{"metrics":[[{"expression":"SEARCH(''{'
          - !Ref ParameterInstancePrefix
          - ',TestName} TestSuccess'', ''SampleCount'', 60)","id":"e1","period":300,"region":"'
          - !Ref "AWS::Region"
          - '"}]],"view":"timeSeries","stacked":false,"region":"'
          - !Ref "AWS::Region"
          - '","stat":"Sum","period":300,"yAxis":{"right":{"label":"","showUnits":false},"left":{"min":0,"label":"Test Execution Counts","showUnits":false}},"title":"Test Execution Counts"}},'
          # Widget: ExecutionsFailed summed per state machine.
          # The axis label previously read "Executions Started per Minute"
          # (copied from the widget below); it now matches the metric shown.
          - '{"type":"metric","x":9,"y":18,"width":9,"height":6,"properties":{"metrics":[[{"expression":"SEARCH(''{AWS\/States,StateMachineArn} ExecutionsFailed'', ''Sum'', 60)","id":"e1","period":300,"region":"'
          - !Ref "AWS::Region"
          - '"}]],"view":"timeSeries","stacked":false,"region":"'
          - !Ref "AWS::Region"
          - '","stat":"Sum","period":300,"yAxis":{"right":{"label":"","showUnits":false},"left":{"min":0,"label":"Executions Failed per Minute","showUnits":false}},"title":"State Machine Executions Failed per Minute"}},'
          # Widget: ExecutionsStarted summed per state machine.
          - '{"type":"metric","x":0,"y":24,"width":9,"height":6,"properties":{"metrics":[[{"expression":"SEARCH(''{AWS\/States,StateMachineArn} ExecutionsStarted'', ''Sum'', 60)","id":"e1","period":300}]],"view":"timeSeries","stacked":false,"region":"'
          - !Ref "AWS::Region"
          - '","stat":"Sum","period":300,"yAxis":{"right":{"label":"","showUnits":false},"left":{"min":0,"label":"Executions Started per Minute","showUnits":false}},"title":"State Machine Executions Started per Minute","start":"-PT1H","end":"P0D"}},'
          # Widget: Logs Insights query over the state machine log group,
          # filtered to events whose type matches /Fail/.
          - '{"type":"log","x":0,"y":30,"width":9,"height":9,"properties":{"query":"SOURCE '''
          # !Ref on a log group returns its name, which is what SOURCE expects.
          - !Ref LogGroupStateMachines
          - ''' | fields @timestamp, type, details.resourceType,details.resource,details.error,details.cause\n| filter type LIKE \/Fail\/\n| sort @timestamp desc\n| limit 1000","region":"'
          - !Ref "AWS::Region"
          - '","title":"Recent Task Failures","view":"table"}}'
          - "]}"

Outputs:
  StateMachineMain:
    Description: "statemachine to run"
    Value: !Ref StateMachineMain
  StateMachineLogGroup:
    Description: "Log group for statemachine logs"
    Value: !GetAtt LogGroupStateMachines.Arn
  # Misspelled output key kept so existing consumers of the stack outputs
  # keep working; prefer MetricNamespace below.
  MetricNamesapce:
    Description: "The metric namespace for this app"
    Value: !Ref ParameterInstancePrefix
  MetricNamespace:
    Description: "The metric namespace for this app"
    Value: !Ref ParameterInstancePrefix