# Stage A ("pre") pipeline template.
#
# Provisions, for one team/pipeline pair:
#   * a FIFO routing queue plus its FIFO dead-letter queue,
#   * IAM managed policies and execution roles for each Lambda,
#   * six container-image Lambda functions (routing, redrive, three
#     processing steps and an error handler),
#   * the Step Functions state machine that chains the three steps,
#   * SSM parameters publishing the queue names and the state machine ARN.
AWSTemplateFormatVersion: "2010-09-09"
Transform: AWS::Serverless-2016-10-31
Description: "Contains preStateMachine"

Parameters:
  # SSM-backed parameters: the Default is the SSM parameter *name*; the
  # template receives the stored value. The "<String>" suffix is required,
  # "AWS::SSM::Parameter::Value" on its own is not a valid parameter type.
  pDatasetBucket:
    Description: "The dataset bucket"
    Type: "AWS::SSM::Parameter::Value<String>"
    Default: "/DataLake/S3/CentralBucket"
  pStageBucket:
    Description: "The stage bucket"
    Type: "AWS::SSM::Parameter::Value<String>"
    Default: "/DataLake/S3/StageBucket"
  pApp:
    Description: "Name of the application (all lowercase, no symbols or spaces)"
    Type: "AWS::SSM::Parameter::Value<String>"
    Default: "/DataLake/Misc/pApp"
  pOrg:
    Description: "Name of the organization (all lowercase, no symbols or spaces)"
    Type: "AWS::SSM::Parameter::Value<String>"
    Default: "/DataLake/Misc/pOrg"
  # NOTE(review): "[a-z0-9]*" also accepts an empty string; confirm whether
  # "[a-z0-9]+" was intended for pTeamName / pPipeline.
  pTeamName:
    Description: "Name of the team owning the pipeline (all lowercase, no symbols or spaces)"
    Type: String
    AllowedPattern: "[a-z0-9]*"
  pPipeline:
    Description: "The name of the pipeline (all lowercase, no symbols or spaces)"
    Type: String
    AllowedPattern: "[a-z0-9]*"
  pEnv:
    Description: "The name of the environment to deploy the pipeline to"
    Type: String
    Default: "dev"
    AllowedValues: [dev, test, prod]
  pStateMachineExecutionRole:
    Description: "The Role Arn that the state machines will use for execution"
    Type: "AWS::SSM::Parameter::Value<String>"
    Default: "/DataLake/IAM/StateMachineRoleArn"
  pEcrImageTag:
    Description: "ECR Image Tag"
    Type: String
    Default: "latest"
  # Used as the leading segment of the ECR repository names below.
  # NOTE(review): allowed values differ from pEnv ("stg" vs "test") - confirm.
  pPrefix:
    Description: Common prefix for resource naming
    Type: String
    Default: dev
    AllowedValues: ["dev", "stg", "prod"]

Resources:
  ######## SQS #########
  # Routing queue. A message is moved to the DLQ after one failed receive
  # (maxReceiveCount: 1).
  # NOTE(review): VisibilityTimeout equals the routing Lambda timeout (60 s);
  # confirm this leaves enough headroom for retries.
  rQueueRoutingPreStep:
    Type: "AWS::SQS::Queue"
    Properties:
      QueueName: !Sub "sdlf-${pTeamName}-${pPipeline}-${pOrg}-${pApp}-${pEnv}-queue-a.fifo"
      FifoQueue: true
      RedrivePolicy:
        deadLetterTargetArn: !GetAtt rDeadLetterQueueRoutingPreStep.Arn
        maxReceiveCount: 1
      VisibilityTimeout: 60
      KmsMasterKeyId: alias/aws/sqs

  # Dead-letter queue for the routing queue; retains messages for
  # 1209600 s (14 days).
  rDeadLetterQueueRoutingPreStep:
    Type: "AWS::SQS::Queue"
    Properties:
      QueueName: !Sub "sdlf-${pTeamName}-${pPipeline}-${pOrg}-${pApp}-${pEnv}-dlq-a.fifo"
      FifoQueue: true
      MessageRetentionPeriod: 1209600
      VisibilityTimeout: 60
      KmsMasterKeyId: alias/aws/sqs

  # Feeds the routing Lambda from the routing queue, up to 10 messages
  # per invocation.
  rQueuePreStepLambdaEventSourceMapping:
    Type: AWS::Lambda::EventSourceMapping
    Properties:
      BatchSize: 10
      Enabled: true
      EventSourceArn: !GetAtt rQueueRoutingPreStep.Arn
      FunctionName: !GetAtt rLambdaPreStepRouting.Arn

  ######## IAM #########
  # Routing Queues Role

  # Shared policy attached to every Lambda role in this template: allows
  # creating log groups in this account/region and writing log streams and
  # events under /aws/lambda/sdlf-<team>*.
  rLambdaCloudwatchPolicy:
    Type: AWS::IAM::ManagedPolicy
    Properties:
      Description: "This Policy gives permission .Provisioned as part of Request AWS-R0000"
      ManagedPolicyName: !Sub "sdlf-${pTeamName}-${pPipeline}-lambdacloudwatchlogs-policy"
      PolicyDocument:
        Version: "2012-10-17"
        Statement:
          - Effect: Allow
            Action:
              - logs:CreateLogGroup
            Resource: !Sub "arn:aws:logs:${AWS::Region}:${AWS::AccountId}:*"
          - Effect: Allow
            Action:
              - "logs:CreateLogStream"
              - "logs:PutLogEvents"
            Resource: !Sub "arn:aws:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/sdlf-${pTeamName}*:*"

  # Policy for the routing and redrive Lambdas: read /DataLake/* SSM
  # parameters, read/write octagon-* DynamoDB tables, start the Stage A state
  # machine, and consume/produce on the routing queue and its DLQ.
  rLambdaExecutionPreStepQueueRoutingPolicy:
    Type: AWS::IAM::ManagedPolicy
    Properties:
      Description: "This Policy gives permission .Provisioned as part of Request AWS-R0000"
      ManagedPolicyName: !Sub "sdlf-${pTeamName}-${pPipeline}-lambdaexecutionprestepqueuerouting-policy"
      Path: "/state-machine/"
      PolicyDocument:
        Version: "2012-10-17"
        Statement:
          - Effect: Allow
            Action:
              - ssm:GetParameter*
            Resource: !Sub "arn:aws:ssm:${AWS::Region}:${AWS::AccountId}:parameter/DataLake/*"
          - Effect: Allow
            Action:
              - "dynamodb:BatchGet*"
              - "dynamodb:DescribeTable"
              - "dynamodb:Get*"
              - "dynamodb:Query"
              - "dynamodb:Scan"
              - "dynamodb:BatchWrite*"
              - "dynamodb:DeleteItem"
              - "dynamodb:Update*"
              - "dynamodb:Put*"
            Resource:
              - !Sub "arn:aws:dynamodb:${AWS::Region}:${AWS::AccountId}:table/octagon-*"
          - Effect: Allow
            Action:
              - states:StartExecution
            Resource:
              - !Ref rStateMachinePre
          - Effect: Allow
            Action:
              - "sqs:List*"
              - "sqs:ReceiveMessage"
              - "sqs:SendMessage*"
              - "sqs:DeleteMessage*"
              - "sqs:GetQueue*"
            Resource:
              - !GetAtt rQueueRoutingPreStep.Arn
              - !GetAtt rDeadLetterQueueRoutingPreStep.Arn

  # Execution role shared by the routing and redrive Lambdas.
  rRoleLambdaExecutionPreStepQueueRouting:
    Type: "AWS::IAM::Role"
    Properties:
      AssumeRolePolicyDocument:
        Version: "2012-10-17"
        Statement:
          - Effect: Allow
            Principal:
              Service: lambda.amazonaws.com
            Action: "sts:AssumeRole"
      Path: "/state-machine/"
      RoleName: !Sub "sdlf-${pTeamName}-${pPipeline}-lambdaprestepqueuerouting-svc-role"
      ManagedPolicyArns:
        - !Ref rLambdaExecutionPreStepQueueRoutingPolicy
        - !Ref rLambdaCloudwatchPolicy

  # Policy for step 1: read /DataLake/* SSM parameters and read/write
  # octagon-* DynamoDB tables.
  rLambdaExecutionPreStep1Policy:
    Type: AWS::IAM::ManagedPolicy
    Properties:
      Description: "This Policy gives permission .Provisioned as part of Request AWS-R0000"
      ManagedPolicyName: !Sub "sdlf-${pTeamName}-${pPipeline}-lambdaexecutionprestep1-policy"
      Path: "/state-machine/"
      PolicyDocument:
        Version: "2012-10-17"
        Statement:
          - Effect: Allow
            Action:
              - ssm:GetParameter*
            Resource: !Sub "arn:aws:ssm:${AWS::Region}:${AWS::AccountId}:parameter/DataLake/*"
          - Effect: Allow
            Action:
              - "dynamodb:BatchGet*"
              - "dynamodb:DescribeTable"
              - "dynamodb:Get*"
              - "dynamodb:Query"
              - "dynamodb:Scan"
              - "dynamodb:BatchWrite*"
              - "dynamodb:DeleteItem"
              - "dynamodb:Update*"
              - "dynamodb:Put*"
            Resource:
              - !Sub "arn:aws:dynamodb:${AWS::Region}:${AWS::AccountId}:table/octagon-*"

  # PreStep1 Role
  rRoleLambdaExecutionPreStep1:
    Type: "AWS::IAM::Role"
    Properties:
      AssumeRolePolicyDocument:
        Version: "2012-10-17"
        Statement:
          - Effect: Allow
            Principal:
              Service: lambda.amazonaws.com
            Action: "sts:AssumeRole"
      Path: "/state-machine/"
      RoleName: !Sub "sdlf-${pTeamName}-${pPipeline}-lambdaprestep1-svc-role"
      ManagedPolicyArns:
        - !Ref rLambdaExecutionPreStep1Policy
        - !Ref rLambdaCloudwatchPolicy

  # Policy for step 2: read /DataLake/* SSM parameters, read the dataset and
  # stage buckets, write objects into them, and read/write octagon-* DynamoDB
  # tables.
  # NOTE(review): bucket-level and object-level S3 actions are mixed across
  # the bucket and object ARNs below; kept exactly as in the original.
  rLambdaExecutionPreStep2Policy:
    Type: AWS::IAM::ManagedPolicy
    Properties:
      Description: "This Policy gives permission .Provisioned as part of Request AWS-R0000"
      ManagedPolicyName: !Sub "sdlf-${pTeamName}-${pPipeline}-lambdaexecutionprestep2-policy"
      Path: "/state-machine/"
      PolicyDocument:
        Version: "2012-10-17"
        Statement:
          - Effect: Allow
            Action:
              - ssm:GetParameter*
            Resource: !Sub "arn:aws:ssm:${AWS::Region}:${AWS::AccountId}:parameter/DataLake/*"
          - Effect: Allow
            Action:
              - s3:GetObject
              - s3:GetObjectVersion
              - s3:GetBucketVersioning
              - s3:ListBucket
            Resource:
              - !Sub "arn:aws:s3:::${pDatasetBucket}"
              - !Sub "arn:aws:s3:::${pStageBucket}"
          - Effect: Allow
            Action:
              - s3:Get*
              - s3:GetBucketVersioning
              - s3:List*
              - s3:PutObject*
            Resource:
              - !Sub "arn:aws:s3:::${pDatasetBucket}/*"
              - !Sub "arn:aws:s3:::${pStageBucket}/*"
          - Effect: Allow
            Action:
              - "dynamodb:BatchGet*"
              - "dynamodb:DescribeTable"
              - "dynamodb:Get*"
              - "dynamodb:Query"
              - "dynamodb:Scan"
              - "dynamodb:BatchWrite*"
              - "dynamodb:DeleteItem"
              - "dynamodb:Update*"
              - "dynamodb:Put*"
            Resource:
              - !Sub "arn:aws:dynamodb:${AWS::Region}:${AWS::AccountId}:table/octagon-*"

  # PreStep2 Role
  rRoleLambdaExecutionPreStep2:
    Type: "AWS::IAM::Role"
    Properties:
      AssumeRolePolicyDocument:
        Version: "2012-10-17"
        Statement:
          - Effect: Allow
            Principal:
              Service: lambda.amazonaws.com
            Action: "sts:AssumeRole"
      Path: "/state-machine/"
      RoleName: !Sub "sdlf-${pTeamName}-${pPipeline}-lambdaprestep2-svc-role"
      ManagedPolicyArns:
        - !Ref rLambdaExecutionPreStep2Policy
        - !Ref rLambdaCloudwatchPolicy

  # Policy for step 3: read /DataLake/* SSM parameters, read/write octagon-*
  # DynamoDB tables, and send messages to any queue named sdlf-<team>*.
  rLambdaExecutionPreStep3Policy:
    Type: AWS::IAM::ManagedPolicy
    Properties:
      Description: "This Policy gives permission .Provisioned as part of Request AWS-R0000"
      ManagedPolicyName: !Sub "sdlf-${pTeamName}-${pPipeline}-lambdaexecutionprestep3-policy"
      Path: "/state-machine/"
      PolicyDocument:
        Version: "2012-10-17"
        Statement:
          - Effect: Allow
            Action:
              - ssm:GetParameter*
            Resource: !Sub "arn:aws:ssm:${AWS::Region}:${AWS::AccountId}:parameter/DataLake/*"
          - Effect: Allow
            Action:
              - "dynamodb:BatchGet*"
              - "dynamodb:DescribeTable"
              - "dynamodb:Get*"
              - "dynamodb:Query"
              - "dynamodb:Scan"
              - "dynamodb:BatchWrite*"
              - "dynamodb:DeleteItem"
              - "dynamodb:Update*"
              - "dynamodb:Put*"
            Resource:
              - !Sub "arn:aws:dynamodb:${AWS::Region}:${AWS::AccountId}:table/octagon-*"
          - Effect: Allow
            Action:
              - "sqs:List*"
              - "sqs:GetQueue*"
              - "sqs:SendMessage*"
            Resource:
              - !Sub "arn:aws:sqs:${AWS::Region}:${AWS::AccountId}:sdlf-${pTeamName}*"

  # PreStep3 Role
  rRoleLambdaExecutionPreStep3:
    Type: "AWS::IAM::Role"
    Properties:
      AssumeRolePolicyDocument:
        Version: "2012-10-17"
        Statement:
          - Effect: Allow
            Principal:
              Service: lambda.amazonaws.com
            Action: "sts:AssumeRole"
      Path: "/state-machine/"
      RoleName: !Sub "sdlf-${pTeamName}-${pPipeline}-lambdaprestep3-svc-role"
      ManagedPolicyArns:
        - !Ref rLambdaExecutionPreStep3Policy
        - !Ref rLambdaCloudwatchPolicy

  # Policy for the error-handling Lambda: read /DataLake/* SSM parameters and
  # consume/produce on the dead-letter queue.
  rLambdaExecutionPreStepErrorPolicy:
    Type: AWS::IAM::ManagedPolicy
    Properties:
      Description: "This Policy gives permission .Provisioned as part of Request AWS-R0000"
      ManagedPolicyName: !Sub "sdlf-${pTeamName}-${pPipeline}-lambdaexecutionpresteperror-policy"
      Path: "/state-machine/"
      PolicyDocument:
        Version: "2012-10-17"
        Statement:
          - Effect: Allow
            Action:
              - ssm:GetParameter*
            Resource: !Sub "arn:aws:ssm:${AWS::Region}:${AWS::AccountId}:parameter/DataLake/*"
          - Effect: Allow
            Action:
              - "sqs:List*"
              - "sqs:ReceiveMessage"
              - "sqs:SendMessage*"
              - "sqs:DeleteMessage*"
              - "sqs:GetQueue*"
            Resource:
              - !GetAtt rDeadLetterQueueRoutingPreStep.Arn

  # Error Handling Lambda Role
  rRoleLambdaExecutionPreStepError:
    Type: "AWS::IAM::Role"
    Properties:
      AssumeRolePolicyDocument:
        Version: "2012-10-17"
        Statement:
          - Effect: Allow
            Principal:
              Service: lambda.amazonaws.com
            Action: "sts:AssumeRole"
      Path: "/state-machine/"
      RoleName: !Sub "sdlf-${pTeamName}-${pPipeline}-lambdapresteperror-svc-role"
      ManagedPolicyArns:
        - !Ref rLambdaExecutionPreStepErrorPolicy
        - !Ref rLambdaCloudwatchPolicy

  ######## LAMBDA FUNCTIONS #########
  # All functions are deployed from container images in this account's ECR,
  # in repositories named <pPrefix>-stage-a-<function>, at tag pEcrImageTag.
  rLambdaPreStepRouting:
    Type: "AWS::Serverless::Function"
    Properties:
      PackageType: Image
      ImageUri: !Sub "${AWS::AccountId}.dkr.ecr.${AWS::Region}.amazonaws.com/${pPrefix}-stage-a-routing:${pEcrImageTag}"
      FunctionName: !Sub "sdlf-${pTeamName}-${pPipeline}-${pOrg}-${pApp}-${pEnv}-routing-a"
      Description: "Routes S3 PutObject Logs to the relevant Stage A State Machine"
      MemorySize: 256
      Timeout: 60
      Role: !GetAtt rRoleLambdaExecutionPreStepQueueRouting.Arn

  # Shares the routing role; receives the team and pipeline names through
  # environment variables.
  rLambdaPreStepRedrive:
    Type: "AWS::Serverless::Function"
    DependsOn: rLambdaExecutionPreStepQueueRoutingPolicy
    Properties:
      PackageType: Image
      ImageUri: !Sub "${AWS::AccountId}.dkr.ecr.${AWS::Region}.amazonaws.com/${pPrefix}-stage-a-redrive:${pEcrImageTag}"
      FunctionName: !Sub "sdlf-${pTeamName}-${pPipeline}-${pOrg}-${pApp}-${pEnv}-redrive-a"
      Environment:
        Variables:
          TEAM: !Ref pTeamName
          PIPELINE: !Ref pPipeline
      Description: "Redrives Failed S3 PutObject Logs to the routing queue"
      MemorySize: 256
      Timeout: 300
      Role: !GetAtt rRoleLambdaExecutionPreStepQueueRouting.Arn

  rLambdaPreStep1:
    Type: "AWS::Serverless::Function"
    DependsOn: rLambdaExecutionPreStep1Policy
    Properties:
      PackageType: Image
      ImageUri: !Sub "${AWS::AccountId}.dkr.ecr.${AWS::Region}.amazonaws.com/${pPrefix}-stage-a-preupdate-metadata:${pEcrImageTag}"
      FunctionName: !Sub "sdlf-${pTeamName}-${pPipeline}-${pOrg}-${pApp}-${pEnv}-preupdate-a"
      Description: "Pre Updates the metadata in the DynamoDB Catalog table"
      MemorySize: 128
      Timeout: 300
      Role: !GetAtt rRoleLambdaExecutionPreStep1.Arn

  rLambdaPreStep2:
    Type: "AWS::Serverless::Function"
    DependsOn: rLambdaExecutionPreStep2Policy
    Properties:
      PackageType: Image
      ImageUri: !Sub "${AWS::AccountId}.dkr.ecr.${AWS::Region}.amazonaws.com/${pPrefix}-stage-a-process-object:${pEcrImageTag}"
      FunctionName: !Sub "sdlf-${pTeamName}-${pPipeline}-${pOrg}-${pApp}-${pEnv}-process-a"
      Description: "Processing pipeline"
      MemorySize: 1536
      Timeout: 600
      Role: !GetAtt rRoleLambdaExecutionPreStep2.Arn

  rLambdaPreStep3:
    Type: "AWS::Serverless::Function"
    DependsOn: rLambdaExecutionPreStep3Policy
    Properties:
      PackageType: Image
      ImageUri: !Sub "${AWS::AccountId}.dkr.ecr.${AWS::Region}.amazonaws.com/${pPrefix}-stage-a-postupdate-metadata:${pEcrImageTag}"
      FunctionName: !Sub "sdlf-${pTeamName}-${pPipeline}-${pOrg}-${pApp}-${pEnv}-postupdate-a"
      Description: "Post Updates the metadata in the DynamoDB Catalog table"
      MemorySize: 256
      Timeout: 300
      Role: !GetAtt rRoleLambdaExecutionPreStep3.Arn

  rLambdaPreStepError:
    Type: "AWS::Serverless::Function"
    DependsOn: rLambdaExecutionPreStepErrorPolicy
    Properties:
      PackageType: Image
      ImageUri: !Sub "${AWS::AccountId}.dkr.ecr.${AWS::Region}.amazonaws.com/${pPrefix}-stage-a-error:${pEcrImageTag}"
      FunctionName: !Sub "sdlf-${pTeamName}-${pPipeline}-${pOrg}-${pApp}-${pEnv}-error-a"
      Description: "Fallback lambda to handle messages which failed processing"
      MemorySize: 256
      Timeout: 300
      Role: !GetAtt rRoleLambdaExecutionPreStepError.Arn

  ######## STATE MACHINE #########
  # A single-branch Parallel state ("Try") wraps the three steps so that one
  # Catch on States.ALL covers all of them. "ResultPath": null in the Catch
  # discards the error output, so the Error task receives the original input.
  rStateMachinePre:
    Type: "AWS::StepFunctions::StateMachine"
    Properties:
      StateMachineName: !Sub "sdlf-${pTeamName}-${pPipeline}-${pOrg}-${pApp}-${pEnv}-sm-a"
      DefinitionString: !Sub
        - |-
          {
            "Comment": "Simple pseudo flow",
            "StartAt": "Try",
            "States": {
              "Try": {
                "Type": "Parallel",
                "Branches": [
                  {
                    "StartAt": "Pre-update Comprehensive Catalogue",
                    "States":{
                      "Pre-update Comprehensive Catalogue": {
                        "Type": "Task",
                        "Resource": "${lStep1}",
                        "Comment": "Pre-update Comprehensive Catalogue",
                        "Next": "Execute Light Transformation"
                      },
                      "Execute Light Transformation": {
                        "Type": "Task",
                        "Resource": "${lStep2}",
                        "Comment": "Execute Light Transformation",
                        "ResultPath": "$.body.processedKeys",
                        "Next": "Post-update comprehensive Catalogue"
                      },
                      "Post-update comprehensive Catalogue": {
                        "Type": "Task",
                        "Resource": "${lStep3}",
                        "Comment": "Post-update comprehensive Catalogue",
                        "ResultPath": "$.statusCode",
                        "End": true
                      }
                    }
                  }
                ],
                "Catch": [
                  {
                    "ErrorEquals": [ "States.ALL" ],
                    "ResultPath": null,
                    "Next": "Error"
                  }
                ],
                "Next": "Done"
              },
              "Done": {
                "Type": "Succeed"
              },
              "Error": {
                "Type": "Task",
                "Resource": "${lError}",
                "Comment": "Send Original Payload to DLQ",
                "Next": "Failed"
              },
              "Failed": {
                "Type": "Fail"
              }
            }
          }
        - lStep1: !GetAtt rLambdaPreStep1.Arn
          lStep2: !GetAtt rLambdaPreStep2.Arn
          lStep3: !GetAtt rLambdaPreStep3.Arn
          lError: !GetAtt rLambdaPreStepError.Arn
      RoleArn: !Ref pStateMachineExecutionRole

  ######## SSM OUTPUTS #########
  # Queue names are read from the queue's QueueName attribute rather than
  # parsed out of the ARN; the value is the same.
  rQueueRoutingPreStepSsm:
    Type: "AWS::SSM::Parameter"
    Properties:
      Name: !Sub "/DataLake/SQS/${pTeamName}/${pPipeline}PreStageQueue"
      Type: "String"
      Value: !GetAtt rQueueRoutingPreStep.QueueName
      Description: !Sub "Name of the Pre Stage ${pTeamName} ${pPipeline} Queue"

  rDeadLetterQueueRoutingPreStepSsm:
    Type: "AWS::SSM::Parameter"
    Properties:
      Name: !Sub "/DataLake/SQS/${pTeamName}/${pPipeline}PreStageDLQ"
      Type: "String"
      Value: !GetAtt rDeadLetterQueueRoutingPreStep.QueueName
      Description: !Sub "Name of the Pre Stage ${pTeamName} ${pPipeline} DLQ"

  rStateMachinePreSsm:
    Type: "AWS::SSM::Parameter"
    Properties:
      Name: !Sub "/DataLake/SM/${pTeamName}/${pPipeline}PreStageSM"
      Type: "String"
      Value: !Ref rStateMachinePre
      Description: !Sub "ARN of the Pre Stage ${pTeamName} ${pPipeline} State Machine"