# SAM template that provisions a Glue Python-shell job, a Step Functions state
# machine that is handed the job name, the SSM parameter holding the job
# configuration, and the IAM roles assumed by Step Functions and Glue.
AWSTemplateFormatVersion: "2010-09-09"
Transform: AWS::Serverless-2016-10-31
Description: >
  SAM Template for Glue Step Functions.
  See the AWS::Glue::Job documentation for details
  (https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-glue-job.html).

Parameters:
  # Bucket written into the SSM configuration value ("bucket") and granted to
  # the Glue role. The default is a placeholder.
  SourceBucket:
    Type: String
    Default: "DATA_BUCKET"

  # Bucket that stores the Glue script and the extra Python wheel. The default
  # is a placeholder.
  ArtifactsBucket:
    Type: String
    Default: "ARTIFACTS_BUCKET"

  # Name of the SSM parameter created below; also passed to the Glue job as
  # --config_path. It must start with "/" because it is appended directly to
  # "parameter" when building the SSM ARN in the AccessSSM policy.
  ETLParametersPath:
    Type: String
    Default: "/etl/hello/parameters"

Resources:
  # Python-shell Glue job running feature_engineering.py from ArtifactsBucket.
  FeatureEngineering:
    Type: AWS::Glue::Job
    Properties:
      Command:
        Name: pythonshell
        PythonVersion: "3"
        ScriptLocation: !Sub "s3://${ArtifactsBucket}/glue/scripts/feature_engineering.py"
      ExecutionProperty:
        MaxConcurrentRuns: 1
      DefaultArguments:
        "--extra-py-files": !Sub "s3://${ArtifactsBucket}/glue/libs/awswrangler-2.12.0-py3-none-any.whl"
        "--config_path": !Ref ETLParametersPath
      GlueVersion: "1.0"
      MaxCapacity: 1
      MaxRetries: 0
      Name: "Feature Engineering"
      Role: !GetAtt GlueExecutionRole.Arn

  # State machine whose definition lives in an external ASL file; the Glue job
  # name is injected through the FeatureEngineering substitution.
  ETLStepFunctions:
    Type: AWS::Serverless::StateMachine
    Properties:
      DefinitionUri: ../step_functions/statemachine.asl.json
      DefinitionSubstitutions:
        FeatureEngineering: !Ref FeatureEngineering
      Events:
        TriggerPersonalizeTrain:
          Type: Schedule
          Properties:
            Schedule: rate(2 days)
            # The schedule is created in the disabled state.
            Enabled: false
      Role: !GetAtt ETLStatesExecutionRole.Arn

  # JSON configuration document stored at ETLParametersPath.
  ETLParameters:
    Type: AWS::SSM::Parameter
    Properties:
      Name: !Ref ETLParametersPath
      Tier: "Standard"
      Description: 'Configuration values for ETL'
      Type: String
      Value: !Sub '{"bucket": "${SourceBucket}", "last_update": "2021-10-20-18-34-26", "prefix": "data/input"}'

  # Role assumed by Step Functions; limited to run-control actions on the
  # FeatureEngineering job only.
  ETLStatesExecutionRole:
    Type: "AWS::IAM::Role"
    Properties:
      AssumeRolePolicyDocument:
        Version: "2012-10-17"
        Statement:
          - Effect: "Allow"
            Principal:
              Service:
                # NOTE(review): region-qualified principal kept as-is; AWS
                # documentation generally shows "states.amazonaws.com" -
                # confirm whether the regional form is still needed.
                - !Sub "states.${AWS::Region}.amazonaws.com"
            Action: "sts:AssumeRole"
      Path: "/"
      Policies:
        - PolicyName: StatesExecutionPolicy
          PolicyDocument:
            Version: "2012-10-17"
            Statement:
              - Effect: Allow
                Action:
                  - "glue:StartJobRun"
                  - "glue:GetJobRun"
                  - "glue:BatchStopJobRun"
                  - "glue:GetJobRuns"
                Resource:
                  - !Sub "arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:job/${FeatureEngineering}"

  # Role assumed by the Glue job: S3 access to both buckets, Glue log groups,
  # and read/write on the ETL configuration parameter.
  GlueExecutionRole:
    Type: "AWS::IAM::Role"
    Properties:
      AssumeRolePolicyDocument:
        Version: "2012-10-17"
        Statement:
          - Effect: "Allow"
            Principal:
              Service:
                - "glue.amazonaws.com"
            Action:
              - "sts:AssumeRole"
      Path: "/"
      Policies:
        - PolicyName: ReadWriteS3Policy
          PolicyDocument:
            Version: "2012-10-17"
            Statement:
              # Object-level permissions. "s3:DeleteObject" also authorizes the
              # multi-object delete API; there is no "s3:DeleteObjects" action.
              - Effect: "Allow"
                Action:
                  - "s3:GetObject"
                  - "s3:PutObject"
                  - "s3:PutObjectAcl"
                  - "s3:GetObjectVersion"
                  - "s3:DeleteObject"
                Resource:
                  - !Sub "arn:${AWS::Partition}:s3:::${SourceBucket}/*"
                  - !Sub "arn:${AWS::Partition}:s3:::${ArtifactsBucket}/*"
              # Bucket-level permissions.
              - Effect: "Allow"
                Action:
                  - "s3:GetBucketLocation"
                  - "s3:ListBucket"
                  - "s3:GetLifecycleConfiguration"
                  - "s3:PutLifecycleConfiguration"
                Resource:
                  - !Sub "arn:${AWS::Partition}:s3:::${SourceBucket}"
                  - !Sub "arn:${AWS::Partition}:s3:::${ArtifactsBucket}"
        - PolicyName: "AccessLogging"
          PolicyDocument:
            Version: "2012-10-17"
            Statement:
              - Effect: "Allow"
                Action:
                  - "logs:CreateLogStream"
                  - "logs:CreateLogGroup"
                  - "logs:PutLogEvents"
                Resource:
                  - !Sub "arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws-glue/*"
        - PolicyName: "AccessSSM"
          PolicyDocument:
            Version: "2012-10-17"
            Statement:
              - Effect: "Allow"
                Action:
                  - "ssm:GetParameter"
                  - "ssm:PutParameter"
                # ETLParametersPath supplies the leading "/" after "parameter".
                Resource: !Sub "arn:${AWS::Partition}:ssm:${AWS::Region}:${AWS::AccountId}:parameter${ETLParametersPath}"

Outputs:
  # ARN of the ETL state machine.
  ETLStepFunctionsARN:
    Value: !GetAtt ETLStepFunctions.Arn

  # Name of the Glue job (Ref on AWS::Glue::Job returns the job name).
  GlueJobName:
    Value: !Ref FeatureEngineering