---
# Serverless data lake workshop stack (AWS SAM on top of CloudFormation).
#
# Wiring defined in this template:
#   ApacheLogs (CloudWatch log group)
#     -> CloudWatchLogsToKinesis (subscription filter, empty filter pattern)
#     -> ApacheLogsKinesis (Firehose; records pass through TransformKinesis)
#     -> IngestionBucket, under the weblogs/live/ prefix
# GenerateSampleDataFunction runs every minute and is handed the log group
# name; LoadSampleData is a custom resource backed by LoadSampleDataFunction.
AWSTemplateFormatVersion: '2010-09-09'
Transform: 'AWS::Serverless-2016-10-31'
Description: A serverless datalake workshop.

Resources:
  # Destination bucket for Firehose output and the loaded sample data.
  IngestionBucket:
    Type: AWS::S3::Bucket

  # Log group that feeds the Firehose stream via the subscription filter.
  ApacheLogs:
    Type: AWS::Logs::LogGroup
    Properties:
      LogGroupName: !Sub /${AWS::StackName}/apache
      RetentionInDays: 1

  # Firehose delivery stream: DirectPut source, Lambda transform, S3 sink.
  ApacheLogsKinesis:
    Type: AWS::KinesisFirehose::DeliveryStream
    # ApacheLogsRolePolicy is listed so the delivery role already carries its
    # S3 and Lambda permissions when the stream is created; the dependency
    # on GenerateSampleDataFunction is kept from the original template.
    DependsOn:
      - GenerateSampleDataFunction
      - ApacheLogsRolePolicy
    Properties:
      DeliveryStreamType: DirectPut
      ExtendedS3DestinationConfiguration:
        RoleARN: !GetAtt ApacheLogsServiceRole.Arn
        BucketARN: !GetAtt IngestionBucket.Arn
        BufferingHints:
          IntervalInSeconds: 60
          SizeInMBs: 3
        CloudWatchLoggingOptions:
          Enabled: false
        CompressionFormat: UNCOMPRESSED
        Prefix: weblogs/live/
        ProcessingConfiguration:
          Enabled: true
          Processors:
            - Type: Lambda
              Parameters:
                - ParameterName: LambdaArn
                  ParameterValue: !GetAtt TransformKinesis.Arn
                # Processor parameter values are strings, hence the quotes.
                - ParameterName: BufferSizeInMBs
                  ParameterValue: '3'
                - ParameterName: BufferIntervalInSeconds
                  ParameterValue: '60'

  # Forwards every event in ApacheLogs (empty pattern) to the Firehose stream.
  CloudWatchLogsToKinesis:
    Type: AWS::Logs::SubscriptionFilter
    # The role's Firehose permissions live in a separate managed policy, so
    # wait for that policy to be attached before creating the filter.
    DependsOn:
      - LogsToKinesisRolePolicy
    Properties:
      DestinationArn: !GetAtt ApacheLogsKinesis.Arn
      FilterPattern: ""
      LogGroupName: !Ref ApacheLogs
      RoleArn: !GetAtt LogsToKinesisServiceRole.Arn

  # Role assumed by the regional CloudWatch Logs service principal.
  LogsToKinesisServiceRole:
    Type: AWS::IAM::Role
    Properties:
      RoleName: !Sub ${AWS::StackName}_logs_kinesis_role
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service: !Sub logs.${AWS::Region}.amazonaws.com
            Action: sts:AssumeRole

  # Permissions attached to LogsToKinesisServiceRole.
  LogsToKinesisRolePolicy:
    Type: AWS::IAM::ManagedPolicy
    Properties:
      # NOTE(review): "kineis" looks like a typo for "kinesis". Left as-is
      # because changing a managed policy's name replaces the policy.
      ManagedPolicyName: !Sub ${AWS::StackName}_logs_kineis_policy
      PolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Action:
              - 'firehose:*'
            Resource:
              - !GetAtt ApacheLogsKinesis.Arn
          - Effect: Allow
            Action:
              - 'iam:PassRole'
            Resource:
              - !GetAtt LogsToKinesisServiceRole.Arn
      Roles:
        - !Ref LogsToKinesisServiceRole

  # Role assumed by Firehose for delivery to S3 and the transform Lambda.
  ApacheLogsServiceRole:
    Type: AWS::IAM::Role
    Properties:
      RoleName: !Sub ${AWS::StackName}_weblog_delivery_role
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service: firehose.amazonaws.com
            Action: sts:AssumeRole

  # Permissions attached to ApacheLogsServiceRole.
  ApacheLogsRolePolicy:
    Type: AWS::IAM::ManagedPolicy
    Properties:
      ManagedPolicyName: !Sub ${AWS::StackName}_weblog_delivery_policy
      PolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Action:
              - 's3:*'
            Resource:
              - !Sub '${IngestionBucket.Arn}/*'
              - !GetAtt IngestionBucket.Arn
          - Effect: Allow
            Action:
              - 'lambda:InvokeFunction'
              - 'lambda:InvokeAsync'
            Resource:
              - !GetAtt TransformKinesis.Arn
      Roles:
        - !Ref ApacheLogsServiceRole

  # Lambda used as the Firehose record processor.
  TransformKinesis:
    Type: 'AWS::Serverless::Function'
    Properties:
      Handler: transformKinesis.handler
      # NOTE(review): python2.7 is a retired Lambda runtime. Kept unchanged
      # because the handler code in ./src is not visible from this template;
      # port the code and bump the runtime together.
      Runtime: python2.7
      Description: ''
      MemorySize: 512
      Timeout: 60
      CodeUri: ./src

  # Role assumed by Redshift; its ARN is exported in Outputs.
  RedshiftServiceRole:
    Type: AWS::IAM::Role
    Properties:
      RoleName: !Sub ${AWS::StackName}_redshift_role
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service: redshift.amazonaws.com
            Action: sts:AssumeRole

  # S3 access to the ingestion bucket plus Glue catalog access for Redshift.
  RedshiftRolePolicy:
    Type: AWS::IAM::ManagedPolicy
    Properties:
      ManagedPolicyName: !Sub ${AWS::StackName}_redshift_policy
      PolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Action:
              - 's3:*'
            Resource:
              - !Sub '${IngestionBucket.Arn}/*'
              - !GetAtt IngestionBucket.Arn
          - Effect: Allow
            Action:
              - 'glue:CreateDatabase'
              - 'glue:DeleteDatabase'
              - 'glue:GetDatabase'
              - 'glue:GetDatabases'
              - 'glue:UpdateDatabase'
              - 'glue:CreateTable'
              - 'glue:DeleteTable'
              - 'glue:BatchDeleteTable'
              - 'glue:UpdateTable'
              - 'glue:GetTable'
              - 'glue:GetTables'
              - 'glue:BatchCreatePartition'
              - 'glue:CreatePartition'
              - 'glue:DeletePartition'
              - 'glue:BatchDeletePartition'
              - 'glue:UpdatePartition'
              - 'glue:GetPartition'
              # Was a second 'glue:GetPartition'; the plural action was the
              # one missing from the list.
              - 'glue:GetPartitions'
              - 'glue:BatchGetPartition'
            Resource: '*'
      Roles:
        - !Ref RedshiftServiceRole

  # Scheduled Lambda, invoked once a minute, that is given the log group name.
  # Presumably it writes sample Apache log lines there - confirm in ./src.
  GenerateSampleDataFunction:
    Type: 'AWS::Serverless::Function'
    Properties:
      Handler: writelogs.lambda_handler
      # NOTE(review): retired runtime, see the note on TransformKinesis.
      Runtime: python2.7
      Description: ''
      MemorySize: 512
      Timeout: 60
      CodeUri: ./src
      Events:
        Schedule:
          Type: Schedule
          Properties:
            Schedule: rate(1 minute)
      Environment:
        Variables:
          # Ref returns the log group name (same value as before) and makes
          # CloudFormation create the log group before this function.
          LOG_GROUP_NAME: !Ref ApacheLogs

  # Lambda backing the LoadSampleData custom resource.
  LoadSampleDataFunction:
    Type: 'AWS::Serverless::Function'
    Properties:
      Handler: load-data-files.lambda_handler
      # NOTE(review): retired runtime, see the note on TransformKinesis.
      Runtime: python2.7
      Description: ''
      MemorySize: 512
      Timeout: 240
      # NOTE(review): s3:* on every resource is broader than the two buckets
      # named in the environment below; tighten once the loader's calls are
      # confirmed.
      Policies:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Action:
              - 's3:*'
            Resource:
              - '*'
      CodeUri: ./src
      Environment:
        Variables:
          BUCKET_NAME: !Ref IngestionBucket
          SOURCE_BUCKET_NAME: arc326-instructions

  # Custom resource that invokes LoadSampleDataFunction during stack
  # operations, passing the stack name as a property.
  LoadSampleData:
    Type: Custom::LoadSampleData
    DependsOn:
      - IngestionBucket
    Properties:
      ServiceToken: !GetAtt LoadSampleDataFunction.Arn
      StackName: !Ref AWS::StackName

Outputs:
  RedshiftRole:
    Description: The role that redshift uses to access data
    Value: !GetAtt RedshiftServiceRole.Arn
  WorkshopInstructionsUrl:
    Description: >-
      Follow the link for the instructions for the serverless datalake
      workshop.
    Value: !Sub https://s3.${AWS::Region}.amazonaws.com/${IngestionBucket}/instructions/instructions.html