# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: MIT-0
---
AWSTemplateFormatVersion: "2010-09-09"
Description: EMR Serverless full deployment stack

Conditions:
  # True when the stack is deployed in us-east-1; consumed by VPCDHCPOptions
  # to pick the internal DNS domain name.
  InUsEast1: !Equals [!Ref "AWS::Region", us-east-1]

Resources:
  # ---------------------------------------------------------------------------
  # Storage
  # ---------------------------------------------------------------------------

  # Bucket referenced as the MWAA source bucket (DAGs under airflow/dags) and
  # as the only bucket the EMR Serverless job role may write to or delete from.
  # All public access is blocked and versioning is enabled.
  EMRServerlessBucket:
    Type: AWS::S3::Bucket
    Properties:
      PublicAccessBlockConfiguration:
        BlockPublicAcls: true
        BlockPublicPolicy: true
        IgnorePublicAcls: true
        RestrictPublicBuckets: true
      VersioningConfiguration:
        Status: Enabled

  # ---------------------------------------------------------------------------
  # EMR Serverless
  # ---------------------------------------------------------------------------

  # Role assumed by emr-serverless.amazonaws.com; its ARN is exported as the
  # JobRoleArn output and is the only role MWAA is allowed to pass.
  EMRServerlessJobRole:
    Type: AWS::IAM::Role
    Properties:
      AssumeRolePolicyDocument:
        Version: "2012-10-17"
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - emr-serverless.amazonaws.com
            Action:
              - "sts:AssumeRole"
      Description: "Job execution role for EMR Serverless"
      Policies:
        - PolicyName: GlueAccess
          PolicyDocument:
            Version: "2012-10-17"
            Statement:
              - Effect: Allow
                Action:
                  - "glue:GetDatabase"
                  - "glue:GetDatabases"
                  - "glue:CreateTable"
                  - "glue:GetTable"
                  - "glue:GetTables"
                  - "glue:GetPartition"
                  - "glue:GetPartitions"
                  - "glue:CreatePartition"
                  - "glue:BatchCreatePartition"
                  - "glue:GetUserDefinedFunctions"
                Resource: "*"
        - PolicyName: S3Access
          PolicyDocument:
            Version: "2012-10-17"
            Statement:
              # NOTE(review): read/list is granted on every bucket ("*").
              # Narrow this if jobs only need specific input buckets.
              - Effect: Allow
                Action:
                  - "s3:GetObject"
                  - "s3:ListBucket"
                Resource: "*"
              # Write/delete is limited to objects in the stack's own bucket.
              - Effect: Allow
                Action:
                  - "s3:PutObject"
                  - "s3:DeleteObject"
                Resource:
                  - !Sub "arn:aws:s3:::${EMRServerlessBucket}/*"

  # Spark application with pre-initialized driver and executor workers.
  # It auto-starts on job submission and auto-stops after 100 idle minutes.
  SparkApplication:
    Type: AWS::EMRServerless::Application
    Properties:
      Name: spark-3.2
      ReleaseLabel: emr-6.6.0
      Type: Spark
      MaximumCapacity:
        Cpu: 200 vCPU
        Memory: 100 GB
      AutoStartConfiguration:
        Enabled: true
      AutoStopConfiguration:
        Enabled: true
        IdleTimeoutMinutes: 100
      InitialCapacity:
        - Key: Driver
          Value:
            WorkerCount: 3
            WorkerConfiguration:
              Cpu: 2 vCPU
              Memory: 4 GB
              Disk: 21 GB
        - Key: Executor
          Value:
            WorkerCount: 4
            WorkerConfiguration:
              Cpu: 1 vCPU
              Memory: 4 GB
              Disk: 20 GB

  # ---------------------------------------------------------------------------
  # Managed Workflows for Apache Airflow (MWAA)
  # ---------------------------------------------------------------------------

  # Airflow environment placed in the two private subnets. The name is
  # "emr-serverless-demo-" followed by the first dash-separated segment of the
  # third "/"-separated part of the stack ID, so each stack gets its own name.
  MWAAEnvironment:
    Type: AWS::MWAA::Environment
    Properties:
      Name: !Join
        - "-"
        - - emr-serverless-demo
          - !Select [0, !Split ["-", !Select [2, !Split ["/", !Ref "AWS::StackId"]]]]
      EnvironmentClass: mw1.small
      AirflowVersion: "2.2.2"
      NetworkConfiguration:
        SubnetIds:
          - !Ref PrivateSubnet1
          - !Ref PrivateSubnet2
        SecurityGroupIds:
          - !Ref MWAASecurityGroupIngress
      DagS3Path: airflow/dags
      SourceBucketArn: !Sub "arn:aws:s3:::${EMRServerlessBucket}"
      ExecutionRoleArn: !GetAtt MWAAExecutionRole.Arn
      WebserverAccessMode: PUBLIC_ONLY
      LoggingConfiguration:
        WebserverLogs:
          Enabled: true
          LogLevel: INFO
        SchedulerLogs:
          Enabled: true
          LogLevel: INFO

  # Security group attached to the MWAA environment. It declares no rules
  # itself; the self-referencing ingress rule is a separate resource below.
  MWAASecurityGroupIngress:
    Type: AWS::EC2::SecurityGroup
    Properties:
      GroupDescription: "Inbound access to MWAA"
      VpcId: !Ref VPC

  # Allows all protocols between members of the MWAA security group.
  MWAASecurityGroupSelfAllow:
    Type: AWS::EC2::SecurityGroupIngress
    Properties:
      GroupId: !Ref MWAASecurityGroupIngress
      IpProtocol: "-1"
      SourceSecurityGroupId: !Ref MWAASecurityGroupIngress

  # Execution role assumed by the MWAA service principals.
  MWAAExecutionRole:
    Type: AWS::IAM::Role
    Properties:
      AssumeRolePolicyDocument:
        Version: "2012-10-17"
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - airflow.amazonaws.com
                - airflow-env.amazonaws.com
            Action:
              - "sts:AssumeRole"
      Description: "Service role for MWAA"
      Policies:
        - PolicyName: MWAA-Execution-Policy
          PolicyDocument:
            Version: "2012-10-17"
            Statement:
              # Read-only access to the source bucket and its objects.
              - Effect: Allow
                Action:
                  - "s3:GetObject*"
                  - "s3:GetBucket*"
                  - "s3:List*"
                Resource:
                  - !Sub "arn:aws:s3:::${EMRServerlessBucket}"
                  - !Sub "arn:aws:s3:::${EMRServerlessBucket}/*"
              # The environment name carries a stack-specific suffix (see
              # MWAAEnvironment.Name), so the ARN must end in a wildcard to
              # match it.
              - Effect: Allow
                Action:
                  - "airflow:PublishMetrics"
                Resource: !Sub "arn:aws:airflow:${AWS::Region}:${AWS::AccountId}:environment/emr-serverless-demo-*"
              - Effect: Allow
                Action:
                  - "logs:CreateLogStream"
                  - "logs:CreateLogGroup"
                  - "logs:PutLogEvents"
                  - "logs:GetLogEvents"
                  - "logs:GetLogRecord"
                  - "logs:GetLogGroupFields"
                  - "logs:GetQueryResults"
                Resource: !Sub "arn:aws:logs:${AWS::Region}:${AWS::AccountId}:log-group:airflow-emr-serverless-demo-*"
              - Effect: Allow
                Action:
                  - "logs:DescribeLogGroups"
                  - "cloudwatch:PutMetricData"
                Resource: "*"
              # Queues named airflow-celery-* in any account in this region.
              - Effect: Allow
                Action:
                  - "sqs:ChangeMessageVisibility"
                  - "sqs:DeleteMessage"
                  - "sqs:GetQueueAttributes"
                  - "sqs:GetQueueUrl"
                  - "sqs:ReceiveMessage"
                  - "sqs:SendMessage"
                Resource: !Sub "arn:aws:sqs:${AWS::Region}:*:airflow-celery-*"
              # NotResource excludes keys owned by this account, and the
              # condition only allows use through SQS in this region.
              - Effect: Allow
                Action:
                  - "kms:Decrypt"
                  - "kms:DescribeKey"
                  - "kms:GenerateDataKey*"
                  - "kms:Encrypt"
                NotResource: !Sub "arn:aws:kms:*:${AWS::AccountId}:key/*"
                Condition:
                  StringLike:
                    "kms:ViaService": !Sub "sqs.${AWS::Region}.amazonaws.com"
        - PolicyName: AirflowEMRServerlessExecutionPolicy
          PolicyDocument:
            Version: "2012-10-17"
            Statement:
              - Effect: Allow
                Action:
                  - "emr-serverless:CreateApplication"
                  - "emr-serverless:GetApplication"
                  - "emr-serverless:StartApplication"
                  - "emr-serverless:StopApplication"
                  - "emr-serverless:DeleteApplication"
                  - "emr-serverless:StartJobRun"
                  - "emr-serverless:GetJobRun"
                Resource: "*"
              # Only the job role defined in this template may be passed, and
              # only to the EMR Serverless service.
              - Effect: Allow
                Action:
                  - "iam:PassRole"
                Resource:
                  - !GetAtt EMRServerlessJobRole.Arn
                Condition:
                  StringLike:
                    "iam:PassedToService": "emr-serverless.amazonaws.com"

  # ---------------------------------------------------------------------------
  # Network resources
  # ---------------------------------------------------------------------------

  VPC:
    Type: AWS::EC2::VPC
    Properties:
      # Primary CIDR block; PrivateSubnet1 and PrivateSubnet2 are carved
      # from this range.
      CidrBlock: 172.31.0.0/16
      EnableDnsHostnames: true
      Tags:
        - Key: for-use-with-amazon-emr-managed-policies
          Value: "true"

  VPCDHCPAssociation:
    Type: AWS::EC2::VPCDHCPOptionsAssociation
    Properties:
      DhcpOptionsId: !Ref VPCDHCPOptions
      VpcId: !Ref VPC

  # Domain name is ec2.internal in us-east-1 and <region>.compute.internal
  # everywhere else.
  VPCDHCPOptions:
    Type: AWS::EC2::DHCPOptions
    Properties:
      DomainName: !If
        - InUsEast1
        - ec2.internal
        - !Sub "${AWS::Region}.compute.internal"
      DomainNameServers:
        - AmazonProvidedDNS

  # Secondary CIDR block; PublicSubnet1 is carved from this range.
  VpcCidrBlock1:
    Type: AWS::EC2::VPCCidrBlock
    Properties:
      VpcId: !Ref VPC
      CidrBlock: 172.16.0.0/16

  GatewayAttachment:
    Type: AWS::EC2::VPCGatewayAttachment
    Properties:
      InternetGatewayId: !Ref InternetGateway
      VpcId: !Ref VPC

  InternetGateway:
    Type: AWS::EC2::InternetGateway

  # Default route for the public route table. The gateway must be attached to
  # the VPC before a route can target it, hence the explicit dependency.
  PublicRouteTableIGWRoute:
    Type: AWS::EC2::Route
    DependsOn: GatewayAttachment
    Properties:
      DestinationCidrBlock: 0.0.0.0/0
      GatewayId: !Ref InternetGateway
      RouteTableId: !Ref PublicRouteTable

  PublicRouteTable:
    Type: AWS::EC2::RouteTable
    Properties:
      Tags:
        - Key: Name
          Value: Public Route Table
      VpcId: !Ref VPC

  PublicSubnetRouteTableAssociation:
    Type: AWS::EC2::SubnetRouteTableAssociation
    Properties:
      RouteTableId: !Ref PublicRouteTable
      SubnetId: !Ref PublicSubnet1

  # Public subnet hosting the NAT gateway. Its CIDR lies in the secondary
  # block, so it must wait for VpcCidrBlock1.
  PublicSubnet1:
    Type: AWS::EC2::Subnet
    DependsOn: VpcCidrBlock1
    Properties:
      Tags:
        - Key: Name
          Value: PublicSubnet1
        - Key: for-use-with-amazon-emr-managed-policies
          Value: "true"
      VpcId: !Ref VPC
      MapPublicIpOnLaunch: true
      AvailabilityZone: !Select
        - 0
        - Fn::GetAZs: !Ref "AWS::Region"
      CidrBlock: 172.16.0.0/20

  NATGateway:
    Type: AWS::EC2::NatGateway
    Properties:
      AllocationId: !GetAtt ElasticIPAddress.AllocationId
      SubnetId: !Ref PublicSubnet1
      Tags:
        - Key: Name
          Value: NAT

  # Elastic IP for the NAT gateway. An EIP in a VPC defined in the same
  # template must wait for the internet gateway attachment.
  ElasticIPAddress:
    Type: AWS::EC2::EIP
    DependsOn: GatewayAttachment
    Properties:
      Domain: VPC

  # Private subnets: outbound traffic leaves through the NAT gateway.
  PrivateRouteTable:
    Type: AWS::EC2::RouteTable
    Properties:
      Tags:
        - Key: Name
          Value: Private Route Table
      VpcId: !Ref VPC

  PrivateRoute:
    Type: AWS::EC2::Route
    Properties:
      RouteTableId: !Ref PrivateRouteTable
      DestinationCidrBlock: 0.0.0.0/0
      NatGatewayId: !Ref NATGateway

  # NOTE(review): this CIDR is inside the VPC's primary block, so the
  # dependency on VpcCidrBlock1 looks unnecessary; kept to preserve the
  # original creation order.
  PrivateSubnet1:
    Type: AWS::EC2::Subnet
    DependsOn: VpcCidrBlock1
    Properties:
      Tags:
        - Key: Name
          Value: PrivateSubnet1
        - Key: for-use-with-amazon-emr-managed-policies
          Value: "true"
      VpcId: !Ref VPC
      MapPublicIpOnLaunch: false
      AvailabilityZone: !Select
        - 0
        - Fn::GetAZs: !Ref "AWS::Region"
      CidrBlock: 172.31.0.0/20

  PrivateSubnetRouteTableAssociation1:
    Type: AWS::EC2::SubnetRouteTableAssociation
    Properties:
      RouteTableId: !Ref PrivateRouteTable
      SubnetId: !Ref PrivateSubnet1

  # Second private subnet, placed in the region's second availability zone.
  PrivateSubnet2:
    Type: AWS::EC2::Subnet
    DependsOn: VpcCidrBlock1
    Properties:
      Tags:
        - Key: Name
          Value: PrivateSubnet2
        - Key: for-use-with-amazon-emr-managed-policies
          Value: "true"
      VpcId: !Ref VPC
      MapPublicIpOnLaunch: false
      AvailabilityZone: !Select
        - 1
        - Fn::GetAZs: !Ref "AWS::Region"
      CidrBlock: 172.31.16.0/20

  PrivateSubnetRouteTableAssociation2:
    Type: AWS::EC2::SubnetRouteTableAssociation
    Properties:
      RouteTableId: !Ref PrivateRouteTable
      SubnetId: !Ref PrivateSubnet2

Outputs:
  # Ref value of the EMR Serverless application resource.
  ApplicationId:
    Value: !Ref SparkApplication
  ApplicationArn:
    Value: !GetAtt SparkApplication.Arn
  # ARN of the role assumed by EMR Serverless.
  JobRoleArn:
    Value: !GetAtt EMRServerlessJobRole.Arn
  # Name of the stack's S3 bucket.
  S3Bucket:
    Value: !Ref EMRServerlessBucket