---
AWSTemplateFormatVersion: '2010-09-09'
Description: >
  Kerberos authentication mode CloudFormation template, we create a Kerberized EMR cluster
  and configures it with a bootstrap action to create a Linux user and install Python
  libraries (Pandas, requests, and Matplotlib). We create SageMaker Studio domain &
  SageMaker user profile. Using SageMaker Studio Notebooks connect to EMR cluster using
  Kerberos

# ---------------------------------------------------------------------------
# Parameters: Kerberos secrets passed straight through to the EMR cluster's
# KerberosAttributes. NoEcho keeps them out of console/API output.
# ---------------------------------------------------------------------------
Parameters:
  CrossRealmTrustPrincipalPassword:
    NoEcho: true
    Description: Kerberos CrossRealm Trust Principal Password. Example, CfnIntegrationTest-1
    Type: String
  KdcAdminPassword:
    NoEcho: true
    Description: Kerberos Kdc Admin Password. Example, CfnIntegrationTest-1
    Type: String

# ---------------------------------------------------------------------------
# Mappings: static configuration looked up with !FindInMap below.
# ---------------------------------------------------------------------------
Mappings:
  # Source location of the bootstrap/step scripts copied by the CopyZips custom resource.
  Studio:
    s3params:
      S3Bucket: aws-ml-blog
      S3Key: artifacts/sma-milestone1/
  ClusterConfigurations:
    emr:
      masterInstanceType: m5.xlarge
      coreInstanceType: m5.xlarge
      masterInstanceCount: 1
      coreInstanceCount: 2
      emrReleaseVersion: emr-5.30.1
      BootStrapScriptFile: createlinuxusers.sh
      StepScriptFile: configurekdc.sh
    sagemaker:
      sageMakerConfigName: SageEMRConfig
      sageMakerInstanceName: SageEMR
      sageMakerInstanceType: ml.t2.medium
  VpcConfigurations:
    cidr:
      Vpc: 10.0.0.0/16
      PublicSubnet1: 10.0.10.0/24
      PrivateSubnet1: 10.0.20.0/24

Resources:
  # -------------------------------------------------------------------------
  # Networking: one VPC with a public subnet (IGW + NAT gateway) and a private
  # subnet that routes 0.0.0.0/0 through the NAT gateway.
  # -------------------------------------------------------------------------
  VPC:
    Type: 'AWS::EC2::VPC'
    Properties:
      CidrBlock: !FindInMap
        - VpcConfigurations
        - cidr
        - Vpc
      EnableDnsSupport: true
      EnableDnsHostnames: true
      Tags:
        - Key: Name
          Value: !Sub '${AWS::StackName}-VPC'

  InternetGateway:
    Type: 'AWS::EC2::InternetGateway'
    Properties:
      Tags:
        - Key: Name
          Value: !Sub '${AWS::StackName}-IGW'

  InternetGatewayAttachment:
    Type: 'AWS::EC2::VPCGatewayAttachment'
    Properties:
      InternetGatewayId: !Ref InternetGateway
      VpcId: !Ref VPC

  PublicSubnet1:
    Type: 'AWS::EC2::Subnet'
    Properties:
      VpcId: !Ref VPC
      AvailabilityZone: !Select
        - 0
        - !GetAZs ''
      CidrBlock: !FindInMap
        - VpcConfigurations
        - cidr
        - PublicSubnet1
      MapPublicIpOnLaunch: true
      Tags:
        - Key: Name
          Value: !Sub '${AWS::StackName} Public Subnet (AZ1)'

  PrivateSubnet1:
    Type: 'AWS::EC2::Subnet'
    Properties:
      VpcId: !Ref VPC
      AvailabilityZone: !Select
        - 0
        - !GetAZs ''
      CidrBlock: !FindInMap
        - VpcConfigurations
        - cidr
        - PrivateSubnet1
      MapPublicIpOnLaunch: false
      Tags:
        - Key: Name
          Value: !Sub '${AWS::StackName} Private Subnet (AZ1)'

  NatGateway1EIP:
    Type: 'AWS::EC2::EIP'
    DependsOn: InternetGatewayAttachment
    Properties:
      Domain: vpc

  NatGateway1:
    Type: 'AWS::EC2::NatGateway'
    Properties:
      AllocationId: !GetAtt
        - NatGateway1EIP
        - AllocationId
      SubnetId: !Ref PublicSubnet1

  PublicRouteTable:
    Type: 'AWS::EC2::RouteTable'
    Properties:
      VpcId: !Ref VPC
      Tags:
        - Key: Name
          Value: !Sub '${AWS::StackName} Public Routes'

  DefaultPublicRoute:
    Type: 'AWS::EC2::Route'
    DependsOn: InternetGatewayAttachment
    Properties:
      RouteTableId: !Ref PublicRouteTable
      DestinationCidrBlock: 0.0.0.0/0
      GatewayId: !Ref InternetGateway

  PublicSubnet1RouteTableAssociation:
    Type: 'AWS::EC2::SubnetRouteTableAssociation'
    Properties:
      RouteTableId: !Ref PublicRouteTable
      SubnetId: !Ref PublicSubnet1

  PrivateRouteTable1:
    Type: 'AWS::EC2::RouteTable'
    Properties:
      VpcId: !Ref VPC
      Tags:
        - Key: Name
          Value: !Sub '${AWS::StackName} Private Routes (AZ1)'

  PrivateSubnet1RouteTableAssociation:
    Type: 'AWS::EC2::SubnetRouteTableAssociation'
    Properties:
      RouteTableId: !Ref PrivateRouteTable1
      SubnetId: !Ref PrivateSubnet1

  PrivateSubnet1InternetRoute:
    Type: 'AWS::EC2::Route'
    Properties:
      RouteTableId: !Ref PrivateRouteTable1
      DestinationCidrBlock: 0.0.0.0/0
      NatGatewayId: !Ref NatGateway1

  # Gateway endpoint so S3 traffic from the private subnet's route table uses the endpoint.
  S3Endpoint:
    Type: 'AWS::EC2::VPCEndpoint'
    Properties:
      ServiceName: !Sub 'com.amazonaws.${AWS::Region}.s3'
      VpcEndpointType: Gateway
      PolicyDocument:
        Version: 2012-10-17
        Statement:
          - Effect: Allow
            Principal: '*'
            Action:
              - 's3:*'
            Resource:
              - '*'
      VpcId: !Ref VPC
      RouteTableIds:
        - !Ref PrivateRouteTable1

  # Bucket that receives the copied scripts (CopyZips) and the EMR logs (LogUri).
  S3Bucket:
    Type: 'AWS::S3::Bucket'

  # -------------------------------------------------------------------------
  # EMR security groups. Ingress/egress rules are separate resources because
  # the groups reference each other.
  # -------------------------------------------------------------------------
  masterSecurityGroup:
    Type: 'AWS::EC2::SecurityGroup'
    Properties:
      GroupDescription: EMR Master SG
      SecurityGroupEgress:
        - IpProtocol: -1
          FromPort: -1
          ToPort: -1
          CidrIp: 0.0.0.0/0
      VpcId: !Ref VPC

  slaveSecurityGroup:
    Type: 'AWS::EC2::SecurityGroup'
    Properties:
      GroupDescription: EMR Slave SG
      SecurityGroupEgress:
        - IpProtocol: -1
          FromPort: -1
          ToPort: -1
          CidrIp: 0.0.0.0/0
      VpcId: !Ref VPC

  emrServiceSecurityGroup:
    Type: 'AWS::EC2::SecurityGroup'
    Properties:
      GroupDescription: EMR Service Access SG
      VpcId: !Ref VPC

  # --- Master SG ingress: ICMP / all TCP / all UDP from master and slave groups ---
  emrMasterIngressSelfICMP:
    Type: 'AWS::EC2::SecurityGroupIngress'
    Properties:
      GroupId: !Ref masterSecurityGroup
      IpProtocol: icmp
      FromPort: -1
      ToPort: -1
      SourceSecurityGroupId: !Ref masterSecurityGroup

  emrMasterIngressSlaveICMP:
    Type: 'AWS::EC2::SecurityGroupIngress'
    Properties:
      GroupId: !Ref masterSecurityGroup
      IpProtocol: icmp
      FromPort: -1
      ToPort: -1
      SourceSecurityGroupId: !Ref slaveSecurityGroup

  emrMasterIngressSelfAllTcp:
    Type: 'AWS::EC2::SecurityGroupIngress'
    Properties:
      GroupId: !Ref masterSecurityGroup
      IpProtocol: tcp
      FromPort: 0
      ToPort: 65535
      SourceSecurityGroupId: !Ref masterSecurityGroup

  emrMasterIngressSlaveAllTcp:
    Type: 'AWS::EC2::SecurityGroupIngress'
    Properties:
      GroupId: !Ref masterSecurityGroup
      IpProtocol: tcp
      FromPort: 0
      ToPort: 65535
      SourceSecurityGroupId: !Ref slaveSecurityGroup

  emrMasterIngressSelfAllUdp:
    Type: 'AWS::EC2::SecurityGroupIngress'
    Properties:
      GroupId: !Ref masterSecurityGroup
      IpProtocol: udp
      FromPort: 0
      ToPort: 65535
      SourceSecurityGroupId: !Ref masterSecurityGroup

  emrMasterIngressSlaveAllUdp:
    Type: 'AWS::EC2::SecurityGroupIngress'
    Properties:
      GroupId: !Ref masterSecurityGroup
      IpProtocol: udp
      FromPort: 0
      ToPort: 65535
      SourceSecurityGroupId: !Ref slaveSecurityGroup

  # --- Master SG ingress from the SageMaker Studio security group ---
  # Port 8998 (named "Livy" by the resource).
  emrMasterIngressLivySG:
    Type: 'AWS::EC2::SecurityGroupIngress'
    Properties:
      GroupId: !Ref masterSecurityGroup
      IpProtocol: tcp
      FromPort: 8998
      ToPort: 8998
      SourceSecurityGroupId: !Ref SageMakerInstanceSecurityGroup

  # Port 10000 (named "Hive" by the resource).
  emrMasterIngressHiveSG:
    Type: 'AWS::EC2::SecurityGroupIngress'
    Properties:
      GroupId: !Ref masterSecurityGroup
      IpProtocol: tcp
      FromPort: 10000
      ToPort: 10000
      SourceSecurityGroupId: !Ref SageMakerInstanceSecurityGroup

  # --- EMR service access group <-> cluster groups ---
  emrMasterIngressServiceSg:
    Type: 'AWS::EC2::SecurityGroupIngress'
    Properties:
      GroupId: !Ref masterSecurityGroup
      IpProtocol: tcp
      FromPort: 8443
      ToPort: 8443
      SourceSecurityGroupId: !Ref emrServiceSecurityGroup

  emrServiceIngressMasterSg:
    Type: 'AWS::EC2::SecurityGroupIngress'
    Properties:
      GroupId: !Ref emrServiceSecurityGroup
      IpProtocol: tcp
      FromPort: 9443
      ToPort: 9443
      SourceSecurityGroupId: !Ref masterSecurityGroup

  emrServiceEgressMaster:
    Type: 'AWS::EC2::SecurityGroupEgress'
    Properties:
      GroupId: !Ref emrServiceSecurityGroup
      IpProtocol: tcp
      FromPort: 8443
      ToPort: 8443
      DestinationSecurityGroupId: !Ref masterSecurityGroup

  emrServiceEgressSlave:
    Type: 'AWS::EC2::SecurityGroupEgress'
    Properties:
      GroupId: !Ref emrServiceSecurityGroup
      IpProtocol: tcp
      FromPort: 8443
      ToPort: 8443
      DestinationSecurityGroupId: !Ref slaveSecurityGroup

  # --- Slave SG ingress: ICMP / all TCP / all UDP from slave and master groups ---
  emrSlaveIngressSelfICMP:
    Type: 'AWS::EC2::SecurityGroupIngress'
    Properties:
      GroupId: !Ref slaveSecurityGroup
      IpProtocol: icmp
      FromPort: -1
      ToPort: -1
      SourceSecurityGroupId: !Ref slaveSecurityGroup

  emrSlaveIngressMasterICMP:
    Type: 'AWS::EC2::SecurityGroupIngress'
    Properties:
      GroupId: !Ref slaveSecurityGroup
      IpProtocol: icmp
      FromPort: -1
      ToPort: -1
      SourceSecurityGroupId: !Ref masterSecurityGroup

  emrSlaveIngressSelfAllTcp:
    Type: 'AWS::EC2::SecurityGroupIngress'
    Properties:
      GroupId: !Ref slaveSecurityGroup
      IpProtocol: tcp
      FromPort: 0
      ToPort: 65535
      SourceSecurityGroupId: !Ref slaveSecurityGroup

  emrSlaveIngressMasterAllTcp:
    Type: 'AWS::EC2::SecurityGroupIngress'
    Properties:
      GroupId: !Ref slaveSecurityGroup
      IpProtocol: tcp
      FromPort: 0
      ToPort: 65535
      SourceSecurityGroupId: !Ref masterSecurityGroup

  emrSlaveIngressSelfAllUdp:
    Type: 'AWS::EC2::SecurityGroupIngress'
    Properties:
      GroupId: !Ref slaveSecurityGroup
      IpProtocol: udp
      FromPort: 0
      ToPort: 65535
      SourceSecurityGroupId: !Ref slaveSecurityGroup

  emrSlaveIngressMasterAllUdp:
    Type: 'AWS::EC2::SecurityGroupIngress'
    Properties:
      GroupId: !Ref slaveSecurityGroup
      IpProtocol: udp
      FromPort: 0
      ToPort: 65535
      SourceSecurityGroupId: !Ref masterSecurityGroup

  emrSlaveIngressServiceSg:
    Type: 'AWS::EC2::SecurityGroupIngress'
    Properties:
      GroupId: !Ref slaveSecurityGroup
      IpProtocol: tcp
      FromPort: 8443
      ToPort: 8443
      SourceSecurityGroupId: !Ref emrServiceSecurityGroup

  # -------------------------------------------------------------------------
  # EMR IAM roles
  # -------------------------------------------------------------------------
  EMRClusterServiceRole:
    Properties:
      AssumeRolePolicyDocument:
        Statement:
          - Action:
              - 'sts:AssumeRole'
            Effect: Allow
            Principal:
              Service:
                - elasticmapreduce.amazonaws.com
        Version: 2012-10-17
      ManagedPolicyArns:
        - 'arn:aws:iam::aws:policy/service-role/AmazonElasticMapReduceRole'
      Path: /
    Type: 'AWS::IAM::Role'

  EMRClusterinstanceProfile:
    Properties:
      Path: /
      Roles:
        - !Ref EMRClusterinstanceProfileRole
    Type: 'AWS::IAM::InstanceProfile'

  EMRClusterinstanceProfileRole:
    Properties:
      RoleName: !Sub '${AWS::StackName}-EMRClusterinstanceProfileRole'
      AssumeRolePolicyDocument:
        Statement:
          - Action:
              - 'sts:AssumeRole'
            Effect: Allow
            Principal:
              Service:
                - ec2.amazonaws.com
        Version: 2012-10-17
      ManagedPolicyArns:
        - 'arn:aws:iam::aws:policy/service-role/AmazonElasticMapReduceforEC2Role'
      Path: /
    Type: 'AWS::IAM::Role'

  # -------------------------------------------------------------------------
  # Custom resource that empties S3Bucket when the stack is deleted.
  # -------------------------------------------------------------------------
  CleanUpBucketonDelete:
    DependsOn: CleanUpBucketonDeleteLambda
    Type: 'Custom::emptybucket'
    Properties:
      ServiceToken: !GetAtt
        - CleanUpBucketonDeleteLambda
        - Arn
      inputBucketName: !Ref S3Bucket

  CleanUpBucketonDeleteLambda:
    DependsOn:
      - S3Bucket
      - CleanUpBucketonDeleteLambdaRole
    Type: 'AWS::Lambda::Function'
    Properties:
      Description: Empty bucket on delete
      Handler: index.lambda_handler
      Role: !GetAtt
        - CleanUpBucketonDeleteLambdaRole
        - Arn
      # python3.7 is a deprecated Lambda runtime; use a currently supported one.
      Runtime: python3.12
      Timeout: 60
      Code:
        ZipFile: |
          import json
          import boto3
          import urllib3
          # Needed by the `except ClientError` below; without it a missing bucket
          # raised NameError and the custom resource reported FAILED.
          from botocore.exceptions import ClientError

          def empty_bucket(bucket_name):
              print("Attempting to empty the bucket {0}".format(bucket_name))
              s3_client = boto3.client('s3')
              s3 = boto3.resource('s3')

              try:
                  bucket = s3.Bucket(bucket_name).load()
              except ClientError:
                  print("Bucket {0} does not exist".format(bucket_name))
                  return
              # Confirm if versioning is enabled
              version_status = s3_client.get_bucket_versioning(Bucket=bucket_name)
              status = version_status.get('Status','')
              if status == 'Enabled':
                  version_status = s3_client.put_bucket_versioning(Bucket=bucket_name,
                                                                   VersioningConfiguration={'Status': 'Suspended'})
              version_paginator = s3_client.get_paginator('list_object_versions')
              version_iterator = version_paginator.paginate(
                  Bucket=bucket_name
              )

              for page in version_iterator:
                  print(page)
                  if 'DeleteMarkers' in page:
                      delete_markers = page['DeleteMarkers']
                      if delete_markers is not None:
                          for delete_marker in delete_markers:
                              key = delete_marker['Key']
                              versionId = delete_marker['VersionId']
                              s3_client.delete_object(Bucket=bucket_name, Key=key, VersionId=versionId)
                  if 'Versions' in page and page['Versions'] is not None:
                      versions = page['Versions']
                      for version in versions:
                          print(version)
                          key = version['Key']
                          versionId = version['VersionId']
                          s3_client.delete_object(Bucket=bucket_name, Key=key, VersionId=versionId)
              object_paginator = s3_client.get_paginator('list_objects_v2')
              object_iterator = object_paginator.paginate(
                  Bucket=bucket_name
              )
              for page in object_iterator:
                  if 'Contents' in page:
                      for content in page['Contents']:
                          key = content['Key']
                          s3_client.delete_object(Bucket=bucket_name, Key=content['Key'])
              print("Successfully emptied the bucket {0}".format(bucket_name))



          def lambda_handler(event, context):
              try:
                  bucket = event['ResourceProperties']['inputBucketName']
                  # Only a stack/resource Delete empties the bucket; Create/Update just report SUCCESS.
                  if event['RequestType'] == 'Delete':
                      empty_bucket(bucket)
                  sendResponse(event, context, "SUCCESS")
              except Exception as e:
                  print(e)
                  sendResponse(event, context, "FAILED")

          def sendResponse(event, context, status):
              # PUT the custom-resource result to the URL supplied in the event.
              http = urllib3.PoolManager()
              response_body = {'Status': status,
                               'Reason': 'Log stream name: ' + context.log_stream_name,
                               'PhysicalResourceId': context.log_stream_name,
                               'StackId': event['StackId'],
                               'RequestId': event['RequestId'],
                               'LogicalResourceId': event['LogicalResourceId'],
                               'Data': json.loads("{}")}
              http.request('PUT', event['ResponseURL'], body=json.dumps(response_body))

  CleanUpBucketonDeleteLambdaRole:
    Type: 'AWS::IAM::Role'
    Properties:
      AssumeRolePolicyDocument:
        Version: 2012-10-17
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - lambda.amazonaws.com
            Action:
              - 'sts:AssumeRole'
      Path: /
      Policies:
        - PolicyName: !Sub 'CleanUpBucketonDeleteLambdaPolicy-${AWS::StackName}'
          PolicyDocument:
            Version: 2012-10-17
            Statement:
              - Effect: Allow
                Action:
                  - 's3:*'
                Resource:
                  - !GetAtt S3Bucket.Arn
                  - !Sub '${S3Bucket.Arn}/'
                  - !Sub '${S3Bucket.Arn}/*'
              # The function may empty the bucket but never delete it.
              - Effect: Deny
                Action:
                  - 's3:DeleteBucket'
                Resource: '*'
              - Effect: Allow
                Action:
                  - 'logs:*'
                Resource: '*'

  # -------------------------------------------------------------------------
  # EMR security configuration: cluster-dedicated KDC plus an EMRFS role
  # mapping for Linux user "user1".
  # -------------------------------------------------------------------------
  securityConfiguration:
    Type: 'AWS::EMR::SecurityConfiguration'
    Properties:
      SecurityConfiguration:
        AuthenticationConfiguration:
          KerberosConfiguration:
            Provider: ClusterDedicatedKdc
            ClusterDedicatedKdcConfiguration:
              TicketLifetimeInHours: 24
        AuthorizationConfiguration:
          EmrFsConfiguration:
            RoleMappings:
              # !GetAtt (rather than a hand-built ARN string) makes CloudFormation
              # create the role before this configuration and the cluster using it.
              - Role: !GetAtt allowEMRFSAccessForUser1.Arn
                IdentifierType: User
                Identifiers:
                  - user1

  # -------------------------------------------------------------------------
  # SageMaker Studio / VPC endpoint security groups
  # -------------------------------------------------------------------------
  SageMakerInstanceSecurityGroup:
    Type: 'AWS::EC2::SecurityGroup'
    Properties:
      GroupName: SMSG
      GroupDescription: Security group with no ingress rule
      SecurityGroupEgress:
        - IpProtocol: -1
          FromPort: -1
          ToPort: -1
          CidrIp: 0.0.0.0/0
      VpcId: !Ref VPC

  # Self-referencing rule: members of the Studio group may reach each other on all protocols.
  SageMakerInstanceSecurityGroupIngress:
    Type: AWS::EC2::SecurityGroupIngress
    Properties:
      IpProtocol: '-1'
      GroupId: !Ref SageMakerInstanceSecurityGroup
      SourceSecurityGroupId: !Ref SageMakerInstanceSecurityGroup

  VPCEndpointSecurityGroup:
    Type: AWS::EC2::SecurityGroup
    Properties:
      GroupDescription: Allow TLS for VPC Endpoint
      SecurityGroupEgress:
        - IpProtocol: -1
          FromPort: -1
          ToPort: -1
          CidrIp: 0.0.0.0/0
      VpcId: !Ref VPC
      Tags:
        - Key: Name
          Value: !Sub ${AWS::StackName}-endpoint-security-group

  EndpointSecurityGroupIngress:
    Type: AWS::EC2::SecurityGroupIngress
    Properties:
      IpProtocol: '-1'
      GroupId: !Ref VPCEndpointSecurityGroup
      SourceSecurityGroupId: !Ref SageMakerInstanceSecurityGroup

  # Be advised, AmazonSageMakerFullAccess provides access to a broad range of capabilities.
  SageMakerExecutionRole:
    Type: 'AWS::IAM::Role'
    Properties:
      AssumeRolePolicyDocument:
        Version: 2012-10-17
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - sagemaker.amazonaws.com
            Action:
              - 'sts:AssumeRole'
      Path: /
      Policies:
        - PolicyName: !Sub '${AWS::StackName}-sageemr'
          PolicyDocument:
            Version: 2012-10-17
            Statement:
              - Effect: Allow
                Action:
                  - elasticmapreduce:ListInstances
                  - elasticmapreduce:DescribeCluster
                  - elasticmapreduce:DescribeSecurityConfiguration
                  - iam:CreateServiceLinkedRole
                  - iam:GetRole
                Resource: '*'
              - Sid: AllowPassRoleSageMaker
                Effect: Allow
                Action:
                  - iam:PassRole
                Resource: '*'
                Condition:
                  StringEquals:
                    iam:PassedToService: sagemaker.amazonaws.com
              - Effect: Allow
                Action:
                  - elasticmapreduce:DescribeCluster
                  - elasticmapreduce:ListInstanceGroups
                Resource: 'arn:aws:elasticmapreduce:*:*:cluster/*'
              - Effect: Allow
                Action:
                  - elasticmapreduce:ListClusters
                Resource: '*'
      ManagedPolicyArns:
        - 'arn:aws:iam::aws:policy/AmazonSageMakerFullAccess'
        - 'arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess'

  # --- Master SG ingress from Studio on TCP 88 / 749 / 464 (named KDC, KDC admin, kinit) ---
  emrMasterIngressKDCSG:
    Type: 'AWS::EC2::SecurityGroupIngress'
    Properties:
      GroupId: !Ref masterSecurityGroup
      IpProtocol: tcp
      FromPort: 88
      ToPort: 88
      SourceSecurityGroupId: !Ref SageMakerInstanceSecurityGroup

  emrMasterIngressKDCAdminSG:
    Type: 'AWS::EC2::SecurityGroupIngress'
    Properties:
      GroupId: !Ref masterSecurityGroup
      IpProtocol: tcp
      FromPort: 749
      ToPort: 749
      SourceSecurityGroupId: !Ref SageMakerInstanceSecurityGroup

  emrMasterIngressKinit464SG:
    Type: 'AWS::EC2::SecurityGroupIngress'
    Properties:
      GroupId: !Ref masterSecurityGroup
      IpProtocol: tcp
      FromPort: 464
      ToPort: 464
      SourceSecurityGroupId: !Ref SageMakerInstanceSecurityGroup

  # Role assumed by the EMR instance-profile role on behalf of "user1" (see the
  # EMRFS role mapping in securityConfiguration).
  allowEMRFSAccessForUser1:
    Type: 'AWS::IAM::Role'
    Properties:
      RoleName: !Sub '${AWS::StackName}-allowEMRFSAccessForUser1'
      AssumeRolePolicyDocument:
        Version: 2012-10-17
        Statement:
          - Effect: Allow
            Principal:
              # !GetAtt creates an implicit dependency so the principal role exists
              # before this trust policy is validated by IAM.
              AWS: !GetAtt EMRClusterinstanceProfileRole.Arn
            Action:
              - 'sts:AssumeRole'
      Path: /
      Policies:
        - PolicyName: !Sub '${AWS::StackName}-emrFS-user1'
          PolicyDocument:
            Version: 2012-10-17
            Statement:
              - Action:
                  - 's3:ListBucket'
                Resource:
                  - !Sub 'arn:aws:s3:::${S3Bucket}'
                Effect: Allow
              - Action:
                  - 's3:*'
                Resource:
                  - !Sub 'arn:aws:s3:::${S3Bucket}/*'
                Effect: Allow

  # NOTE(review): no function in this template uses LambdaExecutionRole; it only
  # receives LambdaExecutionPolicy. Confirm whether it is still needed.
  LambdaExecutionRole:
    Type: 'AWS::IAM::Role'
    Properties:
      AssumeRolePolicyDocument:
        Version: 2012-10-17
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - lambda.amazonaws.com
            Action:
              - 'sts:AssumeRole'
      Path: /

  LambdaExecutionPolicy:
    Type: AWS::IAM::ManagedPolicy
    Properties:
      Path: /
      PolicyDocument:
        Version: 2012-10-17
        Statement:
          - Sid: CloudWatchLogsPermissions
            Effect: Allow
            Action:
              - logs:CreateLogGroup
              - logs:CreateLogStream
              - logs:PutLogEvents
            Resource: !Sub 'arn:${AWS::Partition}:logs:*:*:*'
          - Sid: SageMakerDomainPermission
            Effect: Allow
            Action:
              - sagemaker:CreateDomain
              - sagemaker:DescribeDomain
              - sagemaker:DeleteDomain
              - sagemaker:UpdateDomain
              - sagemaker:CreateUserProfile
              - sagemaker:UpdateUserProfile
              - sagemaker:DeleteUserProfile
              - sagemaker:DescribeUserProfile
              - sageMaker:ListApps
              - sageMaker:DeleteApp
            Resource:
              - !Sub 'arn:${AWS::Partition}:sagemaker:*:*:domain/*'
              - !Sub 'arn:${AWS::Partition}:sagemaker:*:*:user-profile/*'
              - !Sub 'arn:${AWS::Partition}:sagemaker:*:*:app/*'
          - Sid: IAMServiceLinkedRolePermission
            Effect: Allow
            Action:
              - iam:CreateServiceLinkedRole
            Resource:
              - !Sub 'arn:${AWS::Partition}:iam::${AWS::AccountId}:role/aws-service-role/sagemaker.amazonaws.com/AWSServiceRoleForAmazonSageMaker*'
            Condition:
              StringLike:
                iam:AWSServiceName: sagemaker.amazonaws.com
          - Sid: IAMServiceLinkedRolePolicyPermission
            Effect: Allow
            Action:
              - iam:AttachRolePolicy
              - iam:PutRolePolicy
            Resource:
              - !Sub 'arn:${AWS::Partition}:iam::${AWS::AccountId}:role/aws-service-role/sagemaker.amazonaws.com/AWSServiceRoleForAmazonSageMaker*'
          - Sid: SageMakerExecPassRole
            Effect: Allow
            Action:
              - iam:PassRole
            Resource: !GetAtt SageMakerExecutionRole.Arn
      Roles:
        - !Ref LambdaExecutionRole

  # -------------------------------------------------------------------------
  # Custom resource that copies the EMR bootstrap/step scripts from the source
  # bucket in Mappings into this stack's bucket (same key prefix).
  # -------------------------------------------------------------------------
  CopyZips:
    Type: Custom::CopyZips
    Properties:
      ServiceToken: !GetAtt 'CopyZipsFunction.Arn'
      DestBucket: !Ref 'S3Bucket'
      SourceBucket: !FindInMap
        - Studio
        - s3params
        - S3Bucket
      Prefix: !FindInMap
        - Studio
        - s3params
        - S3Key
      Objects:
        - !FindInMap
          - ClusterConfigurations
          - emr
          - BootStrapScriptFile
        - !FindInMap
          - ClusterConfigurations
          - emr
          - StepScriptFile

  CopyZipsRole:
    Type: AWS::IAM::Role
    Properties:
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service: lambda.amazonaws.com
            Action: sts:AssumeRole
      ManagedPolicyArns:
        - arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole
      Path: /
      Policies:
        - PolicyName: lambda-copier
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              - Effect: Allow
                Action:
                  - s3:GetObject
                Resource: '*'
              - Effect: Allow
                Action:
                  - s3:PutObject
                  - s3:DeleteObject
                Resource:
                  - !Sub 'arn:aws:s3:::${S3Bucket}/*'

  CopyZipsFunction:
    Type: AWS::Lambda::Function
    Properties:
      Description: Copies objects from a source S3 bucket to a destination
      Handler: index.handler
      # python3.8 is a deprecated Lambda runtime; use a currently supported one.
      Runtime: python3.12
      Role: !GetAtt 'CopyZipsRole.Arn'
      Timeout: 900
      Code:
        ZipFile: |
          import json
          import logging
          import threading
          import boto3
          import cfnresponse

          def copy_objects(source_bucket, dest_bucket, prefix, objects):
              s3 = boto3.client('s3')
              for o in objects:
                  key = prefix + o
                  copy_source = {
                      'Bucket': source_bucket,
                      'Key': key
                  }
                  print('copy_source: %s' % copy_source)
                  print('dest_bucket = %s'%dest_bucket)
                  print('key = %s' %key)
                  s3.copy_object(CopySource=copy_source, Bucket=dest_bucket,
                        Key=key)

          def delete_objects(bucket, prefix, objects):
              s3 = boto3.client('s3')
              objects = {'Objects': [{'Key': prefix + o} for o in objects]}
              s3.delete_objects(Bucket=bucket, Delete=objects)

          def timeout(event, context):
              logging.error('Execution is about to time out, sending failure response to CloudFormation')
              cfnresponse.send(event, context, cfnresponse.FAILED, {}, None)

          def handler(event, context):
              # make sure we send a failure to CloudFormation if the function
              # is going to timeout
              timer = threading.Timer((context.get_remaining_time_in_millis()
                        / 1000.00) - 0.5, timeout, args=[event, context])
              timer.start()
              print('Received event: %s' % json.dumps(event))
              status = cfnresponse.SUCCESS
              try:
                  source_bucket = event['ResourceProperties']['SourceBucket']
                  dest_bucket = event['ResourceProperties']['DestBucket']
                  prefix = event['ResourceProperties']['Prefix']
                  objects = event['ResourceProperties']['Objects']
                  if event['RequestType'] == 'Delete':
                      delete_objects(dest_bucket, prefix, objects)
                  else:
                      copy_objects(source_bucket, dest_bucket, prefix, objects)
              except Exception as e:
                  logging.error('Exception: %s' % e, exc_info=True)
                  status = cfnresponse.FAILED
              finally:
                  timer.cancel()
                  cfnresponse.send(event, context, status, {}, None)

  # -------------------------------------------------------------------------
  # Interface VPC endpoints in the private subnet, all guarded by
  # VPCEndpointSecurityGroup and using an allow-all endpoint policy.
  # -------------------------------------------------------------------------
  VPCEndpointSagemakerAPI:
    Type: AWS::EC2::VPCEndpoint
    Properties:
      PolicyDocument:
        Version: 2012-10-17
        Statement:
          - Effect: Allow
            Principal: '*'
            Action: '*'
            Resource: '*'
      VpcEndpointType: Interface
      PrivateDnsEnabled: true
      SubnetIds:
        - !Ref PrivateSubnet1
      SecurityGroupIds:
        - !Ref VPCEndpointSecurityGroup
      ServiceName: !Sub 'com.amazonaws.${AWS::Region}.sagemaker.api'
      VpcId: !Ref VPC

  VPCEndpointSageMakerRuntime:
    Type: AWS::EC2::VPCEndpoint
    Properties:
      PolicyDocument:
        Version: 2012-10-17
        Statement:
          - Effect: Allow
            Principal: '*'
            Action: '*'
            Resource: '*'
      VpcEndpointType: Interface
      PrivateDnsEnabled: true
      SubnetIds:
        - !Ref PrivateSubnet1
      SecurityGroupIds:
        - !Ref VPCEndpointSecurityGroup
      ServiceName: !Sub 'com.amazonaws.${AWS::Region}.sagemaker.runtime'
      VpcId: !Ref VPC

  VPCEndpointSTS:
    Type: 'AWS::EC2::VPCEndpoint'
    Properties:
      PolicyDocument:
        Version: 2012-10-17
        Statement:
          - Effect: Allow
            Principal: '*'
            Action: '*'
            Resource: '*'
      VpcEndpointType: Interface
      PrivateDnsEnabled: true
      SubnetIds:
        - !Ref PrivateSubnet1
      SecurityGroupIds:
        - !Ref VPCEndpointSecurityGroup
      ServiceName: !Sub 'com.amazonaws.${AWS::Region}.sts'
      VpcId: !Ref VPC

  VPCEndpointCW:
    Type: 'AWS::EC2::VPCEndpoint'
    Properties:
      PolicyDocument:
        Version: 2012-10-17
        Statement:
          - Effect: Allow
            Principal: '*'
            Action: '*'
            Resource: '*'
      VpcEndpointType: Interface
      PrivateDnsEnabled: true
      SubnetIds:
        - !Ref PrivateSubnet1
      SecurityGroupIds:
        - !Ref VPCEndpointSecurityGroup
      ServiceName: !Sub 'com.amazonaws.${AWS::Region}.monitoring'
      VpcId: !Ref VPC

  VPCEndpointCWL:
    Type: 'AWS::EC2::VPCEndpoint'
    Properties:
      PolicyDocument:
        Version: 2012-10-17
        Statement:
          - Effect: Allow
            Principal: '*'
            Action: '*'
            Resource: '*'
      VpcEndpointType: Interface
      PrivateDnsEnabled: true
      SubnetIds:
        - !Ref PrivateSubnet1
      SecurityGroupIds:
        - !Ref VPCEndpointSecurityGroup
      ServiceName: !Sub 'com.amazonaws.${AWS::Region}.logs'
      VpcId: !Ref VPC

  VPCEndpointECR:
    Type: 'AWS::EC2::VPCEndpoint'
    Properties:
      PolicyDocument:
        Version: 2012-10-17
        Statement:
          - Effect: Allow
            Principal: '*'
            Action: '*'
            Resource: '*'
      VpcEndpointType: Interface
      PrivateDnsEnabled: true
      SubnetIds:
        - !Ref PrivateSubnet1
      SecurityGroupIds:
        - !Ref VPCEndpointSecurityGroup
      ServiceName: !Sub 'com.amazonaws.${AWS::Region}.ecr.dkr'
      VpcId: !Ref VPC

  VPCEndpointECRAPI:
    Type: 'AWS::EC2::VPCEndpoint'
    Properties:
      PolicyDocument:
        Version: 2012-10-17
        Statement:
          - Effect: Allow
            Principal: '*'
            Action: '*'
            Resource: '*'
      VpcEndpointType: Interface
      PrivateDnsEnabled: true
      SubnetIds:
        - !Ref PrivateSubnet1
      SecurityGroupIds:
        - !Ref VPCEndpointSecurityGroup
      ServiceName: !Sub 'com.amazonaws.${AWS::Region}.ecr.api'
      VpcId: !Ref VPC

  # -------------------------------------------------------------------------
  # SageMaker Studio domain (VPC-only networking, IAM auth) and one user profile.
  # -------------------------------------------------------------------------
  StudioDomain:
    Type: AWS::SageMaker::Domain
    Properties:
      AppNetworkAccessType: VpcOnly
      AuthMode: IAM
      DomainName: StudioDomain
      VpcId: !Ref VPC
      SubnetIds:
        - !Ref PrivateSubnet1
      DefaultUserSettings:
        ExecutionRole: !GetAtt SageMakerExecutionRole.Arn
        SecurityGroups:
          - !Ref SageMakerInstanceSecurityGroup

  StudioUserProfile:
    Type: AWS::SageMaker::UserProfile
    Properties:
      DomainId: !Ref StudioDomain
      UserProfileName: studio-user

  # -------------------------------------------------------------------------
  # Kerberized EMR cluster in the private subnet. Waits for CopyZips so the
  # bootstrap and step scripts are in S3Bucket before the cluster starts.
  # -------------------------------------------------------------------------
  EMRCluster:
    DependsOn:
      - CopyZips
      - securityConfiguration
    Type: 'AWS::EMR::Cluster'
    Properties:
      Applications:
        - Name: Spark
        - Name: Hive
        - Name: Livy
      BootstrapActions:
        - Name: Dummy bootstrap action
          ScriptBootstrapAction:
            Args:
              - dummy
              - parameter
            # Built from the same Mappings entries CopyZips uses, so the path always
            # matches the object that was actually copied.
            Path: !Sub
              - 's3://${S3Bucket}/${Prefix}${Script}'
              - Prefix: !FindInMap [Studio, s3params, S3Key]
                Script: !FindInMap [ClusterConfigurations, emr, BootStrapScriptFile]
      AutoScalingRole: EMR_AutoScaling_DefaultRole
      Configurations:
        - Classification: livy-conf
          ConfigurationProperties:
            livy.server.session.timeout: 2h
      EbsRootVolumeSize: 100
      Instances:
        CoreInstanceGroup:
          EbsConfiguration:
            EbsBlockDeviceConfigs:
              - VolumeSpecification:
                  SizeInGB: '320'
                  VolumeType: gp2
                VolumesPerInstance: '1'
            EbsOptimized: 'true'
          InstanceCount: !FindInMap
            - ClusterConfigurations
            - emr
            - coreInstanceCount
          InstanceType: !FindInMap
            - ClusterConfigurations
            - emr
            - coreInstanceType
          Market: ON_DEMAND
          Name: coreNode
        MasterInstanceGroup:
          EbsConfiguration:
            EbsBlockDeviceConfigs:
              - VolumeSpecification:
                  SizeInGB: '320'
                  VolumeType: gp2
                VolumesPerInstance: '1'
            EbsOptimized: 'true'
          # Read from Mappings (value 1) for consistency with the core group.
          InstanceCount: !FindInMap
            - ClusterConfigurations
            - emr
            - masterInstanceCount
          InstanceType: !FindInMap
            - ClusterConfigurations
            - emr
            - masterInstanceType
          Market: ON_DEMAND
          Name: masterNode
        Ec2SubnetId: !Ref PrivateSubnet1
        EmrManagedMasterSecurityGroup: !Ref masterSecurityGroup
        EmrManagedSlaveSecurityGroup: !Ref slaveSecurityGroup
        ServiceAccessSecurityGroup: !Ref emrServiceSecurityGroup
        TerminationProtected: false
      JobFlowRole: !Ref EMRClusterinstanceProfile
      LogUri: !Sub 's3://${S3Bucket}/artifacts/sma-milestone1/'
      Name: !Sub 'EMR-Cluster-${AWS::StackName}'
      ReleaseLabel: !FindInMap
        - ClusterConfigurations
        - emr
        - emrReleaseVersion
      ServiceRole: !Ref EMRClusterServiceRole
      VisibleToAllUsers: true
      KerberosAttributes:
        CrossRealmTrustPrincipalPassword: !Ref CrossRealmTrustPrincipalPassword
        KdcAdminPassword: !Ref KdcAdminPassword
        Realm: EC2.INTERNAL
      SecurityConfiguration: !Ref securityConfiguration
      Steps:
        - ActionOnFailure: CONTINUE
          HadoopJarStep:
            Args:
              # Step script location, derived from Mappings like the bootstrap path above.
              - !Sub
                - 's3://${S3Bucket}/${Prefix}${Script}'
                - Prefix: !FindInMap [Studio, s3params, S3Key]
                  Script: !FindInMap [ClusterConfigurations, emr, StepScriptFile]
            Jar: !Sub 's3://${AWS::Region}.elasticmapreduce/libs/script-runner/script-runner.jar'
            MainClass: ''
          Name: run any bash or java job in spark

Outputs:
  VPCandCIDR:
    Description: VPC ID and CIDR block
    Value: !Join
      - ' - '
      - - !Ref VPC
        - !GetAtt
          - VPC
          - CidrBlock
  PublicSubnets:
    Description: All public subnet created
    Value: !Join
      - ''
      - - !Ref PublicSubnet1
  PrivateSubnets:
    Description: All private subnet created
    Value: !Join
      - ', '
      - - !Ref PrivateSubnet1
  S3BucketName:
    Description: Bucket Name for Amazon S3 bucket
    Value: !Ref S3Bucket
  EMRMasterDNSName:
    Description: DNS Name of the EMR Master Node
    Value: !GetAtt
      - EMRCluster
      - MasterPublicDNS