---
# CloudFormation template: SageMaker Studio + EMR with LDAP authentication.
#
# Provisions, in a single new VPC (one public and one private subnet):
#   * an EC2 instance that runs a test LDAP server in a Docker container,
#   * an EMR cluster (Spark, Hive, Livy) whose Livy server authenticates
#     against that LDAP server,
#   * a SageMaker Studio domain (VpcOnly) and one user profile,
#   * an S3 bucket for bootstrap/step scripts and EMR logs, plus two custom
#     resources that seed the bucket on create and empty it on delete.
AWSTemplateFormatVersion: '2010-09-09'
Description: >
  LDAP authentication mode cloud formation template, we provision an EC2
  instance with an LDAP server and configure the EMR cluster to use this
  server for authentication. We create SageMaker Studio domain & SageMaker
  user profile. Using SageMaker Studio Notebooks connect to EMR cluster
  using Ldap

Parameters:
  # Resolved from the public SSM parameter at deploy time. The
  # `<AWS::EC2::Image::Id>` suffix is required: a bare
  # `AWS::SSM::Parameter::Value` is not a valid parameter type.
  LatestAmiId:
    Type: 'AWS::SSM::Parameter::Value<AWS::EC2::Image::Id>'
    Default: '/aws/service/ami-amazon-linux-latest/amzn2-ami-hvm-x86_64-gp2'

Mappings:
  # Source location of the scripts that CopyZips copies into S3Bucket.
  Studio:
    s3params:
      S3Bucket: aws-ml-blog
      S3Key: artifacts/sma-milestone1/
  ClusterConfigurations:
    emr:
      masterInstanceType: m5.xlarge
      coreInstanceType: m5.xlarge
      masterInstanceCount: 1
      coreInstanceCount: 2
      emrReleaseVersion: emr-5.30.1
      BootStrapScriptFile: installpylibs-v2.sh
      StepScriptFile: configurekdc.sh
    sagemaker:
      sageMakerConfigName: SageEMRConfig
      sageMakerInstanceName: SageEMR
      sageMakerInstanceType: ml.t2.medium
  VpcConfigurations:
    cidr:
      Vpc: 10.0.0.0/16
      PublicSubnet1: 10.0.10.0/24
      PrivateSubnet1: 10.0.20.0/24

Resources:
  # ------------------------------------------------------------------
  # Networking: VPC, subnets, internet/NAT gateways and route tables
  # ------------------------------------------------------------------
  VPC:
    Type: 'AWS::EC2::VPC'
    Properties:
      CidrBlock: !FindInMap
        - VpcConfigurations
        - cidr
        - Vpc
      EnableDnsSupport: true
      EnableDnsHostnames: true
      Tags:
        - Key: Name
          Value: !Sub '${AWS::StackName}-VPC'

  InternetGateway:
    Type: 'AWS::EC2::InternetGateway'
    Properties:
      Tags:
        - Key: Name
          Value: !Sub '${AWS::StackName}-IGW'

  InternetGatewayAttachment:
    Type: 'AWS::EC2::VPCGatewayAttachment'
    Properties:
      InternetGatewayId: !Ref InternetGateway
      VpcId: !Ref VPC

  PublicSubnet1:
    Type: 'AWS::EC2::Subnet'
    Properties:
      VpcId: !Ref VPC
      AvailabilityZone: !Select
        - 0
        - !GetAZs ''
      CidrBlock: !FindInMap
        - VpcConfigurations
        - cidr
        - PublicSubnet1
      MapPublicIpOnLaunch: true
      Tags:
        - Key: Name
          Value: !Sub '${AWS::StackName} Public Subnet (AZ1)'

  PrivateSubnet1:
    Type: 'AWS::EC2::Subnet'
    Properties:
      VpcId: !Ref VPC
      AvailabilityZone: !Select
        - 0
        - !GetAZs ''
      CidrBlock: !FindInMap
        - VpcConfigurations
        - cidr
        - PrivateSubnet1
      MapPublicIpOnLaunch: false
      Tags:
        - Key: Name
          Value: !Sub '${AWS::StackName} Private Subnet (AZ1)'

  NatGateway1EIP:
    Type: 'AWS::EC2::EIP'
    DependsOn: InternetGatewayAttachment
    Properties:
      Domain: vpc

  NatGateway1:
    Type: 'AWS::EC2::NatGateway'
    Properties:
      AllocationId: !GetAtt
        - NatGateway1EIP
        - AllocationId
      SubnetId: !Ref PublicSubnet1

  PublicRouteTable:
    Type: 'AWS::EC2::RouteTable'
    Properties:
      VpcId: !Ref VPC
      Tags:
        - Key: Name
          Value: !Sub '${AWS::StackName} Public Routes'

  DefaultPublicRoute:
    Type: 'AWS::EC2::Route'
    DependsOn: InternetGatewayAttachment
    Properties:
      RouteTableId: !Ref PublicRouteTable
      DestinationCidrBlock: 0.0.0.0/0
      GatewayId: !Ref InternetGateway

  PublicSubnet1RouteTableAssociation:
    Type: 'AWS::EC2::SubnetRouteTableAssociation'
    Properties:
      RouteTableId: !Ref PublicRouteTable
      SubnetId: !Ref PublicSubnet1

  PrivateRouteTable1:
    Type: 'AWS::EC2::RouteTable'
    Properties:
      VpcId: !Ref VPC
      Tags:
        - Key: Name
          Value: !Sub '${AWS::StackName} Private Routes (AZ1)'

  PrivateSubnet1RouteTableAssociation:
    Type: 'AWS::EC2::SubnetRouteTableAssociation'
    Properties:
      RouteTableId: !Ref PrivateRouteTable1
      SubnetId: !Ref PrivateSubnet1

  # Default route of the private subnet goes out through the NAT gateway.
  PrivateSubnet1InternetRoute:
    Type: 'AWS::EC2::Route'
    Properties:
      RouteTableId: !Ref PrivateRouteTable1
      DestinationCidrBlock: 0.0.0.0/0
      NatGatewayId: !Ref NatGateway1

  EC2LdapSecurityGroup:
    Type: 'AWS::EC2::SecurityGroup'
    Properties:
      GroupDescription: Enable access to the EC2 LDAP host
      SecurityGroupEgress:
        - IpProtocol: '-1'
          FromPort: -1
          ToPort: -1
          CidrIp: 0.0.0.0/0
      VpcId: !Ref VPC

  # Gateway endpoint so the private subnet reaches S3 without the NAT.
  S3Endpoint:
    Type: 'AWS::EC2::VPCEndpoint'
    Properties:
      ServiceName: !Sub 'com.amazonaws.${AWS::Region}.s3'
      VpcEndpointType: Gateway
      PolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal: '*'
            Action:
              - '*'
            Resource:
              - '*'
      VpcId: !Ref VPC
      RouteTableIds:
        - !Ref PrivateRouteTable1

  # Holds the copied bootstrap/step scripts and the EMR logs.
  S3Bucket:
    Type: 'AWS::S3::Bucket'

  # ------------------------------------------------------------------
  # Security groups and rules
  # ------------------------------------------------------------------
  # LDAP (389) from the EMR master and from SageMaker Studio.
  ec2IngressLdapSG:
    Type: 'AWS::EC2::SecurityGroupIngress'
    Properties:
      GroupId: !Ref EC2LdapSecurityGroup
      IpProtocol: tcp
      FromPort: 389
      ToPort: 389
      SourceSecurityGroupId: !Ref masterSecurityGroup

  ec2IngressLdapSMSG:
    Type: 'AWS::EC2::SecurityGroupIngress'
    Properties:
      GroupId: !Ref EC2LdapSecurityGroup
      IpProtocol: tcp
      FromPort: 389
      ToPort: 389
      SourceSecurityGroupId: !Ref SageMakerInstanceSecurityGroup

  masterSecurityGroup:
    Type: 'AWS::EC2::SecurityGroup'
    Properties:
      GroupDescription: EMR Master SG
      SecurityGroupEgress:
        - IpProtocol: '-1'
          FromPort: -1
          ToPort: -1
          CidrIp: 0.0.0.0/0
      VpcId: !Ref VPC

  slaveSecurityGroup:
    Type: 'AWS::EC2::SecurityGroup'
    Properties:
      GroupDescription: EMR Slave SG
      SecurityGroupEgress:
        - IpProtocol: '-1'
          FromPort: -1
          ToPort: -1
          CidrIp: 0.0.0.0/0
      VpcId: !Ref VPC

  emrServiceSecurityGroup:
    Type: 'AWS::EC2::SecurityGroup'
    Properties:
      GroupDescription: EMR Service Access SG
      VpcId: !Ref VPC

  # --- EMR master: ICMP / all TCP / all UDP from master and slave groups ---
  emrMasterIngressSelfICMP:
    Type: 'AWS::EC2::SecurityGroupIngress'
    Properties:
      GroupId: !Ref masterSecurityGroup
      IpProtocol: icmp
      FromPort: -1
      ToPort: -1
      SourceSecurityGroupId: !Ref masterSecurityGroup

  emrMasterIngressSlaveICMP:
    Type: 'AWS::EC2::SecurityGroupIngress'
    Properties:
      GroupId: !Ref masterSecurityGroup
      IpProtocol: icmp
      FromPort: -1
      ToPort: -1
      SourceSecurityGroupId: !Ref slaveSecurityGroup

  emrMasterIngressSelfAllTcp:
    Type: 'AWS::EC2::SecurityGroupIngress'
    Properties:
      GroupId: !Ref masterSecurityGroup
      IpProtocol: tcp
      FromPort: 0
      ToPort: 65535
      SourceSecurityGroupId: !Ref masterSecurityGroup

  emrMasterIngressSlaveAllTcp:
    Type: 'AWS::EC2::SecurityGroupIngress'
    Properties:
      GroupId: !Ref masterSecurityGroup
      IpProtocol: tcp
      FromPort: 0
      ToPort: 65535
      SourceSecurityGroupId: !Ref slaveSecurityGroup

  emrMasterIngressSelfAllUdp:
    Type: 'AWS::EC2::SecurityGroupIngress'
    Properties:
      GroupId: !Ref masterSecurityGroup
      IpProtocol: udp
      FromPort: 0
      ToPort: 65535
      SourceSecurityGroupId: !Ref masterSecurityGroup

  emrMasterIngressSlaveAllUdp:
    Type: 'AWS::EC2::SecurityGroupIngress'
    Properties:
      GroupId: !Ref masterSecurityGroup
      IpProtocol: udp
      FromPort: 0
      ToPort: 65535
      SourceSecurityGroupId: !Ref slaveSecurityGroup

  # --- EMR master: Livy (8998) and Hive (10000) from SageMaker Studio ---
  emrMasterIngressLivySG:
    Type: 'AWS::EC2::SecurityGroupIngress'
    Properties:
      GroupId: !Ref masterSecurityGroup
      IpProtocol: tcp
      FromPort: 8998
      ToPort: 8998
      SourceSecurityGroupId: !Ref SageMakerInstanceSecurityGroup

  emrMasterIngressHiveSG:
    Type: 'AWS::EC2::SecurityGroupIngress'
    Properties:
      GroupId: !Ref masterSecurityGroup
      IpProtocol: tcp
      FromPort: 10000
      ToPort: 10000
      SourceSecurityGroupId: !Ref SageMakerInstanceSecurityGroup

  # --- EMR service access group <-> master/slave (8443 / 9443) ---
  emrMasterIngressServiceSg:
    Type: 'AWS::EC2::SecurityGroupIngress'
    Properties:
      GroupId: !Ref masterSecurityGroup
      IpProtocol: tcp
      FromPort: 8443
      ToPort: 8443
      SourceSecurityGroupId: !Ref emrServiceSecurityGroup

  emrServiceIngressMasterSg:
    Type: 'AWS::EC2::SecurityGroupIngress'
    Properties:
      GroupId: !Ref emrServiceSecurityGroup
      IpProtocol: tcp
      FromPort: 9443
      ToPort: 9443
      SourceSecurityGroupId: !Ref masterSecurityGroup

  emrServiceEgressMaster:
    Type: 'AWS::EC2::SecurityGroupEgress'
    Properties:
      GroupId: !Ref emrServiceSecurityGroup
      IpProtocol: tcp
      FromPort: 8443
      ToPort: 8443
      DestinationSecurityGroupId: !Ref masterSecurityGroup

  emrServiceEgressSlave:
    Type: 'AWS::EC2::SecurityGroupEgress'
    Properties:
      GroupId: !Ref emrServiceSecurityGroup
      IpProtocol: tcp
      FromPort: 8443
      ToPort: 8443
      DestinationSecurityGroupId: !Ref slaveSecurityGroup

  # --- EMR slave: ICMP / all TCP / all UDP from slave and master groups ---
  emrSlaveIngressSelfICMP:
    Type: 'AWS::EC2::SecurityGroupIngress'
    Properties:
      GroupId: !Ref slaveSecurityGroup
      IpProtocol: icmp
      FromPort: -1
      ToPort: -1
      SourceSecurityGroupId: !Ref slaveSecurityGroup

  emrSlaveIngressMasterICMP:
    Type: 'AWS::EC2::SecurityGroupIngress'
    Properties:
      GroupId: !Ref slaveSecurityGroup
      IpProtocol: icmp
      FromPort: -1
      ToPort: -1
      SourceSecurityGroupId: !Ref masterSecurityGroup

  emrSlaveIngressSelfAllTcp:
    Type: 'AWS::EC2::SecurityGroupIngress'
    Properties:
      GroupId: !Ref slaveSecurityGroup
      IpProtocol: tcp
      FromPort: 0
      ToPort: 65535
      SourceSecurityGroupId: !Ref slaveSecurityGroup

  emrSlaveIngressMasterAllTcp:
    Type: 'AWS::EC2::SecurityGroupIngress'
    Properties:
      GroupId: !Ref slaveSecurityGroup
      IpProtocol: tcp
      FromPort: 0
      ToPort: 65535
      SourceSecurityGroupId: !Ref masterSecurityGroup

  emrSlaveIngressSelfAllUdp:
    Type: 'AWS::EC2::SecurityGroupIngress'
    Properties:
      GroupId: !Ref slaveSecurityGroup
      IpProtocol: udp
      FromPort: 0
      ToPort: 65535
      SourceSecurityGroupId: !Ref slaveSecurityGroup

  emrSlaveIngressMasterAllUdp:
    Type: 'AWS::EC2::SecurityGroupIngress'
    Properties:
      GroupId: !Ref slaveSecurityGroup
      IpProtocol: udp
      FromPort: 0
      ToPort: 65535
      SourceSecurityGroupId: !Ref masterSecurityGroup

  emrSlaveIngressServiceSg:
    Type: 'AWS::EC2::SecurityGroupIngress'
    Properties:
      GroupId: !Ref slaveSecurityGroup
      IpProtocol: tcp
      FromPort: 8443
      ToPort: 8443
      SourceSecurityGroupId: !Ref emrServiceSecurityGroup

  # ------------------------------------------------------------------
  # LDAP host
  # ------------------------------------------------------------------
  # Runs the test LDAP server as a Docker container (ports 389 and 636).
  EC2Ldap:
    Type: 'AWS::EC2::Instance'
    # The user data downloads packages and an S3 archive from the private
    # subnet, so the NAT route must exist before the instance boots.
    DependsOn:
      - PrivateSubnet1InternetRoute
      - PrivateSubnet1RouteTableAssociation
    Properties:
      DisableApiTermination: 'false'
      InstanceInitiatedShutdownBehavior: stop
      ImageId: !Ref LatestAmiId
      InstanceType: t2.micro
      IamInstanceProfile: !Ref EC2InstanceProfile
      Monitoring: 'false'
      NetworkInterfaces:
        - DeleteOnTermination: 'true'
          Description: Primary network interface
          DeviceIndex: 0
          SubnetId: !Ref PrivateSubnet1
          GroupSet:
            - !Ref EC2LdapSecurityGroup
      UserData:
        Fn::Base64: !Sub |
          #!/bin/bash
          yum update -y
          amazon-linux-extras install docker
          service docker start
          usermod -a -G docker ec2-user
          sudo su - ec2-user ec2-user
          aws s3 cp s3://aws-ml-blog/artifacts/sma-milestone1/TestLdapServer.tgz .
          tar -xzvf TestLdapServer.tgz
          cd TestLdapServer
          docker build -t testldapserver:0.0.1 --rm image
          docker run -p 389:389 -p 636:636 --name test-ldap-server-emr \
            --detach testldapserver:0.0.1

  # ------------------------------------------------------------------
  # EMR IAM roles
  # ------------------------------------------------------------------
  EMRClusterServiceRole:
    Type: 'AWS::IAM::Role'
    Properties:
      AssumeRolePolicyDocument:
        Statement:
          - Action:
              - 'sts:AssumeRole'
            Effect: Allow
            Principal:
              Service:
                - elasticmapreduce.amazonaws.com
        Version: '2012-10-17'
      ManagedPolicyArns:
        - 'arn:aws:iam::aws:policy/service-role/AmazonElasticMapReduceRole'
      Path: /

  EMRClusterinstanceProfile:
    Type: 'AWS::IAM::InstanceProfile'
    Properties:
      Path: /
      Roles:
        - !Ref EMRClusterinstanceProfileRole

  EMRClusterinstanceProfileRole:
    Type: 'AWS::IAM::Role'
    Properties:
      RoleName: !Sub '${AWS::StackName}-EMRClusterinstanceProfileRole'
      AssumeRolePolicyDocument:
        Statement:
          - Action:
              - 'sts:AssumeRole'
            Effect: Allow
            Principal:
              Service:
                - ec2.amazonaws.com
        Version: '2012-10-17'
      ManagedPolicyArns:
        - 'arn:aws:iam::aws:policy/service-role/AmazonElasticMapReduceforEC2Role'
      Path: /

  # ------------------------------------------------------------------
  # Bucket clean-up on stack delete
  # ------------------------------------------------------------------
  # Custom resource: its Lambda empties S3Bucket when the request type is
  # Delete, so that the bucket itself can then be removed.
  CleanUpBucketonDelete:
    DependsOn: CleanUpBucketonDeleteLambda
    Type: 'Custom::emptybucket'
    Properties:
      ServiceToken: !GetAtt
        - CleanUpBucketonDeleteLambda
        - Arn
      inputBucketName: !Ref S3Bucket

  CleanUpBucketonDeleteLambda:
    DependsOn:
      - S3Bucket
      - CleanUpBucketonDeleteLambdaRole
    Type: 'AWS::Lambda::Function'
    Properties:
      Description: Empty bucket on delete
      Handler: index.lambda_handler
      Role: !GetAtt
        - CleanUpBucketonDeleteLambdaRole
        - Arn
      # python3.7 is a deprecated Lambda runtime; new functions cannot be
      # created on it.
      Runtime: python3.12
      Timeout: 60
      Code:
        # ClientError is imported explicitly; the handler's `except
        # ClientError` would otherwise fail with NameError.
        ZipFile: |
          import json
          import boto3
          import urllib3
          from botocore.exceptions import ClientError

          def empty_bucket(bucket_name):
              print("Attempting to empty the bucket {0}".format(bucket_name))
              s3_client = boto3.client('s3')
              s3 = boto3.resource('s3')

              try:
                  bucket = s3.Bucket(bucket_name).load()
              except ClientError:
                  print("Bucket {0} does not exist".format(bucket_name))
                  return
              # Confirm if versioning is enabled
              version_status = s3_client.get_bucket_versioning(Bucket=bucket_name)
              status = version_status.get('Status','')
              if status == 'Enabled':
                  version_status = s3_client.put_bucket_versioning(Bucket=bucket_name,
                      VersioningConfiguration={'Status': 'Suspended'})
              version_paginator = s3_client.get_paginator('list_object_versions')
              version_iterator = version_paginator.paginate(
                  Bucket=bucket_name
              )

              for page in version_iterator:
                  print(page)
                  if 'DeleteMarkers' in page:
                      delete_markers = page['DeleteMarkers']
                      if delete_markers is not None:
                          for delete_marker in delete_markers:
                              key = delete_marker['Key']
                              versionId = delete_marker['VersionId']
                              s3_client.delete_object(Bucket=bucket_name, Key=key, VersionId=versionId)
                  if 'Versions' in page and page['Versions'] is not None:
                      versions = page['Versions']
                      for version in versions:
                          print(version)
                          key = version['Key']
                          versionId = version['VersionId']
                          s3_client.delete_object(Bucket=bucket_name, Key=key, VersionId=versionId)
              object_paginator = s3_client.get_paginator('list_objects_v2')
              object_iterator = object_paginator.paginate(
                  Bucket=bucket_name
              )
              for page in object_iterator:
                  if 'Contents' in page:
                      for content in page['Contents']:
                          key = content['Key']
                          s3_client.delete_object(Bucket=bucket_name, Key=content['Key'])
              print("Successfully emptied the bucket {0}".format(bucket_name))


          def lambda_handler(event, context):
              try:
                  bucket = event['ResourceProperties']['inputBucketName']
                  if event['RequestType'] == 'Delete':
                      empty_bucket(bucket)
                  sendResponse(event, context, "SUCCESS")
              except Exception as e:
                  print(e)
                  sendResponse(event, context, "FAILED")

          def sendResponse(event, context, status):
              http = urllib3.PoolManager()
              response_body = {'Status': status,
                               'Reason': 'Log stream name: ' + context.log_stream_name,
                               'PhysicalResourceId': context.log_stream_name,
                               'StackId': event['StackId'],
                               'RequestId': event['RequestId'],
                               'LogicalResourceId': event['LogicalResourceId'],
                               'Data': json.loads("{}")}
              http.request('PUT', event['ResponseURL'], body=json.dumps(response_body))

  CleanUpBucketonDeleteLambdaRole:
    Type: 'AWS::IAM::Role'
    Properties:
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - lambda.amazonaws.com
            Action:
              - 'sts:AssumeRole'
      Path: /
      Policies:
        - PolicyName: !Sub 'CleanUpBucketonDeleteLambdaPolicy-${AWS::StackName}'
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              - Effect: Allow
                Action:
                  - 's3:*'
                Resource:
                  - !GetAtt S3Bucket.Arn
                  - !Sub '${S3Bucket.Arn}/'
                  - !Sub '${S3Bucket.Arn}/*'
              # The function may empty the bucket but never delete it.
              - Effect: Deny
                Action:
                  - 's3:DeleteBucket'
                Resource: '*'
              - Effect: Allow
                Action:
                  - 'logs:*'
                Resource: '*'

  # ------------------------------------------------------------------
  # SageMaker Studio security groups
  # ------------------------------------------------------------------
  SageMakerInstanceSecurityGroup:
    Type: 'AWS::EC2::SecurityGroup'
    Properties:
      GroupName: SMSG
      GroupDescription: Security group with no ingress rule
      SecurityGroupEgress:
        - IpProtocol: '-1'
          FromPort: -1
          ToPort: -1
          CidrIp: 0.0.0.0/0
      VpcId: !Ref VPC

  # Self-referencing rule: members of the group may reach each other.
  SageMakerInstanceSecurityGroupIngress:
    Type: 'AWS::EC2::SecurityGroupIngress'
    Properties:
      IpProtocol: '-1'
      GroupId: !Ref SageMakerInstanceSecurityGroup
      SourceSecurityGroupId: !Ref SageMakerInstanceSecurityGroup

  VPCEndpointSecurityGroup:
    Type: 'AWS::EC2::SecurityGroup'
    Properties:
      GroupDescription: Allow TLS for VPC Endpoint
      SecurityGroupEgress:
        - IpProtocol: '-1'
          FromPort: -1
          ToPort: -1
          CidrIp: 0.0.0.0/0
      VpcId: !Ref VPC
      Tags:
        - Key: Name
          Value: !Sub '${AWS::StackName}-endpoint-security-group'

  EndpointSecurityGroupIngress:
    Type: 'AWS::EC2::SecurityGroupIngress'
    Properties:
      IpProtocol: '-1'
      GroupId: !Ref VPCEndpointSecurityGroup
      SourceSecurityGroupId: !Ref SageMakerInstanceSecurityGroup

  # ------------------------------------------------------------------
  # IAM for the LDAP host and SageMaker
  # ------------------------------------------------------------------
  # Role behind EC2InstanceProfile, which is attached to EC2Ldap.
  EC2Role:
    Type: 'AWS::IAM::Role'
    Properties:
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: 'Allow'
            Principal:
              Service:
                - 'ec2.amazonaws.com'
                - 'sagemaker.amazonaws.com'
            Action:
              - 'sts:AssumeRole'
      Path: '/'
      ManagedPolicyArns:
        - 'arn:aws:iam::aws:policy/AmazonSageMakerFullAccess'
        - 'arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryFullAccess'
        - 'arn:aws:iam::aws:policy/AmazonS3FullAccess'
        - 'arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore'
      Policies:
        - PolicyName: EC2ResourceAccess
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              - Effect: Allow
                Action:
                  - 'iam:GetRole'
                  - 'ecr:*'
                Resource: '*'

  EC2InstanceProfile:
    Type: 'AWS::IAM::InstanceProfile'
    Properties:
      Roles:
        - !Ref EC2Role

  # Default execution role of the Studio domain.
  SageMakerExecutionRole:
    Type: 'AWS::IAM::Role'
    Properties:
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - sagemaker.amazonaws.com
            Action:
              - 'sts:AssumeRole'
      Path: /
      Policies:
        - PolicyName: !Sub '${AWS::StackName}-sageemr'
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              - Effect: Allow
                Action:
                  - 'elasticmapreduce:ListInstances'
                  - 'elasticmapreduce:DescribeCluster'
                  - 'elasticmapreduce:DescribeSecurityConfiguration'
                  - 'iam:CreateServiceLinkedRole'
                  - 'iam:GetRole'
                Resource: '*'
              - Sid: AllowPassRoleSageMaker
                Effect: Allow
                Action:
                  - 'iam:PassRole'
                Resource: '*'
                Condition:
                  StringEquals:
                    iam:PassedToService: sagemaker.amazonaws.com
              - Effect: Allow
                Action:
                  - 'elasticmapreduce:DescribeCluster'
                  - 'elasticmapreduce:ListInstanceGroups'
                Resource: 'arn:aws:elasticmapreduce:*:*:cluster/*'
              - Effect: Allow
                Action:
                  - 'elasticmapreduce:ListClusters'
                Resource: '*'
      ManagedPolicyArns:
        - 'arn:aws:iam::aws:policy/AmazonSageMakerFullAccess'
        - 'arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess'

  # --- EMR master: TCP 88, 749 and 464 from SageMaker Studio ---
  emrMasterIngressKDCSG:
    Type: 'AWS::EC2::SecurityGroupIngress'
    Properties:
      GroupId: !Ref masterSecurityGroup
      IpProtocol: tcp
      FromPort: 88
      ToPort: 88
      SourceSecurityGroupId: !Ref SageMakerInstanceSecurityGroup

  emrMasterIngressKDCAdminSG:
    Type: 'AWS::EC2::SecurityGroupIngress'
    Properties:
      GroupId: !Ref masterSecurityGroup
      IpProtocol: tcp
      FromPort: 749
      ToPort: 749
      SourceSecurityGroupId: !Ref SageMakerInstanceSecurityGroup

  emrMasterIngressKinit464SG:
    Type: 'AWS::EC2::SecurityGroupIngress'
    Properties:
      GroupId: !Ref masterSecurityGroup
      IpProtocol: tcp
      FromPort: 464
      ToPort: 464
      SourceSecurityGroupId: !Ref SageMakerInstanceSecurityGroup

  # Role assumable by the EMR instance-profile role, scoped to S3Bucket.
  allowEMRFSAccessForUser1:
    Type: 'AWS::IAM::Role'
    Properties:
      RoleName: !Sub '${AWS::StackName}-allowEMRFSAccessForUser1'
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              # GetAtt (rather than an ARN assembled from the role name)
              # makes CloudFormation create the trusted role first; IAM
              # rejects a trust policy whose principal does not exist yet.
              AWS: !GetAtt EMRClusterinstanceProfileRole.Arn
            Action:
              - 'sts:AssumeRole'
      Path: /
      Policies:
        - PolicyName: !Sub '${AWS::StackName}-emrFS-user1'
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              - Action:
                  - 's3:ListBucket'
                Resource:
                  - !Sub 'arn:aws:s3:::${S3Bucket}'
                Effect: Allow
              - Action:
                  - 's3:*'
                Resource:
                  - !Sub 'arn:aws:s3:::${S3Bucket}/*'
                Effect: Allow

  LambdaExecutionRole:
    Type: 'AWS::IAM::Role'
    Properties:
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - lambda.amazonaws.com
            Action:
              - 'sts:AssumeRole'
      Path: /

  # Managed policy attached to LambdaExecutionRole.
  LambdaExecutionPolicy:
    Type: 'AWS::IAM::ManagedPolicy'
    Properties:
      Path: /
      PolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Sid: CloudWatchLogsPermissions
            Effect: Allow
            Action:
              - 'logs:CreateLogGroup'
              - 'logs:CreateLogStream'
              - 'logs:PutLogEvents'
            Resource: !Sub 'arn:${AWS::Partition}:logs:*:*:*'
          - Sid: SageMakerDomainPermission
            Effect: Allow
            Action:
              - 'sagemaker:CreateDomain'
              - 'sagemaker:DescribeDomain'
              - 'sagemaker:DeleteDomain'
              - 'sagemaker:UpdateDomain'
              - 'sagemaker:CreateUserProfile'
              - 'sagemaker:UpdateUserProfile'
              - 'sagemaker:DeleteUserProfile'
              - 'sagemaker:DescribeUserProfile'
              - 'sageMaker:ListApps'
              - 'sageMaker:DeleteApp'
            Resource:
              - !Sub 'arn:${AWS::Partition}:sagemaker:*:*:domain/*'
              - !Sub 'arn:${AWS::Partition}:sagemaker:*:*:user-profile/*'
              - !Sub 'arn:${AWS::Partition}:sagemaker:*:*:app/*'
          - Sid: IAMServiceLinkedRolePermission
            Effect: Allow
            Action:
              - 'iam:CreateServiceLinkedRole'
            Resource:
              - !Sub 'arn:${AWS::Partition}:iam::${AWS::AccountId}:role/aws-service-role/sagemaker.amazonaws.com/AWSServiceRoleForAmazonSageMaker*'
            Condition:
              StringLike:
                iam:AWSServiceName: sagemaker.amazonaws.com
          - Sid: IAMServiceLinkedRolePolicyPermission
            Effect: Allow
            Action:
              - 'iam:AttachRolePolicy'
              - 'iam:PutRolePolicy'
            Resource:
              - !Sub 'arn:${AWS::Partition}:iam::${AWS::AccountId}:role/aws-service-role/sagemaker.amazonaws.com/AWSServiceRoleForAmazonSageMaker*'
          - Sid: SageMakerExecPassRole
            Effect: Allow
            Action:
              - 'iam:PassRole'
            Resource: !GetAtt SageMakerExecutionRole.Arn
      Roles:
        - !Ref LambdaExecutionRole

  # ------------------------------------------------------------------
  # Script copy into the stack bucket
  # ------------------------------------------------------------------
  # Custom resource: copies the bootstrap and step scripts from the source
  # bucket into S3Bucket under the same key prefix, and deletes those
  # copies again when the request type is Delete.
  CopyZips:
    Type: 'Custom::CopyZips'
    Properties:
      ServiceToken: !GetAtt 'CopyZipsFunction.Arn'
      DestBucket: !Ref 'S3Bucket'
      SourceBucket: !FindInMap
        - Studio
        - s3params
        - S3Bucket
      Prefix: !FindInMap
        - Studio
        - s3params
        - S3Key
      Objects:
        - !FindInMap
          - ClusterConfigurations
          - emr
          - BootStrapScriptFile
        - !FindInMap
          - ClusterConfigurations
          - emr
          - StepScriptFile

  CopyZipsRole:
    Type: 'AWS::IAM::Role'
    Properties:
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service: lambda.amazonaws.com
            Action: 'sts:AssumeRole'
      ManagedPolicyArns:
        - 'arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole'
      Path: /
      Policies:
        - PolicyName: lambda-copier
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              - Effect: Allow
                Action:
                  - 's3:GetObject'
                Resource: '*'
              - Effect: Allow
                Action:
                  - 's3:PutObject'
                  - 's3:DeleteObject'
                Resource:
                  - !Sub 'arn:aws:s3:::${S3Bucket}/*'

  CopyZipsFunction:
    Type: 'AWS::Lambda::Function'
    Properties:
      Description: Copies objects from a source S3 bucket to a destination
      Handler: index.handler
      # python3.8 is a deprecated Lambda runtime; new functions cannot be
      # created on it.
      Runtime: python3.12
      Role: !GetAtt 'CopyZipsRole.Arn'
      Timeout: 900
      Code:
        ZipFile: |
          import json
          import logging
          import threading
          import boto3
          import cfnresponse

          def copy_objects(source_bucket, dest_bucket, prefix, objects):
              s3 = boto3.client('s3')
              for o in objects:
                  key = prefix + o
                  copy_source = {
                      'Bucket': source_bucket,
                      'Key': key
                  }
                  print('copy_source: %s' % copy_source)
                  print('dest_bucket = %s'%dest_bucket)
                  print('key = %s' %key)
                  s3.copy_object(CopySource=copy_source, Bucket=dest_bucket,
                        Key=key)

          def delete_objects(bucket, prefix, objects):
              s3 = boto3.client('s3')
              objects = {'Objects': [{'Key': prefix + o} for o in objects]}
              s3.delete_objects(Bucket=bucket, Delete=objects)

          def timeout(event, context):
              logging.error('Execution is about to time out, sending failure response to CloudFormation')
              cfnresponse.send(event, context, cfnresponse.FAILED, {}, None)

          def handler(event, context):
              # make sure we send a failure to CloudFormation if the function
              # is going to timeout
              timer = threading.Timer((context.get_remaining_time_in_millis()
                        / 1000.00) - 0.5, timeout, args=[event, context])
              timer.start()
              print('Received event: %s' % json.dumps(event))
              status = cfnresponse.SUCCESS
              try:
                  source_bucket = event['ResourceProperties']['SourceBucket']
                  dest_bucket = event['ResourceProperties']['DestBucket']
                  prefix = event['ResourceProperties']['Prefix']
                  objects = event['ResourceProperties']['Objects']
                  if event['RequestType'] == 'Delete':
                      delete_objects(dest_bucket, prefix, objects)
                  else:
                      copy_objects(source_bucket, dest_bucket, prefix, objects)
              except Exception as e:
                  logging.error('Exception: %s' % e, exc_info=True)
                  status = cfnresponse.FAILED
              finally:
                  timer.cancel()
                  cfnresponse.send(event, context, status, {}, None)

  # ------------------------------------------------------------------
  # Interface VPC endpoints (private subnet, VPCEndpointSecurityGroup)
  # ------------------------------------------------------------------
  VPCEndpointSagemakerAPI:
    Type: 'AWS::EC2::VPCEndpoint'
    Properties:
      PolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal: '*'
            Action: '*'
            Resource: '*'
      VpcEndpointType: Interface
      PrivateDnsEnabled: true
      SubnetIds:
        - !Ref PrivateSubnet1
      SecurityGroupIds:
        - !Ref VPCEndpointSecurityGroup
      ServiceName: !Sub 'com.amazonaws.${AWS::Region}.sagemaker.api'
      VpcId: !Ref VPC

  VPCEndpointSageMakerRuntime:
    Type: 'AWS::EC2::VPCEndpoint'
    Properties:
      PolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal: '*'
            Action: '*'
            Resource: '*'
      VpcEndpointType: Interface
      PrivateDnsEnabled: true
      SubnetIds:
        - !Ref PrivateSubnet1
      SecurityGroupIds:
        - !Ref VPCEndpointSecurityGroup
      ServiceName: !Sub 'com.amazonaws.${AWS::Region}.sagemaker.runtime'
      VpcId: !Ref VPC

  VPCEndpointSTS:
    Type: 'AWS::EC2::VPCEndpoint'
    Properties:
      PolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal: '*'
            Action: '*'
            Resource: '*'
      VpcEndpointType: Interface
      PrivateDnsEnabled: true
      SubnetIds:
        - !Ref PrivateSubnet1
      SecurityGroupIds:
        - !Ref VPCEndpointSecurityGroup
      ServiceName: !Sub 'com.amazonaws.${AWS::Region}.sts'
      VpcId: !Ref VPC

  VPCEndpointCW:
    Type: 'AWS::EC2::VPCEndpoint'
    Properties:
      PolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal: '*'
            Action: '*'
            Resource: '*'
      VpcEndpointType: Interface
      PrivateDnsEnabled: true
      SubnetIds:
        - !Ref PrivateSubnet1
      SecurityGroupIds:
        - !Ref VPCEndpointSecurityGroup
      ServiceName: !Sub 'com.amazonaws.${AWS::Region}.monitoring'
      VpcId: !Ref VPC

  VPCEndpointCWL:
    Type: 'AWS::EC2::VPCEndpoint'
    Properties:
      PolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal: '*'
            Action: '*'
            Resource: '*'
      VpcEndpointType: Interface
      PrivateDnsEnabled: true
      SubnetIds:
        - !Ref PrivateSubnet1
      SecurityGroupIds:
        - !Ref VPCEndpointSecurityGroup
      ServiceName: !Sub 'com.amazonaws.${AWS::Region}.logs'
      VpcId: !Ref VPC

  VPCEndpointECR:
    Type: 'AWS::EC2::VPCEndpoint'
    Properties:
      PolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal: '*'
            Action: '*'
            Resource: '*'
      VpcEndpointType: Interface
      PrivateDnsEnabled: true
      SubnetIds:
        - !Ref PrivateSubnet1
      SecurityGroupIds:
        - !Ref VPCEndpointSecurityGroup
      ServiceName: !Sub 'com.amazonaws.${AWS::Region}.ecr.dkr'
      VpcId: !Ref VPC

  VPCEndpointECRAPI:
    Type: 'AWS::EC2::VPCEndpoint'
    Properties:
      PolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal: '*'
            Action: '*'
            Resource: '*'
      VpcEndpointType: Interface
      PrivateDnsEnabled: true
      SubnetIds:
        - !Ref PrivateSubnet1
      SecurityGroupIds:
        - !Ref VPCEndpointSecurityGroup
      ServiceName: !Sub 'com.amazonaws.${AWS::Region}.ecr.api'
      VpcId: !Ref VPC

  # ------------------------------------------------------------------
  # SageMaker Studio
  # ------------------------------------------------------------------
  StudioDomain:
    Type: 'AWS::SageMaker::Domain'
    Properties:
      AppNetworkAccessType: VpcOnly
      AuthMode: IAM
      DomainName: StudioDomain
      VpcId: !Ref VPC
      SubnetIds:
        - !Ref PrivateSubnet1
      DefaultUserSettings:
        ExecutionRole: !GetAtt SageMakerExecutionRole.Arn
        SecurityGroups:
          - !Ref SageMakerInstanceSecurityGroup

  StudioUserProfile:
    Type: 'AWS::SageMaker::UserProfile'
    Properties:
      DomainId: !Ref StudioDomain
      UserProfileName: studio-user

  # ------------------------------------------------------------------
  # EMR cluster
  # ------------------------------------------------------------------
  EMRCluster:
    # CopyZips: the bootstrap/step scripts must be in the bucket first.
    # EC2Ldap: the Livy configuration points at the LDAP host.
    # CleanUpBucketonDelete: on stack deletion the cluster (which logs to
    # the bucket) is removed before the bucket is emptied.
    DependsOn:
      - CopyZips
      - EC2Ldap
      - CleanUpBucketonDelete
    Type: 'AWS::EMR::Cluster'
    Properties:
      Applications:
        - Name: Spark
        - Name: Hive
        - Name: Livy
      BootstrapActions:
        - Name: Dummy bootstrap action
          ScriptBootstrapAction:
            Args:
              - dummy
              - parameter
            Path: !Sub 's3://${S3Bucket}/artifacts/sma-milestone1/installpylibs-v2.sh'
      AutoScalingRole: EMR_AutoScaling_DefaultRole
      Configurations:
        # Livy authenticates users against the LDAP server on EC2Ldap.
        - Classification: livy-conf
          ConfigurationProperties:
            livy.impersonation.enabled: false
            livy.repl.enable-hive-context: true
            livy.server.auth.ldap.base-dn: 'dc=example,dc=org'
            livy.server.auth.ldap.enable-start-tls: false
            livy.server.auth.ldap.security-authentication: 'simple'
            livy.server.auth.ldap.url: !Sub 'ldap://${EC2Ldap.PrivateDnsName}:389'
            livy.server.auth.type: 'ldap'
            livy.server.session.timeout: 2h
      EbsRootVolumeSize: 100
      Instances:
        CoreInstanceGroup:
          EbsConfiguration:
            EbsBlockDeviceConfigs:
              - VolumeSpecification:
                  SizeInGB: '320'
                  VolumeType: gp2
                VolumesPerInstance: '1'
            EbsOptimized: 'true'
          InstanceCount: !FindInMap
            - ClusterConfigurations
            - emr
            - coreInstanceCount
          InstanceType: !FindInMap
            - ClusterConfigurations
            - emr
            - coreInstanceType
          Market: ON_DEMAND
          Name: coreNode
        MasterInstanceGroup:
          EbsConfiguration:
            EbsBlockDeviceConfigs:
              - VolumeSpecification:
                  SizeInGB: '320'
                  VolumeType: gp2
                VolumesPerInstance: '1'
            EbsOptimized: 'true'
          InstanceCount: 1
          InstanceType: !FindInMap
            - ClusterConfigurations
            - emr
            - masterInstanceType
          Market: ON_DEMAND
          Name: masterNode
        Ec2SubnetId: !Ref PrivateSubnet1
        EmrManagedMasterSecurityGroup: !Ref masterSecurityGroup
        EmrManagedSlaveSecurityGroup: !Ref slaveSecurityGroup
        ServiceAccessSecurityGroup: !Ref emrServiceSecurityGroup
        TerminationProtected: false
      JobFlowRole: !Ref EMRClusterinstanceProfile
      LogUri: !Sub 's3://${S3Bucket}/artifacts/sma-milestone1/'
      Name: !Sub 'EMR-Cluster-${AWS::StackName}'
      ReleaseLabel: !FindInMap
        - ClusterConfigurations
        - emr
        - emrReleaseVersion
      ServiceRole: !Ref EMRClusterServiceRole
      VisibleToAllUsers: true
      Steps:
        - ActionOnFailure: CONTINUE
          HadoopJarStep:
            Args:
              - !Sub 's3://${S3Bucket}/artifacts/sma-milestone1/configurekdc.sh'
            Jar: !Sub 's3://${AWS::Region}.elasticmapreduce/libs/script-runner/script-runner.jar'
            MainClass: ''
          Name: run any bash or java job in spark

Outputs:
  VPCandCIDR:
    Description: VPC ID and CIDR block
    Value: !Join
      - ' - '
      - - !Ref VPC
        - !GetAtt
          - VPC
          - CidrBlock

  PublicSubnets:
    Description: All public subnet created
    Value: !Join
      - ''
      - - !Ref PublicSubnet1

  PrivateSubnets:
    Description: All private subnet created
    Value: !Join
      - ', '
      - - !Ref PrivateSubnet1

  S3BucketName:
    Description: Bucket Name for Amazon S3 bucket
    Value: !Ref S3Bucket

  EMRMasterDNSName:
    Description: DNS Name of the EMR Master Node
    Value: !GetAtt
      - EMRCluster
      - MasterPublicDNS

  EC2LdapLivyUrl:
    Description: 'Value of livy.server.auth.ldap.url'
    Value: !Sub 'ldap://${EC2Ldap.PrivateDnsName}:389'