AWSTemplateFormatVersion: '2010-09-09'
# And for nested stack
Description: >
  CloudFormation template for RBAC EMR. This template is for an account
  without a pre-existing SageMaker Studio Domain. The template creates a
  domain and associated user profiles. It creates the Studio Domain in a
  private VPC and establishes connectivity with EMR via runtime roles It
  also creates a Glue Table that references a public s3 data bucket and sets
  appropriate lake formation permissions through a nested stack.

Parameters:
  IdleTimeout:
    Type: Number
    Description: Terminate EMR cluster automatically after x amount of seconds of inactivity.
    Default: 604800
    # Enforce the range that ConstraintDescription promises.
    MinValue: 60
    MaxValue: 604800
    ConstraintDescription: Must be between 60 seconds - 7 days (604800 seconds)
  S3CertsZip:
    Type: String
    Description: Enter S3 URI where the certs zip file is located
    AllowedPattern: '^s3://([^/]+)/(.*?([^/]+)/?)$'

Mappings:
  # Region-specific SageMaker image ARN used by the Studio domain's Jupyter server.
  ARNs:
    us-east-1:
      arn: arn:aws:sagemaker:us-east-1:081325390199:image/jupyter-server-3
    us-east-2:
      arn: arn:aws:sagemaker:us-east-2:429704687514:image/jupyter-server-3
    us-west-1:
      arn: arn:aws:sagemaker:us-west-1:742091327244:image/jupyter-server-3
    us-west-2:
      arn: arn:aws:sagemaker:us-west-2:236514542706:image/jupyter-server-3
  # Source buckets/prefixes read by the copy custom resources.
  Studio:
    s3params:
      S3Bucket: aws-ml-blog
      S3Key: artifacts/sma-milestone1/
    lfdata:
      S3Bucket: aws-data-analytics-workshops
      S3Key: shared_datasets/
  # Object keys (relative to the lfdata prefix) copied by the CopyData resource.
  TPCData:
    keys:
      customer: tpcparquet/dl_tpc_customer/part-00000-8a990a41-0c74-4a62-834e-8397f5cdbc4e-c000.snappy.parquet
      custaddress: tpcparquet/dl_tpc_customer_address/part-00000-8283c706-320e-4f66-a57f-70a1460a0b2d-c000.snappy.parquet
      demo: tpcparquet/dl_tpc_household_demographics/part-00000-aca6b874-4d62-4eda-9768-ad8ab8c5499d-c000.snappy.parquet
      income: tpcparquet/dl_tpc_income_band/part-00000-27533284-b0c5-4d28-a709-1edcbb8a19ee-c000.snappy.parquet
      item: tpcparquet/dl_tpc_item/part-00000-f6c9f56e-48f1-4723-8f5f-2661ff74b888-c000.snappy.parquet
      promo: tpcparquet/dl_tpc_promotion/part-00000-ebc5557f-002e-41eb-9404-24840730537c-c000.snappy.parquet
      webpage: tpcparquet/dl_tpc_web_page/part-00000-99a8aa66-b914-43a1-ac15-6f6aaeade0d4-c000.snappy.parquet
      websales: tpcparquet/dl_tpc_web_sales/part-00000-80a84c73-e44c-4ff5-8f08-708765afe13d-c000.snappy.parquet
  ClusterConfigurations:
    emr:
      masterInstanceType: m5.xlarge
      coreInstanceType: m5.xlarge
      masterInstanceCount: 1
      coreInstanceCount: 2
      emrReleaseVersion: emr-6.9.0
      BootStrapScriptFile: createlinuxusers.sh
      StepScriptFile: configurekdc.sh
      PyInstallFile: installpylibs-v2.sh
    Bootstrap:
      S3Prefix: customer-bootstrap-actions/gcsc/
      replaceRpms: replace-rpms.sh
      emrSecretAgent: emr-secret-agent-1.18.0-SNAPSHOT20221121212949.noarch.rpm
  VpcConfigurations:
    cidr:
      Vpc: 10.0.0.0/16
      PublicSubnet1: 10.0.10.0/24
      PrivateSubnet1: 10.0.20.0/24

Resources:
  ####################################################################################################################
  #### SageMaker and EMR IAM Roles
  ####################################################################################################################

  # Read-side Glue / Lake Formation permissions attached to the three data access roles.
  LakeFormationDataAccessPolicy:
    Type: AWS::IAM::ManagedPolicy
    Properties:
      ManagedPolicyName: !Sub ${AWS::StackName}-lf-data-access-policy
      PolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Action:
              - lakeformation:GetDataAccess
              - glue:GetTable
              - glue:GetTables
              - glue:SearchTables
              - glue:GetDatabase
              - glue:GetDatabases
              - glue:GetPartitions
              - glue:CreateDatabase
              - lakeformation:GetResourceLFTags
              - lakeformation:ListLFTags
              - lakeformation:GetLFTag
              - lakeformation:SearchTablesByLFTags
              - lakeformation:SearchDatabasesByLFTags
            Resource: '*'

  # EMR discovery / UI permissions attached to both Studio execution roles.
  SageMakerCustomPolicy:
    Type: AWS::IAM::ManagedPolicy
    Properties:
      ManagedPolicyName: !Sub ${AWS::StackName}-sagemaker-policy
      PolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Action:
              - elasticmapreduce:ListInstances
              - elasticmapreduce:DescribeCluster
              - elasticmapreduce:DescribeSecurityConfiguration
              - elasticmapreduce:CreatePersistentAppUI
              - elasticmapreduce:DescribePersistentAppUI
              - elasticmapreduce:GetPersistentAppUIPresignedURL
              - elasticmapreduce:GetOnClusterAppUIPresignedURL
              - elasticmapreduce:ListClusters
            Resource: '*'
          - Effect: Allow
            Action:
              - elasticmapreduce:DescribeCluster
              - elasticmapreduce:ListInstanceGroups
            Resource: !Sub "arn:${AWS::Partition}:elasticmapreduce:*:*:cluster/*"
          # A former third statement only repeated elasticmapreduce:ListClusters
          # (already granted above) and was removed.

  # Runtime role for marketing data; assumable by SageMaker/Glue/Lake Formation
  # and by the EMR instance profile role (AssumeRole + TagSession).
  MarketingDataAccessRole:
    Type: AWS::IAM::Role
    Properties:
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - sagemaker.amazonaws.com
                - glue.amazonaws.com
                - lakeformation.amazonaws.com
            Action:
              - 'sts:AssumeRole'
          - Effect: Allow
            Principal:
              AWS:
                - !GetAtt EMRClusterinstanceProfileRole.Arn
            Action:
              - 'sts:AssumeRole'
          - Effect: Allow
            Principal:
              AWS:
                - !GetAtt EMRClusterinstanceProfileRole.Arn
            Action:
              - 'sts:TagSession'
      RoleName: !Sub ${AWS::StackName}-marketing-data-access-role
      Path: /
      ManagedPolicyArns:
        - !Sub "arn:${AWS::Partition}:iam::aws:policy/service-role/AwsGlueSessionUserRestrictedServiceRole"
        - !Sub "arn:${AWS::Partition}:iam::aws:policy/AmazonS3ReadOnlyAccess"
        - !Ref LakeFormationDataAccessPolicy

  # Runtime role for sales data; same trust shape as MarketingDataAccessRole.
  SalesAccessRole:
    Type: AWS::IAM::Role
    Properties:
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - sagemaker.amazonaws.com
                - glue.amazonaws.com
                - lakeformation.amazonaws.com
            Action:
              - 'sts:AssumeRole'
          - Effect: Allow
            Principal:
              AWS:
                - !GetAtt EMRClusterinstanceProfileRole.Arn
            Action:
              - 'sts:AssumeRole'
          - Effect: Allow
            Principal:
              AWS:
                - !GetAtt EMRClusterinstanceProfileRole.Arn
            Action:
              - 'sts:TagSession'
      RoleName: !Sub ${AWS::StackName}-sales-data-access-role
      Path: /
      ManagedPolicyArns:
        - !Sub "arn:${AWS::Partition}:iam::aws:policy/service-role/AwsGlueSessionUserRestrictedServiceRole"
        - !Sub "arn:${AWS::Partition}:iam::aws:policy/AmazonS3ReadOnlyAccess"
        - !Ref LakeFormationDataAccessPolicy

  # Runtime role for electronics data; same trust shape as MarketingDataAccessRole.
  ElectronicsDataAccessRole:
    Type: AWS::IAM::Role
    Properties:
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - sagemaker.amazonaws.com
                - glue.amazonaws.com
                - lakeformation.amazonaws.com
            Action:
              - 'sts:AssumeRole'
          - Effect: Allow
            Principal:
              AWS:
                - !GetAtt EMRClusterinstanceProfileRole.Arn
            Action:
              - 'sts:AssumeRole'
          - Effect: Allow
            Principal:
              AWS:
                - !GetAtt EMRClusterinstanceProfileRole.Arn
            Action:
              - 'sts:TagSession'
      RoleName: !Sub ${AWS::StackName}-electronics-data-access-role
      Path: /
      ManagedPolicyArns:
        - !Sub "arn:${AWS::Partition}:iam::aws:policy/service-role/AwsGlueSessionUserRestrictedServiceRole"
        - !Sub "arn:${AWS::Partition}:iam::aws:policy/AmazonS3ReadOnlyAccess"
        - !Ref LakeFormationDataAccessPolicy

  # Studio execution role for user profile 1: may pass / obtain cluster session
  # credentials for the sales and electronics runtime roles only.
  SageMakerExecutionRole1:
    Type: AWS::IAM::Role
    Properties:
      RoleName: !Sub ${AWS::StackName}-studio-emr-role-1
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - sagemaker.amazonaws.com
                - glue.amazonaws.com
            Action:
              - 'sts:AssumeRole'
          - Effect: Allow
            Principal:
              AWS:
                - !GetAtt EMRClusterinstanceProfileRole.Arn
            Action:
              - 'sts:AssumeRole'
          - Effect: Allow
            Principal:
              AWS:
                - !GetAtt EMRClusterinstanceProfileRole.Arn
            Action:
              - 'sts:TagSession'
            # Value matches AuthorizedSessionTagValue in the EMR security configuration.
            Condition:
              StringEquals:
                aws:RequestTag/LakeFormationAuthorizedCaller: 'Amazon EMR'
      Path: /
      Policies:
        - PolicyName: !Sub '${AWS::StackName}-sage-emr'
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              - Sid: AllowPassRoleSageMaker
                Effect: Allow
                Action:
                  - iam:GetRole
                  - iam:PassRole
                Resource:
                  - !GetAtt SalesAccessRole.Arn
                  - !GetAtt ElectronicsDataAccessRole.Arn
              - Sid: AllowClusterCreds
                Effect: Allow
                Action:
                  - elasticmapreduce:GetClusterSessionCredentials
                Resource: '*'
                Condition:
                  StringLike:
                    elasticmapreduce:ExecutionRoleArn:
                      - !GetAtt SalesAccessRole.Arn
                      - !GetAtt ElectronicsDataAccessRole.Arn
      ManagedPolicyArns:
        # AmazonSageMakerFullAccess provides a broad range of permissions
        - !Sub "arn:${AWS::Partition}:iam::aws:policy/AmazonSageMakerFullAccess"
        - !Sub "arn:${AWS::Partition}:iam::aws:policy/service-role/AwsGlueSessionUserRestrictedServiceRole"
        - !Sub "arn:${AWS::Partition}:iam::aws:policy/AmazonS3ReadOnlyAccess"
        - !Ref SageMakerCustomPolicy

  # Studio execution role for user profile 2: may pass / obtain cluster session
  # credentials for the marketing runtime role only.
  SageMakerExecutionRole2:
    Type: AWS::IAM::Role
    Properties:
      RoleName: !Sub ${AWS::StackName}-studio-emr-role-2
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - sagemaker.amazonaws.com
                - glue.amazonaws.com
            Action:
              - 'sts:AssumeRole'
          - Effect: Allow
            Principal:
              AWS:
                - !GetAtt EMRClusterinstanceProfileRole.Arn
            Action:
              - 'sts:AssumeRole'
          - Effect: Allow
            Principal:
              AWS:
                - !GetAtt EMRClusterinstanceProfileRole.Arn
            Action:
              - 'sts:TagSession'
            # Value matches AuthorizedSessionTagValue in the EMR security configuration.
            Condition:
              StringEquals:
                aws:RequestTag/LakeFormationAuthorizedCaller: 'Amazon EMR'
      Path: /
      Policies:
        - PolicyName: !Sub '${AWS::StackName}-sage-emr'
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              - Sid: AllowPassRoleSageMaker
                Effect: Allow
                Action:
                  - iam:GetRole
                  - iam:PassRole
                Resource:
                  - !GetAtt MarketingDataAccessRole.Arn
              - Sid: AllowClusterCreds
                Effect: Allow
                Action:
                  - elasticmapreduce:GetClusterSessionCredentials
                Resource: '*'
                Condition:
                  StringLike:
                    elasticmapreduce:ExecutionRoleArn:
                      - !GetAtt MarketingDataAccessRole.Arn
      ManagedPolicyArns:
        # AmazonSageMakerFullAccess provides a broad range of permissions
        - !Sub "arn:${AWS::Partition}:iam::aws:policy/AmazonSageMakerFullAccess"
        - !Sub "arn:${AWS::Partition}:iam::aws:policy/service-role/AwsGlueSessionUserRestrictedServiceRole"
        - !Sub "arn:${AWS::Partition}:iam::aws:policy/AmazonS3ReadOnlyAccess"
        - !Ref SageMakerCustomPolicy

  ####################################################################################################################
  #### LakeFormation Database setup and Admin profile
  ####################################################################################################################

  # Service role that CloudFormation assumes when CFNRoleFunction creates or
  # deletes the Lake Formation stack (passed as RoleARN).
  # NOTE(review): IAMFullAccess is very broad - confirm the nested template needs it.
  LakeFormationStackRole:
    Type: AWS::IAM::Role
    Properties:
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - cloudformation.amazonaws.com
            Action:
              - sts:AssumeRole
      RoleName: !Sub ${AWS::StackName}-cfn-admin-role
      Path: /
      Policies:
        - PolicyName: !Sub ${AWS::StackName}-cfn-admin-policy
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              - Effect: Allow
                Action:
                  - "glue:SearchTables"
                  - "lakeformation:BatchGrantPermissions"
                  - "lakeformation:SearchDatabasesByLFTags"
                  - "lakeformation:GetDataAccess"
                  - "glue:GetTableVersions"
                  - "glue:GetPartitions"
                  - "lakeformation:GetWorkUnits"
                  - "glue:UpdateTable"
                  - "glue:DeleteTable"
                  - "lakeformation:RegisterResource"
                  - "cloudformation:CreateChangeSet"
                  - "lakeformation:ListPermissions"
                  - "glue:DeleteWorkflow"
                  - "lakeformation:AddLFTagsToResource"
                  - "glue:StartWorkflowRun"
                  - "glue:ListWorkflows"
                  - "cloudtrail:LookupEvents"
                  - "glue:UpdateDatabase"
                  - "glue:CreateTable"
                  - "glue:GetTables"
                  - "lakeformation:SearchTablesByLFTags"
                  - "lakeformation:RemoveLFTagsFromResource"
                  - "lakeformation:UpdateLFTag"
                  - "lakeformation:GetLFTag"
                  - "cloudtrail:DescribeTrails"
                  - "lakeformation:BatchRevokePermissions"
                  - "lakeformation:GetResourceLFTags"
                  - "lakeformation:ListResources"
                  - "s3:GetObject"
                  - "lakeformation:CreateLFTag"
                  - "lakeformation:GetQueryStatistics"
                  - "glue:BatchGetWorkflows"
                  - "lakeformation:GetDataLakeSettings"
                  - "lakeformation:CreateDataCellsFilter"
                  - "glue:GetWorkflow"
                  - "lakeformation:UpdateResource"
                  - "lakeformation:StartQueryPlanning"
                  - "glue:GetConnections"
                  - "glue:DeleteDatabase"
                  - "s3:ListBucket"
                  - "lakeformation:GrantPermissions"
                  - "lakeformation:PutDataLakeSettings"
                  - "lakeformation:RevokePermissions"
                  - "lakeformation:ListDataCellsFilter"
                  - "lakeformation:GetEffectivePermissionsForPath"
                  - "lakeformation:ListTransactions"
                  - "lakeformation:DeleteObjectsOnCancel"
                  - "lakeformation:ListLFTags"
                  - "lakeformation:GetWorkUnitResults"
                  - "lakeformation:ListTableStorageOptimizers"
                  - "glue:GetDatabases"
                  - "s3:GetBucketAcl"
                  - "lakeformation:GetQueryState"
                  - "glue:GetTable"
                  - "glue:GetDatabase"
                  - "lakeformation:DescribeResource"
                  - "s3:ListAllMyBuckets"
                  - "cloudformation:CreateStack"
                  - "glue:CreateDatabase"
                  - "lakeformation:GetTableObjects"
                  - "lakeformation:DeleteLFTag"
                  - "lakeformation:DeregisterResource"
                  - "lakeformation:DeleteDataCellsFilter"
                  - "glue:GetWorkflowRuns"
                  - "lakeformation:DescribeTransaction"
                  - "s3:GetBucketLocation"
                Resource: '*'
      ManagedPolicyArns:
        - !Sub "arn:${AWS::Partition}:iam::aws:policy/IAMFullAccess"

  # Execution role of CFNRoleFunction: manages the nested stack and may pass
  # LakeFormationStackRole to CloudFormation.
  LambdaRole:
    Type: AWS::IAM::Role
    Properties:
      RoleName: !Sub ${AWS::StackName}-cfn-lambda-role
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - lambda.amazonaws.com
            Action: sts:AssumeRole
      ManagedPolicyArns:
        - !Sub "arn:${AWS::Partition}:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole"
      Path: /
      Policies:
        - PolicyName: !Sub ${AWS::StackName}-cfn-lambda-policy
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              - Effect: Allow
                Action:
                  - cloudformation:CreateStack
                  - cloudformation:DeleteStack
                  - cloudformation:DescribeStacks
                  - s3:GetObject
                Resource: '*'
              - Effect: Allow
                Action:
                  - iam:PassRole
                Resource: !GetAtt LakeFormationStackRole.Arn

  # Custom-resource handler that creates (and on Delete, removes) the
  # Lake Formation stack from a published template, polling until creation ends.
  CFNRoleFunction:
    Type: AWS::Lambda::Function
    DependsOn:
      - LakeFormationStackRole
      - LambdaRole
      - MarketingDataAccessRole
      - SalesAccessRole
      - ElectronicsDataAccessRole
      # No property references CopyData, so this ordering must stay explicit.
      - CopyData
    Properties:
      FunctionName: !Sub ${AWS::StackName}-create-stack
      Environment:
        Variables:
          CFN_ROLE: !GetAtt LakeFormationStackRole.Arn
          MarketingRole: !GetAtt MarketingDataAccessRole.Arn
          SalesRole: !GetAtt SalesAccessRole.Arn
          ElectronicsRole: !GetAtt ElectronicsDataAccessRole.Arn
          S3_BUCKET: !Ref 'S3Bucket'
          LakeFormationStackName: !Sub ${AWS::StackName}-LakeFormationStack
      Code:
        ZipFile: |
          import os
          import time
          import boto3
          import cfnresponse


          def lambda_handler(event, context):
              print({"event": event})
              # FAILED unless a branch below succeeds, so errors are still reported.
              status = cfnresponse.FAILED
              try:
                  cfn_role = os.environ["CFN_ROLE"]
                  stack_name = os.environ["LakeFormationStackName"]
                  cfn = boto3.client("cloudformation")
                  if event["RequestType"] == "Delete":
                      cfn.delete_stack(StackName=stack_name, RoleARN=cfn_role)
                      status = cfnresponse.SUCCESS
                  else:
                      parameters = {
                          "CloudformationRole": cfn_role,
                          "MarketingRole": os.environ["MarketingRole"],
                          "SalesRole": os.environ["SalesRole"],
                          "ElectronicsRole": os.environ["ElectronicsRole"],
                          "DataLakeBucket": os.environ["S3_BUCKET"],
                      }
                      response = cfn.create_stack(
                          StackName=stack_name,
                          TemplateURL="https://aws-blogs-artifacts-public.s3.amazonaws.com/artifacts/ML-11298/lake-formation.yml",
                          Parameters=[
                              {"ParameterKey": k, "ParameterValue": v}
                              for k, v in parameters.items()
                          ],
                          TimeoutInMinutes=10,
                          RoleARN=cfn_role,
                          Capabilities=["CAPABILITY_NAMED_IAM"],
                      )
                      print("Stack ID: ", response["StackId"])
                      # Poll once a minute until the stack leaves CREATE_IN_PROGRESS.
                      while True:
                          stack = cfn.describe_stacks(StackName=response["StackId"])["Stacks"][0]
                          state = stack["StackStatus"]
                          if state == "CREATE_IN_PROGRESS":
                              print("Stack is being created..")
                              time.sleep(60)
                              continue
                          if state == "CREATE_COMPLETE":
                              print("Lakeformation stack complete.")
                              status = cfnresponse.SUCCESS
                          else:
                              print("Stack could not be created, with status: ", state)
                          break
              except Exception as error:
                  print("Request failed: ", error)
              finally:
                  # Always answer CloudFormation; otherwise the stack waits for a timeout.
                  cfnresponse.send(event, context, status, {}, None)
      Handler: index.lambda_handler
      Runtime: python3.12
      Timeout: 900
      Role: !GetAtt LambdaRole.Arn

  # Invokes CFNRoleFunction on stack create / delete.
  InvokeLambda:
    Type: AWS::CloudFormation::CustomResource
    DependsOn: CFNRoleFunction
    Properties:
      ServiceToken: !GetAtt CFNRoleFunction.Arn
####################################################################################################################
#### Studio + EMR Network Configuration
####################################################################################################################

  # VPC with DNS support/hostnames enabled; CIDR comes from the VpcConfigurations mapping.
  VPC:
    Type: AWS::EC2::VPC
    Properties:
      CidrBlock: !FindInMap [VpcConfigurations, cidr, Vpc]
      EnableDnsSupport: true
      EnableDnsHostnames: true
      Tags:
        - Key: Name
          Value: !Sub '${AWS::StackName}-VPC'

  InternetGateway:
    Type: AWS::EC2::InternetGateway
    Properties:
      Tags:
        - Key: Name
          Value: !Sub '${AWS::StackName}-IGW'

  InternetGatewayAttachment:
    Type: AWS::EC2::VPCGatewayAttachment
    Properties:
      InternetGatewayId: !Ref InternetGateway
      VpcId: !Ref VPC

  # Public subnet in the first AZ; hosts the NAT gateway.
  PublicSubnet1:
    Type: AWS::EC2::Subnet
    Properties:
      VpcId: !Ref VPC
      AvailabilityZone: !Select [0, !GetAZs '']
      CidrBlock: !FindInMap [VpcConfigurations, cidr, PublicSubnet1]
      MapPublicIpOnLaunch: true
      Tags:
        - Key: Name
          Value: !Sub '${AWS::StackName} Public Subnet (AZ1)'

  # Private subnet in the same AZ; instances get no public IP on launch.
  PrivateSubnet1:
    Type: AWS::EC2::Subnet
    Properties:
      VpcId: !Ref VPC
      AvailabilityZone: !Select [0, !GetAZs '']
      CidrBlock: !FindInMap [VpcConfigurations, cidr, PrivateSubnet1]
      MapPublicIpOnLaunch: false
      Tags:
        - Key: Name
          Value: !Sub '${AWS::StackName} Private Subnet (AZ1)'

  # Elastic IP for the NAT gateway; created only after the IGW is attached.
  NatGateway1EIP:
    Type: AWS::EC2::EIP
    DependsOn: InternetGatewayAttachment
    Properties:
      Domain: vpc

  NatGateway1:
    Type: AWS::EC2::NatGateway
    Properties:
      AllocationId: !GetAtt NatGateway1EIP.AllocationId
      SubnetId: !Ref PublicSubnet1

  PublicRouteTable:
    Type: AWS::EC2::RouteTable
    Properties:
      VpcId: !Ref VPC
      Tags:
        - Key: Name
          Value: !Sub '${AWS::StackName} Public Routes'

  # Default route of the public route table goes to the internet gateway.
  DefaultPublicRoute:
    Type: AWS::EC2::Route
    DependsOn: InternetGatewayAttachment
    Properties:
      RouteTableId: !Ref PublicRouteTable
      DestinationCidrBlock: 0.0.0.0/0
      GatewayId: !Ref InternetGateway

  PublicSubnet1RouteTableAssociation:
    Type: AWS::EC2::SubnetRouteTableAssociation
    Properties:
      RouteTableId: !Ref PublicRouteTable
      SubnetId: !Ref PublicSubnet1
# Private subnet routing, EMR security groups and VPC endpoints.

  PrivateRouteTable1:
    Type: AWS::EC2::RouteTable
    Properties:
      VpcId: !Ref VPC
      Tags:
        - Key: Name
          Value: !Sub '${AWS::StackName} Private Routes (AZ1)'

  PrivateSubnet1RouteTableAssociation:
    Type: AWS::EC2::SubnetRouteTableAssociation
    Properties:
      RouteTableId: !Ref PrivateRouteTable1
      SubnetId: !Ref PrivateSubnet1

  # Default route of the private route table goes through the NAT gateway.
  PrivateSubnet1InternetRoute:
    Type: AWS::EC2::Route
    Properties:
      RouteTableId: !Ref PrivateRouteTable1
      DestinationCidrBlock: 0.0.0.0/0
      NatGatewayId: !Ref NatGateway1

  # S3 gateway endpoint attached to the private route table.
  S3Endpoint:
    Type: AWS::EC2::VPCEndpoint
    Properties:
      ServiceName: !Sub 'com.amazonaws.${AWS::Region}.s3'
      VpcEndpointType: Gateway
      PolicyDocument:
        Version: 2012-10-17
        Statement:
          - Effect: Allow
            Principal: '*'
            Action:
              - 's3:*'
            Resource:
              - '*'
      VpcId: !Ref VPC
      RouteTableIds:
        - !Ref PrivateRouteTable1

  # --- EMR security groups (ingress is added by the standalone rules below) ---
  masterSecurityGroup:
    Type: AWS::EC2::SecurityGroup
    Properties:
      GroupDescription: EMR Master SG
      SecurityGroupEgress:
        - IpProtocol: -1
          FromPort: -1
          ToPort: -1
          CidrIp: 0.0.0.0/0
      VpcId: !Ref VPC

  slaveSecurityGroup:
    Type: AWS::EC2::SecurityGroup
    Properties:
      GroupDescription: EMR Slave SG
      SecurityGroupEgress:
        - IpProtocol: -1
          FromPort: -1
          ToPort: -1
          CidrIp: 0.0.0.0/0
      VpcId: !Ref VPC

  emrServiceSecurityGroup:
    Type: AWS::EC2::SecurityGroup
    Properties:
      GroupDescription: EMR Service Access SG
      VpcId: !Ref VPC

  # --- Master SG ingress: ICMP / all TCP / all UDP from master and slave SGs ---
  emrMasterIngressSelfICMP:
    Type: AWS::EC2::SecurityGroupIngress
    Properties:
      GroupId: !Ref masterSecurityGroup
      IpProtocol: icmp
      FromPort: -1
      ToPort: -1
      SourceSecurityGroupId: !Ref masterSecurityGroup

  emrMasterIngressSlaveICMP:
    Type: AWS::EC2::SecurityGroupIngress
    Properties:
      GroupId: !Ref masterSecurityGroup
      IpProtocol: icmp
      FromPort: -1
      ToPort: -1
      SourceSecurityGroupId: !Ref slaveSecurityGroup

  emrMasterIngressSelfAllTcp:
    Type: AWS::EC2::SecurityGroupIngress
    Properties:
      GroupId: !Ref masterSecurityGroup
      IpProtocol: tcp
      FromPort: 0
      ToPort: 65535
      SourceSecurityGroupId: !Ref masterSecurityGroup

  emrMasterIngressSlaveAllTcp:
    Type: AWS::EC2::SecurityGroupIngress
    Properties:
      GroupId: !Ref masterSecurityGroup
      IpProtocol: tcp
      FromPort: 0
      ToPort: 65535
      SourceSecurityGroupId: !Ref slaveSecurityGroup

  emrMasterIngressSelfAllUdp:
    Type: AWS::EC2::SecurityGroupIngress
    Properties:
      GroupId: !Ref masterSecurityGroup
      IpProtocol: udp
      FromPort: 0
      ToPort: 65535
      SourceSecurityGroupId: !Ref masterSecurityGroup

  emrMasterIngressSlaveAllUdp:
    Type: AWS::EC2::SecurityGroupIngress
    Properties:
      GroupId: !Ref masterSecurityGroup
      IpProtocol: udp
      FromPort: 0
      ToPort: 65535
      SourceSecurityGroupId: !Ref slaveSecurityGroup

  # --- Master SG ingress from the SageMaker SG (TCP 8998 and 10000) ---
  emrMasterIngressLivySG:
    Type: AWS::EC2::SecurityGroupIngress
    Properties:
      GroupId: !Ref masterSecurityGroup
      IpProtocol: tcp
      FromPort: 8998
      ToPort: 8998
      SourceSecurityGroupId: !Ref SageMakerInstanceSecurityGroup

  emrMasterIngressHiveSG:
    Type: AWS::EC2::SecurityGroupIngress
    Properties:
      GroupId: !Ref masterSecurityGroup
      IpProtocol: tcp
      FromPort: 10000
      ToPort: 10000
      SourceSecurityGroupId: !Ref SageMakerInstanceSecurityGroup

  # --- Traffic between the EMR service access SG and the cluster SGs ---
  emrMasterIngressServiceSg:
    Type: AWS::EC2::SecurityGroupIngress
    Properties:
      GroupId: !Ref masterSecurityGroup
      IpProtocol: tcp
      FromPort: 8443
      ToPort: 8443
      SourceSecurityGroupId: !Ref emrServiceSecurityGroup

  emrServiceIngressMasterSg:
    Type: AWS::EC2::SecurityGroupIngress
    Properties:
      GroupId: !Ref emrServiceSecurityGroup
      IpProtocol: tcp
      FromPort: 9443
      ToPort: 9443
      SourceSecurityGroupId: !Ref masterSecurityGroup

  emrServiceEgressMaster:
    Type: AWS::EC2::SecurityGroupEgress
    Properties:
      GroupId: !Ref emrServiceSecurityGroup
      IpProtocol: tcp
      FromPort: 8443
      ToPort: 8443
      DestinationSecurityGroupId: !Ref masterSecurityGroup

  emrServiceEgressSlave:
    Type: AWS::EC2::SecurityGroupEgress
    Properties:
      GroupId: !Ref emrServiceSecurityGroup
      IpProtocol: tcp
      FromPort: 8443
      ToPort: 8443
      DestinationSecurityGroupId: !Ref slaveSecurityGroup

  # --- Slave SG ingress: ICMP / all TCP / all UDP from slave and master SGs ---
  emrSlaveIngressSelfICMP:
    Type: AWS::EC2::SecurityGroupIngress
    Properties:
      GroupId: !Ref slaveSecurityGroup
      IpProtocol: icmp
      FromPort: -1
      ToPort: -1
      SourceSecurityGroupId: !Ref slaveSecurityGroup

  emrSlaveIngressMasterICMP:
    Type: AWS::EC2::SecurityGroupIngress
    Properties:
      GroupId: !Ref slaveSecurityGroup
      IpProtocol: icmp
      FromPort: -1
      ToPort: -1
      SourceSecurityGroupId: !Ref masterSecurityGroup

  emrSlaveIngressSelfAllTcp:
    Type: AWS::EC2::SecurityGroupIngress
    Properties:
      GroupId: !Ref slaveSecurityGroup
      IpProtocol: tcp
      FromPort: 0
      ToPort: 65535
      SourceSecurityGroupId: !Ref slaveSecurityGroup

  emrSlaveIngressMasterAllTcp:
    Type: AWS::EC2::SecurityGroupIngress
    Properties:
      GroupId: !Ref slaveSecurityGroup
      IpProtocol: tcp
      FromPort: 0
      ToPort: 65535
      SourceSecurityGroupId: !Ref masterSecurityGroup

  emrSlaveIngressSelfAllUdp:
    Type: AWS::EC2::SecurityGroupIngress
    Properties:
      GroupId: !Ref slaveSecurityGroup
      IpProtocol: udp
      FromPort: 0
      ToPort: 65535
      SourceSecurityGroupId: !Ref slaveSecurityGroup

  emrSlaveIngressMasterAllUdp:
    Type: AWS::EC2::SecurityGroupIngress
    Properties:
      GroupId: !Ref slaveSecurityGroup
      IpProtocol: udp
      FromPort: 0
      ToPort: 65535
      SourceSecurityGroupId: !Ref masterSecurityGroup

  emrSlaveIngressServiceSg:
    Type: AWS::EC2::SecurityGroupIngress
    Properties:
      GroupId: !Ref slaveSecurityGroup
      IpProtocol: tcp
      FromPort: 8443
      ToPort: 8443
      SourceSecurityGroupId: !Ref emrServiceSecurityGroup

  # --- Interface VPC endpoints in the private subnet (private DNS enabled) ---
  VPCEndpointSagemakerAPI:
    Type: AWS::EC2::VPCEndpoint
    Properties:
      PolicyDocument:
        Version: 2012-10-17
        Statement:
          - Effect: Allow
            Principal: '*'
            Action: '*'
            Resource: '*'
      VpcEndpointType: Interface
      PrivateDnsEnabled: true
      SubnetIds:
        - !Ref PrivateSubnet1
      SecurityGroupIds:
        - !Ref VPCEndpointSecurityGroup
      ServiceName: !Sub 'com.amazonaws.${AWS::Region}.sagemaker.api'
      VpcId: !Ref VPC

  VPCEndpointSageMakerRuntime:
    Type: AWS::EC2::VPCEndpoint
    Properties:
      PolicyDocument:
        Version: 2012-10-17
        Statement:
          - Effect: Allow
            Principal: '*'
            Action: '*'
            Resource: '*'
      VpcEndpointType: Interface
      PrivateDnsEnabled: true
      SubnetIds:
        - !Ref PrivateSubnet1
      SecurityGroupIds:
        - !Ref VPCEndpointSecurityGroup
      ServiceName: !Sub 'com.amazonaws.${AWS::Region}.sagemaker.runtime'
      VpcId: !Ref VPC

  VPCEndpointSTS:
    Type: AWS::EC2::VPCEndpoint
    Properties:
      PolicyDocument:
        Version: 2012-10-17
        Statement:
          - Effect: Allow
            Principal: '*'
            Action: '*'
            Resource: '*'
      VpcEndpointType: Interface
      PrivateDnsEnabled: true
      SubnetIds:
        - !Ref PrivateSubnet1
      SecurityGroupIds:
        - !Ref VPCEndpointSecurityGroup
      ServiceName: !Sub 'com.amazonaws.${AWS::Region}.sts'
      VpcId: !Ref VPC

  VPCEndpointCW:
    Type: AWS::EC2::VPCEndpoint
    Properties:
      PolicyDocument:
        Version: 2012-10-17
        Statement:
          - Effect: Allow
            Principal: '*'
            Action: '*'
            Resource: '*'
      VpcEndpointType: Interface
      PrivateDnsEnabled: true
      SubnetIds:
        - !Ref PrivateSubnet1
      SecurityGroupIds:
        - !Ref VPCEndpointSecurityGroup
      ServiceName: !Sub 'com.amazonaws.${AWS::Region}.monitoring'
      VpcId: !Ref VPC

  VPCEndpointCWL:
    Type: AWS::EC2::VPCEndpoint
    Properties:
      PolicyDocument:
        Version: 2012-10-17
        Statement:
          - Effect: Allow
            Principal: '*'
            Action: '*'
            Resource: '*'
      VpcEndpointType: Interface
      PrivateDnsEnabled: true
      SubnetIds:
        - !Ref PrivateSubnet1
      SecurityGroupIds:
        - !Ref VPCEndpointSecurityGroup
      ServiceName: !Sub 'com.amazonaws.${AWS::Region}.logs'
      VpcId: !Ref VPC

  VPCEndpointECR:
    Type: AWS::EC2::VPCEndpoint
    Properties:
      PolicyDocument:
        Version: 2012-10-17
        Statement:
          - Effect: Allow
            Principal: '*'
            Action: '*'
            Resource: '*'
      VpcEndpointType: Interface
      PrivateDnsEnabled: true
      SubnetIds:
        - !Ref PrivateSubnet1
      SecurityGroupIds:
        - !Ref VPCEndpointSecurityGroup
      ServiceName: !Sub 'com.amazonaws.${AWS::Region}.ecr.dkr'
      VpcId: !Ref VPC

  VPCEndpointECRAPI:
    Type: AWS::EC2::VPCEndpoint
    Properties:
      PolicyDocument:
        Version: 2012-10-17
        Statement:
          - Effect: Allow
            Principal: '*'
            Action: '*'
            Resource: '*'
      VpcEndpointType: Interface
      PrivateDnsEnabled: true
      SubnetIds:
        - !Ref PrivateSubnet1
      SecurityGroupIds:
        - !Ref VPCEndpointSecurityGroup
      ServiceName: !Sub 'com.amazonaws.${AWS::Region}.ecr.api'
      VpcId: !Ref VPC

####################################################################################################################
#### EMR Credential Configuration
####################################################################################################################

  # Service role assumed by the EMR service itself.
  EMRClusterServiceRole:
    Type: AWS::IAM::Role
    Properties:
      Path: /
      AssumeRolePolicyDocument:
        Version: 2012-10-17
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - elasticmapreduce.amazonaws.com
            Action:
              - 'sts:AssumeRole'
      ManagedPolicyArns:
        - 'arn:aws:iam::aws:policy/service-role/AmazonElasticMapReduceRole'

  # Instance profile wrapping EMRClusterinstanceProfileRole.
  EMRClusterinstanceProfile:
    Type: AWS::IAM::InstanceProfile
    Properties:
      Path: /
      Roles:
        - !Ref EMRClusterinstanceProfileRole

  # EC2 role of the cluster nodes; the data access and Studio execution roles
  # name this role's ARN as a trusted principal.
  EMRClusterinstanceProfileRole:
    Type: AWS::IAM::Role
    Properties:
      RoleName: !Sub '${AWS::StackName}-EMRClusterinstanceProfileRole'
      Path: /
      AssumeRolePolicyDocument:
        Version: 2012-10-17
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - ec2.amazonaws.com
            Action:
              - 'sts:AssumeRole'
      ManagedPolicyArns:
        - 'arn:aws:iam::aws:policy/service-role/AmazonElasticMapReduceforEC2Role'

  # Lets the instance profile role assume / tag sessions for / look up the
  # Studio execution roles and the three data access roles.
  AllowEMRClusterinstanceProfileRoleToAssumeSageMakerExecutionRole:
    Type: AWS::IAM::Policy
    Properties:
      PolicyName: AllowEMRClusterinstanceProfileRoleToAssumeEMRRoles
      Roles:
        - !Ref EMRClusterinstanceProfileRole
      PolicyDocument:
        Version: "2012-10-17"
        Statement:
          - Effect: Allow
            Action:
              - 'sts:AssumeRole'
              - 'sts:TagSession'
              - 'iam:GetRole'
            Resource:
              - !GetAtt SageMakerExecutionRole1.Arn
              - !GetAtt SageMakerExecutionRole2.Arn
              - !GetAtt MarketingDataAccessRole.Arn
              - !GetAtt SalesAccessRole.Arn
              - !GetAtt ElectronicsDataAccessRole.Arn

  # EMR security configuration: Lake Formation session tag, application-scoped
  # IAM roles with source-identity propagation, and in-transit TLS using the
  # PEM bundle referenced by the S3CertsZip parameter.
  securityConfiguration:
    Type: AWS::EMR::SecurityConfiguration
    Properties:
      SecurityConfiguration:
        AuthorizationConfiguration:
          LakeFormationConfiguration:
            AuthorizedSessionTagValue: 'Amazon EMR'
          IAMConfiguration:
            EnableApplicationScopedIAMRole: true
            ApplicationScopedIAMRoleConfiguration:
              PropagateSourceIdentity: true
        EncryptionConfiguration:
          EnableInTransitEncryption: true
          EnableAtRestEncryption: false
          InTransitEncryptionConfiguration:
            TLSCertificateConfiguration:
              CertificateProviderType: 'PEM'
              S3Object: !Ref S3CertsZip
####################################################################################################################
#### SageMaker and EMR Network Configuration
####################################################################################################################

  # Security group for Studio apps; its only ingress is the self-referencing
  # rule declared right below.
  SageMakerInstanceSecurityGroup:
    Type: AWS::EC2::SecurityGroup
    Properties:
      GroupName: SMSG
      GroupDescription: Security group with no ingress rule
      SecurityGroupEgress:
        - IpProtocol: -1
          FromPort: -1
          ToPort: -1
          CidrIp: 0.0.0.0/0
      VpcId: !Ref VPC

  # All-protocol ingress from members of the same group.
  SageMakerInstanceSecurityGroupIngress:
    Type: AWS::EC2::SecurityGroupIngress
    Properties:
      IpProtocol: '-1'
      GroupId: !Ref SageMakerInstanceSecurityGroup
      SourceSecurityGroupId: !Ref SageMakerInstanceSecurityGroup

  # Security group attached to the interface VPC endpoints.
  VPCEndpointSecurityGroup:
    Type: AWS::EC2::SecurityGroup
    Properties:
      GroupDescription: Allow TLS for VPC Endpoint
      SecurityGroupEgress:
        - IpProtocol: -1
          FromPort: -1
          ToPort: -1
          CidrIp: 0.0.0.0/0
      VpcId: !Ref VPC
      Tags:
        - Key: Name
          Value: !Sub ${AWS::StackName}-endpoint-security-group

  # All-protocol ingress to the endpoint group from the SageMaker group.
  EndpointSecurityGroupIngress:
    Type: AWS::EC2::SecurityGroupIngress
    Properties:
      IpProtocol: '-1'
      GroupId: !Ref VPCEndpointSecurityGroup
      SourceSecurityGroupId: !Ref SageMakerInstanceSecurityGroup

  # --- Master SG ingress from the SageMaker SG on TCP 88, 749 and 464 ---
  emrMasterIngressKDCSG:
    Type: AWS::EC2::SecurityGroupIngress
    Properties:
      GroupId: !Ref masterSecurityGroup
      IpProtocol: tcp
      FromPort: 88
      ToPort: 88
      SourceSecurityGroupId: !Ref SageMakerInstanceSecurityGroup

  emrMasterIngressKDCAdminSG:
    Type: AWS::EC2::SecurityGroupIngress
    Properties:
      GroupId: !Ref masterSecurityGroup
      IpProtocol: tcp
      FromPort: 749
      ToPort: 749
      SourceSecurityGroupId: !Ref SageMakerInstanceSecurityGroup

  emrMasterIngressKinit464SG:
    Type: AWS::EC2::SecurityGroupIngress
    Properties:
      GroupId: !Ref masterSecurityGroup
      IpProtocol: tcp
      FromPort: 464
      ToPort: 464
      SourceSecurityGroupId: !Ref SageMakerInstanceSecurityGroup

####################################################################################################################
#### Allow EMR to write log files and
#### utilize bootstrapping scripts
####################################################################################################################

  # Stack bucket: destination of the copy custom resources below.
  S3Bucket:
    Type: AWS::S3::Bucket

  # Role the EMR instance profile role can assume to read/write the stack bucket.
  allowEMRFSAccessForUser1:
    Type: AWS::IAM::Role
    Properties:
      RoleName: !Sub '${AWS::StackName}-allowEMRFSAccessForUser1'
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              # GetAtt (instead of a hand-built ARN string) makes CloudFormation
              # create the instance profile role first; IAM rejects a trust
              # policy whose role principal does not exist yet.
              AWS: !GetAtt EMRClusterinstanceProfileRole.Arn
            Action:
              - 'sts:AssumeRole'
      Path: /
      Policies:
        - PolicyName: !Sub '${AWS::StackName}-emrFS-user1'
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              - Action:
                  - 's3:ListBucket'
                Resource:
                  - !Sub 'arn:aws:s3:::${S3Bucket}'
                Effect: Allow
              - Action:
                  - s3:GetObject
                  - s3:PutObject
                  - s3:Describe*
                  - s3:List*
                Resource:
                  - !Sub 'arn:aws:s3:::${S3Bucket}/*'
                Effect: Allow

  # Copies the EMR bootstrap / step / python-install scripts into the stack bucket.
  CopyZips:
    Type: Custom::CopyZips
    Properties:
      ServiceToken: !GetAtt 'CopyZipsFunction.Arn'
      DestBucket: !Ref 'S3Bucket'
      SourceBucket: !FindInMap [Studio, s3params, S3Bucket]
      Prefix: !FindInMap [Studio, s3params, S3Key]
      Objects:
        - !FindInMap [ClusterConfigurations, emr, BootStrapScriptFile]
        - !FindInMap [ClusterConfigurations, emr, StepScriptFile]
        - !FindInMap [ClusterConfigurations, emr, PyInstallFile]

  # Copies the replace-rpms script and the secret agent RPM from the regional
  # emr-data-access-control bucket.
  CopyBootstrapFiles:
    Type: Custom::CopyBootstrapFiles
    Properties:
      ServiceToken: !GetAtt 'CopyZipsFunction.Arn'
      DestBucket: !Ref 'S3Bucket'
      SourceBucket: !Sub 'emr-data-access-control-${AWS::Region}'
      Prefix: !FindInMap [ClusterConfigurations, Bootstrap, S3Prefix]
      Objects:
        - !FindInMap [ClusterConfigurations, Bootstrap, replaceRpms]
        - !FindInMap [ClusterConfigurations, Bootstrap, emrSecretAgent]

  # Copies the TPC parquet files listed in the TPCData mapping.
  CopyData:
    Type: Custom::CopyData
    Properties:
      ServiceToken: !GetAtt 'CopyZipsFunction.Arn'
      DestBucket: !Ref 'S3Bucket'
      SourceBucket: !FindInMap [Studio, lfdata, S3Bucket]
      Prefix: !FindInMap [Studio, lfdata, S3Key]
      Objects:
        - !FindInMap [TPCData, keys, customer]
        - !FindInMap [TPCData, keys, custaddress]
        - !FindInMap [TPCData, keys, demo]
        - !FindInMap [TPCData, keys, income]
        - !FindInMap [TPCData, keys, item]
        - !FindInMap [TPCData, keys, promo]
        - !FindInMap [TPCData, keys, webpage]
        - !FindInMap [TPCData, keys, websales]

  # Execution role of CopyZipsFunction: read any object, write/delete only in
  # the stack bucket.
  CopyZipsRole:
    Type: AWS::IAM::Role
    Properties:
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service: lambda.amazonaws.com
            Action: sts:AssumeRole
      ManagedPolicyArns:
        - !Sub 'arn:${AWS::Partition}:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole'
      Path: /
      Policies:
        - PolicyName: lambda-copier
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              - Effect: Allow
                Action:
                  - s3:GetObject
                Resource: '*'
              - Effect: Allow
                Action:
                  - s3:PutObject
                  - s3:DeleteObject
                Resource:
                  - !Sub 'arn:aws:s3:::${S3Bucket}/*'

  # Custom-resource handler shared by CopyZips, CopyBootstrapFiles and CopyData:
  # copies Prefix+object from SourceBucket to DestBucket on Create/Update and
  # deletes those keys from DestBucket on Delete.
  CopyZipsFunction:
    Type: AWS::Lambda::Function
    Properties:
      Description: Copies objects from a source S3 bucket to a destination
      Handler: index.handler
      Runtime: python3.12
      Role: !GetAtt 'CopyZipsRole.Arn'
      Timeout: 900
      Code:
        ZipFile: |
          import json
          import logging
          import threading
          import boto3
          import cfnresponse

          def copy_objects(source_bucket, dest_bucket, prefix, objects):
              s3 = boto3.client('s3')
              for o in objects:
                  key = prefix + o
                  copy_source = {
                      'Bucket': source_bucket,
                      'Key': key
                  }
                  print('copy_source: %s' % copy_source)
                  print('dest_bucket = %s'%dest_bucket)
                  print('key = %s' %key)
                  s3.copy_object(CopySource=copy_source, Bucket=dest_bucket,
                        Key=key)

          def delete_objects(bucket, prefix, objects):
              s3 = boto3.client('s3')
              objects = {'Objects': [{'Key': prefix + o} for o in objects]}
              s3.delete_objects(Bucket=bucket, Delete=objects)

          def timeout(event, context):
              logging.error('Execution is about to time out, sending failure response to CloudFormation')
              cfnresponse.send(event, context, cfnresponse.FAILED, {}, None)

          def handler(event, context):
              # make sure we send a failure to CloudFormation if the function
              # is going to timeout
              timer = threading.Timer((context.get_remaining_time_in_millis()
                        / 1000.00) - 0.5, timeout, args=[event, context])
              timer.start()
              print('Received event: %s' % json.dumps(event))
              status = cfnresponse.SUCCESS
              try:
                  source_bucket = event['ResourceProperties']['SourceBucket']
                  dest_bucket = event['ResourceProperties']['DestBucket']
                  prefix = event['ResourceProperties']['Prefix']
                  objects = event['ResourceProperties']['Objects']
                  if event['RequestType'] == 'Delete':
                      delete_objects(dest_bucket, prefix, objects)
                  else:
                      copy_objects(source_bucket, dest_bucket, prefix, objects)
              except Exception as e:
                  logging.error('Exception: %s' % e, exc_info=True)
                  status = cfnresponse.FAILED
              finally:
                  timer.cancel()
                  cfnresponse.send(event, context, status, {}, None)

  ####################################################################################################################
  #### Setup SageMaker Studio
  ####################################################################################################################

  # VPC-only Studio domain in the private subnet; the default execution role is
  # SageMakerExecutionRole1 and the Jupyter server image comes from the ARNs mapping.
  StudioDomain:
    Type: AWS::SageMaker::Domain
    Properties:
      AppNetworkAccessType: VpcOnly
      AuthMode: IAM
      DomainName: StudioDomain
      VpcId: !Ref VPC
      SubnetIds:
        - !Ref PrivateSubnet1
      DefaultUserSettings:
        ExecutionRole: !GetAtt SageMakerExecutionRole1.Arn
        JupyterServerAppSettings:
          DefaultResourceSpec:
            SageMakerImageArn:
              Fn::FindInMap:
                - ARNs
                - !Ref AWS::Region
                - arn
        SecurityGroups:
          - !Ref SageMakerInstanceSecurityGroup

  # User profile bound to SageMakerExecutionRole1.
  StudioUserProfile1:
    Type: AWS::SageMaker::UserProfile
    Properties:
      DomainId: !Ref StudioDomain
      UserProfileName: tina-sales-electronics
      UserSettings:
        ExecutionRole: !GetAtt SageMakerExecutionRole1.Arn

  # User profile bound to SageMakerExecutionRole2.
  StudioUserProfile2:
    Type: AWS::SageMaker::UserProfile
    Properties:
      DomainId: !Ref StudioDomain
      UserProfileName: david-non-sensitive-customer
      UserSettings:
        ExecutionRole: !GetAtt SageMakerExecutionRole2.Arn

  ####################################################################################################################
  #### Setup EMR Cluster
  ####################################################################################################################
  EMRCluster:
    DependsOn:
      - CopyZips
      - securityConfiguration
    Type:
'AWS::EMR::Cluster' Properties: Applications: - Name: Spark - Name: Hive - Name: Livy BootstrapActions: - Name: Install Py libraries ScriptBootstrapAction: Path: !Sub 's3://${S3Bucket}/artifacts/sma-milestone1/installpylibs-v2.sh' - Name: Install rpm ScriptBootstrapAction: Path: !Join - '' - - !Sub 's3://${S3Bucket}/' - !FindInMap - ClusterConfigurations - Bootstrap - S3Prefix - !FindInMap - ClusterConfigurations - Bootstrap - replaceRpms Args: - !Join - '' - - !Sub 's3://${S3Bucket}/' - !FindInMap - ClusterConfigurations - Bootstrap - S3Prefix - !FindInMap - ClusterConfigurations - Bootstrap - emrSecretAgent AutoScalingRole: EMR_AutoScaling_DefaultRole Configurations: - Classification: livy-conf ConfigurationProperties: livy.server.session.timeout: 12h - Classification: spark-defaults ConfigurationProperties: spark.sql.hive.metastore.sharedPrefixes: 'com.amazonaws.services.dynamodbv2,com.amazonaws.emr.recordserver,com.amazon.ws.emr.hadoop.fs' EbsRootVolumeSize: 100 Instances: CoreInstanceGroup: EbsConfiguration: EbsBlockDeviceConfigs: - VolumeSpecification: SizeInGB: '320' VolumeType: gp2 VolumesPerInstance: '1' EbsOptimized: 'true' InstanceCount: !FindInMap - ClusterConfigurations - emr - coreInstanceCount InstanceType: !FindInMap - ClusterConfigurations - emr - coreInstanceType Market: ON_DEMAND Name: coreNode MasterInstanceGroup: EbsConfiguration: EbsBlockDeviceConfigs: - VolumeSpecification: SizeInGB: '320' VolumeType: gp2 VolumesPerInstance: '1' EbsOptimized: 'true' InstanceCount: 1 InstanceType: !FindInMap - ClusterConfigurations - emr - masterInstanceType Market: ON_DEMAND Name: masterNode Ec2SubnetId: !Ref PrivateSubnet1 EmrManagedMasterSecurityGroup: !Ref masterSecurityGroup EmrManagedSlaveSecurityGroup: !Ref slaveSecurityGroup ServiceAccessSecurityGroup: !Ref emrServiceSecurityGroup TerminationProtected: false JobFlowRole: !Ref EMRClusterinstanceProfile LogUri: !Sub 's3://${S3Bucket}/artifacts/sma-milestone1/' Name: !Sub '${AWS::StackName}-EMR-Cluster' 
ReleaseLabel: !FindInMap - ClusterConfigurations - emr - emrReleaseVersion ServiceRole: !Ref EMRClusterServiceRole VisibleToAllUsers: true SecurityConfiguration: !Ref securityConfiguration AutoTerminationPolicy: IdleTimeout: 'Ref': IdleTimeout #################################################################################################################### #### EMR Clean-Up on Stack Delete #################################################################################################################### CleanUpBucketonDelete: DependsOn: CleanUpBucketonDeleteLambda Type: 'Custom::emptybucket' Properties: ServiceToken: !GetAtt - CleanUpBucketonDeleteLambda - Arn inputBucketName: !Ref S3Bucket CleanUpBucketonDeleteLambda: DependsOn: - S3Bucket - CleanUpBucketonDeleteLambdaRole Type: 'AWS::Lambda::Function' Properties: Description: Empty bucket on delete Handler: index.lambda_handler Role: !GetAtt - CleanUpBucketonDeleteLambdaRole - Arn Runtime: python3.7 Timeout: 60 Code: ZipFile: !Join - |+ - - import json - import boto3 - import urllib3 - '' - 'def empty_bucket(bucket_name):' - ' print("Attempting to empty the bucket {0}".format(bucket_name))' - ' s3_client = boto3.client(''s3'')' - ' s3 = boto3.resource(''s3'')' - '' - ' try:' - ' bucket = s3.Bucket(bucket_name).load()' - ' except ClientError:' - ' print("Bucket {0} does not exist".format(bucket_name))' - ' return' - ' # Confirm if versioning is enabled' - ' version_status = s3_client.get_bucket_versioning(Bucket=bucket_name)' - ' status = version_status.get(''Status'','''')' - ' if status == ''Enabled'':' - ' version_status = s3_client.put_bucket_versioning(Bucket=bucket_name,' - ' VersioningConfiguration={''Status'': ''Suspended''})' - ' version_paginator = s3_client.get_paginator(''list_object_versions'')' - ' version_iterator = version_paginator.paginate(' - ' Bucket=bucket_name' - ' )' - '' - ' for page in version_iterator:' - ' print(page)' - ' if ''DeleteMarkers'' in page:' - ' delete_markers = 
page[''DeleteMarkers'']' - ' if delete_markers is not None:' - ' for delete_marker in delete_markers:' - ' key = delete_marker[''Key'']' - ' versionId = delete_marker[''VersionId'']' - ' s3_client.delete_object(Bucket=bucket_name, Key=key, VersionId=versionId)' - ' if ''Versions'' in page and page[''Versions''] is not None:' - ' versions = page[''Versions'']' - ' for version in versions:' - ' print(version)' - ' key = version[''Key'']' - ' versionId = version[''VersionId'']' - ' s3_client.delete_object(Bucket=bucket_name, Key=key, VersionId=versionId)' - ' object_paginator = s3_client.get_paginator(''list_objects_v2'')' - ' object_iterator = object_paginator.paginate(' - ' Bucket=bucket_name' - ' )' - ' for page in object_iterator:' - ' if ''Contents'' in page:' - ' for content in page[''Contents'']:' - ' key = content[''Key'']' - ' s3_client.delete_object(Bucket=bucket_name, Key=content[''Key''])' - ' print("Successfully emptied the bucket {0}".format(bucket_name))' - '' - '' - '' - 'def lambda_handler(event, context):' - ' try:' - ' bucket = event[''ResourceProperties''][''inputBucketName'']' - ' if event[''RequestType''] == ''Delete'':' - ' empty_bucket(bucket)' - ' sendResponse(event, context, "SUCCESS")' - ' except Exception as e:' - ' print(e)' - ' sendResponse(event, context, "FAILED")' - '' - 'def sendResponse(event, context, status):' - ' http = urllib3.PoolManager()' - ' response_body = {''Status'': status,' - ' ''Reason'': ''Log stream name: '' + context.log_stream_name,' - ' ''PhysicalResourceId'': context.log_stream_name,' - ' ''StackId'': event[''StackId''],' - ' ''RequestId'': event[''RequestId''],' - ' ''LogicalResourceId'': event[''LogicalResourceId''],' - ' ''Data'': json.loads("{}")}' - ' http.request(''PUT'', event[''ResponseURL''], body=json.dumps(response_body))' CleanUpBucketonDeleteLambdaRole: Type: 'AWS::IAM::Role' Properties: AssumeRolePolicyDocument: Version: 2012-10-17 Statement: - Effect: Allow Principal: Service: - lambda.amazonaws.com 
Action: - 'sts:AssumeRole' Path: / Policies: - PolicyName: !Sub 'CleanUpBucketonDeleteLambdaPolicy-${AWS::StackName}' PolicyDocument: Version: 2012-10-17 Statement: - Effect: Allow Action: - s3:Get* - s3:List* - s3:DeleteObject - s3:DeleteObjectVersion - s3:PutBucketVersioning Resource: - !GetAtt - S3Bucket - Arn - !Join - '' - - !GetAtt - S3Bucket - Arn - / - !Join - '' - - !GetAtt - S3Bucket - Arn - /* - Effect: Deny Action: - 's3:DeleteBucket' Resource: '*' ManagedPolicyArns: - arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole #################################################################################################################### #### Life-Cycle Configuration #################################################################################################################### LifeCycleConfigLambdaRole: Type: 'AWS::IAM::Role' Properties: AssumeRolePolicyDocument: Version: 2012-10-17 Statement: - Effect: Allow Principal: Service: - lambda.amazonaws.com Action: - 'sts:AssumeRole' Path: / Policies: - PolicyName: !Sub 'LifeCycleConfigLambdaPolicy-${AWS::StackName}' PolicyDocument: Version: 2012-10-17 Statement: - Effect: Allow Action: - 'sagemaker:CreateStudioLifecycleConfig' - 'sagemaker:DeleteStudioLifecycleConfig' Resource: !Sub 'arn:aws:sagemaker:${AWS::Region}:${AWS::AccountId}:studio-lifecycle-config/*' - Effect: Allow Action: - 'sagemaker:UpdateUserProfile' - 'sagemaker:DeleteUserProfile' Resource: !Sub 'arn:aws:sagemaker:${AWS::Region}:${AWS::AccountId}:user-profile/*' ManagedPolicyArns: - arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole LifeCycleConfigLambda: DependsOn: - SageMakerExecutionRole1 - SageMakerExecutionRole2 - StudioUserProfile1 - StudioUserProfile2 - MarketingDataAccessRole - SalesAccessRole - ElectronicsDataAccessRole Type: 'AWS::Lambda::Function' Properties: Description: Add LifeCycle Configuration files to Studio Handler: index.lambda_handler Role: !GetAtt LifeCycleConfigLambdaRole.Arn Runtime: 
python3.9 Timeout: 60 Code: ZipFile: !Join - |+ - - 'import boto3' - 'import base64' - 'import cfnresponse' - '' - 'client = boto3.client(''sagemaker'')' - 'lcc_up1 = ''\n''.join((' - ' ''#!/bin/bash'',' - ' '''',' - ' ''set -eux'',' - ' '''',' - ' ''FILE_DIRECTORY="/home/sagemaker-user/.sagemaker-analytics-configuration-DO_NOT_DELETE"'',' - ' ''FILE_NAME="emr-configurations-DO_NOT_DELETE.json"'',' - ' ''FILE="$FILE_DIRECTORY/$FILE_NAME"'',' - ' '''',' - ' ''mkdir -p $FILE_DIRECTORY'',' - ' '''',' - ' ''cat <<\''EOF\'' > "$FILE"'',' - ' ''{'',' - ' '' "emr-execution-role-arns":'',' - ' '' {'',' - !Sub ' '' "${AWS::AccountId}": [ '',' - !Sub ' '' "${SalesAccessRole.Arn}",'',' - !Sub ' '' "${ElectronicsDataAccessRole.Arn}"'',' - ' '' ]'',' - ' '' }'',' - ' ''}'',' - ' ''EOF''' - '))' - '' - 'lcc_up2 = ''\n''.join((' - ' ''#!/bin/bash'',' - ' '''',' - ' ''set -eux'',' - ' '''',' - ' ''FILE_DIRECTORY="/home/sagemaker-user/.sagemaker-analytics-configuration-DO_NOT_DELETE"'',' - ' ''FILE_NAME="emr-configurations-DO_NOT_DELETE.json"'',' - ' ''FILE="$FILE_DIRECTORY/$FILE_NAME"'',' - ' '''',' - ' ''mkdir -p $FILE_DIRECTORY'',' - ' '''',' - ' ''cat <<\''EOF\'' > "$FILE"'',' - ' ''{'',' - ' '' "emr-execution-role-arns":'',' - ' '' {'',' - !Sub ' '' "${AWS::AccountId}": [ '',' - !Sub ' '' "${MarketingDataAccessRole.Arn}"''' - ' '' ]'',' - ' '' }'',' - ' ''}'',' - ' ''EOF''' - '))' - '' - !Sub 'lcc_name_up1 = "${AWS::StackName}-LCC-UP1"' - !Sub 'lcc_name_up2 = "${AWS::StackName}-LCC-UP2"' - !Sub 'up1 = "${StudioUserProfile1}"' - !Sub 'up2 = "${StudioUserProfile2}"' - '' - 'def get_lcc_base64_string(lcc_string):' - ' lcc_bytes = lcc_string.encode("ascii")' - ' base64_lcc_bytes = base64.b64encode(lcc_bytes)' - ' base64_lcc_string = base64_lcc_bytes.decode("ascii")' - ' return base64_lcc_string' - '' - '' - 'def apply_lcc_to_user_profile(base64_lcc_string, lcc_config_name, profile):' - ' response = client.create_studio_lifecycle_config(' - ' 
StudioLifecycleConfigName=lcc_config_name,' - ' StudioLifecycleConfigContent=base64_lcc_string,' - ' StudioLifecycleConfigAppType="JupyterServer",' - ' )' - '' - ' lcc_arn = response["StudioLifecycleConfigArn"]' - ' update_up = client.update_user_profile(' - ' DomainId=profile.split("|")[1],' - ' UserProfileName=profile.split("|")[0],' - ' UserSettings={' - ' "JupyterServerAppSettings": {' - ' "DefaultResourceSpec": {"LifecycleConfigArn": lcc_arn},' - ' "LifecycleConfigArns": [lcc_arn]' - ' }' - ' }' - ' )' - ' return update_up' - '' - '' - 'def lambda_handler(event, context):' - ' print(event)' - ' try:' - ' base64_lcc_up1_string = get_lcc_base64_string(lcc_up1)' - ' base64_lcc_up2_string = get_lcc_base64_string(lcc_up2)' - ' updated_up1 = apply_lcc_to_user_profile(' - ' base64_lcc_up1_string,' - ' lcc_name_up1,' - ' up1' - ' )' - ' print("Response User Profile LCC update for UP1")' - ' print(updated_up1)' - '' - ' updated_up2 = apply_lcc_to_user_profile(' - ' base64_lcc_up2_string,' - ' lcc_name_up2,' - ' up2' - ' )' - '' - ' print("Response User Profile LCC update for UP2")' - ' print(updated_up2)' - '' - ' response_value = 120' - ' response_data = {"Data": response_value}' - ' cfnresponse.send(event, context, cfnresponse.SUCCESS, response_data)' - ' except Exception as e:' - ' if "RequestType" in event:' - ' if event["RequestType"] == "Delete":' - ' try:' - ' response1 = client.delete_studio_lifecycle_config(' - ' StudioLifecycleConfigName=lcc_name_up1' - ' )' - ' print(response1)' - ' response2 = client.delete_studio_lifecycle_config(' - ' StudioLifecycleConfigName=lcc_name_up2' - ' )' - ' print(response2)' - ' response_data = {}' - ' cfnresponse.send(event, context, cfnresponse.SUCCESS, response_data)' - ' return' - ' except Exception as e2:' - ' print(e2)' - ' response_data = {"Data": str(e2)}' - ' cfnresponse.send(event, context, cfnresponse.SUCCESS, response_data)' - ' return' - ' print(e)' - ' response_data = {"Data": str(e)}' - ' cfnresponse.send(event, 
context, cfnresponse.FAILED, response_data)' LifeCycleConfigLambdaInvoke: Type: AWS::CloudFormation::CustomResource DependsOn: LifeCycleConfigLambda Version: "1.0" Properties: ServiceToken: !GetAtt LifeCycleConfigLambda.Arn #################################################################################################################### #### Stack Outputs #################################################################################################################### Outputs: VPCandCIDR: Description: VPC ID and CIDR block Value: !Join - ' - ' - - !Ref VPC - !GetAtt - VPC - CidrBlock PublicSubnets: Description: All public subnet created Value: !Join - '' - - !Ref PublicSubnet1 PrivateSubnets: Description: All private subnet created Value: !Join - ', ' - - !Ref PrivateSubnet1 S3BucketName: Description: Bucket Name for Amazon S3 bucket Value: !Ref S3Bucket EMRMasterDNSName: Description: DNS Name of the EMR Master Node Value: !GetAtt - EMRCluster - MasterPublicDNS