AWSTemplateFormatVersion: 2010-09-09 Description: >- This template creates resources for a Databricks workspace in your AWS account. Contact Databricks at quickstart-support@databricks.com if you have any questions. (qs-1rrjcbj76) Metadata: cfn-lint: config: ignore_checks: - W9006 # temporary to get rid of warnings - W9001 AWS::CloudFormation::Interface: ParameterGroups: - Label: default: "Workspace configuration" Parameters: - AccountId - Username - Password - WorkspaceName - Label: default: "Required IAM role and S3 bucket configuration" Parameters: - IAMRole - BucketName - Label: default: (Optional) Existing data storage Parameters: - ExistingDataBucketName - Label: default: AWS PrivateLink Configuration Parameters: - PrivateLinkMode ParameterLabels: AccountId: default: Databricks account ID Username: default: Databricks account email address Password: default: Databricks account password WorkspaceName: default: Workspace name IAMRole: default: Cross-account IAM role name BucketName: default: Root S3 bucket name for the Databricks File System ExistingDataBucketName: default: Existing S3 bucket for data. Setting this to a non empty value will trigger the creation of an instance profile PrivateLinkMode: default: AWS PrivateLink mode Outputs: DBManagedVPCIAMRoleARN: Description: ARN of the cross-account IAM role Value: !GetAtt accessRoleDBManagedVPC.Arn DefaultInstanceProfileARN: Condition: CreateDefaultInstanceProfile Description: ARN of the default instance profile Value: !GetAtt DefaultInstanceProfile.Arn S3BucketName: Description: Name of the S3 root bucket Value: !Ref assetsS3Bucket CredentialsId: Description: Credential ID Value: !Ref createCredentials ExternalId: Description: Databricks external ID Value: !GetAtt createCredentials.ExternalId StorageConfigId: Description: Storage configuration ID Value: !Ref createStorageConfiguration WorkspaceURL: Description: URL of the workspace Value: !Sub https://${createWorkspace.DeploymentName}.cloud.databricks.com WorkspaceStatus: Description: Status of the requested workspace Value: !GetAtt createWorkspace.WorkspaceStatus WorkspaceStatusMessage: Description: Detailed status description of the requested workspace Value: !GetAtt createWorkspace.WorkspaceStatusMsg PricingTier: Description: The pricing tier of the workspace. See https://databricks.com/product/aws-pricing. Value: !GetAtt createWorkspace.PricingTier Parameters: AccountId: Description: "Find your account ID at https://accounts.cloud.databricks.com" AllowedPattern: '^[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12}$' MinLength: '36' Type: String Default: aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee Username: Description: "Your case-sensitive Databricks account email address." AllowedPattern: '^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+.[a-zA-Z0-9-.]+$' ConstraintDescription: Must be a valid email format. MinLength: '8' Type: String Password: Description: "This is the password you set for your Databricks account." MinLength: '8' NoEcho: 'true' Type: String WorkspaceName: Description: "Human-readable name for this workspace." MinLength: '1' MaxLength: '64' Type: String IAMRole: Description: "Specify a unique cross-account IAM role name. For naming rules, see https://docs.aws.amazon.com/IAM/latest/APIReference/API_CreateRole.html." AllowedPattern: '[\w+=,@-]+' Type: String MinLength: '1' MaxLength: '64' BucketName: Description: "Specify a unique name for the S3 bucket where Databricks will store metadata for your workspace. Use only alphanumeric characters. For naming rules, see https://docs.aws.amazon.com/AmazonS3/latest/dev/BucketRestrictions.html." AllowedPattern: '(?=^.{3,63}$)(?!xn--)([a-z0-9](?:[a-z0-9-]*)[a-z0-9])$' MinLength: '3' MaxLength: '63' Type: String ConstraintDescription: The Quick Start bucket name can include numbers, lowercase letters, uppercase letters, and hyphens (-). It cannot start or end with a hyphen (-). ExistingDataBucketName: Description: The name of an existing S3 bucket with your data Type: String Default: '' PrivateLinkMode: Description: "Specify whether endpoints should be set up for the Databricks VPC endpoint services" Type: String AllowedValues: - 'Enabled' - 'Disabled' Default: 'Disabled' Conditions: # Checks if private link has been enabled IsPrivateLinkEnabled: !Equals [!Ref PrivateLinkMode, 'Enabled'] # Checks if the region supports 3 availability zones IsThirdAvailabilityZoneSupported: !Not [!Or [!Equals [!Ref AWS::Region, 'us-west-1'], !Equals [!Ref AWS::Region, 'sa-east-1']]] # Checks whether it should create a default instance profile CreateDefaultInstanceProfile: !Not [!Equals [!Ref ExistingDataBucketName, '']] Rules: # Check whether the current AWS region is supported SupportedRegion: Assertions: - Assert: !Contains - - ap-northeast-1 - ap-northeast-2 - ap-south-1 - ap-southeast-1 - ap-southeast-2 - ca-central-1 - eu-central-1 - eu-west-1 - eu-west-2 - us-east-1 - us-east-2 - us-west-1 - us-west-2 - !Ref AWS::Region AssertDescription: The current AWS region is not supported for E2 deployment. Switch to one of those listed under https://docs.databricks.com/administration-guide/cloud-configurations/aws/regions.html Mappings: DatabricksAddresses: us-east-1: "workspace": "com.amazonaws.vpce.us-east-1.vpce-svc-09143d1e626de2f04" "backend": "com.amazonaws.vpce.us-east-1.vpce-svc-00018a8c3ff62ffdf" us-east-2: "workspace": "com.amazonaws.vpce.us-east-2.vpce-svc-041dc2b4d7796b8d3" "backend": "com.amazonaws.vpce.us-east-2.vpce-svc-090a8fab0d73e39a6" us-west-1: "workspace": "UNSUPPORTED" "backend": "UNSUPPORTED" us-west-2: "workspace": "com.amazonaws.vpce.us-west-2.vpce-svc-0129f463fcfbc46c5" "backend": "com.amazonaws.vpce.us-west-2.vpce-svc-0158114c0c730c3bb" eu-west-1: "workspace": "com.amazonaws.vpce.eu-west-1.vpce-svc-0da6ebf1461278016" "backend": "com.amazonaws.vpce.eu-west-1.vpce-svc-09b4eb2bc775f4e8c" eu-west-2: "workspace": "com.amazonaws.vpce.eu-west-2.vpce-svc-01148c7cdc1d1326c" "backend": "com.amazonaws.vpce.eu-west-2.vpce-svc-05279412bf5353a45" eu-central-1: "workspace": "com.amazonaws.vpce.eu-central-1.vpce-svc-081f78503812597f7" "backend": "com.amazonaws.vpce.eu-central-1.vpce-svc-08e5dfca9572c85c4" ap-southeast-1: "workspace": "com.amazonaws.vpce.ap-southeast-1.vpce-svc-02535b257fc253ff4" "backend": "com.amazonaws.vpce.ap-southeast-1.vpce-svc-0557367c6fc1a0c5c" ap-southeast-2: "workspace": "com.amazonaws.vpce.ap-southeast-2.vpce-svc-0b87155ddd6954974" "backend": "com.amazonaws.vpce.ap-southeast-2.vpce-svc-0b4a72e8f825495f6" ap-northeast-1: "workspace": "com.amazonaws.vpce.ap-northeast-1.vpce-svc-02691fd610d24fd64" "backend": "com.amazonaws.vpce.ap-northeast-1.vpce-svc-02aa633bda3edbec0" ap-south-1: "workspace": "com.amazonaws.vpce.ap-south-1.vpce-svc-0dbfe5d9ee18d6411" "backend": "com.amazonaws.vpce.ap-south-1.vpce-svc-03fd4d9b61414f3de" ca-central-1: "workspace": "com.amazonaws.vpce.ca-central-1.vpce-svc-0205f197ec0e28d65" "backend": "com.amazonaws.vpce.ca-central-1.vpce-svc-0c4e25bdbcbfbb684" Resources: # The VPC DBSVpc: Type: AWS::EC2::VPC Properties: CidrBlock: 10.52.0.0/16 EnableDnsHostnames: true EnableDnsSupport: true Tags: - Key: Name Value: !Sub ${AWS::StackName}-DatabricksVPC # Internet gateway DBSVpcIgw: Type: AWS::EC2::InternetGateway DependsOn: DBSVpc Properties: Tags: - Key: Name Value: !Sub ${AWS::StackName}-DatabricksVpcIgw #... attached to the VPC DBSVpcIgwAttachment: Type: AWS::EC2::VPCGatewayAttachment Properties: InternetGatewayId: !GetAtt DBSVpcIgw.InternetGatewayId VpcId: !Ref DBSVpc # The subnet for the NAT Gateway DBSNatSubnet: Type: AWS::EC2::Subnet Properties: VpcId: !Ref DBSVpc CidrBlock: 10.52.0.0/24 AvailabilityZone: !Select [0, !GetAZs ""] MapPublicIpOnLaunch: true Tags: - Key: Name Value: !Sub ${AWS::StackName}-DatabricksNatSubnet # The subnets for the VPC Endpoints DBSEndpointSubnet1: Type: AWS::EC2::Subnet Properties: VpcId: !Ref DBSVpc CidrBlock: 10.52.6.0/24 AvailabilityZone: !Select [0, !GetAZs ""] MapPublicIpOnLaunch: true Tags: - Key: Name Value: !Sub ${AWS::StackName}-DatabricksEndpointSubnet1 DBSEndpointSubnet2: Type: AWS::EC2::Subnet Properties: VpcId: !Ref DBSVpc CidrBlock: 10.52.7.0/24 AvailabilityZone: !Select [1, !GetAZs ""] MapPublicIpOnLaunch: true Tags: - Key: Name Value: !Sub ${AWS::StackName}-DatabricksEndpointSubnet2 DBSEndpointSubnet3: Condition: IsThirdAvailabilityZoneSupported Type: AWS::EC2::Subnet Properties: VpcId: !Ref DBSVpc CidrBlock: 10.52.8.0/24 AvailabilityZone: !Select [2, !GetAZs ""] MapPublicIpOnLaunch: true Tags: - Key: Name Value: !Sub ${AWS::StackName}-DatabricksEndpointSubnet3 # The private subnets for the Databricks clusters DBSClusterSubnet1: Type: AWS::EC2::Subnet Properties: VpcId: !Ref DBSVpc CidrBlock: 10.52.160.0/19 AvailabilityZone: !Select [0, !GetAZs ""] MapPublicIpOnLaunch: false Tags: - Key: Name Value: !Sub ${AWS::StackName}-DatabricksClusterSubnet1 DBSClusterSubnet2: Type: AWS::EC2::Subnet Properties: VpcId: !Ref DBSVpc CidrBlock: 10.52.192.0/19 AvailabilityZone: !Select [1, !GetAZs ""] MapPublicIpOnLaunch: false Tags: - Key: Name Value: !Sub ${AWS::StackName}-DatabricksClusterSubnet2 DBSClusterSubnet3: Condition: IsThirdAvailabilityZoneSupported Type: AWS::EC2::Subnet Properties: VpcId: !Ref DBSVpc CidrBlock: 10.52.224.0/19 AvailabilityZone: !Select [2, !GetAZs ""] MapPublicIpOnLaunch: false Tags: - Key: Name Value: !Sub ${AWS::StackName}-DatabricksClusterSubnet3 # The Elastic IP for the NAT Gateway ElasticIPForNat: Type: AWS::EC2::EIP Properties: Domain: vpc Tags: - Key: Name Value: !Sub ${AWS::StackName}-DatabricksNatElasticIP # The NAT gateway DBSNat: Type: AWS::EC2::NatGateway Properties: AllocationId: !GetAtt ElasticIPForNat.AllocationId ConnectivityType: public SubnetId: !Ref DBSNatSubnet Tags: - Key: Name Value: !Sub ${AWS::StackName}-DatabricksNat # The route table attached to the nat subnet DBSNatRouteTable: Type: AWS::EC2::RouteTable Properties: VpcId: !Ref DBSVpc Tags: - Key: Name Value: !Sub ${AWS::StackName}-DatabricksNatRouteTable # Routes to the internet RouteToInternetInNatRouteTable: DependsOn: DBSVpcIgwAttachment Type: AWS::EC2::Route Properties: RouteTableId: !Ref DBSNatRouteTable DestinationCidrBlock: 0.0.0.0/0 GatewayId: !Ref DBSVpcIgw # Associate the route table to the subnet NatSubnetRouteTableAssociation: DependsOn: RouteToInternetInNatRouteTable Type: AWS::EC2::SubnetRouteTableAssociation Properties: RouteTableId: !Ref DBSNatRouteTable SubnetId: !Ref DBSNatSubnet # The route table for the private subnets DBSPrivateRouteTable: Type: AWS::EC2::RouteTable Properties: VpcId: !Ref DBSVpc Tags: - Key: Name Value: !Sub ${AWS::StackName}-DatabricksPrivateRouteTable RouteToInternetInPrivateRouteTable: Type: AWS::EC2::Route Properties: RouteTableId: !Ref DBSPrivateRouteTable DestinationCidrBlock: 0.0.0.0/0 NatGatewayId: !Ref DBSNat PrivateSubnet1RouteTableAssociation: DependsOn: RouteToInternetInPrivateRouteTable Type: AWS::EC2::SubnetRouteTableAssociation Properties: RouteTableId: !Ref DBSPrivateRouteTable SubnetId: !Ref DBSClusterSubnet1 PrivateSubnet2RouteTableAssociation: DependsOn: RouteToInternetInPrivateRouteTable Type: AWS::EC2::SubnetRouteTableAssociation Properties: RouteTableId: !Ref DBSPrivateRouteTable SubnetId: !Ref DBSClusterSubnet2 PrivateSubnet3RouteTableAssociation: Condition: IsThirdAvailabilityZoneSupported DependsOn: RouteToInternetInPrivateRouteTable Type: AWS::EC2::SubnetRouteTableAssociation Properties: RouteTableId: !Ref DBSPrivateRouteTable SubnetId: !Ref DBSClusterSubnet3 # The S3 gateway endpoint S3GatewayEndpoint: Type: AWS::EC2::VPCEndpoint Properties: ServiceName: !Sub com.amazonaws.${AWS::Region}.s3 VpcEndpointType: Gateway VpcId: !Ref DBSVpc RouteTableIds: - !Ref DBSPrivateRouteTable # The security group for the workspace DBSWorkspaceSecurityGroup: Type: AWS::EC2::SecurityGroup Properties: GroupName: !Sub ${AWS::StackName}-DBSWorkspaceSG VpcId: !Ref DBSVpc GroupDescription: Allow access from within the same security group Tags: - Key: Name Value: !Sub ${AWS::StackName}-DBSWorkspaceSG # Allow all access from the same security group DBSWorkspaceSecurityGroupDefaultTcpIngress: Type: AWS::EC2::SecurityGroupIngress Properties: GroupId: !GetAtt DBSWorkspaceSecurityGroup.GroupId Description: Allow all tcp inbound access from the same security group SourceSecurityGroupId: !GetAtt DBSWorkspaceSecurityGroup.GroupId IpProtocol: tcp FromPort: 0 ToPort: 65535 DBSWorkspaceSecurityGroupDefaultUdpIngress: Type: AWS::EC2::SecurityGroupIngress Properties: GroupId: !GetAtt DBSWorkspaceSecurityGroup.GroupId Description: Allow all udp inbound access from the same security group SourceSecurityGroupId: !GetAtt DBSWorkspaceSecurityGroup.GroupId IpProtocol: udp FromPort: 0 ToPort: 65535 DBSWorkspaceSecurityGroupDefaultTcpEgress: Type: AWS::EC2::SecurityGroupEgress Properties: GroupId: !GetAtt DBSWorkspaceSecurityGroup.GroupId Description: Allow all tcp output access from the same security group DestinationSecurityGroupId: !GetAtt DBSWorkspaceSecurityGroup.GroupId IpProtocol: tcp FromPort: 0 ToPort: 65535 DBSWorkspaceSecurityGroupDefaultUdpEgress: Type: AWS::EC2::SecurityGroupEgress Properties: GroupId: !GetAtt DBSWorkspaceSecurityGroup.GroupId Description: Allow all udp output access from the same security group DestinationSecurityGroupId: !GetAtt DBSWorkspaceSecurityGroup.GroupId IpProtocol: udp FromPort: 0 ToPort: 65535 DBSWorkspaceSecurityGroupEgressForHttps: Type: AWS::EC2::SecurityGroupEgress Properties: GroupId: !GetAtt DBSWorkspaceSecurityGroup.GroupId Description: Allow accessing external services using HTTPS CidrIp: 0.0.0.0/0 IpProtocol: tcp FromPort: 443 ToPort: 443 DBSWorkspaceSecurityGroupEgressForMetastore: Type: AWS::EC2::SecurityGroupEgress Properties: GroupId: !GetAtt DBSWorkspaceSecurityGroup.GroupId Description: Allow accessing the Databricks metastore CidrIp: 0.0.0.0/0 IpProtocol: tcp FromPort: 3306 ToPort: 3306 # The STS VPC endpoint STSInterfaceEndpoint: Type: AWS::EC2::VPCEndpoint Metadata: cfn-lint: config: ignore_checks: - EIAMPolicyWildcardResource - EIAMAccountIDInPrincipal ignore_reasons: EIAMPolicyWildcardResource: "Need to manage databricks workspaces" EIAMAccountIDInPrincipal: "Hardcoded account ID needed for configuration: https://docs.databricks.com/administration-guide/account-settings/aws-accounts.html#step-2-create-a-cross-account-role-and-an-access-policy" Properties: ServiceName: !Sub com.amazonaws.${AWS::Region}.sts VpcEndpointType: Interface VpcId: !Ref DBSVpc PrivateDnsEnabled: true SecurityGroupIds: - !GetAtt DBSWorkspaceSecurityGroup.GroupId SubnetIds: - !Ref DBSEndpointSubnet1 - !Ref DBSEndpointSubnet2 - !If [IsThirdAvailabilityZoneSupported, !Ref DBSEndpointSubnet3, !Ref AWS::NoValue] PolicyDocument: Statement: - Effect: Allow Principal: "AWS": !Ref AWS::AccountId Action: - sts:AssumeRole - sts:GetAccessKeyInfo - sts:GetSessionToken - sts:DecodeAuthorizationMessage - sts:TagSession Resource: "*" - Effect: Allow Principal: "AWS": - arn:aws:iam::414351767826:user/databricks-datasets-readonly-user - "414351767826" Action: - sts:AssumeRole - sts:GetSessionToken - sts:TagSession Resource: "*" # The Kinesis endpoint KinesisInterfaceEndpoint: Type: AWS::EC2::VPCEndpoint Metadata: cfn-lint: config: ignore_checks: - EIAMAccountIDInPrincipal ignore_reasons: EIAMAccountIDInPrincipal: "Hardcoded account ID needed for configuration: https://docs.databricks.com/administration-guide/account-settings/aws-accounts.html#step-2-create-a-cross-account-role-and-an-access-policy" Properties: ServiceName: !Sub com.amazonaws.${AWS::Region}.kinesis-streams VpcEndpointType: Interface VpcId: !Ref DBSVpc PrivateDnsEnabled: true SecurityGroupIds: - !GetAtt DBSWorkspaceSecurityGroup.GroupId SubnetIds: - !Ref DBSEndpointSubnet1 - !Ref DBSEndpointSubnet2 - !If [IsThirdAvailabilityZoneSupported, !Ref DBSEndpointSubnet3, !Ref AWS::NoValue] PolicyDocument: Statement: - Effect: Allow Principal: "AWS": - "414351767826" - !Ref AWS::AccountId Action: - kinesis:PutRecord - kinesis:PutRecords - kinesis:DescribeStream Resource: !Sub arn:${AWS::Partition}:kinesis:${AWS::Region}:414351767826:stream/* # The Databricks REST API endpoint DBSRestApiInterfaceEndpoint: Condition: IsPrivateLinkEnabled Type: AWS::EC2::VPCEndpoint Properties: ServiceName: !FindInMap [DatabricksAddresses, !Ref AWS::Region, workspace] VpcEndpointType: Interface VpcId: !Ref DBSVpc PrivateDnsEnabled: true SecurityGroupIds: - !GetAtt DBSWorkspaceSecurityGroup.GroupId SubnetIds: - !Ref DBSEndpointSubnet1 - !Ref DBSEndpointSubnet2 - !If [IsThirdAvailabilityZoneSupported, !Ref DBSEndpointSubnet3, !Ref AWS::NoValue] # The Databricks REST API endpoint DBSRelayInterfaceEndpoint: Condition: IsPrivateLinkEnabled Type: AWS::EC2::VPCEndpoint Properties: ServiceName: !FindInMap [DatabricksAddresses, !Ref AWS::Region, backend] VpcEndpointType: Interface VpcId: !Ref DBSVpc PrivateDnsEnabled: true SecurityGroupIds: - !GetAtt DBSWorkspaceSecurityGroup.GroupId SubnetIds: - !Ref DBSEndpointSubnet1 - !Ref DBSEndpointSubnet2 - !If [IsThirdAvailabilityZoneSupported, !Ref DBSEndpointSubnet3, !Ref AWS::NoValue] WaitForVpc: Type: AWS::CloudFormation::WaitConditionHandle Metadata: VpcReady: - !Ref NatSubnetRouteTableAssociation - !Ref PrivateSubnet1RouteTableAssociation - !Ref PrivateSubnet2RouteTableAssociation - !If [IsThirdAvailabilityZoneSupported, !Ref PrivateSubnet3RouteTableAssociation, !Ref AWS::NoValue] # Data bucket role and instance profile DefaultInstanceProfileRole: Condition: CreateDefaultInstanceProfile Type: AWS::IAM::Role Metadata: cfn-lint: config: ignore_checks: - EIAMAccountIDInPrincipal ignore_reasons: EIAMAccountIDInPrincipal: "Hardcoded account ID needed for configuration: https://docs.databricks.com/administration-guide/account-settings/aws-accounts.html#step-2-create-a-cross-account-role-and-an-access-policy" Properties: RoleName: !Sub ${AWS::StackName}-DefaultInstanceProfileRole AssumeRolePolicyDocument: Statement: - Effect: Allow Principal: Service: - ec2.amazonaws.com Action: - sts:AssumeRole - Effect: Allow Principal: AWS: - arn:aws:iam::790110701330:role/serverless-customer-resource-role Action: - sts:AssumeRole Condition: StringEquals: sts:ExternalId: - !Sub "databricks-serverless-${createWorkspace}" Path: "/" Policies: - PolicyName: !Sub ${AWS::StackName}-DefaultInstanceProfilePolicy PolicyDocument: Statement: - Effect: Allow Action: - s3:ListBucket - s3:PutObject - s3:GetObject - s3:DeleteObject - s3:PutObjectAcl Resource: - !Sub arn:${AWS::Partition}:s3:::${ExistingDataBucketName}/* - !Sub arn:${AWS::Partition}:s3:::${ExistingDataBucketName} DefaultInstanceProfile: Condition: CreateDefaultInstanceProfile Type: AWS::IAM::InstanceProfile Properties: InstanceProfileName: !Sub ${AWS::StackName}-DefaultInstanceProfile Path: "/" Roles: - Ref: DefaultInstanceProfileRole # Cross-account access role for the Databricks Control Plane accessRoleDBManagedVPC: Type: 'AWS::IAM::Role' Metadata: cfn-lint: config: ignore_checks: - EIAMPolicyWildcardResource - EIAMAccountIDInPrincipal ignore_reasons: EIAMPolicyWildcardResource: "Need to manage databricks workspaces" EIAMAccountIDInPrincipal: "Hardcoded account ID needed for configuration: https://docs.databricks.com/administration-guide/account-settings/aws-accounts.html#step-2-create-a-cross-account-role-and-an-access-policy" Properties: RoleName: !Ref IAMRole AssumeRolePolicyDocument: Statement: - Action: 'sts:AssumeRole' Condition: "StringEquals": "sts:ExternalId": !Ref AccountId Effect: Allow Principal: "AWS": "414351767826" Version: '2012-10-17' Path: / Policies: - PolicyDocument: Version: "2012-10-17" Statement: - Sid: NonResourceBasedPermissions Effect: Allow Action: - ec2:CancelSpotInstanceRequests - ec2:DescribeAvailabilityZones - ec2:DescribeIamInstanceProfileAssociations - ec2:DescribeInstanceStatus - ec2:DescribeInstances - ec2:DescribeInternetGateways - ec2:DescribeNatGateways - ec2:DescribeNetworkAcls - ec2:DescribePrefixLists - ec2:DescribeReservedInstancesOfferings - ec2:DescribeRouteTables - ec2:DescribeSecurityGroups - ec2:DescribeSpotInstanceRequests - ec2:DescribeSpotPriceHistory - ec2:DescribeSubnets - ec2:DescribeVolumes - ec2:DescribeVpcAttribute - ec2:DescribeVpcs - ec2:CreateTags - ec2:DeleteTags - ec2:RequestSpotInstances - ec2:RunInstances Resource: '*' - Sid: InstancePoolsSupport Effect: Allow Action: - ec2:AssociateIamInstanceProfile - ec2:DisassociateIamInstanceProfile - ec2:ReplaceIamInstanceProfileAssociation Resource: !Sub arn:${AWS::Partition}:ec2:${AWS::Region}:${AWS::AccountId}:instance/* Condition: "StringEquals": "ec2:ResourceTag/Vendor": "Databricks" - Sid: EC2TerminateInstancesTag Effect: Allow Action: ec2:TerminateInstances Resource: !Sub arn:${AWS::Partition}:ec2:${AWS::Region}:${AWS::AccountId}:instance/* Condition: "StringEquals": "ec2:ResourceTag/Vendor": "Databricks" - Sid: EC2AttachDetachVolumeTag Effect: Allow Action: - ec2:AttachVolume - ec2:DetachVolume Resource: - !Sub arn:${AWS::Partition}:ec2:${AWS::Region}:${AWS::AccountId}:instance/* - !Sub arn:${AWS::Partition}:ec2:${AWS::Region}:${AWS::AccountId}:volume/* Condition: "StringEquals": "ec2:ResourceTag/Vendor": "Databricks" - Sid: EC2CreateVolumeByTag Effect: Allow Action: ec2:CreateVolume Resource: !Sub arn:${AWS::Partition}:ec2:${AWS::Region}:${AWS::AccountId}:volume/* Condition: "StringEquals": "ec2:ResourceTag/Vendor": "Databricks" - Sid: EC2DeleteVolumeByTag Effect: Allow Action: ec2:DeleteVolume Resource: !Sub arn:${AWS::Partition}:ec2:${AWS::Region}:${AWS::AccountId}:volume/* Condition: "StringEquals": "ec2:ResourceTag/Vendor": "Databricks" - Sid: AllowCreateServiceLinkedRole Effect: Allow Action: - iam:CreateServiceLinkedRole - iam:PutRolePolicy Resource: !Sub arn:${AWS::Partition}:iam::*:role/aws-service-role/spot.amazonaws.com/AWSServiceRoleForEC2Spot Condition: "StringLike": "iam:AWSServiceName": spot.amazonaws.com - !If - CreateDefaultInstanceProfile - Sid: AllowPassRoleForDefaultInstanceProfile Effect: Allow Action: iam:PassRole Resource: !Sub arn:${AWS::Partition}:iam::${AWS::AccountId}:role/${AWS::StackName}-DefaultInstanceProfileRole - !Ref AWS::NoValue PolicyName: databricks-cross-account-iam-role-policy Tags: - Key: Name Value: 'databricks-trial-quickstart-cloud-formation' # S3 root bucket requirements assetsS3Bucket: Type: AWS::S3::Bucket Properties: BucketName: !Ref BucketName PublicAccessBlockConfiguration: BlockPublicAcls : true BlockPublicPolicy : true IgnorePublicAcls : true RestrictPublicBuckets : true bucketPolicy: Type: 'AWS::S3::BucketPolicy' Metadata: cfn-lint: config: ignore_checks: - EIAMAccountIDInPrincipal ignore_reasons: EIAMAccountIDInPrincipal: "Hardcoded account ID needed for configuration: https://docs.databricks.com/administration-guide/account-settings/aws-accounts.html#step-2-create-a-cross-account-role-and-an-access-policy" Properties: PolicyDocument: Id: MyPolicy Version: 2012-10-17 Statement: - Sid: Grant Databricks Access Effect: Allow Principal: AWS: arn:aws:iam::414351767826:root Action: - s3:GetObject - s3:GetObjectVersion - s3:PutObject - s3:DeleteObject - s3:ListBucket - s3:GetBucketLocation Resource: - !Sub arn:${AWS::Partition}:s3:::${assetsS3Bucket}/* - !Sub arn:${AWS::Partition}:s3:::${assetsS3Bucket} Bucket: !Ref assetsS3Bucket # Workspace credentials createCredentials: Type: Custom::CreateCredentials Properties: ServiceToken: !GetAtt databricksApiFunction.Arn action: CREATE_CREDENTIALS accountId: !Ref AccountId username: !Ref Username password: !Ref Password credentials_name: !Sub ${WorkspaceName}-credentials role_arn: !GetAtt accessRoleDBManagedVPC.Arn user_agent: databricks-CloudFormation-Trial-provider # Workspace storage configuration createStorageConfiguration: Type: Custom::CreateStorageConfigurations Properties: ServiceToken: !GetAtt databricksApiFunction.Arn action: CREATE_STORAGE_CONFIGURATIONS accountId: !Ref AccountId username: !Ref Username password: !Ref Password storage_config_name: !Sub ${WorkspaceName}-storage s3bucket_name: !Ref assetsS3Bucket user_agent: databricks-CloudFormation-Trial-provider WorkspaceVpcEnpoint: Condition: IsPrivateLinkEnabled Type: Custom::WorkspaceVpcEnpoint Properties: ServiceToken: !GetAtt databricksApiFunction.Arn action: CREATE_VPC_ENDPOINT accountId: !Ref AccountId username: !Ref Username password: !Ref Password aws_region: !Ref AWS::Region endpoint_name: !Sub ${AWS::StackName}_workspaceVpcEndpoint vpc_endpoint_id: !Ref DBSRestApiInterfaceEndpoint user_agent: databricks-CloudFormation-Trial-provider BackendVpcEnpoint: Condition: IsPrivateLinkEnabled Type: Custom::BackendVpcEnpoint Properties: ServiceToken: !GetAtt databricksApiFunction.Arn action: CREATE_VPC_ENDPOINT accountId: !Ref AccountId username: !Ref Username password: !Ref Password aws_region: !Ref AWS::Region endpoint_name: !Sub ${AWS::StackName}_workspaceVpcEndpoint vpc_endpoint_id: !Ref DBSRelayInterfaceEndpoint user_agent: databricks-CloudFormation-Trial-provider # Databricks API for network configuration createNetworks: Type: Custom::createNetworks DependsOn: WaitForVpc Properties: ServiceToken: !GetAtt databricksApiFunction.Arn action: CREATE_NETWORKS accountId: !Ref AccountId username: !Ref Username password: !Ref Password network_name: !Sub ${AWS::StackName}-network vpc_id: !Ref DBSVpc subnet_ids: !If - IsThirdAvailabilityZoneSupported - !Sub ${DBSClusterSubnet1}, ${DBSClusterSubnet2}, ${DBSClusterSubnet3} - !Sub ${DBSClusterSubnet1}, ${DBSClusterSubnet2} security_group_ids: !Ref DBSWorkspaceSecurityGroup relay_access_endpoint_id: !If [IsPrivateLinkEnabled, !Ref BackendVpcEnpoint, !Ref AWS::NoValue] rest_access_endpoint_id: !If [IsPrivateLinkEnabled, !Ref WorkspaceVpcEnpoint, !Ref AWS::NoValue] user_agent: databricks-CloudFormation-Trial-provider PrivateAccessConfiguration: Condition: IsPrivateLinkEnabled Type: Custom::PrivateAccessConfiguration Properties: ServiceToken: !GetAtt databricksApiFunction.Arn action: CREATE_PRIVATE_ACCESS_CONFIGURATION accountId: !Ref AccountId username: !Ref Username password: !Ref Password aws_region: !Ref AWS::Region private_access_settings_name: !Sub ${AWS::StackName}_privateAccessSettings public_access_enabled: true allowed_vpc_endpoint_ids: !Ref WorkspaceVpcEnpoint user_agent: databricks-CloudFormation-Trial-provider # Databricks Workspace createWorkspace: Type: Custom::CreateWorkspace Properties: ServiceToken: !GetAtt databricksApiFunction.Arn action: CREATE_WORKSPACES accountId: !Ref AccountId username: !Ref Username password: !Ref Password workspace_name: !Ref WorkspaceName aws_region: !Ref AWS::Region credentials_id: !Ref createCredentials storage_config_id: !Ref createStorageConfiguration network_id: !Ref createNetworks private_access_settings_id: !If [IsPrivateLinkEnabled, !Ref PrivateAccessConfiguration, !Ref AWS::NoValue] user_agent: databricks-CloudFormation-Trial-provider # Registers the instance profile registerInstanceProfile: Condition: CreateDefaultInstanceProfile Type: Custom::RegisterInstanceProfile Properties: ServiceToken: !GetAtt databricksApiFunction.Arn action: REGISTER_INSTANCE_PROFILE accountId: !Ref AccountId username: !Ref Username password: !Ref Password workspace_id: !Ref createWorkspace instance_profile_arn: !GetAtt DefaultInstanceProfile.Arn user_agent: databricks-CloudFormation-Trial-provider # Creates a starter cluster createStarterCluster: Type: Custom::createStarterCluster Properties: ServiceToken: !GetAtt databricksApiFunction.Arn action: CREATE_STARTER_CLUSTER accountId: !Ref AccountId username: !Ref Username password: !Ref Password workspace_deployment_name: !GetAtt createWorkspace.DeploymentName instance_profile_arn: !If [CreateDefaultInstanceProfile, !Ref registerInstanceProfile, !Ref AWS::NoValue] user_agent: databricks-CloudFormation-Trial-provider # The Lambda for the custom resources databricksApiFunction: DependsOn: CopyZips Type: AWS::Lambda::Function Properties: Description: Databricks Account API Handler: rest_client.handler Runtime: python3.8 Role: !GetAtt 'functionRole.Arn' Timeout: 900 Code: S3Bucket: !Ref LambdaZipsBucket S3Key: quickstart-databricks-unified-data-analytics-platform/functions/packages/lambda.zip # IAM Role for lambda function execution functionRole: Type: AWS::IAM::Role Properties: AssumeRolePolicyDocument: Version: '2012-10-17' Statement: - Effect: Allow Principal: Service: lambda.amazonaws.com Action: sts:AssumeRole ManagedPolicyArns: - !Sub arn:${AWS::Partition}:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole - !Sub arn:${AWS::Partition}:iam::aws:policy/IAMFullAccess Tags: - Key: Name Value: 'databricks-CloudFormation-Trial-provider' # Resources to stage lambda.zip file LambdaZipsBucket: Type: AWS::S3::Bucket CopyZips: Type: Custom::CopyZips Properties: ServiceToken: !GetAtt CopyZipsFunction.Arn DestBucket: !Ref LambdaZipsBucket SourceBucket: aws-quickstart Prefix: 'quickstart-databricks-unified-data-analytics-platform/' Objects: - functions/packages/lambda.zip CopyZipsRole: Type: AWS::IAM::Role Properties: AssumeRolePolicyDocument: Version: '2012-10-17' Statement: - Effect: Allow Principal: Service: lambda.amazonaws.com Action: sts:AssumeRole ManagedPolicyArns: - !Sub arn:${AWS::Partition}:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole Path: / Policies: - PolicyName: lambda-copier PolicyDocument: Version: '2012-10-17' Statement: - Effect: Allow Action: - s3:GetObject Resource: - !Sub 'arn:${AWS::Partition}:s3:::aws-quickstart/quickstart-databricks-unified-data-analytics-platform/*' - Effect: Allow Action: - s3:PutObject - s3:DeleteObject Resource: - !Sub 'arn:${AWS::Partition}:s3:::${LambdaZipsBucket}/quickstart-databricks-unified-data-analytics-platform/*' Tags: - Key: Name Value: 'databricks-CloudFormation-Trial-provider' CopyZipsFunction: Type: AWS::Lambda::Function Properties: Description: Copies objects from a source S3 bucket to a destination Handler: index.handler Runtime: python3.8 Role: !GetAtt 'CopyZipsRole.Arn' Timeout: 240 Code: ZipFile: | import json import logging import threading import boto3 import cfnresponse def copy_objects(source_bucket, dest_bucket, prefix, objects): s3 = boto3.client('s3') for o in objects: key = prefix + o copy_source = { 'Bucket': source_bucket, 'Key': key } print('copy_source: %s' % copy_source) print('dest_bucket = %s'%dest_bucket) print('key = %s' %key) s3.copy_object(CopySource=copy_source, Bucket=dest_bucket, Key=key) def delete_objects(bucket, prefix, objects): s3 = boto3.client('s3') objects = {'Objects': [{'Key': prefix + o} for o in objects]} s3.delete_objects(Bucket=bucket, Delete=objects) def timeout(event, context): logging.error('Execution is about to time out, sending failure response to CloudFormation') cfnresponse.send(event, context, cfnresponse.FAILED, {}, None) def handler(event, context): # make sure we send a failure to CloudFormation if the function # is going to timeout timer = threading.Timer((context.get_remaining_time_in_millis() / 1000.00) - 0.5, timeout, args=[event, context]) timer.start() print('Received event: %s' % json.dumps(event)) status = cfnresponse.SUCCESS try: source_bucket = event['ResourceProperties']['SourceBucket'] dest_bucket = event['ResourceProperties']['DestBucket'] prefix = event['ResourceProperties']['Prefix'] objects = event['ResourceProperties']['Objects'] if event['RequestType'] == 'Delete': delete_objects(dest_bucket, prefix, objects) else: copy_objects(source_bucket, dest_bucket, prefix, objects) except Exception as e: logging.error('Exception: %s' % e, exc_info=True) status = cfnresponse.FAILED finally: timer.cancel() cfnresponse.send(event, context, status, {}, None)