---
# CloudFormation template for the "End to End 3D Machine Learning" workshop.
#
# Resources declared here:
#   * a VPC with one subnet, an internet gateway, a route table and an S3
#     gateway endpoint
#   * an optional S3 bucket (see ShouldCreateBucketInputParameter)
#   * an FSx for Lustre file system linked to that bucket
#   * an IAM role, a SageMaker notebook instance and its lifecycle config,
#     which mounts the Lustre file system on every notebook start
#
# NOTE(review): RoleName and NotebookInstanceLifecycleConfigName are fixed
# strings, so a second stack in the same account/region will collide on them.
AWSTemplateFormatVersion: '2010-09-09'
Description: CFN template for Amazon Workshop on End to End 3D Machine Learning

Mappings:
  CidrMappings:
    vpc:
      CIDR: 10.0.0.0/16
    private-subnet:
      CIDR: 10.0.2.0/24

Metadata:
  Author:
    Description: Deep Engines Team
  License:
    Description: >-
      Copyright 2021 Amazon.com, Inc. and its affiliates. All Rights Reserved.

Outputs:
  awsRegionId:
    Description: The AWS Region ID your template was launched in
    Value: !Ref 'AWS::Region'
  privateSubnet:
    Description: Private subnet
    Value: !Ref privateSubnet
  vpc:
    Description: The VPC
    Value: !Ref vpc
  vpcSecurityGroup:
    Description: vpc security group
    Value: !Ref instanceSecurityGroup
  instanceRole:
    Description: Instance Role
    Value: !Ref instanceRole
  FileSystemId:
    Description: FSx Lustre FS ID
    Value: !Ref S3LinkedLustreFS
  S3Endpoint:
    Description: VPC Endpoint for S3
    Value: !Ref S3Endpoint

Parameters:
  sourceCidr:
    Type: String
    Default: 0.0.0.0/0
    Description: Optional - CIDR/IP range for access
  NotebookName:
    Type: String
    Default: ThreeDEndToEnd-NB
    Description: Enter the name of the SageMaker notebook instance.
  VolumeSize:
    Type: Number
    Default: 100
    MinValue: 5
    MaxValue: 16384
    ConstraintDescription: Must be an integer between 5 (GB) and 16384 (16 TB).
    Description: Enter the size of the EBS volume in GB.
  NotebookInstanceType:
    Type: String
    Default: ml.g4dn.2xlarge
    Description: Enter the SageMaker notebook instance type.
  PublicS3Bucket:
    Type: String
    Default: 's3://aev-autonomous-driving-dataset/camera_lidar_semantic_bboxes.tar'
    Description: Public S3 bucket for coco dataset
  ShouldCreateBucketInputParameter:
    Type: String
    AllowedValues:
      - 'True'
      - 'False'
    Default: 'True'
    Description: Select True if you want to create S3 bucket, else select False to keep the existing default bucket

Conditions:
  CreateS3Bucket: !Equals [!Ref ShouldCreateBucketInputParameter, 'True']

Resources:
  # ------------------------------------------------------------------ network
  vpc:
    Type: 'AWS::EC2::VPC'
    Properties:
      CidrBlock: !FindInMap [CidrMappings, vpc, CIDR]
      EnableDnsHostnames: true
      EnableDnsSupport: true
      Tags:
        - Key: Name
          Value: VPC

  internetGateway:
    Type: 'AWS::EC2::InternetGateway'
    DependsOn:
      - vpc

  attachGateway:
    Type: 'AWS::EC2::VPCGatewayAttachment'
    DependsOn:
      - vpc
      - internetGateway
    Properties:
      InternetGatewayId: !Ref internetGateway
      VpcId: !Ref vpc

  # NOTE(review): despite its name, this subnet assigns public IPs on launch
  # and its route table (below) has a default route to the internet gateway.
  # The logical ID is kept because it is exported in Outputs.
  privateSubnet:
    Type: 'AWS::EC2::Subnet'
    DependsOn: attachGateway
    Properties:
      # First availability zone of the region the stack is launched in.
      AvailabilityZone: !Select
        - 0
        - Fn::GetAZs: !Ref 'AWS::Region'
      CidrBlock: !FindInMap [CidrMappings, private-subnet, CIDR]
      MapPublicIpOnLaunch: true
      Tags:
        - Key: Name
          Value: private subnet
      VpcId: !Ref vpc

  privateRouteTable:
    Type: 'AWS::EC2::RouteTable'
    DependsOn:
      - vpc
    Properties:
      Tags:
        - Key: Name
          Value: private route table
      VpcId: !Ref vpc

  privateSubnet1RouteTableAssociation:
    Type: 'AWS::EC2::SubnetRouteTableAssociation'
    DependsOn:
      - privateRouteTable
      - privateSubnet
    Properties:
      RouteTableId: !Ref privateRouteTable
      SubnetId: !Ref privateSubnet

  # Default route to the internet gateway; waits for the gateway attachment.
  publicRoute:
    Type: 'AWS::EC2::Route'
    DependsOn:
      - privateRouteTable
      - internetGateway
      - attachGateway
    Properties:
      DestinationCidrBlock: 0.0.0.0/0
      GatewayId: !Ref internetGateway
      RouteTableId: !Ref privateRouteTable

  # Endpoint for S3 attached to the subnet's route table, with an allow-all
  # endpoint policy.
  S3Endpoint:
    Type: 'AWS::EC2::VPCEndpoint'
    DependsOn:
      - vpc
      - privateSubnet
      - privateRouteTable
    Properties:
      PolicyDocument:
        # Quoted so YAML loaders keep it a string instead of a date.
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal: '*'
            Action: '*'
            Resource: '*'
      RouteTableIds:
        - !Ref privateRouteTable
      ServiceName: !Sub 'com.amazonaws.${AWS::Region}.s3'
      VpcId: !Ref vpc

  # Security group shared by the notebook instance and the Lustre file system.
  instanceSecurityGroup:
    Type: 'AWS::EC2::SecurityGroup'
    DependsOn:
      - vpc
    Properties:
      GroupDescription: Allow 988 for Lustre
      VpcId: !Ref vpc
      SecurityGroupIngress:
        # All traffic, but only from inside the VPC. This replaces an
        # allow-all rule from 0.0.0.0/0 that exposed every port publicly and
        # made the sourceCidr parameter ineffective.
        - CidrIp: !FindInMap [CidrMappings, vpc, CIDR]
          IpProtocol: '-1'
          FromPort: -1
          ToPort: -1
        # Ports reachable from the caller-supplied range (default 0.0.0.0/0).
        - CidrIp: !Ref sourceCidr
          IpProtocol: tcp
          FromPort: 988
          ToPort: 988
        - CidrIp: !Ref sourceCidr
          IpProtocol: tcp
          FromPort: 1021
          ToPort: 1023
        - CidrIp: !Ref sourceCidr
          IpProtocol: tcp
          FromPort: 22
          ToPort: 22
        - CidrIp: !Ref sourceCidr
          IpProtocol: tcp
          FromPort: 443
          ToPort: 443
        - CidrIp: !Ref sourceCidr
          IpProtocol: tcp
          FromPort: 80
          ToPort: 80
        - CidrIp: !Ref sourceCidr
          IpProtocol: tcp
          FromPort: 7592
          ToPort: 7592
      SecurityGroupEgress:
        # One allow-all rule; the former per-port egress rules were already
        # covered by it, so effective permissions are unchanged.
        - CidrIp: 0.0.0.0/0
          IpProtocol: '-1'
          FromPort: -1
          ToPort: -1

  # ------------------------------------------------------------------ storage
  S3Bucket:
    Type: 'AWS::S3::Bucket'
    Condition: CreateS3Bucket
    Properties:
      BucketName: !Sub 'sagemaker-threedee-${AWS::Region}-${AWS::AccountId}'

  # Lustre file system that imports from and exports to the bucket named
  # above (the name is rebuilt with Fn::Sub because S3Bucket is conditional).
  S3LinkedLustreFS:
    Type: 'AWS::FSx::FileSystem'
    DependsOn:
      - instanceSecurityGroup
      - privateSubnet
      - S3Bucket
    Properties:
      FileSystemType: LUSTRE
      StorageCapacity: 1200
      SubnetIds:
        - !Ref privateSubnet
      SecurityGroupIds:
        - !Ref instanceSecurityGroup
      Tags:
        - Key: Name
          Value: e2e3d-Workshop
      LustreConfiguration:
        AutoImportPolicy: NEW
        CopyTagsToBackups: true
        DeploymentType: PERSISTENT_1
        PerUnitStorageThroughput: 200
        DataCompressionType: NONE
        ImportPath: !Sub 's3://sagemaker-threedee-${AWS::Region}-${AWS::AccountId}'
        ExportPath: !Sub 's3://sagemaker-threedee-${AWS::Region}-${AWS::AccountId}/.'

  # ---------------------------------------------------------------- SageMaker
  # Role assumable by SageMaker and FSx; used as the notebook execution role.
  instanceRole:
    Type: 'AWS::IAM::Role'
    Properties:
      # Moved here from the resource level, where Description is not a
      # supported resource attribute.
      Description: Instance Role for Sagemaker and FsxLustre
      RoleName: e2e3d-sm
      Path: /
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - sagemaker.amazonaws.com
                - fsx.amazonaws.com
                - s3.data-source.lustre.fsx.amazonaws.com
            Action:
              - 'sts:AssumeRole'
      ManagedPolicyArns:
        - 'arn:aws:iam::aws:policy/AmazonSageMakerFullAccess'
        - 'arn:aws:iam::aws:policy/AmazonS3FullAccess'
        - 'arn:aws:iam::aws:policy/IAMReadOnlyAccess'
        - 'arn:aws:iam::aws:policy/AmazonFSxFullAccess'
        - 'arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryFullAccess'
        - 'arn:aws:iam::aws:policy/AWSCodeCommitPowerUser'
        - 'arn:aws:iam::aws:policy/AWSCloudFormationReadOnlyAccess'

  # OnStart script: removes unused conda environments, installs the Lustre
  # client and mounts the file system under /home/ec2-user/SageMaker/fsx.
  NotebookLifecycle:
    Type: 'AWS::SageMaker::NotebookInstanceLifecycleConfig'
    Properties:
      NotebookInstanceLifecycleConfigName: jupyter-lifecycle-config
      OnStart:
        - Content:
            # Long form on purpose: "!Base64 !Sub" (two short-form tags in a
            # row) is not valid. Inside Fn::Sub only ${...} is substituted;
            # plain $VAR shell references pass through untouched.
            Fn::Base64: !Sub |
              #!/bin/bash
              set -e
              # echo "-----Sync of dataset from S3 public bucket----"
              ##sync
              # aws s3 sync ${PublicS3Bucket}/train s3://sagemaker-${AWS::Region}-${AWS::AccountId}/coco_dataset/train/ &
              # aws s3 sync ${PublicS3Bucket}/model s3://sagemaker-${AWS::Region}-${AWS::AccountId}/coco_dataset/model/ &

              # OVERVIEW
              # This script mounts an FSx for Lustre file system to the Notebook Instance at
              # /home/ec2-user/SageMaker/fsx based off the DNS and Mount name parameters.
              #
              # This script assumes the following:
              # 1. There's an FSx for Lustre file system created and running
              # 2. The FSx for Lustre file system is accessible from the Notebook Instance
              #    - The Notebook Instance has to be created on the same VPC as the FSx for Lustre file system
              #    - The subnets and security groups have to be properly set up
              # 3. FSX_DNS_NAME below is the DNS name of the FSx for Lustre file system
              #    (filled in by CloudFormation).
              # 4. FSX_MOUNT_NAME below is the Mount name of the FSx for Lustre file system
              #    (filled in by CloudFormation).

              # Remove conda environments that are not used
              rm -rf /home/ec2-user/anaconda3/envs/tensorflow_p37
              rm -rf /home/ec2-user/anaconda3/envs/mxnet_p36
              rm -rf /home/ec2-user/anaconda3/envs/amazonei_tensorflow_p36
              rm -rf /home/ec2-user/anaconda3/envs/amazonei_mxnet_p36
              rm -rf /home/ec2-user/anaconda3/envs/amazonei_tensorflow2_p36
              rm -rf /home/ec2-user/anaconda3/envs/tensorflow2_p37

              # PARAMETERS
              FSX_DNS_NAME=${S3LinkedLustreFS}.fsx.${AWS::Region}.amazonaws.com
              FSX_MOUNT_NAME=${S3LinkedLustreFS.LustreMountName}

              # First, we need to install the lustre-client libraries.
              # -y is required: the script runs without a terminal, so an
              # interactive confirmation prompt would abort it under "set -e".
              sudo amazon-linux-extras install -y lustre2.10
              sudo yum install -y lustre-client

              # Now we can create the mount point and mount the file system
              sudo mkdir -p /home/ec2-user/SageMaker/fsx
              sudo mount -t lustre -o noatime,flock "$FSX_DNS_NAME@tcp:/$FSX_MOUNT_NAME" /home/ec2-user/SageMaker/fsx

              # Let's make sure we have the appropriate access to the directory
              sudo chmod go+rw /home/ec2-user/SageMaker/fsx

  NotebookInstance:
    Type: 'AWS::SageMaker::NotebookInstance'
    DependsOn:
      - S3Bucket
      - S3LinkedLustreFS
    Properties:
      InstanceType: !Ref NotebookInstanceType
      NotebookInstanceName: !Ref NotebookName
      PlatformIdentifier: notebook-al2-v1
      SubnetId: !Ref privateSubnet
      SecurityGroupIds:
        - !Ref instanceSecurityGroup
      RoleArn: !GetAtt instanceRole.Arn
      VolumeSizeInGB: !Ref VolumeSize
      LifecycleConfigName: !GetAtt NotebookLifecycle.NotebookInstanceLifecycleConfigName