---
# CloudFormation template that wires an AWS Batch Spot compute environment to
# an FSx for Lustre file system linked to an S3 bucket. It creates a VPC with
# one public and one private subnet, the file system, a launch template that
# mounts it at /fsx, the Batch compute environment / queue / job definition,
# and an ECR repository for the job image.
Description: |
  AWS HPC Blog - Scalable and Cost-Effective Batch Processing with AWS Batch and Amazon FSx

Parameters:
  # AMI ID read from a public SSM parameter at deploy time. The
  # "<AWS::EC2::Image::Id>" suffix is required: a bare
  # "AWS::SSM::Parameter::Value" is not a valid parameter type.
  EcsOptimizedAmazonLinux2RecommendedImageId:
    Type: AWS::SSM::Parameter::Value<AWS::EC2::Image::Id>
    Default: /aws/service/ecs/optimized-ami/amazon-linux-2/recommended/image_id

Resources:
  # ---------------------------------------------------------------- Network
  Vpc:
    Type: AWS::EC2::VPC
    Properties:
      CidrBlock: 10.0.0.0/16
      EnableDnsHostnames: true
      EnableDnsSupport: true
      Tags:
        - Key: Name
          Value: aws-hpc-blog-batch-processing/vpc

  # Public subnet: lower half of the VPC range; hosts only the NAT gateway.
  VpcPublicSubnet:
    Type: AWS::EC2::Subnet
    Properties:
      CidrBlock: 10.0.0.0/17
      VpcId: !Ref Vpc
      AvailabilityZone: !Select [0, !GetAZs ""]
      Tags:
        - Key: Name
          Value: aws-hpc-blog-batch-processing/vpc/PublicSubnet

  VpcPublicSubnetRouteTable:
    Type: AWS::EC2::RouteTable
    Properties:
      VpcId: !Ref Vpc
      Tags:
        - Key: Name
          Value: aws-hpc-blog-batch-processing/vpc/PublicSubnet

  VpcPublicSubnetRouteTableAssociation:
    Type: AWS::EC2::SubnetRouteTableAssociation
    Properties:
      RouteTableId: !Ref VpcPublicSubnetRouteTable
      SubnetId: !Ref VpcPublicSubnet

  # The route can only be created once the internet gateway is attached.
  VpcPublicSubnetDefaultRoute:
    Type: AWS::EC2::Route
    DependsOn:
      - VpcGatewayAttachment
    Properties:
      RouteTableId: !Ref VpcPublicSubnetRouteTable
      DestinationCidrBlock: 0.0.0.0/0
      GatewayId: !Ref VpcIGW

  # An EIP allocated for a VPC defined in the same template must wait for the
  # VPC gateway attachment, otherwise creation/deletion ordering can fail.
  VpcPublicSubnetEIP:
    Type: AWS::EC2::EIP
    DependsOn:
      - VpcGatewayAttachment
    Properties:
      Domain: vpc
      Tags:
        - Key: Name
          Value: aws-hpc-blog-batch-processing/vpc/PublicSubnet

  VpcPublicSubnetNATGateway:
    Type: AWS::EC2::NatGateway
    Properties:
      SubnetId: !Ref VpcPublicSubnet
      AllocationId: !GetAtt VpcPublicSubnetEIP.AllocationId
      Tags:
        - Key: Name
          Value: aws-hpc-blog-batch-processing/vpc/PublicSubnet

  # Private subnet: upper half of the VPC range, same AZ as the public subnet.
  # Hosts the FSx file system and the Batch instances.
  VpcPrivateSubnet:
    Type: AWS::EC2::Subnet
    Properties:
      CidrBlock: 10.0.128.0/17
      VpcId: !Ref Vpc
      AvailabilityZone: !Select [0, !GetAZs ""]
      MapPublicIpOnLaunch: false
      Tags:
        - Key: Name
          Value: aws-hpc-blog-batch-processing/vpc/PrivateSubnet

  VpcPrivateSubnetRouteTable:
    Type: AWS::EC2::RouteTable
    Properties:
      VpcId: !Ref Vpc
      Tags:
        - Key: Name
          Value: aws-hpc-blog-batch-processing/vpc/PrivateSubnet

  VpcPrivateSubnetRouteTableAssociation:
    Type: AWS::EC2::SubnetRouteTableAssociation
    Properties:
      RouteTableId: !Ref VpcPrivateSubnetRouteTable
      SubnetId: !Ref VpcPrivateSubnet

  # Outbound traffic from the private subnet leaves through the NAT gateway.
  VpcPrivateSubnetDefaultRoute:
    Type: AWS::EC2::Route
    Properties:
      RouteTableId: !Ref VpcPrivateSubnetRouteTable
      DestinationCidrBlock: 0.0.0.0/0
      NatGatewayId: !Ref VpcPublicSubnetNATGateway

  VpcIGW:
    Type: AWS::EC2::InternetGateway
    Properties:
      Tags:
        - Key: Name
          Value: aws-hpc-blog-batch-processing/vpc

  VpcGatewayAttachment:
    Type: AWS::EC2::VPCGatewayAttachment
    Properties:
      VpcId: !Ref Vpc
      InternetGatewayId: !Ref VpcIGW

  # ---------------------------------------------------------------- Storage
  # Bucket linked to the file system below (import from input/, export to the
  # bucket root). Created with default properties.
  Bucket:
    Type: AWS::S3::Bucket

  FSxFileSystemSecurityGroup:
    Type: AWS::EC2::SecurityGroup
    Properties:
      GroupDescription: aws-hpc-blog-batch-processing/fsx-filesystem-security-group
      SecurityGroupEgress:
        - CidrIp: 0.0.0.0/0
          Description: Allow all outbound traffic by default
          IpProtocol: "-1"
      VpcId: !Ref Vpc
      Tags:
        - Key: Name
          Value: aws-hpc-blog-batch-processing/fsx-filesystem-security-group

  # Ingress rules are separate resources so the two security groups can
  # reference each other without a circular dependency.
  FSxFileSystemSecurityGroupFromSelf:
    Type: AWS::EC2::SecurityGroupIngress
    Properties:
      IpProtocol: tcp
      Description: allow from self:988-1023
      FromPort: 988
      ToPort: 1023
      GroupId: !GetAtt FSxFileSystemSecurityGroup.GroupId
      SourceSecurityGroupId: !GetAtt FSxFileSystemSecurityGroup.GroupId

  FSxFileSystemSecurityGroupFromComputeEnvironment:
    Type: AWS::EC2::SecurityGroupIngress
    Properties:
      IpProtocol: tcp
      Description: allow from ComputeEnvironment:988-1023
      FromPort: 988
      ToPort: 1023
      GroupId: !GetAtt FSxFileSystemSecurityGroup.GroupId
      SourceSecurityGroupId: !GetAtt ComputeEnvironmentSecurityGroup.GroupId

  FSxFileSystem:
    Type: AWS::FSx::FileSystem
    Properties:
      FileSystemType: LUSTRE
      LustreConfiguration:
        AutoImportPolicy: NEW
        DeploymentType: SCRATCH_2
        ImportPath: !Sub "s3://${Bucket}/input/"
        ExportPath: !Sub "s3://${Bucket}/"
      StorageCapacity: 1200
      SubnetIds:
        - !Ref VpcPrivateSubnet
      SecurityGroupIds:
        - !GetAtt FSxFileSystemSecurityGroup.GroupId
      Tags:
        - Key: Name
          Value: aws-hpc-blog-batch-processing/fsx-filesystem

  # ---------------------------------------------------------------- Compute
  # User data is a MIME multipart document with a single cloud-config part
  # that installs the Lustre client and mounts the file system at /fsx.
  # The blank lines separating headers from bodies are significant.
  LaunchTemplate:
    Type: AWS::EC2::LaunchTemplate
    Properties:
      LaunchTemplateName: !Sub "${AWS::StackName}-launch-template"
      LaunchTemplateData:
        UserData:
          Fn::Base64: !Sub |
            MIME-Version: 1.0
            Content-Type: multipart/mixed; boundary="==MYBOUNDARY=="

            --==MYBOUNDARY==
            Content-Type: text/cloud-config; charset="us-ascii"

            runcmd:
            - amazon-linux-extras install -y lustre2.10
            - mkdir -p /fsx
            - mount -t lustre ${FSxFileSystem}.fsx.${AWS::Region}.amazonaws.com@tcp:/${FSxFileSystem.LustreMountName} /fsx

            --==MYBOUNDARY==--

  ComputeEnvironmentSecurityGroup:
    Type: AWS::EC2::SecurityGroup
    Properties:
      GroupDescription: aws-hpc-blog-batch-processing/compute-environment-security-group
      SecurityGroupEgress:
        - CidrIp: 0.0.0.0/0
          Description: Allow all outbound traffic by default
          IpProtocol: "-1"
      VpcId: !Ref Vpc
      Tags:
        - Key: Name
          Value: aws-hpc-blog-batch-processing/compute-environment-security-group

  # Role assumed by the EC2 instances launched by Batch.
  ComputeEnvironmentEcsInstanceRole:
    Type: AWS::IAM::Role
    Properties:
      AssumeRolePolicyDocument:
        Version: "2012-10-17"
        Statement:
          - Action: sts:AssumeRole
            Effect: Allow
            Principal:
              Service: !Sub "ec2.${AWS::URLSuffix}"
      ManagedPolicyArns:
        - !Sub "arn:${AWS::Partition}:iam::aws:policy/service-role/AmazonEC2ContainerServiceforEC2Role"
      Tags:
        - Key: Name
          Value: aws-hpc-blog-batch-processing/compute-environment-ecs-instance-role

  ComputeEnvironmentInstanceProfile:
    Type: AWS::IAM::InstanceProfile
    Properties:
      Roles:
        - !Ref ComputeEnvironmentEcsInstanceRole

  # Role assumed by the AWS Batch service itself.
  ComputeEnvironmentServiceRole:
    Type: AWS::IAM::Role
    Properties:
      AssumeRolePolicyDocument:
        Statement:
          - Action: sts:AssumeRole
            Effect: Allow
            Principal:
              Service: batch.amazonaws.com
        Version: "2012-10-17"
      ManagedPolicyArns:
        - !Sub "arn:${AWS::Partition}:iam::aws:policy/service-role/AWSBatchServiceRole"
      Tags:
        - Key: Name
          Value: aws-hpc-blog-batch-processing/compute-environment-service-role

  # The launch template is referenced by name rather than !Ref, so the
  # explicit DependsOn is what orders its creation before this resource.
  ComputeEnvironment:
    Type: AWS::Batch::ComputeEnvironment
    DependsOn:
      - LaunchTemplate
    Properties:
      Type: MANAGED
      State: ENABLED
      ServiceRole: !GetAtt ComputeEnvironmentServiceRole.Arn
      ComputeResources:
        Type: SPOT
        AllocationStrategy: SPOT_CAPACITY_OPTIMIZED
        BidPercentage: 100
        MinvCpus: 0
        MaxvCpus: 320
        DesiredvCpus: 0
        InstanceTypes:
          - optimal
        ImageId: !Ref EcsOptimizedAmazonLinux2RecommendedImageId
        InstanceRole: !GetAtt ComputeEnvironmentInstanceProfile.Arn
        LaunchTemplate:
          LaunchTemplateName: !Sub "${AWS::StackName}-launch-template"
          Version: "$Latest"
        SecurityGroupIds:
          - !GetAtt ComputeEnvironmentSecurityGroup.GroupId
        # NOTE(review): this ARN is built by convention, not created here;
        # presumably the Spot Fleet service-linked role must already exist
        # in the account - confirm before deploying to a fresh account.
        SpotIamFleetRole: !Sub "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/aws-service-role/spotfleet.amazonaws.com/AWSServiceRoleForEC2SpotFleet"
        Subnets:
          - !Ref VpcPrivateSubnet

  JobQueue:
    Type: AWS::Batch::JobQueue
    Properties:
      JobQueueName: aws-hpc-blog-batch-processing-job-queue
      Priority: 1
      State: ENABLED
      ComputeEnvironmentOrder:
        - ComputeEnvironment: !Ref ComputeEnvironment
          Order: 1

  Repository:
    Type: AWS::ECR::Repository
    Properties:
      RepositoryName: aws-hpc-blog-batch-processing

  # Container job that bind-mounts the host's /fsx directory (mounted by the
  # launch template) and receives the input/output directories via env vars.
  JobDefinition:
    Type: AWS::Batch::JobDefinition
    Properties:
      Type: container
      JobDefinitionName: aws-hpc-blog-batch-processing-job-definition
      PlatformCapabilities:
        - EC2
      RetryStrategy:
        Attempts: 3
      ContainerProperties:
        # RepositoryUri yields <account>.dkr.ecr.<region>.<suffix>/<name>,
        # the same string previously assembled by splitting the ARN.
        Image: !Sub "${Repository.RepositoryUri}:latest"
        Vcpus: 2
        Memory: 7168
        Environment:
          - Name: INPUT_DIR
            Value: /fsx/input
          - Name: OUTPUT_DIR
            Value: /fsx/output
        Volumes:
          - Name: fsx
            Host:
              SourcePath: /fsx
        MountPoints:
          - SourceVolume: fsx
            ContainerPath: /fsx
            ReadOnly: false
        LogConfiguration:
          LogDriver: awslogs
        Privileged: false
        ReadonlyRootFilesystem: false

Outputs:
  BucketName:
    Value: !Ref Bucket

  RepositoryUri:
    Value: !GetAtt Repository.RepositoryUri

  FSxFileSystemId:
    Value: !Ref FSxFileSystem