# AWS Region this cluster configuration targets.
Region: us-west-2
# Operating system and custom AMI selected for the cluster.
Image:
  CustomAmi: ami-04912fde9e80170b8
  Os: alinux2
# Head node: instance type, subnet, SSH key, root volume and IAM settings.
HeadNode:
  InstanceType: c5.4xlarge
  Networking:
    SubnetId: subnet-0e0944955bfdce9f1
  Ssh:
    KeyName: pcluster-key
  LocalStorage:
    RootVolume:
      Size: 100
  Iam:
    # Replace the bucket name below with one of your own S3 buckets, or
    # comment out S3Access if the head node does not need bucket access.
    # https://docs.aws.amazon.com/parallelcluster/latest/ug/HeadNode-v3.html#HeadNode-v3-Iam
    S3Access:
      - BucketName: pcluster-ml-workshop
        EnableWriteAccess: true
    AdditionalIamPolicies:
      - Policy: arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore
      - Policy: arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly
# Shared file systems: an EBS volume at /apps and FSx for Lustre at /fsx.
SharedStorage:
  # Install your shared applications in /apps. The volume can be archived
  # with a snapshot that you can reuse with other clusters.
  - Name: SharedEBS
    MountDir: /apps
    StorageType: Ebs
    EbsSettings:
      Size: 200
      VolumeType: gp3
      Iops: 6000
      Throughput: 300
  # Lustre file system mounted at /fsx, with LZ4 data compression.
  - Name: FsxLustre0
    MountDir: /fsx
    StorageType: FsxLustre
    FsxLustreSettings:
      DeploymentType: PERSISTENT_1
      StorageCapacity: 4800
      PerUnitStorageThroughput: 200
      DataCompressionType: LZ4
## Review the following doc:
## https://docs.aws.amazon.com/parallelcluster/latest/ug/Scheduling-v3.html
## Local storage for the compute nodes is optional; configure it under
## ComputeSettings as required.
# Slurm scheduler with a single GPU training queue.
Scheduling:
  Scheduler: slurm
  SlurmQueues:
    - Name: train-g4dn-2xl
      # For queue-level IAM settings see:
      # https://docs.aws.amazon.com/parallelcluster/latest/ug/Scheduling-v3.html#Scheduling-v3-SlurmQueues-Iam
      Iam:
        # EnableWriteAccess is not set for this bucket (the head node sets it
        # to true), so the ParallelCluster default applies here.
        # NOTE(review): the default is read-only per the docs; confirm that
        # compute nodes do not need to write to the bucket.
        S3Access:
          - BucketName: pcluster-ml-workshop
        AdditionalIamPolicies:
          - Policy: arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore
          - Policy: arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly
      Networking:
        SubnetIds:
          - subnet-01526f003e8c4e085
        PlacementGroup:
          Enabled: true
      ComputeSettings:
        LocalStorage:
          # This is your root volume
          RootVolume:
            Size: 200
          # NVMe drives will be set in RAID0
          EphemeralVolume:
            MountDir: /local_scratch
      ComputeResources:
        # MinCount and MaxCount bound the instance count of this resource.
        # NOTE(review): an earlier note described starting with MinCount 0 to
        # confirm that instances come from the ODCR when running a job, then
        # raising MinCount to 16 to avoid scale down. The values below are 2
        # and 6, and no capacity reservation is configured in this queue;
        # confirm which values are intended.
        - Name: train-g4dn-2xl
          InstanceType: g4dn.2xlarge
          MinCount: 2
          MaxCount: 6
          Efa:
            Enabled: false
            GdrSupport: false