# Component document whose single `test` phase runs shell-based checks
# against a ParallelCluster AMI.
name: ParallelClusterTest
description: Test ParallelCluster AMI
schemaVersion: 1.0

constants:
  # Path of the JSON attributes file that the version/uid steps query with jq.
  - CookbookDefaultFile:
      type: string
      value: /etc/chef/node_attributes.json

phases:
  - name: test
    steps:
      ### basic ###
      # Prints "<ID>.<VERSION_ID>" (or just "<ID>" when VERSION_ID is unset or
      # empty) from /etc/os-release on stdout. Fails the step when the file is
      # missing.
      - name: OperatingSystemRelease
        action: ExecuteBash
        inputs:
          commands:
            - |
              set -v
              FILE=/etc/os-release
              if [ ! -e "${FILE}" ]; then
                echo "The file '${FILE}' does not exist. Failing build."
                exit 1
              fi

              . "${FILE}"
              echo "${ID}${VERSION_ID:+.${VERSION_ID}}"

      # Maps the OperatingSystemRelease output to a normalized OS name printed
      # on stdout. Fails the step for any release that is not listed below.
      - name: OperatingSystemName
        action: ExecuteBash
        inputs:
          commands:
            - |
              set -v
              RELEASE='{{ test.OperatingSystemRelease.outputs.stdout }}'

              # grep's exit status is tested directly (-q) instead of feeding
              # its output to `[ ]`, which breaks on multi-word output.
              # -w keeps 'amzn.2' from also matching e.g. 'amzn.2023'.
              if echo "${RELEASE}" | grep -qw '^amzn\.2'; then
                OS='alinux2'
              elif echo "${RELEASE}" | grep -q '^centos\.7'; then
                OS='centos7'
              elif echo "${RELEASE}" | grep -q '^ubuntu\.20'; then
                OS='ubuntu2004'
              elif echo "${RELEASE}" | grep -q '^ubuntu\.22'; then
                OS='ubuntu2204'
              elif echo "${RELEASE}" | grep -q '^rhel\.8'; then
                OS='rhel8'
              else
                echo "Operating System '${RELEASE}' is not supported. Failing build."
                exit 1
              fi

              echo "${OS}"

      ### versions ###
      # Prints the `python-version` value found under `.default.cluster` in the
      # cookbook attributes file. The jq output is flattened to one line, cut
      # to its second '='-separated field (lines without '=' pass through
      # unchanged), and trimmed/unquoted by xargs.
      - name: PythonVersion
        action: ExecuteBash
        inputs:
          commands:
            - |
              set -v
              JQ_OUTPUT=$(jq '.default.cluster."python-version"' {{ CookbookDefaultFile }})
              PYTHON_VERSION=$(echo ${JQ_OUTPUT} |
                tr -d '\n' |
                cut -d = -f 2 |
                xargs)
              echo ${PYTHON_VERSION}

      ### utils ###
      # Prints the number of PCI devices that lspci reports for the
      # vendor:device filter 10de:1af1 (the devices this test counts as
      # NVSwitches). The FabricManager step reads this value from stdout.
      - name: NvSwitches
        action: ExecuteBash
        inputs:
          commands:
            - |
              set -v
              SWITCH_COUNT=$(lspci -d 10de:1af1 | wc -l)
              echo "${SWITCH_COUNT}"

      # Prints the `reserved_base_uid` value found under `.default.cluster` in
      # the cookbook attributes file. The jq output is flattened to one line,
      # cut to its second '='-separated field (lines without '=' pass through
      # unchanged), and trimmed/unquoted by xargs.
      - name: BaseUID
        action: ExecuteBash
        inputs:
          commands:
            - |
              set -v
              JQ_OUTPUT=$(jq '.default.cluster.reserved_base_uid' {{ CookbookDefaultFile }})
              BASE_UID=$(echo ${JQ_OUTPUT} |
                tr -d '\n' |
                cut -d = -f 2 |
                xargs)
              echo "${BASE_UID}"

      # When the NvSwitches step counted more than one device, requires the
      # nvidia-fabricmanager unit to report a running SubState. Otherwise the
      # step does nothing.
      - name: FabricManager
        action: ExecuteBash
        inputs:
          commands:
            - |
              set -vx
              if [ {{ test.NvSwitches.outputs.stdout }} -gt 1 ]; then
                echo "test fabric-manager daemon"
                if ! systemctl show -p SubState nvidia-fabricmanager | grep -i running; then
                  echo "fabric-manager daemon test failed"
                  exit 1
                fi
                echo "NVIDIA Fabric Manager service correctly started"
              fi

      # Passes only when the agent's status output contains a "status" line
      # that reports "stopped".
      - name: CloudWatch
        action: ExecuteBash
        inputs:
          commands:
            - |
              set -vx
              if ! /opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -a status | grep status | grep stopped; then
                echo "amazon-cloudwatch-agent is not stopped"
                exit 1
              fi
              echo "CloudWatch test passed"


      # Runs the InSpec controls tagged `testami` from the
      # aws-parallelcluster-awsbatch cookbook directory. The step fails when
      # the directory cannot be entered or inspec exits non-zero.
      - name: InSpecTestsForAwsBatch
        action: ExecuteBash
        inputs:
          commands:
            - |
              set -vx
              echo "Performing InSpec tests for AwsBatch on the AMI..."
              # cd and inspec are chained so a missing cookbook directory is
              # reported as a failure instead of running inspec elsewhere.
              run_inspec() {
                cd /etc/chef/cookbooks/aws-parallelcluster-awsbatch &&
                  inspec exec test --profiles-path . --controls /tag:testami/ --no-distinct-exit
              }
              if ! run_inspec; then
                echo "InSpec tests for AwsBatch failed"
                exit 1
              fi
              echo "InSpec tests for AwsBatch passed"

      # Runs the InSpec controls tagged `testami` from the
      # aws-parallelcluster-platform cookbook directory. The step fails when
      # the directory cannot be entered or inspec exits non-zero.
      - name: InSpecTestsForPlatform
        action: ExecuteBash
        inputs:
          commands:
            - |
              set -vx
              echo "Performing InSpec tests for platform on the AMI..."
              # cd and inspec are chained so a missing cookbook directory is
              # reported as a failure instead of running inspec elsewhere.
              run_inspec() {
                cd /etc/chef/cookbooks/aws-parallelcluster-platform &&
                  inspec exec test --profiles-path . --controls /tag:testami/ --no-distinct-exit
              }
              if ! run_inspec; then
                echo "InSpec tests for platform failed"
                exit 1
              fi
              echo "InSpec tests for platform passed"

      # Runs the InSpec controls tagged `testami` from the
      # aws-parallelcluster-environment cookbook directory. The step fails
      # when the directory cannot be entered or inspec exits non-zero.
      - name: InSpecTestsForEnvironment
        action: ExecuteBash
        inputs:
          commands:
            - |
              set -vx
              echo "Performing InSpec tests for environment on the AMI..."
              # cd and inspec are chained so a missing cookbook directory is
              # reported as a failure instead of running inspec elsewhere.
              run_inspec() {
                cd /etc/chef/cookbooks/aws-parallelcluster-environment &&
                  inspec exec test --profiles-path . --controls /tag:testami/ --no-distinct-exit
              }
              if ! run_inspec; then
                echo "InSpec tests for environment failed"
                exit 1
              fi
              echo "InSpec tests for environment passed"

      # Runs the InSpec controls tagged `testami` from the
      # aws-parallelcluster-computefleet cookbook directory. The step fails
      # when the directory cannot be entered or inspec exits non-zero.
      - name: InSpecTestsForComputeFleet
        action: ExecuteBash
        inputs:
          commands:
            - |
              set -vx
              echo "Performing InSpec tests for compute fleet on the AMI..."
              # cd and inspec are chained so a missing cookbook directory is
              # reported as a failure instead of running inspec elsewhere.
              run_inspec() {
                cd /etc/chef/cookbooks/aws-parallelcluster-computefleet &&
                  inspec exec test --profiles-path . --controls /tag:testami/ --no-distinct-exit
              }
              if ! run_inspec; then
                echo "InSpec tests for compute fleet failed"
                exit 1
              fi
              echo "InSpec tests for compute fleet passed"

      # Runs the InSpec controls tagged `testami` from the
      # aws-parallelcluster-shared cookbook directory. The step fails when the
      # directory cannot be entered or inspec exits non-zero.
      - name: InSpecTestsForShared
        action: ExecuteBash
        inputs:
          commands:
            - |
              set -vx
              echo "Performing InSpec tests for shared cookbook on the AMI..."
              # cd and inspec are chained so a missing cookbook directory is
              # reported as a failure instead of running inspec elsewhere.
              run_inspec() {
                cd /etc/chef/cookbooks/aws-parallelcluster-shared &&
                  inspec exec test --profiles-path . --controls /tag:testami/ --no-distinct-exit
              }
              if ! run_inspec; then
                echo "InSpec tests for shared cookbook failed"
                exit 1
              fi
              echo "InSpec tests for shared cookbook passed"

      # Runs the InSpec controls tagged `testami` from the
      # aws-parallelcluster-slurm cookbook directory. The step fails when the
      # directory cannot be entered or inspec exits non-zero.
      - name: InSpecTestsForSlurm
        action: ExecuteBash
        inputs:
          commands:
            - |
              set -vx
              echo "Performing InSpec tests for slurm on the AMI..."
              # cd and inspec are chained so a missing cookbook directory is
              # reported as a failure instead of running inspec elsewhere.
              run_inspec() {
                cd /etc/chef/cookbooks/aws-parallelcluster-slurm &&
                  inspec exec test --profiles-path . --controls /tag:testami/ --no-distinct-exit
              }
              if ! run_inspec; then
                echo "InSpec tests for slurm failed"
                exit 1
              fi
              echo "InSpec tests for slurm passed"