#!/bin/bash
# terminate the script if there is an error
set -e

#####
# don't do anything else if it's a compute node, just install htop
#####
. "/etc/parallelcluster/cfnconfig"
case "${cfn_node_type}" in
    ComputeFleet)
        yum install -y htop
        exit 0
        ;;
    *)
        ;;
esac

# give the slurm user permission to run the AWS CLI
/opt/parallelcluster/scripts/imds/imds-access.sh --allow slurm

#####
# Arguments ($0 is the script name):
#   $1 RDS endpoint hostname, $2 RDS port, $3 RDS user, $4 RDS password,
#   $5 cluster name, $6 AWS region, $7 Slurm version
#####
PCLUSTER_RDS_HOST="$1"
PCLUSTER_RDS_PORT="$2"
PCLUSTER_RDS_USER="$3"
PCLUSTER_RDS_PASS="$4"
PCLUSTER_NAME="$5"
REGION="$6"
slurm_version="$7"

# the head node is used to run slurmdbd
host_name=$(hostname)
CORES=$(grep processor /proc/cpuinfo | wc -l)
lower_name=$(echo "$PCLUSTER_NAME" | tr '[:upper:]' '[:lower:]')

yum update -y

# change the cluster name and clear any saved state
sed -i "s/ClusterName=parallelcluster/ClusterName=${lower_name}/g" /opt/slurm/etc/slurm.conf
rm /var/spool/slurm.state/*

#####
# install pre-requisites
#####
yum install -y epel-release
yum-config-manager --enable epel
yum install -y hdf5-devel
yum install -y libyaml http-parser-devel json-c-devel

# update the linked libs so freshly built libraries under /usr/local are found
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib
cat >/etc/ld.so.conf.d/slurmrestd.conf<<EOF
/usr/local/lib
/usr/local/lib64
EOF
ldconfig

#####
# install libjwt - required by Slurm's auth/jwt and slurmrestd; built from
# source here, so pull in the autotools and the jansson dependency first
#####
yum install -y autoconf automake libtool jansson-devel
cd /shared
git clone --depth 1 --single-branch -b v1.12.0 https://github.com/benmcollins/libjwt.git libjwt
cd libjwt
autoreconf --force --install
./configure --prefix=/usr/local
make -j $CORES
make install

# python3 is required to build slurm >= 20.02, so reuse ParallelCluster's
# cookbook virtualenv (older releases kept it under a pinned Python path)
#source /opt/parallelcluster/pyenv/versions/3.6.9/envs/cookbook_virtualenv/bin/activate
source /opt/parallelcluster/pyenv/versions/cookbook_virtualenv/bin/activate

cd /shared
# have to use the exact same slurm version as in the released version of ParallelCluster 2.10.1 - 20.02.4
# as of May 13, 20.02.4 was removed from schedmd and was replaced with .7
# the error can be seen in the cfn-init.log file
# changelog: changed to 20.11.7 from 20.02.7 on 2021/09/03 - pcluster 2.11.2
# changelog: changed to 20.11.8 from 20.11.7 on 2021/09/16 - pcluster 3
# slurm_version=20.11.8
wget https://download.schedmd.com/slurm/slurm-${slurm_version}.tar.bz2
tar xjf slurm-${slurm_version}.tar.bz2
cd slurm-${slurm_version}

# configure and build slurm with the REST daemon enabled
./configure --prefix=/opt/slurm --with-pmix=/opt/pmix --enable-slurmrestd
make -j $CORES
make install
make install-contrib
deactivate

# set the jwt key
openssl genrsa -out /var/spool/slurm.state/jwt_hs256.key 2048
chown slurm /var/spool/slurm.state/jwt_hs256.key
chmod 0700 /var/spool/slurm.state/jwt_hs256.key

# add 'AuthAltTypes=auth/jwt' to /opt/slurm/etc/slurm.conf, together with
# the accounting settings that point slurmctld at the local slurmdbd
cat >>/opt/slurm/etc/slurm.conf<<EOF
# enable JWT authentication for slurmrestd
AuthAltTypes=auth/jwt
AuthAltParameters=jwt_key=/var/spool/slurm.state/jwt_hs256.key

# accounting
JobAcctGatherType=jobacct_gather/linux
AccountingStorageType=accounting_storage/slurmdbd
AccountingStorageHost=${host_name}
AccountingStoragePort=6839
EOF

# slurmdbd stores accounting data in the RDS database passed in as arguments
cat >/opt/slurm/etc/slurmdbd.conf<<EOF
AuthType=auth/munge
AuthAltTypes=auth/jwt
AuthAltParameters=jwt_key=/var/spool/slurm.state/jwt_hs256.key
DbdHost=${host_name}
DbdPort=6839
SlurmUser=slurm
DebugLevel=4
LogFile=/var/log/slurmdbd.log
PidFile=/var/run/slurmdbd.pid
StorageType=accounting_storage/mysql
StorageHost=${PCLUSTER_RDS_HOST}
StoragePort=${PCLUSTER_RDS_PORT}
StorageUser=${PCLUSTER_RDS_USER}
StoragePass=${PCLUSTER_RDS_PASS}
EOF
# slurmdbd refuses to start if its config is readable by other users
chown slurm /opt/slurm/etc/slurmdbd.conf
chmod 600 /opt/slurm/etc/slurmdbd.conf

# slurmrestd gets its own config so it authenticates with JWT only
cat >/opt/slurm/etc/slurmrestd.conf<<EOF
include /opt/slurm/etc/slurm.conf
AuthType=auth/jwt
EOF

# systemd unit for slurmrestd; the listen address/port below (8082) is this
# setup's choice - change it if the port is already in use
cat >/etc/systemd/system/slurmrestd.service<<EOF
[Unit]
Description=Slurm REST daemon
After=network.target slurmctld.service
ConditionPathExists=/opt/slurm/etc/slurmrestd.conf

[Service]
Environment=SLURM_CONF=/opt/slurm/etc/slurmrestd.conf
Environment=SLURM_JWT=daemon
ExecStart=/opt/slurm/sbin/slurmrestd -a rest_auth/jwt -vvvv 0.0.0.0:8082
PIDFile=/var/run/slurmrestd.pid

[Install]
WantedBy=multi-user.target
EOF

# bring up slurmdbd, register the cluster in the accounting database
# (ignore the error if it is already registered), then restart slurmctld
# so the new slurm.conf settings take effect
/opt/slurm/sbin/slurmdbd
sleep 5
/opt/slurm/bin/sacctmgr -i add cluster ${lower_name} || true
systemctl restart slurmctld

systemctl daemon-reload
systemctl enable slurmrestd
systemctl start slurmrestd

# token_refresher.sh creates a JWT for the slurm user and stores it in
# Secrets Manager so clients outside the cluster can call the REST API;
# the secret name below is this script's convention
cat >/shared/token_refresher.sh<<EOF
#!/bin/bash
# 'scontrol token' prints SLURM_JWT=<token>; export it, then upsert the secret
export \$(/opt/slurm/bin/scontrol token username=slurm)
aws secretsmanager put-secret-value --secret-id slurm_token_${lower_name} --secret-string \$SLURM_JWT --region ${REGION} ||
aws secretsmanager create-secret --name slurm_token_${lower_name} --secret-string \$SLURM_JWT --region ${REGION}
EOF
chmod +x /shared/token_refresher.sh

# refresh the token on a schedule shorter than its lifespan
cat >/etc/cron.d/slurm-token<<EOF
*/10 * * * * root /shared/token_refresher.sh
EOF

# helper used to stage an input file and a batch script from S3,
# then submit the script with sbatch
mkdir -p /shared/tmp
cat >/shared/tmp/fetch_and_run.sh<<EOF
#!/bin/bash
error_exit () {
  echo "\${1}" 1>&2
  exit 1
}

# Check if aws CLI is installed
which aws >/dev/null 2>&1 || error_exit "Please install AWS CLI first."

# arg1-bucketname arg2-prefix arg3-input_filename arg4-program_filename arg5-program_folder
mkdir -p \$5
cd \$5
aws s3 cp "s3://\$1/\$2/\$3" \$3
aws s3 cp "s3://\$1/\$2/\$4" \$4
chmod +x \$4
sbatch \$4
EOF
chmod +x /shared/tmp/fetch_and_run.sh

# create the slurm token - the role permission with SecretsManagerReadWrite
# must be added in the config file, in the cluster section with
# additional_iam_policies = arn:aws:iam::aws:policy/SecretsManagerReadWrite
/shared/token_refresher.sh
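
# Example invocation of the fetch_and_run.sh helper above (bucket, prefix,
# and file names are hypothetical): stages input.csv and run_job.sh from S3
# into /shared/jobs and submits run_job.sh to Slurm.
#   /shared/tmp/fetch_and_run.sh my-bucket experiments/run1 input.csv run_job.sh /shared/jobs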
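
##############################################################
# Usage sketch (commented out, nothing below runs): how a client
# outside the cluster could call the REST API this script sets up.
# Assumptions: the secret name matches the one written by
# token_refresher.sh, port 8082 matches the slurmrestd unit, and
# v0.0.36 is the API version shipped with Slurm 20.11 - adjust
# <cluster-name>, <region>, and <head-node-dns> to your deployment.
##############################################################
# TOKEN=$(aws secretsmanager get-secret-value \
#   --secret-id slurm_token_<cluster-name> \
#   --query SecretString --output text --region <region>)
# curl -s \
#   -H "X-SLURM-USER-NAME: slurm" \
#   -H "X-SLURM-USER-TOKEN: ${TOKEN}" \
#   "http://<head-node-dns>:8082/slurm/v0.0.36/diag"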