#!/bin/bash
#
# Runs the Node Termination Handler (NTH) e2e assertion scripts against an EKS
# cluster.
#
# When no CONFIG file is passed, the cluster is provisioned by sourcing
# ../eks-cluster-test/provision-cluster; otherwise CONFIG is sourced and the
# cluster is treated as user-provided (never deleted by this script).
# See $USAGE below for options and the variables CONFIG must export.
set -euo pipefail

SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
# When "false", clean_up deletes the EKS cluster and ECR repos on exit.
PRESERVE=true

export TEST_WINDOWS="false"
export AEMM_VERSION="1.8.1"
export AEMM_DL_URL="https://github.com/aws/amazon-ec2-metadata-mock/releases/download/v$AEMM_VERSION/amazon-ec2-metadata-mock-$AEMM_VERSION.tgz"
export CLUSTER_CONFIG_FILE=$SCRIPTPATH/cluster-spec.yaml

USAGE=$(cat << 'EOM'
  Usage: run-test [-a script1,scripts2,...] [-w] [-d] CONFIG

  Options:
    -a          Assertion script(s), default is ALL scripts in e2e dir
    -w          Target Windows platform
    -d          Delete cluster upon test completion/failure

  Arguments:
    CONFIG      File to source, it should export the following values:
                  CLUSTER_NAME                                 EKS cluster name
                  DOCKER_PULL_POLICY                           Docker image pull policy (defaults to IfNotPresent)
                  NODE_TERMINATION_HANDLER_DOCKER_REPO         Node Termination Handler Docker repository
                  NODE_TERMINATION_HANDLER_DOCKER_TAG          Node Termination Handler Docker tag
                  NODE_TERMINATION_HANDLER_DOCKER_PULL_POLICY  Node Termination Handler Docker image pull policy (defaults to DOCKER_PULL_POLICY)
                  WEBHOOK_DOCKER_REPO                          Webhook Proxy Docker repository
                  WEBHOOK_DOCKER_TAG                           Webhook Proxy Docker tag
                  WEBHOOK_DOCKER_PULL_POLICY                   Webhook Proxy Docker image pull policy (defaults to DOCKER_PULL_POLICY)
                  EC2_METADATA_PORT                            EC2 Metadata Test Proxy port (defaults to 18999)
                  EC2_METADATA_URL                             EC2 Metadata Test Proxy URL (defaults to "http://amazon-ec2-metadata-mock-service.default.svc.cluster.local:$EC2_METADATA_PORT")
                  WEBHOOK_URL                                  Webhook URL (defaults to "http://webhook-test-proxy.default.svc.cluster.local")
EOM
)

while getopts "a:dw" opt; do
  case ${opt} in
    a ) # Assertion script(s)
        # Split the comma-separated list into a real array so that each script
        # becomes its own element of the main loop below.
        IFS=',' read -r -a assertion_scripts <<< "$OPTARG"
        ;;
    d ) # DELETE EKS Cluster + ECR repos
        echo "␡️ This run will delete the eks cluster and ecr repos"
        PRESERVE=false
        ;;
    w ) # Windows platform
        TEST_WINDOWS="true"
        ;;
    \? )
        echo "$USAGE" 1>&2
        # An unrecognised option is a usage error; report it as a failure.
        exit 1
        ;;
  esac
done

# Failure handler (installed on INT/TERM/ERR): dumps the NTH worker pod logs,
# prints how long the current assertion ran, and exits with status 1.
function exit_and_fail {
  local pod_id
  pod_id=$(get_nth_worker_pod || :)
  # Log collection is best effort; skip it when no pod could be found.
  if [[ -n "$pod_id" ]]; then
    kubectl logs "$pod_id" --namespace kube-system || :
  fi
  assertion_end=$(date +%s)
  # assertion_start is only assigned once an assertion script is about to run;
  # a failure before that point (e.g. in the first reset_cluster) must not trip
  # `set -u` inside this handler.
  echo "⏰ Took $(( assertion_end - ${assertion_start:-$assertion_end} ))sec"
  echo "❌ NTH EKS Integration Test FAILED $CLUSTER_NAME! ❌"
  exit 1
}

# Runs a command until it succeeds, sleeping 1, 2, 4, ... seconds in between.
# Arguments: $1  - maximum number of attempts
#            $2+ - command (and its arguments) to run
# Returns:   0 on success, otherwise the exit status of the last attempt.
function retry {
  local retries=$1
  shift

  local count=0
  local status
  local delay
  # errexit is switched off while the command runs so a failing attempt does
  # not abort the script; the RETURN trap switches it back on.
  set +e
  trap "set -e" RETURN
  until "$@"; do
    status=$?
    set -e
    delay=$((2 ** count))
    count=$((count + 1))
    if [ "$count" -lt "$retries" ]; then
      echo "Retry $count/$retries exited $status, retrying in $delay seconds..."
      sleep "$delay"
    else
      echo "Retry $count/$retries exited $status, no more retries left."
      return "$status"
    fi
    set +e
  done
  return 0
}
export -f retry

# EXIT handler: when PRESERVE is "false", deletes the EKS cluster and both ECR
# repositories. Every step is best effort (`|| :`).
# REGION, NTH_REPO_NAME and WEBHOOK_REPO_NAME are not assigned in this file;
# presumably provision-cluster sets them -- TODO confirm.
function clean_up {
  if [[ "$PRESERVE" == false ]]; then
    echo "====================================================================================================="
    echo "🧹 Cleaning up EKS and ECR 🧹"
    echo "====================================================================================================="
    eksctl delete cluster -f "$CLUSTER_CONFIG_FILE" || :
    aws ecr --region "$REGION" delete-repository --repository-name "$NTH_REPO_NAME" --force >/dev/null 2>&1 || :
    aws ecr --region "$REGION" delete-repository --repository-name "$WEBHOOK_REPO_NAME" --force >/dev/null 2>&1 || :
    return
  fi
}

# Required to execute tests
deps=("docker" "eksctl" "helm" "kubectl" "jq")

for dep in "${deps[@]}"; do
  # `command -v` is tested inside the condition so that a missing tool reaches
  # the message below instead of aborting silently under `set -e`.
  if ! command -v "$dep" >/dev/null 2>&1; then
    echo "You are required to have $dep installed on your system..." 1>&2
    echo "Please install $dep and try again. " 1>&2
    exit 3
  fi
done

TEST_OS="linux"
if [[ $TEST_WINDOWS = "true" ]]; then
  TEST_OS="windows"
fi
export TEST_WINDOWS \
  TEST_OS

# First non-option argument, if any, is the CONFIG file.
config=${*:$OPTIND:1}
if [[ -z ${config} ]]; then
  echo "no CONFIG provided; creating EKS cluster using $CLUSTER_CONFIG_FILE"
  # shellcheck source=../eks-cluster-test/provision-cluster
  source "$SCRIPTPATH/../eks-cluster-test/provision-cluster"
else
  echo "Reading configuration from ${config}"
  set -a  # Export variables by default.
  # shellcheck disable=SC1090
  source "$config"
  set +a  # Disable exporting variables by default.
  PRESERVE=true  # cluster user-provided so don't try to delete
fi

# Validate required settings (`:?` aborts when unset or empty) and apply
# defaults (`:=`) while echoing the effective configuration.
echo "CLUSTER_NAME=${CLUSTER_NAME:?"not found"}"
echo "DOCKER_PULL_POLICY=${DOCKER_PULL_POLICY:="IfNotPresent"}"
echo "NODE_TERMINATION_HANDLER_DOCKER_REPO=${NODE_TERMINATION_HANDLER_DOCKER_REPO:?"not found"}"
echo "NODE_TERMINATION_HANDLER_DOCKER_TAG=${NODE_TERMINATION_HANDLER_DOCKER_TAG:?"not found"}"
echo "NODE_TERMINATION_HANDLER_DOCKER_PULL_POLICY=${NODE_TERMINATION_HANDLER_DOCKER_PULL_POLICY:=$DOCKER_PULL_POLICY}"
echo "WEBHOOK_DOCKER_REPO=${WEBHOOK_DOCKER_REPO:?"not found"}"
echo "WEBHOOK_DOCKER_TAG=${WEBHOOK_DOCKER_TAG:?"not found"}"
echo "WEBHOOK_DOCKER_PULL_POLICY=${WEBHOOK_DOCKER_PULL_POLICY:=$DOCKER_PULL_POLICY}"
echo "EC2_METADATA_PORT=${EC2_METADATA_PORT:=18999}"
echo "EC2_METADATA_URL=${EC2_METADATA_URL:="http://amazon-ec2-metadata-mock-service.default.svc.cluster.local:$EC2_METADATA_PORT"}"
echo "WEBHOOK_URL=${WEBHOOK_URL:="http://webhook-test-proxy.default.svc.cluster.local"}"

# The e2e test scripts use other variable names.
echo "IMDS_PORT=${IMDS_PORT:=$EC2_METADATA_PORT}"
echo "INSTANCE_METADATA_URL=${INSTANCE_METADATA_URL:=$EC2_METADATA_URL}"

# Verify kubectl is usable before the first command that talks to the cluster.
if ! command -v kubectl >/dev/null 2>&1; then
  echo "🚫 Cannot find kubectl command"
  exit 1
fi
if ! kubectl get svc >/dev/null 2>&1; then
  echo "🚫 kubectl failed test communication with cluster"
  exit 1
fi

# Label one linux node in the cluster (the last one kubectl returns); nth will
# be deployed to this node ONLY -- NEEDED FOR LINUX RUN ONLY:
if [[ $TEST_OS = "linux" ]]; then
  nth_node=$(kubectl get node --selector="kubernetes.io/os=linux" --output jsonpath='{.items[-1].metadata.name}')
  export NTH_WORKER_LABEL="role=nth_worker"
  kubectl label nodes "$nth_node" "$NTH_WORKER_LABEL" --overwrite
fi

export NODE_TERMINATION_HANDLER_DOCKER_PULL_POLICY \
  WEBHOOK_DOCKER_PULL_POLICY \
  WEBHOOK_URL \
  IMDS_PORT \
  INSTANCE_METADATA_URL

# First column (node name) of every node running the target OS.
TEST_NODE=$(kubectl get nodes -l "kubernetes.io/os=$TEST_OS" | tail -n +2 | tr -s "\t" " " | cut -d" " -f1)
export TEST_NODE
echo "TEST_NODE=${TEST_NODE:?"not found"}"

# Prints the name of the first pod in kube-system labelled
# k8s-app=aws-node-termination-handler.
function get_nth_worker_pod {
  kubectl get pods -n kube-system -l k8s-app=aws-node-termination-handler |
    tail -n +2 |
    tr -s "\t" " " |
    cut -d " " -f 1 |
    head -1
}
export -f get_nth_worker_pod

# Runs ../eks-cluster-test/reset-cluster, then sleeps.
# Arguments: $1 - seconds to sleep afterwards (defaults to 15)
# shellcheck disable=SC2120
function reset_cluster {
  echo "-------------------------------------------------------------------------------------------------"
  echo "🧹 Resetting cluster $CLUSTER_NAME"
  echo "-------------------------------------------------------------------------------------------------"
  "$SCRIPTPATH/../eks-cluster-test/reset-cluster"
  sleep "${1:-15}"
}

# Default set of assertion scripts, used when -a was not given.
if [[ -z ${assertion_scripts+x} ]]; then
  assertion_scripts=(
    "$SCRIPTPATH/../e2e/cordon-only-test"
    "$SCRIPTPATH/../e2e/imds-v2-test"
    "$SCRIPTPATH/../e2e/maintenance-event-cancellation-test"
    "$SCRIPTPATH/../e2e/maintenance-event-dry-run-test"
    #"$SCRIPTPATH/../e2e/maintenance-event-reboot-test"
    "$SCRIPTPATH/../e2e/maintenance-event-test"
    "$SCRIPTPATH/../e2e/spot-interruption-dry-run-test"
    "$SCRIPTPATH/../e2e/spot-interruption-test"
    #"$SCRIPTPATH/../e2e/webhook-http-proxy-test"
    #"$SCRIPTPATH/../e2e/webhook-secret-test"
    "$SCRIPTPATH/../e2e/webhook-test"
  )
fi

echo "Assertion script(s): ${assertion_scripts[*]}"

trap "exit_and_fail" INT TERM ERR
trap "clean_up" EXIT

for assertion_script in "${assertion_scripts[@]}"; do
  reset_cluster
  echo "================================================================================================="
  echo "🥑 Running assertion script $(basename "$assertion_script")"
  echo "================================================================================================="
  assertion_start=$(date +%s)
  "$assertion_script"
  assertion_end=$(date +%s)
  echo "⏰ Took $(( assertion_end - assertion_start ))sec"

  # Dump the NTH pod logs after each passing script; best effort only.
  pod_id=$(get_nth_worker_pod || :)
  if [[ -n "$pod_id" ]]; then
    kubectl logs "$pod_id" --namespace kube-system || :
  fi
  echo "✅ Assertion test $assertion_script PASSED! ✅"
done
reset_cluster

echo "====================================================================================================="
echo "✅ All tests passed! ✅"
echo "====================================================================================================="