#!/bin/bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# This script will obtain the Solace message broker's docker image from
# sources and installs and runs it in a docker container
# solace_uri specifies where to get the docker image from
# - default (not specified): PubSub+ Standard from docker hub
# - Other public docker registry URI
# - solace.com/download
# - specified location of a docker image tarball URL
#
OPTIND=1 # Reset in case getopts has been used previously in the shell.
# Initialize our own variables:
config_file=""
solace_directory="."
solace_uri="solace/solace-pubsub-standard:latest" # default to pull latest PubSub+ standard from docker hub
admin_password_file=""
disk_size=""
disk_volume=""
ha_deployment="true"
is_primary="false"
logging_format=""
logging_group=""
logging_stream=""
max_connections="100"
max_queue_messages="100"
while getopts "c:d:p:s:u:v:h:f:g:r:n:q:" opt; do
case "$opt" in
c) config_file=$OPTARG
;;
d) solace_directory=$OPTARG
;;
p) admin_password_file=$OPTARG
;;
s) disk_size=$OPTARG
;;
u) solace_uri=$OPTARG
;;
v) disk_volume=$OPTARG
;;
h) ha_deployment=$OPTARG
;;
f) logging_format=$OPTARG
;;
g) logging_group=$OPTARG
;;
r) logging_stream=$OPTARG
;;
n) max_connections=$OPTARG
;;
q) max_queue_messages=$OPTARG
;;
esac
done
shift $((OPTIND-1))
[ "$1" = "--" ] && shift
echo "config_file=$config_file, solace_directory=$solace_directory, admin_password_file=$admin_password_file, \
solace_uri=$solace_uri, disk_size=$disk_size, volume=$disk_volume, ha_deployment=$ha_deployment, logging_format=$logging_format, \
logging_group=$logging_group, logging_stream=$logging_stream, max_connections=$max_connections, max_queue_messages=$max_queue_messages, Leftovers: $@"
export admin_password=`cat ${admin_password_file}`
# Create working dir if needed
mkdir -p ${solace_directory}
echo "`date` INFO: RETRIEVE SOLACE DOCKER IMAGE"
echo "###############################################################"
# Determine first if solace_uri is a valid docker registry uri
## First make sure Docker is actually up
docker_running=""
loop_guard=10
loop_count=0
while [ ${loop_count} != ${loop_guard} ]; do
docker_running=`service docker status | grep -o running`
if [ ${docker_running} != "running" ]; then
((loop_count++))
echo "`date` WARN: Tried to launch Solace but Docker in state ${docker_running}"
sleep 5
else
echo "`date` INFO: Docker in state ${docker_running}"
break
fi
done
## Remove any existing solace image
if [ "`docker images | grep solace-`" ] ; then
echo "`date` INFO: Removing existing Solace images from local docker repo"
docker rmi -f `docker images | grep solace- | awk '{print $3}'`
fi
## Try to load solace_uri as a docker registry uri
echo "`date` Testing ${solace_uri} for docker registry uri:"
if [ -z "`docker pull ${solace_uri}`" ] ; then
# If NOT in this branch then load was successful
echo "`date` INFO: Found that ${solace_uri} was not a docker registry uri, retrying if it is a download link"
if [[ ${solace_uri} == *"solace.com/download"* ]]; then
REAL_LINK=${solace_uri}
# the new download url
wget -O ${solace_directory}/solos.info -nv ${solace_uri}_MD5
else
REAL_LINK=${solace_uri}
# an already-existing load (plus its md5 file) hosted somewhere else (e.g. in an s3 bucket)
wget -O ${solace_directory}/solos.info -nv ${solace_uri}.md5
fi
IFS=' ' read -ra SOLOS_INFO <<< `cat ${solace_directory}/solos.info`
MD5_SUM=${SOLOS_INFO[0]}
SolOS_LOAD=${SOLOS_INFO[1]}
if [ -z ${MD5_SUM} ]; then
echo "`date` ERROR: Missing md5sum for the Solace load - exiting." | tee /dev/stderr
exit 1
fi
echo "`date` INFO: Reference md5sum is: ${MD5_SUM}"
echo "`date` INFO: Now download from URL provided and validate"
wget -q -O ${solace_directory}/${SolOS_LOAD} ${REAL_LINK}
## Check MD5
LOCAL_OS_INFO=`md5sum ${solace_directory}/${SolOS_LOAD}`
IFS=' ' read -ra SOLOS_INFO <<< ${LOCAL_OS_INFO}
LOCAL_MD5_SUM=${SOLOS_INFO[0]}
if [ -z "${MD5_SUM}" ] || [ "${LOCAL_MD5_SUM}" != "${MD5_SUM}" ]; then
echo "`date` ERROR: Possible corrupt Solace load, md5sum do not match - exiting." | tee /dev/stderr
exit 1
else
echo "`date` INFO: Successfully downloaded ${SolOS_LOAD}"
fi
## Load the image tarball
docker load -i ${solace_directory}/${SolOS_LOAD}
fi
## Image details
export SOLACE_IMAGE_ID=`docker images | grep solace | awk '{print $3}'`
if [ -z "${SOLACE_IMAGE_ID}" ] ; then
echo "`date` ERROR: Could not load a valid Solace docker image - exiting." | tee /dev/stderr
exit 1
fi
echo "`date` INFO: Successfully loaded ${solace_uri} to local docker repo"
echo "`date` INFO: Solace message broker image and tag: `docker images | grep solace | awk '{print $1,":",$2}'`"
# Common for all scalings
shmsize="1g"
ulimit_nofile="2448:422192"
SWAP_SIZE="2048"
echo "`date` INFO: Using shmsize: ${shmsize}, ulimit_nofile: ${ulimit_nofile}, SWAP_SIZE: ${SWAP_SIZE}"
echo "`date` INFO: Creating Swap space"
mkdir /var/lib/solace
dd if=/dev/zero of=/var/lib/solace/swap count=${SWAP_SIZE} bs=1MiB
mkswap -f /var/lib/solace/swap
chmod 0600 /var/lib/solace/swap
swapon -f /var/lib/solace/swap
grep -q 'solace\/swap' /etc/fstab || sudo sh -c 'echo "/var/lib/solace/swap none swap sw 0 0" >> /etc/fstab'
echo "`date` INFO: Applying TCP for WAN optimizations"
echo '
net.core.rmem_max = 134217728
net.core.wmem_max = 134217728
net.ipv4.tcp_rmem = 4096 25165824 67108864
net.ipv4.tcp_wmem = 4096 25165824 67108864
net.ipv4.tcp_mtu_probing=1' | sudo tee /etc/sysctl.d/98-solace-sysctl.conf
sudo sysctl -p /etc/sysctl.d/98-solace-sysctl.conf
cd ${solace_directory}
# Setup password file permissions
chown -R 1000001 $(dirname ${admin_password_file})
chmod 700 $(dirname ${admin_password_file})
if [[ ${disk_size} == "0" ]]; then
echo "`date` Using ephemeral volume"
#Create new volume that the PubSub+ Message Broker container can use to consume and store data.
docker volume create --name=solace
SPOOL_MOUNT="-v solace:/var/lib/solace"
else
echo "`date` Using persistent volume"
echo "`date` Create primary partition on new disk"
(
echo n # Add a new partition
echo p # Primary partition
echo 1 # Partition number
echo # First sector (Accept default: 1)
echo # Last sector (Accept default: varies)
echo w # Write changes
) | sudo fdisk $disk_volume
mkfs.xfs ${disk_volume}1 -m crc=0
UUID=`blkid -s UUID -o value ${disk_volume}1`
echo "UUID=${UUID} /opt/pubsubplus xfs defaults 0 0" >> /etc/fstab
mkdir /opt/pubsubplus
mount -a
mkdir /opt/pubsubplus/solace
chown 1000001 -R /opt/pubsubplus/
#chmod -R 777 /opt/pubsubplus
SPOOL_MOUNT="-v /opt/pubsubplus/solace:/var/lib/solace"
fi
############# From here execution path is different for nonHA and HA
if [[ $ha_deployment != "true" ]]; then
############# non-HA setup begins
echo "`date` Continuing single-node setup in a non-HA deployment"
#Define a create script
tee ~/docker-create <<- EOF
#!/bin/bash
docker create \
--uts=host \
--shm-size ${shmsize} \
--ulimit core=-1 \
--ulimit memlock=-1 \
--ulimit nofile=${ulimit_nofile} \
--env "system_scaling_maxconnectioncount=${max_connections}" \
--env "system_scaling_maxqueuemessagecount=${max_queue_messages}" \
--net=host \
--restart=always \
-v /mnt/pubsubplus/secrets:/run/secrets \
${SPOOL_MOUNT} \
--env "username_admin_globalaccesslevel=admin" \
--env "username_admin_passwordfilepath=$(basename ${admin_password_file})" \
--env "service_ssh_port=2222" \
--env "service_webtransport_port=8008" \
--env "service_webtransport_tlsport=1443" \
--env "service_semp_tlsport=1943" \
--name=solace ${SOLACE_IMAGE_ID}
EOF
#Make the file executable
chmod +x ~/docker-create
echo "`date` INFO: Creating the broker container"
~/docker-create
# Start the solace service and enable it at system start up.
chkconfig --add solace-pubsubplus
echo "`date` INFO: Starting Solace service"
service solace-pubsubplus start
# Remove all message broker Secrets from the host; at this point, the message broker should have come up
# and it won't be needing those files anymore
rm ${admin_password_file}
# Poll the broker Message-Spool
loop_guard=30
pause=10
count=0
echo "`date` INFO: Wait for the broker message-spool service to be guaranteed-active"
while [ ${count} -lt ${loop_guard} ]; do
health_result=`curl -s -o /dev/null -w "%{http_code}" http://localhost:5550/health-check/guaranteed-active`
run_time=$((${count} * ${pause}))
if [ "${health_result}" = "200" ]; then
echo "`date` INFO: broker message-spool is guaranteed-active, after ${run_time} seconds"
break
fi
((count++))
echo "`date` INFO: Waited ${run_time} seconds, broker message-spool not yet guaranteed-active. State: ${health_result}"
sleep ${pause}
done
if [ ${count} -eq ${loop_guard} ]; then
echo "`date` ERROR: broker message-spool never came guaranteed-active" | tee /dev/stderr
exit 1
fi
echo "`date` INFO: PubSub+ non-HA node bringup complete"
exit
############# non-HA setup ends
fi
############# From here it's all HA setup
echo "`date` Continuing node setup in an HA deployment"
# Determine components
host_name=`hostname`
host_info=`grep ${host_name} ${config_file}`
local_role=`echo $host_info | grep -o -E 'Monitor|EventBrokerPrimary|EventBrokerBackup'`
primary_stack=`cat ${config_file} | grep EventBrokerPrimary | rev | cut -d "-" -f1 | rev | tr '[:upper:]' '[:lower:]'`
backup_stack=`cat ${config_file} | grep EventBrokerBackup | rev | cut -d "-" -f1 | rev | tr '[:upper:]' '[:lower:]'`
monitor_stack=`cat ${config_file} | grep Monitor | rev | cut -d "-" -f1 | rev | tr '[:upper:]' '[:lower:]'`
# Get the IP addressed for node
for role in Monitor EventBrokerPrimary EventBrokerBackup
do
role_info=`grep ${role} ${config_file}`
role_name=${role_info%% *}
role_ip=`echo ${role_name} | cut -c 4- | tr "-" .`
case $role in
Monitor )
MONITOR_IP=${role_ip}
;;
EventBrokerPrimary )
PRIMARY_IP=${role_ip}
;;
EventBrokerBackup )
BACKUP_IP=${role_ip}
;;
esac
done
case $local_role in
Monitor )
NODE_TYPE="monitoring"
ROUTER_NAME="monitor${monitor_stack}"
REDUNDANCY_CFG=""
;;
EventBrokerPrimary )
NODE_TYPE="message_routing"
ROUTER_NAME="primary${primary_stack}"
REDUNDANCY_CFG="--env redundancy_matelink_connectvia=${BACKUP_IP} --env redundancy_activestandbyrole=primary --env configsync_enable=yes"
is_primary="true"
;;
EventBrokerBackup )
NODE_TYPE="message_routing"
ROUTER_NAME="backup${backup_stack}"
REDUNDANCY_CFG="--env redundancy_matelink_connectvia=${PRIMARY_IP} --env redundancy_activestandbyrole=backup --env configsync_enable=yes"
;;
esac
#Define a create script
tee ~/docker-create <<- EOF
#!/bin/bash
docker create \
--uts=host \
--shm-size=${shmsize} \
--ulimit core=-1 \
--ulimit memlock=-1 \
--ulimit nofile=${ulimit_nofile} \
--net=host \
--restart=always \
-v /mnt/pubsubplus/secrets:/run/secrets \
${SPOOL_MOUNT} \
--log-driver awslogs \
--log-opt awslogs-group=${logging_group} \
--log-opt awslogs-stream=${logging_stream} \
--env "system_scaling_maxconnectioncount=${max_connections}" \
--env "system_scaling_maxqueuemessagecount=${max_queue_messages}" \
--env "logging_debug_output=all" \
--env "logging_debug_format=${logging_format}" \
--env "logging_command_output=all" \
--env "logging_command_format=${logging_format}" \
--env "logging_system_output=all" \
--env "logging_system_format=${logging_format}" \
--env "logging_event_output=all" \
--env "logging_event_format=${logging_format}" \
--env "logging_kernel_output=all" \
--env "logging_kernel_format=${logging_format}" \
--env "nodetype=${NODE_TYPE}" \
--env "routername=${ROUTER_NAME}" \
--env "username_admin_globalaccesslevel=admin" \
--env "username_admin_passwordfilepath=$(basename ${admin_password_file})" \
--env "service_ssh_port=2222" \
--env "service_webtransport_port=8008" \
--env "service_webtransport_tlsport=1443" \
--env "service_semp_tlsport=1943" \
${REDUNDANCY_CFG} \
--env "redundancy_authentication_presharedkey_key=`cat ${admin_password_file} | awk '{x=$0;for(i=length;i<51;i++)x=x "0";}END{print x}' | base64`" \
--env "redundancy_enable=yes" \
--env "redundancy_group_node_primary${primary_stack}_nodetype=message_routing" \
--env "redundancy_group_node_primary${primary_stack}_connectvia=${PRIMARY_IP}" \
--env "redundancy_group_node_backup${backup_stack}_nodetype=message_routing" \
--env "redundancy_group_node_backup${backup_stack}_connectvia=${BACKUP_IP}" \
--env "redundancy_group_node_monitor${monitor_stack}_nodetype=monitoring" \
--env "redundancy_group_node_monitor${monitor_stack}_connectvia=${MONITOR_IP}" \
--name=solace ${SOLACE_IMAGE_ID}
EOF
#Make the file executable
chmod +x ~/docker-create
echo "`date` INFO: Creating the broker container"
~/docker-create
# Start the solace service and enable it at system start up.
chkconfig --add solace-pubsubplus
echo "`date` INFO: Starting Solace service"
service solace-pubsubplus start
# Poll the message broker SEMP port until it is Up
loop_guard=30
pause=10
count=0
echo "`date` INFO: Wait for the Solace SEMP service to be enabled"
while [ ${count} -lt ${loop_guard} ]; do
online_results=`/tmp/semp_query.sh -n admin -p ${admin_password} -u http://localhost:8080/SEMP \
-q "" \
-v "/rpc-reply/rpc/show/service/services/service[name='SEMP']/enabled[text()]"`
is_messagebroker_up=`echo ${online_results} | jq '.valueSearchResult' -`
echo "`date` INFO: SEMP service 'enabled' status is: ${is_messagebroker_up}"
run_time=$((${count} * ${pause}))
if [ "${is_messagebroker_up}" = "\"true\"" ]; then
echo "`date` INFO: Solace message broker SEMP service is up, after ${run_time} seconds"
break
fi
((count++))
echo "`date` INFO: Waited ${run_time} seconds, Solace message broker SEMP service not yet up"
sleep ${pause}
done
# Remove all message broker Secrets from the host; at this point, the message broker should have come up
# and it won't be needing those files anymore
rm ${admin_password_file}
# Poll the redundancy status on the Primary message broker
if [ "${is_primary}" = "true" ]; then
loop_guard=30
pause=10
count=0
mate_active_check=""
echo "`date` INFO: Wait for Primary to be 'Local Active' or 'Mate Active'"
while [ ${count} -lt ${loop_guard} ]; do
online_results=`/tmp/semp_query.sh -n admin -p ${admin_password} -u http://localhost:8080/SEMP \
-q "" \
-v "/rpc-reply/rpc/show/redundancy/virtual-routers/primary/status/activity[text()]"`
local_activity=`echo ${online_results} | jq '.valueSearchResult' -`
echo "`date` INFO: Local activity state is: ${local_activity}"
run_time=$((${count} * ${pause}))
case "${local_activity}" in
"\"Local Active\"")
echo "`date` INFO: Redundancy is up locally, Primary Active, after ${run_time} seconds"
mate_active_check="Standby"
break
;;
"\"Mate Active\"")
echo "`date` INFO: Redundancy is up locally, Backup Active, after ${run_time} seconds"
mate_active_check="Active"
break
;;
esac
((count++))
echo "`date` INFO: Waited ${run_time} seconds, Redundancy not yet up"
sleep ${pause}
done
if [ ${count} -eq ${loop_guard} ]; then
echo "`date` ERROR: Solace redundancy group never came up - exiting." | tee /dev/stderr
exit 1
fi
loop_guard=45
pause=10
count=0
echo "`date` INFO: Wait for Backup to be 'Active' or 'Standby'"
while [ ${count} -lt ${loop_guard} ]; do
online_results=`/tmp/semp_query.sh -n admin -p ${admin_password} -u http://localhost:8080/SEMP \
-q "" \
-v "/rpc-reply/rpc/show/redundancy/virtual-routers/primary/status/detail/priority-reported-by-mate/summary[text()]"`
mate_activity=`echo ${online_results} | jq '.valueSearchResult' -`
echo "`date` INFO: Mate activity state is: ${mate_activity}"
run_time=$((${count} * ${pause}))
case "${mate_activity}" in
"\"Active\"")
echo "`date` INFO: Redundancy is up end-to-end, Backup Active, after ${run_time} seconds"
mate_active_check="Standby"
break
;;
"\"Standby\"")
echo "`date` INFO: Redundancy is up end-to-end, Primary Active, after ${run_time} seconds"
mate_active_check="Active"
break
;;
esac
((count++))
echo "`date` INFO: Waited ${run_time} seconds, Backup not yet 'Active' or 'Standby'"
sleep ${pause}
done
if [ ${count} -eq ${loop_guard} ]; then
echo "`date` ERROR: Backup never became 'Active' or 'Standby' - exiting." | tee /dev/stderr
exit 1
fi
echo "`date` INFO: Initiating config-sync for router"
/tmp/semp_query.sh -n admin -p ${admin_password} -u http://localhost:8080/SEMP \
-q ""
echo "`date` INFO: Initiating config-sync for default vpn"
/tmp/semp_query.sh -n admin -p ${admin_password} -u http://localhost:8080/SEMP \
-q "*"
# Wait for config-sync results
count=0
echo "`date` INFO: Waiting for config-sync connected"
while [ ${count} -lt ${loop_guard} ]; do
online_results=`/tmp/semp_query.sh -n admin -p ${admin_password} -u http://localhost:8080/SEMP \
-q "" \
-v "/rpc-reply/rpc/show/config-sync/status/oper-status"`
confsyncstatus_results=`echo ${online_results} | jq '.valueSearchResult' -`
echo "`date` INFO: Config-sync is: ${confsyncstatus_results}"
run_time=$((${count} * ${pause}))
case "${confsyncstatus_results}" in
"\"Up\"")
echo "`date` INFO: Config-sync is Up, after ${run_time} seconds"
break
;;
esac
((count++))
echo "`date` INFO: Waited ${run_time} seconds, Config-sync is not yet Up"
if (( $count % 18 == 0 )) ; then
echo "`date` INFO: Re-trying initiate config-sync for router"
/tmp/semp_query.sh -n admin -p ${admin_password} -u http://localhost:8080/SEMP \
-q ""
/tmp/semp_query.sh -n admin -p ${admin_password} -u http://localhost:8080/SEMP \
-q "*"
fi
sleep ${pause}
done
if [ ${count} -eq ${loop_guard} ]; then
echo "`date` ERROR: Config-sync never reached state \"Up\" - exiting." | tee /dev/stderr
exit 1
fi
fi
if [ ${count} -eq ${loop_guard} ]; then
echo "`date` ERROR: Solace bringup failed" | tee /dev/stderr
exit 1
fi
echo "`date` INFO: Solace bringup complete"
echo "`date` INFO: PubSub+ HA-node bringup complete"