#!/bin/bash

################################################################################
# 000: Prolog
################################################################################
# Only support alinux2: bail out early on any other OS flavor.
FLAVOR=$(grep PRETTY_NAME /etc/os-release | cut -d'"' -f 2)
if [[ "$FLAVOR" != "Amazon Linux 2" ]]; then
    echo "${BASH_SOURCE[0]}" does not support alinux instance.
    exit 1
fi

# Utility function to get this script's directory (deals with Mac OSX
# quirkiness). It works on both Linux and OSX.
#
# Outputs: the directory of the symlink-resolved script path on stdout, without
#          a trailing newline.
# Exits:   with status 1 (after a message on stderr) when running on Darwin
#          and greadlink is not available. When called through $(...), that
#          exit only leaves the subshell, so callers must check the status.
get_bin_dir() {
    local READLINK=readlink
    if [[ $(uname) == 'Darwin' ]]; then
        READLINK=greadlink
        # 'command -v' reports absence through its exit status; the previous
        # unquoted '[ $(which greadlink) == "" ]' collapsed to a malformed test
        # when nothing was found, so the error branch never ran.
        # NOTE(review): greadlink is normally shipped by Homebrew's coreutils
        # formula -- confirm the install hint in the message below.
        if ! command -v greadlink > /dev/null 2>&1; then
            echo '[ERROR] Mac OSX requires greadlink. Install with "brew install greadlink"' >&2
            exit 1
        fi
    fi

    # Declare and assign separately so 'local' does not mask a failure, and
    # quote the paths so directories containing whitespace survive.
    local BIN_DIR
    BIN_DIR=$(dirname "$("$READLINK" -f "${BASH_SOURCE[0]}")")
    echo -n "${BIN_DIR}"
}

# SECONDS is bash's built-in timer; reset it so the elapsed time can be
# reported at the end of the script.
SECONDS=0
# Abort when get_bin_dir fails; otherwise BIN_DIR would silently be empty.
BIN_DIR=$(get_bin_dir) || exit 1
# Set to 1 to run the docker-related configuration steps later on.
CONFIG_DOCKER=1

# Ensure that we run only on a SageMaker classic notebook instance.
"${BIN_DIR}/ensure-smnb.sh" || exit 1


################################################################################
# 010: Dependencies
################################################################################
# Early install aria2c CLI, as it may be required by jobs running in subprocesses.
(
    echo "max_connections=10" | sudo tee -a /etc/yum.conf

    # Lots of problems, from wrong .repo content to broken selinux-container.
    # Best effort: the repo file may not exist.
    sudo rm /etc/yum.repos.d/docker-ce.repo || true

    sudo amazon-linux-extras install -y epel
    sudo yum install -y aria2
)

# Placeholder to store persistent config files
mkdir -p ~/SageMaker/.initsmnb.d

# Hold symlinks of select binaries from the 'base' conda environment, so that
# custom environments don't have to install them, e.g., nbdime, docker-compose.
mkdir -p ~/.local/bin


################################################################################
# 020: Here we go...
################################################################################

# Run each given command and print a greppable status marker for it.
#
# Arguments: one or more commands; each argument is a single executable path
#            or command name and is invoked without arguments. Arguments are
#            no longer word-split, so paths containing spaces work.
# Outputs:   after each command finishes, "INITSMNB SUCCESS <basename>" or
#            "INITSMNB ERROR <basename>" on stdout, in addition to whatever
#            the command itself prints.
# Returns:   the status of the last marker echo (normally 0); command failures
#            are reported through the markers only.
run_and_track_stat() {
    local cmd
    local basecmd
    for cmd in "$@"; do
        basecmd=$(basename -- "$cmd")
        # Explicit if/else: with 'cmd && echo ok || echo err' the error marker
        # would also be printed if the success echo itself failed.
        if "$cmd"; then
            echo "INITSMNB SUCCESS $basecmd"
        else
            echo "INITSMNB ERROR $basecmd"
        fi
    done
}

# Install the CLI tools one after another in a background subshell. All output,
# including the status markers, is captured in ~/INITSMNB-install-cli.txt.
(
    run_and_track_stat "${BIN_DIR}/install-cli.sh"
    run_and_track_stat "${BIN_DIR}/duf.sh"
    run_and_track_stat "${BIN_DIR}/s5cmd.sh"
) &> ~/INITSMNB-install-cli.txt &

# These require jupyter lab restarted and browser reloaded, to see the changes.
"${BIN_DIR}/patch-jupyter-config.sh"
"${BIN_DIR}/change-jlab-ui.sh"

# Disable jupyterlab git extension. For power git users, who don't like to
# be distracted by jlab's frequent status changes on lower-left status bar.
~/anaconda3/envs/JupyterSystemEnv/bin/jupyter labextension disable '@jupyterlab/git'
~/anaconda3/envs/JupyterSystemEnv/bin/jupyter labextension disable 'jupyterlab_git'
~/anaconda3/envs/JupyterSystemEnv/bin/jupyter server extension disable jupyterlab_git

# To prevent .ipynb_checkpoints/ in the tarball generated by SageMaker SDK
# for training scripts, framework processing scripts, and model repack.
echo "c.FileCheckpoints.checkpoint_dir = '/tmp/.ipynb_checkpoints'" \
    >> ~/.jupyter/jupyter_notebook_config.py
echo "c.FileCheckpoints.checkpoint_dir = '/tmp/.ipynb_checkpoints'" \
    >> ~/.jupyter/jupyter_server_config.py

# Start these installers in the background, each logging (status markers
# included) to its own ~/INITSMNB-*.txt file. BIN_DIR is quoted so the script
# also works when it lives under a path containing whitespace.
run_and_track_stat "${BIN_DIR}/install-pipx.sh" &> ~/INITSMNB-install-pipx.txt &
run_and_track_stat "${BIN_DIR}/upgrade-jupyter.sh" &> ~/INITSMNB-upgrade-jupyter.txt &
run_and_track_stat "${BIN_DIR}/install-cdk.sh" &> ~/INITSMNB-install-cdk.txt &
run_and_track_stat "${BIN_DIR}/install-code-server.sh" &> ~/INITSMNB-install-code-server.txt &

# Foreground configuration steps, run one after another.
# NOTE(review): the name/e-mail arguments look like placeholders to be replaced
# with the real git identity -- confirm before use.
"${BIN_DIR}/adjust-sm-git.sh" 'Firstname Lastname' first.last@email.abc
"${BIN_DIR}/fix-osx-keymap.sh"
"${BIN_DIR}/patch-bash-config.sh"
"${BIN_DIR}/fix-ipython.sh"
"${BIN_DIR}/init-vim.sh"
"${BIN_DIR}/fix-pyspark-smnb.sh"
# NOTE(review): 'fsid,fsapid,mountpoint' presumably stands for the real EFS
# file-system id, access-point id and mount point -- confirm before use.
"${BIN_DIR}/mount-efs-accesspoint.sh" fsid,fsapid,mountpoint

if [[ $CONFIG_DOCKER == 1 ]]; then
    # Dances needed before we can start using the SageMaker local mode.
    "${BIN_DIR}/enable-sm-local-mode.sh"

    # ~/SageMaker EBS can be upsized on demand and survives reboot. Hence, use
    # it for images, layers, caches, build temp dirs, etc.
    "${BIN_DIR}/change-docker-data-root.sh"
    "${BIN_DIR}/change-docker-tmp-dir.sh"
    "${BIN_DIR}/restart-docker.sh"
fi


################################################################################
# 030: Wrapping up...
################################################################################
# Wait for background jobs to complete.
COLOR_RED="\033[1;31m"
COLOR_OFF="\033[0m"
# List the still-running jobs and their log files before blocking on them.
echo -e "
Waiting for these jobs to complete...

$(jobs)

They may take ${COLOR_RED}~4 minutes${COLOR_OFF} (on ml.t3.medium in ap-southeast-1 / Singapore).
Job logs: $(ls -al ~/INITSMNB-*.txt)
"
wait

# Improve code-server's UX in dealing with persistent conda environments.
~/anaconda3/bin/conda config --append envs_dirs ~/SageMaker/envs

# Free up a bit more space on the ephemeral volume
sudo yum clean packages
rm -fr ~/.cache/{pip,yarn}/

# This operation turns out to be slow...
#~/anaconda3/condabin/conda clean --all -y

# Any failed jobs? Collect the INITSMNB status markers from every job log in
# the home directory. 'grep -E' replaces the obsolescent 'egrep' alias.
echo -e "\nJobs status:"
grep -E -e '^INITSMNB SUCCESS|^INITSMNB ERROR' ~/INITSMNB*txt


################################################################################
# 040: Epilog
################################################################################
# Final checks and next steps to see the changes in-effect
"${BIN_DIR}/final-check.sh"

# SECONDS is bash's built-in timer, counting seconds of script run time.
echo "Elapsed: $((SECONDS / 60))min $((SECONDS % 60))sec
"