# Build argument selecting the AWS region of the ECR registry that hosts the
# base image. Supply it with: docker build --build-arg region=<aws-region> ...
ARG region
FROM 520713654638.dkr.ecr.$region.amazonaws.com/sagemaker-tensorflow-scriptmode:1.11.0-cpu-py3

# An ARG declared before FROM is visible only to FROM itself. Redeclare it
# inside the build stage so the echo below prints the value rather than an
# empty line.
ARG region
RUN echo $region

# Tools needed to download and compile Open MPI below. The apt package lists
# are removed in the same layer so they do not end up in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        wget \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

##############################################################
# Horovod & its dependencies
##############################################################

# Install Open MPI 3.0.0 from source. No --prefix is passed to configure, so
# the binaries are installed under /usr/local (the mpirun wrapper below relies
# on /usr/local/bin/mpirun). The build tree is deleted in the same layer.
# NOTE(review): the tarball is downloaded without checksum verification.
RUN mkdir /tmp/openmpi && \
    cd /tmp/openmpi && \
    wget https://www.open-mpi.org/software/ompi/v3.0/downloads/openmpi-3.0.0.tar.gz && \
    tar zxf openmpi-3.0.0.tar.gz && \
    cd openmpi-3.0.0 && \
    ./configure --enable-orterun-prefix-by-default && \
    make -j $(nproc) all && \
    make install && \
    ldconfig && \
    rm -rf /tmp/openmpi

# Install Horovod, temporarily using CUDA stubs.
# The first ldconfig adds the stubs directory to the linker cache for the
# duration of the pip build; the trailing ldconfig rebuilds the cache without it.
# NOTE(review): the base image tag is "-cpu"; confirm the CUDA stubs directory
# is actually present/needed here.
RUN ldconfig /usr/local/cuda-9.0/targets/x86_64-linux/lib/stubs && \
    HOROVOD_CPU_ALLREDUCE=MPI HOROVOD_WITH_TENSORFLOW=1 pip install --no-cache-dir horovod && \
    ldconfig

# Create a wrapper for OpenMPI to allow running as root by default.
# The real binary is renamed to mpirun.real and a two-line bash script that
# always passes --allow-run-as-root takes its place.
RUN mv /usr/local/bin/mpirun /usr/local/bin/mpirun.real && \
    echo '#!/bin/bash' > /usr/local/bin/mpirun && \
    echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/local/bin/mpirun && \
    chmod a+x /usr/local/bin/mpirun

# Configure OpenMPI to run with good defaults, equivalent to:
#   --bind-to none --map-by slot
# NOTE(review): "--mca btl_tcp_if_exclude lo,docker0" was listed here as well,
# but no btl_tcp_if_exclude entry is written to the config file. Confirm
# whether that omission is intentional.
RUN echo "hwloc_base_binding_policy = none" >> /usr/local/etc/openmpi-mca-params.conf && \
    echo "rmaps_base_mapping_policy = slot" >> /usr/local/etc/openmpi-mca-params.conf

# Runtime search paths. The final PATH order is nvidia, then openmpi, then the
# inherited PATH.
# NOTE(review): Open MPI above is installed under /usr/local, not
# /usr/local/openmpi; these entries are kept as they were in case other
# tooling expects them.
ENV LD_LIBRARY_PATH=/usr/local/openmpi/lib:$LD_LIBRARY_PATH
ENV PATH=/usr/local/nvidia/bin:/usr/local/openmpi/bin/:$PATH

# Do not write .pyc files and do not buffer Python stdout/stderr.
ENV PYTHONDONTWRITEBYTECODE=1 PYTHONUNBUFFERED=1

# SSH. Partially taken from https://docs.docker.com/engine/examples/running_ssh_service/
# The apt package lists are removed in the same layer that created them.
RUN apt-get update && apt-get install -y --no-install-recommends openssh-server && \
    mkdir -p /var/run/sshd && \
    rm -rf /var/lib/apt/lists/*

# SSH login fix. Otherwise user is kicked off after login.
RUN sed 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g' -i /etc/pam.d/sshd

# Create SSH key.
# A passphrase-less RSA key pair is generated at build time and authorized for
# root, and host key checking is disabled for every host.
# NOTE(review): the private key is baked into the image, so every container
# started from this image shares it.
RUN mkdir -p /root/.ssh/ && \
    ssh-keygen -q -t rsa -N '' -f /root/.ssh/id_rsa && \
    cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys && \
    printf "Host *\n  StrictHostKeyChecking no\n" >> /root/.ssh/config

# Hostname Fix.
# The script is installed both at / and in /usr/local/bin.
# NOTE(review): presumably change-hostname.sh builds/uses changehostname.c at
# container start - confirm against the script.
COPY docker/resources/changehostname.c /
COPY docker/resources/change-hostname.sh /
COPY docker/resources/change-hostname.sh /usr/local/bin/change-hostname.sh
RUN chmod +x /change-hostname.sh /usr/local/bin/change-hostname.sh

# Extra Python packages. --no-cache-dir keeps pip's download cache out of the
# image layer.
# NOTE(review): versions are unpinned; pin them once versions compatible with
# the base image's TensorFlow 1.11 are confirmed.
RUN pip install --no-cache-dir keras retrying

##############################################################