# Base image coordinates. These ARGs are declared before FROM, so they are only
# visible to the FROM instruction itself.
ARG UBUNTU_VERSION=18.04
ARG CUDA_VERSION=10.2
ARG IMAGE_DIGEST=ed4b2e8703ed3f94029ce0823c44fe57b2800108953cf81f97781d6a2a5535fc

# The base image is pinned by tag and by digest.
FROM nvidia/cuda:${CUDA_VERSION}-base-ubuntu${UBUNTU_VERSION}@sha256:${IMAGE_DIGEST}

# Version pins consumed by the RUN instructions of this stage.
ARG MINICONDA_VERSION=4.9.2
ARG CONDA_PY_VERSION=39
ARG CONDA_CHECKSUM="b4e46fcc8029e2cfa731b788f25b1d36"
ARG CONDA_PKG_VERSION=4.10.1
ARG PYTHON_VERSION=3.8.13
ARG PYARROW_VERSION=1.0
ARG MLIO_VERSION=0.7.0
ARG XGBOOST_VERSION=1.7.4

ENV DEBIAN_FRONTEND=noninteractive
ENV LANG=C.UTF-8
ENV LC_ALL=C.UTF-8

# Python won't try to write .pyc or .pyo files on the import of source modules
# Force stdin, stdout and stderr to be totally unbuffered. Good for logging
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
ENV PYTHONIOENCODING='utf-8'

# System packages and the MLIO build toolchain, installed in a single layer so
# the apt lists removed at the end never reach the image.
#
# The CUDA apt source list and key 7fa2af80 that ship with the base image are
# dropped first and replaced by NVIDIA's cuda-keyring package.
# NOTE(review): presumably needed because the old repository key no longer
# validates - confirm against NVIDIA's key-rotation notice.
RUN rm /etc/apt/sources.list.d/cuda.list && \
    apt-key del 7fa2af80 && \
    apt-get update && \
    apt-get install -y --no-install-recommends wget && \
    # Download the keyring package to /tmp and delete it after installation so
    # the .deb is not left behind in the image.
    wget -nv -O /tmp/cuda-keyring_1.0-1_all.deb \
        https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-keyring_1.0-1_all.deb && \
    dpkg -i /tmp/cuda-keyring_1.0-1_all.deb && \
    rm /tmp/cuda-keyring_1.0-1_all.deb && \
    apt-get update && \
    apt-get -y upgrade && \
    apt-get -y install --no-install-recommends \
        build-essential \
        curl \
        git \
        jq \
        libatlas-base-dev \
        nginx \
        openjdk-8-jdk-headless \
        unzip \
    && \
    # MLIO build dependencies
    # Official Ubuntu APT repositories do not contain an up-to-date version of CMake required to build MLIO.
    # Kitware contains the latest version of CMake.
    apt-get -y install --no-install-recommends \
        apt-transport-https \
        ca-certificates \
        gnupg \
        software-properties-common \
    && \
    # Fetch the Kitware signing key to a file instead of piping wget into gpg:
    # without pipefail a failed download in a pipeline would go unnoticed.
    wget -nv -O /tmp/kitware-archive-latest.asc https://apt.kitware.com/keys/kitware-archive-latest.asc && \
    gpg --dearmor - < /tmp/kitware-archive-latest.asc > /usr/share/keyrings/kitware-archive-keyring.gpg && \
    rm /tmp/kitware-archive-latest.asc && \
    echo 'deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ bionic main' \
        > /etc/apt/sources.list.d/kitware.list && \
    apt-get update && \
    # The manually created keyring is removed before the kitware-archive-keyring
    # package (installed below) is unpacked.
    rm /usr/share/keyrings/kitware-archive-keyring.gpg && \
    apt-get install -y --no-install-recommends \
        autoconf \
        automake \
        cmake=3.18.4-0kitware1 \
        cmake-data=3.18.4-0kitware1 \
        doxygen \
        kitware-archive-keyring \
        libcurl4-openssl-dev \
        libssl-dev \
        libtool \
        ninja-build \
        python3-dev \
        python3-distutils \
        python3-pip \
        zlib1g-dev \
    && \
    rm -rf /var/lib/apt/lists/*

# Install conda
# The installer is verified against CONDA_CHECKSUM (MD5) before it is executed.
# curl -f makes an HTTP error fail the step instead of saving an error page.
RUN curl -fL --output /tmp/Miniconda3.sh \
        https://repo.anaconda.com/miniconda/Miniconda3-py${CONDA_PY_VERSION}_${MINICONDA_VERSION}-Linux-x86_64.sh && \
    echo "${CONDA_CHECKSUM}  /tmp/Miniconda3.sh" | md5sum -c - && \
    bash /tmp/Miniconda3.sh -bfp /miniconda3 && \
    rm /tmp/Miniconda3.sh

ENV PATH=/miniconda3/bin:${PATH}

# Install MLIO with Apache Arrow integration
# We could install mlio-py from conda, but it comes with extra support such as image reader that increases image size
# which increases training time. We build from source to minimize the image size.
# Pin conda and Python, install pyarrow, then build MLIO from the v${MLIO_VERSION}
# tag and install the resulting wheel. The source tree is deleted in the same
# layer so it does not end up in the image.
RUN echo "conda ${CONDA_PKG_VERSION}" >> /miniconda3/conda-meta/pinned && \
    # Conda configuration see https://conda.io/projects/conda/en/latest/configuration.html
    conda config --system --set auto_update_conda false && \
    conda config --system --set show_channel_urls true && \
    echo "python ${PYTHON_VERSION}.*" >> /miniconda3/conda-meta/pinned && \
    # -y on every install: the build has no terminal to answer conda's prompt.
    conda install -y -c conda-forge python=${PYTHON_VERSION} && \
    conda install -y conda=${CONDA_PKG_VERSION} && \
    conda update -y conda && \
    conda install -y -c conda-forge pyarrow=${PYARROW_VERSION} && \
    cd /tmp && \
    git clone --branch v${MLIO_VERSION} https://github.com/awslabs/ml-io.git mlio && \
    cd mlio && \
    build-tools/build-dependency build/third-party all && \
    mkdir -p build/release && \
    cd build/release && \
    # First configure/build/install the core library against the bundled
    # third-party prefix ...
    cmake -GNinja -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_PREFIX_PATH="$(pwd)/../third-party" ../.. && \
    cmake --build . && \
    cmake --build . --target install && \
    # ... then reconfigure with the Python extension and Arrow integration
    # enabled, using the conda interpreter, and build those two targets.
    cmake -DMLIO_INCLUDE_PYTHON_EXTENSION=ON -DPYTHON_EXECUTABLE="/miniconda3/bin/python3" \
        -DMLIO_INCLUDE_ARROW_INTEGRATION=ON ../.. && \
    cmake --build . --target mlio-py && \
    cmake --build . --target mlio-arrow && \
    cd ../../src/mlio-py && \
    python3 setup.py bdist_wheel && \
    # --no-cache-dir keeps pip's download/wheel cache out of the layer.
    # NOTE(review): the `typing` backport is installed on a Python 3.8
    # interpreter - confirm it is still required.
    python3 -m pip install --no-cache-dir typing && \
    python3 -m pip install --no-cache-dir --upgrade pip && \
    python3 -m pip install --no-cache-dir dist/*.whl && \
    # Copy the third-party shared libraries built above into the system library
    # path and refresh the linker cache.
    cp -r /tmp/mlio/build/third-party/lib/intel64/gcc4.7/* /usr/local/lib/ && \
    ldconfig && \
    rm -rf /tmp/mlio

# Install the pinned XGBoost release (XGBOOST_VERSION); -I ignores any copy that
# is already installed.
RUN python3 -m pip install --no-cache-dir -I xgboost==${XGBOOST_VERSION}