# Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.

FROM nvidia/cuda:10.2-devel-ubuntu18.04

ARG EFA_INSTALLER_VERSION=1.9.5
ARG NCCL_INSTALL_VERSION=v2.6.4-1
ARG AWS_OFI_NCCL_VERSION=aws
ARG NCCL_TESTS_VERSION=v2.0.0

RUN apt-get update -y
RUN apt-get install -y --allow-unauthenticated \
    git \
    gcc \
    yum-utils \
    vim \
    kmod \
    openssh-client \
    openssh-server \
    build-essential \
    cuda-cudart-dev-10-2 \
    cuda-command-line-tools-10-2 \
    cuda-cufft-dev-10-2 \
    cuda-curand-dev-10-2 \
    cuda-cusolver-dev-10-2 \
    cuda-cusparse-dev-10-2 \
    curl \
    autoconf \
    libtool \
    gdb \
    automake


ENV HOME /tmp

ENV LD_LIBRARY_PATH=/usr/local/cuda/extras/CUPTI/lib64:/opt/amazon/openmpi/lib:$HOME/nccl/build/lib:/opt/amazon/efa/lib:/$HOME/aws-ofi-nccl/install/lib:$LD_LIBRARY_PATH
ENV PATH=/opt/amazon/openmpi/bin/:/opt/amazon/efa/bin:$PATH


#################################################
## Install EFA installer
RUN cd $HOME \
    && curl -O https://efa-installer.amazonaws.com/aws-efa-installer-${EFA_INSTALLER_VERSION}.tar.gz \
    && tar -xf $HOME/aws-efa-installer-${EFA_INSTALLER_VERSION}.tar.gz \
    && cd aws-efa-installer \
    && ./efa_installer.sh -y -d --skip-kmod --skip-limit-conf --no-verify | tee $HOME/install.log

## Test EFA installation
RUN /opt/amazon/efa/bin/fi_info --version

###################################################
## Install NCCL
RUN git clone https://github.com/NVIDIA/nccl.git $HOME/nccl \
    && cd $HOME/nccl \
    && git checkout ${NCCL_INSTALL_VERSION} \
    && make -j src.build CUDA_HOME=/usr/local/cuda \
    NVCC_GENCODE="-gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_60,code=sm_60"

###################################################
## Install AWS-OFI-NCCL plugin
RUN git clone https://github.com/aws/aws-ofi-nccl.git $HOME/aws-ofi-nccl \
    && cd $HOME/aws-ofi-nccl \
    && git checkout ${AWS_OFI_NCCL_VERSION} \
    && ./autogen.sh \
    && ./configure --prefix=$HOME/aws-ofi-nccl/install \
       --with-libfabric=/opt/amazon/efa/ \
       --with-cuda=/usr/local/cuda \
       --with-nccl=$HOME/nccl/build \
       --with-mpi=/opt/amazon/openmpi/ \
    && make && make install

###################################################
## Install NCCL-tests
RUN git clone https://github.com/NVIDIA/nccl-tests.git $HOME/nccl-tests \
    && cd $HOME/nccl-tests \
    && git checkout ${NCCL_TESTS_VERSION} \
    && make MPI=1 \
       MPI_HOME=/opt/amazon/openmpi/ \
       CUDA_HOME=/usr/local/cuda \
       NCCL_HOME=$HOME/nccl/build

CMD echo "EFA Setup Complete! "