# Base image: PyTorch 1.10.0 GPU training image (Python 3.8), pulled from an
# AWS ECR registry in us-west-2.
FROM 763104351884.dkr.ecr.us-west-2.amazonaws.com/pytorch-training:1.10.0-gpu-py38

# Install linux packages.
# - apt-get is used instead of apt, whose command-line interface is not stable for scripts.
# - update and install run in the same layer so the package index is never stale.
# - --no-install-recommends keeps optional packages out of the image.
# - The apt package lists are removed in the same layer so they do not add to the image size.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        htop \
        libgl1-mesa-glx \
        screen \
        zip \
    && rm -rf /var/lib/apt/lists/*

# Install python dependencies
# requirements.txt is copied on its own so this layer is only rebuilt when the file changes.
COPY requirements.txt .
RUN python -m pip install --no-cache-dir --upgrade pip
# Remove the nvidia-tensorboard packages before installing the project requirements
# (presumably to avoid clashing with the tensorboard pulled in by requirements.txt; confirm).
RUN pip uninstall -y nvidia-tensorboard nvidia-tensorboard-plugin-dlprof
# The wandb specifier MUST stay quoted: unquoted, the shell treats `>=0.12.2` as a
# redirection of stdout to a file named "=0.12.2", so the version constraint is dropped.
RUN pip install --no-cache-dir -r requirements.txt coremltools onnx gsutil notebook "wandb>=0.12.2"
# Reinstall torch and torchvision: remove whatever versions are present, then install
# the newest available releases (versions are intentionally not pinned here).
RUN pip uninstall -y torch torchvision \
    && pip install --no-cache-dir -U torch torchvision
# Alternative install lines kept for reference:
# RUN pip install --no-cache -U torch torchvision numpy Pillow
# RUN pip install --no-cache torch==1.10.0+cu113 torchvision==0.11.1+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html

# Create and switch to the working directory.
# WORKDIR creates the path when it does not exist, so no separate `RUN mkdir -p` layer is needed.
WORKDIR /usr/src/app

# Copy the whole build context into the working directory
COPY . /usr/src/app

# Download Arial.ttf into the Ultralytics directory under root's user config dir.
# NOTE(review): this remote ADD is not integrity-checked; consider `ADD --checksum=sha256:...`
# once the expected hash of the font file is known.
ADD https://ultralytics.com/assets/Arial.ttf /root/.config/Ultralytics/

# Leave the image's default working directory at the filesystem root
WORKDIR /


# # Expecting base image to be the image built by ./Dockerfile.e3.gpu
# ARG BASE_IMAGE=""

# FROM $BASE_IMAGE

# LABEL maintainer="Amazon AI"
# LABEL dlc_major_version="1"

# ARG PYTHON=python3
# ARG PYTHON_VERSION=3.8.10
# ARG PYTHON_SHORT_VERSION=3.8
# ARG CONDA_PREFIX=/opt/conda
# ARG METIS=metis-5.1.0
# ARG RMM_VERSION=0.15.0

# # The smdebug pipeline relies on the following format to perform a string replace and trigger the DLC pipeline for validating
# # the nightly builds. Therefore, while updating the smdebug version, please ensure that the format is not disturbed.
# ARG SMDEBUG_VERSION=1.0.9

# ENV SAGEMAKER_TRAINING_MODULE=sagemaker_pytorch_container.training:main

# # swap the pytorch training wheel with additional smdebug and smmdp features
# ARG PT_TRAINING_URL=https://aws-pytorch-cicd-v3-binaries.s3.us-west-2.amazonaws.com/r1.10.0_aws_v3/20211102-104200/66c710e4b1cb2f96a29c556c58d0d0b82f92e496/gpu/torch-1.10.0%2Bcu113-cp38-cp38-manylinux1_x86_64.whl
# ARG SMD_MODEL_PARALLEL_URL=https://sagemaker-distributed-model-parallel.s3.us-west-2.amazonaws.com/pytorch-1.10.0/build-artifacts/2021-11-05-16-39/smdistributed_modelparallel-1.5.0-cp38-cp38-linux_x86_64.whl
# ARG SMDATAPARALLEL_BINARY=https://smdataparallel.s3.amazonaws.com/binary/pytorch/1.10.0/cu113/2021-11-03/smdistributed_dataparallel-1.2.2-cp38-cp38-linux_x86_64.whl

# # Install scikit-learn and pandas
# RUN conda install -y \
#     scikit-learn \
#     pandas

# WORKDIR /

# # Install libboost from source. This package is needed for smdataparallel functionality [for networking asynchronous IO].
# RUN wget https://sourceforge.net/projects/boost/files/boost/1.73.0/boost_1_73_0.tar.gz/download -O boost_1_73_0.tar.gz \
#   && tar -xzf boost_1_73_0.tar.gz \
#   && cd boost_1_73_0 \
#   && ./bootstrap.sh \
#   && ./b2 threading=multi --prefix=${CONDA_PREFIX} -j 64 cxxflags=-fPIC cflags=-fPIC install || true \
#   && cd .. \
#   && rm -rf boost_1_73_0.tar.gz \
#   && rm -rf boost_1_73_0 \
#   && cd ${CONDA_PREFIX}/include/boost

# WORKDIR /opt/pytorch

# # Copy workaround script for incorrect hostname
# COPY changehostname.c /
# COPY start_with_right_hostname.sh /usr/local/bin/start_with_right_hostname.sh

# WORKDIR /root

# RUN pip install --no-cache-dir -U \
#     smdebug==${SMDEBUG_VERSION} \
#     smclarify \
#     "sagemaker>=2,<3" \
#     sagemaker-experiments==0.* \
#     sagemaker-pytorch-training

# # Install extra packages
# # numba 0.54 only works with numpy>=1.20. See https://github.com/numba/numba/issues/7339
# RUN pip install --no-cache-dir -U \
#     "bokeh>=2.3,<3" \
#     "imageio>=2.9,<3" \
#     "opencv-python>=4.3,<5" \
#     "plotly>=5.1,<6" \
#     "seaborn>=0.11,<1" \
#     "numba<0.54" \
#     "shap>=0.39,<1" \
#  && pip uninstall -y torch \
#  && pip install --no-cache-dir -U ${PT_TRAINING_URL}

# # install metis
# RUN rm /etc/apt/sources.list.d/* \
#   && wget -nv http://glaros.dtc.umn.edu/gkhome/fetch/sw/metis/${METIS}.tar.gz \
#   && gunzip -f ${METIS}.tar.gz \
#   && tar -xvf ${METIS}.tar \
#   && cd ${METIS} \
#   && apt-get update \
#   && make config shared=1 \
#   && make install \
#   && cd .. \
#   && rm -rf ${METIS}.tar* \
#   && rm -rf ${METIS} \
#   && rm -rf /var/lib/apt/lists/* \
#   && apt-get clean

# # Install RAPIDSMemoryManager.
# # Requires cmake>=3.14.
# RUN  wget -nv https://github.com/rapidsai/rmm/archive/v${RMM_VERSION}.tar.gz \
#   && tar -xvf v${RMM_VERSION}.tar.gz \
#   && cd rmm-${RMM_VERSION} \
#   && INSTALL_PREFIX=/usr/local ./build.sh librmm \
#   && cd .. \
#   && rm -rf v${RMM_VERSION}.tar* \
#   && rm -rf rmm-${RMM_VERSION}

# # Install SM Distributed Modelparallel binary
# RUN pip install --no-cache-dir -U ${SMD_MODEL_PARALLEL_URL}

# # Install SM Distributed DataParallel binary
# RUN SMDATAPARALLEL_PT=1 pip install --no-cache-dir ${SMDATAPARALLEL_BINARY}

# ENV LD_LIBRARY_PATH="/opt/conda/lib/python${PYTHON_SHORT_VERSION}/site-packages/smdistributed/dataparallel/lib:$LD_LIBRARY_PATH"

# WORKDIR /

# RUN chmod +x /usr/local/bin/start_with_right_hostname.sh

# RUN HOME_DIR=/root \
#  && curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \
#  && unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ \
#  && cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance \
#  && chmod +x /usr/local/bin/testOSSCompliance \
#  && chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh \
#  && ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} ${PYTHON} \
#  && rm -rf ${HOME_DIR}/oss_compliance* \
#  && rm -rf /tmp/tmp*

# ENTRYPOINT ["bash", "-m", "start_with_right_hostname.sh"]
# CMD ["/bin/bash"]