### Multi-stage Docker image for the SageMaker PyTorch / Inferentia serving container.
### See https://docs.docker.com/develop/develop-images/multistage-build/
### Run "docker build" at the root directory of neo-ai-dlr.

### Stage 0: Base image (Python 3 runtime shared by the build and run stages)
FROM public.ecr.aws/ubuntu/ubuntu:18.04 AS base

# Inherited by every stage below (they are all built FROM base), so it is set
# only once. Kept as ENV rather than ARG so the environment of the resulting
# image stays the same as before for anything layered on top of it.
ENV DEBIAN_FRONTEND=noninteractive

RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        python3 \
        python3-pip \
        python3-setuptools \
    && rm -rf /var/lib/apt/lists/* \
    && pip3 install --no-cache-dir wheel \
    && rm -rf /root/.cache/pip

### Stage 1: Build the serving wheel
FROM base AS builder

RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        git \
    && rm -rf /var/lib/apt/lists/*

# Build inside the package directory; the wheel lands in ./dist and is copied
# into the runner_base stage from there.
WORKDIR /workspace/sagemaker-pytorch-inferentia-serving
COPY container/sagemaker-pytorch-inferentia-serving/ ./
RUN python3 setup.py bdist_wheel

### Stage 2: Run
### Stage 2-1: Runner base (everything except the APP-specific handler)
FROM base AS runner_base

ENV USE_INF=1

# python3-dev and gcc are required by multi-model-server
# neuron-cc[tensorflow] needs g++ and python3-venv
# gnupg and wget are needed below to register the Neuron apt repository.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        g++ \
        gcc \
        gnupg \
        jq \
        libjpeg-dev \
        libpng-dev \
        openjdk-8-jdk-headless \
        pciutils \
        python3-dev \
        python3-venv \
        wget \
        zlib1g-dev \
    && echo "deb https://apt.repos.neuron.amazonaws.com bionic main" \
        > /etc/apt/sources.list.d/neuron.list \
    # Download the signing key to a file instead of piping it into apt-key, so
    # a failed download stops the build instead of being masked by the pipe.
    && wget -qO /tmp/neuron.pub \
        https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB \
    && apt-key add /tmp/neuron.pub \
    && rm -f /tmp/neuron.pub \
    && apt-get update \
    # -y is required: without it apt aborts at the confirmation prompt in a
    # non-interactive build whenever extra dependencies must be installed.
    && apt-get install -y aws-neuron-tools \
    && rm -rf /var/lib/apt/lists/*

COPY --from=builder /workspace/sagemaker-pytorch-inferentia-serving/dist/*.whl /home/model-server/

RUN pip3 install --pre --no-cache-dir multi-model-server==1.1.8 \
    && pip3 install --no-cache-dir Pillow \
    && pip3 install --no-cache-dir numpy scipy xlrd boto3 six requests \
    && rm -rf /root/.cache/pip

#COPY sagemaker_inference-1.5.2.dev0.tar.gz /home/model-server
# NOTE(review): sagemaker-inference is unpinned, so rebuilds may pick up a
# different release; pin it once the intended version is confirmed.
RUN pip3 install --no-cache-dir --upgrade --force-reinstall sagemaker-inference
# --no-deps: do not let torchvision pull in its own torch dependency; the
# torch-neuron install below brings in the torch build to use.
RUN pip3 install --no-cache-dir torchvision==0.12.0 --no-deps
RUN pip3 install --no-cache-dir \
        --extra-index-url=https://pip.repos.neuron.amazonaws.com \
        torch-neuron==1.11.0.2.3.0.0

### Stage 2-2: Runner (APP-specific handler)
FROM runner_base AS runner

ENV PYTHONUNBUFFERED=TRUE
ENV PATH="/opt/aws/neuron/bin:${PATH}"

# Disable thread pinning in TVM and Treelite
ENV TVM_BIND_THREADS=0 \
    TREELITE_BIND_THREADS=0

RUN useradd -m model-server \
    && mkdir -p /home/model-server/tmp /home/model-server/model

COPY container/config-inf1.properties /home/model-server/config.properties
COPY container/inf1_pyt_entry.py /usr/local/bin/dockerd-entrypoint.py

# Install the wheel built in the builder stage (copied in by runner_base).
RUN pip3 install --no-cache-dir /home/model-server/sagemaker_*_inferentia_*.whl

RUN chmod +x /usr/local/bin/dockerd-entrypoint.py \
    && chown -R model-server /home/model-server

EXPOSE 8080 8081

WORKDIR /home/model-server
ENV TEMP=/home/model-server/tmp

# NOTE(review): no USER directive is set, so the container runs as root even
# though the model-server user exists. Switching to it should be verified
# against the entrypoint script before changing.
ENTRYPOINT ["python3", "/usr/local/bin/dockerd-entrypoint.py"]
CMD ["mxnet-model-server", "--start", "--mms-config", "/home/model-server/config.properties"]

LABEL maintainer="guas@amazon.com"