### Multi-stage Docker images. See https://docs.docker.com/develop/develop-images/multistage-build/
### Run "docker build" at the root directory of neo-ai-dlr

### Stage 0: Base image
# Ubuntu 18.04 with Python 3, pip, setuptools and wheel. Every later stage
# starts FROM this one.
FROM public.ecr.aws/ubuntu/ubuntu:18.04 AS base

ENV DEBIAN_FRONTEND=noninteractive

# Update, install and clean the apt lists in a single layer so the package
# index never persists in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        python3 \
        python3-pip \
        python3-setuptools \
    && rm -rf /var/lib/apt/lists/* \
    && pip3 install wheel \
    && rm -rf /root/.cache/pip

### Stage 1: Build
# Builds the sagemaker-pytorch-inferentia-serving wheel. Only the resulting
# dist/*.whl is copied out of this stage.
FROM base AS builder

ENV DEBIAN_FRONTEND=noninteractive

# build-essential is installed once here. A former follow-up layer that ran
# "apt-get install --reinstall build-essential" was dropped: it executed after
# the apt lists had been deleted and without an "apt-get update", so it had no
# package index to download from.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        git \
    && rm -rf /var/lib/apt/lists/*

COPY container/sagemaker-pytorch-inferentia-serving/ /workspace/sagemaker-pytorch-inferentia-serving

# WORKDIR instead of "RUN cd ... &&" so the build directory is explicit.
WORKDIR /workspace/sagemaker-pytorch-inferentia-serving
RUN python3 setup.py bdist_wheel

### Stage 2: Run
### Stage 2-1: Runner base (everything except the APP-specific handler)
FROM base AS runner_base

ENV DEBIAN_FRONTEND=noninteractive
ENV USE_INF=1

# System packages, the OpenJDK PPA, and the AWS Neuron apt repository.
# - "-y" on every install / add-apt-repository so no step can stop at a
#   confirmation prompt during a non-interactive build.
# - The Neuron signing key is downloaded to a file before "apt-key add" rather
#   than piped, so a failed download fails the build instead of being masked
#   by the exit status of the last command in a pipeline.
RUN apt-get update \
    && apt-get install -y --no-install-recommends software-properties-common \
    && add-apt-repository -y ppa:openjdk-r/ppa \
    && apt-get update \
    && apt-get install -y --no-install-recommends \
        apt-transport-https \
        build-essential \
        ca-certificates \
        cmake \
        curl \
        emacs \
        git \
        gnupg \
        gpg-agent \
        jq \
        libcap-dev \
        libgl1-mesa-glx \
        libglib2.0-0 \
        libjpeg-dev \
        libpng-dev \
        libsm6 \
        libxext6 \
        libxrender-dev \
        openjdk-11-jdk \
        pciutils \
        python3-dev \
        python3-venv \
        unzip \
        vim \
        wget \
        zlib1g-dev \
    && echo "deb https://apt.repos.neuron.amazonaws.com bionic main" > /etc/apt/sources.list.d/neuron.list \
    && wget -q -O /tmp/neuron-apt-key.pub https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB \
    && apt-key add /tmp/neuron-apt-key.pub \
    && rm -f /tmp/neuron-apt-key.pub \
    && apt-get update \
    && apt-get install -y aws-neuron-tools \
    && rm -rf /var/lib/apt/lists/* \
    && rm -rf /tmp/tmp* \
    && apt-get clean

# Wheel produced by the builder stage; it is installed in the runner stage.
COPY --from=builder /workspace/sagemaker-pytorch-inferentia-serving/dist/*.whl /home/model-server/

# Python serving stack. Install order is kept as-is; torchvision is installed
# with --no-deps so pip does not pull in its own dependency set.
RUN pip3 install --no-cache-dir torchserve==0.5.3 \
    && pip3 install --no-cache-dir "pillow>=8.3" \
    && pip3 install --no-cache-dir captum \
    && pip3 install --no-cache-dir numpy scipy xlrd boto3 six requests \
    && pip3 install --no-cache-dir torchvision==0.11.2 --no-deps \
    && pip3 install --no-cache-dir torch-model-archiver==0.5.3 \
    && rm -rf /root/.cache/pip

# torch-neuron is served from the Neuron pip repository.
RUN pip3 install --extra-index-url=https://pip.repos.neuron.amazonaws.com --no-cache-dir torch-neuron==1.9.1.2.3.0.0 \
    && rm -rf /root/.cache/pip

### Stage 2-2: Runner (APP-specific handler)
FROM runner_base AS runner

ENV PYTHONUNBUFFERED=TRUE
ENV PATH="/opt/aws/neuron/bin:${PATH}"

# Disable thread pinning in TVM and Treelite
ENV TVM_BIND_THREADS=0
ENV TREELITE_BIND_THREADS=0

# Service account plus the tmp and model directories under its home.
RUN useradd -m model-server \
    && mkdir -p /home/model-server/tmp \
    && mkdir -p /home/model-server/model

COPY container/config-inf1.properties /home/model-server/config.properties
COPY container/inf1_pyt_entry.py /usr/local/bin/dockerd-entrypoint.py

RUN pip3 install /home/model-server/sagemaker_*_inferentia_*.whl

RUN chmod +x /usr/local/bin/dockerd-entrypoint.py \
    && chown -R model-server /home/model-server

EXPOSE 8080 8081

WORKDIR /home/model-server
ENV TEMP=/home/model-server/tmp

# NOTE(review): no USER instruction is set, so the container runs as root even
# though the model-server account exists. Confirm whether the entrypoint needs
# root before switching users.
ENTRYPOINT ["python3", "/usr/local/bin/dockerd-entrypoint.py"]

# Exec form performs no shell word-splitting: each JSON element is exactly one
# argv entry, so "--model-store" and its value must be separate elements.
CMD ["torchserve", "--start", "--ts-config", "/home/model-server/config.properties", "--model-store", "/home/model-server/model"]

LABEL maintainer="guas@amazon.com"