# Build an image that can do training and inference in SageMaker.
# This is a Python 3 image that uses the nginx, gunicorn, flask stack
# for serving inferences in a stable way.

FROM public.ecr.aws/lts/ubuntu:20.04

# Build-time only: keeps apt from prompting during package installation
# without leaking the setting into the runtime environment of the container.
ARG DEBIAN_FRONTEND=noninteractive

# Install the OS packages, expose python3 as `python`, and upgrade pip.
# The apt package lists are removed in the same layer that created them so
# they never contribute to the image size.
# NOTE(review): wget is no longer needed by this Dockerfile itself; it is kept
# in case scripts under /opt/program rely on it - confirm before removing.
RUN apt-get -y update && apt-get install -y --no-install-recommends \
        ca-certificates \
        nginx \
        python3-dev \
        python3-pip \
        wget \
    && rm -rf /var/lib/apt/lists/* \
    && ln -s /usr/bin/python3 /usr/local/bin/python \
    && pip3 install --no-cache-dir --upgrade pip

# Install the Python packages used for training and serving. pip was already
# upgraded in the previous layer, so no separate get-pip.py bootstrap is
# needed. --no-cache-dir keeps pip's download cache out of the layer, which
# keeps the image smaller.
# NOTE(review): package versions are unpinned; consider pinning them for
# reproducible builds.
RUN pip install --no-cache-dir \
        flask \
        gevent \
        gunicorn \
        numpy \
        pandas \
        scikit-learn \
        scipy

# Set some environment variables. PYTHONUNBUFFERED keeps Python from buffering
# our standard output stream, which means that logs can be delivered to the
# user quickly. PYTHONDONTWRITEBYTECODE keeps Python from writing the .pyc
# files, which are unnecessary in this case. We also update PATH so that the
# train and serve programs are found when the container is invoked.
ENV PYTHONUNBUFFERED=TRUE \
    PYTHONDONTWRITEBYTECODE=TRUE \
    PATH="/opt/program:${PATH}"

# Set up the program in the image.
COPY decision_trees /opt/program
WORKDIR /opt/program