# AWS Lambda (Python 3.10) container image bundling LibreOffice for headless
# document conversion, plus the NLTK corpora needed for text processing.
FROM public.ecr.aws/lambda/python:3.10

# LibreOffice release fetched from the Document Foundation download archive.
# NOTE: the install prefix on PATH below (/opt/libreoffice7.5) must be updated
# by hand if this is bumped to a different major.minor series.
ARG LIBREOFFICE_VERSION=7.5.3.2

# OS-level tools and the shared libraries / JDK installed alongside LibreOffice.
# The yum cache is removed in the same layer so it never reaches the image.
RUN yum update -y \
    && yum install -y \
        cairo \
        cups-libs \
        dbus-glib \
        gzip \
        java-1.8.0-openjdk-devel \
        libGLU \
        libSM \
        libXinerama.x86_64 \
        tar \
        wget \
    && yum clean all \
    && rm -rf /var/cache/yum

# Download, install and clean up LibreOffice in ONE layer: deleting the
# extracted RPMs in a later layer would not shrink the image. The archive is
# written to disk first (instead of `wget | tar`) so that a failed download
# aborts the build rather than being masked by the pipe's exit status.
# TODO: verify the archive against a known sha256 before installing.
RUN wget -nv -O /tmp/libreoffice.tar.gz \
        "https://downloadarchive.documentfoundation.org/libreoffice/old/${LIBREOFFICE_VERSION}/rpm/x86_64/LibreOffice_${LIBREOFFICE_VERSION}_Linux_x86-64_rpm.tar.gz" \
    && tar -xzf /tmp/libreoffice.tar.gz -C /tmp \
    && yum localinstall -y /tmp/LibreOffice_*/RPMS/*.rpm \
    && rm -rf /tmp/libreoffice.tar.gz /tmp/LibreOffice_* \
    && yum clean all \
    && rm -rf /var/cache/yum

# Put the `soffice` binary on PATH.
ENV PATH="${PATH}:/opt/libreoffice7.5/program"

# HOME is pointed at /tmp for LibreOffice (presumably because it is the
# writable location at runtime — confirm). NLTK_DATA tells NLTK where the
# corpora downloaded below are stored.
ENV HOME=/tmp \
    NLTK_DATA=/home/sbx_user1051/nltk_data

# Install Python dependencies before copying the application source so this
# layer stays cached until requirements.txt changes.
COPY requirements.txt ./
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir -r requirements.txt

# Download required NLTK data for langchain text processing.
# nltk.download() reports failure via its return value rather than an
# exception, so the exit status is derived from it to fail the build loudly.
RUN python -c "import sys, nltk; sys.exit(0 if all(nltk.download(pkg, download_dir='${NLTK_DATA}') for pkg in ('punkt', 'averaged_perceptron_tagger')) else 1)"

# Application source (handler module and everything else in the build context).
COPY . ./

# Run one throw-away headless conversion at build time (presumably to warm up
# LibreOffice's first-run state under HOME — verify it is still effective at
# runtime), then remove the scratch input/output files.
# `cd` is used instead of WORKDIR on purpose: WORKDIR would also change the
# working directory of the final image.
RUN touch /tmp/test.txt \
    && cd /tmp \
    && soffice --headless --invisible --nodefault --view \
        --nolockcheck --nologo --norestore --convert-to pdf \
        --outdir /tmp /tmp/test.txt \
    && rm /tmp/test.*

# Lambda handler in "<module>.<function>" form.
CMD ["index.lambda_handler"]