import os
import PIL
import unittest
import boto3
import uuid
import logging
from tests.utils import get_fixture_path

from textractor import Textractor
from textractor.entities.document import Document
from textractor.entities.lazy_document import LazyDocument
from textractor.data.constants import TextractFeatures
from textractor.exceptions import InvalidProfileNameError, S3FilePathMissing
from textractor.utils.s3_utils import upload_to_s3, delete_from_s3


class TestTextractor(unittest.TestCase):
    """Exercise the ``Textractor`` detect/analyze entry points.

    When the ``CALL_TEXTRACT`` environment variable is set, the tests go
    through a real ``Textractor`` client. Otherwise the tests that have an
    offline branch open the document located at ``get_fixture_path()`` and
    the remaining tests are skipped.
    """

    def setUp(self):
        """Set the shared configuration and, for live runs, the AWS resources.

        Without ``CALL_TEXTRACT`` only ``profile_name`` and ``bucket_name``
        are set. With it, the fixture files are uploaded to the bucket, the
        S3 URIs used by the tests are built, two fixture images are opened
        and the ``Textractor`` client is created.

        Raises:
            InvalidProfileNameError: if ``profile_name`` is ``None`` on a
                live run.
        """
        # insert credentials and filepaths here to run test
        self.profile_name = "default"
        self.bucket_name = os.environ.get("S3_BUCKET", "textractor-tests")
        if not os.environ.get("CALL_TEXTRACT"):
            return

        # Validate the profile before it is used to build any AWS client.
        if self.profile_name is None:
            raise InvalidProfileNameError(
                "Textractor could not be initialized. Populate profile_name with a valid input in tests/test_textractor.py."
            )

        self.s3_client = boto3.session.Session(
            profile_name=self.profile_name
        ).client("s3", region_name="us-west-2")

        self.current_directory = os.path.abspath(os.path.dirname(__file__))
        for asset in ["single-page-1.png", "textractor-multipage-doc.pdf"]:
            upload_to_s3(
                self.s3_client,
                f"s3://{self.bucket_name}/{asset}",
                os.path.join(self.current_directory, f"fixtures/{asset}"),
            )

        self.s3_image_file = f"s3://{self.bucket_name}/single-page-1.png"
        self.s3_multipage_pdf_file = f"s3://{self.bucket_name}/textractor-multipage-doc.pdf"
        self.s3_output_path = f"s3://{self.bucket_name}/output"
        self.s3_upload_path = f"s3://{self.bucket_name}/upload"

        # Register a close for each opened image so the underlying file
        # handles are released after every test, even when the test fails.
        self.image_1 = PIL.Image.open(os.path.join(self.current_directory, "fixtures/single-page-1.png"))
        self.addCleanup(self.image_1.close)
        self.image_2 = PIL.Image.open(os.path.join(self.current_directory, "fixtures/single-page-2.png"))
        self.addCleanup(self.image_2.close)

        self.extractor = Textractor(
            profile_name=self.profile_name, kms_key_id=""
        )

    def test_detect_document_text(self):
        """detect_document_text() on a local single image yields one page."""
        # Testing local single image input
        if os.environ.get("CALL_TEXTRACT"):
            document = self.extractor.detect_document_text(
                file_source=os.path.join(self.current_directory, "fixtures/single-page-1.png"),
            )
        else:
            document = Document.open(get_fixture_path())

        self.assertIsInstance(document, Document)
        self.assertEqual(len(document.pages), 1)

    def test_detect_document_text_single_page_pdf_input(self):
        """detect_document_text() with save_image=False leaves page.image unset."""
        # Testing single page pdf input
        if os.environ.get("CALL_TEXTRACT"):
            document = self.extractor.detect_document_text(
                file_source=os.path.join(self.current_directory, "fixtures/textractor-singlepage-doc.pdf"),
                save_image=False,
            )
        else:
            document = Document.open(get_fixture_path())

        self.assertIsInstance(document, Document)
        self.assertIs(document.pages[0].image, None)

    def test_detect_document_text_list_PIL_images(self):
        """detect_document_text() accepts a one-element list of PIL images."""
        # Testing list of PIL image input
        if os.environ.get("CALL_TEXTRACT"):
            document = self.extractor.detect_document_text(
                file_source=[self.image_1],
            )
        else:
            document = Document.open(get_fixture_path())

        self.assertIsInstance(document, Document)
        self.assertEqual(len(document.pages), 1)

    @unittest.skipIf(not os.environ.get("CALL_TEXTRACT"), "PIL image input can't be processed without calling Textract")
    def test_textractor_pil_image_input(self):
        """detect_document_text() accepts a single PIL image object."""
        # Test PIL image input
        image_path = os.path.join(self.current_directory, "fixtures/single-page-1.png")
        # The context manager closes the image file once the call returns.
        with PIL.Image.open(image_path) as image_1:
            document = self.extractor.detect_document_text(
                file_source=image_1,
            )

        self.assertIsInstance(document, Document)
        self.assertEqual(len(document.pages), 1)

    def test_textractor_s3_image_input(self):
        """detect_document_text() accepts an S3 URI as the file source.

        Raises:
            S3FilePathMissing: if ``s3_image_file`` is ``None`` on a live run.
        """
        # Test S3 location path input
        if os.environ.get("CALL_TEXTRACT"):
            if self.s3_image_file is None:
                raise S3FilePathMissing(
                    "S3 URI needed to run test. Populate s3_image_file with a valid input in tests/test_textractor.py."
                )
            document = self.extractor.detect_document_text(
                file_source=self.s3_image_file,
            )
        else:
            document = Document.open(get_fixture_path())

        self.assertIsInstance(document, Document)

    @unittest.skipIf(not os.environ.get("CALL_TEXTRACT"), "Asynchronous requests can't be processed without calling Textract")
    def test_textractor_start_document_text_detection(self):
        """start_document_text_detection() on a local two-page PDF."""
        # Testing start_document_text_detection() with local multipage pdf input
        document = self.extractor.start_document_text_detection(
            file_source=os.path.join(self.current_directory, "fixtures/textractor-multipage-doc.pdf"),
            s3_output_path=self.s3_output_path,
            s3_upload_path=self.s3_upload_path,
        )

        self.assertEqual(len(document.pages), 2)
        self.assertIsInstance(document, LazyDocument)

    def test_textractor_analyze_document(self):
        """analyze_document() with TABLES and FORMS on a local single image."""
        # Testing analyze_document() with local single image input
        if os.environ.get("CALL_TEXTRACT"):
            document = self.extractor.analyze_document(
                file_source=os.path.join(self.current_directory, "fixtures/single-page-1.png"),
                features=[TextractFeatures.TABLES, TextractFeatures.FORMS],
            )
        else:
            document = Document.open(get_fixture_path())

        self.assertEqual(len(document.pages), 1)
        self.assertIsInstance(document, Document)

    def test_textractor_analyze_document_local_pillow_image(self):
        """analyze_document() with TABLES and FORMS on a PIL image object."""
        # Testing analyze_document() with local PIL image input
        if os.environ.get("CALL_TEXTRACT"):
            document = self.extractor.analyze_document(
                file_source=self.image_1,
                features=[TextractFeatures.TABLES, TextractFeatures.FORMS],
            )
        else:
            document = Document.open(get_fixture_path())

        self.assertEqual(len(document.pages), 1)
        self.assertIsInstance(document, Document)

    def test_textractor_analyze_document_pillow_image_list(self):
        """analyze_document() with a one-element list of PIL images."""
        # Testing analyze_document() with a list of PIL images as input
        if os.environ.get("CALL_TEXTRACT"):
            document = self.extractor.analyze_document(
                file_source=[self.image_1],
                features=[TextractFeatures.TABLES, TextractFeatures.FORMS],
                save_image=True,
            )
        else:
            document = Document.open(get_fixture_path())

        self.assertEqual(len(document.pages), 1)
        self.assertIsInstance(document, Document)

    @unittest.skipIf(not os.environ.get("CALL_TEXTRACT"), "Asynchronous requests can't be processed without calling Textract")
    def test_textractor_analyze_document_multipage_pdf(self):
        """start_document_analysis() on a local two-page PDF."""
        # Testing start_document_analysis() with local multipage pdf input
        document = self.extractor.start_document_analysis(
            file_source=os.path.join(self.current_directory, "fixtures/textractor-multipage-doc.pdf"),
            features=[TextractFeatures.TABLES, TextractFeatures.FORMS],
            s3_output_path=self.s3_output_path,
            s3_upload_path=self.s3_upload_path,
        )

        self.assertIsInstance(document, LazyDocument)
        self.assertEqual(len(document.pages), 2)

    @unittest.skipIf(not os.environ.get("CALL_TEXTRACT"), "Asynchronous requests can't be processed without calling Textract")
    def test_textractor_start_document_text_detection_multipage_pdf_s3(self):
        """start_document_text_detection() on a two-page PDF stored in S3."""
        # Testing start_document_text_detection() with s3 multipage pdf input
        document = self.extractor.start_document_text_detection(
            file_source=self.s3_multipage_pdf_file,
            s3_output_path=self.s3_output_path,
        )

        self.assertEqual(len(document.pages), 2)
        self.assertIsInstance(document, LazyDocument)

    @unittest.skipIf(not os.environ.get("CALL_TEXTRACT"), "Asynchronous requests can't be processed without calling Textract")
    def test_textractor_start_document_analysis_multipage_pdf_s3(self):
        """start_document_analysis() on a two-page PDF stored in S3."""
        # Testing start_document_analysis() with s3 multipage pdf input
        document = self.extractor.start_document_analysis(
            file_source=self.s3_multipage_pdf_file,
            features=[TextractFeatures.TABLES, TextractFeatures.FORMS],
            s3_output_path=self.s3_output_path,
        )

        self.assertIsInstance(document, LazyDocument)
        self.assertEqual(len(document.pages), 2)

    @unittest.skipIf(not os.environ.get("CALL_TEXTRACT"), "Asynchronous requests can't be processed without calling Textract")
    def test_textractor_start_document_analysis(self):
        """start_document_analysis() on a single PIL image object."""
        # Testing start_document_analysis() with local PIL Image input
        document = self.extractor.start_document_analysis(
            file_source=self.image_1,
            features=[TextractFeatures.TABLES, TextractFeatures.FORMS],
            s3_output_path=self.s3_output_path,
            s3_upload_path=self.s3_upload_path,
        )

        self.assertIsInstance(document, LazyDocument)
        self.assertEqual(len(document.pages), 1)