from textractcaller import call_textract, call_textract_analyzeid, QueriesConfig, Query, get_full_json_from_output_config, get_full_json, call_textract_lending, get_full_json_lending
from textractcaller.t_call import OutputConfig, Textract_Features, call_textract_expense, remove_none
from trp import Document
import trp.trp2 as t2
import trp.trp2_analyzeid as t2id
import pytest
import logging
import os
import boto3
import json


def test_get_full_json_from_file_and_bytes(caplog):
    caplog.set_level(logging.DEBUG, logger="textractcaller")
    SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
    input_file = os.path.join(SCRIPT_DIR, "data/employmentapp.png")
    with open(input_file, "rb") as sample_file:
        b = bytearray(sample_file.read())
        j = call_textract(input_document=b)
        assert j
        doc = Document(j)
        assert doc

    with open(input_file, "rb") as sample_file:
        b = sample_file.read()
        j = call_textract(input_document=b)
        assert j
        doc = Document(j)
        assert doc


def test_tiff_sync(caplog):
    caplog.set_level(logging.DEBUG, logger="textractcaller")
    SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
    input_file = os.path.join(SCRIPT_DIR, "data/employmentapp.tiff")
    j = call_textract(input_document=input_file)
    assert j
    assert 'Blocks' in j
    assert len(j['Blocks']) == 103
    doc = Document(j)
    assert doc


def test_tiff_async(caplog):
    caplog.set_level(logging.DEBUG, logger="textractcaller")
    textract_client = boto3.client('textract', region_name='us-east-2')
    input_file = os.path.join("s3://amazon-textract-public-content/blogs/employmentapp_20210510_compressed.tiff")
    j = call_textract(input_document=input_file, force_async_api=True, boto3_textract_client=textract_client)
    assert j
    assert 'Blocks' in j
    assert len(j['Blocks']) == 103
    doc = Document(j)
    assert doc


def test_tiff_async_multipage(caplog):
    caplog.set_level(logging.DEBUG, logger="textractcaller")
    textract_client = boto3.client('textract', region_name='us-east-2')
    input_file = os.path.join("s3://amazon-textract-public-content/blogs/multipage_tiff_example_small.tiff")
    j = call_textract(input_document=input_file, force_async_api=True, boto3_textract_client=textract_client)
    assert j
    assert 'Blocks' in j
    assert len(j['Blocks']) == 260
    doc = Document(j)
    assert doc


def test_tiff_async_multipage_with_output_config(caplog):    #
    caplog.set_level(logging.DEBUG, logger="textractcaller")
    textract_client = boto3.client('textract', region_name='us-east-2')
    input_file = os.path.join("s3://amazon-textract-public-content/blogs/multipage_tiff_example_small.tiff")
    output_config = OutputConfig(s3_bucket="sdx-objects-us-east-2", s3_prefix="test/outputconfig")
    print(output_config.get_dict())
    j = call_textract(input_document=input_file,
                      force_async_api=True,
                      output_config=output_config,
                      boto3_textract_client=textract_client,
                      return_job_id=True)
    print(j['JobId'])
    # this is just to wait for the job to finish
    get_full_json(job_id=j['JobId'], boto3_textract_client=textract_client)

    textract_json = get_full_json_from_output_config(output_config=output_config, job_id=j['JobId'])

    assert textract_json


# multipage not supported on sync
def test_tiff_sync_multipage(caplog):
    caplog.set_level(logging.DEBUG, logger="textractcaller")
    textract_client = boto3.client('textract', region_name='us-east-2')
    input_file = os.path.join("s3://amazon-textract-public-content/blogs/multipage_tiff_example_small.tiff")
    with pytest.raises(textract_client.exceptions.UnsupportedDocumentException):
        call_textract(input_document=input_file, boto3_textract_client=textract_client)


def test_tiff_compressed_sync(caplog):
    caplog.set_level(logging.DEBUG, logger="textractcaller")
    SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
    input_file = os.path.join(SCRIPT_DIR, "data/employmentapp.tiff")
    j = call_textract(input_document=input_file)
    assert j
    assert 'Blocks' in j
    assert len(j['Blocks']) == 103
    doc = Document(j)
    assert doc

    with open(input_file, "rb") as sample_file:
        b = bytearray(sample_file.read())
        j = call_textract(input_document=b)
        assert j
        assert 'Blocks' in j
        assert len(j['Blocks']) == 103
        doc = Document(j)
        assert doc


def test_s3_sync_call(caplog):
    caplog.set_level(logging.DEBUG, logger="textractcaller")
    textract_client = boto3.client('textract', region_name='us-east-2')
    input_file = "s3://amazon-textract-public-content/blogs/amazon-textract-sample-text-amazon-dot-com.png"
    j = call_textract(input_document=input_file, boto3_textract_client=textract_client)
    assert j
    assert 'Blocks' in j
    doc = Document(j)
    assert doc


def test_analyzeid(caplog):
    caplog.set_level(logging.DEBUG, logger="textractcaller")
    SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
    input_file = os.path.join(SCRIPT_DIR, "data/driverlicense.png")

    # photo from S3
    textract_client = boto3.client('textract', region_name='us-east-2')
    j = call_textract_analyzeid(document_pages=["s3://amazon-textract-public-content/analyzeid/driverlicense.png"],
                                boto3_textract_client=textract_client)
    assert j
    assert 'DocumentMetadata' in j
    assert 'IdentityDocuments' in j
    assert 'IdentityDocumentFields' in j['IdentityDocuments'][0]
    assert len(j['IdentityDocuments'][0]['IdentityDocumentFields']) == 21
    doc: t2id.TAnalyzeIdDocument = t2id.TAnalyzeIdDocumentSchema().load(j)    #type: ignore
    assert doc

    # photo from local disk
    with open(input_file, "rb") as sample_file:
        b = bytearray(sample_file.read())
        j = call_textract_analyzeid(document_pages=[b])
        assert j
        assert 'DocumentMetadata' in j
        assert 'IdentityDocuments' in j
        assert 'IdentityDocumentFields' in j['IdentityDocuments'][0]
        assert len(j['IdentityDocuments'][0]['IdentityDocumentFields']) == 21
        doc: t2id.TAnalyzeIdDocument = t2id.TAnalyzeIdDocumentSchema().load(j)    #type: ignore
        assert doc


def test_queries(caplog):
    caplog.set_level(logging.DEBUG, logger="textractcaller")
    queries_config = QueriesConfig(queries=[])
    assert not queries_config.get_dict()
    query1 = Query(text="What is the applicant full name?")
    query2 = Query(text="What is the applicant phone number?", alias="PHONE_NUMBER")
    query3 = Query(text="What is the applicant home address?", alias="HOME_ADDRESS", pages=["1"])
    queries_config = QueriesConfig(queries=[query1, query2, query3])

    textract_client = boto3.client('textract', region_name='us-east-2')
    j = call_textract(input_document="s3://amazon-textract-public-content/blogs/employeeapp20210510.png",
                      boto3_textract_client=textract_client,
                      features=[Textract_Features.QUERIES],
                      queries_config=queries_config)
    assert j
    tdoc: t2.TDocument = t2.TDocumentSchema().load(j)    #type: ignore
    assert tdoc
    page = tdoc.pages[0]
    query_answers = tdoc.get_query_answers(page=page)
    assert len(query_answers) == 3


def test_empty_features_and_queries(caplog):
    caplog.set_level(logging.DEBUG, logger="textractcaller")
    textract_client = boto3.client('textract', region_name='us-east-2')
    j = call_textract(input_document="s3://amazon-textract-public-content/blogs/employeeapp20210510.png",
                      boto3_textract_client=textract_client,
                      features=[])
    assert j


def test_expense_get_full_json_from_file_and_bytes(caplog):
    caplog.set_level(logging.DEBUG, logger="textractcaller")
    SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
    input_file = os.path.join(SCRIPT_DIR, "data/employmentapp.png")
    with open(input_file, "rb") as sample_file:
        b = bytearray(sample_file.read())
        j = call_textract_expense(input_document=b)
        assert j

    with open(input_file, "rb") as sample_file:
        b = sample_file.read()
        j = call_textract_expense(input_document=b)
        assert j


def test_expense_tiff_async(caplog):
    caplog.set_level(logging.DEBUG, logger="textractcaller")
    textract_client = boto3.client('textract', region_name='us-east-2')
    input_file = os.path.join("s3://amazon-textract-public-content/blogs/employmentapp_20210510_compressed.tiff")
    j = call_textract_expense(input_document=input_file, force_async_api=True, boto3_textract_client=textract_client)
    assert j
    assert 'ExpenseDocuments' in j


def test_expense_tiff_async_multipage(caplog):
    caplog.set_level(logging.DEBUG, logger="textractcaller")
    textract_client = boto3.client('textract', region_name='us-east-2')
    input_file = os.path.join("s3://amazon-textract-public-content/blogs/multipage_tiff_example_small.tiff")
    j = call_textract_expense(input_document=input_file, force_async_api=True, boto3_textract_client=textract_client)
    assert j
    assert 'ExpenseDocuments' in j


def test_filter_out_none_from_output_config(caplog):
    caplog.set_level(logging.DEBUG, logger="textractcaller")
    SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
    input_file = os.path.join(SCRIPT_DIR, "data/json_from_python_repl.json")
    j = dict(json.load(open(input_file)))
    assert j['Blocks'][0]["BlockType"] == "PAGE"
    assert j['Blocks'][0]["ColumnIndex"] == None
    j = remove_none(j)
    assert j
    assert 'Blocks' in j and j['Blocks'][0]["BlockType"] == "PAGE"
    assert not "ColumnIndex" in j['Blocks'][0]


def test_lending(caplog):
    caplog.set_level(logging.DEBUG, logger="textractcaller")
    input_file = "s3://sdx-textract-us-east-1/lending-package.pdf"
    textract_client = boto3.client('textract', region_name='us-east-1')
    j = call_textract_lending(input_document=input_file, boto3_textract_client=textract_client, return_job_id=True)
    assert j
    textract_json = get_full_json_lending(job_id=j['JobId'], boto3_textract_client=textract_client)
    assert textract_json


def test_signature(caplog):
    caplog.set_level(logging.DEBUG, logger="textractcaller")
    input_file = "s3://amazon-textract-public-content/blogs/signature/verification-of-employment.png"
    textract_client = boto3.client('textract', region_name='us-east-2')
    j = call_textract(input_document=input_file,
                      features=[Textract_Features.FORMS, Textract_Features.SIGNATURES],
                      boto3_textract_client=textract_client,
                      return_job_id=True)
    assert j


# def test_lending_output_config(caplog):
#     caplog.set_level(logging.DEBUG, logger="textractcaller")
#     input_file = "s3://sdx-textract-us-east-1/lending-package.pdf"
#     output_config = OutputConfig(s3_bucket="sdx-objects-us-east-1", s3_prefix="test/outputconfig")
#     textract_client = boto3.client('textract', region_name='us-east-1')
#     s3_client = boto3.client('s3', region_name='us-east-1')
#     j = call_textract_lending(input_document=input_file,
#                               boto3_textract_client=textract_client,
#                               output_config=output_config,
#                               return_job_id=True)
#     assert j
#     # this is just to wait till objects are in S3
#     textract_json = get_full_json_lending(job_id=j['JobId'], boto3_textract_client=textract_client)

#     textract_json = get_full_json_lending_from_output_config(output_config=output_config,
#                                                              job_id=j['JobId'],
#                                                              s3_client=s3_client)
#     assert textract_json
#     json.dump(textract_json, open("lending-doc-output_from_output_config.json", "w"))