#!/usr/bin/env python3

import argparse
import os
import io
import boto3
import glob
import json
import sys
from typing import List
from textractcaller.t_call import Textract_Features, Textract_Types, call_textract, is_tiff
from textractoverlayer.t_overlay import DocumentDimensions, get_bounding_boxes
from textractprettyprinter.t_pretty_print import Pretty_Print_Table_Format, Textract_Pretty_Print, get_string
from textractoverlayer.image_tools import get_filename_from_document
from textracthelper._version import __version__
from PIL import Image, ImageSequence
import textracthelper._version
import logging
import trp.trp2 as t2

# Module logger; handlers are only attached further down when -v / -vv is given.
logger = logging.getLogger(__name__)

# ---------------------------------------------------------------------------
# Command line definition
# ---------------------------------------------------------------------------
# Allowed values for the list-style options, kept together for easy review.
_FEATURE_CHOICES = ["FORMS", "TABLES"]
_PRETTY_PRINT_CHOICES = ["WORDS", "LINES", "FORMS", "TABLES"]
_OVERLAY_CHOICES = ["WORD", "LINE", "FORM", "KEY", "VALUE", "TABLE", "CELL"]
_TABLE_FORMAT_CHOICES = [
    "csv", "plain", "simple", "github", "grid", "fancy_grid", "pipe", "orgtbl", "jira", "presto",
    "pretty", "psql", "rst", "mediawiki", "moinmoin", "youtrack", "html", "unsafehtml", "latex",
    "latex_raw", "latex_booktabs", "latex_longtable", "textile", "tsv",
]

parser = argparse.ArgumentParser()

# Exactly one input source must be selected.
input_doc_or_example_or_stdin = parser.add_mutually_exclusive_group(required=True)
input_doc_or_example_or_stdin.add_argument(
    "--input-document",
    help="s3 object (s3://) or file from local filesystem",
)
input_doc_or_example_or_stdin.add_argument(
    "--input-glob",
    help="local filesystem glob to go through many files",
)
input_doc_or_example_or_stdin.add_argument(
    "--example",
    dest='use_example',
    action='store_true',
    default=False,
    help="using the example document to call Textract",
)
input_doc_or_example_or_stdin.add_argument(
    "--stdin",
    dest='inputfromstdin',
    action='store_true',
    default=False,
    help="receive JSON from stdin",
)

# Textract call and text output options.
parser.add_argument(
    "--overlay-document",
    type=str,
    help="point to overlay document when input is --stdin and overlay is requested. "
    "Similar to --input-document could be on s3:// or local file",
)
parser.add_argument(
    "--features",
    nargs='+',
    type=str,
    choices=_FEATURE_CHOICES,
    help="features to call Textract with. Will trigger call to AnalyzeDocument instead of DetectDocumentText",
)
parser.add_argument("--pretty-print", nargs='+', type=str, choices=_PRETTY_PRINT_CHOICES, help="")
parser.add_argument(
    "--pretty-print-table-format",
    type=str,
    choices=_TABLE_FORMAT_CHOICES,
    default='github',
    help="which format to output the pretty print information to. Only effects FORMS and TABLES",
)

# Overlay (bounding box drawing) options.
parser.add_argument(
    "--overlay",
    nargs='+',
    type=str,
    choices=_OVERLAY_CHOICES,
    help="defines what bounding boxes to draw on the output",
)
parser.add_argument(
    "--pop-up-overlay-output",
    dest='showoutput',
    action='store_true',
    default=False,
    help="shows image with overlay",
)
parser.add_argument(
    "--use-polygon",
    dest='usepolygon',
    action='store_true',
    default=False,
    help="uses polygon instead of bounding boxes",
)
parser.add_argument(
    "--overlay-text",
    dest="showoverlaytext",
    action="store_true",
    default=False,
    help="shows image with WORD or LINE text overlay. "
    "When both WORD and LINE overlay are specified, WORD text will be overlayed",
)
parser.add_argument(
    "--overlay-confidence",
    dest='showconfidence',
    action='store_true',
    default=False,
    # TODO: Default to showing WORD confidence if --overlay-text not specified?
    help="shows image with confidence overlay. Only supported for WORD or LINE",
)
parser.add_argument(
    "--overlay-output-folder",
    type=str,
    default=None,
    help="output with bounding boxes to folder",
)

# Miscellaneous switches.
parser.add_argument(
    "--version",
    action='version',
    version=f"%(prog)s {__version__}",
    help="print version information",
)
parser.add_argument(
    "--no-stdout",
    dest='showstdout',
    action='store_false',
    default=True,
    help="no output to stdout",
)

# -v and -vv cannot be combined.
show_logs = parser.add_mutually_exclusive_group(required=False)
show_logs.add_argument(
    "-v",
    dest='showinfo',
    action='store_true',
    default=False,
    help=">=INFO level logging output to stderr",
)
show_logs.add_argument(
    "-vv",
    dest='showdebug',
    action='store_true',
    default=False,
    help=">=DEBUG level logging output to stderr",
)

# Parse the command line once and copy every option into a module-level name
# so the rest of the script does not need to touch the namespace object.
args = parser.parse_args()

# -- where the input comes from
input_document: str = args.input_document
input_glob: str = args.input_glob
overlay_document: str = args.overlay_document
use_example = args.use_example
use_stdin = args.inputfromstdin

# -- what to ask Textract for and how to print it
features_arg = args.features
pretty_print_arg = args.pretty_print
pretty_print_table_format_arg = args.pretty_print_table_format
showstdout = args.showstdout

# -- overlay drawing
overlay_arg = args.overlay
overlay_output_folder = args.overlay_output_folder
showoutput = args.showoutput
usepolygon = args.usepolygon
show_overlay_text = args.showoverlaytext
show_overlay_confidence = args.showconfidence

# -- logging verbosity
showinfo = args.showinfo
showdebug = args.showdebug

# Attach a stderr handler to this script's logger and to the two library
# loggers, but only when the user asked for log output with -v or -vv.
if showdebug or showinfo:
    log_level = logging.DEBUG if showdebug else logging.INFO

    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
    handler.setLevel(log_level)

    caller_logger = logging.getLogger('textractcaller')
    printer_logger = logging.getLogger('textractprettyprinter')
    for library_logger in (caller_logger, printer_logger):
        library_logger.propagate = True

    for configured_logger in (logger, caller_logger, printer_logger):
        configured_logger.setLevel(log_level)
        configured_logger.addHandler(handler)

    # The INFO banner is emitted in both modes; the DEBUG one only with -vv.
    logger.info("log level: INFO")
    if showdebug:
        logger.debug("current log level: DEBUG")

# --example swaps in the sample image located next to the textracthelper package.
if use_example:
    SCRIPT_DIR = os.path.dirname(os.path.abspath(textracthelper._version.__file__))
    input_document = os.path.join(SCRIPT_DIR, "examples/employmentapp.png")

# Translate the string arguments into the enum members the libraries expect.
# features / pretty_print stay None when the option was not given.
features = [Textract_Features[name] for name in features_arg] if features_arg else None

pretty_print_table_format = Pretty_Print_Table_Format[pretty_print_table_format_arg]
logger.debug(f"pretty_print_table_format: {pretty_print_table_format}")

pretty_print = [Textract_Pretty_Print[name] for name in pretty_print_arg] if pretty_print_arg else None

# overlay is always a list (possibly empty) so it can be passed on unconditionally.
overlay = [Textract_Types[name] for name in overlay_arg] if overlay_arg else list()

# ---------------------------------------------------------------------------
# Cross-argument validation.
#
# All problems are collected and printed before exiting, so the user sees every
# issue in one run. Checks that depend on --features are skipped for --stdin,
# because no Textract call is made in that mode.
# ---------------------------------------------------------------------------
validation_errors = []
requested_features = features_arg or []
requested_pretty_print = pretty_print_arg or []
requested_overlays = overlay_arg or []
# An overlay image is produced when it is either popped up or written to a folder.
overlay_output_requested = bool(showoutput or overlay_output_folder)

if input_document and overlay_document:
    validation_errors.append(
        "both --input-document and --overlay-document do not make sense at the moment. --overlay-document is only used with --stdin"
    )

if not use_stdin:
    if "FORMS" in requested_pretty_print and "FORMS" not in requested_features:
        validation_errors.append("should pretty-print FORMS but is not requested as --features")
    if "TABLES" in requested_pretty_print and "TABLES" not in requested_features:
        validation_errors.append("should pretty-print TABLES but is not requested as --features")
    if any(x in requested_overlays for x in ("CELL", "TABLE")) and "TABLES" not in requested_features:
        validation_errors.append("should overlay TABLE or CELL but is not requested as --features TABLES")
    # Applied regardless of the output target, matching the TABLE/CELL check above.
    if any(x in requested_overlays for x in ("FORM", "KEY", "VALUE")) and "FORMS" not in requested_features:
        validation_errors.append("should overlay FORM or KEY or VALUE but FORMS not requested as --features FORMS")

# Both output targets need --overlay: the overlay names become part of the
# output file name and decide which boxes are drawn.
if overlay_output_requested and not overlay_arg:
    validation_errors.append(
        "asked for --pop-up-overlay-output or --overlay-output-folder, but not --overlay arguments given")

# With --stdin only the JSON is available; the image to draw on must be named explicitly.
if use_stdin and overlay_output_requested and not overlay_document:
    validation_errors.append(
        "--stdin with --pop-up-overlay-output or --overlay-output-folder requires --overlay-document")

for validation_error in validation_errors:
    print(validation_error)

exit_code = 1 if validation_errors else 0
if exit_code > 0:
    # sys.exit rather than the interactive-only exit() helper, which the site
    # module may not install (e.g. under python -S).
    sys.exit(1)

logger.debug("calling Textract")

# Build the work list for the processing loop below:
#   --stdin          -> one already-parsed Textract JSON response
#   --input-glob     -> every local path matching the pattern
#   otherwise        -> the single document path (or the example document)
if use_stdin:
    input_documents = [json.load(sys.stdin)]
elif input_glob:
    input_documents = glob.glob(input_glob)
else:
    input_documents = [input_document]

# Drawing helpers are only needed by the overlay code below.
from PIL import ImageDraw, ImageFont

# Outline/text color (RGB) used for each overlay type name.
_OVERLAY_COLORS = {
    "WORD": (128, 128, 0),
    "LINE": (0, 128, 128),
    "FORM": (128, 0, 128),
    "KEY": (255, 0, 0),
    "VALUE": (0, 255, 255),
    "CELL": (255, 0, 255),
    "TABLE": (255, 255, 0)
}

# File-extension spellings that differ from the format name Pillow registers.
_PIL_FORMAT_ALIASES = {"JPG": "JPEG", "TIF": "TIFF"}


def _parse_s3_uri(location):
    """Return ``(bucket, key)`` if *location* is an ``s3://`` URI, otherwise ``None``.

    The scheme is matched case-insensitively and stripped by length, so an
    upper-case ``S3://`` prefix is removed as well. Non-string values (e.g. a
    missing ``--overlay-document``) yield ``None``.
    """
    if not isinstance(location, str):
        return None
    if len(location) > 7 and location.lower().startswith("s3://"):
        bucket, _, key = location[len("s3://"):].partition("/")
        return bucket, key
    return None


def _read_s3_object(bucket, key):
    """Download an S3 object completely and return its bytes as a seekable stream."""
    response = boto3.client('s3').get_object(Bucket=bucket, Key=key)
    return io.BytesIO(response.get('Body').read())


for input_document in input_documents:
    if use_stdin:
        # stdin delivered the Textract JSON itself; from here on input_document
        # names the image to draw on (may be None when no overlay is wanted).
        doc = input_document
        input_document = overlay_document

    # Detect S3 *after* the stdin swap so an s3:// --overlay-document is honored.
    s3_location = _parse_s3_uri(input_document)
    is_s3_document: bool = s3_location is not None
    s3_bucket, s3_key = s3_location or ("", "")
    if is_s3_document:
        logger.debug(f"s3_bucket: {s3_bucket}, s3_key: {s3_key}")

    if not use_stdin:
        logger.debug(f"input_document: '{input_document}'")
        # check if TIFF multi-page, if so force async
        force_async = False
        if is_tiff(input_document):
            tiff_source = _read_s3_object(s3_bucket, s3_key) if is_s3_document else input_document
            # Context manager releases the file handle once the pages are counted.
            with Image.open(tiff_source) as img:
                num_pages = sum(1 for _ in ImageSequence.Iterator(img))
            if num_pages > 1:
                force_async = True
                logger.warning("multi-page TIFF, have to do async")
        if force_async:
            doc = call_textract(input_document=input_document, features=features, force_async_api=True)
        else:
            doc = call_textract(input_document=input_document, features=features)
    logger.debug("receivedTextract response")

    if showstdout:
        if pretty_print:
            print(get_string(textract_json=doc, output_type=pretty_print, table_format=pretty_print_table_format))
        else:
            print(json.dumps(doc))

    logger.debug(f"overlay_output_folder or showoutput: {overlay_output_folder or showoutput}")
    logger.debug(f"usepolygon: {usepolygon}")
    if overlay_output_folder or showoutput:
        logger.debug("overlay")
        if not input_document:
            # Only reachable with --stdin: there is JSON but no image to draw on.
            raise ValueError("overlay output requested for --stdin input, but no --overlay-document given")
        # --overlay may be absent; treat that as "no overlay types requested".
        requested_overlays = overlay_arg or []

        image_source = _read_s3_object(s3_bucket, s3_key) if is_s3_document else input_document
        # convert() returns an independent copy, so the source image can be
        # closed as soon as the copy and the dimensions have been taken.
        with Image.open(image_source) as image:
            rgb_im = image.convert('RGB')
            document_dimension: DocumentDimensions = DocumentDimensions(doc_width=image.size[0],
                                                                        doc_height=image.size[1])
        draw = ImageDraw.Draw(rgb_im)

        if usepolygon:
            # Polygon mode outlines every LINE block, using the first four
            # polygon points scaled from relative to pixel coordinates.
            t_doc: t2.TDocument = t2.TDocumentSchema().loads(json.dumps(doc))    # type: ignore
            for page in t_doc.pages:
                line_blocks = t_doc.get_blocks_by_type(block_type_enum=t2.TextractBlockTypes.LINE, page=page)
                for line_block in line_blocks:
                    corners = []
                    for vertex in line_block.geometry.polygon[:4]:
                        point: t2.TPoint = t2.TPoint(x=vertex.x, y=vertex.y)
                        point.scale(doc_width=document_dimension.doc_width,
                                    doc_height=document_dimension.doc_height)
                        corners.append((point.x, point.y))
                    draw.polygon(xy=corners, width=2)
        else:
            bounding_box_list = get_bounding_boxes(textract_json=doc,
                                                   document_dimensions=[document_dimension],
                                                   overlay_features=overlay)
            # use WORD text overlay when both LINE and WORD overlays are specified
            suppress_line_text_overlay = bool(show_overlay_text
                                              and all(x in requested_overlays for x in ('WORD', 'LINE')))
            for bbox in bounding_box_list:
                box_type_name = bbox.box_type.name
                box_color = _OVERLAY_COLORS[box_type_name]
                draw.rectangle(xy=(bbox.xmin, bbox.ymin, bbox.xmax, bbox.ymax), outline=box_color, width=2)
                wants_text = box_type_name == 'WORD' or (box_type_name == 'LINE'
                                                         and not suppress_line_text_overlay)
                if not (show_overlay_text and wants_text):
                    continue
                logger.debug(f"bbox type {box_type_name}: {bbox.text}, confidence: {bbox.confidence}")
                overlay_text = bbox.text
                # Text is drawn just above the box at 80% of the box height.
                box_height = round((bbox.ymax - bbox.ymin) * 0.8)
                if show_overlay_confidence:
                    overlay_text += f" {bbox.confidence}%"
                try:
                    font = ImageFont.truetype('Roboto-Regular.ttf', size=box_height)
                except (OSError, ValueError, ImportError):
                    # Font file missing/unreadable, invalid size, or no FreeType
                    # support: fall back to Pillow's built-in default font.
                    font = None
                draw.text(xy=(bbox.xmin, bbox.ymin - box_height),
                          text=overlay_text,
                          fill=box_color,
                          align='center',
                          font=font)

        if showoutput:
            rgb_im.show()
        if overlay_output_folder:
            os.makedirs(overlay_output_folder, exist_ok=True)
            file_name, suffix = get_filename_from_document(input_document=input_document)
            output_types = "_".join(requested_overlays)
            output_file_name = os.path.join(overlay_output_folder, f"{file_name}_boxed_{output_types}_{suffix}")
            # Derive the Pillow format from the extension, mapping spellings
            # that Pillow does not know as format names (jpg, tif).
            output_format = suffix[1:]
            output_format = _PIL_FORMAT_ALIASES.get(output_format.upper(), output_format)
            rgb_im.save(output_file_name, output_format)