import json
from textractcaller import call_textract
from yattag import Doc, indent

def resultsParser(result):
    """Route a Textract response to the parser matching its page count.

    Args:
        result: Textract response dict; ``result["DocumentMetadata"]["Pages"]``
            is read to pick the parser.

    Returns:
        The return value of ``multiplePageParse(result)`` when the page count
        is greater than one, otherwise that of ``singlePageParse(result)``.
    """
    page_count = result["DocumentMetadata"]["Pages"]
    # Only the selected parser name is looked up, exactly as in an if/else.
    parse = multiplePageParse if page_count > 1 else singlePageParse
    return parse(result)

def multiplePageParse(result):
    """Group the LINE blocks of a multi-page result by page and render them.

    Builds ``{page: {line_id: line_entry}}`` where each line entry holds the
    line's ``BlockType``, ``Confidence``, ``Text``, ``BoundingBox``,
    ``Polygon`` and a ``"Words"`` dict of ``{word_id: word_entry}``, then
    hands that mapping to ``printHTML``.

    Args:
        result: Textract response dict. ``result["Blocks"]`` is iterated;
            PAGE and LINE blocks must carry a ``"Page"`` key.

    Returns:
        Whatever ``printHTML`` returns for the assembled mapping.

    Raises:
        KeyError: if a LINE block, or a block referenced as one of its words,
            lacks one of the fields copied below.
    """
    blocks = result["Blocks"]
    # Index the blocks once so resolving a line's words is a dict lookup
    # rather than a rescan of every block for every word id.
    blocks_by_id = {block["Id"]: block for block in blocks}

    def child_ids(block):
        """Return the Ids of the block's first relationship, or [] if none."""
        relationships = block.get("Relationships") or []
        return relationships[0]["Ids"] if relationships else []

    def bounding_box(block):
        """Copy the four bounding-box fields of a block."""
        box = block["Geometry"]["BoundingBox"]
        return {
            "Width": box["Width"],
            "Height": box["Height"],
            "Left": box["Left"],
            "Top": box["Top"],
        }

    def polygon(block):
        """Copy up to the first four polygon points of a block."""
        points = block["Geometry"]["Polygon"]
        return [{"X": point["X"], "Y": point["Y"]} for point in points[:4]]

    result_data = {}
    for block in blocks:
        block_type = block["BlockType"]

        if block_type == "PAGE":
            # Pre-seed the page's children so lines keep the order in which
            # the PAGE block lists them. setdefault keeps any line that was
            # already stored for this page.
            page_lines = result_data.setdefault(block["Page"], {})
            for line_id in child_ids(block):
                page_lines.setdefault(line_id, {})

        elif block_type == "LINE":
            words = {}
            for word_id in child_ids(block):
                word_block = blocks_by_id.get(word_id)
                if word_block is None:
                    # Referenced id is not among the blocks: nothing to copy.
                    continue
                words[word_id] = {
                    "BlockType": word_block["BlockType"],
                    "Confidence": word_block["Confidence"],
                    "Text": word_block["Text"],
                    "TextType": word_block["TextType"],
                    "BoundingBox": bounding_box(word_block),
                    "Polygon": polygon(word_block),
                }

            result_data.setdefault(block["Page"], {})[block["Id"]] = {
                "BlockType": block["BlockType"],
                "Confidence": block["Confidence"],
                "Text": block["Text"],
                "BoundingBox": bounding_box(block),
                "Polygon": polygon(block),
                "Words": words,
            }

    # Drop placeholders that never resolved to a LINE block (a PAGE child of
    # some other type); an empty entry has no "BoundingBox" to render.
    result_data = {
        page: {line_id: entry for line_id, entry in page_lines.items() if entry}
        for page, page_lines in result_data.items()
    }

    return printHTML(result_data)

def singlePageParse(result):
    """Collect every LINE block of a result under page ``1`` and render it.

    Builds ``{1: {line_id: line_entry}}`` where each line entry holds the
    line's ``BlockType``, ``Confidence``, ``Text``, ``BoundingBox``,
    ``Polygon`` and a ``"Words"`` dict of ``{word_id: word_entry}``, then
    hands that mapping to ``printHTML``. The blocks' own ``"Page"`` values
    are not consulted.

    Args:
        result: Textract response dict; ``result["Blocks"]`` is iterated.

    Returns:
        Whatever ``printHTML`` returns for the assembled mapping.

    Raises:
        KeyError: if a LINE block, or a block referenced as one of its words,
            lacks one of the fields copied below.
    """
    blocks = result["Blocks"]
    # Index the blocks once so resolving a line's words is a dict lookup
    # rather than a rescan of every block for every word id.
    blocks_by_id = {block["Id"]: block for block in blocks}

    def child_ids(block):
        """Return the Ids of the block's first relationship, or [] if none."""
        relationships = block.get("Relationships") or []
        return relationships[0]["Ids"] if relationships else []

    def bounding_box(block):
        """Copy the four bounding-box fields of a block."""
        box = block["Geometry"]["BoundingBox"]
        return {
            "Width": box["Width"],
            "Height": box["Height"],
            "Left": box["Left"],
            "Top": box["Top"],
        }

    def polygon(block):
        """Copy up to the first four polygon points of a block."""
        points = block["Geometry"]["Polygon"]
        return [{"X": point["X"], "Y": point["Y"]} for point in points[:4]]

    lines = {}
    for block in blocks:
        if block["BlockType"] != "LINE":
            continue

        words = {}
        for word_id in child_ids(block):
            word_block = blocks_by_id.get(word_id)
            if word_block is None:
                # Referenced id is not among the blocks: nothing to copy.
                continue
            words[word_id] = {
                "BlockType": word_block["BlockType"],
                "Confidence": word_block["Confidence"],
                "Text": word_block["Text"],
                "TextType": word_block["TextType"],
                "BoundingBox": bounding_box(word_block),
                "Polygon": polygon(word_block),
            }

        lines[block["Id"]] = {
            "BlockType": block["BlockType"],
            "Confidence": block["Confidence"],
            "Text": block["Text"],
            "BoundingBox": bounding_box(block),
            "Polygon": polygon(block),
            "Words": words,
        }

    return printHTML({1: lines})

def printHTML(result_data):
    """Render parsed lines and words as hOCR-style HTML.

    Emits ``<html><body>`` containing one ``div.ocr_page`` (id ``page_<key>``)
    per page, one ``div.ocr_line`` per line and one ``span.ocrx_word`` per
    word. Lines and words carry a ``title`` of the form
    ``bbox x0 y0 x1 y1; x_wconf N``.

    Args:
        result_data: ``{page: {line_id: line}}``. Each line needs
            ``"BoundingBox"`` (``Width``/``Height``/``Left``/``Top``),
            ``"Confidence"`` and ``"Words"``; each word needs
            ``"BoundingBox"``, ``"Confidence"`` and ``"Text"``. Line entries
            that are empty dicts are skipped.

    Returns:
        The yattag ``Doc`` holding the generated markup.
    """
    # Build on a document owned by this call instead of module-level names,
    # so the function works when the module is imported and repeated calls
    # do not append to one shared document.
    doc, tag, text = Doc().tagtext()

    def title_for(item):
        """Build the ``bbox ...; x_wconf ...`` title for a line or a word."""
        box = item["BoundingBox"]
        left = box["Left"]
        top = box["Top"]
        # hOCR defines bbox as "x0 y0 x1 y1" (top-left and bottom-right
        # corners), so the right and bottom edges are derived from the
        # origin plus the extent.
        corners = (left, top, left + box["Width"], top + box["Height"])
        # Coordinates are scaled by 1000 and truncated to integers.
        # NOTE(review): assumes the inputs are fractions of the page size.
        bbox = ' '.join(str(int(value * 1000)) for value in corners)
        return 'bbox ' + bbox + '; x_wconf ' + str(int(item["Confidence"]))

    with tag('html'):
        with tag('body'):
            for page, lines in result_data.items():
                with tag('div', klass="ocr_page", id="page_{}".format(page)):
                    for line in lines.values():
                        if not line:
                            # An entry with no data has nothing to render.
                            continue
                        with tag('div', ('title', title_for(line)), klass='ocr_line'):
                            for word in line["Words"].values():
                                with tag('span', ('title', title_for(word)), klass='ocrx_word'):
                                    # Trailing space separates adjacent words.
                                    text(word["Text"] + ' ')

    return doc


if __name__ == '__main__':
    # Script entry point: send one document to Textract and write the
    # rendered markup to "<document_name>.html" in the working directory.
    input_document_url = "s3://" # S3 Location or Local Location (only for images)
    # Output name: last path component, cut at its first ".".
    document_name = input_document_url.split("/")[-1].split(".")[0]
    print("Calling Textract...")
    textract_json = call_textract(input_document=input_document_url)
    print("Processing Textract Results...")
    # yattag handles bound at module scope; `doc` is then rebound to the
    # document returned by resultsParser.
    doc, tag, text = Doc().tagtext()
    doc = resultsParser(textract_json)
    # Explicit UTF-8 so recognised text outside the locale's default
    # encoding cannot make the write fail. The `with` block closes the file.
    with open('{}.html'.format(document_name), 'w', encoding='utf-8') as f:
        print(indent(doc.getvalue()), file=f)
    print("Results printed out {}.html".format(document_name))