# MIT License
#
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject
# to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from urllib.parse import unquote_plus
import pandas as pd
import json
import boto3
import re
import sys
import uuid

prefix = 'intelligent-doc-demo'

# Create an SSM Client
ssm_client = boto3.client('ssm')

# Create an S3 Client
s3_client = boto3.client('s3')

# Create a Textract Client
textract_client = boto3.client('textract')

# Create a Comprehend Client
comprehend_client = boto3.client('comprehend')


def get_kv_map(bucket, key):
    # Analyze the uploaded document directly from S3 with FORMS feature
    response = textract_client.analyze_document(
        Document={'S3Object': {'Bucket': bucket, 'Name': key}},
        FeatureTypes=['FORMS'])
    print('got the image')

    # Get the text blocks
    blocks = response['Blocks']

    # Build key, value, and block lookup maps from the KEY_VALUE_SET blocks
    key_map = {}
    value_map = {}
    block_map = {}
    for block in blocks:
        block_id = block['Id']
        block_map[block_id] = block
        if block['BlockType'] == "KEY_VALUE_SET":
            if 'KEY' in block['EntityTypes']:
                key_map[block_id] = block
            else:
                value_map[block_id] = block

    return key_map, value_map, block_map


def get_kv_relationship(key_map, value_map, block_map):
    # Pair the text of every KEY block with the text of its linked VALUE block
    kvs = {}
    for block_id, key_block in key_map.items():
        value_block = find_value_block(key_block, value_map)
        key = get_text(key_block, block_map)
        val = get_text(value_block, block_map)
        kvs[key] = val
    return kvs


def find_value_block(key_block, value_map):
    # Follow the key block's VALUE relationship to the matching value block
    for relationship in key_block['Relationships']:
        if relationship['Type'] == 'VALUE':
            for value_id in relationship['Ids']:
                value_block = value_map[value_id]
    return value_block


def get_text(result, blocks_map):
    # Concatenate the WORD children of a block; mark selected checkboxes with 'X'
    text = ''
    if 'Relationships' in result:
        for relationship in result['Relationships']:
            if relationship['Type'] == 'CHILD':
                for child_id in relationship['Ids']:
                    word = blocks_map[child_id]
                    if word['BlockType'] == 'WORD':
                        text += word['Text'] + ' '
                    if word['BlockType'] == 'SELECTION_ELEMENT':
                        if word['SelectionStatus'] == 'SELECTED':
                            text += 'X '
    return text


def prepareForInference(bucket, kvs):
    df = pd.DataFrame(kvs.items())
    print('got the dataframe')
    df.columns = ['key', 'value']

    # Drop empty values, then drop the rows that are not needed for inference
    # (the hard-coded index drops below are specific to the demo form layout)
    df1 = df[df.value != '']
    df1 = df1.reset_index(drop=True)
    df1 = df1.drop(df1.index[1:7])
    df1 = df1.reset_index(drop=True)
    df1 = df1.drop(df1.index[4])
    df1 = df1.reset_index(drop=True)
    df1 = df1.drop(df1.index[9:12])
    df1 = df1.reset_index(drop=True)
    df1 = df1.drop(df1.index[5:7])
    df1 = df1.reset_index(drop=True)

    # Transpose so the extracted keys become column headers
    df_T = df1.T
    print("in the middle of all the dataframe operations")
    df_T.columns = df_T.iloc[0]
    df_T = df_T.reset_index(drop=True)
    df_T = df_T.drop([0])
    df_T = df_T.reset_index(drop=True)

    # Normalize the demo form's verbose field labels and strip trailing spaces
    df_T = df_T.rename(columns={"Name (First, Middle, Last, Suffix) ": "Name",
                                "Date of Birth (mm/dd/yyyy) ": "Date of Birth"})
    df_T.columns = df_T.columns.str.rstrip()

    data_key = prefix + "/cer-input/" + str(uuid.uuid4()) + 'laminference.csv'

    # Build one plain-text line per document row (ONE_DOC_PER_LINE format)
    entry = ''
    for idx, row in df_T.iterrows():
        entry += ('Country:' + str(row['Country']).strip() + ' '
                  + 'Years:' + str(row['Years']).strip() + ' '
                  + 'Cell Phone:' + str(row['Cell Phone']).strip() + ' '
                  + 'Name:' + str(row['Name']).strip() + ' '
                  + 'Social Security Number:' + str(row['Social Security Number']).strip() + ' '
                  + 'TOTAL $:' + str(row['TOTAL $']).strip() + ' '
                  + 'Date of Birth:' + str(row['Date of Birth']).strip() + '\n')
    print('done done and done')

    # Store Processed Data in S3 Bucket as plain text so Comprehend can read it
    # line by line (json.dumps would wrap the payload in a single quoted JSON
    # string and break the ONE_DOC_PER_LINE format)
    processed_textract_data_response = s3_client.put_object(
        Bucket=bucket,
        Key=data_key,
        Body=entry
    )

    return data_key


def lambda_handler(event, context):
    # Get the Custom Entity Recognizer's ARN, execution role, and output bucket
    # from SSM Parameter Store
    comprehend_parameters = ssm_client.get_parameters(
        Names=['CustomEntityRecognizerARN-TCA2I',
               'ComprehendExecutionRole-TCA2I',
               'S3OutBucketName-TCA2I'],
        WithDecryption=True)

    for parameter in comprehend_parameters['Parameters']:
        if parameter['Name'] == 'CustomEntityRecognizerARN-TCA2I':
            custom_recognizer_arn = parameter['Value']
        elif parameter['Name'] == 'ComprehendExecutionRole-TCA2I':
            comprehend_execution_role_arn = parameter['Value']
        elif parameter['Name'] == 'S3OutBucketName-TCA2I':
            out_bucket = parameter['Value']

    # Iterate over all S3 Put records that have been passed to this lambda function.
    for record in event['Records']:
        bucket = record['s3']['bucket']['name']
        key = unquote_plus(record['s3']['object']['key'])

        # Get key/value pairs from the uploaded input document
        key_map, value_map, block_map = get_kv_map(bucket, key)

        # Get the key/value relationships
        kvs = get_kv_relationship(key_map, value_map, block_map)
        print('got the key value pairs: ' + str(kvs))

        # Transform using a dataframe, create a CSV file, and load it to S3 for
        # Comprehend inference as the next step
        inkey = prepareForInference(bucket, kvs)
        print('uploaded the inference file')

        # Start the Custom Entity Recognition Job
        response = comprehend_client.start_entities_detection_job(
            InputDataConfig={
                'S3Uri': 's3://' + bucket + '/' + inkey,
                'InputFormat': 'ONE_DOC_PER_LINE'
            },
            OutputDataConfig={
                'S3Uri': 's3://' + out_bucket + '/' + prefix + '/cer-output/'
            },
            DataAccessRoleArn=comprehend_execution_role_arn,
            JobName=str(uuid.uuid4()) + '-TextractComprehendA2I',
            EntityRecognizerArn=custom_recognizer_arn,
            LanguageCode='en'
        )
        print("Custom Entity Detection Job Started")

    return {
        'statusCode': 200,
        'body': json.dumps('I did what I was asked to do!')
    }
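

# ---------------------------------------------------------------------------
# Local test sketch (not part of the deployed Lambda). A minimal example of
# how lambda_handler above could be exercised with a hand-built S3 Put event.
# The bucket name and object key below are placeholders, not values defined
# anywhere in this project; valid AWS credentials and the SSM parameters
# referenced in lambda_handler are assumed to exist in the target account.
if __name__ == '__main__':
    sample_event = {
        'Records': [
            {
                's3': {
                    'bucket': {'name': 'my-intelligent-doc-demo-bucket'},   # placeholder bucket
                    'object': {'key': 'intelligent-doc-demo/input/sample-form.png'}  # placeholder key
                }
            }
        ]
    }
    # Invoke the handler directly; context is unused, so None is sufficient here.
    print(lambda_handler(sample_event, None))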