# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: MIT-0

import json
import boto3
import uuid
from datetime import datetime
import os

def lambda_handler(event, context):
    """Start an asynchronous Textract text-detection job for an S3 object.

    Reads the bucket and object key from the first record of the incoming
    S3 event, submits the object to Textract via
    ``start_document_text_detection`` and stores the returned job id in the
    ``textract-job-details`` DynamoDB table with status ``SUBMITTED``.

    Only ``event['Records'][0]`` is processed; any further records in the
    same event are ignored.

    Environment variables:
        SNSTOPIC: ARN of the SNS topic passed to Textract as the
            notification channel.
        IAMARN: ARN of the IAM role passed to Textract as ``RoleArn`` of
            the notification channel.

    Args:
        event: S3 notification event; must contain
            ``Records[0].s3.bucket.name`` and ``Records[0].s3.object.key``.
        context: Lambda context object (unused).

    Returns:
        dict with ``statusCode`` 200 and ``body`` set to the JSON-encoded
        response of the DynamoDB ``put_item`` call.

    Raises:
        KeyError: if a required environment variable is missing or the
            event does not have the expected structure.
        Exceptions raised by the Textract or DynamoDB calls are not caught
        and propagate to the Lambda runtime.
    """
    ## Imported locally so this handler does not depend on a change to the
    ## module-level import block.
    from urllib.parse import unquote_plus

    table_name = "textract-job-details"

    textract_client = boto3.client('textract')
    textract_job_details_table = boto3.resource('dynamodb').Table(table_name)
    sns_arn = os.environ['SNSTOPIC']
    iam_arn = os.environ['IAMARN']

    ## Get the bucket and object name from the first event record
    s3_record = event['Records'][0]['s3']
    bucket = s3_record['bucket']['name']
    ## S3 delivers the object key URL-encoded in event notifications
    ## (spaces become '+', special characters become %XX). Decode it so
    ## Textract and DynamoDB receive the real object key.
    key = unquote_plus(s3_record['object']['key'])
    file_path = "s3://{}/{}".format(bucket, key)

    print("bucket {} key {}".format(bucket, key))

    ## ClientRequestToken should be unique when calling textract
    unique_hash_for_client_request_token = uuid.uuid4().hex

    ## Start the asynchronous text detection job; Textract is given the SNS
    ## topic and role as its notification channel.
    textract_invocation_response = textract_client.start_document_text_detection(
        DocumentLocation={
            'S3Object': {
                'Bucket': bucket,
                'Name': key
            }
        },
        ClientRequestToken=unique_hash_for_client_request_token,
        NotificationChannel={
            'SNSTopicArn': sns_arn,
            'RoleArn': iam_arn
        })
    job_id = textract_invocation_response['JobId']

    ## The job has only been submitted at this point, not completed.
    print("textract document text detection submitted. Job id is {} and path is {}".format(
        job_id, file_path))

    ## Store the submitted job in DynamoDB. submission_time is a naive
    ## timestamp in the runtime's local time; no timezone is stored.
    db_response = textract_job_details_table.put_item(
        Item={'file_path': file_path,
              'job_id': job_id,
              'job_status': 'SUBMITTED',
              'submission_time': datetime.now().strftime('%Y-%m-%d %H:%M:%S')
              })

    print("record has been inserted into dynamodb table {}".format(table_name))

    return {
        'statusCode': 200,
        'body': json.dumps(db_response)
    }