import json
import os
import time
import uuid
from urllib.parse import unquote_plus

import boto3


def lambda_handler(event, context):
    """Submit an uploaded S3 object to Comprehend for custom entity detection.

    The handler reads the bucket and key from the first record of an S3 event
    notification and starts an asynchronous entities detection job on that
    object. Results are written under the ``model-output/`` prefix of the same
    bucket.

    Environment variables:
        recognizer_arn: ARN passed as ``EntityRecognizerArn``.
        data_access_arn: ARN passed as ``DataAccessRoleArn``.

    Args:
        event: S3 event notification; only ``event['Records'][0]`` is used.
        context: Lambda context object (unused).

    Returns:
        dict with ``statusCode`` 200 and ``job-id`` set to the ``JobId``
        returned by Comprehend.

    Raises:
        KeyError: if a required environment variable or event field is
            missing.
    """
    comprehend = boto3.client("comprehend")

    recognizer_arn = os.environ['recognizer_arn']
    data_access_arn = os.environ['data_access_arn']

    # NOTE(review): only the first record is processed; any further records
    # in the same notification are ignored, as in the original code.
    s3_info = event['Records'][0]['s3']
    input_bucket = s3_info['bucket']['name']
    # Object keys arrive URL-encoded in S3 event notifications (a space is
    # sent as '+'), so decode before building the URI or Comprehend will be
    # pointed at an object that does not exist.
    input_key = unquote_plus(s3_info['object']['key'])

    output_s3_uri = "s3://" + input_bucket + "/model-output/"
    input_s3_uri = "s3://" + input_bucket + '/' + input_key
    print(input_s3_uri)

    # NOTE(review): fixed 60 s delay before submitting the job; presumably it
    # gives upstream processing time to settle -- confirm it is still needed.
    time.sleep(60)

    comp_submit_response = comprehend.start_entities_detection_job(
        EntityRecognizerArn=recognizer_arn,
        # A random UUID suffix gives each submission its own job name.
        JobName="Daily-Drilling-Detection-Job-Name-{}".format(str(uuid.uuid4())),
        LanguageCode="en",
        DataAccessRoleArn=data_access_arn,
        InputDataConfig={
            "InputFormat": "ONE_DOC_PER_LINE",
            "S3Uri": input_s3_uri,
        },
        OutputDataConfig={
            "S3Uri": output_s3_uri,
        },
    )

    return {
        'statusCode': 200,
        'job-id': comp_submit_response['JobId'],
    }