######## 1- Create a Lambda function: Author from scratch, with runtime Python 3.8
######## 2- Use the Lambda-S3-Glue-comprehend role
######## 3- Increase the timeout to 2 min
######## 4- Change to your S3 bucket

import csv
import uuid

import boto3


def lambda_handler(event, context):
    #################### Read the first line of the dataset to use it for inference ####################
    s3 = boto3.client('s3')
    account_id = boto3.client("sts").get_caller_identity()["Account"]
    bucket = ''  # change to your S3 bucket
    EntityRecognizerArn_job = "arn:aws:comprehend:us-east-1:" + account_id + ":entity-recognizer/Recognizer-blog"  # replace with your entity recognizer ARN if needed
    DataAccessRoleArn_user = "arn:aws:iam::" + account_id + ":role/Lambda-S3-Glue-comprehend"  # replace with the IAM role created before if needed

    # The pointer file contains "<bucket>,<key>" identifying the dataset to read
    key = 'file_location/file_name.txt'
    obj = s3.get_object(Bucket=bucket, Key=key)
    first_line_raw = obj['Body'].read()
    first_line = first_line_raw.decode("utf-8")
    first_line_list = first_line.split(",")
    bucket = first_line_list[0]
    key = first_line_list[1]

    # Read the dataset, skip the header row, and keep the first data row
    obj = s3.get_object(Bucket=bucket, Key=key)
    data = obj['Body'].read().decode('utf-8').splitlines()
    lines = csv.reader(data)
    headers = next(lines)
    first_line_list = next(lines)

    # Write the first data row to a temporary file
    with open("/tmp/csv_file.csv", 'w') as file:
        writer = csv.writer(file)
        writer.writerow(first_line_list)

    # Upload the file from /tmp to the S3 output key
    s3_resource = boto3.resource('s3')
    bucket_object = s3_resource.Bucket(bucket)
    key_output = 'row-data-out/row-data-out.csv'
    bucket_object.upload_file('/tmp/csv_file.csv', key_output)
    S3Uri_file = "s3://" + bucket + "/" + key_output
    S3Uri_out = "s3://" + bucket + "/" + "comprehend_output"

    #################### Start the Comprehend custom entity detection job ####################
    comprehend = boto3.client("comprehend")
    response = comprehend.start_entities_detection_job(
        EntityRecognizerArn=EntityRecognizerArn_job,
        JobName="Detection-Job-Name-{}".format(str(uuid.uuid4())),
        LanguageCode="en",
        DataAccessRoleArn=DataAccessRoleArn_user,
        InputDataConfig={
            "InputFormat": "ONE_DOC_PER_LINE",
            "S3Uri": S3Uri_file
        },
        OutputDataConfig={
            "S3Uri": S3Uri_out
        }
    )
    return response
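

######## Optional helper: wait for the detection job to finish. A minimal sketch, not part of the
######## original handler (the name wait_for_detection_job is hypothetical): start_entities_detection_job
######## is asynchronous and jobs can take several minutes, so run a loop like this outside Lambda
######## or with a much longer timeout. Usage with the handler's return value:
######## wait_for_detection_job(response["JobId"])
import time

def wait_for_detection_job(job_id):
    # Poll describe_entities_detection_job until the job reaches a terminal state
    comprehend = boto3.client("comprehend")
    while True:
        status = comprehend.describe_entities_detection_job(JobId=job_id)[
            "EntitiesDetectionJobProperties"]["JobStatus"]
        if status in ("COMPLETED", "FAILED", "STOPPED"):
            return status
        time.sleep(30)  # status moves from SUBMITTED/IN_PROGRESS to a terminal state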