"""Lambda that builds Comprehend training data and starts recognizer training.

The handler downloads a remarks file from S3, selects the remarks that mention
a well-control phrase, uploads the resulting training documents and the entity
list back to the same bucket, and submits a custom entity recognizer job.
"""

import csv
import os
import time

import boto3
import pandas as pd

# Label attached to every training row and every entity-list entry.
ENTITY_TYPE = "WELL_CONTROL_PROBLEM"

# Phrases whose presence marks a remark as a well-control problem. The same
# phrases are written to the entity list, so both files always agree.
WELL_CONTROL_PHRASES = (
    "kill the well",
    "well flows",
    "driller method",
    "shut in well",
    "influx",
)

# A remark is kept only if its UTF-8 encoding is strictly smaller than this.
# NOTE(review): presumably mirrors a Comprehend document size limit - confirm.
MAX_REMARK_BYTES = 5000

# Scratch directory for local files. Absolute paths are used everywhere so the
# handler does not need to change the process working directory.
WORK_DIR = "/tmp"

INPUT_KEY = "inputs/mv_war_main_prop_remark.txt"
VALUES_KEY = "entity_values_blog.csv"
ENTITY_LIST_KEY = "entity_list_blog.csv"


def _is_training_remark(remark):
    """Return True if *remark* mentions a well-control phrase and is small enough."""
    text = str(remark)  # non-string cells (e.g. NaN) are compared as text
    if not any(phrase in text for phrase in WELL_CONTROL_PHRASES):
        return False
    return len(text.encode("utf-8")) < MAX_REMARK_BYTES


def _write_labelled_csv(path, texts):
    """Write a ``Text,Type`` CSV at *path* with one ENTITY_TYPE row per text."""
    # newline="" is what the csv module requires for files handed to a writer;
    # without it, line endings can be translated a second time on some platforms.
    with open(path, "w", encoding="utf-8", newline="") as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(["Text", "Type"])
        writer.writerows([text, ENTITY_TYPE] for text in texts)


def _s3_uri(bucket, key):
    """Return the ``s3://bucket/key`` URI for an object."""
    return "s3://{}/{}".format(bucket, key)


def lambda_handler(event, context):
    """Submit the entity recognizer training job to Amazon Comprehend.

    Reads the bucket name from the ``S3Bucket`` environment variable and the
    IAM role from ``data_access_role_arn``. Neither *event* nor *context* is
    inspected.

    Returns:
        dict: ``{"EntityRecognizerArn": <arn returned by Comprehend>}``.

    Raises:
        KeyError: if a required environment variable is missing, or if the
            input file has no ``TEXT_REMARK`` column.
        Exception: errors raised by boto3 or pandas are not caught here and
            propagate to the Lambda runtime.
    """
    input_bucket = os.environ["S3Bucket"]
    s3_client = boto3.client("s3")

    local_input = os.path.join(WORK_DIR, "mv_war_main_prop_remark.txt")
    values_path = os.path.join(WORK_DIR, "entityvalues_blog.csv")
    entity_list_path = os.path.join(WORK_DIR, "entitylist_blog.csv")

    s3_client.download_file(input_bucket, INPUT_KEY, local_input)
    remarks = pd.read_csv(local_input, encoding="latin-1", header=0)["TEXT_REMARK"]

    # Training documents: every remark that mentions a well-control phrase.
    _write_labelled_csv(
        values_path,
        (remark for remark in remarks if _is_training_remark(remark)),
    )
    # Upload only after the file has been closed, so all rows are flushed.
    s3_client.upload_file(values_path, input_bucket, VALUES_KEY)

    # Entity list: the phrases themselves, each tagged with the entity type.
    _write_labelled_csv(entity_list_path, WELL_CONTROL_PHRASES)
    s3_client.upload_file(entity_list_path, input_bucket, ENTITY_LIST_KEY)

    comprehend = boto3.client("comprehend")
    # NOTE(review): the recognizer name is fixed, so a second invocation is
    # likely to be rejected while a recognizer of that name exists - confirm.
    comp_submit_response = comprehend.create_entity_recognizer(
        RecognizerName="well-events-blog",
        LanguageCode="en",
        DataAccessRoleArn=os.environ["data_access_role_arn"],
        InputDataConfig={
            "EntityTypes": [{"Type": ENTITY_TYPE}],
            "Documents": {"S3Uri": _s3_uri(input_bucket, VALUES_KEY)},
            "EntityList": {"S3Uri": _s3_uri(input_bucket, ENTITY_LIST_KEY)},
        },
    )

    return {"EntityRecognizerArn": comp_submit_response["EntityRecognizerArn"]}