# Geosemantics with Amazon Comprehend

In [None]:
import boto3
import uuid

# Comprehend client and the IAM role Comprehend assumes to read/write the S3 data.
comprehend = boto3.client("comprehend")
role = "arn:aws:iam::141317253884:role/service-role/AmazonComprehendServiceRole-comprehend"

# S3 bucket, the single custom entity type to train, and the local data files.
bucket = "personalizelab-chicago"
entity_types = "CHRONOSTRAT"
train_documents = "bgs-geo-training-data.txt"
test_documents = "bgs-geo-testing-data.txt"
entity_list = "bgs-geo-entity-list.txt"
files = [train_documents, test_documents, entity_list]

s3 = boto3.resource('s3')
# Upload every local file to the bucket, using the local file name as the
# object key. A plain loop is used because the uploads are pure side effects
# (a comprehension would only build a throwaway list of None values).
for filename in files:
    s3.Bucket(bucket).upload_file(filename, filename)

# Start training a custom entity recognizer from the uploaded training
# documents and entity list. The uuid suffix gives each run a distinct name.
response = comprehend.create_entity_recognizer(
    RecognizerName="geo-entity-{}".format(str(uuid.uuid4())),
    LanguageCode="en",
    DataAccessRoleArn=role,
    InputDataConfig={
        "EntityTypes": [
            {
                "Type": entity_types
            }
        ],
        "Documents": {
            # Joining on '/' with the 's3:/' prefix yields 's3://<bucket>/<key>'.
            "S3Uri": '/'.join(['s3:/', bucket, train_documents])
        },
        "EntityList": {
            "S3Uri": '/'.join(['s3:/', bucket, entity_list])
        }
    }
)
# Later cells use this ARN to poll training status and to run detection jobs.
recognizer_arn = response["EntityRecognizerArn"]

In [None]:
# Optional test to see the Entity Recognizer status
#
# Polls the recognizer every 10 seconds until training finishes. Leaves the
# last describe response in `response` and the final status in `status`.

import sys  # needed for sys.exit below; was previously missing (NameError)
import time

while True:
    response = comprehend.describe_entity_recognizer(
        EntityRecognizerArn=recognizer_arn
    )

    status = response["EntityRecognizerProperties"]["Status"]
    if "IN_ERROR" == status:
        # Training failed: abort with a non-zero exit status.
        sys.exit(1)
    if "TRAINED" == status:
        break
    # NOTE(review): any other terminal status (e.g. a stopped training job)
    # is not handled here and would keep polling -- confirm against the
    # Comprehend API documentation whether that needs handling.

    time.sleep(10)

In [3]:
# Optional code to send a text message once the training is complete

phone_number = "+12815159927" # number for the scientist. Must include the international code ("+1" for the US)

# Create an SNS client
sns = boto3.client("sns")

# The describe_entity_recognizer response nests its fields under
# "EntityRecognizerProperties" and has no "RecognizerName" key, so indexing
# response["RecognizerName"] raises KeyError. The recognizer name is taken
# from the ARN instead.
# NOTE(review): assumes the ARN has the form "...:entity-recognizer/<name>" -- confirm.
recognizer_name = recognizer_arn.split("/")[1]

# `status` is the final training status left behind by the polling cell.
sns.publish(
    PhoneNumber=phone_number,
    Message="{} training has stopped with status {}".format(recognizer_name, status)
)

{'MessageId': '2993eb48-524a-5dd7-bb1f-bcce24fb7670',
 'ResponseMetadata': {'RequestId': '1ac2bb7a-09be-5e4f-9cf1-ecfc7f0fd5e8',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '1ac2bb7a-09be-5e4f-9cf1-ecfc7f0fd5e8',
   'content-type': 'text/xml',
   'content-length': '294',
   'date': 'Mon, 13 Jul 2020 14:49:21 GMT'},
  'RetryAttempts': 0}}

In [None]:
# Launch an asynchronous entity-detection job that runs the trained custom
# recognizer over the test documents (one document per line) and writes the
# results under the bucket's "output" prefix.
detection_input = {
    "InputFormat": "ONE_DOC_PER_LINE",
    "S3Uri": "/".join(("s3:/", bucket, test_documents)),
}
detection_output = {
    "S3Uri": "/".join(("s3:/", bucket, "output")),
}

response = comprehend.start_entities_detection_job(
    JobName="Detection-Job-Name-{}".format(uuid.uuid4()),
    EntityRecognizerArn=recognizer_arn,
    LanguageCode="en",
    DataAccessRoleArn=role,
    InputDataConfig=detection_input,
    OutputDataConfig=detection_output,
)

In [None]:
# Optional code to send a text message once the detection job is complete
#
# Polls the detection job once a minute until it reaches a terminal state,
# then sends an SMS with the job name and final status. Relies on `sns` and
# `phone_number` from the training-notification cell.

import sys
import time

# describe_entities_detection_job is keyed by JobId (not by recognizer ARN).
# Capture the id before the loop rebinds `response`; the fallback keeps the
# cell re-runnable after `response` already holds a describe result.
job_id = response.get("JobId") or response["EntitiesDetectionJobProperties"]["JobId"]

while True:
    response = comprehend.describe_entities_detection_job(JobId=job_id)

    job = response["EntitiesDetectionJobProperties"]
    status = job["JobStatus"]
    # Detection jobs report FAILED (not IN_ERROR); STOPPED is also terminal,
    # so both end the wait with a non-zero exit instead of polling forever.
    if status in ("FAILED", "STOPPED"):
        sys.exit(1)
    if status == "COMPLETED":
        break

    time.sleep(60)

sns.publish(
    PhoneNumber=phone_number,
    Message="{} job has stopped with status {}".format(job["JobName"], status)
)