# controller.py
from endpoints import Endpoints
from hashlib import md5
import sys
import os
import json
import boto3
from urllib.error import HTTPError
import urllib.request
import pandas as pd
from random import choice as random_choice
# *****************************************************************************
# ******** Functions to fetch Kendra Results ********
# *****************************************************************************
def processTextHighlightTags(r, open_tag="", close_tag=""):
    """Return the text of a text-with-highlights value, whitespace-normalised.

    Args:
        r: Mapping with a ``"Text"`` string and, optionally, a ``"Highlights"``
            list of mappings carrying ``"BeginOffset"`` / ``"EndOffset"``
            indices into ``"Text"``. Falsy values (``None``, ``{}``) are
            accepted.
        open_tag: Marker inserted in front of every highlighted span.
        close_tag: Marker inserted behind every highlighted span.

    Returns:
        ``None`` when ``r`` is falsy; ``r["Text"]`` untouched when ``r`` has no
        ``"Highlights"`` key; otherwise the text with the markers wrapped
        around each span and every run of whitespace collapsed to one space.
    """
    if not r:
        return None

    text = r['Text']
    if "Highlights" not in r:
        return text

    # NOTE(review): the previous code shifted offsets by a hard-coded 7 while
    # inserting empty strings, which hints that the markers used to be an
    # HTML bold open/close pair (3 + 4 characters) - confirm. Both markers
    # default to "" so existing callers get exactly the output they got
    # before; the shift is now always derived from the real marker lengths.
    shift_per_span = len(open_tag) + len(close_tag)
    shift = 0

    # sorted() rather than list.sort(): the caller's list must not be
    # reordered as a side effect of formatting.
    for span in sorted(r['Highlights'], key=lambda h: h['BeginOffset']):
        begin = span['BeginOffset'] + shift
        end = span['EndOffset'] + shift
        text = text[:begin] + open_tag + text[begin:end] + close_tag + text[end:]
        shift += shift_per_span

    # Collapse newlines, tabs and repeated blanks into single spaces.
    return ' '.join(text.split())
def prepareKendraResults(kendra_results, BUCKET_NAME):
    """Flatten a Kendra query response into a plain dict of result records.

    Args:
        kendra_results: Response mapping providing ``"TotalNumberOfResults"``,
            ``"QueryId"`` and ``"ResultItems"``.
        BUCKET_NAME: Bucket name; the ``s3://<BUCKET_NAME>/`` prefix is removed
            from each ``DocumentId`` to obtain ``DocumentName``.

    Returns:
        Dict with ``"TotalNumberOfResults"``, ``"QueryId"`` and
        ``"ResultItems"``. Each result item is a dict with the keys ``Id``,
        ``Type``, ``DocumentURI``, ``DocumentExcerpt``, ``DocumentTitle``,
        ``DocumentId`` and ``DocumentName``. For items of type ``ANSWER`` the
        excerpt is taken from the first additional attribute's
        ``TextWithHighlightsValue`` (``None`` when there are no additional
        attributes); for every other type it comes from ``DocumentExcerpt``.
        An empty ``"ResultItems"`` list yields an empty list.
    """
    bucket_prefix = f"s3://{BUCKET_NAME}/"

    # Records are built directly instead of round-tripping through a
    # DataFrame: a DataFrame created from an empty list has no columns, so the
    # old column access failed whenever a query returned no results.
    result_items = []
    for rs in kendra_results["ResultItems"]:
        if rs["Type"] == 'ANSWER':
            attributes = rs.get("AdditionalAttributes")
            answer = attributes[0]["Value"]["TextWithHighlightsValue"] if attributes else None
            excerpt = processTextHighlightTags(answer)
        else:
            excerpt = processTextHighlightTags(rs["DocumentExcerpt"])

        document_id = rs["DocumentId"]
        result_items.append({
            "Id": rs["Id"],
            "Type": rs["Type"],
            "DocumentURI": rs["DocumentURI"],
            "DocumentExcerpt": excerpt,
            "DocumentTitle": rs["DocumentTitle"]["Text"],
            "DocumentId": document_id,
            "DocumentName": document_id.replace(bucket_prefix, ""),
        })

    return {
        "TotalNumberOfResults": kendra_results["TotalNumberOfResults"],
        "QueryId": kendra_results["QueryId"],
        "ResultItems": result_items,
    }
# *****************************************************************************
# ******** Neptune ********
# *****************************************************************************
# Display colour (hex string) per entity type.
# These tables used to be rebuilt inside fetchDocNode on every call; they are
# module constants now. Duplicate keys from the old literal were removed,
# keeping the value that was effective at runtime (the last occurrence), and
# the 'sign' colour regained its missing leading '#'.
# NOTE(review): fetchDocNode's return value does not consume these tables.
ENTITY_PALETTE = {
    'anatomy': '#8dd3c7',
    'medical_condition': '#e31a1c',
    'medication': '#bebada',
    'protected_health_information': '#fb8072',
    'test_treatment_procedure': '#80b1d3',
    'time_expression': '#fdb462',
    'system_organ_site': '#b3de69',
    'dx_name': '#fccde5',
    'acuity': '#bc80bd',
    'direction': '#d9d9d9',
    'diagnosis': '#ccebc5',
    'negation': '#ffed6f',
    'sign': '#a6cee3',
    'symptom': '#1f78b4',
    'brand_name': '#b2df8a',
    'generic_name': '#fdbf6f',
    'dosage': '#fb9a99',
    'duration': '#ffffb3',
    'form': '#fdbf6f',
    'frequency': '#fdbf6f',
    'rate': '#fdbf6f',
    'route_or_mode': '#fdbf6f',
    'strength': '#fdbf6f',
    'address': '#fdbf6f',
    'age': '#fdbf6f',
    'email': '#fdbf6f',
    'id': '#fdbf6f',
    'name': '#fdbf6f',
    'phone_or_fax': '#fdbf6f',
    'profession': '#fdbf6f',
    'procedure_name': '#fdbf6f',
    'test_name': '#fdbf6f',
    'treatment_name': '#fdbf6f',
    'test_value': '#fdbf6f',
    'test_units': '#fdbf6f',
    'time_to_medication_name': '#fdbf6f',
    'time_to_dx_name': '#fdbf6f',
    'time_to_test_name': '#fdbf6f',
    'time_to_procedure_name': '#fdbf6f',
    'time_to_treatment_name': '#fdbf6f',
    'date': '#fdbf6f',
    'url': '#fdbf6f',
    'contact_point': '#fdbf6f',
    'identifier': '#fdbf6f',
    'quality': '#fdbf6f',
    'quantity': '#fdbf6f',
    'rxnorm': '#fdbf6f',
    'icd10code': '#fdbf6f',
    'icd10description': '#fdbf6f',
    'icd10text': '#fdbf6f',
    'rxnormcode': '#fdbf6f',
    'rxnormdescription': '#fdbf6f',
}

# Icon CSS class per entity type (the "text" field of the old icon table).
ENTITY_ICONS = {
    'anatomy': 'fa fa-building',
    'medical_condition': 'fa fa-calendar',
    'medication': 'fa fa-usd',
    'protected_health_information': 'fa fa-bookmark',
    'test_treatment_procedure': 'fas fa-map-marker-alt',
    'time_expression': 'fas fa-map-marker-alt',
    'system_organ_site': 'fa fa-user',
    'dx_name': 'fa fa-balance-scale',
    'acuity': 'fa fa-comments',
    'direction': 'fa fa-building',
    'diagnosis': 'fa fa-calendar',
    'negation': 'fa fa-usd',
    'sign': 'fa fa-bookmark',
    'symptom': 'fas fa-map-marker-alt',
    'brand_name': 'fas fa-map-marker-alt',
    'generic_name': 'fa fa-bookmark',
    'dosage': 'fa fa-balance-scale',
    'duration': 'fa fa-comments',
    'form': 'fa fa-building',
    'frequency': 'fa fa-calendar',
    'rate': 'fa fa-usd',
    'route_or_mode': 'fa fa-bookmark',
    'strength': 'fas fa-map-marker-alt',
    'address': 'fas fa-map-marker-alt',
    'age': 'fa fa-user',
    'email': 'fa fa-balance-scale',
    'id': 'fa fa-comments',
    'name': 'fa fa-building',
    'phone_or_fax': 'fa fa-calendar',
    'profession': 'fa fa-usd',
    'procedure_name': 'fa fa-bookmark',
    'test_name': 'fas fa-map-marker-alt',
    'treatment_name': 'fas fa-map-marker-alt',
    'test_value': 'fa fa-user',
    'test_units': 'fa fa-balance-scale',
    'time_to_medication_name': 'fa fa-comments',
    'time_to_dx_name': 'fa fa-building',
    'time_to_test_name': 'fa fa-calendar',
    'time_to_procedure_name': 'fa fa-usd',
    'time_to_treatment_name': 'fa fa-bookmark',
    'date': 'fa fa-bookmark',
    'url': 'fa fa-bookmark',
    'contact_point': 'fa fa-bookmark',
    'identifier': 'fa fa-bookmark',
    'quality': 'fa fa-bookmark',
    'quantity': 'fa fa-bookmark',
    'icd10description': 'fa fa-bookmark',
    'icd10text': 'fa fa-bookmark',
    'icd10code': 'fa fa-bookmark',
    'rxnormcode': 'fa fa-bookmark',
    'rxnormdescription': 'fa fa-bookmark',
    'rxnorm': 'fa fa-bookmark',
}


def fetchDocNode(res):
    """Build the graph elements describing one document/entity relation.

    Args:
        res: Mapping with the keys ``"doc_id"``, ``"doc_name"``,
            ``"ent_name"`` and ``"ent_type"``, each holding a mapping with a
            ``"value"`` string (presumably one binding of a SPARQL JSON
            result - confirm against the caller).

    Returns:
        Dict with four entries, inserted in this order:

        * the document id -> hexagon node labelled with the document name
          (``"source/pdfs/"`` and the file extension removed);
        * the entity id (MD5 of document name, entity name and entity type
          URI joined by ``_``) -> circle node labelled with the entity name
          and assigned to the combo named after the entity type;
        * MD5 of the entity type -> collapsed combo for that type;
        * MD5 of the entity id -> edge from the document to the entity.

        Entity types absent from ``ENTITY_PALETTE`` are accepted; the old
        implementation raised ``KeyError`` for them while building a styling
        table it never used.
    """
    doc_id = res["doc_id"]["value"]
    doc_name = res["doc_name"]["value"]
    ent_name = res["ent_name"]["value"]
    ent_type_uri = res["ent_type"]["value"]

    entType = ent_type_uri.replace("http://example.org/entity-type/", "")
    kendraDocumentName = os.path.splitext(doc_name.replace("source/pdfs/", ""))[0]

    ent_id = md5(f"{doc_name}_{ent_name}_{ent_type_uri}".encode()).hexdigest()
    combo_key = md5(entType.encode()).hexdigest()
    edge_key = md5(ent_id.encode()).hexdigest()

    return {
        doc_id: {
            "id": doc_id,
            "shape": 'HexagonNode',
            "data": {"id": doc_id, "label": kendraDocumentName},
            "label": kendraDocumentName,
        },
        ent_id: {
            "id": ent_id,
            "shape": 'CircleNode',
            "data": {"id": ent_id, "label": ent_name},
            "label": ent_name,
            "comboId": entType,
        },
        # Keyed by a hash, but the combo's own id is the readable type name,
        # which is what the entity node's "comboId" refers to.
        combo_key: {
            "id": entType,
            "label": entType,
            "collapsed": True,
        },
        edge_key: {
            "data": {"label": ''},
            "source": doc_id,
            "target": ent_id,
            "properties": [],
        },
    }
def getEndpoint(neptune_endpoint, neptune_port=8182, region_name="us-east-1"):
    """Return the SPARQL endpoint object for a Neptune cluster.

    Args:
        neptune_endpoint: Value forwarded to ``Endpoints`` as
            ``neptune_endpoint``.
        neptune_port: Port forwarded to ``Endpoints``. Defaults to 8182, the
            value that used to be hard-coded.
        region_name: Region forwarded to ``Endpoints``. Defaults to
            ``"us-east-1"``, the value that used to be hard-coded.

    Returns:
        Whatever ``Endpoints.sparql_endpoint()`` returns; ``npQuery`` calls
        ``prepare_request`` on it.
    """
    # Credentials come from a fresh default boto3 session on every call.
    credentials = boto3.session.Session().get_credentials()
    endpoints = Endpoints(
        credentials=credentials,
        neptune_endpoint=neptune_endpoint,
        neptune_port=neptune_port,
        region_name=region_name,
    )
    return endpoints.sparql_endpoint()
def cleanupNameSpace(s):
    """Remove every occurrence of the resource namespace prefix from ``s``."""
    namespace_prefix = "http://example.org/resource/"
    # Splitting on the prefix and re-joining the pieces drops each occurrence.
    pieces = s.split(namespace_prefix)
    return "".join(pieces)
def npQuery(s, endpoint):
    """POST a query to the SPARQL endpoint and return the decoded JSON result.

    Args:
        s: Query text. Newline characters are removed before sending.
        endpoint: Value passed to ``getEndpoint`` to build the request target.

    Returns:
        On success, the parsed JSON response (with the resource namespace
        prefix stripped by ``cleanupNameSpace``) plus ``"success": 1``.
        When the server answers with an HTTP error, ``{"success": 0}``; the
        error is printed, not raised.

    Raises:
        Anything other than ``HTTPError`` (for example a connection failure
        or a success response that is not valid JSON) propagates unchanged.
    """
    # NOTE(review): newlines are deleted, not replaced by a space, so two
    # tokens separated only by a line break get fused - confirm that callers
    # indent continuation lines.
    s = s.replace("\n", "")
    # NOTE(review): the query is sent without URL-encoding; characters such
    # as '&' or '+' in a query may be misread by the server - confirm.
    query = f"query={s}"
    ep = getEndpoint(endpoint)
    request_parameters = ep.prepare_request('POST', payload=query)
    req = urllib.request.Request(
        request_parameters.uri,
        data=query.encode("utf8"),
        headers=request_parameters.headers,
    )

    try:
        # The context manager closes the HTTP response even if read() fails.
        with urllib.request.urlopen(req) as response:
            body = response.read().decode('utf8')
    except HTTPError as e:
        if e.code == 500:
            error_text = e.read().decode('utf8', errors='replace')
            # The error body is normally JSON; fall back to the raw text so
            # that a non-JSON body cannot raise from inside this handler.
            try:
                detail = json.loads(error_text)
            except ValueError:
                detail = error_text
            print(f"*** SERVER ERROR ***: {detail}")
        else:
            print(f"*** ERROR ***: {e}")
        return {"success": 0}

    print("*******************************")
    print("Search Result: ", body[:200])
    print("*******************************")
    res = json.loads(cleanupNameSpace(body))
    res["success"] = 1
    return res