###
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: MIT-0
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this
# software and associated documentation files (the "Software"), to deal in the Software
# without restriction, including without limitation the rights to use, copy, modify,
# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# Copyright Amazon.com, Inc. and its affiliates. All Rights Reserved.
# SPDX-License-Identifier: MIT
######
import json
import boto3
from urllib.parse import unquote_plus
import urllib
import time
import re
import os
from awsUtils import readTextFileFromS3, split_s3_path, insertParallelData, writeTextFileToS3
# Module-level AWS clients, created once at import time.
# The Translate client is pinned to us-east-1 with SSL explicitly enabled.
translate = boto3.client(
    'translate',
    region_name='us-east-1',
    use_ssl=True,
)
# The A2I runtime client takes its region and other settings from the defaults.
a2i = boto3.client(service_name='sagemaker-a2i-runtime')
def lambda_handler(event, context):
    """Rebuild the post-edited translation after a human loop completes.

    When ``event['detail']['humanLoopStatus']`` is ``'Completed'``, the handler
    reads the JSON output document referenced by
    ``event['detail']['humanLoopOutput']['outputS3Uri']``, walks the reviewed
    rows ``1..rowCount`` and:

    * for every row whose ``addToCustom<N>`` answer is switched on, inserts a
      source/target pair into the ``translate_parallel_data`` table via
      ``insertParallelData``;
    * concatenates every ``translation<N>`` answer (each followed by a single
      space) and writes the result to ``edited/<keyName>`` in the same bucket.

    Args:
        event: Event dict carrying ``detail.humanLoopStatus`` and, for
            completed loops, ``detail.humanLoopOutput.outputS3Uri``.
        context: Lambda context object (unused).

    Returns:
        ``-1`` when the output S3 URI is ``None``; ``0`` otherwise.

    Raises:
        KeyError: If the event or the output document lacks an expected key.
    """
    # NOTE(review): the original indentation was lost; this assumes events
    # whose status is not 'Completed' are ignored and still return 0 -- confirm.
    if event['detail']['humanLoopStatus'] != 'Completed':
        return 0

    s3location = event['detail']['humanLoopOutput']['outputS3Uri']
    if s3location is None:
        return -1
    print("outputS3Uri :", s3location)

    # Locate and load the human-review output document.
    bucketName, prefix = split_s3_path(s3location)
    tmsFile = json.loads(readTextFileFromS3(bucketName, prefix))
    inputContent = tmsFile['inputContent']
    rowcount = inputContent['rowCount']
    answerContent = tmsFile['humanAnswers'][0]['answerContent']
    translatedFileName = inputContent['keyName']

    # NOTE(review): as written this pattern always matches the empty string,
    # and the two .replace('','') calls below are no-ops. It looks like markup
    # delimiters around the capture group were lost -- confirm the intended
    # tags before relying on phrase extraction.
    pattern = re.compile("(.*?)")

    editedParts = []
    for index in range(1, rowcount + 1):
        suffix = str(index)
        if answerContent['addToCustom' + suffix]['on']:
            targetMatch = pattern.search(answerContent['translation' + suffix])
            sourceMatch = pattern.search(answerContent['originalText' + suffix])
            # Both sides must match before extracting phrases. The original
            # tested the target match twice and never the source match.
            if sourceMatch is not None and targetMatch is not None:
                sourceTxt = sourceMatch.group(1)
                targetTxt = targetMatch.group(1)
            else:
                # Fall back to the whole original sentence and the whole
                # reviewed translation.
                sourceTxt = inputContent['translationPairs'][index - 1]['originalText']
                targetTxt = answerContent['translation' + suffix]
            # Build a fresh record per row so no state leaks between inserts.
            insertParallelData('translate_parallel_data', {
                'domain': {'S': 'Finance'},
                'sourceLanguageCode': {'S': inputContent['sourceLanguageCode']},
                'targetLanguageCode': {'S': inputContent['targetLanguageCode']},
                'source': {'S': sourceTxt},
                'target': {'S': targetTxt},
            })
        # Every row contributes to the edited document, flagged or not.
        editedParts.append(
            answerContent['translation' + suffix].replace('','').replace('','') + " "
        )

    writeTextFileToS3(
        bucketName,
        'edited/{0}'.format(translatedFileName),
        "".join(editedParts),
    )
    print('Success')
    return 0