# Copyright Amazon.com, Inc. and its affiliates. All Rights Reserved.
# SPDX-License-Identifier: MIT
# Licensed under the MIT License. See the LICENSE accompanying this file
# for the specific language governing permissions and limitations under
# the License.
"""Lambda handler that machine-translates an uploaded S3 text object.

The object is split into sentences, each sentence is translated from
English to Spanish with Amazon Translate, the sentence pairs are submitted
to an Amazon A2I human loop, and the machine translation is written back to
the same bucket under ``machine_output/``.
"""

import json
import os
import time
import urllib
from urllib.parse import unquote_plus

import boto3

translate = boto3.client(service_name='translate', region_name='us-east-1', use_ssl=True)
s3 = boto3.resource('s3')
a2i = boto3.client('sagemaker-a2i-runtime')

# ARN of the A2I flow definition, read once when the module is loaded.
flowDefnARN = unquote_plus(os.environ['FLOW_DEF_ARN'])


def _object_stem(key_name):
    """Return the last path segment of an S3 key without its final extension.

    Unlike slicing between ``rindex('/')`` and ``rindex('.')``, this does not
    raise for keys without a ``/`` or without a ``.``, and it ignores dots
    that appear in folder names.
    """
    base_name = key_name.rsplit('/', 1)[-1]
    stem, dot, _ = base_name.rpartition('.')
    # No dot at all: the whole segment is the stem.
    return stem if dot else base_name


def _translate_sentences(body):
    """Translate each period-delimited sentence of *body* from 'en' to 'es'.

    Fragments that are empty or contain only leading whitespace are skipped.
    The period removed by the split is re-appended to every fragment before
    it is sent to Amazon Translate.

    Returns a list of ``{'originalText': ..., 'translation': ...}`` dicts in
    document order.
    """
    pairs = []
    for sentence in body.split('.'):
        if not sentence.lstrip():
            continue
        original_text = sentence + '.'
        translate_response = translate.translate_text(
            Text=original_text,
            SourceLanguageCode='en',
            TargetLanguageCode='es',
        )
        pairs.append({
            'originalText': original_text,
            'translation': translate_response['TranslatedText'],
        })
    return pairs


def lambda_handler(event, context):
    """Translate the S3 object named in *event* and start a human review loop.

    Args:
        event: Event dict; only ``event['Records'][0]['s3']`` is read (the
            bucket name and the URL-encoded object key).
        context: Lambda context object (unused).

    Returns:
        0 after the translated text has been written to S3.

    Errors from the S3, Translate, and A2I calls are not caught here and
    propagate to the caller.
    """
    # Get the object from the event
    s3_record = event['Records'][0]['s3']
    bucketName = s3_record['bucket']['name']
    keyName = unquote_plus(s3_record['object']['key'])
    fileName = _object_stem(keyName)

    # Read the S3 object; undecodable bytes are dropped rather than raising.
    body = s3.Bucket(bucketName).Object(keyName).get()['Body'].read().decode('utf-8', 'ignore')

    print('Splitting file and performing translation')
    translationPairs = _translate_sentences(body)

    # Create the human loop input JSON object
    humanLoopInput = {
        'SourceLanguage': 'English',
        'TargetLanguage': 'Spanish',
        'sourceLanguageCode': 'en',
        'targetLanguageCode': 'es',
        'translationPairs': translationPairs,
        'rowCount': len(translationPairs),
        'bucketName': bucketName,
        'keyName': keyName,
    }

    # Each translated sentence is followed by a single space.
    translatedText = ''.join(pair['translation'] + ' ' for pair in translationPairs)

    # Millisecond timestamp suffix keeps loop names distinct between invocations.
    # NOTE(review): the A2I API reference documents a lowercase-only pattern
    # for HumanLoopName -- confirm this mixed-case prefix is accepted.
    humanLoopName = 'Translate-Medical-Text' + str(int(round(time.time() * 1000)))
    print('Starting human loop - ' + humanLoopName)
    a2i.start_human_loop(
        HumanLoopName=humanLoopName,
        FlowDefinitionArn=flowDefnARN,
        HumanLoopInput={
            'InputContent': json.dumps(humanLoopInput)
        },
    )

    # Write the machine translated file to the S3 bucket.
    # NOTE(review): the output goes to the same bucket that triggered this
    # function -- confirm the event notification is filtered so that
    # machine_output/ objects do not re-invoke it.
    targetKey = 'machine_output/MO-{0}.txt'.format(fileName)
    print('Writing translated text to ' + bucketName + '/' + targetKey)
    s3.Object(bucketName, targetKey).put(Body=translatedText.encode('utf-8'))
    print('Success')

    return 0