AWSTemplateFormatVersion: 2010-09-09

# Text-translation pipeline:
#   * a file uploaded (PUT) to the input bucket triggers the Lambda function,
#   * the function translates it paragraph by paragraph with Amazon Translate,
#   * the result is stored in the output bucket under
#     <TargetLanguageCode>/<first key segment>/<file name>.

Parameters:
  # Language code passed to Amazon Translate as the target language.
  TargetLanguageCode:
    Type: String
    Default: de
  # Name of the bucket that receives the documents to translate.
  InputBucketName:
    Type: String
  # Name of the bucket that receives the translated documents.
  OutputBucketName:
    Type: String
  # Name of the execution role created for the Lambda function.
  IAMRoleName:
    Type: String
    Default: TranslationLambdaExecRole
  # Name of the Lambda function (also determines its log group name).
  LambdaFunctionName:
    Type: String

Resources:
  # Permissions for the function: write its own CloudWatch logs and
  # get/put objects in the input and output buckets.
  translateLambdaFunctionPolicy:
    Type: AWS::IAM::ManagedPolicy
    Properties:
      PolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Action:
              - 'logs:CreateLogGroup'
              - 'logs:CreateLogStream'
              - 'logs:PutLogEvents'
            Resource:
              - !Sub 'arn:aws:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/${LambdaFunctionName}:*'
          - Effect: Allow
            Action:
              - 's3:GetObject'
              - 's3:PutObject'
            Resource:
              - !Sub 'arn:aws:s3:::${InputBucketName}/*'
              - !Sub 'arn:aws:s3:::${OutputBucketName}/*'

  # Execution role assumed by the Lambda service on behalf of the function.
  IAMR1LC1M:
    Type: 'AWS::IAM::Role'
    Properties:
      RoleName: !Ref IAMRoleName
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - lambda.amazonaws.com
            Action:
              - 'sts:AssumeRole'
      Path: /
      ManagedPolicyArns:
        - !Ref translateLambdaFunctionPolicy
        - arn:aws:iam::aws:policy/TranslateReadOnly

  # Translation function; the source is inlined below and deployed as "index".
  LF4L95L:
    Type: 'AWS::Lambda::Function'
    Properties:
      FunctionName: !Ref LambdaFunctionName
      Handler: index.lambda_handler
      # python3.7 is deprecated in Lambda and can no longer be used to
      # create functions, so a currently supported runtime is used instead.
      Runtime: python3.12
      Timeout: 900
      Environment:
        Variables:
          SourceLanguageCode: auto
          TargetBucketName: !Ref OutputBucketName
          TargetLanguageCode: !Ref TargetLanguageCode
      Role: !GetAtt
        - IAMR1LC1M
        - Arn
      Code:
        ZipFile: |
          import os
          from urllib.parse import unquote_plus

          import boto3
          from botocore.exceptions import ClientError

          # Per-paragraph cap, measured in UTF-8 bytes (the unit used by the
          # Amazon Translate request limit), not in Python object size.
          MAX_TEXT_BYTES = 5000


          def translate_file(translate, read_path, write_path):
              """Translate read_path paragraph by paragraph into write_path.

              Paragraphs are separated by newlines. Empty paragraphs,
              oversized paragraphs and paragraphs whose translation fails
              are written as empty lines so the line structure is kept.
              """
              with open(read_path, encoding="utf-8") as source_file:
                  paragraphs = source_file.read().split("\n")

              with open(write_path, "w", encoding="utf-8") as target_file:
                  for paragraph in paragraphs:
                      if paragraph:
                          if len(paragraph.encode("utf-8")) > MAX_TEXT_BYTES:
                              a = paragraph.split()[:5]
                              print ("Error occurred - please break down the following paragraph into smaller chunks in order to avoid hitting Amazon Translate API limits.")
                              print ("Paragraph starting with\""+' '.join(a)+"...\"")
                          else:
                              try:
                                  result = translate.translate_text(
                                      Text=paragraph,
                                      SourceLanguageCode=os.environ['SourceLanguageCode'],
                                      TargetLanguageCode=os.environ['TargetLanguageCode'])
                              except ClientError as e:
                                  print ("Unexpected error: %s" % e)
                              else:
                                  # Only write when this call succeeded, so a
                                  # failure never reuses an earlier result.
                                  target_file.write(result["TranslatedText"])
                      target_file.write("\n")


          def lambda_handler(event, context):
              """Translate every S3 object referenced by the event records."""
              s3 = boto3.resource('s3')
              translate = boto3.client('translate')

              for record in event['Records']:
                  source_bucket = record['s3']['bucket']['name']
                  target_bucket = os.environ['TargetBucketName']
                  # Object keys arrive URL-encoded in S3 event notifications.
                  download_key = unquote_plus(record['s3']['object']['key'])
                  key_parts = download_key.split("/")
                  object_name = key_parts[-1]
                  object_input_path = key_parts[0]
                  # NOTE(review): only the first key segment is kept, and a key
                  # without "/" yields <lang>/<name>/<name>; layout kept as-is
                  # for compatibility with existing consumers.
                  upload_key = os.environ['TargetLanguageCode'] + "/" + object_input_path + "/" + object_name
                  read_path = '/tmp/' + object_name
                  write_path = '/tmp/translated-' + object_name

                  try:
                      s3.Bucket(source_bucket).download_file(download_key, read_path)
                      translate_file(translate, read_path, write_path)
                      s3.Bucket(target_bucket).upload_file(write_path, upload_key)
                  finally:
                      # /tmp survives warm invocations; always clean up.
                      for path in (read_path, write_path):
                          if os.path.exists(path):
                              os.remove(path)

  # Input bucket; object PUTs invoke the translation function.
  S3B3UNSQ:
    Type: 'AWS::S3::Bucket'
    # The invoke permission must exist before S3 validates the notification
    # target, otherwise bucket creation can fail.
    DependsOn: BucketPermission
    Properties:
      BucketName: !Ref InputBucketName
      VersioningConfiguration:
        Status: Enabled
      NotificationConfiguration:
        LambdaConfigurations:
          - Event: 's3:ObjectCreated:Put'
            Function: !GetAtt
              - LF4L95L
              - Arn

  # Allows S3 (input bucket of this account only) to invoke the function.
  BucketPermission:
    Type: 'AWS::Lambda::Permission'
    Properties:
      Action: 'lambda:InvokeFunction'
      FunctionName: !Ref LF4L95L
      Principal: s3.amazonaws.com
      SourceAccount: !Ref 'AWS::AccountId'
      SourceArn: !Sub 'arn:aws:s3:::${InputBucketName}'

  # Output bucket for the translated documents.
  S3B5BC28:
    Type: 'AWS::S3::Bucket'
    Properties:
      BucketName: !Ref OutputBucketName
      VersioningConfiguration:
        Status: Enabled

Outputs:
  IAMRoleExecutionForLambdaARN:
    Description: The ARN of the IAM role that was created for the Lambda to assume
    Value: !GetAtt
      - IAMR1LC1M
      - Arn
  LambdaFunctionArn:
    Description: The ARN of the Lambda function
    Value: !GetAtt
      - LF4L95L
      - Arn