import email
import os
import boto3
import logging
import uuid
import translate_helper
from email import policy
from bs4 import BeautifulSoup
workmail_message_flow = boto3.client('workmailmessageflow')
s3 = boto3.client('s3')
logger = logging.getLogger()
logger.setLevel(logging.INFO)
translated_body_template = """
"""
def get_env_var(name):
"""
Helper that returns value of the environment variable key if it exists, else logs and throws ValueError
Parameters
----------
name: string, required
Environment variable key
Returns
-------
string
A string containing value of the environment variable
Raises
------
ValueError:
When environment variable was not set
"""
var = os.getenv(name)
if not var:
error_msg = f'{name} not set in environment. Please follow https://docs.aws.amazon.com/lambda/latest/dg/env_variables.html to set it.'
logger.error(error_msg)
raise ValueError(error_msg)
return var
def update_text_content(part, translated_body):
"""
Updates "text/plain" email body part with translated body.
Parameters
----------
parsed_email: email.message.Message, required
EmailMessage representation the downloaded email
Returns
-------
email.message.Message
EmailMessage representation the translated email
"""
text_content = part.get_content()
text_content = text_content + "\n\n" + translated_body
return text_content
def update_html_content(part, translated_body):
"""
Updates "text/html" email body part with translated body.
Parameters
----------
parsed_email: email.message.Message, required
EmailMessage representation the downloaded email
Returns
-------
email.message.Message
EmailMessage representation the translated email
"""
html_content = part.get_content()
soup = BeautifulSoup(html_content, "html.parser")
translated_body = translated_body_template.format(translated_body)
translated_tag = BeautifulSoup(translated_body, "html.parser")
tag_to_update = soup.find('body')
if tag_to_update is None:
tag_to_update = soup
tag_to_update.append(translated_tag)
return soup
def extract_text_body(parsed_email):
"""
Extract email message content of type "text/plain" from a parsed email
Parameters
----------
parsed_email: email.message.Message, required
The parsed email as returned by download_email
Returns
-------
string
string containing text/plain email body decoded with according to the Content-Transfer-Encoding header
and then according to content charset.
None
No content of type "text/plain" is found.
"""
text_content = None
text_charset = None
if parsed_email.is_multipart():
# Walk over message parts of this multipart email.
for part in parsed_email.walk():
content_type = part.get_content_type()
content_disposition = str(part.get_content_disposition())
if content_type == 'text/plain' and 'attachment' not in content_disposition:
text_content = part.get_payload(decode=True)
text_charset = part.get_content_charset()
break
else:
text_content = parsed_email.get_payload(decode=True)
text_charset = parsed_email.get_content_charset()
if text_content and text_charset:
return text_content.decode(text_charset)
return
def update_email_body(parsed_email, translated_body):
"""
Finds and updates the "text/html" and "text/plain" email body parts.
Parameters
----------
parsed_email: email.message.Message, required
EmailMessage representation the downloaded email
Returns
-------
email.message.Message
EmailMessage representation the updated email
"""
text_charset = None
if parsed_email.is_multipart():
# Walk over message parts of this multipart email.
for part in parsed_email.walk():
content_type = part.get_content_type()
content_disposition = str(part.get_content_disposition())
if content_type == 'text/plain' and 'attachment' not in content_disposition:
transfer_encoding = part['Content-Transfer-Encoding']
text_charset = part.get_content_charset()
new_text_body = update_text_content(part, translated_body)
part.set_content(new_text_body, "plain", charset=text_charset, cte=transfer_encoding)
elif content_type == 'text/html' and 'attachment' not in content_disposition:
transfer_encoding = part['Content-Transfer-Encoding']
html_charset = part.get_content_charset()
new_html_body = update_html_content(part, translated_body)
if new_html_body is not None:
part.set_content(new_html_body.encode(html_charset), "text", "html", cte=transfer_encoding)
part.set_charset(html_charset)
else:
# Its a plain email with text/plain body
transfer_encoding = parsed_email['Content-Transfer-Encoding']
text_charset = parsed_email.get_content_charset()
new_text_body = update_text_content(parsed_email, translated_body)
parsed_email.set_content(new_text_body, "plain", charset=text_charset, cte=transfer_encoding)
return parsed_email
def download_email(message_id):
"""
This method downloads full email MIME content using GetRawMessageContent API and uses email.parser class
for parsing it into Python email.message.EmailMessage class.
Reference:
https://docs.python.org/3.7/library/email.message.html#email.message.EmailMessage
https://docs.python.org/3/library/email.parser.html
Parameters
----------
message_id: string, required
message_id of the email to download
Returns
-------
email.message.Message
EmailMessage representation the downloaded email
"""
response = workmail_message_flow.get_raw_message_content(messageId=message_id)
email_content = response['messageContent'].read()
email_generation_policy = policy.SMTP.clone(refold_source='none')
logger.info("Downloaded email from WorkMail successfully")
return email.message_from_bytes(email_content, policy=email_generation_policy)
def update_workmail(message_id, content):
"""
Uploads the updated message to an S3 bucket in your account and then updates it at WorkMail via
PutRawMessageContent API.
Reference: https://docs.aws.amazon.com/workmail/latest/adminguide/update-with-lambda.html
Parameters
----------
message_id: string, required
message_id of the email to download
content: email.message.Message, required
EmailMessage representation the updated email
Returns
-------
None
"""
bucket = get_env_var('TRANSLATED_EMAIL_BUCKET')
key = str(uuid.uuid4());
s3.put_object(Body=content.as_bytes(), Bucket=bucket, Key=key)
s3_reference = {
'bucket': bucket,
'key': key
}
content = {
's3Reference': s3_reference
}
workmail_message_flow.put_raw_message_content(messageId=message_id, content=content)
logger.info("Updated email sent to WorkMail successfully")
def translate_email(downloaded_email, email_subject, email_language, text_body):
"""
Updates email with translated subject and traslated body.
Parameters
----------
downloaded_email: email.message.Message, required
EmailMessage representation the original downloaded email
email_subject: string, required
Subject of the email
email_language: string, required
Language code of original email body. See https://docs.aws.amazon.com/translate/latest/dg/what-is.html#what-is-languages
text_body: string, required
Plain text body of the email message
Returns
-------
email.message.Message
EmailMessage representation the updated email.
"""
destination_lang = get_env_var('DESTINATION_LANGUAGE')
translated_body = translate_helper.translate_text(text_body, email_language, destination_lang)
translated_subject = translate_helper.translate_text(email_subject, email_language, destination_lang)
updated_email = update_email_body(downloaded_email, translated_body)
new_subject = f"{email_subject} | {translated_subject}"
updated_email.replace_header('Subject', new_subject)
logger.info("Email translated successfully")
return updated_email