# ==================================================================================
# Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# Permission is hereby granted, free of charge, to any person obtaining a copy of this
# software and associated documentation files (the "Software"), to deal in the Software
# without restriction, including without limitation the rights to use, copy, modify,
# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# ==================================================================================
#
# stvblogCreateSSM.py
# by: Rob Dachowski
# For questions or feedback, please contact robdac@amazon.com
# 
# Purpose: This code creates a basic SSML file based on the contents of the provided SRT file.
#           It assumes that the translated phrase will be "spoken" by Polly in the same
#           timeframe as the original transcript.  Keep in mind that this could cause the
#           synthesized speech to render faster than the listener may be comfortable with, because
#           the target language may require more words / syllables than easily fit into the
#           time slot.  If this is not acceptable, you would probably need to add a step to the
#           overall process to have a user validate and approve the SRT file before executing this step.
#
# Change Log:
#          3/1/2020: Initial version
#
# ==================================================================================

import json
import uuid
from datetime import datetime
import boto3
from botocore.exceptions import ClientError
import ssmparms as sp
import stmparms as stm
import stverrors


# ==================================================================================
# Function: lambda_handler
# Purpose: This is the "main" code for this lambda function
# Parameters: 
#                 event - the JSON input structure containing the parameters from the step function process
# ==================================================================================

def _srt_time_to_seconds(stamp):
	"""Convert one SRT timestamp (HH:MM:SS,fraction) into seconds as a float."""
	parsed = datetime.strptime(stamp.strip(), '%H:%M:%S,%f')
	return float( (parsed.microsecond/1000000 + parsed.second) + parsed.minute*60 + parsed.hour*3600 )


def _srt_to_ssml(srtContents):
	"""Build the SSML document text for the caption groups in an SRT string.

	Each caption group (number line, time-encoding line, one or more text lines)
	becomes one line of SSML: a <break> covering the silence since the previous
	caption, followed by a <prosody amazon:max-duration="..."> wrapping the text.

	Parameters:
		srtContents - the full text of the SRT file

	Returns:
		The SSML document as a string, starting with "<speak>\n" and ending
		with "</speak>".

	Raises:
		ValueError - if a time-encoding line cannot be parsed
	"""
	# Imported locally so this block does not need a new file-level import.
	from xml.sax.saxutils import escape

	# Accept Windows / old-Mac line endings as well as plain "\n".
	normalized = srtContents.replace("\r\n", "\n").replace("\r", "\n")

	ssmlParts = ["<speak>\n"]
	runningTime = 0.0

	# Caption groups are separated by a blank line.  Empty groups (e.g. the one
	# produced by the trailing "\n\n", or by extra blank lines) are skipped rather
	# than assuming the last element is always empty, so the final caption is not
	# lost when the file has no trailing blank line.
	for group in normalized.split("\n\n"):
		lines = group.strip().split("\n")

		# 0 = caption number, 1 = time encoding, 2.. = caption text
		if len(lines) < 3:
			continue

		startText, endText = lines[1].split('-->')
		startSeconds = _srt_time_to_seconds(startText)
		endSeconds = _srt_time_to_seconds(endText)
		totalSeconds = endSeconds - startSeconds

		# Silence between the end of the previous caption and the start of this
		# one.  Clamped at zero because overlapping captions would otherwise
		# produce a negative break time, which is not valid SSML.
		# NOTE(review): Polly documents a maximum length for a single <break>;
		# very long gaps are not split into several breaks here - confirm
		# whether that matters for your media.
		breakTime = max(0.0, startSeconds - runningTime)
		runningTime = runningTime + breakTime + totalSeconds

		# A caption may span several lines; speak all of them, not just the first.
		captionText = " ".join(line.strip() for line in lines[2:] if line.strip())

		# Caption text is plain text, so XML-reserved characters (&, <, >) must
		# be escaped or the resulting SSML is malformed.
		ssmlParts.append( "<break time=\"" + ("%3.2fs" % breakTime) + "\" />" )
		ssmlParts.append( "<prosody amazon:max-duration=\"" + ("%3.2fs" % totalSeconds) + "\">" + escape(captionText) + "</prosody>\n" )

	ssmlParts.append("</speak>")
	return "".join(ssmlParts)


def lambda_handler(event, context):
	"""Create the SSML file for one translation target and record its S3 key.

	When event['item']['polly']['createAudio'] is 'y', the SRT file named by
	event['item']['subtitle']['srtKey'] is read from the base bucket, converted
	to SSML and written back to the same bucket.  Otherwise the key is recorded
	as 'N/A'.  In both cases the key is stored in event['item']['polly']['ssmlKey']
	and in the matching target (event['index']) via stm.update_stm_target.

	Parameters:
		event   - the JSON input structure containing the parameters from the
		          step function process
		context - the Lambda context object (only printed for debugging)

	Returns:
		The (mutated) event with event['item']['polly']['ssmlKey'] set.

	Raises:
		stverrors.stvDynamoDBError - the parms could not be loaded
		stverrors.stvError         - S3 read/write failed, or the target update failed
		(the exception classes are assumed to live in the imported stverrors
		module - TODO confirm)
	"""

	#debugging message
	print("===> stvblogCreateSSML: " + "\nEvent:" + str(event) + "\nContext: " + str( context ) )
	print( "\t---> Boto Version: ", boto3.__version__ )

	# Load the parms from DynamoDB
	processName = event['input']['Outputs']['process']['ProcessName']
	parms = stm.get_stm_parms( processName )

	if not parms:
		# We have an issue, so get out
		raise stverrors.stvDynamoDBError( "*** Unable to load parms from DynamoDB ***")

	# set up shortcuts
	pc = parms['Item']['Config']
	pi = parms['Item']['Inputs']
	po = parms['Item']['Outputs']
	ptgts = parms['Item']['Targets']
	tls = event['item']['translate']['targetLanguageShort']

	# The response is the incoming event with the ssmlKey filled in
	response = event

	if event['item']['polly']['createAudio'] == 'y':
		#set up AWS resource for S3
		s3 = boto3.resource( 's3')
		bucket = s3.Bucket(pc['baseBucketName'])

		srtKey = event['item']['subtitle']['srtKey']

		try:
			print("\t---> Reading " + srtKey + "\n")
			srtIn = bucket.Object( key=srtKey ).get()['Body'].read()
		except ClientError as e:
			# Only ClientError carries .response; anything else propagates unchanged
			print( "***Exception***: Issue reading phrase file: ", srtKey)
			raise stverrors.stvError("*** Error Code: " + e.response['Error']['Message'] + " ***") from e

		# Convert the caption groups into SSML timed to match the SRT encoding
		ssml = _srt_to_ssml( str(srtIn, 'utf-8') )

		#write the SSML file
		ssmlKey = po['process']['uuid'] + '/' + pc['pollyInput'] + '/' + pi['mediaFile'] + "." + tls + '.ssml'
		print( "\t--- Writing " + ssmlKey + "\n")

		# put the new S3 object
		try:
			bucket.put_object(Body=ssml, ContentType="text/plain", Key=ssmlKey )
		except ClientError as e:
			print( "***Exception***: Issue writing SSML file: ", ssmlKey)
			raise stverrors.stvError("*** Error Code: " + e.response['Error']['Message'] + " ***") from e

		response['item']['polly']['ssmlKey'] = ssmlKey
	else:
		# No audio requested for this target
		response['item']['polly']['ssmlKey'] = 'N/A'

	ptgts[event['index']]['polly']['ssmlKey'] = response['item']['polly']['ssmlKey']

	# Put the output back into DynamoDB
	if stm.update_stm_target( processName, ptgts[event['index']], event['index'] ):
		print('===> CreateSSML Complete')
		return response

	raise stverrors.stvError( "*** Error writing to the stvblog table ***" )