# ==================================================================================
# Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# Permission is hereby granted, free of charge, to any person obtaining a copy of this
# software and associated documentation files (the "Software"), to deal in the Software
# without restriction, including without limitation the rights to use, copy, modify,
# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# ==================================================================================
#
# stvblogCombinePhrasesAndTranslation
# by: Rob Dachowski
# For questions or feedback, please contact robdac@amazon.com
#
# Purpose: This lambda merges the original time-encoded phrases from the Transcribe output
#          with the translated, boundary-marked phrases.
#
# Change Log:
#          3/1/2021: Initial version
#
# ==================================================================================

import json
import uuid
import datetime
import boto3
from botocore.exceptions import ClientError
import ssmparms as sp
import stmparms as stm
import stverrors


# ==================================================================================
# Function: lambda_handler
# Purpose: This is the "main" code for this lambda function
# Parameters:
#          event   - the JSON input structure containing the parameters from the step function process
#          context - the Lambda context object (not used)
# ==================================================================================
def lambda_handler(event, context):

    # debugging message
    print("===> stvblogCombinePhrasesAndTranslation: " + "\nEvent: " + str(event))
    print("\t---> Boto Version: ", boto3.__version__)

    # Load the parms from DynamoDB
    parms = stm.get_stm_parms(event['input']['Outputs']['process']['ProcessName'])

    if not parms:
        # We have an issue, so get out
        raise stverrors.stvDynamoDBError("*** Unable to load parms from DynamoDB ***")

    # Set up shortcuts into the parms structure
    pc = parms['Item']['Config']
    pi = parms['Item']['Inputs']
    po = parms['Item']['Outputs']
    ptgts = parms['Item']['Targets']
    pt = event['item']['translate']

    sls = pi['sourceLanguageShort']
    slf = pi['sourceLanguageFull']
    tls = event['item']['translate']['targetLanguageShort']
    tlf = event['item']['translate']['targetLanguageFull']

    # Set up the AWS resource for S3
    s3 = boto3.resource('s3')
    bucket = s3.Bucket(pc['baseBucketName'])

    # === STEP 1 ===
    # Set up the file key
    phraseFileKey = po['transcribe']['phraseTextKey']

    try:
        # Get the phrases file from S3 and read it into memory
        print('\t---> Getting phrases from: ', phraseFileKey)
        phraseFile = bucket.Object(key=phraseFileKey)
        phraseResponse = phraseFile.get()
    except ClientError as e:
        print("***Exception***: Issue reading phrase file: ", phraseFileKey)
        raise stverrors.stvError("*** Error Code: " + e.response['Error']['Message'] + " ***")

    # Get the bytes from the "body" section
    phrasesIn = phraseResponse['Body'].read()

    # === STEP 2 ===
    # First, split off the s3://baseBucket info from the S3 URI
    leftPart = "s3://" + pc['baseBucketName'] + '/'
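    # Illustrative example (the exact key layout is an assumption, not taken from the
    # Translate output specification): if the translate job's s3Uri were
    #     s3://my-base-bucket/1234-uuid/translated/es/translatableText-en_1234-uuid.txt
    # then leftPart would be "s3://my-base-bucket/", and splitting the full URI on it
    # would leave rightPart[1] as
    #     "1234-uuid/translated/es/translatableText-en_1234-uuid.txt"
    # i.e. the object key relative to the base bucket, which is what bucket.Object() needs.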
    fullURI = event['item']['translate']['s3Uri']
    rightPart = fullURI.split(leftPart)

    print("\t---> Translation Key Info: ")
    print("\t---> full: ", fullURI)
    print("\t---> left: ", leftPart)
    print("\t---> right: ", rightPart)

    # Set up the key to the Full Translation File (FTF) and read it into memory
    # translatedFileKey = rightPart[1] + targetLanguageShort + ".translatableText-" + event['transcribeJobParms']['sourceLanguage'] + "_" + pp['uuid'] + ".txt"
    translatedFileKey = rightPart[1]

    try:
        print("\t---> Opening file: ", translatedFileKey)
        translatedFile = bucket.Object(key=translatedFileKey)
        translatedResponse = translatedFile.get()
    except ClientError as e:
        print("***Exception***: Issue reading translation file: ", translatedFileKey)
        raise stverrors.stvError("*** Error Code: " + e.response['Error']['Message'] + " ***")

    # Get the bytes from the "body" section
    translationIn = translatedResponse['Body'].read()

    # === STEP 3 ===
    # Now convert the byte streams from the PF and FTF into the appropriate structures

    # The FTF is a list of boundary-marked entries, so split on the end-of-entry marker
    translatedPhrases = translationIn.decode('utf-8').split("]\n")

    # The original phrase file is JSON, so decode it into a list of phrase dictionaries
    originalPhrases = json.loads(phrasesIn.decode('utf-8'))

    # Set up the new structure to hold the list of combined phrases
    newPhrases = []

    # debugging
    print("tphrase length: ", len(translatedPhrases))

    # Walk through each translated phrase, split it up to remove the markers, then combine the
    # originalPhrase fields (start_time, end_time, etc.) with the translated phrase that aligns to
    # the phrase id marker. Finally, add the combined phrase to the new structure.
    for i in range(0, len(translatedPhrases)):

        tphrase = translatedPhrases[i]

        if len(tphrase) == 0:
            print("\t---> Phrase doesn't contain text. Breaking.")
            break

        # Split off the leading "{n}" phrase id marker from the bracketed phrase text
        p1 = tphrase.split('[')
        num = p1[0].lstrip(' {')
        num = num.rstrip('} ')

        if len(p1) < 2:
            break

        p2 = p1[1].strip('[')

        phr = {}
        phr['phrase_id'] = num
        phr['start_time'] = originalPhrases[i]['start_time']
        phr['end_time'] = originalPhrases[i]['end_time']
        phr['timecode'] = originalPhrases[i]['start_time'] + " --> " + originalPhrases[i]['end_time']
        phr['phrase_words'] = originalPhrases[i]['phrase_words']
        phr['words'] = p2

        # Now add the new phrase to the structure
        newPhrases.append(phr)

    # debugging
    # print("===>newPhrases:", newPhrases)

    # Write the combined structure to S3 for further processing
    combinedPhrasesKey = po['process']['uuid'] + '/' + pc['subtitleInput'] + '/' + tls + '/' + 'combinedPhrases-' + tls + '_' + po['process']['uuid'] + '.txt'

    # Convert the JSON structure to a string so that we can write it out to S3
    combinedPhrasesString = json.dumps(newPhrases)

    # Put the new S3 object
    bucket.put_object(Body=combinedPhrasesString, ContentType="text/plain", Key=combinedPhrasesKey)

    # Generate and return the response
    response = event
    response['item']['subtitle']['combinedPhrasesKey'] = combinedPhrasesKey
    ptgts[event['index']]['subtitle']['combinedPhrasesKey'] = combinedPhrasesKey

    # Put the output back into DynamoDB
    if stm.update_stm_target(event['input']['Outputs']['process']['ProcessName'], ptgts[event['index']], event['index']):
        print('===> stvblogCombinePhrasesAndTranslation Complete')
        return response
    else:
        raise stverrors.stvError("*** Error writing to the stvblog table ***")
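
# ==================================================================================
# Illustrative sketch (not executed by the lambda): the parsing in STEP 3 assumes each
# translated phrase arrives as "{<phrase_id>} [<translated text>]". The sample strings
# below are hypothetical; only the marker convention is taken from the parsing code.
#
#     sample = " {7} [hola y bienvenidos"              # closing "]" already removed by the split on "]\n"
#     parts = sample.split('[')                        # -> [' {7} ', 'hola y bienvenidos']
#     phrase_id = parts[0].lstrip(' {').rstrip('} ')   # -> '7'
#     words = parts[1]                                 # -> 'hola y bienvenidos'
#
# The combined record then carries the original phrase's timing plus the translated words,
# e.g. {"phrase_id": "7", "timecode": "<start> --> <end>", "words": "hola y bienvenidos", ...}
# ==================================================================================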