# ==================================================================================
# Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# Permission is hereby granted, free of charge, to any person obtaining a copy of this
# software and associated documentation files (the "Software"), to deal in the Software
# without restriction, including without limitation the rights to use, copy, modify,
# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# ==================================================================================
#
# stvblogCreatePollyAudio.py
# by: Rob Dachowski
# For questions or feedback, please contact robdac@amazon.com
#
# Purpose: This code drives the process to create a language specific MP3 audio track based
#           on speech synthesized by Polly.   The length of each phrase is calculated by looking
#           at the length of the synthesized speech and trying to subtract any extraneous silence
#           if the synthesized clip is longer than the duration in the SRT.
# Change Log:
#          3/1/2020: Initial version
#
# ==================================================================================


import os
import json
import uuid
from datetime import datetime
import boto3
from botocore.exceptions import ClientError
import ssmparms as sp
import stmparms as stm
import stverrors
from pydub.audio_segment import AudioSegment as AS
from contextlib import closing
import tempfile


# ==================================================================================
# Function: writeAudio
# Purpose: writes the bytes associated with the stream to a binary file
# Parameters:
#                 output_file - name of the file to write (it is passed to open())
#                 stream - the stream of bytes to write to the output_file
# ==================================================================================
def writeAudio( output_file, stream ):
        """Write every byte read from `stream` to the binary file `output_file`.

        Parameters:
                output_file - name of the file to create or overwrite (passed to open())
                stream      - readable object; the result of its read() is written verbatim

        Raises:
                stverrors.stvError - if the file cannot be opened or written
        """

        audioBytes = stream.read()

        print( "\t---> Writing ", len(audioBytes), "bytes to audio file: ", output_file)
        try:
                # Open a file for writing the output as a binary stream
                with open(output_file, "wb") as file:
                        file.write(audioBytes)
        except IOError as e:
                # An IOError has no boto-style `response` mapping, so report the error's own text.
                # NOTE(review): assumes the stverrors module exports stvError; the bare name
                # stvError used here before was never imported -- confirm against stverrors.py.
                raise stverrors.stvError("*** Error Code: ", str(e) + " ***") from e


# ==================================================================================
# Function: lambda_handler
# Purpose: This is the "main" code for this lambda function
# Parameters:
#                 event - the JSON input structure containing the parameters from the step function process
# ==================================================================================

def lambda_handler(event, context):
        """Create the Polly audio track for one target language and record where it was stored.

        When event['item']['polly']['createAudio'] is 'y', the SSML file named by
        event['item']['polly']['ssmlKey'] is read from the base bucket, each phrase line is
        synthesized with Polly, silence is inserted between phrases, and the concatenated MP3 is
        uploaded back to the bucket.   Otherwise the output locations are reported as 'N/A'.

        Parameters:
                event   - the JSON input structure containing the parameters from the step function
                          process.   Keys read here: ['input']['Outputs']['process']['ProcessName'],
                          ['item']['translate']['targetLanguageShort'], ['item']['polly'] and ['index']
                context - the Lambda context object (only written to the log)

        Returns:
                the same `event` object, with ['item']['polly']['pollyOutputURI'] and
                ['item']['polly']['pollyOutputKey'] filled in

        Raises:
                stverrors.stvDynamoDBError - if the process parms cannot be loaded
                stverrors.stvError         - if reading the SSML, synthesizing, writing the audio
                                             or updating the table fails

        NOTE(review): assumes the stverrors module exports stvError and stvDynamoDBError; the
        bare names used here before were never imported -- confirm against stverrors.py.
        """

        #debugging message
        print("===> CreatePollyAudio: " + "\nEvent:" + str(event) + "\nContext: " + str( context ) )
        print( "\t---> Boto Version: ", boto3.__version__ )

        # Run ffmpeg once so that its version banner is written to the log, then point pydub at it
        os.system('/opt/bin/ffmpeg -version')
        AS.converter = "/opt/bin/ffmpeg"

        processName = event['input']['Outputs']['process']['ProcessName']

        # Load the parms from DynamoDB
        parms = stm.get_stm_parms( processName )

        if not parms:
                # We have an issue, so get out
                raise stverrors.stvDynamoDBError( "*** Unable to load parms from DynamoDB ***")

        # set up a shortcut
        pc = parms['Item']['Config']
        pi = parms['Item']['Inputs']
        po = parms['Item']['Outputs']
        ptgts = parms['Item']['Targets']
        tls = event['item']['translate']['targetLanguageShort']

        #set up AWS resource for S3 and Polly
        s3 = boto3.resource('s3')
        polly = boto3.client( 'polly' )

        # set up the S3 bucket
        bucket = s3.Bucket(pc['baseBucketName'])

        # get the SSML key so that we can read in the SSML
        ssmlKey = event['item']['polly']['ssmlKey']

        if event['item']['polly']['createAudio'] == 'y':
                ssmlContents = _readSsml( bucket, ssmlKey )
                voiceId = event['item']['polly']['voiceId']

                # Set up where we want the Polly Output to go
                audioKey = po['process']['uuid'] + '/' + pc['pollyOutput'] + '/' + pi['mediaFile'] + "." + tls + ".mp3"

                # Create an empty audio file to hold the concatenated audio clips
                concatenatedAudio = AS.empty()

                print( "\t---> Process ssml...")

                # Now let's walk through each line, split it into the component parts and put the audio together.
                for i, rawLine in enumerate(ssmlContents.split( '\n')):

                        # Tolerate CRLF line endings and blank lines (e.g. a trailing newline at the
                        # end of the file); they used to fail the phrase parsing with an IndexError
                        line = rawLine.strip()
                        if not line:
                                continue

                        # Ignore the line if it is the open or close
                        if line in ("<speak>", "</speak>"):
                                print( "\t\t---> Open / Close SSML - ", line )
                                continue

                        breakTime, maxDuration, ssml = _parseSsmlLine( line )
                        print( "\t\t---> Clip ", i, " max duration: ", maxDuration )
                        print( "\t\t---> ssml: ", ssml )

                        try:
                                # Let's call Polly to get the streamed phrase
                                response = polly.synthesize_speech(
                                        VoiceId = voiceId,
                                        OutputFormat = "mp3",
                                        Text = ssml,
                                        TextType = "ssml"
                                        )
                        except Exception as e:
                                # Something went wrong with Polly
                                print( "***Exception***: Issue synthesizing speech" )
                                raise stverrors.stvError("*** Error Code: ", _errorMessage( e ) + " ***") from e

                        concatenatedAudio = _appendClip( concatenatedAudio, response, breakTime, maxDuration )

                print( "\t---> SSML Processing complete.   concatenatedAudio duration: ", concatenatedAudio.duration_seconds)

                _uploadAudio( bucket, concatenatedAudio, audioKey )

                # Get some key info from the job so that we can check on the status of the job
                pollyOutputURI = 's3://' + pc['baseBucketName'] + '/' + audioKey
                pollyOutputKey = audioKey
        else:
                pollyOutputURI = 'N/A'
                pollyOutputKey = 'N/A'

        # Generate and return the response (note that this is the event object itself, not a copy)
        response = event
        response['item']['polly']['pollyOutputURI'] = pollyOutputURI
        response['item']['polly']['pollyOutputKey'] = pollyOutputKey
        ptgts[event['index']]['polly']['pollyOutputURI'] = pollyOutputURI
        ptgts[event['index']]['polly']['pollyOutputKey'] = pollyOutputKey

        # Put the output back into DynamoDB
        if stm.update_stm_target( processName, ptgts[event['index']], event['index'] ):
                print('===> stvblogCreatePollyAudio Complete')
                return response
        else:
                raise stverrors.stvError( "*** Error writing to the stvblog table ***" )


def _errorMessage( exc ):
        """Return exc.response['Error']['Message'] when the exception carries one, else str(exc)."""
        try:
                return exc.response['Error']['Message']
        except (AttributeError, KeyError, TypeError):
                # Not every exception has a `response` mapping; fall back to its own text
                return str(exc)


def _readSsml( bucket, ssmlKey ):
        """Read the object `ssmlKey` from `bucket` and return its contents decoded as UTF-8 text."""
        try:
                # Read in the SSML file
                print("\t---> Reading " + ssmlKey + "\n")
                ssmlBytes = bucket.Object( key=ssmlKey ).get()
        except Exception as e:
                # Something went wrong reading the file
                print( "***Exception***: Issue reading SSML file: ", ssmlKey)
                raise stverrors.stvError("*** Error Code: ", _errorMessage( e ) + " ***") from e

        # Ok, we read in the file, now get the contents and convert the bytes to a string
        return str(ssmlBytes['Body'].read(), 'utf-8')


def _parseSsmlLine( line ):
        """Split one SSML phrase line into its break time, max duration and the SSML to synthesize.

        The line is expected to start like
                <break time="1.5s" /><prosody amazon:max-duration="2.61s">

        Returns:
                (breakTime, maxDuration, ssml) - both times are in milliseconds; ssml is the text
                that follows the break tag, wrapped in <speak>...</speak>

        Raises:
                IndexError / ValueError - if the line does not follow the pattern above
        """
        # First, split out the break section from the prosody section
        ssmlPhrase = line.split("/>")

        # Strip the break tag down to just the number of seconds, then convert it into milliseconds
        breakSeconds = ssmlPhrase[0].split('<break time=\"')[1].split('s" ')[0]
        breakTime = float( breakSeconds ) * 1000

        # Now break out the max duration based on this pattern - <prosody amazon:max-duration="2.61s">
        maxDurationSeconds = ssmlPhrase[1].split( 's">')[0].split( '<prosody amazon:max-duration="')[1]
        maxDuration = float( maxDurationSeconds ) * 1000

        # Put together the ssml we will send to Polly
        return breakTime, maxDuration, "<speak>" + ssmlPhrase[1] + "</speak>"


def _loadSpeech( audioStream ):
        """Write `audioStream` to a temporary MP3 file, load it with pydub, and delete the file."""
        with closing(audioStream) as stream:
                # mkstemp creates the file itself, so the name cannot be taken by anything else
                # between choosing it and writing to it
                handle, audioFileName = tempfile.mkstemp(suffix='.mp3')
                os.close(handle)
                try:
                        writeAudio( audioFileName, stream )
                        return AS.from_file(audioFileName, format="mp3")
                finally:
                        # Always clean up, even when the write or the decode fails
                        os.remove(audioFileName)
                        print( "\t\t---> %s successfully deleted" % (audioFileName))


def _appendClip( track, response, breakTime, maxDuration ):
        """Return `track` extended with leading silence plus the speech found in a Polly response.

        Parameters:
                track       - the audio assembled so far
                response    - the result of polly.synthesize_speech; a response without an
                              "AudioStream" entry leaves the track unchanged
                breakTime   - pause requested before the phrase, in milliseconds
                maxDuration - time allotted to the phrase, in milliseconds
        """
        if "AudioStream" not in response:
                return track

        try:
                speech = _loadSpeech( response["AudioStream"] )

                # Compare the synthesized length with the allotted time.   A shorter clip needs
                # extra padding; a longer clip eats into the break that precedes it.
                speechDuration = speech.duration_seconds * 1000
                clipDurationDifference = maxDuration - speechDuration
                print( "\t\t---> clipDurationDifference (%f) = maxDuration (%f) - speech.duration_seconds (%f)" % (clipDurationDifference, maxDuration, speechDuration) )

                # Generate silence for the total of the break time + the clipDurationDifference,
                # never asking for a negative length when the clip overruns both
                silence = AS.silent(duration=max(0, breakTime + clipDurationDifference) )
                track += silence
                print( "\t\t---> silence: ", silence.duration_seconds )

                track += speech
                return track
        except Exception as e:
                # Something went wrong writing the temporary file
                print( "***Exception***: Issue writing temp file" )
                raise stverrors.stvError("*** Error Code: ", _errorMessage( e ) + " ***") from e


def _uploadAudio( bucket, audio, audioKey ):
        """Export `audio` as MP3 to a temporary file and upload it to `bucket` under `audioKey`."""
        try:
                # first export the audio to a temporary file
                handle, exportFileName = tempfile.mkstemp(suffix='.mp3')
                os.close(handle)
                try:
                        audio.export(exportFileName, format="mp3")

                        print( "\t---> Uploading %s to %s " % (exportFileName, audioKey))
                        bucket.upload_file(exportFileName, audioKey )
                        print( "\t---> Upload successful")
                finally:
                        # now delete the temporary file, whether or not it was copied to s3
                        print( "\t---> Deleting %s" % (exportFileName))
                        os.remove(exportFileName)
        except Exception as e:
                # Something went wrong writing the final file
                print( "***Exception***: Issue writing final audio file to: ", audioKey )
                raise stverrors.stvError("*** Error Code: ", _errorMessage( e ) + " ***") from e