# ========================================================================
# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.

# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
# files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#=========================================================================
#
# video-transcribe.py
# by: Katreena Mullican
# For questions or feedback, please contact mullicak@amazon.com
# 
# Purpose: The program loads a video from S3 and invokes Amazon Transcribe
#           to create a JSON formatted transcription that is saved to S3.  The
#           JSON can then be downloaded and manually edited to add ambient
#           captions (applause, ringing, noises, etc.)
#
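#           Environment variables required:
#           TRANSCRIBE_OUTPUT_BUCKET
#           NOTE(review): the handler below reads the output bucket name from
#           the SSM parameter 'project-bucket-name' rather than from this
#           environment variable -- confirm which one is authoritative.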
#
# Change Log:
#          5/01/2020: original version
#
# =========================================================================
import json
import os
import re
import sys
from pathlib import Path
from urllib.parse import unquote_plus
from urllib.request import urlopen

import boto3

def lambda_handler(event, context):
    """Start an Amazon Transcribe job for every S3 object in the event.

    For each record the handler:
      1. decodes the object key (S3 event notifications URL-encode keys),
      2. derives a Transcribe job name from the key's file stem,
      3. starts an en-US / mp4 transcription job whose output goes to the
         bucket named by the SSM parameter 'project-bucket-name',
      4. makes a best-effort attempt to create the
         'modified-captions-upload/' folder placeholder in the source bucket.

    Args:
        event: S3 event notification. Must contain a 'Records' list whose
            items carry ['s3']['bucket']['name'] and ['s3']['object']['key'].
        context: Lambda context object (unused).

    Returns:
        None.

    Raises:
        KeyError: if the event or a record lacks one of the fields above.
        Exception: errors from the SSM lookup or from starting the
            transcription job propagate to the caller. Only the folder
            creation is best-effort.
    """
    transcribe = boto3.client("transcribe")
    s3 = boto3.client("s3")
    ssm = boto3.client('ssm')

    records = event['Records']
    if not records:
        # Nothing to transcribe; avoid a pointless SSM round trip.
        return

    #
    # Retrieve the output bucket name from SSM Parameter Store. It is the
    # same for every record, so look it up once instead of once per record.
    #
    project_bucket_param = ssm.get_parameter(Name='project-bucket-name')
    output_bucket = project_bucket_param['Parameter']['Value']

    # Source buckets whose caption-upload folder has already been created
    # during this invocation.
    buckets_with_folder = set()

    for record in records:
        bucket = record['s3']['bucket']['name']
        # Keys in S3 event notifications are URL-encoded (e.g. a space
        # arrives as '+'); decode to get the real object key.
        key = unquote_plus(record['s3']['object']['key'])
        # Transcribe job names only allow [0-9a-zA-Z._-] and at most 200
        # characters. Names that are already valid pass through unchanged.
        job_name = re.sub(r'[^0-9a-zA-Z._-]', '_', Path(key).stem)[:200]
        job_uri = create_uri(bucket, key)
        #
        # create Transcribe job
        #
        transcribe.start_transcription_job(
            TranscriptionJobName=job_name,
            Media={'MediaFileUri': job_uri},
            OutputBucketName=output_bucket,
            MediaFormat='mp4',
            LanguageCode='en-US'
        )
        #
        # Create S3 bucket folder that will hold caption file
        # after it is manually modified to include contextual captions.
        # Best-effort: a failure here must not abort the remaining records.
        #
        if bucket not in buckets_with_folder:
            try:
                s3.put_object(Bucket=bucket, Body='', Key='modified-captions-upload/')
            except Exception as e:
                print(e)
            else:
                # Only remember success, so a later record retries a failure.
                buckets_with_folder.add(bucket)

def create_uri(bucket: str, key: str) -> str:
    """Return the ``s3://`` URI for the object *key* in *bucket*.

    Args:
        bucket: Name of the S3 bucket.
        key: Object key inside the bucket; it is appended verbatim.

    Returns:
        A string of the form ``s3://<bucket>/<key>``.
    """
    return f"s3://{bucket}/{key}"