# ======================================================================== # Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation # files (the "Software"), to deal in the Software without restriction, # including without limitation the rights to use, copy, modify, merge, # publish, distribute, sublicense, and/or sell copies of the Software, # and to permit persons to whom the Software is furnished to do so. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES # OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#=========================================================================
#
# JSON-SRT-MC.py
# by: Katreena Mullican
# For questions or feedback, please contact mullicak@amazon.com
#
# Purpose: The program converts Amazon Transcribe JSON file to SRT format,
#          saves the SRT file to S3, and then creates an AWS Elemental
#          MediaConvert job that adds WebVTT captions into an existing video
#
# Configuration (read from SSM Parameter Store at invocation time):
#          project-bucket-name    - bucket that receives the SRT file and
#                                   is used as the MediaConvert destination
#          mediaconvert-role-arn  - role ARN passed to MediaConvert create_job
#
#          NOTE: the SRT_OUTPUT_BUCKET and MEDIA_CONVERT_ROLE environment
#          variables are not read by this code.
#
# Packaged files required:
#          mc-job-settings.json   - MediaConvert job template, opened
#                                   relative to the working directory
#
# Change Log:
#          5/01/2020: original version
#
# ========================================================================

import boto3
import json
import uuid
import os, sys
import createSRTfromTranscriptionFile
from pathlib import Path
from urllib.parse import unquote_plus


def lambda_handler(event, context):
    """Turn each transcript named in an S3 event into captions and a MediaConvert job.

    For every record in ``event['Records']`` the handler downloads the
    transcript JSON object, converts it to an SRT file under ``/tmp``,
    uploads that file to ``srt-captions/`` in the project bucket, and
    submits a MediaConvert job built from ``mc-job-settings.json``.

    Args:
        event: S3 event notification; each record must provide
            ``['s3']['bucket']['name']`` and ``['s3']['object']['key']``.
        context: Lambda context object; ``invoked_function_arn`` is used to
            derive the account ID for the queue ARN.

    Returns:
        None. Any exception is printed and swallowed (best-effort handler),
        which also stops processing of the remaining records.
    """
    try:
        s3 = boto3.client('s3')
        ssm = boto3.client('ssm')
        mc_client = boto3.client('mediaconvert', region_name="us-east-1")
        account_ID = context.invoked_function_arn.split(":")[4]

        records = event['Records']
        if not records:
            # Nothing to process; skip the remote lookups below.
            return

        #
        # Per-invocation setup. None of this depends on the individual
        # record, so it is done once instead of once per record.
        #
        # retrieve S3 bucket name and MediaConvert Role ARN from store
        #
        output_bucket = ssm.get_parameter(
            Name='project-bucket-name')['Parameter']['Value']
        mediaconvert_role_arn = ssm.get_parameter(
            Name='mediaconvert-role-arn')['Parameter']['Value']
        queue_arn = "arn:aws:mediaconvert:us-east-1:"+account_ID+":queues/Default"
        #
        # read MediaConvert job template (parsed afresh for every record so
        # that each job starts from an unmodified copy)
        #
        with open('mc-job-settings.json', encoding='utf-8') as json_file:
            template_text = json_file.read()
        #
        # look up the MediaConvert endpoint and build a client bound to it
        #
        endpoints = mc_client.describe_endpoints()
        mc = boto3.client('mediaconvert', region_name="us-east-1",
                          endpoint_url=endpoints['Endpoints'][0]['Url'],
                          verify=True)

        for record in records:
            bucket = record['s3']['bucket']['name']
            # Object keys arrive URL-encoded in S3 event notifications
            # (e.g. spaces as '+'); decode before using the key.
            key = unquote_plus(record['s3']['object']['key'])
            video_name = Path(key).stem
            srt_object_name = "CAPTIONS_"+video_name+".srt"
            file_name = "/tmp/"+srt_object_name
            #
            # get the object
            #
            data = s3.get_object(Bucket=bucket, Key=key)
            json_data = data['Body'].read()
            #
            # convert JSON transcript to SRT
            #
            createSRTfromTranscriptionFile.writeTranscriptToSRT(json_data, "EN", file_name)
            #
            # save SRT file to S3
            #
            s3.upload_file(file_name, output_bucket,
                           'srt-captions/{}'.format(srt_object_name))
            #
            # substitute variables in JSON with actual values
            #
            mcData = json.loads(template_text)
            # Role and Queue are recorded on the job document; note that
            # create_job below is only given Role, UserMetadata and Settings.
            mcData['Role'] = mediaconvert_role_arn
            mcData['Queue'] = queue_arn
            settings = mcData['Settings']
            first_input = settings['Inputs'][0]
            settings['OutputGroups'][0]['OutputGroupSettings']['HlsGroupSettings']['Destination'] = \
                "s3://"+output_bucket+"/videos-out/"+video_name+"/"+video_name
            first_input['CaptionSelectors']['Captions Selector 1']['SourceSettings']['FileSourceSettings']['SourceFile'] = \
                "s3://"+output_bucket+"/srt-captions/"+srt_object_name
            # The source video is taken from videos-upload/<name>.mp4 in the
            # same bucket that delivered the transcript.
            first_input['FileInput'] = "s3://"+bucket+"/videos-upload/"+video_name+".mp4"

            jobMetadata = {
                'assetID': str(uuid.uuid4()),
                'application': "JSON-SRT-MC",
            }
            #
            # create MediaConvert job
            #
            mc.create_job(Role=mediaconvert_role_arn,
                          UserMetadata=jobMetadata,
                          Settings=settings)

    except Exception as e:
        # Best-effort: report the failure in the function log and return
        # normally rather than raising to the Lambda runtime.
        print(e)
        return