# ========================================================================
# Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
# files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#=========================================================================
#
# stvblogCreateMediaConvertJob.py
# by: Katreena Mullican
# For questions or feedback, please contact mullicak@amazon.com
#
# Purpose: This function creates an AWS Elemental MediaConvert job that
#          adds MP3 audio tracks and WebVTT captions to an existing video.
#
# Input required:
#   original_video_name
#   default language for video
#   desired language(s) for audio tracks and S3 URI of .mp3 file(s)
#   desired language(s) for captions and S3 URI of .srt file(s)
#
# Valid caption languages: ar,en,es,fr,fa,de,he,hi,id,it,ja,ko,ms,
#                          nl,pt,ru,ta,te,tr,zh
# (https://docs.aws.amazon.com/transcribe/latest/dg/what-is-transcribe.html)
#
# Valid audio track languages: arb,cmn,cy,da,de,en,es,fr,is,it,ja,hi,ko,nb,nl,
#                              pl,pt,ro,ru,sv,tr
# (https://docs.aws.amazon.com/polly/latest/dg/SupportedLanguage.html)
#
# Change Log:
#   03/01/2021: original version
#
# ========================================================================
import boto3
import json
import uuid
import os
import sys
import datetime
from pathlib import Path
from pprint import pprint

import stmparms as stm
import ssmparms as sp
from stverrors import *

#
# Language codes (ISO 639-1: ISO 639-2 codes and full names from
# https://www.loc.gov/standards/iso639-2/php/code_list.php)
#
# Each short code maps to the upper-case code written into the job's
# "LanguageCode" fields; "<code>-full" maps to the display name written into
# "LanguageDescription" / "StreamName".
#
# NOTE(review): "ARB" is not an ISO 639-2 code (Arabic is "ARA" there) --
# confirm the value is accepted by MediaConvert before relying on it.
#
_LANG_CODES = {
    "ar": "ARB", "ar-full": "Arabic",
    "arb": "ARB", "arb-full": "Arabic",
    "cmn": "ZHO", "cmn-full": "Chinese",
    "cy": "CYM", "cy-full": "Welsh",
    "da": "DAN", "da-full": "Danish",
    "de": "GER", "de-full": "German",
    "en": "ENG", "en-full": "English",
    "es": "SPA", "es-full": "Spanish",
    "fa": "FAS", "fa-full": "Persian",
    "fr": "FRA", "fr-full": "French",
    "he": "HEB", "he-full": "Hebrew",
    "hi": "HIN", "hi-full": "Hindi",
    "id": "IND", "id-full": "Indonesian",
    "is": "ISL", "is-full": "Icelandic",
    "it": "ITA", "it-full": "Italian",
    "ja": "JPN", "ja-full": "Japanese",
    "ko": "KOR", "ko-full": "Korean",
    "ms": "MSA", "ms-full": "Malay",
    "nb": "NOB", "nb-full": "Norwegian",
    "nl": "NLD", "nl-full": "Dutch",
    "pl": "POL", "pl-full": "Polish",
    "pt": "POR", "pt-full": "Portuguese",
    "ro": "RON", "ro-full": "Romanian",
    "ru": "RUS", "ru-full": "Russian",
    "sv": "SWE", "sv-full": "Swedish",
    "ta": "TAM", "ta-full": "Tamil",
    "te": "TEL", "te-full": "Telugu",
    "tr": "TUR", "tr-full": "Turkish",
    "zh": "ZHO", "zh-full": "Chinese",
}


def _m3u8_container_settings():
    """Return a fresh M3U8 "ContainerSettings" dict used by every added output."""
    return {
        "Container": "M3U8",
        "M3u8Settings": {
            "AudioFramesPerPes": 4,
            "PcrControl": "PCR_EVERY_PES_PACKET",
            "PmtPid": 480,
            "PrivateMetadataPid": 503,
            "ProgramNumber": 1,
            "PatInterval": 0,
            "PmtInterval": 0,
            "Scte35Source": "NONE",
            "NielsenId3": "NONE",
            "TimedMetadata": "NONE",
            "VideoPid": 481,
            "AudioPids": [482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492],
        },
    }


def _caption_output(lang, selector_name):
    """Build the HLS output entry that carries WebVTT captions for `lang`."""
    return {
        "ContainerSettings": _m3u8_container_settings(),
        "OutputSettings": {
            "HlsSettings": {
                "AudioGroupId": "program_audio",
                "AudioOnlyContainer": "AUTOMATIC",
                "IFrameOnlyManifest": "EXCLUDE",
            }
        },
        "NameModifier": "_sub_" + lang,
        "CaptionDescriptions": [
            {
                "CaptionSelectorName": selector_name,
                "DestinationSettings": {"DestinationType": "WEBVTT"},
                "LanguageCode": _LANG_CODES.get(lang),
                "LanguageDescription": _LANG_CODES.get(lang + '-full'),
            }
        ],
    }


def _audio_output(lang, selector_name):
    """Build the HLS output entry that carries the alternate audio track for `lang`."""
    return {
        "ContainerSettings": _m3u8_container_settings(),
        "AudioDescriptions": [
            {
                "AudioTypeControl": "FOLLOW_INPUT",
                "AudioSourceName": selector_name,
                "CodecSettings": {
                    "Codec": "AAC",
                    "AacSettings": {
                        "AudioDescriptionBroadcasterMix": "NORMAL",
                        "Bitrate": 96000,
                        "RateControlMode": "CBR",
                        "CodecProfile": "LC",
                        "CodingMode": "CODING_MODE_2_0",
                        "RawFormat": "NONE",
                        "SampleRate": 48000,
                        "Specification": "MPEG4",
                    },
                },
                "StreamName": _LANG_CODES.get(lang + '-full'),
                "LanguageCodeControl": "USE_CONFIGURED",
                "LanguageCode": _LANG_CODES.get(lang),
            }
        ],
        "OutputSettings": {
            "HlsSettings": {
                "AudioGroupId": "program_audio",
                "AudioTrackType": "ALTERNATE_AUDIO_AUTO_SELECT",
                "AudioOnlyContainer": "AUTOMATIC",
                "IFrameOnlyManifest": "EXCLUDE",
            }
        },
        "NameModifier": "_" + lang,
    }


def _split_targets(targets, default_lang):
    """Separate the default-language subtitle from the additional subtitle/audio targets.

    Returns a tuple (default_subtitle_uri, extra_subtitles, extra_audio) where
    default_subtitle_uri is None when no enabled subtitle target matches
    default_lang, and the two lists hold (language, target_entry) pairs for
    every enabled target in another language, in input order.
    """
    default_subtitle_uri = None
    extra_subtitles = []
    extra_audio = []

    for entry in targets:
        lang = entry['translate']['targetLanguageShort']

        if entry['subtitle']['createSubtitle'] == 'y':
            if lang == default_lang:
                print('subtitle '+lang+'matches'+default_lang)
                default_subtitle_uri = entry['subtitle']['subtitleUri']
            else:
                extra_subtitles.append((lang, entry['subtitle']))

        if entry['polly']['createAudio'] == 'y':
            if lang == default_lang:
                # The default audio track comes from the video itself, so
                # nothing is collected for it here.
                print('audio '+lang+'matches'+default_lang)
            else:
                extra_audio.append((lang, entry['polly']))

    return default_subtitle_uri, extra_subtitles, extra_audio


# ==================================================================================
# Function: lambda_handler
# Purpose: This is the "main" code for this lambda function
# Parameters:
#   event - the JSON input structure containing the parameters from the step function process
# ==================================================================================
def lambda_handler(event, context):
    """Create a MediaConvert job that adds audio tracks and captions to a video.

    Parameters:
        event: indexable input whose first element provides
            ['input']['Outputs']['process']['ProcessName'], the key used to
            load the process parameters through stm.get_stm_parms.
        context: Lambda context; invoked_function_arn is read to obtain the
            AWS account ID for the queue ARN.

    Returns:
        dict with 'Config', 'Inputs', 'Outputs' and 'Targets'; job details are
        stored under Outputs['mediaConvert'].

    Raises:
        stvDynamoDBError: the process parameters could not be loaded.
        stvError: no enabled subtitle target exists for the default language,
            the final update through stm.update_stm_parms failed, or any
            other error occurred (the original exception is chained as the
            cause).
    """
    # debugging message
    print("===> stvblogCreateMediaConvertJob: " + "\nEvent:" + str(event) )
    print( "\t---> Boto Version: ", boto3.__version__ )

    try:
        #
        # retrieve param from SSM Parameter Store
        #
        print( "\t---> Getting Parameters from Parameter Store" )
        mediaconvert_role_arn = sp.get_parameter('/stvblog/MediaConvertRole')['Parameter']['Value']
        print( "\t---> Retrieved Parameters from Parameter Store", mediaconvert_role_arn )

        #
        # Load the parms from DynamoDB
        #
        process_name = event[0]['input']['Outputs']['process']['ProcessName']
        print( "\t---> About to get parms from DynamoDB for ", process_name )
        parms = stm.get_stm_parms(process_name)
        if not parms:
            # We have an issue, so get out
            raise stvDynamoDBError( "*** Unable to load parms from DynamoDB ***")
        print( "\t---> Parms Returned: ", parms )

        #
        # set up a shortcut
        #
        parmsCfg = parms['Item']['Config']
        parmsIn = parms['Item']['Inputs']
        parmsOut = parms['Item']['Outputs']
        parmsTgt = parms['Item']['Targets']

        #
        # set variables from DynamoDB values
        #
        default_lang = parmsIn['sourceLanguageShort']
        media_file = parmsIn['mediaFile']
        final_output_bucket_name = parmsCfg['finalOutput']
        final_input_bucket_name = parmsCfg['finalInput']
        input_media_bucket_name = parmsCfg['baseBucketName']
        region = parmsCfg['region']
        job_uuid = parmsOut['process']['ProcessName'].split('-')[1]

        #
        # loop through subtitle and audio (Polly) targets
        #
        print( "\t---> Looping through Targets..." )
        subtitle_default_uri, extra_subtitles, extra_audio = _split_targets(parmsTgt, default_lang)
        if subtitle_default_uri is None:
            # "Captions Selector 1" needs a source file; fail with a clear
            # message instead of hitting an unbound variable further down.
            raise stvError( "*** No subtitle target found for default language '" + str(default_lang) + "' ***" )

        #
        # determine the media_file_path and media_output_bucket_name
        #
        media_base_name = os.path.splitext(media_file)[0]
        media_file_path = "s3://" + input_media_bucket_name + "/" + job_uuid + "/" + final_input_bucket_name + "/" + media_file
        media_output_bucket_name = input_media_bucket_name + "/" + job_uuid + "/" + final_output_bucket_name

        #
        # open MediaConvert job template
        #
        print( "\t---> Opening job settings JSON file ..." )
        with open('mc-job-settings.json') as json_file:
            mcData = json.load(json_file)

        #
        # Get AWS Account ID
        #
        account_ID = context.invoked_function_arn.split(":")[4]

        #
        # substitute values in JSON template with actual values
        #
        outputs = mcData['Settings']['OutputGroups'][0]['Outputs']
        job_input = mcData['Settings']['Inputs'][0]

        mcData['Role'] = mediaconvert_role_arn
        mcData['Queue'] = "arn:aws:mediaconvert:"+region+":"+account_ID+":queues/Default"
        mcData['Settings']['OutputGroups'][0]['OutputGroupSettings']['HlsGroupSettings']['Destination'] = "s3://"+media_output_bucket_name+"/"+media_base_name
        job_input['FileInput'] = media_file_path

        #
        # set default audio track and caption
        # (template output 1 is the default caption, output 4 the default audio)
        #
        default_caption = outputs[1]
        default_caption["NameModifier"] = "_sub_" + default_lang
        default_caption["CaptionDescriptions"][0]["LanguageCode"] = _LANG_CODES.get(default_lang)
        default_caption["CaptionDescriptions"][0]["LanguageDescription"] = _LANG_CODES.get(default_lang+'-full')

        default_audio = outputs[4]
        default_audio["NameModifier"] = "_" + default_lang
        default_audio["AudioDescriptions"][0]["StreamName"] = _LANG_CODES.get(default_lang+'-full')
        default_audio["AudioDescriptions"][0]["LanguageCode"] = _LANG_CODES.get(default_lang)

        job_input['CaptionSelectors']['Captions Selector 1']['SourceSettings']['FileSourceSettings']['SourceFile'] = subtitle_default_uri

        #
        # loop through caption languages and add appropriate JSON
        # (selector 1 is the default language, so extras start at 2)
        #
        for number, (lang, subtitle) in enumerate(extra_subtitles):
            selector_name = "Captions Selector " + str(number + 2)
            outputs.insert(2 + number, _caption_output(lang, selector_name))
            job_input['CaptionSelectors'][selector_name] = {
                "SourceSettings": {
                    "SourceType": "SRT",
                    "FileSourceSettings": {"SourceFile": subtitle['subtitleUri']},
                }
            }

        #
        # loop through audio track languages and add appropriate JSON
        #
        for number, (lang, audio) in enumerate(extra_audio):
            selector_name = "Audio Selector " + str(number + 2)
            outputs.insert(5 + number, _audio_output(lang, selector_name))
            job_input["AudioSelectors"][selector_name] = {
                "Tracks": [ 1 ],
                "Offset": 0,
                "DefaultSelection": "NOT_DEFAULT",
                "SelectorType": "TRACK",
                "ExternalAudioFileInput": audio['pollyUri'],
                "ProgramSelection": 0,
            }

        #
        # Set assetID, Metadata, and endpoints for MediaConvert job
        #
        jobMetadata = {
            'assetID': str(uuid.uuid4()),
            'application': "createMediaConvertJob",
        }

        # The first client only looks up the endpoint URL; the job itself is
        # submitted through a second client bound to that endpoint.
        mc_client = boto3.client('mediaconvert', region_name=region)
        endpoints = mc_client.describe_endpoints()
        mc_endpoint_url = endpoints['Endpoints'][0]['Url']

        #
        # create MediaConvert job
        #
        mc = boto3.client('mediaconvert', region_name=region, endpoint_url=mc_endpoint_url, verify=True)
        print( "\t---> Submitting MediaConvert Job " )
        job = mc.create_job(Role=mediaconvert_role_arn, UserMetadata=jobMetadata, Settings=mcData["Settings"])
        print ("\t---> Job successfully submitted: ", str(job))

        #
        # Generate and return the response, including key info from the job
        # so that we can check on the status of the job
        #
        response = {}
        response['Config'] = parmsCfg
        response['Inputs'] = parmsIn
        response['Outputs'] = parmsOut
        response['Outputs']['mediaConvert'] = {
            # NOTE: the key spelling 'mediaConverManifestURI' is kept as-is
            # because it is part of the stored/returned data contract.
            'mediaConverManifestURI': "s3://" + media_output_bucket_name + "/" + media_base_name + ".m3u8",
            'mediaConvertJobArn': job['Job']['Arn'],
            'mediaConvertJobId': job['Job']['Id'],
            'mediaConvertJobStatus': job['Job']['Status'],
            'mediaConvertEndpointUrl': mc_endpoint_url,
            'startTimeStamp': job['Job']['CreatedAt'].strftime("%Y-%m-%d %H-%M-%S"),
        }
        response['Targets'] = parmsTgt

        #
        # update DynamoDB
        #
        if stm.update_stm_parms(response['Outputs']['process']['ProcessName'], response['Config'], response['Inputs'], response['Outputs'], response['Targets']):
            print('===> stvblogCreateMediaConvertJob Complete')
            return response

        print( "\t===> stvblogCreateMediaConvertJob Error updating DynamoDB")
        raise stvError( "*** Error writing to the stvblog table ***" )

    except (stvError, stvDynamoDBError):
        # Already a pipeline error carrying a meaningful message; let it
        # propagate unchanged instead of re-wrapping it.
        raise
    except Exception as e:
        # Only botocore client errors carry a `response` payload; fall back to
        # the exception's own type and text for everything else so the real
        # cause is never masked by an AttributeError.
        error_response = getattr(e, 'response', None)
        if isinstance(error_response, dict):
            message = str(error_response.get('Error', {}).get('Message', e))
        else:
            message = type(e).__name__ + ": " + str(e)
        raise stvError("*** Error Code: ", message + " ***") from e