# ========================================================================
# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
# files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#=========================================================================
#
# video-transcribe.py
# by: Katreena Mullican
# For questions or feedback, please contact mullicak@amazon.com
#
# Purpose: The program loads a video from S3 and invokes Amazon Transcribe
#          to create a JSON formatted transcription that is saved to S3.
#          The JSON can then be downloaded and manually edited to add ambient
#          captions (applause, ringing, noises, etc)
#
# Environment variables required:
#   TRANSCRIBE_OUTPUT_BUCKET
#   NOTE(review): the handler below does not read this variable; the output
#   bucket name is fetched from the SSM parameter 'project-bucket-name'.
#   Confirm which of the two is authoritative.
#
# Change Log:
#   5/01/2020: original version
#
# =========================================================================

import json
import os
import re
import sys
from pathlib import Path
from urllib.parse import unquote_plus
from urllib.request import urlopen

import boto3

# Amazon Transcribe only accepts job names made of these characters, and at
# most 200 of them; anything else is rejected by StartTranscriptionJob.
_INVALID_JOB_NAME_CHARS = re.compile(r"[^0-9a-zA-Z._-]")
_MAX_JOB_NAME_LENGTH = 200


def _job_name_for_key(key):
    """Derive a Transcribe-compatible job name from an S3 object key.

    The name is the key's file stem with every character Transcribe does not
    allow replaced by '_', truncated to the maximum job-name length. Stems
    that are already valid are returned unchanged.
    """
    stem = Path(key).stem
    return _INVALID_JOB_NAME_CHARS.sub("_", stem)[:_MAX_JOB_NAME_LENGTH]


def lambda_handler(event, context):
    """Start an Amazon Transcribe job for each S3 object in the event.

    For every record the handler starts a transcription job on the object
    and then makes a best-effort attempt to create the
    'modified-captions-upload/' folder marker in the bucket the object
    came from.

    Args:
        event: Event dict with a 'Records' list; each record supplies
            record['s3']['bucket']['name'] and record['s3']['object']['key'].
        context: Lambda context object (unused).

    Returns:
        None.

    Raises:
        KeyError: If the event or a record lacks one of the keys above.
        Exception: Errors from ssm.get_parameter or
            transcribe.start_transcription_job propagate to the caller;
            only the folder-marker put_object failure is swallowed.
    """
    transcribe = boto3.client("transcribe")
    s3 = boto3.client("s3")
    ssm = boto3.client("ssm")

    #
    # retrieve S3 bucket name from store (same for every record, so it is
    # looked up once instead of once per record)
    #
    project_bucket_param = ssm.get_parameter(Name="project-bucket-name")
    output_bucket = project_bucket_param["Parameter"]["Value"]

    for record in event["Records"]:
        bucket = record["s3"]["bucket"]["name"]
        # S3 event notifications URL-encode the object key (spaces arrive
        # as '+', other characters as %XX); decode it to get the real key.
        key = unquote_plus(record["s3"]["object"]["key"])
        job_name = _job_name_for_key(key)
        job_uri = create_uri(bucket, key)

        #
        # create Transcribe job
        #
        transcribe.start_transcription_job(
            TranscriptionJobName=job_name,
            Media={"MediaFileUri": job_uri},
            OutputBucketName=output_bucket,
            MediaFormat="mp4",
            LanguageCode="en-US",
        )

        #
        # Create S3 bucket folder that will hold caption file
        # after it is manually modified to include contextual captions
        #
        try:
            s3.put_object(Bucket=bucket, Body="", Key="modified-captions-upload/")
        except Exception as e:
            # Best effort: a missing folder marker must not fail the
            # invocation after the transcription job has been started.
            print(f"Could not create modified-captions-upload/ in {bucket}: {e}")


def create_uri(bucket, key):
    """Return the s3:// URI that addresses *key* inside *bucket*."""
    return f"s3://{bucket}/{key}"