AWSTemplateFormatVersion: '2010-09-09'
Transform: AWS::Serverless-2016-10-31

# Video PII-redaction pipeline.
#
#   1. A video is uploaded to the input bucket.
#   2. InvokeTranscribe starts an Amazon Transcribe job with PII redaction; the
#      redacted transcript is written to the redacted-transcript bucket.
#   3. RedactVideo is triggered by the transcript, downloads the original
#      video, mutes the audio of every [PII] item with ffmpeg and uploads the
#      result to the output bucket under redacted_audio/.
#
# All three bucket names are parameters, and IAM / permission ARNs are built
# from those parameters (not from the bucket resources) so that the buckets
# can depend on the Lambda permissions without creating a circular reference.

Parameters:
  InputBucketName:
    Description: "input s3 bucket."
    Type: String
  OutputBucketName:
    Description: "output bucket for PII redaction."
    Type: String
  RedactedTranscriptBucketName:
    Description: "retains redacted transcript"
    Type: String
  ffmpegLambdaVersion:
    Type: String
    Default: 1.0.0
    Description: The semantic version of the ffmpeg layer you wish to deploy.

Resources:

  # IAM roles

  # Role for InvokeTranscribe. No DataAccessRoleArn is passed to Transcribe in
  # the handler, so the S3 access for reading the media and writing the
  # transcript is granted here. NOTE(review): confirm Transcribe uses the
  # caller's permissions for both buckets in your account setup.
  InvokeTranscribeExecutionRole:
    Type: AWS::IAM::Role
    Properties:
      AssumeRolePolicyDocument:
        Statement:
          - Action:
              - sts:AssumeRole
            Effect: Allow
            Principal:
              Service:
                - lambda.amazonaws.com
        Version: '2012-10-17'
      Path: "/"
      Policies:
        - PolicyDocument:
            Statement:
              - Action:
                  - logs:CreateLogGroup
                  - logs:CreateLogStream
                  - logs:PutLogEvents
                Effect: Allow
                Resource: arn:aws:logs:*:*:*
            Version: '2012-10-17'
          PolicyName: !Sub ${AWS::StackName}-${AWS::Region}-AWSLambda-CW
        - PolicyDocument:
            Statement:
              - Action:
                  - s3:GetObject
                  - s3:DeleteObject
                  - s3:PutObject
                  - s3:GetBucketNotification
                  - s3:PutBucketNotification
                Effect: Allow
                # Scoped to the two buckets this stage touches instead of
                # every bucket in the account.
                Resource:
                  - !Sub arn:aws:s3:::${InputBucketName}
                  - !Sub arn:aws:s3:::${InputBucketName}/*
                  - !Sub arn:aws:s3:::${RedactedTranscriptBucketName}
                  - !Sub arn:aws:s3:::${RedactedTranscriptBucketName}/*
            Version: '2012-10-17'
          PolicyName: !Sub ${AWS::StackName}-${AWS::Region}-AWSLambda-S3
        - PolicyDocument:
            Statement:
              - Action:
                  - transcribe:StartTranscriptionJob
                  - transcribe:GetTranscriptionJob
                Effect: Allow
                Resource:
                  - "*"
            Version: '2012-10-17'
          PolicyName: !Sub ${AWS::StackName}-${AWS::Region}-AWSLambda-Transcribe
      RoleName: !Sub ${AWS::StackName}-${AWS::Region}-InvokeTranscribeExecutionRole

  # Role for RedactVideo: read the source video and the transcript, write the
  # redacted video.
  RedactVideoExecutionRole:
    Type: AWS::IAM::Role
    Properties:
      AssumeRolePolicyDocument:
        Statement:
          - Action:
              - sts:AssumeRole
            Effect: Allow
            Principal:
              Service:
                - lambda.amazonaws.com
        Version: '2012-10-17'
      Path: "/"
      Policies:
        - PolicyDocument:
            Statement:
              - Action:
                  - logs:CreateLogGroup
                  - logs:CreateLogStream
                  - logs:PutLogEvents
                Effect: Allow
                Resource: arn:aws:logs:*:*:*
            Version: '2012-10-17'
          PolicyName: !Sub ${AWS::StackName}-${AWS::Region}-AWSLambda-CW
        - PolicyDocument:
            Statement:
              # Least privilege replacement for the former s3:* grant: the
              # handler only downloads from two buckets and uploads to one.
              - Action:
                  - s3:GetObject
                Effect: Allow
                Resource:
                  - !Sub arn:aws:s3:::${InputBucketName}/*
                  - !Sub arn:aws:s3:::${RedactedTranscriptBucketName}/*
              - Action:
                  - s3:PutObject
                  - s3:AbortMultipartUpload
                Effect: Allow
                Resource:
                  - !Sub arn:aws:s3:::${OutputBucketName}/*
              - Action:
                  - s3:ListBucket
                Effect: Allow
                Resource:
                  - !Sub arn:aws:s3:::${InputBucketName}
                  - !Sub arn:aws:s3:::${OutputBucketName}
                  - !Sub arn:aws:s3:::${RedactedTranscriptBucketName}
            Version: '2012-10-17'
          PolicyName: !Sub ${AWS::StackName}-${AWS::Region}-AWSLambda-S3
        - PolicyDocument:
            Statement:
              - Action:
                  - s3:ListAllMyBuckets
                Effect: Allow
                Resource:
                  - !Sub arn:aws:s3:::*
            Version: '2012-10-17'
          PolicyName: !Sub ${AWS::StackName}-${AWS::Region}-AWSLambda-S3-all
      RoleName: !Sub ${AWS::StackName}-${AWS::Region}-RedactVideoExecutionRole

  #lambda layer

  # ffmpeg layer from the Serverless Application Repository; RedactVideo runs
  # the binary at /opt/bin/ffmpeg.
  ffmpeglambdalayer:
    Type: AWS::Serverless::Application
    Properties:
      Location:
        ApplicationId: arn:aws:serverlessrepo:us-east-1:145266761615:applications/ffmpeg-lambda-layer
        SemanticVersion: !Ref ffmpegLambdaVersion

  #lambda functions

  # Triggered by uploads to the input bucket; starts the Transcribe job.
  InvokeTranscribe:
    Type: AWS::Lambda::Function
    Properties:
      Handler: index.lambda_handler
      Role: !GetAtt InvokeTranscribeExecutionRole.Arn
      Runtime: python3.9
      MemorySize: 128
      # Raised from 3 s: a cold start plus one API call can exceed that at 128 MB.
      Timeout: 30
      Environment:
        Variables:
          REDACTED_OUTPUT_BUCKET: !Ref RedactedTranscriptBucketName
      Code:
        ZipFile: |
          import os
          from urllib.parse import unquote_plus

          import boto3

          transcribe = boto3.client('transcribe')


          def lambda_handler(event, context):
              """Start a PII-redacting Transcribe job for each uploaded video."""
              for record in event.get('Records', []):
                  bucket = record['s3']['bucket']['name']
                  # Object keys in S3 event notifications are URL-encoded.
                  key = unquote_plus(record['s3']['object']['key'])
                  # RedactVideo recovers the video key from the transcript name
                  # (redacted-<job name>.json), so the job name must be the key.
                  try:
                      transcribe.start_transcription_job(
                          TranscriptionJobName=key,
                          LanguageCode='en-US',
                          MediaFormat='mp4',
                          Media={'MediaFileUri': 's3://' + bucket + '/' + key},
                          ContentRedaction={
                              'RedactionType': 'PII',
                              'RedactionOutput': 'redacted'
                              # add 'PiiEntityTypes' to select PII types; default is all
                          },
                          OutputBucketName=os.environ['REDACTED_OUTPUT_BUCKET']
                      )
                  except (transcribe.exceptions.BadRequestException,
                          transcribe.exceptions.ConflictException) as e:
                      # Invalid or duplicate job name: log it rather than fail,
                      # since a retry of the same event cannot succeed.
                      print("error: " + str(e.response))

  # Triggered by transcripts landing in the redacted-transcript bucket; mutes
  # the PII spans in the matching source video.
  RedactVideo:
    Type: AWS::Lambda::Function
    Properties:
      Handler: index.lambda_handler
      Role: !GetAtt RedactVideoExecutionRole.Arn
      Runtime: python3.9
      MemorySize: 10240
      Timeout: 900
      Layers:
        - !GetAtt ffmpeglambdalayer.Outputs.LayerVersion
      Environment:
        Variables:
          OUTPUT_BUCKET: !Ref OutputBucketName
          VIDEO_INPUT_BUCKET: !Ref InputBucketName
      Code:
        ZipFile: |
          import json
          import os
          import subprocess
          from urllib.parse import unquote_plus

          import boto3

          s3 = boto3.client('s3')
          PREFIX = 'redacted-'
          SUFFIX = '.json'


          def mute_filter(items):
              """Return the ffmpeg -af value muting every [PII] item ('' if none)."""
              parts = []
              for item in items:
                  alts = item.get('alternatives') or [{}]
                  if alts[0].get('content') == '[PII]' and 'start_time' in item:
                      parts.append("volume=enable='between(t," + item['start_time']
                                   + "," + item['end_time'] + ")':volume=0")
              return ", ".join(parts)


          def lambda_handler(event, context):
              """Mute the PII spans of the video named by each redacted transcript."""
              for record in event.get('Records', []):
                  bucket = record['s3']['bucket']['name']
                  # Object keys in S3 event notifications are URL-encoded.
                  key = unquote_plus(record['s3']['object']['key'])
                  name = key.split('/')[-1]
                  # Only redacted-<video key>.json can be mapped back to a video.
                  if not (name.startswith(PREFIX) and name.endswith(SUFFIX)):
                      print('skipping ' + key)
                      continue
                  video = name[len(PREFIX):-len(SUFFIX)]
                  print(video)
                  src = '/tmp/' + video
                  dst = '/tmp/redacted-' + video
                  transcript = '/tmp/' + name
                  try:
                      s3.download_file(os.environ['VIDEO_INPUT_BUCKET'], video, src)
                      s3.download_file(bucket, key, transcript)
                      with open(transcript) as f:
                          items = json.load(f)['results']['items']
                      filters = mute_filter(items)
                      upload = src  # nothing to mute: publish the original
                      if filters:
                          # Argument list, no shell: the key cannot inject commands.
                          # -y overwrites leftovers from a warm container.
                          cmd = ['/opt/bin/ffmpeg', '-y', '-i', src, '-af', filters, dst]
                          print(' '.join(cmd))
                          run = subprocess.run(cmd, stdin=subprocess.DEVNULL,
                                               stdout=subprocess.PIPE,
                                               stderr=subprocess.STDOUT)
                          print('ffmpeg exit code: {:d}'.format(run.returncode))
                          if run.returncode != 0:
                              print(run.stdout.decode('utf-8', 'replace')[-2000:])
                              raise RuntimeError('ffmpeg failed for ' + video)
                          upload = dst
                      s3.upload_file(upload, os.environ['OUTPUT_BUCKET'],
                                     'redacted_audio/redacted-' + video)
                  finally:
                      # /tmp survives between warm invocations; free the space.
                      for path in (src, dst, transcript):
                          if os.path.exists(path):
                              os.remove(path)

  #lambda permissions

  # Each permission is limited to the one bucket that triggers the function.
  InvokeTranscribeProcessingPermission:
    Type: AWS::Lambda::Permission
    Properties:
      Action: lambda:InvokeFunction
      FunctionName: !Ref InvokeTranscribe
      Principal: s3.amazonaws.com
      SourceArn: !Sub 'arn:aws:s3:::${InputBucketName}'
      SourceAccount: !Ref AWS::AccountId

  FFMPEGLambdaPermission:
    Type: AWS::Lambda::Permission
    Properties:
      Action: lambda:InvokeFunction
      FunctionName: !Ref RedactVideo
      Principal: s3.amazonaws.com
      SourceArn: !Sub 'arn:aws:s3:::${RedactedTranscriptBucketName}'
      SourceAccount: !Ref AWS::AccountId

  #buckets

  # DependsOn: the invoke permission must exist before S3 validates the
  # notification configuration.
  InputBucket:
    Type: AWS::S3::Bucket
    DeletionPolicy: Retain
    UpdateReplacePolicy: Retain
    DependsOn:
      - InvokeTranscribeProcessingPermission
    Properties:
      BucketName: !Ref InputBucketName
      NotificationConfiguration:
        LambdaConfigurations:
          - Event: s3:ObjectCreated:*
            Function: !GetAtt InvokeTranscribe.Arn

  RedactedTranscriptBucket:
    Type: AWS::S3::Bucket
    DeletionPolicy: Retain
    UpdateReplacePolicy: Retain
    DependsOn:
      - FFMPEGLambdaPermission
    Properties:
      BucketName: !Ref RedactedTranscriptBucketName
      NotificationConfiguration:
        LambdaConfigurations:
          - Event: s3:ObjectCreated:*
            # RedactVideo can only process .json transcripts; do not invoke it
            # for any other object written to this bucket.
            Filter:
              S3Key:
                Rules:
                  - Name: suffix
                    Value: .json
            Function: !GetAtt RedactVideo.Arn

  OutputBucket:
    Type: AWS::S3::Bucket
    DeletionPolicy: Retain
    UpdateReplacePolicy: Retain
    Properties:
      BucketName: !Ref OutputBucketName