---
Description: "Template for Meeting Notes Generator Demo v1.0.0"

# ------ PARAMETERS ------ #
Parameters:
  S3BucketName:
    Description: "S3 bucket name"
    Type: "String"
    Default: "meeting-note-generator-demo"
  S3RecordingsPrefix:
    Description: "S3 prefix for recordings"
    Type: "String"
    Default: "recordings"
  S3TranscriptsPrefix:
    Description: "S3 prefix for transcripts"
    Type: "String"
    Default: "transcripts"
  S3NotesPrefix:
    Description: "S3 prefix for notes"
    Type: "String"
    Default: "notes"
  ImageURI:
    Description: "Container URI for inference endpoint"
    Type: "String"
    Default: "763104351884.dkr.ecr.us-east-1.amazonaws.com/huggingface-pytorch-inference:1.10.2-transformers4.17.0-gpu-py38-cu113-ubuntu20.04"
  ModelData:
    Description: "S3 location for SageMaker JumpStart model"
    Type: "String"
    Default: "s3://jumpstart-cache-prod-us-east-1/huggingface-infer/prepack/v1.0.3/infer-prepack-huggingface-text2text-flan-t5-xl.tar.gz"
  InstanceType:
    Description: "Instance type for SageMaker endpoint"
    Type: "String"
    Default: "ml.p3.2xlarge"
  InstanceCount:
    Description: "Number of instances for SageMaker endpoint"
    Type: "String"
    Default: "1"
  LambdaLayerName:
    Description: "Layer name for the GenerateMeetingNotes Lambda function"
    Type: "String"
    Default: "demo-layer"

# ------ RESOURCES ------ #
Resources:
  # S3 bucket
  # Objects created under the recordings prefix invoke
  # GenerateMeetingTranscript; objects created under the transcripts prefix
  # invoke GenerateMeetingNotes.
  Bucket:
    Type: AWS::S3::Bucket
    # S3 validates the notification destinations when the bucket is created,
    # so the invoke permissions must exist first. The permissions build the
    # bucket ARN with !Sub instead of referencing this resource, which keeps
    # the dependency graph free of cycles.
    DependsOn:
      - GenerateMeetingTranscriptInvokePermission
      - GenerateMeetingNotesInvokePermission
    Properties:
      BucketName: !Sub ${S3BucketName}-bucket-${AWS::AccountId}
      BucketEncryption:
        ServerSideEncryptionConfiguration:
          - ServerSideEncryptionByDefault:
              SSEAlgorithm: aws:kms
              KMSMasterKeyID: alias/aws/s3
      PublicAccessBlockConfiguration:
        IgnorePublicAcls: true
        RestrictPublicBuckets: true
        BlockPublicAcls: true
        BlockPublicPolicy: true
      NotificationConfiguration:
        LambdaConfigurations:
          - Event: s3:ObjectCreated:*
            Filter:
              S3Key:
                Rules:
                  - Name: prefix
                    Value: !Sub ${S3RecordingsPrefix}/
            Function: !GetAtt GenerateMeetingTranscript.Arn
          - Event: s3:ObjectCreated:*
            Filter:
              S3Key:
                Rules:
                  - Name: prefix
                    Value: !Sub ${S3TranscriptsPrefix}/
            Function: !GetAtt GenerateMeetingNotes.Arn
      LoggingConfiguration:
        DestinationBucketName: !Ref LoggingBucket
        LogFilePrefix: logs

  # Destination bucket for the server access logs of the main bucket
  LoggingBucket:
    Type: 'AWS::S3::Bucket'
    Properties:
      AccessControl: LogDeliveryWrite
      OwnershipControls:
        Rules:
          - ObjectOwnership: ObjectWriter
      BucketEncryption:
        ServerSideEncryptionConfiguration:
          - ServerSideEncryptionByDefault:
              SSEAlgorithm: aws:kms
              KMSMasterKeyID: alias/aws/s3
      PublicAccessBlockConfiguration:
        IgnorePublicAcls: true
        RestrictPublicBuckets: true
        BlockPublicAcls: true
        BlockPublicPolicy: true

  # Denies every request to the main bucket that is not sent over TLS
  BucketPolicy:
    Type: AWS::S3::BucketPolicy
    Properties:
      Bucket: !Ref Bucket
      PolicyDocument:
        Id: RequireEncryptionInTransit
        Version: '2012-10-17'
        Statement:
          - Principal: '*'
            Action: '*'
            Effect: Deny
            Resource:
              - !GetAtt Bucket.Arn
              - !Sub ${Bucket.Arn}/*
            Condition:
              Bool:
                aws:SecureTransport: 'false'

  # Lambda Invoke Permission for GenerateMeetingTranscript Lambda function
  GenerateMeetingTranscriptInvokePermission:
    Type: AWS::Lambda::Permission
    Properties:
      FunctionName: !GetAtt GenerateMeetingTranscript.Arn
      Action: lambda:InvokeFunction
      Principal: s3.amazonaws.com
      SourceArn: !Sub arn:aws:s3:::${S3BucketName}-bucket-${AWS::AccountId}
      SourceAccount: !Ref AWS::AccountId

  # Execution IAM role for GenerateMeetingTranscript Lambda function
  GenerateMeetingTranscriptLambdaRole:
    Type: "AWS::IAM::Role"
    Properties:
      RoleName: !Sub ${AWS::StackName}-GenerateMeetingTranscriptLambdaRole
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - lambda.amazonaws.com
            Action:
              - sts:AssumeRole
      Policies:
        - PolicyName: CreateLogGroupPolicy
          PolicyDocument:
            Statement:
              - Effect: Allow
                Action:
                  - logs:CreateLogGroup
                Resource: !Sub arn:aws:logs:${AWS::Region}:${AWS::AccountId}:*
        - PolicyName: LogsPolicy
          PolicyDocument:
            Statement:
              - Effect: Allow
                Action:
                  - logs:CreateLogStream
                  - logs:PutLogEvents
                Resource: !GetAtt GenerateMeetingTranscriptLogGroup.Arn
        - PolicyName: S3BucketRecordingsReadAccess
          PolicyDocument:
            Statement:
              - Effect: Allow
                Action:
                  - s3:GetObject
                Resource: !Sub arn:aws:s3:::${S3BucketName}-bucket-${AWS::AccountId}/${S3RecordingsPrefix}/*
        - PolicyName: S3BucketTranscriptsWriteAccess
          PolicyDocument:
            Statement:
              - Effect: Allow
                Action:
                  - s3:PutObject
                Resource: !Sub arn:aws:s3:::${S3BucketName}-bucket-${AWS::AccountId}/${S3TranscriptsPrefix}/*
      ManagedPolicyArns:
        - arn:aws:iam::aws:policy/AmazonTranscribeFullAccess

  # GenerateMeetingTranscript Lambda function
  # Starts an Amazon Transcribe job for each uploaded recording and asks
  # Transcribe to write its output under the transcripts prefix.
  GenerateMeetingTranscript:
    Type: AWS::Lambda::Function
    Properties:
      Description: !Sub
        - Stack ${AWS::StackName} Function ${ResourceName}
        - ResourceName: GenerateMeetingTranscript
      FunctionName: !Sub ${AWS::StackName}-GenerateMeetingTranscript
      Handler: index.lambda_handler
      # NOTE(review): python3.7 is past end of support in AWS Lambda; confirm
      # and move to a currently supported runtime.
      Runtime: python3.7
      MemorySize: 128
      Timeout: 15
      Code:
        ZipFile: |
          import json
          import os
          import re
          import time
          import urllib.parse

          import boto3

          S3_BUCKET = os.environ.get('S3_BUCKET')
          SOURCE_PREFIX = os.environ.get('SOURCE_PREFIX')
          DESTINATION_PREFIX = os.environ.get('DESTINATION_PREFIX')

          transcribe_client = boto3.client('transcribe')


          def lambda_handler(event, context):
              """Start a transcription job for the recording in the S3 event."""
              # Object keys in S3 event notifications are URL-encoded.
              recording_name = urllib.parse.unquote_plus(
                  event['Records'][0]['s3']['object']['key'])

              # Parse the last path component so that nested keys and file
              # names containing extra dots are handled.
              stem, extension = os.path.splitext(os.path.basename(recording_name))
              if not stem or not extension:
                  # For example the placeholder object for the prefix itself.
                  print("Skipping {}: not a media file.".format(recording_name))
                  return {
                      'statusCode': 200,
                      'body': json.dumps('Skipped {}'.format(recording_name))
                  }

              # Transcribe job names only allow letters, digits, '.', '_', '-'.
              safe_stem = re.sub(r'[^0-9a-zA-Z._-]', '_', stem)
              job_name = '{}_{}'.format(safe_stem, int(time.time()))
              media_format = extension[1:].lower()
              media_uri = 's3://{}/{}'.format(S3_BUCKET, recording_name)
              output_key = '{}/{}.txt'.format(DESTINATION_PREFIX, job_name)

              try:
                  transcribe_client.start_transcription_job(
                      TranscriptionJobName=job_name,
                      Media={'MediaFileUri': media_uri},
                      MediaFormat=media_format,
                      IdentifyLanguage=True,
                      OutputBucketName=S3_BUCKET,
                      OutputKey=output_key
                  )
                  print("Started transcription job {}.".format(job_name))
              except Exception:
                  print("Couldn't start transcription job {}.".format(job_name))
                  raise

              return {
                  'statusCode': 200,
                  'body': json.dumps('Started transcription job {}'.format(job_name))
              }
      Environment:
        Variables:
          DESTINATION_PREFIX: !Ref S3TranscriptsPrefix
          S3_BUCKET: !Sub ${S3BucketName}-bucket-${AWS::AccountId}
          SOURCE_PREFIX: !Ref S3RecordingsPrefix
      Role: !GetAtt GenerateMeetingTranscriptLambdaRole.Arn

  GenerateMeetingTranscriptLogGroup:
    Type: AWS::Logs::LogGroup
    Properties:
      LogGroupName: !Sub /aws/lambda/${AWS::StackName}-GenerateMeetingTranscript

  # Lambda Invoke Permission for GenerateMeetingNotes Lambda function
  GenerateMeetingNotesInvokePermission:
    Type: AWS::Lambda::Permission
    Properties:
      FunctionName: !GetAtt GenerateMeetingNotes.Arn
      Action: lambda:InvokeFunction
      Principal: s3.amazonaws.com
      SourceArn: !Sub arn:aws:s3:::${S3BucketName}-bucket-${AWS::AccountId}
      SourceAccount: !Ref AWS::AccountId

  # Execution IAM role for GenerateMeetingNotes Lambda function
  GenerateMeetingNotesLambdaRole:
    Type: "AWS::IAM::Role"
    Properties:
      RoleName: !Sub ${AWS::StackName}-GenerateMeetingNotesLambdaRole
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - lambda.amazonaws.com
            Action:
              - sts:AssumeRole
      Policies:
        - PolicyName: CreateLogGroupPolicy
          PolicyDocument:
            Statement:
              - Effect: Allow
                Action:
                  - logs:CreateLogGroup
                Resource: !Sub arn:aws:logs:${AWS::Region}:${AWS::AccountId}:*
        - PolicyName: LogsPolicy
          PolicyDocument:
            Statement:
              - Effect: Allow
                Action:
                  - logs:CreateLogStream
                  - logs:PutLogEvents
                Resource: !GetAtt GenerateMeetingNotesLogGroup.Arn
        - PolicyName: SageMakerInvokeEndpointPolicy
          PolicyDocument:
            Statement:
              - Effect: Allow
                Action:
                  - sagemaker:DescribeEndpointConfig
                  - sagemaker:InvokeEndpointAsync
                  - sagemaker:DescribeEndpoint
                  - sagemaker:InvokeEndpoint
                Resource:
                  - !Sub arn:aws:sagemaker:${AWS::Region}:${AWS::AccountId}:endpoint/*
                  - !Sub arn:aws:sagemaker:${AWS::Region}:${AWS::AccountId}:endpoint-config/*
        - PolicyName: S3BucketTranscriptsReadAccess
          PolicyDocument:
            Statement:
              - Effect: Allow
                Action:
                  - s3:GetObject
                Resource: !Sub arn:aws:s3:::${S3BucketName}-bucket-${AWS::AccountId}/${S3TranscriptsPrefix}/*
        - PolicyName: S3BucketNotesWriteAccess
          PolicyDocument:
            Statement:
              - Effect: Allow
                Action:
                  - s3:PutObject
                Resource: !Sub arn:aws:s3:::${S3BucketName}-bucket-${AWS::AccountId}/${S3NotesPrefix}/*

  # GenerateMeetingNotes Lambda function
  # Splits a transcript into chunks, summarizes each chunk with the SageMaker
  # endpoint, summarizes the combined chunk summaries, and stores the result
  # under the notes prefix.
  GenerateMeetingNotes:
    Type: AWS::Lambda::Function
    Properties:
      Description: !Sub
        - Stack ${AWS::StackName} Function ${ResourceName}
        - ResourceName: GenerateMeetingNotes
      FunctionName: !Sub ${AWS::StackName}-GenerateMeetingNotes
      Handler: index.lambda_handler
      # NOTE(review): python3.7 is past end of support in AWS Lambda; confirm
      # the layer below works on a newer runtime before changing this value.
      Runtime: python3.7
      MemorySize: 128
      Timeout: 180
      Code:
        ZipFile: |
          import json
          import os
          import urllib.parse

          import boto3
          import nltk
          from nltk.tokenize import word_tokenize
          from nltk.tokenize.treebank import TreebankWordDetokenizer

          # Tokenizer data is downloaded to /tmp when the module is loaded.
          nltk.data.path.append("/tmp")
          nltk.download("punkt", download_dir="/tmp")

          S3_BUCKET = os.environ.get('S3_BUCKET')
          SOURCE_PREFIX = os.environ.get('SOURCE_PREFIX')
          DESTINATION_PREFIX = os.environ.get('DESTINATION_PREFIX')
          ENDPOINT_NAME = os.environ.get('SAGEMAKER_ENDPOINT_NAME')

          CHUNK_LENGTH = 400
          INSTRUCTION = 'Summarize the context above.'

          s3_client = boto3.client('s3')
          sagemaker_runtime = boto3.client('runtime.sagemaker')


          def lambda_handler(event, context):
              """Summarize the transcript in the S3 event and save the notes."""
              print(event)

              # Object keys in S3 event notifications are URL-encoded.
              transcript_key = urllib.parse.unquote_plus(
                  event['Records'][0]['s3']['object']['key'])
              transcript_name = os.path.splitext(os.path.basename(transcript_key))[0]
              if not transcript_name:
                  # For example the placeholder object for the prefix itself.
                  print('Skipping {}: not a transcript.'.format(transcript_key))
                  return {
                      'statusCode': 200,
                      'body': {
                          'message': json.dumps('Skipped {}'.format(transcript_key))
                      }
                  }
              output_key = '{}/{}.txt'.format(DESTINATION_PREFIX, transcript_name)

              # Load transcript
              s3_client.download_file(
                  Bucket=S3_BUCKET, Key=transcript_key, Filename='/tmp/transcript.txt')
              with open('/tmp/transcript.txt') as f:
                  contents = json.load(f)

              # Split the transcript into chunks of at most CHUNK_LENGTH tokens
              transcript = contents['results']['transcripts'][0]['transcript']
              transcript_tokens = word_tokenize(transcript)
              detokenizer = TreebankWordDetokenizer()
              transcript_chunks = [
                  detokenizer.detokenize(transcript_tokens[start:start + CHUNK_LENGTH])
                  for start in range(0, len(transcript_tokens), CHUNK_LENGTH)
              ]
              print('Transcript broken into {} chunks of {} tokens.'.format(
                  len(transcript_chunks), CHUNK_LENGTH))

              try:
                  # Summarize each chunk, then summarize the joined summaries
                  chunk_summaries = [summarize(chunk)[0] for chunk in transcript_chunks]
                  generated_texts = summarize(' '.join(chunk_summaries))
                  results = {
                      "summary": generated_texts,
                      "chunk_summaries": chunk_summaries
                  }
              except Exception as e:
                  print('Error generating text')
                  print(e)
                  raise

              # Save response to S3
              s3_client.put_object(
                  Bucket=S3_BUCKET,
                  Key=output_key,
                  Body=json.dumps(results).encode('utf-8'))

              # Return response
              return {
                  'statusCode': 200,
                  'body': {
                      'message': json.dumps('Completed summary job {}'.format(transcript_name)),
                      'results': results
                  }
              }


          def summarize(text):
              """Return the endpoint's generated texts for text plus the instruction."""
              payload = {
                  "text_inputs": '{}\n{}'.format(text, INSTRUCTION),
                  "max_length": 100,
                  "num_return_sequences": 1,
                  "top_k": 50,
                  "top_p": 0.95,
                  "do_sample": True
              }
              query_response = query_endpoint_with_json_payload(
                  json.dumps(payload).encode('utf-8'))
              return parse_response_multiple_texts(query_response)


          def query_endpoint_with_json_payload(encoded_json):
              """Invoke the SageMaker endpoint with a JSON request body."""
              return sagemaker_runtime.invoke_endpoint(
                  EndpointName=ENDPOINT_NAME,
                  ContentType='application/json',
                  Body=encoded_json)


          def parse_response_multiple_texts(query_response):
              """Extract the 'generated_texts' list from an endpoint response."""
              model_predictions = json.loads(query_response['Body'].read())
              return model_predictions['generated_texts']
      Environment:
        Variables:
          DESTINATION_PREFIX: !Ref S3NotesPrefix
          S3_BUCKET: !Sub ${S3BucketName}-bucket-${AWS::AccountId}
          SAGEMAKER_ENDPOINT_NAME: !GetAtt SageMakerEndpoint.EndpointName
          SOURCE_PREFIX: !Ref S3TranscriptsPrefix
      Layers:
        - !Sub arn:aws:lambda:${AWS::Region}:${AWS::AccountId}:layer:${LambdaLayerName}:1
      Role: !GetAtt GenerateMeetingNotesLambdaRole.Arn

  GenerateMeetingNotesLogGroup:
    Type: AWS::Logs::LogGroup
    Properties:
      LogGroupName: !Sub /aws/lambda/${AWS::StackName}-GenerateMeetingNotes

  # Execution IAM role for SageMaker Model
  SageMakerExecutionRole:
    Type: AWS::IAM::Role
    Properties:
      RoleName: !Sub sagemaker-soln-mng-${AWS::StackName}-SageMakerExecutionRole
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - sagemaker.amazonaws.com
            Action:
              - sts:AssumeRole
      ManagedPolicyArns:
        - arn:aws:iam::aws:policy/AmazonSageMakerFullAccess

  # SageMaker Model
  SageMakerModel:
    Type: AWS::SageMaker::Model
    Properties:
      ModelName: !Sub sagemaker-soln-mng-${AWS::StackName}-SageMakerModel
      Containers:
        - Image: !Ref ImageURI
          ModelDataUrl: !Ref ModelData
          Mode: SingleModel
          # Every value is quoted so that it is passed as a string.
          Environment:
            MODEL_CACHE_ROOT: "/opt/ml/model"
            SAGEMAKER_ENV: "1"
            SAGEMAKER_MODEL_SERVER_TIMEOUT: "3600"
            SAGEMAKER_MODEL_SERVER_WORKERS: "1"
            SAGEMAKER_PROGRAM: "inference.py"
            SAGEMAKER_SUBMIT_DIRECTORY: "/opt/ml/model/code/"
            TS_DEFAULT_WORKERS_PER_MODEL: "1"
      EnableNetworkIsolation: true
      ExecutionRoleArn: !GetAtt SageMakerExecutionRole.Arn

  # SageMaker Endpoint Config
  SageMakerEndpointConfig:
    Type: AWS::SageMaker::EndpointConfig
    Properties:
      EndpointConfigName: !Sub sagemaker-soln-mng-${AWS::StackName}-SageMakerEndpointConfig
      ProductionVariants:
        - ModelName: !GetAtt SageMakerModel.ModelName
          VariantName: !Sub ${SageMakerModel.ModelName}-1
          InitialInstanceCount: !Ref InstanceCount
          InstanceType: !Ref InstanceType
          InitialVariantWeight: 1.0
          VolumeSizeInGB: 40

  # SageMaker Endpoint
  SageMakerEndpoint:
    Type: AWS::SageMaker::Endpoint
    Properties:
      EndpointName: !Sub sagemaker-soln-mng-${AWS::StackName}-SageMakerEndpoint
      EndpointConfigName: !GetAtt SageMakerEndpointConfig.EndpointConfigName