AWSTemplateFormatVersion: "2010-09-09" Description: Amazon Transcribe Post Call Analytics - PCA Server - Glue Catalog Database Transform: AWS::Serverless-2016-10-31 Parameters: DatabaseName: Type: AWS::SSM::Parameter::Value Default: DatabaseName OutputBucketName: Type: AWS::SSM::Parameter::Value Default: OutputBucketName OutputBucketParsedResults: Type: AWS::SSM::Parameter::Value Default: OutputBucketParsedResults Resources: Database: Type: AWS::Glue::Database Properties: CatalogId: !Ref AWS::AccountId DatabaseInput: Description: Post Call Analytics LocationUri: !Sub 's3://${OutputBucketName}/${OutputBucketParsedResults}/' Name: !Ref DatabaseName ParsedResultsTable: Type: AWS::Glue::Table Properties: CatalogId: !Ref AWS::AccountId DatabaseName: !Ref Database TableInput: Description: maps to parsedOutput JSON containing call summary and turn-by-turn details for each call Name: parsedresults Owner: !Ref 'AWS::StackName' Retention: 0 StorageDescriptor: Columns: - Name: conversationanalytics Type: struct>,sentimenttrends:struct>,sentimentchange:double>,spk_1:struct>,sentimentchange:double>>,speakertime:struct,spk_1:struct,nontalktime:struct>,totaltimesecs:double>>,categoriesdetected:array,issuesdetected:array>,outcomesdetected:array>,actionitemsdetected:array>,customentities:array>>,sourceinformation:array>>,entityrecognizername:string> - Name: speechsegments Type: array,sentimentispositive:int,sentimentisnegative:int,sentimentscore:double,basesentimentscores:struct,entitiesdetected:array>,categoriesdetected:array,followoncategories:array,issuesdetected:array>,outcomesdetected:array>,actionitemsdetected:array>,wordconfidence:array>>> Compressed: false NumberOfBuckets: -1 InputFormat: org.apache.hadoop.mapred.TextInputFormat Location: !Sub 's3://${OutputBucketName}/${OutputBucketParsedResults}/' OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat SerdeInfo: Name: JSON SerializationLibrary: 'org.openx.data.jsonserde.JsonSerDe' Parameters: paths: ConversationAnalytics,SpeechSegments BucketColumns: [] SortColumns: [] StoredAsSubDirectories: false TableType: EXTERNAL_TABLE PartitionKeys: [] CallDurationAndLanguageView: Type: AWS::Glue::Table Properties: CatalogId: !Ref AWS::AccountId DatabaseName: !Ref Database TableInput: Name: v_call_duration_and_language Owner: !Ref 'AWS::StackName' StorageDescriptor: SerdeInfo: {} Columns: - Name: filename Type: string - Name: languagecode Type: string - Name: speaker_0 Type: string - Name: duration Type: double - Name: conversationtime Type: string - Name: formatedconversationtime Type: timestamp TableType: VIRTUAL_VIEW Parameters: presto_view: true ViewOriginalText: Fn::Join: - '' - - '/* Presto View: ' - Fn::Base64: !Sub | { "originalSql": "SELECT regexp_extract(\"$path\", '[^/]+$') filename, conversationanalytics.languagecode, conversationanalytics.speakerlabels[1].displaytext speaker_0, CAST(conversationanalytics.duration AS double) duration, conversationanalytics.conversationtime, date_parse(substr(conversationanalytics.conversationtime, 1, 19), '%Y-%m-%d %H:%i:%s') formatedconversationtime FROM ${Database}.${ParsedResultsTable}", "catalog": "awsdatacatalog", "schema": "${Database}", "columns": [ { "name": "filename", "type": "varchar" }, { "name": "languagecode", "type": "varchar" }, { "name": "speaker_0", "type": "varchar" }, { "name": "duration", "type": "double" }, { "name": "conversationtime", "type": "varchar" }, { "name": "formatedconversationtime", "type": "timestamp" } ] } - ' */' ViewExpandedText: '/* Presto View */' ConversationSegmentsView: Type: AWS::Glue::Table Properties: CatalogId: !Ref AWS::AccountId DatabaseName: !Ref Database TableInput: Name: v_conversation_segments Owner: !Ref 'AWS::StackName' StorageDescriptor: SerdeInfo: {} Columns: - Name: filename Type: string - Name: languagecode Type: string - Name: speaker_0 Type: string - Name: conversationtime Type: string - Name: duration Type: double - Name: formatedconversationtime Type: timestamp - Name: segmentstarttime Type: double - Name: segmentendtime Type: double - Name: segmentspeaker Type: string - Name: sentimentispositive Type: int - Name: sentimentisnegative Type: int - Name: sentiment Type: varchar(8) - Name: sentimentscore Type: double - Name: displaytext Type: string - Name: textedited Type: int - Name: entitiesdetected Type: array> - Name: speaker Type: varchar(6) TableType: VIRTUAL_VIEW Parameters: presto_view: true ViewOriginalText: Fn::Join: - '' - - '/* Presto View: ' - Fn::Base64: !Sub | { "originalSql": "SELECT regexp_extract(\"$path\", '[^/]+$') filename, conversationanalytics.languagecode, conversationanalytics.speakerlabels[1].displaytext speaker_0, conversationanalytics.conversationtime, CAST(conversationanalytics.duration AS double) duration, date_parse(substr(conversationanalytics.conversationtime, 1, 19), '%Y-%m-%d %H:%i:%s') formatedconversationtime, segment.segmentstarttime, segment.segmentendtime, segment.segmentspeaker, segment.sentimentispositive, segment.sentimentisnegative, (CASE WHEN (segment.sentimentispositive = 1) THEN 'Positive' WHEN (segment.sentimentisnegative = 1) THEN 'Negative' ELSE 'Neutral' END) sentiment, segment.sentimentscore, segment.displaytext, segment.textedited, segment.entitiesdetected, (CASE WHEN ((conversationanalytics.speakerlabels[1].displaytext = 'Agent') AND (segment.segmentspeaker = 'spk_0')) THEN 'Agent' WHEN ((conversationanalytics.speakerlabels[1].displaytext = 'Agent') AND (segment.segmentspeaker = 'spk_1')) THEN 'Caller' END) Speaker FROM (${Database}.${ParsedResultsTable} CROSS JOIN UNNEST(speechsegments) t (segment))", "catalog": "awsdatacatalog", "schema": "${Database}", "columns": [ { "name": "filename", "type": "varchar" }, { "name": "languagecode", "type": "varchar" }, { "name": "speaker_0", "type": "varchar" }, { "name": "conversationtime", "type": "varchar" }, { "name": "duration", "type": "double" }, { "name": "formatedconversationtime", "type": "timestamp" }, { "name": "segmentstarttime", "type": "double" }, { "name": "segmentendtime", "type": "double" }, { "name": "segmentspeaker", "type": "varchar" }, { "name": "sentimentispositive", "type": "integer" }, { "name": "sentimentisnegative", "type": "integer" }, { "name": "sentiment", "type": "varchar" }, { "name": "sentimentscore", "type": "double" }, { "name": "displaytext", "type": "varchar" }, { "name": "textedited", "type": "integer" }, { "name": "entitiesdetected", "type": "array(row(score double,type varchar,text varchar,beginoffset int,endoffset int))" }, { "name": "speaker", "type": "varchar" } ] } - ' */' ViewExpandedText: '/* Presto View */' ExtractedEntitiesView: Type: AWS::Glue::Table Properties: CatalogId: !Ref AWS::AccountId DatabaseName: !Ref Database TableInput: Name: v_extracted_entities Owner: !Ref 'AWS::StackName' StorageDescriptor: SerdeInfo: {} Columns: - Name: filename Type: string - Name: languagecode Type: string - Name: duration Type: double - Name: conversationtime Type: string - Name: formatedconversationtime Type: timestamp - Name: segmentstarttime Type: double - Name: segmentendtime Type: double - Name: segmentspeaker Type: string - Name: speaker Type: varchar(6) - Name: sentimentispositive Type: int - Name: sentimentisnegative Type: int - Name: sentiment Type: string - Name: sentimentscore Type: double - Name: displaytext Type: string - Name: textedited Type: int - Name: beginoffset Type: int - Name: endoffset Type: int - Name: score Type: double - Name: text Type: string - Name: type Type: string TableType: VIRTUAL_VIEW Parameters: presto_view: true ViewOriginalText: Fn::Join: - '' - - '/* Presto View: ' - Fn::Base64: !Sub | { "originalSql": "SELECT filename, languagecode, duration, conversationtime, formatedconversationtime, segmentstarttime, segmentendtime, segmentspeaker, speaker, sentimentispositive, sentimentisnegative, (CASE WHEN (sentimentispositive = 1) THEN 'Positive' WHEN (sentimentisnegative = 1) THEN 'Negative' ELSE 'Neutral' END) sentiment, sentimentscore, displaytext, textedited, entity.beginoffset, entity.endoffset, entity.score, entity.text, entity.type FROM(${Database}.${ConversationSegmentsView} CROSS JOIN UNNEST(entitiesdetected) t (entity))", "catalog": "awsdatacatalog", "schema": "${Database}", "columns": [ { "name": "filename", "type": "varchar" }, { "name": "languagecode", "type": "varchar" }, { "name": "duration", "type": "double" }, { "name": "conversationtime", "type": "varchar" }, { "name": "formatedconversationtime", "type": "timestamp" }, { "name": "segmentstarttime", "type": "double" }, { "name": "segmentendtime", "type": "double" }, { "name": "segmentspeaker", "type": "varchar" }, { "name": "speaker", "type": "varchar" }, { "name": "sentimentispositive", "type": "integer" }, { "name": "sentimentisnegative", "type": "integer" }, { "name": "sentiment", "type": "varchar" }, { "name": "sentimentscore", "type": "double" }, { "name": "displaytext", "type": "varchar" }, { "name": "textedited", "type": "integer" }, { "name": "beginoffset", "type": "integer" }, { "name": "endoffset", "type": "integer" }, { "name": "score", "type": "double" }, { "name": "text", "type": "varchar" }, { "name": "type", "type": "varchar" } ] } - ' */' ViewExpandedText: '/* Presto View */'