/* * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. * SPDX-License-Identifier: MIT-0 * * Permission is hereby granted, free of charge, to any person obtaining a copy of this * software and associated documentation files (the "Software"), to deal in the Software * without restriction, including without limitation the rights to use, copy, modify, * merge, publish, distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "SpeechComponent.h" #include #include "Json.h" #include "UObject/Class.h" #include "TimerManager.h" #include "Kismet/GameplayStatics.h" #include #include #include "UnrealAWSUtils.h" #include "GenerateSpeechAction.h" using UnrealAWSUtils::AwsStringToFString; using UnrealAWSUtils::FStringToAwsString; DEFINE_LOG_CATEGORY(LogPollyMsg); USpeechComponent::USpeechComponent() { // Set this component to be initialized when the game starts, and to be ticked every frame. // You can turn these features off to improve performance if you don't need them. PrimaryComponentTick.bCanEverTick = false; bWantsInitializeComponent = true; } void USpeechComponent::InitializeComponent() { Super::InitializeComponent(); InitializePollyClient(); } void USpeechComponent::GenerateSpeech( UObject* WorldContextObject, const FString Text, const EVoiceId VoiceId, struct FLatentActionInfo LatentActionInfo, EGenerateSpeechExecPins& GenerateSpeechExecPins ) { if (UWorld* World = GEngine->GetWorldFromContextObjectChecked(WorldContextObject)) { FLatentActionManager& LatentActionManager = World->GetLatentActionManager(); UObject* CallbackTarget = LatentActionInfo.CallbackTarget; int32 UUID = LatentActionInfo.UUID; if (LatentActionManager.FindExistingAction(CallbackTarget, UUID) == NULL) { // LatentActionManager takes ownership of FGenerateSpeechAction (it calls delete). LatentActionManager.AddNewAction(CallbackTarget, UUID, new FGenerateSpeechAction(LatentActionInfo, this, Text, VoiceId, GenerateSpeechExecPins)); } } } USoundWaveProcedural* USpeechComponent::StartSpeech() { FScopeLock lock(&Mutex); if (VisemeEventArray.Num() == 0) { UE_LOG(LogPollyMsg, Error, TEXT("Failed to start speech. GenerateSpeech must be invoked before StartSpeech.")); return nullptr; } else { bIsSpeaking = true; CurrentVisemeIndex = 0; CurrentViseme = VisemeEventArray[CurrentVisemeIndex].Viseme; StartTimePoint = std::chrono::steady_clock::now(); float CurrentVisemeDurationSeconds = VisemeEventArray[CurrentVisemeIndex].TimeMilliseconds / 1000.0f; SetTimer(CurrentVisemeDurationSeconds); return QueuePollyAudio(); } } EViseme USpeechComponent::GetCurrentViseme() { FScopeLock lock(&Mutex); return CurrentViseme; } bool USpeechComponent::IsSpeaking() { FScopeLock lock(&Mutex); return bIsSpeaking; } void USpeechComponent::PlayNextViseme() { FScopeLock lock(&Mutex); CurrentVisemeIndex++; ClearTimer(); if (CurrentVisemeIndex == VisemeEventArray.Num() || VisemeEventArray.Num() == 0) { bIsSpeaking = false; return; } else { CurrentViseme = VisemeEventArray[CurrentVisemeIndex].Viseme; auto CurrentTimePoint = std::chrono::steady_clock::now(); float SecondsSinceStart = std::chrono::duration(CurrentTimePoint - StartTimePoint).count() / 1000.0f; float CurrentVisemeDurationSeconds = fmaxf(VisemeEventArray[CurrentVisemeIndex].TimeMilliseconds / 1000.0f - SecondsSinceStart, 0); SetTimer(CurrentVisemeDurationSeconds); } } void USpeechComponent::GenerateSpeechSync(const FString Text, const EVoiceId VoiceId) { if (Text.IsEmpty()) { UE_LOG(LogPollyMsg, Error, TEXT("Cannot generate speech (check input text).")); return; } if (IsSpeaking()) { UE_LOG(LogPollyMsg, Error, TEXT("Cannot generate speech during playback.")); return; } if (SynthesizeAudio(Text, VoiceId) && SynthesizeVisemes(Text, VoiceId)) { UE_LOG(LogPollyMsg, Display, TEXT("Polly called successfully!")); } } bool USpeechComponent::SynthesizeAudio(const FString& Text, const EVoiceId VoiceId) { PollyOutcome PollyAudioOutcome = MyPollyClient->SynthesizeSpeech(CreatePollyAudioRequest(Text, VoiceId)); if (PollyAudioOutcome.IsSuccess) { FScopeLock lock(&Mutex); Audiobuffer = PollyAudioOutcome.StreamBuffer; } else { UE_LOG(LogPollyMsg, Error, TEXT("Polly failed to generate audio file. Error: %s"), *AwsStringToFString(PollyAudioOutcome.PollyErrorMsg)); } return PollyAudioOutcome.IsSuccess; } bool USpeechComponent::SynthesizeVisemes(const FString& Text, const EVoiceId VoiceId) { PollyOutcome PollyVisemeOutcome = MyPollyClient->SynthesizeSpeech(CreatePollyVisemeRequest(Text, VoiceId)); if (PollyVisemeOutcome.IsSuccess) { FScopeLock lock(&Mutex); FString VisemeJson; FFileHelper::BufferToString(VisemeJson, PollyVisemeOutcome.StreamBuffer.GetData(), PollyVisemeOutcome.StreamBuffer.Num()); GenerateVisemeEvents(VisemeJson); } else { UE_LOG(LogPollyMsg, Error, TEXT("Polly failed to generate visemes. Error: %s"), *AwsStringToFString(PollyVisemeOutcome.PollyErrorMsg)); } return PollyVisemeOutcome.IsSuccess; } USoundWaveProcedural* USpeechComponent::QueuePollyAudio() { USoundWaveProcedural* PollyAudio = NewObject(); PollyAudio->SetSampleRate(16000); PollyAudio->NumChannels = 1; PollyAudio->DecompressionType = DTYPE_Procedural; int32 BitRate = 16 * PollyAudio->NumChannels * PollyAudio->GetSampleRateForCurrentPlatform(); PollyAudio->Duration = Audiobuffer.Num() * 8.0f / BitRate; PollyAudio->QueueAudio(Audiobuffer.GetData(), Audiobuffer.Num()); return PollyAudio; } Aws::Polly::Model::SynthesizeSpeechRequest USpeechComponent::CreatePollyAudioRequest(const FString& Text, const EVoiceId VoiceId) const { Aws::Polly::Model::SynthesizeSpeechRequest PollyRequest; PollyRequest.SetText(FStringToAwsString(Text)); PollyRequest.SetVoiceId(ToPollyVoiceId(VoiceId)); PollyRequest.SetEngine(ToPollyVoiceEngine(VoiceId)); PollyRequest.SetOutputFormat(Aws::Polly::Model::OutputFormat::pcm); return PollyRequest; } Aws::Polly::Model::SynthesizeSpeechRequest USpeechComponent::CreatePollyVisemeRequest(const FString& Text, const EVoiceId VoiceId) const { Aws::Polly::Model::SynthesizeSpeechRequest PollyRequest; PollyRequest.SetText(FStringToAwsString(Text)); PollyRequest.SetVoiceId(ToPollyVoiceId(VoiceId)); PollyRequest.SetEngine(ToPollyVoiceEngine(VoiceId)); PollyRequest.SetOutputFormat(Aws::Polly::Model::OutputFormat::json); PollyRequest.AddSpeechMarkTypes(Aws::Polly::Model::SpeechMarkType::viseme); return PollyRequest; } void USpeechComponent::GenerateVisemeEvents(FString VisemeJson) { VisemeEventArray = {}; TArray VisemeStrings; VisemeJson.ParseIntoArray(VisemeStrings, TEXT("\n"), true); for (FString VisemeSet : VisemeStrings) { TSharedPtr JsonParsed = MakeShareable(new FJsonObject); TSharedRef> JsonReader = TJsonReaderFactory::Create(VisemeSet); FString OutString; double OutNumber; if (FJsonSerializer::Deserialize(JsonReader, JsonParsed) && JsonParsed->TryGetStringField("value", OutString) && JsonParsed->TryGetNumberField("time", OutNumber)) { VisemeEvent CurrentVisemeEvent; CurrentVisemeEvent.Viseme = GetVisemeValueFromString(JsonParsed->GetStringField("value")); CurrentVisemeEvent.TimeMilliseconds = JsonParsed->GetIntegerField("time"); VisemeEventArray.Add(CurrentVisemeEvent); } else { UE_LOG(LogPollyMsg, Error, TEXT("Failed to parse json formatted viseme sequence returned by Amazon Polly.")); VisemeEventArray = {}; Audiobuffer.Empty(); break; } } } void USpeechComponent::InitializePollyClient() { MyPollyClient = MakeUnique(); } void USpeechComponent::SetTimer(float CurrentVisemeDurationSeconds) { GetWorld()->GetTimerManager().SetTimer(CountdownTimerHandle, this, &USpeechComponent::PlayNextViseme, 1.0f, true, CurrentVisemeDurationSeconds); } void USpeechComponent::ClearTimer() { GetWorld()->GetTimerManager().ClearTimer(CountdownTimerHandle); }