/** * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. * SPDX-License-Identifier: Apache-2.0. */ #include #include #include #include #include #include #include #include using namespace Aws::TextToSpeech; using namespace Aws::Polly; using namespace Aws::Polly::Model; using namespace Aws::Utils; using namespace Aws::Utils::Threading; static const char* MOCK_DRIVER_NAME = "Mock Driver"; static const char* ALLOC_TAG = "TextToSpeechManagerTests"; class MockPCMDriver : public PCMOutputDriver { public: MockPCMDriver() : m_primeCalledCount(0), m_flushCalledCount(0), m_mockWriteResponse(false) {} bool WriteBufferToDevice(const unsigned char* buffer, size_t size) override { m_buffersWritten.push_back(ByteBuffer(buffer, size)); return m_mockWriteResponse; } void MockWriteResponse(bool response) { m_mockWriteResponse = response; } const Aws::Vector& GetWrittenBuffers() const { return m_buffersWritten; } Aws::Vector EnumerateDevices() const override { return m_devices; } void AddDevice(const DeviceInfo& device) { m_devices.push_back(device); } void SetActiveDevice(const DeviceInfo& device, const CapabilityInfo& caps) override { m_activeDevice = device; m_selectedCaps = caps; } const DeviceInfo& GetActiveDevice() const { return m_activeDevice; } const CapabilityInfo& GetActiveCaps() const { return m_selectedCaps; } const char* GetName() const override { return MOCK_DRIVER_NAME; } size_t GetPrimeCalledCount() const { return m_primeCalledCount; } void Prime() override { m_primeCalledCount++; } size_t GetFlushCalledCount() const { return m_primeCalledCount; } void Flush() override { m_flushCalledCount++; } private: size_t m_primeCalledCount; size_t m_flushCalledCount; DeviceInfo m_activeDevice; CapabilityInfo m_selectedCaps; Aws::Vector m_devices; bool m_mockWriteResponse; Aws::Vector m_buffersWritten; }; class MockPCMDriverFactory : public PCMOutputDriverFactory { public: Aws::Vector> LoadDrivers() const override { return m_drivers; } void AddDriver(const std::shared_ptr& driver) { m_drivers.push_back(driver); } private: Aws::Vector> m_drivers; }; class MockPollyClient : public PollyClient { public: MockPollyClient(const Aws::Client::ClientConfiguration& clientConfig = Aws::Client::ClientConfiguration()) : PollyClient(Aws::Auth::AWSCredentials("", ""), clientConfig) {} DescribeVoicesOutcome DescribeVoices(const DescribeVoicesRequest& request) const override { m_capturedVoicesRequest = request; return m_mockedVoicesOutcome; } void MockDescribeVoices(const DescribeVoicesOutcome& outcome) { m_mockedVoicesOutcome = outcome; } const DescribeVoicesRequest& GetCapturedDescribeVoices() const { return m_capturedVoicesRequest; } SynthesizeSpeechOutcome SynthesizeSpeech(const SynthesizeSpeechRequest& request) const override { m_capturedSynthRequest = request; return SynthesizeSpeechOutcome(std::move(m_mockedSynthOutcome)); } void MockSynthesizeSpeech(SynthesizeSpeechOutcome&& outcome) { m_mockedSynthOutcome = std::move(outcome); } const SynthesizeSpeechRequest& GetCapturedSynthesizeSpeech() const { return m_capturedSynthRequest; } private: DescribeVoicesOutcome m_mockedVoicesOutcome; mutable DescribeVoicesRequest m_capturedVoicesRequest; mutable SynthesizeSpeechOutcome m_mockedSynthOutcome; mutable SynthesizeSpeechRequest m_capturedSynthRequest; }; TEST(TextToSpeechManagerTests, TestListVoicesSuccess) { Voice voice1; voice1.WithGender(Gender::Male) .WithId(VoiceId::Brian) .WithLanguageCode(LanguageCode::en_GB) .WithLanguageName("English") .WithName("Brian"); Voice voice2; voice2.WithGender(Gender::Female) .WithId(VoiceId::Amy) .WithLanguageCode(LanguageCode::en_US) .WithLanguageName("English") .WithName("Amy"); DescribeVoicesResult describeVoiceResult; describeVoiceResult.AddVoices(voice1).AddVoices(voice2); auto pollyClient = Aws::MakeShared(ALLOC_TAG); auto manager = TextToSpeechManager::Create(pollyClient); pollyClient->MockDescribeVoices(describeVoiceResult); auto voices = manager->ListAvailableVoices(); ASSERT_EQ(2u, voices.size()); ASSERT_STREQ(voice1.GetName().c_str(), voices[0].first.c_str()); ASSERT_STREQ(voice1.GetLanguageName().c_str(), voices[0].second.c_str()); ASSERT_STREQ(voice2.GetName().c_str(), voices[1].first.c_str()); ASSERT_STREQ(voice2.GetLanguageName().c_str(), voices[1].second.c_str()); } TEST(TextToSpeechManagerTests, TestListVoicesFailure) { auto pollyClient = Aws::MakeShared(ALLOC_TAG); auto manager = TextToSpeechManager::Create(pollyClient); pollyClient->MockDescribeVoices(Aws::Client::AWSError()); auto voices = manager->ListAvailableVoices(); ASSERT_EQ(0u, voices.size()); } TEST(TextToSpeechManagerTests, TestDeviceListAndSelection) { auto pollyClient = Aws::MakeShared(ALLOC_TAG); auto driver1 = Aws::MakeShared(ALLOC_TAG); auto driver2 = Aws::MakeShared(ALLOC_TAG); auto driverFactory = Aws::MakeShared(ALLOC_TAG); driverFactory->AddDriver(driver1); driverFactory->AddDriver(driver2); auto manager = TextToSpeechManager::Create(pollyClient, driverFactory); DeviceInfo devInfo1; devInfo1.deviceId = "device1"; devInfo1.deviceName ="deviceName1"; CapabilityInfo capability; capability.sampleRate = KHZ_8; devInfo1.capabilities.push_back(capability); capability.sampleRate = KHZ_16; devInfo1.capabilities.push_back(capability); driver1->AddDevice(devInfo1); DeviceInfo devInfo2; devInfo2.deviceId = "device2"; devInfo2.deviceName = "deviceName2"; capability.sampleRate = 22050; devInfo2.capabilities.push_back(capability); driver2->AddDevice(devInfo2); auto devices = manager->EnumerateDevices(); ASSERT_EQ(2u, devices.size()); ASSERT_STREQ(devInfo1.deviceId.c_str(), devices[0].first.deviceId.c_str()); ASSERT_STREQ(devInfo1.deviceName.c_str(), devices[0].first.deviceName.c_str()); ASSERT_EQ(2u, devices[0].first.capabilities.size()); ASSERT_EQ(KHZ_8, devices[0].first.capabilities[0].sampleRate); ASSERT_EQ(KHZ_16, devices[0].first.capabilities[1].sampleRate); ASSERT_EQ(devices[0].second, driver1); ASSERT_STREQ(devInfo2.deviceId.c_str(), devices[1].first.deviceId.c_str()); ASSERT_STREQ(devInfo2.deviceName.c_str(), devices[1].first.deviceName.c_str()); ASSERT_EQ(1u, devices[1].first.capabilities.size()); ASSERT_EQ(KHZ_22_5, devices[1].first.capabilities[0].sampleRate); ASSERT_EQ(devices[1].second, driver2); manager->SetActiveDevice(driver2, devInfo2, devInfo2.capabilities[0]); ASSERT_STREQ(devInfo2.deviceId.c_str(), driver2->GetActiveDevice().deviceId.c_str()); ASSERT_EQ(KHZ_22_5, driver2->GetActiveCaps().sampleRate); } TEST(TextToSpeechManagerTests, TestDeviceListEmpty) { auto pollyClient = Aws::MakeShared(ALLOC_TAG); auto driverFactory = Aws::MakeShared(ALLOC_TAG); auto manager = TextToSpeechManager::Create(pollyClient, driverFactory); ASSERT_EQ(0u, manager->EnumerateDevices().size()); } TEST(TextToSpeechManagerTests, TestTextToSpeechManagerLifetime) { Aws::Client::ClientConfiguration clientConfig; clientConfig.executor = Aws::MakeShared(ALLOC_TAG, 5); auto pollyClient = Aws::MakeShared(ALLOC_TAG, clientConfig); auto driver1 = Aws::MakeShared(ALLOC_TAG); driver1->MockWriteResponse(true); auto driverFactory = Aws::MakeShared(ALLOC_TAG); driverFactory->AddDriver(driver1); auto strStream = Aws::New(ALLOC_TAG); const char STREAM_CONTENT[] = "Stream content. blah blah blah"; *strStream << STREAM_CONTENT; SynthesizeSpeechResult res; res.ReplaceBody(strStream); pollyClient->MockSynthesizeSpeech(std::move(res)); DeviceInfo devInfo1; devInfo1.deviceId = "device1"; devInfo1.deviceName = "deviceName1"; CapabilityInfo capability; capability.sampleRate = KHZ_8; devInfo1.capabilities.push_back(capability); const char* REQUEST_TEXT = "Blah blah blah"; Aws::Utils::Threading::Semaphore handlerExitSemaphore(0, 1); Aws::Utils::Threading::Semaphore scopeExitSemaphore(0, 1); { auto manager = TextToSpeechManager::Create(pollyClient, driverFactory); manager->SetActiveDevice(driver1, devInfo1, capability); manager->SetActiveVoice("Maxim"); SendTextCompletedHandler handler = [&](const char*, const SynthesizeSpeechOutcome&, bool) { // scopeExitsemaphore is used to ensure the handler is executed after the manager is "out of scope", // so that the use_count of the manager is 1 in the handler. scopeExitSemaphore.WaitOne(); // handlerExitSemaphore is used to ensure the main thread is waiting for the async handler thread. handlerExitSemaphore.Release(); }; manager->SendTextToOutputDevice(REQUEST_TEXT, handler); // use_count for manager should be 2, one in this scope, another in SendTextToOutputDevice calling shared_from_this. ASSERT_EQ(2, manager.use_count()); } scopeExitSemaphore.Release(); handlerExitSemaphore.WaitOne(); pollyClient = nullptr; } TEST(TextToSpeechManagerTests, TestSynthResponseAndOutput) { Aws::Client::ClientConfiguration clientConfig; clientConfig.executor = Aws::MakeShared(ALLOC_TAG, 5); auto pollyClient = Aws::MakeShared(ALLOC_TAG, clientConfig); auto driver1 = Aws::MakeShared(ALLOC_TAG); driver1->MockWriteResponse(true); auto driver2 = Aws::MakeShared(ALLOC_TAG); auto driverFactory = Aws::MakeShared(ALLOC_TAG); driverFactory->AddDriver(driver1); driverFactory->AddDriver(driver2); auto strStream = Aws::New(ALLOC_TAG); const char STREAM_CONTENT[] = "Stream content. blah blah blah"; *strStream << STREAM_CONTENT; SynthesizeSpeechResult res; res.ReplaceBody(strStream); pollyClient->MockSynthesizeSpeech(std::move(res)); auto manager = TextToSpeechManager::Create(pollyClient, driverFactory); DeviceInfo devInfo1; devInfo1.deviceId = "device1"; devInfo1.deviceName = "deviceName1"; CapabilityInfo capability; capability.sampleRate = KHZ_8; devInfo1.capabilities.push_back(capability); driver1->AddDevice(devInfo1); manager->SetActiveVoice("Maxim"); std::mutex lock; std::condition_variable semaphore; const char* REQUEST_TEXT = "Blah blah blah"; SendTextCompletedHandler handler = [&](const char* text, const SynthesizeSpeechOutcome&, bool sent) { std::lock_guard lockGuard(lock); EXPECT_STREQ(REQUEST_TEXT, text); EXPECT_TRUE(sent); semaphore.notify_all(); }; std::unique_lock locker(lock); manager->SendTextToOutputDevice(REQUEST_TEXT, handler); semaphore.wait(locker); auto capturedRequest = pollyClient->GetCapturedSynthesizeSpeech(); ASSERT_STREQ(REQUEST_TEXT, capturedRequest.GetText().c_str()); ASSERT_EQ(VoiceId::Maxim, capturedRequest.GetVoiceId()); ASSERT_EQ(1u, driver1->GetPrimeCalledCount()); ASSERT_EQ(1u, driver1->GetFlushCalledCount()); auto buffers = driver1->GetWrittenBuffers(); ASSERT_EQ(1u, buffers.size()); char actualSentData[sizeof(STREAM_CONTENT)]; memset(actualSentData, 0, sizeof(STREAM_CONTENT)); memcpy(actualSentData, buffers[0].GetUnderlyingData(), buffers[0].GetLength()); ASSERT_STREQ(STREAM_CONTENT, actualSentData); ASSERT_EQ(0u, driver2->GetFlushCalledCount()); ASSERT_EQ(0u, driver2->GetPrimeCalledCount()); pollyClient = nullptr; } TEST(TextToSpeechManagerTests, TestSynthRequestFailedAndNoOutput) { Aws::Client::ClientConfiguration clientConfig; clientConfig.executor = Aws::MakeShared(ALLOC_TAG, 5); auto pollyClient = Aws::MakeShared(ALLOC_TAG, clientConfig); auto driver1 = Aws::MakeShared(ALLOC_TAG); driver1->MockWriteResponse(true); auto driverFactory = Aws::MakeShared(ALLOC_TAG); driverFactory->AddDriver(driver1); pollyClient->MockSynthesizeSpeech(Aws::Client::AWSError(PollyErrors::ACCESS_DENIED, false)); auto manager = TextToSpeechManager::Create(pollyClient, driverFactory); DeviceInfo devInfo1; devInfo1.deviceId = "device1"; devInfo1.deviceName = "deviceName1"; CapabilityInfo capability; capability.sampleRate = KHZ_8; devInfo1.capabilities.push_back(capability); manager->SetActiveDevice(driver1, devInfo1, capability); manager->SetActiveVoice("Maxim"); std::mutex lock; std::condition_variable semaphore; const char* REQUEST_TEXT = "Blah blah blah"; SendTextCompletedHandler handler = [&](const char* text, const SynthesizeSpeechOutcome&, bool sent) { std::lock_guard lockGuard(lock); EXPECT_STREQ(REQUEST_TEXT, text); EXPECT_FALSE(sent); semaphore.notify_all(); }; std::unique_lock locker(lock); manager->SendTextToOutputDevice(REQUEST_TEXT, handler); semaphore.wait(locker); auto capturedRequest = pollyClient->GetCapturedSynthesizeSpeech(); ASSERT_STREQ(REQUEST_TEXT, capturedRequest.GetText().c_str()); ASSERT_EQ(VoiceId::Maxim, capturedRequest.GetVoiceId()); ASSERT_EQ(0u, driver1->GetPrimeCalledCount()); ASSERT_EQ(0u, driver1->GetFlushCalledCount()); auto buffers = driver1->GetWrittenBuffers(); ASSERT_EQ(0u, buffers.size()); pollyClient = nullptr; } TEST(TextToSpeechManagerTests, TestListingVoices) { auto polly = Aws::MakeShared(ALLOC_TAG); auto manager = TextToSpeechManager::Create(polly); auto voices = manager->ListAvailableVoices(); ASSERT_GE(voices.size(), 0u); // we're not mocking this API call, which means it will fail and return zero voices on machines without valid creds. }