/*
* All or portions of this file Copyright (c) Amazon.com, Inc. or its affiliates or
* its licensors.
*
* For complete copyright and license terms please see the LICENSE at the root of this
* distribution (the "License"). All use of this software is governed by the License,
* or, if provided, by the license below or the license accompanying this file. Do not
* remove or modify any license notices. This file is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*
*/

#include "Microphone_precompiled.h"
#include "MicrophoneSystemComponent.h"

// NOTE(review): the original header names inside the angle brackets were lost during
// extraction; these are reconstructed from the symbols used below — confirm against
// the project tree.
#include <AzCore/Memory/SystemAllocator.h>
#include <AzCore/std/parallel/atomic.h>
#include <AzCore/std/parallel/thread.h>
#include <AzCore/std/smart_ptr/unique_ptr.h>
#include <AzCore/std/string/string.h>

#include <AudioRingBuffer.h>
#include <IAudioInterfacesCommonData.h>

#include <mmdeviceapi.h>
#include <audioclient.h>
#include <functiondiscoverykeys_devpkey.h>

#if defined(USE_LIBSAMPLERATE)
#include <samplerate.h>
#endif // USE_LIBSAMPLERATE

namespace Audio
{
    ///////////////////////////////////////////////////////////////////////////////////////////////
    // WASAPI-based microphone capture implementation for Windows.
    // Owns the MMDevice/enumerator COM objects, a shared-mode IAudioClient capture session,
    // and a worker thread that drains WASAPI packets into an internal ring buffer.
    // Lifecycle: InitializeDevice -> StartSession -> (capture thread runs) -> EndSession -> ShutdownDevice.
    class MicrophoneSystemComponentWindows
        : public MicrophoneSystemComponent::Implementation
    {
    public:
        AZ_CLASS_ALLOCATOR(MicrophoneSystemComponentWindows, AZ::SystemAllocator, 0);

        ///////////////////////////////////////////////////////////////////////////////////////////
        // Initializes COM, creates an MMDevice enumerator, and resolves the default capture
        // endpoint.  Returns false when COM activation fails or no microphone is present.
        bool InitializeDevice() override
        {
            AZ_TracePrintf("WindowsMicrophone", "Initializing Microphone device - Windows!!\n");

            // Assert: m_enumerator, m_device, m_audioClient, m_audioCaptureClient are all nullptr!
            AZ_Assert(!m_enumerator && !m_device && !m_audioClient && !m_audioCaptureClient,
                "InitializeDevice - One or more pointers are not null before init!\n");

            // This Gem initializes very early, before Qt application is booted.
            // Qt calls OleInitialize internally, which initializes COM with Apartment Threaded model.
            // To avoid errors, we initialize COM here with the same model.
            CoInitializeEx(nullptr, COINIT_APARTMENTTHREADED);

            const CLSID CLSID_MMDeviceEnumerator = __uuidof(MMDeviceEnumerator);
            const IID IID_IMMDeviceEnumerator = __uuidof(IMMDeviceEnumerator);

            HRESULT hresult = CoCreateInstance(
                CLSID_MMDeviceEnumerator,
                nullptr,
                CLSCTX_ALL,
                IID_IMMDeviceEnumerator,
                reinterpret_cast<void**>(&m_enumerator)
            );

            if (FAILED(hresult))
            {
                AZ_Error("WindowsMicrophone", false, "Failed to create an MMDeviceEnumerator!\n");
                return false;
            }

            if (m_enumerator)
            {
                hresult = m_enumerator->GetDefaultAudioEndpoint(EDataFlow::eCapture, ERole::eConsole, &m_device);
                if (!m_device || hresult == ERROR_NOT_FOUND)
                {
                    AZ_Warning("WindowsMicrophone", false, "No Microphone Device found!\n");
                    return false;
                }

                if (FAILED(hresult))
                {
                    AZ_Error("WindowsMicrophone", false, "HRESULT %d received while getting the default endpoint!\n", hresult);
                    return false;
                }

                IPropertyStore* deviceProps = nullptr;
                hresult = m_device->OpenPropertyStore(STGM_READ, &deviceProps);
                if (FAILED(hresult))
                {
                    AZ_Warning("WindowsMicrophone", false, "Failed to open the enpoint device's properties!\n");
                    // not a full failure
                }
                else
                {
                    // Fix: only query the store when OpenPropertyStore succeeded — the original
                    // dereferenced the (possibly null) store even after a failed open.
                    PROPVARIANT endpointName;
                    PropVariantInit(&endpointName);
                    hresult = deviceProps->GetValue(PKEY_Device_FriendlyName, &endpointName);
                    if (FAILED(hresult))
                    {
                        AZ_Warning("WindowsMicrophone", false, "Failed to get the endpoint device's friendly name!\n");
                        // not a full failure
                    }
                    else
                    {
                        AZ_TracePrintf("WindowsMicrophone", "Microphone Endpoint Device Initialized: %S\n", endpointName.pwszVal);
                        m_deviceName = endpointName.pwszVal;
                    }

                    PropVariantClear(&endpointName);
                }
                SAFE_RELEASE(deviceProps);
            }

            return true;
        }

        ///////////////////////////////////////////////////////////////////////////////////////////
        // Releases the device/enumerator COM objects and uninitializes COM.
        // Requires EndSession to have run first (audio clients must already be released).
        void ShutdownDevice() override
        {
            AZ_TracePrintf("WindowsMicrophone", "Shutting down Microphone device - Windows!\n");

            // Assert: m_audioClient and m_audioCaptureClient are both nullptr! (i.e. the capture thread is not running)
            AZ_Assert(!m_audioClient && !m_audioCaptureClient,
                "ShutdownDevice - Audio Client pointers are not null! You need to call EndSession first!\n");

            SAFE_RELEASE(m_device);
            SAFE_RELEASE(m_enumerator);

            CoUninitialize();
        }

        ///////////////////////////////////////////////////////////////////////////////////////////
        // Activates an IAudioClient on the device, initializes a shared-mode capture stream with
        // the device's mix format, validates/allocates internal buffers, starts recording, and
        // spawns the capture thread.  Returns false on any WASAPI failure or unsupported format.
        bool StartSession() override
        {
            AZ_TracePrintf("WindowsMicrophone", "Starting Microphone session - Windows!\n");
            AZ_Assert(m_device != nullptr, "Attempting to start a Microphone session while the device is uninitialized - Windows!\n");

            // Get the IAudioClient from the device
            const IID IID_IAudioClient = __uuidof(IAudioClient);
            HRESULT hresult = m_device->Activate(IID_IAudioClient, CLSCTX_ALL, nullptr, reinterpret_cast<void**>(&m_audioClient));
            if (FAILED(hresult))
            {
                AZ_Error("WindowsMicrophone", false, "Failed to get an IAudioClient on the device - Windows!\n");
                return false;
            }

            // Get the mix format of the IAudioClient
            hresult = m_audioClient->GetMixFormat(&m_streamFormat);
            if (FAILED(hresult))
            {
                AZ_Error("WindowsMicrophone", false, "Failed to get the mix format from the IAudioClient - Windows!\n");
                return false;
            }

            // Initialize the IAudioClient
            // Note: REFERENCE_TIME = 100 nanoseconds (1e2)
            // --> 1e9 ns = 1 sec
            // --> 1e7 (REFTIMES_PER_SEC) * 1e2 (REFERENCE_TIME) ns = 1 sec
            const AZ::u64 REFTIMES_PER_SEC = 10000000;
            REFERENCE_TIME duration = REFTIMES_PER_SEC;

            hresult = m_audioClient->Initialize(
                AUDCLNT_SHAREMODE_SHARED,   // Share Mode
                0,                          // Stream Flags
                duration,                   // Buffer Duration
                0,                          // Periodicity
                m_streamFormat,             // Wave Format Ex
                nullptr                     // Audio Session GUID
            );

            if (FAILED(hresult))
            {
                AZ_Error("WindowsMicrophone", false, "Failed to initialize the IAudioClient - Windows!\n");
                return false;
            }

            // Get size of the allocated buffer
            hresult = m_audioClient->GetBufferSize(&m_bufferFrameCount);
            if (FAILED(hresult))
            {
                AZ_Error("WindowsMicrophone", false, "Failed to get the buffer size of the IAudioClient - Windows!\n");
                return false;
            }

            // Get the IAudioCaptureClient
            const IID IID_IAudioCaptureClient = __uuidof(IAudioCaptureClient);
            hresult = m_audioClient->GetService(IID_IAudioCaptureClient, reinterpret_cast<void**>(&m_audioCaptureClient));
            if (FAILED(hresult))
            {
                // Some possible results: (hresult == E_NOINTERFACE || hresult == AUDCLNT_E_NOT_INITIALIZED || hresult == AUDCLNT_E_WRONG_ENDPOINT_TYPE)
                AZ_Error("WindowsMicrophone", false, "Failed to get an IAudioCaptureClient service interface - Windows!\n");
                return false;
            }

            // Set format for internal sink
            SetFormatInternal(m_bufferFrameCount);
            if (!ValidateFormatInternal())
            {
                AZ_Error("WindowsMicrophone", false, "Failed to set a supported format - Windows!\n");
                return false;
            }

            AllocateBuffersInternal();

            // Actual buffer duration in REFERENCE_TIME units.
            // Fix: multiply before dividing — the original computed (frames / sampleRate) as an
            // integer quotient, which truncates (often to zero) before the scale-up.
            m_bufferDuration = static_cast<double>(REFTIMES_PER_SEC) * m_bufferFrameCount / m_streamFormat->nSamplesPerSec;

            // Start recording!
            hresult = m_audioClient->Start();
            if (FAILED(hresult))
            {
                AZ_Error("WindowsMicrophone", false, "Failed to start Microphone recording - Windows!\n");
                return false;
            }

            // Spawn a new thread with the basic capture loop: [GetNextPacketSize, GetBuffer, CopyData, ReleaseBuffer]
            m_capturing = true;
            AZStd::thread_desc threadDesc;
            threadDesc.m_name = "MicrophoneCapture-WASAPI";
            auto captureFunc = AZStd::bind(&MicrophoneSystemComponentWindows::RunAudioCapture, this);
            m_captureThread = AZStd::thread(captureFunc, &threadDesc);

            return true;
        }

        ///////////////////////////////////////////////////////////////////////////////////////////
        // Capture-thread body: [GetNextPacketSize, GetBuffer, CopyData, ReleaseBuffer] loop.
        // Runs until m_capturing is cleared by EndSession or by an unrecoverable WASAPI error.
        void RunAudioCapture()
        {
            const AZ::u64 REFTIMES_PER_MILLISEC = 10000;
            AZ::u32 numFramesAvailable = 0;
            AZ::u32 packetLength = 0;
            DWORD bufferFlags = 0;
            AZ::u8* data = nullptr;

            while (m_capturing)
            {
                // Sleep for roughly half the device buffer duration between drains.
                AZStd::this_thread::sleep_for(AZStd::chrono::milliseconds(static_cast<AZ::u64>(m_bufferDuration) / REFTIMES_PER_MILLISEC / 2));

                HRESULT hresult = m_audioCaptureClient->GetNextPacketSize(&packetLength);
                if (FAILED(hresult))
                {
                    AZ_Error("WindowsMicrophone", false, "Failed to GetNextPacketSize, ending thread - Windows!\n");
                    m_capturing = false;
                    continue;
                }

                while (m_capturing && packetLength != 0)
                {
                    bufferFlags = 0;
                    hresult = m_audioCaptureClient->GetBuffer(&data, &numFramesAvailable, &bufferFlags, nullptr, nullptr);
                    if (FAILED(hresult))
                    {
                        AZ_Error("WindowsMicrophone", false, "Failed to GetBuffer, ending thread - Windows!\n");
                        packetLength = 0;
                        m_capturing = false;
                        continue;
                    }

                    if (bufferFlags & AUDCLNT_BUFFERFLAGS_SILENT)
                    {
                        data = nullptr;     // signals internal buffer to write silence
                    }

                    // Pass 0 to ReleaseBuffer when the sink couldn't take the frames, so WASAPI
                    // re-presents the same packet on the next GetBuffer.
                    if (!CopyDataInternal(data, numFramesAvailable))
                    {
                        numFramesAvailable = 0;
                    }

                    hresult = m_audioCaptureClient->ReleaseBuffer(numFramesAvailable);
                    if (FAILED(hresult))
                    {
                        AZ_Error("WindowsMicrophone", false, "Failed to ReleaseBuffer, ending thread - Windows!\n");
                        packetLength = 0;
                        m_capturing = false;
                        continue;
                    }

                    hresult = m_audioCaptureClient->GetNextPacketSize(&packetLength);
                    if (FAILED(hresult))
                    {
                        AZ_Error("WindowsMicrophone", false, "Failed to GetNextPacketSize, ending thread - Windows!\n");
                        packetLength = 0;
                        m_capturing = false;
                        continue;
                    }
                }
            }

            // Any post-thread cleanup?
        }

        ///////////////////////////////////////////////////////////////////////////////////////////
        // Signals the capture thread to stop, joins it, stops the audio client, and releases the
        // session objects and internal buffers.  Safe to call when no session is active.
        void EndSession() override
        {
            AZ_TracePrintf("WindowsMicrophone", "Ending Microphone session - Windows!\n");

            // Signal thread to end
            m_capturing = false;

            // Thread join
            if (m_captureThread.joinable())
            {
                m_captureThread.join();
                m_captureThread = AZStd::thread();      // destroy
            }

            AZ_TracePrintf("WindowsMicrophone", "Microphone capture thread ended - Windows!");

            if (m_audioClient)
            {
                // Stop recording!
                HRESULT hresult = m_audioClient->Stop();
                if (FAILED(hresult))
                {
                    AZ_Error("WindowsMicrophone", false, "Failed to stop Microphone recording - Windows!\n");
                }
            }

            SAFE_RELEASE(m_audioCaptureClient);
            SAFE_RELEASE(m_audioClient);

            // GetMixFormat allocated this via CoTaskMemAlloc; free it the matching way.
            CoTaskMemFree(m_streamFormat);
            m_streamFormat = nullptr;

            DeallocateBuffersInternal();
        }

        ///////////////////////////////////////////////////////////////////////////////////////////
        bool IsCapturing() override
        {
            return m_capturing;
        }

        ///////////////////////////////////////////////////////////////////////////////////////////
        SAudioInputConfig GetFormatConfig() const override
        {
            return m_config;
        }

        ///////////////////////////////////////////////////////////////////////////////////////////
        // Returns the number of sample frames obtained.
        // Consumes captured frames into outputData, converting sample type / sample rate /
        // channel count from the microphone format (m_config) to targetConfig where the
        // libsamplerate path is compiled in.  Without USE_LIBSAMPLERATE, any conversion request
        // returns 0 frames.
        AZStd::size_t GetData(void** outputData, AZStd::size_t numFrames, const SAudioInputConfig& targetConfig, bool shouldDeinterleave) override
        {
            bool changeSampleType = (targetConfig.m_sampleType != m_config.m_sampleType);
            bool changeSampleRate = (targetConfig.m_sampleRate != m_config.m_sampleRate);
            bool changeNumChannels = (targetConfig.m_numChannels != m_config.m_numChannels);

        #if defined(USE_LIBSAMPLERATE)
            bool micFormatIsInt = (m_config.m_sampleType == AudioInputSampleType::Int);
            bool targetFormatIsInt = (targetConfig.m_sampleType == AudioInputSampleType::Int);
            bool stereoToMono = (targetConfig.m_numChannels == 1 && m_config.m_numChannels == 2);

            // Handle the default no-change case
            if (!(changeSampleType || changeSampleRate || changeNumChannels))
            {
                return m_captureData->ConsumeData(outputData, numFrames, m_config.m_numChannels, shouldDeinterleave);
            }

            // Consume mic data into the 'working' conversion buffer (In)...
            numFrames = m_captureData->ConsumeData(reinterpret_cast<void**>(&m_conversionBufferIn.m_data), numFrames, m_config.m_numChannels, false);

            if (micFormatIsInt && (changeSampleType || changeSampleRate || changeNumChannels))
            {
                // Do a prep [Int]-->[Float] conversion...
                src_short_to_float_array(
                    reinterpret_cast<short*>(m_conversionBufferIn.m_data),
                    reinterpret_cast<float*>(m_conversionBufferOut.m_data),
                    numFrames * m_config.m_numChannels
                );

                // Swap to move the 'working' buffer back to the 'In' buffer.
                AZStd::swap(m_conversionBufferIn.m_data, m_conversionBufferOut.m_data);
            }

            if (changeSampleRate)
            {
                if (m_srcState && targetConfig.m_sampleRate < m_config.m_sampleRate)
                {
                    // Setup Conversion Data
                    m_srcData.end_of_input = 0;
                    m_srcData.input_frames = numFrames;
                    m_srcData.output_frames = numFrames;
                    m_srcData.data_in = reinterpret_cast<float*>(m_conversionBufferIn.m_data);
                    m_srcData.data_out = reinterpret_cast<float*>(m_conversionBufferOut.m_data);

                    // Conversion ratio is output_sample_rate / input_sample_rate
                    m_srcData.src_ratio = static_cast<double>(targetConfig.m_sampleRate) / static_cast<double>(m_config.m_sampleRate);

                    // Process
                    int error = src_process(m_srcState, &m_srcData);
                    if (error != 0)
                    {
                        AZ_TracePrintf("WindowsMicrophone", "SRC(src_process): %s - Windows!\n", src_strerror(error));
                    }

                    AZ_Warning("WindowsMicrophone", numFrames == m_srcData.input_frames_used,
                        "SRC(src_process): Num Frames requested (%u) was different than Num Frames processed (%u) - Windows!\n",
                        numFrames, m_srcData.input_frames_used);

                    numFrames = m_srcData.output_frames_gen;

                    // Swap to move the 'working' buffer back to the 'In' buffer.
                    AZStd::swap(m_conversionBufferIn.m_data, m_conversionBufferOut.m_data);
                }
                else
                {
                    // Unable to continue: Either upsampling is requested or there is no resampler state.
                    // todo: Upsampling could be done if we reallocate the conversion buffers. Right now we assume a max size of the buffer
                    // based on the size of the ringbuffer. If samplerate is to increase, those buffers would need to increase in size
                    // too. GetData is the only point which we know the target samplerate.
                    return 0;
                }
            }

            if (changeNumChannels)
            {
                if (stereoToMono)
                {
                    // Samples are interleaved now, copy only left channel to the output
                    float* inputData = reinterpret_cast<float*>(m_conversionBufferIn.m_data);
                    float* outputChannel = reinterpret_cast<float*>(m_conversionBufferOut.m_data);
                    for (AZ::u32 frame = 0; frame < numFrames; ++frame)
                    {
                        outputChannel[frame] = *inputData++;
                        ++inputData;    // skip the right-channel sample
                    }
                }
                else // monoToStereo
                {
                    // Split single samples to both left and right channels
                    if (shouldDeinterleave)
                    {
                        float* inputData = reinterpret_cast<float*>(m_conversionBufferIn.m_data);
                        // NOTE(review): treating the raw working buffer as an array of channel
                        // pointers looks suspect — confirm the buffer really holds float* entries
                        // on this path before relying on it.
                        float** outputChannels = reinterpret_cast<float**>(m_conversionBufferOut.m_data);
                        for (AZ::u32 frame = 0; frame < numFrames; ++frame)
                        {
                            outputChannels[0][frame] = outputChannels[1][frame] = inputData[frame];
                        }
                    }
                    else
                    {
                        float* inputData = reinterpret_cast<float*>(m_conversionBufferIn.m_data);
                        float* outputInterleaved = reinterpret_cast<float*>(m_conversionBufferOut.m_data);
                        for (AZ::u32 frame = 0; frame < numFrames; ++frame)
                        {
                            *outputInterleaved++ = inputData[frame];
                            *outputInterleaved++ = inputData[frame];
                        }
                    }
                }

                // Swap to move the 'working' buffer back to the 'In' buffer.
                AZStd::swap(m_conversionBufferIn.m_data, m_conversionBufferOut.m_data);
            }

            if (targetFormatIsInt)
            {
                // Do a final [Float]-->[Int] conversion...
                src_float_to_short_array(
                    reinterpret_cast<float*>(m_conversionBufferIn.m_data),
                    *reinterpret_cast<short**>(outputData),
                    numFrames * m_config.m_numChannels
                );
            }
            else
            {
                // Otherwise, we're done, just memcpy the 'working' buffer to the output.
                // NOTE(review): the int path above writes through *outputData but this copies to
                // outputData itself — verify which destination convention callers expect.
                ::memcpy(outputData, m_conversionBufferIn.m_data, numFrames * targetConfig.m_numChannels * (targetConfig.m_bitsPerSample >> 3));
            }

            return numFrames;

        #else
            if (changeSampleType || changeSampleRate || changeNumChannels)
            {
                // Without the SRC library, any change is unsupported!
                return 0;
            }
            else
            {
                // No change to the data from Input to Output
                return m_captureData->ConsumeData(outputData, numFrames, m_config.m_numChannels, shouldDeinterleave);
            }
        #endif // USE_LIBSAMPLERATE
        }

        ///////////////////////////////////////////////////////////////////////////////////////////
        // Populates m_config (sample type, channels, rate, bit depth, buffer size) from the
        // device mix format.  Handles both WAVE_FORMAT_EXTENSIBLE and plain WAVEFORMATEX tags.
        void SetFormatInternal(AZ::u32 bufferFrameCount)
        {
            if (m_streamFormat->wFormatTag == WAVE_FORMAT_EXTENSIBLE)
            {
                auto streamFormatExt = reinterpret_cast<WAVEFORMATEXTENSIBLE*>(m_streamFormat);
                if (streamFormatExt)
                {
                    if (streamFormatExt->SubFormat == KSDATAFORMAT_SUBTYPE_PCM)
                    {
                        AZ_TracePrintf("WindowsMicrophone", "PCM Format - Windows!\n");
                        m_config.m_sampleType = AudioInputSampleType::Int;
                    }
                    else if (streamFormatExt->SubFormat == KSDATAFORMAT_SUBTYPE_IEEE_FLOAT)
                    {
                        AZ_TracePrintf("WindowsMicrophone", "IEEE Float Format - Windows!\n");
                        m_config.m_sampleType = AudioInputSampleType::Float;
                    }
                    else
                    {
                        m_config.m_sampleType = AudioInputSampleType::Unsupported;
                    }

                    if (streamFormatExt->dwChannelMask == KSAUDIO_SPEAKER_MONO)
                    {
                        AZ_TracePrintf("WindowsMicrophone", "Channel Format: Mono - Windows!\n");
                        m_config.m_numChannels = 1;
                    }
                    else if (streamFormatExt->dwChannelMask == KSAUDIO_SPEAKER_STEREO)
                    {
                        AZ_TracePrintf("WindowsMicrophone", "Channel Format: Stereo - Windows!\n");
                        m_config.m_numChannels = 2;
                    }
                    else
                    {
                        AZ_Error("WindowsMicrophone", false, "Only Mono and Stereo microphone inputs are supported - Windows!\n");
                        m_config.m_numChannels = 0;
                    }

                    m_config.m_sampleRate = m_streamFormat->nSamplesPerSec;
                    m_config.m_bitsPerSample = m_streamFormat->wBitsPerSample;

                    AZ_TracePrintf("WindowsMicrophone", "Sample Rate: %u - Windows!\n", m_config.m_sampleRate);
                    AZ_TracePrintf("WindowsMicrophone", "Bits Per Sample: %u - Windows!\n", m_config.m_bitsPerSample);

                    m_config.m_sourceType = Audio::AudioInputSourceType::Microphone;
                    m_config.SetBufferSizeFromFrameCount(bufferFrameCount);
                }
            }
            else
            {
                // Untested code path.
                // Every device I tested went through the wave format extensible path...
                if (m_streamFormat->wFormatTag == WAVE_FORMAT_PCM)
                {
                    m_config.m_sampleType = AudioInputSampleType::Int;
                }
                else if (m_streamFormat->wFormatTag == WAVE_FORMAT_IEEE_FLOAT)
                {
                    m_config.m_sampleType = AudioInputSampleType::Float;
                }
                else
                {
                    m_config.m_sampleType = AudioInputSampleType::Unsupported;
                }

                m_config.m_numChannels = m_streamFormat->nChannels;
                m_config.m_sampleRate = m_streamFormat->nSamplesPerSec;
                m_config.m_bitsPerSample = m_streamFormat->wBitsPerSample;
                m_config.SetBufferSizeFromFrameCount(bufferFrameCount);
            }
        }

        ///////////////////////////////////////////////////////////////////////////////////////////
        // Returns true only if m_config describes a format this implementation supports:
        // 1-2 channels, a known sample type, 16-bit depth for integer samples, non-zero buffer.
        bool ValidateFormatInternal()
        {
            bool valid = true;
            if (m_config.m_numChannels < 1 || m_config.m_numChannels > 2)
            {
                AZ_TracePrintf("WindowsMicrophone", "Only Mono and Stereo Microphone inputs are supported - Windows!\n");
                valid = false;
            }

            if (m_config.m_sampleType == AudioInputSampleType::Unsupported)
            {
                AZ_TracePrintf("WindowsMicrophone", "Unsupported sample format detected - Windows!\n");
                valid = false;
            }

            if (m_config.m_sampleType == AudioInputSampleType::Int && m_config.m_bitsPerSample != 16)
            {
                AZ_TracePrintf("WindowsMicrophone", "Only bitdepths of 16 bits are supported with integer samples - Windows!\n");
                valid = false;
            }

            if (m_config.m_bufferSize == 0)
            {
                AZ_TracePrintf("WindowsMicrophone", "Buffer size for the Microphone input has not been set - Windows!\n");
                valid = false;
            }

            return valid;
        }

        ///////////////////////////////////////////////////////////////////////////////////////////
        // Allocates the capture ring buffer (typed by the device sample format) and, when
        // libsamplerate is available, the resampler state plus two float-sized scratch buffers.
        void AllocateBuffersInternal()
        {
            AZ_Assert(m_config.m_bufferSize > 0, "Format was checked already, but buffer size of the Microhpone input is zero - Windows!\n");

            DeallocateBuffersInternal();

            if (m_config.m_sampleType == AudioInputSampleType::Float)
            {
                AZ_Assert(m_config.m_bitsPerSample == 32, "Format was checked already, but non-32-bit float samples are detected - Windows!\n");
                m_captureData.reset(aznew Audio::RingBuffer<float>(m_config.GetSampleCountFromBufferSize()));
            }
            else if (m_config.m_sampleType == AudioInputSampleType::Int)
            {
                AZ_Assert(m_config.m_bitsPerSample == 16, "Format was checked already, but non-16-bit integer samples are detected - Windows!\n");
                m_captureData.reset(aznew Audio::RingBuffer<AZ::s16>(m_config.GetSampleCountFromBufferSize()));
            }

        #if defined(USE_LIBSAMPLERATE)
            // New SRC State
            if (!m_srcState)
            {
                int error = 0;
                m_srcState = src_new(SRC_SINC_MEDIUM_QUALITY, m_config.m_numChannels, &error);
                if (m_srcState)
                {
                    AZStd::size_t conversionBufferMaxSize = m_config.GetSampleCountFromBufferSize() * sizeof(float);    // Use this because float is the biggest sample type.

                    m_conversionBufferIn.m_data = static_cast<AZ::u8*>(azmalloc(conversionBufferMaxSize, MEMORY_ALLOCATION_ALIGNMENT, AZ::SystemAllocator));
                    m_conversionBufferIn.m_sizeBytes = conversionBufferMaxSize;
                    m_conversionBufferOut.m_data = static_cast<AZ::u8*>(azmalloc(conversionBufferMaxSize, MEMORY_ALLOCATION_ALIGNMENT, AZ::SystemAllocator));
                    m_conversionBufferOut.m_sizeBytes = conversionBufferMaxSize;

                    ::memset(m_conversionBufferIn.m_data, 0, m_conversionBufferIn.m_sizeBytes);
                    ::memset(m_conversionBufferOut.m_data, 0, m_conversionBufferOut.m_sizeBytes);
                }
                else
                {
                    AZ_TracePrintf("WindowsMicrophone", "SRC(src_new): %s - Windows!\n", src_strerror(error));
                }
            }
        #endif // USE_LIBSAMPLERATE
        }

        ///////////////////////////////////////////////////////////////////////////////////////////
        // Releases the ring buffer, resampler state, and conversion scratch buffers.
        // Idempotent — safe to call when nothing is allocated.
        void DeallocateBuffersInternal()
        {
            if (m_captureData)
            {
                m_captureData.reset();
            }

        #if defined(USE_LIBSAMPLERATE)
            // Cleanup SRC State (src_delete returns nullptr)
            m_srcState = src_delete(m_srcState);
            m_srcState = nullptr;

            if (m_conversionBufferIn.m_data)
            {
                azfree(m_conversionBufferIn.m_data, AZ::SystemAllocator);
                m_conversionBufferIn.m_data = nullptr;
                m_conversionBufferIn.m_sizeBytes = 0;
            }

            if (m_conversionBufferOut.m_data)
            {
                azfree(m_conversionBufferOut.m_data, AZ::SystemAllocator);
                m_conversionBufferOut.m_data = nullptr;
                m_conversionBufferOut.m_sizeBytes = 0;
            }
        #endif // USE_LIBSAMPLERATE
        }

        ///////////////////////////////////////////////////////////////////////////////////////////
        bool CopyDataInternal(const AZ::u8* inputData, AZStd::size_t numFrames)
        {
            // This function should return false if unable to copy all the frames! That way we can pass 0 to ReleaseBuffer to signal
            // that the buffer was not consumed yet. The api states that between GetBuffer and corresponding ReleaseBuffer calls, you
            // need to read all of it or none of it. If unable to copy the entire buffer, calling ReleaseBuffer with 0 means that the
            // next call to GetBuffer will return the same buffer that wasn't consumed.
            AZStd::size_t numFramesCopied = m_captureData->AddData(inputData, numFrames, m_config.m_numChannels);
            return numFramesCopied > 0;
        }

    private:
        // Device and format data...
        IAudioClient* m_audioClient = nullptr;
        IAudioCaptureClient* m_audioCaptureClient = nullptr;
        IMMDevice* m_device = nullptr;
        IMMDeviceEnumerator* m_enumerator = nullptr;
        WAVEFORMATEX* m_streamFormat = nullptr;

        // Thread data...
        AZStd::atomic<bool> m_capturing = false;
        AZStd::wstring m_deviceName;
        AZStd::thread m_captureThread;

        // Wasapi buffer data...
        double m_bufferDuration = 0;        // in REFTIMES_PER_SEC (1e7)
        AZ::u32 m_bufferFrameCount = 0;     // number of frames the Wasapi buffer holds

        Audio::SAudioInputConfig m_config;  // the format configuration
        AZStd::unique_ptr<Audio::RingBufferBase> m_captureData = nullptr;

    #if defined(USE_LIBSAMPLERATE)
        // Sample Rate Converter data...
        SRC_STATE* m_srcState = nullptr;
        SRC_DATA m_srcData;
        AudioStreamData m_conversionBufferIn;
        AudioStreamData m_conversionBufferOut;
    #endif // USE_LIBSAMPLERATE
    };

    ///////////////////////////////////////////////////////////////////////////////////////////////
    MicrophoneSystemComponent::Implementation* MicrophoneSystemComponent::Implementation::Create()
    {
        return aznew MicrophoneSystemComponentWindows();
    }

} // namespace Audio