#include "audio_capturer.h"
#include "basic/basic.h"

#include <algorithm>
#include <cstring>

#define DEFAULT_SAMPLE_RATE 48000 // Default sample rate: 48 kHz
#define DEFAULT_BITS_PER_SAMPLE 16 // Default bit depth: 16 bit
#define DEFAULT_CHANNELS 1 // Default number of audio channels: 1
#define DEFAULT_AUDIO_PACKET_INTERVAL 10 // Default audio packet interval: 10 ms

/// Tears down any previous session (via Stop()) and builds a new capture
/// chain for the requested device type: device enumerator -> default
/// endpoint -> IAudioClient -> IAudioCaptureClient.
///
/// On success the negotiated stream parameters are mirrored into `_format`
/// and `_isInit` is set.
///
/// @param deviceType Selects which default endpoint is opened; see
///                   _CreateDevice() and _InitAudioClient() for how
///                   `Microphone` is treated differently from other values.
/// @return true when every step succeeded, false as soon as one step fails.
bool AudioCapturer::Init(Type deviceType)
{
    Stop();
    _deviceType = deviceType;

    __CheckBool(_CreateDeviceEnumerator(&_pDeviceEnumerator));
    __CheckBool(_CreateDevice(_pDeviceEnumerator, &_pDevice));
    __CheckBool(_CreateAudioClient(_pDevice, &_pAudioClient));

    // Fall back to the engine's mix format when the default request is not
    // accepted by _IsFormatSupported().
    if (!_IsFormatSupported(_pAudioClient)) {
        __CheckBool(_GetPreferFormat(_pAudioClient, &_formatex));
    }

    __CheckBool(_InitAudioClient(_pAudioClient, &_formatex));
    __CheckBool(_CreateAudioCaptureClient(_pAudioClient, &_pAudioCaptureClient));

    // Publish the format that the audio client was initialized with.
    const WAVEFORMATEX& negotiated = _formatex.Format;
    _format.sampleRate = negotiated.nSamplesPerSec;
    _format.channels = negotiated.nChannels;
    _format.bitsPerSample = negotiated.wBitsPerSample;
    _format.avgBytesPerSec = negotiated.nAvgBytesPerSec;
    _format.blockAlign = negotiated.nBlockAlign;

    _isInit = true;
    return true;
}

/// Starts the background capture thread. Requires a successful Init().
///
/// For microphone capture a silence player is started first; its failure is
/// deliberately ignored because it is not required for capturing.
///
/// NOTE(review): calling Start() twice without an intervening Stop()
/// overwrites `_captureThread` while the first thread is still running.
///
/// @return false when Init() has not succeeded, true otherwise.
bool AudioCapturer::Start()
{
    __CheckBool(_isInit);

    // For a microphone device, start the silence player so that the audio
    // engine stays active.
    if (_deviceType == Microphone) {
        if (!_InitializeSilencePlayer()) {
            // A silence-player failure does not affect the main function;
            // it would only be logged.
            // qDebug() << "Failed to initialize silence player for microphone";
        }
    }

    _loopFlag = true;
    // PlaySoundA("./rc/mute.wav", nullptr, SND_FILENAME | SND_ASYNC | SND_LOOP);
    _captureThread = new std::thread([this] {
        _ThreadRun(_pAudioClient, _pAudioCaptureClient);
    });
    return true;
}

/// Stops capturing and releases every resource acquired by Init()/Start().
/// Safe to call when nothing has been initialized (Init() calls it first).
void AudioCapturer::Stop()
{
    // CoUninitialize();
    _isInit = false;
    _loopFlag = false; // asks _ThreadRun() to leave its loop

    // Shut down the silence player.
    _CleanupSilencePlayer();

    Free(_captureThread, [this] {
        _captureThread->join();
        delete _captureThread;
    });

    Free(_pAudioCaptureClient, [this] { _pAudioCaptureClient->Release(); });

    if (_pAudioClient != nullptr) {
        _pAudioClient->Stop();
    }
    // PlaySoundA(nullptr, nullptr, SND_FILENAME | SND_ASYNC | SND_LOOP);

    Free(_pAudioClient, [this] { _pAudioClient->Release(); });
    Free(_pDevice, [this] { _pDevice->Release(); });
    Free(_pDeviceEnumerator, [this] { _pDeviceEnumerator->Release(); });
}

/// Creates the MMDeviceEnumerator COM object.
///
/// COM initialization is not performed here (the CoInitializeEx calls are
/// commented out below), so the caller's thread is expected to have done it.
///
/// @param enumerator Receives the created interface pointer.
/// @return true on success.
bool AudioCapturer::_CreateDeviceEnumerator(IMMDeviceEnumerator** enumerator)
{
    // __CheckBool(SUCCEEDED(CoInitializeEx(nullptr, COINIT_MULTITHREADED)));
    // __CheckBool(SUCCEEDED(CoInitializeEx(nullptr, COINIT_APARTMENTTHREADED)));
    const HRESULT hr = CoCreateInstance(__uuidof(MMDeviceEnumerator),
                                        nullptr,
                                        CLSCTX_ALL,
                                        __uuidof(IMMDeviceEnumerator),
                                        reinterpret_cast<void**>(enumerator));
    __CheckBool(SUCCEEDED(hr));
    return true;
}

/// Opens the default console-role endpoint: the capture endpoint for
/// `Microphone`, the render endpoint for any other device type.
///
/// @param enumerator Enumerator used for the lookup.
/// @param device     Receives the endpoint.
/// @return true on success.
bool AudioCapturer::_CreateDevice(IMMDeviceEnumerator* enumerator, IMMDevice** device)
{
    const EDataFlow dataFlow = (_deviceType == Microphone) ? eCapture : eRender;
    const ERole role = eConsole;
    __CheckBool(SUCCEEDED(enumerator->GetDefaultAudioEndpoint(dataFlow, role, device)));
    return true;
}

/// Activates an IAudioClient on the given endpoint.
///
/// @param device      Endpoint to activate on.
/// @param audioClient Receives the activated client.
/// @return true on success.
bool AudioCapturer::_CreateAudioClient(IMMDevice* device, IAudioClient** audioClient)
{
    const HRESULT hr = device->Activate(__uuidof(IAudioClient),
                                        CLSCTX_ALL,
                                        nullptr,
                                        reinterpret_cast<void**>(audioClient));
    __CheckBool(SUCCEEDED(hr));
    return true;
}

/// Resets `_formatex`, fills in the default sample rate / bit depth /
/// channel count and asks the audio client about that format in shared mode.
///
/// Returns true only when the call reports AUDCLNT_E_UNSUPPORTED_FORMAT
/// together with a closest-match format, whose three basic parameters are
/// then copied into `_formatex`. Every other outcome returns false, which
/// makes Init() fall back to _GetPreferFormat().
///
/// NOTE(review): S_OK and S_FALSE are not treated as "supported" here;
/// confirm against the IAudioClient::IsFormatSupported documentation whether
/// that is intended. The original decision logic is preserved.
///
/// @param audioClient Client to query.
/// @return see above.
bool AudioCapturer::_IsFormatSupported(IAudioClient* audioClient)
{
    memset(&_formatex, 0, sizeof(_formatex));

    WAVEFORMATEX* format = &_formatex.Format;
    format->nSamplesPerSec = DEFAULT_SAMPLE_RATE;
    format->wBitsPerSample = DEFAULT_BITS_PER_SAMPLE;
    format->nChannels = DEFAULT_CHANNELS;

    WAVEFORMATEX* closestMatch = nullptr;
    const HRESULT hr =
        audioClient->IsFormatSupported(AUDCLNT_SHAREMODE_SHARED, format, &closestMatch);

    bool accepted = false;
    // closestMatch may be nullptr when no similar format was found.
    if (hr == AUDCLNT_E_UNSUPPORTED_FORMAT /* 0x88890008 */ && closestMatch != nullptr) {
        format->nSamplesPerSec = closestMatch->nSamplesPerSec;
        format->wBitsPerSample = closestMatch->wBitsPerSample;
        format->nChannels = closestMatch->nChannels;
        accepted = true;
    }

    // The closest-match structure is allocated by the callee and must be
    // released by the caller; CoTaskMemFree accepts nullptr.
    CoTaskMemFree(closestMatch);
    return accepted;
}

/// Copies sample rate, bit depth and channel count of the audio engine's
/// mix format into `formatex`.
///
/// @param audioClient Client to query.
/// @param formatex    Format structure to update.
/// @return true on success.
bool AudioCapturer::_GetPreferFormat(IAudioClient* audioClient, WAVEFORMATEXTENSIBLE* formatex)
{
    WAVEFORMATEX* mixFormat = nullptr;
    __CheckBool(SUCCEEDED(audioClient->GetMixFormat(&mixFormat)));
    __CheckBool(mixFormat != nullptr);

    formatex->Format.nSamplesPerSec = mixFormat->nSamplesPerSec;
    formatex->Format.wBitsPerSample = mixFormat->wBitsPerSample;
    formatex->Format.nChannels = mixFormat->nChannels;

    // GetMixFormat allocates the structure; the caller owns and frees it.
    CoTaskMemFree(mixFormat);
    return true;
}

/// Completes `formatex` as an extensible PCM format derived from its sample
/// rate, bit depth and channel count, then initializes the audio client in
/// shared mode.
///
/// Loopback is requested for every device type other than `Microphone`.
///
/// NOTE(review): the channel mask is MONO for one channel and STEREO for any
/// other channel count; confirm this is acceptable for endpoints with more
/// than two channels.
///
/// @param audioClient Client to initialize.
/// @param formatex    In/out format description.
/// @return true on success.
bool AudioCapturer::_InitAudioClient(IAudioClient* audioClient, WAVEFORMATEXTENSIBLE* formatex)
{
    // Share the audio engine with other applications.
    const AUDCLNT_SHAREMODE shareMode = AUDCLNT_SHAREMODE_SHARED;

    DWORD streamFlags = (_deviceType == Microphone) ? 0 : AUDCLNT_STREAMFLAGS_LOOPBACK;
    // A channel matrixer and a sample rate converter are inserted.
    streamFlags |= AUDCLNT_STREAMFLAGS_AUTOCONVERTPCM;
    // A sample rate converter with better quality than the default
    // conversion, but with a higher performance cost, is used.
    streamFlags |= AUDCLNT_STREAMFLAGS_SRC_DEFAULT_QUALITY;

    const REFERENCE_TIME hnsBufferDuration = 0;

    WAVEFORMATEX* format = &formatex->Format;
    format->wFormatTag = WAVE_FORMAT_EXTENSIBLE;
    format->nBlockAlign = (format->wBitsPerSample >> 3) * format->nChannels;
    format->nAvgBytesPerSec = format->nBlockAlign * format->nSamplesPerSec;
    format->cbSize = sizeof(WAVEFORMATEXTENSIBLE) - sizeof(WAVEFORMATEX);

    formatex->Samples.wValidBitsPerSample = format->wBitsPerSample;
    formatex->dwChannelMask =
        (format->nChannels == 1) ? KSAUDIO_SPEAKER_MONO : KSAUDIO_SPEAKER_STEREO;
    formatex->SubFormat = KSDATAFORMAT_SUBTYPE_PCM;

    const HRESULT hr = audioClient->Initialize(shareMode,
                                               streamFlags,
                                               hnsBufferDuration,
                                               0,
                                               format,
                                               nullptr);
    __CheckBool(SUCCEEDED(hr));
    return true;
}

/// Obtains the IAudioCaptureClient service from an initialized audio client.
///
/// @param audioClient        Initialized client.
/// @param audioCaptureClient Receives the capture service.
/// @return true on success.
bool AudioCapturer::_CreateAudioCaptureClient(IAudioClient* audioClient,
                                              IAudioCaptureClient** audioCaptureClient)
{
    __CheckBool(SUCCEEDED(audioClient->GetService(IID_PPV_ARGS(audioCaptureClient))));
    return true;
}

/// Capture thread body: starts the stream, then wakes up every 5 ms and
/// drains all pending packets into `_buffer` (under `_bufferMutex`) until
/// `_loopFlag` is cleared.
///
/// Packets flagged AUDCLNT_BUFFERFLAGS_SILENT are appended as zeros instead
/// of copying the packet bytes.
///
/// @param audio_client         Client whose stream is started/stopped.
/// @param audio_capture_client Capture service to read packets from.
/// @return true when the loop ended because `_loopFlag` was cleared, false
///         when a capture call failed.
bool AudioCapturer::_ThreadRun(IAudioClient* audio_client,
                               IAudioCaptureClient* audio_capture_client)
{
    BYTE* packetData = nullptr;
    UINT32 framesInPacket = 0;
    DWORD packetFlags = 0;
    UINT32 pendingFrames = 0;

    audio_client->Start();
    while (_loopFlag) {
        SleepMs(5);
        for (;;) {
            __CheckBool(SUCCEEDED(audio_capture_client->GetNextPacketSize(&pendingFrames)));
            if (pendingFrames == 0) {
                break; // nothing more queued, go back to sleep
            }

            __CheckBool(SUCCEEDED(audio_capture_client->GetBuffer(
                &packetData, &framesInPacket, &packetFlags, nullptr, nullptr)));

            const size_t byteCount = static_cast<size_t>(_formatex.Format.wBitsPerSample >> 3)
                * _formatex.Format.nChannels * framesInPacket;
            const bool isSilent = (packetFlags & AUDCLNT_BUFFERFLAGS_SILENT) != 0;
            {
                std::lock_guard<decltype(_bufferMutex)> lock(_bufferMutex);
                const size_t writeOffset = _buffer.size();
                _buffer.resize(writeOffset + byteCount);
                if (byteCount > 0) {
                    if (isSilent || packetData == nullptr) {
                        // For silent packets the data must be treated as
                        // silence regardless of the bytes in the packet.
                        memset(_buffer.data() + writeOffset, 0, byteCount);
                    } else {
                        memcpy(_buffer.data() + writeOffset, packetData, byteCount);
                    }
                }
            }

            __CheckBool(SUCCEEDED(audio_capture_client->ReleaseBuffer(framesInPacket)));
        }
    }
    audio_client->Stop();
    return true;
}

/// Moves up to `maxLen` captured bytes from the front of the internal buffer
/// into `buf`.
///
/// @param buf    Destination; must have room for `maxLen` bytes.
/// @param maxLen Maximum number of bytes to copy.
/// @return Number of bytes copied; 0 when nothing is buffered, `buf` is null
///         or `maxLen` is not positive.
int AudioCapturer::readAudioData(char* buf, int maxLen)
{
    if (buf == nullptr || maxLen <= 0) {
        return 0;
    }

    std::lock_guard<decltype(_bufferMutex)> lock(_bufferMutex);
    // Explicit template argument: the operands have different types, and it
    // also keeps a function-like `min` macro from being expanded.
    const int toRead =
        static_cast<int>(std::min<size_t>(static_cast<size_t>(maxLen), _buffer.size()));
    if (toRead > 0) {
        memcpy(buf, _buffer.data(), static_cast<size_t>(toRead));
        _buffer.erase(_buffer.begin(), _buffer.begin() + toRead);
    }
    return toRead;
}

// ---- Silence player implementation ----

/// Opens the default render endpoint in shared mode with a one-second
/// buffer, starts it and launches the thread that keeps it fed with silence.
///
/// Any previous silence player is shut down first, and every failure path
/// releases what was created so far.
///
/// @return true when the player is running, false on any failure.
bool AudioCapturer::_InitializeSilencePlayer()
{
    // Never overwrite live interface pointers or a running thread.
    _CleanupSilencePlayer();

    if (_pDeviceEnumerator == nullptr) {
        return false;
    }

    const auto fail = [this] {
        _CleanupSilencePlayer();
        return false;
    };

    // Get the default audio render device.
    HRESULT hr = _pDeviceEnumerator->GetDefaultAudioEndpoint(eRender, eConsole, &_pSilenceDevice);
    if (FAILED(hr)) {
        return fail();
    }

    // Activate the audio client.
    hr = _pSilenceDevice->Activate(__uuidof(IAudioClient),
                                   CLSCTX_ALL,
                                   nullptr,
                                   reinterpret_cast<void**>(&_pSilenceAudioClient));
    if (FAILED(hr)) {
        return fail();
    }

    // Query the mix format.
    WAVEFORMATEX* mixFormat = nullptr;
    hr = _pSilenceAudioClient->GetMixFormat(&mixFormat);
    if (FAILED(hr)) {
        return fail();
    }

    // Initialize the audio client (shared mode).
    hr = _pSilenceAudioClient->Initialize(AUDCLNT_SHAREMODE_SHARED,
                                          0,
                                          10000000, // 1 second buffer
                                          0,
                                          mixFormat,
                                          nullptr);
    CoTaskMemFree(mixFormat);
    if (FAILED(hr)) {
        return fail();
    }

    // Get the render client.
    hr = _pSilenceAudioClient->GetService(__uuidof(IAudioRenderClient),
                                          reinterpret_cast<void**>(&_pSilenceRenderClient));
    if (FAILED(hr)) {
        return fail();
    }

    // Start the audio client.
    hr = _pSilenceAudioClient->Start();
    if (FAILED(hr)) {
        return fail();
    }

    // Launch the silence playback thread.
    _silencePlayerRunning = true;
    _silencePlayerThread = new std::thread([this] { _SilencePlayerThreadFunc(); });
    return true;
}

/// Stops the silence thread (if any) and releases the silence player's
/// WASAPI objects. Safe to call when the player was never started or was
/// only partially initialized.
void AudioCapturer::_CleanupSilencePlayer()
{
    // Stop the silence playback thread.
    _silencePlayerRunning = false;
    if (_silencePlayerThread != nullptr) {
        if (_silencePlayerThread->joinable()) {
            _silencePlayerThread->join();
        }
        delete _silencePlayerThread;
        _silencePlayerThread = nullptr;
    }

    // Release WASAPI resources: stop the stream, then drop the render
    // service before the client that produced it.
    if (_pSilenceAudioClient != nullptr) {
        _pSilenceAudioClient->Stop();
    }
    if (_pSilenceRenderClient != nullptr) {
        _pSilenceRenderClient->Release();
        _pSilenceRenderClient = nullptr;
    }
    if (_pSilenceAudioClient != nullptr) {
        _pSilenceAudioClient->Release();
        _pSilenceAudioClient = nullptr;
    }
    if (_pSilenceDevice != nullptr) {
        _pSilenceDevice->Release();
        _pSilenceDevice = nullptr;
    }
}

/// Silence thread body: every 10 ms, hands all currently free frames of the
/// render buffer back to the engine marked as silence, until
/// `_silencePlayerRunning` is cleared or a padding query fails.
void AudioCapturer::_SilencePlayerThreadFunc()
{
    UINT32 bufferFrameCount = 0;
    if (FAILED(_pSilenceAudioClient->GetBufferSize(&bufferFrameCount))) {
        return;
    }

    while (_silencePlayerRunning) {
        UINT32 queuedFrames = 0;
        if (FAILED(_pSilenceAudioClient->GetCurrentPadding(&queuedFrames))) {
            break;
        }

        const UINT32 freeFrames = bufferFrameCount - queuedFrames;
        if (freeFrames > 0) {
            BYTE* renderData = nullptr;
            if (SUCCEEDED(_pSilenceRenderClient->GetBuffer(freeFrames, &renderData))) {
                // Mark the packet as silence instead of writing zeros by
                // hand: the byte size of a frame depends on the mix format,
                // so a fixed "stereo float" size could overrun the buffer.
                _pSilenceRenderClient->ReleaseBuffer(freeFrames, AUDCLNT_BUFFERFLAGS_SILENT);
            }
        }

        Sleep(10); // 10 ms interval
    }
}