audio_capturer.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375
  1. #include "audio_capturer.h"
  2. #include "basic/basic.h"
  3. #define DEFAULT_SAMPLE_RATE 48000 // Default sample rate: 48 kHz
  4. #define DEFAULT_BITS_PER_SAMPLE 16 // Default bit depth: 16 bits
  5. #define DEFAULT_CHANNELS 1 // Default number of audio channels: 1
  6. #define DEFAULT_AUDIO_PACKET_INTERVAL 10 // Default audio packet send interval: 10 ms
  7. bool AudioCapturer::Init(Type deviceType)
  8. {
  9. Stop();
  10. _deviceType = deviceType;
  11. if (!_CreateDeviceEnumerator(&_pDeviceEnumerator)) { __DebugPrint("_CreateDeviceEnumerator failed"); return false; }
  12. if (!_CreateDevice(_pDeviceEnumerator, &_pDevice)) { __DebugPrint("_CreateDevice failed"); return false; }
  13. if (!_CreateAudioClient(_pDevice, &_pAudioClient)) { __DebugPrint("_CreateAudioClient failed"); return false; }
  14. if (!_IsFormatSupported(_pAudioClient)) {
  15. if (!_GetPreferFormat(_pAudioClient, &_formatex)) { __DebugPrint("_GetPreferFormat failed"); return false; }
  16. }
  17. if (!_InitAudioClient(_pAudioClient, &_formatex)) { __DebugPrint("_InitAudioClient failed"); return false; }
  18. if (!_CreateAudioCaptureClient(_pAudioClient, &_pAudioCaptureClient)) { __DebugPrint("_CreateAudioCaptureClient failed"); return false; }
  19. _format.sampleRate = _formatex.Format.nSamplesPerSec;
  20. _format.channels = _formatex.Format.nChannels;
  21. _format.bitsPerSample = _formatex.Format.wBitsPerSample;
  22. _format.avgBytesPerSec = _formatex.Format.nAvgBytesPerSec;
  23. _format.blockAlign = _formatex.Format.nBlockAlign;
  24. _isInit = true;
  25. return true;
  26. }
  27. bool AudioCapturer::Start()
  28. {
  29. if (!_isInit) { return false; }
  30. // 如果是麦克风设备,启动静音播放器确保音频引擎活跃
  31. if (_deviceType == Microphone) {
  32. if (!_InitializeSilencePlayer()) {
  33. // 静音播放器初始化失败不影响主要功能,只记录日志
  34. // qDebug() << "Failed to initialize silence player for microphone";
  35. }
  36. }
  37. _loopFlag = true;
  38. // PlaySoundA("./rc/mute.wav", nullptr, SND_FILENAME | SND_ASYNC | SND_LOOP);
  39. _captureThread = new std::thread(
  40. [this] { _ThreadRun(_pAudioClient, _pAudioCaptureClient); });
  41. return true;
  42. }
  43. void AudioCapturer::Stop()
  44. {
  45. // CoUninitialize();
  46. _isInit = false;
  47. _loopFlag = false;
  48. // 清理静音播放器
  49. _CleanupSilencePlayer();
  50. Free(_captureThread, [this] {
  51. _captureThread->join();
  52. delete _captureThread;
  53. });
  54. Free(_pAudioCaptureClient, [this] { _pAudioCaptureClient->Release(); });
  55. if (_pAudioClient != nullptr) {
  56. _pAudioClient->Stop();
  57. }
  58. //PlaySoundA(nullptr, nullptr, SND_FILENAME | SND_ASYNC | SND_LOOP);
  59. Free(_pAudioClient, [this] { _pAudioClient->Release(); });
  60. Free(_pDevice, [this] { _pDevice->Release(); });
  61. Free(_pDeviceEnumerator, [this] { _pDeviceEnumerator->Release(); });
  62. }
  63. bool AudioCapturer::_CreateDeviceEnumerator(IMMDeviceEnumerator** enumerator)
  64. {
  65. // __CheckBool(SUCCEEDED(CoInitializeEx(nullptr, COINIT_MULTITHREADED)));
  66. // __CheckBool(SUCCEEDED(CoInitializeEx(nullptr, COINIT_APARTMENTTHREADED)));
  67. HRESULT hr = CoCreateInstance(__uuidof(MMDeviceEnumerator), NULL, CLSCTX_ALL,
  68. __uuidof(IMMDeviceEnumerator),
  69. reinterpret_cast<void**>(enumerator));
  70. if (FAILED(hr)) { __DebugPrint("CoCreateInstance for MMDeviceEnumerator failed, hr=0x%08lx", static_cast<unsigned long>(hr)); return false; }
  71. return true;
  72. }
  73. bool AudioCapturer::_CreateDevice(IMMDeviceEnumerator* enumerator, IMMDevice** device)
  74. {
  75. EDataFlow enDataFlow = _deviceType == Microphone ? eCapture : eRender;
  76. ERole enRole = eConsole;
  77. HRESULT hr = enumerator->GetDefaultAudioEndpoint(enDataFlow, enRole, device);
  78. if (FAILED(hr)) { __DebugPrint("GetDefaultAudioEndpoint failed, hr=0x%08lx", static_cast<unsigned long>(hr)); return false; }
  79. return true;
  80. }
  81. bool AudioCapturer::_CreateAudioClient(IMMDevice* device, IAudioClient** audioClient)
  82. {
  83. HRESULT hr = device->Activate(__uuidof(IAudioClient), CLSCTX_ALL, NULL,
  84. (void**)audioClient);
  85. if (FAILED(hr)) { __DebugPrint("IAudioClient Activate failed, hr=0x%08lx", static_cast<unsigned long>(hr)); return false; }
  86. return true;
  87. }
  88. bool AudioCapturer::_IsFormatSupported(IAudioClient* audioClient)
  89. {
  90. memset(&_formatex, 0, sizeof(_formatex));
  91. WAVEFORMATEX* format = &_formatex.Format;
  92. format->nSamplesPerSec = DEFAULT_SAMPLE_RATE;
  93. format->wBitsPerSample = DEFAULT_BITS_PER_SAMPLE;
  94. format->nChannels = DEFAULT_CHANNELS;
  95. WAVEFORMATEX* closestMatch = nullptr;
  96. HRESULT hr = audioClient->IsFormatSupported(AUDCLNT_SHAREMODE_SHARED,
  97. format, &closestMatch);
  98. if (hr == AUDCLNT_E_UNSUPPORTED_FORMAT) // 0x88890008
  99. {
  100. if (closestMatch == nullptr) // 如果找不到最相近的格式,closestMatch可能为nullptr
  101. {
  102. return false;
  103. }
  104. format->nSamplesPerSec = closestMatch->nSamplesPerSec;
  105. format->wBitsPerSample = closestMatch->wBitsPerSample;
  106. format->nChannels = closestMatch->nChannels;
  107. return true;
  108. }
  109. return false;
  110. }
  111. bool AudioCapturer::_GetPreferFormat(IAudioClient* audioClient,
  112. WAVEFORMATEXTENSIBLE* formatex)
  113. {
  114. WAVEFORMATEX* format = nullptr;
  115. HRESULT hr = audioClient->GetMixFormat(&format);
  116. if (FAILED(hr)) { __DebugPrint("GetMixFormat failed, hr=0x%08lx", static_cast<unsigned long>(hr)); return false; }
  117. formatex->Format.nSamplesPerSec = format->nSamplesPerSec;
  118. formatex->Format.wBitsPerSample = format->wBitsPerSample;
  119. formatex->Format.nChannels = format->nChannels;
  120. CoTaskMemFree(format);
  121. return true;
  122. }
  123. bool AudioCapturer::_InitAudioClient(IAudioClient* audioClient,
  124. WAVEFORMATEXTENSIBLE* formatex)
  125. {
  126. AUDCLNT_SHAREMODE shareMode = AUDCLNT_SHAREMODE_SHARED; // share Audio Engine with other applications
  127. DWORD streamFlags = _deviceType == Microphone ? 0 : AUDCLNT_STREAMFLAGS_LOOPBACK;
  128. streamFlags |= AUDCLNT_STREAMFLAGS_AUTOCONVERTPCM; // A channel matrixer and a sample
  129. // rate converter are inserted
  130. streamFlags |= AUDCLNT_STREAMFLAGS_SRC_DEFAULT_QUALITY; // a sample rate converter
  131. // with better quality than
  132. // the default conversion but
  133. // with a higher performance
  134. // cost is used
  135. REFERENCE_TIME hnsBufferDuration = 0;
  136. WAVEFORMATEX* format = &formatex->Format;
  137. format->wFormatTag = WAVE_FORMAT_EXTENSIBLE;
  138. format->nBlockAlign = (format->wBitsPerSample >> 3) * format->nChannels;
  139. format->nAvgBytesPerSec = format->nBlockAlign * format->nSamplesPerSec;
  140. format->cbSize = sizeof(WAVEFORMATEXTENSIBLE) - sizeof(WAVEFORMATEX);
  141. formatex->Samples.wValidBitsPerSample = format->wBitsPerSample;
  142. formatex->dwChannelMask = format->nChannels == 1 ? KSAUDIO_SPEAKER_MONO : KSAUDIO_SPEAKER_STEREO;
  143. formatex->SubFormat = KSDATAFORMAT_SUBTYPE_PCM;
  144. HRESULT hr = audioClient->Initialize(shareMode, streamFlags, hnsBufferDuration, 0,
  145. format, nullptr);
  146. if (FAILED(hr)) { __DebugPrint("IAudioClient Initialize failed, hr=0x%08lx", static_cast<unsigned long>(hr)); return false; }
  147. return true;
  148. }
  149. bool AudioCapturer::_CreateAudioCaptureClient(IAudioClient* audioClient,
  150. IAudioCaptureClient** audioCaptureClient)
  151. {
  152. HRESULT hr = audioClient->GetService(IID_PPV_ARGS(audioCaptureClient));
  153. if (FAILED(hr)) { __DebugPrint("GetService(IAudioCaptureClient) failed, hr=0x%08lx", static_cast<unsigned long>(hr)); return false; }
  154. return true;
  155. }
  156. bool AudioCapturer::_ThreadRun(IAudioClient* audio_client,
  157. IAudioCaptureClient* audio_capture_client)
  158. {
  159. // 初始化COM到MTA,确保当前线程可安全调用COM接口
  160. HRESULT hrCoInit = CoInitializeEx(nullptr, COINIT_MULTITHREADED);
  161. if (FAILED(hrCoInit)) {
  162. __DebugPrint("CoInitializeEx failed, hr=0x%08lx", static_cast<unsigned long>(hrCoInit));
  163. return false;
  164. }
  165. UINT32 num_success = 0;
  166. BYTE* p_audio_data = nullptr;
  167. UINT32 num_frames_to_read = 0;
  168. DWORD dw_flag = 0;
  169. UINT32 num_frames_in_next_packet = 0;
  170. audio_client->Start();
  171. while (_loopFlag) {
  172. SleepMs(5);
  173. while (true) {
  174. HRESULT hr = audio_capture_client->GetNextPacketSize(&num_frames_in_next_packet);
  175. if (FAILED(hr)) { __DebugPrint("GetNextPacketSize failed, hr=0x%08lx", static_cast<unsigned long>(hr)); audio_client->Stop(); CoUninitialize(); return false; }
  176. if (num_frames_in_next_packet == 0) {
  177. break;
  178. }
  179. hr = audio_capture_client->GetBuffer(&p_audio_data, &num_frames_to_read,
  180. &dw_flag, nullptr, nullptr);
  181. if (FAILED(hr)) { __DebugPrint("GetBuffer failed, hr=0x%08lx", static_cast<unsigned long>(hr)); audio_client->Stop(); CoUninitialize(); return false; }
  182. size_t size = (_formatex.Format.wBitsPerSample >> 3) * _formatex.Format.nChannels * num_frames_to_read;
  183. {
  184. std::lock_guard<std::mutex> lock(_bufferMutex);
  185. size_t oldSize = _buffer.size();
  186. _buffer.resize(oldSize + size);
  187. memcpy(_buffer.data() + oldSize, p_audio_data, size);
  188. }
  189. hr = audio_capture_client->ReleaseBuffer(num_frames_to_read);
  190. if (FAILED(hr)) { __DebugPrint("ReleaseBuffer failed, hr=0x%08lx", static_cast<unsigned long>(hr)); audio_client->Stop(); CoUninitialize(); return false; }
  191. }
  192. }
  193. audio_client->Stop();
  194. CoUninitialize();
  195. return true;
  196. }
  197. // 静音播放器实现
  198. bool AudioCapturer::_InitializeSilencePlayer()
  199. {
  200. HRESULT hr;
  201. // 获取默认音频渲染设备
  202. hr = _pDeviceEnumerator->GetDefaultAudioEndpoint(eRender, eConsole, &_pSilenceDevice);
  203. if (FAILED(hr)) {
  204. return false;
  205. }
  206. // 激活音频客户端
  207. hr = _pSilenceDevice->Activate(__uuidof(IAudioClient), CLSCTX_ALL, nullptr, (void**)&_pSilenceAudioClient);
  208. if (FAILED(hr)) {
  209. return false;
  210. }
  211. // 获取混音格式
  212. WAVEFORMATEX* pSilenceFormat = nullptr;
  213. hr = _pSilenceAudioClient->GetMixFormat(&pSilenceFormat);
  214. if (FAILED(hr)) {
  215. return false;
  216. }
  217. // 初始化音频客户端(共享模式)
  218. hr = _pSilenceAudioClient->Initialize(
  219. AUDCLNT_SHAREMODE_SHARED,
  220. 0,
  221. 10000000, // 1秒缓冲区
  222. 0,
  223. pSilenceFormat,
  224. nullptr
  225. );
  226. CoTaskMemFree(pSilenceFormat);
  227. if (FAILED(hr)) {
  228. return false;
  229. }
  230. // 获取渲染客户端
  231. hr = _pSilenceAudioClient->GetService(__uuidof(IAudioRenderClient), (void**)&_pSilenceRenderClient);
  232. if (FAILED(hr)) {
  233. return false;
  234. }
  235. // 启动音频客户端
  236. hr = _pSilenceAudioClient->Start();
  237. if (FAILED(hr)) {
  238. return false;
  239. }
  240. // 启动静音播放线程
  241. _silencePlayerRunning = true;
  242. _silencePlayerThread = new std::thread([this] { _SilencePlayerThreadFunc(); });
  243. return true;
  244. }
  245. void AudioCapturer::_SilencePlayerThreadFunc()
  246. {
  247. // 线程内初始化COM
  248. HRESULT hrCoInit = CoInitializeEx(nullptr, COINIT_MULTITHREADED);
  249. if (FAILED(hrCoInit)) {
  250. __DebugPrint("CoInitializeEx for silence thread failed, hr=0x%08lx", static_cast<unsigned long>(hrCoInit));
  251. return;
  252. }
  253. UINT32 bufferFrameCount;
  254. HRESULT hr = _pSilenceAudioClient->GetBufferSize(&bufferFrameCount);
  255. if (FAILED(hr)) {
  256. CoUninitialize();
  257. return;
  258. }
  259. // 计算每帧字节数(使用设备混音格式)
  260. WAVEFORMATEX* pMix = nullptr;
  261. UINT32 frameBytes = 0;
  262. hr = _pSilenceAudioClient->GetMixFormat(&pMix);
  263. if (SUCCEEDED(hr) && pMix) {
  264. frameBytes = pMix->nBlockAlign;
  265. CoTaskMemFree(pMix);
  266. } else {
  267. // 回退:猜测为立体声float(安全起见仍可为0填充任何格式)
  268. frameBytes = sizeof(float) * 2;
  269. }
  270. while (_silencePlayerRunning) {
  271. UINT32 numFramesPadding;
  272. hr = _pSilenceAudioClient->GetCurrentPadding(&numFramesPadding);
  273. if (FAILED(hr)) {
  274. break;
  275. }
  276. UINT32 numFramesAvailable = bufferFrameCount - numFramesPadding;
  277. if (numFramesAvailable > 0) {
  278. BYTE* pData;
  279. hr = _pSilenceRenderClient->GetBuffer(numFramesAvailable, &pData);
  280. if (SUCCEEDED(hr)) {
  281. // 填充静音数据(全零)
  282. memset(pData, 0, numFramesAvailable * frameBytes);
  283. _pSilenceRenderClient->ReleaseBuffer(numFramesAvailable, 0);
  284. }
  285. }
  286. Sleep(10); // 10ms 间隔
  287. }
  288. CoUninitialize();
  289. }
  290. int AudioCapturer::readAudioData(char* buf, int maxLen)
  291. {
  292. std::lock_guard<std::mutex> lock(_bufferMutex);
  293. int toRead = std::min<int>(maxLen, _buffer.size());
  294. if (toRead > 0) {
  295. memcpy(buf, _buffer.data(), toRead);
  296. _buffer.erase(_buffer.begin(), _buffer.begin() + toRead);
  297. }
  298. return toRead;
  299. }
  300. void AudioCapturer::_CleanupSilencePlayer()
  301. {
  302. // 停止静音播放线程
  303. if (_silencePlayerRunning) {
  304. _silencePlayerRunning = false;
  305. if (_silencePlayerThread && _silencePlayerThread->joinable()) {
  306. _silencePlayerThread->join();
  307. delete _silencePlayerThread;
  308. _silencePlayerThread = nullptr;
  309. }
  310. }
  311. // 清理 WASAPI 资源
  312. if (_pSilenceAudioClient) {
  313. _pSilenceAudioClient->Stop();
  314. _pSilenceAudioClient->Release();
  315. _pSilenceAudioClient = nullptr;
  316. }
  317. if (_pSilenceRenderClient) {
  318. _pSilenceRenderClient->Release();
  319. _pSilenceRenderClient = nullptr;
  320. }
  321. if (_pSilenceDevice) {
  322. _pSilenceDevice->Release();
  323. _pSilenceDevice = nullptr;
  324. }
  325. }