// capture_audio_capturer.cpp
#include "capture_audio_capturer.h"
#include "../base/logger.h"
#include "../base/media_common.h"

#include <algorithm>
#include <chrono>
#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <exception>
#include <limits>
#include <memory>
#include <mutex>
#include <string>
#include <thread>
#include <vector>

#ifdef _WIN32
#include <windows.h>
#include <mmdeviceapi.h>
#include <audioclient.h>
#include <endpointvolume.h>
#pragma comment(lib, "ole32.lib")
#endif

extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavdevice/avdevice.h>
#include <libswresample/swresample.h>
#include <libavutil/opt.h>
#include <libavutil/channel_layout.h>
#include <libavutil/frame.h>
#include <libavutil/samplefmt.h>
}
  20. namespace av {
  21. namespace capture {
  22. AudioCapturer::AudioCapturer() : audioParams_(CapturerType::AUDIO_MIC) {
  23. AV_LOGGER_DEBUG("创建音频采集器");
  24. // 注册设备
  25. avdevice_register_all();
  26. lastLevelUpdate_ = std::chrono::steady_clock::now();
  27. }
  28. AudioCapturer::~AudioCapturer() {
  29. close();
  30. AV_LOGGER_DEBUG("音频采集器已销毁");
  31. }
  32. ErrorCode AudioCapturer::initialize(const CapturerParams& params) {
  33. if (params.mediaType != MediaType::AUDIO) {
  34. AV_LOGGER_ERROR("参数媒体类型不是音频");
  35. return ErrorCode::INVALID_PARAMS;
  36. }
  37. audioParams_ = static_cast<const AudioCaptureParams&>(params);
  38. if (!validateParams(audioParams_)) {
  39. return ErrorCode::INVALID_PARAMS;
  40. }
  41. ErrorCode result = ErrorCode::SUCCESS;
  42. if (audioParams_.type == CapturerType::AUDIO_MIC) {
  43. result = initializeMicrophone();
  44. } else if (audioParams_.type == CapturerType::AUDIO_SYSTEM ||
  45. audioParams_.type == CapturerType::AUDIO_LOOPBACK) {
  46. result = initializeSystemAudio();
  47. } else {
  48. AV_LOGGER_ERROR("不支持的音频采集器类型");
  49. return ErrorCode::NOT_SUPPORTED;
  50. }
  51. if (result == ErrorCode::SUCCESS) {
  52. setState(CapturerState::INITIALIZED);
  53. AV_LOGGER_INFOF("音频采集器初始化成功: {}Hz, {}ch, {}",
  54. audioParams_.sampleRate, audioParams_.channels,
  55. av_get_sample_fmt_name(audioParams_.sampleFormat));
  56. }
  57. return result;
  58. }
  59. ErrorCode AudioCapturer::start() {
  60. std::lock_guard<std::mutex> lock(captureMutex_);
  61. if (getState() != CapturerState::INITIALIZED) {
  62. AV_LOGGER_ERROR("采集器状态无效,无法启动");
  63. return ErrorCode::INVALID_STATE;
  64. }
  65. shouldStop_ = false;
  66. // 启动采集线程
  67. try {
  68. captureThread_ = std::thread(&AudioCapturer::captureThreadFunc, this);
  69. setState(CapturerState::STARTED);
  70. AV_LOGGER_INFO("音频采集已启动");
  71. return ErrorCode::SUCCESS;
  72. } catch (const std::exception& e) {
  73. AV_LOGGER_ERRORF("启动音频采集线程失败: {}", e.what());
  74. return ErrorCode::THREAD_ERROR;
  75. }
  76. }
  77. ErrorCode AudioCapturer::stop() {
  78. std::lock_guard<std::mutex> lock(captureMutex_);
  79. if (getState() != CapturerState::STARTED) {
  80. return ErrorCode::SUCCESS;
  81. }
  82. shouldStop_ = true;
  83. // 唤醒暂停的线程
  84. {
  85. std::lock_guard<std::mutex> pauseLock(pauseMutex_);
  86. paused_ = false;
  87. pauseCondition_.notify_all();
  88. }
  89. // 等待线程结束
  90. if (captureThread_.joinable()) {
  91. captureThread_.join();
  92. }
  93. setState(CapturerState::STOPPED);
  94. AV_LOGGER_INFO("音频采集已停止");
  95. return ErrorCode::SUCCESS;
  96. }
  97. ErrorCode AudioCapturer::pause() {
  98. if (getState() != CapturerState::STARTED) {
  99. return ErrorCode::INVALID_STATE;
  100. }
  101. paused_ = true;
  102. AV_LOGGER_INFO("音频采集已暂停");
  103. return ErrorCode::SUCCESS;
  104. }
  105. ErrorCode AudioCapturer::resume() {
  106. if (getState() != CapturerState::STARTED) {
  107. return ErrorCode::INVALID_STATE;
  108. }
  109. {
  110. std::lock_guard<std::mutex> lock(pauseMutex_);
  111. paused_ = false;
  112. pauseCondition_.notify_all();
  113. }
  114. AV_LOGGER_INFO("音频采集已恢复");
  115. return ErrorCode::SUCCESS;
  116. }
  117. ErrorCode AudioCapturer::reset() {
  118. ErrorCode result = stop();
  119. if (result != ErrorCode::SUCCESS) {
  120. return result;
  121. }
  122. // 清空帧队列
  123. {
  124. std::lock_guard<std::mutex> lock(queueMutex_);
  125. while (!frameQueue_.empty()) {
  126. frameQueue_.pop();
  127. }
  128. }
  129. resetStats();
  130. audioLevel_ = 0.0f;
  131. setState(CapturerState::INITIALIZED);
  132. AV_LOGGER_INFO("音频采集器已重置");
  133. return ErrorCode::SUCCESS;
  134. }
  135. ErrorCode AudioCapturer::close() {
  136. stop();
  137. // 清理资源
  138. cleanupResampler();
  139. cleanupAudioProcessing();
  140. if (codecCtx_) {
  141. avcodec_free_context(&codecCtx_);
  142. codecCtx_ = nullptr;
  143. }
  144. if (formatCtx_) {
  145. avformat_close_input(&formatCtx_);
  146. formatCtx_ = nullptr;
  147. }
  148. codec_ = nullptr;
  149. audioStreamIndex_ = -1;
  150. setState(CapturerState::IDLE);
  151. AV_LOGGER_INFO("音频采集器已关闭");
  152. return ErrorCode::SUCCESS;
  153. }
  154. std::vector<std::string> AudioCapturer::getAvailableDevices() const {
  155. std::vector<std::string> devices;
  156. auto deviceInfos = getDetailedDeviceInfo();
  157. for (const auto& info : deviceInfos) {
  158. devices.push_back(info.name);
  159. }
  160. return devices;
  161. }
  162. std::string AudioCapturer::getCurrentDevice() const {
  163. return audioParams_.deviceName;
  164. }
  165. std::vector<AudioDeviceInfo> AudioCapturer::getDetailedDeviceInfo() const {
  166. std::lock_guard<std::mutex> lock(deviceCacheMutex_);
  167. if (!devicesCached_) {
  168. if (audioParams_.type == CapturerType::AUDIO_MIC) {
  169. cachedDevices_ = enumerateMicrophones();
  170. } else {
  171. cachedDevices_ = enumerateSystemAudioDevices();
  172. }
  173. devicesCached_ = true;
  174. }
  175. return cachedDevices_;
  176. }
  177. ErrorCode AudioCapturer::setAudioParams(int sampleRate, int channels, AVSampleFormat sampleFormat) {
  178. if (getState() == CapturerState::STARTED) {
  179. AV_LOGGER_ERROR("无法在采集过程中修改音频参数");
  180. return ErrorCode::INVALID_STATE;
  181. }
  182. audioParams_.sampleRate = sampleRate;
  183. audioParams_.channels = channels;
  184. audioParams_.sampleFormat = sampleFormat;
  185. AV_LOGGER_INFOF("音频参数已更新: {}Hz, {}ch, {}",
  186. sampleRate, channels, av_get_sample_fmt_name(sampleFormat));
  187. return ErrorCode::SUCCESS;
  188. }
  189. ErrorCode AudioCapturer::setVolume(float volume) {
  190. if (volume < 0.0f || volume > 2.0f) {
  191. AV_LOGGER_ERROR("音量值超出范围 (0.0-2.0)");
  192. return ErrorCode::INVALID_PARAMS;
  193. }
  194. currentVolume_ = volume;
  195. audioParams_.volume = volume;
  196. AV_LOGGER_INFOF("音量已设置为: {:.2f}", volume);
  197. return ErrorCode::SUCCESS;
  198. }
  199. float AudioCapturer::getVolume() const {
  200. return currentVolume_;
  201. }
  202. ErrorCode AudioCapturer::setNoiseReduction(bool enable) {
  203. noiseReductionEnabled_ = enable;
  204. audioParams_.enableNoiseReduction = enable;
  205. AV_LOGGER_INFOF("Noise reduction {}", enable ? "enabled" : "disabled");
  206. return ErrorCode::SUCCESS;
  207. }
  208. ErrorCode AudioCapturer::setEchoCancellation(bool enable) {
  209. echoCancellationEnabled_ = enable;
  210. audioParams_.enableEchoCancellation = enable;
  211. AV_LOGGER_INFOF("Echo cancellation {}", enable ? "enabled" : "disabled");
  212. return ErrorCode::SUCCESS;
  213. }
  214. AudioCaptureParams AudioCapturer::getCurrentParams() const {
  215. return audioParams_;
  216. }
  217. float AudioCapturer::getAudioLevel() const {
  218. return audioLevel_.load();
  219. }
  220. bool AudioCapturer::validateParams(const CapturerParams& params) {
  221. const auto& audioParams = static_cast<const AudioCaptureParams&>(params);
  222. if (audioParams.sampleRate <= 0 || audioParams.sampleRate > 192000) {
  223. AV_LOGGER_ERROR("采样率无效");
  224. return false;
  225. }
  226. if (audioParams.channels <= 0 || audioParams.channels > 8) {
  227. AV_LOGGER_ERROR("声道数无效");
  228. return false;
  229. }
  230. if (audioParams.bufferSize <= 0 || audioParams.bufferSize > 8192) {
  231. AV_LOGGER_ERROR("缓冲区大小无效");
  232. return false;
  233. }
  234. if (audioParams.volume < 0.0f || audioParams.volume > 2.0f) {
  235. AV_LOGGER_ERROR("音量值无效");
  236. return false;
  237. }
  238. return true;
  239. }
  240. ErrorCode AudioCapturer::initializeMicrophone() {
  241. AV_LOGGER_INFOF("初始化麦克风采集器: 索引={}", audioParams_.micIndex);
  242. #ifdef _WIN32
  243. return setupDirectSoundMicrophone();
  244. #elif defined(__linux__)
  245. return setupALSAMicrophone();
  246. #elif defined(__APPLE__)
  247. return setupCoreAudioMicrophone();
  248. #else
  249. AV_LOGGER_ERROR("当前平台不支持麦克风采集");
  250. return ErrorCode::NOT_SUPPORTED;
  251. #endif
  252. }
  253. ErrorCode AudioCapturer::initializeSystemAudio() {
  254. AV_LOGGER_INFO("初始化系统音频采集器");
  255. #ifdef _WIN32
  256. return setupWASAPISystemAudio();
  257. #elif defined(__linux__)
  258. return setupPulseAudioCapture();
  259. #elif defined(__APPLE__)
  260. return setupCoreAudioSystemCapture();
  261. #else
  262. AV_LOGGER_ERROR("当前平台不支持系统音频采集");
  263. return ErrorCode::NOT_SUPPORTED;
  264. #endif
  265. }
  266. ErrorCode AudioCapturer::openInputDevice() {
  267. const AVInputFormat* inputFormat = getPlatformInputFormat();
  268. if (!inputFormat) {
  269. AV_LOGGER_ERROR("获取音频输入格式失败");
  270. return ErrorCode::NOT_SUPPORTED;
  271. }
  272. std::string deviceName = getPlatformDeviceName();
  273. if (deviceName.empty()) {
  274. AV_LOGGER_ERROR("获取音频设备名称失败");
  275. return ErrorCode::DEVICE_NOT_FOUND;
  276. }
  277. AV_LOGGER_INFOF("打开音频输入设备: {} (格式: {})", deviceName, inputFormat->name);
  278. // 设置输入选项
  279. AVDictionary* options = nullptr;
  280. // 设置音频参数
  281. av_dict_set(&options, "sample_rate", std::to_string(audioParams_.sampleRate).c_str(), 0);
  282. av_dict_set(&options, "channels", std::to_string(audioParams_.channels).c_str(), 0);
  283. // 设置缓冲区大小
  284. av_dict_set(&options, "audio_buffer_size", std::to_string(audioParams_.bufferSize).c_str(), 0);
  285. // 注意:不要在实际打开设备时设置list_devices选项,这会导致AVERROR_EXIT
  286. // 打开输入
  287. int ret = avformat_open_input(&formatCtx_, deviceName.c_str(), inputFormat, &options);
  288. av_dict_free(&options);
  289. if (ret < 0) {
  290. AV_LOGGER_ERRORF("打开音频输入设备失败: {} (设备: {})",
  291. ffmpeg_utils::errorToString(ret), deviceName);
  292. // 如果是设备不存在错误,尝试多种回退方案
  293. if (ret == AVERROR(EIO) || ret == AVERROR(ENOENT)) {
  294. // 尝试1: 使用默认设备
  295. AV_LOGGER_WARNING("尝试使用默认音频设备");
  296. AVDictionary* defaultOptions = nullptr;
  297. av_dict_set(&defaultOptions, "sample_rate", std::to_string(audioParams_.sampleRate).c_str(), 0);
  298. av_dict_set(&defaultOptions, "channels", std::to_string(audioParams_.channels).c_str(), 0);
  299. ret = avformat_open_input(&formatCtx_, "audio=default", inputFormat, &defaultOptions);
  300. av_dict_free(&defaultOptions);
  301. if (ret < 0) {
  302. AV_LOGGER_WARNINGF("默认设备失败: {}, 尝试设备索引0", ffmpeg_utils::errorToString(ret));
  303. // 尝试2: 使用设备索引0
  304. AVDictionary* indexOptions = nullptr;
  305. av_dict_set(&indexOptions, "sample_rate", std::to_string(audioParams_.sampleRate).c_str(), 0);
  306. av_dict_set(&indexOptions, "channels", std::to_string(audioParams_.channels).c_str(), 0);
  307. ret = avformat_open_input(&formatCtx_, "audio=0", inputFormat, &indexOptions);
  308. av_dict_free(&indexOptions);
  309. if (ret < 0) {
  310. AV_LOGGER_WARNINGF("DirectShow设备索引0失败: {}, 尝试WASAPI", ffmpeg_utils::errorToString(ret));
  311. // 尝试3: 使用WASAPI
  312. const AVInputFormat* wasapiFormat = av_find_input_format("wasapi");
  313. if (wasapiFormat) {
  314. AVDictionary* wasapiOptions = nullptr;
  315. av_dict_set(&wasapiOptions, "sample_rate", std::to_string(audioParams_.sampleRate).c_str(), 0);
  316. av_dict_set(&wasapiOptions, "channels", std::to_string(audioParams_.channels).c_str(), 0);
  317. ret = avformat_open_input(&formatCtx_, "", wasapiFormat, &wasapiOptions);
  318. av_dict_free(&wasapiOptions);
  319. if (ret < 0) {
  320. AV_LOGGER_ERRORF("所有音频设备打开尝试都失败: {}", ffmpeg_utils::errorToString(ret));
  321. return static_cast<ErrorCode>(ret);
  322. }
  323. AV_LOGGER_INFO("成功打开WASAPI默认音频设备");
  324. } else {
  325. AV_LOGGER_ERRORF("WASAPI不可用,所有音频设备打开尝试都失败: {}", ffmpeg_utils::errorToString(ret));
  326. return static_cast<ErrorCode>(ret);
  327. }
  328. } else {
  329. AV_LOGGER_INFO("成功打开音频设备索引0");
  330. }
  331. } else {
  332. AV_LOGGER_INFO("成功打开默认音频设备");
  333. }
  334. } else {
  335. return static_cast<ErrorCode>(ret);
  336. }
  337. }
  338. // 查找流信息
  339. ret = avformat_find_stream_info(formatCtx_, nullptr);
  340. if (ret < 0) {
  341. AV_LOGGER_ERRORF("查找音频流信息失败: {}", ffmpeg_utils::errorToString(ret));
  342. return static_cast<ErrorCode>(ret);
  343. }
  344. // 查找音频流
  345. audioStreamIndex_ = av_find_best_stream(formatCtx_, AVMEDIA_TYPE_AUDIO, -1, -1, &codec_, 0);
  346. if (audioStreamIndex_ < 0) {
  347. AV_LOGGER_ERROR("未找到音频流");
  348. return ErrorCode::STREAM_NOT_FOUND;
  349. }
  350. // 创建解码上下文
  351. codecCtx_ = avcodec_alloc_context3(codec_);
  352. if (!codecCtx_) {
  353. AV_LOGGER_ERROR("分配音频解码上下文失败");
  354. return ErrorCode::MEMORY_ALLOC_FAILED;
  355. }
  356. // 复制流参数到解码上下文
  357. ret = avcodec_parameters_to_context(codecCtx_, formatCtx_->streams[audioStreamIndex_]->codecpar);
  358. if (ret < 0) {
  359. AV_LOGGER_ERRORF("复制音频流参数失败: {}", ffmpeg_utils::errorToString(ret));
  360. return static_cast<ErrorCode>(ret);
  361. }
  362. // 打开解码器
  363. ret = avcodec_open2(codecCtx_, codec_, nullptr);
  364. if (ret < 0) {
  365. AV_LOGGER_ERRORF("打开音频解码器失败: {}", ffmpeg_utils::errorToString(ret));
  366. return static_cast<ErrorCode>(ret);
  367. }
  368. // 设置音频重采样
  369. return setupAudioResampling();
  370. }
  371. ErrorCode AudioCapturer::setupAudioResampling() {
  372. AVSampleFormat srcFormat = codecCtx_->sample_fmt;
  373. int srcSampleRate = codecCtx_->sample_rate;
  374. int srcChannels = codecCtx_->ch_layout.nb_channels;
  375. AVChannelLayout srcChannelLayout = codecCtx_->ch_layout;
  376. AVSampleFormat dstFormat = audioParams_.sampleFormat;
  377. int dstSampleRate = audioParams_.sampleRate;
  378. int dstChannels = audioParams_.channels;
  379. AVChannelLayout dstChannelLayout;
  380. av_channel_layout_default(&dstChannelLayout, dstChannels);
  381. needResampling_ = (srcFormat != dstFormat) ||
  382. (srcSampleRate != dstSampleRate) ||
  383. (srcChannels != dstChannels);
  384. if (needResampling_) {
  385. AV_LOGGER_INFOF("需要音频重采样: {}Hz,{}ch,{} -> {}Hz,{}ch,{}",
  386. srcSampleRate, srcChannels, av_get_sample_fmt_name(srcFormat),
  387. dstSampleRate, dstChannels, av_get_sample_fmt_name(dstFormat));
  388. swrCtx_ = swr_alloc();
  389. if (!swrCtx_) {
  390. AV_LOGGER_ERROR("分配音频重采样器失败");
  391. return ErrorCode::MEMORY_ALLOC_FAILED;
  392. }
  393. // 设置重采样参数
  394. av_opt_set_chlayout(swrCtx_, "in_chlayout", &srcChannelLayout, 0);
  395. av_opt_set_int(swrCtx_, "in_sample_rate", srcSampleRate, 0);
  396. av_opt_set_sample_fmt(swrCtx_, "in_sample_fmt", srcFormat, 0);
  397. av_opt_set_chlayout(swrCtx_, "out_chlayout", &dstChannelLayout, 0);
  398. av_opt_set_int(swrCtx_, "out_sample_rate", dstSampleRate, 0);
  399. av_opt_set_sample_fmt(swrCtx_, "out_sample_fmt", dstFormat, 0);
  400. // 初始化重采样器
  401. int ret = swr_init(swrCtx_);
  402. if (ret < 0) {
  403. AV_LOGGER_ERRORF("初始化音频重采样器失败: {}", ffmpeg_utils::errorToString(ret));
  404. cleanupResampler();
  405. return static_cast<ErrorCode>(ret);
  406. }
  407. // 创建重采样输出帧
  408. resampledFrame_ = makeAVFrame();
  409. if (!resampledFrame_) {
  410. return ErrorCode::MEMORY_ALLOC_FAILED;
  411. }
  412. resampledFrame_->format = dstFormat;
  413. resampledFrame_->sample_rate = dstSampleRate;
  414. av_channel_layout_copy(&resampledFrame_->ch_layout, &dstChannelLayout);
  415. }
  416. return ErrorCode::SUCCESS;
  417. }
  418. void AudioCapturer::captureThreadFunc() {
  419. AV_LOGGER_INFO("音频采集线程已启动");
  420. while (!shouldStop_) {
  421. // 检查暂停状态
  422. {
  423. std::unique_lock<std::mutex> lock(pauseMutex_);
  424. pauseCondition_.wait(lock, [this] { return !paused_ || shouldStop_; });
  425. }
  426. if (shouldStop_) {
  427. break;
  428. }
  429. ErrorCode result = captureFrame();
  430. if (result != ErrorCode::SUCCESS) {
  431. onError(result, "采集音频帧失败");
  432. // 短暂休眠后重试
  433. std::this_thread::sleep_for(std::chrono::milliseconds(5));
  434. }
  435. }
  436. AV_LOGGER_INFO("音频采集线程已退出");
  437. }
  438. ErrorCode AudioCapturer::captureFrame() {
  439. AVPacket* packet = av_packet_alloc();
  440. if (!packet) {
  441. return ErrorCode::MEMORY_ALLOC_FAILED;
  442. }
  443. // 读取包
  444. int ret = av_read_frame(formatCtx_, packet);
  445. if (ret < 0) {
  446. av_packet_free(&packet);
  447. if (ret == AVERROR_EOF) {
  448. AV_LOGGER_WARNING("音频流结束");
  449. return ErrorCode::END_OF_STREAM;
  450. } else {
  451. AV_LOGGER_ERRORF("读取音频帧失败: {}", ffmpeg_utils::errorToString(ret));
  452. return static_cast<ErrorCode>(ret);
  453. }
  454. }
  455. // 检查是否是音频包
  456. if (packet->stream_index != audioStreamIndex_) {
  457. av_packet_free(&packet);
  458. return ErrorCode::SUCCESS;
  459. }
  460. // 发送包到解码器
  461. ret = avcodec_send_packet(codecCtx_, packet);
  462. av_packet_free(&packet);
  463. if (ret < 0) {
  464. AV_LOGGER_ERRORF("发送音频包到解码器失败: {}", ffmpeg_utils::errorToString(ret));
  465. return static_cast<ErrorCode>(ret);
  466. }
  467. // 接收解码后的帧
  468. AVFramePtr frame = makeAVFrame();
  469. if (!frame) {
  470. return ErrorCode::MEMORY_ALLOC_FAILED;
  471. }
  472. ret = avcodec_receive_frame(codecCtx_, frame.get());
  473. if (ret == AVERROR(EAGAIN)) {
  474. return ErrorCode::SUCCESS; // 需要更多输入
  475. } else if (ret < 0) {
  476. AV_LOGGER_ERRORF("接收音频解码帧失败: {}", ffmpeg_utils::errorToString(ret));
  477. return static_cast<ErrorCode>(ret);
  478. }
  479. // 音频处理
  480. AVFramePtr processedFrame = processAudioFrame(frame);
  481. if (!processedFrame) {
  482. return ErrorCode::PROCESSING_ERROR;
  483. }
  484. // 计算音频电平
  485. calculateAudioLevel(processedFrame);
  486. // 回调
  487. onFrameCaptured(processedFrame);
  488. return ErrorCode::SUCCESS;
  489. }
  490. AVFramePtr AudioCapturer::processAudioFrame(const AVFramePtr& frame) {
  491. if (!frame) {
  492. return nullptr;
  493. }
  494. AVFramePtr processedFrame = std::move(const_cast<AVFramePtr&>(frame));
  495. // 重采样
  496. if (needResampling_) {
  497. processedFrame = resampleAudioFrame(processedFrame);
  498. if (!processedFrame) {
  499. return nullptr;
  500. }
  501. }
  502. // 音量控制
  503. if (currentVolume_ != 1.0f) {
  504. processedFrame = applyVolumeControl(processedFrame);
  505. }
  506. // 降噪处理
  507. if (noiseReductionEnabled_) {
  508. processedFrame = applyNoiseReduction(processedFrame);
  509. }
  510. return processedFrame;
  511. }
  512. AVFramePtr AudioCapturer::resampleAudioFrame(const AVFramePtr& frame) {
  513. if (!frame || !swrCtx_ || !resampledFrame_) {
  514. return nullptr;
  515. }
  516. // 计算输出采样数
  517. int outSamples = swr_get_out_samples(swrCtx_, frame->nb_samples);
  518. resampledFrame_->nb_samples = outSamples;
  519. // 重新分配缓冲区(如果需要)
  520. if (av_frame_get_buffer(resampledFrame_.get(), 0) < 0) {
  521. AV_LOGGER_ERROR("分配重采样缓冲区失败");
  522. return nullptr;
  523. }
  524. // 执行重采样
  525. int convertedSamples = swr_convert(swrCtx_,
  526. resampledFrame_->data, outSamples,
  527. (const uint8_t**)frame->data, frame->nb_samples);
  528. if (convertedSamples < 0) {
  529. AV_LOGGER_ERRORF("音频重采样失败: {}", ffmpeg_utils::errorToString(convertedSamples));
  530. return nullptr;
  531. }
  532. resampledFrame_->nb_samples = convertedSamples;
  533. // 复制时间戳等信息
  534. av_frame_copy_props(resampledFrame_.get(), frame.get());
  535. // 创建新的frame并复制数据
  536. AVFramePtr outputFrame = makeAVFrame();
  537. if (!outputFrame) {
  538. return nullptr;
  539. }
  540. av_frame_ref(outputFrame.get(), resampledFrame_.get());
  541. return outputFrame;
  542. }
  543. AVFramePtr AudioCapturer::applyVolumeControl(const AVFramePtr& frame) {
  544. if (!frame || currentVolume_ == 1.0f) {
  545. return nullptr;
  546. }
  547. // 简单的音量控制实现
  548. AVSampleFormat format = static_cast<AVSampleFormat>(frame->format);
  549. int channels = frame->ch_layout.nb_channels;
  550. int samples = frame->nb_samples;
  551. if (format == AV_SAMPLE_FMT_S16) {
  552. int16_t* data = reinterpret_cast<int16_t*>(frame->data[0]);
  553. for (int i = 0; i < samples * channels; ++i) {
  554. data[i] = static_cast<int16_t>(data[i] * currentVolume_);
  555. }
  556. } else if (format == AV_SAMPLE_FMT_FLT) {
  557. float* data = reinterpret_cast<float*>(frame->data[0]);
  558. for (int i = 0; i < samples * channels; ++i) {
  559. data[i] *= currentVolume_;
  560. }
  561. }
  562. return nullptr;
  563. }
  564. AVFramePtr AudioCapturer::applyNoiseReduction(const AVFramePtr& frame) {
  565. // 简单的降噪实现(实际应用中需要更复杂的算法)
  566. if (!frame) {
  567. return nullptr;
  568. }
  569. // 这里可以实现噪声门限、频谱减法等降噪算法
  570. // 目前只是一个占位符实现
  571. return nullptr;
  572. }
  573. void AudioCapturer::calculateAudioLevel(const AVFramePtr& frame) {
  574. if (!frame) {
  575. return;
  576. }
  577. auto now = std::chrono::steady_clock::now();
  578. auto elapsed = std::chrono::duration<double>(now - lastLevelUpdate_).count();
  579. if (elapsed < LEVEL_UPDATE_INTERVAL) {
  580. return;
  581. }
  582. std::lock_guard<std::mutex> lock(levelMutex_);
  583. AVSampleFormat format = static_cast<AVSampleFormat>(frame->format);
  584. int channels = frame->ch_layout.nb_channels;
  585. int samples = frame->nb_samples;
  586. double sum = 0.0;
  587. int totalSamples = samples * channels;
  588. if (format == AV_SAMPLE_FMT_S16) {
  589. const int16_t* data = reinterpret_cast<const int16_t*>(frame->data[0]);
  590. for (int i = 0; i < totalSamples; ++i) {
  591. sum += std::abs(data[i]) / 32768.0;
  592. }
  593. } else if (format == AV_SAMPLE_FMT_FLT) {
  594. const float* data = reinterpret_cast<const float*>(frame->data[0]);
  595. for (int i = 0; i < totalSamples; ++i) {
  596. sum += std::abs(data[i]);
  597. }
  598. }
  599. float level = static_cast<float>(sum / totalSamples);
  600. audioLevel_.store(std::min<float>(level, 1.0f));
  601. lastLevelUpdate_ = now;
  602. }
  603. void AudioCapturer::cleanupResampler() {
  604. if (swrCtx_) {
  605. swr_free(&swrCtx_);
  606. swrCtx_ = nullptr;
  607. }
  608. resampledFrame_.reset();
  609. needResampling_ = false;
  610. }
  611. void AudioCapturer::cleanupAudioProcessing() {
  612. // 清理音频处理相关资源
  613. noiseReductionEnabled_ = false;
  614. echoCancellationEnabled_ = false;
  615. currentVolume_ = 1.0f;
  616. audioLevel_ = 0.0f;
  617. }
  618. std::vector<AudioDeviceInfo> AudioCapturer::enumerateMicrophones() const {
  619. #ifdef _WIN32
  620. return enumerateDirectSoundDevices();
  621. #elif defined(__linux__)
  622. return enumerateALSADevices();
  623. #elif defined(__APPLE__)
  624. return enumerateCoreAudioDevices();
  625. #else
  626. return {};
  627. #endif
  628. }
  629. std::vector<AudioDeviceInfo> AudioCapturer::enumerateSystemAudioDevices() const {
  630. #ifdef _WIN32
  631. return enumerateWASAPIDevices();
  632. #elif defined(__linux__)
  633. return enumeratePulseAudioDevices();
  634. #elif defined(__APPLE__)
  635. return enumerateCoreAudioDevices();
  636. #else
  637. return {};
  638. #endif
  639. }
  640. const AVInputFormat* AudioCapturer::getPlatformInputFormat() const {
  641. #ifdef _WIN32
  642. if (audioParams_.type == CapturerType::AUDIO_MIC) {
  643. return av_find_input_format("dshow");
  644. } else {
  645. return av_find_input_format("dshow"); // WASAPI通过dshow访问
  646. }
  647. #elif defined(__linux__)
  648. if (audioParams_.type == CapturerType::AUDIO_MIC) {
  649. return av_find_input_format("alsa");
  650. } else {
  651. return av_find_input_format("pulse");
  652. }
  653. #elif defined(__APPLE__)
  654. return av_find_input_format("avfoundation");
  655. #endif
  656. return nullptr;
  657. }
  658. std::string AudioCapturer::getPlatformDeviceName() const {
  659. #ifdef _WIN32
  660. if (audioParams_.type == CapturerType::AUDIO_MIC) {
  661. if (!audioParams_.deviceName.empty()) {
  662. return "audio=" + audioParams_.deviceName;
  663. } else {
  664. // 尝试获取可用设备列表中的第一个设备
  665. auto devices = enumerateDirectSoundDevices();
  666. if (!devices.empty() && audioParams_.micIndex < devices.size()) {
  667. // 使用设备的实际名称
  668. return "audio=" + devices[audioParams_.micIndex].name;
  669. } else {
  670. // 回退到设备索引
  671. return "audio=" + std::to_string(audioParams_.micIndex);
  672. }
  673. }
  674. } else {
  675. return "audio=" + (audioParams_.audioDevice.empty() ? "@device_cm_{33D9A762-90C8-11D0-BD43-00A0C911CE86}\\wave_{00000000-0000-0000-0000-000000000000}" : audioParams_.audioDevice);
  676. }
  677. #elif defined(__linux__)
  678. if (audioParams_.type == CapturerType::AUDIO_MIC) {
  679. if (!audioParams_.deviceName.empty()) {
  680. return audioParams_.deviceName;
  681. } else {
  682. return "hw:" + std::to_string(audioParams_.micIndex);
  683. }
  684. } else {
  685. return audioParams_.audioDevice.empty() ? "default" : audioParams_.audioDevice;
  686. }
  687. #elif defined(__APPLE__)
  688. if (audioParams_.type == CapturerType::AUDIO_MIC) {
  689. return ":" + std::to_string(audioParams_.micIndex);
  690. } else {
  691. return ":none";
  692. }
  693. #endif
  694. return "";
  695. }
  696. #ifdef _WIN32
  697. std::vector<AudioDeviceInfo> AudioCapturer::enumerateDirectSoundDevices() const {
  698. std::vector<AudioDeviceInfo> devices;
  699. // 基于FFmpeg输出,我们知道有一个麦克风设备可用
  700. // 添加已知的麦克风设备
  701. AudioDeviceInfo micDevice;
  702. micDevice.id = "0";
  703. micDevice.name = "Microphone (High Definition Audio Device)";
  704. micDevice.description = "High Definition Audio Device Microphone";
  705. micDevice.isDefault = true;
  706. micDevice.isInput = true;
  707. micDevice.supportedSampleRates = {8000, 16000, 22050, 44100, 48000};
  708. micDevice.supportedChannels = {1, 2};
  709. micDevice.supportedFormats = {AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLT};
  710. devices.push_back(micDevice);
  711. // 添加默认设备作为后备
  712. AudioDeviceInfo defaultDevice;
  713. defaultDevice.id = "default";
  714. defaultDevice.name = "默认音频设备";
  715. defaultDevice.description = "系统默认音频输入设备";
  716. defaultDevice.isDefault = false;
  717. defaultDevice.isInput = true;
  718. defaultDevice.supportedSampleRates = {8000, 16000, 22050, 44100, 48000};
  719. defaultDevice.supportedChannels = {1, 2};
  720. defaultDevice.supportedFormats = {AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLT};
  721. devices.push_back(defaultDevice);
  722. return devices;
  723. }
  724. std::vector<AudioDeviceInfo> AudioCapturer::enumerateWASAPIDevices() const {
  725. std::vector<AudioDeviceInfo> devices;
  726. // 简化的WASAPI设备枚举
  727. AudioDeviceInfo device;
  728. device.id = "wasapi_default";
  729. device.name = "默认系统音频";
  730. device.description = "WASAPI系统音频设备";
  731. device.isDefault = true;
  732. device.isInput = false;
  733. // 添加常见采样率
  734. device.supportedSampleRates = {44100, 48000, 96000};
  735. // 添加常见声道数
  736. device.supportedChannels = {2, 6, 8};
  737. // 添加支持的采样格式
  738. device.supportedFormats = {
  739. AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_FLT
  740. };
  741. devices.push_back(device);
  742. return devices;
  743. }
  744. ErrorCode AudioCapturer::setupDirectSoundMicrophone() {
  745. AV_LOGGER_INFO("设置DirectSound麦克风");
  746. return openInputDevice();
  747. }
  748. ErrorCode AudioCapturer::setupWASAPISystemAudio() {
  749. AV_LOGGER_INFO("设置WASAPI系统音频");
  750. return openInputDevice();
  751. }
  752. #endif
  // AudioCaptureFactory implementation
  754. std::unique_ptr<AudioCapturer> AudioCapturer::AudioCaptureFactory::createMicrophone(int micIndex) {
  755. auto capturer = std::make_unique<AudioCapturer>();
  756. AudioCaptureParams params(CapturerType::AUDIO_MIC);
  757. params.micIndex = micIndex;
  758. ErrorCode result = capturer->initialize(params);
  759. if (result != ErrorCode::SUCCESS) {
  760. AV_LOGGER_ERRORF("创建麦克风采集器失败: {}", static_cast<int>(result));
  761. return nullptr;
  762. }
  763. return capturer;
  764. }
  765. std::unique_ptr<AudioCapturer> AudioCapturer::AudioCaptureFactory::createSystemAudio(bool loopback) {
  766. auto capturer = std::make_unique<AudioCapturer>();
  767. AudioCaptureParams params(loopback ? CapturerType::AUDIO_LOOPBACK : CapturerType::AUDIO_SYSTEM);
  768. params.captureLoopback = loopback;
  769. ErrorCode result = capturer->initialize(params);
  770. if (result != ErrorCode::SUCCESS) {
  771. AV_LOGGER_ERRORF("创建系统音频采集器失败: {}", static_cast<int>(result));
  772. return nullptr;
  773. }
  774. return capturer;
  775. }
  776. std::unique_ptr<AudioCapturer> AudioCapturer::AudioCaptureFactory::createBestMicrophone() {
  777. return createMicrophone(0); // 默认使用第一个麦克风
  778. }
  779. } // namespace capture
  780. } // namespace av