codec_audio_decoder.cpp 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581
  1. #include "codec_audio_decoder.h"
  2. #include "../base/logger.h"
  3. #include "../base/media_common.h"
  4. #include <algorithm>
  5. #include <sstream>
  6. #include <thread>
  7. extern "C" {
  8. #include <libavcodec/avcodec.h>
  9. #include <libavutil/channel_layout.h>
  10. #include <libavutil/opt.h>
  11. #include <libswresample/swresample.h>
  12. }
  13. namespace av {
  14. namespace codec {
  15. // 静态成员初始化
  16. std::vector<std::string> AudioDecoder::supportedDecoders_;
  17. std::once_flag AudioDecoder::decodersInitFlag_;
  18. AudioDecoder::AudioDecoder() : AbstractDecoder(MediaType::AUDIO) {
  19. AV_LOGGER_DEBUG("创建音频解码器");
  20. }
  21. AudioDecoder::~AudioDecoder() {
  22. close();
  23. AV_LOGGER_DEBUG("音频解码器已销毁");
  24. }
  25. ErrorCode AudioDecoder::initialize(const CodecParams& params) {
  26. if (params.type != MediaType::AUDIO) {
  27. AV_LOGGER_ERROR("参数类型不是音频");
  28. return ErrorCode::INVALID_ARGUMENT;
  29. }
  30. audioParams_ = static_cast<const AudioDecoderParams&>(params);
  31. if (!validateParams(params)) {
  32. return ErrorCode::INVALID_PARAMS;
  33. }
  34. setState(CodecState::IDLE);
  35. AV_LOGGER_INFOF("音频解码器初始化成功: {}", audioParams_.codecName);
  36. return ErrorCode::OK;
  37. }
  38. ErrorCode AudioDecoder::open(const CodecParams& params) {
  39. std::lock_guard<std::mutex> lock(decodeMutex_);
  40. // 如果提供了参数,先初始化
  41. if (params.type != MediaType::UNKNOWN) {
  42. ErrorCode initResult = initialize(params);
  43. if (initResult != ErrorCode::OK) {
  44. return initResult;
  45. }
  46. }
  47. if (state_ != CodecState::IDLE) {
  48. AV_LOGGER_ERROR("解码器状态无效,无法打开");
  49. return ErrorCode::INVALID_STATE;
  50. }
  51. ErrorCode result = initDecoder();
  52. if (result != ErrorCode::OK) {
  53. return result;
  54. }
  55. setState(CodecState::OPENED);
  56. AV_LOGGER_INFOF("音频解码器已打开: {} ({}Hz, {}ch, {})",
  57. audioParams_.codecName,
  58. codecCtx_->sample_rate,
  59. codecCtx_->ch_layout.nb_channels,
  60. av_get_sample_fmt_name(codecCtx_->sample_fmt));
  61. return ErrorCode::OK;
  62. }
  63. void AudioDecoder::close() {
  64. std::lock_guard<std::mutex> lock(decodeMutex_);
  65. if (state_ == CodecState::IDLE) {
  66. return;
  67. }
  68. // 清理重采样器
  69. cleanupResampler();
  70. codecCtx_.reset();
  71. codec_ = nullptr;
  72. needResampling_ = false;
  73. setState(CodecState::IDLE);
  74. AV_LOGGER_DEBUG("音频解码器已关闭");
  75. }
  76. ErrorCode AudioDecoder::flush() {
  77. std::lock_guard<std::mutex> lock(decodeMutex_);
  78. if (state_ != CodecState::OPENED && state_ != CodecState::RUNNING) {
  79. return ErrorCode::INVALID_STATE;
  80. }
  81. if (codecCtx_) {
  82. avcodec_flush_buffers(codecCtx_.get());
  83. }
  84. // 重置重采样器
  85. if (swrCtx_) {
  86. swr_close(swrCtx_);
  87. needResampling_ = false;
  88. }
  89. setState(CodecState::OPENED);
  90. AV_LOGGER_DEBUG("音频解码器已重置");
  91. return ErrorCode::OK;
  92. }
  93. ErrorCode AudioDecoder::reset() {
  94. return flush();
  95. }
  96. ErrorCode AudioDecoder::decode(const AVPacketPtr& packet, std::vector<AVFramePtr>& frames) {
  97. std::lock_guard<std::mutex> lock(decodeMutex_);
  98. if (state_ != CodecState::OPENED && state_ != CodecState::RUNNING) {
  99. return ErrorCode::INVALID_STATE;
  100. }
  101. setState(CodecState::RUNNING);
  102. auto startTime = std::chrono::high_resolution_clock::now();
  103. ErrorCode result = decodeFrame(packet, frames);
  104. auto endTime = std::chrono::high_resolution_clock::now();
  105. double processTime = std::chrono::duration<double, std::milli>(endTime - startTime).count();
  106. int totalSamples = 0;
  107. for (const auto& frame : frames) {
  108. if (frame) {
  109. totalSamples += frame->nb_samples;
  110. }
  111. }
  112. updateStats(result == ErrorCode::OK, processTime,
  113. packet ? packet->size : 0, totalSamples);
  114. if (frameCallback_) {
  115. for (const auto& frame : frames) {
  116. frameCallback_(frame);
  117. }
  118. }
  119. return result;
  120. }
  121. ErrorCode AudioDecoder::finishDecode(std::vector<AVFramePtr>& frames) {
  122. return decode(nullptr, frames); // 发送空包来刷新解码器
  123. }
  124. bool AudioDecoder::validateParams(const CodecParams& params) {
  125. if (params.type != MediaType::AUDIO) {
  126. AV_LOGGER_ERROR("参数媒体类型不是音频");
  127. return false;
  128. }
  129. const auto& audioParams = static_cast<const AudioDecoderParams&>(params);
  130. if (audioParams.codecName.empty()) {
  131. AV_LOGGER_ERROR("解码器名称为空");
  132. return false;
  133. }
  134. if (audioParams.sampleRate <= 0) {
  135. AV_LOGGER_ERROR("采样率无效");
  136. return false;
  137. }
  138. if (audioParams.channels <= 0) {
  139. AV_LOGGER_ERROR("声道数无效");
  140. return false;
  141. }
  142. return true;
  143. }
  144. ErrorCode AudioDecoder::initDecoder() {
  145. // 查找解码器
  146. codec_ = avcodec_find_decoder_by_name(audioParams_.codecName.c_str());
  147. if (!codec_) {
  148. AV_LOGGER_ERRORF("未找到解码器: {}", audioParams_.codecName);
  149. return ErrorCode::CODEC_NOT_FOUND;
  150. }
  151. if (codec_->type != AVMEDIA_TYPE_AUDIO) {
  152. AV_LOGGER_ERROR("解码器类型不是音频");
  153. return ErrorCode::INVALID_ARGUMENT;
  154. }
  155. // 创建解码上下文
  156. codecCtx_ = makeAVCodecContext(codec_);
  157. if (!codecCtx_) {
  158. AV_LOGGER_ERROR("分配解码上下文失败");
  159. return ErrorCode::OUT_OF_MEMORY;
  160. }
  161. // 设置解码器参数
  162. ErrorCode result = setupDecoderParams();
  163. if (result != ErrorCode::OK) {
  164. return result;
  165. }
  166. // 打开解码器前的详细日志
  167. AV_LOGGER_INFOF("准备打开音频解码器: {}", audioParams_.codecName);
  168. AV_LOGGER_INFOF("解码器参数: 采样率: {}, 声道: {}, 格式: {}, 线程数: {}",
  169. codecCtx_->sample_rate,
  170. codecCtx_->ch_layout.nb_channels,
  171. av_get_sample_fmt_name(codecCtx_->sample_fmt),
  172. codecCtx_->thread_count);
  173. // 打开解码器
  174. int ret = avcodec_open2(codecCtx_.get(), codec_, nullptr);
  175. if (ret < 0) {
  176. AV_LOGGER_ERRORF("打开音频解码器失败: {} (错误码: {})",
  177. ffmpeg_utils::errorToString(ret), ret);
  178. // 详细错误分析
  179. if (ret == AVERROR(EINVAL)) {
  180. AV_LOGGER_ERROR("解码器参数无效 - 可能的原因:");
  181. AV_LOGGER_ERROR(" 1. 不支持的采样格式或参数组合");
  182. AV_LOGGER_ERROR(" 2. 采样率或声道数不支持");
  183. } else if (ret == AVERROR(ENOMEM)) {
  184. AV_LOGGER_ERROR("内存不足 - 无法分配解码器资源");
  185. }
  186. return static_cast<ErrorCode>(ret);
  187. }
  188. AV_LOGGER_INFOF("音频解码器打开成功: {}", audioParams_.codecName);
  189. return ErrorCode::OK;
  190. }
  191. ErrorCode AudioDecoder::setupDecoderParams() {
  192. // 设置线程数
  193. if (audioParams_.threadCount > 0) {
  194. codecCtx_->thread_count = audioParams_.threadCount;
  195. } else {
  196. codecCtx_->thread_count = std::thread::hardware_concurrency();
  197. }
  198. // 设置采样率(如果指定)
  199. if (audioParams_.sampleRate > 0) {
  200. codecCtx_->sample_rate = audioParams_.sampleRate;
  201. }
  202. // 设置声道数(如果指定)
  203. if (audioParams_.channels > 0) {
  204. av_channel_layout_default(&codecCtx_->ch_layout, audioParams_.channels);
  205. }
  206. // 设置采样格式(如果指定)
  207. if (audioParams_.sampleFormat != AV_SAMPLE_FMT_NONE) {
  208. codecCtx_->sample_fmt = audioParams_.sampleFormat;
  209. }
  210. // 低延迟设置
  211. if (audioParams_.lowLatency) {
  212. codecCtx_->flags |= AV_CODEC_FLAG_LOW_DELAY;
  213. codecCtx_->flags2 |= AV_CODEC_FLAG2_FAST;
  214. }
  215. // 针对特定解码器的优化设置
  216. if (audioParams_.codecName == "aac") {
  217. // AAC 特定设置
  218. av_opt_set(codecCtx_->priv_data, "dual_mono_mode", "stereo", 0);
  219. } else if (audioParams_.codecName == "mp3" || audioParams_.codecName == "mp3float") {
  220. // MP3 特定设置
  221. if (audioParams_.lowLatency) {
  222. av_opt_set_int(codecCtx_->priv_data, "skip_frame", 0, 0);
  223. }
  224. } else if (audioParams_.codecName == "opus") {
  225. // Opus 特定设置
  226. if (audioParams_.lowLatency) {
  227. av_opt_set_int(codecCtx_->priv_data, "apply_phase_inv", 0, 0);
  228. }
  229. }
  230. return ErrorCode::OK;
  231. }
  232. ErrorCode AudioDecoder::decodeFrame(const AVPacketPtr& packet, std::vector<AVFramePtr>& frames) {
  233. // 发送包到解码器
  234. int ret = avcodec_send_packet(codecCtx_.get(), packet ? packet.get() : nullptr);
  235. if (ret < 0 && ret != AVERROR_EOF) {
  236. AV_LOGGER_ERRORF("发送包到音频解码器失败: {}", ffmpeg_utils::errorToString(ret));
  237. return static_cast<ErrorCode>(ret);
  238. }
  239. // 接收解码后的帧
  240. return receiveFrames(frames);
  241. }
  242. ErrorCode AudioDecoder::receiveFrames(std::vector<AVFramePtr>& frames) {
  243. while (true) {
  244. AVFramePtr frame = makeAVFrame();
  245. if (!frame) {
  246. return ErrorCode::OUT_OF_MEMORY;
  247. }
  248. int ret = avcodec_receive_frame(codecCtx_.get(), frame.get());
  249. if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
  250. break; // 需要更多输入或已结束
  251. }
  252. if (ret < 0) {
  253. AV_LOGGER_ERRORF("接收音频解码帧失败: {}", ffmpeg_utils::errorToString(ret));
  254. return static_cast<ErrorCode>(ret);
  255. }
  256. // 格式转换(如果需要)
  257. auto convertedFrame = convertFrame(std::move(frame));
  258. if (convertedFrame) {
  259. frames.push_back(std::move(convertedFrame));
  260. }
  261. }
  262. return ErrorCode::OK;
  263. }
  264. AVFramePtr AudioDecoder::convertFrame(AVFramePtr frame) {
  265. if (!frame) {
  266. return nullptr;
  267. }
  268. // 检查是否需要重采样
  269. bool needConvert = (frame->format != audioParams_.sampleFormat) ||
  270. (frame->sample_rate != audioParams_.sampleRate) ||
  271. (frame->ch_layout.nb_channels != audioParams_.channels);
  272. if (!needConvert) {
  273. return std::move(frame); // 格式已经匹配,直接返回
  274. }
  275. // 设置重采样器
  276. ErrorCode result = setupResampler(frame.get());
  277. if (result != ErrorCode::OK) {
  278. AV_LOGGER_ERROR("设置重采样器失败");
  279. return nullptr;
  280. }
  281. // 创建输出帧
  282. AVFramePtr outFrame = makeAVFrame();
  283. if (!outFrame) {
  284. return nullptr;
  285. }
  286. outFrame->format = audioParams_.sampleFormat;
  287. outFrame->sample_rate = audioParams_.sampleRate;
  288. av_channel_layout_default(&outFrame->ch_layout, audioParams_.channels);
  289. // 计算输出采样数
  290. int outSamples = swr_get_out_samples(swrCtx_, frame->nb_samples);
  291. outFrame->nb_samples = outSamples;
  292. // 分配输出缓冲区
  293. if (av_frame_get_buffer(outFrame.get(), 0) < 0) {
  294. AV_LOGGER_ERROR("分配音频输出缓冲区失败");
  295. return nullptr;
  296. }
  297. // 执行重采样
  298. int convertedSamples = swr_convert(swrCtx_,
  299. outFrame->data, outSamples,
  300. (const uint8_t**)frame->data, frame->nb_samples);
  301. if (convertedSamples < 0) {
  302. AV_LOGGER_ERRORF("音频重采样失败: {}", ffmpeg_utils::errorToString(convertedSamples));
  303. return nullptr;
  304. }
  305. outFrame->nb_samples = convertedSamples;
  306. // 复制时间戳等信息
  307. av_frame_copy_props(outFrame.get(), frame.get());
  308. return outFrame;
  309. }
  310. ErrorCode AudioDecoder::setupResampler(const AVFrame* inputFrame) {
  311. if (!inputFrame) {
  312. return ErrorCode::INVALID_ARGUMENT;
  313. }
  314. // 检查是否需要重新配置重采样器
  315. bool needReconfigure = !swrCtx_ ||
  316. !needResampling_ ||
  317. (inputFrame->format != codecCtx_->sample_fmt) ||
  318. (inputFrame->sample_rate != codecCtx_->sample_rate) ||
  319. (inputFrame->ch_layout.nb_channels != codecCtx_->ch_layout.nb_channels);
  320. if (!needReconfigure) {
  321. return ErrorCode::OK;
  322. }
  323. // 清理旧的重采样器
  324. cleanupResampler();
  325. // 创建新的重采样器
  326. swrCtx_ = swr_alloc();
  327. if (!swrCtx_) {
  328. AV_LOGGER_ERROR("分配重采样器失败");
  329. return ErrorCode::OUT_OF_MEMORY;
  330. }
  331. // 设置输入参数
  332. av_opt_set_chlayout(swrCtx_, "in_chlayout", &inputFrame->ch_layout, 0);
  333. av_opt_set_int(swrCtx_, "in_sample_rate", inputFrame->sample_rate, 0);
  334. av_opt_set_sample_fmt(swrCtx_, "in_sample_fmt", static_cast<AVSampleFormat>(inputFrame->format), 0);
  335. // 设置输出参数
  336. AVChannelLayout out_ch_layout;
  337. av_channel_layout_default(&out_ch_layout, audioParams_.channels);
  338. av_opt_set_chlayout(swrCtx_, "out_chlayout", &out_ch_layout, 0);
  339. av_opt_set_int(swrCtx_, "out_sample_rate", audioParams_.sampleRate, 0);
  340. av_opt_set_sample_fmt(swrCtx_, "out_sample_fmt", audioParams_.sampleFormat, 0);
  341. // 初始化重采样器
  342. int ret = swr_init(swrCtx_);
  343. if (ret < 0) {
  344. AV_LOGGER_ERRORF("初始化重采样器失败: {}", ffmpeg_utils::errorToString(ret));
  345. cleanupResampler();
  346. return static_cast<ErrorCode>(ret);
  347. }
  348. needResampling_ = true;
  349. AV_LOGGER_INFOF("重采样器配置成功: {}Hz,{}ch,{} -> {}Hz,{}ch,{}",
  350. inputFrame->sample_rate, inputFrame->ch_layout.nb_channels,
  351. av_get_sample_fmt_name(static_cast<AVSampleFormat>(inputFrame->format)),
  352. audioParams_.sampleRate, audioParams_.channels,
  353. av_get_sample_fmt_name(audioParams_.sampleFormat));
  354. return ErrorCode::OK;
  355. }
  356. void AudioDecoder::cleanupResampler() {
  357. if (swrCtx_) {
  358. swr_free(&swrCtx_);
  359. swrCtx_ = nullptr;
  360. }
  361. needResampling_ = false;
  362. }
  363. void AudioDecoder::updateStats(bool success, double decodeTime, size_t dataSize, int samples) {
  364. std::lock_guard<std::mutex> lock(statsMutex_);
  365. if (success) {
  366. stats_.decodedFrames++;
  367. stats_.totalSamples += samples;
  368. stats_.totalBytes += dataSize;
  369. stats_.totalDecodeTime += decodeTime;
  370. // 如果启用了重采样,更新重采样帧数
  371. if (isResamplingEnabled()) {
  372. stats_.resampledFrames++;
  373. }
  374. // 更新平均解码时间
  375. if (stats_.decodedFrames == 1) {
  376. stats_.avgDecodeTime = decodeTime;
  377. } else {
  378. stats_.avgDecodeTime = stats_.totalDecodeTime / stats_.decodedFrames;
  379. }
  380. } else {
  381. stats_.errorCount++;
  382. }
  383. }
  384. AudioDecoder::DecoderStats AudioDecoder::getStats() const {
  385. std::lock_guard<std::mutex> lock(statsMutex_);
  386. return stats_;
  387. }
  388. void AudioDecoder::resetStats() {
  389. std::lock_guard<std::mutex> lock(statsMutex_);
  390. stats_ = DecoderStats{};
  391. }
  392. std::string AudioDecoder::getDecoderName() const {
  393. return audioParams_.codecName;
  394. }
  395. bool AudioDecoder::isResamplingEnabled() const {
  396. return audioParams_.enableResampling && needResampling_;
  397. }
  398. std::vector<std::string> AudioDecoder::getSupportedDecoders() {
  399. std::call_once(decodersInitFlag_, findUsableDecoders);
  400. return supportedDecoders_;
  401. }
  402. bool AudioDecoder::isDecoderSupported(const std::string& codecName) {
  403. auto decoders = getSupportedDecoders();
  404. return std::find(decoders.begin(), decoders.end(), codecName) != decoders.end();
  405. }
  406. std::string AudioDecoder::getRecommendedDecoder(const std::string& codecName) {
  407. auto decoders = getSupportedDecoders();
  408. if (!codecName.empty()) {
  409. // 查找指定编解码格式的解码器
  410. if (std::find(decoders.begin(), decoders.end(), codecName) != decoders.end()) {
  411. return codecName;
  412. }
  413. // 查找相关的解码器
  414. for (const auto& decoder : decoders) {
  415. if (decoder.find(codecName) != std::string::npos) {
  416. return decoder;
  417. }
  418. }
  419. }
  420. // 返回默认推荐的解码器
  421. const char* preferredDecoders[] = {"aac", "mp3", "opus", "vorbis", "flac"};
  422. for (const char* preferred : preferredDecoders) {
  423. if (std::find(decoders.begin(), decoders.end(), preferred) != decoders.end()) {
  424. return preferred;
  425. }
  426. }
  427. return decoders.empty() ? "" : decoders[0];
  428. }
  429. void AudioDecoder::findUsableDecoders() {
  430. AV_LOGGER_INFO("查找可用的音频解码器...");
  431. // 测试所有音频解码器
  432. for (const char* decoder : AUDIO_DECODERS) {
  433. if (decoder && CodecFactory::isCodecSupported(decoder, CodecType::DECODER, MediaType::AUDIO)) {
  434. supportedDecoders_.emplace_back(decoder);
  435. AV_LOGGER_INFOF("找到音频解码器: {}", decoder);
  436. }
  437. }
  438. AV_LOGGER_INFOF("总共找到 {} 个可用的音频解码器", supportedDecoders_.size());
  439. }
  440. // AudioDecoderFactory 实现
  441. std::unique_ptr<AudioDecoder> AudioDecoder::AudioDecoderFactory::create(const std::string& codecName) {
  442. auto decoder = std::make_unique<AudioDecoder>();
  443. if (!codecName.empty()) {
  444. if (!CodecFactory::isCodecSupported(codecName, CodecType::DECODER, MediaType::AUDIO)) {
  445. AV_LOGGER_ERRORF("不支持的音频解码器: {}", codecName);
  446. return nullptr;
  447. }
  448. }
  449. return decoder;
  450. }
  451. std::unique_ptr<AudioDecoder> AudioDecoder::AudioDecoderFactory::createBest() {
  452. std::string codecName = AudioDecoder::getRecommendedDecoder();
  453. if (codecName.empty()) {
  454. AV_LOGGER_ERROR("未找到可用的音频解码器");
  455. return nullptr;
  456. }
  457. return create(codecName);
  458. }
  459. } // namespace codec
  460. } // namespace av