codec_audio_encoder.cpp 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654
  1. #include "codec_audio_encoder.h"
  2. #include "../base/logger.h"
  3. #include <algorithm>
  4. #include <chrono>
  5. namespace av {
  6. namespace codec {
  7. // 静态成员初始化
  8. std::vector<std::string> AudioEncoder::supportedEncoders_;
  9. std::once_flag AudioEncoder::encodersInitFlag_;
// AudioResampler implementation
  11. AudioResampler::AudioResampler()
  12. : swrCtx_(nullptr)
  13. , srcFormat_(AV_SAMPLE_FMT_NONE)
  14. , dstFormat_(AV_SAMPLE_FMT_NONE)
  15. , srcSampleRate_(0)
  16. , dstSampleRate_(0)
  17. , dstFrameSize_(0)
  18. , initialized_(false) {
  19. av_channel_layout_default(&srcLayout_, 0);
  20. av_channel_layout_default(&dstLayout_, 0);
  21. }
  22. AudioResampler::~AudioResampler() {
  23. if (swrCtx_) {
  24. swr_free(&swrCtx_);
  25. }
  26. av_channel_layout_uninit(&srcLayout_);
  27. av_channel_layout_uninit(&dstLayout_);
  28. }
  29. bool AudioResampler::init(const AVChannelLayout& srcLayout, AVSampleFormat srcFormat, int srcSampleRate,
  30. const AVChannelLayout& dstLayout, AVSampleFormat dstFormat, int dstSampleRate) {
  31. if (swrCtx_) {
  32. swr_free(&swrCtx_);
  33. }
  34. av_channel_layout_uninit(&srcLayout_);
  35. av_channel_layout_uninit(&dstLayout_);
  36. // 复制声道布局
  37. if (av_channel_layout_copy(&srcLayout_, &srcLayout) < 0 ||
  38. av_channel_layout_copy(&dstLayout_, &dstLayout) < 0) {
  39. AV_LOGGER_ERROR("复制声道布局失败");
  40. return false;
  41. }
  42. srcFormat_ = srcFormat;
  43. dstFormat_ = dstFormat;
  44. srcSampleRate_ = srcSampleRate;
  45. dstSampleRate_ = dstSampleRate;
  46. // 创建重采样上下文
  47. int ret = swr_alloc_set_opts2(&swrCtx_,
  48. &dstLayout_, dstFormat_, dstSampleRate_,
  49. &srcLayout_, srcFormat_, srcSampleRate_,
  50. 0, nullptr);
  51. if (ret < 0) {
  52. AV_LOGGER_ERRORF("创建重采样上下文失败: {}", ffmpeg_utils::errorToString(ret));
  53. return false;
  54. }
  55. // 初始化重采样器
  56. ret = swr_init(swrCtx_);
  57. if (ret < 0) {
  58. AV_LOGGER_ERRORF("初始化重采样器失败: {}", ffmpeg_utils::errorToString(ret));
  59. swr_free(&swrCtx_);
  60. return false;
  61. }
  62. // 计算输出帧大小
  63. dstFrameSize_ = av_rescale_rnd(1024, dstSampleRate_, srcSampleRate_, AV_ROUND_UP);
  64. // 创建输出帧
  65. dstFrame_ = makeAVFrame();
  66. if (!dstFrame_) {
  67. AV_LOGGER_ERROR("分配输出帧失败");
  68. return false;
  69. }
  70. dstFrame_->format = dstFormat_;
  71. dstFrame_->sample_rate = dstSampleRate_;
  72. av_channel_layout_copy(&dstFrame_->ch_layout, &dstLayout_);
  73. dstFrame_->nb_samples = dstFrameSize_;
  74. ret = av_frame_get_buffer(dstFrame_.get(), 0);
  75. if (ret < 0) {
  76. AV_LOGGER_ERRORF("分配帧缓冲区失败: {}", ffmpeg_utils::errorToString(ret));
  77. return false;
  78. }
  79. initialized_ = true;
  80. return true;
  81. }
  82. AVFramePtr AudioResampler::resample(const AVFramePtr& srcFrame) {
  83. if (!initialized_ || !srcFrame) {
  84. return nullptr;
  85. }
  86. // 计算输出样本数
  87. int dstSamples = av_rescale_rnd(srcFrame->nb_samples, dstSampleRate_, srcSampleRate_, AV_ROUND_UP);
  88. // 确保输出帧有足够的空间
  89. if (dstFrame_->nb_samples < dstSamples) {
  90. av_frame_unref(dstFrame_.get());
  91. dstFrame_->nb_samples = dstSamples;
  92. int ret = av_frame_get_buffer(dstFrame_.get(), 0);
  93. if (ret < 0) {
  94. AV_LOGGER_ERRORF("重新分配帧缓冲区失败: {}", ffmpeg_utils::errorToString(ret));
  95. return nullptr;
  96. }
  97. }
  98. // 执行重采样
  99. int convertedSamples = swr_convert(swrCtx_,
  100. dstFrame_->data, dstSamples,
  101. const_cast<const uint8_t**>(srcFrame->data), srcFrame->nb_samples);
  102. if (convertedSamples < 0) {
  103. AV_LOGGER_ERRORF("音频重采样失败: {}", ffmpeg_utils::errorToString(convertedSamples));
  104. return nullptr;
  105. }
  106. dstFrame_->nb_samples = convertedSamples;
  107. // 复制时间戳等信息
  108. av_frame_copy_props(dstFrame_.get(), srcFrame.get());
  109. // 调整时间戳
  110. if (srcFrame->pts != AV_NOPTS_VALUE) {
  111. dstFrame_->pts = av_rescale_q(srcFrame->pts, {1, srcSampleRate_}, {1, dstSampleRate_});
  112. }
  113. return std::move(dstFrame_);
  114. }
  115. std::vector<AVFramePtr> AudioResampler::flush() {
  116. std::vector<AVFramePtr> frames;
  117. if (!initialized_) {
  118. return frames;
  119. }
  120. while (true) {
  121. AVFramePtr frame = makeAVFrame();
  122. if (!frame) {
  123. break;
  124. }
  125. frame->format = dstFormat_;
  126. frame->sample_rate = dstSampleRate_;
  127. av_channel_layout_copy(&frame->ch_layout, &dstLayout_);
  128. frame->nb_samples = dstFrameSize_;
  129. int ret = av_frame_get_buffer(frame.get(), 0);
  130. if (ret < 0) {
  131. break;
  132. }
  133. int convertedSamples = swr_convert(swrCtx_,
  134. frame->data, dstFrameSize_,
  135. nullptr, 0);
  136. if (convertedSamples <= 0) {
  137. break;
  138. }
  139. frame->nb_samples = convertedSamples;
  140. frames.push_back(std::move(frame));
  141. }
  142. return frames;
  143. }
// AudioEncoder implementation
  145. AudioEncoder::AudioEncoder()
  146. : AbstractEncoder(MediaType::AUDIO) {
  147. AV_LOGGER_DEBUG("创建音频编码器");
  148. }
  149. AudioEncoder::~AudioEncoder() {
  150. close();
  151. AV_LOGGER_DEBUG("音频编码器已销毁");
  152. }
  153. ErrorCode AudioEncoder::open(const CodecParams& params) {
  154. std::lock_guard<std::mutex> lock(encodeMutex_);
  155. if (state_ != CodecState::IDLE && state_ != CodecState::CLOSED) {
  156. AV_LOGGER_WARNING("编码器已打开,先关闭再重新打开");
  157. close();
  158. }
  159. if (!validateParams(params)) {
  160. return ErrorCode::INVALID_ARGUMENT;
  161. }
  162. audioParams_ = static_cast<const AudioEncoderParams&>(params);
  163. params_ = params;
  164. ErrorCode result = initEncoder();
  165. if (result != ErrorCode::OK) {
  166. close();
  167. return result;
  168. }
  169. setState(CodecState::OPENED);
  170. AV_LOGGER_INFOF("音频编码器已打开: {} ({}Hz, {}ch, {}kbps)",
  171. audioParams_.codecName, audioParams_.sampleRate,
  172. audioParams_.channels, audioParams_.bitRate / 1000);
  173. return ErrorCode::OK;
  174. }
  175. void AudioEncoder::close() {
  176. std::lock_guard<std::mutex> lock(encodeMutex_);
  177. if (state_ == CodecState::CLOSED || state_ == CodecState::IDLE) {
  178. return;
  179. }
  180. // 清理资源
  181. resampler_.reset();
  182. convertedFrame_.reset();
  183. // 清理编解码上下文
  184. codecCtx_.reset();
  185. codec_ = nullptr;
  186. setState(CodecState::CLOSED);
  187. AV_LOGGER_DEBUG("音频编码器已关闭");
  188. }
  189. ErrorCode AudioEncoder::flush() {
  190. std::lock_guard<std::mutex> lock(encodeMutex_);
  191. if (state_ != CodecState::OPENED && state_ != CodecState::RUNNING) {
  192. return ErrorCode::INVALID_STATE;
  193. }
  194. setState(CodecState::FLUSHING);
  195. // 发送空帧来刷新编码器
  196. int ret = avcodec_send_frame(codecCtx_.get(), nullptr);
  197. if (ret < 0 && ret != AVERROR_EOF) {
  198. AV_LOGGER_ERRORF("刷新编码器失败: {}", ffmpeg_utils::errorToString(ret));
  199. reportError(static_cast<ErrorCode>(ret));
  200. return static_cast<ErrorCode>(ret);
  201. }
  202. setState(CodecState::OPENED);
  203. return ErrorCode::OK;
  204. }
  205. ErrorCode AudioEncoder::encode(const AVFramePtr& frame, std::vector<AVPacketPtr>& packets) {
  206. std::lock_guard<std::mutex> lock(encodeMutex_);
  207. if (state_ != CodecState::OPENED && state_ != CodecState::RUNNING) {
  208. return ErrorCode::INVALID_STATE;
  209. }
  210. setState(CodecState::RUNNING);
  211. auto startTime = std::chrono::high_resolution_clock::now();
  212. ErrorCode result = encodeFrame(frame, packets);
  213. auto endTime = std::chrono::high_resolution_clock::now();
  214. double processTime = std::chrono::duration<double, std::milli>(endTime - startTime).count();
  215. updateStats(result == ErrorCode::OK, processTime,
  216. frame ? frame->nb_samples * audioParams_.channels * av_get_bytes_per_sample(static_cast<AVSampleFormat>(frame->format)) : 0);
  217. if (frameCallback_ && frame) {
  218. frameCallback_(frame);
  219. }
  220. for (const auto& packet : packets) {
  221. if (packetCallback_) {
  222. packetCallback_(packet);
  223. }
  224. }
  225. return result;
  226. }
  227. ErrorCode AudioEncoder::finishEncode(std::vector<AVPacketPtr>& packets) {
  228. return flush();
  229. }
  230. bool AudioEncoder::validateParams(const CodecParams& params) {
  231. if (params.type != MediaType::AUDIO) {
  232. AV_LOGGER_ERROR("参数媒体类型不是音频");
  233. return false;
  234. }
  235. const auto& audioParams = static_cast<const AudioEncoderParams&>(params);
  236. if (audioParams.sampleRate <= 0) {
  237. AV_LOGGER_ERROR("采样率无效");
  238. return false;
  239. }
  240. if (audioParams.channels <= 0) {
  241. AV_LOGGER_ERROR("声道数无效");
  242. return false;
  243. }
  244. if (audioParams.bitRate <= 0) {
  245. AV_LOGGER_ERROR("比特率无效");
  246. return false;
  247. }
  248. if (audioParams.codecName.empty()) {
  249. AV_LOGGER_ERROR("编码器名称为空");
  250. return false;
  251. }
  252. return true;
  253. }
  254. ErrorCode AudioEncoder::initEncoder() {
  255. // 查找编码器
  256. codec_ = avcodec_find_encoder_by_name(audioParams_.codecName.c_str());
  257. if (!codec_) {
  258. AV_LOGGER_ERRORF("未找到编码器: {}", audioParams_.codecName);
  259. return ErrorCode::CODEC_NOT_FOUND;
  260. }
  261. if (codec_->type != AVMEDIA_TYPE_AUDIO) {
  262. AV_LOGGER_ERROR("编码器类型不是音频");
  263. return ErrorCode::INVALID_ARGUMENT;
  264. }
  265. // 创建编码上下文
  266. codecCtx_ = makeAVCodecContext(codec_);
  267. if (!codecCtx_) {
  268. AV_LOGGER_ERROR("分配编码上下文失败");
  269. return ErrorCode::OUT_OF_MEMORY;
  270. }
  271. // 设置编码器参数
  272. ErrorCode result = setupEncoderParams();
  273. if (result != ErrorCode::OK) {
  274. return result;
  275. }
  276. // 打开编码器
  277. int ret = avcodec_open2(codecCtx_.get(), codec_, nullptr);
  278. if (ret < 0) {
  279. AV_LOGGER_ERRORF("打开编码器失败: {}", ffmpeg_utils::errorToString(ret));
  280. return static_cast<ErrorCode>(ret);
  281. }
  282. return ErrorCode::OK;
  283. }
  284. ErrorCode AudioEncoder::setupEncoderParams() {
  285. codecCtx_->bit_rate = audioParams_.bitRate;
  286. codecCtx_->sample_rate = audioParams_.sampleRate;
  287. // 设置声道布局
  288. ErrorCode result = setupChannelLayout();
  289. if (result != ErrorCode::OK) {
  290. return result;
  291. }
  292. // 设置采样格式
  293. codecCtx_->sample_fmt = getBestSampleFormat();
  294. // 设置帧大小
  295. if (codec_->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE) {
  296. codecCtx_->frame_size = audioParams_.frameSize;
  297. }
  298. // 设置配置文件
  299. if (!audioParams_.profile.empty()) {
  300. av_opt_set(codecCtx_->priv_data, "profile", audioParams_.profile.c_str(), 0);
  301. }
  302. return ErrorCode::OK;
  303. }
  304. ErrorCode AudioEncoder::setupChannelLayout() {
  305. // 设置声道布局
  306. if (audioParams_.channelLayout.nb_channels > 0) {
  307. av_channel_layout_copy(&codecCtx_->ch_layout, &audioParams_.channelLayout);
  308. } else {
  309. // 根据声道数设置默认布局
  310. av_channel_layout_default(&codecCtx_->ch_layout, audioParams_.channels);
  311. }
  312. return ErrorCode::OK;
  313. }
  314. AVFramePtr AudioEncoder::convertFrame(const AVFramePtr& frame) {
  315. if (!frame) {
  316. return nullptr;
  317. }
  318. // 检查是否需要转换
  319. bool needConvert = false;
  320. if (frame->format != codecCtx_->sample_fmt) {
  321. needConvert = true;
  322. }
  323. if (frame->sample_rate != codecCtx_->sample_rate) {
  324. needConvert = true;
  325. }
  326. if (av_channel_layout_compare(&frame->ch_layout, &codecCtx_->ch_layout) != 0) {
  327. needConvert = true;
  328. }
  329. if (!needConvert) {
  330. // 创建一个新的帧来返回,避免拷贝构造
  331. AVFramePtr resultFrame = makeAVFrame();
  332. if (!resultFrame) {
  333. return nullptr;
  334. }
  335. // 复制帧数据
  336. if (av_frame_ref(resultFrame.get(), frame.get()) < 0) {
  337. return nullptr;
  338. }
  339. return resultFrame;
  340. }
  341. // 创建重采样器
  342. if (!resampler_) {
  343. resampler_ = std::make_unique<AudioResampler>();
  344. if (!resampler_->init(frame->ch_layout, static_cast<AVSampleFormat>(frame->format), frame->sample_rate,
  345. codecCtx_->ch_layout, codecCtx_->sample_fmt, codecCtx_->sample_rate)) {
  346. AV_LOGGER_ERROR("初始化音频重采样器失败");
  347. return nullptr;
  348. }
  349. }
  350. return resampler_->resample(frame);
  351. }
  352. ErrorCode AudioEncoder::encodeFrame(const AVFramePtr& frame, std::vector<AVPacketPtr>& packets) {
  353. AVFramePtr processedFrame;
  354. if (frame) {
  355. // 格式转换
  356. processedFrame = convertFrame(frame);
  357. if (!processedFrame) {
  358. AV_LOGGER_ERROR("音频帧格式转换失败");
  359. return ErrorCode::CONVERSION_FAILED;
  360. }
  361. }
  362. // 发送帧到编码器
  363. int ret = avcodec_send_frame(codecCtx_.get(), processedFrame ? processedFrame.get() : nullptr);
  364. if (ret < 0) {
  365. AV_LOGGER_ERRORF("发送帧到编码器失败: {}", ffmpeg_utils::errorToString(ret));
  366. return static_cast<ErrorCode>(ret);
  367. }
  368. // 接收编码后的包
  369. return receivePackets(packets);
  370. }
  371. ErrorCode AudioEncoder::receivePackets(std::vector<AVPacketPtr>& packets) {
  372. while (true) {
  373. AVPacketPtr packet = makeAVPacket();
  374. if (!packet) {
  375. return ErrorCode::OUT_OF_MEMORY;
  376. }
  377. int ret = avcodec_receive_packet(codecCtx_.get(), packet.get());
  378. if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
  379. break; // 需要更多输入或已结束
  380. }
  381. if (ret < 0) {
  382. AV_LOGGER_ERRORF("接收编码包失败: {}", ffmpeg_utils::errorToString(ret));
  383. return static_cast<ErrorCode>(ret);
  384. }
  385. packets.push_back(std::move(packet));
  386. }
  387. return ErrorCode::OK;
  388. }
  389. AVSampleFormat AudioEncoder::getBestSampleFormat() const {
  390. if (!codec_->sample_fmts) {
  391. return audioParams_.sampleFormat;
  392. }
  393. // 首先尝试使用指定的格式
  394. for (const AVSampleFormat* fmt = codec_->sample_fmts; *fmt != AV_SAMPLE_FMT_NONE; fmt++) {
  395. if (*fmt == audioParams_.sampleFormat) {
  396. return *fmt;
  397. }
  398. }
  399. // 返回第一个支持的格式
  400. return codec_->sample_fmts[0];
  401. }
  402. int AudioEncoder::getBestSampleRate() const {
  403. if (!codec_->supported_samplerates) {
  404. return audioParams_.sampleRate;
  405. }
  406. // 首先尝试使用指定的采样率
  407. for (const int* rate = codec_->supported_samplerates; *rate != 0; rate++) {
  408. if (*rate == audioParams_.sampleRate) {
  409. return *rate;
  410. }
  411. }
  412. // 返回第一个支持的采样率
  413. return codec_->supported_samplerates[0];
  414. }
  415. std::vector<std::string> AudioEncoder::getSupportedEncoders() {
  416. std::call_once(encodersInitFlag_, findSupportedEncoders);
  417. return supportedEncoders_;
  418. }
  419. std::string AudioEncoder::getRecommendedEncoder() {
  420. auto encoders = getSupportedEncoders();
  421. // 优先选择高质量编码器
  422. const char* preferredEncoders[] = {"libfdk_aac", "aac", "libopus", "opus", "libmp3lame", "mp3"};
  423. for (const char* encoder : preferredEncoders) {
  424. if (std::find(encoders.begin(), encoders.end(), encoder) != encoders.end()) {
  425. return encoder;
  426. }
  427. }
  428. return encoders.empty() ? "" : encoders[0];
  429. }
  430. bool AudioEncoder::isSampleFormatSupported(const std::string& codecName, AVSampleFormat format) {
  431. const AVCodec* codec = avcodec_find_encoder_by_name(codecName.c_str());
  432. if (!codec || !codec->sample_fmts) {
  433. return false;
  434. }
  435. for (const AVSampleFormat* fmt = codec->sample_fmts; *fmt != AV_SAMPLE_FMT_NONE; fmt++) {
  436. if (*fmt == format) {
  437. return true;
  438. }
  439. }
  440. return false;
  441. }
  442. bool AudioEncoder::isSampleRateSupported(const std::string& codecName, int sampleRate) {
  443. const AVCodec* codec = avcodec_find_encoder_by_name(codecName.c_str());
  444. if (!codec) {
  445. return false;
  446. }
  447. if (!codec->supported_samplerates) {
  448. return true; // 支持所有采样率
  449. }
  450. for (const int* rate = codec->supported_samplerates; *rate != 0; rate++) {
  451. if (*rate == sampleRate) {
  452. return true;
  453. }
  454. }
  455. return false;
  456. }
  457. bool AudioEncoder::isChannelLayoutSupported(const std::string& codecName, const AVChannelLayout& layout) {
  458. const AVCodec* codec = avcodec_find_encoder_by_name(codecName.c_str());
  459. if (!codec) {
  460. return false;
  461. }
  462. if (!codec->ch_layouts) {
  463. return true; // 支持所有声道布局
  464. }
  465. for (const AVChannelLayout* ch_layout = codec->ch_layouts; ch_layout->nb_channels != 0; ch_layout++) {
  466. if (av_channel_layout_compare(ch_layout, &layout) == 0) {
  467. return true;
  468. }
  469. }
  470. return false;
  471. }
  472. void AudioEncoder::findSupportedEncoders() {
  473. AV_LOGGER_INFO("查找可用的音频编码器...");
  474. for (const char* encoder : AUDIO_ENCODERS) {
  475. if (CodecFactory::isCodecSupported(encoder, CodecType::ENCODER, MediaType::AUDIO)) {
  476. supportedEncoders_.emplace_back(encoder);
  477. AV_LOGGER_INFOF("找到音频编码器: {}", encoder);
  478. }
  479. }
  480. AV_LOGGER_INFOF("总共找到 {} 个可用的音频编码器", supportedEncoders_.size());
  481. }
// AudioEncoderFactory implementation
  483. std::unique_ptr<AudioEncoder> AudioEncoderFactory::create(const std::string& codecName) {
  484. auto encoder = std::make_unique<AudioEncoder>();
  485. if (!codecName.empty()) {
  486. if (!CodecFactory::isCodecSupported(codecName, CodecType::ENCODER, MediaType::AUDIO)) {
  487. AV_LOGGER_ERRORF("不支持的编码器: {}", codecName);
  488. return nullptr;
  489. }
  490. }
  491. return encoder;
  492. }
  493. std::unique_ptr<AudioEncoder> AudioEncoderFactory::createBest() {
  494. std::string codecName = AudioEncoder::getRecommendedEncoder();
  495. if (codecName.empty()) {
  496. AV_LOGGER_ERROR("未找到可用的音频编码器");
  497. return nullptr;
  498. }
  499. return create(codecName);
  500. }
  501. std::unique_ptr<AudioEncoder> AudioEncoderFactory::createLossless() {
  502. auto encoders = AudioEncoder::getSupportedEncoders();
  503. // 优先选择无损编码器
  504. const char* losslessEncoders[] = {"flac", "pcm_s24le", "pcm_s16le"};
  505. for (const char* encoder : losslessEncoders) {
  506. if (std::find(encoders.begin(), encoders.end(), encoder) != encoders.end()) {
  507. return create(encoder);
  508. }
  509. }
  510. AV_LOGGER_WARNING("未找到无损音频编码器,使用默认编码器");
  511. return createBest();
  512. }
  513. } // namespace codec
  514. } // namespace av