一、什么是音频重采样
音频重采样就是改变音频的采样率、采样格式、声道数等参数,使之按照我们期望的参数输出。比如我们将采样率 48kHz、采样格式 f32le、声道数 1 的音频 A 转换成采样率 44.1kHz、采样格式 s16le、声道数 2 的音频 B。
那么为什么需要对音频重采样?列举一个经典用途,有些音频编码器对输入的原始PCM数据是有特定参数要求的,比如要求必须是44100_s16le_2。但是你提供的PCM参数可能是48000_f32le_1。这个时候就需要先将48000_f32le_1转换成44100_s16le_2,然后再使用音频编码器对转换后的PCM进行编码。
二、使用 FFmpeg 命令行实现音频重采样
将采样率 48000 采样格式 f32le 声道数 1 的 PCM 音频数据重采样成采样率 44100 采样格式 s16le 声道数 2 的 PCM 音频数据:
$ ffmpeg -ar 48000 -ac 1 -f f32le -i ar48000ac1f32le.pcm -ar 44100 -ac 2 -f s16le ar44100ac2s16le.pcm
三、使用 FFmpeg API 编程实现音频重采样
使用 libswresample 进行音频重采样的核心步骤:
1、定义变量(为了简化释放资源的代码用到了 goto 语句;C++ 不允许 goto 跳过带初始化的变量定义,所以需要把用到的变量都定义到前面):
// Input and output PCM files, named by inFilename / outFilename.
QFile inFile(inFilename);
QFile outFile(outFilename);
// ---- Input buffer ----
// Array of pointers to the input sample data
uint8_t **inData = nullptr;
// Input buffer size in bytes; filled in by av_samples_alloc_array_and_samples
// when the buffer is created, and used as the read size for the input file
int inLineSize = 0;
// Number of channels in the input channel layout
int inChs = av_get_channel_layout_nb_channels(inChLayout);
// Bytes taken by one sample across all input channels
// (channel count * bytes of a single-channel sample)
int inBytesPerSample = inChs * av_get_bytes_per_sample(inSampleFormat);
// Capacity of the input buffer, in samples
int inSamples = 1024;
// ---- Output buffer ----
// Array of pointers to the output sample data
uint8_t **outData = nullptr;
// Output buffer size in bytes; filled in by av_samples_alloc_array_and_samples
int outLineSize = 0;
// Number of channels in the output channel layout
int outChs = av_get_channel_layout_nb_channels(outChLayout);
// Bytes taken by one sample across all output channels
int outBytesPerSample = outChs * av_get_bytes_per_sample(outSampleFormat);
// Capacity of the output buffer, in samples:
// inSamples * outSampleRate / inSampleRate, rounded up
int outSamples = av_rescale_rnd(outSampleRate, inSamples, inSampleRate, AV_ROUND_UP);
// Number of bytes returned by the most recent read from the input file
int len = 0;
// Return value of the most recent FFmpeg call
int ret = 0;
我们设置了输入缓冲区样本数量为 1024,然后根据输入输出采样率的比例计算出输出缓冲区样本数量,计算公式如下:
$$\frac{\text{inSamples}}{\text{outSamples}} = \frac{\text{inSampleRate}}{\text{outSampleRate}}$$
$$\text{outSamples} = \frac{\text{inSamples} \times \text{outSampleRate}}{\text{inSampleRate}}$$
FFmpeg 提供了现成的 API 计算输出缓冲区样本数量:
/**
* Rescale a 64-bit integer with specified rounding.
*
* The operation is mathematically equivalent to `a * b / c`, but writing that
* directly can overflow, and does not support different rounding methods.
*
* @see av_rescale(), av_rescale_q(), av_rescale_q_rnd()
*/
int64_t av_rescale_rnd(int64_t a, int64_t b, int64_t c, enum AVRounding rnd) av_const;
此函数的操作等价于我们上边的计算公式,并且做了防止溢出的处理。参数 rnd 是取整模式,这里选择向上取整 AV_ROUND_UP。实际上,即使把输入、输出缓冲区的样本数量全都设置为 1024,重采样后的音频有时也是可以播放的,听起来并没有什么不同;但是通过观察转码后的音频文件大小,你可能会发现丢失了部分音频数据。
2、创建重采样上下文:
SwrContext *ctx = swr_alloc_set_opts(nullptr,
outChLayout, outSampleFormat, outSampleRate,
inChLayout, inSampleFormat, inSampleRate,
0, nullptr);
3、初始化重采样上下文:
// Initialise the context; a negative result is turned into a readable
// message via ERRBUF before jumping to the shared cleanup label.
if ((ret = swr_init(ctx)) < 0) {
    ERRBUF(ret);
    qDebug() << "初始化上下文失败:" << errbuf;
    goto end;
}
4、创建输入缓冲区:
// Allocate the input pointer array together with room for inSamples samples
// of inChs channels; inLineSize receives the resulting buffer size.
if ((ret = av_samples_alloc_array_and_samples(&inData, &inLineSize,
                                              inChs, inSamples, inSampleFormat,
                                              0)) < 0) {
    ERRBUF(ret);
    qDebug() << "创建输入缓冲区失败:" << errbuf;
    goto end;
}
5、创建输出缓冲区:
// Allocate the output pointer array together with room for outSamples samples
// of outChs channels; outLineSize receives the resulting buffer size.
if ((ret = av_samples_alloc_array_and_samples(&outData, &outLineSize,
                                              outChs, outSamples, outSampleFormat,
                                              0)) < 0) {
    ERRBUF(ret);
    qDebug() << "创建输出缓冲区失败:" << errbuf;
    goto end;
}
6、打开文件:
// Source file is opened read-only; on failure go straight to cleanup.
if (inFile.open(QFile::ReadOnly) == false) {
    qDebug() << "打开输入文件失败";
    goto end;
}

// Destination file is opened write-only; on failure go straight to cleanup.
if (outFile.open(QFile::WriteOnly) == false) {
    qDebug() << "打开输出文件失败";
    goto end;
}
7、重采样:
while ((len = inFile.read((char *)inData[0], inLineSize)) > 0) {
inSamples = len / inBytesPerSample;
ret = swr_convert(ctx, outData, outSamples, (const uint8_t **)inData, inSamples);
qDebug() << "转换:" << ret;
if (ret < 0) {
ERRBUF(ret);
qDebug() << "重采样失败:" << errbuf;
goto end;
}
outFile.write((const char *)outData[0], ret * outBytesPerSample);
}
8、检查重采样上下文内部的缓冲区是否还有残留样本(输入传 nullptr 和 0,把残留样本全部取出):
// Drain the resampler: with no new input (nullptr, 0) swr_convert hands back
// whatever samples it is still holding, until it returns 0.
while ((ret = swr_convert(ctx, outData, outSamples, nullptr, 0)) > 0) {
    // ret is a sample count, not a byte count, so scale it by the size of one
    // output sample exactly as the main conversion loop does; writing only
    // `ret` bytes would truncate the tail of the audio.
    outFile.write((const char *)outData[0], ret * outBytesPerSample);
    qDebug() << "残留:" << ret;
}
9、回收释放资源:
end:
    // Shared exit path: every failure branch jumps here.
    inFile.close();
    outFile.close();

    // Input side: release the sample data first, then the pointer array.
    if (inData != nullptr) {
        av_freep(&inData[0]);
    }
    av_freep(&inData);

    // Output side: same order as the input side.
    if (outData != nullptr) {
        av_freep(&outData[0]);
    }
    av_freep(&outData);

    // Finally release the resampling context.
    swr_free(&ctx);
四、完整代码
#include "ffmpegutils.h"
#include <QDebug>
#include <QFile>
// Declares a local 1024-byte buffer named `errbuf` and fills it with the text
// that av_strerror produces for the error code `ret`.
// The expansion is a declaration followed by a statement, so it must be used
// inside a braced block and at most once per scope.
// The argument is parenthesised so that an expression can be passed safely.
#define ERRBUF(ret) \
    char errbuf[1024]; \
    av_strerror((ret), errbuf, sizeof(errbuf))
/// Constructs the object and forwards `parent` to the QObject base class.
FFmpegUtils::FFmpegUtils(QObject *parent)
    : QObject(parent)
{
}
/**
 * @brief Resample a raw PCM file and write the converted PCM to another file.
 *
 * The input is read in chunks of up to 1024 samples, each chunk is converted
 * with swr_convert() and appended to the output file, and finally the samples
 * still held by the resampler are drained. Failures are logged with qDebug();
 * the function then releases everything and returns without reporting a status.
 *
 * Only the first data plane (inData[0] / outData[0]) is read and written, so
 * this presumably expects packed (interleaved) sample formats -- TODO confirm
 * that planar formats are never passed in.
 *
 * @param inFilename      Path of the source PCM file.
 * @param inSampleRate    Sample rate of the source audio.
 * @param inSampleFormat  Sample format of the source audio.
 * @param inChLayout      Channel layout of the source audio.
 * @param outFilename     Path of the PCM file to write.
 * @param outSampleRate   Sample rate to convert to.
 * @param outSampleFormat Sample format to convert to.
 * @param outChLayout     Channel layout to convert to.
 */
void FFmpegUtils::resampleAudio(const char *inFilename, int inSampleRate, AVSampleFormat inSampleFormat, int inChLayout,
                                const char *outFilename, int outSampleRate, AVSampleFormat outSampleFormat, int outChLayout)
{
    // Every function-scope local is defined before the first `goto end`,
    // because C++ does not allow a jump to skip an initialised declaration.
    QFile inFile(inFilename);
    QFile outFile(outFilename);

    // ---- Input buffer ----
    // Array of pointers to the input sample data
    uint8_t **inData = nullptr;
    // Buffer size in bytes, filled in when the buffer is allocated
    int inLineSize = 0;
    // Number of input channels
    int inChs = av_get_channel_layout_nb_channels(inChLayout);
    // Bytes taken by one sample across all input channels
    int inBytesPerSample = inChs * av_get_bytes_per_sample(inSampleFormat);
    // Capacity of the input buffer, in samples
    int inSamples = 1024;

    // ---- Output buffer ----
    // Array of pointers to the output sample data
    uint8_t **outData = nullptr;
    // Buffer size in bytes, filled in when the buffer is allocated
    int outLineSize = 0;
    // Number of output channels
    int outChs = av_get_channel_layout_nb_channels(outChLayout);
    // Bytes taken by one sample across all output channels
    int outBytesPerSample = outChs * av_get_bytes_per_sample(outSampleFormat);
    // Capacity of the output buffer, in samples:
    // inSamples * outSampleRate / inSampleRate, rounded up
    int outSamples = static_cast<int>(av_rescale_rnd(outSampleRate, inSamples, inSampleRate, AV_ROUND_UP));

    // Bytes returned by the most recent read
    int len = 0;
    // Result of the most recent FFmpeg call
    int ret = 0;

    // Create the resampling context (output parameters first, then input).
    SwrContext *ctx = swr_alloc_set_opts(nullptr,
                                         outChLayout, outSampleFormat, outSampleRate,
                                         inChLayout, inSampleFormat, inSampleRate,
                                         0, nullptr);
    if (!ctx) {
        qDebug() << "创建重采样上下文失败!";
        goto end;
    }

    // Initialise the resampling context.
    ret = swr_init(ctx);
    if (ret < 0) {
        ERRBUF(ret);
        qDebug() << "初始化上下文失败:" << errbuf;
        goto end;
    }

    // Allocate the input buffer.
    ret = av_samples_alloc_array_and_samples(&inData, &inLineSize, inChs, inSamples, inSampleFormat, 0);
    if (ret < 0) {
        ERRBUF(ret);
        qDebug() << "创建输入缓冲区失败:" << errbuf;
        goto end;
    }

    // Allocate the output buffer.
    ret = av_samples_alloc_array_and_samples(&outData, &outLineSize, outChs, outSamples, outSampleFormat, 0);
    if (ret < 0) {
        ERRBUF(ret);
        qDebug() << "创建输出缓冲区失败:" << errbuf;
        goto end;
    }

    // Open both files.
    if (!inFile.open(QFile::ReadOnly)) {
        qDebug() << "打开输入文件失败";
        goto end;
    }
    if (!outFile.open(QFile::WriteOnly)) {
        qDebug() << "打开输出文件失败";
        goto end;
    }

    // Convert the input one buffer at a time.
    while ((len = inFile.read(reinterpret_cast<char *>(inData[0]), inLineSize)) > 0) {
        // The last read may be short, so use the bytes actually read.
        inSamples = len / inBytesPerSample;

        ret = swr_convert(ctx, outData, outSamples, const_cast<const uint8_t **>(inData), inSamples);
        qDebug() << "转换:" << ret;
        if (ret < 0) {
            ERRBUF(ret);
            qDebug() << "重采样失败:" << errbuf;
            goto end;
        }

        // ret is a sample count; scale it to bytes for the file write.
        if (outFile.write(reinterpret_cast<const char *>(outData[0]), ret * outBytesPerSample) < 0) {
            qDebug() << "写入输出文件失败";
            goto end;
        }
    }

    // Drain the samples the resampler is still holding (no new input).
    while ((ret = swr_convert(ctx, outData, outSamples, nullptr, 0)) > 0) {
        // Scale the sample count to bytes here as well; writing only `ret`
        // bytes would cut off the end of the audio.
        if (outFile.write(reinterpret_cast<const char *>(outData[0]), ret * outBytesPerSample) < 0) {
            qDebug() << "写入输出文件失败";
            goto end;
        }
        qDebug() << "残留:" << ret;
    }
    // A negative value ends the drain loop too; report it instead of dropping it.
    if (ret < 0) {
        ERRBUF(ret);
        qDebug() << "重采样失败:" << errbuf;
    }

end:
    // Shared exit path for success and for every failure branch.
    inFile.close();
    outFile.close();

    // Release the sample data first, then the pointer array.
    if (inData) {
        av_freep(&inData[0]);
    }
    av_freep(&inData);

    if (outData) {
        av_freep(&outData[0]);
    }
    av_freep(&outData);

    swr_free(&ctx);
}
调用函数:
// Paths of the source PCM file and of the resampled PCM file to produce.
// (The first literal must be closed with an ASCII double quote; a full-width
// quote leaves the string unterminated and the code does not compile.)
#define IN_FILE_NAME "/Users/mac/Downloads/music/ar48000ac1f32le.pcm"
#define OUT_FILE_NAME "/Users/mac/Downloads/music/ar44100ac2s16le.pcm"

// Parameters describing the input file
int inSampleRate = 48000;
AVSampleFormat inSampleFormat = AV_SAMPLE_FMT_FLT;
int inChLayout = AV_CH_LAYOUT_MONO;

// Parameters wanted for the output file
int outSampleRate = 44100;
AVSampleFormat outSampleFormat = AV_SAMPLE_FMT_S16;
int outChLayout = AV_CH_LAYOUT_STEREO;

FFmpegUtils::resampleAudio(IN_FILE_NAME, inSampleRate, inSampleFormat, inChLayout,
                           OUT_FILE_NAME, outSampleRate, outSampleFormat, outChLayout);