最近有做iOS直播变声的需求,于是去网上搜索了可用的第三方变声库,最终选定了SoundTouch,SoundTouch是C++的一套库,对iOS项目来说也是比较容易进行集成的,具体的可以参考一下 iOS下使用SoundTouch实现变声
要了解变声原理的同学,可以参考一下 变声语音相关知识
在使用过程中遇到的最大问题应该是需要SoundTouch进行实时音频流的处理,而目前网上存在的例子,多数都是对音频文件进行处理,于是花了一些时间来适配,这也是今天要说明的重点。
目前在自己的项目中使用音频采集库数据返回的格式是 CMSampleBufferRef,我需要做的是对CMSampleBufferRef进行处理并返回给上层,而看SoundTouch的数据接口 putSamples 能接受的是一个 SAMPLETYPE 类型,继续跟踪查看会发现 SAMPLETYPE 是由以下两个宏定义来决定的
#define SOUNDTOUCH_INTEGER_SAMPLES 1 //< 16bit integer samples
//#define SOUNDTOUCH_FLOAT_SAMPLES 1 //< 32bit float samples
因为声音采集的数据格式会有不同,SoundTouch能支持的是16位的int值和32位的float值,接下来我们就需要知道自己的声音采样数据格式是什么样的,这些数据可以从CMSampleBufferRef中来寻找。以下为直接在Xcode中打印的一个CMSampleBuffer的值,里面我们可以看到一个 mFormatFlags,目前这个值是0x29,去找AudioFormatFlags的定义可以发现 0x29 = kAudioFormatFlagIsFloat(0x1)|kAudioFormatFlagIsPacked(0x8)|kAudioFormatFlagIsNonInterleaved(0x20),因此可以断定音频数据格式是float类型的,从mBytesPerFrame(4字节)和mBitsPerChannel(32)中可以判断是32位的,这样我们就知道要在代码中打开 SOUNDTOUCH_FLOAT_SAMPLES 的定义(同时注释掉 SOUNDTOUCH_INTEGER_SAMPLES)了
CMSampleBuffer 0x14bd9df00 retainCount: 1 allocator: 0x1aa381bb8
invalid = NO
dataReady = YES
makeDataReadyCallback = 0x0
makeDataReadyRefcon = 0x0
formatDescription = <CMAudioFormatDescription 0x170112fc0 [0x1aa381bb8]> {
mediaType:'soun'
mediaSubType:'lpcm'
mediaSpecific: {
ASBD: {
mSampleRate: 44100.000000
mFormatID: 'lpcm'
mFormatFlags: 0x29
mBytesPerPacket: 4
mFramesPerPacket: 1
mBytesPerFrame: 4
mChannelsPerFrame: 1
mBitsPerChannel: 32 }
cookie: {(null)}
ACL: {(null)}
FormatList Array: {(null)}
}
extensions: {(null)}
}
sbufToTrackReadiness = 0x0
numSamples = 941
sampleTimingArray[1] = {
{PTS = {10672774120541/1000000000 = 10672.774}, DTS = {INVALID}, duration = {1/44100 = 0.000}},
}
dataBuffer = 0x170113b00
接下来需要做的是将数据传给SoundTouch的putSamples,这需要我们从CMSampleBufferRef中来提取音频的数据
void SoundTouch::putSamples(const SAMPLETYPE *samples, uint nSamples)
下面先贴出源码
/**
 * Feeds the PCM payload of an audio sample buffer through SoundTouch and wraps
 * whatever SoundTouch hands back in a new sample buffer.
 *
 * The sample count is computed as byteSize / sizeof(Float32) and only the
 * first AudioBuffer is read, so this assumes mono 32-bit float LPCM input
 * (NOTE(review): confirm against the capture format actually in use).
 *
 * @param ref The captured audio sample buffer. It is only read, never modified.
 * @return `ref` itself (no extra retain is taken) when the PCM data cannot be
 *         read, the input is empty, SoundTouch has no output ready yet, or the
 *         timing info cannot be fetched. Otherwise the buffer returned by
 *         -createAudioSample:frames:timing:, which may be NULL if creation fails.
 *         NOTE(review): the two return paths carry different ownership; callers
 *         must only release the result when it is not `ref`.
 */
- (CMSampleBufferRef)pitchSoundBuffer:(CMSampleBufferRef)ref {
    AudioBufferList audioBufferList;
    CMBlockBufferRef blockBuffer = NULL;
    OSStatus status = CMSampleBufferGetAudioBufferListWithRetainedBlockBuffer(
        ref, NULL, &audioBufferList, sizeof(audioBufferList), NULL, NULL, 0, &blockBuffer);
    if (status != noErr) {
        // audioBufferList is not valid on failure; pass the input through untouched.
        if (blockBuffer != NULL) {
            CFRelease(blockBuffer);
        }
        return ref;
    }

    AudioBuffer audioBuffer = audioBufferList.mBuffers[0];
    const Float32 *pcmData = (const Float32 *)audioBuffer.mData;
    int nSamples = (int)(audioBuffer.mDataByteSize / sizeof(Float32));

    if (pcmData != NULL && nSamples > 0) {
        // Hand the samples to SoundTouch straight from the block buffer's memory;
        // no intermediate NSData copy is needed.
        mSoundTouch->putSamples(pcmData, (uint)nSamples);
    }

    // The block buffer was returned retained and backs audioBuffer.mData.
    // Release it now that the samples have been consumed.
    if (blockBuffer != NULL) {
        CFRelease(blockBuffer);
    }

    if (pcmData == NULL || nSamples <= 0) {
        return ref;
    }

    // Ask for at most as many samples as were just put in. The data object is
    // zero-filled and sized in bytes for exactly nSamples floats.
    NSMutableData *processed =
        [NSMutableData dataWithLength:(NSUInteger)nSamples * sizeof(Float32)];
    uint received = mSoundTouch->receiveSamples((Float32 *)processed.mutableBytes, (uint)nSamples);
    if (received == 0) {
        // SoundTouch has not produced output yet; return the input unchanged.
        return ref;
    }
    processed.length = (NSUInteger)received * sizeof(Float32);

    // Fetch the timing entries so the new buffer can reuse the first one.
    CMItemCount timingCount = 0;
    CMSampleBufferGetSampleTimingInfoArray(ref, 0, NULL, &timingCount);
    if (timingCount <= 0) {
        return ref;
    }
    CMSampleTimingInfo *timingInfo =
        (CMSampleTimingInfo *)malloc(sizeof(CMSampleTimingInfo) * (size_t)timingCount);
    if (timingInfo == NULL) {
        return ref;
    }
    status = CMSampleBufferGetSampleTimingInfoArray(ref, timingCount, timingInfo, &timingCount);
    if (status != noErr) {
        free(timingInfo);
        return ref;
    }
    CMSampleTimingInfo firstTiming = timingInfo[0];
    free(timingInfo);

    // The second argument is a byte length (the selector calls it "frames").
    return [self createAudioSample:processed.mutableBytes
                            frames:(UInt32)processed.length
                            timing:firstTiming];
}
下面为创建一个CMSampleBufferRef
/**
 * Builds a CMSampleBuffer around a block of mono, 44.1 kHz, 32-bit float,
 * packed, non-interleaved linear PCM.
 *
 * @param audioData Pointer to the PCM bytes. They are handed to
 *                  CMSampleBufferSetDataBufferFromAudioBufferList via an
 *                  AudioBufferList that points at this memory.
 * @param len       Length of `audioData` in BYTES (despite the `frames`
 *                  label); the sample count is len / sizeof(Float32).
 * @param timing    Timing entry applied to the new buffer (a single entry).
 * @return A new sample buffer that the caller owns and must CFRelease, or
 *         NULL if the input is empty or any CoreMedia call fails.
 */
- (CMSampleBufferRef)createAudioSample:(void *)audioData frames:(UInt32)len timing:(CMSampleTimingInfo)timing
{
    if (audioData == NULL || len < sizeof(Float32)) {
        return NULL;
    }

    const UInt32 channels = 1;

    AudioBufferList audioBufferList;
    audioBufferList.mNumberBuffers = 1;
    audioBufferList.mBuffers[0].mNumberChannels = channels;
    audioBufferList.mBuffers[0].mDataByteSize = len;
    audioBufferList.mBuffers[0].mData = audioData;

    AudioStreamBasicDescription asbd = {0};
    asbd.mSampleRate = 44100;
    asbd.mFormatID = kAudioFormatLinearPCM;
    // Same value as the former literal 0x29: 0x01 | 0x08 | 0x20.
    asbd.mFormatFlags = kAudioFormatFlagIsFloat
                      | kAudioFormatFlagIsPacked
                      | kAudioFormatFlagIsNonInterleaved;
    asbd.mBytesPerPacket = sizeof(Float32);
    asbd.mFramesPerPacket = 1;
    asbd.mBytesPerFrame = sizeof(Float32);
    asbd.mChannelsPerFrame = channels;
    asbd.mBitsPerChannel = 8 * sizeof(Float32);
    asbd.mReserved = 0;

    // Local, not static: a static here was overwritten with a new +1 object on
    // every call, leaking the previous format description each time.
    CMFormatDescriptionRef format = NULL;
    OSStatus error = CMAudioFormatDescriptionCreate(kCFAllocatorDefault, &asbd, 0, NULL, 0, NULL, NULL, &format);
    if (error != noErr) {
        return NULL;
    }

    CMSampleBufferRef buff = NULL;
    CMItemCount sampleCount = (CMItemCount)(len / sizeof(Float32));
    error = CMSampleBufferCreate(kCFAllocatorDefault, NULL, false, NULL, NULL, format, sampleCount, 1, &timing, 0, NULL, &buff);
    // The sample buffer holds its own reference to the format description.
    CFRelease(format);
    if (error != noErr) {
        if (buff != NULL) {
            CFRelease(buff);
        }
        return NULL;
    }

    error = CMSampleBufferSetDataBufferFromAudioBufferList(buff, kCFAllocatorDefault, kCFAllocatorDefault, 0, &audioBufferList);
    if (error != noErr) {
        // Do not leak the half-built sample buffer.
        CFRelease(buff);
        return NULL;
    }
    return buff;
}