需求说明:录音后,需要有变声选项,选择变声模式试听,满意后可保存。保存的变声文件格式为AMR
解决方案:录音问题很好解决,每次录音后保存一个文件,格式为WAV。变声功能采用网上比较常见的fmod框架。可试听可保存文件,文件格式为WAV。保存后进行文件转格式,由WAV转为amr。方案是采用Android自带的AmrInputStream 完成。
遇到的问题有:
- 变声后文件如何保存。
- 变声后文件转格式后,声音被拉长两倍。
下面是具体的解决方法。
变声采用fmod框架来实现,具体方案可以在网上搜索,这里贴上cpp代码:
#include "inc/fmod.hpp"
#include <stdlib.h>
#include <unistd.h>
#include "com_kidosc_voicechange_jni_VoiceFixer.h"
#include <android/log.h>
// Logcat helpers: forward a printf-style FORMAT plus arguments to
// __android_log_print under the tag "zph" at INFO / ERROR priority.
// The expansion deliberately carries no trailing semicolon, so a call behaves
// like an ordinary expression statement and stays valid in constructs such as
// `if (cond) LOGI(...); else LOGE(...);`.
#define LOGI(FORMAT, ...) __android_log_print(ANDROID_LOG_INFO,"zph",FORMAT,##__VA_ARGS__)
#define LOGE(FORMAT, ...) __android_log_print(ANDROID_LOG_ERROR,"zph",FORMAT,##__VA_ARGS__)
// Voice-effect selectors. The `type` argument of the fix JNI entry point is
// matched against these values in its switch statement.
// No effect: the sound is played as-is.
#define MODE_NORMAL 0
// Channel playback frequency is multiplied by 1.6.
#define MODE_FUNNY 1
// Pitch-shift DSP with pitch 0.8.
#define MODE_UNCLE 2
// Pitch-shift DSP with pitch 1.8.
#define MODE_LOLITA 3
// Echo DSP with delay 50 and feedback 60.
// NOTE(review): the identifier is spelled with the digit zero ("ROB0T"), not the letter O.
#define MODE_ROB0T 4
// Echo DSP with delay 300 and feedback 20.
#define MODE_ETHEREAL 5
// Echo DSP with delay 100 and feedback 50.
#define MODE_CHORUS 6
// Tremolo DSP with skew 0.8.
#define MODE_HORROR 7
// Makes the FMOD C++ API names (System, Sound, Channel, DSP, ...) usable unqualified.
using namespace FMOD;
// File-level playback state shared by the JNI entry points below.
// Sound created by the fix entry point from the input path.
Sound *sound;
// Effect unit created by the fix entry point for the selected mode.
DSP *dsp;
// Channel returned by playSound; stopPlaying() calls stop() on it.
Channel *channel;
// Set to true when fix starts and refreshed from channel->isPlaying() while it waits.
bool playing;
// Scratch value used by MODE_FUNNY to read and scale the channel frequency.
float frequency;
// FMOD system object created, used and released inside the fix entry point.
System *mSystem;
// JNIEnv pointer stored by the most recent call to the fix entry point.
JNIEnv *mEnv;
// Defined further down; declared here so earlier code can reference it.
void stopPlaying();
/**
 * JNI entry point Java_com_kidosc_voicechange_jni_VoiceFixer_init.
 *
 * Performs no work: the body is empty and neither argument is used.
 *
 * @param env  JNI environment of the calling thread (unused).
 * @param jcls class object passed by the JVM for this native call (unused).
 */
JNIEXPORT void JNICALL
Java_com_kidosc_voicechange_jni_VoiceFixer_init(JNIEnv *env, jclass jcls) {
    (void) env;
    (void) jcls;
}
JNIEXPORT void JNICALL Java_com_kidosc_voicechange_jni_VoiceFixer_fix(JNIEnv *env,
jclass jcls, jstring path_jstr,
jint type,jint save) {
playing = true;
frequency = 0;
System_Create(&mSystem);
mSystem->setSoftwareFormat(8000,FMOD_SPEAKERMODE_MONO,0); //设置采样率为8000,channel为1
mEnv=env;
const char *path_cstr = mEnv->GetStringUTFChars(path_jstr, NULL);
if(save==1){
char cDest[200] = "sdcard/xxx.wav";
mSystem->setOutput(FMOD_OUTPUTTYPE_WAVWRITER); //保存文件格式为WAV
mSystem->init(32, FMOD_INIT_NORMAL | FMOD_INIT_PROFILE_ENABLE,cDest);
} else{
mSystem->init(32, FMOD_INIT_NORMAL,NULL);
}
try {
//创建声音
mSystem->createSound(path_cstr, FMOD_DEFAULT, NULL, &sound);
mSystem->playSound(sound, 0, false, &channel);
switch (type) {
case MODE_NORMAL:
LOGI("%s", path_cstr);
LOGI("%s", "fix normal");
break;
case MODE_FUNNY:
mSystem->createDSPByType(FMOD_DSP_TYPE_NORMALIZE, &dsp);
channel->getFrequency(&frequency);
frequency = frequency * 1.6;
channel->setFrequency(frequency);
break;
case MODE_UNCLE:
mSystem->createDSPByType(FMOD_DSP_TYPE_PITCHSHIFT, &dsp);
dsp->setParameterFloat(FMOD_DSP_PITCHSHIFT_PITCH, 0.8);
channel->addDSP(0, dsp);
break;
case MODE_LOLITA:
mSystem->createDSPByType(FMOD_DSP_TYPE_PITCHSHIFT, &dsp);
dsp->setParameterFloat(FMOD_DSP_PITCHSHIFT_PITCH,
1.8);
channel->addDSP(0, dsp);
break;
case MODE_ROB0T:
mSystem->createDSPByType(FMOD_DSP_TYPE_ECHO, &dsp);
dsp->setParameterFloat(FMOD_DSP_ECHO_DELAY, 50);
dsp->setParameterFloat(FMOD_DSP_ECHO_FEEDBACK, 60);
channel->addDSP(0, dsp);
break;
case MODE_ETHEREAL:
mSystem->createDSPByType(FMOD_DSP_TYPE_ECHO, &dsp);
dsp->setParameterFloat(FMOD_DSP_ECHO_DELAY, 300);
dsp->setParameterFloat(FMOD_DSP_ECHO_FEEDBACK, 20);
channel->addDSP(0, dsp);
break;
case MODE_CHORUS:
mSystem->createDSPByType(FMOD_DSP_TYPE_ECHO, &dsp);
dsp->setParameterFloat(FMOD_DSP_ECHO_DELAY, 100);
dsp->setParameterFloat(FMOD_DSP_ECHO_FEEDBACK, 50);
channel->addDSP(0, dsp);
break;
case MODE_HORROR:
mSystem->createDSPByType(FMOD_DSP_TYPE_TREMOLO, &dsp);
dsp->setParameterFloat(FMOD_DSP_TREMOLO_SKEW, 0.8);
channel->addDSP(0, dsp);
break;
default:
break;
}
} catch (...) {
LOGE("%s", "发生异常");
goto end;
}
mSystem->update();
while (playing) {
usleep(1000);
channel->isPlaying(&playing);
}
goto end;
end:
mEnv->ReleaseStringUTFChars(path_jstr, path_cstr);
sound->release();
mSystem->close();
mSystem->release();
jclass clazz = mEnv -> FindClass("com/kidosc/voicechange/jni/VoiceFixer");
if(clazz == NULL){
printf("not found com/kidosc/voicechange/jni/VoiceFixer class");
return;
}
jmethodID id = mEnv->GetStaticMethodID(clazz,"setPlayState","()V");
if (id==NULL){
printf("method not found");
return;
}
mEnv->CallStaticVoidMethod(clazz,id);
printf("env->CallStaticVoidMethod(clazz,id);");
}
/**
 * JNI entry point Java_com_kidosc_voicechange_jni_VoiceFixer_stopPlay.
 *
 * Delegates to stopPlaying(); neither argument is used.
 *
 * @param env  JNI environment of the calling thread (unused).
 * @param jcls class object passed by the JVM for this native call (unused).
 */
JNIEXPORT void JNICALL
Java_com_kidosc_voicechange_jni_VoiceFixer_stopPlay(JNIEnv *env, jclass jcls) {
    (void) env;
    (void) jcls;
    stopPlaying();
}
void stopPlaying(){
channel->stop();
printf("stopplaying");
}
上面的代码与网上常见版本的区别是:增加了变声文件保存的功能,增加了停止变声播放的功能,并改变了默认输出文件的采样率和channel数。其中最后一条尤为重要,详见 setSoftwareFormat 方法的文档。
变声临时文件保存在sdcard/xxx.wav,可在保存完成后,再进行重新转格式并保存在自己想要的位置。
转格式使用的是Android自带的AmrInputStream.java。这个类被隐藏了,想要直接使用的话,需要将文件拷贝到自己的项目下,具体如何使用可自行搜索。AmrInputStream转格式时,默认的输入格式是8000Hz、16bit、单channel。这些参数很重要,如果WAV格式与之不匹配,转格式后会出现声音被拉长等一系列问题。所以需要将fmod变声框架的输出文件格式改成8000Hz、16bit、单channel。这才是最头痛的地方!网上根本找不到相关的内容,没办法只能自己慢慢摸索了。最开始的思路是将fmod输出的采样率改为8000Hz,成功了之后发现声音还是被拉长了两倍。怀疑是16bit的问题,又不知道fmod输出的是多少bit,所以尝试更改AmrInputStream,将其默认输入的音频数据格式改一下。更改完之后,声音都变质了。所以最终方案还是考虑更改fmod输出的音频格式。找到一个setSoftwareFormat方法,发现可以更改输出采样率。那么现在的问题就是,为什么还是会被拉长两倍。下面是转格式的代码:
/**
 * Encodes the audio read from {@code inPath} to AMR and writes it to
 * {@code outPath}: the {@code header} bytes are written first, followed by
 * everything produced by an {@link AmrInputStream} wrapping the input file.
 *
 * Any exception is printed and swallowed, so callers get no failure signal.
 * All streams are closed in every case, and the input is opened before the
 * output so that a missing input does not leave an empty output file behind.
 *
 * NOTE(review): the input bytes are passed to AmrInputStream unmodified. If
 * {@code inPath} is a WAV file, its header bytes are encoded as audio too;
 * confirm whether the header should be skipped first.
 *
 * @param inPath  path of the source file
 * @param outPath path of the target file
 */
public void systemWav2Amr(String inPath,String outPath){
    InputStream inputStream = null;
    AmrInputStream amrInputStream = null;
    FileOutputStream fileoutputStream = null;
    try {
        inputStream = new FileInputStream(inPath);
        amrInputStream = new AmrInputStream(inputStream);
        fileoutputStream = new FileOutputStream(outPath);
        fileoutputStream.write(header);
        byte[] buf = new byte[1024];
        int len;
        while ((len = amrInputStream.read(buf)) > 0) {
            fileoutputStream.write(buf, 0, len);
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Close each stream independently so one failing close() cannot leak the others.
        if (fileoutputStream != null) {
            try {
                fileoutputStream.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        if (amrInputStream != null) {
            try {
                amrInputStream.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        // Covers the case where the AmrInputStream constructor threw before taking over the file stream.
        if (inputStream != null) {
            try {
                inputStream.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
}
查找资料发现,音频的数据速率跟三个值有关:一个是channel数,一个是采样位数,一个是采样率。既然声音被拉长两倍,说明按AmrInputStream的格式去解读时,实际的数据量是预期的两倍,也就是说fmod输出的音频数据中,channel数或者采样位数中的一个是AmrInputStream所要求的两倍。最开始猜测是采样位数为32bit导致的,于是把AmrInputStream的输入格式改为32bit。结果时长的确保持一致了,但声音变质了,所以这个改法行不通。不过这说明思路是对的,那么问题就是如何更改fmod输出音频的channel数了。AmrInputStream中的channel为1,说明fmod输出的是2个channel。在fmod的fmod_codec.h中可以找到channel相关的属性,如下:
/* Wave-format description quoted from FMOD's fmod_codec.h. Each field comment states whether the field is mandatory or optional; `channels` is the field relevant to the mono/stereo mismatch discussed in the article. */
struct FMOD_CODEC_WAVEFORMAT
{
char name[256]; /* [w] Name of sound. Optional. */
FMOD_SOUND_FORMAT format; /* [w] Format for (decompressed) codec output, ie FMOD_SOUND_FORMAT_PCM8, FMOD_SOUND_FORMAT_PCM16. Mandatory - Must be supplied. */
int channels; /* [w] Number of channels used by codec, ie mono = 1, stereo = 2. Mandatory - Must be supplied. */
int frequency; /* [w] Default frequency in hz of the codec, ie 44100. Mandatory - Must be supplied. */
unsigned int lengthbytes; /* [w] Length in bytes of the source data. Used for FMOD_TIMEUNIT_RAWBYTES. Optional. Default = 0. */
unsigned int lengthpcm; /* [w] Length in decompressed, PCM samples of the file, ie length in seconds * frequency. Used for Sound::getLength and for memory allocation of static decompressed sample data. Mandatory - Must be supplied. */
unsigned int pcmblocksize; /* [w] Minimum, optimal number of decompressed PCM samples codec can handle. 0 or 1 = no buffering. Anything higher means FMOD will allocate a PCM buffer of this size to read in chunks. The codec read callback will be called in multiples of this value. Optional. */
int loopstart; /* [w] Loopstart in decompressed, PCM samples of file. Optional. Default = 0. */
int loopend; /* [w] Loopend in decompressed, PCM samples of file. Optional. Default = 0. */
FMOD_MODE mode; /* [w] Mode to determine whether the sound should by default load as looping, non looping, 2d or 3d. Optional. Default = FMOD_DEFAULT. */
FMOD_CHANNELMASK channelmask; /* [w] Defined channel bitmask to describe which speakers the channels in the codec map to, in order of channel count. See fmod_common.h. Optional. Leave at 0 to map to the speaker layout defined in FMOD_SPEAKER. */
FMOD_CHANNELORDER channelorder; /* [w] Defined channel order type, to describe where each sound channel should pan for the number of channels specified. See fmod_common.h. Optional. Leave at 0 to play in default speaker order. */
float peakvolume; /* [w] Peak volume of sound. Optional. Default = 0 if not used. */
};
在setSoftwareFormat方法中发现,第二个参数可以设置为MONO:当其为FMOD_SPEAKERMODE_MONO时,channels为1。FMOD_SPEAKERMODE的定义如下:
/* Speaker-mode enumeration quoted from the FMOD headers. A value of this type is passed as the second argument of System::setSoftwareFormat; the fix entry point in this article passes FMOD_SPEAKERMODE_MONO. */
typedef enum
{
FMOD_SPEAKERMODE_DEFAULT, /* Default speaker mode based on operating system/output mode. Windows = control panel setting, Xbox = 5.1, PS3 = 7.1 etc. */
FMOD_SPEAKERMODE_RAW, /* There is no specific speakermode. Sound channels are mapped in order of input to output. Use System::setSoftwareFormat to specify speaker count. See remarks for more information. */
FMOD_SPEAKERMODE_MONO, /* The speakers are monaural. */
FMOD_SPEAKERMODE_STEREO, /* The speakers are stereo. */
FMOD_SPEAKERMODE_QUAD, /* 4 speaker setup. This includes front left, front right, surround left, surround right. */
FMOD_SPEAKERMODE_SURROUND, /* 5 speaker setup. This includes front left, front right, center, surround left, surround right. */
FMOD_SPEAKERMODE_5POINT1, /* 5.1 speaker setup. This includes front left, front right, center, surround left, surround right and an LFE speaker. */
FMOD_SPEAKERMODE_7POINT1, /* 7.1 speaker setup. This includes front left, front right, center, surround left, surround right, back left, back right and an LFE speaker. */
FMOD_SPEAKERMODE_MAX, /* Maximum number of speaker modes supported. */
FMOD_SPEAKERMODE_FORCEINT = 65536 /* Makes sure this enum is signed 32bit. */
} FMOD_SPEAKERMODE;
如此设置之后,就可以完美的进行转格式分享了。