附完整C代码的音频自动增益和静音检测算法
优采云 发布时间: 2020-08-06 08:29转发自:
我以前分享过一个算法:“音频增益响度分析 ReplayGain(附完整C代码示例)”
主要用于评估特定长度音频的音量强度,
分析之后,可以据此做音频增益、提高音量等类似处理.
但是,当实际测试项目时,确实很难设置标准.
在哪种环境下,我应该增加或减小音量?
通信行业的常规做法是使用静默检测.
一旦被检测为静音或噪音,将不会对其进行处理,否则,将通过某种策略对其进行处理.
这里涉及两种算法,一种是静音检测,另一种是音频增益.
增益实际上没什么好说的,它类似于数据标准化和扩展.
WebRTC中的静音检测采用GMM(Gaussian Mixture Model,高斯混合模型)进行特征提取.
很长一段时间以来,音频特征主要有3种:
GMM、语谱图(Spectrogram)、MFCC即梅尔倒谱(Mel-frequency Cepstrum).
恕我直言,GMM提取的特征不如后两者强健.
我不作更多介绍. 有兴趣的学生可以查阅Wikipedia并补课.
当然,当实际使用该算法时,将从中扩展一些技巧.
例如,使用静音检测来进行音频剪辑,或者使用音频增益来进行一些音频增强.
用于自动增益的WebRTC源代码文件为: analog_agc.c和digital_agc.c
静音检测源代码文件是: webrtc_vad.c
这种命名有某些历史原因.
整理后,
增益算法为agc.c agc.h
静音检测是vad.c vad.h
完整的增益算法示例代码:
#include
#include
#include
//采用https://github.com/mackron/dr_libs/blob/master/dr_wav.h 解码
#define DR_WAV_IMPLEMENTATION
#include "dr_wav.h"
#include "agc.h"
#ifndef nullptr
#define nullptr 0
#endif
#ifndef MIN
#define MIN(A, B) ((A) < (B) ? (A) : (B))
#endif
// Writes 16-bit samples to a WAV file.
// NOTE(review): this listing appears to be damaged. The body of
// wavWrite_int16 stops after the container assignment, and the statements
// starting at `if (*--p ...` look like the tail of a path-splitting helper:
// `p`, `s`, `end`, `name`, `dir` and `path` are not declared anywhere in this
// block, and `break` has no enclosing loop. The definitions of wavRead_int16
// and splitpath that auto_gain() and main() call are not visible in this
// listing either. Restore this region from the original source before
// compiling.
void wavWrite_int16(char *filename, int16_t *buffer, size_t sampleRate, size_t totalSampleCount) {
drwav_data_format format = {};
format.container = drwav_container_riff; // path;)
// NOTE(review): everything below presumably belongs to splitpath() — confirm.
// Step back over a path separator ('\\' or '/') and stop scanning.
if (*--p == '\\' || *p == '/') {
p++;
break;
}
// Copy the range [p, end) into `name` and terminate it.
if (name) {
for (s = p; s < end;)
*name++ = *s++;
*name = '\0';
}
// Copy the range [path, p) into `dir` and terminate it.
if (dir) {
for (s = path; s < p;)
*dir++ = *s++;
*dir = '\0';
}
}
/*
 * Runs WebRTC automatic gain control over a buffer of 16-bit samples, in place.
 *
 * The buffer is processed in frames of MIN(160, sampleRate / 100) samples and
 * each processed frame is copied back over its input. Samples left over after
 * the last whole frame are not modified.
 *
 * buffer        samples to process; overwritten with the processed audio
 * sampleRate    sample rate, forwarded to WebRtcAgc_Init
 * samplesCount  number of samples in buffer
 * agcMode       AGC mode forwarded to WebRtcAgc_Init
 *               (e.g. kAgcModeAdaptiveDigital)
 *
 * Returns 1 on success, -1 on invalid arguments or when any WebRtcAgc_* call
 * fails. On a mid-stream failure the frames already processed stay modified.
 */
int agcProcess(int16_t *buffer, uint32_t sampleRate, size_t samplesCount, int16_t agcMode) {
    /* Capacity of the per-frame output buffer. An enum constant keeps the
     * array a plain fixed-size array instead of a VLA. */
    enum { kMaxFrameSamples = 320 };

    if (buffer == NULL || samplesCount == 0) return -1;

    size_t samples = MIN(160, sampleRate / 100);
    if (samples == 0) return -1;
    size_t nTotal = samplesCount / samples;

    WebRtcAgcConfig agcConfig;
    agcConfig.compressionGaindB = 9; // default 9 dB
    agcConfig.limiterEnable = 1;     // default kAgcTrue (on)
    agcConfig.targetLevelDbfs = 3;   // default 3 (-3 dBOv)
    int minLevel = 0;
    int maxLevel = 255;

    void *agcInst = WebRtcAgc_Create();
    if (agcInst == NULL) return -1;

    /* Everything used after the first `goto` is declared up front. */
    int result = -1;
    int16_t *input = buffer;
    int16_t out_buffer[kMaxFrameSamples];
    int16_t *out16 = out_buffer;
    size_t num_bands = 1;
    int outMicLevel = -1;
    uint8_t saturationWarning = 1; // saturation (overflow after gain) indicator passed to WebRtcAgc_Process
    int16_t echo = 0;              // whether the gain should take echo into account

    if (WebRtcAgc_Init(agcInst, minLevel, maxLevel, agcMode, sampleRate) != 0) {
        printf("WebRtcAgc_Init fail\n");
        goto cleanup;
    }
    if (WebRtcAgc_set_config(agcInst, agcConfig) != 0) {
        printf("WebRtcAgc_set_config fail\n");
        goto cleanup;
    }

    /* size_t counter: matches nTotal's type, so no signed/unsigned comparison
     * and no int overflow on very long inputs. */
    for (size_t frame = 0; frame < nTotal; frame++) {
        int inMicLevel = 0;
        int nAgcRet = WebRtcAgc_Process(agcInst, (const int16_t *const *) &input, num_bands, samples,
                                        (int16_t *const *) &out16, inMicLevel, &outMicLevel, echo,
                                        &saturationWarning);
        if (nAgcRet != 0) {
            printf("failed in WebRtcAgc_Process\n");
            goto cleanup;
        }
        /* Write the processed frame back over its source and advance. */
        memcpy(input, out_buffer, samples * sizeof(int16_t));
        input += samples;
    }
    result = 1;

cleanup:
    WebRtcAgc_Free(agcInst);
    return result;
}
/*
 * Loads in_file through wavRead_int16, runs agcProcess on the samples in
 * adaptive digital mode, and writes the result to out_file.
 *
 * Nothing is written when loading fails (wavRead_int16 returns a null
 * pointer) or when agcProcess does not report success, so a failed run no
 * longer produces an unprocessed or half-processed output file.
 */
void auto_gain(char *in_file, char *out_file) {
    // Sample rate of the loaded audio.
    uint32_t sampleRate = 0;
    // Total number of samples loaded.
    uint64_t inSampleCount = 0;
    int16_t *inBuffer = wavRead_int16(in_file, &sampleRate, &inSampleCount);
    if (inBuffer == NULL) return;

    // kAgcModeAdaptiveAnalog  - analog volume adjustment
    // kAgcModeAdaptiveDigital - adaptive gain
    // kAgcModeFixedDigital    - fixed gain
    if (agcProcess(inBuffer, sampleRate, inSampleCount, kAgcModeAdaptiveDigital) == 1) {
        wavWrite_int16(out_file, inBuffer, sampleRate, inSampleCount);
    } else {
        // agcProcess returns 1 on success; anything else is a failure.
        printf("agcProcess fail\n");
    }
    free(inBuffer);
}
/*
 * Entry point of the AGC demo.
 *
 * Expects the input WAV path as argv[1]. The output path is built from the
 * components returned by splitpath as "<drive><dir><fname>_out<ext>".
 * Returns -1 when no input path is given or the output path does not fit,
 * 0 otherwise.
 */
int main(int argc, char *argv[]) {
    printf("WebRTC Automatic Gain Control\n");
    printf("博客:http://cpuimage.cnblogs.com/\n");
    printf("音频自动增益\n");
    if (argc < 2)
        return -1;
    char *in_file = argv[1];
    // Zero-initialised so every component is a valid (possibly empty) string
    // even if splitpath leaves one of them untouched.
    char drive[3] = {0};
    char dir[256] = {0};
    char fname[256] = {0};
    char ext[256] = {0};
    char out_file[1024];
    splitpath(in_file, drive, dir, fname, ext);
    // Bounded formatting: never writes past out_file and reports truncation.
    int written = snprintf(out_file, sizeof out_file, "%s%s%s_out%s", drive, dir, fname, ext);
    if (written < 0 || (size_t) written >= sizeof out_file) {
        printf("output path too long\n");
        return -1;
    }
    auto_gain(in_file, out_file);
    printf("按任意键退出程序 \n");
    getchar();
    return 0;
}
静音检测的完整示例代码:
#include
#include
#include
//采用https://github.com/mackron/dr_libs/blob/master/dr_wav.h 解码
#define DR_WAV_IMPLEMENTATION
#include "dr_wav.h"
#include "vad.h"
#ifndef nullptr
#define nullptr 0
#endif
#ifndef MIN
#define MIN(A, B) ((A) < (B) ? (A) : (B))
#endif
#ifndef MAX
#define MAX(A, B) ((A) > (B) ? (A) : (B))
#endif
/*
 * Reads a WAV file into a buffer of 16-bit samples.
 *
 * filename          path of the file to read
 * sampleRate        receives the sample rate reported by the decoder
 * totalSampleCount  receives the total number of samples
 *
 * Only single-channel audio is accepted. Returns the sample buffer (owned by
 * the caller), or a null pointer when the file cannot be read or has a
 * channel count other than 1. On every failure *sampleRate and
 * *totalSampleCount are set to 0.
 */
int16_t *wavRead_int16(char *filename, uint32_t *sampleRate, uint64_t *totalSampleCount) {
    // Initialised so it is never read as an indeterminate value when the
    // decoder fails before filling it in.
    unsigned int channels = 0;
    int16_t *buffer = drwav_open_and_read_file_s16(filename, &channels, sampleRate, totalSampleCount);
    if (buffer == NULL) {
        printf("读取wav文件失败.");
    } else if (channels == 1) {
        return buffer;
    } else {
        // Only mono audio is handled; release anything else.
        drwav_free(buffer);
    }
    *sampleRate = 0;
    *totalSampleCount = 0;
    return NULL;
}
/*
 * Runs the WebRTC voice activity detector over a buffer of 16-bit samples and
 * prints one result per frame to stdout.
 *
 * buffer         samples to analyse (not modified)
 * sampleRate     sample rate, forwarded to WebRtcVad_Process
 * samplesCount   number of samples in buffer
 * vad_mode       mode forwarded to WebRtcVad_set_mode
 * per_ms_frames  requested frame length in milliseconds; clamped to
 *                [10, 30] and rounded down to a multiple of 10
 *
 * Samples left over after the last whole frame are not analysed.
 * Returns 1 on success, -1 on invalid arguments or when any WebRtcVad_* call
 * fails.
 */
int vadProcess(int16_t *buffer, uint32_t sampleRate, size_t samplesCount, int16_t vad_mode, int per_ms_frames) {
    if (buffer == NULL || samplesCount == 0) return -1;

    // kValidRates : 8000, 16000, 32000, 48000
    // 10, 20 or 30 ms frames
    per_ms_frames = MAX(MIN(30, per_ms_frames), 10);
    // The clamp alone still lets e.g. 15 or 25 through; snap down to 10/20/30.
    per_ms_frames -= per_ms_frames % 10;

    size_t samples = sampleRate * per_ms_frames / 1000;
    if (samples == 0) return -1;
    size_t nTotal = samplesCount / samples;

    void *vadInst = WebRtcVad_Create();
    if (vadInst == NULL) return -1;

    /* Declared before the first `goto` so no initialisation is jumped over. */
    int result = -1;
    int16_t *input = buffer;

    if (WebRtcVad_Init(vadInst) != 0) {
        printf("WebRtcVad_Init fail\n");
        goto cleanup;
    }
    if (WebRtcVad_set_mode(vadInst, vad_mode) != 0) {
        printf("WebRtcVad_set_mode fail\n");
        goto cleanup;
    }

    printf("Activity : \n");
    /* size_t counter: matches nTotal's type, so no signed/unsigned comparison
     * and no int overflow on very long inputs. */
    for (size_t frame = 0; frame < nTotal; frame++) {
        int nVadRet = WebRtcVad_Process(vadInst, sampleRate, input, samples);
        if (nVadRet == -1) {
            printf("failed in WebRtcVad_Process\n");
            goto cleanup;
        }
        // output result
        printf(" %d \t", nVadRet);
        input += samples;
    }
    printf("\n");
    result = 1;

cleanup:
    WebRtcVad_Free(vadInst);
    return result;
}
/*
 * Loads in_file through wavRead_int16 and runs vadProcess on the samples
 * with mode 1 and 30 ms frames. Does nothing when loading fails.
 */
void vad(char *in_file) {
    uint32_t rate = 0;   // sample rate of the loaded audio
    uint64_t count = 0;  // total number of samples loaded
    int16_t *pcm = wavRead_int16(in_file, &rate, &count);

    // Nothing to analyse if the file could not be loaded.
    if (!pcm) {
        return;
    }

    // Aggressiveness mode (0, 1, 2, or 3)
    const int16_t aggressiveness = 1;
    const int frame_ms = 30;
    vadProcess(pcm, rate, count, aggressiveness, frame_ms);

    free(pcm);
}
/*
 * Entry point of the VAD demo: prints a banner, then runs vad() on the file
 * named by argv[1] and waits for a key press.
 * Returns -1 when no input path is given, 0 otherwise.
 */
int main(int argc, char *argv[]) {
    printf("WebRTC Voice Activity Detector\n");
    printf("博客:http://cpuimage.cnblogs.com/\n");
    printf("静音检测\n");

    if (argc >= 2) {
        vad(argv[1]);
        printf("按任意键退出程序 \n");
        getchar();
        return 0;
    }

    // No input file was supplied.
    return -1;
}
自动增益项目地址:
具体过程是:
加载wav(将wav文件拖放到可执行文件上)->增益处理->保存为_out.wav文件
静音检测项目地址:
具体过程是:
加载wav(将wav文件拖放到可执行文件上)->输出静音检测结果
备注: 1表示不静音,0表示静音
要注意的位置和参数,请参见代码注释.
使用cmake编译示例代码. 有关详细信息,请参阅CMakeLists.txt.
如果您还有其他相关问题或需求,也可以给我发电子邮件进行讨论.
电子邮件地址是: