speex
speex是一个音频编解码以及处理的开源库
一般的调用流程
编码
编码器的初始化部分
// 创建编码器以及初始化
// 获得相应的编码模式编码器
SpeexMode *mode = NULL;
mode = (SpeexMode *)speex_lib_get_mode(SPEEX_MODEID_NB);
// 初始化编码器
void *state = speex_encoder_init(mode);
// 设置编码质量
int q = 10;
int ret = speex_encoder_ctl(m_enc_statae.state, SPEEX_SET_QUALITY, &q);
// 获得编码的帧大小
ret = speex_mode_query(mode, SPEEX_MODE_FRAME_SIZE, &frame_size);
一般编码的模式可以有三种
/** Number of defined modes in Speex */
#define SPEEX_NB_MODES 3
/** modeID for the defined narrowband mode */
// Typically used for audio sampled at 8000 Hz
#define SPEEX_MODEID_NB 0
/** modeID for the defined wideband mode */
// Typically used for audio sampled at 16000 Hz
#define SPEEX_MODEID_WB 1
/** modeID for the defined ultra-wideband mode */
// Typically used for audio sampled at 32000 Hz
#define SPEEX_MODEID_UWB 2
SpeexMode
speex 编解码的实际容器
- 定义
/** Descriptor of one codec mode: a pointer to the mode's parameter set plus
    the function pointers used to create, run, control and destroy its
    encoder and decoder. */
typedef struct SpeexMode {
/** Pointer to the low-level mode data */
const void *mode;
/** Pointer to the mode query function */
mode_query_func query;
/** The name of the mode (you should not rely on this to identify the mode)*/
const char *modeName;
/**ID of the mode*/
int modeID;
/**Version number of the bitstream (incremented every time we break
bitstream compatibility)*/
int bitstream_version;
/** Pointer to encoder initialization function */
encoder_init_func enc_init;
/** Pointer to encoder destruction function */
encoder_destroy_func enc_destroy;
/** Pointer to frame encoding function */
encode_func enc;
/** Pointer to decoder initialization function */
decoder_init_func dec_init;
/** Pointer to decoder destruction function */
decoder_destroy_func dec_destroy;
/** Pointer to frame decoding function */
decode_func dec;
/** ioctl-like requests for encoder */
encoder_ctl_func enc_ctl;
/** ioctl-like requests for decoder */
decoder_ctl_func dec_ctl;
} SpeexMode;
从SpeexMode
的定义来看,其包含了音频编解码所用的基础参数,以及将用到的函数指针。
speex_lib_get_mode
获得相应编解码模式的SpeexMode
- 定义
// [speex.h]
// Shortcut for the narrowband case: when the argument equals SPEEX_MODEID_NB the
// macro yields &speex_nb_mode directly; any other ID is forwarded to the
// speex_lib_get_mode() function. Note that `mode` is evaluated more than once.
#define speex_lib_get_mode(mode) ((mode)==SPEEX_MODEID_NB ? &speex_nb_mode : speex_lib_get_mode (mode))
// [modes_wb.c]
/* Table of the predefined modes, indexed by mode ID
   (narrowband, wideband, ultra-wideband). */
EXPORT const SpeexMode * const speex_mode_list[SPEEX_NB_MODES] = {
   &speex_nb_mode,
   &speex_wb_mode,
   &speex_uwb_mode
};

/* Return the predefined mode descriptor for the given mode ID,
   or NULL when the ID is outside 0 .. SPEEX_NB_MODES-1. */
EXPORT const SpeexMode * speex_lib_get_mode (int mode)
{
   return (mode >= 0 && mode < SPEEX_NB_MODES) ? speex_mode_list[mode] : NULL;
}
从定义来看,函数speex_lib_get_mode
实际是通过查询返回一个预定义的SpeexMode
。
speex_nb_mode
模式
speex_nb_mode
对应SPEEX_MODEID_NB
在SpeexNBMode
中的submodes
通过quality_map与编码质量的11个等级(0~10)相对应。根据设置的编码质量选择不同的submodes
。
//[modes.h]
/** Parameter set of a narrowband mode: frame geometry, LPC order, pitch search
    range, perceptual-filter constants, the table of sub-modes and the mapping
    from quality setting to sub-mode. */
typedef struct SpeexNBMode {
int frameSize; /**< Size of frames used for encoding */
int subframeSize; /**< Size of sub-frames used for encoding */
int lpcSize; /**< Order of LPC filter */
int pitchStart; /**< Smallest pitch value allowed */
int pitchEnd; /**< Largest pitch value allowed */
spx_word16_t gamma1; /**< Perceptual filter parameter #1 */
spx_word16_t gamma2; /**< Perceptual filter parameter #2 */
spx_word16_t lpc_floor; /**< Noise floor for LPC analysis */
const SpeexSubmode *submodes[NB_SUBMODES]; /**< Sub-mode data for the mode */
int defaultSubmode; /**< Default sub-mode to use when encoding */
int quality_map[11]; /**< Mode corresponding to each quality setting */
} SpeexNBMode;
//[nb_celp.h]
/* Compile-time constants of the narrowband codec. */
#define NB_ORDER 10 /* LPC order; also the length of the per-state filter memories */
#define NB_FRAME_SIZE 160 /* samples per frame */
#define NB_SUBFRAME_SIZE 40 /* samples per sub-frame */
#define NB_NB_SUBFRAMES 4 /* sub-frames per frame (4 * 40 = 160) */
#define NB_PITCH_START 17 /* smallest pitch value searched */
#define NB_PITCH_END 144 /* largest pitch value searched */
//[modes.c]
/* Default mode for narrowband */
/* Initializers follow the field order of SpeexNBMode. */
static const SpeexNBMode nb_mode = {
NB_FRAME_SIZE, /*frameSize*/
NB_SUBFRAME_SIZE, /*subframeSize*/
NB_ORDER, /*lpcSize*/
NB_PITCH_START, /*pitchStart*/
NB_PITCH_END, /*pitchEnd*/
QCONST16(0.92,15), /* gamma1 */
QCONST16(0.6,15), /* gamma2 */
QCONST16(.0002,15), /*lpc_floor*/
/* submodes: slot 0 and slots 9..15 are NULL, slots 1..8 hold the defined sub-modes */
{NULL, &nb_submode1, &nb_submode2, &nb_submode3, &nb_submode4, &nb_submode5, &nb_submode6, &nb_submode7,
&nb_submode8, NULL, NULL, NULL, NULL, NULL, NULL, NULL},
5, /* defaultSubmode */
{1, 8, 2, 3, 3, 4, 4, 5, 5, 6, 7} /* quality_map: sub-mode index for quality 0..10 */
};
/* Default mode for narrowband */
/* Initializers follow the field order of SpeexMode. */
EXPORT const SpeexMode speex_nb_mode = {
&nb_mode, // basic parameter set
nb_mode_query, // parameter/state query function (called from outside)
"narrowband", // mode name
0, // mode id
4, // Version
nb_encoder_init, // encoder initialisation
nb_encoder_destroy, // encoder teardown
nb_encode, // encode one frame
nb_decoder_init, // decoder initialisation
nb_decoder_destroy, // decoder teardown
nb_decode, // decode one frame
nb_encoder_ctl, // set/get encoder properties
nb_decoder_ctl, // set/get decoder properties
};
- [1]
nb_encoder_init
的定义
// [nb_celp.h]
/** State of the narrowband encoder. Allocated by nb_encoder_init(), passed as
    the opaque `state` pointer to nb_encode()/nb_encoder_ctl(), and released by
    nb_encoder_destroy(). */
typedef struct EncState {
const SpeexMode *mode; /**< Mode corresponding to the state */
int first; /**< Is this the first frame? */
spx_word32_t cumul_gain; /**< Product of previously used pitch gains (Q10) */
int bounded_pitch; /**< Next frame should not rely on previous frames for pitch */
int ol_pitch; /**< Open-loop pitch */
int ol_voiced; /**< Open-loop voiced/non-voiced decision */
int pitch[NB_NB_SUBFRAMES]; /* pitch chosen by the long-term predictor for each sub-frame */
#ifdef VORBIS_PSYCHO
VorbisPsy *psy;
float *psy_window;
float *curve;
float *old_curve;
#endif
spx_word16_t gamma1; /**< Perceptual filter: A(z/gamma1) */
spx_word16_t gamma2; /**< Perceptual filter: A(z/gamma2) */
spx_word16_t lpc_floor; /**< Noise floor multiplier for A[0] in LPC analysis*/
char *stack; /**< Pseudo-stack allocation for temporary memory */
spx_word16_t winBuf[NB_WINDOW_SIZE-NB_FRAME_SIZE]; /**< Input buffer (original signal) */
spx_word16_t excBuf[NB_EXCBUF]; /**< Excitation buffer */
spx_word16_t *exc; /**< Start of excitation frame */
spx_word16_t swBuf[NB_EXCBUF]; /**< Weighted signal buffer */
spx_word16_t *sw; /**< Start of weighted signal frame */
const spx_word16_t *window; /**< Temporary (Hanning) window */
const spx_word16_t *lagWindow; /**< Window applied to auto-correlation */
spx_lsp_t old_lsp[NB_ORDER]; /**< LSPs for previous frame */
spx_lsp_t old_qlsp[NB_ORDER]; /**< Quantized LSPs for previous frame */
spx_mem_t mem_sp[NB_ORDER]; /**< Filter memory for signal synthesis */
spx_mem_t mem_sw[NB_ORDER]; /**< Filter memory for perceptually-weighted signal */
spx_mem_t mem_sw_whole[NB_ORDER]; /**< Filter memory for perceptually-weighted signal (whole frame)*/
spx_mem_t mem_exc[NB_ORDER]; /**< Filter memory for excitation (whole frame) */
spx_mem_t mem_exc2[NB_ORDER]; /**< Filter memory for excitation (whole frame) */
spx_mem_t mem_hp[2]; /**< High-pass filter memory */
spx_word32_t pi_gain[NB_NB_SUBFRAMES]; /**< Gain of LPC filter at theta=pi (fe/2) */
spx_word16_t *innov_rms_save; /**< If non-NULL, innovation RMS is copied here */
#ifndef DISABLE_VBR
VBRState vbr; /**< State of the VBR data */
float vbr_quality; /**< Quality setting for VBR encoding */
float relative_quality; /**< Relative quality that will be needed by VBR */
spx_int32_t vbr_enabled; /**< 1 for enabling VBR, 0 otherwise */
spx_int32_t vbr_max; /**< Max bit-rate allowed in VBR mode */
int vad_enabled; /**< 1 for enabling VAD, 0 otherwise */
int dtx_enabled; /**< 1 for enabling DTX, 0 otherwise */
int dtx_count; /**< Number of consecutive DTX frames */
spx_int32_t abr_enabled; /**< ABR setting (in bps), 0 if off */
float abr_drift; /* running sum of (frame bitrate - ABR target), updated in nb_encode */
float abr_drift2; /* exponentially smoothed (frame bitrate - ABR target), updated in nb_encode */
float abr_count; /* incremented once per frame encoded with ABR active */
#endif /* #ifndef DISABLE_VBR */
int complexity; /**< Complexity setting (0-10 from least complex to most complex) */
spx_int32_t sampling_rate; /* sampling rate used for bit-rate computations (8000 after init) */
int plc_tuning; /* tuning value forwarded to the long-term predictor; the ctl clamps it to at most 100 */
int encode_submode; /* non-zero: nb_encode writes the narrowband bit and sub-mode ID into the bitstream */
const SpeexSubmode * const *submodes; /**< Sub-mode data */
int submodeID; /**< Activated sub-mode */
int submodeSelect; /**< Mode chosen by the user (may differ from submodeID if VAD is on) */
int isWideband; /**< Is this used as part of the embedded wideband codec */
int highpass_enabled; /**< Is the input filter enabled */
} EncState;
// [os_support.h]
#ifndef OVERRIDE_SPEEX_ALLOC_SCRATCH
/** Allocate `size` bytes of scratch memory.
    Callers do not rely on the scratch area being cleared, but this default
    implementation obtains it from calloc(), so it is zero-filled anyway.
    Returns NULL when the allocation fails. */
static inline void *speex_alloc_scratch (int size)
{
   void *scratch = calloc(size, 1);
   return scratch;
}
#endif
// [nb_celp.c]
/* Allocate and initialise the narrowband encoder state for mode `m`.
   Returns the new EncState as an opaque pointer, or NULL if the state itself
   could not be allocated. The defaults come from the SpeexNBMode parameter
   set that m->mode points to. */
void *nb_encoder_init(const SpeexMode *m)
{
   const SpeexNBMode *nb = (const SpeexNBMode *)m->mode;
   EncState *enc;
   int k;

   /* Allocate the encoder state structure */
   enc = (EncState*)speex_alloc(sizeof(EncState));
   if (enc == NULL)
      return NULL;

#if defined(VAR_ARRAYS) || defined (USE_ALLOCA)
   /* Temporaries live on the real stack; no pseudo-stack needed */
   enc->stack = NULL;
#else
   /* Pseudo-stack used for temporary buffers while encoding */
   enc->stack = (char*)speex_alloc_scratch(NB_ENC_STACK);
#endif

   /* Remember which mode this state belongs to */
   enc->mode = m;

   /* Perceptual filter parameters and LPC noise floor, copied from the mode
      (for nb_mode: 0.92, 0.6 and .0002 in Q15) */
   enc->gamma1 = nb->gamma1;
   enc->gamma2 = nb->gamma2;
   enc->lpc_floor = nb->lpc_floor;

   /* Sub-mode table and the default sub-mode (5 for nb_mode) */
   enc->submodes = nb->submodes;
   enc->submodeSelect = nb->defaultSubmode;
   enc->submodeID = enc->submodeSelect;

   /* The first frame must not depend on earlier frames for its pitch */
   enc->bounded_pitch = 1;
   enc->encode_submode = 1;

#ifdef VORBIS_PSYCHO
   /* Buffers for the optional Vorbis psycho-acoustic model */
   enc->psy = vorbis_psy_init(8000, 256);
   enc->curve = (float*)speex_alloc(128*sizeof(float));
   enc->old_curve = (float*)speex_alloc(128*sizeof(float));
   enc->psy_window = (float*)speex_alloc(256*sizeof(float));
#endif

   /* Initial product of pitch gains (the field is documented as Q10) */
   enc->cumul_gain = 1024;

   /* Analysis window and the lag window applied to the auto-correlation */
   enc->window = lpc_window;
   enc->lagWindow = lag_window;

   enc->first = 1;
   /* Start the LSP history with values spread evenly over (0, pi) */
   for (k = 0; k < NB_ORDER; k++)
      enc->old_lsp[k] = DIV32(MULT16_16(QCONST16(3.1415927f, LSP_SHIFT), k+1), NB_ORDER+1);

   /* No buffer registered for saving the innovation RMS */
   enc->innov_rms_save = NULL;

#ifndef DISABLE_VBR
   /* Variable bit-rate (VBR), voice activity detection (VAD),
      discontinuous transmission (DTX) and average bit-rate (ABR): all off */
   vbr_init(&enc->vbr);
   enc->vbr_quality = 8;
   enc->vbr_enabled = 0;
   enc->vbr_max = 0;
   enc->vad_enabled = 0;
   enc->dtx_enabled = 0;
   enc->dtx_count = 0;
   enc->abr_enabled = 0;
   enc->abr_drift = 0;
   enc->abr_drift2 = 0;
#endif /* #ifndef DISABLE_VBR */

   enc->plc_tuning = 2;
   enc->complexity = 2;
   enc->sampling_rate = 8000;
   enc->isWideband = 0;
   enc->highpass_enabled = 1;

#ifdef ENABLE_VALGRIND
   VALGRIND_MAKE_READABLE(enc, NB_ENC_STACK);
#endif
   return enc;
}
- [2]
nb_encoder_destroy
的定义
// [nb_celp.c]
/* Release everything owned by a narrowband encoder state created by
   nb_encoder_init(): the pseudo-stack (when one was allocated), the VBR data,
   the optional psycho-acoustic buffers, and finally the state itself. */
void nb_encoder_destroy(void *state)
{
   EncState *enc = (EncState *)state;

#if !(defined(VAR_ARRAYS) || defined (USE_ALLOCA))
   /* Give back the pseudo-stack */
   speex_free_scratch(enc->stack);
#endif

#ifndef DISABLE_VBR
   vbr_destroy(&enc->vbr);
#endif /* #ifndef DISABLE_VBR */

#ifdef VORBIS_PSYCHO
   vorbis_psy_destroy(enc->psy);
   speex_free (enc->curve);
   speex_free (enc->old_curve);
   speex_free (enc->psy_window);
#endif

   /* The state block goes last: everything above still reads from it */
   speex_free(enc);
}
- [3]
nb_encoder_ctl
的定义
// nb_encoder_ctl is the interface through which callers set and query
// narrowband encoder parameters.
//   state   - encoder state; cast to EncState* below
//   request - one of the SPEEX_* request codes handled by the switch
//   ptr     - in/out argument; every case casts it to the type that request uses
// Returns 0 when the request was handled, or -1 (after emitting a warning)
// when the request code is unknown.
int nb_encoder_ctl(void *state, int request, void *ptr)
{
EncState *st;
st=(EncState*)state;
switch(request)
{
case SPEEX_GET_FRAME_SIZE: // report the encoder frame size
(*(spx_int32_t*)ptr) = NB_FRAME_SIZE; // always NB_FRAME_SIZE (160)
break;
case SPEEX_SET_LOW_MODE: // select the sub-mode directly (no range check is done here)
case SPEEX_SET_MODE:
st->submodeSelect = st->submodeID = (*(spx_int32_t*)ptr);
break;
case SPEEX_GET_LOW_MODE: // read back the active sub-mode
case SPEEX_GET_MODE:
(*(spx_int32_t*)ptr) = st->submodeID;
break;
#ifndef DISABLE_VBR // variable bit-rate related requests
case SPEEX_SET_VBR: // turn VBR on or off
st->vbr_enabled = (*(spx_int32_t*)ptr);
break;
case SPEEX_GET_VBR: // report whether VBR is on
(*(spx_int32_t*)ptr) = st->vbr_enabled;
break;
case SPEEX_SET_VAD: // turn voice activity detection on (1) or off (0)
st->vad_enabled = (*(spx_int32_t*)ptr);
break;
case SPEEX_GET_VAD: // report the VAD setting
(*(spx_int32_t*)ptr) = st->vad_enabled;
break;
case SPEEX_SET_DTX: // turn discontinuous transmission on or off
st->dtx_enabled = (*(spx_int32_t*)ptr);
break;
case SPEEX_GET_DTX: // report the DTX setting
(*(spx_int32_t*)ptr) = st->dtx_enabled;
break;
case SPEEX_SET_ABR: // set the average bit-rate target
st->abr_enabled = (*(spx_int32_t*)ptr); // 0 is off; otherwise the target in bps
st->vbr_enabled = st->abr_enabled!=0; // VBR is switched on exactly when ABR is non-zero
if (st->vbr_enabled)
{
spx_int32_t i=10;
spx_int32_t rate, target;
float vbr_qual;
target = (*(spx_int32_t*)ptr);
while (i>=0)
{
// Walk the quality down from 10 until the reported bit-rate fits the ABR target
speex_encoder_ctl(st, SPEEX_SET_QUALITY, &i);
speex_encoder_ctl(st, SPEEX_GET_BITRATE, &rate);
if (rate <= target)
break;
i--;
}
vbr_qual=i;
if (vbr_qual<0)
vbr_qual=0;
// Use the quality found above as the starting VBR quality
speex_encoder_ctl(st, SPEEX_SET_VBR_QUALITY, &vbr_qual);
st->abr_count=0;
st->abr_drift=0;
st->abr_drift2=0;
}
break;
case SPEEX_GET_ABR: // report the ABR target
(*(spx_int32_t*)ptr) = st->abr_enabled;
break;
#endif /* #ifndef DISABLE_VBR */
#if !defined(DISABLE_VBR) && !defined(DISABLE_FLOAT_API)
case SPEEX_SET_VBR_QUALITY: // set the VBR quality (ptr points to a float)
st->vbr_quality = (*(float*)ptr);
break;
case SPEEX_GET_VBR_QUALITY:
(*(float*)ptr) = st->vbr_quality;
break;
#endif /* !defined(DISABLE_VBR) && !defined(DISABLE_FLOAT_API) */
case SPEEX_SET_QUALITY: // set the encoding quality (clamped to 0..10)
{
int quality = (*(spx_int32_t*)ptr);
if (quality < 0)
quality = 0;
if (quality > 10)
quality = 10;
// Pick the sub-mode that the mode's quality_map assigns to this quality
st->submodeSelect = st->submodeID = ((const SpeexNBMode*)(st->mode->mode))->quality_map[quality];
}
break;
case SPEEX_SET_COMPLEXITY: // set the complexity; only negative values are clamped (to 0) here
st->complexity = (*(spx_int32_t*)ptr);
if (st->complexity<0)
st->complexity=0;
break;
case SPEEX_GET_COMPLEXITY:
(*(spx_int32_t*)ptr) = st->complexity;
break;
case SPEEX_SET_BITRATE: // set the target bit-rate
{
spx_int32_t i=10;
spx_int32_t rate, target;
target = (*(spx_int32_t*)ptr);
while (i>=0)
{
// Walk the quality down from 10 until the resulting bit-rate is within the target
speex_encoder_ctl(st, SPEEX_SET_QUALITY, &i);
speex_encoder_ctl(st, SPEEX_GET_BITRATE, &rate);
if (rate <= target)
break;
i--;
}
}
break;
case SPEEX_GET_BITRATE: // report the current bit-rate
if (st->submodes[st->submodeID])
// Derived from the bits per frame of the active sub-mode
(*(spx_int32_t*)ptr) = st->sampling_rate*SUBMODE(bits_per_frame)/NB_FRAME_SIZE;
else
(*(spx_int32_t*)ptr) = st->sampling_rate*(NB_SUBMODE_BITS+1)/NB_FRAME_SIZE;
break;
case SPEEX_SET_SAMPLING_RATE: // set the sampling rate
st->sampling_rate = (*(spx_int32_t*)ptr);
break;
case SPEEX_GET_SAMPLING_RATE:
(*(spx_int32_t*)ptr)=st->sampling_rate;
break;
case SPEEX_RESET_STATE: // reset the encoder's signal history
{
int i;
st->bounded_pitch = 1;
st->first = 1;
for (i=0;i<NB_ORDER;i++)
st->old_lsp[i]= DIV32(MULT16_16(QCONST16(3.1415927f, LSP_SHIFT), i+1), NB_ORDER+1);
for (i=0;i<NB_ORDER;i++)
st->mem_sw[i]=st->mem_sw_whole[i]=st->mem_sp[i]=st->mem_exc[i]=0;
for (i=0;i<NB_FRAME_SIZE+NB_PITCH_END+1;i++)
st->excBuf[i]=st->swBuf[i]=0;
for (i=0;i<NB_WINDOW_SIZE-NB_FRAME_SIZE;i++)
st->winBuf[i]=0;
}
break;
case SPEEX_SET_SUBMODE_ENCODING: // set the encode_submode flag (whether nb_encode writes the sub-mode header bits)
st->encode_submode = (*(spx_int32_t*)ptr);
break;
case SPEEX_GET_SUBMODE_ENCODING:
(*(spx_int32_t*)ptr) = st->encode_submode;
break;
case SPEEX_GET_LOOKAHEAD: // report the look-ahead: NB_WINDOW_SIZE-NB_FRAME_SIZE samples
(*(spx_int32_t*)ptr)=(NB_WINDOW_SIZE-NB_FRAME_SIZE);
break;
case SPEEX_SET_PLC_TUNING: // values above 100 are clamped to 100
st->plc_tuning = (*(spx_int32_t*)ptr);
if (st->plc_tuning>100)
st->plc_tuning=100;
break;
case SPEEX_GET_PLC_TUNING:
(*(spx_int32_t*)ptr)=(st->plc_tuning);
break;
#ifndef DISABLE_VBR
case SPEEX_SET_VBR_MAX_BITRATE: // set the maximum bit-rate allowed in VBR mode
st->vbr_max = (*(spx_int32_t*)ptr);
break;
case SPEEX_GET_VBR_MAX_BITRATE:
(*(spx_int32_t*)ptr) = st->vbr_max;
break;
#endif /* #ifndef DISABLE_VBR */
case SPEEX_SET_HIGHPASS: // enable/disable the high-pass filter applied to the input signal
st->highpass_enabled = (*(spx_int32_t*)ptr);
break;
case SPEEX_GET_HIGHPASS:
(*(spx_int32_t*)ptr) = st->highpass_enabled;
break;
// Internal requests from here on (mostly queries, plus two internal setters)
/* This is all internal stuff past this point */
case SPEEX_GET_PI_GAIN:
{
int i;
spx_word32_t *g = (spx_word32_t*)ptr;
for (i=0;i<NB_NB_SUBFRAMES;i++)
g[i]=st->pi_gain[i];
}
break;
case SPEEX_GET_EXC:
{
int i;
for (i=0;i<NB_NB_SUBFRAMES;i++)
((spx_word16_t*)ptr)[i] = compute_rms16(st->exc+i*NB_SUBFRAME_SIZE, NB_SUBFRAME_SIZE);
}
break;
#ifndef DISABLE_VBR
case SPEEX_GET_RELATIVE_QUALITY:
(*(float*)ptr)=st->relative_quality;
break;
#endif /* #ifndef DISABLE_VBR */
case SPEEX_SET_INNOVATION_SAVE:
st->innov_rms_save = (spx_word16_t*)ptr;
break;
case SPEEX_SET_WIDEBAND:
st->isWideband = *((spx_int32_t*)ptr);
break;
case SPEEX_GET_STACK:
*((char**)ptr) = st->stack;
break;
default:
speex_warning_int("Unknown nb_ctl request: ", request);
return -1;
}
return 0;
}
- [4]
nb_encode
的定义
// [nb_celp.c]
// Encode one narrowband frame.
//   state - encoder state created by nb_encoder_init() (cast to EncState*)
//   vin   - raw samples to encode; read as spx_word16_t and filtered in place
//           when the high-pass filter is enabled
//   bits  - bit packer that receives the encoded frame
// A frame is NB_FRAME_SIZE (160) samples by default, processed as 4 sub-frames of 40.
// Returns 0 when the selected sub-mode is NULL (no transmission), otherwise 1.
int nb_encode(void *state, void *vin, SpeexBits *bits)
{
EncState *st;
int i, sub, roots;
int ol_pitch;
spx_word16_t ol_pitch_coef;
spx_word32_t ol_gain;
VARDECL(spx_word16_t *target);
VARDECL(spx_sig_t *innov);
VARDECL(spx_word32_t *exc32);
VARDECL(spx_mem_t *mem);
VARDECL(spx_coef_t *bw_lpc1);
VARDECL(spx_coef_t *bw_lpc2);
VARDECL(spx_coef_t *lpc);
VARDECL(spx_lsp_t *lsp);
VARDECL(spx_lsp_t *qlsp);
VARDECL(spx_lsp_t *interp_lsp);
VARDECL(spx_lsp_t *interp_qlsp);
VARDECL(spx_coef_t *interp_lpc);
VARDECL(spx_coef_t *interp_qlpc);
char *stack;
VARDECL(spx_word16_t *syn_resp);
spx_word32_t ener=0;
spx_word16_t fine_gain;
// Input frame to encode
spx_word16_t *in = (spx_word16_t*)vin;
st=(EncState *)state;
stack=st->stack;
// Allocate the per-frame coefficient buffers
// NB_ORDER = 10
ALLOC(lpc, NB_ORDER, spx_coef_t);
ALLOC(bw_lpc1, NB_ORDER, spx_coef_t);
ALLOC(bw_lpc2, NB_ORDER, spx_coef_t);
ALLOC(lsp, NB_ORDER, spx_lsp_t);
ALLOC(qlsp, NB_ORDER, spx_lsp_t);
ALLOC(interp_lsp, NB_ORDER, spx_lsp_t);
ALLOC(interp_qlsp, NB_ORDER, spx_lsp_t);
ALLOC(interp_lpc, NB_ORDER, spx_coef_t);
ALLOC(interp_qlpc, NB_ORDER, spx_coef_t);
// exc: excitation of the current frame
// NOTE(review): the original annotation says "init" does
// SPEEX_MEMSET(st->excBuf, 0, NB_FRAME_SIZE + NB_PITCH_END); in the visible
// source that call appears in nb_decoder_init - confirm for the encoder.
// NB_PITCH_END = 144
st->exc = st->excBuf + NB_PITCH_END + 2;
// sw: perceptually weighted signal of the current frame; swBuf also keeps its
// history (per the original annotation, used for the zero-input response)
st->sw = st->swBuf + NB_PITCH_END + 2;
/* Move signals 1 frame towards the past */
// SPEEX_MOVE(dst, src, n)
// NB_FRAME_SIZE 160 NB_PITCH_END 144
// Per the original annotation: the previous frame's excitation is shifted back so
// that it serves as the adaptive codebook for the first sub-frame and as part of
// the adaptive codebook for sub-frames 2-4.
SPEEX_MOVE(st->excBuf, st->excBuf+NB_FRAME_SIZE, NB_PITCH_END+2);
SPEEX_MOVE(st->swBuf, st->swBuf+NB_FRAME_SIZE, NB_PITCH_END+2);
// High-pass filter the input (in place)
// to remove low-frequency noise
if (st->highpass_enabled)
highpass(in, in, NB_FRAME_SIZE, (st->isWideband?HIGHPASS_WIDEBAND:HIGHPASS_NARROWBAND)|HIGHPASS_INPUT, st->mem_hp);
{
// LPC analysis of the audio:
// compute the LPC coefficients with the Levinson-Durbin algorithm,
// then convert the LPCs to LSPs
VARDECL(spx_word16_t *w_sig);
// Auto-correlation coefficients
VARDECL(spx_word16_t *autocorr);
// NB_WINDOW_SIZE (NB_FRAME_SIZE+NB_SUBFRAME_SIZE) (160+40)
ALLOC(w_sig, NB_WINDOW_SIZE, spx_word16_t);
// NB_ORDER 10
ALLOC(autocorr, NB_ORDER+1, spx_word16_t);
/* Window for analysis */
for (i=0;i<NB_WINDOW_SIZE-NB_FRAME_SIZE;i++)
w_sig[i] = MULT16_16_Q15(st->winBuf[i],st->window[i]);
for (;i<NB_WINDOW_SIZE;i++)
// Apply the analysis window to the new samples (windowing truncates the signal
// while limiting the effect on its frequency characteristics)
w_sig[i] = MULT16_16_Q15(in[i-NB_WINDOW_SIZE+NB_FRAME_SIZE],st->window[i]);
/* Compute auto-correlation */
// Auto-correlation of the windowed signal
_spx_autocorr(w_sig, autocorr, NB_ORDER+1, NB_WINDOW_SIZE);
autocorr[0] = ADD16(autocorr[0],MULT16_16_Q15(autocorr[0],st->lpc_floor)); /* Noise floor in auto-correlation domain */
/* Lag windowing: equivalent to filtering in the power-spectrum domain */
// Lag-window filtering
for (i=0;i<NB_ORDER+1;i++)
autocorr[i] = MULT16_16_Q15(autocorr[i],st->lagWindow[i]);
autocorr[0] = ADD16(autocorr[0],1);
/* Levinson-Durbin */
// Levinson-Durbin recursion,
// followed by the LPC to LSP conversion
_spx_lpc(lpc, autocorr, NB_ORDER);
/* LPC to LSPs (x-domain) transform */
roots=lpc_to_lsp (lpc, NB_ORDER, lsp, 10, LSP_DELTA1, stack);
/* Check if we found all the roots */
if (roots!=NB_ORDER)
{
/*If we can't find all LSP's, do some damage control and use previous filter*/
for (i=0;i<NB_ORDER;i++)
{
lsp[i]=st->old_lsp[i];
}
}
}
// Whole-frame analysis (pitch period estimation and excitation gain)
/* Whole frame analysis (open-loop estimation of pitch and excitation gain) */
{
int diff = NB_WINDOW_SIZE-NB_FRAME_SIZE;
// LSP interpolation
if (st->first)
for (i=0;i<NB_ORDER;i++)
interp_lsp[i] = lsp[i];
else
lsp_interpolate(st->old_lsp, lsp, interp_lsp, NB_ORDER, NB_NB_SUBFRAMES, NB_NB_SUBFRAMES<<1, LSP_MARGIN);
// After interpolation, convert the LSPs back to LPC coefficients (interp_lpc)
/* Compute interpolated LPCs (unquantized) for whole frame*/
lsp_to_lpc(interp_lsp, interp_lpc, NB_ORDER,stack);
/*Open-loop pitch*/
if (!st->submodes[st->submodeID] || (st->complexity>2 && SUBMODE(have_subframe_gain)<3) || SUBMODE(forced_pitch_gain) || SUBMODE(lbr_pitch) != -1
#ifndef DISABLE_VBR
|| st->vbr_enabled || st->vad_enabled
#endif
)
{
int nol_pitch[6];
spx_word16_t nol_pitch_coef[6];
bw_lpc(0.9, interp_lpc, bw_lpc1, NB_ORDER);
bw_lpc(0.55, interp_lpc, bw_lpc2, NB_ORDER);
SPEEX_COPY(st->sw, st->winBuf, diff);
SPEEX_COPY(st->sw+diff, in, NB_FRAME_SIZE-diff);
filter10(st->sw, bw_lpc1, bw_lpc2, st->sw, NB_FRAME_SIZE, st->mem_sw_whole, stack);
open_loop_nbest_pitch(st->sw, NB_PITCH_START, NB_PITCH_END, NB_FRAME_SIZE,
nol_pitch, nol_pitch_coef, 6, stack);
ol_pitch=nol_pitch[0];
ol_pitch_coef = nol_pitch_coef[0];
/*Try to remove pitch multiples*/
for (i=1;i<6;i++)
{
#ifdef FIXED_POINT
if ((nol_pitch_coef[i]>MULT16_16_Q15(nol_pitch_coef[0],27853)) &&
#else
if ((nol_pitch_coef[i]>.85*nol_pitch_coef[0]) &&
#endif
(ABS(2*nol_pitch[i]-ol_pitch)<=2 || ABS(3*nol_pitch[i]-ol_pitch)<=3 ||
ABS(4*nol_pitch[i]-ol_pitch)<=4 || ABS(5*nol_pitch[i]-ol_pitch)<=5))
{
/*ol_pitch_coef=nol_pitch_coef[i];*/
ol_pitch = nol_pitch[i];
}
}
/*if (ol_pitch>50)
ol_pitch/=2;*/
/*ol_pitch_coef = sqrt(ol_pitch_coef);*/
} else {
ol_pitch=0;
ol_pitch_coef=0;
}
/*Compute "real" excitation*/
/*SPEEX_COPY(st->exc, st->winBuf, diff);
SPEEX_COPY(st->exc+diff, in, NB_FRAME_SIZE-diff);*/
// Run the original audio through the analysis (FIR) filter built from interp_lpc
// to obtain the actual excitation, i.e. inverse filtering that yields the
// residual signal.
fir_mem16(st->winBuf, interp_lpc, st->exc, diff, NB_ORDER, st->mem_exc, stack);
fir_mem16(in, interp_lpc, st->exc+diff, NB_FRAME_SIZE-diff, NB_ORDER, st->mem_exc, stack);
/* Compute open-loop excitation gain */
{
// RMS level of the excitation over the whole frame
spx_word16_t g = compute_rms16(st->exc, NB_FRAME_SIZE);
if (st->submodeID!=1 && ol_pitch>0)
ol_gain = MULT16_16(g, MULT16_16_Q14(QCONST16(1.1,14),
spx_sqrt(QCONST32(1.,28)-MULT16_32_Q15(QCONST16(.8,15),SHL32(MULT16_16(ol_pitch_coef,ol_pitch_coef),16)))));
else
ol_gain = SHL32(EXTEND32(g),SIG_SHIFT);
}
}
#ifdef VORBIS_PSYCHO
SPEEX_MOVE(st->psy_window, st->psy_window+NB_FRAME_SIZE, 256-NB_FRAME_SIZE);
SPEEX_COPY(&st->psy_window[256-NB_FRAME_SIZE], in, NB_FRAME_SIZE);
compute_curve(st->psy, st->psy_window, st->curve);
/*print_vec(st->curve, 128, "curve");*/
if (st->first)
SPEEX_COPY(st->old_curve, st->curve, 128);
#endif
/*VBR stuff*/
#ifndef DISABLE_VBR
if (st->vbr_enabled||st->vad_enabled)
{
float lsp_dist=0;
for (i=0;i<NB_ORDER;i++)
lsp_dist += (st->old_lsp[i] - lsp[i])*(st->old_lsp[i] - lsp[i]);
lsp_dist /= LSP_SCALING*LSP_SCALING;
if (st->abr_enabled)
{
float qual_change=0;
if (st->abr_drift2 * st->abr_drift > 0)
{
/* Only adapt if long-term and short-term drift are the same sign */
qual_change = -.00001*st->abr_drift/(1+st->abr_count);
if (qual_change>.05)
qual_change=.05;
if (qual_change<-.05)
qual_change=-.05;
}
st->vbr_quality += qual_change;
if (st->vbr_quality>10)
st->vbr_quality=10;
if (st->vbr_quality<0)
st->vbr_quality=0;
}
st->relative_quality = vbr_analysis(&st->vbr, in, NB_FRAME_SIZE, ol_pitch, GAIN_SCALING_1*ol_pitch_coef);
/*if (delta_qual<0)*/
/* delta_qual*=.1*(3+st->vbr_quality);*/
if (st->vbr_enabled)
{
spx_int32_t mode;
int choice=0;
float min_diff=100;
mode = 8;
while (mode)
{
int v1;
float thresh;
v1=(int)floor(st->vbr_quality);
if (v1==10)
thresh = vbr_nb_thresh[mode][v1];
else
thresh = (st->vbr_quality-v1)*vbr_nb_thresh[mode][v1+1] + (1+v1-st->vbr_quality)*vbr_nb_thresh[mode][v1];
if (st->relative_quality > thresh &&
st->relative_quality-thresh<min_diff)
{
choice = mode;
min_diff = st->relative_quality-thresh;
}
mode--;
}
mode=choice;
if (mode==0)
{
if (st->dtx_count==0 || lsp_dist>.05 || !st->dtx_enabled || st->dtx_count>20)
{
mode=1;
st->dtx_count=1;
} else {
mode=0;
st->dtx_count++;
}
} else {
st->dtx_count=0;
}
speex_encoder_ctl(state, SPEEX_SET_MODE, &mode);
if (st->vbr_max>0)
{
spx_int32_t rate;
speex_encoder_ctl(state, SPEEX_GET_BITRATE, &rate);
if (rate > st->vbr_max)
{
rate = st->vbr_max;
speex_encoder_ctl(state, SPEEX_SET_BITRATE, &rate);
}
}
if (st->abr_enabled)
{
spx_int32_t bitrate;
speex_encoder_ctl(state, SPEEX_GET_BITRATE, &bitrate);
st->abr_drift+=(bitrate-st->abr_enabled);
st->abr_drift2 = .95*st->abr_drift2 + .05*(bitrate-st->abr_enabled);
st->abr_count += 1.0;
}
} else {
/*VAD only case*/
int mode;
if (st->relative_quality<2)
{
if (st->dtx_count==0 || lsp_dist>.05 || !st->dtx_enabled || st->dtx_count>20)
{
st->dtx_count=1;
mode=1;
} else {
mode=0;
st->dtx_count++;
}
} else {
st->dtx_count = 0;
mode=st->submodeSelect;
}
/*speex_encoder_ctl(state, SPEEX_SET_MODE, &mode);*/
st->submodeID=mode;
}
} else {
st->relative_quality = -1;
}
#endif /* #ifndef DISABLE_VBR */
if (st->encode_submode)
{
/* First, transmit a zero for narrowband */
speex_bits_pack(bits, 0, 1);
/* Transmit the sub-mode we use for this frame */
speex_bits_pack(bits, st->submodeID, NB_SUBMODE_BITS);
}
/* If null mode (no transmission), just set a couple things to zero*/
if (st->submodes[st->submodeID] == NULL)
{
for (i=0;i<NB_FRAME_SIZE;i++)
st->exc[i]=st->sw[i]=VERY_SMALL;
for (i=0;i<NB_ORDER;i++)
st->mem_sw[i]=0;
st->first=1;
st->bounded_pitch = 1;
SPEEX_COPY(st->winBuf, in+2*NB_FRAME_SIZE-NB_WINDOW_SIZE, NB_WINDOW_SIZE-NB_FRAME_SIZE);
/* Clear memory (no need to really compute it) */
for (i=0;i<NB_ORDER;i++)
st->mem_sp[i] = 0;
return 0;
}
/* LSP Quantization */
if (st->first)
{
for (i=0;i<NB_ORDER;i++)
st->old_lsp[i] = lsp[i];
}
// Quantize the LSPs with the sub-mode's quantizer
/*Quantize LSPs*/
#if 1 /*0 for unquantized*/
SUBMODE(lsp_quant)(lsp, qlsp, NB_ORDER, bits);
#else
for (i=0;i<NB_ORDER;i++)
qlsp[i]=lsp[i];
#endif
// Bit-rate dependent side information
/*If we use low bit-rate pitch mode, transmit open-loop pitch*/
if (SUBMODE(lbr_pitch)!=-1)
{
speex_bits_pack(bits, ol_pitch-NB_PITCH_START, 7);
}
if (SUBMODE(forced_pitch_gain))
{
int quant;
/* This just damps the pitch a bit, because it tends to be too aggressive when forced */
ol_pitch_coef = MULT16_16_Q15(QCONST16(.9,15), ol_pitch_coef);
#ifdef FIXED_POINT
quant = PSHR16(MULT16_16_16(15, ol_pitch_coef),GAIN_SHIFT);
#else
quant = (int)floor(.5+15*ol_pitch_coef*GAIN_SCALING_1);
#endif
if (quant>15)
quant=15;
if (quant<0)
quant=0;
speex_bits_pack(bits, quant, 4);
ol_pitch_coef=MULT16_16_P15(QCONST16(0.066667,15),SHL16(quant,GAIN_SHIFT));
}
/*Quantize and transmit open-loop excitation gain*/
#ifdef FIXED_POINT
{
int qe = scal_quant32(ol_gain, ol_gain_table, 32);
/*ol_gain = exp(qe/3.5)*SIG_SCALING;*/
ol_gain = MULT16_32_Q15(28406,ol_gain_table[qe]);
speex_bits_pack(bits, qe, 5);
}
#else
{
int qe = (int)(floor(.5+3.5*log(ol_gain*1.0/SIG_SCALING)));
if (qe<0)
qe=0;
if (qe>31)
qe=31;
ol_gain = exp(qe/3.5)*SIG_SCALING;
speex_bits_pack(bits, qe, 5);
}
#endif
/* Special case for first frame */
if (st->first)
{
for (i=0;i<NB_ORDER;i++)
st->old_qlsp[i] = qlsp[i];
}
/* Target signal */
ALLOC(target, NB_SUBFRAME_SIZE, spx_word16_t);
ALLOC(innov, NB_SUBFRAME_SIZE, spx_sig_t);
ALLOC(exc32, NB_SUBFRAME_SIZE, spx_word32_t);
ALLOC(syn_resp, NB_SUBFRAME_SIZE, spx_word16_t);
ALLOC(mem, NB_ORDER, spx_mem_t);
/* Loop on sub-frames */
for (sub=0;sub<NB_NB_SUBFRAMES;sub++)
{
int offset;
spx_word16_t *sw;
spx_word16_t *exc, *inBuf;
int pitch;
int response_bound = NB_SUBFRAME_SIZE;
/* Offset relative to start of frame */
offset = NB_SUBFRAME_SIZE*sub;
/* Excitation */
exc=st->exc+offset;
/* Weighted signal */
sw=st->sw+offset;
/* LSP interpolation (quantized and unquantized) */
lsp_interpolate(st->old_lsp, lsp, interp_lsp, NB_ORDER, sub, NB_NB_SUBFRAMES, LSP_MARGIN);
lsp_interpolate(st->old_qlsp, qlsp, interp_qlsp, NB_ORDER, sub, NB_NB_SUBFRAMES, LSP_MARGIN);
/* Compute interpolated LPCs (quantized and unquantized) */
lsp_to_lpc(interp_lsp, interp_lpc, NB_ORDER,stack);
lsp_to_lpc(interp_qlsp, interp_qlpc, NB_ORDER, stack);
/* Compute analysis filter gain at w=pi (for use in SB-CELP) */
{
spx_word32_t pi_g=LPC_SCALING;
for (i=0;i<NB_ORDER;i+=2)
{
/*pi_g += -st->interp_qlpc[i] + st->interp_qlpc[i+1];*/
pi_g = ADD32(pi_g, SUB32(EXTEND32(interp_qlpc[i+1]),EXTEND32(interp_qlpc[i])));
}
st->pi_gain[sub] = pi_g;
}
#ifdef VORBIS_PSYCHO
{
float curr_curve[128];
float fact = ((float)sub+1.0f)/NB_NB_SUBFRAMES;
for (i=0;i<128;i++)
curr_curve[i] = (1.0f-fact)*st->old_curve[i] + fact*st->curve[i];
curve_to_lpc(st->psy, curr_curve, bw_lpc1, bw_lpc2, 10);
}
#else
/* Compute bandwidth-expanded (unquantized) LPCs for perceptual weighting */
bw_lpc(st->gamma1, interp_lpc, bw_lpc1, NB_ORDER);
bw_lpc(st->gamma2, interp_lpc, bw_lpc2, NB_ORDER);
/*print_vec(st->bw_lpc1, 10, "bw_lpc");*/
#endif
/*FIXME: This will break if we change the window size */
speex_assert(NB_WINDOW_SIZE-NB_FRAME_SIZE == NB_SUBFRAME_SIZE);
if (sub==0)
inBuf = st->winBuf;
else
inBuf = &in[((sub-1)*NB_SUBFRAME_SIZE)];
for (i=0;i<NB_SUBFRAME_SIZE;i++)
sw[i] = inBuf[i];
if (st->complexity==0)
response_bound >>= 1;
compute_impulse_response(interp_qlpc, bw_lpc1, bw_lpc2, syn_resp, response_bound, NB_ORDER, stack);
for (i=response_bound;i<NB_SUBFRAME_SIZE;i++)
syn_resp[i]=VERY_SMALL;
/* Compute zero response of A(z/g1) / ( A(z/g2) * A(z) ) */
for (i=0;i<NB_ORDER;i++)
mem[i]=SHL32(st->mem_sp[i],1);
for (i=0;i<NB_SUBFRAME_SIZE;i++)
exc[i] = VERY_SMALL;
#ifdef SHORTCUTS2
iir_mem16(exc, interp_qlpc, exc, response_bound, NB_ORDER, mem, stack);
for (i=0;i<NB_ORDER;i++)
mem[i]=SHL32(st->mem_sw[i],1);
filter10(exc, st->bw_lpc1, st->bw_lpc2, exc, response_bound, mem, stack);
SPEEX_MEMSET(&exc[response_bound], 0, NB_SUBFRAME_SIZE-response_bound);
#else
iir_mem16(exc, interp_qlpc, exc, NB_SUBFRAME_SIZE, NB_ORDER, mem, stack);
for (i=0;i<NB_ORDER;i++)
mem[i]=SHL32(st->mem_sw[i],1);
filter10(exc, bw_lpc1, bw_lpc2, exc, NB_SUBFRAME_SIZE, mem, stack);
#endif
/* Compute weighted signal */
for (i=0;i<NB_ORDER;i++)
mem[i]=st->mem_sw[i];
filter10(sw, bw_lpc1, bw_lpc2, sw, NB_SUBFRAME_SIZE, mem, stack);
if (st->complexity==0)
for (i=0;i<NB_ORDER;i++)
st->mem_sw[i]=mem[i];
/* Compute target signal (saturation prevents overflows on clipped input speech) */
for (i=0;i<NB_SUBFRAME_SIZE;i++)
target[i]=EXTRACT16(SATURATE(SUB32(sw[i],PSHR32(exc[i],1)),32767));
for (i=0;i<NB_SUBFRAME_SIZE;i++)
exc[i] = inBuf[i];
fir_mem16(exc, interp_qlpc, exc, NB_SUBFRAME_SIZE, NB_ORDER, st->mem_exc2, stack);
/* If we have a long-term predictor (otherwise, something's wrong) */
speex_assert (SUBMODE(ltp_quant));
{
int pit_min, pit_max;
/* Long-term prediction */
if (SUBMODE(lbr_pitch) != -1)
{
/* Low bit-rate pitch handling */
int margin;
margin = SUBMODE(lbr_pitch);
if (margin)
{
if (ol_pitch < NB_PITCH_START+margin-1)
ol_pitch=NB_PITCH_START+margin-1;
if (ol_pitch > NB_PITCH_END-margin)
ol_pitch=NB_PITCH_END-margin;
pit_min = ol_pitch-margin+1;
pit_max = ol_pitch+margin;
} else {
pit_min=pit_max=ol_pitch;
}
} else {
pit_min = NB_PITCH_START;
pit_max = NB_PITCH_END;
}
/* Force pitch to use only the current frame if needed */
if (st->bounded_pitch && pit_max>offset)
pit_max=offset;
/* Perform pitch search */
pitch = SUBMODE(ltp_quant)(target, sw, interp_qlpc, bw_lpc1, bw_lpc2,
exc32, SUBMODE(ltp_params), pit_min, pit_max, ol_pitch_coef,
NB_ORDER, NB_SUBFRAME_SIZE, bits, stack,
exc, syn_resp, st->complexity, 0, st->plc_tuning, &st->cumul_gain);
st->pitch[sub]=pitch;
}
/* Quantization of innovation */
SPEEX_MEMSET(innov, 0, NB_SUBFRAME_SIZE);
/* FIXME: Make sure this is safe from overflows (so far so good) */
for (i=0;i<NB_SUBFRAME_SIZE;i++)
exc[i] = EXTRACT16(SUB32(EXTEND32(exc[i]), PSHR32(exc32[i],SIG_SHIFT-1)));
ener = SHL32(EXTEND32(compute_rms16(exc, NB_SUBFRAME_SIZE)),SIG_SHIFT);
/*FIXME: Should use DIV32_16 and make sure result fits in 16 bits */
#ifdef FIXED_POINT
{
spx_word32_t f = PDIV32(ener,PSHR32(ol_gain,SIG_SHIFT));
if (f<=32767)
fine_gain = f;
else
fine_gain = 32767;
}
#else
fine_gain = PDIV32_16(ener,PSHR32(ol_gain,SIG_SHIFT));
#endif
/* Calculate gain correction for the sub-frame (if any) */
if (SUBMODE(have_subframe_gain))
{
int qe;
if (SUBMODE(have_subframe_gain)==3)
{
qe = scal_quant(fine_gain, exc_gain_quant_scal3_bound, 8);
speex_bits_pack(bits, qe, 3);
ener=MULT16_32_Q14(exc_gain_quant_scal3[qe],ol_gain);
} else {
qe = scal_quant(fine_gain, exc_gain_quant_scal1_bound, 2);
speex_bits_pack(bits, qe, 1);
ener=MULT16_32_Q14(exc_gain_quant_scal1[qe],ol_gain);
}
} else {
ener=ol_gain;
}
/*printf ("%f %f\n", ener, ol_gain);*/
/* Normalize innovation */
signal_div(target, target, ener, NB_SUBFRAME_SIZE);
/* Quantize innovation */
speex_assert (SUBMODE(innovation_quant));
{
/* Codebook search */
SUBMODE(innovation_quant)(target, interp_qlpc, bw_lpc1, bw_lpc2,
SUBMODE(innovation_params), NB_ORDER, NB_SUBFRAME_SIZE,
innov, syn_resp, bits, stack, st->complexity, SUBMODE(double_codebook));
/* De-normalize innovation and update excitation */
signal_mul(innov, innov, ener, NB_SUBFRAME_SIZE);
/* In some (rare) modes, we do a second search (more bits) to reduce noise even more */
if (SUBMODE(double_codebook)) {
char *tmp_stack=stack;
VARDECL(spx_sig_t *innov2);
ALLOC(innov2, NB_SUBFRAME_SIZE, spx_sig_t);
SPEEX_MEMSET(innov2, 0, NB_SUBFRAME_SIZE);
for (i=0;i<NB_SUBFRAME_SIZE;i++)
target[i]=MULT16_16_P13(QCONST16(2.2f,13), target[i]);
SUBMODE(innovation_quant)(target, interp_qlpc, bw_lpc1, bw_lpc2,
SUBMODE(innovation_params), NB_ORDER, NB_SUBFRAME_SIZE,
innov2, syn_resp, bits, stack, st->complexity, 0);
signal_mul(innov2, innov2, MULT16_32_Q15(QCONST16(0.454545f,15),ener), NB_SUBFRAME_SIZE);
for (i=0;i<NB_SUBFRAME_SIZE;i++)
innov[i] = ADD32(innov[i],innov2[i]);
stack = tmp_stack;
}
for (i=0;i<NB_SUBFRAME_SIZE;i++)
exc[i] = EXTRACT16(SATURATE32(PSHR32(ADD32(SHL32(exc32[i],1),innov[i]),SIG_SHIFT),32767));
if (st->innov_rms_save)
st->innov_rms_save[sub] = compute_rms(innov, NB_SUBFRAME_SIZE);
}
/* Final signal synthesis from excitation */
iir_mem16(exc, interp_qlpc, sw, NB_SUBFRAME_SIZE, NB_ORDER, st->mem_sp, stack);
/* Compute weighted signal again, from synthesized speech (not sure it's the right thing) */
if (st->complexity!=0)
filter10(sw, bw_lpc1, bw_lpc2, sw, NB_SUBFRAME_SIZE, st->mem_sw, stack);
}
/* Store the LSPs for interpolation in the next frame */
if (st->submodeID>=1)
{
for (i=0;i<NB_ORDER;i++)
st->old_lsp[i] = lsp[i];
for (i=0;i<NB_ORDER;i++)
st->old_qlsp[i] = qlsp[i];
}
#ifdef VORBIS_PSYCHO
if (st->submodeID>=1)
SPEEX_COPY(st->old_curve, st->curve, 128);
#endif
if (st->submodeID==1)
{
#ifndef DISABLE_VBR
if (st->dtx_count)
speex_bits_pack(bits, 15, 4);
else
#endif
speex_bits_pack(bits, 0, 4);
}
/* The next frame will not be the first (Duh!) */
st->first = 0;
SPEEX_COPY(st->winBuf, in+2*NB_FRAME_SIZE-NB_WINDOW_SIZE, NB_WINDOW_SIZE-NB_FRAME_SIZE);
if (SUBMODE(innovation_quant) == noise_codebook_quant || st->submodeID==0)
st->bounded_pitch = 1;
else
st->bounded_pitch = 0;
return 1;
}
#endif /* DISABLE_ENCODER */
#ifndef DISABLE_DECODER
/* Allocate and initialise the narrowband decoder state for mode `m`.
   Returns the new DecState as an opaque pointer, or NULL if the state itself
   could not be allocated. The sub-mode table and default sub-mode come from
   the SpeexNBMode parameter set that m->mode points to. */
void *nb_decoder_init(const SpeexMode *m)
{
   const SpeexNBMode *nb = (const SpeexNBMode*)m->mode;
   DecState *dec;
   int k;

   dec = (DecState *)speex_alloc(sizeof(DecState));
   if (dec == NULL)
      return NULL;

#if defined(VAR_ARRAYS) || defined (USE_ALLOCA)
   /* Temporaries live on the real stack; no pseudo-stack needed */
   dec->stack = NULL;
#else
   /* Pseudo-stack used for temporary buffers while decoding */
   dec->stack = (char*)speex_alloc_scratch(NB_DEC_STACK);
#endif

   dec->mode = m;
   dec->encode_submode = 1;
   dec->first = 1;

   /* Codec parameters, should eventually have several "modes"*/
   dec->submodes = nb->submodes;
   dec->submodeID = nb->defaultSubmode;
   dec->lpc_enh_enabled = 1;

   /* Start from a cleared excitation history */
   SPEEX_MEMSET(dec->excBuf, 0, NB_FRAME_SIZE + NB_PITCH_END);

   /* Pitch / packet-loss bookkeeping */
   dec->last_pitch = 40;
   dec->count_lost = 0;
   dec->pitch_gain_buf[2] = 0;
   dec->pitch_gain_buf[1] = 0;
   dec->pitch_gain_buf[0] = 0;
   dec->pitch_gain_buf_idx = 0;
   dec->seed = 1000;
   dec->sampling_rate = 8000;
   dec->last_ol_gain = 0;

   /* Default user handler, and no in-band request handlers registered */
   dec->user_callback.func = &speex_default_user_handler;
   dec->user_callback.data = NULL;
   for (k = 0; k < 16; k++)
      dec->speex_callbacks[k].func = NULL;

   dec->voc_mean = 0;
   dec->voc_m2 = 0;
   dec->voc_m1 = 0;
   dec->voc_offset = 0;
   dec->dtx_enabled = 0;
   dec->isWideband = 0;
   dec->highpass_enabled = 1;

#ifdef ENABLE_VALGRIND
   VALGRIND_MAKE_READABLE(dec, NB_DEC_STACK);
#endif
   return dec;
}