Speex 编码分析

speex

speex是一个音频编解码以及处理的开源库

一般的调用流程

编码

编码器的初始化部分

// 创建编码器以及初始化
// 获得相应的编码模式编码器
SpeexMode *mode = NULL;
mode = (SpeexMode *)speex_lib_get_mode(SPEEX_MODEID_NB);
// 初始化编码器
void *state = speex_encoder_init(mode);
// 设置编码质量
int q = 10;
int ret = speex_encoder_ctl(m_enc_statae.state, SPEEX_SET_QUALITY, &q);
// 获得编码的帧大小
ret = speex_mode_query(mode, SPEEX_MODE_FRAME_SIZE, &frame_size);

一般编码的模式可以有三种

/** Number of defined modes in Speex */
#define SPEEX_NB_MODES 3

/** modeID for the defined narrowband mode */
// Typically used for audio sampled at 8000 Hz
#define SPEEX_MODEID_NB 0

/** modeID for the defined wideband mode */
// Typically used for audio sampled at 16000 Hz
#define SPEEX_MODEID_WB 1

/** modeID for the defined ultra-wideband mode */
// Typically used for audio sampled at 32000 Hz
#define SPEEX_MODEID_UWB 2

SpeexMode
speex 编解码的实际容器

  • 定义
/**
 * Descriptor of one Speex mode: an opaque pointer to the mode-specific
 * parameter data plus the function pointers used to query the mode and to
 * create, run, control and destroy encoders and decoders for it.
 */
typedef struct SpeexMode {
   /** Pointer to the low-level mode data */
   const void *mode;

   /** Pointer to the mode query function */
   mode_query_func query;
   
   /** The name of the mode (you should not rely on this to identify the mode)*/
   const char *modeName;

   /**ID of the mode*/
   int modeID;

   /**Version number of the bitstream (incremented every time we break
    bitstream compatibility)*/
   int bitstream_version;

   /** Pointer to encoder initialization function */
   encoder_init_func enc_init;

   /** Pointer to encoder destruction function */
   encoder_destroy_func enc_destroy;

   /** Pointer to frame encoding function */
   encode_func enc;

   /** Pointer to decoder initialization function */
   decoder_init_func dec_init;

   /** Pointer to decoder destruction function */
   decoder_destroy_func dec_destroy;

   /** Pointer to frame decoding function */
   decode_func dec;

   /** ioctl-like requests for encoder */
   encoder_ctl_func enc_ctl;

   /** ioctl-like requests for decoder */
   decoder_ctl_func dec_ctl;

} SpeexMode;

从 SpeexMode 的定义来看,其包含了音频编解码所用的基础参数,以及将用到的函数指针。

speex_lib_get_mode
根据模式ID获得相应编解码模式的SpeexMode

  • 定义
// [speex.h]
/* Shortcut for the narrowband case: when the argument equals SPEEX_MODEID_NB
   the expression yields &speex_nb_mode directly; any other value is passed to
   the real speex_lib_get_mode() function. The argument is written twice in
   the expansion, so avoid passing an expression with side effects. */
#define speex_lib_get_mode(mode) ((mode)==SPEEX_MODEID_NB ? &speex_nb_mode : speex_lib_get_mode (mode))
// [modes_wb.c]
/* Table of the predefined modes, ordered so that the index equals the mode ID
   (narrowband, wideband, ultra-wideband). */
EXPORT const SpeexMode * const speex_mode_list[SPEEX_NB_MODES] = {&speex_nb_mode, &speex_wb_mode, &speex_uwb_mode};
/**
 * Look up a predefined mode descriptor by its numeric ID.
 *
 * @param mode  Mode ID, expected in the range [0, SPEEX_NB_MODES).
 * @return      The matching entry of speex_mode_list, or NULL when the ID is
 *              outside that range.
 */
EXPORT const SpeexMode * speex_lib_get_mode (int mode)
{
   /* Only IDs that index into speex_mode_list are valid. */
   const int id_is_valid = (mode >= 0) && (mode < SPEEX_NB_MODES);

   return id_is_valid ? speex_mode_list[mode] : NULL;
}

从定义来看,函数speex_lib_get_mode实际是通过查询返回一个预定义的SpeexMode

speex_nb_mode模式

speex_nb_mode对应SPEEX_MODEID_NB
SpeexNBMode中的submodes保存各个子模式的参数集;编码质量取值为0~10(共11个等级),通过quality_map映射到相应的submode。

//[modes.h]
/**
 * Parameter set of a narrowband mode. A SpeexMode whose `mode` pointer refers
 * to this struct is cast back to it by nb_encoder_init() and by the
 * SPEEX_SET_QUALITY request of nb_encoder_ctl().
 */
typedef struct SpeexNBMode {
   int     frameSize;      /**< Size of frames used for encoding */
   int     subframeSize;   /**< Size of sub-frames used for encoding */
   int     lpcSize;        /**< Order of LPC filter */
   int     pitchStart;     /**< Smallest pitch value allowed */
   int     pitchEnd;       /**< Largest pitch value allowed */

   spx_word16_t gamma1;    /**< Perceptual filter parameter #1 */
   spx_word16_t gamma2;    /**< Perceptual filter parameter #2 */
   spx_word16_t   lpc_floor;      /**< Noise floor for LPC analysis */

   const SpeexSubmode *submodes[NB_SUBMODES]; /**< Sub-mode data for the mode */
   int     defaultSubmode; /**< Default sub-mode to use when encoding */
   int     quality_map[11]; /**< Mode corresponding to each quality setting */
} SpeexNBMode;

//[nb_celp.h]
/* Narrowband constants; nb_mode uses them as its default parameters. */
/* LPC filter order (lpcSize) */
#define NB_ORDER 10
/* Frame size (frameSize) */
#define NB_FRAME_SIZE 160
/* Sub-frame size (subframeSize) */
#define NB_SUBFRAME_SIZE 40
/* Number of sub-frames per frame: NB_FRAME_SIZE / NB_SUBFRAME_SIZE */
#define NB_NB_SUBFRAMES 4
/* Smallest pitch value allowed (pitchStart) */
#define NB_PITCH_START 17
/* Largest pitch value allowed (pitchEnd) */
#define NB_PITCH_END 144

//[modes.c]
/* Default mode for narrowband */
static const SpeexNBMode nb_mode = {
   NB_FRAME_SIZE,    /*frameSize*/
   NB_SUBFRAME_SIZE, /*subframeSize*/
   NB_ORDER,         /*lpcSize*/
   NB_PITCH_START,               /*pitchStart*/
   NB_PITCH_END,              /*pitchEnd*/
   QCONST16(0.92,15),  /* gamma1 */
   QCONST16(0.6,15),   /* gamma2 */
   QCONST16(.0002,15), /*lpc_floor*/
   /* submodes: slot 0 and slots 9..15 are NULL, slots 1..8 hold nb_submode1..8 */
   {NULL, &nb_submode1, &nb_submode2, &nb_submode3, &nb_submode4, &nb_submode5, &nb_submode6, &nb_submode7,
   &nb_submode8, NULL, NULL, NULL, NULL, NULL, NULL, NULL},
   5,   /* defaultSubmode */
   {1, 8, 2, 3, 3, 4, 4, 5, 5, 6, 7}   /* quality_map: quality 0..10 -> index into submodes */
};

/* Default mode for narrowband */
EXPORT const SpeexMode speex_nb_mode = {
   &nb_mode,                // mode: narrowband parameter set
   nb_mode_query,           // query: mode query function (for external callers)
   "narrowband",            // modeName
   0,                       // modeID
   4,                       // bitstream_version
   nb_encoder_init,         // enc_init: encoder initialisation
   nb_encoder_destroy,      // enc_destroy: encoder destruction
   nb_encode,               // enc: frame encoding
   nb_decoder_init,         // dec_init: decoder initialisation
   nb_decoder_destroy,      // dec_destroy: decoder destruction
   nb_decode,               // dec: frame decoding
   nb_encoder_ctl,          // enc_ctl: set/get encoder properties
   nb_decoder_ctl,          // dec_ctl: set/get decoder properties
};
  • [1] nb_encoder_init 的定义
// [nb_celp.h]
/**
 * Narrowband encoder state. Allocated and filled in by nb_encoder_init(),
 * read and updated by nb_encode() and nb_encoder_ctl(), released by
 * nb_encoder_destroy().
 */
typedef struct EncState {
   const SpeexMode *mode;        /**< Mode corresponding to the state */
   int    first;                 /**< Is this the first frame? */

   spx_word32_t cumul_gain;      /**< Product of previously used pitch gains (Q10) */
   int    bounded_pitch;         /**< Next frame should not rely on previous frames for pitch */
   int    ol_pitch;              /**< Open-loop pitch */
   int    ol_voiced;             /**< Open-loop voiced/non-voiced decision */
   int   pitch[NB_NB_SUBFRAMES];

#ifdef VORBIS_PSYCHO
   VorbisPsy *psy;
   float *psy_window;
   float *curve;
   float *old_curve;
#endif

   spx_word16_t  gamma1;         /**< Perceptual filter: A(z/gamma1) */
   spx_word16_t  gamma2;         /**< Perceptual filter: A(z/gamma2) */
   spx_word16_t  lpc_floor;      /**< Noise floor multiplier for A[0] in LPC analysis*/
   char  *stack;                 /**< Pseudo-stack allocation for temporary memory */
   spx_word16_t winBuf[NB_WINDOW_SIZE-NB_FRAME_SIZE];         /**< Input buffer (original signal) */
   spx_word16_t excBuf[NB_EXCBUF];         /**< Excitation buffer */
   spx_word16_t *exc;            /**< Start of excitation frame */
   spx_word16_t swBuf[NB_EXCBUF];          /**< Weighted signal buffer */
   spx_word16_t *sw;             /**< Start of weighted signal frame */
   const spx_word16_t *window;   /**< Temporary (Hanning) window */
   const spx_word16_t *lagWindow;      /**< Window applied to auto-correlation */
   spx_lsp_t old_lsp[NB_ORDER];           /**< LSPs for previous frame */
   spx_lsp_t old_qlsp[NB_ORDER];          /**< Quantized LSPs for previous frame */
   spx_mem_t mem_sp[NB_ORDER];            /**< Filter memory for signal synthesis */
   spx_mem_t mem_sw[NB_ORDER];            /**< Filter memory for perceptually-weighted signal */
   spx_mem_t mem_sw_whole[NB_ORDER];      /**< Filter memory for perceptually-weighted signal (whole frame)*/
   spx_mem_t mem_exc[NB_ORDER];           /**< Filter memory for excitation (whole frame) */
   spx_mem_t mem_exc2[NB_ORDER];          /**< Filter memory for excitation (whole frame) */
   spx_mem_t mem_hp[2];          /**< High-pass filter memory */
   spx_word32_t pi_gain[NB_NB_SUBFRAMES];        /**< Gain of LPC filter at theta=pi (fe/2) */
   spx_word16_t *innov_rms_save; /**< If non-NULL, innovation RMS is copied here */

#ifndef DISABLE_VBR
   VBRState vbr;                /**< State of the VBR data */
   float  vbr_quality;           /**< Quality setting for VBR encoding */
   float  relative_quality;      /**< Relative quality that will be needed by VBR */
   spx_int32_t vbr_enabled;      /**< 1 for enabling VBR, 0 otherwise */
   spx_int32_t vbr_max;          /**< Max bit-rate allowed in VBR mode */
   int    vad_enabled;           /**< 1 for enabling VAD, 0 otherwise */
   int    dtx_enabled;           /**< 1 for enabling DTX, 0 otherwise */
   int    dtx_count;             /**< Number of consecutive DTX frames */
   spx_int32_t abr_enabled;      /**< ABR setting (in bps), 0 if off */
   float  abr_drift;             /**< Running sum of (frame bit-rate - ABR target), updated in nb_encode */
   float  abr_drift2;            /**< Smoothed drift: .95*previous + .05*(frame bit-rate - ABR target) */
   float  abr_count;             /**< Incremented by 1 per ABR frame in nb_encode; reset by SPEEX_SET_ABR */
#endif /* #ifndef DISABLE_VBR */
   
   int    complexity;            /**< Complexity setting (0-10 from least complex to most complex) */
   spx_int32_t sampling_rate;    /**< Sampling rate used to compute the bit-rate (set to 8000 by nb_encoder_init) */
   int    plc_tuning;            /**< Value of SPEEX_SET_PLC_TUNING (capped at 100 by nb_encoder_ctl) */
   int    encode_submode;        /**< If non-zero, nb_encode packs the narrowband bit and the sub-mode ID */
   const SpeexSubmode * const *submodes; /**< Sub-mode data */
   int    submodeID;             /**< Activated sub-mode */
   int    submodeSelect;         /**< Mode chosen by the user (may differ from submodeID if VAD is on) */
   int    isWideband;            /**< Is this used as part of the embedded wideband codec */
   int    highpass_enabled;        /**< Is the input filter enabled */
} EncState;

// [os_support.h]
#ifndef OVERRIDE_SPEEX_ALLOC_SCRATCH
/**
 * Allocate `size` bytes of scratch memory.
 *
 * @param size  Number of bytes requested.
 * @return      Memory obtained from calloc() (hence zero-filled), or NULL if
 *              the allocation fails.
 */
static inline void *speex_alloc_scratch (int size)
{
   /* Callers do not rely on the scratch area being cleared; calloc() is
      simply the allocator used here. */
   void *scratch = calloc(size, 1);

   return scratch;
}
#endif
// [nb_celp.c]
/**
 * Allocate and initialise a narrowband encoder state.
 *
 * @param m  Mode descriptor; m->mode must point to a SpeexNBMode.
 * @return   The new EncState as an opaque pointer, or NULL if the state (or,
 *           when a heap pseudo-stack is used, the pseudo-stack) could not be
 *           allocated. Release it with nb_encoder_destroy().
 */
void *nb_encoder_init(const SpeexMode *m)
{
   EncState *st;
   const SpeexNBMode *mode;
   int i;

   mode=(const SpeexNBMode *)m->mode;
   /* Allocate the encoder state structure */
   st = (EncState*)speex_alloc(sizeof(EncState));
   if (!st)
      return NULL;
#if defined(VAR_ARRAYS) || defined (USE_ALLOCA)
   st->stack = NULL; /* no heap pseudo-stack in this configuration */
#else
   st->stack = (char*)speex_alloc_scratch(NB_ENC_STACK);
   /* nb_encode() uses st->stack as its temporary memory, so a failed
      allocation must not produce a usable-looking state. */
   if (!st->stack)
   {
      speex_free(st);
      return NULL;
   }
#endif

   st->mode=m;      /* mode this state belongs to */
   /* Perceptual filter parameters and LPC noise floor come from the mode */
   st->gamma1=mode->gamma1;    /* nb default: QCONST16(0.92,15) */
   st->gamma2=mode->gamma2;    /* nb default: QCONST16(0.6,15) */
   st->lpc_floor = mode->lpc_floor; /* nb default: QCONST16(.0002,15) */

   st->submodes=mode->submodes;     /* table of sub-mode parameter sets */
   st->submodeID=st->submodeSelect=mode->defaultSubmode; /* nb default: 5 (nb_submode5) */
   st->bounded_pitch = 1;           /* next frame must not rely on previous frames for pitch */

   st->encode_submode = 1;

#ifdef VORBIS_PSYCHO                    /* optional Vorbis psychoacoustic model */
   st->psy = vorbis_psy_init(8000, 256);
   st->curve = (float*)speex_alloc(128*sizeof(float));
   st->old_curve = (float*)speex_alloc(128*sizeof(float));
   st->psy_window = (float*)speex_alloc(256*sizeof(float));
#endif

   st->cumul_gain = 1024;       /* product of pitch gains, Q10 (1024 == 1.0) */

   st->window= lpc_window;

   /* Create the window for autocorrelation (lag-windowing) */
   st->lagWindow = lag_window;

   st->first = 1;
   /* Initial LSPs: (i+1)*pi/(NB_ORDER+1) for i = 0..NB_ORDER-1 */
   for (i=0;i<NB_ORDER;i++)
      st->old_lsp[i]= DIV32(MULT16_16(QCONST16(3.1415927f, LSP_SHIFT), i+1), NB_ORDER+1);

   st->innov_rms_save = NULL;   /* innovation RMS is not exported by default */

#ifndef DISABLE_VBR             /* VBR / VAD / DTX / ABR are all off by default */
   vbr_init(&st->vbr);
   st->vbr_quality = 8;
   st->vbr_enabled = 0;
   st->vbr_max = 0;
   st->vad_enabled = 0;
   st->dtx_enabled = 0;
   st->dtx_count=0;
   st->abr_enabled = 0;
   st->abr_drift = 0;
   st->abr_drift2 = 0;
   st->abr_count = 0;           /* initialised with the other ABR fields */
#endif /* #ifndef DISABLE_VBR */

   st->plc_tuning = 2;
   st->complexity=2;
   st->sampling_rate=8000;
   st->isWideband = 0;
   st->highpass_enabled = 1;

#ifdef ENABLE_VALGRIND
   VALGRIND_MAKE_READABLE(st, NB_ENC_STACK);
#endif
   return st;
}
  • [2] nb_encoder_destroy 的定义
// [nb_celp.c] 
/**
 * Release a narrowband encoder state and everything it owns.
 *
 * @param state  Encoder state previously returned by nb_encoder_init().
 */
void nb_encoder_destroy(void *state)
{
   EncState *enc = (EncState *)state;

   /* Members first, the state block itself at the very end. */
#if !defined(VAR_ARRAYS) && !defined(USE_ALLOCA)
   /* Pseudo-stack used for temporary memory */
   speex_free_scratch(enc->stack);
#endif

#ifndef DISABLE_VBR
   vbr_destroy(&enc->vbr);
#endif /* #ifndef DISABLE_VBR */

#ifdef VORBIS_PSYCHO
   vorbis_psy_destroy(enc->psy);
   speex_free(enc->curve);
   speex_free(enc->old_curve);
   speex_free(enc->psy_window);
#endif

   /* Must come last: the calls above still read fields of the state. */
   speex_free(enc);
}
  • [3] nb_encoder_ctl 的定义
// nb_encoder_ctl: ioctl-like entry point through which callers set and query
// narrowband encoder parameters.
//   state   - encoder state (EncState) created by nb_encoder_init
//   request - one of the SPEEX_SET_* / SPEEX_GET_* / SPEEX_RESET_STATE codes below
//   ptr     - in/out argument; its pointee type depends on the request
//             (spx_int32_t for most, float for the VBR-quality and
//             relative-quality requests, arrays/pointers for the internal ones)
// Returns 0 on success, -1 (after a warning) for an unknown request.
int nb_encoder_ctl(void *state, int request, void *ptr)
{
   EncState *st;
   st=(EncState*)state;
   switch(request)
   {
   case SPEEX_GET_FRAME_SIZE:   // frame size
      (*(spx_int32_t*)ptr) = NB_FRAME_SIZE;  // NB_FRAME_SIZE == 160
      break;
   case SPEEX_SET_LOW_MODE:     // select the sub-mode directly (value is not range-checked here)
   case SPEEX_SET_MODE:
      st->submodeSelect = st->submodeID = (*(spx_int32_t*)ptr);
      break;
   case SPEEX_GET_LOW_MODE:     // read the active sub-mode
   case SPEEX_GET_MODE:
      (*(spx_int32_t*)ptr) = st->submodeID;
      break;
#ifndef DISABLE_VBR     // variable bit-rate related requests
      case SPEEX_SET_VBR:       // enable/disable VBR
      st->vbr_enabled = (*(spx_int32_t*)ptr);
      break;
   case SPEEX_GET_VBR:          // read the VBR flag
      (*(spx_int32_t*)ptr) = st->vbr_enabled;
      break;
   case SPEEX_SET_VAD:          // enable/disable voice activity detection (1 on, 0 off)
      st->vad_enabled = (*(spx_int32_t*)ptr);
      break;
   case SPEEX_GET_VAD:          // read the VAD flag
      (*(spx_int32_t*)ptr) = st->vad_enabled;
      break;
   case SPEEX_SET_DTX:          // enable/disable discontinuous transmission
      st->dtx_enabled = (*(spx_int32_t*)ptr);
      break;
   case SPEEX_GET_DTX:          // read the DTX flag
      (*(spx_int32_t*)ptr) = st->dtx_enabled;
      break;
   case SPEEX_SET_ABR:          // set the average bit-rate target
      st->abr_enabled = (*(spx_int32_t*)ptr);   // 0 is off (value in bps)
      st->vbr_enabled = st->abr_enabled!=0;   // VBR is switched on exactly when ABR is non-zero
      if (st->vbr_enabled)
      {
         spx_int32_t i=10;
         spx_int32_t rate, target;
         float vbr_qual;
         target = (*(spx_int32_t*)ptr);
         while (i>=0)
         {
             // Walk the quality down from 10 until the resulting bit-rate fits the target
            speex_encoder_ctl(st, SPEEX_SET_QUALITY, &i);
            speex_encoder_ctl(st, SPEEX_GET_BITRATE, &rate);
            if (rate <= target)
               break;
            i--;
         }
         vbr_qual=i;
         if (vbr_qual<0)
            vbr_qual=0;
        // Use the quality found above as the starting VBR quality
         speex_encoder_ctl(st, SPEEX_SET_VBR_QUALITY, &vbr_qual);
         st->abr_count=0;
         st->abr_drift=0;
         st->abr_drift2=0;
      }

      break;
   case SPEEX_GET_ABR:          // read the ABR target
      (*(spx_int32_t*)ptr) = st->abr_enabled;
      break;
#endif /* #ifndef DISABLE_VBR */
#if !defined(DISABLE_VBR) && !defined(DISABLE_FLOAT_API)
   case SPEEX_SET_VBR_QUALITY:              // set the VBR quality (float argument)
      st->vbr_quality = (*(float*)ptr);
      break;
   case SPEEX_GET_VBR_QUALITY:
      (*(float*)ptr) = st->vbr_quality;
      break;
#endif /* !defined(DISABLE_VBR) && !defined(DISABLE_FLOAT_API) */
   case SPEEX_SET_QUALITY:                  // set the quality level
      {
         int quality = (*(spx_int32_t*)ptr);
         if (quality < 0)
            quality = 0;
         if (quality > 10)
            quality = 10;
        // The clamped quality (0..10) selects a sub-mode through the mode's quality_map
         st->submodeSelect = st->submodeID = ((const SpeexNBMode*)(st->mode->mode))->quality_map[quality];
      }
      break;
   case SPEEX_SET_COMPLEXITY:           // set the complexity; negative values become 0, no upper clamp is applied here
      st->complexity = (*(spx_int32_t*)ptr);
      if (st->complexity<0)
         st->complexity=0;
      break;
   case SPEEX_GET_COMPLEXITY:
      (*(spx_int32_t*)ptr) = st->complexity;
      break;
   case SPEEX_SET_BITRATE:          // set the bit-rate
      {
         spx_int32_t i=10;
         spx_int32_t rate, target;
         target = (*(spx_int32_t*)ptr);
         while (i>=0)
         {
             // Pick the highest quality whose bit-rate does not exceed the requested one
            speex_encoder_ctl(st, SPEEX_SET_QUALITY, &i);
            speex_encoder_ctl(st, SPEEX_GET_BITRATE, &rate);
            if (rate <= target)
               break;
            i--;
         }
      }
      break;
   case SPEEX_GET_BITRATE:          // read the current bit-rate
      if (st->submodes[st->submodeID])
        // bits per frame of the active sub-mode, scaled by frames per second
         (*(spx_int32_t*)ptr) = st->sampling_rate*SUBMODE(bits_per_frame)/NB_FRAME_SIZE;
      else
         (*(spx_int32_t*)ptr) = st->sampling_rate*(NB_SUBMODE_BITS+1)/NB_FRAME_SIZE;
      break;
   case SPEEX_SET_SAMPLING_RATE:            // set the sampling rate
      st->sampling_rate = (*(spx_int32_t*)ptr);
      break;
   case SPEEX_GET_SAMPLING_RATE:
      (*(spx_int32_t*)ptr)=st->sampling_rate;
      break;
   case SPEEX_RESET_STATE:              // reset the encoder state (history buffers and filter memories)
      {
         int i;
         st->bounded_pitch = 1;
         st->first = 1;
         for (i=0;i<NB_ORDER;i++)
            st->old_lsp[i]= DIV32(MULT16_16(QCONST16(3.1415927f, LSP_SHIFT), i+1), NB_ORDER+1);
         for (i=0;i<NB_ORDER;i++)
            st->mem_sw[i]=st->mem_sw_whole[i]=st->mem_sp[i]=st->mem_exc[i]=0;
         for (i=0;i<NB_FRAME_SIZE+NB_PITCH_END+1;i++)
            st->excBuf[i]=st->swBuf[i]=0;
         for (i=0;i<NB_WINDOW_SIZE-NB_FRAME_SIZE;i++)
            st->winBuf[i]=0;
      }
      break;
   case SPEEX_SET_SUBMODE_ENCODING:     // whether nb_encode writes the sub-mode header bits
      st->encode_submode = (*(spx_int32_t*)ptr);
      break;
   case SPEEX_GET_SUBMODE_ENCODING:
      (*(spx_int32_t*)ptr) = st->encode_submode;
      break;
   case SPEEX_GET_LOOKAHEAD:            // look-ahead: NB_WINDOW_SIZE - NB_FRAME_SIZE
      (*(spx_int32_t*)ptr)=(NB_WINDOW_SIZE-NB_FRAME_SIZE);
      break;
   case SPEEX_SET_PLC_TUNING:           // capped at 100
      st->plc_tuning = (*(spx_int32_t*)ptr);
      if (st->plc_tuning>100)
         st->plc_tuning=100;
      break;
   case SPEEX_GET_PLC_TUNING:
      (*(spx_int32_t*)ptr)=(st->plc_tuning);
      break;
#ifndef DISABLE_VBR
   case SPEEX_SET_VBR_MAX_BITRATE:      // maximum bit-rate allowed in VBR mode
      st->vbr_max = (*(spx_int32_t*)ptr);
      break;
   case SPEEX_GET_VBR_MAX_BITRATE:
      (*(spx_int32_t*)ptr) = st->vbr_max;
      break;
#endif /* #ifndef DISABLE_VBR */
   case SPEEX_SET_HIGHPASS:             // enable/disable the high-pass filter applied to the input signal
      st->highpass_enabled = (*(spx_int32_t*)ptr);
      break;
   case SPEEX_GET_HIGHPASS:
      (*(spx_int32_t*)ptr) = st->highpass_enabled;
      break;

// The requests below are for internal use
   /* This is all internal stuff past this point */
   case SPEEX_GET_PI_GAIN:              // copies NB_NB_SUBFRAMES values into the caller's array
      {
         int i;
         spx_word32_t *g = (spx_word32_t*)ptr;
         for (i=0;i<NB_NB_SUBFRAMES;i++)
            g[i]=st->pi_gain[i];
      }
      break;
   case SPEEX_GET_EXC:                  // one excitation RMS value per sub-frame
      {
         int i;
         for (i=0;i<NB_NB_SUBFRAMES;i++)
            ((spx_word16_t*)ptr)[i] = compute_rms16(st->exc+i*NB_SUBFRAME_SIZE, NB_SUBFRAME_SIZE);
      }
      break;
#ifndef DISABLE_VBR
   case SPEEX_GET_RELATIVE_QUALITY:
      (*(float*)ptr)=st->relative_quality;
      break;
#endif /* #ifndef DISABLE_VBR */
   case SPEEX_SET_INNOVATION_SAVE:      // stores the caller's pointer itself, not a copy of its contents
      st->innov_rms_save = (spx_word16_t*)ptr;
      break;
   case SPEEX_SET_WIDEBAND:
      st->isWideband = *((spx_int32_t*)ptr);
      break;
   case SPEEX_GET_STACK:
      *((char**)ptr) = st->stack;
      break;
   default:
      speex_warning_int("Unknown nb_ctl request: ", request);
      return -1;
   }
   return 0;
}
  • [4] nb_encode 的定义
// [nb_celp.c]
// vin : 待编码原始数据
// 帧长默认为160 ,分为4个sub,40*4
int nb_encode(void *state, void *vin, SpeexBits *bits)
{
   EncState *st;
   int i, sub, roots;
   int ol_pitch;
   spx_word16_t ol_pitch_coef;
   spx_word32_t ol_gain;
   VARDECL(spx_word16_t *target);
   VARDECL(spx_sig_t *innov);
   VARDECL(spx_word32_t *exc32);
   VARDECL(spx_mem_t *mem);
   VARDECL(spx_coef_t *bw_lpc1);
   VARDECL(spx_coef_t *bw_lpc2);
   VARDECL(spx_coef_t *lpc);
   VARDECL(spx_lsp_t *lsp);
   VARDECL(spx_lsp_t *qlsp);
   VARDECL(spx_lsp_t *interp_lsp);
   VARDECL(spx_lsp_t *interp_qlsp);
   VARDECL(spx_coef_t *interp_lpc);
   VARDECL(spx_coef_t *interp_qlpc);
   char *stack;
   VARDECL(spx_word16_t *syn_resp);

   spx_word32_t ener=0;
   spx_word16_t fine_gain;
   // 输入编码帧
   spx_word16_t *in = (spx_word16_t*)vin;

   st=(EncState *)state;
   stack=st->stack;
    // 分配内存空间
    // NB_ORDER = 10
   ALLOC(lpc, NB_ORDER, spx_coef_t);
   ALLOC(bw_lpc1, NB_ORDER, spx_coef_t);
   ALLOC(bw_lpc2, NB_ORDER, spx_coef_t);
   ALLOC(lsp, NB_ORDER, spx_lsp_t);
   ALLOC(qlsp, NB_ORDER, spx_lsp_t);
   ALLOC(interp_lsp, NB_ORDER, spx_lsp_t);
   ALLOC(interp_qlsp, NB_ORDER, spx_lsp_t);
   ALLOC(interp_lpc, NB_ORDER, spx_coef_t);
   ALLOC(interp_qlpc, NB_ORDER, spx_coef_t);
    // exc 当前帧的激励
    // 在init中SPEEX_MEMSET(st->excBuf, 0, NB_FRAME_SIZE + NB_PITCH_END);
    // NB_PITCH_END = 144
   st->exc = st->excBuf + NB_PITCH_END + 2;
   // swBuf 历史感知加权的解码语音信号,计算零输入响应
   st->sw = st->swBuf + NB_PITCH_END + 2;
   /* Move signals 1 frame towards the past */
   // SPEEX_MOVE(dst, src, n)
   // NB_FRAME_SIZE 160 NB_PITCH_END 144
   // 将前一帧信号的激励往前挪,作为当前帧(第一子帧)的自适应码本,以及第二,三,四子帧的自适应码本的一部分
   SPEEX_MOVE(st->excBuf, st->excBuf+NB_FRAME_SIZE, NB_PITCH_END+2);
   SPEEX_MOVE(st->swBuf, st->swBuf+NB_FRAME_SIZE, NB_PITCH_END+2);

    // 高通滤波
    // 滤掉低频噪声
   if (st->highpass_enabled)
      highpass(in, in, NB_FRAME_SIZE, (st->isWideband?HIGHPASS_WIDEBAND:HIGHPASS_NARROWBAND)|HIGHPASS_INPUT, st->mem_hp);

   {
    // 音频的lpc计算
    // 根据 Levinson-Durbin 算法 计算出相应的lpc系数
    // 再将lpc转换为lsp系数
      VARDECL(spx_word16_t *w_sig);
      // 自相关系数
      VARDECL(spx_word16_t *autocorr);
      // NB_WINDOW_SIZE (NB_FRAME_SIZE+NB_SUBFRAME_SIZE) (160+40)
      ALLOC(w_sig, NB_WINDOW_SIZE, spx_word16_t);
      // NB_ORDER 10
      ALLOC(autocorr, NB_ORDER+1, spx_word16_t);
      /* Window for analysis */
      for (i=0;i<NB_WINDOW_SIZE-NB_FRAME_SIZE;i++)
         w_sig[i] = MULT16_16_Q15(st->winBuf[i],st->window[i]);
      for (;i<NB_WINDOW_SIZE;i++)
      // 汉宁窗对音频数据进行截取(窗函数是为了在不影响信号频率特性的情况下,对信号进行截取)
         w_sig[i] = MULT16_16_Q15(in[i-NB_WINDOW_SIZE+NB_FRAME_SIZE],st->window[i]);
      /* Compute auto-correlation */
      // 计算自相关
      _spx_autocorr(w_sig, autocorr, NB_ORDER+1, NB_WINDOW_SIZE);
      autocorr[0] = ADD16(autocorr[0],MULT16_16_Q15(autocorr[0],st->lpc_floor)); /* Noise floor in auto-correlation domain */

      /* Lag windowing: equivalent to filtering in the power-spectrum domain */
      // LAG窗滤波
      for (i=0;i<NB_ORDER+1;i++)
         autocorr[i] = MULT16_16_Q15(autocorr[i],st->lagWindow[i]);
      autocorr[0] = ADD16(autocorr[0],1);

      /* Levinson-Durbin */
      // Levinson-Durbin 算法
      // lpc 转 lsp
      _spx_lpc(lpc, autocorr, NB_ORDER);
      /* LPC to LSPs (x-domain) transform */
      roots=lpc_to_lsp (lpc, NB_ORDER, lsp, 10, LSP_DELTA1, stack);
      /* Check if we found all the roots */
      if (roots!=NB_ORDER)
      {
         /*If we can't find all LSP's, do some damage control and use previous filter*/
         for (i=0;i<NB_ORDER;i++)
         {
            lsp[i]=st->old_lsp[i];
         }
      }
   }



    // 整帧分析(基音周期估计和励磁增益)
   /* Whole frame analysis (open-loop estimation of pitch and excitation gain) */
   {
      int diff = NB_WINDOW_SIZE-NB_FRAME_SIZE;
      // lsp 插值
      if (st->first)
         for (i=0;i<NB_ORDER;i++)
            interp_lsp[i] = lsp[i];
      else
         lsp_interpolate(st->old_lsp, lsp, interp_lsp, NB_ORDER, NB_NB_SUBFRAMES, NB_NB_SUBFRAMES<<1, LSP_MARGIN);
    // 插值后,lsp转换为lpc系数 ,得到interp_lpc
      /* Compute interpolated LPCs (unquantized) for whole frame*/
      lsp_to_lpc(interp_lsp, interp_lpc, NB_ORDER,stack);


      /*Open-loop pitch*/
      if (!st->submodes[st->submodeID] || (st->complexity>2 && SUBMODE(have_subframe_gain)<3) || SUBMODE(forced_pitch_gain) || SUBMODE(lbr_pitch) != -1
#ifndef DISABLE_VBR
           || st->vbr_enabled || st->vad_enabled
#endif
                  )
      {
         int nol_pitch[6];
         spx_word16_t nol_pitch_coef[6];

         bw_lpc(0.9, interp_lpc, bw_lpc1, NB_ORDER);
         bw_lpc(0.55, interp_lpc, bw_lpc2, NB_ORDER);

         SPEEX_COPY(st->sw, st->winBuf, diff);
         SPEEX_COPY(st->sw+diff, in, NB_FRAME_SIZE-diff);
         filter10(st->sw, bw_lpc1, bw_lpc2, st->sw, NB_FRAME_SIZE, st->mem_sw_whole, stack);

         open_loop_nbest_pitch(st->sw, NB_PITCH_START, NB_PITCH_END, NB_FRAME_SIZE,
                               nol_pitch, nol_pitch_coef, 6, stack);
         ol_pitch=nol_pitch[0];
         ol_pitch_coef = nol_pitch_coef[0];
         /*Try to remove pitch multiples*/
         for (i=1;i<6;i++)
         {
#ifdef FIXED_POINT
            if ((nol_pitch_coef[i]>MULT16_16_Q15(nol_pitch_coef[0],27853)) &&
#else
            if ((nol_pitch_coef[i]>.85*nol_pitch_coef[0]) &&
#endif
                (ABS(2*nol_pitch[i]-ol_pitch)<=2 || ABS(3*nol_pitch[i]-ol_pitch)<=3 ||
                 ABS(4*nol_pitch[i]-ol_pitch)<=4 || ABS(5*nol_pitch[i]-ol_pitch)<=5))
            {
               /*ol_pitch_coef=nol_pitch_coef[i];*/
               ol_pitch = nol_pitch[i];
            }
         }
         /*if (ol_pitch>50)
           ol_pitch/=2;*/
         /*ol_pitch_coef = sqrt(ol_pitch_coef);*/

      } else {
         ol_pitch=0;
         ol_pitch_coef=0;
      }

      /*Compute "real" excitation*/
      /*SPEEX_COPY(st->exc, st->winBuf, diff);
      SPEEX_COPY(st->exc+diff, in, NB_FRAME_SIZE-diff);*/
      // 将原始音频数据,通过interp_lpc表征,并滤波
      // 计算精确的激励
      // 逆向滤波,得到残差信号
      fir_mem16(st->winBuf, interp_lpc, st->exc, diff, NB_ORDER, st->mem_exc, stack);
      fir_mem16(in, interp_lpc, st->exc+diff, NB_FRAME_SIZE-diff, NB_ORDER, st->mem_exc, stack);

      /* Compute open-loop excitation gain */
      {
          // 计算激励的能量
         spx_word16_t g = compute_rms16(st->exc, NB_FRAME_SIZE);
         if (st->submodeID!=1 && ol_pitch>0)
            ol_gain = MULT16_16(g, MULT16_16_Q14(QCONST16(1.1,14),
                                spx_sqrt(QCONST32(1.,28)-MULT16_32_Q15(QCONST16(.8,15),SHL32(MULT16_16(ol_pitch_coef,ol_pitch_coef),16)))));
         else
            ol_gain = SHL32(EXTEND32(g),SIG_SHIFT);
      }
   }

#ifdef VORBIS_PSYCHO
   SPEEX_MOVE(st->psy_window, st->psy_window+NB_FRAME_SIZE, 256-NB_FRAME_SIZE);
   SPEEX_COPY(&st->psy_window[256-NB_FRAME_SIZE], in, NB_FRAME_SIZE);
   compute_curve(st->psy, st->psy_window, st->curve);
   /*print_vec(st->curve, 128, "curve");*/
   if (st->first)
      SPEEX_COPY(st->old_curve, st->curve, 128);
#endif

   /*VBR stuff*/
#ifndef DISABLE_VBR
   if (st->vbr_enabled||st->vad_enabled)
   {
      float lsp_dist=0;
      for (i=0;i<NB_ORDER;i++)
         lsp_dist += (st->old_lsp[i] - lsp[i])*(st->old_lsp[i] - lsp[i]);
      lsp_dist /= LSP_SCALING*LSP_SCALING;

      if (st->abr_enabled)
      {
         float qual_change=0;
         if (st->abr_drift2 * st->abr_drift > 0)
         {
            /* Only adapt if long-term and short-term drift are the same sign */
            qual_change = -.00001*st->abr_drift/(1+st->abr_count);
            if (qual_change>.05)
               qual_change=.05;
            if (qual_change<-.05)
               qual_change=-.05;
         }
         st->vbr_quality += qual_change;
         if (st->vbr_quality>10)
            st->vbr_quality=10;
         if (st->vbr_quality<0)
            st->vbr_quality=0;
      }

      st->relative_quality = vbr_analysis(&st->vbr, in, NB_FRAME_SIZE, ol_pitch, GAIN_SCALING_1*ol_pitch_coef);
      /*if (delta_qual<0)*/
      /*  delta_qual*=.1*(3+st->vbr_quality);*/
      if (st->vbr_enabled)
      {
         spx_int32_t mode;
         int choice=0;
         float min_diff=100;
         mode = 8;
         while (mode)
         {
            int v1;
            float thresh;
            v1=(int)floor(st->vbr_quality);
            if (v1==10)
               thresh = vbr_nb_thresh[mode][v1];
            else
               thresh = (st->vbr_quality-v1)*vbr_nb_thresh[mode][v1+1] + (1+v1-st->vbr_quality)*vbr_nb_thresh[mode][v1];
            if (st->relative_quality > thresh &&
                st->relative_quality-thresh<min_diff)
            {
               choice = mode;
               min_diff = st->relative_quality-thresh;
            }
            mode--;
         }
         mode=choice;
         if (mode==0)
         {
            if (st->dtx_count==0 || lsp_dist>.05 || !st->dtx_enabled || st->dtx_count>20)
            {
               mode=1;
               st->dtx_count=1;
            } else {
               mode=0;
               st->dtx_count++;
            }
         } else {
            st->dtx_count=0;
         }

         speex_encoder_ctl(state, SPEEX_SET_MODE, &mode);
         if (st->vbr_max>0)
         {
            spx_int32_t rate;
            speex_encoder_ctl(state, SPEEX_GET_BITRATE, &rate);
            if (rate > st->vbr_max)
            {
               rate = st->vbr_max;
               speex_encoder_ctl(state, SPEEX_SET_BITRATE, &rate);
            }
         }

         if (st->abr_enabled)
         {
            spx_int32_t bitrate;
            speex_encoder_ctl(state, SPEEX_GET_BITRATE, &bitrate);
            st->abr_drift+=(bitrate-st->abr_enabled);
            st->abr_drift2 = .95*st->abr_drift2 + .05*(bitrate-st->abr_enabled);
            st->abr_count += 1.0;
         }

      } else {
         /*VAD only case*/
         int mode;
         if (st->relative_quality<2)
         {
            if (st->dtx_count==0 || lsp_dist>.05 || !st->dtx_enabled || st->dtx_count>20)
            {
               st->dtx_count=1;
               mode=1;
            } else {
               mode=0;
               st->dtx_count++;
            }
         } else {
            st->dtx_count = 0;
            mode=st->submodeSelect;
         }
         /*speex_encoder_ctl(state, SPEEX_SET_MODE, &mode);*/
         st->submodeID=mode;
      }
   } else {
      st->relative_quality = -1;
   }
#endif /* #ifndef DISABLE_VBR */

   if (st->encode_submode)
   {
      /* First, transmit a zero for narrowband */
      speex_bits_pack(bits, 0, 1);

      /* Transmit the sub-mode we use for this frame */
      speex_bits_pack(bits, st->submodeID, NB_SUBMODE_BITS);

   }

   /* If null mode (no transmission), just set a couple things to zero*/
   if (st->submodes[st->submodeID] == NULL)
   {
      for (i=0;i<NB_FRAME_SIZE;i++)
         st->exc[i]=st->sw[i]=VERY_SMALL;

      for (i=0;i<NB_ORDER;i++)
         st->mem_sw[i]=0;
      st->first=1;
      st->bounded_pitch = 1;

      SPEEX_COPY(st->winBuf, in+2*NB_FRAME_SIZE-NB_WINDOW_SIZE, NB_WINDOW_SIZE-NB_FRAME_SIZE);

      /* Clear memory (no need to really compute it) */
      for (i=0;i<NB_ORDER;i++)
         st->mem_sp[i] = 0;
      return 0;

   }

   /* LSP Quantization */
   if (st->first)
   {
      for (i=0;i<NB_ORDER;i++)
         st->old_lsp[i] = lsp[i];
   }

// 量化 lsp
   /*Quantize LSPs*/
#if 1 /*0 for unquantized*/
   SUBMODE(lsp_quant)(lsp, qlsp, NB_ORDER, bits);
#else
   for (i=0;i<NB_ORDER;i++)
     qlsp[i]=lsp[i];
#endif
// 码率适应
   /*If we use low bit-rate pitch mode, transmit open-loop pitch*/
   if (SUBMODE(lbr_pitch)!=-1)
   {
      speex_bits_pack(bits, ol_pitch-NB_PITCH_START, 7);
   }

   if (SUBMODE(forced_pitch_gain))
   {
      int quant;
      /* This just damps the pitch a bit, because it tends to be too aggressive when forced */
      ol_pitch_coef = MULT16_16_Q15(QCONST16(.9,15), ol_pitch_coef);
#ifdef FIXED_POINT
      quant = PSHR16(MULT16_16_16(15, ol_pitch_coef),GAIN_SHIFT);
#else
      quant = (int)floor(.5+15*ol_pitch_coef*GAIN_SCALING_1);
#endif
      if (quant>15)
         quant=15;
      if (quant<0)
         quant=0;
      speex_bits_pack(bits, quant, 4);
      ol_pitch_coef=MULT16_16_P15(QCONST16(0.066667,15),SHL16(quant,GAIN_SHIFT));
   }


   /*Quantize and transmit open-loop excitation gain*/
#ifdef FIXED_POINT
   {
      int qe = scal_quant32(ol_gain, ol_gain_table, 32);
      /*ol_gain = exp(qe/3.5)*SIG_SCALING;*/
      ol_gain = MULT16_32_Q15(28406,ol_gain_table[qe]);
      speex_bits_pack(bits, qe, 5);
   }
#else
   {
      int qe = (int)(floor(.5+3.5*log(ol_gain*1.0/SIG_SCALING)));
      if (qe<0)
         qe=0;
      if (qe>31)
         qe=31;
      ol_gain = exp(qe/3.5)*SIG_SCALING;
      speex_bits_pack(bits, qe, 5);
   }
#endif



   /* Special case for first frame */
   if (st->first)
   {
      for (i=0;i<NB_ORDER;i++)
         st->old_qlsp[i] = qlsp[i];
   }

   /* Target signal */
   ALLOC(target, NB_SUBFRAME_SIZE, spx_word16_t);
   ALLOC(innov, NB_SUBFRAME_SIZE, spx_sig_t);
   ALLOC(exc32, NB_SUBFRAME_SIZE, spx_word32_t);
   ALLOC(syn_resp, NB_SUBFRAME_SIZE, spx_word16_t);
   ALLOC(mem, NB_ORDER, spx_mem_t);

   /* Loop on sub-frames */
   for (sub=0;sub<NB_NB_SUBFRAMES;sub++)
   {
      int   offset;
      spx_word16_t *sw;
      spx_word16_t *exc, *inBuf;
      int pitch;
      int response_bound = NB_SUBFRAME_SIZE;

      /* Offset relative to start of frame */
      offset = NB_SUBFRAME_SIZE*sub;
      /* Excitation */
      exc=st->exc+offset;
      /* Weighted signal */
      sw=st->sw+offset;

      /* LSP interpolation (quantized and unquantized) */
      lsp_interpolate(st->old_lsp, lsp, interp_lsp, NB_ORDER, sub, NB_NB_SUBFRAMES, LSP_MARGIN);
      lsp_interpolate(st->old_qlsp, qlsp, interp_qlsp, NB_ORDER, sub, NB_NB_SUBFRAMES, LSP_MARGIN);

      /* Compute interpolated LPCs (quantized and unquantized) */
      lsp_to_lpc(interp_lsp, interp_lpc, NB_ORDER,stack);

      lsp_to_lpc(interp_qlsp, interp_qlpc, NB_ORDER, stack);

      /* Compute analysis filter gain at w=pi (for use in SB-CELP) */
      {
         spx_word32_t pi_g=LPC_SCALING;
         for (i=0;i<NB_ORDER;i+=2)
         {
            /*pi_g += -st->interp_qlpc[i] +  st->interp_qlpc[i+1];*/
            pi_g = ADD32(pi_g, SUB32(EXTEND32(interp_qlpc[i+1]),EXTEND32(interp_qlpc[i])));
         }
         st->pi_gain[sub] = pi_g;
      }

#ifdef VORBIS_PSYCHO
      {
         float curr_curve[128];
         float fact = ((float)sub+1.0f)/NB_NB_SUBFRAMES;
         for (i=0;i<128;i++)
            curr_curve[i] = (1.0f-fact)*st->old_curve[i] + fact*st->curve[i];
         curve_to_lpc(st->psy, curr_curve, bw_lpc1, bw_lpc2, 10);
      }
#else
      /* Compute bandwidth-expanded (unquantized) LPCs for perceptual weighting */
      bw_lpc(st->gamma1, interp_lpc, bw_lpc1, NB_ORDER);
      bw_lpc(st->gamma2, interp_lpc, bw_lpc2, NB_ORDER);
      /*print_vec(st->bw_lpc1, 10, "bw_lpc");*/
#endif

      /*FIXME: This will break if we change the window size */
      speex_assert(NB_WINDOW_SIZE-NB_FRAME_SIZE == NB_SUBFRAME_SIZE);
      if (sub==0)
         inBuf = st->winBuf;
      else
         inBuf = &in[((sub-1)*NB_SUBFRAME_SIZE)];
      for (i=0;i<NB_SUBFRAME_SIZE;i++)
         sw[i] = inBuf[i];

      if (st->complexity==0)
         response_bound >>= 1;
      compute_impulse_response(interp_qlpc, bw_lpc1, bw_lpc2, syn_resp, response_bound, NB_ORDER, stack);
      for (i=response_bound;i<NB_SUBFRAME_SIZE;i++)
         syn_resp[i]=VERY_SMALL;

      /* Compute zero response of A(z/g1) / ( A(z/g2) * A(z) ) */
      for (i=0;i<NB_ORDER;i++)
         mem[i]=SHL32(st->mem_sp[i],1);
      for (i=0;i<NB_SUBFRAME_SIZE;i++)
         exc[i] = VERY_SMALL;
#ifdef SHORTCUTS2
      iir_mem16(exc, interp_qlpc, exc, response_bound, NB_ORDER, mem, stack);
      for (i=0;i<NB_ORDER;i++)
         mem[i]=SHL32(st->mem_sw[i],1);
      filter10(exc, st->bw_lpc1, st->bw_lpc2, exc, response_bound, mem, stack);
      SPEEX_MEMSET(&exc[response_bound], 0, NB_SUBFRAME_SIZE-response_bound);
#else
      iir_mem16(exc, interp_qlpc, exc, NB_SUBFRAME_SIZE, NB_ORDER, mem, stack);
      for (i=0;i<NB_ORDER;i++)
         mem[i]=SHL32(st->mem_sw[i],1);
      filter10(exc, bw_lpc1, bw_lpc2, exc, NB_SUBFRAME_SIZE, mem, stack);
#endif

      /* Compute weighted signal */
      for (i=0;i<NB_ORDER;i++)
         mem[i]=st->mem_sw[i];
      filter10(sw, bw_lpc1, bw_lpc2, sw, NB_SUBFRAME_SIZE, mem, stack);

      if (st->complexity==0)
         for (i=0;i<NB_ORDER;i++)
            st->mem_sw[i]=mem[i];

      /* Compute target signal (saturation prevents overflows on clipped input speech) */
      for (i=0;i<NB_SUBFRAME_SIZE;i++)
         target[i]=EXTRACT16(SATURATE(SUB32(sw[i],PSHR32(exc[i],1)),32767));

      for (i=0;i<NB_SUBFRAME_SIZE;i++)
         exc[i] = inBuf[i];
      fir_mem16(exc, interp_qlpc, exc, NB_SUBFRAME_SIZE, NB_ORDER, st->mem_exc2, stack);
      /* If we have a long-term predictor (otherwise, something's wrong) */
      speex_assert (SUBMODE(ltp_quant));
      {
         int pit_min, pit_max;
         /* Long-term prediction */
         if (SUBMODE(lbr_pitch) != -1)
         {
            /* Low bit-rate pitch handling */
            int margin;
            margin = SUBMODE(lbr_pitch);
            if (margin)
            {
               if (ol_pitch < NB_PITCH_START+margin-1)
                  ol_pitch=NB_PITCH_START+margin-1;
               if (ol_pitch > NB_PITCH_END-margin)
                  ol_pitch=NB_PITCH_END-margin;
               pit_min = ol_pitch-margin+1;
               pit_max = ol_pitch+margin;
            } else {
               pit_min=pit_max=ol_pitch;
            }
         } else {
            pit_min = NB_PITCH_START;
            pit_max = NB_PITCH_END;
         }

         /* Force pitch to use only the current frame if needed */
         if (st->bounded_pitch && pit_max>offset)
            pit_max=offset;

         /* Perform pitch search */
         pitch = SUBMODE(ltp_quant)(target, sw, interp_qlpc, bw_lpc1, bw_lpc2,
                                    exc32, SUBMODE(ltp_params), pit_min, pit_max, ol_pitch_coef,
                                    NB_ORDER, NB_SUBFRAME_SIZE, bits, stack,
                                    exc, syn_resp, st->complexity, 0, st->plc_tuning, &st->cumul_gain);

         st->pitch[sub]=pitch;
      }
      /* Quantization of innovation */
      SPEEX_MEMSET(innov, 0, NB_SUBFRAME_SIZE);

      /* FIXME: Make sure this is safe from overflows (so far so good) */
      for (i=0;i<NB_SUBFRAME_SIZE;i++)
         exc[i] = EXTRACT16(SUB32(EXTEND32(exc[i]), PSHR32(exc32[i],SIG_SHIFT-1)));

      ener = SHL32(EXTEND32(compute_rms16(exc, NB_SUBFRAME_SIZE)),SIG_SHIFT);

      /*FIXME: Should use DIV32_16 and make sure result fits in 16 bits */
#ifdef FIXED_POINT
      {
         spx_word32_t f = PDIV32(ener,PSHR32(ol_gain,SIG_SHIFT));
         if (f<=32767)
            fine_gain = f;
         else
            fine_gain = 32767;
      }
#else
      fine_gain = PDIV32_16(ener,PSHR32(ol_gain,SIG_SHIFT));
#endif
      /* Calculate gain correction for the sub-frame (if any) */
      if (SUBMODE(have_subframe_gain))
      {
         int qe;
         if (SUBMODE(have_subframe_gain)==3)
         {
            qe = scal_quant(fine_gain, exc_gain_quant_scal3_bound, 8);
            speex_bits_pack(bits, qe, 3);
            ener=MULT16_32_Q14(exc_gain_quant_scal3[qe],ol_gain);
         } else {
            qe = scal_quant(fine_gain, exc_gain_quant_scal1_bound, 2);
            speex_bits_pack(bits, qe, 1);
            ener=MULT16_32_Q14(exc_gain_quant_scal1[qe],ol_gain);
         }
      } else {
         ener=ol_gain;
      }

      /*printf ("%f %f\n", ener, ol_gain);*/

      /* Normalize innovation */
      signal_div(target, target, ener, NB_SUBFRAME_SIZE);

      /* Quantize innovation */
      speex_assert (SUBMODE(innovation_quant));
      {
         /* Codebook search */
         SUBMODE(innovation_quant)(target, interp_qlpc, bw_lpc1, bw_lpc2,
                  SUBMODE(innovation_params), NB_ORDER, NB_SUBFRAME_SIZE,
                  innov, syn_resp, bits, stack, st->complexity, SUBMODE(double_codebook));

         /* De-normalize innovation and update excitation */
         signal_mul(innov, innov, ener, NB_SUBFRAME_SIZE);

         /* In some (rare) modes, we do a second search (more bits) to reduce noise even more */
         if (SUBMODE(double_codebook)) {
            char *tmp_stack=stack;
            VARDECL(spx_sig_t *innov2);
            ALLOC(innov2, NB_SUBFRAME_SIZE, spx_sig_t);
            SPEEX_MEMSET(innov2, 0, NB_SUBFRAME_SIZE);
            for (i=0;i<NB_SUBFRAME_SIZE;i++)
               target[i]=MULT16_16_P13(QCONST16(2.2f,13), target[i]);
            SUBMODE(innovation_quant)(target, interp_qlpc, bw_lpc1, bw_lpc2,
                                      SUBMODE(innovation_params), NB_ORDER, NB_SUBFRAME_SIZE,
                                      innov2, syn_resp, bits, stack, st->complexity, 0);
            signal_mul(innov2, innov2, MULT16_32_Q15(QCONST16(0.454545f,15),ener), NB_SUBFRAME_SIZE);
            for (i=0;i<NB_SUBFRAME_SIZE;i++)
               innov[i] = ADD32(innov[i],innov2[i]);
            stack = tmp_stack;
         }
         for (i=0;i<NB_SUBFRAME_SIZE;i++)
            exc[i] = EXTRACT16(SATURATE32(PSHR32(ADD32(SHL32(exc32[i],1),innov[i]),SIG_SHIFT),32767));
         if (st->innov_rms_save)
            st->innov_rms_save[sub] = compute_rms(innov, NB_SUBFRAME_SIZE);
      }

      /* Final signal synthesis from excitation */
      iir_mem16(exc, interp_qlpc, sw, NB_SUBFRAME_SIZE, NB_ORDER, st->mem_sp, stack);

      /* Compute weighted signal again, from synthesized speech (not sure it's the right thing) */
      if (st->complexity!=0)
         filter10(sw, bw_lpc1, bw_lpc2, sw, NB_SUBFRAME_SIZE, st->mem_sw, stack);

   }

   /* Store the LSPs for interpolation in the next frame */
   if (st->submodeID>=1)
   {
      for (i=0;i<NB_ORDER;i++)
         st->old_lsp[i] = lsp[i];
      for (i=0;i<NB_ORDER;i++)
         st->old_qlsp[i] = qlsp[i];
   }

#ifdef VORBIS_PSYCHO
   if (st->submodeID>=1)
      SPEEX_COPY(st->old_curve, st->curve, 128);
#endif

   if (st->submodeID==1)
   {
#ifndef DISABLE_VBR
      if (st->dtx_count)
         speex_bits_pack(bits, 15, 4);
      else
#endif
         speex_bits_pack(bits, 0, 4);
   }

   /* The next frame will not be the first (Duh!) */
   st->first = 0;
   SPEEX_COPY(st->winBuf, in+2*NB_FRAME_SIZE-NB_WINDOW_SIZE, NB_WINDOW_SIZE-NB_FRAME_SIZE);

   if (SUBMODE(innovation_quant) == noise_codebook_quant || st->submodeID==0)
      st->bounded_pitch = 1;
   else
      st->bounded_pitch = 0;

   return 1;
}
#endif /* DISABLE_ENCODER */


#ifndef DISABLE_DECODER
/**
 * Allocate a narrowband decoder state and load its start-up values.
 *
 * @param m  Mode descriptor; its `mode` member is read as a SpeexNBMode to
 *           obtain the submode table and the default submode.
 * @return   The new DecState (as an opaque pointer), or NULL when the state
 *           structure itself cannot be allocated.
 */
void *nb_decoder_init(const SpeexMode *m)
{
   const SpeexNBMode *nb_mode = (const SpeexNBMode *)m->mode;
   DecState *dec = (DecState *)speex_alloc(sizeof(DecState));
   int cb;

   if (dec == NULL)
      return NULL;

   /* A separate scratch stack is only allocated when neither VAR_ARRAYS
      nor USE_ALLOCA is defined */
#if defined(VAR_ARRAYS) || defined (USE_ALLOCA)
   dec->stack = NULL;
#else
   dec->stack = (char *)speex_alloc_scratch(NB_DEC_STACK);
#endif

   /* Mode and submode selection */
   dec->mode = m;
   dec->submodes = nb_mode->submodes;
   dec->submodeID = nb_mode->defaultSubmode;
   dec->encode_submode = 1;

   /* Stream state: nothing decoded yet, nothing lost yet */
   dec->first = 1;
   dec->count_lost = 0;
   dec->last_pitch = 40;
   dec->last_ol_gain = 0;
   dec->seed = 1000;
   dec->sampling_rate = 8000;

   /* Excitation history starts out silent */
   SPEEX_MEMSET(dec->excBuf, 0, NB_FRAME_SIZE + NB_PITCH_END);

   /* Pitch gain history */
   dec->pitch_gain_buf[2] = 0;
   dec->pitch_gain_buf[1] = 0;
   dec->pitch_gain_buf[0] = 0;
   dec->pitch_gain_buf_idx = 0;

   /* Callbacks: default user handler installed, in-band handler slots empty */
   dec->user_callback.func = &speex_default_user_handler;
   dec->user_callback.data = NULL;
   for (cb = 0; cb < 16; cb++)
   {
      dec->speex_callbacks[cb].func = NULL;
   }

   /* Remaining flags and accumulators */
   dec->voc_mean = 0;
   dec->voc_m2 = 0;
   dec->voc_m1 = 0;
   dec->voc_offset = 0;
   dec->dtx_enabled = 0;
   dec->isWideband = 0;
   dec->lpc_enh_enabled = 1;
   dec->highpass_enabled = 1;

#ifdef ENABLE_VALGRIND
   /* NOTE(review): this marks NB_DEC_STACK bytes starting at the state
      pointer, not at dec->stack -- confirm the intended region */
   VALGRIND_MAKE_READABLE(dec, NB_DEC_STACK);
#endif
   return dec;
}
最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
  • 序言:七十年代末,一起剥皮案震惊了整个滨河市,随后出现的几起案子,更是在滨河造成了极大的恐慌,老刑警刘岩,带你破解...
    沈念sama阅读 217,907评论 6 506
  • 序言:滨河连续发生了三起死亡事件,死亡现场离奇诡异,居然都是意外死亡,警方通过查阅死者的电脑和手机,发现死者居然都...
    沈念sama阅读 92,987评论 3 395
  • 文/潘晓璐 我一进店门,熙熙楼的掌柜王于贵愁眉苦脸地迎上来,“玉大人,你说我怎么就摊上这事。” “怎么了?”我有些...
    开封第一讲书人阅读 164,298评论 0 354
  • 文/不坏的土叔 我叫张陵,是天一观的道长。 经常有香客问我,道长,这世上最难降的妖魔是什么? 我笑而不...
    开封第一讲书人阅读 58,586评论 1 293
  • 正文 为了忘掉前任,我火速办了婚礼,结果婚礼上,老公的妹妹穿的比我还像新娘。我一直安慰自己,他们只是感情好,可当我...
    茶点故事阅读 67,633评论 6 392
  • 文/花漫 我一把揭开白布。 她就那样静静地躺着,像睡着了一般。 火红的嫁衣衬着肌肤如雪。 梳的纹丝不乱的头发上,一...
    开封第一讲书人阅读 51,488评论 1 302
  • 那天,我揣着相机与录音,去河边找鬼。 笑死,一个胖子当着我的面吹牛,可吹牛的内容都是我干的。 我是一名探鬼主播,决...
    沈念sama阅读 40,275评论 3 418
  • 文/苍兰香墨 我猛地睁开眼,长吁一口气:“原来是场噩梦啊……” “哼!你这毒妇竟也来了?” 一声冷哼从身侧响起,我...
    开封第一讲书人阅读 39,176评论 0 276
  • 序言:老挝万荣一对情侣失踪,失踪者是张志新(化名)和其女友刘颖,没想到半个月后,有当地人在树林里发现了一具尸体,经...
    沈念sama阅读 45,619评论 1 314
  • 正文 独居荒郊野岭守林人离奇死亡,尸身上长有42处带血的脓包…… 初始之章·张勋 以下内容为张勋视角 年9月15日...
    茶点故事阅读 37,819评论 3 336
  • 正文 我和宋清朗相恋三年,在试婚纱的时候发现自己被绿了。 大学时的朋友给我发了我未婚夫和他白月光在一起吃饭的照片。...
    茶点故事阅读 39,932评论 1 348
  • 序言:一个原本活蹦乱跳的男人离奇死亡,死状恐怖,灵堂内的尸体忽然破棺而出,到底是诈尸还是另有隐情,我是刑警宁泽,带...
    沈念sama阅读 35,655评论 5 346
  • 正文 年R本政府宣布,位于F岛的核电站,受9级特大地震影响,放射性物质发生泄漏。R本人自食恶果不足惜,却给世界环境...
    茶点故事阅读 41,265评论 3 329
  • 文/蒙蒙 一、第九天 我趴在偏房一处隐蔽的房顶上张望。 院中可真热闹,春花似锦、人声如沸。这庄子的主人今日做“春日...
    开封第一讲书人阅读 31,871评论 0 22
  • 文/苍兰香墨 我抬头看了看天上的太阳。三九已至,却和暖如春,着一层夹袄步出监牢的瞬间,已是汗流浃背。 一阵脚步声响...
    开封第一讲书人阅读 32,994评论 1 269
  • 我被黑心中介骗来泰国打工, 没想到刚下飞机就差点儿被人妖公主榨干…… 1. 我叫王不留,地道东北人。 一个月前我还...
    沈念sama阅读 48,095评论 3 370
  • 正文 我出身青楼,却偏偏与公主长得像,于是被迫代替她去往敌国和亲。 传闻我的和亲对象是个残疾皇子,可洞房花烛夜当晚...
    茶点故事阅读 44,884评论 2 354

推荐阅读更多精彩内容

  • OpusEncoder 1:获得 OpusEncoder结构的大小 2:分配和初始化 encoder状态.一个编码...
    今忆Zoe阅读 6,517评论 0 3
  • 本文属于Android局域网内的语音对讲项目系列,《通过UDP广播实现Android局域网Peer Discove...
    yhthu阅读 19,167评论 9 53
  • 摘要 该配置文件定义了支持高质量音频分发所需的Bluetooth®设备的要求。这些要求以终端用户服务的方式表达,并...
    公子小水阅读 9,695评论 0 4
  • Spring Cloud为开发人员提供了快速构建分布式系统中一些常见模式的工具(例如配置管理,服务发现,断路器,智...
    卡卡罗2017阅读 134,656评论 18 139
  • 教程一:视频截图(Tutorial 01: Making Screencaps) 首先我们需要了解视频文件的一些基...
    90后的思维阅读 4,697评论 0 3