1 /* Copyright (C) 2002 Jean-Marc Valin */
4 @brief Narrowband CELP encoder/decoder
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions
11 - Redistributions of source code must retain the above copyright
12 notice, this list of conditions and the following disclaimer.
14 - Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
18 - Neither the name of the Xiph.org Foundation nor the names of its
19 contributors may be used to endorse or promote products derived from
20 this software without specific prior written permission.
22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
26 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
27 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
28 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
29 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 #include <speex/speex_bits.h>
41 #include <speex/speex_callbacks.h>
45 /** Structure representing the full state of the narrowband encoder.
    Groups the frame geometry, pitch-search limits, perceptual-weighting and
    LPC analysis settings, working buffers, filter memories and the
    VBR/VAD/DTX/ABR controls described field by field below. */
46 typedef struct EncState {
47 const SpeexMode *mode; /**< Mode corresponding to the state */
48 int first; /**< Is this the first frame? */
49 int frameSize; /**< Size of frames */
50 int subframeSize; /**< Size of sub-frames */
51 int nbSubframes; /**< Number of sub-frames */
52 int windowSize; /**< Analysis (LPC) window length */
53 int lpcSize; /**< LPC order */
54 int min_pitch; /**< Minimum pitch value allowed */
55 int max_pitch; /**< Maximum pitch value allowed */
57 int safe_pitch; /**< Don't use too large values for pitch (in case we lose a packet) */
58 int bounded_pitch; /**< Next frame should not rely on previous frames for pitch */
59 int ol_pitch; /**< Open-loop pitch */
60 int ol_voiced; /**< Open-loop voiced/non-voiced decision */
67 spx_word16_t gamma1; /**< Perceptual filter: A(z/gamma1) */
68 spx_word16_t gamma2; /**< Perceptual filter: A(z/gamma2) */
69 float lag_factor; /**< Lag windowing Gaussian width */
70 float lpc_floor; /**< Noise floor multiplier for A[0] in LPC analysis */
71 char *stack; /**< Pseudo-stack allocation for temporary memory */
72 spx_sig_t *inBuf; /**< Input buffer (original signal) */
73 spx_sig_t *frame; /**< Start of original frame */
74 spx_sig_t *excBuf; /**< Excitation buffer */
75 spx_sig_t *exc; /**< Start of excitation frame */
76 spx_sig_t *swBuf; /**< Weighted signal buffer */
77 spx_sig_t *sw; /**< Start of weighted signal frame */
78 spx_sig_t *innov; /**< Innovation for the frame */
79 spx_word16_t *window; /**< Temporary (Hanning) window */
80 spx_word16_t *autocorr; /**< Auto-correlation */
81 spx_word16_t *lagWindow; /**< Window applied to auto-correlation */
82 spx_coef_t *lpc; /**< LPCs for current frame */
83 spx_lsp_t *lsp; /**< LSPs for current frame */
84 spx_lsp_t *qlsp; /**< Quantized LSPs for current frame */
85 spx_lsp_t *old_lsp; /**< LSPs for previous frame */
86 spx_lsp_t *old_qlsp; /**< Quantized LSPs for previous frame */
87 spx_lsp_t *interp_lsp; /**< Interpolated LSPs */
88 spx_lsp_t *interp_qlsp; /**< Interpolated quantized LSPs */
89 spx_coef_t *interp_lpc; /**< Interpolated LPCs */
90 spx_coef_t *interp_qlpc; /**< Interpolated quantized LPCs */
91 spx_coef_t *bw_lpc1; /**< LPCs after bandwidth expansion by gamma1 for perceptual weighting */
92 spx_coef_t *bw_lpc2; /**< LPCs after bandwidth expansion by gamma2 for perceptual weighting */
93 spx_mem_t *mem_sp; /**< Filter memory for signal synthesis */
94 spx_mem_t *mem_sw; /**< Filter memory for perceptually-weighted signal */
95 spx_mem_t *mem_sw_whole; /**< Filter memory for perceptually-weighted signal (whole frame) */
96 spx_mem_t *mem_exc; /**< Filter memory for excitation (whole frame) */
97 spx_word32_t *pi_gain; /**< Gain of LPC filter at theta=pi (fe/2) */
99 VBRState *vbr; /**< State of the VBR data */
100 float vbr_quality; /**< Quality setting for VBR encoding */
101 float relative_quality; /**< Relative quality that will be needed by VBR */
102 int vbr_enabled; /**< 1 for enabling VBR, 0 otherwise */
103 int vad_enabled; /**< 1 for enabling VAD, 0 otherwise */
104 int dtx_enabled; /**< 1 for enabling DTX, 0 otherwise */
105 int dtx_count; /**< Number of consecutive DTX frames */
106 int abr_enabled; /**< ABR setting: despite the name this is not a 0/1 flag, it holds the rate in bps, 0 if off */
110 int complexity; /**< Complexity setting (0-10 from least complex to most complex) */
114 const SpeexSubmode * const *submodes; /**< Sub-mode data */
115 int submodeID; /**< Activated sub-mode */
116 int submodeSelect; /**< Mode chosen by the user (may differ from submodeID if VAD is on) */
119 /** Structure representing the full state of the narrowband decoder.
    Groups the frame geometry, pitch limits, working buffers, synthesis filter
    memory, the pitch/gain history of previously decoded frames and the
    callback handlers described field by field below. */
120 typedef struct DecState {
121 const SpeexMode *mode; /**< Mode corresponding to the state */
122 int first; /**< Is this the first frame? */
123 int count_lost; /**< Was the last frame lost? NOTE(review): the name suggests a count of lost frames rather than a flag -- confirm against the decoder source */
124 int frameSize; /**< Size of frames */
125 int subframeSize; /**< Size of sub-frames */
126 int nbSubframes; /**< Number of sub-frames */
127 int lpcSize; /**< LPC order */
128 int min_pitch; /**< Minimum pitch value allowed */
129 int max_pitch; /**< Maximum pitch value allowed */
136 spx_word16_t last_ol_gain; /**< Open-loop gain for previous frame */
138 char *stack; /**< Pseudo-stack allocation for temporary memory */
139 spx_sig_t *inBuf; /**< Input buffer (original signal). NOTE(review): wording matches the encoder struct; presumably this holds the decoded signal here -- confirm */
140 spx_sig_t *frame; /**< Start of original frame. NOTE(review): same encoder-style wording as inBuf -- confirm meaning in the decoder */
141 spx_sig_t *excBuf; /**< Excitation buffer */
142 spx_sig_t *exc; /**< Start of excitation frame */
143 spx_sig_t *innov; /**< Innovation for the frame */
144 spx_lsp_t *qlsp; /**< Quantized LSPs for current frame */
145 spx_lsp_t *old_qlsp; /**< Quantized LSPs for previous frame */
146 spx_lsp_t *interp_qlsp; /**< Interpolated quantized LSPs */
147 spx_coef_t *interp_qlpc; /**< Interpolated quantized LPCs */
148 spx_mem_t *mem_sp; /**< Filter memory for synthesis signal */
149 spx_word32_t *pi_gain; /**< Gain of LPC filter at theta=pi (fe/2) */
150 int last_pitch; /**< Pitch of last correctly decoded frame */
151 spx_word16_t last_pitch_gain; /**< Pitch gain of last correctly decoded frame */
152 spx_word16_t pitch_gain_buf[3]; /**< Pitch gain of last decoded frames (3 entries) */
153 int pitch_gain_buf_idx; /**< Tail of the buffer (index into pitch_gain_buf) */
156 const SpeexSubmode * const *submodes; /**< Sub-mode data */
157 int submodeID; /**< Activated sub-mode */
158 int lpc_enh_enabled; /**< 1 when LPC enhancer is on, 0 otherwise */
159 CombFilterMem *comb_mem; /**< Comb filter memory (presumably used by the enhancer -- TODO confirm) */
160 SpeexCallback speex_callbacks[SPEEX_MAX_CALLBACKS]; /**< Table of SPEEX_MAX_CALLBACKS callback handlers (presumably indexed by request ID -- TODO confirm) */
162 SpeexCallback user_callback; /**< Separate single callback slot (presumably for user-defined requests -- TODO confirm) */
173 /** Initializes encoder state.
    @param m Mode the encoder state is initialized for
    @return Untyped pointer to the encoder state (presumably an EncState to be
            passed as `state` to nb_encode(), nb_encoder_ctl() and
            nb_encoder_destroy() -- TODO confirm against the implementation) */
174 void *nb_encoder_init(const SpeexMode *m);
176 /** De-allocates encoder state resources.
    @param state Encoder state (presumably the pointer returned by
                 nb_encoder_init() -- TODO confirm) */
177 void nb_encoder_destroy(void *state);
179 /** Encodes one frame.
    @param state Encoder state (presumably from nb_encoder_init() -- TODO confirm)
    @param in    Input frame; typed as void *, so the sample format is not
                 fixed by this prototype
    @param bits  Bit-stream object (presumably where the encoded frame is
                 written -- TODO confirm)
    @return int result; its meaning is not documented in this header */
180 int nb_encode(void *state, void *in, SpeexBits *bits);
183 /** Initializes decoder state.
    @param m Mode the decoder state is initialized for
    @return Untyped pointer to the decoder state (presumably a DecState to be
            passed as `state` to nb_decode(), nb_decoder_ctl() and
            nb_decoder_destroy() -- TODO confirm against the implementation) */
184 void *nb_decoder_init(const SpeexMode *m);
186 /** De-allocates decoder state resources.
    @param state Decoder state (presumably the pointer returned by
                 nb_decoder_init() -- TODO confirm) */
187 void nb_decoder_destroy(void *state);
189 /** Decodes one frame.
    @param state Decoder state (presumably from nb_decoder_init() -- TODO confirm)
    @param bits  Bit-stream object (presumably the source of the encoded
                 frame -- TODO confirm)
    @param out   Output frame; typed as void *, so the sample format is not
                 fixed by this prototype
    @return int result; its meaning is not documented in this header */
190 int nb_decode(void *state, SpeexBits *bits, void *out);
192 /** ioctl-like function for controlling a narrowband encoder.
    @param state   Encoder state (presumably from nb_encoder_init() -- TODO confirm)
    @param request Integer code selecting the operation to perform
    @param ptr     Untyped argument for the request (presumably read or written
                   depending on the request -- TODO confirm)
    @return int result; its meaning is not documented in this header */
193 int nb_encoder_ctl(void *state, int request, void *ptr);
195 /** ioctl-like function for controlling a narrowband decoder.
    @param state   Decoder state (presumably from nb_decoder_init() -- TODO confirm)
    @param request Integer code selecting the operation to perform
    @param ptr     Untyped argument for the request (presumably read or written
                   depending on the request -- TODO confirm)
    @return int result; its meaning is not documented in this header */
196 int nb_decoder_ctl(void *state, int request, void *ptr);