1 /* Copyright (C) 2002 Jean-Marc Valin
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
8 - Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
11 - Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
15 - Neither the name of the Xiph.org Foundation nor the names of its
16 contributors may be used to endorse or promote products derived from
17 this software without specific prior written permission.
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
23 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
26 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
27 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
28 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 #include "cb_search.h"
38 #include "stack_alloc.h"
43 #include "cb_search_sse.h"
44 #elif defined(ARM4_ASM) || defined(ARM5E_ASM)
45 #include "cb_search_arm4.h"
/* Pre-computes per-codeword data used by the codebook searches in this file:
   for each of the shape_cb_size entries of shape_cb (subvect_size samples
   each) the entry is copied into a scratch buffer, combined with r in a
   convolution-style loop, and an energy term is accumulated into E[i].
   NOTE(review): this excerpt omits several lines of the function (braces,
   local declarations, preprocessor conditionals and some statements); the
   comments below describe only what the visible lines establish. */
48 static void compute_weighted_codebook(const signed char *shape_cb, const spx_word16_t *r, spx_word16_t *resp, spx_word16_t *resp2, spx_word32_t *E, int shape_cb_size, int subvect_size, char *stack)
/* Scratch copy of the current codeword, converted from signed char to spx_word16_t */
51 VARDECL(spx_word16_t *shape);
52 ALLOC(shape, subvect_size, spx_word16_t);
/* One iteration per codebook entry */
53 for (i=0;i<shape_cb_size;i++)
/* res addresses the slot of resp that belongs to codeword i */
57 res = resp+i*subvect_size;
58 for (k=0;k<subvect_size;k++)
59 shape[k] = (spx_word16_t)shape_cb[i*subvect_size+k];
62 /* Compute codeword response using convolution with impulse response */
63 for(j=0;j<subvect_size;j++)
/* Accumulates the products shape[k]*r[j-k] into resj; the bounds of the k loop are not visible in this excerpt */
68 resj = MAC16_16(resj,shape[k],r[j-k]);
/* NOTE(review): the next two assignments to res16 are presumably the
   fixed-point and floating-point alternatives of one build-time conditional
   -- confirm against the full file */
70 res16 = EXTRACT16(SHR32(resj, 11));
72 res16 = 0.03125f*resj;
74 /* Compute codeword energy */
/* Accumulates res16*res16 into E[i]; presumably E[i] is cleared on a line not shown here -- TODO confirm */
75 E[i]=MAC16_16(E[i],res16,res16);
77 /*printf ("%d\n", (int)res[j]);*/
/* Split-codebook shape/sign search specialised for a single candidate: both
   vq_nbest calls below request exactly one best entry per subvector.
   For every subvector it picks an entry, packs its index into bits, removes
   the entry's contribution from the working target t, and finally adds the
   selected codewords to exc.
   NOTE(review): this excerpt omits part of the parameter list, the local
   declarations, all braces and the preprocessor conditionals. Statements
   that look duplicated (e.g. the two ALLOC(E, ...) lines) are presumably
   alternatives selected at build time -- confirm against the full file. */
87 static void split_cb_search_shape_sign_N1(
88 spx_sig_t target[], /* target vector */
89 spx_coef_t ak[], /* LPCs for this subframe */
90 spx_coef_t awk1[], /* Weighted LPCs for this subframe */
91 spx_coef_t awk2[], /* Weighted LPCs for this subframe */
92 const void *par, /* Codebook/search parameters (cast to a split_cb_params pointer below) */
93 int p, /* number of LPC coeffs */
94 int nsf, /* number of samples in subframe */
104 VARDECL(spx_word16_t *resp);
106 VARDECL(__m128 *resp2);
110 VARDECL(spx_word32_t *E);
112 VARDECL(spx_word16_t *t);
113 VARDECL(spx_sig_t *e);
114 const signed char *shape_cb;
115 int shape_cb_size, subvect_size, nb_subvect;
116 const split_cb_params *params;
119 spx_word32_t best_dist;
/* Unpack the codebook description from the opaque par pointer */
127 params = (const split_cb_params *) par;
128 subvect_size = params->subvect_size;
129 nb_subvect = params->nb_subvect;
/* The codebook holds 2^shape_bits entries */
130 shape_cb_size = 1<<params->shape_bits;
131 shape_cb = params->shape_cb;
132 have_sign = params->have_sign;
/* resp: one subvect_size-sample response per codebook entry */
133 ALLOC(resp, shape_cb_size*subvect_size, spx_word16_t);
135 ALLOC(resp2, (shape_cb_size*subvect_size)>>2, __m128);
136 ALLOC(E, shape_cb_size>>2, __m128);
139 ALLOC(E, shape_cb_size, spx_word32_t);
/* t: working copy of the target; e: excitation contribution built from the chosen codewords */
141 ALLOC(t, nsf, spx_word16_t);
142 ALLOC(e, nsf, spx_sig_t);
144 /* FIXME: make that adaptive? */
/* Each target sample is reduced with PSHR32(..., 6) and narrowed with EXTRACT16 before being stored in t */
146 t[i]=EXTRACT16(PSHR32(target[i],6));
/* Fill resp (and resp2) and E for every codebook entry */
148 compute_weighted_codebook(shape_cb, r, resp, resp2, E, shape_cb_size, subvect_size, stack);
150 for (i=0;i<nb_subvect;i++)
/* x points at the part of the working target that belongs to subvector i */
152 spx_word16_t *x=t+subvect_size*i;
153 /*Find new n-best based on previous n-best j*/
/* Presumably only one of the two searches runs, depending on have_sign -- TODO confirm; both ask for one result */
155 vq_nbest_sign(x, resp2, subvect_size, shape_cb_size, E, 1, &best_index, &best_dist, stack);
157 vq_nbest(x, resp2, subvect_size, shape_cb_size, E, 1, &best_index, &best_dist, stack);
/* The chosen index is written with shape_bits+have_sign bits */
159 speex_bits_pack(bits,best_index,params->shape_bits+have_sign);
/* Indices at or above shape_cb_size presumably denote a negated codeword (sign
   folded into the index); the body of this branch is not visible here */
166 if (rind>=shape_cb_size)
171 res = resp+rind*subvect_size;
/* Remove the selected codeword's response from the current subvector of t:
   subtracted in one case, added in the other (presumably chosen by the sign) */
173 for (m=0;m<subvect_size;m++)
174 t[subvect_size*i+m] = SUB16(t[subvect_size*i+m], res[m]);
176 for (m=0;m<subvect_size;m++)
177 t[subvect_size*i+m] = ADD16(t[subvect_size*i+m], res[m]);
/* Store the (possibly negated) codeword in e; the three variants below are
   presumably the positive/negative fixed-point cases and the float case */
182 for (j=0;j<subvect_size;j++)
183 e[subvect_size*i+j]=SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5);
185 for (j=0;j<subvect_size;j++)
186 e[subvect_size*i+j]=NEG32(SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5));
189 for (j=0;j<subvect_size;j++)
190 e[subvect_size*i+j]=sign*0.03125*shape_cb[rind*subvect_size+j];
/* Propagate the effect of each sample of the chosen codeword onto the part of
   t that lies after subvector i (indices subvect_size*(i+1) .. nsf-1) */
195 for (m=0;m<subvect_size;m++)
201 if (rind>=shape_cb_size)
209 g=sign*shape_cb[rind*subvect_size+m];
210 for (n=subvect_size*(i+1);n<nsf;n++,q++)
/* NOTE(review): t is declared as spx_word16_t above, yet this update uses
   SUB32 whereas the subvector update uses SUB16 -- confirm this is intended */
211 t[n] = SUB32(t[n],MULT16_16_Q11_32(g,r[q]));
213 g=sign*0.03125*shape_cb[rind*subvect_size+m];
214 for (n=subvect_size*(i+1);n<nsf;n++,q++)
215 t[n] = SUB32(t[n],g*r[q]);
220 /* Update excitation */
221 /* FIXME: We could update the excitation directly above */
223 exc[j]=ADD32(exc[j],e[j]);
225 /* Update target: only update target if necessary */
/* e is passed through syn_percep_zero into r2, which is then subtracted from
   target; the guarding condition is not visible in this excerpt */
228 VARDECL(spx_sig_t *r2);
229 ALLOC(r2, nsf, spx_sig_t);
230 syn_percep_zero(e, ak, awk1, awk2, r2, nsf,p, stack);
232 target[j]=SUB32(target[j],r2[j]);
/* Split-codebook shape/sign search that keeps N candidate paths ("n-bests")
   from one subvector to the next, then packs the indices of the retained
   path into bits, adds the corresponding codewords to exc and optionally
   updates target.
   NOTE(review): this excerpt omits part of the parameter list, several local
   declarations, all braces, the preprocessor conditionals and a number of
   statements (including how N is derived). Comments below describe only what
   the visible lines establish; anything else is marked as an assumption. */
238 void split_cb_search_shape_sign(
239 spx_sig_t target[], /* target vector */
240 spx_coef_t ak[], /* LPCs for this subframe */
241 spx_coef_t awk1[], /* Weighted LPCs for this subframe */
242 spx_coef_t awk2[], /* Weighted LPCs for this subframe */
243 const void *par, /* Codebook/search parameters (cast to a split_cb_params pointer below) */
244 int p, /* number of LPC coeffs */
245 int nsf, /* number of samples in subframe */
255 VARDECL(spx_word16_t *resp);
257 VARDECL(__m128 *resp2);
261 VARDECL(spx_word32_t *E);
263 VARDECL(spx_word16_t *t);
264 VARDECL(spx_sig_t *e);
265 VARDECL(spx_sig_t *r2);
266 VARDECL(spx_word16_t *tmp);
267 VARDECL(spx_word32_t *ndist);
268 VARDECL(spx_word32_t *odist);
270 VARDECL(spx_word16_t **ot2);
271 VARDECL(spx_word16_t **nt2);
272 spx_word16_t **ot, **nt;
276 const signed char *shape_cb;
277 int shape_cb_size, subvect_size, nb_subvect;
278 const split_cb_params *params;
280 VARDECL(int *best_index);
281 VARDECL(spx_word32_t *best_dist);
/* Hand the whole job to the single-candidate variant; the condition guarding
   this call is not visible here (presumably N == 1 -- TODO confirm) */
291 split_cb_search_shape_sign_N1(target,ak,awk1,awk2,par,p,nsf,exc,r,bits,stack,complexity,update_target);
/* Per-candidate pointer tables: old/new targets (ot2/nt2) and old/new index lists (oind/nind) */
294 ALLOC(ot2, N, spx_word16_t*);
295 ALLOC(nt2, N, spx_word16_t*);
296 ALLOC(oind, N, int*);
297 ALLOC(nind, N, int*);
/* Unpack the codebook description from the opaque par pointer */
299 params = (const split_cb_params *) par;
300 subvect_size = params->subvect_size;
301 nb_subvect = params->nb_subvect;
/* The codebook holds 2^shape_bits entries */
302 shape_cb_size = 1<<params->shape_bits;
303 shape_cb = params->shape_cb;
304 have_sign = params->have_sign;
305 ALLOC(resp, shape_cb_size*subvect_size, spx_word16_t);
307 ALLOC(resp2, (shape_cb_size*subvect_size)>>2, __m128);
308 ALLOC(E, shape_cb_size>>2, __m128);
311 ALLOC(E, shape_cb_size, spx_word32_t);
313 ALLOC(t, nsf, spx_word16_t);
314 ALLOC(e, nsf, spx_sig_t);
315 ALLOC(r2, nsf, spx_sig_t);
316 ALLOC(ind, nb_subvect, int);
/* tmp backs the 2*N target buffers of nsf samples each; nt2[i] takes the
   odd-numbered slots (the assignment of the even slots is not visible here) */
318 ALLOC(tmp, 2*N*nsf, spx_word16_t);
322 nt2[i]=tmp+(2*i+1)*nsf;
326 ALLOC(best_index, N, int);
327 ALLOC(best_dist, N, spx_word32_t);
328 ALLOC(ndist, N, spx_word32_t);
329 ALLOC(odist, N, spx_word32_t);
/* itmp backs the 2*N index lists of nb_subvect entries each: nind[i] uses the
   even-numbered slots, oind[i] the odd-numbered ones; all entries start at -1 */
331 ALLOC(itmp, 2*N*nb_subvect, int);
334 nind[i]=itmp+2*i*nb_subvect;
335 oind[i]=itmp+(2*i+1)*nb_subvect;
336 for (j=0;j<nb_subvect;j++)
337 nind[i][j]=oind[i][j]=-1;
340 /* FIXME: make that adaptive? */
/* Each target sample is reduced with PSHR32(..., 6) and narrowed with EXTRACT16 before being stored in t */
342 t[i]=EXTRACT16(PSHR32(target[i],6));
348 /*for (i=0;i<nsf;i++)
349 printf ("%d\n", (int)t[i]);*/
351 /* Pre-compute codewords response and energy */
352 compute_weighted_codebook(shape_cb, r, resp, resp2, E, shape_cb_size, subvect_size, stack);
356 /*For all subvectors*/
357 for (i=0;i<nb_subvect;i++)
359 /*"erase" nbest list*/
363 /*For all n-bests of previous subvector*/
/* x points at subvector i inside the target kept for previous candidate j */
366 spx_word16_t *x=ot[j]+subvect_size*i;
367 /*Find new n-best based on previous n-best j*/
/* Presumably only one of the two searches runs, depending on have_sign -- TODO confirm; both ask for N results */
369 vq_nbest_sign(x, resp2, subvect_size, shape_cb_size, E, N, best_index, best_dist, stack);
371 vq_nbest(x, resp2, subvect_size, shape_cb_size, E, N, best_index, best_dist, stack);
373 /*For all new n-bests*/
382 for (m=i*subvect_size;m<(i+1)*subvect_size;m++)
385 /* New code: update only enough of the target to calculate error*/
390 rind = best_index[k];
/* Indices at or above shape_cb_size presumably denote a negated codeword (sign
   folded into the index); the body of this branch is not visible here */
391 if (rind>=shape_cb_size)
396 res = resp+rind*subvect_size;
/* Remove the candidate codeword's response from subvector i of t: subtracted
   in one case, added in the other (presumably chosen by the sign) */
398 for (m=0;m<subvect_size;m++)
399 t[subvect_size*i+m] = SUB16(t[subvect_size*i+m], res[m]);
401 for (m=0;m<subvect_size;m++)
402 t[subvect_size*i+m] = ADD16(t[subvect_size*i+m], res[m]);
405 /*compute error (distance)*/
/* err accumulates the squared samples of t over subvector i */
407 for (m=i*subvect_size;m<(i+1)*subvect_size;m++)
408 err = MAC16_16(err, t[m],t[m]);
409 /*update n-best list*/
/* A candidate is considered when it beats the last entry of ndist, or when
   that entry is below -1 (presumably the marker for an unused slot, set where
   the list is "erased" -- TODO confirm) */
410 if (err<ndist[N-1] || ndist[N-1]<-1)
413 /* previous target (we don't care what happened before) */
414 for (m=(i+1)*subvect_size;m<nsf;m++)
416 /* New code: update the rest of the target only if it's worth it */
417 for (m=0;m<subvect_size;m++)
422 rind = best_index[k];
423 if (rind>=shape_cb_size)
431 g=sign*shape_cb[rind*subvect_size+m];
432 for (n=subvect_size*(i+1);n<nsf;n++,q++)
/* NOTE(review): t is declared as spx_word16_t above, yet this update uses
   SUB32 whereas the subvector update uses SUB16 -- confirm this is intended */
433 t[n] = SUB32(t[n],MULT16_16_Q11_32(g,r[q]));
435 g=sign*0.03125*shape_cb[rind*subvect_size+m];
436 for (n=subvect_size*(i+1);n<nsf;n++,q++)
437 t[n] = SUB32(t[n],g*r[q]);
/* Find the slot m where this candidate belongs in the list of distances */
444 if (err < ndist[m] || ndist[m]<-1)
/* Shift the entries after the insertion point down by one: each index list n
   receives the contents of list n-1 */
448 for (q=(i+1)*subvect_size;q<nsf;q++)
450 for (q=0;q<nb_subvect;q++)
451 nind[n][q]=nind[n-1][q];
/* Insert the candidate at slot m: start from the index history of previous
   candidate j and record the newly chosen index for subvector i */
454 for (q=(i+1)*subvect_size;q<nsf;q++)
456 for (q=0;q<nb_subvect;q++)
457 nind[m][q]=oind[j][q];
458 nind[m][i]=best_index[k];
469 /*update old-new data*/
470 /* just swap pointers instead of a long copy */
/* The index lists, in contrast, are copied element by element from new to old */
478 for (m=0;m<nb_subvect;m++)
479 oind[j][m]=nind[j][m];
/* Write one index per subvector using shape_bits+have_sign bits; how ind[] is
   filled from the retained candidate is not visible in this excerpt */
485 for (i=0;i<nb_subvect;i++)
488 speex_bits_pack(bits,ind[i],params->shape_bits+have_sign);
491 /* Put everything back together */
492 for (i=0;i<nb_subvect;i++)
497 if (rind>=shape_cb_size)
/* Store the (possibly negated) codeword in e; the three variants below are
   presumably the positive/negative fixed-point cases and the float case */
505 for (j=0;j<subvect_size;j++)
506 e[subvect_size*i+j]=SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5);
508 for (j=0;j<subvect_size;j++)
509 e[subvect_size*i+j]=NEG32(SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5));
512 for (j=0;j<subvect_size;j++)
513 e[subvect_size*i+j]=sign*0.03125*shape_cb[rind*subvect_size+j];
516 /* Update excitation */
518 exc[j]=ADD32(exc[j],e[j]);
520 /* Update target: only update target if necessary */
/* e is passed through syn_percep_zero into r2, which is then subtracted from
   target; the guarding condition is not visible in this excerpt */
523 syn_percep_zero(e, ak, awk1, awk2, r2, nsf,p, stack);
525 target[j]=SUB32(target[j],r2[j]);
/* Reads one sign bit and one shape_bits-wide index per subvector from bits and
   writes the corresponding (possibly negated) codewords of shape_cb into exc.
   NOTE(review): this excerpt omits part of the parameter list, the local
   declarations, all braces and the preprocessor conditionals; whether the
   sign bit is read unconditionally or only when have_sign is set is not
   visible here. */
530 void split_cb_shape_sign_unquant(
532 const void *par, /* non-overlapping codebook (cast to a split_cb_params pointer below) */
533 int nsf, /* number of samples in subframe */
541 const signed char *shape_cb;
543 int subvect_size, nb_subvect;
544 const split_cb_params *params;
/* Unpack the codebook description from the opaque par pointer */
547 params = (const split_cb_params *) par;
548 subvect_size = params->subvect_size;
549 nb_subvect = params->nb_subvect;
550 //shape_cb_size = 1<<params->shape_bits;
551 shape_cb = params->shape_cb;
552 have_sign = params->have_sign;
/* One decoded index and one decoded sign per subvector */
554 ALLOC(ind, nb_subvect, int);
555 ALLOC(signs, nb_subvect, int);
557 /* Decode codewords and gains */
558 for (i=0;i<nb_subvect;i++)
/* Sign: a single bit */
561 signs[i] = speex_bits_unpack_unsigned(bits, 1);
/* Codebook index: shape_bits bits */
564 ind[i] = speex_bits_unpack_unsigned(bits, params->shape_bits);
566 /* Compute decoded excitation */
567 for (i=0;i<nb_subvect;i++)
/* The three variants below are presumably the positive/negative fixed-point
   cases and the float case -- confirm against the full file */
575 for (j=0;j<subvect_size;j++)
576 exc[subvect_size*i+j]=SHL32(EXTEND32(shape_cb[ind[i]*subvect_size+j]),SIG_SHIFT-5);
578 for (j=0;j<subvect_size;j++)
579 exc[subvect_size*i+j]=NEG32(SHL32(EXTEND32(shape_cb[ind[i]*subvect_size+j]),SIG_SHIFT-5));
/* NOTE(review): this variant accumulates into exc (+=) while the two variants
   above overwrite it (=); if they are build-time alternatives, this one
   depends on the initial contents of exc -- confirm this is intended */
582 for (j=0;j<subvect_size;j++)
583 exc[subvect_size*i+j]+=s*0.03125*shape_cb[ind[i]*subvect_size+j];
/* Noise-codebook quantisation entry point. The visible lines allocate an
   nsf-sample scratch buffer and run target through residue_percep_zero into
   it.
   NOTE(review): the remainder of the parameter list, the braces and whatever
   is done with tmp afterwards are not visible in this excerpt. */
588 void noise_codebook_quant(
589 spx_sig_t target[], /* target vector */
590 spx_coef_t ak[], /* LPCs for this subframe */
591 spx_coef_t awk1[], /* Weighted LPCs for this subframe */
592 spx_coef_t awk2[], /* Weighted LPCs for this subframe */
593 const void *par, /* Codebook/search parameters*/
594 int p, /* number of LPC coeffs */
595 int nsf, /* number of samples in subframe */
/* Scratch buffer receiving the output of residue_percep_zero */
605 VARDECL(spx_sig_t *tmp);
606 ALLOC(tmp, nsf, spx_sig_t);
607 residue_percep_zero(target, ak, awk1, awk2, tmp, nsf, p, stack);
/* Noise-codebook counterpart on the unquantisation side: the only visible
   statement hands exc and nsf to speex_rand_vec; par is not read on the
   visible lines.
   NOTE(review): the remainder of the parameter list and the braces are not
   visible in this excerpt, and the meaning of the leading argument 1 is
   defined by speex_rand_vec -- confirm against its declaration. */
617 void noise_codebook_unquant(
619 const void *par, /* non-overlapping codebook */
620 int nsf, /* number of samples in subframe */
625 speex_rand_vec(1, exc, nsf);