//cb53.c - 5.3 rate codebook code #include "opt.h" #include #include #include #include #include "typedef.h" #include "cst_lbc.h" #include "tab_lbc.h" #include "util_lbc.h" #include "exc_lbc.h" #include "timer.h" #include "mmxutil.h" void fourPulseFlt (float *rr, float *Dn, float thres, int ip[], int *shiftPtr); //-------------------------------------------------------- int extra; void reset_max_time(void) { extra = 120; } //------------------------------------------------------------ int ACELP_LBC_code(float X[], float h[], int T0, float code[], int *ind_gain, int *shift, int *sign, float gain_T0, int flags) { int i, index; float gain_q; float Dn[SubFrLen2], tmp_code[SubFrLen2]; float rr[DIM_RR]; // Include fixed-gain pitch contribution into impulse resp. h[] if (T0 < SubFrLen-2) for (i = T0; i < SubFrLen; i++) h[i] += gain_T0*h[i-T0]; // Compute correlations of h[] needed for the codebook search Cor_h(h, rr); // Compute correlation of target vector with impulse response. Cor_h_X(h, X, Dn); // Find codebook index index = D4i64_LBC(Dn, rr, h, tmp_code, rr, shift, sign, flags); // Compute innovation vector gain. // Include fixed-gain pitch contribution into code[]. *ind_gain = G_code(X, rr, &gain_q); for (i=0; i < SubFrLen; i++) code[i] = tmp_code[i]*gain_q; if(T0 < SubFrLen-2) for (i=T0; i < SubFrLen; i++) code[i] += code[i-T0]*gain_T0; return index; } //--------------------------------------------------------------- void Cor_h(float *H, float *rr) { // Compute correlations of h[] needed for the codebook search. // h[] :Impulse response. // rr[] :Correlations. float *rri0i0, *rri1i1, *rri2i2, *rri3i3; float *rri0i1, *rri0i2, *rri0i3; float *rri1i2, *rri1i3, *rri2i3; float *p0, *p1, *p2, *p3; float cor, *h2; int i, k, m, t; float h[SubFrLen2]; for(i=0; i=0; i--) { cor += h[m+0]*h[m+0] + h[m+1]*h[m+1]; rri3i3[i] = cor*0.5f; cor += h[m+2]*h[m+2] + h[m+3]*h[m+3]; rri2i2[i] = cor*0.5f; cor += h[m+4]*h[m+4] + h[m+5]*h[m+5]; rri1i1[i] = cor*0.5f; cor += h[m+6]*h[m+6] + h[m+7]*h[m+7]; rri0i0[i] = cor*0.5f; m += 8; } // Compute elements of: rri0i1[], rri0i3[], rri1i2[] and rri2i3[] h2 = h+2; p3 = rri2i3 + MSIZE-1; p2 = rri1i2 + MSIZE-1; p1 = rri0i1 + MSIZE-1; p0 = rri0i3 + MSIZE-2; for (k=0; k ((*psbest) * a[k])) { *psbest = a[k+8]; *abest = a[k]; bestk = k; } } return(bestk); } //------------------------------------------------------------------------- // routine D4i64_LBC // ~~~~~~~~~ // Algebraic codebook for LBC. // -> 17 bits; 4 pulses in a frame of 60 samples // // The code length is 60, containing 4 nonzero pulses i0, i1, i2, i3. // Each pulses can have 8 possible positions (positive or negative): // // i0 (+-1) : 0, 8, 16, 24, 32, 40, 48, 56 // i1 (+-1) : 2, 10, 18, 26, 34, 42, 50, 58 // i2 (+-1) : 4, 12, 20, 28, 36, 44, 52, (60) // i3 (+-1) : 6, 14, 22, 30, 38, 46, 54, (62) // // All the pulse can be shift by one. // The last position of the last 2 pulse falls outside the // frame and signifies that the pulse is not present. // // Input arguments: // // Dn[] Correlation between target vector and impulse response h[] // rr[] Correlations of impulse response h[] // h[] Impulse response of filters // // Output arguments: // // cod[] Selected algebraic codeword // y[] Filtered codeword // code_shift Shift of the codeword // sign Signs of the 4 pulses. // // return: Index of selected codevector // // The threshold control if a section of the innovative // codebook should be searched or not. // //-------------------------------------------------------------------- int D4i64_LBC(float Dn[], float rr[], float h[], float cod[], float y[], int *code_shift, int *sign, int flags) { int ip[4]; int i0, i1, i2, i3, ip0, ip1, ip2, ip3; int i, j; int shif; float means, max0, max1, max2, thres; float *rri0i0,*rri1i1,*rri2i2,*rri3i3; float *rri0i1,*rri0i2,*rri0i3; float *rri1i2,*rri1i3,*rri2i3; // float *ptr_ri0i0,*ptr_ri1i1,*ptr_ri2i2; float *ptr_ri0i1,*ptr_ri0i2,*ptr_ri0i3; float *ptr_ri1i2,*ptr_ri1i3,*ptr_ri2i3; int p_sign[SubFrLen2/2]; // float p_sign[SubFrLen2/2],p_sign2[SubFrLen2/2]; // Init pointers rri0i0 = rr; rri1i1 = rri0i0 + NB_POS; rri2i2 = rri1i1 + NB_POS; rri3i3 = rri2i2 + NB_POS; rri0i1 = rri3i3 + NB_POS; rri0i2 = rri0i1 + MSIZE; rri0i3 = rri0i2 + MSIZE; rri1i2 = rri0i3 + MSIZE; rri1i3 = rri1i2 + MSIZE; rri2i3 = rri1i3 + MSIZE; // Extend the backward filtered target vector by zeros for (i=SubFrLen; i < SubFrLen2; i++) Dn[i] = 0.0f; // Chose the sign of the impulse. for (i=0; i= 0.0f) { p_sign[i/2] = 0x00000000; // p_sign[i/2] = 1.0f; // p_sign2[i/2] = 2.0f; } else { p_sign[i/2] = 0x80000000; // p_sign[i/2] = -1.0f; // p_sign2[i/2] = -2.0f; Dn[i] = -Dn[i]; Dn[i+1] = -Dn[i+1]; } } p_sign[30] = p_sign[31] = 0x00000000; // p_sign[30] = p_sign[31] = 1.0f; // p_sign2[30] = p_sign2[31] = 2.0f; // - Compute the search threshold after three pulses // odd positions // Find maximum of Dn[i0]+Dn[i1]+Dn[i2] max0 = Dn[0]; max1 = Dn[2]; max2 = Dn[4]; for (i=8; i < SubFrLen; i+=STEP) { if (Dn[i] > max0) max0 = Dn[i]; if (Dn[i+2] > max1) max1 = Dn[i+2]; if (Dn[i+4] > max2) max2 = Dn[i+4]; } max0 = max0 + max1 + max2; // Find means of Dn[i0]+Dn[i1]+Dn[i] means = 0.0f; for (i=0; i < SubFrLen; i+=STEP) means += Dn[i+4] + Dn[i+2] + Dn[i]; means *= 0.125f; if (flags & SC_THRES) thres = means*0.25f + max0*0.75f; else thres = means + (max0-means)*0.5f; // even positions // Find maximum of Dn[i0]+Dn[i1]+Dn[i2] max0 = Dn[1]; max1 = Dn[3]; max2 = Dn[5]; for (i=9; i < SubFrLen; i+=STEP) { if (Dn[i] > max0) max0 = Dn[i]; if (Dn[i+2] > max1) max1 = Dn[i+2]; if (Dn[i+4] > max2) max2 = Dn[i+4]; } max0 = max0 + max1 + max2; // Find means of Dn[i0]+Dn[i1]+Dn[i2] means = 0.0f; for (i=1; i < SubFrLen; i+=STEP) means += Dn[i+4] + Dn[i+2] + Dn[i]; means *= 0.125f; if (flags & SC_THRES) max1 = means*0.25f + max0*0.75f; else max1 = means + (max0-means)*0.5f; // Keep maximum threshold between odd and even position if(max1 > thres) thres = max1; // Modification of rrixiy[] to take signs into account. //TIMER_STAMP(a); ptr_ri0i1 = rri0i1; ptr_ri0i2 = rri0i2; ptr_ri0i3 = rri0i3; for(i0=0; i0> 1)]>=0?1:-1); i1 = (p_sign[(ip1 >> 1)]>=0?1:-1); i2 = (p_sign[(ip2 >> 1)]>=0?1:-1); i3 = (p_sign[(ip3 >> 1)]>=0?1:-1); // Find the codeword corresponding to the selected positions for(i=0; i 0) { ip0++; ip1++; ip2++; ip3++; } //printf("%3d %3d %3d %3d\n",ip0*i0,ip1*i1,ip2*i2,ip3*i3); cod[ip0] = (float)i0; cod[ip1] = (float)i1; if(ip2 0) for(i=ip0, j=0; i 0) for(i=ip1, j=0; i 0) for(i=ip2, j=0; i 0) for(i=ip3, j=0; i 0) *sign += 1; if(i1 > 0) *sign += 2; if(i2 > 0) *sign += 4; if(i3 > 0) *sign += 8; i = ((ip3 >> 3) << 9) + ((ip2 >> 3) << 6) + ((ip1 >> 3) << 3) + (ip0 >> 3); //TIMER_STAMP(d); return i; } //-------------------------------------------------------------------- int G_code(float X[], float Y[], float *gain_q) { int i; float xy, yy, gain_nq; int gain; float dist, dist_min; // Compute scalar product xy = DotProd(X,Y,SubFrLen); // Be sure xy < yy if(xy <= 0) { gain = 0; *gain_q =FcbkGainTable[gain]; return(gain); } // Compute scalar product yy = DotProd(Y,Y,SubFrLen); if (yy != 0.0f) gain_nq = xy/yy * 0.5f; else gain_nq = 0.0f; gain = 0; dist_min = (float)fabs(gain_nq - FcbkGainTable[0]); for (i=1; i dmax2) dmax2 = Dn[k]; if (Dn[k+1] > dmax3) dmax3 = Dn[k+1]; if (Dn[k+2] > dmax4) dmax4 = Dn[k+2]; if (Dn[k+3] > dmax5) dmax5 = Dn[k+3]; } // first pulse loop for (i0=0; i0 < SubFrLen; i0 +=STEP) { ps0 = Dn[i0]; ps0a = Dn[i0+1]; alp0 = *ptr_ri0i0++; // Init. pointers that depand on second loop ptr_ri1i1 = rri1i1; ptr_ri1i2 = rri1i2; ptr_ri1i3 = rri1i3; ps1 = ps0 + dmax2 + dmax4; ps1a = ps0a + dmax3 + dmax5; if (asint(ps1) < asint(thres) && asint(ps1a) < asint(thres)) { ptr_ri0i1 += NB_POS; goto skipsecond; } // second pulse loop for (i1=2; i1 < SubFrLen; i1 +=STEP) { ps1 = ps0 + Dn[i1]; ps1a = ps0a + Dn[i1+1]; alp1 = alp0 + *ptr_ri1i1++ + *ptr_ri0i1++; // Init. pointers that depend on third loop ptr_ri2i2 = rri2i2; ptr_ri2i3 = rri2i3; lasti2 = 4; ps2 = ps1 + dmax4; ps2a = ps1a + dmax5; if (asint(ps2) < asint(thres) && asint(ps2a) < asint(thres)) { i2 = 68; goto skipthird; } // third pulse loop for (i2 = 4; i2 < SubFrLen2; i2 +=STEP) { ps2 = ps1 + Dn[i2]; ps2a = ps1a + Dn[i2+1]; // Threshold test and 4th pulse loop. Since the probability of // entering this is low, we cram as much of the 3rd-pulse-loop // logic inside the threshold test. So the computation of shift, // the choice of ps2 vs ps2a, the computation of alp2, and the // incrementing of the 02,12,22 pointers are all done there. if (asint(ps2) > asint(thres) || asint(ps2a) > asint(thres)) { shift = 0; if(asint(ps2a) > asint(ps2)) { shift = 1; ps2 = ps2a; } inc = (i2 - lasti2) >> 3; lasti2 = i2; ptr_ri0i2 += inc; ptr_ri1i2 += inc; ptr_ri2i2 += inc; alp2 = alp1 + *ptr_ri2i2 + *ptr_ri0i2 + *ptr_ri1i2; pntr = &Dn[6+shift]; #if OPT_PULSE4 ASM { push esi; push ebx; mov esi,pntr; ;// First half of first loop fld DP [esi+4*8*0]; fld DP [esi+4*8*1]; fld DP [esi+4*8*2]; fld DP [esi+4*8*3]; fxch ST(3); fadd ps2; fxch ST(2); fadd ps2; fxch ST(1); fadd ps2; fxch ST(3); fadd ps2; fxch ST(2); fmul ST,ST(0); fxch ST(1); fmul ST,ST(0); fxch ST(3); fmul ST,ST(0); fxch ST(2); fmul ST,ST(0); fxch ST(1); fstp a[4*8]; fxch ST(2); fstp a[4*9]; fstp a[4*10]; fstp a[4*11]; ;// Second half of first loop fld DP [esi+4*8*4]; fld DP [esi+4*8*5]; fld DP [esi+4*8*6]; fld DP [esi+4*8*7]; fxch ST(3); fadd ps2; fxch ST(2); fadd ps2; fxch ST(1); fadd ps2; fxch ST(3); fadd ps2; fxch ST(2); fmul ST,ST(0); fxch ST(1); fmul ST,ST(0); fxch ST(3); fmul ST,ST(0); fxch ST(2); fmul ST,ST(0); fxch ST(1); fstp a[4*12]; fxch ST(2); fstp a[4*13]; fstp a[4*14]; fstp a[4*15]; ;// First half of second loop mov eax,rri3i3; mov ebx,ptr_ri0i3; mov ecx,ptr_ri1i3; mov edx,ptr_ri2i3; fld alp2; fld alp2; fld alp2; fld alp2; fxch ST(3); fadd DP [eax+4*0]; fxch ST(2); fadd DP [eax+4*1]; fxch ST(1); fadd DP [eax+4*2]; fxch ST(3); fadd DP [eax+4*3]; fxch ST(2); fadd DP [ebx+4*0]; fxch ST(1); fadd DP [ebx+4*1]; fxch ST(3); fadd DP [ebx+4*2]; fxch ST(2); fadd DP [ebx+4*3]; fxch ST(1); fadd DP [ecx+4*0]; fxch ST(3); fadd DP [ecx+4*1]; fxch ST(2); fadd DP [ecx+4*2]; fxch ST(1); fadd DP [ecx+4*3]; fxch ST(3); fadd DP [edx+4*0]; fxch ST(2); fadd DP [edx+4*1]; fxch ST(1); fadd DP [edx+4*2]; fxch ST(3); fadd DP [edx+4*3]; fxch ST(2); fstp a[4*0]; fstp a[4*1]; fxch ST(1); fstp a[4*2]; fstp a[4*3]; ;// Second half of second loop fld alp2; fld alp2; fld alp2; fld alp2; fxch ST(3); fadd DP [eax+4*4]; fxch ST(2); fadd DP [eax+4*5]; fxch ST(1); fadd DP [eax+4*6]; fxch ST(3); fadd DP [eax+4*7]; fxch ST(2); fadd DP [ebx+4*4]; fxch ST(1); fadd DP [ebx+4*5]; fxch ST(3); fadd DP [ebx+4*6]; fxch ST(2); fadd DP [ebx+4*7]; fxch ST(1); fadd DP [ecx+4*4]; fxch ST(3); fadd DP [ecx+4*5]; fxch ST(2); fadd DP [ecx+4*6]; fxch ST(1); fadd DP [ecx+4*7]; fxch ST(3); fadd DP [edx+4*4]; fxch ST(2); fadd DP [edx+4*5]; fxch ST(1); fadd DP [edx+4*6]; fxch ST(3); fadd DP [edx+4*7]; fxch ST(2); fstp a[4*4]; fstp a[4*5]; fxch ST(1); fstp a[4*6]; fstp a[4*7]; pop ebx; pop esi; } #else for (k=0; k<8; k++) { ps3 = ps2 + *pntr; pntr += STEP; a[k+8] = ps3 * ps3; } for (k=0; k<8; k++) a[k] = alp2 + rri3i3[k] + ptr_ri0i3[k] + ptr_ri1i3[k] + ptr_ri2i3[k]; #endif bestk = -1; for (k=0; k<8; k++) { t1 = a[k+8] * alpha; t2 = psc * a[k]; if (asint(t1) > asint(t2)) { psc = a[k+8]; alpha = a[k]; bestk = k; } } if (bestk >= 0) { ip0 = i0; ip1 = i1; ip2 = i2; ip3 = 6 + (bestk << 3); shif = shift; //#define t32 4294967296.0f // printf(" %3d %3d %3d %3d %d %f %f %f\n",ip0,ip1,ip2,ip3,shift,psc/thres/thres,alpha/thres,(float)psc/(float)alpha/thres); } time--; if(time <= 0) goto end_search; } ptr_ri2i3 += NB_POS; } skipthird: inc = (i2 - lasti2) >> 3; ptr_ri0i2 += inc; ptr_ri1i2 += inc; ptr_ri2i2 += inc; // end of for i2 = ptr_ri0i2 -= NB_POS; ptr_ri1i3 += NB_POS; } skipsecond: // end of for i1 = ptr_ri0i2 += NB_POS; ptr_ri0i3 += NB_POS; } // end of for i0 = end_search: extra = time; ip[0] = ip0; ip[1] = ip1; ip[2] = ip2; ip[3] = ip3; *shifPtr = shif; return; }