Leaked source code of Windows Server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1009 lines
24 KiB

  1. //cb53.c - 5.3 rate codebook code
  2. #include "opt.h"
  3. #include <stdlib.h>
  4. #include <stdio.h>
  5. #include <math.h>
  6. #include <memory.h>
  7. #include "typedef.h"
  8. #include "cst_lbc.h"
  9. #include "tab_lbc.h"
  10. #include "util_lbc.h"
  11. #include "exc_lbc.h"
  12. #include "timer.h"
  13. #include "mmxutil.h"
  14. void fourPulseFlt (float *rr, float *Dn, float thres, int ip[], int *shiftPtr);
  15. //--------------------------------------------------------
  16. int extra;
  17. void reset_max_time(void)
  18. {
  19. extra = 120;
  20. }
  21. //------------------------------------------------------------
  22. int ACELP_LBC_code(float X[], float h[], int T0, float code[],
  23. int *ind_gain, int *shift, int *sign, float gain_T0, int flags)
  24. {
  25. int i, index;
  26. float gain_q;
  27. float Dn[SubFrLen2], tmp_code[SubFrLen2];
  28. float rr[DIM_RR];
  29. // Include fixed-gain pitch contribution into impulse resp. h[]
  30. if (T0 < SubFrLen-2)
  31. for (i = T0; i < SubFrLen; i++)
  32. h[i] += gain_T0*h[i-T0];
  33. // Compute correlations of h[] needed for the codebook search
  34. Cor_h(h, rr);
  35. // Compute correlation of target vector with impulse response.
  36. Cor_h_X(h, X, Dn);
  37. // Find codebook index
  38. index = D4i64_LBC(Dn, rr, h, tmp_code, rr, shift, sign, flags);
  39. // Compute innovation vector gain.
  40. // Include fixed-gain pitch contribution into code[].
  41. *ind_gain = G_code(X, rr, &gain_q);
  42. for (i=0; i < SubFrLen; i++)
  43. code[i] = tmp_code[i]*gain_q;
  44. if(T0 < SubFrLen-2)
  45. for (i=T0; i < SubFrLen; i++)
  46. code[i] += code[i-T0]*gain_T0;
  47. return index;
  48. }
  49. //---------------------------------------------------------------
  50. void Cor_h(float *H, float *rr)
  51. {
  52. // Compute correlations of h[] needed for the codebook search.
  53. // h[] :Impulse response.
  54. // rr[] :Correlations.
  55. float *rri0i0, *rri1i1, *rri2i2, *rri3i3;
  56. float *rri0i1, *rri0i2, *rri0i3;
  57. float *rri1i2, *rri1i3, *rri2i3;
  58. float *p0, *p1, *p2, *p3;
  59. float cor, *h2;
  60. int i, k, m, t;
  61. float h[SubFrLen2];
  62. for(i=0; i<SubFrLen; i++)
  63. h[i+4] = H[i];
  64. for(i=0; i<4; i++)
  65. h[i] = 0.0f;
  66. // Init pointers
  67. rri0i0 = rr;
  68. rri1i1 = rri0i0 + NB_POS;
  69. rri2i2 = rri1i1 + NB_POS;
  70. rri3i3 = rri2i2 + NB_POS;
  71. rri0i1 = rri3i3 + NB_POS;
  72. rri0i2 = rri0i1 + MSIZE;
  73. rri0i3 = rri0i2 + MSIZE;
  74. rri1i2 = rri0i3 + MSIZE;
  75. rri1i3 = rri1i2 + MSIZE;
  76. rri2i3 = rri1i3 + MSIZE;
  77. // Compute rri0i0[], rri1i1[], rri2i2[] and rri3i3[]
  78. cor = 0.0f;
  79. m = 0;
  80. for(i=NB_POS-1; i>=0; i--)
  81. {
  82. cor += h[m+0]*h[m+0] + h[m+1]*h[m+1]; rri3i3[i] = cor*0.5f;
  83. cor += h[m+2]*h[m+2] + h[m+3]*h[m+3]; rri2i2[i] = cor*0.5f;
  84. cor += h[m+4]*h[m+4] + h[m+5]*h[m+5]; rri1i1[i] = cor*0.5f;
  85. cor += h[m+6]*h[m+6] + h[m+7]*h[m+7]; rri0i0[i] = cor*0.5f;
  86. m += 8;
  87. }
  88. // Compute elements of: rri0i1[], rri0i3[], rri1i2[] and rri2i3[]
  89. h2 = h+2;
  90. p3 = rri2i3 + MSIZE-1;
  91. p2 = rri1i2 + MSIZE-1;
  92. p1 = rri0i1 + MSIZE-1;
  93. p0 = rri0i3 + MSIZE-2;
  94. for (k=0; k<NB_POS; k++)
  95. {
  96. cor = 0.0f;
  97. m = 0;
  98. t = 0;
  99. for(i=k+1; i<NB_POS; i++)
  100. {
  101. cor += h[m+0]*h2[m+0] + h[m+1]*h2[m+1]; p3[t] = cor;
  102. cor += h[m+2]*h2[m+2] + h[m+3]*h2[m+3]; p2[t] = cor;
  103. cor += h[m+4]*h2[m+4] + h[m+5]*h2[m+5]; p1[t] = cor;
  104. cor += h[m+6]*h2[m+6] + h[m+7]*h2[m+7]; p0[t] = cor;
  105. t -= (NB_POS+1);
  106. m += 8;
  107. }
  108. cor += h[m+0]*h2[m+0] + h[m+1]*h2[m+1]; p3[t] = cor;
  109. cor += h[m+2]*h2[m+2] + h[m+3]*h2[m+3]; p2[t] = cor;
  110. cor += h[m+4]*h2[m+4] + h[m+5]*h2[m+5]; p1[t] = cor;
  111. h2 += STEP;
  112. p3 -= NB_POS;
  113. p2 -= NB_POS;
  114. p1 -= NB_POS;
  115. p0 -= 1;
  116. }
  117. // Compute elements of: rri0i2[], rri1i3[]
  118. h2 = h+4;
  119. p3 = rri1i3 + MSIZE-1;
  120. p2 = rri0i2 + MSIZE-1;
  121. p1 = rri1i3 + MSIZE-2;
  122. p0 = rri0i2 + MSIZE-2;
  123. for (k=0; k<NB_POS; k++)
  124. {
  125. cor = 0.0f;
  126. m = 0;
  127. t = 0;
  128. for(i=k+1; i<NB_POS; i++)
  129. {
  130. cor += h[m+0]*h2[m+0] + h[m+1]*h2[m+1]; p3[t] = cor;
  131. cor += h[m+2]*h2[m+2] + h[m+3]*h2[m+3]; p2[t] = cor;
  132. cor += h[m+4]*h2[m+4] + h[m+5]*h2[m+5]; p1[t] = cor;
  133. cor += h[m+6]*h2[m+6] + h[m+7]*h2[m+7]; p0[t] = cor;
  134. t -= (NB_POS+1);
  135. m += 8;
  136. }
  137. cor += h[m+0]*h2[m+0] + h[m+1]*h2[m+1]; p3[t] = cor;
  138. cor += h[m+2]*h2[m+2] + h[m+3]*h2[m+3]; p2[t] = cor;
  139. h2 += STEP;
  140. p3 -= NB_POS;
  141. p2 -= NB_POS;
  142. p1 -= 1;
  143. p0 -= 1;
  144. }
  145. // Compute elements of: rri0i1[], rri0i3[], rri1i2[] and rri2i3[]
  146. h2 = h+6;
  147. p3 = rri0i3 + MSIZE-1;
  148. p2 = rri2i3 + MSIZE-2;
  149. p1 = rri1i2 + MSIZE-2;
  150. p0 = rri0i1 + MSIZE-2;
  151. for (k=0; k<NB_POS; k++)
  152. {
  153. cor = 0.0f;
  154. m = 0;
  155. t = 0;
  156. for(i=k+1; i<NB_POS; i++)
  157. {
  158. cor += h[m+0]*h2[m+0] + h[m+1]*h2[m+1]; p3[t] = cor;
  159. cor += h[m+2]*h2[m+2] + h[m+3]*h2[m+3]; p2[t] = cor;
  160. cor += h[m+4]*h2[m+4] + h[m+5]*h2[m+5]; p1[t] = cor;
  161. cor += h[m+6]*h2[m+6] + h[m+7]*h2[m+7]; p0[t] = cor;
  162. t -= (NB_POS+1);
  163. m += 8;
  164. }
  165. cor += h[m+0]*h2[m+0] + h[m+1]*h2[m+1]; p3[t] = cor;
  166. h2 += STEP;
  167. p3 -= NB_POS;
  168. p2 -= 1;
  169. p1 -= 1;
  170. p0 -= 1;
  171. }
  172. return;
  173. }
  174. //---------------------------------------------------------------------------
  175. void Cor_h_X(float h[],float X[],float D[])
  176. {
  177. int i;
  178. for (i=0; i < SubFrLen; i++)
  179. D[i] = DotProd(&X[i],h,(SubFrLen-i));
  180. return;
  181. }
  182. //-------------------------------------------------------------------------
  183. Find_Pulse4(float *Dn,float *rri3i3,float *ptr_ri0i3,float *ptr_ri1i3,
  184. float *ptr_ri2i3,float *ptr, float ps2,float alp2,float *psbest,float *abest)
  185. {
  186. int k,bestk;
  187. float ps3;
  188. float a[16];
  189. for (k=0; k<8; k++)
  190. {
  191. ps3 = ps2 + *ptr;
  192. a[k] = alp2 + rri3i3[k] + ptr_ri0i3[k] + ptr_ri1i3[k] + ptr_ri2i3[k];
  193. a[k+8] = ps3 * ps3;
  194. ptr += STEP;
  195. }
  196. bestk = -1;
  197. for (k=0; k<8; k++)
  198. {
  199. if((a[k+8] * (*abest)) > ((*psbest) * a[k]))
  200. {
  201. *psbest = a[k+8];
  202. *abest = a[k];
  203. bestk = k;
  204. }
  205. }
  206. return(bestk);
  207. }
  208. //-------------------------------------------------------------------------
  209. // routine D4i64_LBC
  210. // ~~~~~~~~~
  211. // Algebraic codebook for LBC.
  212. // -> 17 bits; 4 pulses in a frame of 60 samples
  213. //
  214. // The code length is 60, containing 4 nonzero pulses i0, i1, i2, i3.
  215. // Each pulses can have 8 possible positions (positive or negative):
  216. //
  217. // i0 (+-1) : 0, 8, 16, 24, 32, 40, 48, 56
  218. // i1 (+-1) : 2, 10, 18, 26, 34, 42, 50, 58
  219. // i2 (+-1) : 4, 12, 20, 28, 36, 44, 52, (60)
  220. // i3 (+-1) : 6, 14, 22, 30, 38, 46, 54, (62)
  221. //
  222. // All the pulse can be shift by one.
  223. // The last position of the last 2 pulse falls outside the
  224. // frame and signifies that the pulse is not present.
  225. //
  226. // Input arguments:
  227. //
  228. // Dn[] Correlation between target vector and impulse response h[]
  229. // rr[] Correlations of impulse response h[]
  230. // h[] Impulse response of filters
  231. //
  232. // Output arguments:
  233. //
  234. // cod[] Selected algebraic codeword
  235. // y[] Filtered codeword
  236. // code_shift Shift of the codeword
  237. // sign Signs of the 4 pulses.
  238. //
  239. // return: Index of selected codevector
  240. //
  241. // The threshold control if a section of the innovative
  242. // codebook should be searched or not.
  243. //
  244. //--------------------------------------------------------------------
  245. int D4i64_LBC(float Dn[], float rr[], float h[], float cod[],
  246. float y[], int *code_shift, int *sign, int flags)
  247. {
  248. int ip[4];
  249. int i0, i1, i2, i3, ip0, ip1, ip2, ip3;
  250. int i, j;
  251. int shif;
  252. float means, max0, max1, max2, thres;
  253. float *rri0i0,*rri1i1,*rri2i2,*rri3i3;
  254. float *rri0i1,*rri0i2,*rri0i3;
  255. float *rri1i2,*rri1i3,*rri2i3;
  256. // float *ptr_ri0i0,*ptr_ri1i1,*ptr_ri2i2;
  257. float *ptr_ri0i1,*ptr_ri0i2,*ptr_ri0i3;
  258. float *ptr_ri1i2,*ptr_ri1i3,*ptr_ri2i3;
  259. int p_sign[SubFrLen2/2];
  260. // float p_sign[SubFrLen2/2],p_sign2[SubFrLen2/2];
  261. // Init pointers
  262. rri0i0 = rr;
  263. rri1i1 = rri0i0 + NB_POS;
  264. rri2i2 = rri1i1 + NB_POS;
  265. rri3i3 = rri2i2 + NB_POS;
  266. rri0i1 = rri3i3 + NB_POS;
  267. rri0i2 = rri0i1 + MSIZE;
  268. rri0i3 = rri0i2 + MSIZE;
  269. rri1i2 = rri0i3 + MSIZE;
  270. rri1i3 = rri1i2 + MSIZE;
  271. rri2i3 = rri1i3 + MSIZE;
  272. // Extend the backward filtered target vector by zeros
  273. for (i=SubFrLen; i < SubFrLen2; i++)
  274. Dn[i] = 0.0f;
  275. // Chose the sign of the impulse.
  276. for (i=0; i<SubFrLen; i+=2)
  277. {
  278. if((Dn[i] + Dn[i+1]) >= 0.0f)
  279. {
  280. p_sign[i/2] = 0x00000000;
  281. // p_sign[i/2] = 1.0f;
  282. // p_sign2[i/2] = 2.0f;
  283. }
  284. else
  285. {
  286. p_sign[i/2] = 0x80000000;
  287. // p_sign[i/2] = -1.0f;
  288. // p_sign2[i/2] = -2.0f;
  289. Dn[i] = -Dn[i];
  290. Dn[i+1] = -Dn[i+1];
  291. }
  292. }
  293. p_sign[30] = p_sign[31] = 0x00000000;
  294. // p_sign[30] = p_sign[31] = 1.0f;
  295. // p_sign2[30] = p_sign2[31] = 2.0f;
  296. // - Compute the search threshold after three pulses
  297. // odd positions
  298. // Find maximum of Dn[i0]+Dn[i1]+Dn[i2]
  299. max0 = Dn[0];
  300. max1 = Dn[2];
  301. max2 = Dn[4];
  302. for (i=8; i < SubFrLen; i+=STEP)
  303. {
  304. if (Dn[i] > max0) max0 = Dn[i];
  305. if (Dn[i+2] > max1) max1 = Dn[i+2];
  306. if (Dn[i+4] > max2) max2 = Dn[i+4];
  307. }
  308. max0 = max0 + max1 + max2;
  309. // Find means of Dn[i0]+Dn[i1]+Dn[i]
  310. means = 0.0f;
  311. for (i=0; i < SubFrLen; i+=STEP)
  312. means += Dn[i+4] + Dn[i+2] + Dn[i];
  313. means *= 0.125f;
  314. if (flags & SC_THRES)
  315. thres = means*0.25f + max0*0.75f;
  316. else
  317. thres = means + (max0-means)*0.5f;
  318. // even positions
  319. // Find maximum of Dn[i0]+Dn[i1]+Dn[i2]
  320. max0 = Dn[1];
  321. max1 = Dn[3];
  322. max2 = Dn[5];
  323. for (i=9; i < SubFrLen; i+=STEP)
  324. {
  325. if (Dn[i] > max0) max0 = Dn[i];
  326. if (Dn[i+2] > max1) max1 = Dn[i+2];
  327. if (Dn[i+4] > max2) max2 = Dn[i+4];
  328. }
  329. max0 = max0 + max1 + max2;
  330. // Find means of Dn[i0]+Dn[i1]+Dn[i2]
  331. means = 0.0f;
  332. for (i=1; i < SubFrLen; i+=STEP)
  333. means += Dn[i+4] + Dn[i+2] + Dn[i];
  334. means *= 0.125f;
  335. if (flags & SC_THRES)
  336. max1 = means*0.25f + max0*0.75f;
  337. else
  338. max1 = means + (max0-means)*0.5f;
  339. // Keep maximum threshold between odd and even position
  340. if(max1 > thres) thres = max1;
  341. // Modification of rrixiy[] to take signs into account.
  342. //TIMER_STAMP(a);
  343. ptr_ri0i1 = rri0i1;
  344. ptr_ri0i2 = rri0i2;
  345. ptr_ri0i3 = rri0i3;
  346. for(i0=0; i0<SubFrLen/2; i0+=STEP/2)
  347. {
  348. for(i1=2/2; i1<SubFrLen/2; i1+=STEP/2)
  349. {
  350. (int)*ptr_ri0i1++ = (asint(*ptr_ri0i1) ^ p_sign[i0] ^ p_sign[i1]);
  351. (int)*ptr_ri0i2++ = (asint(*ptr_ri0i2) ^ p_sign[i0] ^ p_sign[i1+1]);
  352. (int)*ptr_ri0i3++ = (asint(*ptr_ri0i3) ^ p_sign[i0] ^ p_sign[i1+2]);
  353. }
  354. }
  355. ptr_ri1i2 = rri1i2;
  356. ptr_ri1i3 = rri1i3;
  357. for(i1=2/2; i1<SubFrLen/2; i1+=STEP/2)
  358. {
  359. for(i2=4/2; i2<SubFrLen2/2; i2+=STEP/2)
  360. {
  361. (int)*ptr_ri1i2++ = (asint(*ptr_ri1i2) ^ p_sign[i1] ^ p_sign[i2]);
  362. (int)*ptr_ri1i3++ = (asint(*ptr_ri1i3) ^ p_sign[i1] ^ p_sign[i2+1]);
  363. }
  364. }
  365. ptr_ri2i3 = rri2i3;
  366. for(i2=4/2; i2<SubFrLen2/2; i2+=STEP/2)
  367. {
  368. for(i3=6/2; i3<SubFrLen2/2; i3+=STEP/2)
  369. (int)*ptr_ri2i3++ = (asint(*ptr_ri2i3) ^ p_sign[i2] ^ p_sign[i3]);
  370. }
  371. //TIMER_STAMP(b);
  372. fourPulseFlt(rr, Dn, thres, ip, code_shift);
  373. //TIMER_STAMP(c);
  374. ip0 = ip[0];
  375. ip1 = ip[1];
  376. ip2 = ip[2];
  377. ip3 = ip[3];
  378. shif = *code_shift;
  379. // Set the sign of impulses
  380. i0 = (p_sign[(ip0 >> 1)]>=0?1:-1);
  381. i1 = (p_sign[(ip1 >> 1)]>=0?1:-1);
  382. i2 = (p_sign[(ip2 >> 1)]>=0?1:-1);
  383. i3 = (p_sign[(ip3 >> 1)]>=0?1:-1);
  384. // Find the codeword corresponding to the selected positions
  385. for(i=0; i<SubFrLen; i++)
  386. cod[i] = 0.0f;
  387. if(shif > 0)
  388. {
  389. ip0++;
  390. ip1++;
  391. ip2++;
  392. ip3++;
  393. }
  394. //printf("%3d %3d %3d %3d\n",ip0*i0,ip1*i1,ip2*i2,ip3*i3);
  395. cod[ip0] = (float)i0;
  396. cod[ip1] = (float)i1;
  397. if(ip2<SubFrLen)
  398. cod[ip2] = (float)i2;
  399. if(ip3<SubFrLen)
  400. cod[ip3] = (float)i3;
  401. // find the filtered codeword
  402. for (i=0; i < SubFrLen; i++)
  403. y[i] = 0.0f;
  404. if(i0 > 0)
  405. for(i=ip0, j=0; i<SubFrLen; i++, j++)
  406. y[i] = y[i] + h[j];
  407. else
  408. for(i=ip0, j=0; i<SubFrLen; i++, j++)
  409. y[i] = y[i] - h[j];
  410. if(i1 > 0)
  411. for(i=ip1, j=0; i<SubFrLen; i++, j++)
  412. y[i] = y[i] + h[j];
  413. else
  414. for(i=ip1, j=0; i<SubFrLen; i++, j++)
  415. y[i] = y[i] - h[j];
  416. if(ip2<SubFrLen)
  417. {
  418. if(i2 > 0)
  419. for(i=ip2, j=0; i<SubFrLen; i++, j++)
  420. y[i] = y[i] + h[j];
  421. else
  422. for(i=ip2, j=0; i<SubFrLen; i++, j++)
  423. y[i] = y[i] - h[j];
  424. }
  425. if(ip3<SubFrLen)
  426. {
  427. if(i3 > 0)
  428. for(i=ip3, j=0; i<SubFrLen; i++, j++)
  429. y[i] = y[i] + h[j];
  430. else
  431. for(i=ip3, j=0; i<SubFrLen; i++, j++)
  432. y[i] = y[i] - h[j];
  433. }
  434. // find codebook index; 17-bit address
  435. *code_shift = shif;
  436. *sign = 0;
  437. if(i0 > 0) *sign += 1;
  438. if(i1 > 0) *sign += 2;
  439. if(i2 > 0) *sign += 4;
  440. if(i3 > 0) *sign += 8;
  441. i = ((ip3 >> 3) << 9) + ((ip2 >> 3) << 6) + ((ip1 >> 3) << 3) + (ip0 >> 3);
  442. //TIMER_STAMP(d);
  443. return i;
  444. }
  445. //--------------------------------------------------------------------
  446. int G_code(float X[], float Y[], float *gain_q)
  447. {
  448. int i;
  449. float xy, yy, gain_nq;
  450. int gain;
  451. float dist, dist_min;
  452. // Compute scalar product <X[],Y[]>
  453. xy = DotProd(X,Y,SubFrLen);
  454. // Be sure xy < yy
  455. if(xy <= 0)
  456. {
  457. gain = 0;
  458. *gain_q =FcbkGainTable[gain];
  459. return(gain);
  460. }
  461. // Compute scalar product <Y[],Y[]>
  462. yy = DotProd(Y,Y,SubFrLen);
  463. if (yy != 0.0f)
  464. gain_nq = xy/yy * 0.5f;
  465. else
  466. gain_nq = 0.0f;
  467. gain = 0;
  468. dist_min = (float)fabs(gain_nq - FcbkGainTable[0]);
  469. for (i=1; i <NumOfGainLev; i++)
  470. {
  471. dist = (float)fabs(gain_nq - FcbkGainTable[i]);
  472. if (dist < dist_min)
  473. {
  474. dist_min = dist;
  475. gain = i;
  476. }
  477. }
  478. *gain_q = FcbkGainTable[gain];
  479. return(gain);
  480. }
  481. //-------------------------------------------------------------------
  482. // Search the optimum positions of the four pulses which maximize
  483. // square(correlation) / energy
  484. // The search is performed in four nested loops. At each loop, one
  485. // pulse contribution is added to the correlation and energy.
  486. //
  487. // The fourth loop is entered only if the correlation due to the
  488. // contribution of the first three pulses exceeds the preset
  489. // threshold.
  490. //-------------------------------------------------------------------
  // fourPulseFlt: nested search over pulse positions i0, i1, i2 and a
  // fourth pulse, keeping the combination with the largest
  // (correlation squared) / energy.
  //
  //   rr       correlation table; it is split below into four NB_POS-long
  //            blocks followed by six MSIZE-long blocks
  //   Dn       correlations; read at the pulse position and at position+1
  //            (the "shifted" alternative)
  //   thres    threshold the three-pulse correlation must exceed before the
  //            fourth pulse is evaluated
  //   ip       output: the four selected positions (defaults 0, 2, 4, 6 when
  //            no candidate is accepted)
  //   shifPtr  output: 0, or 1 when the shifted alternative was selected
  //
  // Side effect: the global 'extra' is rewritten with the unused part of the
  // search budget (see 'time' below).
  //
  // NOTE(review): asint() is not defined in this file; the comparisons that
  // use it presumably compare the bit patterns of the floats as integers --
  // confirm against its definition before touching them.
  491. void fourPulseFlt (float *rr, float *Dn, float thres, int ip[], int *shifPtr){
  492. // Default values
  493. int ip0 = 0;
  494. int ip1 = 2;
  495. int ip2 = 4;
  496. int ip3 = 6;
  497. int shif = 0;
  498. int i0, i1, i2;
  499. int k, time;
  500. int shift, bestk, lasti2, inc;
  // psc / alpha hold the squared correlation and the energy of the best
  // candidate found so far.
  501. float psc = 0.0f;
  502. float alpha = 1.0f;
  503. float ps0, ps1, ps2, alp0;
  504. float alp1, alp2;
  505. float ps0a, ps1a, ps2a;
  506. float *ptr_ri0i0,*ptr_ri1i1,*ptr_ri2i2;
  507. float *ptr_ri0i1,*ptr_ri0i2,*ptr_ri0i3;
  508. float *ptr_ri1i2,*ptr_ri1i3,*ptr_ri2i3;
  509. float *rri0i0,*rri1i1,*rri2i2,*rri3i3;
  510. float *rri0i1,*rri0i2,*rri0i3;
  511. float *rri1i2,*rri1i3,*rri2i3;
  // a[0..7]: energy per fourth-pulse candidate, a[8..15]: squared correlation.
  512. float a[16];
  513. float t1,t2,*pntr;
  514. float dmax4, dmax5, dmax2, dmax3; //used for bypass
  515. #if !OPT_PULSE4
  516. int i3;
  517. float ps3;
  518. #endif
  // Search budget: decremented once per fourth-pulse evaluation; the search
  // stops when it reaches zero and the remainder is stored back in 'extra'.
  519. time = max_time + extra;
  520. // Four loops to search innovation code.
  521. // Init. pointers that depend on first loop
  522. rri0i0 = rr;
  523. rri1i1 = rri0i0 + NB_POS;
  524. rri2i2 = rri1i1 + NB_POS;
  525. rri3i3 = rri2i2 + NB_POS;
  526. rri0i1 = rri3i3 + NB_POS;
  527. rri0i2 = rri0i1 + MSIZE;
  528. rri0i3 = rri0i2 + MSIZE;
  529. rri1i2 = rri0i3 + MSIZE;
  530. rri1i3 = rri1i2 + MSIZE;
  531. rri2i3 = rri1i3 + MSIZE;
  532. ptr_ri0i0 = rri0i0;
  533. ptr_ri0i1 = rri0i1;
  534. ptr_ri0i2 = rri0i2;
  535. ptr_ri0i3 = rri0i3;
  536. // Compute the Dn max's
  // dmax2/dmax3 bound Dn[i1] and Dn[i1+1]; dmax4/dmax5 bound Dn[i2] and
  // Dn[i2+1].  They give an upper bound on what the inner loops can add.
  537. dmax2 = dmax3 = dmax4 = dmax5 = -1000000.0f; //i.e., large negative number
  538. for (k = 2; k<SubFrLen2; k+=STEP)
  539. {
  540. if (Dn[k] > dmax2) dmax2 = Dn[k];
  541. if (Dn[k+1] > dmax3) dmax3 = Dn[k+1];
  542. if (Dn[k+2] > dmax4) dmax4 = Dn[k+2];
  543. if (Dn[k+3] > dmax5) dmax5 = Dn[k+3];
  544. }
  545. // first pulse loop
  546. for (i0=0; i0 < SubFrLen; i0 +=STEP)
  547. {
  // ps0 is the unshifted correlation, ps0a the one for positions shifted by one.
  548. ps0 = Dn[i0];
  549. ps0a = Dn[i0+1];
  550. alp0 = *ptr_ri0i0++;
  551. // Init. pointers that depand on second loop
  552. ptr_ri1i1 = rri1i1;
  553. ptr_ri1i2 = rri1i2;
  554. ptr_ri1i3 = rri1i3;
  // Bypass: if even the best possible i1 and i2 cannot reach the threshold
  // for this i0, skip the second loop.  ptr_ri0i1 is advanced by hand
  // because the skipped loop would have stepped it with ptr_ri0i1++.
  555. ps1 = ps0 + dmax2 + dmax4;
  556. ps1a = ps0a + dmax3 + dmax5;
  557. if (asint(ps1) < asint(thres) && asint(ps1a) < asint(thres))
  558. {
  559. ptr_ri0i1 += NB_POS;
  560. goto skipsecond;
  561. }
  562. // second pulse loop
  563. for (i1=2; i1 < SubFrLen; i1 +=STEP)
  564. {
  565. ps1 = ps0 + Dn[i1];
  566. ps1a = ps0a + Dn[i1+1];
  567. alp1 = alp0 + *ptr_ri1i1++ + *ptr_ri0i1++;
  568. // Init. pointers that depend on third loop
  569. ptr_ri2i2 = rri2i2;
  570. ptr_ri2i3 = rri2i3;
  571. lasti2 = 4;
  // Bypass: if the best possible i2 cannot reach the threshold, skip the
  // third loop.  i2 is set to 68 so that (i2 - lasti2) >> 3 is 8 at
  // skipthird (lasti2 is 4 here), advancing the *i2 pointers by eight entries.
  572. ps2 = ps1 + dmax4;
  573. ps2a = ps1a + dmax5;
  574. if (asint(ps2) < asint(thres) && asint(ps2a) < asint(thres))
  575. {
  576. i2 = 68;
  577. goto skipthird;
  578. }
  579. // third pulse loop
  580. for (i2 = 4; i2 < SubFrLen2; i2 +=STEP)
  581. {
  582. ps2 = ps1 + Dn[i2];
  583. ps2a = ps1a + Dn[i2+1];
  584. // Threshold test and 4th pulse loop. Since the probability of
  585. // entering this is low, we cram as much of the 3rd-pulse-loop
  586. // logic inside the threshold test. So the computation of shift,
  587. // the choice of ps2 vs ps2a, the computation of alp2, and the
  588. // incrementing of the 02,12,22 pointers are all done there.
  589. if (asint(ps2) > asint(thres) || asint(ps2a) > asint(thres))
  590. {
  591. shift = 0;
  592. if(asint(ps2a) > asint(ps2))
  593. {
  594. shift = 1;
  595. ps2 = ps2a;
  596. }
  // Catch the *i2 pointers up by the number of i2 steps taken since they
  // were last moved.
  597. inc = (i2 - lasti2) >> 3;
  598. lasti2 = i2;
  599. ptr_ri0i2 += inc;
  600. ptr_ri1i2 += inc;
  601. ptr_ri2i2 += inc;
  602. alp2 = alp1 + *ptr_ri2i2 + *ptr_ri0i2 + *ptr_ri1i2;
  // First fourth-pulse candidate: position 6, or 7 when shifted.
  603. pntr = &Dn[6+shift];
  // NOTE(review): the ASM branch appears to be a hand-scheduled x87 version
  // of the #else branch (same inputs, results stored into a[]); the
  // subscripts inside it look like byte offsets (4 * element index).
  // Verify before relying on this.
  604. #if OPT_PULSE4
  605. ASM
  606. {
  607. push esi;
  608. push ebx;
  609. mov esi,pntr;
  610. ;// First half of first loop
  611. fld DP [esi+4*8*0];
  612. fld DP [esi+4*8*1];
  613. fld DP [esi+4*8*2];
  614. fld DP [esi+4*8*3];
  615. fxch ST(3);
  616. fadd ps2;
  617. fxch ST(2);
  618. fadd ps2;
  619. fxch ST(1);
  620. fadd ps2;
  621. fxch ST(3);
  622. fadd ps2;
  623. fxch ST(2);
  624. fmul ST,ST(0);
  625. fxch ST(1);
  626. fmul ST,ST(0);
  627. fxch ST(3);
  628. fmul ST,ST(0);
  629. fxch ST(2);
  630. fmul ST,ST(0);
  631. fxch ST(1);
  632. fstp a[4*8];
  633. fxch ST(2);
  634. fstp a[4*9];
  635. fstp a[4*10];
  636. fstp a[4*11];
  637. ;// Second half of first loop
  638. fld DP [esi+4*8*4];
  639. fld DP [esi+4*8*5];
  640. fld DP [esi+4*8*6];
  641. fld DP [esi+4*8*7];
  642. fxch ST(3);
  643. fadd ps2;
  644. fxch ST(2);
  645. fadd ps2;
  646. fxch ST(1);
  647. fadd ps2;
  648. fxch ST(3);
  649. fadd ps2;
  650. fxch ST(2);
  651. fmul ST,ST(0);
  652. fxch ST(1);
  653. fmul ST,ST(0);
  654. fxch ST(3);
  655. fmul ST,ST(0);
  656. fxch ST(2);
  657. fmul ST,ST(0);
  658. fxch ST(1);
  659. fstp a[4*12];
  660. fxch ST(2);
  661. fstp a[4*13];
  662. fstp a[4*14];
  663. fstp a[4*15];
  664. ;// First half of second loop
  665. mov eax,rri3i3;
  666. mov ebx,ptr_ri0i3;
  667. mov ecx,ptr_ri1i3;
  668. mov edx,ptr_ri2i3;
  669. fld alp2;
  670. fld alp2;
  671. fld alp2;
  672. fld alp2;
  673. fxch ST(3);
  674. fadd DP [eax+4*0];
  675. fxch ST(2);
  676. fadd DP [eax+4*1];
  677. fxch ST(1);
  678. fadd DP [eax+4*2];
  679. fxch ST(3);
  680. fadd DP [eax+4*3];
  681. fxch ST(2);
  682. fadd DP [ebx+4*0];
  683. fxch ST(1);
  684. fadd DP [ebx+4*1];
  685. fxch ST(3);
  686. fadd DP [ebx+4*2];
  687. fxch ST(2);
  688. fadd DP [ebx+4*3];
  689. fxch ST(1);
  690. fadd DP [ecx+4*0];
  691. fxch ST(3);
  692. fadd DP [ecx+4*1];
  693. fxch ST(2);
  694. fadd DP [ecx+4*2];
  695. fxch ST(1);
  696. fadd DP [ecx+4*3];
  697. fxch ST(3);
  698. fadd DP [edx+4*0];
  699. fxch ST(2);
  700. fadd DP [edx+4*1];
  701. fxch ST(1);
  702. fadd DP [edx+4*2];
  703. fxch ST(3);
  704. fadd DP [edx+4*3];
  705. fxch ST(2);
  706. fstp a[4*0];
  707. fstp a[4*1];
  708. fxch ST(1);
  709. fstp a[4*2];
  710. fstp a[4*3];
  711. ;// Second half of second loop
  712. fld alp2;
  713. fld alp2;
  714. fld alp2;
  715. fld alp2;
  716. fxch ST(3);
  717. fadd DP [eax+4*4];
  718. fxch ST(2);
  719. fadd DP [eax+4*5];
  720. fxch ST(1);
  721. fadd DP [eax+4*6];
  722. fxch ST(3);
  723. fadd DP [eax+4*7];
  724. fxch ST(2);
  725. fadd DP [ebx+4*4];
  726. fxch ST(1);
  727. fadd DP [ebx+4*5];
  728. fxch ST(3);
  729. fadd DP [ebx+4*6];
  730. fxch ST(2);
  731. fadd DP [ebx+4*7];
  732. fxch ST(1);
  733. fadd DP [ecx+4*4];
  734. fxch ST(3);
  735. fadd DP [ecx+4*5];
  736. fxch ST(2);
  737. fadd DP [ecx+4*6];
  738. fxch ST(1);
  739. fadd DP [ecx+4*7];
  740. fxch ST(3);
  741. fadd DP [edx+4*4];
  742. fxch ST(2);
  743. fadd DP [edx+4*5];
  744. fxch ST(1);
  745. fadd DP [edx+4*6];
  746. fxch ST(3);
  747. fadd DP [edx+4*7];
  748. fxch ST(2);
  749. fstp a[4*4];
  750. fstp a[4*5];
  751. fxch ST(1);
  752. fstp a[4*6];
  753. fstp a[4*7];
  754. pop ebx;
  755. pop esi;
  756. }
  757. #else
  // Portable version: squared correlation, then energy, for each of the
  // eight fourth-pulse candidates.
  758. for (k=0; k<8; k++)
  759. {
  760. ps3 = ps2 + *pntr;
  761. pntr += STEP;
  762. a[k+8] = ps3 * ps3;
  763. }
  764. for (k=0; k<8; k++)
  765. a[k] = alp2 + rri3i3[k] + ptr_ri0i3[k] + ptr_ri1i3[k] + ptr_ri2i3[k];
  766. #endif
  // Keep candidate k when a[k+8]/a[k] beats psc/alpha; the test is written
  // cross-multiplied, so no division is needed.
  767. bestk = -1;
  768. for (k=0; k<8; k++)
  769. {
  770. t1 = a[k+8] * alpha;
  771. t2 = psc * a[k];
  772. if (asint(t1) > asint(t2))
  773. {
  774. psc = a[k+8];
  775. alpha = a[k];
  776. bestk = k;
  777. }
  778. }
  // A candidate in this pass improved on the best so far: record all four
  // positions and the shift that go with it.
  779. if (bestk >= 0)
  780. {
  781. ip0 = i0;
  782. ip1 = i1;
  783. ip2 = i2;
  784. ip3 = 6 + (bestk << 3);
  785. shif = shift;
  786. //#define t32 4294967296.0f
  787. // printf(" %3d %3d %3d %3d %d %f %f %f\n",ip0,ip1,ip2,ip3,shift,psc/thres/thres,alpha/thres,(float)psc/(float)alpha/thres);
  788. }
  // One unit of search budget is spent per fourth-pulse evaluation.
  789. time--;
  790. if(time <= 0)
  791. goto end_search;
  792. }
  793. ptr_ri2i3 += NB_POS;
  794. }
  795. skipthird:
  // Apply whatever advance of the *i2 pointers is still outstanding, then
  // rewind ptr_ri0i2 to the start of the current i0 row for the next i1.
  796. inc = (i2 - lasti2) >> 3;
  797. ptr_ri0i2 += inc;
  798. ptr_ri1i2 += inc;
  799. ptr_ri2i2 += inc;
  800. // end of for i2 =
  801. ptr_ri0i2 -= NB_POS;
  802. ptr_ri1i3 += NB_POS;
  803. }
  804. skipsecond:
  805. // end of for i1 =
  806. ptr_ri0i2 += NB_POS;
  807. ptr_ri0i3 += NB_POS;
  808. }
  809. // end of for i0 =
  810. end_search:
  // Hand the unused budget to the next call and publish the result.
  811. extra = time;
  812. ip[0] = ip0;
  813. ip[1] = ip1;
  814. ip[2] = ip2;
  815. ip[3] = ip3;
  816. *shifPtr = shif;
  817. return;
  818. }