Leaked source code of Windows Server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

505 lines
11 KiB

  1. //cb63.c - 6.3 rate codebook code
  2. #include "opt.h"
  3. #include <windows.h>
  4. #include <stdlib.h>
  5. #include <stdio.h>
  6. #include <math.h>
  7. #include <memory.h>
  8. #include "typedef.h"
  9. #include "cst_lbc.h"
  10. #include "tab_lbc.h"
  11. #include "util_lbc.h"
  12. #include "exc_lbc.h"
  13. #include "timer.h"
  14. #include "mmxutil.h"
  15. //-------------------------------------------------------
  16. void Gen_Trn(float *Dst, float *Src, int Olp)
  17. {
  18. int i;
  19. int Tmp0;
  20. float Tmp[SubFrLen];
  21. Tmp0 = Olp;
  22. for (i=0; i < SubFrLen; i++)
  23. {
  24. Tmp[i] = Src[i];
  25. Dst[i] = Src[i];
  26. }
  27. while (Tmp0 < SubFrLen)
  28. {
  29. for (i=Tmp0; i < SubFrLen; i++)
  30. Dst[i] += Tmp[i-Tmp0];
  31. Tmp0 += Olp;
  32. }
  33. }
  34. //------------------------------------------------------------------------
  35. int Find_L(float *OccPos, float *ImrCorr, float *WrkBlk, float Pamp, int k)
  36. {
  37. #if FT_FINDL
  38. //====== New version using FT trick that removes OccPos test ======
  39. #if FIND_L_OPT
  40. int best;
  41. float max = -32768.0f;
  42. float tmp0,tmp1,tmp2,tmp3,tmp4;
  43. // Simply interleave 5 copies of the inner loop. Since we step
  44. // by 2, this means we do the 60 samples in chunks of 10.
  45. ASM
  46. {
  47. mov edi,WrkBlk;
  48. mov edx,ImrCorr;
  49. mov ecx,k;
  50. loop1:
  51. fld DP[edx+4*ecx+4*0];
  52. fmul Pamp;
  53. fld DP[edx+4*ecx+4*2];
  54. fmul Pamp;
  55. fld DP[edx+4*ecx+4*4];
  56. fmul Pamp;
  57. fld DP[edx+4*ecx+4*6];
  58. fmul Pamp;
  59. fld DP[edx+4*ecx+4*8];// 4 3 2 1 0
  60. fmul Pamp;
  61. fxch ST(4); // 0 3 2 1 4
  62. fsubr DP[edi+4*ecx+4*0];
  63. fxch ST(3); // 1 3 2 0 4
  64. fsubr DP[edi+4*ecx+4*2];
  65. fxch ST(2); // 2 3 1 0 4
  66. fsubr DP[edi+4*ecx+4*4];
  67. fxch ST(1); // 3 2 1 0 4
  68. fsubr DP[edi+4*ecx+4*6];
  69. fxch ST(4); // 4 2 1 0 3
  70. fsubr DP[edi+4*ecx+4*8];
  71. fxch ST(3); // 0 2 1 4 3
  72. fst DP[edi+4*ecx+4*0];
  73. fxch ST(2); // 1 2 0 4 3
  74. fst DP[edi+4*ecx+4*2];
  75. fxch ST(1); // 2 1 0 4 3
  76. fst DP[edi+4*ecx+4*4];
  77. fxch ST(4); // 3 1 0 4 2
  78. fst DP[edi+4*ecx+4*6];
  79. fxch ST(3); // 4 1 0 3 2
  80. fst DP[edi+4*ecx+4*8];
  81. fxch ST(2); // 0 1 4 3 2
  82. fabs;
  83. fxch ST(1); // 1 0 4 3 2
  84. fabs;
  85. fxch ST(4); // 2 0 4 3 1
  86. fabs;
  87. fxch ST(3); // 3 0 4 2 1
  88. fabs;
  89. fxch ST(2); // 4 0 3 2 1
  90. fabs;
  91. fxch ST(1); // 0 4 3 2 1
  92. fstp tmp0; // 4 3 2 1
  93. fxch ST(3); // 1 3 2 4
  94. fstp tmp1; // 3 2 4
  95. fxch ST(1); // 2 3 4
  96. fstp tmp2;
  97. fstp tmp3;
  98. fstp tmp4;
  99. mov eax,tmp0;
  100. mov ebx,max;
  101. cmp eax,ebx;
  102. jle skip0;
  103. mov max,eax;
  104. mov best,ecx;
  105. skip0:
  106. mov eax,tmp1;
  107. mov ebx,max;
  108. cmp eax,ebx;
  109. jle skip1;
  110. lea esi,[ecx+2];
  111. mov max,eax;
  112. mov best,esi;
  113. skip1:
  114. mov eax,tmp2;
  115. mov ebx,max;
  116. cmp eax,ebx;
  117. jle skip2;
  118. lea esi,[ecx+4];
  119. mov max,eax;
  120. mov best,esi;
  121. skip2:
  122. mov eax,tmp3;
  123. mov ebx,max;
  124. cmp eax,ebx;
  125. jle skip3;
  126. lea esi,[ecx+6];
  127. mov max,eax;
  128. mov best,esi;
  129. skip3:
  130. mov eax,tmp4;
  131. mov ebx,max;
  132. cmp eax,ebx;
  133. jle skip4;
  134. lea esi,[ecx+8];
  135. mov max,eax;
  136. mov best,esi;
  137. skip4:
  138. add ecx,10;
  139. cmp ecx,SubFrLen;
  140. jl loop1;
  141. }
  142. #else
  143. int best;
  144. float max = -32768.0f,tmp;
  145. while (k < SubFrLen)
  146. {
  147. WrkBlk[k] = WrkBlk[k] - Pamp*ImrCorr[k];
  148. tmp = (float) fabs(WrkBlk[k]);
  149. // printf("k %2d tmp %10.2f max %10.2f\n",k,tmp,max);
  150. if (asint(tmp) > asint(max))
  151. {
  152. max = tmp;
  153. best = k;
  154. }
  155. k += Sgrid;
  156. }
  157. #endif
  158. #else
  159. //==================================================================
  160. // Old version of Find_L
  161. int best;
  162. float max = -32768.0f,tmp;
  163. #if FIND_L_OPT
  164. // Because of the (if OccPos[k]) clause, this code is difficult
  165. // to pipeline. We could do a complicated pipeline job, but that
  166. // would require computing most of WrkBlk[k] = WrkBlk[k] - Pamp*ImrCorr[k]
  167. // whether or not OccPos[k] was 0. Alternatively, we can just do
  168. // one iteration at a time, in which case we can avoid more of that computation
  169. // when OccPos[k] is not 0, but we pay a penalty in that computing it once
  170. // is slower due to stalls. Since there isn't much difference between these
  171. // two approaches, we choose the second one since the code is so much
  172. // simpler. Loop control is only 2 clocks, so we don't even bother to unroll.
  173. ASM
  174. {
  175. mov esi,OccPos;
  176. mov edi,WrkBlk;
  177. mov edx,ImrCorr;
  178. mov ecx,k;
  179. loop1:
  180. fld DP[edx+4*ecx]; // start this here so fsubr below doesn't stall
  181. fmul Pamp;
  182. mov eax,DP[esi+4*ecx];
  183. test eax,07fffffffh;
  184. jne next1; // but if this is taken we have to pop FP stack once
  185. fsubr DP[edi+4*ecx];
  186. fld ST(0);
  187. fabs;
  188. fstp tmp; // save store of non-absolute-value for later
  189. mov eax,tmp;
  190. mov ebx,max;
  191. cmp eax,ebx;
  192. jle skip1;
  193. mov max,eax;
  194. mov best,ecx;
  195. skip1:
  196. fstp DP[edi+4*ecx]; // store new WrkBlk value
  197. add ecx,2;
  198. cmp ecx,SubFrLen;
  199. jl loop1;
  200. jmp endit;
  201. next1:
  202. faddp ST(0),ST; // get rid of value on top of stack
  203. add ecx,2;
  204. cmp ecx,SubFrLen;
  205. jl loop1;
  206. endit:
  207. }
  208. #else
  209. while (k < SubFrLen)
  210. {
  211. if (OccPos[k] == 0.0f)
  212. {
  213. WrkBlk[k] = WrkBlk[k] - Pamp*ImrCorr[k];
  214. tmp = (float) fabs(WrkBlk[k]);
  215. if (asint(tmp) > asint(max))
  216. {
  217. max = tmp;
  218. best = k;
  219. }
  220. }
  221. k += Sgrid;
  222. }
  223. #endif
  224. #endif
  225. // printf("best = %d\n",best);
  226. // printaff("WrkBlk",WrkBlk,60);
  227. return(best);
  228. }
  229. //------------------------------------------------------------------------
  230. void Find_Best(BESTDEF *Best, float *Tv, float *ImpResp,int Np,int Olp)
  231. {
  232. int i,j,k,l,n,ip;
  233. BESTDEF Temp;
  234. int MaxAmpId,flag=0;
  235. float MaxAmp;
  236. float Acc0,Acc1,Acc2,amp;
  237. float Imr[SubFrLen];
  238. float OccPos[SubFrLen];
  239. float ImrCorr[2*SubFrLen]; // see comment below
  240. float ErrBlk[SubFrLen];
  241. float WrkBlk[SubFrLen];
  242. // A trick is used here to simplify Find_L. The original Find_L
  243. // accessed ImrCorr[abs(k)]. In order to simplify this to ImrCorr[k],
  244. // we double the size of the ImrCorr array, offset the elements with
  245. // non-negative indices by SubFrLen, and then duplicate them in
  246. // reverse order in the first half of the array. This affects the
  247. // way ImrCorr is addressed in this routine also.
  248. //Update Impulse responce
  249. if (Olp < (SubFrLen-2))
  250. {
  251. Temp.UseTrn = 1;
  252. Gen_Trn(Imr, ImpResp, Olp);
  253. }
  254. else
  255. {
  256. Temp.UseTrn = 0;
  257. for (i = 0; i < SubFrLen; i++)
  258. Imr[i] = ImpResp[i];
  259. }
  260. //Search for the best sequence
  261. for (k=0; k < Sgrid; k++)
  262. {
  263. Temp.GridId = k;
  264. //Find maximum amplitude
  265. Acc1 = 0.0f;
  266. for (i=k; i < SubFrLen; i +=Sgrid)
  267. {
  268. OccPos[i] = Imr[i];
  269. ImrCorr[SubFrLen+i] = DotProd(&Imr[i],Imr,SubFrLen-i) * 2.0f;
  270. Acc0 = (float) fabs(ErrBlk[i]=DotProd(&Tv[i],Imr,SubFrLen-i));
  271. if (Acc0 >= Acc1)
  272. {
  273. Acc1 = Acc0;
  274. Temp.Ploc[0] = i;
  275. }
  276. }
  277. for (i=1; i<SubFrLen; i++)
  278. ImrCorr[i] = ImrCorr[2*SubFrLen-i];
  279. //Quantize the maximum amplitude
  280. Acc2 = Acc1;
  281. Acc1 = 32767.0f;
  282. MaxAmpId = (NumOfGainLev - MlqSteps);
  283. for (i=MaxAmpId; i >= MlqSteps; i--)
  284. {
  285. Acc0 = (float) fabs(FcbkGainTable[i]*ImrCorr[SubFrLen] - Acc2);
  286. if (Acc0 < Acc1)
  287. {
  288. Acc1 = Acc0;
  289. MaxAmpId = i;
  290. }
  291. }
  292. MaxAmpId --;
  293. for (i=1; i <=2*MlqSteps; i++)
  294. {
  295. for (j=k; j < SubFrLen; j +=Sgrid)
  296. {
  297. WrkBlk[j] = ErrBlk[j];
  298. OccPos[j] = 0.0f;
  299. }
  300. Temp.MampId = MaxAmpId - MlqSteps + i;
  301. MaxAmp = FcbkGainTable[Temp.MampId];
  302. if (WrkBlk[Temp.Ploc[0]] >= 0.0f)
  303. Temp.Pamp[0] = MaxAmp;
  304. else
  305. Temp.Pamp[0] = -MaxAmp;
  306. OccPos[Temp.Ploc[0]] = 1.0f;
  307. for (j=1; j < Np; j++)
  308. {
  309. #if FT_FINDL
  310. for (ip=0; ip<j; ip++)
  311. WrkBlk[Temp.Ploc[ip]] = Temp.Pamp[j-1]*
  312. ImrCorr[SubFrLen + Temp.Ploc[ip] - Temp.Ploc[j-1]];
  313. #endif
  314. Temp.Ploc[j] = Find_L(OccPos,&ImrCorr[SubFrLen-Temp.Ploc[j-1]],WrkBlk,
  315. Temp.Pamp[j-1],k);
  316. if (WrkBlk[Temp.Ploc[j]] >= 0.0f)
  317. Temp.Pamp[j] = MaxAmp;
  318. else
  319. Temp.Pamp[j] = -MaxAmp;
  320. OccPos[Temp.Ploc[j]] = 1.0f;
  321. }
  322. //Compute error vector
  323. #if FT_FBFILT
  324. // FT/CNET's trick #6, for reducing computation of filtered codeword
  325. for (j=0; j < SubFrLen; j++)
  326. OccPos[j] = 0.0f;
  327. for (j=0; j<Np; j++)
  328. {
  329. // Extra sub-trick we added: since pulse positions are either all
  330. // even or all odd, there's a natural two-ness in the inner loop,
  331. // so we unroll two times.
  332. amp = Temp.Pamp[j];
  333. l = 0;
  334. for (n=Temp.Ploc[j]; n<SubFrLen-k; n+=2)
  335. {
  336. OccPos[n] += amp*Imr[l];
  337. OccPos[n+1] += amp*Imr[l+1];
  338. l += 2;
  339. }
  340. if (k)
  341. OccPos[n] += amp*Imr[l];
  342. }
  343. #else
  344. for (j=0; j < SubFrLen; j++)
  345. OccPos[j] = 0.0f;
  346. for (j=0; j < Np; j++)
  347. OccPos[Temp.Ploc[j]] = Temp.Pamp[j];
  348. for (l=SubFrLen-1; l >= 0; l--)
  349. OccPos[l] = DotRev(OccPos,Imr,l+1);
  350. #endif
  351. //Evaluate error
  352. Acc2 = DotProd(Tv,OccPos,SubFrLen) - DotProd(OccPos,OccPos,SubFrLen);
  353. if (Acc2 > (*Best).MaxErr)
  354. {
  355. flag = 1;
  356. (*Best).MaxErr = Acc2;
  357. (*Best).GridId = Temp.GridId;
  358. (*Best).MampId = Temp.MampId;
  359. (*Best).UseTrn = Temp.UseTrn;
  360. for (j = 0; j < Np; j++)
  361. {
  362. (*Best).Pamp[j] = Temp.Pamp[j];
  363. (*Best).Ploc[j] = Temp.Ploc[j];
  364. }
  365. }
  366. }
  367. }
  368. #ifdef DEBUG
  369. if (flag == 0)
  370. {
  371. // this code is for tracking a rare condition in which
  372. // the above loop never get executed (Best is left uninitialized)
  373. DebugBreak();
  374. }
  375. #endif
  376. return;
  377. }
  378. void Fcbk_Pack(float *Dpnt, SFSDEF *Sfs, BESTDEF *Best, int Np)
  379. {
  380. int i,j;
  381. //Code the amplitudes and positions
  382. j = MaxPulseNum - Np;
  383. (*Sfs).Pamp = 0;
  384. (*Sfs).Ppos = 0;
  385. for (i=0; i < SubFrLen/Sgrid; i++)
  386. {
  387. if (Dpnt[(*Best).GridId + Sgrid*i] == 0)
  388. (*Sfs).Ppos = (*Sfs).Ppos + CombinatorialTable[j][i];
  389. else {
  390. (*Sfs).Pamp = (*Sfs).Pamp << 1;
  391. if (Dpnt[(*Best).GridId + Sgrid*i] < 0)
  392. (*Sfs).Pamp++;
  393. j++;
  394. //Check for end
  395. if (j == MaxPulseNum)
  396. break;
  397. }
  398. }
  399. (*Sfs).Mamp = (*Best).MampId;
  400. (*Sfs).Grid = (*Best).GridId;
  401. (*Sfs).Tran = (*Best).UseTrn;
  402. return;
  403. }