Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1841 lines
37 KiB

  1. //
  2. // ITU-T G.723 Floating Point Speech Coder ANSI C Source Code. Version 1.00
  3. // copyright (c) 1995, AudioCodes, DSP Group, France Telecom,
  4. // Universite de Sherbrooke, Intel Corporation. All rights reserved.
  5. //
  6. #include "opt.h"
  7. #include <stdlib.h>
  8. #include <stdio.h>
  9. #include <math.h>
  10. #include <memory.h>
  11. #include "typedef.h"
  12. #include "cst_lbc.h"
  13. #include "tab_lbc.h"
  14. #include "util_lbc.h"
  15. #include "exc_lbc.h"
  16. #include "timer.h"
  17. #include "mmxutil.h"
  18. // This file contains pitch and excitation related functions.
  19. //------------------------------------------------------
  #if COMPILE_MMX
  // Estim_Int: open-loop pitch estimate, integer-correlation (MMX) variant.
  //
  //   Dpnt  - float signal buffer. The conversion helper is handed the
  //           buffer starting at Dpnt[Start-PitchMax+3].
  //   Start - index of the first sample of the 2*SubFrLen analysis window.
  //
  // Returns Indx+PitchMin, where Indx is the loop index of the candidate
  // that won the selection below.
  //
  // Cross-correlations are produced by helpers defined outside this file
  // (FloatToShortScaled, MakeAligned*, CorrelateInt4); the lagged energies
  // and the selection rule are evaluated in floating point.
  int Estim_Int(float *Dpnt, int Start)
  {
      int i,k;
  #define NCOR  (PitchMax+1-PitchMin)  // = 128 (rounded up to mult of 4)
  #define NTAPS (2*SubFrLen+12)        // = 132
      int Pr,Indx = PitchMin;
      float MaxE = 1.0f;
      float MaxC = 0.0f;
      float E,C2,E2,Diff;
      int corr[NCOR];
      typedef struct
      {
          short taps[4][NTAPS];               //** These two arrays need
          short temp[PitchMax-3+2*SubFrLen];  //** to be 8-byte aligned
          double foo;  // slack: lets the struct be shifted by up to 7 bytes
      } EstimStruct;
      EstimStruct est,*e;

      // Round the working pointer up to the next 8-byte boundary; the
      // trailing 'foo' member absorbs the shift. The previous inline-asm
      // sequence restored the unrounded pointer as its last step, so the
      // arrays were only aligned when 'est' happened to be.
      // NOTE(review): the MakeAligned4/2/0/6 suffixes appear to match the
      // byte offsets (mod 8) of their source addresses when 'temp' is
      // 8-byte aligned - confirm against the helper implementations.
      e = (EstimStruct *)(((size_t)&est + 7) & ~(size_t)7);

      // Convert just the necessary portion of Dpnt to 16-bit integers and
      // store the result in 'temp'. The last argument (4) is described by
      // the original author as 4 guard bits per operand, giving 4+4=8 guard
      // bits in each product, which covers the 7 guard bits needed for
      // length-120 correlations.
      FloatToShortScaled(&Dpnt[Start-PitchMax+3],e->temp,PitchMax-3+2*SubFrLen,4);

      // Build four copies of the window, one per source offset.
      MakeAligned4(&e->temp[PitchMax-3],e->taps[0],2*NTAPS);
      MakeAligned2(&e->temp[PitchMax-4],e->taps[1],2*NTAPS);
      MakeAligned0(&e->temp[PitchMax-5],e->taps[2],2*NTAPS);
      MakeAligned6(&e->temp[PitchMax-6],e->taps[3],2*NTAPS);

      // Zero the first i and the last (12-i) entries of copy i.
      for (i=0; i<4; i++)
      {
          for (k=0; k<i; k++)
              e->taps[i][k] = 0;
          for (k=NTAPS-12+i; k<NTAPS; k++)
              e->taps[i][k] = 0;
      }

      // Compute cross-correlations, store in corr[] array
      CorrelateInt4(e->taps[0],e->temp,corr,NTAPS-12,NCOR>>2);
      for (i=1; i<4; i++)
          CorrelateInt4(e->taps[i],e->temp,&corr[i],NTAPS,NCOR>>2);

      // Now do the actual pitch search.
      // NOTE(review): compared with Estim_Pitch, E is initialised at Pr and
      // then updated at the same Pr before the first comparison, and Indx
      // starts at PitchMin although it otherwise holds a zero-based loop
      // index. Both look inconsistent with the float version; left as-is
      // because the result is bit-sensitive - confirm before changing.
      Pr = Start - PitchMin;
      k = PitchMax-PitchMin-3;
      E = DotProd(&Dpnt[Pr],&Dpnt[Pr],2*SubFrLen);  // first energy value
      for (i=0; i < (PitchMax-2-PitchMin); i++)
      {
          // Update energy
          E = E - Dpnt[Pr+2*SubFrLen]*Dpnt[Pr+2*SubFrLen] + Dpnt[Pr]*Dpnt[Pr];

          // Check for new maximum
          if (corr[k] > 0)
          {
              C2 = ((float)corr[k]) * ((float)corr[k]);
              E2 = C2*MaxE;
              Diff = (E2 - E*MaxC)*4.0f;
              // asint() compares the float bit patterns as integers.
              if (asint(Diff) > asint(E2) || (Diff > 0.0f && ((i - Indx) < PitchMin)))
              {
                  Indx = i;
                  MaxE = E;
                  MaxC = C2;
              }
          }
          Pr--;
          k--;
      }
      return(Indx+PitchMin);
  }
  #endif
  93. //------------------------------------------------------
  94. int Estim_Pitch(float *Dpnt, int Start)
  95. {
  96. int i;
  97. int Pr,Indx = PitchMin;
  98. float MaxE = 1.0f;
  99. float MaxC = 0.0f;
  100. float E,C,C2,E2,Diff;
  101. int *dptr,*eptr;
  102. dptr = (int *)&Diff;
  103. eptr = (int *)&E2;
  104. Pr = Start - PitchMin + 1;
  105. // Init the energy estimate
  106. E = DotProd(&Dpnt[Pr],&Dpnt[Pr],2*SubFrLen);
  107. // Main Open loop pitch search loop
  108. for (i=PitchMin; i <= PitchMax-3; i++)
  109. {
  110. Pr--;
  111. // Update energy, compute cross
  112. C = DotProd(&Dpnt[Start],&Dpnt[Pr],2*SubFrLen);
  113. E = E - Dpnt[Pr+2*SubFrLen]*Dpnt[Pr+2*SubFrLen] + Dpnt[Pr]*Dpnt[Pr];
  114. C2 = C*C;
  115. // Check for new maximum
  116. E2 = C2*MaxE;
  117. Diff = (E2 - E*MaxC)*4.0f;
  118. if (C > 0.0f && E > 0.0f &&
  119. (*dptr > *eptr || (Diff > 0.0f && ((i - Indx) < PitchMin))))
  120. {
  121. Indx = i;
  122. MaxE = E;
  123. MaxC = C2;
  124. }
  125. }
  126. return Indx;
  127. }
  128. //------------------------------------------------------
  129. PWDEF Comp_Pw(float *Dpnt, int Start, int Olp)
  130. {
  131. int i,k;
  132. float Energy,C,E,C2,MaxE,MaxC2,MaxC,Gopt;
  133. PWDEF Pw;
  134. // Compute target energy
  135. Energy = DotProd(&Dpnt[Start],&Dpnt[Start],SubFrLen);
  136. // Find position with maximum C2/E value
  137. MaxE = 1.0f;
  138. MaxC = 0.0f;
  139. MaxC2 = 0.0f;
  140. Pw.Indx = -1;
  141. Pw.Gain = 0.0f;
  142. k = Start - (Olp-PwRange);
  143. E = DotProd(&Dpnt[k],&Dpnt[k],SubFrLen);
  144. for (i=0; i <= 2*PwRange; i++)
  145. {
  146. C = DotProd(&Dpnt[Start],&Dpnt[k],SubFrLen);
  147. if (E > 0.0f && C > 0.0f)
  148. {
  149. C2 = C*C;
  150. if (C2*MaxE > E*MaxC2)
  151. {
  152. Pw.Indx = i;
  153. MaxE = E;
  154. MaxC = C;
  155. MaxC2 = C2;
  156. }
  157. }
  158. k--;
  159. if (k < 0)
  160. {
  161. break;
  162. }
  163. E = E - Dpnt[k+SubFrLen]*Dpnt[k+SubFrLen] + Dpnt[k]*Dpnt[k];
  164. }
  165. if (Pw.Indx == -1)
  166. {
  167. Pw.Indx = Olp;
  168. return Pw;
  169. }
  170. Pw.Gain = 0.0f;
  171. if (MaxC2 > MaxE*Energy*0.375f)
  172. {
  173. if (MaxC > MaxE || MaxE == 0.0f)
  174. Gopt = 1.0f;
  175. else
  176. Gopt = (float) fabs(MaxC)/MaxE;
  177. Pw.Gain = 0.3125f*Gopt;
  178. }
  179. Pw.Indx = Olp - PwRange + Pw.Indx;
  180. return Pw;
  181. }
  182. //--------------------------------------------------------------
  183. void Filt_Pw(float *DataBuff, float *Dpnt, int Start, PWDEF Pw)
  184. {
  185. int i;
  186. // Perform the harmonic weighting
  187. for (i=0; i < SubFrLen; i++)
  188. DataBuff[Start+i] = Dpnt[PitchMax+Start+i] -
  189. Pw.Gain*Dpnt[PitchMax+Start-Pw.Indx+i];
  190. }
  191. //-----------------------------------------------------------------
  192. void Find_Fcbk(float *Dpnt, float *ImpResp, LINEDEF *Line, int Sfc, enum Crate WrkRate, int flags, int UseMMX)
  193. {
  194. int i;
  195. int Srate,T0_acelp;
  196. float gain_T0;
  197. BESTDEF Best = {0};
  198. switch(WrkRate)
  199. {
  200. case Rate63:
  201. Srate = Nb_puls[Sfc];
  202. Best.MaxErr = -99999999.9f;
  203. if (flags & SC_FINDB)
  204. {
  205. if ((*Line).Olp[Sfc>>1] < SubFrLen-2)
  206. Find_Best(&Best, Dpnt, ImpResp, Srate, (*Line).Olp[Sfc>>1]);
  207. else
  208. Find_Best(&Best, Dpnt, ImpResp, Srate, SubFrLen);
  209. }
  210. else
  211. {
  212. Find_Best(&Best, Dpnt, ImpResp, Srate, SubFrLen);
  213. if ((*Line).Olp[Sfc>>1] < SubFrLen-2)
  214. Find_Best(&Best, Dpnt, ImpResp, Srate, (*Line).Olp[Sfc>>1]);
  215. }
  216. // Reconstruct the excitation
  217. for (i=0; i < SubFrLen; i++)
  218. Dpnt[i] = 0.0f;
  219. for (i=0; i < Srate; i++)
  220. Dpnt[Best.Ploc[i]] = Best.Pamp[i];
  221. // Code the excitation
  222. Fcbk_Pack(Dpnt, &((*Line).Sfs[Sfc]), &Best, Srate);
  223. if (Best.UseTrn == 1)
  224. Gen_Trn(Dpnt, Dpnt, (*Line).Olp[Sfc>>1]);
  225. break;
  226. case Rate53:
  227. T0_acelp = search_T0 ((*Line).Olp[Sfc>>1]-1+(*Line).Sfs[Sfc].AcLg,
  228. (*Line).Sfs[Sfc].AcGn, &gain_T0);
  229. #if COMPILE_MMX
  230. if (UseMMX)
  231. {
  232. (*Line).Sfs[Sfc].Ppos = ACELP_LBC_code_int(Dpnt, ImpResp, T0_acelp, Dpnt,
  233. &(*Line).Sfs[Sfc].Mamp, &(*Line).Sfs[Sfc].Grid,
  234. &(*Line).Sfs[Sfc].Pamp, gain_T0, flags);
  235. }
  236. else
  237. #endif //COMPILE_MMX
  238. {
  239. (*Line).Sfs[Sfc].Ppos = ACELP_LBC_code(Dpnt, ImpResp, T0_acelp, Dpnt,
  240. &(*Line).Sfs[Sfc].Mamp, &(*Line).Sfs[Sfc].Grid,
  241. &(*Line).Sfs[Sfc].Pamp, gain_T0, flags);
  242. }
  243. (*Line).Sfs[Sfc].Tran = 0;
  244. break;
  245. }
  246. return;
  247. }
  248. //---------------------------------------------------------
  249. void Fcbk_Unpk(float *Tv, SFSDEF Sfs, int Olp, int Sfc, enum Crate WrkRate)
  250. {
  251. int i,j,Np;
  252. float Tv_tmp[SubFrLen+4];
  253. float acelp_gain,gain_T0;
  254. int acelp_sign, acelp_shift, acelp_pos;
  255. int offset, ipos, T0_acelp;
  256. Word32 Acc0;
  257. switch(WrkRate)
  258. {
  259. case Rate63:
  260. {
  261. Np = Nb_puls[Sfc];
  262. for (i=0; i < SubFrLen; i++)
  263. Tv[i] = 0.0f;
  264. if (Sfs.Ppos >= MaxPosTable[Sfc])
  265. return;
  266. // Decode the amplitudes and positions
  267. j = MaxPulseNum - Np;
  268. Acc0 = Sfs.Ppos;
  269. for (i = 0; i < SubFrLen/Sgrid; i++)
  270. {
  271. Acc0 -= CombinatorialTable[j][i];
  272. if (Acc0 < (Word32) 0)
  273. {
  274. Acc0 += CombinatorialTable[j][i];
  275. j++;
  276. if ((Sfs.Pamp & (1 << (MaxPulseNum-j))) != 0)
  277. Tv[Sfs.Grid + Sgrid*i] = -FcbkGainTable[Sfs.Mamp];
  278. else
  279. Tv[Sfs.Grid + Sgrid*i] = FcbkGainTable[Sfs.Mamp];
  280. if (j == MaxPulseNum)
  281. break;
  282. }
  283. }
  284. if (Sfs.Tran == 1)
  285. Gen_Trn(Tv, Tv, Olp);
  286. break;
  287. }
  288. case Rate53:
  289. {
  290. for (i = 0; i < SubFrLen+4; i++)
  291. Tv_tmp[i] = 0.0f;
  292. acelp_gain = FcbkGainTable[Sfs.Mamp];
  293. acelp_shift = Sfs.Grid;
  294. acelp_sign = Sfs.Pamp;
  295. acelp_pos = Sfs.Ppos;
  296. offset = 0;
  297. for(i=0; i<4; i++)
  298. {
  299. ipos = (acelp_pos & 7);
  300. ipos = (ipos << 3) + acelp_shift + offset;
  301. if((acelp_sign & 1)== 1)
  302. Tv_tmp[ipos] = acelp_gain;
  303. else
  304. Tv_tmp[ipos] = -acelp_gain;
  305. offset += 2;
  306. acelp_pos = acelp_pos >> 3;
  307. acelp_sign = acelp_sign >> 1;
  308. }
  309. for (i = 0; i < SubFrLen; i++)
  310. Tv[i] = Tv_tmp[i];
  311. T0_acelp = search_T0(Olp-1+Sfs.AcLg, Sfs.AcGn, &gain_T0);
  312. if (T0_acelp < SubFrLen-2)
  313. {
  314. for (i = T0_acelp; i < SubFrLen; i++)
  315. Tv[i] += Tv[i-T0_acelp]*gain_T0;
  316. }
  317. break;
  318. }
  319. }
  320. return;
  321. }
  322. //---------------------------------------------------------------------
  // Acbk_Filt: output[n] = fac*impresp[n] + input[n] for n = 58 down to 0
  // (59 elements), evaluated from the highest index to the lowest.
  //
  // When OPT_ACBKF is set, indices 58..3 are handled by the inline-assembly
  // loop (four elements per pass); otherwise by the plain C loop. Indices
  // 2..0 are always done in C.
  void Acbk_Filt(float *output,float *input,float fac,float *impresp)
  {
  #if OPT_ACBKF
      ASM
      {
          push esi;
          push edi;
          push ebx;
          mov eax,58;
          mov esi,input;
          mov edi,output;
          mov ebx,impresp;
      loop1:
          fld DP[ebx+4*eax];
          fmul fac;
          fld DP[ebx+4*eax-4];
          fmul fac;
          fld DP[ebx+4*eax-8];
          fmul fac;
          fld DP[ebx+4*eax-12];
          fmul fac;               // a3 a2 a1 a0
          fxch ST(3);
          fadd DP[esi+4*eax];     // b0 a2 a1 a3
          fxch ST(2);
          fadd DP[esi+4*eax-4];   // b1 a2 b0 a3
          fxch ST(1);
          fadd DP[esi+4*eax-8];   // b2 b1 b0 a3
          fxch ST(3);
          fadd DP[esi+4*eax-12];  // b3 b1 b0 b2
          fxch ST(2);
          fstp DP[edi+4*eax];     // b1 b3 b2
          fstp DP[edi+4*eax-4];   // b3 b2
          fxch ST(1);
          fstp DP[edi+4*eax-8];
          fstp DP[edi+4*eax-12];
          sub eax,4;
          cmp eax,2;
          jg loop1;
          pop ebx;
          pop edi;
          pop esi;
      }
  #else
      int n;

      // Same element order as the unrolled form: 58, 57, ..., 3.
      for (n=58; n>2; n--)
          output[n] = fac*impresp[n] + input[n];
  #endif

      // Remaining three elements, shared by both paths.
      output[2] = fac*impresp[2] + input[2];
      output[1] = fac*impresp[1] + input[1];
      output[0] = fac*impresp[0] + input[0];
  }
  379. //---------------------------------------------------------------------
  #if COMPILE_MMX
  // Find_AcbkInt: adaptive-codebook search using the 16-bit integer helpers
  // (ConvMMX, FBufCalcInt, DotMMX60, CodeBkSrch, ...; all defined elsewhere).
  //
  //   Tv      - target vector; the selected contribution is subtracted
  //             from it in place.
  //   ImpResp - impulse response.
  //   PrevExc - previous excitation buffer.
  //   Line    - Olp[Sfc>>1] is read and updated; Sfs[Sfc].AcLg/AcGn written.
  //   Sfc     - subframe index.
  //   WrkRate - Rate63 or Rate53.
  //   flags   - SC_LAG1 restricts the search to k = 1; SC_GAIN enables the
  //             subset gain table.
  //   CodStat - coder state handed to Test_Err.
  //
  // Mirrors the structure of Find_Acbk; differences are marked NOTE(review).
  void Find_AcbkInt(float *Tv, float *ImpResp, float *PrevExc, LINEDEF
  *Line, int Sfc, enum Crate WrkRate, int flags, CODDEF *CodStat)
  {
      int i,j,k;
      float RezBuf[SubFrLen+ClPitchOrd-1];
      short TvInt[SubFrLen];
      int Tvxi[SubFrLen];
      short *lPntInt,*sPntInt,*PtrInt;
      int CorBufInt[4*(2*ClPitchOrd + ClPitchOrd*(ClPitchOrd-1)/2)];
      int *lPntd;
      long Acc0l, Acc1l;
      int Olp,Lid,Gid,Hb,t,k1,k2;
      int Bound[3];
      int Lag1, Lag2;
      int MaxInt,off_filt;
      int shift,Tshift,mx;
      DECLARE_SHORT(FltBuf0Int,63);
      DECLARE_SHORT(FltBuf1Int,63);
      DECLARE_SHORT(FltBuf2Int,63);
      DECLARE_SHORT(FltBuf3Int,63);
      DECLARE_SHORT(FltBuf4Int,63) ;
      DECLARE_SHORT(CorVctInt,4*(2*ClPitchOrd + ClPitchOrd*(ClPitchOrd-1)/2)) ;
      DECLARE_SHORT(RezTmpInt,16) ;
      DECLARE_SHORT(RezBufInt,SubFrLen+ClPitchOrd-1) ;
      DECLARE_SHORT(ImpRespInt,63);
      DECLARE_SHORT(Ix,2*SubFrLen+16) ;
      DECLARE_SHORT(Rx,2*SubFrLen+16);
      DECLARE_INT(Temp,64);
      ALIGN_ARRAY(FltBuf0Int);
      ALIGN_ARRAY(FltBuf1Int);
      ALIGN_ARRAY(FltBuf2Int);
      ALIGN_ARRAY(FltBuf3Int);
      ALIGN_ARRAY(FltBuf4Int);
      ALIGN_ARRAY(RezBufInt);
      ALIGN_ARRAY(RezTmpInt);
      ALIGN_ARRAY(ImpRespInt);
      ALIGN_ARRAY(CorVctInt);
      ALIGN_ARRAY(Ix);
      ALIGN_ARRAY(Rx);
      ALIGN_ARRAY(Temp);

      Olp = Line->Olp[Sfc>>1];
      Lid = Pstep;
      Gid = 0;
      Hb = 3 + (Sfc & 1);

      // For even frames only
      if ((Sfc & 1) == 0)
      {
          if (Olp == PitchMin)
              Olp++;
          if (Olp > (PitchMax-5))
              Olp = PitchMax-5;
      }

      // SC_LAG1 searches only lag offset k = 1; otherwise k = 0 .. Hb-1.
      if (flags & SC_LAG1)
      {
          lPntInt = &CorVctInt[20];
          k1 = 1;
          k2 = 2;
      }
      else
      {
          lPntInt = CorVctInt;
          k1 = 0;
          k2 = Hb;
      }

      // Convert Tv to integers (scale 32768), then round to 16 bits.
      ConstFloatToInt(Tv, Tvxi, SubFrLen, 32768.0f);
      for (i=0; i<SubFrLen; i++)
          TvInt[i] = (short)(((Tvxi[i]<<1)+0x00008000)>>16);

      // Convert ImpResp to 16-bit.
      // Scale by 2^14 & truncate bits right of decimal
      ConstFloatToShort(ImpResp,ImpRespInt,SubFrLen,16384.0f);

      for (k=k1; k<k2; k++)
      {
          lPntd = &CorBufInt[k*20];

          // Get residual from the excitation buffer
          Get_Rez(RezBuf, PrevExc, Olp-Pstep+k);

          // Convert RezBuf to 16-bit
          ConstFloatToShort(RezBuf,RezBufInt,SubFrLen+ClPitchOrd-1,1.0f);

          // Filter the last one (ClPitchOrd-1) using the impulse response.
          ab2abbcw(&RezBufInt[ClPitchOrd-1], Rx, SubFrLen);

          // Ix holds the impulse response reversed, each pair of samples
          // duplicated, followed by 16 zeros.
          j = 0;
          for (i=0; i<SubFrLen; i+=2)
          {
              Ix[j]   = Ix[j+2] = ImpRespInt[SubFrLen-1-i];
              Ix[j+1] = Ix[j+3] = ImpRespInt[SubFrLen-2-i];
              j += 4;
          }
          for (i=0; i<16; i++)
              Ix[j+i] = 0;

          ConvMMX(Rx, Ix, Temp, 60);
          for (i=0; i<SubFrLen; i++)
              FltBuf4Int[i] = (short)(((Temp[i]<<1)+0x00008000)>>16);

          // Update the others (ClPitchOrd-2 down to 0): first sample of
          // each buffer is the residual scaled by 2^13 and rounded to 2^-15.
          FltBuf3Int[0] = (short)(((RezBufInt[3]<<13)+0x00004000)>>15);
          FltBuf2Int[0] = (short)(((RezBufInt[2]<<13)+0x00004000)>>15);
          FltBuf1Int[0] = (short)(((RezBufInt[1]<<13)+0x00004000)>>15);
          FltBuf0Int[0] = (short)(((RezBufInt[0]<<13)+0x00004000)>>15);

          DupRezBuf(RezBufInt,RezTmpInt);
          FBufCalcInt(FltBuf4Int,FltBuf3Int,ImpRespInt,RezTmpInt,0);
          FBufCalcInt(FltBuf3Int,FltBuf2Int,ImpRespInt,RezTmpInt,1);
          FBufCalcInt(FltBuf2Int,FltBuf1Int,ImpRespInt,RezTmpInt,2);
          FBufCalcInt(FltBuf1Int,FltBuf0Int,ImpRespInt,RezTmpInt,3);

          // Compute the cross products with the signal
          *lPntd++ = DotMMX60(TvInt,FltBuf0Int)<<1;
          *lPntd++ = DotMMX60(TvInt,FltBuf1Int)<<1;
          *lPntd++ = DotMMX60(TvInt,FltBuf2Int)<<1;
          *lPntd++ = DotMMX60(TvInt,FltBuf3Int)<<1;
          *lPntd++ = DotMMX60(TvInt,FltBuf4Int)<<1;

          // Compute the energies
          *lPntd++ = DotMMX60(FltBuf0Int,FltBuf0Int)<<1;
          *lPntd++ = DotMMX60(FltBuf1Int,FltBuf1Int)<<1;
          *lPntd++ = DotMMX60(FltBuf2Int,FltBuf2Int)<<1;
          *lPntd++ = DotMMX60(FltBuf3Int,FltBuf3Int)<<1;
          *lPntd++ = DotMMX60(FltBuf4Int,FltBuf4Int)<<1;

          // Compute the between crosses
          *lPntd++ = DotMMX60(FltBuf1Int,FltBuf0Int)<<2;
          *lPntd++ = DotMMX60(FltBuf2Int,FltBuf0Int)<<2;
          *lPntd++ = DotMMX60(FltBuf2Int,FltBuf1Int)<<2;
          *lPntd++ = DotMMX60(FltBuf3Int,FltBuf0Int)<<2;
          *lPntd++ = DotMMX60(FltBuf3Int,FltBuf1Int)<<2;
          *lPntd++ = DotMMX60(FltBuf3Int,FltBuf2Int)<<2;
          *lPntd++ = DotMMX60(FltBuf4Int,FltBuf0Int)<<2;
          *lPntd++ = DotMMX60(FltBuf4Int,FltBuf1Int)<<2;
          *lPntd++ = DotMMX60(FltBuf4Int,FltBuf2Int)<<2;
          *lPntd++ = DotMMX60(FltBuf4Int,FltBuf3Int)<<2;
      }

      // Convert entries k1..k2-1 of CorBufInt to 16-bit values: find the
      // largest magnitude, normalise everything by the same shift, round
      // and keep the top 16 bits.
      Acc1l = 0L;
      for (j=k1; j<k2; j++)
      {
          for (i=0; i<20; i++)
          {
              Acc0l = abs(CorBufInt[j*20 + i]);
              if (Acc0l > Acc1l)
                  Acc1l = Acc0l;
          }
      }

      shift = norm(Acc1l);
      for (j=k1; j<k2; j++)
      {
          for (i=0; i<20; i++)
          {
              CorBufInt[j*20 + i] = CorBufInt[j*20 + i]<<shift;
              CorBufInt[j*20 + i] += 0x00008000L;  // round up to 16 MSBs
              *lPntInt++ = (short)(CorBufInt[j*20 + i]>>16);
          }
      }

      /* Test potential error */
      Lag1 = Olp - Pstep;
      Lag2 = Olp - Pstep + Hb - 1;
      off_filt = Test_Err(Lag1, Lag2, CodStat);

      Bound[0] = NbFilt085_min + (off_filt << 2);
      if (Bound[0] > NbFilt085)
          Bound[0] = NbFilt085;
      Bound[1] = NbFilt170_min + (off_filt << 3);
      if (Bound[1] > NbFilt170)
          Bound[1] = NbFilt170;
      Bound[2] = 85;  // Use subset table in the case t=2

      MaxInt = 0;
      for (k=k1; k<k2; k++)
      {
          // Select Quantization table
          t = 0;
          if (WrkRate == Rate63)
          {
              if ((Sfc & 1) == 0)
              {
                  if (Olp-Pstep+k >= SubFrLen-2)
                      t = 1;
              }
              else
              {
                  if (Olp >= SubFrLen-2)
                      t = 1;
              }
          }
          else
              t = 1;

          /* If Bound=170 and SC_GAIN=TRUE, use 170subset table.
             Else, use full table with limited Bound.
             NOTE(review): Find_Acbk tests t==1 here instead of
             WrkRate == Rate53 - confirm the difference is intended. */
          if ((WrkRate == Rate53) && (flags & SC_GAIN) && (Bound[t]==NbFilt170))
              t = 2;

          // Search for maximum
          sPntInt = AcbkGainTablePtrInt[t];
          PtrInt = &CorVctInt[k*20];
          CodeBkSrch(PtrInt, sPntInt, Bound[t], &Gid, &MaxInt);

          // NOTE(review): the next two statements run on every pass whether
          // or not CodeBkSrch found a new maximum, whereas Find_Acbk only
          // updates Gid/Lid on a new maximum. With SC_LAG1 there is a
          // single pass, so the two agree; for wider searches confirm
          // against CodeBkSrch.
          if (t==2)
              Gid = GainScramble[Gid];
          Lid = k;
      }

      // Modify Olp for even sub frames
      if ((Sfc & 1) == 0)
      {
          Olp = Olp - Pstep + Lid;
          Lid = Pstep;
      }

      // Save Lag, Gain and Olp
      Line->Sfs[Sfc].AcLg = Lid;
      Line->Sfs[Sfc].AcGn = Gid;
      Line->Olp[Sfc>>1] = Olp;

      /* ------------------------------ FLOAT -----------------------*/
      // Decode the Acbk contribution and subtract it
      Decod_Acbk(RezBuf, PrevExc, Olp, Lid, Gid, WrkRate);

      mx = FloatToShortScaled(RezBuf, RezBufInt, SubFrLen+ClPitchOrd-1, 3);
      Tshift = 11 - (mx-126);
      if (mx==0)
          Tshift = 0;

      // Ix still holds the reversed impulse response built in the loop above.
      ab2abbcw(RezBufInt, Rx, 60);
      ConvMMX(Rx, Ix, Temp, SubFrLen);

      // Rescale the convolution to the Tvxi scale and convert the difference
      // back to float (0.00003052 is approximately 1/32768).
      for (j=0; j<SubFrLen; j++)
      {
          if (Tshift >= 0)
              Temp[j] = Temp[j]>>Tshift;
          else
              Temp[j] = Temp[j]<<(-Tshift);
          Tv[j] = ((float)(Tvxi[j] - Temp[j]))*0.00003052f;
      }
  }
  #endif //COMPILE_MMX
  // norm: number of left shifts needed to normalise a 32-bit value.
  //
  // Returns 0 for 0 and 31 for -1. Negative inputs are bit-complemented
  // first. Otherwise the result is the number of single-bit left shifts
  // needed before the value reaches at least 0x40000000.
  //
  // The all-ones case is tested against -1L rather than a 0xffffffff
  // literal so that it is still recognised when long is wider than 32 bits
  // (where ~(-1) == 0 would otherwise never leave the shift loop).
  short norm(long L_var1)
  {
      short var_out = 0;

      if (L_var1 == 0L)
          return 0;
      if (L_var1 == -1L)
          return 31;

      if (L_var1 < 0L)
          L_var1 = ~L_var1;

      while (L_var1 < 0x40000000L)
      {
          L_var1 <<= 1;
          var_out++;
      }
      return var_out;
  }
  644. /*---------------------------------------------------------------------*/
  645. void Find_Acbk(float *Tv, float *ImpResp, float *PrevExc, LINEDEF
  646. *Line, int Sfc, enum Crate WrkRate, int flags, CODDEF *CodStat)
  647. {
  648. int i,j,k;
  649. float Acc0,Max;
  650. float RezBuf[SubFrLen+ClPitchOrd-1];
  651. float FltBuf[ClPitchOrd][SubFrLen];
  652. float CorVct[4*(2*ClPitchOrd + ClPitchOrd*(ClPitchOrd-1)/2)];
  653. float *lPnt;
  654. float *sPnt,*Ptr;
  655. int Olp,Lid,Gid,Hb,t,k1,k2;
  656. int Bound[3];
  657. int Lag1, Lag2;
  658. int off_filt;
  659. Olp = (*Line).Olp[Sfc>>1];
  660. Lid = Pstep;
  661. Gid = 0;
  662. Hb = 3 + (Sfc & 1);
  663. // For even frames only
  664. if ((Sfc & 1) == 0)
  665. {
  666. if (Olp == PitchMin)
  667. Olp++;
  668. if (Olp > (PitchMax-5))
  669. Olp = PitchMax-5;
  670. }
  671. if (flags & SC_LAG1)
  672. {
  673. lPnt = &CorVct[20];
  674. k1 = 1;
  675. k2 = 2;
  676. }
  677. else
  678. {
  679. lPnt = CorVct;
  680. k1 = 0;
  681. k2 = Hb;
  682. }
  683. for (k=k1; k<k2; k++)
  684. {
  685. // Get residual from the exitation buffer
  686. Get_Rez(RezBuf, PrevExc, Olp-Pstep+k);
  687. // Filter the last one (ClPitchOrd-1) using the impulse responce
  688. for (i=0; i < SubFrLen; i++)
  689. FltBuf[ClPitchOrd-1][i] = DotRev(&RezBuf[ClPitchOrd-1],ImpResp,i+1);
  690. // Update the others (ClPitchOrd-2 down to 0)
  691. for (i=ClPitchOrd-2; i >= 0; i --)
  692. {
  693. FltBuf[i][0] = RezBuf[i]*0.5f;
  694. Acbk_Filt(&FltBuf[i][1],&FltBuf[i+1][0],RezBuf[i],&ImpResp[1]);
  695. // for (j = 1; j < SubFrLen; j++)
  696. // FltBuf[i][j] = RezBuf[i]*ImpResp[j] + FltBuf[i+1][j-1];
  697. }
  698. // Compute the cross products with the signal
  699. for (i=0; i < ClPitchOrd; i++)
  700. *lPnt++ = DotProd(Tv, FltBuf[i], SubFrLen);
  701. // Compute the energies
  702. for (i=0; i < ClPitchOrd; i++)
  703. *lPnt++ = 0.5f*DotProd(FltBuf[i], FltBuf[i], SubFrLen);
  704. // Compute the between crosses
  705. for (i=1; i < ClPitchOrd; i++)
  706. for (j = 0; j < i; j++)
  707. *lPnt++ = DotProd(FltBuf[i], FltBuf[j], SubFrLen);
  708. }
  709. /* Test potential error */
  710. Lag1 = Olp - Pstep;
  711. Lag2 = Olp - Pstep + Hb - 1;
  712. off_filt = Test_Err(Lag1, Lag2, CodStat);
  713. Bound[0] = NbFilt085_min + (off_filt << 2);
  714. if(Bound[0] > NbFilt085) Bound[0] = NbFilt085;
  715. Bound[1] = NbFilt170_min + (off_filt << 3);
  716. if(Bound[1] > NbFilt170) Bound[1] = NbFilt170;
  717. Bound[2] = 85; //Use subset table in the case t=2
  718. Max = 0.0f;
  719. for (k=k1; k<k2; k++)
  720. {
  721. // Select Quantization table
  722. t = 0;
  723. if (WrkRate == Rate63)
  724. {
  725. if ((Sfc & 1) == 0)
  726. {
  727. if (Olp-Pstep+k >= SubFrLen-2)
  728. t = 1;
  729. }
  730. else
  731. {
  732. if (Olp >= SubFrLen-2)
  733. t = 1;
  734. }
  735. }
  736. else
  737. t = 1;
  738. /* If Bound=170 and SC_GAIN=TRUE, use 170subset table.
  739. Else, use full table with limited Bound.*/
  740. if (t==1 && (flags & SC_GAIN) && Bound[t]==NbFilt170)
  741. t = 2;
  742. // Search for maximum
  743. sPnt = AcbkGainTablePtr[t];
  744. Ptr = &CorVct[k*20];
  745. for (i=0; i < Bound[t]; i++)
  746. {
  747. Acc0 = Ptr[0]*sPnt[0] + Ptr[1]*sPnt[1] +
  748. Ptr[2]*sPnt[2] + Ptr[3]*sPnt[3] +
  749. Ptr[4]*sPnt[4] + Ptr[5]*sPnt[5] +
  750. Ptr[6]*sPnt[6] + Ptr[7]*sPnt[7] +
  751. Ptr[8]*sPnt[8] + Ptr[9]*sPnt[9] +
  752. Ptr[10]*sPnt[10] + Ptr[11]*sPnt[11] +
  753. Ptr[12]*sPnt[12] + Ptr[13]*sPnt[13] +
  754. Ptr[14]*sPnt[14] + Ptr[15]*sPnt[15] +
  755. Ptr[16]*sPnt[16] + Ptr[17]*sPnt[17] +
  756. Ptr[18]*sPnt[18] + Ptr[19]*sPnt[19];
  757. sPnt += 20;
  758. if (asint(Acc0) > asint(Max)) // integer cmp, since Max is not negative.
  759. {
  760. Max = Acc0;
  761. if (t==2)
  762. Gid = GainScramble[i];
  763. else
  764. Gid = i;
  765. Lid = k;
  766. }
  767. }
  768. }
  769. // Modify Olp for even sub frames
  770. if ((Sfc & 1) == 0)
  771. {
  772. Olp = Olp - Pstep + Lid;
  773. Lid = Pstep;
  774. }
  775. // Save Lag, Gain and Olp
  776. (*Line).Sfs[Sfc].AcLg = Lid;
  777. (*Line).Sfs[Sfc].AcGn = Gid;
  778. (*Line).Olp[Sfc>>1] = Olp;
  779. // Decode the Acbk contribution and subtract it
  780. Decod_Acbk(RezBuf, PrevExc, Olp, Lid, Gid, WrkRate);
  781. for (i=0; i < SubFrLen; i++)
  782. Tv[i] -= DotRev(RezBuf,ImpResp,i+1);
  783. }
  784. //-----------------------------------------------------------------
  785. void Get_Rez(float *Tv, float *PrevExc, int Lag)
  786. {
  787. int i,n,div,mod;
  788. float *src,*dst;
  789. for (i=0; i < ClPitchOrd/2; i++)
  790. Tv[i] = PrevExc[PitchMax - Lag - ClPitchOrd/2 + i];
  791. n = SubFrLen+ClPitchOrd/2;
  792. div = n/Lag;
  793. mod = n%Lag;
  794. dst = &Tv[ClPitchOrd/2];
  795. src = &PrevExc[PitchMax-Lag];
  796. for (i=0; i<div; i++)
  797. {
  798. memcpy(dst,src,4*Lag);
  799. dst += Lag;
  800. }
  801. memcpy(dst,src,4*mod);
  802. }
  803. //-----------------------------------------------------------------
  804. void Decod_Acbk(float *Tv, float *PrevExc, int Olp, int Lid, int Gid, enum Crate WrkRate)
  805. {
  806. int i;
  807. float RezBuf[SubFrLen+ClPitchOrd-1];
  808. float *sPnt;
  809. Get_Rez(RezBuf, PrevExc, (Olp + Lid) - Pstep);
  810. // Select Quantization tables
  811. i = 0;
  812. if (WrkRate == Rate63)
  813. {
  814. if (Olp >= (SubFrLen-2))
  815. i++;
  816. }
  817. else
  818. i=1;
  819. sPnt = AcbkGainTablePtr[i] + Gid*20;
  820. // Compute output vector
  821. for (i=0; i < SubFrLen; i++)
  822. Tv[i] = RezBuf[i]*sPnt[0] + RezBuf[i+1]*sPnt[1] + RezBuf[i+2]*sPnt[2] +
  823. RezBuf[i+3]*sPnt[3] + RezBuf[i+4]*sPnt[4];
  824. }
  825. //-----------------------------------------------
  826. int Comp_Info(float Buff[60], int Olp)
  827. {
  828. int i;
  829. float Acc0;
  830. float Tenr;
  831. float Ccr,Enr;
  832. int Indx;
  833. if (Olp > (PitchMax-3))
  834. Olp = (PitchMax-3);
  835. Indx = Olp;
  836. Ccr = 0.0f;
  837. for (i=Olp-3; i <= Olp+3; i++)
  838. {
  839. Acc0 = DotProd(&Buff[PitchMax+Frame-2*SubFrLen],
  840. &Buff[PitchMax+Frame-2*SubFrLen-i],2*SubFrLen);
  841. if (Acc0 > Ccr)
  842. {
  843. Ccr = Acc0;
  844. Indx = i;
  845. }
  846. }
  847. // Compute target energy
  848. Tenr = DotProd(&Buff[PitchMax+Frame-2*SubFrLen],
  849. &Buff[PitchMax+Frame-2*SubFrLen],2*SubFrLen);
  850. // Compute best energy
  851. Enr = DotProd(&Buff[PitchMax+Frame-2*SubFrLen-Indx],
  852. &Buff[PitchMax+Frame-2*SubFrLen-Indx],2*SubFrLen);
  853. if (Ccr <= 0.0f)
  854. return 0;
  855. if (((0.125f*Enr*Tenr) - (Ccr*Ccr)) < 0.0f)
  856. return Indx;
  857. else
  858. return 0;
  859. }
  860. //------------------------------------------------------------------
  861. void Regen(float *DataBuff, float *Buff, int Lag, float Gain,
  862. int Ecount, int *Sd)
  863. {
  864. int i;
  865. // Test for clearing
  866. if (Ecount >= ErrMaxNum)
  867. {
  868. for (i = 0; i < Frame; i++)
  869. DataBuff[i] = 0.0f;
  870. for (i = 0; i < Frame+PitchMax; i++)
  871. Buff[i] = 0.0f;
  872. }
  873. else
  874. {
  875. // Interpolate accordingly to the voicing estimation
  876. if (Lag != 0)
  877. {
  878. // Voiced case
  879. for (i = 0; i < Frame; i++)
  880. Buff[PitchMax+i] = Buff[PitchMax-Lag+i];
  881. for (i = 0; i < Frame; i++)
  882. DataBuff[i] = Buff[PitchMax+i] = Buff[PitchMax+i] * 0.75f;
  883. }
  884. else
  885. {
  886. //Unvoiced case
  887. for (i = 0; i < Frame; i++)
  888. DataBuff[i] = Gain*(float)Rand_lbc(Sd)*(1.0f/16384.0f);
  889. //Clear buffer to reset memory
  890. for (i = 0; i < Frame+PitchMax; i++)
  891. Buff[i] = 0.0f;
  892. }
  893. }
  894. }
  895. //------------------------------------------------------
  896. //Comp_Lpf
  897. //------------------------------------------------------
  898. //Find_B
  899. //------------------------------------------------------
  900. //Find_F
  901. //------------------------------------------------------
  902. //Get_Ind
  903. //------------------------------------------------------
  904. //Filt_Lpf
  905. //---------------------------------------------------------------
  906. int search_T0 (int T0, int Gid, float *gain_T0)
  907. {
  908. int T0_mod;
  909. T0_mod = T0+epsi170[Gid];
  910. *gain_T0 = gain170[Gid];
  911. return(T0_mod);
  912. }
  913. /*
  914. **
  915. ** Function: Update_Err()
  916. **
  917. ** Description: Estimation of the excitation error associated
  918. ** to the excitation signal when it is disturbed at
  919. ** the decoder, the disturbing signal being filtered
  920. ** by the long term synthesis filters
  921. ** one value for (SubFrLen/2) samples
  922. ** Updates the table CodStat.Err
  923. **
  924. ** Links to text: Section
  925. **
  926. ** Arguments:
  927. **
  928. ** int Olp Center value for pitch delay
  929. ** int AcLg Offset value for pitch delay
  930. ** int AcGn Index of Gain LT filter
  931. **
  932. ** Outputs: None
  933. **
  934. ** Return value: None
  935. **
  936. */
  937. #define MAX 256.0f
  938. void Update_Err(int Olp, int AcLg, int AcGn, CODDEF *CodStat)
  939. {
  940. int i, iz, temp2;
  941. int Lag;
  942. float Worst1, Worst0, wtemp;
  943. float beta,*ptr_tab;
  944. Lag = Olp - Pstep + AcLg;
  945. /* Select Quantization tables */
  946. i = 0 ;
  947. ptr_tab = tabgain85;
  948. if ( CodStat->WrkRate == Rate63 ) {
  949. if ( Olp >= (SubFrLen-2) ) ptr_tab = tabgain170;
  950. }
  951. else {
  952. ptr_tab = tabgain170;
  953. }
  954. beta = ptr_tab[(int)AcGn];
  955. if(Lag <= (SubFrLen/2))
  956. {
  957. Worst0 = CodStat->Err[0]*beta + Err0;
  958. Worst1 = Worst0;
  959. }
  960. else
  961. {
  962. iz = (Lag*1092) >> 15;
  963. temp2 = 30*(iz+1);
  964. if (temp2 != Lag)
  965. {
  966. if(iz == 1)
  967. {
  968. Worst0 = CodStat->Err[0]*beta + Err0;
  969. Worst1 = CodStat->Err[1]*beta + Err0;
  970. if (Worst0 > Worst1)
  971. Worst1 = Worst0;
  972. else
  973. Worst0 = Worst1;
  974. }
  975. else
  976. {
  977. wtemp = CodStat->Err[iz-1]*beta + Err0;
  978. Worst0 = CodStat->Err[iz-2]*beta + Err0;
  979. if (wtemp > Worst0) Worst0 = wtemp;
  980. Worst1 = CodStat->Err[iz]*beta + Err0;
  981. if (wtemp > Worst1) Worst1 = wtemp;
  982. }
  983. }
  984. else
  985. {
  986. Worst0 = CodStat->Err[iz-1]*beta + Err0;
  987. Worst1 = CodStat->Err[iz]*beta + Err0;
  988. }
  989. }
  990. if (Worst0 > MAX) Worst0 = MAX;
  991. if (Worst1 > MAX) Worst1 = MAX;
  992. for(i=4; i>=2; i--)
  993. CodStat->Err[i] = CodStat->Err[i-2];
  994. CodStat->Err[0] = Worst0;
  995. CodStat->Err[1] = Worst1;
  996. return;
  997. }
  998. /*
  999. **
  1000. ** Function: Test_Err()
  1001. **
  1002. ** Description: Check the error excitation maximum for
  1003. ** the subframe and computes an index iTest used to
  1004. ** calculate the maximum nb of filters (in Find_Acbk) :
  1005. ** Bound = Min(Nmin + iTest x pas, Nmax) , with
  1006. ** AcbkGainTable085 : pas = 2, Nmin = 51, Nmax = 85
  1007. ** AcbkGainTable170 : pas = 4, Nmin = 93, Nmax = 170
  1008. ** iTest depends on the relative difference between
  1009. ** errmax and a fixed threshold
  1010. **
  1011. ** Links to text: Section
  1012. **
  1013. ** Arguments:
  1014. **
  1015. ** Word16 Lag1 1st long term Lag of the tested zone
  1016. ** Word16 Lag2 2nd long term Lag of the tested zone
  1017. **
  1018. ** Outputs: None
  1019. **
  1020. ** Return value:
  1021. ** Word16 index iTest used to compute Acbk number of filters
  1022. */
  1023. int Test_Err(int Lag1, int Lag2, CODDEF *CodStat)
  1024. {
  1025. int i, i1, i2;
  1026. int zone1, zone2, iTest;
  1027. float Err_max;
  1028. i2 = Lag2 + ClPitchOrd/2;
  1029. zone2 = i2/30;
  1030. i1 = - SubFrLen + 1 + Lag1 - ClPitchOrd/2;
  1031. if (i1 <= 0) i1 = 1;
  1032. zone1 = i1/30;
  1033. Err_max = -1.0f;
  1034. for(i=zone2; i>=zone1; i--)
  1035. {
  1036. if (CodStat->Err[i] > Err_max)
  1037. Err_max = CodStat->Err[i];
  1038. }
  1039. if((Err_max > ThreshErr) || (CodStat->SinDet < 0 ) )
  1040. {
  1041. iTest = 0;
  1042. //ount_clip++;
  1043. }
  1044. else
  1045. {
  1046. iTest = (int)(ThreshErr - Err_max);
  1047. }
  1048. return(iTest);
  1049. }
  1050. #if COMPILE_MMX
  1051. #if ASM_FACBK
  // DotMMX60 (MMX inline-assembly build, selected when ASM_FACBK is non-zero)
  //
  // Returns the dot product of 60 consecutive 16-bit values at 'ind' and
  // 'oud'.  The loop runs 5 times; each pass loads three quadwords
  // (12 shorts) from 'ind', multiplies them against 'oud' with pmaddwd
  // (two 32-bit sums of adjacent products per quadword) and adds them into
  // acc0 with paddd, so accumulation is 32-bit and wraps on overflow.
  // After the loop the two 32-bit halves of acc0 are added together and the
  // low 32 bits are returned.  emms is executed before returning.
  1052. int DotMMX60(short *ind, short *oud)
  1053. {
  1054. int dotprod;
  // Register assignments
  1055. #define reg0 mm0
  1056. #define reg1 mm1
  1057. #define reg2 mm2
  1058. #define acc0 mm6
  1059. #define inx esi
  1060. #define oux edi
  1061. #define dot eax
  1062. #define jcnt ebx
  // l(n): load quadword n of the current 24-byte group from 'ind'
  // m(n): multiply-add it against quadword n of the current group from 'oud'
  // a(n): add the two 32-bit partial sums in reg n into acc0
  1063. #define l(n) ASM movq reg##n,QP[inx+8*n]
  1064. #define m(n) ASM pmaddwd reg##n,QP[oux+8*n]
  1065. #define a(n) ASM paddd acc0,reg##n
  1066. ASM
  1067. {
  1068. mov inx,ind;
  1069. mov oux,oud;
  // 5 passes of 12 shorts each = 60 elements
  1070. mov jcnt,5;
  1071. }
  1072. //Begin loop
  1073. ASM pxor acc0,acc0;
  1074. ASM pxor reg1,reg1; //make first a(1) a nop
  1075. ASM pxor reg2,reg2; //make first a(2) a nop
  1076. inner:
  1077. //------------------
  // Software pipeline: a(1) and a(2) at the top of a pass accumulate the
  // reg1/reg2 products left over from the previous pass; a(0) accumulates
  // the reg0 product formed earlier in this same pass.
  1078. l(0);
  1079. a(1);
  1080. m(0);
  1081. l(1);
  1082. a(2);
  1083. m(1);
  1084. l(2);
  1085. a(0);
  1086. m(2);
  1087. //-------------------
  // Advance both pointers by the 24 bytes just consumed
  1088. ASM add inx,24;
  1089. ASM add oux,24;
  1090. ASM sub jcnt,1;
  1091. ASM jg inner;
  // Drain the pipeline: products of the final pass not yet accumulated
  1092. a(1);
  1093. a(2);
  1094. ASM
  1095. {
  1096. //Add the two halves of acc0
  1097. movq reg0,acc0;
  1098. psrlq acc0,32;
  1099. paddd acc0,reg0;
  1100. movd dot,acc0; //store
  1101. mov dotprod,dot
  1102. }
  // Leave MMX state before returning to the caller
  1103. ASM emms;
  1104. return(dotprod);
  // Drop the function-local macro names so later code can reuse them
  1105. #undef reg0
  1106. #undef reg1
  1107. #undef reg2
  1108. #undef acc0
  1109. #undef inx
  1110. #undef oux
  1111. #undef dot
  1112. #undef jcnt
  1113. #undef l
  1114. #undef m
  1115. #undef a
  1116. }
  1117. #else
  // DotMMX60 (portable C build, selected when ASM_FACBK is zero)
  //
  // Returns the dot product of the first 60 elements of 'in' and 'out'.
  // Each 16-bit product is formed and accumulated in int arithmetic.
  int DotMMX60(short *in, short *out)
  {
      int dotprod = 0;
      int j;

      // A single index drives both the loop bound and the element access.
      for (j = 0; j < 60; j++)
      {
          dotprod += in[j]*out[j];
      }
      return(dotprod);
  }
  1129. #endif
  1130. #if ASM_FACBK
  // DupRezBuf (MMX inline assembly)
  //
  // Reads the four 16-bit values rezbuf[0..3], doubles each one with a
  // 16-bit left shift by one (psllw, so the result wraps within 16 bits),
  // and writes four quadwords to reztemp, each holding one doubled value
  // replicated in all four 16-bit lanes.  The order is reversed:
  //     quadword 0 <- rezbuf[3], quadword 1 <- rezbuf[2],
  //     quadword 2 <- rezbuf[1], quadword 3 <- rezbuf[0]
  // emms is executed before returning.
  1131. void DupRezBuf(short *rezbuf, short *reztemp)
  1132. {
  1133. #define reg0 mm0
  1134. #define reg1 mm1
  1135. #define reg2 mm2
  1136. #define reg3 mm3
  1137. #define rbuf edi
  1138. #define rztmp esi
  1139. //rezbuf duplication operations
  // cr : copy register          uph/upl : interleave the high/low two words
  // sto: store quadword i       sl : shift words left 1     l : load rezbuf
  1140. #define cr(r0,r1) ASM movq reg##r0,reg##r1
  1141. #define uph(r0) ASM punpckhwd reg##r0,reg##r0
  1142. #define upl(r0) ASM punpcklwd reg##r0,reg##r0
  1143. #define sto(r0,i) ASM movq QP[rztmp+8*i],reg##r0
  1144. #define sl(r0) ASM psllw reg##r0,1
  1145. #define l(r0) ASM movq reg##r0,QP[rbuf]
  1146. //Duplicate first 4 rezbuf values 4 times each
  1147. // and store into 4 QWORDS in reztemp
  1148. //Multiply by two while we're at it
  1149. ASM mov rbuf,rezbuf;
  1150. ASM mov rztmp,reztemp;
  1151. l(0);
  1152. sl(0);
  1153. cr(2,0);
  // First unpack stage: reg0 = words {3,3,2,2}, reg2 = words {1,1,0,0}
  1154. uph(0);
  1155. upl(2);
  1156. cr(1,0);
  1157. cr(3,2);
  // Second unpack stage: each register ends up with one word in all 4 lanes
  1158. uph(0);
  1159. sto(0,0);
  1160. upl(1);
  1161. sto(1,1);
  1162. uph(2);
  1163. sto(2,2);
  1164. upl(3);
  1165. sto(3,3);
  1166. ASM emms;
  1167. }
  // Drop the macro names defined inside DupRezBuf
  1168. #undef reg0
  1169. #undef reg1
  1170. #undef reg2
  1171. #undef reg3
  1172. #undef rbuf
  1173. #undef rztmp
  1174. #undef cr
  1175. #undef uph
  1176. #undef upl
  1177. #undef sto
  1178. #undef sl
  1179. #undef l
  1180. #endif
  1181. #if ASM_FACBK
  // FBufCalcInt (MMX inline-assembly build, selected when ASM_FACBK is non-zero)
  //
  // Builds fo[] from fi[], impresp[] and one quadword of reztemp:
  //     fo[j] = saturate16( fi[j-1] + high16( c * impresp[j] ) )
  // where c is the 16-bit lane value of the quadword at reztemp + 8*n,
  // high16() is the upper 16 bits of the signed product (pmulhw) and the
  // addition saturates (paddsw).  fo[0] is read and written back unchanged.
  //
  // fo[1..3] are produced before the loop; the loop then produces three
  // quadwords per pass for 5 passes, i.e. fo[4..63].
  // NOTE(review): the last pass stores through fo[63], and the look-ahead
  // loads issued for a pass that never runs read up to fi[71] and
  // impresp[67] -- the caller's buffers presumably carry padding; confirm.
  // NOTE(review): reztemp is presumably the buffer filled by DupRezBuf;
  // confirm against the caller.
  // emms is executed before returning.
  1182. void FBufCalcInt(short *fi, short *fo, short *impresp, short *reztemp, int n)
  1183. {
  1184. #define reg0 mm0
  1185. #define reg1 mm1
  1186. #define reg2 mm2
  1187. #define reg3 mm3
  1188. #define reg4 mm4
  1189. #define reg5 mm5
  1190. #define reg6 mm6
  // reg7 aliases mm1 (same register as reg1); reg8 is mm7
  1191. #define reg7 mm1
  1192. #define reg8 mm7
  1193. #define fbufi esi
  1194. #define rbuf edi
  1195. #define imp edx
  1196. #define fbufo ebx
  1197. #define jcnt ecx
  1198. #define rzv eax
  1199. //Diagonal array operations
  // l1 : load fi quadword j            l2 : load fi quadword j+1
  // c3 : load the reztemp quadword selected by n
  // m1 : high-word multiply by impresp quadword j+1
  // a1 : saturating 16-bit add         sto: store to fo quadword j+1
  // s1 : shift right 48 bits (top word moves to lane 0)
  // s2 : shift left 16 bits (every word moves up one lane)
  // or : merge the two shifted pieces into one quadword
  1200. #define l1(r0,j) ASM movq reg##r0,QP[fbufi+8*j]
  1201. #define l2(r0,j) ASM movq reg##r0,QP[fbufi+8+8*j]
  1202. #define c3(r0) ASM movq reg##r0,QP[rbuf+8*rzv]
  1203. #define m1(r0,j) ASM pmulhw reg##r0,QP[imp+8+8*j]
  1204. #define a1(r0,r1) ASM paddsw reg##r0,reg##r1
  1205. #define sto(r0,j) ASM movq QP[fbufo+8+8*j], reg##r0
  1206. #define s1(r0) ASM psrlq reg##r0,48
  1207. #define s2(r0) ASM psllq reg##r0,16
  1208. #define or(r0,r1) ASM por reg##r0,reg##r1
  1209. //Loop setup
  1210. ASM
  1211. {
  1212. mov rbuf,reztemp
  1213. mov jcnt,5;
  1214. mov fbufi,fi;
  1215. mov fbufo,fo;
  1216. mov imp,impresp;
  1217. mov rzv,n
  1218. }
  1219. //Compute initial values
  1220. //Zero-th QWORD is different
  // The shift pair below keeps only fo[0]; lanes 1..3 are then filled with
  // the results for j = 1..3 (impresp is addressed from element 1: imp+2).
  1221. ASM
  1222. {
  1223. movq reg0,QP[fbufo];
  1224. psllq reg0,48;
  1225. psrlq reg0,48;
  1226. //zero-th part of fbufo now in reg0
  1227. movq reg2,QP[rbuf+8*rzv];
  1228. pmulhw reg2,QP[imp+2];
  1229. paddsw reg2,QP[fbufi];
  1230. psllq reg2,16;
  1231. por reg0,reg2;
  1232. movq QP[fbufo],reg0;
  1233. }
  1234. //begin loop
  // Pipeline prologue: prepare the shifted input and the product for the
  // first output quadword, and start loading the second.
  1235. l2(0,0);
  1236. l1(1,0);
  1237. s2(0);
  1238. s1(1);
  1239. c3(2);
  1240. m1(2,0);
  1241. l2(3,1);
  1242. l1(4,1);
  1243. s2(3);
  1244. s1(4);
  1245. or(0,1);
  1246. inner:
  1247. //-------------------------
  // Each pass finishes three output quadwords (held in reg0, reg3, reg6)
  // while already loading and shifting the inputs for the following ones.
  // reg7 reuses mm1 only after the reg1 value has been merged into reg0.
  1248. l2(6,2);
  1249. a1(0,2);
  1250. c3(5);
  1251. m1(5,1);
  1252. or(3,4);
  1253. l1(7,2);
  1254. s2(6);
  1255. sto(0,0);
  1256. s1(7);
  1257. l2(0,3);
  1258. a1(3,5);
  1259. c3(8);
  1260. m1(8,2);
  1261. or(6,7);
  1262. l1(1,3);
  1263. s2(0);
  1264. sto(3,1);
  1265. s1(1);
  1266. l2(3,4);
  1267. a1(6,8);
  1268. c3(2);
  1269. m1(2,3);
  1270. or(0,1);
  1271. l1(4,4);
  1272. s2(3);
  1273. sto(6,2);
  1274. s1(4);
  1275. //-------------------------
  // Advance all three pointers by the 24 bytes (three quadwords) just done
  1276. ASM add fbufo,24;
  1277. ASM add fbufi,24;
  1278. ASM add imp,24;
  1279. ASM sub jcnt,1;
  1280. ASM jg inner;
  1281. ASM emms;
  1282. }
  // Drop the macro names defined inside FBufCalcInt
  1283. #undef reg0
  1284. #undef reg1
  1285. #undef reg2
  1286. #undef reg3
  1287. #undef reg4
  1288. #undef reg5
  1289. #undef reg6
  1290. #undef reg7
  1291. #undef reg8
  1292. #undef fbufi
  1293. #undef rbuf
  1294. #undef imp
  1295. #undef fbufo
  1296. #undef jcnt
  1297. #undef rzv
  1298. #undef l1
  1299. #undef l2
  1300. #undef c3
  1301. #undef m1
  1302. #undef a1
  1303. #undef sto
  1304. #undef s1
  1305. #undef s2
  1306. #undef or
  1307. #else
  1308. void FBufCalcInt(short *fi, short *fo, short *impresp, short *rezbuf, short *reztemp, int n)
  1309. {
  1310. long Acc0l;
  1311. int j;
  1312. #define MAX16 32767
  1313. #define MIN16 -32768
  1314. for(j=1; j<SubFrLen; j++)
  1315. {
  1316. Acc0l = fi[j-1];
  1317. Acc0l += (((rezbuf[4-n]<<1)*impresp[j]))>>16;
  1318. if (Acc0l > MAX16) Acc0l = MAX16;
  1319. else if(Acc0l < MIN16) Acc0l = MIN16;
  1320. fo[j] = (short)(Acc0l);
  1321. }
  1322. }
  1323. #endif
  1324. #if ASM_FACBK
  1325. //#if 0
  // CodeBkSrch (MMX inline-assembly build, selected when ASM_FACBK is non-zero)
  //
  // For each of numvecs consecutive 20-short vectors starting at spint,
  // forms the 32-bit dot product with the 20 shorts at lpint (five pmaddwd
  // quadwords) and keeps the largest value seen, starting from the incoming
  // *max.  The compare is signed and strictly greater (pcmpgtd), so the
  // earliest vector wins ties.  On exit *max holds the largest value and
  // *gid the 0-based index of the vector that produced it.
  // *gid is always stored: if no vector exceeds the incoming *max, gd still
  // holds numvecs and *gid becomes 0.
  // NOTE(review): the loop count is tested at the bottom, so the body runs
  // once even when numvecs <= 0 -- confirm callers never pass that.
  // emms is executed before returning.
  1326. void CodeBkSrch(short *lpint, short *spint, int numvecs, int *gid, int *max)
  1327. {
  1328. #define reg0 mm0
  1329. #define reg1 mm1
  1330. #define reg2 mm2
  1331. #define reg3 mm3
  1332. #define reg4 mm4
  1333. #define acc1 mm5
  1334. #define acc0 mm6
  // gdx shares mm3 with reg3 and icx shares mm2 with reg2; each is loaded
  // only after the matching a(n) has consumed the product in that register.
  1335. #define gdx mm3
  1336. #define gd mm7
  1337. #define icx mm2
  1338. #define lp esi
  1339. #define sp edi
  1340. #define maxx eax
  1341. #define gidx edx
  1342. #define icnt ebx
  1343. // In the following macros, 'n' is the column number.
  // l(n): load quadword n of lpint      m(n): multiply-add with quadword n
  // of the current spint vector         a(n): accumulate into acc0
  1344. #define l(n) ASM movq reg##n,QP[lp+8*n]
  1345. #define m(n) ASM pmaddwd reg##n,QP[sp+8*n]
  1346. #define a(n) ASM paddd acc0,reg##n
  1347. ASM
  1348. {
  1349. mov sp,spint;
  1350. mov lp,lpint;
  1351. mov icnt,numvecs;
  1352. mov gidx,gid;
  1353. mov maxx,max;
  1354. }
  1355. ASM movd gd,numvecs;//load gd with top codebook index
  1356. ASM movd acc1,DP[maxx];//load acc1 with previous max
  1357. //Begin loop
  // One pass of 'outer' handles one 20-short vector; icnt counts down from
  // numvecs, so the vector being examined has index numvecs - icnt.
  1358. outer:
  1359. //inner:
  1360. ASM pxor acc0,acc0;
  1361. ASM pxor reg1,reg1; //make first a(1) a nop
  1362. ASM pxor reg2,reg2; //make first a(2) a nop
  1363. //--------------------------
  1364. l(0);
  1365. a(1);
  1366. m(0);
  1367. l(1);
  1368. a(2);
  1369. m(1);
  1370. l(2);
  1371. a(0);
  1372. m(2);
  1373. l(3);
  1374. a(1);
  1375. m(3);
  1376. l(4);
  1377. a(2);
  1378. m(4);
  // Step to the next 20-short (40-byte) vector; lp is not advanced
  1379. ASM add sp,40;
  1380. a(3);
  // Save the current best-index value and fetch the loop counter
  1381. ASM movq gdx,gd;
  1382. ASM movd icx,icnt;
  1383. a(4);
  // Branch-free select: reg0 becomes an all-ones mask when the new sum is
  // greater than acc1; the xor/and/xor sequences then replace acc1 and gd
  // only under that mask.
  1384. ASM
  1385. {
  1386. movq reg0,acc0;
  1387. psrlq acc0,32;
  1388. pxor gd,icx;//gd=MASK
  1389. paddd acc0,reg0;
  1390. movq reg0,acc0; //copy acc0
  1391. movq reg1,acc1; //copy old max
  1392. pxor reg1,acc0
  1393. pcmpgtd reg0,acc1; //reg0=0xFF or 0x00
  1394. pand reg1,reg0; //reg1=MASK or 0x00
  1395. pxor acc1,reg1; //acc1=acc0 or acc1
  1396. pand gd,reg0; //gd=MASK or 0x00
  1397. pxor gd,gdx; //gd=icnt or previous value
  1398. sub icnt,1;
  1399. jg outer;
  1400. }
  // Convert the stored countdown value into a 0-based vector index
  1401. ASM movd reg0,numvecs;
  1402. ASM psubd reg0,gd;
  1403. ASM movd DP[gidx],reg0;//return gid
  1404. ASM movd DP[maxx],acc1;//return max
  1405. ASM emms;
  1406. }
  // Drop the macro names defined inside CodeBkSrch
  1407. #undef reg0
  1408. #undef reg1
  1409. #undef reg2
  1410. #undef reg3
  1411. #undef reg4
  1412. #undef acc1
  1413. #undef acc0
  1414. #undef gdx
  1415. #undef gd
  1416. #undef icx
  1417. #undef lp
  1418. #undef sp
  1419. #undef maxx
  1420. #undef gidx
  1421. #undef icnt
  1422. #undef l
  1423. #undef m
  1424. #undef a
  1425. #else
  // CodeBkSrch (portable C build, selected when ASM_FACBK is zero)
  //
  // Correlates the 20-element vector at lpint with numvecs consecutive
  // 20-element vectors starting at spint.  Each time a correlation is
  // strictly greater than *max, *max receives it and *gid receives that
  // vector's 0-based index.  Both outputs are left untouched when no
  // correlation exceeds the incoming *max.
  void CodeBkSrch(short *lpint, short *spint, int numvecs, int *gid, int *max)
  {
      short *vec = spint;
      int    v;

      for (v = 0; v < numvecs; v++, vec += 20)
      {
          int sum = 0;
          int k = 0;

          // 20-tap correlation against the current candidate vector
          while (k < 20)
          {
              sum += lpint[k]*vec[k];
              k++;
          }

          if (sum > *max)
          {
              *max = sum;
              *gid = v;
          }
      }
  }
  1443. #endif
  1444. #endif //COMPILE_MMX