Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

704 lines
14 KiB

  1. //
  2. // ITU-T G.723 Floating Point Speech Coder ANSI C Source Code. Version 1.00
  3. // copyright (c) 1995, AudioCodes, DSP Group, France Telecom,
  4. // Universite de Sherbrooke, Intel Corporation. All rights reserved.
  5. //
  6. #include <stdio.h>
  7. #include <math.h>
  8. #include "opt.h"
  9. #include "typedef.h"
  10. #include "cst_lbc.h"
  11. #include "tab_lbc.h"
  12. #include "coder.h"
  13. #include "decod.h"
  14. #include "util_lbc.h"
  15. #include "lpc.h"
  16. #include "mmxutil.h"
  17. #define Dot10m(x,y) \
  18. ( (*x)*(*y) + (*((x)+1))*(*((y)+1)) + (*((x)+2))*(*((y)+2)) + \
  19. (*((x)+3))*(*((y)+3)) + (*((x)+4))*(*((y)+4)) + (*((x)+5))*(*((y)+5)) + \
  20. (*((x)+6))*(*((y)+6)) + (*((x)+7))*(*((y)+7)) + (*((x)+8))*(*((y)+8)) + \
  21. (*((x)+9))*(*((y)+9)) )
  22. //-----------------------------------------------------------------
  23. void Comp_Lpc(float *UnqLpc, float *PrevDat, float *DataBuff, CODDEF *CodStat)
  24. {
  25. int i,j,k;
  26. float Dpnt[Frame+LpcFrame-SubFrLen];
  27. float Vect[LpcFrame];
  28. float Corr[LpcOrder+1];
  29. // Form Buffer
  30. for (i=0; i < LpcFrame-SubFrLen; i++)
  31. Dpnt[i] = PrevDat[i];
  32. for (i=0; i < Frame; i++)
  33. Dpnt[i+LpcFrame-SubFrLen] = DataBuff[i];
  34. // Do for all the sub frames
  35. for (k=0; k < SubFrames; k++)
  36. {
  37. // Copy the current window, multiply by Hamming window
  38. for (i = 0; i < LpcFrame; i++)
  39. Vect[i] = Dpnt[k*SubFrLen+i]*HammingWindowTable[i];
  40. // Compute correlation coefficients
  41. for (i=0; i<=LpcOrder; i++)
  42. Corr[i] = DotProd(Vect, &Vect[i], LpcFrame-i)/(LpcFrame*LpcFrame) *
  43. BinomialWindowTable[i];
  44. // Do Ridge regression
  45. Corr[0] *= (1025.0f/1024.0f);
  46. Durbin(&UnqLpc[k*LpcOrder], &Corr[1], Corr[0], CodStat);
  47. }
  48. /* Update sine detector */
  49. CodStat->SinDet &= 0x7fff ;
  50. j = CodStat->SinDet ;
  51. k = 0 ;
  52. for ( i = 0 ; i < 15 ; i ++ ) {
  53. k += j & 1 ;
  54. j >>= 1 ;
  55. }
  56. if ( k >= 14 )
  57. CodStat->SinDet |= 0x8000 ;
  58. }
  59. #if COMPILE_MMX
  60. void Comp_LpcInt(float *UnqLpc, float *PrevDat, float *DataBuff, CODDEF *CodStat)
  61. {
  62. int i,j,k;
  63. float Dpnt[Frame+LpcFrame-SubFrLen];
  64. float Vect[LpcFrame];
  65. float Corr[LpcOrder+1];
  66. float Fshift;
  67. int mx, Tshift;
  68. DECLARE_SHORT(VectShrt,LpcFrame+32);
  69. DECLARE_INT(Temp,12);
  70. ALIGN_ARRAY(VectShrt);
  71. ALIGN_ARRAY(Temp);
  72. // Form Buffer
  73. for (i=0; i < LpcFrame-SubFrLen; i++)
  74. Dpnt[i] = PrevDat[i];
  75. for (i=0; i < Frame; i++)
  76. Dpnt[i+LpcFrame-SubFrLen] = DataBuff[i];
  77. // Do for all the sub frames
  78. for (k=0; k < SubFrames; k++)
  79. {
  80. // Copy the current window, multiply by Hamming window
  81. for (i = 0; i < LpcFrame; i++)
  82. Vect[i] = Dpnt[k*SubFrLen+i]*HammingWindowTable[i];
  83. // Compute correlation coefficients
  84. mx = FloatToShortScaled(Vect,VectShrt,LpcFrame,3);
  85. for(j=0; j<31; j++) VectShrt[LpcFrame+j]=0;
  86. Tshift = 30 - (6+2*(mx-126));
  87. if(mx==0) Tshift = 0;
  88. CorrCoeff01(VectShrt, &VectShrt[0], Temp, LpcFrame);
  89. CorrCoeff23(VectShrt, &VectShrt[0], &Temp[2], LpcFrame);
  90. CorrCoeff01(VectShrt, &VectShrt[4], &Temp[4], LpcFrame);
  91. CorrCoeff23(VectShrt, &VectShrt[4], &Temp[6], LpcFrame);
  92. CorrCoeff01(VectShrt, &VectShrt[8], &Temp[8], LpcFrame);
  93. CorrCoeff23(VectShrt, &VectShrt[8], &Temp[10],LpcFrame);
  94. Fshift = 2.0f;
  95. if(Tshift>=0){
  96. for(j=1; j<Tshift; j++) Fshift *= 2.0f;
  97. Fshift = 1.0f/Fshift;
  98. }
  99. else
  100. {
  101. Tshift=-Tshift;
  102. for(j=1; j<Tshift; j++) Fshift *= 2.0f;
  103. }
  104. for (i=0; i<LpcOrder; i+=2){
  105. Corr[i] =((float)Temp[i])*Fshift*BinomialWindowTable[i] /(LpcFrame*LpcFrame);
  106. Corr[i+1]=((float)Temp[i+1])*Fshift*BinomialWindowTable[i+1]/(LpcFrame*LpcFrame);
  107. }
  108. Corr[10] =((float)Temp[10])*Fshift*BinomialWindowTable[10] /(LpcFrame*LpcFrame);
  109. // Do Ridge regression
  110. Corr[0] *= (1025.0f/1024.0f);
  111. Durbin(&UnqLpc[k*LpcOrder], &Corr[1], Corr[0], CodStat);
  112. }
  113. /* Update sine detector */
  114. CodStat->SinDet &= 0x7fff ;
  115. j = CodStat->SinDet ;
  116. k = 0 ;
  117. for ( i = 0 ; i < 15 ; i ++ ) {
  118. k += j & 1 ;
  119. j >>= 1 ;
  120. }
  121. if ( k >= 14 )
  122. CodStat->SinDet |= 0x8000 ;
  123. }
  124. #endif
  125. //----------------------------------------------------
  126. float Durbin(float *Lpc, float *Corr, float Err, CODDEF *CodStat)
  127. {
  128. int i,j;
  129. float Temp[LpcOrder];
  130. float Pk,Tmp0;
  131. // Clear the result lpc vector
  132. for (i=0; i < LpcOrder; i++)
  133. Lpc[i] = 0.0f;
  134. for (i=0; i < LpcOrder; i++)
  135. {
  136. Tmp0 = Corr[i];
  137. for (j=0; j<i; j++)
  138. Tmp0 -= Lpc[j]*Corr[i-j-1];
  139. if (fabs(Tmp0) >= Err)
  140. break;
  141. Lpc[i] = Pk = Tmp0/Err;
  142. Err -= Tmp0*Pk;
  143. for (j=0; j < i; j++)
  144. Temp[j] = Lpc[j];
  145. for (j=0; j < i; j++)
  146. Lpc[j] = Lpc[j] - Pk*Temp[i-j-1];
  147. /*
  148. * Sine detector
  149. */
  150. if ( i == 1 )
  151. {
  152. CodStat->SinDet <<= 1 ;
  153. if ( Pk > 0.95f)
  154. CodStat->SinDet ++ ;
  155. }
  156. }
  157. // Lpc[] values * 2^13 corresponds to fixed-point values
  158. return Err;
  159. }
  160. //---------------------------------------------------------
  161. void Wght_Lpc(float *PerLpc, float *UnqLpc)
  162. {
  163. int i,j;
  164. for (i=0; i < SubFrames; i++)
  165. {
  166. for (j=0; j < LpcOrder; j++)
  167. {
  168. PerLpc[j] = UnqLpc[j]*PerFiltZeroTable[j];
  169. PerLpc[j+LpcOrder] = UnqLpc[j]*PerFiltPoleTable[j];
  170. }
  171. PerLpc += 2*LpcOrder;
  172. UnqLpc += LpcOrder;
  173. }
  174. }
  175. //----------------------------------------------------------
  176. void Error_Wght(float *Dpnt, float *PerLpc,CODDEF *CodStat)
  177. {
  178. int i,k;
  179. float Acc0;
  180. for (k=0; k < SubFrames; k++)
  181. {
  182. for (i=0; i < SubFrLen; i++)
  183. {
  184. // FIR part
  185. Acc0 = *Dpnt - Dot10m(PerLpc,&CodStat->WghtFirDl[CodStat->p]);
  186. // IIR part
  187. Acc0 += Dot10m(&PerLpc[LpcOrder],&CodStat->WghtIirDl[CodStat->p]);
  188. CodStat->p = minus1mod10[CodStat->p];
  189. CodStat->WghtFirDl[CodStat->p] =
  190. CodStat->WghtFirDl[CodStat->p + LpcOrder] = *Dpnt;
  191. *Dpnt++ = CodStat->WghtIirDl[CodStat->p] =
  192. CodStat->WghtIirDl[CodStat->p + LpcOrder] = Acc0;
  193. }
  194. PerLpc += 2*LpcOrder;
  195. }
  196. }
  197. //-----------------------------------------------------------------------
  198. void Comp_Ir(float *ImpResp, float *QntLpc, float *PerLpc, PWDEF Pw)
  199. {
  200. int i;
  201. float FirDl[2*LpcOrder];
  202. float IirDl[2*LpcOrder];
  203. float Temp[PitchMax+SubFrLen];
  204. float Acc0,Acc1;
  205. int p = 9;
  206. // Clear all
  207. for (i=0; i < 2*LpcOrder; i++)
  208. FirDl[i] = IirDl[i] = 0.0f;
  209. for (i=0; i < PitchMax+SubFrLen; i++)
  210. Temp[i] = 0.0f;
  211. // Compute impulse response
  212. Acc0 = 0.5f;
  213. for (i=0; i < SubFrLen; i++)
  214. {
  215. // Synthesis filter
  216. Acc1 = Acc0 + Dot10m(QntLpc,&FirDl[p]);
  217. // FIR, IIR part
  218. Acc0 = Acc1 - Dot10m(PerLpc,&FirDl[p])
  219. + Dot10m(&PerLpc[LpcOrder],&IirDl[p]);
  220. p = minus1mod10[p];
  221. FirDl[p] = FirDl[p + LpcOrder] = Acc1;
  222. Temp[PitchMax+i] = IirDl[p] = IirDl[p + LpcOrder] = Acc0;
  223. // Harmonic part
  224. ImpResp[i] = Acc0 - Pw.Gain*Temp[PitchMax-Pw.Indx+i];
  225. Acc0 = 0.0f;
  226. }
  227. }
  228. //------------------------------------------------------------------
  229. void Sub_Ring(float *Dpnt, float *QntLpc, float *PerLpc, float
  230. *PrevErr, PWDEF Pw,CODDEF *CodStat)
  231. {
  232. int i;
  233. float Acc0,Acc1;
  234. float FirDl[2*LpcOrder];
  235. float IirDl[2*LpcOrder];
  236. float Temp[PitchMax+SubFrLen];
  237. int p = 9;
  238. // Initialize the delay lines
  239. for (i=0; i < PitchMax; i++)
  240. Temp[i] = PrevErr[i];
  241. for (i=0; i < 2*LpcOrder; i++)
  242. {
  243. FirDl[i] = CodStat->RingFirDl[i];
  244. IirDl[i] = CodStat->RingIirDl[i];
  245. }
  246. // Main loop
  247. for (i=0; i < SubFrLen; i++)
  248. {
  249. // Synthesis filter
  250. Acc1 = Acc0 = Dot10m(QntLpc,&FirDl[p]);
  251. // FIR, IIR part
  252. Acc0 -= Dot10m(PerLpc,&FirDl[p]);
  253. Acc0 += Dot10m(&PerLpc[LpcOrder],&IirDl[p]);
  254. p = minus1mod10[p];
  255. FirDl[p] = FirDl[p + LpcOrder] = Acc1;
  256. Temp[PitchMax+i] = IirDl[p] = IirDl[p + LpcOrder] = Acc0;
  257. // Harmonic Part
  258. Dpnt[i] -= Acc0 - Pw.Gain*Temp[PitchMax-Pw.Indx+i];
  259. }
  260. }
  261. //-----------------------------------------------------------------
  262. void Upd_Ring(float *Dpnt, float *QntLpc, float *PerLpc, float
  263. *PrevErr, CODDEF *CodStat)
  264. {
  265. int i;
  266. float Acc0,Acc1;
  267. // Shift the PrevErr buffer
  268. for (i=SubFrLen; i < PitchMax; i++)
  269. PrevErr[i-SubFrLen] = PrevErr[i];
  270. // Update the ring delay and PrevErr buffer
  271. for (i=0; i < SubFrLen; i++)
  272. {
  273. // Synt filter
  274. Acc1 = Acc0 = Dpnt[i] += Dot10m(QntLpc,&CodStat->RingFirDl[CodStat->q])*2.0f;
  275. // Fir,Iir filter
  276. Acc0 -= Dot10m(PerLpc,&CodStat->RingFirDl[CodStat->q])*2.0f;
  277. Acc0 += Dot10m(&PerLpc[LpcOrder],&CodStat->RingIirDl[CodStat->q])*2.0f;
  278. CodStat->q = minus1mod10[CodStat->q];
  279. CodStat->RingFirDl[CodStat->q] =
  280. CodStat->RingFirDl[CodStat->q + LpcOrder] = Acc1*0.5f;
  281. PrevErr[PitchMax-SubFrLen+i] = CodStat->RingIirDl[CodStat->q] =
  282. CodStat->RingIirDl[CodStat->q + LpcOrder] = Acc0*0.5f;
  283. }
  284. }
  285. //----------------------------------------------------
  286. void Synt(float *Dpnt, float *Lpc, DECDEF *DecStat)
  287. {
  288. int i;
  289. float Acc0 ;
  290. for (i=0 ; i < SubFrLen ; i++)
  291. {
  292. Acc0 = Dpnt[i] + Dot10m(Lpc,&DecStat->SyntIirDl[DecStat->dq]);
  293. DecStat->dq = minus1mod10[DecStat->dq];
  294. Dpnt[i] = DecStat->SyntIirDl[DecStat->dq] =
  295. DecStat->SyntIirDl[DecStat->dq + LpcOrder] = Acc0;
  296. }
  297. }
  298. //----------------------------------------------------
  299. //Spf
  300. #if COMPILE_MMX
// CorrCoeff01 (MMX build): 32-bit integer correlation sums of `samples`
// against `samples_offst` at lag 0 and lag 1, stored to coeff[0] and
// coeff[1].
//   samples       - 16-bit input, addressed in 8-byte quadwords via esi
//   samples_offst - 16-bit input the lags are taken from, via edi
//   coeff         - receives two 32-bit sums
//   buffsz        - sample count; the code divides it by 4 with a shift
// The loop walks the buffers from the end towards the start, two quadwords
// per pass, and reads quadwords beyond buffsz samples, so the caller must
// provide readable padding after both buffers.
// NOTE(review): the loop looks software-pipelined - registers loaded or
// multiplied late in one pass are consumed early in the next.  The product
// formed by the final M2(2,2) therefore appears never to be added to acc1
// before the epilogue; confirm whether the lag-1 sum omits the first
// quadword.
// NOTE(review): on the last pass cnt is -2 when L1(1,3) executes, which
// addresses 8 bytes below samples_offst; the loaded value is not used
// afterwards, but confirm the read is safe for every caller.
void CorrCoeff01(short *samples, short *samples_offst, int *coeff, int buffsz)
{
// MMX work registers and the two accumulators
#define reg0 mm0
#define reg1 mm1
#define reg2 mm2
#define reg3 mm3
#define reg4 mm4
#define reg5 mm5
#define acc0 mm6
#define acc1 mm7
// General-purpose registers: s/t base pointers, cnt quadword index, c0 output
#define s esi
#define t edi
#define cnt ecx
#define c0 eax
// L1/L2: load quadword i (L1) or i+1 (L2), relative to cnt, from t
#define L1(i,r0) ASM movq reg##r0,QP[t+8*cnt+8*i]
#define L2(i,r0) ASM movq reg##r0,QP[t+8*cnt+8+8*i]
// C1: register copy
#define C1(r0,r1) ASM movq reg##r0,reg##r1
// M1/M2: multiply-add the register with quadword i of s (same expansion)
#define M1(i,r0) ASM pmaddwd reg##r0,QP[s+8*cnt+8*i]
#define M2(i,r0) ASM pmaddwd reg##r0,QP[s+8*cnt+8*i]
// O1: merge two shifted quadwords
#define O1(r0,r1) ASM por reg##r0,reg##r1
// A1/A2: accumulate into acc0 (lag 0) / acc1 (lag 1)
#define A1(r0) ASM paddd acc0,reg##r0
#define A2(r0) ASM paddd acc1,reg##r0
// S1/S2: shift right by one 16-bit word / shift left by three words; OR-ing
// S1 of one quadword with S2 of the following one gives the data one
// sample later
#define S1(r0) ASM psrlq reg##r0,16
#define S2(r0) ASM psllq reg##r0,48
    ASM
    {
        mov c0, coeff;
        mov s,samples;
        mov t,samples_offst;
        mov cnt,buffsz;
        //assume that mod(buffsz,4)=0
        //this is very dangerous!!
        // cnt = buffsz/4 - 1: index of the last quadword
        shr cnt,2;
        sub cnt,1;
        // clear the accumulators and the registers read on the first pass
        pxor acc0,acc0;
        pxor acc1,acc1;
        pxor reg2,reg2;
        pxor reg0,reg0;
        pxor reg3,reg3;
        pxor reg4,reg4;
        pxor reg5,reg5;
    }
looptop:
    //----------------------------------
    L2(1,5)
    S1(4)
    M1(1,3)
    S2(5)
    L1(0,0)
    O1(5,4)
    M2(1,5)
    A2(2)
    L2(0,2)
    C1(1,0)
    M1(0,0)
    S1(1)
    S2(2)
    // cnt drops by two quadwords here, so the offsets used below are
    // relative to the new cnt; jge at the bottom tests the flags of this
    // sub (no instruction in between is an integer ALU operation)
    ASM sub cnt,2;
    O1(2,1)
    A1(3)
    L1(1,3)
    A2(5)
    M2(2,2)
    C1(4,3)
    A1(0)
    ASM jge looptop;
    //----------------------------------
    // Epilogue: add the two 32-bit halves of each accumulator and store
    // acc0 to coeff[0], acc1 to coeff[1]
    ASM
    {
        movq reg0,acc0;
        psrlq acc0,32;
        paddd acc0,reg0;
        movd DP[c0],acc0;
        movq reg1,acc1;
        psrlq acc1,32;
        paddd acc1,reg1;
        movd DP[c0+4],acc1;
        emms;
    }
}
  381. #undef reg0
  382. #undef reg1
  383. #undef reg2
  384. #undef acc0
  385. #undef acc1
  386. #undef cnt
  387. #undef tmp
  388. #undef L1
  389. #undef L2
  390. #undef C1
  391. #undef M1
  392. #undef M2
  393. #undef A1
  394. #undef A2
  395. #undef S1
  396. #undef S2
  397. #undef O1
  398. #else
  399. void CorrCoeff01(short *samples, short *samples_offst, int *coeff, int buffsz)
  400. {
  401. int i,j;
  402. int Acc0;
  403. for(i=0; i<=1; i++){
  404. Acc0=0;
  405. for(j=0; j<LpcFrame; j++)
  406. Acc0 += samples[j]*samples_offst[j+i];
  407. *coeff++ = Acc0;
  408. }
  409. }
  410. #endif
  411. #if COMPILE_MMX
// CorrCoeff23 (MMX build): 32-bit integer correlation sums of `samples`
// against `samples_offst` at lag 2 and lag 3, stored to coeff[0] and
// coeff[1].
//   samples       - 16-bit input, addressed in 8-byte quadwords via esi
//   samples_offst - 16-bit input the lags are taken from, via edi
//   coeff         - receives two 32-bit sums
//   buffsz        - sample count; the code divides it by 4 with a shift
// The loop walks the buffers from the end towards the start, two quadwords
// per pass, and reads quadwords beyond buffsz samples, so the caller must
// provide readable padding after both buffers.
// NOTE(review): the loop looks software-pipelined - registers prepared late
// in one pass are consumed early in the next.  The product formed by the
// final M2(2,5) therefore appears never to be added to acc1 before the
// epilogue; confirm whether the lag-2 sum omits the first quadword.
// NOTE(review): on the last pass cnt is -2 when L1(1,0) executes, which
// addresses 8 bytes below samples_offst; the loaded value is not used
// afterwards, but confirm the read is safe for every caller.
void CorrCoeff23(short *samples, short *samples_offst, int *coeff, int buffsz)
{
// MMX work registers and the two accumulators
#define reg0 mm0
#define reg1 mm1
#define reg2 mm2
#define reg3 mm3
#define reg4 mm4
#define reg5 mm5
#define acc0 mm6
#define acc1 mm7
// General-purpose registers: s/t base pointers, cnt quadword index, c0 output
#define s esi
#define t edi
#define cnt ecx
#define c0 eax
// L1/L2: load quadword i (L1) or i+1 (L2), relative to cnt, from t
#define L1(i,r0) ASM movq reg##r0,QP[t+8*cnt+8*i]
#define L2(i,r0) ASM movq reg##r0,QP[t+8*cnt+8+8*i]
// C1: register copy
#define C1(r0,r1) ASM movq reg##r0,reg##r1
// M1/M2: multiply-add the register with quadword i of s (same expansion)
#define M1(i,r0) ASM pmaddwd reg##r0,QP[s+8*cnt+8*i]
#define M2(i,r0) ASM pmaddwd reg##r0,QP[s+8*cnt+8*i]
// O1/O2: merge two shifted quadwords (same expansion)
#define O1(r0,r1) ASM por reg##r0,reg##r1
#define O2(r0,r1) ASM por reg##r0,reg##r1
// A1/A2: accumulate into acc0 (lag 3) / acc1 (lag 2)
#define A1(r0) ASM paddd acc0,reg##r0
#define A2(r0) ASM paddd acc1,reg##r0
// S1/S2: right by three words / left by one word - the lag-3 pieces.
// S3/S4: right by two words / left by one more word; S4 is applied to a
// register that S2 already shifted, giving the two-word left shift needed
// for the lag-2 pieces.
#define S1(r0) ASM psrlq reg##r0,48
#define S2(r0) ASM psllq reg##r0,16
#define S3(r0) ASM psrlq reg##r0,32
#define S4(r0) ASM psllq reg##r0,16
    ASM
    {
        mov c0, coeff;
        mov s,samples;
        mov t,samples_offst;
        mov cnt,buffsz;
        //assume that mod(buffsz,4)=0
        //this is very dangerous!!
        // cnt = buffsz/4 - 1: index of the last quadword
        shr cnt,2;
        sub cnt,1;
        // clear the accumulators and all work registers
        pxor acc0,acc0;
        pxor acc1,acc1;
        pxor reg2,reg2;
        pxor reg1,reg1;
        pxor reg0,reg0;
        pxor reg3,reg3;
        pxor reg4,reg4;
        pxor reg5,reg5;
    }
looptop:
    //----------------------------------
    O1(0,2)
    S3(1)
    M1(1,0)
    A2(5)
    L1(0,3)
    S4(2)
    L2(0,5)
    O2(2,1)
    M2(1,2)
    C1(4,3)
    S1(3)
    A1(0)
    S2(5)
    // cnt drops by two quadwords here, so the offsets used below are
    // relative to the new cnt; jge at the bottom tests the flags of this
    // sub (no instruction in between is an integer ALU operation)
    ASM sub cnt,2;
    O1(3,5)
    S3(4)
    M1(2,3)
    A2(2)
    L1(1,0)
    S4(5)
    L2(1,2)
    O2(5,4)
    M2(2,5)
    C1(1,0)
    S1(0)
    A1(3)
    S2(2)
    ASM jge looptop;
    //------------------------------------
    // Epilogue: add the two 32-bit halves of each accumulator and store
    // acc1 to coeff[0], acc0 to coeff[1]
    ASM
    {
        movq reg0,acc1;
        psrlq acc1,32;
        paddd acc1,reg0;
        movd DP[c0],acc1;
        movq reg1,acc0;
        psrlq acc0,32;
        paddd acc0,reg1;
        movd DP[c0+4],acc0;
        emms;
    }
}
// Macro cleanup.  NOTE(review): reg3, reg4, reg5, s, t and c0 are defined
// above but not undefined here, and tmp is undefined without a visible
// definition - confirm nothing later in the file depends on this before
// tightening the list.
#undef reg0
#undef reg1
#undef reg2
#undef acc0
#undef acc1
#undef cnt
#undef tmp
#undef L1
#undef L2
#undef C1
#undef M1
#undef M2
#undef A1
#undef A2
#undef S1
#undef S2
#undef S3
#undef S4
#undef O1
#undef O2
  522. #else
  523. void CorrCoeff23(short *samples, short *samples_offst, int *coeff, int buffsz)
  524. {
  525. int i,j;
  526. int Acc0;
  527. for(i=2; i<=3; i++){
  528. Acc0=0;
  529. for(j=0; j<LpcFrame; j++)
  530. Acc0 += samples[j]*samples_offst[j+i];
  531. *coeff++ = Acc0;
  532. }
  533. }
  534. #endif