Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1039 lines
32 KiB

  1. /*
  2. * @DEC_COPYRIGHT@
  3. */
  4. /*
  5. * HISTORY
  6. * $Log: sc_dct.c,v $
  7. * Revision 1.1.4.6 1996/01/08 16:41:14 Hans_Graves
  8. * Moved IDCT routines to sc_idct.c
  9. * [1996/01/08 15:31:42 Hans_Graves]
  10. *
  11. * Revision 1.1.4.5 1996/01/02 18:30:42 Bjorn_Engberg
  12. * Got rid of compiler warnings: Added Casts, Removed unused local variables.
  13. * [1996/01/02 15:23:37 Bjorn_Engberg]
  14. *
  15. * Revision 1.1.4.4 1995/12/28 18:16:55 Bjorn_Engberg
  16. * Define floorf = floor for NT since NT does not have floorf.
  17. * [1995/12/28 17:10:31 Bjorn_Engberg]
  18. *
  19. * Revision 1.1.4.3 1995/12/07 19:31:16 Hans_Graves
  20. * Added ScFDCT8x8s_C() and ScIDCT8x8s_C to be used by MPEG encoder
  21. * [1995/12/07 17:43:21 Hans_Graves]
  22. *
  23. * Revision 1.1.4.2 1995/09/13 14:51:40 Hans_Graves
  24. * Added ScScaleIDCT8x8().
  25. * [1995/09/13 14:40:56 Hans_Graves]
  26. *
  27. * Revision 1.1.2.2 1995/05/31 18:07:33 Hans_Graves
  28. * Inclusion in new SLIB location.
  29. * [1995/05/31 16:08:02 Hans_Graves]
  30. *
  31. * $EndLog$
  32. */
  33. /*****************************************************************************
  34. ** Copyright (c) Digital Equipment Corporation, 1995 **
  35. ** **
  36. ** All Rights Reserved. Unpublished rights reserved under the copyright **
  37. ** laws of the United States. **
  38. ** **
  39. ** The software contained on this media is proprietary to and embodies **
  40. ** the confidential technology of Digital Equipment Corporation. **
  41. ** Possession, use, duplication or dissemination of the software and **
  42. ** media is authorized only pursuant to a valid written license from **
  43. ** Digital Equipment Corporation. **
  44. ** **
  45. ** RESTRICTED RIGHTS LEGEND Use, duplication, or disclosure by the U.S. **
  46. ** Government is subject to restrictions as set forth in Subparagraph **
  47. ** (c)(1)(ii) of DFARS 252.227-7013, or in FAR 52.227-19, as applicable. **
  48. ******************************************************************************/
  49. /*
  50. ** Filename: sc_dct.c
  51. ** DCT related functions.
  52. ** (Pulled from MPEG/JPEG Decode & Encode source.)
  53. */
#include <limits.h>
#include <math.h>
#include "SC.h"
  56. /*-----------------------------------------------------------------------
  57. Forward Discrete Cosine Transform
  58. -------------------------------------------------------------------------*/
  59. #define F (float)
  60. #define RSQ2 F 0.7071067811865
  61. #define COSM1P3 F 1.3065629648764
  62. #define COS1M3 F 0.5411961001462
  63. #define COS3 F 0.3826834323651
  64. #ifdef WIN32
  65. #define floorf F floor
  66. #endif /* WIN32 */
  67. /*
  68. ** Name: ScIFDCT
  69. ** Purpose: IFDCT takes the fast inverse DCT of 32 data points
  70. */
  71. void ScIFDCT(float in_block[32], float out_block[32])
  72. {
  73. static float cpi4,cpi8,c3pi8,cpi16,c3pi16;
  74. static float cpi32,c3pi32,c5pi32,c7pi32;
  75. static float spi8,s3pi8,spi16,s3pi16;
  76. static float spi32,s3pi32,s5pi32,s7pi32;
  77. static float cpi64,c17pi64,c9pi64,c25pi64,c5pi64,c21pi64;
  78. static float c13pi64,c29pi64;
  79. static float spi64,s17pi64,s9pi64,s25pi64,s5pi64,s21pi64;
  80. static float s13pi64,s29pi64;
  81. static int init=0;
  82. float temp;
  83. float c0,c1,c2,c3,c4,c5,c6,c7,c8;
  84. float c9,c10,c11,c12,c13,c14,c15;
  85. float c16,c17,c18,c19,c20,c21,c22;
  86. float c23,c24,c25,c26,c27,c28,c29;
  87. float c30,c31;
  88. float d0,d1,d2,d3,d4,d5,d6,d7,d8;
  89. float d9,d10,d11,d12,d13,d14,d15;
  90. float d16,d17,d18,d19,d20,d21,d22;
  91. float d23,d24,d25,d26,d27,d28,d29;
  92. float d30,d31;
  93. float e0,e1,e2,e3,e4,e5,e6,e7,e8;
  94. float e9,e10,e11,e12,e13,e14,e15;
  95. float e16,e17,e18,e19,e20,e21,e22;
  96. float e23,e24,e25,e26,e27,e28,e29;
  97. float e30,e31;
  98. float *out_block_ptr;
  99. float *in_block_ptr;
  100. /* initialization. This is to be done only once. */
  101. if(init==0) {
  102. /* establish internal variables */
  103. cpi4 = F cos(PI/4.0);
  104. temp = F (PI/8.0); cpi8 = F cos(temp); spi8 = F sin(temp);
  105. temp = F (3.0*PI/8.0); c3pi8 = F cos(temp); s3pi8 = F sin(temp);
  106. temp = F (PI/16.0); cpi16 = F cos(temp); spi16 = F sin(temp);
  107. temp = F (3.0*PI/16.0); c3pi16 = F cos(temp); s3pi16 = F sin(temp);
  108. temp = F (PI/32.0); cpi32 = F cos(temp); spi32 = F sin(temp);
  109. temp = F (3.0*PI/32.0); c3pi32 = F cos(temp); s3pi32 = F sin(temp);
  110. temp = F (5.0*PI/32.0); c5pi32 = F cos(temp); s5pi32 = F sin(temp);
  111. temp = F (7.0*PI/32.0); c7pi32 = F cos(temp); s7pi32 = F sin(temp);
  112. temp = F (PI/64.0); cpi64 = F cos(temp); spi64 = F sin(temp);
  113. temp = F (17.0*PI/64.0); c17pi64 = F cos(temp); s17pi64 = F sin(temp);
  114. temp = F (9.0*PI/64.0); c9pi64 = F cos(temp); s9pi64 = F sin(temp);
  115. temp = F (25.0*PI/64.0); c25pi64 = F cos(temp); s25pi64 = F sin(temp);
  116. temp = F (5.0*PI/64.0); c5pi64 = F cos(temp); s5pi64 = F sin(temp);
  117. temp = F (21.0*PI/64.0); c21pi64 = F cos(temp); s21pi64 = F sin(temp);
  118. temp = F (13.0*PI/64.0); c13pi64 = F cos(temp); s13pi64 = F sin(temp);
  119. temp = F (29.0*PI/64.0); c29pi64 = F cos(temp); s29pi64 = F sin(temp);
  120. init++;
  121. }
  122. /* rearrange coefficients to do bit reversed ordering */
  123. in_block_ptr = &in_block[0];
  124. d0 = *in_block_ptr++ * F 1.414213562; /******** normalization ********/
  125. d16 = *in_block_ptr++;
  126. d8 = *in_block_ptr++;
  127. d24 = *in_block_ptr++;
  128. e4 = *in_block_ptr++;
  129. d20 = *in_block_ptr++;
  130. d12 = *in_block_ptr++;
  131. d28 = *in_block_ptr++;
  132. d2 = *in_block_ptr++;
  133. d18 = *in_block_ptr++;
  134. d10 = *in_block_ptr++;
  135. d26 = *in_block_ptr++;
  136. e6 = *in_block_ptr++;
  137. d22 = *in_block_ptr++;
  138. d14 = *in_block_ptr++;
  139. d30 = *in_block_ptr++;
  140. d1 = *in_block_ptr++;
  141. d17 = *in_block_ptr++;
  142. d9 = *in_block_ptr++;
  143. d25 = *in_block_ptr++;
  144. e5 = *in_block_ptr++;
  145. d21 = *in_block_ptr++;
  146. d13 = *in_block_ptr++;
  147. d29 = *in_block_ptr++;
  148. d3 = *in_block_ptr++;
  149. d19 = *in_block_ptr++;
  150. d11 = *in_block_ptr++;
  151. d27 = *in_block_ptr++;
  152. e7 = *in_block_ptr++;
  153. d23 = *in_block_ptr++;
  154. d15 = *in_block_ptr++;
  155. d31 = *in_block_ptr++;
  156. /* first stage of fast idct */
  157. c16=spi64*d16-cpi64*d31;
  158. c31=spi64*d31+cpi64*d16;
  159. c17=s17pi64*d17-c17pi64*d30;
  160. c30=s17pi64*d30+c17pi64*d17;
  161. c18=s9pi64*d18-c9pi64*d29;
  162. c29=s9pi64*d29+c9pi64*d18;
  163. c19=s25pi64*d19-c25pi64*d28;
  164. c28=s25pi64*d28+c25pi64*d19;
  165. c20=s5pi64*d20-c5pi64*d27;
  166. c27=s5pi64*d27+c5pi64*d20;
  167. c21=s21pi64*d21-c21pi64*d26;
  168. c26=s21pi64*d26+c21pi64*d21;
  169. c22=s13pi64*d22-c13pi64*d25;
  170. c25=s13pi64*d25+c13pi64*d22;
  171. c23=s29pi64*d23-c29pi64*d24;
  172. c24=s29pi64*d24+c29pi64*d23;
  173. /* second stage of fast idct */
  174. e8=spi32*d8-cpi32*d15;
  175. e15=spi32*d15+cpi32*d8;
  176. e9=c7pi32*d9-s7pi32*d14;
  177. e14=c7pi32*d14+s7pi32*d9;
  178. e10=s5pi32*d10-c5pi32*d13;
  179. e13=s5pi32*d13+c5pi32*d10;
  180. e11=c3pi32*d11-s3pi32*d12;
  181. e12=c3pi32*d12+s3pi32*d11;
  182. d16=c17+c16;
  183. e17=c16-c17;
  184. e18=c19-c18;
  185. d19=c18+c19;
  186. d20=c21+c20;
  187. e21=c20-c21;
  188. e22=c23-c22;
  189. d23=c22+c23;
  190. d24=c25+c24;
  191. e25=c24-c25;
  192. e26=c27-c26;
  193. d27=c26+c27;
  194. d28=c29+c28;
  195. e29=c28-c29;
  196. e30=c31-c30;
  197. d31=c30+c31;
  198. /* third stage of fast idct */
  199. d4=spi16*e4-cpi16*e7;
  200. d7=spi16*e7+cpi16*e4;
  201. d5=c3pi16*e5-s3pi16*e6;
  202. d6=c3pi16*e6+s3pi16*e5;
  203. c8=e9+e8;
  204. d9=e8-e9;
  205. d10=e11-e10;
  206. c11=e10+e11;
  207. c12=e13+e12;
  208. d13=e12-e13;
  209. d14=e15-e14;
  210. c15=e14+e15;
  211. d17= -cpi16*e17+spi16*e30;
  212. d30=cpi16*e30+spi16*e17;
  213. d18= -spi16*e18-cpi16*e29;
  214. d29=spi16*e29-cpi16*e18;
  215. d21=c3pi16*e26-s3pi16*e21;
  216. d26=c3pi16*e21+s3pi16*e26;
  217. d22= -s3pi16*e25-c3pi16*e22;
  218. d25= -s3pi16*e22+c3pi16*e25;
  219. /* fourth stage of fast idct */
  220. c0=cpi4*(d1+d0);
  221. c1=cpi4*(d0-d1);
  222. c2= -cpi8*d3+spi8*d2;
  223. c3= cpi8*d2+spi8*d3;
  224. e4=d5+d4;
  225. c5=d4-d5;
  226. c6=d7-d6;
  227. e7=d6+d7;
  228. c9=spi8*d14-cpi8*d9;
  229. c14=spi8*d9+cpi8*d14;
  230. c10= -cpi8*d13-spi8*d10;
  231. c13= -cpi8*d10+spi8*d13;
  232. e16=d19+d16;
  233. c19=d16-d19;
  234. e17=d18+d17;
  235. c18=d17-d18;
  236. c20=d23-d20;
  237. e23=d20+d23;
  238. c21=d22-d21;
  239. e22=d21+d22;
  240. e24=d27+d24;
  241. c27=d24-d27;
  242. e25=d26+d25;
  243. c26=d25-d26;
  244. c28=d31-d28;
  245. e31=d28+d31;
  246. c29=d30-d29;
  247. e30=d29+d30;
  248. /* fifth stage of fast idct */
  249. e0=c3+c0;
  250. e3=c0-c3;
  251. e1=c2+c1;
  252. e2=c1-c2;
  253. e5=cpi4*(c6-c5);
  254. e6=cpi4*(c6+c5);
  255. d8=c11+c8;
  256. e11=c8-c11;
  257. d9=c10+c9;
  258. e10=c9-c10;
  259. e12=c15-c12;
  260. d15=c12+c15;
  261. e13=c14-c13;
  262. d14=c13+c14;
  263. e18=spi8*c29-cpi8*c18;
  264. e29=spi8*c18+cpi8*c29;
  265. e19=spi8*c28-cpi8*c19;
  266. e28=spi8*c19+cpi8*c28;
  267. e20= -cpi8*c27-spi8*c20;
  268. e27= -cpi8*c20+spi8*c27;
  269. e21= -cpi8*c26-spi8*c21;
  270. e26= -cpi8*c21+spi8*c26;
  271. /* sixth stage of fast dct */
  272. d0=e0+e7;
  273. d7=e0-e7;
  274. d1=e1+e6;
  275. d6=e1-e6;
  276. d2=e2+e5;
  277. d5=e2-e5;
  278. d3=e3+e4;
  279. d4=e3-e4;
  280. d10=cpi4*(e13-e10);
  281. d13=cpi4*(e13+e10);
  282. d11=cpi4*(e12-e11);
  283. d12=cpi4*(e12+e11);
  284. c16=e23+e16;
  285. d23=e16-e23;
  286. c17=e22+e17;
  287. d22=e17-e22;
  288. c18=e21+e18;
  289. d21=e18-e21;
  290. c19=e20+e19;
  291. d20=e19-e20;
  292. d24=e31-e24;
  293. c31=e24+e31;
  294. d25=e30-e25;
  295. c30=e25+e30;
  296. d26=e29-e26;
  297. c29=e26+e29;
  298. d27=e28-e27;
  299. c28=e27+e28;
  300. /* seventh stage of fast dct */
  301. c0=d0+d15;
  302. c15=d0-d15;
  303. c1=d1+d14;
  304. c14=d1-d14;
  305. c2=d2+d13;
  306. c13=d2-d13;
  307. c3=d3+d12;
  308. c12=d3-d12;
  309. c4=d4+d11;
  310. c11=d4-d11;
  311. c5=d5+d10;
  312. c10=d5-d10;
  313. c6=d6+d9;
  314. c9=d6-d9;
  315. c7=d7+d8;
  316. c8=d7-d8;
  317. c20=cpi4*(d27-d20);
  318. c27=cpi4*(d27+d20);
  319. c21=cpi4*(d26-d21);
  320. c26=cpi4*(d26+d21);
  321. c22=cpi4*(d25-d22);
  322. c25=cpi4*(d25+d22);
  323. c23=cpi4*(d24-d23);
  324. c24=cpi4*(d24+d23);
  325. /* last stage of fast idct */
  326. out_block_ptr = &out_block[0];
  327. *out_block_ptr++ = c0+c31;
  328. *out_block_ptr++ = c1+c30;
  329. *out_block_ptr++ = c2+c29;
  330. *out_block_ptr++ = c3+c28;
  331. *out_block_ptr++ = c4+c27;
  332. *out_block_ptr++ = c5+c26;
  333. *out_block_ptr++ = c6+c25;
  334. *out_block_ptr++ = c7+c24;
  335. *out_block_ptr++ = c8+c23;
  336. *out_block_ptr++ = c9+c22;
  337. *out_block_ptr++ = c10+c21;
  338. *out_block_ptr++ = c11+c20;
  339. *out_block_ptr++ = c12+c19;
  340. *out_block_ptr++ = c13+c18;
  341. *out_block_ptr++ = c14+c17;
  342. *out_block_ptr++ = c15+c16;
  343. *out_block_ptr++ = -c16+c15;
  344. *out_block_ptr++ = -c17+c14;
  345. *out_block_ptr++ = -c18+c13;
  346. *out_block_ptr++ = -c19+c12;
  347. *out_block_ptr++ = -c20+c11;
  348. *out_block_ptr++ = -c21+c10;
  349. *out_block_ptr++ = -c22+c9;
  350. *out_block_ptr++ = -c23+c8;
  351. *out_block_ptr++ = -c24+c7;
  352. *out_block_ptr++ = -c25+c6;
  353. *out_block_ptr++ = -c26+c5;
  354. *out_block_ptr++ = -c27+c4;
  355. *out_block_ptr++ = -c28+c3;
  356. *out_block_ptr++ = -c29+c2;
  357. *out_block_ptr++ = -c30+c1;
  358. *out_block_ptr++ = -c31+c0;
  359. }
  360. /*
  361. * Name: ScFDCT
  362. * Purpose: FDCT takes the fast forward DCT of 32 data points
  363. * optimize: 21%
  364. */
  365. void ScFDCT(float in_block[32],float out_block1[32],float out_block2[32])
  366. {
  367. static float cpi4,cpi8,cpi16,c3pi16;
  368. static float cpi32,c3pi32,c5pi32,c7pi32;
  369. static float spi8,spi16,s3pi16;
  370. static float spi32,s3pi32,s5pi32,s7pi32;
  371. static float cpi64,c17pi64,c9pi64,c25pi64,c5pi64,c21pi64;
  372. static float c13pi64,c29pi64;
  373. static float spi64,s17pi64,s9pi64,s25pi64,s5pi64,s21pi64;
  374. static float s13pi64,s29pi64;
  375. static int init = 1;
  376. register float c0,c1,c2,c3,c4,c5,c6,c7,c8;
  377. register float c9,c10,c11,c12,c13,c14,c15;
  378. register float c16,c17,c18,c19,c20,c21,c22;
  379. float c23,c24,c25,c26,c27,c28,c29;
  380. float c30,c31;
  381. float d0,d1,d2,d3,d4,d5,d6,d7,d8;
  382. float d9,d10,d11,d12,d13,d14,d15;
  383. float d16,d17,d18,d19,d20,d21,d22;
  384. float d23,d24,d25,d26,d27,d28,d29;
  385. float d30,d31;
  386. float e0,e1,e2,e3,e4,e5,e6,e7,e8;
  387. float e9,e10,e11,e12,e13,e14,e15;
  388. float e16,e17,e18,e19,e20,e21,e22;
  389. float e23,e24,e25,e26,e27,e28,e29;
  390. float e30,e31;
  391. /* initialization. This is to be done only once. */
  392. if(init) {
  393. /* establish internal variables */
  394. float temp;
  395. cpi4 = F cos(PI/4.0);
  396. temp = F (PI/8.0); cpi8 = F cos(temp); spi8 = F sin(temp);
  397. temp = F (PI/16.0); cpi16 = F cos(temp); spi16 = F sin(temp);
  398. temp = F (3.0*PI/16.0); c3pi16 = F cos(temp); s3pi16 = F sin(temp);
  399. temp = F (PI/32.0); cpi32 = F cos(temp); spi32 = F sin(temp);
  400. temp = F (3.0*PI/32.0); c3pi32 = F cos(temp); s3pi32 = F sin(temp);
  401. temp = F (5.0*PI/32.0); c5pi32 = F cos(temp); s5pi32 = F sin(temp);
  402. temp = F (7.0*PI/32.0); c7pi32 = F cos(temp); s7pi32 = F sin(temp);
  403. temp = F (PI/64.0); cpi64 = F cos(temp); spi64 = F sin(temp);
  404. temp = F (17.0*PI/64.0); c17pi64 = F cos(temp); s17pi64 = F sin(temp);
  405. temp = F (9.0*PI/64.0); c9pi64 = F cos(temp); s9pi64 = F sin(temp);
  406. temp = F (25.0*PI/64.0); c25pi64 = F cos(temp); s25pi64 = F sin(temp);
  407. temp = F (5.0*PI/64.0); c5pi64 = F cos(temp); s5pi64 = F sin(temp);
  408. temp = F (21.0*PI/64.0); c21pi64 = F cos(temp); s21pi64 = F sin(temp);
  409. temp = F (13.0*PI/64.0); c13pi64 = F cos(temp); s13pi64 = F sin(temp);
  410. temp = F (29.0*PI/64.0); c29pi64 = F cos(temp); s29pi64 = F sin(temp);
  411. init = 0;
  412. }
  413. /* first stage of fast dct */
  414. c0= in_block[0] + in_block[31];
  415. c1= in_block[1] + in_block[30];
  416. c2= in_block[2] + in_block[29];
  417. c3= in_block[3] + in_block[28];
  418. c4= in_block[4] + in_block[27];
  419. c5= in_block[5] + in_block[26];
  420. c6= in_block[6] + in_block[25];
  421. c7= in_block[7] + in_block[24];
  422. c8= in_block[8] + in_block[23];
  423. c9= in_block[9] + in_block[22];
  424. c10= in_block[10] + in_block[21];
  425. c11= in_block[11] + in_block[20];
  426. c12= in_block[12] + in_block[19];
  427. c13= in_block[13] + in_block[18];
  428. c14= in_block[14] + in_block[17];
  429. c15= in_block[15] + in_block[16];
  430. d16= in_block[15] - in_block[16];
  431. d17= in_block[14] - in_block[17];
  432. d18= in_block[13] - in_block[18];
  433. d19= in_block[12] - in_block[19];
  434. c20= in_block[11] - in_block[20];
  435. c21= in_block[10] - in_block[21];
  436. c22= in_block[9] - in_block[22];
  437. c23= in_block[8] - in_block[23];
  438. c24= in_block[7] - in_block[24];
  439. c25= in_block[6] - in_block[25];
  440. c26= in_block[5] - in_block[26];
  441. c27= in_block[4] - in_block[27];
  442. d28= in_block[3] - in_block[28];
  443. d29= in_block[2] - in_block[29];
  444. d30= in_block[1] - in_block[30];
  445. d31= in_block[0] - in_block[31];
  446. /* second stage of fast dct */
  447. d0=c0+c15;
  448. d1=c1+c14;
  449. d2=c2+c13;
  450. d3=c3+c12;
  451. d4=c4+c11;
  452. d5=c5+c10;
  453. d6=c6+c9;
  454. d7=c7+c8;
  455. e8=c7-c8;
  456. e9=c6-c9;
  457. d10=c5-c10;
  458. d11=c4-c11;
  459. d12=c3-c12;
  460. d13=c2-c13;
  461. e14=c1-c14;
  462. e15=c0-c15;
  463. d20=cpi4*(c27-c20);
  464. d21=cpi4*(c26-c21);
  465. d22=cpi4*(c25-c22);
  466. d23=cpi4*(c24-c23);
  467. d24=cpi4*(c24+c23);
  468. d25=cpi4*(c25+c22);
  469. d26=cpi4*(c26+c21);
  470. d27=cpi4*(c27+c20);
  471. /* third stage of fast dct */
  472. e0=d0+d7;
  473. e1=d1+d6;
  474. e2=d2+d5;
  475. e3=d3+d4;
  476. c4=d3-d4;
  477. e5=d2-d5;
  478. e6=d1-d6;
  479. c7=d0-d7;
  480. e10=cpi4*(d13-d10);
  481. e11=cpi4*(d12-d11);
  482. e12=cpi4*(d12+d11);
  483. e13=cpi4*(d13+d10);
  484. c16=d23+d16;
  485. c17=d22+d17;
  486. e18=d21+d18;
  487. e19=d20+d19;
  488. e20=d19-d20;
  489. e21=d18-d21;
  490. c22=d17-d22;
  491. c23=d16-d23;
  492. c24=d31-d24;
  493. c25=d30-d25;
  494. e26=d29-d26;
  495. e27=d28-d27;
  496. e28=d27+d28;
  497. e29=d26+d29;
  498. c30=d25+d30;
  499. c31=d24+d31;
  500. /* fourth stage of fast dct */
  501. c0=e3+e0;
  502. c1=e2+e1;
  503. c2=e1-e2;
  504. c3=e0-e3;
  505. c5=cpi4*(e6-e5);
  506. c6=cpi4*(e6+e5);
  507. d8=e11+e8;
  508. c9=e10+e9;
  509. c10=e9-e10;
  510. d11=e8-e11;
  511. d12=e15-e12;
  512. c13=e14-e13;
  513. c14=e13+e14;
  514. d15=e12+e15;
  515. c18=spi8*e29-cpi8*e18;
  516. c19=spi8*e28-cpi8*e19;
  517. c20= -cpi8*e27-spi8*e20;
  518. c21= -cpi8*e26-spi8*e21;
  519. c26= -cpi8*e21+spi8*e26;
  520. c27= -cpi8*e20+spi8*e27;
  521. c28=spi8*e19+cpi8*e28;
  522. c29=spi8*e18+cpi8*e29;
  523. /* fifth stage of fast dct */
  524. d0=cpi4*(c1+c0); /*done*/
  525. d1=cpi4*(c0-c1); /*done*/
  526. d2=cpi8*c3+spi8*c2; /*done*/
  527. d3= -cpi8*c2+spi8*c3; /*done*/
  528. d4=c5+c4;
  529. d5=c4-c5;
  530. d6=c7-c6;
  531. d7=c6+c7;
  532. d9=spi8*c14-cpi8*c9;
  533. d10= -cpi8*c13-spi8*c10;
  534. d13= -cpi8*c10+spi8*c13;
  535. d14=spi8*c9+cpi8*c14;
  536. e16=c19+c16;
  537. d17=c18+c17;
  538. d18=c17-c18;
  539. e19=c16-c19;
  540. e20=c23-c20;
  541. d21=c22-c21;
  542. d22=c21+c22;
  543. e23=c20+c23;
  544. e24=c27+c24;
  545. d25=c26+c25;
  546. d26=c25-c26;
  547. e27=c24-c27;
  548. e28=c31-c28;
  549. d29=c30-c29;
  550. d30=c29+c30;
  551. e31=c28+c31;
  552. /* sixth stage of fast dct */
  553. e4=cpi16*d7+spi16*d4; /*done*/
  554. e5=s3pi16*d6+c3pi16*d5; /*done*/
  555. e6= -s3pi16*d5+c3pi16*d6; /*done*/
  556. e7= -cpi16*d4+spi16*d7; /*done*/
  557. e8=d9+d8;
  558. e9=d8-d9;
  559. e10=d11-d10;
  560. e11=d10+d11;
  561. e12=d13+d12;
  562. e13=d12-d13;
  563. e14=d15-d14;
  564. e15=d14+d15;
  565. e17=spi16*d30-cpi16*d17;
  566. e18= -cpi16*d29-spi16*d18;
  567. e29= -cpi16*d18+spi16*d29;
  568. e30=spi16*d17+cpi16*d30;
  569. e21=c3pi16*d26-s3pi16*d21;
  570. e22= -s3pi16*d25-c3pi16*d22;
  571. e25= -s3pi16*d22+c3pi16*d25;
  572. e26=c3pi16*d21+s3pi16*d26;
  573. /* seventh stage of fast dct */
  574. d8=cpi32*e15+spi32*e8; /*done*/
  575. d9=s7pi32*e14+c7pi32*e9; /*done*/
  576. d10=c5pi32*e13+s5pi32*e10; /*done*/
  577. d11=s3pi32*e12+c3pi32*e11; /*done*/
  578. d12= -s3pi32*e11+c3pi32*e12; /*done*/
  579. d13= -c5pi32*e10+s5pi32*e13; /*done*/
  580. d14= -s7pi32*e9+c7pi32*e14; /*done*/
  581. d15= -cpi32*e8+spi32*e15; /*done*/
  582. c16=e17+e16;
  583. c17=e16-e17;
  584. c18=e19-e18;
  585. c19=e18+e19;
  586. c20=e21+e20;
  587. c21=e20-e21;
  588. c22=e23-e22;
  589. c23=e22+e23;
  590. c24=e25+e24;
  591. c25=e24-e25;
  592. c26=e27-e26;
  593. c27=e26+e27;
  594. c28=e29+e28;
  595. c29=e28-e29;
  596. c30=e31-e30;
  597. c31=e30+e31;
  598. /* last stage of fast dct */
  599. d16=cpi64*c31+spi64*c16; /*done*/
  600. d17=c17pi64*c30+s17pi64*c17; /*done*/
  601. d18=c9pi64*c29+s9pi64*c18; /*done*/
  602. d19=c25pi64*c28+s25pi64*c19; /*done*/
  603. d20=c5pi64*c27+s5pi64*c20; /*done*/
  604. d21=c21pi64*c26+s21pi64*c21; /*done*/
  605. d22=c13pi64*c25+s13pi64*c22; /*done*/
  606. d23=c29pi64*c24+s29pi64*c23; /*done*/
  607. d24= -c29pi64*c23+s29pi64*c24;/*done*/
  608. d25= -c13pi64*c22+s13pi64*c25;/*done*/
  609. d26= -c21pi64*c21+s21pi64*c26;/*done*/
  610. d27= -c5pi64*c20+s5pi64*c27; /*done*/
  611. d28= -c25pi64*c19+s25pi64*c28;/*done*/
  612. d29= -c9pi64*c18+s9pi64*c29; /*done*/
  613. d30= -c17pi64*c17+s17pi64*c30;/*done*/
  614. d31= -cpi64*c16+spi64*c31; /*done*/
  615. /* rearrange coefficients to undo bit reversed ordering */
  616. out_block2[16] = -d0;/******** normalization done in window********/
  617. out_block2[15] = -d16;
  618. out_block2[14] = -d8;
  619. out_block2[13] = -d24;
  620. out_block2[12] = -e4;
  621. out_block2[11] = -d20;
  622. out_block2[10] = -d12;
  623. out_block2[9] = -d28;
  624. out_block2[8] = -d2;
  625. out_block2[7] = -d18;
  626. out_block2[6] = -d10;
  627. out_block2[5] = -d26;
  628. out_block2[4] = -e6;
  629. out_block2[3] = -d22;
  630. out_block2[2] = -d14;
  631. out_block2[1] = -d30;
  632. out_block2[0] = -d1;
  633. out_block1[0] =d1;
  634. out_block1[1] =d17;
  635. out_block1[2] =d9;
  636. out_block1[3] =d25;
  637. out_block1[4] =e5;
  638. out_block1[5] =d21;
  639. out_block1[6] =d13;
  640. out_block1[7] =d29;
  641. out_block1[8] =d3;
  642. out_block1[9] =d19;
  643. out_block1[10] =d11;
  644. out_block1[11] =d27;
  645. out_block1[12] =e7;
  646. out_block1[13] =d23;
  647. out_block1[14] =d15;
  648. out_block1[15] =d31;
  649. out_block1[16] = F 0;
  650. }
  651. /*
  652. ** Name: ScFDCT8x8_C
  653. ** Purpose: 2-d Forward DCT (C version). Customized for (8x8) blocks
  654. ** "c" version
  655. **
  656. */
  657. void ScFDCT8x8_C(float *ipbuf, float *outbuf)
  658. {
  659. int i;
  660. register float t0, t1, t2, t3, t4, t5, t6, t7, tmp;
  661. register float *spptr, *interptr;
  662. register float *spptr_int;
  663. float tempptr[64];
  664. spptr_int = ipbuf;
  665. interptr = tempptr;
  666. /*
  667. ** Perform Row Computations
  668. */
  669. for (i = 0; i < 8; i++) {
  670. /* Compute A3 */
  671. t0 = spptr_int[0] + spptr_int[7];
  672. t7 = spptr_int[0] - spptr_int[7];
  673. t1 = spptr_int[1] + spptr_int[6];
  674. t6 = spptr_int[1] - spptr_int[6];
  675. t2 = spptr_int[2] + spptr_int[5];
  676. t5 = spptr_int[2] - spptr_int[5];
  677. t3 = spptr_int[3] + spptr_int[4];
  678. t4 = spptr_int[3] - spptr_int[4];
  679. /* Compute A2 */
  680. tmp = t0;
  681. t0 += t3;
  682. t3 = tmp - t3;
  683. tmp = t1;
  684. t1 += t2;
  685. t2 = tmp - t2;
  686. t4 = -t4 - t5;
  687. t5 += t6;
  688. t6 += t7;
  689. /* Compute A1 */
  690. interptr[32] = t0 - t1;
  691. interptr[0] = t0 + t1;
  692. t2 += t3;
  693. /* Compute M */
  694. t2 = t2*RSQ2 ;
  695. t5 = t5*RSQ2 ;
  696. tmp = (t6 - t4)*COS3;
  697. t4 = -t4*COSM1P3 - tmp;
  698. t6 = COS1M3*t6 + tmp;
  699. /* Compute B2 */
  700. interptr[16] = t2 + t3;
  701. interptr[48] = t3 - t2;
  702. tmp = t5;
  703. t5 += t7;
  704. t7 -= tmp;
  705. /* Compute PB1 */
  706. interptr[8] = t5 + t6;
  707. interptr[56] = t5 - t6;
  708. interptr[24] = t7 - t4;
  709. interptr[40] = t7 + t4;
  710. spptr_int += 8;
  711. interptr++;
  712. }
  713. spptr = tempptr;
  714. interptr = outbuf;
  715. /*
  716. ** Perform Column Computations
  717. */
  718. for (i = 0; i < 8; i++) {
  719. /* Compute A3 */
  720. t0 = spptr[0] + spptr[7];
  721. t7 = spptr[0] - spptr[7];
  722. t1 = spptr[1] + spptr[6];
  723. t6 = spptr[1] - spptr[6];
  724. t2 = spptr[2] + spptr[5];
  725. t5 = spptr[2] - spptr[5];
  726. t3 = spptr[3] + spptr[4];
  727. t4 = spptr[3] - spptr[4];
  728. /* Compute A2 */
  729. tmp = t0;
  730. t0 += t3;
  731. t3 = tmp - t3;
  732. tmp = t1;
  733. t1 += t2;
  734. t2 = tmp - t2;
  735. t4 = -t4 - t5;
  736. t5 += t6;
  737. t6 += t7;
  738. /* Compute A1 */
  739. interptr[32] = t0 - t1;
  740. interptr[0] = t0 + t1;
  741. t2 = t2+t3;
  742. /* Compute M */
  743. t2 = t2*RSQ2 ;
  744. t5 = t5*RSQ2 ;
  745. tmp = (t6 - t4)*COS3;
  746. t4 = -t4*COSM1P3 - tmp;
  747. t6 = COS1M3*t6 + tmp ;
  748. /* Compute B2 */
  749. interptr[16] = t2 + t3;
  750. interptr[48] = t3 - t2;
  751. tmp = t5;
  752. t5 += t7;
  753. t7 -= tmp;
  754. /* Compute PB1 */
  755. interptr[8] = t5 + t6;
  756. interptr[56] = t5 - t6;
  757. interptr[24] = t7 - t4;
  758. interptr[40] = t7 + t4;
  759. spptr += 8;
  760. interptr++;
  761. }
  762. }
  763. static const float dct_constants[64] = {
  764. F 0.12500000, F 0.09011998, F 0.09567086, F 0.10630377,
  765. F 0.12500000, F 0.15909483, F 0.23096988, F 0.45306373,
  766. F 0.09011998, F 0.06497288, F 0.06897485, F 0.07664075,
  767. F 0.09011998, F 0.11470097, F 0.16652001, F 0.32664075,
  768. F 0.09567086, F 0.06897485, F 0.07322331, F 0.08136138,
  769. F 0.09567086, F 0.12176590, F 0.17677669, F 0.34675997,
  770. F 0.10630377, F 0.07664074, F 0.08136138, F 0.09040392,
  771. F 0.10630377, F 0.13529903, F 0.19642374, F 0.38529903,
  772. F 0.12500000, F 0.09011998, F 0.09567086, F 0.10630377,
  773. F 0.12500000, F 0.15909483, F 0.23096988, F 0.45306373,
  774. F 0.15909483, F 0.11470097, F 0.12176590, F 0.13529903,
  775. F 0.15909483, F 0.20248929, F 0.29396889, F 0.57664073,
  776. F 0.23096988, F 0.16652001, F 0.17677669, F 0.19642374,
  777. F 0.23096988, F 0.29396892, F 0.42677671, F 0.83715260,
  778. F 0.45306373, F 0.32664075, F 0.34675995, F 0.38529903,
  779. F 0.45306373, F 0.57664073, F 0.83715260, F 1.64213395
  780. };
  781. /*
  782. ** Name: ScFDCT8x8s_C
  783. ** Purpose: 2-d Forward DCT (C version) for (8x8) blocks
  784. **
  785. */
  786. void ScFDCT8x8s_C(short *inbuf, short *outbuf)
  787. {
  788. int i;
  789. register float t0, t1, t2, t3, t4, t5, t6, t7, tmp;
  790. float *tempptr, tempbuf[64];
  791. const float *cptr=dct_constants;
  792. tempptr=tempbuf;
  793. /*
  794. ** Perform Row Computations
  795. */
  796. for (i = 0; i < 8; i++) {
  797. /* Compute A3 */
  798. t0 = F (inbuf[0] + inbuf[7]);
  799. t7 = F (inbuf[0] - inbuf[7]);
  800. t1 = F (inbuf[1] + inbuf[6]);
  801. t6 = F (inbuf[1] - inbuf[6]);
  802. t2 = F (inbuf[2] + inbuf[5]);
  803. t5 = F (inbuf[2] - inbuf[5]);
  804. t3 = F (inbuf[3] + inbuf[4]);
  805. t4 = F (inbuf[3] - inbuf[4]);
  806. /* Compute A2 */
  807. tmp = t0;
  808. t0 += t3;
  809. t3 = tmp - t3;
  810. tmp = t1;
  811. t1 += t2;
  812. t2 = tmp - t2;
  813. t4 = -t4 - t5;
  814. t5 += t6;
  815. t6 += t7;
  816. /* Compute A1 */
  817. tempptr[32] = t0 - t1;
  818. tempptr[0] = t0 + t1;
  819. t2 += t3;
  820. /* Compute M */
  821. t2 = t2*RSQ2 ;
  822. t5 = t5*RSQ2 ;
  823. tmp = (t6 - t4)*COS3;
  824. t4 = -t4*COSM1P3 - tmp;
  825. t6 = COS1M3*t6 + tmp;
  826. /* Compute B2 */
  827. tempptr[16] = t2 + t3;
  828. tempptr[48] = t3 - t2;
  829. tmp = t5;
  830. t5 += t7;
  831. t7 -= tmp;
  832. /* Compute PB1 */
  833. tempptr[8] = t5 + t6;
  834. tempptr[56] = t5 - t6;
  835. tempptr[24] = t7 - t4;
  836. tempptr[40] = t7 + t4;
  837. inbuf += 8;
  838. tempptr++;
  839. }
  840. tempptr = tempbuf;
  841. /*
  842. ** Perform Column Computations
  843. */
  844. for (i = 0; i < 8; i++) {
  845. /* Compute A3 */
  846. t0 = tempptr[0] + tempptr[7];
  847. t7 = tempptr[0] - tempptr[7];
  848. t1 = tempptr[1] + tempptr[6];
  849. t6 = tempptr[1] - tempptr[6];
  850. t2 = tempptr[2] + tempptr[5];
  851. t5 = tempptr[2] - tempptr[5];
  852. t3 = tempptr[3] + tempptr[4];
  853. t4 = tempptr[3] - tempptr[4];
  854. /* Compute A2 */
  855. tmp = t0;
  856. t0 += t3;
  857. t3 = tmp - t3;
  858. tmp = t1;
  859. t1 += t2;
  860. t2 = tmp - t2;
  861. t4 = -t4 - t5;
  862. t5 += t6;
  863. t6 += t7;
  864. /* Compute A1 */
  865. outbuf[32] = (short) floorf((t0 - t1)*cptr[4]+0.499999);
  866. outbuf[0] = (short) floorf((t0 + t1)*cptr[0]+0.499999);
  867. t2 = t2+t3;
  868. /* Compute M */
  869. t2 = t2*RSQ2 ;
  870. t5 = t5*RSQ2 ;
  871. tmp = (t6 - t4)*COS3;
  872. t4 = -t4*COSM1P3 - tmp;
  873. t6 = COS1M3*t6 + tmp ;
  874. /* Compute B2 */
  875. outbuf[16] = (short) floorf((t2 + t3)*cptr[2]+0.499999);
  876. outbuf[48] = (short) floorf((t3 - t2)*cptr[6]+0.499999);
  877. tmp = t5;
  878. t5 += t7;
  879. t7 -= tmp;
  880. /* Compute PB1 */
  881. outbuf[8] = (short) floorf((t5 + t6)*cptr[1]+0.499999);
  882. outbuf[56] = (short) floorf((t5 - t6)*cptr[7]+0.499999);
  883. outbuf[24] = (short) floorf((t7 - t4)*cptr[3]+0.499999);
  884. outbuf[40] = (short) floorf((t7 + t4)*cptr[5]+0.499999);
  885. tempptr += 8;
  886. cptr += 8;
  887. outbuf++;
  888. }
  889. }