Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

967 lines
24 KiB

  1. /*
  2. * @DEC_COPYRIGHT@
  3. */
  4. /*
  5. * HISTORY
  6. * $Log: sc_idct.c,v $
  7. * Revision 1.1.4.3 1996/03/20 22:32:42 Hans_Graves
  8. * Moved ScScaleIDCT8x8i_C to sc_idct_scaled.c
  9. * [1996/03/20 22:13:55 Hans_Graves]
  10. *
  11. * Revision 1.1.4.2 1996/03/08 18:46:17 Hans_Graves
  12. * Changed ScScaleIDCT8x8i_C() back to 20-bit
  13. * [1996/03/08 18:31:42 Hans_Graves]
  14. *
  15. * Revision 1.1.2.6 1996/02/21 22:52:40 Hans_Graves
  16. * Changed precision of ScScaleIDCT8x8i_C() from 20 to 19 bits
  17. * [1996/02/21 22:45:34 Hans_Graves]
  18. *
  19. * Revision 1.1.2.5 1996/01/26 19:01:34 Hans_Graves
  20. * Fix bug in ScScaleIDCT8x8i_C()
  21. * [1996/01/26 18:59:08 Hans_Graves]
  22. *
  23. * Revision 1.1.2.4 1996/01/24 19:33:15 Hans_Graves
  24. * Optimization of ScScaleIDCT8x8i_C
  25. * [1996/01/24 18:09:55 Hans_Graves]
  26. *
  27. * Revision 1.1.2.3 1996/01/08 20:19:31 Bjorn_Engberg
  28. * Removed unused local variable to get rid of a warning on NT.
  29. * [1996/01/08 20:17:34 Bjorn_Engberg]
  30. *
  31. * Revision 1.1.2.2 1996/01/08 16:41:17 Hans_Graves
  32. * Moved IDCT routines from sc_dct.c
  33. * [1996/01/08 15:30:46 Hans_Graves]
  34. *
  35. * $EndLog$
  36. */
  37. /*****************************************************************************
  38. ** Copyright (c) Digital Equipment Corporation, 1995 **
  39. ** **
  40. ** All Rights Reserved. Unpublished rights reserved under the copyright **
  41. ** laws of the United States. **
  42. ** **
  43. ** The software contained on this media is proprietary to and embodies **
  44. ** the confidential technology of Digital Equipment Corporation. **
  45. ** Possession, use, duplication or dissemination of the software and **
  46. ** media is authorized only pursuant to a valid written license from **
  47. ** Digital Equipment Corporation. **
  48. ** **
  49. ** RESTRICTED RIGHTS LEGEND Use, duplication, or disclosure by the U.S. **
  50. ** Government is subject to restrictions as set forth in Subparagraph **
  51. ** (c)(1)(ii) of DFARS 252.227-7013, or in FAR 52.227-19, as applicable. **
  52. ******************************************************************************/
  53. /*
  54. ** Filename: sc_idct.c
  55. ** Inverse DCT related functions.
  56. */
  57. /*
  58. #define _SLIBDEBUG_
  59. */
  60. #include <math.h>
  61. #include "SC.h"
  #ifdef _SLIBDEBUG_
  #define _DEBUG_   1  /* detailed debugging statements */
  #define _VERBOSE_ 1  /* show progress */
  #define _VERIFY_  1  /* verify correct operation */
  #define _WARN_    1  /* warnings about strange behavior */
  #endif

  /*
  ** Constant multipliers of the floating-point IDCT, ScScaleIDCT8x8_C().
  ** F casts the double literal that follows it to float; each constant is
  ** parenthesized so that it always expands as a single operand.
  */
  #define F       (float)
  #define RSQ2    (F 0.7071067811865)  /* cos(pi/4) = 1/sqrt(2)   */
  #define COSM1P3 (F 1.3065629648764)  /* cos(pi/8) + cos(3*pi/8) */
  #define COS1M3  (F 0.5411961001462)  /* cos(pi/8) - cos(3*pi/8) */
  #define COS3    (F 0.3826834323651)  /* cos(3*pi/8)             */

  /* Right shift applied to every result of the second pass of ScIDCT8x8(). */
  #define Point   14

  /*
  ** Name:    ScIDCT8Pass_i   (file-local helper of ScIDCT8x8)
  ** Purpose: One 8-point inverse DCT in integer arithmetic.
  **
  ** Input:   src - eight values read with a stride of 8 ints
  **                (src[0], src[8], ..., src[56]).
  ** Output:  dst - eight results written to consecutive ints dst[0..7].
  **
  ** Note:    All eight inputs are loaded before anything is stored.
  **          The constant multiplications are done with shift-and-add
  **          sequences; ">>" on a negative int is implementation-defined
  **          in C and is assumed to be an arithmetic shift here.
  */
  static void ScIDCT8Pass_i(const int *src, int *dst)
  {
    int t0 = src[0];
    int t1 = src[32];
    int t2 = src[16];
    int t3 = src[48];
    int t4 = src[40];
    int t5 = src[8];
    int t6 = src[56];
    int t7 = src[24];
    int tmp, mtmp;

    /*
    ** Shortcut: when only the first input is non-zero, every output
    ** equals that input.
    */
    if (!(t1 | t2 | t3 | t4 | t5 | t6 | t7)) {
      dst[0] = t0;
      dst[1] = t0;
      dst[2] = t0;
      dst[3] = t0;
      dst[4] = t0;
      dst[5] = t0;
      dst[6] = t0;
      dst[7] = t0;
      return;
    }

    /* Compute B1-t P' */
    tmp = t4;
    t4 -= t7;
    t7 += tmp;
    tmp = t6;
    t6 = t5 - t6;
    t5 += tmp;

    /* Compute B2-t */
    tmp = t3;
    t3 += t2;
    t2 -= tmp;
    tmp = t7;
    t7 += t5;
    t5 -= tmp;

    /* Compute M: multiplications replaced by shift-and-add sequences */
    tmp = t2 + (t2 >> 2);                 /* t2 *= ~0.70703 (cf. RSQ2) */
    tmp += (tmp >> 3);
    t2 = (tmp + (t2 >> 7)) >> 1;
    tmp = t5 + (t5 >> 2);                 /* t5 *= ~0.70703 (cf. RSQ2) */
    tmp += (tmp >> 3);
    t5 = (tmp + (t5 >> 7)) >> 1;
    tmp = t6 - t4;                        /* (t6 - t4) * ~0.38269 (cf. COS3) */
    mtmp = tmp + (tmp >> 1) + (tmp >> 5) - (tmp >> 11);
    tmp = mtmp >> 2;
    /* t4 * ~1.30664 (cf. COSM1P3) */
    mtmp = t4 + (t4 >> 2) + (t4 >> 4) - (t4 >> 7) + (t4 >> 9);
    t4 = -mtmp - tmp;
    /* t6 * ~0.54102 (cf. COS1M3) */
    mtmp = (t6 + (t6 >> 4) + (t6 >> 6) + (t6 >> 8)) >> 1;
    t6 = mtmp + tmp;

    /* Compute A1-t */
    tmp = t0;
    t0 += t1;
    t1 = tmp - t1;
    t3 = t2 + t3;

    /* Compute A2-t */
    tmp = t0;
    t0 += t3;
    t3 = tmp - t3;
    tmp = t1;
    t1 += t2;
    t2 = tmp - t2;
    t7 += t6;
    t6 += t5;
    t5 -= t4;

    /* Compute A3-t */
    dst[0] = t0 + t7;
    dst[1] = t1 + t6;
    dst[2] = t2 + t5;
    dst[3] = t3 - t4;   /* the previous stage did not perform t4 = -t4, */
    dst[4] = t3 + t4;   /* hence the signs used for t4 in these two     */
    dst[5] = t2 - t5;
    dst[6] = t1 - t6;
    dst[7] = t0 - t7;
  }

  /*
  ** Name:    ScIDCT8x8
  ** Purpose: 2-d Inverse DCT. Customized for (8x8) blocks.
  **          Works in place on the 64 ints at outbuf.
  **
  ** Input:   outbuf - 64 coefficients; presumably pre-scaled fixed-point
  **                   values carrying Point fractional bits (confirm
  **                   against the callers).
  ** Output:  outbuf - each element is (result >> Point) + 128.
  **                   No clamping is performed.
  **
  ** Method:  Pass 1 transforms the eight stride-8 vectors of outbuf and
  **          stores each result as eight consecutive ints of a scratch
  **          block.  Pass 2 does the same from the scratch block back
  **          into outbuf, so the two implied transpositions cancel.
  **
  ** Note:    This scheme uses the direct transposition of the forward
  **          DCT. This may not be the preferred way in Hardware
  **          Implementations.
  **          The scratch block has automatic storage, so the routine
  **          keeps no state between calls and may be used from several
  **          threads at once.
  **
  ** Reference: FEIGs
  */
  void ScIDCT8x8(int *outbuf)
  {
    int inter[64];
    int i, k;

    /* Pass 1: outbuf (stride 8) -> scratch (contiguous) */
    for (i = 0; i < 8; i++)
      ScIDCT8Pass_i(outbuf + i, inter + 8 * i);

    /* Pass 2: scratch (stride 8) -> outbuf (contiguous), then final scaling */
    for (i = 0; i < 8; i++) {
      int *line = outbuf + 8 * i;

      ScIDCT8Pass_i(inter + i, line);
      for (k = 0; k < 8; k++)
        line[k] = (line[k] >> Point) + 128;
    }
  }
  263. /*
  264. ** Function: ScScaleIDCT8x8
  265. ** Note: This scheme uses the direct transposition of the forward
  266. ** DCT. This may not be the preferred way in Hardware
  267. ** Implementations
  268. */
  269. void ScScaleIDCT8x8_C(float *ipbuf, int *outbuf)
  270. {
  271. int i;
  272. int *outptr;
  273. register int itmp;
  274. register float t0, t1, t2, t3, t4, t5, t6, t7, tmp;
  275. float *spptr, *interptr;
  276. float tempptr[64];
  277. spptr = ipbuf;
  278. interptr = tempptr;
  279. /* Perform Row Computations */
  280. for (i=0; i<8; i++)
  281. {
  282. /* Check for zeros */
  283. t0 = spptr[0];
  284. t1 = spptr[4];
  285. t2 = spptr[2];
  286. t3 = spptr[6];
  287. t4 = spptr[5];
  288. t5 = spptr[1];
  289. t6 = spptr[7];
  290. t7 = spptr[3];
  291. if (!(t1||t2||t3||t4||t5||t6||t7))
  292. {
  293. interptr[0] = t0;
  294. interptr[8] = t0;
  295. interptr[16] = t0;
  296. interptr[24] = t0;
  297. interptr[32] = t0;
  298. interptr[40] = t0;
  299. interptr[48] = t0;
  300. interptr[56] = t0;
  301. }
  302. else
  303. {
  304. /* Compute B1-t P' */
  305. tmp = t4;
  306. t4 -= t7;
  307. t7 += tmp;
  308. tmp = t6;
  309. t6 = t5 -t6;
  310. t5 += tmp;
  311. /* Compute B2-t */
  312. tmp = t3;
  313. t3 += t2;
  314. t2 -= tmp;
  315. tmp = t7;
  316. t7 += t5;
  317. t5 -= tmp;
  318. /* Compute M */
  319. t2 = t2*RSQ2;
  320. t5 = t5*RSQ2;
  321. tmp = (t6 - t4)*COS3;
  322. t4 = -t4*COSM1P3 - tmp;
  323. t6 = COS1M3*t6 + tmp;
  324. /* Compute A1-t */
  325. tmp = t0;
  326. t0 += t1;
  327. t1 = tmp - t1;
  328. t3 = t2 + t3;
  329. /* Compute A2-t */
  330. tmp = t0;
  331. t0 += t3;
  332. t3 = tmp - t3;
  333. tmp = t1;
  334. t1 += t2;
  335. t2 = tmp - t2;
  336. t7 += t6;
  337. t6 += t5;
  338. t5 -= t4;
  339. /* Compute A3-t */
  340. interptr[0] = t0 + t7;
  341. interptr[56] = t0 - t7;
  342. interptr[8] = t1 + t6;
  343. interptr[48] = t1 - t6;
  344. interptr[16] = t2 + t5;
  345. interptr[40] = t2 - t5;
  346. interptr[24] = t3 - t4; /* Note in the prev. stage no
  347. t4 = -t4 */
  348. interptr[32] = t3 + t4;
  349. }
  350. spptr += 8;
  351. interptr++;
  352. }
  353. spptr = tempptr;
  354. outptr = outbuf;
  355. /* Perform Column Computations */
  356. for (i=0; i<8; i++)
  357. {
  358. /* Check for zeros */
  359. t0 = spptr[0];
  360. t1 = spptr[4];
  361. t2 = spptr[2];
  362. t3 = spptr[6];
  363. t4 = spptr[5];
  364. t5 = spptr[1];
  365. t6 = spptr[7];
  366. t7 = spptr[3];
  367. if (!(t1||t2||t3||t4||t5||t6||t7))
  368. {
  369. itmp = (int) (t0);
  370. outptr[0] = itmp;
  371. outptr[8] = itmp;
  372. outptr[16] = itmp;
  373. outptr[24] = itmp;
  374. outptr[32] = itmp;
  375. outptr[40] = itmp;
  376. outptr[48] = itmp;
  377. outptr[56] = itmp;
  378. }
  379. else
  380. {
  381. /* Compute B1-t P' */
  382. tmp = t4;
  383. t4 -= t7;
  384. t7 += tmp;
  385. tmp = t6;
  386. t6 = t5 -t6;
  387. t5 += tmp;
  388. /* Compute B2-tilde */
  389. tmp = t3;
  390. t3 += t2;
  391. t2 -= tmp;
  392. tmp = t7;
  393. t7 += t5;
  394. t5 -= tmp;
  395. /* Compute M-Tilde */
  396. t2 = t2*RSQ2 ;
  397. t5 = t5*RSQ2 ;
  398. tmp = (t6 - t4)*COS3;
  399. t4 = -t4*COSM1P3 - tmp;
  400. t6 = COS1M3*t6 + tmp ;
  401. /* Compute A1-t */
  402. tmp = t0;
  403. t0 += t1;
  404. t1 = tmp - t1;
  405. t3 = t2 + t3;
  406. /* Compute A2-t */
  407. tmp = t0;
  408. t0 += t3;
  409. t3 = tmp - t3;
  410. tmp = t1;
  411. t1 += t2;
  412. t2 = tmp - t2;
  413. t7 += t6;
  414. t6 += t5;
  415. t5 -= t4;
  416. /* Compute A3-t */
  417. outptr[0] = (int)(t0+t7);
  418. outptr[56] = (int)(t0-t7);
  419. outptr[8] = (int)(t1+t6);
  420. outptr[48] = (int)(t1-t6);
  421. outptr[16] = (int)(t2+t5);
  422. outptr[40] = (int)(t2-t5);
  423. outptr[24] = (int)(t3-t4);
  424. outptr[32] = (int)(t3+t4);
  425. }
  426. outptr++;
  427. spptr += 8;
  428. }
  429. }
  /*
  ** Function: ScIDCT8x8s_C
  ** Purpose:  2-d inverse DCT of one 8x8 block of shorts, done as a row
  **           pass into an int scratch block followed by a column pass.
  **
  ** Input:    inbuf  - 64 coefficients, eight consecutive shorts per row.
  ** Output:   outbuf - 64 results, each clamped to the range [-256, 255].
  **
  ** Note:     This scheme uses the direct transposition of the forward
  **           DCT. This may not be the preferred way in Hardware
  **           Implementations.
  **           Scaling by a power of two is written as a multiplication
  **           because "<<" on a negative value is undefined in C.  ">>" on
  **           a negative int is implementation-defined and is assumed to
  **           be an arithmetic shift.
  */
  #define W1 2841 /* 2048*sqrt(2)*cos(1*pi/16) */
  #define W2 2676 /* 2048*sqrt(2)*cos(2*pi/16) */
  #define W3 2408 /* 2048*sqrt(2)*cos(3*pi/16) */
  #define W5 1609 /* 2048*sqrt(2)*cos(5*pi/16) */
  #define W6 1108 /* 2048*sqrt(2)*cos(6*pi/16) */
  #define W7 565  /* 2048*sqrt(2)*cos(7*pi/16) */
  #define IDCTSHIFTR 8       /* final right shift of the row pass    */
  #define IDCTSHIFTC (14+0)  /* final right shift of the column pass */
  #if 1
  /* Clamp var to [min, max]; var is evaluated more than once. */
  #define limit(var, min, max) \
          ((var)<=(min) ? (min) : ((var)>=(max) ? (max) : (var)))
  #else
  #define limit(var, min, max) (var)
  #endif

  void ScIDCT8x8s_C(short *inbuf, short *outbuf)
  {
    int tmpbuf[64];
    int i, tmp;
    int x0, x1, x2, x3, x4, x5, x6, x7, x8;
    short *inblk, *outblk;
    int *tmpblk;

    /*
    ** Row pass: each row of inbuf becomes the matching row of tmpbuf.
    */
    inblk = inbuf;
    tmpblk = tmpbuf;
    for (i=0; i<8; i++, inblk+=8, tmpblk+=8)
    {
      x0 = inblk[0];
      x1 = inblk[4] * 2048;
      x2 = inblk[6];
      x3 = inblk[2];
      x4 = inblk[1];
      x5 = inblk[7];
      x6 = inblk[5];
      x7 = inblk[3];

      /* shortcut: only the first coefficient of the row is non-zero */
      if (!(x1 | x2 | x3 | x4 | x5 | x6 | x7))
      {
        tmpblk[0]=tmpblk[1]=tmpblk[2]=tmpblk[3]=tmpblk[4]=tmpblk[5]=tmpblk[6]=
          tmpblk[7]=x0 * 8;
        continue;
      }

      x0 = x0 * 2048 + 128; /* for proper rounding in the fourth stage */

      /* first stage */
      x8 = W7*(x4+x5);
      x4 = x8 + (W1-W7)*x4;
      x5 = x8 - (W1+W7)*x5;
      x8 = W3*(x6+x7);
      x6 = x8 - (W3-W5)*x6;
      x7 = x8 - (W3+W5)*x7;

      /* second stage */
      x8 = x0 + x1;
      x0 -= x1;
      x1 = W6*(x3+x2);
      x2 = x1 - (W2+W6)*x2;
      x3 = x1 + (W2-W6)*x3;
      x1 = x4 + x6;
      x4 -= x6;
      x6 = x5 + x7;
      x5 -= x7;

      /* third stage; 181 is 256/sqrt(2) rounded */
      x7 = x8 + x3;
      x8 -= x3;
      x3 = x0 + x2;
      x0 -= x2;
      x2 = (181*(x4+x5)+128)>>8;
      x4 = (181*(x4-x5)+128)>>8;

      /* fourth stage */
      tmpblk[0] = (x7+x1)>>IDCTSHIFTR;
      tmpblk[1] = (x3+x2)>>IDCTSHIFTR;
      tmpblk[2] = (x0+x4)>>IDCTSHIFTR;
      tmpblk[3] = (x8+x6)>>IDCTSHIFTR;
      tmpblk[4] = (x8-x6)>>IDCTSHIFTR;
      tmpblk[5] = (x0-x4)>>IDCTSHIFTR;
      tmpblk[6] = (x3-x2)>>IDCTSHIFTR;
      tmpblk[7] = (x7-x1)>>IDCTSHIFTR;
    }

    /*
    ** Column pass: each column of tmpbuf becomes the matching column of
    ** outbuf, clamped to [-256, 255].
    */
    tmpblk = tmpbuf;
    outblk = outbuf;
    for (i=0; i<8; i++, tmpblk++, outblk++)
    {
      x0 = tmpblk[8*0];
      x1 = tmpblk[8*4] * 256;
      x2 = tmpblk[8*6];
      x3 = tmpblk[8*2];
      x4 = tmpblk[8*1];
      x5 = tmpblk[8*7];
      x6 = tmpblk[8*5];
      x7 = tmpblk[8*3];

      /* shortcut: only the first value of the column is non-zero */
      if (!(x1 | x2 | x3 | x4 | x5 | x6 | x7))
      {
        tmp = (x0+32)>>6;
        outblk[8*0]=outblk[8*1]=outblk[8*2]=outblk[8*3]=outblk[8*4]=outblk[8*5]=
          outblk[8*6]=outblk[8*7]=limit(tmp, -256, 255);
        continue;
      }

      x0 = x0 * 256 + 8192; /* rounding term for the final shift */

      /* first stage */
      x8 = W7*(x4+x5) + 4;
      x4 = (x8+((W1-W7)*x4))>>3;
      x5 = (x8-((W1+W7)*x5))>>3;
      x8 = W3*(x6+x7) + 4;
      x6 = (x8-((W3-W5)*x6))>>3;
      x7 = (x8-((W3+W5)*x7))>>3;

      /* second stage */
      x8 = x0 + x1;
      x0 -= x1;
      x1 = W6*(x3+x2) + 4;
      x2 = (x1-((W2+W6)*x2))>>3;
      x3 = (x1+((W2-W6)*x3))>>3;
      x1 = x4 + x6;
      x4 -= x6;
      x6 = x5 + x7;
      x5 -= x7;

      /* third stage */
      x7 = x8 + x3;
      x8 -= x3;
      x3 = x0 + x2;
      x0 -= x2;
      x2 = ((181*(x4+x5))+128)>>8;
      x4 = ((181*(x4-x5))+128)>>8;

      /* fourth stage */
      tmp = (x7+x1)>>IDCTSHIFTC;
      outblk[8*0] = limit(tmp, -256, 255);
      tmp = (x3+x2)>>IDCTSHIFTC;
      outblk[8*1] = limit(tmp, -256, 255);
      tmp = (x0+x4)>>IDCTSHIFTC;
      outblk[8*2] = limit(tmp, -256, 255);
      tmp = (x8+x6)>>IDCTSHIFTC;
      outblk[8*3] = limit(tmp, -256, 255);
      tmp = (x8-x6)>>IDCTSHIFTC;
      outblk[8*4] = limit(tmp, -256, 255);
      tmp = (x0-x4)>>IDCTSHIFTC;
      outblk[8*5] = limit(tmp, -256, 255);
      tmp = (x3-x2)>>IDCTSHIFTC;
      outblk[8*6] = limit(tmp, -256, 255);
      tmp = (x7-x1)>>IDCTSHIFTC;
      outblk[8*7] = limit(tmp, -256, 255);
    }
  }
  #if 0
  /*
  ** Disabled code: separate row and column passes that read and write
  ** shorts.  They use the W1..W7 constants defined earlier in this file.
  */

  /* row (horizontal) IDCT
   *
   *           7                       pi         1
   * dst[k] = sum c[l] * src[l] * cos( -- * ( k + - ) * l )
   *          l=0                      8          2
   *
   * where: c[0]    = 128
   *        c[1..7] = 128*sqrt(2)
   *
   * inblk:  eight consecutive coefficients of one row
   * outblk: eight consecutive results; every input is read before the
   *         first result is stored
   */
  static void idctrow(short *inblk, short *outblk)
  {
    int v0, v1, v2, v3, v4, v5, v6, v7, v8;
    int k;

    v1 = inblk[4]<<11;
    v2 = inblk[6];
    v3 = inblk[2];
    v4 = inblk[1];
    v5 = inblk[7];
    v6 = inblk[5];
    v7 = inblk[3];

    /* shortcut: nothing but the first coefficient is set */
    if ((v1 | v2 | v3 | v4 | v5 | v6 | v7) == 0)
    {
      short flat = inblk[0]<<3;

      for (k = 0; k < 8; k++)
        outblk[k] = flat;
      return;
    }

    v0 = (inblk[0]<<11) + 128; /* for proper rounding in the fourth stage */

    /* first stage */
    v8 = W7*(v4+v5);
    v4 = v8 + (W1-W7)*v4;
    v5 = v8 - (W1+W7)*v5;
    v8 = W3*(v6+v7);
    v6 = v8 - (W3-W5)*v6;
    v7 = v8 - (W3+W5)*v7;

    /* second stage */
    v8 = v0 + v1;
    v0 = v0 - v1;
    v1 = W6*(v3+v2);
    v2 = v1 - (W2+W6)*v2;
    v3 = v1 + (W2-W6)*v3;
    v1 = v4 + v6;
    v4 = v4 - v6;
    v6 = v5 + v7;
    v5 = v5 - v7;

    /* third stage */
    v7 = v8 + v3;
    v8 = v8 - v3;
    v3 = v0 + v2;
    v0 = v0 - v2;
    v2 = (181*(v4+v5)+128)>>8;
    v4 = (181*(v4-v5)+128)>>8;

    /* fourth stage */
    outblk[0] = (v7+v1)>>8;
    outblk[1] = (v3+v2)>>8;
    outblk[2] = (v0+v4)>>8;
    outblk[3] = (v8+v6)>>8;
    outblk[4] = (v8-v6)>>8;
    outblk[5] = (v0-v4)>>8;
    outblk[6] = (v3-v2)>>8;
    outblk[7] = (v7-v1)>>8;
  }

  /* column (vertical) IDCT
   *
   *             7                         pi         1
   * dst[8*k] = sum c[l] * src[8*l] * cos( -- * ( k + - ) * l )
   *            l=0                        8          2
   *
   * where: c[0]    = 1/1024
   *        c[1..7] = (1/1024)*sqrt(2)
   *
   * inblk:  eight coefficients of one column, 8 shorts apart
   * outblk: eight results, 8 shorts apart, each clamped to [-256, 255];
   *         every input is read before the first result is stored
   */
  static void idctcol(short *inblk, short *outblk)
  {
    int v0, v1, v2, v3, v4, v5, v6, v7, v8;
    int res[8];
    int k;

    v1 = inblk[8*4]<<8;
    v2 = inblk[8*6];
    v3 = inblk[8*2];
    v4 = inblk[8*1];
    v5 = inblk[8*7];
    v6 = inblk[8*5];
    v7 = inblk[8*3];

    if ((v1 | v2 | v3 | v4 | v5 | v6 | v7) == 0)
    {
      /* shortcut: nothing but the first coefficient is set */
      v0 = (inblk[8*0]+32)>>6;
      for (k = 0; k < 8; k++)
        res[k] = v0;
    }
    else
    {
      v0 = (inblk[8*0]<<8) + 8192;

      /* first stage */
      v8 = W7*(v4+v5) + 4;
      v4 = (v8+(W1-W7)*v4)>>3;
      v5 = (v8-(W1+W7)*v5)>>3;
      v8 = W3*(v6+v7) + 4;
      v6 = (v8-(W3-W5)*v6)>>3;
      v7 = (v8-(W3+W5)*v7)>>3;

      /* second stage */
      v8 = v0 + v1;
      v0 = v0 - v1;
      v1 = W6*(v3+v2) + 4;
      v2 = (v1-(W2+W6)*v2)>>3;
      v3 = (v1+(W2-W6)*v3)>>3;
      v1 = v4 + v6;
      v4 = v4 - v6;
      v6 = v5 + v7;
      v5 = v5 - v7;

      /* third stage */
      v7 = v8 + v3;
      v8 = v8 - v3;
      v3 = v0 + v2;
      v0 = v0 - v2;
      v2 = (181*(v4+v5)+128)>>8;
      v4 = (181*(v4-v5)+128)>>8;

      /* fourth stage */
      res[0] = (v7+v1)>>14;
      res[1] = (v3+v2)>>14;
      res[2] = (v0+v4)>>14;
      res[3] = (v8+v6)>>14;
      res[4] = (v8-v6)>>14;
      res[5] = (v0-v4)>>14;
      res[6] = (v3-v2)>>14;
      res[7] = (v7-v1)>>14;
    }

    /* clamp and store down the column */
    for (k = 0; k < 8; k++)
    {
      int s = res[k];

      if (s < -256)
        s = -256;
      else if (s > 255)
        s = 255;
      outblk[8*k] = s;
    }
  }
  #endif