Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1281 lines
32 KiB

  1. /*
  2. * @DEC_COPYRIGHT@
  3. */
  4. /*
  5. * HISTORY
  6. * $Log: sc_idct_scaled.c,v $
  7. * Revision 1.1.2.3 1996/04/03 21:41:08 Hans_Graves
  8. * Fix bug in 8x8 IDCT
  9. * [1996/04/03 21:40:19 Hans_Graves]
  10. *
  11. * Revision 1.1.2.2 1996/03/20 22:32:44 Hans_Graves
  12. * Moved ScScaleIDCT8x8i_C from sc_idct.c; Added 1x1,2x1,1x2,3x3,4x4,6x6
  13. * [1996/03/20 22:14:59 Hans_Graves]
  14. *
  15. * $EndLog$
  16. */
  17. /*****************************************************************************
  18. ** Copyright (c) Digital Equipment Corporation, 1995 **
  19. ** **
  20. ** All Rights Reserved. Unpublished rights reserved under the copyright **
  21. ** laws of the United States. **
  22. ** **
  23. ** The software contained on this media is proprietary to and embodies **
  24. ** the confidential technology of Digital Equipment Corporation. **
  25. ** Possession, use, duplication or dissemination of the software and **
  26. ** media is authorized only pursuant to a valid written license from **
  27. ** Digital Equipment Corporation. **
  28. ** **
  29. ** RESTRICTED RIGHTS LEGEND Use, duplication, or disclosure by the U.S. **
  30. ** Government is subject to restrictions as set forth in Subparagraph **
  31. ** (c)(1)(ii) of DFARS 252.227-7013, or in FAR 52.227-19, as applicable. **
  32. ******************************************************************************/
  33. /*
  34. ** Filename: sc_idct_scaled.c
  35. ** Scaled Inverse DCT related functions.
  36. */
  37. /*
  38. #define _SLIBDEBUG_
  39. */
  40. #include <math.h>
  41. #include "SC.h"
  #ifdef _SLIBDEBUG_
  #define _DEBUG_ 1 /* detailed debugging statements */
  #define _VERBOSE_ 1 /* show progress */
  #define _VERIFY_ 1 /* verify correct operation */
  #define _WARN_ 1 /* warnings about strange behavior */
  #endif
  #define USE_MUL 0 /* Use multiplies vs. shift and adds */
  #define CHECK_FOR_ZEROS 1 /* check for zero rows/columns */
  /*
  ** Fixed-point multipliers used when USE_MUL is set. Each literal is a
  ** fraction scaled by 2^30 and is reduced here to BSHIFT fractional bits:
  **   B1, B3 ~=  0.70711    B2 ~= -1.30656
  **   B4     ~=  0.54120    B5 ~=  0.38268
  ** The shift-and-add sequences used when USE_MUL is 0 approximate the same
  ** factors.
  */
  #define BSHIFT 10
  #define B1 (759250125L>>(30-BSHIFT))
  #define B3 B1
  #define B2 (-1402911301L>>(30-BSHIFT))
  #define B4 (581104888L>>(30-BSHIFT))
  #define B5 (410903207L>>(30-BSHIFT))
  /*
  ** Results carry POINT fractional bits. POINTROUND is 128.5 in that format
  ** (0x101 << 19 == (128 << 20) + (1 << 19)), so IDCTAdjust() adds 128 plus
  ** one half before the shift and takes the 128 back off afterwards.
  */
  #define POINT 20
  #define POINTROUND (0x101 << (POINT - 1))
  /* The argument is parenthesized so that any expression can be passed. */
  #define IDCTAdjust(val) ((((val) + POINTROUND) >> POINT) - 128)
  59. /* printf("In: %d,%d\n", inbuf[0*8],inbuf[1*8]);
  60. printf("Out: %d,%d,%d,%d,%d,%d,%d,%d\n", inbuf[0*8],inbuf[1*8],
  61. inbuf[2*8],inbuf[3*8],inbuf[4*8],inbuf[5*8],inbuf[6*8],inbuf[7*8]);
  62. */
  63. /* Function: ScScaleIDCT8x8i_C()
  64. ** Purpose: Used by MPEG video decompression.
  65. ** 20 Bit precision.
  66. */
  67. void ScScaleIDCT8x8i_C(int *inbuf, int *outbuf)
  68. {
  69. register int *inblk;
  70. register int tmp1, tmp2, tmp3;
  71. register int x0, x1, x2, x3, x4, x5, x6, x7, x8;
  72. int i;
  73. _SlibDebug(_DEBUG_, printf("ScScaleIDCT8x8i_C()\n") );
  74. /* Perform Row Computations */
  75. inblk = inbuf;
  76. for(i=0; i<8; i++)
  77. {
  78. /* Check for zeros */
  79. x0 = inblk[0*8];
  80. x1 = inblk[1*8];
  81. x2 = inblk[2*8];
  82. x3 = inblk[3*8];
  83. x4 = inblk[4*8];
  84. x5 = inblk[5*8];
  85. x6 = inblk[6*8];
  86. x7 = inblk[7*8];
  87. #if CHECK_FOR_ZEROS
  88. if(!(x1|x3|x5|x7))
  89. {
  90. if(!(x2|x6))
  91. {
  92. tmp1 = x0 + x4;
  93. tmp2 = x0 - x4;
  94. inblk[0*8] = tmp1;
  95. inblk[1*8] = tmp2;
  96. inblk[2*8] = tmp2;
  97. inblk[3*8] = tmp1;
  98. inblk[4*8] = tmp1;
  99. inblk[5*8] = tmp2;
  100. inblk[6*8] = tmp2;
  101. inblk[7*8] = tmp1;
  102. }
  103. else
  104. {
  105. /* Stage 2 */
  106. x8 = x2 - x6;
  107. x6 = x2 + x6;
  108. /* Stage 3 */
  109. #if USE_MUL
  110. x2=(x8*B1)>>BSHIFT;
  111. #else
  112. tmp1 = x8 + (x8 >> 2); /* x2=x8*B1 */
  113. tmp1 += (tmp1 >> 3);
  114. x2 = (tmp1 + (x8 >> 7)) >> 1;
  115. #endif
  116. /* Stage 5 */
  117. tmp1 = x0 - x4;
  118. x0 = x0 + x4;
  119. tmp2 = x2 + x6;
  120. /* Stage 6 */
  121. x6 = x0 - tmp2;
  122. x0 = x0 + tmp2;
  123. x4 = tmp1 + x2;
  124. x2 = tmp1 - x2;
  125. /* Final Stage */
  126. inblk[0*8] = x0;
  127. inblk[1*8] = x4;
  128. inblk[2*8] = x2;
  129. inblk[3*8] = x6;
  130. inblk[4*8] = x6;
  131. inblk[5*8] = x2;
  132. inblk[6*8] = x4;
  133. inblk[7*8] = x0;
  134. }
  135. }
  136. else
  137. #endif
  138. {
  139. /* Stage 1 */
  140. tmp1 = x5 + x3;
  141. x5 = x5 - x3;
  142. tmp2 = x1 + x7;
  143. x7 = x1 - x7;
  144. /* Stage 2 */
  145. tmp3 = x2 - x6;
  146. x6 = x2 + x6;
  147. x3 = tmp2 + tmp1;
  148. x1 = tmp2 - tmp1;
  149. x8 = x7 - x5;
  150. /* Stage 3 */
  151. #if USE_MUL
  152. x5=(x5*B2)>>BSHIFT;
  153. x1=(x1*B3)>>BSHIFT;
  154. x2=(tmp3*B1)>>BSHIFT;
  155. x7=(x7*B4)>>BSHIFT;
  156. x8=(x8*B5)>>BSHIFT;
  157. #else
  158. x5 = x5 + (x5 >> 2) + (x5 >> 4) - (x5 >> 7) + (x5 >> 9); /* x5=x5*B2 */
  159. x5 = -x5;
  160. tmp1 = x1 + (x1 >> 2); /* x1=x1*B3 */
  161. tmp1 += (tmp1 >> 3);
  162. x1 = (tmp1 + (x1 >> 7)) >> 1;
  163. tmp1 = tmp3 + (tmp3 >> 2); /* x2=tmp3*B1 */
  164. tmp1 += (tmp1 >> 3);
  165. x2 = (tmp1 + (tmp3 >> 7)) >> 1;
  166. x7 = (x7 + (x7 >> 4) + (x7 >> 6) + (x7 >> 8)) >> 1; /* x7=x7*B4 */
  167. x8 = (x8 + (x8 >> 1) + (x8 >> 5) - (x8 >> 11)) >> 2; /* x8=x8*B5 */
  168. #endif /* USE_MUL */
  169. /* Stage 4 */
  170. x5=x5 - x8;
  171. x7=x7 + x8;
  172. /* Stage 5 */
  173. tmp3 = x0 - x4;
  174. x0 = x0 + x4;
  175. tmp2 = x2 + x6;
  176. x3 = x3 + x7;
  177. x7 = x1 + x7;
  178. x1 = x1 - x5;
  179. /* Stage 6 */
  180. x6 = x0 - tmp2;
  181. x0 = x0 + tmp2;
  182. x4 = tmp3 + x2;
  183. x2 = tmp3 - x2;
  184. /* Final Stage */
  185. inblk[0*8] = x0 + x3;
  186. inblk[1*8] = x4 + x7;
  187. inblk[2*8] = x2 + x1;
  188. inblk[3*8] = x6 - x5;
  189. inblk[4*8] = x6 + x5;
  190. inblk[5*8] = x2 - x1;
  191. inblk[6*8] = x4 - x7;
  192. inblk[7*8] = x0 - x3;
  193. }
  194. inblk++;
  195. }
  196. /* Perform Column Computations */
  197. inblk = inbuf;
  198. for(i=0; i<8; i++)
  199. {
  200. /* Check for zeros */
  201. x0 = inblk[0];
  202. x1 = inblk[1];
  203. x2 = inblk[2];
  204. x3 = inblk[3];
  205. x4 = inblk[4];
  206. x5 = inblk[5];
  207. x6 = inblk[6];
  208. x7 = inblk[7];
  209. #if CHECK_FOR_ZEROS
  210. if(!(x1|x3|x5|x7))
  211. {
  212. if(!(x2|x6))
  213. {
  214. tmp1 = x0 + x4;
  215. tmp2 = x0 - x4;
  216. x1 = IDCTAdjust(tmp1);
  217. x0 = IDCTAdjust(tmp2);
  218. outbuf[0] = x0;
  219. outbuf[1] = x1;
  220. outbuf[2] = x1;
  221. outbuf[3] = x0;
  222. outbuf[4] = x0;
  223. outbuf[5] = x1;
  224. outbuf[6] = x1;
  225. outbuf[7] = x0;
  226. }
  227. else
  228. {
  229. /* Stage 2 */
  230. x8 = x2 - x6;
  231. x6 = x2 + x6;
  232. /* Stage 3 */
  233. #if USE_MUL
  234. x2=(x8*B1)>>BSHIFT;
  235. #else
  236. tmp1 = x8 + (x8 >> 2); /* x2=x8*B1 */
  237. tmp1 += (tmp1 >> 3);
  238. x2 = (tmp1 + (x8 >> 7)) >> 1;
  239. #endif
  240. /* Stage 5 */
  241. tmp1 = x0 - x4;
  242. x0 = x0 + x4;
  243. tmp2 = x2 + x6;
  244. /* Stage 6 */
  245. x6 = x0 - tmp2;
  246. x0 = x0 + tmp2;
  247. x4 = tmp1 + x2;
  248. x2 = tmp1 - x2;
  249. /* Final Stage */
  250. tmp1 = IDCTAdjust(x0);
  251. outbuf[0] = tmp1;
  252. outbuf[7] = tmp1;
  253. tmp2 = IDCTAdjust(x4);
  254. outbuf[1] = tmp2;
  255. outbuf[6] = tmp2;
  256. tmp3 = IDCTAdjust(x2);
  257. outbuf[2] = tmp3;
  258. outbuf[5] = tmp3;
  259. tmp1 = IDCTAdjust(x6);
  260. outbuf[3] = tmp1;
  261. outbuf[4] = tmp1;
  262. }
  263. }
  264. else
  265. #endif
  266. {
  267. /* Stage 1 */
  268. tmp1 = x5 + x3;
  269. x5 = x5 - x3;
  270. tmp2 = x1 + x7;
  271. x7 = x1 - x7;
  272. /* Stage 2 */
  273. tmp3 = x2 - x6;
  274. x6 = x2 + x6;
  275. x3 = tmp2 + tmp1;
  276. x1 = tmp2 - tmp1;
  277. x8 = x7 - x5;
  278. /* Stage 3 */
  279. #if USE_MUL
  280. x5=(x5*B2)>>BSHIFT;
  281. x1=(x1*B3)>>BSHIFT;
  282. x2=(tmp3*B1)>>BSHIFT;
  283. x7=(x7*B4)>>BSHIFT;
  284. x8=(x8*B5)>>BSHIFT;
  285. #else
  286. x5 = x5 + (x5 >> 2) + (x5 >> 4) - (x5 >> 7) + (x5 >> 9); /* x5=x5*B2 */
  287. x5 = -x5;
  288. tmp1 = x1 + (x1 >> 2); /* x1=x1*B3 */
  289. tmp1 += (tmp1 >> 3);
  290. x1 = (tmp1 + (x1 >> 7)) >> 1;
  291. tmp1 = tmp3 + (tmp3 >> 2); /* x2=tmp3*B1 */
  292. tmp1 += (tmp1 >> 3);
  293. x2 = (tmp1 + (tmp3 >> 7)) >> 1;
  294. x7 = (x7 + (x7 >> 4) + (x7 >> 6) + (x7 >> 8)) >> 1; /* x7=x7*B4 */
  295. x8 = (x8 + (x8 >> 1) + (x8 >> 5) - (x8 >> 11)) >> 2; /* x8=x8*B5 */
  296. #endif /* USE_MUL */
  297. /* Stage 4 */
  298. x5=x5 - x8;
  299. x7=x7 + x8;
  300. /* Stage 5 */
  301. tmp3 = x0 - x4;
  302. x0 = x0 + x4;
  303. tmp2 = x2 + x6;
  304. x3 = x3 + x7;
  305. x7 = x1 + x7;
  306. x1 = x1 - x5;
  307. /* Stage 6 */
  308. x6 = x0 - tmp2;
  309. x0 = x0 + tmp2;
  310. x4 = tmp3 + x2;
  311. x2 = tmp3 - x2;
  312. /* Final Stage */
  313. outbuf[0] = IDCTAdjust(x0 + x3);
  314. outbuf[1] = IDCTAdjust(x4 + x7);
  315. outbuf[2] = IDCTAdjust(x2 + x1);
  316. outbuf[3] = IDCTAdjust(x6 - x5);
  317. outbuf[4] = IDCTAdjust(x6 + x5);
  318. outbuf[5] = IDCTAdjust(x2 - x1);
  319. outbuf[6] = IDCTAdjust(x4 - x7);
  320. outbuf[7] = IDCTAdjust(x0 - x3);
  321. }
  322. outbuf+=8;
  323. inblk+=8;
  324. }
  325. }
  326. #define IDCTAdjust128(val) ((val + POINTROUND) >> POINT)
  327. /* Function: ScScaleIDCT8x8i128_C()
  328. ** Purpose: Used by H263 video decompression.
  329. ** 20 Bit precision.
  330. */
  331. void ScScaleIDCT8x8i128_C(int *inbuf, int *outbuf)
  332. {
  333. register int *inblk;
  334. register int tmp1, tmp2, tmp3;
  335. register int x0, x1, x2, x3, x4, x5, x6, x7, x8;
  336. int i;
  337. _SlibDebug(_DEBUG_, printf("ScScaleIDCT8x8i128_C()\n") );
  338. /* Perform Row Computations */
  339. inblk = inbuf;
  340. for(i=0; i<8; i++)
  341. {
  342. /* Check for zeros */
  343. x0 = inblk[0*8];
  344. x1 = inblk[1*8];
  345. x2 = inblk[2*8];
  346. x3 = inblk[3*8];
  347. x4 = inblk[4*8];
  348. x5 = inblk[5*8];
  349. x6 = inblk[6*8];
  350. x7 = inblk[7*8];
  351. #if CHECK_FOR_ZEROS
  352. if(!(x1|x3|x5|x7))
  353. {
  354. if(!(x2|x6))
  355. {
  356. tmp1 = x0 + x4;
  357. tmp2 = x0 - x4;
  358. inblk[0*8] = tmp1;
  359. inblk[1*8] = tmp2;
  360. inblk[2*8] = tmp2;
  361. inblk[3*8] = tmp1;
  362. inblk[4*8] = tmp1;
  363. inblk[5*8] = tmp2;
  364. inblk[6*8] = tmp2;
  365. inblk[7*8] = tmp1;
  366. }
  367. else
  368. {
  369. /* Stage 2 */
  370. x8 = x2 - x6;
  371. x6 = x2 + x6;
  372. /* Stage 3 */
  373. #if USE_MUL
  374. x2=(x8*B1)>>BSHIFT;
  375. #else
  376. tmp1 = x8 + (x8 >> 2); /* x2=x8*B1 */
  377. tmp1 += (tmp1 >> 3);
  378. x2 = (tmp1 + (x8 >> 7)) >> 1;
  379. #endif
  380. /* Stage 5 */
  381. tmp1 = x0 - x4;
  382. x0 = x0 + x4;
  383. tmp2 = x2 + x6;
  384. /* Stage 6 */
  385. x6 = x0 - tmp2;
  386. x0 = x0 + tmp2;
  387. x4 = tmp1 + x2;
  388. x2 = tmp1 - x2;
  389. /* Final Stage */
  390. inblk[0*8] = x0;
  391. inblk[1*8] = x4;
  392. inblk[2*8] = x2;
  393. inblk[3*8] = x6;
  394. inblk[4*8] = x6;
  395. inblk[5*8] = x2;
  396. inblk[6*8] = x4;
  397. inblk[7*8] = x0;
  398. }
  399. }
  400. else
  401. #endif
  402. {
  403. /* Stage 1 */
  404. tmp1 = x5 + x3;
  405. x5 = x5 - x3;
  406. tmp2 = x1 + x7;
  407. x7 = x1 - x7;
  408. /* Stage 2 */
  409. tmp3 = x2 - x6;
  410. x6 = x2 + x6;
  411. x3 = tmp2 + tmp1;
  412. x1 = tmp2 - tmp1;
  413. x8 = x7 - x5;
  414. /* Stage 3 */
  415. #if USE_MUL
  416. x5=(x5*B2)>>BSHIFT;
  417. x1=(x1*B3)>>BSHIFT;
  418. x2=(tmp3*B1)>>BSHIFT;
  419. x7=(x7*B4)>>BSHIFT;
  420. x8=(x8*B5)>>BSHIFT;
  421. #else
  422. x5 = x5 + (x5 >> 2) + (x5 >> 4) - (x5 >> 7) + (x5 >> 9); /* x5=x5*B2 */
  423. x5 = -x5;
  424. tmp1 = x1 + (x1 >> 2); /* x1=x1*B3 */
  425. tmp1 += (tmp1 >> 3);
  426. x1 = (tmp1 + (x1 >> 7)) >> 1;
  427. tmp1 = tmp3 + (tmp3 >> 2); /* x2=tmp3*B1 */
  428. tmp1 += (tmp1 >> 3);
  429. x2 = (tmp1 + (tmp3 >> 7)) >> 1;
  430. x7 = (x7 + (x7 >> 4) + (x7 >> 6) + (x7 >> 8)) >> 1; /* x7=x7*B4 */
  431. x8 = (x8 + (x8 >> 1) + (x8 >> 5) - (x8 >> 11)) >> 2; /* x8=x8*B5 */
  432. #endif /* USE_MUL */
  433. /* Stage 4 */
  434. x5=x5 - x8;
  435. x7=x7 + x8;
  436. /* Stage 5 */
  437. tmp3 = x0 - x4;
  438. x0 = x0 + x4;
  439. tmp2 = x2 + x6;
  440. x3 = x3 + x7;
  441. x7 = x1 + x7;
  442. x1 = x1 - x5;
  443. /* Stage 6 */
  444. x6 = x0 - tmp2;
  445. x0 = x0 + tmp2;
  446. x4 = tmp3 + x2;
  447. x2 = tmp3 - x2;
  448. /* Final Stage */
  449. inblk[0*8] = x0 + x3;
  450. inblk[1*8] = x4 + x7;
  451. inblk[2*8] = x2 + x1;
  452. inblk[3*8] = x6 - x5;
  453. inblk[4*8] = x6 + x5;
  454. inblk[5*8] = x2 - x1;
  455. inblk[6*8] = x4 - x7;
  456. inblk[7*8] = x0 - x3;
  457. }
  458. inblk++;
  459. }
  460. /* Perform Column Computations */
  461. inblk = inbuf;
  462. for(i=0; i<8; i++)
  463. {
  464. /* Check for zeros */
  465. x0 = inblk[0];
  466. x1 = inblk[1];
  467. x2 = inblk[2];
  468. x3 = inblk[3];
  469. x4 = inblk[4];
  470. x5 = inblk[5];
  471. x6 = inblk[6];
  472. x7 = inblk[7];
  473. #if CHECK_FOR_ZEROS
  474. if(!(x1|x3|x5|x7))
  475. {
  476. if(!(x2|x6))
  477. {
  478. tmp1 = x0 + x4;
  479. tmp2 = x0 - x4;
  480. x1 = IDCTAdjust128(tmp1);
  481. x0 = IDCTAdjust128(tmp2);
  482. outbuf[0] = x0;
  483. outbuf[1] = x1;
  484. outbuf[2] = x1;
  485. outbuf[3] = x0;
  486. outbuf[4] = x0;
  487. outbuf[5] = x1;
  488. outbuf[6] = x1;
  489. outbuf[7] = x0;
  490. }
  491. else
  492. {
  493. /* Stage 2 */
  494. x8 = x2 - x6;
  495. x6 = x2 + x6;
  496. /* Stage 3 */
  497. #if USE_MUL
  498. x2=(x8*B1)>>BSHIFT;
  499. #else
  500. tmp1 = x8 + (x8 >> 2); /* x2=x8*B1 */
  501. tmp1 += (tmp1 >> 3);
  502. x2 = (tmp1 + (x8 >> 7)) >> 1;
  503. #endif
  504. /* Stage 5 */
  505. tmp1 = x0 - x4;
  506. x0 = x0 + x4;
  507. tmp2 = x2 + x6;
  508. /* Stage 6 */
  509. x6 = x0 - tmp2;
  510. x0 = x0 + tmp2;
  511. x4 = tmp1 + x2;
  512. x2 = tmp1 - x2;
  513. /* Final Stage */
  514. tmp1 = IDCTAdjust128(x0);
  515. outbuf[0] = tmp1;
  516. outbuf[7] = tmp1;
  517. tmp2 = IDCTAdjust128(x4);
  518. outbuf[1] = tmp2;
  519. outbuf[6] = tmp2;
  520. tmp3 = IDCTAdjust128(x2);
  521. outbuf[2] = tmp3;
  522. outbuf[5] = tmp3;
  523. tmp1 = IDCTAdjust128(x6);
  524. outbuf[3] = tmp1;
  525. outbuf[4] = tmp1;
  526. }
  527. }
  528. else
  529. #endif
  530. {
  531. /* Stage 1 */
  532. tmp1 = x5 + x3;
  533. x5 = x5 - x3;
  534. tmp2 = x1 + x7;
  535. x7 = x1 - x7;
  536. /* Stage 2 */
  537. tmp3 = x2 - x6;
  538. x6 = x2 + x6;
  539. x3 = tmp2 + tmp1;
  540. x1 = tmp2 - tmp1;
  541. x8 = x7 - x5;
  542. /* Stage 3 */
  543. #if USE_MUL
  544. x5=(x5*B2)>>BSHIFT;
  545. x1=(x1*B3)>>BSHIFT;
  546. x2=(tmp3*B1)>>BSHIFT;
  547. x7=(x7*B4)>>BSHIFT;
  548. x8=(x8*B5)>>BSHIFT;
  549. #else
  550. x5 = x5 + (x5 >> 2) + (x5 >> 4) - (x5 >> 7) + (x5 >> 9); /* x5=x5*B2 */
  551. x5 = -x5;
  552. tmp1 = x1 + (x1 >> 2); /* x1=x1*B3 */
  553. tmp1 += (tmp1 >> 3);
  554. x1 = (tmp1 + (x1 >> 7)) >> 1;
  555. tmp1 = tmp3 + (tmp3 >> 2); /* x2=tmp3*B1 */
  556. tmp1 += (tmp1 >> 3);
  557. x2 = (tmp1 + (tmp3 >> 7)) >> 1;
  558. x7 = (x7 + (x7 >> 4) + (x7 >> 6) + (x7 >> 8)) >> 1; /* x7=x7*B4 */
  559. x8 = (x8 + (x8 >> 1) + (x8 >> 5) - (x8 >> 11)) >> 2; /* x8=x8*B5 */
  560. #endif /* USE_MUL */
  561. /* Stage 4 */
  562. x5=x5 - x8;
  563. x7=x7 + x8;
  564. /* Stage 5 */
  565. tmp3 = x0 - x4;
  566. x0 = x0 + x4;
  567. tmp2 = x2 + x6;
  568. x3 = x3 + x7;
  569. x7 = x1 + x7;
  570. x1 = x1 - x5;
  571. /* Stage 6 */
  572. x6 = x0 - tmp2;
  573. x0 = x0 + tmp2;
  574. x4 = tmp3 + x2;
  575. x2 = tmp3 - x2;
  576. /* Final Stage */
  577. outbuf[0] = IDCTAdjust128(x0 + x3);
  578. outbuf[1] = IDCTAdjust128(x4 + x7);
  579. outbuf[2] = IDCTAdjust128(x2 + x1);
  580. outbuf[3] = IDCTAdjust128(x6 - x5);
  581. outbuf[4] = IDCTAdjust128(x6 + x5);
  582. outbuf[5] = IDCTAdjust128(x2 - x1);
  583. outbuf[6] = IDCTAdjust128(x4 - x7);
  584. outbuf[7] = IDCTAdjust128(x0 - x3);
  585. }
  586. outbuf+=8;
  587. inblk+=8;
  588. }
  589. }
  590. void ScScaleIDCT1x1i_C(int *inbuf, int *outbuf)
  591. {
  592. register int x0;
  593. int i;
  594. _SlibDebug(_DEBUG_, printf("ScScaleIDCT1x1i_C()\n") );
  595. x0=inbuf[0];
  596. x0=((x0 + POINTROUND) >> POINT) - 128;
  597. for (i=0; i<64; i++)
  598. outbuf[i]=x0;
  599. }
  600. void ScScaleIDCT1x2i_C(int *inbuf, int *outbuf)
  601. {
  602. register int x0, x1, x3, x5, x7, x8, tmp1;
  603. _SlibDebug(_DEBUG_, printf("ScScaleIDCT1x2i_C()\n") );
  604. x0 = inbuf[0*8];
  605. x1 = inbuf[1*8];
  606. /* Stage 2 */
  607. x3=x1;
  608. /* Stage 3 */
  609. #if USE_MUL
  610. x7=(x1*B4)>>BSHIFT;
  611. x8=(x1*B5)>>BSHIFT;
  612. x1=(x1*B3)>>BSHIFT;
  613. #else
  614. x7 = (x1 + (x1 >> 4) + (x1 >> 6) + (x1 >> 8)) >> 1; /* x7=x7*B4 */
  615. x8 = (x1 + (x1 >> 1) + (x1 >> 5) - (x1 >> 11)) >> 2; /* x8=x8*B5 */
  616. tmp1 = x1 + (x1 >> 2); /* x1=x1*B3 */
  617. tmp1 += (tmp1 >> 3);
  618. x1 = (tmp1 + (x1 >> 7)) >> 1;
  619. #endif /* USE_MUL */
  620. /* Stage 4 */
  621. x5=-x8;
  622. x7+=x8;
  623. /* Stage 5 */
  624. x3+=x7;
  625. x8=x1;
  626. x1-=x5;
  627. x7+=x8;
  628. /* Final Stage */
  629. outbuf[0*8+0]=outbuf[0*8+1]=outbuf[0*8+2]=outbuf[0*8+3]=
  630. outbuf[0*8+4]=outbuf[0*8+5]=outbuf[0*8+6]=outbuf[0*8+7]=IDCTAdjust(x0 + x3);
  631. outbuf[1*8+0]=outbuf[1*8+1]=outbuf[1*8+2]=outbuf[1*8+3]=
  632. outbuf[1*8+4]=outbuf[1*8+5]=outbuf[1*8+6]=outbuf[1*8+7]=IDCTAdjust(x0 + x7);
  633. outbuf[2*8+0]=outbuf[2*8+1]=outbuf[2*8+2]=outbuf[2*8+3]=
  634. outbuf[2*8+4]=outbuf[2*8+5]=outbuf[2*8+6]=outbuf[2*8+7]=IDCTAdjust(x0 + x1);
  635. outbuf[3*8+0]=outbuf[3*8+1]=outbuf[3*8+2]=outbuf[3*8+3]=
  636. outbuf[3*8+4]=outbuf[3*8+5]=outbuf[3*8+6]=outbuf[3*8+7]=IDCTAdjust(x0 - x5);
  637. outbuf[4*8+0]=outbuf[4*8+1]=outbuf[4*8+2]=outbuf[4*8+3]=
  638. outbuf[4*8+4]=outbuf[4*8+5]=outbuf[4*8+6]=outbuf[4*8+7]=IDCTAdjust(x0 + x5);
  639. outbuf[5*8+0]=outbuf[5*8+1]=outbuf[5*8+2]=outbuf[5*8+3]=
  640. outbuf[5*8+4]=outbuf[5*8+5]=outbuf[5*8+6]=outbuf[5*8+7]=IDCTAdjust(x0 - x1);
  641. outbuf[6*8+0]=outbuf[6*8+1]=outbuf[6*8+2]=outbuf[6*8+3]=
  642. outbuf[6*8+4]=outbuf[6*8+5]=outbuf[6*8+6]=outbuf[6*8+7]=IDCTAdjust(x0 - x7);
  643. outbuf[7*8+0]=outbuf[7*8+1]=outbuf[7*8+2]=outbuf[7*8+3]=
  644. outbuf[7*8+4]=outbuf[7*8+5]=outbuf[7*8+6]=outbuf[7*8+7]=IDCTAdjust(x0 - x3);
  645. }
  646. void ScScaleIDCT2x1i_C(int *inbuf, int *outbuf)
  647. {
  648. register int x0, x1, x3, x5, x7, x8, tmp1;
  649. _SlibDebug(_DEBUG_, printf("ScScaleIDCT1x2i_C()\n") );
  650. x0 = inbuf[0];
  651. x1 = inbuf[1];
  652. /* Stage 2 */
  653. x3=x1;
  654. /* Stage 3 */
  655. #if USE_MUL
  656. x7=(x1*B4)>>BSHIFT;
  657. x8=(x1*B5)>>BSHIFT;
  658. x1=(x1*B3)>>BSHIFT;
  659. #else
  660. x7 = (x1 + (x1 >> 4) + (x1 >> 6) + (x1 >> 8)) >> 1; /* x7=x7*B4 */
  661. x8 = (x1 + (x1 >> 1) + (x1 >> 5) - (x1 >> 11)) >> 2; /* x8=x8*B5 */
  662. tmp1 = x1 + (x1 >> 2); /* x1=x1*B3 */
  663. tmp1 += (tmp1 >> 3);
  664. x1 = (tmp1 + (x1 >> 7)) >> 1;
  665. #endif /* USE_MUL */
  666. /* Stage 4 */
  667. x5=-x8;
  668. x7+=x8;
  669. /* Stage 5 */
  670. x3+=x7;
  671. x8=x1;
  672. x1-=x5;
  673. x7+=x8;
  674. /* Final Stage */
  675. outbuf[0*8+0]=outbuf[1*8+0]=outbuf[2*8+0]=outbuf[3*8+0]=
  676. outbuf[4*8+0]=outbuf[5*8+0]=outbuf[6*8+0]=outbuf[7*8+0]=IDCTAdjust(x0 + x3);
  677. outbuf[0*8+1]=outbuf[1*8+1]=outbuf[2*8+1]=outbuf[3*8+1]=
  678. outbuf[4*8+1]=outbuf[5*8+1]=outbuf[6*8+1]=outbuf[7*8+1]=IDCTAdjust(x0 + x7);
  679. outbuf[0*8+2]=outbuf[1*8+2]=outbuf[2*8+2]=outbuf[3*8+2]=
  680. outbuf[4*8+2]=outbuf[5*8+2]=outbuf[6*8+2]=outbuf[7*8+2]=IDCTAdjust(x0 + x1);
  681. outbuf[0*8+3]=outbuf[1*8+3]=outbuf[2*8+3]=outbuf[3*8+3]=
  682. outbuf[4*8+3]=outbuf[5*8+3]=outbuf[6*8+3]=outbuf[7*8+3]=IDCTAdjust(x0 - x5);
  683. outbuf[0*8+4]=outbuf[1*8+4]=outbuf[2*8+4]=outbuf[3*8+4]=
  684. outbuf[4*8+4]=outbuf[5*8+4]=outbuf[6*8+4]=outbuf[7*8+4]=IDCTAdjust(x0 + x5);
  685. outbuf[0*8+5]=outbuf[1*8+5]=outbuf[2*8+5]=outbuf[3*8+5]=
  686. outbuf[4*8+5]=outbuf[5*8+5]=outbuf[6*8+5]=outbuf[7*8+5]=IDCTAdjust(x0 - x1);
  687. outbuf[0*8+6]=outbuf[1*8+6]=outbuf[2*8+6]=outbuf[3*8+6]=
  688. outbuf[4*8+6]=outbuf[5*8+6]=outbuf[6*8+6]=outbuf[7*8+6]=IDCTAdjust(x0 - x7);
  689. outbuf[0*8+7]=outbuf[1*8+7]=outbuf[2*8+7]=outbuf[3*8+7]=
  690. outbuf[4*8+7]=outbuf[5*8+7]=outbuf[6*8+7]=outbuf[7*8+7]=IDCTAdjust(x0 - x3);
  691. }
  692. void ScScaleIDCT2x2i_C(int *inbuf, int *outbuf)
  693. {
  694. #if 1
  695. register unsigned int i;
  696. register int x0, x1, x3, x5, x7, x8, tmp1;
  697. _SlibDebug(_DEBUG_, printf("ScScaleIDCT2x2i_C()\n") );
  698. /* Column 1 */
  699. x0 = inbuf[0*8];
  700. x1 = inbuf[1*8];
  701. x3=x1; /* Stage 2 */
  702. #if USE_MUL
  703. x7=(x1*B4)>>BSHIFT;
  704. x8=(x1*B5)>>BSHIFT;
  705. x1=(x1*B3)>>BSHIFT;
  706. #else
  707. x7 = (x1 + (x1 >> 4) + (x1 >> 6) + (x1 >> 8)) >> 1; /* x7=x7*B4 */
  708. x8 = (x1 + (x1 >> 1) + (x1 >> 5) - (x1 >> 11)) >> 2; /* x8=x8*B5 */
  709. tmp1 = x1 + (x1 >> 2); /* x1=x1*B3 */
  710. tmp1 += (tmp1 >> 3);
  711. x1 = (tmp1 + (x1 >> 7)) >> 1;
  712. #endif /* USE_MUL */
  713. x5=-x8; /* Stage 4 */
  714. x7+=x8;
  715. x3+=x7; /* Stage 5 */
  716. x8=x1;
  717. x1-=x5;
  718. x7+=x8;
  719. inbuf[0*8]=x0 + x3;
  720. inbuf[1*8]=x0 + x7;
  721. inbuf[2*8]=x0 + x1;
  722. inbuf[3*8]=x0 - x5;
  723. inbuf[4*8]=x0 + x5;
  724. inbuf[5*8]=x0 - x1;
  725. inbuf[6*8]=x0 - x7;
  726. inbuf[7*8]=x0 - x3;
  727. /* Column 2 */
  728. x0 = inbuf[0*8+1];
  729. x1 = inbuf[1*8+1];
  730. x3=x1; /* Stage 2 */
  731. #if USE_MUL
  732. x7=(x1*B4)>>BSHIFT;
  733. x8=(x1*B5)>>BSHIFT;
  734. x1=(x1*B3)>>BSHIFT;
  735. #else
  736. x7 = (x1 + (x1 >> 4) + (x1 >> 6) + (x1 >> 8)) >> 1; /* x7=x7*B4 */
  737. x8 = (x1 + (x1 >> 1) + (x1 >> 5) - (x1 >> 11)) >> 2; /* x8=x8*B5 */
  738. tmp1 = x1 + (x1 >> 2); /* x1=x1*B3 */
  739. tmp1 += (tmp1 >> 3);
  740. x1 = (tmp1 + (x1 >> 7)) >> 1;
  741. #endif /* USE_MUL */
  742. x5=-x8; /* Stage 4 */
  743. x7+=x8;
  744. x3+=x7; /* Stage 5 */
  745. x8=x1;
  746. x1-=x5;
  747. x7+=x8;
  748. inbuf[0*8+1]=x0 + x3;
  749. inbuf[1*8+1]=x0 + x7;
  750. inbuf[2*8+1]=x0 + x1;
  751. inbuf[3*8+1]=x0 - x5;
  752. inbuf[4*8+1]=x0 + x5;
  753. inbuf[5*8+1]=x0 - x1;
  754. inbuf[6*8+1]=x0 - x7;
  755. inbuf[7*8+1]=x0 - x3;
  756. /* Rows */
  757. for (i=0; i<8; i++)
  758. {
  759. x0 = inbuf[0];
  760. x1 = inbuf[1];
  761. x3=x1; /* Stage 2 */
  762. #if USE_MUL
  763. x7=(x1*B4)>>BSHIFT;
  764. x8=(x1*B5)>>BSHIFT;
  765. x1=(x1*B3)>>BSHIFT;
  766. #else
  767. x7 = (x1 + (x1 >> 4) + (x1 >> 6) + (x1 >> 8)) >> 1; /* x7=x7*B4 */
  768. x8 = (x1 + (x1 >> 1) + (x1 >> 5) - (x1 >> 11)) >> 2; /* x8=x8*B5 */
  769. tmp1 = x1 + (x1 >> 2); /* x1=x1*B3 */
  770. tmp1 += (tmp1 >> 3);
  771. x1 = (tmp1 + (x1 >> 7)) >> 1;
  772. #endif /* USE_MUL */
  773. x5=-x8; /* Stage 4 */
  774. x7+=x8;
  775. x3+=x7; /* Stage 5 */
  776. x8=x1;
  777. x1-=x5;
  778. x7+=x8;
  779. outbuf[0] = IDCTAdjust(x0 + x3);
  780. outbuf[1] = IDCTAdjust(x0 + x7);
  781. outbuf[2] = IDCTAdjust(x0 + x1);
  782. outbuf[3] = IDCTAdjust(x0 - x5);
  783. outbuf[4] = IDCTAdjust(x0 + x5);
  784. outbuf[5] = IDCTAdjust(x0 - x1);
  785. outbuf[6] = IDCTAdjust(x0 - x7);
  786. outbuf[7] = IDCTAdjust(x0 - x3);
  787. outbuf+=8;
  788. inbuf+=8;
  789. }
  790. #else
  791. /* Register only version */
  792. register int x3, x5, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
  793. register int x0a, x1a, x3a, x5a, x7a;
  794. register int x0b, x1b, x3b, x5b, x7b;
  795. _SlibDebug(_DEBUG_, printf("ScScaleIDCT2x2i_C()\n") );
  796. #define Calc2x2(col, x0_calc, x1_calc, x0, x1, x3, x5, x7, x8) \
  797. x0 = x0_calc; \
  798. x1 = x1_calc; \
  799. x3=x1; /* Stage 2 */ \
  800. x7=(x1*B4)>>BSHIFT; /* Stage 3 */ \
  801. x8=(x1*B5)>>BSHIFT; \
  802. x1=(x1*B3)>>BSHIFT; \
  803. x5=-x8; /* Stage 4 */ \
  804. x7+=x8; \
  805. x3+=x7; /* Stage 5 */ \
  806. x8=x1; \
  807. x1-=x5; \
  808. x7+=x8; \
  809. outbuf[0+col*8] = IDCTAdjust(x0 + x3); \
  810. outbuf[1+col*8] = IDCTAdjust(x0 + x7); \
  811. outbuf[2+col*8] = IDCTAdjust(x0 + x1); \
  812. outbuf[3+col*8] = IDCTAdjust(x0 - x5); \
  813. outbuf[4+col*8] = IDCTAdjust(x0 + x5); \
  814. outbuf[5+col*8] = IDCTAdjust(x0 - x1); \
  815. outbuf[6+col*8] = IDCTAdjust(x0 - x7); \
  816. outbuf[7+col*8] = IDCTAdjust(x0 - x3);
  817. /****** Row 0 ******/
  818. x0a = inbuf[0*8];
  819. x1a = inbuf[1*8];
  820. x3a=x1a; /* Stage 2 */
  821. x7a=(x1a*B4)>>BSHIFT; /* Stage 3 */
  822. tmp1=(x1a*B5)>>BSHIFT;
  823. x1a=(x1a*B3)>>BSHIFT;
  824. x5a=-tmp1; /* Stage 4 */
  825. x7a+=tmp1;
  826. x3a+=x7a; /* Stage 5 */
  827. tmp1=x1a;
  828. x1a-=x5a;
  829. x7a+=tmp1;
  830. /****** Row 1 ******/
  831. x0b = inbuf[0*8+1];
  832. x1b = inbuf[1*8+1];
  833. x3b=x1b; /* Stage 2 */
  834. x7b=(x1b*B4)>>BSHIFT; /* Stage 3 */
  835. tmp2=(x1b*B5)>>BSHIFT;
  836. x1b=(x1b*B3)>>BSHIFT;
  837. x5b=-tmp2; /* Stage 4 */
  838. x7b+=tmp2;
  839. x3b+=x7b; /* Stage 5 */
  840. tmp2=x1b;
  841. x1b-=x5b;
  842. x7b+=tmp2;
  843. Calc2x2(0, x0a+x3a, x0b+x3b, tmp1, tmp2, x3, x5, tmp3, tmp4);
  844. Calc2x2(1, x0a+x7a, x0b+x7b, tmp5, tmp6, x3, x5, tmp7, tmp8);
  845. Calc2x2(2, x0a+x1a, x0b+x1b, tmp1, tmp2, x3, x5, tmp3, tmp4);
  846. Calc2x2(3, x0a-x5a, x0b-x5b, tmp5, tmp6, x3, x5, tmp7, tmp8);
  847. Calc2x2(4, x0a+x5a, x0b+x5b, tmp1, tmp2, x3, x5, tmp3, tmp4);
  848. Calc2x2(5, x0a-x1a, x0b-x1b, tmp5, tmp6, x3, x5, tmp7, tmp8);
  849. Calc2x2(6, x0a-x7a, x0b-x7b, tmp1, tmp2, x3, x5, tmp3, tmp4);
  850. Calc2x2(7, x0a-x3a, x0b-x3b, tmp5, tmp6, x3, x5, tmp7, tmp8);
  851. #endif
  852. }
  853. void ScScaleIDCT3x3i_C(int *inbuf, int *outbuf)
  854. {
  855. register int *inblk;
  856. register int tmp1;
  857. register int x0, x1, x2, x3, x4, x6, x7, x8;
  858. int i;
  859. _SlibDebug(_DEBUG_, printf("ScScaleIDCT3x3i_C()\n") );
  860. /* Perform Row Computations */
  861. inblk = inbuf;
  862. for(i=0; i<3; i++)
  863. {
  864. x0 = inblk[0*8];
  865. x1 = inblk[1*8];
  866. x2 = inblk[2*8];
  867. /* Stage 2 */
  868. x6=x2;
  869. x3=x1;
  870. /* Stage 3 */
  871. #if USE_MUL
  872. x2=(x2*B1)>>BSHIFT;
  873. x7=(x1*B4)>>BSHIFT;
  874. x8=(x1*B5)>>BSHIFT;
  875. x1=(x1*B3)>>BSHIFT;
  876. #else
  877. tmp1 = x2 + (x2 >> 2); /* x2=x2*B1 */
  878. tmp1 += (tmp1 >> 3);
  879. x2 = (tmp1 + (x2 >> 7)) >> 1;
  880. x7 = (x1 + (x1 >> 4) + (x1 >> 6) + (x1 >> 8)) >> 1; /* x7=x1*B4 */
  881. x8 = (x1 + (x1 >> 1) + (x1 >> 5) - (x1 >> 11)) >> 2; /* x8=x1*B5 */
  882. tmp1 = x1 + (x1 >> 2); /* x1=x1*B3 */
  883. tmp1 += (tmp1 >> 3);
  884. x1 = (tmp1 + (x1 >> 7)) >> 1;
  885. #endif /* USE_MUL */
  886. /* Stage 4 */
  887. x7+=x8;
  888. /* Stage 5 */
  889. tmp1=x6+x2;
  890. x3+=x7;
  891. x7+=x1;
  892. x1+=x8;
  893. /* Stage 6 */
  894. x4=x0+x2;
  895. x2=x0-x2;
  896. x6=x0-tmp1;
  897. x0=x0+tmp1;
  898. /* Final Stage */
  899. inblk[0*8] = x0 + x3;
  900. inblk[1*8] = x4 + x7;
  901. inblk[2*8] = x2 + x1;
  902. inblk[3*8] = x6 + x8;
  903. inblk[4*8] = x6 - x8;
  904. inblk[5*8] = x2 - x1;
  905. inblk[6*8] = x4 - x7;
  906. inblk[7*8] = x0 - x3;
  907. inblk++;
  908. }
  909. /* Perform Column Computations */
  910. inblk = inbuf;
  911. for(i=0; i<8; i++)
  912. {
  913. x0 = inblk[0];
  914. x1 = inblk[1];
  915. x2 = inblk[2];
  916. /* Stage 2 */
  917. x6=x2;
  918. x3=x1;
  919. /* Stage 3 */
  920. #if USE_MUL
  921. x2=(x2*B1)>>BSHIFT;
  922. x7=(x1*B4)>>BSHIFT;
  923. x8=(x1*B5)>>BSHIFT;
  924. x1=(x1*B3)>>BSHIFT;
  925. #else
  926. tmp1 = x2 + (x2 >> 2); /* x2=x2*B1 */
  927. tmp1 += (tmp1 >> 3);
  928. x2 = (tmp1 + (x2 >> 7)) >> 1;
  929. x7 = (x1 + (x1 >> 4) + (x1 >> 6) + (x1 >> 8)) >> 1; /* x7=x1*B4 */
  930. x8 = (x1 + (x1 >> 1) + (x1 >> 5) - (x1 >> 11)) >> 2; /* x8=x1*B5 */
  931. tmp1 = x1 + (x1 >> 2); /* x1=x1*B3 */
  932. tmp1 += (tmp1 >> 3);
  933. x1 = (tmp1 + (x1 >> 7)) >> 1;
  934. #endif /* USE_MUL */
  935. /* Stage 4 */
  936. x7+=x8;
  937. /* Stage 5 */
  938. tmp1=x6+x2;
  939. x3+=x7;
  940. x7+=x1;
  941. x1+=x8;
  942. /* Stage 6 */
  943. x4=x0+x2;
  944. x2=x0-x2;
  945. x6=x0-tmp1;
  946. x0=x0+tmp1;
  947. /* Final Stage */
  948. outbuf[0] = IDCTAdjust(x0 + x3);
  949. outbuf[1] = IDCTAdjust(x4 + x7);
  950. outbuf[2] = IDCTAdjust(x2 + x1);
  951. outbuf[3] = IDCTAdjust(x6 + x8);
  952. outbuf[4] = IDCTAdjust(x6 - x8);
  953. outbuf[5] = IDCTAdjust(x2 - x1);
  954. outbuf[6] = IDCTAdjust(x4 - x7);
  955. outbuf[7] = IDCTAdjust(x0 - x3);
  956. outbuf+=8;
  957. inblk+=8;
  958. }
  959. }
  960. void ScScaleIDCT4x4i_C(int *inbuf, int *outbuf)
  961. {
  962. register int *inblk;
  963. register int tmp1, tmp2;
  964. register int x0, x1, x2, x3, x4, x5, x6, x7, x8;
  965. int i;
  966. _SlibDebug(_DEBUG_, printf("ScScaleIDCT4x4i_C()\n") );
  967. /* Perform Row Computations */
  968. inblk = inbuf;
  969. for(i=0; i<4; i++)
  970. {
  971. x0 = inblk[0*8];
  972. x1 = inblk[1*8];
  973. x2 = inblk[2*8];
  974. x3 = inblk[3*8];
  975. /* Stage 1 */
  976. x5=-x3;
  977. /* Stage 2 */
  978. x6=x2;
  979. tmp1=x1-x3;
  980. x3=x1+x3;
  981. /* Stage 3 */
  982. #if USE_MUL
  983. x5=(x5*B2)>>BSHIFT;
  984. x2=(x2*B1)>>BSHIFT;
  985. x7=(x1*B4)>>BSHIFT;
  986. x8=(x3*B5)>>BSHIFT;
  987. x1=(tmp1*B3)>>BSHIFT;
  988. #else
  989. x5 = x5 + (x5 >> 2) + (x5 >> 4) - (x5 >> 7) + (x5 >> 9); /* x5=x5*B2 */
  990. x5 = -x5;
  991. tmp2 = x2 + (x2 >> 2); /* x2=x2*B1 */
  992. tmp2 += (tmp2 >> 3);
  993. x2 = (tmp2 + (x2 >> 7)) >> 1;
  994. x7 = (x1 + (x1 >> 4) + (x1 >> 6) + (x1 >> 8)) >> 1; /* x7=x1*B4 */
  995. x8 = (x3 + (x3 >> 1) + (x3 >> 5) - (x3 >> 11)) >> 2; /* x8=x3*B5 */
  996. tmp2 = tmp1 + (tmp1 >> 2); /* x1=tmp1*B3 */
  997. tmp2 += (tmp2 >> 3);
  998. x1 = (tmp2 + (tmp1 >> 7)) >> 1;
  999. #endif /* USE_MUL */
  1000. /* Stage 4 */
  1001. x5-=x8;
  1002. x7+=x8;
  1003. /* Stage 5 */
  1004. tmp1=x6+x2;
  1005. x3+=x7;
  1006. x7+=x1;
  1007. x1-=x5;
  1008. /* Stage 6 */
  1009. x4=x0+x2;
  1010. x2=x0-x2;
  1011. x6=x0-tmp1;
  1012. x0=x0+tmp1;
  1013. /* Final Stage */
  1014. inblk[0*8] = x0 + x3;
  1015. inblk[1*8] = x4 + x7;
  1016. inblk[2*8] = x2 + x1;
  1017. inblk[3*8] = x6 - x5;
  1018. inblk[4*8] = x6 + x5;
  1019. inblk[5*8] = x2 - x1;
  1020. inblk[6*8] = x4 - x7;
  1021. inblk[7*8] = x0 - x3;
  1022. inblk++;
  1023. }
  1024. /* Perform Column Computations */
  1025. inblk = inbuf;
  1026. for(i=0; i<8; i++)
  1027. {
  1028. x0 = inblk[0];
  1029. x1 = inblk[1];
  1030. x2 = inblk[2];
  1031. x3 = inblk[3];
  1032. /* Stage 1 */
  1033. x5=-x3;
  1034. /* Stage 2 */
  1035. x6=x2;
  1036. tmp1=x1-x3;
  1037. x3=x1+x3;
  1038. /* Stage 3 */
  1039. #if USE_MUL
  1040. x5=(x5*B2)>>BSHIFT;
  1041. x2=(x2*B1)>>BSHIFT;
  1042. x7=(x1*B4)>>BSHIFT;
  1043. x8=(x3*B5)>>BSHIFT;
  1044. x1=(tmp1*B3)>>BSHIFT;
  1045. #else
  1046. x5 = x5 + (x5 >> 2) + (x5 >> 4) - (x5 >> 7) + (x5 >> 9); /* x5=x5*B2 */
  1047. x5 = -x5;
  1048. tmp2 = x2 + (x2 >> 2); /* x2=x2*B1 */
  1049. tmp2 += (tmp2 >> 3);
  1050. x2 = (tmp2 + (x2 >> 7)) >> 1;
  1051. x7 = (x1 + (x1 >> 4) + (x1 >> 6) + (x1 >> 8)) >> 1; /* x7=x1*B4 */
  1052. x8 = (x3 + (x3 >> 1) + (x3 >> 5) - (x3 >> 11)) >> 2; /* x8=x3*B5 */
  1053. tmp2 = tmp1 + (tmp1 >> 2); /* x1=tmp1*B3 */
  1054. tmp2 += (tmp2 >> 3);
  1055. x1 = (tmp2 + (tmp1 >> 7)) >> 1;
  1056. #endif /* USE_MUL */
  1057. /* Stage 4 */
  1058. x5-=x8;
  1059. x7+=x8;
  1060. /* Stage 5 */
  1061. tmp1=x6+x2;
  1062. x3+=x7;
  1063. x7+=x1;
  1064. x1-=x5;
  1065. /* Stage 6 */
  1066. x4=x0+x2;
  1067. x2=x0-x2;
  1068. x6=x0-tmp1;
  1069. x0=x0+tmp1;
  1070. /* Final Stage */
  1071. outbuf[0] = IDCTAdjust(x0 + x3);
  1072. outbuf[1] = IDCTAdjust(x4 + x7);
  1073. outbuf[2] = IDCTAdjust(x2 + x1);
  1074. outbuf[3] = IDCTAdjust(x6 - x5);
  1075. outbuf[4] = IDCTAdjust(x6 + x5);
  1076. outbuf[5] = IDCTAdjust(x2 - x1);
  1077. outbuf[6] = IDCTAdjust(x4 - x7);
  1078. outbuf[7] = IDCTAdjust(x0 - x3);
  1079. outbuf+=8;
  1080. inblk+=8;
  1081. }
  1082. }
  1083. void ScScaleIDCT6x6i_C(int *inbuf, int *outbuf)
  1084. {
  1085. register int *inblk;
  1086. register int tmp1;
  1087. register int x0, x1, x2, x3, x4, x5, x6, x7, x8;
  1088. int i;
  1089. _SlibDebug(_DEBUG_, printf("ScScaleIDCT6x6i_C()\n") );
  1090. /* Perform Row Computations */
  1091. inblk = inbuf;
  1092. for(i=0; i<6; i++)
  1093. {
  1094. x0 = inblk[0*8];
  1095. x1 = inblk[1*8];
  1096. x2 = inblk[2*8];
  1097. x3 = inblk[3*8];
  1098. x4 = inblk[4*8];
  1099. x5 = inblk[5*8];
  1100. /* Stage 1 */
  1101. x7=x1;
  1102. tmp1=x5;
  1103. x5-=x3;
  1104. x3+=tmp1;
  1105. /* Stage 2 */
  1106. x6=x2;
  1107. tmp1=x3;
  1108. x3+=x1;
  1109. x1-=tmp1;
  1110. x8=x7-x5;
  1111. /* Stage 3 */
  1112. #if USE_MUL
  1113. x5=(x5*B2)>>BSHIFT;
  1114. x2=(x2*B1)>>BSHIFT;
  1115. x1=(x1*B3)>>BSHIFT;
  1116. x7=(x7*B4)>>BSHIFT;
  1117. x8=(x8*B5)>>BSHIFT;
  1118. #else
  1119. x5 = x5 + (x5 >> 2) + (x5 >> 4) - (x5 >> 7) + (x5 >> 9); /* x5=x5*B2 */
  1120. x5 = -x5;
  1121. tmp1 = x2 + (x2 >> 2); /* x2=x2*B1 */
  1122. tmp1 += (tmp1 >> 3);
  1123. x2 = (tmp1 + (x2 >> 7)) >> 1;
  1124. x7 = (x7 + (x7 >> 4) + (x7 >> 6) + (x7 >> 8)) >> 1; /* x7=x7*B4 */
  1125. x8 = (x8 + (x8 >> 1) + (x8 >> 5) - (x8 >> 11)) >> 2; /* x8=x8*B5 */
  1126. tmp1 = x1 + (x1 >> 2); /* x1=x1*B3 */
  1127. tmp1 += (tmp1 >> 3);
  1128. x1 = (tmp1 + (x1 >> 7)) >> 1;
  1129. #endif /* USE_MUL */
  1130. /* Stage 4 */
  1131. x5-=x8;
  1132. x7+=x8;
  1133. /* Stage 5 */
  1134. x6+=x2;
  1135. tmp1=x4;
  1136. x4=x0-x4;
  1137. x0+=tmp1;
  1138. x3+=x7;
  1139. x7+=x1;
  1140. x1-=x5;
  1141. /* Stage 6 */
  1142. tmp1=x0;
  1143. x0+=x6;
  1144. x6=tmp1-x6;
  1145. tmp1=x2;
  1146. x2=x4-x2;
  1147. x4+=tmp1;
  1148. /* Final Stage */
  1149. inblk[0*8] = x0 + x3;
  1150. inblk[1*8] = x4 + x7;
  1151. inblk[2*8] = x2 + x1;
  1152. inblk[3*8] = x6 - x5;
  1153. inblk[4*8] = x5 + x6;
  1154. inblk[5*8] = x2 - x1;
  1155. inblk[6*8] = x4 - x7;
  1156. inblk[7*8] = x0 - x3;
  1157. inblk++;
  1158. }
  1159. /* Perform Column Computations */
  1160. inblk = inbuf;
  1161. for(i=0; i<8; i++)
  1162. {
  1163. x0 = inblk[0];
  1164. x1 = inblk[1];
  1165. x2 = inblk[2];
  1166. x3 = inblk[3];
  1167. x4 = inblk[4];
  1168. x5 = inblk[5];
  1169. /* Stage 1 */
  1170. x7=x1;
  1171. tmp1=x5;
  1172. x5-=x3;
  1173. x3+=tmp1;
  1174. /* Stage 2 */
  1175. x6=x2;
  1176. tmp1=x3;
  1177. x3+=x1;
  1178. x1-=tmp1;
  1179. x8=x7-x5;
  1180. #if USE_MUL
  1181. x5=(x5*B2)>>BSHIFT;
  1182. x2=(x2*B1)>>BSHIFT;
  1183. x1=(x1*B3)>>BSHIFT;
  1184. x7=(x7*B4)>>BSHIFT;
  1185. x8=(x8*B5)>>BSHIFT;
  1186. #else
  1187. x5 = x5 + (x5 >> 2) + (x5 >> 4) - (x5 >> 7) + (x5 >> 9); /* x5=x5*B2 */
  1188. x5 = -x5;
  1189. tmp1 = x2 + (x2 >> 2); /* x2=x2*B1 */
  1190. tmp1 += (tmp1 >> 3);
  1191. x2 = (tmp1 + (x2 >> 7)) >> 1;
  1192. x7 = (x7 + (x7 >> 4) + (x7 >> 6) + (x7 >> 8)) >> 1; /* x7=x7*B4 */
  1193. x8 = (x8 + (x8 >> 1) + (x8 >> 5) - (x8 >> 11)) >> 2; /* x8=x8*B5 */
  1194. tmp1 = x1 + (x1 >> 2); /* x1=x1*B3 */
  1195. tmp1 += (tmp1 >> 3);
  1196. x1 = (tmp1 + (x1 >> 7)) >> 1;
  1197. #endif /* USE_MUL */
  1198. /* Stage 4 */
  1199. x5-=x8;
  1200. x7+=x8;
  1201. /* Stage 5 */
  1202. x6+=x2;
  1203. tmp1=x4;
  1204. x4=x0-x4;
  1205. x0+=tmp1;
  1206. x3+=x7;
  1207. x7+=x1;
  1208. x1-=x5;
  1209. /* Stage 6 */
  1210. tmp1=x0;
  1211. x0+=x6;
  1212. x6=tmp1-x6;
  1213. tmp1=x2;
  1214. x2=x4-x2;
  1215. x4+=tmp1;
  1216. /* Final Stage */
  1217. outbuf[0] = IDCTAdjust(x0 + x3);
  1218. outbuf[1] = IDCTAdjust(x4 + x7);
  1219. outbuf[2] = IDCTAdjust(x2 + x1);
  1220. outbuf[3] = IDCTAdjust(x6 - x5);
  1221. outbuf[4] = IDCTAdjust(x6 + x5);
  1222. outbuf[5] = IDCTAdjust(x2 - x1);
  1223. outbuf[6] = IDCTAdjust(x4 - x7);
  1224. outbuf[7] = IDCTAdjust(x0 - x3);
  1225. outbuf+=8;
  1226. inblk+=8;
  1227. }
  1228. }