Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1211 lines
29 KiB

  1. /* File: sv_h263_dct.c */
  2. /*****************************************************************************
  3. ** Copyright (c) Digital Equipment Corporation, 1995, 1997 **
  4. ** **
  5. ** All Rights Reserved. Unpublished rights reserved under the copyright **
  6. ** laws of the United States. **
  7. ** **
  8. ** The software contained on this media is proprietary to and embodies **
  9. ** the confidential technology of Digital Equipment Corporation. **
  10. ** Possession, use, duplication or dissemination of the software and **
  11. ** media is authorized only pursuant to a valid written license from **
  12. ** Digital Equipment Corporation. **
  13. ** **
  14. ** RESTRICTED RIGHTS LEGEND Use, duplication, or disclosure by the U.S. **
  15. ** Government is subject to restrictions as set forth in Subparagraph **
  16. ** (c)(1)(ii) of DFARS 252.227-7013, or in FAR 52.227-19, as applicable. **
  17. ******************************************************************************/
  18. #include <math.h>
  19. #include "sv_h263.h"
  20. #include "proto.h"
  /*
   * Cast shorthands.  S narrows a float result to a 16-bit coefficient in the
   * forward DCT routines of this file.
   */
  #define F (float)
  #define S (short)

  /*
   * tdzz[row * 8 + col] is the zigzag-scan index of the coefficient located
   * at (row, col) of an 8x8 block stored in raster order.
   */
  static const unsigned int tdzz[64] = {
       0,  1,  5,  6, 14, 15, 27, 28,
       2,  4,  7, 13, 16, 26, 29, 42,
       3,  8, 12, 17, 25, 30, 41, 43,
       9, 11, 18, 24, 31, 40, 44, 53,
      10, 19, 23, 32, 39, 45, 52, 54,
      20, 22, 33, 38, 46, 51, 55, 60,
      21, 34, 37, 47, 50, 56, 59, 61,
      35, 36, 48, 49, 57, 58, 62, 63
  };

  /*
   * ttdzz is tdzz transposed: ttdzz[col * 8 + row] == tdzz[row * 8 + col].
   * The forward DCTs emit their results column by column, so they walk this
   * table sequentially to land each coefficient at its zigzag position.
   */
  static const unsigned int ttdzz[64] = {
       0,  2,  3,  9, 10, 20, 21, 35,
       1,  4,  8, 11, 19, 22, 34, 36,
       5,  7, 12, 18, 23, 33, 37, 48,
       6, 13, 17, 24, 32, 38, 47, 49,
      14, 16, 25, 31, 39, 46, 50, 57,
      15, 26, 30, 40, 45, 51, 56, 58,
      27, 29, 41, 44, 52, 55, 59, 62,
      28, 42, 43, 53, 54, 60, 61, 63
  };
  41. /**********************************************************************
  42. *
  43. * Name: Dct
  44. * Description: Does dct on an 8x8 block, does zigzag-scanning of
  45. * coefficients
  46. *
  47. * Input: 64 pixels in a 1D array
  48. * Returns: 64 coefficients in a 1D array
  49. * Side effects:
  50. *
  51. **********************************************************************/
  52. /*
  53. ** Name: ScFDCT8x8s_C
  54. ** Purpose: 2-d Forward DCT (C version) for (8x8) blocks
  55. **
  56. ** update: Wei-Lien Hsu, store in ZZ order.
  57. */
  /*
   * Butterfly multipliers for the floating-point forward DCT.
   * W0 is cos(pi/4); Wk (k = 1..7) is 0.5 * cos(k * pi / 16).
   */
  static const float W0 = (float)0.7071068;
  static const float W1 = (float)0.4903926;
  static const float W2 = (float)0.4619398;
  static const float W3 = (float)0.4157348;
  static const float W4 = (float)0.3535534;
  static const float W5 = (float)0.2777851;
  static const float W6 = (float)0.1913417;
  static const float W7 = (float)0.0975452;
  61. int sv_H263DCT( short *block, short *coeff, int QP, int Mode)
  62. {
  63. int i;
  64. register float b0, b1, b2, b3, b4, b5, b6, b7, tmp, t0, t1, t2;
  65. float tmpbuf[64];
  66. const unsigned int *ptdzz=ttdzz;
  67. register short *blockptr, *coeffptr ;
  68. register float *dptr;
  69. #if 1
  70. short val, halfQ;
  71. /* check significant signals in Inter-frame */
  72. if(!(Mode == H263_MODE_INTRA || Mode == H263_MODE_INTRA_Q)) {
  73. halfQ = QP >> 1;
  74. blockptr = block;
  75. for (i=0; i < 64; i++) {
  76. val = *blockptr++;
  77. if((val > halfQ) | (val < -halfQ)) break;
  78. }
  79. if(i==64){ memset(coeff,0,128) ; return 0; }
  80. }
  81. #endif
  82. /* Horizontal transform */
  83. dptr = tmpbuf;
  84. blockptr = block;
  85. for (i = 0; i < 8; i++)
  86. {
  87. t0 = *blockptr++;
  88. t1 = *blockptr++;
  89. t2 = *blockptr++;
  90. tmp= *blockptr++;
  91. b4 = *blockptr++;
  92. b5 = *blockptr++;
  93. b6 = *blockptr++;
  94. b7 = *blockptr++;
  95. b0 = t0 + b7;
  96. b7 = t0 - b7;
  97. b1 = t1 + b6;
  98. b6 = t1 - b6;
  99. b2 = t2 + b5;
  100. b5 = t2 - b5;
  101. b3 = tmp + b4;
  102. b4 = tmp - b4;
  103. t0 = b0 + b3;
  104. b3 = b0 - b3;
  105. t1 = b1 + b2;
  106. b2 = b1 - b2;
  107. tmp = b5;
  108. b5 = (b6 - b5) * W0;
  109. b6 = (b6 + tmp) * W0;
  110. t2 = b4 + b5;
  111. b5 = b4 - b5;
  112. tmp = b7 + b6;
  113. b6 = b7 - b6;
  114. *dptr++ = (t0 + t1) * W4;
  115. *dptr++ = t2 * W7 + tmp * W1;
  116. *dptr++ = b2 * W6 + b3 * W2;
  117. *dptr++ = b6 * W3 - b5 * W5;
  118. *dptr++ = (t0 - t1) * W4;
  119. *dptr++ = b5 * W3 + b6 * W5;
  120. *dptr++ = b3 * W6 - b2 * W2;
  121. *dptr++ = tmp * W7 - t2 * W1;
  122. }
  123. /* Vertical transform */
  124. dptr = tmpbuf;
  125. coeffptr = coeff;
  126. for (i = 0; i < 8; i++, dptr++)
  127. {
  128. b0 = *dptr;
  129. tmp = *(dptr + 56) ;
  130. b7 = b0 - tmp ;
  131. b0 += tmp;
  132. b1 = *(dptr + 8);
  133. tmp = *(dptr + 48) ;
  134. b6 = b1 - tmp;
  135. b1 += tmp;
  136. b2 = *(dptr + 16);
  137. tmp = *(dptr + 40) ;
  138. b5 = b2 - tmp;
  139. b2 += tmp;
  140. b3 = *(dptr + 24);
  141. tmp = *(dptr + 32) ;
  142. b4 = b3 - tmp;
  143. b3 += tmp;
  144. t0 = b0 + b3;
  145. b3 = b0 - b3;
  146. t1 = b1 + b2;
  147. b2 = b1 - b2;
  148. tmp = b5;
  149. b5 = (b6 - b5) * W0;
  150. b6 = (b6 + tmp) * W0;
  151. t2 = b4 + b5;
  152. b5 = b4 - b5;
  153. tmp = b7 + b6;
  154. b6 = b7 - b6;
  155. *(coeffptr + *ptdzz++) = S ((t0 + t1) * W4);
  156. *(coeffptr + *ptdzz++) = S (t2 * W7 + tmp * W1);
  157. *(coeffptr + *ptdzz++) = S (b2 * W6 + b3 * W2);
  158. *(coeffptr + *ptdzz++) = S (b6 * W3 - b5 * W5);
  159. *(coeffptr + *ptdzz++) = S ((t0 - t1) * W4);
  160. *(coeffptr + *ptdzz++) = S (b5 * W3 + b6 * W5);
  161. *(coeffptr + *ptdzz++) = S (b3 * W6 - b2 * W2);
  162. *(coeffptr + *ptdzz++) = S (tmp * W7 - t2 * W1);
  163. }
  164. return 1;
  165. }
  166. /**********************************************************************
  167. *
  168. * Description: Does zone-filter on an 8x8 block-dct,
  169. * does zigzag-scanning of coefficients
  170. *
  171. * Input: 64 pixels in a 1D array
  172. * Returns: 64 coefficients in a 1D array
  173. * Side effects:
  174. *
  175. **********************************************************************/
  176. int sv_H263ZoneDCT( short *block, short *coeff, int QP, int Mode)
  177. {
  178. int i;
  179. register float b0, b1, b2, b3, b4, b5, b6, b7, tmp, t0, t1, t2;
  180. float tmpbuf[64];
  181. const unsigned int *ptdzz=ttdzz;
  182. register short *blockptr, *coeffptr ;
  183. register float *dptr;
  184. #if 1
  185. short val, halfQ;
  186. /* check significant signals in Inter-frame */
  187. if(!(Mode == H263_MODE_INTRA || Mode == H263_MODE_INTRA_Q)) {
  188. halfQ = QP >> 1;
  189. blockptr = block;
  190. for (i=0; i < 64; i++) {
  191. val = *blockptr++;
  192. if((val > halfQ) | (val < -halfQ)) break;
  193. }
  194. if(i==64){ memset(coeff,0,128) ; return 0; }
  195. }
  196. #endif
  197. /* Horizontal transform */
  198. dptr = tmpbuf;
  199. blockptr = block;
  200. for (i = 0; i < 8; i++)
  201. {
  202. t0 = *blockptr++;
  203. t1 = *blockptr++;
  204. t2 = *blockptr++;
  205. tmp= *blockptr++;
  206. b4 = *blockptr++;
  207. b5 = *blockptr++;
  208. b6 = *blockptr++;
  209. b7 = *blockptr++;
  210. b0 = t0 + b7;
  211. b7 = t0 - b7;
  212. b1 = t1 + b6;
  213. b6 = t1 - b6;
  214. b2 = t2 + b5;
  215. b5 = t2 - b5;
  216. b3 = tmp + b4;
  217. b4 = tmp - b4;
  218. t0 = b0 + b3;
  219. b3 = b0 - b3;
  220. t1 = b1 + b2;
  221. b2 = b1 - b2;
  222. tmp = b5;
  223. b5 = (b6 - b5) * W0;
  224. b6 = (b6 + tmp) * W0;
  225. t2 = b4 + b5;
  226. b5 = b4 - b5;
  227. tmp = b7 + b6;
  228. b6 = b7 - b6;
  229. *dptr++ = (t0 + t1) * W4;
  230. *dptr++ = t2 * W7 + tmp * W1;
  231. *dptr++ = b2 * W6 + b3 * W2;
  232. *dptr++ = b6 * W3 - b5 * W5;
  233. dptr+= 4;
  234. }
  235. /* Vertical transform */
  236. dptr = tmpbuf;
  237. coeffptr = coeff;
  238. memset(coeff,0,128) ;
  239. for (i = 0; i < 4; i++, dptr++)
  240. {
  241. b0 = *dptr;
  242. tmp = *(dptr + 56) ;
  243. b7 = b0 - tmp ;
  244. b0 += tmp;
  245. b1 = *(dptr + 8);
  246. tmp = *(dptr + 48) ;
  247. b6 = b1 - tmp;
  248. b1 += tmp;
  249. b2 = *(dptr + 16);
  250. tmp = *(dptr + 40) ;
  251. b5 = b2 - tmp;
  252. b2 += tmp;
  253. b3 = *(dptr + 24);
  254. tmp = *(dptr + 32) ;
  255. b4 = b3 - tmp;
  256. b3 += tmp;
  257. t0 = b0 + b3;
  258. b3 = b0 - b3;
  259. t1 = b1 + b2;
  260. b2 = b1 - b2;
  261. tmp = b5;
  262. b5 = (b6 - b5) * W0;
  263. b6 = (b6 + tmp) * W0;
  264. t2 = b4 + b5;
  265. b5 = b4 - b5;
  266. tmp = b7 + b6;
  267. b6 = b7 - b6;
  268. *(coeffptr + *ptdzz++) = S ((t0 + t1) * W4);
  269. *(coeffptr + *ptdzz++) = S (t2 * W7 + tmp * W1);
  270. *(coeffptr + *ptdzz++) = S (b2 * W6 + b3 * W2);
  271. *(coeffptr + *ptdzz++) = S (b6 * W3 - b5 * W5);
  272. ptdzz+=4;
  273. }
  274. return 1;
  275. }
  276. /**********************************************************************
  277. *
  278. * Name: idct
  279. * Description: inverse dct on 64 coefficients
  280. *
  281. * Input: 64 coefficients, block for 64 pixels
  282. * Returns: 0
  283. * Side effects:
  284. *
  285. **********************************************************************/
  286. /*
  287. ** Function: ScIDCT8x8s
  288. ** Note: This scheme uses the direct transposition of the forward
  289. ** DCT. This may not be the preferred way in Hardware
  290. ** Implementations
  291. ** #define W1 2841 */ /* 2048*sqrt(2)*cos(1*pi/16)
  292. ** #define W2 2676 */ /* 2048*sqrt(2)*cos(2*pi/16)
  293. ** #define W5 1609 */ /* 2048*sqrt(2)*cos(5*pi/16)
  294. */
  /*
   * Fixed-point multipliers for the integer IDCT.
   * With Wk = 2048*sqrt(2)*cos(k*pi/16) (W1 = 2841, W2 = 2676, W5 = 1609),
   * the A* constants are sums and the D* constants differences of two Wk.
   */
  #define WW3   2408              /* W3 */
  #define WW6   1108              /* W6 */
  #define WW7   565               /* W7 */
  #define AW26  3784              /* W2 + W6 */
  #define DW26  1568              /* W2 - W6 */
  #define AW17  3406              /* W1 + W7 */
  #define DW17  2276              /* W1 - W7 */
  #define AW35  4017              /* W3 + W5 */
  #define DW35  799               /* W3 - W5 */

  /* Final right shifts applied by the row pass and the column pass. */
  #define IDCTSHIFTR 8
  #define IDCTSHIFTC 14

  #ifndef USE_C
  /*
   * Externally implemented routine that the IDCTs call for a row holding
   * only a DC term; the USE_C build stores the value into eight consecutive
   * shorts instead.  NOTE(review): presumably this does the same - confirm
   * against its implementation.
   */
  void sv_H263FillX0_S(short *stream, short wd);
  #endif
  309. int sv_H263IDCT(short *inbuf, short *outbuf, int QP, int Mode, int outbuf_size)
  310. {
  311. int i;
  312. const unsigned int *ptdzz=tdzz;
  313. register int tmp0, tmp1, tmp2, tmp3, x0, x1, x2, x3, x4, x5, x6, x7, x8;
  314. register short *inblk, *outblk;
  315. register short *tmpblk;
  316. short tmpbuf[64];
  317. int Q2,QP_1;
  318. int p1, p2, p3, p4, p5, p6, p7;
  319. /* double quantization step */
  320. Q2 = QP << 1;
  321. QP_1 = QP - 1;
  322. inblk = inbuf;
  323. tmpblk = tmpbuf;
  324. if((QP %2) == 0){
  325. for (i=0; i<8; i++)
  326. {
  327. /* read in ZZ order */
  328. x0 = inblk[*ptdzz++];
  329. x4 = inblk[*ptdzz++];
  330. x3 = inblk[*ptdzz++];
  331. x7 = inblk[*ptdzz++];
  332. x1 = inblk[*ptdzz++];
  333. x6 = inblk[*ptdzz++];
  334. x2 = inblk[*ptdzz++];
  335. x5 = inblk[*ptdzz++];
  336. /* dequantize DC */
  337. if (!i && (Mode == H263_MODE_INTRA || Mode == H263_MODE_INTRA_Q))
  338. x0 = x0 << 3;
  339. else
  340. if(x0) x0 = (x0 > 0) ? Q2*x0+QP-1 : Q2*x0-QP+1 ;
  341. if (!(x1 | x2 | x3 | x4 | x5 | x6 | x7)) {
  342. if(!x0) memset(tmpblk, 0, 16) ;
  343. else {
  344. #ifndef USE_C
  345. sv_H263FillX0_S(tmpblk, (short)(x0 << 3)) ;
  346. #else
  347. *tmpblk = *(tmpblk+1) =
  348. *(tmpblk+2) = *(tmpblk+3) =
  349. *(tmpblk+4) = *(tmpblk+5) =
  350. *(tmpblk+6) = *(tmpblk+7) = (short)(x0 << 3) ;
  351. #endif
  352. }
  353. tmpblk += 8;
  354. }
  355. else
  356. {
  357. /* dequantize AC */
  358. if(x1) x1 = (x1 > 0) ? Q2*x1+QP_1 : Q2*x1-QP_1 ;
  359. if(x2) x2 = (x2 > 0) ? Q2*x2+QP_1 : Q2*x2-QP_1 ;
  360. if(x3) x3 = (x3 > 0) ? Q2*x3+QP_1 : Q2*x3-QP_1 ;
  361. if(x4) x4 = (x4 > 0) ? Q2*x4+QP_1 : Q2*x4-QP_1 ;
  362. if(x5) x5 = (x5 > 0) ? Q2*x5+QP_1 : Q2*x5-QP_1 ;
  363. if(x6) x6 = (x6 > 0) ? Q2*x6+QP_1 : Q2*x6-QP_1 ;
  364. if(x7) x7 = (x7 > 0) ? Q2*x7+QP_1 : Q2*x7-QP_1 ;
  365. x1 = x1<<11;
  366. tmp0 = x4 + x5;
  367. tmp0 = WW7*tmp0;
  368. x0 = x0<<11;
  369. x0 = x0 + 128;
  370. x8 = x0 + x1;
  371. tmp1 = x6 + x7;
  372. x0 = x0 - x1;
  373. tmp1 = WW3*tmp1;
  374. tmp2 = AW26*x2;
  375. tmp3 = DW26*x3;
  376. x4 = DW17*x4;
  377. x5 = AW17*x5;
  378. x4 = tmp0 + x4;
  379. x1 = x3 + x2;
  380. x5 = tmp0 - x5;
  381. x1 = WW6*x1;
  382. tmp0 = DW35*x6;
  383. x7 = AW35*x7;
  384. x2 = x1 - tmp2;
  385. x3 = x1 + tmp3;
  386. tmp0 = tmp1 - tmp0;
  387. x7 = tmp1 - x7;
  388. x1 = x4 + tmp0;
  389. x4 = x4 - tmp0;
  390. x6 = x5 + x7; /* F */
  391. x5 = x5 - x7; /* F */
  392. tmp0 = x4 + x5;
  393. tmp0 = 181*tmp0;
  394. x7 = x8 + x3; /* F */
  395. tmp1 = x4 - x5;
  396. x8 = x8 - x3; /* F */
  397. tmp1 = 181*tmp1;
  398. x3 = x0 + x2; /* F */
  399. x0 = x0 - x2; /* F */
  400. x2 = tmp0 + 128;
  401. x4 = tmp1 + 128;
  402. x2 = x2>>8; /* F */
  403. x4 = x4>>8; /* F */
  404. tmp0 = x7+x1;
  405. tmp0 = tmp0>>IDCTSHIFTR;
  406. tmp1 = x3+x2;
  407. tmp1 = tmp1>>IDCTSHIFTR;
  408. tmp2 = x0+x4;
  409. tmp2 = tmp2>>IDCTSHIFTR;
  410. tmp3 = x8+x6;
  411. tmp3 = tmp3>>IDCTSHIFTR;
  412. *tmpblk++ = (short)tmp0;
  413. *tmpblk++ = (short)tmp1;
  414. *tmpblk++ = (short)tmp2;
  415. *tmpblk++ = (short)tmp3;
  416. tmp0 = x8-x6;
  417. tmp0 = tmp0>>IDCTSHIFTR;
  418. tmp1 = x0-x4;
  419. tmp1 = tmp1>>IDCTSHIFTR;
  420. tmp2 = x3-x2;
  421. tmp2 = tmp2>>IDCTSHIFTR;
  422. tmp3 = x7-x1;
  423. tmp3 = tmp3>>IDCTSHIFTR;
  424. *tmpblk++ = (short)tmp0;
  425. *tmpblk++ = (short)tmp1;
  426. *tmpblk++ = (short)tmp2;
  427. *tmpblk++ = (short)tmp3;
  428. }
  429. }
  430. }
  431. else{
  432. for (i=0; i<8; i++)
  433. {
  434. /* read in ZZ order */
  435. x0 = inblk[*ptdzz++];
  436. x4 = inblk[*ptdzz++];
  437. x3 = inblk[*ptdzz++];
  438. x7 = inblk[*ptdzz++];
  439. x1 = inblk[*ptdzz++];
  440. x6 = inblk[*ptdzz++];
  441. x2 = inblk[*ptdzz++];
  442. x5 = inblk[*ptdzz++];
  443. /* quantize DC */
  444. if (!i && (Mode == H263_MODE_INTRA || Mode == H263_MODE_INTRA_Q))
  445. x0 = x0 << 3;
  446. else
  447. if(x0) x0 = (x0 > 0) ? Q2*x0+QP : Q2*x0-QP ;
  448. if (!(x1 | x2 | x3 | x4 | x5 | x6 | x7)) {
  449. if(!x0) memset(tmpblk, 0, 16) ;
  450. else {
  451. #ifndef USE_C
  452. sv_H263FillX0_S(tmpblk, (short)(x0 << 3)) ;
  453. #else
  454. *tmpblk = *(tmpblk+1) =
  455. *(tmpblk+2) = *(tmpblk+3) =
  456. *(tmpblk+4) = *(tmpblk+5) =
  457. *(tmpblk+6) = *(tmpblk+7) = (short)(x0 << 3) ;
  458. #endif
  459. }
  460. tmpblk += 8;
  461. }
  462. else
  463. {
  464. /* dequantize AC */
  465. if(x1) x1 = (x1 > 0) ? Q2*x1+QP : Q2*x1-QP ;
  466. if(x2) x2 = (x2 > 0) ? Q2*x2+QP : Q2*x2-QP ;
  467. if(x3) x3 = (x3 > 0) ? Q2*x3+QP : Q2*x3-QP ;
  468. if(x4) x4 = (x4 > 0) ? Q2*x4+QP : Q2*x4-QP ;
  469. if(x5) x5 = (x5 > 0) ? Q2*x5+QP : Q2*x5-QP ;
  470. if(x6) x6 = (x6 > 0) ? Q2*x6+QP : Q2*x6-QP ;
  471. if(x7) x7 = (x7 > 0) ? Q2*x7+QP : Q2*x7-QP ;
  472. x1 = x1<<11;
  473. tmp0 = x4 + x5;
  474. tmp0 = WW7*tmp0;
  475. x0 = x0<<11;
  476. x0 = x0 + 128;
  477. x8 = x0 + x1;
  478. tmp1 = x6 + x7;
  479. x0 = x0 - x1;
  480. tmp1 = WW3*tmp1;
  481. tmp2 = AW26*x2;
  482. tmp3 = DW26*x3;
  483. x4 = DW17*x4;
  484. x5 = AW17*x5;
  485. x4 = tmp0 + x4;
  486. x1 = x3 + x2;
  487. x5 = tmp0 - x5;
  488. x1 = WW6*x1;
  489. tmp0 = DW35*x6;
  490. x7 = AW35*x7;
  491. x2 = x1 - tmp2;
  492. x3 = x1 + tmp3;
  493. tmp0 = tmp1 - tmp0;
  494. x7 = tmp1 - x7;
  495. x1 = x4 + tmp0;
  496. x4 = x4 - tmp0;
  497. x6 = x5 + x7; /* F */
  498. x5 = x5 - x7; /* F */
  499. tmp0 = x4 + x5;
  500. tmp0 = 181*tmp0;
  501. x7 = x8 + x3; /* F */
  502. tmp1 = x4 - x5;
  503. x8 = x8 - x3; /* F */
  504. tmp1 = 181*tmp1;
  505. x3 = x0 + x2; /* F */
  506. x0 = x0 - x2; /* F */
  507. x2 = tmp0 + 128;
  508. x4 = tmp1 + 128;
  509. x2 = x2>>8; /* F */
  510. x4 = x4>>8; /* F */
  511. tmp0 = x7+x1;
  512. tmp0 = tmp0>>IDCTSHIFTR;
  513. tmp1 = x3+x2;
  514. tmp1 = tmp1>>IDCTSHIFTR;
  515. tmp2 = x0+x4;
  516. tmp2 = tmp2>>IDCTSHIFTR;
  517. tmp3 = x8+x6;
  518. tmp3 = tmp3>>IDCTSHIFTR;
  519. *tmpblk++ = (short)tmp0;
  520. *tmpblk++ = (short)tmp1;
  521. *tmpblk++ = (short)tmp2;
  522. *tmpblk++ = (short)tmp3;
  523. tmp0 = x8-x6;
  524. tmp0 = tmp0>>IDCTSHIFTR;
  525. tmp1 = x0-x4;
  526. tmp1 = tmp1>>IDCTSHIFTR;
  527. tmp2 = x3-x2;
  528. tmp2 = tmp2>>IDCTSHIFTR;
  529. tmp3 = x7-x1;
  530. tmp3 = tmp3>>IDCTSHIFTR;
  531. *tmpblk++ = (short)tmp0;
  532. *tmpblk++ = (short)tmp1;
  533. *tmpblk++ = (short)tmp2;
  534. *tmpblk++ = (short)tmp3;
  535. }
  536. }
  537. }
  538. /* output position */
  539. p1 = outbuf_size;
  540. p2 = p1 + outbuf_size;
  541. p3 = p2 + outbuf_size;
  542. p4 = p3 + outbuf_size;
  543. p5 = p4 + outbuf_size;
  544. p6 = p5 + outbuf_size;
  545. p7 = p6 + outbuf_size;
  546. tmpblk = tmpbuf;
  547. outblk = outbuf;
  548. for (i=0; i<8; i++, tmpblk++, outblk++)
  549. {
  550. /* shortcut */
  551. x0 = tmpblk[0];
  552. x1 = tmpblk[32];
  553. x2 = tmpblk[48];
  554. x3 = tmpblk[16];
  555. x4 = tmpblk[8];
  556. x5 = tmpblk[56];
  557. x6 = tmpblk[40];
  558. x7 = tmpblk[24];
  559. if (!(x1 | x2 | x3 | x4 | x5 | x6 | x7))
  560. {
  561. tmp0=(x0+32)>>6;
  562. outblk[0]=outblk[p1]=outblk[p2]=outblk[p3]=outblk[p4]=outblk[p5]=
  563. outblk[p6]=outblk[p7]= (short)tmp0 ;
  564. }
  565. else
  566. {
  567. x1 = x1 <<8;
  568. tmp0 = x4+x5;
  569. x0 = x0<<8;
  570. tmp0 = WW7*tmp0;
  571. x0 = x0 + 8192;
  572. tmp1 = x6+x7;
  573. tmp0 = tmp0 + 4;
  574. tmp1 = WW3*tmp1;
  575. tmp1 = tmp1 + 4;
  576. x8 = x0 + x1;
  577. tmp2 = AW26*x2;
  578. x0 = x0 - x1;
  579. x1 = x3 + x2;
  580. x1 = WW6*x1;
  581. tmp3 = DW26*x3;
  582. x1 = x1 + 4;
  583. x4 = DW17*x4;
  584. x4 = tmp0 + x4;
  585. x4 = x4>>3;
  586. x5 = AW17*x5;
  587. x2 = x1 - tmp2;
  588. x3 = x1 + tmp3;
  589. x6 = DW35*x6;
  590. x2 = x2>>3;
  591. x5 = tmp0 - x5;
  592. x5 = x5>>3;
  593. x6 = tmp1 - x6;
  594. x6 = x6>>3;
  595. x7 = AW35*x7;
  596. x7 = tmp1 - x7;
  597. x3 = x3>>3;
  598. x7 = x7>>3;
  599. x1 = x4 + x6; /* F */
  600. x4 = x4 - x6;
  601. x6 = x5 + x7; /* F */
  602. x5 = x5 - x7; /* F */
  603. tmp1 = x4 + x5;
  604. x7 = x8 + x3; /* F */
  605. tmp1 = 181*tmp1;
  606. x8 = x8 - x3; /* F */
  607. x3 = x0 + x2; /* F */
  608. tmp2 = x4 - x5;
  609. x0 = x0 - x2; /* F */
  610. tmp2 = 181*tmp2;
  611. x2 = tmp1+128;
  612. x4 = tmp2+128;
  613. x2 = x2>>8; /* F */
  614. x4 = x4>>8; /* F */
  615. /* fourth stage */
  616. tmp0=x7+x1;
  617. tmp1=x3+x2;
  618. tmp0=tmp0>>IDCTSHIFTC;
  619. tmp2=x0+x4;
  620. tmp1=tmp1>>IDCTSHIFTC;
  621. tmp3=x8+x6;
  622. tmp2=tmp2>>IDCTSHIFTC;
  623. tmp3=tmp3>>IDCTSHIFTC;
  624. outblk[0] = (short)tmp0;
  625. outblk[p1] = (short)tmp1;
  626. outblk[p2] = (short)tmp2;
  627. outblk[p3] = (short)tmp3;
  628. tmp0=x8-x6;
  629. tmp1=x0-x4;
  630. tmp0=tmp0>>IDCTSHIFTC;
  631. tmp2=x3-x2;
  632. tmp1=tmp1>>IDCTSHIFTC;
  633. tmp3=x7-x1;
  634. tmp2=tmp2>>IDCTSHIFTC;
  635. tmp3=tmp3>>IDCTSHIFTC;
  636. outblk[p4] = (short)tmp0;
  637. outblk[p5] = (short)tmp1;
  638. outblk[p6] = (short)tmp2;
  639. outblk[p7] = (short)tmp3;
  640. }
  641. }
  642. return 0;
  643. }
  644. /**********************************************************************
  645. *
  646. * Description: inverse zone-dct on 64 coefficients
  647. *
  648. * Input: 64 coefficients, block for 64 pixels
  649. * Returns: 0
  650. * Side effects:
  651. *
  652. **********************************************************************/
  653. int sv_H263ZoneIDCT(short *inbuf, short *outbuf, int QP, int Mode, int outbuf_size)
  654. {
  655. int i;
  656. const unsigned int *ptdzz=tdzz;
  657. register int tmp0, tmp1, tmp2, tmp3, x0, x1, x2, x3, x4, x5, x6, x7, x8;
  658. register short *inblk, *outblk;
  659. register short *tmpblk;
  660. short tmpbuf[64];
  661. int Q2,QP_1;
  662. int p1, p2, p3, p4, p5, p6, p7;
  663. /* double quantization step */
  664. Q2 = QP << 1;
  665. QP_1 = QP - 1;
  666. inblk = inbuf;
  667. tmpblk = tmpbuf;
  668. memset(tmpblk, 0, 128) ;
  669. if((QP %2) == 0){
  670. for (i=0; i<4; i++)
  671. {
  672. /* read in ZZ order */
  673. x0 = inblk[*ptdzz++];
  674. x4 = inblk[*ptdzz++];
  675. x3 = inblk[*ptdzz++];
  676. x7 = inblk[*ptdzz++];
  677. x1 = x6 = x2 = x5 = 0;
  678. ptdzz += 4;
  679. /* dequantize DC */
  680. if (!i && (Mode == H263_MODE_INTRA || Mode == H263_MODE_INTRA_Q))
  681. x0 = x0 << 3;
  682. else
  683. if(x0) x0 = (x0 > 0) ? Q2*x0+QP-1 : Q2*x0-QP+1 ;
  684. if (!(x3 | x4 | x7)) {
  685. if(!x0) memset(tmpblk, 0, 16) ;
  686. else {
  687. #ifndef USE_C
  688. sv_H263FillX0_S(tmpblk, (short)(x0 << 3)) ;
  689. #else
  690. *tmpblk = *(tmpblk+1) =
  691. *(tmpblk+2) = *(tmpblk+3) =
  692. *(tmpblk+4) = *(tmpblk+5) =
  693. *(tmpblk+6) = *(tmpblk+7) = (short)(x0 << 3) ;
  694. #endif
  695. }
  696. tmpblk += 8;
  697. }
  698. else
  699. {
  700. /* dequantize AC */
  701. if(x3) x3 = (x3 > 0) ? Q2*x3+QP_1 : Q2*x3-QP_1 ;
  702. if(x4) x4 = (x4 > 0) ? Q2*x4+QP_1 : Q2*x4-QP_1 ;
  703. if(x7) x7 = (x7 > 0) ? Q2*x7+QP_1 : Q2*x7-QP_1 ;
  704. tmp0 = x4;
  705. tmp0 = WW7*tmp0;
  706. x0 = x0<<11;
  707. x0 = x0 + 128;
  708. x8 = x0;
  709. tmp1 = WW3*x7;
  710. tmp3 = DW26*x3;
  711. x4 = DW17*x4;
  712. x4 = tmp0 + x4;
  713. x1 = x3;
  714. x5 = tmp0;
  715. x7 = AW35*x7;
  716. x2 = x1;
  717. x3 = x1 + tmp3;
  718. tmp0 = tmp1;
  719. x7 = tmp1 - x7;
  720. x1 = x4 + tmp0;
  721. x4 = x4 - tmp0;
  722. x6 = x5 + x7; /* F */
  723. x5 = x5 - x7; /* F */
  724. tmp0 = x4 + x5;
  725. tmp0 = 181*tmp0;
  726. x7 = x8 + x3; /* F */
  727. tmp1 = x4 - x5;
  728. x8 = x8 - x3; /* F */
  729. tmp1 = 181*tmp1;
  730. x3 = x0 + x2; /* F */
  731. x0 = x0 - x2; /* F */
  732. x2 = tmp0 + 128;
  733. x4 = tmp1 + 128;
  734. x2 = x2>>8; /* F */
  735. x4 = x4>>8; /* F */
  736. tmp0 = x7+x1;
  737. tmp0 = tmp0>>IDCTSHIFTR;
  738. tmp1 = x3+x2;
  739. tmp1 = tmp1>>IDCTSHIFTR;
  740. tmp2 = x0+x4;
  741. tmp2 = tmp2>>IDCTSHIFTR;
  742. tmp3 = x8+x6;
  743. tmp3 = tmp3>>IDCTSHIFTR;
  744. *tmpblk++ = (short)tmp0;
  745. *tmpblk++ = (short)tmp1;
  746. *tmpblk++ = (short)tmp2;
  747. *tmpblk++ = (short)tmp3;
  748. tmp0 = x8-x6;
  749. tmp0 = tmp0>>IDCTSHIFTR;
  750. tmp1 = x0-x4;
  751. tmp1 = tmp1>>IDCTSHIFTR;
  752. tmp2 = x3-x2;
  753. tmp2 = tmp2>>IDCTSHIFTR;
  754. tmp3 = x7-x1;
  755. tmp3 = tmp3>>IDCTSHIFTR;
  756. *tmpblk++ = (short)tmp0;
  757. *tmpblk++ = (short)tmp1;
  758. *tmpblk++ = (short)tmp2;
  759. *tmpblk++ = (short)tmp3;
  760. }
  761. }
  762. }
  763. else{
  764. for (i=0; i<4; i++)
  765. {
  766. /* read in ZZ order */
  767. x0 = inblk[*ptdzz++];
  768. x4 = inblk[*ptdzz++];
  769. x3 = inblk[*ptdzz++];
  770. x7 = inblk[*ptdzz++];
  771. x1 = x6 = x2 = x5 = 0;
  772. ptdzz += 4;
  773. /* quantize DC */
  774. if (!i && (Mode == H263_MODE_INTRA || Mode == H263_MODE_INTRA_Q))
  775. x0 = x0 << 3;
  776. else
  777. if(x0) x0 = (x0 > 0) ? Q2*x0+QP : Q2*x0-QP ;
  778. if (!(x3 | x4 | x7)) {
  779. if(!x0) memset(tmpblk, 0, 16) ;
  780. else {
  781. #ifndef USE_C
  782. sv_H263FillX0_S(tmpblk, (short)(x0 << 3)) ;
  783. #else
  784. *tmpblk = *(tmpblk+1) =
  785. *(tmpblk+2) = *(tmpblk+3) =
  786. *(tmpblk+4) = *(tmpblk+5) =
  787. *(tmpblk+6) = *(tmpblk+7) = (short)(x0 << 3) ;
  788. #endif
  789. }
  790. tmpblk += 8;
  791. }
  792. else
  793. {
  794. /* dequantize AC */
  795. if(x3) x3 = (x3 > 0) ? Q2*x3+QP : Q2*x3-QP ;
  796. if(x4) x4 = (x4 > 0) ? Q2*x4+QP : Q2*x4-QP ;
  797. if(x7) x7 = (x7 > 0) ? Q2*x7+QP : Q2*x7-QP ;
  798. tmp0 = x4;
  799. tmp0 = WW7*tmp0;
  800. x0 = x0<<11;
  801. x0 = x0 + 128;
  802. x8 = x0;
  803. tmp1 = WW3*x7;
  804. tmp3 = DW26*x3;
  805. x4 = DW17*x4;
  806. x4 = tmp0 + x4;
  807. x1 = x3;
  808. x5 = tmp0;
  809. x1 = WW6*x1;
  810. tmp0 = 0;
  811. x7 = AW35*x7;
  812. x2 = x1 - tmp2;
  813. x3 = x1 + tmp3;
  814. tmp0 = tmp1;
  815. x7 = tmp1 - x7;
  816. x1 = x4 + tmp0;
  817. x4 = x4 - tmp0;
  818. x6 = x5 + x7; /* F */
  819. x5 = x5 - x7; /* F */
  820. tmp0 = x4 + x5;
  821. tmp0 = 181*tmp0;
  822. x7 = x8 + x3; /* F */
  823. tmp1 = x4 - x5;
  824. x8 = x8 - x3; /* F */
  825. tmp1 = 181*tmp1;
  826. x3 = x0 + x2; /* F */
  827. x0 = x0 - x2; /* F */
  828. x2 = tmp0 + 128;
  829. x4 = tmp1 + 128;
  830. x2 = x2>>8; /* F */
  831. x4 = x4>>8; /* F */
  832. tmp0 = x7+x1;
  833. tmp0 = tmp0>>IDCTSHIFTR;
  834. tmp1 = x3+x2;
  835. tmp1 = tmp1>>IDCTSHIFTR;
  836. tmp2 = x0+x4;
  837. tmp2 = tmp2>>IDCTSHIFTR;
  838. tmp3 = x8+x6;
  839. tmp3 = tmp3>>IDCTSHIFTR;
  840. *tmpblk++ = (short)tmp0;
  841. *tmpblk++ = (short)tmp1;
  842. *tmpblk++ = (short)tmp2;
  843. *tmpblk++ = (short)tmp3;
  844. tmp0 = x8-x6;
  845. tmp0 = tmp0>>IDCTSHIFTR;
  846. tmp1 = x0-x4;
  847. tmp1 = tmp1>>IDCTSHIFTR;
  848. tmp2 = x3-x2;
  849. tmp2 = tmp2>>IDCTSHIFTR;
  850. tmp3 = x7-x1;
  851. tmp3 = tmp3>>IDCTSHIFTR;
  852. *tmpblk++ = (short)tmp0;
  853. *tmpblk++ = (short)tmp1;
  854. *tmpblk++ = (short)tmp2;
  855. *tmpblk++ = (short)tmp3;
  856. }
  857. }
  858. }
  859. /* output position */
  860. p1 = outbuf_size;
  861. p2 = p1 + outbuf_size;
  862. p3 = p2 + outbuf_size;
  863. p4 = p3 + outbuf_size;
  864. p5 = p4 + outbuf_size;
  865. p6 = p5 + outbuf_size;
  866. p7 = p6 + outbuf_size;
  867. tmpblk = tmpbuf;
  868. outblk = outbuf;
  869. for (i=0; i<8; i++, tmpblk++, outblk++)
  870. {
  871. /* shortcut */
  872. x0 = tmpblk[0];
  873. x1 = tmpblk[32];
  874. x2 = tmpblk[48];
  875. x3 = tmpblk[16];
  876. x4 = tmpblk[8];
  877. x5 = tmpblk[56];
  878. x6 = tmpblk[40];
  879. x7 = tmpblk[24];
  880. if (!(x1 | x2 | x3 | x4 | x5 | x6 | x7))
  881. {
  882. tmp0=(x0+32)>>6;
  883. outblk[0]=outblk[p1]=outblk[p2]=outblk[p3]=outblk[p4]=outblk[p5]=
  884. outblk[p6]=outblk[p7]= (short)tmp0 ;
  885. }
  886. else
  887. {
  888. x1 = x1 <<8;
  889. tmp0 = x4+x5;
  890. x0 = x0<<8;
  891. tmp0 = WW7*tmp0;
  892. x0 = x0 + 8192;
  893. tmp1 = x6+x7;
  894. tmp0 = tmp0 + 4;
  895. tmp1 = WW3*tmp1;
  896. tmp1 = tmp1 + 4;
  897. x8 = x0 + x1;
  898. tmp2 = AW26*x2;
  899. x0 = x0 - x1;
  900. x1 = x3 + x2;
  901. x1 = WW6*x1;
  902. tmp3 = DW26*x3;
  903. x1 = x1 + 4;
  904. x4 = DW17*x4;
  905. x4 = tmp0 + x4;
  906. x4 = x4>>3;
  907. x5 = AW17*x5;
  908. x2 = x1 - tmp2;
  909. x3 = x1 + tmp3;
  910. x6 = DW35*x6;
  911. x2 = x2>>3;
  912. x5 = tmp0 - x5;
  913. x5 = x5>>3;
  914. x6 = tmp1 - x6;
  915. x6 = x6>>3;
  916. x7 = AW35*x7;
  917. x7 = tmp1 - x7;
  918. x3 = x3>>3;
  919. x7 = x7>>3;
  920. x1 = x4 + x6; /* F */
  921. x4 = x4 - x6;
  922. x6 = x5 + x7; /* F */
  923. x5 = x5 - x7; /* F */
  924. tmp1 = x4 + x5;
  925. x7 = x8 + x3; /* F */
  926. tmp1 = 181*tmp1;
  927. x8 = x8 - x3; /* F */
  928. x3 = x0 + x2; /* F */
  929. tmp2 = x4 - x5;
  930. x0 = x0 - x2; /* F */
  931. tmp2 = 181*tmp2;
  932. x2 = tmp1+128;
  933. x4 = tmp2+128;
  934. x2 = x2>>8; /* F */
  935. x4 = x4>>8; /* F */
  936. /* fourth stage */
  937. tmp0=x7+x1;
  938. tmp1=x3+x2;
  939. tmp0=tmp0>>IDCTSHIFTC;
  940. tmp2=x0+x4;
  941. tmp1=tmp1>>IDCTSHIFTC;
  942. tmp3=x8+x6;
  943. tmp2=tmp2>>IDCTSHIFTC;
  944. tmp3=tmp3>>IDCTSHIFTC;
  945. outblk[0] = (short)tmp0;
  946. outblk[p1] = (short)tmp1;
  947. outblk[p2] = (short)tmp2;
  948. outblk[p3] = (short)tmp3;
  949. tmp0=x8-x6;
  950. tmp1=x0-x4;
  951. tmp0=tmp0>>IDCTSHIFTC;
  952. tmp2=x3-x2;
  953. tmp1=tmp1>>IDCTSHIFTC;
  954. tmp3=x7-x1;
  955. tmp2=tmp2>>IDCTSHIFTC;
  956. tmp3=tmp3>>IDCTSHIFTC;
  957. outblk[p4] = (short)tmp0;
  958. outblk[p5] = (short)tmp1;
  959. outblk[p6] = (short)tmp2;
  960. outblk[p7] = (short)tmp3;
  961. }
  962. }
  963. return 0;
  964. }
  #if 0
  /*
   * svH263ZigzagMatrix - reorder a raster-order 8x8 block into zigzag order.
   *   imatrix  64 input values, raster order
   *   omatrix  64 output values, zigzag order
   */
  void svH263ZigzagMatrix(short *imatrix, short *omatrix)
  {
      int k;

      for (k = 0; k < 64; k++)
          omatrix[tdzz[k]] = imatrix[k];
  }

  /*
   * svH263InvZigzagMatrix - reorder a zigzag-order block back into raster
   * order.
   *   imatrix  64 input values, zigzag order
   *   omatrix  64 output values, raster order
   */
  void svH263InvZigzagMatrix(short *imatrix, short *omatrix)
  {
      int k;

      for (k = 0; k < 64; k++)
          omatrix[k] = imatrix[tdzz[k]];
  }
  #endif
  /* Use a PI supplied by an earlier header if there is one, else M_PI,
   * else a literal. */
  #ifndef PI
  #  ifdef M_PI
  #    define PI M_PI
  #  else
  #    define PI 3.14159265358979323846
  #  endif
  #endif

  /* zigzag[row][col]: zigzag-scan index of the coefficient at (row, col). */
  int zigzag[8][8] = {
      {  0,  1,  5,  6, 14, 15, 27, 28 },
      {  2,  4,  7, 13, 16, 26, 29, 42 },
      {  3,  8, 12, 17, 25, 30, 41, 43 },
      {  9, 11, 18, 24, 31, 40, 44, 53 },
      { 10, 19, 23, 32, 39, 45, 52, 54 },
      { 20, 22, 33, 38, 46, 51, 55, 60 },
      { 21, 34, 37, 47, 50, 56, 59, 61 },
      { 35, 36, 48, 49, 57, 58, 62, 63 },
  };
  1008. /* Perform IEEE 1180 reference (64-bit floating point, separable 8x1
  1009. * direct matrix multiply) Inverse Discrete Cosine Transform
  1010. */
  1011. /* Here we use math.h to generate constants. Compiler results may
  1012. vary a little */
  1013. /* private data */
  1014. /* cosine transform matrix for 8x1 IDCT */
  1015. static double c[8][8];
  1016. /* initialize DCT coefficient matrix */
  1017. void sv_H263init_idctref()
  1018. {
  1019. int freq, time;
  1020. double scale;
  1021. for (freq=0; freq < 8; freq++)
  1022. {
  1023. scale = (freq == 0) ? sqrt(0.125) : 0.5;
  1024. for (time=0; time<8; time++)
  1025. c[freq][time] = scale*cos((PI/8.0)*freq*(time + 0.5));
  1026. }
  1027. }
  1028. /* perform IDCT matrix multiply for 8x8 coefficient block */
  1029. void sv_H263idctref(short *coeff, short *block)
  1030. {
  1031. int i, j, k, v;
  1032. double partial_product;
  1033. double tmp[64];
  1034. int tmp2[64];
  1035. extern int zigzag[8][8];
  1036. for (i=0; i<8; i++)
  1037. for (j=0; j<8; j++)
  1038. tmp2[j+i*8] = *(coeff + zigzag[i][j]);
  1039. for (i=0; i<8; i++)
  1040. for (j=0; j<8; j++)
  1041. {
  1042. partial_product = 0.0;
  1043. for (k=0; k<8; k++)
  1044. partial_product+= c[k][j]*tmp2[8*i+k];
  1045. tmp[8*i+j] = partial_product;
  1046. }
  1047. /* Transpose operation is integrated into address mapping by switching
  1048. loop order of i and j */
  1049. for (j=0; j<8; j++)
  1050. for (i=0; i<8; i++)
  1051. {
  1052. partial_product = 0.0;
  1053. for (k=0; k<8; k++)
  1054. partial_product+= c[k][i]*tmp[8*k+j];
  1055. v = (int)floor(partial_product+0.5);
  1056. block[8*i+j] = (v<-256) ? -256 : ((v>255) ? 255 : v);
  1057. }
  1058. }
  1059.