Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

544 lines
17 KiB

  1. /* File: sv_h263_me3.c */
  2. /*****************************************************************************
  3. ** Copyright (c) Digital Equipment Corporation, 1995, 1997 **
  4. ** **
  5. ** All Rights Reserved. Unpublished rights reserved under the copyright **
  6. ** laws of the United States. **
  7. ** **
  8. ** The software contained on this media is proprietary to and embodies **
  9. ** the confidential technology of Digital Equipment Corporation. **
  10. ** Possession, use, duplication or dissemination of the software and **
  11. ** media is authorized only pursuant to a valid written license from **
  12. ** Digital Equipment Corporation. **
  13. ** **
  14. ** RESTRICTED RIGHTS LEGEND Use, duplication, or disclosure by the U.S. **
  15. ** Government is subject to restrictions as set forth in Subparagraph **
  16. ** (c)(1)(ii) of DFARS 252.227-7013, or in FAR 52.227-19, as applicable. **
  17. ******************************************************************************/
  18. #include "sv_h263.h"
  19. #include "proto.h"
  20. #ifndef USE_C
  21. #include "perr.h"
  22. #endif
  23. #define THREEBYEIGHT .375
  24. #define THREEBYFOUR .75
  25. #define MINUSONEBYEIGHT -0.125
  26. /**********************************************************************
  27. *
  28. * Name: MotionEstimation
  29. * Description: Estimate all motionvectors for one MB
  30. *
  31. * Input: pointers to current an previous image,
  32. * pointers to current slice and current MB
  33. * Returns:
  34. * Side effects: motion vector information in MB changed
  35. *
  36. ***********************************************************************/
  37. void sv_H263ME_2levels_7_polint(SvH263CompressInfo_t *H263Info,
  38. unsigned char *curr, unsigned char *prev, int x_curr,
  39. int y_curr, int xoff, int yoff, int seek_dist,
  40. H263_MotionVector *MV[6][H263_MBR+1][H263_MBC+2], int *SAD_0)
  41. {
  42. int Min_FRAME[5];
  43. H263_MotionVector MVFrame[5];
  44. unsigned char *aa,*ii;
  45. unsigned char *adv_search_area = NULL, *zero_area = NULL;
  46. int sxy,i,k,j,l;
  47. int ihigh,ilow,jhigh,jlow,h_length,v_length;
  48. int adv_ihigh,adv_ilow,adv_jhigh,adv_jlow,adv_h_length,adv_v_length;
  49. int xmax,ymax,block,sad,lx;
  50. int adv_x_curr, adv_y_curr,xvec,yvec;
  51. unsigned char *act_block_subs2, *search_area_subs2, *adv_search_area_subs2;
  52. int h_lenby2,v_lenby2,adv_h_lenby2,adv_v_lenby2;
  53. int xlevel1,ylevel1,sxylevel1;
  54. int xlevel1_block[4], ylevel1_block[4];
  55. /*
  56. int level0_x_curr,level0_y_curr,sxylevel0;
  57. */
  58. int start_x, start_y, stop_x, stop_y, new_x, new_y;
  59. int AE[5];
  60. H263_Point search[5];
  61. int p1,p2,p3,p4;
  62. int AE_minx, AE_miny, min_posx, min_posy;
  63. xmax = H263Info->pels;
  64. ymax = H263Info->lines;
  65. sxy = seek_dist;
  66. if (!H263Info->long_vectors) {
  67. /* Maximum normal search range centered around _zero-vector_ */
  68. sxy = mmin(15, sxy);
  69. }
  70. else {
  71. /* Maximum extended search range centered around _predictor_ */
  72. sxy = mmin(15 - (2*H263_DEF_8X8_WIN+1), sxy);
  73. /* NB! */
  74. /* It is only possible to transmit motion vectors within
  75. a 15x15 window around the motion vector predictor
  76. for any 8x8 or 16x16 block */
  77. /* The reason for the search window's reduction above with
  78. 2*DEF_8X8_WIN+1 is that the 8x8 search may change the MV
  79. predictor for some of the blocks within the macroblock. When we
  80. impose the limitation above, we are sure that any 8x8 vector we
  81. might find is possible to transmit */
  82. /* We have found that with OBMC, DEF_8X8_WIN should be quite small
  83. for two reasons: (i) a good filtering effect, and (ii) not too
  84. many bits used for transferring the vectors. As can be seen
  85. above this is also useful to avoid a large limitation on the MV
  86. search range */
  87. /* It is possible to make sure the motion vectors found are legal
  88. in other less limiting ways than above, but this would be more
  89. complicated as well as time-consuming. Any good suggestions for
  90. improvement is welcome, though */
  91. #ifdef USE_C
  92. xoff = mmin(16,mmax(-16,xoff));
  93. yoff = mmin(16,mmax(-16,yoff));
  94. #else
  95. xoff = sv_H263lim_S(xoff,-16,16);
  96. yoff = sv_H263lim_S(yoff,-16,16);
  97. #endif
  98. /* in case xoff or yoff is odd */
  99. xoff= 2 * ((xoff)>>1);
  100. yoff= 2 * ((yoff)>>1);
  101. /* There is no need to check if (xoff + x_curr) points outside
  102. the picture, since the Extended Motion Vector Range is
  103. always used together with the Unrestricted MV mode */
  104. }
  105. lx = (H263Info->mv_outside_frame ? H263Info->pels + (H263Info->long_vectors?64:32) : H263Info->pels);
  106. ilow = x_curr + xoff - sxy;
  107. ihigh = x_curr + xoff + sxy;
  108. jlow = y_curr + yoff - sxy;
  109. jhigh = y_curr + yoff + sxy;
  110. if (!H263Info->mv_outside_frame) {
  111. if (ilow<0) ilow = 0;
  112. if (ihigh>xmax-16) ihigh = xmax-16;
  113. if (jlow<0) jlow = 0;
  114. if (jhigh>ymax-16) jhigh = ymax-16;
  115. }
  116. h_length = ihigh - ilow + 16;
  117. v_length = jhigh - jlow + 16;
  118. /* subsampled version for ME level 1 */
  119. h_lenby2 = (h_length-1)>>1;
  120. v_lenby2 = (v_length-1)>>1;
  121. act_block_subs2 = sv_H263LoadSubs2Area(curr, x_curr, y_curr, 8, 8, H263Info->pels);
  122. search_area_subs2 = sv_H263LoadSubs2Area(prev, ilow, jlow, h_lenby2, v_lenby2, lx);
  123. for (k = 0; k < 5; k++) {
  124. Min_FRAME[k] = INT_MAX;
  125. MVFrame[k].x = 0;
  126. MVFrame[k].y = 0;
  127. MVFrame[k].x_half = 0;
  128. MVFrame[k].y_half = 0;
  129. }
  130. /* match for zero (or [xoff,yoff]) motion vector on subsampled images */
  131. ii = search_area_subs2 + ((x_curr+xoff-ilow)>>1) + ((y_curr+yoff-jlow)>>1)*h_lenby2;
  132. #ifdef USE_C
  133. Min_FRAME[0] = sv_H263MySADBlock(ii, act_block_subs2, h_lenby2, 8, Min_FRAME[0]);
  134. #else
  135. Min_FRAME[0] = sv_H263PError8x8_S(ii, act_block_subs2, h_lenby2, 8, Min_FRAME[0]);
  136. #endif
  137. MVFrame[0].x = (short)xoff;
  138. MVFrame[0].y = (short)yoff;
  139. /*** Spiral search (+-7) on subsampled images ***/
  140. sxylevel1 = (sxy-1)>>1;
  141. for (l = 1; l <= sxylevel1; l++) {
  142. i = x_curr + xoff - 2*l;
  143. j = y_curr + yoff - 2*l;
  144. for (k = 0; k < 8*l; k++) {
  145. if (i>=ilow && i<=ihigh && j>=jlow && j<=jhigh) {
  146. /* 8x8 integer pel MV */
  147. ii = search_area_subs2 + ((i-ilow)>>1) + ((j-jlow)>>1)*h_lenby2;
  148. #ifdef USE_C
  149. sad = sv_H263MySADBlock(ii, act_block_subs2, h_lenby2, 8, Min_FRAME[0]);
  150. #else
  151. sad = sv_H263PError8x8_S(ii, act_block_subs2, h_lenby2, 8, Min_FRAME[0]);
  152. #endif
  153. if (sad < Min_FRAME[0]) {
  154. MVFrame[0].x = i - x_curr;
  155. MVFrame[0].y = j - y_curr;
  156. Min_FRAME[0] = sad;
  157. }
  158. }
  159. if (k<2*l) i+=2;
  160. else if (k<4*l) j+=2;
  161. else if (k<6*l) i-=2;
  162. else j-=2;
  163. }
  164. }
  165. /* motion vectors after level1 */
  166. xlevel1=MVFrame[0].x;
  167. ylevel1=MVFrame[0].y;
  168. /* reset */
  169. Min_FRAME[0] = INT_MAX;
  170. MVFrame[0].x = 0;
  171. MVFrame[0].y = 0;
  172. /* Zero vector search*/
  173. if (x_curr-ilow < 0 || y_curr-jlow < 0 ||
  174. x_curr-ilow+H263_MB_SIZE > h_length || y_curr-jlow+H263_MB_SIZE > v_length) {
  175. /* in case the zero vector is outside the loaded area in search_area */
  176. zero_area = sv_H263LoadSubs2Area(prev, x_curr, y_curr, 8, 8, lx);
  177. #ifdef USE_C
  178. *SAD_0 = 4*sv_H263MySADBlock(zero_area, act_block_subs2, 8, 8, Min_FRAME[0]) -
  179. H263_PREF_NULL_VEC;
  180. #else
  181. *SAD_0 = 4*sv_H263PError8x8_S(zero_area, act_block_subs2, 8, 8, Min_FRAME[0]) -
  182. H263_PREF_NULL_VEC;
  183. #endif
  184. ScFree(zero_area);
  185. }
  186. else {
  187. /* the zero vector is within search_area */
  188. ii = search_area_subs2 + ((x_curr-ilow)>>1) + ((y_curr-jlow)>>1)*h_lenby2;
  189. #ifdef USE_C
  190. *SAD_0 = 4*sv_H263MySADBlock(ii, act_block_subs2, h_lenby2, 8, Min_FRAME[0]) -
  191. H263_PREF_NULL_VEC;
  192. #else
  193. *SAD_0 = 4*sv_H263PError8x8_S(ii, act_block_subs2, h_lenby2, 8, Min_FRAME[0]) -
  194. H263_PREF_NULL_VEC;
  195. #endif
  196. }
  197. /*** +-1 search on full-resolution images done by polynomial interpolation ***/
  198. start_x = -1;
  199. stop_x = 1;
  200. start_y = -1;
  201. stop_y = 1;
  202. new_x = x_curr + xlevel1;
  203. new_y = y_curr + ylevel1;
  204. /* Make sure that no addressing is outside the frame */
  205. if (!H263Info->mv_outside_frame) {
  206. if ((new_x) <= (ilow+1))
  207. start_x = 0;
  208. if ((new_y) <= (jlow+1))
  209. start_y = 0;
  210. if ((new_x) >= (ihigh-1))
  211. stop_x = 0;
  212. if ((new_y) >= (jhigh-1))
  213. stop_y = 0;
  214. }
  215. /* 1 */
  216. /* 2 0 3 */
  217. /* 4 */
  218. search[0].x = 0; search[0].y = 0;
  219. search[1].x = 0; search[1].y = (short)start_y;
  220. search[2].x = (short)start_x; search[2].y = 0;
  221. search[3].x = (short)stop_x; search[3].y = 0;
  222. search[4].x = 0; search[4].y = (short)stop_y;
  223. for (l = 0; l < 5 ; l++) {
  224. AE[l] = INT_MAX;
  225. i = new_x + 2*search[l].x;
  226. j = new_y + 2*search[l].y;
  227. /* 8x8 integer pel MV */
  228. ii = search_area_subs2 + ((i-ilow)>>1) + ((j-jlow)>>1)*h_lenby2;
  229. #ifdef USE_C
  230. AE[l] = sv_H263MySADBlock(ii, act_block_subs2, h_lenby2, 8, INT_MAX);
  231. #else
  232. AE[l] = sv_H263PEr8_init_S(ii, act_block_subs2, h_lenby2, 8);
  233. #endif
  234. }
  235. /* 1D polynomial interpolation along x and y respectively */
  236. AE_minx = AE[0];
  237. min_posx = 0;
  238. p2 = (int)( THREEBYEIGHT * (double) AE[2]
  239. + THREEBYFOUR * (double) AE[0]
  240. + MINUSONEBYEIGHT * (double) AE[3]);
  241. if (p2<AE_minx) {
  242. AE_minx = p2;
  243. min_posx = 2;
  244. }
  245. p3 = (int)(MINUSONEBYEIGHT * (double) AE[2]
  246. + THREEBYFOUR * (double) AE[0]
  247. + THREEBYEIGHT * (double) AE[3]);
  248. if (p3<AE_minx) {
  249. AE_minx = p3;
  250. min_posx = 3;
  251. }
  252. AE_miny = AE[0];
  253. min_posy = 0;
  254. p1 = (int)(THREEBYEIGHT * (double) AE[1]
  255. + THREEBYFOUR * (double) AE[0]
  256. + MINUSONEBYEIGHT * (double) AE[4]);
  257. if (p1<AE_miny) {
  258. AE_miny = p1;
  259. min_posy = 1;
  260. }
  261. p4 = (int)(MINUSONEBYEIGHT * (double) AE[1]
  262. + THREEBYFOUR * (double) AE[0]
  263. + THREEBYEIGHT * (double) AE[4]);
  264. if (p4<AE_miny) {
  265. AE_miny = p4;
  266. min_posy = 4;
  267. }
  268. /* Store optimal values */
  269. Min_FRAME[0] = (AE_minx<AE_miny ? 4*AE_minx : 4*AE_miny);
  270. MVFrame[0].x = new_x + search[min_posx].x - x_curr;
  271. MVFrame[0].y = new_y + search[min_posy].y - y_curr;
  272. if (H263Info->advanced) {
  273. /* Center the 8x8 search around the 16x16 vector. This is
  274. different than in TMN5 where the 8x8 search is also a full
  275. search. The reasons for this is: (i) it is faster, and (ii) it
  276. generally gives better results because of a better OBMC
  277. filtering effect and less bits spent for vectors, and (iii) if
  278. the Extended MV Range is used, the search range around the
  279. motion vector predictor will be less limited */
  280. xvec = MVFrame[0].x;
  281. yvec = MVFrame[0].y;
  282. if (!H263Info->long_vectors) {
  283. if (xvec > 15 - H263_DEF_8X8_WIN) { xvec = 15 - H263_DEF_8X8_WIN ;}
  284. if (yvec > 15 - H263_DEF_8X8_WIN) { yvec = 15 - H263_DEF_8X8_WIN ;}
  285. if (xvec < -15 + H263_DEF_8X8_WIN) { xvec = -15 + H263_DEF_8X8_WIN ;}
  286. if (yvec < -15 + H263_DEF_8X8_WIN) { yvec = -15 + H263_DEF_8X8_WIN ;}
  287. }
  288. adv_x_curr = x_curr + xvec;
  289. adv_y_curr = y_curr + yvec;
  290. sxy = H263_DEF_8X8_WIN;
  291. adv_ilow = adv_x_curr - sxy;
  292. adv_ihigh = adv_x_curr + sxy;
  293. adv_jlow = adv_y_curr - sxy;
  294. adv_jhigh = adv_y_curr + sxy;
  295. adv_h_length = adv_ihigh - adv_ilow + 16;
  296. adv_v_length = adv_jhigh - adv_jlow + 16;
  297. /* BUG
  298. adv_h_lenby2 = (adv_h_length-1)>>1;
  299. adv_v_lenby2 = (adv_v_length-1)>>1;
  300. */
  301. adv_h_lenby2 = (adv_h_length)>>1;
  302. adv_v_lenby2 = (adv_v_length)>>1;
  303. /* must load entire macroblock
  304. adv_search_area_subs2 = sv_H263LoadSubs2Area(prev, adv_ilow, adv_jlow,
  305. adv_h_lenby2, adv_v_lenby2, lx);
  306. */
  307. adv_search_area_subs2 = sv_H263LoadSubs2Area(prev, adv_ilow, adv_jlow,
  308. adv_h_length, adv_v_length, lx);
  309. for (block = 0; block < 4; block++) {
  310. ii = adv_search_area_subs2 + ((adv_x_curr-adv_ilow)>>1) + ((block&1)<<2) +
  311. (((adv_y_curr-adv_jlow)>>1) + ((block&2)<<1) )*adv_h_lenby2;
  312. aa = act_block_subs2 + ((block&1)<<2) + ((block&2)<<1)*8;
  313. /*
  314. Min_FRAME[block+1] = sv_H263MySADSubBlock(ii,aa,adv_h_lenby2,Min_FRAME[block+1]);
  315. */
  316. Min_FRAME[block+1] = sv_H263MySADSubBlock(ii,aa,adv_h_length,Min_FRAME[block+1]);
  317. MVFrame[block+1].x = MVFrame[0].x;
  318. MVFrame[block+1].y = MVFrame[0].y;
  319. }
  320. /* Spiral search */
  321. sxylevel1 = (sxy-1)>>1;
  322. for (l = 1; l <= sxylevel1; l++) {
  323. i = adv_x_curr - 2*l;
  324. j = adv_y_curr - 2*l;
  325. for (k = 0; k < 8*l; k++) {
  326. if (i>=adv_ilow && i<=adv_ihigh && j>=adv_jlow && j<=adv_jhigh) {
  327. /* 8x8 integer pel MVs */
  328. for (block = 0; block < 4; block++) {
  329. ii = adv_search_area_subs2 + ((i-adv_ilow)>>1) + ((block&1)<<2) +
  330. (((j-adv_jlow)>>1) + ((block&2)<<1) )*adv_h_lenby2;
  331. aa = act_block_subs2 + ((block&1)<<2) + ((block&2)<<1)*8;
  332. /*
  333. sad = sv_H263MySADSubBlock(ii, aa, adv_h_lenby2, Min_FRAME[block+1]);
  334. */
  335. sad = sv_H263MySADSubBlock(ii, aa, adv_h_length, Min_FRAME[block+1]);
  336. if (sad < Min_FRAME[block+1]) {
  337. MVFrame[block+1].x = i - x_curr;
  338. MVFrame[block+1].y = j - y_curr;
  339. Min_FRAME[block+1] = sad;
  340. }
  341. }
  342. }
  343. if (k<2*l) i++;
  344. else if (k<4*l) j++;
  345. else if (k<6*l) i--;
  346. else j--;
  347. }
  348. }
  349. for (block = 0; block < 4; block++) {
  350. xlevel1_block[block] = MVFrame[block+1].x;
  351. ylevel1_block[block] = MVFrame[block+1].y;
  352. /* reset */
  353. Min_FRAME[block+1] = INT_MAX;
  354. MVFrame[block+1].x = 0;
  355. MVFrame[block+1].y = 0;
  356. }
  357. /* +-1 search on full resolution on full-resolution images */
  358. /* by polynomial interpolation */
  359. for (block = 0; block < 4; block++) {
  360. start_x = -1;
  361. stop_x = 1;
  362. start_y = -1;
  363. stop_y = 1;
  364. adv_x_curr = x_curr + xlevel1_block[block];
  365. adv_y_curr = y_curr + ylevel1_block[block];
  366. /* 1 */
  367. /* 2 0 3 */
  368. /* 4 */
  369. search[0].x = 0; search[0].y = 0;
  370. search[1].x = 0; search[1].y = (short)start_y;
  371. search[2].x = (short)start_x; search[2].y = 0;
  372. search[3].x = (short)stop_x; search[3].y = 0;
  373. search[4].x = 0; search[4].y = (short)stop_y;
  374. for (l = 0; l < 5 ; l++) {
  375. AE[l] = INT_MAX;
  376. i = adv_x_curr + 2*search[l].x;
  377. j = adv_y_curr + 2*search[l].y;
  378. /* 8x8 integer pel MV */
  379. ii = adv_search_area_subs2 + ((i-adv_ilow)>>1) + ((block&1)<<2) +
  380. (((j-adv_jlow)>>1) + ((block&2)<<1) )*adv_h_lenby2;
  381. aa = act_block_subs2 + ((block&1)<<2) + ((block&2)<<1)*8;
  382. /*
  383. AE[l] = sv_H263MySADSubBlock(ii, aa, adv_h_lenby2, INT_MAX);
  384. */
  385. AE[l] = sv_H263MySADSubBlock(ii, aa, adv_h_length, INT_MAX);
  386. }
  387. /* 1D polynomial interpolation along x and y respectively */
  388. AE_minx = AE[0];
  389. min_posx = 0;
  390. p2 = (int)(THREEBYEIGHT * (double) AE[2]
  391. + THREEBYFOUR * (double) AE[0]
  392. + MINUSONEBYEIGHT * (double) AE[3]);
  393. if (p2<AE_minx) {
  394. AE_minx = p2;
  395. min_posx = 2;
  396. }
  397. p3 = (int)(MINUSONEBYEIGHT * (double) AE[2]
  398. + THREEBYFOUR * (double) AE[0]
  399. + THREEBYEIGHT * (double) AE[3]);
  400. if (p3<AE_minx) {
  401. AE_minx = p3;
  402. min_posx = 3;
  403. }
  404. AE_miny = AE[0];
  405. min_posy = 0;
  406. p1 = (int)(THREEBYEIGHT * (double) AE[1]
  407. + THREEBYFOUR * (double) AE[0]
  408. + MINUSONEBYEIGHT * (double) AE[4]);
  409. if (p1<AE_miny) {
  410. AE_miny = p1;
  411. min_posy = 1;
  412. }
  413. p4 = (int)(MINUSONEBYEIGHT * (double) AE[1]
  414. + THREEBYFOUR * (double) AE[0]
  415. + THREEBYEIGHT * (double) AE[4]);
  416. if (p4<AE_miny) {
  417. AE_miny = p4;
  418. min_posy = 4;
  419. }
  420. /* Store optimal values */
  421. Min_FRAME[block+1] = (AE_minx<AE_miny ? 4*AE_minx : 4*AE_miny);
  422. MVFrame[block+1].x = adv_x_curr + search[min_posx].x - x_curr;
  423. MVFrame[block+1].y = adv_y_curr + search[min_posy].y - y_curr;
  424. }
  425. }
  426. i = x_curr/H263_MB_SIZE+1;
  427. j = y_curr/H263_MB_SIZE+1;
  428. if (!H263Info->advanced) {
  429. MV[0][j][i]->x = MVFrame[0].x;
  430. MV[0][j][i]->y = MVFrame[0].y;
  431. MV[0][j][i]->min_error = (short)Min_FRAME[0];
  432. }
  433. else {
  434. for (k = 0; k < 5; k++) {
  435. MV[k][j][i]->x = MVFrame[k].x;
  436. MV[k][j][i]->y = MVFrame[k].y;
  437. MV[k][j][i]->min_error = (short)Min_FRAME[k];
  438. }
  439. }
  440. ScFree(act_block_subs2);
  441. ScFree(search_area_subs2);
  442. if (H263Info->advanced)
  443. ScFree(adv_search_area);
  444. return;
  445. }
  446.