Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1268 lines
32 KiB

  1. /* *************************************************************************
  2. ** INTEL Corporation Proprietary Information
  3. **
  4. ** This listing is supplied under the terms of a license
  5. ** agreement with INTEL Corporation and may not be copied
  6. ** nor disclosed except in accordance with the terms of
  7. ** that agreement.
  8. **
  9. ** Copyright (c) 1995 Intel Corporation.
  10. ** All Rights Reserved.
  11. **
  12. ** *************************************************************************
  13. */
  14. #include "precomp.h"
  15. #if defined(H263P) || defined(USE_BILINEAR_MSH26X) // {
  16. /***************************************************
  17. * H26X_YVU9toYUV12()
  18. * Convert from YVU9 to YUV12
  19. * and copy to destination memory with pitch
  20. * defined by the constant PITCH.
  21. *
  22. * uv_plane_common()
  23. * Helper function to convert V and U plane information.
  24. * Since the process is similar for both planes, the
  25. * conversion code was included in this subroutine.
  26. *
  27. ***************************************************/
  /*
   * Packed-byte helpers. Each macro treats a 32-bit value as four independent
   * 8-bit lanes and accesses memory through an (unsigned int *) cast.
   */

  /* Load 32 bits from src into val, clear bit 0 of every lane, then shift
   * right by one so each lane ends up halved. Evaluates to the new val. */
  #define READ_DWORD_AND_SHIFT(val,src) \
      ((val) = *((unsigned int *)(src)), (val) &= 0xFEFEFEFE, (val) >>= 1)

  /* Store the 32-bit value val at dest. */
  #define WRITE_DWORD(dest,val) \
      (*((unsigned int *)(dest)) = (val))

  /* out = lane-wise (in1 + in2) / 2. The mask drops each lane's low bit before
   * the shift so it cannot leak into the lane below. */
  #define AVERAGE_DWORDS(out,in1,in2) \
      ((out) = ((((in1) + (in2)) & 0xFEFEFEFE) >> 1))

  /* Write bytes { b0, b0, b1, b1 } (low address first) to dest, where b0 and b1
   * are the two least-significant bytes of val. */
  #define DUP_LOWER_TWO_BYTES(dest,val) \
      (*((unsigned int *)(dest)) = \
          ((((val) & 0xFF) * 0x00000101u) | \
           ((((val) >> 8) & 0xFF) * 0x01010000u)))

  /* Write bytes { b2, b2, b3, b3 } (low address first) to dest, where b2 and b3
   * are the two most-significant bytes of val. */
  #define DUP_UPPER_TWO_BYTES(dest,val) \
      (*((unsigned int *)(dest)) = \
          (((((val) >> 16) & 0xFF) * 0x00000101u) | \
           ((((val) >> 24) & 0xFF) * 0x01010000u)))
  38. static void C_uv_plane_common(
  39. LPBITMAPINFOHEADER lpbiInput,
  40. WORD OutputWidth,
  41. WORD OutputHeight,
  42. U8 *psrc,
  43. U8 *Plane,
  44. const int pitch) {
  45. U8 *pprev;
  46. U8 *pnext = psrc + (lpbiInput->biWidth >> 2);
  47. U8 *pdest_copy = Plane;
  48. U8 *pdest_avg = Plane + pitch;
  49. U8 t, tb1, tb2;
  50. U32 t1, t2;
  51. int i, j, k;
  52. int dest_pitch_adj;
  53. int widthx4 = ((OutputWidth >> 2) + 0x3) & ~0x3;
  54. int heightx4 = 0;
  55. int width_diff = 0;
  56. int height_diff = 0;
  57. int stretch = 0;
  58. int flag = 0;
  59. int NextSrcLine = 0;
  60. int ChromaIters = 1;
  61. int mark = (OutputHeight >> 2);
  62. int byte_uvpitch_adj = 0;
  63. if (lpbiInput->biHeight > OutputHeight) {
  64. for (ChromaIters = 0, i = OutputHeight; i > 0; i -= 48) {
  65. ChromaIters += 2;
  66. }
  67. NextSrcLine = (lpbiInput->biWidth - OutputWidth) >> 2;
  68. stretch = (NextSrcLine ? 1 : 0);
  69. mark = 6 - stretch;
  70. flag = stretch;
  71. } else {
  72. width_diff = widthx4 - (OutputWidth >> 2);
  73. byte_uvpitch_adj -= width_diff;
  74. heightx4 = ((lpbiInput->biHeight >> 2) + 0x3) & ~0x3;
  75. height_diff = (heightx4 - (lpbiInput->biHeight >> 2)) << 1;
  76. }
  77. dest_pitch_adj = pitch - (widthx4 << 1);
  78. for (j = ChromaIters; j > 0; j--) {
  79. for (k = mark + (flag & 1); k > 0; k--) {
  80. if (!stretch && (1 == j) && (1 == k)) {
  81. pnext = psrc;
  82. }
  83. for (i = (OutputWidth >> 1); (i & ~0x7); i-=8, psrc+=4, pnext+=4,
  84. pdest_copy+=8, pdest_avg+=8) {
  85. READ_DWORD_AND_SHIFT(t1,psrc);
  86. DUP_LOWER_TWO_BYTES(pdest_copy,t1);
  87. DUP_UPPER_TWO_BYTES((pdest_copy+4),t1);
  88. READ_DWORD_AND_SHIFT(t2,pnext);
  89. AVERAGE_DWORDS(t1,t1,t2);
  90. DUP_LOWER_TWO_BYTES(pdest_avg,t1);
  91. DUP_UPPER_TWO_BYTES((pdest_avg+4),t1);
  92. }
  93. if (i & 0x4) {
  94. t = *psrc++ >> 1;
  95. *(U16*)pdest_copy = t | (t<<8);
  96. t = (t + (*pnext++ >> 1)) >> 1;
  97. *(U16*)pdest_avg = t | (t<<8);
  98. t = *psrc++ >> 1;
  99. *(U16*)(pdest_copy+2) = t | (t<<8);
  100. t = (t + (*pnext++ >> 1)) >> 1;
  101. *(U16*)(pdest_avg+2) = t | (t<<8);
  102. pdest_copy += 4; pdest_avg += 4;
  103. }
  104. if (i & 0x2) {
  105. t = *psrc++ >> 1;
  106. *(U16*)pdest_copy = t | (t<<8);
  107. t = (t + (*pnext++ >> 1)) >> 1;
  108. *(U16*)pdest_avg = t | (t<<8);
  109. pdest_copy += 2; pdest_avg += 2;
  110. }
  111. if (width_diff) {
  112. tb1 = *(pdest_copy-1);
  113. tb2 = *(pdest_avg-1);
  114. *pdest_copy++ = tb1; *pdest_copy++ = tb1;
  115. *pdest_avg++ = tb2; *pdest_avg++ = tb2;
  116. if ((width_diff-1) > 0) {
  117. *pdest_copy++ = tb1; *pdest_copy++ = tb1;
  118. *pdest_avg++ = tb2; *pdest_avg++ = tb2;
  119. }
  120. if ((width_diff-2) > 0) {
  121. *pdest_copy++ = tb1; *pdest_copy++ = tb1;
  122. *pdest_avg++ = tb2; *pdest_avg++ = tb2;
  123. }
  124. }
  125. psrc += NextSrcLine;
  126. pnext += NextSrcLine;
  127. pdest_copy = pdest_avg + dest_pitch_adj;
  128. pdest_avg = pdest_copy + pitch;
  129. }
  130. if (height_diff) {
  131. pprev = pdest_copy - pitch;
  132. for (j = height_diff; j > 0; j--) {
  133. for (i = widthx4; i>0; i--) {
  134. *pdest_copy++ = *pprev++;
  135. *pdest_copy++ = *pprev++;
  136. }
  137. pprev += dest_pitch_adj;
  138. pdest_copy += dest_pitch_adj;
  139. }
  140. }
  141. if (stretch) {
  142. psrc -= (lpbiInput->biWidth >> 2);
  143. pnext -= (lpbiInput->biWidth >> 2);
  144. pdest_avg = pdest_copy;
  145. for (i = OutputWidth >> 1; i > 0; i -= 8, psrc += 4, pnext += 4,
  146. pdest_avg += 8) {
  147. READ_DWORD_AND_SHIFT(t1,psrc);
  148. READ_DWORD_AND_SHIFT(t2,pnext);
  149. AVERAGE_DWORDS(t1,t1,t2);
  150. AVERAGE_DWORDS(t1,t1,t2);
  151. DUP_LOWER_TWO_BYTES(pdest_avg,t1);
  152. DUP_UPPER_TWO_BYTES((pdest_avg+4),t1);
  153. }
  154. psrc += NextSrcLine;
  155. pnext += NextSrcLine;
  156. pdest_copy = pdest_avg + dest_pitch_adj;
  157. pdest_avg = pdest_copy + pitch;
  158. flag++;
  159. }
  160. }
  161. }
  162. void C_H26X_YVU9toYUV12(
  163. LPBITMAPINFOHEADER lpbiInput,
  164. WORD OutputWidth,
  165. WORD OutputHeight,
  166. U8 *lpInput,
  167. U8 *YPlane,
  168. U8 *UPlane,
  169. U8 *VPlane,
  170. const int pitch) {
  171. U32 *pnext, *plast, *pbn;
  172. U32 *pyprev, *pyspace;
  173. U8 *pvsrc, *pusrc;
  174. int t;
  175. int i, j, k;
  176. int NextLine;
  177. int widthx16;
  178. int heightx16;
  179. int width_diff = 0;
  180. int height_diff = 0;
  181. int width_adj = 0;
  182. int height_adj = 0;
  183. int stretch = 0;
  184. int aspect = 0;
  185. int word_ypitch_adj = 0;
  186. int LumaIters = 1;
  187. int mark = OutputHeight;
  188. int byte_ypitch_adj = pitch - OutputWidth;
  189. if (lpbiInput->biHeight > OutputHeight) {
  190. for (LumaIters = 0, i = OutputHeight; i > 0; i -= 48) {
  191. LumaIters += 4;
  192. }
  193. width_adj = (lpbiInput->biWidth - OutputWidth) >> 1;
  194. aspect = LumaIters;
  195. height_adj = (lpbiInput->biHeight - (OutputHeight - aspect)) >> 1;
  196. stretch = 1;
  197. mark = 11;
  198. } else {
  199. widthx16 = (lpbiInput->biWidth + 0xF) & ~0xF;
  200. width_diff = widthx16 - OutputWidth;
  201. byte_ypitch_adj -= width_diff;
  202. word_ypitch_adj = byte_ypitch_adj >> 2;
  203. heightx16 = (lpbiInput->biHeight + 0xF) & ~0xF;
  204. height_diff = heightx16 - OutputHeight;
  205. }
  206. NextLine = width_adj >> 1;
  207. pnext = (U32 *)(lpInput + (lpbiInput->biWidth * height_adj) + width_adj);
  208. for (j = LumaIters; j > 0; j--) {
  209. for (k = mark; k > 0; k--) {
  210. for (i = OutputWidth; (i & ~0xF); i-=16, YPlane+=16) {
  211. *(U32 *)YPlane = (*pnext++ >> 1) & 0x7F7F7F7F;
  212. *(U32 *)(YPlane+4) = (*pnext++ >> 1) & 0x7F7F7F7F;
  213. *(U32 *)(YPlane+8) = (*pnext++ >> 1) & 0x7F7F7F7F;
  214. *(U32 *)(YPlane+12) = (*pnext++ >> 1) & 0x7F7F7F7F;
  215. }
  216. if (i & 0x8) {
  217. *(U32 *)YPlane = (*pnext++ >> 1) & 0x7F7F7F7F;
  218. *(U32 *)(YPlane+4) = (*pnext++ >> 1) & 0x7F7F7F7F;
  219. YPlane += 8;
  220. }
  221. if (i & 0x4) {
  222. *(U32 *)YPlane = (*pnext++ >> 1) & 0x7F7F7F7F;
  223. YPlane += 4;
  224. }
  225. if (width_diff) {
  226. t = (*(YPlane-1)) << 24;
  227. t |= (t>>8) | (t>>16) | (t>>24);
  228. *(U32 *)YPlane = t;
  229. if ((width_diff-4) > 0) {
  230. *(U32 *)(YPlane + 4) = t;
  231. }
  232. if ((width_diff-8) > 0) {
  233. *(U32 *)(YPlane + 8) = t;
  234. }
  235. YPlane += width_diff;
  236. }
  237. pnext += NextLine;
  238. YPlane += byte_ypitch_adj;
  239. }
  240. if (height_diff) {
  241. pyprev = (U32 *)(YPlane - pitch);
  242. pyspace = (U32 *)YPlane;
  243. for (j = height_diff; j > 0; j--) {
  244. for (i = widthx16; i>0; i -=4) {
  245. *pyspace++ = *pyprev++;
  246. }
  247. pyspace += word_ypitch_adj;
  248. pyprev += word_ypitch_adj;
  249. }
  250. }
  251. if (stretch) {
  252. plast = pnext - (lpbiInput->biWidth >> 2);
  253. pbn = pnext;
  254. for (i = OutputWidth; i > 0; i -= 4, YPlane += 4, plast++, pbn++) {
  255. *(U32 *)YPlane =
  256. ( ((*plast & 0xFCFCFCFC) >> 2) +
  257. ((*pbn & 0xFCFCFCFC) >> 2) );
  258. }
  259. YPlane += byte_ypitch_adj;
  260. }
  261. }
  262. pvsrc = lpInput + (lpbiInput->biWidth * lpbiInput->biHeight);
  263. pusrc = pvsrc + ((lpbiInput->biWidth>>2) * (lpbiInput->biHeight>>2));
  264. t = ((lpbiInput->biWidth>>2) * (height_adj>>2)) + (width_adj>>2);
  265. pvsrc += t;
  266. pusrc += t;
  267. C_uv_plane_common(lpbiInput,OutputWidth,OutputHeight,pusrc,UPlane,pitch);
  268. C_uv_plane_common(lpbiInput,OutputWidth,OutputHeight,pvsrc,VPlane,pitch);
  269. }
  270. /***************************************************
  271. * H26X_YUY2toYUV12()
  272. * Convert from YUY2 to YUV12
  273. * and copy to destination memory with pitch
  274. * defined by the constant PITCH.
  275. *
  276. ***************************************************/
  277. #if 0 // { 0
  // Compiled-out C version of the YUY2 -> YUV12 conversion; the active
  // implementation in this file is the assembly routine P5_H26X_YUY2toYUV12().
  //
  // Each 8 input bytes yield four halved Y bytes (offsets 0, 2, 4, 6) and, on
  // even values of k only, two halved U bytes (offsets 1, 5) and two halved V
  // bytes (offsets 3, 7).
  //
  // NOTE(review): i, j, k, t, tm, pnext, pyprev, pynext, pyspace, LumaIters,
  // mark, stretch, BackTwoLines, byte_ypitch_adj and byte_uvpitch_adj are not
  // declared here; presumably C_RGB_COLOR_CONVERT_INIT declares and
  // initializes them. That macro, C_WIDTH_FILL and C_HEIGHT_FILL are defined
  // outside this part of the file - confirm before re-enabling this code.
  278. void C_H26X_YUY2toYUV12(
  279. LPBITMAPINFOHEADER lpbiInput,
  280. WORD OutputWidth,
  281. WORD OutputHeight,
  282. U8 *lpInput,
  283. U8 *YPlane,
  284. U8 *UPlane,
  285. U8 *VPlane,
  286. const int pitch) {
  287. U8 *pline;
  288. C_RGB_COLOR_CONVERT_INIT
  289. // Since YUY2 is so much like RGB (inverted image), the macro used to initialize
  290. // RGB conversion is also used here. However, there are some local variables
  291. // declared in C_RGB_COLOR_CONVERT_INIT that are not used here. The following
  292. // assignment is here simply to avoid warnings.
  293. t = t;
  294. pline = (U8 *)pnext;
  295. for ( j = 0; j < LumaIters; j++) {
  296. for (k = 0; k < mark; k++) {
  // One iteration consumes 8 input bytes and produces 4 Y bytes.
  297. for (i = OutputWidth; i > 0; i-=4, pline+=8, YPlane+=4) {
  298. *(U32 *)YPlane = (*pline>>1) | ((*(pline+ 2)<<7)&0x7F00) |
  299. ((*(pline+ 4)<<15)&0x7F0000) | ((*(pline+ 6)<<23)&0x7F000000);
  // Chroma is taken from every other row (even k) only.
  300. if (0 == (k & 1)) {
  301. *(U16 *)UPlane = (*(pline+ 1)>>1) | ((*(pline+ 5)<<7)&0x7F00);
  302. *(U16 *)VPlane = (*(pline+ 3)>>1) | ((*(pline+ 7)<<7)&0x7F00);
  303. UPlane += 2; VPlane += 2;
  304. }
  305. }
  306. pnext = (U32 *)pline;
  307. C_WIDTH_FILL
  // On the first row of every group except the first, fill the row that was
  // left free (pyspace) with the average of the rows before and after it.
  308. if (stretch && (0 == k) && j) {
  309. for (i = OutputWidth; i > 0; i -= 8) {
  310. tm = ((*pyprev++ & 0xFEFEFEFE) >> 1);
  311. tm += ((*pynext++ & 0xFEFEFEFE) >> 1);
  312. *pyspace++ = tm;
  313. tm = ((*pyprev++ & 0xFEFEFEFE) >> 1);
  314. tm += ((*pynext++ & 0xFEFEFEFE) >> 1);
  315. *pyspace++ = tm;
  316. }
  317. }
  318. pnext += BackTwoLines;
  319. pline = (U8 *)pnext;
  320. YPlane += byte_ypitch_adj;
  321. if (0 == (k & 1)) {
  322. UPlane += byte_uvpitch_adj;
  323. VPlane += byte_uvpitch_adj;
  324. }
  325. }
  // After each group: remember the last written row, leave one output row
  // free (pyspace) and continue writing one pitch further down.
  326. if (stretch) {
  327. pyprev = (U32 *)(YPlane - pitch);
  328. pyspace = (U32 *)YPlane;
  329. pynext = (U32 *)(YPlane += pitch);
  330. }
  331. }
  332. C_HEIGHT_FILL
  // The row left free after the final group has no following row, so it is
  // filled with a copy of the row before it.
  333. if (stretch) {
  334. for (i = OutputWidth; i > 0; i -= 4) {
  335. *pyspace++ = *pyprev++;
  336. }
  337. }
  338. }
  339. #endif // } 0
  /***************************************************
   * P5_H26X_YUY2toYUV12()
   *  Inline-assembly conversion of packed YUY2 input to separate Y, U and
   *  V planes with every sample halved (>> 1).
   *
   *  Of each 8 input bytes, offsets 0/2/4/6 become four Y bytes; on rows
   *  where k is even, offsets 1/5 become two U bytes and offsets 3/7 two
   *  V bytes.
   *
   *  The input is walked from a late row towards earlier rows: pnext
   *  starts at row (OutputHeight - aspect - 1 + height_adj) and
   *  BackTwoLines is added after every converted row.
   *
   *  The function is __declspec(naked): it builds its own frame, so all
   *  arguments and locals are addressed relative to esp using the offsets
   *  #defined below (and #undef'd after the function).
   *
   *  NOTE(review): several loops exit with jnz/jz on an exact zero where
   *  the C pseudo-code comments say "> 0"; they rely on OutputHeight being
   *  a multiple of 48 (crop/stretch path), OutputWidth a multiple of 4
   *  (8 in the stretch averaging loop) and width_diff a multiple of 4.
   *  Confirm callers guarantee this.
   ***************************************************/
  340. __declspec(naked)
  341. void P5_H26X_YUY2toYUV12(
  342. LPBITMAPINFOHEADER lpbiInput,
  343. WORD OutputWidth,
  344. WORD OutputHeight,
  345. U8 *lpInput,
  346. U8 *YPlane,
  347. U8 *UPlane,
  348. U8 *VPlane,
  349. const int pitch)
  350. {
  351. // Permanent (callee-save) registers - ebx, esi, edi, ebp
  352. // Temporary (caller-save) registers - eax, ecx, edx
  353. //
  354. // Stack frame layout
  355. // | pitch | +136
  356. // | VPlane | +132
  357. // | UPlane | +128
  358. // | YPlane | +124
  359. // | lpInput | +120
  360. // | OutputHeight | +116
  361. // | OutputWidth | +112
  362. // | lpbiInput | +108
  363. // ----------------------------
  364. // | return addr | +104
  365. // | saved ebp | +100
  366. // | saved ebx | + 96
  367. // | saved esi | + 92
  368. // | saved edi | + 88
  369. // | output_width | + 84
  370. // | pyprev | + 80
  371. // | pyspace | + 76
  372. // | pynext | + 72
  373. // | puvprev | + 68
  374. // | puvspace | + 64
  375. // | i | + 60
  376. // | j | + 56
  377. // | k | + 52
  378. // | BackTwoLines | + 48
  379. // | widthx16 | + 44
  380. // | heightx16 | + 40
  381. // | width_diff | + 36
  382. // | height_diff | + 32
  383. // | width_adj | + 28
  384. // | height_adj | + 24
  385. // | stretch | + 20
  386. // | aspect | + 16
  387. // | LumaIters | + 12
  388. // | mark | + 8
  389. // | byte_ypitch_adj | + 4
  390. // | byte_uvpitch_adj | + 0
  // Offsets are relative to esp after the prologue (4 pushes + LOCALSIZE).
  // The UPLANE / VPLANE / YPLANE argument slots are also reused as working
  // storage: the plane pointers are updated in place as output is written.
  // NOTE(review): PUVPREV, PUVSPACE and LOOP_I are defined but not
  // referenced by the code below.
  391. #define LOCALSIZE 88
  392. #define PITCH_PARM 136
  393. #define VPLANE 132
  394. #define UPLANE 128
  395. #define YPLANE 124
  396. #define LP_INPUT 120
  397. #define OUTPUT_HEIGHT_WORD 116
  398. #define OUTPUT_WIDTH_WORD 112
  399. #define LPBI_INPUT 108
  400. #define OUTPUT_WIDTH 84
  401. #define PYPREV 80
  402. #define PYSPACE 76
  403. #define PYNEXT 72
  404. #define PUVPREV 68
  405. #define PUVSPACE 64
  406. #define LOOP_I 60
  407. #define LOOP_J 56
  408. #define LOOP_K 52
  409. #define BACK_TWO_LINES 48
  410. #define WIDTHX16 44
  411. #define HEIGHTX16 40
  412. #define WIDTH_DIFF 36
  413. #define HEIGHT_DIFF 32
  414. #define WIDTH_ADJ 28
  415. #define HEIGHT_ADJ 24
  416. #define STRETCH 20
  417. #define ASPECT 16
  418. #define LUMA_ITERS 12
  419. #define MARK 8
  420. #define BYTE_YPITCH_ADJ 4
  421. #define BYTE_UVPITCH_ADJ 0
  422. _asm {
  // Prologue: save the callee-save registers and reserve the local frame.
  423. push ebp
  424. push ebx
  425. push esi
  426. push edi
  427. sub esp, LOCALSIZE
  428. // int width_diff = 0
  429. // int height_diff = 0
  430. // int width_adj = 0
  431. // int height_adj = 0
  432. // int stretch = 0
  433. // int aspect = 0
  434. xor eax, eax
  435. mov [esp + WIDTH_DIFF], eax
  436. mov [esp + HEIGHT_DIFF], eax
  437. mov [esp + WIDTH_ADJ], eax
  438. mov [esp + HEIGHT_ADJ], eax
  439. mov [esp + STRETCH], eax
  440. mov [esp + ASPECT], eax
  441. // int LumaIters = 1
  442. inc eax
  443. mov [esp + LUMA_ITERS], eax
  444. // int mark = OutputHeight
  445. // int output_width = OutputWidth
  446. // int byte_ypitch_adj = pitch - OutputWidth
  447. // int byte_uvpitch_adj = pitch - (OutputWidth >> 1)
  // The WORD arguments are read as 16 bits into a zeroed ebx (zero-extend).
  448. xor ebx, ebx
  449. mov bx, [esp + OUTPUT_HEIGHT_WORD]
  450. mov [esp + MARK], ebx
  451. mov bx, [esp + OUTPUT_WIDTH_WORD]
  452. mov [esp + OUTPUT_WIDTH], ebx
  453. mov ecx, [esp + PITCH_PARM]
  454. mov edx, ecx
  455. sub ecx, ebx
  456. mov [esp + BYTE_YPITCH_ADJ], ecx
  457. sar ebx, 1
  458. sub edx, ebx
  459. mov [esp + BYTE_UVPITCH_ADJ], edx
  460. // if (lpbiInput->biHeight > OutputHeight)
  461. mov ebx, [esp + LPBI_INPUT]
  462. mov ecx, (LPBITMAPINFOHEADER)[ebx].biHeight
  463. xor edx, edx
  464. mov dx, [esp + OUTPUT_HEIGHT_WORD]
  465. cmp ecx, edx
  466. jle Lno_stretch
  467. // for (LumaIters = 0, i = OutputHeight; i > 0; i -= 48) LumaIters += 4
  // NOTE(review): unlike the "i > 0" test in the comment above, jnz only
  // leaves this loop when edx reaches exactly zero, so OutputHeight must be
  // a non-zero multiple of 48 on this path - confirm.
  468. xor ecx, ecx
  469. Lrepeat48:
  470. lea ecx, [ecx + 4]
  471. sub edx, 48
  472. jnz Lrepeat48
  473. mov [esp + LUMA_ITERS], ecx
  474. // aspect = LumaIters
  475. mov [esp + ASPECT], ecx
  476. // width_adj = (lpbiInput->biWidth - OutputWidth) >> 1
  477. // width_adj *= lpbiInput->biBitCount
  478. // width_adj >>= 3
  479. mov ecx, (LPBITMAPINFOHEADER)[ebx].biWidth
  480. mov edx, [esp + OUTPUT_WIDTH]
  481. sub ecx, edx
  482. sar ecx, 1
  483. xor edx, edx
  484. mov dx, (LPBITMAPINFOHEADER)[ebx].biBitCount
  485. imul ecx, edx
  486. sar ecx, 3
  487. mov [esp + WIDTH_ADJ], ecx
  488. // height_adj = (lpbiInput->biHeight - (OutputHeight - aspect)) >> 1
  489. mov ecx, (LPBITMAPINFOHEADER)[ebx].biHeight
  490. xor edx, edx
  491. mov dx, [esp + OUTPUT_HEIGHT_WORD]
  492. sub ecx, edx
  493. add ecx, [esp + ASPECT]
  494. sar ecx, 1
  495. mov [esp + HEIGHT_ADJ], ecx
  496. // stretch = 1
  497. // mark = 11
  498. mov ecx, 1
  499. mov edx, 11
  500. mov [esp + STRETCH], ecx
  501. mov [esp + MARK], edx
  502. jmp Lif_done
  503. Lno_stretch:
  504. // widthx16 = (lpbiInput->biWidth + 0xF) & ~0xF
  505. // width_diff = widthx16 - OutputWidth
  506. mov ecx, (LPBITMAPINFOHEADER)[ebx].biWidth
  507. add ecx, 00FH
  508. and ecx, 0FFFFFFF0H
  509. mov [esp + WIDTHX16], ecx
  510. mov edx, [esp + OUTPUT_WIDTH]
  511. sub ecx, edx
  512. mov [esp + WIDTH_DIFF], ecx
  513. // byte_ypitch_adj -= width_diff
  514. mov edx, [esp + BYTE_YPITCH_ADJ]
  515. sub edx, ecx
  516. mov [esp + BYTE_YPITCH_ADJ], edx
  517. // byte_uvpitch_adj -= (width_diff >> 1)
  518. mov edx, [esp + BYTE_UVPITCH_ADJ]
  519. sar ecx, 1
  520. sub edx, ecx
  521. mov [esp + BYTE_UVPITCH_ADJ], edx
  522. // heightx16 = (lpbiInput->biHeight + 0xF) & ~0xF
  523. // height_diff = heightx16 - OutputHeight
  524. mov ecx, (LPBITMAPINFOHEADER)[ebx].biHeight
  525. add ecx, 00FH
  526. and ecx, 0FFFFFFF0H
  527. mov [esp + HEIGHTX16], ecx
  528. xor edx, edx
  529. mov dx, [esp + OUTPUT_HEIGHT_WORD]
  530. sub ecx, edx
  531. mov [esp + HEIGHT_DIFF], ecx
  532. Lif_done:
  533. // BackTwoLines = -(lpbiInput->biWidth + OutputWidth);
  534. // BackTwoLines *= lpbiInput->biBitCount
  535. // BackTwoLines >>= 3
  // i.e. the negative byte distance from the end of the row just converted
  // to the start of the row stored before it in the input.
  536. mov ecx, (LPBITMAPINFOHEADER)[ebx].biWidth
  537. mov edx, [esp + OUTPUT_WIDTH]
  538. add ecx, edx
  539. neg ecx
  540. xor edx, edx
  541. mov dx, (LPBITMAPINFOHEADER)[ebx].biBitCount
  542. imul ecx, edx
  543. sar ecx, 3
  544. mov [esp + BACK_TWO_LINES], ecx
  545. // pnext = (U32 *)(lpInput +
  546. // (((lpbiInput->biWidth * lpbiInput->biBitCount) >> 3)) *
  547. // ((OutputHeight - aspect - 1) + height_adj)) +
  548. // width_adj)
  549. // assign (esi, pnext)
  550. mov ecx, (LPBITMAPINFOHEADER)[ebx].biWidth
  551. xor edx, edx
  552. mov dx, (LPBITMAPINFOHEADER)[ebx].biBitCount
  553. imul ecx, edx
  554. sar ecx, 3
  555. xor edx, edx
  556. mov dx, [esp + OUTPUT_HEIGHT_WORD]
  557. sub edx, [esp + ASPECT]
  558. dec edx
  559. add edx, [esp + HEIGHT_ADJ]
  560. imul ecx, edx
  561. add ecx, [esp + WIDTH_ADJ]
  562. add ecx, [esp + LP_INPUT]
  563. mov esi, ecx
  564. // assign (edi, YPlane)
  565. mov edi, [esp + YPLANE]
  566. // for (j = 0; j < LumaIters; j++)
  567. xor eax, eax
  568. mov [esp + LOOP_J], eax
  569. L4:
  570. // for (k = 0; k < mark; k++)
  // eax is zero on entry to the row loop, so the byte loads into al below
  // leave the upper 24 bits of eax clear for the 32-bit shifts.
  571. xor eax, eax
  572. mov [esp + LOOP_K], eax
  573. L5:
  574. // for (i = FrameWidth; i > 0; i -= 4, pnext += 8, YPlane += 4)
  575. mov ebp, [esp + OUTPUT_WIDTH]
  576. // The following jump is used to make sure the start of the loop begin in the U pipe.
  577. jmp L6
  578. // *(U32 *)YPlane = (*pline>>1) | ((*(pline+ 2)<<7)&0x7F00) |
  579. // ((*(pline+ 4)<<15)&0x7F0000) | ((*(pline+ 6)<<23)&0x7F000000)
  580. // Register usage:
  581. // esi - ptr to interlaced (VYUY) input
  582. // edi - ptr for writing Y values
  // ebp - remaining Y bytes in this row (decremented by 4 per iteration)
  // Only the low byte of ecx/ebx/edx is loaded; the and-masks after the
  // shifts discard whatever was left in the upper bits.
  583. L6:
  584. mov al, [esi]
  585. mov cl, [esi+4]
  586. shr eax, 1
  587. mov bl, [esi+2]
  588. shl ecx, 15
  589. mov dl, [esi+6]
  590. shl ebx, 7
  591. and ecx, 0x7F0000
  592. shl edx, 23
  593. and ebx, 0x7F00
  594. and edx, 0x7F000000
  595. or ebx, eax
  596. or ebx, ecx
  597. lea edi, [edi+4]
  598. or ebx, edx
  599. lea esi, [esi+8]
  600. mov [edi-4], ebx
  601. mov ebx, [esp + LOOP_K]
  602. // if (0 == (k & 1))
  603. // *(U16 *)UPlane = (*(pline+ 1)>>1) | ((*(pline+ 5)<<7)&0x7F00)
  604. // *(U16 *)VPlane = (*(pline+ 3)>>1) | ((*(pline+ 7)<<7)&0x7F00)
  // esi has already been advanced by 8, so [esi-7] / [esi-3] are input
  // offsets 1 / 5 (U) and [esi-5] / [esi-1] are offsets 3 / 7 (V).
  // The updated plane pointers are stored back into the argument slots.
  605. test ebx, 1
  606. jnz L7
  607. mov ecx, [esp + UPLANE]
  608. mov edx, [esp + VPLANE]
  609. mov al, [esi-7]
  610. mov bl, [esi-3]
  611. shr eax, 1
  612. and ebx, 0xFE
  613. shl ebx, 7
  614. lea edx, [edx+2]
  615. or ebx, eax
  616. mov al, [esi-5]
  617. shr eax, 1
  618. mov [ecx], bx
  619. mov bl, [esi-1]
  620. lea ecx, [ecx+2]
  621. and ebx, 0xFE
  622. mov [esp + UPLANE], ecx
  623. shl ebx, 7
  624. mov [esp + VPLANE], edx
  625. or ebx, eax
  626. nop
  627. mov [edx-2], bx
  628. nop
  629. L7:
  // NOTE(review): exits only when ebp reaches exactly zero, so OutputWidth
  // must be a non-zero multiple of 4.
  630. sub ebp, 4
  631. jnz L6
  632. // Assembler version of C_WIDTH_DIFF
  633. // if (width_diff)
  634. mov eax, [esp + WIDTH_DIFF]
  635. mov edx, eax
  636. test eax, eax
  637. jz Lno_width_diff
  638. // tm = (*(YPlane-1)) << 24
  639. // tm |= (tm>>8) | (tm>>16) | (tm>>24)
  // ecx ends up holding the last written Y byte in all four byte lanes.
  640. mov bl, [edi - 1]
  641. shl ebx, 24
  642. mov ecx, ebx
  643. shr ebx, 8
  644. or ecx, ebx
  645. shr ebx, 8
  646. or ecx, ebx
  647. shr ebx, 8
  648. or ecx, ebx
  649. // *(U32 *)YPlane = tm
  650. mov [edi], ecx
  651. // if ((width_diff-4) > 0)
  // NOTE(review): implemented as a test for exactly zero (jz), so width_diff
  // is assumed to be 4, 8 or 12 here.
  652. sub eax, 4
  653. jz Lupdate_YPlane
  654. // *(U32 *)(YPlane + 4) = tm
  655. mov [edi + 4], ecx
  656. sub eax, 4
  657. // if ((width_diff-8) > 0)
  658. jz Lupdate_YPlane
  659. // *(U32 *)(YPlane + 8) = tm
  660. mov [edi + 8], ecx
  661. Lupdate_YPlane:
  662. // YPlane += width_diff
  663. lea edi, [edi + edx]
  664. ///if (0 == (k&1))
  665. mov eax, [esp + LOOP_K]
  666. test eax, 1
  667. jnz Lno_width_diff
  668. // t8u = *(UPlane-1)
  669. // t8v = *(VPlane-1)
  670. // *UPlane++ = t8u
  671. // *UPlane++ = t8u
  672. // *VPlane++ = t8v
  673. // *VPlane++ = t8v
  // cl / ch carry the last U / V byte; ebp counts down the remaining
  // width_diff (edx still holds the full width_diff).
  674. mov ebp, edx
  675. mov eax, [esp + UPLANE]
  676. mov ebx, [esp + VPLANE]
  677. mov cl, [eax - 1]
  678. mov ch, [ebx - 1]
  679. mov [eax], cl
  680. mov [eax + 1], cl
  681. mov [ebx], ch
  682. mov [ebx + 1], ch
  683. // if ((width_diff-4) > 0)
  684. sub ebp, 4
  685. jz Lupdate_UVPlane
  686. // *UPlane++ = t8u
  687. // *UPlane++ = t8u
  688. // *VPlane++ = t8v
  689. // *VPlane++ = t8v
  690. mov [eax + 2], cl
  691. mov [eax + 3], cl
  692. mov [ebx + 2], ch
  693. mov [ebx + 3], ch
  694. // if ((width_diff-8) > 0)
  695. sub ebp, 4
  696. jz Lupdate_UVPlane
  697. // *UPlane++ = t8u
  698. // *UPlane++ = t8u
  699. // *VPlane++ = t8v
  700. // *VPlane++ = t8v
  701. mov [eax + 4], cl
  702. mov [eax + 5], cl
  703. mov [ebx + 4], ch
  704. mov [ebx + 5], ch
  705. Lupdate_UVPlane:
  // UPlane += width_diff >> 1; VPlane += width_diff >> 1
  706. sar edx, 1
  707. lea eax, [eax + edx]
  708. mov [esp + UPLANE], eax
  709. lea ebx, [ebx + edx]
  710. mov [esp + VPLANE], ebx
  711. Lno_width_diff:
  712. // if (stretch && (0 == k) && j)
  713. mov eax, [esp + STRETCH]
  714. test eax, eax
  715. jz L14
  716. mov eax, [esp + LOOP_K]
  717. test eax, eax
  718. jnz L14
  719. mov eax, [esp + LOOP_J]
  720. test eax, eax
  721. jz L14
  722. // spill YPlane ptr
  723. mov [esp + YPLANE], edi
  724. nop
  725. // for (i = OutputWidth; i > 0; i -= 8)
  726. // assign (ebx, pyprev)
  727. // assign (ecx, t)
  728. // assign (edx, pynext)
  729. // assign (edi, pyspace)
  730. // assign (ebp, i)
  731. // make sure offsets are such that there are no bank conflicts here
  732. mov ebx, [esp + PYPREV]
  733. mov edi, [esp + PYSPACE]
  734. mov edx, [esp + PYNEXT]
  735. mov ebp, [esp + OUTPUT_WIDTH]
  736. // t = (*pyprev++ & 0xFEFEFEFE) >> 1
  737. // t += (*pynext++ & 0xFEFEFEFE) >> 1
  738. // *pyspace++ = t
  739. // t = (*pyprev++ & 0xFEFEFEFE) >> 1
  740. // t += (*pynext++ & 0xFEFEFEFE) >> 1
  741. // *pyspace++ = t
  // Two dwords are averaged per iteration. The code uses both
  // "and 0xFEFEFEFE then shr 1" and "shr 1 then and 0x7F7F7F7F", which
  // give the same per-byte halving.
  742. L15:
  743. // 1
  744. mov eax, [ebx]
  745. lea ebx, [ebx + 4]
  746. // 2
  747. mov ecx, [edx]
  748. lea edx, [edx + 4]
  749. // 3
  750. shr ecx, 1
  751. and eax, 0xFEFEFEFE
  752. // 4
  753. shr eax, 1
  754. and ecx, 0x7F7F7F7F
  755. // 5
  756. add eax, ecx
  757. mov ecx, [ebx]
  758. // 6
  759. shr ecx, 1
  760. mov [edi], eax
  761. // 7
  762. mov eax, [edx]
  763. and ecx, 0x7F7F7F7F
  764. // 8
  765. shr eax, 1
  766. lea edi, [edi + 4]
  767. // 9
  768. and eax, 0x7F7F7F7F
  769. lea ebx, [ebx + 4]
  770. // 10
  771. lea edx, [edx + 4]
  772. add eax, ecx
  773. // 11
  774. mov [edi], eax
  775. lea edi, [edi + 4]
  776. // 12
  // NOTE(review): exact-zero exit - OutputWidth must be a multiple of 8 on
  // the stretch path.
  777. sub ebp, 8
  778. jnz L15
  779. // kill (ebx, pyprev)
  780. // kill (ecx, t)
  781. // kill (edx, pynext)
  782. // kill (edi, pyspace)
  783. // kill (ebp, i)
  784. // restore YPlane
  785. mov edi, [esp + YPLANE]
  786. // pnext += BackTwoLines
  787. L14:
  788. add esi, [esp + BACK_TWO_LINES]
  789. // YPlane += byte_ypitch_adj;
  790. add edi, [esp + BYTE_YPITCH_ADJ]
  791. // if(0 == (k&1))
  792. mov eax, [esp + LOOP_K]
  793. and eax, 1
  794. jnz L16
  795. // UPlane += byte_uvpitch_adj;
  796. // VPlane += byte_uvpitch_adj;
  797. mov eax, [esp + BYTE_UVPITCH_ADJ]
  798. add [esp + UPLANE], eax
  799. add [esp + VPLANE], eax
  800. L16:
  // k++ ; repeat the row loop while k < mark
  801. inc DWORD PTR [esp + LOOP_K]
  802. xor eax, eax
  803. mov ebx, [esp + LOOP_K]
  804. cmp ebx, [esp + MARK]
  805. jl L5
  806. // if (stretch)
  // After each group of rows: remember the last written row (pyprev), leave
  // one output row free (pyspace) and continue one pitch further (pynext).
  807. cmp DWORD PTR [esp + STRETCH], 0
  808. je L17
  809. // pyprev = YPlane - pitch
  810. mov eax, edi
  811. sub eax, [esp + PITCH_PARM]
  812. mov [esp + PYPREV], eax
  813. // pyspace = YPlane
  814. mov [esp + PYSPACE], edi
  815. // pynext = (YPlane += pitch)
  816. add edi, [esp + PITCH_PARM]
  817. mov [esp + PYNEXT], edi
  818. L17:
  // j++ ; repeat the group loop while j < LumaIters
  819. inc DWORD PTR [esp + LOOP_J]
  820. mov eax, [esp + LOOP_J]
  821. cmp eax, [esp + LUMA_ITERS]
  822. jl L4
  823. // kill (esi, pnext)
  824. // kill (edi, YPlane)
  825. // ASM version of C_HEIGHT_FILL
  826. // if (height_diff)
  827. mov eax, [esp + HEIGHT_DIFF]
  828. test eax, eax
  829. jz Lno_height_diff
  // In the three fill loops below edi is the destination (pyspace / puvspace)
  // and esi the source one pitch above it (pyprev / puvprev); eax counts the
  // rows still to fill.
  830. // pyspace = (U32 *)YPlane
  831. mov esi, edi
  832. // pyprev = (U32 *)(YPlane - pitch)
  833. sub esi, [esp + PITCH_PARM]
  834. // for (j = height_diff; j > 0; j--)
  835. Lheight_yfill_loop:
  836. mov ebx, [esp + WIDTHX16]
  837. // for (i = widthx16; i>0; i -=4)
  838. Lheight_yfill_row:
  839. // *pyspace++ = *pyprev++
  840. mov ecx, [esi]
  841. lea esi, [esi + 4]
  842. mov [edi], ecx
  843. lea edi, [edi + 4]
  844. sub ebx, 4
  845. jnz Lheight_yfill_row
  846. // pyspace += word_ypitch_adj
  847. // pyprev += word_ypitch_adj
  // (the pointers are kept as byte addresses here, so the byte adjustment
  // is added directly)
  848. add esi, [esp + BYTE_YPITCH_ADJ]
  849. add edi, [esp + BYTE_YPITCH_ADJ]
  850. dec eax
  851. jnz Lheight_yfill_loop
  // U plane: half as many rows (eax -= 2) and half as many bytes per row
  // (ebx -= 8 per dword copied).
  852. mov eax, [esp + HEIGHT_DIFF]
  853. mov edi, [esp + UPLANE]
  854. // puvspace = (U32 *)UPlane
  855. mov esi, edi
  856. // puvprev = (U32 *)(UPlane - pitch)
  857. sub esi, [esp + PITCH_PARM]
  858. // for (j = height_diff; j > 0; j -= 2)
  859. Lheight_ufill_loop:
  860. mov ebx, [esp + WIDTHX16]
  861. // for (i = widthx16; i>0; i -= 8)
  862. Lheight_ufill_row:
  863. // *puvspace++ = *puvprev++
  864. mov ecx, [esi]
  865. mov [edi], ecx
  866. lea esi, [esi + 4]
  867. lea edi, [edi + 4]
  868. sub ebx, 8
  869. jnz Lheight_ufill_row
  870. // puvspace += word_uvpitch_adj
  871. // puvprev += word_uvpitch_adj
  872. add esi, [esp + BYTE_UVPITCH_ADJ]
  873. add edi, [esp + BYTE_UVPITCH_ADJ]
  874. sub eax, 2
  875. jnz Lheight_ufill_loop
  // V plane: same fill as for the U plane.
  876. mov eax, [esp + HEIGHT_DIFF]
  877. mov edi, [esp + VPLANE]
  878. // puvspace = (U32 *)VPlane
  879. mov esi, edi
  880. // puvprev = (U32 *)(VPlane - pitch)
  881. sub esi, [esp + PITCH_PARM]
  882. // for (j = height_diff; j > 0; j -= 2)
  883. Lheight_vfill_loop:
  884. mov ebx, [esp + WIDTHX16]
  885. // for (i = widthx16; i>0; i -= 8)
  886. Lheight_vfill_row:
  887. // *puvspace++ = *puvprev++
  888. mov ecx, [esi]
  889. mov [edi], ecx
  890. lea esi, [esi + 4]
  891. lea edi, [edi + 4]
  892. sub ebx, 8
  893. jnz Lheight_vfill_row
  894. // puvspace += word_uvpitch_adj
  895. // puvprev += word_uvpitch_adj
  896. add esi, [esp + BYTE_UVPITCH_ADJ]
  897. add edi, [esp + BYTE_UVPITCH_ADJ]
  898. sub eax, 2
  899. jnz Lheight_vfill_loop
  900. Lno_height_diff:
  901. // if (stretch)
  // The row left free after the final group has no following row to average
  // with, so it is filled with a copy of pyprev. esi is loaded before the
  // test; on the non-stretch path the value is simply not used.
  902. mov esi, [esp + PYPREV]
  903. cmp DWORD PTR [esp + STRETCH], 0
  904. je L19
  905. // for (i = OutputWidth; i > 0; i -= 4)
  906. // assign (esi, pyprev)
  907. // assign (edi, pyspace)
  908. // assign (ebp, i)
  909. mov ebp, [esp + OUTPUT_WIDTH]
  910. mov edi, [esp + PYSPACE]
  911. L18:
  912. mov ecx, [esi]
  913. lea esi, [esi + 4]
  914. mov [edi], ecx
  915. lea edi, [edi + 4]
  916. sub ebp, 4
  917. jnz L18
  918. // kill (esi, pyprev)
  919. // kill (edi, pyspace)
  920. // kill (ebp, i)
  921. L19:
  // Epilogue: release the locals and restore the callee-save registers in
  // reverse push order. A naked function must issue its own ret.
  922. add esp, LOCALSIZE
  923. pop edi
  924. pop esi
  925. pop ebx
  926. pop ebp
  927. ret
  928. }
  929. }
  // Remove the frame-offset names so they do not leak into later code.
  930. #undef LOCALSIZE
  931. #undef PITCH_PARM
  932. #undef VPLANE
  933. #undef UPLANE
  934. #undef YPLANE
  935. #undef LP_INPUT
  936. #undef OUTPUT_HEIGHT_WORD
  937. #undef OUTPUT_WIDTH_WORD
  938. #undef LPBI_INPUT
  939. #undef OUTPUT_WIDTH
  940. #undef PYPREV
  941. #undef PYSPACE
  942. #undef PYNEXT
  943. #undef PUVPREV
  944. #undef PUVSPACE
  945. #undef LOOP_I
  946. #undef LOOP_J
  947. #undef LOOP_K
  948. #undef BACK_TWO_LINES
  949. #undef WIDTHX16
  950. #undef HEIGHTX16
  951. #undef WIDTH_DIFF
  952. #undef HEIGHT_DIFF
  953. #undef WIDTH_ADJ
  954. #undef HEIGHT_ADJ
  955. #undef STRETCH
  956. #undef ASPECT
  957. #undef LUMA_ITERS
  958. #undef MARK
  959. #undef BYTE_YPITCH_ADJ
  960. #undef BYTE_UVPITCH_ADJ
  961. /***************************************************
  962. * H26X_YUV12toEncYUV12()
  963. * Copy YUV12 data to encoder memory at the
  964. * appropriate location. It is assumed that the input
  965. * data is stored as rows of Y, followed by rows of U,
  966. * then rows of V.
  967. *
  968. ***************************************************/
  969. void C_H26X_YUV12toEncYUV12(
  970. LPBITMAPINFOHEADER lpbiInput,
  971. WORD OutputWidth,
  972. WORD OutputHeight,
  973. U8 *lpInput,
  974. U8 *YPlane,
  975. U8 *UPlane,
  976. U8 *VPlane,
  977. const int pitch) {
  978. int i, j;
  979. U32 *pnext = (U32 *)lpInput;
  980. U32 *plast;
  981. U32 t;
  982. U16 t16;
  983. U8 *p8next;
  984. int byte_ypitch_adj;
  985. int byte_uvpitch_adj;
  986. int yinput_height = lpbiInput->biHeight;
  987. int yinput_width = lpbiInput->biWidth;
  988. int yheight_diff = 0;
  989. int ywidth_diff = 0;
  990. int uvheight_diff = 0;
  991. int uvwidth_diff = 0;
  992. int uvinput_width = yinput_width >> 1;
  993. int uvinput_height = yinput_height >> 1;
  994. int uvoutput_width = OutputWidth >> 1;
  995. int widthx16 = (OutputWidth + 0xF) & ~0xF;
  996. int width_diff = widthx16 - OutputWidth;
  997. int heightx16 = (OutputHeight + 0xF) & ~0xF;
  998. int height_diff = heightx16 - OutputHeight;
  999. // This routine has to handle two cases:
  1000. // - arbitrary frame size (width and height may be any multiple of 4 up to CIF size).
  1001. // - backward compatibility with H263 (320x240 -> 352x288 still mode)
  1002. // Note: Crop and stretch was not supported for YUV12 conversion in H263.
  1003. if (width_diff) {
  1004. byte_ypitch_adj = pitch - widthx16;
  1005. byte_uvpitch_adj = pitch - (widthx16 >> 1);
  1006. } else {
  1007. byte_ypitch_adj = pitch - OutputWidth;
  1008. byte_uvpitch_adj = pitch - (OutputWidth >> 1);
  1009. ywidth_diff = OutputWidth - yinput_width;
  1010. yheight_diff = OutputHeight - yinput_height;
  1011. uvwidth_diff = ywidth_diff >> 1;
  1012. uvheight_diff = yheight_diff >> 1;
  1013. }
  1014. // Y Plane conversion.
  1015. for (j = yinput_height; j > 0; j--, YPlane += byte_ypitch_adj) {
  1016. for (i = yinput_width; (i & ~0xF); i-=16, YPlane+=16) {
  1017. *(U32 *)YPlane = (*pnext++ >> 1) & 0x7F7F7F7F;
  1018. *(U32 *)(YPlane+4) = (*pnext++ >> 1) & 0x7F7F7F7F;
  1019. *(U32 *)(YPlane+8) = (*pnext++ >> 1) & 0x7F7F7F7F;
  1020. *(U32 *)(YPlane+12) = (*pnext++ >> 1) & 0x7F7F7F7F;
  1021. }
  1022. if (i & 0x8) {
  1023. *(U32 *)YPlane = (*pnext++ >> 1) & 0x7F7F7F7F;
  1024. *(U32 *)(YPlane+4) = (*pnext++ >> 1) & 0x7F7F7F7F;
  1025. YPlane += 8;
  1026. }
  1027. if (i & 0x4) {
  1028. *(U32 *)YPlane = (*pnext++ >> 1) & 0x7F7F7F7F;
  1029. YPlane += 4;
  1030. }
  1031. // The next two cases are mutually exclusive. If there is a width_diff,
  1032. // then there is no ywidth_diff. If there is a ywidth_diff, then there
  1033. // is no width_diff. Both width_diff and ywidth_diff may be zero.
  1034. if (width_diff) {
  1035. t = (*(YPlane-1)) << 24;
  1036. t |= (t>>8) | (t>>16) | (t>>24);
  1037. *(U32 *)YPlane = t;
  1038. if ((width_diff-4) > 0) {
  1039. *(U32 *)(YPlane + 4) = t;
  1040. }
  1041. if ((width_diff-8) > 0) {
  1042. *(U32 *)(YPlane + 8) = t;
  1043. }
  1044. YPlane += width_diff;
  1045. }
  1046. for (i = ywidth_diff; i > 0; i -= 4) {
  1047. *(U32 *)YPlane = 0; YPlane += 4;
  1048. }
  1049. }
  1050. // The next two cases are mutually exclusive. If there is a height_diff,
  1051. // then there is no yheight_diff. If there is a yheight_diff, then there
  1052. // is no height_diff. Both height_diff and yheight_diff may be zero.
  1053. if (height_diff) {
  1054. for (j = height_diff; j > 0; j-- ) {
  1055. plast = (U32 *)(YPlane - pitch);
  1056. for (i = widthx16; i > 0; i -= 4, YPlane += 4) {
  1057. *(U32 *)YPlane = *plast++;
  1058. }
  1059. YPlane += byte_ypitch_adj;
  1060. }
  1061. }
  1062. for (j = yheight_diff; j > 0; j--, YPlane += byte_ypitch_adj) {
  1063. for (i = widthx16; i > 0; i -= 4) {
  1064. *(U32 *)YPlane = 0; YPlane += 4;
  1065. }
  1066. }
  1067. // U Plane conversion.
  1068. p8next = (U8 *)pnext;
  1069. for (j = uvinput_height; j > 0; j--, UPlane += byte_uvpitch_adj) {
  1070. for (i = uvinput_width; (i & ~0x7); i-=8, UPlane+=8, p8next+=8) {
  1071. *(U32 *)UPlane = (*(U32 *)p8next >> 1) & 0x7F7F7F7F;
  1072. *(U32 *)(UPlane+4) = (*(U32 *)(p8next+4) >> 1) & 0x7F7F7F7F;
  1073. }
  1074. if (i & 0x4) {
  1075. *(U32 *)UPlane = (*(U32 *)p8next >> 1) & 0x7F7F7F7F;
  1076. UPlane += 4, p8next += 4;
  1077. }
  1078. if (i & 0x2) {
  1079. *(U16 *)UPlane = (*(U16 *)p8next >> 1) & 0x7F7F;
  1080. UPlane += 2, p8next += 2;
  1081. }
  1082. // The next two cases are mutually exclusive. If there is a width_diff,
  1083. // then there is no uvwidth_diff. If there is a uvwidth_diff, then there
  1084. // is no width_diff. Both width_diff and uvwidth_diff may be zero.
  1085. if (width_diff) {
  1086. t16 = (*(UPlane-1)) << 8;
  1087. t16 |= (t16>>8);
  1088. *(U16*)UPlane = t16; UPlane += 2;
  1089. if ((width_diff-4) > 0) {
  1090. *(U16*)UPlane = t16; UPlane += 2;
  1091. }
  1092. if ((width_diff-8) > 0) {
  1093. *(U16*)UPlane = t16; UPlane += 2;
  1094. }
  1095. }
  1096. for (i = uvwidth_diff; i > 0; i -= 4) {
  1097. *(U32 *)UPlane = 0x40404040; UPlane += 4;
  1098. }
  1099. }
  1100. // The next two cases are mutually exclusive. If there is a height_diff,
  1101. // then there is no uvheight_diff. If there is a uvheight_diff, then there
  1102. // is no height_diff. Both height_diff and uvheight_diff may be zero.
  1103. if (height_diff) {
  1104. for (j = (height_diff >> 1); j > 0; j--, UPlane += byte_uvpitch_adj ) {
  1105. plast = (U32 *)(UPlane - pitch);
  1106. for (i = (widthx16 >> 1); i > 0; i -= 4, UPlane += 4) {
  1107. *(U32 *)UPlane = *plast++;
  1108. }
  1109. }
  1110. }
  1111. for (j = uvheight_diff; j > 0; j--, UPlane += byte_uvpitch_adj) {
  1112. for (i = uvoutput_width; i > 0; i -= 4) {
  1113. *(U32 *)UPlane = 0x40404040; UPlane += 4;
  1114. }
  1115. }
  1116. // V Plane conversion.
  1117. for (j = uvinput_height; j > 0; j--, VPlane += byte_uvpitch_adj) {
  1118. for (i = uvinput_width; (i & ~0x7); i-=8, VPlane+=8, p8next+=8) {
  1119. *(U32 *)VPlane = (*(U32 *)p8next >> 1) & 0x7F7F7F7F;
  1120. *(U32 *)(VPlane+4) = (*(U32 *)(p8next+4) >> 1) & 0x7F7F7F7F;
  1121. }
  1122. if (i & 0x4) {
  1123. *(U32 *)VPlane = (*(U32 *)p8next >> 1) & 0x7F7F7F7F;
  1124. VPlane += 4, p8next += 4;
  1125. }
  1126. if (i & 0x2) {
  1127. *(U16 *)VPlane = (*(U16 *)p8next >> 1) & 0x7F7F;
  1128. VPlane += 2, p8next += 2;
  1129. }
  1130. // The next two cases are mutually exclusive. If there is a width_diff,
  1131. // then there is no uvwidth_diff. If there is a uvwidth_diff, then there
  1132. // is no width_diff. Both width_diff and uvwidth_diff may be zero.
  1133. if (width_diff) {
  1134. t16 = (*(VPlane-1)) << 8;
  1135. t16 |= (t16>>8);
  1136. *(U16*)VPlane = t16; VPlane += 2;
  1137. if ((width_diff-4) > 0) {
  1138. *(U16*)VPlane = t16; VPlane += 2;
  1139. }
  1140. if ((width_diff-8) > 0) {
  1141. *(U16*)VPlane = t16; VPlane += 2;
  1142. }
  1143. }
  1144. for (i = uvwidth_diff; i > 0; i -= 4) {
  1145. *(U32 *)VPlane = 0x40404040; VPlane += 4;
  1146. }
  1147. }
  1148. // The next two cases are mutually exclusive. If there is a height_diff,
  1149. // then there is no uvheight_diff. If there is a uvheight_diff, then there
  1150. // is no height_diff. Both height_diff and uvheight_diff may be zero.
  1151. if (height_diff) {
  1152. for (j = (height_diff >> 1); j > 0; j--, VPlane += byte_uvpitch_adj ) {
  1153. plast = (U32 *)(VPlane - pitch);
  1154. for (i = (widthx16 >> 1); i > 0; i -= 4, VPlane += 4) {
  1155. *(U32 *)VPlane = *plast++;
  1156. }
  1157. }
  1158. }
  1159. for (j = uvheight_diff; j > 0; j--, VPlane += byte_uvpitch_adj) {
  1160. for (i = uvoutput_width; i > 0; i -= 4) {
  1161. *(U32 *)VPlane = 0x40404040; VPlane += 4;
  1162. }
  1163. }
  1164. }
  1165. #endif // } H263P