Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

859 lines
19 KiB

  1. /* *************************************************************************
  2. ** INTEL Corporation Proprietary Information
  3. **
  4. ** This listing is supplied under the terms of a license
  5. ** agreement with INTEL Corporation and may not be copied
  6. ** nor disclosed except in accordance with the terms of
  7. ** that agreement.
  8. **
  9. ** Copyright (c) 1995 Intel Corporation.
  10. ** All Rights Reserved.
  11. **
  12. ** *************************************************************************
  13. */
  14. #include "precomp.h"
  15. #if defined(H263P) || defined(USE_BILINEAR_MSH26X) // {
  16. //
  17. // For the P5 versions, the strategy is to compute the Y value for an odd RGB value
  18. // followed by computing the Y value for the corresponding even RGB value. The registers
  19. // are then set with the proper values to compute U and V values for the even RGB
  20. // value. This avoids repeating the shifting and masking needed to extract the Red,
  21. // Green and Blue components.
  22. //
  23. /*****************************************************************************
  24. *
  25. * H26X_CLUT4toYUV12()
  26. *
  27. * Convert from CLUT4 to YUV12 (YCrCb 4:2:0) and copy to destination memory
  28. * with pitch defined by the constant PITCH.
  29. *
  30. * This is needed to support the quickcam.
  31. */
  32. #if 0 // { 0
  33. void C_H26X_CLUT4toYUV12(
  34. LPBITMAPINFOHEADER lpbiInput,
  35. WORD OutputWidth,
  36. WORD OutputHeight,
  37. U8 *lpInput,
  38. U8 *YPlane,
  39. U8 *UPlane,
  40. U8 *VPlane,
  41. const int pitch)
  42. {
  43. U8 *p8next;
  44. U8 tm1, tm2, tm3, tm4;
  45. C_RGB_COLOR_CONVERT_INIT
  46. // The following assignment is here simply to avoid a warning message.
  47. t = t;
  48. // The palette may change with a new frame. Since we don't know when the palette
  49. // changes, we have to be conservative and compute it for each frame. However, this
  50. // should still be quicker than computing Y, U, and V for each pixel.
  51. Compute_YUVPalette(lpbiInput);
  52. for (j = 0, p8next = (U8 *)pnext; j < LumaIters; j++) {
  53. for (k = 0; k < mark; k++) {
  54. for (i = OutputWidth; (i & ~0x7); i-=8, YPlane+=8, p8next+=4) {
  55. tm1 = *p8next;
  56. tm2 = *(p8next+1);
  57. *(U32 *)YPlane =
  58. YUVPalette[(tm1>>4)&0xF].Yval |
  59. ((YUVPalette[tm1&0xF].Yval) << 8) |
  60. ((YUVPalette[(tm2>>4)&0xF].Yval) << 16) |
  61. ((YUVPalette[tm2&0xF].Yval) << 24);
  62. tm3 = *(p8next+2);
  63. tm4 = *(p8next+3);
  64. *(U32 *)(YPlane+4) =
  65. YUVPalette[(tm3>>4)&0xF].Yval |
  66. ((YUVPalette[tm3&0xF].Yval) << 8) |
  67. ((YUVPalette[(tm4>>4)&0xF].Yval) << 16) |
  68. ((YUVPalette[tm4&0xF].Yval) << 24);
  69. if (0 == (k&1)) {
  70. *(U32 *)UPlane =
  71. YUVPalette[(tm1>>4)&0xF].Uval |
  72. ((YUVPalette[(tm2>>4)&0xF].Uval) << 8) |
  73. ((YUVPalette[(tm3>>4)&0xF].Uval) << 16) |
  74. ((YUVPalette[(tm4>>4)&0xF].Uval) << 24);
  75. *(U32 *)VPlane =
  76. YUVPalette[(tm1>>4)&0xF].Vval |
  77. ((YUVPalette[(tm2>>4)&0xF].Vval) << 8) |
  78. ((YUVPalette[(tm3>>4)&0xF].Vval) << 16) |
  79. ((YUVPalette[(tm4>>4)&0xF].Vval) << 24);
  80. UPlane +=4; VPlane += 4;
  81. }
  82. }
  83. if (i & 0x4) {
  84. tm1 = *p8next++;
  85. tm2 = *p8next++;
  86. *(U32 *)YPlane =
  87. YUVPalette[(tm1>>4)&0xF].Yval |
  88. ((YUVPalette[tm1&0xF].Yval) << 8) |
  89. ((YUVPalette[(tm2>>4)&0xF].Yval) << 16) |
  90. ((YUVPalette[tm2&0xF].Yval) << 24);
  91. YPlane += 4;
  92. if (0 == (k&1)) {
  93. *(U16 *)UPlane =
  94. YUVPalette[(tm1>>4)&0xF].Uval |
  95. ((YUVPalette[(tm2>>4)&0xF].Uval) << 8);
  96. *(U16 *)VPlane =
  97. YUVPalette[(tm1>>4)&0xF].Vval |
  98. ((YUVPalette[(tm2>>4)&0xF].Vval) << 8);
  99. UPlane += 2; VPlane += 2;
  100. }
  101. }
  102. C_WIDTH_FILL
  103. if (stretch && (0 == k) && j) {
  104. for (i = OutputWidth; i > 0; i -= 8) {
  105. tm = ((*pyprev++ & 0xFEFEFEFE) >> 1);
  106. tm += ((*pynext++ & 0xFEFEFEFE) >> 1);
  107. *pyspace++ = tm;
  108. tm = ((*pyprev++ & 0xFEFEFEFE) >> 1);
  109. tm += ((*pynext++ & 0xFEFEFEFE) >> 1);
  110. *pyspace++ = tm;
  111. }
  112. }
  113. p8next += (BackTwoLines << 2);
  114. YPlane += byte_ypitch_adj;
  115. // Increment after even lines.
  116. if(0 == (k&1)) {
  117. UPlane += byte_uvpitch_adj;
  118. VPlane += byte_uvpitch_adj;
  119. }
  120. }
  121. C_HEIGHT_FILL
  122. if (stretch) {
  123. pyprev = (U32 *)(YPlane - pitch);
  124. pyspace = (U32 *)YPlane;
  125. pynext = (U32 *)(YPlane += pitch);
  126. }
  127. }
  128. if (stretch) {
  129. for (i = OutputWidth; i > 0; i -= 4) {
  130. *pyspace++ = *pyprev++;
  131. }
  132. }
  133. } // end of H26X_CLUT4toYUV12()
  134. #endif // } 0
  135. __declspec(naked)
  136. void P5_H26X_CLUT4toYUV12(
  137. LPBITMAPINFOHEADER lpbiInput,
  138. WORD OutputWidth,
  139. WORD OutputHeight,
  140. U8 *lpInput,
  141. U8 *YPlane,
  142. U8 *UPlane,
  143. U8 *VPlane,
  144. const int pitch)
  145. {
  146. // Permanent (callee-save) registers - ebx, esi, edi, ebp
  147. // Temporary (caller-save) registers - eax, ecx, edx
  148. //
  149. // Stack frame layout
  150. // | pitch | +136
  151. // | VPlane | +132
  152. // | UPlane | +128
  153. // | YPlane | +124
  154. // | lpInput | +120
  155. // | OutputHeight | +116
  156. // | OutputWidth | +112
  157. // | lpbiInput | +108
  158. // ----------------------------
  159. // | return addr | +104
  160. // | saved ebp | +100
  161. // | saved ebx | + 96
  162. // | saved esi | + 92
  163. // | saved edi | + 88
  164. // | output_width | + 84
  165. // | pyprev | + 80
  166. // | pyspace | + 76
  167. // | pynext | + 72
  168. // | puvprev | + 68
  169. // | puvspace | + 64
  170. // | i | + 60
  171. // | j | + 56
  172. // | k | + 52
  173. // | BackTwoLines | + 48
  174. // | widthx16 | + 44
  175. // | heightx16 | + 40
  176. // | width_diff | + 36
  177. // | height_diff | + 32
  178. // | width_adj | + 28
  179. // | height_adj | + 24
  180. // | stretch | + 20
  181. // | aspect | + 16
  182. // | LumaIters | + 12
  183. // | mark | + 8
  184. // | byte_ypitch_adj | + 4
  185. // | byte_uvpitch_adj | + 0
  186. #define LOCALSIZE 88
  187. #define PITCH_PARM 136
  188. #define VPLANE 132
  189. #define UPLANE 128
  190. #define YPLANE 124
  191. #define LP_INPUT 120
  192. #define OUTPUT_HEIGHT_WORD 116
  193. #define OUTPUT_WIDTH_WORD 112
  194. #define LPBI_INPUT 108
  195. #define OUTPUT_WIDTH 84
  196. #define PYPREV 80
  197. #define PYSPACE 76
  198. #define PYNEXT 72
  199. #define PUVPREV 68
  200. #define PUVSPACE 64
  201. #define LOOP_I 60
  202. #define LOOP_J 56
  203. #define LOOP_K 52
  204. #define BACK_TWO_LINES 48
  205. #define WIDTHX16 44
  206. #define HEIGHTX16 40
  207. #define WIDTH_DIFF 36
  208. #define HEIGHT_DIFF 32
  209. #define WIDTH_ADJ 28
  210. #define HEIGHT_ADJ 24
  211. #define STRETCH 20
  212. #define ASPECT 16
  213. #define LUMA_ITERS 12
  214. #define MARK 8
  215. #define BYTE_YPITCH_ADJ 4
  216. #define BYTE_UVPITCH_ADJ 0
  217. _asm {
  218. push ebp
  219. push ebx
  220. push esi
  221. push edi
  222. sub esp, LOCALSIZE
  223. // int width_diff = 0
  224. // int height_diff = 0
  225. // int width_adj = 0
  226. // int height_adj = 0
  227. // int stretch = 0
  228. // int aspect = 0
  229. xor eax, eax
  230. mov [esp + WIDTH_DIFF], eax
  231. mov [esp + HEIGHT_DIFF], eax
  232. mov [esp + WIDTH_ADJ], eax
  233. mov [esp + HEIGHT_ADJ], eax
  234. mov [esp + STRETCH], eax
  235. mov [esp + ASPECT], eax
  236. // int LumaIters = 1
  237. inc eax
  238. mov [esp + LUMA_ITERS], eax
  239. // int mark = OutputHeight
  240. // int output_width = OutputWidth
  241. // int byte_ypitch_adj = pitch - OutputWidth
  242. // int byte_uvpitch_adj = pitch - (OutputWidth >> 1)
  243. xor ebx, ebx
  244. mov bx, [esp + OUTPUT_HEIGHT_WORD]
  245. mov [esp + MARK], ebx
  246. mov bx, [esp + OUTPUT_WIDTH_WORD]
  247. mov [esp + OUTPUT_WIDTH], ebx
  248. mov ecx, [esp + PITCH_PARM]
  249. mov edx, ecx
  250. sub ecx, ebx
  251. mov [esp + BYTE_YPITCH_ADJ], ecx
  252. shr ebx, 1
  253. sub edx, ebx
  254. mov [esp + BYTE_UVPITCH_ADJ], edx
  255. // if (lpbiInput->biHeight > OutputHeight)
  256. mov ebx, [esp + LPBI_INPUT]
  257. mov ecx, (LPBITMAPINFOHEADER)[ebx].biHeight
  258. xor edx, edx
  259. mov dx, [esp + OUTPUT_HEIGHT_WORD]
  260. cmp ecx, edx
  261. jle Lno_stretch
  262. // for (LumaIters = 0, i = OutputHeight; i > 0; i -= 48) LumaIters += 4
  263. xor ecx, ecx
  264. Lrepeat48:
  265. lea ecx, [ecx + 4]
  266. sub edx, 48
  267. jnz Lrepeat48
  268. mov [esp + LUMA_ITERS], ecx
  269. // aspect = LumaIters
  270. mov [esp + ASPECT], ecx
  271. // width_adj = (lpbiInput->biWidth - OutputWidth) >> 1
  272. // width_adj *= lpbiInput->biBitCount
  273. // width_adj >>= 3
  274. mov ecx, (LPBITMAPINFOHEADER)[ebx].biWidth
  275. mov edx, [esp + OUTPUT_WIDTH]
  276. sub ecx, edx
  277. shr ecx, 1
  278. xor edx, edx
  279. mov dx, (LPBITMAPINFOHEADER)[ebx].biBitCount
  280. imul ecx, edx
  281. shr ecx, 3
  282. mov [esp + WIDTH_ADJ], ecx
  283. // height_adj = (lpbiInput->biHeight - (OutputHeight - aspect)) >> 1
  284. mov ecx, (LPBITMAPINFOHEADER)[ebx].biHeight
  285. xor edx, edx
  286. mov dx, [esp + OUTPUT_HEIGHT_WORD]
  287. sub ecx, edx
  288. add ecx, [esp + ASPECT]
  289. shr ecx, 1
  290. mov [esp + HEIGHT_ADJ], ecx
  291. // stretch = 1
  292. // mark = 11
  293. mov ecx, 1
  294. mov edx, 11
  295. mov [esp + STRETCH], ecx
  296. mov [esp + MARK], edx
  297. jmp Lif_done
  298. Lno_stretch:
  299. // widthx16 = (lpbiInput->biWidth + 0xF) & ~0xF
  300. // width_diff = widthx16 - OutputWidth
  301. mov ecx, (LPBITMAPINFOHEADER)[ebx].biWidth
  302. add ecx, 00FH
  303. and ecx, 0FFFFFFF0H
  304. mov [esp + WIDTHX16], ecx
  305. mov edx, [esp + OUTPUT_WIDTH]
  306. sub ecx, edx
  307. mov [esp + WIDTH_DIFF], ecx
  308. // byte_ypitch_adj -= width_diff
  309. mov edx, [esp + BYTE_YPITCH_ADJ]
  310. sub edx, ecx
  311. mov [esp + BYTE_YPITCH_ADJ], edx
  312. // byte_uvpitch_adj -= (width_diff >> 1)
  313. mov edx, [esp + BYTE_UVPITCH_ADJ]
  314. shr ecx, 1
  315. sub edx, ecx
  316. mov [esp + BYTE_UVPITCH_ADJ], edx
  317. // heightx16 = (lpbiInput->biHeight + 0xF) & ~0xF
  318. // height_diff = heightx16 - OutputHeight
  319. mov ecx, (LPBITMAPINFOHEADER)[ebx].biHeight
  320. add ecx, 00FH
  321. and ecx, 0FFFFFFF0H
  322. mov [esp + HEIGHTX16], ecx
  323. xor edx, edx
  324. mov dx, [esp + OUTPUT_HEIGHT_WORD]
  325. sub ecx, edx
  326. mov [esp + HEIGHT_DIFF], ecx
  327. Lif_done:
  328. // BackTwoLines = -(lpbiInput->biWidth + OutputWidth);
  329. // BackTwoLines *= lpbiInput->biBitCount
  330. // BackTwoLines >>= 3
  331. mov ecx, (LPBITMAPINFOHEADER)[ebx].biWidth
  332. mov edx, [esp + OUTPUT_WIDTH]
  333. add ecx, edx
  334. neg ecx
  335. xor edx, edx
  336. mov dx, (LPBITMAPINFOHEADER)[ebx].biBitCount
  337. imul ecx, edx
  338. sar ecx, 3
  339. mov [esp + BACK_TWO_LINES], ecx
  340. // pnext = (U32 *)(lpInput +
  341. // (((lpbiInput->biWidth * lpbiInput->biBitCount) >> 3)) *
  342. // ((OutputHeight - aspect - 1) + height_adj)) +
  343. // width_adj)
  344. // assign (esi, pnext)
  345. mov ecx, (LPBITMAPINFOHEADER)[ebx].biWidth
  346. xor edx, edx
  347. mov dx, (LPBITMAPINFOHEADER)[ebx].biBitCount
  348. imul ecx, edx
  349. shr ecx, 3
  350. xor edx, edx
  351. mov dx, [esp + OUTPUT_HEIGHT_WORD]
  352. sub edx, [esp + ASPECT]
  353. dec edx
  354. add edx, [esp + HEIGHT_ADJ]
  355. imul ecx, edx
  356. add ecx, [esp + WIDTH_ADJ]
  357. add ecx, [esp + LP_INPUT]
  358. mov esi, ecx
  359. // Compute_YUVPalette(lpbiInput)
  360. mov eax, [esp + LPBI_INPUT]
  361. push eax
  362. call Compute_YUVPalette
  363. pop eax
  364. // assign (edi, YPlane)
  365. mov edi, [esp + YPLANE]
  366. // for (j = 0; j < LumaIters; j++)
  367. xor eax, eax
  368. mov [esp + LOOP_J], eax
  369. // for (k = 0; k < mark; k++)
  370. L4:
  371. xor eax, eax
  372. mov [esp + LOOP_K], eax
  373. // for (i = OutputWidth; i > 0; i -= 2, pnext += 4)
  374. L5:
  375. mov ebp, [esp + OUTPUT_WIDTH]
  376. // This jump is here to make sure the following loop starts on the U pipe
  377. jmp L6
  378. L6:
  379. // tm1 = *p8next
  380. // tm2 = *(p8next+1);
  381. // *(U32 *)YPlane =
  382. // YUVPalette[(tm1>>4)&0xF].Yval |
  383. // ((YUVPalette[tm1&0xF].Yval) << 8) |
  384. // ((YUVPalette[(tm2>>4)&0xF].Yval) << 16) |
  385. // ((YUVPalette[tm2&0xF].Yval) << 24)
  386. mov ax, [esi]
  387. nop
  388. mov ebx, eax
  389. mov ecx, eax
  390. shr ecx, 12
  391. mov edx, eax
  392. shr eax, 4
  393. and ebx, 0xF
  394. shr edx, 8
  395. and eax, 0xF
  396. and ecx, 0xF
  397. and edx, 0xF
  398. mov al, [YUVPalette+eax*4].Yval
  399. mov bl, [YUVPalette+ebx*4].Yval
  400. shl ebx, 8
  401. mov cl, [YUVPalette+ecx*4].Yval
  402. shl ecx, 16
  403. mov dl, [YUVPalette+edx*4].Yval
  404. shl edx, 24
  405. or eax, ebx
  406. or eax, ecx
  407. mov ebx, [esp + LOOP_K]
  408. or eax, edx
  409. and ebx, 1
  410. mov [edi], eax
  411. jnz Lno_luma
  412. // tm1 = *p8next
  413. // tm2 = *(p8next+1);
  414. // *(U32 *)UPlane =
  415. // YUVPalette[(tm1>>4)&0xF].Uval |
  416. // ((YUVPalette[(tm2>>4)&0xF].Uval) << 8)
  417. // *(U32 *)VPlane =
  418. // YUVPalette[(tm1>>4)&0xF].Vval |
  419. // ((YUVPalette[(tm2>>4)&0xF].Vval) << 8)
  420. // UPlane += 2
  421. // VPlane += 2
  422. mov ax, [esi]
  423. nop
  424. mov ecx, eax
  425. and eax, 0xF0
  426. shr eax, 4
  427. mov ebx, [esp + UPLANE]
  428. shr ecx, 12
  429. mov ax, [YUVPalette+eax*4].UVval
  430. and ecx, 0xF
  431. mov edx, [esp + VPLANE]
  432. add ebx, 2
  433. mov cx, [YUVPalette+ecx*4].UVval
  434. add edx, 2
  435. mov [ebx - 2], al
  436. mov [esp + UPLANE], ebx
  437. mov [edx - 2], ah
  438. mov [esp + VPLANE], edx
  439. mov [ebx - 1], cl
  440. mov [edx - 1], ch
  441. nop
  442. Lno_luma:
  443. // p8next += 2
  444. // YPlane += 4
  445. lea esi, [esi + 2]
  446. lea edi, [edi + 4]
  447. sub ebp, 4
  448. jnz L6
  449. // Assembler version of C_WIDTH_DIFF
  450. // if (width_diff)
  451. mov eax, [esp + WIDTH_DIFF]
  452. mov edx, eax
  453. test eax, eax
  454. jz Lno_width_diff
  455. // tm = (*(YPlane-1)) << 24
  456. // tm |= (tm>>8) | (tm>>16) | (tm>>24)
  457. mov bl, [edi - 1]
  458. shl ebx, 24
  459. mov ecx, ebx
  460. shr ebx, 8
  461. or ecx, ebx
  462. shr ebx, 8
  463. or ecx, ebx
  464. shr ebx, 8
  465. or ecx, ebx
  466. // *(U32 *)YPlane = tm
  467. mov [edi], ecx
  468. // if ((width_diff-4) > 0)
  469. sub eax, 4
  470. jz Lupdate_YPlane
  471. // *(U32 *)(YPlane + 4) = tm
  472. mov [edi + 4], ecx
  473. sub eax, 4
  474. // if ((width_diff-8) > 0)
  475. jz Lupdate_YPlane
  476. // *(U32 *)(YPlane + 8) = tm
  477. mov [edi + 8], ecx
  478. Lupdate_YPlane:
  479. // YPlane += width_diff
  480. lea edi, [edi + edx]
  481. ///if (0 == (k&1))
  482. mov eax, [esp + LOOP_K]
  483. test eax, 1
  484. jnz Lno_width_diff
  485. // t8u = *(UPlane-1)
  486. // t8v = *(VPlane-1)
  487. // *UPlane++ = t8u
  488. // *UPlane++ = t8u
  489. // *VPlane++ = t8v
  490. // *VPlane++ = t8v
  491. mov ebp, edx
  492. mov eax, [esp + UPLANE]
  493. mov ebx, [esp + VPLANE]
  494. mov cl, [eax - 1]
  495. mov ch, [ebx - 1]
  496. mov [eax], cl
  497. mov [eax + 1], cl
  498. mov [ebx], ch
  499. mov [ebx + 1], ch
  500. // if ((width_diff-4) > 0)
  501. sub ebp, 4
  502. jz Lupdate_UVPlane
  503. // *UPlane++ = t8u
  504. // *UPlane++ = t8u
  505. // *VPlane++ = t8v
  506. // *VPlane++ = t8v
  507. mov [eax + 2], cl
  508. mov [eax + 3], cl
  509. mov [ebx + 2], ch
  510. mov [ebx + 3], ch
  511. // if ((width_diff-8) > 0)
  512. sub ebp, 4
  513. jz Lupdate_UVPlane
  514. // *UPlane++ = t8u
  515. // *UPlane++ = t8u
  516. // *VPlane++ = t8v
  517. // *VPlane++ = t8v
  518. mov [eax + 4], cl
  519. mov [eax + 5], cl
  520. mov [ebx + 4], ch
  521. mov [ebx + 5], ch
  522. Lupdate_UVPlane:
  523. shr edx, 1
  524. lea eax, [eax + edx]
  525. mov [esp + UPLANE], eax
  526. lea ebx, [ebx + edx]
  527. mov [esp + VPLANE], ebx
  528. Lno_width_diff:
  529. // if (stretch && (0 == k) && j)
  530. mov eax, [esp + STRETCH]
  531. test eax, eax
  532. jz L14
  533. mov eax, [esp + LOOP_K]
  534. test eax, eax
  535. jnz L14
  536. mov eax, [esp + LOOP_J]
  537. test eax, eax
  538. jz L14
  539. // spill YPlane ptr
  540. mov [esp + YPLANE], edi
  541. nop
  542. // for (i = OutputWidth; i > 0; i -= 8)
  543. // assign (ebx, pyprev)
  544. // assign (ecx, t)
  545. // assign (edx, pynext)
  546. // assign (edi, pyspace)
  547. // assign (ebp, i)
  548. // make sure offsets are such that there are no bank conflicts here
  549. mov ebx, [esp + PYPREV]
  550. mov edi, [esp + PYSPACE]
  551. mov edx, [esp + PYNEXT]
  552. mov ebp, [esp + OUTPUT_WIDTH]
  553. // t = (*pyprev++ & 0xFEFEFEFE) >> 1
  554. // t += (*pynext++ & 0xFEFEFEFE) >> 1
  555. // *pyspace++ = t
  556. // t = (*pyprev++ & 0xFEFEFEFE) >> 1
  557. // t += (*pynext++ & 0xFEFEFEFE) >> 1
  558. // *pyspace++ = t
  559. L15:
  560. // 1
  561. mov eax, [ebx]
  562. lea ebx, [ebx + 4]
  563. // 2
  564. mov ecx, [edx]
  565. lea edx, [edx + 4]
  566. // 3
  567. shr ecx, 1
  568. and eax, 0xFEFEFEFE
  569. // 4
  570. shr eax, 1
  571. and ecx, 0x7F7F7F7F
  572. // 5
  573. add eax, ecx
  574. mov ecx, [ebx]
  575. // 6
  576. shr ecx, 1
  577. mov [edi], eax
  578. // 7
  579. mov eax, [edx]
  580. and ecx, 0x7F7F7F7F
  581. // 8
  582. shr eax, 1
  583. lea edi, [edi + 4]
  584. // 9
  585. and eax, 0x7F7F7F7F
  586. lea ebx, [ebx + 4]
  587. // 10
  588. lea edx, [edx + 4]
  589. add eax, ecx
  590. // 11
  591. mov [edi], eax
  592. lea edi, [edi + 4]
  593. // 12
  594. sub ebp, 8
  595. jnz L15
  596. // kill (ebx, pyprev)
  597. // kill (ecx, t)
  598. // kill (edx, pynext)
  599. // kill (edi, pyspace)
  600. // kill (ebp, i)
  601. // restore YPlane
  602. mov edi, [esp + YPLANE]
  603. // pnext += BackTwoLines
  604. L14:
  605. add esi, [esp + BACK_TWO_LINES]
  606. // YPlane += byte_ypitch_adj;
  607. add edi, [esp + BYTE_YPITCH_ADJ]
  608. // if(0 == (k&1))
  609. mov eax, [esp + LOOP_K]
  610. and eax, 1
  611. jnz L16
  612. // UPlane += byte_uvpitch_adj;
  613. // VPlane += byte_uvpitch_adj;
  614. mov eax, [esp + BYTE_UVPITCH_ADJ]
  615. add [esp + UPLANE], eax
  616. add [esp + VPLANE], eax
  617. L16:
  618. inc DWORD PTR [esp + LOOP_K]
  619. mov eax, [esp + LOOP_K]
  620. cmp eax, [esp + MARK]
  621. jl L5
  622. // if (stretch)
  623. cmp DWORD PTR [esp + STRETCH], 0
  624. je L17
  625. // pyprev = YPlane - pitch
  626. mov eax, edi
  627. sub eax, [esp + PITCH_PARM]
  628. mov [esp + PYPREV], eax
  629. // pyspace = YPlane
  630. mov [esp + PYSPACE], edi
  631. // pynext = (YPlane += pitch)
  632. add edi, [esp + PITCH_PARM]
  633. mov [esp + PYNEXT], edi
  634. L17:
  635. inc DWORD PTR [esp + LOOP_J]
  636. mov eax, [esp + LOOP_J]
  637. cmp eax, [esp + LUMA_ITERS]
  638. jl L4
  639. // kill (esi, pnext)
  640. // kill (edi, YPlane)
  641. // ASM version of C_HEIGHT_FILL
  642. // if (height_diff)
  643. mov eax, [esp + HEIGHT_DIFF]
  644. test eax, eax
  645. jz Lno_height_diff
  646. // pyspace = (U32 *)YPlane
  647. mov esi, edi
  648. // pyprev = (U32 *)(YPlane - pitch)
  649. sub esi, [esp + PITCH_PARM]
  650. // for (j = height_diff; j > 0; j--)
  651. Lheight_yfill_loop:
  652. mov ebx, [esp + WIDTHX16]
  653. // for (i = widthx16; i>0; i -=4)
  654. Lheight_yfill_row:
  655. // *pyspace++ = *pyprev++
  656. mov ecx, [esi]
  657. lea esi, [esi + 4]
  658. mov [edi], ecx
  659. lea edi, [edi + 4]
  660. sub ebx, 4
  661. jnz Lheight_yfill_row
  662. // pyspace += word_ypitch_adj
  663. // pyprev += word_ypitch_adj
  664. add esi, [esp + BYTE_YPITCH_ADJ]
  665. add edi, [esp + BYTE_YPITCH_ADJ]
  666. dec eax
  667. jnz Lheight_yfill_loop
  668. mov eax, [esp + HEIGHT_DIFF]
  669. mov edi, [esp + UPLANE]
  670. // puvspace = (U32 *)UPlane
  671. mov esi, edi
  672. // puvprev = (U32 *)(UPlane - pitch)
  673. sub esi, [esp + PITCH_PARM]
  674. // for (j = height_diff; j > 0; j -= 2)
  675. Lheight_ufill_loop:
  676. mov ebx, [esp + WIDTHX16]
  677. // for (i = widthx16; i>0; i -= 8)
  678. Lheight_ufill_row:
  679. // *puvspace++ = *puvprev++
  680. mov ecx, [esi]
  681. mov [edi], ecx
  682. lea esi, [esi + 4]
  683. lea edi, [edi + 4]
  684. sub ebx, 8
  685. jnz Lheight_ufill_row
  686. // puvspace += word_uvpitch_adj
  687. // puvprev += word_uvpitch_adj
  688. add esi, [esp + BYTE_UVPITCH_ADJ]
  689. add edi, [esp + BYTE_UVPITCH_ADJ]
  690. sub eax, 2
  691. jnz Lheight_ufill_loop
  692. mov eax, [esp + HEIGHT_DIFF]
  693. mov edi, [esp + VPLANE]
  694. // puvspace = (U32 *)VPlane
  695. mov esi, edi
  696. // puvprev = (U32 *)(VPlane - pitch)
  697. sub esi, [esp + PITCH_PARM]
  698. // for (j = height_diff; j > 0; j -= 2)
  699. Lheight_vfill_loop:
  700. mov ebx, [esp + WIDTHX16]
  701. // for (i = widthx16; i>0; i -= 8)
  702. Lheight_vfill_row:
  703. // *puvspace++ = *puvprev++
  704. mov ecx, [esi]
  705. mov [edi], ecx
  706. lea esi, [esi + 4]
  707. lea edi, [edi + 4]
  708. sub ebx, 8
  709. jnz Lheight_vfill_row
  710. // puvspace += word_uvpitch_adj
  711. // puvprev += word_uvpitch_adj
  712. add esi, [esp + BYTE_UVPITCH_ADJ]
  713. add edi, [esp + BYTE_UVPITCH_ADJ]
  714. sub eax, 2
  715. jnz Lheight_vfill_loop
  716. Lno_height_diff:
  717. // if (stretch)
  718. mov esi, [esp + PYPREV]
  719. cmp DWORD PTR [esp + STRETCH], 0
  720. je L19
  721. // for (i = OutputWidth; i > 0; i -= 4)
  722. // assign (esi, pyprev)
  723. // assign (edi, pyspace)
  724. // assign (ebp, i)
  725. mov ebp, [esp + OUTPUT_WIDTH]
  726. mov edi, [esp + PYSPACE]
  727. L18:
  728. mov ecx, [esi]
  729. lea esi, [esi + 4]
  730. mov [edi], ecx
  731. lea edi, [edi + 4]
  732. sub ebp, 4
  733. jnz L18
  734. // kill (esi, pyprev)
  735. // kill (edi, pyspace)
  736. // kill (ebp, i)
  737. L19:
  738. add esp, LOCALSIZE
  739. pop edi
  740. pop esi
  741. pop ebx
  742. pop ebp
  743. ret
  744. }
  745. }
  746. #undef LOCALSIZE
  747. #undef PITCH_PARM
  748. #undef VPLANE
  749. #undef UPLANE
  750. #undef YPLANE
  751. #undef LP_INPUT
  752. #undef OUTPUT_HEIGHT_WORD
  753. #undef OUTPUT_WIDTH_WORD
  754. #undef LPBI_INPUT
  755. #undef OUTPUT_WIDTH
  756. #undef PYPREV
  757. #undef PYSPACE
  758. #undef PYNEXT
  759. #undef PUVPREV
  760. #undef PUVSPACE
  761. #undef LOOP_I
  762. #undef LOOP_J
  763. #undef LOOP_K
  764. #undef BACK_TWO_LINES
  765. #undef WIDTHX16
  766. #undef HEIGHTX16
  767. #undef WIDTH_DIFF
  768. #undef HEIGHT_DIFF
  769. #undef WIDTH_ADJ
  770. #undef HEIGHT_ADJ
  771. #undef STRETCH
  772. #undef ASPECT
  773. #undef LUMA_ITERS
  774. #undef MARK
  775. #undef BYTE_YPITCH_ADJ
  776. #undef BYTE_UVPITCH_ADJ
  777. #endif // } H263P