Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

454 lines
23 KiB

  1. ;-------------------------------------------------------------------------
  2. ; INTEL Corporation Proprietary Information
  3. ;
  4. ; This listing is supplied under the terms of a license
  5. ; agreement with INTEL Corporation and may not be copied
  6. ; nor disclosed except in accordance with the terms of
  7. ; that agreement.
  8. ;
  9. ; Copyright (c) 1996 Intel Corporation.
  10. ; All Rights Reserved.
  11. ;
  12. ;-------------------------------------------------------------------------
  13. ; $Header: S:\h26x\src\dec\cxm12321.asv 1.4 24 May 1996 10:30:20 AGUPTA2 $
  14. ; $Log: S:\h26x\src\dec\cxm12321.asv $
  15. ;//
  16. ;// Rev 1.4 24 May 1996 10:30:20 AGUPTA2
  17. ;// Cosmetic changes to adhere to a common coding convention in all
  18. ;// MMX color convertor files.
  19. ;//
  20. ;//
  21. ;// Rev 1.3 11 Apr 1996 09:51:14 RMCKENZX
  22. ;// Changed return to pop the stack.
  23. ;//
  24. ;// Rev 1.2 09 Apr 1996 17:15:30 RMCKENZX
  25. ;// Optimized.
  26. ;//
  27. ;// Rev 1.1 09 Apr 1996 09:50:32 RMCKENZX
  28. ;// Added aspect correction, fixed wrap-around, changed calling sequence.
  29. ;//
  30. ;// Rev 1.0 06 Apr 1996 17:06:06 RMCKENZX
  31. ;// Initial revision.
  32. ;
  33. ;-------------------------------------------------------------------------
  34. ;
  35. ; +---------- Color convertor.
  36. ; |+--------- For both H261 and H263.
  37. ; ||+-------- MMx Version.
  38. ; |||++------ Convert from YUV12.
  39. ; |||||++---- Convert to RGB32.
  40. ; |||||||+--- Zoom by one, i.e. non-zoom.
  41. ; ||||||||
  42. ; cxm12321 -- This function performs YUV12-to-RGB32 color conversion for H26x.
  43. ; It handles the format in which the low order byte is B, the
  44. ; second byte is G, and the third byte is R, and the high order
  45. ; byte is 0.
  46. ;
  47. ; The YUV12 input is planar, 8 bits per pel. The Y plane may have
  48. ; a pitch of up to 768. It may have a width less than or equal
  49. ; to the pitch. It must be DWORD aligned, and preferably QWORD
  50. ; aligned. Pitch and Width must be a multiple of 8. The U
  51. ; and V planes may have a different pitch than the Y plane, subject
  52. ; to the same limitations.
  53. ;
  ;-------------------------------------------------------------------------
  ; Assembler set-up.
  ;   * Identifiers are case sensitive (CASEMAP:NONE).
  ;   * No automatic prologue is generated; the procedure below saves its
  ;     own registers and builds its own frame.
  ;   * A bare RET is expanded through ReturnAndRelieveEpilogueMacro, which
  ;     is not defined in this file (presumably memmodel.inc -- confirm).
  ;-------------------------------------------------------------------------
  OPTION  CASEMAP:NONE
  OPTION  PROLOGUE:None
  OPTION  EPILOGUE:ReturnAndRelieveEpilogueMacro

  .586

  ; Keep the include files out of the listing.
  .xlist
  include iammx.inc                     ; presumably the MMX mnemonics -- confirm
  include memmodel.inc                  ; presumably DIST / LANG and the epilogue macro -- confirm
  .list

  ; Declare both segments once, code first, with their full attributes.
  ; Later re-openings only need the segment name.
  MMXCODE1 SEGMENT PARA USE32 PUBLIC 'CODE'
  MMXCODE1 ENDS

  MMXDATA1 SEGMENT PARA USE32 PUBLIC 'DATA'
  MMXDATA1 ENDS

  ;-------------------------------------------------------------------------
  ; Conversion constants.
  ;
  ; Each constant is one quadword holding four signed 16-bit lanes in 10.6
  ; fixed point (6 fraction bits).  Chroma coefficients are scaled by 64
  ; and by 255/224.  The luma coefficient is 64 * (255/219) = 74.55055,
  ; which is not an integer, so it is dithered between 74 and 75: Ymul0
  ; and Ymul1 carry the two opposite lane patterns.
  ;-------------------------------------------------------------------------
  MMXDATA1 SEGMENT
          ALIGN   8

          PUBLIC  C VtR, VtG, UtG, UtB, Ymul0, Ymul1, Ysub, UVsub

  VtR     DWORD   2 DUP (00660066h)     ;  1.402   ->  102.14571  (V into red)
  VtG     DWORD   2 DUP (0ffccffcch)    ; -0.71414 ->  -52.03020  (V into green)
  UtG     DWORD   2 DUP (0ffe7ffe7h)    ; -0.34414 ->  -25.07306  (U into green)
  UtB     DWORD   2 DUP (00810081h)     ;  1.772   ->  129.10286  (U into blue)
  Ymul0   DWORD   2 DUP (004a004bh)     ; 74.55055, lanes 75,74,75,74 (low lane first)
  Ymul1   DWORD   2 DUP (004b004ah)     ; 74.55055, lanes 74,75,74,75 (low lane first)
  Ysub    DWORD   2 DUP (00100010h)     ; luma bias, 16 per lane
  UVsub   DWORD   2 DUP (00800080h)     ; chroma bias, 128 per lane
  MMXDATA1 ENDS
  MMXCODE1 SEGMENT

  ;-------------------------------------------------------------------------
  ; MMX_YUV12ToRGB32
  ;
  ; Converts a planar YUV12 frame (8 bits per sample, one U and one V
  ; sample per 2x2 block of Y) to 32-bit pixels stored as B, G, R, 0 from
  ; the low byte upwards.  No zoom.
  ;
  ; Arguments (13 DWORDs on the stack; this routine removes them on return,
  ; see ArgumentBytes):
  ;   AYPlane, AVPlane, AUPlane   source plane addresses
  ;   AFrameWidth                 width in pels; must be 1..MaxFrameWidth
  ;                               and, per the file header, a multiple of 8
  ;   AFrameHeight                height in lines; must be > 0
  ;   AYPitch, AVPitch            byte pitch of the Y plane and of the U/V
  ;                               planes
  ;   AAspectAdjustmentCnt        aspect-correction period (see below)
  ;   AColorConvertedFrame        output buffer base
  ;   ADCIOffset                  added to the output base
  ;   ACCOffsetToLine0            added to the output base
  ;   ACCOPitch                   byte pitch of the output
  ;   ACCType                     not referenced by this routine
  ;
  ; Returns: nothing.  If the width or height is out of range the routine
  ;          returns immediately without writing any output.
  ; Preserves esi, edi, ebp, ebx.  Uses eax, ecx, edx and mm0-mm7.
  ; No EMMS is issued here.
  ;
  ; Algorithm:
  ;   The outer loop handles two Y lines (sharing one U/V line) per pass.
  ;   The even-line inner loop converts 8 pels per iteration and stashes
  ;   the chroma contributions to R, G and B on the stack.  The odd-line
  ;   inner loop re-reads that stash instead of recomputing chroma.
  ;   Aspect correction drops an odd output line whenever the running
  ;   aspect count, reduced by 2 per line pair, reaches zero or below.
  ;
  ; Register roles inside the loops:
  ;   eax  Y plane address,   pre-biased by Width
  ;   ecx  U plane address,   pre-biased by Width/2
  ;   edx  V plane address,   pre-biased by Width/2
  ;   edi  RGB output address, pre-biased by 4*Width
  ;   ebp  inner loop counter, runs from -Width/2 up to 0 in steps of 4
  ;   ebx  outer loop counter, runs from Height down in steps of 2
  ;   esi  byte offset into the chroma stash, 30h per 8 pels
  ;-------------------------------------------------------------------------
  MMX_YUV12ToRGB32 PROC DIST LANG PUBLIC,
          AYPlane: DWORD,
          AVPlane: DWORD,
          AUPlane: DWORD,
          AFrameWidth: DWORD,
          AFrameHeight: DWORD,
          AYPitch: DWORD,
          AVPitch: DWORD,
          AAspectAdjustmentCnt: DWORD,
          AColorConvertedFrame: DWORD,
          ADCIOffset: DWORD,
          ACCOffsetToLine0: DWORD,
          ACCOPitch: DWORD,
          ACCType: DWORD

  ; ---- frame layout -------------------------------------------------------
  MaxFrameWidth   = 768                 ; widest line the stash can hold
  ArgumentBytes   = 13 * 4              ; 52 bytes of arguments popped on return
  LocalSize       = 20h                 ; 7 DWORD locals, rounded up to 32 bytes
  RegisterSize    = 10h                 ; the 4 registers pushed on entry
  StashSize       = MaxFrameWidth * 6   ; 1200h: 30h bytes of chroma per 8 pels
  LocalFrameSize  = LocalSize + StashSize
  FrameAdjustOne  = 800h                ; frame is allocated in two steps with
  FrameAdjustTwo  = LocalFrameSize - FrameAdjustOne ; a stack touch in between

  argument_base   EQU ebp + RegisterSize
  local_base      EQU esp
  stash_base      EQU esp + LocalSize

  ; ---- arguments (valid only while ebp still holds the entry frame) -------
  YPlane                  EQU argument_base + 04h
  VPlane                  EQU argument_base + 08h
  UPlane                  EQU argument_base + 0ch
  FrameWidth              EQU argument_base + 10h
  FrameHeight             EQU argument_base + 14h
  LumaPitch               EQU argument_base + 18h
  ChromaPitch             EQU argument_base + 1ch
  AspectAdjustmentCount   EQU argument_base + 20h
  ColorConvertedFrame     EQU argument_base + 24h
  DCIOffset               EQU argument_base + 28h
  CCOffsetToLine0         EQU argument_base + 2ch
  CCOPitch                EQU argument_base + 30h

  ; ---- locals (relative to the aligned esp) -------------------------------
  localAspectCount        EQU local_base + 00h
  localAspectAdjustment   EQU local_base + 04h
  localWidth              EQU local_base + 08h
  localYPitch             EQU local_base + 0ch
  localUVPitch            EQU local_base + 10h
  localOutPitch           EQU local_base + 14h
  localStashEsp           EQU local_base + 18h

  ; ---- symbolic MMX register names ----------------------------------------
  mmx_zero                EQU mm0       ; all-zero, used for byte->word unpacking

  ; ---- entry: save registers, build an aligned frame ----------------------
          push    esi
          push    edi
          push    ebp
          push    ebx
          mov     ebp, esp                      ; ebp = frame for argument access
          and     esp, -32                      ; align locals to 32 bytes

          pxor    mmx_zero, mmx_zero
          sub     esp, FrameAdjustOne
          mov     edi, [CCOPitch]
          mov     ecx, [ChromaPitch]
          mov     ebx, [esp]                    ; touch the stack; value is discarded
                                                ; (presumably a guard-page probe,
                                                ; the frame exceeds 4 KB -- confirm)
          sub     esp, FrameAdjustTwo
          mov     eax, [LumaPitch]
          nop                                   ; filler kept from the original schedule
          mov     [localStashEsp], ebp          ; remember esp as it was after the pushes
          mov     [localOutPitch], edi
          mov     [localUVPitch], ecx
          mov     [localYPitch], eax
          mov     eax, [AspectAdjustmentCount]
          mov     edi, [ColorConvertedFrame]
          mov     [localAspectCount], eax
          mov     esi, [FrameWidth]

  ; Reject widths outside 1..MaxFrameWidth.  The stash is a fixed-size stack
  ; area lying directly below the saved registers and the return address, so
  ; a wider line would overwrite them.  One unsigned compare of (Width - 1)
  ; catches zero, negative and oversize values together.
          lea     ebx, [esi-1]
          cmp     ebx, MaxFrameWidth-1
          ja      finish

          mov     ebx, [DCIOffset]
          mov     edx, [CCOffsetToLine0]
          add     edi, ebx
          add     edi, edx
          mov     [localAspectAdjustment], eax
          mov     eax, [YPlane]
          lea     edi, [edi+4*esi]              ; RGB base, biased by 4*Width
          mov     ecx, [UPlane]
          mov     edx, [VPlane]
          mov     ebx, [FrameHeight]            ; outer loop control

  ; The loops below are do-while shaped and would process one line pair even
  ; for a non-positive height, so bail out here instead.
          test    ebx, ebx
          jle     finish

          sar     esi, 1                        ; esi = Width/2
          xor     ebp, ebp                      ; arguments are unreachable from here on
          add     ecx, esi                      ; U base, biased by Width/2
          add     edx, esi                      ; V base, biased by Width/2
          lea     eax, [eax+2*esi]              ; Y base, biased by Width
          sub     ebp, esi                      ; ebp = -Width/2, inner loop control
          mov     [localWidth], ebp
          xor     esi, esi                      ; stash offset = 0

  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  ; outer loop: one even line, then (unless skipped) one odd line
  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  do_two_lines:

  ; ---- even line: 8 pels per iteration, chroma computed and stashed -------
  do_next_even_line:
          movd    mm3, [ecx+ebp]                ; 4 U bytes (one per pel pair)
          movd    mm4, [edx+ebp]                ; 4 V bytes (one per pel pair)
          punpcklbw mm3, mmx_zero               ; .U76 .U54 .U32 .U10
          psubw   mm3, UVsub                    ; unbias U (sub 128)
          punpcklbw mm4, mmx_zero               ; .V76 .V54 .V32 .V10
          psubw   mm4, UVsub                    ; unbias V (sub 128)
          movq    mm1, mm3                      ; U copy, becomes blue
          pmullw  mm3, UtG                      ; green contribution from U
          movq    mm2, mm4                      ; V copy, becomes red
          pmullw  mm4, VtG                      ; green contribution from V

          movq    mm6, [eax+2*ebp]              ; Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0
          movq    mm7, mm6
          punpcklbw mm6, mmx_zero               ; ..Y3 ..Y2 ..Y1 ..Y0
          psubw   mm6, Ysub                     ; unbias Y (sub 16); wraps, so Y < 16
                                                ; goes negative and is clamped by the
                                                ; final packuswb
          punpckhbw mm7, mmx_zero               ; ..Y7 ..Y6 ..Y5 ..Y4
          psubw   mm7, Ysub                     ; unbias Y (sub 16), same as above
          paddsw  mm3, mm4                      ; .G76 .G54 .G32 .G10 (from chroma)
          pmullw  mm6, Ymul0                    ; scaled luma, pels 3..0
          movq    mm5, mm3
          pmullw  mm7, Ymul0                    ; scaled luma, pels 7..4
          punpcklwd mm3, mm3                    ; ..G3 ..G2 ..G1 ..G0 (from chroma)
          pmullw  mm1, UtB                      ; .B76 .B54 .B32 .B10 (from U)
          punpckhwd mm5, mm5                    ; ..G7 ..G6 ..G5 ..G4 (from chroma)
          movq    [stash_base+esi+00h], mm3     ; stash low green chroma
          paddsw  mm3, mm6                      ; green 3..0, scaled total
          movq    [stash_base+esi+08h], mm5     ; stash high green chroma
          paddsw  mm5, mm7                      ; green 7..4, scaled total
          movq    mm4, mm1                      ; blue chroma copy
          psraw   mm3, 6                        ; drop the 6 fraction bits
          pmullw  mm2, VtR                      ; .R76 .R54 .R32 .R10 (from V)
          psraw   mm5, 6
          packuswb mm3, mm5                     ; G7..G0, clamped to 0..255
          movq    mm5, mm2                      ; red chroma copy
          ; -------- green done (mm3) --------
          punpcklwd mm1, mm1                    ; ..B3 ..B2 ..B1 ..B0 (from U)
          punpckhwd mm4, mm4                    ; ..B7 ..B6 ..B5 ..B4 (from U)
          movq    [stash_base+esi+10h], mm1     ; stash low blue chroma
          punpcklwd mm2, mm2                    ; ..R3 ..R2 ..R1 ..R0 (from V)
          movq    [stash_base+esi+18h], mm4     ; stash high blue chroma
          punpckhwd mm5, mm5                    ; ..R7 ..R6 ..R5 ..R4 (from V)
          paddsw  mm1, mm6                      ; blue 3..0, scaled total
          paddsw  mm4, mm7                      ; blue 7..4, scaled total
          movq    [stash_base+esi+20h], mm2     ; stash low red chroma
          psraw   mm1, 6
          movq    [stash_base+esi+28h], mm5     ; stash high red chroma
          psraw   mm4, 6
          paddsw  mm2, mm6                      ; red 3..0, scaled total
          packuswb mm1, mm4                     ; B7..B0, clamped to 0..255
          ; -------- blue done (mm1) --------
          paddsw  mm5, mm7                      ; red 7..4, scaled total
          psraw   mm2, 6
          psraw   mm5, 6
          packuswb mm2, mm5                     ; R7..R0, clamped to 0..255
          ; -------- red done (mm2) --------

  ; Interleave red = mm2, green = mm3, blue = mm1 into B,G,R,0 pixels.
          movq    mm5, mm1                      ; blue copy
          punpcklbw mm1, mm3                    ; G3 B3 G2 B2 G1 B1 G0 B0
          movq    mm4, mm2                      ; red copy
          punpcklbw mm2, mmx_zero               ; -- R3 -- R2 -- R1 -- R0
          movq    mm6, mm1
          punpcklwd mm1, mm2                    ; -- R1 G1 B1 -- R0 G0 B0
          punpckhwd mm6, mm2                    ; -- R3 G3 B3 -- R2 G2 B2
          movq    [edi+8*ebp+00], mm1           ; write pels 0,1
          punpckhbw mm5, mm3                    ; G7 B7 G6 B6 G5 B5 G4 B4
          movq    [edi+8*ebp+08], mm6           ; write pels 2,3
          punpckhbw mm4, mmx_zero               ; -- R7 -- R6 -- R5 -- R4
          movq    mm7, mm5
          punpcklwd mm5, mm4                    ; -- R5 G5 B5 -- R4 G4 B4
          punpckhwd mm7, mm4                    ; -- R7 G7 B7 -- R6 G6 B6
          add     esi, 30h                      ; next stash slot
          movq    [edi+8*ebp+16], mm5           ; write pels 4,5
          movq    [edi+8*ebp+24], mm7           ; write pels 6,7
          add     ebp, 4                        ; 4 chroma samples = 8 pels consumed
          jl      do_next_even_line

  ; ---- between lines: advance every plane, then test the aspect count -----
          mov     ebp, [localOutPitch]
          mov     esi, [localUVPitch]
          add     edi, ebp                      ; next RGB line
          add     edx, esi                      ; next V line
          add     ecx, esi                      ; next U line
          mov     esi, [localYPitch]
          add     eax, esi                      ; next Y line (the odd one)
          mov     ebp, [localAspectCount]
          sub     ebp, 2
          jle     skip_odd_line                 ; count exhausted: drop the odd line
          mov     [localAspectCount], ebp
          mov     ebp, [localWidth]             ; reload inner loop control
          xor     esi, esi                      ; rewind the stash
          movq    mm7, Ymul1                    ; odd lines use the other dither pattern

  ; ---- odd line: 8 pels per iteration, chroma read back from the stash ----
  do_next_odd_line:
          movq    mm3, [eax+2*ebp]              ; Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0
          movq    mm4, mm3
          punpcklbw mm3, mmx_zero               ; ..Y3 ..Y2 ..Y1 ..Y0
          psubw   mm3, Ysub                     ; unbias Y (sub 16)
          punpckhbw mm4, mmx_zero               ; ..Y7 ..Y6 ..Y5 ..Y4
          psubw   mm4, Ysub                     ; unbias Y (sub 16)
          pmullw  mm3, mm7                      ; scaled luma, pels 3..0
          movq    mm2, [stash_base+esi+20h]     ; low red chroma
          pmullw  mm4, mm7                      ; scaled luma, pels 7..4
          movq    mm5, [stash_base+esi+28h]     ; high red chroma
          paddsw  mm2, mm3                      ; red 3..0, scaled total
          movq    mm1, [stash_base+esi+10h]     ; low blue chroma
          paddsw  mm5, mm4                      ; red 7..4, scaled total
          movq    mm6, [stash_base+esi+18h]     ; high blue chroma
          psraw   mm2, 6
          paddsw  mm1, mm3                      ; blue 3..0, scaled total
          psraw   mm5, 6
          paddsw  mm6, mm4                      ; blue 7..4, scaled total
          packuswb mm2, mm5                     ; R7..R0, clamped to 0..255
          ; -------- red done (mm2) --------
          paddsw  mm3, [stash_base+esi+00h]     ; green 3..0, scaled total
          psraw   mm1, 6
          paddsw  mm4, [stash_base+esi+08h]     ; green 7..4, scaled total
          psraw   mm6, 6
          packuswb mm1, mm6                     ; B7..B0, clamped to 0..255
          ; -------- blue done (mm1) --------
          psraw   mm3, 6
          psraw   mm4, 6
          packuswb mm3, mm4                     ; G7..G0, clamped to 0..255
          ; -------- green done (mm3) --------

  ; Interleave red = mm2, green = mm3, blue = mm1 into B,G,R,0 pixels.
          movq    mm5, mm1                      ; blue copy
          punpcklbw mm1, mm3                    ; G3 B3 G2 B2 G1 B1 G0 B0
          movq    mm4, mm2                      ; red copy
          punpcklbw mm2, mmx_zero               ; -- R3 -- R2 -- R1 -- R0
          movq    mm6, mm1
          punpcklwd mm1, mm2                    ; -- R1 G1 B1 -- R0 G0 B0
          punpckhwd mm6, mm2                    ; -- R3 G3 B3 -- R2 G2 B2
          movq    [edi+8*ebp+00], mm1           ; write pels 0,1
          punpckhbw mm5, mm3                    ; G7 B7 G6 B6 G5 B5 G4 B4
          movq    [edi+8*ebp+08], mm6           ; write pels 2,3
          punpckhbw mm4, mmx_zero               ; -- R7 -- R6 -- R5 -- R4
          movq    mm1, mm5
          punpcklwd mm5, mm4                    ; -- R5 G5 B5 -- R4 G4 B4
          punpckhwd mm1, mm4                    ; -- R7 G7 B7 -- R6 G6 B6
          add     esi, 30h                      ; next stash slot
          movq    [edi+8*ebp+16], mm5           ; write pels 4,5
          movq    [edi+8*ebp+24], mm1           ; write pels 6,7
          add     ebp, 4
          jl      do_next_odd_line

  ; ---- end of line pair ----------------------------------------------------
          mov     ebp, [localYPitch]
          mov     esi, [localOutPitch]
          add     eax, ebp                      ; next Y line (the next even one)
          add     edi, esi                      ; next RGB line
          mov     ebp, [localWidth]             ; reload inner loop control
          xor     esi, esi                      ; rewind the stash
          sub     ebx, 2                        ; two source lines consumed
          jg      do_two_lines

  ; ---- single exit: used by the loops and by the argument guards ----------
  finish:
          mov     esp, [localStashEsp]          ; discard locals, stash and alignment pad
          pop     ebx
          pop     ebp
          pop     edi
          pop     esi
          ret     ArgumentBytes                 ; return and remove the 13 arguments

  ; ---- aspect correction: the odd line is read past but not written -------
  ; On entry esi = localYPitch and ebp = aspect count - 2 (zero or negative).
  skip_odd_line:
          add     eax, esi                      ; step over the odd Y line
          mov     esi, [localAspectAdjustment]
          add     ebp, esi                      ; restart the aspect count
          xor     esi, esi                      ; rewind the stash
          mov     [localAspectCount], ebp
          mov     ebp, [localWidth]             ; reload inner loop control
          sub     ebx, 2                        ; two source lines consumed
          jg      do_two_lines
          jmp     finish                        ; same epilogue as the normal exit

  MMX_YUV12ToRGB32 ENDP
  MMXCODE1 ENDS
  END