Leaked source code of Windows Server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

426 lines
14 KiB

  OPTION PROLOGUE: None
  OPTION EPILOGUE: ReturnAndRelieveEpilogueMacro

  .xlist
  include iammx.inc
  include memmodel.inc
  .list

MMXCODE1 SEGMENT PARA USE32 PUBLIC 'CODE'
MMXCODE1 ENDS

MMXDATA1 SEGMENT PARA USE32 PUBLIC 'DATA'
MMXDATA1 ENDS

MMXCODE1 SEGMENT

;-----------------------------------------------------------------------------
; MMX_YUV12ToYUY2
;
; Syntax: MASM, Intel operand order, 32-bit flat code (USE32 segment).
;
; Purpose:
;   Interleaves three separate 8-bit planes (Y, U, V) into packed output
;   where every four bytes are  Y0 U Y1 V.  One U byte and one V byte are
;   consumed for every two Y bytes, and the same U/V line is used for both
;   rows of a row pair (the U/V pointers advance once per two Y rows).
;
; Arguments (all DWORD, read relative to esp; no ebp frame is built because
; OPTION PROLOGUE is None):
;   AuYPlane          address of the first Y row
;   AuVPlane          address of the first V row
;   AuUPlane          address of the first U row
;   AuWidth           pixels per row.  The column code works in groups of 16
;                     and then in groups of 4, so the width must be a
;                     multiple of 4.  A width of 0 returns immediately.
;   AuHeight          rows.  uHeight / 2 row pairs are processed, so a
;                     trailing odd row is ignored; a height below 2 returns
;                     immediately.
;   AuYPitch          byte distance between consecutive Y rows
;   AUVPitch          byte distance between consecutive U (and V) rows
;   AbShapingFlag     row-dropping period N.  LineCount starts at N - 1 and
;                     is decremented after each emitted row; a row that
;                     finds LineCount == 0 is not written (the output
;                     pointer does not advance for it) and LineCount is
;                     reloaded with N - 1.  With N == 0 the counter starts
;                     at -1 and does not reach 0 in practice, so no row is
;                     dropped.
;   AuCCOutputBuffer, AlOutput, AuOffsetToLine0
;                     summed to form the address of the first output row
;   AintPitch         byte distance between consecutive output rows
;   ACCType           not referenced by this routine
;
; Stack / registers:
;   Saves and restores ebx, esi, edi, ebp.  Uses eax, ecx, edx, mm0-mm4 and
;   the flags as scratch.  Allocates LocalFrameSize bytes of locals.
;   Returns with "ret 52", i.e. the callee removes the 13 DWORD arguments.
;   No return value is produced deliberately.
;
; NOTE(review): this routine executes MMX instructions but contains no EMMS.
;   Presumably the caller clears the MMX state before any x87 code runs --
;   confirm against the call sites.
; NOTE(review): DIST and LANG are not defined in this file; presumably they
;   come from memmodel.inc -- confirm.
;-----------------------------------------------------------------------------
MMX_YUV12ToYUY2 proc DIST LANG PUBLIC,
    AuYPlane: DWORD,
    AuVPlane: DWORD,
    AuUPlane: DWORD,
    AuWidth: DWORD,
    AuHeight: DWORD,
    AuYPitch: DWORD,
    AUVPitch: DWORD,
    AbShapingFlag: DWORD,
    AuCCOutputBuffer: DWORD,
    AlOutput: DWORD,
    AuOffsetToLine0: DWORD,
    AintPitch: DWORD,
    ACCType: DWORD

LocalFrameSize      = 52
RegisterStorageSize = 16                        ; 4 registers pushed

; Argument offsets from esp once the registers are pushed and the local
; frame is allocated.  The extra 4 in the first entry skips the return
; address.
uYPlane         = LocalFrameSize + RegisterStorageSize + 4
uVPlane         = LocalFrameSize + RegisterStorageSize + 8
uUPlane         = LocalFrameSize + RegisterStorageSize + 12
uWidth          = LocalFrameSize + RegisterStorageSize + 16
uHeight         = LocalFrameSize + RegisterStorageSize + 20
uYPitch         = LocalFrameSize + RegisterStorageSize + 24
uUVPitch        = LocalFrameSize + RegisterStorageSize + 28
bShapingFlag    = LocalFrameSize + RegisterStorageSize + 32
uCCOutputBuffer = LocalFrameSize + RegisterStorageSize + 36
lOutput         = LocalFrameSize + RegisterStorageSize + 40
uOffsetToLine0  = LocalFrameSize + RegisterStorageSize + 44
intPitch        = LocalFrameSize + RegisterStorageSize + 48
CCType          = LocalFrameSize + RegisterStorageSize + 52

; Local offsets from esp (13 DWORD slots).  Only Y, U, V, Outt, ASMTMP2 and
; LineCount are referenced below; the remaining slots are kept so the frame
; layout is unchanged.
ASMTMP1   = 48                                  ; slot 13 (unused here)
Y         = 44                                  ; slot 12
U         = 40                                  ; slot 11
V         = 36                                  ; slot 10
Outt      = 32                                  ; slot  9
YTemp     = 28                                  ; slot  8 (unused here)
UTemp     = 24                                  ; slot  7 (unused here)
VTemp     = 20                                  ; slot  6 (unused here)
ASMTMP2   = 16                                  ; slot  5: row-pair counter
Col       = 12                                  ; slot  4 (unused here)
OutTemp   = 8                                   ; slot  3 (unused here)
VAL       = 4                                   ; slot  2 (unused here)
LineCount = 0                                   ; slot  1

; Arguments relative to esp
_uYPlane         EQU [esp + uYPlane]
_uVPlane         EQU [esp + uVPlane]
_uUPlane         EQU [esp + uUPlane]
_uWidth          EQU [esp + uWidth ]
_uHeight         EQU [esp + uHeight]
_uYPitch         EQU [esp + uYPitch]
_uUVPitch        EQU [esp + uUVPitch]
_bShapingFlag    EQU [esp + bShapingFlag]
_uCCOutputBuffer EQU [esp + uCCOutputBuffer]
_lOutput         EQU [esp + lOutput]
_uOffsetToLine0  EQU [esp + uOffsetToLine0]
_intPitch        EQU [esp + intPitch]
_uCCType         EQU [esp + CCType]

; Locals relative to esp
_ASMTMP1   EQU [esp + ASMTMP1]
_Y         EQU [esp + Y]
_U         EQU [esp + U]
_V         EQU [esp + V]
_Out       EQU [esp + Outt]
_YTemp     EQU [esp + YTemp]
_UTemp     EQU [esp + UTemp]
_VTemp     EQU [esp + VTemp]
_ASMTMP2   EQU [esp + ASMTMP2]
_Col       EQU [esp + Col]
_OutTemp   EQU [esp + OutTemp]
_VAL       EQU [esp + VAL]
_LineCount EQU [esp + LineCount]

; ---- Save registers and build the local frame ------------------------------
        push    ebx
        push    esi
        push    edi
        push    ebp
        sub     esp, LocalFrameSize

; ---- Reject degenerate frames ----------------------------------------------
; The row loop below is a dec/jne countdown and the column tail code always
; stores its first group before testing the count, so a zero row-pair count
; or a zero width must be filtered out here.
        mov     eax, DWORD PTR _uHeight         ; eax = uHeight
        sar     eax, 1                          ; eax = uHeight / 2 (row pairs)
        test    eax, eax
        jle     CleanUp                         ; no complete row pair
        mov     DWORD PTR _ASMTMP2, eax         ; row-pair counter, counts down
        mov     eax, DWORD PTR _uWidth          ; eax = uWidth
        test    eax, eax
        jz      CleanUp                         ; nothing to convert per row

; ---- Initialise the running pointers and the shaping counter ---------------
        mov     eax, DWORD PTR _bShapingFlag    ; eax = bShapingFlag
        mov     ecx, DWORD PTR _uYPlane         ; ecx = uYPlane
        dec     eax                             ; eax = bShapingFlag - 1
        mov     edx, DWORD PTR _uUPlane         ; edx = uUPlane
        mov     DWORD PTR _LineCount, eax       ; LineCount = bShapingFlag - 1
        mov     DWORD PTR _Y, ecx               ; Y = uYPlane
        mov     eax, DWORD PTR _uVPlane         ; eax = uVPlane
        mov     ecx, DWORD PTR _uOffsetToLine0  ; ecx = uOffsetToLine0
        mov     DWORD PTR _U, edx               ; U = uUPlane
        add     ecx, DWORD PTR _lOutput         ; ecx = uOffsetToLine0 + lOutput
        mov     DWORD PTR _V, eax               ; V = uVPlane
        mov     eax, DWORD PTR _uCCOutputBuffer ; eax = uCCOutputBuffer
        add     eax, ecx                        ; eax = uCCOutputBuffer +
                                                ;       uOffsetToLine0 + lOutput
        mov     DWORD PTR _Out, eax             ; Out = first output row

; ---- Outer loop: one iteration per row pair --------------------------------
RowLoop:
        mov     ecx, DWORD PTR _Y               ; ecx = Y row pointer
        mov     edi, DWORD PTR _U               ; edi = U row pointer
        mov     ebp, DWORD PTR _V               ; ebp = V row pointer
        mov     esi, DWORD PTR _Out             ; esi = output row pointer
        mov     eax, DWORD PTR _LineCount       ; eax = LineCount
        test    eax, eax                        ; LineCount == 0?
        je      SkipEvenRow                     ; yes: drop this row
        mov     eax, DWORD PTR _uWidth          ; eax = pixels left in row

; The frame width can be any multiple of 4, and there is no buffer padding
; to overrun, so whatever is left after the 16-pixel groups is handled by
; the scalar tail code.
        test    eax, 0FFFFFFF0H                 ; at least 16 pixels?
        jz      EvenRowTail                     ; no: scalar tail only

EvenRowPels:                                    ; 16 pixels per iteration
        movq    mm0, [ecx]                      ; [ Y07 Y06 Y05 Y04 Y03 Y02 Y01 Y00 ]
        movq    mm1, [edi]                      ; [ U07 U06 U05 U04 U03 U02 U01 U00 ]
        movq    mm2, [ebp]                      ; [ V07 V06 V05 V04 V03 V02 V01 V00 ]
        movq    mm3, mm1
        punpcklbw mm3, mm2                      ; [ V03 U03 V02 U02 V01 U01 V00 U00 ]
        movq    mm4, mm0
        punpcklbw mm4, mm3                      ; [ V01 Y03 U01 Y02 V00 Y01 U00 Y00 ]
        movq    [esi], mm4                      ; store output bytes 0..7
        psrlq   mm3, 32                         ; [ 0 0 0 0 V03 U03 V02 U02 ]
        psrlq   mm0, 32                         ; [ 0 0 0 0 Y07 Y06 Y05 Y04 ]
        punpcklbw mm0, mm3                      ; [ V03 Y07 U03 Y06 V02 Y05 U02 Y04 ]
        movq    [esi+8], mm0                    ; store output bytes 8..15
        movq    mm0, [ecx+8]                    ; [ Y15 Y14 Y13 Y12 Y11 Y10 Y09 Y08 ]
        psrlq   mm1, 32                         ; [ 0 0 0 0 U07 U06 U05 U04 ]
        psrlq   mm2, 32                         ; [ 0 0 0 0 V07 V06 V05 V04 ]
        movq    mm3, mm1
        punpcklbw mm3, mm2                      ; [ V07 U07 V06 U06 V05 U05 V04 U04 ]
        movq    mm4, mm0
        punpcklbw mm4, mm3                      ; [ V05 Y11 U05 Y10 V04 Y09 U04 Y08 ]
        movq    [esi+16], mm4                   ; store output bytes 16..23
        psrlq   mm3, 32                         ; [ 0 0 0 0 V07 U07 V06 U06 ]
        psrlq   mm0, 32                         ; [ 0 0 0 0 Y15 Y14 Y13 Y12 ]
        punpcklbw mm0, mm3                      ; [ V07 Y15 U07 Y14 V06 Y13 U06 Y12 ]
        movq    [esi+24], mm0                   ; store output bytes 24..31
        lea     ecx, [ecx+16]                   ; Y   += 16
        lea     edi, [edi+8]                    ; U   += 8
        lea     ebp, [ebp+8]                    ; V   += 8
        lea     esi, [esi+32]                   ; Out += 32
        sub     eax, 16
        test    eax, 0FFFFFFF0H                 ; another full group of 16?
        jnz     EvenRowPels
        test    eax, eax
        jz      EvenRowDone                     ; width was a multiple of 16

; Scalar tail: eax is 4, 8 or 12 here.  Each group below emits 4 pixels
; (8 output bytes) from 4 Y bytes, 2 U bytes and 2 V bytes.
EvenRowTail:
        mov     ebx, [ecx]                      ; [ Y03 Y02 Y01 Y00 ]
        mov     dl, [edi]                       ; U00
        mov     dh, [ebp]                       ; V00
        mov     [esi], bl                       ; Y00
        mov     [esi+1], dl                     ; U00
        mov     [esi+2], bh                     ; Y01
        mov     [esi+3], dh                     ; V00
        shr     ebx, 16                         ; bl = Y02, bh = Y03
        mov     dl, [edi+1]                     ; U01
        mov     dh, [ebp+1]                     ; V01
        mov     [esi+4], bl
        mov     [esi+5], dl
        mov     [esi+6], bh
        mov     [esi+7], dh
        sub     eax, 4
        jz      EvenRowDone
        mov     ebx, [ecx+4]                    ; [ Y07 Y06 Y05 Y04 ]
        mov     dl, [edi+2]                     ; U02
        mov     dh, [ebp+2]                     ; V02
        mov     [esi+8], bl
        mov     [esi+9], dl
        mov     [esi+10], bh
        mov     [esi+11], dh
        shr     ebx, 16                         ; bl = Y06, bh = Y07
        mov     dl, [edi+3]                     ; U03
        mov     dh, [ebp+3]                     ; V03
        mov     [esi+12], bl
        mov     [esi+13], dl
        mov     [esi+14], bh
        mov     [esi+15], dh
        sub     eax, 4
        jz      EvenRowDone
        mov     ebx, [ecx+8]                    ; [ Y11 Y10 Y09 Y08 ]
        mov     dl, [edi+4]                     ; U04
        mov     dh, [ebp+4]                     ; V04
        mov     [esi+16], bl
        mov     [esi+17], dl
        mov     [esi+18], bh
        mov     [esi+19], dh
        shr     ebx, 16                         ; bl = Y10, bh = Y11
        mov     dl, [edi+5]                     ; U05
        mov     dh, [ebp+5]                     ; V05
        mov     [esi+20], bl
        mov     [esi+21], dl
        mov     [esi+22], bh
        mov     [esi+23], dh

EvenRowDone:
        mov     eax, DWORD PTR _LineCount       ; eax = LineCount
        jmp     SHORT UpdatePointers

; Row dropped: reload the counter source and pre-subtract one output pitch
; so that the unconditional "+= intPitch" below leaves Out where it was.
SkipEvenRow:
        mov     eax, DWORD PTR _bShapingFlag    ; eax = bShapingFlag
        mov     edx, DWORD PTR _Out             ; edx = Out
        mov     ebx, DWORD PTR _intPitch        ; ebx = intPitch
        sub     edx, ebx                        ; edx = Out - intPitch
        mov     DWORD PTR _Out, edx             ; save Out

; ---- Advance to the second row of the pair (U and V stay on the same line) -
UpdatePointers:
        mov     ecx, DWORD PTR _Y               ; ecx = Y
        dec     eax                             ; LineCount - 1, or
                                                ; bShapingFlag - 1 after a skip
        mov     edx, DWORD PTR _intPitch        ; edx = intPitch
        mov     esi, DWORD PTR _Out             ; esi = Out
        mov     DWORD PTR _LineCount, eax       ; store updated LineCount
        add     esi, edx                        ; esi = Out + intPitch
        mov     eax, DWORD PTR _uYPitch         ; eax = uYPitch
        mov     edi, DWORD PTR _U               ; edi = U (same line as before)
        add     ecx, eax                        ; ecx = Y + uYPitch
        mov     ebp, DWORD PTR _V               ; ebp = V (same line as before)
        mov     DWORD PTR _Y, ecx               ; store updated Y
        mov     DWORD PTR _Out, esi             ; store updated Out
        mov     eax, DWORD PTR _LineCount       ; eax = LineCount
        test    eax, eax                        ; LineCount == 0?
        je      SkipOddRow                      ; yes: drop this row
        mov     eax, DWORD PTR _uWidth          ; eax = pixels left in row

; Same 16-pixel / 4-pixel split as for the first row of the pair.
        test    eax, 0FFFFFFF0H                 ; at least 16 pixels?
        jz      OddRowTail                      ; no: scalar tail only

OddRowPels:                                     ; 16 pixels per iteration
        movq    mm0, [ecx]                      ; [ Y07 Y06 Y05 Y04 Y03 Y02 Y01 Y00 ]
        movq    mm1, [edi]                      ; [ U07 U06 U05 U04 U03 U02 U01 U00 ]
        movq    mm2, [ebp]                      ; [ V07 V06 V05 V04 V03 V02 V01 V00 ]
        movq    mm3, mm1
        punpcklbw mm3, mm2                      ; [ V03 U03 V02 U02 V01 U01 V00 U00 ]
        movq    mm4, mm0
        punpcklbw mm4, mm3                      ; [ V01 Y03 U01 Y02 V00 Y01 U00 Y00 ]
        movq    [esi], mm4                      ; store output bytes 0..7
        psrlq   mm3, 32                         ; [ 0 0 0 0 V03 U03 V02 U02 ]
        psrlq   mm0, 32                         ; [ 0 0 0 0 Y07 Y06 Y05 Y04 ]
        punpcklbw mm0, mm3                      ; [ V03 Y07 U03 Y06 V02 Y05 U02 Y04 ]
        movq    [esi+8], mm0                    ; store output bytes 8..15
        movq    mm0, [ecx+8]                    ; [ Y15 Y14 Y13 Y12 Y11 Y10 Y09 Y08 ]
        psrlq   mm1, 32                         ; [ 0 0 0 0 U07 U06 U05 U04 ]
        psrlq   mm2, 32                         ; [ 0 0 0 0 V07 V06 V05 V04 ]
        movq    mm3, mm1
        punpcklbw mm3, mm2                      ; [ V07 U07 V06 U06 V05 U05 V04 U04 ]
        movq    mm4, mm0
        punpcklbw mm4, mm3                      ; [ V05 Y11 U05 Y10 V04 Y09 U04 Y08 ]
        movq    [esi+16], mm4                   ; store output bytes 16..23
        psrlq   mm3, 32                         ; [ 0 0 0 0 V07 U07 V06 U06 ]
        psrlq   mm0, 32                         ; [ 0 0 0 0 Y15 Y14 Y13 Y12 ]
        punpcklbw mm0, mm3                      ; [ V07 Y15 U07 Y14 V06 Y13 U06 Y12 ]
        movq    [esi+24], mm0                   ; store output bytes 24..31
        lea     ecx, [ecx+16]                   ; Y   += 16
        lea     edi, [edi+8]                    ; U   += 8
        lea     ebp, [ebp+8]                    ; V   += 8
        lea     esi, [esi+32]                   ; Out += 32
        sub     eax, 16
        test    eax, 0FFFFFFF0H                 ; another full group of 16?
        jnz     OddRowPels
        test    eax, eax
        jz      OddRowDone                      ; width was a multiple of 16

; Scalar tail: eax is 4, 8 or 12 here.
OddRowTail:
        mov     ebx, [ecx]                      ; [ Y03 Y02 Y01 Y00 ]
        mov     dl, [edi]                       ; U00
        mov     dh, [ebp]                       ; V00
        mov     [esi], bl                       ; Y00
        mov     [esi+1], dl                     ; U00
        mov     [esi+2], bh                     ; Y01
        mov     [esi+3], dh                     ; V00
        shr     ebx, 16                         ; bl = Y02, bh = Y03
        mov     dl, [edi+1]                     ; U01
        mov     dh, [ebp+1]                     ; V01
        mov     [esi+4], bl
        mov     [esi+5], dl
        mov     [esi+6], bh
        mov     [esi+7], dh
        sub     eax, 4
        jz      OddRowDone
        mov     ebx, [ecx+4]                    ; [ Y07 Y06 Y05 Y04 ]
        mov     dl, [edi+2]                     ; U02
        mov     dh, [ebp+2]                     ; V02
        mov     [esi+8], bl
        mov     [esi+9], dl
        mov     [esi+10], bh
        mov     [esi+11], dh
        shr     ebx, 16                         ; bl = Y06, bh = Y07
        mov     dl, [edi+3]                     ; U03
        mov     dh, [ebp+3]                     ; V03
        mov     [esi+12], bl
        mov     [esi+13], dl
        mov     [esi+14], bh
        mov     [esi+15], dh
        sub     eax, 4
        jz      OddRowDone
        mov     ebx, [ecx+8]                    ; [ Y11 Y10 Y09 Y08 ]
        mov     dl, [edi+4]                     ; U04
        mov     dh, [ebp+4]                     ; V04
        mov     [esi+16], bl
        mov     [esi+17], dl
        mov     [esi+18], bh
        mov     [esi+19], dh
        shr     ebx, 16                         ; bl = Y10, bh = Y11
        mov     dl, [edi+5]                     ; U05
        mov     dh, [ebp+5]                     ; V05
        mov     [esi+20], bl
        mov     [esi+21], dl
        mov     [esi+22], bh
        mov     [esi+23], dh

OddRowDone:
        mov     eax, DWORD PTR _LineCount       ; eax = LineCount
        jmp     SHORT UpdateAllPointers

; Row dropped: same Out compensation as SkipEvenRow.
SkipOddRow:
        mov     eax, DWORD PTR _bShapingFlag    ; eax = bShapingFlag
        mov     edx, DWORD PTR _Out             ; edx = Out
        mov     ebx, DWORD PTR _intPitch        ; ebx = intPitch
        sub     edx, ebx                        ; edx = Out - intPitch
        mov     DWORD PTR _Out, edx             ; save Out

; ---- Advance every pointer to the next row pair ----------------------------
UpdateAllPointers:
        dec     eax                             ; LineCount - 1, or
                                                ; bShapingFlag - 1 after a skip
        mov     ecx, DWORD PTR _Y               ; ecx = Y
        mov     edx, DWORD PTR _intPitch        ; edx = intPitch
        mov     ebx, DWORD PTR _Out             ; ebx = Out
        add     ebx, edx                        ; ebx = Out + intPitch
        mov     ebp, DWORD PTR _ASMTMP2         ; ebp = row-pair counter
        mov     DWORD PTR _LineCount, eax       ; store updated LineCount
        mov     DWORD PTR _Out, ebx             ; store updated Out
        mov     edx, DWORD PTR _uUVPitch        ; edx = uUVPitch
        mov     eax, DWORD PTR _U               ; eax = U
        mov     esi, DWORD PTR _V               ; esi = V
        add     eax, edx                        ; eax = U + uUVPitch
        add     esi, edx                        ; esi = V + uUVPitch
        mov     DWORD PTR _U, eax               ; store updated U
        mov     DWORD PTR _V, esi               ; store updated V
        add     ecx, DWORD PTR _uYPitch         ; ecx = Y + uYPitch
        dec     ebp                             ; one row pair done; sets ZF
        mov     DWORD PTR _Y, ecx               ; store updated Y (mov keeps flags)
        mov     DWORD PTR _ASMTMP2, ebp         ; store counter   (mov keeps flags)
        jne     RowLoop                         ; ZF still from "dec ebp"

; ---- Release the local frame, restore registers, return --------------------
CleanUp:
        add     esp, LocalFrameSize             ; esp -> saved registers
        pop     ebp
        pop     edi
        pop     esi
        pop     ebx
        ret     52                              ; 13 * 4 bytes of arguments

MMX_YUV12ToYUY2 ENDP

MMXCODE1 ENDS

END