Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

312 lines
7.5 KiB

  1. ;*************************************************************************
  2. ;** INTEL Corporation Proprietary Information
  3. ;**
  4. ;** This listing is supplied under the terms of a license
  5. ;** agreement with INTEL Corporation and may not be copied
  6. ;** nor disclosed except in accordance with the terms of
  7. ;** that agreement.
  8. ;**
  9. ;** Copyright (c) 1995 Intel Corporation.
  10. ;** All Rights Reserved.
  11. ;**
  12. ;*************************************************************************
  13. ;//
  14. ;// $Header: S:\h26x\src\dec\cx512yuv.asv 1.5 30 Dec 1996 20:02:08 MDUDA $
  15. ;//
  16. ;// $Log: S:\h26x\src\dec\cx512yuv.asv $
  17. ;//
  18. ;// Rev 1.5 30 Dec 1996 20:02:08 MDUDA
  19. ;// Fixed problem where buffer boundaries were being over-written.
  20. ;//
  21. ;// Rev 1.4 11 Dec 1996 14:58:52 JMCVEIGH
  22. ;//
  23. ;// Changed to support widths that are multiples of 4.
  24. ;//
  25. ;// Rev 1.3 18 Jul 1996 12:52:58 KLILLEVO
  26. ;// changed cache heating to speed things up a bit
  27. ;//
  28. ;// Rev 1.2 18 Jul 1996 09:39:34 KLILLEVO
  29. ;//
  30. ;// added PVCS header and log
  31. ;; Very straightforward implementation of the YUV pitch changer
  32. ;; Does 16 pels at a time. If the width is not a multiple of 16
  33. ;; the remainder pels are handled as a special case. We assume
  34. ;; that the width is at least a multiple of 4
  ;-----------------------------------------------------------------------
  ; Assembler setup (MASM syntax, 32-bit flat model).
  ;
  ; Automatic prologue generation is switched off, so procedures in this
  ; file build their own stack frame by hand.  The epilogue is routed
  ; through ReturnAndRelieveEpilogueMacro, which is not defined in this
  ; file (presumably supplied by memmodel.inc -- confirm).
  ;-----------------------------------------------------------------------
          OPTION  PROLOGUE:None
          OPTION  EPILOGUE:ReturnAndRelieveEpilogueMacro

  .xlist                                  ; keep the include out of the listing
          include memmodel.inc
  .list

  .DATA
  ; any data would go here

  .CODE

  ; Every segment register is assumed to address the flat segment.
          ASSUME  cs:FLAT, ds:FLAT, es:FLAT, fs:FLAT, gs:FLAT, ss:FLAT

          PUBLIC  YUV12ToYUV
  ;-----------------------------------------------------------------------
  ; YUV12ToYUV -- copy a planar YUV12 frame into one contiguous output
  ;               buffer, dropping the per-row pitch padding of the source.
  ;
  ; Target: 32-bit x86, flat model, MASM syntax.  DIST and LANG are not
  ; defined in this file (presumably they come from memmodel.inc -- confirm).
  ; Automatic prologue generation is off, so every argument is addressed
  ; relative to esp through the equates below.
  ;
  ; Arguments (13 DWORDs on the stack, released by "ret 52"):
  ;   AuYPlane, AuVPlane, AuUPlane   addresses of the source planes
  ;   AuWidth, AuHeight              luma width in pels / height in rows
  ;   AuYPitch, AUVPitch             bytes between the starts of source rows
  ;   AuCCOutputBuffer, AlOutput, AuOffsetToLine0
  ;                                  summed to form the output address
  ;   AbShapingFlag, AintPitch, ACCType
  ;                                  not referenced by this routine
  ;
  ; Output layout (written back to back, no padding between rows):
  ;   1. AuHeight   rows of AuWidth   bytes read from AuYPlane
  ;   2. AuHeight/2 rows of AuWidth/2 bytes read from AuUPlane
  ;   3. AuHeight/2 rows of AuWidth/2 bytes read from AuVPlane
  ;
  ; Row copy granularity:
  ;   luma   : 16-byte groups, then one 8-byte and one 4-byte tail
  ;   chroma :  8-byte groups, then one 4-byte and one 2-byte tail
  ; Bytes beyond those tails are not copied; the routine assumes AuWidth
  ; is a multiple of 4.
  ;
  ; Register roles inside the loops:
  ;   esi = source pointer          edi = destination pointer
  ;   ecx = bytes left in this row  edx = rows left in this plane
  ;   eax, ebx = scratch
  ; ebx, esi, edi and ebp are saved on entry and restored on exit.
  ;
  ; Side effect: the stack slots holding AuWidth and AuHeight are
  ; overwritten with the chroma width and height.
  ;
  ; Changes relative to the previous revision:
  ;   * The V-plane loop branched to UEdgePels instead of VEdgePels.  With
  ;     a chroma width below 8 this ran the U-loop tail, re-entered the
  ;     V-plane setup and never terminated.
  ;   * A plane with zero rows is now skipped; the dec/jnz row loops would
  ;     otherwise wrap around and run 2^32 times.
  ;   * Removed the _uRemainderEdgePels equate (it referenced an undefined
  ;     symbol and was never used) and a dead load of AuWidth into eax.
  ;   * _uUPlane is now spelled the same way at its definition and its use.
  ;-----------------------------------------------------------------------
  YUV12ToYUV proc DIST LANG AuYPlane: DWORD,
          AuVPlane: DWORD,
          AuUPlane: DWORD,
          AuWidth: DWORD,
          AuHeight: DWORD,
          AuYPitch: DWORD,
          AUVPitch: DWORD,
          AbShapingFlag: DWORD,
          AuCCOutputBuffer: DWORD,
          AlOutput: DWORD,
          AuOffsetToLine0: DWORD,
          AintPitch: DWORD,
          ACCType: DWORD

  LocalFrameSize      = 12                ; bytes reserved below the saved registers
  RegisterStorageSize = 16                ; 4 registers pushed

  ; Argument offsets from esp once the registers are pushed and the local
  ; frame is allocated.  The extra 4 skips the return address.
  uYPlane         = LocalFrameSize + RegisterStorageSize + 4
  uVPlane         = LocalFrameSize + RegisterStorageSize + 8
  uUPlane         = LocalFrameSize + RegisterStorageSize + 12
  uWidth          = LocalFrameSize + RegisterStorageSize + 16
  uHeight         = LocalFrameSize + RegisterStorageSize + 20
  uYPitch         = LocalFrameSize + RegisterStorageSize + 24
  uUVPitch        = LocalFrameSize + RegisterStorageSize + 28
  bShapingFlag    = LocalFrameSize + RegisterStorageSize + 32
  uCCOutputBuffer = LocalFrameSize + RegisterStorageSize + 36
  lOutput         = LocalFrameSize + RegisterStorageSize + 40
  uOffsetToLine0  = LocalFrameSize + RegisterStorageSize + 44
  intPitch        = LocalFrameSize + RegisterStorageSize + 48
  CCType          = LocalFrameSize + RegisterStorageSize + 52

  ; Local offsets from esp
  LineAdd   = 0                           ; source pitch minus bytes copied per row
  LineWidth = 4                           ; reserved, not used by the code below

  ; Arguments relative to esp
  _uYPlane         EQU [esp + uYPlane]
  _uVPlane         EQU [esp + uVPlane]
  _uUPlane         EQU [esp + uUPlane]
  _uWidth          EQU [esp + uWidth]
  _uHeight         EQU [esp + uHeight]
  _uYPitch         EQU [esp + uYPitch]
  _uUVPitch        EQU [esp + uUVPitch]
  _bShapingFlag    EQU [esp + bShapingFlag]
  _uCCOutputBuffer EQU [esp + uCCOutputBuffer]
  _lOutput         EQU [esp + lOutput]
  _uOffsetToLine0  EQU [esp + uOffsetToLine0]
  _intPitch        EQU [esp + intPitch]
  _uCCType         EQU [esp + CCType]

  ; Locals relative to esp
  _LineAdd         EQU [esp + LineAdd]
  _LineWidth       EQU [esp + LineWidth]

  ; Save registers and allocate the local frame
          push    ebx
          push    esi
          push    edi
          push    ebp
          sub     esp, LocalFrameSize

  ; edi = uCCOutputBuffer + uOffsetToLine0 + lOutput
          mov     eax, _uCCOutputBuffer
          add     eax, _uOffsetToLine0
          mov     ecx, _lOutput
          add     eax, ecx
          mov     ebx, _uYPitch
          mov     ecx, _uWidth
          mov     esi, _uYPlane
          mov     edi, eax

  ;---------------------------------------------------------------------
  ; Luma plane
  ;---------------------------------------------------------------------
          sub     ebx, ecx                ; ebx = pitch - width
          mov     edx, _uHeight
          mov     _LineAdd, ebx
          test    edx, edx
          jz      YuvDone                 ; no rows: nothing to copy at all

  L2:
          test    ecx, 0FFFFFFF0H
          jz      LEdgePels               ; fewer than 16 bytes in this row
  L1:
          mov     ebx, DWORD PTR [edi]    ; heat cache: touch destination before writing
          add     edi, 16
          mov     eax, DWORD PTR [esi + 0]
          mov     ebx, DWORD PTR [esi + 4]
          mov     DWORD PTR [edi - 16], eax
          mov     DWORD PTR [edi - 12], ebx
          mov     eax, DWORD PTR [esi + 8]
          mov     ebx, DWORD PTR [esi + 12]
          mov     DWORD PTR [edi - 8], eax
          mov     DWORD PTR [edi - 4], ebx
          add     esi, 16
          sub     ecx, 16
          test    ecx, 0FFFFFFF0H
          jnz     L1

  LEdgePels:
  ; Row tail, needed when the width is a multiple of 4 but not of 16.
  ; Here ecx < 16, so bits 3 and 2 say whether 8 and/or 4 bytes remain.
          test    ecx, 08H
          jz      Lchk4
          mov     eax, DWORD PTR [esi + 0] ; input pels 0-3
          mov     ebx, DWORD PTR [esi + 4] ; input pels 4-7
          mov     DWORD PTR [edi + 0], eax ; output pels 0-3
          mov     DWORD PTR [edi + 4], ebx ; output pels 4-7
          add     esi, 8
          add     edi, 8
  Lchk4:
          test    ecx, 04H
          jz      L2_cont
          mov     eax, DWORD PTR [esi + 0] ; input pels 0-3
          add     esi, 4
          mov     DWORD PTR [edi + 0], eax ; output pels 0-3
          add     edi, 4
  L2_cont:
          add     esi, _LineAdd           ; step over the source row padding
          mov     ecx, _uWidth            ; reload the per-row byte count
          dec     edx
          jnz     L2

  ;---------------------------------------------------------------------
  ; Chroma plane read from uUPlane
  ;---------------------------------------------------------------------
          mov     esi, _uUPlane
          mov     ecx, _uWidth
          shr     ecx, 1                  ; chroma width = width / 2
          mov     ebx, _uUVPitch
          sub     ebx, ecx                ; ebx = pitch - width/2
          mov     edx, _uHeight
          shr     edx, 1                  ; chroma height = height / 2
          test    edx, edx
          jz      YuvDone                 ; no chroma rows: skip both chroma planes
          mov     _LineAdd, ebx
          mov     _uWidth, ecx            ; argument slots now hold the chroma
          mov     _uHeight, edx           ; size; reused for the second plane

  U2:
          test    ecx, 0FFFFFFF8H
          jz      UEdgePels               ; fewer than 8 bytes in this row
  U1:
          mov     ebx, DWORD PTR [edi]    ; heat cache
          add     edi, 8
          mov     eax, DWORD PTR [esi + 0]
          mov     ebx, DWORD PTR [esi + 4]
          mov     DWORD PTR [edi - 8], eax
          mov     DWORD PTR [edi - 4], ebx
          add     esi, 8
          sub     ecx, 8
          test    ecx, 0FFFFFFF8H
          jnz     U1

  UEdgePels:
  ; Row tail: here ecx < 8, so bits 2 and 1 say whether 4 and/or 2 bytes remain.
          test    ecx, 04H
          jz      Uchk2
          mov     eax, DWORD PTR [esi + 0] ; input pels 0-3
          add     esi, 4
          mov     DWORD PTR [edi + 0], eax ; output pels 0-3
          add     edi, 4
  Uchk2:
          test    ecx, 02H
          jz      U2_cont
          mov     ax, WORD PTR [esi + 0]  ; input pels 0-1
          add     esi, 2
          mov     WORD PTR [edi + 0], ax  ; output pels 0-1
          add     edi, 2
  U2_cont:
          add     esi, _LineAdd
          mov     ecx, _uWidth
          dec     edx
          jnz     U2

  ;---------------------------------------------------------------------
  ; Chroma plane read from uVPlane (same width, height and LineAdd)
  ;---------------------------------------------------------------------
          mov     esi, _uVPlane
          mov     ecx, _uWidth
          mov     edx, _uHeight
          nop

  V2:
          test    ecx, 0FFFFFFF8H
          jz      VEdgePels               ; fewer than 8 bytes in this row
  V1:
          mov     ebx, DWORD PTR [edi]    ; heat cache
          add     edi, 8
          mov     eax, DWORD PTR [esi + 0]
          mov     ebx, DWORD PTR [esi + 4]
          mov     DWORD PTR [edi - 8], eax
          mov     DWORD PTR [edi - 4], ebx
          add     esi, 8
          sub     ecx, 8
          test    ecx, 0FFFFFFF8H
          jnz     V1

  VEdgePels:
  ; Row tail: here ecx < 8, so bits 2 and 1 say whether 4 and/or 2 bytes remain.
          test    ecx, 04H
          jz      Vchk2
          mov     eax, DWORD PTR [esi + 0] ; input pels 0-3
          add     esi, 4
          mov     DWORD PTR [edi + 0], eax ; output pels 0-3
          add     edi, 4
  Vchk2:
          test    ecx, 02H
          jz      V2_cont
          mov     ax, WORD PTR [esi + 0]  ; input pels 0-1
          add     esi, 2
          mov     WORD PTR [edi + 0], ax  ; output pels 0-1
          add     edi, 2
  V2_cont:
          add     esi, _LineAdd
          mov     ecx, _uWidth
          dec     edx
          jnz     V2

  YuvDone:
          add     esp, LocalFrameSize     ; restore esp to the saved registers
          pop     ebp
          pop     edi
          pop     esi
          pop     ebx
          ret     52                      ; 13*4 bytes of arguments

  YUV12ToYUV ENDP

  END