Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

303 lines
8.5 KiB

  1. ;*************************************************************************
  2. ;** INTEL Corporation Proprietary Information
  3. ;**
  4. ;** This listing is supplied under the terms of a license
  5. ;** agreement with INTEL Corporation and may not be copied
  6. ;** nor disclosed except in accordance with the terms of
  7. ;** that agreement.
  8. ;**
  9. ;** Copyright (c) 1995 Intel Corporation.
  10. ;** All Rights Reserved.
  11. ;**
  12. ;*************************************************************************
  13. ;//
  14. ;//
  15. ;////////////////////////////////////////////////////////////////////////////
  16. ; yuv12enc -- This function performs "color conversion" in the H26X decoder for
  17. ; consumption by the H26X encoder. This entails reformatting the decoder's
  18. ; YVU data into the shape required by the encoder - including YUV order. It
  19. ; also converts the pels to 7 bits (each pel is shifted right by one bit).
  20. ; $Header: S:\h26x\src\dec\yuv12enc.asv 1.5 30 Oct 1996 14:31:00 mbodart $
  21. ; $Log: S:\h26x\src\dec\yuv12enc.asv $
  22. ;//
  23. ;// Rev 1.5 30 Oct 1996 14:31:00 mbodart
  24. ;// Re-checking in changes originally made by Atul, but lost when the server
  25. ;// ran out of disk space during a PVCS operation. Atul's original log msg:
  26. ;//
  27. ;// Removed AGI in IA code. Added MMX code but it is not ready for prime-time.
  28. ;//
  29. ;// Rev 1.4 08 Mar 1996 15:11:10 AGUPTA2
  30. ;// Removed segment register override when compiling for WIN32.
  31. ;// Should speed-up this routine substantially.
  32. ;//
  33. ;
  34. OPTION PROLOGUE:None ; no assembler-generated prologue; the procedure pushes its own registers
  35. OPTION EPILOGUE:ReturnAndRelieveEpilogueMacro ; custom epilogue macro, defined outside this file
  36. include locals.inc ; NOTE(review): presumably supplies PD, the Lxxx load macros and rturn used below - confirm
  37. include decconst.inc
  38. IFNDEF DSEGNAME ; pick a default data segment name unless one was supplied ...
  39. IFNDEF WIN32 ; ... and only for non-WIN32 builds
  40. DSEGNAME TEXTEQU <DataH26x_YUV12ForEnc>
  41. ENDIF
  42. ENDIF
  43. IFDEF WIN32 ; WIN32 build: include memmodel.inc first, then use the simplified .DATA directive
  44. .xlist ; suppress listing output while memmodel.inc is included
  45. include memmodel.inc
  46. .list ; resume listing output
  47. .DATA
  48. ELSE ; non-WIN32 build: open an explicitly named data segment
  49. DSEGNAME SEGMENT WORD PUBLIC 'DATA'
  50. ENDIF
  51. ; any data would go here
  52. IFNDEF WIN32 ; non-WIN32 build: close the data segment, then include memmodel.inc
  53. DSEGNAME ENDS
  54. .xlist ; suppress listing output while memmodel.inc is included
  55. include memmodel.inc
  56. .list ; resume listing output
  57. ENDIF
  58. IFNDEF SEGNAME ; pick a default code segment name unless one was supplied ...
  59. IFNDEF WIN32 ; ... and only for non-WIN32 builds
  60. SEGNAME TEXTEQU <_CODE32>
  61. ENDIF
  62. ENDIF
  63. ifdef WIN32 ; open the code segment: simplified .CODE for WIN32, explicit USE32 segment otherwise
  64. .CODE
  65. else
  66. SEGNAME SEGMENT PARA PUBLIC USE32 'CODE'
  67. endif
  68. ifdef WIN32 ; WIN32 build: every segment register is assumed FLAT
  69. ASSUME cs : FLAT
  70. ASSUME ds : FLAT
  71. ASSUME es : FLAT
  72. ASSUME fs : FLAT
  73. ASSUME gs : FLAT
  74. ASSUME ss : FLAT
  75. else ; non-WIN32 build: only CS is assumed; the data segment registers are assumed to be nothing
  76. ASSUME CS : SEGNAME
  77. ASSUME DS : Nothing
  78. ASSUME ES : Nothing
  79. ASSUME FS : Nothing
  80. ASSUME GS : Nothing
  81. endif
  82. ; void FAR ASM_CALLTYPE H26x_YUV12ForEnc (
  83. ; U8 FAR * InstanceBase,
  84. ; X32 YPlane,
  85. ; X32 VPlane,
  86. ; X32 UPlane,
  87. ; UN FrameWidth,
  88. ; UN FrameHeight,
  89. ; UN Pitch,
  90. ; U8 FAR * ColorConvertedFrame, // encoder's buffers.
  91. ; X32 YOutputPlane,
  92. ; X32 VOutputPlane,
  93. ; X32 UOutputPlane)
  94. ;
  95. ; YPlane, VPlane, and UPlane are offsets from InstanceBase; YOutputPlane, VOutputPlane, and UOutputPlane are offsets from ColorConvertedFrame. In 16-bit Microsoft
  96. ; Windows (tm), space in this segment is used for local variables and tables.
  97. ; In 32-bit variants of Microsoft Windows (tm), the local variables are on
  98. ; the stack, while the tables are in the one and only data segment.
  99. ;
  PUBLIC H26x_YUV12ForEnc

  ;----------------------------------------------------------------------------
  ; H26x_YUV12ForEnc
  ;
  ; Copies the Y, V and U planes of a decoded frame into the encoder's buffers,
  ; shifting every pel right by one bit so that the output holds 7-bit pels.
  ;
  ; Stack arguments (all DWORDs; offsets are named by the equates below):
  ;   InstanceBase          base address for the three source plane offsets
  ;   YPlane/VPlane/UPlane  source plane offsets from InstanceBase
  ;   FrameWidth            luma width in pels  (chroma uses FrameWidth / 2)
  ;   FrameHeight           luma height in rows (chroma uses FrameHeight / 2)
  ;   Pitch                 byte distance between rows; the same value steps
  ;                         both source and destination, luma and chroma
  ;   ColorConvertedFrame   base address for the three destination offsets
  ;   YOutputPlane/VOutputPlane/UOutputPlane
  ;                         destination plane offsets from ColorConvertedFrame
  ;
  ; Returns nothing.  esi, edi, ebp and ebx are saved and restored; eax, ecx,
  ; edx and the flags are not preserved.  The six plane arguments are
  ; overwritten in place on the stack with the computed addresses.
  ;
  ; Behaviour on degenerate frames:
  ;   * FrameWidth <= 0 or FrameHeight <= 0 (signed): nothing is copied.
  ;   * FrameWidth/2 == 0 or FrameHeight/2 == 0: luma is copied, chroma is
  ;     skipped.
  ;   Without these checks the bottom-tested loops would store below the
  ;   destination row (zero width) or run ~2^32 rows (zero height).
  ;
  ; Remaining preconditions (NOT checked here):
  ;   * FrameWidth must be a multiple of 16.  The luma loop moves 16 pels and
  ;     the chroma loops 8 pels per iteration, working from the end of the row
  ;     toward its start; any other width makes the last iteration of a row
  ;     store below the row's first pel.
  ;   * On the last iteration of every row the preload for the "next"
  ;     iteration reads the 8 bytes immediately below the row start.  The
  ;     values are discarded, but that memory must be readable.
  ;
  ; NOTE(review): Lebp/Lecx/Lesi/Lebx/Ledi, PD and rturn are not defined in
  ; this file.  The code relies on "Lxxx name" loading the stack argument at
  ; [esp+name] into register xxx (via the LCL text equate) - confirm against
  ; locals.inc.
  ;----------------------------------------------------------------------------

  ; ebp is needed as a general register, so the parameter declarations below
  ; are never referenced by name; they exist only so the assembler knows how
  ; many argument bytes to relieve from the stack on return.
  H26x_YUV12ForEnc proc DIST LANG PUBLIC,
  AInstanceBase: DWORD,
  AYPlane: DWORD,
  AVPlane: DWORD,
  AUPlane: DWORD,
  AFrameWidth: DWORD,
  AFrameHeight: DWORD,
  APitch: DWORD,
  AColorConvertedFrame: DWORD,
  AYOutputPlane: DWORD,
  AVOutputPLane: DWORD,
  AUOutputPLane: DWORD

  LocalFrameSize       = 0                      ; no stack locals
  RegisterStorageSize  = 16                     ; four pushed registers

  ; Argument offsets from esp once the registers are pushed and the (empty)
  ; local frame is allocated.  The "+ 4" skips the return address.
  InstanceBase         = LocalFrameSize + RegisterStorageSize + 4
  YPlane               = LocalFrameSize + RegisterStorageSize + 8
  VPlane               = LocalFrameSize + RegisterStorageSize + 12
  UPlane               = LocalFrameSize + RegisterStorageSize + 16
  FrameWidth           = LocalFrameSize + RegisterStorageSize + 20
  FrameHeight          = LocalFrameSize + RegisterStorageSize + 24
  Pitch                = LocalFrameSize + RegisterStorageSize + 28
  ColorConvertedFrame  = LocalFrameSize + RegisterStorageSize + 32
  YOutputPlane         = LocalFrameSize + RegisterStorageSize + 36
  VOutputPlane         = LocalFrameSize + RegisterStorageSize + 40
  UOutputPlane         = LocalFrameSize + RegisterStorageSize + 44
  EndOfArgList         = LocalFrameSize + RegisterStorageSize + 48

  LCL EQU <esp+>

          push    esi
          push    edi
          push    ebp
          push    ebx
          sub     esp, LocalFrameSize

  ; Turn the six plane offsets into addresses, in place in the argument area.
          mov     eax, PD [esp+InstanceBase]
          add     PD [esp+YPlane], eax
          add     PD [esp+VPlane], eax
          add     PD [esp+UPlane], eax
          mov     eax, PD [esp+ColorConvertedFrame]
          add     PD [esp+YOutputPlane], eax
          add     PD [esp+VOutputPlane], eax
          add     PD [esp+UOutputPlane], eax

  ;----------------------------------------------------------------------------
  ; Y plane: 16 pels per inner-loop iteration.
  ; Register usage:
  ;   edi     -- Y plane output cursor (start of current row)
  ;   esi     -- Y plane input cursor  (start of current row)
  ;   ebp     -- rows remaining
  ;   ecx     -- pels remaining in the row; also the index of the row end
  ;   ebx     -- pitch
  ;   eax,edx -- scratch (8 pels in flight)
  ;----------------------------------------------------------------------------
          Lebp    FrameHeight
          Lecx    FrameWidth
          test    ebp, ebp                      ; no rows: nothing to convert
          jle     YUV12ForEncDone
          test    ecx, ecx                      ; no pels: the loop would store below the row
          jle     YUV12ForEncDone
          Lesi    YPlane
          Lebx    Pitch
          Ledi    YOutputPlane

  YLoopHeader:
          mov     eax, PD [esi+ecx-8]           ; preload the last 8 pels of the row
          mov     edx, PD [esi+ecx-4]
          ALIGN   4
  YLoop:
  ; Halve four packed pels per register.  eax is shifted as a whole dword and
  ; then bit 7 of every byte (which received its neighbour's bit 0) is
  ; cleared; edx has bit 0 of every byte cleared first and is then shifted.
  ; Both orders give the same per-byte result.
          shr     eax, 1
          and     edx, 0FEFEFEFEH
          shr     edx, 1
          and     eax, 07F7F7F7Fh
          mov     PD [edi+ecx-8], eax
          mov     PD [edi+ecx-4], edx
  ; next 8 pels
          mov     eax, PD [esi+ecx-8-8]
          mov     edx, PD [esi+ecx-4-8]
          shr     eax, 1
          and     edx, 0FEFEFEFEH
          shr     edx, 1
          and     eax, 07F7F7F7Fh
          mov     PD [edi+ecx-8-8], eax
          mov     PD [edi+ecx-4-8], edx
  ; Preload for the next iteration before ecx changes, so no load has to wait
  ; on the updated index (avoids an AGI).  On the last iteration of a row this
  ; reads the 8 bytes below the row start and the result is discarded.
          mov     eax, PD [esi+ecx-8-16]
          mov     edx, PD [esi+ecx-4-16]
          sub     ecx, 16
          jg      YLoop

          Lecx    FrameWidth                    ; restart the column counter
          add     esi, ebx                      ; advance both cursors one row
          add     edi, ebx
          dec     ebp
          jne     YLoopHeader

  ;----------------------------------------------------------------------------
  ; V plane: 8 pels per inner-loop iteration, half the luma width and height.
  ; Register usage:
  ;   edi     -- V plane output cursor
  ;   esi     -- V plane input cursor
  ;   ebp     -- rows remaining
  ;   ecx     -- pels remaining in the row
  ;   ebx     -- pitch (still loaded from the Y pass)
  ;   eax,edx -- scratch
  ;----------------------------------------------------------------------------
          Lebp    FrameHeight
          Lecx    FrameWidth
          sar     ecx, 1
          Lesi    VPlane
          sar     ebp, 1
          Ledi    VOutputPlane
  ; The same halved dimensions drive the U pass below, so a single check here
  ; covers both chroma planes.
          test    ebp, ebp                      ; FrameHeight == 1: no chroma rows
          jle     YUV12ForEncDone
          test    ecx, ecx                      ; FrameWidth == 1: no chroma pels
          jle     YUV12ForEncDone
          ALIGN   4
  VLoopHeader:
          mov     eax, PD [esi+ecx-8]           ; preload the last 8 pels of the row
          mov     edx, PD [esi+ecx-4]
  VLoop:
          shr     eax, 1                        ; halve four pels, then mask stray bits
          and     edx, 0FEFEFEFEH               ; mask first, then halve four pels
          shr     edx, 1
          and     eax, 07F7F7F7Fh
          mov     PD [edi+ecx-8], eax
          mov     PD [edi+ecx-4], edx
          mov     eax, PD [esi+ecx-8-8]         ; preload before ecx changes (avoids AGI)
          mov     edx, PD [esi+ecx-4-8]
          sub     ecx, 8
          jg      VLoop

          Lecx    FrameWidth
          add     esi, ebx
          shr     ecx, 1                        ; chroma width again
          add     edi, ebx
          dec     ebp
          jne     VLoopHeader

  ;----------------------------------------------------------------------------
  ; U plane: identical to the V pass, with the U source and destination.
  ; Register usage:
  ;   edi     -- U plane output cursor
  ;   esi     -- U plane input cursor
  ;   ebp     -- rows remaining
  ;   ecx     -- pels remaining in the row
  ;   ebx     -- pitch
  ;   eax,edx -- scratch
  ;----------------------------------------------------------------------------
          Lebp    FrameHeight
          Lecx    FrameWidth
          sar     ecx, 1
          Lesi    UPlane
          sar     ebp, 1
          Ledi    UOutputPlane
          ALIGN   4
  ULoopHeader:
          mov     eax, PD [esi+ecx-8]           ; preload the last 8 pels of the row
          mov     edx, PD [esi+ecx-4]
  ULoop:
          shr     eax, 1                        ; halve four pels, then mask stray bits
          and     edx, 0FEFEFEFEH               ; mask first, then halve four pels
          shr     edx, 1
          and     eax, 07F7F7F7Fh
          mov     PD [edi+ecx-8], eax
          mov     PD [edi+ecx-4], edx
          mov     eax, PD [esi+ecx-8-8]         ; preload before ecx changes (avoids AGI)
          mov     edx, PD [esi+ecx-4-8]
          sub     ecx, 8
          jg      ULoop

          Lecx    FrameWidth
          add     esi, ebx
          shr     ecx, 1                        ; chroma width again
          add     edi, ebx
          dec     ebp
          jne     ULoopHeader

  ; Common exit, also reached directly when a dimension is degenerate.
  YUV12ForEncDone:
          add     esp, LocalFrameSize
          pop     ebx
          pop     ebp
          pop     edi
          pop     esi
          rturn

  H26x_YUV12ForEnc endp
  263. IFNDEF WIN32 ; only the non-WIN32 build opened SEGNAME with an explicit SEGMENT directive
  264. SEGNAME ENDS
  265. ENDIF
  266. END