Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

359 lines
12 KiB

  1. ;*************************************************************************
  2. ;** INTEL Corporation Proprietary Information
  3. ;**
  4. ;** This listing is supplied under the terms of a license
  5. ;** agreement with INTEL Corporation and may not be copied
  6. ;** nor disclosed except in accordance with the terms of
  7. ;** that agreement.
  8. ;**
  9. ;** Copyright (c) 1995 Intel Corporation.
  10. ;** All Rights Reserved.
  11. ;**
  12. ;*************************************************************************
  13. ;//
  14. ;// $Header: S:\h26x\src\dec\cx512322.asv
  15. ;//
  16. ;// $Log: S:\h26x\src\dec\cx512322.asv $
  17. ;//
  18. ;// Rev 1.2 12 Apr 1996 11:26:26 RMCKENZX
  19. ;// Corrected bug in fetching first V contribution to Red.
  20. ;//
  21. ;// Rev 1.1 10 Apr 1996 11:12:54 RMCKENZX
  22. ;// Fixed bug in aspect ratio correction -- clearing sign bit of bl.
  23. ;//
  24. ;// Rev 1.0 01 Apr 1996 10:25:48 BNICKERS
  25. ;// Initial revision.
  26. ;//
  27. ;////////////////////////////////////////////////////////////////////////////
  28. ;
  29. ; +---------- Color convertor.
  30. ; |+--------- For both H261 and H263.
  31. ; ||+-------- Version for the Pentium(r) Microprocessor.
  32. ; |||++------ Convert from YUV12.
  33. ; |||||++---- Convert to RGB32.
  34. ; |||||||+--- Zoom by two.
  35. ; ||||||||
  36. ; cx512322 -- This function performs YUV12-to-RGB32 zoom-by-two color conversion
  37. ; for H26x. It is tuned for best performance on the Pentium(r)
  38. ; Microprocessor. It handles the format in which the low order
  39. ; byte is B, the second byte is G, the third byte is R, and the
  40. ; high order byte is zero.
  41. ;
  42. ; The YUV12 input is planar, 8 bits per pel. The Y plane may have
  43. ; a pitch of up to 768. It may have a width less than or equal
  44. ; to the pitch. It must be DWORD aligned, and preferably QWORD
  45. ; aligned. Pitch and Width must be a multiple of four. For best
  46. ; performance, Pitch should not be 4 more than a multiple of 32.
  47. ; Height may be any amount, but must be a multiple of two. The U
  48. ; and V planes may have a different pitch than the Y plane, subject
  49. ; to the same limitations.
  50. ;
  51. ; The color convertor is non-destructive; the input Y, U, and V
  52. ; planes will not be clobbered.
  53. OPTION PROLOGUE:None ; No assembler-generated prologue; the procedure saves registers and builds its frame by hand.
  54. OPTION EPILOGUE:ReturnAndRelieveEpilogueMacro ; Epilogue comes from this macro; it is not defined in this file (presumably in an include -- confirm).
  55. include locals.inc
  56. include ccinst.inc
  57. include decconst.inc
  58. .xlist ; Suppress listing output while memmodel.inc is included.
  59. include memmodel.inc
  60. .list ; Resume listing output.
  61. .DATA
  62. ; any data would go here
  63. .CODE
  64. ASSUME cs : FLAT ; All segment registers are assumed to address the flat 32-bit space.
  65. ASSUME ds : FLAT
  66. ASSUME es : FLAT
  67. ASSUME fs : FLAT
  68. ASSUME gs : FLAT
  69. ASSUME ss : FLAT
  70. ; void FAR ASM_CALLTYPE YUV12ToRGB32ZoomBy2 (U8 * YPlane,
  71. ; U8 * VPlane,
  72. ; U8 * UPlane,
  73. ; UN FrameWidth,
  74. ; UN FrameHeight,
  75. ; UN YPitch,
  76. ; UN VPitch,
  77. ; UN AspectAdjustmentCount,
  78. ; U8 FAR * ColorConvertedFrame,
  79. ; U32 DCIOffset,
  80. ; U32 CCOffsetToLine0,
  81. ; IN CCOPitch,
  82. ; IN CCType)
  83. ;
  84. ; CCOffsetToLine0 is relative to ColorConvertedFrame.
  85. ;
  86. PUBLIC YUV12ToRGB32ZoomBy2
  87. ; Because this routine needs the ebp register for its own use, these parameter
  88. ; declarations are not referenced; they exist only so the assembler knows how many bytes to relieve from the stack.
  89. YUV12ToRGB32ZoomBy2 proc DIST LANG AYPlane: DWORD,
  90. AVPlane: DWORD,
  91. AUPlane: DWORD,
  92. AFrameWidth: DWORD,
  93. AFrameHeight: DWORD,
  94. AYPitch: DWORD,
  95. AVPitch: DWORD,
  96. AAspectAdjustmentCnt: DWORD,
  97. AColorConvertedFrame: DWORD,
  98. ADCIOffset: DWORD,
  99. ACCOffsetToLine0: DWORD,
  100. ACCOPitch: DWORD,
  101. ACCType: DWORD
  102. LocalFrameSize = 64+768*8+32 ; Bytes reserved below the entry esp before esp is aligned down to a 2 KB boundary.
  103. RegisterStorageSize = 16 ; Four registers (esi, edi, ebp, ebx) are pushed on entry, 4 bytes each.
  104. ; Arguments: byte offsets from esp as it stands after the four pushes (that value is kept in edi during set-up).
  105. YPlane_arg = RegisterStorageSize + 4 ; The extra 4 presumably skips the return address -- confirm against DIST.
  106. VPlane_arg = RegisterStorageSize + 8
  107. UPlane_arg = RegisterStorageSize + 12
  108. FrameWidth_arg = RegisterStorageSize + 16
  109. FrameHeight = RegisterStorageSize + 20
  110. YPitch_arg = RegisterStorageSize + 24
  111. ChromaPitch_arg = RegisterStorageSize + 28
  112. AspectAdjustmentCount_arg = RegisterStorageSize + 32
  113. ColorConvertedFrame = RegisterStorageSize + 36
  114. DCIOffset = RegisterStorageSize + 40
  115. CCOffsetToLine0 = RegisterStorageSize + 44
  116. CCOPitch = RegisterStorageSize + 48
  117. CCType_arg = RegisterStorageSize + 52
  118. EndOfArgList = RegisterStorageSize + 56
  119. ; Locals (on local stack frame, addressed from the 2 KB-aligned esp; offset 12 is unused)
  120. CCOCursor EQU [esp+ 0] ; Address of the next output line to write.
  121. CCOSkipDistance EQU [esp+ 4] ; CCOPitch - 8*FrameWidth; added to the output cursor at the end of each line.
  122. ChromaLineLen EQU [esp+ 8] ; FrameWidth/2: number of V (and U) bytes consumed per chroma line.
  123. YLimit EQU [esp+16] ; YPlane + FrameHeight*YPitch; conversion stops when the luma cursor reaches it.
  124. YCursor EQU [esp+20] ; Start of the current luma line.
  125. VCursor EQU [esp+24] ; Start of the current V line.
  126. DistanceFromVToU EQU [esp+28] ; UPlane - VPlane, so a U pel is fetched as [V address + distance].
  127. EndOfChromaLine EQU [esp+32] ; VCursor + ChromaLineLen for the line being prepared.
  128. AspectCount EQU [esp+36] ; Running counter that decides when a duplicated output line is dropped.
  129. ChromaPitch EQU [esp+40]
  130. AspectAdjustmentCount EQU [esp+44] ; Reload value added back to AspectCount when a line is dropped.
  131. LineParity EQU [esp+48] ; Bit 0: luma line parity within a chroma row. Bit 7: first copy of the doubled line is written.
  132. LumaPitch EQU [esp+52]
  133. FrameWidth EQU [esp+56]
  134. StashESP EQU [esp+60] ; esp as it was after the register pushes; restored at Done.
  135. ChromaContribution EQU [esp+64] ; One dword per chroma pel, followed by a zero dword as end-of-line marker.
  136. push esi
  137. push edi
  138. push ebp
  139. push ebx
  140. mov edi,esp ; edi = base for the argument offsets above.
  141. sub esp,LocalFrameSize
  142. and esp,0FFFFF800H ; Align the frame to 2 KB; the line loop later walks esp and relies on this alignment to get back.
  143. mov eax,[edi+YPitch_arg]
  144. mov ebx,[edi+ChromaPitch_arg]
  145. mov ecx,[edi+AspectAdjustmentCount_arg]
  146. mov edx,[edi+FrameWidth_arg]
  147. mov LumaPitch,eax
  148. mov ChromaPitch,ebx
  149. mov AspectAdjustmentCount,ecx
  150. mov AspectCount,ecx
  151. mov FrameWidth,edx
  152. mov ebx,[edi+VPlane_arg]
  153. mov ecx,[edi+UPlane_arg]
  154. mov eax,[edi+YPlane_arg]
  155. sub ecx,ebx ; ecx = UPlane - VPlane.
  156. mov DistanceFromVToU,ecx
  157. mov VCursor,ebx
  158. mov YCursor,eax
  159. mov eax,[edi+ColorConvertedFrame]
  160. add eax,[edi+DCIOffset]
  161. add eax,[edi+CCOffsetToLine0] ; eax = address of output line 0.
  162. mov CCOCursor,eax
  163. mov StashESP,edi
  164. mov edx,[edi+FrameHeight]
  165. mov ecx,LumaPitch
  166. imul edx,ecx ; edx = FrameHeight * YPitch.
  167. mov ebx,FrameWidth
  168. mov eax,[edi+CCOPitch]
  169. shl ebx,3 ; ebx = 8*FrameWidth = bytes written per output line (2 output pels of 4 bytes per input pel).
  170. mov esi,YCursor ; Fetch cursor over luma plane.
  171. add edx,esi ; edx = address just past the last luma line.
  172. sub eax,ebx ; eax = CCOPitch - bytes written per line.
  173. shr ebx,4 ; ebx = FrameWidth/2.
  174. mov YLimit,edx
  175. mov ChromaLineLen,ebx
  176. mov CCOSkipDistance,eax
  177. mov esi,VCursor
  178. mov ecx,AspectAdjustmentCount ; Repeats the AspectCount initialisation done above; harmless.
  179. mov AspectCount,ecx
  180. ; Register Usage (PrepareChromaLine / NextChromaPel):
  181. ;
  182. ; edi -- Cursor over the ChromaContribution array (first used to compute the EOL address).
  183. ; esi -- Chroma Line cursor (V plane).
  184. ; ebp -- Sum of the V and U contribution dwords for one chroma pel.
  185. ; edx -- Distance from V pel to U pel.
  186. ; ecx -- U pel (in cl), used as index into U24Contrib.
  187. ; ebx -- U contribution to RGB.
  188. ; eax -- V pel (in al), used as index into V24Contrib.
  189. PrepareChromaLine:
  190. mov edi,ChromaLineLen
  191. xor eax,eax
  192. mov edx,DistanceFromVToU
  193. mov al,[esi] ; Fetch V.
  194. add edi,esi ; Compute EOL address.
  195. xor ecx,ecx
  196. mov ebp,PD V24Contrib[eax*8] ; ebp[24:31] -- Zero (blue).
  197. ; ; ebp[16:23] -- V contrib to G.
  198. ; ; ebp[ 9:15] -- Zero (pad).
  199. ; ; ebp[ 0: 8] -- V contrib to R.
  200. mov cl,[esi+edx] ; Fetch U.
  201. mov EndOfChromaLine,edi
  202. xor ebx,ebx ; Keep pairing happy.
  203. mov ebx,PD U24Contrib[ecx*8] ; ebx[24:31] -- U contrib to B.
  204. ; ; ebx[16:23] -- U contrib to G.
  205. ; ; ebx[11:15] -- Zero (pad).
  206. ; ; ebx[ 2:10] -- Zero (red).
  207. ; ; ebx[ 0: 1] -- Zero (pad).
  208. mov cl,[esi+edx+1] ; Fetch next U.
  209. lea edi,ChromaContribution
  210. add ebp,ebx ; Chroma contributions to RGB.
  211. NextChromaPel: ; Each pass stores two contribution dwords and pre-computes the one after them.
  212. mov ebx,PD U24Contrib[ecx*8] ; See above.
  213. mov al,[esi+1] ; Fetch V.
  214. mov [edi],ebp ; Store contribs to use for even chroma pel.
  215. mov cl,[esi+edx+2] ; Fetch next U.
  216. mov ebp,PD V24Contrib[eax*8] ; See above.
  217. add edi,8
  218. add ebp,ebx ; Chroma contributions to RGB.
  219. mov al,[esi+2] ; Fetch V.
  220. mov [edi-4],ebp ; Store contribs to use for odd chroma pel.
  221. mov ebx,PD U24Contrib[ecx*8] ; See above.
  222. mov ebp,PD V24Contrib[eax*8] ; See above.
  223. mov cl,[esi+edx+3] ; Fetch next U. On the last pass this and the V fetch above read past ChromaLineLen.
  224. add ebp,ebx ; Chroma contributions to RGB.
  225. add esi,2 ; Inc Chroma cursor.
  226. cmp esi,EndOfChromaLine
  227. jne NextChromaPel
  228. xor eax,eax
  229. mov esi,YCursor
  230. mov [edi],eax ; Store EOL indicator.
  231. mov LineParity,eax ; New chroma row: parity and second-copy flag both cleared.
  232. mov edi,CCOCursor
  233. Keep2ndLineOfOutput: ; Re-entered with esi = same luma line to write the duplicate (zoomed) output line.
  234. DoLine1:
  235. ; Register Usage:
  236. ;
  237. ; edi -- Cursor over the color converted output image.
  238. ; esi -- Cursor over a line of the Y Plane.
  239. ; ebp -- V contribution to R field of RGB value.
  240. ; edx -- Construction of a pel of RGB32.
  241. ; cl -- Y value (i.e. Y contribution to R, G, and B);
  242. ; bl -- UV contribution to G field of RGB value.
  243. ; al -- U contribution to B field of RGB val. (esp itself is the cursor over ChromaContribution.)
  244. xor edx,edx
  245. mov ebp,ChromaContribution ; Get V contribution to R value.
  246. xor ecx,ecx
  247. sub esp,1536 ; Bias esp so it can advance 4 bytes per chroma pel; locals are reached as name+1536 inside the loop.
  248. mov cl,[esi] ; Get Y00.
  249. xor ebx,ebx
  250. and ebp,01FFH ; Extract V contribution to R value.
  251. mov bl,ChromaContribution+1536+2 ; Get UV contribution to G value.
  252. xor eax,eax
  253. DoNext2YPelsOfLine0: ; One pass converts two luma pels sharing one chroma pel and writes four output pels.
  254. mov dl,PB R24Value[ecx+ebp*1] ; Get clamped R value for Pel00.
  255. add esi,2 ; Advance luma cursor.
  256. shl edx,16 ; Position R and high order 0-byte.
  257. mov al,ChromaContribution+1536+3 ; Get U contribution to B value.
  258. mov dh,PB G24Value[ecx+ebx] ; Get clamped G value for Pel00.
  259. add esp,4 ; Advance chroma contribution cursor.
  260. mov dl,PB B24Value[ecx+eax*2] ; Get clamped B value for Pel00.
  261. mov cl,[esi-1] ; Get Y01.
  262. mov Ze [edi],edx ; Write RGB32 for Pel00.
  263. mov Ze [edi+4],edx ; Write RGB32 for Pel00 again (horizontal doubling).
  264. xor edx,edx
  265. mov dh,PB R24Value[ecx+ebp*1] ; Get clamped R value for Pel01.
  266. mov ebp,ChromaContribution+1536 ; Get V contribution to R value.
  267. mov dl,PB G24Value[ecx+ebx] ; Get clamped G value for Pel01.
  268. lea edi,[edi+16] ; Advance output cursor.
  269. shl edx,8 ; Position R, G, and high order 0-byte.
  270. mov bl,ChromaContribution+1536+2 ; Get UV contribution to G value.
  271. mov dl,PB B24Value[ecx+eax*2] ; Get clamped B value for Pel01.
  272. mov cl,[esi] ; Get Y02.
  273. mov Ze [edi-8],edx ; Write RGB32 for Pel01.
  274. mov Ze [edi-4],edx ; Write RGB32 for Pel01 again (horizontal doubling).
  275. xor edx,edx
  276. and ebp,01FFH ; Extract V contrib to R val. 0 --> EOL.
  277. jne DoNext2YPelsOfLine0
  278. add esp,7FFH ; esp is now base-1536+2*FrameWidth; round UP to the 2 KB frame base. Also correct when esp == base (FrameWidth 768).
  279. and esp,0FFFFF800H ; The former "and, then add 800H" sequence overshot the base by 2 KB in that full-width case.
  280. mov eax,CCOSkipDistance
  281. mov bl,LineParity
  282. add edi,eax ; Move the output cursor to the start of the next output line.
  283. xor bl,080H ; Toggle the second-copy flag; the sign flag now reflects its new value.
  284. mov esi,YCursor ; Rewind luma cursor to the start of the line just converted.
  285. jns SecondOutputLineDone ; Flag was set before the toggle: the duplicate line has just been written.
  286. mov LineParity,bl
  287. mov ebp,AspectCount
  288. sub ebp,2 ; If count is non-zero, we keep the line.
  289. mov ecx,AspectAdjustmentCount
  290. mov AspectCount,ebp
  291. jg Keep2ndLineOfOutput ; Flags are still those of the sub above (mov does not alter them).
  292. add ebp,ecx ; Drop the duplicate line and reload the aspect counter.
  293. and bl, 7fh ; clear LineParity SecondOutputLineDone bit
  294. mov AspectCount,ebp
  295. SecondOutputLineDone:
  296. add esi,LumaPitch ; Advance to the next luma line.
  297. xor bl,1 ; Toggle luma line parity; bit 7 is clear on every path reaching here.
  298. mov CCOCursor,edi
  299. mov YCursor,esi
  300. mov LineParity,bl
  301. jne DoLine1 ; Parity now 1: convert the second luma line with the same chroma contributions.
  302. mov eax,esi
  303. mov esi,VCursor ; Inc VPlane cursor to next line.
  304. mov ebp,ChromaPitch
  305. mov ebx,YLimit ; Done with last line?
  306. add esi,ebp
  307. cmp eax,ebx
  308. mov VCursor,esi
  309. jb PrepareChromaLine ; Unsigned compare of luma cursor against YLimit.
  310. Done:
  311. mov esp,StashESP ; Discard the local frame; esp is back at the saved registers.
  312. pop ebx
  313. pop ebp
  314. pop edi
  315. pop esi
  316. rturn ; Not defined in this file; presumably a return macro that invokes the EPILOGUE macro -- confirm.
  317. YUV12ToRGB32ZoomBy2 endp
  318. END