Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

466 lines
17 KiB

  1. ;*************************************************************************
  2. ;** INTEL Corporation Proprietary Information
  3. ;**
  4. ;** This listing is supplied under the terms of a license
  5. ;** agreement with INTEL Corporation and may not be copied
  6. ;** nor disclosed except in accordance with the terms of
  7. ;** that agreement.
  8. ;**
  9. ;** Copyright (c) 1995 Intel Corporation.
  10. ;** All Rights Reserved.
  11. ;**
  12. ;*************************************************************************
  13. ;//
  14. ;// $Header: S:\h26x\src\dec\cx512162.asv
  15. ;//
  16. ;// $Log: S:\h26x\src\dec\cx512162.asv $
  17. ;//
  18. ;// Rev 1.8 22 Mar 1996 16:41:06 BNICKERS
  19. ;// Fix bug wherein UV contrib was being taken from one pel to the right.
  20. ;//
  21. ;// Rev 1.7 19 Mar 1996 11:50:00 bnickers
  22. ;// Fix error regarding commitment of pages to stack.
  23. ;//
  24. ;// Rev 1.6 18 Mar 1996 10:02:00 BNICKERS
  25. ;// Make color convertors non-destructive.
  26. ;//
  27. ;// Rev 1.5 16 Feb 1996 15:12:42 BNICKERS
  28. ;// Correct color shift.
  29. ;//
  30. ;// Rev 1.4 05 Feb 1996 13:35:22 BNICKERS
  31. ;// Fix RGB16 color flash problem, by allowing different RGB16 formats at once.
  32. ;//
  33. ;// Rev 1.3 22 Dec 1995 15:38:54 KMILLS
  34. ;// added new copyright notice
  35. ;//
  36. ;// Rev 1.2 27 Oct 1995 17:30:54 BNICKERS
  37. ;// Fix RGB16 color convertors.
  38. ;//
  39. ;// Rev 1.1 26 Oct 1995 09:46:16 BNICKERS
  40. ;// Reduce the number of blanks in the "proc" statement because the assembler
  41. ;// sometimes has problems with statements longer than 512 characters long.
  42. ;//
  43. ;// Rev 1.0 25 Oct 1995 17:59:18 BNICKERS
  44. ;// Initial revision.
  45. ;//
  46. ;////////////////////////////////////////////////////////////////////////////
  47. ;
  48. ; +---------- Color convertor.
  49. ; |+--------- For both H261 and H263.
  50. ; ||+-------- Version for the Pentium(r) Microprocessor.
  51. ; |||++------ Convert from YUV12.
  52. ; |||||++---- Convert to RGB16.
  53. ; |||||||+--- Zoom by two.
  54. ; ||||||||
  55. ; cx512162 -- This function performs zoom-by-2 YUV12-to-RGB16 color conversion
  56. ; for H26x. It is tuned for best performance on the Pentium(r)
  57. ; Microprocessor. It handles any format in which there
  58. ; are three fields, the low order field being B and fully contained
  59. ; in the low order byte, the second field being G and being
  60. ; somewhere in bits 4 through 11, and the high order field being
  61. ; R and fully contained in the high order byte. Present support
  62. ; for 555, 565, 655, and 664 formats only.
  63. ;
  64. ; The YUV12 input is planar, 8 bits per pel. The Y plane may have
  65. ; a pitch of up to 768. It may have a width less than or equal
  66. ; to the pitch. It must be DWORD aligned, and preferably QWORD
  67. ; aligned. Pitch and Width must be a multiple of four. For best
  68. ; performance, Pitch should not be 4 more than a multiple of 32.
  69. ; Height may be any amount, but must be a multiple of two. The U
  70. ; and V planes may have a different pitch than the Y plane, subject
  71. ; to the same limitations.
  72. ;
  73. ; The color convertor is non-destructive; the input Y, U, and V
  74. ; planes will not be clobbered.
  75. OPTION PROLOGUE:None ; No assembler-generated entry code; the routine below builds its own frame.
  76. OPTION EPILOGUE:ReturnAndRelieveEpilogueMacro ; Return sequence comes from this macro (not defined in this file; presumably supplied by an include below).
  77. include locals.inc
  78. include ccinst.inc
  79. include decconst.inc
  80. .xlist ; Turn the assembly listing off while memmodel.inc is pulled in.
  81. include memmodel.inc
  82. .list ; Turn the assembly listing back on.
  83. .DATA
  84. ; any data would go here
  85. .CODE
       ; Tell the assembler that every segment register addresses the flat
       ; address space, so no segment overrides are generated or checked.
  86. ASSUME cs : FLAT
  87. ASSUME ds : FLAT
  88. ASSUME es : FLAT
  89. ASSUME fs : FLAT
  90. ASSUME gs : FLAT
  91. ASSUME ss : FLAT
  92. ; void FAR ASM_CALLTYPE YUV12ToRGB16ZoomBy2 (U8 * YPlane,
  93. ; U8 * VPlane,
  94. ; U8 * UPlane,
  95. ; UN FrameWidth,
  96. ; UN FrameHeight,
  97. ; UN YPitch,
  98. ; UN VPitch,
  99. ; UN AspectAdjustmentCount,
  100. ; U8 * ColorConvertedFrame,
  101. ; U32 DCIOffset,
  102. ; U32 CCOffsetToLine0,
  103. ; IN CCOPitch,
  104. ; IN CCType)
  105. ;
  106. ; CCOffsetToLine0 is relative to ColorConvertedFrame.
  107. ;
  108. PUBLIC YUV12ToRGB16ZoomBy2
  109. ; due to the need for the ebp reg, these parameter declarations aren't used,
  110. ; they are here so the assembler knows how many bytes to relieve from the stack
  111. YUV12ToRGB16ZoomBy2 proc DIST LANG AYPlane: DWORD,
  112. AVPlane: DWORD, AUPlane: DWORD, AFrameWidth: DWORD, AFrameHeight: DWORD,
  113. AYPitch: DWORD, AVPitch: DWORD, AAspectAdjustmentCnt: DWORD,
  114. AColorConvertedFrame: DWORD, ADCIOffset: DWORD, ACCOffsetToLine0: DWORD,
  115. ACCOPitch: DWORD, ACCType: DWORD
       ;
       ; Outline
       ; -------
       ; For every chroma line (which is shared by two luma lines):
       ;   1. PrepareChromaLine / NextChromaPel sum the UContrib and VContrib
       ;      table entries of each chroma pel and store the sums in a buffer
       ;      of 24-byte records on the local stack frame.  One record serves
       ;      four luma pels:
       ;        +0   chroma contribution for luma pels 0 and 1
       ;        +4   chroma contribution for luma pels 2 and 3
       ;        +8   four dwords in which the pel loop stashes its output
       ;      A zero dword after the last record marks end of line.
       ;   2. DoLine1 / DoNext4YPelsOfLine0 convert one luma line.  Every luma
       ;      pel yields one output dword (the pel twice, side by side), which
       ;      is written to the output line and, rotated by 16 bits, stashed
       ;      in the record.
       ;   3. Unless the aspect-ratio counter says to drop it, the stashed
       ;      dwords are copied out as the second (duplicate) output line.
       ;   4. Steps 2 and 3 run a second time for the next luma line
       ;      (LineParity), then the chroma cursor moves down one line.
       ;
       ; Stack frame
       ; -----------
       ; esp is rounded down to an 8K boundary ("frame base"); all locals are
       ; addressed upward from there.  While converting a luma line esp itself
       ; is the cursor over the chroma records: it is lowered by 4608 and then
       ; stepped by 24 per record, which is why every local reference inside
       ; the pel loop carries a "+4608" (and "-24" once esp has been stepped).
       ; The frame base is recovered afterwards by masking esp.
       ;
       ; Registers esi, edi, ebp and ebx are saved on entry and restored on
       ; exit; eax, ecx and edx are not preserved.
       ;
       ; LocalFrameSize: 64 bytes of scalar locals, 768*6 = 4608 bytes of
       ; chroma records (24 bytes per 4 luma pels at the maximum width of
       ; 768), and 24 more bytes of room for the end-of-line marker.
  116. LocalFrameSize = 64+768*6+24
       ; Four registers are pushed before the arguments are addressed.
  117. RegisterStorageSize = 16
  118. ; Arguments:
       ; Offsets are relative to esp as it stands after the four pushes (that
       ; value is kept in edi); the extra 4 skips the return address.
  119. YPlane_arg = RegisterStorageSize + 4
  120. VPlane_arg = RegisterStorageSize + 8
  121. UPlane_arg = RegisterStorageSize + 12
  122. FrameWidth_arg = RegisterStorageSize + 16
  123. FrameHeight = RegisterStorageSize + 20
  124. YPitch = RegisterStorageSize + 24
  125. ChromaPitch_arg = RegisterStorageSize + 28
  126. AspectAdjustmentCount_arg = RegisterStorageSize + 32
  127. ColorConvertedFrame = RegisterStorageSize + 36
  128. DCIOffset = RegisterStorageSize + 40
  129. CCOffsetToLine0 = RegisterStorageSize + 44
  130. CCOPitch = RegisterStorageSize + 48
  131. CCType_arg = RegisterStorageSize + 52
  132. EndOfArgList = RegisterStorageSize + 56
  133. ; Locals (on local stack frame)
       ; All of these are relative to the 8K-aligned frame base in esp.
       ;   CCOCursor             -- next output line to write.
       ;   CCOSkipDistance       -- CCOPitch minus bytes written per output line.
       ;   ChromaLineLen         -- chroma pels per line (FrameWidth / 2).
       ;   YSkipDistance         -- YPitch minus FrameWidth.
       ;   YLimit                -- luma address at which conversion stops.
       ;   YCursor, VCursor      -- current luma line / current V line.
       ;   DistanceFromVToU      -- UPlane minus VPlane.
       ;   EndOfChromaLine       -- V address at which the chroma loop stops.
       ;   AspectCount           -- running aspect-ratio counter.
       ;   CCType                -- halved byte offset selecting the RGB tables.
       ;   LineParity            -- 0 on the first luma line of a pair, 1 on the second.
       ;   StashESP              -- esp as it was right after the register pushes.
       ;   ChromaContribution    -- first 24-byte chroma record.
       ;   StashOddLinePel0..3   -- the four stash dwords of that first record.
  134. CCOCursor EQU [esp+ 0]
  135. CCOSkipDistance EQU [esp+ 4]
  136. ChromaLineLen EQU [esp+ 8]
  137. YSkipDistance EQU [esp+12]
  138. YLimit EQU [esp+16]
  139. YCursor EQU [esp+20]
  140. VCursor EQU [esp+24]
  141. DistanceFromVToU EQU [esp+28]
  142. EndOfChromaLine EQU [esp+32]
  143. AspectCount EQU [esp+36]
  144. CCType EQU [esp+40]
  145. FrameWidth EQU [esp+44]
  146. ChromaPitch EQU [esp+48]
  147. AspectAdjustmentCount EQU [esp+52]
  148. LineParity EQU [esp+56]
  149. StashESP EQU [esp+60]
  150. ChromaContribution EQU [esp+64]
  151. StashOddLinePel0 EQU [esp+72]
  152. StashOddLinePel1 EQU [esp+76]
  153. StashOddLinePel2 EQU [esp+80]
  154. StashOddLinePel3 EQU [esp+84]
       ; ---- Entry: save registers and build the local frame ----------------
  155. push esi
  156. push edi
  157. push ebp
  158. push ebx
  159. mov edi,esp ; edi = argument base; arguments are read as [edi+xxx_arg].
       ; Allocate the frame one page at a time, reading each page as it is
       ; reached (see Rev 1.7 in the log: commitment of pages to the stack).
  160. sub esp,4096
  161. mov eax,[esp] ; Touch the page.
  162. sub esp,LocalFrameSize-4096 ; Remainder of the local frame.
  163. and esp,0FFFFF000H ; Round down to a 4K boundary.
  164. mov eax,[esp] ; Touch the page.
  165. and esp,0FFFFE000H ; Round down to an 8K boundary: this is the frame base.
  166. mov eax,[esp] ; Touch the page.
       ; The pel loop lowers esp by 4608 below the frame base, so the two
       ; pages underneath the base are touched as well.
  167. sub esp,1000H
  168. mov eax,[esp]
  169. sub esp,1000H
  170. mov eax,[esp]
  171. add esp,2000H ; Back to the 8K-aligned frame base.
       ; ---- Copy arguments into locals --------------------------------------
  172. mov eax,[edi+FrameWidth_arg]
  173. mov ebx,[edi+ChromaPitch_arg]
  174. mov ecx,[edi+AspectAdjustmentCount_arg]
  175. mov FrameWidth,eax
  176. mov ChromaPitch,ebx
  177. mov AspectAdjustmentCount,ecx
  178. mov AspectCount,ecx ; Running counter starts at the full adjustment count.
  179. mov ebx,[edi+VPlane_arg]
  180. mov ecx,[edi+UPlane_arg]
  181. mov eax,[edi+YPlane_arg]
  182. sub ecx,ebx ; ecx = UPlane - VPlane.
  183. mov DistanceFromVToU,ecx
  184. mov VCursor,ebx
  185. mov YCursor,eax
  186. mov eax,[edi+ColorConvertedFrame]
  187. add eax,[edi+DCIOffset]
  188. add eax,[edi+CCOffsetToLine0]
  189. mov CCOCursor,eax ; Output cursor = frame + DCI offset + offset to line 0.
       ; ---- Map CCType to an RGB table selector ------------------------------
       ; The selector is stored halved; the pel loop doubles it together with
       ; the luma value, giving a byte offset of 0, 4096, 8192 or 12288 into
       ; the BValZ2 / RValZ2 / GValZ2 lookups.  The low byte of each halved
       ; value is zero, which leaves dl free to carry the luma pel.
  190. mov ebx,[edi+CCType_arg]
  191. mov ecx,0/2 ; 555 formats.
  192. cmp ebx,CCTYPE_RGB16555ZoomBy2
  193. je @f
  194. cmp ebx,CCTYPE_RGB16555ZoomBy2DCI
  195. je @f
  196. mov ecx,4096/2 ; 565 formats.
  197. cmp ebx,CCTYPE_RGB16565ZoomBy2
  198. je @f
  199. cmp ebx,CCTYPE_RGB16565ZoomBy2DCI
  200. je @f
  201. mov ecx,8192/2 ; 655 formats.
  202. cmp ebx,CCTYPE_RGB16655ZoomBy2
  203. je @f
  204. cmp ebx,CCTYPE_RGB16655ZoomBy2DCI
  205. je @f
  206. mov ecx,12288/2 ; 664 formats.
  207. cmp ebx,CCTYPE_RGB16664ZoomBy2DCI
  208. je @f
  209. cmp ebx,CCTYPE_RGB16664ZoomBy2
  210. je @f
  211. mov ecx,0DEADBEEFH ; Unrecognized CCType: poison value; there is no error return.
  212. @@:
  213. mov CCType,ecx
  214. mov StashESP,edi ; Remember the post-push esp for the exit sequence.
       ; ---- Derive per-line distances and the stopping address ---------------
  215. mov edx,[edi+FrameHeight]
  216. mov ecx,[edi+YPitch]
  217. imul edx,ecx ; edx = FrameHeight * YPitch.
  218. mov ebx,FrameWidth
  219. mov eax,[edi+CCOPitch]
  220. sub ecx,ebx ; ecx = YPitch - FrameWidth.
  221. mov esi,YCursor ; Fetch cursor over luma plane.
  222. shl ebx,2 ; ebx = FrameWidth * 4 = bytes written per output line.
  223. add edx,esi ; edx = luma address just past the last line to convert.
  224. sub eax,ebx ; eax = CCOPitch - bytes written per output line.
  225. mov YSkipDistance,ecx
  226. shr ebx,3 ; ebx = FrameWidth / 2 = chroma pels per line.
  227. mov CCOSkipDistance,eax
  228. mov ChromaLineLen,ebx
  229. mov YLimit,edx
  230. mov esi,VCursor
  231. ; Register Usage:
  232. ;
  233. ; edi -- Chroma contribution Line cursor.
  234. ; esi -- Chroma Line cursor.
  235. ; ebp -- V contribution to RGB; sum of U and V contributions.
  236. ; edx -- Distance from V pel to U pel.
  237. ; ecx -- A U pel.
  238. ; ebx -- U contribution to RGB.
  239. ; eax -- A V pel.
       ; UContrib and VContrib are tables defined outside this file; they are
       ; indexed with a stride of 8 bytes per pel value.
  240. PrepareChromaLine:
  241. mov edi,ChromaLineLen
  242. xor eax,eax
  243. mov edx,DistanceFromVToU
  244. mov al,[esi] ; Fetch V.
  245. add edi,esi ; Compute EOL address.
  246. xor ecx,ecx
  247. mov ebp,PD VContrib[eax*8] ; ebp[ 0: 7] -- Zero
  248. ; ; ebp[ 8:15] -- V contrib to G.
  249. ; ; ebp[16:23] -- V contrib to R.
  250. ; ; ebp[24:31] -- Zero.
  251. mov cl,[esi+edx] ; Fetch U.
  252. mov EndOfChromaLine,edi
  253. xor ebx,ebx ; Keep pairing happy.
  254. mov ebx,PD UContrib[ecx*8] ; ebx[ 0: 7] -- U contrib to B.
  255. ; ; ebx[ 8:15] -- U contrib to G.
  256. ; ; ebx[16:23] -- Zero.
  257. mov cl,[esi+edx+1] ; Fetch next U.
  258. lea edi,ChromaContribution
  259. add ebp,ebx ; Chroma contributions to RGB.
       ; Each pass handles two chroma pels and fills one 24-byte record.  On
       ; entry ebp already holds the sum for the even pel and cl the next U;
       ; the loop fetches one to two pels ahead of the cursor.
  260. NextChromaPel:
  261. mov ebx,PD UContrib[ecx*8] ; See above.
  262. mov al,[esi+1] ; Fetch V.
  263. mov [edi],ebp ; Store contribs to use for even chroma pel.
  264. mov cl,[esi+edx+2] ; Fetch next U.
  265. mov ebp,PD VContrib[eax*8] ; See above.
  266. add edi,24 ; Advance to the next record.
  267. add ebp,ebx ; Chroma contributions to RGB.
  268. mov al,[esi+2] ; Fetch V.
  269. mov [edi-20],ebp ; Store contribs to use for odd chroma pel.
  270. mov ebx,PD UContrib[ecx*8] ; See above.
  271. mov ebp,PD VContrib[eax*8] ; See above.
  272. mov cl,[esi+edx+3] ; Fetch next U.
  273. add ebp,ebx ; Chroma contributions to RGB.
  274. add esi,2 ; Inc Chroma cursor.
  275. cmp esi,EndOfChromaLine
  276. jne NextChromaPel
       ; ---- Set up for the first luma line of the pair -----------------------
  277. mov esi,YCursor
  278. xor ecx,ecx
  279. mov [edi],ecx ; Store EOL indicator.
  280. mov edx,CCType ; edx[8:31] = halved table selector.
  281. mov dl,[esi] ; Fetch Y00.
  282. xor ebx,ebx
  283. mov bl,ChromaContribution ; Get U contribution to B value.
  284. and edx,0FFFFFFFEH ; Reduce to Y00 to 7 bits.
  285. mov StashOddLinePel3,edx ; Stash offset to RGB table to use.
  286. mov edi,CCOCursor
  287. mov al,ChromaContribution+2 ; Get V contribution to R value.
  288. add edx,edx ; Get four times luma.
  289. mov cl,ChromaContribution+1 ; Get UV contribution to G value.
  290. mov LineParity,ch ; ch is zero here: first line of the pair.
  291. and eax,0FFH
  292. sub esp,4608 ; esp becomes the record cursor; locals are now at +4608.
  293. ; Register Usage:
  294. ;
  295. ; esp -- Cursor over the Chroma contribution.
  296. ; edi -- Cursor over the color converted output image.
  297. ; esi -- Cursor over a line of the Y Plane.
  298. ; ebp -- Construction of a pel (twice) of RGB16.
  299. ; edx -- Y value (i.e. Y contribution to R, G, and B) times 4, plus offset
  300. ; to select appropriate table.
  301. ; cl -- UV contribution to G field of RGB value.
  302. ; bl -- U contribution to B field of RGB value.
  303. ; al -- V contribution to R field of RGB value.
       ;
       ; The table selector travels in the Pel3 stash slot of the record that
       ; is about to be processed: it is read from there before each pel,
       ; moved on to the next record's slot, and only then is the current
       ; record's slot overwritten with the stashed Pel03.
       ; BValZ2, RValZ2 and GValZ2 are tables defined outside this file; PD
       ; and Ze are not defined here either (presumably from the includes).
  304. DoLine1:
  305. DoNext4YPelsOfLine0:
  306. mov ebp,PD BValZ2[edx+ebx*4] ; Get clamped B value for Pel00.
  307. or ebp,PD RValZ2[edx+eax*4] ; Get clamped R value for Pel00.
  308. or ebp,PD GValZ2[edx+ecx*4] ; Get clamped G value for Pel00.
  309. mov edx,StashOddLinePel3+4608 ; edx[8:31] == Offset to RGB tbl, div by 2.
  310. mov Ze [edi],ebp ; Store Pel00 to color converted output.
  311. mov dl,[esi+1] ; Fetch Y01.
  312. rol ebp,16 ; Swap Pel00 copies, for better dither.
  313. and edx,0FFFFFFFEH ; Reduce Y01 to 7 bits.
  314. mov StashOddLinePel0+4608,ebp ; Stash Pel00 for later xfer to 2nd line.
  315. add edx,edx ; Get four times luma.
  316. add edi,16 ; Advance output cursor.
  317. add esi,4 ; Advance luma cursor.
  318. mov ebp,PD BValZ2[edx+ebx*4] ; Get clamped B value for Pel01.
  319. mov bl,ChromaContribution+4+4608 ; Load U contribution to B val for pels2&3.
  320. or ebp,PD RValZ2[edx+eax*4] ; Get clamped R value for Pel01.
  321. mov al,ChromaContribution+6+4608 ; Load V contribution to R val for pels2&3.
  322. or ebp,PD GValZ2[edx+ecx*4] ; Get clamped G value for Pel01.
  323. mov edx,StashOddLinePel3+4608 ; edx[8:31] == Offset to RGB tbl, div by 2.
  324. mov Ze [edi+4-16],ebp ; Store Pel01 to color converted output.
  325. mov dl,[esi+2-4] ; Fetch Y02.
  326. rol ebp,16 ; Swap Pel01 copies, for better dither.
  327. and edx,0FFFFFFFEH ; Reduce Y02 to 7 bits.
  328. mov StashOddLinePel1+4608,ebp ; Stash Pel01 for later xfer to 2nd line.
  329. add edx,edx ; Get four times luma.
  330. mov cl,ChromaContribution+5+4608 ; Load UV contrib to G val for pels2&3.
  331. add esp,24 ; Advance chroma cursor.
  332. mov ebp,PD BValZ2[edx+ebx*4] ; Get clamped B value for Pel02.
  333. or ebp,PD RValZ2[edx+eax*4] ; Get clamped R value for Pel02.
  334. or ebp,PD GValZ2[edx+ecx*4] ; Get clamped G value for Pel02.
  335. mov edx,StashOddLinePel3+4608-24 ; edx[8:31] == Offset to RGB tbl, div by 2.
  336. mov Ze [edi+8-16],ebp ; Store Pel02 to color converted output.
  337. mov dl,[esi+3-4] ; Fetch Y03.
  338. rol ebp,16 ; Swap Pel02 copies, for better dither.
  339. and edx,0FFFFFFFEH ; Reduce Y03 to 7 bits.
  340. mov StashOddLinePel3+4608,edx ; Stash offset to RGB table to use.
  341. add edx,edx ; Get four times luma.
  342. mov StashOddLinePel2+4608-24,ebp ; Stash Pel02 for later xfer to 2nd line.
  343. mov esi,esi ; Keep pairing happy.
  344. mov ebp,PD BValZ2[edx+ebx*4] ; Get clamped B value for Pel03.
  345. mov bl,ChromaContribution+0+4608 ; Load U contribution to B val for pels0&1.
  346. or ebp,PD RValZ2[edx+eax*4] ; Get clamped R value for Pel03.
  347. mov al,ChromaContribution+2+4608 ; Load V contribution to R val for pels0&1.
  348. or ebp,PD GValZ2[edx+ecx*4] ; Get clamped G value for Pel03.
  349. mov edx,StashOddLinePel3+4608 ; edx[8:31] == Offset to RGB tbl, div by 2.
  350. mov Ze [edi+12-16],ebp ; Store Pel03 to color converted output.
  351. mov dl,[esi] ; Fetch Y00.
  352. rol ebp,16 ; Swap Pel03 copies, for better dither.
  353. and edx,0FFFFFFFEH ; Reduce to Y00 to 7 bits.
  354. mov StashOddLinePel3+4608-24,ebp ; Stash Pel03 for later xfer to 2nd line.
  355. add edx,edx ; Get four times luma.
  356. test eax,eax ; A zero V contribution is the EOL indicator.
  357. mov cl,ChromaContribution+1+4608 ; Load UV contrib to G val for pels0&1.
  358. jne DoNext4YPelsOfLine0
       ; ---- Recover the frame base from the record cursor --------------------
       ; esp is now (base - 4608 + 24 * FrameWidth/4).  When FrameWidth is
       ; exactly 768 that is the 8K-aligned base itself; masking alone would
       ; leave it unchanged and the add would then overshoot by 8K.  Backing
       ; off one dword first puts esp strictly inside the 8K below the base
       ; for every supported width, so mask-and-add always lands on the base.
       sub esp,4
  359. and esp,0FFFFE000H
  360. add esp,02000H
       ; ---- End of a luma line: advance cursors, decide on the 2nd line ------
  361. mov edx,YSkipDistance
  362. mov ebp,CCOSkipDistance
  363. add esi,edx ; esi = start of the next luma line.
  364. mov ebx,AspectCount
  365. add edi,ebp ; edi = start of the next output line.
  366. sub ebx,2 ; If count is still positive, we keep the line.
  367. mov AspectCount,ebx
  368. lea ecx,StashOddLinePel0 ; ecx = stash dwords of the first record.
  369. mov edx,FrameWidth ; (mov and lea leave the flags of the sub intact.)
  370. jg Keep2ndLineOfLine0
  371. add ebx,AspectAdjustmentCount ; Counter ran out: reload it and drop the line.
  372. mov AspectCount,ebx
  373. jmp Skip2ndLineOfLine0
       ; Copy the stashed dwords of every record to the duplicate output line:
       ; 16 bytes out per record, 24 bytes between records, 4 luma pels per pass.
  374. Keep2ndLineOfLine0:
  375. Keep2ndLineOfLine0_Loop:
  376. mov eax,[ecx]
  377. mov ebx,[ecx+4]
  378. mov Ze [edi],eax
  379. mov eax,[ecx+8]
  380. mov Ze [edi+4],ebx
  381. mov ebx,[ecx+12]
  382. mov Ze [edi+8],eax
  383. add ecx,24 ; Next record.
  384. mov Ze [edi+12],ebx
  385. add edi,16
  386. sub edx,4 ; Four luma pels done.
  387. jne Keep2ndLineOfLine0_Loop
  388. add edi,ebp ; Skip to the start of the next output line.
  389. Skip2ndLineOfLine0:
       ; Toggle the parity: 0 -> 1 means the second luma line of the pair is
       ; still to do; 1 -> 0 means this chroma line is finished.
  390. mov al,LineParity
  391. xor al,1
  392. je Line1Done
  393. mov LineParity,al
       ; Same set-up as before the first line, now for the second luma line.
  394. mov edx,CCType
  395. mov dl,[esi] ; Fetch Y00 of the second line.
  396. xor ebx,ebx
  397. mov bl,ChromaContribution ; U contribution to B value.
  398. and edx,0FFFFFFFEH ; Reduce Y00 to 7 bits.
  399. mov StashOddLinePel3,edx ; Re-seed the table selector in the first record.
  400. xor ecx,ecx
  401. add edx,edx ; Four times luma.
  402. mov al,ChromaContribution+2 ; V contribution to R value.
  403. mov cl,ChromaContribution+1 ; UV contribution to G value.
  404. sub esp,4608 ; esp becomes the record cursor again.
  405. and eax,0FFH
  406. jmp DoLine1
       ; ---- Both luma lines done: move to the next chroma line ---------------
  407. Line1Done:
  408. mov YCursor,esi
  409. mov eax,esi
  410. mov CCOCursor,edi
  411. mov ecx,ChromaPitch
  412. mov esi,VCursor ; Inc VPlane cursor to next line.
  413. mov ebx,YLimit ; Done with last line?
  414. add esi,ecx
  415. cmp eax,ebx
  416. mov VCursor,esi
  417. jb PrepareChromaLine ; Unsigned compare of addresses: more luma lines remain.
       ; ---- Exit: drop the local frame and restore the saved registers -------
  418. mov esp,StashESP
  419. pop ebx
  420. pop ebp
  421. pop edi
  422. pop esi
  423. rturn
  424. YUV12ToRGB16ZoomBy2 endp
  425. END