Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

544 lines
17 KiB

  1. ;*************************************************************************
  2. ;** INTEL Corporation Proprietary Information
  3. ;**
  4. ;** This listing is supplied under the terms of a license
  5. ;** agreement with INTEL Corporation and may not be copied
  6. ;** nor disclosed except in accordance with the terms of
  7. ;** that agreement.
  8. ;**
  9. ;** Copyright (c) 1995 Intel Corporation.
  10. ;** All Rights Reserved.
  11. ;**
  12. ;*************************************************************************
  13. ;//
  14. ;// $Header: S:\h26x\src\dec\cx51281.asv
  15. ;//
  16. ;// $Log: S:\h26x\src\dec\cx51281.asv $
  17. ;//
  18. ;// Rev 1.6 18 Mar 1996 09:58:40 bnickers
  19. ;// Make color convertors non-destructive.
  20. ;//
  21. ;// Rev 1.5 05 Feb 1996 13:35:38 BNICKERS
  22. ;// Fix RGB16 color flash problem, by allowing different RGB16 formats at once.
  23. ;//
  24. ;// Rev 1.4 16 Jan 1996 11:23:06 BNICKERS
  25. ;// Fix starting point in output stream, so we don't start at line two and
  26. ;// write off the end of the output frame.
  27. ;//
  28. ;// Rev 1.3 22 Dec 1995 15:43:28 KMILLS
  29. ;//
  30. ;// added new copyright notice
  31. ;//
  32. ;// Rev 1.2 03 Nov 1995 11:49:40 BNICKERS
  33. ;// Support YUV12 to CLUT8 zoom and non-zoom color conversions.
  34. ;//
  35. ;// Rev 1.1 26 Oct 1995 09:46:08 BNICKERS
  36. ;// Reduce the number of blanks in the "proc" statement because the assembler
  37. ;// sometimes has problems with statements longer than 512 characters.
  38. ;//
  39. ;// Rev 1.0 25 Oct 1995 17:59:20 BNICKERS
  40. ;// Initial revision.
  41. ;//
  42. ;////////////////////////////////////////////////////////////////////////////
  43. ;
  44. ; +---------- Color convertor.
  45. ; |+--------- For both H261 and H263.
  46. ; ||+-------- Version for the Pentium Microprocessor.
  47. ; |||++------ Convert from YUV12.
  48. ; |||||+----- Convert to CLUT8.
  49. ; ||||||+---- Zoom by one, i.e. non-zoom.
  50. ; |||||||
  51. ; cx51281 -- This function performs YUV12 to CLUT8 color conversion for H26x.
  52. ; It is tuned for best performance on the Pentium(r) Microprocessor.
  53. ; It dithers among 9 chroma points and 26 luma points, mapping the
  54. ; 8 bit luma pels into the 26 luma points by clamping the ends and
  55. ; stepping the luma by 8.
  56. ;
  57. ; The color convertor is non-destructive; the input Y, U, and V
  58. ; planes will not be clobbered.
  59. OPTION PROLOGUE:None ; No assembler-generated prologue; the procedure below saves registers and builds its frame by hand.
  60. OPTION EPILOGUE:ReturnAndRelieveEpilogueMacro ; Epilogue is produced by this macro, which is not defined in this file (presumably it comes from an include -- confirm).
  61. include locals.inc
  62. include ccinst.inc
  63. include decconst.inc
  64. .xlist ; Listing output is switched off around memmodel.inc ...
  65. include memmodel.inc
  66. .list ; ... and switched back on here.
  67. .DATA
  68. ; No data is defined in this module; any data would go here.
  69. .CODE
  70. ASSUME cs : FLAT ; Every segment register is assumed to address FLAT.
  71. ASSUME ds : FLAT
  72. ASSUME es : FLAT
  73. ASSUME fs : FLAT
  74. ASSUME gs : FLAT
  75. ASSUME ss : FLAT
  76. ; void FAR ASM_CALLTYPE YUV12ToCLUT8 (U8 * YPlane,
  77. ; U8 * VPlane,
  78. ; U8 * UPlane,
  79. ; UN FrameWidth,
  80. ; UN FrameHeight,
  81. ; UN YPitch,
  82. ; UN VPitch,
  83. ; UN AspectAdjustmentCount,
  84. ; U8 * ColorConvertedFrame,
  85. ; U32 DCIOffset,
  86. ; U32 CCOffsetToLine0,
  87. ; IN CCOPitch,
  88. ; IN CCType)
  89. ;
  90. ; CCOffsetToLine0 is relative to ColorConvertedFrame.
  91. ;
  92. PUBLIC YUV12ToCLUT8
  93. ; due to the need for the ebp reg, these parameter declarations aren't used,
  94. ; they are here so the assembler knows how many bytes to relieve from the stack
  95. YUV12ToCLUT8 proc DIST LANG AYPlane: DWORD,
  96. AVPlane: DWORD,
  97. AUPlane: DWORD,
  98. AFrameWidth: DWORD,
  99. AFrameHeight: DWORD,
  100. AYPitch: DWORD,
  101. AVPitch: DWORD,
  102. AAspectAdjustmentCnt: DWORD,
  103. AColorConvertedFrame: DWORD,
  104. ADCIOffset: DWORD,
  105. ACCOffsetToLine0: DWORD,
  106. ACCOPitch: DWORD,
  107. ACCType: DWORD
  108. LocalFrameSize = 64+768*2+4 ; 64 bytes of scalar locals, 768*2 bytes of ChromaContribution, 4 bytes for its EOL marker.
  109. RegisterStorageSize = 16 ; Four dwords: esi, edi, ebp and ebx are pushed at entry.
  110. ; Arguments: offsets from the value esp has right after the register pushes (held in edi during setup).
  111. YPlane_arg = RegisterStorageSize + 4
  112. VPlane_arg = RegisterStorageSize + 8
  113. UPlane_arg = RegisterStorageSize + 12
  114. FrameWidth_arg = RegisterStorageSize + 16
  115. FrameHeight = RegisterStorageSize + 20
  116. YPitch_arg = RegisterStorageSize + 24
  117. ChromaPitch_arg = RegisterStorageSize + 28
  118. AspectAdjustmentCount_arg = RegisterStorageSize + 32
  119. ColorConvertedFrame = RegisterStorageSize + 36
  120. DCIOffset = RegisterStorageSize + 40
  121. CCOffsetToLine0 = RegisterStorageSize + 44
  122. CCOPitch_arg = RegisterStorageSize + 48
  123. CCType_arg = RegisterStorageSize + 52
  124. EndOfArgList = RegisterStorageSize + 56
  125. ; Locals (on local stack frame). All are esp-relative, so they resolve correctly only while esp sits at the aligned frame base; the luma loops move esp temporarily and touch only ChromaContribution while they do.
  126. CCOCursor EQU [esp+ 0]
  127. ChromaLineLen EQU [esp+ 4]
  128. YLimit EQU [esp+ 8]
  129. YCursor EQU [esp+12]
  130. VCursor EQU [esp+16]
  131. DistanceFromVToU EQU [esp+20]
  132. EndOfChromaLine EQU [esp+24]
  133. AspectCount EQU [esp+28]
  134. FrameWidth EQU [esp+32]
  135. ChromaPitch EQU [esp+36]
  136. AspectAdjustmentCount EQU [esp+40]
  137. LumaPitch EQU [esp+44]
  138. CCOPitch EQU [esp+48]
  139. StashESP EQU [esp+52]
  140. ChromaContribution EQU [esp+64]
  141. push esi ; Save the four registers accounted for by RegisterStorageSize.
  142. push edi
  143. push ebp
  144. push ebx
  145. mov edi,esp ; edi = esp after the pushes: base for the *_arg offsets, later saved in StashESP.
  146. sub esp,LocalFrameSize ; Reserve the local frame ...
  147. and esp,0FFFFF800H ; ... and round its base down to a 2K boundary (the luma loops rely on this to restore esp).
  148. mov eax,[edi+FrameWidth_arg] ; Copy the scalar arguments into esp-relative locals.
  149. mov ebx,[edi+ChromaPitch_arg]
  150. mov ecx,[edi+AspectAdjustmentCount_arg]
  151. mov edx,[edi+YPitch_arg]
  152. mov esi,[edi+CCOPitch_arg]
  153. mov FrameWidth,eax
  154. mov ChromaPitch,ebx
  155. mov AspectAdjustmentCount,ecx
  156. mov AspectCount,ecx ; The running countdown starts at AspectAdjustmentCount.
  157. mov LumaPitch,edx
  158. mov CCOPitch,esi
  159. mov ebx,[edi+VPlane_arg]
  160. mov ecx,[edi+UPlane_arg]
  161. mov eax,[edi+YPlane_arg]
  162. sub ecx,ebx ; ecx = UPlane - VPlane.
  163. mov DistanceFromVToU,ecx
  164. mov VCursor,ebx
  165. mov YCursor,eax
  166. mov eax,[edi+ColorConvertedFrame] ; Output cursor = ColorConvertedFrame + DCIOffset + CCOffsetToLine0.
  167. add eax,[edi+DCIOffset]
  168. add eax,[edi+CCOffsetToLine0]
  169. mov CCOCursor,eax
  170. mov StashESP,edi ; Remember the pre-frame esp for the exit path.
  171. mov edx,[edi+FrameHeight]
  172. mov ecx,LumaPitch
  173. imul edx,ecx ; edx = FrameHeight * YPitch.
  174. mov ebx,FrameWidth
  175. mov esi,YCursor ; Fetch cursor over luma plane.
  176. sar ebx,1 ; ebx = FrameWidth / 2 = chroma pels per line.
  177. add edx,esi ; edx = YPlane + FrameHeight * YPitch.
  178. mov YLimit,edx ; The main loop ends when YCursor equals this exactly.
  179. mov ChromaLineLen,ebx ; NOTE(review): the chroma loops step 2 pels and exit only on exact equality, so FrameWidth is presumably a multiple of 4 -- confirm.
  180. NextFourLines: ; Each pass handles one V/U line plus two luma lines, then a second V/U line plus two more luma lines.
  181. ; Convert line of U and V pels to the corresponding UVDitherPattern Indices.
  182. ;
  183. ; Register Usage
  184. ;
  185. ; edi -- Cursor over V line
  186. ; esi -- Cursor over storage to hold preprocessed UV.
  187. ; ebp -- Distance from V line to U line.
  188. ; edx -- Weighted V dither pattern (3*pattern + 00A0A0A0AH); at loop bottom, distance to end of chroma line.
  189. ; bl -- V pel value (loaded from [edi])
  190. ; cl -- U pel value (loaded from [edi+ebp])
  191. ; eax -- U dither pattern, then the combined UV value that is stored.
  192. mov edi,VCursor ; Fetch address of pel 0 of next line of V.
  193. mov ebp,DistanceFromVToU ; Fetch span from V plane to U plane.
  194. lea esi,ChromaContribution ; esi = start of the preprocessed-UV buffer.
  195. mov eax,ChromaLineLen
  196. mov edx,ChromaPitch
  197. add eax,edi ; eax = address just past the last V pel of this line.
  198. mov EndOfChromaLine,eax
  199. add edx,edi ; edx = address of the next V line.
  200. mov bl,[edi] ; Fetch first V pel.
  201. ;
  202. and ebx,0FCH ; Reduce to 6 bits; also clears the upper bytes so ebx can index the table.
  203. mov cl,[edi+ebp*1] ; Fetch first U pel.
  204. and ecx,0FCH ; Reduce to 6 bits; also clears the upper bytes so ecx can index the table.
  205. mov VCursor,edx ; Stash for next time around.
  206. @@: ; Each pass converts two chroma pels and writes 8 bytes of ChromaContribution.
  207. mov edx,PD UVDitherLine01[ebx] ; Fetch dither pattern for V point.
  208. mov bl,[edi+1] ; Fetch next V pel.
  209. mov eax,PD UVDitherLine23[ecx] ; Fetch dither pattern for U point.
  210. mov cl,[edi+ebp*1+1] ; Fetch next U pel.
  211. lea edx,[edx+edx*2+00A0A0A0AH] ; Weight V dither pattern: times 3, plus a constant added to every byte.
  212. and bl,0FCH ; Reduce to 6 bits.
  213. add eax,edx ; Combine dither patterns for U and V.
  214. and cl,0FCH ; Reduce to 6 bits.
  215. mov edx,PD UVDitherLine01[ebx] ; Fetch dither pattern for V point.
  216. mov [esi],eax ; Stash UV corresponding to Y00,Y01,Y10,Y11.
  217. mov eax,PD UVDitherLine23[ecx] ; Fetch dither pattern for U point.
  218. mov bl,[edi+2] ; Fetch next V pel.
  219. lea edx,[edx+edx*2+00A0A0A0AH] ; Weight V dither pattern.
  220. mov cl,[edi+ebp*1+2] ; Fetch next U pel.
  221. add eax,edx ; Combine dither patterns for U and V.
  222. mov edx,EndOfChromaLine ; Fetch EOL address.
  223. mov [esi+4],eax ; Stash UV corresponding to Y02,Y03,Y12,Y13.
  224. add edi,2 ; Advance V line cursor (the U pels are addressed relative to it via ebp).
  225. and bl,0FCH ; Reduce to 6 bits.
  226. and cl,0FCH ; Reduce to 6 bits.
  227. add esi,8 ; Advance the ChromaContribution cursor by the two dwords just written.
  228. sub edx,edi ; edx = EndOfChromaLine - edi; zero once the line is done.
  229. jne @b ; Loop until the V cursor reaches EndOfChromaLine exactly.
  230. ; Now color convert a line of luma.
  231. ;
  232. ; Register Usage
  233. ; edi -- Cursor over line of color converted output frame, minus esi.
  234. ; esi -- Cursor over Y line.
  235. ; ebp -- Not used.
  236. ; edx,eax -- Build output pels.
  237. ; ecx,ebx -- Y pels.
  238. mov [esi],edx ; Stash EOL indication (edx is zero on loop exit) just past the last UV entry.
  239. mov edx,AspectCount
  240. mov esi,YCursor ; Reload cursor over Y line.
  241. dec edx ; Count this line against the aspect countdown.
  242. mov AspectCount,edx ; mov leaves the flags from dec intact for the jne.
  243. jne KeepLine0 ; Countdown not yet zero: emit this line.
  244. mov edx,AspectAdjustmentCount ; Countdown expired: reload it ...
  245. mov AspectCount,edx
  246. jmp SkipLine0 ; ... and emit nothing for this line (output cursor is not advanced).
  247. KeepLine0:
  248. mov edi,CCOCursor ; Fetch output cursor.
  249. mov eax,CCOPitch ; Compute start of next line.
  250. add eax,edi
  251. mov edx,ChromaContribution+4 ; Fetch <UV03 UV02 xxxx xxxx>.
  252. mov CCOCursor,eax ; Stash start of next line.
  253. sub edi,esi ; Get span from Y cursor to CCO cursor.
  254. mov bl,[esi+3] ; Fetch Y03.
  255. and edx,0FFFF0000H ; <UV03 UV02 xxxx xxxx>.
  256. mov eax,ChromaContribution ; Fetch <xxxx xxxx UV01 UV00>.
  257. sub esp,1536-8 ; Turn esp into the chroma cursor: ChromaContribution+1536 now names the entry 8 bytes past the current one.
  258. and eax,00000FFFFH ; <xxxx xxxx UV01 UV00>.
  259. mov cl,[esi+2] ; Fetch Y02.
  260. Line0Loop: ; Each pass emits four output pels.
  261. or eax,edx ; <UV03 UV02 UV01 UV00>.
  262. mov dh,PB YDither[ebx+4] ; <xxxx xxxx Y03 xxxx>.
  263. mov dl,PB YDither[ecx+2] ; <xxxx xxxx Y03 Y02>.
  264. mov bl,PB [esi+1] ; Fetch Y01.
  265. shl edx,16 ; < Y03 Y02 xxxx xxxx>.
  266. mov cl,PB [esi] ; Fetch Y00.
  267. mov dh,PB YDither[ebx+6] ; < Y03 Y02 Y01 xxxx>.
  268. mov bl,PB [esi+3+4] ; Fetch next Y03.
  269. mov dl,PB YDither[ecx+0] ; < Y03 Y02 Y01 Y00>.
  270. mov cl,PB [esi+2+4] ; Fetch next Y02.
  271. add eax,edx ; < P03 P02 P01 P00>.
  272. mov edx,ChromaContribution+1536+4 ; Fetch next <UV03 UV02 xxxx xxxx>.
  273. mov Ze [edi+esi],eax ; Store four pels to color conv output.
  274. mov eax,ChromaContribution+1536 ; Fetch next <xxxx xxxx UV01 UV00>.
  275. and edx,0FFFF0000H ; <UV03 UV02 xxxx xxxx>.
  276. add esi,4 ; Advance input cursor.
  277. add esp,8 ; Advance the chroma cursor by one 8-byte entry.
  278. and eax,00000FFFFH ; <xxxx xxxx UV01 UV00>. Sets ZF when the word just fetched is zero.
  279. jne Line0Loop ; Repeat until the zero EOL marker is fetched.
  280. and esp,0FFFFF800H ; Round esp down to a 2K boundary ...
  281. add esp,0800H ; ... and step up 2K to get back to the frame base. NOTE(review): holds only while esp is still below the base; by my arithmetic that fails for FrameWidth >= 764 -- confirm.
  282. SkipLine0:
  283. ; Color convert another line of luma.
  284. ;
  285. ; Register Usage
  286. ; edi -- Cursor over line of color converted output frame, minus esi.
  287. ; esi -- Cursor over Y line.
  288. ; ebp -- Y Pitch.
  289. ; edx,eax -- Build output pels.
  290. ; ecx,ebx -- Y pels.
  291. mov esi,YCursor ; Reload cursor over Y line.
  292. mov ebp,LumaPitch
  293. mov edx,AspectCount
  294. mov edi,CCOCursor ; Fetch output cursor.
  295. lea eax,[esi+ebp*2] ; Compute address of the Y line two rows down (start of the next line pair).
  296. dec edx ; Count this line against the aspect countdown.
  297. mov YCursor,eax ; YCursor advances whether or not this line is emitted.
  298. mov eax,CCOPitch ; Compute start of next line.
  299. mov AspectCount,edx ; mov leaves the flags from dec intact for the jne.
  300. jne KeepLine1 ; Countdown not yet zero: emit this line.
  301. mov edx,AspectAdjustmentCount ; Countdown expired: reload it and skip the line.
  302. mov AspectCount,edx
  303. jmp SkipLine1
  304. KeepLine1:
  305. add eax,edi
  306. mov edx,ChromaContribution+4 ; Fetch <xxxx xxxx UV13 UV12>.
  307. mov CCOCursor,eax ; Stash start of next line.
  308. sub edi,esi ; Get span from Y cursor to CCO cursor.
  309. mov bl,[esi+ebp*1+3] ; Fetch Y13.
  310. mov eax,ChromaContribution ; Fetch <UV11 UV10 xxxx xxxx>.
  311. shl edx,16 ; <UV13 UV12 xxxx xxxx>.
  312. sub esp,1536-8 ; Turn esp into the chroma cursor, as in the line 0 loop.
  313. shr eax,16 ; <xxxx xxxx UV11 UV10>.
  314. mov cl,[esi+ebp*1+2] ; Fetch Y12.
  315. Line1Loop: ; Each pass emits four output pels of the line one pitch below esi.
  316. or eax,edx ; <UV13 UV12 UV11 UV10>.
  317. mov dh,PB YDither[ebx+6] ; <xxxx xxxx Y13 xxxx>.
  318. mov dl,PB YDither[ecx+0] ; <xxxx xxxx Y13 Y12>.
  319. mov bl,PB [esi+ebp*1+1] ; Fetch Y11.
  320. shl edx,16 ; < Y13 Y12 xxxx xxxx>.
  321. mov cl,PB [esi+ebp*1] ; Fetch Y10.
  322. mov dh,PB YDither[ebx+4] ; < Y13 Y12 Y11 xxxx>.
  323. mov bl,PB [esi+ebp*1+3+4] ; Fetch next Y13.
  324. mov dl,PB YDither[ecx+2] ; < Y13 Y12 Y11 Y10>.
  325. mov cl,PB [esi+ebp*1+2+4] ; Fetch next Y12.
  326. add eax,edx ; < P13 P12 P11 P10>.
  327. mov edx,ChromaContribution+1536+4 ; Fetch next <xxxx xxxx UV13 UV12>.
  328. mov Ze [edi+esi],eax ; Store four pels to color conv output.
  329. mov eax,ChromaContribution+1536 ; Fetch next <UV11 UV10 xxxx xxxx>.
  330. shl edx,16 ; <UV13 UV12 xxxx xxxx>.
  331. add esi,4 ; Advance input cursor.
  332. shr eax,16 ; <xxxx xxxx UV11 UV10>. Sets ZF when the word just fetched is zero.
  333. lea esp,[esp+8] ; Advance the chroma cursor; lea is used so the flags from shr survive.
  334. jne Line1Loop ; Repeat until the zero EOL marker is fetched.
  335. and esp,0FFFFF800H ; Restore esp to the frame base (same caveat as after Line0Loop).
  336. add esp,0800H
  337. SkipLine1: ; Second half: same sequence for the next V/U line and the next two luma lines, with different dither table selections.
  338. mov edi,VCursor ; Fetch addr of pel 0 of next line of V.
  339. mov ebp,DistanceFromVToU ; Fetch span from V plane to U plane.
  340. lea esi,ChromaContribution ; esi = start of the preprocessed-UV buffer (overwrites the previous line's data).
  341. mov eax,ChromaLineLen
  342. mov edx,ChromaPitch
  343. add eax,edi ; eax = address just past the last V pel of this line.
  344. mov EndOfChromaLine,eax
  345. add edx,edi ; edx = address of the next V line.
  346. mov bl,[edi] ; Fetch first V pel.
  347. ;
  348. and ebx,0FCH ; Reduce to 6 bits.
  349. mov cl,[edi+ebp*1] ; Fetch first U pel.
  350. and ecx,0FCH ; Reduce to 6 bits.
  351. mov VCursor,edx ; Stash for next time around.
  352. @@: ; As the first chroma loop, but V uses UVDitherLine23 and U uses UVDitherLine01.
  353. mov edx,PD UVDitherLine23[ebx] ; Fetch dither pattern for V point.
  354. mov bl,[edi+1] ; Fetch next V pel.
  355. mov eax,PD UVDitherLine01[ecx] ; Fetch dither pattern for U point.
  356. mov cl,[edi+ebp*1+1] ; Fetch next U pel.
  357. lea edx,[edx+edx*2+00A0A0A0AH] ; Weight V dither pattern.
  358. and bl,0FCH ; Reduce to 6 bits.
  359. add eax,edx ; Combine dither patterns for U and V.
  360. and cl,0FCH ; Reduce to 6 bits.
  361. mov edx,PD UVDitherLine23[ebx] ; Fetch dither pattern for V point.
  362. mov [esi],eax ; Stash first UV dword of this entry.
  363. mov eax,PD UVDitherLine01[ecx] ; Fetch dither pattern for U point.
  364. mov bl,[edi+2] ; Fetch next V pel.
  365. lea edx,[edx+edx*2+00A0A0A0AH] ; Weight V dither pattern.
  366. mov cl,[edi+ebp*1+2] ; Fetch next U pel.
  367. add eax,edx ; Combine dither patterns for U and V.
  368. mov edx,EndOfChromaLine ; Fetch EOL address.
  369. mov [esi+4],eax ; Stash second UV dword of this entry.
  370. add edi,2 ; Advance V line cursor (U is addressed relative to it).
  371. and bl,0FCH ; Reduce to 6 bits.
  372. and cl,0FCH ; Reduce to 6 bits.
  373. add esi,8 ; Advance the ChromaContribution cursor.
  374. sub edx,edi ; Zero once the V cursor reaches EndOfChromaLine.
  375. jne @b
  376. mov [esi],edx ; Stash EOL indication (edx is zero here).
  377. mov edx,AspectCount
  378. mov esi,YCursor ; Reload cursor over Y line.
  379. dec edx ; Count this line against the aspect countdown.
  380. mov AspectCount,edx
  381. jne KeepLine2 ; Countdown not yet zero: emit this line.
  382. mov edx,AspectAdjustmentCount ; Countdown expired: reload it and skip the line.
  383. mov AspectCount,edx
  384. jmp SkipLine2
  385. KeepLine2:
  386. mov edi,CCOCursor ; Fetch output cursor.
  387. mov eax,CCOPitch ; Compute start of next line.
  388. add eax,edi
  389. mov edx,ChromaContribution+4 ; Fetch second UV dword; its high word is used.
  390. mov CCOCursor,eax ; Stash start of next line.
  391. sub edi,esi ; Get span from Y cursor to CCO cursor.
  392. mov bl,[esi+3] ; Fetch fourth Y pel of the group.
  393. and edx,0FFFF0000H ; Keep the high word.
  394. mov eax,ChromaContribution ; Fetch first UV dword; its low word is used.
  395. sub esp,1536-8 ; Turn esp into the chroma cursor, as in the line 0 loop.
  396. and eax,00000FFFFH ; Keep the low word.
  397. mov cl,[esi+2] ; Fetch third Y pel of the group.
  398. Line2Loop: ; Same shape as Line0Loop, with YDither offsets 2,4,0,6 instead of 4,2,6,0.
  399. or eax,edx ; Merge the four UV bytes.
  400. mov dh,PB YDither[ebx+2]
  401. mov dl,PB YDither[ecx+4]
  402. mov bl,PB [esi+1] ; Fetch second Y pel of the group.
  403. shl edx,16 ; Move the two dithered Y bytes to the high half.
  404. mov cl,PB [esi] ; Fetch first Y pel of the group.
  405. mov dh,PB YDither[ebx+0]
  406. mov bl,PB [esi+3+4] ; Fetch fourth Y pel of the next group.
  407. mov dl,PB YDither[ecx+6]
  408. mov cl,PB [esi+2+4] ; Fetch third Y pel of the next group.
  409. add eax,edx ; Add luma to chroma to form four output pels.
  410. mov edx,ChromaContribution+1536+4 ; Fetch second UV dword of the next entry.
  411. mov Ze [edi+esi],eax ; Store four pels to color conv output.
  412. mov eax,ChromaContribution+1536 ; Fetch first UV dword of the next entry.
  413. and edx,0FFFF0000H ; Keep the high word.
  414. add esi,4 ; Advance input cursor.
  415. add esp,8 ; Advance the chroma cursor by one 8-byte entry.
  416. and eax,00000FFFFH ; Keep the low word; sets ZF when it is zero.
  417. jne Line2Loop ; Repeat until the zero EOL marker is fetched.
  418. and esp,0FFFFF800H ; Restore esp to the frame base (same caveat as after Line0Loop).
  419. add esp,0800H
  420. SkipLine2:
  421. mov esi,YCursor ; Reload cursor over Y line.
  422. mov ebp,LumaPitch
  423. mov edx,AspectCount
  424. mov edi,CCOCursor ; Fetch output cursor.
  425. lea eax,[esi+ebp*2] ; Compute address of the Y line two rows down.
  426. dec edx ; Count this line against the aspect countdown.
  427. mov YCursor,eax ; YCursor advances whether or not this line is emitted.
  428. mov eax,CCOPitch ; Compute start of next line.
  429. mov AspectCount,edx
  430. jne KeepLine3 ; Countdown not yet zero: emit this line.
  431. mov edx,AspectAdjustmentCount ; Countdown expired: reload it and skip the line.
  432. mov AspectCount,edx
  433. jmp SkipLine3
  434. KeepLine3:
  435. add eax,edi
  436. mov edx,ChromaContribution+4 ; Fetch second UV dword; its low word is used.
  437. mov CCOCursor,eax ; Stash start of next line.
  438. sub edi,esi ; Get span from Y cursor to CCO cursor.
  439. mov bl,[esi+ebp*1+3] ; Fetch fourth Y pel of the group, one pitch below esi.
  440. mov eax,ChromaContribution ; Fetch first UV dword; its high word is used.
  441. shl edx,16 ; Move the low word to the high half.
  442. sub esp,1536-8 ; Turn esp into the chroma cursor, as in the line 0 loop.
  443. shr eax,16 ; Move the high word to the low half.
  444. mov cl,[esi+ebp*1+2] ; Fetch third Y pel of the group.
  445. Line3Loop: ; Same shape as Line1Loop, with YDither offsets 0,6,2,4 instead of 6,0,4,2.
  446. or eax,edx ; Merge the four UV bytes.
  447. mov dh,PB YDither[ebx+0]
  448. mov dl,PB YDither[ecx+6]
  449. mov bl,PB [esi+ebp*1+1] ; Fetch second Y pel of the group.
  450. shl edx,16 ; Move the two dithered Y bytes to the high half.
  451. mov cl,PB [esi+ebp*1] ; Fetch first Y pel of the group.
  452. mov dh,PB YDither[ebx+2]
  453. mov bl,PB [esi+ebp*1+3+4] ; Fetch fourth Y pel of the next group.
  454. mov dl,PB YDither[ecx+4]
  455. mov cl,PB [esi+ebp*1+2+4] ; Fetch third Y pel of the next group.
  456. add eax,edx ; Add luma to chroma to form four output pels.
  457. mov edx,ChromaContribution+1536+4 ; Fetch second UV dword of the next entry.
  458. mov Ze [edi+esi],eax ; Store four pels to color conv output.
  459. mov eax,ChromaContribution+1536 ; Fetch first UV dword of the next entry.
  460. shl edx,16 ; Move the low word to the high half.
  461. add esi,4 ; Advance input cursor.
  462. shr eax,16 ; Move the high word to the low half; sets ZF when it is zero.
  463. lea esp,[esp+8] ; Advance the chroma cursor; lea is used so the flags from shr survive.
  464. jne Line3Loop ; Repeat until the zero EOL marker is fetched.
  465. and esp,0FFFFF800H ; Restore esp to the frame base (same caveat as after Line0Loop).
  466. add esp,0800H
  467. SkipLine3:
  468. mov esi,YCursor ; YCursor has advanced by four luma lines during this pass.
  469. mov eax,YLimit
  470. cmp eax,esi
  471. jne NextFourLines ; Exact-equality exit. NOTE(review): presumably requires FrameHeight to be a multiple of 4 -- confirm.
  472. mov esp,StashESP ; Drop the local frame: back to esp as it was after the register pushes.
  473. pop ebx ; Restore saved registers in reverse push order.
  474. pop ebp
  475. pop edi
  476. pop esi
  477. rturn ; NOTE(review): rturn is not defined in this file; presumably a return macro from an include that ties in with OPTION EPILOGUE -- confirm.
  478. YUV12ToCLUT8 endp
  479. END
  479. END