Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

687 lines
24 KiB

  1. ;-------------------------------------------------------------------------
  2. ; INTEL Corporation Proprietary Information
  3. ;
  4. ; This listing is supplied under the terms of a license
  5. ; agreement with INTEL Corporation and may not be copied
  6. ; nor disclosed except in accordance with the terms of
  7. ; that agreement.
  8. ;
  9. ; Copyright (c) 1996 Intel Corporation.
  10. ; All Rights Reserved.
  11. ;
  12. ;-------------------------------------------------------------------------
  13. ;-------------------------------------------------------------------------
  14. ;//
  15. ;// $Header: S:\h26x\src\dec\cxm12162.asv
  16. ;//
  17. ;// $Log: S:\h26x\src\dec\cxm12162.asv $
  18. ;//
  19. ;// Rev 1.10 01 Apr 1997 12:51:50 BNICKERS
  20. ;// Fix bugs # 153 and 156 -- wrong color when U is small; right edge flickeri
  21. ;//
  22. ;// Rev 1.9 09 Dec 1996 15:20:40 BECHOLS
  23. ;// Brian fixed ARC bug #94.
  24. ;//
  25. ;// Rev 1.8 06 Sep 1996 16:07:58 BNICKERS
  26. ;// Re-written to filter new points.
  27. ;//
  28. ;-------------------------------------------------------------------------
  29. ;
  30. ; +---------- Color convertor.
  31. ; |+--------- For both H261 and H263.
  32. ; ||+-------- Version for Intel Microprocessors with MMX Technology
  33. ; |||++------ Convert from YUV12.
  34. ; |||||++---- Convert to RGB16.
  35. ; |||||||+--- Zoom by two.
  36. ; ||||||||
  37. ; cxm12162 -- This function performs zoom-by-2 YUV12-to-RGB16 color conversion
  38. ; for H26x. It is tuned for best performance on Intel
  39. ; Microprocessors with MMX Technology. It handles any format in
  40. ; which there are three fields, the low order field being B and
  41. ; starting in bit 0, the second field being G, and the high order
  42. ; field being R. Present support for 555, 565, 655, and 664
  43. ; formats only. This version adds new rows and columns by
  44. ; averaging them with the originals to either side.
  45. ;
  46. ; The YUV12 input is planar, 8 bits per pel. The Y plane may have
  47. ; a pitch of up to 768. It may have a width less than or equal
  48. ; to the pitch. It must be QWORD aligned. Pitch and Width must
  49. ; be a multiple of eight. Height may be any amount, but must be
  50. ; a multiple of two. The U and V planes may have a different
  51. ; pitch than the Y plane, subject to the same limitations.
  52. ;
  53. ; The color convertor is non-destructive; the input Y, U, and V
  54. ; planes will not be clobbered.
  55. OPTION PROLOGUE:None ; No assembler-generated prologue; the proc below pushes its own registers.
  56. OPTION EPILOGUE:ReturnAndRelieveEpilogueMacro ; Epilogue is produced by this macro (defined outside this file).
  57. include ccinst.inc
  58. .xlist ; Suppress listing output while the next includes are read.
  59. include iammx.inc
  60. include memmodel.inc
  61. .list ; Resume listing output.
  ;-------------------------------------------------------------------------
  ; MMXCCDATA -- lookup tables for the YUV12 -> RGB16 zoom-by-2 convertor.
  ;
  ; All three tables have 256 entries and are indexed directly by the raw
  ; 8-bit pel value (the code uses [reg*8] for luma and [reg*4] for chroma).
  ; The multipliers 04A7FH, 0408BH, 00C83H, 01A04H and 03312H are in the
  ; ratio 1.164 : 2.017 : 0.391 : 0.813 : 1.596 (they appear to be the usual
  ; BT.601 YCbCr-to-RGB coefficients -- confirm against the codec spec).
  ;-------------------------------------------------------------------------
  62. MMXCCDATA SEGMENT PAGE
  63. ALIGN 16
  ; Luma table: one QWORD per Y value, four words < 32Y 64Y 32Y 0 > (high
  ; word first), i.e. the pel's weight in the horizontal interpolation filter.
  64. Luma0020004000200000 LABEL DWORD
  ; Y = 0..15: below nominal black, contributes nothing.
  65. REPEAT 16
  66. DD 0, 0
  67. ENDM
  ; Y = 16..234: scaled luma (CNT = Y - 16).
  68. CNT = 0
  69. REPEAT 219
  70. DW 0
  71. DW (CNT*04A7FH)/00200H
  72. DW (CNT*04A7FH)/00100H
  73. DW (CNT*04A7FH)/00200H
  74. CNT = CNT + 1
  75. ENDM
  ; Y = 235..255: clamped to the maximum weights.
  76. REPEAT 21
  77. DW 00000H
  78. DW 01FFFH
  79. DW 03FFFH
  80. DW 01FFFH
  81. ENDM
  ; U table: one DWORD per U value; low word = contribution to G, high word =
  ; contribution to B.  Entry index i corresponds to CNT = i - 128.
  82. UContribToBandG LABEL DWORD
  ; U = 0 and U = 1: the B term (CNT*0408BH)/40H would fall below -32768,
  ; so it is pinned at 08000H.
  83. DW -(-128*0C83H)/00040H
  84. DW 08000H
  85. DW -(-127*0C83H)/00040H
  86. DW 08000H
  87. CNT = -126
  88. REPEAT 253
  89. DW -(CNT*00C83H)/00040H
  90. DW (CNT*0408BH)/00040H
  91. CNT = CNT + 1
  92. ENDM
  ; U = 255: the B term would exceed 32767, so it is pinned at 07FFFH.
  ; The G term follows the same -(CNT*0C83H)/40H formula as every other
  ; entry; it was previously stored with the wrong (positive) sign, which
  ; made G jump from -6305 at U = 254 to +6355 at U = 255.
  93. DW -(127*0C83H)/00040H
  94. DW 07FFFH
  ; V table: one DWORD per V value; low word = contribution to G, high word =
  ; contribution to R.  No entry overflows a word, so no special cases.
  95. VContribToRandG LABEL DWORD
  96. CNT = -128
  97. REPEAT 256
  98. DW -(CNT*01A04H)/00040H
  99. DW (CNT*03312H)/00040H
  100. CNT = CNT + 1
  101. ENDM
  102. MMXCCDATA ENDS
  103. .CODE
  ; Tell the assembler that every segment register addresses the FLAT group,
  ; so no segment overrides are generated for the memory references below.
  104. ASSUME ds : FLAT
  105. ASSUME es : FLAT
  106. ASSUME fs : FLAT
  107. ASSUME gs : FLAT
  108. ASSUME ss : FLAT
  108. ASSUME ss : FLAT
  109. ; void FAR ASM_CALLTYPE MMX_YUV12ToRGB16ZoomBy2 (U8 * YPlane,
  110. ; U8 * VPlane,
  111. ; U8 * UPlane,
  112. ; UN FrameWidth,
  113. ; UN FrameHeight,
  114. ; UN YPitch,
  115. ; UN VPitch,
  116. ; UN AspectAdjustmentCount,
  117. ; U8 * ColorConvertedFrame,
  118. ; U32 DCIOffset,
  119. ; U32 CCOffsetToLine0,
  120. ; IN CCOPitch,
  121. ; IN CCType)
  122. ;
  123. ; CCOffsetToLine0 is relative to ColorConvertedFrame.
  124. ;
  125. ; due to the need for the ebp reg, these parameter declarations aren't used,
  126. ; they are here so the assembler knows how many bytes to relieve from the stack
  ;-------------------------------------------------------------------------
  ; MMX_YUV12ToRGB16ZoomBy2
  ;
  ; Converts a planar YUV12 frame to 16-bit RGB while doubling it in both
  ; directions.  New columns and rows are the average of their neighbours;
  ; the right-most column and the bottom row are replicated.
  ;
  ; Flow:
  ;   1. Build a page-aligned local frame and copy the arguments into it.
  ;   2. Select dither patterns, field masks and shift counts from CCType.
  ;   3. Horizontally filter two Y lines at a time into 40-byte records
  ;      (FiltLine0..3 + HFiltLinePrev, one record per two input pels).
  ;   4. DoOutputLine turns one filtered line plus one U/V line into RGB16.
  ;
  ; Register save: esi, edi, ebp and ebx are pushed on entry and restored at
  ; Done.  ebp is used as a general register, so arguments are reached
  ; through edi (the entry esp) rather than through the PROC parameter names.
  ;
  ; NOTE(review): FrameWidth is not checked against MAXWIDTH; a wider frame
  ; would yield a negative StartIndexOfYLine and write below the filter area.
  ; NOTE(review): no EMMS is issued before returning -- presumably the
  ; caller does it; confirm.
  ;-------------------------------------------------------------------------
  127. PUBLIC MMX_YUV12ToRGB16ZoomBy2
  128. MMX_YUV12ToRGB16ZoomBy2 proc DIST LANG AYPlane: DWORD,
  129. AVPlane: DWORD,
  130. AUPlane: DWORD,
  131. AFrameWidth: DWORD,
  132. AFrameHeight: DWORD,
  133. AYPitch: DWORD,
  134. AVPitch: DWORD,
  135. AAspectAdjustmentCnt: DWORD,
  136. AColorConvertedFrame: DWORD,
  137. ADCIOffset: DWORD,
  138. ACCOffsetToLine0: DWORD,
  139. ACCOPitch: DWORD,
  140. ACCType: DWORD
  141. MAXWIDTH = 768 ; Widest Y line supported; sizes the filtered-luma area.
  142. LocalFrameSize = MAXWIDTH*20+128+64 ; Multiple of 32, so the frame base stays 32-byte aligned.
  143. RegisterStorageSize = 16 ; Four registers pushed on entry.
  144. ; Arguments:
  ; Offsets are relative to the entry esp (kept in edi): four saved registers,
  ; then the 4-byte return address, then the arguments in declaration order.
  145. YPlane_arg = RegisterStorageSize + 4
  146. VPlane_arg = RegisterStorageSize + 8
  147. UPlane_arg = RegisterStorageSize + 12
  148. FrameWidth_arg = RegisterStorageSize + 16
  149. FrameHeight = RegisterStorageSize + 20
  150. YPitch_arg = RegisterStorageSize + 24
  151. ChromaPitch_arg = RegisterStorageSize + 28
  152. AspectAdjustmentCount_arg = RegisterStorageSize + 32
  153. ColorConvertedFrame = RegisterStorageSize + 36
  154. DCIOffset = RegisterStorageSize + 40
  155. CCOffsetToLine0 = RegisterStorageSize + 44
  156. CCOPitch_arg = RegisterStorageSize + 48
  157. CCType = RegisterStorageSize + 52
  158. EndOfArgList = RegisterStorageSize + 56
  159. ; Locals (on local stack frame)
  ; Bytes 0..63: per-format constants and the output cursor.
  160. DitherB EQU [esp+ 0]
  161. DitherG EQU [esp+ 8]
  162. DitherR EQU [esp+ 16]
  163. SelectBBits EQU [esp+ 24]
  164. SelectGBits EQU [esp+ 32]
  165. SelectRBits EQU [esp+ 40]
  166. ShiftCountForB EQU [esp+ 48]
  167. ShiftCountForG EQU [esp+ 52]
  168. ShiftCountForR EQU [esp+ 56]
  169. CCOCursor EQU [esp+ 60]
  ; Scalars placed above the filtered-luma area.
  170. CCOPitch EQU [esp+MAXWIDTH*20+128+ 0]
  171. YCursor EQU [esp+MAXWIDTH*20+128+ 4]
  172. YLimit EQU [esp+MAXWIDTH*20+128+ 8]
  173. YPitch EQU [esp+MAXWIDTH*20+128+12]
  174. UCursor EQU [esp+MAXWIDTH*20+128+16]
  175. DistanceFromUToV EQU [esp+MAXWIDTH*20+128+20]
  176. ChromaPitch EQU [esp+MAXWIDTH*20+128+24]
  177. AspectCount EQU [esp+MAXWIDTH*20+128+28]
  178. AspectAdjustmentCount EQU [esp+MAXWIDTH*20+128+32]
  179. StartIndexOfYLine EQU [esp+MAXWIDTH*20+128+36]
  180. StashESP EQU [esp+MAXWIDTH*20+128+40]
  ; Filtered-luma area: 40-byte records, five QWORDs each, indexed by edi/edx.
  181. FiltLine0 EQU [esp+ 64] ; Must be 32 byte aligned.
  182. FiltLine1 EQU [esp+ 72]
  183. FiltLine2 EQU [esp+ 80]
  184. FiltLine3 EQU [esp+ 88]
  185. HFiltLinePrev EQU [esp+ 96]
  186. push esi
  187. push edi
  188. push ebp
  189. push ebx
  190. mov edi,esp ; edi = entry esp; arguments are [edi+xxx_arg].
  191. and esp,0FFFFF000H ; Round down to a 4K boundary.
  ; Grow the stack one page at a time, touching each new page (presumably
  ; a stack-growth probe so the pages are committed in order).
  192. sub esp,4096
  193. mov eax,[esp]
  194. sub esp,4096
  195. mov eax,[esp]
  196. sub esp,4096
  197. mov eax,[esp]
  198. sub esp,LocalFrameSize-12288
  199. mov eax,[esp]
  ; StartIndexOfYLine = (MAXWIDTH - FrameWidth) * 20, so that every line's
  ; index ends exactly at MAXWIDTH*20 (20 bytes of record per input pel).
  200. mov eax,MAXWIDTH ; Was the literal 768; use the symbol so the two cannot diverge.
  201. sub eax,[edi+FrameWidth_arg]
  202. imul eax,20
  203. mov StartIndexOfYLine,eax
  204. mov eax,[edi+YPlane_arg]
  205. mov YCursor,eax
  206. mov ebx,[edi+YPitch_arg]
  207. mov YPitch,ebx
  208. mov ecx,[edi+FrameHeight]
  209. imul ebx,ecx
  210. add eax,ebx
  211. mov YLimit,eax ; YLimit = YPlane + YPitch * FrameHeight (one past the last line).
  212. mov eax,[edi+UPlane_arg]
  213. mov ebx,[edi+VPlane_arg]
  214. mov UCursor,eax
  215. sub ebx,eax
  216. mov DistanceFromUToV,ebx ; V is addressed as [UCursor + DistanceFromUToV].
  217. mov eax,[edi+ColorConvertedFrame]
  218. add eax,[edi+DCIOffset]
  219. add eax,[edi+CCOffsetToLine0]
  220. mov CCOCursor,eax ; First output line.
  221. mov eax,[edi+ChromaPitch_arg]
  222. mov ChromaPitch,eax
  223. mov eax,[edi+CCOPitch_arg]
  224. mov CCOPitch,eax
  225. mov eax,[edi+AspectAdjustmentCount_arg]
  226. mov AspectAdjustmentCount,eax
  227. mov AspectCount,eax
  228. mov StashESP,edi ; Remember the entry esp for the epilogue at Done.
  ; Dispatch on the requested RGB16 layout.
  229. mov eax,[edi+CCType]
  230. cmp eax,CCTYPE_RGB16555ZoomBy2
  231. je CCTypeIs555
  232. cmp eax,CCTYPE_RGB16555ZoomBy2DCI
  233. je CCTypeIs555
  234. cmp eax,CCTYPE_RGB16565ZoomBy2
  235. je CCTypeIs565
  236. cmp eax,CCTYPE_RGB16565ZoomBy2DCI
  237. je CCTypeIs565
  238. cmp eax,CCTYPE_RGB16655ZoomBy2
  239. je CCTypeIs655
  240. cmp eax,CCTYPE_RGB16655ZoomBy2DCI
  241. je CCTypeIs655
  242. cmp eax,CCTYPE_RGB16664ZoomBy2DCI
  243. je CCTypeIs664
  244. cmp eax,CCTYPE_RGB16664ZoomBy2
  245. je CCTypeIs664
  ; Unknown CCType: poison YCursor and fall into the 555 setup, so the first
  ; luma fetch uses a bogus pointer (presumably a deliberate fault).
  246. mov eax,0DEADBEEFH
  247. mov YCursor,eax
  ; Each CCTypeIsXXX block stores: alternating-word dither patterns, the bit
  ; mask of each field in the output pel, and the shift that moves each
  ; 15-bit component into its field.
  ; NOTE(review): 5-bit fields get a dither step of 0200H, 6-bit fields
  ; 0400H and the 4-bit B field 0100H; a wider field getting a larger step
  ; looks inverted -- confirm against the intended dither design.
  248. CCTypeIs555:
  249. mov eax,000000200H ; Dither pattern.
  250. mov ebx,002000000H
  251. mov DitherB,eax
  252. mov DitherB+4,eax
  253. mov DitherG,ebx
  254. mov DitherG+4,ebx
  255. mov DitherR,eax
  256. mov DitherR+4,eax
  257. mov eax,003E003E0H ; Bits to extract for fields
  258. mov ebx,07C007C00H
  259. mov SelectGBits,eax
  260. mov SelectGBits+4,eax
  261. mov SelectRBits,ebx
  262. mov SelectRBits+4,ebx
  263. mov eax,0001F001FH
  264. xor ecx,ecx ; Left shift count for R
  265. mov SelectBBits,eax
  266. mov SelectBBits+4,eax
  267. mov eax,10 ; Right shift count for B
  268. mov ebx,5 ; Right shift count for G
  269. mov ShiftCountForB,eax
  270. mov ShiftCountForG,ebx
  271. mov ShiftCountForR,ecx
  272. jmp CCTypeInitialized
  273. CCTypeIs565:
  274. mov eax,000000200H ; Dither pattern.
  275. mov ebx,004000000H
  276. mov DitherB,eax
  277. mov DitherB+4,eax
  278. mov DitherG,ebx
  279. mov DitherG+4,ebx
  280. mov DitherR,eax
  281. mov DitherR+4,eax
  282. mov eax,007E007E0H ; Bits to extract for fields
  283. mov ebx,0F800F800H
  284. mov SelectGBits,eax
  285. mov SelectGBits+4,eax
  286. mov SelectRBits,ebx
  287. mov SelectRBits+4,ebx
  288. mov eax,0001F001FH
  289. mov ecx,1 ; Left shift count for R
  290. mov SelectBBits,eax
  291. mov SelectBBits+4,eax
  292. mov eax,10 ; Right shift count for B
  293. mov ebx,4 ; Right shift count for G
  294. mov ShiftCountForB,eax
  295. mov ShiftCountForG,ebx
  296. mov ShiftCountForR,ecx
  297. jmp CCTypeInitialized
  298. CCTypeIs655:
  299. mov eax,000000200H ; Dither pattern.
  300. mov ebx,004000000H
  301. mov DitherB,eax
  302. mov DitherB+4,eax
  303. mov DitherG,eax
  304. mov DitherG+4,eax
  305. mov DitherR,ebx
  306. mov DitherR+4,ebx
  307. mov eax,003E003E0H ; Bits to extract for fields
  308. mov ebx,0FC00FC00H
  309. mov SelectGBits,eax
  310. mov SelectGBits+4,eax
  311. mov SelectRBits,ebx
  312. mov SelectRBits+4,ebx
  313. mov eax,0001F001FH
  314. mov ecx,1 ; Left shift count for R
  315. mov SelectBBits,eax
  316. mov SelectBBits+4,eax
  317. mov eax,10 ; Right shift count for B
  318. mov ebx,5 ; Right shift count for G
  319. mov ShiftCountForB,eax
  320. mov ShiftCountForG,ebx
  321. mov ShiftCountForR,ecx
  322. jmp CCTypeInitialized
  323. CCTypeIs664:
  324. mov eax,000000400H ; Dither pattern.
  325. mov ebx,001000000H
  326. mov DitherB,ebx
  327. mov DitherB+4,ebx
  328. mov DitherG,eax
  329. mov DitherG+4,eax
  330. mov DitherR,eax
  331. mov DitherR+4,eax
  332. mov eax,003F003F0H ; Bits to extract for fields
  333. mov ebx,0FC00FC00H
  334. mov SelectGBits,eax
  335. mov SelectGBits+4,eax
  336. mov SelectRBits,ebx
  337. mov SelectRBits+4,ebx
  338. mov eax,0000F000FH
  339. mov ecx,1 ; Left shift count for R
  340. mov SelectBBits,eax
  341. mov SelectBBits+4,eax
  342. mov eax,11 ; Right shift count for B
  343. mov ebx,5 ; Right shift count for G
  344. mov ShiftCountForB,eax
  345. mov ShiftCountForG,ebx
  346. mov ShiftCountForR,ecx
  ; First pass: filter Y lines 0 and 1.  Only FiltLine2 (2*L0) and FiltLine3
  ; (L0+L1) are produced, since there is no previous line above line 0.
  347. CCTypeInitialized:
  348. mov esi,YCursor
  349. mov ebp,YPitch
  350. mov edi,StartIndexOfYLine
  351. xor eax,eax
  352. lea edx,[esi+ebp*2]
  353. xor ebx,ebx
  354. mov YCursor,edx ; Advance the saved cursor by two Y lines.
  355. mov bl,[esi+ebp*1] ; Get Y10 (a of line L3; for left edge).
  356. mov al,[esi] ; Get Y00 (A of line L2; for left edge).
  357. movq mm1,Luma0020004000200000[ebx*8] ; L1:< 32a 64a 32a 0 >
  358. mov bl,[esi+ebp*1+2] ; Get c.
  359. movq mm0,Luma0020004000200000[eax*8] ; L0:< 32A 64A 32A 0 >
  360. mov al,[esi+2] ; Get C.
  361. ; esi -- Cursor over input line of Y.
  362. ; edi -- Index to lines of filtered Y. Quit when MAXWIDTH*20.
  363. ; ebp -- Pitch from one line of Y to the next.
  364. ; al, bl -- Y pels
  365. ; mm0 -- For line 0, contribution of pel to left of two pels under cursor now.
  366. ; mm1 -- For line 1, contribution of pel to left of two pels under cursor now.
  367. ; mm2-mm6 -- Scratch.
  368. Next2PelsOfFirst2LumaLines:
  369. movq mm3,Luma0020004000200000[ebx*8] ; L1:< 32c 64c 32c 0 >
  370. psrlq mm1,32 ; L1:< 0 0 32a 64a >
  371. movq mm2,Luma0020004000200000[eax*8] ; L0:< 32C 64C 32C 0 >
  372. punpckldq mm1,mm3 ; L1:< 32c 0 32a 64a >
  373. xor ebx,ebx
  374. xor eax,eax
  375. mov bl,[esi+ebp*1+1] ; Get b.
  376. psrlq mm0,32 ; L0:< 0 0 32A 64A >
  377. mov al,[esi+1] ; Get B.
  378. add edi,40 ; Inc filtered luma temp stg idx.
  379. paddw mm1,Luma0020004000200000[ebx*8] ; L1:< 32b+32c 64b 32a+32b 64a >
  380. punpckldq mm0,mm2 ; L0:< 32C 0 32A 64A >
  381. paddw mm0,Luma0020004000200000[eax*8] ; L0:< 32B+32C 64B 32A+32B 64A >
  382. movq HFiltLinePrev[edi-40],mm1 ; Save L1 as next iters LPrev.
  383. paddw mm1,mm0 ; L0+L1
  384. paddw mm0,mm0 ; 2L0
  385. add esi,2 ; Increment input index.
  386. movq FiltLine3[edi-40],mm1 ; Save filtered line L0+L1.
  387. movq mm1,mm3 ; Next iters a.
  388. movq FiltLine2[edi-40],mm0 ; Save filtered line 2L0.
  389. movq mm0,mm2 ; Next iters A.
  390. mov bl,[esi+ebp*1+2] ; Get c.
  391. cmp edi,MAXWIDTH*20-40 ; More than one pel pair left?
  392. mov al,[esi+2] ; Get C.
  393. jl Next2PelsOfFirst2LumaLines
  ; Last pel pair of the line: there is no pel to the right, so b/B are
  ; reused as c/C.  Falls through once edi has reached MAXWIDTH*20.
  394. xor ebx,ebx
  395. xor ecx,ecx ; NOTE(review): ecx is reloaded before its next read; eax may have been intended.
  396. mov bl,[esi+ebp*1+1] ; Right edge: use b as c.
  397. cmp edi,MAXWIDTH*20 ; Done yet.
  398. mov al,[esi+1] ; Right edge: use B as C.
  399. jl Next2PelsOfFirst2LumaLines
  400. mov ebp,DistanceFromUToV
  401. lea eax,FiltLine2 ; Start output at filtered line 2.
  402. mov esi,UCursor
  403. mov edx,StartIndexOfYLine
  404. jmp DoOutputLine
  ; Reached from Next4OutputLines with the flags of "cmp YCursor,YLimit"
  ; still live (mov and lea do not alter flags):
  ;   above YLimit -> everything has been output, leave;
  ;   equal        -> emit the final two lines by replicating the last
  ;                   horizontally filtered line (2*LP).
  405. Last2OutputLines:
  406. mov ebp,DistanceFromUToV
  407. lea esi,[edi+40] ; NOTE(review): esi is overwritten below before it is read.
  408. ja Done
  409. ; edi -- Index to lines of filtered Y. Quit when MAXWIDTH*20.
  410. ; mm0-mm6 -- Scratch.
  411. movq mm0,HFiltLinePrev[edi] ; Fetch horizontally filtered line LP.
  412. paddw mm0,mm0 ; 2LP
  413. Next2PelsOfLast2LumaLines:
  414. movq FiltLine3[edi],mm0 ; Save horz and vert filt line 2LP.
  415. movq FiltLine2[edi],mm0 ; Save horz and vert filt line 2LP.
  416. movq mm0,HFiltLinePrev[edi+40]; Fetch horizontally filtered line LP.
  417. add edi,40
  418. paddw mm0,mm0 ; 2LP
  419. cmp edi,MAXWIDTH*20 ; Done yet.
  420. jne Next2PelsOfLast2LumaLines
  421. lea eax,FiltLine2
  422. mov edx,StartIndexOfYLine
  423. mov esi,UCursor
  424. jmp DoOutputLine
  ; Steady state: read the next two Y lines (L0, L1) and, together with the
  ; saved previous line LP, produce four filtered lines:
  ;   FiltLine0 = 2LP, FiltLine1 = LP+L0, FiltLine2 = 2L0, FiltLine3 = L0+L1.
  425. Next4OutputLines:
  426. mov esi,YCursor
  427. mov ebp,YPitch
  428. mov edi,StartIndexOfYLine
  429. mov ecx,YLimit
  430. lea edx,[esi+ebp*2]
  431. xor eax,eax
  432. mov YCursor,edx
  433. xor ebx,ebx
  434. mov al,[esi] ; Get Y00 (A of line L2; for left edge).
  435. cmp esi,ecx ; Past the last Y line?  Flags are consumed at Last2OutputLines too.
  436. mov bl,[esi+ebp*1] ; Get Y10 (a of line L3; for left edge).
  437. jae Last2OutputLines
  438. movq mm1,Luma0020004000200000[ebx*8] ; L1:< 32a 64a 32a 0 >
  439. mov bl,[esi+ebp*1+2] ; Get c.
  440. movq mm0,Luma0020004000200000[eax*8] ; L0:< 32A 64A 32A 0 >
  441. mov al,[esi+2] ; Get C.
  442. ; esi -- Cursor over input line of Y.
  443. ; edi -- Index to lines of filtered Y. Quit when MAXWIDTH*20.
  444. ; ebp -- Pitch from one line of Y to the next.
  445. ; al, bl -- Y pels
  446. ; mm0 -- For line 0, contribution of pel to left of two pels under cursor now.
  447. ; mm1 -- For line 1, contribution of pel to left of two pels under cursor now.
  448. ; mm2-mm6 -- Scratch.
  449. Next2PelsOf2LumaLines:
  450. movq mm3,Luma0020004000200000[ebx*8] ; L1:< 32c 64c 32c 0 >
  451. psrlq mm1,32 ; L1:< 0 0 32a 64a >
  452. movq mm2,Luma0020004000200000[eax*8] ; L0:< 32C 64C 32C 0 >
  453. punpckldq mm1,mm3 ; L1:< 32c 0 32a 64a >
  454. movq mm4,HFiltLinePrev[edi] ; LP
  455. psrlq mm0,32 ; L0:< 0 0 32A 64A >
  456. xor ebx,ebx
  457. xor eax,eax
  458. mov bl,[esi+ebp*1+1] ; Get b.
  459. movq mm5,mm4 ; LP
  460. mov al,[esi+1] ; Get B.
  461. add esi,2 ; Increment input index.
  462. paddw mm1,Luma0020004000200000[ebx*8] ; L1:< 32b+32c 64b 32a+32b 64a >
  463. punpckldq mm0,mm2 ; L0:< 32C 0 32A 64A >
  464. paddw mm0,Luma0020004000200000[eax*8] ; L0:< 32B+32C 64B 32A+32B 64A >
  465. paddw mm5,mm5 ; 2LP
  466. movq HFiltLinePrev[edi],mm1 ; Save L1 as next iters LPrev.
  467. paddw mm4,mm0 ; LP+L0
  468. movq FiltLine0[edi],mm5 ; Save 2LP
  469. paddw mm1,mm0 ; L0+L1
  470. movq FiltLine1[edi],mm4 ; Save LP+L0
  471. paddw mm0,mm0 ; 2L0
  472. movq FiltLine3[edi],mm1 ; Save L0+L1
  473. movq mm1,mm3 ; Next iters a.
  474. movq FiltLine2[edi],mm0 ; Save 2L0
  475. movq mm0,mm2 ; Next iters A.
  476. add edi,40 ; Inc filtered luma temp stg idx.
  477. mov bl,[esi+ebp*1+2] ; Get c.
  478. cmp edi,MAXWIDTH*20-40 ; More than one pel pair left?
  479. mov al,[esi+2] ; Get C.
  480. jl Next2PelsOf2LumaLines
  ; Last pel pair of the line: reuse b/B as c/C (same as the first pass).
  481. xor ebx,ebx
  482. xor ecx,ecx ; NOTE(review): ecx is reloaded before its next read; eax may have been intended.
  483. mov bl,[esi+ebp*1+1] ; Right edge: use b as c.
  484. cmp edi,MAXWIDTH*20 ; Done yet.
  485. mov al,[esi+1] ; Right edge: use B as C.
  486. jl Next2PelsOf2LumaLines
  487. mov ebp,DistanceFromUToV
  488. mov esi,UCursor
  489. lea eax,FiltLine0 ; Start output at filtered line 0.
  490. mov edx,StartIndexOfYLine
  ; Emit one RGB16 output line from the filtered Y line at eax and the
  ; chroma line at esi.  AspectCount is decremented per line; when it hits
  ; zero the line is dropped (aspect-ratio correction).
  491. DoOutputLine:
  492. mov edi,CCOCursor
  493. mov ecx,AspectCount
  494. dec ecx ; If count is non-zero, we keep the line.
  495. mov ebx,CCOPitch
  496. mov AspectCount,ecx
  497. je SkipOutputLine
  498. add ebx,edi ; ebx = start of the next output line.
  499. xor ecx,ecx
  500. mov cl,[esi] ; Fetch first U.
  501. add eax,MAXWIDTH*20 ; Bias base up ...
  502. movdt mm3,ShiftCountForB
  503. pcmpeqw mm6,mm6
  504. movdt mm0,UContribToBandG[ecx*4] ; < 0 0 Bu Gu >
  505. mov cl,[esi+ebp*1] ; Fetch first V.
  506. sub edx,MAXWIDTH*20 ; ... and index down, so edx counts up to zero.
  507. movdt mm4,ShiftCountForG
  508. psllw mm6,15 ; Four words of -32768
  509. movdt mm5,ShiftCountForR
  510. punpcklwd mm0,mm0 ; < Bu Bu Gu Gu >
  511. movq mm7,SelectBBits
  512. mov CCOCursor,ebx
  513. jmp StartDoOutputLine
  514. ; ebp -- Distance from U to V
  515. ; esi -- Cursor over U
  516. ; edi -- Cursor over output
  517. ; edx -- Index over Y storage area
  518. ; eax -- Base address of Y line
  519. ; mm6 -- Four words of -32768, to clamp at floor.
  520. ; mm3, mm4, mm5 -- Shift counts to apply to B, G, and R.
  ; mm7 -- Mask selecting the B field.
  521. DoNext4OutputPels:
  522. movq [edi-8],mm2 ; Save 4 output pels.
  523. punpcklwd mm0,mm0 ; < Bu Bu Gu Gu >
  524. StartDoOutputLine:
  525. movdt mm2,VContribToRandG[ecx*4] ; < 0 0 Rv Gv >
  526. punpcklwd mm2,mm2 ; < Rv Rv Gv Gv >
  527. movq mm1,mm0 ; < junk junk Gu Gu >
  528. punpckhdq mm0,mm0 ; < Bu Bu Bu Bu >
  529. paddsw mm0,[eax+edx] ; < B B B B > with ceiling clamped.
  530. paddw mm1,mm2 ; < junk junk Guv Guv >
  531. paddsw mm0,DitherB ; B with dither added.
  532. punpckldq mm1,mm1 ; < Guv Guv Guv Guv >
  533. paddsw mm1,[eax+edx] ; < G G G G > with ceiling clamped.
  534. punpckhdq mm2,mm2 ; < Rv Rv Rv Rv >
  535. paddsw mm1,DitherG ; G with dither added.
  536. paddsw mm0,mm6 ; B with floor clamped.
  537. paddsw mm2,[eax+edx] ; < R R R R > with ceiling clamped.
  538. paddsw mm1,mm6 ; G with floor clamped.
  539. paddsw mm2,DitherR ; R with dither added.
  540. psrlw mm0,mm3 ; Position B bits.
  541. paddsw mm2,mm6 ; R with floor clamped.
  542. psrlw mm1,mm4 ; Position G bits.
  543. pand mm1,SelectGBits ; Eliminate fractional bits.
  544. psllw mm2,mm5 ; Position R bits.
  545. inc esi ; Advance input cursor
  546. xor ecx,ecx
  547. pand mm2,SelectRBits ; Eliminate fractional bits.
  548. pand mm0,mm7 ; Keep only the B field.
  549. mov cl,[esi] ; Fetch next U.
  550. add edi,8 ; Advance output cursor.
  551. por mm2,mm0 ; R and B combined.
  552. add edx,40 ; Increment Y index.  Sets ZF for the jne below.
  553. movdt mm0,UContribToBandG[ecx*4] ; < 0 0 Bu Gu > next iter.
  554. por mm2,mm1 ; Completed RGB16 for 4 output pels.
  555. mov cl,[esi+ebp*1] ; Fetch next V.
  556. jne DoNext4OutputPels ; Loop until the Y index reaches zero.
  557. movq [edi-8],mm2 ; Save 4 output pels.
  ; Swap the two phases of each dither pattern so the next output line is
  ; dithered opposite to this one.
  558. movq mm0,DitherB ; Reverse dither patterns.
  559. movq mm1,DitherG
  560. psrlq mm0,16
  561. movq mm2,DitherR
  562. psrlq mm1,16
  563. psrlq mm2,16
  564. punpckldq mm0,mm0
  565. punpckldq mm1,mm1
  566. movq DitherB,mm0
  567. punpckldq mm2,mm2
  568. movq DitherG,mm1
  569. movq DitherR,mm2
  ; Step eax to the next FiltLineN.  The filter area is 32-byte aligned, so
  ; the low bits of eax identify which of the four lines comes next:
  ;   bit 3 set             -> next is line 1 or 3, same chroma line;
  ;   bit 3 clear, bit 4 set -> next is line 2, advance chroma one line;
  ;   both clear            -> all four lines done, fetch more luma.
  570. PrepareForNextOutputLine:
  571. mov edx,StartIndexOfYLine
  572. add eax,8-MAXWIDTH*20 ; Advance to next filtered line of Y.
  573. mov esi,UCursor
  574. test al,8 ; Jump if just did line 0 or 2.
  575. mov ebx,ChromaPitch
  576. jne DoOutputLine
  577. add esi,ebx ; Advance to next chroma line.
  578. test al,16 ; Jump if about to do line 2.
  579. mov UCursor,esi
  580. jne DoOutputLine
  581. sub esi,ebx ; Done with 4 lines. Restore UCursor.
  582. mov UCursor,esi
  583. jmp Next4OutputLines
  ; Line dropped for aspect correction: reload the counter and apply the same
  ; base bias DoOutputLine would have, so the common code above stays valid.
  584. SkipOutputLine:
  585. mov ecx,AspectAdjustmentCount
  586. add eax,MAXWIDTH*20
  587. mov AspectCount,ecx
  588. jmp PrepareForNextOutputLine
  ; Discard the local frame and restore the registers pushed on entry.
  589. Done:
  590. mov esp,StashESP
  591. pop ebx
  592. pop ebp
  593. pop edi
  594. pop esi
  595. rturn
  596. MMX_YUV12ToRGB16ZoomBy2 endp
  597. END