You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
570 lines
20 KiB
570 lines
20 KiB
;-------------------------------------------------------------------------
|
|
; INTEL Corporation Proprietary Information
|
|
;
|
|
; This listing is supplied under the terms of a license
|
|
; agreement with INTEL Corporation and may not be copied
|
|
; nor disclosed except in accordance with the terms of
|
|
; that agreement.
|
|
;
|
|
; Copyright (c) 1996 Intel Corporation.
|
|
; All Rights Reserved.
|
|
;
|
|
;-------------------------------------------------------------------------
|
|
|
|
;-------------------------------------------------------------------------
|
|
;//
|
|
;// $Header: S:\h26x\src\dec\cxm12242.asv
|
|
;//
|
|
;// $Log: S:\h26x\src\dec\cxm12242.asv $
|
|
;//
|
|
;// Rev 1.4 01 Apr 1997 12:53:18 BNICKERS
|
|
;// Fix bugs # 153 and 156 -- wrong color when U is small; right edge flickering.
|
|
;//
|
|
;// Rev 1.3 11 Mar 1997 13:49:36 JMCVEIGH
|
|
;// Same ARC bug fix (#94) as was done in cxm12162.asm. Without
|
|
;// this, zoom by 2 and ARC causes black lines in output (every 12th).
|
|
;//
|
|
;// Rev 1.2 06 Sep 1996 16:08:14 BNICKERS
|
|
;// Re-written to filter new points.
|
|
;//
|
|
;-------------------------------------------------------------------------
|
|
;
|
|
; +---------- Color convertor.
|
|
; |+--------- For both H261 and H263.
|
|
; ||+-------- Version for Intel Microprocessors with MMX Technology
|
|
; |||++------ Convert from YUV12.
|
|
; |||||++---- Convert to RGB24.
|
|
; |||||||+--- Zoom by two.
|
|
; ||||||||
|
|
; cxm12242 -- This function performs zoom-by-2 YUV12-to-RGB24 color conversion
|
|
; for H26x. It is tuned for best performance on Intel
|
|
; Microprocessors with MMX Technology. It handles the format in
|
|
; which B is the low order field, then G, then R. This version
|
|
; adds new rows and columns by averaging them with the originals
|
|
; to either side.
|
|
;
|
|
; The YUV12 input is planar, 8 bits per pel. The Y plane may have
|
|
; a pitch of up to 768. It may have a width less than or equal
|
|
; to the pitch. It must be QWORD aligned. Pitch and Width must
|
|
; be a multiple of eight. Height may be any amount, but must be
|
|
; a multiple of two. The U and V planes may have a different
|
|
; pitch than the Y plane, subject to the same limitations.
|
|
;
|
|
; The color convertor is non-destructive; the input Y, U, and V
|
|
; planes will not be clobbered.
|
|
|
|
OPTION PROLOGUE:None
|
|
OPTION EPILOGUE:ReturnAndRelieveEpilogueMacro
|
|
|
|
.xlist
|
|
include iammx.inc
|
|
include memmodel.inc
|
|
.list
|
|
|
|
MMXCCDATA SEGMENT PAGE
|
|
ALIGN 16
|
|
|
|
; Luma0040002000200000 -- 256-entry lookup table, 8 bytes (four words) per
; entry, indexed as Luma0040002000200000[pel*8].
;
; Word layout of one entry, high word first (this is what the name spells):
;       < 0040 0020 0020 0000 >  ==  < 64*y  32*y  32*y  0 >
; where the weight-64 word is (CNT*04A7FH)/0100H and the weight-32 words are
; (CNT*04A7FH)/0200H  (04A7FH/0100H is about 74.5 per step, i.e. 64 * ~1.164).
;
;   pel   0.. 15 : all zero                      (16 entries)
;   pel  16..234 : scaled ramp, CNT = pel - 16   (219 entries)
;   pel 235..255 : held at 03FFFH / 01FFFH       (21 entries)
Luma0040002000200000 LABEL DWORD
        DD      32 DUP (0)                      ; 16 entries * 2 dwords of zero
CNT = 0
REPEAT 219
        DW      0, (CNT*04A7FH)/00200H, (CNT*04A7FH)/00200H, (CNT*04A7FH)/00100H
        CNT = CNT + 1
ENDM
REPEAT 21
        DW      00000H, 01FFFH, 01FFFH, 03FFFH  ; ceiling values
ENDM
|
|
|
|
; Luma0020004000000020 -- 256-entry lookup table, 8 bytes (four words) per
; entry, indexed as Luma0020004000000020[pel*8].
;
; Word layout of one entry, high word first (this is what the name spells):
;       < 0020 0040 0000 0020 >  ==  < 32*y  64*y  0  32*y >
; where the weight-64 word is (CNT*04A7FH)/0100H and the weight-32 words are
; (CNT*04A7FH)/0200H.  The conversion code uses the low dword of an entry
; for the pel right of the current pair and the high dword (shifted down)
; for the pel left of it.
;
;   pel   0.. 15 : all zero                      (16 entries)
;   pel  16..234 : scaled ramp, CNT = pel - 16   (219 entries)
;   pel 235..255 : held at 03FFFH / 01FFFH       (21 entries)
Luma0020004000000020 LABEL DWORD
        DD      32 DUP (0)                      ; 16 entries * 2 dwords of zero
CNT = 0
REPEAT 219
        DW      (CNT*04A7FH)/00200H, 0, (CNT*04A7FH)/00100H, (CNT*04A7FH)/00200H
        CNT = CNT + 1
ENDM
REPEAT 21
        DW      01FFFH, 00000H, 03FFFH, 01FFFH  ; ceiling values
ENDM
|
|
|
|
; UContribToBandG -- 256-entry lookup table, one dword per U value, indexed
; as UContribToBandG[U*4].  Each entry is < Bu Gu > (high word, low word):
;
;       Gu = -((U-128) * 00C83H) / 040H      contribution of U to green
;       Bu =  ((U-128) * 0408BH) / 040H      contribution of U to blue
;
; The Bu formula does not fit in a signed word for U-128 = -128, -127 and
; +127, so those three entries are written out by hand with Bu pinned to
; 08000H / 07FFFH.  Their Gu word still follows the general formula; the
; U-128 = +127 entry therefore carries the same leading minus sign as every
; other Gu in the table.
UContribToBandG LABEL DWORD
        DW      -(-128*0C83H)/00040H            ; Gu for U-128 = -128
        DW      08000H                          ; Bu pinned at -32768
        DW      -(-127*0C83H)/00040H            ; Gu for U-128 = -127
        DW      08000H                          ; Bu pinned at -32768
CNT = -126
REPEAT 253
        DW      -(CNT*00C83H)/00040H            ; Gu
        DW      (CNT*0408BH)/00040H             ; Bu
        CNT = CNT + 1
ENDM
        DW      -(127*0C83H)/00040H             ; Gu for U-128 = +127
        DW      07FFFH                          ; Bu pinned at +32767
|
|
|
|
; VContribToRandG -- 256-entry lookup table, one dword per V value, indexed
; as VContribToRandG[V*4].  Each entry is < Rv Gv > (high word, low word):
;
;       Gv = -((V-128) * 01A04H) / 040H      contribution of V to green
;       Rv =  ((V-128) * 03312H) / 040H      contribution of V to red
;
; CNT runs over V-128 = -128 .. +127.
VContribToRandG LABEL DWORD
CNT = -128
REPEAT 256
        DW      -(CNT*01A04H)/00040H, (CNT*03312H)/00040H
        CNT = CNT + 1
ENDM
|
|
|
|
|
|
; Packed 64-bit MMX constants.  Each name spells the quadword value, high
; word first; each is stored as two dwords, low dword first.

; Words < 0001 0000 0100 0001 >: pmullw multiplier used when packing pels.
C0001000001000001       DD      001000001H
                        DD      000010000H

; Words < 0200 0200 0200 0200 >: pmulhw by 0200H equals a shift right by 7.
C0200020002000200       DD      002000200H
                        DD      002000200H

; Words < 0000 0000 0100 0000 >: pmaddwd multiplier, word 1 scaled by 0100H.
C0000000001000000       DD      001000000H
                        DD      000000000H

; Byte mask that keeps only byte 4 of the quadword.
C000000FF00000000       DD      000000000H
                        DD      0000000FFH

; Words < 0000 0100 0001 0000 >: pmullw multiplier for the two middle words.
C0000010000010000       DD      000010000H
                        DD      000000100H
|
|
|
|
MMXCCDATA ENDS
|
|
|
|
.CODE
|
|
|
|
ASSUME ds : FLAT
|
|
ASSUME es : FLAT
|
|
ASSUME fs : FLAT
|
|
ASSUME gs : FLAT
|
|
ASSUME ss : FLAT
|
|
|
|
; void FAR ASM_CALLTYPE YUV12ToRGB24ZoomBy2 (U8 * YPlane,
|
|
; U8 * VPlane,
|
|
; U8 * UPlane,
|
|
; UN FrameWidth,
|
|
; UN FrameHeight,
|
|
; UN YPitch,
|
|
; UN VPitch,
|
|
; UN AspectAdjustmentCount,
|
|
; U8 * ColorConvertedFrame,
|
|
; U32 DCIOffset,
|
|
; U32 CCOffsetToLine0,
|
|
; IN CCOPitch,
|
|
; IN CCType)
|
|
;
|
|
; CCOffsetToLine0 is relative to ColorConvertedFrame.
|
|
;
|
|
|
|
; due to the need for the ebp reg, these parameter declarations aren't used,
|
|
; they are here so the assembler knows how many bytes to relieve from the stack
|
|
|
|
PUBLIC MMX_YUV12ToRGB24ZoomBy2
|
|
|
|
;-------------------------------------------------------------------------
; MMX_YUV12ToRGB24ZoomBy2
;
; Zoom-by-2 YUV12 -> RGB24 color conversion.  Every pair of input Y lines
; is horizontally interpolated into 8-byte groups of four luma words (two
; source pels -> four output pels) and combined vertically with the
; previous input line into up to four filtered lines kept on the stack:
;
;       FiltLine0 = 2*LP        FiltLine1 = LP + L0
;       FiltLine2 = 2*L0        FiltLine3 = L0 + L1
;
; (LP = previous odd line, L0/L1 = current even/odd line).  The very first
; pass produces only FiltLine2/3; the final pass (YCursor == YLimit) writes
; 2*LP into both FiltLine2 and FiltLine3.  Each filtered line is then
; combined with the U/V table contributions, clamped, and stored as packed
; 24-bit pels, 12 bytes per four pels.
;
; Aspect handling: AspectCount is decremented once per output line; when it
; reaches zero the line is not stored (CCOCursor is not advanced) and the
; count is reloaded from AspectAdjustmentCount.
;
; Arguments are read through edi, which holds esp as it was after the four
; register pushes, so argument N lives at [edi + RegisterStorageSize + 4*N].
; The PROC parameter list is not referenced by name.
;
; Preserves esi, edi, ebp, ebx (pushed/popped).  Uses eax, ecx, edx and
; mm0-mm7 freely.
; NOTE(review): no EMMS is visible in this procedure; presumably the caller
; or the rturn macro takes care of it -- confirm.
;-------------------------------------------------------------------------
MMX_YUV12ToRGB24ZoomBy2 proc DIST LANG AYPlane: DWORD,
AVPlane: DWORD,
AUPlane: DWORD,
AFrameWidth: DWORD,
AFrameHeight: DWORD,
AYPitch: DWORD,
AVPitch: DWORD,
AAspectAdjustmentCnt: DWORD,
AColorConvertedFrame: DWORD,
ADCIOffset: DWORD,
ACCOffsetToLine0: DWORD,
ACCOPitch: DWORD,
ACCType: DWORD

MAXWIDTH = 768
LocalFrameSize = MAXWIDTH*20+64         ; 5 filtered lines * 20 bytes/pel + 64 bytes of scalars
RegisterStorageSize = 16                ; four pushed registers

; Arguments (offsets from the post-push esp kept in edi / StashESP):

YPlane_arg = RegisterStorageSize + 4
VPlane_arg = RegisterStorageSize + 8
UPlane_arg = RegisterStorageSize + 12
FrameWidth_arg = RegisterStorageSize + 16
FrameHeight = RegisterStorageSize + 20
YPitch_arg = RegisterStorageSize + 24
ChromaPitch_arg = RegisterStorageSize + 28
AspectAdjustmentCount_arg = RegisterStorageSize + 32
ColorConvertedFrame = RegisterStorageSize + 36
DCIOffset = RegisterStorageSize + 40
CCOffsetToLine0 = RegisterStorageSize + 44
CCOPitch_arg = RegisterStorageSize + 48
CCType = RegisterStorageSize + 52
EndOfArgList = RegisterStorageSize + 56

; Locals (on local stack frame)

CCOCursor EQU [esp+ 0]                  ; Address of next output line.
CCOPitch EQU [esp+ 4]                   ; Pitch of output lines.
YCursor EQU [esp+ 8]                    ; Address of next pair of Y lines.
YLimit EQU [esp+ 12]                    ; YPlane + YPitch*FrameHeight.
YPitch EQU [esp+ 16]
UCursor EQU [esp+ 20]                   ; Address of current U line.
DistanceFromUToV EQU [esp+ 24]          ; VPlane - UPlane.
ChromaPitch EQU [esp+ 28]
AspectCount EQU [esp+ 32]               ; Lines left until next skipped line.
AspectAdjustmentCount EQU [esp+ 36]     ; Reload value for AspectCount.
StartIndexOfYLine EQU [esp+ 40]         ; (MAXWIDTH-FrameWidth)*20.
StashESP EQU [esp+ 44]                  ; esp to restore at Done.

; The five line buffers are interleaved: one 40-byte record per two source
; pels, holding 8 bytes for each of the five lines.

FiltLine0 EQU [esp+ 64] ; Must be 32 byte aligned.
FiltLine1 EQU [esp+ 72]
FiltLine2 EQU [esp+ 80]
FiltLine3 EQU [esp+ 88]
HFiltLinePrev EQU [esp+ 96]

        push    esi
        push    edi
        push    ebp
        push    ebx

; Build the local frame below a 4K boundary, touching each page on the way
; down so the stack is committed one page at a time.

        mov     edi,esp
        and     esp,0FFFFF000H
        sub     esp,4096
        mov     eax,[esp]
        sub     esp,4096
        mov     eax,[esp]
        sub     esp,4096
        mov     eax,[esp]
        sub     esp,LocalFrameSize-12288
        mov     eax,[esp]

; The filtered-line index always finishes at MAXWIDTH*20, so a narrower
; frame simply starts further into the buffer.

        mov     eax,MAXWIDTH
        sub     eax,[edi+FrameWidth_arg]
        imul    eax,20
        mov     StartIndexOfYLine,eax

        mov     eax,[edi+YPlane_arg]
        mov     YCursor,eax

        mov     ebx,[edi+YPitch_arg]
        mov     YPitch,ebx
        mov     ecx,[edi+FrameHeight]
        imul    ebx,ecx
        add     eax,ebx
        mov     YLimit,eax              ; First byte past the last Y line.

        mov     eax,[edi+UPlane_arg]
        mov     ebx,[edi+VPlane_arg]
        mov     UCursor,eax
        sub     ebx,eax
        mov     DistanceFromUToV,ebx

        mov     eax,[edi+ColorConvertedFrame]
        add     eax,[edi+DCIOffset]
        add     eax,[edi+CCOffsetToLine0]
        mov     CCOCursor,eax

        mov     eax,[edi+ChromaPitch_arg]
        mov     ChromaPitch,eax

        mov     eax,[edi+CCOPitch_arg]
        mov     CCOPitch,eax

        mov     eax,[edi+AspectAdjustmentCount_arg]
        mov     AspectAdjustmentCount,eax
        mov     AspectCount,eax

        mov     StashESP,edi

; ---- First two luma lines: no previous line exists yet. ----

        mov     esi,YCursor
        mov     ebp,YPitch
        mov     edi,StartIndexOfYLine
        xor     eax,eax
        lea     edx,[esi+ebp*2]
        xor     ebx,ebx
        mov     YCursor,edx
        mov     bl,[esi+ebp*1]                          ; Get Y10 (a of line L1; for left edge).
        mov     al,[esi]                                ; Get Y00 (A of line L0; for left edge).

        movq    mm1,Luma0020004000000020[ebx*8]         ; L1:< 32a 64a 0 32a >
        mov     bl,[esi+ebp*1+2]                        ; Get c.
        movq    mm0,Luma0020004000000020[eax*8]         ; L0:< 32A 64A 0 32A >
        mov     al,[esi+2]                              ; Get C.

; esi -- Cursor over input line of Y.
; edi -- Index to lines of filtered Y. Quit when MAXWIDTH*20.
; ebp -- Pitch from one line of Y to the next.
; al, bl -- Y pels
; mm0 -- For line 0, contribution of pel to left of two pels under cursor now.
; mm1 -- For line 1, contribution of pel to left of two pels under cursor now.
; mm2-mm6 -- Scratch.

Next2PelsOfFirst2LumaLines:

        movq    mm3,Luma0020004000000020[ebx*8]         ; L1:< 32c 64c 0 32c >
        psrlq   mm1,32                                  ; L1:< 0 0 32a 64a >
        movq    mm2,Luma0020004000000020[eax*8]         ; L0:< 32C 64C 0 32C >
        punpckldq mm1,mm3                               ; L1:< 0 32c 32a 64a >
        xor     ebx,ebx
        xor     eax,eax
        mov     bl,[esi+ebp*1+1]                        ; Get b.
        psrlq   mm0,32                                  ; L0:< 0 0 32A 64A >
        mov     al,[esi+1]                              ; Get B.
        add     edi,40                                  ; Inc filtered luma temp stg idx.
        paddw   mm1,Luma0040002000200000[ebx*8]         ; L1:< 64b 32b+32c 32a+32b 64a >
        punpckldq mm0,mm2                               ; L0:< 0 32C 32A 64A >
        paddw   mm0,Luma0040002000200000[eax*8]         ; L0:< 64B 32B+32C 32A+32B 64A >

        movq    HFiltLinePrev[edi-40],mm1               ; Save L1 as next iters LPrev.
        paddw   mm1,mm0                                 ; L0+L1
        paddw   mm0,mm0                                 ; 2L0
        add     esi,2                                   ; Increment input index.
        movq    FiltLine3[edi-40],mm1                   ; Save filtered line L0+L1.
        movq    mm1,mm3                                 ; Next iters a.
        movq    FiltLine2[edi-40],mm0                   ; Save filtered line 2L0.
        movq    mm0,mm2                                 ; Next iters A.
        mov     bl,[esi+ebp*1+2]                        ; Get c.
        cmp     edi,MAXWIDTH*20-40                      ; Last pair next?
        mov     al,[esi+2]                              ; Get C.
        jl      Next2PelsOfFirst2LumaLines

; Right edge: the final pair has no pel to its right, so b/B are fetched
; again and used in place of c/C.

        xor     ebx,ebx
        xor     eax,eax
        mov     bl,[esi+ebp*1+1]                        ; Use b as c.
        cmp     edi,MAXWIDTH*20                         ; Done yet.
        mov     al,[esi+1]                              ; Use B as C.
        jl      Next2PelsOfFirst2LumaLines

        mov     ebp,DistanceFromUToV
        lea     eax,FiltLine2                           ; Only lines 2 and 3 exist on first pass.
        mov     esi,UCursor
        mov     edx,StartIndexOfYLine
        jmp     DoOutputLine


; ---- Past the last pair of luma lines. ----
; Reached with the flags of "cmp esi,YLimit" still live: equal means the two
; trailing output lines remain to be produced, above means all done.

Last2OutputLines:

        mov     ebp,DistanceFromUToV
        lea     esi,[edi+40]                            ; esi is reloaded from UCursor before it is read.
        ja      Done

; edi -- Index to lines of filtered Y. Quit when MAXWIDTH*20.
; mm0-mm6 -- Scratch.

        movq    mm0,HFiltLinePrev[edi]                  ; Fetch horizontally filtered line LP.
        paddw   mm0,mm0                                 ; 2LP

Next2PelsOfLast2LumaLines:

        movq    FiltLine3[edi],mm0                      ; Save horz and vert filt line 2LP.
        movq    FiltLine2[edi],mm0                      ; Save horz and vert filt line 2LP.
        movq    mm0,HFiltLinePrev[edi+40]               ; Fetch horizontally filtered line LP.
        add     edi,40
        paddw   mm0,mm0                                 ; 2LP
        cmp     edi,MAXWIDTH*20                         ; Done yet.
        jne     Next2PelsOfLast2LumaLines

        lea     eax,FiltLine2
        mov     edx,StartIndexOfYLine
        mov     esi,UCursor
        jmp     DoOutputLine


; ---- Steady state: two more luma lines give four output lines. ----

Next4OutputLines:

        mov     esi,YCursor
        mov     ebp,YPitch
        mov     edi,StartIndexOfYLine
        mov     ecx,YLimit
        lea     edx,[esi+ebp*2]
        xor     eax,eax
        mov     YCursor,edx
        xor     ebx,ebx
        cmp     esi,ecx                                 ; Past the last Y line?
        jae     Last2OutputLines                        ; Yes: do not touch [esi]; it is outside the plane.

        mov     al,[esi]                                ; Get Y00 (A of line L0; for left edge).
        mov     bl,[esi+ebp*1]                          ; Get Y10 (a of line L1; for left edge).

        movq    mm1,Luma0020004000000020[ebx*8]         ; L1:< 32a 64a 0 32a >
        mov     bl,[esi+ebp*1+2]                        ; Get c.
        movq    mm0,Luma0020004000000020[eax*8]         ; L0:< 32A 64A 0 32A >
        mov     al,[esi+2]                              ; Get C.

; esi -- Cursor over input line of Y.
; edi -- Index to lines of filtered Y. Quit when MAXWIDTH*20.
; ebp -- Pitch from one line of Y to the next.
; al, bl -- Y pels
; mm0 -- For line 0, contribution of pel to left of two pels under cursor now.
; mm1 -- For line 1, contribution of pel to left of two pels under cursor now.
; mm2-mm6 -- Scratch.

Next2PelsOf2LumaLines:

        movq    mm3,Luma0020004000000020[ebx*8]         ; L1:< 32c 64c 0 32c >
        psrlq   mm1,32                                  ; L1:< 0 0 32a 64a >
        movq    mm2,Luma0020004000000020[eax*8]         ; L0:< 32C 64C 0 32C >
        punpckldq mm1,mm3                               ; L1:< 0 32c 32a 64a >
        movq    mm4,HFiltLinePrev[edi]                  ; LP
        psrlq   mm0,32                                  ; L0:< 0 0 32A 64A >
        xor     ebx,ebx
        xor     eax,eax
        mov     bl,[esi+ebp*1+1]                        ; Get b.
        movq    mm5,mm4                                 ; LP
        mov     al,[esi+1]                              ; Get B.
        add     esi,2                                   ; Increment input index.
        paddw   mm1,Luma0040002000200000[ebx*8]         ; L1:< 64b 32b+32c 32a+32b 64a >
        punpckldq mm0,mm2                               ; L0:< 0 32C 32A 64A >
        paddw   mm0,Luma0040002000200000[eax*8]         ; L0:< 64B 32B+32C 32A+32B 64A >
        paddw   mm5,mm5                                 ; 2LP
        movq    HFiltLinePrev[edi],mm1                  ; Save L1 as next iters LPrev.
        paddw   mm4,mm0                                 ; LP+L0
        movq    FiltLine0[edi],mm5                      ; Save 2LP
        paddw   mm1,mm0                                 ; L0+L1
        movq    FiltLine1[edi],mm4                      ; Save LP+L0
        paddw   mm0,mm0                                 ; 2L0
        movq    FiltLine3[edi],mm1                      ; Save L0+L1
        movq    mm1,mm3                                 ; Next iters a.
        movq    FiltLine2[edi],mm0                      ; Save 2L0
        movq    mm0,mm2                                 ; Next iters A.
        add     edi,40                                  ; Inc filtered luma temp stg idx.
        mov     bl,[esi+ebp*1+2]                        ; Get c.
        cmp     edi,MAXWIDTH*20-40                      ; Last pair next?
        mov     al,[esi+2]                              ; Get C.
        jl      Next2PelsOf2LumaLines

; Right edge: the final pair has no pel to its right, so b/B are fetched
; again and used in place of c/C.

        xor     ebx,ebx
        xor     eax,eax
        mov     bl,[esi+ebp*1+1]                        ; Use b as c.
        cmp     edi,MAXWIDTH*20                         ; Done yet.
        mov     al,[esi+1]                              ; Use B as C.
        jl      Next2PelsOf2LumaLines

        mov     ebp,DistanceFromUToV
        mov     esi,UCursor
        lea     eax,FiltLine0
        mov     edx,StartIndexOfYLine

; ---- Convert one filtered luma line plus chroma to RGB24. ----
; On entry: eax = address of the FiltLineN to emit, edx = StartIndexOfYLine,
;           esi = UCursor, ebp = DistanceFromUToV.

DoOutputLine:

        mov     edi,CCOCursor
        mov     ecx,AspectCount
        dec     ecx                                     ; If count is non-zero, we keep the line.
        mov     ebx,CCOPitch
        mov     AspectCount,ecx                         ; (mov leaves the flags of dec intact)
        je      SkipOutputLine

        add     ebx,edi
        xor     ecx,ecx
        mov     cl,[esi]                                ; First U.
        add     eax,MAXWIDTH*20                         ; Bias base so that the index edx ...
        movq    mm7,C0001000001000001
        pcmpeqw mm6,mm6
        movdt   mm0,UContribToBandG[ecx*4]              ; < 0 0 Bu Gu >
        psllw   mm6,15                                  ; Four words of -32768
        mov     cl,[esi+ebp*1]                          ; First V.
        sub     edx,MAXWIDTH*20                         ; ... counts up to zero at end of line.
        pxor    mm3,mm3
        movq    mm5,mm7
        mov     CCOCursor,ebx                           ; Next output line.
        jmp     StartDoOutputLine

; ebp -- Distance from U to V
; esi -- Cursor over U
; edi -- Cursor over output
; edx -- Index over Y storage area
; eax -- Base address of Y line
; mm6 -- Four words of -32768, to clamp at floor.
; mm7 -- <0x0001 0x0000 0x0100 0x0001>
;
; Luma words within a group are ordered < pel2 pel3 pel1 pel0 >.

DoNext4OutputPels:

        movdf   [edi-4],mm4                             ; Store <R3 G3 B3 R2>
        movq    mm5,mm7                                 ; < 0001 0000 0100 0001 >

StartDoOutputLine:

        movdt   mm2,VContribToRandG[ecx*4]              ; < 0 0 Rv Gv >
        punpcklwd mm0,mm0                               ; < Bu Bu Gu Gu >
        movq    mm1,mm0                                 ; < junk junk Gu Gu >
        punpcklwd mm2,mm2                               ; < Rv Rv Gv Gv >
        paddw   mm1,mm2                                 ; < junk junk Guv Guv >
        punpckhdq mm0,mm0                               ; < Bu Bu Bu Bu >
        paddsw  mm0,[eax+edx]                           ; < B2 B3 B1 B0 > w/ ceiling clamped.
        punpckldq mm1,mm1                               ; < Guv Guv Guv Guv >
        paddsw  mm1,[eax+edx]                           ; < G2 G3 G1 G0 > w/ ceiling clamped.
        punpckhdq mm2,mm2                               ; < Rv Rv Rv Rv >
        paddsw  mm2,[eax+edx]                           ; < R2 R3 R1 R0 > w/ ceiling clamped.
        paddsw  mm0,mm6                                 ; B with floor clamped.
        psubsw  mm0,mm6                                 ; B back in range.
        paddsw  mm1,mm6                                 ; G with floor clamped.
        psubsw  mm1,mm6                                 ; G back in range.
        paddsw  mm2,mm6                                 ; R with floor clamped.
        psubsw  mm2,mm6                                 ; R back in range.
        psrlw   mm0,7                                   ; < 0 B2 0 B3 0 B1 0 B0 >
        pmulhw  mm1,C0200020002000200                   ; < 0 G2 0 G3 0 G1 0 G0 >
        punpckhwd mm3,mm0                               ; < -- -- -- -- 0 B3 -- -- >
        pmaddwd mm3,C0000000001000000                   ; < -- -- -- -- 0 0 B3 0 >
        psrlw   mm2,7                                   ; < 0 R2 0 R3 0 R1 0 R0 >
        pmullw  mm5,mm2                                 ; < -- -- 0 0 R1 0 0 R0 >
        punpckhdq mm2,mm2                               ; < -- -- 0 R3 0 R2 -- -- >
        pmullw  mm0,mm7                                 ; < 0 B2 0 0 B1 0 0 B0 >
        movq    mm4,mm1                                 ; < -- -- -- G3 -- -- -- -- >
        pand    mm4,C000000FF00000000                   ; < -- -- 0 G3 0 0 -- -- >
        pmullw  mm1,mm7                                 ; < 0 G2 0 0 G1 0 0 G0 >
        pmullw  mm2,C0000010000010000                   ; < -- -- R3 0 0 R2 -- -- >
        psllq   mm5,16                                  ; < 0 0 R1 0 0 R0 0 0 >
        xor     ecx,ecx
        por     mm5,mm0                                 ; < 0 B2 R1 0 B1 R0 0 B0 >
        mov     cl,[esi+1]                              ; Fetch next U.
        psllq   mm1,8                                   ; < G2 0 0 G1 0 0 G0 0 >
        por     mm4,mm2                                 ; < -- -- R3 G3 0 R2 -- -- >
        por     mm5,mm1                                 ; < G2 B2 R1 G1 B1 R0 G0 B0 >
        inc     esi                                     ; Advance input cursor
        psrlq   mm4,16                                  ; < -- -- -- -- R3 G3 0 R2 >
        movdf   [edi],mm5                               ; Store < B1 R0 G0 B0 >
        psrlq   mm5,32                                  ; < -- -- -- -- G2 B2 R1 G1 >
        movdt   mm0,UContribToBandG[ecx*4]              ; < 0 0 Bu Gu > next iter.
        por     mm4,mm3                                 ; < -- -- -- -- R3 G3 B3 R2 >
        movdf   [edi+4],mm5                             ; Store < G2 B2 R1 G1 >
        ;
        add     edi,12                                  ; Advance output cursor.
        add     edx,40                                  ; Increment Y index; zero at end of line.
        mov     cl,[esi+ebp*1]                          ; Fetch next V.
        jne     DoNext4OutputPels

        movdf   [edi-4],mm4                             ; Store <R3 G3 B3 R2>

; eax currently holds (FiltLineN + MAXWIDTH*20).  Step to FiltLineN+1 and use
; the low address bits (the frame is 32-byte aligned) to tell which of the
; four lines comes next.

PrepareForNextOutputLine:

        mov     edx,StartIndexOfYLine
        add     eax,8-MAXWIDTH*20                       ; Advance to next filtered line of Y.
        mov     esi,UCursor
        test    al,8                                    ; Jump if just did line 0 or 2.
        mov     ebx,ChromaPitch
        jne     DoOutputLine

        add     esi,ebx                                 ; Advance to next chroma line.
        test    al,16                                   ; Jump if about to do line 2.
        mov     UCursor,esi
        jne     DoOutputLine

        sub     esi,ebx                                 ; Done with 4 lines. Restore UCursor.
        mov     UCursor,esi
        jmp     Next4OutputLines

; Aspect correction: drop this line, reload the counter, and undo in advance
; the base adjustment PrepareForNextOutputLine is about to apply.

SkipOutputLine:
        mov     ecx,AspectAdjustmentCount
        add     eax,MAXWIDTH*20
        mov     AspectCount,ecx
        jmp     PrepareForNextOutputLine

Done:

        mov     esp,StashESP                            ; Back to the post-push stack pointer.
        pop     ebx
        pop     ebp
        pop     edi
        pop     esi
        rturn

MMX_YUV12ToRGB24ZoomBy2 endp
|
|
|
|
END
|