; Leaked source code of windows server 2003
; You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.






; 544 lines
; 17 KiB

;*************************************************************************
;** INTEL Corporation Proprietary Information
;**
;** This listing is supplied under the terms of a license
;** agreement with INTEL Corporation and may not be copied
;** nor disclosed except in accordance with the terms of
;** that agreement.
;**
;** Copyright (c) 1995 Intel Corporation.
;** All Rights Reserved.
;**
;*************************************************************************
;//
;// $Header: S:\h26x\src\dec\cx51281.asv
;//
;// $Log: S:\h26x\src\dec\cx51281.asv $
;//
;// Rev 1.6 18 Mar 1996 09:58:40 bnickers
;// Make color convertors non-destructive.
;//
;// Rev 1.5 05 Feb 1996 13:35:38 BNICKERS
;// Fix RGB16 color flash problem, by allowing different RGB16 formats at once.
;//
;// Rev 1.4 16 Jan 1996 11:23:06 BNICKERS
;// Fix starting point in output stream, so we don't start at line two and
;// write off the end of the output frame.
;//
;// Rev 1.3 22 Dec 1995 15:43:28 KMILLS
;//
;// added new copyright notice
;//
;// Rev 1.2 03 Nov 1995 11:49:40 BNICKERS
;// Support YUV12 to CLUT8 zoom and non-zoom color conversions.
;//
;// Rev 1.1 26 Oct 1995 09:46:08 BNICKERS
;// Reduce the number of blanks in the "proc" statement because the assembler
;// sometimes has problems with statements longer than 512 characters.
;//
;// Rev 1.0 25 Oct 1995 17:59:20 BNICKERS
;// Initial revision.
;//
;////////////////////////////////////////////////////////////////////////////
;
; +---------- Color convertor.
; |+--------- For both H261 and H263.
; ||+-------- Version for the Pentium Microprocessor.
; |||++------ Convert from YUV12.
; |||||+----- Convert to CLUT8.
; ||||||+---- Zoom by one, i.e. non-zoom.
; |||||||
; cx51281 -- This function performs YUV12 to CLUT8 color conversion for H26x.
; It is tuned for best performance on the Pentium(r) Microprocessor.
; It dithers among 9 chroma points and 26 luma points, mapping the
; 8 bit luma pels into the 26 luma points by clamping the ends and
; stepping the luma by 8.
;
; The color convertor is non-destructive; the input Y, U, and V
; planes will not be clobbered.
OPTION PROLOGUE:None
OPTION EPILOGUE:ReturnAndRelieveEpilogueMacro
include locals.inc
include ccinst.inc
include decconst.inc
.xlist
include memmodel.inc
.list
.DATA
; any data would go here
.CODE
ASSUME cs : FLAT
ASSUME ds : FLAT
ASSUME es : FLAT
ASSUME fs : FLAT
ASSUME gs : FLAT
ASSUME ss : FLAT
; void FAR ASM_CALLTYPE YUV12ToCLUT8 (U8 * YPlane,
; U8 * VPlane,
; U8 * UPlane,
; UN FrameWidth,
; UN FrameHeight,
; UN YPitch,
; UN VPitch,
; UN AspectAdjustmentCount,
; U8 * ColorConvertedFrame,
; U32 DCIOffset,
; U32 CCOffsetToLine0,
; IN CCOPitch,
; IN CCType)
;
; CCOffsetToLine0 is relative to ColorConvertedFrame.
;
PUBLIC YUV12ToCLUT8
; Because ebp is needed as a general-purpose register, the parameter
; declarations below are not used to address the arguments; they are here
; only so the assembler knows how many bytes to relieve from the stack.
;--------------------------------------------------------------------------
; YUV12ToCLUT8 -- color convert separate Y, V and U planes to CLUT8 output.
;
; The arguments arrive on the stack in the order of the C prototype. They
; are read through edi (a copy of esp taken after the four register pushes)
; using the *_arg / offset equates below, not through the parameter names
; in the proc statement.
;
; Each pass of the main loop (NextFourLines) does the following:
;   1. Converts one line of V and U pels into one dword of chroma dither
;      contribution per chroma pel, stored in ChromaContribution and
;      followed by a zero dword that marks end of line.
;   2. Converts two luma lines against that chroma line (Line0 uses one
;      16-bit half of each chroma dword, Line1 the other half), adding
;      YDither table lookups to the chroma contribution and storing four
;      output pels at a time.
;   3. Repeats 1 and 2 for the next chroma line and the next two luma
;      lines, with the UVDitherLine tables swapped and different YDither
;      offsets.
; Before each luma line AspectCount is decremented; when it reaches zero
; the line is not written (and the output cursor is not advanced) and the
; count is reloaded from AspectAdjustmentCount.
;
; Inside the luma loops esp itself is used as the cursor over
; ChromaContribution, so the esp-relative locals are NOT addressable there
; except through the explicit +1536 bias. esp is recovered afterwards by
; rounding to the 2 KB boundary established in the prologue.
;
; NOTE(review): all loop exits are exact-equality or zero tests, so the
; code appears to require FrameWidth to be a multiple of 4 and FrameHeight
; a multiple of 4, and the esp recovery appears to require 2*FrameWidth to
; be less than 1528 -- confirm against callers.
; NOTE(review): the instruction order looks hand-scheduled (the file header
; says it is tuned for the Pentium); several mov/lea instructions sit
; between a flag-setting instruction and the jne that consumes the flags.
; Do not reorder without checking flag dependencies.
;--------------------------------------------------------------------------
YUV12ToCLUT8 proc DIST LANG AYPlane: DWORD,
AVPlane: DWORD,
AUPlane: DWORD,
AFrameWidth: DWORD,
AFrameHeight: DWORD,
AYPitch: DWORD,
AVPitch: DWORD,
AAspectAdjustmentCnt: DWORD,
AColorConvertedFrame: DWORD,
ADCIOffset: DWORD,
ACCOffsetToLine0: DWORD,
ACCOPitch: DWORD,
ACCType: DWORD
; Local frame size: 64 bytes cover the scalar locals below, 768*2 bytes
; cover the ChromaContribution buffer, plus 4 more bytes (presumably room
; for the zero end-of-line dword stored after the last chroma entry).
LocalFrameSize = 64+768*2+4
; Bytes taken by the four registers pushed in the prologue (esi, edi, ebp,
; ebx).
RegisterStorageSize = 16
; Arguments:
; Offsets are relative to edi, i.e. esp right after the register pushes.
; The extra 4 bytes skip what is presumably the return address.
; CCType_arg and EndOfArgList are not referenced in this procedure.
YPlane_arg = RegisterStorageSize + 4
VPlane_arg = RegisterStorageSize + 8
UPlane_arg = RegisterStorageSize + 12
FrameWidth_arg = RegisterStorageSize + 16
FrameHeight = RegisterStorageSize + 20
YPitch_arg = RegisterStorageSize + 24
ChromaPitch_arg = RegisterStorageSize + 28
AspectAdjustmentCount_arg = RegisterStorageSize + 32
ColorConvertedFrame = RegisterStorageSize + 36
DCIOffset = RegisterStorageSize + 40
CCOffsetToLine0 = RegisterStorageSize + 44
CCOPitch_arg = RegisterStorageSize + 48
CCType_arg = RegisterStorageSize + 52
EndOfArgList = RegisterStorageSize + 56
; Locals (on local stack frame)
; All are esp-relative, so they are only valid while esp sits at the
; 2 KB-aligned frame base (i.e. outside the LineNLoop bodies).
;   CCOCursor             -- start of the next output line to write.
;   ChromaLineLen         -- FrameWidth / 2.
;   YLimit                -- YPlane + FrameHeight * LumaPitch; loop end.
;   YCursor               -- start of the current pair of luma lines.
;   VCursor               -- start of the next V line.
;   DistanceFromVToU      -- UPlane - VPlane.
;   EndOfChromaLine       -- VCursor + ChromaLineLen for the current line.
;   AspectCount           -- lines left before the next dropped line.
;   StashESP              -- esp as it was right after the register pushes.
;   ChromaContribution    -- one dword per chroma pel, zero terminated.
CCOCursor EQU [esp+ 0]
ChromaLineLen EQU [esp+ 4]
YLimit EQU [esp+ 8]
YCursor EQU [esp+12]
VCursor EQU [esp+16]
DistanceFromVToU EQU [esp+20]
EndOfChromaLine EQU [esp+24]
AspectCount EQU [esp+28]
FrameWidth EQU [esp+32]
ChromaPitch EQU [esp+36]
AspectAdjustmentCount EQU [esp+40]
LumaPitch EQU [esp+44]
CCOPitch EQU [esp+48]
StashESP EQU [esp+52]
ChromaContribution EQU [esp+64]
; Prologue: save registers, then carve out the local frame and round esp
; down to a 2 KB boundary. The alignment is what later allows esp to be
; recovered with "and esp,0FFFFF800H / add esp,0800H".
push esi
push edi
push ebp
push ebx
mov edi,esp ; edi = base for argument access; saved in StashESP below.
sub esp,LocalFrameSize
and esp,0FFFFF800H ; Align frame base down to a 2 KB boundary.
; Copy the scalar arguments into esp-relative locals.
mov eax,[edi+FrameWidth_arg]
mov ebx,[edi+ChromaPitch_arg]
mov ecx,[edi+AspectAdjustmentCount_arg]
mov edx,[edi+YPitch_arg]
mov esi,[edi+CCOPitch_arg]
mov FrameWidth,eax
mov ChromaPitch,ebx
mov AspectAdjustmentCount,ecx
mov AspectCount,ecx ; Running count starts at the reload value.
mov LumaPitch,edx
mov CCOPitch,esi
mov ebx,[edi+VPlane_arg]
mov ecx,[edi+UPlane_arg]
mov eax,[edi+YPlane_arg]
sub ecx,ebx ; ecx = UPlane - VPlane.
mov DistanceFromVToU,ecx
mov VCursor,ebx
mov YCursor,eax
; Output cursor = ColorConvertedFrame + DCIOffset + CCOffsetToLine0.
mov eax,[edi+ColorConvertedFrame]
add eax,[edi+DCIOffset]
add eax,[edi+CCOffsetToLine0]
mov CCOCursor,eax
mov StashESP,edi ; Remember pre-frame esp for the epilogue.
; YLimit = YPlane + FrameHeight * LumaPitch; ChromaLineLen = FrameWidth / 2.
mov edx,[edi+FrameHeight]
mov ecx,LumaPitch
imul edx,ecx
mov ebx,FrameWidth
mov esi,YCursor ; Fetch cursor over luma plane.
sar ebx,1 ; Halve the frame width.
add edx,esi
mov YLimit,edx
mov ChromaLineLen,ebx
NextFourLines:
; Convert a line of V and U pels to chroma dither contributions, one dword
; per chroma pel, stored consecutively in ChromaContribution.
;
; Register Usage
;
; edi -- Cursor over V line
; esi -- Cursor over storage to hold preprocessed UV.
; ebp -- Distance from V line to U line.
; edx -- Weighted V dither pattern; also the end-of-line test result.
; bl -- V pel value (reduced to 6 bits, used as table byte offset)
; cl -- U pel value (reduced to 6 bits, used as table byte offset)
; eax -- U dither pattern, then combined UV pattern
mov edi,VCursor ; Fetch address of pel 0 of next line of V.
mov ebp,DistanceFromVToU ; Fetch span from V plane to U plane.
lea esi,ChromaContribution
mov eax,ChromaLineLen
mov edx,ChromaPitch
add eax,edi
mov EndOfChromaLine,eax ; Address just past the last V pel of this line.
add edx,edi ; Address of the following V line.
mov bl,[edi] ; Fetch first V pel.
;
and ebx,0FCH ; Reduce to 6 bits.
mov cl,[edi+ebp*1] ; Fetch first U pel.
and ecx,0FCH ; Reduce to 6 bits.
mov VCursor,edx ; Stash for next time around.
; Two chroma pels (eight bytes of ChromaContribution) per iteration.
; The full-register "and" above cleared the upper bits of ebx and ecx, so
; the byte-sized updates inside the loop leave them valid as table offsets.
@@:
mov edx,PD UVDitherLine01[ebx] ; Fetch dither pattern for V point.
mov bl,[edi+1] ; Fetch next V pel.
mov eax,PD UVDitherLine23[ecx] ; Fetch dither pattern for U point.
mov cl,[edi+ebp*1+1] ; Fetch next U pel.
lea edx,[edx+edx*2+00A0A0A0AH] ; Weight V dither pattern.
and bl,0FCH ; Reduce to 6 bits.
add eax,edx ; Combine dither patterns for U and V.
and cl,0FCH ; Reduce to 6 bits.
mov edx,PD UVDitherLine01[ebx] ; Fetch dither pattern for V point.
mov [esi],eax ; Stash UV corresponding to Y00,Y01,Y10,Y11.
mov eax,PD UVDitherLine23[ecx] ; Fetch dither pattern for U point.
mov bl,[edi+2] ; Fetch next V pel.
lea edx,[edx+edx*2+00A0A0A0AH] ; Weight V dither pattern.
mov cl,[edi+ebp*1+2] ; Fetch next U pel.
add eax,edx ; Combine dither patterns for U and V.
mov edx,EndOfChromaLine ; Fetch EOL address.
mov [esi+4],eax ; Stash UV corresponding to Y02,Y03,Y12,Y13.
add edi,2 ; Advance V cursor (U is addressed relative to it via ebp).
and bl,0FCH ; Reduce to 6 bits.
and cl,0FCH ; Reduce to 6 bits.
add esi,8
sub edx,edi ; Zero exactly when the V cursor reaches EOL.
jne @b
; Now color convert a line of luma.
;
; Register Usage
; edi -- Cursor over line of color converted output frame, minus esi.
; esi -- Cursor over Y line.
; ebp -- Not used.
; edx,eax -- Build output pels.
; ecx,ebx -- Y pels.
mov [esi],edx ; Stash EOL indication (edx is zero here).
mov edx,AspectCount
mov esi,YCursor ; Reload cursor over Y line.
dec edx
mov AspectCount,edx ; mov leaves the flags from dec intact for the jne.
jne KeepLine0
; Count reached zero: drop this line and restart the count.
mov edx,AspectAdjustmentCount
mov AspectCount,edx
jmp SkipLine0
KeepLine0:
mov edi,CCOCursor ; Fetch output cursor.
mov eax,CCOPitch ; Compute start of next line.
add eax,edi
mov edx,ChromaContribution+4 ; Fetch <UV03 UV02 xxxx xxxx>.
mov CCOCursor,eax ; Stash start of next line.
sub edi,esi ; Get span from Y cursor to CCO cursor.
mov bl,[esi+3] ; Fetch Y03.
and edx,0FFFF0000H ; <UV03 UV02 xxxx xxxx>.
mov eax,ChromaContribution ; Fetch <xxxx xxxx UV01 UV00>.
; From here until the loop exits, esp is the chroma cursor: it is biased
; so that ChromaContribution+1536 addresses the NEXT pair of chroma dwords,
; and it advances by 8 per iteration. Named locals are not valid here.
sub esp,1536-8
and eax,00000FFFFH ; <xxxx xxxx UV01 UV00>.
mov cl,[esi+2] ; Fetch Y02.
; Four luma pels in, four output pels out per iteration. The YDither
; offsets (+4,+2,+6,+0) differ per pel position and per line; presumably
; they select the dither phase -- confirm against the YDither table.
Line0Loop:
or eax,edx ; <UV03 UV02 UV01 UV00>.
mov dh,PB YDither[ebx+4] ; <xxxx xxxx Y03 xxxx>.
mov dl,PB YDither[ecx+2] ; <xxxx xxxx Y03 Y02>.
mov bl,PB [esi+1] ; Fetch Y01.
shl edx,16 ; < Y03 Y02 xxxx xxxx>.
mov cl,PB [esi] ; Fetch Y00.
mov dh,PB YDither[ebx+6] ; < Y03 Y02 Y01 xxxx>.
mov bl,PB [esi+3+4] ; Fetch next Y03.
mov dl,PB YDither[ecx+0] ; < Y03 Y02 Y01 Y00>.
mov cl,PB [esi+2+4] ; Fetch next Y02.
add eax,edx ; < P03 P02 P01 P00>.
mov edx,ChromaContribution+1536+4 ; Fetch next <UV03 UV02 xxxx xxxx>.
mov Ze [edi+esi],eax ; Store four pels to color conv output.
mov eax,ChromaContribution+1536 ; Fetch next <xxxx xxxx UV01 UV00>.
and edx,0FFFF0000H ; <UV03 UV02 xxxx xxxx>.
add esi,4 ; Advance input cursor.
add esp,8 ; Advance chroma cursor to the following pair.
and eax,00000FFFFH ; <xxxx xxxx UV01 UV00>. Zero at the EOL marker.
jne Line0Loop
; Recover the frame base: esp is still inside the 2 KB window just below
; it, so round down and step up one window.
and esp,0FFFFF800H
add esp,0800H
SkipLine0:
; Color convert another line of luma.
;
; Register Usage
; edi -- Cursor over line of color converted output frame, minus esi.
; esi -- Cursor over Y line.
; ebp -- Y Pitch.
; edx,eax -- Build output pels.
; ecx,ebx -- Y pels.
mov esi,YCursor ; Reload cursor over Y line.
mov ebp,LumaPitch
mov edx,AspectCount
mov edi,CCOCursor ; Fetch output cursor.
lea eax,[esi+ebp*2] ; Compute address of next line of Y.
dec edx
mov YCursor,eax ; YCursor moves on two luma lines whether or not this line is kept.
mov eax,CCOPitch ; Compute start of next line.
mov AspectCount,edx ; The movs leave the flags from dec intact for the jne.
jne KeepLine1
; Count reached zero: drop this line and restart the count.
mov edx,AspectAdjustmentCount
mov AspectCount,edx
jmp SkipLine1
KeepLine1:
add eax,edi
mov edx,ChromaContribution+4 ; Fetch <xxxx xxxx UV13 UV12>.
mov CCOCursor,eax ; Stash start of next line.
sub edi,esi ; Get span from Y cursor to CCO cursor.
mov bl,[esi+ebp*1+3] ; Fetch Y13.
mov eax,ChromaContribution ; Fetch <UV11 UV10 xxxx xxxx>.
shl edx,16 ; <UV13 UV12 xxxx xxxx>.
; esp becomes the chroma cursor, biased as in Line0Loop.
sub esp,1536-8
shr eax,16 ; <xxxx xxxx UV11 UV10>.
mov cl,[esi+ebp*1+2] ; Fetch Y12.
; Same shape as Line0Loop, reading the second luma line (esi+ebp) and the
; other 16-bit half of each chroma dword.
Line1Loop:
or eax,edx ; <UV13 UV12 UV11 UV10>.
mov dh,PB YDither[ebx+6] ; <xxxx xxxx Y13 xxxx>.
mov dl,PB YDither[ecx+0] ; <xxxx xxxx Y13 Y12>.
mov bl,PB [esi+ebp*1+1] ; Fetch Y11.
shl edx,16 ; < Y13 Y12 xxxx xxxx>.
mov cl,PB [esi+ebp*1] ; Fetch Y10.
mov dh,PB YDither[ebx+4] ; < Y13 Y12 Y11 xxxx>.
mov bl,PB [esi+ebp*1+3+4] ; Fetch next Y13.
mov dl,PB YDither[ecx+2] ; < Y13 Y12 Y11 Y10>.
mov cl,PB [esi+ebp*1+2+4] ; Fetch next Y12.
add eax,edx ; < P13 P12 P11 P10>.
mov edx,ChromaContribution+1536+4 ; Fetch next <xxxx xxxx UV13 UV12>.
mov Ze [edi+esi],eax ; Store four pels to color conv output.
mov eax,ChromaContribution+1536 ; Fetch next <UV11 UV10 xxxx xxxx>.
shl edx,16 ; <UV13 UV12 xxxx xxxx>.
add esi,4 ; Advance input cursor.
shr eax,16 ; <xxxx xxxx UV11 UV10>. Zero at the EOL marker.
lea esp,[esp+8] ; Advance chroma cursor; lea keeps the flags from shr.
jne Line1Loop
; Recover the frame base from the displaced esp.
and esp,0FFFFF800H
add esp,0800H
SkipLine1:
; Second half of the pass: preprocess the next chroma line. Same code shape
; as at NextFourLines, but V now indexes UVDitherLine23 and U indexes
; UVDitherLine01.
mov edi,VCursor ; Fetch addr of pel 0 of next line of V.
mov ebp,DistanceFromVToU ; Fetch span from V plane to U plane.
lea esi,ChromaContribution
mov eax,ChromaLineLen
mov edx,ChromaPitch
add eax,edi
mov EndOfChromaLine,eax ; Address just past the last V pel of this line.
add edx,edi ; Address of the following V line.
mov bl,[edi] ; Fetch first V pel.
;
and ebx,0FCH ; Reduce to 6 bits.
mov cl,[edi+ebp*1] ; Fetch first U pel.
and ecx,0FCH ; Reduce to 6 bits.
mov VCursor,edx ; Stash for next time around.
@@:
mov edx,PD UVDitherLine23[ebx] ; Fetch dither pattern for V point.
mov bl,[edi+1] ; Fetch next V pel.
mov eax,PD UVDitherLine01[ecx] ; Fetch dither pattern for U point.
mov cl,[edi+ebp*1+1] ; Fetch next U pel.
lea edx,[edx+edx*2+00A0A0A0AH] ; Weight V dither pattern.
and bl,0FCH ; Reduce to 6 bits.
add eax,edx ; Combine dither patterns for U and V.
and cl,0FCH ; Reduce to 6 bits.
mov edx,PD UVDitherLine23[ebx] ; Fetch dither pattern for V point.
mov [esi],eax ; Stash UV for luma pels 0 and 1 of both lines.
mov eax,PD UVDitherLine01[ecx] ; Fetch dither pattern for U point.
mov bl,[edi+2] ; Fetch next V pel.
lea edx,[edx+edx*2+00A0A0A0AH] ; Weight V dither pattern.
mov cl,[edi+ebp*1+2] ; Fetch next U pel.
add eax,edx ; Combine dither patterns for U and V.
mov edx,EndOfChromaLine ; Fetch EOL address.
mov [esi+4],eax ; Stash UV for luma pels 2 and 3 of both lines.
add edi,2 ; Advance V cursor (U is addressed relative to it via ebp).
and bl,0FCH ; Reduce to 6 bits.
and cl,0FCH ; Reduce to 6 bits.
add esi,8
sub edx,edi ; Zero exactly when the V cursor reaches EOL.
jne @b
; Color convert the third luma line of the pass (same shape as Line0).
mov [esi],edx ; Stash EOL indication (edx is zero here).
mov edx,AspectCount
mov esi,YCursor ; Reload cursor over Y line.
dec edx
mov AspectCount,edx ; mov leaves the flags from dec intact for the jne.
jne KeepLine2
; Count reached zero: drop this line and restart the count.
mov edx,AspectAdjustmentCount
mov AspectCount,edx
jmp SkipLine2
KeepLine2:
mov edi,CCOCursor ; Fetch output cursor.
mov eax,CCOPitch ; Compute start of next line.
add eax,edi
mov edx,ChromaContribution+4 ; Fetch second chroma dword; high half is used.
mov CCOCursor,eax ; Stash start of next line.
sub edi,esi ; Get span from Y cursor to CCO cursor.
mov bl,[esi+3] ; Fetch luma pel 3.
and edx,0FFFF0000H ; Keep high 16 bits.
mov eax,ChromaContribution ; Fetch first chroma dword; low half is used.
; esp becomes the chroma cursor, biased as in Line0Loop.
sub esp,1536-8
and eax,00000FFFFH ; Keep low 16 bits.
mov cl,[esi+2] ; Fetch luma pel 2.
; Same as Line0Loop except for the YDither offsets (+2,+4,+0,+6).
Line2Loop:
or eax,edx ; Merge the two chroma halves into one dword.
mov dh,PB YDither[ebx+2] ; Dither luma pel 3.
mov dl,PB YDither[ecx+4] ; Dither luma pel 2.
mov bl,PB [esi+1] ; Fetch luma pel 1.
shl edx,16 ; Move pels 3,2 to the high half.
mov cl,PB [esi] ; Fetch luma pel 0.
mov dh,PB YDither[ebx+0] ; Dither luma pel 1.
mov bl,PB [esi+3+4] ; Fetch next luma pel 3.
mov dl,PB YDither[ecx+6] ; Dither luma pel 0.
mov cl,PB [esi+2+4] ; Fetch next luma pel 2.
add eax,edx ; Add luma to chroma: four output pels.
mov edx,ChromaContribution+1536+4 ; Fetch next second chroma dword.
mov Ze [edi+esi],eax ; Store four pels to color conv output.
mov eax,ChromaContribution+1536 ; Fetch next first chroma dword.
and edx,0FFFF0000H ; Keep high 16 bits.
add esi,4 ; Advance input cursor.
add esp,8 ; Advance chroma cursor to the following pair.
and eax,00000FFFFH ; Keep low 16 bits. Zero at the EOL marker.
jne Line2Loop
; Recover the frame base from the displaced esp.
and esp,0FFFFF800H
add esp,0800H
SkipLine2:
; Color convert the fourth luma line of the pass (same shape as Line1).
mov esi,YCursor ; Reload cursor over Y line.
mov ebp,LumaPitch
mov edx,AspectCount
mov edi,CCOCursor ; Fetch output cursor.
lea eax,[esi+ebp*2] ; Compute address of next line of Y.
dec edx
mov YCursor,eax ; YCursor moves on two luma lines whether or not this line is kept.
mov eax,CCOPitch ; Compute start of next line.
mov AspectCount,edx ; The movs leave the flags from dec intact for the jne.
jne KeepLine3
; Count reached zero: drop this line and restart the count.
mov edx,AspectAdjustmentCount
mov AspectCount,edx
jmp SkipLine3
KeepLine3:
add eax,edi
mov edx,ChromaContribution+4 ; Fetch second chroma dword; low half is used.
mov CCOCursor,eax ; Stash start of next line.
sub edi,esi ; Get span from Y cursor to CCO cursor.
mov bl,[esi+ebp*1+3] ; Fetch luma pel 3 of the second line.
mov eax,ChromaContribution ; Fetch first chroma dword; high half is used.
shl edx,16 ; Move low half up.
; esp becomes the chroma cursor, biased as in Line0Loop.
sub esp,1536-8
shr eax,16 ; Move high half down.
mov cl,[esi+ebp*1+2] ; Fetch luma pel 2 of the second line.
; Same as Line1Loop except for the YDither offsets (+0,+6,+2,+4).
Line3Loop:
or eax,edx ; Merge the two chroma halves into one dword.
mov dh,PB YDither[ebx+0] ; Dither luma pel 3.
mov dl,PB YDither[ecx+6] ; Dither luma pel 2.
mov bl,PB [esi+ebp*1+1] ; Fetch luma pel 1.
shl edx,16 ; Move pels 3,2 to the high half.
mov cl,PB [esi+ebp*1] ; Fetch luma pel 0.
mov dh,PB YDither[ebx+2] ; Dither luma pel 1.
mov bl,PB [esi+ebp*1+3+4] ; Fetch next luma pel 3.
mov dl,PB YDither[ecx+4] ; Dither luma pel 0.
mov cl,PB [esi+ebp*1+2+4] ; Fetch next luma pel 2.
add eax,edx ; Add luma to chroma: four output pels.
mov edx,ChromaContribution+1536+4 ; Fetch next second chroma dword.
mov Ze [edi+esi],eax ; Store four pels to color conv output.
mov eax,ChromaContribution+1536 ; Fetch next first chroma dword.
shl edx,16 ; Move low half up.
add esi,4 ; Advance input cursor.
shr eax,16 ; Move high half down. Zero at the EOL marker.
lea esp,[esp+8] ; Advance chroma cursor; lea keeps the flags from shr.
jne Line3Loop
; Recover the frame base from the displaced esp.
and esp,0FFFFF800H
add esp,0800H
SkipLine3:
; Loop until the luma cursor lands exactly on YLimit.
mov esi,YCursor
mov eax,YLimit
cmp eax,esi
jne NextFourLines
; Epilogue: drop the local frame and restore the saved registers in the
; reverse order of the pushes.
mov esp,StashESP
pop ebx
pop ebp
pop edi
pop esi
; rturn is defined outside this file; presumably it expands to the return
; that also relieves the arguments (see OPTION EPILOGUE above) -- confirm.
rturn
YUV12ToCLUT8 endp
END