Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

495 lines
17 KiB

;-------------------------------------------------------------------------
; INTEL Corporation Proprietary Information
;
; This listing is supplied under the terms of a license
; agreement with INTEL Corporation and may not be copied
; nor disclosed except in accordance with the terms of
; that agreement.
;
; Copyright (c) 1996 Intel Corporation.
; All Rights Reserved.
;
;-------------------------------------------------------------------------
;-------------------------------------------------------------------------
;// $Header: S:\h26x\src\dec\cx51281.asv
;//
;// $Log: S:\h26x\src\dec\cxm1281.asv $
;//
;// Rev 1.7 25 Jul 1996 13:47:58 AGUPTA2
;// Fixed blockiness problem; dither matrices were not created properly.
;//
;// Rev 1.6 14 Jun 1996 16:28:24 AGUPTA2
;// Cosmetic changes to adhere to common coding convention.
;//
;// Rev 1.5 13 May 1996 11:01:34 AGUPTA2
;// Final drop from IDC.
;//
;// Rev 1.1 20 Mar 1996 11:19:24 RMCKENZX
;// March 96 version.
;//
;// Rev 1.2 01 Feb 1996 10:45:58 vladip
;// Reduced number of locals, DataSegment changed to PARA
;//
;// Rev 1.1 29 Jan 1996 18:53:40 vladip
;//
;// IFDEF TIMING is added
;//
;// Rev 1.0 29 Jan 1996 17:28:06 vladip
;// Initial mmx version.
;//
;-------------------------------------------------------------------------
;
; +---------- Color convertor.
; |+--------- For both H261 and H263.
; ||+-------- MMx Version.
; |||++------ Convert from YUV12.
; |||||+----- Convert to CLUT8.
; ||||||+---- Zoom by one, i.e. non-zoom.
; |||||||
; cxm1281 -- This function performs YUV12 to CLUT8 color conversion for H26x.
; It dithers among 9 chroma points and 26 luma points, mapping the
; 8 bit luma pels into the 26 luma points by clamping the ends and
; stepping the luma by 8.
;
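; The color convertor does not write to its input planes.
; Requirements:
; The U and V planes must each be followed by at least 4 readable bytes, and
; the Y plane by at least 8 readable bytes: the inner loop fetches the next
; group of pels before it tests for end of line (the bytes are only read).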
OPTION CASEMAP:NONE
OPTION PROLOGUE:None
OPTION EPILOGUE:ReturnAndRelieveEpilogueMacro
.586
.xlist
include iammx.inc
include memmodel.inc
.list
MMXCODE1 SEGMENT PARA USE32 PUBLIC 'CODE'
MMXCODE1 ENDS
MMXDATA1 SEGMENT PARA USE32 PUBLIC 'DATA'
MMXDATA1 ENDS
MMXDATA1 SEGMENT
ALIGN 8

; Constant tables exported to other modules.
PUBLIC V2_U0low_bound
PUBLIC U2_V0low_bound
PUBLIC V2_U0high_bound
PUBLIC U2_V0high_bound
PUBLIC U_low_value
PUBLIC V_low_value
PUBLIC convert_to_sign
PUBLIC Y0_low
PUBLIC Y1_low
PUBLIC clean_MSB_mask
PUBLIC saturate_to_Y_high
PUBLIC return_from_Y_high

;----------------------------------------------------------------------------
; How the tables are used by MMX_YUV12ToCLUT8
;
; Every output byte is the sum of a U impact, a V impact and a Y impact.
;
;   U impact:  0     chroma sample does not exceed the low bound
;              1ah   exceeds the low bound only
;              34h   exceeds both bounds (U_low_value is added once per bound)
;
;   V impact:  0     chroma sample does not exceed the low bound
;              4eh   exceeds the low bound only
;              9ch   exceeds both bounds (V_low_value is added once per bound)
;
;   Y impact:  ((Y -sat- Yn_low) >> 3), clamped to 0..19h, plus 0ah.
;
; The un-dithered thresholds are 64h (low) and 84h (high) for chroma and
; 1bh for luma.  Dithering is folded into the per-pel thresholds using these
; 4x4 matrices (one row per line, row index = line mod 4):
;
;       U                   V                   Y
;   10h   8 18h   0      8 10h   0 18h       4  2  6  0
;   18h   0 10h   8      0 18h   8 10h       6  0  4  2
;     8 10h   0 18h    10h   8 18h   0       2  4  0  6
;     0 18h   8 10h    18h   0 10h   8       0  6  2  4
;
; Rows coincide pairwise: U0=V2, U1=V3, U2=V0, U3=V1.  Each chroma table is
; therefore named for both rows it serves, and the tables are laid out so
; that indexing a label with +8 yields the row needed two lines further down
; (the code indexes with 0 for lines 0/1 and 8 for lines 2/3 of every four).
;
; Chroma bound encoding: each byte is (threshold + dither) - 80h - 1.
; The -80h matches the sign conversion applied to the samples before the
; signed PCMPGTB; the extra -1 turns "greater than" into "greater or equal".
; The trailing comments list (threshold + dither), most significant byte
; first.
;----------------------------------------------------------------------------

; --- chroma low bounds (threshold 64h) ---
V2_U0low_bound      DWORD 0f3ebfbe3h, 0f3ebfbe3h    ; 74 6c 7c 64  (row U0 = V2)
U2_V0low_bound      DWORD 0ebf3e3fbh, 0ebf3e3fbh    ; 6c 74 64 7c  (row U2 = V0)
                    DWORD 0f3ebfbe3h, 0f3ebfbe3h    ; 74 6c 7c 64  (row U0 = V2)
U3_V1low_bound      DWORD 0e3fbebf3h, 0e3fbebf3h    ; 64 7c 6c 74  (row U3 = V1)
V3_U1low_bound      DWORD 0fbe3f3ebh, 0fbe3f3ebh    ; 7c 64 74 6c  (row U1 = V3)
                    DWORD 0e3fbebf3h, 0e3fbebf3h    ; 64 7c 6c 74  (row U3 = V1)

; --- chroma high bounds (threshold 84h) ---
V2_U0high_bound     DWORD 0130b1b03h, 0130b1b03h    ; 94 8c 9c 84  (row U0 = V2)
U2_V0high_bound     DWORD 00b13031bh, 00b13031bh    ; 8c 94 84 9c  (row U2 = V0)
                    DWORD 0130b1b03h, 0130b1b03h    ; 94 8c 9c 84  (row U0 = V2)
U3_V1high_bound     DWORD 0031b0b13h, 0031b0b13h    ; 84 9c 8c 94  (row U3 = V1)
V3_U1high_bound     DWORD 01b03130bh, 01b03130bh    ; 9c 84 94 8c  (row U1 = V3)
                    DWORD 0031b0b13h, 0031b0b13h    ; 84 9c 8c 94  (row U3 = V1)

; --- per-bound impact values and the unsigned-to-signed bias ---
U_low_value         DWORD 01a1a1a1ah, 01a1a1a1ah    ; added once for each U bound exceeded
V_low_value         DWORD 04e4e4e4eh, 04e4e4e4eh    ; added once for each V bound exceeded
convert_to_sign     DWORD 080808080h, 080808080h    ; subtracted from U and V samples

; --- luma offsets: 1bh minus the Y dither row; two rows per label ---
Y0_low              DWORD 01719151bh, 01719151bh    ; 1b - (4 2 6 0)   line%4 = 0
                    DWORD 019171b15h, 019171b15h    ; 1b - (2 4 0 6)   line%4 = 2
Y1_low              DWORD 0151b1719h, 0151b1719h    ; 1b - (6 0 4 2)   line%4 = 1
                    DWORD 01b151917h, 01b151917h    ; 1b - (0 6 2 4)   line%4 = 3

; --- luma post-processing ---
clean_MSB_mask      DWORD 01f1f1f1fh, 01f1f1f1fh    ; clears bits shifted in from the neighbouring byte by PSRLQ 3
saturate_to_Y_high  DWORD 0e6e6e6e6h, 0e6e6e6e6h    ; ffh - 19h: saturating add clamps the luma step to 19h
return_from_Y_high  DWORD 0dcdcdcdch, 0dcdcdcdch    ; ffh - 19h - 0ah: undoes the add and leaves +0ah
MMXDATA1 ENDS
MMXCODE1 SEGMENT
;----------------------------------------------------------------------------
; MMX_YUV12ToCLUT8 -- convert a YUV12 frame to dithered CLUT8, no zoom.
;
; The frame is processed two Y lines (= one chroma line) at a time, and each
; inner-loop pass produces 8 output bytes on each of the two lines.
;
; Arguments (all DWORD; addressed through argument_base = ebp + 16):
;   AYPlane, AVPlane, AUPlane   addresses of the Y, V and U input planes
;   AFrameWidth                 width in pels.  The stack slot is overwritten
;                               with -(width/2) and reused as the loop index
;                               seed.  The inner loop advances 8 pels per
;                               pass and always runs at least once, so a
;                               width that is not a multiple of 8 is rounded
;                               up (extra bytes are read and written).
;   AFrameHeight                height in lines
;   AYPitch                     bytes from one Y line to the next
;   AVPitch                     bytes from one chroma line to the next
;                               (applied to both the U and the V cursor)
;   AAspectAdjustmentCnt        0: never drop a line.  1: return without
;                               producing output.  N >= 2: a running counter
;                               starts at N and loses 2 per line pair; when
;                               it is 2 or less at the start of a pair one
;                               line of that pair is dropped (the odd line if
;                               the counter was exactly 2, otherwise the even
;                               line) and N is added back.
;   AColorConvertedFrame, ADCIOffset, ACCOffsetToLine0
;                               summed to give the address of output line 0
;   ACCOPitch                   bytes from one output line to the next
;   ACCType                     not referenced by this routine
;
; Registers: esi, edi, ebp and ebx are saved and restored.  eax, ecx, edx,
; the flags and mm0-mm7 are overwritten.
;
; The inner loop reads 4 bytes past the end of each U/V line and 8 bytes past
; the end of each even Y line before testing for end of line.
;----------------------------------------------------------------------------
MMX_YUV12ToCLUT8 PROC DIST LANG PUBLIC,
AYPlane: DWORD,
AVPlane: DWORD,
AUPlane: DWORD,
AFrameWidth: DWORD,
AFrameHeight: DWORD,
AYPitch: DWORD,
AVPitch: DWORD,
AAspectAdjustmentCnt: DWORD,
AColorConvertedFrame: DWORD,
ADCIOffset: DWORD,
ACCOffsetToLine0: DWORD,
ACCOPitch: DWORD,
ACCType: DWORD
LocalFrameSize = 108                    ; bytes of locals (offsets 0..107 below)
RegisterStorageSize = 16                ; the four registers pushed in the prologue
argument_base EQU ebp + RegisterStorageSize
local_base EQU esp
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Arguments (the +4 skips the return address):
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
YPlane EQU argument_base + 4
VPlane EQU argument_base + 8
UPlane EQU argument_base + 12
FrameWidth EQU argument_base + 16
FrameHeight EQU argument_base + 20
YPitch EQU argument_base + 24
ChromaPitch EQU argument_base + 28
AspectAdjustmentCount EQU argument_base + 32
ColorConvertedFrame EQU argument_base + 36
DCIOffset EQU argument_base + 40
CCOffsetToLine0 EQU argument_base + 44
CCOPitch EQU argument_base + 48
CCType EQU argument_base + 52
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Locals (on local stack frame)
; (local_base is aligned to a 32-byte boundary in the prologue)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
tmpV2_U0low_bound EQU local_base + 0 ; qword
tmpU2_V0low_bound EQU local_base + 8 ; qword
tmpU3_V1low_bound EQU local_base + 16 ; qword
tmpV3_U1low_bound EQU local_base + 24 ; qword
tmpV2_U0high_bound EQU local_base + 32 ; qword
tmpU2_V0high_bound EQU local_base + 40 ; qword
tmpU3_V1high_bound EQU local_base + 48 ; qword
tmpV3_U1high_bound EQU local_base + 56 ; qword
tmpY0_low EQU local_base + 64 ; qword
tmpY1_low EQU local_base + 72 ; qword
tmpBlockParity EQU local_base + 80      ; 0 or 8: byte offset into the constant tables
YLimit EQU local_base + 84              ; outer loop runs while the odd Y cursor is below this
AspectCount EQU local_base + 88         ; running line-drop counter
tmpYCursorEven EQU local_base + 92      ; end of the current even Y line
tmpYCursorOdd EQU local_base + 96       ; end of the current odd Y line
tmpCCOPitch EQU local_base + 100        ; CCOPitch, or 0 when a line of the pair is dropped
StashESP EQU local_base + 104           ; esp/ebp value captured after the register pushes
U_low EQU mm6
V_low EQU mm7
U_high EQU U_low
V_high EQU V_low

        ;--------------------------------------------------------------------
        ; Prologue: save registers, build a 32-byte aligned local frame and
        ; remember the unaligned stack pointer so it can be restored.
        ;--------------------------------------------------------------------
        push    esi
        push    edi
        push    ebp
        push    ebx
        mov     ebp, esp
        sub     esp, LocalFrameSize
        and     esp, -32                ; align at cache line boundary
        mov     [StashESP], ebp

        ;--------------------------------------------------------------------
        ; Set up the Y cursors and the termination limit.
        ;--------------------------------------------------------------------
        mov     ecx, [YPitch]
        mov     edx, [FrameHeight]
        mov     ebx, [FrameWidth]
        imul    edx, ecx                ; edx = FrameHeight * YPitch
        mov     eax, [YPlane]
        add     edx, eax                ; edx = YPlane + FrameHeight * YPitch
        add     eax, ebx                ; eax = end of first even Y line
        mov     [tmpYCursorEven], eax
        add     eax, ecx                ; add YPitch
        mov     [tmpYCursorOdd], eax    ; end of first odd Y line
        lea     edx, [edx+2*ebx]        ; YPlane + FrameHeight*YPitch + 2*FrameWidth
        mov     [YLimit], edx

        ;--------------------------------------------------------------------
        ; Set up the chroma and output cursors (all point to END of line;
        ; the loop indexes them with a negative offset).
        ;--------------------------------------------------------------------
        mov     esi, [VPlane]
        mov     edx, [UPlane]
        mov     eax, [ColorConvertedFrame]
        add     eax, [DCIOffset]
        add     eax, [CCOffsetToLine0]  ; eax = start of output line 0
        sar     ebx, 1                  ; ebx = FrameWidth / 2 (chroma width)
        add     esi, ebx                ; esi = end of first V line
        add     edx, ebx                ; edx = end of first U line
        lea     edi, [eax+2*ebx]        ; CCOCursor: end of first output line

        ;--------------------------------------------------------------------
        ; Initialise the line-drop counter.  A request of 0 becomes
        ; 0FFFFFFFFh; a resulting value of 1 or less exits immediately.
        ;--------------------------------------------------------------------
        mov     ecx, [AspectAdjustmentCount]
        test    ecx, ecx                ; if AspectCount=0 we should not drop any lines
        jnz     non_zero_AspectCount
        dec     ecx
non_zero_AspectCount:
        mov     [AspectCount], ecx
        cmp     ecx, 1
        jbe     finish                  ; no MMX instruction has executed yet on this path

        neg     ebx
        mov     [FrameWidth], ebx       ; argument slot now holds -(FrameWidth/2)
        movq    mm6, U_low_value        ; store some frequently used values in registers
        movq    mm7, V_low_value
        xor     eax, eax
        mov     [tmpBlockParity], eax   ; start with table offset 0

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Register usage:
;
; esi -- points to the end of the V line
; edx -- points to the end of the U line
; edi -- points to the end of the even line of output
; ebp -- points to the end of the odd line of output (inner loop only;
;        reloaded with the frame pointer afterwards)
;
; ecx -- in the inner loop, points to the end of the odd Y line
; eax -- table offset (0 or 8) while constants are prepared;
;        in the inner loop, points to the end of the even Y line
; ebx -- negative index: -(chroma samples still to process on this line)
;
; The dither matrix is 4x4, so the even pair of lines and the odd pair of
; lines need different constants.  The inner loop handles two lines, so
; the constants for the coming two lines are copied to the local frame
; here, each time a new pair of lines is started.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
PrepareNext2Lines:
        mov     eax, [tmpBlockParity]

        ; constants for the odd line
        movq    mm0, V3_U1low_bound[eax]
        movq    mm1, V3_U1high_bound[eax]
        movq    mm2, U3_V1low_bound[eax]
        movq    mm3, U3_V1high_bound[eax]
        movq    [tmpV3_U1low_bound], mm0
        movq    [tmpV3_U1high_bound], mm1
        movq    [tmpU3_V1low_bound], mm2
        movq    [tmpU3_V1high_bound], mm3

        ; constants for the even line (left in mm0-mm3 for the drop-odd path)
        movq    mm0, V2_U0low_bound[eax]
        movq    mm1, V2_U0high_bound[eax]
        movq    mm2, U2_V0low_bound[eax]
        movq    mm3, U2_V0high_bound[eax]
        movq    [tmpV2_U0low_bound], mm0
        movq    [tmpV2_U0high_bound], mm1
        movq    [tmpU2_V0low_bound], mm2
        movq    [tmpU2_V0high_bound], mm3

        ; constants for the Y values
        movq    mm4, Y0_low[eax]
        movq    mm5, Y1_low[eax]
        xor     eax, 8                  ; flip table offset for the next pair
        mov     [tmpBlockParity], eax
        movq    [tmpY0_low], mm4
        movq    [tmpY1_low], mm5

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Line dropping.  If AspectCount is 2 or less a line must be dropped.
; Two lines are still converted, but both are written to the same
; output line (tmpCCOPitch = 0), the odd one last.
;   AspectCount  > 2 : keep both lines.
;   AspectCount == 2 : drop the odd line -- the odd pass is redirected to
;                      the even Y line with the even constants, so the
;                      surviving output is the even line.
;   AspectCount  < 2 : drop the even line -- the odd line overwrites it.
; The flags tested by JA and JNZ are those of the SUB; the MOV and LEA
; instructions in between do not modify them.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
        mov     eax, [CCOPitch]
        mov     ebx, [AspectCount]
        xor     ecx, ecx
        sub     ebx, 2
        mov     [tmpCCOPitch], eax
        ja      continue
        mov     eax, [AspectAdjustmentCount]
        mov     [tmpCCOPitch], ecx      ; 0
        lea     ebx, [ebx+eax]          ; calculate new AspectCount
        jnz     continue                ; skipping even line

        ; skip_odd_line: make the odd pass a copy of the even pass
        mov     eax, [tmpYCursorEven]
        movq    [tmpV3_U1low_bound], mm0
        movq    [tmpV3_U1high_bound], mm1
        movq    [tmpU3_V1low_bound], mm2
        movq    [tmpU3_V1high_bound], mm3
        movq    [tmpY1_low], mm4
        mov     [tmpYCursorOdd], eax

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; All preparations are done.  Load the cursors, fetch the first group
; of samples and enter the main loop at do_next_2x8_block.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
continue:
        mov     [AspectCount], ebx
        mov     ebx, [FrameWidth]       ; -(FrameWidth/2), stored earlier
        mov     ebp, edi
        add     ebp, [tmpCCOPitch]      ; ebp points to the end of odd line
        mov     eax, [tmpYCursorEven]
        mov     ecx, [tmpYCursorOdd]
        movdt   mm0, [edx+ebx]          ; 0:0:0:0|u3:u2:u1:u0 unsigned
        movdt   mm2, [esi+ebx]          ; 0:0:0:0|v3:v2:v1:v0 unsigned
        punpcklbw mm0, mm0              ; u3:u3:u2:u2|u1:u1:u0:u0 unsigned
        psubb   mm0, convert_to_sign    ; u3:u3:u2:u2|u1:u1:u0:u0 signed
        punpcklbw mm2, mm2              ; v3:v3:v2:v2|v1:v1:v0:v0 unsigned
        movq    mm4, [eax+2*ebx]        ; y7|..|y0
        movq    mm1, mm0                ; u3:u3:u2:u2|u1:u1:u0:u0

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Main loop.  On entry to each pass:
;   mm0 = mm1 = 4 U samples, each doubled to 2 bytes, sign-converted
;   mm2       = 4 V samples, each doubled to 2 bytes, still unsigned
;   mm4       = 8 Y samples of the even line
; The instruction order interleaves the even-line and odd-line
; computations and the fetches for the next pass; do not reorder.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
do_next_2x8_block:
        psubb   mm2, convert_to_sign    ; v3:v3:v2:v2|v1:v1:v0:v0 signed
        movq    mm5, mm1                ; u3:u3:u2:u2|u1:u1:u0:u0
        pcmpgtb mm0, [tmpV2_U0low_bound]
        movq    mm3, mm2
        pcmpgtb mm1, [tmpV2_U0high_bound]
        pand    mm0, U_low
        psubusb mm4, [tmpY0_low]
        pand    mm1, U_high
        pcmpgtb mm2, [tmpU2_V0low_bound]
        psrlq   mm4, 3
        pand    mm4, clean_MSB_mask
        pand    mm2, V_low
        paddusb mm4, saturate_to_Y_high
        paddb   mm0, mm1                ; U03:U03:U02:U02|U01:U01:U00:U00
        psubusb mm4, return_from_Y_high ; even-line Y impact complete
        movq    mm1, mm5
        pcmpgtb mm5, [tmpV3_U1low_bound]
        paddd   mm0, mm2
        pcmpgtb mm1, [tmpV3_U1high_bound]
        pand    mm5, U_low
        paddd   mm0, mm4
        movq    mm2, mm3
        pcmpgtb mm3, [tmpU2_V0high_bound]
        pand    mm1, U_high
        movq    mm4, [ecx+2*ebx]        ; read next 8 Y points from odd line
        paddb   mm5, mm1                ; u impact on odd line
        psubusb mm4, [tmpY1_low]
        movq    mm1, mm2
        pcmpgtb mm2, [tmpU3_V1low_bound]
        psrlq   mm4, 3
        pand    mm4, clean_MSB_mask
        pand    mm2, V_low
        paddusb mm4, saturate_to_Y_high
        paddd   mm5, mm2
        psubusb mm4, return_from_Y_high ; odd-line Y impact complete
        pand    mm3, V_high
        pcmpgtb mm1, [tmpU3_V1high_bound]
        paddb   mm3, mm0                ; mm3 = 8 finished even-line bytes
        movdt   mm0, [edx+ebx+4]        ; read next 4 U points
        pand    mm1, V_high
        movdt   mm2, [esi+ebx+4]        ; read next 4 V points
        paddd   mm5, mm4
        movq    mm4, [eax+2*ebx+8]      ; read next 8 Y points from even line
        paddb   mm5, mm1                ; mm5 = 8 finished odd-line bytes
        psubb   mm0, convert_to_sign
        punpcklbw mm2, mm2              ; v3:v3:v2:v2|v1:v1:v0:v0
        movq    [edi+2*ebx], mm3        ; write even line
        punpcklbw mm0, mm0              ; u3:u3:u2:u2|u1:u1:u0:u0
        movq    [ebp+2*ebx], mm5        ; write odd line
        movq    mm1, mm0                ; u3:u3:u2:u2|u1:u1:u0:u0
        add     ebx, 4
        jl      do_next_2x8_block

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Update the pointers to the input and output buffers so that they
; point to the next pair of lines.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
        mov     ebp, [StashESP]         ; ebp is the frame pointer again
        mov     eax, [tmpYCursorEven]
        mov     ecx, [YPitch]
        add     edi, [CCOPitch]         ; go to the end of next line
        add     edi, [tmpCCOPitch]      ; skip odd line (adds 0 if a line was dropped)
        lea     eax, [eax+2*ecx]
        mov     [tmpYCursorEven], eax
        add     eax, [YPitch]
        mov     [tmpYCursorOdd], eax
        add     esi, [ChromaPitch]
        mov     ecx, [YLimit]           ; Done with last line?
        add     edx, [ChromaPitch]
        cmp     eax, ecx
        jb      PrepareNext2Lines

        ; Leave MMX state before returning.  The MMX registers alias the x87
        ; register stack, and without EMMS the x87 tag word stays marked
        ; in-use for whatever floating-point code runs next.  Harmless if
        ; the caller issues EMMS as well.
        ; NOTE(review): relies on iammx.inc providing emms like the other
        ; MMX mnemonics used in this file -- confirm.
        emms

finish:
        mov     esp, [StashESP]
        pop     ebx
        pop     ebp
        pop     edi
        pop     esi
        ret
MMX_YUV12ToCLUT8 ENDP
MMXCODE1 ENDS
END