Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

445 lines
14 KiB

dnl----------------------------------------------------------------------------
dnl
dnl x86 assembly code generating macros for attribute handlers.
dnl
dnl Copyright (C) Microsoft Corporation, 1997.
dnl
dnl----------------------------------------------------------------------------
dnl
dnl d_AddAttrsCode
dnl
dnl Macro to generate fld/fadd/fstp for each argument with
dnl pipelining across all arguments.
dnl Achieves complete pipelining with four arguments, so
dnl care should be taken to batch up at least four things.
dnl A max of seven things should be added to avoid FP stack overflow.
dnl
dnl d_AddAttrsCodeLoop(pos, remaining, attr1, attr2, ...)
dnl
dnl Recursive worker for d_AddAttrsCode.
dnl   $1    position of the attribute loaded at this recursion level
dnl         (starts at 1 and is incremented on each recursive call).
dnl   $2    number of attributes still to load, this one included
dnl         (starts at the argument count and is decremented).
dnl   $3..  the attribute offsets, passed through unchanged.
dnl
dnl On the way down each level emits
dnl     fld  DWORD PTR [ecx+attr]
dnl     fadd DWORD PTR [edx+attr]
dnl with attr selected by $1, so every add is issued before any store.
dnl On the way back up each level emits one fstp to [ecx+attr] with attr
dnl selected by $2, preceded by fxch st($1-$2) whenever $1 > $2 so that the
dnl wanted sum is on top of the FP stack.
dnl
dnl NOTE(review): d_Nth1 and d_shift are defined elsewhere.  This assumes
dnl d_Nth1(n, list) yields element n of list counting from 1, and that
dnl d_shift drops the first argument - confirm against their definitions.
define(`d_AddAttrsCodeLoop',
` fld DWORD PTR [ecx+d_Nth1($1, d_shift(d_shift($@)))]
fadd DWORD PTR [edx+d_Nth1($1, d_shift(d_shift($@)))]
ifelse(eval($2 > 1), `1',
`d_AddAttrsCodeLoop(incr($1), decr($2), d_shift(d_shift($@)))')dnl
ifelse(eval($1 > $2), `1',
` fxch st(eval($1 - $2))
')dnl
fstp DWORD PTR [ecx+d_Nth1($2, d_shift(d_shift($@)))]
')dnl
dnl d_AddAttrsCode(attr1, attr2, ...)
dnl
dnl Entry point.  Starts d_AddAttrsCodeLoop at position 1 with the number of
dnl supplied arguments as the remaining count, forwarding all arguments.
define(`d_AddAttrsCode', `d_AddAttrsCodeLoop(`1', $#, $@)')dnl
dnl d_AddUVowCode(tag1, tag2)
dnl
dnl Emits a loop that adds the fUoW and fVoW values at [edx] to the matching
dnl values at [ecx] and stores the sums back to [ecx].  Entries are 4 bytes
dnl apart and one U/V pair is handled per iteration.  The iteration count is
dnl read from RCTX_cActTex, reached through pStpCtx and SCTX_pCtx
dnl (presumably the number of active textures - confirm).
dnl
dnl $1 and $2 are only pasted into the label names.
dnl The emitted code overwrites eax and saves/restores ebx.
define(`d_AddUVowCode',
`        ; Preserve ebx in case the surrounding code relies on it.
        push    ebx
        mov     ebx, pStpCtx
        mov     ebx, [ebx+SCTX_pCtx]
        xor     eax, eax
Loop$1$2:
        cmp     eax, DWORD PTR [ebx+RCTX_cActTex]
        je      Done$1$2
        ; Start both sums before storing either one.
        fld     DWORD PTR [ecx+ATTRSET_fUoW+eax*4]
        fadd    DWORD PTR [edx+ATTRSET_fUoW+eax*4]
        fld     DWORD PTR [ecx+ATTRSET_fVoW+eax*4]
        fadd    DWORD PTR [edx+ATTRSET_fVoW+eax*4]
        fstp    DWORD PTR [ecx+ATTRSET_fVoW+eax*4]
        fstp    DWORD PTR [ecx+ATTRSET_fUoW+eax*4]
        inc     eax
        jmp     Loop$1$2
Done$1$2:
        pop     ebx
')dnl
dnl
dnl d_AddScaledAttrsCode
dnl
dnl Macro to generate fld/fmul/fadd/fstp for each argument with
dnl pipelining across all arguments.
dnl Achieves complete pipelining with four arguments, so
dnl care should be taken to batch up at least four things.
dnl A max of seven things should be added to avoid FP stack overflow.
dnl
dnl d_AddScaledAttrsLoadLoop(pos, remaining, attr1, attr2, ...)
dnl
dnl Recursive load/scale/add phase used by d_AddScaledAttrsCode.
dnl   $1    position of the attribute loaded at this recursion level
dnl         (starts at 1 and is incremented on each recursive call).
dnl   $2    number of attributes still to load, this one included.
dnl   $3..  the attribute offsets, passed through unchanged.
dnl
dnl On the way down each level emits
dnl     fld  DWORD PTR [edx+attr]
dnl     fmul fScaleVal
dnl with attr selected by $1.  On the way back up each level emits an fxch,
dnl then
dnl     fld   DWORD PTR [ecx+attr]
dnl     faddp st(1), st(0)
dnl with attr selected by $2.  The fxch depth is $1-1, except at the
dnl outermost level ($1 equal to 1) where it is $2-1.
dnl
dnl NOTE(review): d_Nth1 and d_shift are defined elsewhere.  This assumes
dnl d_Nth1(n, list) yields element n of list counting from 1 - confirm.
define(`d_AddScaledAttrsLoadLoop',
` fld DWORD PTR [edx+d_Nth1($1, d_shift(d_shift($@)))]
fmul fScaleVal
ifelse(eval($2 > 1), `1',
`d_AddScaledAttrsLoadLoop(incr($1), decr($2), d_shift(d_shift($@)))')dnl
ifelse(eval($1 > 1), `1',
` fxch st(decr($1))
',
` fxch st(decr($2))
')dnl
fld DWORD PTR [ecx+d_Nth1($2, d_shift(d_shift($@)))]
faddp st(1), st(0)
')dnl
dnl d_AddScaledAttrsStoreLoop(depth, remaining, attr1, attr2, ...)
dnl
dnl Recursive store phase used by d_AddScaledAttrsCode.
dnl   $1    recursion depth; d_AddScaledAttrsCode starts it at 0.
dnl   $2    selects the attribute stored at this level; starts at the
dnl         argument count and is decremented on each recursive call.
dnl   $3..  the attribute offsets, passed through unchanged.
dnl
dnl The macro recurses before emitting anything, so the store for the
dnl deepest level is emitted first.  Each level emits one
dnl     fstp DWORD PTR [ecx+attr]
dnl preceded by fxch st($1-$2) when $1 > $2, and by fxch st(1) when $1 is 1.
dnl These exchanges match the FP stack order left by
dnl d_AddScaledAttrsLoadLoop.
dnl
dnl NOTE(review): d_Nth1 and d_shift are defined elsewhere.  This assumes
dnl d_Nth1(n, list) yields element n of list counting from 1 - confirm.
define(`d_AddScaledAttrsStoreLoop',
`ifelse(eval($2 > 1), `1',
`d_AddScaledAttrsStoreLoop(incr($1), decr($2), d_shift(d_shift($@)))')dnl
ifelse(eval($1 > $2), `1',
` fxch st(eval($1 - $2))
')dnl
ifelse($1, `1',
` fxch st(1)
')dnl
fstp DWORD PTR [ecx+d_Nth1($2, d_shift(d_shift($@)))]
')dnl
dnl d_AddScaledAttrsCode(attr1, attr2, ...)
dnl
dnl Entry point.  Expands the load loop (starting at position 1) followed by
dnl the store loop (starting at depth 0), each with the argument count and
dnl the full argument list.  The intended net effect per attribute is
dnl     [ecx+attr] = [ecx+attr] + [edx+attr] * fScaleVal
dnl fScaleVal is referenced by name and must be visible where this expands.
define(`d_AddScaledAttrsCode',
`d_AddScaledAttrsLoadLoop(`1', $#, $@)dnl
d_AddScaledAttrsStoreLoop(`0', $#, $@)')dnl
dnl d_AddScaledUVowCode(tag1, tag2)
dnl
dnl Emits a loop that multiplies the fUoW and fVoW values at [edx] by
dnl fScaleVal, adds them to the matching values at [ecx] and stores the sums
dnl back to [ecx].  Entries are 4 bytes apart and one U/V pair is handled per
dnl iteration.  The iteration count is read from RCTX_cActTex, reached
dnl through pStpCtx and SCTX_pCtx (presumably the number of active
dnl textures - confirm).
dnl
dnl $1 and $2 are only pasted into the label names.
dnl The emitted code overwrites eax and saves/restores ebx.
define(`d_AddScaledUVowCode',
`        ; Preserve ebx in case the surrounding code relies on it.
        push    ebx
        mov     ebx, pStpCtx
        mov     ebx, [ebx+SCTX_pCtx]
        xor     eax, eax
LoopScaled$1$2:
        cmp     eax, DWORD PTR [ebx+RCTX_cActTex]
        je      DoneScaled$1$2
        ; Trailing comments list the FP stack, top first.
        ; U and V are the values at ecx, dU and dV the values at edx,
        ; s is fScaleVal.
        fld     DWORD PTR [edx+ATTRSET_fUoW+eax*4]      ; dU
        fmul    fScaleVal                               ; dU*s
        fld     DWORD PTR [edx+ATTRSET_fVoW+eax*4]      ; dV  dU*s
        fmul    fScaleVal                               ; dV*s  dU*s
        fld     DWORD PTR [ecx+ATTRSET_fVoW+eax*4]      ; V  dV*s  dU*s
        faddp   st(1), st(0)                            ; V+dV*s  dU*s
        fxch    st(1)                                   ; dU*s  V+dV*s
        fld     DWORD PTR [ecx+ATTRSET_fUoW+eax*4]      ; U  dU*s  V+dV*s
        faddp   st(1), st(0)                            ; U+dU*s  V+dV*s
        fstp    DWORD PTR [ecx+ATTRSET_fUoW+eax*4]      ; V+dV*s
        fstp    DWORD PTR [ecx+ATTRSET_fVoW+eax*4]      ; (empty)
        inc     eax
        jmp     LoopScaled$1$2
DoneScaled$1$2:
        pop     ebx
')dnl
dnl
dnl d_AddFloatAttrsBody
dnl
dnl Generates the body of an FP attribute adder routine.
dnl Attributes are processed in cache order as much as possible.
dnl
dnl $1 is one of Z_Diff, Z_Diff_Spec, Z_Diff_Tex, Z_Diff_Spec_Tex,
dnl Z_DIdx, Z_DIdx_Tex, Z_Tex.
dnl
dnl d_AddFloatAttrsBody(kind)
dnl
dnl Emits the integer adds for the surface and Z pointers, then the FP adds
dnl for the attribute list selected by $1.  All adds read the delta at [edx]
dnl and accumulate into [ecx]; eax is used as scratch.
dnl
dnl The attribute lists are selected by a single multi-way comparison on $1.
dnl A value of $1 that matches none of the listed names emits only the
dnl pointer adds.  Kinds containing Tex also expand d_AddUVowCode, with $1
dnl and Float as its label tags.
define(`d_AddFloatAttrsBody',
`
        ; Add the surface and Z pointer deltas.
        mov     eax, [edx+ATTRSET_ipSurface]
        add     eax, [ecx+ATTRSET_pSurface]
        mov     [ecx+ATTRSET_pSurface], eax
        mov     eax, [edx+ATTRSET_ipZ]
        add     eax, [ecx+ATTRSET_pZ]
        mov     [ecx+ATTRSET_pZ], eax
        ; Floating point attribute additions.
ifelse(`$1', `Z_Diff',
`d_AddAttrsCode(`ATTRSET_fZ', `ATTRSET_fB',
                `ATTRSET_fG', `ATTRSET_fR',
                `ATTRSET_fA')dnl
',
`$1', `Z_Diff_Spec',
`d_AddAttrsCode(`ATTRSET_fZ', `ATTRSET_fB',
                `ATTRSET_fG', `ATTRSET_fR')dnl
d_AddAttrsCode(`ATTRSET_fA', `ATTRSET_fBS',
               `ATTRSET_fGS', `ATTRSET_fRS')dnl
',
`$1', `Z_Diff_Tex',
`d_AddAttrsCode(`ATTRSET_fZ', `ATTRSET_fOoW',
                `ATTRSET_fB', `ATTRSET_fG',
                `ATTRSET_fR', `ATTRSET_fA')dnl
d_AddUVowCode($1, `Float')
',
`$1', `Z_Diff_Spec_Tex',
`d_AddAttrsCode(`ATTRSET_fZ', `ATTRSET_fOoW',
                `ATTRSET_fB')dnl
d_AddUVowCode($1, `Float')
d_AddAttrsCode(`ATTRSET_fG', `ATTRSET_fR',
               `ATTRSET_fA', `ATTRSET_fBS',
               `ATTRSET_fGS', `ATTRSET_fRS')dnl
',
`$1', `Z_DIdx',
`d_AddAttrsCode(`ATTRSET_fZ',
                `ATTRSET_fDIdx', `ATTRSET_fDIdxA')dnl
',
`$1', `Z_DIdx_Tex',
`d_AddAttrsCode(`ATTRSET_fZ', `ATTRSET_fOoW',
                `ATTRSET_fDIdx', `ATTRSET_fDIdxA')dnl
d_AddUVowCode($1, `Float')
',
`$1', `Z_Tex',
`d_AddAttrsCode(`ATTRSET_fZ', `ATTRSET_fOoW')dnl
d_AddUVowCode($1, `Float')
')dnl
')dnl
dnl
dnl d_AddFixedAttrsBody
dnl
dnl Generates the body of a fixed attribute adder routine.
dnl Attributes are processed in cache order as much as possible.
dnl
dnl $1 contains substrings Z, Diff, Spec, DIdx, Tex in any mix.
dnl
dnl d_AddFixedAttrsBody(kind)
dnl
dnl Emits 32-bit integer adds of the delta set at [edx] into the attribute
dnl set at [ecx].  The surface and Z pointer adds are always emitted; each
dnl further group is emitted only when its substring (Z, Tex, Diff, Spec,
dnl DIdx) is found in $1 by d_index.
dnl
dnl eax is used as scratch.  The Tex group also uses ebx and edi, which it
dnl saves and restores, and reads pStpCtx.  Its loop labels are LoopFixed and
dnl DoneFixed with $1 appended.
define(`d_AddFixedAttrsBody',
`
        ; Add the surface and Z pointer deltas.
        mov     eax, [edx+ATTRSET_ipSurface]
        add     eax, [ecx+ATTRSET_pSurface]
        mov     [ecx+ATTRSET_pSurface], eax
        mov     eax, [edx+ATTRSET_ipZ]
        add     eax, [ecx+ATTRSET_pZ]
        mov     [ecx+ATTRSET_pZ], eax
        ; Attribute additions follow, one group per enabled feature.
ifelse(eval(d_index(`$1', `Z') >= 0), `1',
`
        mov     eax, [edx+ATTRSET_iZ]
        add     eax, [ecx+ATTRSET_iZ]
        mov     [ecx+ATTRSET_iZ], eax
')dnl
ifelse(eval(d_index(`$1', `Tex') >= 0), `1',
`
        mov     eax, [edx+ATTRSET_iOoW]
        add     eax, [ecx+ATTRSET_iOoW]
        mov     [ecx+ATTRSET_iOoW], eax
        ; Preserve ebx and edi in case the surrounding code relies on them.
        push    ebx
        push    edi
        mov     ebx, pStpCtx
        mov     ebx, [ebx+SCTX_pCtx]
        xor     edi, edi
LoopFixed$1:
        cmp     edi, DWORD PTR [ebx+RCTX_cActTex]
        je      DoneFixed$1
        mov     eax, [edx+ATTRSET_iUoW+4*edi]
        add     eax, [ecx+ATTRSET_iUoW+4*edi]
        mov     [ecx+ATTRSET_iUoW+4*edi], eax
        mov     eax, [edx+ATTRSET_iVoW+4*edi]
        add     eax, [ecx+ATTRSET_iVoW+4*edi]
        mov     [ecx+ATTRSET_iVoW+4*edi], eax
        inc     edi
        jmp     LoopFixed$1
DoneFixed$1:
        pop     edi
        pop     ebx
')dnl
ifelse(eval(d_index(`$1', `Diff') >= 0), `1',
`
        mov     eax, [edx+ATTRSET_iB]
        add     eax, [ecx+ATTRSET_iB]
        mov     [ecx+ATTRSET_iB], eax
        mov     eax, [edx+ATTRSET_iG]
        add     eax, [ecx+ATTRSET_iG]
        mov     [ecx+ATTRSET_iG], eax
        mov     eax, [edx+ATTRSET_iR]
        add     eax, [ecx+ATTRSET_iR]
        mov     [ecx+ATTRSET_iR], eax
        mov     eax, [edx+ATTRSET_iA]
        add     eax, [ecx+ATTRSET_iA]
        mov     [ecx+ATTRSET_iA], eax
')dnl
ifelse(eval(d_index(`$1', `Spec') >= 0), `1',
`
        mov     eax, [edx+ATTRSET_iBS]
        add     eax, [ecx+ATTRSET_iBS]
        mov     [ecx+ATTRSET_iBS], eax
        mov     eax, [edx+ATTRSET_iGS]
        add     eax, [ecx+ATTRSET_iGS]
        mov     [ecx+ATTRSET_iGS], eax
        mov     eax, [edx+ATTRSET_iRS]
        add     eax, [ecx+ATTRSET_iRS]
        mov     [ecx+ATTRSET_iRS], eax
')dnl
ifelse(eval(d_index(`$1', `DIdx') >= 0), `1',
`
        mov     eax, [edx+ATTRSET_iDIdx]
        add     eax, [ecx+ATTRSET_iDIdx]
        mov     [ecx+ATTRSET_iDIdx], eax
        mov     eax, [edx+ATTRSET_iDIdxA]
        add     eax, [ecx+ATTRSET_iDIdxA]
        mov     [ecx+ATTRSET_iDIdxA], eax
')dnl
')dnl
dnl
dnl d_FillSpanFloatAttrsBody
dnl
dnl Generates the body of a FP span filler routine.
dnl Suboptimal cache ordering due to attempt to overlap OoW divide with
dnl integer ops.
dnl
dnl $1 contains substrings Z, Diff, Spec, DIdx, Tex in any mix.
dnl
dnl d_FillSpanFloatAttrsBody(kind)
dnl
dnl Emits code that converts the FP attribute set at [ecx] to integers and
dnl writes them into the span at [edx] (RASTSPAN_ fields).  The surface and
dnl Z pointers are always copied; each further group is emitted only when
dnl its substring (Tex, Z, Diff, Spec, DIdx) is found in $1 by d_index.
dnl
dnl eax is used as scratch.  The Tex group saves and restores ebx and reads
dnl pStpCtx.  The Diff and Spec groups use the DWORD temporary iVal, which
dnl must be visible where this expands.  Loop labels are LoopFloat and
dnl DoneFloat with $1 appended.
define(`d_FillSpanFloatAttrsBody',
`ifelse(eval(d_index(`$1', `Tex') >= 0), `1',
`
fld DWORD PTR [ecx+ATTRSET_fOoW]
fist DWORD PTR [edx+RASTSPAN_iOoW]
fdivr DWORD PTR OOW_W_SCALE
; st(0) is now OOW_W_SCALE / fOoW. It stays on the FP stack until the
; fistp to RASTSPAN_iW in the second Tex group.
')dnl
; Set surface pointers.
mov eax, [ecx+ATTRSET_pSurface]
mov [edx+RASTSPAN_pSurface], eax
mov eax, [ecx+ATTRSET_pZ]
mov [edx+RASTSPAN_pZ], eax
ifelse(eval(d_index(`$1', `Tex') >= 0), `1',
`
; Clears both iLOD and iDLOD.
xor eax, eax
mov [edx+RASTSPAN_iLOD], eax
fistp DWORD PTR [edx+RASTSPAN_iW]
; Just in case ebx is used
push ebx
mov ebx, pStpCtx
mov ebx, [ebx + SCTX_pCtx]
; eax is still zero from the xor above and serves as the loop counter.
; Source U and V entries are 4 bytes apart, destination pairs 8 bytes apart.
LoopFloat$1:
cmp eax, DWORD PTR[ebx + RCTX_cActTex]
je DoneFloat$1
fld DWORD PTR [ecx+ATTRSET_fUoW + 4 * eax]
fistp DWORD PTR [edx+RASTSPAN_UVoW + 8 * eax]
fld DWORD PTR [ecx+ATTRSET_fVoW + 4 * eax]
fistp DWORD PTR [edx+RASTSPAN_UVoW + 8 * eax + 4]
inc eax
jmp LoopFloat$1
DoneFloat$1:
pop ebx
')dnl
ifelse(eval(d_index(`$1', `Z') >= 0), `1',
`
fld DWORD PTR [ecx+ATTRSET_fZ]
fistp DWORD PTR [edx+RASTSPAN_uZ]
')dnl
ifelse(eval(d_index(`$1', `Diff') >= 0), `1',
`
; Directly store DWORD-aligned fields, then whack in
; half DWORDs.
; ATTENTION - 8.8 color fields could use the FP fixing trick
; to use fstp instead of fistp. Adds could be overlapped
; so itd be free cycles?
; iVal is reused for G and then for A, so each value is copied out
; through ax before the next fistp to iVal. The WORD stores to uG and uA
; come after the DWORD stores to uB and uR.
; NOTE(review): presumably the DWORD stores also cover the uG and uA
; slots - confirm against the RASTSPAN layout.
fld DWORD PTR [ecx+ATTRSET_fG]
fistp DWORD PTR iVal
fld DWORD PTR [ecx+ATTRSET_fB]
fistp DWORD PTR [edx+RASTSPAN_uB]
mov ax, WORD PTR iVal
fld DWORD PTR [ecx+ATTRSET_fA]
fistp DWORD PTR iVal
mov WORD PTR [edx+RASTSPAN_uG], ax
fld DWORD PTR [ecx+ATTRSET_fR]
fistp DWORD PTR [edx+RASTSPAN_uR]
mov ax, WORD PTR iVal
mov WORD PTR [edx+RASTSPAN_uA], ax
')dnl
ifelse(eval(d_index(`$1', `Spec') >= 0), `1',
`
; Same pattern as the diffuse group: GS goes through iVal and ax.
fld DWORD PTR [ecx+ATTRSET_fGS]
fistp DWORD PTR iVal
fld DWORD PTR [ecx+ATTRSET_fBS]
fistp DWORD PTR [edx+RASTSPAN_uBS]
mov ax, WORD PTR iVal
mov WORD PTR [edx+RASTSPAN_uGS], ax
; Trashes uFog, but thats OK because fog isnt getting used.
fld DWORD PTR [ecx+ATTRSET_fRS]
fistp DWORD PTR [edx+RASTSPAN_uRS]
')dnl
ifelse(eval(d_index(`$1', `DIdx') >= 0), `1',
`
fld DWORD PTR [ecx+ATTRSET_fDIdx]
fistp DWORD PTR [edx+RASTSPAN_iIdx]
fld DWORD PTR [ecx+ATTRSET_fDIdxA]
fistp DWORD PTR [edx+RASTSPAN_iIdxA]
')dnl
')dnl
dnl
dnl d_FillSpanFixedAttrsBody
dnl
dnl Generates the body of a fixed span filler routine.
dnl Cache ordered except for the overlap of the OoW divide.
dnl
dnl $1 contains substrings Z, Diff, Spec, DIdx, Tex in any mix.
dnl
dnl d_FillSpanFixedAttrsBody(kind)
dnl
dnl Emits code that copies the fixed-point attribute set at [ecx] into the
dnl span at [edx] (RASTSPAN_ fields).  The surface and Z pointers are always
dnl copied; each further group is emitted only when its substring (Z, Tex,
dnl Diff, Spec, DIdx) is found in $1 by d_index.
dnl
dnl For Tex kinds the divide OOW_W_SCALE / iOoW is started first and its
dnl result is stored to RASTSPAN_iW last, with the integer copies emitted in
dnl between.
dnl
dnl eax is used as scratch.  The Tex group also uses ebx and edi, which it
dnl saves and restores, and reads pStpCtx.
dnl
dnl The loop labels are LoopFillFixed and DoneFillFixed with $1 appended.
dnl They are deliberately different from the LoopFixed/DoneFixed labels
dnl emitted by d_AddFixedAttrsBody, so both macros can be expanded with the
dnl same $1 inside one label scope without a duplicate label error.
define(`d_FillSpanFixedAttrsBody',
`ifelse(eval(d_index(`$1', `Tex') >= 0), `1',
`
        ; Start OOW_W_SCALE / iOoW. The quotient stays on the FP stack
        ; until the final fistp to RASTSPAN_iW.
        fild    DWORD PTR [ecx+ATTRSET_iOoW]
        fdivr   DWORD PTR OOW_W_SCALE
')dnl
        ; Set surface pointers.
        mov     eax, [ecx+ATTRSET_pSurface]
        mov     [edx+RASTSPAN_pSurface], eax
        mov     eax, [ecx+ATTRSET_pZ]
        mov     [edx+RASTSPAN_pZ], eax
ifelse(eval(d_index(`$1', `Z') >= 0), `1',
`
        mov     eax, [ecx+ATTRSET_uZ]
        mov     [edx+RASTSPAN_uZ], eax
')dnl
ifelse(eval(d_index(`$1', `Tex') >= 0), `1',
`
        ; Clears both iLOD and iDLOD.
        xor     eax, eax
        mov     [edx+RASTSPAN_iLOD], eax
        mov     eax, [ecx+ATTRSET_iOoW]
        mov     [edx+RASTSPAN_iOoW], eax
        ; Preserve ebx and edi in case the surrounding code relies on them.
        push    ebx
        push    edi
        mov     ebx, pStpCtx
        mov     ebx, [ebx+SCTX_pCtx]
        xor     edi, edi
        ; Source U and V entries are 4 bytes apart, destination pairs
        ; are 8 bytes apart.
LoopFillFixed$1:
        cmp     edi, DWORD PTR [ebx+RCTX_cActTex]
        je      DoneFillFixed$1
        mov     eax, [ecx+ATTRSET_iUoW+4*edi]
        mov     [edx+RASTSPAN_UVoW+8*edi], eax
        mov     eax, [ecx+ATTRSET_iVoW+4*edi]
        mov     [edx+RASTSPAN_UVoW+8*edi+4], eax
        inc     edi
        jmp     LoopFillFixed$1
DoneFillFixed$1:
        pop     edi
        pop     ebx
')dnl
ifelse(eval(d_index(`$1', `Diff') >= 0), `1',
`
        ; Directly store DWORD-aligned fields, then whack in
        ; half DWORDs.
        ; ATTENTION - Keep word pairs shifted and OR together to store
        ; as DWORDs instead?
        mov     eax, [ecx+ATTRSET_uB]
        mov     [edx+RASTSPAN_uB], eax
        mov     eax, [ecx+ATTRSET_uR]
        mov     [edx+RASTSPAN_uR], eax
        mov     ax, [ecx+ATTRSET_uG]
        mov     [edx+RASTSPAN_uG], ax
        mov     ax, [ecx+ATTRSET_uA]
        mov     [edx+RASTSPAN_uA], ax
')dnl
ifelse(eval(d_index(`$1', `Spec') >= 0), `1',
`
        ; Same pattern: DWORD stores first, then the WORD store for GS.
        mov     eax, [ecx+ATTRSET_uBS]
        mov     [edx+RASTSPAN_uBS], eax
        mov     eax, [ecx+ATTRSET_uRS]
        mov     [edx+RASTSPAN_uRS], eax
        mov     ax, [ecx+ATTRSET_uGS]
        mov     [edx+RASTSPAN_uGS], ax
')dnl
ifelse(eval(d_index(`$1', `DIdx') >= 0), `1',
`
        mov     eax, [ecx+ATTRSET_uDIdx]
        mov     [edx+RASTSPAN_iIdx], eax
        mov     eax, [ecx+ATTRSET_uDIdxA]
        mov     [edx+RASTSPAN_iIdxA], eax
')dnl
ifelse(eval(d_index(`$1', `Tex') >= 0), `1',
`
        ; Store the quotient started at the top.
        fistp   DWORD PTR [edx+RASTSPAN_iW]
')dnl
')dnl