Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

568 lines
19 KiB

;-----------------------------------------------------------------------------
;
; This file contains the general span parsing code combined with loop code.
;
;
; WARNING WARNING WARNING
; This asm file generated from mas file.
; EDIT THE MAS FILE.
; I warned you.
; WARNING WARNING WARNING
;
;-----------------------------------------------------------------------------
INCLUDE iammx.inc
INCLUDE offs_acp.inc
include(`m4hdr.mh')dnl
include(`cvars.mh')dnl
include(`texaddra.mh')dnl
EXTERN g_uDitherValue:MMWORD
.586
.model flat
; Big separating lines separate code into span code
; and loop code. If span and loop are not going to
; end up being combined then it will be easy to
; separate the code.
.data
;-----------------------------------------------------------------------------
; Span Variables
; These are single module-level copies, not stack locals, so the routine keeps
; its working state here while it walks primitives and spans.
; StackPos: value of esp captured right after ebp is pushed on entry. The exit
; path reloads esp from it before popping ebp.
StackPos dd ?
; uSpans: working copy of pP->uSpans, zero extended to 32 bits and counted down
; once per span.
uSpans dd ?
;-----------------------------------------------------------------------------
;-----------------------------------------------------------------------------
; Loop Variables
;// Table is needed to get starting value for dither, but can use xor trick afterwards to generate consecutive values.
;// Need to compare table based method with Xor method and compare timing/memory usage. It is good to keep xor method
;// around since it can be used more efficiently when there are more free registers (i.e. a monolithic routine. Probably
;// only enough registers to do it in a gouraud or gouraud/specular case).
;static UINT64 uMMXDitherTable[16] =
;{
; 0x0000000000000000 >> 6, 0x0000800080008000 >> 6, 0x0000200020002000 >> 6, 0x0000a000a000a000 >> 6,
; 0x0000c000c000c000 >> 6, 0x0000400040004000 >> 6, 0x0000e000e000e000 >> 6, 0x0000600060006000 >> 6,
; 0x0000300030003000 >> 6, 0x0000b000b000b000 >> 6, 0x0000100010001000 >> 6, 0x0000900090009000 >> 6,
; 0x0000f000f000f000 >> 6, 0x0000700070007000 >> 6, 0x0000d000d000d000 >> 6, 0x0000500050005000 >> 6
;};
; The asm table below holds the values WITHOUT the >> 6 shown in the C initializer.
; The setup code applies that with psrlw after the lookup. The sixteen 8-byte
; entries are selected by ((uY & 3) << 2) | (uX & 3). The top word of every entry is zero.
uMMXDitherTable dq 000000000000000h , 000800080008000h , 000200020002000h , 000a000a000a000h
dq 000c000c000c000h , 000400040004000h , 000e000e000e000h , 000600060006000h
dq 000300030003000h , 000b000b000b000h , 000100010001000h , 000900090009000h
dq 000f000f000f000h , 000700070007000h , 000d000d000d000h , 000500050005000h
; Per-word pmullw factors, low word first. 565 uses 2, 1, 2, 0 and is applied when
; the surface type is RAST_STYPE_B5G6R5. 555 uses 2, 2, 2, 0 and is applied for
; every other surface type.
u565MultShifter dq 00000000200010002h
u555MultShifter dq 00000000200020002h
; 4 in each of the three low words. Added to negative fog deltas during fog setup.
uFogDXAdd dq 00000000400040004h
; Per-span copies of pCtx->iSurfaceStep and pCtx->iZStep, negated when the
; primitive has D3DI_RASTPRIM_X_DEC set.
iSurfaceStep dd ?
iZStep dd ?
; Working dither masks. Both are rebuilt for every span from the InitVal
; constants below (moved right by 6 bits) and then adjusted for direction,
; starting column parity and surface type.
uDitherXorXorMask dq 0
uDitherXorMask dq 0
uDitherXorXorMaskInitVal dq 0000200020002000h
uDitherXorMaskInitVal dq 0000800080008000h
; Pixels left in the current span, loaded from pS->uPix and counted down in
; the pixel loop.
uPix dd ?
;-----------------------------------------------------------------------------
.code
;-----------------------------------------------------------------------------
; HRESULT MMX_RenderSpansAny(PD3DI_RASTCTX pCtx)
;
; Walks every primitive reachable from pCtx->pPrim and every span stored
; after each primitive, running the LoopAny setup and pixel loop per span.
; The single argument is read from the stack ([esp+8] once ebp is pushed).
; eax is zeroed (S_OK) before returning.
;
; Register roles set up here:
;   ebx = pCtx
;   ecx = pP, the current primitive
;   ebp = pS, the current span
; ebp, ebx, esi and edi are saved on entry and restored by the exit path.
; esp at entry is kept in the global StackPos, not in a frame pointer.
;-----------------------------------------------------------------------------
;{
PUBLIC _MMX_RenderSpansAny
_MMX_RenderSpansAny:
push ebp
mov eax, esp ; eax -> saved ebp, [eax+4] = return address, [eax+8] = pCtx
mov StackPos, eax ; remembered so the exit path can unwind esp in one step
sub esp, 0Ch ; This will need to change if stack frame size changes.
push ebx
push esi
push edi
; Only now is it safe to overwrite ebx: the caller value was pushed above.
mov ebx, [eax+8] ; ebx = pCtx
;PD3DI_RASTPRIM pP = pCtx->pPrim;
mov ecx, XpCtx(pPrim)
; Both end-of-pixel hooks are pointed at the loop tail that lives inside this
; routine.
; ATTENTION?? Should these be set by validation? Every span routine knows
; where the code needs to return, so it is unclear why they would need to be.
; Also, how is pfnAlphaTestFailEnd different than pfnPixelEnd?
mov eax, _MMX_LoopAnyEndPixel
mov XpCtx(pfnPixelEnd), eax
mov XpCtx(pfnAlphaTestFailEnd), eax
;while (pP)
;{
PrimLoop:
test ecx, ecx ; null pP ends the walk
jz ExitPrimLoop
;UINT16 uSpans = pP->uSpans;
movzx eax, word ptr XpP(uSpans)
mov uSpans, eax
;PD3DI_RASTSPAN pS = (PD3DI_RASTSPAN)(pP + 1);
lea ebp, [ecx+SIZEOF_RASTPRIM] ; first span sits directly after the primitive
;while (uSpans-- > 0)
;{
SpanLoop:
mov edx, uSpans ; edx = count before the decrement
lea eax, [edx-1]
mov uSpans, eax ; the decrement is stored even when the loop is about to exit
test edx, edx
jle ExitSpanLoop
;pCtx->pfnBegin(pCtx, pP, pS);
;-----------------------------------------------------------------------------
; LoopAny code inserted here. This is to get rid of an extra
; jump.
;-----------------------------------------------------------------------------
; Setup Code begins
; This section loads the pixel count of the span and, when fog is enabled,
; precomputes the per-channel fog terms and their per-pixel deltas into the
; span iterator (pCtx->SI).
; get values to iterate
;uPix = pS->uPix;
movzx eax, word ptr XpS(uPix)
mov uPix, eax
; TODO Copy uFog and iDFog from pS to pCtx.SI
; so fog increment can be done faster in MMX.
; dont need to do this if there is no fog.
;if (pCtx->pdwRenderState[D3DRENDERSTATE_FOGENABLE]) {
cmp dword ptr XpCtx(pdwRenderState+RS_FOGENABLE), 0
je NoFogSetup
;D3DCOLOR FogColor = pCtx->pdwRenderState[D3DRENDERSTATE_FOGCOLOR];
;UINT16 FR = (UINT16)RGBA_GETRED(FogColor);
;UINT16 FG = (UINT16)RGBA_GETGREEN(FogColor);
;UINT16 FB = (UINT16)RGBA_GETBLUE(FogColor);
pxor mm0, mm0 ; mm0 = 0, used as the zero half for byte to word unpacking
movd mm1, XpCtx(pdwRenderState+RS_FOGCOLOR) ; mm1 = packed 32-bit fog color
;UINT16 uMFog = 0xff - (pS->uFog>>8);
pcmpeqd mm2, mm2 ; mm2 = all ones, so every byte is 0xff
movzx eax, word ptr XpS(uFog)
shr eax, 8 ; eax = high byte of uFog
movd mm3, eax
psubb mm2, mm3 ; low byte of mm2 = 0xff - (uFog>>8)
punpcklbw mm2, mm0 ; low word of mm2 = uMFog zero extended
punpcklwd mm2, mm2 ; Replicate uMFog
punpckldq mm2, mm2 ; uMFog now in all four words
;pCtx->SI.uFogR = uMFog * FR; // 0.8 * 0.8 = 8.8
;pCtx->SI.uFogG = uMFog * FG;
;pCtx->SI.uFogB = uMFog * FB;
punpcklbw mm1, mm0 ; fog color bytes widened to four words
pmullw mm2, mm1 ; per-channel uMFog * color
; One 8-byte store starting at uFogB. Presumably uFogG and uFogR follow it
; as consecutive words - confirm against the span iterator layout.
movq XpCtxSI(uFogB), mm2
;INT32 iMDFog = -pS->iDFog;
; NOTE(review): for iDFog = -32768 the negated value is 32768, whose low word
; is 8000h again, so the replicated word keeps the original sign.
movsx eax, word ptr XpS(iDFog)
neg eax
movd mm3, eax
punpcklwd mm3, mm3
punpckldq mm3, mm3 ; low word of -iDFog in all four words
;// 1.7.8 * 8.0 >> 8 = 1.7.8 (ATTENTION this could overflow, but it is naturally aligned for
;// doing the walking. Can fix by changing precision of uFogR values, or by clamping
;// range of iDFog.
;pCtx->SI.iFogRDX = (INT16)((iMDFog * FR) >> 8);
;pCtx->SI.iFogGDX = (INT16)((iMDFog * FG) >> 8);
;pCtx->SI.iFogBDX = (INT16)((iMDFog * FB) >> 8);
; pmulhw keeps the high 16 bits of the signed product, so the color is first
; moved up by 7 and the result is moved up by 1 afterwards. The net effect is
; a >> 8 with the lowest result bit always zero.
psllw mm1, 7 ; Have to lose a bit on fog or add some extra code
pmulhw mm3, mm1
psllw mm3, 1
;// if iFog*DX is positive, iFog*DX will always be too small, hence no overflow
;// but if iFog*DX is negative, add some to make sure overflow does not
;// occur
;if (pCtx->SI.iFogRDX < 0)
;{
; pCtx->SI.iFogRDX = min(pCtx->SI.iFogRDX+4, 0);
;}
pxor mm4, mm4 ; make zero for compare
pcmpgtw mm4, mm3 ; ffff mask of all negative deltas
movq mm5, mm4 ; save copy of mask
pand mm4, MMWORD PTR uFogDXAdd ; 4 for negative deltas
paddw mm3, mm4 ; 4 added to negative deltas
movq mm2, mm3 ; copy of deltas after add
pxor mm4, mm4 ; make zero for compare
pcmpgtw mm2, mm4 ; ffff mask for all positive values
pand mm2, mm5 ; ffff mask for all created positive values
pandn mm2, mm3 ; all created positive values anded out to zero
; One 8-byte store starting at iFogBDX, same layout assumption as uFogB above.
movq XpCtxSI(iFogBDX), mm2 ; save deltas
; Copy these values to Span Iterator so that they can be done at the same time
; as other increments.
xor eax, eax ; only ax is used by the two 16-bit copies below
mov ax, XpS(uFog)
mov XpCtxSI(uFog), ax
mov ax, XpS(iDFog)
mov XpCtxSI(iDFog), ax
;}
NoFogSetup:
; ---------------------------------------------------------------------------
; Texture coordinate setup for the span.
; Depending on D3DRENDERSTATE_TEXTUREPERSPECTIVE this runs either the
; perspective macro or the affine macro once per active texture (cActTex),
; then initialises SI.iDW and SI.iSpecialW.
; edi is the texture index inside the two loops and is preserved around them.
; mm5 (and esi on the perspective path) are loaded as inputs for the macros.
; What else the macros read or clobber is defined in the included m4 headers.
; ---------------------------------------------------------------------------
; dont need to do this if not texture mapping
;if (pCtx->pdwRenderState[D3DRENDERSTATE_TEXTUREPERSPECTIVE])
;{
cmp dword ptr XpCtx(pdwRenderState+RS_TEXTUREPERSPECTIVE), 0
je SetupNonPerspective
;//pCtx->SI.iU1 = (pS->iW*(pS->iUoW1>>4))>>16; // 8.16 * 1.11.16 == 1.15.32 >> 16 == 1.15.16
;//pCtx->SI.iV1 = (pS->iW*(pS->iVoW1>>4))>>16;
;//pCtx->SI.iU2 = (pS->iW*(pS->iUoW2>>4))>>16;
;//pCtx->SI.iV2 = (pS->iW*(pS->iVoW2>>4))>>16;
;pCtx->SI.iDW = 0x0;
mov dword ptr XpCtxSI(iDW), 0
; edi now is used to store the texture index
push edi
xor edi, edi ; flags are rewritten by the cmp at the loop head
LoopSetTexturePers:
cmp edi, dword ptr XpCtx(cActTex)
je DoneSetTexturePers
; esi is reloaded on every pass because the macro below may not preserve it.
mov esi, XpS(iW)
movq mm5, MMWORD PTR XpS(UVoW + edi * SIZEOF_UV_UNION)
d_UoWVoWTimesW()
inc edi
jmp LoopSetTexturePers
DoneSetTexturePers:
; Restore edi
pop edi
;if (pP->iDOoWDX > 0)
;{
; NOTE(review): the branch below takes the "last 3 pixels" path when
; iDOoWDX > 0, which is the opposite of the C reference lines quoted here.
; Behavior is left as it was. Confirm which sense is intended.
cmp dword ptr XpP(iDOoWDX), 0
jg SpecialWLast3
;// iSpecialW should be negative for the first 3 pixels of span
;pCtx->SI.iSpecialW = -3;
mov word ptr XpCtxSI(iSpecialW), -3
jmp DoneSpecialWif
;}
;else
;{
SpecialWLast3:
;// iSpecialW should be negative for the last 3 pixels of span
;pCtx->SI.iSpecialW = 0x7fff - uPix;
mov eax, 07fffh
sub eax, uPix
;pCtx->SI.iSpecialW += 5; // this may wrap, but it should
add eax, 5
; Store only the low 16 bits. The other two writes to iSpecialW are word
; sized and the wrap described above is a 16-bit wrap. The previous 32-bit
; store of eax also overwrote the two bytes that follow the field.
mov word ptr XpCtxSI(iSpecialW), ax
;}
DoneSpecialWif:
jmp DonePerspectiveif
;}
;else
;{
SetupNonPerspective:
; TODO Add assembly code for affine setup.
;pCtx->SI.iU1 = pS->iUoW1>>TEX_TO_FINAL_SHIFT; // 1.11.20 >> 4 == 1.15.16
;pCtx->SI.iV1 = pS->iVoW1>>TEX_TO_FINAL_SHIFT;
; edi now is used to store the texture index
push edi
xor edi, edi ; flags are rewritten by the cmp at the loop head
LoopSetTexture:
cmp edi, dword ptr XpCtx(cActTex)
je DoneSetTexture
movq mm5, MMWORD PTR XpS(UVoW + edi * SIZEOF_UV_UNION)
d_UpdateNonPersp()
inc edi
jmp LoopSetTexture
DoneSetTexture:
; Restore edi
pop edi
;pCtx->SI.iDW = 0x0;
mov dword ptr XpCtxSI(iDW), 0
;pCtx->SI.iSpecialW = 0;
mov word ptr XpCtxSI(iSpecialW), 0
;}
DonePerspectiveif:
; ---------------------------------------------------------------------------
; Per-span step and dither xor mask setup.
; The C reference kept these as function statics. Here they are the .data
; variables of the same names:
;static INT iSurfaceStep;
;static INT iZStep;
;static UINT64 uDitherXorMask; // will be either 1010b or 1000b (even or odd)
;static UINT64 uDitherXorXorMask;
; Note: Dither code needs to be setup if either color dithering or alpha dithering are on.
;
;// Dither code depends on rendering direction.
;// Shift everything down by 6 then use multiply to shift up one to have an end result of either 565 or 555.
; ---------------------------------------------------------------------------
; Both masks are rebuilt from their InitVal constants for every span.
;uDitherXorXorMask = 0x0000200020002000 >> 6;
movq mm0, MMWORD PTR uDitherXorXorMaskInitVal
psrlw mm0, 6
movq MMWORD PTR uDitherXorXorMask, mm0
;uDitherXorMask = 0x0000800080008000 >> 6;
movq mm0, MMWORD PTR uDitherXorMaskInitVal
psrlw mm0, 6
movq MMWORD PTR uDitherXorMask, mm0
;if (pP->uFlags & D3DI_RASTPRIM_X_DEC)
;{
mov eax, XpP(uFlags)
test eax, D3DI_RASTPRIM_X_DEC ; eax is reloaded on both paths below
jz LeftToRightSpan
; Right to left span: walk Z and surface pointers backwards.
;iZStep = -pCtx->iZStep;
mov eax, XpCtx(iZStep)
neg eax
mov iZStep, eax
;iSurfaceStep = -pCtx->iSurfaceStep;
mov eax, XpCtx(iSurfaceStep)
neg eax
mov iSurfaceStep, eax
;pCtx->SI.iXStep = -1; // for dithering.
; This shouldnt be needed for dithering
; since it is done differently here. TODO check this
; Dither xor mask starting value changes for this direction:
; uDitherXorMask |= uDitherXorXorMask
movq mm1, mm0 ; mm0 still holds the uDitherXorMask value stored above
por mm1, MMWORD PTR uDitherXorXorMask
movq MMWORD PTR uDitherXorMask, mm1
;}
jmp DoneSpanDirif
;else
;{
LeftToRightSpan:
; Left to right span: steps are used as they come from pCtx.
;iZStep = pCtx->iZStep;
mov eax, XpCtx(iZStep)
mov iZStep, eax
;iSurfaceStep = pCtx->iSurfaceStep;
mov eax, XpCtx(iSurfaceStep)
mov iSurfaceStep, eax
;pCtx->SI.iXStep = 1;
; iXStep shouldnt be needed. TODO check this.
;}
DoneSpanDirif:
;// ----------------------------------------------------------------------------------------------------------------
;// Doing dither setup code even if dither is not turned on.
;// This code is not very clean. TODO clean it up after it works.
; Three steps follow:
;   1. fold the parity of the starting column (uX & 1) into uDitherXorMask,
;   2. look up the starting dither value for (uY, uX) into g_uDitherValue,
;   3. scale both masks and the dither value per channel for the surface type.
;_asm{
;//if(pS->uX & 1) uDitherXorValue |= uDitherXorXorValue;
; NOTE(review): the code below toggles the bit with pxor, it does not OR it in
; as the C line above reads. The two differ when the bit was already set by
; the right to left path.
movzx eax, word ptr XpS(uX)
;// Create Zero or uDitherXorXorValue based on low bit of uX
and eax, 1
shl eax, (13 - 6) ; 1 << 7 = 0x80, which is 0x2000 >> 6
movd mm1, eax
punpcklwd mm1, mm1
punpckldq mm1, mm1 ; value replicated into all four words, top word included
; TODO Do I need to and here so that I dont disrupt Alpha channel???
pxor mm1, MMWORD PTR uDitherXorMask
movq MMWORD PTR uDitherXorMask, mm1
;}
;// Keep dither pattern up to date directly, so keeping SI.uX up
;// to date is not necessary, except for debug
;//pCtx->SI.uDitherOffset = (pS->uY & 3) | ((pS->uX & 3)<<2);
;// I move along the dithertable completely orthogonal to the way the C code does. This should not make a difference.
;g_uDitherValue = uMMXDitherTable[( ((pS->uY & 3)<<2) | (pS->uX & 3))] >> 6;
; The table stores unshifted values. The >> 6 is the psrlw after the load.
movzx eax, word ptr XpS(uY)
and eax, 3
shl eax, 2
movzx edx, word ptr XpS(uX)
and edx, 3
or eax, edx ; eax = table entry number, 0..15
shl eax, 3 ; times 8 bytes per entry
movq mm1, MMWORD PTR uMMXDitherTable[eax]
psrlw mm1, 6
movq MMWORD PTR g_uDitherValue, mm1
;//if colormode is 565 then shift all green values down by one more.
;// TODO Add RAST_STYPE_B5G5R5A1 when code is done for that format.
;// Are these multiplies noticeable or should I use two tables?
;switch(pCtx->iSurfaceType)
;{
;case RAST_STYPE_B5G6R5:
cmp dword ptr XpCtx(iSurfaceType), RAST_STYPE_B5G6R5
jne Test555
; 565: multiply the three low words by 2, 1, 2 and the top word by 0.
;_asm{
movq mm1, MMWORD PTR uDitherXorMask
pmullw mm1, MMWORD PTR u565MultShifter
movq MMWORD PTR uDitherXorMask, mm1
movq mm1, MMWORD PTR uDitherXorXorMask
pmullw mm1, MMWORD PTR u565MultShifter
movq MMWORD PTR uDitherXorXorMask, mm1
movq mm1, MMWORD PTR g_uDitherValue
pmullw mm1, MMWORD PTR u565MultShifter
movq MMWORD PTR g_uDitherValue, mm1
;}
;break;
jmp DoneModDitherValues
Test555:
;case RAST_STYPE_B5G5R5:
; Commented out this conditional because dither needs to be on for alpha dithering
; which is independent of what type of color output we want.
; As a result every surface type other than B5G6R5 takes the 555 scaling below
; (three low words times 2, top word times 0).
;
;cmp dword ptr XpCtx(iSurfaceType), RAST_STYPE_B5G5R5
;jne DoneModDitherValues
;_asm{
movq mm1, MMWORD PTR uDitherXorMask
pmullw mm1, MMWORD PTR u555MultShifter
movq MMWORD PTR uDitherXorMask, mm1
movq mm1, MMWORD PTR uDitherXorXorMask
pmullw mm1, MMWORD PTR u555MultShifter
movq MMWORD PTR uDitherXorXorMask, mm1
movq mm1, MMWORD PTR g_uDitherValue
pmullw mm1, MMWORD PTR u555MultShifter
movq MMWORD PTR g_uDitherValue, mm1
;}
;break;
;}
DoneModDitherValues:
; Setup Code Ends
; ----------------------------------------------------------------------------------------------------------------
; Loop Code Begins
; Each pass of the pixel loop jumps through pCtx->pfnLoopEnd into code that
; lives outside this file. The routine entry pointed pfnPixelEnd and
; pfnAlphaTestFailEnd at the public label below, so that code is presumably
; expected to finish by jumping back through one of them - confirm against
; the bead implementations.
;//while (1)
;//{
PixelLoop:
; uncomment to look at a span in a particular range
; movzx eax, word ptr XpS(uX)
; cmp eax, 340
; jl NotSpecial
; cmp eax, 363
; jg NotSpecial
; cmp word ptr XpS(uY), 330
; jne NotSpecial
;
; ; Special
; xor eax, eax
;
;NotSpecial:
; Probably dont need to move this into a register first.
mov eax, XpCtx(pfnLoopEnd)
;pCtx->pfnLoopEnd(pCtx, pP, pS);
jmp eax ; a jump, not a call: no return address is pushed
; Just put EndBead here for now. After Kent and Drew decide on beads, code can be moved around.
; Per-pixel tail: counts the pixel, advances the Z and surface pointers stored
; in the span, steps the dither state, then loops.
PUBLIC _MMX_LoopAnyEndPixel
_MMX_LoopAnyEndPixel:
;//if (--uPix <= 0)
;// break;
dec uPix ;// BUG BUG?? uPix should never start as zero should it?
;// if so, this is a bug.
; Signed test: a span that starts with uPix = 0 still runs pfnLoopEnd once
; before it leaves here.
jle ExitPixelLoop
;//pS->pZ += iZStep;
;//pS->pSurface += iSurfaceStep;
mov eax, dword ptr XpS(pZ)
mov edx, dword ptr XpS(pSurface)
add eax, iZStep
add edx, iSurfaceStep
mov dword ptr XpS(pZ), eax
mov dword ptr XpS(pSurface), edx
;// dont update this in dithered write functions because of alpha test
;// ATTENTION could specialize loop routines based on things like dither and Z buffer
;//pCtx->SI.uDitherOffset = (pCtx->SI.uDitherOffset + (pCtx->SI.iXStep<<2)) & 0xf;
;// May Not need DitherOffset, but I might have to update xor masks.
; Dither step: value ^= mask, then mask ^= xorxor mask, so the mask alternates
; between its two states from one pixel to the next.
movq mm3, MMWORD PTR g_uDitherValue ; four bit value from table
movq mm4, MMWORD PTR uDitherXorMask ; will be either 1010b or 1000b (even or odd)
pxor mm3, mm4 ; change dither value
pxor mm4, MMWORD PTR uDitherXorXorMask ; always 0010b
movq MMWORD PTR uDitherXorMask, mm4 ; save new xor mask
movq MMWORD PTR g_uDitherValue, mm3 ; save new dither value.
;#ifdef DBG
;// handy for debug to see where we are
;//pS->uX += (INT16)pCtx->SI.iXStep;
;#endif
;// } // while
jmp PixelLoop
ExitPixelLoop:
; Loop code ends
;-----------------------------------------------------------------------------
; LoopAny code ends here. From here on this is span walker code again:
; advance to the next span, then to the next primitive, then unwind.
;-----------------------------------------------------------------------------
;pS++;
lea ebp, [ebp+SIZEOF_RASTSPAN]
;} end of span loop body
jmp SpanLoop
ExitSpanLoop:
;pP = pP->pNext;
mov ecx, XpP(pNext)
;} end of primitive loop body
jmp PrimLoop
ExitPrimLoop:
; Empty the MMX state before handing control back to the caller.
emms
; Undo the entry sequence in reverse: the three saved registers first, then
; esp is reloaded from StackPos (which also drops the 0Ch bytes reserved on
; entry), then ebp.
pop edi
pop esi
pop ebx
mov esp, StackPos
pop ebp
;return S_OK;
xor eax, eax
;}
ret
; ATTENTION Just putting this here, because selection code needs a function pointer
; Placeholder entry point: it exists only so that there is an address to take.
; The real LoopAny work is inlined into _MMX_RenderSpansAny.
PUBLIC _MMX_LoopAny
_MMX_LoopAny:
; This should never be called by anything. If it is, it returns immediately.
ret
END