You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
405 lines
12 KiB
405 lines
12 KiB
dnl
|
|
dnl d_StencilOp
|
|
dnl
|
|
dnl Macro to do stencil operation test routine
|
|
dnl
|
|
dnl $1 is the operand holding the stencil operation to apply (it is compared against the D3DSTENCILOP_* values)
|
|
dnl returns new stencil value in uS
|
|
dnl
|
|
dnl uSB is in mm3 and uSR is in mm4. Result in mm3.
|
|
dnl
|
|
dnl mm1 is uZ (iterated)
|
|
dnl mm2 is uZB (original)
|
|
dnl mm5 is the shifted uZB used for the compare. d_StencilOp overwrites mm5 with a copy of uSB (mm3), which its write-mask code below needs.
|
|
dnl mm5 contains the final Z mask for the conditional Z write.
|
|
|
|
define(`testVars',`
|
|
; DW_One_One is an MMWORD symbol declared EXTERN here. It must be provided by another module.
EXTERN DW_One_One:MMWORD
|
|
')
|
|
|
|
define(`d_StencSwitchcnt', 0)dnl
|
|
define(`d_StencilOp', `
; Stencil operation. Reads a D3DSTENCILOP_* code from the macro operand,
; computes the new stencil value and merges it with the old one under
; RS_STENCILWRITEMASK.
;   In:  mm3 = uSB, mm4 = uSR
;   Out: mm3 = new stencil value, confined to the low 8 bits
;   Overwrites eax, mm5 (copy of the incoming mm3) and mm6 (write mask).
; Every expansion bumps a counter so that its doneswitch label is unique.
|
|
define(`d_StencSwitchcnt', eval(d_StencSwitchcnt+1))dnl
|
|
movd mm6, XpCtx(pdwRenderState + RS_STENCILWRITEMASK)
|
|
movq mm5, mm3 ; Save the unmodified uSB: the write-mask merge below needs it.
|
|
|
|
;switch($1)
|
|
;{
|
|
mov eax, $1 ;Compare from register instead of from memory.
|
|
;case D3DSTENCILOP_KEEP:
|
|
cmp eax, D3DSTENCILOP_KEEP
|
|
je doneswitch`'d_StencSwitchcnt
|
|
;uS = uSB;
|
|
;break;
|
|
; Dont have to do anything since input and output in same reg.
|
|
;case D3DSTENCILOP_ZERO:
|
|
cmp eax, D3DSTENCILOP_ZERO
|
|
jne @f
|
|
;uS = 0;
|
|
pxor mm3, mm3
|
|
;break;
|
|
jmp doneswitch`'d_StencSwitchcnt
|
|
@@:
|
|
;case D3DSTENCILOP_REPLACE:
|
|
cmp eax, D3DSTENCILOP_REPLACE
|
|
jne @f
|
|
;uS = uSR;
|
|
movq mm3, mm4
|
|
;break;
|
|
jmp doneswitch`'d_StencSwitchcnt
|
|
@@:
|
|
;case D3DSTENCILOP_INCRSAT:
|
|
cmp eax, D3DSTENCILOP_INCRSAT
|
|
jne @f
|
|
;uS = min(uSB + 1, 0xff);
|
|
paddusb mm3, MMWORD PTR Val0x01 ; unsigned saturating byte add
|
|
;break;
|
|
jmp doneswitch`'d_StencSwitchcnt
|
|
@@:
|
|
;case D3DSTENCILOP_DECRSAT:
|
|
cmp eax, D3DSTENCILOP_DECRSAT
|
|
jne @f
|
|
;uS = max(uSB - 1, 0x00);
|
|
psubusb mm3, MMWORD PTR Val0x01 ; unsigned saturating byte subtract
|
|
;break;
|
|
jmp doneswitch`'d_StencSwitchcnt
|
|
@@:
|
|
;case D3DSTENCILOP_INVERT:
|
|
cmp eax, D3DSTENCILOP_INVERT
|
|
jne @f
|
|
;uS = ~uSB;
|
|
pxor mm3, MMWORD PTR Val0xffffffff
|
|
;break;
|
|
jmp doneswitch`'d_StencSwitchcnt
|
|
@@:
|
|
;case D3DSTENCILOP_INCR:
|
|
cmp eax, D3DSTENCILOP_INCR
|
|
jne @f
|
|
;uS = uSB + 1;
|
|
paddd mm3, MMWORD PTR Val0x01 ; wraps: the final 0xff mask below drops anything above the low byte
|
|
;break;
|
|
jmp doneswitch`'d_StencSwitchcnt
|
|
@@:
|
|
;case D3DSTENCILOP_DECR:
|
|
cmp eax, D3DSTENCILOP_DECR
|
|
jne @f
|
|
;uS = uSB - 1;
|
|
psubd mm3, MMWORD PTR Val0x01 ; wraps: the final 0xff mask below keeps only the low byte
|
|
;break;
|
|
jmp doneswitch`'d_StencSwitchcnt
|
|
@@:
|
|
; Not a valid Stencil Case, just fall through
; (mm3 is left unchanged, which gives the same result as KEEP)
|
|
|
|
;}
|
|
doneswitch`'d_StencSwitchcnt`':
|
|
|
|
;// do write mask, do not let stencil mess up Z bits
|
|
;uS &= pCtx->pdwRenderState[D3DRS_STENCILWRITEMASK];
|
|
pand mm3, mm6
|
|
;uSB &= ~(pCtx->pdwRenderState[D3DRS_STENCILWRITEMASK]);
|
|
pandn mm6, mm5 ; mm6 = (NOT write mask) AND saved uSB
|
|
|
|
;uS |= uSB;
|
|
por mm3, mm6
|
|
;uS &= 0xff;
|
|
pand mm3, MMWORD PTR Val0xff
|
|
')dnl
|
|
dnl
|
|
dnl d_Test
|
|
dnl
|
|
dnl Macro to build test routine
|
|
dnl
|
|
dnl $1 is one of 16 32
|
|
dnl $2 is one of ZWrite NoZWrite
|
|
dnl $3 is one of ZDeferred NoZDeferred
|
|
dnl $4 is one of ZAll ZNeverAlways ZLessGreaterEqual ZEqualNotEqual ZGreaterLessEqual
|
|
dnl $5 is one of NoStencil Stencil
|
|
dnl $6 is one of NotMonolithic Monolithic
|
|
dnl $7 is jump label when Monolithic fails
|
|
dnl
|
|
dnl uZB mm2
|
|
dnl uZBS mm5
|
|
dnl uSB eax mm3
|
|
dnl uSR edx mm4
|
|
dnl
|
|
define(`d_StencilFailcnt', 0)dnl
|
|
define(`d_ZFailcnt', 0)dnl
|
|
define(`d_Test', `
; Z test with optional stencil test for the Z buffer location at pS->pZ.
; The macro arguments pick the Z depth (16 or 32 bit), Z write on or off,
; direct or deferred write-back, the compare family, stencil on or off,
; and standalone routine versus inline (monolithic) code.
; Registers as used below: esi = pS->pZ, mm1 = uZ, mm2 = uZB,
; mm5 = uZB (shifted right by one on the 32 bit path) for the compare,
; mm3/mm4 = uSB/uSR on the stencil path, mm7 = uSB on the 32 bit path.
; NOTE(review): the ZWrite tests in the non-stencil write-back section
; pass the second macro argument unquoted, unlike every other test here.
; NOTE(review): with 32 bit Z, no stencil and the ZNeverAlways family the
; 0xffffff80 mask appears to be skipped, yet the ZWrite path still moves uZ
; left by one bit and ORs in uSB. Confirm that low uZ bits cannot reach the
; stencil byte.
|
|
|
|
ifelse(`$6', `NotMonolithic', `
|
|
;void Test_$1_$2_$3_$4_$5(PD3DI_RASTCTX pCtx, PD3DI_RASTPRIM pP,
|
|
; PD3DI_RASTSPAN pS)
|
|
;{
|
|
PUBLIC _MMX_Test_$1_$2_$3_$4_$5
|
|
_MMX_Test_$1_$2_$3_$4_$5:
|
|
')
|
|
|
|
define(`d_StencilFailcnt', eval(d_StencilFailcnt+1))dnl
|
|
define(`d_ZFailcnt', eval(d_ZFailcnt+1))dnl
|
|
|
|
ifelse(eval((d_index(`$1', `16') == 0) && (d_index(`$5', `Stencil') == 0)), `1', `
|
|
|
|
;D3D_WARN(0, "16 bit Z with stencil should never be called");
|
|
;DDASSERT(0);
; No instructions are emitted for this combination.
|
|
', eval((d_index(`$4', `ZAll') == 0) && (d_index(`$5', `NoStencil') == 0)), `1', `
|
|
;D3D_WARN(0, "ZAll AND NoStencil, more optimal code should be called");
|
|
;DDASSERT(0);
; No instructions are emitted for this combination.
|
|
', `
|
|
movd mm4, XpS(uZ) ; mm4 = pS->uZ, stepped by iDZDX further down
|
|
|
|
ifelse(`$1', `16', `
|
|
;// 16 bit unsigned format
|
|
;UINT16 uZ = (UINT16)(pS->uZ>>15);
|
|
movq mm1, mm4
|
|
psrld mm1, 15 ; 31 bit value down 15 to 16 bit z value.
|
|
|
|
;UINT16 uZB = *((UINT16*)pS->pZ);
|
|
mov esi, XpS(pZ)
|
|
movzx eax, word ptr[esi]
|
|
movd mm5, eax
|
|
movq mm2, mm5
|
|
', `
|
|
;// 24S8 bit unsigned format
|
|
;UINT32 uZ = pS->uZ;
|
|
movq mm1, mm4 ; No need for shift since setup gives 31 bits.
|
|
;UINT32 uZB = *((UINT32*)pS->pZ);
|
|
mov esi, XpS(pZ)
|
|
movd mm5, dword ptr[esi]
|
|
movq mm2, mm5
|
|
;UINT32 uSB = uZB & 0xff;
|
|
movd eax, mm2
|
|
and eax, 0ffh
|
|
movd mm7, eax ; for safe keeping, need this below
|
|
;UINT32 uZBS = uZB >> 1;
|
|
psrld mm5, 1
|
|
') dnl 16 or 24s8
|
|
|
|
ifelse(eval((d_index(`$4', `ZNeverAlways') != 0) && (d_index(`$1', `16') != 0)), `1', `
|
|
;uZ &= ~0x7f; // clear stencil bits so equal compare will work
|
|
;uZBS &= ~0x7f;
|
|
pand mm1, MMWORD PTR Val0xffffff80 ; ~0x7f = 0xffffff80
|
|
pand mm5, MMWORD PTR Val0xffffff80 ; ~0x7f = 0xffffff80
|
|
')
|
|
;pS->uZ += pP->iDZDX;
|
|
paddd mm4, XpP(iDZDX)
|
|
movd XpS(uZ), mm4
|
|
|
|
|
|
ifelse(`$5', `Stencil', `
|
|
;uZ &= ~0x7f; // clear stencil region
|
|
pand mm1, MMWORD PTR Val0xffffff80 ; ~0x7f = 0xffffff80
|
|
;uZB &= ~0xff;
|
|
pand mm2, MMWORD PTR Val0xffffff00 ; ~0xff = 0xffffff00
|
|
;UINT32 uS; // holds final stencil value
|
|
;UINT32 uSR = pCtx->pdwRenderState[D3DRS_STENCILREF];
|
|
mov edx, XpCtx(pdwRenderState + RS_STENCILREF)
|
|
;UINT32 uMask = pCtx->pdwRenderState[D3DRS_STENCILMASK];
|
|
;mov esi, XpCtx(pdwRenderState + RS_STENCILMASK)
|
|
; Not moved into a register to preserve pointer to ZBuffer location.
|
|
|
|
movd mm4, edx ; Save values for later
|
|
movd mm3, eax ; mm3 = uSB (eax still holds uZB & 0xff from above)
|
|
|
|
mov word ptr XpCtxSI(bStencilPass), 0 ; cleared here, set to 1 only after both tests pass
|
|
|
|
;if (SCMP(pCtx, uSB&uMask, uSR&uMask))
|
|
;{
|
|
and edx, XpCtx(pdwRenderState + RS_STENCILMASK) ; uSR&uMask
|
|
and eax, XpCtx(pdwRenderState + RS_STENCILMASK) ; uSB&uMask
|
|
|
|
; Branch-free compare: edx = ((uSR&uMask) - (uSB&uMask)) & iSAndMask, minus iSNeg,
; then sar by 31 and xor with iSXorMask. A zero result means the stencil test failed.
sub edx, eax
|
|
and edx, XpCtx(iSAndMask)
|
|
sub edx, XpCtx(iSNeg)
|
|
sar edx, 31
|
|
xor edx, XpCtx(iSXorMask)
|
|
test edx, edx ; Not needed (the xor above already set ZF)! Avoiding Flag stall?
|
|
jz StencilFail`'d_StencilFailcnt
|
|
|
|
;if (ZCMP32(pCtx, uZ, uZBS))
|
|
;{
|
|
movd edx, mm1
|
|
movd eax, mm5
|
|
|
|
; Same branch-free compare, applied to uZ - uZBS with the iZ masks. Zero means the Z test failed.
sub edx, eax
|
|
and edx, XpCtx(iZAndMask)
|
|
sub edx, XpCtx(iZNeg)
|
|
sar edx, 31
|
|
xor edx, XpCtx(iZXorMask)
|
|
test edx, edx ; Not needed (the xor above already set ZF)! Avoiding Flag stall?
|
|
jz ZFail`'d_ZFailcnt
|
|
|
|
mov word ptr XpCtxSI(bStencilPass), 1 ; both the stencil test and the Z test passed
|
|
d_StencilOp(XpCtx(pdwRenderState + RS_STENCILPASS))
|
|
mov eax, XpCtx(pfnTestPassEnd) ; target of the jmp below, loaded here because the stencil operation above overwrites eax
|
|
ifelse(`$3', `NoZDeferred', `ifelse(`$2', `ZWrite', `
|
|
;*((UINT32*)pS->pZ) = (uZ<<1) | uS;
|
|
pslld mm1, 1
|
|
por mm1, mm3
|
|
movd dword ptr [esi], mm1', `
|
|
;*((UINT32*)pS->pZ) = uZB | uS;
|
|
por mm2, mm3
|
|
movd dword ptr [esi], mm2')
|
|
', `ifelse(`$2', `ZWrite', `
|
|
;pCtx->SI.uZDeferred = (uZ<<1) | uS;
|
|
pslld mm1, 1
|
|
por mm1, mm3
|
|
movd dword ptr XpCtxSI(uZDeferred), mm1',`
|
|
;pCtx->SI.uZDeferred = uZB | uS;
|
|
por mm2, mm3
|
|
movd dword ptr XpCtxSI(uZDeferred), mm2')
|
|
')
|
|
;pCtx->pfnTestPassEnd(pCtx, pP, pS);
|
|
jmp eax
|
|
;}
|
|
;else
|
|
;{
|
|
ZFail`'d_ZFailcnt`':
|
|
d_StencilOp(XpCtx(pdwRenderState + RS_STENCILZFAIL))
|
|
|
|
mov eax, XpCtx(pfnTestFailEnd)
|
|
|
|
ifelse(`$3', `NoZDeferred', `
|
|
;*((UINT32*)pS->pZ) = uZB | uS;
|
|
por mm2, mm3
|
|
movd dword ptr [esi], mm2
|
|
',`
|
|
;pCtx->SI.uZDeferred = uZB | uS;
|
|
por mm2, mm3
|
|
movd dword ptr XpCtxSI(uZDeferred), mm2
|
|
')
|
|
;pCtx->pfnTestFailEnd(pCtx, pP, pS);
|
|
jmp eax
|
|
;}
|
|
;}
|
|
;else
|
|
;{
|
|
StencilFail`'d_StencilFailcnt`':
|
|
d_StencilOp(XpCtx(pdwRenderState + RS_STENCILFAIL))
|
|
|
|
mov eax, XpCtx(pfnTestFailEnd)
|
|
ifelse(`$3', `NoZDeferred', `
|
|
;*((UINT32*)pS->pZ) = uZB | uS;
|
|
por mm2, mm3
|
|
movd dword ptr [esi], mm2
|
|
',`
|
|
;pCtx->SI.uZDeferred = uZB | uS;
|
|
por mm2, mm3
|
|
movd dword ptr XpCtxSI(uZDeferred), mm2
|
|
')
|
|
|
|
;pCtx->pfnTestFailEnd(pCtx, pP, pS);
|
|
jmp eax
|
|
;}
|
|
|
|
', ` dnl Above code is for Stencil. Below Code is for Standard Z buffer.
|
|
|
|
ifelse(`$3', `ZDeferred', `
|
|
;pCtx->SI.uZDeferred = uZB;
|
|
movd XpCtxSI(uZDeferred), mm2
|
|
')
|
|
|
|
ifelse(`$4', `ZAll', `
|
|
|
|
dnl 16 bit and 32 bit compare are the same.
|
|
movd edx, mm1
|
|
movd eax, mm5
|
|
|
|
sub edx, eax
|
|
and edx, XpCtx(iZAndMask)
|
|
sub edx, XpCtx(iZNeg)
|
|
sar edx, 31
|
|
xor edx, XpCtx(iZXorMask)
|
|
movd mm3, edx ; mm3 = compare result, used as the Z write mask below
|
|
',
|
|
`$4', `ZNeverAlways', `
|
|
;if (~(pCtx->iZXorMask))
|
|
movq mm3, XpCtx(iZXorMask)
|
|
',
|
|
`$4', `ZGreaterLessEqual', `
|
|
;if ((pCtx->iZXorMask)^(uZ > uZB))
|
|
pcmpeqd mm4, mm4
|
|
movq mm3, mm1
|
|
pcmpgtd mm3, mm5
|
|
pxor mm3, XpCtx(iZXorMask)
|
|
pxor mm3, mm4
|
|
',
|
|
`$4', `ZEqualNotEqual', `
|
|
;if ((pCtx->iZXorMask)^(uZ != uZB))
|
|
movq mm3, mm5
|
|
pcmpeqd mm3, mm1
|
|
pxor mm3, XpCtx(iZXorMask)
|
|
',
|
|
`$4', `ZLessGreaterEqual', `
|
|
;if ((pCtx->iZXorMask)^(uZ >= uZB))
|
|
movq mm3, mm5
|
|
pcmpgtd mm3, mm1
|
|
pxor mm3, XpCtx(iZXorMask)
|
|
', `
|
|
#error Invalid arg to d_Test') dnl Matches with ZAll test above
|
|
|
|
movd eax, mm3
|
|
|
|
ifelse(`$6', `NotMonolithic', `
|
|
; eax is the compare mask from mm3 (0 or -1 expected) scaled by 4:
; 0 picks pfnTestFailEnd and -1 picks the dword stored just before it.
; NOTE(review): presumably that dword is pfnTestPassEnd; confirm against the context layout.
mov eax, XpCtx(pfnTestFailEnd+eax*4)
|
|
')
|
|
|
|
|
|
ifelse(`$3', `NoZDeferred',
|
|
`ifelse(`$1', `16',
|
|
`ifelse($2, `ZWrite',
|
|
`
|
|
; *((UINT16*)pS->pZ) = uZ;
; Masked select: mm1 = (uZ AND mm3) OR (uZB AND NOT mm3), so uZB is kept where the mask is clear.
|
|
pand mm1, mm3
|
|
pandn mm3, mm2
|
|
por mm1, mm3
|
|
movd edx, mm1
|
|
mov word ptr [esi], dx
|
|
', `
|
|
; *((UINT16*)pS->pZ) = uZB; Done already
|
|
')', `ifelse($2, `ZWrite', `
|
|
; *((UINT32*)pS->pZ) = (uZ<<1) | uSB;
; Same masked select between the repositioned uZ and uZB, then uSB from mm7 is ORed in.
|
|
pslld mm1, 1 ; Shift iterated Z into position if 24 bit Z
|
|
pand mm1, mm3
|
|
pandn mm3, mm2
|
|
por mm1, mm3
|
|
por mm1, mm7
|
|
movd dword ptr [esi], mm1
|
|
',`
|
|
; *((UINT32*)pS->pZ) = uZB | uSB;
|
|
por mm2, mm7
|
|
movd dword ptr [esi], mm2
|
|
')')',`ifelse(`$1', `16',`ifelse($2, `ZWrite',`
|
|
; pCtx->SI.uZDeferred = uZ;
; Masked select as in the direct write case, stored to uZDeferred instead of the Z buffer.
|
|
pand mm1, mm3
|
|
pandn mm3, mm2
|
|
por mm1, mm3
|
|
movd XpCtxSI(uZDeferred), mm1
|
|
',`
|
|
; pCtx->SI.uZDeferred = uZB;
|
|
movd XpCtxSI(uZDeferred), mm2
|
|
')',`ifelse($2, `ZWrite',`
|
|
; pCtx->SI.uZDeferred = (uZ<<1) | uSB;
|
|
pslld mm1, 1 ; Shift iterated Z into position if 24 bit Z
|
|
pand mm1, mm3
|
|
pandn mm3, mm2
|
|
por mm1, mm3
|
|
por mm1, mm7
|
|
movd XpCtxSI(uZDeferred), mm1
|
|
',`
|
|
; pCtx->SI.uZDeferred = uZB | uSB;
|
|
por mm2, mm7
|
|
movd XpCtxSI(uZDeferred), mm2
|
|
')')')
|
|
|
|
ifelse(`$6', `NotMonolithic', `
|
|
jmp eax
|
|
', `
|
|
; Inline (monolithic) use: eax holds the compare mask and zero means the Z test failed.
test eax, eax
|
|
jz $7
|
|
')
|
|
|
|
') dnl matches with stencil on or off.
|
|
') dnl matches with error testing code above.
|
|
') dnl matches with definition of macro
|
|
dnl
|
|
|