|
|
;-----------------------------------------------------------------------------
;
; This file contains texture blending functions.
;
; TODO: Unpack TexColor[0] and TexColor[1].
; TODO: Change ONLY the alpha part of the value (two ANDs and an OR).
;
;-----------------------------------------------------------------------------
include(`m4hdr.mh')dnl include(`cvars.mh')dnl include(`texblend.mh')dnl
.586 .model flat .data
; 64-bit constants for the MMX blend code.  The routines in this file handle
; a color as four 16-bit words; the alpha channel is the highest word and the
; three color channels are the low words.

; Keeps the three low (color) words and clears the high (alpha) word.
PUBLIC MaskOffAlpha
MaskOffAlpha            dq 00000ffffffffffffh

; Value 0x0100 in the high (alpha) word only.
PUBLIC ShiftTA
ShiftTA                 dq 00100000000000000h

; 0x00ff in each of the four words.
PUBLIC Val0x00ff00ff00ff00ff
Val0x00ff00ff00ff00ff   dq 000ff00ff00ff00ffh

; 0x00ff in each of the three low words.
PUBLIC Val0x000000ff00ff00ff
Val0x000000ff00ff00ff   dq 0000000ff00ff00ffh

PUBLIC Val0X0000000001000000
Val0X0000000001000000   dq 00000000001000000h

; 0x4000 in each of the three low words (used by the color Modulate4 op).
PUBLIC Val0x0000400040004000
Val0x0000400040004000   dq 00000400040004000h

; 0x4000 in the high word only (used by the alpha Modulate4 op).
PUBLIC Val0x4000000000000000
Val0x4000000000000000   dq 04000000000000000h

; Bias subtracted by the alpha AddSigned and AddSigned2 ops.
PUBLIC AlphaVal128
AlphaVal128             dq 04000000000000000h

; Bias subtracted by the color AddSigned and AddSigned2 ops.
PUBLIC RGBVal128
RGBVal128               dq 00000400040004000h
        ; This is actually 64 in the high byte since values needed to be
        ; shifted down by one to fake saturation.
.code
INCLUDE iammx.inc INCLUDE offs_acp.inc
EXTERN Zero:MMWORD
;-----------------------------------------------------------------------------
;
; TexBlend_Tex1_None
;
; cPix = cSrc
; aPix = aSrc
;
; The blend code itself is produced by the m4 macro invoked in the body; that
; macro is not defined in this file.  The routine never returns to its
; caller: it finishes with a jump to the handler in pCtx->pfnTexBlendEnd.
;
;-----------------------------------------------------------------------------
;void TexBlend_Tex1_None(PD3DI_RASTCTX pCtx, PD3DI_RASTPRIM pP,
;                        PD3DI_RASTSPAN pS)
;{
PUBLIC _MMX_TexBlend_Tex1_None
_MMX_TexBlend_Tex1_None:
    ; Get ready for next indirect jump (eax = pCtx->pfnTexBlendEnd).
    ; NOTE(review): loaded before the macro body, so the macro presumably
    ; leaves eax untouched -- confirm against the macro source.
    mov     eax, XpCtx(pfnTexBlendEnd)

d_TexBlend_Tex1_None(NotMonolithic)

    ;pCtx->pfnTexBlendEnd(pCtx, pP, pS);
    jmp     eax
;}
;-----------------------------------------------------------------------------
;
; TexBlend_Tex1_Decal
;
; cPix = cTex
; aPix = aTex
;
; The blend code itself is produced by the m4 macro invoked in the body; that
; macro is not defined in this file.  The routine never returns to its
; caller: it finishes with a jump to the handler in pCtx->pfnTexBlendEnd.
;
;-----------------------------------------------------------------------------
;void TexBlend_Tex1_Decal(PD3DI_RASTCTX pCtx, PD3DI_RASTPRIM pP,
;                         PD3DI_RASTSPAN pS)
;{
PUBLIC _MMX_TexBlend_Tex1_Decal
_MMX_TexBlend_Tex1_Decal:

    ; Get ready for next indirect jump (eax = pCtx->pfnTexBlendEnd).
    ; NOTE(review): loaded before the macro body, so the macro presumably
    ; leaves eax untouched -- confirm against the macro source.
    mov     eax, XpCtx(pfnTexBlendEnd)

d_TexBlend_Tex1_Decal(NotMonolithic)

    ;pCtx->pfnTexBlendEnd(pCtx, pP, pS);
    jmp     eax
;}
;-----------------------------------------------------------------------------
;
; TexBlend_Tex1_Modulate
;
; cPix = cSrc * cTex
; aPix = aTex
;
; The blend code itself is produced by the m4 macro invoked in the body; that
; macro is not defined in this file.  The routine never returns to its
; caller: it finishes with a jump to the handler in pCtx->pfnTexBlendEnd.
;
;-----------------------------------------------------------------------------
;void TexBlend_Tex1_Modulate(PD3DI_RASTCTX pCtx, PD3DI_RASTPRIM pP,
;                            PD3DI_RASTSPAN pS)
;{
PUBLIC _MMX_TexBlend_Tex1_Modulate
_MMX_TexBlend_Tex1_Modulate:

    ; Get ready for next indirect jump (eax = pCtx->pfnTexBlendEnd).
    ; NOTE(review): loaded before the macro body, so the macro presumably
    ; leaves eax untouched -- confirm against the macro source.
    mov     eax, XpCtx(pfnTexBlendEnd)

d_TexBlend_Tex1_Modulate(NotMonolithic)

    ;pCtx->pfnTexBlendEnd(pCtx, pP, pS);
    jmp     eax
;}
;-----------------------------------------------------------------------------
;
; TexBlend_Tex1_ModulateAlphaOVR
;
; cPix = cSrc * cTex
; aPix = aSrc
;
; The blend code itself is produced by the m4 macro invoked in the body; that
; macro is not defined in this file.  The routine never returns to its
; caller: it finishes with a jump to the handler in pCtx->pfnTexBlendEnd.
;
;-----------------------------------------------------------------------------
;void TexBlend_Tex1_ModulateAlphaOVR(PD3DI_RASTCTX pCtx, PD3DI_RASTPRIM pP,
;                                    PD3DI_RASTSPAN pS)
;{
PUBLIC _MMX_TexBlend_Tex1_ModulateAlphaOVR
_MMX_TexBlend_Tex1_ModulateAlphaOVR:

    ; Get ready for next indirect jump (eax = pCtx->pfnTexBlendEnd).
    ; NOTE(review): loaded before the macro body, so the macro presumably
    ; leaves eax untouched -- confirm against the macro source.
    mov     eax, XpCtx(pfnTexBlendEnd)

d_TexBlend_Tex1_ModulateAlphaOVR(NotMonolithic)

    ;pCtx->pfnTexBlendEnd(pCtx, pP, pS);
    jmp     eax
;}
;-----------------------------------------------------------------------------
;
; TexBlend_Tex1_Gen
;
; General blend for a single texture stage.  Runs the four stage 0 handlers
; (get alpha, alpha op, get color, color op) through the function pointers
; in the context, stores the blended color in pCtx->SI.uBB and then jumps to
; the handler in pCtx->pfnTexBlendEnd.
;
; Register roles used by this routine and the handlers in this file:
;   mm7 = Input color      mm1 = Arg1      mm2 = Arg2
;   mm4 = blended result   edx = byte offset of the texture stage
;
;-----------------------------------------------------------------------------
;void CMMX_TexBlend_Tex1_Gen(PD3DI_RASTCTX pCtx, PD3DI_RASTPRIM pP,
;                            PD3DI_RASTSPAN pS)
;{
PUBLIC _MMX_TexBlend_Tex1_Gen
_MMX_TexBlend_Tex1_Gen:

    ; Stage 0: this is the iTex argument of the four handler calls.
    mov     edx, 0

    ; The input defaults to the diffuse color.
    ; D3DI_RASTCOLOR Input = *(D3DI_RASTCOLOR*)&pS->uB;
    movq    mm7, XpS(uB)

    ; Alpha operands, then the alpha operation.
    ;pCtx->pfnTexBlendGetAlpha[0](&Arg1, &Arg2, &Input, pCtx, pS, 0);
    call    dword ptr XpCtx(pfnTexBlendGetAlpha)
    ;pCtx->pfnTexBlendOpAlpha[0]((D3DI_RASTCOLOR*)&pCtx->SI.uBB, &Arg1, &Arg2, pCtx, pS, 0);
    call    dword ptr XpCtx(pfnTexBlendOpAlpha)

    ; Color operands, then the color operation (result ends up in mm4).
    ;pCtx->pfnTexBlendGetColor[0](&Arg1, &Arg2, &Input, pCtx, pS, 0);
    call    dword ptr XpCtx(pfnTexBlendGetColor)
    ;pCtx->pfnTexBlendOpColor[0]((D3DI_RASTCOLOR*)&pCtx->SI.uBB, &Arg1, &Arg2, pCtx, pS, 0);
    call    dword ptr XpCtx(pfnTexBlendOpColor)

    ; Fetch the continuation early, then publish the blended color.
    mov     eax, XpCtx(pfnTexBlendEnd)
    movq    XpCtxSI(uBB), mm4

    ;pCtx->pfnTexBlendEnd(pCtx, pP, pS);
    jmp     eax
;}
;-----------------------------------------------------------------------------
;
; TexBlend_TexM_Gen
;
; Calls all sets of function pointers to do general texture blending.
;
; Stage 0 is always processed with the diffuse color as input.  Every further
; stage below pCtx->cActTex is processed with the result of the previous
; stage as input.  The final color is stored in pCtx->SI.uBB and control is
; transferred to the handler in pCtx->pfnTexBlendEnd.
;
; Register roles used by this routine and the handlers in this file:
;   mm7 = Input color      mm1 = Arg1      mm2 = Arg2
;   mm4 = blended result   edx = byte offset of the stage (4 bytes per stage)
;   eax = number of the stage being processed inside the loop
;
; The loop ends as soon as the stage number is not below cActTex (unsigned
; compare), so a count of zero cannot make it run past the handler tables.
;
;-----------------------------------------------------------------------------
;void CMMX_TexBlend_TexM_Gen(PD3DI_RASTCTX pCtx, PD3DI_RASTPRIM pP,
;                            PD3DI_RASTSPAN pS)
;{
PUBLIC _MMX_TexBlend_TexM_Gen
_MMX_TexBlend_TexM_Gen:

    ; Initialize input to diffuse color (the default)
    ; D3DI_RASTCOLOR Input0 = *(D3DI_RASTCOLOR*)&pS->uB;
    movq    mm7, XpS(uB)
    ;D3DI_RASTCOLOR Input1;
    ;D3DI_RASTCOLOR Arg1;
    ;D3DI_RASTCOLOR Arg2;

    ; Set iTex to point to first texture color.  Last argument of next 4 calls.
    mov     edx, 0

    ;pCtx->pfnTexBlendGetAlpha[0](&Arg1, &Arg2, &Input0, pCtx, pS, 0);
    call    dword ptr XpCtx(pfnTexBlendGetAlpha)
    ;pCtx->pfnTexBlendOpAlpha[0](&Input1, &Arg1, &Arg2, pCtx, pS, 0);
    call    dword ptr XpCtx(pfnTexBlendOpAlpha)
    ;pCtx->pfnTexBlendGetColor[0](&Arg1, &Arg2, &Input0, pCtx, pS, 0);
    call    dword ptr XpCtx(pfnTexBlendGetColor)
    ;pCtx->pfnTexBlendOpColor[0](&Input1, &Arg1, &Arg2, pCtx, pS, 0);
    call    dword ptr XpCtx(pfnTexBlendOpColor)

    ; Set iTex to point to second texture color.  Last argument of next 4 calls.
    mov     edx, 4
    mov     eax, 1

BlendLoop:
    ; Stop once every active stage has been blended.  The compare is unsigned
    ; and uses "not below" instead of "equal" so that a stage count smaller
    ; than the starting value ends the loop immediately.
    cmp     eax, dword ptr XpCtx(cActTex)
    jae     DoneBlendLoop

    ; Result of pre routines is in mm4 and pInput is passed as mm7
    movq    mm7, mm4

    ;pCtx->pfnTexBlendGetAlpha[1](&Arg1, &Arg2, &Input1, pCtx, pS, 1);
    call    dword ptr XpCtx(pfnTexBlendGetAlpha+edx)
    ;pCtx->pfnTexBlendOpAlpha[1]((D3DI_RASTCOLOR*)&pCtx->SI.uBB, &Arg1, &Arg2, pCtx, pS, 1);
    call    dword ptr XpCtx(pfnTexBlendOpAlpha+edx)
    ;pCtx->pfnTexBlendGetColor[1](&Arg1, &Arg2, &Input1, pCtx, pS, 1);
    call    dword ptr XpCtx(pfnTexBlendGetColor+edx)
    ;pCtx->pfnTexBlendOpColor[1]((D3DI_RASTCOLOR*)&pCtx->SI.uBB, &Arg1, &Arg2, pCtx, pS, 1);
    call    dword ptr XpCtx(pfnTexBlendOpColor+edx)

    ; Advance to the next stage: number in eax, byte offset in edx.
    inc     eax
    add     edx, 4
    jmp     BlendLoop

DoneBlendLoop:

    ; Get ready for next indirect jump
    mov     eax, XpCtx(pfnTexBlendEnd)

    ; Publish the final blended color.
    movq    XpCtxSI(uBB), mm4

    ;pCtx->pfnTexBlendEnd(pCtx, pP, pS);
    jmp     eax
;}
define(`d_TexBlendGetAlpha', `
;-----------------------------------------------------------------------------
; Loads the two alpha operands of one texture stage.
;   In:      edx = byte offset of the stage in TexCol, mm7 = Input color
;   Out:     mm1 = Arg1, mm2 = Arg2 (8 bit values, one per 16 bit word)
;   Scratch: mm5
; Only the high (alpha) word of mm1 and mm2 is meaningful to the alpha ops.
;-----------------------------------------------------------------------------
; void CMMX_TexBlend_Get_Alpha_$1_$2(PD3DI_RASTCOLOR pArg1, PD3DI_RASTCOLOR pArg2, PD3DI_RASTCOLOR pInput,
;                                PD3DI_RASTCTX pCtx, PD3DI_RASTSPAN pS, INT32 iTex)
; {
PUBLIC _MMX_TexBlend_Get_Alpha_$1_$2
_MMX_TexBlend_Get_Alpha_$1_$2:
ifelse(`$1', `TextureAlpha', `
    ;pArg1->uA = (UINT8)RGBA_GETALPHA(pCtx->SI.TexCol[iTex]);
    pxor        mm5, mm5
    movd        mm1, XpCtxSI(TexCol+edx)    ; texel is 32 bits wide
    punpcklbw   mm1, mm5
', `$1', `InvTextureAlpha', `
    ;pArg1->uA = (UINT8)~(RGBA_GETALPHA(pCtx->SI.TexCol[iTex]));
    pcmpeqd     mm5, mm5
    movd        mm1, XpCtxSI(TexCol+edx)    ; texel is 32 bits wide
    pxor        mm1, mm5
    pxor        mm5, mm5
    punpcklbw   mm1, mm5
')
dnl
ifelse(`$2', `DiffuseAlpha', `
    ;pArg2->uA = (UINT8)(pS->uA>>COLOR_SHIFT);
    movq        mm2, XpS(uB)
    psrlw       mm2, 8
', `$2', `InputAlpha', `
    ;pArg2->uA = (UINT8)(pInput->uA>>COLOR_SHIFT);
    movq        mm2, mm7
    psrlw       mm2, 8
', `$2', `FactorAlpha', `
    ;pArg2->uA = (UINT8)RGBA_GETALPHA(pCtx->pdwRenderState[D3DRS_TEXTUREFACTOR]);
    pxor        mm5, mm5
    movd        mm2, XpCtx(pdwRenderState + RS_TEXTUREFACTOR)
    punpcklbw   mm2, mm5
', `$2', `InvDiffuseAlpha', `
    ;pArg2->uA = (UINT8)~((pS->uA>>COLOR_SHIFT));
    pcmpeqd     mm2, mm2
    pxor        mm2, XpS(uB)
    psrlw       mm2, 8
', `$2', `InvInputAlpha', `
    ;pArg2->uA = (UINT8)~((pInput->uA>>COLOR_SHIFT));
    pcmpeqd     mm2, mm2
    pxor        mm2, mm7
    psrlw       mm2, 8
', `$2', `InvFactorAlpha', `
    ;pArg2->uA = (UINT8)~(RGBA_GETALPHA(pCtx->pdwRenderState[D3DRS_TEXTUREFACTOR]));
    pcmpeqd     mm5, mm5
    movd        mm2, XpCtx(pdwRenderState + RS_TEXTUREFACTOR)
    pxor        mm2, mm5
    pxor        mm5, mm5
    punpcklbw   mm2, mm5
', `$2', `SpecularAlpha', `
    ;pArg2->uA = (UINT8)(pS->uFog>>COLOR_SHIFT);
    movq        mm2, XpS(uFog)
    psrlw       mm2, 8
', `$2', `InvSpecularAlpha', `
    ;pArg2->uA = (UINT8)~((pS->uFog>>COLOR_SHIFT));
    pcmpeqd     mm2, mm2
    pxor        mm2, XpS(uFog)
    psrlw       mm2, 8
')
    ret
;}
')
dnl
d_RepStr(`d_RepStr(`d_TexBlendGetAlpha(AA, BB)', `AA', TextureAlpha, InvTextureAlpha)', `BB',
    DiffuseAlpha, InputAlpha, FactorAlpha, InvDiffuseAlpha, InvInputAlpha, InvFactorAlpha,
    SpecularAlpha, InvSpecularAlpha)
dnl
dnl
define(`d_TexBlendOpAlpha', `
;-----------------------------------------------------------------------------
; Alpha operation of one texture stage.
;   In:      mm1 = Arg1, mm2 = Arg2 (8 bit values, one per 16 bit word),
;            edx = byte offset of the stage in TexCol
;   Out:     mm3 = result; only its high (alpha) word is used later
;   Scratch: mm4
;-----------------------------------------------------------------------------
; void CMMX_TexBlend_Op_Alpha_$1(PD3DI_RASTCOLOR pOut, PD3DI_RASTCOLOR pArg1, PD3DI_RASTCOLOR pArg2,
;                                PD3DI_RASTCTX pCtx, PD3DI_RASTSPAN pS, INT32 iTex)
; {
PUBLIC _MMX_TexBlend_Op_Alpha_$1
_MMX_TexBlend_Op_Alpha_$1:

ifelse(`$1', `None', `
    ;pOut->uA = pS->uA;
    movq        mm3, XpS(uB)
', `$1', `CopyArg1', `
    ;pOut->uA = pArg1->uA<<COLOR_SHIFT;
    movq        mm3, mm1
    psllw       mm3, 8
', `$1', `CopyArg2', `
    ;pOut->uA = pArg2->uA<<COLOR_SHIFT;
    movq        mm3, mm2
    psllw       mm3, 8
', `$1', `Modulate', `
    ;pOut->uA = pArg1->uA*pArg2->uA;
    movq        mm3, mm1
    pmullw      mm3, mm2
', `$1', `Modulate2', `
    ;pOut->uA = min(((UINT32)pArg1->uA*pArg2->uA)<<1, 0xffff);
    movq        mm3, mm1
    pmullw      mm3, mm2
    movq        mm4, mm3
    psllw       mm3, 1
    psraw       mm4, 15     ; Make mask based on high bit.  This is used to make value saturate.
    paddusw     mm3, mm4    ; This could just as well be a por to set all bits.
', `$1', `Modulate4', `
    ;pOut->uA = min(((UINT32)pArg1->uA*pArg2->uA)<<2, 0xffff);
    movq        mm3, mm1
    pmullw      mm3, mm2
    movq        mm4, mm3
    psllw       mm3, 2
    paddusw     mm4, MMWORD PTR Val0x4000000000000000   ; If either of the two upper bits are on,
                                                        ; this will turn on the sign bit.
    psraw       mm4, 15
    paddusw     mm3, mm4    ; This could just as well be a por to set all bits.
', `$1', `Add', `
    ;pOut->uA = min((UINT32)pArg1->uA+pArg2->uA, 0xffff);
    movq        mm3, mm1
    paddusb     mm3, mm2
    psllw       mm3, 8
', `$1', `AddSigned', `
    ;pOut->uA = max((INT32)pArg1->uA+pArg2->uA-128, 0x0);
    movq        mm3, mm1
    movq        mm4, mm2
    ; Actually only shifting up by 7 instead of 8.
    psllw       mm3, 7      ; Shift down by one bit to check to see if there is a carry.
    psllw       mm4, 7      ; Cannot use saturation twice since we are doing add and sub and it could mess up result.
    paddw       mm4, mm3
    psubusw     mm4, MMWORD PTR AlphaVal128 ; subtract 128 from shifted value.  It is really 64 in the upper byte.
    movq        mm3, mm4
    psraw       mm4, 15
    psllw       mm3, 1
    por         mm3, mm4    ; Could have used paddusw here, but por achieves same effect.
', `$1', `BlendDiffuseAlpha', `
    ;INT32 iA = pS->uA>>COLOR_SHIFT;
    ;pOut->uA = (UINT16)(iA*(pArg1->uA - pArg2->uA) + (pArg2->uA<<COLOR_SHIFT));
    movq        mm4, XpS(uB)
    movq        mm3, mm1
    psrlw       mm4, 8
    psubw       mm3, mm2
    pmullw      mm3, mm4
    movq        mm4, mm2
    psllw       mm4, 8
    paddw       mm3, mm4
', `$1', `BlendTextureAlpha', `
    ;INT32 iA = RGBA_GETALPHA(pCtx->SI.TexCol[iTex]);
    ;pOut->uA = (UINT16)(iA*(pArg1->uA - pArg2->uA) + (pArg2->uA<<8));
    movd        mm4, XpCtxSI(TexCol+edx)
    movq        mm3, mm1
    punpcklbw   mm4, Zero
    psubw       mm3, mm2
    pmullw      mm3, mm4
    movq        mm4, mm2
    psllw       mm4, 8
    paddw       mm3, mm4
', `$1', `BlendFactorAlpha', `
    ;INT32 iA = RGBA_GETALPHA(pCtx->pdwRenderState[D3DRS_TEXTUREFACTOR]);
    ;pOut->uA = (UINT16)(iA*(pArg1->uA - pArg2->uA) + (pArg2->uA<<8));
    movq        mm3, mm1
    movq        mm4, mm2
    psubw       mm3, mm4
    movd        mm4, XpCtx(pdwRenderState+RS_TEXTUREFACTOR)
    punpcklbw   mm4, Zero
    pmullw      mm3, mm4
    movq        mm4, mm2
    psllw       mm4, 8
    paddw       mm3, mm4
', `$1', `BlendTextureAlphaPM', `
    ;INT32 iA = 255 - RGBA_GETALPHA(pCtx->SI.TexCol[iTex]);
    ;pOut->uA = min((UINT32)((pArg1->uA<<COLOR_SHIFT) + iA*pArg2->uA), 0xffff);
    movd        mm4, XpCtxSI(TexCol+edx)
    pcmpeqw     mm3, mm3    ; These two lines make 255 - TexColAlpha
    pxor        mm4, mm3
    punpcklbw   mm4, Zero
    movq        mm3, mm2
    pmullw      mm3, mm4
    movq        mm4, mm1
    psllw       mm4, 8
    paddusw     mm3, mm4
', `$1', `AddSigned2', `
    ;pOut->uA = (max(((INT32)pArg1->uA+pArg2->uA-128)<<1, 0x0))<<COLOR_SHIFT;
    movq        mm3, mm1
    movq        mm4, mm2
    ; Actually only shifting up by 7 instead of 8.
    psllw       mm3, 7      ; Shift down by one bit to check to see if there is a carry.
    psllw       mm4, 7      ; Cannot use saturation twice since we are doing add and sub and it could mess up result.
    paddw       mm4, mm3
    psubusw     mm4, MMWORD PTR AlphaVal128 ; subtract 128 from shifted value.  It is really 64 in the upper byte.
    movq        mm3, mm4
    psllw       mm3, 1
    por         mm4, mm3    ; Sign bit is now set if either of the two upper bits was set.
    psraw       mm4, 15
    psllw       mm3, 1
    por         mm3, mm4    ; Could have used paddusw here, but por achieves same effect.
', `$1', `Subtract', `
    ; pOut->uA = (min(max((~((~(INT32)pArg1->uA) + pArg2->uA)), 0x0), 0xff))<<COLOR_SHIFT;
    movq        mm3, mm1
    movq        mm4, mm2

    psubusw     mm3, mm4
    psllw       mm3, 8
', `$1', `AddSmooth', `
    ; pOut->uA = min((pArg1->uA<<COLOR_SHIFT) + (~pArg1->uA)*pArg2->uA, 0xffff);
    ; Computed exactly as written above.  Forming (Arg1 + Arg2)<<8 first
    ; would lose the carry out of the 16 bit word whenever Arg1 + Arg2
    ; exceeds 255.
    movq        mm3, mm1
    pxor        mm3, MMWORD PTR Val0x00ff00ff00ff00ff   ; 255 - Arg1
    pmullw      mm3, mm2                                ; (255 - Arg1) * Arg2
    movq        mm4, mm1
    psllw       mm4, 8                                  ; Arg1 << COLOR_SHIFT
    paddusw     mm3, mm4
')
    ret
;}
')
dnl
d_RepStr(`d_TexBlendOpAlpha(AA)', `AA',
    None, CopyArg1, CopyArg2, Modulate, Modulate2, Modulate4, Add, AddSigned,
    BlendDiffuseAlpha, BlendTextureAlpha, BlendFactorAlpha, BlendTextureAlphaPM,
    AddSigned2, Subtract, AddSmooth)
dnl
dnl
define(`d_TexBlendGetColor', `
;-----------------------------------------------------------------------------
; Loads the two color operands of one texture stage.
;   In:      edx = byte offset of the stage in TexCol, mm7 = Input color
;   Out:     mm1 = Arg1, mm2 = Arg2 (8 bit values, one per 16 bit word)
;   Scratch: mm5
;-----------------------------------------------------------------------------
; void CMMX_TexBlend_Get_Color_$1_$2(PD3DI_RASTCOLOR pArg1, PD3DI_RASTCOLOR pArg2, PD3DI_RASTCOLOR pInput,
;                                PD3DI_RASTCTX pCtx, PD3DI_RASTSPAN pS, INT32 iTex)
; {
PUBLIC _MMX_TexBlend_Get_Color_$1_$2
_MMX_TexBlend_Get_Color_$1_$2:

    ; Alpha is already done in mm3
ifelse(`$1', `Texture', `
    ;pArg1->uB = (UINT8)RGBA_GETBLUE(pCtx->SI.TexCol[iTex]);
    ;pArg1->uG = (UINT8)RGBA_GETGREEN(pCtx->SI.TexCol[iTex]);
    ;pArg1->uR = (UINT8)RGBA_GETRED(pCtx->SI.TexCol[iTex]);
    pxor        mm5, mm5
    movd        mm1, XpCtxSI(TexCol+edx)    ; texel is 32 bits wide
    punpcklbw   mm1, mm5
', `$1', `InvTexture', `
    ;pArg1->uB = (UINT8)~(RGBA_GETBLUE(pCtx->SI.TexCol[iTex]));
    ;pArg1->uG = (UINT8)~(RGBA_GETGREEN(pCtx->SI.TexCol[iTex]));
    ;pArg1->uR = (UINT8)~(RGBA_GETRED(pCtx->SI.TexCol[iTex]));
    pcmpeqd     mm5, mm5
    movd        mm1, XpCtxSI(TexCol+edx)
    pxor        mm1, mm5
    pxor        mm5, mm5
    punpcklbw   mm1, mm5
', `$1', `TextureAlpha', `
    ;pArg1->uB = (UINT8)RGBA_GETALPHA(pCtx->SI.TexCol[iTex]);
    ;pArg1->uG = pArg1->uB;
    ;pArg1->uR = pArg1->uB;
    pxor        mm5, mm5
    movd        mm1, XpCtxSI(TexCol + edx)
    punpcklbw   mm1, mm5
    punpckhwd   mm1, mm1    ; copy alpha to high dword.
    punpckhdq   mm1, mm1    ; copy alpha to full register
', `$1', `InvTextureAlpha', `
    ;pArg1->uB = (UINT8)~(RGBA_GETALPHA(pCtx->SI.TexCol[iTex]));
    ;pArg1->uG = pArg1->uB;
    ;pArg1->uR = pArg1->uB;
    pcmpeqd     mm5, mm5
    movd        mm1, XpCtxSI(TexCol + edx)  ; texel is 32 bits wide
    pxor        mm1, mm5
    pxor        mm5, mm5
    punpcklbw   mm1, mm5
    punpckhwd   mm1, mm1    ; copy alpha to high dword.
    punpckhdq   mm1, mm1    ; copy alpha to full register
')

dnl
ifelse(`$2', `Diffuse', `
    ;pArg2->uB = (UINT8)(pS->uB>>COLOR_SHIFT);
    ;pArg2->uG = (UINT8)(pS->uG>>COLOR_SHIFT);
    ;pArg2->uR = (UINT8)(pS->uR>>COLOR_SHIFT);
    movq        mm2, XpS(uB)
    psrlw       mm2, 8
', `$2', `Specular', `
    ;pArg2->uB = (UINT8)(pS->uBS>>COLOR_SHIFT);
    ;pArg2->uG = (UINT8)(pS->uGS>>COLOR_SHIFT);
    ;pArg2->uR = (UINT8)(pS->uRS>>COLOR_SHIFT);
    movq        mm2, XpS(uBS)
    psrlw       mm2, 8
', `$2', `Input', `
    ;pArg2->uB = (UINT8)(pInput->uB>>COLOR_SHIFT);
    ;pArg2->uG = (UINT8)(pInput->uG>>COLOR_SHIFT);
    ;pArg2->uR = (UINT8)(pInput->uR>>COLOR_SHIFT);
    movq        mm2, mm7
    psrlw       mm2, 8
', `$2', `Factor', `
    ;pArg2->uB = (UINT8)RGBA_GETBLUE(pCtx->pdwRenderState[D3DRS_TEXTUREFACTOR]);
    ;pArg2->uG = (UINT8)RGBA_GETGREEN(pCtx->pdwRenderState[D3DRS_TEXTUREFACTOR]);
    ;pArg2->uR = (UINT8)RGBA_GETRED(pCtx->pdwRenderState[D3DRS_TEXTUREFACTOR]);
    pxor        mm5, mm5
    movd        mm2, XpCtx(pdwRenderState + RS_TEXTUREFACTOR)
    punpcklbw   mm2, mm5
', `$2', `InvDiffuse', `
    ;pArg2->uB = (UINT8)~((pS->uB>>COLOR_SHIFT));
    ;pArg2->uG = (UINT8)~((pS->uG>>COLOR_SHIFT));
    ;pArg2->uR = (UINT8)~((pS->uR>>COLOR_SHIFT));
    ; One complement is all that is needed here; no dependency on the
    ; contents that the Arg1 code left in mm5.
    pcmpeqd     mm2, mm2
    pxor        mm2, XpS(uB)
    psrlw       mm2, 8
', `$2', `InvSpecular', `
    ;pArg2->uB = (UINT8)~((pS->uBS>>COLOR_SHIFT));
    ;pArg2->uG = (UINT8)~((pS->uGS>>COLOR_SHIFT));
    ;pArg2->uR = (UINT8)~((pS->uRS>>COLOR_SHIFT));
    ; One complement is all that is needed here; no dependency on the
    ; contents that the Arg1 code left in mm5.
    pcmpeqd     mm2, mm2
    pxor        mm2, XpS(uBS)
    psrlw       mm2, 8
', `$2', `InvInput', `
    ;pArg2->uB = (UINT8)~((pInput->uB>>COLOR_SHIFT));
    ;pArg2->uG = (UINT8)~((pInput->uG>>COLOR_SHIFT));
    ;pArg2->uR = (UINT8)~((pInput->uR>>COLOR_SHIFT));
    movq        mm2, mm7
    pcmpeqd     mm5, mm5
    pxor        mm2, mm5
    psrlw       mm2, 8
', `$2', `InvFactor', `
    ;pArg2->uB = (UINT8)~(RGBA_GETBLUE(pCtx->pdwRenderState[D3DRS_TEXTUREFACTOR]));
    ;pArg2->uG = (UINT8)~(RGBA_GETGREEN(pCtx->pdwRenderState[D3DRS_TEXTUREFACTOR]));
    ;pArg2->uR = (UINT8)~(RGBA_GETRED(pCtx->pdwRenderState[D3DRS_TEXTUREFACTOR]));
    pcmpeqd     mm5, mm5
    movd        mm2, XpCtx(pdwRenderState + RS_TEXTUREFACTOR)   ; factor is 32 bits wide
    pxor        mm2, mm5
    pxor        mm5, mm5
    punpcklbw   mm2, mm5
', `$2', `DiffuseAlpha', `
    ;pArg2->uB = (UINT8)(pS->uA>>COLOR_SHIFT);
    ;pArg2->uG = pArg2->uB;
    ;pArg2->uR = pArg2->uB;
    movq        mm2, XpS(uB)
    psrlw       mm2, 8
    punpckhwd   mm2, mm2
    punpckhdq   mm2, mm2
', `$2', `SpecularAlpha', `
    ;pArg2->uB = (UINT8)(pS->uFog>>COLOR_SHIFT);
    ;pArg2->uG = pArg2->uB;
    ;pArg2->uR = pArg2->uB;
    movq        mm2, XpS(uFog)
    psrlw       mm2, 8
    punpckhwd   mm2, mm2
    punpckhdq   mm2, mm2
', `$2', `InputAlpha', `
    ;pArg2->uB = (UINT8)(pInput->uA>>COLOR_SHIFT);
    ;pArg2->uG = pArg2->uB;
    ;pArg2->uR = pArg2->uB;
    movq        mm2, mm7
    punpckhwd   mm2, mm2
    punpckhdq   mm2, mm2
    psrlw       mm2, 8
', `$2', `FactorAlpha', `
    ;pArg2->uB = (UINT8)RGBA_GETALPHA(pCtx->pdwRenderState[D3DRS_TEXTUREFACTOR]);
    ;pArg2->uG = pArg2->uB;
    ;pArg2->uR = pArg2->uB;
    pxor        mm5, mm5
    movd        mm2, XpCtx(pdwRenderState + RS_TEXTUREFACTOR)   ; factor is 32 bits wide
    punpcklbw   mm2, mm5
    punpckhwd   mm2, mm2
    punpckhdq   mm2, mm2
', `$2', `InvDiffuseAlpha', `
    ;pArg2->uB = (UINT8)~((pS->uA>>COLOR_SHIFT));
    ;pArg2->uG = pArg2->uB;
    ;pArg2->uR = pArg2->uB;
    pcmpeqd     mm2, mm2
    pxor        mm2, XpS(uB)
    psrlw       mm2, 8
    punpckhwd   mm2, mm2
    punpckhdq   mm2, mm2
', `$2', `InvSpecularAlpha', `
    ;pArg2->uB = (UINT8)~((pS->uFog>>COLOR_SHIFT));
    ;pArg2->uG = pArg2->uB;
    ;pArg2->uR = pArg2->uB;
    pcmpeqd     mm2, mm2
    pxor        mm2, XpS(uFog)
    psrlw       mm2, 8
    punpckhwd   mm2, mm2
    punpckhdq   mm2, mm2
', `$2', `InvInputAlpha', `
    ;pArg2->uB = (UINT8)~((pInput->uA>>COLOR_SHIFT));
    ;pArg2->uG = pArg2->uB;
    ;pArg2->uR = pArg2->uB;
    pcmpeqd     mm2, mm2
    pxor        mm2, mm7
    psrlw       mm2, 8
    punpckhwd   mm2, mm2
    punpckhdq   mm2, mm2
', `$2', `InvFactorAlpha', `
    ;pArg2->uB = (UINT8)~(RGBA_GETALPHA(pCtx->pdwRenderState[D3DRS_TEXTUREFACTOR]));
    ;pArg2->uG = pArg2->uB;
    ;pArg2->uR = pArg2->uB;
    pcmpeqd     mm5, mm5
    movd        mm2, XpCtx(pdwRenderState + RS_TEXTUREFACTOR)   ; factor is 32 bits wide
    pxor        mm2, mm5
    pxor        mm5, mm5
    punpcklbw   mm2, mm5
    punpckhwd   mm2, mm2
    punpckhdq   mm2, mm2

')
    ret
;}
')
dnl
d_RepStr(`d_RepStr(`d_TexBlendGetColor(AA, BB)', `AA', Texture, InvTexture, TextureAlpha, InvTextureAlpha)', `BB',
    Diffuse, Input, Factor, InvDiffuse, InvInput, InvFactor,
    DiffuseAlpha, InputAlpha, FactorAlpha, InvDiffuseAlpha, InvInputAlpha, InvFactorAlpha,
    Specular, InvSpecular, SpecularAlpha, InvSpecularAlpha)
dnl
dnl
define(`d_TexBlendOpColor', `
;-----------------------------------------------------------------------------
; Color operation of one texture stage.
;   In:      mm1 = Arg1, mm2 = Arg2 (8 bit values, one per 16 bit word),
;            mm3 = alpha result of the alpha operation (high word),
;            edx = byte offset of the stage in TexCol
;   Out:     mm4 = color result in the three low words, alpha from mm3 in
;            the high word
;   Scratch: mm5, mm6
;-----------------------------------------------------------------------------
; void CMMX_TexBlend_Op_Color_$1(PD3DI_RASTCOLOR pOut, PD3DI_RASTCOLOR pArg1, PD3DI_RASTCOLOR pArg2,
;                                PD3DI_RASTCTX pCtx, PD3DI_RASTSPAN pS, INT32 iTex)
; {
PUBLIC _MMX_TexBlend_Op_Color_$1
_MMX_TexBlend_Op_Color_$1:

    movq        mm6, MMWORD PTR MaskOffAlpha    ; 0x0000ffff ffffffff

ifelse(`$1', `None', `
    ;pOut->uB = pS->uB;
    ;pOut->uG = pS->uG;
    ;pOut->uR = pS->uR;
    movq        mm4, XpS(uB)
', `$1', `CopyArg1', `
    ;pOut->uB = pArg1->uB<<COLOR_SHIFT;
    ;pOut->uG = pArg1->uG<<COLOR_SHIFT;
    ;pOut->uR = pArg1->uR<<COLOR_SHIFT;
    movq        mm4, mm1
    psllw       mm4, 8
', `$1', `CopyArg2', `
    ;pOut->uB = pArg2->uB<<COLOR_SHIFT;
    ;pOut->uG = pArg2->uG<<COLOR_SHIFT;
    ;pOut->uR = pArg2->uR<<COLOR_SHIFT;
    movq        mm4, mm2
    psllw       mm4, 8
', `$1', `Modulate', `
    ;pOut->uB = pArg1->uB*pArg2->uB;
    ;pOut->uG = pArg1->uG*pArg2->uG;
    ;pOut->uR = pArg1->uR*pArg2->uR;
    movq        mm4, mm1
    movq        mm5, mm2
    pmullw      mm4, mm5
', `$1', `Modulate2', `
    ;pOut->uB = min(((UINT32)pArg1->uB*pArg2->uB)<<1, 0xffff);
    ;pOut->uG = min(((UINT32)pArg1->uG*pArg2->uG)<<1, 0xffff);
    ;pOut->uR = min(((UINT32)pArg1->uR*pArg2->uR)<<1, 0xffff);
    movq        mm4, mm1
    movq        mm5, mm2
    pmullw      mm4, mm5
    movq        mm5, mm4
    psllw       mm4, 1
    psraw       mm5, 15     ; Make mask based on high bit.  This is used to make value saturate.
    paddusw     mm4, mm5    ; This could just as well be a por to set all bits.
', `$1', `Modulate4', `
    ;pOut->uB = min(((UINT32)pArg1->uB*pArg2->uB)<<2, 0xffff);
    ;pOut->uG = min(((UINT32)pArg1->uG*pArg2->uG)<<2, 0xffff);
    ;pOut->uR = min(((UINT32)pArg1->uR*pArg2->uR)<<2, 0xffff);
    movq        mm4, mm1
    movq        mm5, mm2
    pmullw      mm4, mm5
    movq        mm5, mm4
    psllw       mm4, 2
    paddusw     mm5, MMWORD PTR Val0x0000400040004000   ; This will set sign bit if either of the upper bits are set.
    psraw       mm5, 15     ; Shift sign bit down to all bits.
    paddusw     mm4, mm5    ; This could just as well be a por to set all bits.

', `$1', `Add', `
    ;pOut->uB = min((UINT32)pArg1->uB+pArg2->uB, 0xffff);
    ;pOut->uG = min((UINT32)pArg1->uG+pArg2->uG, 0xffff);
    ;pOut->uR = min((UINT32)pArg1->uR+pArg2->uR, 0xffff);
    movq        mm4, mm1
    paddusb     mm4, mm2
    psllw       mm4, 8
', `$1', `AddSigned', `
    ;pOut->uB = min(max((INT32)pArg1->uB+pArg2->uB-128, 0x0), 0xff);
    ;pOut->uG = min(max((INT32)pArg1->uG+pArg2->uG-128, 0x0), 0xff);
    ;pOut->uR = min(max((INT32)pArg1->uR+pArg2->uR-128, 0x0), 0xff);
    movq        mm4, mm1
    movq        mm5, mm2
    ; Actually only shifting up by 7 instead of 8.
    psllw       mm4, 7      ; Shift down by one bit to check to see if there is a carry.
    psllw       mm5, 7      ; Cannot use saturation twice since we are doing add and sub and it could mess up result.
    paddw       mm5, mm4
    psubusw     mm5, MMWORD PTR RGBVal128   ; subtract 128 from shifted value.  It is really 64 in the upper byte.
    movq        mm4, mm5
    psraw       mm5, 15
    psllw       mm4, 1
    por         mm4, mm5    ; Could have used paddusw here, but por achieves same effect.
', `$1', `BlendDiffuseAlpha', `
    ;INT32 iA = pS->uA>>COLOR_SHIFT;
    ;pOut->uB = (UINT16)(iA*(pArg1->uB - pArg2->uB) + (pArg2->uB<<8));
    ;pOut->uG = (UINT16)(iA*(pArg1->uG - pArg2->uG) + (pArg2->uG<<8));
    ;pOut->uR = (UINT16)(iA*(pArg1->uR - pArg2->uR) + (pArg2->uR<<8));
    movq        mm4, mm1
    movq        mm5, mm2
    psubw       mm4, mm5
    movq        mm5, XpS(uB)    ; Set iA = pS->uA
    psrlw       mm5, 8
    punpckhwd   mm5, mm5        ; copy iA to high dword.
    punpckhdq   mm5, mm5        ; copy iA to full register
    pmullw      mm4, mm5
    movq        mm5, mm2
    psllw       mm5, 8
    paddw       mm4, mm5
', `$1', `BlendTextureAlpha', `
    ;INT32 iA = RGBA_GETALPHA(pCtx->SI.TexCol[iTex]);
    ;pOut->uB = (UINT16)(iA*(pArg1->uB - pArg2->uB) + (pArg2->uB<<8));
    ;pOut->uG = (UINT16)(iA*(pArg1->uG - pArg2->uG) + (pArg2->uG<<8));
    ;pOut->uR = (UINT16)(iA*(pArg1->uR - pArg2->uR) + (pArg2->uR<<8));
    movq        mm4, mm1
    movq        mm5, mm2
    psubw       mm4, mm5
    movd        mm5, XpCtxSI(TexCol+edx)
    punpcklbw   mm5, Zero
    punpckhwd   mm5, mm5        ; copy iA to high dword.
    punpckhdq   mm5, mm5        ; copy iA to full register
    pmullw      mm4, mm5
    movq        mm5, mm2
    psllw       mm5, 8
    paddw       mm4, mm5
', `$1', `BlendFactorAlpha', `
    ;INT32 iA = RGBA_GETALPHA(pCtx->pdwRenderState[D3DRS_TEXTUREFACTOR]);
    ;pOut->uB = (UINT16)(iA*(pArg1->uB - pArg2->uB) + (pArg2->uB<<COLOR_SHIFT));
    ;pOut->uG = (UINT16)(iA*(pArg1->uG - pArg2->uG) + (pArg2->uG<<COLOR_SHIFT));
    ;pOut->uR = (UINT16)(iA*(pArg1->uR - pArg2->uR) + (pArg2->uR<<COLOR_SHIFT));
    movq        mm4, mm1
    movq        mm5, mm2
    psubw       mm4, mm5
    movd        mm5, XpCtx(pdwRenderState+RS_TEXTUREFACTOR)
    punpcklbw   mm5, Zero
    punpckhwd   mm5, mm5        ; copy iA to high dword.
    punpckhdq   mm5, mm5        ; copy iA to full register
    pmullw      mm4, mm5
    movq        mm5, mm2
    psllw       mm5, 8
    paddw       mm4, mm5
', `$1', `BlendTextureAlphaPM', `
    ;INT32 iA = 255 - RGBA_GETALPHA(pCtx->SI.TexCol[iTex]);
    ;pOut->uB = min((UINT32)((pArg1->uB<<COLOR_SHIFT) + iA*pArg2->uB), 0xffff);
    ;pOut->uG = min((UINT32)((pArg1->uG<<COLOR_SHIFT) + iA*pArg2->uG), 0xffff);
    ;pOut->uR = min((UINT32)((pArg1->uR<<COLOR_SHIFT) + iA*pArg2->uR), 0xffff);
    movd        mm5, XpCtxSI(TexCol+edx)

    pcmpeqw     mm4, mm4        ; These two lines make 255 - TexColAlpha
    pxor        mm5, mm4

    punpcklbw   mm5, Zero
    punpckhwd   mm5, mm5        ; copy iA to high dword.
    punpckhdq   mm5, mm5        ; copy iA to full register
    movq        mm4, mm2
    pmullw      mm4, mm5
    movq        mm5, mm1
    psllw       mm5, 8
    paddusw     mm4, mm5

', `$1', `AddSigned2', `
    ; pOut->uB = (min(max(((INT32)pArg1->uB+pArg2->uB-128) << 1, 0x0), 0xff))<<COLOR_SHIFT;
    ; pOut->uG = (min(max(((INT32)pArg1->uG+pArg2->uG-128) << 1, 0x0), 0xff))<<COLOR_SHIFT;
    ; pOut->uR = (min(max(((INT32)pArg1->uR+pArg2->uR-128) << 1, 0x0), 0xff))<<COLOR_SHIFT;

    movq        mm4, mm1
    movq        mm5, mm2
    ; Actually only shifting up by 7 instead of 8.
    psllw       mm4, 7      ; Shift down by one bit to check to see if there is a carry.
    psllw       mm5, 7      ; Cannot use saturation twice since we are doing add and sub and it could mess up result.
    paddw       mm5, mm4
    psubusw     mm5, MMWORD PTR RGBVal128   ; subtract 128 from shifted value.  It is really 64 in the upper byte.
    movq        mm4, mm5
    psllw       mm4, 1
    por         mm5, mm4    ; Sign bit is now set if either of the two upper bits was set.
    psraw       mm5, 15
    psllw       mm4, 1
    por         mm4, mm5    ; Could have used paddusw here, but por achieves same effect.

', `$1', `Subtract', `
    ; pOut->uB = max((~((~(UINT32)pArg1->uB) + pArg2->uB)), 0x0)<<COLOR_SHIFT;
    ; pOut->uG = max((~((~(UINT32)pArg1->uG) + pArg2->uG)), 0x0)<<COLOR_SHIFT;
    ; pOut->uR = max((~((~(UINT32)pArg1->uR) + pArg2->uR)), 0x0)<<COLOR_SHIFT;
    movq        mm4, mm1
    psubusw     mm4, mm2
    psllw       mm4, 8

', `$1', `AddSmooth', `
    ; pOut->uB = min(((pArg1->uB<<COLOR_SHIFT)+(~pArg1->uB)*pArg2->uB), 0xffff);
    ; pOut->uG = min(((pArg1->uG<<COLOR_SHIFT)+(~pArg1->uG)*pArg2->uG), 0xffff);
    ; pOut->uR = min(((pArg1->uR<<COLOR_SHIFT)+(~pArg1->uR)*pArg2->uR), 0xffff);
    ; Computed exactly as written above.  Forming (Arg1 + Arg2)<<8 first
    ; would lose the carry out of the 16 bit word whenever Arg1 + Arg2
    ; exceeds 255.
    movq        mm4, mm1
    pxor        mm4, MMWORD PTR Val0x00ff00ff00ff00ff   ; 255 - Arg1
    pmullw      mm4, mm2                                ; (255 - Arg1) * Arg2
    movq        mm5, mm1
    psllw       mm5, 8                                  ; Arg1 << COLOR_SHIFT
    paddusw     mm4, mm5

', `$1', `ModulateAlphaAddColor', `
    ; pOut->uB = min(((pArg1->uB<<COLOR_SHIFT)+pArg1->uA*pArg2->uB), 0xffff);
    ; pOut->uG = min(((pArg1->uG<<COLOR_SHIFT)+pArg1->uA*pArg2->uG), 0xffff);
    ; pOut->uR = min(((pArg1->uR<<COLOR_SHIFT)+pArg1->uA*pArg2->uR), 0xffff);

    ; Get pArg1->uA.  It already sits in the high word of mm1 as an 8 bit
    ; value, so it only needs to be replicated.  A word shift count above 15
    ; would clear the whole register.
    movq        mm4, mm1
    punpckhwd   mm4, mm4        ; copy pArg1->uA to high dword.
    punpckhdq   mm4, mm4        ; copy pArg1->uA to full register

    pmullw      mm4, mm2        ; pArg1->uA * Arg2
    movq        mm5, mm1
    psllw       mm5, 8          ; Arg1 << COLOR_SHIFT
    paddusw     mm4, mm5        ; saturating add implements the min with 0xffff

', `$1', `ModulateColorAddAlpha', `
    ; pOut->uB = min((pArg2->uB*pArg1->uB+(pArg1->uA<<COLOR_SHIFT)), 0xffff);
    ; pOut->uG = min((pArg2->uG*pArg1->uG+(pArg1->uA<<COLOR_SHIFT)), 0xffff);
    ; pOut->uR = min((pArg2->uR*pArg1->uR+(pArg1->uA<<COLOR_SHIFT)), 0xffff);
    movq        mm4, mm1
    movq        mm5, mm2

    pmullw      mm4, mm5        ; Arg1 * Arg2

    ; Get pArg1->uA.  It already sits in the high word of mm1 as an 8 bit
    ; value, so it only needs to be replicated.  A word shift count above 15
    ; would clear the whole register.
    movq        mm5, mm1
    punpckhwd   mm5, mm5        ; copy pArg1->uA to high dword.
    punpckhdq   mm5, mm5        ; copy pArg1->uA to full register
    psllw       mm5, 8          ; pArg1->uA << COLOR_SHIFT

    paddusw     mm4, mm5        ; saturating add implements the min with 0xffff
')
    ; Need pOuts alpha value: keep the color words of mm4 and take the high
    ; word from mm3.
    pand        mm4, mm6
    pandn       mm6, mm3
    por         mm4, mm6

    ret
;}
')
dnl
d_RepStr(`d_TexBlendOpColor(AA)', `AA',
    None, CopyArg1, CopyArg2, Modulate, Modulate2, Modulate4, Add, AddSigned,
    BlendDiffuseAlpha, BlendTextureAlpha, BlendFactorAlpha, BlendTextureAlphaPM,
    AddSigned2, Subtract, AddSmooth, ModulateAlphaAddColor, ModulateColorAddAlpha)
dnl
END
|