541 lines
14 KiB
541 lines
14 KiB
;-----------------------------------------------------------------------------
|
|
;
|
|
; Monolith 6. Perspective Correct Nearest Gouraud Modulated
|
|
; Z buffer (LE or GT) 565.
|
|
;
|
|
; Globals (ATTENTION)
|
|
;
|
|
; StackPos - stack pos holder
|
|
; uSpans - Number of spans to process
|
|
; iSurfaceStep - what to add to screen pointer
|
|
; iZStep - what to add to Z buffer pointer
|
|
; uPix - Pixel Count
|
|
;
|
|
; Changes from general MMX code.
|
|
; 1) Convert directly from 565 to internal format to remove
|
|
; extra unpack. Remove alpha set.
|
|
; 2) Didn't need to save texture color or blended color so these
|
|
; are kept in registers.
|
|
; 3) All calls and jumps were removed.
|
|
; 4) Removed alpha masking in modulate code.
|
|
; 5) Change registers usage to prevent extra moves.
|
|
;
|
|
;-----------------------------------------------------------------------------
|
|
|
|
INCLUDE iammx.inc
|
|
INCLUDE offs_acp.inc
|
|
|
|
|
|
|
|
; Names are read LSB to MSB, so B5G6R5 means five bits of blue starting
|
|
; at the LSB, then six bits of green, then five bits of red.
|
|
|
|
|
|
|
|
;TBD check to see if this value is correct.
|
|
COLOR_SHIFT equ 8
|
|
|
|
.586
|
|
.model flat
|
|
|
|
|
|
.data
|
|
|
|
EXTERN IncHighandLow16:MMWORD
|
|
EXTERN UFracVFracMask:MMWORD
|
|
EXTERN UV32to15Mask:MMWORD
|
|
EXTERN Makelow16one:MMWORD
|
|
EXTERN MaskKeepUValues:MMWORD
|
|
EXTERN MaskKeepVValues:MMWORD
|
|
EXTERN UFrac:MMWORD
|
|
EXTERN VFrac:MMWORD
|
|
EXTERN Zero:MMWORD
|
|
EXTERN memD3DTFG_POINT:MMWORD
|
|
EXTERN GiveUp:MMWORD
|
|
EXTERN LastW:MMWORD
|
|
EXTERN Val0x000a000a:MMWORD
|
|
EXTERN Val0xffff:MMWORD
|
|
EXTERN Val0x0000002000000020:MMWORD
|
|
EXTERN Val0x0000ffff0000ffff:MMWORD
|
|
|
|
; Local mask with bits 11-15 set (the red field of a 565 texel). The pixel
; loop in _MMXMLRast_6 ANDs the fetched texel with this value and then shifts
; the result left by 24 so red lands in the third 16-bit lane of the MMX
; register.
opt_MaskRed565to888 MMWORD 000000000000F800H
|
|
|
|
EXTERN MaskRed565to888:MMWORD
|
|
EXTERN MaskGreen565to888:MMWORD
|
|
EXTERN MaskBlue565to888:MMWORD
|
|
|
|
EXTERN MaskRed555to888:MMWORD
|
|
EXTERN MaskGreen555to888:MMWORD
|
|
EXTERN MaskBlue555to888:MMWORD
|
|
|
|
EXTERN MaskAlpha1555to8888:MMWORD
|
|
EXTERN MaskRed1555to8888:MMWORD
|
|
EXTERN MaskGreen1555to8888:MMWORD
|
|
EXTERN MaskBlue1555to8888:MMWORD
|
|
|
|
|
|
EXTERN SetAlphato0xffff:MMWORD
|
|
EXTERN SetAlphato0xff:MMWORD
|
|
|
|
RedShift565to888 equ 8
|
|
GreenShift565to888 equ 5
|
|
BlueShift565to888 equ 3
|
|
|
|
RedShift555to888 equ 9
|
|
GreenShift555to888 equ 6
|
|
BlueShift555to888 equ 3
|
|
|
|
AlphaShift1555to8888 equ 16
|
|
RedShift1555to8888 equ 9
|
|
GreenShift1555to8888 equ 6
|
|
BlueShift1555to8888 equ 3
|
|
|
|
EXTERN Zero:MMWORD
|
|
|
|
|
|
EXTERN DW_One_One:MMWORD
|
|
|
|
|
|
EXTERN MaskOffAlpha:MMWORD
|
|
EXTERN ShiftTA:MMWORD
|
|
EXTERN Val0x00ff00ff00ff00ff:MMWORD
|
|
EXTERN Val0x000000ff00ff00ff:MMWORD
|
|
EXTERN Val0X0000000001000000:MMWORD
|
|
EXTERN AlphaVal128:MMWORD
|
|
EXTERN RGBVal128:MMWORD
|
|
|
|
|
|
EXTERN g_uDitherValue:MMWORD
|
|
EXTERN SetAlphato0xff:MMWORD
|
|
EXTERN u888to565RedBlueMask:MMWORD
|
|
EXTERN u888to565GreenMask:MMWORD
|
|
EXTERN u888to565Multiplier:MMWORD
|
|
EXTERN uVal0x000007ff03ff07ff:MMWORD
|
|
EXTERN uVal0x0000078003c00780:MMWORD
|
|
EXTERN u888to555RedBlueMask:MMWORD
|
|
EXTERN u888to555GreenMask:MMWORD
|
|
EXTERN u888to555Multiplier:MMWORD
|
|
EXTERN uVal0x000007ff07ff07ff:MMWORD
|
|
EXTERN uVal0x0000078007800780:MMWORD
|
|
|
|
; Span Variables
|
|
StackPos dd ? ; esp captured right after "push ebp" on entry to _MMXMLRast_6; reloaded into esp just before the final "pop ebp"
|
|
uSpans dd ? ; spans still to process for the current primitive (loaded from pP->uSpans, decremented once per SpanLoop pass)
|
|
;-----------------------------------------------------------------------------
|
|
; Loop Variables
|
|
|
|
iSurfaceStep dd ? ; added to pS->pSurface after each pixel; holds -pCtx->iSurfaceStep when D3DI_RASTPRIM_X_DEC is set
|
|
iZStep dd ? ; added to pS->pZ after each pixel; holds -pCtx->iZStep when D3DI_RASTPRIM_X_DEC is set
|
|
uPix dd ? ; pixels left in the current span (loaded from pS->uPix, decremented at FailLabel)
|
|
|
|
;-----------------------------------------------------------------------------
|
|
|
|
.code
|
|
|
|
|
|
|
|
PUBLIC _MMXMLRast_6

;-----------------------------------------------------------------------------
; _MMXMLRast_6
;
; Walks the primitive list hanging off the rasterizer context and, for every
; span of every primitive, runs a per-pixel loop that:
;   1) Z-tests a 16-bit depth value against the Z buffer (sense selected by
;      pCtx->iZXorMask) and writes the new depth on pass,
;   2) fetches one 565 texel (nearest) using the perspective-corrected U/V,
;   3) modulates the texel with the iterated span color,
;   4) packs the result back to 565 and stores it to the surface,
;   5) steps color, UoW/VoW, LOD, OoW, recomputes W (Newton-Raphson) and
;      derives the next U/V.
;
; In:     first stack argument (read from [esp+8] after "push ebp") = pCtx.
; Out:    eax = 0 (the embedded C pseudo-code calls this S_OK).
; Saves:  ebp, ebx, esi, edi are pushed on entry and restored on exit.
; MMX:    mm0-mm7 are used freely; "emms" is issued before returning.
; Notes:  Working state lives in the module variables StackPos, uSpans,
;         iSurfaceStep, iZStep and uPix plus the externals LastW and GiveUp,
;         so the routine is not reentrant.
;
; Register roles inside the loops:
;   ebx = pCtx            ecx = pP (current primitive)
;   ebp = pS (current span; NOT a frame pointer here)
;   eax/edx/esi/edi = scratch
;-----------------------------------------------------------------------------
_MMXMLRast_6:

    push    ebp
    mov     StackPos, esp               ; remembered so the epilogue can drop the scratch area
    mov     eax, esp
    sub     esp, 0Ch                    ; This will need to change if stack frame size changes.
    push    ebx
    push    esi
    push    edi

    ; Put pCtx into ebx
    mov     ebx, [eax+8]

    ;PD3DI_RASTPRIM pP = pCtx->pPrim;
    mov     ecx, [ebx+RASTCTX_pPrim]

    ;while (pP)
    ;{
PrimLoop:
    cmp     ecx, 0
    je      ExitPrimLoop

    ;UINT16 uSpans = pP->uSpans;
    movzx   eax, word ptr [ecx+RASTPRIM_uSpans]
    mov     uSpans, eax

    ;PD3DI_RASTSPAN pS = (PD3DI_RASTSPAN)(pP + 1);
    mov     ebp, ecx
    add     ebp, SIZEOF_RASTPRIM

    ;while (uSpans-- > 0)
    ;{
SpanLoop:
    mov     edx, uSpans                 ; edx = value before the post-decrement
    mov     eax, edx
    dec     eax
    mov     uSpans, eax
    test    edx, edx
    jle     ExitSpanLoop

    ;pCtx->pfnBegin(pCtx, pP, pS);

;-----------------------------------------------------------------------------
; LoopAny code inserted here.  This is to get rid of an extra
; jump.
;-----------------------------------------------------------------------------

    ; Setup Code begins

    ; get values to iterate

    ;uPix = pS->uPix;
    movzx   eax, word ptr [ebp+RASTSPAN_uPix]
    mov     uPix, eax

    ;pCtx->SI.iDW = 0x0;
    mov     dword ptr [ebx+RASTCTX_SI+SPANITER_iDW], 0

    mov     esi, [ebp+RASTSPAN_iW]
    movq    mm5, MMWORD PTR [ebp+RASTSPAN_iUoW1]    ; low dword = UoW, high dword = VoW

    ;pCtx->SI.iUd_TexNum = d_WTimesUVoW(pS->iW,pS->iUoW1);
    ;pCtx->SI.iVd_TexNum = d_WTimesUVoW(pS->iW,pS->iVoW1);
    pslld   mm5, 8
    shl     esi, 4
    movd    eax, mm5
    psrlq   mm5, 32
    imul    esi                         ; edx = high 32 bits of (UoW<<8) * (W<<4)
    mov     [ebx+RASTCTX_SI+SPANITER_iU1], edx
    movd    eax, mm5
    imul    esi                         ; same for VoW
    mov     [ebx+RASTCTX_SI+SPANITER_iV1], edx

    ;if (pP->iDOoWDX > 0)
    ;{
    cmp     dword ptr [ecx+RASTPRIM_iDOoWDX], 0
    jle     SpecialWLastMonTest         ; take the else arm only when iDOoWDX <= 0
    ;// iSpecialW should be negative for the first 3 pixels of span
    ;pCtx->SI.iSpecialW = -3;
    mov     word ptr [ebx+RASTCTX_SI+SPANITER_iSpecialW], -3
    jmp     DoneSpecialWifMonTest
    ;}
    ;else
    ;{
SpecialWLastMonTest:
    ;// iSpecialW should be negative for the last 3 pixels of span
    ;pCtx->SI.iSpecialW = 0x7fff - uPix;
    mov     eax, 07fffh
    sub     eax, uPix
    ;pCtx->SI.iSpecialW += 5; // this may wrap, but it should
    add     eax, 5
    ; iSpecialW is accessed as a 16-bit field everywhere else in this
    ; routine, so store only ax and leave the neighbouring bytes alone.
    mov     word ptr [ebx+RASTCTX_SI+SPANITER_iSpecialW], ax
    ;}
DoneSpecialWifMonTest:

    ;if (pP->uFlags & D3DI_RASTPRIM_X_DEC)
    ;{
    mov     eax, [ecx+RASTPRIM_uFlags]
    and     eax, D3DI_RASTPRIM_X_DEC
    test    eax, eax
    jz      LeftToRightSpan
    ;iZStep = -pCtx->iZStep;
    mov     eax, [ebx+RASTCTX_iZStep]
    neg     eax
    mov     iZStep, eax
    ;iSurfaceStep = -pCtx->iSurfaceStep;
    mov     eax, [ebx+RASTCTX_iSurfaceStep]
    neg     eax
    mov     iSurfaceStep, eax
    ;}
    jmp     DoneSpanDirif
    ;else
    ;{
LeftToRightSpan:
    ;iZStep = pCtx->iZStep;
    mov     eax, [ebx+RASTCTX_iZStep]
    mov     iZStep, eax
    ;iSurfaceStep = pCtx->iSurfaceStep;
    mov     eax, [ebx+RASTCTX_iSurfaceStep]
    mov     iSurfaceStep, eax
    ;}
DoneSpanDirif:

    ; Setup Code Ends
; ----------------------------------------------------------------------------------------------------------------
    ; Loop Code Begins

PixelLoop:
    ; Ztestcode
    ; edx is uZ
    ; eax is uZB
    ; 16 bit unsigned format
    ;UINT16 uZ  = (UINT16)(pS->uZ>>15);
    ;UINT16 uZB = *((UINT16*)pS->pZ);
    mov     edx, [ebp+RASTSPAN_uZ]
    movd    mm4, edx                    ; keep the unshifted Z for the increment below
    mov     esi, [ebp+RASTSPAN_pZ]
    shr     edx, 15
    movzx   eax, word ptr [esi]

    ;pS->uZ += pP->iDZDX;
    ;if ((pCtx->iZXorMask)^(uZ > uZB))
    ; !(uZ > uZB) <==>
    ;  (uZ <= uZB) <==>
    ;  (uZ <  uZB+1) <==>
    ;
    sub     eax, edx                    ; eax = uZB - uZ; sign bit set when uZ > uZB
    paddd   mm4, [ecx+RASTPRIM_iDZDX]
    movd    [ebp+RASTSPAN_uZ], mm4
    xor     eax, [ebx+RASTCTX_iZXorMask] ; mask may flip the sign bit to reverse the test
    test    eax, eax
    js      FailLabel

    mov     word ptr [esi], dx          ; Z test passed: write new depth

    ; texturecode

    mov     esi, [ebx+RASTCTX_pTexture]

    movq    mm5, MMWORD PTR Val0x000a000a   ; This is TEX_FINAL_SHIFT - 6 = 10.

    movd    mm4, [esi+SPANTEX_iShiftU]
    psubw   mm5, mm4                    ; per-word: 10 - shift
    movq    mm4, mm5
    pand    mm5, MMWORD PTR Val0xffff   ; mm5 = shift count taken from the low word

    psrld   mm4, 16                     ; mm4 = shift count taken from the high word

    movd    mm1, [ebx+RASTCTX_SI+SPANITER_iU1]
    psrad   mm1, mm5
    movd    mm2, [ebx+RASTCTX_SI+SPANITER_iV1]
    psrad   mm2, mm4

    punpckldq mm1, mm2                  ; mm1 = V:U as two dwords

    movzx   edx, word ptr [esi+SPANTEX_iShiftPitch]
    add     edx, 16
    movd    mm2, edx
    movq    mm5, MMWORD ptr Makelow16one
    pslld   mm5, mm2

    por     mm5, MMWORD ptr Makelow16one    ; multiplier pair consumed by pmaddwd below

    psrad   mm1, 6

    packssdw mm1, mm1                   ; Value needs to be packed since all wrap/mirror

    movd    mm0, [esi+SPANTEX_uMaskU]   ; Load U and V mask

    movq    mm7, mm1
    movd    mm4, [esi+SPANTEX_iFlipMaskU]

    pand    mm7, mm4

    pcmpeqw mm7, MMWORD PTR Zero
    pandn   mm7, mm0                    ; mm7 = mask where the flip bit was set, else 0
    pand    mm1, mm0
    pxor    mm1, mm7

    pmaddwd mm1, mm5                    ; combine the two coordinates into one texel index
    mov     edi, [esi+SPANTEX_pBits]
    movd    eax, mm1

    movzx   eax, word ptr [edi+2*eax]   ; 16-bit texel fetch

    ; got rid of the punpack with zero
    ; in color conversion.
    movd    mm1, eax                    ; Make two more copies of input color
    movq    mm2, mm1
    pand    mm1, dword ptr MaskGreen565to888
    pand    mm2, dword ptr opt_MaskRed565to888
    psllq   mm2, 24                     ; red field -> third 16-bit lane
    psllq   mm1, 13                     ; green field (assuming mask 07E0h) -> second lane
    shl     eax, 3                      ; blue -> bits 3..7
    por     mm1, mm2
    and     eax, 0FFH
    movd    mm2, eax
    por     mm2, mm1                    ; mm2 = texel with one channel per 16-bit lane

    ;modulate

    movq    mm1, [ebp+RASTSPAN_uB]
    psrlw   mm1, COLOR_SHIFT            ; COLOR_SHIFT is set to 8.
    pmullw  mm1, mm2

    ;write

    mov     edi, [ebp+RASTSPAN_pSurface]

    psrlw   mm1, 8                      ; Convert color1 from 8.8 to 0.8
    packuswb mm1, mm7                   ; pack one color
    movq    mm3, mm1
    pand    mm1, MMWORD PTR u888to565RedBlueMask
    pmaddwd mm1, MMWORD PTR u888to565Multiplier
    pand    mm3, MMWORD PTR u888to565GreenMask
    por     mm1, mm3
    psrld   mm1, 5

    movd    edx, mm1
    mov     [edi], dx

FailLabel:

    dec     uPix                        ;// BUG BUG?? uPix should never start as zero should it?

    jle     ExitPixelLoop

    ; Step the iterated color.
    movq    mm1, [ebp+RASTSPAN_uB]
    paddw   mm1, [ecx+RASTPRIM_iDBDX]
    movq    [ebp+RASTSPAN_uB], mm1

    ; Step UoW/VoW; mm5 keeps the new pair for the U/V recompute below.
    movq    mm5, [ebp+RASTSPAN_iUoW1]
    paddd   mm5, [ecx+RASTPRIM_iDUoW1DX]
    movq    [ebp+RASTSPAN_iUoW1], mm5

    xor     eax, eax
    mov     ax, [ebp+RASTSPAN_iLOD]
    add     ax, [ebp+RASTSPAN_iDLOD]
    mov     [ebp+RASTSPAN_iLOD], ax

    mov     eax, [ebp+RASTSPAN_iOoW]
    add     eax, [ecx+RASTPRIM_iDOoWDX]
    mov     [ebp+RASTSPAN_iOoW], eax    ; eax stays = new iOoW for the multiplies below

    mov     edx, [ebp+RASTSPAN_iW]
    mov     LastW, edx                  ; Save iW to calc iDW for next time.
    add     edx, [ebx+RASTCTX_SI+SPANITER_iDW]   ; edx = extrapolated W guess

    xor     edi, edi
    cmp     di, word ptr [ebx+RASTCTX_SI+SPANITER_iSpecialW]
    jle     DontDoSpecialW1             ; iSpecialW >= 0: single refinement step is enough

    ; Special (iterative) path.  Use the extrapolated W as the starting
    ; guess only when it is positive; a zero or negative guess cannot
    ; converge to a positive reciprocal, so fall back to LastW/2.
    cmp     edx, edi
    jg      WOutOfRange1

    mov     edx, LastW
    sar     edx, 1
WOutOfRange1:                           ; (historical label name) guess in edx is ready

    mov     GiveUp, 8                   ; Pre decrementing instead of post decrementing.

SpecW1Loop1:

    dec     GiveUp
    jz      ExitSpecWLoop1              ; iteration budget exhausted: keep current edx

    mov     esi, (1 SHL 16)
    mov     edi, edx                    ; edi = current W estimate
    imul    edx                         ; edx = high 32 bits of iOoW * W
    sub     esi, edx                    ; esi = (1 SHL 16) - that product

SpecW1Loop2:
    test    esi, esi
    jns     SpecW1ExitLoop2             ; This jump should be predicted correctly most of the time.
    add     esi, (1 SHL 15)
    sar     esi, 1
    jmp     SpecW1Loop2

SpecW1ExitLoop2:

    mov     eax, edi

    shl     eax, 5                      ; 1.15.16 << 5 = 1.10.21  TBD Can I shift off upper bits??
    shl     esi, 12                     ; 4.15 << 12 = 4.27 ;

    mul     esi                         ; edx = refined W estimate
    sub     edi, edx

    ; edi = abs(previous estimate - new estimate)
    mov     eax, edi
    sar     eax, 31
    xor     edi, eax
    sub     edi, eax

    cmp     edi, 020h                   ;Assuming that loop will only happen once.
    jbe     ExitSpecWLoop1

    mov     eax, [ebp+RASTSPAN_iOoW]    ; reload the multiplier for the next pass
    jmp     SpecW1Loop1

DontDoSpecialW1:
    mov     esi, (1 SHL 16)
    mov     edi, edx
    mul     edx
    sub     esi, edx
    shl     esi, 15
    mov     eax, esi

    mul     edi                         ; 0.2.30 * 1.15.16 = 1.17.46 >> 32 = 1.17.14
    shl     edx, 2                      ; 1.17.14 << 2 = 1.15.16

ExitSpecWLoop1:

    mov     [ebp+RASTSPAN_iW], edx
    mov     esi, edx                    ; Save W for multiplying by UoW and VoW
    sub     edx, LastW
    mov     [ebx+RASTCTX_SI+SPANITER_iDW], edx

    inc     word ptr [ebx+RASTCTX_SI+SPANITER_iSpecialW]

    ; Recompute U and V from the stepped UoW/VoW (still in mm5) and the new W,
    ; using the same sequence as the span setup code.
    pslld   mm5, 8
    shl     esi, 4
    movd    eax, mm5
    psrlq   mm5, 32
    imul    esi
    mov     [ebx+RASTCTX_SI+SPANITER_iU1], edx
    movd    eax, mm5
    imul    esi
    mov     [ebx+RASTCTX_SI+SPANITER_iV1], edx

    ; Advance the Z-buffer and surface pointers to the next pixel.
    mov     eax, dword ptr [ebp+RASTSPAN_pZ]
    mov     edx, dword ptr [ebp+RASTSPAN_pSurface]

    add     eax, iZStep
    add     edx, iSurfaceStep

    mov     dword ptr [ebp+RASTSPAN_pZ], eax
    mov     dword ptr [ebp+RASTSPAN_pSurface], edx

    jmp     PixelLoop

ExitPixelLoop:
    ; Loop code ends

;-----------------------------------------------------------------------------
; LoopAny code ends here
;-----------------------------------------------------------------------------

    ;pS++;
    add     ebp, SIZEOF_RASTSPAN

    ;}
    jmp     SpanLoop
ExitSpanLoop:
    ;pP = pP->pNext;
    mov     ecx, [ecx+RASTPRIM_pNext]
    ;}
    jmp     PrimLoop

ExitPrimLoop:
    ;_asm{
    emms                                ; leave MMX state before returning to the caller
    ;}

    ;return S_OK;
    xor     eax, eax
    ;}
    pop     edi
    pop     esi
    pop     ebx
    mov     esp, StackPos               ; discard the 0Ch scratch area
    pop     ebp
    ret
|
|
|
|
END
|