You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
426 lines
12 KiB
426 lines
12 KiB
;-----------------------------------------------------------------------------
;
; Monolith 2.  Non-perspective 16 bit Z buffered 565
;
; Globals (ATTENTION. Need to move all globals to stack.)
;
;   uSpans          - Count containing the number of spans.
;   StackPos        - Saves stack position.
;   uPix            - Pixel count
;   iSurfaceStep    - what to add to screen pointer
;   iZStep          - What to add to Z buffer pointer
;
;   (The below globals are used to save esi register which
;    was normally used for pTex pointer.  Now esi is used
;    for z buffer pointer.)
;   uMaskU
;   iShiftU
;   iShiftPitch
;   pBits
;
; Register Usage
;
;   esi - Z buffer pointer
;   edi - Screen buffer pointer
;
;   All other are temporary
;
;
; The only differences between this monolith and the regular
; MMX assembly code are:
;
;   1) Uses LE/GR Z compare code that all other monoliths use.
;   2) All texture info is stored in Globals to free up esi.
;   3) This code does Wrap only for texture addressing
;   4) Since there is no modulation or bi-linear and the source
;      and destination color formats are the same, there is
;      no need to convert to and from the internal color format.
;   5) esi is reserved for the Zbuffer
;   6) edi is reserved for the screen buffer.
;   7) Texture read does not use edi as pBits so that edi can be
;      preserved for screen buffer.
;
;-----------------------------------------------------------------------------

INCLUDE iammx.inc
INCLUDE offs_acp.inc

; Names are read LSB to MSB, so B5G6R5 means five bits of blue starting
; at the LSB, then six bits of green, then five bits of red.

;TBD check to see if this value is correct.
COLOR_SHIFT             equ 8

.586
.model flat

; Big separating lines separate code into span code
; and loop code.  If span and loop are not going to
; end up being combined then it will be easy to
; separate the code.

.data

; Need externs for all of the variables that are needed for various beads.
; NOTE(review): only Val0x000a000a, Val0xffff and Makelow16one are referenced
; by the code in this file; the rest are declared but unused here.

EXTERN IncHighandLow16:MMWORD
EXTERN UFracVFracMask:MMWORD
EXTERN UV32to15Mask:MMWORD
EXTERN Makelow16one:MMWORD
EXTERN MaskKeepUValues:MMWORD
EXTERN MaskKeepVValues:MMWORD
EXTERN UFrac:MMWORD
EXTERN VFrac:MMWORD
EXTERN Zero:MMWORD
EXTERN memD3DTFG_POINT:MMWORD
EXTERN GiveUp:MMWORD
EXTERN LastW:MMWORD
EXTERN Val0x000a000a:MMWORD
EXTERN Val0xffff:MMWORD
EXTERN Val0x0000002000000020:MMWORD
EXTERN Val0x0000ffff0000ffff:MMWORD

EXTERN MaskRed565to888:MMWORD
EXTERN MaskGreen565to888:MMWORD
EXTERN MaskBlue565to888:MMWORD

EXTERN MaskRed555to888:MMWORD
EXTERN MaskGreen555to888:MMWORD
EXTERN MaskBlue555to888:MMWORD

EXTERN MaskAlpha1555to8888:MMWORD
EXTERN MaskRed1555to8888:MMWORD
EXTERN MaskGreen1555to8888:MMWORD
EXTERN MaskBlue1555to8888:MMWORD

; TBD.  I think that I want to do 0xffff instead of 0xff.  This will
; have to be checked.  There is a value very similar to this in
; buf write.
EXTERN SetAlphato0xffff:MMWORD
EXTERN SetAlphato0xff:MMWORD

; TODO These equates are identical to the ones in texread.mas.  Maybe they
; should be in a common .inc file.
RedShift565to888        equ 8
GreenShift565to888      equ 5
BlueShift565to888       equ 3

RedShift555to888        equ 9
GreenShift555to888      equ 6
BlueShift555to888       equ 3

AlphaShift1555to8888    equ 16
RedShift1555to8888      equ 9
GreenShift1555to8888    equ 6
BlueShift1555to8888     equ 3

EXTERN Zero:MMWORD                      ; (repeats the declaration above)

EXTERN DW_One_One:MMWORD

EXTERN MaskOffAlpha:MMWORD
EXTERN ShiftTA:MMWORD
EXTERN Val0x00ff00ff00ff00ff:MMWORD
EXTERN Val0x000000ff00ff00ff:MMWORD
EXTERN Val0X0000000001000000:MMWORD
EXTERN AlphaVal128:MMWORD
EXTERN RGBVal128:MMWORD

EXTERN g_uDitherValue:MMWORD
EXTERN SetAlphato0xff:MMWORD            ; (repeats the declaration above)
EXTERN u888to565RedBlueMask:MMWORD
EXTERN u888to565GreenMask:MMWORD
EXTERN u888to565Multiplier:MMWORD
EXTERN uVal0x000007ff03ff07ff:MMWORD
EXTERN uVal0x0000078003c00780:MMWORD
EXTERN u888to555RedBlueMask:MMWORD
EXTERN u888to555GreenMask:MMWORD
EXTERN u888to555Multiplier:MMWORD
EXTERN uVal0x000007ff07ff07ff:MMWORD
EXTERN uVal0x0000078007800780:MMWORD

;-----------------------------------------------------------------------------
; Span Variables
;
; These are module-level scratch globals written by _MMXMLRast_2 on every
; call, so the routine keeps its working state here rather than on the stack.
;-----------------------------------------------------------------------------
uMaskU          dq ?    ; texture wrap mask: dword read from SPANTEX_uMaskU,
                        ; zero-extended to 64 bits by movd, ANDed with packed U/V
StackPos        dd ?    ; esp captured right after "push ebp"; restored on exit
uSpans          dd ?    ; spans still to process in the current primitive
iShiftU         dd ?    ; dword read from SPANTEX_iShiftU; the pixel loop uses
                        ; the low word as the U shift and the high word as the
                        ; V shift
iShiftPitch     dd ?    ; zero-extended word read from SPANTEX_iShiftPitch
pBits           dd ?    ; texture base address read from SPANTEX_pBits

;-----------------------------------------------------------------------------
; Loop Variables
;-----------------------------------------------------------------------------
iSurfaceStep    dd ?    ; added to edi (screen pointer) per pixel; negated
                        ; when the primitive has D3DI_RASTPRIM_X_DEC set
iZStep          dd ?    ; added to esi (Z buffer pointer) per pixel; negated
                        ; when the primitive has D3DI_RASTPRIM_X_DEC set
uPix            dd ?    ; pixels still to process in the current span

;-----------------------------------------------------------------------------

.code

;-----------------------------------------------------------------------------
; _MMXMLRast_2
;
; Walks the primitive list hanging off the rasterizer context and, for every
; span of every primitive, draws uPix pixels: 16 bit Z test, Z write, then a
; point-sampled, wrap-addressed, non-perspective 16 bit texel copied straight
; to the surface (no colour conversion).
;
; C equivalent (type names taken from the comments in this file - confirm
; against the real prototype):
;       HRESULT MMXMLRast_2(pCtx);
;
; ABI:   32 bit, arguments on the stack, caller cleans up (plain "ret").
; In:    [esp+4] = pCtx
; Out:   eax = 0 (S_OK)
; Saves: ebx, esi, edi, ebp, esp
; Clobb: ecx, edx, mm0, mm1, mm2, mm4, mm5, flags.  emms is executed before
;        returning.
; Notes: Working state lives in the module globals (StackPos, uSpans, uPix,
;        iZStep, iSurfaceStep, uMaskU, iShiftU, iShiftPitch, pBits), so the
;        routine is not reentrant.
;
; Register roles inside the loops:
;       ebx = pCtx          ecx = pP (current primitive)
;       ebp = pS (current span)
;       esi = Z buffer pointer      edi = screen buffer pointer
;       eax, edx = temporaries
;-----------------------------------------------------------------------------

PUBLIC _MMXMLRast_2
_MMXMLRast_2:
        push    ebp
        mov     StackPos, esp           ; remembered so the epilogue can drop the frame
        mov     eax, esp                ; [eax]=saved ebp, [eax+4]=ret addr, [eax+8]=pCtx
        sub     esp, 0Ch                ; This will need to change if stack frame size changes.
        push    ebx
        push    esi
        push    edi

        ; Put pCtx into ebx
        mov     ebx, [eax+8]

        ;PD3DI_RASTPRIM pP = pCtx->pPrim;
        mov     ecx, [ebx+RASTCTX_pPrim]

        ;while (pP)
        ;{
PrimLoop:
        test    ecx, ecx
        jz      ExitPrimLoop

        ;UINT16 uSpans = pP->uSpans;
        movzx   eax, word ptr [ecx+RASTPRIM_uSpans]
        mov     uSpans, eax

        ;PD3DI_RASTSPAN pS = (PD3DI_RASTSPAN)(pP + 1);
        lea     ebp, [ecx+SIZEOF_RASTPRIM]

SpanLoop:
        ; Post-decrement test: leave when the count was already <= 0.
        mov     edx, uSpans
        mov     eax, edx
        dec     eax
        mov     uSpans, eax
        test    edx, edx
        jle     ExitSpanLoop

        ;pCtx->pfnBegin(pCtx, pP, pS);

;-----------------------------------------------------------------------------
; LoopAny code inserted here.  This is to get rid of an extra
; jump.
;-----------------------------------------------------------------------------

        ; Setup Code begins - get values to iterate

        movzx   eax, word ptr [ebp+RASTSPAN_uPix]
        mov     uPix, eax
        movq    mm5, [ebp+RASTSPAN_iUoW1]

        ; non perspective correct.

        psrad   mm5, TEX_TO_FINAL_SHIFT
        movq    [ebx+RASTCTX_SI+SPANITER_iU1], mm5
        mov     dword ptr [ebx+RASTCTX_SI+SPANITER_iDW], 0
        mov     word ptr [ebx+RASTCTX_SI+SPANITER_iSpecialW], 0

        ; Per-pixel pointer steps.  They are taken from the context as is for
        ; a left-to-right span and negated when the primitive is flagged
        ; D3DI_RASTPRIM_X_DEC.  eax/edx are free here: both are reloaded
        ; before their next use.
        mov     eax, [ebx+RASTCTX_iZStep]
        mov     edx, [ebx+RASTCTX_iSurfaceStep]
        test    dword ptr [ecx+RASTPRIM_uFlags], D3DI_RASTPRIM_X_DEC
        jz      LeftToRightSpan
        neg     eax
        neg     edx
LeftToRightSpan:
        mov     iZStep, eax
        mov     iSurfaceStep, edx

        ;******************************************
        ; Extra Globals are used here.
        ; esi is borrowed as the texture pointer while the texture parameters
        ; are copied to globals, then becomes the Z buffer pointer.
        mov     esi, [ebx+RASTCTX_pTexture]
        mov     edx, [esi + SPANTEX_iShiftU]
        mov     iShiftU, edx
        movzx   edx, word ptr [esi + SPANTEX_iShiftPitch]
        mov     iShiftPitch, edx
        movd    mm0, dword ptr [esi+SPANTEX_uMaskU]     ; Load U and V mask
        movq    MMWORD PTR uMaskU, mm0
        mov     edx, [esi+SPANTEX_pBits]
        mov     pBits, edx
        mov     edi, [ebp+RASTSPAN_pSurface]
        mov     esi, [ebp+RASTSPAN_pZ]
        ;******************************************

PixelLoop:
        ; Ztestcode
        ; edx is uZ
        ; eax is uZB
        ; 16 bit unsigned format
        ;UINT16 uZ  = (UINT16)(pS->uZ>>15);
        ;UINT16 uZB = *((UINT16*)pS->pZ);
        mov     edx, [ebp+RASTSPAN_uZ]
        movd    mm4, edx
        shr     edx, 15
        movzx   eax, word ptr [esi]

        ;pS->uZ += pP->iDZDX;
        ;if ((pCtx->iZXorMask)^(uZ > uZB))
        sub     eax, edx                ; eax = uZB - uZ
        paddd   mm4, [ecx+RASTPRIM_iDZDX]
        movd    [ebp+RASTSPAN_uZ], mm4  ; Z is stepped whether or not the pixel passes
        xor     eax, [ebx+RASTCTX_iZXorMask]
        js      FailLabel               ; xor already set SF from the result

        mov     word ptr [esi], dx      ; Z pass: write the new depth

        ; texturecode

        ; Doing UV calculation a little more accurate
        ; Exactly like C code.

        ; I iU and iV to the right not by (TEX_FINAL_SHIFT - iShiftU0) but by
        ; (TEX_FINAL_SHIFT - iShiftU0 - 6).  iShiftU0 = pTex->iShiftU - iLOD0
        ; (TEX_FINAL_SHIFT - (pTex->iShiftU - iLOD0))
        ; (TEX_FINAL_SHIFT + iLOD0 - pTex->iShiftU)

        ; COMMENT1**
        ; If textures have a max of 1024 then shiftU0 would be at most 10 which would
        ; make (TEXT_FINAL_SHIFT - iShiftU - 6) at most zero.  This is why I choose 6
        ; It will also give bi-linear 6 bits of precision I think it was said that
        ; only five was needed.
        ;INT16 iShiftU0 = pTex->iShiftU - iLOD0;
        ;INT16 iShiftV0 = pTex->iShiftV - iLOD0;
        movq    mm5, MMWORD PTR Val0x000a000a   ; This is TEX_FINAL_SHIFT - 6 = 10.
        movd    mm4, iShiftU
        psubw   mm5, mm4                ; word0 = 10 - shiftU, word1 = 10 - shiftV
        movq    mm4, mm5
        pand    mm5, MMWORD PTR Val0xffff       ; mm5 = U shift count (presumably the
                                                ; mask keeps only the low word)
        psrld   mm4, 16                 ; mm4 = V shift count
        movd    mm1, [ebx+RASTCTX_SI+SPANITER_iU1]
        psrad   mm1, mm5
        movd    mm2, [ebx+RASTCTX_SI+SPANITER_iV1]
        psrad   mm2, mm4
        punpckldq mm1, mm2              ; mm1 = V:U as two dwords

        ; Texture Pitch cannot be calculated so it must be looked up in the iShiftPitch table
        mov     edx, iShiftPitch

        add     edx, 16
        movd    mm2, edx
        movq    mm5, MMWORD ptr Makelow16one
        pslld   mm5, mm2

        por     mm5, MMWORD ptr Makelow16one
                                        ; Make the low 16 bits of dword one
                                        ; This helps in calculating texture address.
                                        ; (presumably each dword of mm5 is now
                                        ; (1 << iShiftPitch) : 1 as high:low words)

        ; Gets U and V value into mm1 so that it can be mirrored, wrapped or
        ; clamped.  This can be done for two values in the point case
        ; or four values in the bilinear case.
        ;iU00 >>= 6;
        ;iV00 >>= 6;

        psrad   mm1, 6
        packssdw mm1, mm1               ; Value needs to be packed since all wrap/mirror
                                        ; operations assume UV in low 32 bits.

        ;UINT16 uMaskU0 = pTex->uMaskU >> iLOD0; UINT16 uMaskV0 = pTex->uMaskV >> iLOD0;
        ; put mask in mm3 and replicate to match location for wrap/mirror/clamp

        ; Replace general purpose wrap/mirror code with specific wrap code.

        pand    mm1, MMWORD PTR uMaskU
        movq    mm4, mm1

        ; Making other two cases for texture addressing has to be simpler than
        ; this and not use so many registers.  Puts U1 V0 U0 V1 into mm3.
        ; TBD Make this better.
        ; values are still stored as iV01, iU00, iV00, iU01
        pmaddwd mm4, mm5                ; Throw in first address calculation.
                                        ; Just to get it started.  Calculate
                                        ; iU0+iV1*iShiftU0 and iU1+iV0*iShiftU0

        ; From here until mov edi is code that is needed for border.
        ; all sign bits are stored in bytes so that border code can tell if uv went below zero.

        ; iV0 iU1 address should be done by now.

        movd    eax, mm4                ; eax = texel index
        shl     eax, 1                  ; two bytes per texel
        add     eax, pBits

        ;pCtx->SI.TexCol[0] = pCtx->pfnTexRead(iU00, iV00, pTex->iShiftU,
        ;                           pTex->pBits[iLOD0], &pCtx->Texture[0]);
        mov     dx, word ptr [eax]
        ; Write Texture.
        mov     [edi], dx

FailLabel:
        dec     uPix
        jle     ExitPixelLoop

        ; Doing update code after span length test so that an extra update is not done.

        movq    mm5, [ebp+RASTSPAN_iUoW1]
        paddd   mm5, [ecx+RASTPRIM_iDUoW1DX]
        movq    [ebp+RASTSPAN_iUoW1], mm5

        ; mm5 still contains iUoW and iVoW which are the iU and iV values for
        ; non perspective correct.
        psrad   mm5, TEX_TO_FINAL_SHIFT
        movq    [ebx+RASTCTX_SI+SPANITER_iU1], mm5
        add     esi, iZStep
        add     edi, iSurfaceStep
        jmp     PixelLoop

ExitPixelLoop:
        ; Loop code ends

;-----------------------------------------------------------------------------
; LoopAny code ends here
;-----------------------------------------------------------------------------

        ;pS++;
        add     ebp, SIZEOF_RASTSPAN

        ;}
        jmp     SpanLoop

ExitSpanLoop:
        ;pP = pP->pNext;
        mov     ecx, [ecx+RASTPRIM_pNext]
        ;}
        jmp     PrimLoop

ExitPrimLoop:
        ;_asm{
        emms                            ; leave the FPU/MMX state clean for the caller
        ;}

        ;return S_OK;
        xor     eax, eax
        ;}
        pop     edi
        pop     esi
        pop     ebx
        mov     esp, StackPos           ; discard the 0Ch byte local area
        pop     ebp
        ret

END