|
|
;-----------------------------------------------------------------------------
;
; Monolith 9.  Non-perspective 16 bit NO Z buffered 565
; Exactly the same as monolith 2 except Z buffer code removed.
;
;-----------------------------------------------------------------------------

INCLUDE iammx.inc
INCLUDE offs_acp.inc

; Names are read LSB to MSB, so B5G6R5 means five bits of blue starting
; at the LSB, then six bits of green, then five bits of red.

;TBD check to see if this value is correct.
COLOR_SHIFT     equ     8

.586
.model flat

; Big separating lines separate code into span code
; and loop code.  If span and loop are not going to
; end up being combined then it will be easy to
; separate the code.
.data

; Need externs for all of the variables that are needed for various beads.
; (Not every symbol declared here is referenced by the code in this file;
; the declarations are kept as they were.)

EXTERN IncHighandLow16:MMWORD
EXTERN UFracVFracMask:MMWORD
EXTERN UV32to15Mask:MMWORD
EXTERN Makelow16one:MMWORD
EXTERN MaskKeepUValues:MMWORD
EXTERN MaskKeepVValues:MMWORD
EXTERN UFrac:MMWORD
EXTERN VFrac:MMWORD
EXTERN Zero:MMWORD
EXTERN memD3DTFG_POINT:MMWORD
EXTERN GiveUp:MMWORD
EXTERN LastW:MMWORD
EXTERN Val0x000a000a:MMWORD
EXTERN Val0xffff:MMWORD
EXTERN Val0x0000002000000020:MMWORD
EXTERN Val0x0000ffff0000ffff:MMWORD

EXTERN MaskRed565to888:MMWORD
EXTERN MaskGreen565to888:MMWORD
EXTERN MaskBlue565to888:MMWORD

EXTERN MaskRed555to888:MMWORD
EXTERN MaskGreen555to888:MMWORD
EXTERN MaskBlue555to888:MMWORD

EXTERN MaskAlpha1555to8888:MMWORD
EXTERN MaskRed1555to8888:MMWORD
EXTERN MaskGreen1555to8888:MMWORD
EXTERN MaskBlue1555to8888:MMWORD

; TBD.  I think that I want to do 0xffff instead of 0xff.  This will
; have to be checked.  There is a value very similar to this in
; buf write.
EXTERN SetAlphato0xffff:MMWORD
EXTERN SetAlphato0xff:MMWORD

; TODO These equates are identical to the ones in texread.mas.
; Maybe they should be in a common .inc file.
RedShift565to888        equ     8
GreenShift565to888      equ     5
BlueShift565to888       equ     3

RedShift555to888        equ     9
GreenShift555to888      equ     6
BlueShift555to888       equ     3

AlphaShift1555to8888    equ     16
RedShift1555to8888      equ     9
GreenShift1555to8888    equ     6
BlueShift1555to8888     equ     3

; Repeated declaration of Zero (also declared above); kept as in the original.
EXTERN Zero:MMWORD

EXTERN DW_One_One:MMWORD

EXTERN MaskOffAlpha:MMWORD
EXTERN ShiftTA:MMWORD
EXTERN Val0x00ff00ff00ff00ff:MMWORD
EXTERN Val0x000000ff00ff00ff:MMWORD
EXTERN Val0X0000000001000000:MMWORD
EXTERN AlphaVal128:MMWORD
EXTERN RGBVal128:MMWORD

EXTERN g_uDitherValue:MMWORD
; Repeated declaration of SetAlphato0xff (also declared above); kept as in the original.
EXTERN SetAlphato0xff:MMWORD
EXTERN u888to565RedBlueMask:MMWORD
EXTERN u888to565GreenMask:MMWORD
EXTERN u888to565Multiplier:MMWORD
EXTERN uVal0x000007ff03ff07ff:MMWORD
EXTERN uVal0x0000078003c00780:MMWORD
EXTERN u888to555RedBlueMask:MMWORD
EXTERN u888to555GreenMask:MMWORD
EXTERN u888to555Multiplier:MMWORD
EXTERN uVal0x000007ff07ff07ff:MMWORD
EXTERN uVal0x0000078007800780:MMWORD

;-----------------------------------------------------------------------------
; Span Variables
; Static scratch storage used by _MMXMLRast_9.  Because this state is global
; rather than on the stack, the routine is not reentrant.
uMaskU          dq      ?       ; texture U/V wrap mask (loaded as 32 bits, stored as a qword)
StackPos        dd      ?       ; esp saved right after "push ebp", restored before return
uSpans          dd      ?       ; spans still to process in the current primitive
iShiftU         dd      ?       ; dword copy of the texture's iShiftU field
iShiftPitch     dd      ?       ; zero-extended copy of the texture's iShiftPitch field
pBits           dd      ?       ; copy of the texture's pBits pointer
;-----------------------------------------------------------------------------

;-----------------------------------------------------------------------------
; Loop Variables

iSurfaceStep    dd      ?       ; per-pixel destination step (negated for X_DEC spans)
uPix            dd      ?       ; pixels still to write in the current span

;-----------------------------------------------------------------------------
.code
;-----------------------------------------------------------------------------
; _MMXMLRast_9
;
; Monolithic span rasterizer: non-perspective, point-sampled (one texel read
; per pixel) 16-bit texture copied to a 16-bit surface, no Z buffer.  The
; file banner describes the pixel format as 565; the code below simply copies
; 16-bit texels and does no format conversion.
;
; Pseudo-C of the walk (taken from the original inline comments):
;     for (pP = pCtx->pPrim; pP; pP = pP->pNext)
;         for (pS = (PD3DI_RASTSPAN)(pP + 1), n = pP->uSpans; n > 0; n--, pS++)
;             <write pS->uPix pixels starting at pS->pSurface>
;     return S_OK;
;
; In:     [esp+4] = pCtx (single stack argument; plain "ret", so the caller
;         removes it)
; Out:    eax = 0
; Saves:  ebx, esi, edi, ebp, esp
; Clobb:  eax, ecx, edx, flags, mm0-mm7 (emms is executed before returning)
; Memory: updates pS->iUoW1 (8 bytes) and the pCtx->SI iterator fields while
;         stepping; keeps its working state in the static variables
;         StackPos/uSpans/iShiftU/iShiftPitch/pBits/uMaskU/iSurfaceStep/uPix,
;         so it is not reentrant.
;
; Register roles inside the loops:
;     ebx = pCtx          ecx = pP (current prim)     ebp = pS (current span)
;     esi = pCtx->pTexture                            edi = destination pixel
;     mm6 = U shift count   mm7 = V shift count       mm3 = address multiplier
;-----------------------------------------------------------------------------
PUBLIC  _MMXMLRast_9
_MMXMLRast_9:
        push    ebp
        mov     StackPos, esp           ; esp right after the ebp push; reloaded on exit
        mov     eax, esp
        sub     esp, 0Ch                ; This will need to change if stack frame size changes.
        push    ebx
        push    esi
        push    edi

        ; Put pCtx into ebx ([eax+8] = first argument, above saved ebp and
        ; the return address).
        mov     ebx, [eax+8]

        ;PD3DI_RASTPRIM pP = pCtx->pPrim;
        mov     ecx, [ebx+RASTCTX_pPrim]

;while (pP)
;{
PrimLoop:
        test    ecx, ecx
        jz      ExitPrimLoop

        ;UINT16 uSpans = pP->uSpans;
        movzx   eax, word ptr [ecx+RASTPRIM_uSpans]
        mov     uSpans, eax

        ;PD3DI_RASTSPAN pS = (PD3DI_RASTSPAN)(pP + 1);
        mov     ebp, ecx
        add     ebp, SIZEOF_RASTPRIM

SpanLoop:
        ; Post-decrement the span counter and leave once the value that was
        ; there before the decrement is <= 0.
        mov     edx, uSpans
        mov     eax, edx
        dec     eax
        mov     uSpans, eax
        test    edx, edx
        jle     ExitSpanLoop

        ;pCtx->pfnBegin(pCtx, pP, pS);

;-----------------------------------------------------------------------------
; LoopAny code inserted here.  This is to get rid of an extra
; jump.
;-----------------------------------------------------------------------------

        ; Setup Code begins - get values to iterate
        movzx   eax, word ptr [ebp+RASTSPAN_uPix]
        mov     uPix, eax
        movq    mm5, [ebp+RASTSPAN_iUoW1]

        ; Non perspective correct: the span's iUoW/iVoW pair is used directly
        ; as iU/iV after the fixed-point shift.  The 8-byte store starts at
        ; iU1 (presumably iV1 follows it immediately - the pixel loop reads
        ; both fields back separately).
        psrad   mm5, TEX_TO_FINAL_SHIFT
        movq    [ebx+RASTCTX_SI+SPANITER_iU1], mm5
        mov     dword ptr [ebx+RASTCTX_SI+SPANITER_iDW], 0
        mov     word ptr [ebx+RASTCTX_SI+SPANITER_iSpecialW], 0

        ; Destination step: pCtx->iSurfaceStep, negated when the prim is
        ; flagged D3DI_RASTPRIM_X_DEC.
        mov     eax, [ebx+RASTCTX_iSurfaceStep]
        test    dword ptr [ecx+RASTPRIM_uFlags], D3DI_RASTPRIM_X_DEC
        jz      StoreSurfaceStep
        neg     eax
StoreSurfaceStep:
        mov     iSurfaceStep, eax

        ;******************************************
        ; Cache the texture fields used per pixel in static variables.
        mov     esi, [ebx+RASTCTX_pTexture]
        mov     edx, [esi+SPANTEX_iShiftU]      ; dword load; the high word is used
        mov     iShiftU, edx                    ; below as the V shift (presumably iShiftV)
        movzx   edx, word ptr [esi+SPANTEX_iShiftPitch]
        mov     iShiftPitch, edx
        movd    mm0, dword ptr [esi+SPANTEX_uMaskU]     ; Load U and V mask
        movq    MMWORD PTR uMaskU, mm0
        mov     edx, [esi+SPANTEX_pBits]
        mov     pBits, edx
        mov     edi, [ebp+RASTSPAN_pSurface]
        ;******************************************

        ; ---- Per-span invariants (hoisted out of the pixel loop) ----------
        ;
        ; Doing UV calculation a little more accurate, exactly like C code.
        ; iU and iV are shifted right not by (TEX_FINAL_SHIFT - iShiftU0) but
        ; by (TEX_FINAL_SHIFT - iShiftU0 - 6), then by a further 6 below.
        ; Monolith cases assume iLOD0 is zero, so iShiftU0 = pTex->iShiftU.
        ;
        ; If textures have a max of 1024 then iShiftU0 would be at most 10,
        ; which would make (TEX_FINAL_SHIFT - iShiftU - 6) at least zero.
        ; This is why 6 was chosen.  It would also give bi-linear 6 bits of
        ; precision; it was said that only five were needed.
        ;INT16 iShiftU0 = pTex->iShiftU - iLOD0;
        ;INT16 iShiftV0 = pTex->iShiftV - iLOD0;
        movq    mm6, MMWORD PTR Val0x000a000a   ; This is TEX_FINAL_SHIFT - 6 = 10.
        movd    mm4, iShiftU
        psubw   mm6, mm4                        ; per-word: 10 - shift
        movq    mm7, mm6
        pand    mm6, MMWORD PTR Val0xffff       ; mm6 = shift count for iU
        psrld   mm7, 16                         ; mm7 = shift count for iV

        ; Texture pitch cannot be calculated so it is derived from
        ; iShiftPitch.  The original author marked this as a hack: an actual
        ; pitch value would be simpler than a shift.
        ; Build the pmaddwd multiplier: Makelow16one shifted left by
        ; (iShiftPitch + 16) and OR-ed with itself, i.e. (assuming
        ; Makelow16one holds 1 in each dword - TODO confirm) every dword is
        ; { low word = 1, high word = 1 << iShiftPitch }.
        mov     edx, iShiftPitch
        add     edx, 16
        movd    mm2, edx
        movq    mm3, MMWORD ptr Makelow16one
        pslld   mm3, mm2
        por     mm3, MMWORD ptr Makelow16one    ; Make the low 16 bits of dword one
                                                ; This helps in calculating texture address.

PixelLoop:
        ; Fetch the current iU / iV and apply the precomputed shifts.
        movd    mm1, [ebx+RASTCTX_SI+SPANITER_iU1]
        psrad   mm1, mm6
        movd    mm2, [ebx+RASTCTX_SI+SPANITER_iV1]
        psrad   mm2, mm7
        punpckldq mm1, mm2                      ; mm1 = iV : iU (dwords)

        ;iU00 >>= 6;
        ;iV00 >>= 6;
        psrad   mm1, 6
        packssdw mm1, mm1       ; Value needs to be packed since all wrap/mirror
                                ; operations assume UV in low 32 bits.

        ;UINT16 uMaskU0 = pTex->uMaskU >> iLOD0;
        ;UINT16 uMaskV0 = pTex->uMaskV >> iLOD0;
        ; The general purpose wrap/mirror code was replaced with specific
        ; wrap code: a single AND with the U/V mask.
        pand    mm1, MMWORD PTR uMaskU

        ; Texel index = iU * 1 + iV * (1 << iShiftPitch), via pmaddwd with
        ; the multiplier built above.
        movq    mm4, mm1
        pmaddwd mm4, mm3

        ; Texels are 16 bits wide: byte address = pBits + index * 2.
        movd    eax, mm4
        shl     eax, 1
        add     eax, pBits

        ;pCtx->SI.TexCol[0] = pCtx->pfnTexRead(iU00, iV00, pTex->iShiftU,
        ;                        pTex->pBits[iLOD0], &pCtx->Texture[0]);
        ; In the monolithic version the calls are inlined: copy the texel
        ; straight to the destination surface.
        mov     dx, word ptr [eax]
        mov     [edi], dx

        ; The count is tested after the pixel has been written, so a span
        ; whose uPix starts at zero still writes one pixel.  The original
        ; author flagged this: "uPix should never start as zero should it?
        ; if so, this is a bug."
        dec     uPix
        jle     ExitPixelLoop

        ; Doing update code after span length test so that an extra update
        ; is not done.
        movq    mm5, [ebp+RASTSPAN_iUoW1]
        paddd   mm5, [ecx+RASTPRIM_iDUoW1DX]
        movq    [ebp+RASTSPAN_iUoW1], mm5

        ; mm5 still contains iUoW and iVoW which are the iU and iV values for
        ; non perspective correct.
        psrad   mm5, TEX_TO_FINAL_SHIFT
        movq    [ebx+RASTCTX_SI+SPANITER_iU1], mm5
        add     edi, iSurfaceStep
        jmp     PixelLoop

ExitPixelLoop:
; Loop code ends

;-----------------------------------------------------------------------------
; LoopAny code ends here
;-----------------------------------------------------------------------------

        ;pS++;
        add     ebp, SIZEOF_RASTSPAN
        ;}
        jmp     SpanLoop

ExitSpanLoop:
        ;pP = pP->pNext;
        mov     ecx, [ecx+RASTPRIM_pNext]
        ;}
        jmp     PrimLoop

ExitPrimLoop:
        emms                            ; leave MMX state before returning

        ;return S_OK;
        xor     eax, eax
        pop     edi
        pop     esi
        pop     ebx
        mov     esp, StackPos           ; drops the 0Ch bytes reserved on entry
        pop     ebp
        ret
END
|