Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

408 lines
13 KiB

;-----------------------------------------------------------------------------
;
; Monolith 9. Non-perspective 16 bit NO Z buffered 565
; Exactly the same as monolith 2 except Z buffer code removed.
;
;-----------------------------------------------------------------------------
INCLUDE iammx.inc
INCLUDE offs_acp.inc
; Names are read LSB to MSB, so B5G6R5 means five bits of blue starting
; at the LSB, then six bits of green, then five bits of red.
;TBD check to see if this value is correct.
; NOTE(review): COLOR_SHIFT is not referenced anywhere else in this file.
COLOR_SHIFT equ 8
.586
.model flat
; Big separator lines divide the code into span code
; and loop code. If span and loop are not going to
; end up being combined then it will be easy to
; separate the code.
.data
; Need externs for all of the variables that are needed for various beads
EXTERN IncHighandLow16:MMWORD
EXTERN UFracVFracMask:MMWORD
EXTERN UV32to15Mask:MMWORD
EXTERN Makelow16one:MMWORD
EXTERN MaskKeepUValues:MMWORD
EXTERN MaskKeepVValues:MMWORD
EXTERN UFrac:MMWORD
EXTERN VFrac:MMWORD
EXTERN Zero:MMWORD
EXTERN memD3DTFG_POINT:MMWORD
EXTERN GiveUp:MMWORD
EXTERN LastW:MMWORD
EXTERN Val0x000a000a:MMWORD
EXTERN Val0xffff:MMWORD
EXTERN Val0x0000002000000020:MMWORD
EXTERN Val0x0000ffff0000ffff:MMWORD
EXTERN MaskRed565to888:MMWORD
EXTERN MaskGreen565to888:MMWORD
EXTERN MaskBlue565to888:MMWORD
EXTERN MaskRed555to888:MMWORD
EXTERN MaskGreen555to888:MMWORD
EXTERN MaskBlue555to888:MMWORD
EXTERN MaskAlpha1555to8888:MMWORD
EXTERN MaskRed1555to8888:MMWORD
EXTERN MaskGreen1555to8888:MMWORD
EXTERN MaskBlue1555to8888:MMWORD
; TBD. I think that I want to do 0xffff instead of 0xff. This will
; have to be checked. There is a value very similar to this in
; buf write.
EXTERN SetAlphato0xffff:MMWORD
EXTERN SetAlphato0xff:MMWORD
; TODO These equates are identical to the ones in texread.mas. Maybe they should be in a common .inc file.
; Per-channel shift counts, named after the source texel layout (565, 555,
; 1555) and the destination layout (888 / 8888).
; NOTE(review): none of these equates is referenced by the code in this file;
; presumably they are kept for parity with texread.mas - confirm before removing.
RedShift565to888 equ 8
GreenShift565to888 equ 5
BlueShift565to888 equ 3
RedShift555to888 equ 9
GreenShift555to888 equ 6
BlueShift555to888 equ 3
AlphaShift1555to8888 equ 16
RedShift1555to8888 equ 9
GreenShift1555to8888 equ 6
BlueShift1555to8888 equ 3
EXTERN Zero:MMWORD
EXTERN DW_One_One:MMWORD
EXTERN MaskOffAlpha:MMWORD
EXTERN ShiftTA:MMWORD
EXTERN Val0x00ff00ff00ff00ff:MMWORD
EXTERN Val0x000000ff00ff00ff:MMWORD
EXTERN Val0X0000000001000000:MMWORD
EXTERN AlphaVal128:MMWORD
EXTERN RGBVal128:MMWORD
EXTERN g_uDitherValue:MMWORD
EXTERN SetAlphato0xff:MMWORD
EXTERN u888to565RedBlueMask:MMWORD
EXTERN u888to565GreenMask:MMWORD
EXTERN u888to565Multiplier:MMWORD
EXTERN uVal0x000007ff03ff07ff:MMWORD
EXTERN uVal0x0000078003c00780:MMWORD
EXTERN u888to555RedBlueMask:MMWORD
EXTERN u888to555GreenMask:MMWORD
EXTERN u888to555Multiplier:MMWORD
EXTERN uVal0x000007ff07ff07ff:MMWORD
EXTERN uVal0x0000078007800780:MMWORD
;-----------------------------------------------------------------------------
; Span Variables
; Working state of _MMXMLRast_9. These live in .data rather than on the
; stack, so the routine that uses them is not reentrant.
; uMaskU: 64-bit slot; its low dword is loaded with movd from SPANTEX_uMaskU
; (upper dword therefore zero) and ANDed with the packed U/V words.
uMaskU dq ?
; StackPos: esp as it was right after `push ebp` on entry; esp is reloaded
; from here just before `pop ebp` / `ret`.
StackPos dd ?
; uSpans: countdown of spans for the current primitive, initialised from the
; zero-extended word at RASTPRIM_uSpans.
uSpans dd ?
; iShiftU: copy of the dword at SPANTEX_iShiftU. It is subtracted word-wise
; from Val0x000a000a, so it presumably packs iShiftU (low word) and iShiftV
; (high word) - TODO confirm against the SPANTEX layout.
iShiftU dd ?
; iShiftPitch: zero-extended word read from SPANTEX_iShiftPitch.
iShiftPitch dd ?
; pBits: texture base address read from SPANTEX_pBits.
pBits dd ?
;-----------------------------------------------------------------------------
;-----------------------------------------------------------------------------
; Loop Variables
; iSurfaceStep: RASTCTX_iSurfaceStep, negated when the primitive has
; D3DI_RASTPRIM_X_DEC set; added to the destination pointer after each pixel.
iSurfaceStep dd ?
; uPix: pixels left in the current span, initialised from the zero-extended
; word at RASTSPAN_uPix and decremented once per pixel written.
uPix dd ?
;-----------------------------------------------------------------------------
.code
;-----------------------------------------------------------------------------
; _MMXMLRast_9
;
; Monolithic MMX span rasterizer: non-perspective, point sampled, wrap
; addressed (U/V are simply ANDed with the texture mask), 16-bit texel copied
; unchanged to a 16-bit surface, no Z buffer.
;
; Walks pCtx->pPrim and its pNext chain; for every primitive walks the
; RASTSPAN records that follow it in memory; for every span copies uPix
; texels to pSurface, stepping the destination by +/- iSurfaceStep.
;
; In:    [esp+4] = pCtx
; Out:   eax = 0 (S_OK)
; Saves: ebx, esi, edi, ebp.
; Clobb: ecx, edx, flags, MMX registers (emms is executed before returning).
; Stack: plain ret - the caller removes the argument.
; Notes: Working state lives in module-level .data variables (StackPos,
;        uSpans, uPix, ...), so the routine is not reentrant.
;        RASTSPAN_iUoW1 of each span is advanced in place while iterating.
;
; Register roles while rasterizing:
;       ebx = pCtx                      ecx = pP (current primitive)
;       ebp = pS   (current span)       esi = pCtx->pTexture
;       edi = destination pixel
;       mm6 = shift count applied to iU1 (per span)
;       mm7 = shift count applied to iV1 (per span)
;       mm3 = pmaddwd multipliers that form U + (V << iShiftPitch) (per span)
;-----------------------------------------------------------------------------
PUBLIC _MMXMLRast_9
_MMXMLRast_9:
        push    ebp
        mov     StackPos, esp           ; esp is reloaded from here on exit
        mov     eax, esp                ; [eax] = saved ebp, [eax+4] = return address
        sub     esp, 0Ch                ; This will need to change if stack frame size changes.
                                        ; (these 12 bytes are not referenced in this routine)
        push    ebx
        push    esi
        push    edi

        ; Put pCtx into ebx
        mov     ebx, [eax+8]

        ;PD3DI_RASTPRIM pP = pCtx->pPrim;
        mov     ecx, [ebx+RASTCTX_pPrim]

        ;while (pP)
        ;{
PrimLoop:
        test    ecx, ecx
        jz      ExitPrimLoop

        ;UINT16 uSpans = pP->uSpans;
        movzx   eax, word ptr [ecx+RASTPRIM_uSpans]
        mov     uSpans, eax

        ;PD3DI_RASTSPAN pS = (PD3DI_RASTSPAN)(pP + 1);
        lea     ebp, [ecx+SIZEOF_RASTPRIM]

SpanLoop:
        ; if (uSpans-- <= 0) break;
        ; uSpans was zero-extended from a word, so it is never negative when
        ; a primitive starts; the stored value ends at -1 on exit.
        sub     uSpans, 1
        jl      ExitSpanLoop

;-----------------------------------------------------------------------------
; Span setup (the inlined pfnBegin / LoopAny code starts here)
;-----------------------------------------------------------------------------
        movzx   eax, word ptr [ebp+RASTSPAN_uPix]
        mov     uPix, eax

        ; Non perspective correct: iUoW/iVoW are used directly as iU/iV.
        ; The 64-bit store at SPANITER_iU1 carries both values; the pixel loop
        ; reads the second one back through SPANITER_iV1.
        movq    mm5, [ebp+RASTSPAN_iUoW1]
        psrad   mm5, TEX_TO_FINAL_SHIFT
        movq    [ebx+RASTCTX_SI+SPANITER_iU1], mm5
        mov     dword ptr [ebx+RASTCTX_SI+SPANITER_iDW], 0
        mov     word ptr [ebx+RASTCTX_SI+SPANITER_iSpecialW], 0

        ; A zero-length span must not touch the surface. The pixel loop only
        ; tests the count after storing a pixel, so without this guard an
        ; empty span would still write one texel. eax still holds uPix here.
        test    eax, eax
        jz      ExitPixelLoop

        ; iSurfaceStep = (pP->uFlags & D3DI_RASTPRIM_X_DEC)
        ;                    ? -pCtx->iSurfaceStep : pCtx->iSurfaceStep;
        mov     eax, [ecx+RASTPRIM_uFlags]
        mov     edx, [ebx+RASTCTX_iSurfaceStep]
        test    eax, D3DI_RASTPRIM_X_DEC
        jz      StoreSurfaceStep
        neg     edx
StoreSurfaceStep:
        mov     iSurfaceStep, edx

        ; Cache the texture fields used by the pixel loop.
        mov     esi, [ebx+RASTCTX_pTexture]
        mov     edx, [esi+SPANTEX_iShiftU]
        mov     iShiftU, edx
        movzx   edx, word ptr [esi+SPANTEX_iShiftPitch]
        mov     iShiftPitch, edx
        movd    mm0, dword ptr [esi+SPANTEX_uMaskU]     ; Load U and V mask
        movq    MMWORD PTR uMaskU, mm0
        mov     edx, [esi+SPANTEX_pBits]
        mov     pBits, edx

        mov     edi, [ebp+RASTSPAN_pSurface]

        ; ---- Per-span constants, computed once instead of once per pixel ----
        ;
        ; iU and iV are shifted right not by (TEX_FINAL_SHIFT - iShiftU0) but
        ; by (TEX_FINAL_SHIFT - iShiftU0 - 6), followed by a fixed >> 6 in the
        ; loop. If textures have a max of 1024 then iShiftU0 is at most 10,
        ; which makes the first shift at most zero bits short; this is why 6
        ; was chosen. Monolith cases assume iLOD0 is zero.
        ;INT16 iShiftU0 = pTex->iShiftU - iLOD0;
        ;INT16 iShiftV0 = pTex->iShiftV - iLOD0;
        ; NOTE(review): the V shift is taken from the high word of the dword
        ; read at SPANTEX_iShiftU - presumably iShiftV; confirm against SPANTEX.
        movq    mm6, MMWORD PTR Val0x000a000a   ; This is TEX_FINAL_SHIFT - 6 = 10.
        movd    mm4, iShiftU
        psubw   mm6, mm4                        ; words: 10 - shiftU, 10 - shiftV
        movq    mm7, mm6
        pand    mm6, MMWORD PTR Val0xffff       ; mm6 = shift count for iU1
        psrld   mm7, 16                         ; mm7 = shift count for iV1

        ; Texture pitch cannot be calculated so it is derived from
        ; iShiftPitch: Makelow16one is shifted left by (iShiftPitch + 16) and
        ; ORed with itself, so each dword holds the word pair used by pmaddwd
        ; to compute U*1 + V*(1 << iShiftPitch).
        ; (assumes Makelow16one has 1 in the low 16 bits of each dword, as its
        ; name says - defined outside this file)
        mov     edx, iShiftPitch
        add     edx, 16
        movd    mm2, edx
        movq    mm3, MMWORD ptr Makelow16one
        pslld   mm3, mm2
        por     mm3, MMWORD ptr Makelow16one

;-----------------------------------------------------------------------------
; Pixel loop
;-----------------------------------------------------------------------------
PixelLoop:
        ; Bring iU/iV down to texel coordinates.
        movd    mm1, [ebx+RASTCTX_SI+SPANITER_iU1]
        psrad   mm1, mm6
        movd    mm2, [ebx+RASTCTX_SI+SPANITER_iV1]
        psrad   mm2, mm7
        punpckldq mm1, mm2                      ; mm1 = iV : iU (dwords)

        ;iU00 >>= 6;
        ;iV00 >>= 6;
        psrad   mm1, 6
        packssdw mm1, mm1                       ; Value needs to be packed since the
                                                ; wrap operation assumes UV in low 32 bits.

        ; Wrap only: the general purpose wrap/mirror code of the other
        ; monoliths is replaced by a single AND with the texture mask.
        ;iU00 &= uMaskU0; iV00 &= uMaskV0;
        pand    mm1, MMWORD PTR uMaskU

        ; Texel index = iU + (iV << iShiftPitch); texels are 2 bytes wide.
        pmaddwd mm1, mm3
        movd    eax, mm1
        shl     eax, 1
        add     eax, pBits

        ;pCtx->SI.TexCol[0] = pCtx->pfnTexRead(iU00, iV00, pTex->iShiftU,
        ;                                      pTex->pBits[iLOD0], &pCtx->Texture[0]);
        ; In the monolithic version the read and the buffer write are inlined:
        ; the 16-bit texel is copied to the surface unchanged.
        mov     dx, word ptr [eax]
        mov     [edi], dx

        dec     uPix                            ; uPix >= 1 here (empty spans are
        jle     ExitPixelLoop                   ; rejected before the loop)

        ; Doing update code after span length test so that an extra update is not done.
        movq    mm5, [ebp+RASTSPAN_iUoW1]
        paddd   mm5, [ecx+RASTPRIM_iDUoW1DX]
        movq    [ebp+RASTSPAN_iUoW1], mm5
        ; mm5 still contains iUoW and iVoW which are the iU and iV values for
        ; non perspective correct.
        psrad   mm5, TEX_TO_FINAL_SHIFT
        movq    [ebx+RASTCTX_SI+SPANITER_iU1], mm5

        add     edi, iSurfaceStep
        jmp     PixelLoop

ExitPixelLoop:
;-----------------------------------------------------------------------------
; LoopAny code ends here
;-----------------------------------------------------------------------------
        ;pS++;
        add     ebp, SIZEOF_RASTSPAN
        ;}
        jmp     SpanLoop

ExitSpanLoop:
        ;pP = pP->pNext;
        mov     ecx, [ecx+RASTPRIM_pNext]
        ;}
        jmp     PrimLoop

ExitPrimLoop:
        emms                                    ; leave MMX state before returning

        ;return S_OK;
        xor     eax, eax

        pop     edi
        pop     esi
        pop     ebx
        mov     esp, StackPos                   ; drops the 0Ch byte local area
        pop     ebp
        ret
END