Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

426 lines
12 KiB

;-----------------------------------------------------------------------------
;
; Monolith 2. Non-perspective 16 bit Z buffered 565
;
; Globals (ATTENTION. Need to move all globals to stack.)
;
; uSpans - Count containing the number of spans.
; StackPos - Saves stack position.
; uPix - Pixel count
; iSurfaceStep - what to add to screen pointer
; iZStep - What to add to Z buffer pointer
;
; (The below globals are used to save esi register which
; was normally used for pTex pointer. Now esi is used
; for z buffer pointer.)
; uMaskU
; iShiftU
; iShiftPitch
; pBits
;
; Register Usage
;
; esi - Z buffer pointer
; edi - Screen buffer pointer
;
; All other are temporary
;
;
; The only differences between this monolith and the regular
; MMX assembly code are:
;
; 1) Uses LE/GR Z compare code that all other monoliths use.
; 2) All texture info is stored in Globals to free up esi.
; 3) This code does Wrap only for texture addressing
; 4) Since there is no modulation or bi-linear and the source
; and destination color formats are the same, there is
; no need to convert to and from the internal color format.
; 5) esi is reserved for the Zbuffer
; 6) edi is reserved for the screen buffer.
; 7) Texture read does not use edi as pBits so that edi can be
; preserved for screen buffer.
;
;-----------------------------------------------------------------------------
INCLUDE iammx.inc
INCLUDE offs_acp.inc
; Names are read LSB to MSB, so B5G6R5 means five bits of blue starting
; at the LSB, then six bits of green, then five bits of red.
;TBD check to see if this value is correct.
COLOR_SHIFT equ 8
.586
.model flat
; Big separating lines separate the code into span code
; and loop code. If span and loop are not going to
; end up being combined then it will be easy to
; separate the code.
.data
; Need externs for all of the variables that are needed for various beads
EXTERN IncHighandLow16:MMWORD
EXTERN UFracVFracMask:MMWORD
EXTERN UV32to15Mask:MMWORD
EXTERN Makelow16one:MMWORD
EXTERN MaskKeepUValues:MMWORD
EXTERN MaskKeepVValues:MMWORD
EXTERN UFrac:MMWORD
EXTERN VFrac:MMWORD
EXTERN Zero:MMWORD
EXTERN memD3DTFG_POINT:MMWORD
EXTERN GiveUp:MMWORD
EXTERN LastW:MMWORD
EXTERN Val0x000a000a:MMWORD
EXTERN Val0xffff:MMWORD
EXTERN Val0x0000002000000020:MMWORD
EXTERN Val0x0000ffff0000ffff:MMWORD
EXTERN MaskRed565to888:MMWORD
EXTERN MaskGreen565to888:MMWORD
EXTERN MaskBlue565to888:MMWORD
EXTERN MaskRed555to888:MMWORD
EXTERN MaskGreen555to888:MMWORD
EXTERN MaskBlue555to888:MMWORD
EXTERN MaskAlpha1555to8888:MMWORD
EXTERN MaskRed1555to8888:MMWORD
EXTERN MaskGreen1555to8888:MMWORD
EXTERN MaskBlue1555to8888:MMWORD
; TBD. I think that I want to use 0xffff instead of 0xff. This will
; have to be checked. There is a value very similar to this in
; buf write.
EXTERN SetAlphato0xffff:MMWORD
EXTERN SetAlphato0xff:MMWORD
; TODO: These equates are identical to the ones in texread.mas. Maybe they should be in a common .inc file.
RedShift565to888 equ 8
GreenShift565to888 equ 5
BlueShift565to888 equ 3
RedShift555to888 equ 9
GreenShift555to888 equ 6
BlueShift555to888 equ 3
AlphaShift1555to8888 equ 16
RedShift1555to8888 equ 9
GreenShift1555to8888 equ 6
BlueShift1555to8888 equ 3
EXTERN Zero:MMWORD
EXTERN DW_One_One:MMWORD
EXTERN MaskOffAlpha:MMWORD
EXTERN ShiftTA:MMWORD
EXTERN Val0x00ff00ff00ff00ff:MMWORD
EXTERN Val0x000000ff00ff00ff:MMWORD
EXTERN Val0X0000000001000000:MMWORD
EXTERN AlphaVal128:MMWORD
EXTERN RGBVal128:MMWORD
EXTERN g_uDitherValue:MMWORD
EXTERN SetAlphato0xff:MMWORD
EXTERN u888to565RedBlueMask:MMWORD
EXTERN u888to565GreenMask:MMWORD
EXTERN u888to565Multiplier:MMWORD
EXTERN uVal0x000007ff03ff07ff:MMWORD
EXTERN uVal0x0000078003c00780:MMWORD
EXTERN u888to555RedBlueMask:MMWORD
EXTERN u888to555GreenMask:MMWORD
EXTERN u888to555Multiplier:MMWORD
EXTERN uVal0x000007ff07ff07ff:MMWORD
EXTERN uVal0x0000078007800780:MMWORD
;-----------------------------------------------------------------------------
; Span Variables
;
; These live in .data and are written by _MMXMLRast_2 on every call, so the
; state is shared between calls (see the "move all globals to stack" note in
; the file header).
;
; uMaskU      - qword written from the dword at SPANTEX_uMaskU (via movd, so
;               the upper dword is zero); ANDed with the packed U/V words
;               before the texel address is formed.
; StackPos    - esp captured right after "push ebp"; reloaded into esp by
;               the epilogue.
; uSpans      - spans left in the current primitive; counted down by SpanLoop.
; iShiftU     - dword copied from SPANTEX_iShiftU; both 16-bit halves are
;               used to derive the U and V shift counts.
; iShiftPitch - zero-extended word read at SPANTEX_iShiftPitch.
; pBits       - dword copied from SPANTEX_pBits; base address added to the
;               scaled texel offset.
uMaskU dq ?
StackPos dd ?
uSpans dd ?
iShiftU dd ?
iShiftPitch dd ?
pBits dd ?
;-----------------------------------------------------------------------------
;-----------------------------------------------------------------------------
; Loop Variables
;
; iSurfaceStep - per-pixel increment for edi (screen pointer); copied from
;                RASTCTX_iSurfaceStep and negated when D3DI_RASTPRIM_X_DEC
;                is set in the primitive flags.
; iZStep       - per-pixel increment for esi (Z buffer pointer); copied from
;                RASTCTX_iZStep and negated under the same condition.
; uPix         - pixels left in the current span (zero-extended
;                RASTSPAN_uPix), decremented once per pixel.
iSurfaceStep dd ?
iZStep dd ?
uPix dd ?
;-----------------------------------------------------------------------------
.code
;-----------------------------------------------------------------------------
; _MMXMLRast_2
;
; Walks the primitive list hanging off the raster context and, for every
; span of every primitive, runs a 16 bit Z test per pixel and copies one
; point-sampled, wrap-addressed 16 bit texel to the surface when the test
; passes.  Equivalent C is quoted in the ";" comments throughout.
;
; In:     [esp+4] at entry = pCtx (structure addressed via RASTCTX_* offsets)
; Out:    eax = 0 (the C comments call this S_OK)
; Stack:  returns with a plain "ret"; the argument is left for the caller.
; Saves:  ebx, esi, edi, ebp are restored before returning.
; MMX:    uses mm0-mm7 and executes emms before returning.
; Note:   all working state is kept in the file-level globals (StackPos,
;         uSpans, uPix, iZStep, iSurfaceStep, iShiftU, iShiftPitch, uMaskU,
;         pBits), so the routine is not reentrant.
;
; Register roles inside the loops
;   ebx - pCtx
;   ecx - pP, current primitive
;   ebp - pS, current span (NOT a frame pointer)
;   esi - Z buffer pointer          edi - screen buffer pointer
;   eax, edx - scratch
;   mm6 - U shift count   \
;   mm7 - V shift count    > computed once per span, must stay untouched
;   mm3 - texel address   /  inside PixelLoop
;         multiplier
;   mm0-mm2, mm4, mm5 - scratch
;-----------------------------------------------------------------------------
PUBLIC _MMXMLRast_2
_MMXMLRast_2:
        push    ebp
        mov     StackPos, esp           ; epilogue reloads esp from here
        mov     eax, esp                ; [eax] = saved ebp, [eax+4] = return
                                        ; address, [eax+8] = pCtx
        sub     esp, 0Ch                ; This will need to change if stack frame size changes.
        push    ebx
        push    esi
        push    edi

        ; Put pCtx into ebx
        mov     ebx, [eax+8]

        ;PD3DI_RASTPRIM pP = pCtx->pPrim;
        mov     ecx, [ebx+RASTCTX_pPrim]

        ;while (pP)
        ;{
PrimLoop:
        test    ecx, ecx
        jz      ExitPrimLoop

        ;UINT16 uSpans = pP->uSpans;
        movzx   eax, word ptr [ecx+RASTPRIM_uSpans]
        mov     uSpans, eax

        ;PD3DI_RASTSPAN pS = (PD3DI_RASTSPAN)(pP + 1);
        lea     ebp, [ecx+SIZEOF_RASTPRIM]

SpanLoop:
        ; uSpans starts in 0..65535, so the decrement goes negative exactly
        ; when no spans were left before it.
        dec     uSpans
        js      ExitSpanLoop

        ;pCtx->pfnBegin(pCtx, pP, pS);
;-----------------------------------------------------------------------------
; LoopAny code inserted here.  This is to get rid of an extra
; jump.
;-----------------------------------------------------------------------------
        ; Setup Code begins - get values to iterate
        movzx   eax, word ptr [ebp+RASTSPAN_uPix]
        mov     uPix, eax

        ; Non perspective correct: iUoW/iVoW are used directly as iU/iV.
        ; The qword store fills SPANITER_iU1 and the dword that follows it,
        ; which the pixel loop reads back as SPANITER_iV1.
        movq    mm5, [ebp+RASTSPAN_iUoW1]
        psrad   mm5, TEX_TO_FINAL_SHIFT
        movq    [ebx+RASTCTX_SI+SPANITER_iU1], mm5
        mov     dword ptr [ebx+RASTCTX_SI+SPANITER_iDW], 0
        mov     word ptr [ebx+RASTCTX_SI+SPANITER_iSpecialW], 0

        ; Pointer steps: taken from the context, negated when the primitive
        ; is flagged D3DI_RASTPRIM_X_DEC.
        mov     eax, [ebx+RASTCTX_iZStep]
        mov     edx, [ebx+RASTCTX_iSurfaceStep]
        test    dword ptr [ecx+RASTPRIM_uFlags], D3DI_RASTPRIM_X_DEC
        jz      StoreSpanSteps
        neg     eax
        neg     edx
StoreSpanSteps:
        mov     iZStep, eax
        mov     iSurfaceStep, edx

        ;******************************************
        ; Extra Globals are used here.
        ; esi is borrowed as the texture pointer until it is loaded with
        ; the Z buffer pointer below.
        mov     esi, [ebx+RASTCTX_pTexture]
        mov     edx, [esi+SPANTEX_iShiftU]      ; 32 bits: both 16-bit halves are used below
        mov     iShiftU, edx
        movzx   edx, word ptr [esi+SPANTEX_iShiftPitch]
        mov     iShiftPitch, edx
        movd    mm0, dword ptr [esi+SPANTEX_uMaskU] ; Load U and V mask
        movq    MMWORD PTR uMaskU, mm0
        mov     edx, [esi+SPANTEX_pBits]
        mov     pBits, edx

        ; ---- Per-span texture constants (hoisted out of PixelLoop) ----
        ;
        ; Doing UV calculation a little more accurate, exactly like C code.
        ; iU and iV are shifted right not by (TEX_FINAL_SHIFT - iShiftU0)
        ; but by (TEX_FINAL_SHIFT - iShiftU0 - 6), and by the remaining 6
        ; bits later in the pixel loop.
        ; If textures have a max of 1024 then iShiftU0 would be at most 10
        ; which would make (TEX_FINAL_SHIFT - iShiftU0 - 6) at least zero.
        ; This is why 6 was chosen.
        ;INT16 iShiftU0 = pTex->iShiftU - iLOD0;
        ;INT16 iShiftV0 = pTex->iShiftV - iLOD0;
        movq    mm6, MMWORD PTR Val0x000a000a   ; This is TEX_FINAL_SHIFT - 6 = 10.
        movd    mm4, iShiftU
        psubw   mm6, mm4                        ; word0 = 10 - low half, word1 = 10 - high half
        movq    mm7, mm6
        pand    mm6, MMWORD PTR Val0xffff       ; mm6 = U shift count
        psrld   mm7, 16                         ; mm7 = V shift count

        ; Texture Pitch cannot be calculated so it must be looked up in the
        ; iShiftPitch table.  The multiplier is Makelow16one with a copy of
        ; itself shifted left by (iShiftPitch + 16) ORed in, so that pmaddwd
        ; yields iU + (iV << iShiftPitch).
        ; NOTE(review): assumes Makelow16one holds 1 in the low word of each
        ; dword, as its name suggests - the constant is defined elsewhere.
        mov     edx, iShiftPitch
        add     edx, 16
        movd    mm2, edx
        movq    mm3, MMWORD PTR Makelow16one
        pslld   mm3, mm2
        por     mm3, MMWORD PTR Makelow16one

        mov     edi, [ebp+RASTSPAN_pSurface]
        mov     esi, [ebp+RASTSPAN_pZ]
        ;******************************************

PixelLoop:
        ; Ztestcode
        ; edx is uZ
        ; eax is uZB
        ; 16 bit unsigned format
        ;UINT16 uZ = (UINT16)(pS->uZ>>15);
        ;UINT16 uZB = *((UINT16*)pS->pZ);
        mov     edx, [ebp+RASTSPAN_uZ]
        movd    mm4, edx
        shr     edx, 15
        movzx   eax, word ptr [esi]

        ;pS->uZ += pP->iDZDX;
        ;if ((pCtx->iZXorMask)^(uZ > uZB))
        sub     eax, edx                        ; eax = uZB - uZ
        paddd   mm4, [ecx+RASTPRIM_iDZDX]       ; MMX ops leave EFLAGS alone
        movd    [ebp+RASTSPAN_uZ], mm4
        xor     eax, [ebx+RASTCTX_iZXorMask]    ; xor sets SF from its result
        js      FailLabel
        mov     word ptr [esi], dx              ; Z test passed: store new Z

        ; texturecode
        ; Apply the per-span shift counts to the current iU / iV.
        movd    mm1, [ebx+RASTCTX_SI+SPANITER_iU1]
        psrad   mm1, mm6
        movd    mm2, [ebx+RASTCTX_SI+SPANITER_iV1]
        psrad   mm2, mm7
        punpckldq mm1, mm2                      ; low dword = U, high dword = V

        ;iU00 >>= 6;
        ;iV00 >>= 6;
        psrad   mm1, 6
        packssdw mm1, mm1       ; Value needs to be packed since all wrap
                                ; operations assume UV in low 32 bits.

        ;UINT16 uMaskU0 = pTex->uMaskU >> iLOD0; UINT16 uMaskV0 = pTex->uMaskV >> iLOD0;
        ; Wrap only: AND with the saved mask.  Its upper dword is zero, so
        ; the duplicated upper pair of words is cleared as well.
        pand    mm1, MMWORD PTR uMaskU

        ; Texel index into the low dword, then scale by 2 bytes per texel
        ; and add the texture base.
        pmaddwd mm1, mm3
        movd    eax, mm1
        shl     eax, 1
        add     eax, pBits

        ;pCtx->SI.TexCol[0] = pCtx->pfnTexRead(iU00, iV00, pTex->iShiftU,
        ;    pTex->pBits[iLOD0], &pCtx->Texture[0]);
        mov     dx, word ptr [eax]

        ; Write Texture.  Source and destination share the 16 bit format,
        ; so the texel is stored unconverted.
        mov     [edi], dx

FailLabel:
        dec     uPix
        jle     ExitPixelLoop

        ; Doing update code after span length test so that an extra update is not done.
        movq    mm5, [ebp+RASTSPAN_iUoW1]
        paddd   mm5, [ecx+RASTPRIM_iDUoW1DX]
        movq    [ebp+RASTSPAN_iUoW1], mm5

        ; mm5 still contains iUoW and iVoW which are the iU and iV values for
        ; non perspective correct.
        psrad   mm5, TEX_TO_FINAL_SHIFT
        movq    [ebx+RASTCTX_SI+SPANITER_iU1], mm5

        add     esi, iZStep
        add     edi, iSurfaceStep
        jmp     PixelLoop

ExitPixelLoop:
        ; Loop code ends
;-----------------------------------------------------------------------------
; LoopAny code ends here
;-----------------------------------------------------------------------------
        ;pS++;
        add     ebp, SIZEOF_RASTSPAN
        ;}
        jmp     SpanLoop

ExitSpanLoop:
        ;pP = pP->pNext;
        mov     ecx, [ecx+RASTPRIM_pNext]
        ;}
        jmp     PrimLoop

ExitPrimLoop:
        ;_asm{
        emms
        ;}

        ;return S_OK;
        xor     eax, eax
        ;}
        pop     edi
        pop     esi
        pop     ebx
        mov     esp, StackPos           ; discards the 0Ch bytes reserved in the prologue
        pop     ebp
        ret
END