|
|
;---------------------------Module-Header------------------------------;
; Module Name: texspanr.asm
;
; Fast replace-mode texturing.
;
; Created: 11/15/1995
; Author: Otto Berkes [ottob]
;
; Copyright (c) 1995 Microsoft Corporation
;----------------------------------------------------------------------;
;;
;; Per-channel pixel constants.  rBits/gBits/bBits and rShift/gShift/bShift
;; are not defined in this file; they must be supplied before it is
;; assembled.
;;
;; xMask          : (2^xBits - 1) moved up to bit position xShift.
;; xRightShiftAdj : 16 minus the bit index just above the channel field.
;;

rMask           = ((1 SHL rBits) - 1) SHL rShift
gMask           = ((1 SHL gBits) - 1) SHL gShift
bMask           = ((1 SHL bBits) - 1) SHL bShift

rRightShiftAdj  = 16 - (rShift + rBits)
gRightShiftAdj  = 16 - (gShift + gBits)
bRightShiftAdj  = 16 - (bShift + bBits)
;;
;; Text aliases for GENGC fields addressed through ESI.
;;

TMASK_SUBDIV    equ     [esi].GENGC_tMaskSubDiv
TSHIFT_SUBDIV   equ     [esi].GENGC_tShiftSubDiv

;;
;; Build configuration.  Only FAST_REPLACE builds are supported by this
;; file; any other configuration reaches the .error line below
;; (presumably intended to stop the assembly -- confirm against the
;; assembler in use).
;;
;;   TEXPALETTE   : table indexed by HANDLE_PALETTE (4 bytes per entry).
;;   TEXIMAGE     : base address added to the computed texel offset.
;;   TEX_BPP_LOG2 : subtracted from the s/t shift counts when a texel
;;                  offset is formed (0 -> 1-byte texels, 1 -> 2-byte).
;;

if FAST_REPLACE

TEXPALETTE      equ     [esi].GENGC_texImageReplace

if (PALETTE_ONLY)
TEXIMAGE        equ     [esi].GENGC_texImage
else
TEXIMAGE        equ     [esi].GENGC_texImageReplace
endif

if PALETTE_ONLY
TEX_BPP_LOG2    = 0
elseif (BPP eq 8)
TEX_BPP_LOG2    = 0
else
TEX_BPP_LOG2    = 1
endif

else

.error

endif
;;
;; HANDLE_PALETTE
;;
;; PALETTE_ONLY builds: on entry EDX addresses a one-byte texel index.
;; The index is zero-extended in EBX and EDX is replaced with
;; TEXPALETTE + 4*index.  Clobbers EBX.
;;
;; Other builds: expands to nothing, so EDX keeps the texel address.
;;
;; The U/V tags are carried over from the original source (presumably
;; Pentium pipe assignments).
;;

if PALETTE_ONLY

HANDLE_PALETTE MACRO
        mov     bl, [edx]                       ; V
        and     ebx, 0ffh                       ;U
        mov     edx, TEXPALETTE                 ; V
        lea     edx, [edx+4*ebx]                ;U
ENDM

;;
;; HANDLE_PALETTEX
;;
;; NOTE(review): this macro is not invoked anywhere in the visible source.
;; It depends on TEMP2, which is not defined in the visible source, and
;; it leaves the palette entry address in EBX although GET_TEXEL_ACCUM
;; reads through EDX.  Confirm intent before using it.
;;

HANDLE_PALETTEX MACRO
        mov     al, [edx]                       ; V
        and     eax, 0ffh                       ;U
        mov     ebx, TEXPALETTE                 ; V
        mov     edx, TEMP2                      ;U
        lea     ebx, [ebx+4*eax]                ; V
        mov     TEMP2, edx                      ;U
        GET_TEXEL_ACCUM                         ; V
                                                ;U
ENDM

else

HANDLE_PALETTE MACRO
ENDM

endif
;;
;; TEMP aliases the GENGC_sResult slot.  GET_TEXEL_ADDRESS2 parks EAX
;; there while it uses EAX as scratch.
;;

TEMP            equ     [esi].GENGC_sResult

;;
;; PIXADVANCE var
;;
;; Step <var> forward by one pixel:
;;   BPP = 8   -> inc
;;   BPP = 16  -> add (BPP / 8)
;;   otherwise -> add the run-time GENGC_bytesPerPixel value
;;

if (BPP eq 8)

PIXADVANCE MACRO var
        inc     var
ENDM

elseif (BPP eq 16)

PIXADVANCE MACRO var
        add     var, (BPP / 8)
ENDM

else

PIXADVANCE MACRO var
        add     var, [esi].GENGC_bytesPerPixel
ENDM

endif
;;
;; GET_TEXEL_ADDRESS
;;
;; Memory-based texel fetch set-up:
;;
;;   EDX = TEXIMAGE
;;         + ((GENGC_tResult AND TMASK_SUBDIV) SHR (6  - TEX_BPP_LOG2))
;;         + ((GENGC_sResult AND GENGC_sMask)  SHR (16 - TEX_BPP_LOG2))
;;
;; and then steps the stored coordinates for the next pixel:
;;
;;   GENGC_sResult += GENGC_subDs
;;   GENGC_tResult += GENGC_subDt
;;
;; HANDLE_PALETTE runs last, so in PALETTE_ONLY builds EDX ends up
;; addressing the palette entry instead.
;;
;; Clobbers EAX, EBX, ECX, EDX and the flags.
;;

GET_TEXEL_ADDRESS MACRO

        mov     eax, TMASK_SUBDIV                       ;U
        mov     edx, [esi].GENGC_tResult                ; V
        mov     ebx, [esi].GENGC_sResult                ;U
        and     edx, eax                                ; V
        shr     edx, (6-TEX_BPP_LOG2)                   ;U
        mov     ecx, [esi].GENGC_sMask                  ; V
        and     ebx, ecx                                ;U
        mov     eax, DWORD PTR [esi].GENGC_sResult      ; V
        shr     ebx, (16-TEX_BPP_LOG2)                  ;U
        mov     ecx, [esi].GENGC_subDs                  ; V
        add     eax, ecx                                ;U
        add     edx, ebx                                ; V
        mov     ecx, TEXIMAGE                           ;U
        mov     ebx, [esi].GENGC_subDt                  ; V
        add     edx, ecx                                ;U
        mov     ecx, DWORD PTR [esi].GENGC_tResult      ; V
        add     ecx, ebx                                ;U
        mov     DWORD PTR [esi].GENGC_sResult, eax      ; V
        mov     DWORD PTR [esi].GENGC_tResult, ecx      ;U
        HANDLE_PALETTE

ENDM
;;
;; GET_TEXEL
;;
;; Load the texel addressed by EDX into AL (BPP = 8) or AX (BPP = 16).
;; No definition is provided for any other BPP.
;;

if (BPP eq 8)

GET_TEXEL MACRO
        mov     al, [edx]                       ; V get texel value
ENDM

elseif (BPP eq 16)

GET_TEXEL MACRO
        mov     ax, [edx]
ENDM

endif
;;
;; GET_TEXEL_ADDRESS2 count
;;
;; Register-based variant of GET_TEXEL_ADDRESS used by the unrolled loop.
;; The <count> parameter is accepted but never referenced in the body.
;;
;;   in     : ECX = current t value, EDI = current s value
;;   out    : EDX = texel address (palette entry address in PALETTE_ONLY
;;            builds, via HANDLE_PALETTE)
;;            ECX += GENGC_subDt, EDI += GENGC_subDs
;;   kept   : EAX (saved to TEMP and reloaded around its use as scratch)
;;   scratch: EBX, flags
;;

GET_TEXEL_ADDRESS2 MACRO count

        mov     ebx, TMASK_SUBDIV               ;U
        mov     edx, ecx                        ; V
        mov     TEMP, eax                       ;U
        and     edx, ebx                        ; V
        mov     eax, edi                        ;U
        mov     ebx, [esi].GENGC_sMask          ; V
        shr     edx, (6-TEX_BPP_LOG2)           ;U
        and     eax, ebx                        ; V
        shr     eax, (16-TEX_BPP_LOG2)          ;U
        mov     ebx, TEXIMAGE                   ; V
        add     edx, eax                        ;U
        mov     eax, [esi].GENGC_subDs          ; V
        add     edx, ebx                        ;U
        add     edi, eax                        ; V
        mov     ebx, [esi].GENGC_subDt          ;U
        mov     eax, TEMP                       ; V
        add     ecx, ebx                        ;U
        HANDLE_PALETTE

ENDM
;;
;; GET_TEXEL_ACCUM
;;
;; Load one texel from [EDX] into the low part of EAX, then rotate EAX
;; right by BPP bits.  Repeating this 32/BPP times fills EAX with
;; consecutive texels, the first one ending up in the lowest bits.
;;

if (BPP eq 8)

GET_TEXEL_ACCUM MACRO
        mov     al, [edx]                       ; V get texel value
        ror     eax, BPP                        ;U
ENDM

elseif (BPP eq 16)

GET_TEXEL_ACCUM MACRO
        mov     ax, [edx]
        ror     eax, BPP
ENDM

endif
;;
;; WRITE_TEXEL_DECEBP
;;
;; Copy the texel at [EDX] to the pixel just behind EDI (EDI is expected
;; to have been advanced already) and decrement EBP.  The store is a MOV,
;; so the flags produced by DEC EBP are still valid after the macro and
;; may be tested by the caller.
;;

if (BPP eq 8)

WRITE_TEXEL_DECEBP MACRO
        mov     al, [edx]
        dec     ebp
        mov     [edi-1], al
ENDM

elseif (BPP eq 16)

WRITE_TEXEL_DECEBP MACRO
        mov     ax, [edx]
        dec     ebp
        mov     [edi-2], ax
ENDM

endif
;;----------------------------------------------------------------------
;;
;; Start of the texture routine.
;;
;; On entry ECX is the base used for the GENGC_* fields.  The divide is
;; issued first so that the integer set-up below can run while it is in
;; flight.
;;
;; x87 stack after this section:
;;      ST(0) = 1/qw   (qw itself when GEN_TEXTURE_ORTHO is set)
;;      ST(1) = qw
;;
;; Integer registers after this section:
;;      ESI = GENGC base (copy of the incoming ECX)
;;      EDI = destination pixel pointer
;;      EBP = GENGC_SPAN_length
;;      EAX = GENGC_flags
;;
;;----------------------------------------------------------------------

;;
;; Start the divide:
;;

        mov     eax, [ecx].GENGC_flags
        fld     DWORD PTR [ecx].GENGC_SPAN_qw   ; qwAccum
        fld     DWORD PTR [ecx].GENGC_SPAN_qw   ; qwAccum  qwAccum
        test    eax, GEN_TEXTURE_ORTHO
        jne     @f
        fdivr   __One                           ; 1/qw     qwAccum
@@:

;;
;; Preserve the registers this routine overwrites (restored at the exits):
;;

        push    ebx                             ;U
        push    esi                             ; V
        push    edi                             ;U
        push    ebp                             ; V

        mov     esi, ecx                        ;U

;;
;; Seed the s, t and qw accumulators from the span start values.  The
;; first stores still go through ECX, exactly as in the original.
;;

        mov     eax, [ecx].GENGC_SPAN_s         ; V
        mov     ebx, [ecx].GENGC_SPAN_t         ;U
        mov     [ecx].GENGC_sAccum, eax         ; V
        mov     [esi].GENGC_tAccum, ebx         ;U
        mov     ecx, [esi].GENGC_SPAN_qw        ; V
        mov     edi, [esi].GENGC_SPAN_ppix      ;U
        mov     [esi].GENGC_qwAccum, ecx        ; V
        mov     eax, [esi].GENGC_flags          ;U
        mov     ebx, [esi].GENGC_SPAN_x         ; V

;;
;; Destination pointer: with SURFACE_TYPE_DIB set, offset SPAN_ppix by the
;; span's x position; otherwise write to GENGC_ColorsBits.
;;

        test    eax, SURFACE_TYPE_DIB           ;U
        jne     @f                              ; V
        mov     edi, [esi].GENGC_ColorsBits     ;U
        jmp     short @pixAccumDone
@@:
if (BPP eq 8)
        add     edi, ebx                        ; V
elseif (BPP eq 16)
        lea     edi, [edi + 2*ebx]
endif

@pixAccumDone:

        mov     ebp, [esi].GENGC_SPAN_length    ;U
;;
;; Leading pixels: write one pixel at a time until EDI is DWORD aligned.
;; This loop steps s/t/qw by the per-pixel deltas (SPAN_ds, SPAN_dt,
;; SPAN_dqwdx).
;;
;; EAX holds GENGC_flags at every entry to getAligned.
;; Leaves for spanExit as soon as EBP drops to zero or below.
;;

        test    edi, 3
        je      alignmentDone

getAligned:

        test    eax, GEN_TEXTURE_ORTHO
        je      @f

        ;; Ortho: the accumulators are used as the results directly.

        mov     edx, [esi].GENGC_sAccum
        mov     eax, [esi].GENGC_tAccum
        mov     DWORD PTR [esi].GENGC_sResult, edx
        mov     DWORD PTR [esi].GENGC_tResult, eax
        jmp     short @stResultDone1

@@:

        ;; Perspective: scale s and t by 1/qw, then start the next divide.
        ;; The trailing comments track the x87 stack, ST(0) first.

        fild    DWORD PTR [esi].GENGC_sAccum    ; s        1/qw     qwAccum
        fmul    ST, ST(1)                       ; s/qw     1/qw     qwAccum
        fild    DWORD PTR [esi].GENGC_tAccum    ; t        s/qw     1/qw   qwAccum
        fmulp   ST(2), ST                       ; s/qw     t/qw     qwAccum
        fistp   QWORD PTR [esi].GENGC_sResult   ; t/qw     qwAccum
        fistp   QWORD PTR [esi].GENGC_tResult   ; qwAccum
        fadd    DWORD PTR [esi].GENGC_SPAN_dqwdx; qwAccum
        fld     ST(0)                           ; qwAccum  qwAccum
        fdivr   __One                           ; 1/qw     qwAccum

@stResultDone1:

        ;; Normalize t (shift by TSHIFT_SUBDIV, clear the low 3 bits),
        ;; store it back, and form the texel address in EDX.

        mov     cl, TSHIFT_SUBDIV               ;U
        mov     edx, [esi].GENGC_tResult        ; V
        sar     edx, cl                         ;UV (4)
        and     edx, NOT 7                      ;U
        mov     ebx, [esi].GENGC_sResult        ; V
        mov     [esi].GENGC_tResult, edx        ;U
        and     edx, TMASK_SUBDIV               ; V
        shr     edx, (6-TEX_BPP_LOG2)           ;U
        and     ebx, [esi].GENGC_sMask          ; V
        shr     ebx, (16-TEX_BPP_LOG2)          ;U
        mov     ecx, TEXIMAGE                   ; V
        add     edx, ecx                        ;U
        PIXADVANCE edi                          ; V
        add     edx, ebx                        ;U
        HANDLE_PALETTE

        ;; Step the accumulators by one pixel.

        mov     eax, [esi].GENGC_sAccum         ; V
        mov     ebx, [esi].GENGC_tAccum         ;U
        add     eax, [esi].GENGC_SPAN_ds        ; V
        add     ebx, [esi].GENGC_SPAN_dt        ;U
        mov     [esi].GENGC_sAccum, eax         ; V
        mov     [esi].GENGC_tAccum, ebx         ;U

        WRITE_TEXEL_DECEBP

        ;; JLE consumes the flags left by DEC EBP inside the macro.

        jle     spanExit                        ; V
        test    edi, 3
        mov     eax, [esi].GENGC_flags          ; MOV keeps the TEST flags
        jne     getAligned
alignmentDone:

;;
;; EDI is now DWORD aligned (or was from the start).  Set up the first
;; sub-span:
;;
;;   1. sResult/tResult       = coordinates at the current position
;;   2. sAccum/tAccum        += sStepX/tStepX
;;   3. sResultNew/tResultNew = coordinates at the stepped position
;;   4. subDs/subDt           = (New - current) SAR 3
;;
;; EAX holds GENGC_flags on entry.  The x87 comments list the stack with
;; ST(0) first.
;;

        test    eax, GEN_TEXTURE_ORTHO
        je      @f

        mov     edx, [esi].GENGC_sAccum
        mov     eax, [esi].GENGC_tAccum
        mov     DWORD PTR [esi].GENGC_sResult, edx
        mov     DWORD PTR [esi].GENGC_tResult, eax
        jmp     short @stResultDone2

@@:

        fild    DWORD PTR [esi].GENGC_sAccum    ; s        1/qw     qwAccum
        fmul    ST, ST(1)                       ; s/qw     1/qw     qwAccum
        fild    DWORD PTR [esi].GENGC_tAccum    ; t        s/qw     1/qw   qwAccum
        fmulp   ST(2), ST                       ; s/qw     t/qw     qwAccum
        fistp   QWORD PTR [esi].GENGC_sResult   ; t/qw     qwAccum
        fistp   QWORD PTR [esi].GENGC_tResult   ; qwAccum
        fadd    DWORD PTR [esi].GENGC_qwStepX   ; qwAccum
        fld     ST(0)                           ; qwAccum  qwAccum
        fdivr   __One                           ; 1/qw     qwAccum

@stResultDone2:

        ;; Step the accumulators by one sub-span, then pick up the
        ;; current results: EAX = sResult, EBX = normalized tResult.
        ;; Both stay live until the delta computation below.

        mov     eax, [esi].GENGC_sAccum         ; V
        mov     ebx, [esi].GENGC_tAccum         ;U
        add     eax, [esi].GENGC_sStepX         ; V
        add     ebx, [esi].GENGC_tStepX         ;U
        mov     [esi].GENGC_sAccum, eax         ; V
        mov     [esi].GENGC_tAccum, ebx         ;U
        mov     eax, [esi].GENGC_sResult        ; V
        mov     ebx, [esi].GENGC_tResult        ;U
        mov     cl, TSHIFT_SUBDIV               ; V
        sar     ebx, cl                         ;UV (4)
        and     ebx, NOT 7                      ;U
        mov     ecx, [esi].GENGC_flags          ; V
        mov     [esi].GENGC_tResult, ebx        ;U
        test    ecx, GEN_TEXTURE_ORTHO          ; V
        je      @f

        mov     ecx, [esi].GENGC_sAccum
        mov     edx, [esi].GENGC_tAccum
        mov     DWORD PTR [esi].GENGC_sResultNew, ecx
        mov     DWORD PTR [esi].GENGC_tResultNew, edx
        jmp     short @stResultDone3

;; The divide started above may not have finished yet, so some cycles
;; may be lost here.

@@:

        ;; Leaves a single value (qwAccum) on the x87 stack; the ortho
        ;; path above leaves the original two.

        fild    DWORD PTR [esi].GENGC_sAccum    ; s        1/qw     qwAccum
        fmul    ST, ST(1)                       ; s/qw     1/qw     qwAccum
        fild    DWORD PTR [esi].GENGC_tAccum    ; t        s/qw     1/qw   qwAccum
        fmulp   ST(2), ST                       ; s/qw     t/qw     qwAccum
        fistp   QWORD PTR [esi].GENGC_sResultNew; t/qw     qwAccum
        fistp   QWORD PTR [esi].GENGC_tResultNew; qwAccum
        fadd    DWORD PTR [esi].GENGC_qwStepX   ; qwAccum

@stResultDone3:

        ;; Per-pixel deltas across the sub-span.

        mov     cl, TSHIFT_SUBDIV               ;U
        mov     edx, [esi].GENGC_tResultNew     ; V
        sar     edx, cl                         ;UV (4)
        and     edx, NOT 7                      ;U
        mov     ecx, [esi].GENGC_sResultNew     ; V
        mov     [esi].GENGC_tResultNew, edx     ;U
        sub     ecx, eax                        ; V
        sar     ecx, 3                          ;U
        sub     edx, ebx                        ; V
        sar     edx, 3                          ;U
        mov     [esi].GENGC_subDs, ecx          ; V
        mov     [esi].GENGC_subDt, edx          ;U
;;
;; Decide between the unrolled DWORD loop and the pixel-at-a-time path.
;;
;; The unrolled loop writes one DWORD per pass (4 pixels at BPP 8,
;; 2 pixels otherwise), so a remaining count below that goes to
;; singlePixels.  Only the low 16 bits of EBP are examined.
;;

if (BPP eq 8)
        test    ebp, 0fffch                     ;U
else
        test    ebp, 0fffeh                     ;U
endif
        je      singlePixels

;;
;; From here on EBP is a packed counter: the low word is the number of
;; pixels left, and 7 is added into the upper word as a countdown that
;; the loop decrements together with the pixel count.
;;

        add     ebp, 070000h

;;
;; Loop register assignment:
;;      GENGC_pixAccum = destination pointer (EDI is reused for s)
;;      ECX = t,  EDI = s,  EAX = GENGC_flags
;;

        mov     [esi].GENGC_pixAccum, edi
        mov     ecx, [esi].GENGC_tResult
        mov     eax, [esi].GENGC_flags
        mov     edi, [esi].GENGC_sResult
loopTop:

;;
;; Outer loop entry, reached once per sub-span with EAX = GENGC_flags.
;; For perspective spans the next divide is started here so that it
;; overlaps the texel fetches below.  x87 comments list ST(0) first.
;;

        test    eax, GEN_TEXTURE_ORTHO
        jne     @f

        fld     ST(0)                           ; qwAccum  qwAccum
        fadd    DWORD PTR [esi].GENGC_qwStepX   ; qwAccum+ qwAccum
        fxch    ST(1)                           ; qwAccum  qwAccum+
        fdivr   __One                           ; 1/qw     qwAccum+ -- let the divide rip!

@@:

loopTopNoDiv:

;;
;; Inner loop: gather 32/BPP texels into EAX and store them as a single
;; DWORD at the destination kept in GENGC_pixAccum.
;;
;; The SUB on EBP lowers the pixel count (low word) and the sub-span
;; countdown (upper word) at the same time.  The MOVs after it leave the
;; flags alone, so the JLE that follows still sees the SUB result: it is
;; taken once the countdown has gone negative, i.e. after 8 pixels.
;;

if (BPP eq 8)

        GET_TEXEL_ADDRESS2
        GET_TEXEL_ACCUM
        GET_TEXEL_ADDRESS2
        GET_TEXEL_ACCUM
        GET_TEXEL_ADDRESS2
        GET_TEXEL_ACCUM
        GET_TEXEL_ADDRESS2
        mov     ebx, [esi].GENGC_pixAccum       ; V
        add     ebx, 4                          ;U
        GET_TEXEL_ACCUM                         ; V
                                                ;U
        sub     ebp, 040004h                    ; V
        mov     [esi].GENGC_pixAccum, ebx       ;U
        mov     [ebx-4], eax                    ; V

else

        GET_TEXEL_ADDRESS2
        GET_TEXEL_ACCUM
        GET_TEXEL_ADDRESS2
        mov     ebx, [esi].GENGC_pixAccum
        add     ebx, 4
        GET_TEXEL_ACCUM
        sub     ebp, 020002h
        mov     [esi].GENGC_pixAccum, ebx
        mov     [ebx-4], eax

endif

        jle     doSubDiv                        ;U

        ;; Still inside the sub-span: carry on while at least one more
        ;; full DWORD of pixels is left, otherwise finish the right edge.

if (BPP eq 8)
        test    ebp, 0fffch                     ; V
else
        test    ebp, 0fffeh
endif
        je      doRightEdgePixels               ;U
        jmp     loopTopNoDiv                    ; V
doRightEdgePixels:

;;
;; Fewer pixels than one full DWORD are left.  Exit straight away if the
;; count (low word of EBP) is zero; otherwise move the loop state back
;; into the GENGC slots that GET_TEXEL_ADDRESS works on and write the
;; remaining pixels one at a time.
;;

        test    ebp, 0ffffh                     ; V
        je      spanExit                        ;U

        mov     [esi].GENGC_sResult, edi
        mov     [esi].GENGC_tResult, ecx
        mov     edi, [esi].GENGC_pixAccum

rightEdgePixels:

        PIXADVANCE edi                          ;U

        GET_TEXEL_ADDRESS
        GET_TEXEL

        ;; EDI has already been advanced, so the store targets the pixel
        ;; just behind it.

if (BPP eq 8)
        sub     ebp, 010001h                    ;U
        mov     [edi-1], al                     ; V
elseif (BPP eq 16)
        sub     ebp, 010001h
        mov     [edi-2], ax
endif

        test    ebp, 0ffffh                     ;U
        jne     rightEdgePixels                 ; V

        ;; Falls through into the exit code that follows.
;;
;; Common exit.  Every path that arrives here has two values on the x87
;; stack; discard both, restore the registers saved at entry (in reverse
;; push order) and return.
;;

spanExit:

        fstp    ST(0)
        fstp    ST(0)

        pop     ebp
        pop     edi
        pop     esi
        pop     ebx

        ret     0
singlePixels:

;;
;; Short-span path: fewer pixels than one DWORD remained after alignment.
;; Here EBP is still a plain pixel count and EDI is still the destination
;; pointer.  One pixel is written per pass.
;;

        PIXADVANCE edi                          ;U

        GET_TEXEL_ADDRESS
        GET_TEXEL

        dec     ebp

if (BPP eq 8)
        mov     [edi-1], al                     ; V
elseif (BPP eq 16)
        mov     [edi-2], ax
endif

        jg      singlePixels                    ; V  flags still from DEC EBP

;;
;; Exit for this path.  A perspective span has only one x87 value left
;; at this point, while an ortho span still has two, so the second pop
;; is done only when GEN_TEXTURE_ORTHO is set.
;;

        fstp    ST(0)
        mov     eax, [esi].GENGC_flags          ; read before ESI is restored
        pop     ebp
        pop     edi
        pop     esi
        pop     ebx
        test    eax, GEN_TEXTURE_ORTHO
        je      @f
        fstp    ST(0)
@@:

        ret     0
;;
;; Sub-span boundary.  The unrolled loop jumps here when the countdown in
;; the upper word of EBP goes negative.  This code
;;
;;   * rearms the countdown (add 8 to the upper word),
;;   * hands off to doRightEdgePixels when less than one full DWORD of
;;     pixels is left,
;;   * otherwise steps sAccum/tAccum by sStepX/tStepX, makes the previous
;;     "New" results the current ones (EDI = s, EBX = t), computes fresh
;;     sResultNew/tResultNew and the per-pixel deltas subDs/subDt, and
;;     re-enters the loop with ECX = t, EDI = s, EAX = GENGC_flags.
;;
;; The original followed the first test with a second one
;; (test ebp, 0ffffh / je spanExit).  That branch could never be taken:
;; whenever the low word of EBP is zero the narrower mask below is zero
;; as well, so control has already gone to doRightEdgePixels, which makes
;; the same zero check itself.  The dead pair has been removed; nothing
;; after it consumed its flags.
;;

doSubDiv:

        add     ebp, 080000h

        mov     eax, [esi].GENGC_sAccum

if (BPP eq 8)
        test    ebp, 0fffch                     ; V
else
        test    ebp, 0fffeh
endif
        je      doRightEdgePixels               ;U

        mov     ecx, [esi].GENGC_flags

        mov     ebx, [esi].GENGC_tAccum

;;
;; Increment the big S and T steps, and pick up the results computed for
;; this boundary on the previous pass:
;;

        add     eax, [esi].GENGC_sStepX
        add     ebx, [esi].GENGC_tStepX
        mov     [esi].GENGC_sAccum, eax
        mov     [esi].GENGC_tAccum, ebx
        mov     edi, [esi].GENGC_sResultNew
        mov     ebx, [esi].GENGC_tResultNew

        test    ecx, GEN_TEXTURE_ORTHO
        je      @f

;;
;; Ortho case: the stepped accumulators are the new results.
;;

        mov     edx, DWORD PTR [esi].GENGC_tAccum
        mov     DWORD PTR [esi].GENGC_sResultNew, eax
        mov     DWORD PTR [esi].GENGC_tResultNew, edx
        jmp     short @stResultDone4

;;
;; Perspective case: scale by the 1/qw produced by the divide started at
;; loopTop.  x87 comments list ST(0) first; one value remains afterwards.
;;

@@:

        fild    DWORD PTR [esi].GENGC_sAccum    ; s        1/qw     qwAccum
        fmul    ST, ST(1)                       ; s/qw     1/qw     qwAccum
        fild    DWORD PTR [esi].GENGC_tAccum    ; t        s/qw     1/qw   qwAccum
        fmulp   ST(2), ST                       ; s/qw     t/qw     qwAccum
        fistp   QWORD PTR [esi].GENGC_sResultNew; t/qw     qwAccum
        fistp   QWORD PTR [esi].GENGC_tResultNew; qwAccum

@stResultDone4:

;;
;; Now, calculate the per-pixel deltas:
;;

        mov     cl, TSHIFT_SUBDIV               ;U
        mov     edx, [esi].GENGC_tResultNew     ; V
        sar     edx, cl                         ;UV (4)
        mov     ecx, [esi].GENGC_sResultNew     ;U
        and     edx, NOT 7                      ; V
        sub     ecx, edi                        ;U
        mov     [esi].GENGC_tResultNew, edx     ; V
        sar     ecx, 3                          ;U
        sub     edx, ebx                        ; V
        sar     edx, 3                          ;U
        mov     [esi].GENGC_subDs, ecx          ; V
        mov     [esi].GENGC_subDt, edx          ;U
        mov     ecx, ebx                        ; V  ECX = current t for the loop
        mov     eax, [esi].GENGC_flags          ;U
        jmp     loopTop                         ; V
|