; Source: mirror of https://github.com/tongzx/nt5src
;---------------------------Module-Header------------------------------;
; Module Name: texspanr.asm
;
; Fast replace-mode texturing.
;
; Created: 11/15/1995
; Author: Otto Berkes [ottob]
;
; Copyright (c) 1995 Microsoft Corporation
;----------------------------------------------------------------------;

;;
;; Channel bit masks: a run of rBits/gBits/bBits one-bits positioned at
;; bit rShift/gShift/bShift.  Those width/shift symbols are not defined in
;; this part of the file and must be set before this point is assembled.
;;

rMask           = ((1 SHL rBits) - 1) SHL rShift
gMask           = ((1 SHL gBits) - 1) SHL gShift
bMask           = ((1 SHL bBits) - 1) SHL bShift

;;
;; 16 minus the bit position immediately above each channel's top bit.
;;

rRightShiftAdj  = 16 - (rShift + rBits)
gRightShiftAdj  = 16 - (gShift + gBits)
bRightShiftAdj  = 16 - (bShift + bBits)

;;
;; Context fields (addressed through ESI) holding the mask and shift count
;; that are applied to T values before they are turned into texel offsets.
;;

TMASK_SUBDIV    equ     [esi].GENGC_tMaskSubDiv
TSHIFT_SUBDIV   equ     [esi].GENGC_tShiftSubDiv

;;
;; Only the FAST_REPLACE configuration is supported here.
;;
;;   TEXPALETTE    table indexed by HANDLE_PALETTE (4 bytes per entry)
;;   TEXIMAGE      base address added to every computed texel offset
;;   TEX_BPP_LOG2  amount by which the s/t address shifts are shortened,
;;                 i.e. texel offsets are scaled by (1 SHL TEX_BPP_LOG2)
;;

if FAST_REPLACE

TEXPALETTE      equ     [esi].GENGC_texImageReplace

if PALETTE_ONLY
TEXIMAGE        equ     [esi].GENGC_texImage
else
TEXIMAGE        equ     [esi].GENGC_texImageReplace
endif

if PALETTE_ONLY
TEX_BPP_LOG2    = 0
elseif (BPP eq 8)
TEX_BPP_LOG2    = 0
else
TEX_BPP_LOG2    = 1
endif

else

        ;; Force an assembly error: there is no non-FAST_REPLACE variant.
        .err    <FAST_REPLACE must be nonzero to assemble this file>

endif

if PALETTE_ONLY

;;
;; HANDLE_PALETTE
;;
;; In:     edx = address of a one-byte texel
;; Out:    edx = TEXPALETTE + 4 * (byte at the original [edx])
;; Clobb:  ebx (left holding the zero-extended byte)
;;

HANDLE_PALETTE MACRO
        mov     bl, [edx]               ; V
        and     ebx, 0ffh               ;U
        mov     edx, TEXPALETTE         ; V
        lea     edx, [edx+4*ebx]        ;U
ENDM

;;
;; HANDLE_PALETTEX
;;
;; NOTE(review): nothing in the visible source invokes this macro, and it
;; refers to TEMP2, which is not defined in the visible source.  It also
;; builds the table address in ebx while GET_TEXEL_ACCUM reads through
;; edx.  Left exactly as found; confirm whether it is dead code.
;;

HANDLE_PALETTEX MACRO
        mov     al, [edx]               ; V
        and     eax, 0ffh               ;U
        mov     ebx, TEXPALETTE         ; V
        mov     edx, TEMP2              ;U
        lea     ebx, [ebx+4*eax]        ; V
        mov     TEMP2, edx              ;U
        GET_TEXEL_ACCUM                 ; V
                                        ;U
ENDM

else

;;
;; HANDLE_PALETTE expands to nothing when PALETTE_ONLY is zero, so edx is
;; used as the texel address unchanged.
;;

HANDLE_PALETTE MACRO
ENDM

endif

;;
;; Scratch dword.  It is the same storage as GENGC_sResult, so anything
;; parked here overwrites the in-memory sResult value.
;;

TEMP            equ     [esi].GENGC_sResult

;;
;; PIXADVANCE var
;;
;; Advances 'var' by one pixel unit: 1 byte when BPP is 8, BPP/8 bytes
;; when BPP is 16, otherwise by the GENGC_bytesPerPixel field read
;; through ESI.  Modifies the arithmetic flags.
;;

if (BPP eq 8)

PIXADVANCE MACRO var
        inc     var
ENDM

elseif (BPP eq 16)

PIXADVANCE MACRO var
        add     var, (BPP / 8)
ENDM

else

PIXADVANCE MACRO var
        add     var, [esi].GENGC_bytesPerPixel
ENDM

endif

;;
;; GET_TEXEL_ADDRESS
;;
;; Get pointer to current texel value in EDX, then step the in-memory
;; coordinates to the next texel.
;;
;; In:     esi = context; GENGC_sResult / GENGC_tResult = current s, t
;; Out:    edx = TEXIMAGE
;;               + ((tResult AND TMASK_SUBDIV) SHR (6  - TEX_BPP_LOG2))
;;               + ((sResult AND sMask)        SHR (16 - TEX_BPP_LOG2)),
;;               then passed through HANDLE_PALETTE
;;         GENGC_sResult += GENGC_subDs
;;         GENGC_tResult += GENGC_subDt
;; Clobb:  eax, ebx, ecx, flags
;;
;; The U/V annotations record the instruction pairing the code was laid
;; out for; keep the statement order when editing.
;;

GET_TEXEL_ADDRESS MACRO

        mov     eax, TMASK_SUBDIV                       ;U
        mov     edx, [esi].GENGC_tResult                ; V
        mov     ebx, [esi].GENGC_sResult                ;U
        and     edx, eax                                ; V
        shr     edx, (6-TEX_BPP_LOG2)                   ;U
        mov     ecx, [esi].GENGC_sMask                  ; V
        and     ebx, ecx                                ;U
        mov     eax, DWORD PTR [esi].GENGC_sResult      ; V
        shr     ebx, (16-TEX_BPP_LOG2)                  ;U
        mov     ecx, [esi].GENGC_subDs                  ; V
        add     eax, ecx                                ;U
        add     edx, ebx                                ; V
        mov     ecx, TEXIMAGE                           ;U
        mov     ebx, [esi].GENGC_subDt                  ; V
        add     edx, ecx                                ;U
        mov     ecx, DWORD PTR [esi].GENGC_tResult      ; V
        add     ecx, ebx                                ;U
        mov     DWORD PTR [esi].GENGC_sResult, eax      ; V
        mov     DWORD PTR [esi].GENGC_tResult, ecx      ;U
        HANDLE_PALETTE

ENDM

;;
;; GET_TEXEL
;;
;; Loads the texel at [edx] into al (BPP 8) or ax (BPP 16).  The upper
;; bits of eax are left as they were.  The macro is not defined for any
;; other BPP value.
;;

if (BPP eq 8)

GET_TEXEL MACRO
        mov     al, [edx]               ; V get texel value
ENDM

elseif (BPP eq 16)

GET_TEXEL MACRO
        mov     ax, [edx]
ENDM

endif

;;
;; GET_TEXEL_ADDRESS2 count
;;
;; Register-resident variant of the texel address computation.
;;
;; In:     ecx = current t, edi = current s, esi = context
;; Out:    edx = final texel address (after HANDLE_PALETTE)
;;         edi += GENGC_subDs, ecx += GENGC_subDt
;; Keeps:  eax -- it is parked in TEMP while used as scratch and reloaded
;;         before HANDLE_PALETTE.  TEMP is the GENGC_sResult slot, so the
;;         in-memory sResult is overwritten by this macro.
;; Clobb:  ebx, flags
;;
;; 'count' is accepted but never referenced in the body; the invocations
;; in this file pass no argument.
;;

GET_TEXEL_ADDRESS2 MACRO count

        mov     ebx, TMASK_SUBDIV               ;U
        mov     edx, ecx                        ; V
        mov     TEMP, eax                       ;U
        and     edx, ebx                        ; V
        mov     eax, edi                        ;U
        mov     ebx, [esi].GENGC_sMask          ; V
        shr     edx, (6-TEX_BPP_LOG2)           ;U
        and     eax, ebx                        ; V
        shr     eax, (16-TEX_BPP_LOG2)          ;U
        mov     ebx, TEXIMAGE                   ; V
        add     edx, eax                        ;U
        mov     eax, [esi].GENGC_subDs          ; V
        add     edx, ebx                        ;U
        add     edi, eax                        ; V
        mov     ebx, [esi].GENGC_subDt          ;U
        mov     eax, TEMP                       ; V
        add     ecx, ebx                        ;U
        HANDLE_PALETTE

ENDM

;;
;; GET_TEXEL_ACCUM
;;
;; Loads the texel at [edx] into the low BPP bits of eax and rotates eax
;; right by BPP.  After 32/BPP invocations eax holds that many texels,
;; with the first one fetched in the lowest bits.  Not defined for BPP
;; values other than 8 and 16.
;;

if (BPP eq 8)

GET_TEXEL_ACCUM MACRO
        mov     al, [edx]               ; V get texel value
        ror     eax, BPP                ;U
ENDM

elseif (BPP eq 16)

GET_TEXEL_ACCUM MACRO
        mov     ax, [edx]
        ror     eax, BPP
ENDM

endif

;;
;; WRITE_TEXEL_DECEBP
;;
;; Copies one texel from [edx] to the pixel just below edi ([edi-1] for
;; BPP 8, [edi-2] for BPP 16) and decrements ebp.  The mov instructions do
;; not touch the flags, so on exit they are those produced by 'dec ebp'.
;; Clobbers al/ax.  Not defined for other BPP values.
;;

if (BPP eq 8)

WRITE_TEXEL_DECEBP MACRO
        mov     al, [edx]
        dec     ebp
        mov     [edi-1], al
ENDM

elseif (BPP eq 16)

WRITE_TEXEL_DECEBP MACRO
        mov     ax, [edx]
        dec     ebp
        mov     [edi-2], ax
ENDM

endif

;;----------------------------------------------------------------------
;;
;; This is the start of the texture routine.  Kick off the divide, and use
;; the dead time to set up all of the accumulators and other variables.
;;
;; In:   ecx = context pointer (copied into esi below)
;; Regs: ebx, esi, edi, ebp are pushed here and popped at the exits.
;; x87:  two entries are pushed here; every exit path pops them again.
;;
;;----------------------------------------------------------------------

;;
;; Start the divide (skipped when GEN_TEXTURE_ORTHO is set, in which case
;; the top of the x87 stack simply stays qwAccum):
;;

        mov     eax, [ecx].GENGC_flags
        fld     DWORD PTR [ecx].GENGC_SPAN_qw   ;qwAccum
        fld     DWORD PTR [ecx].GENGC_SPAN_qw   ;qwAccum qwAccum
        test    eax, GEN_TEXTURE_ORTHO
        jne     @f
        fdivr   __One                           ;1/qw qwAccum
@@:

;;
;; Save the registers that we need to:
;;

        push    ebx                             ;U
        push    esi                             ; V
        push    edi                             ;U
        push    ebp                             ; V

        mov     esi, ecx                        ;U

;;
;; Set up accumulators.  ecx and esi address the same context until ecx
;; is reloaded with SPAN_qw.
;;

        mov     eax, [ecx].GENGC_SPAN_s         ; V
        mov     ebx, [ecx].GENGC_SPAN_t         ;U
        mov     [ecx].GENGC_sAccum, eax         ; V
        mov     [esi].GENGC_tAccum, ebx         ;U
        mov     ecx, [esi].GENGC_SPAN_qw        ; V
        mov     edi, [esi].GENGC_SPAN_ppix      ;U
        mov     [esi].GENGC_qwAccum, ecx        ; V
        mov     eax, [esi].GENGC_flags          ;U
        mov     ebx, [esi].GENGC_SPAN_x         ; V

;;
;; Pick the destination pointer: SPAN_ppix offset by SPAN_x pixels when
;; SURFACE_TYPE_DIB is set, otherwise GENGC_ColorsBits as-is.
;;

        test    eax, SURFACE_TYPE_DIB           ;U
        jne     @f                              ; V
        mov     edi, [esi].GENGC_ColorsBits     ;U
        jmp     short @pixAccumDone
@@:
if (BPP eq 8)
        add     edi, ebx                        ; V
elseif (BPP eq 16)
        lea     edi, [edi + 2*ebx]
endif

@pixAccumDone:

        ;; ebp = pixel count for the span; eax still holds GENGC_flags.
        mov     ebp, [esi].GENGC_SPAN_length    ;U

;;
;; Before we get into the main loop, do pixel-by-pixel writes until
;; we're DWORD aligned:
;;
;; In:  eax = GENGC_flags, edi = destination pointer, ebp = pixel count.
;; Each pass writes one pixel, steps s/t by SPAN_ds/SPAN_dt and (on the
;; perspective path) qw by SPAN_dqwdx, and leaves two x87 entries.
;;

        test    edi, 3
        je      alignmentDone

getAligned:

        test    eax, GEN_TEXTURE_ORTHO
        je      @f

        ;; Ortho: the accumulators are used directly as the results.
        mov     edx, [esi].GENGC_sAccum
        mov     eax, [esi].GENGC_tAccum
        mov     DWORD PTR [esi].GENGC_sResult, edx
        mov     DWORD PTR [esi].GENGC_tResult, eax
        jmp     short @stResultDone1

@@:

        ;; Perspective: scale s and t by 1/qw, then start the next divide.
        ;; The fistp stores are QWORD-sized, i.e. 8 bytes per result slot.
        fild    DWORD PTR [esi].GENGC_sAccum    ; s        1/qw       qwAccum
        fmul    ST, ST(1)                       ; s/qw     1/qw       qwAccum
        fild    DWORD PTR [esi].GENGC_tAccum    ; t        s/qw       1/qw     qwAccum
        fmulp   ST(2), ST                       ; s/qw     t/qw       qwAccum
        fistp   QWORD PTR [esi].GENGC_sResult   ; t/qw     qwAccum
        fistp   QWORD PTR [esi].GENGC_tResult   ; qwAccum
        fadd    DWORD PTR [esi].GENGC_SPAN_dqwdx; qwAccum
        fld     ST(0)                           ; qwAccum  qwAccum
        fdivr   __One                           ; 1/qw     qwAccum

@stResultDone1:

        ;; tResult = (tResult SAR TSHIFT_SUBDIV) AND NOT 7, then form the
        ;; texel address in edx and advance the destination pointer.
        mov     cl, TSHIFT_SUBDIV               ;U
        mov     edx, [esi].GENGC_tResult        ; V
        sar     edx, cl                         ;UV (4)
        and     edx, NOT 7                      ;U
        mov     ebx, [esi].GENGC_sResult        ; V
        mov     [esi].GENGC_tResult, edx        ;U
        and     edx, TMASK_SUBDIV               ; V
        shr     edx, (6-TEX_BPP_LOG2)           ;U
        and     ebx, [esi].GENGC_sMask          ; V
        shr     ebx, (16-TEX_BPP_LOG2)          ;U
        mov     ecx, TEXIMAGE                   ; V
        add     edx, ecx                        ;U
        PIXADVANCE edi                          ; V
        add     edx, ebx                        ;U
        HANDLE_PALETTE

        ;; Step the s/t accumulators by one pixel.
        mov     eax, [esi].GENGC_sAccum         ; V
        mov     ebx, [esi].GENGC_tAccum         ;U
        add     eax, [esi].GENGC_SPAN_ds        ; V
        add     ebx, [esi].GENGC_SPAN_dt        ;U
        mov     [esi].GENGC_sAccum, eax         ; V
        mov     [esi].GENGC_tAccum, ebx         ;U

        WRITE_TEXEL_DECEBP

        ;; Flags here are from the 'dec ebp' inside WRITE_TEXEL_DECEBP.
        jle     spanExit                        ; V
        test    edi, 3
        mov     eax, [esi].GENGC_flags          ; mov leaves the test flags intact
        jne     getAligned

alignmentDone:

;;
;; Kick off the next divide:
;;
;; In:  eax = GENGC_flags; x87 stack holds two entries.
;; Out: eax = sResult, ebx = adjusted tResult (also stored), subDs/subDt =
;;      (next result - current result) SAR 3, sAccum/tAccum advanced by
;;      sStepX/tStepX.  On the perspective path one x87 entry (qwAccum)
;;      remains; on the ortho path both entries are still present.
;;

        test    eax, GEN_TEXTURE_ORTHO
        je      @f

        mov     edx, [esi].GENGC_sAccum
        mov     eax, [esi].GENGC_tAccum
        mov     DWORD PTR [esi].GENGC_sResult, edx
        mov     DWORD PTR [esi].GENGC_tResult, eax
        jmp     short @stResultDone2

@@:

        fild    DWORD PTR [esi].GENGC_sAccum    ; s        1/qw       qwAccum
        fmul    ST, ST(1)                       ; s/qw     1/qw       qwAccum
        fild    DWORD PTR [esi].GENGC_tAccum    ; t        s/qw       1/qw     qwAccum
        fmulp   ST(2), ST                       ; s/qw     t/qw       qwAccum
        fistp   QWORD PTR [esi].GENGC_sResult   ; t/qw     qwAccum
        fistp   QWORD PTR [esi].GENGC_tResult   ; qwAccum
        fadd    DWORD PTR [esi].GENGC_qwStepX   ; qwAccum
        fld     ST(0)                           ; qwAccum  qwAccum
        fdivr   __One                           ; 1/qw     qwAccum

@stResultDone2:

        ;; Advance the accumulators by one full subdivision step.
        mov     eax, [esi].GENGC_sAccum         ; V
        mov     ebx, [esi].GENGC_tAccum         ;U
        add     eax, [esi].GENGC_sStepX         ; V
        add     ebx, [esi].GENGC_tStepX         ;U
        mov     [esi].GENGC_sAccum, eax         ; V
        mov     [esi].GENGC_tAccum, ebx         ;U

        ;; eax = sResult, ebx = (tResult SAR TSHIFT_SUBDIV) AND NOT 7.
        mov     eax, [esi].GENGC_sResult        ; V
        mov     ebx, [esi].GENGC_tResult        ;U
        mov     cl, TSHIFT_SUBDIV               ; V
        sar     ebx, cl                         ;UV (4)
        and     ebx, NOT 7                      ;U
        mov     ecx, [esi].GENGC_flags          ; V
        mov     [esi].GENGC_tResult, ebx        ;U
        test    ecx, GEN_TEXTURE_ORTHO          ; V
        je      @f

        mov     ecx, [esi].GENGC_sAccum
        mov     edx, [esi].GENGC_tAccum
        mov     DWORD PTR [esi].GENGC_sResultNew, ecx
        mov     DWORD PTR [esi].GENGC_tResultNew, edx
        jmp     short @stResultDone3

;; We may have to burn some cycles here...

@@:

        fild    DWORD PTR [esi].GENGC_sAccum    ; s        1/qw       qwAccum
        fmul    ST, ST(1)                       ; s/qw     1/qw       qwAccum
        fild    DWORD PTR [esi].GENGC_tAccum    ; t        s/qw       1/qw     qwAccum
        fmulp   ST(2), ST                       ; s/qw     t/qw       qwAccum
        fistp   QWORD PTR [esi].GENGC_sResultNew; t/qw     qwAccum
        fistp   QWORD PTR [esi].GENGC_tResultNew; qwAccum
        fadd    DWORD PTR [esi].GENGC_qwStepX   ; qwAccum

@stResultDone3:

        ;; Per-pixel deltas across the subdivision:
        ;;   subDs = (sResultNew - sResult) SAR 3
        ;;   subDt = (adjusted tResultNew - adjusted tResult) SAR 3
        mov     cl, TSHIFT_SUBDIV               ;U
        mov     edx, [esi].GENGC_tResultNew     ; V
        sar     edx, cl                         ;UV (4)
        and     edx, NOT 7                      ;U
        mov     ecx, [esi].GENGC_sResultNew     ; V
        mov     [esi].GENGC_tResultNew, edx     ;U
        sub     ecx, eax                        ; V
        sar     ecx, 3                          ;U
        sub     edx, ebx                        ; V
        sar     edx, 3                          ;U
        mov     [esi].GENGC_subDs, ecx          ; V
        mov     [esi].GENGC_subDt, edx          ;U

;;
;; Main loop.
;;
;; From here on ebp is packed: the low 16 bits hold the pixels still to
;; write (tested with 0ffffh / 0fffch / 0fffeh), the high 16 bits count
;; down the pixels left in the current subdivision (7 is loaded here, each
;; DWORD written subtracts its pixel count from both halves, and doSubDiv
;; adds 8 back).  Inside the loop edi = s, ecx = t, eax = texel
;; accumulator, and the destination pointer lives in GENGC_pixAccum.
;;

;; If we have fewer than 4 (or 2) pixels, just do right edge...

if (BPP eq 8)
        test    ebp, 0fffch                     ;U
else
        test    ebp, 0fffeh                     ;U
endif
        je      singlePixels

        add     ebp, 070000h

        mov     [esi].GENGC_pixAccum, edi
        mov     ecx, [esi].GENGC_tResult
        mov     eax, [esi].GENGC_flags
        mov     edi, [esi].GENGC_sResult

loopTop:

;;
;; This is the start of the outer loop.  We come back here on each
;; subdivision.  The key thing is to kick off the next divide:
;;

        test    eax, GEN_TEXTURE_ORTHO
        jne     @f

        fld     ST(0)                           ; qwAccum  qwAccum
        fadd    DWORD PTR [esi].GENGC_qwStepX   ; qwAccum+ qwAccum
        fxch    ST(1)                           ; qwAccum  qwAccum+
        fdivr   __One                           ; 1/qw     qwAccum+ -- let the divide rip!

@@:

loopTopNoDiv:

;; Fetch one DWORD's worth of texels (4 when BPP is 8, otherwise 2),
;; rotating each into eax, then store them with a single DWORD write.

if (BPP eq 8)

        GET_TEXEL_ADDRESS2
        GET_TEXEL_ACCUM
        GET_TEXEL_ADDRESS2
        GET_TEXEL_ACCUM
        GET_TEXEL_ADDRESS2
        GET_TEXEL_ACCUM
        GET_TEXEL_ADDRESS2
        mov     ebx, [esi].GENGC_pixAccum       ; V
        add     ebx, 4                          ;U
        GET_TEXEL_ACCUM                         ; V
                                                ;U
        sub     ebp, 040004h                    ; V
        mov     [esi].GENGC_pixAccum, ebx       ;U
        mov     [ebx-4], eax                    ; V

else

        GET_TEXEL_ADDRESS2
        GET_TEXEL_ACCUM
        GET_TEXEL_ADDRESS2
        mov     ebx, [esi].GENGC_pixAccum
        add     ebx, 4
        GET_TEXEL_ACCUM
        sub     ebp, 020002h
        mov     [esi].GENGC_pixAccum, ebx
        mov     [ebx-4], eax

endif

        ;; Flags are still those of the 'sub ebp' above (mov does not
        ;; change them): ebp <= 0 means the subdivision count ran out.
        jle     doSubDiv                        ;U
if (BPP eq 8)
        test    ebp, 0fffch                     ; V
else
        test    ebp, 0fffeh
endif
        je      doRightEdgePixels               ;U
        jmp     loopTopNoDiv                    ; V

doRightEdgePixels:

;;
;; Fewer pixels remain than fit in one DWORD write.  Leave through
;; spanExit if none are left; otherwise move s/t back to memory and the
;; destination pointer back into edi, then write the rest one at a time.
;;

        test    ebp, 0ffffh                     ; V
        je      spanExit                        ;U

        mov     [esi].GENGC_sResult, edi
        mov     [esi].GENGC_tResult, ecx
        mov     edi, [esi].GENGC_pixAccum

rightEdgePixels:

        PIXADVANCE edi                          ;U

        GET_TEXEL_ADDRESS
        GET_TEXEL

if (BPP eq 8)
        sub     ebp, 010001h                    ;U
        mov     [edi-1], al                     ; V
elseif (BPP eq 16)
        sub     ebp, 010001h
        mov     [edi-2], ax
endif

        test    ebp, 0ffffh                     ;U
        jne     rightEdgePixels                 ; V

        ;; Falls through into the exit code that follows.

;;
;; This is the exit point.  We need to pop the unused floating-point
;; registers off the stack, and return.  Two x87 entries are discarded,
;; then the registers saved at entry are restored in reverse push order.
;;

spanExit:

        fstp    ST(0)
        fstp    ST(0)

        pop     ebp
        pop     edi
        pop     esi
        pop     ebx

        ret     0

singlePixels:

;;
;; Short-span path: write one pixel per pass until ebp is no longer
;; positive.  Here ebp is a plain pixel count and edi the destination
;; pointer; s and t are read from GENGC_sResult / GENGC_tResult.
;;

        PIXADVANCE edi                          ;U

        GET_TEXEL_ADDRESS
        GET_TEXEL

        dec     ebp

if (BPP eq 8)
        mov     [edi-1], al                     ; V
elseif (BPP eq 16)
        mov     [edi-2], ax
endif

        ;; mov does not alter flags, so this tests the 'dec ebp' result.
        jg      singlePixels                    ; V

;;
;; This is the exit point.  We need to pop the unused floating-point
;; registers off the stack, and return.
;;
;; One x87 entry is always discarded.  A second is discarded only when
;; GEN_TEXTURE_ORTHO is set: that path never runs the fild/fistp sequence
;; that leaves just one entry on the perspective path.  The flags word is
;; fetched before esi is restored.
;;

        fstp    ST(0)
        mov     eax, [esi].GENGC_flags
        pop     ebp
        pop     edi
        pop     esi
        pop     ebx
        test    eax, GEN_TEXTURE_ORTHO
        je      @f
        fstp    ST(0)
@@:

        ret     0

;;
;; This is the subdivision code.  After the required number of steps, the
;; routine will jump here to calculate the next set of interpolants based
;; on subdivision:
;;
;; In:  ebp high word has run out; edi = s, ecx = t; x87 holds two entries.
;; Out: (on the path back to loopTop) edi = previous sResultNew,
;;      ecx = previous tResultNew, subDs/subDt recomputed, eax = flags.
;;

doSubDiv:

        add     ebp, 080000h                    ; refill the subdivision count

        mov     eax, [esi].GENGC_sAccum

if (BPP eq 8)
        test    ebp, 0fffch                     ; V
else
        test    ebp, 0fffeh
endif
        je      doRightEdgePixels               ;U

        test    ebp, 0ffffh
        je      spanExit

        mov     ecx, [esi].GENGC_flags

        mov     ebx, [esi].GENGC_tAccum

;;
;; Increment the big S and T steps:
;;

        add     eax, [esi].GENGC_sStepX
        add     ebx, [esi].GENGC_tStepX
        mov     [esi].GENGC_sAccum, eax
        mov     [esi].GENGC_tAccum, ebx

        ;; The previous "new" results become this subdivision's start.
        mov     edi, [esi].GENGC_sResultNew
        mov     ebx, [esi].GENGC_tResultNew

        test    ecx, GEN_TEXTURE_ORTHO
        je      @f

;;
;; Handle ortho case (easy)
;;

        mov     edx, DWORD PTR [esi].GENGC_tAccum
        mov     DWORD PTR [esi].GENGC_sResultNew, eax
        mov     DWORD PTR [esi].GENGC_tResultNew, edx
        jmp     short @stResultDone4

;;
;; Do the floating-point computation for perspective:
;;

@@:

        fild    DWORD PTR [esi].GENGC_sAccum    ; s        1/qw       qwAccum
        fmul    ST, ST(1)                       ; s/qw     1/qw       qwAccum
        fild    DWORD PTR [esi].GENGC_tAccum    ; t        s/qw       1/qw     qwAccum
        fmulp   ST(2), ST                       ; s/qw     t/qw       qwAccum
        fistp   QWORD PTR [esi].GENGC_sResultNew; t/qw     qwAccum
        fistp   QWORD PTR [esi].GENGC_tResultNew; qwAccum

@stResultDone4:

;;
;; Now, calculate the per-pixel deltas:
;;
;;   subDs = (sResultNew - edi) SAR 3
;;   subDt = (adjusted tResultNew - ebx) SAR 3
;;

        mov     cl, TSHIFT_SUBDIV               ;U
        mov     edx, [esi].GENGC_tResultNew     ; V
        sar     edx, cl                         ;UV (4)
        mov     ecx, [esi].GENGC_sResultNew     ;U
        and     edx, NOT 7                      ; V
        sub     ecx, edi                        ;U
        mov     [esi].GENGC_tResultNew, edx     ; V
        sar     ecx, 3                          ;U
        sub     edx, ebx                        ; V
        sar     edx, 3                          ;U
        mov     [esi].GENGC_subDs, ecx          ; V
        mov     [esi].GENGC_subDt, edx          ;U
        mov     ecx, ebx                        ; V  ecx = t for the loop
        mov     eax, [esi].GENGC_flags          ;U
        jmp     loopTop                         ; V