You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
306 lines
12 KiB
306 lines
12 KiB
;-----------------------------------------------------------------------
; Assembler setup.
;   .486p        - accept the 80486 instruction set (privileged forms
;                  included).
;   .model flat  - 32-bit flat memory model.
; The two include files are not visible from this file; they are
; presumably where the D3DVERTEX_*, D3DTLVERTEX_*, D3DMATRIXI__* and
; D3DFE_* offsets used by the code below are defined, because the local
; copy of those equates further down is disabled with `if 0`.
;-----------------------------------------------------------------------
.486p
.model flat

include offsets.asm
include pentium2.inc

.code
|
|
|
|
if 0
;-----------------------------------------------------------------------
; Reference copy of the structure offsets used by _matmul5.
; This whole block sits inside `if 0`, so none of it is assembled; it
; only documents the layout the routine expects.  All values are byte
; offsets from the start of the respective structure.
;-----------------------------------------------------------------------

; --- D3DVERTEX (input vertex) -----------------------------------------
D3DVERTEX_x                     equ     0
D3DVERTEX_y                     equ     4
D3DVERTEX_z                     equ     8

; --- D3DTLVERTEX (output vertex) --------------------------------------
D3DTLVERTEX_sx                  equ     0
D3DTLVERTEX_sy                  equ     4
D3DTLVERTEX_sz                  equ     8
D3DTLVERTEX_rhw                 equ     12
D3DTLVERTEX_color               equ     16
D3DTLVERTEX_specular            equ     20
D3DTLVERTEX_tu                  equ     24
D3DTLVERTEX_tv                  equ     28

; --- D3DFE_PROCESSVERTICES (offsets written as dword-index * 4) -------
D3DFE_PROCESSVERTICES_rExtents  equ     16*4
D3DFE_PROCESSVERTICES_vcache    equ     20*4
D3DFE_PROCESSVERTICES_dwFlags   equ     24*4

; --- D3DFE_VIEWPORTCACHE (relative to ..._vcache) ---------------------
D3DFE_VIEWPORTCACHE_scaleX      equ     0
D3DFE_VIEWPORTCACHE_scaleY      equ     4
D3DFE_VIEWPORTCACHE_offsetX     equ     8
D3DFE_VIEWPORTCACHE_offsetY     equ     12

; --- dwFlags bit tested before the extents update ---------------------
D3DDP_DONOTUPDATEEXTENTS        equ     1

; --- D3DMATRIXI: 4x4 matrix of dwords, element _rc at ((r-1)*4+(c-1))*4
D3DMATRIXI__11                  equ     0
D3DMATRIXI__12                  equ     4
D3DMATRIXI__13                  equ     8
D3DMATRIXI__14                  equ     12
D3DMATRIXI__21                  equ     16
D3DMATRIXI__22                  equ     20
D3DMATRIXI__23                  equ     24
D3DMATRIXI__24                  equ     28
D3DMATRIXI__31                  equ     32
D3DMATRIXI__32                  equ     36
D3DMATRIXI__33                  equ     40
D3DMATRIXI__34                  equ     44
D3DMATRIXI__41                  equ     48
D3DMATRIXI__42                  equ     52
D3DMATRIXI__43                  equ     56
D3DMATRIXI__44                  equ     60
endif
|
|
|
|
;-----------------------------------------------------------------------
; _matmul5 - transform one vertex by a 4x4 matrix, build its clip code
;            and, when the clip code is zero, project the vertex to
;            screen space and copy its colour/texture fields.
;
; Calling convention: all arguments on the stack, first argument nearest
; the return address; the routine ends in a plain `ret`, so the caller
; removes the arguments.
;
; In:    pout  output vertex (written at D3DTLVERTEX_* offsets)
;        pin   input vertex (x, y, z read at D3DVERTEX_* offsets;
;              color, specular, tu, tv read at D3DTLVERTEX_* offsets)
;        pmat  base pointer used with the D3DMATRIXI__* offsets AND with
;              the D3DFE_PROCESSVERTICES_* offsets (same pointer for both)
;        hout  address of the 16-bit word that receives the clip code
;
; Out:   eax   clip code (0 = vertex inside the view volume)
;        [hout]            low 16 bits of the clip code
;        clip code == 0:   pout sx, sy, sz = screen-space position,
;                          rhw = 1/w, color/specular/tu/tv copied from pin
;        clip code != 0:   pout +0, +4, +8 = transformed (clip-space)
;                          x, y, z and rhw = w (NOT the reciprocal);
;                          color/specular/tu/tv are not written
;        Unless D3DDP_DONOTUPDATEEXTENTS is set in dwFlags, the rExtents
;        rectangle is grown to contain (sx, sy) on the unclipped path.
;
; Clobb: eax, ecx, edx, flags.  ebx, esi, edi, ebp are saved/restored.
; x87:   uses up to 7 register-stack slots and pops everything it
;        pushes; 1/w is computed with FDIV before the clip test.
;-----------------------------------------------------------------------
PUBLIC _matmul5

_matmul5 PROC

; Arguments, addressed relative to esp as it stands AFTER the prologue:
; 24 bytes of locals + 4 saved registers (16) + return address (4) = 44.
; These text equates are only valid while esp is at that value, i.e. not
; between the `push eax` / `pop eax` pair further down.
pout    equ     dword ptr [esp+44]
pin     equ     dword ptr [esp+48]
pmat    equ     dword ptr [esp+52]
hout    equ     dword ptr [esp+56]

; Locals (esp+16 .. esp+39).  Only tempxx/tempyy/tempzz are used by the
; code below; tempx/tempy/tempz are reserved slots.
tempxx  equ     dword ptr [esp+16]      ; w - x
tempyy  equ     dword ptr [esp+20]      ; w - y
tempzz  equ     dword ptr [esp+24]      ; w - z
tempx   equ     dword ptr [esp+28]
tempy   equ     dword ptr [esp+32]
tempz   equ     dword ptr [esp+36]

        sub     esp,24                  ; Make room for locals

        push    ebx                     ; Save regs
        push    esi                     ;
        push    edi                     ;
        push    ebp                     ;

        mov     eax,pin                 ; eax = input vertex
        mov     ecx,pmat                ; ecx = matrix
        mov     ebp,pout                ; ebp = output vertex
        mov     esi,80000000h           ; IEEE sign-bit mask for clip codes

;   float x, y, z, w, we;
;   x = in->x*pv->mCTM._11 + in->y*pv->mCTM._21 + in->z*pv->mCTM._31 + pv->mCTM._41;
;   y = in->x*pv->mCTM._12 + in->y*pv->mCTM._22 + in->z*pv->mCTM._32 + pv->mCTM._42;
;   z = in->x*pv->mCTM._13 + in->y*pv->mCTM._23 + in->z*pv->mCTM._33 + pv->mCTM._43;
;   we= in->x*pv->mCTM._14 + in->y*pv->mCTM._24 + in->z*pv->mCTM._34 + pv->mCTM._44;
;
; The right-hand comments show the x87 stack, st(0) first, AFTER the
; instruction (for fld/fmul pairs: after the fmul).
;   x, y, z   input coordinates
;   x1..x4    running sum for output x with 1..4 of its terms added
;             (same for y, z, w); the order in which terms are added
;             varies, so e.g. "x2" may be x*_11+y*_21 or x*_11+_41
;   z2, z2'   two independent partial sums of output z

        fld     dword ptr [eax+D3DVERTEX_x]     ; x1
        fmul    dword ptr [ecx+D3DMATRIXI__11]  ;
        fld     dword ptr [eax+D3DVERTEX_x]     ; w1 x1
        fmul    dword ptr [ecx+D3DMATRIXI__14]  ;
        fld     dword ptr [eax+D3DVERTEX_y]     ; y*_21 w1 x1
        fmul    dword ptr [ecx+D3DMATRIXI__21]  ;
        fld     dword ptr [eax+D3DVERTEX_y]     ; y*_24 y*_21 w1 x1
        fmul    dword ptr [ecx+D3DMATRIXI__24]  ;
        fld     dword ptr [eax+D3DVERTEX_z]     ; z*_31 y*_24 y*_21 w1 x1
        fmul    dword ptr [ecx+D3DMATRIXI__31]  ;
        fxch    st(2)                           ; y*_21 y*_24 z*_31 w1 x1
        faddp   st(4),st                        ; y*_24 z*_31 w1 x2
        fld     dword ptr [eax+D3DVERTEX_z]     ; z*_34 y*_24 z*_31 w1 x2
        fmul    dword ptr [ecx+D3DMATRIXI__34]  ;
        fxch    st(1)                           ; y*_24 z*_34 z*_31 w1 x2
        faddp   st(3),st                        ; z*_34 z*_31 w2 x2
        fxch    st(1)                           ; z*_31 z*_34 w2 x2
        faddp   st(3),st                        ; z*_34 w2 x3
        fld     dword ptr [eax+D3DVERTEX_x]     ; y1 z*_34 w2 x3
        fmul    dword ptr [ecx+D3DMATRIXI__12]  ;
        fxch    st(1)                           ; z*_34 y1 w2 x3
        faddp   st(2),st                        ; y1 w3 x3
        fxch    st(2)                           ; x3 w3 y1
        fadd    dword ptr [ecx+D3DMATRIXI__41]  ; x4 w3 y1
        fld     dword ptr [eax+D3DVERTEX_x]     ; z1 x4 w3 y1
        fmul    dword ptr [ecx+D3DMATRIXI__13]  ;
        fld     dword ptr [eax+D3DVERTEX_y]     ; y*_22 z1 x4 w3 y1
        fmul    dword ptr [ecx+D3DMATRIXI__22]  ;
        fld     dword ptr [eax+D3DVERTEX_y]     ; y*_23 y*_22 z1 x4 w3 y1
        fmul    dword ptr [ecx+D3DMATRIXI__23]  ;
        fxch    st(4)                           ; w3 y*_22 z1 x4 y*_23 y1
        fadd    dword ptr [ecx+D3DMATRIXI__44]  ; w4 y*_22 z1 x4 y*_23 y1
        fxch    st(3)                           ; x4 y*_22 z1 w4 y*_23 y1
        fst     dword ptr [ebp]                 ; out+0 = x (clip space)
        fxch    st(1)                           ; y*_22 x4 z1 w4 y*_23 y1
        faddp   st(5),st                        ; x4 z1 w4 y*_23 y2
        fld     dword ptr [eax+D3DVERTEX_z]     ; z*_32 x4 z1 w4 y*_23 y2
        fmul    dword ptr [ecx+D3DMATRIXI__32]  ;
        fld     dword ptr [eax+D3DVERTEX_z]     ; z*_33 z*_32 x4 z1 w4 y*_23 y2
        fmul    dword ptr [ecx+D3DMATRIXI__33]  ;
        fxch    st(6)                           ; y2 z*_32 x4 z1 w4 y*_23 z*_33
        fadd    dword ptr [ecx+D3DMATRIXI__42]  ; y3 z*_32 x4 z1 w4 y*_23 z*_33
        fxch    st(3)                           ; z1 z*_32 x4 y3 w4 y*_23 z*_33
        fadd    dword ptr [ecx+D3DMATRIXI__43]  ; z2 z*_32 x4 y3 w4 y*_23 z*_33
        fxch    st(5)                           ; y*_23 z*_32 x4 y3 w4 z2 z*_33
        faddp   st(6),st                        ; z*_32 x4 y3 w4 z2 z2'
        faddp   st(2),st                        ; x4 y4 w4 z2 z2'

;; Form xx = w-x, yy = w-y, zz = w-z for the far-side clip tests while
;; finishing z and storing y and z.

        fsubr   st,st(2)                        ; xx y4 w4 z2 z2'
        fxch    st(4)                           ; z2' y4 w4 z2 xx
        faddp   st(3),st                        ; y4 w4 z4 xx
        fld     st                              ; y4 y4 w4 z4 xx
        fsubr   st,st(2)                        ; yy y4 w4 z4 xx
        fxch    st(1)                           ; y4 yy w4 z4 xx
        fstp    dword ptr [ebp+4]               ; yy w4 z4 xx   (out+4 = y)
        fxch    st(3)                           ; xx w4 z4 yy
        fstp    tempxx                          ; w4 z4 yy
        fxch    st(1)                           ; z4 w4 yy
        fst     dword ptr [ebp+8]               ; out+8 = z (clip space)
        fsubr   st,st(1)                        ; zz w4 yy
        fxch    st(2)                           ; yy w4 zz
        fstp    tempyy                          ; w4 zz
        fxch    st(1)                           ; zz w4
        fstp    tempzz                          ; w4

; Start the reciprocal now so the divide overlaps the integer clip-code
; work that follows.
        fld1                                    ; 1 w4
        fdiv    st,st(1)                        ; 1/w w

;; Now compute the clipcodes.
;
;   D3DVALUE xx = we - x;
;   D3DVALUE yy = we - y;
;   D3DVALUE zz = we - z;
;   clip = ((ASINT32(x)  & 0x80000000) >> (32-1)) |   // D3DCS_LEFT
;          ((ASINT32(y)  & 0x80000000) >> (32-4)) |   // D3DCS_BOTTOM
;          ((ASINT32(z)  & 0x80000000) >> (32-5)) |   // D3DCS_FRONT
;          ((ASINT32(xx) & 0x80000000) >> (32-2)) |   // D3DCS_RIGHT
;          ((ASINT32(yy) & 0x80000000) >> (32-3)) |   // D3DCS_TOP
;          ((ASINT32(zz) & 0x80000000) >> (32-6));    // D3DCS_BACK
;
; Each test isolates the float's sign bit (esi = 80000000h) and shifts
; it down to the flag's bit position.

        mov     eax,dword ptr [ebp]             ; Get x
        mov     ebx,dword ptr [ebp+4]           ; Get y

        and     eax,esi                         ;
        and     ebx,esi                         ;

        shr     eax,32-1                        ; D3DCS_LEFT
        mov     ecx,dword ptr [ebp+8]           ; Get z

        shr     ebx,32-4                        ; D3DCS_BOTTOM
        mov     edx,tempxx                      ;

        or      eax,ebx                         ; OR together clip flags
        and     ecx,esi                         ;

        shr     ecx,32-5                        ; D3DCS_FRONT
        and     edx,esi                         ;

        shr     edx,32-2                        ; D3DCS_RIGHT
        mov     ebx,tempyy                      ;

        or      eax,ecx                         ;
        and     ebx,esi                         ;

        shr     ebx,32-3                        ; D3DCS_TOP
        or      eax,edx                         ;

        mov     edx,tempzz                      ;
        or      eax,ebx                         ;

        and     edx,esi                         ;

        shr     edx,32-6                        ; D3DCS_BACK
        mov     esi,hout                        ; esi = clip-code output word

        or      eax,edx                         ; Finish clip flag generation
        mov     ebx,pmat                        ; ebx = base for D3DFE_* fields

        mov     word ptr [esi],ax               ; Output clip flags
        mov     esi,pin                         ; esi = input vertex again

        test    eax,eax                         ; Bail if clip!=0
        jnz     ClipNonZero                     ;

; ---- Unclipped vertex ------------------------------------------------
; pmat and pin were fetched above because the esp-relative equates are
; off by 4 from here until the matching pop.
        push    eax                             ; Save clip flags:
                                                ; ax gets trashed by fstsw in min/max calcs

        ; Propagate diffuse, specular, tu, tv
        mov     ecx,[esi+D3DTLVERTEX_color]
        mov     edx,[esi+D3DTLVERTEX_specular]

        mov     [ebp+D3DTLVERTEX_color],ecx
        mov     [ebp+D3DTLVERTEX_specular],edx

        mov     ecx,[esi+D3DTLVERTEX_tu]
        mov     edx,[esi+D3DTLVERTEX_tv]

        mov     [ebp+D3DTLVERTEX_tu],ecx
        mov     [ebp+D3DTLVERTEX_tv],edx

; Perspective divide and viewport mapping.  From here on "w" in the
; stack comments means the reciprocal 1/we.
;   sx = x*w*scaleX + offsetX,  sy = y*w*scaleY + offsetY,  sz = z*w

        fxch    st(1)                           ; we w
        fstp    st                              ; w

        fld     dword ptr [ebp]                 ; x*scaleX w
        fmul    dword ptr [ebx+D3DFE_PROCESSVERTICES_vcache+D3DFE_VIEWPORTCACHE_scaleX]
        fld     dword ptr [ebp+4]               ; y*scaleY x*scaleX w
        fmul    dword ptr [ebx+D3DFE_PROCESSVERTICES_vcache+D3DFE_VIEWPORTCACHE_scaleY]
        fxch    st(1)                           ; x*scaleX y*scaleY w
        fmul    st,st(2)                        ; x*w*scaleX y*scaleY w
        fxch    st(1)                           ; y*scaleY x*w*scaleX w
        fmul    st,st(2)                        ; y*w*scaleY x*w*scaleX w
        fxch    st(1)                           ; x*w*scaleX y*w*scaleY w
        fadd    dword ptr [ebx+D3DFE_PROCESSVERTICES_vcache+D3DFE_VIEWPORTCACHE_offsetX]
                                                ; sx y*w*scaleY w
        fxch    st(1)                           ; y*w*scaleY sx w
        fadd    dword ptr [ebx+D3DFE_PROCESSVERTICES_vcache+D3DFE_VIEWPORTCACHE_offsetY]
                                                ; sy sx w
        fld     dword ptr [ebp+8]               ; z sy sx w
        fmul    st,st(3)                        ; sz sy sx w
        fxch    st(2)                           ; sx sy sz w

        test    dword ptr [ebx+D3DFE_PROCESSVERTICES_dwFlags], D3DDP_DONOTUPDATEEXTENTS
        jnz     NoExtents

;; Update the extents rect in the PV structure.  As used here the rect
;; is +0 = min x, +4 = min y, +8 = max x, +12 = max y.  Each compare
;; copies the x87 condition codes into EFLAGS (fstsw ax / sahf) so the
;; unsigned jumps ja/jb can test them.

                                                ; sx sy sz w
        fcom    dword ptr [ebx+D3DFE_PROCESSVERTICES_rExtents+0]
        fstsw   ax
        sahf
        ja      @f                              ; sx > min x: keep min
        fst     dword ptr [ebx+D3DFE_PROCESSVERTICES_rExtents+0]
@@:     fcom    dword ptr [ebx+D3DFE_PROCESSVERTICES_rExtents+8]
        fstsw   ax
        sahf
        jb      @f                              ; sx < max x: keep max
        fst     dword ptr [ebx+D3DFE_PROCESSVERTICES_rExtents+8]
@@:     fxch    st(1)                           ; sy sx sz w
        fcom    dword ptr [ebx+D3DFE_PROCESSVERTICES_rExtents+4]
        fstsw   ax
        sahf
        ja      @f                              ; sy > min y: keep min
        fst     dword ptr [ebx+D3DFE_PROCESSVERTICES_rExtents+4]
@@:     fcom    dword ptr [ebx+D3DFE_PROCESSVERTICES_rExtents+12]
        fstsw   ax
        sahf
        jb      @f                              ; sy < max y: keep max
        fst     dword ptr [ebx+D3DFE_PROCESSVERTICES_rExtents+12]
@@:     fxch    st(1)                           ; sx sy sz w

NoExtents:
        ; Store the finished vertex and empty the x87 stack.
        fstp    dword ptr [ebp+D3DTLVERTEX_sx]  ; sy sz w
        fstp    dword ptr [ebp+D3DTLVERTEX_sy]  ; sz w
        fstp    dword ptr [ebp+D3DTLVERTEX_sz]  ; w
        fstp    dword ptr [ebp+D3DTLVERTEX_rhw] ; (empty)

        pop     eax                             ; Get clip flags back (0 here)

Return:
        pop     ebp                             ; Restore registers
        pop     edi                             ;
        pop     esi                             ;
        pop     ebx                             ;
        add     esp,24                          ; Locals

        ret                                     ; eax = clip code

; ---- Clipped vertex ----------------------------------------------------
; x87 stack on entry: 1/w we.  The clip-space x, y, z are already in
; out+0, +4, +8; only w itself is stored in rhw.  eax still holds the
; non-zero clip code.
ClipNonZero:
        fstp    st                              ; Get rid of 1/w
        fstp    dword ptr [ebp+D3DTLVERTEX_rhw] ; store we
        jmp     short Return

_matmul5 ENDP
|
|
|
|
end
|