.486p .model flat include offsets.asm .data one DWORD 3f800000h a1 dd 0.47 ; Constants to compute inverse square root a2 dd 1.47 v255 dd 65280.0 ; 255*256 v1_256 dd 0.00390625 ; 1/255 .code PUBLIC _Directional2P5S ; Pentium optimized, specular, unit scale PUBLIC _Directional2P5 ; Pentium optimized, no specular, unit scale ;------------------------------------------------------------------------- ; Jim Blinn's method is used to compute inverse square root s = 1/sqrt(x): ; ONE_AS_INTEGER = 0x3F800000 ; float y; ; int tmp = ((ONE_AS_INTEGER << 1 + ONE_AS_INTEGER) - *(long*)&x) >> 1; ; y = *(float*)&tmp; ; s = y*(1.47f - 0.47f*x*y*y); ; Input: ; st(0) = vector length ; y, len = should be defined as DWORD PTR ; a1, a2 = 0.27 and 1.47 ; Output: ; st(0) = 1/sqrt(vector length) ; COMPUTE_ISQRT MACRO mov eax, 07F000000h+03F800000h ; (ONE_AS_INTEGER<<1) + ONE_AS_INTEGER fst len ; Vector length (x = len) sub eax, len sar eax, 1 mov y, eax ; y fmul a1 ; len*0.47 x y z fld y ; y len*0.47 x y z fld st(0) ; y y len*0.47 x y z fmul st(0), st(1) ; y*y y len*0.47 x y z fld a2 ; 1.47 y*y y len*0.47 x y z fxch st(3) ; len*0.47 y*y y 1.47 x y z fmulp st(1), st(0) ; len*0.47*y*y y 1.47 x y z fsubp st(2), st(0) ; y aaa x y z fmulp st(1), st(0) ; 1/sqrt(len) x y z ENDM ;------------------------------------------------------------------------- ; Exit from the function ; EXIT_FUNC MACRO pop edx pop ebx pop ecx mov esp, ebp pop ebp ret ENDM ;------------------------------------------------------------------------- ; void Directional2P5S(LPD3DFE_PROCESSVERTICES pv, ; D3DI_LIGHT *light, ; D3DLIGHTINGELEMENT *vertex) ; Limitations: ; Transformation matrix should not have a scale ; Specular is always computed ; Optimized for Pentium ; ; Input: ; [esp + 4] - pv ; [esp + 8] - light ; [esp + 12] - vertex ; Output: ; pv.lighting.diffuse and pv.lighting.specular are updated ; pv.lighting.specularComputed is set to 1, if there is specular component ; pv equ DWORD PTR [ebp + 8] light equ DWORD PTR [ebp + 12] vertex equ DWORD PTR [ebp + 16] dot equ DWORD PTR [ebp - 4] y equ DWORD PTR [ebp - 8] ; temporary variable to compute ; inverse square root len equ DWORD PTR [ebp - 12] ; vector length _Directional2P5S PROC NEAR push ebp mov ebp, esp sub esp, 12 push ecx mov ecx, light push ebx mov ebx, vertex ; dot = VecDot(light->model_direction, in->dvNormal) fld DWORD PTR [ecx + D3DI_LIGHT_model_direction + _X_] fmul DWORD PTR [ebx + D3DLIGHTINGELEMENT_dvNormal + _X_] fld DWORD PTR [ecx + D3DI_LIGHT_model_direction + _Y_] fmul DWORD PTR [ebx + D3DLIGHTINGELEMENT_dvNormal + _Y_] fld DWORD PTR [ecx + D3DI_LIGHT_model_direction + _Z_] fmul DWORD PTR [ebx + D3DLIGHTINGELEMENT_dvNormal + _Z_] ; z y x fxch st(2) ; x y z faddp st(1), st ; x+y z push edx faddp st(1), st ; dot mov edx, pv fst dot cmp dot, 0 jle exit1 ; ldrv.diffuse.r += light->local_diffR * dot; ; ldrv.diffuse.g += light->local_diffG * dot; ; ldrv.diffuse.b += light->local_diffB * dot; fld DWORD PTR [ecx + D3DI_LIGHT_local_diffR] fmul st(0), st(1) fld DWORD PTR [ecx + D3DI_LIGHT_local_diffG] fmul st(0), st(2) fld DWORD PTR [ecx + D3DI_LIGHT_local_diffB] fmulp st(3), st(0) ; g r b fxch st(1) ; r g b fadd DWORD PTR [edx + PV_LIGHT_diffuse + _R_] fxch st(1) ; g r b fadd DWORD PTR [edx + PV_LIGHT_diffuse + _G_] fxch st(2) ; b r g fadd DWORD PTR [edx + PV_LIGHT_diffuse + _B_] fxch st(1) ; r b g fstp DWORD PTR [edx + PV_LIGHT_diffuse + _R_] fstp DWORD PTR [edx + PV_LIGHT_diffuse + _B_] fstp DWORD PTR [edx + PV_LIGHT_diffuse + _G_] ; if (light->flags & D3DLIGHTI_COMPUTE_SPECULAR) ; test DWORD PTR [ecx + D3DI_LIGHT_flags], D3DLIGHTI_COMPUTE_SPECULAR ; jz exit ; VecSub(in->dvPosition, light->model_eye, eye); fld DWORD PTR [ebx + D3DLIGHTINGELEMENT_dvPosition + _X_] fsub DWORD PTR [ecx + D3DI_LIGHT_model_eye + _X_] fld DWORD PTR [ebx + D3DLIGHTINGELEMENT_dvPosition + _Y_] fsub DWORD PTR [ecx + D3DI_LIGHT_model_eye + _Y_] fld DWORD PTR [ebx + D3DLIGHTINGELEMENT_dvPosition + _Z_] fsub DWORD PTR [ecx + D3DI_LIGHT_model_eye + _Z_] ; z y x fxch st(2) ; x y z ; VecNormalizeFast(eye); ; ; Compute vector length. Leave vector on the FPU stack, because we will use it ; fld st(1) ; x x y z fmul st(0), st(0) ; x*x x y z fld st(2) fmul st(0), st(0) ; y*y x*x x y z fld st(4) fmul st(0), st(0) ; z*z y*y x*x x y z fxch st(2) ; x y z faddp st(1), st ; x + y, z faddp st(1), st ; len x y z COMPUTE_ISQRT ; st(0) will be 1/sqrt(len) ; Start normalizing the eye vector fmul st(1), st(0) fmul st(2), st(0) fmulp st(3), st(0) ; x y z Normalized "eye" vector ; Calc halfway vector ; VecSub(light->model_direction, eye, h); ; fsubr DWORD PTR [ecx + D3DI_LIGHT_model_direction + _X_] fxch st(1) ; y x z fsubr DWORD PTR [ecx + D3DI_LIGHT_model_direction + _Y_] fxch st(2) ; z x y fsubr DWORD PTR [ecx + D3DI_LIGHT_model_direction + _Z_] fxch st(1) ; x z y ; dot = VecDot(h, in->dvNormal); fld st(0) fmul DWORD PTR [ebx + D3DLIGHTINGELEMENT_dvNormal + _X_] fld st(3) fmul DWORD PTR [ebx + D3DLIGHTINGELEMENT_dvNormal + _Y_] fld st(3) ; z*Nz y*Ny x*Nx x z y fmul DWORD PTR [ebx + D3DLIGHTINGELEMENT_dvNormal + _Z_] fxch st(2) faddp st(1), st(0) faddp st(1), st(0) ; dot x z y fstp dot ; x z y ; if (FLOAT_GTZ(dot)) cmp dot, 0 jle exit2 ; dot *= ISQRTF(VecLenSq(h)); ; fmul st(0), st(0) ; x*x y z fxch st(1) ; y x*x z fmul st(0), st(0) ; y*y x*x z fxch st(2) fmul st(0), st(0) ; z*z y*y x*x fxch st(2) ; faddp st(1), st ; x + y, z faddp st(1), st ; len COMPUTE_ISQRT ; st(0) will be 1/sqrt(len) fmul dot ; dot mov eax, [edx + PV_LIGHT_specThreshold] fst dot ; if (FLOAT_CMP_POS(dot, >=, ldrv.specThreshold)) cmp dot, eax jle exit1 ; power = COMPUTE_DOT_POW(&ldrv, dot); ; int indx; ; float v; ; dot *= 255.0f; ; indx = (int)dot; ; dot -= indx; ; ldrv->specularComputed = TRUE; ; v = ldrv->currentSpecTable[indx]; ; return v + (ldrv->currentSpecTable[indx+1] - v)*dot; ; fmul v255 ; dot*255*256 push ebx fistp dot ; indx << 8. 8 bits used to compute dot fraction mov ebx, dot ; and dot, 0FFh ; fractional part of dot shr ebx, 8 ; Table index mov eax, [edx + PV_LIGHT_currentSpecTable] lea eax, [eax + ebx*4] fild dot ; fractional part of dot fmul v1_256 ; dot*1/256 -> integer fraction to floating point fld DWORD PTR [eax + 4] ; currentSpecTable[indx+1] fsub DWORD PTR [eax] ; currentSpecTable[indx] fmulp st(1), st(0) ; dot*(v2-v1) mov DWORD PTR [edx + PV_LIGHT_specularComputed], 1 pop ebx fadd DWORD PTR [eax] ; power = COMPUTE_DOT_POW(&ldrv, dot); ; This is an alternative method to compute x power y. ; Jim Blinn's method is used: ; int tmp = (int)(power*(*(long*)&dot - ONE_AS_INTEGER)) + ONE_AS_INTEGER; ; dot ^ power = *(float*)&tmp; ; ; sub dot, 03F800000h ; fstp st(0) ; Remove dot ; fld DWORD PTR [edx + PV_LIGHT_material_power] ; fimul dot ; fistp dot ; mov DWORD PTR [edx + PV_LIGHT_specularComputed], 1 ; add dot, 03F800000h ; fld dot ; ldrv.specular.r += light->local_specR * power; ; ldrv.specular.g += light->local_specG * power; ; ldrv.specular.b += light->local_specB * power; ; fld DWORD PTR [ecx + D3DI_LIGHT_local_specR] fmul st(0), st(1) fld DWORD PTR [ecx + D3DI_LIGHT_local_specG] fmul st(0), st(2) fld DWORD PTR [ecx + D3DI_LIGHT_local_specB] fmulp st(3), st(0) ; g r b fxch st(1) ; r g b fadd DWORD PTR [edx + PV_LIGHT_specular + _R_] fxch st(1) ; g r b fadd DWORD PTR [edx + PV_LIGHT_specular + _G_] fxch st(2) ; b r g fadd DWORD PTR [edx + PV_LIGHT_specular + _G_] fxch st(1) ; r b g fstp DWORD PTR [edx + PV_LIGHT_specular + _R_] fstp DWORD PTR [edx + PV_LIGHT_specular + _B_] fstp DWORD PTR [edx + PV_LIGHT_specular + _G_] exit: EXIT_FUNC exit1: fstp st(0) EXIT_FUNC exit2: fstp st(0) fstp st(0) fstp st(0) EXIT_FUNC _Directional2P5S ENDP ;------------------------------------------------------------------------- ; void Directional2P5(LPD3DFE_PROCESSVERTICES pv, ; D3DI_LIGHT *light, ; D3DLIGHTINGELEMENT *vertex) ; Limitations: ; Transformation matrix should not have a scale ; Only diffuse component is computed ; Optimized for Pentium ; ; Input: ; [esp + 4] - pv ; [esp + 8] - light ; [esp + 12] - vertex ; Output: ; pv.lighting.diffuse is updated ; pv equ DWORD PTR [ebp + 8] light equ DWORD PTR [ebp + 12] vertex equ DWORD PTR [ebp + 16] dot equ DWORD PTR [ebp - 4] y equ DWORD PTR [ebp - 8] ; temporary variable to compute ; inverse square root len equ DWORD PTR [ebp - 12] ; vector length _Directional2P5 PROC NEAR push ebp mov ebp, esp sub esp, 12 push ecx mov ecx, light push ebx mov ebx, vertex ; dot = VecDot(light->model_direction, in->dvNormal) fld DWORD PTR [ecx + D3DI_LIGHT_model_direction + _X_] fmul DWORD PTR [ebx + D3DLIGHTINGELEMENT_dvNormal + _X_] fld DWORD PTR [ecx + D3DI_LIGHT_model_direction + _Y_] fmul DWORD PTR [ebx + D3DLIGHTINGELEMENT_dvNormal + _Y_] fld DWORD PTR [ecx + D3DI_LIGHT_model_direction + _Z_] fmul DWORD PTR [ebx + D3DLIGHTINGELEMENT_dvNormal + _Z_] ; z y x fxch st(2) ; x y z faddp st(1), st ; x+y z push edx faddp st(1), st ; dot mov edx, pv fst dot cmp dot, 0 jle exit3 ; ldrv.diffuse.r += light->local_diffR * dot; ; ldrv.diffuse.g += light->local_diffG * dot; ; ldrv.diffuse.b += light->local_diffB * dot; fld DWORD PTR [ecx + D3DI_LIGHT_local_diffR] fmul st(0), st(1) fld DWORD PTR [ecx + D3DI_LIGHT_local_diffG] fmul st(0), st(2) fld DWORD PTR [ecx + D3DI_LIGHT_local_diffB] fmulp st(3), st(0) ; g r b fxch st(1) ; r g b fadd DWORD PTR [edx + PV_LIGHT_diffuse + _R_] fxch st(1) ; g r b fadd DWORD PTR [edx + PV_LIGHT_diffuse + _G_] fxch st(2) ; b r g fadd DWORD PTR [edx + PV_LIGHT_diffuse + _B_] fxch st(1) ; r b g fstp DWORD PTR [edx + PV_LIGHT_diffuse + _R_] fstp DWORD PTR [edx + PV_LIGHT_diffuse + _B_] fstp DWORD PTR [edx + PV_LIGHT_diffuse + _G_] EXIT_FUNC exit3: fstp st(0) EXIT_FUNC _Directional2P5 ENDP end