Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

378 lines
13 KiB

  ; MASM syntax, 32-bit flat model, 486 instruction set (privileged forms enabled).
  .486p
  .model flat

  ; NOTE(review): offsets.asm presumably supplies the structure-offset symbols
  ; used below (D3DI_LIGHT_*, D3DLIGHTINGELEMENT_*, PV_LIGHT_*, _X_/_Y_/_Z_,
  ; _R_/_G_/_B_) - confirm against that file.
  include offsets.asm

  .data

  one     dd      03F800000h      ; IEEE-754 bit pattern of 1.0f (ONE_AS_INTEGER)

  ; Coefficients of the inverse square root refinement step
  ;       s = y*(a2 - a1*x*y*y)
  a1      dd      0.47
  a2      dd      1.47

  ; Scale factors for the specular table lookup
  v255    dd      65280.0         ; 255*256: table index in bits 8 and up, fraction in bits 0..7
  v1_256  dd      0.00390625      ; 1/256: turns the 8-bit integer fraction back into a float

  .code

  PUBLIC  _Directional2P5S        ; Pentium optimized, specular, unit scale
  PUBLIC  _Directional2P5         ; Pentium optimized, no specular, unit scale
  ;-------------------------------------------------------------------------
  ; COMPUTE_ISQRT - fast approximate inverse square root, s = 1/sqrt(x)
  ;
  ; Jim Blinn's method: a first guess y is produced by integer arithmetic on
  ; the bit pattern of the float x, then improved by one correction step.
  ;
  ;       ONE_AS_INTEGER = 0x3F800000
  ;       float y;
  ;       int tmp = (((ONE_AS_INTEGER << 1) + ONE_AS_INTEGER) - *(long*)&x) >> 1;
  ;       y = *(float*)&tmp;
  ;       s = y*(1.47f - 0.47f*x*y*y);
  ;
  ; Input:
  ;       st(0)   = x (the invocations in this file pass a sum of squares,
  ;                 i.e. a squared vector length)
  ;       y, len  = must be defined by the invoker as DWORD PTR scratch locations
  ;       a1, a2  = memory constants 0.47 and 1.47
  ; Output:
  ;       st(0)   = approximation of 1/sqrt(x); FPU entries below st(0) keep
  ;                 their positions and values
  ; Clobbers:
  ;       eax, integer flags, the len and y scratch locations;
  ;       temporarily needs three free FPU stack entries
  ;
  COMPUTE_ISQRT MACRO
          mov     eax, 07F000000h+03F800000h      ; (ONE_AS_INTEGER shifted left by 1) + ONE_AS_INTEGER
          fst     len                             ; spill x so its bits can be read as an integer
          sub     eax, len                        ; magic constant minus bits of x
          sar     eax, 1                          ; halve: bit pattern of the first guess
          mov     y, eax                          ; y = first guess, reloaded below as a float
          fmul    a1                              ; 0.47*x
          fld     y                               ; y  0.47*x
          fld     st(0)                           ; y  y  0.47*x
          fmul    st(0), st(1)                    ; y*y  y  0.47*x
          fld     a2                              ; 1.47  y*y  y  0.47*x
          fxch    st(3)                           ; 0.47*x  y*y  y  1.47
          fmulp   st(1), st(0)                    ; 0.47*x*y*y  y  1.47
          fsubp   st(2), st(0)                    ; y  (1.47 - 0.47*x*y*y)
          fmulp   st(1), st(0)                    ; y*(1.47 - 0.47*x*y*y) = s
  ENDM
  ;-------------------------------------------------------------------------
  ; EXIT_FUNC - shared epilogue of the lighting routines in this file
  ;
  ; Expects the stack to hold, from the top: saved edx, saved ebx, saved ecx
  ; (the routines push them in the order ecx, ebx, edx), and ebp to be the
  ; frame pointer set up by "push ebp / mov ebp, esp".
  ; Restores the three registers, releases the locals by reloading esp from
  ; ebp, restores the caller's ebp and returns with a plain ret, so the
  ; arguments stay on the stack for the caller.
  ;
  EXIT_FUNC MACRO
          pop     edx                     ; saved last, restored first
          pop     ebx
          pop     ecx
          mov     esp, ebp                ; drop local variables
          pop     ebp                     ; caller's frame pointer
          ret
  ENDM
  ;-------------------------------------------------------------------------
  ; void Directional2P5S(LPD3DFE_PROCESSVERTICES pv,
  ;                      D3DI_LIGHT *light,
  ;                      D3DLIGHTINGELEMENT *vertex)
  ;
  ; Adds the diffuse and specular contribution of one directional light to
  ; the colour accumulators in pv for a single vertex.
  ;
  ; Limitations:
  ;       Transformation matrix should not have a scale
  ;       Specular is always computed
  ;       Optimized for Pentium
  ;
  ; Input (at entry, before the frame is built):
  ;       [esp + 4]  - pv
  ;       [esp + 8]  - light
  ;       [esp + 12] - vertex
  ; Output:
  ;       pv.lighting.diffuse and pv.lighting.specular are updated
  ;       pv.lighting.specularComputed is set to 1, if there is specular component
  ; Registers:
  ;       ecx = light, ebx = vertex, edx = pv; all three are saved and restored
  ;       eax is used as scratch and is not preserved
  ;       every exit path leaves the FPU stack at its entry depth
  ;       arguments are not removed from the stack (plain ret)
  ;
  pv      equ DWORD PTR [ebp + 8]
  light   equ DWORD PTR [ebp + 12]
  vertex  equ DWORD PTR [ebp + 16]
  dot     equ DWORD PTR [ebp - 4]
  y       equ DWORD PTR [ebp - 8]         ; temporary variable to compute
                                          ; inverse square root
  len     equ DWORD PTR [ebp - 12]        ; vector length

  _Directional2P5S PROC NEAR
          push    ebp
          mov     ebp, esp
          sub     esp, 12                 ; locals: dot, y, len
          push    ecx
          mov     ecx, light
          push    ebx
          mov     ebx, vertex

  ; dot = VecDot(light->model_direction, in->dvNormal)

          fld     DWORD PTR [ecx + D3DI_LIGHT_model_direction + _X_]
          fmul    DWORD PTR [ebx + D3DLIGHTINGELEMENT_dvNormal + _X_]
          fld     DWORD PTR [ecx + D3DI_LIGHT_model_direction + _Y_]
          fmul    DWORD PTR [ebx + D3DLIGHTINGELEMENT_dvNormal + _Y_]
          fld     DWORD PTR [ecx + D3DI_LIGHT_model_direction + _Z_]
          fmul    DWORD PTR [ebx + D3DLIGHTINGELEMENT_dvNormal + _Z_]     ; z y x
          fxch    st(2)                   ; x y z
          faddp   st(1), st               ; x+y z
          push    edx
          faddp   st(1), st               ; dot
          mov     edx, pv
          fst     dot
          cmp     dot, 0                  ; float bits tested as a signed integer:
          jle     exit1                   ; sign bit set or all zero -> no contribution

  ; ldrv.diffuse.r += light->local_diffR * dot;
  ; ldrv.diffuse.g += light->local_diffG * dot;
  ; ldrv.diffuse.b += light->local_diffB * dot;

          fld     DWORD PTR [ecx + D3DI_LIGHT_local_diffR]
          fmul    st(0), st(1)            ; r dot
          fld     DWORD PTR [ecx + D3DI_LIGHT_local_diffG]
          fmul    st(0), st(2)            ; g r dot
          fld     DWORD PTR [ecx + D3DI_LIGHT_local_diffB]
          fmulp   st(3), st(0)            ; g r b   (dot is replaced by b)
          fxch    st(1)                   ; r g b
          fadd    DWORD PTR [edx + PV_LIGHT_diffuse + _R_]
          fxch    st(1)                   ; g r b
          fadd    DWORD PTR [edx + PV_LIGHT_diffuse + _G_]
          fxch    st(2)                   ; b r g
          fadd    DWORD PTR [edx + PV_LIGHT_diffuse + _B_]
          fxch    st(1)                   ; r b g
          fstp    DWORD PTR [edx + PV_LIGHT_diffuse + _R_]
          fstp    DWORD PTR [edx + PV_LIGHT_diffuse + _B_]
          fstp    DWORD PTR [edx + PV_LIGHT_diffuse + _G_]        ; FPU stack empty

  ; The flag test is disabled: this variant always computes specular.
  ; if (light->flags & D3DLIGHTI_COMPUTE_SPECULAR)
  ;       test    DWORD PTR [ecx + D3DI_LIGHT_flags], D3DLIGHTI_COMPUTE_SPECULAR
  ;       jz      exit

  ; VecSub(in->dvPosition, light->model_eye, eye);

          fld     DWORD PTR [ebx + D3DLIGHTINGELEMENT_dvPosition + _X_]
          fsub    DWORD PTR [ecx + D3DI_LIGHT_model_eye + _X_]
          fld     DWORD PTR [ebx + D3DLIGHTINGELEMENT_dvPosition + _Y_]
          fsub    DWORD PTR [ecx + D3DI_LIGHT_model_eye + _Y_]
          fld     DWORD PTR [ebx + D3DLIGHTINGELEMENT_dvPosition + _Z_]
          fsub    DWORD PTR [ecx + D3DI_LIGHT_model_eye + _Z_]    ; z y x
          fxch    st(2)                   ; x y z

  ; VecNormalizeFast(eye);
  ;
  ; Compute vector length. Leave vector on the FPU stack, because we will use it
  ;
  ; The first load must duplicate x, i.e. st(0). Loading st(1) here would
  ; square y twice and drop x from the sum.

          fld     st(0)                   ; x x y z
          fmul    st(0), st(0)            ; x*x x y z
          fld     st(2)                   ; y x*x x y z
          fmul    st(0), st(0)            ; y*y x*x x y z
          fld     st(4)                   ; z y*y x*x x y z
          fmul    st(0), st(0)            ; z*z y*y x*x x y z
          fxch    st(2)                   ; x*x y*y z*z x y z
          faddp   st(1), st               ; x*x+y*y z*z x y z
          faddp   st(1), st               ; len x y z
          COMPUTE_ISQRT                   ; st(0) will be 1/sqrt(len)

  ; Start normalizing the eye vector

          fmul    st(1), st(0)
          fmul    st(2), st(0)
          fmulp   st(3), st(0)            ; x y z     Normalized "eye" vector

  ; Calc halfway vector
  ; VecSub(light->model_direction, eye, h);

          fsubr   DWORD PTR [ecx + D3DI_LIGHT_model_direction + _X_]
          fxch    st(1)                   ; y x z
          fsubr   DWORD PTR [ecx + D3DI_LIGHT_model_direction + _Y_]
          fxch    st(2)                   ; z x y
          fsubr   DWORD PTR [ecx + D3DI_LIGHT_model_direction + _Z_]
          fxch    st(1)                   ; x z y     (components of h)

  ; dot = VecDot(h, in->dvNormal);

          fld     st(0)                   ; x x z y
          fmul    DWORD PTR [ebx + D3DLIGHTINGELEMENT_dvNormal + _X_]
          fld     st(3)                   ; y x*Nx x z y
          fmul    DWORD PTR [ebx + D3DLIGHTINGELEMENT_dvNormal + _Y_]
          fld     st(3)                   ; z y*Ny x*Nx x z y
          fmul    DWORD PTR [ebx + D3DLIGHTINGELEMENT_dvNormal + _Z_]
          fxch    st(2)                   ; x*Nx y*Ny z*Nz x z y
          faddp   st(1), st(0)
          faddp   st(1), st(0)            ; dot x z y
          fstp    dot                     ; x z y

  ; if (FLOAT_GTZ(dot))

          cmp     dot, 0
          jle     exit2                   ; h is still on the FPU stack: exit2 pops 3

  ; dot *= ISQRTF(VecLenSq(h));

          fmul    st(0), st(0)            ; x*x z y
          fxch    st(1)                   ; z x*x y
          fmul    st(0), st(0)            ; z*z x*x y
          fxch    st(2)                   ; y x*x z*z
          fmul    st(0), st(0)            ; y*y x*x z*z
          fxch    st(2)                   ; z*z x*x y*y
          faddp   st(1), st               ; z*z+x*x y*y
          faddp   st(1), st               ; len
          COMPUTE_ISQRT                   ; st(0) will be 1/sqrt(len)
          fmul    dot                     ; dot
          mov     eax, [edx + PV_LIGHT_specThreshold]
          fst     dot

  ; if (FLOAT_CMP_POS(dot, >=, ldrv.specThreshold))
  ; Integer compare of the two float bit patterns; the code continues only
  ; when dot is strictly greater than the threshold.

          cmp     dot, eax
          jle     exit1                   ; dot is still on the FPU stack: exit1 pops 1

  ; power = COMPUTE_DOT_POW(&ldrv, dot);
  ;     int indx;
  ;     float v;
  ;     dot *= 255.0f;
  ;     indx = (int)dot;
  ;     dot -= indx;
  ;     ldrv->specularComputed = TRUE;
  ;     v = ldrv->currentSpecTable[indx];
  ;     return v + (ldrv->currentSpecTable[indx+1] - v)*dot;

          fmul    v255                    ; dot*255*256
          push    ebx                     ; ebx (vertex) is reused as the table index
          fistp   dot                     ; indx << 8. 8 bits used to compute dot fraction
          mov     ebx, dot
          and     dot, 0FFh               ; fractional part of dot
          shr     ebx, 8                  ; Table index
          mov     eax, [edx + PV_LIGHT_currentSpecTable]
          lea     eax, [eax + ebx*4]      ; eax = &currentSpecTable[indx]
          fild    dot                     ; fractional part of dot
          fmul    v1_256                  ; dot*1/256 -> integer fraction to floating point
          fld     DWORD PTR [eax + 4]     ; currentSpecTable[indx+1]
          fsub    DWORD PTR [eax]         ; currentSpecTable[indx]
          fmulp   st(1), st(0)            ; dot*(v2-v1)
          mov     DWORD PTR [edx + PV_LIGHT_specularComputed], 1
          pop     ebx
          fadd    DWORD PTR [eax]         ; power

  ; power = COMPUTE_DOT_POW(&ldrv, dot);
  ; This is an alternative method to compute x power y.
  ; Jim Blinn's method is used:
  ; int tmp = (int)(power*(*(long*)&dot - ONE_AS_INTEGER)) + ONE_AS_INTEGER;
  ; dot ^ power = *(float*)&tmp;
  ;
  ;       sub     dot, 03F800000h
  ;       fstp    st(0)                   ; Remove dot
  ;       fld     DWORD PTR [edx + PV_LIGHT_material_power]
  ;       fimul   dot
  ;       fistp   dot
  ;       mov     DWORD PTR [edx + PV_LIGHT_specularComputed], 1
  ;       add     dot, 03F800000h
  ;       fld     dot

  ; ldrv.specular.r += light->local_specR * power;
  ; ldrv.specular.g += light->local_specG * power;
  ; ldrv.specular.b += light->local_specB * power;

          fld     DWORD PTR [ecx + D3DI_LIGHT_local_specR]
          fmul    st(0), st(1)            ; r power
          fld     DWORD PTR [ecx + D3DI_LIGHT_local_specG]
          fmul    st(0), st(2)            ; g r power
          fld     DWORD PTR [ecx + D3DI_LIGHT_local_specB]
          fmulp   st(3), st(0)            ; g r b   (power is replaced by b)
          fxch    st(1)                   ; r g b
          fadd    DWORD PTR [edx + PV_LIGHT_specular + _R_]
          fxch    st(1)                   ; g r b
          fadd    DWORD PTR [edx + PV_LIGHT_specular + _G_]
          fxch    st(2)                   ; b r g
          fadd    DWORD PTR [edx + PV_LIGHT_specular + _B_]       ; blue adds to the blue accumulator
          fxch    st(1)                   ; r b g
          fstp    DWORD PTR [edx + PV_LIGHT_specular + _R_]
          fstp    DWORD PTR [edx + PV_LIGHT_specular + _B_]
          fstp    DWORD PTR [edx + PV_LIGHT_specular + _G_]       ; FPU stack empty

  exit:
          EXIT_FUNC

  ; One value (dot) is left on the FPU stack
  exit1:
          fstp    st(0)
          EXIT_FUNC

  ; Three values (the halfway vector) are left on the FPU stack
  exit2:
          fstp    st(0)
          fstp    st(0)
          fstp    st(0)
          EXIT_FUNC

  _Directional2P5S ENDP
  ;-------------------------------------------------------------------------
  ; void Directional2P5(LPD3DFE_PROCESSVERTICES pv,
  ;                     D3DI_LIGHT *light,
  ;                     D3DLIGHTINGELEMENT *vertex)
  ;
  ; Diffuse-only directional light: when the dot product of the light
  ; direction and the vertex normal is positive, the light's diffuse colour
  ; scaled by that dot product is added to the diffuse accumulator in pv.
  ;
  ; Limitations:
  ;       Transformation matrix should not have a scale
  ;       Only diffuse component is computed
  ;       Optimized for Pentium
  ;
  ; Input (at entry, before the frame is built):
  ;       [esp + 4]  - pv
  ;       [esp + 8]  - light
  ;       [esp + 12] - vertex
  ; Output:
  ;       pv.lighting.diffuse is updated
  ; Registers:
  ;       ecx = light, ebx = vertex, edx = pv; all three are saved and restored
  ;       both exit paths leave the FPU stack at its entry depth
  ;       arguments are not removed from the stack (plain ret)
  ;
  pv      equ DWORD PTR [ebp + 8]
  light   equ DWORD PTR [ebp + 12]
  vertex  equ DWORD PTR [ebp + 16]
  dot     equ DWORD PTR [ebp - 4]
  y       equ DWORD PTR [ebp - 8]         ; temporary variable to compute
                                          ; inverse square root
  len     equ DWORD PTR [ebp - 12]        ; vector length

  _Directional2P5 PROC NEAR
          push    ebp
          mov     ebp, esp
          sub     esp, 12                 ; same 12-byte local area as the specular variant
          push    ecx
          mov     ecx, light
          push    ebx
          mov     ebx, vertex

  ; dot = VecDot(light->model_direction, in->dvNormal)

          fld     DWORD PTR [ecx + D3DI_LIGHT_model_direction + _X_]
          fmul    DWORD PTR [ebx + D3DLIGHTINGELEMENT_dvNormal + _X_]
          fld     DWORD PTR [ecx + D3DI_LIGHT_model_direction + _Y_]
          fmul    DWORD PTR [ebx + D3DLIGHTINGELEMENT_dvNormal + _Y_]
          fld     DWORD PTR [ecx + D3DI_LIGHT_model_direction + _Z_]
          fmul    DWORD PTR [ebx + D3DLIGHTINGELEMENT_dvNormal + _Z_]     ; z y x
          fxch    st(2)                   ; x y z
          faddp   st(1), st(0)            ; x+y z
          push    edx
          faddp   st(1), st(0)            ; dot
          mov     edx, pv
          fst     dot                     ; keep dot on the FPU stack, copy to memory
          cmp     dot, 0                  ; float bits tested as a signed integer
          jle     exit3                   ; not positive: nothing to add

  ; ldrv.diffuse.r += light->local_diffR * dot;
  ; ldrv.diffuse.g += light->local_diffG * dot;
  ; ldrv.diffuse.b += light->local_diffB * dot;

          fld     DWORD PTR [ecx + D3DI_LIGHT_local_diffR]
          fmul    st(0), st(1)            ; r dot
          fld     DWORD PTR [ecx + D3DI_LIGHT_local_diffG]
          fmul    st(0), st(2)            ; g r dot
          fld     DWORD PTR [ecx + D3DI_LIGHT_local_diffB]
          fmulp   st(3), st(0)            ; g r b   (dot is replaced by b)
          fxch    st(1)                   ; r g b
          fadd    DWORD PTR [edx + PV_LIGHT_diffuse + _R_]
          fxch    st(1)                   ; g r b
          fadd    DWORD PTR [edx + PV_LIGHT_diffuse + _G_]
          fxch    st(2)                   ; b r g
          fadd    DWORD PTR [edx + PV_LIGHT_diffuse + _B_]
          fxch    st(1)                   ; r b g
          fstp    DWORD PTR [edx + PV_LIGHT_diffuse + _R_]
          fstp    DWORD PTR [edx + PV_LIGHT_diffuse + _B_]
          fstp    DWORD PTR [edx + PV_LIGHT_diffuse + _G_]        ; FPU stack empty
          EXIT_FUNC

  ; dot is still on the FPU stack here; discard it before returning
  exit3:
          fstp    st(0)
          EXIT_FUNC

  _Directional2P5 ENDP
  330. end