Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

306 lines
12 KiB

  .486p
  .model flat

  include offsets.asm
  include pentium2.inc

  .code

;-----------------------------------------------------------------------
; Reference copy of the structure offsets used by the code in this file.
; The whole table is assembled out by `if 0`, so none of these values
; take effect here; the live symbols have to come from the include
; files above (presumably offsets.asm -- confirm before relying on the
; numbers below, they may be stale).
;-----------------------------------------------------------------------
if 0

; Input vertex: position fields
D3DVERTEX_x                             equ     0
D3DVERTEX_y                             equ     4
D3DVERTEX_z                             equ     8

; Output (transformed and lit) vertex fields
D3DTLVERTEX_sx                          equ     0
D3DTLVERTEX_sy                          equ     4
D3DTLVERTEX_sz                          equ     8
D3DTLVERTEX_rhw                         equ     12
D3DTLVERTEX_color                       equ     16
D3DTLVERTEX_specular                    equ     20
D3DTLVERTEX_tu                          equ     24
D3DTLVERTEX_tv                          equ     28

; Process-vertices structure fields
D3DFE_PROCESSVERTICES_rExtents          equ     16*4
D3DFE_PROCESSVERTICES_vcache            equ     20*4
D3DFE_PROCESSVERTICES_dwFlags           equ     24*4

; Viewport cache fields (added to D3DFE_PROCESSVERTICES_vcache)
D3DFE_VIEWPORTCACHE_scaleX              equ     0
D3DFE_VIEWPORTCACHE_scaleY              equ     4
D3DFE_VIEWPORTCACHE_offsetX             equ     8
D3DFE_VIEWPORTCACHE_offsetY             equ     12

; Bit tested in dwFlags to skip the extents update
D3DDP_DONOTUPDATEEXTENTS                equ     1

; 4x4 matrix elements, _rc = row r, column c; consecutive columns of a
; row are 4 bytes apart, consecutive rows 16 bytes apart
D3DMATRIXI__11                          equ     0
D3DMATRIXI__12                          equ     4
D3DMATRIXI__13                          equ     8
D3DMATRIXI__14                          equ     12
D3DMATRIXI__21                          equ     16
D3DMATRIXI__22                          equ     20
D3DMATRIXI__23                          equ     24
D3DMATRIXI__24                          equ     28
D3DMATRIXI__31                          equ     32
D3DMATRIXI__32                          equ     36
D3DMATRIXI__33                          equ     40
D3DMATRIXI__34                          equ     44
D3DMATRIXI__41                          equ     48
D3DMATRIXI__42                          equ     52
D3DMATRIXI__43                          equ     56
D3DMATRIXI__44                          equ     60

endif
  PUBLIC  _matmul5

;-----------------------------------------------------------------------
; _matmul5 -- transform one vertex by a 4x4 matrix, build its clip
;             flags and, when no flag is set, produce the screen-space
;             vertex and (optionally) grow the extents rectangle.
;
; Stack arguments (one dword each). The routine ends in a plain `ret`,
; so the caller removes them:
;   pout  output vertex, written through the D3DTLVERTEX_* offsets
;   pin   input vertex; x/y/z are read through D3DVERTEX_*, and
;         color/specular/tu/tv through D3DTLVERTEX_*
;   pmat  base pointer used both for the D3DMATRIXI__* matrix elements
;         and for the D3DFE_PROCESSVERTICES_* fields (the offsets are
;         defined outside this file)
;   hout  address that receives the clip flags as a 16-bit word
;
; Returns:
;   eax = clip flags (0 when none of the six tests fired)
;
; Output vertex contents:
;   clip == 0 : sx, sy, sz, rhw, color, specular, tu, tv
;   clip != 0 : transformed x, y, z at offsets 0/4/8 and w (not 1/w)
;               at D3DTLVERTEX_rhw; color/specular/tu/tv are NOT copied
;
; Registers:
;   ebx, esi, edi, ebp are saved and restored.
;   ecx, edx and eflags are clobbered.
;   Up to seven x87 registers are in use at once; every value pushed on
;   the x87 stack is popped again before returning.
;-----------------------------------------------------------------------
_matmul5 PROC

; Frame layout once the prologue has run:
;   [esp+ 0..15]  saved ebp, edi, esi, ebx
;   [esp+16..39]  24 bytes of locals
;   [esp+40]      return address
;   [esp+44..59]  arguments
pout    equ     dword ptr [esp+44]
pin     equ     dword ptr [esp+48]
pmat    equ     dword ptr [esp+52]
hout    equ     dword ptr [esp+56]

tempxx  equ     dword ptr [esp+16]      ; w - x
tempyy  equ     dword ptr [esp+20]      ; w - y
tempzz  equ     dword ptr [esp+24]      ; w - z
tempx   equ     dword ptr [esp+28]      ; reserved, not referenced below
tempy   equ     dword ptr [esp+32]      ; reserved, not referenced below
tempz   equ     dword ptr [esp+36]      ; reserved, not referenced below

        sub     esp,24                  ; Make room for locals
        push    ebx                     ; Save regs
        push    esi
        push    edi
        push    ebp

        mov     eax,pin                 ; eax = input vertex
        mov     ecx,pmat                ; ecx = matrix base
        mov     ebp,pout                ; ebp = output vertex
        mov     esi,80000000h           ; Sign-bit mask for the clip codes

; float x, y, z, w, we;
; x = in->x*pv->mCTM._11 + in->y*pv->mCTM._21 + in->z*pv->mCTM._31 + pv->mCTM._41;
; y = in->x*pv->mCTM._12 + in->y*pv->mCTM._22 + in->z*pv->mCTM._32 + pv->mCTM._42;
; z = in->x*pv->mCTM._13 + in->y*pv->mCTM._23 + in->z*pv->mCTM._33 + pv->mCTM._43;
; we= in->x*pv->mCTM._14 + in->y*pv->mCTM._24 + in->z*pv->mCTM._34 + pv->mCTM._44;
;
; The right-hand comments show the x87 stack, st(0) first, AFTER the
; instruction (for an fld/fmul pair: after the fmul).
;   x, y, z   input x y z
;   x1-4      x*_11, x*_11+y*_21, x*_11+y*_21+z*_31, x*_11+y*_21+z*_31+_41
;   Some intermediate results such as x*_11+_41 and y*_21+z*_31 are also
;   written as x2 (likewise for y, z, w).
;
; The instruction order interleaves the four dot products; do not
; reorder it without re-checking every stack comment.

        fld     dword ptr [eax+D3DVERTEX_x]     ; x1
        fmul    dword ptr [ecx+D3DMATRIXI__11]
        fld     dword ptr [eax+D3DVERTEX_x]     ; w1 x1
        fmul    dword ptr [ecx+D3DMATRIXI__14]
        fld     dword ptr [eax+D3DVERTEX_y]     ; y*_21 w1 x1
        fmul    dword ptr [ecx+D3DMATRIXI__21]
        fld     dword ptr [eax+D3DVERTEX_y]     ; y*_24 y*_21 w1 x1
        fmul    dword ptr [ecx+D3DMATRIXI__24]
        fld     dword ptr [eax+D3DVERTEX_z]     ; z*_31 y*_24 y*_21 w1 x1
        fmul    dword ptr [ecx+D3DMATRIXI__31]
        fxch    st(2)                           ; y*_21 y*_24 z*_31 w1 x1
        faddp   st(4),st                        ; y*_24 z*_31 w1 x2
        fld     dword ptr [eax+D3DVERTEX_z]     ; z*_34 y*_24 z*_31 w1 x2
        fmul    dword ptr [ecx+D3DMATRIXI__34]
        fxch    st(1)                           ; y*_24 z*_34 z*_31 w1 x2
        faddp   st(3),st                        ; z*_34 z*_31 w2 x2
        fxch    st(1)                           ; z*_31 z*_34 w2 x2
        faddp   st(3),st                        ; z*_34 w2 x3
        fld     dword ptr [eax+D3DVERTEX_x]     ; y1 z*_34 w2 x3
        fmul    dword ptr [ecx+D3DMATRIXI__12]
        fxch    st(1)                           ; z*_34 y1 w2 x3
        faddp   st(2),st                        ; y1 w3 x3
        fxch    st(2)                           ; x3 w3 y1
        fadd    dword ptr [ecx+D3DMATRIXI__41]  ; x4 w3 y1
        fld     dword ptr [eax+D3DVERTEX_x]     ; z1 x4 w3 y1
        fmul    dword ptr [ecx+D3DMATRIXI__13]
        fld     dword ptr [eax+D3DVERTEX_y]     ; y*_22 z1 x4 w3 y1
        fmul    dword ptr [ecx+D3DMATRIXI__22]
        fld     dword ptr [eax+D3DVERTEX_y]     ; y*_23 y*_22 z1 x4 w3 y1
        fmul    dword ptr [ecx+D3DMATRIXI__23]
        fxch    st(4)                           ; w3 y*_22 z1 x4 y*_23 y1
        fadd    dword ptr [ecx+D3DMATRIXI__44]  ; w4 y*_22 z1 x4 y*_23 y1
        fxch    st(3)                           ; x4 y*_22 z1 w4 y*_23 y1
        fst     dword ptr [ebp]                 ; out[0] = x (clip space)
        fxch    st(1)                           ; y*_22 x4 z1 w4 y*_23 y1
        faddp   st(5),st                        ; x4 z1 w4 y*_23 y2
        fld     dword ptr [eax+D3DVERTEX_z]     ; z*_32 x4 z1 w4 y*_23 y2
        fmul    dword ptr [ecx+D3DMATRIXI__32]
        fld     dword ptr [eax+D3DVERTEX_z]     ; z*_33 z*_32 x4 z1 w4 y*_23 y2
        fmul    dword ptr [ecx+D3DMATRIXI__33]
        fxch    st(6)                           ; y2 z*_32 x4 z1 w4 y*_23 z*_33
        fadd    dword ptr [ecx+D3DMATRIXI__42]  ; y3 z*_32 x4 z1 w4 y*_23 z*_33
        fxch    st(3)                           ; z1 z*_32 x4 y3 w4 y*_23 z*_33
        fadd    dword ptr [ecx+D3DMATRIXI__43]  ; z2 z*_32 x4 y3 w4 y*_23 z*_33
        fxch    st(5)                           ; y*_23 z*_32 x4 y3 w4 z2 z*_33
        faddp   st(6),st                        ; z*_32 x4 y3 w4 z2 z2
        faddp   st(2),st                        ; x4 y4 w4 z2 z2

; xx = we - x, yy = we - y, zz = we - z; x, y, z go to the output
; vertex, the differences to the locals.
        fsubr   st,st(2)                        ; xx y4 w4 z2 z2
        fxch    st(4)                           ; z2 y4 w4 z2 xx
        faddp   st(3),st                        ; y4 w4 z4 xx
        fld     st                              ; y4 y4 w4 z4 xx
        fsubr   st,st(2)                        ; yy y4 w4 z4 xx
        fxch    st(1)                           ; y4 yy w4 z4 xx
        fstp    dword ptr [ebp+4]               ; yy w4 z4 xx      out[4] = y
        fxch    st(3)                           ; xx w4 z4 yy
        fstp    tempxx                          ; w4 z4 yy
        fxch    st(1)                           ; z4 w4 yy
        fst     dword ptr [ebp+8]               ; out[8] = z
        fsubr   st,st(1)                        ; zz w4 yy
        fxch    st(2)                           ; yy w4 zz
        fstp    tempyy                          ; w4 zz
        fxch    st(1)                           ; zz w4
        fstp    tempzz                          ; w4

; Start 1/we now; the integer clip-code work below does not touch the
; x87 stack, which stays "1/we we" until one of the two paths pops it.
        fld1                                    ; 1 we
        fdiv    st,st(1)                        ; 1/we we

;; Now compute the clipcodes.
; D3DVALUE xx = we - x;
; D3DVALUE yy = we - y;
; D3DVALUE zz = we - z;
; clip = ((ASINT32(x)  & 0x80000000) >> (32-1)) |   // D3DCS_LEFT
;        ((ASINT32(y)  & 0x80000000) >> (32-4)) |   // D3DCS_BOTTOM
;        ((ASINT32(z)  & 0x80000000) >> (32-5)) |   // D3DCS_FRONT
;        ((ASINT32(xx) & 0x80000000) >> (32-2)) |   // D3DCS_RIGHT
;        ((ASINT32(yy) & 0x80000000) >> (32-3)) |   // D3DCS_TOP
;        ((ASINT32(zz) & 0x80000000) >> (32-6));    // D3DCS_BACK
; Each float is reloaded as an integer so that only its sign bit is
; inspected (esi = 80000000h).
        mov     eax,dword ptr [ebp]     ; Get x
        mov     ebx,dword ptr [ebp+4]   ; Get y
        and     eax,esi
        and     ebx,esi
        shr     eax,32-1                ; D3DCS_LEFT
        mov     ecx,dword ptr [ebp+8]   ; Get z
        shr     ebx,32-4                ; D3DCS_BOTTOM
        mov     edx,tempxx
        or      eax,ebx                 ; OR together clip flags
        and     ecx,esi
        shr     ecx,32-5                ; D3DCS_FRONT
        and     edx,esi
        shr     edx,32-2                ; D3DCS_RIGHT
        mov     ebx,tempyy
        or      eax,ecx
        and     ebx,esi
        shr     ebx,32-3                ; D3DCS_TOP
        or      eax,edx
        mov     edx,tempzz
        or      eax,ebx
        and     edx,esi
        shr     edx,32-6                ; D3DCS_BACK
        mov     esi,hout                ; esi = where the clip flags go
        or      eax,edx                 ; Finish clip flag generation
        mov     ebx,pmat                ; ebx = base for vcache/extents/dwFlags
        mov     word ptr [esi],ax       ; Output clip flags (16 bits)
        mov     esi,pin                 ; esi = input vertex again
        test    eax,eax                 ; Bail if clip!=0
        jnz     ClipNonZero

; ---- Vertex is unclipped -------------------------------------------
; ax gets trashed by fstsw in the min/max calcs, so park the flags on
; the stack. NOTE: this moves esp by 4, so the esp-relative names
; (pin, pmat, pout, hout, temp*) must not be used until the pop below.
        push    eax                     ; Save clip flags

        ; Propagate diffuse, specular, tu, tv from input to output
        mov     ecx,[esi+D3DTLVERTEX_color]
        mov     edx,[esi+D3DTLVERTEX_specular]
        mov     [ebp+D3DTLVERTEX_color],ecx
        mov     [ebp+D3DTLVERTEX_specular],edx
        mov     ecx,[esi+D3DTLVERTEX_tu]
        mov     edx,[esi+D3DTLVERTEX_tv]
        mov     [ebp+D3DTLVERTEX_tu],ecx
        mov     [ebp+D3DTLVERTEX_tv],edx

; From here on "rhw" is 1/we.
;   sx = x*scaleX*rhw + offsetX
;   sy = y*scaleY*rhw + offsetY
;   sz = z*rhw
        fxch    st(1)                   ; we rhw
        fstp    st                      ; rhw
        fld     dword ptr [ebp]         ; x rhw
        fmul    dword ptr [ebx+D3DFE_PROCESSVERTICES_vcache+D3DFE_VIEWPORTCACHE_scaleX]
        fld     dword ptr [ebp+4]       ; y x*scaleX rhw
        fmul    dword ptr [ebx+D3DFE_PROCESSVERTICES_vcache+D3DFE_VIEWPORTCACHE_scaleY]
        fxch    st(1)                   ; x*scaleX y*scaleY rhw
        fmul    st,st(2)                ; x*rhw*scaleX y*scaleY rhw
        fxch    st(1)                   ; y*scaleY x*rhw*scaleX rhw
        fmul    st,st(2)                ; y*rhw*scaleY x*rhw*scaleX rhw
        fxch    st(1)                   ; x*rhw*scaleX y*rhw*scaleY rhw
        fadd    dword ptr [ebx+D3DFE_PROCESSVERTICES_vcache+D3DFE_VIEWPORTCACHE_offsetX]
        fxch    st(1)                   ; y*rhw*scaleY sx rhw
        fadd    dword ptr [ebx+D3DFE_PROCESSVERTICES_vcache+D3DFE_VIEWPORTCACHE_offsetY]
        fld     dword ptr [ebp+8]       ; z sy sx rhw
        fmul    st,st(3)                ; sz sy sx rhw
        fxch    st(2)                   ; sx sy sz rhw

        test    dword ptr [ebx+D3DFE_PROCESSVERTICES_dwFlags], D3DDP_DONOTUPDATEEXTENTS
        jnz     NoExtents

;; update extents rect in PV structure
; rExtents+0 / +8 are lowered / raised by sx, rExtents+4 / +12 by sy.
; fcom + fstsw ax + sahf turns the x87 compare into CF/ZF so the
; unsigned jumps ja/jb can be used.
                                        ; sx sy sz rhw
        fcom    dword ptr [ebx+D3DFE_PROCESSVERTICES_rExtents+0]
        fstsw   ax
        sahf
        ja      @f                      ; sx above current min x: keep it
        fst     dword ptr [ebx+D3DFE_PROCESSVERTICES_rExtents+0]
@@:     fcom    dword ptr [ebx+D3DFE_PROCESSVERTICES_rExtents+8]
        fstsw   ax
        sahf
        jb      @f                      ; sx below current max x: keep it
        fst     dword ptr [ebx+D3DFE_PROCESSVERTICES_rExtents+8]
@@:     fxch    st(1)                   ; sy sx sz rhw
        fcom    dword ptr [ebx+D3DFE_PROCESSVERTICES_rExtents+4]
        fstsw   ax
        sahf
        ja      @f                      ; sy above current min y: keep it
        fst     dword ptr [ebx+D3DFE_PROCESSVERTICES_rExtents+4]
@@:     fcom    dword ptr [ebx+D3DFE_PROCESSVERTICES_rExtents+12]
        fstsw   ax
        sahf
        jb      @f                      ; sy below current max y: keep it
        fst     dword ptr [ebx+D3DFE_PROCESSVERTICES_rExtents+12]
@@:     fxch    st(1)                   ; sx sy sz rhw

NoExtents:
        fstp    dword ptr [ebp+D3DTLVERTEX_sx]
        fstp    dword ptr [ebp+D3DTLVERTEX_sy]
        fstp    dword ptr [ebp+D3DTLVERTEX_sz]
        fstp    dword ptr [ebp+D3DTLVERTEX_rhw]
        pop     eax                     ; Get clip flags back

Return:
        pop     ebp                     ; Restore registers
        pop     edi
        pop     esi
        pop     ebx
        add     esp,24                  ; Locals
        ret                             ; Return, eax = clip flags

; ---- Vertex is clipped ----------------------------------------------
; x, y, z are already in the output vertex in clip space; drop the
; reciprocal and store we itself in the rhw slot.
ClipNonZero:
        fstp    st                      ; Get rid of 1/we
        fstp    dword ptr [ebp+D3DTLVERTEX_rhw] ; store we
        jmp     short Return

_matmul5 ENDP

        end