Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

738 lines
14 KiB

  1. ; $Id: ftrans.asm,v 1.3 1995/10/20 15:14:41 james Exp $
  2. ;
  3. ; Up to 165K from 143K
  4. ;
  5. ; Copyright (c) RenderMorphics Ltd. 1993, 1994, 1995
  6. ; Version 1.0
  7. ;
  8. ; All rights reserved.
  9. ;
  10. ; This file contains private, unpublished information and may not be
  11. ; copied in part or in whole without express permission of
  12. ; RenderMorphics Ltd.
  13. ;
  14. ; NOTE: Need to set integer pop precision...
  15. ;
  16. OPTION NOM510
  17. .386p
  18. ;.radix 16
  19. NAME transform
  20. include macros.asm
  21. include offsets.asm
  ; procstart prefix, xfrm_class
  ; Opens the PROC whose name is the two arguments pasted together, decorated
  ; according to the build: a trailing underscore when STACK_CALL is not
  ; defined, a leading underscore when both STACK_CALL and NT are defined,
  ; and no decoration when only STACK_CALL is defined.
  22. procstart macro prefix, xfrm_class
  23. ifndef STACK_CALL
  24. &prefix&xfrm_class&_ proc
  25. else
  26. ifndef NT
  27. &prefix&xfrm_class proc
  28. else
  29. _&prefix&xfrm_class proc
  30. endif
  31. endif
  32. endm
  ; procend prefix, xfrm_class
  ; Closes the PROC opened by procstart; the name is decorated by the same
  ; STACK_CALL / NT rules so that the ENDP label matches the PROC label.
  33. procend macro prefix, xfrm_class
  34. ifndef STACK_CALL
  35. &prefix&xfrm_class&_ endp
  36. else
  37. ifndef NT
  38. &prefix&xfrm_class endp
  39. else
  40. _&prefix&xfrm_class endp
  41. endif
  42. endif
  43. endm
  ; xfrmName is the text pasted onto the procedure names below:
  ; General when GEN_XFRM equals 1, Affine for any other value.
  44. if GEN_XFRM ne 1
  45. xfrmName equ Affine
  46. else
  47. xfrmName equ General
  48. endif
  49. ;ifndef WINNT
  50. ;DGROUP GROUP _DATA
  51. ;endif
  52. _DATA SEGMENT PARA PUBLIC USE32 'DATA'
  ; tx and ty are scratch qwords used by both transform loops in this file:
  ; FP values are written here with fst/fstp and their low dwords are then
  ; read back with integer mov/cmp. They are static storage, so every
  ; invocation uses the same two slots.
  53. ; These two are in the same cache line
  54. tx dq 0
  55. ty dq 0
  56. _DATA ENDS
  57. _TEXT SEGMENT DWORD PUBLIC USE32 'CODE'
  58. ;ifdef WINNT
  59. ASSUME CS:_TEXT ,DS:_DATA,SS:_DATA
  60. ;else
  61. ; ASSUME CS:_TEXT ,DS:DGROUP,SS:DGROUP
  62. ;endif
  63. FDROP macro
  ; Discard the top of the x87 register stack: storing st(0) onto itself
  ; and popping leaves the remaining entries one slot higher.
  64. fstp st(0)
  65. endm
  66. fmat macro op,row,col
  ; Apply the x87 instruction op to matrix element (row, col). The matrix
  ; base is taken from ebp and elements are dwords laid out four per row,
  ; so the byte offset is 4 * (4*row + col).
  67. op dword ptr [ebp + 4 * ((4 * row) + col)]
  68. endm
  69. column macro i,depth
  ; Start the dot product of the input vertex at esi with matrix column i
  ; (matrix base in ebp, see fmat). Pushes three values; on exit
  ;   st(0) = x*m[0][i] + m[3][i],  st(1) = y*m[1][i],  st(2) = z*m[2][i]
  ; and the caller finishes the sum with two pop-adds. The depth argument
  ; is not referenced in the body. The bracketed numbers in the comments
  ; below are the original author's step labels.
  70. fld dword ptr [esi + D3DVERTEX_x] ; [1] x
  71. fmat fmul,0,i ; [2] x
  72. fld dword ptr [esi + D3DVERTEX_y] ; [3] y x
  73. fmat fmul,1,i ; [4] y x
  74. fld dword ptr [esi + D3DVERTEX_z] ; [5] z y x
  75. fmat fmul,2,i ; [6] z y x
  76. fxch st(2) ; x y z
  77. fmat fadd,3,i ; [7] x y z (row 3 of the column is added to the x term)
  78. endm
  ; RLDDITransformUnclippedLoop<General|Affine>
  ;
  ; Transforms count vertices from vin to vout through the matrix at m,
  ; divides by w, and widens the caller's screen extents. No clip flags
  ; are computed here.
  ;
  ; Arguments (names as declared by regargs/defargs below):
  ;   x_offset, y_offset   floats: sx = tx/w + x_offset, sy = y_offset - ty/w
  ;   count                number of vertices; zero returns immediately
  ;   vout, vin            output / input vertex arrays
  ;   m                    matrix of dwords, element (row,col) at m + 4*(4*row+col)
  ;   in_size, out_size    declared but never read in this procedure
  ;   z_scale, z_offset    floats, used only when GEN_XFRM is 0
  ;   minx, maxx, miny, maxy   pointers to floats, read on entry and
  ;                            rewritten on exit
  ;
  ; w is the column-3 result when GEN_XFRM is nonzero, otherwise tz.
  ; Stored per vertex: sx, sy, rhw = 1/w, sz = z*rhw where z is tz
  ; (GEN_XFRM nonzero) or tz*z_scale + z_offset (GEN_XFRM zero), plus
  ; color, specular, tu and tv copied from the input vertex.
  ;
  ; During the loop eax/ebx/ecx/edx hold minx/maxx/miny/maxy as the low
  ; dwords of doubles that have had _g_dSnap[16] added; tx/ty in _DATA hold
  ; the previous vertex's snapped sx/sy. ebp, esi, edi, ebx and ecx are
  ; saved and restored; eax and edx are not.
  ;
  ; NOTE(review): the vertex stride is hard-coded to 32 bytes (the add
  ; esi/edi and the look-ahead reads) even though in_size and out_size are
  ; declared; the clipped variant in this file uses the arguments instead.
  79. procstart RLDDITransformUnclippedLoop,%xfrmName
  80. ;
  81. ; Set up equates for arguments and automatic storage
  82. ;
  83. beginargs
  84. saveregs <ebp,esi,edi,ebx,ecx>
  85. regargs <x_offset, y_offset, count, vout>
  86. defargs <vin, m, in_size, out_size, z_scale, z_offset>
  87. defargs <minx, maxx, miny, maxy>
  88. endargs
  89. ; Ensure arguments are accessible from the stack, to free the registers
  90. ifndef STACK_CALL
  91. mov [esp + x_offset], eax
  92. mov [esp + y_offset], edx
  93. mov [esp + count], ebx
  94. mov [esp + vout], ecx
  95. endif
  96. mov ebx,[esp + count]
  97. test ebx,ebx
  98. je alldone ; nothing to transform
  99. ;{ Pick up old extents
  ; Each extent is loaded as a float and has _g_dSnap[16] added; the low
  ; dword of the resulting double is what the loop compares as an integer.
  ; NOTE(review): presumably the constant puts a 16.16 fixed-point value in
  ; the low dword (the clipped variant writes extents back with fild and
  ; _g_fOoTwoPow16) - confirm against the definition of _g_dSnap.
  100. mov esi,[esp + minx]
  101. mov edi,[esp + miny]
  102. fld dword ptr [esi]
  103. fadd [_g_dSnap + (16 * 8)]
  104. fld dword ptr [edi]
  105. fadd [_g_dSnap + (16 * 8)]
  106. mov esi,[esp + maxx]
  107. mov edi,[esp + maxy]
  108. fld dword ptr [esi]
  109. fadd [_g_dSnap + (16 * 8)]
  110. fld dword ptr [edi] ; maxy maxx miny minx
  111. fadd [_g_dSnap + (16 * 8)]
  112. fxch st(3) ; minx maxx miny maxy
  113. fstp qword ptr [tx]
  114. fstp qword ptr [ty]
  115. mov eax,dword ptr [tx] ; eax = minx
  116. mov ebx,dword ptr [ty] ; ebx = maxx
  117. fstp qword ptr [tx]
  118. fstp qword ptr [ty]
  119. mov ecx,dword ptr [tx] ; ecx = miny
  120. mov edx,dword ptr [ty] ; edx = maxy
  121. ;}
  122. mov esi,[esp + vin]
  123. mov edi,[esp + vout]
  124. mov ebp,[esp + m]
  125. ; eax ebx ecx edx
  126. ; minx maxx miny maxy
  127. ; Need to do first loop iteration
  ; The first vertex enters the loop body at smaxy, skipping the extent
  ; compares, because tx/ty do not yet hold a transformed vertex.
  128. column 0,0
  129. fadd ; x'+y' z' x y z
  130. fadd ; x'+y'+z' x y z
  131. column 1,1 ; 2 cycle wait here
  132. fadd ; x'+y' z' tx x y z
  133. jmp smaxy
  134. transloop:
  ; The integer compares below test the PREVIOUS vertex's snapped sx/sy
  ; (left in tx/ty) against the running extents while the FPU works on the
  ; current vertex.
  135. column 0,0 ; 2 cycle wait here
  136. cmp eax,dword ptr [tx]
  137. jg setminx
  138. sminx: fadd ; x'+y' z' x y z
  139. cmp ebx,dword ptr [tx]
  140. jl setmaxx
  141. smaxx: fadd ; tx
  142. column 1,1 ; x' y' z' x
  143. cmp ecx,dword ptr [ty]
  144. jg setminy
  145. sminy: fadd ; x'+y' z' tx
  146. cmp edx,dword ptr [ty]
  147. jl setmaxy
  148. smaxy: fadd ; ty tx
  149. column 2,2
  ; eax/ebx are borrowed as copy registers until the matching pops at
  ; dontscan, so esp-relative argument offsets need + 8 in between.
  150. push eax
  151. push ebx
  152. mov eax,[esi + D3DLVERTEX_color]
  153. mov ebx,[esi + D3DLVERTEX_specular]
  154. fadd
  155. mov [edi + D3DTLVERTEX_color],eax
  156. mov [edi + D3DTLVERTEX_specular],ebx
  157. mov eax,[esi + D3DVERTEX_tu]
  158. mov ebx,[esi + D3DVERTEX_tv]
  159. fadd ; tz ty tx
  160. mov [edi + D3DTLVERTEX_tu],eax
  161. mov [edi + D3DTLVERTEX_tv],ebx
  162. if GEN_XFRM
  163. column 3,3
  164. fadd
  165. fadd ; tw tz ty tx
  166. endif
  167. fld [_g_fOne]
  168. fdiv st,st(1) ; 1/tw (tw) tz ty tx
  169. if GEN_XFRM
  170. fxch st(1)
  171. FDROP
  172. endif
  173. ; Do the cache read here, plus anything else?
  ; Reads one dword from the next input and output vertex while the divide
  ; is in flight; the loaded values are discarded by the pops. Skipped when
  ; this is the last vertex so nothing beyond the arrays is read.
  174. cmp dword ptr [esp + count + 8],1
  175. je dontscan
  176. mov eax,[esi + 32]
  177. mov ebx,[edi + 32]
  178. dontscan:
  179. pop ebx
  180. pop eax
  181. fxch st(3) ; tx tz ty 1/tz
  182. fmul st,st(3) ; tx/w tz ty 1/tz
  183. fxch st(2) ; ty tz tx/w 1/tz
  184. fmul st,st(3) ; ty/w tz tx/w 1/tz
  185. fxch st(2) ; tx/w tz ty/w 1/tz
  186. fadd dword ptr [esp + x_offset]
  187. fxch st(1) ; tz sx ty/w 1/tz
  188. if GEN_XFRM eq 0
  189. fmul dword ptr [esp + z_scale]
  190. endif
  191. fxch st(2) ; ty/w sx tz 1/tz
  192. fsubr dword ptr [esp + y_offset] ; sy = y_offset - ty/w
  193. fxch st(2) ; tz sx sy 1/tz
  194. if GEN_XFRM eq 0
  195. fadd dword ptr [esp + z_offset]
  196. endif
  197. fxch st(2) ; sy sx sz 1/tz
  198. fst dword ptr [edi + D3DTLVERTEX_sy]
  199. fadd [_g_dSnap + (16 * 8)]
  200. fxch st(1)
  201. fst dword ptr [edi + D3DTLVERTEX_sx]
  202. fadd [_g_dSnap + (16 * 8)]
  203. fxch st(1)
  204. fstp qword ptr [ty] ; snapped sy, compared on the next pass
  205. fstp qword ptr [tx] ; sz 1/tz
  206. fmul st,st(1)
  207. fxch st(1)
  208. fstp dword ptr [edi + D3DTLVERTEX_rhw]
  209. fstp dword ptr [edi + D3DTLVERTEX_sz]
  210. add edi,32
  211. add esi,32
  212. dec dword ptr [esp + count]
  213. jnz transloop
  ; Fold the last vertex (still in tx/ty) into the extents.
  214. cmp eax,dword ptr [tx]
  215. jl e1
  216. mov eax,dword ptr [tx]
  217. e1: cmp ebx,dword ptr [tx]
  218. jg e2
  219. mov ebx,dword ptr [tx]
  220. e2: cmp ecx,dword ptr [ty]
  221. jl e3
  222. mov ecx,dword ptr [ty]
  223. e3: cmp edx,dword ptr [ty]
  224. jg e4
  225. mov edx,dword ptr [ty]
  226. e4:
  ; Write the extents back: only the low dword of tx/ty is replaced, the
  ; high dword is whatever the last snapped store left there; subtracting
  ; _g_dSnap[16] undoes the earlier add before the float store.
  227. mov dword ptr [tx],eax
  228. mov dword ptr [ty],ecx
  229. mov edi,[esp + minx]
  230. mov esi,[esp + miny]
  231. fld [tx]
  232. fld [ty] ; c a
  233. fsub [_g_dSnap + (16 * 8)]
  234. fxch st(1) ; a c
  235. fsub [_g_dSnap + (16 * 8)]
  236. fxch st(1) ; c a
  237. ; XXX fp slot
  238. fstp dword ptr [esi] ; a
  239. fstp dword ptr [edi] ;
  240. mov dword ptr [tx],ebx
  241. mov dword ptr [ty],edx
  242. mov edi,[esp + maxx]
  243. mov esi,[esp + maxy]
  244. fld [tx]
  245. fld [ty] ; c a
  246. fsub [_g_dSnap + (16 * 8)]
  247. fxch st(1) ; a c
  248. fsub [_g_dSnap + (16 * 8)]
  249. fxch st(1) ; c a
  250. ; XXX fp slot
  251. fstp dword ptr [esi] ; a
  252. fstp dword ptr [edi] ;
  253. alldone:
  254. add esp, vars
  255. pop ecx
  256. pop ebx
  257. pop edi
  258. pop esi
  259. pop ebp
  260. return
  ; Out-of-line extent updates, kept off the loop's fall-through path.
  261. setminx:
  262. mov eax,dword ptr [tx]
  263. jmp sminx
  264. setmaxx:
  265. mov ebx,dword ptr [tx]
  266. jmp smaxx
  267. setminy:
  268. mov ecx,dword ptr [ty]
  269. jmp sminy
  270. setmaxy:
  271. mov edx,dword ptr [ty]
  272. jmp smaxy
  273. procend RLDDITransformUnclippedLoop,%xfrmName
  ; cpick dst,c0,c1
  ; Branch-free select on the carry flag: dst = c0 when carry is clear,
  ; dst = c1 when carry is set. c0 and c1 must be assembly-time constants
  ; because c0 xor c1 is evaluated by the assembler. The columns in the
  ; comments below show dst for the no-carry and carry cases.
  274. cpick macro dst,c0,c1 ; NO CARRY CARRY
  275. sbb dst,dst ; 0 ~0
  276. and dst,c0 xor c1 ; 0 c0^c1
  277. xor dst,c0 ; c0 c1
  278. endm
  279. ;************************************************************************
  ; RLDDITransformClippedLoop<General|Affine>
  ;
  ; Transforms count vertices from vin through the matrix at m, computes
  ; clip flags for each one, and writes screen coordinates only for
  ; vertices whose flags are zero.
  ;
  ; Arguments (names as declared by regargs/defargs below):
  ;   x_offset, y_offset   floats: sx = tx/w + x_offset, sy = y_offset - ty/w
  ;   count                number of vertices; zero returns immediately
  ;   vout, vin            output / input vertex arrays
  ;   hout                 per-vertex records receiving hx, hy, hz, dwFlags
  ;   m                    matrix of dwords, element (row,col) at m + 4*(4*row+col)
  ;   in_size, out_size    byte strides of vin and vout
  ;   z_scale, z_offset    floats, used only when GEN_XFRM is 0
  ;   x_bound, y_bound     floats scaled by w for the left/right and
  ;                        top/bottom tests
  ;   r_scale_x, r_scale_y floats: hx = r_scale_x*tx, hy = r_scale_y*ty
  ;   minx, maxx, miny, maxy   pointers to floats, read on entry and
  ;                            rewritten on exit
  ;   clip_intersection, clip_union   pointers to dwords; each vertex's
  ;                        flags are ANDed into the first and ORed into
  ;                        the second
  ;
  ; w is the column-3 result when GEN_XFRM is nonzero, otherwise tz.
  ; Flags: RIGHT if x_bound*w <= tx, LEFT if -x_bound*w > tx, TOP if
  ; y_bound*w <= ty, BOTTOM if -y_bound*w > ty, FRONT if the sign bit of
  ; hz is set, BACK unless hz < w. Color, specular, tu and tv are copied
  ; for every vertex. A flagged vertex gets only rhw written to vout and
  ; does not affect the extents.
  ;
  ; During the loop eax/ebx/ecx/edx hold minx/maxx/miny/maxy as the low
  ; dwords of doubles that have had _g_dSnap[16] added. ebp, esi, edi, ebx
  ; and ecx are saved and restored; eax and edx are not.
  280. procstart RLDDITransformClippedLoop,%xfrmName
  281. ;
  282. ; Set up equates for arguments and automatic storage
  283. ;
  284. beginargs
  285. saveregs <ebp,esi,edi,ebx,ecx>
  286. regargs <x_offset, y_offset, count, vout>
  287. defargs <vin, hout, m, in_size, out_size, z_scale, z_offset>
  288. defargs <x_bound, y_bound, r_scale_x, r_scale_y>
  289. defargs <minx, maxx, miny, maxy>
  290. defargs <clip_intersection, clip_union>
  291. endargs
  292. ; Ensure arguments are accessible from the stack, to free the registers
  293. ifndef STACK_CALL
  294. mov [esp + x_offset], eax
  295. mov [esp + y_offset], edx
  296. mov [esp + count], ebx
  297. mov [esp + vout], ecx
  298. else
  299. mov ebx,[esp + count]
  300. mov ecx,[esp + vout]
  301. endif
  ; One extra value (pi) sits at the bottom of the FP stack for the whole
  ; call and is dropped at alldone; it appears as "y" in the stack comments
  ; further down. NOTE(review): its purpose is not evident from this file.
  302. fldpi
  303. test ebx,ebx
  304. je alldone ; nothing to transform
  305. ;{ Pick up old extents
  ; Each extent is loaded as a float and has _g_dSnap[16] added; the low
  ; dword of the resulting double is what the loop compares as an integer.
  306. mov esi,[esp + minx]
  307. mov edi,[esp + miny]
  308. fld dword ptr [esi]
  309. fadd [_g_dSnap + (16 * 8)]
  310. fld dword ptr [edi]
  311. fadd [_g_dSnap + (16 * 8)]
  312. mov esi,[esp + maxx]
  313. mov edi,[esp + maxy]
  314. fld dword ptr [esi]
  315. fadd [_g_dSnap + (16 * 8)]
  316. fld dword ptr [edi] ; maxy maxx miny minx
  317. fadd [_g_dSnap + (16 * 8)]
  318. fxch st(3) ; minx maxx miny maxy
  319. fstp qword ptr [tx]
  320. fstp qword ptr [ty]
  321. mov eax,dword ptr [tx] ; eax = minx
  322. mov ebx,dword ptr [ty] ; ebx = maxx
  323. fstp qword ptr [tx]
  324. fstp qword ptr [ty]
  325. mov ecx,dword ptr [tx] ; ecx = miny
  326. mov edx,dword ptr [ty] ; edx = maxy
  327. ;}
  328. mov esi,[esp + vin]
  329. mov ebp,[esp + m]
  330. mov edi,[esp + vout]
  331. ; Need to do first loop iteration
  ; The first vertex enters the loop body at smaxy, skipping the extent
  ; compares, because tx/ty do not yet hold a transformed vertex.
  332. column 0,0 ; 2 cycle wait here
  333. fadd ; x'+y' z' x y z
  334. fadd ; x'+y'+z' x y z
  335. column 1,1 ; 2 cycle wait here
  336. fadd ; x'+y' z' tx x y z
  337. jmp smaxy
  338. transloop:
  ; Reached only after an unflagged vertex: the compares below test that
  ; vertex's snapped sx/sy (left in tx/ty) against the running extents.
  339. column 0,0 ; 2 cycle wait here
  340. cmp eax,dword ptr [tx]
  341. jg setminx
  342. sminx: fadd ; x'+y' z'
  343. cmp ebx,dword ptr [tx]
  344. jl setmaxx
  345. smaxx: fadd ; x'+y'+z'
  346. column 1,1 ; 2 cycle wait here
  347. cmp ecx,dword ptr [ty]
  348. jg setminy
  349. sminy: fadd ; x'+y' z'
  350. cmp edx,dword ptr [ty]
  351. jl setmaxy
  352. smaxy: fadd ; ty tx
  353. column 2,2
  ; eax/ebx are borrowed as scratch until the matching pops at dontscan,
  ; so every esp-relative argument access in between needs + 8.
  354. push eax
  355. push ebx
  356. mov eax,[esi + D3DLVERTEX_color]
  357. mov ebx,[esi + D3DLVERTEX_specular]
  358. fadd
  359. mov [edi + D3DTLVERTEX_color],eax
  360. mov [edi + D3DTLVERTEX_specular],ebx
  361. mov eax,[esi + D3DVERTEX_tu]
  362. mov ebx,[esi + D3DVERTEX_tv]
  363. fadd ; tz ty tx
  364. mov [edi + D3DTLVERTEX_tu],eax
  365. mov [edi + D3DTLVERTEX_tv],ebx
  366. if GEN_XFRM
  367. column 3,3
  368. fadd
  369. fadd ; tw tz ty tx
  370. endif
  371. ; Now set up the clip flags in ebp
  ; ebp stops being the matrix pointer here; it is reloaded from m before
  ; the next vertex. fnstsw ax clobbers ax, which is why eax was pushed.
  372. xor ebp,ebp
  373. fld dword ptr [esp + 8 + x_bound]
  374. fmul st,st(1) ; tmp (tw) tz ty tx
  375. fcom st(3+GEN_XFRM)
  376. fnstsw ax
  377. sahf
  378. ja x1 ; Skip this if (tmp > tx)
  379. or ebp,D3DCS_RIGHT
  380. x1: fchs ; -tmp (tw) tz ty tx
  381. fcomp st(3+GEN_XFRM) ; (tw) tz ty tx
  382. fnstsw ax
  383. sahf
  384. jbe xpasses ; Skip this if (-tmp <= tx)
  385. or ebp,D3DCS_LEFT
  386. xpasses:
  387. fld dword ptr [esp + 8 + y_bound]
  388. fmul st,st(1) ; tmp (tw) tz ty tx
  389. fcom st(2+GEN_XFRM)
  390. fnstsw ax
  391. sahf
  392. ja y1 ; Skip this if (tmp > ty)
  393. or ebp,D3DCS_TOP
  394. y1: fchs ; -tmp (tw) tz ty tx
  395. fcomp st(2+GEN_XFRM)
  396. fnstsw ax
  397. sahf
  398. jbe ypasses ; Skip this if (-tmp <= ty)
  399. or ebp,D3DCS_BOTTOM
  400. ypasses:
  401. fst [ty] ; ty will hold tw for a while...
  402. ; if GEN_XFRM eq 0 then tw == tz
  403. ; if GEN_XFRM eq 1 then tw != tz in general, so we
  404. ; calculate a true 1/tw and then drop the extra tw
  405. ; off the FP stack
  406. fld [_g_fOne]
  407. fdiv st,st(1) ; 1/tw (tw) tz ty tx
  408. if GEN_XFRM
  409. fxch st(1)
  410. FDROP
  411. endif
  412. ; Do the cache read here, plus anything else?
  ; Reads one dword from the next input and output vertex while the divide
  ; is in flight; the loaded values are discarded by the pops. Skipped on
  ; the last vertex so nothing beyond the arrays is read. eax and ebx are
  ; still pushed here, so count lives at [esp + count + 8].
  413. cmp dword ptr [esp + count + 8],1
  414. je dontscan
  415. mov eax,[esp + in_size + 8]
  416. mov ebx,[esp + out_size + 8]
  417. mov eax,[esi + eax]
  418. mov ebx,[edi + ebx]
  419. dontscan:
  420. pop ebx
  421. pop eax
  422. mov [esp + vout],edi ; park the output cursor; edi is reused for hout
  423. mov edi,[esp + hout]
  424. ; 1/tw tz ty tx
  425. fxch st(3) ; tx tz ty 1/tw
  426. fld dword ptr [esp + r_scale_x]
  427. fmul st,st(1) ; hx tx tz ty 1/tw
  428. fld dword ptr [esp + r_scale_y]
  429. fmul st,st(4) ; hy hx tx tz ty 1/tw
  430. fxch st(1) ; hx hy tx tz ty 1/tw
  431. fstp dword ptr [edi + D3DHVERTEX_hx]
  432. fstp dword ptr [edi + D3DHVERTEX_hy]
  433. fmul st,st(3) ; tx/w tz ty 1/tw
  434. fxch st(2) ; ty tz tx/w 1/tw
  435. fmul st,st(3) ; ty/w tz tx/w 1/tw
  436. fxch st(2) ; tx/w tz ty/w 1/tw
  437. fadd dword ptr [esp + x_offset]
  438. fxch st(1) ; tz sx ty/w 1/tw
  439. if GEN_XFRM eq 0
  440. fmul dword ptr [esp + z_scale]
  441. endif
  442. fxch st(2) ; ty/w sx tz 1/tw
  443. fsubr dword ptr [esp + y_offset] ; sy = y_offset - ty/w
  444. fxch st(2) ; tz sx sy 1/tw
  445. if GEN_XFRM eq 0
  446. fadd dword ptr [esp + z_offset]
  447. endif
  448. ; Last clipping flags
  449. fst dword ptr [edi + D3DHVERTEX_hz]
  450. test byte ptr [edi + D3DHVERTEX_hz + 3],80h ; sign bit of the stored float
  451. jz nofront
  452. or ebp,D3DCS_FRONT
  453. nofront:
  454. fcom [ty] ; compare hz with the tw saved at ypasses
  455. push eax ;[
  456. fnstsw ax
  457. sahf
  458. pop eax ;]
  459. jb noback
  460. or ebp,D3DCS_BACK
  461. noback:
  462. mov [edi + D3DHVERTEX_dwFlags],ebp
  463. mov edi,[esp + clip_intersection]
  464. and [edi],ebp
  465. mov edi,[esp + clip_union]
  466. or [edi],ebp
  467. test ebp,ebp
  468. jnz outofplay ; any flag set: skip the screen-space stores
  469. mov edi,[esp + vout]
  470. fxch st(2) ; sy sx sz 1/tz
  471. fst dword ptr [edi + D3DTLVERTEX_sy]
  472. fadd [_g_dSnap + (16 * 8)]
  473. fxch st(1)
  474. fst dword ptr [edi + D3DTLVERTEX_sx]
  475. fadd [_g_dSnap + (16 * 8)]
  476. fxch st(1)
  477. fstp qword ptr [ty]
  478. fstp qword ptr [tx] ; sz 1/tz y
  479. fmul st,st(1)
  480. fstp dword ptr [edi + D3DTLVERTEX_sz]
  481. ; 1/tz y
  482. fstp dword ptr [edi + D3DTLVERTEX_rhw]
  483. ; y
  484. add esi,[esp + in_size]
  485. add edi,[esp + out_size]
  486. mov ebp,[esp + hout]
  487. add ebp,D3DHVERTEX_size
  488. mov [esp + hout],ebp
  489. mov ebp,[esp + m] ; ebp is the matrix pointer again
  490. dec dword ptr [esp + count]
  491. jnz transloop
  492. jmp cleanup
  493. outofplay: ; tz sx sy 1/tz
  494. FDROP ; sx sy 1/tz
  495. FDROP ; sy 1/tz y
  496. FDROP ; 1/tz y
  497. mov edi,[esp + vout]
  498. fstp dword ptr [edi + D3DTLVERTEX_rhw]
  499. add esi,[esp + in_size]
  500. add edi,[esp + out_size]
  501. mov ebp,[esp + hout]
  502. add ebp,D3DHVERTEX_size
  503. mov [esp + hout],ebp
  504. mov ebp,[esp + m]
  505. dec dword ptr [esp + count]
  506. jz calcminmax
  ; tx/ty were not updated for this vertex, so the next one re-enters at
  ; smaxy (as the very first vertex does) and skips the extent compares.
  507. column 0,0 ; 2 cycle wait here
  508. fadd ; x'+y' z' x y z
  509. fadd ; x'+y'+z' x y z
  510. column 1,1 ; 2 cycle wait here
  511. fadd ; x'+y' z' x y z
  512. jmp smaxy
  513. cleanup:
  ; The last vertex was unflagged: fold its snapped sx/sy into the extents.
  514. cmp eax,dword ptr [tx]
  515. jl e1
  516. mov eax,dword ptr [tx]
  517. e1: cmp ebx,dword ptr [tx]
  518. jg e2
  519. mov ebx,dword ptr [tx]
  520. e2: cmp ecx,dword ptr [ty]
  521. jl e3
  522. mov ecx,dword ptr [ty]
  523. e3: cmp edx,dword ptr [ty]
  524. jg e4
  525. mov edx,dword ptr [ty]
  526. e4:
  527. calcminmax:
  ; Write the extents back: each register is loaded as a 32-bit integer
  ; and multiplied by _g_fOoTwoPow16 before the float store.
  528. mov dword ptr [tx],eax
  529. mov dword ptr [ty],ecx
  530. fild dword ptr [tx]
  531. fmul dword ptr [_g_fOoTwoPow16]
  532. fild dword ptr [ty]
  533. fmul dword ptr [_g_fOoTwoPow16]
  534. fxch st(1) ; x y
  535. mov edi,[esp + minx]
  536. mov esi,[esp + miny]
  537. fstp dword ptr [edi]
  538. fstp dword ptr [esi]
  539. mov dword ptr [tx],ebx
  540. mov dword ptr [ty],edx
  541. fild dword ptr [tx]
  542. fmul dword ptr [_g_fOoTwoPow16]
  543. fild dword ptr [ty]
  544. fmul dword ptr [_g_fOoTwoPow16]
  545. fxch st(1) ; x y
  546. mov edi,[esp + maxx]
  547. mov esi,[esp + maxy]
  548. fstp dword ptr [edi]
  549. fstp dword ptr [esi]
  550. alldone:
  551. add esp, vars
  552. pop ecx
  553. pop ebx
  554. pop edi
  555. pop esi
  556. pop ebp
  557. FDROP ; discard the value pushed by fldpi at entry
  558. return
  ; Out-of-line extent updates, kept off the loop's fall-through path.
  559. setminx:
  560. mov eax,dword ptr [tx]
  561. jmp sminx
  562. setmaxx:
  563. mov ebx,dword ptr [tx]
  564. jmp smaxx
  565. setminy:
  566. mov ecx,dword ptr [ty]
  567. jmp sminy
  568. setmaxy:
  569. mov edx,dword ptr [ty]
  570. jmp smaxy
  571. procend RLDDITransformClippedLoop,%xfrmName
  572. if GEN_XFRM
  ; _Rdtsc is assembled only when GEN_XFRM is nonzero. It returns in eax
  ; the 64-bit time-stamp counter shifted right by 10 bits, truncated to
  ; 32 bits. edx is overwritten with the counter's high dword.
  573. _Rdtsc proc
  574. db 0fh,31h ; RDTSC opcode bytes, emitted as data (result in edx:eax)
  575. shrd eax,edx,10 ; eax = low 32 bits of (edx:eax >> 10)
  576. ret
  577. _Rdtsc endp
  578. endif
  579. _TEXT ENDS
  580. extrn _g_fOne:dword
  581. extrn _g_fOoTwoPow16:dword
  582. extrn _g_dSnap:qword
  583. END