Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

896 lines
28 KiB

  /******************************Module*Header*******************************\
  * Module Name: rxtri.c
  *
  * Contains the low-level (rasterization) triangle-rendering routines for the
  * Millennium MCD driver.
  *
  * Copyright (c) 1995 Microsoft Corporation
  \**************************************************************************/
  9. #include "precomp.h"
  10. #include "mcdhw.h"
  11. #include "mcdutil.h"
  12. #include "mcdmath.h"
  13. //#undef CHECK_FIFO_FREE
  14. //#define CHECK_FIFO_FREE
  15. static MCDFLOAT fixScale = __MCDFIXSCALE;
  16. VOID FASTCALL __MCDCalcDeltaRGBZ(DEVRC *pRc, MCDVERTEX *a, MCDVERTEX *b,
  17. MCDVERTEX *c)
  18. {
  19. MCDFLOAT oneOverArea, t1, t2, t3, t4;
  20. LARGE_INTEGER temp;
  21. /*
  22. ** t1-4 are delta values for unit changes in x or y for each
  23. ** parameter.
  24. */
  25. #if !(_X86_ && ASM_ACCEL)
  26. if (pRc->privateEnables & (__MCDENABLE_SMOOTH | __MCDENABLE_Z)) {
  27. __MCD_FLOAT_BEGIN_DIVIDE(__MCDONE, pRc->halfArea, &oneOverArea);
  28. }
  29. #endif
  30. if (pRc->privateEnables & __MCDENABLE_SMOOTH) {
  31. MCDFLOAT drAC, dgAC, dbAC, daAC;
  32. MCDFLOAT drBC, dgBC, dbBC, daBC;
  33. MCDCOLOR *ac, *bc, *cc;
  34. #if _X86_ && ASM_ACCEL
  35. __asm{
  36. mov edx, pRc
  37. fstp oneOverArea // finish divide
  38. fld DWORD PTR [OFFSET(DEVRC.dyAC)][edx]
  39. fmul oneOverArea
  40. fld DWORD PTR [OFFSET(DEVRC.dyBC)][edx]
  41. fmul oneOverArea // dyBC dyAC
  42. fld DWORD PTR [OFFSET(DEVRC.dxAC)][edx]
  43. fmul oneOverArea // dxAC dyBC dyAC
  44. fxch ST(1) // dyBC dxAC dyAC
  45. fld DWORD PTR [OFFSET(DEVRC.dxBC)][edx]
  46. fmul oneOverArea // dxBC dyBC dxAC dyAC
  47. fxch ST(3) // dyAC dyBC dxAC dxBC
  48. fstp t1
  49. fstp t2
  50. fstp t3
  51. fstp t4
  52. // Now, calculate deltas:
  53. mov eax, a
  54. mov ecx, c
  55. mov ebx, b
  56. lea eax, [OFFSET(MCDVERTEX.colors) + eax]
  57. lea ecx, [OFFSET(MCDVERTEX.colors) + ecx]
  58. lea ebx, [OFFSET(MCDVERTEX.colors) + ebx]
  59. fld DWORD PTR [OFFSET(MCDCOLOR.r)][ecx]
  60. fsub DWORD PTR [OFFSET(MCDCOLOR.r)][eax]
  61. fld DWORD PTR [OFFSET(MCDCOLOR.r)][ecx]
  62. fsub DWORD PTR [OFFSET(MCDCOLOR.r)][ebx]
  63. // drBC drAC
  64. fld ST(1) // drAC drBC drAC
  65. fmul t2 // drACt2 drBC drAC
  66. fld ST(1) // drBC drACt2 drBC drAC
  67. fmul t1 // drBCt1 drACt2 drBC drAC
  68. fxch ST(2) // drBC drACt2 drBCt1 drAC
  69. fmul t3 // drBCt3 drACt2 drBCt1 drAC
  70. fxch ST(3) // drAC drACt2 drBCt1 drBCt3
  71. fmul t4 // drACt4 drACt2 drBCt1 drBCt3
  72. fxch ST(2) // drBCt1 drACt2 drACt4 drBCt3
  73. fsubp ST(1), ST // drACBC drACt4 drBCt3
  74. fld DWORD PTR [OFFSET(MCDCOLOR.g)][ecx]
  75. fsub DWORD PTR [OFFSET(MCDCOLOR.g)][ebx]
  76. // dgBC drACBC drACt4 drBCt3
  77. fxch ST(2) // drACt4 drACBC dgBC drBCt3
  78. fsubp ST(3), ST // drACBC dgBC drBCAC
  79. fmul DWORD PTR [OFFSET(DEVRC.rScale)][edx]
  80. // DRACBC dgBC drBCAC
  81. fxch ST(2) // drBCAC dgBC DRACBC
  82. fmul DWORD PTR [OFFSET(DEVRC.rScale)][edx]
  83. // DRBCAC dgBC DRACBC
  84. fld DWORD PTR [OFFSET(MCDCOLOR.g)][ecx]
  85. fsub DWORD PTR [OFFSET(MCDCOLOR.g)][eax]
  86. // dgAC DRBCAC dgBC DRACBC
  87. fxch ST(3)
  88. // DRACBC DRBCAC dgBC dgAC
  89. fst DWORD PTR [OFFSET(DEVRC.drdx)][edx]
  90. fistp DWORD PTR [OFFSET(DEVRC.fxdrdx)][edx]
  91. fst DWORD PTR [OFFSET(DEVRC.drdy)][edx]
  92. fistp DWORD PTR [OFFSET(DEVRC.fxdrdy)][edx]
  93. // dgBC dgAC
  94. fld ST(1) // dgAC dgBC dgAC
  95. fmul t2 // dgACt2 dgBC dgAC
  96. fld ST(1) // dgBC dgACt2 dgBC dgAC
  97. fmul t1 // dgBCt1 dgACt2 dgBC dgAC
  98. fxch ST(2) // dgBC dgACt2 dgBCt1 dgAC
  99. fmul t3 // dgBCt3 dgACt2 dgBCt1 dgAC
  100. fxch ST(3) // dgAC dgACt2 dgBCt1 dgBCt3
  101. fmul t4 // dgACt4 dgACt2 dgBCt1 dgBCt3
  102. fxch ST(2) // dgBCt1 dgACt2 dgACt4 dgBCt3
  103. fsubp ST(1), ST // dgACBC dgACt4 dgBCt3
  104. fld DWORD PTR [OFFSET(MCDCOLOR.b)][ecx]
  105. fsub DWORD PTR [OFFSET(MCDCOLOR.b)][ebx]
  106. // dbBC dgACBC dgACt4 dgBCt3
  107. fxch ST(2) // dgACt4 dgACBC dbBC dgBCt3
  108. fsubp ST(3), ST // dgACBC dbBC dgBCAC
  109. fmul DWORD PTR [OFFSET(DEVRC.gScale)][edx]
  110. // DGACBC dbBC dgBCAC
  111. fxch ST(2) // dgBCAC dbBC DGACBC
  112. fmul DWORD PTR [OFFSET(DEVRC.gScale)][edx]
  113. // DGBCAC dbBC DGACBC
  114. fld DWORD PTR [OFFSET(MCDCOLOR.b)][ecx]
  115. fsub DWORD PTR [OFFSET(MCDCOLOR.b)][eax]
  116. // dbAC DGBCAC dbBC DGACBC
  117. fxch ST(3)
  118. // DGACBC DGBCAC dbBC dbAC
  119. fst DWORD PTR [OFFSET(DEVRC.dgdx)][edx]
  120. fistp DWORD PTR [OFFSET(DEVRC.fxdgdx)][edx]
  121. fst DWORD PTR [OFFSET(DEVRC.dgdy)][edx]
  122. fistp DWORD PTR [OFFSET(DEVRC.fxdgdy)][edx]
  123. // dbBC dbAC
  124. fld ST(1) // dbAC dbBC dbAC
  125. fmul t2 // dbACt2 dbBC dbAC
  126. fld ST(1) // dbBC dbACt2 dbBC dbAC
  127. fmul t1 // dbBCt1 dbACt2 dbBC dbAC
  128. fxch ST(2) // dbBC dbACt2 dbBCt1 dbAC
  129. fmul t3 // dbBCt3 dbACt2 dbBCt1 dbAC
  130. fxch ST(3) // dbAC dbACt2 dbBCt1 dbBCt3
  131. fmul t4 // dbACt4 dbACt2 dbBCt1 dbBCt3
  132. fxch ST(2) // dbBCt1 dbACt2 dbACt4 dbBCt3
  133. fsubp ST(1), ST // dbACBC dbACt4 dbBCt3
  134. fxch ST(1) // dbACt4 dbACBC dbBCt3
  135. fsubp ST(2), ST // dbACBC dbBCAC (+1)
  136. fmul DWORD PTR [OFFSET(DEVRC.bScale)][edx]
  137. // DBACBC dbBCAC
  138. fxch ST(1) // dbBCAC DBACBC
  139. fmul DWORD PTR [OFFSET(DEVRC.bScale)][edx]
  140. // DBBCAC DBACBC
  141. fxch ST(1) // DBACBC DBBCAC
  142. fst DWORD PTR [OFFSET(DEVRC.dbdx)][edx] //(+1)
  143. fistp DWORD PTR [OFFSET(DEVRC.fxdbdx)][edx]
  144. fst DWORD PTR [OFFSET(DEVRC.dbdy)][edx]
  145. fistp DWORD PTR [OFFSET(DEVRC.fxdbdy)][edx]
  146. }
  147. #else
  148. ac = &a->colors[0];
  149. bc = &b->colors[0];
  150. cc = &c->colors[0];
  151. drAC = cc->r - ac->r;
  152. drBC = cc->r - bc->r;
  153. dgAC = cc->g - ac->g;
  154. dgBC = cc->g - bc->g;
  155. dbAC = cc->b - ac->b;
  156. dbBC = cc->b - bc->b;
  157. __MCD_FLOAT_SIMPLE_END_DIVIDE(oneOverArea);
  158. t1 = pRc->dyAC * oneOverArea;
  159. t2 = pRc->dyBC * oneOverArea;
  160. t3 = pRc->dxAC * oneOverArea;
  161. t4 = pRc->dxBC * oneOverArea;
  162. pRc->drdx = (drAC * t2 - drBC * t1) * pRc->rScale;
  163. pRc->drdy = (drBC * t3 - drAC * t4) * pRc->rScale;
  164. pRc->dgdx = (dgAC * t2 - dgBC * t1) * pRc->gScale;
  165. pRc->dgdy = (dgBC * t3 - dgAC * t4) * pRc->gScale;
  166. pRc->dbdx = (dbAC * t2 - dbBC * t1) * pRc->bScale;
  167. pRc->dbdy = (dbBC * t3 - dbAC * t4) * pRc->bScale;
  168. pRc->fxdrdx = FTOL(pRc->drdx);
  169. pRc->fxdrdy = FTOL(pRc->drdy);
  170. pRc->fxdgdx = FTOL(pRc->dgdx);
  171. pRc->fxdgdy = FTOL(pRc->dgdy);
  172. pRc->fxdbdx = FTOL(pRc->dbdx);
  173. pRc->fxdbdy = FTOL(pRc->dbdy);
  174. #endif
  175. } else {
  176. // In this case, we're not smooth shading, but we still need
  177. // to set up the color registers:
  178. BYTE *pjBase;
  179. #if _X86_ && ASM_ACCEL
  180. LONG rTemp, gTemp, bTemp;
  181. _asm{
  182. mov ebx, pRc
  183. mov eax, [OFFSET(DEVRC.pvProvoking)][ebx] // AGI
  184. lea eax, [OFFSET(MCDVERTEX.colors) + eax]
  185. fld DWORD PTR [OFFSET(DEVRC.rScale)][ebx]
  186. fmul DWORD PTR [OFFSET(MCDCOLOR.r)][eax]
  187. fld DWORD PTR [OFFSET(DEVRC.gScale)][ebx]
  188. fmul DWORD PTR [OFFSET(MCDCOLOR.g)][eax]
  189. fld DWORD PTR [OFFSET(DEVRC.bScale)][ebx] // B G R
  190. fmul DWORD PTR [OFFSET(MCDCOLOR.b)][eax]
  191. fxch ST(2) // R G B
  192. fistp rTemp // G B
  193. fistp gTemp
  194. fistp bTemp
  195. }
  196. pjBase = pRc->ppdev->pjBase;
  197. CHECK_FIFO_FREE(pjBase, pRc->cFifo, 3);
  198. CP_WRITE(pjBase, DWG_DR4, rTemp);
  199. CP_WRITE(pjBase, DWG_DR8, gTemp);
  200. CP_WRITE(pjBase, DWG_DR12, bTemp);
  201. #else
  202. MCDCOLOR *pColor = &pRc->pvProvoking->colors[0];
  203. pjBase = pRc->ppdev->pjBase;
  204. CHECK_FIFO_FREE(pjBase, pRc->cFifo, 3);
  205. CP_WRITE(pjBase, DWG_DR4, FTOL(pColor->r * pRc->rScale));
  206. CP_WRITE(pjBase, DWG_DR8, FTOL(pColor->g * pRc->gScale));
  207. CP_WRITE(pjBase, DWG_DR12, FTOL(pColor->b * pRc->bScale));
  208. #endif
  209. }
  210. if (pRc->privateEnables & __MCDENABLE_Z)
  211. {
  212. MCDFLOAT dzAC, dzBC;
  213. if (!(pRc->privateEnables & __MCDENABLE_SMOOTH))
  214. {
  215. #if _X86_ && ASM_ACCEL
  216. _asm {
  217. mov eax, pRc
  218. fstp oneOverArea // finish divide
  219. fld DWORD PTR [OFFSET(DEVRC.dyAC)][eax]
  220. fmul oneOverArea
  221. fld DWORD PTR [OFFSET(DEVRC.dyBC)][eax]
  222. fmul oneOverArea // dyBC dyAC
  223. fld DWORD PTR [OFFSET(DEVRC.dxAC)][eax]
  224. fmul oneOverArea // dxAC dyBC dyAC
  225. fxch ST(1) // dyBC dxAC dyAC
  226. fld DWORD PTR [OFFSET(DEVRC.dxBC)][eax]
  227. fmul oneOverArea // dxBC dyBC dxAC dyAC
  228. fxch ST(3) // dyAC dyBC dxAC dxBC
  229. fstp t1
  230. fstp t2
  231. fstp t3
  232. fstp t4
  233. }
  234. #else
  235. __MCD_FLOAT_SIMPLE_END_DIVIDE(oneOverArea);
  236. t1 = pRc->dyAC * oneOverArea;
  237. t2 = pRc->dyBC * oneOverArea;
  238. t3 = pRc->dxAC * oneOverArea;
  239. t4 = pRc->dxBC * oneOverArea;
  240. #endif
  241. }
  242. #if _X86_ && ASM_ACCEL
  243. _asm {
  244. mov ecx, c
  245. mov eax, a
  246. mov ebx, b
  247. mov edx, pRc
  248. fld DWORD PTR [OFFSET(MCDVERTEX.windowCoord.z)][ecx]
  249. fsub DWORD PTR [OFFSET(MCDVERTEX.windowCoord.z)][eax]
  250. fld DWORD PTR [OFFSET(MCDVERTEX.windowCoord.z)][ecx]
  251. fsub DWORD PTR [OFFSET(MCDVERTEX.windowCoord.z)][ebx]
  252. // dzBC dzAC
  253. fld ST(1) // dzAC dzBC dzAC
  254. fmul t2 // ACt2 dzBC dzAC
  255. fld ST(1) // dzBC ACt2 dzBC dzAC
  256. fmul t1 // BCt1 ACt2 dzBC dzAC
  257. fxch ST(3) // dzAC ACt2 dzBC BCt1
  258. fmul t4 // ACt4 ACt2 dzBC BCt1
  259. fxch ST(2) // dzBC ACt2 ACt4 BCt1
  260. fmul t3 // BCt3 ACt2 ACt4 BCt1
  261. fsubrp ST(2),ST // ACt2 BCAC BCt1
  262. fsubrp ST(2),ST // BCAC ACBC
  263. fxch ST(1) // ACBC BCAC
  264. fmul DWORD PTR [OFFSET(DEVRC.zScale)][edx] // dzdx BCAC (1 cycle hit!)
  265. fxch ST(1) // BCAC dzdx
  266. fmul DWORD PTR [OFFSET(DEVRC.zScale)][edx] // dzdy dzdx
  267. fxch ST(1) // dzdx dzdy
  268. fst DWORD PTR [OFFSET(DEVRC.dzdx)][edx] // (1 cycle hit!)
  269. fistp temp
  270. mov ebx, DWORD PTR temp
  271. fst DWORD PTR [OFFSET(DEVRC.dzdy)][edx]
  272. mov [OFFSET(DEVRC.fxdzdx)][edx], ebx
  273. fistp temp
  274. mov ebx, DWORD PTR temp
  275. mov [OFFSET(DEVRC.fxdzdy)][edx], ebx
  276. }
  277. #else
  278. dzAC = c->windowCoord.z - a->windowCoord.z;
  279. dzBC = c->windowCoord.z - b->windowCoord.z;
  280. pRc->dzdx = (dzAC * t2 - dzBC * t1) * pRc->zScale;
  281. pRc->dzdy = (dzBC * t3 - dzAC * t4) * pRc->zScale;
  282. pRc->fxdzdx = FTOL(pRc->dzdx);
  283. pRc->fxdzdy = FTOL(pRc->dzdy);
  284. #endif
  285. }
  286. }
  287. VOID FASTCALL __HWSetupDeltas(DEVRC *pRc)
  288. {
  289. BYTE *pjBase = pRc->ppdev->pjBase;
  290. if (pRc->privateEnables & __MCDENABLE_SMOOTH) {
  291. CHECK_FIFO_FREE(pjBase, pRc->cFifo, 9);
  292. CP_WRITE(pjBase, DWG_DWGCTL, pRc->hwTrapFunc);
  293. CP_WRITE(pjBase, DWG_DR2, pRc->fxdzdx);
  294. CP_WRITE(pjBase, DWG_DR3, pRc->fxdzdy);
  295. CP_WRITE(pjBase, DWG_DR6, pRc->fxdrdx);
  296. CP_WRITE(pjBase, DWG_DR7, pRc->fxdrdy);
  297. CP_WRITE(pjBase, DWG_DR10, pRc->fxdgdx);
  298. CP_WRITE(pjBase, DWG_DR11, pRc->fxdgdy);
  299. CP_WRITE(pjBase, DWG_DR14, pRc->fxdbdx);
  300. CP_WRITE(pjBase, DWG_DR15, pRc->fxdbdy);
  301. } else {
  302. CHECK_FIFO_FREE(pjBase, pRc->cFifo, 3);
  303. CP_WRITE(pjBase, DWG_DWGCTL, pRc->hwTrapFunc);
  304. CP_WRITE(pjBase, DWG_DR2, pRc->fxdzdx);
  305. CP_WRITE(pjBase, DWG_DR3, pRc->fxdzdy);
  306. }
  307. }
  //
  // SNAPCOORD(value, intValue)
  //
  // Converts the floating-point coordinate 'value' to vertex fixed point,
  // adds __MCD_VERTEX_FRAC_HALF, converts the sum to an integer and assigns
  // it to the lvalue 'intValue'.
  //
  // The macro expands to a single parenthesised assignment expression with no
  // trailing semicolon, so "SNAPCOORD(v, i);" is exactly one statement and is
  // safe inside an unbraced if/else.
  //
  #define SNAPCOORD(value, intValue)\
      ((intValue) = __MCD_VERTEX_FIXED_TO_INT(__MCD_VERTEX_FLOAT_TO_FIXED(value)+\
                                              __MCD_VERTEX_FRAC_HALF))
  311. void FASTCALL __HWDrawTrap(DEVRC *pRc, MCDFLOAT dxLeft, MCDFLOAT dxRight,
  312. LONG y, LONG dy)
  313. {
  314. BYTE *pjBase = pRc->ppdev->pjBase;
  315. ULONG signs = 0;
  316. if (*((ULONG *)&dxLeft) & 0x80000000) {
  317. signs |= sdxl_SUB;
  318. }
  319. if (*((ULONG *)&dxRight) & 0x80000000) {
  320. signs |= sdxr_DEC;
  321. }
  322. CHECK_FIFO_FREE(pjBase, pRc->cFifo, 3);
  323. CP_WRITE(pjBase, DWG_SGN, (scanleft_RIGHT | sdy_ADD | signs));
  324. CP_WRITE(pjBase, DWG_LEN, dy);
  325. CP_START(pjBase, DWG_YDST, y + pRc->yOffset);
  326. }
  327. VOID FASTCALL __HWAdjustLeftEdgeRGBZ(DEVRC *pRc, MCDVERTEX *p,
  328. MCDFLOAT fdxLeft, MCDFLOAT fdyLeft,
  329. MCDFLOAT xFrac, MCDFLOAT yFrac,
  330. MCDFLOAT xErr)
  331. {
  332. BYTE *pjBase = pRc->ppdev->pjBase;
  333. LONG dxLeft, dyLeft, dyLeftErr;
  334. MCDCOLOR *pColor;
  335. #if _X86_ && ASM_ACCEL
  336. _asm {
  337. fld fdxLeft
  338. fmul fixScale
  339. fld fdyLeft
  340. fmul fixScale // leave these on the stack...
  341. }
  342. #else
  343. dxLeft = FLT_TO_FIX(fdxLeft);
  344. dyLeft = FLT_TO_FIX(fdyLeft);
  345. #endif
  346. CHECK_FIFO_FREE(pjBase, pRc->cFifo, 8);
  347. // Adjust the color and z values for the first pixel drawn on the left
  348. // edge to be on the pixel center. This is especially important to
  349. // perform accurate z-buffering.
  350. // We will need to set up the hardware color interpolators:
  351. if (pRc->privateEnables & __MCDENABLE_SMOOTH) {
  352. #if _X86_ && ASM_ACCEL
  353. LONG rTemp, gTemp, bTemp;
  354. // Compute the following in assembly:
  355. //
  356. // rTemp = (r * rScale) + (drdx * xFrac) + (drdy * yFrac);
  357. // gTemp = (g * gScale) + (dgdx * xFrac) + (dgdy * yFrac);
  358. // bTemp = (b * bScale) + (dbdx * xFrac) + (dbdy * yFrac);
  359. _asm{
  360. mov eax, p
  361. mov ebx, pRc
  362. fld xFrac
  363. fmul DWORD PTR [OFFSET(DEVRC.drdx)][ebx]
  364. lea eax, [OFFSET(MCDVERTEX.colors) + eax]
  365. fld yFrac
  366. fmul DWORD PTR [OFFSET(DEVRC.drdy)][ebx]
  367. fld DWORD PTR [OFFSET(MCDCOLOR.r)][eax]
  368. fmul DWORD PTR [OFFSET(DEVRC.rScale)][ebx]
  369. fxch ST(2)
  370. faddp ST(1), ST // R R
  371. fld xFrac
  372. fmul DWORD PTR [OFFSET(DEVRC.dgdx)][ebx]
  373. fld yFrac
  374. fmul DWORD PTR [OFFSET(DEVRC.dgdy)][ebx]
  375. fld DWORD PTR [OFFSET(MCDCOLOR.g)][eax]
  376. fmul DWORD PTR [OFFSET(DEVRC.gScale)][ebx]
  377. fxch ST(2) // G G G R R
  378. faddp ST(1), ST // G G R R
  379. fxch ST(2) // R G G R
  380. faddp ST(3), ST // G G R
  381. fld xFrac
  382. fmul DWORD PTR [OFFSET(DEVRC.dbdx)][ebx]
  383. fld yFrac
  384. fmul DWORD PTR [OFFSET(DEVRC.dbdy)][ebx]
  385. fld DWORD PTR [OFFSET(MCDCOLOR.b)][eax]
  386. fmul DWORD PTR [OFFSET(DEVRC.bScale)][ebx]
  387. fxch ST(2)
  388. faddp ST(1), ST // B B G G R
  389. fxch ST(2) // G B B G R
  390. faddp ST(3), ST // B B G R
  391. fxch ST(2) // G B B R
  392. fistp gTemp // B B R
  393. faddp ST(1), ST // B R
  394. fxch ST(1) // R B
  395. fistp rTemp // B
  396. fistp bTemp // not quite empty, still have dy, dx
  397. }
  398. CP_WRITE(pjBase, DWG_DR4, rTemp + 0x0800);
  399. CP_WRITE(pjBase, DWG_DR8, gTemp + 0x0800);
  400. CP_WRITE(pjBase, DWG_DR12, bTemp + 0x0800);
  401. #else
  402. pColor = &p->colors[0];
  403. CP_WRITE(pjBase, DWG_DR4,
  404. FTOL((pColor->r * pRc->rScale) +
  405. (pRc->drdx * xFrac) + (pRc->drdy * yFrac)) + 0x0800);
  406. CP_WRITE(pjBase, DWG_DR8,
  407. FTOL((pColor->g * pRc->gScale) +
  408. (pRc->dgdx * xFrac) + (pRc->dgdy * yFrac)) + 0x0800);
  409. CP_WRITE(pjBase, DWG_DR12,
  410. FTOL((pColor->b * pRc->bScale) +
  411. (pRc->dbdx * xFrac) + (pRc->dbdy * yFrac)) + 0x0800);
  412. #endif
  413. }
  414. // Now, sub-pixel correct the z-buffer:
  415. if (pRc->privateEnables & __MCDENABLE_Z) {
  416. #if _X86_ && ASM_ACCEL
  417. LARGE_INTEGER zTemp;
  418. if (pRc->MCDState.enables & MCD_POLYGON_OFFSET_FILL_ENABLE) {
  419. MCDFLOAT zOffset;
  420. zOffset = __MCDGetZOffsetDelta(pRc) +
  421. (pRc->MCDState.zOffsetUnits * pRc->zScale);
  422. // zTemp = (z * zScale) + (dzdx * xFrac) + (dzdy * yFrac) + zOffset;
  423. _asm{
  424. mov eax, p
  425. mov ebx, pRc
  426. fld xFrac
  427. fmul DWORD PTR [OFFSET(DEVRC.dzdx)][ebx]
  428. fld yFrac
  429. fmul DWORD PTR [OFFSET(DEVRC.dzdy)][ebx]
  430. fxch ST(1)
  431. fadd zOffset
  432. fld DWORD PTR [OFFSET(MCDVERTEX.windowCoord.z)][eax]
  433. fmul DWORD PTR [OFFSET(DEVRC.zScale)][ebx]
  434. fxch ST(2)
  435. faddp ST(1), ST // OUCH!!
  436. faddp ST(1), ST
  437. fistp zTemp // OUCH!!!
  438. }
  439. CP_WRITE(pjBase, DWG_DR0, zTemp.LowPart);
  440. } else {
  441. // zTemp = (z * zScale) + (dzdx * xFrac) + (dzdy * yFrac);
  442. _asm{
  443. mov eax, p
  444. mov ebx, pRc
  445. fld xFrac
  446. fmul DWORD PTR [OFFSET(DEVRC.dzdx)][ebx]
  447. fld yFrac
  448. fmul DWORD PTR [OFFSET(DEVRC.dzdy)][ebx]
  449. fld DWORD PTR [OFFSET(MCDVERTEX.windowCoord.z)][eax]
  450. fmul DWORD PTR [OFFSET(DEVRC.zScale)][ebx]
  451. fxch ST(2)
  452. faddp ST(1), ST
  453. faddp ST(1), ST // OUCH!!!
  454. fistp zTemp // OUCH!!!
  455. }
  456. CP_WRITE(pjBase, DWG_DR0, zTemp.LowPart);
  457. }
  458. #else
  459. if (pRc->MCDState.enables & MCD_POLYGON_OFFSET_FILL_ENABLE) {
  460. MCDFLOAT zOffset;
  461. zOffset = __MCDGetZOffsetDelta(pRc) +
  462. (pRc->MCDState.zOffsetUnits * pRc->zScale);
  463. CP_WRITE(pjBase, DWG_DR0,
  464. FTOL((p->windowCoord.z * pRc->zScale) + zOffset +
  465. (pRc->dzdx * xFrac) + (pRc->dzdy * yFrac)));
  466. } else {
  467. CP_WRITE(pjBase, DWG_DR0,
  468. FTOL((p->windowCoord.z * pRc->zScale) +
  469. (pRc->dzdx * xFrac) + (pRc->dzdy * yFrac)));
  470. }
  471. #endif
  472. }
  473. // We've handled the color and z setup. Now, take care of the actual
  474. // DDA.
  475. #if _X86_ && ASM_ACCEL
  476. // convert dxLeft and dyLeft to integer:
  477. _asm{
  478. fistp dyLeft
  479. fistp dxLeft
  480. }
  481. #endif
  482. if (dxLeft >= 0) {
  483. ULONG size = (dxLeft | dyLeft) >> 16;
  484. if (size <= 0xff) {
  485. dxLeft >>= (8 + 1);
  486. dyLeft >>= (8 + 1);
  487. } else if (size <= 0xfff) {
  488. dxLeft >>= (12 + 1);
  489. dyLeft >>= (12 + 1);
  490. } else {
  491. dxLeft >>= (16 + 1);
  492. dyLeft >>= (16 + 1);
  493. }
  494. dyLeftErr = FTOL(xErr * (MCDFLOAT)dyLeft);
  495. CP_WRITE(pjBase, DWG_AR1, -dxLeft + dyLeftErr);
  496. CP_WRITE(pjBase, DWG_AR2, -dxLeft);
  497. } else {
  498. ULONG size = (-dxLeft | dyLeft) >> 16;
  499. if (size <= 0xff) {
  500. dxLeft >>= (8 + 1);
  501. dyLeft >>= (8 + 1);
  502. } else if (size <= 0xfff) {
  503. dxLeft >>= (12 + 1);
  504. dyLeft >>= (12 + 1);
  505. } else {
  506. dxLeft >>= (16 + 1);
  507. dyLeft >>= (16 + 1);
  508. }
  509. dyLeftErr = FTOL(xErr * (MCDFLOAT)dyLeft);
  510. CP_WRITE(pjBase, DWG_AR1, dxLeft + dyLeft - 1 - dyLeftErr);
  511. CP_WRITE(pjBase, DWG_AR2, dxLeft);
  512. }
  513. if (!dyLeft)
  514. dyLeft++;
  515. //MCDBG_PRINT("LeftEdge: dxLeft = %x, dyLeft = %x, dyLeftErr = %x", dxLeft, dyLeft, dyLeftErr);
  516. CP_WRITE(pjBase, DWG_AR0, dyLeft);
  517. CP_WRITE(pjBase, DWG_FXLEFT, pRc->ixLeft + pRc->xOffset);
  518. }
  519. VOID FASTCALL __HWAdjustRightEdge(DEVRC *pRc, MCDVERTEX *p,
  520. MCDFLOAT fdxRight, MCDFLOAT fdyRight,
  521. MCDFLOAT xErr)
  522. {
  523. PDEV *ppdev = pRc->ppdev;
  524. BYTE *pjBase = ppdev->pjBase;
  525. LONG dxRight, dyRight, dyRightErr;
  526. #if _X86_ && ASM_ACCEL
  527. _asm {
  528. fld fdxRight
  529. fmul fixScale
  530. fld fdyRight
  531. fmul fixScale // leave these on the stack...
  532. }
  533. #else
  534. dxRight = FLT_TO_FIX(fdxRight);
  535. dyRight = FLT_TO_FIX(fdyRight);
  536. #endif
  537. CHECK_FIFO_FREE(pjBase, pRc->cFifo, 4);
  538. #if _X86_ && ASM_ACCEL
  539. _asm{
  540. fistp dyRight
  541. fistp dxRight
  542. }
  543. #endif
  544. if (dxRight >= 0) {
  545. ULONG size = (dxRight | dyRight) >> 16;
  546. if (size <= 0xff) {
  547. dxRight >>= (8 + 1);
  548. dyRight >>= (8 + 1);
  549. } else if (size <= 0xfff) {
  550. dxRight >>= (12 + 1);
  551. dyRight >>= (12 + 1);
  552. } else {
  553. dxRight >>= (16 + 1);
  554. dyRight >>= (16 + 1);
  555. }
  556. #if _X86_ && ASM_ACCEL
  557. _asm{
  558. fild dyRight
  559. fmul xErr
  560. }
  561. #else
  562. dyRightErr = FTOL(xErr * (MCDFLOAT)dyRight);
  563. #endif
  564. CP_WRITE(pjBase, DWG_AR5, -dxRight);
  565. #if _X86_ && ASM_ACCEL
  566. _asm{
  567. fistp dyRightErr
  568. }
  569. #endif
  570. CP_WRITE(pjBase, DWG_AR4, -dxRight + dyRightErr);
  571. } else {
  572. ULONG size = (-dxRight | dyRight) >> 16;
  573. if (size <= 0xff) {
  574. dxRight >>= (8 + 1);
  575. dyRight >>= (8 + 1);
  576. } else if (size <= 0xfff) {
  577. dxRight >>= (12 + 1);
  578. dyRight >>= (12 + 1);
  579. } else {
  580. dxRight >>= (16 + 1);
  581. dyRight >>= (16 + 1);
  582. }
  583. #if _X86_ && ASM_ACCEL
  584. _asm{
  585. fild dyRight
  586. fmul xErr
  587. }
  588. #else
  589. dyRightErr = FTOL(xErr * (MCDFLOAT)dyRight);
  590. #endif
  591. CP_WRITE(pjBase, DWG_AR5, dxRight);
  592. #if _X86_ && ASM_ACCEL
  593. _asm{
  594. fistp dyRightErr
  595. }
  596. #endif
  597. CP_WRITE(pjBase, DWG_AR4, dxRight + dyRight - 1 - dyRightErr);
  598. }
  599. if (!dyRight)
  600. dyRight++;
  601. CP_WRITE(pjBase, DWG_AR6, dyRight);
  602. CP_WRITE(pjBase, DWG_FXRIGHT, pRc->ixRight + pRc->xOffset);
  603. }
  604. VOID FASTCALL __MCDFillTriangle(DEVRC *pRc, MCDVERTEX *a, MCDVERTEX *b,
  605. MCDVERTEX *c, BOOL bCCW)
  606. {
  607. LONG aIY, bIY, cIY;
  608. MCDFLOAT dxdyAC, dxdyBC, dxdyAB;
  609. MCDFLOAT dx, dy, errX;
  610. MCDFLOAT xLeft, xRight;
  611. MCDFLOAT xLeftRound;
  612. #if _X86_ && ASM_ACCEL
  613. if (pRc->privateEnables & (__MCDENABLE_SMOOTH | __MCDENABLE_Z)) {
  614. // Pre-compute one over polygon half-area
  615. __MCD_FLOAT_BEGIN_DIVIDE(__MCDONE, pRc->halfArea, &pRc->invHalfArea);
  616. }
  617. #endif
  618. //
  619. // Snap each y coordinate to its pixel center
  620. //
  621. SNAPCOORD(a->windowCoord.y, aIY);
  622. SNAPCOORD(b->windowCoord.y, bIY);
  623. SNAPCOORD(c->windowCoord.y, cIY);
  624. //
  625. // Calculate delta values for unit changes in x or y
  626. //
  627. (*pRc->calcDeltas)(pRc, a, b, c);
  628. __MCD_FLOAT_BEGIN_DIVIDE(pRc->dxAC, pRc->dyAC, &dxdyAC);
  629. (*pRc->HWSetupDeltas)(pRc);
  630. //
  631. // Fill the two triangle halves. Note that the edge parameters
  632. // don't need to be recomputed for counter-clockwise triangles,
  633. // making them slightly faster...
  634. //
  635. if (bCCW)
  636. {
  637. __MCD_FLOAT_SIMPLE_END_DIVIDE(dxdyAC);
  638. dy = (aIY + __MCDHALF) - a->windowCoord.y;
  639. xLeft = a->windowCoord.x + dy*dxdyAC;
  640. SNAPCOORD(xLeft, pRc->ixLeft);
  641. xLeftRound = pRc->ixLeft + __MCDHALF;
  642. dx = xLeftRound - a->windowCoord.x;
  643. errX = xLeftRound - xLeft;
  644. (*pRc->adjustLeftEdge)(pRc, a, pRc->dxAC, pRc->dyAC, dx, dy, errX);
  645. if (aIY != bIY)
  646. {
  647. dxdyAB = pRc->dxAB / pRc->dyAB;
  648. xRight = a->windowCoord.x + dy*dxdyAB;
  649. SNAPCOORD(xRight, pRc->ixRight);
  650. errX = (pRc->ixRight + __MCDHALF) - xRight;
  651. (*pRc->adjustRightEdge)(pRc, a, pRc->dxAB, pRc->dyAB, errX);
  652. if (bIY != cIY) {
  653. __MCD_FLOAT_BEGIN_DIVIDE(pRc->dxBC, pRc->dyBC, &dxdyBC);
  654. }
  655. (*pRc->HWDrawTrap)(pRc, pRc->dxAC, pRc->dxAB, aIY, bIY - aIY);
  656. } else if (bIY != cIY) {
  657. __MCD_FLOAT_BEGIN_DIVIDE(pRc->dxBC, pRc->dyBC, &dxdyBC);
  658. }
  659. if (bIY != cIY) {
  660. __MCD_FLOAT_SIMPLE_END_DIVIDE(dxdyBC);
  661. dy = (bIY + __MCDHALF) - b->windowCoord.y;
  662. xRight = b->windowCoord.x + dy*dxdyBC;
  663. SNAPCOORD(xRight, pRc->ixRight);
  664. errX = (pRc->ixRight + __MCDHALF) - xRight;
  665. (*pRc->adjustRightEdge)(pRc, b, pRc->dxBC, pRc->dyBC, errX);
  666. (*pRc->HWDrawTrap)(pRc, pRc->dxAC, pRc->dxBC, bIY, cIY - bIY);
  667. }
  668. } else {
  669. __MCD_FLOAT_SIMPLE_END_DIVIDE(dxdyAC);
  670. dy = (aIY + __MCDHALF) - a->windowCoord.y;
  671. xRight = a->windowCoord.x + dy*dxdyAC;
  672. SNAPCOORD(xRight, pRc->ixRight);
  673. errX = (pRc->ixRight + __MCDHALF) - xRight;
  674. (*pRc->adjustRightEdge)(pRc, a, pRc->dxAC, pRc->dyAC, errX);
  675. if (aIY != bIY)
  676. {
  677. dxdyAB = pRc->dxAB / pRc->dyAB;
  678. xLeft = a->windowCoord.x + dy*dxdyAB;
  679. SNAPCOORD(xLeft, pRc->ixLeft);
  680. xLeftRound = pRc->ixLeft + __MCDHALF;
  681. dx = xLeftRound - a->windowCoord.x;
  682. errX = xLeftRound - xLeft;
  683. (*pRc->adjustLeftEdge)(pRc, a, pRc->dxAB, pRc->dyAB, dx, dy, errX);
  684. if (bIY != cIY) {
  685. __MCD_FLOAT_BEGIN_DIVIDE(pRc->dxBC, pRc->dyBC, &dxdyBC);
  686. }
  687. (*pRc->HWDrawTrap)(pRc, pRc->dxAB, pRc->dxAC, aIY, bIY - aIY);
  688. } else if (bIY != cIY) {
  689. __MCD_FLOAT_BEGIN_DIVIDE(pRc->dxBC, pRc->dyBC, &dxdyBC);
  690. }
  691. if (bIY != cIY)
  692. {
  693. __MCD_FLOAT_SIMPLE_END_DIVIDE(dxdyBC);
  694. dy = (bIY + __MCDHALF) - b->windowCoord.y;
  695. xLeft = b->windowCoord.x + dy*dxdyBC;
  696. SNAPCOORD(xLeft, pRc->ixLeft);
  697. xLeftRound = pRc->ixLeft + __MCDHALF;
  698. dx = xLeftRound - b->windowCoord.x;
  699. errX = xLeftRound - xLeft;
  700. (*pRc->adjustLeftEdge)(pRc, b, pRc->dxBC, pRc->dyBC, dx, dy, errX);
  701. (*pRc->HWDrawTrap)(pRc, pRc->dxBC, pRc->dxAC, bIY, cIY - bIY);
  702. }
  703. }
  704. }