Leaked source code of Windows Server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

938 lines
37 KiB

  1. /******************************module*header*******************************\
  2. * Module Name: mcdtri.c
  3. *
  4. * Contains the low-level (rasterization) triangle-rendering routines for the
  5. * Cirrus Logic 546X MCD driver.
  6. *
  7. * Copyright (c) 1997 Cirrus Logic, Inc.
  8. \**************************************************************************/
  9. #include "precomp.h"
  10. #include "mcdhw.h"
  11. #include "mcdutil.h"
  12. #include "mcdmath.h"
  // Build switch: enables the "#ifdef QUAKEEDGE_FIX" scanline-count adjustments in __MCDFillTriangle.
  13. #define QUAKEEDGE_FIX
  // Build switch: when defined, __MCDFillTriangle starts the 1/fwidth divide early with
  // __MCD_FLOAT_BEGIN_DIVIDE instead of computing it inline.
  14. #define FASTER_RECIP_ORTHO
  // 0.5 and 1.0 expressed in 16.16 fixed point (ONE is added to the 16.16 span widths below).
  15. #define HALF 0x08000
  16. #define ONE 0x10000
  // Debug print hook, defined with an empty body.
  // NOTE(review): this is an object-like macro, so "MCDTRI_PRINT(...);" call sites appear to
  // reduce to a discarded parenthesized comma expression rather than vanishing -- confirm.
  17. #define MCDTRI_PRINT
  18. //#define MAX_W_RATIO (float)1.45
  // Edge subdivision threshold: __MCDPerspTxtTriangle requests a split of an edge when one
  // end's windowCoord.w exceeds the other end's w times this ratio.
  19. #define MAX_W_RATIO (float)1.80
  // When every pair of vertex w values is within this ratio, __MCDPerspTxtTriangle renders
  // the triangle with linear_ok = TRUE.
  20. #define W_RATIO_PERSP_EQ_LINEAR (float)1.03
  /*
   * EDGE_SUBDIVIDE_TEST - decide whether one triangle edge must be split.
   *
   *   start, end  pointers to the edge's two end vertices
   *   WRAMAX      bound compared against start->windowCoord.w
   *   WRBMAX      bound compared against end->windowCoord.w
   *   SPLIT       int lvalue; receives 1 if either w exceeds its bound, else 0
   *
   * The expansion deliberately ends in ';' because the existing call sites
   * invoke the macro without a trailing semicolon.  Every argument is
   * parenthesized so that expression arguments (for example "v + 1") expand
   * with the intended precedence.
   */
  #define EDGE_SUBDIVIDE_TEST(start,end,WRAMAX,WRBMAX,SPLIT) \
      (SPLIT) = ((((start)->windowCoord.w > (WRAMAX)) || ((end)->windowCoord.w > (WRBMAX))) ? 1 : 0);
  /*
   * FIND_MIDPOINT - fill *mid with the midpoint of the edge start -> end.
   *
   *   start, end  pointers to the edge's end vertices (read only)
   *   mid         pointer to the vertex that receives the result
   *
   * Window x/y/z/w, colors[0] r/g/b/a and fog are plain averages of the two
   * ends.  Texture coordinates are averaged weighted by each end's
   * windowCoord.w and divided by the sum of the two w values
   * (0.5 / mid.w == 1 / (start.w + end.w)).
   *
   * Expands to a braced block, so "if (cond) FIND_MIDPOINT(a, b, m);" works.
   * Arguments are parenthesized so pointer expressions such as "&vertex" can
   * be passed directly; they are evaluated more than once, so they must not
   * have side effects.  The internal temporary has a macro-specific name to
   * avoid colliding with caller identifiers.
   */
  #define FIND_MIDPOINT(start, end, mid) { \
      float fm_half_recip_w; \
      (mid)->windowCoord.x = ((start)->windowCoord.x + (end)->windowCoord.x) * (float)0.5; \
      (mid)->windowCoord.y = ((start)->windowCoord.y + (end)->windowCoord.y) * (float)0.5; \
      (mid)->windowCoord.z = ((start)->windowCoord.z + (end)->windowCoord.z) * (float)0.5; \
      (mid)->colors[0].r = ((start)->colors[0].r + (end)->colors[0].r) * (float)0.5; \
      (mid)->colors[0].g = ((start)->colors[0].g + (end)->colors[0].g) * (float)0.5; \
      (mid)->colors[0].b = ((start)->colors[0].b + (end)->colors[0].b) * (float)0.5; \
      (mid)->colors[0].a = ((start)->colors[0].a + (end)->colors[0].a) * (float)0.5; \
      (mid)->fog = ((start)->fog + (end)->fog) * (float)0.5; \
      (mid)->windowCoord.w = ((start)->windowCoord.w + (end)->windowCoord.w) * (float)0.5; \
      fm_half_recip_w = (float)0.5/(mid)->windowCoord.w; /* pre-mult by .5 for use below */ \
      (mid)->texCoord.x = fm_half_recip_w * ((start)->texCoord.x * (start)->windowCoord.w + \
                                             (end)->texCoord.x * (end)->windowCoord.w); \
      (mid)->texCoord.y = fm_half_recip_w * ((start)->texCoord.y * (start)->windowCoord.w + \
                                             (end)->texCoord.y * (end)->windowCoord.w); \
  }
  40. VOID FASTCALL __MCDSubdivideTriangle(DEVRC *pRc, MCDVERTEX *a, MCDVERTEX *b, MCDVERTEX *c,
  41. int split12, int split23, int split31, int subdiv_levels)
  42. {
  43. MCDVERTEX Vmid12,Vmid23,Vmid31; // 3 possible midpoints
  44. subdiv_levels++;
  45. // find midpoint of edges if they need to be split
  46. if (split12) FIND_MIDPOINT(a,b,((MCDVERTEX *)&Vmid12));
  47. if (split23) FIND_MIDPOINT(b,c,((MCDVERTEX *)&Vmid23));
  48. if (split31) FIND_MIDPOINT(c,a,((MCDVERTEX *)&Vmid31));
  49. #define SPLIT12 0x4
  50. #define SPLIT23 0x2
  51. #define SPLIT31 0x1
  52. // from original vertices and any midpoints found above, create a batch of triangles
  53. switch ((split12<<2) | (split23<<1) | split31)
  54. {
  55. case SPLIT12:
  56. // 2 triangles, 1->2 edge was divided
  57. __MCDPerspTxtTriangle(pRc, a, &Vmid12, c, subdiv_levels);
  58. __MCDPerspTxtTriangle(pRc, b, &Vmid12, c, subdiv_levels);
  59. break;
  60. case SPLIT23:
  61. // 2 triangles, 2->3 edge was divided
  62. __MCDPerspTxtTriangle(pRc, b, &Vmid23, a, subdiv_levels);
  63. __MCDPerspTxtTriangle(pRc, c, &Vmid23, a, subdiv_levels);
  64. break;
  65. case SPLIT31:
  66. // 2 triangles, 3->1 edge was divided
  67. __MCDPerspTxtTriangle(pRc, c, &Vmid31, b, subdiv_levels);
  68. __MCDPerspTxtTriangle(pRc, a, &Vmid31, b, subdiv_levels);
  69. break;
  70. case (SPLIT12|SPLIT23):
  71. // 3 triangles, 1->2 and 2->3 edges were divided
  72. __MCDPerspTxtTriangle(pRc, a, &Vmid23, c, subdiv_levels);
  73. __MCDPerspTxtTriangle(pRc, a, &Vmid23, &Vmid12, subdiv_levels);
  74. __MCDPerspTxtTriangle(pRc, &Vmid12, &Vmid23, b, subdiv_levels);
  75. break;
  76. case (SPLIT23|SPLIT31):
  77. // 3 triangles, 2->3 and 3->1 edges were divided
  78. __MCDPerspTxtTriangle(pRc, a, &Vmid31, b, subdiv_levels);
  79. __MCDPerspTxtTriangle(pRc, b, &Vmid31, &Vmid23, subdiv_levels);
  80. __MCDPerspTxtTriangle(pRc, &Vmid23, &Vmid31, c, subdiv_levels);
  81. break;
  82. case (SPLIT12|SPLIT31):
  83. // 3 triangles, 1->2 and 3->1 edges were divided
  84. __MCDPerspTxtTriangle(pRc, a, &Vmid31, &Vmid12, subdiv_levels);
  85. __MCDPerspTxtTriangle(pRc, b, &Vmid31, &Vmid12, subdiv_levels);
  86. __MCDPerspTxtTriangle(pRc, b, &Vmid31, c, subdiv_levels);
  87. break;
  88. case (SPLIT12|SPLIT23|SPLIT31):
  89. // 4 triangles, all 3 edges were divided
  90. __MCDPerspTxtTriangle(pRc, a, &Vmid31, &Vmid12, subdiv_levels);
  91. __MCDPerspTxtTriangle(pRc, b, &Vmid23, &Vmid12, subdiv_levels);
  92. __MCDPerspTxtTriangle(pRc, c, &Vmid31, &Vmid23, subdiv_levels);
  93. __MCDPerspTxtTriangle(pRc, &Vmid12, &Vmid23, &Vmid31, subdiv_levels);
  94. break;
  95. default:
  96. // original triangle - no subdivisions
  97. // this routine should never be called for this case, but here's insurance
  98. __MCDPerspTxtTriangle(pRc, a, b, c, subdiv_levels);
  99. break;
  100. } // endswitch
  101. }
  /*
   * EXCHANGE - swap two pointer lvalues i and j.
   * Requires a pointer variable named "ptemp" to be declared in the
   * enclosing scope (SORT_Y_ORDER and the callers in this file use void *).
   */
  #define EXCHANGE(i,j) \
  { \
      ptemp = (i); \
      (i) = (j); \
      (j) = ptemp; \
  }

  /*
   * ROTATE_L - rotate three pointer lvalues left: afterwards i holds the old
   * j, j holds the old k and k holds the old i.  Uses the caller's "ptemp".
   */
  #define ROTATE_L(i,j,k) \
  { \
      ptemp = (j); \
      (j) = (k); \
      (k) = (i); \
      (i) = ptemp; \
  }

  /*
   * SORT_Y_ORDER - reorder three vertex-pointer lvalues so that
   * a->windowCoord.y <= b->windowCoord.y <= c->windowCoord.y.
   *
   * Expands to a braced block that declares its own "ptemp", so call sites
   * need no trailing semicolon.  The comparison tree is the same as before;
   * the explicit braces only make the if/else pairing visible instead of
   * relying on dangling-else binding, and the arguments are parenthesized so
   * lvalue expressions such as "*pp" expand correctly.
   */
  #define SORT_Y_ORDER(a,b,c) \
  { \
      void *ptemp; \
      if ((a)->windowCoord.y > (b)->windowCoord.y) { \
          if ((c)->windowCoord.y < (b)->windowCoord.y) { \
              EXCHANGE(a,c) \
          } else if ((c)->windowCoord.y < (a)->windowCoord.y) { \
              ROTATE_L(a,b,c) \
          } else { \
              EXCHANGE(a,b) \
          } \
      } else if ((c)->windowCoord.y < (a)->windowCoord.y) { \
          ROTATE_L(c,b,a) \
      } else if ((c)->windowCoord.y < (b)->windowCoord.y) { \
          EXCHANGE(b,c) \
      } \
  }
  130. VOID FASTCALL __MCDPerspTxtTriangle(DEVRC *pRc, MCDVERTEX *a, MCDVERTEX *b, MCDVERTEX *c, int subdiv_levels)
  131. {
  132. int split12, split23, split31;
  133. float w1_times_max = a->windowCoord.w * W_RATIO_PERSP_EQ_LINEAR;
  134. float w2_times_max = b->windowCoord.w * W_RATIO_PERSP_EQ_LINEAR;
  135. float w3_times_max = c->windowCoord.w * W_RATIO_PERSP_EQ_LINEAR;
  136. if ((a->windowCoord.w < w2_times_max) && (b->windowCoord.w < w1_times_max) &&
  137. (b->windowCoord.w < w3_times_max) && (c->windowCoord.w < w2_times_max) &&
  138. (c->windowCoord.w < w1_times_max) && (a->windowCoord.w < w3_times_max))
  139. {
  140. if (subdiv_levels > 1)
  141. {
  142. // this triangle result of subdivision -> must sort in y
  143. SORT_Y_ORDER(a,b,c)
  144. }
  145. __MCDFillTriangle(pRc, a, b, c, TRUE); // ready to render - linear ok
  146. }
  147. else
  148. {
  149. w1_times_max = a->windowCoord.w * MAX_W_RATIO;
  150. w2_times_max = b->windowCoord.w * MAX_W_RATIO;
  151. w3_times_max = c->windowCoord.w * MAX_W_RATIO;
  152. // determine from w ratios which (if any) edges must be subdivided
  153. EDGE_SUBDIVIDE_TEST(a,b,w2_times_max,w1_times_max,split12)
  154. EDGE_SUBDIVIDE_TEST(b,c,w3_times_max,w2_times_max,split23)
  155. EDGE_SUBDIVIDE_TEST(c,a,w1_times_max,w3_times_max,split31)
  156. // if we need to subdivide, and we're not already too many levels deep, do it
  157. // (since subdivision recursive, must limit it to prevent stack overflow in kernel mode)
  158. if ((split12 | split23 | split31) && (subdiv_levels < 4))
  159. __MCDSubdivideTriangle(pRc, a, b, c, split12, split23, split31, subdiv_levels);
  160. else
  161. {
  162. if (subdiv_levels > 1)
  163. {
  164. // this triangle result of subdivision -> must sort in y
  165. SORT_Y_ORDER(a,b,c)
  166. }
  167. __MCDFillTriangle(pRc, a, b, c, FALSE); // ready to render - linear NOT ok
  168. }
  169. }
  170. }
  // FLT_TYPE is a cast prefix: "FLT_TYPE 0.5" expands to "(float) 0.5".
  171. #define FLT_TYPE (float)
  // Multiplier that scales a float to 16.16 fixed point (65536 = 2^16).
  172. #define FLOAT_TO_1616 FLT_TYPE 65536.0
  // Added to the 16.16 start-x word in __MCDFillTriangle.
  // NOTE(review): presumably rounds the start x up to the next pixel -- confirm against hardware docs.
  173. #define FIXED_X_ROUND_FACTOR 0x7fff
  174. //#define INTPR(FLOATVAL) FTOL((FLOATVAL) * FLT_TYPE 1000.0)
  // Debug-print conversion is stubbed out: INTPR(x) is the constant 0 and x is not evaluated.
  175. #define INTPR(FLOATVAL) 0
  176. /*********************************************************************
  177. * Local Functions
  178. **********************************************************************/
  // Flag bits OR'ed into the start-x word (xflags) and the start-y word of the polygon command.
  // The X and Y variants share bit values because they are applied to different words.
  179. #define RIGHT_TO_LEFT_DIR 0x80000000
  180. #define LEFT_TO_RIGHT_DIR 0
  181. #define EDGE_DISABLE_RIGHT_X 0x20000000
  182. #define EDGE_DISABLE_LEFT_X 0x40000000
  183. #define EDGE_DISABLE_BOTTOM_Y 0x20000000
  184. #define EDGE_DISABLE_TOP_Y 0x40000000
  // Selections actually used by __MCDFillTriangle.
  185. #define EDGE_DISABLE_X EDGE_DISABLE_RIGHT_X
  186. #define EDGE_DISABLE_Y 0
  187. // macros to convert float to precision equivalent to 16.16 representation
  188. #define PREC_FLOAT FLOAT_TO_1616
  189. // rounding done by adding 1/2 of 1/65536, since 1/65536 is 16.16 step size
  190. #define PREC_ROUND ((FLT_TYPE 0.5) / PREC_FLOAT)
  // PREC_1616(inval, outval): adds a half-step bias carrying the sign of inval, scales to 16.16,
  // converts with FTOL, then scales back to float and stores the result in outval.
  // Expands to a braced block with a local named "bias"; inval is evaluated twice.
  191. #define PREC_1616(inval,outval) { \
  192. float bias = (inval>=0) ? PREC_ROUND : -PREC_ROUND; \
  193. outval=(float)(FTOL((inval+bias)*PREC_FLOAT)) * ((FLT_TYPE 1.0) / PREC_FLOAT); \
  194. }
  195. // for positive values that will be used as negative, unconditionally bias it smaller
  196. // unless it's already too small
  // NEG_PREC_1616(inval, outval): same conversion as PREC_1616, but the bias is -PREC_ROUND for
  // positive inval and 0 otherwise.
  197. #define NEG_PREC_1616(inval,outval) { \
  198. float bias = (inval>0) ? -PREC_ROUND : 0; \
  199. outval=(float)(FTOL((inval+bias)*PREC_FLOAT)) * ((FLT_TYPE 1.0) / PREC_FLOAT); \
  200. }
  201. // convert from float to 16.16 long
  202. #define fix_ieee( val ) FTOL((val) * (float)65536.0)
  203. // convert from float to 8.24 long
  204. #define fix824_ieee( val ) FTOL((val) * (float)16777216.0)
  // Coefficients of the quadratic texture approximation used in __MCDFillTriangle:
  // u(t) = a2*t*t + a1*t + u0 and v(t) = b2*t*t + b1*t + v0.
  205. typedef struct {
  206. float a1, a2;
  207. float b1, b2;
  208. } QUADRATIC;
  /*
   * __MCDFillTriangle
   *
   * Sets up one triangle for the hardware rasterizer.  Edge, color, Z, texture
   * and alpha/fog parameters are computed in float, converted to fixed-point
   * words of a polygon command built at pRc->ppdev->LL_State.pDL->pdwNext, and
   * the queued dwords are then copied to the HOST_3D_DATA_PORT register.
   *
   *   pRc        rendering context; supplies the x/y offsets, color/alpha scale
   *              factors, enable bits, texture size and bias, provoking vertex
   *              and the base opcode (dwPolyOpcode).
   *   a, b, c    triangle vertices; per the comment in the body, the caller has
   *              already sorted them so that a.y <= b.y <= c.y.
   *   linear_ok  nonzero -> use the linear (6-parameter) texture setup even when
   *              __MCDENABLE_PERSPECTIVE is set.
   *
   * Returns without emitting anything when the triangle covers no scanline.
   *
   * Command layout (dword offsets from the initial pdwNext):
   *   +0      opcode (stored last, through pdwOrig)
   *   +1      start x in 16.16 | direction / edge flags
   *   +2      start y in the upper 16 bits
   *   +3..+5  start r, g, b (written through pdwColor)
   *   +6      main-edge dx per scanline
   *   +7      scanline counts (count1-1 low, count2 in the upper half)
   *   +8,+9   initial span widths, +10,+11 span width deltas
   *   then the optional color-gradient, Z, texture and alpha/fog groups.
   *
   * NOTE(review): MCDTRI_PRINT is defined with an empty body near the top of this
   * file, so each MCDTRI_PRINT(...) line appears to reduce to a discarded
   * parenthesized comma expression rather than disappearing -- confirm.
   */
  209. VOID FASTCALL __MCDFillTriangle(DEVRC *pRc, MCDVERTEX *a, MCDVERTEX *b, MCDVERTEX *c, int linear_ok)
  210. {
  211. PDEV *ppdev;
  212. unsigned int *pdwNext;
  213. // output queue stuff...
  214. DWORD *pSrc;
  215. DWORD *pDest;
  216. DWORD *pdwStart;
  217. DWORD *pdwOrig;
  218. DWORD *pdwColor;
  219. DWORD dwOpcode;
  220. int count1, count2;
  221. float frecip_main, frecip_ortho;
  222. float fdx_main;
  223. float ftemp;
  224. float v1red,v1grn,v1blu;
  225. float fv2x,fv2y,fv3x,fv3y,fv32y;
  226. float aroundy, broundy;
  227. float fmain_adj, fwidth, fxincrement, finitwidth1, finitwidth2;
  228. float fdwidth1,fdwidth2;
  229. float awinx, awiny, bwinx, bwiny, cwinx, cwiny;
  230. int int_awiny, int_bwiny, int_cwiny;
  231. float fadjust;
  232. int xflags;
  233. // window coords are float values, and need to have
  234. // viewportadjust (MCDVIEWPORT) values subtracted to get to real screen space
  235. // color values are 0->1 floats and must be multiplied by scale values (MCDRCINFO)
  236. // to get to nbits range (scale = 0xff for 8 bit, 0x7 for 3 bit, etc.)
  237. // Z values are 0->1 floats and must be multiplied by zscale values (MCDRCINFO)
  238. // Caller has already sorted vertices so that a.y <= b.y <= c.y
  239. // Force flat-top/ flat-bottom right triangles to draw toward the center.
  240. // if Main is vertical edge, much better chance of alignment at diagonal
  241. if( a->windowCoord.y == b->windowCoord.y ) { // Flat top
  242. if( b->windowCoord.x == c->windowCoord.x ) {
  243. void *ptemp;
  244. EXCHANGE(a, b);
  245. }
  246. } else
  247. if( b->windowCoord.y == c->windowCoord.y ) { // Flat bottom
  248. if( a->windowCoord.x == b->windowCoord.x ) {
  249. void *ptemp;
  250. EXCHANGE(b, c);
  251. }
  252. }
  253. MCDTRI_PRINT("v1 = %d %d %d c1=%d %d %d",INTPR(a->windowCoord.x),INTPR(a->windowCoord.y),INTPR(a->windowCoord.z),INTPR(a->colors[0].r),INTPR(a->colors[0].g),INTPR(a->colors[0].b));
  254. MCDTRI_PRINT("v2 = %d %d %d c2=%d %d %d",INTPR(b->windowCoord.x),INTPR(b->windowCoord.y),INTPR(b->windowCoord.z),INTPR(b->colors[0].r),INTPR(b->colors[0].g),INTPR(b->colors[0].b));
  255. MCDTRI_PRINT("v3 = %d %d %d c3=%d %d %d",INTPR(c->windowCoord.x),INTPR(c->windowCoord.y),INTPR(c->windowCoord.z),INTPR(c->colors[0].r),INTPR(c->colors[0].g),INTPR(c->colors[0].b));
  // Apply the context's x/y offsets to all three vertices.
  256. awinx = a->windowCoord.x + pRc->fxOffset;
  257. awiny = a->windowCoord.y + pRc->fyOffset;
  258. bwinx = b->windowCoord.x + pRc->fxOffset;
  259. bwiny = b->windowCoord.y + pRc->fyOffset;
  260. cwinx = c->windowCoord.x + pRc->fxOffset;
  261. cwiny = c->windowCoord.y + pRc->fyOffset;
  262. // round y's (don't ever need rounded version of c's y)
  263. aroundy = FLT_TYPE FTOL(awiny + FLT_TYPE 0.5);
  264. broundy = FLT_TYPE FTOL(bwiny + FLT_TYPE 0.5);
  265. #if 0
  266. // Someday, may want to convert floats to 16.16 equivalent precision
  267. // I didn't find it necessary, but it's the first thing to try if
  268. // a case comes up with holes....
  269. PREC_1616(awinx,awinx);
  270. PREC_1616(awiny,awiny);
  271. PREC_1616(bwinx,bwinx);
  272. PREC_1616(bwiny,bwiny);
  273. PREC_1616(cwinx,cwinx);
  274. PREC_1616(cwiny,cwiny);
  275. #endif
  276. MCDTRI_PRINT("v1 = %d %d ",INTPR(awinx),INTPR(awiny));
  277. MCDTRI_PRINT("v2 = %d %d ",INTPR(bwinx),INTPR(bwiny));
  278. MCDTRI_PRINT("v3 = %d %d ",INTPR(cwinx),INTPR(cwiny));
  // Edge vectors relative to vertex a (fv2* = a->b, fv3* = a->c) and the b->c height.
  279. fv2x = bwinx - awinx;
  280. fv2y = bwiny - awiny;
  281. fv3x = cwinx - awinx;
  282. fv3y = cwiny - awiny;
  283. fv32y= cwiny - bwiny;
  284. // counts are total number of scan lines traversed
  285. // PERFORMANCE OPTIMIZATION - start divide now for main slope
  // NOTE(review): fv3y is 0 when a and c share the same y; no guard is visible before this
  // divide -- confirm the divide macros tolerate it (the zero-height case returns early below).
  286. __MCD_FLOAT_BEGIN_DIVIDE(__MCDONE, fv3y, &frecip_main);
  287. // integer operations "free" since within fdiv latency
  288. ppdev = pRc->ppdev;
  289. pdwNext = ppdev->LL_State.pDL->pdwNext;
  290. pdwOrig = pdwNext;
  291. int_cwiny = FTOL(cwiny);
  292. int_bwiny = FTOL(bwiny);
  293. int_awiny = FTOL(awiny);
  294. count1 = int_bwiny - int_awiny;
  295. count2 = int_cwiny - int_bwiny;
  296. __MCD_FLOAT_SIMPLE_END_DIVIDE(frecip_main);
  297. if ((awiny - int_awiny) == FLT_TYPE 0.0)
  298. {
  299. // start is on whole y - so bump count to include that scanline
  300. // unless identical to b's y
  301. if (bwiny != awiny) count1++;
  302. }
  303. // check for case of adjusted A and real B being on same scan line (flat top)
  304. // even though count not 0
  305. // ex. a.y = 79.60, b.y = 80.00 -> a will be rounded to 80.0, so really
  306. // this is a flat top triangle. In such case, set count1 = 0.
  307. // b will be counted below. Failure to do this results in scanline that
  308. // has B being part of top and bottom, so width delta's applied when
  309. // hardware steps make for some interesting artifacts (see p. 205 of MCD notes)
  310. if (count1 == 1)
  311. {
  312. if ((bwiny - int_bwiny) == FLT_TYPE 0.0)
  313. {
  314. // convert to flat top
  315. count1 = 0;
  316. }
  317. }
  318. // similarly for almost flat bottom triangles...
  319. // If b.y=124.90 and c.y=125.000, we don't want to draw the scan line at
  320. // y=125 since any pixels drawn will be outside the triangle,
  321. // so if c on exact y and count2=1, set count2=0
  322. if (count2 == 1)
  323. {
  324. if ((cwiny - int_cwiny) == FLT_TYPE 0.0)
  325. {
  326. // convert to flat bottom
  327. count2 = 0;
  328. }
  329. }
  330. // main slope - based on precise vertices
  331. //USING MACROS TO OVERLAP DIVIDE WITH INTEGER OPERATIONS
  332. // frecip_main = FLT_TYPE 1.0/fv3y;
  333. fdx_main = fv3x * frecip_main;
  334. PREC_1616(fdx_main,fdx_main);
  335. // width at vtx b - based on precise vertices
  336. fwidth = fv2x - (fdx_main * fv2y);
  337. // make width positive, and set direction flag
  338. if (fwidth<0)
  339. {
  340. fwidth = -fwidth;
  341. xflags = RIGHT_TO_LEFT_DIR | EDGE_DISABLE_X;
  342. }
  343. else
  344. {
  345. xflags = LEFT_TO_RIGHT_DIR | EDGE_DISABLE_X;
  346. }
  347. // if triangle has a top section (i.e. not flat top)....
  348. if (count1)
  349. {
  350. fdwidth1 = fwidth / fv2y;
  351. PREC_1616(fdwidth1,fdwidth1);
  352. if (aroundy < awiny)
  353. {
  354. // rounding produced y less than original, so step to next scan line
  355. // since init width would be negative
  356. aroundy += FLT_TYPE 1.0;
  357. }
  358. // determine distance between actual a and scanline we'll start on
  359. fmain_adj = aroundy - awiny;
  360. // step width1 and x to scan line where we'll start
  361. finitwidth1 = fmain_adj * fdwidth1;
  362. fxincrement = fmain_adj * fdx_main;
  363. }
  364. #ifdef QUAKEEDGE_FIX
  365. else
  366. {
  367. // flat top...
  368. if ((bwiny - int_bwiny) == FLT_TYPE 0.0)
  369. {
  370. // if b on exact scanline, it's part of top, and is counted in count1 above,
  371. // unless this is flat top triangle - in that case, bump count2
  372. // also, if identical to C's y, then flat bottom, so count2 should remain 0
  373. if (cwiny != bwiny) count2++;
  374. }
  375. }
  376. #endif // QUAKEEDGE_FIX
  377. // if triangle has a bottom section (i.e. not flat bottom)....
  378. if (count2)
  379. {
  380. float mid_adjust;
  381. fdwidth2 = fwidth / fv32y;
  382. NEG_PREC_1616(fdwidth2,fdwidth2);
  383. #ifdef QUAKEEDGE_FIX // badedge.sav fix
  384. if ((broundy < bwiny) || ((broundy==bwiny) && count1)) // step to next if b.y on exact scanline, unless flat top triangle
  385. #else
  386. if (broundy < bwiny)
  387. #endif
  388. {
  389. // rounding produced y less than original, so step to next scan line
  390. mid_adjust = (broundy + (float)1.0) - bwiny;
  391. }
  392. else
  393. {
  394. // rounding produced y greater than original (i.e on scan below actual start vertex)
  395. mid_adjust = broundy - bwiny;
  396. }
  397. finitwidth2 = fwidth - (fdwidth2 * mid_adjust);
  398. // if flat top, start x/y adjustments weren't made above
  399. if (!count1)
  400. {
  401. if (aroundy < awiny)
  402. {
  403. // rounding produced y less than original, so step to next scan line
  404. aroundy += FLT_TYPE 1.0;
  405. }
  406. // determine distance between actual a and scanline we'll start on
  407. fmain_adj = aroundy - awiny;
  408. // step x to scan line where we'll start
  409. fxincrement = fmain_adj * fdx_main;
  410. }
  411. }
  412. #ifdef QUAKEEDGE_FIX // badedge2.sav fix
  413. else
  414. {
  415. // flat bottom - if bottom is on exact scanline, don't draw that last scanline
  416. // this will enforce GL restriction that bottom scanlines not drawn for polys
  417. // (special case for this setup code for case of bottom of poly being on exact y value)
  418. if ((bwiny - int_bwiny) == FLT_TYPE 0.0)
  419. {
  420. if ((cwiny == bwiny) && count1) count1--;
  421. }
  422. }
  423. #endif // badedge2.sav fix
  424. // if triangle not a horizontal line (i.e. it traverses at least 1 scan line)....
  425. if (count1 || count2)
  426. {
  427. *(pdwNext+1) = (FTOL((awinx + fxincrement)*FLOAT_TO_1616) + FIXED_X_ROUND_FACTOR) | xflags;
  428. // subtracting special offset added to y to make visual match MSFT software
  429. *(pdwNext+2) = (DWORD)( (FTOL(aroundy)-MCD_CONFORM_ADJUST) << 16 ) | EDGE_DISABLE_Y;
  430. MCDTRI_PRINT(" x, y output = %x %x, yoffset=%x",*(pdwNext+1),*(pdwNext+2),pRc->yOffset);
  431. *(pdwNext+6) = FTOL(fdx_main*FLOAT_TO_1616);
  432. // if triangle has a bottom section, decrement number of scans in top so middle
  433. // scanline is first scanline of bottom section, and has length = finitwidth2
  434. if (!count2)
  435. {
  436. MCDTRI_PRINT(" FLATBOTTOM");
  437. *(pdwNext+8) = ONE + FTOL(finitwidth1*FLOAT_TO_1616);
  438. *(pdwNext+10)= FTOL(fdwidth1*FLOAT_TO_1616);
  439. #ifdef FASTER_RECIP_ORTHO
  440. __MCD_FLOAT_BEGIN_DIVIDE(__MCDONE, fwidth, &frecip_ortho);
  441. #endif
  442. *(pdwNext+7) = count1-1;
  443. *(pdwNext+9) = 0;
  444. *(pdwNext+11)= 0;
  445. }
  446. else if (!count1)
  447. {
  448. MCDTRI_PRINT(" FLATTOP");
  449. *(pdwNext+8) = ONE + FTOL(finitwidth2*FLOAT_TO_1616);
  450. *(pdwNext+10) = FTOL(FLT_TYPE -1.0*fdwidth2*FLOAT_TO_1616);
  451. #ifdef FASTER_RECIP_ORTHO
  452. __MCD_FLOAT_BEGIN_DIVIDE(__MCDONE, fwidth, &frecip_ortho);
  453. #endif
  454. *(pdwNext+7) = count2-1;
  455. *(pdwNext+9) = 0;
  456. *(pdwNext+11)= 0;
  457. }
  458. else
  459. {
  460. MCDTRI_PRINT(" GENERAL");
  461. // sub 1 from count1, since hw adds 1 to account for first scan line
  462. *(pdwNext+8) = ONE + FTOL(finitwidth1*FLOAT_TO_1616);
  463. *(pdwNext+9) = ONE + FTOL(finitwidth2*FLOAT_TO_1616);
  464. *(pdwNext+10)= FTOL(fdwidth1*FLOAT_TO_1616);
  465. *(pdwNext+11)= FTOL(FLT_TYPE -1.0*fdwidth2*FLOAT_TO_1616);
  466. #ifdef FASTER_RECIP_ORTHO
  467. __MCD_FLOAT_BEGIN_DIVIDE(__MCDONE, fwidth, &frecip_ortho);
  468. #endif
  469. *(pdwNext+7) = (count1-1) + (count2 << 16);
  470. }
  471. MCDTRI_PRINT("dxm =%d w1=%d w2=%d dw1=%d dw2=%d",
  472. INTPR(fdx_main),INTPR(finitwidth1),INTPR(finitwidth2),INTPR(fdwidth1),INTPR(fdwidth2));
  473. MCDTRI_PRINT(" %x %x %x %x %x %x",*(pdwNext+6),*(pdwNext+7),*(pdwNext+8),*(pdwNext+9),*(pdwNext+10),*(pdwNext+11));
  // Remember where the start colors go (+3..+5) and skip past the 12-dword geometry header.
  474. pdwColor = pdwNext+3;
  475. pdwNext += 12;
  476. }
  477. else
  478. {
  479. // nothing to draw, triangle doesn't traverse any scan lines
  480. MCDTRI_PRINT(" Early return - flat top and bottom");
  481. return;
  482. }
  483. // various integer ops to overlap with fdiv
  484. dwOpcode = pRc->dwPolyOpcode;
  485. pDest = ppdev->LL_State.pRegs + HOST_3D_DATA_PORT;
  486. pdwStart = ppdev->LL_State.pDL->pdwStartOutPtr;
  487. // do inside divide - won't slow us down unless 3D engine indeed not idle
  488. USB_TIMEOUT_FIX(ppdev)
  489. // compute 1/width, used in rgbzuv computations that follow
  // NOTE(review): fwidth is 0 when b lies on the a->c edge (zero-area triangle); no guard is
  // visible before taking its reciprocal -- confirm this case cannot reach here or is harmless.
  490. #ifdef FASTER_RECIP_ORTHO
  491. __MCD_FLOAT_SIMPLE_END_DIVIDE(frecip_ortho);
  492. #else
  493. frecip_ortho = FLT_TYPE 1.0/fwidth;
  494. #endif
  495. PREC_1616(frecip_ortho,frecip_ortho);
  496. if (pRc->privateEnables & __MCDENABLE_SMOOTH)
  497. {
  498. // Calculate and set the color gradients, using gradients to adjust start color
  499. v1red = a->colors[0].r * pRc->rScale;
  500. v1grn = a->colors[0].g * pRc->gScale;
  501. v1blu = a->colors[0].b * pRc->bScale;
  502. ftemp = ((c->colors[0].r * pRc->rScale) - v1red) * frecip_main;
  503. *(pdwNext+0) = FTOL(ftemp);
  504. *(pdwNext+3) = FTOL(((b->colors[0].r * pRc->rScale) - (v1red + (ftemp * fv2y)) ) * frecip_ortho);
  505. // adjust v1red for start vertex's variance from vertex a
  506. *(pdwColor) = FTOL(v1red + (ftemp * fmain_adj));
  507. ftemp = ((c->colors[0].g * pRc->gScale) - v1grn) * frecip_main;
  508. *(pdwNext+1) = FTOL(ftemp);
  509. *(pdwNext+4) = FTOL(((b->colors[0].g * pRc->gScale) - (v1grn + (ftemp * fv2y)) ) * frecip_ortho);
  510. // adjust v1grn for start vertex's variance from vertex a
  511. *(pdwColor+1) = FTOL(v1grn + (ftemp * fmain_adj));
  512. ftemp = ((c->colors[0].b * pRc->bScale) - v1blu) * frecip_main;
  513. *(pdwNext+2) = FTOL(ftemp);
  514. *(pdwNext+5) = FTOL(((b->colors[0].b * pRc->bScale) - (v1blu + (ftemp * fv2y)) ) * frecip_ortho);
  515. // adjust v1blu for start vertex's variance from vertex a
  516. *(pdwColor+2) = FTOL(v1blu + (ftemp * fmain_adj));
  517. MCDTRI_PRINT(" SHADE rgbout = %x %x %x",*(pdwColor),*(pdwColor+1),*(pdwColor+2));
  518. MCDTRI_PRINT(" CSLOPES: %x %x %x %x %x %x",*pdwNext,*(pdwNext+1),*(pdwNext+2),*(pdwNext+3),*(pdwNext+4),*(pdwNext+5));
  519. pdwNext += 6;
  520. }
  521. else
  522. {
  523. MCDCOLOR *pColor = &pRc->pvProvoking->colors[0];
  524. // flat shaded - no adjustment of original colors needed
  525. *(pdwColor) = FTOL(pColor->r * pRc->rScale);
  526. *(pdwColor+1) = FTOL(pColor->g * pRc->gScale);
  527. *(pdwColor+2) = FTOL(pColor->b * pRc->bScale);
  528. MCDTRI_PRINT(" FLAT rgbout = %x %x %x",*(pdwColor),*(pdwColor+1),*(pdwColor+2));
  529. }
  530. if( pRc->privateEnables & __MCDENABLE_Z)
  531. {
  532. // "NICE" Polys for Alpha blended case - see comments above in
  533. // geometry slopes calculations
  534. // Calculate and set the Z value base and gradient using floats
  535. float fdz_main = (c->windowCoord.z - a->windowCoord.z) * frecip_main;
  536. // compute adjustment - if negative z would result, set adjust so final = 0
  537. fadjust = fdz_main * fmain_adj;
  538. if ((a->windowCoord.z + fadjust) < (float)0.0) fadjust = - a->windowCoord.z;
  539. if (pRc->MCDState.enables & MCD_POLYGON_OFFSET_FILL_ENABLE)
  540. {
  541. // APPLY Z OFFSET, and adjust for moved start vertex
  542. MCDFLOAT zOffset;
  // Both branches compute |fdz_main| * zOffsetFactor + zOffsetUnits.
  543. if (fdz_main > 0)
  544. {
  545. zOffset = (fdz_main * pRc->MCDState.zOffsetFactor) + pRc->MCDState.zOffsetUnits;
  546. }
  547. else
  548. {
  549. zOffset = ((float)-1.0 * fdz_main * pRc->MCDState.zOffsetFactor) + pRc->MCDState.zOffsetUnits;
  550. }
  551. *(pdwNext+0) = FTOL((a->windowCoord.z + fadjust + zOffset) * FLT_TYPE 65536.0);
  552. }
  553. else
  554. {
  555. // NO Z OFFSET - just adjust for moved start vertex
  556. *(pdwNext+0) = FTOL((a->windowCoord.z + fadjust) * FLT_TYPE 65536.0);
  557. }
  558. *(pdwNext+1) = FTOL(fdz_main * FLT_TYPE 65536.0);
  559. *(pdwNext+2) = FTOL((b->windowCoord.z - a->windowCoord.z - (fdz_main * fv2y)) * FLT_TYPE 65536.0 * frecip_ortho);
  560. MCDTRI_PRINT(" Z: %x %x %x",*pdwNext,*(pdwNext+1),*(pdwNext+2));
  561. pdwNext += 3;
  562. }
  563. if (pRc->privateEnables & __MCDENABLE_TEXTURE)
  564. {
  565. if ( (pRc->privateEnables & __MCDENABLE_PERSPECTIVE) && !linear_ok )
  566. {
  // Perspective path: approximates u and v with quadratics along the main edge and across spans.
  // NOTE(review): "mid", "tempi", vmid.x and vmax.x are declared or assigned here but do not
  // appear to be read anywhere in this function.
  567. TEXTURE_VERTEX vmin, vmid, vmax;
  568. QUADRATIC main, mid;
  569. TEXTURE_VERTEX i,imain,midmain,j,jmain;
  570. float del_u_i, del_v_i;
  571. float um,vm;
  572. float a1, a2, du_ortho_add;
  573. float b1, b2, dv_ortho_add;
  574. float sq, recip;
  575. float delta_sq, inv_sumw;
  576. float u1, v1;
  577. float frecip_del_x_mid = frecip_ortho;
  578. int tempi;
  579. vmin.u = a->texCoord.x * pRc->texture_width;
  580. vmin.v = a->texCoord.y * pRc->texture_height;
  581. vmin.w = a->windowCoord.w;
  582. vmid.x = fv2x;
  583. vmid.y = fv2y;
  584. vmid.u = b->texCoord.x * pRc->texture_width;
  585. vmid.v = b->texCoord.y * pRc->texture_height;
  586. vmid.w = b->windowCoord.w;
  587. vmax.x = fv3x;
  588. vmax.y = fv3y;
  589. vmax.u = c->texCoord.x * pRc->texture_width;
  590. vmax.v = c->texCoord.y * pRc->texture_height;
  591. vmax.w = c->windowCoord.w;
  592. // solve quadratic equation for main slope - we'll need exact u values
  593. // along main, and the a1/b1, a2/b2 terms computed are used to compute
  594. // du/v_main, d2u/v_main
  595. delta_sq = frecip_main * frecip_main;
  596. inv_sumw = (float)1.0/(vmin.w + vmax.w);
  // u1/v1: w-weighted texture coordinates at the midpoint of the main edge (a->c).
  597. u1 = (vmin.u*vmin.w + vmax.u*vmax.w) * inv_sumw;
  598. v1 = (vmin.v*vmin.w + vmax.v*vmax.w) * inv_sumw;
  599. main.a1 = (-3*vmin.u + 4*u1 - vmax.u) * frecip_main;
  600. main.a2 = 2*(vmin.u - 2*u1 + vmax.u) * delta_sq;
  601. main.b1 = (-3*vmin.v + 4*v1 - vmax.v) * frecip_main;
  602. main.b2 = 2*(vmin.v - 2*v1 + vmax.v) * delta_sq;
  // i: w-weighted midpoint of edge a->b; imain: the main-edge quadratic evaluated at the same y.
  603. i.y = (float)0.5 * vmid.y;
  604. recip = (float)1.0 / (vmin.w + vmid.w);
  605. i.u = ((vmin.u * vmin.w) + (vmid.u * vmid.w)) * recip;
  606. i.v = ((vmin.v * vmin.w) + (vmid.v * vmid.w)) * recip;
  607. sq = i.y * i.y;
  608. imain.u = main.a2*sq + main.a1*i.y + vmin.u;
  609. imain.v = main.b2*sq + main.b1*i.y + vmin.v;
  610. // vmid coordinates given, just need midmain
  611. sq = vmid.y * vmid.y;
  612. midmain.u = main.a2*sq + main.a1*vmid.y + vmin.u;
  613. midmain.v = main.b2*sq + main.b1*vmid.y + vmin.v;
  614. // j and jmain
  615. j.y = (float)0.5 * (vmax.y + vmid.y);
  616. recip = (float)1.0 / (vmid.w + vmax.w);
  617. j.u = ((vmid.u * vmid.w) + (vmax.u * vmax.w)) * recip;
  618. j.v = ((vmid.v * vmid.w) + (vmax.v * vmax.w)) * recip;
  619. sq = j.y * j.y;
  620. jmain.u = main.a2*sq + main.a1*j.y + vmin.u;
  621. jmain.v = main.b2*sq + main.b1*j.y + vmin.v;
  622. // compute intermediate parameters needed to calculate a1
  623. del_u_i = i.u - imain.u;
  624. del_v_i = i.v - imain.v;
  625. um = j.u - jmain.u - del_u_i;
  626. vm = j.v - jmain.v - del_v_i;
  627. frecip_del_x_mid *= (float)2.0;
  628. a1 = 2*del_u_i - (float)0.5*(vmid.u - midmain.u);
  629. a1 += (vmid.y*frecip_main)*um;
  630. a1 *= frecip_del_x_mid;
  631. a2 = frecip_del_x_mid*(del_u_i*frecip_del_x_mid - a1);
  632. du_ortho_add = 2*um*frecip_del_x_mid*frecip_main;
  633. b1 = 2*del_v_i - (float)0.5*(vmid.v - midmain.v);
  634. b1 += (vmid.y*frecip_main)*vm;
  635. b1 *= frecip_del_x_mid;
  636. b2 = frecip_del_x_mid*(del_v_i*frecip_del_x_mid - b1);
  637. dv_ortho_add = 2*vm*frecip_del_x_mid*frecip_main;
  638. // rewind a1 from i scanline to top of triangle
  639. a1 -= (i.y) * du_ortho_add;
  640. b1 -= (i.y) * dv_ortho_add;
  641. // convert to forward difference terms
  642. a1 += a2;
  643. b1 += b2;
  644. a2 = 2 * a2;
  645. b2 = 2 * b2;
  646. // compute adjustment for v start - if negative would result -> no problem
  647. fadjust = ((main.b1 + main.b2) * fmain_adj) + pRc->texture_bias;
  648. *(pdwNext+0) = fix_ieee(vmin.v + fadjust) & 0x1ffffff; // v
  649. // likewise for u
  650. fadjust = ((main.a1 + main.a2) * fmain_adj) + pRc->texture_bias;
  651. *(pdwNext+1) = fix_ieee(vmin.u + fadjust) & 0x1ffffff; // u
  652. *(pdwNext+2) = fix_ieee(main.b1 + main.b2); // dv_main
  653. *(pdwNext+3) = fix_ieee(main.a1 + main.a2); // du_main
  654. *(pdwNext+4) = fix_ieee(b1); // dv_ortho
  655. *(pdwNext+5) = fix_ieee(a1); // du_ortho
  656. #if DRIVER_5465
  657. *(pdwNext+6) = fix824_ieee(2 * main.b2); // d2v_main
  658. *(pdwNext+7) = fix824_ieee(2 * main.a2); // d2u_main
  659. *(pdwNext+8) = fix824_ieee(b2); // d2v_ortho
  660. *(pdwNext+9) = fix824_ieee(a2); // d2u_ortho
  661. *(pdwNext+10)= fix824_ieee(dv_ortho_add); // dv_ortho_add
  662. *(pdwNext+11)= fix824_ieee(du_ortho_add); // du_ortho_add
  663. #else // DRIVER_5465
  664. // before 5465, only 16 bit fraction in second order terms
  665. *(pdwNext+6) = fix_ieee(2 * main.b2); // d2v_main
  666. *(pdwNext+7) = fix_ieee(2 * main.a2); // d2u_main
  667. *(pdwNext+8) = fix_ieee(b2); // d2v_ortho
  668. *(pdwNext+9) = fix_ieee(a2); // d2u_ortho
  669. *(pdwNext+10)= fix_ieee(dv_ortho_add); // dv_ortho_add
  670. *(pdwNext+11)= fix_ieee(du_ortho_add); // du_ortho_add
  671. #endif // DRIVER_5465
  672. dwOpcode += 6; // 6 parms assumed (linear), add 6 since 12 total
  673. pdwNext += 12;
  674. }
  675. else
  676. // Linear texture mapping parametarization
  677. //
  678. {
  679. float v1_u, v1_v;
  680. float du_main, dv_main;
  681. dwOpcode &= ~LL_PERSPECTIVE; // turn persp bit off
  682. v1_v = a->texCoord.y * pRc->texture_height;
  683. v1_u = a->texCoord.x * pRc->texture_width;
  684. dv_main = ((c->texCoord.y * pRc->texture_height)- v1_v) * frecip_main;
  685. du_main = ((c->texCoord.x * pRc->texture_width) - v1_u) * frecip_main;
  686. // compute adjustment for v start - if negative would result -> no problem
  687. fadjust = (dv_main * fmain_adj) + pRc->texture_bias;
  688. *(pdwNext+0) = fix_ieee(v1_v + fadjust) & 0x1ffffff; // v
  689. // likewise for u...
  690. fadjust = (du_main * fmain_adj) + pRc->texture_bias;
  691. *(pdwNext+1) = fix_ieee(v1_u + fadjust) & 0x1ffffff; // u
  692. *(pdwNext+2) = fix_ieee(dv_main);
  693. *(pdwNext+3) = fix_ieee(du_main);
  694. // dv_ortho, du_ortho
  // NOTE(review): these step the main-edge value by count1 (integer scanline count, possibly
  // adjusted above), whereas the color and Z ortho gradients use fv2y -- confirm this is intended.
  695. *(pdwNext+4) = fix_ieee(((b->texCoord.y * pRc->texture_height) - (v1_v + (dv_main * count1))) * frecip_ortho);
  696. *(pdwNext+5) = fix_ieee(((b->texCoord.x * pRc->texture_width) - (v1_u + (du_main * count1))) * frecip_ortho);
  697. MCDTRI_PRINT(" LINTEXT: %x %x %x %x %x %x",*pdwNext,*(pdwNext+1),*(pdwNext+2),*(pdwNext+3),*(pdwNext+4),*(pdwNext+5));
  698. pdwNext += 6;
  699. }
  700. }
  701. if (pRc->privateEnables & (__MCDENABLE_BLEND|__MCDENABLE_FOG))
  702. {
  703. float v1alp;
  704. // if CONST alpha blend, don't change alpha regs
  705. if (dwOpcode & ALPHA)
  706. {
  707. if (pRc->privateEnables & __MCDENABLE_BLEND)
  708. {
  709. if (pRc->privateEnables & __MCDENABLE_SMOOTH)
  710. {
  711. // recall that if both blending and fog active, all prims punted back to software
  712. v1alp = a->colors[0].a * pRc->aScale;
  713. ftemp = ((c->colors[0].a * pRc->aScale) - v1alp) * frecip_main;
  714. // load start alpha - adjusting for movement of start vertex from original
  715. *pdwNext = FTOL(v1alp + (ftemp * fmain_adj));
  716. // adjustment could result in negative alpha - set to 0 if so
  717. if (*pdwNext & 0x80000000) *pdwNext = 0;
  718. *(pdwNext+1) = FTOL(ftemp);
  // NOTE(review): as in the linear texture path, the ortho gradient here (and in the fog
  // branch below) uses count1 rather than fv2y -- confirm.
  719. *(pdwNext+2) = FTOL(((b->colors[0].a * pRc->aScale) - (v1alp + (ftemp * count1)) ) * frecip_ortho);
  720. }
  721. else
  722. {
  723. v1alp = pRc->pvProvoking->colors[0].a * pRc->aScale;
  724. // alpha constant across triangle, so no adjustment to start
  725. *(pdwNext+0) = FTOL(v1alp) & 0x00ffff00;// bits 31->24 and 7->0 reserved
  726. *(pdwNext+1) = 0;
  727. *(pdwNext+2) = 0;
  728. }
  729. }
  730. else
  731. {
  732. // FOG...
  733. v1alp = a->fog * FLT_TYPE 16777215.0; // convert from 0->1.0 val to 0->ff.ffff val
  734. ftemp = ((c->fog * FLT_TYPE 16777215.0) - v1alp) * frecip_main;
  735. // load start alpha - adjusting for movement of start vertex from original
  736. *pdwNext = FTOL(v1alp + (ftemp * fmain_adj));
  737. // adjustment could result in negative alpha - set to 0 if so
  738. if (*pdwNext & 0x80000000) *pdwNext = 0;
  739. *(pdwNext+1) = FTOL(ftemp);
  740. *(pdwNext+2) = FTOL(((b->fog * FLT_TYPE 16777215.0) - (v1alp + (ftemp * count1)) ) * frecip_ortho);
  741. }
  742. *(pdwNext+0) &= 0x00ffff00;// bits 31->24 and 7->0 reserved
  743. *(pdwNext+1) &= 0xffffff00;// bits 7->0 reserved
  744. *(pdwNext+2) &= 0xffffff00;// bits 7->0 reserved
  745. pdwNext += 3;
  746. }
  747. }
  // Store the final opcode (possibly modified above) in the first dword of this command.
  748. *pdwOrig = dwOpcode;
  749. // output queued data here....
  // Walk every queued command from pdwStart to pdwNext.  The low 6 bits of a command's first
  // dword plus one give its length in dwords; each dword is written to the same port address
  // (pDest is not advanced).
  // NOTE(review): the outer loop ends only if the encoded lengths land exactly on pdwNext.
  750. pSrc = pdwStart;
  751. while (pSrc != pdwNext)
  752. {
  753. int len = (*pSrc & 0x3F) + 1;
  754. while( len-- ) *pDest = *pSrc++;
  755. }
  // Queue is drained: reset both the write pointer and the output pointer to the saved start.
  756. ppdev->LL_State.pDL->pdwNext = ppdev->LL_State.pDL->pdwStartOutPtr = pdwStart;
  757. }