Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

578 lines
22 KiB

  1. /******************************Module*Header*******************************\
  2. * Module Name: fastfill.c
  3. *
  4. * Draws fast solid-coloured, unclipped, non-complex rectangles.
  5. *
  6. * Copyright (c) 1993-1995 Microsoft Corporation
  7. \**************************************************************************/
  8. #include "precomp.h"
  9. #define RIGHT 0
  10. #define LEFT 1
  11. #define FASTFILL_DBG_LEVEL 1
  12. typedef struct _EDGEDATA {
  13. LONG x; // Current x position
  14. LONG dx; // # pixels to advance x on each scan
  15. LONG lError; // Current DDA error
  16. LONG lErrorUp; // DDA error increment on each scan
  17. LONG lErrorDown; // DDA error adjustment
  18. POINTFIX* pptfx; // Points to start of current edge
  19. LONG dptfx; // Delta (in bytes) from pptfx to next point
  20. LONG cy; // Number of scans to go for this edge
  21. } EDGEDATA; /* ed, ped */
  22. /******************************Public*Routine******************************\
  23. * BOOL bMmFastFill
  24. *
  25. * Draws a non-complex, unclipped polygon. 'Non-complex' is defined as
  26. * having only two edges that are monotonic increasing in 'y'. That is,
  27. * the polygon cannot have more than one disconnected segment on any given
  28. * scan. Note that the edges of the polygon can self-intersect, so hourglass
  29. * shapes are permissible. This restriction permits this routine to run two
  30. * simultaneous DDAs, and no sorting of the edges is required.
  31. *
  32. * Note that NT's fill convention is different from that of Win 3.1 or 4.0.
  33. * With the additional complication of fractional end-points, our convention
  34. * is the same as in 'X-Windows'. But a DDA is a DDA is a DDA, so once you
  35. * figure out how we compute the DDA terms for NT, you're golden.
  36. *
  37. * This routine handles patterns only when the S3 hardware patterns can be
  38. * used. The reason for this is that once the S3 pattern initialization is
  39. * done, pattern fills appear to the programmer exactly the same as solid
  40. * fills (with the slight difference that different registers and commands
  41. * are used). Handling 'vIoFillPatSlow' style patterns in this routine
  42. * would be non-trivial...
  43. *
  44. * We take advantage of the fact that the S3 automatically advances the
  45. * current 'y' to the following scan whenever a rectangle is output so that
  46. * we have to write to the accelerator three times for every scan: one for
  47. * the new 'x', one for the new 'width', and one for the drawing command.
  48. *
  49. * This routine is in no way the ultimate convex polygon drawing routine
  50. * (what can I say, I was pressed for time when I wrote this :-). Some
  51. * obvious things that would make it faster:
  52. *
  53. * 1) Write it in Asm and amortize the FIFO checking costs (check out
  54. * i386\fastfill.asm for a version that does this).
  55. *
  56. * 2) Take advantage of any hardware such as the ATI's SCAN_TO_X
  57. * command, or any built-in trapezoid support (note that with NT
  58. * you may get non-integer end-points, so you must be able to
  59. * program the trapezoid DDA terms directly).
  60. *
  61. * 3) Do some rectangle coalescing when both edges are y-major. This
  62. * could permit removal of my vertical-edges special case. I
  63. * was also thinking of special casing y-major left edges on the
  64. * S3, because the S3 leaves the current 'x' unchanged on every blt,
  65. * so a scan that starts on the same 'x' as the one above it
  66. * would require only two commands to the accelerator (obviously,
  67. * this only helps when we're not overdriving the accelerator).
  68. *
  69. * 4) Make the non-complex polygon detection faster. If I could have
  70. * modified memory before the start of after the end of the buffer,
  71. * I could have simplified the detection code. But since I expect
  72. * this buffer to come from GDI, I can't do that. Another thing
  73. * would be to have GDI give a flag on calls that are guaranteed
  74. * to be convex, such as 'Ellipses' and 'RoundRects'. Note that
  75. * the buffer would still have to be scanned to find the top-most
  76. * point.
  77. *
  78. * 5) Special case integer points. Unfortunately, for this to be
  79. * worth-while would require GDI to give us a flag when all the
  80. * end-points of a path are integers, which it doesn't do.
  81. *
  82. * 6) Add rectangular clipping support.
  83. *
  84. * 7) Implement support for a single sub-path that spans multiple
  85. * path data records, so that we don't have to copy all the points
  86. * to a single buffer like we do in 'fillpath.c'.
  87. *
  88. * 8) Use 'ebp' and/or 'esp' as a general register in the inner loops
  89. * of the Asm loops, and also Pentium-optimize the code. It's safe
  90. * to use 'esp' on NT because it's guaranteed that no interrupts
  91. * will be taken in our thread context, and nobody else looks at the
  92. * stack pointer from our context.
  93. *
  94. * 9) Do the fill bottom-up instead of top-down. With the S3, we have
  95. * to only set 'cur_y' once because each drawing command automatically
  96. * advances 'cur_y' (unless the polygon has zero pels lit on a scan),
  97. * so we set this right at the beginning. But for an integer end-point
  98. * polygon, unless the top edge is horizontal, no pixels are lit on
  99. * that first scan (so at the beginning of almost every integer
  100. * polygon, we go through the 'zero width' logic and again set
  101. * 'cur_y'). We could avoid this extra work by building the polygon
  102. * from bottom to top: for the bottom-most point B in a polygon, it
  103. * is guaranteed that any scan with lit pixels will be no lower than
  104. * 'ceiling(B.y) - 1'. Unfortunately, building bottom-up makes the
  105. * fractional-DDA calculations a little more complex, so I didn't do it.
  106. *
  107. * Building bottom-up would also improve the polygon score in version
  108. * 3.11 of a certain benchmark, because it has a big rectangle at the
  109. * top of every polygon -- we would get better processing overlap
  110. * because we wouldn't have to wait around for the accelerator to
  111. * finish drawing the big rectangle.
  112. *
  113. * 10) Make a better guess in the initialization as to which edge is the
  114. * 'left' edge, and which is the 'right'. As it is, we immediately
  115. * go through the swap-edges logic for half of all polygons when we
  116. * start to run the DDA. The reason why I didn't implement better-guess
  117. * code is because it would have to look at the end-point of the top
  118. * edges, and to get at the end-points we have to watch that we don't
  119. * wrap around the ends of the points buffer.
  120. *
  121. * 11) Lots of other things I haven't thought of.
  122. *
  123. * NOTE: Unlike the x86 Asm version, this routine does NOT assume that it
  124. * has 16 FIFO entries available.
  125. *
  126. * Returns TRUE if the polygon was drawn; FALSE if the polygon was complex.
  127. *
  128. \**************************************************************************/
  129. BOOL bMmFastFill(
  130. PDEV* ppdev,
  131. LONG cEdges, // Includes close figure edge
  132. POINTFIX* pptfxFirst,
  133. ULONG ulHwForeMix,
  134. ULONG ulHwBackMix,
  135. ULONG iSolidColor,
  136. BRUSHOBJ* pbo)
  137. {
  138. LONG yTrapezoid; // Top scan for next trapezoid
  139. LONG cyTrapezoid; // Number of scans in current trapezoid
  140. LONG y; // Current Y Location
  141. LONG yStart; // y-position of start point in current edge
  142. LONG dM; // Edge delta in FIX units in x direction
  143. LONG dN; // Edge delta in FIX units in y direction
  144. LONG i;
  145. POINTFIX* pptfxLast; // Points to the last point in the polygon array
  146. POINTFIX* pptfxTop; // Points to the top-most point in the polygon
  147. POINTFIX* pptfxOld; // Start point in current edge
  148. POINTFIX* pptfxScan; // Current edge pointer for finding pptfxTop
  149. LONG cScanEdges; // Number of edges scanned to find pptfxTop
  150. // (doesn't include the closefigure edge)
  151. LONG iEdge;
  152. LONG lQuotient;
  153. LONG lRemainder;
  154. EDGEDATA aed[2]; // DDA terms and stuff
  155. EDGEDATA* ped;
  156. DISPDBG((FASTFILL_DBG_LEVEL,"bMmFastFill %x %x %x\n", ulHwForeMix, ulHwBackMix, ppdev->uBLTDEF << 16 | ulHwForeMix,
  157. ppdev->uBLTDEF << 16 | ulHwForeMix));
  158. REQUIRE(5);
  159. // Set up BltDef and DrawDef
  160. LL_DRAWBLTDEF(ppdev->uBLTDEF << 16 | ulHwForeMix, 2);
  161. /////////////////////////////////////////////////////////////////
  162. // See if the polygon is 'non-complex'
  163. pptfxScan = pptfxFirst;
  164. pptfxTop = pptfxFirst; // Assume for now that the first
  165. // point in path is the topmost
  166. pptfxLast = pptfxFirst + cEdges - 1;
  167. // 'pptfxScan' will always point to the first point in the current
  168. // edge, and 'cScanEdges' will the number of edges remaining, including
  169. // the current one:
  170. cScanEdges = cEdges - 1; // The number of edges, not counting close figure
  171. if ((pptfxScan + 1)->y > pptfxScan->y)
  172. {
  173. // Collect all downs:
  174. do {
  175. if (--cScanEdges == 0)
  176. goto SetUpForFilling;
  177. pptfxScan++;
  178. } while ((pptfxScan + 1)->y >= pptfxScan->y);
  179. // Collect all ups:
  180. do {
  181. if (--cScanEdges == 0)
  182. goto SetUpForFillingCheck;
  183. pptfxScan++;
  184. } while ((pptfxScan + 1)->y <= pptfxScan->y);
  185. // Collect all downs:
  186. pptfxTop = pptfxScan;
  187. do {
  188. if ((pptfxScan + 1)->y > pptfxFirst->y)
  189. break;
  190. if (--cScanEdges == 0)
  191. goto SetUpForFilling;
  192. pptfxScan++;
  193. } while ((pptfxScan + 1)->y >= pptfxScan->y);
  194. DISPDBG((FASTFILL_DBG_LEVEL,"False Exit %s %d\n", __FILE__, __LINE__));
  195. return(FALSE);
  196. }
  197. else
  198. {
  199. // Collect all ups:
  200. do {
  201. pptfxTop++; // We increment this now because we
  202. // want it to point to the very last
  203. // point if we early out in the next
  204. // statement...
  205. if (--cScanEdges == 0)
  206. goto SetUpForFilling;
  207. } while ((pptfxTop + 1)->y <= pptfxTop->y);
  208. // Collect all downs:
  209. pptfxScan = pptfxTop;
  210. do {
  211. if (--cScanEdges == 0)
  212. goto SetUpForFilling;
  213. pptfxScan++;
  214. } while ((pptfxScan + 1)->y >= pptfxScan->y);
  215. // Collect all ups:
  216. do {
  217. if ((pptfxScan + 1)->y < pptfxFirst->y)
  218. break;
  219. if (--cScanEdges == 0)
  220. goto SetUpForFilling;
  221. pptfxScan++;
  222. } while ((pptfxScan + 1)->y <= pptfxScan->y);
  223. DISPDBG((FASTFILL_DBG_LEVEL,"False Exit %s %d\n", __FILE__, __LINE__));
  224. return(FALSE);
  225. }
  226. SetUpForFillingCheck:
  227. // We check to see if the end of the current edge is higher
  228. // than the top edge we've found so far:
  229. if ((pptfxScan + 1)->y < pptfxTop->y)
  230. pptfxTop = pptfxScan + 1;
  231. SetUpForFilling:
  232. /////////////////////////////////////////////////////////////////
  233. // Some Initialization
  234. yTrapezoid = (pptfxTop->y + 15) >> 4;
  235. DISPDBG((FASTFILL_DBG_LEVEL, "%d yTrapezoid init %x\n", __LINE__, yTrapezoid));
  236. // Make sure we initialize the DDAs appropriately:
  237. aed[LEFT].cy = 0;
  238. aed[RIGHT].cy = 0;
  239. // For now, guess as to which is the left and which is the right edge:
  240. aed[LEFT].dptfx = -(LONG) sizeof(POINTFIX);
  241. aed[RIGHT].dptfx = sizeof(POINTFIX);
  242. aed[LEFT].pptfx = pptfxTop;
  243. aed[RIGHT].pptfx = pptfxTop;
  244. if (iSolidColor != -1)
  245. {
  246. /////////////////////////////////////////////////////////////////
  247. // Setup the hardware for solid colours
  248. // Let's set the Foreground Register here since they are
  249. switch (ppdev->ulBitCount)
  250. {
  251. case 8: // For 8 bpp duplicate byte 0 into bytes 1,2,3.
  252. iSolidColor = (iSolidColor & 0xFF) | (iSolidColor << 8);
  253. case 16: // For 16 bpp, duplicate the low word into the high word.
  254. iSolidColor = (iSolidColor & 0xFFFF) | (iSolidColor << 16);
  255. }
  256. DISPDBG((FASTFILL_DBG_LEVEL,"FASTFILL: Set Color %x.\n", iSolidColor));
  257. LL_BGCOLOR(iSolidColor, 2);
  258. }
  259. else
  260. {
  261. /////////////////////////////////////////////////////////////////
  262. // Setup for patterns
  263. }
  264. y = yTrapezoid;
  265. DISPDBG((FASTFILL_DBG_LEVEL, "%d New y %x\n", __LINE__, y));
  266. // done above REQUIRE(1);
  267. LL16(grOP0_opRDRAM.pt.Y, y + ppdev->ptlOffset.y);
  268. NewTrapezoid:
  269. /////////////////////////////////////////////////////////////////
  270. // DDA initialization
  271. for (iEdge = 1; iEdge >= 0; iEdge--)
  272. {
  273. ped = &aed[iEdge];
  274. if (ped->cy == 0)
  275. {
  276. // Need a new DDA:
  277. do {
  278. cEdges--;
  279. if (cEdges < 0)
  280. {
  281. DISPDBG((FASTFILL_DBG_LEVEL,"True Exit %s %d\n", __FILE__, __LINE__));
  282. return(TRUE);
  283. }
  284. // Find the next left edge, accounting for wrapping:
  285. pptfxOld = ped->pptfx;
  286. ped->pptfx = (POINTFIX*) ((BYTE*) ped->pptfx + ped->dptfx);
  287. if (ped->pptfx < pptfxFirst)
  288. ped->pptfx = pptfxLast;
  289. else if (ped->pptfx > pptfxLast)
  290. ped->pptfx = pptfxFirst;
  291. // Have to find the edge that spans yTrapezoid:
  292. ped->cy = ((ped->pptfx->y + 15) >> 4) - yTrapezoid;
  293. // With fractional coordinate end points, we may get edges
  294. // that don't cross any scans, in which case we try the
  295. // next one:
  296. } while (ped->cy <= 0);
  297. // 'pptfx' now points to the end point of the edge spanning
  298. // the scan 'yTrapezoid'.
  299. dN = ped->pptfx->y - pptfxOld->y;
  300. dM = ped->pptfx->x - pptfxOld->x;
  301. ASSERTDD(dN > 0, "Should be going down only");
  302. // Compute the DDA increment terms:
  303. if (dM < 0)
  304. {
  305. dM = -dM;
  306. if (dM < dN) // Can't be '<='
  307. {
  308. ped->dx = -1;
  309. ped->lErrorUp = dN - dM;
  310. }
  311. else
  312. {
  313. QUOTIENT_REMAINDER(dM, dN, lQuotient, lRemainder);
  314. ped->dx = -lQuotient; // - dM / dN
  315. ped->lErrorUp = lRemainder; // dM % dN
  316. if (ped->lErrorUp > 0)
  317. {
  318. ped->dx--;
  319. ped->lErrorUp = dN - ped->lErrorUp;
  320. }
  321. }
  322. }
  323. else
  324. {
  325. if (dM < dN) // Can't be '<='
  326. {
  327. ped->dx = 0;
  328. ped->lErrorUp = dM;
  329. }
  330. else
  331. {
  332. QUOTIENT_REMAINDER(dM, dN, lQuotient, lRemainder);
  333. ped->dx = lQuotient; // dM / dN
  334. ped->lErrorUp = lRemainder; // dM % dN
  335. }
  336. }
  337. ped->lErrorDown = dN; // DDA limit
  338. ped->lError = -1; // Error is initially zero (add dN - 1 for
  339. // the ceiling, but subtract off dN so that
  340. // we can check the sign instead of comparing
  341. // to dN)
  342. ped->x = pptfxOld->x;
  343. yStart = pptfxOld->y;
  344. if ((yStart & 15) != 0)
  345. {
  346. // Advance to the next integer y coordinate
  347. for (i = 16 - (yStart & 15); i != 0; i--)
  348. {
  349. ped->x += ped->dx;
  350. ped->lError += ped->lErrorUp;
  351. if (ped->lError >= 0)
  352. {
  353. ped->lError -= ped->lErrorDown;
  354. ped->x++;
  355. }
  356. }
  357. }
  358. if ((ped->x & 15) != 0)
  359. {
  360. ped->lError -= ped->lErrorDown * (16 - (ped->x & 15));
  361. ped->x += 15; // We'll want the ceiling in just a bit...
  362. }
  363. // Chop off those fractional bits:
  364. ped->x >>= 4;
  365. ped->lError >>= 4;
  366. }
  367. }
  368. cyTrapezoid = min(aed[LEFT].cy, aed[RIGHT].cy); // # of scans in this trap
  369. DISPDBG((FASTFILL_DBG_LEVEL, "%d cyTrapezoid = %d\n",
  370. __LINE__, cyTrapezoid));
  371. aed[LEFT].cy -= cyTrapezoid;
  372. aed[RIGHT].cy -= cyTrapezoid;
  373. yTrapezoid += cyTrapezoid; // Top scan in next trap
  374. // If the left and right edges are vertical, simply output as
  375. // a rectangle:
  376. if (((aed[LEFT].lErrorUp | aed[RIGHT].lErrorUp) == 0) &&
  377. ((aed[LEFT].dx | aed[RIGHT].dx) == 0) &&
  378. (cyTrapezoid > 1))
  379. {
  380. LONG lWidth;
  381. /////////////////////////////////////////////////////////////////
  382. // Vertical-edge special case
  383. ContinueVertical:
  384. lWidth = aed[RIGHT].x - aed[LEFT].x - 1;
  385. DISPDBG((FASTFILL_DBG_LEVEL, "%d lWidth %x %x %x cyTrapezoid %x \n",
  386. __LINE__, lWidth, aed[RIGHT].x, aed[LEFT].x, cyTrapezoid));
  387. if (lWidth >= 0)
  388. {
  389. DISPDBG((FASTFILL_DBG_LEVEL,"%d New x %x\n",__LINE__, aed[LEFT].x));
  390. REQUIRE(5);
  391. // REQUIRE(4);
  392. LL16(grOP0_opRDRAM.pt.X, aed[LEFT].x + ppdev->ptlOffset.x);
  393. LL_BLTEXT(lWidth + 1, cyTrapezoid);
  394. DISPDBG((FASTFILL_DBG_LEVEL, "DO a Blt %x\n",(cyTrapezoid << 16) | (lWidth + 1)));
  395. y += cyTrapezoid;
  396. DISPDBG((FASTFILL_DBG_LEVEL,"%d New y %x\n", __LINE__, y));
  397. LL16(grOP0_opRDRAM.pt.Y, y + ppdev->ptlOffset.y);
  398. }
  399. else if (lWidth == -1)
  400. {
  401. // If the rectangle was too thin to light any pels, we still
  402. // have to advance the y current position:
  403. y = yTrapezoid - cyTrapezoid + 1;
  404. DISPDBG((FASTFILL_DBG_LEVEL, "%d New y %x yTrap %x cyTrap %x\n",
  405. __LINE__, y, yTrapezoid, cyTrapezoid));
  406. REQUIRE(1);
  407. LL16(grOP0_opRDRAM.pt.Y, y + ppdev->ptlOffset.y);
  408. }
  409. else
  410. {
  411. LONG lTmp;
  412. POINTFIX* pptfxTmp;
  413. SWAP(aed[LEFT].x, aed[RIGHT].x, lTmp);
  414. SWAP(aed[LEFT].cy, aed[RIGHT].cy, lTmp);
  415. SWAP(aed[LEFT].dptfx, aed[RIGHT].dptfx, lTmp);
  416. SWAP(aed[LEFT].pptfx, aed[RIGHT].pptfx, pptfxTmp);
  417. goto ContinueVertical;
  418. }
  419. goto NewTrapezoid;
  420. }
  421. while (TRUE)
  422. {
  423. LONG lWidth;
  424. /////////////////////////////////////////////////////////////////
  425. // Run the DDAs
  426. // The very first time through, make sure we set x:
  427. lWidth = aed[RIGHT].x - aed[LEFT].x - 1;
  428. if (lWidth >= 0)
  429. {
  430. DISPDBG((FASTFILL_DBG_LEVEL,"%d New x %x\n", __LINE__, aed[LEFT].x));
  431. REQUIRE(5);
  432. // REQUIRE(4);
  433. LL16(grOP0_opRDRAM.pt.X, aed[LEFT].x + ppdev->ptlOffset.x);
  434. LL_BLTEXT(lWidth + 1, 1);
  435. DISPDBG((FASTFILL_DBG_LEVEL,"%d New y %x\n", __LINE__, y+1));
  436. LL16(grOP0_opRDRAM.pt.Y, ++y + ppdev->ptlOffset.y);
  437. ContinueAfterZero:
  438. // Advance the right wall:
  439. aed[RIGHT].x += aed[RIGHT].dx;
  440. aed[RIGHT].lError += aed[RIGHT].lErrorUp;
  441. if (aed[RIGHT].lError >= 0)
  442. {
  443. aed[RIGHT].lError -= aed[RIGHT].lErrorDown;
  444. aed[RIGHT].x++;
  445. }
  446. // Advance the left wall:
  447. aed[LEFT].x += aed[LEFT].dx;
  448. aed[LEFT].lError += aed[LEFT].lErrorUp;
  449. if (aed[LEFT].lError >= 0)
  450. {
  451. aed[LEFT].lError -= aed[LEFT].lErrorDown;
  452. aed[LEFT].x++;
  453. }
  454. cyTrapezoid--;
  455. if (cyTrapezoid == 0)
  456. goto NewTrapezoid;
  457. }
  458. else if (lWidth == -1)
  459. {
  460. y = yTrapezoid - cyTrapezoid + 1;
  461. DISPDBG((FASTFILL_DBG_LEVEL, "%d New y %x yTrap %x cyTrap %x\n",
  462. __LINE__, y, yTrapezoid, cyTrapezoid));
  463. REQUIRE(1);
  464. LL16(grOP0_opRDRAM.pt.Y, (y + ppdev->ptlOffset.y) );
  465. goto ContinueAfterZero;
  466. }
  467. else
  468. {
  469. // We certainly don't want to optimize for this case because we
  470. // should rarely get self-intersecting polygons (if we're slow,
  471. // the app gets what it deserves):
  472. LONG lTmp;
  473. POINTFIX* pptfxTmp;
  474. SWAP(aed[LEFT].x, aed[RIGHT].x, lTmp);
  475. SWAP(aed[LEFT].dx, aed[RIGHT].dx, lTmp);
  476. SWAP(aed[LEFT].lError, aed[RIGHT].lError, lTmp);
  477. SWAP(aed[LEFT].lErrorUp, aed[RIGHT].lErrorUp, lTmp);
  478. SWAP(aed[LEFT].lErrorDown, aed[RIGHT].lErrorDown, lTmp);
  479. SWAP(aed[LEFT].cy, aed[RIGHT].cy, lTmp);
  480. SWAP(aed[LEFT].dptfx, aed[RIGHT].dptfx, lTmp);
  481. SWAP(aed[LEFT].pptfx, aed[RIGHT].pptfx, pptfxTmp);
  482. continue;
  483. }
  484. }
  485. DISPDBG((FASTFILL_DBG_LEVEL,"Eof Exit %s %d\n", __FILE__, __LINE__));
  486. }