Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1824 lines
63 KiB

  1. #include "precomp.h"
  2. ///////////////////////////////////////////////////////////////////////
  3. // We have to be careful of arithmetic overflow in a number of places.
  4. // Fortunately, the compiler is guaranteed to natively support 64-bit
  5. // signed LONGLONGs and 64-bit unsigned DWORDLONGs.
  6. //
  7. // UInt32x32To64(a, b) is a macro defined in 'winnt.h' that multiplies
  8. // two 32-bit ULONGs to produce a 64-bit DWORDLONG result.
  9. //
  10. // UInt64Div32To32 is our own macro to divide a 64-bit DWORDLONG by
  11. // a 32-bit ULONG to produce a 32-bit ULONG result.
  12. //
  13. // UInt64Mod32To32 is our own macro to take a 64-bit DWORDLONG modulo
  14. // a 32-bit ULONG to produce a 32-bit ULONG result.
  15. //
  16. // 64 bit divides are usually very expensive. Since it's very rare
  17. // that we'll get lines where the upper 32 bits of the 64 bit result
  18. // are used, we can almost always use 32-bit ULONG divides. We still
  19. // must correctly handle the larger cases:
  // UInt64Div32To32(a, b)
  //
  // Divides the unsigned 64-bit value 'a' by the unsigned 32-bit value 'b'
  // and yields the quotient converted to a 32-bit ULONG (a quotient that
  // does not fit in 32 bits is truncated by the final cast).
  //
  // When the upper 32 bits of 'a' are all zero the cheaper 32-bit divide is
  // used; otherwise a full 64-bit divide is performed. The range test looks
  // at the upper 32 bits directly instead of comparing against ULONG_MAX,
  // so the choice of branch does not depend on how wide 'unsigned long' is
  // on the build platform.
  //
  // Caveats: 'a' is evaluated twice, so it must not have side effects, and
  // 'b' must be non-zero.
  #define UInt64Div32To32(a, b) \
      (((((DWORDLONG)(a)) >> 32) != 0) ? \
          (ULONG)((DWORDLONG)(a) / (ULONG)(b)) : \
          (ULONG)((ULONG)(a) / (ULONG)(b)))
  // UInt64Mod32To32(a, b)
  //
  // Yields the remainder of the unsigned 64-bit value 'a' divided by the
  // unsigned 32-bit value 'b', as a 32-bit ULONG.
  //
  // When the upper 32 bits of 'a' are all zero the cheaper 32-bit operation
  // is used; otherwise the full 64-bit one is performed. The range test
  // looks at the upper 32 bits directly instead of comparing against
  // ULONG_MAX, so the choice of branch does not depend on how wide
  // 'unsigned long' is on the build platform.
  //
  // Caveats: 'a' is evaluated twice, so it must not have side effects, and
  // 'b' must be non-zero.
  #define UInt64Mod32To32(a, b) \
      (((((DWORDLONG)(a)) >> 32) != 0) ? \
          (ULONG)((DWORDLONG)(a) % (ULONG)(b)) : \
          (ULONG)((ULONG)(a) % (ULONG)(b)))
  // SWAPL(x, y, t): exchanges the values of lvalues 'x' and 'y', using the
  // caller-supplied lvalue 't' as scratch storage ('t' is left holding the
  // original value of 'x').
  //
  // Wrapped in do { } while (0) so the macro behaves as a single statement
  // and is safe in an unbraced if/else; invoke it with a trailing semicolon.
  // Arguments are parenthesized so expressions such as '*p' or 'a[i]' can be
  // passed safely. Each of 'x', 'y' and 't' is evaluated twice.
  #define SWAPL(x, y, t) \
      do { (t) = (x); (x) = (y); (y) = (t); } while (0)
  29. FLONG gaflRound[] = {
  30. FL_H_ROUND_DOWN | FL_V_ROUND_DOWN, // no flips
  31. FL_H_ROUND_DOWN | FL_V_ROUND_DOWN, // FL_FLIP_D
  32. FL_H_ROUND_DOWN, // FL_FLIP_V
  33. FL_V_ROUND_DOWN, // FL_FLIP_V | FL_FLIP_D
  34. FL_V_ROUND_DOWN, // FL_FLIP_SLOPE_ONE
  35. 0xbaadf00d, // FL_FLIP_SLOPE_ONE | FL_FLIP_D
  36. FL_H_ROUND_DOWN, // FL_FLIP_SLOPE_ONE | FL_FLIP_V
  37. 0xbaadf00d // FL_FLIP_SLOPE_ONE | FL_FLIP_V | FL_FLIP_D
  38. };
  39. BOOL bIntegerLine(PDEV*, ULONG, ULONG, ULONG, ULONG);
  40. BOOL bHardwareLine(PDEV*, POINTFIX*, POINTFIX*);
  41. /******************************Public*Routine******************************\
  42. * BOOL bLines(ppdev, pptfxFirst, pptfxBuf, prun, cptfx, pls,
  43. * prclClip, apfn[], flStart)
  44. *
  45. * Computes the DDA for the line and gets ready to draw it. Puts the
  46. * pixel data into an array of strips, and calls a strip routine to
  47. * do the actual drawing.
  48. *
  49. * Doing NT Lines Right
  50. * --------------------
  51. *
  52. * In NT, all lines are given to the device driver in fractional
  53. * coordinates, in a 28.4 fixed point format. The lower 4 bits are
  54. * fractional for sub-pixel positioning.
  55. *
  56. * Note that you CANNOT! just round the coordinates to integers
  57. * and pass the results to your favorite integer Bresenham routine!!
  58. * (Unless, of course, you have such a high resolution device that
  59. * nobody will notice -- not likely for a display device.) The
  60. * fractions give a more accurate rendering of the line -- this is
  61. * important for things like our Bezier curves, which would have 'kinks'
  62. * if the points in its polyline approximation were rounded to integers.
  63. *
  64. * Unfortunately, for fractional lines there is more setup work to do
  65. * a DDA than for integer lines. However, the main loop is exactly
  66. * the same (and can be done entirely with 32 bit math).
  67. *
  68. * If You've Got Hardware That Does Bresenham
  69. * ------------------------------------------
  70. *
  71. * A lot of hardware limits DDA error terms to 'n' bits. With fractional
  72. * coordinates, 4 bits are given to the fractional part, letting
  73. * you draw in hardware only those lines that lie entirely in a 2^(n-4)
  74. * by 2^(n-4) pixel space.
  75. *
  76. * And you still have to correctly draw those lines with coordinates
  77. * outside that space! Remember that the screen is only a viewport
  78. * onto a 28.4 by 28.4 space -- if any part of the line is visible
  79. * you MUST render it precisely, regardless of where the end points lie.
  80. * So even if you do it in software, somewhere you'll have to have a
  81. * 32 bit DDA routine.
  82. *
  83. * Our Implementation
  84. * ------------------
  85. *
  86. * We employ a run length slice algorithm: our DDA calculates the
  87. * number of pixels that are in each row (or 'strip') of pixels.
  88. *
  89. * We've separated the running of the DDA and the drawing of pixels:
  90. * we run the DDA for several iterations and store the results in
  91. * a 'strip' buffer (which are the lengths of consecutive pixel rows of
  92. * the line), then we crank up a 'strip drawer' that will draw all the
  93. * strips in the buffer.
  94. *
  95. * We also employ a 'half-flip' to reduce the number of strip
  96. * iterations we need to do in the DDA and strip drawing loops: when a
  97. * (normalized) line's slope is more than 1/2, we do a final flip
  98. * about the line y = (1/2)x. So now, instead of each strip being
  99. * consecutive horizontal or vertical pixel rows, each strip is composed
  100. * of those pixels aligned in 45 degree rows. So a line like (0, 0) to
  101. * (128, 128) would generate only one strip.
  102. *
  103. * We also always draw only left-to-right.
  104. *
  105. * Styled lines may have arbitrary style patterns. We specially
  106. * optimize the default patterns (and call them 'masked' styles).
  107. *
  108. * The DDA Derivation
  109. * ------------------
  110. *
  111. * Here is how I like to think of the DDA calculation.
  112. *
  113. * We employ Knuth's "diamond rule": rendering a one-pixel-wide line
  114. * can be thought of as dragging a one-pixel-wide by one-pixel-high
  115. * diamond along the true line. Pixel centers lie on the integer
  116. * coordinates, and so we light any pixel whose center gets covered
  117. * by the "drag" region (John D. Hobby, Journal of the Association
  118. * for Computing Machinery, Vol. 36, No. 2, April 1989, pp. 209-229).
  119. *
  120. * We must define which pixel gets lit when the true line falls
  121. * exactly half-way between two pixels. In this case, we follow
  122. * the rule: when two pels are equidistant, the upper or left pel
  123. * is illuminated, unless the slope is exactly one, in which case
  124. * the upper or right pel is illuminated. (So we make the edges
  125. * of the diamond exclusive, except for the top and left vertices,
  126. * which are inclusive, unless we have slope one.)
  127. *
  128. * This metric decides what pixels should be on any line BEFORE it is
  129. * flipped around for our calculation. Having a consistent metric
  130. * this way will let our lines blend nicely with our curves. The
  131. * metric also dictates that we will never have one pixel turned on
  132. * directly above another that's turned on. We will also never have
  133. * a gap; i.e., there will be exactly one pixel turned on for each
  134. * column between the start and end points. All that remains to be
  135. * done is to decide how many pixels should be turned on for each row.
  136. *
  137. * So lines we draw will consist of varying numbers of pixels on
  138. * successive rows, for example:
  139. *
  140. * ******
  141. * *****
  142. * ******
  143. * *****
  144. *
  145. * We'll call each set of pixels on a row a "strip".
  146. *
  147. * (Please remember that our coordinate space has the origin as the
  148. * upper left pixel on the screen; positive y is down and positive x
  149. * is right.)
  150. *
  151. * Device coordinates are specified as fixed point 28.4 numbers,
  152. * where the first 28 bits are the integer coordinate, and the last
  153. * 4 bits are the fraction. So coordinates may be thought of as
  154. * having the form (x, y) = (M/F, N/F) where F is the constant scaling
  155. * factor F = 2^4 = 16, and M and N are 32 bit integers.
  156. *
  157. * Consider the line from (M0/F, N0/F) to (M1/F, N1/F) which runs
  158. * left-to-right and whose slope is in the first octant, and let
  159. * dM = M1 - M0 and dN = N1 - N0. Then dM >= 0, dN >= 0 and dM >= dN.
  160. *
  161. * Since the slope of the line is less than 1, the edges of the
  162. * drag region are created by the top and bottom vertices of the
  163. * diamond. At any given pixel row y of the line, we light those
  164. * pixels whose centers are between the left and right edges.
  165. *
  166. * Let mL(n) denote the line representing the left edge of the drag
  167. * region. On pixel row j, the column of the first pixel to be
  168. * lit is
  169. *
  170. * iL(j) = ceiling( mL(j * F) / F)
  171. *
  172. * Since the line's slope is less than one:
  173. *
  174. * iL(j) = ceiling( mL([j + 1/2] F) / F )
  175. *
  176. * Recall the formula for our line:
  177. *
  178. * n(m) = (dN / dM) (m - M0) + N0
  179. *
  180. * m(n) = (dM / dN) (n - N0) + M0
  181. *
  182. * Since the line's slope is less than one, the line representing
  183. * the left edge of the drag region is the original line offset
  184. * by 1/2 pixel in the y direction:
  185. *
  186. * mL(n) = (dM / dN) (n - F/2 - N0) + M0
  187. *
  188. * From this we can figure out the column of the first pixel that
  189. * will be lit on row j, being careful of rounding (if the left
  190. * edge lands exactly on an integer point, the pixel at that
  191. * point is not lit because of our rounding convention):
  192. *
  193. * iL(j) = floor( mL(j F) / F ) + 1
  194. *
  195. * = floor( ((dM / dN) (j F - F/2 - N0) + M0) / F ) + 1
  196. *
  197. * = floor( (F dM j - F/2 dM - N0 dM + dN M0) / (F dN) ) + 1
  198. *
  199. * Grouping the terms that do not depend on j:
  200. *
  201. * = floor( (F dM j - [ dM (N0 + F/2) - dN M0 ]) / (F dN) ) + 1
  202. *
  203. * Dividing the numerator and the denominator by F:
  204. *
  205. * = floor( (dM j - [ dM (N0 + F/2) - dN M0 ] / F) / dN ) + 1 (1)
  206. *
  207. * = floor( (dM j + alpha) / dN ) + 1
  208. *
  209. * where
  210. *
  211. * alpha = - [ dM (N0 + F/2) - dN M0 ] / F
  212. *
  213. * We use equation (1) to calculate the DDA: there are iL(j+1) - iL(j)
  214. * pixels in row j. Because we are always calculating iL(j) for
  215. * integer quantities of j, we note that the only fractional term
  216. * is constant, and so we can 'throw away' the fractional bits of
  217. * alpha:
  218. *
  219. * beta = floor( - [ dM (N0 + F/2) - dN M0 ] / F ) (2)
  220. *
  221. * so
  222. *
  223. * iL(j) = floor( (dM j + beta) / dN ) + 1 (3)
  224. *
  225. * for integers j.
  226. *
  227. * Note if iR(j) is the line's rightmost pixel on row j, that
  228. * iR(j) = iL(j + 1) - 1.
  229. *
  230. * Similarly, rewriting equation (1) as a function of column i,
  231. * we can determine, given column i, on which pixel row j is the line
  232. * lit:
  233. *
  234. *
  235. * j(i) = ceiling( (dN i + [ dM (N0 + F/2) - dN M0 ] / F) / dM ) - 1
  236. *
  237. *
  238. * Floors are easier to compute, so we can rewrite this:
  239. *
  240. * j(i) = floor( (dN i + [ dM (N0 + F/2) - dN M0 ] / F + dM - 1/F) / dM ) - 1
  241. *
  242. * Folding the trailing "- 1" into the numerator as "- dM":
  243. *
  244. * = floor( (dN i + [ dM (N0 + F/2) - dN M0 ] / F + dM - 1/F - dM) / dM )
  245. *
  246. * The two dM terms cancel and the 1/F terms combine:
  247. *
  248. * = floor( (dN i + [ dM (N0 + F/2) - dN M0 - 1 ] / F) / dM )
  249. *
  250. *
  251. *
  252. * We can once again wave our hands and throw away the fractional bits
  253. * of the remainder term:
  254. *
  255. * j(i) = floor( (dN i + gamma) / dM ) (4)
  256. *
  257. * where
  258. *
  259. * gamma = floor( [ dM (N0 + F/2) - dN M0 - 1 ] / F ) (5)
  260. *
  261. * We now note that
  262. *
  263. * beta = -gamma - 1 = ~gamma (6)
  264. *
  265. * To draw the pixels of the line, we could evaluate (3) on every scan
  266. * line to determine where the strip starts. Of course, we don't want
  267. * to do that because that would involve a multiply and divide for every
  268. * scan. So we do everything incrementally.
  269. *
  270. * We would like to easily compute c_j, the number of pixels on scan j
  271. * (a trailing _j or _(j+1) denotes a subscript):
  272. *
  273. * c_j = iL(j + 1) - iL(j)
  274. *
  275. *
  276. * = floor((dM (j + 1) + beta) / dN) - floor((dM j + beta) / dN) (7)
  277. *
  278. * This may be rewritten as
  279. *
  280. * c_j = floor(i_(j+1) + r_(j+1) / dN) - floor(i_j + r_j / dN) (8)
  281. *
  282. *
  283. * where i_j, i_(j+1) are integers and r_j < dN, r_(j+1) < dN.
  284. *
  285. *
  286. * Rewriting (7) again:
  287. *
  288. * c_j = floor(i_j + r_j / dN + dM / dN) - floor(i_j + r_j / dN)
  289. *
  290. *
  291. *
  292. * = floor((r_j + dM) / dN) - floor(r_j / dN)
  293. *
  294. *
  295. * This may be rewritten as
  296. *
  297. * c_j = dI + floor((r_j + dR) / dN) - floor(r_j / dN)
  298. *
  299. *
  300. * where dI + dR / dN = dM / dN, dI is an integer and dR < dN.
  301. *
  302. * r_j is the remainder (or "error") term in the DDA loop: r_j / dN
  303. * is the exact fraction of a pixel at which the strip ends. To go
  304. * on to the next scan and compute c_(j+1) we need to know r_(j+1).
  305. *
  306. *
  307. *
  308. * So in the main loop of the DDA:
  309. *
  310. * c_j = dI + floor((r_j + dR) / dN) and r_(j+1) = (r_j + dR) % dN
  311. *
  312. *
  313. * and we know r_j < dN, r_(j+1) < dN, and dR < dN.
  314. *
  315. *
  316. * We have derived the DDA only for lines in the first octant; to
  317. * handle other octants we do the common trick of flipping the line
  318. * to the first octant by first making the line left-to-right by
  319. * exchanging the end-points, then flipping about the lines y = 0 and
  320. * y = x, as necessary. We must record the transformation so we can
  321. * undo them later.
  322. *
  323. * We must also be careful of how the flips affect our rounding. If
  324. * to get the line to the first octant we flipped about y = 0, we now
  325. * have to be careful to round a y value of 1/2 up instead of down as
  326. * we would for a line originally in the first octant (recall that
  327. * "In the case where two pels are equidistant, the upper or left
  328. * pel is illuminated...").
  329. *
  330. * To account for this rounding when running the DDA, we shift the line
  331. * (or not) in the y direction by the smallest amount possible. That
  332. * takes care of rounding for the DDA, but we still have to be careful
  333. * about the rounding when determining the first and last pixels to be
  334. * lit in the line.
  335. *
  336. * Determining The First And Last Pixels In The Line
  337. * -------------------------------------------------
  338. *
  339. * Fractional coordinates also make it harder to determine which pixels
  340. * will be the first and last ones in the line. We've already taken
  341. * the fractional coordinates into account in calculating the DDA, but
  342. * the DDA cannot tell us which are the end pixels because it is quite
  343. * happy to calculate pixels on the line from minus infinity to positive
  344. * infinity.
  345. *
  346. * The diamond rule determines the start and end pixels. (Recall that
  347. * the sides are exclusive except for the left and top vertices.)
  348. * This convention can be thought of in another way: there are diamonds
  349. * around the pixels, and wherever the true line crosses a diamond,
  350. * that pel is illuminated.
  351. *
  352. * Consider a line where we've done the flips to the first octant, and the
  353. * floor of the start coordinates is the origin:
  354. *
  355. * +-----------------------> +x
  356. * |
  357. * | 0 1
  358. * | 0123456789abcdef
  359. * |
  360. * | 0 00000000?1111111
  361. * | 1 00000000 1111111
  362. * | 2 0000000 111111
  363. * | 3 000000 11111
  364. * | 4 00000 ** 1111
  365. * | 5 0000 ****1
  366. * | 6 000 1***
  367. * | 7 00 1 ****
  368. * | 8 ? ***
  369. * | 9 22 3 ****
  370. * | a 222 33 ***
  371. * | b 2222 333 ****
  372. * | c 22222 3333 **
  373. * | d 222222 33333
  374. * | e 2222222 333333
  375. * | f 22222222 3333333
  376. * |
  377. * | 2 3
  378. * v
  379. * +y
  380. *
  381. * If the start of the line lands on the diamond around pixel 0 (shown by
  382. * the '0' region here), pixel 0 is the first pel in the line. The same
  383. * is true for the other pels.
  384. *
  385. * A little more work has to be done if the line starts in the
  386. * 'nether-land' between the diamonds (as illustrated by the '*' line):
  387. * the first pel lit is the first diamond crossed by the line (pixel 1 in
  388. * our example). This calculation is determined by the DDA or slope of
  389. * the line.
  390. *
  391. * If the line starts exactly half way between two adjacent pixels
  392. * (denoted here by the '?' spots), the first pixel is determined by our
  393. * round-down convention (and is dependent on the flips done to
  394. * normalize the line).
  395. *
  396. * Last Pel Exclusive
  397. * ------------------
  398. *
  399. * To eliminate repeatedly lit pels between continuous connected lines,
  400. * we employ a last-pel exclusive convention: if the line ends exactly on
  401. * the diamond around a pel, that pel is not lit. (This eliminates the
  402. * checks we had in the old code to see if we were re-lighting pels.)
  403. *
  404. * The Half Flip
  405. * -------------
  406. *
  407. * To make our run length algorithm more efficient, we employ a "half
  408. * flip". If after normalizing to the first octant, the slope is more
  409. * than 1/2, we subtract the y coordinate from the x coordinate. This
  410. * has the effect of reflecting the coordinates through the line of slope
  411. * 1/2. Note that the diagonal gets mapped into the x-axis after a half
  412. * flip.
  413. *
  414. * How Many Bits Do We Need, Anyway?
  415. * ---------------------------------
  416. *
  417. * Note that if the line is visible on your screen, you must light up
  418. * exactly the correct pixels, no matter where in the 28.4 x 28.4 device
  419. * space the end points of the line lie (meaning you must handle 32 bit
  420. * DDAs, you can certainly have optimized cases for lesser DDAs).
  421. *
  422. * We move the origin to (floor(M0 / F), floor(N0 / F)), so when we
  423. * calculate gamma from (5), we know that 0 <= M0, N0 < F. And we
  424. * are in the first octant, so dM >= dN. Then we know that gamma can
  425. * be in the range [(-1/2)dM, (3/2)dM]. The DDI guarantees us that
  426. * valid lines will have dM and dN values at most 31 bits (unsigned)
  427. * of significance. So gamma requires 33 bits of significance (we store
  428. * this as a 64 bit number for convenience).
  429. *
  430. * When running through the DDA loop, r_j + dR can have a value in the
  431. * range 0 <= r_j + dR < 2 dN; thus the result must be a 32 bit unsigned
  432. * value.
  433. *
  434. *
  435. * Testing Lines
  436. * -------------
  437. *
  438. * To be NT compliant, a display driver must exactly adhere to GIQ,
  439. * which means that for any given line, the driver must light exactly
  440. * the same pels as does GDI. This can be tested using the Guiman tool
  441. * provided elsewhere in the DDK, and 'ZTest', which draws random lines
  442. * on the screen and to a bitmap, and compares the results.
  443. *
  444. * If You've Got Line Hardware
  445. * ---------------------------
  446. *
  447. * If your hardware already adheres to GIQ, you're all set. Otherwise
  448. * you'll want to look at the S3 sample code and read the following:
  449. *
  450. * 1) You'll want to special case integer-only lines, since they require
  451. * less processing time and are more common (CAD programs will probably
  452. * only ever give integer lines). GDI does not provide a flag saying
  453. * that all lines in a path are integer lines; consequently, you will
  454. * have to explicitly check every line.
  455. *
  456. * 2) You are required to correctly draw any line in the 28.4 device
  457. * space that intersects the viewport. If you have less than 32 bits
  458. * of significance in the hardware for the Bresenham terms, extremely
  459. * long lines would overflow the hardware. For such (rare) cases, you
  460. * can fall back to strip-drawing code (or if your display is a frame
  461. * buffer, fall back to the engine).
  462. *
  463. * 3) If you can explicitly set the Bresenham terms in your hardware, you
  464. * can draw non-integer lines using the hardware. If your hardware has
  465. * 'n' bits of precision, you can draw GIQ lines that are up to 2^(n-5)
  466. * pels long (4 bits are required for the fractional part, and one bit is
  467. * used as a sign bit). Note that integer lines don't require the 4
  468. * fractional bits, so if you special case them as in 1), you can do
  469. * integer lines that are up to 2^(n - 1) pels long. See the
  470. * 'bHardwareLine' routine for an example.
  471. *
  472. \**************************************************************************/
  473. BOOL bLines(
  474. PDEV* ppdev,
  475. POINTFIX* pptfxFirst, // Start of first line
  476. POINTFIX* pptfxBuf, // Pointer to buffer of all remaining lines
  477. RUN* prun, // Pointer to runs if doing complex clipping
  478. ULONG cptfx, // Number of points in pptfxBuf or number of runs
  479. // in prun
  480. LINESTATE* pls, // Colour and style info
  481. RECTL* prclClip, // Pointer to clip rectangle if doing simple clipping
  482. PFNSTRIP apfn[], // Array of strip functions
  483. FLONG flStart) // Flags for each line, which is a combination of:
  484. // FL_SIMPLE_CLIP
  485. // FL_COMPLEX_CLIP
  486. // FL_STYLED
  487. // FL_LAST_PEL_INCLUSIVE
  488. // - Should be set only for all integer lines,
  489. // and can't be used with FL_COMPLEX_CLIP
  490. {
  491. ULONG M0;
  492. ULONG dM;
  493. ULONG N0;
  494. ULONG dN;
  495. ULONG dN_Original;
  496. FLONG fl;
  497. LONG x;
  498. LONG y;
  499. LONGLONG llBeta;
  500. LONGLONG llGamma;
  501. LONGLONG dl;
  502. LONGLONG ll;
  503. ULONG ulDelta;
  504. ULONG x0;
  505. ULONG y0;
  506. ULONG x1;
  507. ULONG cStylePels; // Major length of line in pixels for styling
  508. ULONG xStart;
  509. POINTL ptlStart;
  510. STRIP strip;
  511. PFNSTRIP pfn;
  512. LONG cPels;
  513. LONG* plStrip;
  514. LONG* plStripEnd;
  515. LONG cStripsInNextRun;
  516. POINTFIX* pptfxBufEnd = pptfxBuf + cptfx; // Last point in path record
  517. STYLEPOS spThis; // Style pos for this line
  518. // Make PREfast happy. We're just initializing to zero variables
  519. // PREfast is complaining about, still leaving in place risky code.
  520. // The reason: this is a legacy driver which works fine now and it
  521. // should be removed from the product relatively soon.
  522. x = 0;
  523. y = 0;
  524. dN_Original = 0;
  525. llBeta = 0;
  526. llGamma = 0;
  527. cStylePels = 0;
  528. xStart = 0;
  529. do {
  530. /***********************************************************************\
  531. * Start the DDA calculations. *
  532. \***********************************************************************/
  533. M0 = (LONG) pptfxFirst->x;
  534. dM = (LONG) pptfxBuf->x;
  535. N0 = (LONG) pptfxFirst->y;
  536. dN = (LONG) pptfxBuf->y;
  537. fl = flStart;
  538. // Check for non-clipped, non-styled integer endpoint lines
  539. if ((fl & (FL_CLIP | FL_STYLED)) == 0)
  540. {
  541. // Special-case integer end-point lines:
  542. #ifdef S3
  543. #if !defined(i386)
  544. if (((M0 | dM | N0 | dN) & (F - 1)) == 0)
  545. {
  546. if (bIntegerLine(ppdev, M0, N0, dM, dN))
  547. {
  548. goto Next_Line;
  549. }
  550. }
  551. else
  552. #endif
  553. #endif
  554. // Check for fractional endpoint lines that are small enough
  555. // to use the hardware DDA:
  556. #ifdef S3
  557. if (bHardwareLine(ppdev, pptfxFirst, pptfxBuf))
  558. {
  559. goto Next_Line;
  560. }
  561. #endif
  562. }
  563. if ((LONG) M0 > (LONG) dM)
  564. {
  565. // Ensure that we run left-to-right:
  566. register ULONG ulTmp;
  567. SWAPL(M0, dM, ulTmp);
  568. SWAPL(N0, dN, ulTmp);
  569. fl |= FL_FLIP_H;
  570. }
  571. // Compute the delta dx. The DDI says we can never have a valid delta
  572. // with a magnitude more than 2^31 - 1, but GDI never actually checks
  573. // its transforms. So we have to check for this case to avoid overflow:
  574. dM -= M0;
  575. if ((LONG) dM < 0)
  576. {
  577. goto Next_Line;
  578. }
  579. if ((LONG) dN < (LONG) N0)
  580. {
  581. // Line runs from bottom to top, so flip across y = 0:
  582. N0 = -(LONG) N0;
  583. dN = -(LONG) dN;
  584. fl |= FL_FLIP_V;
  585. }
  586. dN -= N0;
  587. if ((LONG) dN < 0)
  588. {
  589. goto Next_Line;
  590. }
  591. // We now have a line running left-to-right, top-to-bottom from (M0, N0)
  592. // to (M0 + dM, N0 + dN):
  593. if (dN >= dM)
  594. {
  595. if (dN == dM)
  596. {
  597. // Have to special case slopes of one:
  598. fl |= FL_FLIP_SLOPE_ONE;
  599. }
  600. else
  601. {
  602. // Since line has slope greater than 1, flip across x = y:
  603. register ULONG ulTmp;
  604. SWAPL(dM, dN, ulTmp);
  605. SWAPL(M0, N0, ulTmp);
  606. fl |= FL_FLIP_D;
  607. }
  608. }
  609. fl |= gaflRound[(fl & FL_ROUND_MASK) >> FL_ROUND_SHIFT];
  610. x = LFLOOR((LONG) M0);
  611. y = LFLOOR((LONG) N0);
  612. M0 = FXFRAC(M0);
  613. N0 = FXFRAC(N0);
  614. // Calculate the remainder term [ dM * (N0 + F/2) - M0 * dN ]:
  615. llGamma = UInt32x32To64(dM, N0 + F/2) - UInt32x32To64(M0, dN);
  616. if (fl & FL_V_ROUND_DOWN) // Adjust so y = 1/2 rounds down
  617. {
  618. llGamma--;
  619. }
  620. llGamma >>= FLOG2;
  621. llBeta = ~llGamma;
  622. /***********************************************************************\
  623. * Figure out which pixels are at the ends of the line. *
  624. \***********************************************************************/
  625. // The toughest part of GIQ is determining the start and end pels.
  626. //
  627. // Our approach here is to calculate x0 and x1 (the inclusive start
  628. // and end columns of the line respectively, relative to our normalized
  629. // origin). Then x1 - x0 + 1 is the number of pels in the line. The
  630. // start point is easily calculated by plugging x0 into our line equation
  631. // (which takes care of whether y = 1/2 rounds up or down in value)
  632. // getting y0, and then undoing the normalizing flips to get back
  633. // into device space.
  634. //
  635. // We look at the fractional parts of the coordinates of the start and
  636. // end points, and call them (M0, N0) and (M1, N1) respectively, where
  637. // 0 <= M0, N0, M1, N1 < 16. We plot (M0, N0) on the following grid
  638. // to determine x0:
  639. //
  640. // +-----------------------> +x
  641. // |
  642. // | 0 1
  643. // | 0123456789abcdef
  644. // |
  645. // | 0 ........?xxxxxxx
  646. // | 1 ..........xxxxxx
  647. // | 2 ...........xxxxx
  648. // | 3 ............xxxx
  649. // | 4 .............xxx
  650. // | 5 ..............xx
  651. // | 6 ...............x
  652. // | 7 ................
  653. // | 8 ................
  654. // | 9 ......**........
  655. // | a ........****...x
  656. // | b ............****
  657. // | c .............xxx****
  658. // | d ............xxxx ****
  659. // | e ...........xxxxx ****
  660. // | f ..........xxxxxx
  661. // |
  662. // | 2 3
  663. // v
  664. //
  665. // +y
  666. //
  667. // This grid accounts for the appropriate rounding of GIQ and last-pel
  668. // exclusion. If (M0, N0) lands on an 'x', x0 = 2. If (M0, N0) lands
  669. // on a '.', x0 = 1. If (M0, N0) lands on a '?', x0 rounds up or down,
  670. // depending on what flips have been done to normalize the line.
  671. //
  672. // For the end point, if (M1, N1) lands on an 'x', x1 =
  673. // floor((M0 + dM) / 16) + 1. If (M1, N1) lands on a '.', x1 =
  674. // floor((M0 + dM) / 16). If (M1, N1) lands on a '?', x1 rounds up or down,
  675. // depending on what flips have been done to normalize the line.
  676. //
  677. // Lines of exactly slope one require a special case for both the start
  678. // and end. For example, if the line ends such that (M1, N1) is (9, 1),
  679. // the line has gone exactly through (8, 0) -- which may be considered
  680. // to be part of 'x' because of rounding! So slopes of exactly slope
  681. // one going through (8, 0) must also be considered as belonging in 'x'.
  682. //
  683. // For lines that go left-to-right, we have the following grid:
  684. //
  685. // +-----------------------> +x
  686. // |
  687. // | 0 1
  688. // | 0123456789abcdef
  689. // |
  690. // | 0 xxxxxxxx?.......
  691. // | 1 xxxxxxx.........
  692. // | 2 xxxxxx..........
  693. // | 3 xxxxx...........
  694. // | 4 xxxx............
  695. // | 5 xxx.............
  696. // | 6 xx..............
  697. // | 7 x...............
  698. // | 8 x...............
  699. // | 9 x.....**........
  700. // | a xx......****....
  701. // | b xxx.........****
  702. // | c xxxx............****
  703. // | d xxxxx........... ****
  704. // | e xxxxxx.......... ****
  705. // | f xxxxxxx.........
  706. // |
  707. // | 2 3
  708. // v
  709. //
  710. // +y
  711. //
  712. // This grid accounts for the appropriate rounding of GIQ and last-pel
  713. // exclusion. If (M0, N0) lands on an 'x', x0 = 0. If (M0, N0) lands
  714. // on a '.', x0 = 1. If (M0, N0) lands on a '?', x0 rounds up or down,
  715. // depending on what flips have been done to normalize the line.
  716. //
  717. // For the end point, if (M1, N1) lands on an 'x', x1 =
  718. // floor((M0 + dM) / 16) - 1. If (M1, N1) lands on a '.', x1 =
  719. // floor((M0 + dM) / 16). If (M1, N1) lands on a '?', x1 rounds up or down,
  720. // depending on what flips have been done to normalize the line.
  721. //
  722. // Lines of exactly slope one must be handled similarly to the right-to-
  723. // left case.
  724. {
  725. // Calculate x0, x1
  726. ULONG N1 = FXFRAC(N0 + dN);
  727. ULONG M1 = FXFRAC(M0 + dM);
  728. x1 = LFLOOR(M0 + dM);
  729. if (fl & FL_LAST_PEL_INCLUSIVE)
  730. {
  731. // It sure is easy to compute the first pel when lines have only
  732. // integer coordinates and are last-pel inclusive:
  733. x0 = 0;
  734. y0 = 0;
  735. // Last-pel inclusive lines that are exactly one pixel long
  736. // have a 'delta-x' and 'delta-y' equal to zero. The problem is
  737. // that our clip code assumes that 'delta-x' is always non-zero
  738. // (since it never happens with last-pel exclusive lines). As
  739. // an inelegant solution, we simply modify 'delta-x' in this
  740. // case -- because the line is exactly one pixel long, changing
  741. // the slope will obviously have no effect on rasterization.
  742. if (x1 == 0)
  743. {
  744. dM = 1;
  745. llGamma = 0;
  746. llBeta = ~llGamma;
  747. }
  748. }
  749. else
  750. {
  751. if (fl & FL_FLIP_H)
  752. {
  753. // ---------------------------------------------------------------
  754. // Line runs right-to-left: <----
  755. // Compute x1:
  756. if (N1 == 0)
  757. {
  758. if (LROUND(M1, fl & FL_H_ROUND_DOWN))
  759. {
  760. x1++;
  761. }
  762. }
  763. else if (abs((LONG) (N1 - F/2)) + M1 > F)
  764. {
  765. x1++;
  766. }
  767. if ((fl & (FL_FLIP_SLOPE_ONE | FL_H_ROUND_DOWN))
  768. == (FL_FLIP_SLOPE_ONE))
  769. {
  770. // Have to special-case diagonal lines going through our
  771. // the point exactly equidistant between two horizontal
  772. // pixels, if we're supposed to round x=1/2 down:
  773. if ((N1 > 0) && (M1 == N1 + 8))
  774. x1++;
  775. // Don't you love special cases? Is this a rhetorical question?
  776. if ((N0 > 0) && (M0 == N0 + 8))
  777. {
  778. x0 = 2;
  779. ulDelta = dN;
  780. goto right_to_left_compute_y0;
  781. }
  782. }
  783. // Compute x0:
  784. x0 = 1;
  785. ulDelta = 0;
  786. if (N0 == 0)
  787. {
  788. if (LROUND(M0, fl & FL_H_ROUND_DOWN))
  789. {
  790. x0 = 2;
  791. ulDelta = dN;
  792. }
  793. }
  794. else if (abs((LONG) (N0 - F/2)) + M0 > F)
  795. {
  796. x0 = 2;
  797. ulDelta = dN;
  798. }
  799. // Compute y0:
  800. right_to_left_compute_y0:
  801. y0 = 0;
  802. ll = llGamma + (LONGLONG) ulDelta;
  803. if (ll >= (LONGLONG) (2 * dM - dN))
  804. y0 = 2;
  805. else if (ll >= (LONGLONG) (dM - dN))
  806. y0 = 1;
  807. }
  808. else
  809. {
  810. // ---------------------------------------------------------------
  811. // Line runs left-to-right: ---->
  812. // Compute x1:
  813. if (!(fl & FL_LAST_PEL_INCLUSIVE))
  814. x1--;
  815. if (M1 > 0)
  816. {
  817. if (N1 == 0)
  818. {
  819. if (LROUND(M1, fl & FL_H_ROUND_DOWN))
  820. x1++;
  821. }
  822. else if (abs((LONG) (N1 - F/2)) <= (LONG) M1)
  823. {
  824. x1++;
  825. }
  826. }
  827. if ((fl & (FL_FLIP_SLOPE_ONE | FL_H_ROUND_DOWN))
  828. == (FL_FLIP_SLOPE_ONE | FL_H_ROUND_DOWN))
  829. {
  830. // Have to special-case diagonal lines going through
  831. // the point exactly equidistant between two horizontal
  832. // pixels, if we're supposed to round x=1/2 down:
  833. if ((M1 > 0) && (N1 == M1 + 8))
  834. x1--;
  835. if ((M0 > 0) && (N0 == M0 + 8))
  836. {
  837. x0 = 0;
  838. goto left_to_right_compute_y0;
  839. }
  840. }
  841. // Compute x0:
  842. x0 = 0;
  843. if (M0 > 0)
  844. {
  845. if (N0 == 0)
  846. {
  847. if (LROUND(M0, fl & FL_H_ROUND_DOWN))
  848. x0 = 1;
  849. }
  850. else if (abs((LONG) (N0 - F/2)) <= (LONG) M0)
  851. {
  852. x0 = 1;
  853. }
  854. }
  855. // Compute y0:
  856. left_to_right_compute_y0:
  857. y0 = 0;
  858. if (llGamma >= (LONGLONG) (dM - (dN & (-(LONG) x0))))
  859. {
  860. y0 = 1;
  861. }
  862. }
  863. }
  864. }
  865. cStylePels = x1 - x0 + 1;
  866. if ((LONG) cStylePels <= 0)
  867. goto Next_Line;
  868. xStart = x0;
  869. /***********************************************************************\
  870. * Complex clipping. *
  871. \***********************************************************************/
  872. if (fl & FL_COMPLEX_CLIP)
  873. {
  874. dN_Original = dN;
  875. Continue_Complex_Clipping:
  876. if (fl & FL_FLIP_H)
  877. {
  878. // Line runs right-to-left <-----
  879. x0 = xStart + cStylePels - prun->iStop - 1;
  880. x1 = xStart + cStylePels - prun->iStart - 1;
  881. }
  882. else
  883. {
  884. // Line runs left-to-right ----->
  885. x0 = xStart + prun->iStart;
  886. x1 = xStart + prun->iStop;
  887. }
  888. prun++;
  889. // Reset some variables we'll nuke a little later:
  890. dN = dN_Original;
  891. pls->spNext = pls->spComplex;
  892. // No overflow since large integer math is used. Both values
  893. // will be positive:
  894. dl = UInt32x32To64(x0, dN) + llGamma;
  895. // y0 = dl / dM:
  896. y0 = UInt64Div32To32(dl, dM);
  897. ASSERTDD((LONG) y0 >= 0, "y0 weird: Goofed up end pel calc?");
  898. }
  899. /***********************************************************************\
  900. * Simple rectangular clipping. *
  901. \***********************************************************************/
  902. if (fl & FL_SIMPLE_CLIP)
  903. {
  904. ULONG y1;
  905. LONG xRight;
  906. LONG xLeft;
  907. LONG yBottom;
  908. LONG yTop;
  909. // Note that y0 and y1 are actually the lower and upper bounds,
  910. // respectively, of the y coordinates of the line (the line may
  911. // have actually shrunk due to first/last pel clipping).
  912. //
  913. // Also note that x0, y0 are not necessarily zero.
  914. RECTL* prcl = &prclClip[(fl & FL_RECTLCLIP_MASK) >>
  915. FL_RECTLCLIP_SHIFT];
  916. // Normalize to the same point we've normalized for the DDA
  917. // calculations:
  918. xRight = prcl->right - x;
  919. xLeft = prcl->left - x;
  920. yBottom = prcl->bottom - y;
  921. yTop = prcl->top - y;
  922. if (yBottom <= (LONG) y0 ||
  923. xRight <= (LONG) x0 ||
  924. xLeft > (LONG) x1)
  925. {
  926. Totally_Clipped:
  927. if (fl & FL_STYLED)
  928. {
  929. pls->spNext += cStylePels;
  930. if (pls->spNext >= pls->spTotal2)
  931. pls->spNext %= pls->spTotal2;
  932. }
  933. goto Next_Line;
  934. }
  935. if ((LONG) x1 >= xRight)
  936. x1 = xRight - 1;
  937. // We have to know the correct y1, which we haven't bothered to
  938. // calculate up until now. This multiply and divide is quite
  939. // expensive; we could replace it with code similar to that which
  940. // we used for computing y0.
  941. //
  942. // The reason why we need the actual value, and not an upper
  943. // bounds guess like y1 = LFLOOR(dM) + 2 is that we have to be
  944. // careful when calculating x(y) that y0 <= y <= y1, otherwise
  945. // we can overflow on the divide (which, needless to say, is very
  946. // bad).
  947. dl = UInt32x32To64(x1, dN) + llGamma;
  948. // y1 = dl / dM:
  949. y1 = UInt64Div32To32(dl, dM);
  950. if (yTop > (LONG) y1)
  951. goto Totally_Clipped;
  952. if (yBottom <= (LONG) y1)
  953. {
  954. y1 = yBottom;
  955. dl = UInt32x32To64(y1, dM) + llBeta;
  956. // x1 = dl / dN:
  957. x1 = UInt64Div32To32(dl, dN);
  958. }
  959. // At this point, we've taken care of calculating the intercepts
  960. // with the right and bottom edges. Now we work on the left and
  961. // top edges:
  962. if (xLeft > (LONG) x0)
  963. {
  964. x0 = xLeft;
  965. dl = UInt32x32To64(x0, dN) + llGamma;
  966. // y0 = dl / dM;
  967. y0 = UInt64Div32To32(dl, dM);
  968. if (yBottom <= (LONG) y0)
  969. goto Totally_Clipped;
  970. }
  971. if (yTop > (LONG) y0)
  972. {
  973. y0 = yTop;
  974. dl = UInt32x32To64(y0, dM) + llBeta;
  975. // x0 = dl / dN + 1;
  976. x0 = UInt64Div32To32(dl, dN) + 1;
  977. if (xRight <= (LONG) x0)
  978. goto Totally_Clipped;
  979. }
  980. ASSERTDD(x0 <= x1, "Improper rectangle clip");
  981. }
  982. /***********************************************************************\
  983. * Done clipping. Unflip if necessary. *
  984. \***********************************************************************/
  985. ptlStart.x = x + x0;
  986. ptlStart.y = y + y0;
  987. if (fl & FL_FLIP_D)
  988. {
  989. register LONG lTmp;
  990. SWAPL(ptlStart.x, ptlStart.y, lTmp);
  991. }
  992. if (fl & FL_FLIP_V)
  993. {
  994. ptlStart.y = -ptlStart.y;
  995. }
  996. cPels = x1 - x0 + 1;
  997. /***********************************************************************\
  998. * Style calculations. *
  999. \***********************************************************************/
  1000. if (fl & FL_STYLED)
  1001. {
  1002. STYLEPOS sp;
  1003. spThis = pls->spNext;
  1004. pls->spNext += cStylePels;
  1005. {
  1006. if (pls->spNext >= pls->spTotal2)
  1007. pls->spNext %= pls->spTotal2;
  1008. if (fl & FL_FLIP_H)
  1009. sp = pls->spNext - x0 + xStart;
  1010. else
  1011. sp = spThis + x0 - xStart;
  1012. ASSERTDD(fl & FL_STYLED, "Oops");
  1013. // Normalize our target style position:
  1014. if ((sp < 0) || (sp >= pls->spTotal2))
  1015. {
  1016. sp %= pls->spTotal2;
  1017. // The modulus of a negative number is not well-defined
  1018. // in C -- if it's negative we'll adjust it so that it's
  1019. // back in the range [0, spTotal2):
  1020. if (sp < 0)
  1021. sp += pls->spTotal2;
  1022. }
  1023. // Since we always draw the line left-to-right, but styling is
  1024. // always done in the direction of the original line, we have
  1025. // to figure out where we are in the style array for the left
  1026. // edge of this line.
  1027. if (fl & FL_FLIP_H)
  1028. {
  1029. // Line originally ran right-to-left:
  1030. sp = -sp;
  1031. if (sp < 0)
  1032. sp += pls->spTotal2;
  1033. pls->ulStyleMask = ~pls->ulStartMask;
  1034. pls->pspStart = &pls->aspRtoL[0];
  1035. pls->pspEnd = &pls->aspRtoL[pls->cStyle - 1];
  1036. }
  1037. else
  1038. {
  1039. // Line originally ran left-to-right:
  1040. pls->ulStyleMask = pls->ulStartMask;
  1041. pls->pspStart = &pls->aspLtoR[0];
  1042. pls->pspEnd = &pls->aspLtoR[pls->cStyle - 1];
  1043. }
  1044. if (sp >= pls->spTotal)
  1045. {
  1046. sp -= pls->spTotal;
  1047. if (pls->cStyle & 1)
  1048. pls->ulStyleMask = ~pls->ulStyleMask;
  1049. }
  1050. pls->psp = pls->pspStart;
  1051. while (sp >= *pls->psp)
  1052. sp -= *pls->psp++;
  1053. ASSERTDD(pls->psp <= pls->pspEnd,
  1054. "Flew off into NeverNeverLand");
  1055. pls->spRemaining = *pls->psp - sp;
  1056. if ((pls->psp - pls->pspStart) & 1)
  1057. pls->ulStyleMask = ~pls->ulStyleMask;
  1058. }
  1059. }
  1060. plStrip = &strip.alStrips[0];
  1061. plStripEnd = &strip.alStrips[STRIP_MAX]; // Is exclusive
  1062. cStripsInNextRun = 0x7fffffff;
  1063. strip.ptlStart = ptlStart;
  1064. #ifdef S3
  1065. //
  1066. // We cannot do diagonal lines without a DDA. Therefore don't let it happen
  1067. //
  1068. if (2 * dN > dM &&
  1069. !(fl & FL_STYLED))
  1070. {
  1071. // Do a half flip! Remember that we may be doing this on the
  1072. // same line multiple times for complex clipping (meaning the
  1073. // affected variables should be reset for every clip run):
  1074. fl |= FL_FLIP_HALF;
  1075. llBeta = llGamma - (LONGLONG) ((LONG) dM);
  1076. dN = dM - dN;
  1077. y0 = x0 - y0; // Note this may overflow, but that's okay
  1078. }
  1079. #endif
  1080. // Now, run the DDA starting at (ptlStart.x, ptlStart.y)!
  1081. strip.flFlips = fl;
  1082. pfn = apfn[(fl & FL_STRIP_MASK) >> FL_STRIP_SHIFT];
  1083. // Now calculate the DDA variables needed to figure out how many pixels
  1084. // go in the very first strip:
  1085. {
  1086. register LONG i;
  1087. register ULONG dI;
  1088. register ULONG dR;
  1089. ULONG r;
  1090. if (dN == 0)
  1091. i = 0x7fffffff;
  1092. else
  1093. {
  1094. dl = UInt32x32To64(y0 + 1, dM) + llBeta;
  1095. ASSERTDD(dl >= 0, "Oops!");
  1096. // i = (dl / dN) - x0 + 1;
  1097. // r = (dl % dN);
  1098. i = UInt64Div32To32(dl, dN);
  1099. r = UInt64Mod32To32(dl, dN);
  1100. i = i - x0 + 1;
  1101. dI = dM / dN;
  1102. dR = dM % dN; // 0 <= dR < dN
  1103. ASSERTDD(dI > 0, "Weird dI");
  1104. }
  1105. ASSERTDD(i > 0 && i <= 0x7fffffff, "Weird initial strip length");
  1106. ASSERTDD(cPels > 0, "Zero pel line");
  1107. /***********************************************************************\
  1108. * Run the DDA! *
  1109. \***********************************************************************/
  1110. while(TRUE)
  1111. {
  1112. cPels -= i;
  1113. if (cPels <= 0)
  1114. break;
  1115. *plStrip++ = i;
  1116. if (plStrip == plStripEnd)
  1117. {
  1118. strip.cStrips = plStrip - &strip.alStrips[0];
  1119. (*pfn)(ppdev, &strip, pls);
  1120. plStrip = &strip.alStrips[0];
  1121. }
  1122. i = dI;
  1123. r += dR;
  1124. if (r >= dN)
  1125. {
  1126. r -= dN;
  1127. i++;
  1128. }
  1129. }
  1130. *plStrip++ = cPels + i;
  1131. strip.cStrips = plStrip - &strip.alStrips[0];
  1132. (*pfn)(ppdev, &strip, pls);
  1133. }
  1134. Next_Line:
  1135. if (fl & FL_COMPLEX_CLIP)
  1136. {
  1137. cptfx--;
  1138. if (cptfx != 0)
  1139. goto Continue_Complex_Clipping;
  1140. break;
  1141. }
  1142. else
  1143. {
  1144. pptfxFirst = pptfxBuf;
  1145. pptfxBuf++;
  1146. }
  1147. } while (pptfxBuf < pptfxBufEnd);
  1148. return(TRUE);
  1149. }
  1150. #ifdef S3
  1151. //////////////////////////////////////////////////////////////////////////
  1152. // General defines for bHardwareLine
  1153. #define HW_FLIP_D 0x0001L // Diagonal flip
  1154. #define HW_FLIP_V 0x0002L // Vertical flip
  1155. #define HW_FLIP_H 0x0004L // Horizontal flip
  1156. #define HW_FLIP_SLOPE_ONE 0x0008L // Normalized line has exactly slope one
  1157. #define HW_FLIP_MASK (HW_FLIP_D | HW_FLIP_V | HW_FLIP_H)
  1158. #define HW_X_ROUND_DOWN 0x0100L // x = 1/2 rounds down in value
  1159. #define HW_Y_ROUND_DOWN 0x0200L // y = 1/2 rounds down in value
  1160. LONG gaiDir[] = { 0, 1, 7, 6, 3, 2, 4, 5 };
  1161. FLONG gaflHardwareRound[] = {
  1162. HW_X_ROUND_DOWN | HW_Y_ROUND_DOWN, // | | |
  1163. HW_X_ROUND_DOWN | HW_Y_ROUND_DOWN, // | | | FLIP_D
  1164. HW_X_ROUND_DOWN, // | | FLIP_V |
  1165. HW_Y_ROUND_DOWN, // | | FLIP_V | FLIP_D
  1166. HW_Y_ROUND_DOWN, // | FLIP_H | |
  1167. HW_X_ROUND_DOWN, // | FLIP_H | | FLIP_D
  1168. 0, // | FLIP_H | FLIP_V |
  1169. 0, // | FLIP_H | FLIP_V | FLIP_D
  1170. HW_Y_ROUND_DOWN, // SLOPE_ONE | | |
  1171. 0xffffffff, // SLOPE_ONE | | | FLIP_D
  1172. HW_X_ROUND_DOWN, // SLOPE_ONE | | FLIP_V |
  1173. 0xffffffff, // SLOPE_ONE | | FLIP_V | FLIP_D
  1174. HW_Y_ROUND_DOWN, // SLOPE_ONE | FLIP_H | |
  1175. 0xffffffff, // SLOPE_ONE | FLIP_H | | FLIP_D
  1176. HW_X_ROUND_DOWN, // SLOPE_ONE | FLIP_H | FLIP_V |
  1177. 0xffffffff // SLOPE_ONE | FLIP_H | FLIP_V | FLIP_D
  1178. };
  1179. //////////////////////////////////////////////////////////////////////////
  1180. // S3 specific defines
  1181. #define DEFAULT_DRAW_CMD (DRAW_LINE | DRAW | DIR_TYPE_XY | MULTIPLE_PIXELS | \
  1182. WRITE | LAST_PIXEL_OFF)
  1183. LONG gaiDrawCmd[] = {
  1184. DEFAULT_DRAW_CMD | PLUS_X | PLUS_Y | 0, // Octant 0
  1185. DEFAULT_DRAW_CMD | PLUS_X | PLUS_Y | MAJOR_Y, // Octant 1
  1186. DEFAULT_DRAW_CMD | PLUS_X | 0 | 0, // Octant 7
  1187. DEFAULT_DRAW_CMD | PLUS_X | 0 | MAJOR_Y, // Octant 6
  1188. DEFAULT_DRAW_CMD | 0 | PLUS_Y | 0, // Octant 3
  1189. DEFAULT_DRAW_CMD | 0 | PLUS_Y | MAJOR_Y, // Octant 2
  1190. DEFAULT_DRAW_CMD | 0 | 0 | 0, // Octant 4
  1191. DEFAULT_DRAW_CMD | 0 | 0 | MAJOR_Y, // Octant 5
  1192. };
  1193. // The S3's hardware can have 13 bits of significance for the error and
  1194. // step terms:
  1195. #define NUM_DDA_BITS 13
  1196. /******************************Public*Routine******************************\
  1197. * BOOL bHardwareLine(ppdev, pptfxStart, pptfxEnd)
  1198. *
  1199. * This routine is useful for folks who have line drawing hardware where
  1200. * they can explicitly set the Bresenham terms -- they can use this routine
  1201. * to draw fractional coordinate GIQ lines with the hardware.
  1202. *
  1203. * Fractional coordinate lines require an extra 4 bits of precision in the
  1204. * Bresenham terms. For example, if your hardware has 13 bits of precision
  1205. * for the terms, you can only draw GIQ lines up to 255 pels long using this
  1206. * routine.
  1207. *
  1208. * Input:
  1209. * pptfxStart - Points to GIQ coordinate of start of line
  1210. * pptfxEnd - Points to GIQ coordinate of end of line
  1211. * NUM_DDA_BITS- The number of bits of precision your hardware can support.
  1212. *
  1213. * Output:
  1214. * returns - TRUE if the line was drawn.
  1215. * FALSE if the line is too long, and the strips code must be
  1216. * used.
  1217. *
  1218. * DDALINE:
  1219. * iDir - Direction of the line, as an octant numbered as follows:
  1220. *
  1221. * \ 5 | 6 /
  1222. * \ | /
  1223. * 4 \ | / 7
  1224. * \ /
  1225. * -----+-----
  1226. * /|\
  1227. * 3 / | \ 0
  1228. * / | \
  1229. * / 2 | 1 \
  1230. *
  1231. * ptlStart - Start pixel of line.
  1232. * cPels - # of pels in line. *NOTE* You must check if this is <= 0!
  1233. * dMajor - Major axis delta.
  1234. * dMinor - Minor axis delta.
  1235. * lErrorTerm - Error term.
  1236. *
  1237. * What you do with the last 3 terms may be a little tricky. They are
  1238. * actually the terms for the formula of the normalized line
  1239. *
  1240. * dMinor * x + (lErrorTerm + dMajor)
  1241. * y(x) = floor( ---------------------------------- )
  1242. * dMajor
  1243. *
  1244. * where y(x) is the y coordinate of the pixel to be lit as a function of
  1245. * the x-coordinate.
  1246. *
  1247. * Every time the line advances one in the major direction 'x', dMinor
  1248. * gets added to the current error term. If the resulting value is >= 0,
  1249. * we know we have to move one pixel in the minor direction 'y', and
  1250. * dMajor must be subtracted from the current error term.
  1251. *
  1252. * If you're trying to figure out what this means for your hardware, you can
  1253. * think of the DDALINE terms as having been computed equivalently as
  1254. * follows:
  1255. *
  1256. * dMinor = 2 * (minor axis delta)
  1257. * dMajor = 2 * (major axis delta)
  1258. * lErrorTerm = - (major axis delta) - fixup
  1259. *
  1260. * That is, if your documentation tells you that for integer lines, a
  1261. * register is supposed to be initialized with the value
  1262. * '2 * (minor axis delta)', you'll actually use dMinor.
  1263. *
  1264. * Example: Setting up the 8514
  1265. *
  1266. * AXSTPSIGN is supposed to be the axial step constant register, defined
  1267. * as 2 * (minor axis delta). You set:
  1268. *
  1269. * AXSTPSIGN = dMinor
  1270. *
  1271. * DGSTPSIGN is supposed to be the diagonal step constant register,
  1272. * defined as 2 * (minor axis delta) - 2 * (major axis delta). You set:
  1273. *
  1274. * DGSTPSIGN = dMinor - dMajor
  1275. *
  1276. * ERR_TERM is supposed to be the adjusted error term, defined as
  1277. * 2 * (minor axis delta) - (major axis delta) - fixup. You set:
  1278. *
  1279. * ERR_TERM = lErrorTerm + dMinor
  1280. *
  1281. * Implementation:
  1282. *
  1283. * You'll want to special case integer lines before calling this routine
  1284. * (since they're very common, take less time for the computation of line
  1285. * terms, and can handle longer lines than this routine because 4 bits
  1286. * aren't being given to the fraction).
  1287. *
  1288. * If a GIQ line is too long to be handled by this routine, you can just
  1289. * use the slower strip routines for that line. Note that you cannot
  1290. * just fail the call -- you must be able to accurately draw any line
  1291. * in the 28.4 device space when it intersects the viewport.
  1292. *
  1293. * Testing:
  1294. *
  1295. * Use Guiman, or some other test that draws random fractional coordinate
  1296. * lines and compares them to what GDI itself draws to a bitmap.
  1297. *
  1298. \**************************************************************************/
  1299. BOOL bHardwareLine(
  1300. PDEV* ppdev,
  1301. POINTFIX* pptfxStart, // Start of line
  1302. POINTFIX* pptfxEnd) // End of line
  1303. {
  1304. FLONG fl; // Various flags
  1305. ULONG M0; // Normalized fractional unit x start coordinate (0 <= M0 < F)
  1306. ULONG N0; // Normalized fractional unit y start coordinate (0 <= N0 < F)
  1307. ULONG M1; // Normalized fractional unit x end coordinate (0 <= M1 < F)
  1308. ULONG N1; // Normalized fractional unit x end coordinate (0 <= N1 < F)
  1309. ULONG dM; // Normalized fractional unit x-delta (0 <= dM)
  1310. ULONG dN; // Normalized fractional unit y-delta (0 <= dN <= dM)
  1311. LONG x; // Normalized x coordinate of origin
  1312. LONG y; // Normalized y coordinate of origin
  1313. LONG x0; // Normalized x offset from origin to start pixel (inclusive)
  1314. LONG y0; // Normalized y offset from origin to start pixel (inclusive)
  1315. LONG x1; // Normalized x offset from origin to end pixel (inclusive)
  1316. LONG lGamma;// Bresenham error term at origin
  1317. LONG cPels; // Number of pixels in line
  1318. /***********************************************************************\
  1319. * Normalize line to the first octant.
  1320. \***********************************************************************/
  1321. fl = 0;
  1322. M0 = pptfxStart->x;
  1323. dM = pptfxEnd->x;
  1324. if ((LONG) dM < (LONG) M0)
  1325. {
  1326. // Line runs from right to left, so flip across x = 0:
  1327. M0 = -(LONG) M0;
  1328. dM = -(LONG) dM;
  1329. fl |= HW_FLIP_H;
  1330. }
  1331. // Compute the delta. The DDI says we can never have a valid delta
  1332. // with a magnitude more than 2^31 - 1, but the engine never actually
  1333. // checks its transforms. To ensure that we'll never puke on our shoes,
  1334. // we check for that case and simply refuse to draw the line:
  1335. dM -= M0;
  1336. if ((LONG) dM < 0)
  1337. return(FALSE);
  1338. N0 = pptfxStart->y;
  1339. dN = pptfxEnd->y;
  1340. if ((LONG) dN < (LONG) N0)
  1341. {
  1342. // Line runs from bottom to top, so flip across y = 0:
  1343. N0 = -(LONG) N0;
  1344. dN = -(LONG) dN;
  1345. fl |= HW_FLIP_V;
  1346. }
  1347. // Compute another delta:
  1348. dN -= N0;
  1349. if ((LONG) dN < 0)
  1350. return(FALSE);
  1351. if (dN >= dM)
  1352. {
  1353. if (dN == dM)
  1354. {
  1355. // Have to special case slopes of one:
  1356. fl |= HW_FLIP_SLOPE_ONE;
  1357. }
  1358. else
  1359. {
  1360. // Since line has slope greater than 1, flip across x = y:
  1361. register ULONG ulTmp;
  1362. ulTmp = dM; dM = dN; dN = ulTmp;
  1363. ulTmp = M0; M0 = N0; N0 = ulTmp;
  1364. fl |= HW_FLIP_D;
  1365. }
  1366. }
  1367. // Figure out if we can do the line in hardware, given that we have a
  1368. // limited number of bits of precision for the Bresenham terms.
  1369. //
  1370. // Remember that one bit has to be kept as a sign bit:
  1371. if ((LONG) dM >= (1L << (NUM_DDA_BITS - 1)))
  1372. return(FALSE);
  1373. fl |= gaflHardwareRound[fl];
  1374. /***********************************************************************\
  1375. * Calculate the error term at pixel 0.
  1376. \***********************************************************************/
  1377. x = LFLOOR((LONG) M0);
  1378. y = LFLOOR((LONG) N0);
  1379. M0 = FXFRAC(M0);
  1380. N0 = FXFRAC(N0);
  1381. // NOTE NOTE NOTE: If this routine were to handle any line in the 28.4
  1382. // space, it will overflow its math (the following part requires 36 bits
  1383. // of precision)! But we get here for lines that the hardware can handle
  1384. // (see the expression (dM >= (1L << (NUM_DDA_BITS - 1))) above?), so if
  1385. // cBits is less than 28, we're safe.
  1386. //
  1387. // If you're going to use this routine to handle all lines in the 28.4
  1388. // device space, you will HAVE to make sure the math doesn't overflow,
  1389. // otherwise you won't be NT compliant! (See 'bHardwareLine' for an example
  1390. // how to do that. You don't have to worry about this if you simply
  1391. // default to the strips code for long lines, because those routines
  1392. // already do the math correctly.)
  1393. // Calculate the remainder term [ dM * (N0 + F/2) - M0 * dN ]. Note
  1394. // that M0 and N0 have at most 4 bits of significance (and if the
  1395. // arguments are properly ordered, on a 486 each multiply would be no
  1396. // more than 13 cycles):
  1397. lGamma = (N0 + F/2) * dM - M0 * dN;
  1398. if (fl & HW_Y_ROUND_DOWN)
  1399. lGamma--;
  1400. lGamma >>= FLOG2;
  1401. /***********************************************************************\
  1402. * Figure out which pixels are at the ends of the line.
  1403. \***********************************************************************/
  1404. // The toughest part of GIQ is determining the start and end pels.
  1405. //
  1406. // Our approach here is to calculate x0 and x1 (the inclusive start
  1407. // and end columns of the line respectively, relative to our normalized
  1408. // origin). Then x1 - x0 + 1 is the number of pels in the line. The
  1409. // start point is easily calculated by plugging x0 into our line equation
  1410. // (which takes care of whether y = 1/2 rounds up or down in value)
  1411. // getting y0, and then undoing the normalizing flips to get back
  1412. // into device space.
  1413. //
  1414. // We look at the fractional parts of the coordinates of the start and
  1415. // end points, and call them (M0, N0) and (M1, N1) respectively, where
  1416. // 0 <= M0, N0, M1, N1 < 16. We plot (M0, N0) on the following grid
  1417. // to determine x0:
  1418. //
  1419. // +-----------------------> +x
  1420. // |
  1421. // | 0 1
  1422. // | 0123456789abcdef
  1423. // |
  1424. // | 0 ........?xxxxxxx
  1425. // | 1 ..........xxxxxx
  1426. // | 2 ...........xxxxx
  1427. // | 3 ............xxxx
  1428. // | 4 .............xxx
  1429. // | 5 ..............xx
  1430. // | 6 ...............x
  1431. // | 7 ................
  1432. // | 8 ................
  1433. // | 9 ......**........
  1434. // | a ........****...x
  1435. // | b ............****
  1436. // | c .............xxx****
  1437. // | d ............xxxx ****
  1438. // | e ...........xxxxx ****
  1439. // | f ..........xxxxxx
  1440. // |
  1441. // | 2 3
  1442. // v
  1443. //
  1444. // +y
  1445. //
  1446. // This grid accounts for the appropriate rounding of GIQ and last-pel
  1447. // exclusion. If (M0, N0) lands on an 'x', x0 = 2. If (M0, N0) lands
  1448. // on a '.', x0 = 1. If (M0, N0) lands on a '?', x0 rounds up or down,
  1449. // depending on what flips have been done to normalize the line.
  1450. //
  1451. // For the end point, if (M1, N1) lands on an 'x', x1 =
  1452. // floor((M0 + dM) / 16) + 1. If (M1, N1) lands on a '.', x1 =
  1453. // floor((M0 + dM)). If (M1, N1) lands on a '?', x1 rounds up or down,
  1454. // depending on what flips have been done to normalize the line.
  1455. //
  1456. // Lines of exactly slope one require a special case for both the start
  1457. // and end. For example, if the line ends such that (M1, N1) is (9, 1),
  1458. // the line has gone exactly through (8, 0) -- which may be considered
  1459. // to be part of 'x' because of rounding! So slopes of exactly slope
  1460. // one going through (8, 0) must also be considered as belonging in 'x'
  1461. // when an x value of 1/2 is supposed to round up in value.
  1462. // Calculate x0, x1:
  1463. N1 = FXFRAC(N0 + dN);
  1464. M1 = FXFRAC(M0 + dM);
  1465. x1 = LFLOOR(M0 + dM);
  1466. // Line runs left-to-right:
  1467. // Compute x1:
  1468. x1--;
  1469. if (M1 > 0)
  1470. {
  1471. if (N1 == 0)
  1472. {
  1473. if (LROUND(M1, fl & HW_X_ROUND_DOWN))
  1474. x1++;
  1475. }
  1476. else if (abs((LONG) (N1 - F/2)) <= (LONG) M1)
  1477. {
  1478. x1++;
  1479. }
  1480. }
  1481. if ((fl & (HW_FLIP_SLOPE_ONE | HW_X_ROUND_DOWN))
  1482. == (HW_FLIP_SLOPE_ONE | HW_X_ROUND_DOWN))
  1483. {
  1484. // Have to special-case diagonal lines going through our
  1485. // the point exactly equidistant between two horizontal
  1486. // pixels, if we're supposed to round x=1/2 down:
  1487. if ((M1 > 0) && (N1 == M1 + 8))
  1488. x1--;
  1489. if ((M0 > 0) && (N0 == M0 + 8))
  1490. {
  1491. x0 = 0;
  1492. goto left_to_right_compute_y0;
  1493. }
  1494. }
  1495. // Compute x0:
  1496. x0 = 0;
  1497. if (M0 > 0)
  1498. {
  1499. if (N0 == 0)
  1500. {
  1501. if (LROUND(M0, fl & HW_X_ROUND_DOWN))
  1502. x0 = 1;
  1503. }
  1504. else if (abs((LONG) (N0 - F/2)) <= (LONG) M0)
  1505. {
  1506. x0 = 1;
  1507. }
  1508. }
  1509. left_to_right_compute_y0:
  1510. /***********************************************************************\
  1511. * Calculate the start pixel.
  1512. \***********************************************************************/
  1513. // We now compute y0 and adjust the error term. We know x0, and we know
  1514. // the current formula for the pixels to be lit on the line:
  1515. //
  1516. // dN * x + lGamma
  1517. // y(x) = floor( --------------- )
  1518. // dM
  1519. //
  1520. // The remainder of this expression is the new error term at (x0, y0).
  1521. // Since x0 is going to be either 0 or 1, we don't actually have to do a
  1522. // multiply or divide to compute y0. Finally, we subtract dM from the
  1523. // new error term so that it is in the range [-dM, 0).
  1524. y0 = 0;
  1525. lGamma += (dN & (-x0));
  1526. lGamma -= dM;
  1527. if (lGamma >= 0)
  1528. {
  1529. y0 = 1;
  1530. lGamma -= dM;
  1531. }
  1532. // Undo our flips to get the start coordinate:
  1533. x += x0;
  1534. y += y0;
  1535. if (fl & HW_FLIP_D)
  1536. {
  1537. register LONG lTmp;
  1538. lTmp = x; x = y; y = lTmp;
  1539. }
  1540. if (fl & HW_FLIP_V)
  1541. {
  1542. y = -y;
  1543. }
  1544. if (fl & HW_FLIP_H)
  1545. {
  1546. x = -x;
  1547. }
  1548. /***********************************************************************\
  1549. * Return the Bresenham terms:
  1550. \***********************************************************************/
  1551. // iDir = gaiDir[fl & HW_FLIP_MASK];
  1552. // ptlStart.x = x;
  1553. // ptlStart.y = y;
  1554. // cPels = x1 - x0 + 1; // NOTE: You'll have to check if cPels <= 0!
  1555. // dMajor = dM;
  1556. // dMinor = dN;
  1557. // lErrorTerm = lGamma;
  1558. /***********************************************************************\
  1559. * Draw the line. S3 specific code follows:
  1560. \***********************************************************************/
  1561. cPels = x1 - x0 + 1;
  1562. if (cPels > 0)
  1563. {
  1564. IO_FIFO_WAIT(ppdev, 7);
  1565. IO_CUR_X(ppdev, x);
  1566. IO_CUR_Y(ppdev, y);
  1567. IO_MAJ_AXIS_PCNT(ppdev, cPels);
  1568. IO_AXSTP(ppdev, dN);
  1569. IO_DIASTP(ppdev, dN - dM);
  1570. IO_ERR_TERM(ppdev, dN + lGamma);
  1571. IO_CMD(ppdev, gaiDrawCmd[fl & HW_FLIP_MASK]);
  1572. }
  1573. return(TRUE);
  1574. }
  1575. #endif
  1576.