Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1686 lines
54 KiB

  1. /******************************Module*Header*******************************\
  2. * Module Name: bltio.c
  3. *
  4. * Contains the low-level in/out blt functions.
  5. *
  6. * Hopefully, if you're basing your display driver on this code, to
  7. * support all of DrvBitBlt and DrvCopyBits, you'll only have to implement
  8. * the following routines. You shouldn't have to modify anything in
  9. * 'bitblt.c'. I've tried to make these routines as few, modular, simple,
  10. * and efficient as I could, while still accelerating as many calls as
  11. * possible that would be cost-effective in terms of performance wins
  12. * versus size and effort.
  13. *
  14. * Note: In the following, 'relative' coordinates refers to coordinates
  15. * that haven't yet had the offscreen bitmap (DFB) offset applied.
  16. * 'Absolute' coordinates have had the offset applied. For example,
  17. * we may be told to blt to (1, 1) of the bitmap, but the bitmap may
  18. * be sitting in offscreen memory starting at coordinate (0, 768) --
  19. * (1, 1) would be the 'relative' start coordinate, and (1, 769)
  20. * would be the 'absolute' start coordinate.
  21. *
  22. * Copyright (c) 1992-1994 Microsoft Corporation
  23. *
  24. \**************************************************************************/
  25. #include "precomp.h"
  26. #if DBG
  27. // Useful aid for disabling any ATI extensions for debugging purposes.
  // When this flag is non-zero, vIoXfer1bppPacked forwards its arguments to
  // the generic vIoXfer1bpp routine and returns, instead of using the
  // packed-transfer path. It only exists in DBG builds.
  28. BOOL gb8514a = FALSE;
  29. #endif // DBG
  30. /******************************Public*Routine******************************\
  31. * VOID vIoFillSolid
  32. *
  33. * Fills a list of rectangles with a solid colour.
  34. *
  35. \**************************************************************************/
  36. VOID vIoFillSolid( // Type FNFILL
  37. PDEV* ppdev,
  38. LONG c, // Can't be zero
  39. RECTL* prcl, // List of rectangles to be filled, in relative
  40. // coordinates
  41. ULONG ulHwForeMix, // Hardware mix mode
  42. ULONG ulHwBackMix, // Not used
  43. RBRUSH_COLOR rbc, // Drawing colour is rbc.iSolidColor
  44. POINTL* pptlBrush) // Not used
  45. {
  46. ASSERTDD(c > 0, "Can't handle zero rectangles");
  47. ASSERTDD(ulHwForeMix <= 15, "Weird hardware Rop");
  48. // It's quite likely that we've just been called from GDI, so it's
  49. // even more likely that the accelerator's graphics engine has been
  50. // sitting around idle. Rather than doing a FIFO_WAIT(3) here and
  51. // then a FIFO_WAIT(5) before outputing the actual rectangle,
  52. // we can avoid an 'in' (which can be quite expensive, depending on
  53. // the card) by doing a single FIFO_WAIT(8) right off the bat:
  54. IO_FIFO_WAIT(ppdev, 8);
  55. IO_PIX_CNTL(ppdev, ALL_ONES);
  56. IO_FRGD_MIX(ppdev, FOREGROUND_COLOR | ulHwForeMix);
  57. IO_FRGD_COLOR(ppdev, rbc.iSolidColor);
  58. while(TRUE)
  59. {
  60. IO_CUR_X(ppdev, prcl->left);
  61. IO_CUR_Y(ppdev, prcl->top);
  62. IO_MAJ_AXIS_PCNT(ppdev, prcl->right - prcl->left - 1);
  63. IO_MIN_AXIS_PCNT(ppdev, prcl->bottom - prcl->top - 1);
  64. IO_CMD(ppdev, RECTANGLE_FILL | DRAWING_DIR_TBLRXM |
  65. DRAW | DIR_TYPE_XY |
  66. LAST_PIXEL_ON | MULTIPLE_PIXELS |
  67. WRITE);
  68. if (--c == 0)
  69. return;
  70. prcl++;
  71. IO_FIFO_WAIT(ppdev, 5);
  72. }
  73. }
  74. /******************************Public*Routine******************************\
  75. * VOID vIoSlowPatRealize
  76. *
  77. * This routine transfers an 8x8 pattern to off-screen display memory, and
  78. * duplicates it to make a 64x64 cached realization which is then used by
  79. * vIoFillPatSlow as the basic building block for doing 'slow' pattern output
  80. * via repeated screen-to-screen blts.
  81. *
  82. \**************************************************************************/
  83. VOID vIoSlowPatRealize(
  84. PDEV* ppdev,
  85. RBRUSH* prb, // Points to brush realization structure
  86. BOOL bTransparent) // FALSE for normal patterns; TRUE for
  87. // patterns with a mask when the background
  88. // mix is LEAVE_ALONE.
  89. {
  90. BRUSHENTRY* pbe;
  91. LONG iBrushCache;
  92. LONG x;
  93. LONG y;
  94. BYTE* pjSrc;
  95. BYTE* pjDst;
  96. BYTE jSrc;
  97. LONG i;
  98. WORD awBuf[8];
  99. pbe = prb->pbe;
  100. if ((pbe == NULL) || (pbe->prbVerify != prb))
  101. {
  102. // We have to allocate a new off-screen cache brush entry for
  103. // the brush:
  104. iBrushCache = ppdev->iBrushCache;
  105. pbe = &ppdev->abe[iBrushCache];
  106. iBrushCache++;
  107. if (iBrushCache >= ppdev->cBrushCache)
  108. iBrushCache = 0;
  109. ppdev->iBrushCache = iBrushCache;
  110. // Update our links:
  111. pbe->prbVerify = prb;
  112. prb->pbe = pbe;
  113. }
  114. // Load some pointer variables onto the stack, so that we don't have
  115. // to keep dereferencing their pointers:
  116. x = pbe->x;
  117. y = pbe->y;
  118. prb->bTransparent = bTransparent;
  119. // I considered doing the colour expansion for 1bpp brushes in
  120. // software, but by letting the hardware do it, we don't have
  121. // to do as many OUTs to transfer the pattern.
  122. if (prb->fl & RBRUSH_2COLOR)
  123. {
  124. // We're going to do a colour-expansion ('across the plane')
  125. // bitblt of the 1bpp 8x8 pattern to the screen.
  126. if (!bTransparent)
  127. {
  128. IO_FIFO_WAIT(ppdev, 4);
  129. IO_FRGD_MIX(ppdev, FOREGROUND_COLOR | OVERPAINT);
  130. IO_BKGD_MIX(ppdev, BACKGROUND_COLOR | OVERPAINT);
  131. IO_FRGD_COLOR(ppdev, prb->ulForeColor);
  132. IO_BKGD_COLOR(ppdev, prb->ulBackColor);
  133. IO_FIFO_WAIT(ppdev, 5);
  134. }
  135. else
  136. {
  137. IO_FIFO_WAIT(ppdev, 7);
  138. IO_FRGD_MIX(ppdev, LOGICAL_1);
  139. IO_BKGD_MIX(ppdev, LOGICAL_0);
  140. }
  141. IO_PIX_CNTL(ppdev, CPU_DATA);
  142. IO_ABS_CUR_X(ppdev, x);
  143. IO_ABS_CUR_Y(ppdev, y);
  144. IO_MAJ_AXIS_PCNT(ppdev, 7); // Brush is 8 wide
  145. IO_MIN_AXIS_PCNT(ppdev, 7); // Brush is 8 high
  146. IO_GP_WAIT(ppdev);
  147. IO_CMD(ppdev, RECTANGLE_FILL | BUS_SIZE_16 | WAIT |
  148. DRAWING_DIR_TBLRXM | DRAW | LAST_PIXEL_ON |
  149. MULTIPLE_PIXELS | WRITE | BYTE_SWAP);
  150. CHECK_DATA_READY(ppdev);
  151. pjSrc = (BYTE*) &prb->aulPattern[0];
  152. pjDst = (BYTE*) &awBuf[0];
  153. // Convert in-line to nibble arrangment:
  154. // LATER: This should be done in DrvRealizeBrush!
  155. for (i = 8; i != 0; i--)
  156. {
  157. jSrc = *pjSrc;
  158. pjSrc += 2; // We had an extra byte on every row
  159. *pjDst++ = jSrc >> 3;
  160. *pjDst++ = jSrc + jSrc;
  161. }
  162. vDataPortOut(ppdev, &awBuf[0], 8);
  163. // Each word transferred comprises one row of the
  164. // pattern, and there are 8 rows in the pattern
  165. CHECK_DATA_COMPLETE(ppdev);
  166. }
  167. else
  168. {
  169. ASSERTDD(!bTransparent,
  170. "Shouldn't have been asked for transparency with a non-1bpp brush");
  171. IO_FIFO_WAIT(ppdev, 6);
  172. IO_PIX_CNTL(ppdev, ALL_ONES);
  173. IO_FRGD_MIX(ppdev, SRC_CPU_DATA | OVERPAINT);
  174. IO_ABS_CUR_X(ppdev, x);
  175. IO_ABS_CUR_Y(ppdev, y);
  176. IO_MAJ_AXIS_PCNT(ppdev, 7); // Brush is 8 wide
  177. IO_MIN_AXIS_PCNT(ppdev, 7); // Brush is 8 high
  178. IO_GP_WAIT(ppdev);
  179. IO_CMD(ppdev, RECTANGLE_FILL | BUS_SIZE_16| WAIT |
  180. DRAWING_DIR_TBLRXM | DRAW | LAST_PIXEL_ON |
  181. SINGLE_PIXEL | WRITE | BYTE_SWAP);
  182. CHECK_DATA_READY(ppdev);
  183. vDataPortOut(ppdev, &prb->aulPattern[0],
  184. ((TOTAL_BRUSH_SIZE / 2) << ppdev->cPelSize));
  185. CHECK_DATA_COMPLETE(ppdev);
  186. }
  187. // �����������������Ŀ
  188. // �0�2�3 �4 �1� We now have an 8x8 colour-expanded copy of
  189. // �����������������Ĵ the pattern sitting in off-screen memory,
  190. // �5 � represented here by square '0'.
  191. // � �
  192. // � � We're now going to expand the pattern to
  193. // � � 72x72 by repeatedly copying larger rectangles
  194. // � � in the indicated order, and doing a 'rolling'
  195. // � � blt to copy vertically.
  196. // � �
  197. // �������������������
  198. // Copy '1':
  199. IO_FIFO_WAIT(ppdev, 7);
  200. IO_PIX_CNTL(ppdev, ALL_ONES);
  201. IO_FRGD_MIX(ppdev, SRC_DISPLAY_MEMORY | OVERPAINT);
  202. // Note that 'maj_axis_pcnt' and 'min_axis_pcnt' are already
  203. // correct.
  204. IO_ABS_CUR_X(ppdev, x);
  205. IO_ABS_CUR_Y(ppdev, y);
  206. IO_ABS_DEST_X(ppdev, x + 64);
  207. IO_ABS_DEST_Y(ppdev, y);
  208. IO_CMD(ppdev, BITBLT | DRAW | DIR_TYPE_XY | WRITE |
  209. MULTIPLE_PIXELS | DRAWING_DIR_TBLRXM);
  210. // Copy '2':
  211. IO_FIFO_WAIT(ppdev, 8);
  212. IO_ABS_DEST_X(ppdev, x + 8);
  213. IO_ABS_DEST_Y(ppdev, y);
  214. IO_CMD(ppdev, BITBLT | DRAW | DIR_TYPE_XY | WRITE |
  215. MULTIPLE_PIXELS | DRAWING_DIR_TBLRXM);
  216. // Copy '3':
  217. IO_ABS_DEST_X(ppdev, x + 16);
  218. IO_ABS_DEST_Y(ppdev, y);
  219. IO_MAJ_AXIS_PCNT(ppdev, 15);
  220. IO_CMD(ppdev, BITBLT | DRAW | DIR_TYPE_XY | WRITE |
  221. MULTIPLE_PIXELS | DRAWING_DIR_TBLRXM);
  222. IO_ABS_DEST_X(ppdev, x + 32);
  223. // Copy '4':
  224. IO_FIFO_WAIT(ppdev, 8);
  225. IO_ABS_DEST_Y(ppdev, y);
  226. IO_MAJ_AXIS_PCNT(ppdev, 31);
  227. IO_CMD(ppdev, BITBLT | DRAW | DIR_TYPE_XY | WRITE |
  228. MULTIPLE_PIXELS | DRAWING_DIR_TBLRXM);
  229. // Copy '5':
  230. IO_ABS_DEST_X(ppdev, x);
  231. IO_ABS_DEST_Y(ppdev, y + 8);
  232. IO_MAJ_AXIS_PCNT(ppdev, 71);
  233. IO_MIN_AXIS_PCNT(ppdev, 63);
  234. IO_CMD(ppdev, BITBLT | DRAW | DIR_TYPE_XY | WRITE |
  235. MULTIPLE_PIXELS | DRAWING_DIR_TBLRXM);
  236. }
  237. /******************************Public*Routine******************************\
  238. * VOID vIoFillPatSlow
  239. *
  240. * Uses the screen-to-screen blting ability of the accelerator to fill a
  241. * list of rectangles with a specified pattern. This routine is 'slow'
  242. * merely in the sense that it doesn't use any built-in hardware pattern
  243. * support that may be built into the accelerator.
  244. *
  245. \**************************************************************************/
  246. VOID vIoFillPatSlow( // Type FNFILL
  247. PDEV* ppdev,
  248. LONG c, // Can't be zero
  249. RECTL* prcl, // List of rectangles to be filled, in relative
  250. // coordinates
  251. ULONG ulHwForeMix, // Hardware mix mode (foreground mix mode if
  252. // the brush has a mask)
  253. ULONG ulHwBackMix, // Not used (unless the brush has a mask, in
  254. // which case it's the background mix mode)
  255. RBRUSH_COLOR rbc, // rbc.prb points to brush realization structure
  256. POINTL* pptlBrush) // Pattern alignment
  257. {
  258. BOOL bTransparent;
  259. BOOL bExponential;
  260. LONG x;
  261. LONG y;
  262. LONG yTmp;
  263. LONG cxToGo;
  264. LONG cyToGo;
  265. LONG cxThis;
  266. LONG cyThis;
  267. LONG xOrg;
  268. LONG yOrg;
  269. LONG xBrush;
  270. LONG yBrush;
  271. LONG cyOriginal;
  272. BRUSHENTRY* pbe; // Pointer to brush entry data, which is used
  273. // for keeping track of the location and status
  274. // of the pattern bits cached in off-screen
  275. // memory
  276. // C'est dommage que je ne connais pas quoi je fais.
  277. ASSERTDD(c > 0, "Can't handle zero rectangles");
  278. ASSERTDD(rbc.prb->pbe != NULL, "Unexpected Null pbe in vIoSlowPatBlt");
  279. ASSERTDD(ulHwForeMix <= 15, "Weird hardware Rop");
  280. ASSERTDD((ulHwForeMix == ulHwBackMix) || (ulHwBackMix == LEAVE_ALONE),
  281. "Only expect transparency from GDI for masked brushes");
  282. bTransparent = (ulHwForeMix != ulHwBackMix);
  283. if ((rbc.prb->pbe->prbVerify != rbc.prb) ||
  284. (rbc.prb->bTransparent != bTransparent))
  285. {
  286. vIoSlowPatRealize(ppdev, rbc.prb, bTransparent);
  287. }
  288. ASSERTDD(rbc.prb->bTransparent == bTransparent,
  289. "Not realized with correct transparency");
  290. if (!bTransparent)
  291. {
  292. IO_FIFO_WAIT(ppdev, 2);
  293. IO_PIX_CNTL(ppdev, ALL_ONES);
  294. IO_FRGD_MIX(ppdev, SRC_DISPLAY_MEMORY | ulHwForeMix);
  295. // We special case OVERPAINT mixes because we can implement
  296. // an exponential fill: every blt will double the size of
  297. // the current rectangle by using the portion of the pattern
  298. // that has already been done for this rectangle as the source.
  299. //
  300. // Note that there's no point in also checking for LOGICAL_0
  301. // or LOGICAL_1 because those will be taken care of by the
  302. // solid fill routines, and I can't be bothered to check for
  303. // NOTNEW:
  304. bExponential = (ulHwForeMix == OVERPAINT);
  305. }
  306. else
  307. {
  308. IO_FIFO_WAIT(ppdev, 5);
  309. IO_PIX_CNTL(ppdev, DISPLAY_MEMORY);
  310. IO_FRGD_MIX(ppdev, FOREGROUND_COLOR | ulHwForeMix);
  311. IO_BKGD_MIX(ppdev, BACKGROUND_COLOR | LEAVE_ALONE);
  312. IO_FRGD_COLOR(ppdev, rbc.prb->ulForeColor);
  313. IO_RD_MASK(ppdev, 1); // Pick a plane, any plane
  314. bExponential = FALSE;
  315. }
  316. // Note that since we do our brush alignment calculations in
  317. // relative coordinates, we should keep the brush origin in
  318. // relative coordinates as well:
  319. xOrg = pptlBrush->x;
  320. yOrg = pptlBrush->y;
  321. pbe = rbc.prb->pbe;
  322. xBrush = pbe->x;
  323. yBrush = pbe->y;
  324. do {
  325. x = prcl->left;
  326. y = prcl->top;
  327. cxToGo = prcl->right - x;
  328. cyToGo = prcl->bottom - y;
  329. if ((cxToGo <= SLOW_BRUSH_DIMENSION) &&
  330. (cyToGo <= SLOW_BRUSH_DIMENSION))
  331. {
  332. IO_FIFO_WAIT(ppdev, 7);
  333. IO_ABS_CUR_X(ppdev, ((x - xOrg) & 7) + xBrush);
  334. IO_ABS_CUR_Y(ppdev, ((y - yOrg) & 7) + yBrush);
  335. IO_DEST_X(ppdev, x);
  336. IO_DEST_Y(ppdev, y);
  337. IO_MAJ_AXIS_PCNT(ppdev, cxToGo - 1);
  338. IO_MIN_AXIS_PCNT(ppdev, cyToGo - 1);
  339. IO_CMD(ppdev, BITBLT | DRAW | DIR_TYPE_XY | WRITE |
  340. MULTIPLE_PIXELS | DRAWING_DIR_TBLRXM);
  341. }
  342. else if (bExponential)
  343. {
  344. cyThis = SLOW_BRUSH_DIMENSION;
  345. cyToGo -= cyThis;
  346. if (cyToGo < 0)
  347. cyThis += cyToGo;
  348. cxThis = SLOW_BRUSH_DIMENSION;
  349. cxToGo -= cxThis;
  350. if (cxToGo < 0)
  351. cxThis += cxToGo;
  352. IO_FIFO_WAIT(ppdev, 7);
  353. IO_MAJ_AXIS_PCNT(ppdev, cxThis - 1);
  354. IO_MIN_AXIS_PCNT(ppdev, cyThis - 1);
  355. IO_DEST_X(ppdev, x);
  356. IO_DEST_Y(ppdev, y);
  357. IO_ABS_CUR_X(ppdev, ((x - xOrg) & 7) + xBrush);
  358. IO_ABS_CUR_Y(ppdev, ((y - yOrg) & 7) + yBrush);
  359. IO_CMD(ppdev, BITBLT | DRAW | DIR_TYPE_XY | WRITE |
  360. MULTIPLE_PIXELS | DRAWING_DIR_TBLRXM);
  361. IO_FIFO_WAIT(ppdev, 2);
  362. IO_CUR_X(ppdev, x);
  363. IO_CUR_Y(ppdev, y);
  364. x += cxThis;
  365. while (cxToGo > 0)
  366. {
  367. // First, expand out to the right, doubling our size
  368. // each time:
  369. cxToGo -= cxThis;
  370. if (cxToGo < 0)
  371. cxThis += cxToGo;
  372. IO_FIFO_WAIT(ppdev, 4);
  373. IO_MAJ_AXIS_PCNT(ppdev, cxThis - 1);
  374. IO_DEST_X(ppdev, x);
  375. IO_DEST_Y(ppdev, y);
  376. IO_CMD(ppdev, BITBLT | DRAW | DIR_TYPE_XY | WRITE |
  377. MULTIPLE_PIXELS | DRAWING_DIR_TBLRXM);
  378. x += cxThis;
  379. cxThis *= 2;
  380. }
  381. if (cyToGo > 0)
  382. {
  383. // Now do a 'rolling blt' to pattern the rest vertically:
  384. IO_FIFO_WAIT(ppdev, 5);
  385. IO_DEST_X(ppdev, prcl->left);
  386. IO_DEST_Y(ppdev, prcl->top + cyThis);
  387. IO_MAJ_AXIS_PCNT(ppdev, prcl->right - prcl->left - 1);
  388. IO_MIN_AXIS_PCNT(ppdev, cyToGo - 1);
  389. IO_CMD(ppdev, BITBLT | DRAW | DIR_TYPE_XY | WRITE |
  390. MULTIPLE_PIXELS | DRAWING_DIR_TBLRXM);
  391. }
  392. }
  393. else
  394. {
  395. // We handle arbitrary mixes simply by repeatedly tiling
  396. // our cached pattern over the entire rectangle:
  397. IO_FIFO_WAIT(ppdev, 2);
  398. IO_ABS_CUR_X(ppdev, ((x - xOrg) & 7) + xBrush);
  399. IO_ABS_CUR_Y(ppdev, ((y - yOrg) & 7) + yBrush);
  400. cyOriginal = cyToGo; // Have to remember for later...
  401. do {
  402. cxThis = SLOW_BRUSH_DIMENSION;
  403. cxToGo -= cxThis;
  404. if (cxToGo < 0)
  405. cxThis += cxToGo;
  406. IO_FIFO_WAIT(ppdev, 2);
  407. IO_MAJ_AXIS_PCNT(ppdev, cxThis - 1);
  408. IO_DEST_X(ppdev, x);
  409. x += cxThis; // Get ready for next column
  410. cyToGo = cyOriginal; // Have to reset for each new column
  411. yTmp = y;
  412. do {
  413. cyThis = SLOW_BRUSH_DIMENSION;
  414. cyToGo -= cyThis;
  415. if (cyToGo < 0)
  416. cyThis += cyToGo;
  417. IO_FIFO_WAIT(ppdev, 3);
  418. IO_DEST_Y(ppdev, yTmp);
  419. yTmp += cyThis;
  420. IO_MIN_AXIS_PCNT(ppdev, cyThis - 1);
  421. IO_CMD(ppdev, BITBLT | DRAW | DIR_TYPE_XY | WRITE |
  422. MULTIPLE_PIXELS | DRAWING_DIR_TBLRXM);
  423. } while (cyToGo > 0);
  424. } while (cxToGo > 0);
  425. }
  426. prcl++;
  427. } while (--c != 0);
  428. }
  429. /******************************Public*Routine******************************\
  430. * VOID vIoXfer1bpp
  431. *
  432. * This routine colour-expands a monochrome bitmap, possibly with different
  433. * Rop2's for the foreground and background. It will be called in the
  434. * following cases:
  435. *
  436. * 1) To colour-expand the monochrome text buffer for the vFastText routine.
  437. * 2) To blt a 1bpp source with a simple Rop2 between the source and
  438. * destination.
  439. * 3) To blt a true Rop3 when the source is a 1bpp bitmap that expands to
  440. * white and black, and the pattern is a solid colour.
  441. * 4) To handle a true Rop4 that works out to be Rop2's between the pattern
  442. * and destination.
  443. *
  444. * Needless to say, making this routine fast can leverage a lot of
  445. * performance.
  446. *
  * Outline: for each destination rectangle the blt is widened to 8-pel
  * boundaries, the left and right scissors are set to the rectangle's real
  * edges, and the source bits are pushed out through the PIX_TRANS port one
  * word per 8 pels by the inline assembly below. The left and right
  * scissors are reset to 0 and ppdev->cxMemory - 1 before returning.
  *
  447. \**************************************************************************/
  448. VOID vIoXfer1bpp( // Type FNXFER
  449. PDEV* ppdev,
  450. LONG c, // Count of rectangles, can't be zero
  451. RECTL* prcl, // List of destination rectangles, in relative
  452. // coordinates
  453. ULONG ulHwForeMix,// Foreground hardware mix
  454. ULONG ulHwBackMix,// Background hardware mix
  455. SURFOBJ* psoSrc, // Source surface
  456. POINTL* pptlSrc, // Original unclipped source point
  457. RECTL* prclDst, // Original unclipped destination rectangle
  458. XLATEOBJ* pxlo) // Translate that provides colour-expansion information
  459. {
  460. LONG dxSrc;
  461. LONG dySrc;
  462. LONG cx;
  463. LONG cy;
  464. LONG lSrcDelta;
  465. BYTE* pjSrcScan0;
  466. BYTE* pjSrc;
  467. LONG cjSrc;
  468. LONG xLeft;
  469. LONG xRight;
  470. LONG yTop;
  471. LONG yBottom;
  472. LONG xRotateLeft;
  473. LONG cBitsNeededForFirstNibblePair;
  474. ASSERTDD(c > 0, "Can't handle zero rectangles");
  475. ASSERTDD(ulHwForeMix <= 15, "Weird hardware Rop");
  476. ASSERTDD(ulHwBackMix <= 15, "Weird hardware Rop");
  477. ASSERTDD(pptlSrc != NULL && psoSrc != NULL, "Can't have NULL sources");
  // One-time set-up (5 register writes): both mixes, and the two expansion
  // colours. pulXlate[0] is written as the background colour and pulXlate[1]
  // as the foreground colour.
  478. IO_FIFO_WAIT(ppdev, 5);
  479. IO_PIX_CNTL(ppdev, CPU_DATA);
  480. IO_BKGD_MIX(ppdev, BACKGROUND_COLOR | ulHwBackMix);
  481. IO_FRGD_MIX(ppdev, FOREGROUND_COLOR | ulHwForeMix);
  482. IO_BKGD_COLOR(ppdev, pxlo->pulXlate[0]);
  483. IO_FRGD_COLOR(ppdev, pxlo->pulXlate[1]);
  484. dxSrc = pptlSrc->x - prclDst->left;
  485. dySrc = pptlSrc->y - prclDst->top; // Add to destination to get source
  486. lSrcDelta = psoSrc->lDelta;
  487. pjSrcScan0 = psoSrc->pvScan0;
  488. do {
  // Six register writes per rectangle precede the command: two scissors,
  // the start point and the two axis counts.
  489. IO_FIFO_WAIT(ppdev, 6);
  490. yBottom = prcl->bottom;
  491. yTop = prcl->top;
  492. xRight = prcl->right;
  493. xLeft = prcl->left;
  // Must be computed from the unaligned left edge, before xLeft is rounded
  // down below:
  494. cBitsNeededForFirstNibblePair = 8 - (xLeft & 7);
  // The scissors get the true edges; the blt itself runs from xLeft rounded
  // down to a multiple of 8 up to xRight rounded up to a multiple of 8.
  495. IO_SCISSORS_L(ppdev, xLeft);
  496. xLeft = (xLeft) & ~7;
  497. IO_SCISSORS_R(ppdev, xRight - 1);
  498. xRight = (xRight + 7) & ~7;
  499. IO_CUR_X(ppdev, xLeft);
  500. IO_CUR_Y(ppdev, yTop);
  501. cx = xRight - xLeft;
  502. cy = yBottom - yTop;
  503. IO_MAJ_AXIS_PCNT(ppdev, cx - 1);
  504. IO_MIN_AXIS_PCNT(ppdev, cy - 1);
  505. cjSrc = cx >> 3; // We'll be transferring WORDs,
  506. // but every word accounts for
  507. // 8 pels = 1 byte of the source
  508. pjSrc = pjSrcScan0 + (yTop + dySrc) * lSrcDelta
  509. + ((xLeft + dxSrc) >> 3);
  510. // Start is byte aligned
  511. xRotateLeft = (dxSrc) & 7; // Amount by which to rotate left
  512. IO_GP_WAIT(ppdev);
  513. IO_CMD(ppdev, RECTANGLE_FILL | BUS_SIZE_16| WAIT |
  514. DRAWING_DIR_TBLRXM | DRAW | LAST_PIXEL_ON |
  515. MULTIPLE_PIXELS | WRITE | BYTE_SWAP);
  516. CHECK_DATA_READY(ppdev);
  // The assembly below writes cjSrc words per scan to PIX_TRANS, for cy
  // scans ('cy' is decremented in place). For every word, AH holds 8 source
  // bits; it is split into AL = AH >> 3 and AH = AH << 1 before the OUT
  // (the same byte-to-word conversion vIoSlowPatRealize applies to 1bpp
  // brush rows).
  //
  // When xRotateLeft is zero the source byte is used as is. Otherwise two
  // adjacent source bytes are loaded into AH:AL and shifted left by
  // xRotateLeft so that AH ends up holding the 8 bits that start
  // xRotateLeft bits into the first byte. For the first word of each scan
  // the load of the first byte is skipped when xRotateLeft >=
  // cBitsNeededForFirstNibblePair -- presumably because every bit that byte
  // would contribute lies left of the left scissor.
  //
  // NOTE(review): the rotated loop reads [esi + 1] for every word, so on
  // each scan it touches one byte beyond the cjSrc bytes it outputs --
  // confirm this cannot run past the end of the source bitmap.
  517. _asm {
  518. ; eax = scratch
  519. ; ebx = count of words output per scan
  520. ; ecx = amount to rotate left
  521. ; edx = port
  522. ; esi = source pointer
  523. ; edi = source delta between end of last scan and start of next
  524. mov ecx,xRotateLeft
  525. mov edx,PIX_TRANS
  526. mov esi,pjSrc
  527. mov edi,lSrcDelta
  528. sub edi,cjSrc
  529. test ecx,ecx
  530. jz UnrotatedScanLoop
  531. RotatedScanLoop:
  532. mov ebx,cjSrc
  533. cmp ecx,cBitsNeededForFirstNibblePair
  534. jge RotatedDontNeedFirstByte
  535. RotatedWordLoop:
  536. mov ah,[esi]
  537. RotatedDontNeedFirstByte:
  538. mov al,[esi + 1]
  539. shl eax,cl
  540. inc esi
  541. mov al,ah
  542. shr al,3
  543. add ah,ah
  544. out dx,ax
  545. dec ebx
  546. jnz RotatedWordLoop
  547. add esi,edi
  548. dec cy
  549. jnz RotatedScanLoop
  550. jmp AllDone
  551. UnrotatedScanLoop:
  552. mov ebx,cjSrc
  553. UnrotatedWordLoop:
  554. mov ah,[esi]
  555. inc esi
  556. mov al,ah
  557. shr al,3
  558. add ah,ah
  559. out dx,ax
  560. dec ebx
  561. jnz UnrotatedWordLoop
  562. add esi,edi
  563. dec cy
  564. jnz UnrotatedScanLoop
  565. AllDone:
  566. }
  567. CHECK_DATA_COMPLETE(ppdev);
  568. prcl++;
  569. } while (--c != 0);
  570. // We always have to reset the clipping, because both scissors were
  // changed for every rectangle above:
  571. IO_FIFO_WAIT(ppdev, 2);
  572. IO_ABS_SCISSORS_L(ppdev, 0);
  573. IO_ABS_SCISSORS_R(ppdev, ppdev->cxMemory - 1);
  574. }
  575. /******************************Public*Routine******************************\
  576. * VOID vIoXfer1bppPacked
  577. *
  578. * This is the same routine as 'vIoXfer1bpp', except that it takes
  579. * advantage of the ATI's packed bit transfers to improve speed.
  580. *
  581. * Needless to say, this routine can only be called when running
  582. * on an ATI adapter.
  583. *
  * Unlike vIoXfer1bpp, this routine writes the registers directly with
  * OUT_WORD rather than through the IO_* macros, so it adds ppdev->xOffset
  * and ppdev->yOffset to the destination coordinates itself. In DBG builds,
  * a non-zero 'gb8514a' makes it call vIoXfer1bpp instead.
  *
  584. \**************************************************************************/
  585. VOID vIoXfer1bppPacked( // Type FNXFER
  586. PDEV* ppdev,
  587. LONG c, // Count of rectangles, can't be zero
  588. RECTL* prcl, // List of destination rectangles, in relative
  589. // coordinates
  590. ULONG ulHwForeMix,// Foreground hardware mix
  591. ULONG ulHwBackMix,// Background hardware mix
  592. SURFOBJ* psoSrc, // Source surface
  593. POINTL* pptlSrc, // Original unclipped source point
  594. RECTL* prclDst, // Original unclipped destination rectangle
  595. XLATEOBJ* pxlo) // Translate that provides colour-expansion information
  596. {
  597. LONG dxSrc;
  598. LONG dySrc;
  599. LONG cy;
  600. LONG lSrcDelta;
  601. LONG lTmpDelta;
  602. BYTE* pjSrcScan0;
  603. BYTE* pjSrc;
  604. LONG cwSrc;
  605. LONG xLeft;
  606. LONG xRight;
  607. LONG yTop;
  608. LONG yBottom;
  609. LONG xBiasLeft;
  610. LONG xBiasRight;
  611. #if DBG
  612. {
  // Debugging aid: fall back to the non-ATI routine when asked to.
  613. if (gb8514a)
  614. {
  615. vIoXfer1bpp(ppdev, c, prcl, ulHwForeMix, ulHwBackMix, psoSrc,
  616. pptlSrc, prclDst, pxlo);
  617. return;
  618. }
  619. }
  620. #endif // DBG
  621. ASSERTDD(c > 0, "Can't handle zero rectangles");
  622. ASSERTDD(ulHwForeMix <= 15, "Weird hardware Rop");
  623. ASSERTDD(ulHwBackMix <= 15, "Weird hardware Rop");
  624. ASSERTDD(pptlSrc != NULL && psoSrc != NULL, "Can't have NULL sources");
  // Spin until the FOURTEEN_WORDS status bits clear (presumably: until that
  // many FIFO entries are free). Up to fourteen writes precede the first
  // data transfer: the 4 below, at most 4 scissors writes, and 6 set-up
  // writes per rectangle.
  625. while (INPW(EXT_FIFO_STATUS) & FOURTEEN_WORDS)
  626. ;
  627. OUT_WORD(ALU_FG_FN, ulHwForeMix);
  628. OUT_WORD(ALU_BG_FN, ulHwBackMix);
  629. OUT_WORD(FRGD_COLOR, pxlo->pulXlate[1]);
  630. OUT_WORD(BKGD_COLOR, pxlo->pulXlate[0]);
  631. // Add 'dxSrc' and 'dySrc' to a destination coordinate to get source.
  632. // Because we will be explicitly dealing with absolute destination
  633. // coordinates (we're not using the normal accelerator macros), we have
  634. // to explicitly account for the DFB offset:
  635. dxSrc = pptlSrc->x - (prclDst->left + ppdev->xOffset);
  636. dySrc = pptlSrc->y - (prclDst->top + ppdev->yOffset);
  637. lSrcDelta = psoSrc->lDelta;
  638. pjSrcScan0 = psoSrc->pvScan0;
  639. while (TRUE)
  640. {
  641. // Since we're not using the normal accelerator register macros,
  642. // we have to explicitly account for the DFB offset:
  643. yBottom = prcl->bottom + ppdev->yOffset;
  644. yTop = prcl->top + ppdev->yOffset;
  645. xRight = prcl->right + ppdev->xOffset;
  646. xLeft = prcl->left + ppdev->xOffset;
  647. // Make sure we're word aligned on the source, because we're
  648. // going to be transferring words and we don't want to risk
  649. // reading past the end of the bitmap:
  650. xBiasLeft = (xLeft + dxSrc) & 15;
  651. if (xBiasLeft != 0)
  652. {
  653. // Rev 3 ATI chips have goofy timing bugs on 66 MHz DX-2
  654. // computers where some extended registers will not be correctly
  655. // set the first time. The extended scissors registers
  656. // have this problem, but setting them twice seems to work:
  657. OUT_WORD(EXT_SCISSOR_L, xLeft);
  658. OUT_WORD(EXT_SCISSOR_L, xLeft);
  // Start the blt further left, where the source is word aligned; the left
  // scissor set above holds the rectangle's true left edge:
  659. xLeft -= xBiasLeft;
  660. }
  661. // The width has to be a word multiple:
  662. xBiasRight = (xRight - xLeft) & 15;
  663. if (xBiasRight != 0)
  664. {
  665. OUT_WORD(EXT_SCISSOR_R, xRight - 1);
  666. OUT_WORD(EXT_SCISSOR_R, xRight - 1);
  // Pad the width up to the next multiple of 16 pels; the right scissor set
  // above holds the rectangle's true right edge:
  667. xRight += 16 - xBiasRight;
  668. }
  669. OUT_WORD(DP_CONFIG, FG_COLOR_SRC_FG | BG_COLOR_SRC_BG | DATA_ORDER |
  670. EXT_MONO_SRC_HOST | DRAW | WRITE | DATA_WIDTH);
  671. OUT_WORD(DEST_X_START, xLeft);
  672. OUT_WORD(CUR_X, xLeft);
  673. OUT_WORD(DEST_X_END, xRight);
  674. OUT_WORD(CUR_Y, yTop);
  675. OUT_WORD(DEST_Y_END, yBottom);
  676. cwSrc = (xRight - xLeft) / 16; // We'll be transferring WORDs
  677. pjSrc = pjSrcScan0 + (yTop + dySrc) * lSrcDelta
  678. + (xLeft + dxSrc) / 8;
  679. // Start is byte aligned (note
  680. // that we don't have to add
  681. // xBiasLeft)
  682. cy = yBottom - yTop;
  // Amount to add to the source pointer after the cwSrc words (2 * cwSrc
  // bytes) of a scan have been output, to reach the start of the next scan:
  683. lTmpDelta = lSrcDelta - 2 * cwSrc;
  684. // To be safe, we make sure there are always as many free FIFO entries
  685. // as we'll transfer (note that this implementation isn't particularly
  686. // efficient, especially for short scans):
  //
  // For each of the cy scans the assembly outputs cwSrc words to PIX_TRANS
  // with 'rep outsw', at most 16 at a time. Before every batch, including
  // the last, shorter one, it polls EXT_FIFO_STATUS until the SIXTEEN_WORDS
  // bits are clear.
  687. _asm {
  688. ; eax = used for IN
  689. ; ebx = count of words remaining on current scan
  690. ; ecx = used for REP
  691. ; edx = used for IN and OUT
  692. ; esi = current source pointer
  693. ; edi = count of scans
  694. mov esi,pjSrc
  695. mov edi,cy
  696. Scan_Loop:
  697. mov ebx,cwSrc
  698. Batch_Loop:
  699. mov edx,EXT_FIFO_STATUS
  700. in ax,dx
  701. and eax,SIXTEEN_WORDS
  702. jnz short Batch_Loop
  703. mov edx,PIX_TRANS
  704. sub ebx,16
  705. jle short Finish_Scan
  706. mov ecx,16
  707. rep outsw
  708. jmp short Batch_Loop
  709. Finish_Scan:
  710. add ebx,16
  711. mov ecx,ebx
  712. rep outsw
  713. add esi,lTmpDelta
  714. dec edi
  715. jnz Scan_Loop
  716. }
  717. if ((xBiasLeft | xBiasRight) != 0)
  718. {
  719. // Reset the clipping only if we used it. Each register is written
  // twice, as when it was set above:
  720. while (INPW(EXT_FIFO_STATUS) & FOUR_WORDS)
  721. ;
  722. OUT_WORD(EXT_SCISSOR_L, 0);
  723. OUT_WORD(EXT_SCISSOR_R, ppdev->cxMemory - 1);
  724. OUT_WORD(EXT_SCISSOR_L, 0);
  725. OUT_WORD(EXT_SCISSOR_R, ppdev->cxMemory - 1);
  726. }
  727. if (--c == 0)
  728. return;
  729. prcl++;
  730. // Do the wait for the next round now. Ten covers the per-rectangle
  // writes: at most 4 scissors writes plus the 6 set-up writes:
  731. while (INPW(EXT_FIFO_STATUS) & TEN_WORDS)
  732. ;
  733. }
  734. }
  735. /******************************Public*Routine******************************\
  736. * VOID vIoXfer4bpp
  737. *
  738. * Does a 4bpp transfer from a bitmap to the screen.
  739. *
  740. * NOTE: The screen must be 8bpp for this function to be called!
  741. *
  742. * The reason we implement this is that a lot of resources are kept as 4bpp,
  743. * and used to initialize DFBs, some of which we of course keep off-screen.
  744. *
  745. \**************************************************************************/
  746. // XLATE_BUFFER_SIZE defines the size of the stack-based buffer we use
  747. // for doing the translate. Note that in general stack buffers should
  748. // be kept as small as possible. The OS guarantees us only 8k for stack
  749. // from GDI down to the display driver in low memory situations; if we
  750. // ask for more, we'll access violate. Note also that at any time the
  751. // stack buffer cannot be larger than a page (4k) -- otherwise we may
  752. // miss touching the 'guard page' and access violate then too.
  753. #define XLATE_BUFFER_SIZE 256
  754. VOID vIoXfer4bpp( // Type FNXFER
  755. PDEV* ppdev,
  756. LONG c, // Count of rectangles, can't be zero
  757. RECTL* prcl, // List of destination rectangles, in relative
  758. // coordinates
  759. ULONG ulHwForeMix,// Hardware mix
  760. ULONG ulHwBackMix,// Not used
  761. SURFOBJ* psoSrc, // Source surface
  762. POINTL* pptlSrc, // Original unclipped source point
  763. RECTL* prclDst, // Original unclipped destination rectangle
  764. XLATEOBJ* pxlo) // Translate that provides colour-expansion information
  765. {
  766. LONG dx;
  767. LONG dy;
  768. LONG cx;
  769. LONG cy;
  770. LONG lSrcDelta;
  771. BYTE* pjSrcScan0;
  772. BYTE* pjScan;
  773. BYTE* pjSrc;
  774. BYTE* pjDst;
  775. LONG cxThis;
  776. LONG cxToGo;
  777. LONG xSrc;
  778. LONG iLoop;
  779. BYTE jSrc;
  780. ULONG* pulXlate;
  781. BOOL bResetScissors;
  782. BYTE ajBuf[XLATE_BUFFER_SIZE];
  783. ASSERTDD(ppdev->iBitmapFormat == BMF_8BPP, "Screen must be 8bpp");
  784. ASSERTDD(psoSrc->iBitmapFormat == BMF_4BPP, "Source must be 4bpp");
  785. ASSERTDD(c > 0, "Can't handle zero rectangles");
  786. ASSERTDD(ulHwForeMix <= 15, "Weird hardware Rop");
  787. dx = pptlSrc->x - prclDst->left;
  788. dy = pptlSrc->y - prclDst->top; // Add to destination to get source
  789. lSrcDelta = psoSrc->lDelta;
  790. pjSrcScan0 = psoSrc->pvScan0;
  791. IO_FIFO_WAIT(ppdev, 7);
  792. IO_PIX_CNTL(ppdev, ALL_ONES);
  793. IO_FRGD_MIX(ppdev, SRC_CPU_DATA | ulHwForeMix);
  794. while(TRUE)
  795. {
  796. cy = prcl->bottom - prcl->top;
  797. cx = prcl->right - prcl->left;
  798. bResetScissors = FALSE;
  799. if (cx & 1)
  800. {
  801. // When using word transfers, the 8514/A will 'byte wrap'
  802. // transfers of odd byte width, such that end words will
  803. // be split so that on byte is the end of one scan, and the
  804. // other byte is the start of the next scan.
  805. //
  806. // This complicates things too much, so we simply always do
  807. // word transfers of even byte width by making use of the
  808. // clipping register:
  809. bResetScissors = TRUE;
  810. IO_SCISSORS_R(ppdev, prcl->right - 1);
  811. IO_MAJ_AXIS_PCNT(ppdev, cx);
  812. }
  813. else
  814. {
  815. IO_MAJ_AXIS_PCNT(ppdev, cx - 1);
  816. }
  817. IO_MIN_AXIS_PCNT(ppdev, cy - 1);
  818. IO_CUR_X(ppdev, prcl->left);
  819. IO_CUR_Y(ppdev, prcl->top);
  820. pulXlate = pxlo->pulXlate;
  821. xSrc = prcl->left + dx;
  822. pjScan = pjSrcScan0 + (prcl->top + dy) * lSrcDelta + (xSrc >> 1);
  823. IO_GP_WAIT(ppdev);
  824. IO_CMD(ppdev, RECTANGLE_FILL | BUS_SIZE_16| WAIT |
  825. DRAWING_DIR_TBLRXM | DRAW | LAST_PIXEL_ON |
  826. SINGLE_PIXEL | WRITE | BYTE_SWAP);
  827. CHECK_DATA_READY(ppdev);
  828. do {
  829. pjSrc = pjScan;
  830. cxToGo = cx; // # of pels per scan in 4bpp source
  831. do {
  832. cxThis = XLATE_BUFFER_SIZE;
  833. // We can handle XLATE_BUFFER_SIZE number
  834. // of pels in this xlate batch
  835. cxToGo -= cxThis; // cxThis will be the actual number of
  836. // pels we'll do in this xlate batch
  837. if (cxToGo < 0)
  838. cxThis += cxToGo;
  839. pjDst = ajBuf; // Points to our temporary batch buffer
  840. // We handle alignment ourselves because it's easy to
  841. // do, rather than pay the cost of setting/resetting
  842. // the scissors register:
  843. if (xSrc & 1)
  844. {
  845. // When unaligned, we have to be careful not to read
  846. // past the end of the 4bpp bitmap (that could
  847. // potentially cause us to access violate):
  848. iLoop = cxThis >> 1; // Each loop handles 2 pels;
  849. // we'll handle odd pel
  850. // separately
  851. jSrc = *pjSrc;
  852. while (iLoop-- != 0)
  853. {
  854. *pjDst++ = (BYTE) pulXlate[jSrc & 0xf];
  855. jSrc = *(++pjSrc);
  856. *pjDst++ = (BYTE) pulXlate[jSrc >> 4];
  857. }
  858. if (cxThis & 1)
  859. *pjDst = (BYTE) pulXlate[jSrc & 0xf];
  860. }
  861. else
  862. {
  863. iLoop = (cxThis + 1) >> 1; // Each loop handles 2 pels
  864. do {
  865. jSrc = *pjSrc++;
  866. *pjDst++ = (BYTE) pulXlate[jSrc >> 4];
  867. *pjDst++ = (BYTE) pulXlate[jSrc & 0xf];
  868. } while (--iLoop != 0);
  869. }
  870. // The number of bytes we'll transfer is equal to the number
  871. // of pels we've processed in the batch. Since we're
  872. // transferring words, we have to round up to get the word
  873. // count:
  874. vDataPortOut(ppdev, ajBuf, (cxThis + 1) >> 1);
  875. } while (cxToGo > 0);
  876. pjScan += lSrcDelta; // Advance to next source scan. Note
  877. // that we could have computed the
  878. // value to advance 'pjSrc' directly,
  879. // but this method is less
  880. // error-prone.
  881. } while (--cy != 0);
  882. CHECK_DATA_COMPLETE(ppdev);
  883. // Don't forget to restore the right scissors:
  884. if (bResetScissors)
  885. {
  886. IO_FIFO_WAIT(ppdev, 1);
  887. IO_ABS_SCISSORS_R(ppdev, ppdev->cxMemory - 1);
  888. }
  889. if (--c == 0)
  890. return;
  891. prcl++;
  892. IO_FIFO_WAIT(ppdev, 5);
  893. }
  894. }
  895. /******************************Public*Routine******************************\
  896. * VOID vIoXferNative
  897. *
  898. * Transfers a bitmap that is the same colour depth as the display to
  899. * the screen via the data transfer register, with no palette translation.
  900. *
  901. \**************************************************************************/
  902. VOID vIoXferNative( // Type FNXFER
  903. PDEV* ppdev,
  904. LONG c, // Count of rectangles, can't be zero
  905. RECTL* prcl, // Array of relative coordinates destination rectangles
  906. ULONG ulHwForeMix,// Hardware mix
  907. ULONG ulHwBackMix,// Not used
  908. SURFOBJ* psoSrc, // Source surface
  909. POINTL* pptlSrc, // Original unclipped source point
  910. RECTL* prclDst, // Original unclipped destination rectangle
  911. XLATEOBJ* pxlo) // Not used
  912. {
  913. LONG dx;
  914. LONG dy;
  915. LONG cx;
  916. LONG cy;
  917. LONG lSrcDelta;
  918. BYTE* pjSrcScan0;
  919. BYTE* pjSrc;
  920. LONG cwSrc;
  921. BOOL bResetScissors;
  922. LONG xLeft;
  923. LONG xRight;
  924. LONG yTop;
  925. ASSERTDD((pxlo == NULL) || (pxlo->flXlate & XO_TRIVIAL),
  926. "Can handle trivial xlate only");
  927. ASSERTDD(psoSrc->iBitmapFormat == ppdev->iBitmapFormat,
  928. "Source must be same colour depth as screen");
  929. ASSERTDD(c > 0, "Can't handle zero rectangles");
  930. ASSERTDD(ulHwForeMix <= 15, "Weird hardware Rop");
  931. dx = pptlSrc->x - prclDst->left;
  932. dy = pptlSrc->y - prclDst->top; // Add to destination to get source
  933. lSrcDelta = psoSrc->lDelta;
  934. pjSrcScan0 = psoSrc->pvScan0;
  935. IO_FIFO_WAIT(ppdev, 8);
  936. IO_PIX_CNTL(ppdev, ALL_ONES);
  937. IO_FRGD_MIX(ppdev, SRC_CPU_DATA | ulHwForeMix);
  938. while(TRUE)
  939. {
  940. bResetScissors = FALSE;
  941. IO_CUR_Y(ppdev, prcl->top);
  942. yTop = prcl->top;
  943. cy = prcl->bottom - prcl->top;
  944. IO_MIN_AXIS_PCNT(ppdev, cy - 1);
  945. xLeft = prcl->left;
  946. xRight = prcl->right;
  947. // Make sure we're word aligned on the source, because we're
  948. // going to be transferring words and we don't want to risk
  949. // reading past the end of the bitmap:
  950. if ((xLeft + dx) & 1)
  951. {
  952. IO_SCISSORS_L(ppdev, xLeft);
  953. xLeft--;
  954. bResetScissors = TRUE;
  955. }
  956. IO_CUR_X(ppdev, xLeft);
  957. cx = xRight - xLeft;
  958. if (cx & 1)
  959. {
  960. IO_SCISSORS_R(ppdev, xRight - 1);
  961. cx++;
  962. bResetScissors = TRUE;
  963. }
  964. IO_MAJ_AXIS_PCNT(ppdev, cx - 1);
  965. cwSrc = ((cx << ppdev->cPelSize) + 1) >> 1;
  966. pjSrc = pjSrcScan0 + (yTop + dy) * lSrcDelta
  967. + ((xLeft + dx) << ppdev->cPelSize);
  968. IO_GP_WAIT(ppdev);
  969. IO_CMD(ppdev, RECTANGLE_FILL | BUS_SIZE_16| WAIT |
  970. DRAWING_DIR_TBLRXM | DRAW | LAST_PIXEL_ON |
  971. SINGLE_PIXEL | WRITE | BYTE_SWAP);
  972. CHECK_DATA_READY(ppdev);
  973. do {
  974. vDataPortOut(ppdev, pjSrc, cwSrc);
  975. pjSrc += lSrcDelta;
  976. } while (--cy != 0);
  977. CHECK_DATA_COMPLETE(ppdev);
  978. if (bResetScissors)
  979. {
  980. IO_FIFO_WAIT(ppdev, 2);
  981. IO_ABS_SCISSORS_L(ppdev, 0);
  982. IO_ABS_SCISSORS_R(ppdev, ppdev->cxMemory - 1);
  983. }
  984. if (--c == 0)
  985. return;
  986. prcl++;
  987. IO_FIFO_WAIT(ppdev, 6);
  988. }
  989. }
  990. /******************************Public*Routine******************************\
  991. * VOID vIoCopyBlt
  992. *
  993. * Does a screen-to-screen blt of a list of rectangles.
  994. *
  995. \**************************************************************************/
  996. VOID vIoCopyBlt( // Type FNCOPY
  997. PDEV* ppdev,
  998. LONG c, // Can't be zero
  999. RECTL* prcl, // Array of relative coordinates destination rectangles
  1000. ULONG ulHwMix, // Hardware mix
  1001. POINTL* pptlSrc, // Original unclipped source point
  1002. RECTL* prclDst) // Original unclipped destination rectangle
  1003. {
  1004. LONG dx;
  1005. LONG dy; // Add delta to destination to get source
  1006. LONG cx;
  1007. LONG cy; // Size of current rectangle - 1
  1008. ASSERTDD(c > 0, "Can't handle zero rectangles");
  1009. ASSERTDD(ulHwMix <= 15, "Weird hardware Rop");
  1010. IO_FIFO_WAIT(ppdev, 2);
  1011. IO_FRGD_MIX(ppdev, SRC_DISPLAY_MEMORY | ulHwMix);
  1012. IO_PIX_CNTL(ppdev, ALL_ONES);
  1013. dx = pptlSrc->x - prclDst->left;
  1014. dy = pptlSrc->y - prclDst->top;
  1015. // The accelerator may not be as fast at doing right-to-left copies, so
  1016. // only do them when the rectangles truly overlap:
  1017. if (!OVERLAP(prclDst, pptlSrc))
  1018. goto Top_Down_Left_To_Right;
  1019. if (prclDst->top <= pptlSrc->y)
  1020. {
  1021. if (prclDst->left <= pptlSrc->x)
  1022. {
  1023. Top_Down_Left_To_Right:
  1024. do {
  1025. IO_FIFO_WAIT(ppdev, 7);
  1026. cx = prcl->right - prcl->left - 1;
  1027. IO_MAJ_AXIS_PCNT(ppdev, cx);
  1028. IO_DEST_X(ppdev, prcl->left);
  1029. IO_CUR_X(ppdev, prcl->left + dx);
  1030. cy = prcl->bottom - prcl->top - 1;
  1031. IO_MIN_AXIS_PCNT(ppdev, cy);
  1032. IO_DEST_Y(ppdev, prcl->top);
  1033. IO_CUR_Y(ppdev, prcl->top + dy);
  1034. IO_CMD(ppdev, BITBLT | DRAW | DIR_TYPE_XY | WRITE |
  1035. DRAWING_DIR_TBLRXM);
  1036. prcl++;
  1037. } while (--c != 0);
  1038. }
  1039. else
  1040. {
  1041. do {
  1042. IO_FIFO_WAIT(ppdev, 7);
  1043. cx = prcl->right - prcl->left - 1;
  1044. IO_MAJ_AXIS_PCNT(ppdev, cx);
  1045. IO_DEST_X(ppdev, prcl->left + cx);
  1046. IO_CUR_X(ppdev, prcl->left + cx + dx);
  1047. cy = prcl->bottom - prcl->top - 1;
  1048. IO_MIN_AXIS_PCNT(ppdev, cy);
  1049. IO_DEST_Y(ppdev, prcl->top);
  1050. IO_CUR_Y(ppdev, prcl->top + dy);
  1051. IO_CMD(ppdev, BITBLT | DRAW | DIR_TYPE_XY | WRITE |
  1052. DRAWING_DIR_TBRLXM);
  1053. prcl++;
  1054. } while (--c != 0);
  1055. }
  1056. }
  1057. else
  1058. {
  1059. if (prclDst->left <= pptlSrc->x)
  1060. {
  1061. do {
  1062. IO_FIFO_WAIT(ppdev, 7);
  1063. cx = prcl->right - prcl->left - 1;
  1064. IO_MAJ_AXIS_PCNT(ppdev, cx);
  1065. IO_DEST_X(ppdev, prcl->left);
  1066. IO_CUR_X(ppdev, prcl->left + dx);
  1067. cy = prcl->bottom - prcl->top - 1;
  1068. IO_MIN_AXIS_PCNT(ppdev, cy);
  1069. IO_DEST_Y(ppdev, prcl->top + cy);
  1070. IO_CUR_Y(ppdev, prcl->top + cy + dy);
  1071. IO_CMD(ppdev, BITBLT | DRAW | DIR_TYPE_XY | WRITE |
  1072. DRAWING_DIR_BTLRXM);
  1073. prcl++;
  1074. } while (--c != 0);
  1075. }
  1076. else
  1077. {
  1078. do {
  1079. IO_FIFO_WAIT(ppdev, 7);
  1080. cx = prcl->right - prcl->left - 1;
  1081. IO_MAJ_AXIS_PCNT(ppdev, cx);
  1082. IO_DEST_X(ppdev, prcl->left + cx);
  1083. IO_CUR_X(ppdev, prcl->left + cx + dx);
  1084. cy = prcl->bottom - prcl->top - 1;
  1085. IO_MIN_AXIS_PCNT(ppdev, cy);
  1086. IO_DEST_Y(ppdev, prcl->top + cy);
  1087. IO_CUR_Y(ppdev, prcl->top + cy + dy);
  1088. IO_CMD(ppdev, BITBLT | DRAW | DIR_TYPE_XY | WRITE |
  1089. DRAWING_DIR_BTRLXM);
  1090. prcl++;
  1091. } while (--c != 0);
  1092. }
  1093. }
  1094. }
  1095. /******************************Public*Routine******************************\
  1096. * VOID vIoMaskCopy
  1097. *
  1098. * This routine performs a screen-to-screen masked blt.
  1099. *
  1100. * NT has a new API called MaskBlt (which has also been added to Win4.0)
  1101. * which allows an app to specify a monochrome mask on a colour blt. This
  1102. * API is relatively cool because the programmer no longer has to do two
  1103. * separate SRCAND and SRCPAINT calls to do transparency. We can accelerate
  1104. * the call using the hardware, and there is no longer any chance of
  1105. * 'flashing' occuring on the screen.
  1106. *
  1107. * Most often, the colour bitmap for MaskBlt is a compatible-bitmap that
  1108. * we've already stashed in off-screen memory. We do the maskblt by
  1109. * transferring the monochrome bitmap via the data transfer register,
  1110. * and setting the foreground and background mixes to use the on-screen
  1111. * bitmap as appropriate.
  1112. *
  1113. * If you can implement this call and accelerate it using your hardware,
  1114. * please do. It is really useful for app developers and is a big win.
  1115. * Plus, you'll have a head-start for Win4.0 (although the Win4.0 version
  1116. * is simpler because they only allow 0xccaa or 0xaacc rops -- the
  1117. * foreground and background mixes can only be OVERPAINT or LEAVE_ALONE).
  1118. *
  1119. \**************************************************************************/
  1120. VOID vIoMaskCopy( // Type FNMASK
  1121. PDEV* ppdev,
  1122. LONG c, // Can't be zero
  1123. RECTL* prcl, // Array of relative coordinates destination
  1124. // rectangles
  1125. ULONG ulHwForeMix, // Foreground mix
  1126. ULONG ulHwBackMix, // Background mix
  1127. SURFOBJ* psoMsk, // Mask surface
  1128. POINTL* pptlMsk, // Original unclipped mask source point
  1129. SURFOBJ* psoSrc, // Not used
  1130. POINTL* pptlSrc, // Original unclipped source point
  1131. RECTL* prclDst, // Original unclipped destination rectangle
  1132. ULONG iSolidColor, // Not used
  1133. RBRUSH* prb, // Not used
  1134. POINTL* pptlBrush, // Not used
  1135. XLATEOBJ* pxlo) // Not used
  1136. {
  1137. LONG dxSrc;
  1138. LONG dySrc;
  1139. LONG dxMsk;
  1140. LONG dyMsk;
  1141. LONG cy;
  1142. LONG lMskDelta;
  1143. LONG lTmpDelta;
  1144. BYTE* pjMskScan0;
  1145. BYTE* pjMsk;
  1146. LONG cwMsk;
  1147. LONG xLeft;
  1148. LONG xRight;
  1149. LONG yTop;
  1150. LONG yBottom;
  1151. LONG xBiasLeft;
  1152. LONG xBiasRight;
  1153. ASSERTDD(c > 0, "Can't handle zero rectangles");
  1154. ASSERTDD(ulHwForeMix <= 15, "Weird hardware Rop");
  1155. ASSERTDD(ulHwBackMix <= 15, "Weird hardware Rop");
  1156. ASSERTDD(pptlMsk != NULL && psoMsk != NULL, "Can't have NULL masks");
  1157. ASSERTDD(psoMsk->iBitmapFormat == BMF_1BPP, "Mask has to be 1bpp");
  1158. ASSERTDD(!OVERLAP(prclDst, pptlSrc), "Source and dest can't overlap!");
  1159. while (INPW(EXT_FIFO_STATUS) & TWO_WORDS)
  1160. ;
  1161. OUT_WORD(ALU_FG_FN, ulHwForeMix);
  1162. OUT_WORD(ALU_BG_FN, ulHwBackMix);
  1163. dxSrc = pptlSrc->x - (prclDst->left + ppdev->xOffset);
  1164. dySrc = pptlSrc->y - (prclDst->top + ppdev->yOffset);
  1165. // Add to the absolute coordinate destination rectangle to
  1166. // get the corresponding absolute coordinate source rectangle
  1167. dxMsk = pptlMsk->x - (prclDst->left + ppdev->xOffset);
  1168. dyMsk = pptlMsk->y - (prclDst->top + ppdev->yOffset);
  1169. // Add to the absolute coordinate destination rectangle to
  1170. // get the corresponding absolute coordinate mask rectangle
  1171. lMskDelta = psoMsk->lDelta;
  1172. pjMskScan0 = psoMsk->pvScan0;
  1173. while (TRUE)
  1174. {
  1175. while (INPW(EXT_FIFO_STATUS) & FIFTEEN_WORDS)
  1176. ;
  1177. // Since we're not using the normal accelerator register macros,
  1178. // we have to explicitly account for the DFB offset:
  1179. yBottom = prcl->bottom + ppdev->yOffset;
  1180. yTop = prcl->top + ppdev->yOffset;
  1181. xRight = prcl->right + ppdev->xOffset;
  1182. xLeft = prcl->left + ppdev->xOffset;
  1183. // The start has to be word aligned:
  1184. xBiasLeft = (xLeft + dxMsk) & 15;
  1185. if (xBiasLeft != 0)
  1186. {
  1187. // Rev 3 ATI chips have goofy timing bugs on 66 MHz DX-2
  1188. // computers where some extended will not be correctly
  1189. // set the first time. The extended scissors registers
  1190. // have this problem, but setting them twice seems to work:
  1191. OUT_WORD(EXT_SCISSOR_L, xLeft);
  1192. OUT_WORD(EXT_SCISSOR_L, xLeft);
  1193. xLeft -= xBiasLeft;
  1194. }
  1195. // The width has to be a word multiple:
  1196. xBiasRight = (xRight - xLeft) & 15;
  1197. if (xBiasRight != 0)
  1198. {
  1199. OUT_WORD(EXT_SCISSOR_R, xRight - 1);
  1200. OUT_WORD(EXT_SCISSOR_R, xRight - 1);
  1201. xRight += 16 - xBiasRight;
  1202. }
  1203. OUT_WORD(DP_CONFIG, FG_COLOR_SRC_BLIT | BG_COLOR_SRC_BLIT | DATA_ORDER |
  1204. EXT_MONO_SRC_HOST | DRAW | WRITE | DATA_WIDTH);
  1205. OUT_WORD(SRC_X, xLeft + dxSrc);
  1206. OUT_WORD(SRC_X_START, xLeft + dxSrc);
  1207. OUT_WORD(SRC_X_END, xRight + dxSrc);
  1208. OUT_WORD(SRC_Y, yTop + dySrc);
  1209. OUT_WORD(SRC_Y_DIR, TOP_TO_BOTTOM);
  1210. OUT_WORD(DEST_X_START, xLeft);
  1211. OUT_WORD(CUR_X, xLeft);
  1212. OUT_WORD(DEST_X_END, xRight);
  1213. OUT_WORD(CUR_Y, yTop);
  1214. OUT_WORD(DEST_Y_END, yBottom);
  1215. cwMsk = (xRight - xLeft) / 16; // We'll be transferring WORDs
  1216. pjMsk = pjMskScan0 + (yTop + dyMsk) * lMskDelta
  1217. + (xLeft + dxMsk) / 8;
  1218. // Start is byte aligned (note
  1219. // that we don't have to add
  1220. // xBiasLeft)
  1221. cy = yBottom - yTop;
  1222. lTmpDelta = lMskDelta - 2 * cwMsk;
  1223. // To be safe, we make sure there are always as many free FIFO entries
  1224. // as we'll transfer (note that this implementation isn't particularly
  1225. // efficient, especially for short scans):
  1226. _asm {
  1227. ; eax = used for IN
  1228. ; ebx = count of words remaining on current scan
  1229. ; ecx = used for REP
  1230. ; edx = used for IN and OUT
  1231. ; esi = current source pointer
  1232. ; edi = count of scans
  1233. mov esi,pjMsk
  1234. mov edi,cy
  1235. Scan_Loop:
  1236. mov ebx,cwMsk
  1237. Batch_Loop:
  1238. mov edx,EXT_FIFO_STATUS
  1239. in ax,dx
  1240. and eax,SIXTEEN_WORDS
  1241. jnz short Batch_Loop
  1242. mov edx,PIX_TRANS
  1243. sub ebx,16
  1244. jle short Finish_Scan
  1245. mov ecx,16
  1246. rep outsw
  1247. jmp short Batch_Loop
  1248. Finish_Scan:
  1249. add ebx,16
  1250. mov ecx,ebx
  1251. rep outsw
  1252. add esi,lTmpDelta
  1253. dec edi
  1254. jnz Scan_Loop
  1255. }
  1256. if ((xBiasLeft | xBiasRight) != 0)
  1257. {
  1258. // Reset the clipping only if we used it:
  1259. while (INPW(EXT_FIFO_STATUS) & FOUR_WORDS)
  1260. ;
  1261. OUT_WORD(EXT_SCISSOR_L, 0);
  1262. OUT_WORD(EXT_SCISSOR_R, ppdev->cxMemory - 1);
  1263. OUT_WORD(EXT_SCISSOR_L, 0);
  1264. OUT_WORD(EXT_SCISSOR_R, ppdev->cxMemory - 1);
  1265. }
  1266. if (--c == 0)
  1267. return;
  1268. prcl++;
  1269. }
  1270. }
  1271. /******************************Public*Routine******************************\
  1272. * VOID vPutBits
  1273. *
  1274. * Copies the bits from the given surface to the screen, using the memory
  1275. * aperture. Must be pre-clipped.
  1276. *
  1277. * LATER: Do we really need this routine?
  1278. *
  1279. \**************************************************************************/
  1280. VOID vPutBits(
  1281. PDEV* ppdev,
  1282. SURFOBJ* psoSrc, // Source surface
  1283. RECTL* prclDst, // Destination rectangle in absolute coordinates!
  1284. POINTL* pptlSrc) // Source point
  1285. {
  1286. LONG xOffset;
  1287. LONG yOffset;
  1288. // This is ugly. Oh well.
  1289. xOffset = ppdev->xOffset;
  1290. yOffset = ppdev->yOffset;
  1291. ppdev->xOffset = 0;
  1292. ppdev->yOffset = 0;
  1293. vIoXferNative(ppdev, 1, prclDst, OVERPAINT, OVERPAINT, psoSrc, pptlSrc,
  1294. prclDst, NULL);
  1295. ppdev->xOffset = xOffset;
  1296. ppdev->yOffset = yOffset;
  1297. }
  1298. /******************************Public*Routine******************************\
  1299. * VOID vGetBits
  1300. *
  1301. * Copies the bits to the given surface from the screen, using the data
  1302. * transfer register. Must be pre-clipped.
  1303. *
  1304. \**************************************************************************/
  1305. VOID vGetBits(
  1306. PDEV* ppdev,
  1307. SURFOBJ* psoDst, // Destination surface
  1308. RECTL* prclDst, // Destination rectangle
  1309. POINTL* pptlSrc) // Source point in absolute coordinates!
  1310. {
  1311. LONG cx;
  1312. LONG cy;
  1313. LONG lDstDelta;
  1314. BYTE* pjDst;
  1315. DWORD wOdd; // Think of it as a WORD
  1316. ULONG cwDst;
  1317. ULONG cjEndByte;
  1318. IO_FIFO_WAIT(ppdev, 7);
  1319. IO_PIX_CNTL(ppdev, ALL_ONES);
  1320. // LATER: Do we have to set FRGD_MIX?
  1321. IO_FRGD_MIX(ppdev, SRC_CPU_DATA | OVERPAINT);
  1322. IO_ABS_CUR_X(ppdev, pptlSrc->x);
  1323. IO_ABS_CUR_Y(ppdev, pptlSrc->y);
  1324. cx = prclDst->right - prclDst->left;
  1325. cy = prclDst->bottom - prclDst->top;
  1326. IO_MAJ_AXIS_PCNT(ppdev, cx - 1);
  1327. IO_MIN_AXIS_PCNT(ppdev, cy - 1);
  1328. IO_CMD(ppdev, RECTANGLE_FILL | BUS_SIZE_16| WAIT |
  1329. DRAWING_DIR_TBLRXM | DRAW | LAST_PIXEL_ON |
  1330. READ | BYTE_SWAP);
  1331. lDstDelta = psoDst->lDelta;
  1332. pjDst = (BYTE*) psoDst->pvScan0 + prclDst->top * lDstDelta
  1333. + prclDst->left;
  1334. cwDst = (cx >> 1);
  1335. WAIT_FOR_DATA_AVAILABLE(ppdev);
  1336. if ((cx & 1) == 0)
  1337. {
  1338. // Even destination scan length. Life is truly great.
  1339. do {
  1340. vDataPortIn(ppdev, pjDst, cwDst);
  1341. pjDst += lDstDelta;
  1342. } while (--cy != 0);
  1343. }
  1344. else
  1345. {
  1346. // Odd destination scan length.
  1347. //
  1348. // We have to be careful of this case because we want to do WORD
  1349. // transfers, but we can't overwrite either the beginning or ending
  1350. // of the scan. Note that since it's not legal to write a byte past
  1351. // the end of the bitmap or a byte before the beginning of the bitmap
  1352. // as that may cause an access violation, we cannot temporarily save
  1353. // and restore any extra bytes in the destination bitmap.
  1354. cjEndByte = cx - 1; // Byte offset from beginning of scan to
  1355. // last byte in scan. This is the offset
  1356. // to the odd byte that happens because
  1357. // we're inputting WORDs but the length
  1358. // of the destination scan is not a
  1359. // multiple of two.
  1360. while (TRUE)
  1361. {
  1362. vDataPortIn(ppdev, pjDst, cwDst);
  1363. IO_PIX_TRANS_IN(ppdev, wOdd);
  1364. *(pjDst + cjEndByte) = (BYTE) wOdd;
  1365. if (--cy == 0)
  1366. break;
  1367. pjDst += lDstDelta;
  1368. *(pjDst) = (BYTE) (wOdd >> 8);
  1369. vDataPortIn(ppdev, pjDst + 1, cwDst);
  1370. pjDst += lDstDelta;
  1371. if (--cy == 0)
  1372. break;
  1373. }
  1374. }
  1375. }