Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1603 lines
55 KiB

  1. /******************************Module*Header*******************************\
  2. *
  3. * *******************
  4. * * GDI SAMPLE CODE *
  5. * *******************
  6. *
  7. * Module Name: bltio.c
  8. *
  9. * Contains the low-level in/out blt functions. This module mirrors
  10. * 'bltmm.c'.
  11. *
  12. * Hopefully, if you're basing your display driver on this code, to
  13. * support all of DrvBitBlt and DrvCopyBits, you'll only have to implement
  14. * the following routines. You shouldn't have to modify much in
  15. * 'bitblt.c'. I've tried to make these routines as few, modular, simple,
  16. * and efficient as I could, while still accelerating as many calls as
  17. * possible that would be cost-effective in terms of performance wins
  18. * versus size and effort.
  19. *
  20. * Note: In the following, 'relative' coordinates refers to coordinates
  21. * that haven't yet had the offscreen bitmap (DFB) offset applied.
  22. * 'Absolute' coordinates have had the offset applied. For example,
  23. * we may be told to blt to (1, 1) of the bitmap, but the bitmap may
  24. * be sitting in offscreen memory starting at coordinate (0, 768) --
  25. * (1, 1) would be the 'relative' start coordinate, and (1, 769)
  26. * would be the 'absolute' start coordinate.
  27. *
  28. * Copyright (c) 1992-1998 Microsoft Corporation
  29. *
  30. \**************************************************************************/
  31. #include "precomp.h"
  32. /******************************Public*Routine******************************\
  33. * VOID vIoImageTransferMm16
  34. *
  35. * Low-level routine for transferring a bitmap image via the data transfer
  36. * register using 16 bit writes and memory-mapped I/O for the transfer,
  37. * but I/O for the setup.
  38. *
  39. * NOTE: Upon entry, there must be 1 guaranteed free empty FIFO!
  40. *
  41. \**************************************************************************/
  42. VOID vIoImageTransferMm16( // Type FNIMAGETRANSFER
  43. PDEV* ppdev,
  44. BYTE* pjSrc, // Source pointer
  45. LONG lDelta, // Delta from start of scan to start of next
  46. LONG cjSrc, // Number of bytes to be output on every scan
  47. LONG cScans, // Number of scans
  48. ULONG ulCmd) // Accelerator command - shouldn't include bus size
  49. {
  50. BYTE* pjMmBase;
  51. LONG cwSrc;
  52. ASSERTDD(cScans > 0, "Can't handle non-positive count of scans");
  53. ASSERTDD((ulCmd & (BUS_SIZE_8 | BUS_SIZE_16 | BUS_SIZE_32)) == 0,
  54. "Shouldn't specify bus size in command -- we handle that");
  55. IO_GP_WAIT(ppdev);
  56. IO_CMD(ppdev, ulCmd | BUS_SIZE_16);
  57. CHECK_DATA_READY(ppdev);
  58. pjMmBase = ppdev->pjMmBase;
  59. cwSrc = (cjSrc) >> 1; // Floor
  60. if (cjSrc & 1)
  61. {
  62. do {
  63. if (cwSrc > 0)
  64. {
  65. MM_TRANSFER_WORD(ppdev, pjMmBase, pjSrc, cwSrc);
  66. }
  67. // Make sure we do only a byte read of the last odd byte
  68. // in the scan so that we'll never read past the end of
  69. // the bitmap:
  70. MM_PIX_TRANS(ppdev, pjMmBase, *(pjSrc + cjSrc - 1));
  71. pjSrc += lDelta;
  72. } while (--cScans != 0);
  73. }
  74. else
  75. {
  76. do {
  77. MM_TRANSFER_WORD(ppdev, pjMmBase, pjSrc, cwSrc);
  78. pjSrc += lDelta;
  79. } while (--cScans != 0);
  80. }
  81. CHECK_DATA_COMPLETE(ppdev);
  82. }
  83. /******************************Public*Routine******************************\
  84. * VOID vIoImageTransferIo16
  85. *
  86. * Low-level routine for transferring a bitmap image via the data transfer
  87. * register using entirely normal I/O.
  88. *
  89. * NOTE: Upon entry, there must be 1 guaranteed free empty FIFO!
  90. *
  91. \**************************************************************************/
  92. VOID vIoImageTransferIo16( // Type FNIMAGETRANSFER
  93. PDEV* ppdev,
  94. BYTE* pjSrc, // Source pointer
  95. LONG lDelta, // Delta from start of scan to start of next
  96. LONG cjSrc, // Number of bytes to be output on every scan
  97. LONG cScans, // Number of scans
  98. ULONG ulCmd) // Accelerator command - shouldn't include bus size
  99. {
  100. LONG cWait;
  101. LONG cwSrc;
  102. volatile LONG i;
  103. ASSERTDD(cScans > 0, "Can't handle non-positive count of scans");
  104. ASSERTDD((ulCmd & (BUS_SIZE_8 | BUS_SIZE_16 | BUS_SIZE_32)) == 0,
  105. "Shouldn't specify bus size in command -- we handle that");
  106. IO_GP_WAIT(ppdev);
  107. IO_CMD(ppdev, ulCmd | BUS_SIZE_16);
  108. CHECK_DATA_READY(ppdev);
  109. cwSrc = (cjSrc) >> 1; // Floor
  110. // Old S3's in fast machines will drop data on monochrome transfers
  111. // unless we insert a busy loop. '185' was the minimum value for which
  112. // my DEC AXP 150 with an ISA 911 S3 stopped dropping data:
  113. cWait = 0;
  114. if ((ulCmd & MULTIPLE_PIXELS) &&
  115. (ppdev->flCaps & CAPS_SLOW_MONO_EXPANDS))
  116. {
  117. cWait = 200; // Add some time to be safe
  118. }
  119. if (cjSrc & 1)
  120. {
  121. do {
  122. if (cwSrc > 0)
  123. {
  124. IO_TRANSFER_WORD(ppdev, pjSrc, cwSrc);
  125. }
  126. // Make sure we do only a byte read of the last odd byte
  127. // in the scan so that we'll never read past the end of
  128. // the bitmap:
  129. IO_PIX_TRANS(ppdev, *(pjSrc + cjSrc - 1));
  130. pjSrc += lDelta;
  131. for (i = cWait; i != 0; i--)
  132. ;
  133. } while (--cScans != 0);
  134. }
  135. else
  136. {
  137. do {
  138. IO_TRANSFER_WORD(ppdev, pjSrc, cwSrc);
  139. pjSrc += lDelta;
  140. for (i = cWait; i != 0; i--)
  141. ;
  142. } while (--cScans != 0);
  143. }
  144. CHECK_DATA_COMPLETE(ppdev);
  145. }
  146. /******************************Public*Routine******************************\
  147. * VOID vIoFillSolid
  148. *
  149. * Fills a list of rectangles with a solid colour.
  150. *
  151. \**************************************************************************/
  152. VOID vIoFillSolid( // Type FNFILL
  153. PDEV* ppdev,
  154. LONG c, // Can't be zero
  155. RECTL* prcl, // List of rectangles to be filled, in relative
  156. // coordinates
  157. ULONG rop4, // rop4
  158. RBRUSH_COLOR rbc, // Drawing colour is rbc.iSolidColor
  159. POINTL* pptlBrush) // Not used
  160. {
  161. ULONG ulHwForeMix;
  162. ASSERTDD(c > 0, "Can't handle zero rectangles");
  163. ulHwForeMix = gaulHwMixFromRop2[(rop4 >> 2) & 0xf];
  164. // It's quite likely that we've just been called from GDI, so it's
  165. // even more likely that the accelerator's graphics engine has been
  166. // sitting around idle. Rather than doing a FIFO_WAIT(3) here and
  167. // then a FIFO_WAIT(5) before outputing the actual rectangle,
  168. // we can avoid an 'in' (which can be quite expensive, depending on
  169. // the card) by doing a single FIFO_WAIT(8) right off the bat:
  170. if (DEPTH32(ppdev))
  171. {
  172. IO_FIFO_WAIT(ppdev, 4);
  173. IO_PIX_CNTL(ppdev, ALL_ONES);
  174. IO_FRGD_MIX(ppdev, FOREGROUND_COLOR | ulHwForeMix);
  175. IO_FRGD_COLOR32(ppdev, rbc.iSolidColor);
  176. IO_FIFO_WAIT(ppdev, 5);
  177. }
  178. else
  179. {
  180. IO_FIFO_WAIT(ppdev, 8);
  181. IO_PIX_CNTL(ppdev, ALL_ONES);
  182. IO_FRGD_MIX(ppdev, FOREGROUND_COLOR | ulHwForeMix);
  183. IO_FRGD_COLOR(ppdev, rbc.iSolidColor);
  184. }
  185. while(TRUE)
  186. {
  187. IO_CUR_X(ppdev, prcl->left);
  188. IO_CUR_Y(ppdev, prcl->top);
  189. IO_MAJ_AXIS_PCNT(ppdev, prcl->right - prcl->left - 1);
  190. IO_MIN_AXIS_PCNT(ppdev, prcl->bottom - prcl->top - 1);
  191. IO_CMD(ppdev, RECTANGLE_FILL | DRAWING_DIR_TBLRXM |
  192. DRAW | DIR_TYPE_XY |
  193. LAST_PIXEL_ON | MULTIPLE_PIXELS |
  194. WRITE);
  195. if (--c == 0)
  196. return;
  197. prcl++;
  198. IO_FIFO_WAIT(ppdev, 5);
  199. }
  200. }
  201. /******************************Public*Routine******************************\
  202. * VOID vIoSlowPatRealize
  203. *
  204. * This routine transfers an 8x8 pattern to off-screen display memory, and
  205. * duplicates it to make a 64x64 cached realization which is then used by
  206. * vIoFillPatSlow as the basic building block for doing 'slow' pattern output
  207. * via repeated screen-to-screen blts.
  208. *
  209. \**************************************************************************/
  210. VOID vIoSlowPatRealize(
  211. PDEV* ppdev,
  212. RBRUSH* prb, // Points to brush realization structure
  213. BOOL bTransparent) // FALSE for normal patterns; TRUE for
  214. // patterns with a mask when the background
  215. // mix is LEAVE_ALONE.
  216. {
  217. BRUSHENTRY* pbe;
  218. LONG iBrushCache;
  219. LONG x;
  220. LONG y;
  221. BYTE* pjPattern;
  222. LONG cwPattern;
  223. pbe = prb->pbe;
  224. if ((pbe == NULL) || (pbe->prbVerify != prb))
  225. {
  226. // We have to allocate a new off-screen cache brush entry for
  227. // the brush:
  228. iBrushCache = ppdev->iBrushCache;
  229. pbe = &ppdev->abe[iBrushCache];
  230. iBrushCache++;
  231. if (iBrushCache >= ppdev->cBrushCache)
  232. iBrushCache = 0;
  233. ppdev->iBrushCache = iBrushCache;
  234. // Update our links:
  235. pbe->prbVerify = prb;
  236. prb->pbe = pbe;
  237. }
  238. // Load some pointer variables onto the stack, so that we don't have
  239. // to keep dereferencing their pointers:
  240. x = pbe->x;
  241. y = pbe->y;
  242. prb->bTransparent = bTransparent;
  243. // I considered doing the colour expansion for 1bpp brushes in
  244. // software, but by letting the hardware do it, we don't have
  245. // to do as many OUTs to transfer the pattern.
  246. if (prb->fl & RBRUSH_2COLOR)
  247. {
  248. // We're going to do a colour-expansion ('across the plane')
  249. // bitblt of the 1bpp 8x8 pattern to the screen.
  250. if (!bTransparent)
  251. {
  252. IO_FIFO_WAIT(ppdev, 4);
  253. IO_FRGD_MIX(ppdev, FOREGROUND_COLOR | OVERPAINT);
  254. IO_BKGD_MIX(ppdev, BACKGROUND_COLOR | OVERPAINT);
  255. IO_FRGD_COLOR(ppdev, prb->ulForeColor);
  256. IO_BKGD_COLOR(ppdev, prb->ulBackColor);
  257. IO_FIFO_WAIT(ppdev, 5);
  258. }
  259. else
  260. {
  261. IO_FIFO_WAIT(ppdev, 7);
  262. IO_FRGD_MIX(ppdev, LOGICAL_1);
  263. IO_BKGD_MIX(ppdev, LOGICAL_0);
  264. }
  265. IO_PIX_CNTL(ppdev, CPU_DATA);
  266. IO_ABS_CUR_X(ppdev, x);
  267. IO_ABS_CUR_Y(ppdev, y);
  268. IO_MAJ_AXIS_PCNT(ppdev, 7); // Brush is 8 wide
  269. IO_MIN_AXIS_PCNT(ppdev, 7); // Brush is 8 high
  270. IO_GP_WAIT(ppdev);
  271. IO_CMD(ppdev, RECTANGLE_FILL | BUS_SIZE_16 | WAIT |
  272. DRAWING_DIR_TBLRXM | DRAW | LAST_PIXEL_ON |
  273. MULTIPLE_PIXELS | WRITE | BYTE_SWAP);
  274. CHECK_DATA_READY(ppdev);
  275. pjPattern = (BYTE*) &prb->aulPattern[0];
  276. IO_TRANSFER_WORD_ALIGNED(ppdev, pjPattern, 8);
  277. // Each word transferred comprises one row of the
  278. // pattern, and there are 8 rows in the pattern
  279. CHECK_DATA_COMPLETE(ppdev);
  280. }
  281. else
  282. {
  283. ASSERTDD(!bTransparent,
  284. "Shouldn't have been asked for transparency with a non-1bpp brush");
  285. IO_FIFO_WAIT(ppdev, 6);
  286. IO_PIX_CNTL(ppdev, ALL_ONES);
  287. IO_FRGD_MIX(ppdev, SRC_CPU_DATA | OVERPAINT);
  288. IO_ABS_CUR_X(ppdev, x);
  289. IO_ABS_CUR_Y(ppdev, y);
  290. IO_MAJ_AXIS_PCNT(ppdev, 7); // Brush is 8 wide
  291. IO_MIN_AXIS_PCNT(ppdev, 7); // Brush is 8 high
  292. IO_GP_WAIT(ppdev);
  293. IO_CMD(ppdev, RECTANGLE_FILL | BUS_SIZE_16| WAIT |
  294. DRAWING_DIR_TBLRXM | DRAW | LAST_PIXEL_ON |
  295. SINGLE_PIXEL | WRITE | BYTE_SWAP);
  296. CHECK_DATA_READY(ppdev);
  297. pjPattern = (BYTE*) &prb->aulPattern[0];
  298. cwPattern = CONVERT_TO_BYTES((TOTAL_BRUSH_SIZE / 2), ppdev);
  299. IO_TRANSFER_WORD_ALIGNED(ppdev, pjPattern, cwPattern);
  300. CHECK_DATA_COMPLETE(ppdev);
  301. }
  302. // �����������������Ŀ
  303. // �0�2�3 �4 �1� We now have an 8x8 colour-expanded copy of
  304. // �����������������Ĵ the pattern sitting in off-screen memory,
  305. // �5 � represented here by square '0'.
  306. // � �
  307. // � � We're now going to expand the pattern to
  308. // � � 72x72 by repeatedly copying larger rectangles
  309. // � � in the indicated order, and doing a 'rolling'
  310. // � � blt to copy vertically.
  311. // � �
  312. // �������������������
  313. // Copy '1':
  314. IO_FIFO_WAIT(ppdev, 6);
  315. IO_PIX_CNTL(ppdev, ALL_ONES);
  316. IO_FRGD_MIX(ppdev, SRC_DISPLAY_MEMORY | OVERPAINT);
  317. // Note that 'cur_x', 'maj_axis_pcnt' and 'min_axis_pcnt' are already
  318. // correct.
  319. IO_ABS_CUR_Y(ppdev, y);
  320. IO_ABS_DEST_X(ppdev, x + 64);
  321. IO_ABS_DEST_Y(ppdev, y);
  322. IO_CMD(ppdev, BITBLT | DRAW | DIR_TYPE_XY | WRITE |
  323. MULTIPLE_PIXELS | DRAWING_DIR_TBLRXM);
  324. // Copy '2':
  325. IO_FIFO_WAIT(ppdev, 7);
  326. IO_ABS_DEST_X(ppdev, x + 8);
  327. IO_ABS_DEST_Y(ppdev, y);
  328. IO_CMD(ppdev, BITBLT | DRAW | DIR_TYPE_XY | WRITE |
  329. MULTIPLE_PIXELS | DRAWING_DIR_TBLRXM);
  330. // Copy '3':
  331. IO_ABS_DEST_X(ppdev, x + 16);
  332. IO_ABS_DEST_Y(ppdev, y);
  333. IO_MAJ_AXIS_PCNT(ppdev, 15);
  334. IO_CMD(ppdev, BITBLT | DRAW | DIR_TYPE_XY | WRITE |
  335. MULTIPLE_PIXELS | DRAWING_DIR_TBLRXM);
  336. // Copy '4':
  337. IO_FIFO_WAIT(ppdev, 8);
  338. IO_ABS_DEST_X(ppdev, x + 32);
  339. IO_ABS_DEST_Y(ppdev, y);
  340. IO_MAJ_AXIS_PCNT(ppdev, 31);
  341. IO_CMD(ppdev, BITBLT | DRAW | DIR_TYPE_XY | WRITE |
  342. MULTIPLE_PIXELS | DRAWING_DIR_TBLRXM);
  343. // Copy '5':
  344. IO_ABS_DEST_X(ppdev, x);
  345. IO_MAJ_AXIS_PCNT(ppdev, 71);
  346. IO_MIN_AXIS_PCNT(ppdev, 63);
  347. IO_CMD(ppdev, BITBLT | DRAW | DIR_TYPE_XY | WRITE |
  348. MULTIPLE_PIXELS | DRAWING_DIR_TBLRXM);
  349. }
  350. /******************************Public*Routine******************************\
  351. * VOID vIoFillPatSlow
  352. *
  353. * Uses the screen-to-screen blting ability of the accelerator to fill a
  354. * list of rectangles with a specified pattern. This routine is 'slow'
  355. * merely in the sense that it doesn't use any built-in hardware pattern
  356. * support that may be built into the accelerator.
  357. *
  358. \**************************************************************************/
  359. VOID vIoFillPatSlow( // Type FNFILL
  360. PDEV* ppdev,
  361. LONG c, // Can't be zero
  362. RECTL* prcl, // List of rectangles to be filled, in relative
  363. // coordinates
  364. ULONG rop4, // rop4
  365. RBRUSH_COLOR rbc, // rbc.prb points to brush realization structure
  366. POINTL* pptlBrush) // Pattern alignment
  367. {
  368. BOOL bTransparent;
  369. ULONG ulHwForeMix;
  370. BOOL bExponential;
  371. LONG x;
  372. LONG y;
  373. LONG cxToGo;
  374. LONG cyToGo;
  375. LONG cxThis;
  376. LONG cyThis;
  377. LONG xOrg;
  378. LONG yOrg;
  379. LONG xBrush;
  380. LONG yBrush;
  381. LONG cyOriginal;
  382. BRUSHENTRY* pbe; // Pointer to brush entry data, which is used
  383. // for keeping track of the location and status
  384. // of the pattern bits cached in off-screen
  385. // memory
  386. ASSERTDD(c > 0, "Can't handle zero rectangles");
  387. ASSERTDD(rbc.prb->pbe != NULL,
  388. "Unexpected Null pbe in vIoFillPatSlow");
  389. ASSERTDD(!(ppdev->flCaps & CAPS_HW_PATTERNS),
  390. "Shouldn't use slow patterns when can do hw patterns");
  391. bTransparent = (((rop4 >> 8) & 0xff) != (rop4 & 0xff));
  392. if ((rbc.prb->pbe->prbVerify != rbc.prb) ||
  393. (rbc.prb->bTransparent != bTransparent))
  394. {
  395. vIoSlowPatRealize(ppdev, rbc.prb, bTransparent);
  396. }
  397. ASSERTDD(rbc.prb->bTransparent == bTransparent,
  398. "Not realized with correct transparency");
  399. ulHwForeMix = gaulHwMixFromRop2[(rop4 >> 2) & 0xf];
  400. if (!bTransparent)
  401. {
  402. IO_FIFO_WAIT(ppdev, 2);
  403. IO_PIX_CNTL(ppdev, ALL_ONES);
  404. IO_FRGD_MIX(ppdev, SRC_DISPLAY_MEMORY | ulHwForeMix);
  405. // We special case OVERPAINT mixes because we can implement
  406. // an exponential fill: every blt will double the size of
  407. // the current rectangle by using the portion of the pattern
  408. // that has already been done for this rectangle as the source.
  409. //
  410. // Note that there's no point in also checking for LOGICAL_0
  411. // or LOGICAL_1 because those will be taken care of by the
  412. // solid fill routines, and I can't be bothered to check for
  413. // NOTNEW:
  414. bExponential = (ulHwForeMix == OVERPAINT);
  415. }
  416. else
  417. {
  418. IO_FIFO_WAIT(ppdev, 5);
  419. IO_PIX_CNTL(ppdev, DISPLAY_MEMORY);
  420. IO_FRGD_MIX(ppdev, FOREGROUND_COLOR | ulHwForeMix);
  421. IO_BKGD_MIX(ppdev, BACKGROUND_COLOR | LEAVE_ALONE);
  422. IO_FRGD_COLOR(ppdev, rbc.prb->ulForeColor);
  423. IO_RD_MASK(ppdev, 1); // Pick a plane, any plane
  424. bExponential = FALSE;
  425. }
  426. // Note that since we do our brush alignment calculations in
  427. // relative coordinates, we should keep the brush origin in
  428. // relative coordinates as well:
  429. xOrg = pptlBrush->x;
  430. yOrg = pptlBrush->y;
  431. pbe = rbc.prb->pbe;
  432. xBrush = pbe->x;
  433. yBrush = pbe->y;
  434. do {
  435. x = prcl->left;
  436. y = prcl->top;
  437. cxToGo = prcl->right - x;
  438. cyToGo = prcl->bottom - y;
  439. if ((cxToGo <= SLOW_BRUSH_DIMENSION) &&
  440. (cyToGo <= SLOW_BRUSH_DIMENSION))
  441. {
  442. IO_FIFO_WAIT(ppdev, 7);
  443. IO_ABS_CUR_X(ppdev, ((x - xOrg) & 7) + xBrush);
  444. IO_ABS_CUR_Y(ppdev, ((y - yOrg) & 7) + yBrush);
  445. IO_DEST_X(ppdev, x);
  446. IO_DEST_Y(ppdev, y);
  447. IO_MAJ_AXIS_PCNT(ppdev, cxToGo - 1);
  448. IO_MIN_AXIS_PCNT(ppdev, cyToGo - 1);
  449. IO_CMD(ppdev, BITBLT | DRAW | DIR_TYPE_XY | WRITE |
  450. MULTIPLE_PIXELS | DRAWING_DIR_TBLRXM);
  451. }
  452. else if (bExponential)
  453. {
  454. cyThis = SLOW_BRUSH_DIMENSION;
  455. cyToGo -= cyThis;
  456. if (cyToGo < 0)
  457. cyThis += cyToGo;
  458. cxThis = SLOW_BRUSH_DIMENSION;
  459. cxToGo -= cxThis;
  460. if (cxToGo < 0)
  461. cxThis += cxToGo;
  462. IO_FIFO_WAIT(ppdev, 7);
  463. IO_MAJ_AXIS_PCNT(ppdev, cxThis - 1);
  464. IO_MIN_AXIS_PCNT(ppdev, cyThis - 1);
  465. IO_DEST_X(ppdev, x);
  466. IO_DEST_Y(ppdev, y);
  467. IO_ABS_CUR_X(ppdev, ((x - xOrg) & 7) + xBrush);
  468. IO_ABS_CUR_Y(ppdev, ((y - yOrg) & 7) + yBrush);
  469. IO_CMD(ppdev, BITBLT | DRAW | DIR_TYPE_XY | WRITE |
  470. MULTIPLE_PIXELS | DRAWING_DIR_TBLRXM);
  471. IO_FIFO_WAIT(ppdev, 2);
  472. IO_CUR_X(ppdev, x);
  473. IO_CUR_Y(ppdev, y);
  474. x += cxThis;
  475. while (cxToGo > 0)
  476. {
  477. // First, expand out to the right, doubling our size
  478. // each time:
  479. cxToGo -= cxThis;
  480. if (cxToGo < 0)
  481. cxThis += cxToGo;
  482. IO_FIFO_WAIT(ppdev, 4);
  483. IO_MAJ_AXIS_PCNT(ppdev, cxThis - 1);
  484. IO_DEST_X(ppdev, x);
  485. IO_DEST_Y(ppdev, y);
  486. IO_CMD(ppdev, BITBLT | DRAW | DIR_TYPE_XY | WRITE |
  487. MULTIPLE_PIXELS | DRAWING_DIR_TBLRXM);
  488. x += cxThis;
  489. cxThis *= 2;
  490. }
  491. if (cyToGo > 0)
  492. {
  493. // Now do a 'rolling blt' to pattern the rest vertically:
  494. IO_FIFO_WAIT(ppdev, 4);
  495. IO_DEST_X(ppdev, prcl->left);
  496. IO_MAJ_AXIS_PCNT(ppdev, prcl->right - prcl->left - 1);
  497. IO_MIN_AXIS_PCNT(ppdev, cyToGo - 1);
  498. IO_CMD(ppdev, BITBLT | DRAW | DIR_TYPE_XY | WRITE |
  499. MULTIPLE_PIXELS | DRAWING_DIR_TBLRXM);
  500. }
  501. }
  502. else
  503. {
  504. // We handle arbitrary mixes simply by repeatedly tiling
  505. // our cached pattern over the entire rectangle:
  506. IO_FIFO_WAIT(ppdev, 2);
  507. IO_ABS_CUR_X(ppdev, ((x - xOrg) & 7) + xBrush);
  508. IO_ABS_CUR_Y(ppdev, ((y - yOrg) & 7) + yBrush);
  509. cyOriginal = cyToGo; // Have to remember for later...
  510. do {
  511. cxThis = SLOW_BRUSH_DIMENSION;
  512. cxToGo -= cxThis;
  513. if (cxToGo < 0)
  514. cxThis += cxToGo;
  515. IO_FIFO_WAIT(ppdev, 3);
  516. IO_MAJ_AXIS_PCNT(ppdev, cxThis - 1);
  517. IO_DEST_Y(ppdev, y);
  518. IO_DEST_X(ppdev, x);
  519. x += cxThis; // Get ready for next column
  520. cyToGo = cyOriginal; // Have to reset for each new column
  521. do {
  522. cyThis = SLOW_BRUSH_DIMENSION;
  523. cyToGo -= cyThis;
  524. if (cyToGo < 0)
  525. cyThis += cyToGo;
  526. IO_FIFO_WAIT(ppdev, 2);
  527. IO_MIN_AXIS_PCNT(ppdev, cyThis - 1);
  528. IO_CMD(ppdev, BITBLT | DRAW | DIR_TYPE_XY | WRITE |
  529. MULTIPLE_PIXELS | DRAWING_DIR_TBLRXM);
  530. } while (cyToGo > 0);
  531. } while (cxToGo > 0);
  532. }
  533. prcl++;
  534. } while (--c != 0);
  535. }
  536. /******************************Public*Routine******************************\
  537. * VOID vIoFastPatRealize
  538. *
  539. * This routine transfers an 8x8 pattern to off-screen display memory,
  540. * so that it can be used by the S3 pattern hardware.
  541. *
  542. \**************************************************************************/
  543. VOID vIoFastPatRealize( // Type FNFASTPATREALIZE
  544. PDEV* ppdev,
  545. RBRUSH* prb, // Points to brush realization structure
  546. POINTL* pptlBrush, // Brush origin for aligning realization
  547. BOOL bTransparent) // FALSE for normal patterns; TRUE for
  548. // patterns with a mask when the background
  549. // mix is LEAVE_ALONE.
  550. {
  551. BRUSHENTRY* pbe;
  552. LONG iBrushCache;
  553. LONG x;
  554. LONG y;
  555. LONG i;
  556. LONG xShift;
  557. LONG yShift;
  558. BYTE* pjSrc;
  559. BYTE* pjDst;
  560. LONG cjLeft;
  561. LONG cjRight;
  562. BYTE* pjPattern;
  563. LONG cwPattern;
  564. ULONG aulBrush[TOTAL_BRUSH_SIZE];
  565. // Temporary buffer for aligning brush. Declared
  566. // as an array of ULONGs to get proper dword
  567. // alignment. Also leaves room for brushes that
  568. // are up to 32bpp. Note: this takes up 1/4k!
  569. pbe = prb->pbe;
  570. if ((pbe == NULL) || (pbe->prbVerify != prb))
  571. {
  572. // We have to allocate a new off-screen cache brush entry for
  573. // the brush:
  574. iBrushCache = ppdev->iBrushCache;
  575. pbe = &ppdev->abe[iBrushCache];
  576. iBrushCache++;
  577. if (iBrushCache >= ppdev->cBrushCache)
  578. iBrushCache = 0;
  579. ppdev->iBrushCache = iBrushCache;
  580. // Update our links:
  581. pbe->prbVerify = prb;
  582. prb->pbe = pbe;
  583. }
  584. // Load some variables onto the stack, so that we don't have to keep
  585. // dereferencing their pointers:
  586. x = pbe->x;
  587. y = pbe->y;
  588. // Because we handle only 8x8 brushes, it is easy to compute the
  589. // number of pels by which we have to rotate the brush pattern
  590. // right and down. Note that if we were to handle arbitrary sized
  591. // patterns, this calculation would require a modulus operation.
  592. //
  593. // The brush is aligned in absolute coordinates, so we have to add
  594. // in the surface offset:
  595. xShift = pptlBrush->x + ppdev->xOffset;
  596. yShift = pptlBrush->y + ppdev->yOffset;
  597. prb->ptlBrushOrg.x = xShift; // We have to remember the alignment
  598. prb->ptlBrushOrg.y = yShift; // that we used for caching (we check
  599. // this when we go to see if a brush's
  600. // cache entry is still valid)
  601. xShift &= 7; // Rotate pattern 'xShift' pels right
  602. yShift &= 7; // Rotate pattern 'yShift' pels down
  603. prb->bTransparent = bTransparent;
  604. // I considered doing the colour expansion for 1bpp brushes in
  605. // software, but by letting the hardware do it, we don't have
  606. // to do as many OUTs to transfer the pattern.
  607. if (prb->fl & RBRUSH_2COLOR)
  608. {
  609. // We're going to do a colour-expansion ('across the plane')
  610. // bitblt of the 1bpp 8x8 pattern to the screen. But first
  611. // we'll align it properly by copying it to a temporary buffer
  612. // (which we'll conveniently pack word aligned so that we can do a
  613. // REP OUTSW...)
  614. pjSrc = (BYTE*) &prb->aulPattern[0]; // Copy from the start of the
  615. // brush buffer
  616. pjDst = (BYTE*) &aulBrush[0]; // Copy to our temp buffer
  617. pjDst += yShift * sizeof(WORD); // starting yShift rows down
  618. i = 8 - yShift; // for 8 - yShift rows
  619. do {
  620. *pjDst = (*pjSrc >> xShift) | (*pjSrc << (8 - xShift));
  621. pjDst += sizeof(WORD); // Destination is word packed
  622. pjSrc += sizeof(WORD); // Source is word aligned too
  623. } while (--i != 0);
  624. pjDst -= 8 * sizeof(WORD); // Move to the beginning of the source
  625. ASSERTDD(pjDst == (BYTE*) &aulBrush[0], "pjDst not back at start");
  626. for (; yShift != 0; yShift--)
  627. {
  628. *pjDst = (*pjSrc >> xShift) | (*pjSrc << (8 - xShift));
  629. pjDst += sizeof(WORD); // Destination is word packed
  630. pjSrc += sizeof(WORD); // Source is word aligned too
  631. }
  632. if (bTransparent)
  633. {
  634. IO_FIFO_WAIT(ppdev, 3);
  635. IO_PIX_CNTL(ppdev, CPU_DATA);
  636. IO_FRGD_MIX(ppdev, LOGICAL_1);
  637. IO_BKGD_MIX(ppdev, LOGICAL_0);
  638. }
  639. else
  640. {
  641. if (DEPTH32(ppdev))
  642. {
  643. IO_FIFO_WAIT(ppdev, 7);
  644. IO_PIX_CNTL(ppdev, CPU_DATA);
  645. IO_FRGD_MIX(ppdev, FOREGROUND_COLOR | OVERPAINT);
  646. IO_BKGD_MIX(ppdev, BACKGROUND_COLOR | OVERPAINT);
  647. IO_FRGD_COLOR32(ppdev, prb->ulForeColor);
  648. IO_BKGD_COLOR32(ppdev, prb->ulBackColor);
  649. }
  650. else
  651. {
  652. IO_FIFO_WAIT(ppdev, 5);
  653. IO_PIX_CNTL(ppdev, CPU_DATA);
  654. IO_FRGD_MIX(ppdev, FOREGROUND_COLOR | OVERPAINT);
  655. IO_BKGD_MIX(ppdev, BACKGROUND_COLOR | OVERPAINT);
  656. IO_FRGD_COLOR(ppdev, prb->ulForeColor);
  657. IO_BKGD_COLOR(ppdev, prb->ulBackColor);
  658. }
  659. }
  660. IO_FIFO_WAIT(ppdev, 4);
  661. IO_ABS_CUR_X(ppdev, x);
  662. IO_ABS_CUR_Y(ppdev, y);
  663. IO_MAJ_AXIS_PCNT(ppdev, 7); // Brush is 8 wide
  664. IO_MIN_AXIS_PCNT(ppdev, 7); // Brush is 8 high
  665. IO_GP_WAIT(ppdev);
  666. IO_CMD(ppdev, RECTANGLE_FILL | BUS_SIZE_16 | WAIT |
  667. DRAWING_DIR_TBLRXM | DRAW | LAST_PIXEL_ON |
  668. MULTIPLE_PIXELS | WRITE | BYTE_SWAP);
  669. CHECK_DATA_READY(ppdev);
  670. pjPattern = (BYTE*) &aulBrush[0];
  671. IO_TRANSFER_WORD_ALIGNED(ppdev, pjPattern, 8);
  672. // Each word transferred
  673. // comprises one row of the
  674. // pattern, and there are
  675. // 8 rows in the pattern
  676. CHECK_DATA_COMPLETE(ppdev);
  677. }
  678. else
  679. {
  680. ASSERTDD(!bTransparent,
  681. "Shouldn't have been asked for transparency with a non-1bpp brush");
  682. // We're going to do a straight ('through the plane') bitblt
  683. // of the Xbpp 8x8 pattern to the screen. But first we'll align
  684. // it properly by copying it to a temporary buffer:
  685. cjLeft = CONVERT_TO_BYTES(xShift, ppdev); // Number of bytes pattern
  686. // is shifted to the right
  687. cjRight = CONVERT_TO_BYTES(8, ppdev) - cjLeft; // Number of bytes pattern
  688. // is shifted to the left
  689. pjSrc = (BYTE*) &prb->aulPattern[0]; // Copy from brush buffer
  690. pjDst = (BYTE*) &aulBrush[0]; // Copy to our temp buffer
  691. pjDst += yShift * CONVERT_TO_BYTES(8, ppdev); // starting yShift rows
  692. i = 8 - yShift; // down for 8 - yShift rows
  693. do {
  694. RtlCopyMemory(pjDst + cjLeft, pjSrc, cjRight);
  695. RtlCopyMemory(pjDst, pjSrc + cjRight, cjLeft);
  696. pjDst += cjLeft + cjRight;
  697. pjSrc += cjLeft + cjRight;
  698. } while (--i != 0);
  699. pjDst = (BYTE*) &aulBrush[0]; // Move to the beginning of destination
  700. for (; yShift != 0; yShift--)
  701. {
  702. RtlCopyMemory(pjDst + cjLeft, pjSrc, cjRight);
  703. RtlCopyMemory(pjDst, pjSrc + cjRight, cjLeft);
  704. pjDst += cjLeft + cjRight;
  705. pjSrc += cjLeft + cjRight;
  706. }
  707. IO_FIFO_WAIT(ppdev, 6);
  708. IO_PIX_CNTL(ppdev, ALL_ONES);
  709. IO_FRGD_MIX(ppdev, SRC_CPU_DATA | OVERPAINT);
  710. IO_ABS_CUR_X(ppdev, x);
  711. IO_ABS_CUR_Y(ppdev, y);
  712. IO_MAJ_AXIS_PCNT(ppdev, 7); // Brush is 8 wide
  713. IO_MIN_AXIS_PCNT(ppdev, 7); // Brush is 8 high
  714. IO_GP_WAIT(ppdev);
  715. IO_CMD(ppdev, RECTANGLE_FILL | BUS_SIZE_16| WAIT |
  716. DRAWING_DIR_TBLRXM | DRAW | LAST_PIXEL_ON |
  717. SINGLE_PIXEL | WRITE | BYTE_SWAP);
  718. CHECK_DATA_READY(ppdev);
  719. pjPattern = (BYTE*) &aulBrush[0];
  720. cwPattern = CONVERT_TO_BYTES((TOTAL_BRUSH_SIZE / 2), ppdev);
  721. IO_TRANSFER_WORD_ALIGNED(ppdev, pjPattern, cwPattern);
  722. CHECK_DATA_COMPLETE(ppdev);
  723. }
  724. }
  725. /******************************Public*Routine******************************\
  726. * VOID vIoFillPatFast
  727. *
  728. * This routine uses the S3 pattern hardware to draw a patterned list of
  729. * rectangles.
  730. *
  731. \**************************************************************************/
  732. VOID vIoFillPatFast( // Type FNFILL
  733. PDEV* ppdev,
  734. LONG c, // Can't be zero
  735. RECTL* prcl, // List of rectangles to be filled, in relative
  736. // coordinates
  737. ULONG rop4, // rop4
  738. RBRUSH_COLOR rbc, // rbc.prb points to brush realization structure
  739. POINTL* pptlBrush) // Pattern alignment
  740. {
  741. BOOL bTransparent;
  742. ULONG ulHwForeMix;
  743. BRUSHENTRY* pbe; // Pointer to brush entry data, which is used
  744. // for keeping track of the location and status
  745. // of the pattern bits cached in off-screen
  746. // memory
  747. ASSERTDD(c > 0, "Can't handle zero rectangles");
  748. ASSERTDD(ppdev->flCaps & CAPS_HW_PATTERNS,
  749. "Shouldn't use fast patterns when can't do hw patterns");
  750. bTransparent = (((rop4 >> 8) & 0xff) != (rop4 & 0xff));
  751. // The S3's pattern hardware requires that we keep an aligned copy
  752. // of the brush in off-screen memory. We have to update this
  753. // realization if any of the following are true:
  754. //
  755. // 1) The brush alignment has changed;
  756. // 2) The off-screen location we thought we had reserved for our
  757. // realization got overwritten by a different pattern;
  758. // 3) We had realized the pattern to do transparent hatches, but
  759. // we're now being asked to do an opaque pattern, or vice
  760. // versa (since we use different realizations for transparent
  761. // vs. opaque patterns).
  762. //
  763. // To handle the initial realization of a pattern, we're a little
  764. // tricky in order to save an 'if' in the following expression. In
  765. // DrvRealizeBrush, we set 'prb->ptlBrushOrg.x' to be 0x80000000 (a
  766. // very negative number), which is guaranteed not to equal 'pptlBrush->x
  767. // + ppdev->xOffset'. So our check for brush alignment will also
  768. // handle the initialization case (note that this check must occur
  769. // *before* dereferencing 'prb->pbe' because that pointer will be
  770. // NULL for a new pattern).
  771. if ((rbc.prb->ptlBrushOrg.x != pptlBrush->x + ppdev->xOffset) ||
  772. (rbc.prb->ptlBrushOrg.y != pptlBrush->y + ppdev->yOffset) ||
  773. (rbc.prb->pbe->prbVerify != rbc.prb) ||
  774. (rbc.prb->bTransparent != bTransparent))
  775. {
  776. vIoFastPatRealize(ppdev, rbc.prb, pptlBrush, bTransparent);
  777. }
  778. else if (ppdev->flCaps & CAPS_RE_REALIZE_PATTERN)
  779. {
  780. // The initial revs of the Vision chips have a bug where, if
  781. // we have not just drawn the pattern to off-screen memory,
  782. // we have to draw some sort of 1x8 rectangle before using
  783. // the pattern hardware (note that a LEAVE_ALONE rop will not
  784. // work).
  785. IO_FIFO_WAIT(ppdev, 7);
  786. IO_PIX_CNTL(ppdev, ALL_ONES);
  787. IO_FRGD_MIX(ppdev, SRC_DISPLAY_MEMORY | OVERPAINT);
  788. IO_ABS_CUR_X(ppdev, ppdev->ptlReRealize.x);
  789. IO_ABS_CUR_Y(ppdev, ppdev->ptlReRealize.y);
  790. IO_MAJ_AXIS_PCNT(ppdev, 0);
  791. IO_MIN_AXIS_PCNT(ppdev, 7);
  792. IO_CMD(ppdev, RECTANGLE_FILL | DRAWING_DIR_TBLRXM |
  793. DRAW | DIR_TYPE_XY |
  794. LAST_PIXEL_ON | MULTIPLE_PIXELS |
  795. WRITE);
  796. }
  797. ASSERTDD(rbc.prb->bTransparent == bTransparent,
  798. "Not realized with correct transparency");
  799. pbe = rbc.prb->pbe;
  800. ulHwForeMix = gaulHwMixFromRop2[(rop4 >> 2) & 0xf];
  801. if (!bTransparent)
  802. {
  803. IO_FIFO_WAIT(ppdev, 4);
  804. IO_ABS_CUR_X(ppdev, pbe->x);
  805. IO_ABS_CUR_Y(ppdev, pbe->y);
  806. IO_PIX_CNTL(ppdev, ALL_ONES);
  807. IO_FRGD_MIX(ppdev, SRC_DISPLAY_MEMORY | ulHwForeMix);
  808. }
  809. else
  810. {
  811. if (DEPTH32(ppdev))
  812. {
  813. IO_FIFO_WAIT(ppdev, 4);
  814. IO_FRGD_COLOR32(ppdev, rbc.prb->ulForeColor);
  815. IO_RD_MASK32(ppdev, 1); // Pick a plane, any plane
  816. IO_FIFO_WAIT(ppdev, 5);
  817. }
  818. else
  819. {
  820. IO_FIFO_WAIT(ppdev, 7);
  821. IO_FRGD_COLOR(ppdev, rbc.prb->ulForeColor);
  822. IO_RD_MASK(ppdev, 1); // Pick a plane, any plane
  823. }
  824. IO_ABS_CUR_X(ppdev, pbe->x);
  825. IO_ABS_CUR_Y(ppdev, pbe->y);
  826. IO_PIX_CNTL(ppdev, DISPLAY_MEMORY);
  827. IO_FRGD_MIX(ppdev, FOREGROUND_COLOR | ulHwForeMix);
  828. IO_BKGD_MIX(ppdev, BACKGROUND_COLOR | LEAVE_ALONE);
  829. }
  830. do {
  831. IO_FIFO_WAIT(ppdev, 5);
  832. IO_DEST_X(ppdev, prcl->left);
  833. IO_DEST_Y(ppdev, prcl->top);
  834. IO_MAJ_AXIS_PCNT(ppdev, prcl->right - prcl->left - 1);
  835. IO_MIN_AXIS_PCNT(ppdev, prcl->bottom - prcl->top - 1);
  836. IO_CMD(ppdev, PATTERN_FILL | BYTE_SWAP | DRAWING_DIR_TBLRXM |
  837. DRAW | WRITE);
  838. prcl++;
  839. } while (--c != 0);
  840. }
  841. /******************************Public*Routine******************************\
  842. * VOID vIoXfer1bpp
  843. *
  844. * This routine colour expands a monochrome bitmap, possibly with different
  845. * Rop2's for the foreground and background. It will be called in the
  846. * following cases:
  847. *
  848. * 1) To colour-expand the monochrome text buffer for the vFastText routine.
  849. * 2) To blt a 1bpp source with a simple Rop2 between the source and
  850. * destination.
  851. * 3) To blt a true Rop3 when the source is a 1bpp bitmap that expands to
  852. * white and black, and the pattern is a solid colour.
  853. * 4) To handle a true Rop4 that works out to be Rop2's between the pattern
  854. * and destination.
  855. *
  856. * Needless to say, making this routine fast can leverage a lot of
  857. * performance.
  858. *
  859. \**************************************************************************/
  860. VOID vIoXfer1bpp( // Type FNXFER
  861. PDEV* ppdev,
  862. LONG c, // Count of rectangles, can't be zero
  863. RECTL* prcl, // List of destination rectangles, in relative
  864. // coordinates
  865. ROP4 rop4, // rop4
  866. SURFOBJ* psoSrc, // Source surface
  867. POINTL* pptlSrc, // Original unclipped source point
  868. RECTL* prclDst, // Original unclipped destination rectangle
  869. XLATEOBJ* pxlo) // Translate that provides colour-expansion information
  870. {
  871. ULONG ulHwForeMix;
  872. ULONG ulHwBackMix;
  873. LONG dxSrc;
  874. LONG dySrc;
  875. LONG cx;
  876. LONG cy;
  877. LONG lSrcDelta;
  878. BYTE* pjSrcScan0;
  879. BYTE* pjSrc;
  880. LONG cjSrc;
  881. LONG xLeft;
  882. LONG yTop;
  883. LONG xBias;
  884. ASSERTDD(c > 0, "Can't handle zero rectangles");
  885. ASSERTDD(pptlSrc != NULL && psoSrc != NULL, "Can't have NULL sources");
  886. ASSERTDD(((((rop4 & 0xff00) >> 8) == (rop4 & 0xff)) || (rop4 == 0xaacc)),
  887. "Expect weird rops only when opaquing");
  888. // Note that only our text routine calls us with a '0xaacc' rop:
  889. ulHwForeMix = gaulHwMixFromRop2[rop4 & 0xf];
  890. ulHwBackMix = (rop4 != 0xaacc) ? ulHwForeMix : LEAVE_ALONE;
  891. if (DEPTH32(ppdev))
  892. {
  893. IO_FIFO_WAIT(ppdev, 7);
  894. IO_PIX_CNTL(ppdev, CPU_DATA);
  895. IO_FRGD_MIX(ppdev, FOREGROUND_COLOR | ulHwForeMix);
  896. IO_BKGD_MIX(ppdev, BACKGROUND_COLOR | ulHwBackMix);
  897. IO_FRGD_COLOR32(ppdev, pxlo->pulXlate[1]);
  898. IO_BKGD_COLOR32(ppdev, pxlo->pulXlate[0]);
  899. }
  900. else
  901. {
  902. IO_FIFO_WAIT(ppdev, 5);
  903. IO_PIX_CNTL(ppdev, CPU_DATA);
  904. IO_FRGD_MIX(ppdev, FOREGROUND_COLOR | ulHwForeMix);
  905. IO_BKGD_MIX(ppdev, BACKGROUND_COLOR | ulHwBackMix);
  906. IO_FRGD_COLOR(ppdev, pxlo->pulXlate[1]);
  907. IO_BKGD_COLOR(ppdev, pxlo->pulXlate[0]);
  908. }
  909. dxSrc = pptlSrc->x - prclDst->left;
  910. dySrc = pptlSrc->y - prclDst->top; // Add to destination to get source
  911. lSrcDelta = psoSrc->lDelta;
  912. pjSrcScan0 = psoSrc->pvScan0;
  913. do {
  914. IO_FIFO_WAIT(ppdev, 5);
  915. // We'll byte align to the source, but do word transfers
  916. // (implying that we may be doing unaligned reads from the
  917. // source). We do this because it may reduce the total
  918. // number of word outs/writes that we'll have to do to the
  919. // display:
  920. yTop = prcl->top;
  921. xLeft = prcl->left;
  922. xBias = (xLeft + dxSrc) & 7; // This is the byte-align bias
  923. if (xBias != 0)
  924. {
  925. // We could either align in software or use the hardware to do
  926. // it. We'll use the hardware; the cost we pay is the time spent
  927. // setting and resetting one scissors register:
  928. IO_SCISSORS_L(ppdev, xLeft);
  929. xLeft -= xBias;
  930. }
  931. cx = prcl->right - xLeft;
  932. cy = prcl->bottom - yTop;
  933. IO_CUR_X(ppdev, xLeft);
  934. IO_CUR_Y(ppdev, yTop);
  935. IO_MAJ_AXIS_PCNT(ppdev, cx - 1);
  936. IO_MIN_AXIS_PCNT(ppdev, cy - 1);
  937. cjSrc = (cx + 7) / 8; // # bytes to transfer
  938. pjSrc = pjSrcScan0 + (yTop + dySrc) * lSrcDelta
  939. + (xLeft + dxSrc) / 8;
  940. // Start is byte aligned (note
  941. // that we don't have to add
  942. // xBias)
  943. ppdev->pfnImageTransfer(ppdev, pjSrc, lSrcDelta, cjSrc, cy,
  944. (RECTANGLE_FILL | WAIT | DRAWING_DIR_TBLRXM |
  945. DRAW | LAST_PIXEL_ON | MULTIPLE_PIXELS |
  946. WRITE | BYTE_SWAP));
  947. if (xBias != 0)
  948. {
  949. IO_FIFO_WAIT(ppdev, 1);
  950. IO_ABS_SCISSORS_L(ppdev, 0); // Reset the clipping if we used it
  951. }
  952. prcl++;
  953. } while (--c != 0);
  954. }
  955. /******************************Public*Routine******************************\
  956. * VOID vIoXfer4bpp
  957. *
  958. * Does a 4bpp transfer from a bitmap to the screen.
  959. *
  960. * NOTE: The screen must be 8bpp for this function to be called!
  961. *
  962. * The reason we implement this is that a lot of resources are kept as 4bpp,
  963. * and used to initialize DFBs, some of which we of course keep off-screen.
  964. *
  965. \**************************************************************************/
  966. // XLATE_BUFFER_SIZE defines the size of the stack-based buffer we use
  967. // for doing the translate. Note that in general stack buffers should
  968. // be kept as small as possible. The OS guarantees us only 8k for stack
  969. // from GDI down to the display driver in low memory situations; if we
  970. // ask for more, we'll access violate. Note also that at any time the
  971. // stack buffer cannot be larger than a page (4k) -- otherwise we may
  972. // miss touching the 'guard page' and access violate then too.
  973. #define XLATE_BUFFER_SIZE 256
  974. VOID vIoXfer4bpp( // Type FNXFER
  975. PDEV* ppdev,
  976. LONG c, // Count of rectangles, can't be zero
  977. RECTL* prcl, // List of destination rectangles, in relative
  978. // coordinates
  979. ULONG rop4, // rop4
  980. SURFOBJ* psoSrc, // Source surface
  981. POINTL* pptlSrc, // Original unclipped source point
  982. RECTL* prclDst, // Original unclipped destination rectangle
  983. XLATEOBJ* pxlo) // Translate that provides colour-expansion information
  984. {
  985. LONG dx;
  986. LONG dy;
  987. LONG cx;
  988. LONG cy;
  989. LONG lSrcDelta;
  990. BYTE* pjSrcScan0;
  991. BYTE* pjScan;
  992. BYTE* pjSrc;
  993. BYTE* pjDst;
  994. LONG cxThis;
  995. LONG cxToGo;
  996. LONG xSrc;
  997. LONG iLoop;
  998. BYTE jSrc;
  999. ULONG* pulXlate;
  1000. LONG cwThis;
  1001. BYTE* pjBuf;
  1002. BYTE ajBuf[XLATE_BUFFER_SIZE];
  1003. ASSERTDD(ppdev->iBitmapFormat == BMF_8BPP, "Screen must be 8bpp");
  1004. ASSERTDD(psoSrc->iBitmapFormat == BMF_4BPP, "Source must be 4bpp");
  1005. ASSERTDD(c > 0, "Can't handle zero rectangles");
  1006. ASSERTDD(((rop4 & 0xff00) >> 8) == (rop4 & 0xff),
  1007. "Expect only a rop2");
  1008. dx = pptlSrc->x - prclDst->left;
  1009. dy = pptlSrc->y - prclDst->top; // Add to destination to get source
  1010. lSrcDelta = psoSrc->lDelta;
  1011. pjSrcScan0 = psoSrc->pvScan0;
  1012. IO_FIFO_WAIT(ppdev, 6);
  1013. IO_PIX_CNTL(ppdev, ALL_ONES);
  1014. IO_FRGD_MIX(ppdev, SRC_CPU_DATA | gaulHwMixFromRop2[rop4 & 0xf]);
  1015. while(TRUE)
  1016. {
  1017. cx = prcl->right - prcl->left;
  1018. cy = prcl->bottom - prcl->top;
  1019. IO_CUR_X(ppdev, prcl->left);
  1020. IO_CUR_Y(ppdev, prcl->top);
  1021. IO_MAJ_AXIS_PCNT(ppdev, cx - 1);
  1022. IO_MIN_AXIS_PCNT(ppdev, cy - 1);
  1023. pulXlate = pxlo->pulXlate;
  1024. xSrc = prcl->left + dx;
  1025. pjScan = pjSrcScan0 + (prcl->top + dy) * lSrcDelta + (xSrc >> 1);
  1026. IO_GP_WAIT(ppdev);
  1027. IO_CMD(ppdev, RECTANGLE_FILL | BUS_SIZE_16| WAIT |
  1028. DRAWING_DIR_TBLRXM | DRAW | LAST_PIXEL_ON |
  1029. SINGLE_PIXEL | WRITE | BYTE_SWAP);
  1030. CHECK_DATA_READY(ppdev);
  1031. do {
  1032. pjSrc = pjScan;
  1033. cxToGo = cx; // # of pels per scan in 4bpp source
  1034. do {
  1035. cxThis = XLATE_BUFFER_SIZE;
  1036. // We can handle XLATE_BUFFER_SIZE number
  1037. // of pels in this xlate batch
  1038. cxToGo -= cxThis; // cxThis will be the actual number of
  1039. // pels we'll do in this xlate batch
  1040. if (cxToGo < 0)
  1041. cxThis += cxToGo;
  1042. pjDst = ajBuf; // Points to our temporary batch buffer
  1043. // We handle alignment ourselves because it's easy to
  1044. // do, rather than pay the cost of setting/resetting
  1045. // the scissors register:
  1046. if (xSrc & 1)
  1047. {
  1048. // When unaligned, we have to be careful not to read
  1049. // past the end of the 4bpp bitmap (that could
  1050. // potentially cause us to access violate):
  1051. iLoop = cxThis >> 1; // Each loop handles 2 pels;
  1052. // we'll handle odd pel
  1053. // separately
  1054. jSrc = *pjSrc;
  1055. while (iLoop-- != 0)
  1056. {
  1057. *pjDst++ = (BYTE) pulXlate[jSrc & 0xf];
  1058. jSrc = *(++pjSrc);
  1059. *pjDst++ = (BYTE) pulXlate[jSrc >> 4];
  1060. }
  1061. if (cxThis & 1)
  1062. *pjDst = (BYTE) pulXlate[jSrc & 0xf];
  1063. }
  1064. else
  1065. {
  1066. iLoop = (cxThis + 1) >> 1; // Each loop handles 2 pels
  1067. do {
  1068. jSrc = *pjSrc++;
  1069. *pjDst++ = (BYTE) pulXlate[jSrc >> 4];
  1070. *pjDst++ = (BYTE) pulXlate[jSrc & 0xf];
  1071. } while (--iLoop != 0);
  1072. }
  1073. // The number of bytes we'll transfer is equal to the number
  1074. // of pels we've processed in the batch. Since we're
  1075. // transferring words, we have to round up to get the word
  1076. // count:
  1077. cwThis = (cxThis + 1) >> 1;
  1078. pjBuf = ajBuf;
  1079. IO_TRANSFER_WORD_ALIGNED(ppdev, pjBuf, cwThis);
  1080. } while (cxToGo > 0);
  1081. pjScan += lSrcDelta; // Advance to next source scan. Note
  1082. // that we could have computed the
  1083. // value to advance 'pjSrc' directly,
  1084. // but this method is less
  1085. // error-prone.
  1086. } while (--cy != 0);
  1087. CHECK_DATA_COMPLETE(ppdev);
  1088. if (--c == 0)
  1089. return;
  1090. prcl++;
  1091. IO_FIFO_WAIT(ppdev, 4);
  1092. }
  1093. }
  1094. /******************************Public*Routine******************************\
  1095. * VOID vIoXferNative
  1096. *
  1097. * Transfers a bitmap that is the same colour depth as the display to
  1098. * the screen via the data transfer register, with no translation.
  1099. *
  1100. \**************************************************************************/
  1101. VOID vIoXferNative( // Type FNXFER
  1102. PDEV* ppdev,
  1103. LONG c, // Count of rectangles, can't be zero
  1104. RECTL* prcl, // Array of relative coordinates destination rectangles
  1105. ULONG rop4, // rop4
  1106. SURFOBJ* psoSrc, // Source surface
  1107. POINTL* pptlSrc, // Original unclipped source point
  1108. RECTL* prclDst, // Original unclipped destination rectangle
  1109. XLATEOBJ* pxlo) // Not used
  1110. {
  1111. LONG dx;
  1112. LONG dy;
  1113. LONG cx;
  1114. LONG cy;
  1115. LONG lSrcDelta;
  1116. BYTE* pjSrcScan0;
  1117. BYTE* pjSrc;
  1118. LONG cjSrc;
  1119. ASSERTDD((pxlo == NULL) || (pxlo->flXlate & XO_TRIVIAL),
  1120. "Can handle trivial xlate only");
  1121. ASSERTDD(psoSrc->iBitmapFormat == ppdev->iBitmapFormat,
  1122. "Source must be same colour depth as screen");
  1123. ASSERTDD(c > 0, "Can't handle zero rectangles");
  1124. ASSERTDD(((rop4 & 0xff00) >> 8) == (rop4 & 0xff),
  1125. "Expect only a rop2");
  1126. dx = pptlSrc->x - prclDst->left;
  1127. dy = pptlSrc->y - prclDst->top; // Add to destination to get source
  1128. lSrcDelta = psoSrc->lDelta;
  1129. pjSrcScan0 = psoSrc->pvScan0;
  1130. IO_FIFO_WAIT(ppdev, 6);
  1131. IO_PIX_CNTL(ppdev, ALL_ONES);
  1132. IO_FRGD_MIX(ppdev, SRC_CPU_DATA | gaulHwMixFromRop2[rop4 & 0xf]);
  1133. while(TRUE)
  1134. {
  1135. IO_CUR_X(ppdev, prcl->left);
  1136. IO_CUR_Y(ppdev, prcl->top);
  1137. cx = prcl->right - prcl->left;
  1138. IO_MAJ_AXIS_PCNT(ppdev, cx - 1);
  1139. cy = prcl->bottom - prcl->top;
  1140. IO_MIN_AXIS_PCNT(ppdev, cy - 1);
  1141. cjSrc = CONVERT_TO_BYTES(cx, ppdev);
  1142. pjSrc = pjSrcScan0 + (prcl->top + dy) * lSrcDelta
  1143. + CONVERT_TO_BYTES((prcl->left + dx), ppdev);
  1144. ppdev->pfnImageTransfer(ppdev, pjSrc, lSrcDelta, cjSrc, cy,
  1145. (RECTANGLE_FILL | WAIT | DRAWING_DIR_TBLRXM |
  1146. DRAW | LAST_PIXEL_ON | SINGLE_PIXEL |
  1147. WRITE | BYTE_SWAP));
  1148. if (--c == 0)
  1149. return;
  1150. prcl++;
  1151. IO_FIFO_WAIT(ppdev, 4);
  1152. }
  1153. }
  1154. /******************************Public*Routine******************************\
  1155. * VOID vIoCopyBlt
  1156. *
  1157. * Does a screen-to-screen blt of a list of rectangles.
  1158. *
  1159. \**************************************************************************/
  1160. VOID vIoCopyBlt( // Type FNCOPY
  1161. PDEV* ppdev,
  1162. LONG c, // Can't be zero
  1163. RECTL* prcl, // Array of relative coordinates destination rectangles
  1164. ULONG rop4, // rop4
  1165. POINTL* pptlSrc, // Original unclipped source point
  1166. RECTL* prclDst) // Original unclipped destination rectangle
  1167. {
  1168. LONG dx;
  1169. LONG dy; // Add delta to destination to get source
  1170. LONG cx;
  1171. LONG cy; // Size of current rectangle - 1
  1172. ASSERTDD(c > 0, "Can't handle zero rectangles");
  1173. ASSERTDD(((rop4 & 0xff00) >> 8) == (rop4 & 0xff),
  1174. "Expect only a rop2");
  1175. IO_FIFO_WAIT(ppdev, 2);
  1176. IO_FRGD_MIX(ppdev, SRC_DISPLAY_MEMORY | gaulHwMixFromRop2[rop4 & 0xf]);
  1177. IO_PIX_CNTL(ppdev, ALL_ONES);
  1178. dx = pptlSrc->x - prclDst->left;
  1179. dy = pptlSrc->y - prclDst->top;
  1180. // The accelerator may not be as fast at doing right-to-left copies, so
  1181. // only do them when the rectangles truly overlap:
  1182. if (!OVERLAP(prclDst, pptlSrc))
  1183. goto Top_Down_Left_To_Right;
  1184. if (prclDst->top <= pptlSrc->y)
  1185. {
  1186. if (prclDst->left <= pptlSrc->x)
  1187. {
  1188. Top_Down_Left_To_Right:
  1189. do {
  1190. IO_FIFO_WAIT(ppdev, 7);
  1191. cx = prcl->right - prcl->left - 1;
  1192. IO_MAJ_AXIS_PCNT(ppdev, cx);
  1193. IO_DEST_X(ppdev, prcl->left);
  1194. IO_CUR_X(ppdev, prcl->left + dx);
  1195. cy = prcl->bottom - prcl->top - 1;
  1196. IO_MIN_AXIS_PCNT(ppdev, cy);
  1197. IO_DEST_Y(ppdev, prcl->top);
  1198. IO_CUR_Y(ppdev, prcl->top + dy);
  1199. IO_CMD(ppdev, BITBLT | DRAW | DIR_TYPE_XY | WRITE |
  1200. DRAWING_DIR_TBLRXM);
  1201. prcl++;
  1202. } while (--c != 0);
  1203. }
  1204. else
  1205. {
  1206. do {
  1207. IO_FIFO_WAIT(ppdev, 7);
  1208. cx = prcl->right - prcl->left - 1;
  1209. IO_MAJ_AXIS_PCNT(ppdev, cx);
  1210. IO_DEST_X(ppdev, prcl->left + cx);
  1211. IO_CUR_X(ppdev, prcl->left + cx + dx);
  1212. cy = prcl->bottom - prcl->top - 1;
  1213. IO_MIN_AXIS_PCNT(ppdev, cy);
  1214. IO_DEST_Y(ppdev, prcl->top);
  1215. IO_CUR_Y(ppdev, prcl->top + dy);
  1216. IO_CMD(ppdev, BITBLT | DRAW | DIR_TYPE_XY | WRITE |
  1217. DRAWING_DIR_TBRLXM);
  1218. prcl++;
  1219. } while (--c != 0);
  1220. }
  1221. }
  1222. else
  1223. {
  1224. if (prclDst->left <= pptlSrc->x)
  1225. {
  1226. do {
  1227. IO_FIFO_WAIT(ppdev, 7);
  1228. cx = prcl->right - prcl->left - 1;
  1229. IO_MAJ_AXIS_PCNT(ppdev, cx);
  1230. IO_DEST_X(ppdev, prcl->left);
  1231. IO_CUR_X(ppdev, prcl->left + dx);
  1232. cy = prcl->bottom - prcl->top - 1;
  1233. IO_MIN_AXIS_PCNT(ppdev, cy);
  1234. IO_DEST_Y(ppdev, prcl->top + cy);
  1235. IO_CUR_Y(ppdev, prcl->top + cy + dy);
  1236. IO_CMD(ppdev, BITBLT | DRAW | DIR_TYPE_XY | WRITE |
  1237. DRAWING_DIR_BTLRXM);
  1238. prcl++;
  1239. } while (--c != 0);
  1240. }
  1241. else
  1242. {
  1243. do {
  1244. IO_FIFO_WAIT(ppdev, 7);
  1245. cx = prcl->right - prcl->left - 1;
  1246. IO_MAJ_AXIS_PCNT(ppdev, cx);
  1247. IO_DEST_X(ppdev, prcl->left + cx);
  1248. IO_CUR_X(ppdev, prcl->left + cx + dx);
  1249. cy = prcl->bottom - prcl->top - 1;
  1250. IO_MIN_AXIS_PCNT(ppdev, cy);
  1251. IO_DEST_Y(ppdev, prcl->top + cy);
  1252. IO_CUR_Y(ppdev, prcl->top + cy + dy);
  1253. IO_CMD(ppdev, BITBLT | DRAW | DIR_TYPE_XY | WRITE |
  1254. DRAWING_DIR_BTRLXM);
  1255. prcl++;
  1256. } while (--c != 0);
  1257. }
  1258. }
  1259. }
  1260. /******************************Public*Routine******************************\
  1261. * VOID vIoCopyTransparent
  1262. *
  1263. * Does a screen-to-screen blt of a list of rectangles using a source
  1264. * colorkey for transparency.
  1265. *
  1266. \**************************************************************************/
  1267. VOID vIoCopyTransparent( // Type FNCOPYTRANSPARENT
  1268. PDEV* ppdev,
  1269. LONG c, // Can't be zero
  1270. RECTL* prcl, // Array of relative coordinates destination rectangles
  1271. POINTL* pptlSrc, // Original unclipped source point
  1272. RECTL* prclDst, // Original unclipped destination rectangle
  1273. ULONG iColor)
  1274. {
  1275. LONG dx;
  1276. LONG dy; // Add delta to destination to get source
  1277. ASSERTDD(c > 0, "Can't handle zero rectangles");
  1278. dx = pptlSrc->x - prclDst->left;
  1279. dy = pptlSrc->y - prclDst->top;
  1280. if (DEPTH32(ppdev))
  1281. {
  1282. IO_FIFO_WAIT(ppdev, 5);
  1283. IO_COLOR_CMP32(ppdev, iColor);
  1284. }
  1285. else
  1286. {
  1287. IO_FIFO_WAIT(ppdev, 4);
  1288. IO_COLOR_CMP(ppdev, iColor);
  1289. }
  1290. IO_MULTIFUNC_CNTL(ppdev, ppdev->ulMiscState
  1291. | MULT_MISC_COLOR_COMPARE);
  1292. IO_FRGD_MIX(ppdev, SRC_DISPLAY_MEMORY | OVERPAINT);
  1293. IO_PIX_CNTL(ppdev, ALL_ONES);
  1294. while (TRUE)
  1295. {
  1296. IO_FIFO_WAIT(ppdev, 7);
  1297. IO_CUR_X(ppdev, prcl->left + dx);
  1298. IO_CUR_Y(ppdev, prcl->top + dy);
  1299. IO_DEST_X(ppdev, prcl->left);
  1300. IO_DEST_Y(ppdev, prcl->top);
  1301. IO_MAJ_AXIS_PCNT(ppdev, prcl->right - prcl->left - 1);
  1302. IO_MIN_AXIS_PCNT(ppdev, prcl->bottom - prcl->top - 1);
  1303. IO_CMD(ppdev, BITBLT | DRAW | DIR_TYPE_XY |
  1304. WRITE | DRAWING_DIR_TBLRXM);
  1305. if (--c == 0)
  1306. {
  1307. IO_FIFO_WAIT(ppdev, 1);
  1308. IO_MULTIFUNC_CNTL(ppdev, ppdev->ulMiscState);
  1309. return;
  1310. }
  1311. prcl++;
  1312. }
  1313. }