Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

966 lines
31 KiB

/******************************Module*Header*******************************\
* Module Name: bltmil.c
*
* Contains the low-level blt functions for the Millennium.
*
* Hopefully, if you're basing your display driver on this code, to
* support all of DrvBitBlt and DrvCopyBits, you'll only have to implement
* the following routines. You shouldn't have to modify much in
* 'bitblt.c'. I've tried to make these routines as few, modular, simple,
* and efficient as I could, while still accelerating as many calls as
* possible that would be cost-effective in terms of performance wins
* versus size and effort.
*
* Note: In the following, 'relative' coordinates refer to coordinates
* that haven't yet had the offscreen bitmap (DFB) offset applied.
* 'Absolute' coordinates have had the offset applied. For example,
* we may be told to blt to (1, 1) of the bitmap, but the bitmap may
* be sitting in offscreen memory starting at coordinate (0, 768) --
* (1, 1) would be the 'relative' start coordinate, and (1, 769)
* would be the 'absolute' start coordinate.
*
* Copyright (c) 1992-1996 Microsoft Corporation
* Copyright (c) 1993-1996 Matrox Electronic Systems, Ltd.
\**************************************************************************/
  25. #include "precomp.h"
  26. /******************************Public*Routine******************************\
  27. * VOID vMilFillSolid
  28. *
  29. * Fills a list of rectangles with a solid colour.
  30. *
  31. \**************************************************************************/
  32. VOID vMilFillSolid(
  33. PDEV* ppdev, // pdev
  34. LONG c, // Number of rectangles to be filled,
  35. // can't be zero
  36. RECTL* prcl, // List of rectangles to be filled
  37. ULONG rop4, // Rop4
  38. RBRUSH_COLOR rbc, // rbc.prb points to brush realization structure
  39. POINTL* pptlBrush) // Pattern alignment
  40. {
  41. BYTE* pjBase;
  42. LONG xOffset;
  43. LONG yOffset;
  44. ULONG ulDwg;
  45. ULONG ulHwMix;
  46. pjBase = ppdev->pjBase;
  47. xOffset = ppdev->xOffset;
  48. yOffset = ppdev->yOffset;
  49. CHECK_FIFO_SPACE(pjBase, 4);
  50. ppdev->HopeFlags = (SIGN_CACHE | ARX_CACHE | PATTERN_CACHE);
  51. if (rop4 == 0xf0f0) // PATCOPY
  52. {
  53. if (ppdev->iBitmapFormat == BMF_24BPP)
  54. {
  55. if (((rbc.iSolidColor & 0x000000ff) !=
  56. ((rbc.iSolidColor >> 8) & 0x000000ff)) ||
  57. ((rbc.iSolidColor & 0x000000ff) !=
  58. ((rbc.iSolidColor >> 16) & 0x000000ff)))
  59. {
  60. // We're in 24bpp, and the color is not a gray level, so we
  61. // can't use block mode.
  62. ulDwg = (opcode_TRAP + blockm_OFF + atype_RPL + solid_SOLID +
  63. arzero_ZERO + sgnzero_ZERO + shftzero_ZERO +
  64. bop_SRCCOPY + pattern_OFF + transc_BG_OPAQUE);
  65. }
  66. else
  67. {
  68. // We're in 24bpp, and the color is a gray level, so we
  69. // can use block mode if we prepare our color.
  70. rbc.iSolidColor = (rbc.iSolidColor << 8) |
  71. (rbc.iSolidColor & 0x000000ff);
  72. ulDwg = (opcode_TRAP + blockm_ON + solid_SOLID +
  73. arzero_ZERO + sgnzero_ZERO + shftzero_ZERO +
  74. bop_SRCCOPY + pattern_OFF + transc_BG_OPAQUE);
  75. }
  76. }
  77. else
  78. {
  79. // This is not 24bpp.
  80. ulDwg = (opcode_TRAP + blockm_ON + solid_SOLID +
  81. arzero_ZERO + sgnzero_ZERO + shftzero_ZERO +
  82. bop_SRCCOPY + pattern_OFF + transc_BG_OPAQUE);
  83. }
  84. }
  85. else
  86. {
  87. // The ROP3 is a combination of P and D only:
  88. //
  89. // ROP3 Mga ROP3 Mga ROP3 Mga ROP3 Mga
  90. //
  91. // 0x00 0 0x50 4 0xa0 8 0xf0 c
  92. // 0x05 1 0x55 5 0xa5 9 0xf5 d
  93. // 0x0a 2 0x5a 6 0xaa a 0xfa e
  94. // 0x0f 3 0x5f 7 0xaf b 0xff f
  95. ulHwMix = (rop4 & 0x03) + ((rop4 & 0x30) >> 2);
  96. if (ulHwMix == MGA_WHITENESS)
  97. {
  98. rbc.iSolidColor = 0xffffffff;
  99. ulDwg = (opcode_TRAP + blockm_ON + solid_SOLID +
  100. arzero_ZERO + sgnzero_ZERO + shftzero_ZERO +
  101. bop_SRCCOPY + pattern_OFF + transc_BG_OPAQUE);
  102. }
  103. else if (ulHwMix == MGA_BLACKNESS)
  104. {
  105. rbc.iSolidColor = 0x00000000;
  106. ulDwg = (opcode_TRAP + blockm_ON + solid_SOLID +
  107. arzero_ZERO + sgnzero_ZERO + shftzero_ZERO +
  108. bop_SRCCOPY + pattern_OFF + transc_BG_OPAQUE);
  109. }
  110. else
  111. {
  112. ulDwg = (opcode_TRAP + blockm_OFF + atype_RSTR + solid_SOLID +
  113. arzero_ZERO + sgnzero_ZERO + shftzero_ZERO +
  114. pattern_OFF + transc_BG_OPAQUE +
  115. (ulHwMix << 16));
  116. }
  117. }
  118. CP_WRITE(pjBase, DWG_DWGCTL, ulDwg);
  119. CP_WRITE(pjBase, DWG_FCOL, COLOR_REPLICATE(ppdev, rbc.iSolidColor));
  120. while(TRUE)
  121. {
  122. CP_WRITE(pjBase, DWG_FXBNDRY,
  123. (((prcl->right + xOffset) << bfxright_SHIFT) |
  124. ((prcl->left + xOffset) & bfxleft_MASK)));
  125. // ylength_MASK not is needed since coordinates are within range
  126. CP_START(pjBase, DWG_YDSTLEN,
  127. (((prcl->top + yOffset ) << yval_SHIFT) |
  128. ((prcl->bottom - prcl->top))));
  129. if (--c == 0)
  130. return;
  131. CHECK_FIFO_SPACE(pjBase, 2);
  132. prcl++;
  133. }
  134. }
  135. /******************************Public*Routine******************************\
  136. * VOID vMilPatRealize
  137. *
  138. * Download the Color Brush to the Color brush cache in the Storm offscreen
  139. * memory. For 8, 16, and 32 bpp, we download an 8x8 brush; a special
  140. * routine, vPatRealize24bpp, is used for 24bpp brushes. We'll use direct
  141. * frame buffer access whenever possible.
  142. *
  143. * There are some hardware restrictions concerning the way that a pattern
  144. * must be stored in memory:
  145. * - the first pixel of the pattern must be stored so that the first pixel
  146. * address mod 256 is 0, 8, 16, or 24;
  147. * - each line of 8 pixels is stored continuously, but there must be a
  148. * difference of 32 in the pixel addresses of successive pattern lines.
  149. * This means that we will store patterns in the following way:
  150. *
  151. * +----+---------------+---------------+---------------+---------------+
  152. * | | Pattern 0 | Pattern 1 | Pattern 2 | Pattern 3 |
  153. * |Line| | |1 1 1 1 1 1 1 1|1 1 1 1 1 1 1 1|
  154. * | |0 1 2 3 4 5 6 7|8 9 a b c d e f|0 1 2 3 4 5 6 7|8 9 a b c d e f|
  155. * +----+---------------+---------------+---------------+---------------+
  156. * | 0 |* * * * | X | o o|x x |
  157. * | 1 | * * * *| X | o o | x x |
  158. * | 2 |* * * * | X | o o | x x |
  159. * | 3 | * * * *| X |o o | x x|
  160. * | 4 |* * * * |X X X X X X X X| o o|x x |
  161. * | 5 | * * * *| X | o o | x x |
  162. * | 6 |* * * * | X | o o | x x |
  163. * | 7 | * * * *| X |o o | x x|
  164. * +----+---------------+---------------+---------------+---------------+
  165. *
  166. * where a given pixel address is
  167. * FirstPixelAddress + Line*0x20 + Pattern*0x08 + xPat.
  168. *
  169. \**************************************************************************/
  170. VOID vMilPatRealize(
  171. PDEV* ppdev,
  172. RBRUSH* prb)
  173. {
  174. BYTE* pjBase;
  175. BRUSHENTRY* pbe;
  176. LONG iBrushCache;
  177. ULONG culScan;
  178. ULONG i;
  179. ULONG j;
  180. ULONG* pulBrush;
  181. ULONG* pulDst;
  182. ULONG lDeltaPat;
  183. pjBase = ppdev->pjBase;
  184. // Allocate a new off-screen cache brush entry for the brush.
  185. iBrushCache = ppdev->iBrushCache;
  186. pbe = &ppdev->pbe[iBrushCache];
  187. iBrushCache++;
  188. if (iBrushCache >= ppdev->cBrushCache)
  189. iBrushCache = 0;
  190. ppdev->iBrushCache = iBrushCache;
  191. // Update our links.
  192. pbe->prbVerify = prb;
  193. prb->apbe[IBOARD(ppdev)] = pbe;
  194. // Point to the pattern bits.
  195. pulBrush = prb->aulPattern;
  196. // Calculate delta from end of pattern scan 1 to start of pattern scan2.
  197. lDeltaPat = 8 * ppdev->cjHwPel; // 8 -> 32?
  198. // Convert it to a byte address.
  199. culScan = 2 * ppdev->cjHwPel;
  200. pulDst = (ULONG*) (pbe->pvScan0);
  201. START_DIRECT_ACCESS_STORM(ppdev, pjBase);
  202. for (i = 8; i != 0 ; i--)
  203. {
  204. for (j = 0; j < culScan; j++)
  205. {
  206. pulDst[j] = *pulBrush++;
  207. }
  208. pulDst += lDeltaPat;
  209. }
  210. END_DIRECT_ACCESS_STORM(ppdev, pjBase);
  211. }
  212. /*****************************************************************************
  213. * VOID vMilFillPat
  214. *
  215. * 8, 16, and 32bpp patterned color fills for Storm.
  216. ****************************************************************************/
  217. VOID vMilFillPat(
  218. PDEV* ppdev,
  219. LONG c, // Can't be zero
  220. RECTL* prcl, // List of rectangles to be filled, in relative
  221. // coordinates
  222. ULONG rop4, // Rop4
  223. RBRUSH_COLOR rbc, // rbc.prb points to brush realization structure
  224. POINTL* pptlBrush) // Pattern alignment
  225. {
  226. BRUSHENTRY* pbe;
  227. LONG xOffset;
  228. LONG yOffset;
  229. LONG xLeft;
  230. LONG yTop;
  231. LONG xBrush;
  232. LONG yBrush;
  233. LONG lSrcAdd;
  234. ULONG ulLinear;
  235. BYTE* pjBase;
  236. ASSERTDD(!(rbc.prb->fl & RBRUSH_2COLOR), "Can't do 2 colour brushes here");
  237. // We have to ensure that no other brush took our spot in off-screen
  238. // memory, or we might have to realize the brush for the first time.
  239. pbe = rbc.prb->apbe[IBOARD(ppdev)];
  240. if (pbe->prbVerify != rbc.prb)
  241. {
  242. vMilPatRealize(ppdev, rbc.prb);
  243. pbe = rbc.prb->apbe[IBOARD(ppdev)];
  244. }
  245. pjBase = ppdev->pjBase;
  246. xOffset = ppdev->xOffset;
  247. yOffset = ppdev->yOffset;
  248. lSrcAdd = ppdev->lPatSrcAdd;
  249. CHECK_FIFO_SPACE(pjBase, 6);
  250. CP_WRITE(pjBase, DWG_AR5, 32); // Source (pattern) pitch.
  251. ppdev->HopeFlags = SIGN_CACHE;
  252. if ((rop4 & 0x000000FF) == 0x000000F0)
  253. {
  254. // The rop is PATCOPY.
  255. CP_WRITE(pjBase, DWG_DWGCTL, (opcode_BITBLT + atype_RPL + sgnzero_ZERO +
  256. shftzero_ZERO + bop_SRCCOPY +
  257. bltmod_BFCOL + pattern_ON +
  258. transc_BG_OPAQUE));
  259. }
  260. else
  261. {
  262. CP_WRITE(pjBase, DWG_DWGCTL, (opcode_BITBLT + atype_RSTR + sgnzero_ZERO +
  263. shftzero_ZERO + bltmod_BFCOL + pattern_ON +
  264. transc_BG_OPAQUE +
  265. (((rop4 & 0x03) + ((rop4 & 0x30) >> 2))
  266. << 16)));
  267. }
  268. // The pattern setup is complete.
  269. while(TRUE)
  270. {
  271. // There is a problem with Storm. We have to program:
  272. // AR3: ssa
  273. // AR0: sea, where sea<18:3> = ssa<18:3> and
  274. // sea< 2:0> = ssa< 2:0> + 2 for 8bpp;
  275. // sea< 2:0> = ssa< 2:0> + 4 for 16bpp;
  276. // sea< 2:0> = ssa< 2:0> + 6 for 32bpp.
  277. // Take into account the brush origin. The upper left pel of the
  278. // brush should be aligned here in the destination surface.
  279. yTop = prcl->top;
  280. xLeft = prcl->left;
  281. xBrush = (xLeft - pptlBrush->x) & 7;
  282. yBrush = (yTop - pptlBrush->y) & 7;
  283. ulLinear = pbe->ulLinear + (yBrush << 5) + xBrush;
  284. CP_WRITE(pjBase, DWG_AR3, ulLinear);
  285. CP_WRITE(pjBase, DWG_AR0, ((ulLinear & 0xfffffff8) |
  286. ((ulLinear+lSrcAdd) & 7)));
  287. CP_WRITE(pjBase, DWG_FXBNDRY,
  288. (((prcl->right + xOffset - 1) << bfxright_SHIFT) |
  289. ((xLeft + xOffset) & bfxleft_MASK)));
  290. // ylength_MASK not is needed since coordinates are within range
  291. CP_START(pjBase, DWG_YDSTLEN,
  292. (((yTop + yOffset ) << yval_SHIFT) |
  293. ((prcl->bottom - yTop))));
  294. if (--c == 0)
  295. return;
  296. CHECK_FIFO_SPACE(pjBase, 4);
  297. prcl++;
  298. }
  299. }
  300. /******************************Public*Routine******************************\
  301. * vMilXfer1bpp
  302. *
  303. * This routine colour expands a monochrome bitmap.
  304. *
  305. \**************************************************************************/
  306. VOID vMilXfer1bpp( // Type FNXFER
  307. PDEV* ppdev,
  308. LONG c, // Count of rectangles, can't be zero
  309. RECTL* prcl, // List of destination rectangles, in relative
  310. // coordinates
  311. ULONG rop4, // Foreground and background hardware mix
  312. SURFOBJ* psoSrc, // Source surface
  313. POINTL* pptlSrc, // Original unclipped source point
  314. RECTL* prclDst, // Original unclipped destination rectangle
  315. XLATEOBJ* pxlo) // Translate that provides colour-expansion information
  316. {
  317. LONG xOffset;
  318. LONG yOffset;
  319. ULONG ulBitFlip;
  320. LONG dx;
  321. LONG dy;
  322. LONG xSrc;
  323. LONG ySrc;
  324. LONG xDst;
  325. LONG yDst;
  326. LONG cxDst;
  327. LONG cyDst;
  328. LONG xSrcAlign;
  329. LONG lSrcDelta;
  330. LONG lSrcSkip;
  331. LONG i;
  332. LONG k;
  333. LONG cdSrc;
  334. LONG cdSrcPerScan;
  335. ULONG FCol;
  336. ULONG BCol;
  337. ULONG ul;
  338. BYTE* pjDma;
  339. ULONG* pulXlate;
  340. ULONG* pulSrc;
  341. ULONG* pulDst;
  342. BYTE* pjSrcScan0;
  343. BYTE* pjBase;
  344. LONG cFifo;
  345. LONG xAlign;
  346. ULONG cFullLoops;
  347. ULONG cRemLoops;
  348. ASSERTDD(((rop4 & 0xff00) >> 8) == (rop4 & 0xff),
  349. "Expect only an opaquing rop");
  350. pjBase = ppdev->pjBase;
  351. xOffset = ppdev->xOffset;
  352. yOffset = ppdev->yOffset;
  353. ulBitFlip = 0;
  354. dx = pptlSrc->x - prclDst->left;
  355. dy = pptlSrc->y - prclDst->top; // Add to destination to get source
  356. pjSrcScan0 = psoSrc->pvScan0;
  357. lSrcDelta = psoSrc->lDelta;
  358. pjDma = pjBase + DMAWND;
  359. ppdev->HopeFlags = SIGN_CACHE;
  360. // Get the foreground and background colors.
  361. pulXlate = pxlo->pulXlate;
  362. FCol = COLOR_REPLICATE(ppdev, pulXlate[1]);
  363. BCol = COLOR_REPLICATE(ppdev, pulXlate[0]);
  364. CHECK_FIFO_SPACE(pjBase, 10);
  365. if (rop4 == 0x0000CCCC) // SRCCOPY
  366. {
  367. if (ppdev->iBitmapFormat == BMF_24BPP)
  368. {
  369. CP_WRITE(pjBase, DWG_DWGCTL, (opcode_ILOAD + atype_RPL +
  370. sgnzero_ZERO + shftzero_ZERO +
  371. bop_SRCCOPY + bltmod_BMONOWF));
  372. }
  373. else
  374. {
  375. CP_WRITE(pjBase, DWG_DWGCTL, (opcode_ILOAD + blockm_ON +
  376. sgnzero_ZERO + shftzero_ZERO +
  377. bop_SRCCOPY + bltmod_BMONOWF));
  378. }
  379. }
  380. else if ((rop4 == 0xb8b8) || (rop4 == 0xe2e2))
  381. {
  382. // We special-cased 0xb8b8 and 0xe2e2 in bitblt.c:
  383. if (rop4 == 0xb8b8)
  384. {
  385. // 0xb8 is weird because it says that the '1' bit is leave-alone,
  386. // but the '0' bit is the destination color. The Millennium can
  387. // only handle transparent blts when the '0' bit is leave-alone,
  388. // so we flip the source bits before we give it to the Millennium.
  389. //
  390. // Since we're limited by the speed of the bus, this additional
  391. // overhead of an extra XOR on every write won't be measurable.
  392. ulBitFlip = (ULONG) -1;
  393. }
  394. CP_WRITE(pjBase, DWG_DWGCTL, (opcode_ILOAD + atype_RPL + blockm_OFF +
  395. bop_SRCCOPY + trans_0 + bltmod_BMONO +
  396. pattern_OFF + hbgr_SRC_WINDOWS +
  397. transc_BG_TRANSP));
  398. }
  399. else
  400. {
  401. CP_WRITE(pjBase, DWG_DWGCTL, (opcode_ILOAD + atype_RSTR +
  402. sgnzero_ZERO + shftzero_ZERO +
  403. ((rop4 & 0xf) << 16) +
  404. bltmod_BMONOWF));
  405. }
  406. CP_WRITE(pjBase, DWG_BCOL, BCol);
  407. CP_WRITE(pjBase, DWG_FCOL, FCol);
  408. CP_WRITE(pjBase, DWG_AR5, 0);
  409. CP_WRITE(pjBase, DWG_SGN, 0);
  410. while (TRUE)
  411. {
  412. cxDst = prcl->right - prcl->left;
  413. cyDst = prcl->bottom - prcl->top;
  414. xDst = prcl->left + xOffset;
  415. yDst = prcl->top + yOffset;
  416. ySrc = prcl->top + dy;
  417. xSrc = prcl->left + dx;
  418. // Since SSA (AR3) is always zero, we may have to clip the expanded
  419. // ILOAD using CXLEFT, and we'll have to modify FXLEFT accordingly.
  420. xSrcAlign = xSrc & 0x1F;
  421. if (xSrcAlign)
  422. {
  423. // We'll have to use clipping.
  424. CP_WRITE(pjBase, DWG_CXLEFT, xDst);
  425. }
  426. // Number of pixels per line.
  427. CP_WRITE(pjBase, DWG_AR0, (cxDst - 1 + xSrcAlign));
  428. CP_WRITE(pjBase, DWG_AR3, 0);
  429. CP_WRITE(pjBase, DWG_FXBNDRY, (((xDst + cxDst - 1) << bfxright_SHIFT) |
  430. ((xDst - xSrcAlign) & bfxleft_MASK)));
  431. // ylength_MASK not needed since coordinates are within range
  432. CP_START(pjBase, DWG_YDSTLEN, ((yDst << yval_SHIFT) | cyDst));
  433. // Calculate the location of the source rectangle. This points to the
  434. // first dword to be downloaded. It is aligned on a dword boundary.
  435. // The first bit of interest in the first dword is at (xSrc & 0x1f).
  436. pulSrc = (ULONG*)(pjSrcScan0 + (ySrc * lSrcDelta)
  437. + ((xSrc & 0xFFFFFFE0) >> 3));
  438. CHECK_FIFO_SPACE(pjBase, FIFOSIZE);
  439. BLT_WRITE_ON(ppdev, pjBase);
  440. // Number of bytes, padded to the next dword, to be moved per
  441. // scanline. Since we align the starting dword on a dword boundary,
  442. // we know that we cannot overflow the end of the bitmap.
  443. cdSrc = ((xSrcAlign + cxDst + 0x1F) & 0xFFFFFFE0) >> 3;
  444. lSrcSkip = lSrcDelta - cdSrc;
  445. if (lSrcSkip == 0)
  446. {
  447. // There is no line-to-line increment, we can go full speed.
  448. // Total number of dwords to be sent.
  449. cdSrc = cyDst * (cdSrc >> 2);
  450. while ((cdSrc -= FIFOSIZE) > 0)
  451. {
  452. pulDst = (ULONG*)pjDma;
  453. CHECK_FIFO_SPACE(pjBase, FIFOSIZE);
  454. for (i = FIFOSIZE; i != 0; i--)
  455. {
  456. CP_WRITE_DMA(ppdev, pulDst++, *pulSrc++ ^ ulBitFlip);
  457. }
  458. }
  459. pulDst = (ULONG*)pjDma;
  460. cdSrc += FIFOSIZE;
  461. CHECK_FIFO_SPACE(pjBase, cdSrc);
  462. for (i = cdSrc; i != 0; i--)
  463. {
  464. CP_WRITE_DMA(ppdev, pulDst++, *pulSrc++ ^ ulBitFlip);
  465. }
  466. }
  467. else
  468. {
  469. // We can't go full speed.
  470. // Number of full dwords to be moved on each scan. We know that
  471. // we won't overflow the end of the bitmap with this.
  472. cdSrc >>= 2;
  473. cdSrcPerScan = cdSrc;
  474. for (k = cyDst; k != 0; k--)
  475. {
  476. pulDst = (ULONG*)pjDma;
  477. cdSrc = cdSrcPerScan;
  478. while ((cdSrc -= FIFOSIZE) > 0)
  479. {
  480. CHECK_FIFO_SPACE(pjBase, FIFOSIZE);
  481. for (i = FIFOSIZE; i != 0; i--)
  482. {
  483. CP_WRITE_DMA(ppdev, pulDst++, *pulSrc++ ^ ulBitFlip);
  484. }
  485. }
  486. cdSrc += FIFOSIZE;
  487. CHECK_FIFO_SPACE(pjBase, cdSrc);
  488. for (i = cdSrc; i != 0; i--)
  489. {
  490. CP_WRITE_DMA(ppdev, pulDst++, *pulSrc++ ^ ulBitFlip);
  491. }
  492. // We're done with the current scan, go to the next one.
  493. pulSrc = (ULONG*) ((BYTE*) pulSrc + lSrcSkip);
  494. }
  495. }
  496. BLT_WRITE_OFF(ppdev, pjBase);
  497. if (xSrcAlign)
  498. {
  499. // Restore the clipping:
  500. CHECK_FIFO_SPACE(pjBase, 1);
  501. CP_WRITE(pjBase, DWG_CXLEFT, 0);
  502. }
  503. if (--c == 0)
  504. break;
  505. prcl++;
  506. CHECK_FIFO_SPACE(pjBase, 5);
  507. }
  508. }
  509. /******************************Public*Routine******************************\
  510. * LONG lSplitRcl
  511. *
  512. * WRAM-WRAM blts can't span banks, and this routine does the tough work
  513. * of figuring out how much of the blt can be done via WRAM-WRAM in one bank,
  514. * then a regular blt over the bank boundary, and again WRAM-WRAM in the
  515. * next bank.
  516. *
  517. \**************************************************************************/
  518. LONG lSplitRcl(
  519. RECTL *arclDst,
  520. LONG *ayBreak,
  521. LONG cyBreak,
  522. LONG dy,
  523. ULONG flDirCode,
  524. LONG *aiCmd)
  525. {
  526. LONG iBreak = 0;
  527. LONG iSrc = 0;
  528. LONG iDst = 0;
  529. RECTL rcl;
  530. LONG lBoundsTop;
  531. LONG lBoundsBottom;
  532. LONG iCmdLast = 0;
  533. ///////////////////////////////////////////////////////////////////////////////
  534. // See [WRN] comment below before changing this macro. This macro is
  535. // particular to this function.
  536. #define NON_EMPTY_RECT(rcl) ((rcl.right > rcl.left) && (rcl.bottom > rcl.top))
  537. aiCmd[0] = 0;
  538. if (cyBreak == 0)
  539. {
  540. return 1;
  541. }
  542. while (TRUE)
  543. {
  544. rcl = arclDst[iSrc];
  545. // Find the bounding scans of the union of the source and destination.
  546. lBoundsTop = min(rcl.top, rcl.top + dy);
  547. lBoundsBottom = max(rcl.bottom, rcl.bottom + dy);
  548. if ((ayBreak[iBreak] < lBoundsTop) ||
  549. (ayBreak[iBreak] >= lBoundsBottom))
  550. {
  551. // Do nothing
  552. iDst++;
  553. goto next_break;
  554. }
  555. // [WRN] For the following, bottom could be less than top and
  556. // right could be less than left. These should be considered
  557. // empty rectangles, and the macro above reflects this.
  558. arclDst[iDst].left = rcl.left;
  559. arclDst[iDst].right = rcl.right;
  560. arclDst[iDst].top = rcl.top;
  561. arclDst[iDst].bottom = min(rcl.bottom, (ayBreak[iBreak] - dy));
  562. if (NON_EMPTY_RECT(arclDst[iDst]))
  563. {
  564. aiCmd[iDst++] = 0;
  565. iCmdLast = 0;
  566. }
  567. arclDst[iDst].left = rcl.left;
  568. arclDst[iDst].right = rcl.right;
  569. arclDst[iDst].top = max(rcl.top, (ayBreak[iBreak] - dy));
  570. arclDst[iDst].bottom = min(rcl.bottom, (ayBreak[iBreak] + 1));
  571. if (NON_EMPTY_RECT(arclDst[iDst]))
  572. {
  573. aiCmd[iDst++] = 1;
  574. iCmdLast = 1;
  575. }
  576. arclDst[iDst].left = rcl.left;
  577. arclDst[iDst].right = rcl.right;
  578. arclDst[iDst].top = max(rcl.top, (ayBreak[iBreak] + 1));
  579. arclDst[iDst].bottom = rcl.bottom;
  580. if (NON_EMPTY_RECT(arclDst[iDst]))
  581. {
  582. aiCmd[iDst++] = 0;
  583. iCmdLast = 0;
  584. }
  585. next_break:
  586. if ((--cyBreak == 0) ||
  587. (iCmdLast == 1))
  588. {
  589. // If we have run out of breaks, we're done.
  590. // Once the last rectangle is marked slow, it stays slow.
  591. break;
  592. }
  593. iSrc = --iDst;
  594. iBreak++;
  595. };
  596. return iDst;
  597. }
  598. /******************************Public*Routine******************************\
  599. * VOID vMilCopyBlt
  600. *
  601. * Does a screen-to-screen blt of a list of rectangles.
  602. *
  603. \**************************************************************************/
  604. VOID vMilCopyBlt( // Type FNCOPY
  605. PDEV* ppdev,
  606. LONG c, // Can't be zero
  607. RECTL* prcl, // Array of relative coordinates destination rectangles
  608. ULONG rop4, // Rop4
  609. POINTL* pptlSrc, // Original unclipped source point
  610. RECTL* prclDst) // Original unclipped destination rectangle
  611. {
  612. BYTE* pjBase;
  613. LONG xOffset;
  614. LONG yOffset;
  615. LONG dx;
  616. LONG dy;
  617. FLONG flDirCode;
  618. LONG lSignedPitch;
  619. ULONG ulHwMix;
  620. ULONG ulDwg;
  621. LONG yDst;
  622. LONG ySrc;
  623. LONG cy;
  624. LONG xSrc;
  625. LONG lSignedWidth;
  626. LONG lSrcStart;
  627. ULONG ulDwgFast = 0;
  628. LONG cjPelSize;
  629. pjBase = ppdev->pjBase;
  630. xOffset = ppdev->xOffset;
  631. yOffset = ppdev->yOffset;
  632. cjPelSize = ppdev->cjPelSize;
  633. dx = pptlSrc->x - prclDst->left;
  634. dy = pptlSrc->y - prclDst->top; // Add to destination to get source
  635. flDirCode = DRAWING_DIR_TBLR;
  636. lSignedPitch = ppdev->cxMemory;
  637. // If the destination and source rectangles overlap, we will have to
  638. // tell the accelerator in which direction the copy should be done:
  639. if (OVERLAP(prclDst, pptlSrc))
  640. {
  641. if (prclDst->left > pptlSrc->x)
  642. {
  643. flDirCode |= scanleft_RIGHT_TO_LEFT;
  644. }
  645. if (prclDst->top > pptlSrc->y)
  646. {
  647. flDirCode |= sdy_BOTTOM_TO_TOP;
  648. lSignedPitch = -lSignedPitch;
  649. }
  650. }
  651. if (rop4 == 0xcccc)
  652. {
  653. ulDwg = opcode_BITBLT | atype_RPL | blockm_OFF |
  654. bltmod_BFCOL | pattern_OFF | transc_BG_OPAQUE |
  655. bop_SRCCOPY | shftzero_ZERO | sgnzero_NO_ZERO;
  656. if ((dy > 0) && (dx == 0))
  657. {
  658. // We enable fast WRAM to WRAM blts only for upward scrolls.
  659. // We could enable it for more blts, but it has stringent
  660. // alignment requirements which aren't likely to be met unless
  661. // it's a vertical scroll.
  662. ulDwgFast = opcode_FBITBLT | atype_RPL | blockm_OFF |
  663. bltmod_BFCOL | pattern_OFF | transc_BG_OPAQUE |
  664. bop_NOP | shftzero_ZERO | sgnzero_NO_ZERO;
  665. }
  666. }
  667. else
  668. {
  669. ulHwMix = rop4 & 0xf;
  670. ulDwg = opcode_BITBLT + atype_RSTR + blockm_OFF + bltmod_BFCOL +
  671. pattern_OFF + transc_BG_OPAQUE + (ulHwMix << 16);
  672. }
  673. // The SRC0 to SRC3 registers are probably trashed by the blt, and we
  674. // may be using a different SGN:
  675. ppdev->HopeFlags = 0;
  676. CHECK_FIFO_SPACE(pjBase, 8);
  677. CP_WRITE(pjBase, DWG_SGN, flDirCode);
  678. CP_WRITE(pjBase, DWG_AR5, lSignedPitch);
  679. // If the overhead for setting up the fast blt is too high, then we should
  680. // have a minimum size for prclDst.
  681. if (ulDwgFast)
  682. {
  683. RECTL arclDst[1+(MAX_WRAM_BARRIERS*2)];
  684. LONG aiCmd[1+(MAX_WRAM_BARRIERS*2)];
  685. LONG ayBreak[MAX_WRAM_BARRIERS];
  686. LONG cyBreak;
  687. RECTL *prclDst;
  688. LONG crclDst;
  689. ULONG aulCmd[2] = {ulDwgFast, ulDwg};
  690. LONG i;
  691. cyBreak = ppdev->cyBreak;
  692. for (i = 0; i < cyBreak; i++)
  693. {
  694. // lSplitRcl deals in relative coordinates for the destination and
  695. // source rectangles, so convert the break locations to relative
  696. // coordinates, too:
  697. ayBreak[i] = ppdev->ayBreak[i] - yOffset;
  698. }
  699. while (TRUE)
  700. {
  701. arclDst[0] = *prcl;
  702. prclDst = arclDst;
  703. // split the rectangle at each ayBreak[i]
  704. // If the first scan was on a split, start with the slow blt,
  705. // otherwise, start with the fast blt and alternate.
  706. crclDst = lSplitRcl(arclDst, ayBreak, cyBreak, dy, flDirCode, aiCmd);
  707. i = 0;
  708. while (TRUE)
  709. {
  710. LONG xRight;
  711. ASSERTDD((aiCmd[i] & ~1) == 0, "Only bit 0 of aiCmd[i] should be set.");
  712. CP_WRITE(pjBase, DWG_DWGCTL, aulCmd[aiCmd[i]]);
  713. xRight = prclDst->right + xOffset - 1;
  714. ////////////////////////////////////////////////////////////////
  715. // The following code is a bugfix for the fast WRAM copies
  716. // Extend the right edge to a specific value and then
  717. // clip to the actual desired edge.
  718. CP_WRITE(pjBase, DWG_CXRIGHT, xRight);
  719. switch(cjPelSize)
  720. {
  721. case 1: xRight |= 0x40;
  722. break;
  723. case 2: xRight |= 0x20;
  724. break;
  725. case 4: xRight |= 0x10;
  726. break;
  727. case 3: xRight = (((xRight * 3) + 2) | 0x40) / 3;
  728. break;
  729. }
  730. ////////////////////////////////////////////////////////////////
  731. CP_WRITE(pjBase, DWG_FXBNDRY,
  732. (((xRight) << bfxright_SHIFT) |
  733. ((prclDst->left + xOffset) & bfxleft_MASK)));
  734. yDst = yOffset + prclDst->top;
  735. ySrc = yOffset + prclDst->top + dy;
  736. // ylength_MASK not is needed since coordinates are within range
  737. CP_WRITE(pjBase, DWG_YDSTLEN,
  738. (((yDst) << yval_SHIFT) |
  739. ((prclDst->bottom - prclDst->top))));
  740. xSrc = xOffset + prclDst->left + dx;
  741. lSignedWidth = prclDst->right - prclDst->left - 1;
  742. lSrcStart = ppdev->ulYDstOrg + (ySrc * ppdev->cxMemory) + xSrc;
  743. CP_WRITE(pjBase, DWG_AR3, lSrcStart);
  744. CP_START(pjBase, DWG_AR0, lSrcStart + lSignedWidth);
  745. if (--crclDst == 0)
  746. break;
  747. prclDst++;
  748. i++;
  749. CHECK_FIFO_SPACE(pjBase, 6);
  750. }
  751. if (--c == 0)
  752. break;
  753. prcl++;
  754. CHECK_FIFO_SPACE(pjBase, 6);
  755. }
  756. // Restore the clipping:
  757. CHECK_FIFO_SPACE(pjBase, 1);
  758. CP_WRITE(pjBase, DWG_CXRIGHT, (ppdev->cxMemory - 1));
  759. }
  760. else
  761. {
  762. CP_WRITE(pjBase, DWG_DWGCTL, ulDwg);
  763. while (TRUE)
  764. {
  765. CP_WRITE(pjBase, DWG_FXBNDRY,
  766. (((prcl->right + xOffset - 1) << bfxright_SHIFT) |
  767. ((prcl->left + xOffset) & bfxleft_MASK)));
  768. yDst = yOffset + prcl->top;
  769. ySrc = yOffset + prcl->top + dy;
  770. if (flDirCode & sdy_BOTTOM_TO_TOP)
  771. {
  772. cy = prcl->bottom - prcl->top - 1;
  773. yDst += cy;
  774. ySrc += cy;
  775. }
  776. // ylength_MASK not is needed since coordinates are within range
  777. CP_WRITE(pjBase, DWG_YDSTLEN,
  778. (((yDst) << yval_SHIFT) |
  779. ((prcl->bottom - prcl->top))));
  780. xSrc = xOffset + prcl->left + dx;
  781. lSignedWidth = prcl->right - prcl->left - 1;
  782. if (flDirCode & scanleft_RIGHT_TO_LEFT)
  783. {
  784. xSrc += lSignedWidth;
  785. lSignedWidth = -lSignedWidth;
  786. }
  787. lSrcStart = ppdev->ulYDstOrg + (ySrc * ppdev->cxMemory) + xSrc;
  788. CP_WRITE(pjBase, DWG_AR3, lSrcStart);
  789. CP_START(pjBase, DWG_AR0, lSrcStart + lSignedWidth);
  790. if (--c == 0)
  791. break;
  792. prcl++;
  793. CHECK_FIFO_SPACE(pjBase, 4);
  794. }
  795. }
  796. }