Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1019 lines
32 KiB

  1. /******************************************************************************\
  2. *
  3. * $Workfile: bltio.c $
  4. *
  5. * Contains the low-level IO blt functions.
  6. *
  7. * Hopefully, if you're basing your display driver on this code, to
  8. * support all of DrvBitBlt and DrvCopyBits, you'll only have to implement
  9. * the following routines. You shouldn't have to modify much in
  10. * 'bitblt.c'. I've tried to make these routines as few, modular, simple,
  11. * and efficient as I could, while still accelerating as many calls as
  12. * possible that would be cost-effective in terms of performance wins
  13. * versus size and effort.
  14. *
  15. * Note: In the following, 'relative' coordinates refers to coordinates
  16. * that haven't yet had the offscreen bitmap (DFB) offset applied.
  17. * 'Absolute' coordinates have had the offset applied. For example,
  18. * we may be told to blt to (1, 1) of the bitmap, but the bitmap may
  19. * be sitting in offscreen memory starting at coordinate (0, 768) --
  20. * (1, 1) would be the 'relative' start coordinate, and (1, 769)
  21. * would be the 'absolute' start coordinate.
  22. *
  23. * Copyright (c) 1992-1995 Microsoft Corporation
  24. * Copyright (c) 1996 Cirrus Logic, Inc.
  25. *
  26. * $Log: S:/projects/drivers/ntsrc/display/bltio.c_v $
  27. *
  28. * Rev 1.2 Nov 07 1996 16:47:52 unknown
  29. * Clean up CAPS flags
  30. *
  31. * Rev 1.1 Oct 10 1996 15:36:10 unknown
  32. *
  33. *
  34. * Rev 1.1 12 Aug 1996 16:49:42 frido
  35. * Removed unaccessed local parameters.
  36. *
  37. * jl01 10-08-96 Do Transparent BLT w/o Solid Fill. Refer to PDRs#5511/6817.
  38. \******************************************************************************/
  39. #include "precomp.h"
  40. /**************************************************************************
  41. * VOID vIoFastPatRealize
  42. *
  43. * Realizes a pattern into offscreen memory.
  44. *
  45. **************************************************************************/
  46. VOID vIoFastPatRealize(
  47. PDEV* ppdev,
  48. RBRUSH* prb) // Points to brush realization structure
  49. {
  50. BRUSHENTRY* pbe;
  51. LONG iBrushCache;
  52. BYTE* pjPattern;
  53. LONG cjPattern;
  54. BYTE* pjPorts = ppdev->pjPorts;
  55. LONG lDelta = ppdev->lDelta;
  56. LONG lDeltaPat;
  57. LONG xCnt;
  58. LONG yCnt;
  59. ULONG ulDst;
  60. DISPDBG((10,"vFastPatRealize called"));
  61. pbe = prb->pbe;
  62. if ((pbe == NULL) || (pbe->prbVerify != prb))
  63. {
  64. // We have to allocate a new offscreen cache brush entry for
  65. // the brush:
  66. iBrushCache = ppdev->iBrushCache;
  67. pbe = &ppdev->abe[iBrushCache];
  68. iBrushCache++;
  69. if (iBrushCache >= ppdev->cBrushCache)
  70. iBrushCache = 0;
  71. ppdev->iBrushCache = iBrushCache;
  72. // Update our links:
  73. pbe->prbVerify = prb;
  74. prb->pbe = pbe;
  75. }
  76. //
  77. // Download brush into cache
  78. //
  79. pjPattern = (PBYTE) &prb->aulPattern[0]; // Copy from brush buffer
  80. cjPattern = PELS_TO_BYTES(TOTAL_BRUSH_SIZE);
  81. lDeltaPat = PELS_TO_BYTES(8);
  82. xCnt = PELS_TO_BYTES(8);
  83. yCnt = 8;
  84. ulDst = (pbe->y * ppdev->lDelta) + PELS_TO_BYTES(pbe->x);
  85. ppdev->pfnBankMap(ppdev, ppdev->lXferBank);
  86. CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
  87. CP_IO_DST_Y_OFFSET(ppdev, pjPorts, (lDeltaPat * 2));
  88. CP_IO_XCNT(ppdev, pjPorts, (xCnt - 1));
  89. CP_IO_YCNT(ppdev, pjPorts, (yCnt - 1));
  90. CP_IO_BLT_MODE(ppdev, pjPorts, SRC_CPU_DATA);
  91. CP_IO_ROP(ppdev, pjPorts, CL_SRC_COPY);
  92. CP_IO_DST_ADDR_ABS(ppdev, pjPorts, ulDst);
  93. CP_IO_START_BLT(ppdev, pjPorts);
  94. vImageTransfer(ppdev, pjPattern, lDeltaPat, xCnt, yCnt);
  95. //
  96. // Duplicate brush horizontally
  97. //
  98. CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
  99. CP_IO_XCNT(ppdev, pjPorts, (xCnt - 1));
  100. CP_IO_YCNT(ppdev, pjPorts, (yCnt - 1));
  101. CP_IO_BLT_MODE(ppdev, pjPorts, 0);
  102. CP_IO_SRC_Y_OFFSET(ppdev, pjPorts, (lDeltaPat * 2));
  103. CP_IO_SRC_ADDR(ppdev, pjPorts, ulDst);
  104. CP_IO_DST_ADDR_ABS(ppdev, pjPorts, (ulDst + lDeltaPat));
  105. CP_IO_START_BLT(ppdev, pjPorts);
  106. //
  107. // Duplicate brush vertically
  108. //
  109. CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
  110. CP_IO_SRC_Y_OFFSET(ppdev, pjPorts, (xCnt * 2));
  111. CP_IO_DST_Y_OFFSET(ppdev, pjPorts, (xCnt * 2));
  112. CP_IO_BLT_MODE(ppdev, pjPorts, 0);
  113. CP_IO_XCNT(ppdev, pjPorts, ((xCnt * 2) - 1));
  114. CP_IO_YCNT(ppdev, pjPorts, (yCnt - 1));
  115. CP_IO_SRC_ADDR(ppdev, pjPorts, ulDst);
  116. CP_IO_DST_ADDR_ABS(ppdev, pjPorts, (ulDst + PELS_TO_BYTES(128)));
  117. CP_IO_START_BLT(ppdev, pjPorts);
  118. #if 0
  119. {
  120. ////////////////////////////////////////////////////////////////
  121. // DEBUG TILED PATTERNS
  122. //
  123. // The following code helps to debug patterns if you break the
  124. // realization code. It copies the 2x2 tiled copy of the brush
  125. // to the visible screen.
  126. //
  127. POINTL ptl;
  128. RECTL rcl;
  129. ptl.x = pbe->x;
  130. ptl.y = pbe->y;
  131. rcl.left = 10;
  132. rcl.right = 10 + 16;
  133. rcl.top = ppdev->cyScreen - 10 - 16;
  134. rcl.bottom = ppdev->cyScreen - 10;
  135. {
  136. LONG lDelta = ppdev->lDelta;
  137. BYTE jHwRop;
  138. BYTE jMode;
  139. //
  140. // Make sure we can write to the video registers.
  141. //
  142. CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
  143. CP_IO_ROP(ppdev, pjPorts, CL_SRC_COPY);
  144. CP_IO_SRC_Y_OFFSET(ppdev, pjPorts, PELS_TO_BYTES(16));
  145. CP_IO_DST_Y_OFFSET(ppdev, pjPorts, lDelta);
  146. {
  147. //
  148. // Top to Bottom - Left to Right
  149. //
  150. jMode |= DIR_TBLR;
  151. CP_IO_BLT_MODE(ppdev, pjPorts, ppdev->jModeColor);
  152. {
  153. CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
  154. CP_IO_XCNT(ppdev, pjPorts, (PELS_TO_BYTES(rcl.right - rcl.left) - 1));
  155. CP_IO_YCNT(ppdev, pjPorts, (rcl.bottom - rcl.top - 1));
  156. CP_IO_SRC_ADDR(ppdev, pjPorts, (0 + ((ptl.y) * lDelta) + PELS_TO_BYTES(ptl.x)));
  157. CP_IO_DST_ADDR_ABS(ppdev, pjPorts, ((rcl.top * lDelta) + PELS_TO_BYTES(rcl.left)));
  158. CP_IO_START_BLT(ppdev, pjPorts);
  159. }
  160. }
  161. }
  162. }
  163. #endif
  164. }
  165. /**************************************************************************
  166. * VOID vIoFillPat
  167. *
  168. * This routine uses the pattern hardware to draw a patterned list of
  169. * rectangles.
  170. *
  171. **************************************************************************/
  172. VOID vIoFillPat(
  173. PDEV* ppdev,
  174. LONG c, // Can't be zero
  175. RECTL* prcl, // Array of relative coordinate destination rects
  176. ROP4 rop4, // Obvious?
  177. RBRUSH_COLOR rbc, // Drawing color is rbc.iSolidColor
  178. POINTL* pptlBrush) //
  179. {
  180. BYTE* pjPorts = ppdev->pjPorts;
  181. LONG lDelta = ppdev->lDelta;
  182. ULONG ulAlignedPatternOffset = ppdev->ulAlignedPatternOffset;
  183. ULONG ulPatternAddrBase;
  184. BYTE jHwRop;
  185. BYTE jMode;
  186. BRUSHENTRY* pbe; // Pointer to brush entry data, which is used
  187. // for keeping track of the location and status
  188. // of the pattern bits cached in off-screen
  189. // memory
  190. DISPDBG((10,"vFillPat called"));
  191. ASSERTDD(c > 0, "Can't handle zero rectangles");
  192. ASSERTDD(ppdev->cBpp < 3, "vFillPat only works at 8bpp and 16bpp");
  193. if ((rbc.prb->pbe == NULL) ||
  194. (rbc.prb->pbe->prbVerify != rbc.prb))
  195. {
  196. vIoFastPatRealize(ppdev, rbc.prb);
  197. DISPDBG((5, " -- Brush cache miss, put it at (%d,%d)", rbc.prb->pbe->x, rbc.prb->pbe->y));
  198. }
  199. else
  200. {
  201. DISPDBG((5, " -- Brush cache hit on brush at (%d,%d)", rbc.prb->pbe->x, rbc.prb->pbe->y));
  202. }
  203. pbe = rbc.prb->pbe;
  204. //
  205. // Fill the list of rectangles
  206. //
  207. ulPatternAddrBase = pbe->xy;
  208. jHwRop = gajHwMixFromRop2[(rop4 >> 2) & 0xf];
  209. jMode = ppdev->jModeColor | ENABLE_8x8_PATTERN_COPY;
  210. do {
  211. ULONG offset = 0;
  212. offset = PELS_TO_BYTES(
  213. (((prcl->top-pptlBrush->y)&7) << 4)
  214. +((prcl->left-pptlBrush->x)&7)
  215. );
  216. // align the pattern to a new location
  217. CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
  218. CP_IO_BLT_MODE(ppdev, pjPorts, 0);
  219. CP_IO_ROP(ppdev, pjPorts, CL_SRC_COPY);
  220. CP_IO_SRC_Y_OFFSET(ppdev, pjPorts, PELS_TO_BYTES(16));
  221. CP_IO_DST_Y_OFFSET(ppdev, pjPorts, PELS_TO_BYTES(8));
  222. CP_IO_SRC_ADDR(ppdev, pjPorts, (ulPatternAddrBase + offset));
  223. CP_IO_XCNT(ppdev, pjPorts, (PELS_TO_BYTES(8) - 1));
  224. CP_IO_YCNT(ppdev, pjPorts, (8 - 1));
  225. CP_IO_DST_ADDR_ABS(ppdev, pjPorts, ulAlignedPatternOffset);
  226. CP_IO_START_BLT(ppdev, pjPorts);
  227. // fill using aligned pattern
  228. CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
  229. CP_IO_BLT_MODE(ppdev, pjPorts, jMode);
  230. CP_IO_ROP(ppdev, pjPorts, jHwRop);
  231. CP_IO_DST_Y_OFFSET(ppdev, pjPorts, lDelta);
  232. CP_IO_SRC_ADDR(ppdev, pjPorts, ulAlignedPatternOffset);
  233. CP_IO_XCNT(ppdev, pjPorts, (PELS_TO_BYTES(prcl->right - prcl->left) - 1));
  234. CP_IO_YCNT(ppdev, pjPorts, (prcl->bottom - prcl->top - 1));
  235. CP_IO_DST_ADDR(ppdev, pjPorts, ((prcl->top * lDelta) + PELS_TO_BYTES(prcl->left)));
  236. CP_IO_START_BLT(ppdev, pjPorts);
  237. prcl++;
  238. } while (--c != 0);
  239. }
  240. /**************************************************************************
  241. * VOID vIoFillSolid
  242. *
  243. * Does a solid fill to a list of rectangles.
  244. *
  245. **************************************************************************/
  246. VOID vIoFillSolid(
  247. PDEV* ppdev,
  248. LONG c, // Can't be zero
  249. RECTL* prcl, // Array of relative coordinate destination rects
  250. ROP4 rop4, // Obvious?
  251. RBRUSH_COLOR rbc, // Drawing color is rbc.iSolidColor
  252. POINTL* pptlBrush) // Not used
  253. {
  254. BYTE* pjPorts = ppdev->pjPorts;
  255. LONG lDelta = ppdev->lDelta;
  256. LONG cBpp = ppdev->cBpp;
  257. ULONG ulSolidColor;
  258. BYTE jHwRop;
  259. DISPDBG((10,"vFillSolid called"));
  260. ASSERTDD(c > 0, "Can't handle zero rectangles");
  261. ulSolidColor = rbc.iSolidColor;
  262. if (cBpp == 1)
  263. {
  264. ulSolidColor |= ulSolidColor << 8;
  265. ulSolidColor |= ulSolidColor << 16;
  266. }
  267. else if (cBpp == 2)
  268. {
  269. ulSolidColor |= ulSolidColor << 16;
  270. }
  271. jHwRop = gajHwMixFromRop2[(rop4 >> 2) & 0xf];
  272. //
  273. // Make sure we can write to the video registers.
  274. //
  275. CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
  276. CP_IO_ROP(ppdev, pjPorts, jHwRop);
  277. CP_IO_SRC_ADDR(ppdev, pjPorts, ppdev->ulSolidColorOffset);
  278. CP_IO_DST_Y_OFFSET(ppdev, pjPorts, lDelta);
  279. CP_IO_BLT_MODE(ppdev, pjPorts, ENABLE_COLOR_EXPAND |
  280. ENABLE_8x8_PATTERN_COPY |
  281. ppdev->jModeColor);
  282. CP_IO_FG_COLOR(ppdev, pjPorts, ulSolidColor);
  283. //
  284. // Fill the list of rectangles
  285. //
  286. while (TRUE)
  287. {
  288. CP_IO_XCNT(ppdev, pjPorts, (PELS_TO_BYTES(prcl->right - prcl->left) - 1));
  289. CP_IO_YCNT(ppdev, pjPorts, (prcl->bottom - prcl->top - 1));
  290. CP_IO_DST_ADDR(ppdev, pjPorts, ((prcl->top * lDelta) + PELS_TO_BYTES(prcl->left)));
  291. CP_IO_START_BLT(ppdev, pjPorts);
  292. if (--c == 0)
  293. return;
  294. prcl++;
  295. CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
  296. }
  297. }
  298. /**************************************************************************
  299. * VOID vIoCopyBlt
  300. *
  301. * Does a screen-to-screen blt of a list of rectangles.
  302. *
  303. **************************************************************************/
  304. VOID vIoCopyBlt(
  305. PDEV* ppdev,
  306. LONG c, // Can't be zero
  307. RECTL* prcl, // Array of relative coordinates destination rectangles
  308. ROP4 rop4, // Obvious?
  309. POINTL* pptlSrc, // Original unclipped source point
  310. RECTL* prclDst) // Original unclipped destination rectangle
  311. {
  312. LONG dx;
  313. LONG dy; // Add delta to destination to get source
  314. LONG xyOffset = ppdev->xyOffset;
  315. BYTE* pjPorts = ppdev->pjPorts;
  316. LONG lDelta = ppdev->lDelta;
  317. BYTE jHwRop;
  318. DISPDBG((10,"vCopyBlt called"));
  319. ASSERTDD(c > 0, "Can't handle zero rectangles");
  320. //
  321. // The src-dst delta will be the same for all rectangles
  322. //
  323. dx = pptlSrc->x - prclDst->left;
  324. dy = pptlSrc->y - prclDst->top;
  325. //
  326. // Make sure we can write to the video registers.
  327. //
  328. CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
  329. jHwRop = gajHwMixFromRop2[rop4 & 0xf];
  330. CP_IO_ROP(ppdev, pjPorts, jHwRop);
  331. CP_IO_SRC_Y_OFFSET(ppdev, pjPorts, lDelta);
  332. CP_IO_DST_Y_OFFSET(ppdev, pjPorts, lDelta);
  333. //
  334. // The accelerator may not be as fast at doing right-to-left copies, so
  335. // only do them when the rectangles truly overlap:
  336. //
  337. if (!OVERLAP(prclDst, pptlSrc) ||
  338. (prclDst->top < pptlSrc->y) ||
  339. ((prclDst->top == pptlSrc->y) && (prclDst->left <= pptlSrc->x))
  340. )
  341. {
  342. //
  343. // Top to Bottom - Left to Right
  344. //
  345. DISPDBG((12,"Top to Bottom - Left to Right"));
  346. CP_IO_BLT_MODE(ppdev, pjPorts, DIR_TBLR);
  347. while (TRUE)
  348. {
  349. CP_IO_XCNT(ppdev, pjPorts, (PELS_TO_BYTES(prcl->right - prcl->left) - 1));
  350. CP_IO_YCNT(ppdev, pjPorts, (prcl->bottom - prcl->top - 1));
  351. CP_IO_SRC_ADDR(ppdev, pjPorts, (xyOffset + ((prcl->top + dy) * lDelta) + PELS_TO_BYTES(prcl->left + dx)));
  352. CP_IO_DST_ADDR(ppdev, pjPorts, ((prcl->top * lDelta) + PELS_TO_BYTES(prcl->left)));
  353. CP_IO_START_BLT(ppdev, pjPorts);
  354. if (--c == 0)
  355. return;
  356. prcl++;
  357. CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
  358. }
  359. }
  360. else
  361. {
  362. //
  363. // Bottom to Top - Right to Left
  364. //
  365. DISPDBG((12,"Bottom to Top - Right to Left"));
  366. CP_IO_BLT_MODE(ppdev, pjPorts, DIR_BTRL);
  367. while (TRUE)
  368. {
  369. CP_IO_XCNT(ppdev, pjPorts, (PELS_TO_BYTES(prcl->right - prcl->left) - 1));
  370. CP_IO_YCNT(ppdev, pjPorts, (prcl->bottom - prcl->top - 1));
  371. CP_IO_SRC_ADDR(ppdev, pjPorts, (xyOffset + ((prcl->bottom - 1 + dy) * lDelta) + PELS_TO_BYTES(prcl->right + dx) - 1));
  372. CP_IO_DST_ADDR(ppdev, pjPorts, (((prcl->bottom - 1) * lDelta) + PELS_TO_BYTES(prcl->right) - 1));
  373. CP_IO_START_BLT(ppdev, pjPorts);
  374. if (--c == 0)
  375. return;
  376. prcl++;
  377. CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
  378. }
  379. }
  380. }
  381. /******************************Public*Routine******************************\
  382. * VOID vIoXfer1bpp
  383. *
  384. * Low-level routine used to transfer monochrome data to the screen using
  385. * DWORD writes to the blt engine.
  386. *
  387. * This can handle opaque or transparent expansions. It does opaque
  388. * expansions by drawing the opaque rectangle first and then transparently
  389. * expands the foreground bits.
  390. *
  391. \**************************************************************************/
  392. VOID vIoXfer1bpp(
  393. PDEV* ppdev,
  394. LONG c, // Count of rectangles, can't be zero
  395. RECTL* prcl, // List of destination rectangles, in relative
  396. // coordinates
  397. ROP4 rop4, // Actually had better be a rop3
  398. SURFOBJ* psoSrc, // Source surface
  399. POINTL* pptlSrc, // Original unclipped source point
  400. RECTL* prclDst, // Original unclipped destination rectangle
  401. XLATEOBJ* pxlo) // Translate that provides color-expansion information
  402. {
  403. ULONG* pulXfer;
  404. ULONG* pul;
  405. LONG ix;
  406. LONG iy;
  407. LONG cxWidthInBytes;
  408. BYTE* pjBits;
  409. POINTL ptlDst;
  410. POINTL ptlSrc;
  411. SIZEL sizlDst;
  412. LONG cxLeftMask;
  413. LONG cxRightMask;
  414. ULONG ulDstAddr;
  415. INT nDwords;
  416. ULONG ulLeftMask;
  417. ULONG ulRightMask;
  418. LONG dx;
  419. LONG dy;
  420. BYTE* pjPorts = ppdev->pjPorts;
  421. LONG lDelta = ppdev->lDelta;
  422. LONG lDeltaSrc = psoSrc->lDelta;
  423. LONG cBpp = ppdev->cBpp;
  424. ULONG ulFgColor = pxlo->pulXlate[1];
  425. ULONG ulBgColor = pxlo->pulXlate[0];
  426. // Since the hardware clipping on some of the Cirrus chips is broken, we
  427. // do the clipping by rounding out the edges to dword boundaries and then
  428. // doing the blt transparently. In the event that we want the expansion
  429. // to be opaque, we do the opaquing blt in advance. One side effect of
  430. // this is that the destination bits are no longer valid for processing
  431. // the rop. This could probably be optimized by doing the edges seperately
  432. // and then doing the middle section in one pass. However, this is
  433. // complicated by a 5434 bug that breaks blts less than 10 pixels wide.
  434. ASSERTDD(c > 0, "Can't handle zero rectangles");
  435. ASSERTDD(((rop4 & 0xff00) == 0xcc00), "Expected foreground rop of 0xcc");
  436. //
  437. // The src-dst delta will be the same for all rectangles
  438. //
  439. dx = pptlSrc->x - prclDst->left;
  440. dy = pptlSrc->y - prclDst->top;
  441. if (cBpp == 1)
  442. {
  443. ulFgColor = (ulFgColor << 8) | (ulFgColor & 0xff);
  444. ulBgColor = (ulBgColor << 8) | (ulBgColor & 0xff);
  445. ulFgColor = (ulFgColor << 16) | (ulFgColor & 0xffff);
  446. ulBgColor = (ulBgColor << 16) | (ulBgColor & 0xffff);
  447. }
  448. else if (cBpp == 2)
  449. {
  450. ulFgColor = (ulFgColor << 16) | (ulFgColor & 0xffff);
  451. ulBgColor = (ulBgColor << 16) | (ulBgColor & 0xffff);
  452. }
  453. pulXfer = ppdev->pulXfer;
  454. ppdev->pfnBankMap(ppdev, ppdev->lXferBank);
  455. CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
  456. CP_IO_DST_Y_OFFSET(ppdev, pjPorts, lDelta);
  457. if (rop4 != 0xCCAA)
  458. {
  459. LONG lCnt = c;
  460. RECTL* prclTmp = prcl;
  461. BYTE jHwBgRop = gajHwMixFromRop2[rop4 & 0xf];
  462. CP_IO_ROP(ppdev, pjPorts, jHwBgRop);
  463. CP_IO_FG_COLOR(ppdev, pjPorts, ulBgColor);
  464. CP_IO_SRC_ADDR(ppdev, pjPorts, ppdev->ulSolidColorOffset);
  465. CP_IO_BLT_MODE(ppdev, pjPorts, ppdev->jModeColor |
  466. ENABLE_COLOR_EXPAND |
  467. ENABLE_8x8_PATTERN_COPY);
  468. do
  469. {
  470. // calculate the size of the blt
  471. ptlDst.x = prclTmp->left;
  472. ptlDst.y = prclTmp->top;
  473. sizlDst.cx = prclTmp->right - ptlDst.x;
  474. sizlDst.cy = prclTmp->bottom - ptlDst.y;
  475. //
  476. // Fill the background rectangle with the background color
  477. //
  478. // Set the dest addresses
  479. ulDstAddr = (ptlDst.y * lDelta) + PELS_TO_BYTES(ptlDst.x);
  480. CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
  481. //
  482. // Tell the hardware how many bytes we'd like to write:
  483. // sizlDst.cx * sizelDst.cy
  484. //
  485. CP_IO_XCNT(ppdev, pjPorts, PELS_TO_BYTES(sizlDst.cx) - 1);
  486. CP_IO_YCNT(ppdev, pjPorts, sizlDst.cy - 1);
  487. CP_IO_DST_ADDR(ppdev, pjPorts, ulDstAddr);
  488. // Start the blt operation
  489. CP_IO_START_BLT(ppdev, pjPorts);
  490. prclTmp++;
  491. } while (--lCnt != 0);
  492. CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
  493. }
  494. CP_IO_FG_COLOR(ppdev, pjPorts, ulFgColor);
  495. CP_IO_BG_COLOR(ppdev, pjPorts, ~ulFgColor);
  496. CP_IO_XPAR_COLOR(ppdev, pjPorts, ~ulFgColor);
  497. CP_IO_ROP(ppdev, pjPorts, CL_SRC_COPY);
  498. CP_IO_BLT_MODE(ppdev, pjPorts, ppdev->jModeColor |
  499. ENABLE_COLOR_EXPAND |
  500. ENABLE_TRANSPARENCY_COMPARE |
  501. SRC_CPU_DATA);
  502. CP_IO_BLT_EXT_MODE(ppdev, pjPorts, 0); // jl01
  503. do
  504. {
  505. // calculate the size of the blt
  506. ptlDst.x = prcl->left;
  507. ptlDst.y = prcl->top;
  508. sizlDst.cx = prcl->right - ptlDst.x;
  509. sizlDst.cy = prcl->bottom - ptlDst.y;
  510. // calculate the number of dwords per scan line
  511. ptlSrc.x = prcl->left + dx;
  512. ptlSrc.y = prcl->top + dy;
  513. // Floor the source.
  514. // Extend the width by the amount required to floor to a dword boundary.
  515. // Set the size of the left mask.
  516. // Floor the dest, so it aligns with the floored source.
  517. if ((cxLeftMask = (ptlSrc.x & 31)))
  518. {
  519. sizlDst.cx += cxLeftMask;
  520. ptlSrc.x &= ~31;
  521. ptlDst.x -= cxLeftMask;
  522. }
  523. ulLeftMask = gaulLeftClipMask[cxLeftMask];
  524. // Ceil the cx to a dword boundary.
  525. if (cxRightMask = (sizlDst.cx & 31))
  526. {
  527. cxRightMask = 32 - cxRightMask;
  528. sizlDst.cx = (sizlDst.cx + 31) & ~31;
  529. }
  530. ulRightMask = gaulRightClipMask[cxRightMask];
  531. if (sizlDst.cx == 32)
  532. {
  533. ulLeftMask &= ulRightMask;
  534. ulRightMask = 0;
  535. }
  536. // Note: At this point sizlDst.cx is the width of the blt in pixels,
  537. // floored to a dword boundary, and ceiled to a dword boundary.
  538. // Calculate the width in Bytes
  539. cxWidthInBytes = sizlDst.cx >> 3;
  540. // Calculate the number of Dwords and any remaining bytes
  541. nDwords = cxWidthInBytes >> 2;
  542. ASSERTDD(((cxWidthInBytes & 0x03) == 0),
  543. "cxWidthInBytes is not a DWORD multiple");
  544. // Calculate the address of the source bitmap
  545. // This is to a byte boundary.
  546. pjBits = (PBYTE) psoSrc->pvScan0;
  547. pjBits += ptlSrc.y * lDeltaSrc;
  548. pjBits += ptlSrc.x >> 3;
  549. ASSERTDD((((ULONG_PTR)pjBits & 0x03) == 0),
  550. "pjBits not DWORD aligned like it should be");
  551. //
  552. // Blt the 1 bpp bitmap
  553. //
  554. ulDstAddr = (ptlDst.y * lDelta) + PELS_TO_BYTES(ptlDst.x);
  555. CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
  556. CP_IO_XCNT(ppdev, pjPorts, PELS_TO_BYTES(sizlDst.cx) - 1);
  557. CP_IO_YCNT(ppdev, pjPorts, sizlDst.cy - 1);
  558. //
  559. // The 542x chips require a write to the Src Address Register when
  560. // doing a host transfer with color expansion. The value is
  561. // irrelevant, but the write is crucial. This is documented in
  562. // the manual, not the errata. Go figure.
  563. //
  564. CP_IO_SRC_ADDR(ppdev, pjPorts, 0);
  565. CP_IO_DST_ADDR(ppdev, pjPorts, ulDstAddr);
  566. CP_IO_START_BLT(ppdev, pjPorts);
  567. //
  568. // Transfer the host bitmap.
  569. //
  570. if (ulRightMask)
  571. {
  572. //
  573. // Blt is > 1 DWORD wide (nDwords > 1)
  574. //
  575. for (iy = 0; iy < sizlDst.cy; iy++)
  576. {
  577. pul = (ULONG*) pjBits;
  578. //*pulXfer++ = *(((ULONG*)pul)++) & ulLeftMask;
  579. WRITE_REGISTER_ULONG(pulXfer, (*((ULONG*)pul) & ulLeftMask));
  580. pul++;
  581. for (ix = 0; ix < (nDwords-2); ix++)
  582. {
  583. //*pulXfer++ = *(((ULONG*)pul)++);
  584. WRITE_REGISTER_ULONG(pulXfer, (*((ULONG*)pul)));
  585. pul++;
  586. }
  587. //*pulXfer++ = *(((ULONG*)pul)++) & ulRightMask;
  588. WRITE_REGISTER_ULONG(pulXfer, (*((ULONG*)pul) & ulRightMask));
  589. pul++;
  590. pjBits += lDeltaSrc;
  591. //pulXfer = ppdev->pulXfer;
  592. CP_MEMORY_BARRIER(); // Flush memory cache when we reset the address
  593. }
  594. }
  595. else
  596. {
  597. //
  598. // Blt is 1 DWORD wide (nDwords == 1)
  599. //
  600. for (iy = 0; iy < sizlDst.cy; iy++)
  601. {
  602. //*pulXfer = *((ULONG*)pjBits) & ulLeftMask;
  603. WRITE_REGISTER_ULONG(pulXfer, (*((ULONG*)pjBits) & ulLeftMask));
  604. pjBits += lDeltaSrc;
  605. CP_MEMORY_BARRIER(); // Flush memory cache
  606. }
  607. }
  608. prcl++;
  609. } while (--c != 0);
  610. }
  611. /******************************Public*Routine******************************\
  612. * VOID vIoXfer4bpp
  613. *
  614. * Does a 4bpp transfer from a bitmap to the screen.
  615. *
  616. * NOTE: The screen must be 8bpp for this function to be called!
  617. *
  618. * The reason we implement this is that a lot of resources are kept as 4bpp,
  619. * and used to initialize DFBs, some of which we of course keep off-screen.
  620. *
  621. \**************************************************************************/
  622. // XLATE_BUFFER_SIZE defines the size of the stack-based buffer we use
  623. // for doing the translate. Note that in general stack buffers should
  624. // be kept as small as possible. The OS guarantees us only 8k for stack
  625. // from GDI down to the display driver in low memory situations; if we
  626. // ask for more, we'll access violate. Note also that at any time the
  627. // stack buffer cannot be larger than a page (4k) -- otherwise we may
  628. // miss touching the 'guard page' and access violate then too.
  629. #define XLATE_BUFFER_SIZE 256
  630. VOID vIoXfer4bpp(
  631. PDEV* ppdev,
  632. LONG c, // Count of rectangles, can't be zero
  633. RECTL* prcl, // List of destination rectangles, in relative
  634. // coordinates
  635. ULONG rop4, // rop4
  636. SURFOBJ* psoSrc, // Source surface
  637. POINTL* pptlSrc, // Original unclipped source point
  638. RECTL* prclDst, // Original unclipped destination rectangle
  639. XLATEOBJ* pxlo) // Translate that provides colour-expansion information
  640. {
  641. ULONG* pulXfer = ppdev->pulXfer;
  642. BYTE* pjPorts = ppdev->pjPorts;
  643. LONG lDelta = ppdev->lDelta;
  644. ULONG ulDstAddr;
  645. LONG dx;
  646. LONG dy;
  647. LONG cx;
  648. LONG cy;
  649. LONG lSrcDelta;
  650. BYTE* pjSrcScan0;
  651. BYTE* pjScan;
  652. BYTE* pjSrc;
  653. BYTE* pjDst;
  654. LONG cxThis;
  655. LONG cxToGo;
  656. LONG xSrc;
  657. LONG iLoop;
  658. BYTE jSrc;
  659. ULONG* pulXlate;
  660. LONG cdwThis;
  661. BYTE* pjBuf;
  662. BYTE ajBuf[XLATE_BUFFER_SIZE];
  663. ASSERTDD(ppdev->iBitmapFormat == BMF_8BPP, "Screen must be 8bpp");
  664. ASSERTDD(psoSrc->iBitmapFormat == BMF_4BPP, "Source must be 4bpp");
  665. ASSERTDD(c > 0, "Can't handle zero rectangles");
  666. ASSERTDD(((rop4 & 0xff00) >> 8) == (rop4 & 0xff),
  667. "Expect only a rop2");
  668. DISPDBG((5, "vXfer4bpp: entry"));
  669. dx = pptlSrc->x - prclDst->left;
  670. dy = pptlSrc->y - prclDst->top; // Add to destination to get source
  671. lSrcDelta = psoSrc->lDelta;
  672. pjSrcScan0 = psoSrc->pvScan0;
  673. ppdev->pfnBankMap(ppdev, ppdev->lXferBank);
  674. CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
  675. CP_IO_DST_Y_OFFSET(ppdev, pjPorts, lDelta);
  676. CP_IO_ROP(ppdev, pjPorts, gajHwMixFromRop2[rop4 & 0xf]);
  677. CP_IO_BLT_MODE(ppdev, pjPorts, SRC_CPU_DATA);
  678. while(TRUE)
  679. {
  680. ulDstAddr = (prcl->top * lDelta) + PELS_TO_BYTES(prcl->left);
  681. cx = prcl->right - prcl->left;
  682. cy = prcl->bottom - prcl->top;
  683. CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
  684. CP_IO_XCNT(ppdev, pjPorts, PELS_TO_BYTES(cx) - 1);
  685. CP_IO_YCNT(ppdev, pjPorts, cy - 1);
  686. CP_IO_DST_ADDR(ppdev, pjPorts, ulDstAddr);
  687. pulXlate = pxlo->pulXlate;
  688. xSrc = prcl->left + dx;
  689. pjScan = pjSrcScan0 + (prcl->top + dy) * lSrcDelta + (xSrc >> 1);
  690. CP_IO_START_BLT(ppdev, pjPorts);
  691. do {
  692. pjSrc = pjScan;
  693. cxToGo = cx; // # of pels per scan in 4bpp source
  694. do {
  695. cxThis = XLATE_BUFFER_SIZE;
  696. // We can handle XLATE_BUFFER_SIZE number
  697. // of pels in this xlate batch
  698. cxToGo -= cxThis; // cxThis will be the actual number of
  699. // pels we'll do in this xlate batch
  700. if (cxToGo < 0)
  701. cxThis += cxToGo;
  702. pjDst = ajBuf; // Points to our temporary batch buffer
  703. // We handle alignment ourselves because it's easy to
  704. // do, rather than pay the cost of setting/resetting
  705. // the scissors register:
  706. if (xSrc & 1)
  707. {
  708. // When unaligned, we have to be careful not to read
  709. // past the end of the 4bpp bitmap (that could
  710. // potentially cause us to access violate):
  711. iLoop = cxThis >> 1; // Each loop handles 2 pels;
  712. // we'll handle odd pel
  713. // separately
  714. jSrc = *pjSrc;
  715. while (iLoop-- != 0)
  716. {
  717. *pjDst++ = (BYTE) pulXlate[jSrc & 0xf];
  718. jSrc = *(++pjSrc);
  719. *pjDst++ = (BYTE) pulXlate[jSrc >> 4];
  720. }
  721. if (cxThis & 1)
  722. *pjDst = (BYTE) pulXlate[jSrc & 0xf];
  723. }
  724. else
  725. {
  726. iLoop = (cxThis + 1) >> 1; // Each loop handles 2 pels
  727. do {
  728. jSrc = *pjSrc++;
  729. *pjDst++ = (BYTE) pulXlate[jSrc >> 4];
  730. *pjDst++ = (BYTE) pulXlate[jSrc & 0xf];
  731. } while (--iLoop != 0);
  732. }
  733. // The number of bytes we'll transfer is equal to the number
  734. // of pels we've processed in the batch. Since we're
  735. // transferring words, we have to round up to get the word
  736. // count:
  737. cdwThis = (cxThis + 3) >> 2;
  738. pjBuf = ajBuf;
  739. TRANSFER_DWORD_ALIGNED(ppdev, pulXfer, pjBuf, cdwThis);
  740. } while (cxToGo > 0);
  741. pjScan += lSrcDelta; // Advance to next source scan. Note
  742. // that we could have computed the
  743. // value to advance 'pjSrc' directly,
  744. // but this method is less
  745. // error-prone.
  746. } while (--cy != 0);
  747. if (--c == 0)
  748. return;
  749. prcl++;
  750. }
  751. }
  752. /******************************Public*Routine******************************\
  753. * VOID vIoXferNative
  754. *
  755. * Transfers a bitmap that is the same color depth as the display to
  756. * the screen via the data transfer register, with no translation.
  757. *
  758. \**************************************************************************/
  759. VOID vIoXferNative(
  760. PDEV* ppdev,
  761. LONG c, // Count of rectangles, can't be zero
  762. RECTL* prcl, // Array of relative coordinates destination rectangles
  763. ULONG rop4, // rop4
  764. SURFOBJ* psoSrc, // Source surface
  765. POINTL* pptlSrc, // Original unclipped source point
  766. RECTL* prclDst, // Original unclipped destination rectangle
  767. XLATEOBJ* pxlo) // Not used
  768. {
  769. ULONG* pulXfer = ppdev->pulXfer;
  770. BYTE* pjPorts = ppdev->pjPorts;
  771. LONG lDelta = ppdev->lDelta;
  772. ULONG ulDstAddr;
  773. LONG dx;
  774. LONG dy;
  775. LONG cx;
  776. LONG cy;
  777. LONG lSrcDelta;
  778. BYTE* pjSrcScan0;
  779. BYTE* pjSrc;
  780. LONG cjSrc;
  781. ASSERTDD((pxlo == NULL) || (pxlo->flXlate & XO_TRIVIAL),
  782. "Can handle trivial xlate only");
  783. ASSERTDD(psoSrc->iBitmapFormat == ppdev->iBitmapFormat,
  784. "Source must be same color depth as screen");
  785. ASSERTDD(c > 0, "Can't handle zero rectangles");
  786. ASSERTDD(((rop4 & 0xff00) >> 8) == (rop4 & 0xff),
  787. "Expect only a rop2");
  788. dx = pptlSrc->x - prclDst->left;
  789. dy = pptlSrc->y - prclDst->top; // Add to destination to get source
  790. lSrcDelta = psoSrc->lDelta;
  791. pjSrcScan0 = psoSrc->pvScan0;
  792. ppdev->pfnBankMap(ppdev, ppdev->lXferBank);
  793. CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
  794. CP_IO_DST_Y_OFFSET(ppdev, pjPorts, lDelta);
  795. CP_IO_ROP(ppdev, pjPorts, gajHwMixFromRop2[rop4 & 0xf]);
  796. CP_IO_BLT_MODE(ppdev, pjPorts, SRC_CPU_DATA);
  797. while(TRUE)
  798. {
  799. ulDstAddr = (prcl->top * lDelta) + PELS_TO_BYTES(prcl->left);
  800. cx = prcl->right - prcl->left;
  801. cy = prcl->bottom - prcl->top;
  802. CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
  803. CP_IO_XCNT(ppdev, pjPorts, PELS_TO_BYTES(cx) - 1);
  804. CP_IO_YCNT(ppdev, pjPorts, cy - 1);
  805. CP_IO_DST_ADDR(ppdev, pjPorts, ulDstAddr);
  806. cjSrc = PELS_TO_BYTES(cx);
  807. pjSrc = pjSrcScan0 + (prcl->top + dy) * lSrcDelta
  808. + (PELS_TO_BYTES(prcl->left + dx));
  809. CP_IO_START_BLT(ppdev, pjPorts);
  810. vImageTransfer(ppdev, pjSrc, lSrcDelta, cjSrc, cy);
  811. if (--c == 0)
  812. return;
  813. prcl++;
  814. }
  815. }