Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

505 lines
15 KiB

  1. /******************************Module*Header*******************************\
  2. * Module Name: bltmga.c
  3. *
  4. * Contains the low-level blt functions.
  5. *
  6. * Hopefully, if you're basing your display driver on this code, to
  7. * support all of DrvBitBlt and DrvCopyBits, you'll only have to implement
  8. * the following routines. You shouldn't have to modify much in
  9. * 'bitblt.c'. I've tried to make these routines as few, modular, simple,
  10. * and efficient as I could, while still accelerating as many calls as
  11. * possible that would be cost-effective in terms of performance wins
  12. * versus size and effort.
  13. *
  14. * Note: In the following, 'relative' coordinates refers to coordinates
  15. * that haven't yet had the offscreen bitmap (DFB) offset applied.
  16. * 'Absolute' coordinates have had the offset applied. For example,
  17. * we may be told to blt to (1, 1) of the bitmap, but the bitmap may
  18. * be sitting in offscreen memory starting at coordinate (0, 768) --
  19. * (1, 1) would be the 'relative' start coordinate, and (1, 769)
  20. * would be the 'absolute' start coordinate.
  21. *
  22. * Copyright (c) 1992-1996 Microsoft Corporation
  23. * Copyright (c) 1993-1996 Matrox Electronic Systems, Ltd.
  24. \**************************************************************************/
  25. #include "precomp.h"
  26. /******************************Public*Routine******************************\
  27. * VOID vMgaFillSolid
  28. *
  29. * Fills a list of rectangles with a solid colour.
  30. *
  31. \**************************************************************************/
  32. VOID vMgaFillSolid( // Type FNFILL
  33. PDEV* ppdev,
  34. LONG c, // Can't be zero
  35. RECTL* prcl, // List of rectangles to be filled, in relative
  36. // coordinates
  37. ULONG rop4, // Rop4
  38. RBRUSH_COLOR rbc, // Drawing colour is rbc.iSolidColor
  39. POINTL* pptlBrush) // Not used
  40. {
  41. BYTE* pjBase;
  42. LONG xOffset;
  43. LONG yOffset;
  44. ULONG ulDwg;
  45. ULONG ulHwMix;
  46. pjBase = ppdev->pjBase;
  47. xOffset = ppdev->xOffset;
  48. yOffset = ppdev->yOffset;
  49. if (rop4 == 0xf0f0) // PATCOPY
  50. {
  51. ulDwg = opcode_TRAP + atype_RPL + blockm_ON +
  52. pattern_OFF + transc_BG_OPAQUE +
  53. bop_SRCCOPY;
  54. }
  55. else
  56. {
  57. // The ROP3 is a combination of P and D only:
  58. //
  59. // ROP3 Mga ROP3 Mga ROP3 Mga ROP3 Mga
  60. //
  61. // 0x00 0 0x50 4 0xa0 8 0xf0 c
  62. // 0x05 1 0x55 5 0xa5 9 0xf5 d
  63. // 0x0a 2 0x5a 6 0xaa a 0xfa e
  64. // 0x0f 3 0x5f 7 0xaf b 0xff f
  65. ulHwMix = (rop4 & 0x03) + ((rop4 & 0x30) >> 2);
  66. if (ulHwMix == MGA_WHITENESS)
  67. {
  68. rbc.iSolidColor = 0xffffffff;
  69. ulDwg = opcode_TRAP + atype_RPL + blockm_ON +
  70. pattern_OFF + transc_BG_OPAQUE +
  71. bop_SRCCOPY;
  72. }
  73. else if (ulHwMix == MGA_BLACKNESS)
  74. {
  75. rbc.iSolidColor = 0;
  76. ulDwg = opcode_TRAP + atype_RPL + blockm_ON +
  77. pattern_OFF + transc_BG_OPAQUE +
  78. bop_SRCCOPY;
  79. }
  80. else
  81. {
  82. ulDwg = opcode_TRAP + atype_RSTR + blockm_OFF +
  83. pattern_OFF + transc_BG_OPAQUE +
  84. (ulHwMix << 16);
  85. }
  86. }
  87. if ((GET_CACHE_FLAGS(ppdev, (SIGN_CACHE | ARX_CACHE | PATTERN_CACHE))) ==
  88. (SIGN_CACHE | ARX_CACHE | PATTERN_CACHE))
  89. {
  90. CHECK_FIFO_SPACE(pjBase, 6);
  91. }
  92. else
  93. {
  94. CHECK_FIFO_SPACE(pjBase, 15);
  95. if (!(GET_CACHE_FLAGS(ppdev, SIGN_CACHE)))
  96. {
  97. CP_WRITE(pjBase, DWG_SGN, 0);
  98. }
  99. if (!(GET_CACHE_FLAGS(ppdev, ARX_CACHE)))
  100. {
  101. CP_WRITE(pjBase, DWG_AR1, 0);
  102. CP_WRITE(pjBase, DWG_AR2, 0);
  103. CP_WRITE(pjBase, DWG_AR4, 0);
  104. CP_WRITE(pjBase, DWG_AR5, 0);
  105. }
  106. if (!(GET_CACHE_FLAGS(ppdev, PATTERN_CACHE)))
  107. {
  108. CP_WRITE(pjBase, DWG_SRC0, 0xFFFFFFFF);
  109. CP_WRITE(pjBase, DWG_SRC1, 0xFFFFFFFF);
  110. CP_WRITE(pjBase, DWG_SRC2, 0xFFFFFFFF);
  111. CP_WRITE(pjBase, DWG_SRC3, 0xFFFFFFFF);
  112. }
  113. ppdev->HopeFlags = (SIGN_CACHE | ARX_CACHE | PATTERN_CACHE);
  114. }
  115. CP_WRITE(pjBase, DWG_FCOL, COLOR_REPLICATE(ppdev, rbc.iSolidColor));
  116. CP_WRITE(pjBase, DWG_DWGCTL, ulDwg);
  117. while(TRUE)
  118. {
  119. CP_WRITE(pjBase, DWG_FXLEFT, prcl->left + xOffset);
  120. CP_WRITE(pjBase, DWG_FXRIGHT, prcl->right + xOffset);
  121. CP_WRITE(pjBase, DWG_LEN, prcl->bottom - prcl->top);
  122. CP_START(pjBase, DWG_YDST, prcl->top + yOffset);
  123. if (--c == 0)
  124. return;
  125. prcl++;
  126. CHECK_FIFO_SPACE(pjBase, 4);
  127. }
  128. }
  129. /******************************Public*Routine******************************\
  130. * VOID vMgaXfer1bpp
  131. *
  132. * This routine colour expands a monochrome bitmap.
  133. *
  134. \**************************************************************************/
  135. VOID vMgaXfer1bpp( // Type FNXFER
  136. PDEV* ppdev,
  137. LONG c, // Count of rectangles, can't be zero
  138. RECTL* prcl, // List of destination rectangles, in relative
  139. // coordinates
  140. ULONG rop4, // Foreground and background hardware mix
  141. SURFOBJ* psoSrc, // Source surface
  142. POINTL* pptlSrc, // Original unclipped source point
  143. RECTL* prclDst, // Original unclipped destination rectangle
  144. XLATEOBJ* pxlo) // Translate that provides colour-expansion information
  145. {
  146. BYTE* pjBase;
  147. LONG xOffset;
  148. LONG yOffset;
  149. ULONG ulBitFlip;
  150. LONG dx;
  151. LONG dy;
  152. BYTE* pjSrcScan0;
  153. LONG lSrcDelta;
  154. ULONG ulDwg;
  155. ULONG ulHwMix;
  156. ULONG* pulXlate;
  157. LONG cxDst;
  158. LONG cyDst;
  159. LONG xAlign;
  160. ULONG cFullLoops;
  161. ULONG cRemLoops;
  162. BYTE* pjDma;
  163. ULONG* pulSrc;
  164. ULONG cdSrc;
  165. LONG lSrcSkip;
  166. ULONG* pulDst;
  167. LONG i;
  168. BOOL bHwBug;
  169. LONG cFifo;
  170. ASSERTDD(((rop4 & 0xff00) >> 8) == (rop4 & 0xff),
  171. "Expect only an opaquing rop");
  172. pjBase = ppdev->pjBase;
  173. xOffset = ppdev->xOffset;
  174. yOffset = ppdev->yOffset;
  175. ulBitFlip = 0;
  176. dx = pptlSrc->x - prclDst->left;
  177. dy = pptlSrc->y - prclDst->top; // Add to destination to get source
  178. pjSrcScan0 = psoSrc->pvScan0;
  179. lSrcDelta = psoSrc->lDelta;
  180. if (rop4 == 0xcccc) // SRCCOPY
  181. {
  182. ulDwg = opcode_ILOAD+atype_RPL+blockm_OFF+bltmod_BMONO+
  183. hbgr_SRC_WINDOWS+pattern_OFF+transc_BG_OPAQUE+bop_SRCCOPY;
  184. }
  185. else if ((rop4 == 0xb8b8) || (rop4 == 0xe2e2))
  186. {
  187. ulDwg = opcode_ILOAD+atype_RPL+blockm_OFF+bop_SRCCOPY+trans_0+
  188. bltmod_BMONO+pattern_OFF+hbgr_SRC_WINDOWS+transc_BG_TRANSP;
  189. // We special-cased 0xb8b8 and 0xe2e2 in bitblt.c:
  190. if (rop4 == 0xb8b8)
  191. {
  192. // 0xb8 is weird because it says that the '1' bit is leave-alone,
  193. // but the '0' bit is the destination color. The Millennium can
  194. // only handle transparent blts when the '0' bit is leave-alone,
  195. // so we flip the source bits before we give it to the Millennium.
  196. //
  197. // Since we're limited by the speed of the bus, this additional
  198. // overhead of an extra XOR on every write won't be measurable.
  199. ulBitFlip = (ULONG) -1;
  200. }
  201. }
  202. else
  203. {
  204. ulHwMix = rop4 & 0xf;
  205. ulDwg = opcode_ILOAD+atype_RSTR+blockm_OFF+bltmod_BMONO+
  206. hbgr_SRC_WINDOWS+pattern_OFF+transc_BG_OPAQUE+ (ulHwMix << 16);
  207. }
  208. pjDma = ppdev->pjBase + DMAWND;
  209. pulXlate = pxlo->pulXlate;
  210. CHECK_FIFO_SPACE(pjBase, 15);
  211. CP_WRITE(pjBase, DWG_DWGCTL, ulDwg);
  212. if (!(GET_CACHE_FLAGS(ppdev, SIGN_CACHE)))
  213. {
  214. CP_WRITE(pjBase, DWG_SGN, 0);
  215. }
  216. if (!(GET_CACHE_FLAGS(ppdev, ARX_CACHE)))
  217. {
  218. CP_WRITE(pjBase, DWG_AR5, 0);
  219. }
  220. // The SRC0 through SRC3 registers are trashed by the blt, and
  221. // other ARx registers will be modified shortly, so signal it:
  222. ppdev->HopeFlags = SIGN_CACHE;
  223. CP_WRITE(pjBase, DWG_FCOL, COLOR_REPLICATE(ppdev, pulXlate[1]));
  224. CP_WRITE(pjBase, DWG_BCOL, COLOR_REPLICATE(ppdev, pulXlate[0]));
  225. while (TRUE)
  226. {
  227. cxDst = (prcl->right - prcl->left);
  228. cyDst = (prcl->bottom - prcl->top);
  229. CP_WRITE(pjBase, DWG_LEN, cyDst);
  230. CP_WRITE(pjBase, DWG_YDST, prcl->top + yOffset);
  231. CP_WRITE(pjBase, DWG_FXLEFT, prcl->left + xOffset);
  232. CP_WRITE(pjBase, DWG_FXRIGHT, prcl->right + xOffset - 1);
  233. xAlign = (prcl->left + dx) & 31;
  234. bHwBug = ((cxDst >= 128) && (xAlign <= 15));
  235. if (!bHwBug)
  236. {
  237. CP_WRITE(pjBase, DWG_SHIFT, 0);
  238. CP_WRITE(pjBase, DWG_AR3, xAlign);
  239. CP_START(pjBase, DWG_AR0, xAlign + cxDst - 1);
  240. }
  241. else
  242. {
  243. // We have to work around a hardware bug. Start 8 pels to
  244. // the left of the original start.
  245. CP_WRITE(pjBase, DWG_AR3, xAlign + 8);
  246. CP_WRITE(pjBase, DWG_AR0, xAlign + cxDst + 31);
  247. CP_START(pjBase, DWG_SHIFT, (24 << 16));
  248. }
  249. // We have to ensure that the command has been started before doing
  250. // the BLT_WRITE_ON:
  251. CHECK_FIFO_SPACE(pjBase, FIFOSIZE);
  252. BLT_WRITE_ON(ppdev, pjBase);
  253. // Point to the first dword of the source bitmap that is to be
  254. // downloaded:
  255. pulSrc = (ULONG*) (pjSrcScan0 + (((prcl->top + dy) * lSrcDelta
  256. + ((prcl->left + dx) >> 3)) & ~3L));
  257. // Calculate the number of dwords to be moved per scanline. Since
  258. // we align the starting dword on a dword boundary, we know that
  259. // we cannot overflow the end of the bitmap:
  260. cdSrc = (xAlign + cxDst + 31) >> 5;
  261. lSrcSkip = lSrcDelta - (cdSrc << 2);
  262. if (!(bHwBug) && (lSrcSkip == 0))
  263. {
  264. // It's rather frequent that there will be no scan-to-scan
  265. // delta, and no hardware bug, so we can go full speed:
  266. cdSrc *= cyDst;
  267. cFullLoops = ((cdSrc - 1) / FIFOSIZE);
  268. cRemLoops = ((cdSrc - 1) % FIFOSIZE) + 1;
  269. pulDst = (ULONG*) pjDma;
  270. if (cFullLoops > 0)
  271. {
  272. do {
  273. CHECK_FIFO_SPACE(pjBase, FIFOSIZE);
  274. for (i = FIFOSIZE; i != 0; i--)
  275. {
  276. CP_WRITE_DMA(ppdev, pulDst, *pulSrc ^ ulBitFlip);
  277. pulSrc++;
  278. }
  279. } while (--cFullLoops != 0);
  280. }
  281. CHECK_FIFO_SPACE(pjBase, (LONG) cRemLoops);
  282. do {
  283. CP_WRITE_DMA(ppdev, pulDst, *pulSrc ^ ulBitFlip);
  284. pulSrc++;
  285. } while (--cRemLoops != 0);
  286. }
  287. else
  288. {
  289. // Okay, blt it the slow way:
  290. cFifo = 0;
  291. do {
  292. pulDst = (ULONG*) pjDma;
  293. if (bHwBug)
  294. {
  295. if (--cFifo < 0)
  296. {
  297. cFifo = FIFOSIZE - 1;
  298. CHECK_FIFO_SPACE(pjBase, FIFOSIZE);
  299. }
  300. CP_WRITE_DMA(ppdev, pulDst, 0); // Account for hardware bug
  301. }
  302. for (i = cdSrc; i != 0; i--)
  303. {
  304. if (--cFifo < 0)
  305. {
  306. cFifo = FIFOSIZE - 1;
  307. CHECK_FIFO_SPACE(pjBase, FIFOSIZE);
  308. }
  309. CP_WRITE_DMA(ppdev, pulDst, *pulSrc++ ^ ulBitFlip);
  310. }
  311. pulSrc = (ULONG*) ((BYTE*) pulSrc + lSrcSkip);
  312. } while (--cyDst != 0);
  313. }
  314. BLT_WRITE_OFF(ppdev, pjBase);
  315. if (--c == 0)
  316. break;
  317. prcl++;
  318. CHECK_FIFO_SPACE(pjBase, 7);
  319. }
  320. }
  321. /******************************Public*Routine******************************\
  322. * VOID vMgaCopyBlt
  323. *
  324. * Does a screen-to-screen blt of a list of rectangles.
  325. *
  326. \**************************************************************************/
  327. VOID vMgaCopyBlt( // Type FNCOPY
  328. PDEV* ppdev,
  329. LONG c, // Can't be zero
  330. RECTL* prcl, // Array of relative coordinates destination rectangles
  331. ULONG rop4, // Rop4
  332. POINTL* pptlSrc, // Original unclipped source point
  333. RECTL* prclDst) // Original unclipped destination rectangle
  334. {
  335. BYTE* pjBase;
  336. LONG xOffset;
  337. LONG yOffset;
  338. LONG dx;
  339. LONG dy;
  340. FLONG flDirCode;
  341. LONG lSignedPitch;
  342. ULONG ulHwMix;
  343. ULONG ulDwg;
  344. LONG yDst;
  345. LONG ySrc;
  346. LONG cy;
  347. LONG xSrc;
  348. LONG lSignedWidth;
  349. LONG lSrcStart;
  350. pjBase = ppdev->pjBase;
  351. xOffset = ppdev->xOffset;
  352. yOffset = ppdev->yOffset;
  353. dx = pptlSrc->x - prclDst->left;
  354. dy = pptlSrc->y - prclDst->top; // Add to destination to get source
  355. flDirCode = DRAWING_DIR_TBLR;
  356. lSignedPitch = ppdev->cxMemory;
  357. // If the destination and source rectangles overlap, we will have to
  358. // tell the accelerator in which direction the copy should be done:
  359. if (OVERLAP(prclDst, pptlSrc))
  360. {
  361. if (prclDst->left > pptlSrc->x)
  362. {
  363. flDirCode |= scanleft_RIGHT_TO_LEFT;
  364. }
  365. if (prclDst->top > pptlSrc->y)
  366. {
  367. flDirCode |= sdy_BOTTOM_TO_TOP;
  368. lSignedPitch = -lSignedPitch;
  369. }
  370. }
  371. if (rop4 == 0xcccc)
  372. {
  373. ulDwg = opcode_BITBLT + atype_RPL + blockm_OFF + bltmod_BFCOL +
  374. pattern_OFF + transc_BG_OPAQUE + bop_SRCCOPY;
  375. }
  376. else
  377. {
  378. ulHwMix = rop4 & 0xf;
  379. ulDwg = opcode_BITBLT + atype_RSTR + blockm_OFF + bltmod_BFCOL +
  380. pattern_OFF + transc_BG_OPAQUE + (ulHwMix << 16);
  381. }
  382. // The SRC0 to SRC3 registers are probably trashed by the blt, and we
  383. // may be using a different SGN:
  384. ppdev->HopeFlags = 0;
  385. CHECK_FIFO_SPACE(pjBase, 10);
  386. CP_WRITE(pjBase, DWG_DWGCTL, ulDwg);
  387. CP_WRITE(pjBase, DWG_SHIFT, 0);
  388. CP_WRITE(pjBase, DWG_SGN, flDirCode);
  389. CP_WRITE(pjBase, DWG_AR5, lSignedPitch);
  390. while (TRUE)
  391. {
  392. CP_WRITE(pjBase, DWG_LEN, prcl->bottom - prcl->top);
  393. CP_WRITE(pjBase, DWG_FXLEFT, prcl->left + xOffset);
  394. CP_WRITE(pjBase, DWG_FXRIGHT, prcl->right + xOffset - 1);
  395. yDst = yOffset + prcl->top;
  396. ySrc = yOffset + prcl->top + dy;
  397. if (flDirCode & sdy_BOTTOM_TO_TOP)
  398. {
  399. cy = prcl->bottom - prcl->top - 1;
  400. yDst += cy;
  401. ySrc += cy;
  402. }
  403. CP_WRITE(pjBase, DWG_YDST, yDst);
  404. xSrc = xOffset + prcl->left + dx;
  405. lSignedWidth = prcl->right - prcl->left - 1;
  406. if (flDirCode & scanleft_RIGHT_TO_LEFT)
  407. {
  408. xSrc += lSignedWidth;
  409. lSignedWidth = -lSignedWidth;
  410. }
  411. lSrcStart = ppdev->ulYDstOrg + (ySrc * ppdev->cxMemory) + xSrc;
  412. CP_WRITE(pjBase, DWG_AR3, lSrcStart);
  413. CP_START(pjBase, DWG_AR0, lSrcStart + lSignedWidth);
  414. if (--c == 0)
  415. break;
  416. CHECK_FIFO_SPACE(pjBase, 6);
  417. prcl++;
  418. }
  419. }