Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1093 lines
35 KiB

  1. /******************************Module*Header*******************************\
  2. * Module Name: bltm32.c
  3. *
  4. * Contains the low-level memory-mapped I/O blt functions for the Mach32.
  5. *
  6. * Hopefully, if you're basing your display driver on this code, to
  7. * support all of DrvBitBlt and DrvCopyBits, you'll only have to implement
  8. * the following routines. You shouldn't have to modify much in
  9. * 'bitblt.c'. I've tried to make these routines as few, modular, simple,
  10. * and efficient as I could, while still accelerating as many calls as
  11. * possible that would be cost-effective in terms of performance wins
  12. * versus size and effort.
  13. *
  14. * Note: In the following, 'relative' coordinates refer to coordinates
  15. * that haven't yet had the offscreen bitmap (DFB) offset applied.
  16. * 'Absolute' coordinates have had the offset applied. For example,
  17. * we may be told to blt to (1, 1) of the bitmap, but the bitmap may
  18. * be sitting in offscreen memory starting at coordinate (0, 768) --
  19. * (1, 1) would be the 'relative' start coordinate, and (1, 769)
  20. * would be the 'absolute' start coordinate.
  21. *
  22. * Copyright (c) 1992-1995 Microsoft Corporation
  23. *
  24. \**************************************************************************/
  25. #include "precomp.h"
  26. /******************************Public*Routine******************************\
  27. * VOID vM32FillSolid
  28. *
  29. * Fills a list of rectangles with a solid colour.
      *
      * The foreground colour, foreground mix and data-path configuration are
      * written once. Each rectangle then takes five register writes (CUR_X,
      * DEST_X_START, DEST_X_END, CUR_Y, DEST_Y_END), with vM32QuietDown
      * called immediately before the DEST_Y_END write.
      *
      * The hardware mix is looked up in gaul32HwMixFromRop2 using bits 2..5
      * of 'rop4'. Rectangle coordinates are converted from relative to
      * absolute by adding ppdev->xOffset / ppdev->yOffset.
      *
      * NOTE(review): the DEST_Y_END write is presumably what starts the
      * draw -- confirm against the Mach32 register documentation.
  30. *
  31. \**************************************************************************/
  32. VOID vM32FillSolid( // Type FNFILL
  33. PDEV* ppdev,
  34. LONG c, // Count of rectangles in 'prcl'; can't be zero
  35. RECTL* prcl, // List of rectangles to be filled, in relative
  36. // coordinates
  37. ULONG rop4, // rop4; only bits 2..5 are used to pick the mix
  38. RBRUSH_COLOR rbc, // Drawing colour is rbc.iSolidColor
  39. POINTL* pptlBrush) // Not used
  40. {
  41. BYTE* pjMmBase;
  42. LONG xOffset;
  43. LONG yOffset;
  44. LONG x;
  45. ASSERTDD(c > 0, "Can't handle zero rectangles");
  46. pjMmBase = ppdev->pjMmBase;
  47. xOffset = ppdev->xOffset;
  48. yOffset = ppdev->yOffset;
  49. M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 8); // 3 setup writes + 5 writes for the first rectangle
  50. M32_OW(pjMmBase, FRGD_COLOR, rbc.iSolidColor);
  51. M32_OW(pjMmBase, ALU_FG_FN, gaul32HwMixFromRop2[(rop4 >> 2) & 0xf]);
  52. M32_OW(pjMmBase, DP_CONFIG, FG_COLOR_SRC_FG | WRITE | DRAW);
  53. while (TRUE)
  54. {
  55. x = xOffset + prcl->left; // Absolute left edge, used for both CUR_X and DEST_X_START
  56. M32_OW(pjMmBase, CUR_X, x);
  57. M32_OW(pjMmBase, DEST_X_START, x);
  58. M32_OW(pjMmBase, DEST_X_END, xOffset + prcl->right);
  59. M32_OW(pjMmBase, CUR_Y, yOffset + prcl->top);
  60. vM32QuietDown(ppdev, pjMmBase);
  61. M32_OW(pjMmBase, DEST_Y_END, yOffset + prcl->bottom);
  62. if (--c == 0) // Last rectangle done: leave without reserving more FIFO slots
  63. return;
  64. prcl++;
  65. M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 5); // The five per-rectangle writes above
  66. }
  67. }
  68. /******************************Public*Routine******************************\
  69. * VOID vM32FillPatMonochrome
  70. *
  71. * This routine uses the pattern hardware to draw a monochrome patterned
  72. * list of rectangles.
       *
       * The brush realization (rbc.prb) caches the 8x8 monochrome pattern in
       * aulPattern[0..1] (one byte per row) together with the alignment it
       * was last rotated to (ptlBrush). If the alignment required for this
       * call -- (pptlBrush + surface offset) & 7 in each axis -- differs from
       * the cached one, the cached pattern is rotated in place and the cached
       * alignment is updated, so the realization is modified by this call.
       *
       * The foreground mix comes from bits 2..5 of 'rop4'. The background mix
       * is LEAVE_ALONE when the high byte of 'rop4' is 0xaa, otherwise it is
       * the same as the foreground mix.
       *
       * 'c' must be non-zero: the loop decrements it before testing it. This
       * routine does not assert that.
  73. *
  74. * See Blt_DS_P8x8_ENG_IO_66_D0 and Blt_DS_P8x8_ENG_IO_66_D1.
  75. *
  76. \**************************************************************************/
  77. VOID vM32FillPatMonochrome( // Type FNFILL
  78. PDEV* ppdev,
  79. LONG c, // Count of rectangles in 'prcl'; can't be zero
  80. RECTL* prcl, // List of rectangles to be filled, in relative
  81. // coordinates
  82. ULONG rop4, // rop4
  83. RBRUSH_COLOR rbc, // rbc.prb points to brush realization structure
  84. POINTL* pptlBrush) // Pattern alignment
  85. {
  86. BYTE* pjMmBase;
  87. LONG xOffset;
  88. LONG yOffset;
  89. ULONG ulHwForeMix;
  90. BYTE* pjSrc;
  91. BYTE* pjDst;
  92. LONG xPattern;
  93. LONG yPattern;
  94. LONG xOld;
  95. LONG yOld;
  96. LONG iLeftShift;
  97. LONG iRightShift;
  98. LONG i;
  99. BYTE j;
  100. LONG xLeft;
  101. ULONG aulTmp[2]; // Scratch copy of the 8-byte pattern while it is being rotated
  102. WORD* pwPattern;
  103. ASSERTDD(ppdev->iAsic == ASIC_68800_6 || ppdev->iAsic == ASIC_68800AX,
  104. "Wrong ASIC type for monochrome 8x8 patterns");
  105. pjMmBase = ppdev->pjMmBase;
  106. xOffset = ppdev->xOffset;
  107. yOffset = ppdev->yOffset;
  108. xPattern = (pptlBrush->x + xOffset) & 7; // Required alignment in absolute coordinates, modulo the 8x8 tile
  109. yPattern = (pptlBrush->y + yOffset) & 7;
  110. // If the alignment isn't correct, we'll have to change it:
  111. if ((xPattern != rbc.prb->ptlBrush.x) || (yPattern != rbc.prb->ptlBrush.y))
  112. {
  113. // Remember that we've changed the alignment on our cached brush:
  114. xOld = rbc.prb->ptlBrush.x;
  115. yOld = rbc.prb->ptlBrush.y;
  116. rbc.prb->ptlBrush.x = xPattern;
  117. rbc.prb->ptlBrush.y = yPattern;
  118. // Now do the alignment:
  119. yPattern = (yOld - yPattern); // From here on yPattern is the source row index (used modulo 8)
  120. iRightShift = (xPattern - xOld) & 7;
  121. iLeftShift = 8 - iRightShift;
  122. pjSrc = (BYTE*) &rbc.prb->aulPattern[0];
  123. pjDst = (BYTE*) &aulTmp[0];
  124. for (i = 0; i < 8; i++)
  125. {
  126. j = *(pjSrc + (yPattern++ & 7)); // Fetch the source row, wrapping vertically
  127. *pjDst++ = (j << iLeftShift) | (j >> iRightShift); // Rotate the row right by iRightShift; the BYTE store drops bits above bit 7
  128. }
  129. rbc.prb->aulPattern[0] = aulTmp[0]; // Write the rotated pattern back into the cached realization
  130. rbc.prb->aulPattern[1] = aulTmp[1];
  131. }
  132. ulHwForeMix = gaul32HwMixFromRop2[(rop4 >> 2) & 0xf];
  133. M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 16); // 11 setup writes + 5 writes for the first rectangle
  134. M32_OW(pjMmBase, DP_CONFIG, FG_COLOR_SRC_FG | EXT_MONO_SRC_PATT | DRAW |
  135. WRITE);
  136. M32_OW(pjMmBase, ALU_FG_FN, ulHwForeMix);
  137. M32_OW(pjMmBase, ALU_BG_FN, ((rop4 & 0xff00) == 0xaa00) ? LEAVE_ALONE
  138. : ulHwForeMix);
  139. M32_OW(pjMmBase, FRGD_COLOR, rbc.prb->ulForeColor);
  140. M32_OW(pjMmBase, BKGD_COLOR, rbc.prb->ulBackColor);
  141. M32_OW(pjMmBase, PATT_LENGTH, 128); // NOTE(review): meaning of 128 and of index 16 below is defined by the hardware -- confirm
  142. M32_OW(pjMmBase, PATT_DATA_INDEX, 16);
  143. pwPattern = (WORD*) &rbc.prb->aulPattern[0]; // The 8-byte pattern is downloaded as four 16-bit words
  144. M32_OW(pjMmBase, PATT_DATA, *(pwPattern));
  145. M32_OW(pjMmBase, PATT_DATA, *(pwPattern + 1));
  146. M32_OW(pjMmBase, PATT_DATA, *(pwPattern + 2));
  147. M32_OW(pjMmBase, PATT_DATA, *(pwPattern + 3));
  148. while(TRUE)
  149. {
  150. xLeft = xOffset + prcl->left;
  151. M32_OW(pjMmBase, CUR_X, xLeft);
  152. M32_OW(pjMmBase, DEST_X_START, xLeft);
  153. M32_OW(pjMmBase, DEST_X_END, xOffset + prcl->right);
  154. M32_OW(pjMmBase, CUR_Y, yOffset + prcl->top);
  155. M32_OW(pjMmBase, DEST_Y_END, yOffset + prcl->bottom);
  156. if (--c == 0)
  157. break;
  158. prcl++;
  159. M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 5); // The five per-rectangle writes above
  160. }
  161. }
  162. /******************************Public*Routine******************************\
  163. * VOID vM32FillPatColor
  164. *
  165. * This routine uses the pattern hardware to draw a colour patterned list of
  166. * rectangles.
       *
       * The pattern in rbc.prb->aulPattern is read as 8 rows of 8 bytes. For
       * each rectangle the routine draws one scan at a time, downloading the
       * matching pattern row (four 16-bit words) before each scan.
       *
       * When the hardware mix is OVERPAINT, only the first min(cy, 8) scans
       * are drawn that way; the remaining 'cyRoll' scans are produced by a
       * single screen-to-screen blt whose source starts 8 scans above its
       * destination. For any other mix every scan is drawn individually.
       *
       * Asserts that the display is 8bpp. 'c' must be non-zero and every
       * rectangle must have non-zero height (the per-scan do/while runs at
       * least once); neither is asserted here.
  167. *
  168. * See Blt_DS_PCOL_ENG_IO_F0_D0 and Blt_DS_PCOL_ENG_IO_F0_D1.
  169. *
  170. \**************************************************************************/
  171. VOID vM32FillPatColor( // Type FNFILL
  172. PDEV* ppdev,
  173. LONG c, // Count of rectangles in 'prcl'; can't be zero
  174. RECTL* prcl, // List of rectangles to be filled, in relative
  175. // coordinates
  176. ULONG rop4, // rop4; bits 2..5 select the hardware mix
  177. RBRUSH_COLOR rbc, // rbc.prb points to brush realization structure
  178. POINTL* pptlBrush) // Pattern alignment
  179. {
  180. BYTE* pjMmBase;
  181. LONG xOffset;
  182. LONG yOffset;
  183. ULONG ulHwMix;
  184. LONG xLeft;
  185. LONG xRight;
  186. LONG yTop;
  187. LONG cy;
  188. LONG cyVenetian; // Scans drawn one at a time with a pattern-row download each
  189. LONG cyRoll; // Scans drawn afterwards by the rolling screen-to-screen blt
  190. WORD* pwPattern;
  191. LONG xPattern;
  192. LONG yPattern;
  193. ASSERTDD(ppdev->iBitmapFormat == BMF_8BPP,
  194. "Colour patterns work only at 8bpp");
  195. pjMmBase = ppdev->pjMmBase;
  196. xOffset = ppdev->xOffset;
  197. yOffset = ppdev->yOffset;
  198. ulHwMix = gaul32HwMixFromRop2[(rop4 >> 2) & 0xf];
  199. M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 9); // 3 setup writes + 6 per-rectangle setup writes
  200. M32_OW(pjMmBase, ALU_FG_FN, ulHwMix);
  201. M32_OW(pjMmBase, SRC_Y_DIR, 1);
  202. M32_OW(pjMmBase, PATT_LENGTH, 7); // 8 pixel wide pattern
  203. while (TRUE)
  204. {
  205. xLeft = xOffset + prcl->left;
  206. xRight = xOffset + prcl->right;
  207. yTop = yOffset + prcl->top;
  208. cy = prcl->bottom - prcl->top;
  209. xPattern = (xLeft - pptlBrush->x - xOffset) & 7; // Pattern phase at the rectangle's left edge (xOffset cancels out)
  210. yPattern = (yTop - pptlBrush->y - yOffset) & 7; // Pattern row for the rectangle's first scan
  211. if (ulHwMix == OVERPAINT)
  212. {
  213. cyVenetian = min(cy, 8);
  214. cyRoll = cy - cyVenetian;
  215. }
  216. else
  217. {
  218. cyVenetian = cy;
  219. cyRoll = 0;
  220. }
  221. M32_OW(pjMmBase, DP_CONFIG, FG_COLOR_SRC_PATT | DATA_WIDTH | DRAW | WRITE);
  222. M32_OW(pjMmBase, PATT_INDEX, xPattern);
  223. M32_OW(pjMmBase, DEST_X_START, xLeft);
  224. M32_OW(pjMmBase, CUR_X, xLeft);
  225. M32_OW(pjMmBase, DEST_X_END, xRight);
  226. M32_OW(pjMmBase, CUR_Y, yTop);
  227. do {
  228. // Each scan of the pattern is eight bytes:
  229. pwPattern = (WORD*) ((BYTE*) &rbc.prb->aulPattern[0]
  230. + (yPattern << 3));
  231. yPattern = (yPattern + 1) & 7; // Advance to the next pattern row, wrapping after row 7
  232. M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 6); // 5 pattern writes + DEST_Y_END
  233. M32_OW(pjMmBase, PATT_DATA_INDEX, 0); // Reset index for download
  234. M32_OW(pjMmBase, PATT_DATA, *(pwPattern));
  235. M32_OW(pjMmBase, PATT_DATA, *(pwPattern + 1));
  236. M32_OW(pjMmBase, PATT_DATA, *(pwPattern + 2));
  237. M32_OW(pjMmBase, PATT_DATA, *(pwPattern + 3));
  238. yTop++; // DEST_Y_END is one past the scan being drawn
  239. vM32QuietDown(ppdev, pjMmBase);
  240. M32_OW(pjMmBase, DEST_Y_END, yTop);
  241. } while (--cyVenetian != 0);
  242. if (cyRoll != 0)
  243. {
  244. // When the ROP is PATCOPY, we can take advantage of the fact
  245. // that we've just laid down an entire row of the pattern, and
  246. // can do a 'rolling' screen-to-screen blt to draw the rest:
  247. M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 7); // The seven writes below
  248. M32_OW(pjMmBase, DP_CONFIG, FG_COLOR_SRC_BLIT | DATA_WIDTH |
  249. DRAW | WRITE);
  250. M32_OW(pjMmBase, M32_SRC_X, xLeft);
  251. M32_OW(pjMmBase, M32_SRC_X_START, xLeft);
  252. M32_OW(pjMmBase, M32_SRC_X_END, xRight);
  253. M32_OW(pjMmBase, M32_SRC_Y, yTop - 8); // cyRoll != 0 implies 8 scans were just drawn, so this is the rectangle's top
  254. M32_OW(pjMmBase, CUR_Y, yTop);
  255. vM32QuietDown(ppdev, pjMmBase);
  256. M32_OW(pjMmBase, DEST_Y_END, yTop + cyRoll);
  257. }
  258. if (--c == 0)
  259. break;
  260. prcl++;
  261. M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 6); // Per-rectangle setup writes at the top of the loop
  262. }
  263. }
  264. /******************************Public*Routine******************************\
  265. * VOID vM32Xfer1bpp
  266. *
  267. * This routine colour expands a monochrome bitmap, possibly with different
  268. * Rop2's for the foreground and background. It will be called in the
  269. * following cases:
  270. *
  271. * 1) To colour-expand the monochrome text buffer for the vFastText routine.
  272. * 2) To blt a 1bpp source with a simple Rop2 between the source and
  273. * destination.
  274. * 3) To blt a true Rop3 when the source is a 1bpp bitmap that expands to
  275. * white and black, and the pattern is a solid colour.
  276. * 4) To handle a true Rop4 that works out to be Rop2's between the pattern
  277. * and destination.
  278. *
  279. * Needless to say, making this routine fast can leverage a lot of
  280. * performance.
       *
       * As written, the same hardware mix (from the low four bits of 'rop4')
       * is programmed for both foreground and background. pxlo->pulXlate[0]
       * supplies the background colour and pulXlate[1] the foreground colour.
       *
       * For each rectangle the scissor is set to the exact destination
       * rectangle, the blt is widened to whole source dwords (32 pixels), and
       * every source dword is pushed through PIX_TRANS as two 16-bit writes,
       * low word first. The scissor is reset before returning.
  281. *
  282. \**************************************************************************/
  283. VOID vM32Xfer1bpp( // Type FNXFER
  284. PDEV* ppdev,
  285. LONG c, // Count of rectangles, can't be zero
  286. RECTL* prcl, // List of destination rectangles, in relative
  287. // coordinates
  288. ROP4 rop4, // rop4
  289. SURFOBJ* psoSrc, // Source surface
  290. POINTL* pptlSrc, // Original unclipped source point
  291. RECTL* prclDst, // Original unclipped destination rectangle
  292. XLATEOBJ* pxlo) // Translate that provides colour-expansion information
  293. {
  294. BYTE* pjMmBase;
  295. LONG xOffset;
  296. LONG yOffset;
  297. ULONG* pulXlate;
  298. ULONG ulHwForeMix;
  299. LONG dx;
  300. LONG dy;
  301. LONG lSrcDelta;
  302. BYTE* pjSrcScan0;
  303. LONG xLeft;
  304. LONG xRight;
  305. LONG yTop;
  306. LONG cy;
  307. LONG cx;
  308. LONG xBias;
  309. LONG culScan;
  310. LONG lSrcSkip;
  311. ULONG* pulSrc;
  312. LONG i;
  313. ULONG ulFifo; // Set to 0 below and not otherwise referenced in this routine's visible code
  314. ASSERTDD(c > 0, "Can't handle zero rectangles");
  315. ASSERTDD(((rop4 & 0xff00) >> 8) == (rop4 & 0xff),
  316. "Expect only a rop2");
  317. pjMmBase = ppdev->pjMmBase;
  318. xOffset = ppdev->xOffset;
  319. yOffset = ppdev->yOffset;
  320. ulFifo = 0;
  321. ulHwForeMix = gaul32HwMixFromRop2[rop4 & 0xf];
  322. pulXlate = pxlo->pulXlate;
  323. M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 12); // 5 setup writes + 7 per-rectangle writes
  324. M32_OW(pjMmBase, DP_CONFIG, (WORD)(FG_COLOR_SRC_FG | BG_COLOR_SRC_BG | BIT16 |
  325. EXT_MONO_SRC_HOST | DRAW | WRITE | LSB_FIRST) );
  326. M32_OW(pjMmBase, ALU_FG_FN, (WORD) ulHwForeMix );
  327. M32_OW(pjMmBase, ALU_BG_FN, (WORD) ulHwForeMix );
  328. M32_OW(pjMmBase, BKGD_COLOR, (WORD) pulXlate[0]);
  329. M32_OW(pjMmBase, FRGD_COLOR, (WORD) pulXlate[1]);
  330. dx = pptlSrc->x - prclDst->left; // Add to a destination x to get the source x
  331. dy = pptlSrc->y - prclDst->top; // Add to a destination y to get the source y
  332. lSrcDelta = psoSrc->lDelta;
  333. pjSrcScan0 = psoSrc->pvScan0;
  334. while (TRUE)
  335. {
  336. xLeft = prcl->left;
  337. xRight = prcl->right;
  338. // The Mach32 'bit packs' monochrome transfers, but GDI gives
  339. // us monochrome bitmaps whose scans are always dword aligned.
  340. // Consequently, we use the Mach32's clip registers to make
  341. // our transfers a multiple of 32 to match the dword alignment:
  342. M32_OW(pjMmBase, EXT_SCISSOR_L, (SHORT) (xLeft + xOffset) );
  343. M32_OW(pjMmBase, EXT_SCISSOR_R, (SHORT) (xRight + xOffset - 1) );
  344. yTop = prcl->top;
  345. cy = prcl->bottom - yTop;
  346. xBias = (xLeft + dx) & 31; // Floor
  347. xLeft -= xBias;
  348. cx = (xRight - xLeft + 31) & ~31; // Ceiling
       // NOTE(review): in '(WORD) xLeft + xOffset' the cast binds to xLeft
       // only; presumably M32_OW truncates the sum to 16 bits -- confirm.
  349. M32_OW(pjMmBase, CUR_X, (WORD) xLeft + xOffset );
  350. M32_OW(pjMmBase, DEST_X_START, (WORD) xLeft + xOffset );
  351. M32_OW(pjMmBase, DEST_X_END, (WORD) (xLeft + xOffset + cx) );
  352. M32_OW(pjMmBase, CUR_Y, (WORD) yTop + yOffset );
  353. M32_OW(pjMmBase, DEST_Y_END, (WORD) (yTop + yOffset + cy) );
  354. pulSrc = (ULONG*) (pjSrcScan0 + (yTop + dy) * lSrcDelta
  355. + ((xLeft + dx) >> 3));
  356. culScan = cx >> 5; // Source dwords per scan
  357. lSrcSkip = lSrcDelta - (culScan << 2); // Bytes from the end of one scan's data to the start of the next
  358. ASSERTDD(((ULONG_PTR)pulSrc & 3) == 0, "Source should be dword aligned");
  359. do {
  360. i = culScan;
  361. do {
  362. M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 2);
  363. M32_OW(pjMmBase, PIX_TRANS, *((USHORT*) pulSrc) ); // Low 16 bits of the dword
  364. M32_OW(pjMmBase, PIX_TRANS, *((USHORT*) pulSrc + 1) ); // High 16 bits of the dword
  365. pulSrc++;
  366. } while (--i != 0);
  367. pulSrc = (ULONG*) ((BYTE*) pulSrc + lSrcSkip);
  368. } while (--cy != 0);
  369. if (--c == 0)
  370. break;
  371. prcl++;
  372. M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 7); // 2 scissor writes + 5 blt-setup writes
  373. }
  374. // Don't forget to reset the clip register:
  375. M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 2);
  376. M32_OW(pjMmBase, EXT_SCISSOR_L, (SHORT) 0 );
  377. M32_OW(pjMmBase, EXT_SCISSOR_R, (SHORT) M32_MAX_SCISSOR );
  378. }
  379. /******************************Public*Routine******************************\
  380. * VOID vM32XferNative
  381. *
  382. * Transfers a bitmap that is the same colour depth as the display to
  383. * the screen via the data transfer register, with no translation.
       *
       * The hardware mix comes from the low four bits of 'rop4' and is
       * written to both ALU_FG_FN and ALU_BG_FN. For each rectangle the
       * scissor is set to the exact destination rectangle, the blt is widened
       * to a multiple of 4 pixels, and the source is streamed to PIX_TRANS one
       * dword at a time as two 16-bit writes, low word first. The scissor is
       * reset before returning.
  384. *
  385. \**************************************************************************/
  386. VOID vM32XferNative( // Type FNXFER
  387. PDEV* ppdev,
  388. LONG c, // Count of rectangles, can't be zero
  389. RECTL* prcl, // Array of relative coordinates destination rectangles
  390. ULONG rop4, // rop4
  391. SURFOBJ* psoSrc, // Source surface
  392. POINTL* pptlSrc, // Original unclipped source point
  393. RECTL* prclDst, // Original unclipped destination rectangle
  394. XLATEOBJ* pxlo) // Not used
  395. {
  396. BYTE* pjMmBase;
  397. LONG xOffset;
  398. LONG yOffset;
  399. ULONG ulHwForeMix;
  400. LONG dx;
  401. LONG dy;
  402. LONG lSrcDelta;
  403. BYTE* pjSrcScan0;
  404. LONG xLeft;
  405. LONG xRight;
  406. LONG yTop;
  407. LONG cy;
  408. LONG cx;
  409. LONG xBias;
  410. ULONG* pulSrc;
  411. ULONG culScan;
  412. LONG lSrcSkip;
  413. LONG i;
  414. ULONG ulFifo; // Set to 0 below and not otherwise referenced in this routine's visible code
  415. ASSERTDD(c > 0, "Can't handle zero rectangles");
  416. ASSERTDD(((rop4 & 0xff00) >> 8) == (rop4 & 0xff),
  417. "Expect only a rop2");
  418. pjMmBase = ppdev->pjMmBase;
  419. xOffset = ppdev->xOffset;
  420. yOffset = ppdev->yOffset;
  421. ulFifo = 0;
  422. ulHwForeMix = gaul32HwMixFromRop2[rop4 & 0xf];
  423. M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 10); // 3 setup writes + 7 per-rectangle writes
  424. M32_OW(pjMmBase, DP_CONFIG, (WORD)(FG_COLOR_SRC_HOST | BIT16 |
  425. DRAW | WRITE | LSB_FIRST) );
  426. M32_OW(pjMmBase, ALU_FG_FN, (WORD) ulHwForeMix );
  427. M32_OW(pjMmBase, ALU_BG_FN, (WORD) ulHwForeMix );
  428. dx = pptlSrc->x - prclDst->left; // Add to a destination x to get the source x
  429. dy = pptlSrc->y - prclDst->top; // Add to a destination y to get the source y
  430. lSrcDelta = psoSrc->lDelta;
  431. pjSrcScan0 = psoSrc->pvScan0;
  432. while (TRUE)
  433. {
  434. xLeft = prcl->left;
  435. xRight = prcl->right;
  436. M32_OW(pjMmBase, EXT_SCISSOR_L, (SHORT) (xLeft + xOffset) );
  437. M32_OW(pjMmBase, EXT_SCISSOR_R, (SHORT) (xRight + xOffset - 1) );
  438. yTop = prcl->top;
  439. cy = prcl->bottom - yTop;
  440. // We compute 'xBias' in order to dword-align the source pointer.
  441. // This way, we don't have to do unaligned reads of the source,
  442. // and we're guaranteed not to read even a byte past the end of
  443. // the bitmap.
  444. //
  445. // Note that this bias works at 24bpp, too:
  446. xBias = (xLeft + dx) & 3; // Floor
  447. xLeft -= xBias;
  448. cx = (xRight - xLeft + 3) & ~3; // Ceiling
       // NOTE(review): in '(WORD) xLeft + xOffset' the cast binds to xLeft
       // only; presumably M32_OW truncates the sum to 16 bits -- confirm.
  449. M32_OW(pjMmBase, CUR_X, (WORD) xLeft + xOffset );
  450. M32_OW(pjMmBase, DEST_X_START, (WORD) xLeft + xOffset );
  451. M32_OW(pjMmBase, DEST_X_END, (WORD) (xLeft + xOffset + cx) );
  452. M32_OW(pjMmBase, CUR_Y, (WORD) yTop + yOffset );
  453. M32_OW(pjMmBase, DEST_Y_END, (WORD) (yTop + yOffset + cy) );
  454. pulSrc = (ULONG*) (pjSrcScan0 + (yTop + dy) * lSrcDelta
  455. + ((xLeft + dx) * ppdev->cjPelSize));
  456. culScan = (cx * ppdev->cjPelSize) >> 2; // Source dwords per scan
  457. lSrcSkip = lSrcDelta - (culScan << 2); // Bytes from the end of one scan's data to the start of the next
  458. ASSERTDD(((ULONG_PTR)pulSrc & 3) == 0, "Source should be dword aligned");
  459. do {
  460. i = culScan;
  461. do {
  462. M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 2);
  463. M32_OW(pjMmBase, PIX_TRANS, *((USHORT*) pulSrc) ); // Low 16 bits of the dword
  464. M32_OW(pjMmBase, PIX_TRANS, *((USHORT*) pulSrc + 1) ); // High 16 bits of the dword
  465. pulSrc++;
  466. } while (--i != 0);
  467. pulSrc = (ULONG*) ((BYTE*) pulSrc + lSrcSkip);
  468. } while (--cy != 0);
  469. if (--c == 0)
  470. break;
  471. prcl++;
  472. M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 7); // 2 scissor writes + 5 blt-setup writes
  473. }
  474. // Don't forget to reset the clip register:
  475. M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 2);
  476. M32_OW(pjMmBase, EXT_SCISSOR_L, (SHORT) 0 );
  477. M32_OW(pjMmBase, EXT_SCISSOR_R, (SHORT) M32_MAX_SCISSOR );
  478. }
  479. /******************************Public*Routine******************************\
  480. * VOID vM32Xfer4bpp
  481. *
  482. * Does a 4bpp transfer from a bitmap to the screen.
  483. *
  484. * The reason we implement this is that a lot of resources are kept as 4bpp,
  485. * and used to initialize DFBs, some of which we of course keep off-screen.
       *
       * Each source byte holds two pixels; the high nibble is translated
       * first. Every pixel is translated through pxlo->pulXlate. With a
       * 1-byte display pel the two translated pixels are packed into one
       * 16-bit PIX_TRANS write (first pixel in the low byte); with a 2-byte
       * display pel each pixel is written as its own 16-bit word.
       *
       * NOTE(review): there is no branch for any other ppdev->cjPelSize; in
       * that case the blt is set up but no pixel data is written. Only 24bpp
       * is excluded by assertion -- confirm other depths cannot reach here.
  486. *
  487. \**************************************************************************/
  488. VOID vM32Xfer4bpp( // Type FNXFER
  489. PDEV* ppdev,
  490. LONG c, // Count of rectangles, can't be zero
  491. RECTL* prcl, // List of destination rectangles, in relative
  492. // coordinates
  493. ULONG rop4, // Rop4
  494. SURFOBJ* psoSrc, // Source surface
  495. POINTL* pptlSrc, // Original unclipped source point
  496. RECTL* prclDst, // Original unclipped destination rectangle
  497. XLATEOBJ* pxlo) // Translate that provides colour-expansion information
  498. {
  499. BYTE* pjMmBase;
  500. LONG xOffset;
  501. LONG yOffset;
  502. LONG cjPelSize;
  503. ULONG ulHwForeMix;
  504. LONG xLeft;
  505. LONG xRight;
  506. LONG yTop;
  507. LONG xBias;
  508. LONG dx;
  509. LONG dy;
  510. LONG cx;
  511. LONG cy;
  512. LONG lSrcDelta;
  513. BYTE* pjSrcScan0;
  514. BYTE* pjSrc;
  515. BYTE jSrc;
  516. ULONG* pulXlate;
  517. LONG i;
  518. USHORT uw;
  519. LONG cjSrc;
  520. LONG lSrcSkip;
  521. ULONG ulFifo; // Set to 0 below and not otherwise referenced in this routine's visible code
  522. ASSERTDD(psoSrc->iBitmapFormat == BMF_4BPP, "Source must be 4bpp");
  523. ASSERTDD(c > 0, "Can't handle zero rectangles");
  524. ASSERTDD(ppdev->iBitmapFormat != BMF_24BPP, "Can't handle 24bpp");
  525. pjMmBase = ppdev->pjMmBase;
  526. xOffset = ppdev->xOffset;
  527. yOffset = ppdev->yOffset;
  528. cjPelSize = ppdev->cjPelSize;
  529. pulXlate = pxlo->pulXlate;
  530. ulFifo = 0;
  531. dx = pptlSrc->x - prclDst->left;
  532. dy = pptlSrc->y - prclDst->top; // Add to destination to get source
  533. lSrcDelta = psoSrc->lDelta;
  534. pjSrcScan0 = psoSrc->pvScan0;
  535. ulHwForeMix = gaul32HwMixFromRop2[rop4 & 0xf];
  536. M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 10); // 3 setup writes + 7 per-rectangle writes
  537. M32_OW(pjMmBase, DP_CONFIG, (WORD)(FG_COLOR_SRC_HOST | BIT16 |
  538. DRAW | WRITE | LSB_FIRST) );
  539. M32_OW(pjMmBase, ALU_FG_FN, (WORD) ulHwForeMix );
  540. M32_OW(pjMmBase, ALU_BG_FN, (WORD) ulHwForeMix );
  541. while(TRUE)
  542. {
  543. xLeft = prcl->left;
  544. xRight = prcl->right;
  545. M32_OW(pjMmBase, EXT_SCISSOR_L, (SHORT) (xLeft + xOffset) );
  546. M32_OW(pjMmBase, EXT_SCISSOR_R, (SHORT) (xRight + xOffset - 1) );
  547. yTop = prcl->top;
  548. cy = prcl->bottom - yTop;
  549. // 'xBias' moves the left edge back so that (xLeft + dx) is a
  550. // multiple of 4 source pixels, and 'cx' is rounded up to a
  551. // multiple of 4 pixels. At 4bpp that makes each scan start on a
  552. // whole source byte and consume a whole number of source bytes;
  553. // the scissor set above is the exact destination rectangle.
  554. // (This routine asserts that the display is not 24bpp.)
  555. xBias = (xLeft + dx) & 3; // Floor
  556. xLeft -= xBias;
  557. cx = (xRight - xLeft + 3) & ~3; // Ceiling
       // NOTE(review): in '(WORD) xLeft + xOffset' the cast binds to xLeft
       // only; presumably M32_OW truncates the sum to 16 bits -- confirm.
  558. M32_OW(pjMmBase, CUR_X, (WORD) xLeft + xOffset );
  559. M32_OW(pjMmBase, DEST_X_START, (WORD) xLeft + xOffset );
  560. M32_OW(pjMmBase, DEST_X_END, (WORD) (xLeft + xOffset + cx) );
  561. M32_OW(pjMmBase, CUR_Y, (WORD) yTop + yOffset );
  562. M32_OW(pjMmBase, DEST_Y_END, (WORD) (yTop + yOffset + cy) );
  563. pjSrc = pjSrcScan0 + (yTop + dy) * lSrcDelta
  564. + ((xLeft + dx) >> 1);
  565. cjSrc = cx >> 1; // Number of source bytes touched
  566. lSrcSkip = lSrcDelta - cjSrc; // Bytes from the end of one scan's data to the start of the next
  567. if (cjPelSize == 1)
  568. {
  569. // This part handles 8bpp output:
  570. do {
  571. i = cjSrc;
  572. do {
  573. jSrc = *pjSrc++;
  574. uw = (USHORT) (pulXlate[jSrc >> 4]); // High nibble: first pixel, low byte of the word
  575. uw |= (USHORT) (pulXlate[jSrc & 0xf] << 8); // Low nibble: second pixel, high byte of the word
  576. M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 1);
  577. M32_OW(pjMmBase, PIX_TRANS, uw );
  578. } while (--i != 0);
  579. pjSrc += lSrcSkip;
  580. } while (--cy != 0);
  581. }
  582. else if (cjPelSize == 2)
  583. {
  584. // This part handles 16bpp output:
  585. do {
  586. i = cjSrc;
  587. do {
  588. M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 2);
  589. jSrc = *pjSrc++;
  590. uw = (USHORT) (pulXlate[jSrc >> 4]); // High nibble: first pixel
  591. M32_OW(pjMmBase, PIX_TRANS, uw );
  592. uw = (USHORT) (pulXlate[jSrc & 0xf]); // Low nibble: second pixel
  593. M32_OW(pjMmBase, PIX_TRANS, uw );
  594. } while (--i != 0);
  595. pjSrc += lSrcSkip;
  596. } while (--cy != 0);
  597. }
  598. if (--c == 0)
  599. break;
  600. prcl++;
  601. M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 7); // 2 scissor writes + 5 blt-setup writes
  602. }
  603. // Don't forget to reset the clip register:
  604. M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 2);
  605. M32_OW(pjMmBase, EXT_SCISSOR_L, (SHORT) 0 );
  606. M32_OW(pjMmBase, EXT_SCISSOR_R, (SHORT) M32_MAX_SCISSOR );
  607. }
  608. /******************************Public*Routine******************************\
  609. * VOID vM32Xfer8bpp
  610. *
  611. * Does a 8bpp transfer from a bitmap to the screen.
  612. *
  613. * The reason we implement this is that a lot of resources are kept as 8bpp,
  614. * and used to initialize DFBs, some of which we of course keep off-screen.
       *
       * Every source byte is translated through pxlo->pulXlate. With a 1-byte
       * display pel two translated pixels are packed into each 16-bit
       * PIX_TRANS write (first pixel in the low byte); with a 2-byte display
       * pel each pixel is written as its own 16-bit word.
       *
       * NOTE(review): there is no branch for any other ppdev->cjPelSize; in
       * that case the blt is set up but no pixel data is written. Only 24bpp
       * is excluded by assertion -- confirm other depths cannot reach here.
  615. *
  616. \**************************************************************************/
  617. VOID vM32Xfer8bpp( // Type FNXFER
  618. PDEV* ppdev,
  619. LONG c, // Count of rectangles, can't be zero
  620. RECTL* prcl, // List of destination rectangles, in relative
  621. // coordinates
  622. ULONG rop4, // Rop4
  623. SURFOBJ* psoSrc, // Source surface
  624. POINTL* pptlSrc, // Original unclipped source point
  625. RECTL* prclDst, // Original unclipped destination rectangle
  626. XLATEOBJ* pxlo) // Translate that provides colour-expansion information
  627. {
  628. BYTE* pjMmBase;
  629. LONG xOffset;
  630. LONG yOffset;
  631. LONG cjPelSize;
  632. ULONG ulHwForeMix;
  633. LONG xLeft;
  634. LONG xRight;
  635. LONG yTop;
  636. LONG xBias;
  637. LONG dx;
  638. LONG dy;
  639. LONG cx;
  640. LONG cy;
  641. LONG lSrcDelta;
  642. BYTE* pjSrcScan0;
  643. BYTE* pjSrc;
  644. ULONG* pulXlate;
  645. LONG i;
  646. USHORT uw;
  647. LONG cwSrc;
  648. LONG cxRem;
  649. LONG lSrcSkip;
  650. ULONG ulFifo; // Set to 0 below and not otherwise referenced in this routine's visible code
  651. ASSERTDD(psoSrc->iBitmapFormat == BMF_8BPP, "Source must be 8bpp");
  652. ASSERTDD(c > 0, "Can't handle zero rectangles");
  653. ASSERTDD(ppdev->iBitmapFormat != BMF_24BPP, "Can't handle 24bpp");
  654. pjMmBase = ppdev->pjMmBase;
  655. xOffset = ppdev->xOffset;
  656. yOffset = ppdev->yOffset;
  657. cjPelSize = ppdev->cjPelSize;
  658. pulXlate = pxlo->pulXlate;
  659. ulFifo = 0;
  660. dx = pptlSrc->x - prclDst->left;
  661. dy = pptlSrc->y - prclDst->top; // Add to destination to get source
  662. lSrcDelta = psoSrc->lDelta;
  663. pjSrcScan0 = psoSrc->pvScan0;
  664. ulHwForeMix = gaul32HwMixFromRop2[rop4 & 0xf];
  665. M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 10); // 3 setup writes + 7 per-rectangle writes
  666. M32_OW(pjMmBase, DP_CONFIG, (WORD)(FG_COLOR_SRC_HOST | BIT16 |
  667. DRAW | WRITE | LSB_FIRST) );
  668. M32_OW(pjMmBase, ALU_FG_FN, (WORD) ulHwForeMix );
  669. M32_OW(pjMmBase, ALU_BG_FN, (WORD) ulHwForeMix );
  670. while(TRUE)
  671. {
  672. xLeft = prcl->left;
  673. xRight = prcl->right;
  674. M32_OW(pjMmBase, EXT_SCISSOR_L, (SHORT) (xLeft + xOffset) );
  675. M32_OW(pjMmBase, EXT_SCISSOR_R, (SHORT) (xRight + xOffset - 1) );
  676. yTop = prcl->top;
  677. cy = prcl->bottom - yTop;
  678. // 'xBias' moves the left edge back so that (xLeft + dx), which
  679. // is also the byte offset into the 8bpp source scan, is a
  680. // multiple of 4, and 'cx' is rounded up to a multiple of 4
  681. // pixels. The scissor set above is the exact destination
  682. // rectangle.
  683. // (This routine asserts that the display is not 24bpp.)
  684. xBias = (xLeft + dx) & 3; // Floor
  685. xLeft -= xBias;
  686. cx = (xRight - xLeft + 3) & ~3; // Ceiling
       // NOTE(review): in '(WORD) xLeft + xOffset' the cast binds to xLeft
       // only; presumably M32_OW truncates the sum to 16 bits -- confirm.
  687. M32_OW(pjMmBase, CUR_X, (WORD) xLeft + xOffset );
  688. M32_OW(pjMmBase, DEST_X_START, (WORD) xLeft + xOffset );
  689. M32_OW(pjMmBase, DEST_X_END, (WORD) (xLeft + xOffset + cx) );
  690. M32_OW(pjMmBase, CUR_Y, (WORD) yTop + yOffset );
  691. M32_OW(pjMmBase, DEST_Y_END, (WORD) (yTop + yOffset + cy) );
  692. pjSrc = pjSrcScan0 + (yTop + dy) * lSrcDelta
  693. + (xLeft + dx);
  694. lSrcSkip = lSrcDelta - cx; // Bytes from the end of one scan's data to the start of the next
  695. if (cjPelSize == 1)
  696. {
  697. // This part handles 8bpp output:
  698. cwSrc = (cx >> 1); // 16-bit words (pixel pairs) per scan
  699. cxRem = (cx & 1); // NOTE(review): cx is a multiple of 4, so this is always 0
  700. do {
  701. for (i = cwSrc; i != 0; i--)
  702. {
  703. uw = (USHORT) (pulXlate[*pjSrc++]); // First pixel: low byte of the word
  704. uw |= (USHORT) (pulXlate[*pjSrc++] << 8); // Second pixel: high byte of the word
  705. M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 1);
  706. M32_OW(pjMmBase, PIX_TRANS, uw );
  707. }
  708. if (cxRem > 0) // Cannot be taken while cx is rounded to a multiple of 4 above
  709. {
  710. uw = (USHORT) (pulXlate[*pjSrc++]);
  711. M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 1);
  712. M32_OW(pjMmBase, PIX_TRANS, uw );
  713. }
  714. pjSrc += lSrcSkip;
  715. } while (--cy != 0);
  716. }
  717. else if (cjPelSize == 2)
  718. {
  719. // This part handles 16bpp output:
  720. do {
  721. for (i = cx; i != 0; i--)
  722. {
  723. uw = (USHORT) (pulXlate[*pjSrc++]); // One translated pixel per 16-bit write
  724. M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 1);
  725. M32_OW(pjMmBase, PIX_TRANS, uw );
  726. }
  727. pjSrc += lSrcSkip;
  728. } while (--cy != 0);
  729. }
  730. if (--c == 0)
  731. break;
  732. prcl++;
  733. M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 7); // 2 scissor writes + 5 blt-setup writes
  734. }
  735. // Don't forget to reset the clip register:
  736. M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 2);
  737. M32_OW(pjMmBase, EXT_SCISSOR_L, (SHORT) 0 );
  738. M32_OW(pjMmBase, EXT_SCISSOR_R, (SHORT) M32_MAX_SCISSOR );
  739. }
  740. /******************************Public*Routine******************************\
  741. * VOID vM32CopyBlt
  742. *
  743. * Does a screen-to-screen blt of a list of rectangles.
       *
       * The hardware mix comes from the low four bits of 'rop4'.
       * (dx, dy) = pptlSrc - prclDst (top-left) is added to a destination
       * coordinate to get the matching source coordinate.
       *
       * Copy direction:
       *   - If OVERLAP(prclDst, pptlSrc) is false, always top-down,
       *     left-to-right.
       *   - Otherwise top-down when prclDst->top <= pptlSrc->y, else
       *     bottom-up; and left-to-right when prclDst->left <= pptlSrc->x,
       *     else right-to-left.
       * Right-to-left loops program the X start registers with the right edge
       * and the X end registers with the left edge; bottom-up loops start at
       * (bottom - 1) and end at (top - 1).
  744. *
  745. * See Blt_DS_SS_ENG_IO_D0 and Blt_DS_SS_TLBR_ENG_IO_D1.
  746. *
  747. \**************************************************************************/
  748. VOID vM32CopyBlt( // Type FNCOPY
  749. PDEV* ppdev,
  750. LONG c, // Count of rectangles in 'prcl'; can't be zero
  751. RECTL* prcl, // Array of relative coordinates destination rectangles
  752. ULONG rop4, // rop4
  753. POINTL* pptlSrc, // Original unclipped source point
  754. RECTL* prclDst) // Original unclipped destination rectangle
  755. {
  756. BYTE* pjMmBase;
  757. LONG xOffset;
  758. LONG yOffset;
  759. LONG dx;
  760. LONG dy;
  761. LONG xLeft;
  762. LONG yTop;
  763. LONG cx;
  764. LONG cy;
  765. ASSERTDD(c > 0, "Can't handle zero rectangles");
  766. ASSERTDD(((rop4 & 0xff00) >> 8) == (rop4 & 0xff),
  767. "Expect only a rop2");
  768. pjMmBase = ppdev->pjMmBase;
  769. xOffset = ppdev->xOffset;
  770. yOffset = ppdev->yOffset;
  771. M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 12); // 2 setup writes + SRC_Y_DIR + 9 writes for the first rectangle
  772. M32_OW(pjMmBase, DP_CONFIG, FG_COLOR_SRC_BLIT | DRAW | WRITE);
  773. M32_OW(pjMmBase, ALU_FG_FN, gaul32HwMixFromRop2[rop4 & 0xf]);
  774. dx = pptlSrc->x - prclDst->left; // Add to a destination x to get the source x
  775. dy = pptlSrc->y - prclDst->top; // Add to a destination y to get the source y
  776. // The accelerator may not be as fast at doing right-to-left copies, so
  777. // only do them when the rectangles truly overlap:
  778. if (!OVERLAP(prclDst, pptlSrc))
  779. {
  780. M32_OW(pjMmBase, SRC_Y_DIR, 1);
  781. goto Top_Down_Left_To_Right;
  782. }
  783. M32_OW(pjMmBase, SRC_Y_DIR, (prclDst->top <= pptlSrc->y)); // 1 for top-down, 0 for bottom-up
  784. if (prclDst->top <= pptlSrc->y)
  785. {
  786. if (prclDst->left <= pptlSrc->x)
  787. {
  788. Top_Down_Left_To_Right:
  789. while (TRUE)
  790. {
  791. xLeft = xOffset + prcl->left + dx; // Source coordinates
  792. yTop = yOffset + prcl->top + dy;
  793. cx = prcl->right - prcl->left;
  794. cy = prcl->bottom - prcl->top;
  795. M32_OW(pjMmBase, M32_SRC_X, xLeft);
  796. M32_OW(pjMmBase, M32_SRC_X_START, xLeft);
  797. M32_OW(pjMmBase, M32_SRC_X_END, xLeft + cx);
  798. M32_OW(pjMmBase, M32_SRC_Y, yTop);
  799. xLeft -= dx; // Destination coordinates
  800. yTop -= dy;
  801. M32_OW(pjMmBase, CUR_X, xLeft);
  802. M32_OW(pjMmBase, DEST_X_START, xLeft);
  803. M32_OW(pjMmBase, DEST_X_END, xLeft + cx);
  804. M32_OW(pjMmBase, CUR_Y, yTop);
  805. vM32QuietDown(ppdev, pjMmBase);
  806. M32_OW(pjMmBase, DEST_Y_END, yTop + cy);
  807. if (--c == 0)
  808. break;
  809. prcl++;
  810. M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 9); // The nine per-rectangle writes above
  811. }
  812. }
  813. else
  814. {
       // Top-down, right-to-left:
  815. while (TRUE)
  816. {
  817. xLeft = xOffset + prcl->left + dx; // Source coordinates
  818. yTop = yOffset + prcl->top + dy;
  819. cx = prcl->right - prcl->left;
  820. cy = prcl->bottom - prcl->top;
  821. M32_OW(pjMmBase, M32_SRC_X, xLeft + cx);
  822. M32_OW(pjMmBase, M32_SRC_X_START, xLeft + cx);
  823. M32_OW(pjMmBase, M32_SRC_X_END, xLeft);
  824. M32_OW(pjMmBase, M32_SRC_Y, yTop);
  825. xLeft -= dx; // Destination coordinates
  826. yTop -= dy;
  827. M32_OW(pjMmBase, CUR_X, xLeft + cx);
  828. M32_OW(pjMmBase, DEST_X_START, xLeft + cx);
  829. M32_OW(pjMmBase, DEST_X_END, xLeft);
  830. M32_OW(pjMmBase, CUR_Y, yTop);
  831. vM32QuietDown(ppdev, pjMmBase);
  832. M32_OW(pjMmBase, DEST_Y_END, yTop + cy);
  833. if (--c == 0)
  834. break;
  835. prcl++;
  836. M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 9); // The nine per-rectangle writes above
  837. }
  838. }
  839. }
  840. else
  841. {
  842. if (prclDst->left <= pptlSrc->x)
  843. {
       // Bottom-up, left-to-right:
  844. while (TRUE)
  845. {
  846. xLeft = xOffset + prcl->left + dx; // Source coordinates
  847. yTop = yOffset + prcl->top + dy - 1; // One scan above the top, so yTop + cy is the last scan
  848. cx = prcl->right - prcl->left;
  849. cy = prcl->bottom - prcl->top;
  850. M32_OW(pjMmBase, M32_SRC_X, xLeft);
  851. M32_OW(pjMmBase, M32_SRC_X_START, xLeft);
  852. M32_OW(pjMmBase, M32_SRC_X_END, xLeft + cx);
  853. M32_OW(pjMmBase, M32_SRC_Y, yTop + cy);
  854. xLeft -= dx; // Destination coordinates
  855. yTop -= dy;
  856. M32_OW(pjMmBase, CUR_X, xLeft);
  857. M32_OW(pjMmBase, DEST_X_START, xLeft);
  858. M32_OW(pjMmBase, DEST_X_END, xLeft + cx);
  859. M32_OW(pjMmBase, CUR_Y, yTop + cy);
  860. vM32QuietDown(ppdev, pjMmBase);
  861. M32_OW(pjMmBase, DEST_Y_END, yTop);
  862. if (--c == 0)
  863. break;
  864. prcl++;
  865. M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 9); // The nine per-rectangle writes above
  866. }
  867. }
  868. else
  869. {
       // Bottom-up, right-to-left:
  870. while (TRUE)
  871. {
  872. xLeft = xOffset + prcl->left + dx; // Source coordinates
  873. yTop = yOffset + prcl->top + dy - 1; // One scan above the top, so yTop + cy is the last scan
  874. cx = prcl->right - prcl->left;
  875. cy = prcl->bottom - prcl->top;
  876. M32_OW(pjMmBase, M32_SRC_X, xLeft + cx);
  877. M32_OW(pjMmBase, M32_SRC_X_START, xLeft + cx);
  878. M32_OW(pjMmBase, M32_SRC_X_END, xLeft);
  879. M32_OW(pjMmBase, M32_SRC_Y, yTop + cy);
  880. xLeft -= dx; // Destination coordinates
  881. yTop -= dy;
  882. M32_OW(pjMmBase, CUR_X, xLeft + cx);
  883. M32_OW(pjMmBase, DEST_X_START, xLeft + cx);
  884. M32_OW(pjMmBase, DEST_X_END, xLeft);
  885. M32_OW(pjMmBase, CUR_Y, yTop + cy);
  886. vM32QuietDown(ppdev, pjMmBase);
  887. M32_OW(pjMmBase, DEST_Y_END, yTop);
  888. if (--c == 0)
  889. break;
  890. prcl++;
  891. M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 9); // The nine per-rectangle writes above
  892. }
  893. }
  894. }
  895. }