Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1093 lines
35 KiB

  /******************************Module*Header*******************************\
  * Module Name: blti32.c
  *
  * Contains the low-level I/O blt functions for the Mach32.
  *
  * Hopefully, if you're basing your display driver on this code, to
  * support all of DrvBitBlt and DrvCopyBits, you'll only have to implement
  * the following routines.  You shouldn't have to modify much in
  * 'bitblt.c'.  I've tried to make these routines as few, modular, simple,
  * and efficient as I could, while still accelerating as many calls as
  * possible that would be cost-effective in terms of performance wins
  * versus size and effort.
  *
  * Note: In the following, 'relative' coordinates refers to coordinates
  *       that haven't yet had the offscreen bitmap (DFB) offset applied.
  *       'Absolute' coordinates have had the offset applied.  For example,
  *       we may be told to blt to (1, 1) of the bitmap, but the bitmap may
  *       be sitting in offscreen memory starting at coordinate (0, 768) --
  *       (1, 1) would be the 'relative' start coordinate, and (1, 769)
  *       would be the 'absolute' start coordinate.
  *
  * Copyright (c) 1992-1995 Microsoft Corporation
  *
  \**************************************************************************/
  25. #include "precomp.h"
  26. /******************************Public*Routine******************************\
  27. * VOID vI32FillSolid
  28. *
  29. * Fills a list of rectangles with a solid colour.
  30. *
  31. \**************************************************************************/
  32. VOID vI32FillSolid( // Type FNFILL
  33. PDEV* ppdev,
  34. LONG c, // Can't be zero
  35. RECTL* prcl, // List of rectangles to be filled, in relative
  36. // coordinates
  37. ULONG rop4, // rop4
  38. RBRUSH_COLOR rbc, // Drawing colour is rbc.iSolidColor
  39. POINTL* pptlBrush) // Not used
  40. {
  41. BYTE* pjIoBase;
  42. LONG xOffset;
  43. LONG yOffset;
  44. LONG x;
  45. ASSERTDD(c > 0, "Can't handle zero rectangles");
  46. pjIoBase = ppdev->pjIoBase;
  47. xOffset = ppdev->xOffset;
  48. yOffset = ppdev->yOffset;
  49. I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 8);
  50. I32_OW(pjIoBase, FRGD_COLOR, rbc.iSolidColor);
  51. I32_OW(pjIoBase, ALU_FG_FN, gaul32HwMixFromRop2[(rop4 >> 2) & 0xf]);
  52. I32_OW(pjIoBase, DP_CONFIG, FG_COLOR_SRC_FG | WRITE | DRAW);
  53. while (TRUE)
  54. {
  55. x = xOffset + prcl->left;
  56. I32_OW(pjIoBase, CUR_X, x);
  57. I32_OW(pjIoBase, DEST_X_START, x);
  58. I32_OW(pjIoBase, DEST_X_END, xOffset + prcl->right);
  59. I32_OW(pjIoBase, CUR_Y, yOffset + prcl->top);
  60. vI32QuietDown(ppdev, pjIoBase);
  61. I32_OW(pjIoBase, DEST_Y_END, yOffset + prcl->bottom);
  62. if (--c == 0)
  63. return;
  64. prcl++;
  65. I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 5);
  66. }
  67. }
  68. /******************************Public*Routine******************************\
  69. * VOID vI32FillPatMonochrome
  70. *
  71. * This routine uses the pattern hardware to draw a monochrome patterned
  72. * list of rectangles.
  73. *
  74. * See Blt_DS_P8x8_ENG_IO_66_D0 and Blt_DS_P8x8_ENG_IO_66_D1.
  75. *
  76. \**************************************************************************/
  77. VOID vI32FillPatMonochrome( // Type FNFILL
  78. PDEV* ppdev,
  79. LONG c, // Can't be zero
  80. RECTL* prcl, // List of rectangles to be filled, in relative
  81. // coordinates
  82. ULONG rop4, // rop4
  83. RBRUSH_COLOR rbc, // rbc.prb points to brush realization structure
  84. POINTL* pptlBrush) // Pattern alignment
  85. {
  86. BYTE* pjIoBase;
  87. LONG xOffset;
  88. LONG yOffset;
  89. ULONG ulHwForeMix;
  90. BYTE* pjSrc;
  91. BYTE* pjDst;
  92. LONG xPattern;
  93. LONG yPattern;
  94. LONG xOld;
  95. LONG yOld;
  96. LONG iLeftShift;
  97. LONG iRightShift;
  98. LONG i;
  99. BYTE j;
  100. LONG xLeft;
  101. ULONG aulTmp[2];
  102. WORD* pwPattern;
  103. ASSERTDD(ppdev->iAsic == ASIC_68800_6 || ppdev->iAsic == ASIC_68800AX,
  104. "Wrong ASIC type for monochrome 8x8 patterns");
  105. pjIoBase = ppdev->pjIoBase;
  106. xOffset = ppdev->xOffset;
  107. yOffset = ppdev->yOffset;
  108. xPattern = (pptlBrush->x + xOffset) & 7;
  109. yPattern = (pptlBrush->y + yOffset) & 7;
  110. // If the alignment isn't correct, we'll have to change it:
  111. if ((xPattern != rbc.prb->ptlBrush.x) || (yPattern != rbc.prb->ptlBrush.y))
  112. {
  113. // Remember that we've changed the alignment on our cached brush:
  114. xOld = rbc.prb->ptlBrush.x;
  115. yOld = rbc.prb->ptlBrush.y;
  116. rbc.prb->ptlBrush.x = xPattern;
  117. rbc.prb->ptlBrush.y = yPattern;
  118. // Now do the alignment:
  119. yPattern = (yOld - yPattern);
  120. iRightShift = (xPattern - xOld) & 7;
  121. iLeftShift = 8 - iRightShift;
  122. pjSrc = (BYTE*) &rbc.prb->aulPattern[0];
  123. pjDst = (BYTE*) &aulTmp[0];
  124. for (i = 0; i < 8; i++)
  125. {
  126. j = *(pjSrc + (yPattern++ & 7));
  127. *pjDst++ = (j << iLeftShift) | (j >> iRightShift);
  128. }
  129. rbc.prb->aulPattern[0] = aulTmp[0];
  130. rbc.prb->aulPattern[1] = aulTmp[1];
  131. }
  132. ulHwForeMix = gaul32HwMixFromRop2[(rop4 >> 2) & 0xf];
  133. I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 16);
  134. I32_OW(pjIoBase, DP_CONFIG, FG_COLOR_SRC_FG | EXT_MONO_SRC_PATT | DRAW |
  135. WRITE);
  136. I32_OW(pjIoBase, ALU_FG_FN, ulHwForeMix);
  137. I32_OW(pjIoBase, ALU_BG_FN, ((rop4 & 0xff00) == 0xaa00) ? LEAVE_ALONE
  138. : ulHwForeMix);
  139. I32_OW(pjIoBase, FRGD_COLOR, rbc.prb->ulForeColor);
  140. I32_OW(pjIoBase, BKGD_COLOR, rbc.prb->ulBackColor);
  141. I32_OW(pjIoBase, PATT_LENGTH, 128);
  142. I32_OW(pjIoBase, PATT_DATA_INDEX, 16);
  143. pwPattern = (WORD*) &rbc.prb->aulPattern[0];
  144. I32_OW(pjIoBase, PATT_DATA, *(pwPattern));
  145. I32_OW(pjIoBase, PATT_DATA, *(pwPattern + 1));
  146. I32_OW(pjIoBase, PATT_DATA, *(pwPattern + 2));
  147. I32_OW(pjIoBase, PATT_DATA, *(pwPattern + 3));
  148. while(TRUE)
  149. {
  150. xLeft = xOffset + prcl->left;
  151. I32_OW(pjIoBase, CUR_X, xLeft);
  152. I32_OW(pjIoBase, DEST_X_START, xLeft);
  153. I32_OW(pjIoBase, DEST_X_END, xOffset + prcl->right);
  154. I32_OW(pjIoBase, CUR_Y, yOffset + prcl->top);
  155. I32_OW(pjIoBase, DEST_Y_END, yOffset + prcl->bottom);
  156. if (--c == 0)
  157. break;
  158. prcl++;
  159. I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 5);
  160. }
  161. }
  162. /******************************Public*Routine******************************\
  163. * VOID vI32FillPatColor
  164. *
  165. * This routine uses the pattern hardware to draw a colour patterned list of
  166. * rectangles.
  167. *
  168. * See Blt_DS_PCOL_ENG_IO_F0_D0 and Blt_DS_PCOL_ENG_IO_F0_D1.
  169. *
  170. \**************************************************************************/
  171. VOID vI32FillPatColor( // Type FNFILL
  172. PDEV* ppdev,
  173. LONG c, // Can't be zero
  174. RECTL* prcl, // List of rectangles to be filled, in relative
  175. // coordinates
  176. ULONG rop4, // rop4
  177. RBRUSH_COLOR rbc, // rbc.prb points to brush realization structure
  178. POINTL* pptlBrush) // Pattern alignment
  179. {
  180. BYTE* pjIoBase;
  181. LONG xOffset;
  182. LONG yOffset;
  183. ULONG ulHwMix;
  184. LONG xLeft;
  185. LONG xRight;
  186. LONG yTop;
  187. LONG cy;
  188. LONG cyVenetian;
  189. LONG cyRoll;
  190. WORD* pwPattern;
  191. LONG xPattern;
  192. LONG yPattern;
  193. ASSERTDD(ppdev->iBitmapFormat == BMF_8BPP,
  194. "Colour patterns work only at 8bpp");
  195. pjIoBase = ppdev->pjIoBase;
  196. xOffset = ppdev->xOffset;
  197. yOffset = ppdev->yOffset;
  198. ulHwMix = gaul32HwMixFromRop2[(rop4 >> 2) & 0xf];
  199. I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 9);
  200. I32_OW(pjIoBase, ALU_FG_FN, ulHwMix);
  201. I32_OW(pjIoBase, SRC_Y_DIR, 1);
  202. I32_OW(pjIoBase, PATT_LENGTH, 7); // 8 pixel wide pattern
  203. while (TRUE)
  204. {
  205. xLeft = xOffset + prcl->left;
  206. xRight = xOffset + prcl->right;
  207. yTop = yOffset + prcl->top;
  208. cy = prcl->bottom - prcl->top;
  209. xPattern = (xLeft - pptlBrush->x - xOffset) & 7;
  210. yPattern = (yTop - pptlBrush->y - yOffset) & 7;
  211. if (ulHwMix == OVERPAINT)
  212. {
  213. cyVenetian = min(cy, 8);
  214. cyRoll = cy - cyVenetian;
  215. }
  216. else
  217. {
  218. cyVenetian = cy;
  219. cyRoll = 0;
  220. }
  221. I32_OW(pjIoBase, DP_CONFIG, FG_COLOR_SRC_PATT | DATA_WIDTH | DRAW | WRITE);
  222. I32_OW(pjIoBase, PATT_INDEX, xPattern);
  223. I32_OW(pjIoBase, DEST_X_START, xLeft);
  224. I32_OW(pjIoBase, CUR_X, xLeft);
  225. I32_OW(pjIoBase, DEST_X_END, xRight);
  226. I32_OW(pjIoBase, CUR_Y, yTop);
  227. do {
  228. // Each scan of the pattern is eight bytes:
  229. pwPattern = (WORD*) ((BYTE*) &rbc.prb->aulPattern[0]
  230. + (yPattern << 3));
  231. yPattern = (yPattern + 1) & 7;
  232. I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 6);
  233. I32_OW(pjIoBase, PATT_DATA_INDEX, 0); // Reset index for download
  234. I32_OW(pjIoBase, PATT_DATA, *(pwPattern));
  235. I32_OW(pjIoBase, PATT_DATA, *(pwPattern + 1));
  236. I32_OW(pjIoBase, PATT_DATA, *(pwPattern + 2));
  237. I32_OW(pjIoBase, PATT_DATA, *(pwPattern + 3));
  238. yTop++;
  239. vI32QuietDown(ppdev, pjIoBase);
  240. I32_OW(pjIoBase, DEST_Y_END, yTop);
  241. } while (--cyVenetian != 0);
  242. if (cyRoll != 0)
  243. {
  244. // When the ROP is PATCOPY, we can take advantage of the fact
  245. // that we've just laid down an entire row of the pattern, and
  246. // can do a 'rolling' screen-to-screen blt to draw the rest:
  247. I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 7);
  248. I32_OW(pjIoBase, DP_CONFIG, FG_COLOR_SRC_BLIT | DATA_WIDTH |
  249. DRAW | WRITE);
  250. I32_OW(pjIoBase, M32_SRC_X, xLeft);
  251. I32_OW(pjIoBase, M32_SRC_X_START, xLeft);
  252. I32_OW(pjIoBase, M32_SRC_X_END, xRight);
  253. I32_OW(pjIoBase, M32_SRC_Y, yTop - 8);
  254. I32_OW(pjIoBase, CUR_Y, yTop);
  255. vI32QuietDown(ppdev, pjIoBase);
  256. I32_OW(pjIoBase, DEST_Y_END, yTop + cyRoll);
  257. }
  258. if (--c == 0)
  259. break;
  260. prcl++;
  261. I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 6);
  262. }
  263. }
  264. /******************************Public*Routine******************************\
  265. * VOID vI32Xfer1bpp
  266. *
  267. * This routine colour expands a monochrome bitmap, possibly with different
  268. * Rop2's for the foreground and background. It will be called in the
  269. * following cases:
  270. *
  271. * 1) To colour-expand the monochrome text buffer for the vFastText routine.
  272. * 2) To blt a 1bpp source with a simple Rop2 between the source and
  273. * destination.
  274. * 3) To blt a true Rop3 when the source is a 1bpp bitmap that expands to
  275. * white and black, and the pattern is a solid colour.
  276. * 4) To handle a true Rop4 that works out to be Rop2's between the pattern
  277. * and destination.
  278. *
  279. * Needless to say, making this routine fast can leverage a lot of
  280. * performance.
  281. *
  282. \**************************************************************************/
  283. VOID vI32Xfer1bpp( // Type FNXFER
  284. PDEV* ppdev,
  285. LONG c, // Count of rectangles, can't be zero
  286. RECTL* prcl, // List of destination rectangles, in relative
  287. // coordinates
  288. ROP4 rop4, // rop4
  289. SURFOBJ* psoSrc, // Source surface
  290. POINTL* pptlSrc, // Original unclipped source point
  291. RECTL* prclDst, // Original unclipped destination rectangle
  292. XLATEOBJ* pxlo) // Translate that provides colour-expansion information
  293. {
  294. BYTE* pjIoBase;
  295. LONG xOffset;
  296. LONG yOffset;
  297. ULONG* pulXlate;
  298. ULONG ulHwForeMix;
  299. LONG dx;
  300. LONG dy;
  301. LONG lSrcDelta;
  302. BYTE* pjSrcScan0;
  303. LONG xLeft;
  304. LONG xRight;
  305. LONG yTop;
  306. LONG cy;
  307. LONG cx;
  308. LONG xBias;
  309. LONG culScan;
  310. LONG lSrcSkip;
  311. ULONG* pulSrc;
  312. LONG i;
  313. ULONG ulFifo;
  314. ASSERTDD(c > 0, "Can't handle zero rectangles");
  315. ASSERTDD(((rop4 & 0xff00) >> 8) == (rop4 & 0xff),
  316. "Expect only a rop2");
  317. pjIoBase = ppdev->pjIoBase;
  318. xOffset = ppdev->xOffset;
  319. yOffset = ppdev->yOffset;
  320. ulFifo = 0;
  321. ulHwForeMix = gaul32HwMixFromRop2[rop4 & 0xf];
  322. pulXlate = pxlo->pulXlate;
  323. I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 12);
  324. I32_OW(pjIoBase, DP_CONFIG, (WORD)(FG_COLOR_SRC_FG | BG_COLOR_SRC_BG | BIT16 |
  325. EXT_MONO_SRC_HOST | DRAW | WRITE | LSB_FIRST) );
  326. I32_OW(pjIoBase, ALU_FG_FN, (WORD) ulHwForeMix );
  327. I32_OW(pjIoBase, ALU_BG_FN, (WORD) ulHwForeMix );
  328. I32_OW(pjIoBase, BKGD_COLOR, (WORD) pulXlate[0]);
  329. I32_OW(pjIoBase, FRGD_COLOR, (WORD) pulXlate[1]);
  330. dx = pptlSrc->x - prclDst->left;
  331. dy = pptlSrc->y - prclDst->top;
  332. lSrcDelta = psoSrc->lDelta;
  333. pjSrcScan0 = psoSrc->pvScan0;
  334. while (TRUE)
  335. {
  336. xLeft = prcl->left;
  337. xRight = prcl->right;
  338. // The Mach32 'bit packs' monochrome transfers, but GDI gives
  339. // us monochrome bitmaps whose scans are always dword aligned.
  340. // Consequently, we use the Mach32's clip registers to make
  341. // our transfers a multiple of 32 to match the dword alignment:
  342. I32_OW(pjIoBase, EXT_SCISSOR_L, (SHORT) (xLeft + xOffset) );
  343. I32_OW(pjIoBase, EXT_SCISSOR_R, (SHORT) (xRight + xOffset - 1) );
  344. yTop = prcl->top;
  345. cy = prcl->bottom - yTop;
  346. xBias = (xLeft + dx) & 31; // Floor
  347. xLeft -= xBias;
  348. cx = (xRight - xLeft + 31) & ~31; // Ceiling
  349. I32_OW(pjIoBase, CUR_X, (WORD) xLeft + xOffset );
  350. I32_OW(pjIoBase, DEST_X_START, (WORD) xLeft + xOffset );
  351. I32_OW(pjIoBase, DEST_X_END, (WORD) (xLeft + xOffset + cx) );
  352. I32_OW(pjIoBase, CUR_Y, (WORD) yTop + yOffset );
  353. I32_OW(pjIoBase, DEST_Y_END, (WORD) (yTop + yOffset + cy) );
  354. pulSrc = (ULONG*) (pjSrcScan0 + (yTop + dy) * lSrcDelta
  355. + ((xLeft + dx) >> 3));
  356. culScan = cx >> 5;
  357. lSrcSkip = lSrcDelta - (culScan << 2);
  358. ASSERTDD(((ULONG_PTR)pulSrc & 3) == 0,"Source should be dword aligned");
  359. do {
  360. i = culScan;
  361. do {
  362. I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 2);
  363. I32_OW(pjIoBase, PIX_TRANS, *((USHORT*) pulSrc) );
  364. I32_OW(pjIoBase, PIX_TRANS, *((USHORT*) pulSrc + 1) );
  365. pulSrc++;
  366. } while (--i != 0);
  367. pulSrc = (ULONG*) ((BYTE*) pulSrc + lSrcSkip);
  368. } while (--cy != 0);
  369. if (--c == 0)
  370. break;
  371. prcl++;
  372. I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 7);
  373. }
  374. // Don't forget to reset the clip register:
  375. I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 2);
  376. I32_OW(pjIoBase, EXT_SCISSOR_L, (SHORT) 0 );
  377. I32_OW(pjIoBase, EXT_SCISSOR_R, (SHORT) M32_MAX_SCISSOR );
  378. }
  379. /******************************Public*Routine******************************\
  380. * VOID vI32XferNative
  381. *
  382. * Transfers a bitmap that is the same colour depth as the display to
  383. * the screen via the data transfer register, with no translation.
  384. *
  385. \**************************************************************************/
  386. VOID vI32XferNative( // Type FNXFER
  387. PDEV* ppdev,
  388. LONG c, // Count of rectangles, can't be zero
  389. RECTL* prcl, // Array of relative coordinates destination rectangles
  390. ULONG rop4, // rop4
  391. SURFOBJ* psoSrc, // Source surface
  392. POINTL* pptlSrc, // Original unclipped source point
  393. RECTL* prclDst, // Original unclipped destination rectangle
  394. XLATEOBJ* pxlo) // Not used
  395. {
  396. BYTE* pjIoBase;
  397. LONG xOffset;
  398. LONG yOffset;
  399. ULONG ulHwForeMix;
  400. LONG dx;
  401. LONG dy;
  402. LONG lSrcDelta;
  403. BYTE* pjSrcScan0;
  404. LONG xLeft;
  405. LONG xRight;
  406. LONG yTop;
  407. LONG cy;
  408. LONG cx;
  409. LONG xBias;
  410. ULONG* pulSrc;
  411. ULONG culScan;
  412. LONG lSrcSkip;
  413. LONG i;
  414. ULONG ulFifo;
  415. ASSERTDD(c > 0, "Can't handle zero rectangles");
  416. ASSERTDD(((rop4 & 0xff00) >> 8) == (rop4 & 0xff),
  417. "Expect only a rop2");
  418. pjIoBase = ppdev->pjIoBase;
  419. xOffset = ppdev->xOffset;
  420. yOffset = ppdev->yOffset;
  421. ulFifo = 0;
  422. ulHwForeMix = gaul32HwMixFromRop2[rop4 & 0xf];
  423. I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 10);
  424. I32_OW(pjIoBase, DP_CONFIG, (WORD)(FG_COLOR_SRC_HOST | BIT16 |
  425. DRAW | WRITE | LSB_FIRST) );
  426. I32_OW(pjIoBase, ALU_FG_FN, (WORD) ulHwForeMix );
  427. I32_OW(pjIoBase, ALU_BG_FN, (WORD) ulHwForeMix );
  428. dx = pptlSrc->x - prclDst->left;
  429. dy = pptlSrc->y - prclDst->top;
  430. lSrcDelta = psoSrc->lDelta;
  431. pjSrcScan0 = psoSrc->pvScan0;
  432. while (TRUE)
  433. {
  434. xLeft = prcl->left;
  435. xRight = prcl->right;
  436. I32_OW(pjIoBase, EXT_SCISSOR_L, (SHORT) (xLeft + xOffset) );
  437. I32_OW(pjIoBase, EXT_SCISSOR_R, (SHORT) (xRight + xOffset - 1) );
  438. yTop = prcl->top;
  439. cy = prcl->bottom - yTop;
  440. // We compute 'xBias' in order to dword-align the source pointer.
  441. // This way, we don't have to do unaligned reads of the source,
  442. // and we're guaranteed not to read even a byte past the end of
  443. // the bitmap.
  444. //
  445. // Note that this bias works at 24bpp, too:
  446. xBias = (xLeft + dx) & 3; // Floor
  447. xLeft -= xBias;
  448. cx = (xRight - xLeft + 3) & ~3; // Ceiling
  449. I32_OW(pjIoBase, CUR_X, (WORD) xLeft + xOffset );
  450. I32_OW(pjIoBase, DEST_X_START, (WORD) xLeft + xOffset );
  451. I32_OW(pjIoBase, DEST_X_END, (WORD) (xLeft + xOffset + cx) );
  452. I32_OW(pjIoBase, CUR_Y, (WORD) yTop + yOffset );
  453. I32_OW(pjIoBase, DEST_Y_END, (WORD) (yTop + yOffset + cy) );
  454. pulSrc = (ULONG*) (pjSrcScan0 + (yTop + dy) * lSrcDelta
  455. + ((xLeft + dx) * ppdev->cjPelSize));
  456. culScan = (cx * ppdev->cjPelSize) >> 2;
  457. lSrcSkip = lSrcDelta - (culScan << 2);
  458. ASSERTDD(((ULONG_PTR)pulSrc & 3) == 0, "Source should be dword aligned");
  459. if (cy && culScan)
  460. {
  461. do {
  462. i = culScan;
  463. do {
  464. I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 2);
  465. I32_OW(pjIoBase, PIX_TRANS, *((USHORT*) pulSrc) );
  466. I32_OW(pjIoBase, PIX_TRANS, *((USHORT*) pulSrc + 1) );
  467. pulSrc++;
  468. } while (--i != 0);
  469. pulSrc = (ULONG*) ((BYTE*) pulSrc + lSrcSkip);
  470. } while (--cy != 0);
  471. }
  472. if (--c == 0)
  473. break;
  474. prcl++;
  475. I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 7);
  476. }
  477. // Don't forget to reset the clip register:
  478. I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 2);
  479. I32_OW(pjIoBase, EXT_SCISSOR_L, (SHORT) 0 );
  480. I32_OW(pjIoBase, EXT_SCISSOR_R, (SHORT) M32_MAX_SCISSOR );
  481. }
  482. /******************************Public*Routine******************************\
  483. * VOID vI32Xfer4bpp
  484. *
  485. * Does a 4bpp transfer from a bitmap to the screen.
  486. *
  487. * The reason we implement this is that a lot of resources are kept as 4bpp,
  488. * and used to initialize DFBs, some of which we of course keep off-screen.
  489. *
  490. \**************************************************************************/
  491. VOID vI32Xfer4bpp( // Type FNXFER
  492. PDEV* ppdev,
  493. LONG c, // Count of rectangles, can't be zero
  494. RECTL* prcl, // List of destination rectangles, in relative
  495. // coordinates
  496. ULONG rop4, // Rop4
  497. SURFOBJ* psoSrc, // Source surface
  498. POINTL* pptlSrc, // Original unclipped source point
  499. RECTL* prclDst, // Original unclipped destination rectangle
  500. XLATEOBJ* pxlo) // Translate that provides colour-expansion information
  501. {
  502. BYTE* pjIoBase;
  503. LONG xOffset;
  504. LONG yOffset;
  505. LONG cjPelSize;
  506. ULONG ulHwForeMix;
  507. LONG xLeft;
  508. LONG xRight;
  509. LONG yTop;
  510. LONG xBias;
  511. LONG dx;
  512. LONG dy;
  513. LONG cx;
  514. LONG cy;
  515. LONG lSrcDelta;
  516. BYTE* pjSrcScan0;
  517. BYTE* pjSrc;
  518. BYTE jSrc;
  519. ULONG* pulXlate;
  520. LONG i;
  521. USHORT uw;
  522. LONG cjSrc;
  523. LONG lSrcSkip;
  524. ULONG ulFifo;
  525. ASSERTDD(psoSrc->iBitmapFormat == BMF_4BPP, "Source must be 4bpp");
  526. ASSERTDD(c > 0, "Can't handle zero rectangles");
  527. ASSERTDD(ppdev->iBitmapFormat != BMF_24BPP, "Can't handle 24bpp");
  528. pjIoBase = ppdev->pjIoBase;
  529. xOffset = ppdev->xOffset;
  530. yOffset = ppdev->yOffset;
  531. cjPelSize = ppdev->cjPelSize;
  532. pulXlate = pxlo->pulXlate;
  533. ulFifo = 0;
  534. dx = pptlSrc->x - prclDst->left;
  535. dy = pptlSrc->y - prclDst->top; // Add to destination to get source
  536. lSrcDelta = psoSrc->lDelta;
  537. pjSrcScan0 = psoSrc->pvScan0;
  538. ulHwForeMix = gaul32HwMixFromRop2[rop4 & 0xf];
  539. I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 10);
  540. I32_OW(pjIoBase, DP_CONFIG, (WORD)(FG_COLOR_SRC_HOST | BIT16 |
  541. DRAW | WRITE | LSB_FIRST) );
  542. I32_OW(pjIoBase, ALU_FG_FN, (WORD) ulHwForeMix );
  543. I32_OW(pjIoBase, ALU_BG_FN, (WORD) ulHwForeMix );
  544. while(TRUE)
  545. {
  546. xLeft = prcl->left;
  547. xRight = prcl->right;
  548. I32_OW(pjIoBase, EXT_SCISSOR_L, (SHORT) (xLeft + xOffset) );
  549. I32_OW(pjIoBase, EXT_SCISSOR_R, (SHORT) (xRight + xOffset - 1) );
  550. yTop = prcl->top;
  551. cy = prcl->bottom - yTop;
  552. // We compute 'xBias' in order to dword-align the source pointer.
  553. // This way, we don't have to do unaligned reads of the source,
  554. // and we're guaranteed not to read even a byte past the end of
  555. // the bitmap.
  556. //
  557. // Note that this bias works at 24bpp, too:
  558. xBias = (xLeft + dx) & 3; // Floor
  559. xLeft -= xBias;
  560. cx = (xRight - xLeft + 3) & ~3; // Ceiling
  561. I32_OW(pjIoBase, CUR_X, (WORD) xLeft + xOffset );
  562. I32_OW(pjIoBase, DEST_X_START, (WORD) xLeft + xOffset );
  563. I32_OW(pjIoBase, DEST_X_END, (WORD) (xLeft + xOffset + cx) );
  564. I32_OW(pjIoBase, CUR_Y, (WORD) yTop + yOffset );
  565. I32_OW(pjIoBase, DEST_Y_END, (WORD) (yTop + yOffset + cy) );
  566. pjSrc = pjSrcScan0 + (yTop + dy) * lSrcDelta
  567. + ((xLeft + dx) >> 1);
  568. cjSrc = cx >> 1; // Number of source bytes touched
  569. lSrcSkip = lSrcDelta - cjSrc;
  570. if (cjPelSize == 1)
  571. {
  572. // This part handles 8bpp output:
  573. do {
  574. i = cjSrc;
  575. do {
  576. jSrc = *pjSrc++;
  577. uw = (USHORT) (pulXlate[jSrc >> 4]);
  578. uw |= (USHORT) (pulXlate[jSrc & 0xf] << 8);
  579. I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 1);
  580. I32_OW(pjIoBase, PIX_TRANS, uw );
  581. } while (--i != 0);
  582. pjSrc += lSrcSkip;
  583. } while (--cy != 0);
  584. }
  585. else if (cjPelSize == 2)
  586. {
  587. // This part handles 16bpp output:
  588. do {
  589. i = cjSrc;
  590. do {
  591. I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 2);
  592. jSrc = *pjSrc++;
  593. uw = (USHORT) (pulXlate[jSrc >> 4]);
  594. I32_OW(pjIoBase, PIX_TRANS, uw );
  595. uw = (USHORT) (pulXlate[jSrc & 0xf]);
  596. I32_OW(pjIoBase, PIX_TRANS, uw );
  597. } while (--i != 0);
  598. pjSrc += lSrcSkip;
  599. } while (--cy != 0);
  600. }
  601. if (--c == 0)
  602. break;
  603. prcl++;
  604. I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 7);
  605. }
  606. // Don't forget to reset the clip register:
  607. I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 2);
  608. I32_OW(pjIoBase, EXT_SCISSOR_L, (SHORT) 0 );
  609. I32_OW(pjIoBase, EXT_SCISSOR_R, (SHORT) M32_MAX_SCISSOR );
  610. }
  611. /******************************Public*Routine******************************\
  612. * VOID vI32Xfer8bpp
  613. *
  614. * Does a 8bpp transfer from a bitmap to the screen.
  615. *
  616. * The reason we implement this is that a lot of resources are kept as 8bpp,
  617. * and used to initialize DFBs, some of which we of course keep off-screen.
  618. *
  619. \**************************************************************************/
  620. VOID vI32Xfer8bpp( // Type FNXFER
  621. PDEV* ppdev,
  622. LONG c, // Count of rectangles, can't be zero
  623. RECTL* prcl, // List of destination rectangles, in relative
  624. // coordinates
  625. ULONG rop4, // Rop4
  626. SURFOBJ* psoSrc, // Source surface
  627. POINTL* pptlSrc, // Original unclipped source point
  628. RECTL* prclDst, // Original unclipped destination rectangle
  629. XLATEOBJ* pxlo) // Translate that provides colour-expansion information
  630. {
  631. BYTE* pjIoBase;
  632. LONG xOffset;
  633. LONG yOffset;
  634. LONG cjPelSize;
  635. ULONG ulHwForeMix;
  636. LONG xLeft;
  637. LONG xRight;
  638. LONG yTop;
  639. LONG xBias;
  640. LONG dx;
  641. LONG dy;
  642. LONG cx;
  643. LONG cy;
  644. LONG lSrcDelta;
  645. BYTE* pjSrcScan0;
  646. BYTE* pjSrc;
  647. ULONG* pulXlate;
  648. LONG i;
  649. USHORT uw;
  650. LONG cwSrc;
  651. LONG cxRem;
  652. LONG lSrcSkip;
  653. ULONG ulFifo;
  654. ASSERTDD(psoSrc->iBitmapFormat == BMF_8BPP, "Source must be 8bpp");
  655. ASSERTDD(c > 0, "Can't handle zero rectangles");
  656. ASSERTDD(ppdev->iBitmapFormat != BMF_24BPP, "Can't handle 24bpp");
  657. pjIoBase = ppdev->pjIoBase;
  658. xOffset = ppdev->xOffset;
  659. yOffset = ppdev->yOffset;
  660. cjPelSize = ppdev->cjPelSize;
  661. pulXlate = pxlo->pulXlate;
  662. ulFifo = 0;
  663. dx = pptlSrc->x - prclDst->left;
  664. dy = pptlSrc->y - prclDst->top; // Add to destination to get source
  665. lSrcDelta = psoSrc->lDelta;
  666. pjSrcScan0 = psoSrc->pvScan0;
  667. ulHwForeMix = gaul32HwMixFromRop2[rop4 & 0xf];
  668. I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 10);
  669. I32_OW(pjIoBase, DP_CONFIG, (WORD)(FG_COLOR_SRC_HOST | BIT16 |
  670. DRAW | WRITE | LSB_FIRST) );
  671. I32_OW(pjIoBase, ALU_FG_FN, (WORD) ulHwForeMix );
  672. I32_OW(pjIoBase, ALU_BG_FN, (WORD) ulHwForeMix );
  673. while(TRUE)
  674. {
  675. xLeft = prcl->left;
  676. xRight = prcl->right;
  677. I32_OW(pjIoBase, EXT_SCISSOR_L, (SHORT) (xLeft + xOffset) );
  678. I32_OW(pjIoBase, EXT_SCISSOR_R, (SHORT) (xRight + xOffset - 1) );
  679. yTop = prcl->top;
  680. cy = prcl->bottom - yTop;
  681. // We compute 'xBias' in order to dword-align the source pointer.
  682. // This way, we don't have to do unaligned reads of the source,
  683. // and we're guaranteed not to read even a byte past the end of
  684. // the bitmap.
  685. //
  686. // Note that this bias works at 24bpp, too:
  687. xBias = (xLeft + dx) & 3; // Floor
  688. xLeft -= xBias;
  689. cx = (xRight - xLeft + 3) & ~3; // Ceiling
  690. I32_OW(pjIoBase, CUR_X, (WORD) xLeft + xOffset );
  691. I32_OW(pjIoBase, DEST_X_START, (WORD) xLeft + xOffset );
  692. I32_OW(pjIoBase, DEST_X_END, (WORD) (xLeft + xOffset + cx) );
  693. I32_OW(pjIoBase, CUR_Y, (WORD) yTop + yOffset );
  694. I32_OW(pjIoBase, DEST_Y_END, (WORD) (yTop + yOffset + cy) );
  695. pjSrc = pjSrcScan0 + (yTop + dy) * lSrcDelta
  696. + (xLeft + dx);
  697. lSrcSkip = lSrcDelta - cx;
  698. if (cjPelSize == 1)
  699. {
  700. // This part handles 8bpp output:
  701. cwSrc = (cx >> 1);
  702. cxRem = (cx & 1);
  703. do {
  704. for (i = cwSrc; i != 0; i--)
  705. {
  706. uw = (USHORT) (pulXlate[*pjSrc++]);
  707. uw |= (USHORT) (pulXlate[*pjSrc++] << 8);
  708. I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 1);
  709. I32_OW(pjIoBase, PIX_TRANS, uw );
  710. }
  711. if (cxRem > 0)
  712. {
  713. uw = (USHORT) (pulXlate[*pjSrc++]);
  714. I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 1);
  715. I32_OW(pjIoBase, PIX_TRANS, uw );
  716. }
  717. pjSrc += lSrcSkip;
  718. } while (--cy != 0);
  719. }
  720. else if (cjPelSize == 2)
  721. {
  722. // This part handles 16bpp output:
  723. do {
  724. for (i = cx; i != 0; i--)
  725. {
  726. uw = (USHORT) (pulXlate[*pjSrc++]);
  727. I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 1);
  728. I32_OW(pjIoBase, PIX_TRANS, uw );
  729. }
  730. pjSrc += lSrcSkip;
  731. } while (--cy != 0);
  732. }
  733. if (--c == 0)
  734. break;
  735. prcl++;
  736. I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 7);
  737. }
  738. // Don't forget to reset the clip register:
  739. I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 2);
  740. I32_OW(pjIoBase, EXT_SCISSOR_L, (SHORT) 0 );
  741. I32_OW(pjIoBase, EXT_SCISSOR_R, (SHORT) M32_MAX_SCISSOR );
  742. }
  743. /******************************Public*Routine******************************\
  744. * VOID vI32CopyBlt
  745. *
  746. * Does a screen-to-screen blt of a list of rectangles.
  747. *
  748. \**************************************************************************/
  749. VOID vI32CopyBlt( // Type FNCOPY
  750. PDEV* ppdev,
  751. LONG c, // Can't be zero
  752. RECTL* prcl, // Array of relative coordinates destination rectangles
  753. ULONG rop4, // rop4
  754. POINTL* pptlSrc, // Original unclipped source point
  755. RECTL* prclDst) // Original unclipped destination rectangle
  756. {
  757. BYTE* pjIoBase;
  758. LONG xOffset;
  759. LONG yOffset;
  760. LONG dx;
  761. LONG dy;
  762. LONG xLeft;
  763. LONG yTop;
  764. LONG cx;
  765. LONG cy;
  766. ASSERTDD(c > 0, "Can't handle zero rectangles");
  767. ASSERTDD(((rop4 & 0xff00) >> 8) == (rop4 & 0xff),
  768. "Expect only a rop2");
  769. pjIoBase = ppdev->pjIoBase;
  770. xOffset = ppdev->xOffset;
  771. yOffset = ppdev->yOffset;
  772. I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 12);
  773. I32_OW(pjIoBase, DP_CONFIG, FG_COLOR_SRC_BLIT | DRAW | WRITE);
  774. I32_OW(pjIoBase, ALU_FG_FN, gaul32HwMixFromRop2[rop4 & 0xf]);
  775. dx = pptlSrc->x - prclDst->left;
  776. dy = pptlSrc->y - prclDst->top;
  777. // The accelerator may not be as fast at doing right-to-left copies, so
  778. // only do them when the rectangles truly overlap:
  779. if (!OVERLAP(prclDst, pptlSrc))
  780. {
  781. I32_OW(pjIoBase, SRC_Y_DIR, 1);
  782. goto Top_Down_Left_To_Right;
  783. }
  784. I32_OW(pjIoBase, SRC_Y_DIR, (prclDst->top <= pptlSrc->y));
  785. if (prclDst->top <= pptlSrc->y)
  786. {
  787. if (prclDst->left <= pptlSrc->x)
  788. {
  789. Top_Down_Left_To_Right:
  790. while (TRUE)
  791. {
  792. xLeft = xOffset + prcl->left + dx; // Destination coordinates
  793. yTop = yOffset + prcl->top + dy;
  794. cx = prcl->right - prcl->left;
  795. cy = prcl->bottom - prcl->top;
  796. I32_OW(pjIoBase, M32_SRC_X, xLeft);
  797. I32_OW(pjIoBase, M32_SRC_X_START, xLeft);
  798. I32_OW(pjIoBase, M32_SRC_X_END, xLeft + cx);
  799. I32_OW(pjIoBase, M32_SRC_Y, yTop);
  800. xLeft -= dx; // Source coordinates
  801. yTop -= dy;
  802. I32_OW(pjIoBase, CUR_X, xLeft);
  803. I32_OW(pjIoBase, DEST_X_START, xLeft);
  804. I32_OW(pjIoBase, DEST_X_END, xLeft + cx);
  805. I32_OW(pjIoBase, CUR_Y, yTop);
  806. vI32QuietDown(ppdev, pjIoBase);
  807. I32_OW(pjIoBase, DEST_Y_END, yTop + cy);
  808. if (--c == 0)
  809. break;
  810. prcl++;
  811. I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 9);
  812. }
  813. }
  814. else
  815. {
  816. while (TRUE)
  817. {
  818. xLeft = xOffset + prcl->left + dx; // Destination coordinates
  819. yTop = yOffset + prcl->top + dy;
  820. cx = prcl->right - prcl->left;
  821. cy = prcl->bottom - prcl->top;
  822. I32_OW(pjIoBase, M32_SRC_X, xLeft + cx);
  823. I32_OW(pjIoBase, M32_SRC_X_START, xLeft + cx);
  824. I32_OW(pjIoBase, M32_SRC_X_END, xLeft);
  825. I32_OW(pjIoBase, M32_SRC_Y, yTop);
  826. xLeft -= dx; // Source coordinates
  827. yTop -= dy;
  828. I32_OW(pjIoBase, CUR_X, xLeft + cx);
  829. I32_OW(pjIoBase, DEST_X_START, xLeft + cx);
  830. I32_OW(pjIoBase, DEST_X_END, xLeft);
  831. I32_OW(pjIoBase, CUR_Y, yTop);
  832. vI32QuietDown(ppdev, pjIoBase);
  833. I32_OW(pjIoBase, DEST_Y_END, yTop + cy);
  834. if (--c == 0)
  835. break;
  836. prcl++;
  837. I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 9);
  838. }
  839. }
  840. }
  841. else
  842. {
  843. if (prclDst->left <= pptlSrc->x)
  844. {
  845. while (TRUE)
  846. {
  847. xLeft = xOffset + prcl->left + dx; // Destination coordinates
  848. yTop = yOffset + prcl->top + dy - 1;
  849. cx = prcl->right - prcl->left;
  850. cy = prcl->bottom - prcl->top;
  851. I32_OW(pjIoBase, M32_SRC_X, xLeft);
  852. I32_OW(pjIoBase, M32_SRC_X_START, xLeft);
  853. I32_OW(pjIoBase, M32_SRC_X_END, xLeft + cx);
  854. I32_OW(pjIoBase, M32_SRC_Y, yTop + cy);
  855. xLeft -= dx; // Source coordinates
  856. yTop -= dy;
  857. I32_OW(pjIoBase, CUR_X, xLeft);
  858. I32_OW(pjIoBase, DEST_X_START, xLeft);
  859. I32_OW(pjIoBase, DEST_X_END, xLeft + cx);
  860. I32_OW(pjIoBase, CUR_Y, yTop + cy);
  861. vI32QuietDown(ppdev, pjIoBase);
  862. I32_OW(pjIoBase, DEST_Y_END, yTop);
  863. if (--c == 0)
  864. break;
  865. prcl++;
  866. I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 9);
  867. }
  868. }
  869. else
  870. {
  871. while (TRUE)
  872. {
  873. xLeft = xOffset + prcl->left + dx; // Destination coordinates
  874. yTop = yOffset + prcl->top + dy - 1;
  875. cx = prcl->right - prcl->left;
  876. cy = prcl->bottom - prcl->top;
  877. I32_OW(pjIoBase, M32_SRC_X, xLeft + cx);
  878. I32_OW(pjIoBase, M32_SRC_X_START, xLeft + cx);
  879. I32_OW(pjIoBase, M32_SRC_X_END, xLeft);
  880. I32_OW(pjIoBase, M32_SRC_Y, yTop + cy);
  881. xLeft -= dx; // Source coordinates
  882. yTop -= dy;
  883. I32_OW(pjIoBase, CUR_X, xLeft + cx);
  884. I32_OW(pjIoBase, DEST_X_START, xLeft + cx);
  885. I32_OW(pjIoBase, DEST_X_END, xLeft);
  886. I32_OW(pjIoBase, CUR_Y, yTop + cy);
  887. vI32QuietDown(ppdev, pjIoBase);
  888. I32_OW(pjIoBase, DEST_Y_END, yTop);
  889. if (--c == 0)
  890. break;
  891. prcl++;
  892. I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 9);
  893. }
  894. }
  895. }
  896. }