Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

2148 lines
68 KiB

  1. /******************************Module*Header*******************************\
  2. * Module Name: bltm64.c
  3. *
  4. * Contains the low-level memory-mapped I/O blt functions for the Mach64.
  5. *
  6. * Hopefully, if you're basing your display driver on this code, to
  7. * support all of DrvBitBlt and DrvCopyBits, you'll only have to implement
  8. * the following routines. You shouldn't have to modify much in
  9. * 'bitblt.c'. I've tried to make these routines as few, modular, simple,
  10. * and efficient as I could, while still accelerating as many calls as
  11. * possible that would be cost-effective in terms of performance wins
  12. * versus size and effort.
  13. *
  14. * Note: In the following, 'relative' coordinates refers to coordinates
  15. * that haven't yet had the offscreen bitmap (DFB) offset applied.
  16. * 'Absolute' coordinates have had the offset applied. For example,
  17. * we may be told to blt to (1, 1) of the bitmap, but the bitmap may
  18. * be sitting in offscreen memory starting at coordinate (0, 768) --
  19. * (1, 1) would be the 'relative' start coordinate, and (1, 769)
  20. * would be the 'absolute' start coordinate.
  21. *
  22. * Copyright (c) 1992-1995 Microsoft Corporation
  23. *
  24. \**************************************************************************/
  25. #include "precomp.h"
  26. /******************************Public*Routine******************************\
  27. * VOID vM64FillSolid
  28. *
  29. * Fills a list of rectangles with a solid colour.
  30. *
  31. \**************************************************************************/
  32. VOID vM64FillSolid( // Type FNFILL
  33. PDEV* ppdev,
  34. LONG c, // Can't be zero
  35. RECTL* prcl, // List of rectangles to be filled, in relative
  36. // coordinates
  37. ULONG rop4, // rop4
  38. RBRUSH_COLOR rbc, // Drawing colour is rbc.iSolidColor
  39. POINTL* pptlBrush) // Not used
  40. {
  41. BYTE* pjMmBase;
  42. LONG xOffset;
  43. LONG yOffset;
  44. ASSERTDD(c > 0, "Can't handle zero rectangles");
  45. pjMmBase = ppdev->pjMmBase;
  46. xOffset = ppdev->xOffset;
  47. yOffset = ppdev->yOffset;
  48. M64_CHECK_FIFO_SPACE(ppdev, pjMmBase, 6);
  49. M64_OD(pjMmBase, CONTEXT_LOAD_CNTL, CONTEXT_LOAD_CmdLoad | ppdev->iDefContext );
  50. M64_OD(pjMmBase, DP_MIX, gaul64HwMixFromRop2[(rop4 >> 2) & 0xf]);
  51. M64_OD(pjMmBase, DP_FRGD_CLR, rbc.iSolidColor);
  52. M64_OD(pjMmBase, DP_SRC, DP_SRC_FrgdClr << 8);
  53. while (TRUE)
  54. {
  55. M64_OD(pjMmBase, DST_Y_X, PACKXY_FAST(xOffset + prcl->left,
  56. yOffset + prcl->top));
  57. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY_FAST(prcl->right - prcl->left,
  58. prcl->bottom - prcl->top));
  59. if (--c == 0)
  60. break;
  61. prcl++;
  62. M64_CHECK_FIFO_SPACE(ppdev, pjMmBase, 2);
  63. }
  64. }
  65. VOID vM64FillSolid24( // Type FNFILL
  66. PDEV* ppdev,
  67. LONG c, // Can't be zero
  68. RECTL* prcl, // List of rectangles to be filled, in relative
  69. // coordinates
  70. ULONG rop4, // rop4
  71. RBRUSH_COLOR rbc, // Drawing colour is rbc.iSolidColor
  72. POINTL* pptlBrush) // Not used
  73. {
  74. BYTE* pjMmBase;
  75. LONG xOffset;
  76. LONG yOffset;
  77. LONG x;
  78. ASSERTDD(c > 0, "Can't handle zero rectangles");
  79. pjMmBase = ppdev->pjMmBase;
  80. xOffset = ppdev->xOffset;
  81. yOffset = ppdev->yOffset;
  82. M64_CHECK_FIFO_SPACE(ppdev, pjMmBase, 8);
  83. M64_OD(pjMmBase, CONTEXT_LOAD_CNTL, CONTEXT_LOAD_CmdLoad | ppdev->iDefContext );
  84. M64_OD(pjMmBase, DP_MIX, gaul64HwMixFromRop2[(rop4 >> 2) & 0xf]);
  85. M64_OD(pjMmBase, DP_FRGD_CLR, rbc.iSolidColor);
  86. M64_OD(pjMmBase, DP_SRC, DP_SRC_FrgdClr << 8);
  87. while (TRUE)
  88. {
  89. x = (xOffset + prcl->left) * 3;
  90. M64_OD(pjMmBase, DST_CNTL, 0x83 | ((x/4 % 6) << 8));
  91. M64_OD(pjMmBase, DST_Y_X, PACKXY_FAST(x,
  92. yOffset + prcl->top));
  93. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY_FAST((prcl->right - prcl->left) * 3,
  94. prcl->bottom - prcl->top));
  95. if (--c == 0)
  96. break;
  97. prcl++;
  98. M64_CHECK_FIFO_SPACE(ppdev, pjMmBase, 4);
  99. }
  100. M64_OD(pjMmBase, DST_CNTL, DST_CNTL_XDir | DST_CNTL_YDir);
  101. }
  102. /******************************Public*Routine******************************\
  103. * VOID vM64FillPatMonochrome
  104. *
  105. * This routine uses the pattern hardware to draw a monochrome patterned
  106. * list of rectangles.
  107. *
  108. * See Blt_DS_P8x8_ENG_8G_D0 and Blt_DS_P8x8_ENG_8G_D1.
  109. *
  110. \**************************************************************************/
  111. VOID vM64FillPatMonochrome( // Type FNFILL
  112. PDEV* ppdev,
  113. LONG c, // Can't be zero
  114. RECTL* prcl, // List of rectangles to be filled, in relative
  115. // coordinates
  116. ULONG rop4, // rop4
  117. RBRUSH_COLOR rbc, // rbc.prb points to brush realization structure
  118. POINTL* pptlBrush) // Pattern alignment
  119. {
  120. BYTE* pjMmBase;
  121. LONG xOffset;
  122. LONG yOffset;
  123. BYTE* pjSrc;
  124. BYTE* pjDst;
  125. LONG xPattern;
  126. LONG yPattern;
  127. LONG iLeftShift;
  128. LONG iRightShift;
  129. LONG xOld;
  130. LONG yOld;
  131. LONG i;
  132. BYTE j;
  133. ULONG ulHwForeMix;
  134. ULONG ulHwBackMix;
  135. LONG xLeft;
  136. LONG yTop;
  137. ULONG aulTmp[2];
  138. pjMmBase = ppdev->pjMmBase;
  139. xOffset = ppdev->xOffset;
  140. yOffset = ppdev->yOffset;
  141. xPattern = (pptlBrush->x + xOffset) & 7;
  142. yPattern = (pptlBrush->y + yOffset) & 7;
  143. // If the alignment isn't correct, we'll have to change it:
  144. if ((xPattern != rbc.prb->ptlBrush.x) || (yPattern != rbc.prb->ptlBrush.y))
  145. {
  146. // Remember that we've changed the alignment on our cached brush:
  147. xOld = rbc.prb->ptlBrush.x;
  148. yOld = rbc.prb->ptlBrush.y;
  149. rbc.prb->ptlBrush.x = xPattern;
  150. rbc.prb->ptlBrush.y = yPattern;
  151. // Now do the alignment:
  152. yPattern = (yOld - yPattern);
  153. iRightShift = (xPattern - xOld) & 7;
  154. iLeftShift = 8 - iRightShift;
  155. pjSrc = (BYTE*) &rbc.prb->aulPattern[0];
  156. pjDst = (BYTE*) &aulTmp[0];
  157. for (i = 0; i < 8; i++)
  158. {
  159. j = *(pjSrc + (yPattern++ & 7));
  160. *pjDst++ = (j << iLeftShift) | (j >> iRightShift);
  161. }
  162. rbc.prb->aulPattern[0] = aulTmp[0];
  163. rbc.prb->aulPattern[1] = aulTmp[1];
  164. }
  165. ulHwForeMix = gaul64HwMixFromRop2[(rop4 >> 2) & 0xf];
  166. ulHwBackMix = ((rop4 & 0xff00) == 0xaa00) ? LEAVE_ALONE : (ulHwForeMix >> 16);
  167. M64_CHECK_FIFO_SPACE(ppdev, pjMmBase, 10);
  168. M64_OD(pjMmBase, CONTEXT_LOAD_CNTL, CONTEXT_LOAD_CmdLoad | ppdev->iDefContext );
  169. M64_OD(pjMmBase, PAT_CNTL, PAT_CNTL_MonoEna);
  170. M64_OD(pjMmBase, DP_SRC, DP_SRC_MonoPattern | DP_SRC_FrgdClr << 8);
  171. M64_OD(pjMmBase, DP_MIX, ulHwBackMix | ulHwForeMix);
  172. M64_OD(pjMmBase, DP_FRGD_CLR, rbc.prb->ulForeColor);
  173. M64_OD(pjMmBase, DP_BKGD_CLR, rbc.prb->ulBackColor);
  174. M64_OD(pjMmBase, PAT_REG0, rbc.prb->aulPattern[0]);
  175. M64_OD(pjMmBase, PAT_REG1, rbc.prb->aulPattern[1]);
  176. while(TRUE)
  177. {
  178. xLeft = prcl->left;
  179. yTop = prcl->top;
  180. M64_OD(pjMmBase, DST_Y_X, PACKXY(xLeft + xOffset,
  181. yTop + yOffset));
  182. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY(prcl->right - xLeft,
  183. prcl->bottom - prcl->top));
  184. if (--c == 0)
  185. break;
  186. prcl++;
  187. M64_CHECK_FIFO_SPACE(ppdev, pjMmBase, 2);
  188. }
  189. }
  190. VOID vM64FillPatMonochrome24( // Type FNFILL
  191. PDEV* ppdev,
  192. LONG c, // Can't be zero
  193. RECTL* prcl, // List of rectangles to be filled, in relative
  194. // coordinates
  195. ULONG rop4, // rop4
  196. RBRUSH_COLOR rbc, // rbc.prb points to brush realization structure
  197. POINTL* pptlBrush) // Pattern alignment
  198. {
  199. BYTE* pjMmBase;
  200. LONG xOffset;
  201. LONG yOffset;
  202. BYTE* pjSrc;
  203. BYTE* pjDst;
  204. LONG xPattern;
  205. LONG yPattern;
  206. LONG iLeftShift;
  207. LONG iRightShift;
  208. LONG xOld;
  209. LONG yOld;
  210. LONG i;
  211. BYTE j;
  212. ULONG ulHwForeMix;
  213. ULONG ulHwBackMix;
  214. LONG xLeft;
  215. LONG yTop;
  216. ULONG aulTmp[2];
  217. LONG x;
  218. pjMmBase = ppdev->pjMmBase;
  219. xOffset = ppdev->xOffset;
  220. yOffset = ppdev->yOffset;
  221. xPattern = (pptlBrush->x + xOffset) & 7;
  222. yPattern = (pptlBrush->y + yOffset) & 7;
  223. // If the alignment isn't correct, we'll have to change it:
  224. if ((xPattern != rbc.prb->ptlBrush.x) || (yPattern != rbc.prb->ptlBrush.y))
  225. {
  226. // Remember that we've changed the alignment on our cached brush:
  227. xOld = rbc.prb->ptlBrush.x;
  228. yOld = rbc.prb->ptlBrush.y;
  229. rbc.prb->ptlBrush.x = xPattern;
  230. rbc.prb->ptlBrush.y = yPattern;
  231. // Now do the alignment:
  232. yPattern = (yOld - yPattern);
  233. iRightShift = (xPattern - xOld) & 7;
  234. iLeftShift = 8 - iRightShift;
  235. pjSrc = (BYTE*) &rbc.prb->aulPattern[0];
  236. pjDst = (BYTE*) &aulTmp[0];
  237. for (i = 0; i < 8; i++)
  238. {
  239. j = *(pjSrc + (yPattern++ & 7));
  240. *pjDst++ = (j << iLeftShift) | (j >> iRightShift);
  241. }
  242. rbc.prb->aulPattern[0] = aulTmp[0];
  243. rbc.prb->aulPattern[1] = aulTmp[1];
  244. }
  245. ulHwForeMix = gaul64HwMixFromRop2[(rop4 >> 2) & 0xf];
  246. ulHwBackMix = ((rop4 & 0xff00) == 0xaa00) ? LEAVE_ALONE : (ulHwForeMix >> 16);
  247. M64_CHECK_FIFO_SPACE(ppdev, pjMmBase, 14);
  248. M64_OD(pjMmBase, CONTEXT_LOAD_CNTL, CONTEXT_LOAD_CmdLoad | ppdev->iDefContext );
  249. M64_OD(pjMmBase, PAT_CNTL, PAT_CNTL_MonoEna);
  250. M64_OD(pjMmBase, DP_SRC, DP_SRC_MonoPattern | DP_SRC_FrgdClr << 8);
  251. M64_OD(pjMmBase, DP_MIX, ulHwBackMix | ulHwForeMix);
  252. M64_OD(pjMmBase, DP_FRGD_CLR, rbc.prb->ulForeColor);
  253. M64_OD(pjMmBase, DP_BKGD_CLR, rbc.prb->ulBackColor);
  254. M64_OD(pjMmBase, PAT_REG0, rbc.prb->aulPattern[0]);
  255. M64_OD(pjMmBase, PAT_REG1, rbc.prb->aulPattern[1]);
  256. // You must turn off DP_BYTE_PIX_ORDER, or else the pattern is incorrectly
  257. // aligned. This took a long time to figure out.
  258. M64_OD(pjMmBase, DP_PIX_WIDTH, 0x00000202);
  259. while(TRUE)
  260. {
  261. xLeft = prcl->left;
  262. yTop = prcl->top;
  263. x = (xLeft + xOffset) * 3;
  264. M64_OD(pjMmBase, DST_CNTL, 0x83 | ((x/4 % 6) << 8));
  265. M64_OD(pjMmBase, DST_Y_X, PACKXY(x,
  266. yTop + yOffset));
  267. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY((prcl->right - xLeft) * 3,
  268. prcl->bottom - prcl->top));
  269. if (--c == 0)
  270. break;
  271. prcl++;
  272. M64_CHECK_FIFO_SPACE(ppdev, pjMmBase, 5);
  273. }
  274. M64_OD(pjMmBase, DST_CNTL, DST_CNTL_XDir | DST_CNTL_YDir);
  275. M64_OD(pjMmBase, DP_PIX_WIDTH, ppdev->ulMonoPixelWidth);
  276. }
  277. /******************************Public*Routine******************************\
  278. * VOID vM64PatColorRealize
  279. *
  280. * This routine transfers an 8x8 pattern to off-screen display memory,
  281. * so that it can be used by the Mach64 'general pattern with rotation'
  282. * hardware.
  283. *
  284. * See Blt_DS_PCOL_ENG_8G_D0.
  285. *
  286. \**************************************************************************/
  287. VOID vM64PatColorRealize( // Type FNPATREALIZE
  288. PDEV* ppdev,
  289. RBRUSH* prb) // Points to brush realization structure
  290. {
  291. BRUSHENTRY* pbe;
  292. LONG iBrushCache;
  293. SURFOBJ soSrc;
  294. POINTL ptlSrc;
  295. RECTL rclDst;
  296. // We have to allocate a new off-screen cache brush entry for
  297. // the brush:
  298. iBrushCache = ppdev->iBrushCache;
  299. pbe = &ppdev->abe[iBrushCache];
  300. iBrushCache = (iBrushCache + 1) & (TOTAL_BRUSH_COUNT - 1);
  301. ppdev->iBrushCache = iBrushCache;
  302. // Update our links:
  303. pbe->prbVerify = prb;
  304. prb->apbe[IBOARD(ppdev)] = pbe;
  305. // pfnPutBits looks at only two fields in the SURFOBJ, and since we're
  306. // only going to download a single scan, we don't even have to set
  307. // 'lDelta'.
  308. soSrc.pvScan0 = &prb->aulPattern[0];
  309. ptlSrc.x = 0;
  310. ptlSrc.y = 0;
  311. rclDst.left = pbe->x;
  312. rclDst.right = pbe->x + TOTAL_BRUSH_SIZE;
  313. rclDst.top = pbe->y;
  314. rclDst.bottom = pbe->y + 1;
  315. ppdev->pfnPutBits(ppdev, &soSrc, &rclDst, &ptlSrc);
  316. }
  317. /******************************Public*Routine******************************\
  318. * VOID vM64FillPatColor
  319. *
  320. * This routine uses the pattern hardware to draw a patterned list of
  321. * rectangles.
  322. *
  323. * See Blt_DS_PCOL_ENG_8G_D0 and Blt_DS_PCOL_ENG_8G_D1.
  324. *
  325. \**************************************************************************/
  326. VOID vM64FillPatColor( // Type FNFILL
  327. PDEV* ppdev,
  328. LONG c, // Can't be zero
  329. RECTL* prcl, // List of rectangles to be filled, in relative
  330. // coordinates
  331. ULONG rop4, // rop4
  332. RBRUSH_COLOR rbc, // rbc.prb points to brush realization structure
  333. POINTL* pptlBrush) // Pattern alignment
  334. {
  335. BRUSHENTRY* pbe;
  336. BYTE* pjMmBase;
  337. LONG xOffset;
  338. LONG yOffset;
  339. LONG xLeft;
  340. LONG yTop;
  341. ULONG ulSrc;
  342. // See if the brush has already been put into off-screen memory:
  343. pbe = rbc.prb->apbe[IBOARD(ppdev)];
  344. if ((pbe == NULL) || (pbe->prbVerify != rbc.prb))
  345. {
  346. vM64PatColorRealize(ppdev, rbc.prb);
  347. pbe = rbc.prb->apbe[IBOARD(ppdev)];
  348. }
  349. pjMmBase = ppdev->pjMmBase;
  350. xOffset = ppdev->xOffset;
  351. yOffset = ppdev->yOffset;
  352. M64_CHECK_FIFO_SPACE(ppdev, pjMmBase, 11);
  353. M64_OD(pjMmBase, CONTEXT_LOAD_CNTL, CONTEXT_LOAD_CmdLoad | ppdev->iDefContext );
  354. M64_OD(pjMmBase, SRC_OFF_PITCH, pbe->ulOffsetPitch);
  355. M64_OD(pjMmBase, DP_MIX, gaul64HwMixFromRop2[(rop4 >> 2) & 0xf]);
  356. M64_OD(pjMmBase, SRC_CNTL, SRC_CNTL_PatEna | SRC_CNTL_PatRotEna);
  357. M64_OD(pjMmBase, DP_SRC, DP_SRC_Blit << 8);
  358. M64_OD(pjMmBase, SRC_Y_X_START, 0);
  359. M64_OD(pjMmBase, SRC_HEIGHT2_WIDTH2, PACKXY(8, 8));
  360. while (TRUE)
  361. {
  362. xLeft = prcl->left;
  363. yTop = prcl->top;
  364. ulSrc = PACKXY_FAST((xLeft - pptlBrush->x) & 7,
  365. (yTop - pptlBrush->y) & 7);
  366. M64_OD(pjMmBase, SRC_Y_X, ulSrc);
  367. M64_OD(pjMmBase, SRC_HEIGHT1_WIDTH1, PACKXY(8, 8) - ulSrc);
  368. M64_OD(pjMmBase, DST_Y_X, PACKXY(xLeft + xOffset,
  369. yTop + yOffset));
  370. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY(prcl->right - prcl->left,
  371. prcl->bottom - prcl->top));
  372. if (--c == 0)
  373. break;
  374. prcl++;
  375. M64_CHECK_FIFO_SPACE(ppdev, pjMmBase, 4);
  376. }
  377. }
  378. VOID vM64FillPatColor24( // Type FNFILL
  379. PDEV* ppdev,
  380. LONG c, // Can't be zero
  381. RECTL* prcl, // List of rectangles to be filled, in relative
  382. // coordinates
  383. ULONG rop4, // rop4
  384. RBRUSH_COLOR rbc, // rbc.prb points to brush realization structure
  385. POINTL* pptlBrush) // Pattern alignment
  386. {
  387. BRUSHENTRY* pbe;
  388. BYTE* pjMmBase;
  389. LONG xOffset;
  390. LONG yOffset;
  391. LONG xLeft;
  392. LONG yTop;
  393. ULONG ulSrc;
  394. // See if the brush has already been put into off-screen memory:
  395. pbe = rbc.prb->apbe[IBOARD(ppdev)];
  396. if ((pbe == NULL) || (pbe->prbVerify != rbc.prb))
  397. {
  398. vM64PatColorRealize(ppdev, rbc.prb);
  399. pbe = rbc.prb->apbe[IBOARD(ppdev)];
  400. }
  401. pjMmBase = ppdev->pjMmBase;
  402. xOffset = ppdev->xOffset;
  403. yOffset = ppdev->yOffset;
  404. M64_CHECK_FIFO_SPACE(ppdev, pjMmBase, 11);
  405. M64_OD(pjMmBase, CONTEXT_LOAD_CNTL, CONTEXT_LOAD_CmdLoad | ppdev->iDefContext );
  406. M64_OD(pjMmBase, SRC_OFF_PITCH, pbe->ulOffsetPitch);
  407. M64_OD(pjMmBase, DP_MIX, gaul64HwMixFromRop2[(rop4 >> 2) & 0xf]);
  408. M64_OD(pjMmBase, SRC_CNTL, SRC_CNTL_PatEna | SRC_CNTL_PatRotEna);
  409. M64_OD(pjMmBase, DP_SRC, DP_SRC_Blit << 8);
  410. M64_OD(pjMmBase, SRC_Y_X_START, 0);
  411. M64_OD(pjMmBase, SRC_HEIGHT2_WIDTH2, PACKXY(24, 8));
  412. while (TRUE)
  413. {
  414. xLeft = prcl->left;
  415. yTop = prcl->top;
  416. ulSrc = PACKXY_FAST(((xLeft - pptlBrush->x) & 7) * 3,
  417. (yTop - pptlBrush->y) & 7);
  418. M64_OD(pjMmBase, SRC_Y_X, ulSrc);
  419. M64_OD(pjMmBase, SRC_HEIGHT1_WIDTH1, PACKXY(24, 8) - ulSrc);
  420. M64_OD(pjMmBase, DST_Y_X, PACKXY((xLeft + xOffset) * 3,
  421. yTop + yOffset));
  422. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY((prcl->right - prcl->left) * 3,
  423. prcl->bottom - prcl->top));
  424. if (--c == 0)
  425. break;
  426. prcl++;
  427. M64_CHECK_FIFO_SPACE(ppdev, pjMmBase, 4);
  428. }
  429. }
  430. /******************************Public*Routine******************************\
  431. * VOID vM64XferNative
  432. *
  433. * Transfers a bitmap that is the same colour depth as the display to
  434. * the screen via the data transfer register, with no translation.
  435. *
  436. \**************************************************************************/
  // Copies each destination rectangle from the source bitmap to the screen
  // by writing the source one dword at a time to HOST_DATA0. The source
  // pointer is moved back to a dword boundary and the width rounded up to
  // whole dwords; SC_LEFT_RIGHT is set to the true rectangle first,
  // presumably so that the extra pixels on either side are clipped away.
  437. VOID vM64XferNative( // Type FNXFER
  438. PDEV* ppdev,
  439. LONG c, // Count of rectangles, can't be zero
  440. RECTL* prcl, // Array of relative coordinates destination rectangles
  441. ULONG rop4, // rop4
  442. SURFOBJ* psoSrc, // Source surface
  443. POINTL* pptlSrc, // Original unclipped source point
  444. RECTL* prclDst, // Original unclipped destination rectangle
  445. XLATEOBJ* pxlo) // Not used
  446. {
  447. BYTE* pjMmBase;
  448. LONG xOffset;
  449. LONG yOffset;
  450. ULONG ulHwForeMix;
  451. LONG dx;
  452. LONG dy;
  453. LONG lSrcDelta;
  454. BYTE* pjSrcScan0;
  455. LONG xLeft;
  456. LONG xRight;
  457. LONG yTop;
  458. LONG cy;
  459. LONG cx;
  460. LONG xBias;
  461. ULONG* pulSrc;
  462. ULONG culScan;
  463. LONG lSrcSkip;
  464. LONG i;
  465. ULONG ulFifo;
  466. ASSERTDD(c > 0, "Can't handle zero rectangles");
  467. ASSERTDD(((rop4 & 0xff00) >> 8) == (rop4 & 0xff),
  468. "Expect only a rop2");
  469. pjMmBase = ppdev->pjMmBase;
  470. xOffset = ppdev->xOffset;
  471. yOffset = ppdev->yOffset;
  472. ulFifo = 0;
  // The fore and back rops are asserted equal above, so the low nibble of
  // rop4 alone selects the mix:
  473. ulHwForeMix = gaul64HwMixFromRop2[rop4 & 0xf];
  // Six set-up writes follow, plus the three per-rectangle register writes
  // of the first loop pass (presumably what the 9 accounts for):
  474. M64_FAST_FIFO_CHECK(ppdev, pjMmBase, 9, ulFifo);
  475. // default registers for hw bugs:
  476. M64_OD(pjMmBase, DP_WRITE_MASK, 0xFFFFFFFF);
  477. M64_OD(pjMmBase, CLR_CMP_CNTL, 0);
  478. M64_OD(pjMmBase, GUI_TRAJ_CNTL, DST_CNTL_XDir | DST_CNTL_YDir);
  479. M64_OD(pjMmBase, DP_MIX, ulHwForeMix | (ulHwForeMix >> 16));
  480. M64_OD(pjMmBase, DP_SRC, (DP_SRC_Host << 8));
  481. // The host data pixel width is the same as that of the screen:
  482. M64_OD(pjMmBase, DP_PIX_WIDTH, ppdev->ulMonoPixelWidth |
  483. ((ppdev->ulMonoPixelWidth & 0xf) << 16));
  // dx (in bytes) and dy (in scans) are added to a destination position to
  // get the matching source position:
  484. dx = (pptlSrc->x - prclDst->left) << ppdev->cPelSize; // Bytes
  485. dy = pptlSrc->y - prclDst->top;
  486. lSrcDelta = psoSrc->lDelta;
  487. pjSrcScan0 = psoSrc->pvScan0;
  488. while (TRUE)
  489. {
  490. xLeft = prcl->left;
  491. xRight = prcl->right;
  492. yTop = prcl->top;
  493. cy = prcl->bottom - yTop;
  // Scissor to the exact rectangle (inclusive right edge) before xLeft is
  // moved left and the width is rounded up below:
  494. M64_OD(pjMmBase, SC_LEFT_RIGHT, PACKPAIR(xLeft + xOffset, xRight + xOffset - 1));
  495. //
  496. // Convert pixels to bytes.
  497. //
  498. xLeft <<= ppdev->cPelSize;
  499. xRight <<= ppdev->cPelSize;
  500. //
  501. // We compute 'xBias' in order to dword-align the source pointer.
  502. // This way, we don't have to do unaligned reads of the source,
  503. // and we're guaranteed not to read even a byte past the end of
  504. // the bitmap.
  505. //
  506. xBias = (xLeft + dx) & 3; // Floor (bytes)
  507. xLeft -= xBias; // Bytes
  508. cx = (xRight - xLeft + 3) & ~3; // Ceiling (bytes)
  // The destination start and width are converted back from bytes to
  // pixels for the engine:
  509. M64_OD(pjMmBase, DST_Y_X, PACKXY_FAST((xLeft >> ppdev->cPelSize) + xOffset, yTop + yOffset));
  510. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY_FAST(cx >> ppdev->cPelSize, cy));
  511. pulSrc = (PULONG)(pjSrcScan0 + (yTop + dy) * lSrcDelta + xLeft + dx);
  512. culScan = cx >> 2; // Dwords
  // Bytes from the end of one transferred scan to the start of the next:
  513. lSrcSkip = lSrcDelta - cx; // Bytes
  514. ASSERTDD(((ULONG_PTR)pulSrc & 3) == 0, "Source should be dword aligned");
  // Both loops below are do/while with a pre-decrement test, so a zero
  // count must be screened out here or the counters would wrap:
  515. if (culScan && cy)
  516. {
  517. do
  518. {
  519. i = culScan;
  520. do
  521. {
  522. M64_FAST_FIFO_CHECK(ppdev, pjMmBase, 1, ulFifo);
  523. M64_OD(pjMmBase, HOST_DATA0, *pulSrc);
  524. pulSrc++;
  525. } while (--i != 0);
  526. pulSrc = (PULONG)((BYTE*)pulSrc + lSrcSkip);
  527. } while (--cy != 0);
  528. }
  529. if (--c == 0)
  530. break;
  531. prcl++;
  // Three register writes precede the host data of the next rectangle:
  532. M64_FAST_FIFO_CHECK(ppdev, pjMmBase, 3, ulFifo);
  533. }
  534. // Don't forget to reset the clip register and the default pixel width:
  535. M64_FAST_FIFO_CHECK(ppdev, pjMmBase, 2, ulFifo);
  536. M64_OD(pjMmBase, DP_PIX_WIDTH, ppdev->ulMonoPixelWidth);
  537. M64_OD(pjMmBase, SC_LEFT_RIGHT, PACKPAIR(0, M64_MAX_SCISSOR_R));
  538. }
  // Variant of vM64XferNative used by the 24bpp code path. Instead of
  // shifting by cPelSize, every x quantity (the PDEV x offset, the
  // rectangle edges and dx) is multiplied by 3, so the values written to
  // SC_LEFT_RIGHT, DST_Y_X and DST_HEIGHT_WIDTH are these scaled values.
  // The source is still sent one dword at a time through HOST_DATA0.
  539. VOID vM64XferNative24( // Type FNXFER
  540. PDEV* ppdev,
  541. LONG c, // Count of rectangles, can't be zero
  542. RECTL* prcl, // Array of relative coordinates destination rectangles
  543. ULONG rop4, // rop4
  544. SURFOBJ* psoSrc, // Source surface
  545. POINTL* pptlSrc, // Original unclipped source point
  546. RECTL* prclDst, // Original unclipped destination rectangle
  547. XLATEOBJ* pxlo) // Not used
  548. {
  549. BYTE* pjMmBase;
  550. LONG xOffset;
  551. LONG yOffset;
  552. ULONG ulHwForeMix;
  553. LONG dx;
  554. LONG dy;
  555. LONG lSrcDelta;
  556. BYTE* pjSrcScan0;
  557. LONG xLeft;
  558. LONG xRight;
  559. LONG yTop;
  560. LONG cy;
  561. LONG cx;
  562. LONG xBias;
  563. ULONG* pulSrc;
  564. ULONG culScan;
  565. LONG lSrcSkip;
  566. LONG i;
  567. ULONG ulFifo;
  568. ASSERTDD(c > 0, "Can't handle zero rectangles");
  569. ASSERTDD(((rop4 & 0xff00) >> 8) == (rop4 & 0xff),
  570. "Expect only a rop2");
  571. pjMmBase = ppdev->pjMmBase;
  // Note that the x offset is pre-multiplied by 3 here, unlike the y
  // offset:
  572. xOffset = ppdev->xOffset * 3;
  573. yOffset = ppdev->yOffset;
  574. ulFifo = 0;
  // The fore and back rops are asserted equal above, so the low nibble of
  // rop4 alone selects the mix:
  575. ulHwForeMix = gaul64HwMixFromRop2[rop4 & 0xf];
  // Six set-up writes follow, plus the three per-rectangle register writes
  // of the first loop pass (presumably what the 9 accounts for):
  576. M64_FAST_FIFO_CHECK(ppdev, pjMmBase, 9, ulFifo);
  577. // default registers for hw bugs:
  578. M64_OD(pjMmBase, DP_WRITE_MASK, 0xFFFFFFFF);
  579. M64_OD(pjMmBase, CLR_CMP_CNTL, 0);
  580. M64_OD(pjMmBase, GUI_TRAJ_CNTL, DST_CNTL_XDir | DST_CNTL_YDir);
  581. M64_OD(pjMmBase, DP_MIX, ulHwForeMix | (ulHwForeMix >> 16));
  582. M64_OD(pjMmBase, DP_SRC, (DP_SRC_Host << 8));
  583. // The host data pixel width is the same as that of the screen:
  584. M64_OD(pjMmBase, DP_PIX_WIDTH, ppdev->ulMonoPixelWidth |
  585. ((ppdev->ulMonoPixelWidth & 0xf) << 16));
  // dx (in bytes) and dy (in scans) are added to a destination position to
  // get the matching source position:
  586. dx = (pptlSrc->x - prclDst->left) * 3; // Bytes
  587. dy = pptlSrc->y - prclDst->top;
  588. lSrcDelta = psoSrc->lDelta;
  589. pjSrcScan0 = psoSrc->pvScan0;
  590. while (TRUE)
  591. {
  // From here on xLeft and xRight are in bytes (3 per pixel):
  592. xLeft = prcl->left * 3;
  593. xRight = prcl->right * 3;
  594. yTop = prcl->top;
  595. cy = prcl->bottom - yTop;
  // Scissor to the exact rectangle (inclusive right edge) before xLeft is
  // moved left and the width is rounded up below:
  596. M64_OD(pjMmBase, SC_LEFT_RIGHT, PACKPAIR(xLeft + xOffset, xRight + xOffset - 1));
  597. //
  598. // We compute 'xBias' in order to dword-align the source pointer.
  599. // This way, we don't have to do unaligned reads of the source,
  600. // and we're guaranteed not to read even a byte past the end of
  601. // the bitmap.
  602. //
  603. xBias = (xLeft + dx) & 3; // Floor (bytes)
  604. xLeft -= xBias; // Bytes
  605. cx = (xRight - xLeft + 3) & ~3; // Ceiling (bytes)
  606. M64_OD(pjMmBase, DST_Y_X, PACKXY_FAST(xLeft + xOffset, yTop + yOffset));
  607. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY_FAST(cx, cy));
  608. pulSrc = (PULONG)(pjSrcScan0 + (yTop + dy) * lSrcDelta + xLeft + dx);
  609. culScan = cx >> 2; // Dwords
  // Bytes from the end of one transferred scan to the start of the next:
  610. lSrcSkip = lSrcDelta - cx; // Bytes
  611. ASSERTDD(((ULONG_PTR)pulSrc & 3) == 0, "Source should be dword aligned");
  // Both loops below are do/while with a pre-decrement test, so a zero
  // count must be screened out here or the counters would wrap:
  612. if (culScan && cy)
  613. {
  614. do
  615. {
  616. i = culScan;
  617. do
  618. {
  619. M64_FAST_FIFO_CHECK(ppdev, pjMmBase, 1, ulFifo);
  620. M64_OD(pjMmBase, HOST_DATA0, *pulSrc);
  621. pulSrc++;
  622. } while (--i != 0);
  623. pulSrc = (PULONG)((BYTE*)pulSrc + lSrcSkip);
  624. } while (--cy != 0);
  625. }
  626. if (--c == 0)
  627. break;
  628. prcl++;
  // Three register writes precede the host data of the next rectangle:
  629. M64_FAST_FIFO_CHECK(ppdev, pjMmBase, 3, ulFifo);
  630. }
  631. // Don't forget to reset the clip register and the default pixel width:
  632. M64_FAST_FIFO_CHECK(ppdev, pjMmBase, 2, ulFifo);
  633. M64_OD(pjMmBase, DP_PIX_WIDTH, ppdev->ulMonoPixelWidth);
  634. M64_OD(pjMmBase, SC_LEFT_RIGHT, PACKPAIR(0, M64_MAX_SCISSOR_R));
  635. }
  636. /******************************Public*Routine******************************\
  637. * VOID vM64Xfer1bpp
  638. *
  639. * This routine colour expands a monochrome bitmap.
  640. *
  641. * See Blt_DS_S1_8G_D0 and Blt_DS_8G_D1.
  642. *
  643. \**************************************************************************/
  644. VOID vM64Xfer1bpp( // Type FNXFER
  645. PDEV* ppdev,
  646. LONG c, // Count of rectangles, can't be zero
  647. RECTL* prcl, // List of destination rectangles, in relative
  648. // coordinates
  649. ROP4 rop4, // rop4
  650. SURFOBJ* psoSrc, // Source surface
  651. POINTL* pptlSrc, // Original unclipped source point
  652. RECTL* prclDst, // Original unclipped destination rectangle
  653. XLATEOBJ* pxlo) // Translate that provides colour-expansion information
  654. {
  655. BYTE* pjMmBase;
  656. LONG xOffset;
  657. LONG yOffset;
  658. ULONG* pulXlate;
  659. ULONG ulHwForeMix;
  660. LONG dx;
  661. LONG dy;
  662. LONG lSrcDelta;
  663. BYTE* pjSrcScan0;
  664. LONG xLeft;
  665. LONG xRight;
  666. LONG yTop;
  667. LONG cy;
  668. LONG cx;
  669. LONG xBias;
  670. LONG culScan;
  671. LONG lSrcSkip;
  672. ULONG* pulSrc;
  673. LONG i;
  674. ULONG ulFifo;
  675. ASSERTDD(c > 0, "Can't handle zero rectangles");
  676. ASSERTDD(((rop4 & 0xff00) >> 8) == (rop4 & 0xff),
  677. "Expect only a rop2");
  678. pjMmBase = ppdev->pjMmBase;
  679. xOffset = ppdev->xOffset;
  680. yOffset = ppdev->yOffset;
  681. ulFifo = 0;
  682. ulHwForeMix = gaul64HwMixFromRop2[rop4 & 0xf];
  683. pulXlate = pxlo->pulXlate;
  684. M64_FAST_FIFO_CHECK(ppdev, pjMmBase, 8, ulFifo);
  685. M64_OD(pjMmBase, CONTEXT_LOAD_CNTL, CONTEXT_LOAD_CmdLoad | ppdev->iDefContext );
  686. M64_OD(pjMmBase, DP_BKGD_CLR, pulXlate[0]);
  687. M64_OD(pjMmBase, DP_FRGD_CLR, pulXlate[1]);
  688. M64_OD(pjMmBase, DP_MIX, ulHwForeMix | (ulHwForeMix >> 16));
  689. M64_OD(pjMmBase, DP_SRC, (DP_SRC_Host << 16) | (DP_SRC_FrgdClr << 8) |
  690. (DP_SRC_BkgdClr));
  691. dx = pptlSrc->x - prclDst->left;
  692. dy = pptlSrc->y - prclDst->top;
  693. lSrcDelta = psoSrc->lDelta;
  694. pjSrcScan0 = psoSrc->pvScan0;
  695. while (TRUE)
  696. {
  697. xLeft = prcl->left;
  698. xRight = prcl->right;
  699. // The Mach64 'bit packs' monochrome transfers, but GDI gives
  700. // us monochrome bitmaps whose scans are always dword aligned.
  701. // Consequently, we use the Mach64's clip registers to make
  702. // our transfers a multiple of 32 to match the dword alignment:
  703. M64_OD(pjMmBase, SC_LEFT_RIGHT, PACKPAIR(xLeft + xOffset,
  704. xRight + xOffset - 1));
  705. yTop = prcl->top;
  706. cy = prcl->bottom - yTop;
  707. xBias = (xLeft + dx) & 31; // Floor
  708. xLeft -= xBias;
  709. cx = (xRight - xLeft + 31) & ~31; // Ceiling
  710. M64_OD(pjMmBase, DST_Y_X, PACKXY_FAST(xLeft + xOffset,
  711. yTop + yOffset));
  712. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY_FAST(cx, cy));
  713. pulSrc = (ULONG*) (pjSrcScan0 + (yTop + dy) * lSrcDelta
  714. + ((xLeft + dx) >> 3));
  715. culScan = cx >> 5;
  716. lSrcSkip = lSrcDelta - (culScan << 2);
  717. ASSERTDD(((ULONG_PTR)pulSrc & 3) == 0, "Source should be dword aligned");
  718. do {
  719. i = culScan;
  720. do {
  721. M64_FAST_FIFO_CHECK(ppdev, pjMmBase, 1, ulFifo);
  722. M64_OD(pjMmBase, HOST_DATA0, *pulSrc);
  723. pulSrc++;
  724. } while (--i != 0);
  725. pulSrc = (ULONG*) ((BYTE*) pulSrc + lSrcSkip);
  726. } while (--cy != 0);
  727. if (--c == 0)
  728. break;
  729. prcl++;
  730. M64_FAST_FIFO_CHECK(ppdev, pjMmBase, 3, ulFifo);
  731. }
  732. // Don't forget to reset the clip register:
  733. M64_FAST_FIFO_CHECK(ppdev, pjMmBase, 1, ulFifo);
  734. M64_OD(pjMmBase, SC_LEFT_RIGHT, PACKPAIR(0, M64_MAX_SCISSOR_R));
  735. }
  736. /******************************Public*Routine******************************\
  737. * VOID vM64Xfer4bpp
  738. *
  739. * Does a 4bpp transfer from a bitmap to the screen.
  740. *
  741. * The reason we implement this is that a lot of resources are kept as 4bpp,
  742. * and used to initialize DFBs, some of which we of course keep off-screen.
  743. *
  744. \**************************************************************************/
  745. VOID vM64Xfer4bpp( // Type FNXFER
  746. PDEV* ppdev,
  747. LONG c, // Count of rectangles, can't be zero
  748. RECTL* prcl, // List of destination rectangles, in relative
  749. // coordinates
  750. ULONG rop4, // Rop4
  751. SURFOBJ* psoSrc, // Source surface
  752. POINTL* pptlSrc, // Original unclipped source point
  753. RECTL* prclDst, // Original unclipped destination rectangle
  754. XLATEOBJ* pxlo) // Translate that provides colour-expansion information
  755. {
  756. BYTE* pjMmBase;
  757. LONG xOffset;
  758. LONG yOffset;
  759. LONG cjPelSize;
  760. ULONG ulHwForeMix;
  761. LONG xLeft;
  762. LONG xRight;
  763. LONG yTop;
  764. LONG xBias;
  765. LONG dx;
  766. LONG dy;
  767. LONG cx;
  768. LONG cy;
  769. LONG lSrcDelta;
  770. BYTE* pjSrcScan0;
  771. BYTE* pjSrc;
  772. BYTE jSrc;
  773. ULONG* pulXlate;
  774. LONG i;
  775. ULONG ul;
  776. LONG cjSrc;
  777. LONG cwSrc;
  778. LONG lSrcSkip;
  779. ULONG ulFifo;
  780. ASSERTDD(psoSrc->iBitmapFormat == BMF_4BPP, "Source must be 4bpp");
  781. ASSERTDD(c > 0, "Can't handle zero rectangles");
  782. ASSERTDD(ppdev->iBitmapFormat != BMF_24BPP, "Can't handle 24bpp");
  783. pjMmBase = ppdev->pjMmBase;
  784. xOffset = ppdev->xOffset;
  785. yOffset = ppdev->yOffset;
  786. cjPelSize = ppdev->cjPelSize;
  787. pulXlate = pxlo->pulXlate;
  788. ulFifo = 0;
  789. dx = pptlSrc->x - prclDst->left;
  790. dy = pptlSrc->y - prclDst->top; // Add to destination to get source
  791. lSrcDelta = psoSrc->lDelta;
  792. pjSrcScan0 = psoSrc->pvScan0;
  793. ulHwForeMix = gaul64HwMixFromRop2[rop4 & 0xf];
  794. M64_FAST_FIFO_CHECK(ppdev, pjMmBase, 7, ulFifo);
  795. // Fix vanishing fills and various color problems:
  796. M64_OD(pjMmBase, CONTEXT_LOAD_CNTL, CONTEXT_LOAD_CmdLoad | ppdev->iDefContext );
  797. M64_OD(pjMmBase, DP_MIX, ulHwForeMix | (ulHwForeMix >> 16));
  798. M64_OD(pjMmBase, DP_SRC, (DP_SRC_Host << 8));
  799. // The host data pixel width is the same as that of the screen:
  800. M64_OD(pjMmBase, DP_PIX_WIDTH, ppdev->ulMonoPixelWidth |
  801. ((ppdev->ulMonoPixelWidth & 0xf) << 16));
  802. while(TRUE)
  803. {
  804. xLeft = prcl->left;
  805. xRight = prcl->right;
  806. M64_OD(pjMmBase, SC_LEFT_RIGHT, PACKPAIR(xLeft + xOffset,
  807. xRight + xOffset - 1));
  808. yTop = prcl->top;
  809. cy = prcl->bottom - yTop;
  810. // We compute 'xBias' in order to dword-align the source pointer.
  811. // This way, we don't have to do unaligned reads of the source,
  812. // and we're guaranteed not to read even a byte past the end of
  813. // the bitmap.
  814. //
  815. // Note that this bias works at 24bpp, too:
  816. xBias = (xLeft + dx) & 3; // Floor
  817. xLeft -= xBias;
  818. cx = (xRight - xLeft + 3) & ~3; // Ceiling
  819. M64_OD(pjMmBase, DST_Y_X, PACKXY_FAST(xLeft + xOffset,
  820. yTop + yOffset));
  821. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY_FAST(cx, cy));
  822. pjSrc = pjSrcScan0 + (yTop + dy) * lSrcDelta
  823. + ((xLeft + dx) >> 1);
  824. cjSrc = cx >> 1; // Number of source bytes touched
  825. lSrcSkip = lSrcDelta - cjSrc;
  826. if (cjPelSize == 1)
  827. {
  828. // This part handles 8bpp output:
  829. cwSrc = (cjSrc >> 1); // Number of whole source words
  830. do {
  831. for (i = cwSrc; i != 0; i--)
  832. {
  833. jSrc = *pjSrc++;
  834. ul = (pulXlate[jSrc >> 4]);
  835. ul |= (pulXlate[jSrc & 0xf] << 8);
  836. jSrc = *pjSrc++;
  837. ul |= (pulXlate[jSrc >> 4] << 16);
  838. ul |= (pulXlate[jSrc & 0xf] << 24);
  839. M64_FAST_FIFO_CHECK(ppdev, pjMmBase, 1, ulFifo);
  840. M64_OD(pjMmBase, HOST_DATA0, ul);
  841. }
  842. // Handle an odd end byte, if there is one:
  843. if (cjSrc & 1)
  844. {
  845. jSrc = *pjSrc++;
  846. ul = (pulXlate[jSrc >> 4]);
  847. ul |= (pulXlate[jSrc & 0xf] << 8);
  848. M64_FAST_FIFO_CHECK(ppdev, pjMmBase, 1, ulFifo);
  849. M64_OD(pjMmBase, HOST_DATA0, ul);
  850. }
  851. pjSrc += lSrcSkip;
  852. } while (--cy != 0);
  853. }
  854. else if (cjPelSize == 2)
  855. {
  856. // This part handles 16bpp output:
  857. do {
  858. i = cjSrc;
  859. do {
  860. jSrc = *pjSrc++;
  861. ul = (pulXlate[jSrc >> 4]);
  862. ul |= (pulXlate[jSrc & 0xf] << 16);
  863. M64_FAST_FIFO_CHECK(ppdev, pjMmBase, 1, ulFifo);
  864. M64_OD(pjMmBase, HOST_DATA0, ul);
  865. } while (--i != 0);
  866. pjSrc += lSrcSkip;
  867. } while (--cy != 0);
  868. }
  869. else
  870. {
  871. // This part handles 32bpp output:
  872. do {
  873. i = cjSrc;
  874. do {
  875. jSrc = *pjSrc++;
  876. M64_FAST_FIFO_CHECK(ppdev, pjMmBase, 2, ulFifo);
  877. M64_OD(pjMmBase, HOST_DATA0, pulXlate[jSrc >> 4]);
  878. M64_OD(pjMmBase, HOST_DATA0, pulXlate[jSrc & 0xf]);
  879. } while (--i != 0);
  880. pjSrc += lSrcSkip;
  881. } while (--cy != 0);
  882. }
  883. if (--c == 0)
  884. break;
  885. prcl++;
  886. M64_FAST_FIFO_CHECK(ppdev, pjMmBase, 3, ulFifo);
  887. }
  888. // Don't forget to reset the clip register and the default pixel width:
  889. M64_FAST_FIFO_CHECK(ppdev, pjMmBase, 2, ulFifo);
  890. M64_OD(pjMmBase, DP_PIX_WIDTH, ppdev->ulMonoPixelWidth);
  891. M64_OD(pjMmBase, SC_LEFT_RIGHT, PACKPAIR(0, M64_MAX_SCISSOR_R));
  892. }
  893. /******************************Public*Routine******************************\
  894. * VOID vM64Xfer8bpp
  895. *
  896. * Does a 8bpp transfer from a bitmap to the screen.
  897. *
  898. * The reason we implement this is that a lot of resources are kept as 8bpp,
  899. * and used to initialize DFBs, some of which we of course keep off-screen.
  900. *
  901. \**************************************************************************/
  902. VOID vM64Xfer8bpp( // Type FNXFER
  903. PDEV* ppdev,
  904. LONG c, // Count of rectangles, can't be zero
  905. RECTL* prcl, // List of destination rectangles, in relative
  906. // coordinates
  907. ULONG rop4, // Rop4
  908. SURFOBJ* psoSrc, // Source surface
  909. POINTL* pptlSrc, // Original unclipped source point
  910. RECTL* prclDst, // Original unclipped destination rectangle
  911. XLATEOBJ* pxlo) // Translate that provides colour-expansion information
  912. {
  913. BYTE* pjMmBase;
  914. LONG xOffset;
  915. LONG yOffset;
  916. LONG cjPelSize;
  917. ULONG ulHwForeMix;
  918. LONG xLeft;
  919. LONG xRight;
  920. LONG yTop;
  921. LONG xBias;
  922. LONG dx;
  923. LONG dy;
  924. LONG cx;
  925. LONG cy;
  926. LONG lSrcDelta;
  927. BYTE* pjSrcScan0;
  928. BYTE* pjSrc;
  929. ULONG* pulXlate;
  930. LONG i;
  931. ULONG ul;
  932. LONG cdSrc;
  933. LONG cwSrc;
  934. LONG cxRem;
  935. LONG lSrcSkip;
  936. ULONG ulFifo;
  937. ASSERTDD(psoSrc->iBitmapFormat == BMF_8BPP, "Source must be 8bpp");
  938. ASSERTDD(c > 0, "Can't handle zero rectangles");
  939. ASSERTDD(ppdev->iBitmapFormat != BMF_24BPP, "Can't handle 24bpp");
  940. pjMmBase = ppdev->pjMmBase;
  941. xOffset = ppdev->xOffset;
  942. yOffset = ppdev->yOffset;
  943. cjPelSize = ppdev->cjPelSize;
  944. pulXlate = pxlo->pulXlate;
  945. ulFifo = 0;
  946. dx = pptlSrc->x - prclDst->left;
  947. dy = pptlSrc->y - prclDst->top; // Add to destination to get source
  948. lSrcDelta = psoSrc->lDelta;
  949. pjSrcScan0 = psoSrc->pvScan0;
  950. ulHwForeMix = gaul64HwMixFromRop2[rop4 & 0xf];
  951. M64_FAST_FIFO_CHECK(ppdev, pjMmBase, 7, ulFifo);
  952. // Fix vanishing fills and various color problems:
  953. M64_OD(pjMmBase, CONTEXT_LOAD_CNTL, CONTEXT_LOAD_CmdLoad | ppdev->iDefContext );
  954. M64_OD(pjMmBase, DP_MIX, ulHwForeMix | (ulHwForeMix >> 16));
  955. M64_OD(pjMmBase, DP_SRC, (DP_SRC_Host << 8));
  956. // The host data pixel width is the same as that of the screen:
  957. M64_OD(pjMmBase, DP_PIX_WIDTH, ppdev->ulMonoPixelWidth |
  958. ((ppdev->ulMonoPixelWidth & 0xf) << 16));
  959. while(TRUE)
  960. {
  961. xLeft = prcl->left;
  962. xRight = prcl->right;
  963. M64_OD(pjMmBase, SC_LEFT_RIGHT, PACKPAIR(xLeft + xOffset,
  964. xRight + xOffset - 1));
  965. yTop = prcl->top;
  966. cy = prcl->bottom - yTop;
  967. // We compute 'xBias' in order to dword-align the source pointer.
  968. // This way, we don't have to do unaligned reads of the source,
  969. // and we're guaranteed not to read even a byte past the end of
  970. // the bitmap.
  971. //
  972. // Note that this bias works at 24bpp, too:
  973. xBias = (xLeft + dx) & 3; // Floor
  974. xLeft -= xBias;
  975. cx = (xRight - xLeft + 3) & ~3; // Ceiling
  976. M64_OD(pjMmBase, DST_Y_X, PACKXY_FAST(xLeft + xOffset,
  977. yTop + yOffset));
  978. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY_FAST(cx, cy));
  979. pjSrc = pjSrcScan0 + (yTop + dy) * lSrcDelta
  980. + (xLeft + dx);
  981. lSrcSkip = lSrcDelta - cx;
  982. if (cjPelSize == 1)
  983. {
  984. // This part handles 8bpp output:
  985. cdSrc = (cx >> 2);
  986. cxRem = (cx & 3);
  987. do {
  988. for (i = cdSrc; i != 0; i--)
  989. {
  990. ul = (pulXlate[*pjSrc++]);
  991. ul |= (pulXlate[*pjSrc++] << 8);
  992. ul |= (pulXlate[*pjSrc++] << 16);
  993. ul |= (pulXlate[*pjSrc++] << 24);
  994. M64_FAST_FIFO_CHECK(ppdev, pjMmBase, 1, ulFifo);
  995. M64_OD(pjMmBase, HOST_DATA0, ul);
  996. }
  997. if (cxRem > 0)
  998. {
  999. ul = (pulXlate[*pjSrc++]);
  1000. if (cxRem > 1)
  1001. {
  1002. ul |= (pulXlate[*pjSrc++] << 8);
  1003. if (cxRem > 2)
  1004. {
  1005. ul |= (pulXlate[*pjSrc++] << 16);
  1006. }
  1007. }
  1008. M64_FAST_FIFO_CHECK(ppdev, pjMmBase, 1, ulFifo);
  1009. M64_OD(pjMmBase, HOST_DATA0, ul);
  1010. }
  1011. pjSrc += lSrcSkip;
  1012. } while (--cy != 0);
  1013. }
  1014. else if (cjPelSize == 2)
  1015. {
  1016. // This part handles 16bpp output:
  1017. cwSrc = (cx >> 1);
  1018. cxRem = (cx & 1);
  1019. do {
  1020. for (i = cwSrc; i != 0; i--)
  1021. {
  1022. ul = (pulXlate[*pjSrc++]);
  1023. ul |= (pulXlate[*pjSrc++] << 16);
  1024. M64_FAST_FIFO_CHECK(ppdev, pjMmBase, 1, ulFifo);
  1025. M64_OD(pjMmBase, HOST_DATA0, ul);
  1026. }
  1027. if (cxRem > 0)
  1028. {
  1029. ul = (pulXlate[*pjSrc++]);
  1030. M64_FAST_FIFO_CHECK(ppdev, pjMmBase, 1, ulFifo);
  1031. M64_OD(pjMmBase, HOST_DATA0, ul);
  1032. }
  1033. pjSrc += lSrcSkip;
  1034. } while (--cy != 0);
  1035. }
  1036. else
  1037. {
  1038. // This part handles 32bpp output:
  1039. do {
  1040. i = cx;
  1041. do {
  1042. ul = pulXlate[*pjSrc++];
  1043. M64_FAST_FIFO_CHECK(ppdev, pjMmBase, 1, ulFifo);
  1044. M64_OD(pjMmBase, HOST_DATA0, ul);
  1045. } while (--i != 0);
  1046. pjSrc += lSrcSkip;
  1047. } while (--cy != 0);
  1048. }
  1049. if (--c == 0)
  1050. break;
  1051. prcl++;
  1052. M64_FAST_FIFO_CHECK(ppdev, pjMmBase, 3, ulFifo);
  1053. }
  1054. // Don't forget to reset the clip register and the default pixel width:
  1055. M64_FAST_FIFO_CHECK(ppdev, pjMmBase, 2, ulFifo);
  1056. M64_OD(pjMmBase, DP_PIX_WIDTH, ppdev->ulMonoPixelWidth);
  1057. M64_OD(pjMmBase, SC_LEFT_RIGHT, PACKPAIR(0, M64_MAX_SCISSOR_R));
  1058. }
  1059. /******************************Public*Routine******************************\
  1060. * VOID vM64CopyBlt
  1061. *
  1062. * Does a screen-to-screen blt of a list of rectangles.
  1063. *
  1064. * See Blt_DS_SS_ENG_8G_D0 and Blt_DS_SS_TLBR_ENG_8G_D1.
  1065. *
  1066. \**************************************************************************/
  1067. VOID vM64CopyBlt( // Type FNCOPY
  1068. PDEV* ppdev,
  1069. LONG c, // Can't be zero
  1070. RECTL* prcl, // Array of relative coordinates destination rectangles
  1071. ULONG rop4, // rop4
  1072. POINTL* pptlSrc, // Original unclipped source point
  1073. RECTL* prclDst) // Original unclipped destination rectangle
  1074. {
  1075. BYTE* pjMmBase;
  1076. LONG xOffset;
  1077. LONG yOffset;
  1078. LONG dx;
  1079. LONG dy;
  1080. LONG xLeft;
  1081. LONG xRight;
  1082. LONG yTop;
  1083. LONG yBottom;
  1084. LONG cx;
  1085. LONG cy;
  1086. ASSERTDD(c > 0, "Can't handle zero rectangles");
  1087. ASSERTDD(((rop4 & 0xff00) >> 8) == (rop4 & 0xff),
  1088. "Expect only a rop2");
  1089. pjMmBase = ppdev->pjMmBase;
  1090. xOffset = ppdev->xOffset;
  1091. yOffset = ppdev->yOffset;
  1092. M64_CHECK_FIFO_SPACE(ppdev, pjMmBase, 11);
  1093. M64_OD(pjMmBase, CONTEXT_LOAD_CNTL, CONTEXT_LOAD_CmdLoad | ppdev->iDefContext );
  1094. M64_OD(pjMmBase, SRC_OFF_PITCH, ppdev->ulScreenOffsetAndPitch);
  1095. M64_OD(pjMmBase, DP_SRC, DP_SRC_Blit << 8);
  1096. M64_OD(pjMmBase, DP_MIX, gaul64HwMixFromRop2[rop4 & 0xf]);
  1097. M64_OD(pjMmBase, SRC_CNTL, 0);
  1098. dx = pptlSrc->x - prclDst->left;
  1099. dy = pptlSrc->y - prclDst->top;
  1100. // The accelerator may not be as fast at doing right-to-left copies, so
  1101. // only do them when the rectangles truly overlap:
  1102. if (!OVERLAP(prclDst, pptlSrc))
  1103. goto Top_Down_Left_To_Right;
  1104. if (prclDst->top <= pptlSrc->y)
  1105. {
  1106. if (prclDst->left <= pptlSrc->x)
  1107. {
  1108. Top_Down_Left_To_Right:
  1109. while (TRUE)
  1110. {
  1111. xLeft = xOffset + prcl->left;
  1112. yTop = yOffset + prcl->top;
  1113. cx = prcl->right - prcl->left;
  1114. cy = prcl->bottom - prcl->top;
  1115. M64_OD(pjMmBase, DST_Y_X, PACKXY_FAST(xLeft, yTop));
  1116. M64_OD(pjMmBase, SRC_Y_X, PACKXY_FAST(xLeft + dx, yTop + dy));
  1117. M64_OD(pjMmBase, SRC_WIDTH1, cx);
  1118. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY_FAST(cx, cy));
  1119. if (--c == 0)
  1120. break;
  1121. prcl++;
  1122. M64_CHECK_FIFO_SPACE(ppdev, pjMmBase, 4);
  1123. }
  1124. }
  1125. else
  1126. {
  1127. M64_OD(pjMmBase, DST_CNTL, DST_CNTL_YDir);
  1128. while (TRUE)
  1129. {
  1130. xRight = xOffset + prcl->right - 1;
  1131. yTop = yOffset + prcl->top;
  1132. cx = prcl->right - prcl->left;
  1133. cy = prcl->bottom - prcl->top;
  1134. M64_OD(pjMmBase, DST_Y_X, PACKXY_FAST(xRight, yTop));
  1135. M64_OD(pjMmBase, SRC_Y_X, PACKXY_FAST(xRight + dx, yTop + dy));
  1136. M64_OD(pjMmBase, SRC_WIDTH1, cx);
  1137. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY_FAST(cx, cy));
  1138. if (--c == 0)
  1139. break;
  1140. prcl++;
  1141. M64_CHECK_FIFO_SPACE(ppdev, pjMmBase, 5);
  1142. }
  1143. // Since we don't use a default context, we must restore registers:
  1144. M64_OD(pjMmBase, DST_CNTL, DST_CNTL_XDir | DST_CNTL_YDir);
  1145. }
  1146. }
  1147. else
  1148. {
  1149. if (prclDst->left <= pptlSrc->x)
  1150. {
  1151. M64_OD(pjMmBase, DST_CNTL, DST_CNTL_XDir);
  1152. while (TRUE)
  1153. {
  1154. xLeft = xOffset + prcl->left;
  1155. yBottom = yOffset + prcl->bottom - 1;
  1156. cx = prcl->right - prcl->left;
  1157. cy = prcl->bottom - prcl->top;
  1158. M64_OD(pjMmBase, DST_Y_X, PACKXY_FAST(xLeft, yBottom));
  1159. M64_OD(pjMmBase, SRC_Y_X, PACKXY_FAST(xLeft + dx, yBottom + dy));
  1160. M64_OD(pjMmBase, SRC_WIDTH1, cx);
  1161. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY_FAST(cx, cy));
  1162. if (--c == 0)
  1163. break;
  1164. prcl++;
  1165. M64_CHECK_FIFO_SPACE(ppdev, pjMmBase, 5);
  1166. }
  1167. // Since we don't use a default context, we must restore registers:
  1168. M64_OD(pjMmBase, DST_CNTL, DST_CNTL_XDir | DST_CNTL_YDir);
  1169. }
  1170. else
  1171. {
  1172. M64_OD(pjMmBase, DST_CNTL, 0);
  1173. while (TRUE)
  1174. {
  1175. xRight = xOffset + prcl->right - 1;
  1176. yBottom = yOffset + prcl->bottom - 1;
  1177. cx = prcl->right - prcl->left;
  1178. cy = prcl->bottom - prcl->top;
  1179. M64_OD(pjMmBase, DST_Y_X, PACKXY_FAST(xRight, yBottom));
  1180. M64_OD(pjMmBase, SRC_Y_X, PACKXY_FAST(xRight + dx, yBottom + dy));
  1181. M64_OD(pjMmBase, SRC_WIDTH1, cx);
  1182. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY_FAST(cx, cy));
  1183. if (--c == 0)
  1184. break;
  1185. prcl++;
  1186. M64_CHECK_FIFO_SPACE(ppdev, pjMmBase, 5);
  1187. }
  1188. // Since we don't use a default context, we must restore registers:
  1189. M64_OD(pjMmBase, DST_CNTL, DST_CNTL_XDir | DST_CNTL_YDir);
  1190. }
  1191. }
  1192. }
  1193. VOID vM64CopyBlt24( // Type FNCOPY
  1194. PDEV* ppdev,
  1195. LONG c, // Can't be zero
  1196. RECTL* prcl, // Array of relative coordinates destination rectangles
  1197. ULONG rop4, // rop4
  1198. POINTL* pptlSrc, // Original unclipped source point
  1199. RECTL* prclDst) // Original unclipped destination rectangle
  1200. {
  1201. BYTE* pjMmBase;
  1202. LONG xOffset;
  1203. LONG yOffset;
  1204. LONG dx;
  1205. LONG dy;
  1206. LONG xLeft;
  1207. LONG xRight;
  1208. LONG yTop;
  1209. LONG yBottom;
  1210. LONG cx;
  1211. LONG cy;
  1212. ASSERTDD(c > 0, "Can't handle zero rectangles");
  1213. ASSERTDD(((rop4 & 0xff00) >> 8) == (rop4 & 0xff),
  1214. "Expect only a rop2");
  1215. pjMmBase = ppdev->pjMmBase;
  1216. xOffset = ppdev->xOffset;
  1217. yOffset = ppdev->yOffset;
  1218. M64_CHECK_FIFO_SPACE(ppdev, pjMmBase, 11);
  1219. M64_OD(pjMmBase, CONTEXT_LOAD_CNTL, CONTEXT_LOAD_CmdLoad | ppdev->iDefContext );
  1220. M64_OD(pjMmBase, SRC_OFF_PITCH, ppdev->ulScreenOffsetAndPitch);
  1221. M64_OD(pjMmBase, DP_SRC, DP_SRC_Blit << 8);
  1222. M64_OD(pjMmBase, DP_MIX, gaul64HwMixFromRop2[rop4 & 0xf]);
  1223. M64_OD(pjMmBase, SRC_CNTL, 0);
  1224. dx = (pptlSrc->x - prclDst->left) * 3;
  1225. dy = pptlSrc->y - prclDst->top;
  1226. // The accelerator may not be as fast at doing right-to-left copies, so
  1227. // only do them when the rectangles truly overlap:
  1228. if (!OVERLAP(prclDst, pptlSrc))
  1229. goto Top_Down_Left_To_Right;
  1230. if (prclDst->top <= pptlSrc->y)
  1231. {
  1232. if (prclDst->left <= pptlSrc->x)
  1233. {
  1234. Top_Down_Left_To_Right:
  1235. while (TRUE)
  1236. {
  1237. xLeft = (xOffset + prcl->left) * 3;
  1238. yTop = yOffset + prcl->top;
  1239. cx = (prcl->right - prcl->left) * 3;
  1240. cy = prcl->bottom - prcl->top;
  1241. M64_OD(pjMmBase, DST_Y_X, PACKXY_FAST(xLeft, yTop));
  1242. M64_OD(pjMmBase, SRC_Y_X, PACKXY_FAST(xLeft + dx, yTop + dy));
  1243. M64_OD(pjMmBase, SRC_WIDTH1, cx);
  1244. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY_FAST(cx, cy));
  1245. if (--c == 0)
  1246. break;
  1247. prcl++;
  1248. M64_CHECK_FIFO_SPACE(ppdev, pjMmBase, 4);
  1249. }
  1250. }
  1251. else
  1252. {
  1253. M64_OD(pjMmBase, DST_CNTL, DST_CNTL_YDir);
  1254. while (TRUE)
  1255. {
  1256. xRight = (xOffset + prcl->right) * 3 - 1;
  1257. yTop = yOffset + prcl->top;
  1258. cx = (prcl->right - prcl->left) * 3;
  1259. cy = prcl->bottom - prcl->top;
  1260. M64_OD(pjMmBase, DST_Y_X, PACKXY_FAST(xRight, yTop));
  1261. M64_OD(pjMmBase, SRC_Y_X, PACKXY_FAST(xRight + dx, yTop + dy));
  1262. M64_OD(pjMmBase, SRC_WIDTH1, cx);
  1263. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY_FAST(cx, cy));
  1264. if (--c == 0)
  1265. break;
  1266. prcl++;
  1267. M64_CHECK_FIFO_SPACE(ppdev, pjMmBase, 5);
  1268. }
  1269. // Since we don't use a default context, we must restore registers:
  1270. M64_OD(pjMmBase, DST_CNTL, DST_CNTL_XDir | DST_CNTL_YDir);
  1271. }
  1272. }
  1273. else
  1274. {
  1275. if (prclDst->left <= pptlSrc->x)
  1276. {
  1277. M64_OD(pjMmBase, DST_CNTL, DST_CNTL_XDir);
  1278. while (TRUE)
  1279. {
  1280. xLeft = (xOffset + prcl->left) * 3;
  1281. yBottom = yOffset + prcl->bottom - 1;
  1282. cx = (prcl->right - prcl->left) * 3;
  1283. cy = prcl->bottom - prcl->top;
  1284. M64_OD(pjMmBase, DST_Y_X, PACKXY_FAST(xLeft, yBottom));
  1285. M64_OD(pjMmBase, SRC_Y_X, PACKXY_FAST(xLeft + dx, yBottom + dy));
  1286. M64_OD(pjMmBase, SRC_WIDTH1, cx);
  1287. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY_FAST(cx, cy));
  1288. if (--c == 0)
  1289. break;
  1290. prcl++;
  1291. M64_CHECK_FIFO_SPACE(ppdev, pjMmBase, 5);
  1292. }
  1293. // Since we don't use a default context, we must restore registers:
  1294. M64_OD(pjMmBase, DST_CNTL, DST_CNTL_XDir | DST_CNTL_YDir);
  1295. }
  1296. else
  1297. {
  1298. M64_OD(pjMmBase, DST_CNTL, 0);
  1299. while (TRUE)
  1300. {
  1301. xRight = (xOffset + prcl->right) * 3 - 1;
  1302. yBottom = yOffset + prcl->bottom - 1;
  1303. cx = (prcl->right - prcl->left) * 3;
  1304. cy = prcl->bottom - prcl->top;
  1305. M64_OD(pjMmBase, DST_Y_X, PACKXY_FAST(xRight, yBottom));
  1306. M64_OD(pjMmBase, SRC_Y_X, PACKXY_FAST(xRight + dx, yBottom + dy));
  1307. M64_OD(pjMmBase, SRC_WIDTH1, cx);
  1308. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY_FAST(cx, cy));
  1309. if (--c == 0)
  1310. break;
  1311. prcl++;
  1312. M64_CHECK_FIFO_SPACE(ppdev, pjMmBase, 5);
  1313. }
  1314. // Since we don't use a default context, we must restore registers:
  1315. M64_OD(pjMmBase, DST_CNTL, DST_CNTL_XDir | DST_CNTL_YDir);
  1316. }
  1317. }
  1318. }
  1319. /******************************************************************************\
  1320. * Special versions to fix screen source FIFO bug in VT-A4 with 1 MB of SDRAM.
  1321. *
  1322. \******************************************************************************/
  1323. VOID vM64CopyBlt_VTA4( // Type FNCOPY
  1324. PDEV* ppdev,
  1325. LONG c, // Can't be zero
  1326. RECTL* prcl, // Array of relative coordinates destination rectangles
  1327. ULONG rop4, // rop4
  1328. POINTL* pptlSrc, // Original unclipped source point
  1329. RECTL* prclDst) // Original unclipped destination rectangle
  1330. {
  1331. BOOL reset_scissors = FALSE;
  1332. BYTE* pjMmBase;
  1333. LONG xOffset;
  1334. LONG yOffset;
  1335. LONG dx;
  1336. LONG dy;
  1337. LONG xLeft;
  1338. LONG xRight;
  1339. LONG yTop;
  1340. LONG yBottom;
  1341. LONG cx;
  1342. LONG cy;
  1343. LONG remain = 32/ppdev->cjPelSize - 1;
  1344. ASSERTDD(c > 0, "Can't handle zero rectangles");
  1345. ASSERTDD(((rop4 & 0xff00) >> 8) == (rop4 & 0xff),
  1346. "Expect only a rop2");
  1347. pjMmBase = ppdev->pjMmBase;
  1348. xOffset = ppdev->xOffset;
  1349. yOffset = ppdev->yOffset;
  1350. M64_CHECK_FIFO_SPACE(ppdev, pjMmBase, 14);
  1351. M64_OD(pjMmBase, CONTEXT_LOAD_CNTL, CONTEXT_LOAD_CmdLoad | ppdev->iDefContext );
  1352. M64_OD(pjMmBase, SRC_OFF_PITCH, ppdev->ulScreenOffsetAndPitch);
  1353. M64_OD(pjMmBase, DP_SRC, DP_SRC_Blit << 8);
  1354. M64_OD(pjMmBase, DP_MIX, gaul64HwMixFromRop2[rop4 & 0xf]);
  1355. M64_OD(pjMmBase, SRC_CNTL, 0);
  1356. dx = pptlSrc->x - prclDst->left;
  1357. dy = pptlSrc->y - prclDst->top;
  1358. // The accelerator may not be as fast at doing right-to-left copies, so
  1359. // only do them when the rectangles truly overlap:
  1360. if (!OVERLAP(prclDst, pptlSrc))
  1361. goto Top_Down_Left_To_Right;
  1362. if (prclDst->top <= pptlSrc->y)
  1363. {
  1364. if (prclDst->left <= pptlSrc->x)
  1365. {
  1366. LONG tmpLeft;
  1367. Top_Down_Left_To_Right:
  1368. while (TRUE)
  1369. {
  1370. xLeft = xOffset + prcl->left;
  1371. yTop = yOffset + prcl->top;
  1372. cx = prcl->right - prcl->left;
  1373. cy = prcl->bottom - prcl->top;
  1374. // 32-byte-align left:
  1375. tmpLeft = xLeft + dx;
  1376. if (tmpLeft & remain)
  1377. {
  1378. M64_OD(pjMmBase, SC_LEFT, xLeft);
  1379. xLeft -= (tmpLeft & remain);
  1380. cx += (tmpLeft & remain);
  1381. tmpLeft &= ~remain;
  1382. reset_scissors = TRUE;
  1383. }
  1384. // 32-byte-align right:
  1385. if (cx & remain)
  1386. {
  1387. M64_OD(pjMmBase, SC_RIGHT, xLeft + cx - 1);
  1388. cx = (cx + remain)/(remain+1) * (remain+1);
  1389. reset_scissors = TRUE;
  1390. }
  1391. M64_OD(pjMmBase, DST_Y_X, PACKXY_FAST(xLeft, yTop));
  1392. M64_OD(pjMmBase, SRC_Y_X, PACKXY_FAST(tmpLeft, yTop + dy));
  1393. M64_OD(pjMmBase, SRC_WIDTH1, cx);
  1394. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY_FAST(cx, cy));
  1395. if (reset_scissors)
  1396. {
  1397. M64_OD(pjMmBase, SC_LEFT_RIGHT, PACKPAIR(0, M64_MAX_SCISSOR_R));
  1398. reset_scissors = FALSE;
  1399. }
  1400. if (--c == 0)
  1401. break;
  1402. prcl++;
  1403. M64_CHECK_FIFO_SPACE(ppdev, pjMmBase, 7);
  1404. }
  1405. }
  1406. else
  1407. {
  1408. LONG k, tmpRight;
  1409. M64_OD(pjMmBase, DST_CNTL, DST_CNTL_YDir);
  1410. while (TRUE)
  1411. {
  1412. xRight = xOffset + prcl->right - 1;
  1413. yTop = yOffset + prcl->top;
  1414. cx = prcl->right - prcl->left;
  1415. cy = prcl->bottom - prcl->top;
  1416. // 32-byte-align right:
  1417. tmpRight = xRight + dx;
  1418. if ((tmpRight+1) & remain)
  1419. {
  1420. M64_OD(pjMmBase, SC_RIGHT, xRight);
  1421. k = ((tmpRight+1) + remain)/(remain+1) * (remain+1) - 1;
  1422. xRight += k - tmpRight;
  1423. cx += k - tmpRight;
  1424. tmpRight = k;
  1425. reset_scissors = TRUE;
  1426. }
  1427. // 32-byte-align left:
  1428. if (cx & remain)
  1429. {
  1430. M64_OD(pjMmBase, SC_LEFT, xRight - cx + 1);
  1431. cx = (cx + remain)/(remain+1) * (remain+1);
  1432. reset_scissors = TRUE;
  1433. }
  1434. M64_OD(pjMmBase, DST_Y_X, PACKXY_FAST(xRight, yTop));
  1435. M64_OD(pjMmBase, SRC_Y_X, PACKXY_FAST(tmpRight, yTop + dy));
  1436. M64_OD(pjMmBase, SRC_WIDTH1, cx);
  1437. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY_FAST(cx, cy));
  1438. if (reset_scissors)
  1439. {
  1440. M64_OD(pjMmBase, SC_LEFT_RIGHT, PACKPAIR(0, M64_MAX_SCISSOR_R));
  1441. reset_scissors = FALSE;
  1442. }
  1443. if (--c == 0)
  1444. break;
  1445. prcl++;
  1446. M64_CHECK_FIFO_SPACE(ppdev, pjMmBase, 8);
  1447. }
  1448. // Since we don't use a default context, we must restore registers:
  1449. M64_OD(pjMmBase, DST_CNTL, DST_CNTL_XDir | DST_CNTL_YDir);
  1450. }
  1451. }
  1452. else
  1453. {
  1454. if (prclDst->left <= pptlSrc->x)
  1455. {
  1456. LONG tmpLeft;
  1457. M64_OD(pjMmBase, DST_CNTL, DST_CNTL_XDir);
  1458. while (TRUE)
  1459. {
  1460. xLeft = xOffset + prcl->left;
  1461. yBottom = yOffset + prcl->bottom - 1;
  1462. cx = prcl->right - prcl->left;
  1463. cy = prcl->bottom - prcl->top;
  1464. // 32-byte-align left:
  1465. tmpLeft = xLeft + dx;
  1466. if (tmpLeft & remain)
  1467. {
  1468. M64_OD(pjMmBase, SC_LEFT, xLeft);
  1469. xLeft -= (tmpLeft & remain);
  1470. cx += (tmpLeft & remain);
  1471. tmpLeft &= ~remain;
  1472. reset_scissors = TRUE;
  1473. }
  1474. // 32-byte-align right:
  1475. if (cx & remain)
  1476. {
  1477. M64_OD(pjMmBase, SC_RIGHT, xLeft + cx - 1);
  1478. cx = (cx + remain)/(remain+1) * (remain+1);
  1479. reset_scissors = TRUE;
  1480. }
  1481. M64_OD(pjMmBase, DST_Y_X, PACKXY_FAST(xLeft, yBottom));
  1482. M64_OD(pjMmBase, SRC_Y_X, PACKXY_FAST(tmpLeft, yBottom + dy));
  1483. M64_OD(pjMmBase, SRC_WIDTH1, cx);
  1484. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY_FAST(cx, cy));
  1485. if (reset_scissors)
  1486. {
  1487. M64_OD(pjMmBase, SC_LEFT_RIGHT, PACKPAIR(0, M64_MAX_SCISSOR_R));
  1488. reset_scissors = FALSE;
  1489. }
  1490. if (--c == 0)
  1491. break;
  1492. prcl++;
  1493. M64_CHECK_FIFO_SPACE(ppdev, pjMmBase, 8);
  1494. }
  1495. // Since we don't use a default context, we must restore registers:
  1496. M64_OD(pjMmBase, DST_CNTL, DST_CNTL_XDir | DST_CNTL_YDir);
  1497. }
  1498. else
  1499. {
  1500. LONG k, tmpRight;
  1501. M64_OD(pjMmBase, DST_CNTL, 0);
  1502. while (TRUE)
  1503. {
  1504. xRight = xOffset + prcl->right - 1;
  1505. yBottom = yOffset + prcl->bottom - 1;
  1506. cx = prcl->right - prcl->left;
  1507. cy = prcl->bottom - prcl->top;
  1508. // 32-byte-align right:
  1509. tmpRight = xRight + dx;
  1510. if ((tmpRight+1) & remain)
  1511. {
  1512. M64_OD(pjMmBase, SC_RIGHT, xRight);
  1513. k = ((tmpRight+1) + remain)/(remain+1) * (remain+1) - 1;
  1514. xRight += k - tmpRight;
  1515. cx += k - tmpRight;
  1516. tmpRight = k;
  1517. reset_scissors = TRUE;
  1518. }
  1519. // 32-byte-align left:
  1520. if (cx & remain)
  1521. {
  1522. M64_OD(pjMmBase, SC_LEFT, xRight - cx + 1);
  1523. cx = (cx + remain)/(remain+1) * (remain+1);
  1524. reset_scissors = TRUE;
  1525. }
  1526. M64_OD(pjMmBase, DST_Y_X, PACKXY_FAST(xRight, yBottom));
  1527. M64_OD(pjMmBase, SRC_Y_X, PACKXY_FAST(tmpRight, yBottom + dy));
  1528. M64_OD(pjMmBase, SRC_WIDTH1, cx);
  1529. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY_FAST(cx, cy));
  1530. if (reset_scissors)
  1531. {
  1532. M64_OD(pjMmBase, SC_LEFT_RIGHT, PACKPAIR(0, M64_MAX_SCISSOR_R));
  1533. reset_scissors = FALSE;
  1534. }
  1535. if (--c == 0)
  1536. break;
  1537. prcl++;
  1538. M64_CHECK_FIFO_SPACE(ppdev, pjMmBase, 8);
  1539. }
  1540. // Since we don't use a default context, we must restore registers:
  1541. M64_OD(pjMmBase, DST_CNTL, DST_CNTL_XDir | DST_CNTL_YDir);
  1542. }
  1543. }
  1544. }
  1545. VOID vM64CopyBlt24_VTA4( // Type FNCOPY
  1546. PDEV* ppdev,
  1547. LONG c, // Can't be zero
  1548. RECTL* prcl, // Array of relative coordinates destination rectangles
  1549. ULONG rop4, // rop4
  1550. POINTL* pptlSrc, // Original unclipped source point
  1551. RECTL* prclDst) // Original unclipped destination rectangle
  1552. {
  1553. BOOL reset_scissors = FALSE;
  1554. BYTE* pjMmBase;
  1555. LONG xOffset;
  1556. LONG yOffset;
  1557. LONG dx;
  1558. LONG dy;
  1559. LONG xLeft;
  1560. LONG xRight;
  1561. LONG yTop;
  1562. LONG yBottom;
  1563. LONG cx;
  1564. LONG cy;
  1565. ASSERTDD(c > 0, "Can't handle zero rectangles");
  1566. ASSERTDD(((rop4 & 0xff00) >> 8) == (rop4 & 0xff),
  1567. "Expect only a rop2");
  1568. pjMmBase = ppdev->pjMmBase;
  1569. xOffset = ppdev->xOffset;
  1570. yOffset = ppdev->yOffset;
  1571. M64_CHECK_FIFO_SPACE(ppdev, pjMmBase, 14);
  1572. M64_OD(pjMmBase, CONTEXT_LOAD_CNTL, CONTEXT_LOAD_CmdLoad | ppdev->iDefContext );
  1573. M64_OD(pjMmBase, SRC_OFF_PITCH, ppdev->ulScreenOffsetAndPitch);
  1574. M64_OD(pjMmBase, DP_SRC, DP_SRC_Blit << 8);
  1575. M64_OD(pjMmBase, DP_MIX, gaul64HwMixFromRop2[rop4 & 0xf]);
  1576. M64_OD(pjMmBase, SRC_CNTL, 0);
  1577. dx = (pptlSrc->x - prclDst->left) * 3;
  1578. dy = pptlSrc->y - prclDst->top;
  1579. // The accelerator may not be as fast at doing right-to-left copies, so
  1580. // only do them when the rectangles truly overlap:
  1581. if (!OVERLAP(prclDst, pptlSrc))
  1582. goto Top_Down_Left_To_Right;
  1583. if (prclDst->top <= pptlSrc->y)
  1584. {
  1585. if (prclDst->left <= pptlSrc->x)
  1586. {
  1587. LONG tmpLeft;
  1588. Top_Down_Left_To_Right:
  1589. while (TRUE)
  1590. {
  1591. xLeft = (xOffset + prcl->left) * 3;
  1592. yTop = yOffset + prcl->top;
  1593. cx = (prcl->right - prcl->left) * 3;
  1594. cy = prcl->bottom - prcl->top;
  1595. // 32-byte-align left:
  1596. tmpLeft = xLeft + dx;
  1597. if (tmpLeft & 31)
  1598. {
  1599. M64_OD(pjMmBase, SC_LEFT, xLeft);
  1600. xLeft -= (tmpLeft & 31);
  1601. cx += (tmpLeft & 31);
  1602. tmpLeft &= ~31;
  1603. reset_scissors = TRUE;
  1604. }
  1605. // 32-byte-align right:
  1606. if (cx & 31)
  1607. {
  1608. M64_OD(pjMmBase, SC_RIGHT, xLeft + cx - 1);
  1609. cx = (cx + 31)/32 * 32;
  1610. reset_scissors = TRUE;
  1611. }
  1612. M64_OD(pjMmBase, DST_Y_X, PACKXY_FAST(xLeft, yTop));
  1613. M64_OD(pjMmBase, SRC_Y_X, PACKXY_FAST(tmpLeft, yTop + dy));
  1614. M64_OD(pjMmBase, SRC_WIDTH1, cx);
  1615. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY_FAST(cx, cy));
  1616. if (reset_scissors)
  1617. {
  1618. M64_OD(pjMmBase, SC_LEFT_RIGHT, PACKPAIR(0, M64_MAX_SCISSOR_R));
  1619. reset_scissors = FALSE;
  1620. }
  1621. if (--c == 0)
  1622. break;
  1623. prcl++;
  1624. M64_CHECK_FIFO_SPACE(ppdev, pjMmBase, 7);
  1625. }
  1626. }
  1627. else
  1628. {
  1629. LONG k, tmpRight;
  1630. M64_OD(pjMmBase, DST_CNTL, DST_CNTL_YDir);
  1631. while (TRUE)
  1632. {
  1633. xRight = (xOffset + prcl->right) * 3 - 1;
  1634. yTop = yOffset + prcl->top;
  1635. cx = (prcl->right - prcl->left) * 3;
  1636. cy = prcl->bottom - prcl->top;
  1637. // 32-byte-align right:
  1638. tmpRight = xRight + dx;
  1639. if ((tmpRight+1) & 31)
  1640. {
  1641. M64_OD(pjMmBase, SC_RIGHT, xRight);
  1642. k = ((tmpRight+1) + 31)/32 * 32 - 1;
  1643. xRight += k - tmpRight;
  1644. cx += k - tmpRight;
  1645. tmpRight = k;
  1646. reset_scissors = TRUE;
  1647. }
  1648. // 32-byte-align left:
  1649. if (cx & 31)
  1650. {
  1651. M64_OD(pjMmBase, SC_LEFT, xRight - cx + 1);
  1652. cx = (cx + 31)/32 * 32;
  1653. reset_scissors = TRUE;
  1654. }
  1655. M64_OD(pjMmBase, DST_Y_X, PACKXY_FAST(xRight, yTop));
  1656. M64_OD(pjMmBase, SRC_Y_X, PACKXY_FAST(tmpRight, yTop + dy));
  1657. M64_OD(pjMmBase, SRC_WIDTH1, cx);
  1658. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY_FAST(cx, cy));
  1659. if (reset_scissors)
  1660. {
  1661. M64_OD(pjMmBase, SC_LEFT_RIGHT, PACKPAIR(0, M64_MAX_SCISSOR_R));
  1662. reset_scissors = FALSE;
  1663. }
  1664. if (--c == 0)
  1665. break;
  1666. prcl++;
  1667. M64_CHECK_FIFO_SPACE(ppdev, pjMmBase, 8);
  1668. }
  1669. // Since we don't use a default context, we must restore registers:
  1670. M64_OD(pjMmBase, DST_CNTL, DST_CNTL_XDir | DST_CNTL_YDir);
  1671. }
  1672. }
  1673. else
  1674. {
  1675. if (prclDst->left <= pptlSrc->x)
  1676. {
  1677. LONG tmpLeft;
  1678. M64_OD(pjMmBase, DST_CNTL, DST_CNTL_XDir);
  1679. while (TRUE)
  1680. {
  1681. xLeft = (xOffset + prcl->left) * 3;
  1682. yBottom = yOffset + prcl->bottom - 1;
  1683. cx = (prcl->right - prcl->left) * 3;
  1684. cy = prcl->bottom - prcl->top;
  1685. // 32-byte-align left:
  1686. tmpLeft = xLeft + dx;
  1687. if (tmpLeft & 31)
  1688. {
  1689. M64_OD(pjMmBase, SC_LEFT, xLeft);
  1690. xLeft -= (tmpLeft & 31);
  1691. cx += (tmpLeft & 31);
  1692. tmpLeft &= ~31;
  1693. reset_scissors = TRUE;
  1694. }
  1695. // 32-byte-align right:
  1696. if (cx & 31)
  1697. {
  1698. M64_OD(pjMmBase, SC_RIGHT, xLeft + cx - 1);
  1699. cx = (cx + 31)/32 * 32;
  1700. reset_scissors = TRUE;
  1701. }
  1702. M64_OD(pjMmBase, DST_Y_X, PACKXY_FAST(xLeft, yBottom));
  1703. M64_OD(pjMmBase, SRC_Y_X, PACKXY_FAST(tmpLeft, yBottom + dy));
  1704. M64_OD(pjMmBase, SRC_WIDTH1, cx);
  1705. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY_FAST(cx, cy));
  1706. if (reset_scissors)
  1707. {
  1708. M64_OD(pjMmBase, SC_LEFT_RIGHT, PACKPAIR(0, M64_MAX_SCISSOR_R));
  1709. reset_scissors = FALSE;
  1710. }
  1711. if (--c == 0)
  1712. break;
  1713. prcl++;
  1714. M64_CHECK_FIFO_SPACE(ppdev, pjMmBase, 8);
  1715. }
  1716. // Since we don't use a default context, we must restore registers:
  1717. M64_OD(pjMmBase, DST_CNTL, DST_CNTL_XDir | DST_CNTL_YDir);
  1718. }
  1719. else
  1720. {
  1721. LONG k, tmpRight;
  1722. M64_OD(pjMmBase, DST_CNTL, 0);
  1723. while (TRUE)
  1724. {
  1725. xRight = (xOffset + prcl->right) * 3 - 1;
  1726. yBottom = yOffset + prcl->bottom - 1;
  1727. cx = (prcl->right - prcl->left) * 3;
  1728. cy = prcl->bottom - prcl->top;
  1729. // 32-byte-align right:
  1730. tmpRight = xRight + dx;
  1731. if ((tmpRight+1) & 31)
  1732. {
  1733. M64_OD(pjMmBase, SC_RIGHT, xRight);
  1734. k = ((tmpRight+1) + 31)/32 * 32 - 1;
  1735. xRight += k - tmpRight;
  1736. cx += k - tmpRight;
  1737. tmpRight = k;
  1738. reset_scissors = TRUE;
  1739. }
  1740. // 32-byte-align left:
  1741. if (cx & 31)
  1742. {
  1743. M64_OD(pjMmBase, SC_LEFT, xRight - cx + 1);
  1744. cx = (cx + 31)/32 * 32;
  1745. reset_scissors = TRUE;
  1746. }
  1747. M64_OD(pjMmBase, DST_Y_X, PACKXY_FAST(xRight, yBottom));
  1748. M64_OD(pjMmBase, SRC_Y_X, PACKXY_FAST(tmpRight, yBottom + dy));
  1749. M64_OD(pjMmBase, SRC_WIDTH1, cx);
  1750. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY_FAST(cx, cy));
  1751. if (reset_scissors)
  1752. {
  1753. M64_OD(pjMmBase, SC_LEFT_RIGHT, PACKPAIR(0, M64_MAX_SCISSOR_R));
  1754. reset_scissors = FALSE;
  1755. }
  1756. if (--c == 0)
  1757. break;
  1758. prcl++;
  1759. M64_CHECK_FIFO_SPACE(ppdev, pjMmBase, 8);
  1760. }
  1761. // Since we don't use a default context, we must restore registers:
  1762. M64_OD(pjMmBase, DST_CNTL, DST_CNTL_XDir | DST_CNTL_YDir);
  1763. }
  1764. }
  1765. }