Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1986 lines
63 KiB

  1. /******************************Module*Header*******************************\
  2. * Module Name: bltm64.c
  3. *
  4. * Contains the low-level memory-mapped I/O blt functions for the Mach64.
  5. *
  6. * Hopefully, if you're basing your display driver on this code, to
  7. * support all of DrvBitBlt and DrvCopyBits, you'll only have to implement
  8. * the following routines. You shouldn't have to modify much in
  9. * 'bitblt.c'. I've tried to make these routines as few, modular, simple,
  10. * and efficient as I could, while still accelerating as many calls as
  11. * possible that would be cost-effective in terms of performance wins
  12. * versus size and effort.
  13. *
  14. * Note: In the following, 'relative' coordinates refers to coordinates
  15. * that haven't yet had the offscreen bitmap (DFB) offset applied.
  16. * 'Absolute' coordinates have had the offset applied. For example,
  17. * we may be told to blt to (1, 1) of the bitmap, but the bitmap may
  18. * be sitting in offscreen memory starting at coordinate (0, 768) --
  19. * (1, 1) would be the 'relative' start coordinate, and (1, 769)
  20. * would be the 'absolute' start coordinate.
  21. *
  22. * Copyright (c) 1992-1995 Microsoft Corporation
  23. *
  24. \**************************************************************************/
  25. #include "precomp.h"
  26. /******************************Public*Routine******************************\
  27. * VOID vM64FillSolid
  28. *
  29. * Fills a list of rectangles with a solid colour.
  30. *
  31. \**************************************************************************/
  32. VOID vM64FillSolid2( // Type FNFILL
  33. PDEV* ppdev,
  34. LONG c, // Can't be zero
  35. RECTL* prcl, // List of rectangles to be filled, in relative
  36. // coordinates
  37. ULONG rop4, // rop4
  38. RBRUSH_COLOR rbc, // Drawing colour is rbc.iSolidColor
  39. POINTL* pptlBrush) // Not used
  40. {
  41. BYTE* pjMmBase;
  42. LONG xOffset;
  43. LONG yOffset;
  44. ULONG mix;
  45. ASSERTDD(c > 0, "Can't handle zero rectangles");
  46. pjMmBase = ppdev->pjMmBase;
  47. xOffset = ppdev->xOffset;
  48. yOffset = ppdev->yOffset;
  49. mix = gaul64HwMixFromRop2[(rop4 >> 2) & 0xf];
  50. if (mix == 0x70000)
  51. {
  52. M64_CHECK_GUI_SPACE(ppdev, pjMmBase, 5);
  53. M64_OD(pjMmBase, DP_SET_GUI_ENGINE, 0x20100000 | ppdev->SetGuiEngineDefault);
  54. M64_OD(pjMmBase, DP_FRGD_CLR, rbc.iSolidColor);
  55. while (TRUE)
  56. {
  57. M64_OD(pjMmBase, DST_Y_X, PACKXY_FAST(xOffset + prcl->left,
  58. yOffset + prcl->top));
  59. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY_FAST(prcl->right - prcl->left,
  60. prcl->bottom - prcl->top));
  61. if (--c == 0)
  62. break;
  63. prcl++;
  64. M64_CHECK_GUI_SPACE(ppdev, pjMmBase, 3);
  65. }
  66. M64_OD(pjMmBase, SRC_CNTL, 0);
  67. }
  68. else
  69. {
  70. M64_CHECK_GUI_SPACE(ppdev, pjMmBase, 5);
  71. M64_OD(pjMmBase, DP_MIX, mix);
  72. M64_OD(pjMmBase, DP_FRGD_CLR, rbc.iSolidColor);
  73. M64_OD(pjMmBase, DP_SRC, DP_SRC_FrgdClr << 8);
  74. while (TRUE)
  75. {
  76. M64_OD(pjMmBase, DST_Y_X, PACKXY_FAST(xOffset + prcl->left,
  77. yOffset + prcl->top));
  78. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY_FAST(prcl->right - prcl->left,
  79. prcl->bottom - prcl->top));
  80. if (--c == 0)
  81. break;
  82. prcl++;
  83. M64_CHECK_GUI_SPACE(ppdev, pjMmBase, 2);
  84. }
  85. }
  86. }
  87. // Solid fill routines using block write.
  88. VOID vM64FillSolidUMC( // Type FNFILL
  89. PDEV* ppdev,
  90. LONG c, // Can't be zero
  91. RECTL* prcl, // List of rectangles to be filled, in relative
  92. // coordinates
  93. ULONG rop4, // rop4
  94. RBRUSH_COLOR rbc, // Drawing colour is rbc.iSolidColor
  95. POINTL* pptlBrush) // Not used
  96. {
  97. BYTE* pjMmBase;
  98. LONG xOffset;
  99. LONG yOffset;
  100. LONG dx;
  101. ULONG mix;
  102. BOOL FastFillFlag;
  103. ASSERTDD(c > 0, "Can't handle zero rectangles");
  104. pjMmBase = ppdev->pjMmBase;
  105. xOffset = ppdev->xOffset;
  106. yOffset = ppdev->yOffset;
  107. mix = gaul64HwMixFromRop2[(rop4 >> 2) & 0xf];
  108. if (mix == 0x70000)
  109. {
  110. FastFillFlag = FALSE;
  111. M64_CHECK_GUI_SPACE(ppdev, pjMmBase, 2);
  112. M64_OD(pjMmBase, DP_SET_GUI_ENGINE, 0x100000 | ppdev->SetGuiEngineDefault);
  113. // M64_OD(pjMmBase, DP_MIX, 0x70003);
  114. M64_OD(pjMmBase, DP_FRGD_CLR, rbc.iSolidColor);
  115. // M64_OD(pjMmBase, DP_SRC, DP_SRC_FrgdClr << 8);
  116. while (TRUE)
  117. {
  118. if ((dx = prcl->right - prcl->left) > 32)
  119. {
  120. FastFillFlag = TRUE;
  121. M64_CHECK_GUI_SPACE(ppdev, pjMmBase, 6);
  122. M64_OD(pjMmBase, SRC_CNTL, SRC_CNTL_FastFillEna | SRC_CNTL_RegWriteEna);
  123. M64_OD(pjMmBase, DST_Y_X, 0x00000000);
  124. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, 0x00010001);
  125. M64_OD(pjMmBase, SRC_CNTL, SRC_CNTL_FastFillEna | SRC_CNTL_BlkWriteEna);
  126. M64_OD(pjMmBase, DST_Y_X, PACKXY_FAST(xOffset + prcl->left, yOffset + prcl->top));
  127. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY_FAST(dx, prcl->bottom - prcl->top));
  128. }
  129. else
  130. {
  131. if (FastFillFlag == TRUE)
  132. {
  133. FastFillFlag = FALSE;
  134. M64_CHECK_GUI_SPACE(ppdev, pjMmBase, 3);
  135. M64_OD(pjMmBase, SRC_CNTL, 0);
  136. }
  137. else
  138. {
  139. M64_CHECK_GUI_SPACE(ppdev, pjMmBase, 2);
  140. }
  141. M64_OD(pjMmBase, DST_Y_X, PACKXY_FAST(xOffset + prcl->left, yOffset + prcl->top));
  142. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY_FAST(dx, prcl->bottom - prcl->top));
  143. }
  144. if (--c == 0)
  145. break;
  146. prcl++;
  147. }
  148. if (FastFillFlag == TRUE)
  149. {
  150. M64_CHECK_GUI_SPACE(ppdev, pjMmBase, 1);
  151. M64_OD(pjMmBase, SRC_CNTL, 0);
  152. }
  153. }
  154. else
  155. {
  156. M64_CHECK_GUI_SPACE(ppdev, pjMmBase, 5);
  157. M64_OD(pjMmBase, DP_MIX, mix);
  158. M64_OD(pjMmBase, DP_FRGD_CLR, rbc.iSolidColor);
  159. M64_OD(pjMmBase, DP_SRC, DP_SRC_FrgdClr << 8);
  160. while (TRUE)
  161. {
  162. M64_OD(pjMmBase, DST_Y_X, PACKXY_FAST(xOffset + prcl->left,
  163. yOffset + prcl->top));
  164. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY_FAST(prcl->right - prcl->left,
  165. prcl->bottom - prcl->top));
  166. if (--c == 0)
  167. break;
  168. prcl++;
  169. M64_CHECK_GUI_SPACE(ppdev, pjMmBase, 2);
  170. }
  171. }
  172. }
  173. VOID vM64FillSolidUMC24( // Type FNFILL
  174. PDEV* ppdev,
  175. LONG c, // Can't be zero
  176. RECTL* prcl, // List of rectangles to be filled, in relative
  177. // coordinates
  178. ULONG rop4, // rop4
  179. RBRUSH_COLOR rbc, // Drawing colour is rbc.iSolidColor
  180. POINTL* pptlBrush) // Not used
  181. {
  182. BYTE* pjMmBase;
  183. LONG xOffset;
  184. LONG yOffset;
  185. LONG dx, x;
  186. ULONG mix;
  187. BOOL FastFillFlag;
  188. BYTE red, green, blue;
  189. ASSERTDD(c > 0, "Can't handle zero rectangles");
  190. red = (BYTE) ((rbc.iSolidColor & ppdev->flRed) >> REDSHIFT);
  191. green = (BYTE) ((rbc.iSolidColor & ppdev->flGreen) >> GREENSHIFT);
  192. blue = (BYTE) ((rbc.iSolidColor & ppdev->flBlue) >> BLUESHIFT);
  193. pjMmBase = ppdev->pjMmBase;
  194. xOffset = ppdev->xOffset;
  195. yOffset = ppdev->yOffset;
  196. mix = gaul64HwMixFromRop2[(rop4 >> 2) & 0xf];
  197. if (mix == 0x70000 && red == green && green == blue)
  198. {
  199. FastFillFlag = FALSE;
  200. M64_CHECK_GUI_SPACE(ppdev, pjMmBase, 3);
  201. // M64_OD(pjMmBase, DP_SET_GUI_ENGINE, 0x100000 | ppdev->SetGuiEngineDefault);
  202. M64_OD(pjMmBase, DP_MIX, 0x70003);
  203. M64_OD(pjMmBase, DP_FRGD_CLR, rbc.iSolidColor);
  204. M64_OD(pjMmBase, DP_SRC, DP_SRC_FrgdClr << 8);
  205. while (TRUE)
  206. {
  207. x = (xOffset + prcl->left) * 3;
  208. if ((dx = (prcl->right - prcl->left)*3) > 96)
  209. {
  210. FastFillFlag = TRUE;
  211. M64_CHECK_GUI_SPACE(ppdev, pjMmBase, 6);
  212. M64_OD(pjMmBase, SRC_CNTL, SRC_CNTL_FastFillEna | SRC_CNTL_RegWriteEna);
  213. M64_OD(pjMmBase, DST_Y_X, 0x00000000);
  214. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, 0x00010001);
  215. M64_OD(pjMmBase, SRC_CNTL, SRC_CNTL_FastFillEna | SRC_CNTL_BlkWriteEna);
  216. M64_OD(pjMmBase, DST_Y_X, PACKXY_FAST(x, yOffset + prcl->top));
  217. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY_FAST(dx, prcl->bottom - prcl->top));
  218. }
  219. else
  220. {
  221. if (FastFillFlag == TRUE)
  222. {
  223. FastFillFlag = FALSE;
  224. M64_CHECK_GUI_SPACE(ppdev, pjMmBase, 3);
  225. M64_OD(pjMmBase, SRC_CNTL, 0);
  226. }
  227. else
  228. {
  229. M64_CHECK_GUI_SPACE(ppdev, pjMmBase, 2);
  230. }
  231. M64_OD(pjMmBase, DST_Y_X, PACKXY_FAST(x, yOffset + prcl->top));
  232. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY_FAST(dx, prcl->bottom - prcl->top));
  233. }
  234. if (--c == 0)
  235. break;
  236. prcl++;
  237. }
  238. if (FastFillFlag == TRUE)
  239. {
  240. M64_CHECK_GUI_SPACE(ppdev, pjMmBase, 1);
  241. M64_OD(pjMmBase, SRC_CNTL, 0);
  242. }
  243. }
  244. else
  245. {
  246. M64_CHECK_GUI_SPACE(ppdev, pjMmBase, 7);
  247. M64_OD(pjMmBase, DP_MIX, mix);
  248. M64_OD(pjMmBase, DP_FRGD_CLR, rbc.iSolidColor);
  249. M64_OD(pjMmBase, DP_SRC, DP_SRC_FrgdClr << 8);
  250. while (TRUE)
  251. {
  252. x = (xOffset + prcl->left) * 3;
  253. M64_OD(pjMmBase, DST_CNTL, 0x83 | ((x/4 % 6) << 8));
  254. M64_OD(pjMmBase, DST_Y_X, PACKXY_FAST(x,
  255. yOffset + prcl->top));
  256. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY_FAST((prcl->right - prcl->left) * 3,
  257. prcl->bottom - prcl->top));
  258. if (--c == 0)
  259. break;
  260. prcl++;
  261. M64_CHECK_GUI_SPACE(ppdev, pjMmBase, 3);
  262. }
  263. M64_CHECK_GUI_SPACE(ppdev, pjMmBase, 1);
  264. M64_OD(pjMmBase, DST_CNTL, DST_CNTL_XDir | DST_CNTL_YDir);
  265. }
  266. }
  267. VOID vM64FillSolid24_2( // Type FNFILL
  268. PDEV* ppdev,
  269. LONG c, // Can't be zero
  270. RECTL* prcl, // List of rectangles to be filled, in relative
  271. // coordinates
  272. ULONG rop4, // rop4
  273. RBRUSH_COLOR rbc, // Drawing colour is rbc.iSolidColor
  274. POINTL* pptlBrush) // Not used
  275. {
  276. BYTE* pjMmBase;
  277. LONG xOffset;
  278. LONG yOffset;
  279. LONG x;
  280. ASSERTDD(c > 0, "Can't handle zero rectangles");
  281. pjMmBase = ppdev->pjMmBase;
  282. xOffset = ppdev->xOffset;
  283. yOffset = ppdev->yOffset;
  284. M64_CHECK_GUI_SPACE(ppdev, pjMmBase, 7);
  285. M64_OD(pjMmBase, DP_MIX, gaul64HwMixFromRop2[(rop4 >> 2) & 0xf]);
  286. M64_OD(pjMmBase, DP_FRGD_CLR, rbc.iSolidColor);
  287. M64_OD(pjMmBase, DP_SRC, DP_SRC_FrgdClr << 8);
  288. while (TRUE)
  289. {
  290. x = (xOffset + prcl->left) * 3;
  291. M64_OD(pjMmBase, DST_CNTL, 0x83 | ((x/4 % 6) << 8));
  292. M64_OD(pjMmBase, DST_Y_X, PACKXY_FAST(x,
  293. yOffset + prcl->top));
  294. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY_FAST((prcl->right - prcl->left) * 3,
  295. prcl->bottom - prcl->top));
  296. if (--c == 0)
  297. break;
  298. prcl++;
  299. M64_CHECK_GUI_SPACE(ppdev, pjMmBase, 3);
  300. }
  301. M64_CHECK_GUI_SPACE(ppdev, pjMmBase, 1);
  302. M64_OD(pjMmBase, DST_CNTL, DST_CNTL_XDir | DST_CNTL_YDir);
  303. }
  304. /******************************Public*Routine******************************\
  305. * VOID vM64FillPatMonochrome
  306. *
  307. * This routine uses the pattern hardware to draw a monochrome patterned
  308. * list of rectangles.
  309. *
  310. * See Blt_DS_P8x8_ENG_8G_D0 and Blt_DS_P8x8_ENG_8G_D1.
  311. *
  312. \**************************************************************************/
  313. VOID vM64FillPatMonochrome2( // Type FNFILL
  314. PDEV* ppdev,
  315. LONG c, // Can't be zero
  316. RECTL* prcl, // List of rectangles to be filled, in relative
  317. // coordinates
  318. ULONG rop4, // rop4
  319. RBRUSH_COLOR rbc, // rbc.prb points to brush realization structure
  320. POINTL* pptlBrush) // Pattern alignment
  321. {
  322. BYTE* pjMmBase;
  323. LONG xOffset;
  324. LONG yOffset;
  325. BYTE* pjSrc;
  326. BYTE* pjDst;
  327. LONG xPattern;
  328. LONG yPattern;
  329. LONG iLeftShift;
  330. LONG iRightShift;
  331. LONG xOld;
  332. LONG yOld;
  333. LONG i;
  334. BYTE j;
  335. ULONG ulHwForeMix;
  336. ULONG ulHwBackMix;
  337. LONG xLeft;
  338. LONG yTop;
  339. ULONG aulTmp[2];
  340. pjMmBase = ppdev->pjMmBase;
  341. xOffset = ppdev->xOffset;
  342. yOffset = ppdev->yOffset;
  343. xPattern = (pptlBrush->x + xOffset) & 7;
  344. yPattern = (pptlBrush->y + yOffset) & 7;
  345. // If the alignment isn't correct, we'll have to change it:
  346. if ((xPattern != rbc.prb->ptlBrush.x) || (yPattern != rbc.prb->ptlBrush.y))
  347. {
  348. // Remember that we've changed the alignment on our cached brush:
  349. xOld = rbc.prb->ptlBrush.x;
  350. yOld = rbc.prb->ptlBrush.y;
  351. rbc.prb->ptlBrush.x = xPattern;
  352. rbc.prb->ptlBrush.y = yPattern;
  353. // Now do the alignment:
  354. yPattern = (yOld - yPattern);
  355. iRightShift = (xPattern - xOld) & 7;
  356. iLeftShift = 8 - iRightShift;
  357. pjSrc = (BYTE*) &rbc.prb->aulPattern[0];
  358. pjDst = (BYTE*) &aulTmp[0];
  359. for (i = 0; i < 8; i++)
  360. {
  361. j = *(pjSrc + (yPattern++ & 7));
  362. *pjDst++ = (j << iLeftShift) | (j >> iRightShift);
  363. }
  364. rbc.prb->aulPattern[0] = aulTmp[0];
  365. rbc.prb->aulPattern[1] = aulTmp[1];
  366. }
  367. ulHwForeMix = gaul64HwMixFromRop2[(rop4 >> 2) & 0xf];
  368. ulHwBackMix = ((rop4 & 0xff00) == 0xaa00) ? LEAVE_ALONE : (ulHwForeMix >> 16);
  369. M64_CHECK_GUI_SPACE(ppdev, pjMmBase, 9);
  370. M64_OD(pjMmBase, PAT_CNTL, PAT_CNTL_MonoEna);
  371. M64_OD(pjMmBase, DP_SRC, DP_SRC_MonoPattern | DP_SRC_FrgdClr << 8);
  372. M64_OD(pjMmBase, DP_MIX, ulHwBackMix | ulHwForeMix);
  373. M64_OD(pjMmBase, DP_FRGD_CLR, rbc.prb->ulForeColor);
  374. M64_OD(pjMmBase, DP_BKGD_CLR, rbc.prb->ulBackColor);
  375. M64_OD(pjMmBase, PAT_REG0, rbc.prb->aulPattern[0]);
  376. M64_OD(pjMmBase, PAT_REG1, rbc.prb->aulPattern[1]);
  377. while(TRUE)
  378. {
  379. xLeft = prcl->left;
  380. yTop = prcl->top;
  381. M64_OD(pjMmBase, DST_Y_X, PACKXY(xLeft + xOffset,
  382. yTop + yOffset));
  383. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY(prcl->right - xLeft,
  384. prcl->bottom - prcl->top));
  385. if (--c == 0)
  386. break;
  387. prcl++;
  388. M64_CHECK_GUI_SPACE(ppdev, pjMmBase, 2);
  389. }
  390. }
  391. VOID vM64FillPatMonochrome24_2( // Type FNFILL
  392. PDEV* ppdev,
  393. LONG c, // Can't be zero
  394. RECTL* prcl, // List of rectangles to be filled, in relative
  395. // coordinates
  396. ULONG rop4, // rop4
  397. RBRUSH_COLOR rbc, // rbc.prb points to brush realization structure
  398. POINTL* pptlBrush) // Pattern alignment
  399. {
  400. BYTE* pjMmBase;
  401. LONG xOffset;
  402. LONG yOffset;
  403. BYTE* pjSrc;
  404. BYTE* pjDst;
  405. LONG xPattern;
  406. LONG yPattern;
  407. LONG iLeftShift;
  408. LONG iRightShift;
  409. LONG xOld;
  410. LONG yOld;
  411. LONG i;
  412. BYTE j;
  413. ULONG ulHwForeMix;
  414. ULONG ulHwBackMix;
  415. LONG xLeft;
  416. LONG yTop;
  417. ULONG aulTmp[2];
  418. LONG x;
  419. pjMmBase = ppdev->pjMmBase;
  420. xOffset = ppdev->xOffset;
  421. yOffset = ppdev->yOffset;
  422. xPattern = (pptlBrush->x + xOffset) & 7;
  423. yPattern = (pptlBrush->y + yOffset) & 7;
  424. // If the alignment isn't correct, we'll have to change it:
  425. if ((xPattern != rbc.prb->ptlBrush.x) || (yPattern != rbc.prb->ptlBrush.y))
  426. {
  427. // Remember that we've changed the alignment on our cached brush:
  428. xOld = rbc.prb->ptlBrush.x;
  429. yOld = rbc.prb->ptlBrush.y;
  430. rbc.prb->ptlBrush.x = xPattern;
  431. rbc.prb->ptlBrush.y = yPattern;
  432. // Now do the alignment:
  433. yPattern = (yOld - yPattern);
  434. iRightShift = (xPattern - xOld) & 7;
  435. iLeftShift = 8 - iRightShift;
  436. pjSrc = (BYTE*) &rbc.prb->aulPattern[0];
  437. pjDst = (BYTE*) &aulTmp[0];
  438. for (i = 0; i < 8; i++)
  439. {
  440. j = *(pjSrc + (yPattern++ & 7));
  441. *pjDst++ = (j << iLeftShift) | (j >> iRightShift);
  442. }
  443. rbc.prb->aulPattern[0] = aulTmp[0];
  444. rbc.prb->aulPattern[1] = aulTmp[1];
  445. }
  446. ulHwForeMix = gaul64HwMixFromRop2[(rop4 >> 2) & 0xf];
  447. ulHwBackMix = ((rop4 & 0xff00) == 0xaa00) ? LEAVE_ALONE : (ulHwForeMix >> 16);
  448. M64_CHECK_GUI_SPACE(ppdev, pjMmBase, 13);
  449. M64_OD(pjMmBase, PAT_CNTL, PAT_CNTL_MonoEna);
  450. M64_OD(pjMmBase, DP_SRC, DP_SRC_MonoPattern | DP_SRC_FrgdClr << 8);
  451. M64_OD(pjMmBase, DP_MIX, ulHwBackMix | ulHwForeMix);
  452. M64_OD(pjMmBase, DP_FRGD_CLR, rbc.prb->ulForeColor);
  453. M64_OD(pjMmBase, DP_BKGD_CLR, rbc.prb->ulBackColor);
  454. M64_OD(pjMmBase, PAT_REG0, rbc.prb->aulPattern[0]);
  455. M64_OD(pjMmBase, PAT_REG1, rbc.prb->aulPattern[1]);
  456. // You must turn off DP_BYTE_PIX_ORDER, or else the pattern is incorrectly
  457. // aligned. This took a long time to figure out.
  458. M64_OD(pjMmBase, DP_PIX_WIDTH, 0x00000202);
  459. while(TRUE)
  460. {
  461. xLeft = prcl->left;
  462. yTop = prcl->top;
  463. x = (xLeft + xOffset) * 3;
  464. M64_OD(pjMmBase, DST_CNTL, 0x83 | ((x/4 % 6) << 8));
  465. M64_OD(pjMmBase, DST_Y_X, PACKXY(x,
  466. yTop + yOffset));
  467. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY((prcl->right - xLeft) * 3,
  468. prcl->bottom - prcl->top));
  469. if (--c == 0)
  470. break;
  471. prcl++;
  472. M64_CHECK_GUI_SPACE(ppdev, pjMmBase, 3);
  473. }
  474. M64_CHECK_GUI_SPACE(ppdev, pjMmBase, 2);
  475. M64_OD(pjMmBase, DST_CNTL, DST_CNTL_XDir | DST_CNTL_YDir);
  476. M64_OD(pjMmBase, DP_PIX_WIDTH, ppdev->ulMonoPixelWidth);
  477. }
  478. /******************************Public*Routine******************************\
  479. * VOID vM64FillPatColor
  480. *
  481. * This routine uses the pattern hardware to draw a patterned list of
  482. * rectangles.
  483. *
  484. * See Blt_DS_PCOL_ENG_8G_D0 and Blt_DS_PCOL_ENG_8G_D1.
  485. *
  486. \**************************************************************************/
  487. VOID vM64FillPatColor2( // Type FNFILL
  488. PDEV* ppdev,
  489. LONG c, // Can't be zero
  490. RECTL* prcl, // List of rectangles to be filled, in relative
  491. // coordinates
  492. ULONG rop4, // rop4
  493. RBRUSH_COLOR rbc, // rbc.prb points to brush realization structure
  494. POINTL* pptlBrush) // Pattern alignment
  495. {
  496. BRUSHENTRY* pbe;
  497. BYTE* pjMmBase;
  498. LONG xOffset;
  499. LONG yOffset;
  500. LONG xLeft;
  501. LONG yTop;
  502. LONG dx, dy;
  503. pjMmBase = ppdev->pjMmBase;
  504. xOffset = ppdev->xOffset;
  505. yOffset = ppdev->yOffset;
  506. M64_CHECK_GUI_SPACE(ppdev, pjMmBase, 8);
  507. M64_OD(pjMmBase, DP_MIX, gaul64HwMixFromRop2[(rop4 >> 2) & 0xf]);
  508. M64_OD(pjMmBase, SRC_CNTL, SRC_CNTL_PatEna);
  509. M64_OD(pjMmBase, DP_SRC, DP_SRC_Blit << 8);
  510. M64_OD(pjMmBase, SRC_HEIGHT1_WIDTH1, PACKXY(8, 8));
  511. while (TRUE)
  512. {
  513. xLeft = prcl->left;
  514. yTop = prcl->top;
  515. // Offsets into brush:
  516. dx = (xLeft - pptlBrush->x) & 7;
  517. dy = (yTop - pptlBrush->y) & 7;
  518. // See if the brush has already been put into off-screen memory.
  519. // Also, if pre-rotation is required:
  520. pbe = rbc.prb->apbe[IBOARD(ppdev)];
  521. if ((pbe == NULL) || (pbe->prbVerify != rbc.prb) || dx || dy)
  522. {
  523. BYTE* pjBrush;
  524. BYTE* pjDst;
  525. LONG cx, cx1, cx2, iBrushCache, k;
  526. // We have to allocate a new off-screen cache brush entry for
  527. // the brush:
  528. iBrushCache = ppdev->iBrushCache;
  529. pbe = &ppdev->abe[iBrushCache];
  530. iBrushCache = (iBrushCache + 1) & (TOTAL_BRUSH_COUNT - 1);
  531. ppdev->iBrushCache = iBrushCache;
  532. // Update our links:
  533. pbe->prbVerify = rbc.prb;
  534. rbc.prb->apbe[IBOARD(ppdev)] = pbe;
  535. // Pre-rotate...
  536. cx = 8 * ppdev->cjPelSize;
  537. cx1 = dx * ppdev->cjPelSize;
  538. cx2 = (8 - dx) * ppdev->cjPelSize;
  539. pjBrush = (BYTE*) &rbc.prb->aulPattern[0] + dy * cx;
  540. pjDst = ppdev->pjScreen + pbe->y * ppdev->lDelta
  541. + pbe->x * ppdev->cjPelSize;
  542. vM64QuietDown(ppdev, pjMmBase);
  543. for (k = 0; k < 8; k++)
  544. {
  545. if (k == 8 - dy)
  546. pjBrush = (BYTE*) &rbc.prb->aulPattern[0];
  547. memcpy( pjDst, pjBrush + cx1, cx2 );
  548. memcpy( pjDst + cx2, pjBrush, cx1 );
  549. pjDst += cx;
  550. pjBrush += cx;
  551. }
  552. if (dx || dy)
  553. pbe->prbVerify = NULL;
  554. }
  555. // N.B. - SRC_Y_X may have a constant value, but don't pull it
  556. // out of this while loop. On the GX, SRC_OFF_PITCH causes the
  557. // engine to recalculate SRC_Y_X, so the SRC_Y_X write must follow
  558. // SRC_OFF_PITCH.
  559. M64_OD(pjMmBase, SRC_OFF_PITCH, pbe->ulOffsetPitch);
  560. M64_OD(pjMmBase, SRC_Y_X, 0);
  561. M64_OD(pjMmBase, DST_Y_X, PACKXY(xLeft + xOffset,
  562. yTop + yOffset));
  563. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY(prcl->right - prcl->left,
  564. prcl->bottom - prcl->top));
  565. if (--c == 0)
  566. break;
  567. prcl++;
  568. M64_CHECK_GUI_SPACE(ppdev, pjMmBase, 4);
  569. }
  570. }
  571. VOID vM64FillPatColor24_2( // Type FNFILL
  572. PDEV* ppdev,
  573. LONG c, // Can't be zero
  574. RECTL* prcl, // List of rectangles to be filled, in relative
  575. // coordinates
  576. ULONG rop4, // rop4
  577. RBRUSH_COLOR rbc, // rbc.prb points to brush realization structure
  578. POINTL* pptlBrush) // Pattern alignment
  579. {
  580. BRUSHENTRY* pbe;
  581. BYTE* pjMmBase;
  582. LONG xOffset;
  583. LONG yOffset;
  584. LONG xLeft;
  585. LONG yTop;
  586. LONG dx, dy;
  587. pjMmBase = ppdev->pjMmBase;
  588. xOffset = ppdev->xOffset;
  589. yOffset = ppdev->yOffset;
  590. M64_CHECK_GUI_SPACE(ppdev, pjMmBase, 8);
  591. M64_OD(pjMmBase, DP_MIX, gaul64HwMixFromRop2[(rop4 >> 2) & 0xf]);
  592. M64_OD(pjMmBase, SRC_CNTL, SRC_CNTL_PatEna);
  593. M64_OD(pjMmBase, DP_SRC, DP_SRC_Blit << 8);
  594. M64_OD(pjMmBase, SRC_HEIGHT1_WIDTH1, PACKXY(24, 8));
  595. while (TRUE)
  596. {
  597. xLeft = prcl->left;
  598. yTop = prcl->top;
  599. // Offsets into brush:
  600. dx = (xLeft - pptlBrush->x) & 7;
  601. dy = (yTop - pptlBrush->y) & 7;
  602. // See if the brush has already been put into off-screen memory.
  603. // Also, if pre-rotation is required:
  604. pbe = rbc.prb->apbe[IBOARD(ppdev)];
  605. if ((pbe == NULL) || (pbe->prbVerify != rbc.prb) || dx || dy)
  606. {
  607. BYTE* pjBrush;
  608. BYTE* pjDst;
  609. LONG cx, cx1, cx2, iBrushCache, k;
  610. // We have to allocate a new off-screen cache brush entry for
  611. // the brush:
  612. iBrushCache = ppdev->iBrushCache;
  613. pbe = &ppdev->abe[iBrushCache];
  614. iBrushCache = (iBrushCache + 1) & (TOTAL_BRUSH_COUNT - 1);
  615. ppdev->iBrushCache = iBrushCache;
  616. // Update our links:
  617. pbe->prbVerify = rbc.prb;
  618. rbc.prb->apbe[IBOARD(ppdev)] = pbe;
  619. // Pre-rotate...
  620. cx = 8 * ppdev->cjPelSize;
  621. cx1 = dx * ppdev->cjPelSize;
  622. cx2 = (8 - dx) * ppdev->cjPelSize;
  623. pjBrush = (BYTE*) &rbc.prb->aulPattern[0] + dy * cx;
  624. pjDst = ppdev->pjScreen + pbe->y * ppdev->lDelta
  625. + pbe->x * ppdev->cjPelSize;
  626. vM64QuietDown(ppdev, pjMmBase);
  627. for (k = 0; k < 8; k++)
  628. {
  629. if (k == 8 - dy)
  630. pjBrush = (BYTE*) &rbc.prb->aulPattern[0];
  631. memcpy( pjDst, pjBrush + cx1, cx2 );
  632. memcpy( pjDst + cx2, pjBrush, cx1 );
  633. pjDst += cx;
  634. pjBrush += cx;
  635. }
  636. if (dx || dy)
  637. pbe->prbVerify = NULL;
  638. }
  639. M64_OD(pjMmBase, SRC_OFF_PITCH, pbe->ulOffsetPitch);
  640. M64_OD(pjMmBase, SRC_Y_X, 0);
  641. M64_OD(pjMmBase, DST_Y_X, PACKXY((xLeft + xOffset) * 3,
  642. yTop + yOffset));
  643. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY((prcl->right - prcl->left) * 3,
  644. prcl->bottom - prcl->top));
  645. if (--c == 0)
  646. break;
  647. prcl++;
  648. M64_CHECK_GUI_SPACE(ppdev, pjMmBase, 4);
  649. }
  650. }
  651. /******************************Public*Routine******************************\
  652. * VOID vM64Xfer1bpp
  653. *
  654. * This routine colour expands a monochrome bitmap.
  655. *
  656. * See Blt_DS_S1_8G_D0 and Blt_DS_8G_D1.
  657. *
  658. \**************************************************************************/
  659. VOID vM64Xfer1bpp2( // Type FNXFER
  660. PDEV* ppdev,
  661. LONG c, // Count of rectangles, can't be zero
  662. RECTL* prcl, // List of destination rectangles, in relative
  663. // coordinates
  664. ROP4 rop4, // rop4
  665. SURFOBJ* psoSrc, // Source surface
  666. POINTL* pptlSrc, // Original unclipped source point
  667. RECTL* prclDst, // Original unclipped destination rectangle
  668. XLATEOBJ* pxlo) // Translate that provides colour-expansion information
  669. {
  670. BYTE* pjMmBase;
  671. LONG xOffset;
  672. LONG yOffset;
  673. ULONG* pulXlate;
  674. ULONG ulHwForeMix;
  675. LONG dx;
  676. LONG dy;
  677. LONG lSrcDelta;
  678. BYTE* pjSrcScan0;
  679. LONG xLeft;
  680. LONG xRight;
  681. LONG yTop;
  682. LONG cy;
  683. LONG cx;
  684. LONG xBias;
  685. LONG culScan;
  686. LONG lSrcSkip;
  687. ULONG* pulSrc;
  688. LONG i;
  689. ULONG ulFifo;
  690. ASSERTDD(c > 0, "Can't handle zero rectangles");
  691. ASSERTDD(((rop4 & 0xff00) >> 8) == (rop4 & 0xff),
  692. "Expect only a rop2");
  693. pjMmBase = ppdev->pjMmBase;
  694. xOffset = ppdev->xOffset;
  695. yOffset = ppdev->yOffset;
  696. ulFifo = 0;
  697. ulHwForeMix = gaul64HwMixFromRop2[rop4 & 0xf];
  698. pulXlate = pxlo->pulXlate;
  699. M64_FAST_GUI_CHECK(ppdev, pjMmBase, 7, ulFifo);
  700. M64_OD(pjMmBase, DP_BKGD_CLR, pulXlate[0]);
  701. M64_OD(pjMmBase, DP_FRGD_CLR, pulXlate[1]);
  702. M64_OD(pjMmBase, DP_MIX, ulHwForeMix | (ulHwForeMix >> 16));
  703. M64_OD(pjMmBase, DP_SRC, (DP_SRC_Host << 16) | (DP_SRC_FrgdClr << 8) |
  704. (DP_SRC_BkgdClr));
  705. dx = pptlSrc->x - prclDst->left;
  706. dy = pptlSrc->y - prclDst->top;
  707. lSrcDelta = psoSrc->lDelta;
  708. pjSrcScan0 = psoSrc->pvScan0;
  709. while (TRUE)
  710. {
  711. xLeft = prcl->left;
  712. xRight = prcl->right;
  713. // The Mach64 'bit packs' monochrome transfers, but GDI gives
  714. // us monochrome bitmaps whose scans are always dword aligned.
  715. // Consequently, we use the Mach64's clip registers to make
  716. // our transfers a multiple of 32 to match the dword alignment:
  717. M64_OD(pjMmBase, SC_LEFT_RIGHT, PACKPAIR(xLeft + xOffset,
  718. xRight + xOffset - 1));
  719. yTop = prcl->top;
  720. cy = prcl->bottom - yTop;
  721. xBias = (xLeft + dx) & 31; // Floor
  722. xLeft -= xBias;
  723. cx = (xRight - xLeft + 31) & ~31; // Ceiling
  724. M64_OD(pjMmBase, DST_Y_X, PACKXY_FAST(xLeft + xOffset,
  725. yTop + yOffset));
  726. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY_FAST(cx, cy));
  727. pulSrc = (ULONG*) (pjSrcScan0 + (yTop + dy) * lSrcDelta
  728. + ((xLeft + dx) >> 3));
  729. culScan = cx >> 5;
  730. lSrcSkip = lSrcDelta - (culScan << 2);
  731. ASSERTDD(((DWORD) pulSrc & 3) == 0, "Source should be dword aligned");
  732. do {
  733. i = culScan;
  734. do {
  735. M64_FAST_GUI_CHECK(ppdev, pjMmBase, 1, ulFifo);
  736. M64_OD(pjMmBase, HOST_DATA0, *pulSrc);
  737. pulSrc++;
  738. } while (--i != 0);
  739. pulSrc = (ULONG*) ((BYTE*) pulSrc + lSrcSkip);
  740. } while (--cy != 0);
  741. if (--c == 0)
  742. break;
  743. prcl++;
  744. M64_FAST_GUI_CHECK(ppdev, pjMmBase, 3, ulFifo);
  745. }
  746. // Don't forget to reset the clip register:
  747. M64_FAST_GUI_CHECK(ppdev, pjMmBase, 1, ulFifo);
  748. M64_OD(pjMmBase, SC_LEFT_RIGHT, PACKPAIR(0, M64_MAX_SCISSOR_R));
  749. }
  750. VOID vM64DataPortOutD_24bppmono(PDEV* ppdev, PBYTE pb, UINT count, LONG pitch);
  751. VOID vM64Xfer1bppto24_2( // Type FNXFER
  752. PDEV* ppdev,
  753. LONG c, // Count of rectangles, can't be zero
  754. RECTL* prcl, // List of destination rectangles, in relative
  755. // coordinates
  756. ROP4 rop4, // rop4
  757. SURFOBJ* psoSrc, // Source surface
  758. POINTL* pptlSrc, // Original unclipped source point
  759. RECTL* prclDst, // Original unclipped destination rectangle
  760. XLATEOBJ* pxlo) // Translate that provides colour-expansion information
  761. {
  762. BYTE* pjMmBase;
  763. LONG xOffset;
  764. LONG yOffset;
  765. ULONG* pulXlate;
  766. ULONG ulHwForeMix;
  767. LONG dx;
  768. LONG dy;
  769. LONG lSrcDelta;
  770. BYTE* pjSrcScan0;
  771. LONG xLeft;
  772. LONG xRight;
  773. LONG yTop;
  774. LONG cy;
  775. LONG cx;
  776. LONG xBias;
  777. ULONG* pulSrc;
  778. ULONG ulFifo;
  779. ASSERTDD(c > 0, "Can't handle zero rectangles");
  780. ASSERTDD(((rop4 & 0xff00) >> 8) == (rop4 & 0xff),
  781. "Expect only a rop2");
  782. pjMmBase = ppdev->pjMmBase;
  783. xOffset = ppdev->xOffset;
  784. yOffset = ppdev->yOffset;
  785. ulFifo = 0;
  786. ulHwForeMix = gaul64HwMixFromRop2[rop4 & 0xf];
  787. pulXlate = pxlo->pulXlate;
  788. M64_FAST_GUI_CHECK(ppdev, pjMmBase, 8, ulFifo);
  789. M64_OD(pjMmBase, DP_BKGD_CLR, pulXlate[0]);
  790. M64_OD(pjMmBase, DP_FRGD_CLR, pulXlate[1]);
  791. M64_OD(pjMmBase, DP_MIX, ulHwForeMix | (ulHwForeMix >> 16));
  792. M64_OD(pjMmBase, DP_SRC, (DP_SRC_Host << 16) | (DP_SRC_FrgdClr << 8) |
  793. (DP_SRC_BkgdClr));
  794. dx = pptlSrc->x - prclDst->left;
  795. dy = pptlSrc->y - prclDst->top;
  796. lSrcDelta = psoSrc->lDelta;
  797. pjSrcScan0 = psoSrc->pvScan0;
  798. while (TRUE)
  799. {
  800. xLeft = prcl->left;
  801. xRight = prcl->right;
  802. // The Mach64 'bit packs' monochrome transfers, but GDI gives
  803. // us monochrome bitmaps whose scans are always dword aligned.
  804. // Consequently, we use the Mach64's clip registers to make
  805. // our transfers a multiple of 32 to match the dword alignment:
  806. M64_OD(pjMmBase, SC_LEFT_RIGHT, PACKPAIR((xLeft + xOffset)*3,
  807. (xRight + xOffset)*3 - 1));
  808. yTop = prcl->top;
  809. cy = prcl->bottom - yTop;
  810. xBias = (xLeft + dx) & 31; // Floor
  811. xLeft -= xBias;
  812. cx = ((xRight - xLeft)*3 + 31) & ~31; // Ceiling
  813. M64_OD(pjMmBase, DST_CNTL, 0x83 | (((xLeft + xOffset + MAX_NEGX)*3/4 % 6) << 8));
  814. M64_OD(pjMmBase, DST_Y_X, PACKXY_FAST((xLeft + xOffset)*3,
  815. yTop + yOffset));
  816. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY_FAST(cx, cy));
  817. pulSrc = (ULONG*) (pjSrcScan0 + (yTop + dy) * lSrcDelta
  818. + ((xLeft + dx) >> 3));
  819. do {
  820. vM64DataPortOutD_24bppmono(ppdev, (PBYTE) pulSrc, cx >> 5, 0);
  821. pulSrc = (ULONG*) ((BYTE*) pulSrc + lSrcDelta);
  822. } while (--cy != 0);
  823. if (--c == 0)
  824. break;
  825. prcl++;
  826. M64_CHECK_GUI_SPACE(ppdev, pjMmBase, 4);
  827. }
  828. // Don't forget to reset the clip register:
  829. M64_CHECK_GUI_SPACE(ppdev, pjMmBase, 1);
  830. M64_OD(pjMmBase, SC_LEFT_RIGHT, PACKPAIR(0, M64_MAX_SCISSOR_R));
  831. }
  832. /******************************Public*Routine******************************\
  833. * VOID vM64XferNative
  834. *
  835. * Transfers a bitmap that is the same colour depth as the display to
  836. * the screen via the data transfer register, with no translation.
  837. *
  838. \**************************************************************************/
  839. VOID vM64XferNative2( // Type FNXFER
  840. PDEV* ppdev,
  841. LONG c, // Count of rectangles, can't be zero
  842. RECTL* prcl, // Array of relative coordinates destination rectangles
  843. ULONG rop4, // rop4
  844. SURFOBJ* psoSrc, // Source surface
  845. POINTL* pptlSrc, // Original unclipped source point
  846. RECTL* prclDst, // Original unclipped destination rectangle
  847. XLATEOBJ* pxlo) // Not used
  848. {
  849. BYTE* pjMmBase;
  850. LONG xOffset;
  851. LONG yOffset;
  852. ULONG ulHwForeMix;
  853. LONG dx;
  854. LONG dy;
  855. LONG lSrcDelta;
  856. BYTE* pjSrcScan0;
  857. LONG xLeft;
  858. LONG xRight;
  859. LONG yTop;
  860. LONG cy;
  861. LONG cx;
  862. LONG xBias;
  863. ULONG* pulSrc;
  864. ULONG culScan;
  865. LONG lSrcSkip;
  866. LONG i;
  867. ULONG ulFifo;
  868. ASSERTDD(c > 0, "Can't handle zero rectangles");
  869. ASSERTDD(((rop4 & 0xff00) >> 8) == (rop4 & 0xff),
  870. "Expect only a rop2");
  871. pjMmBase = ppdev->pjMmBase;
  872. xOffset = ppdev->xOffset;
  873. yOffset = ppdev->yOffset;
  874. ulFifo = 0;
  875. ulHwForeMix = gaul64HwMixFromRop2[rop4 & 0xf];
  876. M64_FAST_GUI_CHECK(ppdev, pjMmBase, 6, ulFifo);
  877. M64_OD(pjMmBase, DP_MIX, ulHwForeMix | (ulHwForeMix >> 16));
  878. M64_OD(pjMmBase, DP_SRC, (DP_SRC_Host << 8));
  879. //
  880. // The host data pixel width is the same as that of the screen:
  881. //
  882. M64_OD(pjMmBase, DP_PIX_WIDTH, ppdev->ulMonoPixelWidth |
  883. ((ppdev->ulMonoPixelWidth & 0xf) << 16));
  884. dx = (pptlSrc->x - prclDst->left) << ppdev->cPelSize; // Bytes
  885. dy = pptlSrc->y - prclDst->top;
  886. lSrcDelta = psoSrc->lDelta;
  887. pjSrcScan0 = psoSrc->pvScan0;
  888. while (TRUE)
  889. {
  890. xLeft = prcl->left;
  891. xRight = prcl->right;
  892. yTop = prcl->top;
  893. cy = prcl->bottom - yTop;
  894. M64_OD(pjMmBase, SC_LEFT_RIGHT, PACKPAIR(xLeft + xOffset, xRight + xOffset - 1));
  895. //
  896. // Convert pixels to bytes.
  897. //
  898. xLeft <<= ppdev->cPelSize;
  899. xRight <<= ppdev->cPelSize;
  900. //
  901. // We compute 'xBias' in order to dword-align the source pointer.
  902. // This way, we don't have to do unaligned reads of the source,
  903. // and we're guaranteed not to read even a byte past the end of
  904. // the bitmap.
  905. //
  906. xBias = (xLeft + dx) & 3; // Floor (bytes)
  907. xLeft -= xBias; // Bytes
  908. cx = (xRight - xLeft + 3) & ~3; // Ceiling (bytes)
  909. M64_OD(pjMmBase, DST_Y_X, PACKXY_FAST((xLeft >> ppdev->cPelSize) + xOffset, yTop + yOffset));
  910. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY_FAST(cx >> ppdev->cPelSize, cy));
  911. pulSrc = (PULONG)(pjSrcScan0 + (yTop + dy) * lSrcDelta + xLeft + dx);
  912. culScan = cx >> 2; // Dwords
  913. lSrcSkip = lSrcDelta - cx; // Bytes
  914. ASSERTDD(((DWORD) pulSrc & 3) == 0, "Source should be dword aligned");
  915. if (culScan && cy)
  916. {
  917. do
  918. {
  919. i = culScan;
  920. do
  921. {
  922. M64_FAST_FIFO_CHECK(ppdev, pjMmBase, 1, ulFifo);
  923. M64_OD(pjMmBase, HOST_DATA0, *pulSrc);
  924. pulSrc++;
  925. } while (--i != 0);
  926. pulSrc = (PULONG)((BYTE*)pulSrc + lSrcSkip);
  927. } while (--cy != 0);
  928. }
  929. if (--c == 0)
  930. break;
  931. prcl++;
  932. M64_FAST_FIFO_CHECK(ppdev, pjMmBase, 3, ulFifo);
  933. }
  934. // Don't forget to reset the clip register and the default pixel width:
  935. M64_FAST_GUI_CHECK(ppdev, pjMmBase, 2, ulFifo);
  936. M64_OD(pjMmBase, DP_PIX_WIDTH, ppdev->ulMonoPixelWidth);
  937. M64_OD(pjMmBase, SC_LEFT_RIGHT, PACKPAIR(0, M64_MAX_SCISSOR_R));
  938. }
  939. VOID vM64XferNative24_2( // Type FNXFER
  940. PDEV* ppdev,
  941. LONG c, // Count of rectangles, can't be zero
  942. RECTL* prcl, // Array of relative coordinates destination rectangles
  943. ULONG rop4, // rop4
  944. SURFOBJ* psoSrc, // Source surface
  945. POINTL* pptlSrc, // Original unclipped source point
  946. RECTL* prclDst, // Original unclipped destination rectangle
  947. XLATEOBJ* pxlo) // Not used
  948. {
  949. BYTE* pjMmBase;
  950. LONG xOffset;
  951. LONG yOffset;
  952. ULONG ulHwForeMix;
  953. LONG dx;
  954. LONG dy;
  955. LONG lSrcDelta;
  956. BYTE* pjSrcScan0;
  957. LONG xLeft;
  958. LONG xRight;
  959. LONG yTop;
  960. LONG cy;
  961. LONG cx;
  962. LONG xBias;
  963. ULONG* pulSrc;
  964. ULONG culScan;
  965. LONG lSrcSkip;
  966. LONG i;
  967. ULONG ulFifo;
  968. ASSERTDD(c > 0, "Can't handle zero rectangles");
  969. ASSERTDD(((rop4 & 0xff00) >> 8) == (rop4 & 0xff),
  970. "Expect only a rop2");
  971. pjMmBase = ppdev->pjMmBase;
  972. xOffset = ppdev->xOffset * 3;
  973. yOffset = ppdev->yOffset;
  974. ulFifo = 0;
  975. ulHwForeMix = gaul64HwMixFromRop2[rop4 & 0xf];
  976. M64_FAST_GUI_CHECK(ppdev, pjMmBase, 6, ulFifo);
  977. M64_OD(pjMmBase, DP_MIX, ulHwForeMix | (ulHwForeMix >> 16));
  978. M64_OD(pjMmBase, DP_SRC, (DP_SRC_Host << 8));
  979. // The host data pixel width is the same as that of the screen:
  980. M64_OD(pjMmBase, DP_PIX_WIDTH, ppdev->ulMonoPixelWidth |
  981. ((ppdev->ulMonoPixelWidth & 0xf) << 16));
  982. dx = (pptlSrc->x - prclDst->left) * 3; // Bytes
  983. dy = pptlSrc->y - prclDst->top;
  984. lSrcDelta = psoSrc->lDelta;
  985. pjSrcScan0 = psoSrc->pvScan0;
  986. while (TRUE)
  987. {
  988. xLeft = prcl->left * 3;
  989. xRight = prcl->right * 3;
  990. yTop = prcl->top;
  991. cy = prcl->bottom - yTop;
  992. M64_OD(pjMmBase, SC_LEFT_RIGHT, PACKPAIR(xLeft + xOffset, xRight + xOffset - 1));
  993. //
  994. // We compute 'xBias' in order to dword-align the source pointer.
  995. // This way, we don't have to do unaligned reads of the source,
  996. // and we're guaranteed not to read even a byte past the end of
  997. // the bitmap.
  998. //
  999. xBias = (xLeft + dx) & 3; // Floor (bytes)
  1000. xLeft -= xBias; // Bytes
  1001. cx = (xRight - xLeft + 3) & ~3; // Ceiling (bytes)
  1002. M64_OD(pjMmBase, DST_Y_X, PACKXY_FAST(xLeft + xOffset, yTop + yOffset));
  1003. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY_FAST(cx, cy));
  1004. pulSrc = (PULONG)(pjSrcScan0 + (yTop + dy) * lSrcDelta + xLeft + dx);
  1005. culScan = cx >> 2; // Dwords
  1006. lSrcSkip = lSrcDelta - cx; // Bytes
  1007. ASSERTDD(((DWORD) pulSrc & 3) == 0, "Source should be dword aligned");
  1008. if (culScan && cy)
  1009. {
  1010. do
  1011. {
  1012. i = culScan;
  1013. do
  1014. {
  1015. M64_FAST_FIFO_CHECK(ppdev, pjMmBase, 1, ulFifo);
  1016. M64_OD(pjMmBase, HOST_DATA0, *pulSrc);
  1017. pulSrc++;
  1018. } while (--i != 0);
  1019. pulSrc = (PULONG)((BYTE*)pulSrc + lSrcSkip);
  1020. } while (--cy != 0);
  1021. }
  1022. if (--c == 0)
  1023. break;
  1024. prcl++;
  1025. M64_FAST_FIFO_CHECK(ppdev, pjMmBase, 3, ulFifo);
  1026. }
  1027. // Don't forget to reset the clip register and the default pixel width:
  1028. M64_FAST_GUI_CHECK(ppdev, pjMmBase, 2, ulFifo);
  1029. M64_OD(pjMmBase, DP_PIX_WIDTH, ppdev->ulMonoPixelWidth);
  1030. M64_OD(pjMmBase, SC_LEFT_RIGHT, PACKPAIR(0, M64_MAX_SCISSOR_R));
  1031. }
  1032. /******************************Public*Routine******************************\
  1033. * VOID vM64Xfer4bpp
  1034. *
  1035. * Does a 4bpp transfer from a bitmap to the screen.
  1036. *
  1037. * The reason we implement this is that a lot of resources are kept as 4bpp,
  1038. * and used to initialize DFBs, some of which we of course keep off-screen.
  1039. *
  1040. \**************************************************************************/
  1041. VOID vM64Xfer4bpp2( // Type FNXFER
  1042. PDEV* ppdev,
  1043. LONG c, // Count of rectangles, can't be zero
  1044. RECTL* prcl, // List of destination rectangles, in relative
  1045. // coordinates
  1046. ULONG rop4, // Rop4
  1047. SURFOBJ* psoSrc, // Source surface
  1048. POINTL* pptlSrc, // Original unclipped source point
  1049. RECTL* prclDst, // Original unclipped destination rectangle
  1050. XLATEOBJ* pxlo) // Translate that provides colour-expansion information
  1051. {
  1052. BYTE* pjMmBase;
  1053. LONG xOffset;
  1054. LONG yOffset;
  1055. LONG cjPelSize;
  1056. ULONG ulHwForeMix;
  1057. LONG xLeft;
  1058. LONG xRight;
  1059. LONG yTop;
  1060. LONG xBias;
  1061. LONG dx;
  1062. LONG dy;
  1063. LONG cx;
  1064. LONG cy;
  1065. LONG lSrcDelta;
  1066. BYTE* pjSrcScan0;
  1067. BYTE* pjSrc;
  1068. BYTE jSrc;
  1069. ULONG* pulXlate;
  1070. LONG i;
  1071. ULONG ul;
  1072. LONG cjSrc;
  1073. LONG cwSrc;
  1074. LONG lSrcSkip;
  1075. ULONG ulFifo;
  1076. ASSERTDD(psoSrc->iBitmapFormat == BMF_4BPP, "Source must be 4bpp");
  1077. ASSERTDD(c > 0, "Can't handle zero rectangles");
  1078. ASSERTDD(ppdev->iBitmapFormat != BMF_24BPP, "Can't handle 24bpp");
  1079. pjMmBase = ppdev->pjMmBase;
  1080. xOffset = ppdev->xOffset;
  1081. yOffset = ppdev->yOffset;
  1082. cjPelSize = ppdev->cjPelSize;
  1083. pulXlate = pxlo->pulXlate;
  1084. ulFifo = 0;
  1085. dx = pptlSrc->x - prclDst->left;
  1086. dy = pptlSrc->y - prclDst->top; // Add to destination to get source
  1087. lSrcDelta = psoSrc->lDelta;
  1088. pjSrcScan0 = psoSrc->pvScan0;
  1089. ulHwForeMix = gaul64HwMixFromRop2[rop4 & 0xf];
  1090. M64_FAST_GUI_CHECK(ppdev, pjMmBase, 6, ulFifo);
  1091. M64_OD(pjMmBase, DP_MIX, ulHwForeMix | (ulHwForeMix >> 16));
  1092. M64_OD(pjMmBase, DP_SRC, (DP_SRC_Host << 8));
  1093. // The host data pixel width is the same as that of the screen:
  1094. M64_OD(pjMmBase, DP_PIX_WIDTH, ppdev->ulMonoPixelWidth |
  1095. ((ppdev->ulMonoPixelWidth & 0xf) << 16));
  1096. while(TRUE)
  1097. {
  1098. xLeft = prcl->left;
  1099. xRight = prcl->right;
  1100. M64_OD(pjMmBase, SC_LEFT_RIGHT, PACKPAIR(xLeft + xOffset,
  1101. xRight + xOffset - 1));
  1102. yTop = prcl->top;
  1103. cy = prcl->bottom - yTop;
  1104. // We compute 'xBias' in order to dword-align the source pointer.
  1105. // This way, we don't have to do unaligned reads of the source,
  1106. // and we're guaranteed not to read even a byte past the end of
  1107. // the bitmap.
  1108. //
  1109. // Note that this bias works at 24bpp, too:
  1110. xBias = (xLeft + dx) & 3; // Floor
  1111. xLeft -= xBias;
  1112. cx = (xRight - xLeft + 3) & ~3; // Ceiling
  1113. M64_OD(pjMmBase, DST_Y_X, PACKXY_FAST(xLeft + xOffset,
  1114. yTop + yOffset));
  1115. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY_FAST(cx, cy));
  1116. pjSrc = pjSrcScan0 + (yTop + dy) * lSrcDelta
  1117. + ((xLeft + dx) >> 1);
  1118. cjSrc = cx >> 1; // Number of source bytes touched
  1119. lSrcSkip = lSrcDelta - cjSrc;
  1120. if (cjPelSize == 1)
  1121. {
  1122. // This part handles 8bpp output:
  1123. cwSrc = (cjSrc >> 1); // Number of whole source words
  1124. do {
  1125. for (i = cwSrc; i != 0; i--)
  1126. {
  1127. jSrc = *pjSrc++;
  1128. ul = (pulXlate[jSrc >> 4]);
  1129. ul |= (pulXlate[jSrc & 0xf] << 8);
  1130. jSrc = *pjSrc++;
  1131. ul |= (pulXlate[jSrc >> 4] << 16);
  1132. ul |= (pulXlate[jSrc & 0xf] << 24);
  1133. M64_FAST_GUI_CHECK(ppdev, pjMmBase, 1, ulFifo);
  1134. M64_OD(pjMmBase, HOST_DATA0, ul);
  1135. }
  1136. // Handle an odd end byte, if there is one:
  1137. if (cjSrc & 1)
  1138. {
  1139. jSrc = *pjSrc++;
  1140. ul = (pulXlate[jSrc >> 4]);
  1141. ul |= (pulXlate[jSrc & 0xf] << 8);
  1142. M64_FAST_GUI_CHECK(ppdev, pjMmBase, 1, ulFifo);
  1143. M64_OD(pjMmBase, HOST_DATA0, ul);
  1144. }
  1145. pjSrc += lSrcSkip;
  1146. } while (--cy != 0);
  1147. }
  1148. else if (cjPelSize == 2)
  1149. {
  1150. // This part handles 16bpp output:
  1151. do {
  1152. i = cjSrc;
  1153. do {
  1154. jSrc = *pjSrc++;
  1155. ul = (pulXlate[jSrc >> 4]);
  1156. ul |= (pulXlate[jSrc & 0xf] << 16);
  1157. M64_FAST_GUI_CHECK(ppdev, pjMmBase, 1, ulFifo);
  1158. M64_OD(pjMmBase, HOST_DATA0, ul);
  1159. } while (--i != 0);
  1160. pjSrc += lSrcSkip;
  1161. } while (--cy != 0);
  1162. }
  1163. else
  1164. {
  1165. // This part handles 32bpp output:
  1166. do {
  1167. i = cjSrc;
  1168. do {
  1169. jSrc = *pjSrc++;
  1170. M64_FAST_GUI_CHECK(ppdev, pjMmBase, 2, ulFifo);
  1171. M64_OD(pjMmBase, HOST_DATA0, pulXlate[jSrc >> 4]);
  1172. M64_OD(pjMmBase, HOST_DATA0, pulXlate[jSrc & 0xf]);
  1173. } while (--i != 0);
  1174. pjSrc += lSrcSkip;
  1175. } while (--cy != 0);
  1176. }
  1177. if (--c == 0)
  1178. break;
  1179. prcl++;
  1180. M64_FAST_GUI_CHECK(ppdev, pjMmBase, 3, ulFifo);
  1181. }
  1182. // Don't forget to reset the clip register and the default pixel width:
  1183. M64_FAST_GUI_CHECK(ppdev, pjMmBase, 2, ulFifo);
  1184. M64_OD(pjMmBase, DP_PIX_WIDTH, ppdev->ulMonoPixelWidth);
  1185. M64_OD(pjMmBase, SC_LEFT_RIGHT, PACKPAIR(0, M64_MAX_SCISSOR_R));
  1186. }
  1187. /******************************Public*Routine******************************\
  1188. * VOID vM64Xfer8bpp
  1189. *
  1190. * Does a 8bpp transfer from a bitmap to the screen.
  1191. *
  1192. * The reason we implement this is that a lot of resources are kept as 8bpp,
  1193. * and used to initialize DFBs, some of which we of course keep off-screen.
  1194. *
  1195. \**************************************************************************/
  1196. VOID vM64Xfer8bpp2( // Type FNXFER
  1197. PDEV* ppdev,
  1198. LONG c, // Count of rectangles, can't be zero
  1199. RECTL* prcl, // List of destination rectangles, in relative
  1200. // coordinates
  1201. ULONG rop4, // Rop4
  1202. SURFOBJ* psoSrc, // Source surface
  1203. POINTL* pptlSrc, // Original unclipped source point
  1204. RECTL* prclDst, // Original unclipped destination rectangle
  1205. XLATEOBJ* pxlo) // Translate that provides colour-expansion information
  1206. {
  1207. BYTE* pjMmBase;
  1208. LONG xOffset;
  1209. LONG yOffset;
  1210. LONG cjPelSize;
  1211. ULONG ulHwForeMix;
  1212. LONG xLeft;
  1213. LONG xRight;
  1214. LONG yTop;
  1215. LONG xBias;
  1216. LONG dx;
  1217. LONG dy;
  1218. LONG cx;
  1219. LONG cy;
  1220. LONG lSrcDelta;
  1221. BYTE* pjSrcScan0;
  1222. BYTE* pjSrc;
  1223. ULONG* pulXlate;
  1224. LONG i;
  1225. ULONG ul;
  1226. LONG cdSrc;
  1227. LONG cwSrc;
  1228. LONG cxRem;
  1229. LONG lSrcSkip;
  1230. ULONG ulFifo;
  1231. ASSERTDD(psoSrc->iBitmapFormat == BMF_8BPP, "Source must be 8bpp");
  1232. ASSERTDD(c > 0, "Can't handle zero rectangles");
  1233. ASSERTDD(ppdev->iBitmapFormat != BMF_24BPP, "Can't handle 24bpp");
  1234. pjMmBase = ppdev->pjMmBase;
  1235. xOffset = ppdev->xOffset;
  1236. yOffset = ppdev->yOffset;
  1237. cjPelSize = ppdev->cjPelSize;
  1238. pulXlate = pxlo->pulXlate;
  1239. ulFifo = 0;
  1240. dx = pptlSrc->x - prclDst->left;
  1241. dy = pptlSrc->y - prclDst->top; // Add to destination to get source
  1242. lSrcDelta = psoSrc->lDelta;
  1243. pjSrcScan0 = psoSrc->pvScan0;
  1244. ulHwForeMix = gaul64HwMixFromRop2[rop4 & 0xf];
  1245. M64_FAST_GUI_CHECK(ppdev, pjMmBase, 6, ulFifo);
  1246. M64_OD(pjMmBase, DP_MIX, ulHwForeMix | (ulHwForeMix >> 16));
  1247. M64_OD(pjMmBase, DP_SRC, (DP_SRC_Host << 8));
  1248. // The host data pixel width is the same as that of the screen:
  1249. M64_OD(pjMmBase, DP_PIX_WIDTH, ppdev->ulMonoPixelWidth |
  1250. ((ppdev->ulMonoPixelWidth & 0xf) << 16));
  1251. while(TRUE)
  1252. {
  1253. xLeft = prcl->left;
  1254. xRight = prcl->right;
  1255. M64_OD(pjMmBase, SC_LEFT_RIGHT, PACKPAIR(xLeft + xOffset,
  1256. xRight + xOffset - 1));
  1257. yTop = prcl->top;
  1258. cy = prcl->bottom - yTop;
  1259. // We compute 'xBias' in order to dword-align the source pointer.
  1260. // This way, we don't have to do unaligned reads of the source,
  1261. // and we're guaranteed not to read even a byte past the end of
  1262. // the bitmap.
  1263. //
  1264. // Note that this bias works at 24bpp, too:
  1265. xBias = (xLeft + dx) & 3; // Floor
  1266. xLeft -= xBias;
  1267. cx = (xRight - xLeft + 3) & ~3; // Ceiling
  1268. M64_OD(pjMmBase, DST_Y_X, PACKXY_FAST(xLeft + xOffset,
  1269. yTop + yOffset));
  1270. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY_FAST(cx, cy));
  1271. pjSrc = pjSrcScan0 + (yTop + dy) * lSrcDelta
  1272. + (xLeft + dx);
  1273. lSrcSkip = lSrcDelta - cx;
  1274. if (cjPelSize == 1)
  1275. {
  1276. // This part handles 8bpp output:
  1277. cdSrc = (cx >> 2);
  1278. cxRem = (cx & 3);
  1279. do {
  1280. for (i = cdSrc; i != 0; i--)
  1281. {
  1282. ul = (pulXlate[*pjSrc++]);
  1283. ul |= (pulXlate[*pjSrc++] << 8);
  1284. ul |= (pulXlate[*pjSrc++] << 16);
  1285. ul |= (pulXlate[*pjSrc++] << 24);
  1286. M64_FAST_GUI_CHECK(ppdev, pjMmBase, 1, ulFifo);
  1287. M64_OD(pjMmBase, HOST_DATA0, ul);
  1288. }
  1289. if (cxRem > 0)
  1290. {
  1291. ul = (pulXlate[*pjSrc++]);
  1292. if (cxRem > 1)
  1293. {
  1294. ul |= (pulXlate[*pjSrc++] << 8);
  1295. if (cxRem > 2)
  1296. {
  1297. ul |= (pulXlate[*pjSrc++] << 16);
  1298. }
  1299. }
  1300. M64_FAST_GUI_CHECK(ppdev, pjMmBase, 1, ulFifo);
  1301. M64_OD(pjMmBase, HOST_DATA0, ul);
  1302. }
  1303. pjSrc += lSrcSkip;
  1304. } while (--cy != 0);
  1305. }
  1306. else if (cjPelSize == 2)
  1307. {
  1308. // This part handles 16bpp output:
  1309. cwSrc = (cx >> 1);
  1310. cxRem = (cx & 1);
  1311. do {
  1312. for (i = cwSrc; i != 0; i--)
  1313. {
  1314. ul = (pulXlate[*pjSrc++]);
  1315. ul |= (pulXlate[*pjSrc++] << 16);
  1316. M64_FAST_GUI_CHECK(ppdev, pjMmBase, 1, ulFifo);
  1317. M64_OD(pjMmBase, HOST_DATA0, ul);
  1318. }
  1319. if (cxRem > 0)
  1320. {
  1321. ul = (pulXlate[*pjSrc++]);
  1322. M64_FAST_GUI_CHECK(ppdev, pjMmBase, 1, ulFifo);
  1323. M64_OD(pjMmBase, HOST_DATA0, ul);
  1324. }
  1325. pjSrc += lSrcSkip;
  1326. } while (--cy != 0);
  1327. }
  1328. else
  1329. {
  1330. // This part handles 32bpp output:
  1331. do {
  1332. i = cx;
  1333. do {
  1334. ul = pulXlate[*pjSrc++];
  1335. M64_FAST_GUI_CHECK(ppdev, pjMmBase, 1, ulFifo);
  1336. M64_OD(pjMmBase, HOST_DATA0, ul);
  1337. } while (--i != 0);
  1338. pjSrc += lSrcSkip;
  1339. } while (--cy != 0);
  1340. }
  1341. if (--c == 0)
  1342. break;
  1343. prcl++;
  1344. M64_FAST_GUI_CHECK(ppdev, pjMmBase, 3, ulFifo);
  1345. }
  1346. // Don't forget to reset the clip register and the default pixel width:
  1347. M64_FAST_GUI_CHECK(ppdev, pjMmBase, 2, ulFifo);
  1348. M64_OD(pjMmBase, DP_PIX_WIDTH, ppdev->ulMonoPixelWidth);
  1349. M64_OD(pjMmBase, SC_LEFT_RIGHT, PACKPAIR(0, M64_MAX_SCISSOR_R));
  1350. }
  1351. /******************************Public*Routine******************************\
  1352. * VOID vM64CopyBlt
  1353. *
  1354. * Does a screen-to-screen blt of a list of rectangles.
  1355. *
  1356. * See Blt_DS_SS_ENG_8G_D0 and Blt_DS_SS_TLBR_ENG_8G_D1.
  1357. *
  1358. \**************************************************************************/
  1359. VOID vM64CopyBlt2( // Type FNCOPY
  1360. PDEV* ppdev,
  1361. LONG c, // Can't be zero
  1362. RECTL* prcl, // Array of relative coordinates destination rectangles
  1363. ULONG rop4, // rop4
  1364. POINTL* pptlSrc, // Original unclipped source point
  1365. RECTL* prclDst) // Original unclipped destination rectangle
  1366. {
  1367. BYTE* pjMmBase;
  1368. LONG xOffset;
  1369. LONG yOffset;
  1370. LONG dx;
  1371. LONG dy;
  1372. LONG xLeft;
  1373. LONG xRight;
  1374. LONG yTop;
  1375. LONG yBottom;
  1376. LONG cx;
  1377. LONG cy;
  1378. ULONG mix;
  1379. ASSERTDD(c > 0, "Can't handle zero rectangles");
  1380. ASSERTDD(((rop4 & 0xff00) >> 8) == (rop4 & 0xff),
  1381. "Expect only a rop2");
  1382. pjMmBase = ppdev->pjMmBase;
  1383. xOffset = ppdev->xOffset;
  1384. yOffset = ppdev->yOffset;
  1385. mix = gaul64HwMixFromRop2[rop4 & 0xf];
  1386. if (mix == 0x70000)
  1387. {
  1388. M64_CHECK_GUI_SPACE(ppdev, pjMmBase, 8);
  1389. M64_OD(pjMmBase, DP_SET_GUI_ENGINE, 0xB08000 | ppdev->SetGuiEngineDefault);
  1390. M64_OD(pjMmBase, SRC_CNTL, 0);
  1391. }
  1392. else
  1393. {
  1394. M64_CHECK_GUI_SPACE(ppdev, pjMmBase, 10);
  1395. M64_OD(pjMmBase, SRC_OFF_PITCH, ppdev->ulScreenOffsetAndPitch);
  1396. M64_OD(pjMmBase, DP_SRC, DP_SRC_Blit << 8);
  1397. M64_OD(pjMmBase, DP_MIX, gaul64HwMixFromRop2[rop4 & 0xf]);
  1398. M64_OD(pjMmBase, SRC_CNTL, 0);
  1399. }
  1400. dx = pptlSrc->x - prclDst->left;
  1401. dy = pptlSrc->y - prclDst->top;
  1402. // The accelerator may not be as fast at doing right-to-left copies, so
  1403. // only do them when the rectangles truly overlap:
  1404. if (!OVERLAP(prclDst, pptlSrc))
  1405. goto Top_Down_Left_To_Right;
  1406. if (prclDst->top <= pptlSrc->y)
  1407. {
  1408. if (prclDst->left <= pptlSrc->x)
  1409. {
  1410. Top_Down_Left_To_Right:
  1411. while (TRUE)
  1412. {
  1413. xLeft = xOffset + prcl->left;
  1414. yTop = yOffset + prcl->top;
  1415. cx = prcl->right - prcl->left;
  1416. cy = prcl->bottom - prcl->top;
  1417. M64_OD(pjMmBase, DST_Y_X, PACKXY_FAST(xLeft, yTop));
  1418. M64_OD(pjMmBase, SRC_Y_X, PACKXY_FAST(xLeft + dx, yTop + dy));
  1419. M64_OD(pjMmBase, SRC_WIDTH1, cx);
  1420. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY_FAST(cx, cy));
  1421. if (--c == 0)
  1422. break;
  1423. prcl++;
  1424. M64_CHECK_GUI_SPACE(ppdev, pjMmBase, 4);
  1425. }
  1426. }
  1427. else
  1428. {
  1429. M64_OD(pjMmBase, DST_CNTL, DST_CNTL_YDir);
  1430. while (TRUE)
  1431. {
  1432. xRight = xOffset + prcl->right - 1;
  1433. yTop = yOffset + prcl->top;
  1434. cx = prcl->right - prcl->left;
  1435. cy = prcl->bottom - prcl->top;
  1436. M64_OD(pjMmBase, DST_Y_X, PACKXY_FAST(xRight, yTop));
  1437. M64_OD(pjMmBase, SRC_Y_X, PACKXY_FAST(xRight + dx, yTop + dy));
  1438. M64_OD(pjMmBase, SRC_WIDTH1, cx);
  1439. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY_FAST(cx, cy));
  1440. if (--c == 0)
  1441. break;
  1442. prcl++;
  1443. M64_CHECK_GUI_SPACE(ppdev, pjMmBase, 5);
  1444. }
  1445. // Since we don't use a default context, we must restore registers:
  1446. M64_OD(pjMmBase, DST_CNTL, DST_CNTL_XDir | DST_CNTL_YDir);
  1447. }
  1448. }
  1449. else
  1450. {
  1451. if (prclDst->left <= pptlSrc->x)
  1452. {
  1453. M64_OD(pjMmBase, DST_CNTL, DST_CNTL_XDir);
  1454. while (TRUE)
  1455. {
  1456. xLeft = xOffset + prcl->left;
  1457. yBottom = yOffset + prcl->bottom - 1;
  1458. cx = prcl->right - prcl->left;
  1459. cy = prcl->bottom - prcl->top;
  1460. M64_OD(pjMmBase, DST_Y_X, PACKXY_FAST(xLeft, yBottom));
  1461. M64_OD(pjMmBase, SRC_Y_X, PACKXY_FAST(xLeft + dx, yBottom + dy));
  1462. M64_OD(pjMmBase, SRC_WIDTH1, cx);
  1463. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY_FAST(cx, cy));
  1464. if (--c == 0)
  1465. break;
  1466. prcl++;
  1467. M64_CHECK_GUI_SPACE(ppdev, pjMmBase, 5);
  1468. }
  1469. // Since we don't use a default context, we must restore registers:
  1470. M64_OD(pjMmBase, DST_CNTL, DST_CNTL_XDir | DST_CNTL_YDir);
  1471. }
  1472. else
  1473. {
  1474. M64_OD(pjMmBase, DST_CNTL, 0);
  1475. while (TRUE)
  1476. {
  1477. xRight = xOffset + prcl->right - 1;
  1478. yBottom = yOffset + prcl->bottom - 1;
  1479. cx = prcl->right - prcl->left;
  1480. cy = prcl->bottom - prcl->top;
  1481. M64_OD(pjMmBase, DST_Y_X, PACKXY_FAST(xRight, yBottom));
  1482. M64_OD(pjMmBase, SRC_Y_X, PACKXY_FAST(xRight + dx, yBottom + dy));
  1483. M64_OD(pjMmBase, SRC_WIDTH1, cx);
  1484. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY_FAST(cx, cy));
  1485. if (--c == 0)
  1486. break;
  1487. prcl++;
  1488. M64_CHECK_GUI_SPACE(ppdev, pjMmBase, 5);
  1489. }
  1490. // Since we don't use a default context, we must restore registers:
  1491. M64_OD(pjMmBase, DST_CNTL, DST_CNTL_XDir | DST_CNTL_YDir);
  1492. }
  1493. }
  1494. }
  1495. VOID vM64CopyBlt24_2( // Type FNCOPY
  1496. PDEV* ppdev,
  1497. LONG c, // Can't be zero
  1498. RECTL* prcl, // Array of relative coordinates destination rectangles
  1499. ULONG rop4, // rop4
  1500. POINTL* pptlSrc, // Original unclipped source point
  1501. RECTL* prclDst) // Original unclipped destination rectangle
  1502. {
  1503. BYTE* pjMmBase;
  1504. LONG xOffset;
  1505. LONG yOffset;
  1506. LONG dx;
  1507. LONG dy;
  1508. LONG xLeft;
  1509. LONG xRight;
  1510. LONG yTop;
  1511. LONG yBottom;
  1512. LONG cx;
  1513. LONG cy;
  1514. ASSERTDD(c > 0, "Can't handle zero rectangles");
  1515. ASSERTDD(((rop4 & 0xff00) >> 8) == (rop4 & 0xff),
  1516. "Expect only a rop2");
  1517. pjMmBase = ppdev->pjMmBase;
  1518. xOffset = ppdev->xOffset;
  1519. yOffset = ppdev->yOffset;
  1520. M64_CHECK_GUI_SPACE(ppdev, pjMmBase, 10);
  1521. M64_OD(pjMmBase, SRC_OFF_PITCH, ppdev->ulScreenOffsetAndPitch);
  1522. M64_OD(pjMmBase, DP_SRC, DP_SRC_Blit << 8);
  1523. M64_OD(pjMmBase, DP_MIX, gaul64HwMixFromRop2[rop4 & 0xf]);
  1524. M64_OD(pjMmBase, SRC_CNTL, 0);
  1525. dx = (pptlSrc->x - prclDst->left) * 3;
  1526. dy = pptlSrc->y - prclDst->top;
  1527. // The accelerator may not be as fast at doing right-to-left copies, so
  1528. // only do them when the rectangles truly overlap:
  1529. if (!OVERLAP(prclDst, pptlSrc))
  1530. goto Top_Down_Left_To_Right;
  1531. if (prclDst->top <= pptlSrc->y)
  1532. {
  1533. if (prclDst->left <= pptlSrc->x)
  1534. {
  1535. Top_Down_Left_To_Right:
  1536. while (TRUE)
  1537. {
  1538. xLeft = (xOffset + prcl->left) * 3;
  1539. yTop = yOffset + prcl->top;
  1540. cx = (prcl->right - prcl->left) * 3;
  1541. cy = prcl->bottom - prcl->top;
  1542. M64_OD(pjMmBase, DST_Y_X, PACKXY_FAST(xLeft, yTop));
  1543. M64_OD(pjMmBase, SRC_Y_X, PACKXY_FAST(xLeft + dx, yTop + dy));
  1544. M64_OD(pjMmBase, SRC_WIDTH1, cx);
  1545. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY_FAST(cx, cy));
  1546. if (--c == 0)
  1547. break;
  1548. prcl++;
  1549. M64_CHECK_GUI_SPACE(ppdev, pjMmBase, 4);
  1550. }
  1551. }
  1552. else
  1553. {
  1554. M64_OD(pjMmBase, DST_CNTL, DST_CNTL_YDir);
  1555. while (TRUE)
  1556. {
  1557. xRight = (xOffset + prcl->right) * 3 - 1;
  1558. yTop = yOffset + prcl->top;
  1559. cx = (prcl->right - prcl->left) * 3;
  1560. cy = prcl->bottom - prcl->top;
  1561. M64_OD(pjMmBase, DST_Y_X, PACKXY_FAST(xRight, yTop));
  1562. M64_OD(pjMmBase, SRC_Y_X, PACKXY_FAST(xRight + dx, yTop + dy));
  1563. M64_OD(pjMmBase, SRC_WIDTH1, cx);
  1564. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY_FAST(cx, cy));
  1565. if (--c == 0)
  1566. break;
  1567. prcl++;
  1568. M64_CHECK_GUI_SPACE(ppdev, pjMmBase, 4);
  1569. }
  1570. // Since we don't use a default context, we must restore registers:
  1571. M64_CHECK_GUI_SPACE(ppdev, pjMmBase, 1);
  1572. M64_OD(pjMmBase, DST_CNTL, DST_CNTL_XDir | DST_CNTL_YDir);
  1573. }
  1574. }
  1575. else
  1576. {
  1577. if (prclDst->left <= pptlSrc->x)
  1578. {
  1579. M64_OD(pjMmBase, DST_CNTL, DST_CNTL_XDir);
  1580. while (TRUE)
  1581. {
  1582. xLeft = (xOffset + prcl->left) * 3;
  1583. yBottom = yOffset + prcl->bottom - 1;
  1584. cx = (prcl->right - prcl->left) * 3;
  1585. cy = prcl->bottom - prcl->top;
  1586. M64_OD(pjMmBase, DST_Y_X, PACKXY_FAST(xLeft, yBottom));
  1587. M64_OD(pjMmBase, SRC_Y_X, PACKXY_FAST(xLeft + dx, yBottom + dy));
  1588. M64_OD(pjMmBase, SRC_WIDTH1, cx);
  1589. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY_FAST(cx, cy));
  1590. if (--c == 0)
  1591. break;
  1592. prcl++;
  1593. M64_CHECK_GUI_SPACE(ppdev, pjMmBase, 4);
  1594. }
  1595. // Since we don't use a default context, we must restore registers:
  1596. M64_CHECK_GUI_SPACE(ppdev, pjMmBase, 1);
  1597. M64_OD(pjMmBase, DST_CNTL, DST_CNTL_XDir | DST_CNTL_YDir);
  1598. }
  1599. else
  1600. {
  1601. M64_OD(pjMmBase, DST_CNTL, 0);
  1602. while (TRUE)
  1603. {
  1604. xRight = (xOffset + prcl->right) * 3 - 1;
  1605. yBottom = yOffset + prcl->bottom - 1;
  1606. cx = (prcl->right - prcl->left) * 3;
  1607. cy = prcl->bottom - prcl->top;
  1608. M64_OD(pjMmBase, DST_Y_X, PACKXY_FAST(xRight, yBottom));
  1609. M64_OD(pjMmBase, SRC_Y_X, PACKXY_FAST(xRight + dx, yBottom + dy));
  1610. M64_OD(pjMmBase, SRC_WIDTH1, cx);
  1611. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, PACKXY_FAST(cx, cy));
  1612. if (--c == 0)
  1613. break;
  1614. prcl++;
  1615. M64_CHECK_GUI_SPACE(ppdev, pjMmBase, 5);
  1616. }
  1617. // Since we don't use a default context, we must restore registers:
  1618. M64_OD(pjMmBase, DST_CNTL, DST_CNTL_XDir | DST_CNTL_YDir);
  1619. }
  1620. }
  1621. }