Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

2022 lines
64 KiB

  1. /******************************Module*Header*******************************\
  2. * Module Name: w32blt.c
  3. *
  4. * Contains the low-level memory-mapped IO blt functions.
  5. *
  6. * Hopefully, if you're basing your display driver on this code, to
  7. * support all of DrvBitBlt and DrvCopyBits, you'll only have to implement
  8. * the following routines. You shouldn't have to modify much in
  9. * 'bitblt.c'. I've tried to make these routines as few, modular, simple,
  10. * and efficient as I could, while still accelerating as many calls as
  11. * possible that would be cost-effective in terms of performance wins
  12. * versus size and effort.
  13. *
  14. * Note: In the following, 'relative' coordinates refers to coordinates
  15. * that haven't yet had the offscreen bitmap (DFB) offset applied.
  16. * 'Absolute' coordinates have had the offset applied. For example,
  17. * we may be told to blt to (1, 1) of the bitmap, but the bitmap may
  18. * be sitting in offscreen memory starting at coordinate (0, 768) --
  19. * (1, 1) would be the 'relative' start coordinate, and (1, 769)
  20. * would be the 'absolute' start coordinate.
  21. *
  22. * Copyright (c) 1992-1996 Microsoft Corporation
  23. *
  24. \**************************************************************************/
  25. #include "precomp.h"
  26. /**************************************************************************
  27. * All functions using the accelerator must...
  28. * Wait for the ACL queue to be empty before loading any of the registers.
  29. **************************************************************************/
  30. /**************************************************************************
  31. * The following tables are heinous, but required. The monochrome data
  32. * (also known as Mix-Map or Mask) expander interprets the data such that
  33. * the least significant bit of a byte is pixel 0 and the most significant
  34. * bit is pixel 7. This is backwards from the way monochrome data is
  35. * interpreted by Windows and Windows NT. Also, the expander will ONLY
  36. * do 1 to 8 expansion, so we need to replicate each bit by the number of
  37. * bytes per pel in the current color depth.
  38. **************************************************************************/
  39. BYTE jReverse[] =
  40. {
  41. // Each element is the bitwise reverse of it's index.
  42. //
  43. // ie. 10000000 -> 00000001 and
  44. // 10010100 -> 00101001.
  45. 0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0,
  46. 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0,
  47. 0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8,
  48. 0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8,
  49. 0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4,
  50. 0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4,
  51. 0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec,
  52. 0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc,
  53. 0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2,
  54. 0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2,
  55. 0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea,
  56. 0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa,
  57. 0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6,
  58. 0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6,
  59. 0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee,
  60. 0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe,
  61. 0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1,
  62. 0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1,
  63. 0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9,
  64. 0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9,
  65. 0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5,
  66. 0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5,
  67. 0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed,
  68. 0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd,
  69. 0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3,
  70. 0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3,
  71. 0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb,
  72. 0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb,
  73. 0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7,
  74. 0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7,
  75. 0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef,
  76. 0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff,
  77. };
  78. WORD wReverse2x[] =
  79. {
  80. // Each element is the bit doubled bitwise reverse of it's index.
  81. //
  82. // ie. 10000000 -> 0000000000000011 and
  83. // 10010100 -> 0000110011000011.
  84. 0x0000, 0xc000, 0x3000, 0xf000, 0x0c00, 0xcc00, 0x3c00, 0xfc00,
  85. 0x0300, 0xc300, 0x3300, 0xf300, 0x0f00, 0xcf00, 0x3f00, 0xff00,
  86. 0x00c0, 0xc0c0, 0x30c0, 0xf0c0, 0x0cc0, 0xccc0, 0x3cc0, 0xfcc0,
  87. 0x03c0, 0xc3c0, 0x33c0, 0xf3c0, 0x0fc0, 0xcfc0, 0x3fc0, 0xffc0,
  88. 0x0030, 0xc030, 0x3030, 0xf030, 0x0c30, 0xcc30, 0x3c30, 0xfc30,
  89. 0x0330, 0xc330, 0x3330, 0xf330, 0x0f30, 0xcf30, 0x3f30, 0xff30,
  90. 0x00f0, 0xc0f0, 0x30f0, 0xf0f0, 0x0cf0, 0xccf0, 0x3cf0, 0xfcf0,
  91. 0x03f0, 0xc3f0, 0x33f0, 0xf3f0, 0x0ff0, 0xcff0, 0x3ff0, 0xfff0,
  92. 0x000c, 0xc00c, 0x300c, 0xf00c, 0x0c0c, 0xcc0c, 0x3c0c, 0xfc0c,
  93. 0x030c, 0xc30c, 0x330c, 0xf30c, 0x0f0c, 0xcf0c, 0x3f0c, 0xff0c,
  94. 0x00cc, 0xc0cc, 0x30cc, 0xf0cc, 0x0ccc, 0xcccc, 0x3ccc, 0xfccc,
  95. 0x03cc, 0xc3cc, 0x33cc, 0xf3cc, 0x0fcc, 0xcfcc, 0x3fcc, 0xffcc,
  96. 0x003c, 0xc03c, 0x303c, 0xf03c, 0x0c3c, 0xcc3c, 0x3c3c, 0xfc3c,
  97. 0x033c, 0xc33c, 0x333c, 0xf33c, 0x0f3c, 0xcf3c, 0x3f3c, 0xff3c,
  98. 0x00fc, 0xc0fc, 0x30fc, 0xf0fc, 0x0cfc, 0xccfc, 0x3cfc, 0xfcfc,
  99. 0x03fc, 0xc3fc, 0x33fc, 0xf3fc, 0x0ffc, 0xcffc, 0x3ffc, 0xfffc,
  100. 0x0003, 0xc003, 0x3003, 0xf003, 0x0c03, 0xcc03, 0x3c03, 0xfc03,
  101. 0x0303, 0xc303, 0x3303, 0xf303, 0x0f03, 0xcf03, 0x3f03, 0xff03,
  102. 0x00c3, 0xc0c3, 0x30c3, 0xf0c3, 0x0cc3, 0xccc3, 0x3cc3, 0xfcc3,
  103. 0x03c3, 0xc3c3, 0x33c3, 0xf3c3, 0x0fc3, 0xcfc3, 0x3fc3, 0xffc3,
  104. 0x0033, 0xc033, 0x3033, 0xf033, 0x0c33, 0xcc33, 0x3c33, 0xfc33,
  105. 0x0333, 0xc333, 0x3333, 0xf333, 0x0f33, 0xcf33, 0x3f33, 0xff33,
  106. 0x00f3, 0xc0f3, 0x30f3, 0xf0f3, 0x0cf3, 0xccf3, 0x3cf3, 0xfcf3,
  107. 0x03f3, 0xc3f3, 0x33f3, 0xf3f3, 0x0ff3, 0xcff3, 0x3ff3, 0xfff3,
  108. 0x000f, 0xc00f, 0x300f, 0xf00f, 0x0c0f, 0xcc0f, 0x3c0f, 0xfc0f,
  109. 0x030f, 0xc30f, 0x330f, 0xf30f, 0x0f0f, 0xcf0f, 0x3f0f, 0xff0f,
  110. 0x00cf, 0xc0cf, 0x30cf, 0xf0cf, 0x0ccf, 0xcccf, 0x3ccf, 0xfccf,
  111. 0x03cf, 0xc3cf, 0x33cf, 0xf3cf, 0x0fcf, 0xcfcf, 0x3fcf, 0xffcf,
  112. 0x003f, 0xc03f, 0x303f, 0xf03f, 0x0c3f, 0xcc3f, 0x3c3f, 0xfc3f,
  113. 0x033f, 0xc33f, 0x333f, 0xf33f, 0x0f3f, 0xcf3f, 0x3f3f, 0xff3f,
  114. 0x00ff, 0xc0ff, 0x30ff, 0xf0ff, 0x0cff, 0xccff, 0x3cff, 0xfcff,
  115. 0x03ff, 0xc3ff, 0x33ff, 0xf3ff, 0x0fff, 0xcfff, 0x3fff, 0xffff,
  116. };
  117. ULONG aulLeadCnt[] = {0x0, 0x3, 0x2, 0x1};
  118. FNLOWXFER* afnXferI_Narrow[16] =
  119. {
  120. NULL,
  121. vXferI_1_Byte,
  122. vXferI_2_Bytes,
  123. vXferI_3_Bytes
  124. };
  125. FNLOWXFER* afnXferP_Narrow[16] =
  126. {
  127. NULL,
  128. vXferP_1_Byte,
  129. vXferP_2_Bytes,
  130. vXferP_3_Bytes
  131. };
  132. /**************************************************************************
  133. *
  134. * Realizes a pattern into offscreen memory.
  135. *
  136. **************************************************************************/
  137. VOID vFastPatRealize( // Type FNFASTPATREALIZE
  138. PDEV* ppdev,
  139. RBRUSH* prb, // Points to brush realization structure
  140. POINTL* pptlBrush, // Ignored
  141. BOOL bTransparent) // FALSE for normal patterns; TRUE for
  142. // patterns with a mask when the background
  143. // mix is LEAVE_ALONE.
  144. {
  145. BRUSHENTRY* pbe;
  146. LONG iBrushCache;
  147. ULONG ulOffset;
  148. BYTE* pjPattern;
  149. LONG culPattern;
  150. LONG cjPattern;
  151. BYTE* pjDst;
  152. ULONG ulDstOffset;
  153. BYTE* pjBase = ppdev->pjBase;
  154. DISPDBG((10,"vFastPatRealize called"));
  155. //
  156. // Make sure we can write to the video registers.
  157. //
  158. WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
  159. pbe = prb->pbe;
  160. if ((pbe == NULL) || (pbe->prbVerify != prb))
  161. {
  162. // We have to allocate a new offscreen cache brush entry for
  163. // the brush:
  164. iBrushCache = ppdev->iBrushCache;
  165. pbe = &ppdev->abe[iBrushCache];
  166. iBrushCache++;
  167. if (iBrushCache >= ppdev->cBrushCache)
  168. iBrushCache = 0;
  169. ppdev->iBrushCache = iBrushCache;
  170. // Update our links:
  171. pbe->prbVerify = prb;
  172. prb->pbe = pbe;
  173. }
  174. prb->bTransparent = bTransparent;
  175. ulDstOffset = ((pbe->y * ppdev->lDelta) + (pbe->x * ppdev->cBpp));
  176. pjPattern = (PBYTE) &prb->aulPattern[0]; // Copy from brush buffer
  177. cjPattern = PATTERN_SIZE * ppdev->cBpp;
  178. if ((ppdev->ulChipID != W32P) && (ppdev->ulChipID != ET6000))
  179. {
  180. cjPattern *= 4;
  181. }
  182. START_DIRECT_ACCESS(ppdev, pjBase);
  183. if (!ppdev->bAutoBanking)
  184. {
  185. // Set the address where we're going to put the pattern data.
  186. // All data transfers to video memory take place through aperature 0.
  187. CP_MMU_BP0(ppdev, pjBase, ulDstOffset);
  188. pjDst = (PBYTE) ppdev->pjMmu0;
  189. }
  190. else
  191. {
  192. pjDst = ppdev->pjScreen + ulDstOffset;
  193. }
  194. RtlCopyMemory(pjDst, pjPattern, cjPattern);
  195. END_DIRECT_ACCESS(ppdev, pjBase);
  196. }
  197. /**************************************************************************
  198. *
  199. * Does a pattern fill to a list of rectangles.
  200. *
  201. **************************************************************************/
  202. VOID vPatternFillScr(
  203. PDEV* ppdev,
  204. LONG c, // Can't be zero
  205. RECTL* prcl, // Array of relative coordinate destination rects
  206. ROP4 rop4, // Obvious?
  207. RBRUSH_COLOR rbc, // Drawing color is rbc.iSolidColor
  208. POINTL* pptlBrush) //
  209. {
  210. BYTE* pjBase = ppdev->pjBase;
  211. LONG lDelta = ppdev->lDelta;
  212. LONG cBpp = ppdev->cBpp;
  213. BOOL bTransparent;
  214. ULONG ulPatternAddrBase;
  215. ULONG cTile = 0;
  216. BRUSHENTRY* pbe; // Pointer to brush entry data, which is used
  217. // for keeping track of the location and status
  218. // of the pattern bits cached in off-screen
  219. // memory
  220. DISPDBG((10,"vPatternFillScr called"));
  221. bTransparent = ((rop4 & 0xff) != (rop4 >> 8));
  222. ASSERTDD(!bTransparent, "We don't handle transparent brushes yet.");
  223. if ((ppdev->ulChipID != W32P) && (ppdev->ulChipID != ET6000))
  224. {
  225. //
  226. // Patterns are duplicated horizontally and vertically (4 tiles)
  227. //
  228. cTile = 1; // Look, it means one extra to the right
  229. }
  230. ASSERTDD(c > 0, "Can't handle zero rectangles");
  231. if ((rbc.prb->pbe->prbVerify != rbc.prb))
  232. {
  233. vFastPatRealize(ppdev, rbc.prb, NULL, FALSE);
  234. }
  235. ASSERTDD(rbc.prb->bTransparent == bTransparent,
  236. "Not realized with correct transparency");
  237. pbe = rbc.prb->pbe;
  238. //
  239. // Make sure we can write to the video registers.
  240. //
  241. WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
  242. CP_FG_ROP(ppdev, pjBase, (rop4 >> 8));
  243. CP_BK_ROP(ppdev, pjBase, (rop4 & 0xff));
  244. CP_DST_Y_OFFSET(ppdev, pjBase, (lDelta - 1));
  245. //
  246. // ### precalc & store the PAT_Y_OFFSET const in the pdev
  247. //
  248. CP_PAT_WRAP(ppdev, pjBase, ppdev->w32PatternWrap);
  249. CP_PAT_Y_OFFSET(ppdev, pjBase, (((PATTERN_OFFSET * cBpp) << cTile) - 1));
  250. //
  251. // Fill the list of rectangles
  252. //
  253. ulPatternAddrBase = (pbe->y * lDelta) + (pbe->x * cBpp);
  254. do {
  255. ULONG offset;
  256. offset = cBpp * (
  257. (((prcl->top-pptlBrush->y)&7) << (3+cTile)) +
  258. ((prcl->left-pptlBrush->x)&7)
  259. );
  260. WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
  261. CP_PAT_ADDR(ppdev, pjBase, (ulPatternAddrBase + offset));
  262. CP_XCNT(ppdev, pjBase, (((prcl->right - prcl->left) * cBpp) - 1));
  263. CP_YCNT(ppdev, pjBase, (prcl->bottom - prcl->top - 1));
  264. // Set the blit destination address as the base address of MMU aperture 2
  265. // Then start the accelerated operation by writing something to this
  266. // aperture.
  267. SET_DEST_ADDR(ppdev, ((prcl->top * lDelta) + (cBpp * prcl->left)));
  268. START_ACL(ppdev);
  269. prcl++;
  270. } while (--c != 0);
  271. }
  272. /**************************************************************************
  273. *
  274. * Does a solid fill to a list of rectangles.
  275. *
  276. **************************************************************************/
  277. VOID vSolidFillScr(
  278. PDEV* ppdev,
  279. LONG c, // Can't be zero
  280. RECTL* prcl, // Array of relative coordinate destination rects
  281. ROP4 rop4, // Obvious?
  282. RBRUSH_COLOR rbc, // Drawing color is rbc.iSolidColor
  283. POINTL* pptlBrush) // Not used
  284. {
  285. BYTE* pjBase = ppdev->pjBase;
  286. LONG lDelta = ppdev->lDelta;
  287. LONG cBpp = ppdev->cBpp;
  288. ULONG ulSolidColor;
  289. DISPDBG((10,"vSolidFillScr called"));
  290. ASSERTDD(c > 0, "Can't handle zero rectangles");
  291. ASSERTDD((ppdev->cBpp < 3),
  292. "vSolidFillScr only works for 8bpp and 16bpp");
  293. // Make sure we can write to the video registers.
  294. WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
  295. CP_FG_ROP(ppdev, pjBase, (rop4 >> 8));
  296. CP_BK_ROP(ppdev, pjBase, (rop4 & 0xff));
  297. CP_DST_Y_OFFSET(ppdev, pjBase, (lDelta - 1));
  298. CP_PAT_WRAP(ppdev, pjBase, SOLID_COLOR_PATTERN_WRAP);
  299. CP_PAT_Y_OFFSET(ppdev, pjBase, (SOLID_COLOR_PATTERN_OFFSET - 1));
  300. CP_PAT_ADDR(ppdev, pjBase, ppdev->ulSolidColorOffset);
  301. ulSolidColor = rbc.iSolidColor;
  302. if (cBpp == 1)
  303. {
  304. ulSolidColor &= 0x000000FF; // We may get some extraneous data in the
  305. ulSolidColor |= ulSolidColor << 8;
  306. }
  307. if (cBpp <= 2)
  308. {
  309. ulSolidColor &= 0x0000FFFF;
  310. ulSolidColor |= ulSolidColor << 16;
  311. }
  312. // Set the color in offscreen memory
  313. WAIT_FOR_IDLE_ACL(ppdev, pjBase);
  314. if (ppdev->bAutoBanking)
  315. {
  316. *(PULONG)(ppdev->pjScreen + ppdev->ulSolidColorOffset) = ulSolidColor;
  317. }
  318. else
  319. {
  320. CP_MMU_BP0(ppdev, pjBase, ppdev->ulSolidColorOffset);
  321. CP_WRITE_MMU_DWORD(ppdev, 0, 0, ulSolidColor);
  322. }
  323. //
  324. // Fill the list of rectangles
  325. //
  326. do {
  327. WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
  328. CP_XCNT(ppdev, pjBase, ((prcl->right - prcl->left) * cBpp - 1));
  329. CP_YCNT(ppdev, pjBase, (prcl->bottom - prcl->top - 1));
  330. // Set the blt destination address as the base address of MMU aperture 2
  331. // Then start the accelerated operation by writing something to this
  332. // aperture.
  333. SET_DEST_ADDR(ppdev, ((prcl->top * lDelta) + (cBpp * prcl->left)));
  334. START_ACL(ppdev);
  335. prcl++;
  336. } while (--c != 0);
  337. }
  338. VOID vSolidFillScr24(
  339. PDEV* ppdev,
  340. LONG c, // Can't be zero
  341. RECTL* prcl, // Array of relative coordinate destination rects
  342. ROP4 rop4, // Obvious?
  343. RBRUSH_COLOR rbc, // Drawing color is rbc.iSolidColor
  344. POINTL* pptlBrush) // Not used
  345. {
  346. BYTE* pjBase = ppdev->pjBase;
  347. LONG lDelta = ppdev->lDelta;
  348. ULONG ulSolidColor = rbc.iSolidColor;
  349. DISPDBG((10,"vSolidFillScr24 called"));
  350. ASSERTDD(c > 0, "Can't handle zero rectangles");
  351. ASSERTDD((ppdev->cBpp == 3),
  352. "vSolidFillScr24 called when not in 24bpp mode");
  353. ASSERTDD(((ppdev->ulChipID == W32P) || (ppdev->ulChipID == ET6000)),
  354. "24bpp solid fills only accelerated for w32p/ET6000");
  355. #define CBPP 3
  356. //
  357. // Make sure we can write to the video registers.
  358. //
  359. WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
  360. CP_FG_ROP(ppdev, pjBase, (rop4 >> 8));
  361. CP_BK_ROP(ppdev, pjBase, (rop4 & 0xff));
  362. CP_DST_Y_OFFSET(ppdev, pjBase, (lDelta - 1));
  363. //
  364. // This must be special cased for the ET6000. I'm not sure why it worked
  365. // for the others, because we have a 3 byte wide pattern, but were setting the
  366. // pattern wrap for a 4 byte wide pattern. We were also setting the Y_offset
  367. // to be 3 when it should be 2, which really means 3 bytes per line. Strange.
  368. //
  369. // Anyway, I've left the code for the others in place and it will get executed
  370. // for them.
  371. //
  372. CP_PAT_WRAP(ppdev, pjBase, SOLID_COLOR_PATTERN_WRAP_24BPP); // 1 line, 3 bytes per line
  373. CP_PAT_Y_OFFSET(ppdev, pjBase, (SOLID_COLOR_PATTERN_OFFSET_24BPP - 1)); // indicates 3 bytes per line
  374. CP_PAT_ADDR(ppdev, pjBase, ppdev->ulSolidColorOffset);
  375. // Set the color in offscreen memory
  376. WAIT_FOR_IDLE_ACL(ppdev, pjBase);
  377. if (ppdev->bAutoBanking)
  378. {
  379. *(PULONG)(ppdev->pjScreen + ppdev->ulSolidColorOffset) = ulSolidColor;
  380. }
  381. else
  382. {
  383. CP_MMU_BP0(ppdev, pjBase, ppdev->ulSolidColorOffset);
  384. CP_WRITE_MMU_DWORD(ppdev, 0, 0, ulSolidColor);
  385. }
  386. //
  387. // We know that the ACL is idle now, so no wait
  388. //
  389. CP_PEL_DEPTH(ppdev, pjBase, HW_PEL_DEPTH_24BPP);
  390. //
  391. // Fill the list of rectangles
  392. //
  393. do {
  394. WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
  395. if (ppdev->ulChipID == ET6000)
  396. {
  397. CP_XCNT(ppdev, pjBase, (((prcl->right - prcl->left) * CBPP) - 1));
  398. }
  399. else
  400. {
  401. CP_XCNT(ppdev, pjBase, ((prcl->right - prcl->left - 1) * CBPP));
  402. }
  403. CP_YCNT(ppdev, pjBase, (prcl->bottom - prcl->top - 1));
  404. // Set the blt destination address as the base address of MMU aperture 2
  405. // Then start the accelerated operation by writing something to this
  406. // aperture.
  407. SET_DEST_ADDR(ppdev, ((prcl->top * lDelta) + (CBPP * prcl->left)));
  408. START_ACL(ppdev);
  409. prcl++;
  410. } while (--c != 0);
  411. // set pixel depth back to 1
  412. WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
  413. CP_PEL_DEPTH(ppdev, pjBase, HW_PEL_DEPTH_8BPP);
  414. #undef CBPP
  415. }
  416. /**************************************************************************
  417. *
  418. * Does a screen-to-screen blt of a list of rectangles.
  419. *
  420. **************************************************************************/
  421. VOID vScrToScr(
  422. PDEV* ppdev,
  423. LONG c, // Can't be zero
  424. RECTL* prcl, // Array of relative coordinates destination rectangles
  425. ROP4 rop4, // Obvious?
  426. POINTL* pptlSrc, // Original unclipped source point
  427. RECTL* prclDst) // Original unclipped destination rectangle
  428. {
  429. LONG dx;
  430. LONG dy; // Add delta to destination to get source
  431. LONG xyOffset = ppdev->xyOffset;
  432. BYTE* pjBase = ppdev->pjBase;
  433. LONG lDelta = ppdev->lDelta;
  434. LONG cBpp = ppdev->cBpp;
  435. DISPDBG((10,"vScrToScr called"));
  436. ASSERTDD(c > 0, "Can't handle zero rectangles");
  437. //
  438. // The src-dst delta will be the same for all rectangles
  439. //
  440. dx = pptlSrc->x - prclDst->left;
  441. dy = pptlSrc->y - prclDst->top;
  442. //
  443. // Make sure we can write to the video registers.
  444. //
  445. WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
  446. CP_FG_ROP(ppdev, pjBase, (rop4 >> 8));
  447. CP_BK_ROP(ppdev, pjBase, (rop4 & 0xff));
  448. CP_SRC_WRAP(ppdev, pjBase, NO_PATTERN_WRAP);
  449. CP_SRC_Y_OFFSET(ppdev, pjBase, (lDelta - 1));
  450. CP_DST_Y_OFFSET(ppdev, pjBase, (lDelta - 1));
  451. // ### I don't think this is necessary - WAIT_FOR_IDLE_ACL(ppdev, pjBase);
  452. //
  453. // The accelerator may not be as fast at doing right-to-left copies, so
  454. // only do them when the rectangles truly overlap:
  455. //
  456. if (!OVERLAP(prclDst, pptlSrc))
  457. goto Top_Down_Left_To_Right;
  458. if (prclDst->top <= pptlSrc->y)
  459. {
  460. if (prclDst->left <= pptlSrc->x)
  461. {
  462. Top_Down_Left_To_Right:
  463. //
  464. // Top to Bottom - Left to Right
  465. //
  466. DISPDBG((12,"Top to Bottom - Left to Right"));
  467. CP_XY_DIR(ppdev, pjBase, 0); // Top to Bottom - Left to Right
  468. do {
  469. WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
  470. CP_XCNT(ppdev, pjBase, (cBpp * (prcl->right - prcl->left) - 1));
  471. CP_YCNT(ppdev, pjBase, (prcl->bottom - prcl->top - 1));
  472. CP_SRC_ADDR(ppdev, pjBase, (xyOffset + ((prcl->top + dy) * lDelta) + cBpp * (prcl->left + dx)));
  473. // Set the blt destination address as the base address of MMU aperture 2
  474. // Then start the accelerated operation by writing something to this
  475. // aperture.
  476. SET_DEST_ADDR(ppdev, ((prcl->top * lDelta) + (cBpp * prcl->left)));
  477. START_ACL(ppdev);
  478. prcl++;
  479. } while (--c != 0);
  480. }
  481. else
  482. {
  483. //
  484. // Top to Bottom - Right to left
  485. //
  486. DISPDBG((12,"Top to Bottom - Right to left"));
  487. CP_XY_DIR(ppdev, pjBase, RIGHT_TO_LEFT);
  488. do {
  489. WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
  490. CP_XCNT(ppdev, pjBase, (cBpp * (prcl->right - prcl->left) - 1));
  491. CP_YCNT(ppdev, pjBase, (prcl->bottom - prcl->top - 1));
  492. CP_SRC_ADDR(ppdev, pjBase, (xyOffset + ((prcl->top + dy) * lDelta) + cBpp * (prcl->right + dx) - 1));
  493. // Set the blt destination address as the base address of MMU aperture 2
  494. // Then start the accelerated operation by writing something to this
  495. // aperture.
  496. SET_DEST_ADDR(ppdev, ((prcl->top * lDelta) + (cBpp * prcl->right) - 1));
  497. START_ACL(ppdev);
  498. prcl++;
  499. } while (--c != 0);
  500. }
  501. }
  502. else
  503. {
  504. if (prclDst->left <= pptlSrc->x)
  505. {
  506. //
  507. // Bottom to Top - Left to Right
  508. //
  509. DISPDBG((12,"Bottom to Top - Left to Right"));
  510. CP_XY_DIR(ppdev, pjBase, BOTTOM_TO_TOP);
  511. do {
  512. WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
  513. CP_XCNT(ppdev, pjBase, (cBpp * (prcl->right - prcl->left) - 1));
  514. CP_YCNT(ppdev, pjBase, (prcl->bottom - prcl->top - 1));
  515. CP_SRC_ADDR(ppdev, pjBase, (xyOffset + ((prcl->bottom - 1 + dy) * lDelta) + cBpp * (prcl->left + dx)));
  516. // Set the blt destination address as the base address of MMU aperture 2
  517. // Then start the accelerated operation by writing something to this
  518. // aperture.
  519. SET_DEST_ADDR(ppdev, (((prcl->bottom - 1) * lDelta) + (cBpp * prcl->left)));
  520. START_ACL(ppdev);
  521. prcl++;
  522. } while (--c != 0);
  523. }
  524. else
  525. {
  526. //
  527. // Bottom to Top - Right to Left
  528. //
  529. DISPDBG((12,"Bottom to Top - Right to Left"));
  530. CP_XY_DIR(ppdev, pjBase, (BOTTOM_TO_TOP | RIGHT_TO_LEFT));
  531. do {
  532. WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
  533. CP_XCNT(ppdev, pjBase, (cBpp * (prcl->right - prcl->left) - 1));
  534. CP_YCNT(ppdev, pjBase, (prcl->bottom - prcl->top - 1));
  535. CP_SRC_ADDR(ppdev, pjBase, (xyOffset + ((prcl->bottom - 1 + dy) * lDelta) + cBpp * (prcl->right + dx) - 1));
  536. // Set the blt destination address as the base address of MMU aperture 2
  537. // Then start the accelerated operation by writing something to this
  538. // aperture.
  539. SET_DEST_ADDR(ppdev, (((prcl->bottom - 1) * lDelta) + cBpp * (prcl->right) - 1));
  540. START_ACL(ppdev);
  541. prcl++;
  542. } while (--c != 0);
  543. }
  544. }
  545. WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
  546. CP_XY_DIR(ppdev, pjBase, 0); // Top to Bottom - Left to Right
  547. }
  548. /**************************************************************************
  549. *
  550. * Does a monochrome expansion to video memory.
  551. *
  552. * Make this Xfer1to8bpp and create another for Xfer1to16bpp?
  553. *
  554. **************************************************************************/
  555. VOID vSlowXfer1bpp( // Type FNXFER
  556. PDEV* ppdev,
  557. LONG c, // Count of rectangles, can't be zero
  558. RECTL* prcl, // List of destination rectangles, in relative
  559. // coordinates
  560. ROP4 rop4, // Actually had better be a rop3
  561. SURFOBJ* psoSrc, // Source surface
  562. POINTL* pptlSrc, // Original unclipped source point
  563. RECTL* prclDst, // Original unclipped destination rectangle
  564. XLATEOBJ* pxlo) // Translate that provides color-expansion information
  565. {
  566. LONG dx;
  567. LONG dy;
  568. LONG lSrcDelta;
  569. BYTE* pjSrcScan0;
  570. BYTE* pjSrc;
  571. LONG cjSrc;
  572. LONG cjTrail;
  573. LONG culSrc;
  574. BYTE jFgRop3;
  575. BYTE jBgRop3;
  576. BOOL bW32p;
  577. ULONG ulSolidColorOffset = ppdev->ulSolidColorOffset;
  578. BYTE* pjBase = ppdev->pjBase;
  579. LONG lDelta = ppdev->lDelta;
  580. LONG cBpp = ppdev->cBpp;
  581. ULONG ulFgColor = pxlo->pulXlate[1];
  582. ULONG ulBgColor = pxlo->pulXlate[0];
  583. LONG xyOffset = (ppdev->cBpp * ppdev->xOffset) +
  584. (ppdev->yOffset * ppdev->lDelta);
  585. DISPDBG((10,"vSlowXfer1bpp called"));
  586. DISPDBG((11,"rop4(%04x)", rop4));
  587. ASSERTDD(c > 0, "Can't handle zero rectangles");
  588. ASSERTDD(pptlSrc != NULL && psoSrc != NULL, "Can't have NULL sources");
  589. ASSERTDD(ppdev->cBpp <= 2, "vSlowXfer1bpp doesn't work at 24 bpp");
  590. bW32p = (ppdev->ulChipID == W32P);
  591. WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
  592. jFgRop3 = (BYTE)(rop4 >> 8); // point to src color where src is indicated
  593. // point to pat color where src is indicated
  594. if ((BYTE) rop4 != R3_NOP)
  595. {
  596. jBgRop3 = (BYTE)((rop4 & 0xc3) | ((rop4 & 0xf0) >> 2));
  597. }
  598. else
  599. {
  600. jBgRop3 = (BYTE) rop4;
  601. }
  602. DISPDBG((11,"jFgRop3(%04x), jBgRop3(%04x)", jFgRop3, jBgRop3));
  603. CP_FG_ROP(ppdev, pjBase, jFgRop3);
  604. CP_BK_ROP(ppdev, pjBase, jBgRop3);
  605. CP_DST_Y_OFFSET(ppdev, pjBase, (lDelta - 1));
  606. CP_PAT_WRAP(ppdev, pjBase, SOLID_COLOR_PATTERN_WRAP);
  607. CP_PAT_Y_OFFSET(ppdev, pjBase, (SOLID_COLOR_PATTERN_OFFSET - 1));
  608. CP_SRC_WRAP(ppdev, pjBase, SOLID_COLOR_PATTERN_WRAP);
  609. CP_SRC_Y_OFFSET(ppdev, pjBase, (SOLID_COLOR_PATTERN_OFFSET - 1));
  610. CP_PAT_ADDR(ppdev, pjBase, ulSolidColorOffset + 4);
  611. CP_SRC_ADDR(ppdev, pjBase, ulSolidColorOffset);
  612. {
  613. //
  614. // Set the address where we're going to put the solid color data.
  615. // All data transfers to video memory take place through aperature 0.
  616. //
  617. WAIT_FOR_IDLE_ACL(ppdev, pjBase);
  618. CP_MMU_BP0(ppdev, pjBase, ppdev->ulSolidColorOffset);
  619. //
  620. // Set the color in offscreen memory
  621. //
  622. if (cBpp == 1)
  623. {
  624. ulFgColor |= ulFgColor << 8;
  625. ulBgColor |= ulBgColor << 8;
  626. }
  627. if (cBpp <= 2)
  628. {
  629. ulFgColor |= ulFgColor << 16;
  630. ulBgColor |= ulBgColor << 16;
  631. }
  632. CP_WRITE_MMU_DWORD(ppdev, 0, 0, ulFgColor);
  633. CP_WRITE_MMU_DWORD(ppdev, 0, 4, ulBgColor);
  634. }
  635. CP_ROUTING_CTRL(ppdev, pjBase, CPU_MIX_DATA);
  636. dx = pptlSrc->x - prclDst->left;
  637. dy = pptlSrc->y - prclDst->top; // Add to destination to get source
  638. pjSrcScan0 = psoSrc->pvScan0;
  639. DISPDBG((2,"lSrcDelta(%x)", psoSrc->lDelta));
  640. do {
  641. ULONG ulDst;
  642. RECTL rclSrc;
  643. RECTL rclDst;
  644. LONG xBitsPad;
  645. LONG xBitsUsed;
  646. LONG xBytesPad;
  647. //
  648. // load lSrcDelta inside the loop because we adjust it later.
  649. //
  650. lSrcDelta = psoSrc->lDelta;
  651. rclDst = *prcl;
  652. rclSrc.left = rclDst.left + dx;
  653. rclSrc.right = rclDst.right + dx;
  654. rclSrc.top = rclDst.top + dy;
  655. rclSrc.bottom = rclDst.bottom + dy;
  656. // x = prcl->left;
  657. // y = prcl->top;
  658. //
  659. // Calculate number of bits used in first partial.
  660. //
  661. xBitsPad = rclSrc.left & 7;
  662. xBitsUsed = min((8-xBitsPad),(rclSrc.right-rclSrc.left));
  663. xBytesPad = rclDst.left & 3;
  664. if (xBitsPad != 0) // (0 < xBitsUsed < 8)
  665. {
  666. DISPDBG((2,"xBitsUsed(%d) xBitsPad(%d)", xBitsUsed, xBitsPad));
  667. DISPDBG((2,"rclSrc(%d,%d,%d,%d) rclDst(%d,%d,%d,%d)",
  668. rclSrc.left,
  669. rclSrc.top,
  670. rclSrc.right,
  671. rclSrc.bottom,
  672. rclDst.left,
  673. rclDst.top,
  674. rclDst.right,
  675. rclDst.bottom));
  676. WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
  677. // Do the column of the first xBitsUsed pixels
  678. if (!bW32p)
  679. {
  680. CP_BUS_SIZE(ppdev, pjBase, VIRTUAL_BUS_8_BIT);
  681. }
  682. CP_XCNT(ppdev, pjBase, ((xBitsUsed * cBpp) - 1));
  683. CP_YCNT(ppdev, pjBase, (rclDst.bottom - rclDst.top - 1));
  684. pjSrc = pjSrcScan0 + rclSrc.top * lSrcDelta
  685. + (rclSrc.left >> 3);
  686. ulDst = (rclDst.top * lDelta) + (cBpp * rclDst.left);
  687. ulDst += xyOffset;
  688. if (bW32p)
  689. {
  690. // We will align the data ourselves.
  691. CP_MIX_ADDR(ppdev, pjBase, 0);
  692. CP_MIX_Y_OFFSET(ppdev, pjBase, -1);
  693. }
  694. CP_MMU_BP2(ppdev, pjBase, ulDst);
  695. CP_DST_ADDR(ppdev, pjBase, ulDst);
  696. if (bW32p) WAIT_FOR_BUSY_ACL(ppdev, pjBase);
  697. if (cBpp == 1)
  698. {
  699. LONG i;
  700. for (i = rclDst.bottom - rclDst.top; i; i--)
  701. {
  702. CP_WRITE_MMU_BYTE(ppdev, 2, 0, jReverse[(*pjSrc << xBitsPad) & 0xff]);
  703. pjSrc += lSrcDelta;
  704. }
  705. }
  706. else // if (cBpp == 2)
  707. {
  708. LONG i;
  709. WORD wTmp;
  710. BYTE * pjCvt = (BYTE *) &wTmp;
  711. for (i = rclDst.bottom - rclDst.top; i; i--)
  712. {
  713. wTmp = wReverse2x[(*pjSrc << xBitsPad) & 0xff];
  714. CP_WRITE_MMU_BYTE(ppdev, 2, 0, pjCvt[0]);
  715. if (xBitsUsed > 4)
  716. {
  717. CP_WRITE_MMU_BYTE(ppdev, 2, 1, pjCvt[1]);
  718. }
  719. pjSrc += lSrcDelta;
  720. }
  721. }
  722. rclSrc.left += xBitsUsed;
  723. rclDst.left += xBitsUsed;
  724. }
  725. // If the entire blt wasn't contained in the first partial byte,
  726. // the we have to do the rest.
  727. if (rclSrc.left < rclSrc.right)
  728. {
  729. DISPDBG((2,"rclSrc(%d,%d,%d,%d) rclDst(%d,%d,%d,%d)",
  730. rclSrc.left,
  731. rclSrc.top,
  732. rclSrc.right,
  733. rclSrc.bottom,
  734. rclDst.left,
  735. rclDst.top,
  736. rclDst.right,
  737. rclDst.bottom));
  738. //
  739. // Legend has it that we need a WAIT_FOR_IDLE_ACL, instead of just
  740. // a WAIT_FOR_EMPTY_ACL_QUEUE, to prevent hanging W32
  741. //
  742. WAIT_FOR_IDLE_ACL(ppdev, pjBase);
  743. if (!bW32p)
  744. {
  745. CP_BUS_SIZE(ppdev, pjBase, VIRTUAL_BUS_32_BIT);
  746. }
  747. CP_XCNT(ppdev, pjBase, (cBpp * (rclDst.right - rclDst.left) - 1));
  748. CP_YCNT(ppdev, pjBase, (rclDst.bottom - rclDst.top - 1));
  749. cjSrc = (((rclSrc.right * cBpp) + 7) >> 3) -
  750. ((rclSrc.left * cBpp) >> 3); // # bytes to transfer
  751. culSrc = (cjSrc >> 2);
  752. cjTrail = (cjSrc & 3);
  753. DISPDBG((2,"cjSrc(%d)", cjSrc));
  754. DISPDBG((2,"culSrc(%d)", culSrc));
  755. DISPDBG((2,"cjTrail(%d)", cjTrail));
  756. pjSrc = pjSrcScan0 + rclSrc.top * lSrcDelta
  757. + (rclSrc.left >> 3);
  758. DISPDBG((2,"pjSrc(%x)", pjSrc));
  759. ulDst = (rclDst.top * lDelta) + (cBpp * rclDst.left);
  760. ulDst += xyOffset;
  761. if (bW32p)
  762. {
  763. // We will align the data ourselves.
  764. CP_MIX_ADDR(ppdev, pjBase, 0);
  765. CP_MIX_Y_OFFSET(ppdev, pjBase, -1);
  766. }
  767. CP_MMU_BP2(ppdev, pjBase, ulDst);
  768. CP_DST_ADDR(ppdev, pjBase, ulDst);
  769. if (bW32p) WAIT_FOR_BUSY_ACL(ppdev, pjBase);
  770. {
  771. LONG i;
  772. LONG j;
  773. if (cBpp == 1)
  774. {
  775. lSrcDelta -= cjSrc;
  776. for (i = rclDst.bottom - rclDst.top; i; i--)
  777. {
  778. ULONG cjTmp = cjTrail;
  779. volatile BYTE * pjTmp;
  780. volatile ULONG * pulTmp;
  781. DISPDBG((2,"pjSrc(%x)", pjSrc));
  782. for (j = culSrc; j; j--)
  783. {
  784. ULONG ulTmp = 0;
  785. ulTmp |= (ULONG)jReverse[*pjSrc++];
  786. ulTmp |= (ULONG)jReverse[*pjSrc++] << 8;
  787. ulTmp |= (ULONG)jReverse[*pjSrc++] << 16;
  788. ulTmp |= (ULONG)jReverse[*pjSrc++] << 24;
  789. CP_WRITE_MMU_DWORD(ppdev, 2, 0, ulTmp);
  790. DISPDBG((2,"Src(%08x) Tmp(%08x)",
  791. *((ULONG *)(pjSrc-4)),
  792. ulTmp
  793. ));
  794. }
  795. if (bW32p)
  796. {
  797. int ndx = 0;
  798. while (cjTmp--)
  799. {
  800. CP_WRITE_MMU_BYTE(ppdev, 2, ndx, jReverse[*pjSrc]);
  801. pjSrc++;
  802. ndx++;
  803. }
  804. }
  805. else
  806. {
  807. if (cjTmp)
  808. {
  809. ULONG ulTmp = 0;
  810. if (cjTmp == 1) goto do_1_byte;
  811. if (cjTmp == 2) goto do_2_bytes;
  812. //
  813. // do all three bytes of the partial
  814. //
  815. ulTmp |= (ULONG)jReverse[pjSrc[2]] << 16;
  816. do_2_bytes:
  817. ulTmp |= (ULONG)jReverse[pjSrc[1]] << 8;
  818. do_1_byte:
  819. ulTmp |= (ULONG)jReverse[pjSrc[0]];
  820. //*pulTmp = ulTmp;
  821. CP_WRITE_MMU_DWORD(ppdev, 2, 0, ulTmp);
  822. pjSrc += cjTmp;
  823. }
  824. }
  825. pjSrc += lSrcDelta;
  826. }
  827. }
  828. else // if (cBpp == 2)
  829. {
  830. lSrcDelta -= (cjSrc + 1) >> 1;
  831. for (i = rclDst.bottom - rclDst.top; i; i--)
  832. {
  833. ULONG cjTmp = cjTrail;
  834. int ndx = 0;
  835. DISPDBG((2,"pjSrc(%x)", pjSrc));
  836. for (j = culSrc; j; j--)
  837. {
  838. ULONG ulTmp;
  839. ulTmp = (ULONG)wReverse2x[*pjSrc++];
  840. ulTmp |= (ULONG)wReverse2x[*pjSrc++] << 16;
  841. CP_WRITE_MMU_DWORD(ppdev, 2, 0, ulTmp);
  842. }
  843. if (bW32p)
  844. {
  845. while (cjTmp--)
  846. {
  847. WORD wCvt;
  848. BYTE * pjCvt = (BYTE *) &wCvt;
  849. wCvt = wReverse2x[*pjSrc++];
  850. CP_WRITE_MMU_BYTE(ppdev, 2, ndx, pjCvt[0]);
  851. ndx++;
  852. if (cjTmp)
  853. {
  854. CP_WRITE_MMU_BYTE(ppdev, 2, ndx, pjCvt[1]);
  855. ndx++;
  856. cjTmp--;
  857. }
  858. }
  859. }
  860. else
  861. {
  862. if (cjTmp)
  863. {
  864. ULONG ulTmp;
  865. ulTmp = (ULONG)wReverse2x[pjSrc[0]];
  866. ulTmp |= (ULONG)wReverse2x[pjSrc[1]] << 16;
  867. CP_WRITE_MMU_DWORD(ppdev, 2, 0, ulTmp);
  868. pjSrc += (cjTmp+1) >> 1;
  869. }
  870. }
  871. pjSrc += lSrcDelta;
  872. }
  873. }
  874. }
  875. }
  876. prcl++;
  877. } while (--c != 0);
  878. WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
  879. CP_ROUTING_CTRL(ppdev, pjBase, 0);
  880. if (!bW32p)
  881. {
  882. CP_BUS_SIZE(ppdev, pjBase, VIRTUAL_BUS_8_BIT);
  883. }
  884. }
  885. VOID vXferBlt8i(
  886. PDEV* ppdev,
  887. LONG c, // Count of rectangles, can't be zero
  888. RECTL* prcl, // Array of relative coordinates destination rectangles
  889. ROP4 rop4, // Obvious?
  890. SURFOBJ* psoSrc, // Source surface
  891. POINTL* pptlSrc, // Original unclipped source point
  892. RECTL* prclDst, // Original unclipped destination rectangle
  893. XLATEOBJ* pxlo) // Not used
  894. {
  895. BYTE* pjBase = ppdev->pjBase;
  896. BYTE* pjSrcScan0 = (BYTE*) psoSrc->pvScan0;
  897. LONG lDeltaDst = ppdev->lDelta;
  898. LONG lDeltaSrc = psoSrc->lDelta;
  899. POINTL ptlSrc = *pptlSrc;
  900. RECTL rclDst = *prclDst;
  901. LONG cBpp = ppdev->cBpp;
  902. SIZEL sizlBlt;
  903. ULONG ulDstAddr;
  904. BYTE* pjSrc;
  905. INT ix, iy;
  906. LONG dx;
  907. LONG dy; // Add delta to destination to get source
  908. LONG cjLead;
  909. LONG cjTrail;
  910. LONG culMiddle;
  911. LONG xyOffset = (cBpp * ppdev->xOffset) +
  912. (lDeltaDst * ppdev->yOffset);
  913. //
  914. // The src-dst delta will be the same for all rectangles
  915. //
  916. dx = ptlSrc.x - rclDst.left;
  917. dy = ptlSrc.y - rclDst.top;
  918. // Note: Legend has it that if we don't wait for the ACL to become idle,
  919. // then the code will hang on the W32, but not on the W32i.
  920. //
  921. // Since we do a WAIT_FOR_IDLE_ACL we don't need to
  922. // WAIT_FOR_EMPTY_ACL_QUEUE
  923. WAIT_FOR_IDLE_ACL(ppdev, pjBase);
  924. CP_ROUTING_CTRL(ppdev, pjBase, CPU_SOURCE_DATA);
  925. CP_FG_ROP(ppdev, pjBase, (rop4 >> 8));
  926. CP_DST_Y_OFFSET(ppdev, pjBase, (lDeltaDst - 1));
  927. do {
  928. // Calculate blt dimensions in bytes
  929. sizlBlt.cx = cBpp * (prcl->right - prcl->left);
  930. sizlBlt.cy = prcl->bottom - prcl->top;
  931. pjSrc = pjSrcScan0 +
  932. ((prcl->top + dy) * lDeltaSrc) +
  933. ((prcl->left + dx) * cBpp);
  934. cjTrail = cjLead = (LONG)((ULONG_PTR)pjSrc);
  935. cjLead = aulLeadCnt[cjLead & 3];
  936. if (cjLead < sizlBlt.cx)
  937. {
  938. cjTrail += sizlBlt.cx;
  939. cjTrail &= 3;
  940. culMiddle = (sizlBlt.cx - (cjLead + cjTrail)) >> 2;
  941. }
  942. else
  943. {
  944. cjLead = sizlBlt.cx;
  945. cjTrail = 0;
  946. culMiddle = 0;
  947. }
  948. ASSERTDD(culMiddle >= 0, "vXferBlt8i: culMiddle < 0");
  949. ulDstAddr = (prcl->top * lDeltaDst) +
  950. (prcl->left * cBpp) +
  951. (xyOffset);
  952. if ((sizlBlt.cx - (cjLead + cjTrail)) & 3)
  953. DISPDBG((0, "WARNING: cx - (cjLead+cjTail) not multiple of 4"));
  954. DISPDBG((8, "rclSrc(%d,%d,%d,%d)",
  955. prcl->left+dx,
  956. prcl->top+dy,
  957. prcl->right+dx,
  958. prcl->bottom+dy
  959. ));
  960. DISPDBG((8, "rclDst(%d,%d,%d,%d)",
  961. prcl->left,
  962. prcl->top,
  963. prcl->right,
  964. prcl->bottom
  965. ));
  966. DISPDBG((8, "pjSrc(%x) cx(%d) ulDstAddr(%xh) (%d,%d,%d)",
  967. pjSrc,
  968. sizlBlt.cx,
  969. ulDstAddr,
  970. cjLead,
  971. culMiddle,
  972. cjTrail
  973. ));
  974. if (cjLead)
  975. {
  976. WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
  977. CP_XCNT(ppdev, pjBase, (cjLead - 1));
  978. CP_YCNT(ppdev, pjBase, (sizlBlt.cy - 1));
  979. CP_MMU_BP2(ppdev, pjBase, (ulDstAddr));
  980. afnXferI_Narrow[cjLead](ppdev,
  981. pjSrc,
  982. 0,
  983. sizlBlt.cy,
  984. lDeltaSrc);
  985. }
  986. if (cjTrail)
  987. {
  988. LONG cjOffset = cjLead + (culMiddle<<2);
  989. WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
  990. CP_XCNT(ppdev, pjBase, (cjTrail - 1));
  991. CP_YCNT(ppdev, pjBase, (sizlBlt.cy - 1));
  992. CP_MMU_BP2(ppdev, pjBase, (ulDstAddr+cjOffset));
  993. afnXferI_Narrow[cjTrail](ppdev,
  994. (pjSrc+cjOffset),
  995. 0,
  996. sizlBlt.cy,
  997. lDeltaSrc);
  998. }
  999. if (culMiddle)
  1000. {
  1001. LONG cjOffset = cjLead;
  1002. WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
  1003. CP_XCNT(ppdev, pjBase, ((culMiddle<<2) - 1));
  1004. CP_YCNT(ppdev, pjBase, (sizlBlt.cy - 1));
  1005. CP_BUS_SIZE(ppdev, pjBase, VIRTUAL_BUS_32_BIT);
  1006. CP_MMU_BP2(ppdev, pjBase, (ulDstAddr+cjOffset));
  1007. vXfer_DWORDS(ppdev,
  1008. (pjSrc+cjOffset),
  1009. culMiddle,
  1010. sizlBlt.cy,
  1011. lDeltaSrc);
  1012. WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
  1013. CP_BUS_SIZE(ppdev, pjBase, VIRTUAL_BUS_8_BIT);
  1014. }
  1015. prcl++;
  1016. } while (--c != 0);
  1017. WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
  1018. CP_ROUTING_CTRL(ppdev, pjBase, 0);
  1019. }
  1020. VOID vXferBlt8p(
  1021. PDEV* ppdev,
  1022. LONG c, // Count of rectangles, can't be zero
  1023. RECTL* prcl, // Array of relative coordinates destination rectangles
  1024. ROP4 rop4, // Obvious?
  1025. SURFOBJ* psoSrc, // Source surface
  1026. POINTL* pptlSrc, // Original unclipped source point
  1027. RECTL* prclDst, // Original unclipped destination rectangle
  1028. XLATEOBJ* pxlo) // Not used
  1029. {
  1030. BYTE* pjBase = ppdev->pjBase;
  1031. BYTE* pjSrcScan0 = (BYTE*) psoSrc->pvScan0;
  1032. LONG lDeltaDst = ppdev->lDelta;
  1033. LONG lDeltaSrc = psoSrc->lDelta;
  1034. POINTL ptlSrc = *pptlSrc;
  1035. RECTL rclDst = *prclDst;
  1036. LONG cBpp = ppdev->cBpp;
  1037. SIZEL sizlBlt;
  1038. ULONG ulDstAddr;
  1039. BYTE* pjSrc;
  1040. INT ix, iy;
  1041. LONG dx;
  1042. LONG dy; // Add delta to destination to get source
  1043. LONG iLeadNdx;
  1044. LONG cjLead;
  1045. LONG cjTrail;
  1046. LONG culMiddle;
  1047. LONG xyOffset = (cBpp * ppdev->xOffset) +
  1048. (lDeltaDst * ppdev->yOffset);
  1049. //
  1050. // The src-dst delta will be the same for all rectangles
  1051. //
  1052. dx = ptlSrc.x - rclDst.left;
  1053. dy = ptlSrc.y - rclDst.top;
  1054. WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
  1055. CP_ROUTING_CTRL(ppdev, pjBase, CPU_SOURCE_DATA);
  1056. CP_FG_ROP(ppdev, pjBase, (rop4 >> 8));
  1057. CP_DST_Y_OFFSET(ppdev, pjBase, (lDeltaDst - 1));
  1058. CP_SRC_ADDR(ppdev, pjBase, 0);
  1059. CP_SRC_Y_OFFSET(ppdev, pjBase, -1);
  1060. do {
  1061. // Calculate blt dimensions in bytes
  1062. sizlBlt.cx = cBpp * (prcl->right - prcl->left);
  1063. sizlBlt.cy = prcl->bottom - prcl->top;
  1064. pjSrc = pjSrcScan0 +
  1065. ((prcl->top + dy) * lDeltaSrc) +
  1066. ((prcl->left + dx) * cBpp);
  1067. cjTrail = iLeadNdx = (LONG)((ULONG_PTR)pjSrc);
  1068. iLeadNdx &= 3;
  1069. cjLead = aulLeadCnt[iLeadNdx];
  1070. if (cjLead < sizlBlt.cx)
  1071. {
  1072. cjTrail += sizlBlt.cx;
  1073. cjTrail &= 3;
  1074. culMiddle = (sizlBlt.cx - (cjLead + cjTrail)) >> 2;
  1075. }
  1076. else
  1077. {
  1078. cjLead = sizlBlt.cx;
  1079. cjTrail = 0;
  1080. culMiddle = 0;
  1081. }
  1082. ASSERTDD(culMiddle >= 0, "vXferBlt8i: culMiddle < 0");
  1083. ulDstAddr = (prcl->top * lDeltaDst) +
  1084. (prcl->left * cBpp) +
  1085. (xyOffset);
  1086. if ((sizlBlt.cx - (cjLead + cjTrail)) & 3)
  1087. DISPDBG((0, "WARNING: cx - (cjLead+cjTail) not multiple of 4"));
  1088. DISPDBG((8, "rclSrc(%d,%d,%d,%d)",
  1089. prcl->left+dx,
  1090. prcl->top+dy,
  1091. prcl->right+dx,
  1092. prcl->bottom+dy
  1093. ));
  1094. DISPDBG((8, "rclDst(%d,%d,%d,%d)",
  1095. prcl->left,
  1096. prcl->top,
  1097. prcl->right,
  1098. prcl->bottom
  1099. ));
  1100. DISPDBG((8, "pjSrc(%x) cx(%d) ulDstAddr(%xh) (%d,%d,%d)",
  1101. pjSrc,
  1102. sizlBlt.cx,
  1103. ulDstAddr,
  1104. cjLead,
  1105. culMiddle,
  1106. cjTrail
  1107. ));
  1108. if (cjLead)
  1109. {
  1110. WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
  1111. CP_XCNT(ppdev, pjBase, (cjLead - 1));
  1112. CP_YCNT(ppdev, pjBase, (sizlBlt.cy - 1));
  1113. // The next two turn off src to dst alignment
  1114. CP_DST_ADDR(ppdev, pjBase, (ulDstAddr));
  1115. WAIT_FOR_BUSY_ACL(ppdev, pjBase);
  1116. afnXferP_Narrow[cjLead](ppdev,
  1117. pjSrc,
  1118. 0,
  1119. sizlBlt.cy,
  1120. lDeltaSrc);
  1121. }
  1122. if (cjTrail)
  1123. {
  1124. LONG cjOffset = cjLead + (culMiddle<<2);
  1125. WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
  1126. CP_XCNT(ppdev, pjBase, (cjTrail - 1));
  1127. CP_YCNT(ppdev, pjBase, (sizlBlt.cy - 1));
  1128. // The next two turn off src to dst alignment
  1129. CP_DST_ADDR(ppdev, pjBase, (ulDstAddr+cjOffset));
  1130. WAIT_FOR_BUSY_ACL(ppdev, pjBase);
  1131. afnXferP_Narrow[cjTrail](ppdev,
  1132. (pjSrc+cjOffset),
  1133. 0,
  1134. sizlBlt.cy,
  1135. lDeltaSrc);
  1136. }
  1137. if (culMiddle)
  1138. {
  1139. LONG cjOffset = cjLead;
  1140. WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
  1141. CP_XCNT(ppdev, pjBase, ((culMiddle<<2) - 1));
  1142. CP_YCNT(ppdev, pjBase, (sizlBlt.cy - 1));
  1143. // The next two turn off src to dst alignment
  1144. CP_DST_ADDR(ppdev, pjBase, (ulDstAddr+cjOffset));
  1145. WAIT_FOR_BUSY_ACL(ppdev, pjBase);
  1146. vXfer_DWORDS(ppdev,
  1147. (pjSrc+cjOffset),
  1148. culMiddle,
  1149. sizlBlt.cy,
  1150. lDeltaSrc);
  1151. }
  1152. prcl++;
  1153. } while (--c != 0);
  1154. WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
  1155. CP_ROUTING_CTRL(ppdev, pjBase, 0);
  1156. }
  1157. //////////////////////////////////////////////////////////////////////
  1158. // N DWORD low level blt routines for vXferNativeI and vXferNativeP
  1159. // A DWORD at a time
  1160. VOID vXfer_DWORDS(PPDEV ppdev, BYTE* pjSrc, LONG culX, LONG cy, LONG lDeltaSrc)
  1161. {
  1162. LONG iy;
  1163. LONG ix;
  1164. BYTE* pjTmp = pjSrc;
  1165. BYTE* pjBase = ppdev->pjBase;
  1166. // We had better be in 32 bit virtual bus mode
  1167. for (iy = 0; iy < cy; iy++)
  1168. {
  1169. for (ix = 0; ix < culX; ix++)
  1170. {
  1171. CP_WRITE_MMU_DWORD(ppdev, 2, 0, *((ULONG*)pjTmp));
  1172. pjTmp += 4;
  1173. }
  1174. pjTmp = (pjSrc += lDeltaSrc);
  1175. }
  1176. }
  1177. // A BYTE at a time
  1178. VOID vXfer_BYTES(PPDEV ppdev, BYTE* pjSrc, LONG culX, LONG cy, LONG lDeltaSrc)
  1179. {
  1180. LONG iy;
  1181. LONG ix;
  1182. BYTE* pjTmp = pjSrc;
  1183. BYTE* pjBase = ppdev->pjBase;
  1184. LONG cjX = (culX << 2);
  1185. // We had better be in 8 bit virtual bus mode
  1186. for (iy = 0; iy < cy; iy++)
  1187. {
  1188. for (ix = 0; ix < cjX; ix++)
  1189. {
  1190. CP_WRITE_MMU_BYTE(ppdev, 2, 0, *pjTmp);
  1191. pjTmp++;
  1192. }
  1193. pjTmp = (pjSrc += lDeltaSrc);
  1194. }
  1195. }
  1196. //////////////////////////////////////////////////////////////////////
  1197. // Narrow low level blt routines for vXferNativeI
  1198. VOID vXferI_1_Byte(PPDEV ppdev, BYTE* pjSrc, LONG culX, LONG cy, LONG lDeltaSrc)
  1199. {
  1200. LONG iy;
  1201. LONG ix;
  1202. BYTE* pjTmp = pjSrc;
  1203. BYTE* pjBase = ppdev->pjBase;
  1204. for (iy = 0; iy < cy; iy++)
  1205. {
  1206. CP_WRITE_MMU_BYTE(ppdev, 2, 0, *pjSrc);
  1207. pjSrc += lDeltaSrc;
  1208. }
  1209. }
  1210. VOID vXferI_2_Bytes(PPDEV ppdev, BYTE* pjSrc, LONG culX, LONG cy, LONG lDeltaSrc)
  1211. {
  1212. LONG iy;
  1213. LONG ix;
  1214. BYTE* pjTmp = pjSrc;
  1215. BYTE* pjBase = ppdev->pjBase;
  1216. for (iy = 0; iy < cy; iy++)
  1217. {
  1218. CP_WRITE_MMU_BYTE(ppdev, 2, 0, *pjTmp); pjTmp++;
  1219. CP_WRITE_MMU_BYTE(ppdev, 2, 0, *pjTmp);
  1220. pjTmp = (pjSrc += lDeltaSrc);
  1221. }
  1222. }
  1223. VOID vXferI_3_Bytes(PPDEV ppdev, BYTE* pjSrc, LONG culX, LONG cy, LONG lDeltaSrc)
  1224. {
  1225. LONG iy;
  1226. LONG ix;
  1227. BYTE* pjTmp = pjSrc;
  1228. BYTE* pjBase = ppdev->pjBase;
  1229. for (iy = 0; iy < cy; iy++)
  1230. {
  1231. CP_WRITE_MMU_BYTE(ppdev, 2, 0, *pjTmp); pjTmp++;
  1232. CP_WRITE_MMU_BYTE(ppdev, 2, 0, *pjTmp); pjTmp++;
  1233. CP_WRITE_MMU_BYTE(ppdev, 2, 0, *pjTmp);
  1234. pjTmp = (pjSrc += lDeltaSrc);
  1235. }
  1236. }
  1237. //////////////////////////////////////////////////////////////////////
  1238. // Narrow low level blt routines for vXferNativeP
  1239. VOID vXferP_1_Byte(PPDEV ppdev, BYTE* pjSrc, LONG index, LONG cy, LONG lDeltaSrc)
  1240. {
  1241. LONG iy;
  1242. LONG ix;
  1243. BYTE* pjTmp = pjSrc;
  1244. BYTE* pjBase = ppdev->pjBase;
  1245. for (iy = 0; iy < cy; iy++)
  1246. {
  1247. CP_WRITE_MMU_BYTE(ppdev, 2, index, *pjSrc);
  1248. pjSrc += lDeltaSrc;
  1249. }
  1250. }
  1251. VOID vXferP_2_Bytes(PPDEV ppdev, BYTE* pjSrc, LONG index, LONG cy, LONG lDeltaSrc)
  1252. {
  1253. LONG iy;
  1254. LONG ix;
  1255. BYTE* pjTmp = pjSrc;
  1256. BYTE* pjBase = ppdev->pjBase;
  1257. for (iy = 0; iy < cy; iy++)
  1258. {
  1259. CP_WRITE_MMU_WORD(ppdev, 2, index, *((WORD*)pjTmp));
  1260. pjTmp = (pjSrc += lDeltaSrc);
  1261. }
  1262. }
  1263. VOID vXferP_3_Bytes(PPDEV ppdev, BYTE* pjSrc, LONG index, LONG cy, LONG lDeltaSrc)
  1264. {
  1265. LONG iy;
  1266. LONG ix;
  1267. BYTE* pjTmp = pjSrc;
  1268. BYTE* pjBase = ppdev->pjBase;
  1269. if (index & 1)
  1270. {
  1271. for (iy = 0; iy < cy; iy++)
  1272. {
  1273. CP_WRITE_MMU_BYTE(ppdev, 2, index, *pjTmp);
  1274. pjTmp++;
  1275. CP_WRITE_MMU_WORD(ppdev, 2, index+1, *((WORD*)pjTmp));
  1276. pjTmp = (pjSrc += lDeltaSrc);
  1277. }
  1278. }
  1279. else
  1280. {
  1281. for (iy = 0; iy < cy; iy++)
  1282. {
  1283. CP_WRITE_MMU_WORD(ppdev, 2, index, *((WORD*)pjTmp));
  1284. pjTmp+=2;
  1285. CP_WRITE_MMU_BYTE(ppdev, 2, index+2, *pjTmp);
  1286. pjTmp = (pjSrc += lDeltaSrc);
  1287. }
  1288. }
  1289. }
  1290. // This routine was added to perform accelerated host to screen blts for the
  1291. // ET6000. The W32 had a path from host memory to display memory which allowed
  1292. // ROPs to be performed as the data was transferred. The ET6000 does not have
  1293. // that feature, so to provide accelerated host to screen support we must
  1294. // buffer each scanline of the source in offscreen memory and then perform
  1295. // a blt to move it into the appropriate area of display memory. This is
  1296. // much more efficient than hand coding each rop or punting to GDI.
  1297. VOID vXferET6000(
  1298. PDEV* ppdev,
  1299. LONG c, // Count of rectangles, can't be zero
  1300. RECTL* prcl, // Array of relative coordinates destination rectangles
  1301. ROP4 rop4, // Obvious?
  1302. SURFOBJ* psoSrc, // Source surface
  1303. POINTL* pptlSrc, // Original unclipped source point
  1304. RECTL* prclDst, // Original unclipped destination rectangle
  1305. XLATEOBJ* pxlo) // Not used
  1306. {
  1307. BYTE* pjBase = ppdev->pjBase;
  1308. BYTE* pjSrcScan0 = (BYTE*) psoSrc->pvScan0;
  1309. LONG lDeltaDst = ppdev->lDelta;
  1310. LONG lDeltaSrc = psoSrc->lDelta;
  1311. POINTL ptlSrc = *pptlSrc;
  1312. RECTL rclDst = *prclDst;
  1313. LONG cBpp = ppdev->cBpp;
  1314. SIZEL sizlBlt;
  1315. ULONG ulDstAddr;
  1316. BYTE* pjSrc;
  1317. BYTE* pjDst;
  1318. INT ix, iy;
  1319. LONG dx;
  1320. LONG dy; // Add delta to destination to get source
  1321. LONG iLeadNdx;
  1322. LONG cjLead;
  1323. LONG cjTrail;
  1324. LONG culMiddle;
  1325. LONG xyOffset = (cBpp * ppdev->xOffset) +
  1326. (lDeltaDst * ppdev->yOffset);
  1327. ULONG ulBltBufferOffset = (cBpp * ppdev->pohBltBuffer->x) +
  1328. (lDeltaDst * ppdev->pohBltBuffer->y);
  1329. ULONG BltScanOffset = 0;
  1330. //
  1331. // The src-dst delta will be the same for all rectangles
  1332. //
  1333. dx = ptlSrc.x - rclDst.left;
  1334. dy = ptlSrc.y - rclDst.top;
  1335. WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
  1336. CP_FG_ROP(ppdev, pjBase, (rop4 >> 8));
  1337. CP_BK_ROP(ppdev, pjBase, (rop4 & 0xff));
  1338. CP_SRC_WRAP(ppdev, pjBase, NO_PATTERN_WRAP);
  1339. CP_SRC_Y_OFFSET(ppdev, pjBase, (lDeltaDst - 1));
  1340. CP_DST_Y_OFFSET(ppdev, pjBase, (lDeltaDst - 1));
  1341. do
  1342. {
  1343. BYTE* pjTmp;
  1344. // Calculate blt dimensions in bytes
  1345. sizlBlt.cx = cBpp * (prcl->right - prcl->left);
  1346. sizlBlt.cy = prcl->bottom - prcl->top;
  1347. pjSrc = pjSrcScan0 +
  1348. ((prcl->top + dy) * lDeltaSrc) +
  1349. ((prcl->left + dx) * cBpp);
  1350. pjTmp = pjSrc;
  1351. cjTrail = iLeadNdx = (LONG)((ULONG_PTR)pjSrc);
  1352. iLeadNdx &= 3;
  1353. cjLead = aulLeadCnt[iLeadNdx];
  1354. if (cjLead < sizlBlt.cx)
  1355. {
  1356. cjTrail += sizlBlt.cx;
  1357. cjTrail &= 3;
  1358. culMiddle = (sizlBlt.cx - (cjLead + cjTrail)) >> 2;
  1359. }
  1360. else
  1361. {
  1362. cjLead = sizlBlt.cx;
  1363. cjTrail = 0;
  1364. culMiddle = 0;
  1365. }
  1366. ASSERTDD(culMiddle >= 0, "vXferET6000: culMiddle < 0");
  1367. ulDstAddr = (prcl->top * lDeltaDst) +
  1368. (prcl->left * cBpp) +
  1369. (xyOffset);
  1370. if ((sizlBlt.cx - (cjLead + cjTrail)) & 3)
  1371. DISPDBG((0, "WARNING: cx - (cjLead+cjTail) not multiple of 4"));
  1372. DISPDBG((8, "rclSrc(%d,%d,%d,%d)",
  1373. prcl->left+dx,
  1374. prcl->top+dy,
  1375. prcl->right+dx,
  1376. prcl->bottom+dy
  1377. ));
  1378. DISPDBG((8, "rclDst(%d,%d,%d,%d)",
  1379. prcl->left,
  1380. prcl->top,
  1381. prcl->right,
  1382. prcl->bottom
  1383. ));
  1384. DISPDBG((8, "pjSrc(%x) cx(%d) ulDstAddr(%xh) (%d,%d,%d)",
  1385. pjSrc,
  1386. sizlBlt.cx,
  1387. ulDstAddr,
  1388. cjLead,
  1389. culMiddle,
  1390. cjTrail
  1391. ));
  1392. for (iy = 0; iy < sizlBlt.cy; iy++)
  1393. {
  1394. LONG ix, lScanLineOffset;
  1395. // We'll first load the first scan line of
  1396. // the BltBuffer and then load the second. The second scan line
  1397. // will be loaded into the BltBuffer while the first is still being
  1398. // processed. We'll alternate between the two segments of our
  1399. // BltBuffer until all scans have been processed.
  1400. pjDst = ppdev->pjScreen + ulBltBufferOffset + BltScanOffset;
  1401. if (cjLead)
  1402. {
  1403. for (ix = 0; ix < cjLead; ix++)
  1404. {
  1405. *pjDst++ = *pjTmp++;
  1406. }
  1407. }
  1408. if (culMiddle)
  1409. {
  1410. for (ix = 0; ix < culMiddle; ix++)
  1411. {
  1412. *((ULONG*)pjDst)++ = *((ULONG*)pjTmp)++;
  1413. }
  1414. }
  1415. if (cjTrail)
  1416. {
  1417. for (ix = 0; ix < cjTrail; ix++)
  1418. {
  1419. *pjDst++ = *pjTmp++;
  1420. }
  1421. }
  1422. // Now that we've loaded our scanline into a segment of our BltBuffer,
  1423. // we need to trigger an accelerator operation to transfer it into
  1424. // visible screen memory. Our static stuff will have already been setup
  1425. // prior to entering any of our loops.
  1426. WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
  1427. CP_XCNT(ppdev, pjBase, (sizlBlt.cx - 1));
  1428. CP_YCNT(ppdev, pjBase, 0); // Only 1 scan at a time
  1429. CP_SRC_ADDR(ppdev, pjBase, (ulBltBufferOffset + BltScanOffset));
  1430. WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
  1431. CP_DST_ADDR(ppdev, pjBase, ulDstAddr);
  1432. BltScanOffset ^= ppdev->lBltBufferPitch;
  1433. pjTmp = (pjSrc += lDeltaSrc);
  1434. ulDstAddr += lDeltaDst;
  1435. } // next cy
  1436. prcl++;
  1437. } while (--c != 0);
  1438. }
  1439. /**************************************************************************
  1440. *
  1441. * Does a monochrome expansion to video memory.
  1442. *
  1443. **************************************************************************/
  1444. VOID vET6000SlowXfer1bpp( // Type FNXFER
  1445. PDEV* ppdev,
  1446. LONG c, // Count of rectangles, can't be zero
  1447. RECTL* prcl, // List of destination rectangles, in relative
  1448. // coordinates
  1449. ROP4 rop4, // Actually had better be a rop3
  1450. SURFOBJ* psoSrc, // Source surface
  1451. POINTL* pptlSrc, // Original unclipped source point
  1452. RECTL* prclDst, // Original unclipped destination rectangle
  1453. XLATEOBJ* pxlo) // Translate that provides color-expansion information
  1454. {
  1455. LONG dx;
  1456. LONG dy;
  1457. LONG lSrcDelta;
  1458. BYTE* pjSrcScan0;
  1459. BYTE* pjSrc;
  1460. LONG cjSrc;
  1461. LONG cjTrail;
  1462. LONG culSrc;
  1463. BYTE jFgRop3;
  1464. BYTE jBgRop3;
  1465. ULONG ulSolidColorOffset = ppdev->ulSolidColorOffset;
  1466. BYTE* pjBase = ppdev->pjBase;
  1467. LONG lDelta = ppdev->lDelta;
  1468. LONG cBpp = ppdev->cBpp;
  1469. ULONG ulFgColor = pxlo->pulXlate[1];
  1470. ULONG ulBgColor = pxlo->pulXlate[0];
  1471. LONG xyOffset = (ppdev->cBpp * ppdev->xOffset) +
  1472. (ppdev->yOffset * ppdev->lDelta);
  1473. LONG lBltBuffer = (ppdev->pohBltBuffer->x * ppdev->cBpp) +
  1474. (ppdev->pohBltBuffer->y * ppdev->lDelta);
  1475. DISPDBG((10,"vET6000SlowXfer1bpp called"));
  1476. DISPDBG((11,"rop4(%04x)", rop4));
  1477. ASSERTDD(c > 0, "Can't handle zero rectangles");
  1478. ASSERTDD(pptlSrc != NULL && psoSrc != NULL, "Can't have NULL sources");
  1479. WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
  1480. jFgRop3 = (BYTE)(rop4 >> 8); // point to src color where src is indicated
  1481. // point to pat color where src is indicated
  1482. if ((BYTE) rop4 != R3_NOP)
  1483. {
  1484. jBgRop3 = (BYTE)((rop4 & 0xc3) | ((rop4 & 0xf0) >> 2));
  1485. }
  1486. else
  1487. {
  1488. jBgRop3 = (BYTE) rop4;
  1489. }
  1490. DISPDBG((11,"jFgRop3(%04x), jBgRop3(%04x)", jFgRop3, jBgRop3));
  1491. CP_FG_ROP(ppdev, pjBase, jFgRop3);
  1492. CP_BK_ROP(ppdev, pjBase, jBgRop3);
  1493. CP_DST_Y_OFFSET(ppdev, pjBase, (lDelta - 1));
  1494. CP_PAT_WRAP(ppdev, pjBase, SOLID_COLOR_PATTERN_WRAP);
  1495. CP_PAT_Y_OFFSET(ppdev, pjBase, (SOLID_COLOR_PATTERN_OFFSET - 1));
  1496. CP_SRC_WRAP(ppdev, pjBase, SOLID_COLOR_PATTERN_WRAP);
  1497. CP_SRC_Y_OFFSET(ppdev, pjBase, (SOLID_COLOR_PATTERN_OFFSET - 1));
  1498. CP_PAT_ADDR(ppdev, pjBase, ulSolidColorOffset + 4);
  1499. CP_SRC_ADDR(ppdev, pjBase, ulSolidColorOffset);
  1500. CP_PEL_DEPTH(ppdev, pjBase, (cBpp - 1) << 4);
  1501. // Here we are going to load the foreground and background colors into
  1502. // display memory. We'll use the area for solid colors that we allocated
  1503. // earlier.
  1504. {
  1505. // Set the color in offscreen memory
  1506. if (cBpp == 1)
  1507. {
  1508. ulFgColor &= 0x000000FF; // We may get some extraneous data in the
  1509. ulBgColor &= 0x000000FF; // unused portion of our color. Clear it.
  1510. ulFgColor |= ulFgColor << 8;
  1511. ulBgColor |= ulBgColor << 8;
  1512. }
  1513. if (cBpp <= 2)
  1514. {
  1515. ulFgColor &= 0x0000FFFF;
  1516. ulBgColor &= 0x0000FFFF;
  1517. ulFgColor |= ulFgColor << 16;
  1518. ulBgColor |= ulBgColor << 16;
  1519. }
  1520. // We don't want to change the colors if the accelerator is active, because
  1521. // a previous oepration might be using them.
  1522. WAIT_FOR_IDLE_ACL(ppdev, pjBase);
  1523. *(PULONG)(ppdev->pjScreen + ppdev->ulSolidColorOffset) = ulFgColor;
  1524. *(PULONG)(ppdev->pjScreen + ppdev->ulSolidColorOffset + 4) = ulBgColor;
  1525. }
  1526. // This is the mix control register for the ET6000. We are setting it to
  1527. // use a mix ROP of 2, which specifies that a 0 in the mixmap selects the
  1528. // background color and 1 selects the foreground color. Bit 7 says that
  1529. // we want bit 7 of each byte in our mix data to be pixel 0. This should
  1530. // be the way that NT wants it. We also have to set our mask ROP so we
  1531. // can get the data onto the screen.
  1532. CP_ROUTING_CTRL(ppdev, pjBase, 0xB2);
  1533. dx = pptlSrc->x - prclDst->left;
  1534. dy = pptlSrc->y - prclDst->top; // Add to destination to get source
  1535. pjSrcScan0 = psoSrc->pvScan0;
  1536. DISPDBG((2,"lSrcDelta(%x)", psoSrc->lDelta));
  1537. do
  1538. {
  1539. ULONG ulDst;
  1540. RECTL rclSrc;
  1541. RECTL rclDst;
  1542. BYTE* pjTmp;
  1543. BYTE* pjDst;
  1544. LONG i;
  1545. BYTE *pjMmu1 = ppdev->pjMmu1;
  1546. long lDwords, lBytes, lStart;
  1547. int cBitsToSkip;
  1548. // load lSrcDelta inside the loop because we adjust it later.
  1549. lSrcDelta = psoSrc->lDelta;
  1550. rclDst = *prcl;
  1551. rclSrc.left = rclDst.left + dx;
  1552. rclSrc.right = rclDst.right + dx;
  1553. rclSrc.top = rclDst.top + dy;
  1554. rclSrc.bottom = rclDst.bottom + dy;
  1555. // x = prcl->left;
  1556. // y = prcl->top;
  1557. DISPDBG((2,"rclSrc(%d,%d,%d,%d) rclDst(%d,%d,%d,%d)",
  1558. rclSrc.left,
  1559. rclSrc.top,
  1560. rclSrc.right,
  1561. rclSrc.bottom,
  1562. rclDst.left,
  1563. rclDst.top,
  1564. rclDst.right,
  1565. rclDst.bottom));
  1566. WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
  1567. CP_XCNT(ppdev, pjBase, ((rclSrc.right - rclSrc.left) * cBpp) - 1);
  1568. CP_YCNT(ppdev, pjBase, 0); // 1 scan at a time
  1569. pjSrc = pjSrcScan0 + rclSrc.top * lSrcDelta
  1570. + (rclSrc.left >> 3);
  1571. cBitsToSkip = rclSrc.left % 8;
  1572. pjTmp = pjSrc;
  1573. ulDst = (rclDst.top * lDelta) + (cBpp * rclDst.left);
  1574. ulDst += xyOffset;
  1575. WAIT_FOR_IDLE_ACL(ppdev, pjBase);
  1576. // We are going to transfer the mix map into our BltBuffer so
  1577. // we can get it to the screen.
  1578. CP_MIX_Y_OFFSET(ppdev, pjBase, 0); // 1 scan at a time
  1579. // We are using the rectangle dimensions to determine how many pixels per line to move. This
  1580. // fixes a bug exposed by the HCT when we had to clip a large temporary buffer and would draw
  1581. // using data close to the end of the buffer. We would get a protection exception depending on
  1582. // whether we ran too close to the end of the buffer. lSrcDelta will still be used when
  1583. // stepping through the source bitmap, but not to determine how many pixels will be drawn.
  1584. //
  1585. // We're adding cBitsToSkip back into here because it's necessary to compute the correct number
  1586. // of bytes to move. We always round to the next byte.
  1587. // i = abs(lSrcDelta); // this doesn't work
  1588. i = ((rclSrc.right - rclSrc.left) + cBitsToSkip + 7) >> 3; // Round up before shift.
  1589. lDwords = i / 4;
  1590. lBytes = i % 4;
  1591. lStart = 0;
  1592. // Here we are going to transfer the monochrome bitmap to the screen.
  1593. // We'll double buffer it to get some more throughput.
  1594. for (i=0; i < (rclSrc.bottom - rclSrc.top); i++)
  1595. {
  1596. long ix;
  1597. pjDst = ppdev->pjScreen + lBltBuffer + lStart;
  1598. ix = lDwords;
  1599. while (ix--)
  1600. {
  1601. *((ULONG*)pjDst)++ = *((ULONG*)pjTmp)++;
  1602. }
  1603. ix = lBytes;
  1604. while (ix--)
  1605. {
  1606. *pjDst++ = *pjTmp++;
  1607. }
  1608. WAIT_FOR_IDLE_ACL(ppdev, pjBase);
  1609. // We have to add in rclSrc.left mod 8 to compensate for the possibility
  1610. // of starting to draw to soon in our bitmap. This generally occurs when
  1611. // clipping text or moving windows where we are only asked to draw
  1612. // part of a monochrome bitmap.
  1613. CP_MIX_ADDR(ppdev, pjBase, ((lBltBuffer + lStart) * 8) + cBitsToSkip);
  1614. CP_DST_ADDR(ppdev, pjBase, ulDst);
  1615. pjTmp = (pjSrc += lSrcDelta);
  1616. ulDst += lDelta;
  1617. lStart ^= ppdev->lBltBufferPitch;
  1618. }
  1619. prcl++;
  1620. } while (--c != 0);
  1621. WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
  1622. CP_ROUTING_CTRL(ppdev, pjBase, 0x33);
  1623. CP_PEL_DEPTH(ppdev, pjBase, 0);
  1624. }