Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

918 lines
26 KiB

  1. /******************************Module*Header*******************************\
  2. * Module Name: blt.c
  3. *
  4. * Contains the low-level blt functions.
  5. *
  6. * Hopefully, if you're basing your display driver on this code, to
  7. * support all of DrvBitBlt and DrvCopyBits, you'll only have to implement
  8. * the following routines. You shouldn't have to modify much in
  9. * 'bitblt.c'. I've tried to make these routines as few, modular, simple,
  10. * and efficient as I could, while still accelerating as many calls as
  11. * possible that would be cost-effective in terms of performance wins
  12. * versus size and effort.
  13. *
  14. * Note: In the following, 'relative' coordinates refers to coordinates
  15. * that haven't yet had the offscreen bitmap (DFB) offset applied.
  16. * 'Absolute' coordinates have had the offset applied. For example,
  17. * we may be told to blt to (1, 1) of the bitmap, but the bitmap may
  18. * be sitting in offscreen memory starting at coordinate (0, 768) --
  19. * (1, 1) would be the 'relative' start coordinate, and (1, 769)
  20. * would be the 'absolute' start coordinate.
  21. *
  22. * Copyright (c) 1992-1996 Microsoft Corporation
  23. * Copyright (c) 1993-1996 Matrox Electronic Systems, Ltd.
  24. \**************************************************************************/
  25. #include "precomp.h"
  26. /******************************Public*Routine******************************\
  27. * VOID vFillPat1bpp
  28. *
  29. \**************************************************************************/
  30. VOID vFillPat1bpp( // Type FNFILL
  31. PDEV* ppdev,
  32. LONG c, // Can't be zero
  33. RECTL* prcl, // List of rectangles to be filled, in relative
  34. // coordinates
  35. ULONG rop4, // Rop4
  36. RBRUSH_COLOR rbc, // rbc.prb points to brush realization structure
  37. POINTL* pptlBrush) // Pattern alignment
  38. {
  39. BYTE* pjBase;
  40. RBRUSH* prb;
  41. LONG xOffset;
  42. LONG yOffset;
  43. ULONG ulDwg;
  44. ULONG ulHwMix;
  45. ASSERTDD(rbc.prb->fl & RBRUSH_2COLOR, "Must be 2 colour pattern here");
  46. pjBase = ppdev->pjBase;
  47. xOffset = ppdev->xOffset;
  48. yOffset = ppdev->yOffset;
  49. if ((rop4 & 0xff) == 0xf0)
  50. {
  51. ulDwg = opcode_TRAP + blockm_OFF + atype_RPL + bop_SRCCOPY;
  52. }
  53. else
  54. {
  55. ulHwMix = (rop4 & 0x03) + ((rop4 & 0x30) >> 2);
  56. ulDwg = opcode_TRAP + blockm_OFF + atype_RSTR + (ulHwMix << 16);
  57. }
  58. if (((rop4 >> 8) & 0xff) == (rop4 & 0xff))
  59. {
  60. // Normal opaque mode:
  61. ulDwg |= transc_BG_OPAQUE;
  62. }
  63. else
  64. {
  65. // GDI guarantees us that if the foreground and background
  66. // ROPs are different, the background rop is LEAVEALONE:
  67. ulDwg |= transc_BG_TRANSP;
  68. }
  69. if ((GET_CACHE_FLAGS(ppdev, (SIGN_CACHE | ARX_CACHE))) == (SIGN_CACHE | ARX_CACHE))
  70. {
  71. CHECK_FIFO_SPACE(pjBase, 12);
  72. }
  73. else
  74. {
  75. CHECK_FIFO_SPACE(pjBase, 17);
  76. CP_WRITE(pjBase, DWG_SGN, 0);
  77. CP_WRITE(pjBase, DWG_AR1, 0);
  78. CP_WRITE(pjBase, DWG_AR2, 0);
  79. CP_WRITE(pjBase, DWG_AR4, 0);
  80. CP_WRITE(pjBase, DWG_AR5, 0);
  81. }
  82. ppdev->HopeFlags = (SIGN_CACHE | ARX_CACHE);
  83. CP_WRITE(pjBase, DWG_DWGCTL, ulDwg);
  84. CP_WRITE(pjBase, DWG_SHIFT, ((-(pptlBrush->y + yOffset) & 7) << 4) |
  85. (-(pptlBrush->x + xOffset) & 7));
  86. prb = rbc.prb;
  87. CP_WRITE(pjBase, DWG_FCOL, COLOR_REPLICATE(ppdev, prb->ulColor[1]));
  88. CP_WRITE(pjBase, DWG_BCOL, COLOR_REPLICATE(ppdev, prb->ulColor[0]));
  89. CP_WRITE(pjBase, DWG_SRC0, prb->aulPattern[0]);
  90. CP_WRITE(pjBase, DWG_SRC1, prb->aulPattern[1]);
  91. CP_WRITE(pjBase, DWG_SRC2, prb->aulPattern[2]);
  92. CP_WRITE(pjBase, DWG_SRC3, prb->aulPattern[3]);
  93. while(TRUE)
  94. {
  95. CP_WRITE(pjBase, DWG_FXLEFT, prcl->left + xOffset);
  96. CP_WRITE(pjBase, DWG_FXRIGHT, prcl->right + xOffset);
  97. CP_WRITE(pjBase, DWG_LEN, prcl->bottom - prcl->top);
  98. CP_START(pjBase, DWG_YDST, prcl->top + yOffset);
  99. if (--c == 0)
  100. return;
  101. prcl++;
  102. CHECK_FIFO_SPACE(pjBase, 4);
  103. }
  104. }
  105. /******************************Public*Routine******************************\
  106. * VOID vXfer4bpp
  107. *
  108. * Does a 4bpp transfer from a bitmap to the screen.
  109. *
  110. * The reason we implement this is that a lot of resources are kept as 4bpp,
  111. * and used to initialize DFBs, some of which we of course keep off-screen.
  112. *
  113. \**************************************************************************/
  114. VOID vXfer4bpp( // Type FNXFER
  115. PDEV* ppdev,
  116. LONG c, // Count of rectangles, can't be zero
  117. RECTL* prcl, // List of destination rectangles, in relative
  118. // coordinates
  119. ULONG rop4, // Rop4
  120. SURFOBJ* psoSrc, // Source surface
  121. POINTL* pptlSrc, // Original unclipped source point
  122. RECTL* prclDst, // Original unclipped destination rectangle
  123. XLATEOBJ* pxlo) // Translate that provides colour-expansion information
  124. {
  125. BYTE* pjBase;
  126. LONG xOffset;
  127. LONG yOffset;
  128. LONG cjPelSize;
  129. LONG dx;
  130. LONG dy;
  131. LONG cx;
  132. LONG cy;
  133. LONG lSrcDelta;
  134. BYTE* pjSrcScan0;
  135. BYTE* pjSrc;
  136. BYTE* pjDst;
  137. LONG xSrc;
  138. LONG iLoop;
  139. BYTE jSrc;
  140. ULONG* pulXlate;
  141. ULONG ulHwMix;
  142. ULONG ulCtl;
  143. LONG i;
  144. ULONG ul;
  145. LONG xBug;
  146. LONG xAbsLeft;
  147. BOOL bHwBug;
  148. LONG cjSrc;
  149. LONG cwSrc;
  150. LONG lSrcSkip;
  151. LONG cxRem;
  152. ULONG ul0;
  153. ULONG ul1;
  154. ULONG ulBoardId;
  155. ASSERTDD(psoSrc->iBitmapFormat == BMF_4BPP, "Source must be 4bpp");
  156. ASSERTDD(c > 0, "Can't handle zero rectangles");
  157. pjBase = ppdev->pjBase;
  158. xOffset = ppdev->xOffset;
  159. yOffset = ppdev->yOffset;
  160. cjPelSize = ppdev->cjPelSize;
  161. pulXlate = pxlo->pulXlate;
  162. ulBoardId = ppdev->ulBoardId;
  163. dx = pptlSrc->x - prclDst->left;
  164. dy = pptlSrc->y - prclDst->top; // Add to destination to get source
  165. lSrcDelta = psoSrc->lDelta;
  166. pjSrcScan0 = psoSrc->pvScan0;
  167. if (rop4 == 0xcccc) // SRCCOPY
  168. {
  169. ulCtl = (opcode_ILOAD + atype_RPL + blockm_OFF + pattern_OFF +
  170. transc_BG_OPAQUE + bop_SRCCOPY);
  171. }
  172. else
  173. {
  174. ulHwMix = rop4 & 0xf;
  175. ulCtl = (opcode_ILOAD + atype_RSTR + blockm_OFF + pattern_OFF +
  176. transc_BG_OPAQUE + (ulHwMix << 16));
  177. }
  178. if (ulBoardId != MGA_STORM)
  179. {
  180. if (cjPelSize >= 3)
  181. {
  182. ulCtl |= (hcprs_SRC_24_BPP | bltmod_BUCOL);
  183. xBug = 0;
  184. }
  185. else
  186. {
  187. ulCtl |= (bltmod_BFCOL);
  188. xBug = (8 >> cjPelSize); // 8bpp and 16bpp have h/w alignment bugs
  189. }
  190. }
  191. else
  192. {
  193. ulCtl |= (bltmod_BFCOL);
  194. xBug = 0;
  195. }
  196. CHECK_FIFO_SPACE(pjBase, 11);
  197. CP_WRITE(pjBase, DWG_DWGCTL, ulCtl);
  198. CP_WRITE(pjBase, DWG_SHIFT, 0);
  199. if (!(GET_CACHE_FLAGS(ppdev, SIGN_CACHE)))
  200. {
  201. CP_WRITE(pjBase, DWG_SGN, 0);
  202. }
  203. if (!(GET_CACHE_FLAGS(ppdev, ARX_CACHE)))
  204. {
  205. CP_WRITE(pjBase, DWG_AR5, 0);
  206. }
  207. // The SRC0 - SRC3 registers will be trashed by the blt. AR0 will
  208. // be modified shortly:
  209. ppdev->HopeFlags = SIGN_CACHE;
  210. while(TRUE)
  211. {
  212. cx = prcl->right - prcl->left;
  213. cy = prcl->bottom - prcl->top;
  214. CP_WRITE(pjBase, DWG_FXRIGHT, xOffset + prcl->right - 1);
  215. CP_WRITE(pjBase, DWG_YDST, yOffset + prcl->top);
  216. CP_WRITE(pjBase, DWG_LEN, cy);
  217. CP_WRITE(pjBase, DWG_AR3, 0);
  218. xSrc = prcl->left + dx;
  219. pjSrc = pjSrcScan0 + (prcl->top + dy) * lSrcDelta + (xSrc >> 1);
  220. xAbsLeft = (xOffset + prcl->left);
  221. CP_WRITE(pjBase, DWG_CXLEFT, xAbsLeft);
  222. xAbsLeft -= (xSrc & 1); // Align to start of first source byte
  223. cx += (xSrc & 1);
  224. bHwBug = (ulBoardId != MGA_STORM) && (xAbsLeft & xBug);
  225. if (!bHwBug)
  226. {
  227. CP_WRITE(pjBase, DWG_FXLEFT, xAbsLeft);
  228. CP_START(pjBase, DWG_AR0, cx - 1);
  229. }
  230. else
  231. {
  232. CP_WRITE(pjBase, DWG_FXLEFT, xAbsLeft - xBug);
  233. CP_START(pjBase, DWG_AR0, cx + xBug - 1);
  234. }
  235. cjSrc = (cx + 1) >> 1; // Number of source bytes touched
  236. lSrcSkip = lSrcDelta - cjSrc;
  237. // Make sure the MGA is ready to take the data:
  238. CHECK_FIFO_SPACE(pjBase, 32);
  239. if (cjPelSize == 1)
  240. {
  241. // This part handles 8bpp output:
  242. cwSrc = (cjSrc >> 1); // Number of whole source words
  243. do {
  244. if (bHwBug)
  245. CP_WRITE_SRC(pjBase, 0);
  246. for (i = cwSrc; i != 0; i--)
  247. {
  248. jSrc = *pjSrc++;
  249. ul = (pulXlate[jSrc >> 4]);
  250. ul |= (pulXlate[jSrc & 0xf] << 8);
  251. jSrc = *pjSrc++;
  252. ul |= (pulXlate[jSrc >> 4] << 16);
  253. ul |= (pulXlate[jSrc & 0xf] << 24);
  254. CP_WRITE_SRC(pjBase, ul);
  255. }
  256. // Handle an odd end byte, if there is one:
  257. if (cjSrc & 1)
  258. {
  259. jSrc = *pjSrc++;
  260. ul = (pulXlate[jSrc >> 4]);
  261. ul |= (pulXlate[jSrc & 0xf] << 8);
  262. CP_WRITE_SRC(pjBase, ul);
  263. }
  264. pjSrc += lSrcSkip;
  265. } while (--cy != 0);
  266. }
  267. else if (cjPelSize == 2)
  268. {
  269. // This part handles 16bpp output:
  270. do {
  271. if (bHwBug)
  272. CP_WRITE_SRC(pjBase, 0);
  273. i = cjSrc;
  274. do {
  275. jSrc = *pjSrc++;
  276. ul = (pulXlate[jSrc >> 4]);
  277. ul |= (pulXlate[jSrc & 0xf] << 16);
  278. CP_WRITE_SRC(pjBase, ul);
  279. } while (--i != 0);
  280. pjSrc += lSrcSkip;
  281. } while (--cy != 0);
  282. }
  283. else if (cjPelSize == 4)
  284. {
  285. cjSrc = cx >> 1; // Number of whole source bytes touched
  286. cxRem = cx & 1;
  287. // This part handles 32bpp output:
  288. do {
  289. if (bHwBug)
  290. CP_WRITE_SRC(pjBase, 0);
  291. i = cjSrc;
  292. while (i--) // may be 0
  293. {
  294. jSrc = *pjSrc++;
  295. ul = (pulXlate[jSrc >> 4]);
  296. CP_WRITE_SRC(pjBase, ul);
  297. ul = (pulXlate[jSrc & 0xf]);
  298. CP_WRITE_SRC(pjBase, ul);
  299. }
  300. if (cxRem)
  301. {
  302. jSrc = *pjSrc++;
  303. ul = (pulXlate[jSrc >> 4]);
  304. CP_WRITE_SRC(pjBase, ul);
  305. }
  306. pjSrc += lSrcSkip;
  307. } while (--cy != 0);
  308. }
  309. else
  310. {
  311. // This part handles packed 24bpp output:
  312. ASSERTDD(!bHwBug, "There is no hardware bug when higher than 16bpp");
  313. cwSrc = (cx >> 2); // Number of whole source words
  314. cxRem = (cx & 3);
  315. if (cxRem == 3)
  316. {
  317. // Merge this case into the whole word case:
  318. cwSrc++;
  319. cxRem = 0;
  320. }
  321. do {
  322. for (i = cwSrc; i != 0; i--)
  323. {
  324. jSrc = *pjSrc++;
  325. ul0 = (pulXlate[jSrc >> 4]);
  326. ul1 = (pulXlate[jSrc & 0xf]);
  327. ul = ul0 | (ul1 << 24);
  328. CP_WRITE_SRC(pjBase, ul);
  329. jSrc = *pjSrc++;
  330. ul0 = (pulXlate[jSrc >> 4]);
  331. ul = (ul1 >> 8) | (ul0 << 16);
  332. CP_WRITE_SRC(pjBase, ul);
  333. ul1 = (pulXlate[jSrc & 0xf]);
  334. ul = (ul1 << 8) | (ul0 >> 16);
  335. CP_WRITE_SRC(pjBase, ul);
  336. }
  337. if (cxRem > 0)
  338. {
  339. jSrc = *pjSrc++;
  340. ul0 = (pulXlate[jSrc >> 4]);
  341. ul1 = (pulXlate[jSrc & 0xf]);
  342. ul = ul0 | (ul1 << 24);
  343. CP_WRITE_SRC(pjBase, ul);
  344. if (cxRem > 1)
  345. {
  346. ul = (ul1 >> 8);
  347. CP_WRITE_SRC(pjBase, ul);
  348. }
  349. }
  350. pjSrc += lSrcSkip;
  351. } while (--cy != 0);
  352. }
  353. if (--c == 0)
  354. {
  355. // Restore the clipping:
  356. CHECK_FIFO_SPACE(pjBase, 1);
  357. CP_WRITE(pjBase, DWG_CXLEFT, 0);
  358. return;
  359. }
  360. prcl++;
  361. CHECK_FIFO_SPACE(pjBase, 7);
  362. }
  363. }
  364. /******************************Public*Routine******************************\
  365. * VOID vXfer8bpp
  366. *
  367. * Does a 8bpp transfer from a bitmap to the screen.
  368. *
  369. * The reason we implement this is that a lot of resources are kept as 8bpp,
  370. * and used to initialize DFBs, some of which we of course keep off-screen.
  371. *
  372. \**************************************************************************/
  373. VOID vXfer8bpp( // Type FNXFER
  374. PDEV* ppdev,
  375. LONG c, // Count of rectangles, can't be zero
  376. RECTL* prcl, // List of destination rectangles, in relative
  377. // coordinates
  378. ULONG rop4, // Rop4
  379. SURFOBJ* psoSrc, // Source surface
  380. POINTL* pptlSrc, // Original unclipped source point
  381. RECTL* prclDst, // Original unclipped destination rectangle
  382. XLATEOBJ* pxlo) // Translate that provides colour-expansion information
  383. {
  384. BYTE* pjBase;
  385. LONG xOffset;
  386. LONG yOffset;
  387. LONG cjPelSize;
  388. LONG dx;
  389. LONG dy;
  390. LONG cx;
  391. LONG cy;
  392. LONG lSrcDelta;
  393. BYTE* pjSrcScan0;
  394. BYTE* pjSrc;
  395. BYTE* pjDst;
  396. LONG xSrc;
  397. LONG iLoop;
  398. ULONG* pulXlate;
  399. ULONG ulHwMix;
  400. ULONG ulCtl;
  401. LONG i;
  402. ULONG ul;
  403. LONG xBug;
  404. LONG xAbsLeft;
  405. BOOL bHwBug;
  406. LONG cwSrc;
  407. LONG cdSrc;
  408. LONG lSrcSkip;
  409. LONG cxRem;
  410. ULONG ul0;
  411. ULONG ul1;
  412. ULONG ulBoardId;
  413. ASSERTDD(psoSrc->iBitmapFormat == BMF_8BPP, "Source must be 8bpp");
  414. ASSERTDD(c > 0, "Can't handle zero rectangles");
  415. ASSERTDD(pxlo->pulXlate != NULL, "Must be a translate");
  416. pjBase = ppdev->pjBase;
  417. xOffset = ppdev->xOffset;
  418. yOffset = ppdev->yOffset;
  419. cjPelSize = ppdev->cjPelSize;
  420. pulXlate = pxlo->pulXlate;
  421. ulBoardId = ppdev->ulBoardId;
  422. dx = pptlSrc->x - prclDst->left;
  423. dy = pptlSrc->y - prclDst->top; // Add to destination to get source
  424. lSrcDelta = psoSrc->lDelta;
  425. pjSrcScan0 = psoSrc->pvScan0;
  426. if (rop4 == 0xcccc) // SRCCOPY
  427. {
  428. ulCtl = (opcode_ILOAD + atype_RPL + blockm_OFF + pattern_OFF +
  429. transc_BG_OPAQUE + bop_SRCCOPY);
  430. }
  431. else
  432. {
  433. ulHwMix = rop4 & 0xf;
  434. ulCtl = (opcode_ILOAD + atype_RSTR + blockm_OFF + pattern_OFF +
  435. transc_BG_OPAQUE + (ulHwMix << 16));
  436. }
  437. if (ulBoardId != MGA_STORM)
  438. {
  439. if (cjPelSize >= 3)
  440. {
  441. ulCtl |= (hcprs_SRC_24_BPP | bltmod_BUCOL);
  442. xBug = 0;
  443. }
  444. else
  445. {
  446. ulCtl |= (bltmod_BFCOL);
  447. xBug = (8 >> cjPelSize); // 8bpp and 16bpp have h/w alignment bugs
  448. }
  449. }
  450. else
  451. {
  452. ulCtl |= (bltmod_BFCOL);
  453. xBug = 0;
  454. }
  455. CHECK_FIFO_SPACE(pjBase, 11);
  456. CP_WRITE(pjBase, DWG_DWGCTL, ulCtl);
  457. CP_WRITE(pjBase, DWG_SHIFT, 0);
  458. if (!(GET_CACHE_FLAGS(ppdev, SIGN_CACHE)))
  459. {
  460. CP_WRITE(pjBase, DWG_SGN, 0);
  461. }
  462. if (!(GET_CACHE_FLAGS(ppdev, ARX_CACHE)))
  463. {
  464. CP_WRITE(pjBase, DWG_AR5, 0);
  465. }
  466. // The SRC0 - SRC3 registers will be trashed by the blt. AR0 will
  467. // be modified shortly:
  468. ppdev->HopeFlags = SIGN_CACHE;
  469. while(TRUE)
  470. {
  471. cx = prcl->right - prcl->left;
  472. cy = prcl->bottom - prcl->top;
  473. CP_WRITE(pjBase, DWG_FXRIGHT, xOffset + prcl->right - 1);
  474. CP_WRITE(pjBase, DWG_YDST, yOffset + prcl->top);
  475. CP_WRITE(pjBase, DWG_LEN, cy);
  476. CP_WRITE(pjBase, DWG_AR3, 0);
  477. xSrc = prcl->left + dx;
  478. pjSrc = pjSrcScan0 + (prcl->top + dy) * lSrcDelta + xSrc;
  479. xAbsLeft = (xOffset + prcl->left);
  480. bHwBug = (ulBoardId != MGA_STORM) && (xAbsLeft & xBug);
  481. if (!bHwBug)
  482. {
  483. CP_WRITE(pjBase, DWG_FXLEFT, xAbsLeft);
  484. CP_START(pjBase, DWG_AR0, cx - 1);
  485. }
  486. else
  487. {
  488. CP_WRITE(pjBase, DWG_CXLEFT, xAbsLeft);
  489. CP_WRITE(pjBase, DWG_FXLEFT, xAbsLeft - xBug);
  490. CP_START(pjBase, DWG_AR0, cx + xBug - 1);
  491. }
  492. lSrcSkip = lSrcDelta - cx;
  493. // Make sure the MGA is ready to take the data:
  494. CHECK_FIFO_SPACE(pjBase, 32);
  495. if (cjPelSize == 1)
  496. {
  497. // This part handles 8bpp output:
  498. cdSrc = (cx >> 2);
  499. cxRem = (cx & 3);
  500. do {
  501. if (bHwBug)
  502. CP_WRITE_SRC(pjBase, 0);
  503. for (i = cdSrc; i != 0; i--)
  504. {
  505. ul = (pulXlate[*pjSrc++]);
  506. ul |= (pulXlate[*pjSrc++] << 8);
  507. ul |= (pulXlate[*pjSrc++] << 16);
  508. ul |= (pulXlate[*pjSrc++] << 24);
  509. CP_WRITE_SRC(pjBase, ul);
  510. }
  511. if (cxRem > 0)
  512. {
  513. ul = (pulXlate[*pjSrc++]);
  514. if (cxRem > 1)
  515. {
  516. ul |= (pulXlate[*pjSrc++] << 8);
  517. if (cxRem > 2)
  518. {
  519. ul |= (pulXlate[*pjSrc++] << 16);
  520. }
  521. }
  522. CP_WRITE_SRC(pjBase, ul);
  523. }
  524. pjSrc += lSrcSkip;
  525. } while (--cy != 0);
  526. }
  527. else if (cjPelSize == 2)
  528. {
  529. // This part handles 16bpp output:
  530. cwSrc = (cx >> 1);
  531. cxRem = (cx & 1);
  532. do {
  533. if (bHwBug)
  534. CP_WRITE_SRC(pjBase, 0);
  535. for (i = cwSrc; i != 0; i--)
  536. {
  537. ul = (pulXlate[*pjSrc++]);
  538. ul |= (pulXlate[*pjSrc++] << 16);
  539. CP_WRITE_SRC(pjBase, ul);
  540. }
  541. if (cxRem > 0)
  542. {
  543. ul = (pulXlate[*pjSrc++]);
  544. CP_WRITE_SRC(pjBase, ul);
  545. }
  546. pjSrc += lSrcSkip;
  547. } while (--cy != 0);
  548. }
  549. else if (cjPelSize == 4)
  550. {
  551. // This part handles 32bpp output:
  552. cdSrc = cx;
  553. do {
  554. if (bHwBug)
  555. CP_WRITE_SRC(pjBase, 0);
  556. for (i = cdSrc; i != 0; i--)
  557. {
  558. ul = (pulXlate[*pjSrc++]);
  559. CP_WRITE_SRC(pjBase, ul);
  560. }
  561. pjSrc += lSrcSkip;
  562. } while (--cy != 0);
  563. }
  564. else
  565. {
  566. // This part handles packed 24bpp output:
  567. ASSERTDD(!bHwBug, "There is no hardware bug when higher than 16bpp");
  568. cdSrc = (cx >> 2);
  569. cxRem = (cx & 3);
  570. do {
  571. for (i = cdSrc; i != 0; i--)
  572. {
  573. ul0 = (pulXlate[*pjSrc++]);
  574. ul1 = (pulXlate[*pjSrc++]);
  575. ul = ul0 | (ul1 << 24);
  576. CP_WRITE_SRC(pjBase, ul);
  577. ul0 = (pulXlate[*pjSrc++]);
  578. ul = (ul1 >> 8) | (ul0 << 16);
  579. CP_WRITE_SRC(pjBase, ul);
  580. ul1 = (pulXlate[*pjSrc++]);
  581. ul = (ul1 << 8) | (ul0 >> 16);
  582. CP_WRITE_SRC(pjBase, ul);
  583. }
  584. if (cxRem > 0)
  585. {
  586. ul0 = (pulXlate[*pjSrc++]);
  587. ul = ul0;
  588. if (cxRem > 1)
  589. {
  590. ul1 = (pulXlate[*pjSrc++]);
  591. ul |= (ul1 << 24);
  592. CP_WRITE_SRC(pjBase, ul);
  593. ul = (ul1 >> 8);
  594. if (cxRem > 2)
  595. {
  596. ul0 = (pulXlate[*pjSrc++]);
  597. ul |= (ul0 << 16);
  598. CP_WRITE_SRC(pjBase, ul);
  599. ul = (ul0 >> 16);
  600. }
  601. }
  602. CP_WRITE_SRC(pjBase, ul);
  603. }
  604. pjSrc += lSrcSkip;
  605. } while (--cy != 0);
  606. }
  607. if (bHwBug)
  608. {
  609. // Restore the clipping:
  610. CHECK_FIFO_SPACE(pjBase, 1);
  611. CP_WRITE(pjBase, DWG_CXLEFT, 0);
  612. }
  613. if (--c == 0)
  614. return;
  615. prcl++;
  616. CHECK_FIFO_SPACE(pjBase, 7);
  617. }
  618. }
  619. /******************************Public*Routine******************************\
  620. * VOID vXferNative
  621. *
  622. * Transfers a bitmap that is the same colour depth as the display to
  623. * the screen via the data transfer register, with no translation.
  624. *
  625. \**************************************************************************/
  626. VOID vXferNative( // Type FNXFER
  627. PDEV* ppdev,
  628. LONG c, // Count of rectangles, can't be zero
  629. RECTL* prcl, // Array of relative coordinates destination rectangles
  630. ULONG rop4, // Rop4
  631. SURFOBJ* psoSrc, // Source surface
  632. POINTL* pptlSrc, // Original unclipped source point
  633. RECTL* prclDst, // Original unclipped destination rectangle
  634. XLATEOBJ* pxlo) // Not used
  635. {
  636. BYTE* pjBase;
  637. LONG xOffset;
  638. LONG yOffset;
  639. LONG cjPel;
  640. LONG dx;
  641. LONG dy;
  642. BYTE* pjSrcScan0;
  643. LONG lSrcDelta;
  644. ULONG ulCtl;
  645. ULONG ulHwMix;
  646. LONG yTop;
  647. LONG xLeft;
  648. LONG xAbsLeft;
  649. LONG xBug;
  650. BOOL bHwBug;
  651. LONG xRight;
  652. LONG cy;
  653. LONG xOriginalLeft;
  654. BYTE* pjSrc;
  655. LONG cdSrc;
  656. ULONG ulBoardId;
  657. pjBase = ppdev->pjBase;
  658. xOffset = ppdev->xOffset;
  659. yOffset = ppdev->yOffset;
  660. cjPel = ppdev->cjPelSize;
  661. ulBoardId = ppdev->ulBoardId;
  662. dx = pptlSrc->x - prclDst->left;
  663. dy = pptlSrc->y - prclDst->top;
  664. pjSrcScan0 = psoSrc->pvScan0;
  665. lSrcDelta = psoSrc->lDelta;
  666. if (rop4 == 0xcccc) // SRCCOPY
  667. {
  668. ulCtl = (opcode_ILOAD + atype_RPL + blockm_OFF + pattern_OFF +
  669. transc_BG_OPAQUE + bop_SRCCOPY);
  670. }
  671. else
  672. {
  673. ulHwMix = rop4 & 0xf;
  674. ulCtl = (opcode_ILOAD + atype_RSTR + blockm_OFF + pattern_OFF +
  675. transc_BG_OPAQUE + (ulHwMix << 16));
  676. }
  677. if ((ulBoardId != MGA_STORM) && (ppdev->iBitmapFormat == BMF_24BPP))
  678. {
  679. ulCtl |= (hcprs_SRC_24_BPP | bltmod_BUCOL);
  680. }
  681. else
  682. {
  683. ulCtl |= (bltmod_BFCOL);
  684. }
  685. CHECK_FIFO_SPACE(pjBase, 11);
  686. CP_WRITE(pjBase, DWG_DWGCTL, ulCtl);
  687. CP_WRITE(pjBase, DWG_SHIFT, 0);
  688. if (!(GET_CACHE_FLAGS(ppdev, SIGN_CACHE)))
  689. {
  690. CP_WRITE(pjBase, DWG_SGN, 0);
  691. }
  692. if (!(GET_CACHE_FLAGS(ppdev, ARX_CACHE)))
  693. {
  694. CP_WRITE(pjBase, DWG_AR5, 0);
  695. }
  696. // The SRC0 - SRC3 registers will be trashed by the blt. AR0 will
  697. // be modified shortly:
  698. ppdev->HopeFlags = SIGN_CACHE;
  699. while (TRUE)
  700. {
  701. yTop = prcl->top;
  702. cy = prcl->bottom - yTop;
  703. xRight = prcl->right;
  704. xLeft = prcl->left;
  705. xOriginalLeft = xLeft;
  706. // Adjust the destination so that the source is dword aligned.
  707. // Note that this works at 24bpp (but is less restrictive than
  708. // it could be at 16bpp):
  709. xLeft -= (xLeft + dx) & 3;
  710. // Since we're using hardware clipping, the start is always
  711. // dword aligned:
  712. pjSrc = pjSrcScan0 + (yTop + dy) * lSrcDelta + ((xLeft + dx) * cjPel);
  713. cdSrc = ((xRight - xLeft) * cjPel + 3) >> 2;
  714. CP_WRITE(pjBase, DWG_FXRIGHT, xOffset + xRight - 1);
  715. CP_WRITE(pjBase, DWG_YDST, yOffset + yTop);
  716. CP_WRITE(pjBase, DWG_LEN, cy);
  717. CP_WRITE(pjBase, DWG_AR3, 0);
  718. xAbsLeft = (xOffset + xLeft);
  719. xBug = (8 >> cjPel); // 4 for 8bpp, 2 for 16bpp
  720. bHwBug = (ulBoardId != MGA_STORM) && (xAbsLeft & xBug) && (cjPel < 3);
  721. if (!bHwBug) // 24bpp doesn't have h/w bug
  722. {
  723. // Don't have to work-around the hardware bug:
  724. if (xLeft != xOriginalLeft)
  725. {
  726. // Since we always dword align the source by adjusting
  727. // the destination rectangle, we may have to set the clip
  728. // register to compensate:
  729. CP_WRITE(pjBase, DWG_CXLEFT, xOffset + xOriginalLeft);
  730. }
  731. CP_WRITE(pjBase, DWG_FXLEFT, xAbsLeft);
  732. CP_START(pjBase, DWG_AR0, xRight - xLeft - 1);
  733. // Make sure the MGA is ready to take the data:
  734. CHECK_FIFO_SPACE(pjBase, 32);
  735. do {
  736. DATA_TRANSFER(pjBase, pjSrc, cdSrc);
  737. pjSrc += lSrcDelta;
  738. } while (--cy != 0);
  739. if (xLeft != xOriginalLeft)
  740. {
  741. CHECK_FIFO_SPACE(pjBase, 1);
  742. CP_WRITE(pjBase, DWG_CXLEFT, 0);
  743. }
  744. }
  745. else
  746. {
  747. // Work-around the hardware bug:
  748. CP_WRITE(pjBase, DWG_CXLEFT, xOffset + xOriginalLeft);
  749. CP_WRITE(pjBase, DWG_FXLEFT, xAbsLeft - xBug);
  750. CP_START(pjBase, DWG_AR0, xRight - xLeft + xBug - 1);
  751. // Make sure the MGA is ready to take the data:
  752. CHECK_FIFO_SPACE(pjBase, 32);
  753. do {
  754. DATA_TRANSFER(pjBase, pjSrc, 1); // Account for h/w bug
  755. DATA_TRANSFER(pjBase, pjSrc, cdSrc);
  756. pjSrc += lSrcDelta;
  757. } while (--cy != 0);
  758. CHECK_FIFO_SPACE(pjBase, 1);
  759. CP_WRITE(pjBase, DWG_CXLEFT, 0);
  760. }
  761. if (--c == 0)
  762. break;
  763. prcl++;
  764. CHECK_FIFO_SPACE(pjBase, 7);
  765. }
  766. }