Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

764 lines
24 KiB

  1. /******************************Module*Header*******************************\
  2. * Module Name: blt16.c
  3. *
  4. * This module contains the low-level blt functions that are specific to
  5. * 16bpp.
  6. *
  7. * Copyright (c) 1992-1996 Microsoft Corporation
  8. * Copyright (c) 1993-1996 Matrox Electronic Systems, Ltd.
  9. \**************************************************************************/
  10. #include "precomp.h"
  11. /******************************Public*Routine******************************\
  12. * VOID vMgaPatRealize16bpp
  13. *
  14. \**************************************************************************/
  15. VOID vMgaPatRealize16bpp(
  16. PDEV* ppdev,
  17. RBRUSH* prb)
  18. {
  19. BYTE* pjBase;
  20. BRUSHENTRY* pbe;
  21. LONG iBrushCache;
  22. LONG i;
  23. ULONG* pulSrc;
  24. pjBase = ppdev->pjBase;
  25. // We have to allocate a new off-screen cache brush entry for
  26. // the brush:
  27. iBrushCache = ppdev->iBrushCache;
  28. pbe = &ppdev->pbe[iBrushCache];
  29. iBrushCache++;
  30. if (iBrushCache >= ppdev->cBrushCache)
  31. iBrushCache = 0;
  32. ppdev->iBrushCache = iBrushCache;
  33. // Update our links:
  34. pbe->prbVerify = prb;
  35. prb->apbe[IBOARD(ppdev)] = pbe;
  36. CHECK_FIFO_SPACE(pjBase, 11);
  37. CP_WRITE(pjBase, DWG_DWGCTL, (opcode_ILOAD + atype_RPL + blockm_OFF +
  38. bop_SRCCOPY + bltmod_BFCOL + pattern_OFF +
  39. transc_BG_OPAQUE));
  40. if (!(GET_CACHE_FLAGS(ppdev, SIGN_CACHE)))
  41. {
  42. CP_WRITE(pjBase, DWG_SGN, 0);
  43. }
  44. // The SRC0 - SRC3 registers will be trashed by the blt:
  45. ppdev->HopeFlags = SIGN_CACHE;
  46. // Since our brushes are always interleaved, we want to send down
  47. // 2 pels, skip 2 pels, send down 2 pels, etc. So we contrive to
  48. // adjust the blt width and pitch to do that automatically for us:
  49. CP_WRITE(pjBase, DWG_AR3, 0); // Source start address, not
  50. // included in ARX_CACHE
  51. CP_WRITE(pjBase, DWG_SHIFT, 0);
  52. CP_WRITE(pjBase, DWG_LEN, 8); // Transfering 8 scans
  53. CP_WRITE(pjBase, DWG_AR0, 15); // Source width is 16
  54. CP_WRITE(pjBase, DWG_AR5, 32); // Source pitch is 32
  55. CP_WRITE(pjBase, DWG_FXLEFT, pbe->ulLeft);
  56. CP_WRITE(pjBase, DWG_FXRIGHT, pbe->ulLeft + 15);
  57. CP_WRITE(pjBase, DWG_YDST, pbe->ulYDst);
  58. CP_START(pjBase, DWG_PITCH, 32 + ylin_LINEARIZE_NOT);
  59. CHECK_FIFO_SPACE(pjBase, 32); // Make sure MGA is ready
  60. for (pulSrc = prb->aulPattern, i = 8; i != 0; i--, pulSrc += 4)
  61. {
  62. CP_WRITE_SRC(pjBase, *(pulSrc));
  63. CP_WRITE_SRC(pjBase, *(pulSrc + 1));
  64. CP_WRITE_SRC(pjBase, *(pulSrc + 2));
  65. CP_WRITE_SRC(pjBase, *(pulSrc + 3));
  66. // Repeat the brush's scan, because the off-screen pattern has to
  67. // be 16 x 8:
  68. CP_WRITE_SRC(pjBase, *(pulSrc));
  69. CP_WRITE_SRC(pjBase, *(pulSrc + 1));
  70. CP_WRITE_SRC(pjBase, *(pulSrc + 2));
  71. CP_WRITE_SRC(pjBase, *(pulSrc + 3));
  72. }
  73. // Don't forget to restore the pitch:
  74. CHECK_FIFO_SPACE(pjBase, 1);
  75. CP_WRITE(pjBase, DWG_PITCH, ppdev->cxMemory);
  76. }
  77. /******************************Public*Routine******************************\
  78. * VOID vMgaFillPat16bpp
  79. *
  80. \**************************************************************************/
  81. VOID vMgaFillPat16bpp( // Type FNFILL
  82. PDEV* ppdev,
  83. LONG c, // Can't be zero
  84. RECTL* prcl, // List of rectangles to be filled, in relative
  85. // coordinates
  86. ULONG rop4, // Rop4
  87. RBRUSH_COLOR rbc, // rbc.prb points to brush realization structure
  88. POINTL* pptlBrush) // Pattern alignment
  89. {
  90. BYTE* pjBase;
  91. BRUSHENTRY* pbe;
  92. LONG xOffset;
  93. LONG yOffset;
  94. ULONG ulHwMix;
  95. LONG yTop;
  96. LONG xLeft;
  97. LONG xBrush;
  98. LONG yBrush;
  99. LONG cx;
  100. LONG cy;
  101. ULONG ulAr3;
  102. ULONG ulAr0;
  103. CHAR cFifo;
  104. LONG xAlign;
  105. LONG cxThis;
  106. ASSERTDD(!(rbc.prb->fl & RBRUSH_2COLOR), "Can't do 2 colour brushes here");
  107. ASSERTDD((rbc.prb != NULL) && (rbc.prb->apbe[IBOARD(ppdev)] != NULL),
  108. "apbe[iBoard] should be initialized to &beUnrealizedBrush");
  109. // We have to ensure that no other brush took our spot in off-screen
  110. // memory, or we might have to realize the brush for the first time:
  111. pbe = rbc.prb->apbe[IBOARD(ppdev)];
  112. if (pbe->prbVerify != rbc.prb)
  113. {
  114. vMgaPatRealize16bpp(ppdev, rbc.prb);
  115. pbe = rbc.prb->apbe[IBOARD(ppdev)];
  116. }
  117. pjBase = ppdev->pjBase;
  118. xOffset = ppdev->xOffset;
  119. yOffset = ppdev->yOffset;
  120. do {
  121. cFifo = GET_FIFO_SPACE(pjBase) - 4;
  122. } while (cFifo < 0);
  123. if (rop4 == 0xf0f0) // PATCOPY
  124. {
  125. CP_WRITE(pjBase, DWG_DWGCTL, (opcode_BITBLT + atype_RPL + blockm_OFF +
  126. trans_0 + bltmod_BFCOL + pattern_ON +
  127. transc_BG_OPAQUE + bop_SRCCOPY));
  128. }
  129. else
  130. {
  131. ulHwMix = (rop4 & 0x03) + ((rop4 & 0x30) >> 2);
  132. CP_WRITE(pjBase, DWG_DWGCTL, (opcode_BITBLT + atype_RSTR + blockm_OFF +
  133. trans_0 + bltmod_BFCOL + pattern_ON +
  134. transc_BG_OPAQUE + (ulHwMix << 16)));
  135. }
  136. if (!(GET_CACHE_FLAGS(ppdev, SIGN_CACHE)))
  137. {
  138. CP_WRITE(pjBase, DWG_SGN, 0);
  139. }
  140. ppdev->HopeFlags = SIGN_CACHE;
  141. CP_WRITE(pjBase, DWG_SHIFT, 0);
  142. CP_WRITE(pjBase, DWG_AR5, 32);
  143. while (TRUE)
  144. {
  145. // We must be careful here, there are some hardware limitations. We
  146. // must check the width and the alignment of the blt to decide how to
  147. // slice the operation along X. Here are the limitations:
  148. //
  149. // - if the destination is aligned on a 16-pel address, then we are
  150. // limited to 16-pel wide slices;
  151. // - if the destination is not aligned on a 16-pel address, then we
  152. // are limited to 8-pel wide slices.
  153. //
  154. // This means that if the width is 8 or less, we can do it right away;
  155. // if not, then we must first do one or two slices limited to 8 pels,
  156. // then a bunch of 16-pel slices, and maybe a last slice to complete
  157. // the blt.
  158. yTop = prcl->top;
  159. xLeft = prcl->left;
  160. cy = prcl->bottom - yTop;
  161. cx = prcl->right - xLeft;
  162. xBrush = (xLeft - pptlBrush->x) & 7;
  163. yBrush = (yTop - pptlBrush->y) & 7;
  164. ulAr3 = pbe->ulLinear + (yBrush << 5) + xBrush;
  165. ulAr0 = pbe->ulLinear + (yBrush << 5) + 15;
  166. xLeft += xOffset; // Convert to absolute coordinates
  167. yTop += yOffset;
  168. if (cx > 8)
  169. {
  170. xAlign = (xLeft & 7);
  171. if (xAlign != 0)
  172. {
  173. cFifo -= 6;
  174. while (cFifo < 0)
  175. {
  176. cFifo = GET_FIFO_SPACE(pjBase) - 6;
  177. }
  178. cxThis = 8 - xAlign;
  179. CP_WRITE(pjBase, DWG_AR3, ulAr3);
  180. CP_WRITE(pjBase, DWG_AR0, ulAr0);
  181. CP_WRITE(pjBase, DWG_LEN, cy);
  182. CP_WRITE(pjBase, DWG_YDST, yTop);
  183. CP_WRITE(pjBase, DWG_FXLEFT, xLeft);
  184. CP_START(pjBase, DWG_FXRIGHT, xLeft + cxThis - 1);
  185. xLeft += cxThis;
  186. cx -= cxThis;
  187. ulAr3 = ulAr0 - 15 + ((ulAr3 + cxThis) & 7);
  188. }
  189. if (cx > 8)
  190. {
  191. if (xLeft & 15)
  192. {
  193. cFifo -= 6;
  194. while (cFifo < 0)
  195. {
  196. cFifo = GET_FIFO_SPACE(pjBase) - 6;
  197. }
  198. CP_WRITE(pjBase, DWG_AR3, ulAr3);
  199. CP_WRITE(pjBase, DWG_AR0, ulAr0);
  200. CP_WRITE(pjBase, DWG_LEN, cy);
  201. CP_WRITE(pjBase, DWG_YDST, yTop);
  202. CP_WRITE(pjBase, DWG_FXLEFT, xLeft);
  203. CP_START(pjBase, DWG_FXRIGHT, xLeft + 7);
  204. xLeft += 8;
  205. cx -= 8;
  206. }
  207. while (cx > 16)
  208. {
  209. cFifo -= 6;
  210. while (cFifo < 0)
  211. {
  212. cFifo = GET_FIFO_SPACE(pjBase) - 6;
  213. }
  214. CP_WRITE(pjBase, DWG_AR3, ulAr3);
  215. CP_WRITE(pjBase, DWG_AR0, ulAr0);
  216. CP_WRITE(pjBase, DWG_LEN, cy);
  217. CP_WRITE(pjBase, DWG_YDST, yTop);
  218. CP_WRITE(pjBase, DWG_FXLEFT, xLeft);
  219. CP_START(pjBase, DWG_FXRIGHT, xLeft + 15);
  220. xLeft += 16;
  221. cx -= 16;
  222. }
  223. }
  224. }
  225. // Do the final strip:
  226. cFifo -= 6;
  227. while (cFifo < 0)
  228. {
  229. cFifo = GET_FIFO_SPACE(pjBase) - 6;
  230. }
  231. CP_WRITE(pjBase, DWG_AR3, ulAr3);
  232. CP_WRITE(pjBase, DWG_AR0, ulAr0);
  233. CP_WRITE(pjBase, DWG_LEN, cy);
  234. CP_WRITE(pjBase, DWG_YDST, yTop);
  235. CP_WRITE(pjBase, DWG_FXLEFT, xLeft);
  236. CP_START(pjBase, DWG_FXRIGHT, xLeft + cx - 1);
  237. if (--c == 0)
  238. break;
  239. prcl++;
  240. }
  241. }
  242. /******************************Public*Routine******************************\
  243. * VOID vMgaGet16bppSliceFromScreen
  244. *
  245. * Get a limited number of pels from the screen and make sure that the
  246. * transfer went OK. This assumes that the IDUMP is almost fully set up,
  247. * and that a number of dwords must be jumped over at the end of each
  248. * destination scanline.
  249. *
  250. \**************************************************************************/
  251. VOID vMgaGet16bppSliceFromScreen(
  252. PDEV* ppdev, // pdev
  253. ULONG ulSSA, // Source start address for current slice
  254. ULONG ulSEA, // Source end address for current slice
  255. ULONG ulLen, // Nb of scanlines in current slice
  256. LONG NbDWordsPerScan,// Nb of dwords to be read in each scanline
  257. LONG lPreDWordBytes, // Nb bytes before any dword on a scan
  258. LONG lDWords, // Nb dwords to be moved on a scan
  259. LONG lPostDWordBytes,// Nb bytes after all dwords on a scan
  260. LONG lDestDelta, // Increment to get from one dest scan to the next
  261. BYTE bPreShift, // Shift to align first byte to be stored
  262. ULONG** ppulDest) // Ptr to where to store the first dword we read
  263. {
  264. BYTE* pjBase;
  265. ULONG temp, HstStatus, AbortCnt;
  266. ULONG* pulDest;
  267. ULONG* locpulDest;
  268. ULONG* pDMAWindow;
  269. LONG i, TotalDWords, locTotalDWords;
  270. BYTE* pbDest;
  271. pjBase = ppdev->pjBase;
  272. AbortCnt = 1000;
  273. pDMAWindow = (ULONG*) (ppdev->pjBase + DMAWND);
  274. // We want to stop reading just before the last dword is read.
  275. TotalDWords = (NbDWordsPerScan * ulLen) - 1;
  276. do {
  277. CHECK_FIFO_SPACE(pjBase, 3);
  278. // This is where we'll start storing data.
  279. pulDest = *ppulDest;
  280. // Complete the IDUMP setup.
  281. CP_WRITE(pjBase, DWG_AR3, ulSSA);
  282. CP_WRITE(pjBase, DWG_AR0, ulSEA);
  283. // Turn the pseudoDMA on.
  284. BLT_READ_ON(ppdev, pjBase);
  285. CP_START(pjBase, DWG_LEN, ulLen);
  286. // Make sure the setup is complete.
  287. CHECK_FIFO_SPACE(pjBase, FIFOSIZE);
  288. if (TotalDWords)
  289. {
  290. // There is at least one dword left to be read.
  291. // Make a copy so that we can play with it.
  292. locTotalDWords = TotalDWords;
  293. do {
  294. // Make a copy for updating to the next scan.
  295. locpulDest = pulDest;
  296. if (lPreDWordBytes)
  297. {
  298. // There are pixels to be stored as bytes.
  299. // Read 2 pixels and shift them into place.
  300. locTotalDWords--;
  301. temp = CP_READ_DMA(ppdev, pDMAWindow);
  302. temp &= ppdev->ulPlnWt;
  303. temp >>= bPreShift;
  304. pbDest = (BYTE*)pulDest;
  305. for (i = 0; i < lPreDWordBytes; i++)
  306. {
  307. *pbDest = (BYTE) temp;
  308. temp >>= 8;
  309. pbDest++;
  310. }
  311. pulDest = (ULONG*)pbDest;
  312. if (locTotalDWords == 0)
  313. {
  314. // This was the end of the current slice.
  315. // Exit the do-while loop.
  316. if ((NbDWordsPerScan == 1) && (lPreDWordBytes != 0))
  317. {
  318. // Since it was a narrow slice, the next read
  319. // goes on the next scan, so add in the delta:
  320. (UCHAR*) pulDest = (UCHAR*) locpulDest + lDestDelta;
  321. pbDest = (UCHAR*) pulDest;
  322. }
  323. break;
  324. }
  325. }
  326. // We should be dword-aligned in the destination now.
  327. // Copy a number of full dwords from the current scanline.
  328. for (i = 0; i < lDWords; i++)
  329. {
  330. temp = CP_READ_DMA(ppdev, pDMAWindow);
  331. *pulDest++ = temp & ppdev->ulPlnWt;
  332. }
  333. // We're left with this many dwords to be read.
  334. locTotalDWords -= lDWords;
  335. if (locTotalDWords != 0)
  336. {
  337. // This was not the last scanline, so we must read a
  338. // possibly partial dword to end this scan.
  339. if (lPostDWordBytes)
  340. {
  341. // There are pixels to be stored as bytes.
  342. locTotalDWords--;
  343. temp = CP_READ_DMA(ppdev, pDMAWindow);
  344. temp &= ppdev->ulPlnWt;
  345. if (lPostDWordBytes == 4)
  346. {
  347. *pulDest = temp;
  348. }
  349. else
  350. {
  351. pbDest = (BYTE*)pulDest;
  352. *pbDest = (BYTE)temp;
  353. pbDest++;
  354. temp >>= 8;
  355. *pbDest = (BYTE)temp;
  356. }
  357. }
  358. // We should be done with this scan.
  359. // We're done with the current scan, go to the next one.
  360. (UCHAR*) pulDest = (UCHAR*) locpulDest + lDestDelta;
  361. }
  362. } while (locTotalDWords > 0);
  363. }
  364. // Check for the EngineBusy flag.
  365. for (i = 0; i < 7; i++)
  366. {
  367. HstStatus = CP_READ_STATUS(pjBase);
  368. }
  369. if (HstStatus &= (dwgengsts_MASK >> 16))
  370. {
  371. // The drawing engine is still busy, while it should not be:
  372. // there was a problem with this slice.
  373. // Empty the DMA window.
  374. do {
  375. CP_READ_DMA(ppdev, pDMAWindow);
  376. // Check for the EngineBusy flag. If the engine is still
  377. // busy, then we'll have to read another dword.
  378. for (i = 0; i < 7; i++)
  379. {
  380. temp = CP_READ_STATUS(pjBase);
  381. }
  382. } while (temp & (dwgengsts_MASK >> 16));
  383. // The DMA window should now be empty.
  384. // We cannot check the HST_STATUS two lower bytes anymore,
  385. // so this is new.
  386. if (--AbortCnt > 0)
  387. {
  388. // Signal we'll have to do this again.
  389. HstStatus = 1;
  390. }
  391. else
  392. {
  393. // We tried hard enough, desist.
  394. HstStatus = 0;
  395. }
  396. }
  397. // The last dword to be read should be available now.
  398. temp = CP_READ_DMA(ppdev, pDMAWindow);
  399. temp &= ppdev->ulPlnWt;
  400. // We must take some care so as not to write after the end of the
  401. // destination bitmap.
  402. pbDest = (BYTE*)pulDest;
  403. if ((NbDWordsPerScan == 1) && (lPreDWordBytes != 0))
  404. {
  405. // The X extent was smaller than 2.
  406. for (i = 0; i < lPreDWordBytes; i++)
  407. {
  408. *pbDest = (BYTE)temp;
  409. pbDest++;
  410. temp >>= 8;
  411. }
  412. }
  413. else if (lPostDWordBytes > 0)
  414. {
  415. // There are pixels to be stored as bytes.
  416. if (lPostDWordBytes == 4)
  417. {
  418. // We can store a dword.
  419. *pulDest = temp;
  420. }
  421. else
  422. {
  423. *pbDest = (BYTE)temp;
  424. pbDest++;
  425. temp >>= 8;
  426. *pbDest = (BYTE)temp;
  427. }
  428. }
  429. else
  430. {
  431. // Store the last dword.
  432. *pulDest = temp;
  433. }
  434. // Turn the pseudoDMA off.
  435. BLT_READ_OFF(ppdev, pjBase);
  436. // Redo the whole thing if there was a problem with this slice.
  437. } while (HstStatus);
  438. // Update the destination pointer for the calling routine.
  439. *ppulDest += ((ulLen * lDestDelta) / sizeof(ULONG));
  440. }
  441. /******************************Public*Routine******************************\
  442. * VOID vMgaGetBits16bpp
  443. *
  444. * Reads the bits from the screen at 16bpp.
  445. *
  446. \**************************************************************************/
  447. VOID vMgaGetBits16bpp(
  448. PDEV* ppdev, // Current src pdev
  449. SURFOBJ* psoDst, // Destination surface for the color bits
  450. RECTL* prclDst, // Area to be modified within the dest surface,
  451. // in absolute coordinates
  452. POINTL* pptlSrc) // Upper left corner of source rectangle,
  453. // in absolute coordinates
  454. {
  455. BYTE* pjBase;
  456. BYTE* pbScan0;
  457. BYTE* pbDestRect;
  458. LONG xSrc, ySrc, xTrg, yTrg, cxTrg, cyTrg, lDestDelta, cySlice,
  459. xTrgAl, cxTrgAl, lPreDWordBytes, lDWords,
  460. lPostDWordBytes, NbDWordsPerScan, TotalDWords, i;
  461. ULONG ulSSA, ulSEA, ulSSAIncrement, temp,
  462. NbDWords, NbBytesPerScan;
  463. ULONG* pDW;
  464. ULONG* locpDW;
  465. BYTE bPreShift;
  466. DWORD dwi, dwo;
  467. BYTE* pbDest;
  468. pjBase = ppdev->pjBase;
  469. // Calculate the size of the target rectangle, and pick up
  470. // some convenient locals.
  471. // Starting (x,y) and extents within the destination bitmap.
  472. // If an extent is 0 or negative, we don't have anything to do.
  473. cxTrg = prclDst->right - prclDst->left;
  474. cyTrg = prclDst->bottom - prclDst->top;
  475. xTrg = prclDst->left;
  476. yTrg = prclDst->top;
  477. ASSERTDD(cxTrg > 0 && cyTrg > 0, "Shouldn't get empty extents");
  478. // First scanline of the destination bitmap.
  479. pbScan0 = (BYTE*) psoDst->pvScan0;
  480. // Starting (x,y) on the screen.
  481. xSrc = pptlSrc->x;
  482. ySrc = pptlSrc->y;
  483. // Scan increment within the destination bitmap.
  484. lDestDelta = psoDst->lDelta;
  485. // Calculate the location of the destination rectangle.
  486. pbDestRect = pbScan0 + (yTrg * lDestDelta) + (2 * xTrg);
  487. // Set the registers that can be set now for the operation.
  488. // SIGN_CACHE=1 and cuts 1 register from the setup.
  489. CHECK_FIFO_SPACE(pjBase, 6);
  490. // DWGCTL IDUMP+RPL+SRCCOPY+blockm_OFF+bltmod_BFCOL+patt_OFF+BG_OPAQUE
  491. // SGN 0
  492. // SHIFT 0
  493. // AR0 sea: ySrc*pitch + xSrc + cxTrg - 1
  494. // AR3 ssa: ySrc*pitch + xSrc
  495. // AR5 Screen pitch
  496. // FXLEFT 0
  497. // FXRIGHT cxTrg - 1
  498. // LEN cyTrg
  499. // MCTLWTST special value required by IDUMP bug fix
  500. if (!(GET_CACHE_FLAGS(ppdev, SIGN_CACHE)))
  501. {
  502. CP_WRITE(pjBase, DWG_SGN, 0);
  503. }
  504. // The SRC0-3 registers are trashed by the blt.
  505. ppdev->HopeFlags = SIGN_CACHE;
  506. CP_WRITE(pjBase, DWG_SHIFT, 0);
  507. CP_WRITE(pjBase, DWG_FXLEFT, 0);
  508. CP_WRITE(pjBase, DWG_AR5, ppdev->cxMemory);
  509. CP_WRITE(pjBase, DWG_DWGCTL, (opcode_IDUMP+atype_RPL+blockm_OFF+
  510. bop_SRCCOPY+bltmod_BFCOL+pattern_OFF+
  511. transc_BG_OPAQUE));
  512. // Recipe for IDUMP fix. We must break the IDUMP into a number of
  513. // smaller IDUMPS, according to the following formula:
  514. //
  515. // 0 < cx < 256 ==> cYSlice = int(1024/(cx << 2)) << 2 = int( 256/cx)<<2
  516. // 256 < cx < 1024 ==> cYSlice = int(4096/(cx << 2)) << 2 = int(1024/cx)<<2
  517. // 1024 < cx < 1600 ==> cYSlice = int(1600/(cx << 2)) << 2 = int(1600/cx)<<2
  518. //
  519. // We will modify it this way:
  520. //
  521. // 0 < cx <= 256 ==> cYSlice = int(1024/(cx << 2)) << 2 = int( 256/cx)<<2
  522. // 256 < cx <= 512 ==> cYSlice = int(4096/(cx << 2)) << 2 = int(1024/cx)<<2
  523. // 512 < cx ==> cYSlice = 4
  524. if (cxTrg > 512)
  525. {
  526. cySlice = 4;
  527. }
  528. else if (cxTrg > 256)
  529. {
  530. cySlice = (1024 / cxTrg) << 2;
  531. }
  532. else
  533. {
  534. cySlice = (256 / cxTrg) << 2;
  535. }
  536. // Number of bytes, padded to the next dword, to be moved per scanline.
  537. NbBytesPerScan = (2*cxTrg + 3) & -4;
  538. NbDWords = NbBytesPerScan >> 2;
  539. pDW = (ULONG*) pbDestRect;
  540. // There will probably be a number of full slices (of height cySlice).
  541. // Source Start Address of the first slice.
  542. ulSSA = ySrc * ppdev->cxMemory + xSrc + ppdev->ulYDstOrg;
  543. ulSEA = ulSSA + cxTrg - 1;
  544. // Increment to get to the SSA of the next full slice.
  545. ulSSAIncrement = cySlice * ppdev->cxMemory;
  546. // We can't go full speed.
  547. // Compute alignment parameters for the blt. We want to read the
  548. // minimum number of dwords from the screen, and we want to align
  549. // the write into memory on dword boundaries. We want to do it
  550. // this way:
  551. //
  552. // width -> 1 2 3 4 5
  553. // ---- ---- --------- -------------- --------------
  554. // xTrg&1
  555. // 0 --10 DWxx DWxx --10 DWxx DWxx DWxx DWxx --10
  556. // 1 --10 3210 32-- DWxx 32-- DWxx --10 32-- DWxx DWxx
  557. //
  558. // where 0, 1, 2, or 3 means that the corresponding byte of the dword
  559. // that was read in is stored as a byte, and DWxx means that the dword
  560. // that was read in is stored as a dword.
  561. // Compute some useful values.
  562. xTrgAl = xTrg & 0x01; // 0, 1
  563. cxTrgAl = cxTrg - xTrgAl;
  564. if (cxTrgAl < 2)
  565. {
  566. // The width is really small.
  567. // On each scanline:
  568. lPreDWordBytes = 2*cxTrg; // Nb of bytes before the first dword
  569. lDWords = 0; // Nb of dwords to be stored
  570. lPostDWordBytes = 0; // Nb of bytes after the last dword.
  571. NbDWordsPerScan = 1; // Nb of dwords to be read in.
  572. bPreShift = 0; // How to shift the first dword.
  573. }
  574. else
  575. {
  576. // Pixels will be stored as bytes and dwords.
  577. lPreDWordBytes = 2*xTrgAl; // Nb of bytes before the first dword
  578. lDWords = cxTrgAl / 2;
  579. if((lPostDWordBytes = 2 * (cxTrgAl & 1)) == 0)
  580. {
  581. lPostDWordBytes = 4;
  582. lDWords--;
  583. }
  584. NbDWordsPerScan = (xTrgAl + cxTrg + 1)/2;
  585. bPreShift = (BYTE)(16 * xTrgAl);
  586. ulSSA -= xTrgAl;
  587. }
  588. CP_WRITE(pjBase, DWG_FXRIGHT, (bPreShift/16) + cxTrg - 1);
  589. while ((cyTrg -= cySlice) >= 0)
  590. {
  591. // There is another full height slice to be read.
  592. vMgaGet16bppSliceFromScreen(ppdev, ulSSA, ulSEA,
  593. (ULONG) cySlice, NbDWordsPerScan,
  594. lPreDWordBytes, lDWords,
  595. lPostDWordBytes, lDestDelta,
  596. bPreShift, &pDW);
  597. // Bump Source Start Address to the start of the next slice.
  598. ulSSA += ulSSAIncrement;
  599. ulSEA += ulSSAIncrement;
  600. }
  601. // Make cyTrg positive again, and read the last slice, if any.
  602. if ((cyTrg += cySlice) != 0)
  603. {
  604. // There is a last, partial slice to be read.
  605. vMgaGet16bppSliceFromScreen(ppdev, ulSSA, ulSEA,
  606. (ULONG) cyTrg, NbDWordsPerScan,
  607. lPreDWordBytes, lDWords,
  608. lPostDWordBytes, lDestDelta,
  609. bPreShift, &pDW);
  610. }
  611. }