Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1528 lines
45 KiB

  1. /******************************Module*Header*******************************\
  2. * Module Name: stretch.c
  3. *
  4. * Copyright (c) 1993-1996 Microsoft Corporation
  5. \**************************************************************************/
  6. #include "precomp.h"
  7. #define STRETCH_MAX_EXTENT 32767
  8. typedef DWORDLONG ULONGLONG;
  9. /******************************Public*Routine******************************\
  10. * VOID vMgaDirectStretch8Narrow
  11. *
  12. * Hardware assisted stretchblt at 8bpp when the width is 7 or less, for
  13. * old MGAs.
  14. *
  15. \**************************************************************************/
  16. VOID vMgaDirectStretch8Narrow(
  17. STR_BLT* pStrBlt)
  18. {
  19. BYTE* pjSrc;
  20. BYTE* pjDstEnd;
  21. ULONG ulDst;
  22. ULONG xAccum;
  23. ULONG xTmp;
  24. ULONG yTmp;
  25. ULONG ulDstScan;
  26. BYTE* pjDstScan;
  27. PDEV* ppdev = pStrBlt->ppdev;
  28. BYTE* pjBase = ppdev->pjBase;
  29. LONG yDst = pStrBlt->YDstStart;
  30. LONG xDst = pStrBlt->XDstStart;
  31. LONG xSrc = pStrBlt->XSrcStart;
  32. BYTE* pjSrcScan = pStrBlt->pjSrcScan + xSrc;
  33. LONG yCount = pStrBlt->YDstCount;
  34. LONG WidthX = pStrBlt->XDstEnd - xDst;
  35. ULONG xInt = pStrBlt->ulXDstToSrcIntCeil;
  36. ULONG xFrac = pStrBlt->ulXDstToSrcFracCeil;
  37. ULONG yAccum = pStrBlt->ulYFracAccumulator;
  38. ULONG yFrac = pStrBlt->ulYDstToSrcFracCeil;
  39. ULONG yInt = 0;
  40. yInt = pStrBlt->lDeltaSrc * pStrBlt->ulYDstToSrcIntCeil;
  41. ulDstScan = ppdev->ulYDstOrg + (yDst + ppdev->yOffset) * ppdev->cxMemory
  42. + (xDst + ppdev->xOffset);
  43. pjDstScan = ppdev->pjBase + SRCWND + (ulDstScan & 31);
  44. // We can't touch the frame buffer while the accelerator is doing
  45. // any drawing:
  46. CHECK_FIFO_SPACE(pjBase, FIFOSIZE);
  47. WAIT_NOT_BUSY(pjBase);
  48. do {
  49. ULONG yTmp = yAccum + yFrac;
  50. BYTE jSrc0;
  51. BYTE* pjDst;
  52. BYTE* pjDstEndNarrow;
  53. CP_WRITE_REGISTER(pjBase + HST_DSTPAGE, ulDstScan);
  54. ulDstScan += ppdev->cxMemory; // Increment to next scan
  55. pjDst = pjDstScan;
  56. pjSrc = pjSrcScan;
  57. xAccum = pStrBlt->ulXFracAccumulator;
  58. pjDstEndNarrow = pjDst + WidthX;
  59. do {
  60. jSrc0 = *pjSrc;
  61. xTmp = xAccum + xFrac;
  62. pjSrc = pjSrc + xInt + (xTmp < xAccum);
  63. *pjDst++ = jSrc0;
  64. xAccum = xTmp;
  65. } while (pjDst != pjDstEndNarrow);
  66. pjSrcScan += yInt;
  67. if (yTmp < yAccum)
  68. {
  69. pjSrcScan += pStrBlt->lDeltaSrc;
  70. }
  71. yAccum = yTmp;
  72. } while (--yCount);
  73. }
  74. /******************************Public*Routine******************************\
  75. * VOID vMilDirectStretch8Narrow
  76. *
  77. * Hardware assisted stretchblt at 8bpp when the width is 7 or less, for
  78. * Millenniums.
  79. *
  80. \**************************************************************************/
  81. VOID vMilDirectStretch8Narrow(
  82. STR_BLT* pStrBlt)
  83. {
  84. BYTE* pjSrc;
  85. BYTE* pjDstEnd;
  86. ULONG ulDst;
  87. ULONG xAccum;
  88. ULONG xTmp;
  89. ULONG yTmp;
  90. ULONG ulDstScan;
  91. BYTE* pjDstScan;
  92. PDEV* ppdev = pStrBlt->ppdev;
  93. BYTE* pjBase = ppdev->pjBase;
  94. LONG yDst = pStrBlt->YDstStart;
  95. LONG xDst = pStrBlt->XDstStart;
  96. LONG xSrc = pStrBlt->XSrcStart;
  97. BYTE* pjSrcScan = pStrBlt->pjSrcScan + xSrc;
  98. LONG yCount = pStrBlt->YDstCount;
  99. LONG WidthX = pStrBlt->XDstEnd - xDst;
  100. ULONG xInt = pStrBlt->ulXDstToSrcIntCeil;
  101. ULONG xFrac = pStrBlt->ulXDstToSrcFracCeil;
  102. ULONG yAccum = pStrBlt->ulYFracAccumulator;
  103. ULONG yFrac = pStrBlt->ulYDstToSrcFracCeil;
  104. ULONG yInt = 0;
  105. yInt = pStrBlt->lDeltaSrc * pStrBlt->ulYDstToSrcIntCeil;
  106. ulDstScan = ppdev->ulYDstOrg + (yDst + ppdev->yOffset) * ppdev->cxMemory
  107. + (xDst + ppdev->xOffset);
  108. pjDstScan = ppdev->pjScreen;
  109. // We can't touch the frame buffer while the accelerator is doing
  110. // any drawing:
  111. CHECK_FIFO_SPACE(pjBase, FIFOSIZE);
  112. WAIT_NOT_BUSY(pjBase);
  113. do {
  114. ULONG yTmp = yAccum + yFrac;
  115. BYTE jSrc0;
  116. BYTE* pjDst;
  117. BYTE* pjDstEndNarrow;
  118. pjDst = pjDstScan + ulDstScan;
  119. ulDstScan += ppdev->cxMemory; // Increment to next scan
  120. pjSrc = pjSrcScan;
  121. xAccum = pStrBlt->ulXFracAccumulator;
  122. pjDstEndNarrow = pjDst + WidthX;
  123. do {
  124. jSrc0 = *pjSrc;
  125. xTmp = xAccum + xFrac;
  126. pjSrc = pjSrc + xInt + (xTmp < xAccum);
  127. *pjDst++ = jSrc0;
  128. xAccum = xTmp;
  129. } while (pjDst != pjDstEndNarrow);
  130. pjSrcScan += yInt;
  131. if (yTmp < yAccum)
  132. {
  133. pjSrcScan += pStrBlt->lDeltaSrc;
  134. }
  135. yAccum = yTmp;
  136. } while (--yCount);
  137. }
  138. /******************************Public*Routine******************************\
  139. * VOID vMilDirectStretch8
  140. *
  141. * Hardware assisted stretchblt at 8bpp when the width is 8 or more,
  142. * for Millenniums.
  143. *
  144. \**************************************************************************/
  145. VOID vMilDirectStretch8(
  146. STR_BLT* pStrBlt)
  147. {
  148. BYTE* pjSrc;
  149. BYTE* pjDstEnd;
  150. LONG WidthXAln;
  151. ULONG ulDst;
  152. ULONG xAccum;
  153. ULONG xTmp;
  154. ULONG yTmp;
  155. BYTE* pjOldScan;
  156. LONG cyDuplicate;
  157. LONG xDuplicate;
  158. LONG yDuplicate;
  159. LONG lDuplicate;
  160. ULONG yInt;
  161. PDEV* ppdev = pStrBlt->ppdev;
  162. BYTE* pjBase = ppdev->pjBase;
  163. LONG cxMemory = ppdev->cxMemory;
  164. LONG xDst = pStrBlt->XDstStart;
  165. LONG xSrc = pStrBlt->XSrcStart;
  166. BYTE* pjSrcScan = pStrBlt->pjSrcScan + xSrc;
  167. BYTE* pjDst = pStrBlt->pjDstScan + xDst;
  168. LONG yDst = pStrBlt->YDstStart;
  169. LONG yCount = pStrBlt->YDstCount;
  170. ULONG StartAln = (ULONG)((ULONG_PTR)pjDst & 0x03);
  171. LONG WidthX = pStrBlt->XDstEnd - xDst;
  172. ULONG EndAln = (ULONG)((ULONG_PTR)(pjDst + WidthX) & 0x03);
  173. ULONG xInt = pStrBlt->ulXDstToSrcIntCeil;
  174. ULONG xFrac = pStrBlt->ulXDstToSrcFracCeil;
  175. ULONG yAccum = pStrBlt->ulYFracAccumulator;
  176. ULONG yFrac = pStrBlt->ulYDstToSrcFracCeil;
  177. LONG lDstStride = pStrBlt->lDeltaDst - WidthX;
  178. LONG lDeltaDst = pStrBlt->lDeltaDst;
  179. LONG lDeltaSrc = pStrBlt->lDeltaSrc;
  180. WidthXAln = WidthX - EndAln - ((- (LONG) StartAln) & 0x03);
  181. yInt = pStrBlt->lDeltaSrc * pStrBlt->ulYDstToSrcIntCeil;
  182. yDuplicate = yDst + ppdev->yOffset - 1;
  183. xDuplicate = xDst + ppdev->xOffset + ppdev->ulYDstOrg;
  184. START_DIRECT_ACCESS_STORM(ppdev, pjBase);
  185. do {
  186. BYTE jSrc0,jSrc1,jSrc2,jSrc3;
  187. ULONG yTmp;
  188. pjSrc = pjSrcScan;
  189. xAccum = pStrBlt->ulXFracAccumulator;
  190. switch (StartAln) {
  191. case 1:
  192. jSrc0 = *pjSrc;
  193. xTmp = xAccum + xFrac;
  194. pjSrc = pjSrc + xInt;
  195. if (xTmp < xAccum)
  196. pjSrc++;
  197. *pjDst++ = jSrc0;
  198. xAccum = xTmp;
  199. case 2:
  200. jSrc0 = *pjSrc;
  201. xTmp = xAccum + xFrac;
  202. pjSrc = pjSrc + xInt;
  203. if (xTmp < xAccum)
  204. pjSrc++;
  205. *pjDst++ = jSrc0;
  206. xAccum = xTmp;
  207. case 3:
  208. jSrc0 = *pjSrc;
  209. xTmp = xAccum + xFrac;
  210. pjSrc = pjSrc + xInt;
  211. if (xTmp < xAccum)
  212. pjSrc++;
  213. *pjDst++ = jSrc0;
  214. xAccum = xTmp;
  215. }
  216. pjDstEnd = pjDst + WidthXAln;
  217. while (pjDst != pjDstEnd)
  218. {
  219. jSrc0 = *pjSrc;
  220. xTmp = xAccum + xFrac;
  221. pjSrc = pjSrc + xInt;
  222. if (xTmp < xAccum)
  223. pjSrc++;
  224. jSrc1 = *pjSrc;
  225. xAccum = xTmp + xFrac;
  226. pjSrc = pjSrc + xInt;
  227. if (xAccum < xTmp)
  228. pjSrc++;
  229. jSrc2 = *pjSrc;
  230. xTmp = xAccum + xFrac;
  231. pjSrc = pjSrc + xInt;
  232. if (xTmp < xAccum)
  233. pjSrc++;
  234. jSrc3 = *pjSrc;
  235. xAccum = xTmp + xFrac;
  236. pjSrc = pjSrc + xInt;
  237. if (xAccum < xTmp)
  238. pjSrc++;
  239. ulDst = (jSrc3 << 24) | (jSrc2 << 16) | (jSrc1 << 8) | jSrc0;
  240. *(PULONG)pjDst = ulDst;
  241. pjDst += 4;
  242. }
  243. switch (EndAln) {
  244. case 3:
  245. jSrc0 = *pjSrc;
  246. xTmp = xAccum + xFrac;
  247. pjSrc = pjSrc + xInt;
  248. if (xTmp < xAccum)
  249. pjSrc++;
  250. *pjDst++ = jSrc0;
  251. xAccum = xTmp;
  252. case 2:
  253. jSrc0 = *pjSrc;
  254. xTmp = xAccum + xFrac;
  255. pjSrc = pjSrc + xInt;
  256. if (xTmp < xAccum)
  257. pjSrc++;
  258. *pjDst++ = jSrc0;
  259. xAccum = xTmp;
  260. case 1:
  261. jSrc0 = *pjSrc;
  262. *pjDst++ = jSrc0;
  263. }
  264. // Now count the number of duplicate scans:
  265. pjDst += lDstStride;
  266. yDuplicate++;
  267. cyDuplicate = -1;
  268. pjOldScan = pjSrcScan;
  269. do {
  270. cyDuplicate++;
  271. pjSrcScan += yInt;
  272. yTmp = yAccum + yFrac;
  273. if (yTmp < yAccum)
  274. {
  275. pjSrcScan += lDeltaSrc;
  276. }
  277. yAccum = yTmp;
  278. yCount--;
  279. } while ((yCount != 0) && (pjSrcScan == pjOldScan));
  280. // Duplicate the scan 'cyDuplicate' times with one blt:
  281. if (cyDuplicate != 0)
  282. {
  283. lDuplicate = yDuplicate * cxMemory + xDuplicate;
  284. CHECK_FIFO_SPACE(pjBase, 4);
  285. CP_WRITE(pjBase, DWG_AR3, lDuplicate);
  286. CP_WRITE(pjBase, DWG_AR0, lDuplicate + WidthX - 1);
  287. CP_WRITE(pjBase, DWG_DWGCTL, opcode_BITBLT |
  288. atype_RPL |
  289. blockm_OFF |
  290. bltmod_BFCOL |
  291. pattern_OFF |
  292. transc_BG_OPAQUE |
  293. bop_SRCCOPY |
  294. shftzero_ZERO |
  295. sgnzero_ZERO);
  296. CP_START(pjBase, DWG_YDSTLEN, ((yDuplicate + 1) << yval_SHIFT) |
  297. cyDuplicate);
  298. yDuplicate += cyDuplicate;
  299. pjDst += cyDuplicate * lDeltaDst;
  300. }
  301. } while (yCount != 0);
  302. END_DIRECT_ACCESS_STORM(ppdev, pjBase);
  303. }
  304. /******************************Public*Routine******************************\
  305. * VOID vMilDirectStretch16
  306. *
  307. * Hardware assisted stretchblt at 16bpp, for Millenniums.
  308. *
  309. \**************************************************************************/
  310. VOID vMilDirectStretch16(
  311. STR_BLT* pStrBlt)
  312. {
  313. BYTE* pjOldScan;
  314. USHORT* pusSrc;
  315. USHORT* pusDstEnd;
  316. LONG WidthXAln;
  317. ULONG ulDst;
  318. ULONG xAccum;
  319. ULONG xTmp;
  320. ULONG yTmp;
  321. LONG cyDuplicate;
  322. LONG xDuplicate;
  323. LONG yDuplicate;
  324. LONG lDuplicate;
  325. LONG yInt;
  326. PDEV* ppdev = pStrBlt->ppdev;
  327. BYTE* pjBase = ppdev->pjBase;
  328. LONG cxMemory = ppdev->cxMemory;
  329. LONG xDst = pStrBlt->XDstStart;
  330. LONG xSrc = pStrBlt->XSrcStart;
  331. BYTE* pjSrcScan = (pStrBlt->pjSrcScan) + xSrc * 2;
  332. USHORT* pusDst = (USHORT*)(pStrBlt->pjDstScan) + xDst;
  333. LONG yDst = pStrBlt->YDstStart;
  334. LONG yCount = pStrBlt->YDstCount;
  335. ULONG StartAln = ((ULONG)((ULONG_PTR)pusDst & 0x02)) >> 1;
  336. LONG WidthX = pStrBlt->XDstEnd - xDst;
  337. ULONG EndAln = (ULONG)(((ULONG_PTR)(pusDst + WidthX) & 0x02) >> 1);
  338. ULONG xInt = pStrBlt->ulXDstToSrcIntCeil;
  339. ULONG xFrac = pStrBlt->ulXDstToSrcFracCeil;
  340. ULONG yAccum = pStrBlt->ulYFracAccumulator;
  341. ULONG yFrac = pStrBlt->ulYDstToSrcFracCeil;
  342. LONG lDstStride = pStrBlt->lDeltaDst - 2 * WidthX;
  343. LONG lDeltaDst = pStrBlt->lDeltaDst;
  344. LONG lDeltaSrc = pStrBlt->lDeltaSrc;
  345. WidthXAln = WidthX - EndAln - StartAln;
  346. yInt = pStrBlt->lDeltaSrc * (LONG)(pStrBlt->ulYDstToSrcIntCeil);
  347. yDuplicate = yDst + ppdev->yOffset - 1;
  348. xDuplicate = xDst + ppdev->xOffset + ppdev->ulYDstOrg;
  349. START_DIRECT_ACCESS_STORM(ppdev, pjBase);
  350. // Loop stretching each scan line
  351. do {
  352. USHORT usSrc0,usSrc1;
  353. ULONG yTmp;
  354. pusSrc = (USHORT*) pjSrcScan;
  355. xAccum = pStrBlt->ulXFracAccumulator;
  356. if (StartAln)
  357. {
  358. usSrc0 = *pusSrc;
  359. xTmp = xAccum + xFrac;
  360. pusSrc = pusSrc + xInt;
  361. if (xTmp < xAccum)
  362. pusSrc++;
  363. *pusDst++ = usSrc0;
  364. xAccum = xTmp;
  365. }
  366. pusDstEnd = pusDst + WidthXAln;
  367. while (pusDst != pusDstEnd)
  368. {
  369. usSrc0 = *pusSrc;
  370. xTmp = xAccum + xFrac;
  371. pusSrc = pusSrc + xInt;
  372. if (xTmp < xAccum)
  373. pusSrc++;
  374. usSrc1 = *pusSrc;
  375. xAccum = xTmp + xFrac;
  376. pusSrc = pusSrc + xInt;
  377. if (xAccum < xTmp)
  378. pusSrc++;
  379. ulDst = (ULONG)((usSrc1 << 16) | usSrc0);
  380. *(ULONG*)pusDst = ulDst;
  381. pusDst+=2;
  382. }
  383. if (EndAln)
  384. {
  385. usSrc0 = *pusSrc;
  386. *pusDst++ = usSrc0;
  387. }
  388. // Now count the number of duplicate scans:
  389. pusDst = (USHORT*) ((BYTE*) pusDst + lDstStride);
  390. yDuplicate++;
  391. cyDuplicate = -1;
  392. pjOldScan = pjSrcScan;
  393. do {
  394. cyDuplicate++;
  395. pjSrcScan += yInt;
  396. yTmp = yAccum + yFrac;
  397. if (yTmp < yAccum)
  398. {
  399. pjSrcScan += lDeltaSrc;
  400. }
  401. yAccum = yTmp;
  402. yCount--;
  403. } while ((yCount != 0) && (pjSrcScan == pjOldScan));
  404. // Duplicate the scan 'cyDuplicate' times with one blt:
  405. if (cyDuplicate != 0)
  406. {
  407. lDuplicate = yDuplicate * cxMemory + xDuplicate;
  408. CHECK_FIFO_SPACE(pjBase, 4);
  409. CP_WRITE(pjBase, DWG_AR3, lDuplicate);
  410. CP_WRITE(pjBase, DWG_AR0, lDuplicate + WidthX - 1);
  411. CP_WRITE(pjBase, DWG_DWGCTL, opcode_BITBLT |
  412. atype_RPL |
  413. blockm_OFF |
  414. bltmod_BFCOL |
  415. pattern_OFF |
  416. transc_BG_OPAQUE |
  417. bop_SRCCOPY |
  418. shftzero_ZERO |
  419. sgnzero_ZERO);
  420. CP_START(pjBase, DWG_YDSTLEN, ((yDuplicate + 1) << yval_SHIFT) |
  421. cyDuplicate);
  422. yDuplicate += cyDuplicate;
  423. pusDst = (USHORT*) ((BYTE*) pusDst + cyDuplicate * lDeltaDst);
  424. }
  425. } while (yCount != 0);
  426. END_DIRECT_ACCESS_STORM(ppdev, pjBase);
  427. }
  428. /******************************Public*Routine******************************\
  429. * VOID vMilDirectStretch24
  430. *
  431. * Hardware assisted stretchblt at 24bpp, for Millenniums.
  432. *
  433. * We use the data-transfer register so that we don't have to worry about
  434. * funky alignments.
  435. *
  436. \**************************************************************************/
  437. VOID vMilDirectStretch24(
  438. STR_BLT* pStrBlt)
  439. {
  440. BYTE* pjSrc;
  441. LONG lAddress;
  442. ULONG ulDst;
  443. ULONG xAccum;
  444. ULONG xTmp;
  445. ULONG yTmp;
  446. LONG i;
  447. LONG cyDuplicate;
  448. LONG xDuplicate;
  449. LONG yDuplicate;
  450. LONG lDuplicate;
  451. BYTE* pjOldScan;
  452. LONG xDstLeft;
  453. LONG xDstRight;
  454. LONG xDstRightFast;
  455. LONG yDstTop;
  456. LONG cyBreak;
  457. LONG iBreak;
  458. PDEV* ppdev = pStrBlt->ppdev;
  459. BYTE* pjBase = ppdev->pjBase;
  460. LONG cxMemory = ppdev->cxMemory;
  461. BYTE* pjSrcScan = pStrBlt->pjSrcScan + pStrBlt->XSrcStart * 3;
  462. LONG yCount = pStrBlt->YDstCount;
  463. LONG WidthX = pStrBlt->XDstEnd - pStrBlt->XDstStart;
  464. ULONG xInt = pStrBlt->ulXDstToSrcIntCeil * 3;
  465. ULONG xFrac = pStrBlt->ulXDstToSrcFracCeil;
  466. ULONG yAccum = pStrBlt->ulYFracAccumulator;
  467. ULONG yFrac = pStrBlt->ulYDstToSrcFracCeil;
  468. LONG lDeltaSrc = pStrBlt->lDeltaSrc;
  469. ULONG yInt = pStrBlt->lDeltaSrc * pStrBlt->ulYDstToSrcIntCeil;
  470. ULONG ulXFracAccumulator = pStrBlt->ulXFracAccumulator;
  471. yDstTop = ppdev->yOffset + pStrBlt->YDstStart;
  472. xDstLeft = ppdev->xOffset + pStrBlt->XDstStart;
  473. xDstRight = ppdev->xOffset + pStrBlt->XDstEnd - 1; // Note inclusive
  474. xDstRightFast = (((xDstRight * 3) + 2) | 0x40) / 3;
  475. // Figure out how many scans we can duplicate before we hit the first
  476. // WRAM boundary:
  477. cyBreak = 0xffff;
  478. for (iBreak = 0; iBreak < ppdev->cyBreak; iBreak++)
  479. {
  480. if (ppdev->ayBreak[iBreak] >= yDstTop)
  481. {
  482. cyBreak = ppdev->ayBreak[iBreak] - yDstTop;
  483. break;
  484. }
  485. }
  486. ppdev->HopeFlags = SIGN_CACHE;
  487. CHECK_FIFO_SPACE(pjBase, 6);
  488. CP_WRITE(pjBase, DWG_FXBNDRY, (xDstRight << bfxright_SHIFT) | xDstLeft);
  489. CP_WRITE(pjBase, DWG_AR5, cxMemory);
  490. CP_WRITE(pjBase, DWG_AR3, 0);
  491. CP_WRITE(pjBase, DWG_AR0, WidthX - 1);
  492. CP_WRITE(pjBase, DWG_YDST, yDstTop);
  493. CP_WRITE(pjBase, DWG_CXRIGHT, xDstRight); // For fast-blt work-around
  494. yDuplicate = yDstTop - 1;
  495. xDuplicate = xDstLeft + ppdev->ulYDstOrg;
  496. do {
  497. CHECK_FIFO_SPACE(pjBase, 2);
  498. CP_WRITE(pjBase, DWG_DWGCTL, opcode_ILOAD |
  499. atype_RPL |
  500. blockm_OFF |
  501. pattern_OFF |
  502. transc_BG_OPAQUE |
  503. bop_SRCCOPY |
  504. bltmod_BFCOL |
  505. shftzero_ZERO |
  506. sgnzero_ZERO);
  507. CP_START(pjBase, DWG_LEN, 1);
  508. // Make sure the MGA is ready to take the data:
  509. CHECK_FIFO_SPACE(pjBase, FIFOSIZE);
  510. pjSrc = pjSrcScan;
  511. xAccum = ulXFracAccumulator;
  512. i = WidthX;
  513. while (TRUE)
  514. {
  515. ulDst = *(pjSrc); // Pixel 0
  516. ulDst |= *(pjSrc + 1) << 8;
  517. ulDst |= *(pjSrc + 2) << 16;
  518. if (--i == 0)
  519. break;
  520. pjSrc += xInt;
  521. xTmp = xAccum + xFrac;
  522. if (xTmp < xAccum)
  523. pjSrc += 3;
  524. ulDst |= *(pjSrc) << 24; // Pixel 1
  525. CP_WRITE_SRC(pjBase, ulDst);
  526. ulDst = *(pjSrc + 1);
  527. ulDst |= *(pjSrc + 2) << 8;
  528. if (--i == 0)
  529. break;
  530. pjSrc += xInt;
  531. xAccum = xTmp + xFrac;
  532. if (xAccum < xTmp)
  533. pjSrc += 3;
  534. ulDst |= *(pjSrc) << 16; // Pixel 2
  535. ulDst |= *(pjSrc + 1) << 24;
  536. CP_WRITE_SRC(pjBase, ulDst);
  537. ulDst = *(pjSrc + 2);
  538. if (--i == 0)
  539. break;
  540. pjSrc += xInt;
  541. xTmp = xAccum + xFrac;
  542. if (xTmp < xAccum)
  543. pjSrc += 3;
  544. ulDst |= *(pjSrc) << 8; // Pixel 3
  545. ulDst |= *(pjSrc + 1) << 16;
  546. ulDst |= *(pjSrc + 2) << 24;
  547. if (--i == 0)
  548. break;
  549. CP_WRITE_SRC(pjBase, ulDst);
  550. pjSrc += xInt;
  551. xAccum = xTmp + xFrac;
  552. if (xAccum < xTmp)
  553. pjSrc += 3;
  554. }
  555. // Write out the remainder of the scan:
  556. CP_WRITE_SRC(pjBase, ulDst);
  557. // Now count the number of duplicate scans:
  558. cyBreak--;
  559. yDuplicate++;
  560. cyDuplicate = -1;
  561. pjOldScan = pjSrcScan;
  562. do {
  563. cyDuplicate++;
  564. pjSrcScan += yInt;
  565. yTmp = yAccum + yFrac;
  566. if (yTmp < yAccum)
  567. {
  568. pjSrcScan += lDeltaSrc;
  569. }
  570. yAccum = yTmp;
  571. yCount--;
  572. } while ((yCount != 0) && (pjSrcScan == pjOldScan));
  573. // Duplicate the scan 'cyDuplicate' times with one blt:
  574. if (cyDuplicate != 0)
  575. {
  576. lDuplicate = (yDuplicate * cxMemory) + xDuplicate;
  577. yDuplicate += cyDuplicate;
  578. CHECK_FIFO_SPACE(pjBase, 8);
  579. CP_WRITE(pjBase, DWG_AR3, lDuplicate);
  580. CP_WRITE(pjBase, DWG_AR0, lDuplicate + WidthX - 1);
  581. cyBreak -= cyDuplicate;
  582. if (cyBreak >= 0)
  583. {
  584. // We haven't crossed a WRAM boundary, so we can use a
  585. // WRAM-WRAM blt to duplicate the scan:
  586. CP_WRITE(pjBase, DWG_DWGCTL, opcode_FBITBLT |
  587. atype_RPL |
  588. blockm_OFF |
  589. bltmod_BFCOL |
  590. pattern_OFF |
  591. transc_BG_OPAQUE |
  592. bop_NOP |
  593. shftzero_ZERO |
  594. sgnzero_ZERO);
  595. CP_WRITE(pjBase, DWG_FXRIGHT, xDstRightFast);
  596. CP_START(pjBase, DWG_LEN, cyDuplicate);
  597. CP_WRITE(pjBase, DWG_FXRIGHT, xDstRight);
  598. }
  599. else
  600. {
  601. CP_WRITE(pjBase, DWG_DWGCTL, opcode_BITBLT |
  602. atype_RPL |
  603. blockm_OFF |
  604. bltmod_BFCOL |
  605. pattern_OFF |
  606. transc_BG_OPAQUE |
  607. bop_SRCCOPY |
  608. shftzero_ZERO |
  609. sgnzero_ZERO);
  610. CP_START(pjBase, DWG_LEN, cyDuplicate);
  611. iBreak++;
  612. if (iBreak >= ppdev->cyBreak)
  613. {
  614. // That was the last break we have to worry about:
  615. cyBreak = 0xffff;
  616. }
  617. else
  618. {
  619. cyBreak += ppdev->ayBreak[iBreak]
  620. - ppdev->ayBreak[iBreak - 1];
  621. }
  622. }
  623. CP_WRITE(pjBase, DWG_AR3, 0);
  624. CP_WRITE(pjBase, DWG_AR0, WidthX - 1);
  625. }
  626. } while (yCount != 0);
  627. CHECK_FIFO_SPACE(pjBase, 1);
  628. CP_WRITE(pjBase, DWG_CXRIGHT, ppdev->cxMemory - 1);
  629. }
  630. /******************************Public*Routine******************************\
  631. * VOID vMilDirectStretch32
  632. *
  633. * Hardware assisted stretchblt at 32bpp, for Millenniums.
  634. *
  635. \**************************************************************************/
  636. VOID vMilDirectStretch32(
  637. STR_BLT* pStrBlt)
  638. {
  639. BYTE* pjOldScan;
  640. ULONG* pulSrc;
  641. ULONG* pulDstEnd;
  642. ULONG ulDst;
  643. ULONG xAccum;
  644. ULONG xTmp;
  645. ULONG yTmp;
  646. LONG cyDuplicate;
  647. LONG xDuplicate;
  648. LONG yDuplicate;
  649. LONG lDuplicate;
  650. ULONG yInt;
  651. PDEV* ppdev = pStrBlt->ppdev;
  652. BYTE* pjBase = ppdev->pjBase;
  653. LONG cxMemory = ppdev->cxMemory;
  654. LONG xDst = pStrBlt->XDstStart;
  655. LONG xSrc = pStrBlt->XSrcStart;
  656. BYTE* pjSrcScan = pStrBlt->pjSrcScan + xSrc * 4;
  657. ULONG* pulDst = (ULONG*)(pStrBlt->pjDstScan) + xDst;
  658. LONG yDst = pStrBlt->YDstStart;
  659. LONG yCount = pStrBlt->YDstCount;
  660. LONG WidthX = pStrBlt->XDstEnd - xDst;
  661. ULONG xInt = pStrBlt->ulXDstToSrcIntCeil;
  662. ULONG xFrac = pStrBlt->ulXDstToSrcFracCeil;
  663. ULONG yAccum = pStrBlt->ulYFracAccumulator;
  664. ULONG yFrac = pStrBlt->ulYDstToSrcFracCeil;
  665. LONG lDstStride = pStrBlt->lDeltaDst - 4*WidthX;
  666. LONG lDeltaDst = pStrBlt->lDeltaDst;
  667. LONG lDeltaSrc = pStrBlt->lDeltaSrc;
  668. yInt = pStrBlt->lDeltaSrc * pStrBlt->ulYDstToSrcIntCeil;
  669. yDuplicate = yDst + ppdev->yOffset - 1;
  670. xDuplicate = xDst + ppdev->xOffset + ppdev->ulYDstOrg;
  671. START_DIRECT_ACCESS_STORM(ppdev, pjBase);
  672. do {
  673. ULONG ulSrc;
  674. ULONG yTmp;
  675. pulSrc = (ULONG*) pjSrcScan;
  676. xAccum = pStrBlt->ulXFracAccumulator;
  677. pulDstEnd = pulDst + WidthX;
  678. while (pulDst != pulDstEnd)
  679. {
  680. ulSrc = *pulSrc;
  681. xTmp = xAccum + xFrac;
  682. pulSrc = pulSrc + xInt;
  683. if (xTmp < xAccum)
  684. pulSrc++;
  685. *(ULONG*)pulDst = ulSrc;
  686. pulDst++;
  687. xAccum = xTmp;
  688. }
  689. // Now count the number of duplicate scans:
  690. pulDst = (ULONG*) ((BYTE*) pulDst + lDstStride);
  691. yDuplicate++;
  692. cyDuplicate = -1;
  693. pjOldScan = pjSrcScan;
  694. do {
  695. cyDuplicate++;
  696. pjSrcScan += yInt;
  697. yTmp = yAccum + yFrac;
  698. if (yTmp < yAccum)
  699. {
  700. pjSrcScan += lDeltaSrc;
  701. }
  702. yAccum = yTmp;
  703. yCount--;
  704. } while ((yCount != 0) && (pjSrcScan == pjOldScan));
  705. // Duplicate the scan 'cyDuplicate' times with one blt:
  706. if (cyDuplicate != 0)
  707. {
  708. lDuplicate = yDuplicate * cxMemory + xDuplicate;
  709. CHECK_FIFO_SPACE(pjBase, 4);
  710. CP_WRITE(pjBase, DWG_AR3, lDuplicate);
  711. CP_WRITE(pjBase, DWG_AR0, lDuplicate + WidthX - 1);
  712. CP_WRITE(pjBase, DWG_DWGCTL, opcode_BITBLT |
  713. atype_RPL |
  714. blockm_OFF |
  715. bltmod_BFCOL |
  716. pattern_OFF |
  717. transc_BG_OPAQUE |
  718. bop_SRCCOPY |
  719. shftzero_ZERO |
  720. sgnzero_ZERO);
  721. CP_START(pjBase, DWG_YDSTLEN, ((yDuplicate + 1) << yval_SHIFT) |
  722. cyDuplicate);
  723. yDuplicate += cyDuplicate;
  724. pulDst = (ULONG*) ((BYTE*) pulDst + cyDuplicate * lDeltaDst);
  725. }
  726. } while (yCount != 0);
  727. END_DIRECT_ACCESS_STORM(ppdev, pjBase);
  728. }
  729. /******************************Public*Routine******************************\
  730. *
  731. * Routine Description:
  732. *
  733. * StretchBlt using integer math. Must be from one surface to another
  734. * surface of the same format.
  735. *
  736. * Arguments:
  737. *
  738. * ppdev - PDEV for device
  739. * prclDst - Pointer to rectangle of Dst extents
  740. * pvSrc - Pointer to start of Src bitmap
  741. * lDeltaSrc - Bytes from start of Src scan line to start of next
  742. * prclSrc - Pointer to rectangle of Src extents
  743. * prclClip - Clip Dest to this rect
  744. *
  745. * Return Value:
  746. *
  747. * Status
  748. *
  749. \**************************************************************************/
  750. VOID vStretchDIB(
  751. PDEV* ppdev,
  752. RECTL* prclDst,
  753. VOID* pvSrc,
  754. LONG lDeltaSrc,
  755. RECTL* prclSrc,
  756. RECTL* prclClip)
  757. {
  758. STR_BLT StrBlt;
  759. ULONG XSrcToDstIntFloor;
  760. ULONG XSrcToDstFracFloor;
  761. ULONG ulXDstToSrcIntCeil;
  762. ULONG ulXDstToSrcFracCeil;
  763. ULONG YSrcToDstIntFloor;
  764. ULONG YSrcToDstFracFloor;
  765. ULONG ulYDstToSrcIntCeil;
  766. ULONG ulYDstToSrcFracCeil;
  767. LONG SrcIntScan;
  768. LONG DstDeltaScanEnd;
  769. ULONG ulXFracAccumulator;
  770. ULONG ulYFracAccumulator;
  771. LONG LeftClipDistance;
  772. LONG TopClipDistance;
  773. BOOL bStretch;
  774. union {
  775. LARGE_INTEGER large;
  776. ULONGLONG li;
  777. } liInit;
  778. PFN_DIRSTRETCH pfnStr;
  779. //
  780. // Calculate exclusive start and end points:
  781. //
  782. LONG WidthDst = prclDst->right - prclDst->left;
  783. LONG HeightDst = prclDst->bottom - prclDst->top;
  784. LONG WidthSrc = prclSrc->right - prclSrc->left;
  785. LONG HeightSrc = prclSrc->bottom - prclSrc->top;
  786. LONG XSrcStart = prclSrc->left;
  787. LONG XSrcEnd = prclSrc->right;
  788. LONG XDstStart = prclDst->left;
  789. LONG XDstEnd = prclDst->right;
  790. LONG YSrcStart = prclSrc->top;
  791. LONG YSrcEnd = prclSrc->bottom;
  792. LONG YDstStart = prclDst->top;
  793. LONG YDstEnd = prclDst->bottom;
  794. //
  795. // Validate parameters:
  796. //
  797. ASSERTDD(pvSrc != (VOID*)NULL, "Bad source bitmap pointer");
  798. ASSERTDD(prclDst != (RECTL*)NULL, "Bad destination rectangle");
  799. ASSERTDD(prclSrc != (RECTL*)NULL, "Bad source rectangle");
  800. ASSERTDD((WidthDst > 0) && (HeightDst > 0) &&
  801. (WidthSrc > 0) && (HeightSrc > 0),
  802. "Can't do mirroring or empty rectangles here");
  803. ASSERTDD((WidthDst <= STRETCH_MAX_EXTENT) &&
  804. (HeightDst <= STRETCH_MAX_EXTENT) &&
  805. (WidthSrc <= STRETCH_MAX_EXTENT) &&
  806. (HeightSrc <= STRETCH_MAX_EXTENT), "Stretch exceeds limits");
  807. ASSERTDD(prclClip != NULL, "Bad clip rectangle");
  808. //
  809. // Calculate X Dst to Src mapping
  810. //
  811. //
  812. // dst->src = ( CEIL( (2k*WidthSrc)/WidthDst) ) / 2k
  813. //
  814. // = ( FLOOR( (2k*WidthSrc -1) / WidthDst) + 1) / 2k
  815. //
  816. // where 2k = 2 ^ 32
  817. //
  818. {
  819. ULONGLONG liWidthSrc;
  820. ULONGLONG liQuo;
  821. ULONG ulTemp;
  822. //
  823. // Work around a compiler bug dealing with the assignment
  824. // 'liHeightSrc = (((LONGLONG)HeightSrc) << 32) - 1':
  825. //
  826. liInit.large.LowPart = (ULONG) -1;
  827. liInit.large.HighPart = WidthSrc - 1;
  828. liWidthSrc = liInit.li;
  829. liQuo = liWidthSrc / (ULONGLONG) WidthDst;
  830. ulXDstToSrcIntCeil = (ULONG)(liQuo >> 32);
  831. ulXDstToSrcFracCeil = (ULONG)liQuo;
  832. //
  833. // Now add 1, use fake carry:
  834. //
  835. ulTemp = ulXDstToSrcFracCeil + 1;
  836. ulXDstToSrcIntCeil += (ulTemp < ulXDstToSrcFracCeil);
  837. ulXDstToSrcFracCeil = ulTemp;
  838. }
  839. //
  840. // Calculate Y Dst to Src mapping
  841. //
  842. //
  843. // dst->src = ( CEIL( (2k*HeightSrc)/HeightDst) ) / 2k
  844. //
  845. // = ( FLOOR( (2k*HeightSrc -1) / HeightDst) + 1) / 2k
  846. //
  847. // where 2k = 2 ^ 32
  848. //
  849. {
  850. ULONGLONG liHeightSrc;
  851. ULONGLONG liQuo;
  852. ULONG ulTemp;
  853. //
  854. // Work around a compiler bug dealing with the assignment
  855. // 'liHeightSrc = (((LONGLONG)HeightSrc) << 32) - 1':
  856. //
  857. liInit.large.LowPart = (ULONG) -1;
  858. liInit.large.HighPart = HeightSrc - 1;
  859. liHeightSrc = liInit.li;
  860. liQuo = liHeightSrc / (ULONGLONG) HeightDst;
  861. ulYDstToSrcIntCeil = (ULONG)(liQuo >> 32);
  862. ulYDstToSrcFracCeil = (ULONG)liQuo;
  863. //
  864. // Now add 1, use fake carry:
  865. //
  866. ulTemp = ulYDstToSrcFracCeil + 1;
  867. ulYDstToSrcIntCeil += (ulTemp < ulYDstToSrcFracCeil);
  868. ulYDstToSrcFracCeil = ulTemp;
  869. }
  870. //
  871. // Now clip Dst in X, and/or calc src clipping effect on dst
  872. //
  873. // adjust left and right edges if needed, record
  874. // distance adjusted for fixing the src
  875. //
  876. if (XDstStart < prclClip->left)
  877. {
  878. XDstStart = prclClip->left;
  879. }
  880. if (XDstEnd > prclClip->right)
  881. {
  882. XDstEnd = prclClip->right;
  883. }
  884. //
  885. // Check for totally clipped out destination:
  886. //
  887. if (XDstEnd <= XDstStart)
  888. {
  889. return;
  890. }
  891. LeftClipDistance = XDstStart - prclDst->left;
  892. {
  893. ULONG ulTempInt;
  894. ULONG ulTempFrac;
  895. //
  896. // Calculate displacement for .5 in destination and add:
  897. //
  898. ulTempFrac = (ulXDstToSrcFracCeil >> 1) | (ulXDstToSrcIntCeil << 31);
  899. ulTempInt = (ulXDstToSrcIntCeil >> 1);
  900. XSrcStart += ulTempInt;
  901. ulXFracAccumulator = ulTempFrac;
  902. if (LeftClipDistance != 0)
  903. {
  904. ULONGLONG ullFraction;
  905. ULONG ulTmp;
  906. ullFraction = UInt32x32To64(ulXDstToSrcFracCeil, LeftClipDistance);
  907. ulTmp = ulXFracAccumulator;
  908. ulXFracAccumulator += (ULONG) (ullFraction);
  909. if (ulXFracAccumulator < ulTmp)
  910. XSrcStart++;
  911. XSrcStart += (ulXDstToSrcIntCeil * LeftClipDistance)
  912. + (ULONG) (ullFraction >> 32);
  913. }
  914. }
  915. //
  916. // Now clip Dst in Y, and/or calc src clipping effect on dst
  917. //
  918. // adjust top and bottom edges if needed, record
  919. // distance adjusted for fixing the src
  920. //
  921. if (YDstStart < prclClip->top)
  922. {
  923. YDstStart = prclClip->top;
  924. }
  925. if (YDstEnd > prclClip->bottom)
  926. {
  927. YDstEnd = prclClip->bottom;
  928. }
  929. //
  930. // Check for totally clipped out destination:
  931. //
  932. if (YDstEnd <= YDstStart)
  933. {
  934. return;
  935. }
  936. TopClipDistance = YDstStart - prclDst->top;
  937. {
  938. ULONG ulTempInt;
  939. ULONG ulTempFrac;
  940. //
  941. // Calculate displacement for .5 in destination and add:
  942. //
  943. ulTempFrac = (ulYDstToSrcFracCeil >> 1) | (ulYDstToSrcIntCeil << 31);
  944. ulTempInt = ulYDstToSrcIntCeil >> 1;
  945. YSrcStart += (LONG)ulTempInt;
  946. ulYFracAccumulator = ulTempFrac;
  947. if (TopClipDistance != 0)
  948. {
  949. ULONGLONG ullFraction;
  950. ULONG ulTmp;
  951. ullFraction = UInt32x32To64(ulYDstToSrcFracCeil, TopClipDistance);
  952. ulTmp = ulYFracAccumulator;
  953. ulYFracAccumulator += (ULONG) (ullFraction);
  954. if (ulYFracAccumulator < ulTmp)
  955. YSrcStart++;
  956. YSrcStart += (ulYDstToSrcIntCeil * TopClipDistance)
  957. + (ULONG) (ullFraction >> 32);
  958. }
  959. }
  960. //
  961. // Warm up the hardware if doing an expanding stretch in 'y':
  962. //
  963. bStretch = (HeightDst > HeightSrc);
  964. if (bStretch)
  965. {
  966. BYTE* pjBase = ppdev->pjBase;
  967. LONG xOffset = ppdev->xOffset;
  968. CHECK_FIFO_SPACE(pjBase, 6);
  969. CP_WRITE(pjBase, DWG_DWGCTL, opcode_BITBLT + atype_RPL + blockm_OFF +
  970. bltmod_BFCOL + pattern_OFF +
  971. transc_BG_OPAQUE + bop_SRCCOPY);
  972. CP_WRITE(pjBase, DWG_SHIFT, 0);
  973. CP_WRITE(pjBase, DWG_SGN, 0);
  974. CP_WRITE(pjBase, DWG_AR5, ppdev->cxMemory);
  975. CP_WRITE(pjBase, DWG_FXLEFT, XDstStart + xOffset);
  976. CP_WRITE(pjBase, DWG_FXRIGHT, XDstEnd + xOffset - 1);
  977. ppdev->HopeFlags = SIGN_CACHE;
  978. }
  979. //
  980. // Fill out blt structure, then call format-specific stretch code
  981. //
  982. StrBlt.ppdev = ppdev;
  983. StrBlt.XDstEnd = XDstEnd;
  984. StrBlt.YDstStart = YDstStart;
  985. StrBlt.YDstCount = YDstEnd - YDstStart;
  986. if (StrBlt.YDstCount > 0)
  987. {
  988. //
  989. // Caclulate starting scan line address. Since the inner loop
  990. // routines are format dependent, they must add XDstStart/XSrcStart
  991. // to pjDstScan/pjSrcScan to get the actual starting pixel address.
  992. //
  993. StrBlt.pjSrcScan = (BYTE*) pvSrc + (YSrcStart * lDeltaSrc);
  994. StrBlt.pjDstScan = ppdev->pjScreen
  995. + (ppdev->yOffset + YDstStart) * ppdev->lDelta
  996. + (ppdev->xOffset + ppdev->ulYDstOrg) * ppdev->cjPelSize;
  997. StrBlt.lDeltaSrc = lDeltaSrc;
  998. StrBlt.XSrcStart = XSrcStart;
  999. StrBlt.XDstStart = XDstStart;
  1000. StrBlt.lDeltaDst = ppdev->lDelta;
  1001. StrBlt.ulXDstToSrcIntCeil = ulXDstToSrcIntCeil;
  1002. StrBlt.ulXDstToSrcFracCeil = ulXDstToSrcFracCeil;
  1003. StrBlt.ulYDstToSrcIntCeil = ulYDstToSrcIntCeil;
  1004. StrBlt.ulYDstToSrcFracCeil = ulYDstToSrcFracCeil;
  1005. StrBlt.ulXFracAccumulator = ulXFracAccumulator;
  1006. StrBlt.ulYFracAccumulator = ulYFracAccumulator;
  1007. if (ppdev->ulBoardId == MGA_STORM)
  1008. {
  1009. if (ppdev->iBitmapFormat == BMF_8BPP)
  1010. {
  1011. if ((XDstEnd - XDstStart) < 7)
  1012. pfnStr = vMilDirectStretch8Narrow;
  1013. else
  1014. pfnStr = vMilDirectStretch8;
  1015. }
  1016. else if (ppdev->iBitmapFormat == BMF_16BPP)
  1017. {
  1018. pfnStr = vMilDirectStretch16;
  1019. }
  1020. else if (ppdev->iBitmapFormat == BMF_24BPP)
  1021. {
  1022. pfnStr = vMilDirectStretch24;
  1023. }
  1024. else
  1025. {
  1026. ASSERTDD(ppdev->iBitmapFormat == BMF_32BPP, "Expected 32bpp");
  1027. pfnStr = vMilDirectStretch32;
  1028. }
  1029. }
  1030. else
  1031. {
  1032. #if defined(_X86_)
  1033. {
  1034. if (ppdev->iBitmapFormat == BMF_8BPP)
  1035. {
  1036. if ((XDstEnd - XDstStart) < 7)
  1037. pfnStr = vMgaDirectStretch8Narrow;
  1038. else
  1039. pfnStr = vMgaDirectStretch8;
  1040. }
  1041. else
  1042. {
  1043. ASSERTDD(ppdev->iBitmapFormat == BMF_16BPP, "Expected 16bpp");
  1044. pfnStr = vMgaDirectStretch16;
  1045. }
  1046. }
  1047. #endif
  1048. }
  1049. (*pfnStr)(&StrBlt);
  1050. }
  1051. }
  1052. /******************************Public*Routine******************************\
  1053. * BOOL DrvStretchBlt
  1054. *
  1055. \**************************************************************************/
  1056. BOOL DrvStretchBlt(
  1057. SURFOBJ* psoDst,
  1058. SURFOBJ* psoSrc,
  1059. SURFOBJ* psoMsk,
  1060. CLIPOBJ* pco,
  1061. XLATEOBJ* pxlo,
  1062. COLORADJUSTMENT* pca,
  1063. POINTL* pptlHTOrg,
  1064. RECTL* prclDst,
  1065. RECTL* prclSrc,
  1066. POINTL* pptlMsk,
  1067. ULONG iMode)
  1068. {
  1069. DSURF* pdsurfSrc;
  1070. DSURF* pdsurfDst;
  1071. PDEV* ppdev;
  1072. OH* poh;
  1073. SURFOBJ* psoDstNew;
  1074. SURFOBJ* psoSrcNew;
  1075. BOOL bPunt = FALSE;
  1076. BOOL bRet;
  1077. // GDI guarantees us that for a StretchBlt the destination surface
  1078. // will always be a device surface, and not a DIB:
  1079. ppdev = (PDEV*) psoDst->dhpdev;
  1080. pdsurfDst = (DSURF*) psoDst->dhsurf;
  1081. pdsurfSrc = (DSURF*) psoSrc->dhsurf;
  1082. poh = pdsurfDst->poh;
  1083. ppdev->xOffset = poh->x;
  1084. ppdev->yOffset = poh->y;
  1085. // It's quicker for GDI to do a StretchBlt when the source surface
  1086. // is not a device-managed surface, because then it can directly
  1087. // read the source bits without having to allocate a temporary
  1088. // buffer and call DrvCopyBits to get a copy that it can use.
  1089. if (ppdev->ulBoardId == MGA_STORM)
  1090. {
  1091. psoDstNew = psoDst;
  1092. if (psoSrc->iType != STYPE_BITMAP)
  1093. {
  1094. pdsurfSrc = (DSURF*) psoSrc->dhsurf;
  1095. if (pdsurfSrc->dt == DT_SCREEN)
  1096. {
  1097. // The source is a device bitmap that is currently stored
  1098. // in device memory.
  1099. psoSrcNew = psoSrc;
  1100. bPunt = TRUE;
  1101. }
  1102. else
  1103. {
  1104. ASSERTDD(pdsurfSrc->dt == DT_DIB, "Can only handle DIB DFBs here");
  1105. // The source was a device bitmap that we just converted
  1106. // to a DIB:
  1107. psoSrcNew = pdsurfSrc->pso;
  1108. }
  1109. }
  1110. else
  1111. {
  1112. psoSrcNew = psoSrc;
  1113. }
  1114. }
  1115. else
  1116. {
  1117. psoDstNew = psoDst;
  1118. if (psoSrc->iType == STYPE_DEVBITMAP)
  1119. {
  1120. pdsurfSrc = (DSURF*) psoSrc->dhsurf;
  1121. if (pdsurfSrc->dt == DT_SCREEN)
  1122. {
  1123. // The source is a device bitmap that is currently stored
  1124. // in device memory.
  1125. psoSrcNew = psoSrc;
  1126. bPunt = TRUE;
  1127. }
  1128. else
  1129. {
  1130. ASSERTDD(pdsurfSrc->dt == DT_DIB, "Can only handle DIB DFBs here");
  1131. // The source was a device bitmap that we just converted
  1132. // to a DIB:
  1133. psoSrcNew = pdsurfSrc->pso;
  1134. }
  1135. }
  1136. else
  1137. {
  1138. psoSrcNew = psoSrc;
  1139. }
  1140. // With the old MGA's, we have fast stretch code only for 8bpp and
  1141. // 16bpp:
  1142. if (ppdev->iBitmapFormat > BMF_16BPP)
  1143. {
  1144. bPunt = TRUE;
  1145. }
  1146. // With the old MGA's, we have fast stretch code only for x86:
  1147. #if !defined(_X86_)
  1148. {
  1149. bPunt = TRUE;
  1150. }
  1151. #endif
  1152. }
  1153. if (pdsurfDst->dt == DT_DIB)
  1154. {
  1155. // The destination was a device bitmap that we just converted
  1156. // to a DIB:
  1157. psoDstNew = pdsurfDst->pso;
  1158. bPunt = TRUE;
  1159. }
  1160. if (!bPunt)
  1161. {
  1162. RECTL rclClip;
  1163. RECTL* prclClip;
  1164. ULONG cxDst;
  1165. ULONG cyDst;
  1166. ULONG cxSrc;
  1167. ULONG cySrc;
  1168. BOOL bMore;
  1169. CLIPENUM ce;
  1170. LONG c;
  1171. LONG i;
  1172. if ((psoSrcNew->iType == STYPE_BITMAP) &&
  1173. (psoMsk == NULL) &&
  1174. ((pxlo == NULL) || (pxlo->flXlate & XO_TRIVIAL)) &&
  1175. ((psoSrcNew->iBitmapFormat == ppdev->iBitmapFormat)))
  1176. {
  1177. cxDst = prclDst->right - prclDst->left;
  1178. cyDst = prclDst->bottom - prclDst->top;
  1179. cxSrc = prclSrc->right - prclSrc->left;
  1180. cySrc = prclSrc->bottom - prclSrc->top;
  1181. // Our 'vStretchDIB' routine requires that the stretch be
  1182. // non-inverting, within a certain size, to have no source
  1183. // clipping, and to have no empty rectangles (the latter is the
  1184. // reason for the '- 1' on the unsigned compare here):
  1185. if (((cxSrc - 1) < STRETCH_MAX_EXTENT) &&
  1186. ((cySrc - 1) < STRETCH_MAX_EXTENT) &&
  1187. ((cxDst - 1) < STRETCH_MAX_EXTENT) &&
  1188. ((cyDst - 1) < STRETCH_MAX_EXTENT) &&
  1189. (prclSrc->left >= 0) &&
  1190. (prclSrc->top >= 0) &&
  1191. (prclSrc->right <= psoSrcNew->sizlBitmap.cx) &&
  1192. (prclSrc->bottom <= psoSrcNew->sizlBitmap.cy))
  1193. {
  1194. // Our snazzy routine only does COLORONCOLOR. But for
  1195. // stretching blts, BLACKONWHITE and WHITEONBLACK are also
  1196. // equivalent to COLORONCOLOR:
  1197. if ((iMode == COLORONCOLOR) ||
  1198. ((iMode < COLORONCOLOR) && (cxSrc <= cxDst) && (cySrc <= cyDst)))
  1199. {
  1200. if ((pco == NULL) || (pco->iDComplexity == DC_TRIVIAL))
  1201. {
  1202. rclClip.left = LONG_MIN;
  1203. rclClip.top = LONG_MIN;
  1204. rclClip.right = LONG_MAX;
  1205. rclClip.bottom = LONG_MAX;
  1206. prclClip = &rclClip;
  1207. StretchSingleClipRect:
  1208. vStretchDIB(ppdev,
  1209. prclDst,
  1210. psoSrcNew->pvScan0,
  1211. psoSrcNew->lDelta,
  1212. prclSrc,
  1213. prclClip);
  1214. return(TRUE);
  1215. }
  1216. else if (pco->iDComplexity == DC_RECT)
  1217. {
  1218. prclClip = &pco->rclBounds;
  1219. goto StretchSingleClipRect;
  1220. }
  1221. else
  1222. {
  1223. CLIPOBJ_cEnumStart(pco, FALSE, CT_RECTANGLES, CD_ANY, 0);
  1224. do {
  1225. bMore = CLIPOBJ_bEnum(pco, sizeof(ce), (ULONG*) &ce);
  1226. c = cIntersect(prclDst, ce.arcl, ce.c);
  1227. if (c != 0)
  1228. {
  1229. for (i = 0; i < c; i++)
  1230. {
  1231. vStretchDIB(ppdev,
  1232. prclDst,
  1233. psoSrcNew->pvScan0,
  1234. psoSrcNew->lDelta,
  1235. prclSrc,
  1236. &ce.arcl[i]);
  1237. }
  1238. }
  1239. } while (bMore);
  1240. return(TRUE);
  1241. }
  1242. }
  1243. }
  1244. }
  1245. }
  1246. // GDI is nice enough to handle the cases where 'psoDst' and/or 'psoSrc'
  1247. // are device-managed surfaces, but it ain't gonna be fast...
  1248. if (ppdev->ulBoardId == MGA_STORM)
  1249. {
  1250. START_DIRECT_ACCESS_STORM(ppdev, ppdev->pjBase);
  1251. }
  1252. else
  1253. {
  1254. START_DIRECT_ACCESS_MGA(ppdev, ppdev->pjBase);
  1255. }
  1256. bRet = EngStretchBlt(psoDstNew, psoSrcNew, psoMsk, pco, pxlo, pca, pptlHTOrg,
  1257. prclDst, prclSrc, pptlMsk, iMode);
  1258. if (ppdev->ulBoardId == MGA_STORM)
  1259. {
  1260. END_DIRECT_ACCESS_STORM(ppdev, ppdev->pjBase);
  1261. }
  1262. else
  1263. {
  1264. END_DIRECT_ACCESS_MGA(ppdev, ppdev->pjBase);
  1265. }
  1266. return(bRet);
  1267. }