Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1304 lines
36 KiB

  1. /******************************************************************************\
  2. *
  3. * $Workfile: str.c $
  4. *
  5. * Copyright (c) 1993-1997 Microsoft Corporation
  6. * Copyright (c) 1996-1997 Cirrus Logic, Inc.,
  7. *
  8. * $Log: S:/projects/drivers/ntsrc/display/STR.C_V $
  9. *
  10. * Rev 1.3 10 Jan 1997 15:40:16 PLCHU
  11. *
  12. *
  13. * Rev 1.2 Nov 01 1996 16:52:02 unknown
  14. *
  15. * Rev 1.1 Oct 10 1996 15:38:58 unknown
  16. *
  17. * Rev 1.1 12 Aug 1996 16:54:52 frido
  18. * Removed unaccessed local variables.
  19. *
  20. * sge01 : 11-01-96 Fix 24bpp stretch address calculation problem
  21. * chu01 : 01-02-97 5480 BitBLT enhancement
  22. *
  23. \******************************************************************************/
  24. #include "precomp.h"
  25. /******************************Public*Routine******************************\
  26. *
  27. * Routine Name
  28. *
  29. * vDirectStretch8
  30. *
  31. * Routine Description:
  32. *
  33. * Stretch blt 8->8
  34. *
  35. * NOTE: This routine doesn't handle cases where the blt stretch starts
  36. * and ends in the same destination dword! vDirectStretchNarrow
  37. * is expected to have been called for that case.
  38. *
  39. * Arguments:
  40. *
  41. * pStrBlt - contains all params for blt
  42. *
  43. * Return Value:
  44. *
  45. * VOID
  46. *
  47. \**************************************************************************/
  48. VOID vDirectStretch8(
  49. STR_BLT* pStrBlt)
  50. {
  51. BYTE* pjSrc;
  52. BYTE* pjDstEnd;
  53. LONG WidthXAln;
  54. ULONG ulDst;
  55. ULONG xAccum;
  56. ULONG xTmp;
  57. BYTE* pjOldScan;
  58. LONG cyDuplicate;
  59. PDEV* ppdev = pStrBlt->ppdev;
  60. LONG xDst = pStrBlt->XDstStart;
  61. LONG xSrc = pStrBlt->XSrcStart;
  62. BYTE* pjSrcScan = pStrBlt->pjSrcScan + xSrc;
  63. BYTE* pjDst = pStrBlt->pjDstScan + xDst;
  64. LONG yDst = pStrBlt->YDstStart; // + ppdev->yOffset;
  65. LONG yCount = pStrBlt->YDstCount;
  66. ULONG StartAln = (ULONG)((ULONG_PTR)pjDst & 0x03);
  67. LONG WidthX = pStrBlt->XDstEnd - xDst;
  68. ULONG EndAln = (ULONG)((ULONG_PTR)(pjDst + WidthX) & 0x03);
  69. ULONG xInt = pStrBlt->ulXDstToSrcIntCeil;
  70. ULONG xFrac = pStrBlt->ulXDstToSrcFracCeil;
  71. ULONG yAccum = pStrBlt->ulYFracAccumulator;
  72. ULONG yFrac = pStrBlt->ulYDstToSrcFracCeil;
  73. ULONG yInt = 0;
  74. LONG lDstStride = pStrBlt->lDeltaDst - WidthX;
  75. BYTE* pjPorts = ppdev->pjPorts;
  76. BYTE* pjBase = ppdev->pjBase;
  77. LONG lDelta = ppdev->lDelta;
  78. LONG xyOffset = ppdev->xyOffset;
  79. LONG xDstBytes = xDst;
  80. LONG WidthXBytes = WidthX;
  81. WidthXAln = WidthX - EndAln - ((- (LONG) StartAln) & 0x03);
  82. //
  83. // if this is a shrinking blt, calc src scan line stride
  84. //
  85. if (pStrBlt->ulYDstToSrcIntCeil != 0)
  86. {
  87. yInt = pStrBlt->lDeltaSrc * pStrBlt->ulYDstToSrcIntCeil;
  88. }
  89. //
  90. // loop drawing each scan line
  91. //
  92. //
  93. // at least 7 wide (DST) blt
  94. //
  95. do {
  96. BYTE jSrc0,jSrc1,jSrc2,jSrc3;
  97. ULONG yTmp;
  98. pjSrc = pjSrcScan;
  99. xAccum = pStrBlt->ulXFracAccumulator;
  100. //
  101. // a single src scan line is being written
  102. //
  103. if (ppdev->flCaps & CAPS_MM_IO)
  104. {
  105. CP_MM_WAIT_FOR_BLT_COMPLETE(ppdev, pjBase);
  106. }
  107. else
  108. {
  109. CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
  110. }
  111. switch (StartAln) {
  112. case 1:
  113. jSrc0 = *pjSrc;
  114. xTmp = xAccum + xFrac;
  115. pjSrc = pjSrc + xInt + (xTmp < xAccum);
  116. *pjDst++ = jSrc0;
  117. xAccum = xTmp;
  118. case 2:
  119. jSrc0 = *pjSrc;
  120. xTmp = xAccum + xFrac;
  121. pjSrc = pjSrc + xInt + (xTmp < xAccum);
  122. *pjDst++ = jSrc0;
  123. xAccum = xTmp;
  124. case 3:
  125. jSrc0 = *pjSrc;
  126. xTmp = xAccum + xFrac;
  127. pjSrc = pjSrc + xInt + (xTmp < xAccum);
  128. *pjDst++ = jSrc0;
  129. xAccum = xTmp;
  130. }
  131. pjDstEnd = pjDst + WidthXAln;
  132. while (pjDst != pjDstEnd)
  133. {
  134. jSrc0 = *pjSrc;
  135. xTmp = xAccum + xFrac;
  136. pjSrc = pjSrc + xInt + (xTmp < xAccum);
  137. jSrc1 = *pjSrc;
  138. xAccum = xTmp + xFrac;
  139. pjSrc = pjSrc + xInt + (xAccum < xTmp);
  140. jSrc2 = *pjSrc;
  141. xTmp = xAccum + xFrac;
  142. pjSrc = pjSrc + xInt + (xTmp < xAccum);
  143. jSrc3 = *pjSrc;
  144. xAccum = xTmp + xFrac;
  145. pjSrc = pjSrc + xInt + (xAccum < xTmp);
  146. ulDst = (jSrc3 << 24) | (jSrc2 << 16) | (jSrc1 << 8) | jSrc0;
  147. *(PULONG)pjDst = ulDst;
  148. pjDst += 4;
  149. }
  150. switch (EndAln) {
  151. case 3:
  152. jSrc0 = *pjSrc;
  153. xTmp = xAccum + xFrac;
  154. pjSrc = pjSrc + xInt + (xTmp < xAccum);
  155. *pjDst++ = jSrc0;
  156. xAccum = xTmp;
  157. case 2:
  158. jSrc0 = *pjSrc;
  159. xTmp = xAccum + xFrac;
  160. pjSrc = pjSrc + xInt + (xTmp < xAccum);
  161. *pjDst++ = jSrc0;
  162. xAccum = xTmp;
  163. case 1:
  164. jSrc0 = *pjSrc;
  165. xTmp = xAccum + xFrac;
  166. pjSrc = pjSrc + xInt + (xTmp < xAccum);
  167. *pjDst++ = jSrc0;
  168. }
  169. pjOldScan = pjSrcScan;
  170. pjSrcScan += yInt;
  171. yTmp = yAccum + yFrac;
  172. if (yTmp < yAccum)
  173. {
  174. pjSrcScan += pStrBlt->lDeltaSrc;
  175. }
  176. yAccum = yTmp;
  177. pjDst = (pjDst + lDstStride);
  178. yDst++;
  179. yCount--;
  180. if ((yCount != 0) && (pjSrcScan == pjOldScan))
  181. {
  182. // It's an expanding stretch in 'y'; the scan we just laid down
  183. // will be copied at least once using the hardware:
  184. cyDuplicate = 0;
  185. do {
  186. cyDuplicate++;
  187. pjSrcScan += yInt;
  188. yTmp = yAccum + yFrac;
  189. if (yTmp < yAccum)
  190. {
  191. pjSrcScan += pStrBlt->lDeltaSrc;
  192. }
  193. yAccum = yTmp;
  194. pjDst = (pjDst + pStrBlt->lDeltaDst);
  195. yCount--;
  196. } while ((yCount != 0) && (pjSrcScan == pjOldScan));
  197. // The scan is to be copied 'cyDuplicate' times using the
  198. // hardware.
  199. //
  200. // We don't need to WAIT_FOR_BLT_COMPLETE since we did it above.
  201. //
  202. if (ppdev->flCaps & CAPS_MM_IO)
  203. {
  204. CP_MM_XCNT(ppdev, pjBase, (WidthXBytes - 1));
  205. CP_MM_YCNT(ppdev, pjBase, (cyDuplicate - 1));
  206. CP_MM_SRC_ADDR(ppdev, pjBase, (xyOffset + ((yDst - 1) * lDelta) + xDstBytes));
  207. CP_MM_DST_ADDR(ppdev, pjBase, ((yDst * lDelta) + xDstBytes));
  208. CP_MM_START_BLT(ppdev, pjBase);
  209. }
  210. else
  211. {
  212. CP_IO_XCNT(ppdev, pjPorts, (WidthXBytes - 1));
  213. CP_IO_YCNT(ppdev, pjPorts, (cyDuplicate - 1));
  214. CP_IO_SRC_ADDR(ppdev, pjPorts, (xyOffset + ((yDst - 1) * lDelta) + xDstBytes));
  215. CP_IO_DST_ADDR(ppdev, pjPorts, ((yDst * lDelta) + xDstBytes));
  216. CP_IO_START_BLT(ppdev, pjPorts);
  217. }
  218. yDst += cyDuplicate;
  219. }
  220. } while (yCount != 0);
  221. }
  222. /******************************Public*Routine******************************\
  223. *
  224. * Routine Name
  225. *
  226. * vDirectStretch16
  227. *
  228. * Routine Description:
  229. *
  230. * Stretch blt 16->16
  231. *
  232. * Arguments:
  233. *
  234. * pStrBlt - contains all params for blt
  235. *
  236. * Return Value:
  237. *
  238. * VOID
  239. *
  240. \**************************************************************************/
  241. VOID vDirectStretch16(
  242. STR_BLT* pStrBlt)
  243. {
  244. BYTE* pjOldScan;
  245. USHORT* pusSrc;
  246. USHORT* pusDstEnd;
  247. LONG WidthXAln;
  248. ULONG ulDst;
  249. ULONG xAccum;
  250. ULONG xTmp;
  251. LONG cyDuplicate;
  252. PDEV* ppdev = pStrBlt->ppdev;
  253. LONG xDst = pStrBlt->XDstStart;
  254. LONG xSrc = pStrBlt->XSrcStart;
  255. BYTE* pjSrcScan = (pStrBlt->pjSrcScan) + xSrc * 2;
  256. USHORT* pusDst = (USHORT*)(pStrBlt->pjDstScan) + xDst;
  257. LONG yDst = pStrBlt->YDstStart; // + ppdev->yOffset;
  258. LONG yCount = pStrBlt->YDstCount;
  259. ULONG StartAln = (ULONG)((ULONG_PTR)pusDst & 0x02) >> 1;
  260. LONG WidthX = pStrBlt->XDstEnd - xDst;
  261. ULONG EndAln = (ULONG)(((ULONG_PTR)(pusDst + WidthX) & 0x02) >> 1);
  262. ULONG xInt = pStrBlt->ulXDstToSrcIntCeil;
  263. ULONG xFrac = pStrBlt->ulXDstToSrcFracCeil;
  264. ULONG yAccum = pStrBlt->ulYFracAccumulator;
  265. ULONG yFrac = pStrBlt->ulYDstToSrcFracCeil;
  266. LONG lDstStride = pStrBlt->lDeltaDst - 2 * WidthX;
  267. ULONG yInt = 0;
  268. BYTE* pjPorts = ppdev->pjPorts;
  269. BYTE* pjBase = ppdev->pjBase;
  270. LONG lDelta = ppdev->lDelta;
  271. LONG xyOffset = ppdev->xyOffset;
  272. LONG xDstBytes = xDst * 2;
  273. LONG WidthXBytes = WidthX * 2;
  274. WidthXAln = WidthX - EndAln - StartAln;
  275. //
  276. // if this is a shrinking blt, calc src scan line stride
  277. //
  278. if (pStrBlt->ulYDstToSrcIntCeil != 0)
  279. {
  280. yInt = pStrBlt->lDeltaSrc * pStrBlt->ulYDstToSrcIntCeil;
  281. }
  282. // Loop stretching each scan line
  283. do {
  284. USHORT usSrc0,usSrc1;
  285. ULONG yTmp;
  286. pusSrc = (USHORT*) pjSrcScan;
  287. xAccum = pStrBlt->ulXFracAccumulator;
  288. // A single source scan line is being written:
  289. if (ppdev->flCaps & CAPS_MM_IO)
  290. {
  291. CP_MM_WAIT_FOR_BLT_COMPLETE(ppdev, pjBase);
  292. }
  293. else
  294. {
  295. CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
  296. }
  297. if (StartAln)
  298. {
  299. usSrc0 = *pusSrc;
  300. xTmp = xAccum + xFrac;
  301. pusSrc = pusSrc + xInt + (xTmp < xAccum);
  302. *pusDst++ = usSrc0;
  303. xAccum = xTmp;
  304. }
  305. pusDstEnd = pusDst + WidthXAln;
  306. while (pusDst != pusDstEnd)
  307. {
  308. usSrc0 = *pusSrc;
  309. xTmp = xAccum + xFrac;
  310. pusSrc = pusSrc + xInt + (xTmp < xAccum);
  311. usSrc1 = *pusSrc;
  312. xAccum = xTmp + xFrac;
  313. pusSrc = pusSrc + xInt + (xAccum < xTmp);
  314. ulDst = (ULONG)((usSrc1 << 16) | usSrc0);
  315. *(ULONG*)pusDst = ulDst;
  316. pusDst+=2;
  317. }
  318. if (EndAln)
  319. {
  320. usSrc0 = *pusSrc;
  321. xTmp = xAccum + xFrac;
  322. pusSrc = pusSrc + xInt + (xTmp < xAccum);
  323. *pusDst++ = usSrc0;
  324. }
  325. pjOldScan = pjSrcScan;
  326. pjSrcScan += yInt;
  327. yTmp = yAccum + yFrac;
  328. if (yTmp < yAccum)
  329. {
  330. pjSrcScan += pStrBlt->lDeltaSrc;
  331. }
  332. yAccum = yTmp;
  333. pusDst = (USHORT*) ((BYTE*) pusDst + lDstStride);
  334. yDst++;
  335. yCount--;
  336. if ((yCount != 0) && (pjSrcScan == pjOldScan))
  337. {
  338. // It's an expanding stretch in 'y'; the scan we just laid down
  339. // will be copied at least once using the hardware:
  340. cyDuplicate = 0;
  341. do {
  342. cyDuplicate++;
  343. pjSrcScan += yInt;
  344. yTmp = yAccum + yFrac;
  345. if (yTmp < yAccum)
  346. {
  347. pjSrcScan += pStrBlt->lDeltaSrc;
  348. }
  349. yAccum = yTmp;
  350. pusDst = (USHORT*) ((BYTE*) pusDst + pStrBlt->lDeltaDst);
  351. yCount--;
  352. } while ((yCount != 0) && (pjSrcScan == pjOldScan));
  353. // The scan is to be copied 'cyDuplicate' times using the
  354. // hardware.
  355. //
  356. // We don't need to WAIT_FOR_BLT_COMPLETE since we did it above.
  357. //
  358. if (ppdev->flCaps & CAPS_MM_IO)
  359. {
  360. CP_MM_XCNT(ppdev, pjBase, (WidthXBytes - 1));
  361. CP_MM_YCNT(ppdev, pjBase, (cyDuplicate - 1));
  362. CP_MM_SRC_ADDR(ppdev, pjBase, (xyOffset + ((yDst - 1) * lDelta) + xDstBytes));
  363. CP_MM_DST_ADDR(ppdev, pjBase, ((yDst * lDelta) + xDstBytes));
  364. CP_MM_START_BLT(ppdev, pjBase);
  365. }
  366. else
  367. {
  368. CP_IO_XCNT(ppdev, pjPorts, (WidthXBytes - 1));
  369. CP_IO_YCNT(ppdev, pjPorts, (cyDuplicate - 1));
  370. CP_IO_SRC_ADDR(ppdev, pjPorts, (xyOffset + ((yDst - 1) * lDelta) + xDstBytes));
  371. CP_IO_DST_ADDR(ppdev, pjPorts, ((yDst * lDelta) + xDstBytes));
  372. CP_IO_START_BLT(ppdev, pjPorts);
  373. }
  374. yDst += cyDuplicate;
  375. }
  376. } while (yCount != 0);
  377. }
  378. /******************************Public*Routine******************************\
  379. *
  380. * Routine Name
  381. *
  382. * vDirectStretch24
  383. *
  384. * Routine Description:
  385. *
  386. * Stretch blt 24->24
  387. *
  388. * Arguments:
  389. *
  390. * pStrBlt - contains all params for blt
  391. *
  392. * Return Value:
  393. *
  394. * VOID
  395. *
  396. \**************************************************************************/
  397. VOID vDirectStretch24(
  398. STR_BLT* pStrBlt)
  399. {
  400. BYTE* pbSrc;
  401. BYTE* pbDstEnd;
  402. LONG WidthXAln;
  403. ULONG xAccum;
  404. ULONG xTmp;
  405. BYTE* pjOldScan;
  406. LONG cyDuplicate;
  407. ULONG ulSrc0;
  408. BYTE bDst0,bDst1,bDst2;
  409. ULONG xBits;
  410. PDEV* ppdev = pStrBlt->ppdev;
  411. LONG xDst = pStrBlt->XDstStart;
  412. LONG xSrc = pStrBlt->XSrcStart;
  413. BYTE* pjSrcScan = (pStrBlt->pjSrcScan) + (xSrc << 1) + xSrc; // 3 bytes per pixel
  414. BYTE* pbDST = (pStrBlt->pjDstScan) + (xDst << 1) + xDst;
  415. LONG yDst = pStrBlt->YDstStart; // + ppdev->yOffset;
  416. LONG yCount = pStrBlt->YDstCount;
  417. LONG WidthX = pStrBlt->XDstEnd - xDst;
  418. ULONG xInt = pStrBlt->ulXDstToSrcIntCeil;
  419. ULONG xFrac = pStrBlt->ulXDstToSrcFracCeil;
  420. ULONG yAccum = pStrBlt->ulYFracAccumulator;
  421. ULONG yFrac = pStrBlt->ulYDstToSrcFracCeil;
  422. ULONG yInt = 0;
  423. LONG lDstStride = pStrBlt->lDeltaDst - (WidthX << 1) - WidthX;
  424. BYTE* pjPorts = ppdev->pjPorts;
  425. BYTE* pjBase = ppdev->pjBase;
  426. LONG lDelta = ppdev->lDelta;
  427. LONG xyOffset = ppdev->xyOffset;
  428. LONG xDstBytes = (xDst << 1) + xDst;
  429. LONG WidthXBytes = (WidthX << 1) + WidthX;
  430. //
  431. // if this is a shrinking blt, calc src scan line stride
  432. //
  433. if (pStrBlt->ulYDstToSrcIntCeil != 0) // enlargement ?
  434. { // yes.
  435. yInt = pStrBlt->lDeltaSrc * pStrBlt->ulYDstToSrcIntCeil;
  436. }
  437. // Loop stretching each scan line
  438. do {
  439. ULONG yTmp;
  440. pbSrc = pjSrcScan;
  441. xAccum = pStrBlt->ulXFracAccumulator;
  442. // A single source scan line is being written:
  443. if (ppdev->flCaps & CAPS_MM_IO) // Blt Engine Ready?
  444. {
  445. CP_MM_WAIT_FOR_BLT_COMPLETE(ppdev, pjBase);
  446. }
  447. else
  448. {
  449. CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
  450. }
  451. pbDstEnd = pbDST + WidthXBytes - 3;
  452. while (pbDST < pbDstEnd)
  453. {
  454. ulSrc0 = *((ULONG*)pbSrc);
  455. bDst0 = (BYTE) (ulSrc0 & 0xff);
  456. bDst1 = (BYTE) ((ulSrc0 >> 8) & 0xff);
  457. bDst2 = (BYTE) ((ulSrc0 >> 16) & 0xff);
  458. xTmp = xAccum + xFrac;
  459. xBits = xInt + (xTmp < xAccum);
  460. xAccum = xTmp;
  461. pbSrc += (xBits << 1) + xBits;
  462. *pbDST++ = bDst0;
  463. *pbDST++ = bDst1;
  464. *pbDST++ = bDst2;
  465. }
  466. //
  467. // do the last pixel using bye
  468. //
  469. *pbDST++ = *pbSrc++;
  470. *pbDST++ = *pbSrc++;
  471. *pbDST++ = *pbSrc++;
  472. pjOldScan = pjSrcScan;
  473. pjSrcScan += yInt;
  474. yTmp = yAccum + yFrac;
  475. if (yTmp < yAccum)
  476. {
  477. pjSrcScan += pStrBlt->lDeltaSrc;
  478. }
  479. yAccum = yTmp;
  480. pbDST += lDstStride;
  481. yDst++;
  482. yCount--;
  483. if ((yCount != 0) && (pjSrcScan == pjOldScan))
  484. {
  485. // It's an expanding stretch in 'y'; the scan we just laid down
  486. // will be copied at least once using the hardware:
  487. cyDuplicate = 0;
  488. do {
  489. cyDuplicate++;
  490. pjSrcScan += yInt;
  491. yTmp = yAccum + yFrac;
  492. if (yTmp < yAccum)
  493. {
  494. pjSrcScan += pStrBlt->lDeltaSrc;
  495. }
  496. yAccum = yTmp;
  497. pbDST += pStrBlt->lDeltaDst;
  498. yCount--;
  499. } while ((yCount != 0) && (pjSrcScan == pjOldScan));
  500. // The scan is to be copied 'cyDuplicate' times using the
  501. // hardware.
  502. //
  503. // We don't need to WAIT_FOR_BLT_COMPLETE since we did it above.
  504. //
  505. if (ppdev->flCaps & CAPS_MM_IO)
  506. {
  507. CP_MM_XCNT(ppdev, pjBase, (WidthXBytes - 1));
  508. CP_MM_YCNT(ppdev, pjBase, (cyDuplicate - 1));
  509. CP_MM_SRC_ADDR(ppdev, pjBase, (xyOffset + ((yDst - 1) * lDelta) + xDstBytes));
  510. CP_MM_DST_ADDR(ppdev, pjBase, ((yDst * lDelta) + xDstBytes));
  511. CP_MM_START_BLT(ppdev, pjBase);
  512. }
  513. else
  514. {
  515. CP_IO_XCNT(ppdev, pjPorts, (WidthXBytes - 1));
  516. CP_IO_YCNT(ppdev, pjPorts, (cyDuplicate - 1));
  517. CP_IO_SRC_ADDR(ppdev, pjPorts, (xyOffset + ((yDst - 1) * lDelta) + xDstBytes));
  518. CP_IO_DST_ADDR(ppdev, pjPorts, ((yDst * lDelta) + xDstBytes));
  519. CP_IO_START_BLT(ppdev, pjPorts);
  520. }
  521. yDst += cyDuplicate;
  522. }
  523. } while (yCount != 0);
  524. }
  525. // chu01
  526. /******************************Public*Routine******************************\
  527. *
  528. * B i t B L T E n h a n c e m e n t F o r C L - G D 5 4 8 0
  529. *
  530. \**************************************************************************/
  531. /******************************Public*Routine******************************\
  532. *
  533. * Routine Name
  534. *
  535. * vDirectStretch8_80
  536. *
  537. * Routine Description:
  538. *
  539. * Stretch blt 8->8
  540. * This is for BLT enhancement only, such CL-GD5480.
  541. *
  542. * NOTE: This routine doesn't handle cases where the blt stretch starts
  543. * and ends in the same destination dword! vDirectStretchNarrow
  544. * is expected to have been called for that case.
  545. *
  546. * Arguments:
  547. *
  548. * pStrBlt - contains all params for blt
  549. *
  550. * Return Value:
  551. *
  552. * VOID
  553. *
  554. \**************************************************************************/
  555. VOID vDirectStretch8_80(
  556. STR_BLT* pStrBlt)
  557. {
  558. BYTE* pjSrc;
  559. BYTE* pjDstEnd;
  560. LONG WidthXAln;
  561. ULONG ulDst;
  562. ULONG xAccum;
  563. ULONG xTmp;
  564. BYTE* pjOldScan;
  565. LONG cyDuplicate;
  566. PDEV* ppdev = pStrBlt->ppdev;
  567. LONG xDst = pStrBlt->XDstStart;
  568. LONG xSrc = pStrBlt->XSrcStart;
  569. BYTE* pjSrcScan = pStrBlt->pjSrcScan + xSrc;
  570. BYTE* pjDst = pStrBlt->pjDstScan + xDst;
  571. LONG yDst = pStrBlt->YDstStart; // + ppdev->yOffset;
  572. LONG yCount = pStrBlt->YDstCount;
  573. ULONG StartAln = (ULONG)((ULONG_PTR)pjDst & 0x03);
  574. LONG WidthX = pStrBlt->XDstEnd - xDst;
  575. ULONG EndAln = (ULONG)((ULONG_PTR)(pjDst + WidthX) & 0x03);
  576. ULONG xInt = pStrBlt->ulXDstToSrcIntCeil;
  577. ULONG xFrac = pStrBlt->ulXDstToSrcFracCeil;
  578. ULONG yAccum = pStrBlt->ulYFracAccumulator;
  579. ULONG yFrac = pStrBlt->ulYDstToSrcFracCeil;
  580. ULONG yInt = 0;
  581. LONG lDstStride = pStrBlt->lDeltaDst - WidthX;
  582. BYTE* pjPorts = ppdev->pjPorts;
  583. BYTE* pjBase = ppdev->pjBase;
  584. LONG lDelta = ppdev->lDelta;
  585. LONG xyOffset = ppdev->xyOffset;
  586. LONG xDstBytes = xDst;
  587. LONG WidthXBytes = WidthX;
  588. WidthXAln = WidthX - EndAln - ((- (LONG) StartAln) & 0x03);
  589. //
  590. // if this is a shrinking blt, calc src scan line stride
  591. //
  592. if (pStrBlt->ulYDstToSrcIntCeil != 0)
  593. {
  594. yInt = pStrBlt->lDeltaSrc * pStrBlt->ulYDstToSrcIntCeil;
  595. }
  596. //
  597. // loop drawing each scan line
  598. //
  599. //
  600. // at least 7 wide (DST) blt
  601. //
  602. do {
  603. BYTE jSrc0,jSrc1,jSrc2,jSrc3;
  604. ULONG yTmp;
  605. pjSrc = pjSrcScan;
  606. xAccum = pStrBlt->ulXFracAccumulator;
  607. //
  608. // a single src scan line is being written
  609. //
  610. if (ppdev->flCaps & CAPS_MM_IO)
  611. {
  612. CP_MM_WAIT_FOR_BLT_COMPLETE(ppdev, pjBase);
  613. }
  614. else
  615. {
  616. CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
  617. }
  618. switch (StartAln) {
  619. case 1:
  620. jSrc0 = *pjSrc;
  621. xTmp = xAccum + xFrac;
  622. pjSrc = pjSrc + xInt + (xTmp < xAccum);
  623. *pjDst++ = jSrc0;
  624. xAccum = xTmp;
  625. case 2:
  626. jSrc0 = *pjSrc;
  627. xTmp = xAccum + xFrac;
  628. pjSrc = pjSrc + xInt + (xTmp < xAccum);
  629. *pjDst++ = jSrc0;
  630. xAccum = xTmp;
  631. case 3:
  632. jSrc0 = *pjSrc;
  633. xTmp = xAccum + xFrac;
  634. pjSrc = pjSrc + xInt + (xTmp < xAccum);
  635. *pjDst++ = jSrc0;
  636. xAccum = xTmp;
  637. }
  638. pjDstEnd = pjDst + WidthXAln;
  639. while (pjDst != pjDstEnd)
  640. {
  641. jSrc0 = *pjSrc;
  642. xTmp = xAccum + xFrac;
  643. pjSrc = pjSrc + xInt + (xTmp < xAccum);
  644. jSrc1 = *pjSrc;
  645. xAccum = xTmp + xFrac;
  646. pjSrc = pjSrc + xInt + (xAccum < xTmp);
  647. jSrc2 = *pjSrc;
  648. xTmp = xAccum + xFrac;
  649. pjSrc = pjSrc + xInt + (xTmp < xAccum);
  650. jSrc3 = *pjSrc;
  651. xAccum = xTmp + xFrac;
  652. pjSrc = pjSrc + xInt + (xAccum < xTmp);
  653. ulDst = (jSrc3 << 24) | (jSrc2 << 16) | (jSrc1 << 8) | jSrc0;
  654. *(PULONG)pjDst = ulDst;
  655. pjDst += 4;
  656. }
  657. switch (EndAln) {
  658. case 3:
  659. jSrc0 = *pjSrc;
  660. xTmp = xAccum + xFrac;
  661. pjSrc = pjSrc + xInt + (xTmp < xAccum);
  662. *pjDst++ = jSrc0;
  663. xAccum = xTmp;
  664. case 2:
  665. jSrc0 = *pjSrc;
  666. xTmp = xAccum + xFrac;
  667. pjSrc = pjSrc + xInt + (xTmp < xAccum);
  668. *pjDst++ = jSrc0;
  669. xAccum = xTmp;
  670. case 1:
  671. jSrc0 = *pjSrc;
  672. xTmp = xAccum + xFrac;
  673. pjSrc = pjSrc + xInt + (xTmp < xAccum);
  674. *pjDst++ = jSrc0;
  675. }
  676. pjOldScan = pjSrcScan;
  677. pjSrcScan += yInt;
  678. yTmp = yAccum + yFrac;
  679. if (yTmp < yAccum)
  680. {
  681. pjSrcScan += pStrBlt->lDeltaSrc;
  682. }
  683. yAccum = yTmp;
  684. pjDst = (pjDst + lDstStride);
  685. yDst++;
  686. yCount--;
  687. if ((yCount != 0) && (pjSrcScan == pjOldScan))
  688. {
  689. // It's an expanding stretch in 'y'; the scan we just laid down
  690. // will be copied at least once using the hardware:
  691. cyDuplicate = 0;
  692. do {
  693. cyDuplicate++;
  694. pjSrcScan += yInt;
  695. yTmp = yAccum + yFrac;
  696. if (yTmp < yAccum)
  697. {
  698. pjSrcScan += pStrBlt->lDeltaSrc;
  699. }
  700. yAccum = yTmp;
  701. pjDst = (pjDst + pStrBlt->lDeltaDst);
  702. yCount--;
  703. } while ((yCount != 0) && (pjSrcScan == pjOldScan));
  704. // The scan is to be copied 'cyDuplicate' times using the
  705. // hardware.
  706. //
  707. // We don't need to WAIT_FOR_BLT_COMPLETE since we did it above.
  708. //
  709. if (ppdev->flCaps & CAPS_MM_IO)
  710. {
  711. // GR33
  712. CP_MM_BLT_EXT_MODE(ppdev, pjBase, ENABLE_XY_POSITION) ;
  713. // GR20, GR21
  714. CP_MM_XCNT(ppdev, pjBase, (WidthX - 1)) ;
  715. // GR22, GR23
  716. CP_MM_YCNT(ppdev, pjBase, (cyDuplicate - 1)) ;
  717. // GR2C, GR2D, GR2E
  718. CP_MM_SRC_ADDR(ppdev, pjBase, xyOffset) ;
  719. // GR44, GR45, GR46, GR47
  720. CP_MM_SRC_XY(ppdev, pjBase, xDst, (yDst - 1)) ;
  721. // GR28, GR29, GR2A
  722. CP_MM_DST_ADDR(ppdev, pjBase, 0) ;
  723. // GR42, GR43
  724. CP_MM_DST_Y(ppdev, pjBase, yDst) ;
  725. // GR40, GR41
  726. CP_MM_DST_X(ppdev, pjBase, xDst) ;
  727. }
  728. else
  729. {
  730. CP_IO_XCNT(ppdev, pjPorts, (WidthXBytes - 1));
  731. CP_IO_YCNT(ppdev, pjPorts, (cyDuplicate - 1));
  732. CP_IO_SRC_ADDR(ppdev, pjPorts, (xyOffset + ((yDst - 1) * lDelta) + xDstBytes));
  733. CP_IO_DST_ADDR(ppdev, pjPorts, ((yDst * lDelta) + xDstBytes));
  734. CP_IO_START_BLT(ppdev, pjPorts);
  735. }
  736. yDst += cyDuplicate;
  737. }
  738. } while (yCount != 0);
  739. // GR33
  740. CP_MM_BLT_EXT_MODE(ppdev, pjBase, 0) ;
  741. } // vDirectStretch8_80
  742. /******************************Public*Routine******************************\
  743. *
  744. * Routine Name
  745. *
  746. * vDirectStretch16_80
  747. *
  748. * Routine Description:
  749. *
  750. * Stretch blt 16->16
  751. * This is for BLT enhancement only, such CL-GD5480.
  752. *
  753. * Arguments:
  754. *
  755. * pStrBlt - contains all params for blt
  756. *
  757. * Return Value:
  758. *
  759. * VOID
  760. *
  761. \**************************************************************************/
  762. VOID vDirectStretch16_80(
  763. STR_BLT* pStrBlt)
  764. {
  765. BYTE* pjOldScan;
  766. USHORT* pusSrc;
  767. USHORT* pusDstEnd;
  768. LONG WidthXAln;
  769. ULONG ulDst;
  770. ULONG xAccum;
  771. ULONG xTmp;
  772. LONG cyDuplicate;
  773. PDEV* ppdev = pStrBlt->ppdev;
  774. LONG xDst = pStrBlt->XDstStart;
  775. LONG xSrc = pStrBlt->XSrcStart;
  776. BYTE* pjSrcScan = (pStrBlt->pjSrcScan) + xSrc * 2;
  777. USHORT* pusDst = (USHORT*)(pStrBlt->pjDstScan) + xDst;
  778. LONG yDst = pStrBlt->YDstStart; // + ppdev->yOffset;
  779. LONG yCount = pStrBlt->YDstCount;
  780. ULONG StartAln = ((ULONG)((ULONG_PTR)pusDst & 0x02)) >> 1;
  781. LONG WidthX = pStrBlt->XDstEnd - xDst;
  782. ULONG EndAln = (ULONG)(((ULONG_PTR)(pusDst + WidthX) & 0x02) >> 1);
  783. ULONG xInt = pStrBlt->ulXDstToSrcIntCeil;
  784. ULONG xFrac = pStrBlt->ulXDstToSrcFracCeil;
  785. ULONG yAccum = pStrBlt->ulYFracAccumulator;
  786. ULONG yFrac = pStrBlt->ulYDstToSrcFracCeil;
  787. LONG lDstStride = pStrBlt->lDeltaDst - 2 * WidthX;
  788. ULONG yInt = 0;
  789. BYTE* pjPorts = ppdev->pjPorts;
  790. BYTE* pjBase = ppdev->pjBase;
  791. LONG lDelta = ppdev->lDelta;
  792. LONG xyOffset = ppdev->xyOffset;
  793. LONG xDstBytes = xDst * 2;
  794. LONG WidthXBytes = WidthX * 2;
  795. WidthXAln = WidthX - EndAln - StartAln;
  796. //
  797. // if this is a shrinking blt, calc src scan line stride
  798. //
  799. if (pStrBlt->ulYDstToSrcIntCeil != 0)
  800. {
  801. yInt = pStrBlt->lDeltaSrc * pStrBlt->ulYDstToSrcIntCeil;
  802. }
  803. // Loop stretching each scan line
  804. do {
  805. USHORT usSrc0,usSrc1;
  806. ULONG yTmp;
  807. pusSrc = (USHORT*) pjSrcScan;
  808. xAccum = pStrBlt->ulXFracAccumulator;
  809. // A single source scan line is being written:
  810. if (ppdev->flCaps & CAPS_MM_IO)
  811. {
  812. CP_MM_WAIT_FOR_BLT_COMPLETE(ppdev, pjBase);
  813. }
  814. else
  815. {
  816. CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
  817. }
  818. if (StartAln)
  819. {
  820. usSrc0 = *pusSrc;
  821. xTmp = xAccum + xFrac;
  822. pusSrc = pusSrc + xInt + (xTmp < xAccum);
  823. *pusDst++ = usSrc0;
  824. xAccum = xTmp;
  825. }
  826. pusDstEnd = pusDst + WidthXAln;
  827. while (pusDst != pusDstEnd)
  828. {
  829. usSrc0 = *pusSrc;
  830. xTmp = xAccum + xFrac;
  831. pusSrc = pusSrc + xInt + (xTmp < xAccum);
  832. usSrc1 = *pusSrc;
  833. xAccum = xTmp + xFrac;
  834. pusSrc = pusSrc + xInt + (xAccum < xTmp);
  835. ulDst = (ULONG)((usSrc1 << 16) | usSrc0);
  836. *(ULONG*)pusDst = ulDst;
  837. pusDst+=2;
  838. }
  839. if (EndAln)
  840. {
  841. usSrc0 = *pusSrc;
  842. xTmp = xAccum + xFrac;
  843. pusSrc = pusSrc + xInt + (xTmp < xAccum);
  844. *pusDst++ = usSrc0;
  845. }
  846. pjOldScan = pjSrcScan;
  847. pjSrcScan += yInt;
  848. yTmp = yAccum + yFrac;
  849. if (yTmp < yAccum)
  850. {
  851. pjSrcScan += pStrBlt->lDeltaSrc;
  852. }
  853. yAccum = yTmp;
  854. pusDst = (USHORT*) ((BYTE*) pusDst + lDstStride);
  855. yDst++;
  856. yCount--;
  857. if ((yCount != 0) && (pjSrcScan == pjOldScan))
  858. {
  859. // It's an expanding stretch in 'y'; the scan we just laid down
  860. // will be copied at least once using the hardware:
  861. cyDuplicate = 0;
  862. do {
  863. cyDuplicate++;
  864. pjSrcScan += yInt;
  865. yTmp = yAccum + yFrac;
  866. if (yTmp < yAccum)
  867. {
  868. pjSrcScan += pStrBlt->lDeltaSrc;
  869. }
  870. yAccum = yTmp;
  871. pusDst = (USHORT*) ((BYTE*) pusDst + pStrBlt->lDeltaDst);
  872. yCount--;
  873. } while ((yCount != 0) && (pjSrcScan == pjOldScan));
  874. // The scan is to be copied 'cyDuplicate' times using the
  875. // hardware.
  876. //
  877. // We don't need to WAIT_FOR_BLT_COMPLETE since we did it above.
  878. //
  879. if (ppdev->flCaps & CAPS_MM_IO)
  880. {
  881. // GR33
  882. CP_MM_BLT_EXT_MODE(ppdev, pjBase, ENABLE_XY_POSITION) ;
  883. // GR20, GR21
  884. CP_MM_XCNT(ppdev, pjBase, ((WidthX << 1) - 1)) ;
  885. // GR22, GR23
  886. CP_MM_YCNT(ppdev, pjBase, (cyDuplicate - 1)) ;
  887. // GR2C, GR2D, GR2E
  888. CP_MM_SRC_ADDR(ppdev, pjBase, xyOffset) ;
  889. // GR44, GR45, GR46, GR47
  890. CP_MM_SRC_XY(ppdev, pjBase, xDst << 1, (yDst - 1)) ;
  891. // GR28, GR29, GR2A
  892. CP_MM_DST_ADDR(ppdev, pjBase, 0) ;
  893. // GR42, GR43
  894. CP_MM_DST_Y(ppdev, pjBase, yDst) ;
  895. // GR40, GR41
  896. CP_MM_DST_X(ppdev, pjBase, xDst << 1) ;
  897. }
  898. else
  899. {
  900. CP_IO_XCNT(ppdev, pjPorts, (WidthXBytes - 1));
  901. CP_IO_YCNT(ppdev, pjPorts, (cyDuplicate - 1));
  902. CP_IO_SRC_ADDR(ppdev, pjPorts, (xyOffset + ((yDst - 1) * lDelta) + xDstBytes));
  903. CP_IO_DST_ADDR(ppdev, pjPorts, ((yDst * lDelta) + xDstBytes));
  904. CP_IO_START_BLT(ppdev, pjPorts);
  905. }
  906. yDst += cyDuplicate;
  907. }
  908. } while (yCount != 0);
  909. // GR33
  910. CP_MM_BLT_EXT_MODE(ppdev, pjBase, 0) ;
  911. } // vDirectStretch16_80
  912. /******************************Public*Routine******************************\
  913. *
  914. * Routine Name
  915. *
  916. * vDirectStretch24_80
  917. *
  918. * Routine Description:
  919. *
  920. * Stretch blt 24->24.
  921. * This is for BLT enhancement only, such CL-GD5480.
  922. *
  923. * Arguments:
  924. *
  925. * pStrBlt - contains all params for blt
  926. *
  927. * Return Value:
  928. *
  929. * VOID
  930. *
  931. \**************************************************************************/
  932. VOID vDirectStretch24_80(
  933. STR_BLT* pStrBlt)
  934. {
  935. BYTE* pbSrc;
  936. BYTE* pbDstEnd;
  937. LONG WidthXAln;
  938. ULONG xAccum;
  939. ULONG xTmp;
  940. BYTE* pjOldScan;
  941. LONG cyDuplicate;
  942. ULONG ulSrc0;
  943. BYTE bDst0,bDst1,bDst2;
  944. ULONG xBits;
  945. PDEV* ppdev = pStrBlt->ppdev;
  946. LONG xDst = pStrBlt->XDstStart;
  947. LONG xSrc = pStrBlt->XSrcStart;
  948. BYTE* pjSrcScan = (pStrBlt->pjSrcScan) + (xSrc << 1) + xSrc; // 3 bytes per pixel
  949. BYTE* pbDST = (pStrBlt->pjDstScan) + (xDst << 1) + xDst;
  950. LONG yDst = pStrBlt->YDstStart; // + ppdev->yOffset;
  951. LONG yCount = pStrBlt->YDstCount;
  952. LONG WidthX = pStrBlt->XDstEnd - xDst;
  953. ULONG xInt = pStrBlt->ulXDstToSrcIntCeil;
  954. ULONG xFrac = pStrBlt->ulXDstToSrcFracCeil;
  955. ULONG yAccum = pStrBlt->ulYFracAccumulator;
  956. ULONG yFrac = pStrBlt->ulYDstToSrcFracCeil;
  957. ULONG yInt = 0;
  958. LONG lDstStride = pStrBlt->lDeltaDst - (WidthX << 1) - WidthX;
  959. BYTE* pjPorts = ppdev->pjPorts;
  960. BYTE* pjBase = ppdev->pjBase;
  961. LONG lDelta = ppdev->lDelta;
  962. LONG xyOffset = ppdev->xyOffset;
  963. LONG xDstBytes = (xDst << 1) + xDst;
  964. LONG WidthXBytes = (WidthX << 1) + WidthX;
  965. //
  966. // if this is a shrinking blt, calc src scan line stride
  967. //
  968. if (pStrBlt->ulYDstToSrcIntCeil != 0) // enlargement ?
  969. { // yes.
  970. yInt = pStrBlt->lDeltaSrc * pStrBlt->ulYDstToSrcIntCeil;
  971. }
  972. // Loop stretching each scan line
  973. do {
  974. ULONG yTmp;
  975. pbSrc = pjSrcScan;
  976. xAccum = pStrBlt->ulXFracAccumulator;
  977. // A single source scan line is being written:
  978. if (ppdev->flCaps & CAPS_MM_IO) // Blt Engine Ready?
  979. {
  980. CP_MM_WAIT_FOR_BLT_COMPLETE(ppdev, pjBase);
  981. }
  982. else
  983. {
  984. CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
  985. }
  986. pbDstEnd = pbDST + WidthXBytes - 3;
  987. while (pbDST < pbDstEnd)
  988. {
  989. ulSrc0 = *((ULONG*)pbSrc);
  990. bDst0 = (BYTE) (ulSrc0 & 0xff);
  991. bDst1 = (BYTE) ((ulSrc0 >> 8) & 0xff);
  992. bDst2 = (BYTE) ((ulSrc0 >> 16) & 0xff);
  993. xTmp = xAccum + xFrac;
  994. xBits = xInt + (xTmp < xAccum);
  995. xAccum = xTmp;
  996. pbSrc += (xBits << 1) + xBits;
  997. *pbDST++ = bDst0;
  998. *pbDST++ = bDst1;
  999. *pbDST++ = bDst2;
  1000. }
  1001. //
  1002. // do the last pixel using bye
  1003. //
  1004. *pbDST++ = *pbSrc++;
  1005. *pbDST++ = *pbSrc++;
  1006. *pbDST++ = *pbSrc++;
  1007. pjOldScan = pjSrcScan;
  1008. pjSrcScan += yInt;
  1009. yTmp = yAccum + yFrac;
  1010. if (yTmp < yAccum)
  1011. {
  1012. pjSrcScan += pStrBlt->lDeltaSrc;
  1013. }
  1014. yAccum = yTmp;
  1015. pbDST += lDstStride;
  1016. yDst++;
  1017. yCount--;
  1018. if ((yCount != 0) && (pjSrcScan == pjOldScan))
  1019. {
  1020. // It's an expanding stretch in 'y'; the scan we just laid down
  1021. // will be copied at least once using the hardware:
  1022. cyDuplicate = 0;
  1023. do {
  1024. cyDuplicate++;
  1025. pjSrcScan += yInt;
  1026. yTmp = yAccum + yFrac;
  1027. if (yTmp < yAccum)
  1028. {
  1029. pjSrcScan += pStrBlt->lDeltaSrc;
  1030. }
  1031. yAccum = yTmp;
  1032. pbDST += pStrBlt->lDeltaDst;
  1033. yCount--;
  1034. } while ((yCount != 0) && (pjSrcScan == pjOldScan));
  1035. // The scan is to be copied 'cyDuplicate' times using the
  1036. // hardware.
  1037. //
  1038. // We don't need to WAIT_FOR_BLT_COMPLETE since we did it above.
  1039. //
  1040. if (ppdev->flCaps & CAPS_MM_IO)
  1041. {
  1042. // GR33
  1043. CP_MM_BLT_EXT_MODE(ppdev, pjBase, ENABLE_XY_POSITION) ;
  1044. // GR20, GR21
  1045. CP_MM_XCNT(ppdev, pjBase, (WidthX * 3 - 1)) ;
  1046. // GR22, GR23
  1047. CP_MM_YCNT(ppdev, pjBase, (cyDuplicate - 1)) ;
  1048. // GR2C, GR2D, GR2E
  1049. CP_MM_SRC_ADDR(ppdev, pjBase, xyOffset) ;
  1050. // GR44, GR45, GR46, GR47
  1051. CP_MM_SRC_XY(ppdev, pjBase, xDst * 3, (yDst - 1)) ;
  1052. // GR28, GR29, GR2A
  1053. CP_MM_DST_ADDR(ppdev, pjBase, 0) ;
  1054. // GR42, GR43
  1055. CP_MM_DST_Y(ppdev, pjBase, yDst) ;
  1056. // GR40, GR41
  1057. CP_MM_DST_X(ppdev, pjBase, xDst * 3) ;
  1058. }
  1059. else
  1060. {
  1061. CP_IO_XCNT(ppdev, pjPorts, (WidthXBytes - 1));
  1062. CP_IO_YCNT(ppdev, pjPorts, (cyDuplicate - 1));
  1063. CP_IO_SRC_ADDR(ppdev, pjPorts, (xyOffset + ((yDst - 1) * lDelta) + xDstBytes));
  1064. CP_IO_DST_ADDR(ppdev, pjPorts, ((yDst * lDelta) + xDstBytes));
  1065. CP_IO_START_BLT(ppdev, pjPorts);
  1066. }
  1067. yDst += cyDuplicate;
  1068. }
  1069. } while (yCount != 0);
  1070. // GR33
  1071. CP_MM_BLT_EXT_MODE(ppdev, pjBase, ENABLE_XY_POSITION) ;
  1072. } // vDirectStretch24_80