Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

431 lines
12 KiB

  1. /******************************Module*Header*******************************\
  2. * Module Name: stretch.c
  3. *
  4. * Copyright (c) 1993-1995 Microsoft Corporation
  5. \**************************************************************************/
  6. #include "precomp.h"
  // NOTE(review): presumably the largest extent the stretch code is meant to
  // accept; it is not referenced anywhere in the visible part of this file --
  // confirm against the callers before relying on or removing it.
  7. #define STRETCH_MAX_EXTENT 32767
  8. /******************************Public*Routine******************************\
  9. *
  10. * Routine Name
  11. *
  12. * vDirectStretch8
  13. *
  14. * Routine Description:
  15. *
  16. * Stretch blt 8->8
  17. *
  18. * NOTE: This routine doesn't handle cases where the blt stretch starts
  19. * and ends in the same destination dword! vDirectStretchNarrow
  20. * is expected to have been called for that case.
  21. *
  22. * Arguments:
  23. *
  24. * pStrBlt - contains all params for blt
  25. *
  26. * Return Value:
  27. *
  28. * VOID
  29. *
  30. \**************************************************************************/
  31. VOID vDirectStretch8(
  32. STR_BLT* pStrBlt)
  33. {
  34. BYTE* pjSrc;
  35. BYTE* pjDstEnd;
  36. LONG WidthXAln;
  37. ULONG ulDst;
  38. ULONG xAccum;
  39. ULONG xTmp;
  40. ULONG yTmp;
  41. BYTE* pjOldScan;
  42. LONG cyDuplicate;
  43. PDEV* ppdev = pStrBlt->ppdev;
  44. LONG xDst = pStrBlt->XDstStart;
  45. LONG xSrc = pStrBlt->XSrcStart;
  46. BYTE* pjSrcScan = pStrBlt->pjSrcScan + xSrc;
  47. BYTE* pjDst = pStrBlt->pjDstScan + xDst;
  48. LONG yDst = pStrBlt->YDstStart; // + ppdev->yOffset;
  49. LONG yCount = pStrBlt->YDstCount;
  50. ULONG StartAln = (ULONG)((ULONG_PTR)pjDst & 0x03);
  51. LONG WidthX = pStrBlt->XDstEnd - xDst;
  52. ULONG EndAln = (ULONG)((ULONG_PTR)(pjDst + WidthX) & 0x03);
  53. ULONG xInt = pStrBlt->ulXDstToSrcIntCeil;
  54. ULONG xFrac = pStrBlt->ulXDstToSrcFracCeil;
  55. ULONG yAccum = pStrBlt->ulYFracAccumulator;
  56. ULONG yFrac = pStrBlt->ulYDstToSrcFracCeil;
  57. ULONG yInt = 0;
  58. LONG lDstStride = pStrBlt->lDeltaDst - WidthX;
  59. BYTE* pjBase = ppdev->pjBase;
  60. LONG cBpp = 1;
  61. LONG lDelta = ppdev->lDelta;
  62. ULONG ulSrcAddr = yDst * lDelta + xDst * cBpp + ppdev->xyOffset;
  63. WidthXAln = WidthX - EndAln - ((- (LONG) StartAln) & 0x03);
  64. //
  65. // if this is a shrinking blt, calc src scan line stride
  66. //
  67. if (pStrBlt->ulYDstToSrcIntCeil != 0)
  68. {
  69. yInt = pStrBlt->lDeltaSrc * pStrBlt->ulYDstToSrcIntCeil;
  70. }
  71. //
  72. // loop drawing each scan line
  73. //
  74. //
  75. // at least 7 wide (DST) blt
  76. //
  77. WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
  78. CP_XCNT(ppdev, pjBase, (WidthX * cBpp) - 1);
  79. CP_YCNT(ppdev, pjBase, 0); // we'll do one line at a time
  80. do {
  81. BYTE jSrc0,jSrc1,jSrc2,jSrc3;
  82. ULONG yTmp;
  83. pjSrc = pjSrcScan;
  84. xAccum = pStrBlt->ulXFracAccumulator;
  85. //
  86. // a single src scan line is being written
  87. //
  88. switch (StartAln) {
  89. case 1:
  90. jSrc0 = *pjSrc;
  91. xTmp = xAccum + xFrac;
  92. pjSrc = pjSrc + xInt + (xTmp < xAccum);
  93. *pjDst++ = jSrc0;
  94. xAccum = xTmp;
  95. case 2:
  96. jSrc0 = *pjSrc;
  97. xTmp = xAccum + xFrac;
  98. pjSrc = pjSrc + xInt + (xTmp < xAccum);
  99. *pjDst++ = jSrc0;
  100. xAccum = xTmp;
  101. case 3:
  102. jSrc0 = *pjSrc;
  103. xTmp = xAccum + xFrac;
  104. pjSrc = pjSrc + xInt + (xTmp < xAccum);
  105. *pjDst++ = jSrc0;
  106. xAccum = xTmp;
  107. }
  108. pjDstEnd = pjDst + WidthXAln;
  109. while (pjDst != pjDstEnd)
  110. {
  111. jSrc0 = *pjSrc;
  112. xTmp = xAccum + xFrac;
  113. pjSrc = pjSrc + xInt + (xTmp < xAccum);
  114. jSrc1 = *pjSrc;
  115. xAccum = xTmp + xFrac;
  116. pjSrc = pjSrc + xInt + (xAccum < xTmp);
  117. jSrc2 = *pjSrc;
  118. xTmp = xAccum + xFrac;
  119. pjSrc = pjSrc + xInt + (xTmp < xAccum);
  120. jSrc3 = *pjSrc;
  121. xAccum = xTmp + xFrac;
  122. pjSrc = pjSrc + xInt + (xAccum < xTmp);
  123. ulDst = (jSrc3 << 24) | (jSrc2 << 16) | (jSrc1 << 8) | jSrc0;
  124. *(PULONG)pjDst = ulDst;
  125. pjDst += 4;
  126. }
  127. switch (EndAln) {
  128. case 3:
  129. jSrc0 = *pjSrc;
  130. xTmp = xAccum + xFrac;
  131. pjSrc = pjSrc + xInt + (xTmp < xAccum);
  132. *pjDst++ = jSrc0;
  133. xAccum = xTmp;
  134. case 2:
  135. jSrc0 = *pjSrc;
  136. xTmp = xAccum + xFrac;
  137. pjSrc = pjSrc + xInt + (xTmp < xAccum);
  138. *pjDst++ = jSrc0;
  139. xAccum = xTmp;
  140. case 1:
  141. jSrc0 = *pjSrc;
  142. xTmp = xAccum + xFrac;
  143. pjSrc = pjSrc + xInt + (xTmp < xAccum);
  144. *pjDst++ = jSrc0;
  145. }
  146. pjOldScan = pjSrcScan;
  147. pjSrcScan += yInt;
  148. yTmp = yAccum + yFrac;
  149. if (yTmp < yAccum)
  150. {
  151. pjSrcScan += pStrBlt->lDeltaSrc;
  152. }
  153. yAccum = yTmp;
  154. pjDst = (pjDst + lDstStride);
  155. yCount--;
  156. if ((yCount != 0) && (pjSrcScan == pjOldScan))
  157. {
  158. // It's an expanding stretch in 'y'; the scan we just laid down
  159. // will be copied at least once using the hardware:
  160. cyDuplicate = 0;
  161. do {
  162. cyDuplicate++;
  163. pjSrcScan += yInt;
  164. yTmp = yAccum + yFrac;
  165. if (yTmp < yAccum)
  166. {
  167. pjSrcScan += pStrBlt->lDeltaSrc;
  168. }
  169. yAccum = yTmp;
  170. pjDst = (pjDst + pStrBlt->lDeltaDst);
  171. yCount--;
  172. } while ((yCount != 0) && (pjSrcScan == pjOldScan));
  173. WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
  174. CP_SRC_ADDR(ppdev, pjBase, ulSrcAddr);
  175. while (cyDuplicate)
  176. {
  177. // Set the blt destination address as the base address of MMU aperture 2
  178. // Then start the accelerated operation by writing something to this
  179. // aperture.
  180. //
  181. // NOTE: The destination is the ulSrcAddr + lDetla. Additionally,
  182. // ulSrcAddr must be incremented by lDelta for each time through
  183. // this loop. So, instead of maintaining a ulDstAddr, we'll
  184. // just piggy back off of ulSrcAddr.
  185. ulSrcAddr += lDelta;
  186. SET_DEST_ADDR_ABS(ppdev, ulSrcAddr);
  187. START_ACL(ppdev);
  188. if (--cyDuplicate)
  189. {
  190. //
  191. // Only wait if we are going to loop again!
  192. //
  193. WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
  194. }
  195. }
  196. }
  197. ulSrcAddr += lDelta;
  198. } while (yCount != 0);
  199. }
  200. /******************************Public*Routine******************************\
  201. *
  202. * Routine Name
  203. *
  204. * vDirectStretch16
  205. *
  206. * Routine Description:
  207. *
  208. * Stretch blt 16->16
  209. *
  210. * Arguments:
  211. *
  212. * pStrBlt - contains all params for blt
  213. *
  214. * Return Value:
  215. *
  216. * VOID
  217. *
  218. \**************************************************************************/
  219. VOID vDirectStretch16(
  220. STR_BLT* pStrBlt)
  221. {
  222. BYTE* pjOldScan;
  223. USHORT* pusSrc;
  224. USHORT* pusDstEnd;
  225. LONG WidthXAln;
  226. ULONG ulDst;
  227. ULONG xAccum;
  228. ULONG xTmp;
  229. ULONG yTmp;
  230. LONG cyDuplicate;
  231. PDEV* ppdev = pStrBlt->ppdev;
  232. LONG xDst = pStrBlt->XDstStart;
  233. LONG xSrc = pStrBlt->XSrcStart;
  234. BYTE* pjSrcScan = (pStrBlt->pjSrcScan) + xSrc * 2;
  235. USHORT* pusDst = (USHORT*)(pStrBlt->pjDstScan) + xDst;
  236. LONG yDst = pStrBlt->YDstStart; // + ppdev->yOffset;
  237. LONG yCount = pStrBlt->YDstCount;
  238. ULONG StartAln = (ULONG)(((ULONG_PTR)pusDst & 0x02) >> 1);
  239. LONG WidthX = pStrBlt->XDstEnd - xDst;
  240. ULONG EndAln = (ULONG)(((ULONG_PTR)(pusDst + WidthX) & 0x02) >> 1);
  241. ULONG xInt = pStrBlt->ulXDstToSrcIntCeil;
  242. ULONG xFrac = pStrBlt->ulXDstToSrcFracCeil;
  243. ULONG yAccum = pStrBlt->ulYFracAccumulator;
  244. ULONG yFrac = pStrBlt->ulYDstToSrcFracCeil;
  245. LONG lDstStride = pStrBlt->lDeltaDst - 2 * WidthX;
  246. ULONG yInt = 0;
  247. BYTE* pjBase = ppdev->pjBase;
  248. LONG cBpp = 2;
  249. LONG lDelta = ppdev->lDelta;
  250. ULONG ulSrcAddr = yDst * lDelta + xDst * cBpp + ppdev->xyOffset;
  251. WidthXAln = WidthX - EndAln - StartAln;
  252. //
  253. // if this is a shrinking blt, calc src scan line stride
  254. //
  255. if (pStrBlt->ulYDstToSrcIntCeil != 0)
  256. {
  257. yInt = pStrBlt->lDeltaSrc * pStrBlt->ulYDstToSrcIntCeil;
  258. }
  259. // Loop stretching each scan line
  260. WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
  261. CP_XCNT(ppdev, pjBase, (WidthX * cBpp) - 1);
  262. CP_YCNT(ppdev, pjBase, 0); // we'll do one line at a time
  263. do {
  264. USHORT usSrc0,usSrc1;
  265. ULONG yTmp;
  266. pusSrc = (USHORT*) pjSrcScan;
  267. xAccum = pStrBlt->ulXFracAccumulator;
  268. // A single source scan line is being written:
  269. if (StartAln)
  270. {
  271. usSrc0 = *pusSrc;
  272. xTmp = xAccum + xFrac;
  273. pusSrc = pusSrc + xInt + (xTmp < xAccum);
  274. *pusDst++ = usSrc0;
  275. xAccum = xTmp;
  276. }
  277. pusDstEnd = pusDst + WidthXAln;
  278. while (pusDst != pusDstEnd)
  279. {
  280. usSrc0 = *pusSrc;
  281. xTmp = xAccum + xFrac;
  282. pusSrc = pusSrc + xInt + (xTmp < xAccum);
  283. usSrc1 = *pusSrc;
  284. xAccum = xTmp + xFrac;
  285. pusSrc = pusSrc + xInt + (xAccum < xTmp);
  286. ulDst = (ULONG)((usSrc1 << 16) | usSrc0);
  287. *(ULONG*)pusDst = ulDst;
  288. pusDst+=2;
  289. }
  290. if (EndAln)
  291. {
  292. usSrc0 = *pusSrc;
  293. xTmp = xAccum + xFrac;
  294. pusSrc = pusSrc + xInt + (xTmp < xAccum);
  295. *pusDst++ = usSrc0;
  296. }
  297. pjOldScan = pjSrcScan;
  298. pjSrcScan += yInt;
  299. yTmp = yAccum + yFrac;
  300. if (yTmp < yAccum)
  301. {
  302. pjSrcScan += pStrBlt->lDeltaSrc;
  303. }
  304. yAccum = yTmp;
  305. pusDst = (USHORT*) ((BYTE*) pusDst + lDstStride);
  306. yCount--;
  307. if ((yCount != 0) && (pjSrcScan == pjOldScan))
  308. {
  309. // It's an expanding stretch in 'y'; the scan we just laid down
  310. // will be copied at least once using the hardware:
  311. cyDuplicate = 0;
  312. do {
  313. cyDuplicate++;
  314. pjSrcScan += yInt;
  315. yTmp = yAccum + yFrac;
  316. if (yTmp < yAccum)
  317. {
  318. pjSrcScan += pStrBlt->lDeltaSrc;
  319. }
  320. yAccum = yTmp;
  321. pusDst = (USHORT*) ((BYTE*) pusDst + pStrBlt->lDeltaDst);
  322. yCount--;
  323. } while ((yCount != 0) && (pjSrcScan == pjOldScan));
  324. WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
  325. CP_SRC_ADDR(ppdev, pjBase, ulSrcAddr);
  326. while (cyDuplicate)
  327. {
  328. // Set the blt destination address as the base address of MMU aperture 2
  329. // Then start the accelerated operation by writing something to this
  330. // aperture.
  331. //
  332. // NOTE: The destination is the ulSrcAddr + lDetla. Additionally,
  333. // ulSrcAddr must be incremented by lDelta for each time through
  334. // this loop. So, instead of maintaining a ulDstAddr, we'll
  335. // just piggy back off of ulSrcAddr.
  336. ulSrcAddr += lDelta;
  337. SET_DEST_ADDR_ABS(ppdev, ulSrcAddr);
  338. START_ACL(ppdev);
  339. if (--cyDuplicate)
  340. {
  341. //
  342. // Only wait if we are going to loop again!
  343. //
  344. WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
  345. }
  346. }
  347. }
  348. ulSrcAddr += lDelta;
  349. } while (yCount != 0);
  350. }