Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1049 lines
30 KiB

  1. /******************************Module*Header*******************************\
  2. * Module Name: str.c
  3. *
  4. * Contains the 'C' versions of some inner-loop routines for the
  5. * partially hardware accelerated StretchBlt.
  6. *
  7. * Copyright (c) 1993-1995 Microsoft Corporation
  8. \**************************************************************************/
  9. #include "precomp.h"
  10. /******************************Public*Routine******************************\
  11. *
  12. * Routine Name
  13. *
  14. * vDirectStretch8 (implemented as vM64DirectStretch8, vM32DirectStretch8 and vI32DirectStretch8)
  15. *
  16. * Routine Description:
  17. *
  18. * Stretch blt 8->8
  19. *
  20. * NOTE: This routine doesn't handle cases where the blt stretch starts
  21. * and ends in the same destination dword! vDirectStretchNarrow
  22. * is expected to have been called for that case.
  23. *
  24. * Arguments:
  25. *
  26. * pStrBlt - contains all params for blt
  27. *
  28. * Return Value:
  29. *
  30. * VOID
  31. *
  32. \**************************************************************************/
  33. VOID vM64DirectStretch8(
  34. STR_BLT* pStrBlt)
  35. {
  36. BYTE* pjSrc;
  37. BYTE* pjDstEnd;
  38. LONG WidthXAln;
  39. ULONG ulDst;
  40. ULONG xAccum;
  41. ULONG xTmp;
  42. ULONG yTmp;
  43. BYTE* pjOldScan;
  44. LONG cyDuplicate;
  45. PDEV* ppdev = pStrBlt->ppdev;
  46. LONG xDst = pStrBlt->XDstStart;
  47. LONG xSrc = pStrBlt->XSrcStart;
  48. BYTE* pjSrcScan = pStrBlt->pjSrcScan + xSrc;
  49. BYTE* pjDst = pStrBlt->pjDstScan + xDst;
  50. LONG yDst = pStrBlt->YDstStart + ppdev->yOffset;
  51. LONG yCount = pStrBlt->YDstCount;
  52. ULONG StartAln = (ULONG)((ULONG_PTR)pjDst & 0x03);
  53. LONG WidthX = pStrBlt->XDstEnd - xDst;
  54. ULONG EndAln = (ULONG)((ULONG_PTR)(pjDst + WidthX) & 0x03);
  55. ULONG xInt = pStrBlt->ulXDstToSrcIntCeil;
  56. ULONG xFrac = pStrBlt->ulXDstToSrcFracCeil;
  57. ULONG yAccum = pStrBlt->ulYFracAccumulator;
  58. ULONG yFrac = pStrBlt->ulYDstToSrcFracCeil;
  59. ULONG yInt = 0;
  60. LONG lDstStride = pStrBlt->lDeltaDst - WidthX;
  61. BYTE jSrc0,jSrc1,jSrc2,jSrc3;
  62. BYTE* pjMmBase = ppdev->pjMmBase;
  63. xDst += ppdev->xOffset;
  64. WidthXAln = WidthX - EndAln - ((- (LONG) StartAln) & 0x03);
  65. //
  66. // if this is a shrinking blt, calc src scan line stride
  67. //
  68. if (pStrBlt->ulYDstToSrcIntCeil != 0)
  69. {
  70. yInt = pStrBlt->lDeltaSrc * pStrBlt->ulYDstToSrcIntCeil;
  71. }
  72. //
  73. // loop drawing each scan line
  74. //
  75. //
  76. // at least 7 wide (DST) blt
  77. //
  78. do {
  79. pjSrc = pjSrcScan;
  80. xAccum = pStrBlt->ulXFracAccumulator;
  81. //
  82. // a single src scan line is being written
  83. //
  84. switch (StartAln) {
  85. case 1:
  86. jSrc0 = *pjSrc;
  87. xTmp = xAccum + xFrac;
  88. pjSrc = pjSrc + xInt + (xTmp < xAccum);
  89. *pjDst++ = jSrc0;
  90. xAccum = xTmp;
  91. case 2:
  92. jSrc0 = *pjSrc;
  93. xTmp = xAccum + xFrac;
  94. pjSrc = pjSrc + xInt + (xTmp < xAccum);
  95. *pjDst++ = jSrc0;
  96. xAccum = xTmp;
  97. case 3:
  98. jSrc0 = *pjSrc;
  99. xTmp = xAccum + xFrac;
  100. pjSrc = pjSrc + xInt + (xTmp < xAccum);
  101. *pjDst++ = jSrc0;
  102. xAccum = xTmp;
  103. }
  104. pjDstEnd = pjDst + WidthXAln;
  105. while (pjDst != pjDstEnd)
  106. {
  107. jSrc0 = *pjSrc;
  108. xTmp = xAccum + xFrac;
  109. pjSrc = pjSrc + xInt + (xTmp < xAccum);
  110. jSrc1 = *pjSrc;
  111. xAccum = xTmp + xFrac;
  112. pjSrc = pjSrc + xInt + (xAccum < xTmp);
  113. jSrc2 = *pjSrc;
  114. xTmp = xAccum + xFrac;
  115. pjSrc = pjSrc + xInt + (xTmp < xAccum);
  116. jSrc3 = *pjSrc;
  117. xAccum = xTmp + xFrac;
  118. pjSrc = pjSrc + xInt + (xAccum < xTmp);
  119. ulDst = (jSrc3 << 24) | (jSrc2 << 16) | (jSrc1 << 8) | jSrc0;
  120. *(PULONG)pjDst = ulDst;
  121. pjDst += 4;
  122. }
  123. switch (EndAln) {
  124. case 3:
  125. jSrc0 = *pjSrc;
  126. xTmp = xAccum + xFrac;
  127. pjSrc = pjSrc + xInt + (xTmp < xAccum);
  128. *pjDst++ = jSrc0;
  129. xAccum = xTmp;
  130. case 2:
  131. jSrc0 = *pjSrc;
  132. xTmp = xAccum + xFrac;
  133. pjSrc = pjSrc + xInt + (xTmp < xAccum);
  134. *pjDst++ = jSrc0;
  135. xAccum = xTmp;
  136. case 1:
  137. jSrc0 = *pjSrc;
  138. xTmp = xAccum + xFrac;
  139. pjSrc = pjSrc + xInt + (xTmp < xAccum);
  140. *pjDst++ = jSrc0;
  141. }
  142. pjOldScan = pjSrcScan;
  143. pjSrcScan += yInt;
  144. yTmp = yAccum + yFrac;
  145. if (yTmp < yAccum)
  146. {
  147. pjSrcScan += pStrBlt->lDeltaSrc;
  148. }
  149. yAccum = yTmp;
  150. pjDst = (pjDst + lDstStride);
  151. yDst++;
  152. yCount--;
  153. if ((yCount != 0) && (pjSrcScan == pjOldScan))
  154. {
  155. // It's an expanding stretch in 'y'; the scan we just laid down
  156. // will be copied at least once using the hardware:
  157. cyDuplicate = 0;
  158. do {
  159. cyDuplicate++;
  160. pjSrcScan += yInt;
  161. yTmp = yAccum + yFrac;
  162. if (yTmp < yAccum)
  163. {
  164. pjSrcScan += pStrBlt->lDeltaSrc;
  165. }
  166. yAccum = yTmp;
  167. pjDst = (pjDst + pStrBlt->lDeltaDst);
  168. yCount--;
  169. } while ((yCount != 0) && (pjSrcScan == pjOldScan));
  170. // The scan is to be copied 'cyDuplicate' times using the
  171. // hardware.
  172. M64_CHECK_FIFO_SPACE(ppdev, pjMmBase, 4);
  173. M64_OD(pjMmBase, SRC_Y_X, (yDst - 1) | (xDst << 16) );
  174. M64_OD(pjMmBase, DST_Y_X, yDst | (xDst << 16) );
  175. M64_OD(pjMmBase, SRC_HEIGHT1_WIDTH1, 1 | (WidthX << 16) );
  176. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, cyDuplicate | (WidthX << 16) );
  177. yDst += cyDuplicate;
  178. }
  179. } while (yCount != 0);
  180. }
  181. VOID vM32DirectStretch8(
  182. STR_BLT* pStrBlt)
  183. {
  184. BYTE* pjSrc;
  185. BYTE* pjDstEnd;
  186. LONG WidthXAln;
  187. ULONG ulDst;
  188. ULONG xAccum;
  189. ULONG xTmp;
  190. ULONG yTmp;
  191. BYTE* pjOldScan;
  192. LONG cyDuplicate;
  193. PDEV* ppdev = pStrBlt->ppdev;
  194. LONG xDst = pStrBlt->XDstStart;
  195. LONG xSrc = pStrBlt->XSrcStart;
  196. BYTE* pjSrcScan = pStrBlt->pjSrcScan + xSrc;
  197. BYTE* pjDst = pStrBlt->pjDstScan + xDst;
  198. LONG yDst = pStrBlt->YDstStart + ppdev->yOffset;
  199. LONG yCount = pStrBlt->YDstCount;
  200. ULONG StartAln = (ULONG)((ULONG_PTR)pjDst & 0x03);
  201. LONG WidthX = pStrBlt->XDstEnd - xDst;
  202. ULONG EndAln = (ULONG)((ULONG_PTR)(pjDst + WidthX) & 0x03);
  203. ULONG xInt = pStrBlt->ulXDstToSrcIntCeil;
  204. ULONG xFrac = pStrBlt->ulXDstToSrcFracCeil;
  205. ULONG yAccum = pStrBlt->ulYFracAccumulator;
  206. ULONG yFrac = pStrBlt->ulYDstToSrcFracCeil;
  207. ULONG yInt = 0;
  208. LONG lDstStride = pStrBlt->lDeltaDst - WidthX;
  209. BYTE jSrc0,jSrc1,jSrc2,jSrc3;
  210. BYTE* pjMmBase = ppdev->pjMmBase;
  211. xDst += ppdev->xOffset;
  212. WidthXAln = WidthX - EndAln - ((- (LONG) StartAln) & 0x03);
  213. //
  214. // if this is a shrinking blt, calc src scan line stride
  215. //
  216. if (pStrBlt->ulYDstToSrcIntCeil != 0)
  217. {
  218. yInt = pStrBlt->lDeltaSrc * pStrBlt->ulYDstToSrcIntCeil;
  219. }
  220. //
  221. // loop drawing each scan line
  222. //
  223. //
  224. // at least 7 wide (DST) blt
  225. //
  226. do {
  227. pjSrc = pjSrcScan;
  228. xAccum = pStrBlt->ulXFracAccumulator;
  229. //
  230. // a single src scan line is being written
  231. //
  232. switch (StartAln) {
  233. case 1:
  234. jSrc0 = *pjSrc;
  235. xTmp = xAccum + xFrac;
  236. pjSrc = pjSrc + xInt + (xTmp < xAccum);
  237. *pjDst++ = jSrc0;
  238. xAccum = xTmp;
  239. case 2:
  240. jSrc0 = *pjSrc;
  241. xTmp = xAccum + xFrac;
  242. pjSrc = pjSrc + xInt + (xTmp < xAccum);
  243. *pjDst++ = jSrc0;
  244. xAccum = xTmp;
  245. case 3:
  246. jSrc0 = *pjSrc;
  247. xTmp = xAccum + xFrac;
  248. pjSrc = pjSrc + xInt + (xTmp < xAccum);
  249. *pjDst++ = jSrc0;
  250. xAccum = xTmp;
  251. }
  252. pjDstEnd = pjDst + WidthXAln;
  253. while (pjDst != pjDstEnd)
  254. {
  255. jSrc0 = *pjSrc;
  256. xTmp = xAccum + xFrac;
  257. pjSrc = pjSrc + xInt + (xTmp < xAccum);
  258. jSrc1 = *pjSrc;
  259. xAccum = xTmp + xFrac;
  260. pjSrc = pjSrc + xInt + (xAccum < xTmp);
  261. jSrc2 = *pjSrc;
  262. xTmp = xAccum + xFrac;
  263. pjSrc = pjSrc + xInt + (xTmp < xAccum);
  264. jSrc3 = *pjSrc;
  265. xAccum = xTmp + xFrac;
  266. pjSrc = pjSrc + xInt + (xAccum < xTmp);
  267. ulDst = (jSrc3 << 24) | (jSrc2 << 16) | (jSrc1 << 8) | jSrc0;
  268. *(PULONG)pjDst = ulDst;
  269. pjDst += 4;
  270. }
  271. switch (EndAln) {
  272. case 3:
  273. jSrc0 = *pjSrc;
  274. xTmp = xAccum + xFrac;
  275. pjSrc = pjSrc + xInt + (xTmp < xAccum);
  276. *pjDst++ = jSrc0;
  277. xAccum = xTmp;
  278. case 2:
  279. jSrc0 = *pjSrc;
  280. xTmp = xAccum + xFrac;
  281. pjSrc = pjSrc + xInt + (xTmp < xAccum);
  282. *pjDst++ = jSrc0;
  283. xAccum = xTmp;
  284. case 1:
  285. jSrc0 = *pjSrc;
  286. xTmp = xAccum + xFrac;
  287. pjSrc = pjSrc + xInt + (xTmp < xAccum);
  288. *pjDst++ = jSrc0;
  289. }
  290. pjOldScan = pjSrcScan;
  291. pjSrcScan += yInt;
  292. yTmp = yAccum + yFrac;
  293. if (yTmp < yAccum)
  294. {
  295. pjSrcScan += pStrBlt->lDeltaSrc;
  296. }
  297. yAccum = yTmp;
  298. pjDst = (pjDst + lDstStride);
  299. yDst++;
  300. yCount--;
  301. // 32 to fix bizarre hardware bug (?) -- totally heuristic
  302. if ((yCount != 0) && (pjSrcScan == pjOldScan) && (WidthX >= 32))
  303. {
  304. // It's an expanding stretch in 'y'; the scan we just laid down
  305. // will be copied at least once using the hardware:
  306. cyDuplicate = 0;
  307. do {
  308. cyDuplicate++;
  309. pjSrcScan += yInt;
  310. yTmp = yAccum + yFrac;
  311. if (yTmp < yAccum)
  312. {
  313. pjSrcScan += pStrBlt->lDeltaSrc;
  314. }
  315. yAccum = yTmp;
  316. pjDst = (pjDst + pStrBlt->lDeltaDst);
  317. yCount--;
  318. } while ((yCount != 0) && (pjSrcScan == pjOldScan));
  319. // The scan is to be copied 'cyDuplicate' times using the
  320. // hardware.
  321. M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 9);
  322. M32_OW(pjMmBase, M32_SRC_X, (SHORT) xDst );
  323. M32_OW(pjMmBase, M32_SRC_X_START, (SHORT) xDst );
  324. M32_OW(pjMmBase, M32_SRC_X_END, (SHORT) (xDst + WidthX) );
  325. M32_OW(pjMmBase, M32_SRC_Y, (SHORT) (yDst - 1) ); // the line to replicate
  326. M32_OW(pjMmBase, CUR_X, (SHORT) xDst );
  327. M32_OW(pjMmBase, DEST_X_START, (SHORT) xDst );
  328. M32_OW(pjMmBase, DEST_X_END, (SHORT) (xDst + WidthX) );
  329. M32_OW(pjMmBase, CUR_Y, (SHORT) yDst );
  330. vM32QuietDown(ppdev, pjMmBase);
  331. M32_OW(pjMmBase, DEST_Y_END, (SHORT) (yDst + cyDuplicate) );
  332. yDst += cyDuplicate;
  333. }
  334. } while (yCount != 0);
  335. }
  336. VOID vI32DirectStretch8(
  337. STR_BLT* pStrBlt)
  338. {
  339. BYTE* pjSrc;
  340. BYTE* pjDstEnd;
  341. LONG WidthXAln;
  342. ULONG ulDst;
  343. ULONG xAccum;
  344. ULONG xTmp;
  345. ULONG yTmp;
  346. BYTE* pjOldScan;
  347. LONG cyDuplicate;
  348. PDEV* ppdev = pStrBlt->ppdev;
  349. LONG xDst = pStrBlt->XDstStart;
  350. LONG xSrc = pStrBlt->XSrcStart;
  351. BYTE* pjSrcScan = pStrBlt->pjSrcScan + xSrc;
  352. BYTE* pjDst = pStrBlt->pjDstScan + xDst;
  353. LONG yDst = pStrBlt->YDstStart + ppdev->yOffset;
  354. LONG yCount = pStrBlt->YDstCount;
  355. ULONG StartAln = (ULONG)((ULONG_PTR)pjDst & 0x03);
  356. LONG WidthX = pStrBlt->XDstEnd - xDst;
  357. ULONG EndAln = (ULONG)((ULONG_PTR)(pjDst + WidthX) & 0x03);
  358. ULONG xInt = pStrBlt->ulXDstToSrcIntCeil;
  359. ULONG xFrac = pStrBlt->ulXDstToSrcFracCeil;
  360. ULONG yAccum = pStrBlt->ulYFracAccumulator;
  361. ULONG yFrac = pStrBlt->ulYDstToSrcFracCeil;
  362. ULONG yInt = 0;
  363. LONG lDstStride = pStrBlt->lDeltaDst - WidthX;
  364. BYTE jSrc0,jSrc1,jSrc2,jSrc3;
  365. BYTE* pjIoBase = ppdev->pjIoBase;
  366. xDst += ppdev->xOffset;
  367. WidthXAln = WidthX - EndAln - ((- (LONG) StartAln) & 0x03);
  368. //
  369. // if this is a shrinking blt, calc src scan line stride
  370. //
  371. if (pStrBlt->ulYDstToSrcIntCeil != 0)
  372. {
  373. yInt = pStrBlt->lDeltaSrc * pStrBlt->ulYDstToSrcIntCeil;
  374. }
  375. //
  376. // loop drawing each scan line
  377. //
  378. //
  379. // at least 7 wide (DST) blt
  380. //
  381. do {
  382. pjSrc = pjSrcScan;
  383. xAccum = pStrBlt->ulXFracAccumulator;
  384. //
  385. // a single src scan line is being written
  386. //
  387. switch (StartAln) {
  388. case 1:
  389. jSrc0 = *pjSrc;
  390. xTmp = xAccum + xFrac;
  391. pjSrc = pjSrc + xInt + (xTmp < xAccum);
  392. *pjDst++ = jSrc0;
  393. xAccum = xTmp;
  394. case 2:
  395. jSrc0 = *pjSrc;
  396. xTmp = xAccum + xFrac;
  397. pjSrc = pjSrc + xInt + (xTmp < xAccum);
  398. *pjDst++ = jSrc0;
  399. xAccum = xTmp;
  400. case 3:
  401. jSrc0 = *pjSrc;
  402. xTmp = xAccum + xFrac;
  403. pjSrc = pjSrc + xInt + (xTmp < xAccum);
  404. *pjDst++ = jSrc0;
  405. xAccum = xTmp;
  406. }
  407. pjDstEnd = pjDst + WidthXAln;
  408. while (pjDst != pjDstEnd)
  409. {
  410. jSrc0 = *pjSrc;
  411. xTmp = xAccum + xFrac;
  412. pjSrc = pjSrc + xInt + (xTmp < xAccum);
  413. jSrc1 = *pjSrc;
  414. xAccum = xTmp + xFrac;
  415. pjSrc = pjSrc + xInt + (xAccum < xTmp);
  416. jSrc2 = *pjSrc;
  417. xTmp = xAccum + xFrac;
  418. pjSrc = pjSrc + xInt + (xTmp < xAccum);
  419. jSrc3 = *pjSrc;
  420. xAccum = xTmp + xFrac;
  421. pjSrc = pjSrc + xInt + (xAccum < xTmp);
  422. ulDst = (jSrc3 << 24) | (jSrc2 << 16) | (jSrc1 << 8) | jSrc0;
  423. *(PULONG)pjDst = ulDst;
  424. pjDst += 4;
  425. }
  426. switch (EndAln) {
  427. case 3:
  428. jSrc0 = *pjSrc;
  429. xTmp = xAccum + xFrac;
  430. pjSrc = pjSrc + xInt + (xTmp < xAccum);
  431. *pjDst++ = jSrc0;
  432. xAccum = xTmp;
  433. case 2:
  434. jSrc0 = *pjSrc;
  435. xTmp = xAccum + xFrac;
  436. pjSrc = pjSrc + xInt + (xTmp < xAccum);
  437. *pjDst++ = jSrc0;
  438. xAccum = xTmp;
  439. case 1:
  440. jSrc0 = *pjSrc;
  441. xTmp = xAccum + xFrac;
  442. pjSrc = pjSrc + xInt + (xTmp < xAccum);
  443. *pjDst++ = jSrc0;
  444. }
  445. pjOldScan = pjSrcScan;
  446. pjSrcScan += yInt;
  447. yTmp = yAccum + yFrac;
  448. if (yTmp < yAccum)
  449. {
  450. pjSrcScan += pStrBlt->lDeltaSrc;
  451. }
  452. yAccum = yTmp;
  453. pjDst = (pjDst + lDstStride);
  454. yDst++;
  455. yCount--;
  456. // 32 to fix bizarre hardware bug (?) -- totally heuristic
  457. if ((yCount != 0) && (pjSrcScan == pjOldScan) && (WidthX >= 32))
  458. {
  459. // It's an expanding stretch in 'y'; the scan we just laid down
  460. // will be copied at least once using the hardware:
  461. cyDuplicate = 0;
  462. do {
  463. cyDuplicate++;
  464. pjSrcScan += yInt;
  465. yTmp = yAccum + yFrac;
  466. if (yTmp < yAccum)
  467. {
  468. pjSrcScan += pStrBlt->lDeltaSrc;
  469. }
  470. yAccum = yTmp;
  471. pjDst = (pjDst + pStrBlt->lDeltaDst);
  472. yCount--;
  473. } while ((yCount != 0) && (pjSrcScan == pjOldScan));
  474. // The scan is to be copied 'cyDuplicate' times using the
  475. // hardware.
  476. I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 9);
  477. I32_OW(pjIoBase, M32_SRC_X, (SHORT) xDst );
  478. I32_OW(pjIoBase, M32_SRC_X_START, (SHORT) xDst );
  479. I32_OW(pjIoBase, M32_SRC_X_END, (SHORT) (xDst + WidthX) );
  480. I32_OW(pjIoBase, M32_SRC_Y, (SHORT) (yDst - 1) ); // the line to replicate
  481. I32_OW(pjIoBase, CUR_X, (SHORT) xDst );
  482. I32_OW(pjIoBase, DEST_X_START, (SHORT) xDst );
  483. I32_OW(pjIoBase, DEST_X_END, (SHORT) (xDst + WidthX) );
  484. I32_OW(pjIoBase, CUR_Y, (SHORT) yDst );
  485. vI32QuietDown(ppdev, pjIoBase);
  486. I32_OW(pjIoBase, DEST_Y_END, (SHORT) (yDst + cyDuplicate) );
  487. yDst += cyDuplicate;
  488. }
  489. } while (yCount != 0);
  490. }
  491. /******************************Public*Routine******************************\
  492. *
  493. * Routine Name
  494. *
  495. * vDirectStretch16 (implemented as vM64DirectStretch16, vM32DirectStretch16 and vI32DirectStretch16)
  496. *
  497. * Routine Description:
  498. *
  499. * Stretch blt 16->16
  500. *
  501. * Arguments:
  502. *
  503. * pStrBlt - contains all params for blt
  504. *
  505. * Return Value:
  506. *
  507. * VOID
  508. *
  509. \**************************************************************************/
  510. VOID vM64DirectStretch16(
  511. STR_BLT* pStrBlt)
  512. {
  513. BYTE* pjOldScan;
  514. USHORT* pusSrc;
  515. USHORT* pusDstEnd;
  516. LONG WidthXAln;
  517. ULONG ulDst;
  518. ULONG xAccum;
  519. ULONG xTmp;
  520. ULONG yTmp;
  521. LONG cyDuplicate;
  522. PDEV* ppdev = pStrBlt->ppdev;
  523. LONG xDst = pStrBlt->XDstStart;
  524. LONG xSrc = pStrBlt->XSrcStart;
  525. BYTE* pjSrcScan = (pStrBlt->pjSrcScan) + xSrc * 2;
  526. USHORT* pusDst = (USHORT*)(pStrBlt->pjDstScan) + xDst;
  527. LONG yDst = pStrBlt->YDstStart + ppdev->yOffset;
  528. LONG yCount = pStrBlt->YDstCount;
  529. ULONG StartAln = (ULONG)(((ULONG_PTR)pusDst & 0x02) >> 1);
  530. LONG WidthX = pStrBlt->XDstEnd - xDst;
  531. ULONG EndAln = (ULONG)(((ULONG_PTR)(pusDst + WidthX) & 0x02) >> 1);
  532. ULONG xInt = pStrBlt->ulXDstToSrcIntCeil;
  533. ULONG xFrac = pStrBlt->ulXDstToSrcFracCeil;
  534. ULONG yAccum = pStrBlt->ulYFracAccumulator;
  535. ULONG yFrac = pStrBlt->ulYDstToSrcFracCeil;
  536. LONG lDstStride = pStrBlt->lDeltaDst - 2 * WidthX;
  537. ULONG yInt = 0;
  538. USHORT usSrc0,usSrc1;
  539. BYTE* pjMmBase = ppdev->pjMmBase;
  540. xDst += ppdev->xOffset;
  541. WidthXAln = WidthX - EndAln - StartAln;
  542. //
  543. // if this is a shrinking blt, calc src scan line stride
  544. //
  545. if (pStrBlt->ulYDstToSrcIntCeil != 0)
  546. {
  547. yInt = pStrBlt->lDeltaSrc * pStrBlt->ulYDstToSrcIntCeil;
  548. }
  549. // Loop stretching each scan line
  550. do {
  551. pusSrc = (USHORT*) pjSrcScan;
  552. xAccum = pStrBlt->ulXFracAccumulator;
  553. // A single source scan line is being written:
  554. if (StartAln)
  555. {
  556. usSrc0 = *pusSrc;
  557. xTmp = xAccum + xFrac;
  558. pusSrc = pusSrc + xInt + (xTmp < xAccum);
  559. *pusDst++ = usSrc0;
  560. xAccum = xTmp;
  561. }
  562. pusDstEnd = pusDst + WidthXAln;
  563. while (pusDst != pusDstEnd)
  564. {
  565. usSrc0 = *pusSrc;
  566. xTmp = xAccum + xFrac;
  567. pusSrc = pusSrc + xInt + (xTmp < xAccum);
  568. usSrc1 = *pusSrc;
  569. xAccum = xTmp + xFrac;
  570. pusSrc = pusSrc + xInt + (xAccum < xTmp);
  571. ulDst = (ULONG)((usSrc1 << 16) | usSrc0);
  572. *(ULONG*)pusDst = ulDst;
  573. pusDst+=2;
  574. }
  575. if (EndAln)
  576. {
  577. usSrc0 = *pusSrc;
  578. xTmp = xAccum + xFrac;
  579. pusSrc = pusSrc + xInt + (xTmp < xAccum);
  580. *pusDst++ = usSrc0;
  581. }
  582. pjOldScan = pjSrcScan;
  583. pjSrcScan += yInt;
  584. yTmp = yAccum + yFrac;
  585. if (yTmp < yAccum)
  586. {
  587. pjSrcScan += pStrBlt->lDeltaSrc;
  588. }
  589. yAccum = yTmp;
  590. pusDst = (USHORT*) ((BYTE*) pusDst + lDstStride);
  591. yDst++;
  592. yCount--;
  593. if ((yCount != 0) && (pjSrcScan == pjOldScan))
  594. {
  595. // It's an expanding stretch in 'y'; the scan we just laid down
  596. // will be copied at least once using the hardware:
  597. cyDuplicate = 0;
  598. do {
  599. cyDuplicate++;
  600. pjSrcScan += yInt;
  601. yTmp = yAccum + yFrac;
  602. if (yTmp < yAccum)
  603. {
  604. pjSrcScan += pStrBlt->lDeltaSrc;
  605. }
  606. yAccum = yTmp;
  607. pusDst = (USHORT*) ((BYTE*) pusDst + pStrBlt->lDeltaDst);
  608. yCount--;
  609. } while ((yCount != 0) && (pjSrcScan == pjOldScan));
  610. // The scan is to be copied 'cyDuplicate' times using the
  611. // hardware.
  612. M64_CHECK_FIFO_SPACE(ppdev, pjMmBase, 4);
  613. M64_OD(pjMmBase, SRC_Y_X, (yDst - 1) | (xDst << 16) );
  614. M64_OD(pjMmBase, DST_Y_X, yDst | (xDst << 16) );
  615. M64_OD(pjMmBase, SRC_HEIGHT1_WIDTH1, 1 | (WidthX << 16) );
  616. M64_OD(pjMmBase, DST_HEIGHT_WIDTH, cyDuplicate | (WidthX << 16) );
  617. yDst += cyDuplicate;
  618. }
  619. } while (yCount != 0);
  620. }
  621. VOID vM32DirectStretch16(
  622. STR_BLT* pStrBlt)
  623. {
  624. BYTE* pjOldScan;
  625. USHORT* pusSrc;
  626. USHORT* pusDstEnd;
  627. LONG WidthXAln;
  628. ULONG ulDst;
  629. ULONG xAccum;
  630. ULONG xTmp;
  631. ULONG yTmp;
  632. LONG cyDuplicate;
  633. PDEV* ppdev = pStrBlt->ppdev;
  634. LONG xDst = pStrBlt->XDstStart;
  635. LONG xSrc = pStrBlt->XSrcStart;
  636. BYTE* pjSrcScan = (pStrBlt->pjSrcScan) + xSrc * 2;
  637. USHORT* pusDst = (USHORT*)(pStrBlt->pjDstScan) + xDst;
  638. LONG yDst = pStrBlt->YDstStart + ppdev->yOffset;
  639. LONG yCount = pStrBlt->YDstCount;
  640. ULONG StartAln = (ULONG)(((ULONG_PTR)pusDst & 0x02) >> 1);
  641. LONG WidthX = pStrBlt->XDstEnd - xDst;
  642. ULONG EndAln = (ULONG)(((ULONG_PTR)(pusDst + WidthX) & 0x02) >> 1);
  643. ULONG xInt = pStrBlt->ulXDstToSrcIntCeil;
  644. ULONG xFrac = pStrBlt->ulXDstToSrcFracCeil;
  645. ULONG yAccum = pStrBlt->ulYFracAccumulator;
  646. ULONG yFrac = pStrBlt->ulYDstToSrcFracCeil;
  647. LONG lDstStride = pStrBlt->lDeltaDst - 2 * WidthX;
  648. ULONG yInt = 0;
  649. USHORT usSrc0,usSrc1;
  650. BYTE* pjMmBase = ppdev->pjMmBase;
  651. xDst += ppdev->xOffset;
  652. WidthXAln = WidthX - EndAln - StartAln;
  653. //
  654. // if this is a shrinking blt, calc src scan line stride
  655. //
  656. if (pStrBlt->ulYDstToSrcIntCeil != 0)
  657. {
  658. yInt = pStrBlt->lDeltaSrc * pStrBlt->ulYDstToSrcIntCeil;
  659. }
  660. // Loop stretching each scan line
  661. do {
  662. pusSrc = (USHORT*) pjSrcScan;
  663. xAccum = pStrBlt->ulXFracAccumulator;
  664. // A single source scan line is being written:
  665. if (StartAln)
  666. {
  667. usSrc0 = *pusSrc;
  668. xTmp = xAccum + xFrac;
  669. pusSrc = pusSrc + xInt + (xTmp < xAccum);
  670. *pusDst++ = usSrc0;
  671. xAccum = xTmp;
  672. }
  673. pusDstEnd = pusDst + WidthXAln;
  674. while (pusDst != pusDstEnd)
  675. {
  676. usSrc0 = *pusSrc;
  677. xTmp = xAccum + xFrac;
  678. pusSrc = pusSrc + xInt + (xTmp < xAccum);
  679. usSrc1 = *pusSrc;
  680. xAccum = xTmp + xFrac;
  681. pusSrc = pusSrc + xInt + (xAccum < xTmp);
  682. ulDst = (ULONG)((usSrc1 << 16) | usSrc0);
  683. *(ULONG*)pusDst = ulDst;
  684. pusDst+=2;
  685. }
  686. if (EndAln)
  687. {
  688. usSrc0 = *pusSrc;
  689. xTmp = xAccum + xFrac;
  690. pusSrc = pusSrc + xInt + (xTmp < xAccum);
  691. *pusDst++ = usSrc0;
  692. }
  693. pjOldScan = pjSrcScan;
  694. pjSrcScan += yInt;
  695. yTmp = yAccum + yFrac;
  696. if (yTmp < yAccum)
  697. {
  698. pjSrcScan += pStrBlt->lDeltaSrc;
  699. }
  700. yAccum = yTmp;
  701. pusDst = (USHORT*) ((BYTE*) pusDst + lDstStride);
  702. yDst++;
  703. yCount--;
  704. // 32 to fix bizarre hardware bug (?) -- totally heuristic
  705. if ((yCount != 0) && (pjSrcScan == pjOldScan) && (WidthX >= 32))
  706. {
  707. // It's an expanding stretch in 'y'; the scan we just laid down
  708. // will be copied at least once using the hardware:
  709. cyDuplicate = 0;
  710. do {
  711. cyDuplicate++;
  712. pjSrcScan += yInt;
  713. yTmp = yAccum + yFrac;
  714. if (yTmp < yAccum)
  715. {
  716. pjSrcScan += pStrBlt->lDeltaSrc;
  717. }
  718. yAccum = yTmp;
  719. pusDst = (USHORT*) ((BYTE*) pusDst + pStrBlt->lDeltaDst);
  720. yCount--;
  721. } while ((yCount != 0) && (pjSrcScan == pjOldScan));
  722. // The scan is to be copied 'cyDuplicate' times using the
  723. // hardware.
  724. M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 9);
  725. M32_OW(pjMmBase, M32_SRC_X, (SHORT) xDst );
  726. M32_OW(pjMmBase, M32_SRC_X_START, (SHORT) xDst );
  727. M32_OW(pjMmBase, M32_SRC_X_END, (SHORT) (xDst + WidthX) );
  728. M32_OW(pjMmBase, M32_SRC_Y, (SHORT) (yDst - 1) ); // the line to replicate
  729. M32_OW(pjMmBase, CUR_X, (SHORT) xDst );
  730. M32_OW(pjMmBase, DEST_X_START, (SHORT) xDst );
  731. M32_OW(pjMmBase, DEST_X_END, (SHORT) (xDst + WidthX) );
  732. M32_OW(pjMmBase, CUR_Y, (SHORT) yDst );
  733. vM32QuietDown(ppdev, pjMmBase);
  734. M32_OW(pjMmBase, DEST_Y_END, (SHORT) (yDst + cyDuplicate) );
  735. yDst += cyDuplicate;
  736. }
  737. } while (yCount != 0);
  738. }
  739. VOID vI32DirectStretch16(
  740. STR_BLT* pStrBlt)
  741. {
  742. BYTE* pjOldScan;
  743. USHORT* pusSrc;
  744. USHORT* pusDstEnd;
  745. LONG WidthXAln;
  746. ULONG ulDst;
  747. ULONG xAccum;
  748. ULONG xTmp;
  749. ULONG yTmp;
  750. LONG cyDuplicate;
  751. PDEV* ppdev = pStrBlt->ppdev;
  752. LONG xDst = pStrBlt->XDstStart;
  753. LONG xSrc = pStrBlt->XSrcStart;
  754. BYTE* pjSrcScan = (pStrBlt->pjSrcScan) + xSrc * 2;
  755. USHORT* pusDst = (USHORT*)(pStrBlt->pjDstScan) + xDst;
  756. LONG yDst = pStrBlt->YDstStart + ppdev->yOffset;
  757. LONG yCount = pStrBlt->YDstCount;
  758. ULONG StartAln = (ULONG)(((ULONG_PTR)pusDst & 0x02) >> 1);
  759. LONG WidthX = pStrBlt->XDstEnd - xDst;
  760. ULONG EndAln = (ULONG)(((ULONG_PTR)(pusDst + WidthX) & 0x02) >> 1);
  761. ULONG xInt = pStrBlt->ulXDstToSrcIntCeil;
  762. ULONG xFrac = pStrBlt->ulXDstToSrcFracCeil;
  763. ULONG yAccum = pStrBlt->ulYFracAccumulator;
  764. ULONG yFrac = pStrBlt->ulYDstToSrcFracCeil;
  765. LONG lDstStride = pStrBlt->lDeltaDst - 2 * WidthX;
  766. ULONG yInt = 0;
  767. USHORT usSrc0,usSrc1;
  768. BYTE* pjIoBase = ppdev->pjIoBase;
  769. xDst += ppdev->xOffset;
  770. WidthXAln = WidthX - EndAln - StartAln;
  771. //
  772. // if this is a shrinking blt, calc src scan line stride
  773. //
  774. if (pStrBlt->ulYDstToSrcIntCeil != 0)
  775. {
  776. yInt = pStrBlt->lDeltaSrc * pStrBlt->ulYDstToSrcIntCeil;
  777. }
  778. // Loop stretching each scan line
  779. do {
  780. pusSrc = (USHORT*) pjSrcScan;
  781. xAccum = pStrBlt->ulXFracAccumulator;
  782. // A single source scan line is being written:
  783. if (StartAln)
  784. {
  785. usSrc0 = *pusSrc;
  786. xTmp = xAccum + xFrac;
  787. pusSrc = pusSrc + xInt + (xTmp < xAccum);
  788. *pusDst++ = usSrc0;
  789. xAccum = xTmp;
  790. }
  791. pusDstEnd = pusDst + WidthXAln;
  792. while (pusDst != pusDstEnd)
  793. {
  794. usSrc0 = *pusSrc;
  795. xTmp = xAccum + xFrac;
  796. pusSrc = pusSrc + xInt + (xTmp < xAccum);
  797. usSrc1 = *pusSrc;
  798. xAccum = xTmp + xFrac;
  799. pusSrc = pusSrc + xInt + (xAccum < xTmp);
  800. ulDst = (ULONG)((usSrc1 << 16) | usSrc0);
  801. *(ULONG*)pusDst = ulDst;
  802. pusDst+=2;
  803. }
  804. if (EndAln)
  805. {
  806. usSrc0 = *pusSrc;
  807. xTmp = xAccum + xFrac;
  808. pusSrc = pusSrc + xInt + (xTmp < xAccum);
  809. *pusDst++ = usSrc0;
  810. }
  811. pjOldScan = pjSrcScan;
  812. pjSrcScan += yInt;
  813. yTmp = yAccum + yFrac;
  814. if (yTmp < yAccum)
  815. {
  816. pjSrcScan += pStrBlt->lDeltaSrc;
  817. }
  818. yAccum = yTmp;
  819. pusDst = (USHORT*) ((BYTE*) pusDst + lDstStride);
  820. yDst++;
  821. yCount--;
  822. // 32 to fix bizarre hardware bug (?) -- totally heuristic
  823. if ((yCount != 0) && (pjSrcScan == pjOldScan) && (WidthX >= 32))
  824. {
  825. // It's an expanding stretch in 'y'; the scan we just laid down
  826. // will be copied at least once using the hardware:
  827. cyDuplicate = 0;
  828. do {
  829. cyDuplicate++;
  830. pjSrcScan += yInt;
  831. yTmp = yAccum + yFrac;
  832. if (yTmp < yAccum)
  833. {
  834. pjSrcScan += pStrBlt->lDeltaSrc;
  835. }
  836. yAccum = yTmp;
  837. pusDst = (USHORT*) ((BYTE*) pusDst + pStrBlt->lDeltaDst);
  838. yCount--;
  839. } while ((yCount != 0) && (pjSrcScan == pjOldScan));
  840. // The scan is to be copied 'cyDuplicate' times using the
  841. // hardware.
  842. I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 9);
  843. I32_OW(pjIoBase, M32_SRC_X, (SHORT) xDst );
  844. I32_OW(pjIoBase, M32_SRC_X_START, (SHORT) xDst );
  845. I32_OW(pjIoBase, M32_SRC_X_END, (SHORT) (xDst + WidthX) );
  846. I32_OW(pjIoBase, M32_SRC_Y, (SHORT) (yDst - 1) ); // the line to replicate
  847. I32_OW(pjIoBase, CUR_X, (SHORT) xDst );
  848. I32_OW(pjIoBase, DEST_X_START, (SHORT) xDst );
  849. I32_OW(pjIoBase, DEST_X_END, (SHORT) (xDst + WidthX) );
  850. I32_OW(pjIoBase, CUR_Y, (SHORT) yDst );
  851. vI32QuietDown(ppdev, pjIoBase);
  852. I32_OW(pjIoBase, DEST_Y_END, (SHORT) (yDst + cyDuplicate) );
  853. yDst += cyDuplicate;
  854. }
  855. } while (yCount != 0);
  856. }