Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

3273 lines
114 KiB

  1. /******************************Module*Header**********************************\
  2. *
  3. * *******************
  4. * * GDI SAMPLE CODE *
  5. * *******************
  6. *
  7. * Module Name: pxrxXfer.c
  8. *
  9. * Content: Bit transfer code
  10. *
  11. * Copyright (c) 1994-1999 3Dlabs Inc. Ltd. All rights reserved.
  12. * Copyright (c) 1995-2003 Microsoft Corporation. All rights reserved.
  13. \*****************************************************************************/
  14. #include "precomp.h"
  15. #include "ereg.h"
  16. #include "pxrx.h"
  #if _DEBUG
  // Selects how TEST_DWORD_ALIGNED reports a misaligned pointer:
  // TRUE  => raise an ASSERTDD,
  // FALSE => only emit a DISPDBG message.
  static BOOL trapOnMisAlignment = TRUE;

  // TEST_DWORD_ALIGNED(ptr)
  //
  // Debug-only check that the two low address bits of 'ptr' are zero.
  // The address is held in a UINT_PTR so that the test (and the message)
  // are not truncated when pointers are wider than a ULONG, and both arms
  // are braced so the expansion does not depend on how ASSERTDD is defined.
  // The argument is evaluated exactly once.
  #define TEST_DWORD_ALIGNED(ptr)                                            \
      do {                                                                   \
          UINT_PTR addr     = (UINT_PTR) (ptr);                              \
          ULONG    misalign = (ULONG) (addr & 3);                            \
                                                                             \
          if( trapOnMisAlignment )                                           \
          {                                                                  \
              ASSERTDD(misalign == 0, "TEST_DWORD_ALIGNED(ptr) failed!");    \
          }                                                                  \
          else if( misalign )                                                \
          {                                                                  \
              DISPDBG((-1, "TEST_DWORD_ALIGNED(0x%p) is out by %d bytes!",   \
                       (VOID *) addr, misalign));                            \
          }                                                                  \
      } while(0)
  #else
  // Free builds: the check compiles away to an empty statement.
  #   define TEST_DWORD_ALIGNED(addr)    do { ; } while(0)
  #endif
  33. /**************************************************************************\
  34. *
  35. * VOID pxrxXfer1bpp
  36. *
  37. \**************************************************************************/
  38. VOID pxrxXfer1bpp(
  39. PPDEV ppdev,
  40. RECTL *prcl,
  41. LONG count,
  42. ULONG fgLogicOp,
  43. ULONG bgLogicOp,
  44. SURFOBJ *psoSrc,
  45. POINTL *pptlSrc,
  46. RECTL *prclDst,
  47. XLATEOBJ *pxlo )
  48. {
  49. DWORD config2D, render2D;
  50. LONG cx;
  51. LONG cy;
  52. LONG lSrcDelta;
  53. BYTE *pjSrcScan0;
  54. BYTE *pjSrc;
  55. LONG dxSrc;
  56. LONG dySrc;
  57. LONG xLeft;
  58. LONG yTop;
  59. LONG xOffset;
  60. ULONG fgColor;
  61. ULONG bgColor;
  62. RBRUSH_COLOR rbc;
  63. GLINT_DECL;
  64. ASSERTDD(count > 0, "Can't handle zero rectangles");
  65. ASSERTDD(fgLogicOp <= 15, "Weird fg hardware Rop");
  66. ASSERTDD(bgLogicOp <= 15, "Weird bg hardware Rop");
  67. ASSERTDD(pptlSrc != NULL && psoSrc != NULL, "Can't have NULL sources");
  68. DISPDBG((DBGLVL, "pxrxXfer1bpp: original dstRect: (%d,%d) to (%d,%d)",
  69. prclDst->left, prclDst->top,
  70. prclDst->right, prclDst->bottom));
  71. dxSrc = pptlSrc->x - prclDst->left;
  72. dySrc = pptlSrc->y - prclDst->top; // Add to destination to get source
  73. lSrcDelta = psoSrc->lDelta;
  74. pjSrcScan0 = psoSrc->pvScan0;
  75. DISPDBG((DBGLVL, "bitmap baseAddr 0x%x, stride %d, w %d, h %d",
  76. pjSrcScan0, lSrcDelta,
  77. psoSrc->sizlBitmap.cx, psoSrc->sizlBitmap.cy));
  78. DISPDBG((DBGLVL, "fgColor 0x%x, bgColor 0x%x",
  79. pxlo->pulXlate[1], pxlo->pulXlate[0]));
  80. DISPDBG((DBGLVL, "fgLogicOp %d, bgLogicOp %d",
  81. fgLogicOp, bgLogicOp));
  82. fgColor = pxlo->pulXlate[1];
  83. bgColor = pxlo->pulXlate[0];
  84. // we get some common operations which are really noops. we can save
  85. // lots of time by cutting these out. As this happens a lot for masking
  86. // operations it's worth doing.
  87. if( ((fgLogicOp == __GLINT_LOGICOP_AND) && (fgColor == ppdev->ulWhite)) ||
  88. ((fgLogicOp == __GLINT_LOGICOP_OR ) && (fgColor == 0)) ||
  89. ((fgLogicOp == __GLINT_LOGICOP_XOR) && (fgColor == 0)) )
  90. {
  91. fgLogicOp = __GLINT_LOGICOP_NOOP;
  92. }
  93. // same for background
  94. if( ((bgLogicOp == __GLINT_LOGICOP_AND) && (bgColor == ppdev->ulWhite)) ||
  95. ((bgLogicOp == __GLINT_LOGICOP_OR ) && (bgColor == 0)) ||
  96. ((bgLogicOp == __GLINT_LOGICOP_XOR) && (bgColor == 0)) )
  97. {
  98. bgLogicOp = __GLINT_LOGICOP_NOOP;
  99. }
  100. if( (fgLogicOp == __GLINT_LOGICOP_NOOP) &&
  101. (bgLogicOp == __GLINT_LOGICOP_NOOP) )
  102. {
  103. DISPDBG((DBGLVL, "both ops are no-op so lets quit now"));
  104. return;
  105. }
  106. config2D = glintInfo->config2D;
  107. config2D &= ~(__CONFIG2D_LOGOP_FORE_ENABLE |
  108. __CONFIG2D_LOGOP_BACK_ENABLE |
  109. __CONFIG2D_ENABLES);
  110. config2D |= __CONFIG2D_CONSTANTSRC |
  111. __CONFIG2D_FBWRITE |
  112. __CONFIG2D_USERSCISSOR;
  113. render2D = __RENDER2D_INCX | __RENDER2D_INCY | __RENDER2D_OP_SYNCBITMASK;
  114. if( (fgLogicOp != __GLINT_LOGICOP_COPY) ||
  115. (bgLogicOp != __GLINT_LOGICOP_NOOP) )
  116. {
  117. config2D &= ~(__CONFIG2D_LOGOP_FORE_MASK |
  118. __CONFIG2D_LOGOP_BACK_MASK);
  119. config2D |= __CONFIG2D_OPAQUESPANS |
  120. __CONFIG2D_LOGOP_FORE(fgLogicOp) |
  121. __CONFIG2D_LOGOP_BACK(bgLogicOp);
  122. render2D |= __RENDER2D_SPANS;
  123. }
  124. SET_WRITE_BUFFERS;
  125. WAIT_PXRX_DMA_TAGS( 6 );
  126. if( LogicopReadDest[fgLogicOp] || LogicopReadDest[bgLogicOp] )
  127. {
  128. config2D |= __CONFIG2D_FBDESTREAD;
  129. SET_READ_BUFFERS;
  130. }
  131. if( LogicOpReadSrc[fgLogicOp] )
  132. {
  133. LOAD_FOREGROUNDCOLOUR( fgColor );
  134. }
  135. if( LogicOpReadSrc[bgLogicOp] )
  136. {
  137. LOAD_BACKGROUNDCOLOUR( bgColor );
  138. }
  139. LOAD_CONFIG2D( config2D );
  140. while( TRUE )
  141. {
  142. DISPDBG((DBGLVL, "mono download to rect (%d,%d) to (%d,%d)",
  143. prcl->left, prcl->top, prcl->right, prcl->bottom));
  144. yTop = prcl->top;
  145. xLeft = prcl->left;
  146. cx = prcl->right - xLeft;
  147. cy = prcl->bottom - yTop;
  148. // pjSrc is first dword containing a bit to download. xOffset is the
  149. // offset to that bit. i.e. the bit offset from the previous 32bit
  150. // boundary at the left hand edge of the rectangle.
  151. xOffset = (xLeft + dxSrc) & 0x1f;
  152. pjSrc = (BYTE*)((UINT_PTR)(pjSrcScan0 +
  153. (yTop + dySrc) * lSrcDelta +
  154. (xLeft + dxSrc) / 8 // byte aligned
  155. ) & ~3); // dword aligned
  156. DISPDBG((DBGLVL, "pjSrc 0x%x, lSrcDelta %d", pjSrc, lSrcDelta));
  157. DISPDBG((DBGLVL, "\txOffset %d, cx %d, cy %d", xOffset, cx, cy));
  158. // this algorithm downloads aligned 32-bit chunks from the
  159. // source but uses the scissor clip to define the edge of the
  160. // rectangle.
  161. //
  162. {
  163. ULONG AlignWidth, LeftEdge;
  164. AlignWidth = (xOffset + cx + 31) & ~31;
  165. LeftEdge = xLeft - xOffset;
  166. DISPDBG((7, "AlignWidth %d", AlignWidth));
  167. WAIT_PXRX_DMA_DWORDS( 5 );
  168. QUEUE_PXRX_DMA_INDEX4( __GlintTagFillScissorMinXY,
  169. __GlintTagFillScissorMaxXY,
  170. __GlintTagFillRectanglePosition,
  171. __GlintTagFillRender2D );
  172. QUEUE_PXRX_DMA_DWORD( MAKEDWORD_XY(xLeft, 0) );
  173. QUEUE_PXRX_DMA_DWORD( MAKEDWORD_XY(prcl->right, 0x7fff) );
  174. QUEUE_PXRX_DMA_DWORD( MAKEDWORD_XY(LeftEdge, yTop) );
  175. QUEUE_PXRX_DMA_DWORD( render2D |
  176. __RENDER2D_WIDTH(AlignWidth) |
  177. __RENDER2D_HEIGHT(cy) );
  178. SEND_PXRX_DMA_BATCH;
  179. //@@BEGIN_DDKSPLIT
  180. #if USE_RLE_DOWNLOADS
  181. pxrxMonoDownloadRLE( ppdev,
  182. AlignWidth,
  183. (ULONG *) pjSrc,
  184. lSrcDelta >> 2,
  185. cy );
  186. #else
  187. //@@END_DDKSPLIT
  188. pxrxMonoDownloadRaw( ppdev,
  189. AlignWidth,
  190. (ULONG *) pjSrc,
  191. lSrcDelta >> 2,
  192. cy );
  193. //@@BEGIN_DDKSPLIT
  194. #endif
  195. //@@END_DDKSPLIT
  196. }
  197. if( --count == 0 )
  198. {
  199. break;
  200. }
  201. prcl++;
  202. }
  203. // Reset the scissor maximums:
  204. if( ppdev->cPelSize == GLINTDEPTH32 ) {
  205. WAIT_PXRX_DMA_TAGS( 1 );
  206. QUEUE_PXRX_DMA_TAG( __GlintTagScissorMaxXY, 0x7FFF7FFF );
  207. //@@BEGIN_DDKSPLIT
  208. // SEND_PXRX_DMA_BATCH;
  209. //@@END_DDKSPLIT
  210. }
  211. FLUSH_PXRX_PATCHED_RENDER2D(prclDst->left, prclDst->right);
  212. SEND_PXRX_DMA_BATCH;
  213. DISPDBG((DBGLVL, "pxrxXfer1bpp returning"));
  214. }
  215. /**************************************************************************\
  216. *
  217. * void pxrxMonoDownloadRaw
  218. *
  219. \**************************************************************************/
  220. void pxrxMonoDownloadRaw(
  221. PPDEV ppdev,
  222. ULONG AlignWidth,
  223. ULONG *pjSrc,
  224. LONG lSrcDelta,
  225. LONG cy )
  226. {
  227. GLINT_DECL;
  228. if( AlignWidth == 32 )
  229. {
  230. LONG nSpaces = 0;
  231. ULONG bits;
  232. DISPDBG((DBGLVL, "Doing Single Word per scan download"));
  233. do
  234. {
  235. nSpaces = 10;
  236. WAIT_FREE_PXRX_DMA_DWORDS( nSpaces );
  237. if( cy < --nSpaces )
  238. {
  239. nSpaces = cy;
  240. }
  241. cy -= nSpaces;
  242. QUEUE_PXRX_DMA_HOLD( __GlintTagBitMaskPattern, nSpaces );
  243. while( --nSpaces >= 0 )
  244. {
  245. TEST_DWORD_ALIGNED( pjSrc );
  246. QUEUE_PXRX_DMA_DWORD( *pjSrc );
  247. pjSrc += lSrcDelta;
  248. }
  249. SEND_PXRX_DMA_BATCH;
  250. } while( cy > 0 );
  251. }
  252. else
  253. {
  254. // multiple 32 bit words per scanline. convert the delta to the
  255. // delta as we need it at the end of each line by subtracting the
  256. // width in bytes of the data we're downloading. Note, pjSrc
  257. // is always 1 LONG short of the end of the line because we break
  258. // before adding on the last ULONG. Thus, we subtract sizeof(ULONG)
  259. // from the original adjustment.
  260. LONG nScan = AlignWidth >> 5;
  261. LONG nRemainder;
  262. ULONG bits;
  263. DISPDBG((7, "Doing Multiple Word per scan download"));
  264. while( TRUE )
  265. {
  266. WAIT_PXRX_DMA_DWORDS( nScan + 1 );
  267. QUEUE_PXRX_DMA_HOLD( __GlintTagBitMaskPattern, nScan );
  268. TEST_DWORD_ALIGNED( pjSrc );
  269. QUEUE_PXRX_DMA_BUFF( pjSrc, nScan );
  270. SEND_PXRX_DMA_BATCH;
  271. pjSrc += lSrcDelta;
  272. if( --cy == 0 )
  273. {
  274. break;
  275. }
  276. }
  277. }
  278. }
  279. /**************************************************************************\
  280. *
  281. * VOID pxrxXfer8bpp
  282. *
  283. \**************************************************************************/
  284. VOID pxrxXfer8bpp(
  285. PPDEV ppdev,
  286. RECTL *prcl,
  287. LONG count,
  288. ULONG logicOp,
  289. ULONG bgLogicOp,
  290. SURFOBJ *psoSrc,
  291. POINTL *pptlSrc,
  292. RECTL *prclDst,
  293. XLATEOBJ *pxlo )
  294. {
  295. ULONG config2D, render2D, lutMode, pixelSize;
  296. BOOL invalidLUT = FALSE;
  297. LONG dx, dy, cy;
  298. LONG lSrcDelta, lSrcDeltaDW, lTrueDelta, alignOff;
  299. ULONG AlignWidth, LeftEdge;
  300. BYTE* pjSrcScan0;
  301. ULONG* pjSrc;
  302. UINT_PTR startPos;
  303. LONG cPelInv;
  304. ULONG ul;
  305. LONG nRemainder;
  306. //@@BEGIN_DDKSPLIT
  307. #if USE_RLE_DOWNLOADS
  308. ULONG len, data, holdCount;
  309. ULONG *tagPtr;
  310. #endif
  311. //@@END_DDKSPLIT
  312. GLINT_DECL;
  313. DISPDBG((DBGLVL, "pxrxXfer8bpp(): src = (%d,%d) -> (%d,%d), "
  314. "count = %d, logicOp = %d, palette id = %d",
  315. prcl->left, prcl->right, prcl->top, prcl->bottom,
  316. count, logicOp, pxlo->iUniq));
  317. // Set up the LUT table:
  318. if( (ppdev->PalLUTType != LUTCACHE_XLATE) ||
  319. (ppdev->iPalUniq != pxlo->iUniq) )
  320. {
  321. // Someone has hijacked the LUT so we need to invalidate it:
  322. ppdev->PalLUTType = LUTCACHE_XLATE;
  323. ppdev->iPalUniq = pxlo->iUniq;
  324. invalidLUT = TRUE;
  325. }
  326. else
  327. {
  328. DISPDBG((DBGLVL, "pxrxXfer8bpp: reusing cached xlate"));
  329. }
  330. WAIT_PXRX_DMA_TAGS( 1 + 1 );
  331. lutMode = glintInfo->lutMode & ~((3 << 2) | (1 << 4) | (7 << 8));
  332. lutMode |= (ppdev->cPelSize + 2) << 8;
  333. LOAD_LUTMODE( lutMode );
  334. if( invalidLUT )
  335. {
  336. ULONG *pulXlate = pxlo->pulXlate;
  337. LONG cEntries = 256;
  338. QUEUE_PXRX_DMA_TAG( __PXRXTagLUTIndex, 0 );
  339. if( ppdev->cPelSize == 0 )
  340. {
  341. // 8bpp
  342. WAIT_PXRX_DMA_TAGS( cEntries );
  343. do
  344. {
  345. ul = *(pulXlate++);
  346. ul |= ul << 8;
  347. ul |= ul << 16;
  348. QUEUE_PXRX_DMA_TAG( __PXRXTagLUTData, ul );
  349. } while( --cEntries );
  350. }
  351. else if( ppdev->cPelSize == 1 )
  352. {
  353. // 16bpp
  354. WAIT_PXRX_DMA_TAGS( cEntries );
  355. do
  356. {
  357. ul = *(pulXlate++);
  358. ul |= ul << 16;
  359. QUEUE_PXRX_DMA_TAG( __PXRXTagLUTData, ul );
  360. } while( --cEntries );
  361. }
  362. else
  363. {
  364. WAIT_PXRX_DMA_DWORDS( 1 + cEntries );
  365. QUEUE_PXRX_DMA_HOLD( __PXRXTagLUTData, cEntries );
  366. QUEUE_PXRX_DMA_BUFF( pulXlate, cEntries );
  367. }
  368. }
  369. config2D = __CONFIG2D_FBWRITE |
  370. __CONFIG2D_USERSCISSOR |
  371. __CONFIG2D_EXTERNALSRC |
  372. __CONFIG2D_LUTENABLE;
  373. render2D = __RENDER2D_INCX |
  374. __RENDER2D_INCY |
  375. __RENDER2D_OP_SYNCDATA |
  376. __RENDER2D_SPANS;
  377. SET_WRITE_BUFFERS;
  378. WAIT_PXRX_DMA_TAGS( 6 );
  379. if( logicOp != __GLINT_LOGICOP_COPY )
  380. {
  381. config2D |= __CONFIG2D_LOGOP_FORE(logicOp) | __CONFIG2D_FBWRITE;
  382. render2D |= __RENDER2D_SPANS;
  383. if( LogicopReadDest[logicOp] )
  384. {
  385. config2D |= __CONFIG2D_FBDESTREAD;
  386. SET_READ_BUFFERS;
  387. }
  388. }
  389. LOAD_CONFIG2D( config2D );
  390. //@@BEGIN_DDKSPLIT
  391. #if USE_RLE_DOWNLOADS
  392. QUEUE_PXRX_DMA_TAG( __GlintTagDownloadTarget, __GlintTagColor );
  393. #endif
  394. //@@END_DDKSPLIT
  395. cPelInv = 2 - ppdev->cPelSize;
  396. pixelSize = (1 << 31) | // Everything before the LUT runs at 8bpp
  397. (2 << 2) |
  398. (2 << 4) |
  399. (2 << 6) |
  400. (cPelInv << 8) |
  401. (cPelInv << 10) |
  402. (cPelInv << 12) |
  403. (cPelInv << 14);
  404. QUEUE_PXRX_DMA_TAG( __GlintTagPixelSize, pixelSize );
  405. dx = pptlSrc->x - prclDst->left;
  406. dy = pptlSrc->y - prclDst->top; // Add to destination to get source
  407. lSrcDelta = psoSrc->lDelta;
  408. pjSrcScan0 = psoSrc->pvScan0;
  409. while( TRUE )
  410. {
  411. DISPDBG((DBGLVL, "download to rect (%d,%d) to (%d,%d)",
  412. prcl->left, prcl->top,
  413. prcl->right, prcl->bottom));
  414. // 8bpp => 1 pixel per byte => 4 pixels per dword
  415. // Assume source bitmap width is dword aligned
  416. ASSERTDD((lSrcDelta & 3) == 0,
  417. "pxrxXfer8bpp: SOURCE BITMAP WIDTH IS NOT DWORD ALIGNED!!!");
  418. startPos = (((UINT_PTR) pjSrcScan0) + ((prcl->top + dy) * lSrcDelta))
  419. + (prcl->left + dx); // pointer to first pixel,
  420. // in pixels/bytes
  421. pjSrc = (ULONG *) (startPos & ~3); // dword pointer to dword
  422. // aligned first pixel
  423. if(NULL == pjSrc)
  424. {
  425. DISPDBG((ERRLVL, "ERROR: pxrxXfer8bpp return ,has pjSrc NULL"));
  426. return;
  427. }
  428. alignOff = (ULONG)(startPos & 3); // number of pixels past dword
  429. // alignment of a scanline
  430. LeftEdge = prcl->left - alignOff; // dword aligned left edge in pixels
  431. AlignWidth = ((prcl->right - LeftEdge) + 3) & ~3; // dword aligned width
  432. // in pixels
  433. cy = prcl->bottom - prcl->top; // number of scanlines to do
  434. DISPDBG((DBGLVL, "pjSrcScan0 = 0x%08X, "
  435. "startPos = 0x%08X, pjSrc = 0x%08X",
  436. pjSrcScan0, startPos, pjSrc));
  437. DISPDBG((DBGLVL, "offset = %d pixels", alignOff));
  438. DISPDBG((DBGLVL, "Aligned rect = (%d -> %d) => %d pixels => %d dwords",
  439. LeftEdge, LeftEdge + AlignWidth,
  440. AlignWidth, AlignWidth >> 2));
  441. WAIT_PXRX_DMA_TAGS( 4 );
  442. QUEUE_PXRX_DMA_TAG( __GlintTagScissorMinXY,
  443. MAKEDWORD_XY(prcl->left, 0) );
  444. QUEUE_PXRX_DMA_TAG( __GlintTagScissorMaxXY,
  445. MAKEDWORD_XY(prcl->right, 0x7fff) );
  446. QUEUE_PXRX_DMA_TAG( __GlintTagRectanglePosition,
  447. MAKEDWORD_XY(LeftEdge, prcl->top) );
  448. QUEUE_PXRX_DMA_TAG( __GlintTagRender2D,
  449. render2D |
  450. __RENDER2D_WIDTH(AlignWidth) |
  451. __RENDER2D_HEIGHT(cy) );
  452. SEND_PXRX_DMA_BATCH;
  453. AlignWidth >>= 2; // dword aligned width in dwords
  454. lSrcDeltaDW = lSrcDelta >> 2; // scanline delta in dwords
  455. // (start to start)
  456. lTrueDelta = lSrcDeltaDW - AlignWidth; // scanline delta in dwords
  457. // (end to start)
  458. DISPDBG((DBGLVL, "Delta = %d bytes = %d dwords -> %d - %d dwords",
  459. lSrcDelta, lSrcDeltaDW, lTrueDelta, AlignWidth));
  460. //@@BEGIN_DDKSPLIT
  461. #if USE_RLE_DOWNLOADS
  462. // Do an RLE download:
  463. tagPtr = NULL;
  464. do
  465. {
  466. WAIT_PXRX_DMA_TAGS( AlignWidth + 1 );
  467. nRemainder = AlignWidth;
  468. while( nRemainder-- )
  469. {
  470. TEST_DWORD_ALIGNED( pjSrc );
  471. data = *(pjSrc++);
  472. len = 1;
  473. TEST_DWORD_ALIGNED( pjSrc );
  474. while( nRemainder && (*pjSrc == data) )
  475. {
  476. pjSrc++;
  477. len++;
  478. nRemainder--;
  479. TEST_DWORD_ALIGNED( pjSrc );
  480. }
  481. if( len >= 4 )
  482. {
  483. if( tagPtr )
  484. {
  485. *tagPtr = ASSEMBLE_PXRX_DMA_HOLD( __GlintTagColor,
  486. holdCount );
  487. tagPtr = NULL;
  488. }
  489. QUEUE_PXRX_DMA_INDEX2( __GlintTagRLData,
  490. __GlintTagRLCount );
  491. QUEUE_PXRX_DMA_DWORD( data );
  492. QUEUE_PXRX_DMA_DWORD( len );
  493. len = 0;
  494. }
  495. else
  496. {
  497. if( !tagPtr )
  498. {
  499. QUEUE_PXRX_DMA_DWORD_DELAYED( tagPtr );
  500. holdCount = 0;
  501. }
  502. holdCount += len;
  503. while( len-- )
  504. {
  505. QUEUE_PXRX_DMA_DWORD( data );
  506. }
  507. }
  508. }
  509. if( tagPtr )
  510. {
  511. *tagPtr = ASSEMBLE_PXRX_DMA_HOLD( __GlintTagColor,
  512. holdCount );
  513. tagPtr = NULL;
  514. }
  515. pjSrc += lTrueDelta;
  516. SEND_PXRX_DMA_BATCH;
  517. } while( --cy > 0 );
  518. #else
  519. //@@END_DDKSPLIT
  520. // Do a raw download:
  521. while( TRUE )
  522. {
  523. DISPDBG((DBGLVL, "cy = %d", cy));
  524. WAIT_PXRX_DMA_DWORDS( AlignWidth + 1 );
  525. QUEUE_PXRX_DMA_HOLD( __GlintTagColor, AlignWidth );
  526. TEST_DWORD_ALIGNED( pjSrc );
  527. QUEUE_PXRX_DMA_BUFF( pjSrc, AlignWidth );
  528. SEND_PXRX_DMA_BATCH;
  529. if( --cy == 0 )
  530. {
  531. break;
  532. }
  533. pjSrc += lSrcDeltaDW;
  534. }
  535. //@@BEGIN_DDKSPLIT
  536. #endif
  537. //@@END_DDKSPLIT
  538. if( --count == 0 )
  539. {
  540. break;
  541. }
  542. prcl++;
  543. }
  544. // Reset some defaults:
  545. WAIT_PXRX_DMA_TAGS( 2 );
  546. QUEUE_PXRX_DMA_TAG( __GlintTagPixelSize, cPelInv );
  547. if( ppdev->cPelSize == GLINTDEPTH32 )
  548. {
  549. QUEUE_PXRX_DMA_TAG( __GlintTagScissorMaxXY, 0x7FFF7FFF );
  550. }
  551. SEND_PXRX_DMA_BATCH;
  552. DISPDBG((DBGLVL, "pxrxXfer8bpp return"));
  553. }
  554. /**************************************************************************\
  555. *
  556. * VOID pxrxXferImage
  557. *
  558. \**************************************************************************/
  559. VOID pxrxXferImage(
  560. PPDEV ppdev,
  561. RECTL *prcl,
  562. LONG count,
  563. ULONG logicOp,
  564. ULONG bgLogicOp,
  565. SURFOBJ *psoSrc,
  566. POINTL *pptlSrc,
  567. RECTL *prclDst,
  568. XLATEOBJ *pxlo )
  569. {
  570. DWORD config2D, render2D;
  571. LONG dx, dy, cy;
  572. LONG lSrcDelta, lTrueDelta, lSrcDeltaDW, alignOff;
  573. BYTE* pjSrcScan0;
  574. ULONG* pjSrc;
  575. UINT_PTR startPos;
  576. LONG cPel, cPelInv;
  577. ULONG cPelMask;
  578. ULONG AlignWidth, LeftEdge;
  579. LONG nRemainder;
  580. //@@BEGIN_DDKSPLIT
  581. #if USE_RLE_DOWNLOADS
  582. ULONG len, data, holdCount;
  583. ULONG *tagPtr;
  584. #endif
  585. //@@END_DDKSPLIT
  586. GLINT_DECL;
  587. SEND_PXRX_DMA_FORCE;
  588. ASSERTDD((pxlo == NULL) || (pxlo->flXlate & XO_TRIVIAL),
  589. "Can handle trivial xlate only");
  590. ASSERTDD(psoSrc->iBitmapFormat == ppdev->iBitmapFormat,
  591. "Source must be same colour depth as screen");
  592. ASSERTDD(count > 0,
  593. "Can't handle zero rectangles");
  594. ASSERTDD(logicOp <= 15,
  595. "Weird hardware Rop");
  596. dx = pptlSrc->x - prclDst->left;
  597. dy = pptlSrc->y - prclDst->top; // Add to destination to get source
  598. cPel = ppdev->cPelSize; // number of bytes per pixel = 1 << cPel
  599. cPelInv = 2 - cPel; // number of pixels per byte = 1 << cPelInv
  600. // (pixels -> dwords = >> cPenInv)
  601. cPelMask = (1 << cPelInv) - 1; // mask to obtain number of pixels
  602. // past a dword
  603. lSrcDelta = psoSrc->lDelta;
  604. pjSrcScan0 = psoSrc->pvScan0;
  605. DISPDBG((DBGLVL, "pxrxXferImage with logic op %d for %d rects",
  606. logicOp, count));
  607. config2D = glintInfo->config2D & ~(__CONFIG2D_LOGOP_FORE_ENABLE |
  608. __CONFIG2D_LOGOP_BACK_ENABLE |
  609. __CONFIG2D_ENABLES);
  610. config2D |= __CONFIG2D_FBWRITE |
  611. __CONFIG2D_USERSCISSOR;
  612. render2D = __RENDER2D_INCX |
  613. __RENDER2D_INCY |
  614. __RENDER2D_OP_SYNCDATA |
  615. __RENDER2D_SPANS;
  616. SET_WRITE_BUFFERS;
  617. WAIT_PXRX_DMA_TAGS( 5 );
  618. if( logicOp != __GLINT_LOGICOP_COPY )
  619. {
  620. config2D &= ~__CONFIG2D_LOGOP_FORE_MASK;
  621. config2D |= __CONFIG2D_LOGOP_FORE(logicOp) |
  622. __CONFIG2D_EXTERNALSRC;
  623. if( LogicopReadDest[logicOp] )
  624. {
  625. config2D |= __CONFIG2D_FBDESTREAD;
  626. SET_READ_BUFFERS;
  627. }
  628. }
  629. LOAD_CONFIG2D( config2D );
  630. //@@BEGIN_DDKSPLIT
  631. #if USE_RLE_DOWNLOADS
  632. QUEUE_PXRX_DMA_TAG( __GlintTagDownloadTarget,
  633. __GlintTagColor );
  634. #endif
  635. //@@END_DDKSPLIT
  636. while( TRUE )
  637. {
  638. cy = prcl->bottom - prcl->top;
  639. DISPDBG((DBGLVL, "download to rect (%d,%d) to (%d,%d)",
  640. prcl->left, prcl->top, prcl->right, prcl->bottom));
  641. ASSERTDD((lSrcDelta & 3) == 0,
  642. "pxrxXferImage: SOURCE BITMAP WIDTH IS NOT DWORD ALIGNED!!!");
  643. // pjSrc points to the first pixel to copy
  644. // lTrueDelta is the additional amount to add onto the pjSrc pointer
  645. // when we get to the end of the scanline.
  646. startPos = ((UINT_PTR) pjSrcScan0) + ((prcl->top + dy) * lSrcDelta) +
  647. ((prcl->left + dx) << cPel);
  648. alignOff = ((ULONG) (startPos & 3)) >> cPel; // number of pixels past
  649. // dword aligned start
  650. pjSrc = (ULONG *) (startPos & ~3); // dword aligned pointer to 1st pixel
  651. if(NULL == pjSrc)
  652. {
  653. DISPDBG((ERRLVL, "ERROR: "
  654. "pxrxXferImage return because of pjSrc NULL"));
  655. return;
  656. }
  657. // dword aligned left edge in pixels
  658. LeftEdge = prcl->left - alignOff;
  659. // dword aligned width in pixels
  660. AlignWidth = (prcl->right - LeftEdge + cPelMask) & ~cPelMask;
  661. DISPDBG((DBGLVL, "Aligned rect = (%d -> %d) => %d pixels",
  662. LeftEdge, LeftEdge + AlignWidth, AlignWidth));
  663. DISPDBG((DBGLVL, "pjSrcScan0 = 0x%08X, "
  664. "pjSrc = 0x%08X, alignOff = %d pixels",
  665. pjSrcScan0, pjSrc, alignOff));
  666. ASSERTDD( ((UINT_PTR) pjSrcScan0) + ((prcl->top + dy) * lSrcDelta) +
  667. ((LeftEdge + dx) << cPel) == (UINT_PTR) pjSrc,
  668. "pxrxXferImage: "
  669. "Aligned left edge does not match aligned pjSrc!" );
  670. WAIT_PXRX_DMA_DWORDS( 5 );
  671. QUEUE_PXRX_DMA_INDEX4( __GlintTagFillScissorMinXY,
  672. __GlintTagFillScissorMaxXY,
  673. __GlintTagFillRectanglePosition,
  674. __GlintTagFillRender2D );
  675. QUEUE_PXRX_DMA_DWORD( MAKEDWORD_XY(prcl->left, 0) );
  676. QUEUE_PXRX_DMA_DWORD( MAKEDWORD_XY(prcl->right, 0x7fff) );
  677. QUEUE_PXRX_DMA_DWORD( MAKEDWORD_XY(LeftEdge, prcl->top) );
  678. QUEUE_PXRX_DMA_DWORD( render2D |
  679. __RENDER2D_WIDTH(AlignWidth) |
  680. __RENDER2D_HEIGHT(cy) );
  681. SEND_PXRX_DMA_BATCH;
  682. AlignWidth >>= cPelInv; // dword aligned width in dwords
  683. lSrcDeltaDW = lSrcDelta >> 2; // scanline delta in dwords
  684. //(start to start)
  685. lTrueDelta = lSrcDeltaDW - AlignWidth; // scanline delta in dwords
  686. // (end to start)
  687. DISPDBG((DBGLVL, "Delta = %d bytes = %d dwords -> %d - %d dwords",
  688. lSrcDelta, lSrcDeltaDW, lTrueDelta, AlignWidth));
  689. //@@BEGIN_DDKSPLIT
  690. #if USE_RLE_DOWNLOADS
  691. // Do an RLE download:
  692. tagPtr = NULL;
  693. do
  694. {
  695. WAIT_PXRX_DMA_TAGS( AlignWidth + 1 );
  696. nRemainder = AlignWidth;
  697. while( nRemainder-- )
  698. {
  699. TEST_DWORD_ALIGNED( pjSrc );
  700. data = *(pjSrc++);
  701. len = 1;
  702. TEST_DWORD_ALIGNED( pjSrc );
  703. while( nRemainder && (*pjSrc == data) )
  704. {
  705. pjSrc++;
  706. len++;
  707. nRemainder--;
  708. TEST_DWORD_ALIGNED( pjSrc );
  709. }
  710. if( len >= 4 )
  711. {
  712. if( tagPtr )
  713. {
  714. *tagPtr = ASSEMBLE_PXRX_DMA_HOLD( __GlintTagColor,
  715. holdCount );
  716. tagPtr = NULL;
  717. }
  718. QUEUE_PXRX_DMA_INDEX2( __GlintTagRLData,
  719. __GlintTagRLCount );
  720. QUEUE_PXRX_DMA_DWORD( data );
  721. QUEUE_PXRX_DMA_DWORD( len );
  722. len = 0;
  723. }
  724. else
  725. {
  726. if( !tagPtr )
  727. {
  728. QUEUE_PXRX_DMA_DWORD_DELAYED( tagPtr );
  729. holdCount = 0;
  730. }
  731. holdCount += len;
  732. while( len-- )
  733. {
  734. QUEUE_PXRX_DMA_DWORD( data );
  735. }
  736. }
  737. }
  738. if( tagPtr )
  739. {
  740. *tagPtr = ASSEMBLE_PXRX_DMA_HOLD( __GlintTagColor, holdCount );
  741. tagPtr = NULL;
  742. }
  743. pjSrc += lTrueDelta;
  744. // SEND_PXRX_DMA_BATCH;
  745. } while( --cy > 0 );
  746. #else
  747. //@@END_DDKSPLIT
  748. // Do a raw download:
  749. while( TRUE )
  750. {
  751. DISPDBG((DBGLVL, "cy = %d", cy));
  752. WAIT_PXRX_DMA_DWORDS( AlignWidth + 1 );
  753. QUEUE_PXRX_DMA_HOLD( __GlintTagColor, AlignWidth );
  754. TEST_DWORD_ALIGNED( pjSrc );
  755. QUEUE_PXRX_DMA_BUFF( pjSrc, AlignWidth );
  756. // SEND_PXRX_DMA_BATCH;
  757. if( --cy == 0 )
  758. {
  759. break;
  760. }
  761. pjSrc += lSrcDeltaDW;
  762. }
  763. //@@BEGIN_DDKSPLIT
  764. #endif
  765. //@@END_DDKSPLIT
  766. if( --count == 0 )
  767. {
  768. break;
  769. }
  770. prcl++;
  771. }
  772. // Reset the scissor maximums:
  773. if( ppdev->cPelSize == GLINTDEPTH32 )
  774. {
  775. WAIT_PXRX_DMA_TAGS( 1 );
  776. QUEUE_PXRX_DMA_TAG( __GlintTagScissorMaxXY, 0x7FFF7FFF );
  777. // SEND_PXRX_DMA_BATCH;
  778. }
  779. FLUSH_PXRX_PATCHED_RENDER2D(prclDst->left, prclDst->right);
  780. SEND_PXRX_DMA_BATCH;
  781. DISPDBG((DBGLVL, "pxrxXferImage return"));
  782. }
  783. /**************************************************************************\
  784. *
  785. * VOID pxrxXfer4bpp
  786. *
  787. \**************************************************************************/
  788. VOID pxrxXfer4bpp(
  789. PPDEV ppdev,
  790. RECTL *prcl,
  791. LONG count,
  792. ULONG logicOp,
  793. ULONG bgLogicOp,
  794. SURFOBJ *psoSrc,
  795. POINTL *pptlSrc,
  796. RECTL *prclDst,
  797. XLATEOBJ *pxlo )
  798. {
  799. ULONG config2D, render2D, lutMode, pixelSize;
  800. BOOL invalidLUT = FALSE;
  801. LONG dx, dy;
  802. LONG cy;
  803. BYTE* pjSrcScan0;
  804. ULONG* pjSrc;
  805. LONG cPelInv;
  806. ULONG ul;
  807. ULONG AlignWidth, LeftEdge;
  808. UINT_PTR startPos;
  809. LONG nRemainder;
  810. LONG lSrcDelta, lSrcDeltaDW;
  811. LONG alignOff;
  812. GLINT_DECL;
  813. DISPDBG((DBGLVL, "pxrxXfer4bpp(): src = (%d,%d) -> (%d,%d), count = %d, "
  814. "logicOp = %d, palette id = %d",
  815. prcl->left, prcl->right, prcl->top, prcl->bottom, count,
  816. logicOp, pxlo->iUniq));
  817. // Set up the LUT table:
  818. if( (ppdev->PalLUTType != LUTCACHE_XLATE) ||
  819. (ppdev->iPalUniq != pxlo->iUniq) )
  820. {
  821. // Someone has hijacked the LUT so we need to invalidate it:
  822. ppdev->PalLUTType = LUTCACHE_XLATE;
  823. ppdev->iPalUniq = pxlo->iUniq;
  824. invalidLUT = TRUE;
  825. }
  826. else
  827. {
  828. DISPDBG((DBGLVL, "pxrxXfer4bpp: reusing cached xlate"));
  829. }
  830. WAIT_PXRX_DMA_TAGS( 1 + 1 + 16 );
  831. lutMode = glintInfo->lutMode & ~((3 << 2) | (1 << 4) | (7 << 8));
  832. lutMode |= (ppdev->cPelSize + 2) << 8;
  833. LOAD_LUTMODE( lutMode );
  834. if( invalidLUT )
  835. {
  836. ULONG *pulXlate = pxlo->pulXlate;
  837. LONG cEntries = 16;
  838. QUEUE_PXRX_DMA_TAG( __PXRXTagLUTIndex, 0 );
  839. if( ppdev->cPelSize == 0 ) // 8bpp
  840. {
  841. do
  842. {
  843. ul = *(pulXlate++);
  844. ul |= ul << 8;
  845. ul |= ul << 16;
  846. QUEUE_PXRX_DMA_TAG( __PXRXTagLUTData, ul );
  847. } while( --cEntries );
  848. }
  849. else if( ppdev->cPelSize == 1 ) // 16bpp
  850. {
  851. do
  852. {
  853. ul = *(pulXlate++);
  854. ul |= ul << 16;
  855. QUEUE_PXRX_DMA_TAG( __PXRXTagLUTData, ul );
  856. } while( --cEntries );
  857. }
  858. else
  859. {
  860. QUEUE_PXRX_DMA_HOLD( __PXRXTagLUTData, cEntries );
  861. QUEUE_PXRX_DMA_BUFF( pulXlate, cEntries );
  862. }
  863. }
  864. config2D = glintInfo->config2D & ~(__CONFIG2D_LOGOP_FORE_ENABLE |
  865. __CONFIG2D_LOGOP_BACK_ENABLE |
  866. __CONFIG2D_ENABLES);
  867. config2D |= __CONFIG2D_FBWRITE |
  868. __CONFIG2D_USERSCISSOR;
  869. render2D = __RENDER2D_INCX |
  870. __RENDER2D_INCY |
  871. __RENDER2D_OP_SYNCDATA |
  872. __RENDER2D_SPANS;
  873. SET_WRITE_BUFFERS;
  874. WAIT_PXRX_DMA_TAGS( 6 );
  875. if( logicOp != __GLINT_LOGICOP_COPY )
  876. {
  877. config2D &= ~(__CONFIG2D_LOGOP_FORE_MASK |
  878. __CONFIG2D_LOGOP_BACK_MASK);
  879. config2D |= __CONFIG2D_LOGOP_FORE(logicOp) |
  880. __CONFIG2D_FBWRITE;
  881. render2D |= __RENDER2D_SPANS;
  882. if( LogicopReadDest[logicOp] )
  883. {
  884. config2D |= __CONFIG2D_FBDESTREAD;
  885. SET_READ_BUFFERS;
  886. }
  887. if( LogicOpReadSrc[logicOp] )
  888. {
  889. config2D |= __CONFIG2D_EXTERNALSRC |
  890. __CONFIG2D_LUTENABLE;
  891. }
  892. }
  893. else
  894. {
  895. config2D |= __CONFIG2D_EXTERNALSRC |
  896. __CONFIG2D_LUTENABLE;
  897. }
  898. LOAD_CONFIG2D( config2D );
  899. QUEUE_PXRX_DMA_TAG( __GlintTagDownloadTarget, __GlintTagColor );
  900. cPelInv = 2 - ppdev->cPelSize;
  901. // Everything before the LUT runs at 8bpp
  902. pixelSize = (1 << 31) |
  903. (2 << 2) |
  904. (2 << 4) |
  905. (2 << 6) |
  906. (2 << 16) |
  907. (cPelInv << 8) |
  908. (cPelInv << 10) |
  909. (cPelInv << 12) |
  910. (cPelInv << 14);
  911. QUEUE_PXRX_DMA_TAG( __GlintTagPixelSize, pixelSize );
  912. dx = pptlSrc->x - prclDst->left;
  913. dy = pptlSrc->y - prclDst->top; // Add to destination to get source
  914. // cPel = ppdev->cPelSize;
  915. // cPelMask = (1 << cPelInv) - 1;
  916. lSrcDelta = psoSrc->lDelta;
  917. pjSrcScan0 = psoSrc->pvScan0;
  918. while( TRUE )
  919. {
  920. DISPDBG((DBGLVL, "download to rect (%d,%d) to (%d,%d)",
  921. prcl->left, prcl->top, prcl->right, prcl->bottom));
  922. // 4bpp => 2 pixels per byte => 8 pixels per dword
  923. // Assume source bitmap width is dword aligned
  924. ASSERTDD( (lSrcDelta & 3) == 0,
  925. "pxrxXfer4bpp: SOURCE BITMAP WIDTH IS NOT DWORD ALIGNED!!!");
  926. // pointer to first pixel, in bytes (32/64 bits long)
  927. startPos = (((UINT_PTR) pjSrcScan0) + ((prcl->top + dy) * lSrcDelta)) +
  928. ((prcl->left + dx) >> 1);
  929. pjSrc = (ULONG *) (startPos & ~3); // dword pointer to dword
  930. // aligned first pixel
  931. if(NULL == pjSrc)
  932. {
  933. DISPDBG((ERRLVL, "ERROR: "
  934. "pxrxXfer4bpp return because of pjSrc NULL"));
  935. return;
  936. }
  937. // pointer to first pixel, in pixels (33/65 bits long!)
  938. startPos = (( ((UINT_PTR) pjSrcScan0) +
  939. ((prcl->top + dy) * lSrcDelta)) << 1)
  940. + (prcl->left + dx);
  941. alignOff = (ULONG)(startPos & 7); // number of pixels past dword
  942. // alignment of a scanline
  943. LeftEdge = prcl->left - alignOff; // dword aligned left edge in pixels
  944. // dword aligned width in pixels
  945. AlignWidth = ((prcl->right - LeftEdge) + 7) & ~7;
  946. cy = prcl->bottom - prcl->top; // number of scanlines to do
  947. DISPDBG((DBGLVL, "pjSrcScan0 = 0x%08X, startPos = 0x%08X (>>1), "
  948. "pjSrc = 0x%08X",
  949. pjSrcScan0, startPos >> 1, pjSrc));
  950. DISPDBG((DBGLVL, "offset = %d pixels", alignOff));
  951. DISPDBG((DBGLVL, "Aligned rect = (%d -> %d) => %d pixels => %d dwords",
  952. LeftEdge, LeftEdge + AlignWidth,
  953. AlignWidth, AlignWidth >> 3));
  954. WAIT_PXRX_DMA_TAGS( 4 );
  955. QUEUE_PXRX_DMA_TAG( __GlintTagScissorMinXY,
  956. MAKEDWORD_XY(prcl->left, 0) );
  957. QUEUE_PXRX_DMA_TAG( __GlintTagScissorMaxXY,
  958. MAKEDWORD_XY(prcl->right, 0x7fff) );
  959. QUEUE_PXRX_DMA_TAG( __GlintTagRectanglePosition,
  960. MAKEDWORD_XY(LeftEdge, prcl->top) );
  961. QUEUE_PXRX_DMA_TAG( __GlintTagRender2D, render2D |
  962. __RENDER2D_WIDTH(AlignWidth) |
  963. __RENDER2D_HEIGHT(cy) );
  964. SEND_PXRX_DMA_BATCH;
  965. AlignWidth >>= 3; // dword aligned width in dwords
  966. lSrcDeltaDW = lSrcDelta >> 2; // dword aligned scanline offset in dwords
  967. DISPDBG((DBGLVL, "Delta = %d pixels = %d dwords",
  968. lSrcDelta << 1, lSrcDeltaDW));
  969. // pjSrc = dword aligned pointer to first
  970. // dword of first scanline
  971. // AlignWidth = number of dwords per scanline
  972. // lTrueDelta = dword offset between first dwords
  973. // of consecutive scanlines
  974. // cy = number of scanlines
  975. while( TRUE )
  976. {
  977. nRemainder = AlignWidth;
  978. DISPDBG((DBGLVL, "cy = %d", cy));
  979. WAIT_PXRX_DMA_DWORDS( AlignWidth + 1 );
  980. QUEUE_PXRX_DMA_HOLD( __GlintTagPacked4Pixels, AlignWidth );
  981. TEST_DWORD_ALIGNED( pjSrc );
  982. QUEUE_PXRX_DMA_BUFF( pjSrc, AlignWidth );
  983. if( --cy == 0 )
  984. {
  985. break;
  986. }
  987. pjSrc += lSrcDeltaDW;
  988. SEND_PXRX_DMA_BATCH;
  989. }
  990. if( --count == 0 )
  991. {
  992. break;
  993. }
  994. prcl++;
  995. }
  996. // Reset some defaults:
  997. WAIT_PXRX_DMA_TAGS( 2 );
  998. QUEUE_PXRX_DMA_TAG( __GlintTagPixelSize, cPelInv );
  999. if( ppdev->cPelSize == GLINTDEPTH32 )
  1000. {
  1001. QUEUE_PXRX_DMA_TAG( __GlintTagScissorMaxXY, 0x7FFF7FFF );
  1002. }
  1003. SEND_PXRX_DMA_BATCH;
  1004. DISPDBG((DBGLVL, "pxrxXfer4bpp return"));
  1005. }
  1006. /**************************************************************************\
  1007. *
  1008. * VOID pxrxCopyXfer24bpp
  1009. *
  1010. \**************************************************************************/
  1011. VOID pxrxCopyXfer24bpp(
  1012. PPDEV ppdev,
  1013. SURFOBJ *psoSrc,
  1014. POINTL *pptlSrc,
  1015. RECTL *prclDst,
  1016. RECTL *prcl,
  1017. LONG count )
  1018. {
  1019. ULONG config2D, render2D, pixelSize;
  1020. LONG dx, dy, cy, LeftEdge;
  1021. LONG lSrcDelta, lSrcDeltaDW, lTrueDelta, alignOff;
  1022. UINT_PTR startPos;
  1023. BYTE* pjSrcScan0;
  1024. ULONG* pjSrc;
  1025. LONG cPelInv;
  1026. ULONG ul, nRemainder;
  1027. ULONG padLeft, padLeftDW, padRight, padRightDW, dataWidth;
  1028. ULONG AlignWidth, AlignWidthDW, AlignExtra;
  1029. GLINT_DECL;
  1030. DISPDBG((DBGLVL, "pxrxCopyXfer24bpp(): "
  1031. "src = (%d,%d) -> (%d,%d), count = %d",
  1032. prcl->left, prcl->right, prcl->top, prcl->bottom, count));
  1033. config2D = glintInfo->config2D & ~(__CONFIG2D_LOGOP_FORE_ENABLE |
  1034. __CONFIG2D_LOGOP_BACK_ENABLE |
  1035. __CONFIG2D_ENABLES);
  1036. config2D |= __CONFIG2D_FBWRITE |
  1037. __CONFIG2D_EXTERNALSRC |
  1038. __CONFIG2D_USERSCISSOR;
  1039. render2D = __RENDER2D_INCX |
  1040. __RENDER2D_INCY |
  1041. __RENDER2D_OP_SYNCDATA |
  1042. __RENDER2D_SPANS;
  1043. SET_WRITE_BUFFERS;
  1044. WAIT_PXRX_DMA_TAGS( 3 );
  1045. QUEUE_PXRX_DMA_TAG( __GlintTagDownloadTarget, __GlintTagColor );
  1046. QUEUE_PXRX_DMA_TAG( __GlintTagDownloadGlyphWidth, 3 );
  1047. LOAD_CONFIG2D( config2D );
  1048. dx = pptlSrc->x - prclDst->left;
  1049. dy = pptlSrc->y - prclDst->top; // Add to destination to get source
  1050. lSrcDelta = psoSrc->lDelta;
  1051. pjSrcScan0 = psoSrc->pvScan0;
  1052. while( TRUE )
  1053. {
  1054. DISPDBG((DBGLVL, "download to rect "
  1055. "(%d,%d -> %d,%d) + (%d, %d) = (%d x %d)",
  1056. prcl->left, prcl->top, prcl->right, prcl->bottom,
  1057. dx, dy,
  1058. prcl->right - prcl->left,
  1059. prcl->bottom - prcl->top));
  1060. // 24bpp => 1 pixel per 3 bytes => 4 pixel per 3 dwords
  1061. // Assume source bitmap width is dword aligned
  1062. ASSERTDD( (lSrcDelta & 3) == 0,
  1063. "pxrxCopyXfer24bpp: "
  1064. "SOURCE BITMAP WIDTH IS NOT DWORD ALIGNED!!!" );
  1065. ASSERTDD( (((UINT_PTR) pjSrcScan0) & 3) == 0,
  1066. "pxrxCopyXfer24bpp: "
  1067. "SOURCE BITMAP START LOCATION IS NOT DWORD ALIGNED!!!" );
  1068. cy = prcl->bottom - prcl->top; // number of scanlines to do
  1069. startPos = (((UINT_PTR) pjSrcScan0) +
  1070. ((prcl->top + dy) * lSrcDelta)) +
  1071. ((prcl->left + dx) * 3); // pointer to first pixel of first
  1072. // scanline, in bytes
  1073. alignOff = (ULONG)(startPos & 3); // number of bytes past dword
  1074. // alignment to first pixel
  1075. pjSrc = (ULONG *) (startPos & ~3); // dword pointer to dword aligned
  1076. // first pixel
  1077. if(NULL == pjSrc)
  1078. {
  1079. DISPDBG((ERRLVL, "ERROR: "
  1080. "pxrxCopyXfer24bpp return because of pjSrc NULL"));
  1081. return;
  1082. }
  1083. padLeft = (4 - alignOff) % 4; // number of pixels to add to regain
  1084. // dword alignment on left edge
  1085. padLeftDW = (padLeft * 3) / 4; // number of dwords to add
  1086. // on the left edge
  1087. LeftEdge = prcl->left - padLeft;
  1088. // dword aligned width in pixels (= 4 pixel aligned = 3 dword aligned!)
  1089. AlignWidth = (prcl->right - LeftEdge + 3) & ~3;
  1090. // number of pixels overhang on the right
  1091. padRight = (LeftEdge + AlignWidth) - prcl->right;
  1092. // number of dwords to add on the right edge
  1093. padRightDW = (padRight * 3) / 4;
  1094. AlignWidthDW = (AlignWidth * 3) / 4; // dword aligned width in dwords
  1095. lSrcDeltaDW = lSrcDelta >> 2; // dword aligned scanline offset
  1096. // in dwords
  1097. // the amount of AlignWidth which is actually src bitmap
  1098. dataWidth = AlignWidthDW - padLeftDW - padRightDW;
  1099. DISPDBG((DBGLVL, "startPos = 0x%08X, alignOff = %d, "
  1100. "pjSrc = 0x%08X, lSrcDeltaDW = %d",
  1101. startPos, alignOff, pjSrc, lSrcDeltaDW));
  1102. DISPDBG((DBGLVL, "padLeft = %d pixels = %d dwords, LeftEdge = %d",
  1103. padLeft, padLeftDW, LeftEdge));
  1104. DISPDBG((DBGLVL, "AlignWidth = %d pixels = %d dwords",
  1105. AlignWidth, AlignWidthDW));
  1106. DISPDBG((DBGLVL, "padRight = %d pixels = %d dwords", padRight, padRightDW));
  1107. WAIT_PXRX_DMA_TAGS( 4 );
  1108. QUEUE_PXRX_DMA_TAG( __GlintTagScissorMinXY,
  1109. MAKEDWORD_XY(prcl->left, 0));
  1110. QUEUE_PXRX_DMA_TAG( __GlintTagScissorMaxXY,
  1111. MAKEDWORD_XY(prcl->right, 0x7fff));
  1112. QUEUE_PXRX_DMA_TAG( __GlintTagRectanglePosition,
  1113. MAKEDWORD_XY(LeftEdge, prcl->top));
  1114. QUEUE_PXRX_DMA_TAG( __GlintTagRender2D, render2D |
  1115. __RENDER2D_WIDTH(AlignWidth) |
  1116. __RENDER2D_HEIGHT(cy) );
  1117. while( cy-- )
  1118. {
  1119. DISPDBG((DBGLVL, "cy = %d", cy));
  1120. WAIT_PXRX_DMA_DWORDS( AlignWidthDW + 1 );
  1121. QUEUE_PXRX_DMA_HOLD( __GlintTagGlyphData, AlignWidthDW );
  1122. if( padLeftDW )
  1123. {
  1124. QUEUE_PXRX_DMA_DWORD( 0xDEADDEAD );
  1125. }
  1126. if( padLeftDW == 2 )
  1127. {
  1128. QUEUE_PXRX_DMA_DWORD( 0xDEADDEAD );
  1129. }
  1130. QUEUE_PXRX_DMA_BUFF( pjSrc, dataWidth );
  1131. if( padRightDW )
  1132. {
  1133. QUEUE_PXRX_DMA_DWORD( 0xDEADDEAD );
  1134. }
  1135. if( padRightDW == 2 )
  1136. {
  1137. QUEUE_PXRX_DMA_DWORD( 0xDEADDEAD );
  1138. }
  1139. SEND_PXRX_DMA_BATCH;
  1140. pjSrc += lSrcDeltaDW;
  1141. }
  1142. //@@BEGIN_DDKSPLIT
  1143. /*/
  1144. alignOff = (prcl->left + dx + 3) & ~3; // number of pixels past dword alignment of first pixel of a scanline
  1145. pjSrc = (ULONG *) (startPos - (alignOff * 3)); // dword pointer to dword aligned first pixel
  1146. LeftEdge = prcl->left - alignOff; // dword aligned left edge in pixels
  1147. AlignWidth = ((((prcl->right - LeftEdge) * 3) + 3) & ~3) / 3; // dword aligned width in pixels (IS NOT = 4 pixel aligned = 3 dword aligned!)
  1148. AlignExtra = AlignWidth - (prcl->right - LeftEdge); // extra pixels beyond the genuine width (which might overstomp a page boundary)
  1149. if( AlignExtra )
  1150. cy--;
  1151. DISPDBG((7, "pjSrcScan0 = 0x%08X, startPos = 0x%08X, pjSrc = 0x%08X", pjSrcScan0, startPos, pjSrc));
  1152. DISPDBG((7, "offset = %d pixels", alignOff));
  1153. DISPDBG((7, "Aligned rect = (%d -> %d) => %d pixels", LeftEdge, LeftEdge + AlignWidth, AlignWidth));
  1154. DISPDBG((7, "Rendering %d scanlines", cy));
  1155. WAIT_PXRX_DMA_TAGS( 4 );
  1156. QUEUE_PXRX_DMA_TAG( __GlintTagScissorMinXY, MAKEDWORD_XY(prcl->left, 0) );
  1157. QUEUE_PXRX_DMA_TAG( __GlintTagScissorMaxXY, MAKEDWORD_XY(prcl->right, 0x7fff) );
  1158. QUEUE_PXRX_DMA_TAG( __GlintTagRectanglePosition, MAKEDWORD_XY(LeftEdge, prcl->top) );
  1159. QUEUE_PXRX_DMA_TAG( __GlintTagRender2D, render2D | __RENDER2D_WIDTH(AlignWidth) | __RENDER2D_HEIGHT(cy) );
  1160. AlignWidthDW = (AlignWidth * 3) / 4; // dword aligned width in dwords
  1161. lSrcDeltaDW = lSrcDelta >> 2; // dword aligned scanline offset in dwords
  1162. DISPDBG((7, "Delta = %d bytes = %d dwords (%d dwords wide)", lSrcDelta, lSrcDeltaDW, AlignWidthDW));
  1163. while( cy-- ) {
  1164. DISPDBG((9, "cy = %d", cy));
  1165. WAIT_PXRX_DMA_DWORDS( AlignWidthDW + 1 );
  1166. QUEUE_PXRX_DMA_HOLD( __GlintTagGlyphData, AlignWidthDW );
  1167. TEST_DWORD_ALIGNED( pjSrc );
  1168. QUEUE_PXRX_DMA_BUFF( pjSrc, AlignWidthDW );
  1169. SEND_PXRX_DMA_BATCH;
  1170. pjSrc += lSrcDeltaDW;
  1171. }
  1172. if( AlignExtra ) {
  1173. ULONG dataWidth;
  1174. ULONG dataExtra;
  1175. dataWidth = ((((prcl->right - LeftEdge) * 3) + 3) & ~3) / 4; // dword aligned width in dwords, 1 dword aligned
  1176. dataExtra = AlignWidthDW - dataWidth; // extra dwords past end of image
  1177. DISPDBG((7, "Last scanline: %d + %d = %d pixels = %d + %d = %d dwords",
  1178. prcl->right - LeftEdge, AlignExtra, AlignWidth, dataWidth, dataExtra, AlignWidthDW));
  1179. ASSERTDD( (dataWidth + dataExtra) == AlignWidthDW, "pxrxCopyXfer24bpp: Last scanline does not add up!" );
  1180. WAIT_PXRX_DMA_DWORDS( AlignWidthDW + 5 );
  1181. QUEUE_PXRX_DMA_TAG( __GlintTagRectanglePosition, MAKEDWORD_XY(LeftEdge, prcl->bottom - 1) );
  1182. QUEUE_PXRX_DMA_TAG( __GlintTagRender2D, render2D | __RENDER2D_WIDTH(AlignWidth) | __RENDER2D_HEIGHT(1) );
  1183. TEST_DWORD_ALIGNED( pjSrc );
  1184. QUEUE_PXRX_DMA_HOLD( __GlintTagGlyphData, AlignWidthDW );
  1185. QUEUE_PXRX_DMA_BUFF( pjSrc, dataWidth ); // Send the partial scanline
  1186. while( dataExtra-- )
  1187. QUEUE_PXRX_DMA_DWORD( 0xDEADDEAD ); // Pad out to flush the data
  1188. // Resend download target to flush the remaining partial pixels ???
  1189. }
  1190. /**/
  1191. //@@END_DDKSPLIT
  1192. if( --count == 0 )
  1193. {
  1194. break;
  1195. }
  1196. prcl++;
  1197. }
  1198. // Reset the scissor maximums:
  1199. if( ppdev->cPelSize == GLINTDEPTH32 )
  1200. {
  1201. WAIT_PXRX_DMA_TAGS( 1 );
  1202. QUEUE_PXRX_DMA_TAG( __GlintTagScissorMaxXY, 0x7FFF7FFF );
  1203. }
  1204. SEND_PXRX_DMA_BATCH;
  1205. DISPDBG((DBGLVL, "pxrxCopyXfer24bpp return"));
  1206. }
  1207. /**************************************************************************\
  1208. *
  1209. * VOID pxrxCopyXfer8bppLge
  1210. *
  1211. \**************************************************************************/
  1212. VOID pxrxCopyXfer8bppLge(
  1213. PPDEV ppdev,
  1214. SURFOBJ *psoSrc,
  1215. POINTL *pptlSrc,
  1216. RECTL *prclDst,
  1217. RECTL *prcl,
  1218. LONG count,
  1219. XLATEOBJ *pxlo )
  1220. {
  1221. ULONG config2D, render2D, lutMode, pixelSize;
  1222. BOOL invalidLUT = FALSE;
  1223. LONG dx, dy, cy;
  1224. LONG lSrcDelta, lSrcDeltaDW, lTrueDelta, alignOff;
  1225. ULONG AlignWidth, LeftEdge;
  1226. BYTE* pjSrcScan0;
  1227. ULONG* pjSrc;
  1228. UINT_PTR startPos;
  1229. LONG cPelInv;
  1230. ULONG ul, i;
  1231. LONG nRemainder;
  1232. //@@BEGIN_DDKSPLIT
  1233. #if USE_RLE_DOWNLOADS
  1234. ULONG len, data, holdCount;
  1235. #endif
  1236. //@@END_DDKSPLIT
  1237. ULONG *tagPtr;
  1238. ULONG *pulXlate = pxlo->pulXlate;
  1239. GLINT_DECL;
  1240. DISPDBG((DBGLVL, "pxrxCopyXfer8bpp(): src = (%d,%d) -> (%d,%d), "
  1241. "count = %d, palette id = %d",
  1242. prcl->left, prcl->right, prcl->top, prcl->bottom,
  1243. count, pxlo->iUniq));
  1244. SET_WRITE_BUFFERS;
  1245. if( (count == 1) &&
  1246. ((cy = (prcl->bottom - prcl->top)) == 1) )
  1247. {
  1248. ULONG width = prcl->right - prcl->left, extra;
  1249. BYTE *srcPtr;
  1250. config2D = __CONFIG2D_FBWRITE |
  1251. __CONFIG2D_EXTERNALSRC;
  1252. render2D = __RENDER2D_INCX |
  1253. __RENDER2D_INCY |
  1254. __RENDER2D_OP_SYNCDATA |
  1255. __RENDER2D_SPANS;
  1256. dx = pptlSrc->x - prclDst->left;
  1257. dy = pptlSrc->y - prclDst->top; // Add to destination to get source
  1258. lSrcDelta = psoSrc->lDelta;
  1259. pjSrcScan0 = psoSrc->pvScan0;
  1260. startPos = (((UINT_PTR) pjSrcScan0) +
  1261. ((prcl->top + dy) * lSrcDelta)) + (prcl->left + dx);
  1262. srcPtr = (BYTE *) startPos;
  1263. WAIT_PXRX_DMA_DWORDS( 7 + width );
  1264. LOAD_CONFIG2D( config2D );
  1265. QUEUE_PXRX_DMA_TAG( __GlintTagRectanglePosition,
  1266. MAKEDWORD_XY(prcl->left, prcl->top) );
  1267. QUEUE_PXRX_DMA_TAG( __GlintTagRender2D, render2D |
  1268. __RENDER2D_WIDTH(width) |
  1269. __RENDER2D_HEIGHT(1) );
  1270. if( ppdev->cPelSize == 0 ) // 8bpp
  1271. {
  1272. extra = width & 3;
  1273. width >>= 2;
  1274. if( extra )
  1275. {
  1276. QUEUE_PXRX_DMA_HOLD( __GlintTagColor, width + 1 );
  1277. QUEUE_PXRX_DMA_BUFF_DELAYED( tagPtr, width + 1 );
  1278. } else {
  1279. QUEUE_PXRX_DMA_HOLD( __GlintTagColor, width );
  1280. QUEUE_PXRX_DMA_BUFF_DELAYED( tagPtr, width );
  1281. }
  1282. DISPDBG((DBGLVL, "width was %d, is now %d + %d",
  1283. prcl->right - prcl->left, width, extra));
  1284. for( i = 0; i < width; i++, srcPtr += 4 )
  1285. {
  1286. *(tagPtr++) = (pulXlate[srcPtr[3]] << 24) |
  1287. (pulXlate[srcPtr[2]] << 16) |
  1288. (pulXlate[srcPtr[1]] << 8) |
  1289. pulXlate[srcPtr[0]];
  1290. }
  1291. if( extra == 1 )
  1292. {
  1293. *(tagPtr++) = pulXlate[srcPtr[0]];
  1294. }
  1295. else if( extra == 2 )
  1296. {
  1297. *(tagPtr++) = (pulXlate[srcPtr[1]] << 8) |
  1298. pulXlate[srcPtr[0]];
  1299. }
  1300. else if (extra == 3)
  1301. {
  1302. *(tagPtr++) = (pulXlate[srcPtr[2]] << 16) |
  1303. (pulXlate[srcPtr[1]] << 8) |
  1304. pulXlate[srcPtr[0]];
  1305. }
  1306. }
  1307. else if( ppdev->cPelSize == 1 ) // 16bpp
  1308. {
  1309. extra = width & 1;
  1310. width >>= 1;
  1311. QUEUE_PXRX_DMA_HOLD( __GlintTagColor, width + extra );
  1312. QUEUE_PXRX_DMA_BUFF_DELAYED( tagPtr, width + extra );
  1313. DISPDBG((DBGLVL, "width was %d, is now %d + %d",
  1314. prcl->right - prcl->left, width, extra));
  1315. for( i = 0; i < width; i++, srcPtr += 2 )
  1316. {
  1317. *(tagPtr++) = (pulXlate[srcPtr[1]] << 16) |
  1318. pulXlate[srcPtr[0]];
  1319. }
  1320. if( extra )
  1321. {
  1322. *(tagPtr++) = pulXlate[srcPtr[0]];
  1323. }
  1324. }
  1325. else
  1326. {
  1327. QUEUE_PXRX_DMA_HOLD( __GlintTagColor, width );
  1328. QUEUE_PXRX_DMA_BUFF_DELAYED( tagPtr, width );
  1329. DISPDBG((DBGLVL, "width was %d, is now %d + %d",
  1330. prcl->right - prcl->left, width, 0));
  1331. for( i = 0; i < width; i++ )
  1332. {
  1333. *(tagPtr++) = pulXlate[*(srcPtr++)];
  1334. }
  1335. }
  1336. SEND_PXRX_DMA_BATCH;
  1337. return;
  1338. }
  1339. // Set up the LUT table:
  1340. if( (ppdev->PalLUTType != LUTCACHE_XLATE) ||
  1341. (ppdev->iPalUniq != pxlo->iUniq) )
  1342. {
  1343. // Someone has hijacked the LUT so we need to invalidate it:
  1344. ppdev->PalLUTType = LUTCACHE_XLATE;
  1345. ppdev->iPalUniq = pxlo->iUniq;
  1346. invalidLUT = TRUE;
  1347. }
  1348. else
  1349. {
  1350. DISPDBG((DBGLVL, "pxrxCopyXfer8bpp: reusing cached xlate"));
  1351. }
  1352. WAIT_PXRX_DMA_TAGS( 1 + 1 );
  1353. lutMode = glintInfo->lutMode & ~((3 << 2) | (1 << 4) | (7 << 8));
  1354. lutMode |= (ppdev->cPelSize + 2) << 8;
  1355. LOAD_LUTMODE( lutMode );
  1356. if( invalidLUT )
  1357. {
  1358. LONG cEntries = 256;
  1359. pulXlate = pxlo->pulXlate;
  1360. QUEUE_PXRX_DMA_TAG( __PXRXTagLUTIndex, 0 );
  1361. if( ppdev->cPelSize == 0 ) // 8bpp
  1362. {
  1363. WAIT_PXRX_DMA_TAGS( cEntries );
  1364. do
  1365. {
  1366. ul = *(pulXlate++);
  1367. ul |= ul << 8;
  1368. ul |= ul << 16;
  1369. QUEUE_PXRX_DMA_TAG( __PXRXTagLUTData, ul );
  1370. } while( --cEntries );
  1371. }
  1372. else if( ppdev->cPelSize == 1 ) // 16bpp
  1373. {
  1374. WAIT_PXRX_DMA_TAGS( cEntries );
  1375. do
  1376. {
  1377. ul = *(pulXlate++);
  1378. ul |= ul << 16;
  1379. QUEUE_PXRX_DMA_TAG( __PXRXTagLUTData, ul );
  1380. } while( --cEntries );
  1381. }
  1382. else
  1383. {
  1384. WAIT_PXRX_DMA_DWORDS( 1 + cEntries );
  1385. QUEUE_PXRX_DMA_HOLD( __PXRXTagLUTData, cEntries );
  1386. QUEUE_PXRX_DMA_BUFF( pulXlate, cEntries );
  1387. }
  1388. }
  1389. config2D = glintInfo->config2D & ~(__CONFIG2D_LOGOP_FORE_ENABLE |
  1390. __CONFIG2D_LOGOP_BACK_ENABLE |
  1391. __CONFIG2D_ENABLES);
  1392. config2D |= __CONFIG2D_FBWRITE |
  1393. __CONFIG2D_USERSCISSOR |
  1394. __CONFIG2D_EXTERNALSRC |
  1395. __CONFIG2D_LUTENABLE;
  1396. render2D = __RENDER2D_INCX |
  1397. __RENDER2D_INCY |
  1398. __RENDER2D_OP_SYNCDATA |
  1399. __RENDER2D_SPANS;
  1400. WAIT_PXRX_DMA_TAGS( 3 );
  1401. LOAD_CONFIG2D( config2D );
  1402. //@@BEGIN_DDKSPLIT
  1403. #if USE_RLE_DOWNLOADS
  1404. QUEUE_PXRX_DMA_TAG( __GlintTagDownloadTarget, __GlintTagColor );
  1405. #endif
  1406. //@@END_DDKSPLIT
  1407. cPelInv = 2 - ppdev->cPelSize;
  1408. // Everything before the LUT runs at 8bpp
  1409. pixelSize = (1 << 31) |
  1410. (2 << 2) |
  1411. (2 << 4) |
  1412. (2 << 6) |
  1413. (cPelInv << 8) |
  1414. (cPelInv << 10) |
  1415. (cPelInv << 12) |
  1416. (cPelInv << 14);
  1417. QUEUE_PXRX_DMA_TAG( __GlintTagPixelSize, pixelSize );
  1418. dx = pptlSrc->x - prclDst->left;
  1419. dy = pptlSrc->y - prclDst->top; // Add to destination to get source
  1420. lSrcDelta = psoSrc->lDelta;
  1421. pjSrcScan0 = psoSrc->pvScan0;
  1422. while( TRUE )
  1423. {
  1424. DISPDBG((DBGLVL, "download to rect (%d,%d) to (%d,%d)",
  1425. prcl->left, prcl->top, prcl->right, prcl->bottom));
  1426. // 8bpp => 1 pixel per byte => 4 pixels per dword
  1427. // Assume source bitmap width is dword aligned
  1428. ASSERTDD( (lSrcDelta & 3) == 0,
  1429. "pxrxCopyXfer8bpp: "
  1430. "SOURCE BITMAP WIDTH IS NOT DWORD ALIGNED!!!" );
  1431. // pointer to first pixel, in pixels/bytes
  1432. startPos = (((UINT_PTR) pjSrcScan0) +
  1433. ((prcl->top + dy) * lSrcDelta))
  1434. + (prcl->left + dx);
  1435. // dword pointer to dword aligned first pixel
  1436. pjSrc = (ULONG *) (startPos & ~3);
  1437. if(NULL == pjSrc)
  1438. {
  1439. DISPDBG((ERRLVL, "ERROR: pxrxCopyXfer8bppLge "
  1440. "return because of pjSrc NULL"));
  1441. return;
  1442. }
  1443. alignOff = (ULONG)(startPos & 3); // number of pixels past dword
  1444. // alignment of a scanline
  1445. LeftEdge = prcl->left - alignOff; // dword aligned left edge in pixels
  1446. AlignWidth = ((prcl->right - LeftEdge) + 3) & ~3; // dword aligned width
  1447. // in pixels
  1448. cy = prcl->bottom - prcl->top; // number of scanlines to do
  1449. DISPDBG((DBGLVL, "pjSrcScan0 = 0x%08X, startPos = 0x%08X, "
  1450. "pjSrc = 0x%08X",
  1451. pjSrcScan0, startPos, pjSrc));
  1452. DISPDBG((DBGLVL, "offset = %d pixels", alignOff));
  1453. DISPDBG((DBGLVL, "Aligned rect = (%d -> %d) => %d pixels => %d dwords",
  1454. LeftEdge, LeftEdge + AlignWidth,
  1455. AlignWidth, AlignWidth >> 2));
  1456. WAIT_PXRX_DMA_TAGS( 4 );
  1457. QUEUE_PXRX_DMA_TAG( __GlintTagScissorMinXY,
  1458. MAKEDWORD_XY(prcl->left, 0) );
  1459. QUEUE_PXRX_DMA_TAG( __GlintTagScissorMaxXY,
  1460. MAKEDWORD_XY(prcl->right, 0x7fff) );
  1461. QUEUE_PXRX_DMA_TAG( __GlintTagRectanglePosition,
  1462. MAKEDWORD_XY(LeftEdge, prcl->top) );
  1463. QUEUE_PXRX_DMA_TAG( __GlintTagRender2D, render2D |
  1464. __RENDER2D_WIDTH(AlignWidth) |
  1465. __RENDER2D_HEIGHT(cy) );
  1466. SEND_PXRX_DMA_BATCH;
  1467. AlignWidth >>= 2; // dword aligned width in dwords
  1468. lSrcDeltaDW = lSrcDelta >> 2; // scanline delta in dwords
  1469. // (start to start)
  1470. lTrueDelta = lSrcDeltaDW - AlignWidth; // scanline delta in dwords
  1471. // (end to start)
  1472. DISPDBG((DBGLVL, "Delta = %d bytes = %d dwords -> %d - %d dwords",
  1473. lSrcDelta, lSrcDeltaDW, lTrueDelta, AlignWidth));
  1474. //@@BEGIN_DDKSPLIT
  1475. #if USE_RLE_DOWNLOADS
  1476. // Do an RLE download:
  1477. tagPtr = NULL;
  1478. do
  1479. {
  1480. WAIT_PXRX_DMA_TAGS( AlignWidth + 1 );
  1481. nRemainder = AlignWidth;
  1482. while( nRemainder-- )
  1483. {
  1484. TEST_DWORD_ALIGNED( pjSrc );
  1485. data = *(pjSrc++);
  1486. len = 1;
  1487. TEST_DWORD_ALIGNED( pjSrc );
  1488. while( nRemainder && (*pjSrc == data) )
  1489. {
  1490. pjSrc++;
  1491. len++;
  1492. nRemainder--;
  1493. TEST_DWORD_ALIGNED( pjSrc );
  1494. }
  1495. if( len >= 4 )
  1496. {
  1497. if( tagPtr )
  1498. {
  1499. *tagPtr = ASSEMBLE_PXRX_DMA_HOLD( __GlintTagColor,
  1500. holdCount );
  1501. tagPtr = NULL;
  1502. }
  1503. QUEUE_PXRX_DMA_INDEX2( __GlintTagRLData, __GlintTagRLCount );
  1504. QUEUE_PXRX_DMA_DWORD( data );
  1505. QUEUE_PXRX_DMA_DWORD( len );
  1506. len = 0;
  1507. }
  1508. else
  1509. {
  1510. if( !tagPtr )
  1511. {
  1512. QUEUE_PXRX_DMA_DWORD_DELAYED( tagPtr );
  1513. holdCount = 0;
  1514. }
  1515. holdCount += len;
  1516. while( len-- )
  1517. {
  1518. QUEUE_PXRX_DMA_DWORD( data );
  1519. }
  1520. }
  1521. }
  1522. if( tagPtr )
  1523. {
  1524. *tagPtr = ASSEMBLE_PXRX_DMA_HOLD( __GlintTagColor, holdCount );
  1525. tagPtr = NULL;
  1526. }
  1527. pjSrc += lTrueDelta;
  1528. SEND_PXRX_DMA_BATCH;
  1529. } while( --cy > 0 );
  1530. #else
  1531. //@@END_DDKSPLIT
  1532. // Do a raw download:
  1533. while( TRUE )
  1534. {
  1535. DISPDBG((DBGLVL, "cy = %d", cy));
  1536. WAIT_PXRX_DMA_DWORDS( AlignWidth + 1 );
  1537. QUEUE_PXRX_DMA_HOLD( __GlintTagColor, AlignWidth );
  1538. TEST_DWORD_ALIGNED( pjSrc );
  1539. QUEUE_PXRX_DMA_BUFF( pjSrc, AlignWidth );
  1540. SEND_PXRX_DMA_BATCH;
  1541. if( --cy == 0 )
  1542. {
  1543. break;
  1544. }
  1545. pjSrc += lSrcDeltaDW;
  1546. }
  1547. //@@BEGIN_DDKSPLIT
  1548. #endif
  1549. //@@END_DDKSPLIT
  1550. if( --count == 0 )
  1551. {
  1552. break;
  1553. }
  1554. prcl++;
  1555. }
  1556. // Reset some defaults:
  1557. WAIT_PXRX_DMA_TAGS( 2 );
  1558. QUEUE_PXRX_DMA_TAG( __GlintTagPixelSize, cPelInv );
  1559. if( ppdev->cPelSize == GLINTDEPTH32 )
  1560. QUEUE_PXRX_DMA_TAG( __GlintTagScissorMaxXY, 0x7FFF7FFF );
  1561. SEND_PXRX_DMA_BATCH;
  1562. DISPDBG((DBGLVL, "pxrxCopyXfer8bpp return"));
  1563. }
  1564. //****************************************************************************
  1565. // FUNC: pxrxMemUpload
  1566. // ARGS: ppdev (I) - pointer to the physical device object
  1567. // crcl (I) - number of destination clipping rectangles
  1568. // prcl (I) - array of destination clipping rectangles
  1569. // psoDst (I) - destination surface
  1570. // pptlSrc (I) - offset into source surface
  1571. // prclDst (I) - unclipped destination rectangle
  1572. // RETN: void
  1573. //****************************************************************************
  1574. VOID pxrxMemUpload(
  1575. PPDEV ppdev,
  1576. LONG crcl,
  1577. RECTL *prcl,
  1578. SURFOBJ *psoDst,
  1579. POINTL *pptlSrc,
  1580. RECTL *prclDst)
  1581. {
  1582. BYTE *pDst, *pSrc;
  1583. LONG dwScanLineSize, cySrc, lSrcOff, lSrcStride;
  1584. GLINT_DECL;
  1585. // Make sure we're not performing other operations on the fb areas we want
  1586. SYNC_WITH_GLINT;
  1587. ASSERTDD(psoDst->iBitmapFormat == ppdev->iBitmapFormat,
  1588. "Dest must be same colour depth as screen");
  1589. ASSERTDD(crcl > 0, "Can't handle zero rectangles");
  1590. for(; --crcl >= 0; ++prcl)
  1591. {
  1592. // This gives an offset for offscreen DIBs (zero for primary rectangles)
  1593. lSrcOff = ppdev->DstPixelOrigin +
  1594. (ppdev->xyOffsetDst & 0xffff) +
  1595. (ppdev->xyOffsetDst >> 16) * ppdev->DstPixelDelta;
  1596. // Determine stride on wheter we are blitting from the
  1597. // primary or from an offscreen DIB
  1598. if (( ppdev->DstPixelOrigin == 0 ) &&
  1599. (ppdev->xyOffsetDst == 0) )
  1600. {
  1601. lSrcStride = ppdev->lDelta;
  1602. }
  1603. else
  1604. {
  1605. lSrcStride = ppdev->DstPixelDelta * ppdev->cjPelSize;
  1606. }
  1607. // pSrc must point to mem mapped primary
  1608. pSrc = (BYTE *)ppdev->pjScreen
  1609. + (lSrcOff * ppdev->cjPelSize)
  1610. + ((LONG)pptlSrc->x * ppdev->cjPelSize)
  1611. + ((LONG)pptlSrc->y * lSrcStride);
  1612. // pDst must point to the sysmem SURFOBJ
  1613. pDst = (BYTE *)psoDst->pvScan0
  1614. + ((LONG)prcl->left * ppdev->cjPelSize)
  1615. + ((LONG)prcl->top * (LONG)psoDst->lDelta);
  1616. // dwScanLineSize must have the right size to transfer in bytes
  1617. dwScanLineSize = ((LONG)prcl->right - (LONG)prcl->left) * ppdev->cjPelSize;
  1618. // Number of scan lines to transfer
  1619. cySrc = prcl->bottom - prcl->top;
  1620. // Do the copy
  1621. while (--cySrc >= 0)
  1622. {
  1623. // memcpy(dst, src, size)
  1624. memcpy(pDst, pSrc, dwScanLineSize);
  1625. pDst += psoDst->lDelta; // add stride
  1626. pSrc += lSrcStride; // add stride
  1627. }
  1628. }
  1629. } // pxrxMemUpload
  1630. //****************************************************************************
  1631. // FUNC: pxrxFifoUpload
  1632. // ARGS: ppdev (I) - pointer to the physical device object
  1633. // crcl (I) - number of destination clipping rectangles
  1634. // prcl (I) - array of destination clipping rectangles
  1635. // psoDst (I) - destination surface
  1636. // pptlSrc (I) - offset into source surface
  1637. // prclDst (I) - unclipped destination rectangle
  1638. // RETN: void
  1639. //----------------------------------------------------------------------------
  1640. // upload from on-chip source into host memory surface. Upload in spans
  1641. // (64-bit aligned) to minimise messages through the core and entries in the
  1642. // host out fifo.
  1643. //****************************************************************************
  1644. VOID pxrxFifoUpload(
  1645. PPDEV ppdev,
  1646. LONG crcl,
  1647. RECTL *prcl,
  1648. SURFOBJ *psoDst,
  1649. POINTL *pptlSrc,
  1650. RECTL *prclDst)
  1651. {
  1652. LONG xDomSrc, xSubSrc, yStartSrc, cxSrc, cySrc;
  1653. LONG culPerSrcScan;
  1654. LONG culDstDelta;
  1655. BOOL bRemPerSrcScan;
  1656. ULONG *pulDst, *pulDstScan;
  1657. ULONG leftMask, rightMask;
  1658. LONG cul, ul;
  1659. LONG cFifoSpaces;
  1660. __GlintFilterModeFmat FilterMode;
  1661. GLINT_DECL;
  1662. WAIT_PXRX_DMA_TAGS(1);
  1663. QUEUE_PXRX_DMA_TAG( __GlintTagFBDestReadMode, (glintInfo->fbDestMode | 0x103));
  1664. SEND_PXRX_DMA_FORCE;
  1665. //@@BEGIN_DDKSPLIT
  1666. #if USE_RLE_UPLOADS
  1667. // NB. using cxSrc >= 16 is slightly slower overall. These tests were empirically developed
  1668. // from WB99 BG & HE benchmarks
  1669. cxSrc = prcl->right - prcl->left;
  1670. if(cxSrc >= 32 && (cxSrc < 80 || (cxSrc >= 128 && cxSrc < 256) || cxSrc == ppdev->cxScreen))
  1671. {
  1672. pxrxRLEFifoUpload(ppdev, crcl, prcl, psoDst, pptlSrc, prclDst);
  1673. return;
  1674. }
  1675. #endif //USE_RLE_UPLOADS
  1676. //@@END_DDKSPLIT
  1677. DISPDBG((DBGLVL, "pxrxFifoUpload: prcl = (%d, %d -> %d, %d), "
  1678. "prclDst = (%d, %d -> %d, %d), ptlSrc(%d, %d), count = %d",
  1679. prcl->left, prcl->top, prcl->right, prcl->bottom,
  1680. prclDst->left, prclDst->top, prclDst->right,
  1681. prclDst->bottom, pptlSrc->x, pptlSrc->y, crcl));
  1682. DISPDBG((DBGLVL, "pxrxFifoUpload: psoDst: cx = %d, cy = %d, "
  1683. "lDelta = %d, pvScan0=%P)",
  1684. psoDst->sizlBitmap.cx, psoDst->sizlBitmap.cy,
  1685. psoDst->lDelta, psoDst->pvScan0));
  1686. DISPDBG((DBGLVL, "pxrxFifoUpload: xyOffsetDst = (%d, %d), "
  1687. "xyOffsetSrc = (%d, %d)",
  1688. ppdev->xyOffsetDst & 0xFFFF, ppdev->xyOffsetDst >> 16,
  1689. ppdev->xyOffsetSrc & 0xFFFF, ppdev->xyOffsetSrc >> 16));
  1690. ASSERTDD(psoDst->iBitmapFormat == ppdev->iBitmapFormat,
  1691. "Dest must be same colour depth as screen");
  1692. ASSERTDD(crcl > 0, "Can't handle zero rectangles");
  1693. WAIT_PXRX_DMA_TAGS(5);
  1694. LOAD_CONFIG2D(__CONFIG2D_FBDESTREAD);
  1695. SET_READ_BUFFERS;
  1696. // enable filter mode so we can get Sync
  1697. // and color messages on the output FIFO
  1698. *(DWORD *)(&FilterMode) = 0;
  1699. FilterMode.Synchronization = __GLINT_FILTER_TAG;
  1700. FilterMode.Color = __GLINT_FILTER_DATA;
  1701. QUEUE_PXRX_DMA_TAG(__GlintTagFilterMode, *(DWORD *)(&FilterMode));
  1702. for(; --crcl >= 0; ++prcl)
  1703. {
  1704. DISPDBG((DBGLVL, "pxrxFifoUpload: dest prcl(%xh,%xh..%xh,%xh)",
  1705. prcl->left, prcl->top, prcl->right, prcl->bottom));
  1706. // calculate pixel-aligned source
  1707. xDomSrc = pptlSrc->x + prcl->left - prclDst->left;
  1708. xSubSrc = pptlSrc->x + prcl->right - prclDst->left;
  1709. yStartSrc = pptlSrc->y + prcl->top - prclDst->top;
  1710. cySrc = prcl->bottom - prcl->top;
  1711. DISPDBG((DBGLVL, "pxrxFifoUpload: src (%xh,%xh..%xh,%xh)",
  1712. xDomSrc, yStartSrc, xSubSrc, yStartSrc + cySrc));
  1713. // will upload ulongs aligned to ulongs
  1714. if (ppdev->cPelSize == GLINTDEPTH32)
  1715. {
  1716. cxSrc = xSubSrc - xDomSrc;
  1717. culPerSrcScan = cxSrc;
  1718. leftMask = 0xFFFFFFFF;
  1719. rightMask = 0xFFFFFFFF;
  1720. }
  1721. else
  1722. {
  1723. if (ppdev->cPelSize == GLINTDEPTH16)
  1724. {
  1725. ULONG cPixFromUlongBoundary = prcl->left & 1;
  1726. xDomSrc -= cPixFromUlongBoundary;
  1727. cxSrc = xSubSrc - xDomSrc;
  1728. culPerSrcScan = (xSubSrc - xDomSrc + 1) >> 1;
  1729. leftMask = 0xFFFFFFFF << (cPixFromUlongBoundary << 4);
  1730. rightMask = 0xFFFFFFFF >> (((xDomSrc - xSubSrc) & 1) << 4);
  1731. }
  1732. else
  1733. {
  1734. ULONG cPixFromUlongBoundary = prcl->left & 3;
  1735. xDomSrc -= cPixFromUlongBoundary;
  1736. cxSrc = xSubSrc - xDomSrc;
  1737. culPerSrcScan = (xSubSrc - xDomSrc + 3) >> 2;
  1738. leftMask = 0xFFFFFFFF << (cPixFromUlongBoundary << 3);
  1739. rightMask = 0xFFFFFFFF >> (((xDomSrc - xSubSrc) & 3) << 3);
  1740. }
  1741. // We just want a single mask if the area to upload is less
  1742. // than one word wide.
  1743. if (culPerSrcScan == 1)
  1744. {
  1745. leftMask &= rightMask;
  1746. }
  1747. }
  1748. // uploading 64 bit aligned source
  1749. bRemPerSrcScan = culPerSrcScan & 1;
  1750. // Work out where the destination data goes to
  1751. culDstDelta = psoDst->lDelta >> 2;
  1752. pulDst = ((ULONG *)psoDst->pvScan0) +
  1753. (prcl->left >> (2 - ppdev->cPelSize))
  1754. + culDstDelta * prcl->top;
  1755. DISPDBG((DBGLVL, "pxrxFifoUpload: uploading aligned "
  1756. "src (%xh,%xh..%xh,%xh)",
  1757. xDomSrc, yStartSrc,
  1758. xDomSrc + cxSrc, yStartSrc + cySrc));
  1759. // Render the rectangle
  1760. WAIT_PXRX_DMA_TAGS(2);
  1761. QUEUE_PXRX_DMA_TAG( __GlintTagRectanglePosition,
  1762. MAKEDWORD_XY(xDomSrc, yStartSrc));
  1763. QUEUE_PXRX_DMA_TAG( __GlintTagRender2D,
  1764. __RENDER2D_OP_NORMAL |
  1765. __RENDER2D_SPANS |
  1766. __RENDER2D_INCY |
  1767. __RENDER2D_INCX |
  1768. __RENDER2D_WIDTH(cxSrc) |
  1769. __RENDER2D_HEIGHT(cySrc));
  1770. SEND_PXRX_DMA_FORCE;
  1771. // If the start and end masks are 0xffffffff, we can just upload
  1772. // the words and put them directly into the destination. Otherwise,
  1773. // or the first and last word on any scanline we have to mask
  1774. // off any pixels that are outside the render area. We know the
  1775. // glint will have 0 in the undesired right hand edge pixels, as
  1776. // these were not in the render area. We dont know anything about
  1777. // the destination though.
  1778. if ((leftMask == 0xFFFFFFFF) && (rightMask == 0xFFFFFFFF))
  1779. {
  1780. DISPDBG((DBGLVL, "pxrxFifoUpload: no edge masks"));
  1781. while (--cySrc >= 0)
  1782. {
  1783. pulDstScan = pulDst;
  1784. pulDst += culDstDelta;
  1785. DISPDBG((DBGLVL, "pxrxFifoUpload: uploading scan of %xh "
  1786. "ulongs to %p (Remainder %xh)",
  1787. culPerSrcScan, pulDstScan, bRemPerSrcScan));
  1788. cul = culPerSrcScan;
  1789. while(cul)
  1790. {
  1791. WAIT_OUTPUT_FIFO_NOT_EMPTY(cFifoSpaces);
  1792. if (cFifoSpaces > cul)
  1793. {
  1794. cFifoSpaces = cul;
  1795. }
  1796. cul -= cFifoSpaces;
  1797. while (--cFifoSpaces >= 0)
  1798. {
  1799. READ_OUTPUT_FIFO(ul);
  1800. DISPDBG((DBGLVL, "pxrxFifoUpload: read %08.8xh from "
  1801. "output FIFO", ul));
  1802. *pulDstScan++ = ul;
  1803. }
  1804. }
  1805. if(bRemPerSrcScan)
  1806. {
  1807. WAIT_OUTPUT_FIFO_NOT_EMPTY(cFifoSpaces);
  1808. READ_OUTPUT_FIFO(ul);
  1809. DISPDBG((DBGLVL, "pxrxFifoUpload: read remainder %08.8xh "
  1810. "from output FIFO", ul));
  1811. }
  1812. }
  1813. }
  1814. else if(culPerSrcScan == 1)
  1815. {
  1816. DISPDBG((DBGLVL, "pxrxFifoUpload: single ulong per scan"));
  1817. while (--cySrc >= 0)
  1818. {
  1819. WAIT_OUTPUT_FIFO_NOT_EMPTY(cFifoSpaces);
  1820. READ_OUTPUT_FIFO(ul);
  1821. DISPDBG((DBGLVL, "pxrxFifoUpload: "
  1822. "read %08.8xh from output FIFO", ul));
  1823. // leftMask contains both masks in this case
  1824. *pulDst = (*pulDst & ~leftMask) | (ul & leftMask);
  1825. ASSERTDD(bRemPerSrcScan, "one word per scan upload should "
  1826. "always leave a remainder");
  1827. WAIT_OUTPUT_FIFO_NOT_EMPTY(cFifoSpaces);
  1828. READ_OUTPUT_FIFO(ul);
  1829. DISPDBG((DBGLVL, "pxrxFifoUpload: read remainder %08.8xh "
  1830. "from output FIFO", ul));
  1831. pulDst += culDstDelta;
  1832. }
  1833. }
  1834. else
  1835. {
  1836. DISPDBG((DBGLVL, "pxrxFifoUpload: scan with left & right edge "
  1837. "masks: %08.8x .. %08.8x", leftMask, rightMask));
  1838. while (--cySrc >= 0)
  1839. {
  1840. pulDstScan = pulDst;
  1841. pulDst += culDstDelta;
  1842. DISPDBG((DBGLVL, "pxrxFifoUpload: uploading scan of %xh "
  1843. "ulongs to %p", culPerSrcScan, pulDstScan));
  1844. // get first ulong
  1845. WAIT_OUTPUT_FIFO_NOT_EMPTY(cFifoSpaces);
  1846. --cFifoSpaces;
  1847. READ_OUTPUT_FIFO(ul);
  1848. DISPDBG((DBGLVL, "pxrxFifoUpload: "
  1849. "read %08.8xh from output FIFO", ul));
  1850. *pulDstScan++ = (*pulDstScan & ~leftMask) | (ul & leftMask);
  1851. // get middle ulongs
  1852. cul = culPerSrcScan - 2;
  1853. while (cul)
  1854. {
  1855. if (cFifoSpaces > cul)
  1856. {
  1857. cFifoSpaces = cul;
  1858. }
  1859. cul -= cFifoSpaces;
  1860. while (--cFifoSpaces >= 0)
  1861. {
  1862. READ_OUTPUT_FIFO(ul);
  1863. DISPDBG((DBGLVL, "pxrxFifoUpload: "
  1864. "read %08.8xh from output FIFO", ul));
  1865. *pulDstScan++ = ul;
  1866. }
  1867. WAIT_OUTPUT_FIFO_NOT_EMPTY(cFifoSpaces);
  1868. }
  1869. // get last ulong
  1870. READ_OUTPUT_FIFO(ul);
  1871. DISPDBG((DBGLVL, "pxrxFifoUpload: "
  1872. "read %08.8xh from output FIFO", ul));
  1873. *pulDstScan = (*pulDstScan & ~rightMask) | (ul & rightMask);
  1874. if(bRemPerSrcScan)
  1875. {
  1876. WAIT_OUTPUT_FIFO_NOT_EMPTY(cFifoSpaces);
  1877. READ_OUTPUT_FIFO(ul);
  1878. DISPDBG((DBGLVL, "pxrxFifoUpload: read remainder "
  1879. "%08.8xh from output FIFO", ul));
  1880. }
  1881. }
  1882. }
  1883. }
  1884. #if DBG
  1885. cul = 0xaa55aa55;
  1886. DISPDBG((DBGLVL, "pxrxFifoUpload: waiting for sync (id = %08.8xh)", cul));
  1887. WAIT_PXRX_DMA_TAGS(1);
  1888. QUEUE_PXRX_DMA_TAG(__GlintTagSync, cul);
  1889. SEND_PXRX_DMA_FORCE;
  1890. do
  1891. {
  1892. WAIT_OUTPUT_FIFO_READY;
  1893. READ_OUTPUT_FIFO(ul);
  1894. DISPDBG((DBGLVL, "pxrxFifoUpload: read %08.8xh from output FIFO", ul));
  1895. if(ul != __GlintTagSync)
  1896. {
  1897. DISPDBG((ERRLVL,"pxrxFifoUpload: didn't read back sync!"));
  1898. }
  1899. }
  1900. while(ul != __GlintTagSync);
  1901. DISPDBG((DBGLVL, "pxrxFifoUpload: got sync"));
  1902. #endif
  1903. // no need to initiate DMA with this tag - it will get flushed with the
  1904. // next primitive and meanwhile will not affect local memory
  1905. WAIT_PXRX_DMA_TAGS(1);
  1906. QUEUE_PXRX_DMA_TAG(__GlintTagFilterMode, 0);
  1907. SEND_PXRX_DMA_BATCH;
  1908. GLINT_CORE_IDLE;
  1909. DISPDBG((DBGLVL, "pxrxFifoUpload: done"));
  1910. }
  1911. //****************************************************************************
  1912. // VOID vGlintCopyBltBypassDownloadXlate8bpp
  1913. //
  1914. // using the bypass mechanism we can take advantage of write-combining
  1915. // which can be quicker than using the FIFO
  1916. // NB. supports 32bpp and 16bpp destinations
  1917. //****************************************************************************
  1918. VOID vGlintCopyBltBypassDownloadXlate8bpp(
  1919. PDEV *ppdev,
  1920. SURFOBJ *psoSrc,
  1921. POINTL *pptlSrc,
  1922. RECTL *prclDst,
  1923. RECTL *prclClip,
  1924. LONG crclClip,
  1925. XLATEOBJ *pxlo)
  1926. {
  1927. LONG xOff;
  1928. BYTE *pjSrcScan0;
  1929. LONG cjSrcDelta, xSrcOff, ySrcOff;
  1930. ULONG *pulDstScan0;
  1931. LONG culDstDelta, xDstOff;
  1932. LONG cScans, cPixPerScan, c;
  1933. ULONG cjSrcDeltaRem, cjDstDeltaRem;
  1934. ULONG *aulXlate;
  1935. BYTE *pjSrc;
  1936. GLINT_DECL;
  1937. //@@BEGIN_DDKSPLIT
  1938. #if 0
  1939. {
  1940. SIZEL sizlDst;
  1941. sizlDst.cx = prclClip->right - prclClip->left;
  1942. sizlDst.cy = prclClip->bottom - prclClip->top;
  1943. DISPDBG((DBGLVL, "vGlintCopyBltBypassDownloadXlate8bpp(): "
  1944. "cRects(%d) sizlDst(%d,%d)",
  1945. crclClip, sizlDst.cx, sizlDst.cy));
  1946. }
  1947. #endif //DBG
  1948. //@@END_DDKSPLIT
  1949. pjSrcScan0 = (BYTE *)psoSrc->pvScan0;
  1950. cjSrcDelta = psoSrc->lDelta;
  1951. // need to add arclClip[n].left to get xSrc
  1952. xSrcOff = pptlSrc->x - prclDst->left;
  1953. // need to add arclClip[n].top to get ySrc
  1954. ySrcOff = pptlSrc->y - prclDst->top;
  1955. pulDstScan0 = (ULONG *)ppdev->pjScreen;
  1956. culDstDelta = ppdev->DstPixelDelta >> (2 - ppdev->cPelSize);
  1957. xDstOff = ppdev->DstPixelOrigin + (ppdev->xyOffsetDst & 0xffff) +
  1958. (ppdev->xyOffsetDst >> 16) * ppdev->DstPixelDelta;
  1959. aulXlate = pxlo->pulXlate;
  1960. SYNC_IF_CORE_BUSY;
  1961. for (; --crclClip >= 0; ++prclClip)
  1962. {
  1963. cScans = prclClip->bottom - prclClip->top;
  1964. cPixPerScan = prclClip->right - prclClip->left;
  1965. cjSrcDeltaRem = cjSrcDelta - cPixPerScan;
  1966. pjSrc = -1 + pjSrcScan0 + xSrcOff + prclClip->left
  1967. + ((prclClip->top + ySrcOff) * cjSrcDelta);
  1968. if (ppdev->cPelSize == GLINTDEPTH32)
  1969. {
  1970. ULONG *pulDst;
  1971. cjDstDeltaRem = (culDstDelta - cPixPerScan) << 2;
  1972. pulDst = -1 + pulDstScan0 + xDstOff + prclClip->left
  1973. + prclClip->top * culDstDelta;
  1974. for (;
  1975. --cScans >= 0;
  1976. pjSrc += cjSrcDeltaRem, (BYTE *)pulDst += cjDstDeltaRem)
  1977. {
  1978. for(c = cPixPerScan; --c >= 0;)
  1979. {
  1980. *++pulDst = aulXlate[*++pjSrc];
  1981. }
  1982. }
  1983. }
  1984. else // (GLINTDEPTH16)
  1985. {
  1986. USHORT *pusDst;
  1987. cjDstDeltaRem =
  1988. (culDstDelta << 2) - (cPixPerScan << ppdev->cPelSize);
  1989. pusDst = -1 + (USHORT *)pulDstScan0 + xDstOff + prclClip->left
  1990. + ((prclClip->top * culDstDelta) << 1);
  1991. for (;
  1992. --cScans >= 0;
  1993. pjSrc += cjSrcDeltaRem, (BYTE *)pusDst += cjDstDeltaRem)
  1994. {
  1995. for (c = cPixPerScan; --c >= 0;)
  1996. {
  1997. *++pusDst = (USHORT)aulXlate[*++pjSrc];
  1998. }
  1999. }
  2000. }
  2001. }
  2002. }
  2003. //@@BEGIN_DDKSPLIT
  2004. #if 0
  2005. /**************************************************************************\
  2006. *
  2007. * void pxrxMonoDownloadRLE
  2008. *
  2009. \**************************************************************************/
  2010. void pxrxMonoDownloadRLE(
  2011. PPDEV ppdev,
  2012. ULONG AlignWidth,
  2013. ULONG *pjSrc,
  2014. LONG lSrcDelta,
  2015. LONG cy )
  2016. {
  2017. ULONG len, data, holdCount;
  2018. ULONG *tagPtr = NULL;
  2019. GLINT_DECL;
  2020. WAIT_PXRX_DMA_TAGS( 1 );
  2021. QUEUE_PXRX_DMA_TAG( __GlintTagDownloadTarget,
  2022. __GlintTagBitMaskPattern );
  2023. if( AlignWidth == 32 )
  2024. {
  2025. ULONG bits;
  2026. DISPDBG((DBGLVL, "Doing Single Word per scan download"));
  2027. WAIT_PXRX_DMA_DWORDS( cy + 1 );
  2028. while( cy-- )
  2029. {
  2030. TEST_DWORD_ALIGNED( pjSrc );
  2031. data = *pjSrc;
  2032. pjSrc += lSrcDelta;
  2033. len = 1;
  2034. TEST_DWORD_ALIGNED( pjSrc );
  2035. while( cy && (*pjSrc == data) )
  2036. {
  2037. pjSrc += lSrcDelta;
  2038. len++;
  2039. cy--;
  2040. TEST_DWORD_ALIGNED( pjSrc );
  2041. }
  2042. if( len >= 4 )
  2043. {
  2044. if( tagPtr )
  2045. {
  2046. *tagPtr = ASSEMBLE_PXRX_DMA_HOLD( __GlintTagBitMaskPattern,
  2047. holdCount );
  2048. tagPtr = NULL;
  2049. }
  2050. QUEUE_PXRX_DMA_INDEX2( __GlintTagRLData, __GlintTagRLCount );
  2051. QUEUE_PXRX_DMA_DWORD( data );
  2052. QUEUE_PXRX_DMA_DWORD( len );
  2053. len = 0;
  2054. }
  2055. else
  2056. {
  2057. if( !tagPtr )
  2058. {
  2059. QUEUE_PXRX_DMA_DWORD_DELAYED( tagPtr );
  2060. holdCount = 0;
  2061. }
  2062. holdCount += len;
  2063. while( len-- )
  2064. {
  2065. QUEUE_PXRX_DMA_DWORD( data );
  2066. }
  2067. }
  2068. }
  2069. if( tagPtr )
  2070. {
  2071. *tagPtr = ASSEMBLE_PXRX_DMA_HOLD( __GlintTagBitMaskPattern,
  2072. holdCount );
  2073. tagPtr = NULL;
  2074. }
  2075. }
  2076. else
  2077. {
  2078. // multiple 32 bit words per scanline. convert the delta to the
  2079. // delta as we need it at the end of each line by subtracting the
  2080. // width in bytes of the data we're downloading. Note, pjSrc
  2081. // is always 1 LONG short of the end of the line because we break
  2082. // before adding on the last ULONG. Thus, we subtract sizeof(ULONG)
  2083. // from the original adjustment.
  2084. LONG nRemainder;
  2085. ULONG bits;
  2086. LONG lSrcDeltaScan = lSrcDelta - (AlignWidth >> 5);
  2087. DISPDBG((DBGLVL, "Doing Multiple Word per scan download"));
  2088. while( TRUE )
  2089. {
  2090. nRemainder = AlignWidth >> 5;
  2091. WAIT_PXRX_DMA_DWORDS( nRemainder + 1 );
  2092. while( nRemainder-- )
  2093. {
  2094. TEST_DWORD_ALIGNED( pjSrc );
  2095. data = *(pjSrc++);
  2096. len = 1;
  2097. TEST_DWORD_ALIGNED( pjSrc );
  2098. while( nRemainder && (*pjSrc == data) )
  2099. {
  2100. pjSrc++;
  2101. len++;
  2102. nRemainder--;
  2103. TEST_DWORD_ALIGNED( pjSrc );
  2104. }
  2105. if( len >= 4 )
  2106. {
  2107. if( tagPtr )
  2108. {
  2109. *tagPtr = ASSEMBLE_PXRX_DMA_HOLD(
  2110. __GlintTagBitMaskPattern,
  2111. holdCount );
  2112. tagPtr = NULL;
  2113. }
  2114. QUEUE_PXRX_DMA_INDEX2( __GlintTagRLData,
  2115. __GlintTagRLCount );
  2116. QUEUE_PXRX_DMA_DWORD( data );
  2117. QUEUE_PXRX_DMA_DWORD( len );
  2118. len = 0;
  2119. }
  2120. else
  2121. {
  2122. if( !tagPtr )
  2123. {
  2124. QUEUE_PXRX_DMA_DWORD_DELAYED( tagPtr );
  2125. holdCount = 0;
  2126. }
  2127. holdCount += len;
  2128. while( len-- )
  2129. {
  2130. QUEUE_PXRX_DMA_DWORD( data );
  2131. }
  2132. }
  2133. }
  2134. if( tagPtr )
  2135. {
  2136. *tagPtr = ASSEMBLE_PXRX_DMA_HOLD( __GlintTagBitMaskPattern,
  2137. holdCount );
  2138. tagPtr = NULL;
  2139. }
  2140. if( --cy == 0 )
  2141. {
  2142. break;
  2143. }
  2144. SEND_PXRX_DMA_BATCH;
  2145. pjSrc += lSrcDeltaScan;
  2146. }
  2147. }
  2148. SEND_PXRX_DMA_BATCH;
  2149. }
  2150. //*********************************************************************************************
  2151. // FUNC: pxrxRLEFifoUpload
  2152. // ARGS: ppdev (I) - pointer to the physical device object
  2153. // crcl (I) - number of destination clipping rectangles
  2154. // prcl (I) - array of destination clipping rectangles
  2155. // psoDst (I) - destination surface
  2156. // pptlSrc (I) - offset into source surface
  2157. // prclDst (I) - unclipped destination rectangle
  2158. // RETN: void
  2159. //---------------------------------------------------------------------------------------------
  2160. // upload from on-chip source into host memory surface. Upload in spans (64-bit aligned) to
  2161. // minimise messages through the core and entries in the host out fifo. Upload is RLE encoded.
  2162. //*********************************************************************************************
  2163. VOID pxrxRLEFifoUpload(PPDEV ppdev, LONG crcl, RECTL *prcl, SURFOBJ *psoDst, POINTL *pptlSrc, RECTL *prclDst)
  2164. {
  2165. LONG xDomSrc, xSubSrc, yStartSrc, cxSrc, cySrc;
  2166. LONG culPerSrcScan;
  2167. LONG culDstDelta;
  2168. BOOL bRemPerSrcScan;
  2169. ULONG *pulDst, *pulDstScan;
  2170. ULONG leftMask, rightMask;
  2171. LONG cul, ul;
  2172. LONG cFifoSpaces;
  2173. ULONG RLECount, RLEData;
  2174. __GlintFilterModeFmat FilterMode;
  2175. GLINT_DECL;
  2176. DISPDBG((7, "pxrxFifoUpload: prcl = (%d, %d -> %d, %d), prclDst = (%d, %d -> %d, %d), ptlSrc(%d, %d), count = %d",
  2177. prcl->left, prcl->top, prcl->right, prcl->bottom,
  2178. prclDst->left, prclDst->top, prclDst->right, prclDst->bottom, pptlSrc->x, pptlSrc->y, crcl));
  2179. DISPDBG((7, "pxrxFifoUpload: psoDst: cx = %d, cy = %d, lDelta = %d, pvScan0=%P)",
  2180. psoDst->sizlBitmap.cx, psoDst->sizlBitmap.cy, psoDst->lDelta, psoDst->pvScan0));
  2181. DISPDBG((7, "pxrxFifoUpload: xyOffsetDst = (%d, %d), xyOffsetSrc = (%d, %d)",
  2182. ppdev->xyOffsetDst & 0xFFFF, ppdev->xyOffsetDst >> 16,
  2183. ppdev->xyOffsetSrc & 0xFFFF, ppdev->xyOffsetSrc >> 16));
  2184. ASSERTDD(psoDst->iBitmapFormat == ppdev->iBitmapFormat, "Dest must be same colour depth as screen");
  2185. ASSERTDD(crcl > 0, "Can't handle zero rectangles");
  2186. WAIT_PXRX_DMA_TAGS(6);
  2187. QUEUE_PXRX_DMA_TAG( __GlintTagRLEMask, 0xffffffff);
  2188. LOAD_CONFIG2D(__CONFIG2D_FBDESTREAD);
  2189. SET_READ_BUFFERS;
  2190. // enable filter mode so we can get Sync and color messages on the output FIFO
  2191. *(DWORD *)(&FilterMode) = 0;
  2192. FilterMode.Synchronization = __GLINT_FILTER_TAG;
  2193. FilterMode.Color = __GLINT_FILTER_DATA;
  2194. FilterMode.RLEHostOut = TRUE;
  2195. QUEUE_PXRX_DMA_TAG(__GlintTagFilterMode, *(DWORD*)(&FilterMode));
  2196. for(; --crcl >= 0; ++prcl)
  2197. {
  2198. DISPDBG((7, "pxrxFifoUpload: dest prcl(%xh,%xh..%xh,%xh)", prcl->left, prcl->top, prcl->right, prcl->bottom));
  2199. // calculate pixel-aligned source
  2200. xDomSrc = pptlSrc->x + prcl->left - prclDst->left;
  2201. xSubSrc = pptlSrc->x + prcl->right - prclDst->left;
  2202. yStartSrc = pptlSrc->y + prcl->top - prclDst->top;
  2203. cySrc = prcl->bottom - prcl->top;
  2204. DISPDBG((8, "pxrxFifoUpload: src (%xh,%xh..%xh,%xh)", xDomSrc, yStartSrc, xSubSrc, yStartSrc + cySrc));
  2205. // will upload ulongs aligned to ulongs
  2206. if (ppdev->cPelSize == GLINTDEPTH32)
  2207. {
  2208. cxSrc = xSubSrc - xDomSrc;
  2209. culPerSrcScan = cxSrc;
  2210. leftMask = 0xFFFFFFFF;
  2211. rightMask = 0xFFFFFFFF;
  2212. }
  2213. else
  2214. {
  2215. if (ppdev->cPelSize == GLINTDEPTH16)
  2216. {
  2217. ULONG cPixFromUlongBoundary = prcl->left & 1;
  2218. xDomSrc -= cPixFromUlongBoundary;
  2219. cxSrc = xSubSrc - xDomSrc;
  2220. culPerSrcScan = (xSubSrc - xDomSrc + 1) >> 1;
  2221. leftMask = 0xFFFFFFFF << (cPixFromUlongBoundary << 4);
  2222. rightMask = 0xFFFFFFFF >> (((xDomSrc - xSubSrc) & 1) << 4);
  2223. }
  2224. else
  2225. {
  2226. ULONG cPixFromUlongBoundary = prcl->left & 3;
  2227. xDomSrc -= cPixFromUlongBoundary;
  2228. cxSrc = xSubSrc - xDomSrc;
  2229. culPerSrcScan = (xSubSrc - xDomSrc + 3) >> 2;
  2230. leftMask = 0xFFFFFFFF << (cPixFromUlongBoundary << 3);
  2231. rightMask = 0xFFFFFFFF >> (((xDomSrc - xSubSrc) & 3) << 3);
  2232. }
  2233. // We just want a single mask if the area to upload is less than one word wide.
  2234. if (culPerSrcScan == 1)
  2235. leftMask &= rightMask;
  2236. }
  2237. // uploading 64 bit aligned source
  2238. bRemPerSrcScan = culPerSrcScan & 1;
  2239. // the remainder will be encoded in the run: it's simpler just to add it in now
  2240. // then check bRemPerSrcScan during the upload
  2241. DISPDBG((8, "pxrxFifoUpload: Adding remainder into culPerSrcScan for RLE"));
  2242. culPerSrcScan += bRemPerSrcScan;
  2243. // Work out where the destination data goes to
  2244. culDstDelta = psoDst->lDelta >> 2;
  2245. pulDst = ((ULONG *)psoDst->pvScan0) + (prcl->left >> (2 - ppdev->cPelSize)) + culDstDelta * prcl->top;
  2246. DISPDBG((8, "pxrxFifoUpload: uploading aligned src (%xh,%xh..%xh,%xh)", xDomSrc, yStartSrc,
  2247. xDomSrc + cxSrc, yStartSrc + cySrc));
  2248. // Render the rectangle
  2249. WAIT_PXRX_DMA_TAGS(2);
  2250. QUEUE_PXRX_DMA_TAG( __GlintTagRectanglePosition,MAKEDWORD_XY(xDomSrc, yStartSrc));
  2251. QUEUE_PXRX_DMA_TAG( __GlintTagRender2D, __RENDER2D_OP_NORMAL | __RENDER2D_SPANS |
  2252. __RENDER2D_INCY | __RENDER2D_INCX |
  2253. __RENDER2D_WIDTH(cxSrc) | __RENDER2D_HEIGHT(cySrc));
  2254. SEND_PXRX_DMA_FORCE;
  2255. // If the start and end masks are 0xffffffff, we can just upload the words and put them
  2256. // directly into the destination. Otherwise, or the first and last word on any scanline
  2257. // we have to mask off any pixels that are outside the render area. We know the glint will
  2258. // have 0 in the undesired right hand edge pixels, as these were not in the render area. We
  2259. // dont know anything about the destination though.
  2260. if (leftMask == 0xFFFFFFFF && rightMask == 0xFFFFFFFF)
  2261. {
  2262. DISPDBG((8, "pxrxFifoUpload: no edge masks"));
  2263. while (--cySrc >= 0)
  2264. {
  2265. pulDstScan = pulDst;
  2266. pulDst += culDstDelta;
  2267. DISPDBG((9, "pxrxFifoUpload: uploading scan of %xh ulongs to %p (Remainder %xh)",
  2268. culPerSrcScan, pulDstScan, bRemPerSrcScan));
  2269. cul = culPerSrcScan;
  2270. while(cul)
  2271. {
  2272. WAIT_OUTPUT_FIFO_COUNT(2);
  2273. READ_OUTPUT_FIFO(RLECount);
  2274. READ_OUTPUT_FIFO(RLEData);
  2275. DISPDBG((10, "pxrxFifoUpload: RLECount = %xh RLEData = 08.8xh", RLECount, RLEData));
  2276. cul -= RLECount;
  2277. if(cul == 0 && bRemPerSrcScan)
  2278. {
  2279. // discard the last ulong
  2280. --RLECount;
  2281. }
  2282. while(RLECount--)
  2283. {
  2284. DISPDBG((10, "pxrxFifoUpload: written ulong"));
  2285. *pulDstScan++ = RLEData;
  2286. }
  2287. }
  2288. }
  2289. }
  2290. else if(culPerSrcScan == 1)
  2291. {
  2292. DISPDBG((8, "pxrxFifoUpload: single ulong per scan"));
  2293. while (--cySrc >= 0)
  2294. {
  2295. // the remainder has already been added into culPerSrcScan so this can't happen
  2296. DISPDBG((ERRLVL,"pxrxFifoUpload: got single ulong per scan - but we always upload 64 bit quanta!"));
  2297. pulDst += culDstDelta;
  2298. }
  2299. }
  2300. else
  2301. {
  2302. DISPDBG((8, "pxrxFifoUpload: scan with left & right edge masks: %08.8x .. %08.8x", leftMask, rightMask));
  2303. while (--cySrc >= 0)
  2304. {
  2305. pulDstScan = pulDst;
  2306. pulDst += culDstDelta;
  2307. DISPDBG((9, "pxrxFifoUpload: uploading scan of %xh ulongs to %p", culPerSrcScan, pulDstScan));
  2308. cul = culPerSrcScan;
  2309. while(cul)
  2310. {
  2311. WAIT_OUTPUT_FIFO_COUNT(2);
  2312. READ_OUTPUT_FIFO(RLECount);
  2313. READ_OUTPUT_FIFO(RLEData);
  2314. DISPDBG((10, "pxrxFifoUpload: RLECount = %xh RLEData = %08.8xh", RLECount, RLEData));
  2315. if(cul - bRemPerSrcScan == 0)
  2316. {
  2317. DISPDBG((10, "pxrxFifoUpload: discarding last ulong"));
  2318. break;
  2319. }
  2320. if(culPerSrcScan - bRemPerSrcScan == 1)
  2321. {
  2322. // one pixel per scan
  2323. DISPDBG((10, "pxrxFifoUpload: written single pixel scan"));
  2324. *pulDstScan = (*pulDstScan & ~leftMask) | (RLEData & leftMask);
  2325. cul -= RLECount;
  2326. continue;
  2327. }
  2328. if(cul == culPerSrcScan)
  2329. {
  2330. DISPDBG((10, "pxrxFifoUpload: written left edge"));
  2331. *pulDstScan++ = (*pulDstScan & ~leftMask) | (RLEData & leftMask); // first ulong
  2332. --RLECount;
  2333. --cul;
  2334. }
  2335. cul -= RLECount;
  2336. if(cul == 0)
  2337. {
  2338. // this is the last run of the scan: process the last ulong separately in order
  2339. // to apply the right edge mask
  2340. RLECount -= 1 + bRemPerSrcScan;
  2341. }
  2342. else if(cul - bRemPerSrcScan == 0)
  2343. {
  2344. // this is the penultimate run of the scan and the last one will just include the
  2345. // remainder: process the last ulong separately in order to apply the right edge mask
  2346. --RLECount;
  2347. }
  2348. while(RLECount--)
  2349. {
  2350. DISPDBG((10, "pxrxFifoUpload: written middle ulong"));
  2351. *pulDstScan++ = RLEData;
  2352. }
  2353. if(cul == 0 || cul - bRemPerSrcScan == 0)
  2354. {
  2355. DISPDBG((10, "pxrxFifoUpload: written right edge"));
  2356. *pulDstScan = (*pulDstScan & ~rightMask) | (RLEData & rightMask); // last ulong
  2357. #if DBG
  2358. if(cul - bRemPerSrcScan == 0)
  2359. {
  2360. DISPDBG((10, "pxrxFifoUpload: discarding last ulong"));
  2361. }
  2362. #endif
  2363. }
  2364. }
  2365. }
  2366. }
  2367. }
  2368. #if DBG
  2369. cul = 0xaa55aa55;
  2370. DISPDBG((8, "pxrxFifoUpload: waiting for sync (id = %08.8xh)", cul));
  2371. WAIT_PXRX_DMA_TAGS(1);
  2372. QUEUE_PXRX_DMA_TAG(__GlintTagSync, cul);
  2373. SEND_PXRX_DMA_FORCE;
  2374. do
  2375. {
  2376. WAIT_OUTPUT_FIFO_READY;
  2377. READ_OUTPUT_FIFO(ul);
  2378. DISPDBG((8, "pxrxFifoUpload: read %08.8xh from output FIFO", ul));
  2379. if(ul != __GlintTagSync)
  2380. {
  2381. DISPDBG((ERRLVL,"pxrxFifoUpload: didn't read back sync!"));
  2382. }
  2383. }
  2384. while(ul != __GlintTagSync);
  2385. DISPDBG((8, "pxrxFifoUpload: got sync"));
  2386. #endif
  2387. // no need to initiate DMA with this tag - it will get flushed with the next primitive and
  2388. // meanwhile will not affect local memory
  2389. WAIT_PXRX_DMA_TAGS(1);
  2390. QUEUE_PXRX_DMA_TAG(__GlintTagFilterMode, 0);
  2391. SEND_PXRX_DMA_BATCH;
  2392. GLINT_CORE_IDLE;
  2393. DISPDBG((7, "pxrxFifoUpload: done"));
  2394. }
  2395. //****************************************************************************
  2396. // FUNC: vGlintCopyBltBypassDownload32bpp
  2397. // DESC: using the bypass mechanism we can take advantage of write-combining
  2398. // which can be quicker than using the FIFO
  2399. //****************************************************************************
  2400. VOID vGlintCopyBltBypassDownload32bpp(
  2401. PDEV *ppdev,
  2402. SURFOBJ *psoSrc,
  2403. POINTL *pptlSrc,
  2404. RECTL *prclDst,
  2405. RECTL *prclClip,
  2406. LONG crclClip)
  2407. {
  2408. LONG xOff;
  2409. ULONG *pulSrcScan0;
  2410. LONG culSrcDelta, xSrcOff, ySrcOff;
  2411. ULONG *pulDstScan0;
  2412. LONG culDstDelta, xDstOff;
  2413. LONG cScans, cPixPerScan, c;
  2414. ULONG cjSrcDeltaRem, cjDstDeltaRem;
  2415. ULONG *pulSrc;
  2416. ULONG *pulDst;
  2417. ULONG tmp0, tmp1, tmp2;
  2418. GLINT_DECL;
  2419. #if DBG && 0
  2420. {
  2421. SIZEL sizlDst;
  2422. sizlDst.cx = prclClip->right - prclClip->left;
  2423. sizlDst.cy = prclClip->bottom - prclClip->top;
  2424. DISPDBG((-1, "vGlintCopyBltBypassDownload32bpp(): cRects(%d) sizlDst(%d,%d)", crclClip, sizlDst.cx, sizlDst.cy));
  2425. }
  2426. #endif //DBG
  2427. pulSrcScan0 = (ULONG *)psoSrc->pvScan0;
  2428. culSrcDelta = psoSrc->lDelta >> 2;
  2429. xSrcOff = pptlSrc->x - prclDst->left; // need to add arclClip[n].left to get xSrc
  2430. ySrcOff = pptlSrc->y - prclDst->top; // need to add arclClip[n].top to get ySrc
  2431. pulDstScan0 = (ULONG *)ppdev->pjScreen;
  2432. culDstDelta = ppdev->DstPixelDelta >> (2 - ppdev->cPelSize);
  2433. xDstOff = ppdev->DstPixelOrigin + (ppdev->xyOffsetDst & 0xffff) +
  2434. (ppdev->xyOffsetDst >> 16) * ppdev->DstPixelDelta;
  2435. SYNC_IF_CORE_BUSY;
  2436. for (; --crclClip >= 0; ++prclClip)
  2437. {
  2438. cScans = prclClip->bottom - prclClip->top;
  2439. cPixPerScan = prclClip->right - prclClip->left;
  2440. cjSrcDeltaRem = (culSrcDelta - cPixPerScan) * 4;
  2441. cjDstDeltaRem = (culDstDelta - cPixPerScan) * 4;
  2442. // calc source & destination address, -1 to allow for prefix-increment
  2443. pulSrc = -1 + pulSrcScan0 + xSrcOff + prclClip->left
  2444. + ((prclClip->top + ySrcOff) * culSrcDelta);
  2445. pulDst = -1 + pulDstScan0 + xDstOff + prclClip->left
  2446. + prclClip->top * culDstDelta;
  2447. for (; --cScans >= 0; (BYTE *)pulSrc += cjSrcDeltaRem, (BYTE *)pulDst += cjDstDeltaRem)
  2448. {
  2449. #if defined(_X86_)
  2450. __asm
  2451. {
  2452. mov edi, pulDst
  2453. mov ecx, cPixPerScan
  2454. mov esi, pulSrc
  2455. shr ecx, 2
  2456. push ebp
  2457. test ecx, ecx
  2458. jle EndOfLine
  2459. LoopFours:
  2460. mov eax, [esi+4]
  2461. mov ebx, [esi+8]
  2462. mov edx, [esi+12]
  2463. mov ebp, [esi+16]
  2464. add esi, 16
  2465. mov [edi+4], eax
  2466. mov [edi+8], ebx
  2467. add edi, 16
  2468. mov [edi-4], edx
  2469. dec ecx
  2470. mov [edi], ebp
  2471. jne LoopFours
  2472. EndOfLine:
  2473. pop ebp
  2474. mov pulSrc, esi
  2475. mov pulDst, edi
  2476. }
  2477. // do the remaining 0, 1, 2 or 3 pixels on this line
  2478. switch (cPixPerScan & 3)
  2479. {
  2480. case 3:
  2481. tmp0 = *++pulSrc;
  2482. tmp1 = *++pulSrc;
  2483. tmp2 = *++pulSrc;
  2484. *++pulDst = tmp0;
  2485. *++pulDst = tmp1;
  2486. *++pulDst = tmp2;
  2487. break;
  2488. case 2:
  2489. tmp0 = *++pulSrc;
  2490. tmp1 = *++pulSrc;
  2491. *++pulDst = tmp0;
  2492. *++pulDst = tmp1;
  2493. break;
  2494. case 1:
  2495. tmp0 = *++pulSrc;
  2496. *++pulDst = tmp0;
  2497. }
  2498. #else
  2499. for(c = cPixPerScan; --c >= 0;)
  2500. {
  2501. *++pulDst = *++pulSrc;
  2502. }
  2503. #endif
  2504. }
  2505. }
  2506. }
  2507. //****************************************************************************
  2508. // FUNC: vGlintCopyBltBypassDownload24bppTo32bpp
  2509. // DESC: using the bypass mechanism we can take advantage of write-combining
  2510. // which can be quicker than using the FIFO
  2511. //****************************************************************************
  2512. VOID vGlintCopyBltBypassDownload24bppTo32bpp(
  2513. PDEV *ppdev,
  2514. SURFOBJ *psoSrc,
  2515. POINTL *pptlSrc,
  2516. RECTL *prclDst,
  2517. RECTL *prclClip,
  2518. LONG crclClip)
  2519. {
  2520. LONG xOff;
  2521. BYTE *pjSrcScan0;
  2522. LONG cjSrcDelta;
  2523. LONG xSrcOff, ySrcOff;
  2524. ULONG *pulDstScan0;
  2525. LONG culDstDelta, xDstOff;
  2526. LONG cScans, cPixPerScan, c;
  2527. BYTE *pjSrc;
  2528. BYTE *pj;
  2529. ULONG *pulDst, *puld;
  2530. GLINT_DECL;
  2531. #if DBG && 0
  2532. {
  2533. SIZEL sizlDst;
  2534. sizlDst.cx = prclClip->right - prclClip->left;
  2535. sizlDst.cy = prclClip->bottom - prclClip->top;
  2536. DISPDBG((-1, "vGlintCopyBltBypassDownload24bppTo32bpp(): cRects(%d) sizlDst(%d,%d)", crclClip, sizlDst.cx, sizlDst.cy));
  2537. }
  2538. #endif //DBG
  2539. pjSrcScan0 = (BYTE *)psoSrc->pvScan0;
  2540. cjSrcDelta = psoSrc->lDelta;
  2541. xSrcOff = pptlSrc->x - prclDst->left; // need to add arclClip[n].left to get xSrc
  2542. ySrcOff = pptlSrc->y - prclDst->top; // need to add arclClip[n].top to get ySrc
  2543. pulDstScan0 = (ULONG *)ppdev->pjScreen;
  2544. culDstDelta = ppdev->DstPixelDelta >> (2 - ppdev->cPelSize);
  2545. xDstOff = ppdev->DstPixelOrigin + (ppdev->xyOffsetDst & 0xffff) +
  2546. (ppdev->xyOffsetDst >> 16) * ppdev->DstPixelDelta;
  2547. SYNC_IF_CORE_BUSY;
  2548. for (; --crclClip >= 0; ++prclClip)
  2549. {
  2550. cScans = prclClip->bottom - prclClip->top;
  2551. cPixPerScan = prclClip->right - prclClip->left;
  2552. // calc source & destination address, -1 to allow for prefix-increment
  2553. // convert x values to 24bpp coords (but avoid multiplication by 3)
  2554. c = xSrcOff + prclClip->left;
  2555. c = c + (c << 1);
  2556. pjSrc = pjSrcScan0 + c + ((prclClip->top + ySrcOff) * cjSrcDelta);
  2557. pulDst = -1 + pulDstScan0 + xDstOff + prclClip->left
  2558. + prclClip->top * culDstDelta;
  2559. for (; --cScans >= 0; pjSrc += cjSrcDelta, pulDst += culDstDelta)
  2560. {
  2561. // read one less pixel per scan than there actually is to avoid any possibility of
  2562. // a memory access violation (we read 4 bytes but only 3 of them might be valid)
  2563. for (pj = pjSrc, puld = pulDst, c = cPixPerScan-1; --c >= 0; pj += 3)
  2564. {
  2565. *++puld = *(ULONG *)pj & 0x00ffffff;
  2566. }
  2567. // now do the last pixel
  2568. ++puld;
  2569. *(USHORT *)puld = *(USHORT *)pj;
  2570. ((BYTE *)puld)[2] = ((BYTE *)pj)[2];
  2571. }
  2572. }
  2573. }
  2574. //****************************************************************************
  2575. // FUNC: vGlintCopyBltBypassDownload16bpp
  2576. // DESC: using the bypass mechanism we can take advantage of write-combining
  2577. // which can be quicker than using the FIFO
  2578. //****************************************************************************
  2579. VOID vGlintCopyBltBypassDownload16bpp(
  2580. PDEV *ppdev,
  2581. SURFOBJ *psoSrc,
  2582. POINTL *pptlSrc,
  2583. RECTL *prclDst,
  2584. RECTL *prclClip,
  2585. LONG crclClip)
  2586. {
  2587. LONG xOff;
  2588. ULONG *pulSrcScan0;
  2589. LONG culSrcDelta, xSrcOff, ySrcOff;
  2590. ULONG *pulDstScan0;
  2591. LONG culDstDelta, xDstOff;
  2592. LONG cScans, cPixPerScan;
  2593. ULONG *pulSrc;
  2594. ULONG *pulDst;
  2595. GLINT_DECL;
  2596. #if DBG && 0
  2597. {
  2598. SIZEL sizlDst;
  2599. sizlDst.cx = prclClip->right - prclClip->left;
  2600. sizlDst.cy = prclClip->bottom - prclClip->top;
  2601. DISPDBG((-1, "vGlintCopyBltBypassDownload16bpp(): cRects(%d) sizlDst(%d,%d)", crclClip, sizlDst.cx, sizlDst.cy));
  2602. }
  2603. #endif //DBG
  2604. pulSrcScan0 = (ULONG *)psoSrc->pvScan0;
  2605. culSrcDelta = psoSrc->lDelta >> 2;
  2606. xSrcOff = pptlSrc->x - prclDst->left; // need to add arclClip[n].left to get xSrc
  2607. ySrcOff = pptlSrc->y - prclDst->top; // need to add arclClip[n].top to get ySrc
  2608. pulDstScan0 = (ULONG *)ppdev->pjScreen;
  2609. culDstDelta = ppdev->DstPixelDelta >> (2 - ppdev->cPelSize);
  2610. xDstOff = ppdev->DstPixelOrigin + (ppdev->xyOffsetDst & 0xffff) +
  2611. (ppdev->xyOffsetDst >> 16) * ppdev->DstPixelDelta;
  2612. SYNC_IF_CORE_BUSY;
  2613. for (; --crclClip >= 0; ++prclClip)
  2614. {
  2615. cScans = prclClip->bottom - prclClip->top;
  2616. cPixPerScan = prclClip->right - prclClip->left;
  2617. pulSrc = (ULONG *)((USHORT *)pulSrcScan0 + xSrcOff + prclClip->left)
  2618. + ((prclClip->top + ySrcOff) * culSrcDelta);
  2619. pulDst = (ULONG *)((USHORT *)pulDstScan0 + xDstOff + prclClip->left)
  2620. + prclClip->top * culDstDelta;
  2621. for (; --cScans >= 0; pulSrc += culSrcDelta, pulDst += culDstDelta)
  2622. {
  2623. ULONG *pulSrcScan = pulSrc;
  2624. ULONG *pulDstScan = pulDst;
  2625. LONG cPix = cPixPerScan;
  2626. LONG cWords;
  2627. if ((UINT_PTR)pulDstScan % sizeof(ULONG))
  2628. {
  2629. // we're not on a ulong boundary so write the first pixel of the scanline
  2630. *(USHORT *)pulDstScan = *(USHORT *)pulSrcScan;
  2631. pulDstScan = (ULONG *)((USHORT *)pulDstScan + 1);
  2632. pulSrcScan = (ULONG *)((USHORT *)pulSrcScan + 1);
  2633. --cPix;
  2634. }
  2635. // write out the ulong-aligned words of the scanline
  2636. for (cWords = cPix / 2; --cWords >= 0;)
  2637. {
  2638. *pulDstScan++ = *pulSrcScan++;
  2639. }
  2640. // write any remaining pixel
  2641. if (cPix % 2)
  2642. {
  2643. *(USHORT *)pulDstScan = *(USHORT *)pulSrcScan;
  2644. }
  2645. }
  2646. }
  2647. }
  2648. //****************************************************************************
  2649. // FUNC: vGlintCopyBltBypassDownloadXlate4bpp
  2650. // DESC: using the bypass mechanism we can take advantage of write-combining
  2651. // which can be quicker than using the FIFO
  2652. // NB. supports 32bpp and 16bpp destinations. Doesn't yet support 24bpp
  2653. // destinations. No plans to add 8bpp support.
  2654. //****************************************************************************
  2655. VOID vGlintCopyBltBypassDownloadXlate4bpp(
  2656. PDEV *ppdev,
  2657. SURFOBJ *psoSrc,
  2658. POINTL *pptlSrc,
  2659. RECTL *prclDst,
  2660. RECTL *prclClip,
  2661. LONG crclClip,
  2662. XLATEOBJ *pxlo)
  2663. {
  2664. LONG xOff;
  2665. BYTE *pjSrcScan0;
  2666. LONG cjSrcDelta, xSrcOff, ySrcOff;
  2667. ULONG *pulDstScan0;
  2668. LONG culDstDelta, xDstOff;
  2669. LONG cScans, cPixPerScan, c;
  2670. ULONG cjSrcDeltaRem, cjDstDeltaRem;
  2671. ULONG *aulXlate;
  2672. BOOL bSrcLowNybble;
  2673. BYTE *pjSrc, j, *pj;
  2674. GLINT_DECL;
  2675. #if DBG && 0
  2676. {
  2677. SIZEL sizlDst;
  2678. sizlDst.cx = prclClip->right - prclClip->left;
  2679. sizlDst.cy = prclClip->bottom - prclClip->top;
  2680. DISPDBG((-1, "vGlintCopyBltBypassDownloadXlate4bpp(): cRects(%d) sizlDst(%d,%d)", crclClip, sizlDst.cx, sizlDst.cy));
  2681. }
  2682. #endif //DBG
  2683. pjSrcScan0 = (BYTE *)psoSrc->pvScan0;
  2684. cjSrcDelta = psoSrc->lDelta;
  2685. xSrcOff = pptlSrc->x - prclDst->left; // need to add arclClip[n].left to get xSrc
  2686. ySrcOff = pptlSrc->y - prclDst->top; // need to add arclClip[n].top to get ySrc
  2687. pulDstScan0 = (ULONG *)ppdev->pjScreen;
  2688. culDstDelta = ppdev->DstPixelDelta >> (2 - ppdev->cPelSize);
  2689. xDstOff = ppdev->DstPixelOrigin + (ppdev->xyOffsetDst & 0xffff) +
  2690. (ppdev->xyOffsetDst >> 16) * ppdev->DstPixelDelta;
  2691. aulXlate = pxlo->pulXlate;
  2692. SYNC_IF_CORE_BUSY;
  2693. for (; --crclClip >= 0; ++prclClip)
  2694. {
  2695. cScans = prclClip->bottom - prclClip->top;
  2696. cPixPerScan = prclClip->right - prclClip->left;
  2697. bSrcLowNybble = (xSrcOff + prclClip->left) & 1;
  2698. cjSrcDeltaRem = cjSrcDelta - (cPixPerScan / 2 + ((cPixPerScan & 1) || bSrcLowNybble));
  2699. pjSrc = -1 + pjSrcScan0 + (xSrcOff + prclClip->left) / 2
  2700. + ((prclClip->top + ySrcOff) * cjSrcDelta);
  2701. if (ppdev->cPelSize == GLINTDEPTH32)
  2702. {
  2703. ULONG *pulDst;
  2704. cjDstDeltaRem = (culDstDelta - cPixPerScan) * 4;
  2705. pulDst = -1 + pulDstScan0 + xDstOff + prclClip->left + prclClip->top * culDstDelta;
  2706. if (bSrcLowNybble)
  2707. {
  2708. for (; --cScans >= 0; pjSrc += cjSrcDeltaRem, (BYTE *)pulDst += cjDstDeltaRem)
  2709. {
  2710. j = *++pjSrc;
  2711. for (c = cPixPerScan / 2; --c >= 0;)
  2712. {
  2713. *++pulDst = aulXlate[j & 0xf];
  2714. j = *++pjSrc;
  2715. *++pulDst = aulXlate[j >> 4];
  2716. }
  2717. if (cPixPerScan & 1)
  2718. {
  2719. *++pulDst = aulXlate[j & 0xf];
  2720. }
  2721. }
  2722. }
  2723. else
  2724. {
  2725. for (; --cScans >= 0; pjSrc += cjSrcDeltaRem, (BYTE *)pulDst += cjDstDeltaRem)
  2726. {
  2727. for (c = cPixPerScan / 2; --c >= 0;)
  2728. {
  2729. j = *++pjSrc;
  2730. *++pulDst = aulXlate[j >> 4];
  2731. *++pulDst = aulXlate[j & 0xf];
  2732. }
  2733. if (cPixPerScan & 1)
  2734. {
  2735. j = *++pjSrc;
  2736. *++pulDst = aulXlate[j >> 4];
  2737. }
  2738. }
  2739. }
  2740. }
  2741. else if (ppdev->cPelSize == GLINTDEPTH16)
  2742. {
  2743. USHORT *pusDst;
  2744. cjDstDeltaRem = (culDstDelta << 2) - (cPixPerScan << ppdev->cPelSize);
  2745. pusDst = -1 + (USHORT *)pulDstScan0 + xDstOff + prclClip->left
  2746. + prclClip->top * culDstDelta * 2;
  2747. if (bSrcLowNybble)
  2748. {
  2749. for (; --cScans >= 0; pjSrc += cjSrcDeltaRem, (BYTE *)pusDst += cjDstDeltaRem)
  2750. {
  2751. j = *++pjSrc;
  2752. for (c = cPixPerScan / 2; --c >= 0;)
  2753. {
  2754. *++pusDst = (USHORT)aulXlate[j & 0xf];
  2755. j = *++pjSrc;
  2756. *++pusDst = (USHORT)aulXlate[j >> 4];
  2757. }
  2758. if (cPixPerScan & 1)
  2759. {
  2760. *++pusDst = (USHORT)aulXlate[j & 0xf];
  2761. }
  2762. }
  2763. }
  2764. else
  2765. {
  2766. for (; --cScans >= 0; pjSrc += cjSrcDeltaRem, (BYTE *)pusDst += cjDstDeltaRem)
  2767. {
  2768. for (c = cPixPerScan / 2; --c >= 0;)
  2769. {
  2770. j = *++pjSrc;
  2771. *++pusDst = (USHORT)aulXlate[j >> 4];
  2772. *++pusDst = (USHORT)aulXlate[j & 0xf];
  2773. }
  2774. if (cPixPerScan & 1)
  2775. {
  2776. j = *++pjSrc;
  2777. *++pusDst = (USHORT)aulXlate[j >> 4];
  2778. }
  2779. }
  2780. }
  2781. }
  2782. }
  2783. }
  2784. #endif
  2785. //@@END_DDKSPLIT