|
|
/******************************Module*Header**********************************\
* * ******************* * * GDI SAMPLE CODE * * ******************* * * Module Name: pxrxXfer.c * * Content: Bit transfer code * * Copyright (c) 1994-1999 3Dlabs Inc. Ltd. All rights reserved. * Copyright (c) 1995-2003 Microsoft Corporation. All rights reserved. \*****************************************************************************/
#include "precomp.h"
#include "ereg.h"
#include "pxrx.h"
#if _DEBUG
// Selects how a misaligned pointer is reported by TEST_DWORD_ALIGNED:
// TRUE  -> trip ASSERTDD, FALSE -> only log the address through DISPDBG.
static BOOL trapOnMisAlignment = TRUE;

// Debug-only check that 'ptr' is DWORD (4 byte) aligned.
//
// The argument is parenthesised before it is cast so that an expression
// argument such as (p + 1) is converted as a whole; without the
// parentheses the cast binds to 'p' alone and the '+ 1' becomes a byte
// offset, producing a bogus misalignment report.  The pointer is taken
// through UINT_PTR first so the narrowing to ULONG (which is what the
// "%08X" format below expects) is explicit rather than a pointer
// truncation.
#define TEST_DWORD_ALIGNED(ptr)                                             \
    do {                                                                    \
        ULONG addr = (ULONG)(UINT_PTR)(ptr);                                \
                                                                            \
        if( trapOnMisAlignment ) {                                          \
            ASSERTDD((addr & 3) == 0, "TEST_DWORD_ALIGNED(ptr) failed!");   \
        } else if( addr & 3 ) {                                             \
            DISPDBG((-1, "TEST_DWORD_ALIGNED(0x%08X) is out by %d bytes!",  \
                     addr, addr & 3));                                      \
        }                                                                   \
    } while(0)
#else
// Non-debug builds: the check compiles away and the argument is not
// evaluated.
#   define TEST_DWORD_ALIGNED(addr)     do { ; } while(0)
#endif
/**************************************************************************\
*
* VOID pxrxXfer1bpp
*
* Downloads a 1bpp (monochrome) source bitmap to 'count' destination
* rectangles using the chip's bit-mask download (SYNCBITMASK) path.
*
*   ppdev      - device context; also consumed by GLINT_DECL and the
*                DMA macros.
*   prcl       - array of 'count' destination rectangles.
*   count      - number of rectangles in prcl (asserted > 0).
*   fgLogicOp  - hardware logic op applied with the foreground colour
*                (asserted <= 15).
*   bgLogicOp  - hardware logic op applied with the background colour
*                (asserted <= 15).
*   psoSrc     - source surface; pvScan0 and lDelta are read.
*   pptlSrc    - source origin corresponding to prclDst->left/top.
*   prclDst    - overall destination rectangle; used to derive the
*                destination-to-source offset and passed to
*                FLUSH_PXRX_PATCHED_RENDER2D.
*   pxlo       - translation object; pulXlate[1] is taken as the
*                foreground colour and pulXlate[0] as the background.
*
* Returns early, before touching the hardware, when both logic ops
* reduce to no-ops.
*
\**************************************************************************/
VOID pxrxXfer1bpp(
    PPDEV ppdev,
    RECTL *prcl,
    LONG count,
    ULONG fgLogicOp,
    ULONG bgLogicOp,
    SURFOBJ *psoSrc,
    POINTL *pptlSrc,
    RECTL *prclDst,
    XLATEOBJ *pxlo )
{
    DWORD config2D, render2D;
    LONG cx;
    LONG cy;
    LONG lSrcDelta;
    BYTE *pjSrcScan0;
    BYTE *pjSrc;
    LONG dxSrc;
    LONG dySrc;
    LONG xLeft;
    LONG yTop;
    LONG xOffset;
    ULONG fgColor;
    ULONG bgColor;
    RBRUSH_COLOR rbc;
    GLINT_DECL;

    ASSERTDD(count > 0, "Can't handle zero rectangles");
    ASSERTDD(fgLogicOp <= 15, "Weird fg hardware Rop");
    ASSERTDD(bgLogicOp <= 15, "Weird bg hardware Rop");
    ASSERTDD(pptlSrc != NULL && psoSrc != NULL, "Can't have NULL sources");

    DISPDBG((DBGLVL, "pxrxXfer1bpp: original dstRect: (%d,%d) to (%d,%d)",
             prclDst->left, prclDst->top, prclDst->right, prclDst->bottom));

    dxSrc = pptlSrc->x - prclDst->left;
    dySrc = pptlSrc->y - prclDst->top;  // Add to destination to get source

    lSrcDelta = psoSrc->lDelta;
    pjSrcScan0 = psoSrc->pvScan0;
    DISPDBG((DBGLVL, "bitmap baseAddr 0x%x, stride %d, w %d, h %d",
             pjSrcScan0, lSrcDelta,
             psoSrc->sizlBitmap.cx, psoSrc->sizlBitmap.cy));
    DISPDBG((DBGLVL, "fgColor 0x%x, bgColor 0x%x",
             pxlo->pulXlate[1], pxlo->pulXlate[0]));
    DISPDBG((DBGLVL, "fgLogicOp %d, bgLogicOp %d", fgLogicOp, bgLogicOp));

    fgColor = pxlo->pulXlate[1];
    bgColor = pxlo->pulXlate[0];

    // we get some common operations which are really noops. we can save
    // lots of time by cutting these out. As this happens a lot for masking
    // operations it's worth doing.
    // (AND with white, OR with 0 and XOR with 0 all leave the destination
    // unchanged.)
    if( ((fgLogicOp == __GLINT_LOGICOP_AND) && (fgColor == ppdev->ulWhite)) ||
        ((fgLogicOp == __GLINT_LOGICOP_OR ) && (fgColor == 0)) ||
        ((fgLogicOp == __GLINT_LOGICOP_XOR) && (fgColor == 0)) ) {
        fgLogicOp = __GLINT_LOGICOP_NOOP;
    }

    // same for background
    if( ((bgLogicOp == __GLINT_LOGICOP_AND) && (bgColor == ppdev->ulWhite)) ||
        ((bgLogicOp == __GLINT_LOGICOP_OR ) && (bgColor == 0)) ||
        ((bgLogicOp == __GLINT_LOGICOP_XOR) && (bgColor == 0)) ) {
        bgLogicOp = __GLINT_LOGICOP_NOOP;
    }

    // Nothing would change on screen: leave before queuing anything.
    if( (fgLogicOp == __GLINT_LOGICOP_NOOP) &&
        (bgLogicOp == __GLINT_LOGICOP_NOOP) ) {
        DISPDBG((DBGLVL, "both ops are no-op so lets quit now"));
        return;
    }

    // Start from the cached Config2D value with the logic-op and enable
    // bits cleared, then turn on only what this download needs.
    config2D = glintInfo->config2D;
    config2D &= ~(__CONFIG2D_LOGOP_FORE_ENABLE |
                  __CONFIG2D_LOGOP_BACK_ENABLE |
                  __CONFIG2D_ENABLES);
    config2D |= __CONFIG2D_CONSTANTSRC |
                __CONFIG2D_FBWRITE |
                __CONFIG2D_USERSCISSOR;
    render2D = __RENDER2D_INCX |
               __RENDER2D_INCY |
               __RENDER2D_OP_SYNCBITMASK;

    // Anything other than "copy foreground, leave background alone" is
    // rendered as opaque spans with both logic ops programmed.
    if( (fgLogicOp != __GLINT_LOGICOP_COPY) ||
        (bgLogicOp != __GLINT_LOGICOP_NOOP) ) {
        config2D &= ~(__CONFIG2D_LOGOP_FORE_MASK |
                      __CONFIG2D_LOGOP_BACK_MASK);
        config2D |= __CONFIG2D_OPAQUESPANS |
                    __CONFIG2D_LOGOP_FORE(fgLogicOp) |
                    __CONFIG2D_LOGOP_BACK(bgLogicOp);
        render2D |= __RENDER2D_SPANS;
    }

    SET_WRITE_BUFFERS;

    WAIT_PXRX_DMA_TAGS( 6 );

    // Enable destination reads only if either logic op needs them.
    if( LogicopReadDest[fgLogicOp] || LogicopReadDest[bgLogicOp] ) {
        config2D |= __CONFIG2D_FBDESTREAD;
        SET_READ_BUFFERS;
    }

    // Load a colour only when the corresponding logic op uses the source.
    if( LogicOpReadSrc[fgLogicOp] ) {
        LOAD_FOREGROUNDCOLOUR( fgColor );
    }
    if( LogicOpReadSrc[bgLogicOp] ) {
        LOAD_BACKGROUNDCOLOUR( bgColor );
    }

    LOAD_CONFIG2D( config2D );

    // One pass per destination rectangle.
    while( TRUE ) {
        DISPDBG((DBGLVL, "mono download to rect (%d,%d) to (%d,%d)",
                 prcl->left, prcl->top, prcl->right, prcl->bottom));

        yTop = prcl->top;
        xLeft = prcl->left;
        cx = prcl->right - xLeft;
        cy = prcl->bottom - yTop;

        // pjSrc is first dword containing a bit to download. xOffset is the
        // offset to that bit. i.e. the bit offset from the previous 32bit
        // boundary at the left hand edge of the rectangle.
        xOffset = (xLeft + dxSrc) & 0x1f;
        pjSrc = (BYTE*)((UINT_PTR)(pjSrcScan0 +
                                   (yTop + dySrc) * lSrcDelta +
                                   (xLeft + dxSrc) / 8  // byte aligned
                                  ) & ~3);              // dword aligned

        DISPDBG((DBGLVL, "pjSrc 0x%x, lSrcDelta %d", pjSrc, lSrcDelta));
        DISPDBG((DBGLVL, "\txOffset %d, cx %d, cy %d", xOffset, cx, cy));

        // this algorithm downloads aligned 32-bit chunks from the
        // source but uses the scissor clip to define the edge of the
        // rectangle.
        //
        {
            ULONG AlignWidth, LeftEdge;

            // Width rounded up to whole 32-bit words, and the x position
            // at which the first downloaded word starts.
            AlignWidth = (xOffset + cx + 31) & ~31;
            LeftEdge = xLeft - xOffset;

            DISPDBG((7, "AlignWidth %d", AlignWidth));

            // One index tag followed by its four data dwords.
            WAIT_PXRX_DMA_DWORDS( 5 );
            QUEUE_PXRX_DMA_INDEX4( __GlintTagFillScissorMinXY,
                                   __GlintTagFillScissorMaxXY,
                                   __GlintTagFillRectanglePosition,
                                   __GlintTagFillRender2D );

            // Scissor is clamped in x to the true rectangle; y is left open.
            QUEUE_PXRX_DMA_DWORD( MAKEDWORD_XY(xLeft, 0) );
            QUEUE_PXRX_DMA_DWORD( MAKEDWORD_XY(prcl->right, 0x7fff) );

            QUEUE_PXRX_DMA_DWORD( MAKEDWORD_XY(LeftEdge, yTop) );
            QUEUE_PXRX_DMA_DWORD( render2D |
                                  __RENDER2D_WIDTH(AlignWidth) |
                                  __RENDER2D_HEIGHT(cy) );
            SEND_PXRX_DMA_BATCH;

            // The stride is handed over in dwords (lSrcDelta >> 2).
//@@BEGIN_DDKSPLIT
#if USE_RLE_DOWNLOADS
            pxrxMonoDownloadRLE( ppdev, AlignWidth, (ULONG *) pjSrc, lSrcDelta >> 2, cy );
#else
//@@END_DDKSPLIT
            pxrxMonoDownloadRaw( ppdev, AlignWidth, (ULONG *) pjSrc, lSrcDelta >> 2, cy );
//@@BEGIN_DDKSPLIT
#endif
//@@END_DDKSPLIT
        }

        if( --count == 0 ) {
            break;
        }

        prcl++;
    }

    // Reset the scissor maximums:
    if( ppdev->cPelSize == GLINTDEPTH32 ) {
        WAIT_PXRX_DMA_TAGS( 1 );
        QUEUE_PXRX_DMA_TAG( __GlintTagScissorMaxXY, 0x7FFF7FFF );
//@@BEGIN_DDKSPLIT
//      SEND_PXRX_DMA_BATCH;
//@@END_DDKSPLIT
    }

    FLUSH_PXRX_PATCHED_RENDER2D(prclDst->left, prclDst->right);
    SEND_PXRX_DMA_BATCH;

    DISPDBG((DBGLVL, "pxrxXfer1bpp returning"));
}
/**************************************************************************\
*
* void pxrxMonoDownloadRaw
*
* Streams 'cy' scanlines of monochrome bitmap data to the chip through
* the BitMaskPattern tag.
*
*   ppdev       - device context (consumed by GLINT_DECL / DMA macros).
*   AlignWidth  - scanline width in bits, a multiple of 32.
*   pjSrc       - dword pointer to the first dword of the first scanline.
*   lSrcDelta   - distance between the starts of consecutive scanlines,
*                 in dwords (pxrxXfer1bpp passes psoSrc->lDelta >> 2).
*   cy          - number of scanlines to send.
*
* A non-positive cy sends nothing.  Previously the multi-word loop only
* terminated on '--cy == 0', so cy <= 0 kept reading scanlines well past
* the source bitmap, and the single-word path queued a zero-length hold.
*
\**************************************************************************/
void pxrxMonoDownloadRaw(
    PPDEV ppdev,
    ULONG AlignWidth,
    ULONG *pjSrc,
    LONG lSrcDelta,
    LONG cy )
{
    GLINT_DECL;

    // Nothing to download; also protects both loops below, which assume
    // at least one scanline.
    if( cy <= 0 ) {
        return;
    }

    if( AlignWidth == 32 ) {
        // One dword per scanline: pack several scanlines into one hold.
        LONG nSpaces = 0;
        ULONG bits;     // NOTE(review): not referenced directly here; kept
                        // in case a DMA macro expands to use it - confirm.

        DISPDBG((DBGLVL, "Doing Single Word per scan download"));
        do {
            // Ask for 10 dwords; one of them is taken by the hold tag,
            // which is why nSpaces is decremented before being compared
            // against the remaining scanline count.
            // NOTE(review): WAIT_FREE_PXRX_DMA_DWORDS may update nSpaces
            // with the space actually available - confirm in pxrx.h.
            nSpaces = 10;
            WAIT_FREE_PXRX_DMA_DWORDS( nSpaces );
            if( cy < --nSpaces ) {
                nSpaces = cy;
            }
            cy -= nSpaces;

            QUEUE_PXRX_DMA_HOLD( __GlintTagBitMaskPattern, nSpaces );
            while( --nSpaces >= 0 ) {
                TEST_DWORD_ALIGNED( pjSrc );
                QUEUE_PXRX_DMA_DWORD( *pjSrc );
                pjSrc += lSrcDelta;
            }
            SEND_PXRX_DMA_BATCH;
        } while( cy > 0 );
    } else {
        // Multiple 32 bit words per scanline: each scanline is sent as
        // its own hold of nScan dwords, and pjSrc is then stepped by the
        // full start-to-start stride.
        LONG nScan = AlignWidth >> 5;
        LONG nRemainder;    // NOTE(review): appears unused - see 'bits'.
        ULONG bits;

        DISPDBG((7, "Doing Multiple Word per scan download"));
        while( TRUE ) {
            WAIT_PXRX_DMA_DWORDS( nScan + 1 );
            QUEUE_PXRX_DMA_HOLD( __GlintTagBitMaskPattern, nScan );
            TEST_DWORD_ALIGNED( pjSrc );
            QUEUE_PXRX_DMA_BUFF( pjSrc, nScan );
            SEND_PXRX_DMA_BATCH;
            pjSrc += lSrcDelta;

            if( --cy == 0 ) {
                break;
            }
        }
    }
}
/**************************************************************************\
*
* VOID pxrxXfer8bpp
*
* Downloads an 8bpp source bitmap to 'count' destination rectangles,
* translating each pixel through the chip LUT, which is loaded from
* pxlo->pulXlate when the cached LUT contents do not match.
*
*   ppdev      - device context; PalLUTType / iPalUniq track what the
*                LUT currently holds and are updated here.
*   prcl       - array of 'count' destination rectangles.
*   count      - number of rectangles in prcl.
*   logicOp    - hardware logic op for the transfer.
*   bgLogicOp  - not referenced by this function.
*   psoSrc     - source surface; pvScan0 and lDelta are read.
*   pptlSrc    - source origin corresponding to prclDst->left/top.
*   prclDst    - overall destination rectangle.
*   pxlo       - translation object; iUniq and pulXlate (256 entries
*                are read) are used.
*
* If a rectangle yields a NULL aligned source pointer the remaining
* rectangles are abandoned, but the PixelSize / scissor defaults are
* still restored and the batch is still sent.
*
\**************************************************************************/
VOID pxrxXfer8bpp(
    PPDEV ppdev,
    RECTL *prcl,
    LONG count,
    ULONG logicOp,
    ULONG bgLogicOp,
    SURFOBJ *psoSrc,
    POINTL *pptlSrc,
    RECTL *prclDst,
    XLATEOBJ *pxlo )
{
    ULONG config2D, render2D, lutMode, pixelSize;
    BOOL invalidLUT = FALSE;
    LONG dx, dy, cy;
    LONG lSrcDelta, lSrcDeltaDW, lTrueDelta, alignOff;
    ULONG AlignWidth, LeftEdge;
    BYTE* pjSrcScan0;
    ULONG* pjSrc;
    UINT_PTR startPos;
    LONG cPelInv;
    ULONG ul;
    LONG nRemainder;
//@@BEGIN_DDKSPLIT
#if USE_RLE_DOWNLOADS
    ULONG len, data, holdCount;
    ULONG *tagPtr;
#endif
//@@END_DDKSPLIT
    GLINT_DECL;

    // Arguments are passed in (left,top) -> (right,bottom) order to
    // match the "(x,y) -> (x,y)" format.
    DISPDBG((DBGLVL, "pxrxXfer8bpp(): src = (%d,%d) -> (%d,%d), "
                     "count = %d, logicOp = %d, palette id = %d",
             prcl->left, prcl->top, prcl->right, prcl->bottom,
             count, logicOp, pxlo->iUniq));

    // Set up the LUT table:
    if( (ppdev->PalLUTType != LUTCACHE_XLATE) ||
        (ppdev->iPalUniq != pxlo->iUniq) ) {
        // Someone has hijacked the LUT so we need to invalidate it:
        ppdev->PalLUTType = LUTCACHE_XLATE;
        ppdev->iPalUniq = pxlo->iUniq;
        invalidLUT = TRUE;
    } else {
        DISPDBG((DBGLVL, "pxrxXfer8bpp: reusing cached xlate"));
    }

    // Room for the LUTMode load plus the LUTIndex tag queued below.
    WAIT_PXRX_DMA_TAGS( 1 + 1 );

    lutMode = glintInfo->lutMode & ~((3 << 2) | (1 << 4) | (7 << 8));
    lutMode |= (ppdev->cPelSize + 2) << 8;
    LOAD_LUTMODE( lutMode );

    if( invalidLUT ) {
        ULONG *pulXlate = pxlo->pulXlate;
        LONG cEntries = 256;

        QUEUE_PXRX_DMA_TAG( __PXRXTagLUTIndex, 0 );

        if( ppdev->cPelSize == 0 ) {            // 8bpp
            WAIT_PXRX_DMA_TAGS( cEntries );

            // Replicate the 8 bit value into all four bytes of the entry.
            do {
                ul = *(pulXlate++);
                ul |= ul << 8;
                ul |= ul << 16;
                QUEUE_PXRX_DMA_TAG( __PXRXTagLUTData, ul );
            } while( --cEntries );
        } else if( ppdev->cPelSize == 1 ) {     // 16bpp
            WAIT_PXRX_DMA_TAGS( cEntries );

            // Replicate the 16 bit value into both halves of the entry.
            do {
                ul = *(pulXlate++);
                ul |= ul << 16;
                QUEUE_PXRX_DMA_TAG( __PXRXTagLUTData, ul );
            } while( --cEntries );
        } else {
            // Entries are already full dwords: send the table as one hold.
            WAIT_PXRX_DMA_DWORDS( 1 + cEntries );

            QUEUE_PXRX_DMA_HOLD( __PXRXTagLUTData, cEntries );
            QUEUE_PXRX_DMA_BUFF( pulXlate, cEntries );
        }
    }

    config2D = __CONFIG2D_FBWRITE |
               __CONFIG2D_USERSCISSOR |
               __CONFIG2D_EXTERNALSRC |
               __CONFIG2D_LUTENABLE;
    render2D = __RENDER2D_INCX |
               __RENDER2D_INCY |
               __RENDER2D_OP_SYNCDATA |
               __RENDER2D_SPANS;

    SET_WRITE_BUFFERS;

    WAIT_PXRX_DMA_TAGS( 6 );

    if( logicOp != __GLINT_LOGICOP_COPY ) {
        config2D |= __CONFIG2D_LOGOP_FORE(logicOp) | __CONFIG2D_FBWRITE;
        render2D |= __RENDER2D_SPANS;

        if( LogicopReadDest[logicOp] ) {
            config2D |= __CONFIG2D_FBDESTREAD;
            SET_READ_BUFFERS;
        }
    }

    LOAD_CONFIG2D( config2D );

//@@BEGIN_DDKSPLIT
#if USE_RLE_DOWNLOADS
    QUEUE_PXRX_DMA_TAG( __GlintTagDownloadTarget, __GlintTagColor );
#endif
//@@END_DDKSPLIT

    cPelInv = 2 - ppdev->cPelSize;
    pixelSize = (1 << 31) |     // Everything before the LUT runs at 8bpp
                (2 << 2) | (2 << 4) | (2 << 6) |
                (cPelInv << 8) | (cPelInv << 10) |
                (cPelInv << 12) | (cPelInv << 14);
    QUEUE_PXRX_DMA_TAG( __GlintTagPixelSize, pixelSize );

    dx = pptlSrc->x - prclDst->left;
    dy = pptlSrc->y - prclDst->top;     // Add to destination to get source

    lSrcDelta = psoSrc->lDelta;
    pjSrcScan0 = psoSrc->pvScan0;

    // One pass per destination rectangle.
    while( TRUE ) {
        DISPDBG((DBGLVL, "download to rect (%d,%d) to (%d,%d)",
                 prcl->left, prcl->top, prcl->right, prcl->bottom));

        // 8bpp => 1 pixel per byte => 4 pixels per dword

        // Assume source bitmap width is dword aligned
        ASSERTDD((lSrcDelta & 3) == 0,
                 "pxrxXfer8bpp: SOURCE BITMAP WIDTH IS NOT DWORD ALIGNED!!!");

        startPos = (((UINT_PTR) pjSrcScan0) +
                    ((prcl->top + dy) * lSrcDelta)) +
                   (prcl->left + dx);       // pointer to first pixel,
                                            // in pixels/bytes
        pjSrc = (ULONG *) (startPos & ~3);  // dword pointer to dword
                                            // aligned first pixel

        if(NULL == pjSrc) {
            DISPDBG((ERRLVL, "ERROR: pxrxXfer8bpp return ,has pjSrc NULL"));
            // Leave through the common exit so the PixelSize override
            // queued above is undone and the batch is sent; nothing has
            // been queued for this rectangle yet.
            break;
        }
        alignOff = (ULONG)(startPos & 3);   // number of pixels past dword
                                            // alignment of a scanline
        LeftEdge = prcl->left - alignOff;   // dword aligned left edge in pixels
        AlignWidth = ((prcl->right - LeftEdge) + 3) & ~3;   // dword aligned width
                                                            // in pixels
        cy = prcl->bottom - prcl->top;      // number of scanlines to do

        DISPDBG((DBGLVL, "pjSrcScan0 = 0x%08X, "
                         "startPos = 0x%08X, pjSrc = 0x%08X",
                 pjSrcScan0, startPos, pjSrc));
        DISPDBG((DBGLVL, "offset = %d pixels", alignOff));
        DISPDBG((DBGLVL, "Aligned rect = (%d -> %d) => %d pixels => %d dwords",
                 LeftEdge, LeftEdge + AlignWidth, AlignWidth, AlignWidth >> 2));

        WAIT_PXRX_DMA_TAGS( 4 );

        // The scissor trims the dword-aligned rectangle back to the real
        // left/right edges; y is left open.
        QUEUE_PXRX_DMA_TAG( __GlintTagScissorMinXY,
                            MAKEDWORD_XY(prcl->left, 0) );
        QUEUE_PXRX_DMA_TAG( __GlintTagScissorMaxXY,
                            MAKEDWORD_XY(prcl->right, 0x7fff) );
        QUEUE_PXRX_DMA_TAG( __GlintTagRectanglePosition,
                            MAKEDWORD_XY(LeftEdge, prcl->top) );
        QUEUE_PXRX_DMA_TAG( __GlintTagRender2D,
                            render2D |
                            __RENDER2D_WIDTH(AlignWidth) |
                            __RENDER2D_HEIGHT(cy) );
        SEND_PXRX_DMA_BATCH;

        AlignWidth >>= 2;                       // dword aligned width in dwords
        lSrcDeltaDW = lSrcDelta >> 2;           // scanline delta in dwords
                                                // (start to start)
        lTrueDelta = lSrcDeltaDW - AlignWidth;  // scanline delta in dwords
                                                // (end to start)
        DISPDBG((DBGLVL, "Delta = %d bytes = %d dwords -> %d - %d dwords",
                 lSrcDelta, lSrcDeltaDW, lTrueDelta, AlignWidth));

//@@BEGIN_DDKSPLIT
#if USE_RLE_DOWNLOADS
        // Do an RLE download:
        // Runs of 4 or more identical dwords go out as an RLData/RLCount
        // pair; shorter runs are accumulated into a Color hold whose
        // header (tagPtr) is patched once its length is known.
        tagPtr = NULL;

        do {
            WAIT_PXRX_DMA_TAGS( AlignWidth + 1 );

            nRemainder = AlignWidth;
            while( nRemainder-- ) {
                TEST_DWORD_ALIGNED( pjSrc );
                data = *(pjSrc++);
                len = 1;

                TEST_DWORD_ALIGNED( pjSrc );
                while( nRemainder && (*pjSrc == data) ) {
                    pjSrc++;
                    len++;
                    nRemainder--;
                    TEST_DWORD_ALIGNED( pjSrc );
                }

                if( len >= 4 ) {
                    if( tagPtr ) {
                        *tagPtr = ASSEMBLE_PXRX_DMA_HOLD( __GlintTagColor, holdCount );
                        tagPtr = NULL;
                    }

                    QUEUE_PXRX_DMA_INDEX2( __GlintTagRLData, __GlintTagRLCount );
                    QUEUE_PXRX_DMA_DWORD( data );
                    QUEUE_PXRX_DMA_DWORD( len );
                    len = 0;
                } else {
                    if( !tagPtr ) {
                        QUEUE_PXRX_DMA_DWORD_DELAYED( tagPtr );
                        holdCount = 0;
                    }

                    holdCount += len;
                    while( len-- ) {
                        QUEUE_PXRX_DMA_DWORD( data );
                    }
                }
            }

            if( tagPtr ) {
                *tagPtr = ASSEMBLE_PXRX_DMA_HOLD( __GlintTagColor, holdCount );
                tagPtr = NULL;
            }
            // pjSrc already sits at the end of the scanline here.
            pjSrc += lTrueDelta;
            SEND_PXRX_DMA_BATCH;
        } while( --cy > 0 );
#else
//@@END_DDKSPLIT
        // Do a raw download:
        while( TRUE ) {
            DISPDBG((DBGLVL, "cy = %d", cy));

            WAIT_PXRX_DMA_DWORDS( AlignWidth + 1 );
            QUEUE_PXRX_DMA_HOLD( __GlintTagColor, AlignWidth );
            TEST_DWORD_ALIGNED( pjSrc );
            QUEUE_PXRX_DMA_BUFF( pjSrc, AlignWidth );
            SEND_PXRX_DMA_BATCH;

            if( --cy == 0 ) {
                break;
            }

            pjSrc += lSrcDeltaDW;
        }
//@@BEGIN_DDKSPLIT
#endif
//@@END_DDKSPLIT

        if( --count == 0 ) {
            break;
        }

        prcl++;
    }

    // Reset some defaults:
    WAIT_PXRX_DMA_TAGS( 2 );
    QUEUE_PXRX_DMA_TAG( __GlintTagPixelSize, cPelInv );
    if( ppdev->cPelSize == GLINTDEPTH32 ) {
        QUEUE_PXRX_DMA_TAG( __GlintTagScissorMaxXY, 0x7FFF7FFF );
    }

    SEND_PXRX_DMA_BATCH;

    DISPDBG((DBGLVL, "pxrxXfer8bpp return"));
}
/**************************************************************************\
*
* VOID pxrxXferImage
*
* Downloads a source bitmap that has the same colour depth as the
* screen to 'count' destination rectangles, with no colour translation.
*
*   ppdev      - device context; cPelSize selects the pixel size.
*   prcl       - array of 'count' destination rectangles.
*   count      - number of rectangles in prcl (asserted > 0).
*   logicOp    - hardware logic op for the transfer (asserted <= 15).
*   bgLogicOp  - not referenced by this function.
*   psoSrc     - source surface; pvScan0 and lDelta are read.
*   pptlSrc    - source origin corresponding to prclDst->left/top.
*   prclDst    - overall destination rectangle.
*   pxlo       - asserted to be NULL or trivial; otherwise unused.
*
* If a rectangle yields a NULL aligned source pointer the remaining
* rectangles are abandoned, but the scissor reset, the patched-Render2D
* flush and the final batch send still run.
*
\**************************************************************************/
VOID pxrxXferImage(
    PPDEV ppdev,
    RECTL *prcl,
    LONG count,
    ULONG logicOp,
    ULONG bgLogicOp,
    SURFOBJ *psoSrc,
    POINTL *pptlSrc,
    RECTL *prclDst,
    XLATEOBJ *pxlo )
{
    DWORD config2D, render2D;
    LONG dx, dy, cy;
    LONG lSrcDelta, lTrueDelta, lSrcDeltaDW, alignOff;
    BYTE* pjSrcScan0;
    ULONG* pjSrc;
    UINT_PTR startPos;
    LONG cPel, cPelInv;
    ULONG cPelMask;
    ULONG AlignWidth, LeftEdge;
    LONG nRemainder;
//@@BEGIN_DDKSPLIT
#if USE_RLE_DOWNLOADS
    ULONG len, data, holdCount;
    ULONG *tagPtr;
#endif
//@@END_DDKSPLIT
    GLINT_DECL;

    SEND_PXRX_DMA_FORCE;

    ASSERTDD((pxlo == NULL) || (pxlo->flXlate & XO_TRIVIAL),
             "Can handle trivial xlate only");
    ASSERTDD(psoSrc->iBitmapFormat == ppdev->iBitmapFormat,
             "Source must be same colour depth as screen");
    ASSERTDD(count > 0, "Can't handle zero rectangles");
    ASSERTDD(logicOp <= 15, "Weird hardware Rop");

    dx = pptlSrc->x - prclDst->left;
    dy = pptlSrc->y - prclDst->top;     // Add to destination to get source

    cPel = ppdev->cPelSize;         // number of bytes per pixel = 1 << cPel
    cPelInv = 2 - cPel;             // number of pixels per dword = 1 << cPelInv
                                    // (pixels -> dwords = >> cPelInv)
    cPelMask = (1 << cPelInv) - 1;  // mask to obtain number of pixels
                                    // past a dword
    lSrcDelta = psoSrc->lDelta;
    pjSrcScan0 = psoSrc->pvScan0;

    DISPDBG((DBGLVL, "pxrxXferImage with logic op %d for %d rects",
             logicOp, count));

    // Start from the cached Config2D value with the logic-op and enable
    // bits cleared, then turn on only what this download needs.
    config2D = glintInfo->config2D & ~(__CONFIG2D_LOGOP_FORE_ENABLE |
                                       __CONFIG2D_LOGOP_BACK_ENABLE |
                                       __CONFIG2D_ENABLES);
    config2D |= __CONFIG2D_FBWRITE | __CONFIG2D_USERSCISSOR;
    render2D = __RENDER2D_INCX |
               __RENDER2D_INCY |
               __RENDER2D_OP_SYNCDATA |
               __RENDER2D_SPANS;
    SET_WRITE_BUFFERS;

    WAIT_PXRX_DMA_TAGS( 5 );

    if( logicOp != __GLINT_LOGICOP_COPY ) {
        config2D &= ~__CONFIG2D_LOGOP_FORE_MASK;
        config2D |= __CONFIG2D_LOGOP_FORE(logicOp) | __CONFIG2D_EXTERNALSRC;

        if( LogicopReadDest[logicOp] ) {
            config2D |= __CONFIG2D_FBDESTREAD;
            SET_READ_BUFFERS;
        }
    }

    LOAD_CONFIG2D( config2D );

//@@BEGIN_DDKSPLIT
#if USE_RLE_DOWNLOADS
    QUEUE_PXRX_DMA_TAG( __GlintTagDownloadTarget, __GlintTagColor );
#endif
//@@END_DDKSPLIT

    // One pass per destination rectangle.
    while( TRUE ) {
        cy = prcl->bottom - prcl->top;

        DISPDBG((DBGLVL, "download to rect (%d,%d) to (%d,%d)",
                 prcl->left, prcl->top, prcl->right, prcl->bottom));

        ASSERTDD((lSrcDelta & 3) == 0,
                 "pxrxXferImage: SOURCE BITMAP WIDTH IS NOT DWORD ALIGNED!!!");

        // pjSrc points to the first pixel to copy
        // lTrueDelta is the additional amount to add onto the pjSrc pointer
        // when we get to the end of the scanline.
        startPos = ((UINT_PTR) pjSrcScan0) +
                   ((prcl->top + dy) * lSrcDelta) +
                   ((prcl->left + dx) << cPel);
        alignOff = ((ULONG) (startPos & 3)) >> cPel;    // number of pixels past
                                                        // dword aligned start
        pjSrc = (ULONG *) (startPos & ~3);  // dword aligned pointer to 1st pixel

        if(NULL == pjSrc) {
            DISPDBG((ERRLVL, "ERROR: "
                             "pxrxXferImage return because of pjSrc NULL"));
            // Leave through the common exit so the scissor is reset and
            // any scanline data queued for earlier rectangles is sent;
            // nothing has been queued for this rectangle yet.
            break;
        }

        // dword aligned left edge in pixels
        LeftEdge = prcl->left - alignOff;
        // dword aligned width in pixels
        AlignWidth = (prcl->right - LeftEdge + cPelMask) & ~cPelMask;

        DISPDBG((DBGLVL, "Aligned rect = (%d -> %d) => %d pixels",
                 LeftEdge, LeftEdge + AlignWidth, AlignWidth));
        DISPDBG((DBGLVL, "pjSrcScan0 = 0x%08X, "
                         "pjSrc = 0x%08X, alignOff = %d pixels",
                 pjSrcScan0, pjSrc, alignOff));

        ASSERTDD( ((UINT_PTR) pjSrcScan0) +
                  ((prcl->top + dy) * lSrcDelta) +
                  ((LeftEdge + dx) << cPel) == (UINT_PTR) pjSrc,
                  "pxrxXferImage: "
                  "Aligned left edge does not match aligned pjSrc!" );

        // One index tag followed by its four data dwords.
        WAIT_PXRX_DMA_DWORDS( 5 );

        QUEUE_PXRX_DMA_INDEX4( __GlintTagFillScissorMinXY,
                               __GlintTagFillScissorMaxXY,
                               __GlintTagFillRectanglePosition,
                               __GlintTagFillRender2D );
        QUEUE_PXRX_DMA_DWORD( MAKEDWORD_XY(prcl->left, 0) );
        QUEUE_PXRX_DMA_DWORD( MAKEDWORD_XY(prcl->right, 0x7fff) );
        QUEUE_PXRX_DMA_DWORD( MAKEDWORD_XY(LeftEdge, prcl->top) );
        QUEUE_PXRX_DMA_DWORD( render2D |
                              __RENDER2D_WIDTH(AlignWidth) |
                              __RENDER2D_HEIGHT(cy) );
        SEND_PXRX_DMA_BATCH;

        AlignWidth >>= cPelInv;                 // dword aligned width in dwords
        lSrcDeltaDW = lSrcDelta >> 2;           // scanline delta in dwords
                                                // (start to start)
        lTrueDelta = lSrcDeltaDW - AlignWidth;  // scanline delta in dwords
                                                // (end to start)
        DISPDBG((DBGLVL, "Delta = %d bytes = %d dwords -> %d - %d dwords",
                 lSrcDelta, lSrcDeltaDW, lTrueDelta, AlignWidth));

//@@BEGIN_DDKSPLIT
#if USE_RLE_DOWNLOADS
        // Do an RLE download:
        // Runs of 4 or more identical dwords go out as an RLData/RLCount
        // pair; shorter runs are accumulated into a Color hold whose
        // header (tagPtr) is patched once its length is known.
        tagPtr = NULL;

        do {
            WAIT_PXRX_DMA_TAGS( AlignWidth + 1 );

            nRemainder = AlignWidth;
            while( nRemainder-- ) {
                TEST_DWORD_ALIGNED( pjSrc );
                data = *(pjSrc++);
                len = 1;

                TEST_DWORD_ALIGNED( pjSrc );
                while( nRemainder && (*pjSrc == data) ) {
                    pjSrc++;
                    len++;
                    nRemainder--;
                    TEST_DWORD_ALIGNED( pjSrc );
                }

                if( len >= 4 ) {
                    if( tagPtr ) {
                        *tagPtr = ASSEMBLE_PXRX_DMA_HOLD( __GlintTagColor, holdCount );
                        tagPtr = NULL;
                    }

                    QUEUE_PXRX_DMA_INDEX2( __GlintTagRLData, __GlintTagRLCount );
                    QUEUE_PXRX_DMA_DWORD( data );
                    QUEUE_PXRX_DMA_DWORD( len );
                    len = 0;
                } else {
                    if( !tagPtr ) {
                        QUEUE_PXRX_DMA_DWORD_DELAYED( tagPtr );
                        holdCount = 0;
                    }

                    holdCount += len;
                    while( len-- ) {
                        QUEUE_PXRX_DMA_DWORD( data );
                    }
                }
            }

            if( tagPtr ) {
                *tagPtr = ASSEMBLE_PXRX_DMA_HOLD( __GlintTagColor, holdCount );
                tagPtr = NULL;
            }
            // pjSrc already sits at the end of the scanline here.
            pjSrc += lTrueDelta;
//          SEND_PXRX_DMA_BATCH;
        } while( --cy > 0 );
#else
//@@END_DDKSPLIT
        // Do a raw download:
        while( TRUE ) {
            DISPDBG((DBGLVL, "cy = %d", cy));

            WAIT_PXRX_DMA_DWORDS( AlignWidth + 1 );
            QUEUE_PXRX_DMA_HOLD( __GlintTagColor, AlignWidth );
            TEST_DWORD_ALIGNED( pjSrc );
            QUEUE_PXRX_DMA_BUFF( pjSrc, AlignWidth );
//          SEND_PXRX_DMA_BATCH;

            if( --cy == 0 ) {
                break;
            }
            pjSrc += lSrcDeltaDW;
        }
//@@BEGIN_DDKSPLIT
#endif
//@@END_DDKSPLIT

        if( --count == 0 ) {
            break;
        }

        prcl++;
    }

    // Reset the scissor maximums:
    if( ppdev->cPelSize == GLINTDEPTH32 ) {
        WAIT_PXRX_DMA_TAGS( 1 );
        QUEUE_PXRX_DMA_TAG( __GlintTagScissorMaxXY, 0x7FFF7FFF );
//      SEND_PXRX_DMA_BATCH;
    }
    FLUSH_PXRX_PATCHED_RENDER2D(prclDst->left, prclDst->right);
    SEND_PXRX_DMA_BATCH;

    DISPDBG((DBGLVL, "pxrxXferImage return"));
}
/**************************************************************************\
*
* VOID pxrxXfer4bpp
*
* Downloads a 4bpp source bitmap to 'count' destination rectangles
* through the Packed4Pixels tag, translating each pixel through the
* chip LUT, which is loaded from pxlo->pulXlate when the cached LUT
* contents do not match.
*
*   ppdev      - device context; PalLUTType / iPalUniq track what the
*                LUT currently holds and are updated here.
*   prcl       - array of 'count' destination rectangles.
*   count      - number of rectangles in prcl.
*   logicOp    - hardware logic op for the transfer.
*   bgLogicOp  - not referenced by this function.
*   psoSrc     - source surface; pvScan0 and lDelta are read.
*   pptlSrc    - source origin corresponding to prclDst->left/top.
*   prclDst    - overall destination rectangle.
*   pxlo       - translation object; iUniq and pulXlate (16 entries
*                are read) are used.
*
* If a rectangle yields a NULL aligned source pointer the remaining
* rectangles are abandoned, but the PixelSize / scissor defaults are
* still restored and the batch is still sent.
*
\**************************************************************************/
VOID pxrxXfer4bpp(
    PPDEV ppdev,
    RECTL *prcl,
    LONG count,
    ULONG logicOp,
    ULONG bgLogicOp,
    SURFOBJ *psoSrc,
    POINTL *pptlSrc,
    RECTL *prclDst,
    XLATEOBJ *pxlo )
{
    ULONG config2D, render2D, lutMode, pixelSize;
    BOOL invalidLUT = FALSE;
    LONG dx, dy;
    LONG cy;
    BYTE* pjSrcScan0;
    ULONG* pjSrc;
    LONG cPelInv;
    ULONG ul;
    ULONG AlignWidth, LeftEdge;
    UINT_PTR startPos;
    LONG nRemainder;
    LONG lSrcDelta, lSrcDeltaDW;
    LONG alignOff;
    GLINT_DECL;

    // Arguments are passed in (left,top) -> (right,bottom) order to
    // match the "(x,y) -> (x,y)" format.
    DISPDBG((DBGLVL, "pxrxXfer4bpp(): src = (%d,%d) -> (%d,%d), count = %d, "
                     "logicOp = %d, palette id = %d",
             prcl->left, prcl->top, prcl->right, prcl->bottom,
             count, logicOp, pxlo->iUniq));

    // Set up the LUT table:
    if( (ppdev->PalLUTType != LUTCACHE_XLATE) ||
        (ppdev->iPalUniq != pxlo->iUniq) ) {
        // Someone has hijacked the LUT so we need to invalidate it:
        ppdev->PalLUTType = LUTCACHE_XLATE;
        ppdev->iPalUniq = pxlo->iUniq;
        invalidLUT = TRUE;
    } else {
        DISPDBG((DBGLVL, "pxrxXfer4bpp: reusing cached xlate"));
    }

    // Room for the LUTMode load, the LUTIndex tag and 16 LUT entries.
    WAIT_PXRX_DMA_TAGS( 1 + 1 + 16 );

    lutMode = glintInfo->lutMode & ~((3 << 2) | (1 << 4) | (7 << 8));
    lutMode |= (ppdev->cPelSize + 2) << 8;
    LOAD_LUTMODE( lutMode );

    if( invalidLUT ) {
        ULONG *pulXlate = pxlo->pulXlate;
        LONG cEntries = 16;

        QUEUE_PXRX_DMA_TAG( __PXRXTagLUTIndex, 0 );

        if( ppdev->cPelSize == 0 )          // 8bpp
        {
            // Replicate the 8 bit value into all four bytes of the entry.
            do {
                ul = *(pulXlate++);
                ul |= ul << 8;
                ul |= ul << 16;
                QUEUE_PXRX_DMA_TAG( __PXRXTagLUTData, ul );
            } while( --cEntries );
        }
        else if( ppdev->cPelSize == 1 )     // 16bpp
        {
            // Replicate the 16 bit value into both halves of the entry.
            do {
                ul = *(pulXlate++);
                ul |= ul << 16;
                QUEUE_PXRX_DMA_TAG( __PXRXTagLUTData, ul );
            } while( --cEntries );
        }
        else
        {
            // Entries are already full dwords: send the table as one hold.
            QUEUE_PXRX_DMA_HOLD( __PXRXTagLUTData, cEntries );
            QUEUE_PXRX_DMA_BUFF( pulXlate, cEntries );
        }
    }

    // Start from the cached Config2D value with the logic-op and enable
    // bits cleared, then turn on only what this download needs.
    config2D = glintInfo->config2D & ~(__CONFIG2D_LOGOP_FORE_ENABLE |
                                       __CONFIG2D_LOGOP_BACK_ENABLE |
                                       __CONFIG2D_ENABLES);
    config2D |= __CONFIG2D_FBWRITE | __CONFIG2D_USERSCISSOR;
    render2D = __RENDER2D_INCX |
               __RENDER2D_INCY |
               __RENDER2D_OP_SYNCDATA |
               __RENDER2D_SPANS;

    SET_WRITE_BUFFERS;

    WAIT_PXRX_DMA_TAGS( 6 );

    if( logicOp != __GLINT_LOGICOP_COPY ) {
        config2D &= ~(__CONFIG2D_LOGOP_FORE_MASK | __CONFIG2D_LOGOP_BACK_MASK);
        config2D |= __CONFIG2D_LOGOP_FORE(logicOp) | __CONFIG2D_FBWRITE;
        render2D |= __RENDER2D_SPANS;

        if( LogicopReadDest[logicOp] ) {
            config2D |= __CONFIG2D_FBDESTREAD;
            SET_READ_BUFFERS;
        }

        // The external source and LUT are only enabled when the logic
        // op actually reads the source.
        if( LogicOpReadSrc[logicOp] ) {
            config2D |= __CONFIG2D_EXTERNALSRC | __CONFIG2D_LUTENABLE;
        }
    } else {
        config2D |= __CONFIG2D_EXTERNALSRC | __CONFIG2D_LUTENABLE;
    }

    LOAD_CONFIG2D( config2D );

    QUEUE_PXRX_DMA_TAG( __GlintTagDownloadTarget, __GlintTagColor );
    cPelInv = 2 - ppdev->cPelSize;
    // Everything before the LUT runs at 8bpp
    pixelSize = (1 << 31) |
                (2 << 2) | (2 << 4) | (2 << 6) | (2 << 16) |
                (cPelInv << 8) | (cPelInv << 10) |
                (cPelInv << 12) | (cPelInv << 14);
    QUEUE_PXRX_DMA_TAG( __GlintTagPixelSize, pixelSize );

    dx = pptlSrc->x - prclDst->left;
    dy = pptlSrc->y - prclDst->top;     // Add to destination to get source

//  cPel = ppdev->cPelSize;
//  cPelMask = (1 << cPelInv) - 1;
    lSrcDelta = psoSrc->lDelta;
    pjSrcScan0 = psoSrc->pvScan0;

    // One pass per destination rectangle.
    while( TRUE ) {
        DISPDBG((DBGLVL, "download to rect (%d,%d) to (%d,%d)",
                 prcl->left, prcl->top, prcl->right, prcl->bottom));

        // 4bpp => 2 pixels per byte => 8 pixels per dword

        // Assume source bitmap width is dword aligned
        ASSERTDD( (lSrcDelta & 3) == 0,
                  "pxrxXfer4bpp: SOURCE BITMAP WIDTH IS NOT DWORD ALIGNED!!!");

        // pointer to first pixel, in bytes (32/64 bits long)
        startPos = (((UINT_PTR) pjSrcScan0) +
                    ((prcl->top + dy) * lSrcDelta)) +
                   ((prcl->left + dx) >> 1);
        pjSrc = (ULONG *) (startPos & ~3);  // dword pointer to dword
                                            // aligned first pixel

        if(NULL == pjSrc) {
            DISPDBG((ERRLVL, "ERROR: "
                             "pxrxXfer4bpp return because of pjSrc NULL"));
            // Leave through the common exit so the PixelSize override
            // queued above is undone and the batch is sent; nothing has
            // been queued for this rectangle yet.
            break;
        }

        // pointer to first pixel, in pixels (33/65 bits long!)
        // Only the low three bits are used below, so losing the top bit
        // of the shifted address does not matter.
        startPos = (( ((UINT_PTR) pjSrcScan0) +
                      ((prcl->top + dy) * lSrcDelta)) << 1) +
                   (prcl->left + dx);
        alignOff = (ULONG)(startPos & 7);   // number of pixels past dword
                                            // alignment of a scanline

        LeftEdge = prcl->left - alignOff;   // dword aligned left edge in pixels
        // dword aligned width in pixels
        AlignWidth = ((prcl->right - LeftEdge) + 7) & ~7;
        cy = prcl->bottom - prcl->top;      // number of scanlines to do

        DISPDBG((DBGLVL, "pjSrcScan0 = 0x%08X, startPos = 0x%08X (>>1), "
                         "pjSrc = 0x%08X",
                 pjSrcScan0, startPos >> 1, pjSrc));
        DISPDBG((DBGLVL, "offset = %d pixels", alignOff));
        DISPDBG((DBGLVL, "Aligned rect = (%d -> %d) => %d pixels => %d dwords",
                 LeftEdge, LeftEdge + AlignWidth, AlignWidth, AlignWidth >> 3));

        WAIT_PXRX_DMA_TAGS( 4 );

        // The scissor trims the dword-aligned rectangle back to the real
        // left/right edges; y is left open.
        QUEUE_PXRX_DMA_TAG( __GlintTagScissorMinXY,
                            MAKEDWORD_XY(prcl->left, 0) );
        QUEUE_PXRX_DMA_TAG( __GlintTagScissorMaxXY,
                            MAKEDWORD_XY(prcl->right, 0x7fff) );
        QUEUE_PXRX_DMA_TAG( __GlintTagRectanglePosition,
                            MAKEDWORD_XY(LeftEdge, prcl->top) );
        QUEUE_PXRX_DMA_TAG( __GlintTagRender2D,
                            render2D |
                            __RENDER2D_WIDTH(AlignWidth) |
                            __RENDER2D_HEIGHT(cy) );
        SEND_PXRX_DMA_BATCH;

        AlignWidth >>= 3;               // dword aligned width in dwords
        lSrcDeltaDW = lSrcDelta >> 2;   // dword aligned scanline offset in dwords

        DISPDBG((DBGLVL, "Delta = %d pixels = %d dwords",
                 lSrcDelta << 1, lSrcDeltaDW));

        // pjSrc       = dword aligned pointer to first
        //               dword of first scanline
        // AlignWidth  = number of dwords per scanline
        // lSrcDeltaDW = dword offset between first dwords
        //               of consecutive scanlines
        // cy          = number of scanlines
        while( TRUE ) {
            nRemainder = AlignWidth;

            DISPDBG((DBGLVL, "cy = %d", cy));

            WAIT_PXRX_DMA_DWORDS( AlignWidth + 1 );
            QUEUE_PXRX_DMA_HOLD( __GlintTagPacked4Pixels, AlignWidth );
            TEST_DWORD_ALIGNED( pjSrc );
            QUEUE_PXRX_DMA_BUFF( pjSrc, AlignWidth );

            // The last scanline is not sent here; it goes out with the
            // next SEND_PXRX_DMA_BATCH further down.
            if( --cy == 0 ) {
                break;
            }
            pjSrc += lSrcDeltaDW;
            SEND_PXRX_DMA_BATCH;
        }

        if( --count == 0 ) {
            break;
        }

        prcl++;
    }

    // Reset some defaults:
    WAIT_PXRX_DMA_TAGS( 2 );
    QUEUE_PXRX_DMA_TAG( __GlintTagPixelSize, cPelInv );
    if( ppdev->cPelSize == GLINTDEPTH32 ) {
        QUEUE_PXRX_DMA_TAG( __GlintTagScissorMaxXY, 0x7FFF7FFF );
    }

    SEND_PXRX_DMA_BATCH;

    DISPDBG((DBGLVL, "pxrxXfer4bpp return"));
}
/**************************************************************************\
*
* VOID pxrxCopyXfer24bpp
*
* Downloads a 24bpp source bitmap to 'count' destination rectangles
* through the GlyphData tag (DownloadGlyphWidth is set to 3).  Each
* scanline is widened to a multiple of 4 pixels (= 3 dwords); the extra
* dwords on either side are filled with a dummy value and the scissor
* clips them away.
*
*   ppdev    - device context (consumed by GLINT_DECL / DMA macros).
*   psoSrc   - source surface; pvScan0 and lDelta are read.
*   pptlSrc  - source origin corresponding to prclDst->left/top.
*   prclDst  - overall destination rectangle.
*   prcl     - array of 'count' destination rectangles.
*   count    - number of rectangles in prcl.
*
* If a rectangle yields a NULL aligned source pointer the remaining
* rectangles are abandoned, but the scissor reset and the final batch
* send still run.
*
\**************************************************************************/

VOID pxrxCopyXfer24bpp(
    PPDEV ppdev,
    SURFOBJ *psoSrc,
    POINTL *pptlSrc,
    RECTL *prclDst,
    RECTL *prcl,
    LONG count )
{
    ULONG config2D, render2D, pixelSize;
    LONG dx, dy, cy, LeftEdge;
    LONG lSrcDelta, lSrcDeltaDW, lTrueDelta, alignOff;
    UINT_PTR startPos;
    BYTE* pjSrcScan0;
    ULONG* pjSrc;
    LONG cPelInv;
    ULONG ul, nRemainder;
    ULONG padLeft, padLeftDW, padRight, padRightDW, dataWidth;
    ULONG AlignWidth, AlignWidthDW, AlignExtra;
    GLINT_DECL;

    // Arguments are passed in (left,top) -> (right,bottom) order to
    // match the "(x,y) -> (x,y)" format.
    DISPDBG((DBGLVL, "pxrxCopyXfer24bpp(): "
                     "src = (%d,%d) -> (%d,%d), count = %d",
             prcl->left, prcl->top, prcl->right, prcl->bottom, count));

    // Start from the cached Config2D value with the logic-op and enable
    // bits cleared, then turn on only what this download needs.
    config2D = glintInfo->config2D & ~(__CONFIG2D_LOGOP_FORE_ENABLE |
                                       __CONFIG2D_LOGOP_BACK_ENABLE |
                                       __CONFIG2D_ENABLES);
    config2D |= __CONFIG2D_FBWRITE |
                __CONFIG2D_EXTERNALSRC |
                __CONFIG2D_USERSCISSOR;
    render2D = __RENDER2D_INCX |
               __RENDER2D_INCY |
               __RENDER2D_OP_SYNCDATA |
               __RENDER2D_SPANS;

    SET_WRITE_BUFFERS;

    WAIT_PXRX_DMA_TAGS( 3 );

    QUEUE_PXRX_DMA_TAG( __GlintTagDownloadTarget, __GlintTagColor );
    QUEUE_PXRX_DMA_TAG( __GlintTagDownloadGlyphWidth, 3 );
    LOAD_CONFIG2D( config2D );

    dx = pptlSrc->x - prclDst->left;
    dy = pptlSrc->y - prclDst->top;     // Add to destination to get source

    lSrcDelta = psoSrc->lDelta;
    pjSrcScan0 = psoSrc->pvScan0;

    // One pass per destination rectangle.
    while( TRUE ) {
        DISPDBG((DBGLVL, "download to rect "
                         "(%d,%d -> %d,%d) + (%d, %d) = (%d x %d)",
                 prcl->left, prcl->top, prcl->right, prcl->bottom,
                 dx, dy,
                 prcl->right - prcl->left, prcl->bottom - prcl->top));

        // 24bpp => 1 pixel per 3 bytes => 4 pixel per 3 dwords

        // Assume source bitmap width is dword aligned
        ASSERTDD( (lSrcDelta & 3) == 0,
                  "pxrxCopyXfer24bpp: "
                  "SOURCE BITMAP WIDTH IS NOT DWORD ALIGNED!!!" );
        ASSERTDD( (((UINT_PTR) pjSrcScan0) & 3) == 0,
                  "pxrxCopyXfer24bpp: "
                  "SOURCE BITMAP START LOCATION IS NOT DWORD ALIGNED!!!" );

        cy = prcl->bottom - prcl->top;      // number of scanlines to do

        startPos = (((UINT_PTR) pjSrcScan0) +
                    ((prcl->top + dy) * lSrcDelta)) +
                   ((prcl->left + dx) * 3); // pointer to first pixel of first
                                            // scanline, in bytes
        alignOff = (ULONG)(startPos & 3);   // number of bytes past dword
                                            // alignment to first pixel
        pjSrc = (ULONG *) (startPos & ~3);  // dword pointer to dword aligned
                                            // first pixel

        if(NULL == pjSrc) {
            DISPDBG((ERRLVL, "ERROR: "
                             "pxrxCopyXfer24bpp return because of pjSrc NULL"));
            // Leave through the common exit so the scissor is reset and
            // the batch is sent; nothing has been queued for this
            // rectangle yet.
            break;
        }
        padLeft = (4 - alignOff) % 4;   // number of pixels to add to regain
                                        // dword alignment on left edge
        padLeftDW = (padLeft * 3) / 4;  // number of dwords to add
                                        // on the left edge
        LeftEdge = prcl->left - padLeft;

        // dword aligned width in pixels (= 4 pixel aligned = 3 dword aligned!)
        AlignWidth = (prcl->right - LeftEdge + 3) & ~3;
        // number of pixels overhang on the right
        padRight = (LeftEdge + AlignWidth) - prcl->right;
        // number of dwords to add on the right edge
        padRightDW = (padRight * 3) / 4;

        AlignWidthDW = (AlignWidth * 3) / 4;    // dword aligned width in dwords
        lSrcDeltaDW = lSrcDelta >> 2;           // dword aligned scanline offset
                                                // in dwords

        // the amount of AlignWidth which is actually src bitmap
        dataWidth = AlignWidthDW - padLeftDW - padRightDW;

        DISPDBG((DBGLVL, "startPos = 0x%08X, alignOff = %d, "
                         "pjSrc = 0x%08X, lSrcDeltaDW = %d",
                 startPos, alignOff, pjSrc, lSrcDeltaDW));
        DISPDBG((DBGLVL, "padLeft = %d pixels = %d dwords, LeftEdge = %d",
                 padLeft, padLeftDW, LeftEdge));
        DISPDBG((DBGLVL, "AlignWidth = %d pixels = %d dwords",
                 AlignWidth, AlignWidthDW));
        DISPDBG((DBGLVL, "padRight = %d pixels = %d dwords",
                 padRight, padRightDW));

        WAIT_PXRX_DMA_TAGS( 4 );

        // The scissor trims the padded rectangle back to the real
        // left/right edges; y is left open.
        QUEUE_PXRX_DMA_TAG( __GlintTagScissorMinXY,
                            MAKEDWORD_XY(prcl->left, 0));
        QUEUE_PXRX_DMA_TAG( __GlintTagScissorMaxXY,
                            MAKEDWORD_XY(prcl->right, 0x7fff));
        QUEUE_PXRX_DMA_TAG( __GlintTagRectanglePosition,
                            MAKEDWORD_XY(LeftEdge, prcl->top));
        QUEUE_PXRX_DMA_TAG( __GlintTagRender2D,
                            render2D |
                            __RENDER2D_WIDTH(AlignWidth) |
                            __RENDER2D_HEIGHT(cy) );

        while( cy-- ) {
            DISPDBG((DBGLVL, "cy = %d", cy));

            WAIT_PXRX_DMA_DWORDS( AlignWidthDW + 1 );
            QUEUE_PXRX_DMA_HOLD( __GlintTagGlyphData, AlignWidthDW );

            // Up to two dummy dwords on the left ...
            if( padLeftDW ) {
                QUEUE_PXRX_DMA_DWORD( 0xDEADDEAD );
            }
            if( padLeftDW == 2 ) {
                QUEUE_PXRX_DMA_DWORD( 0xDEADDEAD );
            }

            // ... the real bitmap data ...
            QUEUE_PXRX_DMA_BUFF( pjSrc, dataWidth );

            // ... and up to two dummy dwords on the right.
            if( padRightDW ) {
                QUEUE_PXRX_DMA_DWORD( 0xDEADDEAD );
            }
            if( padRightDW == 2 ) {
                QUEUE_PXRX_DMA_DWORD( 0xDEADDEAD );
            }

            SEND_PXRX_DMA_BATCH;

            pjSrc += lSrcDeltaDW;
        }
//@@BEGIN_DDKSPLIT
/*/
        alignOff = (prcl->left + dx + 3) & ~3;          // number of pixels past dword alignment of first pixel of a scanline
        pjSrc = (ULONG *) (startPos - (alignOff * 3));  // dword pointer to dword aligned first pixel
        LeftEdge = prcl->left - alignOff;               // dword aligned left edge in pixels
        AlignWidth = ((((prcl->right - LeftEdge) * 3) + 3) & ~3) / 3;   // dword aligned width in pixels (IS NOT = 4 pixel aligned = 3 dword aligned!)
        AlignExtra = AlignWidth - (prcl->right - LeftEdge);     // extra pixels beyond the genuine width (which might overstomp a page boundary)

        if( AlignExtra )
            cy--;

        DISPDBG((7, "pjSrcScan0 = 0x%08X, startPos = 0x%08X, pjSrc = 0x%08X", pjSrcScan0, startPos, pjSrc));
        DISPDBG((7, "offset = %d pixels", alignOff));
        DISPDBG((7, "Aligned rect = (%d -> %d) => %d pixels", LeftEdge, LeftEdge + AlignWidth, AlignWidth));
        DISPDBG((7, "Rendering %d scanlines", cy));

        WAIT_PXRX_DMA_TAGS( 4 );

        QUEUE_PXRX_DMA_TAG( __GlintTagScissorMinXY, MAKEDWORD_XY(prcl->left, 0) );
        QUEUE_PXRX_DMA_TAG( __GlintTagScissorMaxXY, MAKEDWORD_XY(prcl->right, 0x7fff) );
        QUEUE_PXRX_DMA_TAG( __GlintTagRectanglePosition, MAKEDWORD_XY(LeftEdge, prcl->top) );
        QUEUE_PXRX_DMA_TAG( __GlintTagRender2D, render2D | __RENDER2D_WIDTH(AlignWidth) | __RENDER2D_HEIGHT(cy) );

        AlignWidthDW = (AlignWidth * 3) / 4;    // dword aligned width in dwords
        lSrcDeltaDW = lSrcDelta >> 2;           // dword aligned scanline offset in dwords

        DISPDBG((7, "Delta = %d bytes = %d dwords (%d dwords wide)", lSrcDelta, lSrcDeltaDW, AlignWidthDW));

        while( cy-- ) {
            DISPDBG((9, "cy = %d", cy));

            WAIT_PXRX_DMA_DWORDS( AlignWidthDW + 1 );
            QUEUE_PXRX_DMA_HOLD( __GlintTagGlyphData, AlignWidthDW );
            TEST_DWORD_ALIGNED( pjSrc );
            QUEUE_PXRX_DMA_BUFF( pjSrc, AlignWidthDW );
            SEND_PXRX_DMA_BATCH;

            pjSrc += lSrcDeltaDW;
        }

        if( AlignExtra ) {
            ULONG dataWidth;
            ULONG dataExtra;

            dataWidth = ((((prcl->right - LeftEdge) * 3) + 3) & ~3) / 4;    // dword aligned width in dwords, 1 dword aligned
            dataExtra = AlignWidthDW - dataWidth;                           // extra dwords past end of image

            DISPDBG((7, "Last scanline: %d + %d = %d pixels = %d + %d = %d dwords", prcl->right - LeftEdge, AlignExtra, AlignWidth, dataWidth, dataExtra, AlignWidthDW));
            ASSERTDD( (dataWidth + dataExtra) == AlignWidthDW, "pxrxCopyXfer24bpp: Last scanline does not add up!" );

            WAIT_PXRX_DMA_DWORDS( AlignWidthDW + 5 );

            QUEUE_PXRX_DMA_TAG( __GlintTagRectanglePosition, MAKEDWORD_XY(LeftEdge, prcl->bottom - 1) );
            QUEUE_PXRX_DMA_TAG( __GlintTagRender2D, render2D | __RENDER2D_WIDTH(AlignWidth) | __RENDER2D_HEIGHT(1) );

            TEST_DWORD_ALIGNED( pjSrc );
            QUEUE_PXRX_DMA_HOLD( __GlintTagGlyphData, AlignWidthDW );
            QUEUE_PXRX_DMA_BUFF( pjSrc, dataWidth );        // Send the partial scanline
            while( dataExtra-- )
                QUEUE_PXRX_DMA_DWORD( 0xDEADDEAD );         // Pad out to flush the data

            // Resend download target to flush the remaining partial pixels ???
        }
/**/
//@@END_DDKSPLIT

        if( --count == 0 ) {
            break;
        }

        prcl++;
    }

    // Reset the scissor maximums:
    if( ppdev->cPelSize == GLINTDEPTH32 ) {
        WAIT_PXRX_DMA_TAGS( 1 );
        QUEUE_PXRX_DMA_TAG( __GlintTagScissorMaxXY, 0x7FFF7FFF );
    }

    SEND_PXRX_DMA_BATCH;

    DISPDBG((DBGLVL, "pxrxCopyXfer24bpp return"));
}
/**************************************************************************\
*
* VOID pxrxCopyXfer8bppLge
*
* Downloads an 8bpp source bitmap to the destination rectangles in prcl,
* translating every source byte through pxlo->pulXlate.
*
*   ppdev    - physical device; cPelSize selects the destination depth and
*              PalLUTType/iPalUniq record which translation is cached
*   psoSrc   - source surface (pvScan0 and lDelta are read)
*   pptlSrc  - source point that corresponds to prclDst's top-left corner
*   prclDst  - unclipped destination rectangle
*   prcl     - array of 'count' destination rectangles to fill
*   count    - number of rectangles in prcl (the loops assume count >= 1)
*   pxlo     - translation object supplying pulXlate and iUniq
*
* Two paths exist:
*   - exactly one rectangle that is one scanline high: the pixels are
*     translated on the host and queued, packed to the destination depth,
*     under a single Color hold tag;
*   - otherwise: the translation table is queued as LUT data (only when the
*     cached one is stale), LUT mode is enabled in config2D and the dword
*     aligned source scanlines are queued untranslated. A user scissor
*     limits the output to prcl->left..prcl->right because the download
*     rectangle is widened to dword boundaries.
*
\**************************************************************************/
VOID pxrxCopyXfer8bppLge(
    PPDEV       ppdev,
    SURFOBJ     *psoSrc,
    POINTL      *pptlSrc,
    RECTL       *prclDst,
    RECTL       *prcl,
    LONG        count,
    XLATEOBJ    *pxlo )
{
    ULONG       config2D, render2D, lutMode, pixelSize;
    BOOL        invalidLUT = FALSE;
    LONG        dx, dy, cy;
    LONG        lSrcDelta, lSrcDeltaDW, lTrueDelta, alignOff;
    ULONG       AlignWidth, LeftEdge;
    BYTE*       pjSrcScan0;
    ULONG*      pjSrc;
    UINT_PTR    startPos;
    LONG        cPelInv;
    ULONG       ul, i;
    LONG        nRemainder;
//@@BEGIN_DDKSPLIT
#if USE_RLE_DOWNLOADS
    ULONG       len, data, holdCount;
#endif
//@@END_DDKSPLIT
    ULONG       *tagPtr;
    ULONG       *pulXlate = pxlo->pulXlate;
    GLINT_DECL;

    // Arguments are passed as left, top, right, bottom so that they match
    // the "(x,y) -> (x,y)" layout of the format string.
    DISPDBG((DBGLVL, "pxrxCopyXfer8bpp(): src = (%d,%d) -> (%d,%d), "
                     "count = %d, palette id = %d",
                     prcl->left, prcl->top, prcl->right, prcl->bottom,
                     count, pxlo->iUniq));

    SET_WRITE_BUFFERS;

    if( (count == 1) && ((cy = (prcl->bottom - prcl->top)) == 1) )
    {
        // Single scanline: translate on the host and send the packed pixels.
        ULONG   width = prcl->right - prcl->left, extra;
        BYTE    *srcPtr;

        config2D = __CONFIG2D_FBWRITE | __CONFIG2D_EXTERNALSRC;
        render2D = __RENDER2D_INCX | __RENDER2D_INCY |
                   __RENDER2D_OP_SYNCDATA | __RENDER2D_SPANS;

        dx = pptlSrc->x - prclDst->left;
        dy = pptlSrc->y - prclDst->top;     // Add to destination to get source

        lSrcDelta  = psoSrc->lDelta;
        pjSrcScan0 = psoSrc->pvScan0;
        startPos   = (((UINT_PTR) pjSrcScan0) + ((prcl->top + dy) * lSrcDelta)) +
                     (prcl->left + dx);
        srcPtr     = (BYTE *) startPos;

        WAIT_PXRX_DMA_DWORDS( 7 + width );

        LOAD_CONFIG2D( config2D );

        QUEUE_PXRX_DMA_TAG( __GlintTagRectanglePosition,
                            MAKEDWORD_XY(prcl->left, prcl->top) );
        QUEUE_PXRX_DMA_TAG( __GlintTagRender2D,
                            render2D | __RENDER2D_WIDTH(width) |
                            __RENDER2D_HEIGHT(1) );

        if( ppdev->cPelSize == 0 )          // 8bpp
        {
            // Four translated pixels per dword, plus one partial dword for
            // the 1..3 left-over pixels.
            extra = width & 3;
            width >>= 2;

            if( extra ) {
                QUEUE_PXRX_DMA_HOLD( __GlintTagColor, width + 1 );
                QUEUE_PXRX_DMA_BUFF_DELAYED( tagPtr, width + 1 );
            } else {
                QUEUE_PXRX_DMA_HOLD( __GlintTagColor, width );
                QUEUE_PXRX_DMA_BUFF_DELAYED( tagPtr, width );
            }

            DISPDBG((DBGLVL, "width was %d, is now %d + %d",
                             prcl->right - prcl->left, width, extra));

            for( i = 0; i < width; i++, srcPtr += 4 ) {
                *(tagPtr++) = (pulXlate[srcPtr[3]] << 24) |
                              (pulXlate[srcPtr[2]] << 16) |
                              (pulXlate[srcPtr[1]] <<  8) |
                               pulXlate[srcPtr[0]];
            }

            if( extra == 1 ) {
                *(tagPtr++) = pulXlate[srcPtr[0]];
            } else if( extra == 2 ) {
                *(tagPtr++) = (pulXlate[srcPtr[1]] << 8) |
                               pulXlate[srcPtr[0]];
            } else if (extra == 3) {
                *(tagPtr++) = (pulXlate[srcPtr[2]] << 16) |
                              (pulXlate[srcPtr[1]] <<  8) |
                               pulXlate[srcPtr[0]];
            }
        }
        else if( ppdev->cPelSize == 1 )     // 16bpp
        {
            // Two translated pixels per dword, plus one for an odd pixel.
            extra = width & 1;
            width >>= 1;

            QUEUE_PXRX_DMA_HOLD( __GlintTagColor, width + extra );
            QUEUE_PXRX_DMA_BUFF_DELAYED( tagPtr, width + extra );

            DISPDBG((DBGLVL, "width was %d, is now %d + %d",
                             prcl->right - prcl->left, width, extra));

            for( i = 0; i < width; i++, srcPtr += 2 ) {
                *(tagPtr++) = (pulXlate[srcPtr[1]] << 16) |
                               pulXlate[srcPtr[0]];
            }

            if( extra ) {
                *(tagPtr++) = pulXlate[srcPtr[0]];
            }
        }
        else
        {
            // Remaining depth: one translated pixel per dword.
            QUEUE_PXRX_DMA_HOLD( __GlintTagColor, width );
            QUEUE_PXRX_DMA_BUFF_DELAYED( tagPtr, width );

            DISPDBG((DBGLVL, "width was %d, is now %d + %d",
                             prcl->right - prcl->left, width, 0));

            for( i = 0; i < width; i++ ) {
                *(tagPtr++) = pulXlate[*(srcPtr++)];
            }
        }

        SEND_PXRX_DMA_BATCH;

        return;
    }

    // Set up the LUT table:
    if( (ppdev->PalLUTType != LUTCACHE_XLATE) ||
        (ppdev->iPalUniq != pxlo->iUniq) )
    {
        // Someone has hijacked the LUT so we need to invalidate it:
        ppdev->PalLUTType = LUTCACHE_XLATE;
        ppdev->iPalUniq   = pxlo->iUniq;
        invalidLUT        = TRUE;
    }
    else
    {
        DISPDBG((DBGLVL, "pxrxCopyXfer8bpp: reusing cached xlate"));
    }

    WAIT_PXRX_DMA_TAGS( 1 + 1 );

    lutMode  = glintInfo->lutMode & ~((3 << 2) | (1 << 4) | (7 << 8));
    lutMode |= (ppdev->cPelSize + 2) << 8;
    LOAD_LUTMODE( lutMode );

    if( invalidLUT )
    {
        LONG    cEntries = 256;

        pulXlate = pxlo->pulXlate;

        QUEUE_PXRX_DMA_TAG( __PXRXTagLUTIndex, 0 );

        if( ppdev->cPelSize == 0 )          // 8bpp
        {
            WAIT_PXRX_DMA_TAGS( cEntries );

            // Replicate each entry into all four bytes of the LUT dword.
            do {
                ul  = *(pulXlate++);
                ul |= ul << 8;
                ul |= ul << 16;
                QUEUE_PXRX_DMA_TAG( __PXRXTagLUTData, ul );
            } while( --cEntries );
        }
        else if( ppdev->cPelSize == 1 )     // 16bpp
        {
            WAIT_PXRX_DMA_TAGS( cEntries );

            // Replicate each entry into both halves of the LUT dword.
            do {
                ul  = *(pulXlate++);
                ul |= ul << 16;
                QUEUE_PXRX_DMA_TAG( __PXRXTagLUTData, ul );
            } while( --cEntries );
        }
        else
        {
            // Entries are used as they are: queue the table in one go.
            WAIT_PXRX_DMA_DWORDS( 1 + cEntries );

            QUEUE_PXRX_DMA_HOLD( __PXRXTagLUTData, cEntries );
            QUEUE_PXRX_DMA_BUFF( pulXlate, cEntries );
        }
    }

    config2D  = glintInfo->config2D & ~(__CONFIG2D_LOGOP_FORE_ENABLE |
                                        __CONFIG2D_LOGOP_BACK_ENABLE |
                                        __CONFIG2D_ENABLES);
    config2D |= __CONFIG2D_FBWRITE | __CONFIG2D_USERSCISSOR |
                __CONFIG2D_EXTERNALSRC | __CONFIG2D_LUTENABLE;
    render2D  = __RENDER2D_INCX | __RENDER2D_INCY |
                __RENDER2D_OP_SYNCDATA | __RENDER2D_SPANS;

    WAIT_PXRX_DMA_TAGS( 3 );

    LOAD_CONFIG2D( config2D );

//@@BEGIN_DDKSPLIT
#if USE_RLE_DOWNLOADS
    QUEUE_PXRX_DMA_TAG( __GlintTagDownloadTarget, __GlintTagColor );
#endif
//@@END_DDKSPLIT

    cPelInv = 2 - ppdev->cPelSize;
    // Everything before the LUT runs at 8bpp
    pixelSize = (1 << 31) | (2 << 2) | (2 << 4) | (2 << 6) |
                (cPelInv << 8) | (cPelInv << 10) |
                (cPelInv << 12) | (cPelInv << 14);
    QUEUE_PXRX_DMA_TAG( __GlintTagPixelSize, pixelSize );

    dx = pptlSrc->x - prclDst->left;
    dy = pptlSrc->y - prclDst->top;         // Add to destination to get source

    lSrcDelta  = psoSrc->lDelta;
    pjSrcScan0 = psoSrc->pvScan0;

    while( TRUE )
    {
        DISPDBG((DBGLVL, "download to rect (%d,%d) to (%d,%d)",
                         prcl->left, prcl->top, prcl->right, prcl->bottom));

        // 8bpp => 1 pixel per byte => 4 pixels per dword

        // Assume source bitmap width is dword aligned
        ASSERTDD( (lSrcDelta & 3) == 0, "pxrxCopyXfer8bpp: "
                  "SOURCE BITMAP WIDTH IS NOT DWORD ALIGNED!!!" );

        // pointer to first pixel, in pixels/bytes
        startPos = (((UINT_PTR) pjSrcScan0) + ((prcl->top + dy) * lSrcDelta)) +
                   (prcl->left + dx);

        // dword pointer to dword aligned first pixel
        pjSrc = (ULONG *) (startPos & ~3);
        if(NULL == pjSrc)
        {
            DISPDBG((ERRLVL, "ERROR: pxrxCopyXfer8bppLge "
                             "return because of pjSrc NULL"));
            return;
        }

        // number of pixels past dword alignment of a scanline
        alignOff = (ULONG)(startPos & 3);

        // dword aligned left edge in pixels
        LeftEdge = prcl->left - alignOff;

        // dword aligned width in pixels
        AlignWidth = ((prcl->right - LeftEdge) + 3) & ~3;

        // number of scanlines to do
        cy = prcl->bottom - prcl->top;

        DISPDBG((DBGLVL, "pjSrcScan0 = 0x%08X, startPos = 0x%08X, "
                         "pjSrc = 0x%08X", pjSrcScan0, startPos, pjSrc));
        DISPDBG((DBGLVL, "offset = %d pixels", alignOff));
        DISPDBG((DBGLVL, "Aligned rect = (%d -> %d) => %d pixels => %d dwords",
                         LeftEdge, LeftEdge + AlignWidth, AlignWidth,
                         AlignWidth >> 2));

        WAIT_PXRX_DMA_TAGS( 4 );

        // The scissor trims the dword-widened rectangle back to the
        // requested left/right edges.
        QUEUE_PXRX_DMA_TAG( __GlintTagScissorMinXY,
                            MAKEDWORD_XY(prcl->left, 0) );
        QUEUE_PXRX_DMA_TAG( __GlintTagScissorMaxXY,
                            MAKEDWORD_XY(prcl->right, 0x7fff) );
        QUEUE_PXRX_DMA_TAG( __GlintTagRectanglePosition,
                            MAKEDWORD_XY(LeftEdge, prcl->top) );
        QUEUE_PXRX_DMA_TAG( __GlintTagRender2D,
                            render2D | __RENDER2D_WIDTH(AlignWidth) |
                            __RENDER2D_HEIGHT(cy) );
        SEND_PXRX_DMA_BATCH;

        AlignWidth >>= 2;                       // dword aligned width in dwords
        lSrcDeltaDW = lSrcDelta >> 2;           // scanline delta in dwords
                                                // (start to start)
        lTrueDelta  = lSrcDeltaDW - AlignWidth; // scanline delta in dwords
                                                // (end to start)
        DISPDBG((DBGLVL, "Delta = %d bytes = %d dwords -> %d - %d dwords",
                         lSrcDelta, lSrcDeltaDW, lTrueDelta, AlignWidth));

//@@BEGIN_DDKSPLIT
#if USE_RLE_DOWNLOADS
        // Do an RLE download:
        tagPtr = NULL;

        do {
            WAIT_PXRX_DMA_TAGS( AlignWidth + 1 );

            nRemainder = AlignWidth;
            while( nRemainder-- )
            {
                TEST_DWORD_ALIGNED( pjSrc );
                data = *(pjSrc++);
                len  = 1;

                // Count how many following dwords repeat 'data'.
                TEST_DWORD_ALIGNED( pjSrc );
                while( nRemainder && (*pjSrc == data) )
                {
                    pjSrc++;
                    len++;
                    nRemainder--;
                    TEST_DWORD_ALIGNED( pjSrc );
                }

                if( len >= 4 )
                {
                    // Close any open literal run by patching its hold tag.
                    if( tagPtr ) {
                        *tagPtr = ASSEMBLE_PXRX_DMA_HOLD( __GlintTagColor,
                                                          holdCount );
                        tagPtr = NULL;
                    }

                    QUEUE_PXRX_DMA_INDEX2( __GlintTagRLData,
                                           __GlintTagRLCount );
                    QUEUE_PXRX_DMA_DWORD( data );
                    QUEUE_PXRX_DMA_DWORD( len );
                    len = 0;
                }
                else
                {
                    // Short run: send literally, reserving a tag slot that
                    // is filled in once the literal length is known.
                    if( !tagPtr ) {
                        QUEUE_PXRX_DMA_DWORD_DELAYED( tagPtr );
                        holdCount = 0;
                    }

                    holdCount += len;
                    while( len-- ) {
                        QUEUE_PXRX_DMA_DWORD( data );
                    }
                }
            }

            if( tagPtr ) {
                *tagPtr = ASSEMBLE_PXRX_DMA_HOLD( __GlintTagColor, holdCount );
                tagPtr = NULL;
            }
            pjSrc += lTrueDelta;
            SEND_PXRX_DMA_BATCH;
        } while( --cy > 0 );
#else
//@@END_DDKSPLIT
        // Do a raw download:
        while( TRUE )
        {
            DISPDBG((DBGLVL, "cy = %d", cy));

            WAIT_PXRX_DMA_DWORDS( AlignWidth + 1 );
            QUEUE_PXRX_DMA_HOLD( __GlintTagColor, AlignWidth );
            TEST_DWORD_ALIGNED( pjSrc );
            QUEUE_PXRX_DMA_BUFF( pjSrc, AlignWidth );
            SEND_PXRX_DMA_BATCH;

            if( --cy == 0 ) {
                break;
            }

            pjSrc += lSrcDeltaDW;
        }
//@@BEGIN_DDKSPLIT
#endif
//@@END_DDKSPLIT

        if( --count == 0 ) {
            break;
        }

        prcl++;
    }

    // Reset some defaults:
    WAIT_PXRX_DMA_TAGS( 2 );
    QUEUE_PXRX_DMA_TAG( __GlintTagPixelSize, cPelInv );
    if( ppdev->cPelSize == GLINTDEPTH32 )
        QUEUE_PXRX_DMA_TAG( __GlintTagScissorMaxXY, 0x7FFF7FFF );

    SEND_PXRX_DMA_BATCH;

    DISPDBG((DBGLVL, "pxrxCopyXfer8bpp return"));
}
//****************************************************************************
// FUNC: pxrxMemUpload
// ARGS: ppdev (I) - pointer to the physical device object
//       crcl (I) - number of destination clipping rectangles
//       prcl (I) - array of destination clipping rectangles
//       psoDst (I) - destination surface
//       pptlSrc (I) - offset into source surface
//       prclDst (I) - unclipped destination rectangle
// RETN: void
//----------------------------------------------------------------------------
// Copies each clipping rectangle from the memory-mapped framebuffer
// (ppdev->pjScreen) into the system-memory surface psoDst with memcpy, one
// scanline at a time. pptlSrc is the source point that corresponds to
// prclDst's top-left corner, so the source position of a clip rectangle is
// pptlSrc translated by (prcl - prclDst), the same mapping pxrxFifoUpload
// uses.
//****************************************************************************
VOID pxrxMemUpload(
    PPDEV   ppdev,
    LONG    crcl,
    RECTL   *prcl,
    SURFOBJ *psoDst,
    POINTL  *pptlSrc,
    RECTL   *prclDst)
{
    BYTE    *pDst, *pSrc;
    LONG    dwScanLineSize, cySrc, lSrcOff, lSrcStride;
    LONG    xSrc, ySrc;
    GLINT_DECL;

    // Make sure we're not performing other operations on the fb areas we want
    SYNC_WITH_GLINT;

    ASSERTDD(psoDst->iBitmapFormat == ppdev->iBitmapFormat,
             "Dest must be same colour depth as screen");
    ASSERTDD(crcl > 0, "Can't handle zero rectangles");

    // The source origin and stride depend only on ppdev, so work them out
    // once rather than for every rectangle.

    // This gives an offset for offscreen DIBs (zero for primary rectangles)
    lSrcOff = ppdev->DstPixelOrigin +
              (ppdev->xyOffsetDst & 0xffff) +
              (ppdev->xyOffsetDst >> 16) * ppdev->DstPixelDelta;

    // Determine the stride depending on whether we are blitting from the
    // primary or from an offscreen DIB
    if ((ppdev->DstPixelOrigin == 0) && (ppdev->xyOffsetDst == 0))
    {
        lSrcStride = ppdev->lDelta;
    }
    else
    {
        lSrcStride = ppdev->DstPixelDelta * ppdev->cjPelSize;
    }

    for (; --crcl >= 0; ++prcl)
    {
        // Source pixel that maps onto this clip rectangle's top-left corner.
        // With a clip rectangle equal to prclDst this is simply pptlSrc.
        xSrc = (LONG)pptlSrc->x + ((LONG)prcl->left - (LONG)prclDst->left);
        ySrc = (LONG)pptlSrc->y + ((LONG)prcl->top  - (LONG)prclDst->top);

        // pSrc must point to mem mapped primary
        pSrc = (BYTE *)ppdev->pjScreen +
               (lSrcOff * ppdev->cjPelSize) +
               (xSrc * ppdev->cjPelSize) +
               (ySrc * lSrcStride);

        // pDst must point to the sysmem SURFOBJ
        pDst = (BYTE *)psoDst->pvScan0 +
               ((LONG)prcl->left * ppdev->cjPelSize) +
               ((LONG)prcl->top * (LONG)psoDst->lDelta);

        // dwScanLineSize must have the right size to transfer in bytes
        dwScanLineSize = ((LONG)prcl->right - (LONG)prcl->left) *
                         ppdev->cjPelSize;

        // Number of scan lines to transfer
        cySrc = prcl->bottom - prcl->top;

        // Do the copy
        while (--cySrc >= 0)
        {
            // memcpy(dst, src, size)
            memcpy(pDst, pSrc, dwScanLineSize);
            pDst += psoDst->lDelta;     // add stride
            pSrc += lSrcStride;         // add stride
        }
    }

} // pxrxMemUpload
//****************************************************************************
// FUNC: pxrxFifoUpload
// ARGS: ppdev (I) - pointer to the physical device object
//       crcl (I) - number of destination clipping rectangles
//       prcl (I) - array of destination clipping rectangles
//       psoDst (I) - destination surface
//       pptlSrc (I) - offset into source surface
//       prclDst (I) - unclipped destination rectangle
// RETN: void
//----------------------------------------------------------------------------
// upload from on-chip source into host memory surface. Upload in spans
// (64-bit aligned) to minimise messages through the core and entries in the
// host out fifo.
//
// For each clip rectangle the source is widened to a ulong boundary, a
// Render2D is issued and the resulting ulongs are read from the output
// FIFO. Three read loops exist: no edge masks, a single ulong per scan,
// and separate left/right edge masks. When a scan has an odd number of
// ulongs one extra ulong is read and discarded.
//****************************************************************************
VOID pxrxFifoUpload(
    PPDEV   ppdev,
    LONG    crcl,
    RECTL   *prcl,
    SURFOBJ *psoDst,
    POINTL  *pptlSrc,
    RECTL   *prclDst)
{
    LONG    xDomSrc, xSubSrc, yStartSrc, cxSrc, cySrc;
    LONG    culPerSrcScan;
    LONG    culDstDelta;
    BOOL    bRemPerSrcScan;
    ULONG   *pulDst, *pulDstScan;
    ULONG   leftMask, rightMask;
    LONG    cul, ul;
    LONG    cFifoSpaces;
    __GlintFilterModeFmat FilterMode;
    GLINT_DECL;

    WAIT_PXRX_DMA_TAGS(1);
    QUEUE_PXRX_DMA_TAG( __GlintTagFBDestReadMode,
                        (glintInfo->fbDestMode | 0x103));
    SEND_PXRX_DMA_FORCE;

//@@BEGIN_DDKSPLIT
#if USE_RLE_UPLOADS
    // NB. using cxSrc >= 16 is slightly slower overall. These tests were empirically developed
    // from WB99 BG & HE benchmarks
    cxSrc = prcl->right - prcl->left;
    if(cxSrc >= 32 && (cxSrc < 80 || (cxSrc >= 128 && cxSrc < 256) ||
       cxSrc == ppdev->cxScreen))
    {
        pxrxRLEFifoUpload(ppdev, crcl, prcl, psoDst, pptlSrc, prclDst);
        return;
    }
#endif //USE_RLE_UPLOADS
//@@END_DDKSPLIT

    DISPDBG((DBGLVL, "pxrxFifoUpload: prcl = (%d, %d -> %d, %d), "
                     "prclDst = (%d, %d -> %d, %d), ptlSrc(%d, %d), count = %d",
                     prcl->left, prcl->top, prcl->right, prcl->bottom,
                     prclDst->left, prclDst->top, prclDst->right,
                     prclDst->bottom, pptlSrc->x, pptlSrc->y, crcl));

    DISPDBG((DBGLVL, "pxrxFifoUpload: psoDst: cx = %d, cy = %d, "
                     "lDelta = %d, pvScan0=%P)",
                     psoDst->sizlBitmap.cx, psoDst->sizlBitmap.cy,
                     psoDst->lDelta, psoDst->pvScan0));
    DISPDBG((DBGLVL, "pxrxFifoUpload: xyOffsetDst = (%d, %d), "
                     "xyOffsetSrc = (%d, %d)",
                     ppdev->xyOffsetDst & 0xFFFF, ppdev->xyOffsetDst >> 16,
                     ppdev->xyOffsetSrc & 0xFFFF, ppdev->xyOffsetSrc >> 16));

    ASSERTDD(psoDst->iBitmapFormat == ppdev->iBitmapFormat,
             "Dest must be same colour depth as screen");
    ASSERTDD(crcl > 0, "Can't handle zero rectangles");

    WAIT_PXRX_DMA_TAGS(5);

    LOAD_CONFIG2D(__CONFIG2D_FBDESTREAD);
    SET_READ_BUFFERS;

    // enable filter mode so we can get Sync
    // and color messages on the output FIFO
    *(DWORD *)(&FilterMode) = 0;
    FilterMode.Synchronization = __GLINT_FILTER_TAG;
    FilterMode.Color           = __GLINT_FILTER_DATA;
    QUEUE_PXRX_DMA_TAG(__GlintTagFilterMode, *(DWORD *)(&FilterMode));

    for(; --crcl >= 0; ++prcl)
    {
        DISPDBG((DBGLVL, "pxrxFifoUpload: dest prcl(%xh,%xh..%xh,%xh)",
                         prcl->left, prcl->top, prcl->right, prcl->bottom));

        // calculate pixel-aligned source
        xDomSrc   = pptlSrc->x + prcl->left  - prclDst->left;
        xSubSrc   = pptlSrc->x + prcl->right - prclDst->left;
        yStartSrc = pptlSrc->y + prcl->top   - prclDst->top;
        cySrc     = prcl->bottom - prcl->top;

        DISPDBG((DBGLVL, "pxrxFifoUpload: src (%xh,%xh..%xh,%xh)",
                         xDomSrc, yStartSrc, xSubSrc, yStartSrc + cySrc));

        // will upload ulongs aligned to ulongs
        if (ppdev->cPelSize == GLINTDEPTH32)
        {
            cxSrc         = xSubSrc - xDomSrc;
            culPerSrcScan = cxSrc;
            leftMask      = 0xFFFFFFFF;
            rightMask     = 0xFFFFFFFF;
        }
        else
        {
            if (ppdev->cPelSize == GLINTDEPTH16)
            {
                ULONG cPixFromUlongBoundary = prcl->left & 1;

                xDomSrc      -= cPixFromUlongBoundary;
                cxSrc         = xSubSrc - xDomSrc;
                culPerSrcScan = (xSubSrc - xDomSrc + 1) >> 1;

                leftMask  = 0xFFFFFFFF << (cPixFromUlongBoundary << 4);
                rightMask = 0xFFFFFFFF >> (((xDomSrc - xSubSrc) & 1) << 4);
            }
            else
            {
                ULONG cPixFromUlongBoundary = prcl->left & 3;

                xDomSrc      -= cPixFromUlongBoundary;
                cxSrc         = xSubSrc - xDomSrc;
                culPerSrcScan = (xSubSrc - xDomSrc + 3) >> 2;

                leftMask  = 0xFFFFFFFF << (cPixFromUlongBoundary << 3);
                rightMask = 0xFFFFFFFF >> (((xDomSrc - xSubSrc) & 3) << 3);
            }

            // We just want a single mask if the area to upload is less
            // than one word wide.
            if (culPerSrcScan == 1)
            {
                leftMask &= rightMask;
            }
        }

        // uploading 64 bit aligned source
        bRemPerSrcScan = culPerSrcScan & 1;

        // Work out where the destination data goes to
        culDstDelta = psoDst->lDelta >> 2;
        pulDst = ((ULONG *)psoDst->pvScan0) +
                 (prcl->left >> (2 - ppdev->cPelSize)) +
                 culDstDelta * prcl->top;

        DISPDBG((DBGLVL, "pxrxFifoUpload: uploading aligned "
                         "src (%xh,%xh..%xh,%xh)",
                         xDomSrc, yStartSrc, xDomSrc + cxSrc,
                         yStartSrc + cySrc));

        // Render the rectangle
        WAIT_PXRX_DMA_TAGS(2);
        QUEUE_PXRX_DMA_TAG( __GlintTagRectanglePosition,
                            MAKEDWORD_XY(xDomSrc, yStartSrc));
        QUEUE_PXRX_DMA_TAG( __GlintTagRender2D,
                            __RENDER2D_OP_NORMAL | __RENDER2D_SPANS |
                            __RENDER2D_INCY | __RENDER2D_INCX |
                            __RENDER2D_WIDTH(cxSrc) |
                            __RENDER2D_HEIGHT(cySrc));
        SEND_PXRX_DMA_FORCE;

        // If the start and end masks are 0xffffffff, we can just upload
        // the words and put them directly into the destination. Otherwise,
        // or the first and last word on any scanline we have to mask
        // off any pixels that are outside the render area. We know the
        // glint will have 0 in the undesired right hand edge pixels, as
        // these were not in the render area. We dont know anything about
        // the destination though.
        if ((leftMask == 0xFFFFFFFF) && (rightMask == 0xFFFFFFFF))
        {
            DISPDBG((DBGLVL, "pxrxFifoUpload: no edge masks"));

            while (--cySrc >= 0)
            {
                pulDstScan = pulDst;
                pulDst    += culDstDelta;

                DISPDBG((DBGLVL, "pxrxFifoUpload: uploading scan of %xh "
                                 "ulongs to %p (Remainder %xh)",
                                 culPerSrcScan, pulDstScan, bRemPerSrcScan));

                cul = culPerSrcScan;
                while(cul)
                {
                    // Read as many ulongs as are available, but never more
                    // than this scan still needs.
                    WAIT_OUTPUT_FIFO_NOT_EMPTY(cFifoSpaces);
                    if (cFifoSpaces > cul)
                    {
                        cFifoSpaces = cul;
                    }

                    cul -= cFifoSpaces;
                    while (--cFifoSpaces >= 0)
                    {
                        READ_OUTPUT_FIFO(ul);
                        DISPDBG((DBGLVL, "pxrxFifoUpload: read %08.8xh from "
                                         "output FIFO", ul));
                        *pulDstScan++ = ul;
                    }
                }

                if(bRemPerSrcScan)
                {
                    // Odd ulong count: drop the extra ulong.
                    WAIT_OUTPUT_FIFO_NOT_EMPTY(cFifoSpaces);
                    READ_OUTPUT_FIFO(ul);
                    DISPDBG((DBGLVL, "pxrxFifoUpload: read remainder %08.8xh "
                                     "from output FIFO", ul));
                }
            }
        }
        else if(culPerSrcScan == 1)
        {
            DISPDBG((DBGLVL, "pxrxFifoUpload: single ulong per scan"));

            while (--cySrc >= 0)
            {
                WAIT_OUTPUT_FIFO_NOT_EMPTY(cFifoSpaces);
                READ_OUTPUT_FIFO(ul);
                DISPDBG((DBGLVL, "pxrxFifoUpload: "
                                 "read %08.8xh from output FIFO", ul));

                // leftMask contains both masks in this case
                *pulDst = (*pulDst & ~leftMask) | (ul & leftMask);

                ASSERTDD(bRemPerSrcScan, "one word per scan upload should "
                                         "always leave a remainder");
                WAIT_OUTPUT_FIFO_NOT_EMPTY(cFifoSpaces);
                READ_OUTPUT_FIFO(ul);
                DISPDBG((DBGLVL, "pxrxFifoUpload: read remainder %08.8xh "
                                 "from output FIFO", ul));
                pulDst += culDstDelta;
            }
        }
        else
        {
            DISPDBG((DBGLVL, "pxrxFifoUpload: scan with left & right edge "
                             "masks: %08.8x .. %08.8x", leftMask, rightMask));

            while (--cySrc >= 0)
            {
                pulDstScan = pulDst;
                pulDst    += culDstDelta;

                DISPDBG((DBGLVL, "pxrxFifoUpload: uploading scan of %xh "
                                 "ulongs to %p", culPerSrcScan, pulDstScan));

                // get first ulong
                WAIT_OUTPUT_FIFO_NOT_EMPTY(cFifoSpaces);
                --cFifoSpaces;
                READ_OUTPUT_FIFO(ul);
                DISPDBG((DBGLVL, "pxrxFifoUpload: "
                                 "read %08.8xh from output FIFO", ul));
                // Merge first, then advance: doing both in one expression
                // reads and modifies pulDstScan without sequencing.
                *pulDstScan = (*pulDstScan & ~leftMask) | (ul & leftMask);
                ++pulDstScan;

                // get middle ulongs
                cul = culPerSrcScan - 2;
                while (cul)
                {
                    if (cFifoSpaces > cul)
                    {
                        cFifoSpaces = cul;
                    }

                    cul -= cFifoSpaces;
                    while (--cFifoSpaces >= 0)
                    {
                        READ_OUTPUT_FIFO(ul);
                        DISPDBG((DBGLVL, "pxrxFifoUpload: "
                                         "read %08.8xh from output FIFO", ul));
                        *pulDstScan++ = ul;
                    }
                    WAIT_OUTPUT_FIFO_NOT_EMPTY(cFifoSpaces);
                }

                // get last ulong
                // With exactly two ulongs per scan the loop above never
                // runs, so no wait has happened since the first read. Wait
                // here when no further entry is known to be available.
                // NOTE(review): relies on cFifoSpaces being the available
                // entry count, which is how the loops above use it.
                if (cFifoSpaces <= 0)
                {
                    WAIT_OUTPUT_FIFO_NOT_EMPTY(cFifoSpaces);
                }
                READ_OUTPUT_FIFO(ul);
                DISPDBG((DBGLVL, "pxrxFifoUpload: "
                                 "read %08.8xh from output FIFO", ul));
                *pulDstScan = (*pulDstScan & ~rightMask) | (ul & rightMask);

                if(bRemPerSrcScan)
                {
                    WAIT_OUTPUT_FIFO_NOT_EMPTY(cFifoSpaces);
                    READ_OUTPUT_FIFO(ul);
                    DISPDBG((DBGLVL, "pxrxFifoUpload: read remainder "
                                     "%08.8xh from output FIFO", ul));
                }
            }
        }
    }

#if DBG
    // Debug builds queue a Sync tag and read the FIFO until it comes back.
    cul = 0xaa55aa55;
    DISPDBG((DBGLVL, "pxrxFifoUpload: waiting for sync (id = %08.8xh)", cul));
    WAIT_PXRX_DMA_TAGS(1);
    QUEUE_PXRX_DMA_TAG(__GlintTagSync, cul);
    SEND_PXRX_DMA_FORCE;
    do
    {
        WAIT_OUTPUT_FIFO_READY;
        READ_OUTPUT_FIFO(ul);
        DISPDBG((DBGLVL, "pxrxFifoUpload: read %08.8xh from output FIFO", ul));
        if(ul != __GlintTagSync)
        {
            DISPDBG((ERRLVL,"pxrxFifoUpload: didn't read back sync!"));
        }
    }
    while(ul != __GlintTagSync);
    DISPDBG((DBGLVL, "pxrxFifoUpload: got sync"));
#endif

    // no need to initiate DMA with this tag - it will get flushed with the
    // next primitive and meanwhile will not affect local memory
    WAIT_PXRX_DMA_TAGS(1);
    QUEUE_PXRX_DMA_TAG(__GlintTagFilterMode, 0);
    SEND_PXRX_DMA_BATCH;

    GLINT_CORE_IDLE;

    DISPDBG((DBGLVL, "pxrxFifoUpload: done"));
}
//****************************************************************************
// VOID vGlintCopyBltBypassDownloadXlate8bpp
//
// using the bypass mechanism we can take advantage of write-combining
// which can be quicker than using the FIFO
// NB. supports 32bpp and 16bpp destinations
//
// Writes straight into the mapped framebuffer (ppdev->pjScreen): every 8bpp
// source pixel of every clip rectangle is looked up in pxlo->pulXlate and
// stored as a ULONG (32bpp) or USHORT (any other depth).
//
//   psoSrc    - 8bpp source surface (pvScan0 / lDelta)
//   pptlSrc   - source point corresponding to prclDst's top-left corner
//   prclDst   - unclipped destination rectangle
//   prclClip  - array of crclClip destination clip rectangles
//   pxlo      - supplies the translation table
//
// The per-scan stride remainders are signed so that a negative source lDelta
// steps backwards regardless of pointer width, and the destination is
// advanced with standard pointer arithmetic.
//****************************************************************************
VOID vGlintCopyBltBypassDownloadXlate8bpp(
    PDEV        *ppdev,
    SURFOBJ     *psoSrc,
    POINTL      *pptlSrc,
    RECTL       *prclDst,
    RECTL       *prclClip,
    LONG        crclClip,
    XLATEOBJ    *pxlo)
{
    BYTE    *pjSrcScan0;
    LONG    cjSrcDelta, xSrcOff, ySrcOff;
    ULONG   *pulDstScan0;
    LONG    culDstDelta, xDstOff;
    LONG    cScans, cPixPerScan, c;
    LONG    cjSrcDeltaRem, cjDstDeltaRem;
    ULONG   *aulXlate;
    BYTE    *pjSrc;
    GLINT_DECL;

//@@BEGIN_DDKSPLIT
#if 0
    {
        SIZEL sizlDst;
        sizlDst.cx = prclClip->right - prclClip->left;
        sizlDst.cy = prclClip->bottom - prclClip->top;
        DISPDBG((DBGLVL, "vGlintCopyBltBypassDownloadXlate8bpp(): "
                         "cRects(%d) sizlDst(%d,%d)",
                         crclClip, sizlDst.cx, sizlDst.cy));
    }
#endif //DBG
//@@END_DDKSPLIT

    pjSrcScan0 = (BYTE *)psoSrc->pvScan0;
    cjSrcDelta = psoSrc->lDelta;

    // need to add arclClip[n].left to get xSrc
    xSrcOff = pptlSrc->x - prclDst->left;
    // need to add arclClip[n].top to get ySrc
    ySrcOff = pptlSrc->y - prclDst->top;

    pulDstScan0 = (ULONG *)ppdev->pjScreen;
    culDstDelta = ppdev->DstPixelDelta >> (2 - ppdev->cPelSize);
    xDstOff     = ppdev->DstPixelOrigin +
                  (ppdev->xyOffsetDst & 0xffff) +
                  (ppdev->xyOffsetDst >> 16) * ppdev->DstPixelDelta;

    aulXlate = pxlo->pulXlate;

    SYNC_IF_CORE_BUSY;

    for (; --crclClip >= 0; ++prclClip)
    {
        cScans      = prclClip->bottom - prclClip->top;
        cPixPerScan = prclClip->right - prclClip->left;

        // Bytes from the end of one source scan to the start of the next.
        cjSrcDeltaRem = cjSrcDelta - cPixPerScan;

        pjSrc = pjSrcScan0 + xSrcOff + prclClip->left +
                ((prclClip->top + ySrcOff) * cjSrcDelta);

        if (ppdev->cPelSize == GLINTDEPTH32)
        {
            ULONG *pulDst;

            // Bytes from the end of one destination scan to the next.
            cjDstDeltaRem = (culDstDelta - cPixPerScan) * 4;

            pulDst = pulDstScan0 + xDstOff + prclClip->left +
                     prclClip->top * culDstDelta;

            while (--cScans >= 0)
            {
                for (c = cPixPerScan; --c >= 0;)
                {
                    *pulDst++ = aulXlate[*pjSrc++];
                }

                pjSrc += cjSrcDeltaRem;
                pulDst = (ULONG *)((BYTE *)pulDst + cjDstDeltaRem);
            }
        }
        else    // (GLINTDEPTH16)
        {
            USHORT *pusDst;

            cjDstDeltaRem = (culDstDelta * 4) -
                            (cPixPerScan << ppdev->cPelSize);

            pusDst = (USHORT *)pulDstScan0 + xDstOff + prclClip->left +
                     ((prclClip->top * culDstDelta) << 1);

            while (--cScans >= 0)
            {
                for (c = cPixPerScan; --c >= 0;)
                {
                    *pusDst++ = (USHORT)aulXlate[*pjSrc++];
                }

                pjSrc += cjSrcDeltaRem;
                pusDst = (USHORT *)((BYTE *)pusDst + cjDstDeltaRem);
            }
        }
    }
}
//@@BEGIN_DDKSPLIT
#if 0
/**************************************************************************\
*
* void pxrxMonoDownloadRLE
*
* Queues 'cy' scanlines of 1bpp data, AlignWidth bits wide, starting at
* pjSrc, with __GlintTagBitMaskPattern as the download target.
*
* Equal consecutive dwords are grouped into runs. A run of four or more is
* queued as one RLData/RLCount pair; shorter runs are queued literally
* behind a hold tag. The hold tag's slot is reserved first (tagPtr) and is
* filled in with the final count once the literal sequence ends.
*
*   AlignWidth - width of a scanline in bits; 32 selects the
*                one-dword-per-scan path
*   pjSrc      - first source dword
*   lSrcDelta  - scanline delta; it is added to a ULONG pointer, so it is
*                used in dword units
*   cy         - number of scanlines
*
\**************************************************************************/
void pxrxMonoDownloadRLE( PPDEV ppdev, ULONG AlignWidth, ULONG *pjSrc, LONG lSrcDelta, LONG cy ) { ULONG len, data, holdCount; ULONG *tagPtr = NULL; GLINT_DECL;
// Direct the downloaded data at the bit mask pattern register.
WAIT_PXRX_DMA_TAGS( 1 ); QUEUE_PXRX_DMA_TAG( __GlintTagDownloadTarget, __GlintTagBitMaskPattern );
// One dword per scanline: runs are found vertically, across scanlines.
if( AlignWidth == 32 ) { ULONG bits; DISPDBG((DBGLVL, "Doing Single Word per scan download"));
WAIT_PXRX_DMA_DWORDS( cy + 1 );
while( cy-- ) { TEST_DWORD_ALIGNED( pjSrc ); data = *pjSrc; pjSrc += lSrcDelta; len = 1;
// Extend the run while the following scanlines hold the same dword.
TEST_DWORD_ALIGNED( pjSrc ); while( cy && (*pjSrc == data) ) { pjSrc += lSrcDelta; len++; cy--; TEST_DWORD_ALIGNED( pjSrc ); }
// Long run: close any open literal sequence, then queue data + count.
if( len >= 4 ) { if( tagPtr ) { *tagPtr = ASSEMBLE_PXRX_DMA_HOLD( __GlintTagBitMaskPattern, holdCount ); tagPtr = NULL; }
QUEUE_PXRX_DMA_INDEX2( __GlintTagRLData, __GlintTagRLCount ); QUEUE_PXRX_DMA_DWORD( data ); QUEUE_PXRX_DMA_DWORD( len ); len = 0; } else { if( !tagPtr ) { QUEUE_PXRX_DMA_DWORD_DELAYED( tagPtr ); holdCount = 0; }
// Short run: queue the dword 'len' times as literal data.
holdCount += len; while( len-- ) { QUEUE_PXRX_DMA_DWORD( data ); } } }
// Patch the hold tag of a literal sequence that is still open.
if( tagPtr ) { *tagPtr = ASSEMBLE_PXRX_DMA_HOLD( __GlintTagBitMaskPattern, holdCount ); tagPtr = NULL; } } else { // multiple 32 bit words per scanline. convert the delta to the
// delta as we need it at the end of each line by subtracting the
// width in bytes of the data we're downloading. Note, pjSrc
// is always 1 LONG short of the end of the line because we break
// before adding on the last ULONG. Thus, we subtract sizeof(ULONG)
// from the original adjustment.
// NOTE(review): the code below subtracts the whole scanline width
// (AlignWidth >> 5 dwords) and the inner loop advances pjSrc past every
// dword it reads, so the "1 LONG short" remark above looks stale - confirm.
LONG nRemainder; ULONG bits; LONG lSrcDeltaScan = lSrcDelta - (AlignWidth >> 5);
DISPDBG((DBGLVL, "Doing Multiple Word per scan download"));
// One pass per scanline; runs are found horizontally within the scanline.
while( TRUE ) { nRemainder = AlignWidth >> 5; WAIT_PXRX_DMA_DWORDS( nRemainder + 1 );
while( nRemainder-- ) { TEST_DWORD_ALIGNED( pjSrc ); data = *(pjSrc++); len = 1;
TEST_DWORD_ALIGNED( pjSrc ); while( nRemainder && (*pjSrc == data) ) { pjSrc++; len++; nRemainder--; TEST_DWORD_ALIGNED( pjSrc ); }
if( len >= 4 ) { if( tagPtr ) { *tagPtr = ASSEMBLE_PXRX_DMA_HOLD( __GlintTagBitMaskPattern, holdCount ); tagPtr = NULL; }
QUEUE_PXRX_DMA_INDEX2( __GlintTagRLData, __GlintTagRLCount ); QUEUE_PXRX_DMA_DWORD( data ); QUEUE_PXRX_DMA_DWORD( len ); len = 0; } else { if( !tagPtr ) { QUEUE_PXRX_DMA_DWORD_DELAYED( tagPtr ); holdCount = 0; }
holdCount += len; while( len-- ) { QUEUE_PXRX_DMA_DWORD( data ); } } }
// A literal sequence never spans scanlines: its tag is patched here.
if( tagPtr ) { *tagPtr = ASSEMBLE_PXRX_DMA_HOLD( __GlintTagBitMaskPattern, holdCount ); tagPtr = NULL; }
if( --cy == 0 ) { break; }
// Send this scanline's batch, then step to the start of the next scanline.
SEND_PXRX_DMA_BATCH; pjSrc += lSrcDeltaScan; } } SEND_PXRX_DMA_BATCH; }
//*********************************************************************************************
// FUNC: pxrxRLEFifoUpload
// ARGS: ppdev (I) - pointer to the physical device object
//       crcl (I) - number of destination clipping rectangles
//       prcl (I) - array of destination clipping rectangles
//       psoDst (I) - destination surface
//       pptlSrc (I) - offset into source surface
//       prclDst (I) - unclipped destination rectangle
// RETN: void
//---------------------------------------------------------------------------------------------
// upload from on-chip source into host memory surface. Upload in spans (64-bit aligned) to
// minimise messages through the core and entries in the host out fifo. Upload is RLE encoded.
//
// Every output FIFO read yields a (count, data) pair; 'data' is written 'count' times. The
// padding ulong of an odd-length scan is folded into culPerSrcScan and dropped while the runs
// are expanded. Edge ulongs are merged into the destination through leftMask / rightMask.
//*********************************************************************************************
VOID pxrxRLEFifoUpload(
    PPDEV   ppdev,
    LONG    crcl,
    RECTL   *prcl,
    SURFOBJ *psoDst,
    POINTL  *pptlSrc,
    RECTL   *prclDst)
{
    LONG    xDomSrc, xSubSrc, yStartSrc, cxSrc, cySrc;
    LONG    culPerSrcScan;
    LONG    culDstDelta;
    BOOL    bRemPerSrcScan;
    ULONG   *pulDst, *pulDstScan;
    ULONG   leftMask, rightMask;
    LONG    cul, ul;
    LONG    cFifoSpaces;
    ULONG   RLECount, RLEData;
    __GlintFilterModeFmat FilterMode;
    GLINT_DECL;

    DISPDBG((7, "pxrxFifoUpload: prcl = (%d, %d -> %d, %d), prclDst = (%d, %d -> %d, %d), ptlSrc(%d, %d), count = %d",
                prcl->left, prcl->top, prcl->right, prcl->bottom,
                prclDst->left, prclDst->top, prclDst->right, prclDst->bottom,
                pptlSrc->x, pptlSrc->y, crcl));

    DISPDBG((7, "pxrxFifoUpload: psoDst: cx = %d, cy = %d, lDelta = %d, pvScan0=%P)",
                psoDst->sizlBitmap.cx, psoDst->sizlBitmap.cy,
                psoDst->lDelta, psoDst->pvScan0));
    DISPDBG((7, "pxrxFifoUpload: xyOffsetDst = (%d, %d), xyOffsetSrc = (%d, %d)",
                ppdev->xyOffsetDst & 0xFFFF, ppdev->xyOffsetDst >> 16,
                ppdev->xyOffsetSrc & 0xFFFF, ppdev->xyOffsetSrc >> 16));

    ASSERTDD(psoDst->iBitmapFormat == ppdev->iBitmapFormat,
             "Dest must be same colour depth as screen");
    ASSERTDD(crcl > 0, "Can't handle zero rectangles");

    WAIT_PXRX_DMA_TAGS(6);
    QUEUE_PXRX_DMA_TAG( __GlintTagRLEMask, 0xffffffff);
    LOAD_CONFIG2D(__CONFIG2D_FBDESTREAD);
    SET_READ_BUFFERS;

    // enable filter mode so we can get Sync and color messages on the output FIFO
    *(DWORD *)(&FilterMode) = 0;
    FilterMode.Synchronization = __GLINT_FILTER_TAG;
    FilterMode.Color           = __GLINT_FILTER_DATA;
    FilterMode.RLEHostOut      = TRUE;
    QUEUE_PXRX_DMA_TAG(__GlintTagFilterMode, *(DWORD*)(&FilterMode));

    for(; --crcl >= 0; ++prcl)
    {
        DISPDBG((7, "pxrxFifoUpload: dest prcl(%xh,%xh..%xh,%xh)",
                    prcl->left, prcl->top, prcl->right, prcl->bottom));

        // calculate pixel-aligned source
        xDomSrc   = pptlSrc->x + prcl->left  - prclDst->left;
        xSubSrc   = pptlSrc->x + prcl->right - prclDst->left;
        yStartSrc = pptlSrc->y + prcl->top   - prclDst->top;
        cySrc     = prcl->bottom - prcl->top;

        DISPDBG((8, "pxrxFifoUpload: src (%xh,%xh..%xh,%xh)",
                    xDomSrc, yStartSrc, xSubSrc, yStartSrc + cySrc));

        // will upload ulongs aligned to ulongs
        if (ppdev->cPelSize == GLINTDEPTH32)
        {
            cxSrc         = xSubSrc - xDomSrc;
            culPerSrcScan = cxSrc;
            leftMask      = 0xFFFFFFFF;
            rightMask     = 0xFFFFFFFF;
        }
        else
        {
            if (ppdev->cPelSize == GLINTDEPTH16)
            {
                ULONG cPixFromUlongBoundary = prcl->left & 1;

                xDomSrc      -= cPixFromUlongBoundary;
                cxSrc         = xSubSrc - xDomSrc;
                culPerSrcScan = (xSubSrc - xDomSrc + 1) >> 1;

                leftMask  = 0xFFFFFFFF << (cPixFromUlongBoundary << 4);
                rightMask = 0xFFFFFFFF >> (((xDomSrc - xSubSrc) & 1) << 4);
            }
            else
            {
                ULONG cPixFromUlongBoundary = prcl->left & 3;

                xDomSrc      -= cPixFromUlongBoundary;
                cxSrc         = xSubSrc - xDomSrc;
                culPerSrcScan = (xSubSrc - xDomSrc + 3) >> 2;

                leftMask  = 0xFFFFFFFF << (cPixFromUlongBoundary << 3);
                rightMask = 0xFFFFFFFF >> (((xDomSrc - xSubSrc) & 3) << 3);
            }

            // We just want a single mask if the area to upload is less than one word wide.
            if (culPerSrcScan == 1)
                leftMask &= rightMask;
        }

        // uploading 64 bit aligned source
        bRemPerSrcScan = culPerSrcScan & 1;

        // the remainder will be encoded in the run: it's simpler just to add it in now
        // then check bRemPerSrcScan during the upload
        DISPDBG((8, "pxrxFifoUpload: Adding remainder into culPerSrcScan for RLE"));
        culPerSrcScan += bRemPerSrcScan;

        // Work out where the destination data goes to
        culDstDelta = psoDst->lDelta >> 2;
        pulDst = ((ULONG *)psoDst->pvScan0) +
                 (prcl->left >> (2 - ppdev->cPelSize)) +
                 culDstDelta * prcl->top;

        DISPDBG((8, "pxrxFifoUpload: uploading aligned src (%xh,%xh..%xh,%xh)",
                    xDomSrc, yStartSrc, xDomSrc + cxSrc, yStartSrc + cySrc));

        // Render the rectangle
        WAIT_PXRX_DMA_TAGS(2);
        QUEUE_PXRX_DMA_TAG( __GlintTagRectanglePosition,MAKEDWORD_XY(xDomSrc, yStartSrc));
        QUEUE_PXRX_DMA_TAG( __GlintTagRender2D,
                            __RENDER2D_OP_NORMAL | __RENDER2D_SPANS |
                            __RENDER2D_INCY | __RENDER2D_INCX |
                            __RENDER2D_WIDTH(cxSrc) | __RENDER2D_HEIGHT(cySrc));
        SEND_PXRX_DMA_FORCE;

        // If the start and end masks are 0xffffffff, we can just upload the words and put them
        // directly into the destination. Otherwise, or the first and last word on any scanline
        // we have to mask off any pixels that are outside the render area. We know the glint will
        // have 0 in the undesired right hand edge pixels, as these were not in the render area. We
        // dont know anything about the destination though.
        if (leftMask == 0xFFFFFFFF && rightMask == 0xFFFFFFFF)
        {
            DISPDBG((8, "pxrxFifoUpload: no edge masks"));

            while (--cySrc >= 0)
            {
                pulDstScan = pulDst;
                pulDst    += culDstDelta;

                DISPDBG((9, "pxrxFifoUpload: uploading scan of %xh ulongs to %p (Remainder %xh)",
                            culPerSrcScan, pulDstScan, bRemPerSrcScan));

                cul = culPerSrcScan;
                while(cul)
                {
                    WAIT_OUTPUT_FIFO_COUNT(2);
                    READ_OUTPUT_FIFO(RLECount);
                    READ_OUTPUT_FIFO(RLEData);
                    // The RLEData specifier now carries its '%', so both
                    // arguments are consumed.
                    DISPDBG((10, "pxrxFifoUpload: RLECount = %xh RLEData = %08.8xh",
                                 RLECount, RLEData));
                    cul -= RLECount;
                    if(cul == 0 && bRemPerSrcScan)
                    {
                        // discard the last ulong
                        --RLECount;
                    }
                    while(RLECount--)
                    {
                        DISPDBG((10, "pxrxFifoUpload: written ulong"));
                        *pulDstScan++ = RLEData;
                    }
                }
            }
        }
        else if(culPerSrcScan == 1)
        {
            DISPDBG((8, "pxrxFifoUpload: single ulong per scan"));

            while (--cySrc >= 0)
            {
                // the remainder has already been added into culPerSrcScan so this can't happen
                DISPDBG((ERRLVL,"pxrxFifoUpload: got single ulong per scan - but we always upload 64 bit quanta!"));
                pulDst += culDstDelta;
            }
        }
        else
        {
            DISPDBG((8, "pxrxFifoUpload: scan with left & right edge masks: %08.8x .. %08.8x",
                        leftMask, rightMask));

            while (--cySrc >= 0)
            {
                pulDstScan = pulDst;
                pulDst    += culDstDelta;

                DISPDBG((9, "pxrxFifoUpload: uploading scan of %xh ulongs to %p",
                            culPerSrcScan, pulDstScan));

                cul = culPerSrcScan;
                while(cul)
                {
                    WAIT_OUTPUT_FIFO_COUNT(2);
                    READ_OUTPUT_FIFO(RLECount);
                    READ_OUTPUT_FIFO(RLEData);
                    DISPDBG((10, "pxrxFifoUpload: RLECount = %xh RLEData = %08.8xh",
                                 RLECount, RLEData));

                    if(cul - bRemPerSrcScan == 0)
                    {
                        DISPDBG((10, "pxrxFifoUpload: discarding last ulong"));
                        break;
                    }

                    if(culPerSrcScan - bRemPerSrcScan == 1)
                    {
                        // one pixel per scan
                        DISPDBG((10, "pxrxFifoUpload: written single pixel scan"));
                        *pulDstScan = (*pulDstScan & ~leftMask) | (RLEData & leftMask);
                        cul -= RLECount;
                        continue;
                    }

                    if(cul == culPerSrcScan)
                    {
                        DISPDBG((10, "pxrxFifoUpload: written left edge"));
                        // first ulong: merge, then advance. Doing both in
                        // one expression reads and modifies pulDstScan
                        // without sequencing.
                        *pulDstScan = (*pulDstScan & ~leftMask) | (RLEData & leftMask);
                        ++pulDstScan;
                        --RLECount;
                        --cul;
                    }

                    cul -= RLECount;
                    if(cul == 0)
                    {
                        // this is the last run of the scan: process the last ulong separately in order
                        // to apply the right edge mask
                        RLECount -= 1 + bRemPerSrcScan;
                    }
                    else if(cul - bRemPerSrcScan == 0)
                    {
                        // this is the penultimate run of the scan and the last one will just include the
                        // remainder: process the last ulong separately in order to apply the right edge mask
                        --RLECount;
                    }

                    while(RLECount--)
                    {
                        DISPDBG((10, "pxrxFifoUpload: written middle ulong"));
                        *pulDstScan++ = RLEData;
                    }

                    if(cul == 0 || cul - bRemPerSrcScan == 0)
                    {
                        DISPDBG((10, "pxrxFifoUpload: written right edge"));
                        *pulDstScan = (*pulDstScan & ~rightMask) | (RLEData & rightMask); // last ulong
#if DBG
                        if(cul - bRemPerSrcScan == 0)
                        {
                            DISPDBG((10, "pxrxFifoUpload: discarding last ulong"));
                        }
#endif
                    }
                }
            }
        }
    }

#if DBG
    // Debug builds queue a Sync tag and read the FIFO until it comes back.
    cul = 0xaa55aa55;
    DISPDBG((8, "pxrxFifoUpload: waiting for sync (id = %08.8xh)", cul));
    WAIT_PXRX_DMA_TAGS(1);
    QUEUE_PXRX_DMA_TAG(__GlintTagSync, cul);
    SEND_PXRX_DMA_FORCE;
    do
    {
        WAIT_OUTPUT_FIFO_READY;
        READ_OUTPUT_FIFO(ul);
        DISPDBG((8, "pxrxFifoUpload: read %08.8xh from output FIFO", ul));
        if(ul != __GlintTagSync)
        {
            DISPDBG((ERRLVL,"pxrxFifoUpload: didn't read back sync!"));
        }
    }
    while(ul != __GlintTagSync);
    DISPDBG((8, "pxrxFifoUpload: got sync"));
#endif

    // no need to initiate DMA with this tag - it will get flushed with the next primitive and
    // meanwhile will not affect local memory
    WAIT_PXRX_DMA_TAGS(1);
    QUEUE_PXRX_DMA_TAG(__GlintTagFilterMode, 0);
    SEND_PXRX_DMA_BATCH;

    GLINT_CORE_IDLE;

    DISPDBG((7, "pxrxFifoUpload: done"));
}
//****************************************************************************
// FUNC: vGlintCopyBltBypassDownload32bpp
// DESC: using the bypass mechanism we can take advantage of write-combining
//       which can be quicker than using the FIFO
//
// Copies 32bpp pixels from the source surface straight into the memory
// addressed by ppdev->pjScreen, one clip rectangle at a time.
//
// ARGS: ppdev    - device; supplies pjScreen, DstPixelDelta, DstPixelOrigin,
//                  xyOffsetDst and cPelSize
//       psoSrc   - source surface (pvScan0 and lDelta are read)
//       pptlSrc  - source point corresponding to the top-left of prclDst
//       prclDst  - destination rectangle; only used to derive the
//                  destination -> source offset
//       prclClip - array of rectangles (destination coordinates) to copy
//       crclClip - number of rectangles in prclClip
//****************************************************************************
VOID vGlintCopyBltBypassDownload32bpp(
    PDEV    *ppdev,
    SURFOBJ *psoSrc,
    POINTL  *pptlSrc,
    RECTL   *prclDst,
    RECTL   *prclClip,
    LONG    crclClip)
{
    ULONG   *pulSrcScan0;
    LONG    culSrcDelta, xSrcOff, ySrcOff;
    ULONG   *pulDstScan0;
    LONG    culDstDelta, xDstOff;
    LONG    cScans, cPixPerScan, c;
    // Byte distance from the end of one copied scan to the start of the next.
    // These must be signed: psoSrc->lDelta may be negative, and an unsigned
    // 32-bit value would be zero-extended (not sign-extended) when added to a
    // pointer on a 64-bit target.
    LONG    cjSrcDeltaRem, cjDstDeltaRem;
    ULONG   *pulSrc;
    ULONG   *pulDst;
    ULONG   tmp0, tmp1, tmp2;
    GLINT_DECL;

#if DBG && 0
    {
        SIZEL sizlDst;
        sizlDst.cx = prclClip->right - prclClip->left;
        sizlDst.cy = prclClip->bottom - prclClip->top;
        DISPDBG((-1, "vGlintCopyBltBypassDownload32bpp(): cRects(%d) sizlDst(%d,%d)", crclClip, sizlDst.cx, sizlDst.cy));
    }
#endif //DBG

    pulSrcScan0 = (ULONG *)psoSrc->pvScan0;
    culSrcDelta = psoSrc->lDelta >> 2;          // source stride in ULONGs
    xSrcOff = pptlSrc->x - prclDst->left;       // need to add arclClip[n].left to get xSrc
    ySrcOff = pptlSrc->y - prclDst->top;        // need to add arclClip[n].top to get ySrc

    pulDstScan0 = (ULONG *)ppdev->pjScreen;
    culDstDelta = ppdev->DstPixelDelta >> (2 - ppdev->cPelSize);
    // xyOffsetDst carries an x offset in its low 16 bits and a y offset in
    // its high 16 bits; fold both into a single pixel offset
    xDstOff = ppdev->DstPixelOrigin + (ppdev->xyOffsetDst & 0xffff) +
              (ppdev->xyOffsetDst >> 16) * ppdev->DstPixelDelta;

    // NOTE(review): presumably waits for the graphics core before the CPU
    // writes to the framebuffer - confirm against the macro definition
    SYNC_IF_CORE_BUSY;

    for (; --crclClip >= 0; ++prclClip)
    {
        cScans = prclClip->bottom - prclClip->top;
        cPixPerScan = prclClip->right - prclClip->left;
        cjSrcDeltaRem = (culSrcDelta - cPixPerScan) * 4;
        cjDstDeltaRem = (culDstDelta - cPixPerScan) * 4;

        // calc source & destination address, -1 to allow for prefix-increment
        pulSrc = -1 + pulSrcScan0 + xSrcOff + prclClip->left +
                 ((prclClip->top + ySrcOff) * culSrcDelta);
        pulDst = -1 + pulDstScan0 + xDstOff + prclClip->left +
                 prclClip->top * culDstDelta;

        // Each pass of the body advances both pointers by cPixPerScan pixels;
        // the loop increment then skips the rest of the stride.
        for (; --cScans >= 0;
               pulSrc = (ULONG *)((BYTE *)pulSrc + cjSrcDeltaRem),
               pulDst = (ULONG *)((BYTE *)pulDst + cjDstDeltaRem))
        {
#if defined(_X86_)
            // Copy the scan four pixels at a time. Both pointers are biased
            // by -1 pixel, hence the +4 .. +16 displacements.
            __asm
            {
                mov     edi, pulDst
                mov     ecx, cPixPerScan
                mov     esi, pulSrc
                shr     ecx, 2              // ecx = number of 4-pixel groups
                push    ebp                 // ebp is borrowed as a fourth data register;
                                            // no C locals are referenced until it is popped
                test    ecx, ecx
                jle     EndOfLine
            LoopFours:
                mov     eax, [esi+4]
                mov     ebx, [esi+8]
                mov     edx, [esi+12]
                mov     ebp, [esi+16]
                add     esi, 16
                mov     [edi+4], eax
                mov     [edi+8], ebx
                add     edi, 16
                mov     [edi-4], edx
                dec     ecx
                mov     [edi], ebp          // mov leaves the flags from dec intact
                jne     LoopFours
            EndOfLine:
                pop     ebp
                mov     pulSrc, esi
                mov     pulDst, edi
            }

            // do the remaining 0, 1, 2 or 3 pixels on this line
            switch (cPixPerScan & 3)
            {
            case 3:
                tmp0 = *++pulSrc;
                tmp1 = *++pulSrc;
                tmp2 = *++pulSrc;
                *++pulDst = tmp0;
                *++pulDst = tmp1;
                *++pulDst = tmp2;
                break;
            case 2:
                tmp0 = *++pulSrc;
                tmp1 = *++pulSrc;
                *++pulDst = tmp0;
                *++pulDst = tmp1;
                break;
            case 1:
                tmp0 = *++pulSrc;
                *++pulDst = tmp0;
                break;
            default:
                break;
            }
#else
            for (c = cPixPerScan; --c >= 0;)
            {
                *++pulDst = *++pulSrc;
            }
#endif
        }
    }
}
//****************************************************************************
// FUNC: vGlintCopyBltBypassDownload24bppTo32bpp
// DESC: using the bypass mechanism we can take advantage of write-combining
//       which can be quicker than using the FIFO
//
// Expands packed 3-byte source pixels into ULONG-sized destination pixels
// written through ppdev->pjScreen, one clip rectangle at a time.
//
// ARGS: ppdev    - device; supplies pjScreen, DstPixelDelta, DstPixelOrigin,
//                  xyOffsetDst and cPelSize
//       psoSrc   - source surface (pvScan0 and lDelta are read)
//       pptlSrc  - source point corresponding to the top-left of prclDst
//       prclDst  - destination rectangle; only used to derive the
//                  destination -> source offset
//       prclClip - array of rectangles (destination coordinates) to copy
//       crclClip - number of rectangles in prclClip
//****************************************************************************
VOID vGlintCopyBltBypassDownload24bppTo32bpp(
    PDEV    *ppdev,
    SURFOBJ *psoSrc,
    POINTL  *pptlSrc,
    RECTL   *prclDst,
    RECTL   *prclClip,
    LONG    crclClip)
{
    BYTE    *pjSrcBits;
    LONG    cjSrcStride;
    LONG    dxSrc, dySrc;
    ULONG   *pulScreen;
    LONG    culDstStride, xDstBase;
    LONG    cRows, cCols;
    LONG    iRow, iCol;
    LONG    xSrcBytes;
    BYTE    *pjSrcRow;
    BYTE    *pjPix;
    ULONG   *pulDstRow;
    ULONG   *pulPix;
    GLINT_DECL;

#if DBG && 0
    {
        SIZEL sizlDst;
        sizlDst.cx = prclClip->right - prclClip->left;
        sizlDst.cy = prclClip->bottom - prclClip->top;
        DISPDBG((-1, "vGlintCopyBltBypassDownload24bppTo32bpp(): cRects(%d) sizlDst(%d,%d)", crclClip, sizlDst.cx, sizlDst.cy));
    }
#endif //DBG

    pjSrcBits = (BYTE *)psoSrc->pvScan0;
    cjSrcStride = psoSrc->lDelta;
    dxSrc = pptlSrc->x - prclDst->left;     // add a clip rectangle's left to get its source x
    dySrc = pptlSrc->y - prclDst->top;      // add a clip rectangle's top to get its source y

    pulScreen = (ULONG *)ppdev->pjScreen;
    culDstStride = ppdev->DstPixelDelta >> (2 - ppdev->cPelSize);
    // low 16 bits of xyOffsetDst are an x offset, high 16 bits a y offset
    xDstBase = ppdev->DstPixelOrigin + (ppdev->xyOffsetDst & 0xffff) +
               (ppdev->xyOffsetDst >> 16) * ppdev->DstPixelDelta;

    SYNC_IF_CORE_BUSY;

    for (; crclClip > 0; --crclClip, ++prclClip)
    {
        cRows = prclClip->bottom - prclClip->top;
        cCols = prclClip->right - prclClip->left;

        // source x in bytes: 3 bytes per pixel, formed as x + 2x
        xSrcBytes = dxSrc + prclClip->left;
        xSrcBytes += xSrcBytes << 1;

        pjSrcRow = pjSrcBits + xSrcBytes + ((prclClip->top + dySrc) * cjSrcStride);
        pulDstRow = pulScreen + xDstBase + prclClip->left + prclClip->top * culDstStride;

        for (iRow = 0; iRow < cRows; ++iRow)
        {
            pjPix = pjSrcRow;
            pulPix = pulDstRow;

            // All but the last pixel are fetched with a 4-byte read and the
            // extra byte masked off. The last pixel is excluded so that the
            // 4-byte read can never run past the end of the scan.
            for (iCol = 1; iCol < cCols; ++iCol)
            {
                *pulPix++ = *(ULONG *)pjPix & 0x00ffffff;
                pjPix += 3;
            }

            // Last pixel: copy exactly 3 bytes (2 + 1). The top byte of the
            // destination pixel is not written.
            *(USHORT *)pulPix = *(USHORT *)pjPix;
            ((BYTE *)pulPix)[2] = pjPix[2];

            pjSrcRow += cjSrcStride;
            pulDstRow += culDstStride;
        }
    }
}
//****************************************************************************
// FUNC: vGlintCopyBltBypassDownload16bpp
// DESC: using the bypass mechanism we can take advantage of write-combining
//       which can be quicker than using the FIFO
//
// Copies 16bpp pixels from the source surface into the memory addressed by
// ppdev->pjScreen. Each scan is written as an optional leading single pixel
// (to bring the destination onto a ULONG boundary), a run of ULONG (two
// pixel) copies, and an optional trailing single pixel.
//
// ARGS: ppdev    - device; supplies pjScreen, DstPixelDelta, DstPixelOrigin,
//                  xyOffsetDst and cPelSize
//       psoSrc   - source surface (pvScan0 and lDelta are read)
//       pptlSrc  - source point corresponding to the top-left of prclDst
//       prclDst  - destination rectangle; only used to derive the
//                  destination -> source offset
//       prclClip - array of rectangles (destination coordinates) to copy
//       crclClip - number of rectangles in prclClip
//****************************************************************************
VOID vGlintCopyBltBypassDownload16bpp(
    PDEV    *ppdev,
    SURFOBJ *psoSrc,
    POINTL  *pptlSrc,
    RECTL   *prclDst,
    RECTL   *prclClip,
    LONG    crclClip)
{
    ULONG   *pulSrcBits;
    LONG    culSrcStride, dxSrc, dySrc;
    ULONG   *pulScreen;
    LONG    culDstStride, xDstBase;
    LONG    cusSrcStride, cusDstStride;
    LONG    cRows, cCols, iRow;
    USHORT  *pusSrcRow;
    USHORT  *pusDstRow;
    GLINT_DECL;

#if DBG && 0
    {
        SIZEL sizlDst;
        sizlDst.cx = prclClip->right - prclClip->left;
        sizlDst.cy = prclClip->bottom - prclClip->top;
        DISPDBG((-1, "vGlintCopyBltBypassDownload16bpp(): cRects(%d) sizlDst(%d,%d)", crclClip, sizlDst.cx, sizlDst.cy));
    }
#endif //DBG

    pulSrcBits = (ULONG *)psoSrc->pvScan0;
    culSrcStride = psoSrc->lDelta >> 2;     // source stride in ULONGs
    dxSrc = pptlSrc->x - prclDst->left;     // add a clip rectangle's left to get its source x
    dySrc = pptlSrc->y - prclDst->top;      // add a clip rectangle's top to get its source y

    pulScreen = (ULONG *)ppdev->pjScreen;
    culDstStride = ppdev->DstPixelDelta >> (2 - ppdev->cPelSize);
    // low 16 bits of xyOffsetDst are an x offset, high 16 bits a y offset
    xDstBase = ppdev->DstPixelOrigin + (ppdev->xyOffsetDst & 0xffff) +
               (ppdev->xyOffsetDst >> 16) * ppdev->DstPixelDelta;

    // the same strides expressed in 16-bit pixels
    cusSrcStride = culSrcStride * 2;
    cusDstStride = culDstStride * 2;

    SYNC_IF_CORE_BUSY;

    for (; crclClip > 0; --crclClip, ++prclClip)
    {
        cRows = prclClip->bottom - prclClip->top;
        cCols = prclClip->right - prclClip->left;

        // first pixel of the rectangle in the source and on the screen
        pusSrcRow = (USHORT *)(pulSrcBits + (prclClip->top + dySrc) * culSrcStride) +
                    dxSrc + prclClip->left;
        pusDstRow = (USHORT *)(pulScreen + prclClip->top * culDstStride) +
                    xDstBase + prclClip->left;

        for (iRow = 0; iRow < cRows;
             ++iRow, pusSrcRow += cusSrcStride, pusDstRow += cusDstStride)
        {
            USHORT  *pusSrc = pusSrcRow;
            USHORT  *pusDst = pusDstRow;
            LONG    cLeft = cCols;
            LONG    cPairs;

            if (((UINT_PTR)pusDst & (sizeof(ULONG) - 1)) != 0)
            {
                // destination is not on a ulong boundary: write the first
                // pixel of the scanline on its own
                *pusDst++ = *pusSrc++;
                --cLeft;
            }

            // write out the ulong-aligned words of the scanline.
            // NOTE(review): only the destination alignment is tested; the
            // source may still be read through a ULONG that is only
            // 2-byte aligned.
            for (cPairs = cLeft / 2; cPairs > 0; --cPairs)
            {
                *(ULONG *)pusDst = *(ULONG *)pusSrc;
                pusDst += 2;
                pusSrc += 2;
            }

            // write any remaining pixel
            if ((cLeft % 2) != 0)
            {
                *pusDst = *pusSrc;
            }
        }
    }
}
//****************************************************************************
// FUNC: vGlintCopyBltBypassDownloadXlate4bpp
// DESC: using the bypass mechanism we can take advantage of write-combining
//       which can be quicker than using the FIFO
//       NB. supports 32bpp and 16bpp destinations. Doesn't yet support 24bpp
//           destinations. No plans to add 8bpp support.
//
// Expands 4bpp source pixels through the translation table pxlo->pulXlate and
// writes the results through ppdev->pjScreen. Within a source byte the high
// nybble is the earlier (left) pixel and the low nybble the later one. If
// ppdev->cPelSize is neither GLINTDEPTH32 nor GLINTDEPTH16 nothing is
// written.
//
// ARGS: ppdev    - device; supplies pjScreen, DstPixelDelta, DstPixelOrigin,
//                  xyOffsetDst and cPelSize
//       psoSrc   - source surface (pvScan0 and lDelta are read)
//       pptlSrc  - source point corresponding to the top-left of prclDst
//       prclDst  - destination rectangle; only used to derive the
//                  destination -> source offset
//       prclClip - array of rectangles (destination coordinates) to copy
//       crclClip - number of rectangles in prclClip
//       pxlo     - translation object; pulXlate is indexed by nybble (0..15)
//****************************************************************************
VOID vGlintCopyBltBypassDownloadXlate4bpp(
    PDEV     *ppdev,
    SURFOBJ  *psoSrc,
    POINTL   *pptlSrc,
    RECTL    *prclDst,
    RECTL    *prclClip,
    LONG     crclClip,
    XLATEOBJ *pxlo)
{
    BYTE    *pjSrcScan0;
    LONG    cjSrcDelta, xSrcOff, ySrcOff;
    ULONG   *pulDstScan0;
    LONG    culDstDelta, xDstOff;
    LONG    cScans, cPixPerScan, c;
    // Byte distance from the end of one processed scan to the start of the
    // next. These must be signed: psoSrc->lDelta may be negative, and an
    // unsigned 32-bit value would be zero-extended (not sign-extended) when
    // added to a pointer on a 64-bit target.
    LONG    cjSrcDeltaRem, cjDstDeltaRem;
    ULONG   *aulXlate;
    BOOL    bSrcLowNybble;
    BYTE    *pjSrc, j;
    GLINT_DECL;

#if DBG && 0
    {
        SIZEL sizlDst;
        sizlDst.cx = prclClip->right - prclClip->left;
        sizlDst.cy = prclClip->bottom - prclClip->top;
        DISPDBG((-1, "vGlintCopyBltBypassDownloadXlate4bpp(): cRects(%d) sizlDst(%d,%d)", crclClip, sizlDst.cx, sizlDst.cy));
    }
#endif //DBG

    pjSrcScan0 = (BYTE *)psoSrc->pvScan0;
    cjSrcDelta = psoSrc->lDelta;
    xSrcOff = pptlSrc->x - prclDst->left;       // need to add arclClip[n].left to get xSrc
    ySrcOff = pptlSrc->y - prclDst->top;        // need to add arclClip[n].top to get ySrc

    pulDstScan0 = (ULONG *)ppdev->pjScreen;
    culDstDelta = ppdev->DstPixelDelta >> (2 - ppdev->cPelSize);
    // xyOffsetDst carries an x offset in its low 16 bits and a y offset in
    // its high 16 bits; fold both into a single pixel offset
    xDstOff = ppdev->DstPixelOrigin + (ppdev->xyOffsetDst & 0xffff) +
              (ppdev->xyOffsetDst >> 16) * ppdev->DstPixelDelta;

    aulXlate = pxlo->pulXlate;

    SYNC_IF_CORE_BUSY;

    for (; --crclClip >= 0; ++prclClip)
    {
        cScans = prclClip->bottom - prclClip->top;
        cPixPerScan = prclClip->right - prclClip->left;

        // TRUE when the first source pixel of each scan sits in the low
        // nybble of its byte (odd source x)
        bSrcLowNybble = (xSrcOff + prclClip->left) & 1;

        // Source bytes consumed per scan: one per pixel pair, plus one more
        // when the pixel count is odd or the scan starts on a low nybble.
        cjSrcDeltaRem = cjSrcDelta -
                        (cPixPerScan / 2 + ((cPixPerScan & 1) || bSrcLowNybble));

        // -1 to allow for prefix-increment
        pjSrc = -1 + pjSrcScan0 + (xSrcOff + prclClip->left) / 2 +
                ((prclClip->top + ySrcOff) * cjSrcDelta);

        if (ppdev->cPelSize == GLINTDEPTH32)
        {
            ULONG *pulDst;

            cjDstDeltaRem = (culDstDelta - cPixPerScan) * 4;
            pulDst = -1 + pulDstScan0 + xDstOff + prclClip->left +
                     prclClip->top * culDstDelta;

            if (bSrcLowNybble)
            {
                for (; --cScans >= 0;
                       pjSrc += cjSrcDeltaRem,
                       pulDst = (ULONG *)((BYTE *)pulDst + cjDstDeltaRem))
                {
                    // prime j with the byte whose low nybble is the first pixel
                    j = *++pjSrc;
                    for (c = cPixPerScan / 2; --c >= 0;)
                    {
                        *++pulDst = aulXlate[j & 0xf];
                        j = *++pjSrc;
                        *++pulDst = aulXlate[j >> 4];
                    }
                    if (cPixPerScan & 1)
                    {
                        *++pulDst = aulXlate[j & 0xf];
                    }
                }
            }
            else
            {
                for (; --cScans >= 0;
                       pjSrc += cjSrcDeltaRem,
                       pulDst = (ULONG *)((BYTE *)pulDst + cjDstDeltaRem))
                {
                    for (c = cPixPerScan / 2; --c >= 0;)
                    {
                        j = *++pjSrc;
                        *++pulDst = aulXlate[j >> 4];
                        *++pulDst = aulXlate[j & 0xf];
                    }
                    if (cPixPerScan & 1)
                    {
                        j = *++pjSrc;
                        *++pulDst = aulXlate[j >> 4];
                    }
                }
            }
        }
        else if (ppdev->cPelSize == GLINTDEPTH16)
        {
            USHORT *pusDst;

            cjDstDeltaRem = (culDstDelta << 2) - (cPixPerScan << ppdev->cPelSize);
            pusDst = -1 + (USHORT *)pulDstScan0 + xDstOff + prclClip->left +
                     prclClip->top * culDstDelta * 2;

            if (bSrcLowNybble)
            {
                for (; --cScans >= 0;
                       pjSrc += cjSrcDeltaRem,
                       pusDst = (USHORT *)((BYTE *)pusDst + cjDstDeltaRem))
                {
                    // prime j with the byte whose low nybble is the first pixel
                    j = *++pjSrc;
                    for (c = cPixPerScan / 2; --c >= 0;)
                    {
                        *++pusDst = (USHORT)aulXlate[j & 0xf];
                        j = *++pjSrc;
                        *++pusDst = (USHORT)aulXlate[j >> 4];
                    }
                    if (cPixPerScan & 1)
                    {
                        *++pusDst = (USHORT)aulXlate[j & 0xf];
                    }
                }
            }
            else
            {
                for (; --cScans >= 0;
                       pjSrc += cjSrcDeltaRem,
                       pusDst = (USHORT *)((BYTE *)pusDst + cjDstDeltaRem))
                {
                    for (c = cPixPerScan / 2; --c >= 0;)
                    {
                        j = *++pjSrc;
                        *++pusDst = (USHORT)aulXlate[j >> 4];
                        *++pusDst = (USHORT)aulXlate[j & 0xf];
                    }
                    if (cPixPerScan & 1)
                    {
                        j = *++pjSrc;
                        *++pusDst = (USHORT)aulXlate[j >> 4];
                    }
                }
            }
        }
    }
}
#endif
//@@END_DDKSPLIT
|