Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

3273 lines
114 KiB

/******************************Module*Header**********************************\
*
* *******************
* * GDI SAMPLE CODE *
* *******************
*
* Module Name: pxrxXfer.c
*
* Content: Bit transfer code
*
* Copyright (c) 1994-1999 3Dlabs Inc. Ltd. All rights reserved.
* Copyright (c) 1995-2003 Microsoft Corporation. All rights reserved.
\*****************************************************************************/
#include "precomp.h"
#include "ereg.h"
#include "pxrx.h"
#if _DEBUG
// Selects how TEST_DWORD_ALIGNED reports a misaligned pointer:
// TRUE  => trip ASSERTDD,
// FALSE => only emit a DISPDBG message.
static BOOL trapOnMisAlignment = TRUE;

// Debug-only check that 'ptr' is DWORD (4 byte) aligned.
//
// The pointer is converted through UINT_PTR (the pointer-sized integer the
// rest of this file uses) rather than being truncated with a ULONG cast, the
// macro argument is parenthesized so any pointer expression may be passed,
// and both branches are braced so that an ASSERTDD which expands to an 'if'
// statement cannot capture the 'else'.  The temporary carries a trailing
// underscore so that it cannot shadow a caller variable called 'addr'.
#define TEST_DWORD_ALIGNED(ptr) \
    do { \
        UINT_PTR addr_ = (UINT_PTR) (ptr); \
        \
        if( trapOnMisAlignment ) \
        { \
            ASSERTDD((addr_ & 3) == 0, "TEST_DWORD_ALIGNED(ptr) failed!"); \
        } \
        else if( addr_ & 3 ) \
        { \
            DISPDBG((-1, "TEST_DWORD_ALIGNED(0x%08X) is out by %d bytes!", \
                     (ULONG) addr_, (ULONG) (addr_ & 3))); \
        } \
    } while(0)
#else
// Release builds: the check compiles away and 'ptr' is not evaluated.
# define TEST_DWORD_ALIGNED(ptr) do { ; } while(0)
#endif
/**************************************************************************\
*
* VOID pxrxXfer1bpp
*
* Downloads a monochrome (1bpp) source bitmap to the destination using a
* SYNCBITMASK Render2D operation, once for each of the 'count' rectangles
* in 'prcl'.
*
* Parameters:
*   ppdev     - device data; ulWhite and cPelSize are read here (GLINT_DECL
*               and the DMA macros presumably use it as well).
*   prcl      - array of 'count' destination rectangles to fill.
*   count     - number of rectangles in 'prcl'; asserted to be > 0.
*   fgLogicOp - hardware logic op for the foreground; asserted <= 15.
*   bgLogicOp - hardware logic op for the background; asserted <= 15.
*   psoSrc    - source surface; pvScan0 and lDelta locate the bitmap bits.
*   pptlSrc   - source point that corresponds to the top-left of prclDst.
*   prclDst   - overall destination rectangle; together with pptlSrc it
*               gives the destination -> source offset.
*   pxlo      - colour translation; pulXlate[1] is used as the foreground
*               colour and pulXlate[0] as the background colour.
*
* The source is always fetched in whole, dword aligned 32-bit chunks; the
* user scissor is programmed to trim the download to the real rectangle.
*
\**************************************************************************/
VOID pxrxXfer1bpp(
PPDEV ppdev,
RECTL *prcl,
LONG count,
ULONG fgLogicOp,
ULONG bgLogicOp,
SURFOBJ *psoSrc,
POINTL *pptlSrc,
RECTL *prclDst,
XLATEOBJ *pxlo )
{
DWORD config2D, render2D;
LONG cx;
LONG cy;
LONG lSrcDelta;
BYTE *pjSrcScan0;
BYTE *pjSrc;
LONG dxSrc;
LONG dySrc;
LONG xLeft;
LONG yTop;
LONG xOffset;
ULONG fgColor;
ULONG bgColor;
RBRUSH_COLOR rbc; // not referenced by the code visible in this function
GLINT_DECL;
ASSERTDD(count > 0, "Can't handle zero rectangles");
ASSERTDD(fgLogicOp <= 15, "Weird fg hardware Rop");
ASSERTDD(bgLogicOp <= 15, "Weird bg hardware Rop");
ASSERTDD(pptlSrc != NULL && psoSrc != NULL, "Can't have NULL sources");
DISPDBG((DBGLVL, "pxrxXfer1bpp: original dstRect: (%d,%d) to (%d,%d)",
prclDst->left, prclDst->top,
prclDst->right, prclDst->bottom));
// Offsets which, added to a destination coordinate, give the matching
// source coordinate.
dxSrc = pptlSrc->x - prclDst->left;
dySrc = pptlSrc->y - prclDst->top; // Add to destination to get source
lSrcDelta = psoSrc->lDelta;
pjSrcScan0 = psoSrc->pvScan0;
DISPDBG((DBGLVL, "bitmap baseAddr 0x%x, stride %d, w %d, h %d",
pjSrcScan0, lSrcDelta,
psoSrc->sizlBitmap.cx, psoSrc->sizlBitmap.cy));
DISPDBG((DBGLVL, "fgColor 0x%x, bgColor 0x%x",
pxlo->pulXlate[1], pxlo->pulXlate[0]));
DISPDBG((DBGLVL, "fgLogicOp %d, bgLogicOp %d",
fgLogicOp, bgLogicOp));
// NOTE(review): pxlo is dereferenced unconditionally; the asserts above
// do not cover it, so callers must always supply a translation object.
fgColor = pxlo->pulXlate[1];
bgColor = pxlo->pulXlate[0];
// we get some common operations which are really noops. we can save
// lots of time by cutting these out. As this happens a lot for masking
// operations it's worth doing.
// (AND with white, OR with 0 and XOR with 0 all leave the destination
// unchanged.)
if( ((fgLogicOp == __GLINT_LOGICOP_AND) && (fgColor == ppdev->ulWhite)) ||
((fgLogicOp == __GLINT_LOGICOP_OR ) && (fgColor == 0)) ||
((fgLogicOp == __GLINT_LOGICOP_XOR) && (fgColor == 0)) )
{
fgLogicOp = __GLINT_LOGICOP_NOOP;
}
// same for background
if( ((bgLogicOp == __GLINT_LOGICOP_AND) && (bgColor == ppdev->ulWhite)) ||
((bgLogicOp == __GLINT_LOGICOP_OR ) && (bgColor == 0)) ||
((bgLogicOp == __GLINT_LOGICOP_XOR) && (bgColor == 0)) )
{
bgLogicOp = __GLINT_LOGICOP_NOOP;
}
// If neither colour can alter the destination there is nothing to draw.
if( (fgLogicOp == __GLINT_LOGICOP_NOOP) &&
(bgLogicOp == __GLINT_LOGICOP_NOOP) )
{
DISPDBG((DBGLVL, "both ops are no-op so lets quit now"));
return;
}
// Start from the cached Config2D value with the logic op enables and
// the general enable bits cleared, then turn on what this download needs.
config2D = glintInfo->config2D;
config2D &= ~(__CONFIG2D_LOGOP_FORE_ENABLE |
__CONFIG2D_LOGOP_BACK_ENABLE |
__CONFIG2D_ENABLES);
config2D |= __CONFIG2D_CONSTANTSRC |
__CONFIG2D_FBWRITE |
__CONFIG2D_USERSCISSOR;
render2D = __RENDER2D_INCX | __RENDER2D_INCY | __RENDER2D_OP_SYNCBITMASK;
// Anything other than "foreground COPY, background NOOP" needs both
// logic ops programmed and is rendered as opaque spans.
if( (fgLogicOp != __GLINT_LOGICOP_COPY) ||
(bgLogicOp != __GLINT_LOGICOP_NOOP) )
{
config2D &= ~(__CONFIG2D_LOGOP_FORE_MASK |
__CONFIG2D_LOGOP_BACK_MASK);
config2D |= __CONFIG2D_OPAQUESPANS |
__CONFIG2D_LOGOP_FORE(fgLogicOp) |
__CONFIG2D_LOGOP_BACK(bgLogicOp);
render2D |= __RENDER2D_SPANS;
}
SET_WRITE_BUFFERS;
// Reserve DMA space for the state queued below (the macro bodies are
// not visible in this file).
WAIT_PXRX_DMA_TAGS( 6 );
// Enable destination reads only when one of the logic ops needs them.
if( LogicopReadDest[fgLogicOp] || LogicopReadDest[bgLogicOp] )
{
config2D |= __CONFIG2D_FBDESTREAD;
SET_READ_BUFFERS;
}
// Colours are only loaded when the corresponding logic op uses its source.
if( LogicOpReadSrc[fgLogicOp] )
{
LOAD_FOREGROUNDCOLOUR( fgColor );
}
if( LogicOpReadSrc[bgLogicOp] )
{
LOAD_BACKGROUNDCOLOUR( bgColor );
}
LOAD_CONFIG2D( config2D );
// One pass per clip rectangle; the loop exits when 'count' reaches zero.
while( TRUE )
{
DISPDBG((DBGLVL, "mono download to rect (%d,%d) to (%d,%d)",
prcl->left, prcl->top, prcl->right, prcl->bottom));
yTop = prcl->top;
xLeft = prcl->left;
cx = prcl->right - xLeft;
cy = prcl->bottom - yTop;
// pjSrc is first dword containing a bit to download. xOffset is the
// offset to that bit. i.e. the bit offset from the previous 32bit
// boundary at the left hand edge of the rectangle.
// NOTE(review): xOffset is taken from the source x coordinate alone, so
// this presumes every source scanline starts on a dword boundary.
xOffset = (xLeft + dxSrc) & 0x1f;
pjSrc = (BYTE*)((UINT_PTR)(pjSrcScan0 +
(yTop + dySrc) * lSrcDelta +
(xLeft + dxSrc) / 8 // byte aligned
) & ~3); // dword aligned
DISPDBG((DBGLVL, "pjSrc 0x%x, lSrcDelta %d", pjSrc, lSrcDelta));
DISPDBG((DBGLVL, "\txOffset %d, cx %d, cy %d", xOffset, cx, cy));
// this algorithm downloads aligned 32-bit chunks from the
// source but uses the scissor clip to define the edge of the
// rectangle.
//
{
ULONG AlignWidth, LeftEdge;
// Width in pixels rounded up to whole dwords, measured from the
// dword aligned left edge.
AlignWidth = (xOffset + cx + 31) & ~31;
// Destination x that corresponds to bit 0 of the first source dword.
LeftEdge = xLeft - xOffset;
DISPDBG((7, "AlignWidth %d", AlignWidth));
// One index dword followed by four data dwords: scissor min, scissor
// max, rectangle origin and the Render2D command.
WAIT_PXRX_DMA_DWORDS( 5 );
QUEUE_PXRX_DMA_INDEX4( __GlintTagFillScissorMinXY,
__GlintTagFillScissorMaxXY,
__GlintTagFillRectanglePosition,
__GlintTagFillRender2D );
// Only x is clipped by the scissor; y is left open from 0 to 0x7fff.
QUEUE_PXRX_DMA_DWORD( MAKEDWORD_XY(xLeft, 0) );
QUEUE_PXRX_DMA_DWORD( MAKEDWORD_XY(prcl->right, 0x7fff) );
QUEUE_PXRX_DMA_DWORD( MAKEDWORD_XY(LeftEdge, yTop) );
QUEUE_PXRX_DMA_DWORD( render2D |
__RENDER2D_WIDTH(AlignWidth) |
__RENDER2D_HEIGHT(cy) );
SEND_PXRX_DMA_BATCH;
// Feed the bit mask data; the scanline delta is passed in dwords.
//@@BEGIN_DDKSPLIT
#if USE_RLE_DOWNLOADS
pxrxMonoDownloadRLE( ppdev,
AlignWidth,
(ULONG *) pjSrc,
lSrcDelta >> 2,
cy );
#else
//@@END_DDKSPLIT
pxrxMonoDownloadRaw( ppdev,
AlignWidth,
(ULONG *) pjSrc,
lSrcDelta >> 2,
cy );
//@@BEGIN_DDKSPLIT
#endif
//@@END_DDKSPLIT
}
if( --count == 0 )
{
break;
}
prcl++;
}
// Reset the scissor maximums:
// NOTE(review): this is only done at GLINTDEPTH32; the reason is not
// visible in this file.
if( ppdev->cPelSize == GLINTDEPTH32 ) {
WAIT_PXRX_DMA_TAGS( 1 );
QUEUE_PXRX_DMA_TAG( __GlintTagScissorMaxXY, 0x7FFF7FFF );
//@@BEGIN_DDKSPLIT
// SEND_PXRX_DMA_BATCH;
//@@END_DDKSPLIT
}
FLUSH_PXRX_PATCHED_RENDER2D(prclDst->left, prclDst->right);
SEND_PXRX_DMA_BATCH;
DISPDBG((DBGLVL, "pxrxXfer1bpp returning"));
}
/**************************************************************************\
*
* void pxrxMonoDownloadRaw
*
* Queues 'cy' scanlines of monochrome bit mask data to the
* __GlintTagBitMaskPattern register, uncompressed.
*
* Parameters:
*   ppdev      - device data (used by GLINT_DECL and the DMA macros).
*   AlignWidth - width of each scanline in pixels (= bits); the caller in
*                this file always passes a multiple of 32.
*   pjSrc      - dword aligned pointer to the first dword of the first
*                scanline.
*   lSrcDelta  - offset between the starts of consecutive scanlines,
*                in DWORDS (not bytes).
*   cy         - number of scanlines to download.
*
* A non-positive 'cy' is treated as "nothing to do".  Previously the
* multi-word loop only terminated on '--cy == 0', so such a value would
* have kept reading far past the source bitmap, and the single-word path
* would have queued a hold packet with a zero or negative count.
*
\**************************************************************************/
void pxrxMonoDownloadRaw(
    PPDEV   ppdev,
    ULONG   AlignWidth,
    ULONG   *pjSrc,
    LONG    lSrcDelta,
    LONG    cy )
{
    GLINT_DECL;

    if( cy <= 0 )
    {
        // No scanlines requested: queue nothing at all.
        return;
    }

    if( AlignWidth == 32 )
    {
        // Exactly one dword per scanline: pack several scanlines into each
        // hold packet instead of sending one packet per line.
        LONG    nSpaces = 0;
        ULONG   bits;       // unused in the visible code; kept in case a DMA
                            // macro refers to it by name

        DISPDBG((DBGLVL, "Doing Single Word per scan download"));

        do
        {
            // Ask for 10 dwords.  NOTE(review): WAIT_FREE_PXRX_DMA_DWORDS
            // presumably updates nSpaces with the space actually available;
            // its body is not visible here.
            nSpaces = 10;
            WAIT_FREE_PXRX_DMA_DWORDS( nSpaces );

            // One dword goes to the hold tag itself; never queue more
            // scanlines than remain.
            if( cy < --nSpaces )
            {
                nSpaces = cy;
            }
            cy -= nSpaces;

            QUEUE_PXRX_DMA_HOLD( __GlintTagBitMaskPattern, nSpaces );
            while( --nSpaces >= 0 )
            {
                TEST_DWORD_ALIGNED( pjSrc );
                QUEUE_PXRX_DMA_DWORD( *pjSrc );
                pjSrc += lSrcDelta;
            }

            SEND_PXRX_DMA_BATCH;
        } while( cy > 0 );
    }
    else
    {
        // Multiple 32 bit words per scanline.  Each scanline is sent as one
        // hold packet of nScan dwords copied straight from the source by
        // QUEUE_PXRX_DMA_BUFF; pjSrc itself is not advanced along the line,
        // so stepping to the next scanline uses the full start-to-start
        // delta (lSrcDelta, in dwords).
        LONG    nScan = AlignWidth >> 5;    // dwords per scanline
        LONG    nRemainder;                 // unused in the visible code;
        ULONG   bits;                       // kept in case a DMA macro
                                            // refers to them by name

        DISPDBG((7, "Doing Multiple Word per scan download"));

        // cy >= 1 is guaranteed by the guard at the top of the function.
        do
        {
            WAIT_PXRX_DMA_DWORDS( nScan + 1 );  // hold tag + data
            QUEUE_PXRX_DMA_HOLD( __GlintTagBitMaskPattern, nScan );
            TEST_DWORD_ALIGNED( pjSrc );
            QUEUE_PXRX_DMA_BUFF( pjSrc, nScan );
            SEND_PXRX_DMA_BATCH;

            pjSrc += lSrcDelta;
        } while( --cy > 0 );
    }
}
/**************************************************************************\
*
* VOID pxrxXfer8bpp
*
* Downloads an 8bpp palettised source bitmap to each of the 'count'
* rectangles in 'prcl', translating the pixels through the chip's LUT,
* which is loaded from pxlo->pulXlate (256 entries).
*
* Parameters:
*   ppdev     - device data; PalLUTType and iPalUniq cache which
*               translation is currently held in the LUT, cPelSize gives
*               the screen depth.
*   prcl      - array of 'count' destination rectangles.
*   count     - number of rectangles in 'prcl' (must be > 0: the loop
*               tests the count only after processing a rectangle).
*   logicOp   - hardware logic op to apply.
*   bgLogicOp - not used by this function.
*   psoSrc    - source surface; pvScan0 and lDelta locate the bitmap bits.
*   pptlSrc   - source point that corresponds to the top-left of prclDst.
*   prclDst   - overall destination rectangle.
*   pxlo      - colour translation; pulXlate supplies the LUT contents and
*               iUniq identifies it for caching.  Must not be NULL.
*
* Source data is always fetched in whole dwords (4 pixels); the user
* scissor trims the download to the real rectangle.
*
\**************************************************************************/
VOID pxrxXfer8bpp(
PPDEV ppdev,
RECTL *prcl,
LONG count,
ULONG logicOp,
ULONG bgLogicOp,
SURFOBJ *psoSrc,
POINTL *pptlSrc,
RECTL *prclDst,
XLATEOBJ *pxlo )
{
ULONG config2D, render2D, lutMode, pixelSize;
BOOL invalidLUT = FALSE;
LONG dx, dy, cy;
LONG lSrcDelta, lSrcDeltaDW, lTrueDelta, alignOff;
ULONG AlignWidth, LeftEdge;
BYTE* pjSrcScan0;
ULONG* pjSrc;
UINT_PTR startPos;
LONG cPelInv;
ULONG ul;
LONG nRemainder;
//@@BEGIN_DDKSPLIT
#if USE_RLE_DOWNLOADS
ULONG len, data, holdCount;
ULONG *tagPtr;
#endif
//@@END_DDKSPLIT
GLINT_DECL;
// Trace the first rectangle as (left,top) -> (right,bottom), matching the
// per-rectangle trace further down.  (The arguments used to be passed as
// left, right, top, bottom, which printed scrambled corners.)
DISPDBG((DBGLVL, "pxrxXfer8bpp(): src = (%d,%d) -> (%d,%d), "
"count = %d, logicOp = %d, palette id = %d",
prcl->left, prcl->top, prcl->right, prcl->bottom,
count, logicOp, pxlo->iUniq));
// Set up the LUT table:
// The LUT only needs reloading when it does not already hold this
// translation (identified by pxlo->iUniq).
if( (ppdev->PalLUTType != LUTCACHE_XLATE) ||
(ppdev->iPalUniq != pxlo->iUniq) )
{
// Someone has hijacked the LUT so we need to invalidate it:
ppdev->PalLUTType = LUTCACHE_XLATE;
ppdev->iPalUniq = pxlo->iUniq;
invalidLUT = TRUE;
}
else
{
DISPDBG((DBGLVL, "pxrxXfer8bpp: reusing cached xlate"));
}
// Space for the LUTMode tag plus the LUTIndex tag.
WAIT_PXRX_DMA_TAGS( 1 + 1 );
lutMode = glintInfo->lutMode & ~((3 << 2) | (1 << 4) | (7 << 8));
lutMode |= (ppdev->cPelSize + 2) << 8;
LOAD_LUTMODE( lutMode );
if( invalidLUT )
{
ULONG *pulXlate = pxlo->pulXlate;
LONG cEntries = 256;
QUEUE_PXRX_DMA_TAG( __PXRXTagLUTIndex, 0 );
if( ppdev->cPelSize == 0 )
{
// 8bpp
// Replicate each 8-bit entry into all four bytes of the LUT dword.
WAIT_PXRX_DMA_TAGS( cEntries );
do
{
ul = *(pulXlate++);
ul |= ul << 8;
ul |= ul << 16;
QUEUE_PXRX_DMA_TAG( __PXRXTagLUTData, ul );
} while( --cEntries );
}
else if( ppdev->cPelSize == 1 )
{
// 16bpp
// Replicate each 16-bit entry into both halves of the LUT dword.
WAIT_PXRX_DMA_TAGS( cEntries );
do
{
ul = *(pulXlate++);
ul |= ul << 16;
QUEUE_PXRX_DMA_TAG( __PXRXTagLUTData, ul );
} while( --cEntries );
}
else
{
// Any other depth: the translation entries are copied unmodified as
// one hold packet.
WAIT_PXRX_DMA_DWORDS( 1 + cEntries );
QUEUE_PXRX_DMA_HOLD( __PXRXTagLUTData, cEntries );
QUEUE_PXRX_DMA_BUFF( pulXlate, cEntries );
}
}
// Unlike the other transfer routines in this file, Config2D is built from
// scratch here rather than from glintInfo->config2D.
config2D = __CONFIG2D_FBWRITE |
__CONFIG2D_USERSCISSOR |
__CONFIG2D_EXTERNALSRC |
__CONFIG2D_LUTENABLE;
render2D = __RENDER2D_INCX |
__RENDER2D_INCY |
__RENDER2D_OP_SYNCDATA |
__RENDER2D_SPANS;
SET_WRITE_BUFFERS;
WAIT_PXRX_DMA_TAGS( 6 );
if( logicOp != __GLINT_LOGICOP_COPY )
{
config2D |= __CONFIG2D_LOGOP_FORE(logicOp) | __CONFIG2D_FBWRITE;
render2D |= __RENDER2D_SPANS;
// Destination reads are enabled only for logic ops that need them.
if( LogicopReadDest[logicOp] )
{
config2D |= __CONFIG2D_FBDESTREAD;
SET_READ_BUFFERS;
}
}
LOAD_CONFIG2D( config2D );
//@@BEGIN_DDKSPLIT
#if USE_RLE_DOWNLOADS
QUEUE_PXRX_DMA_TAG( __GlintTagDownloadTarget, __GlintTagColor );
#endif
//@@END_DDKSPLIT
cPelInv = 2 - ppdev->cPelSize;
pixelSize = (1 << 31) | // Everything before the LUT runs at 8bpp
(2 << 2) |
(2 << 4) |
(2 << 6) |
(cPelInv << 8) |
(cPelInv << 10) |
(cPelInv << 12) |
(cPelInv << 14);
QUEUE_PXRX_DMA_TAG( __GlintTagPixelSize, pixelSize );
// Offsets which, added to a destination coordinate, give the matching
// source coordinate.
dx = pptlSrc->x - prclDst->left;
dy = pptlSrc->y - prclDst->top; // Add to destination to get source
lSrcDelta = psoSrc->lDelta;
pjSrcScan0 = psoSrc->pvScan0;
// One pass per clip rectangle; the loop exits when 'count' reaches zero.
while( TRUE )
{
DISPDBG((DBGLVL, "download to rect (%d,%d) to (%d,%d)",
prcl->left, prcl->top,
prcl->right, prcl->bottom));
// 8bpp => 1 pixel per byte => 4 pixels per dword
// Assume source bitmap width is dword aligned
ASSERTDD((lSrcDelta & 3) == 0,
"pxrxXfer8bpp: SOURCE BITMAP WIDTH IS NOT DWORD ALIGNED!!!");
startPos = (((UINT_PTR) pjSrcScan0) + ((prcl->top + dy) * lSrcDelta))
+ (prcl->left + dx); // pointer to first pixel,
// in pixels/bytes
pjSrc = (ULONG *) (startPos & ~3); // dword pointer to dword
// aligned first pixel
// NOTE(review): this early return skips the "Reset some defaults" code
// at the bottom of the function, so the PixelSize tag queued above is
// not restored on this path.
if(NULL == pjSrc)
{
DISPDBG((ERRLVL, "ERROR: pxrxXfer8bpp return ,has pjSrc NULL"));
return;
}
alignOff = (ULONG)(startPos & 3); // number of pixels past dword
// alignment of a scanline
LeftEdge = prcl->left - alignOff; // dword aligned left edge in pixels
AlignWidth = ((prcl->right - LeftEdge) + 3) & ~3; // dword aligned width
// in pixels
cy = prcl->bottom - prcl->top; // number of scanlines to do
DISPDBG((DBGLVL, "pjSrcScan0 = 0x%08X, "
"startPos = 0x%08X, pjSrc = 0x%08X",
pjSrcScan0, startPos, pjSrc));
DISPDBG((DBGLVL, "offset = %d pixels", alignOff));
DISPDBG((DBGLVL, "Aligned rect = (%d -> %d) => %d pixels => %d dwords",
LeftEdge, LeftEdge + AlignWidth,
AlignWidth, AlignWidth >> 2));
// The scissor clips x to the real rectangle (y is left open from 0 to
// 0x7fff) while the rendered rectangle starts at the dword aligned edge.
WAIT_PXRX_DMA_TAGS( 4 );
QUEUE_PXRX_DMA_TAG( __GlintTagScissorMinXY,
MAKEDWORD_XY(prcl->left, 0) );
QUEUE_PXRX_DMA_TAG( __GlintTagScissorMaxXY,
MAKEDWORD_XY(prcl->right, 0x7fff) );
QUEUE_PXRX_DMA_TAG( __GlintTagRectanglePosition,
MAKEDWORD_XY(LeftEdge, prcl->top) );
QUEUE_PXRX_DMA_TAG( __GlintTagRender2D,
render2D |
__RENDER2D_WIDTH(AlignWidth) |
__RENDER2D_HEIGHT(cy) );
SEND_PXRX_DMA_BATCH;
AlignWidth >>= 2; // dword aligned width in dwords
lSrcDeltaDW = lSrcDelta >> 2; // scanline delta in dwords
// (start to start)
lTrueDelta = lSrcDeltaDW - AlignWidth; // scanline delta in dwords
// (end to start)
DISPDBG((DBGLVL, "Delta = %d bytes = %d dwords -> %d - %d dwords",
lSrcDelta, lSrcDeltaDW, lTrueDelta, AlignWidth));
//@@BEGIN_DDKSPLIT
#if USE_RLE_DOWNLOADS
// Do an RLE download:
// Runs of four or more identical dwords are sent as an RLData/RLCount
// pair; shorter runs are appended to a hold packet whose tag dword is
// reserved up front (tagPtr) and filled in once its length is known.
tagPtr = NULL;
do
{
WAIT_PXRX_DMA_TAGS( AlignWidth + 1 );
nRemainder = AlignWidth;
while( nRemainder-- )
{
TEST_DWORD_ALIGNED( pjSrc );
data = *(pjSrc++);
len = 1;
TEST_DWORD_ALIGNED( pjSrc );
// Extend the run while dwords remain on this scanline and they repeat.
while( nRemainder && (*pjSrc == data) )
{
pjSrc++;
len++;
nRemainder--;
TEST_DWORD_ALIGNED( pjSrc );
}
if( len >= 4 )
{
// Close any open hold packet before emitting the run.
if( tagPtr )
{
*tagPtr = ASSEMBLE_PXRX_DMA_HOLD( __GlintTagColor,
holdCount );
tagPtr = NULL;
}
QUEUE_PXRX_DMA_INDEX2( __GlintTagRLData,
__GlintTagRLCount );
QUEUE_PXRX_DMA_DWORD( data );
QUEUE_PXRX_DMA_DWORD( len );
len = 0;
}
else
{
// Short run: open a hold packet if none is pending, then emit the
// dwords literally.
if( !tagPtr )
{
QUEUE_PXRX_DMA_DWORD_DELAYED( tagPtr );
holdCount = 0;
}
holdCount += len;
while( len-- )
{
QUEUE_PXRX_DMA_DWORD( data );
}
}
}
// Close the hold packet left open at the end of the scanline.
if( tagPtr )
{
*tagPtr = ASSEMBLE_PXRX_DMA_HOLD( __GlintTagColor,
holdCount );
tagPtr = NULL;
}
// pjSrc has walked to the end of the line, so only the end-to-start
// delta is needed to reach the next scanline.
pjSrc += lTrueDelta;
SEND_PXRX_DMA_BATCH;
} while( --cy > 0 );
#else
//@@END_DDKSPLIT
// Do a raw download:
// One hold packet of AlignWidth dwords per scanline.  pjSrc stays at the
// start of the line, so it advances by the start-to-start delta.
while( TRUE )
{
DISPDBG((DBGLVL, "cy = %d", cy));
WAIT_PXRX_DMA_DWORDS( AlignWidth + 1 );
QUEUE_PXRX_DMA_HOLD( __GlintTagColor, AlignWidth );
TEST_DWORD_ALIGNED( pjSrc );
QUEUE_PXRX_DMA_BUFF( pjSrc, AlignWidth );
SEND_PXRX_DMA_BATCH;
if( --cy == 0 )
{
break;
}
pjSrc += lSrcDeltaDW;
}
//@@BEGIN_DDKSPLIT
#endif
//@@END_DDKSPLIT
if( --count == 0 )
{
break;
}
prcl++;
}
// Reset some defaults:
// PixelSize goes back to cPelInv; the scissor maximum is only restored
// at GLINTDEPTH32.
WAIT_PXRX_DMA_TAGS( 2 );
QUEUE_PXRX_DMA_TAG( __GlintTagPixelSize, cPelInv );
if( ppdev->cPelSize == GLINTDEPTH32 )
{
QUEUE_PXRX_DMA_TAG( __GlintTagScissorMaxXY, 0x7FFF7FFF );
}
SEND_PXRX_DMA_BATCH;
DISPDBG((DBGLVL, "pxrxXfer8bpp return"));
}
/**************************************************************************\
*
* VOID pxrxXferImage
*
* Downloads a source bitmap that has the same colour depth as the screen to
* each of the 'count' rectangles in 'prcl'.  No colour translation is
* performed: pxlo is asserted to be NULL or trivial.
*
* Parameters:
*   ppdev     - device data; cPelSize and iBitmapFormat are read here.
*   prcl      - array of 'count' destination rectangles.
*   count     - number of rectangles in 'prcl'; asserted to be > 0.
*   logicOp   - hardware logic op to apply; asserted <= 15.
*   bgLogicOp - not used by this function.
*   psoSrc    - source surface; pvScan0 and lDelta locate the bitmap bits.
*   pptlSrc   - source point that corresponds to the top-left of prclDst.
*   prclDst   - overall destination rectangle.
*   pxlo      - NULL or a trivial translation (asserted only).
*
* Source data is always fetched in whole dwords; the user scissor trims
* the download to the real rectangle.
*
\**************************************************************************/
VOID pxrxXferImage(
PPDEV ppdev,
RECTL *prcl,
LONG count,
ULONG logicOp,
ULONG bgLogicOp,
SURFOBJ *psoSrc,
POINTL *pptlSrc,
RECTL *prclDst,
XLATEOBJ *pxlo )
{
DWORD config2D, render2D;
LONG dx, dy, cy;
LONG lSrcDelta, lTrueDelta, lSrcDeltaDW, alignOff;
BYTE* pjSrcScan0;
ULONG* pjSrc;
UINT_PTR startPos;
LONG cPel, cPelInv;
ULONG cPelMask;
ULONG AlignWidth, LeftEdge;
LONG nRemainder;
//@@BEGIN_DDKSPLIT
#if USE_RLE_DOWNLOADS
ULONG len, data, holdCount;
ULONG *tagPtr;
#endif
//@@END_DDKSPLIT
GLINT_DECL;
// NOTE(review): a DMA send is forced before any state is queued; the
// reason is not visible in this file.
SEND_PXRX_DMA_FORCE;
ASSERTDD((pxlo == NULL) || (pxlo->flXlate & XO_TRIVIAL),
"Can handle trivial xlate only");
ASSERTDD(psoSrc->iBitmapFormat == ppdev->iBitmapFormat,
"Source must be same colour depth as screen");
ASSERTDD(count > 0,
"Can't handle zero rectangles");
ASSERTDD(logicOp <= 15,
"Weird hardware Rop");
// Offsets which, added to a destination coordinate, give the matching
// source coordinate.
dx = pptlSrc->x - prclDst->left;
dy = pptlSrc->y - prclDst->top; // Add to destination to get source
cPel = ppdev->cPelSize; // number of bytes per pixel = 1 << cPel
cPelInv = 2 - cPel; // number of pixels per dword = 1 << cPelInv
// (pixels -> dwords = >> cPelInv)
cPelMask = (1 << cPelInv) - 1; // mask to obtain number of pixels
// past a dword
lSrcDelta = psoSrc->lDelta;
pjSrcScan0 = psoSrc->pvScan0;
DISPDBG((DBGLVL, "pxrxXferImage with logic op %d for %d rects",
logicOp, count));
// Start from the cached Config2D value with the logic op enables and the
// general enable bits cleared.
config2D = glintInfo->config2D & ~(__CONFIG2D_LOGOP_FORE_ENABLE |
__CONFIG2D_LOGOP_BACK_ENABLE |
__CONFIG2D_ENABLES);
config2D |= __CONFIG2D_FBWRITE |
__CONFIG2D_USERSCISSOR;
render2D = __RENDER2D_INCX |
__RENDER2D_INCY |
__RENDER2D_OP_SYNCDATA |
__RENDER2D_SPANS;
SET_WRITE_BUFFERS;
WAIT_PXRX_DMA_TAGS( 5 );
// For anything other than a plain copy, program the logic op and enable
// the external source; destination reads are enabled only when the op
// needs them.
if( logicOp != __GLINT_LOGICOP_COPY )
{
config2D &= ~__CONFIG2D_LOGOP_FORE_MASK;
config2D |= __CONFIG2D_LOGOP_FORE(logicOp) |
__CONFIG2D_EXTERNALSRC;
if( LogicopReadDest[logicOp] )
{
config2D |= __CONFIG2D_FBDESTREAD;
SET_READ_BUFFERS;
}
}
LOAD_CONFIG2D( config2D );
//@@BEGIN_DDKSPLIT
#if USE_RLE_DOWNLOADS
QUEUE_PXRX_DMA_TAG( __GlintTagDownloadTarget,
__GlintTagColor );
#endif
//@@END_DDKSPLIT
// One pass per clip rectangle; the loop exits when 'count' reaches zero.
while( TRUE )
{
cy = prcl->bottom - prcl->top;
DISPDBG((DBGLVL, "download to rect (%d,%d) to (%d,%d)",
prcl->left, prcl->top, prcl->right, prcl->bottom));
ASSERTDD((lSrcDelta & 3) == 0,
"pxrxXferImage: SOURCE BITMAP WIDTH IS NOT DWORD ALIGNED!!!");
// startPos is the byte address of the first pixel to copy; pjSrc is
// that address rounded down to a dword boundary.
// lTrueDelta is the additional amount to add onto the pjSrc pointer
// when we get to the end of the scanline.
startPos = ((UINT_PTR) pjSrcScan0) + ((prcl->top + dy) * lSrcDelta) +
((prcl->left + dx) << cPel);
alignOff = ((ULONG) (startPos & 3)) >> cPel; // number of pixels past
// dword aligned start
pjSrc = (ULONG *) (startPos & ~3); // dword aligned pointer to 1st pixel
// NOTE(review): this early return skips the scissor reset, the
// FLUSH_PXRX_PATCHED_RENDER2D call and the final batch send below.
if(NULL == pjSrc)
{
DISPDBG((ERRLVL, "ERROR: "
"pxrxXferImage return because of pjSrc NULL"));
return;
}
// dword aligned left edge in pixels
LeftEdge = prcl->left - alignOff;
// dword aligned width in pixels
AlignWidth = (prcl->right - LeftEdge + cPelMask) & ~cPelMask;
DISPDBG((DBGLVL, "Aligned rect = (%d -> %d) => %d pixels",
LeftEdge, LeftEdge + AlignWidth, AlignWidth));
DISPDBG((DBGLVL, "pjSrcScan0 = 0x%08X, "
"pjSrc = 0x%08X, alignOff = %d pixels",
pjSrcScan0, pjSrc, alignOff));
// Sanity check: recomputing the address from LeftEdge must give pjSrc.
ASSERTDD( ((UINT_PTR) pjSrcScan0) + ((prcl->top + dy) * lSrcDelta) +
((LeftEdge + dx) << cPel) == (UINT_PTR) pjSrc,
"pxrxXferImage: "
"Aligned left edge does not match aligned pjSrc!" );
// One index dword followed by four data dwords: scissor min, scissor
// max, rectangle origin and the Render2D command.  Only x is clipped by
// the scissor; y is left open from 0 to 0x7fff.
WAIT_PXRX_DMA_DWORDS( 5 );
QUEUE_PXRX_DMA_INDEX4( __GlintTagFillScissorMinXY,
__GlintTagFillScissorMaxXY,
__GlintTagFillRectanglePosition,
__GlintTagFillRender2D );
QUEUE_PXRX_DMA_DWORD( MAKEDWORD_XY(prcl->left, 0) );
QUEUE_PXRX_DMA_DWORD( MAKEDWORD_XY(prcl->right, 0x7fff) );
QUEUE_PXRX_DMA_DWORD( MAKEDWORD_XY(LeftEdge, prcl->top) );
QUEUE_PXRX_DMA_DWORD( render2D |
__RENDER2D_WIDTH(AlignWidth) |
__RENDER2D_HEIGHT(cy) );
SEND_PXRX_DMA_BATCH;
AlignWidth >>= cPelInv; // dword aligned width in dwords
lSrcDeltaDW = lSrcDelta >> 2; // scanline delta in dwords
//(start to start)
lTrueDelta = lSrcDeltaDW - AlignWidth; // scanline delta in dwords
// (end to start)
DISPDBG((DBGLVL, "Delta = %d bytes = %d dwords -> %d - %d dwords",
lSrcDelta, lSrcDeltaDW, lTrueDelta, AlignWidth));
//@@BEGIN_DDKSPLIT
#if USE_RLE_DOWNLOADS
// Do an RLE download:
// Runs of four or more identical dwords are sent as an RLData/RLCount
// pair; shorter runs are appended to a hold packet whose tag dword is
// reserved up front (tagPtr) and filled in once its length is known.
tagPtr = NULL;
do
{
WAIT_PXRX_DMA_TAGS( AlignWidth + 1 );
nRemainder = AlignWidth;
while( nRemainder-- )
{
TEST_DWORD_ALIGNED( pjSrc );
data = *(pjSrc++);
len = 1;
TEST_DWORD_ALIGNED( pjSrc );
// Extend the run while dwords remain on this scanline and they repeat.
while( nRemainder && (*pjSrc == data) )
{
pjSrc++;
len++;
nRemainder--;
TEST_DWORD_ALIGNED( pjSrc );
}
if( len >= 4 )
{
// Close any open hold packet before emitting the run.
if( tagPtr )
{
*tagPtr = ASSEMBLE_PXRX_DMA_HOLD( __GlintTagColor,
holdCount );
tagPtr = NULL;
}
QUEUE_PXRX_DMA_INDEX2( __GlintTagRLData,
__GlintTagRLCount );
QUEUE_PXRX_DMA_DWORD( data );
QUEUE_PXRX_DMA_DWORD( len );
len = 0;
}
else
{
// Short run: open a hold packet if none is pending, then emit the
// dwords literally.
if( !tagPtr )
{
QUEUE_PXRX_DMA_DWORD_DELAYED( tagPtr );
holdCount = 0;
}
holdCount += len;
while( len-- )
{
QUEUE_PXRX_DMA_DWORD( data );
}
}
}
// Close the hold packet left open at the end of the scanline.
if( tagPtr )
{
*tagPtr = ASSEMBLE_PXRX_DMA_HOLD( __GlintTagColor, holdCount );
tagPtr = NULL;
}
// pjSrc has walked to the end of the line, so only the end-to-start
// delta is needed to reach the next scanline.
pjSrc += lTrueDelta;
// SEND_PXRX_DMA_BATCH;
} while( --cy > 0 );
#else
//@@END_DDKSPLIT
// Do a raw download:
// One hold packet of AlignWidth dwords per scanline.  pjSrc stays at the
// start of the line, so it advances by the start-to-start delta.  The
// batch is not sent per scanline here (see the disabled send below).
while( TRUE )
{
DISPDBG((DBGLVL, "cy = %d", cy));
WAIT_PXRX_DMA_DWORDS( AlignWidth + 1 );
QUEUE_PXRX_DMA_HOLD( __GlintTagColor, AlignWidth );
TEST_DWORD_ALIGNED( pjSrc );
QUEUE_PXRX_DMA_BUFF( pjSrc, AlignWidth );
// SEND_PXRX_DMA_BATCH;
if( --cy == 0 )
{
break;
}
pjSrc += lSrcDeltaDW;
}
//@@BEGIN_DDKSPLIT
#endif
//@@END_DDKSPLIT
if( --count == 0 )
{
break;
}
prcl++;
}
// Reset the scissor maximums:
// NOTE(review): this is only done at GLINTDEPTH32; the reason is not
// visible in this file.
if( ppdev->cPelSize == GLINTDEPTH32 )
{
WAIT_PXRX_DMA_TAGS( 1 );
QUEUE_PXRX_DMA_TAG( __GlintTagScissorMaxXY, 0x7FFF7FFF );
// SEND_PXRX_DMA_BATCH;
}
FLUSH_PXRX_PATCHED_RENDER2D(prclDst->left, prclDst->right);
SEND_PXRX_DMA_BATCH;
DISPDBG((DBGLVL, "pxrxXferImage return"));
}
/**************************************************************************\
*
* VOID pxrxXfer4bpp
*
* Downloads a 4bpp palettised source bitmap to each of the 'count'
* rectangles in 'prcl'.  Pixels are sent packed (8 per dword) through
* __GlintTagPacked4Pixels and translated by the chip's LUT, which is loaded
* from pxlo->pulXlate (16 entries).
*
* Parameters:
*   ppdev     - device data; PalLUTType and iPalUniq cache which
*               translation is currently held in the LUT, cPelSize gives
*               the screen depth.
*   prcl      - array of 'count' destination rectangles.
*   count     - number of rectangles in 'prcl' (must be > 0: the loop
*               tests the count only after processing a rectangle).
*   logicOp   - hardware logic op to apply.
*   bgLogicOp - not used by this function.
*   psoSrc    - source surface; pvScan0 and lDelta locate the bitmap bits.
*   pptlSrc   - source point that corresponds to the top-left of prclDst.
*   prclDst   - overall destination rectangle.
*   pxlo      - colour translation; pulXlate supplies the LUT contents and
*               iUniq identifies it for caching.  Must not be NULL.
*
* Source data is always fetched in whole dwords (8 pixels); the user
* scissor trims the download to the real rectangle.
*
\**************************************************************************/
VOID pxrxXfer4bpp(
PPDEV ppdev,
RECTL *prcl,
LONG count,
ULONG logicOp,
ULONG bgLogicOp,
SURFOBJ *psoSrc,
POINTL *pptlSrc,
RECTL *prclDst,
XLATEOBJ *pxlo )
{
ULONG config2D, render2D, lutMode, pixelSize;
BOOL invalidLUT = FALSE;
LONG dx, dy;
LONG cy;
BYTE* pjSrcScan0;
ULONG* pjSrc;
LONG cPelInv;
ULONG ul;
ULONG AlignWidth, LeftEdge;
UINT_PTR startPos;
LONG nRemainder;
LONG lSrcDelta, lSrcDeltaDW;
LONG alignOff;
GLINT_DECL;
// Trace the first rectangle as (left,top) -> (right,bottom), matching the
// per-rectangle trace further down.  (The arguments used to be passed as
// left, right, top, bottom, which printed scrambled corners.)
DISPDBG((DBGLVL, "pxrxXfer4bpp(): src = (%d,%d) -> (%d,%d), count = %d, "
"logicOp = %d, palette id = %d",
prcl->left, prcl->top, prcl->right, prcl->bottom, count,
logicOp, pxlo->iUniq));
// Set up the LUT table:
// The LUT only needs reloading when it does not already hold this
// translation (identified by pxlo->iUniq).
if( (ppdev->PalLUTType != LUTCACHE_XLATE) ||
(ppdev->iPalUniq != pxlo->iUniq) )
{
// Someone has hijacked the LUT so we need to invalidate it:
ppdev->PalLUTType = LUTCACHE_XLATE;
ppdev->iPalUniq = pxlo->iUniq;
invalidLUT = TRUE;
}
else
{
DISPDBG((DBGLVL, "pxrxXfer4bpp: reusing cached xlate"));
}
// Space for the LUTMode tag, the LUTIndex tag and up to 16 LUTData tags.
WAIT_PXRX_DMA_TAGS( 1 + 1 + 16 );
lutMode = glintInfo->lutMode & ~((3 << 2) | (1 << 4) | (7 << 8));
lutMode |= (ppdev->cPelSize + 2) << 8;
LOAD_LUTMODE( lutMode );
if( invalidLUT )
{
ULONG *pulXlate = pxlo->pulXlate;
LONG cEntries = 16;
QUEUE_PXRX_DMA_TAG( __PXRXTagLUTIndex, 0 );
if( ppdev->cPelSize == 0 ) // 8bpp
{
// Replicate each 8-bit entry into all four bytes of the LUT dword.
do
{
ul = *(pulXlate++);
ul |= ul << 8;
ul |= ul << 16;
QUEUE_PXRX_DMA_TAG( __PXRXTagLUTData, ul );
} while( --cEntries );
}
else if( ppdev->cPelSize == 1 ) // 16bpp
{
// Replicate each 16-bit entry into both halves of the LUT dword.
do
{
ul = *(pulXlate++);
ul |= ul << 16;
QUEUE_PXRX_DMA_TAG( __PXRXTagLUTData, ul );
} while( --cEntries );
}
else
{
// Any other depth: the translation entries are copied unmodified as
// one hold packet.
QUEUE_PXRX_DMA_HOLD( __PXRXTagLUTData, cEntries );
QUEUE_PXRX_DMA_BUFF( pulXlate, cEntries );
}
}
// Start from the cached Config2D value with the logic op enables and the
// general enable bits cleared.
config2D = glintInfo->config2D & ~(__CONFIG2D_LOGOP_FORE_ENABLE |
__CONFIG2D_LOGOP_BACK_ENABLE |
__CONFIG2D_ENABLES);
config2D |= __CONFIG2D_FBWRITE |
__CONFIG2D_USERSCISSOR;
render2D = __RENDER2D_INCX |
__RENDER2D_INCY |
__RENDER2D_OP_SYNCDATA |
__RENDER2D_SPANS;
SET_WRITE_BUFFERS;
WAIT_PXRX_DMA_TAGS( 6 );
if( logicOp != __GLINT_LOGICOP_COPY )
{
config2D &= ~(__CONFIG2D_LOGOP_FORE_MASK |
__CONFIG2D_LOGOP_BACK_MASK);
config2D |= __CONFIG2D_LOGOP_FORE(logicOp) |
__CONFIG2D_FBWRITE;
render2D |= __RENDER2D_SPANS;
// Destination reads are enabled only for logic ops that need them.
if( LogicopReadDest[logicOp] )
{
config2D |= __CONFIG2D_FBDESTREAD;
SET_READ_BUFFERS;
}
// The external source and the LUT are enabled only for logic ops that
// use the source.
if( LogicOpReadSrc[logicOp] )
{
config2D |= __CONFIG2D_EXTERNALSRC |
__CONFIG2D_LUTENABLE;
}
}
else
{
// A plain copy always reads the (LUT translated) source.
config2D |= __CONFIG2D_EXTERNALSRC |
__CONFIG2D_LUTENABLE;
}
LOAD_CONFIG2D( config2D );
QUEUE_PXRX_DMA_TAG( __GlintTagDownloadTarget, __GlintTagColor );
cPelInv = 2 - ppdev->cPelSize;
// Everything before the LUT runs at 8bpp
pixelSize = (1 << 31) |
(2 << 2) |
(2 << 4) |
(2 << 6) |
(2 << 16) |
(cPelInv << 8) |
(cPelInv << 10) |
(cPelInv << 12) |
(cPelInv << 14);
QUEUE_PXRX_DMA_TAG( __GlintTagPixelSize, pixelSize );
// Offsets which, added to a destination coordinate, give the matching
// source coordinate.
dx = pptlSrc->x - prclDst->left;
dy = pptlSrc->y - prclDst->top; // Add to destination to get source
// cPel = ppdev->cPelSize;
// cPelMask = (1 << cPelInv) - 1;
lSrcDelta = psoSrc->lDelta;
pjSrcScan0 = psoSrc->pvScan0;
// One pass per clip rectangle; the loop exits when 'count' reaches zero.
while( TRUE )
{
DISPDBG((DBGLVL, "download to rect (%d,%d) to (%d,%d)",
prcl->left, prcl->top, prcl->right, prcl->bottom));
// 4bpp => 2 pixels per byte => 8 pixels per dword
// Assume source bitmap width is dword aligned
ASSERTDD( (lSrcDelta & 3) == 0,
"pxrxXfer4bpp: SOURCE BITMAP WIDTH IS NOT DWORD ALIGNED!!!");
// pointer to first pixel, in bytes (32/64 bits long)
startPos = (((UINT_PTR) pjSrcScan0) + ((prcl->top + dy) * lSrcDelta)) +
((prcl->left + dx) >> 1);
pjSrc = (ULONG *) (startPos & ~3); // dword pointer to dword
// aligned first pixel
// NOTE(review): this early return skips the "Reset some defaults" code
// at the bottom of the function, so the PixelSize tag queued above is
// not restored on this path.
if(NULL == pjSrc)
{
DISPDBG((ERRLVL, "ERROR: "
"pxrxXfer4bpp return because of pjSrc NULL"));
return;
}
// pointer to first pixel, in pixels (33/65 bits long!)
// The shift drops the top address bit, which is harmless because only
// the low three bits are used below.
startPos = (( ((UINT_PTR) pjSrcScan0) +
((prcl->top + dy) * lSrcDelta)) << 1)
+ (prcl->left + dx);
alignOff = (ULONG)(startPos & 7); // number of pixels past dword
// alignment of a scanline
LeftEdge = prcl->left - alignOff; // dword aligned left edge in pixels
// dword aligned width in pixels
AlignWidth = ((prcl->right - LeftEdge) + 7) & ~7;
cy = prcl->bottom - prcl->top; // number of scanlines to do
DISPDBG((DBGLVL, "pjSrcScan0 = 0x%08X, startPos = 0x%08X (>>1), "
"pjSrc = 0x%08X",
pjSrcScan0, startPos >> 1, pjSrc));
DISPDBG((DBGLVL, "offset = %d pixels", alignOff));
DISPDBG((DBGLVL, "Aligned rect = (%d -> %d) => %d pixels => %d dwords",
LeftEdge, LeftEdge + AlignWidth,
AlignWidth, AlignWidth >> 3));
// The scissor clips x to the real rectangle (y is left open from 0 to
// 0x7fff) while the rendered rectangle starts at the dword aligned edge.
WAIT_PXRX_DMA_TAGS( 4 );
QUEUE_PXRX_DMA_TAG( __GlintTagScissorMinXY,
MAKEDWORD_XY(prcl->left, 0) );
QUEUE_PXRX_DMA_TAG( __GlintTagScissorMaxXY,
MAKEDWORD_XY(prcl->right, 0x7fff) );
QUEUE_PXRX_DMA_TAG( __GlintTagRectanglePosition,
MAKEDWORD_XY(LeftEdge, prcl->top) );
QUEUE_PXRX_DMA_TAG( __GlintTagRender2D, render2D |
__RENDER2D_WIDTH(AlignWidth) |
__RENDER2D_HEIGHT(cy) );
SEND_PXRX_DMA_BATCH;
AlignWidth >>= 3; // dword aligned width in dwords
lSrcDeltaDW = lSrcDelta >> 2; // dword aligned scanline offset in dwords
DISPDBG((DBGLVL, "Delta = %d pixels = %d dwords",
lSrcDelta << 1, lSrcDeltaDW));
// pjSrc = dword aligned pointer to first
// dword of first scanline
// AlignWidth = number of dwords per scanline
// lSrcDeltaDW = dword offset between first dwords
// of consecutive scanlines
// cy = number of scanlines
while( TRUE )
{
// nRemainder is written here but not read by the visible code.
nRemainder = AlignWidth;
DISPDBG((DBGLVL, "cy = %d", cy));
WAIT_PXRX_DMA_DWORDS( AlignWidth + 1 );
QUEUE_PXRX_DMA_HOLD( __GlintTagPacked4Pixels, AlignWidth );
TEST_DWORD_ALIGNED( pjSrc );
QUEUE_PXRX_DMA_BUFF( pjSrc, AlignWidth );
// The last scanline's packet is left queued; it goes out with the next
// batch send (the next rectangle's setup or the final send below).
if( --cy == 0 )
{
break;
}
pjSrc += lSrcDeltaDW;
SEND_PXRX_DMA_BATCH;
}
if( --count == 0 )
{
break;
}
prcl++;
}
// Reset some defaults:
// PixelSize goes back to cPelInv; the scissor maximum is only restored
// at GLINTDEPTH32.
WAIT_PXRX_DMA_TAGS( 2 );
QUEUE_PXRX_DMA_TAG( __GlintTagPixelSize, cPelInv );
if( ppdev->cPelSize == GLINTDEPTH32 )
{
QUEUE_PXRX_DMA_TAG( __GlintTagScissorMaxXY, 0x7FFF7FFF );
}
SEND_PXRX_DMA_BATCH;
DISPDBG((DBGLVL, "pxrxXfer4bpp return"));
}
/**************************************************************************\
*
* VOID pxrxCopyXfer24bpp
*
\**************************************************************************/
VOID pxrxCopyXfer24bpp(
PPDEV ppdev,
SURFOBJ *psoSrc,
POINTL *pptlSrc,
RECTL *prclDst,
RECTL *prcl,
LONG count )
{
ULONG config2D, render2D, pixelSize;
LONG dx, dy, cy, LeftEdge;
LONG lSrcDelta, lSrcDeltaDW, lTrueDelta, alignOff;
UINT_PTR startPos;
BYTE* pjSrcScan0;
ULONG* pjSrc;
LONG cPelInv;
ULONG ul, nRemainder;
ULONG padLeft, padLeftDW, padRight, padRightDW, dataWidth;
ULONG AlignWidth, AlignWidthDW, AlignExtra;
GLINT_DECL;
DISPDBG((DBGLVL, "pxrxCopyXfer24bpp(): "
"src = (%d,%d) -> (%d,%d), count = %d",
prcl->left, prcl->right, prcl->top, prcl->bottom, count));
config2D = glintInfo->config2D & ~(__CONFIG2D_LOGOP_FORE_ENABLE |
__CONFIG2D_LOGOP_BACK_ENABLE |
__CONFIG2D_ENABLES);
config2D |= __CONFIG2D_FBWRITE |
__CONFIG2D_EXTERNALSRC |
__CONFIG2D_USERSCISSOR;
render2D = __RENDER2D_INCX |
__RENDER2D_INCY |
__RENDER2D_OP_SYNCDATA |
__RENDER2D_SPANS;
SET_WRITE_BUFFERS;
WAIT_PXRX_DMA_TAGS( 3 );
QUEUE_PXRX_DMA_TAG( __GlintTagDownloadTarget, __GlintTagColor );
QUEUE_PXRX_DMA_TAG( __GlintTagDownloadGlyphWidth, 3 );
LOAD_CONFIG2D( config2D );
dx = pptlSrc->x - prclDst->left;
dy = pptlSrc->y - prclDst->top; // Add to destination to get source
lSrcDelta = psoSrc->lDelta;
pjSrcScan0 = psoSrc->pvScan0;
while( TRUE )
{
DISPDBG((DBGLVL, "download to rect "
"(%d,%d -> %d,%d) + (%d, %d) = (%d x %d)",
prcl->left, prcl->top, prcl->right, prcl->bottom,
dx, dy,
prcl->right - prcl->left,
prcl->bottom - prcl->top));
// 24bpp => 1 pixel per 3 bytes => 4 pixel per 3 dwords
// Assume source bitmap width is dword aligned
ASSERTDD( (lSrcDelta & 3) == 0,
"pxrxCopyXfer24bpp: "
"SOURCE BITMAP WIDTH IS NOT DWORD ALIGNED!!!" );
ASSERTDD( (((UINT_PTR) pjSrcScan0) & 3) == 0,
"pxrxCopyXfer24bpp: "
"SOURCE BITMAP START LOCATION IS NOT DWORD ALIGNED!!!" );
cy = prcl->bottom - prcl->top; // number of scanlines to do
startPos = (((UINT_PTR) pjSrcScan0) +
((prcl->top + dy) * lSrcDelta)) +
((prcl->left + dx) * 3); // pointer to first pixel of first
// scanline, in bytes
alignOff = (ULONG)(startPos & 3); // number of bytes past dword
// alignment to first pixel
pjSrc = (ULONG *) (startPos & ~3); // dword pointer to dword aligned
// first pixel
if(NULL == pjSrc)
{
DISPDBG((ERRLVL, "ERROR: "
"pxrxCopyXfer24bpp return because of pjSrc NULL"));
return;
}
padLeft = (4 - alignOff) % 4; // number of pixels to add to regain
// dword alignment on left edge
padLeftDW = (padLeft * 3) / 4; // number of dwords to add
// on the left edge
LeftEdge = prcl->left - padLeft;
// dword aligned width in pixels (= 4 pixel aligned = 3 dword aligned!)
AlignWidth = (prcl->right - LeftEdge + 3) & ~3;
// number of pixels overhang on the right
padRight = (LeftEdge + AlignWidth) - prcl->right;
// number of dwords to add on the right edge
padRightDW = (padRight * 3) / 4;
AlignWidthDW = (AlignWidth * 3) / 4; // dword aligned width in dwords
lSrcDeltaDW = lSrcDelta >> 2; // dword aligned scanline offset
// in dwords
// the amount of AlignWidth which is actually src bitmap
dataWidth = AlignWidthDW - padLeftDW - padRightDW;
DISPDBG((DBGLVL, "startPos = 0x%08X, alignOff = %d, "
"pjSrc = 0x%08X, lSrcDeltaDW = %d",
startPos, alignOff, pjSrc, lSrcDeltaDW));
DISPDBG((DBGLVL, "padLeft = %d pixels = %d dwords, LeftEdge = %d",
padLeft, padLeftDW, LeftEdge));
DISPDBG((DBGLVL, "AlignWidth = %d pixels = %d dwords",
AlignWidth, AlignWidthDW));
DISPDBG((DBGLVL, "padRight = %d pixels = %d dwords", padRight, padRightDW));
WAIT_PXRX_DMA_TAGS( 4 );
QUEUE_PXRX_DMA_TAG( __GlintTagScissorMinXY,
MAKEDWORD_XY(prcl->left, 0));
QUEUE_PXRX_DMA_TAG( __GlintTagScissorMaxXY,
MAKEDWORD_XY(prcl->right, 0x7fff));
QUEUE_PXRX_DMA_TAG( __GlintTagRectanglePosition,
MAKEDWORD_XY(LeftEdge, prcl->top));
QUEUE_PXRX_DMA_TAG( __GlintTagRender2D, render2D |
__RENDER2D_WIDTH(AlignWidth) |
__RENDER2D_HEIGHT(cy) );
while( cy-- )
{
DISPDBG((DBGLVL, "cy = %d", cy));
WAIT_PXRX_DMA_DWORDS( AlignWidthDW + 1 );
QUEUE_PXRX_DMA_HOLD( __GlintTagGlyphData, AlignWidthDW );
if( padLeftDW )
{
QUEUE_PXRX_DMA_DWORD( 0xDEADDEAD );
}
if( padLeftDW == 2 )
{
QUEUE_PXRX_DMA_DWORD( 0xDEADDEAD );
}
QUEUE_PXRX_DMA_BUFF( pjSrc, dataWidth );
if( padRightDW )
{
QUEUE_PXRX_DMA_DWORD( 0xDEADDEAD );
}
if( padRightDW == 2 )
{
QUEUE_PXRX_DMA_DWORD( 0xDEADDEAD );
}
SEND_PXRX_DMA_BATCH;
pjSrc += lSrcDeltaDW;
}
//@@BEGIN_DDKSPLIT
/*/
alignOff = (prcl->left + dx + 3) & ~3; // number of pixels past dword alignment of first pixel of a scanline
pjSrc = (ULONG *) (startPos - (alignOff * 3)); // dword pointer to dword aligned first pixel
LeftEdge = prcl->left - alignOff; // dword aligned left edge in pixels
AlignWidth = ((((prcl->right - LeftEdge) * 3) + 3) & ~3) / 3; // dword aligned width in pixels (IS NOT = 4 pixel aligned = 3 dword aligned!)
AlignExtra = AlignWidth - (prcl->right - LeftEdge); // extra pixels beyond the genuine width (which might overstomp a page boundary)
if( AlignExtra )
cy--;
DISPDBG((7, "pjSrcScan0 = 0x%08X, startPos = 0x%08X, pjSrc = 0x%08X", pjSrcScan0, startPos, pjSrc));
DISPDBG((7, "offset = %d pixels", alignOff));
DISPDBG((7, "Aligned rect = (%d -> %d) => %d pixels", LeftEdge, LeftEdge + AlignWidth, AlignWidth));
DISPDBG((7, "Rendering %d scanlines", cy));
WAIT_PXRX_DMA_TAGS( 4 );
QUEUE_PXRX_DMA_TAG( __GlintTagScissorMinXY, MAKEDWORD_XY(prcl->left, 0) );
QUEUE_PXRX_DMA_TAG( __GlintTagScissorMaxXY, MAKEDWORD_XY(prcl->right, 0x7fff) );
QUEUE_PXRX_DMA_TAG( __GlintTagRectanglePosition, MAKEDWORD_XY(LeftEdge, prcl->top) );
QUEUE_PXRX_DMA_TAG( __GlintTagRender2D, render2D | __RENDER2D_WIDTH(AlignWidth) | __RENDER2D_HEIGHT(cy) );
AlignWidthDW = (AlignWidth * 3) / 4; // dword aligned width in dwords
lSrcDeltaDW = lSrcDelta >> 2; // dword aligned scanline offset in dwords
DISPDBG((7, "Delta = %d bytes = %d dwords (%d dwords wide)", lSrcDelta, lSrcDeltaDW, AlignWidthDW));
while( cy-- ) {
DISPDBG((9, "cy = %d", cy));
WAIT_PXRX_DMA_DWORDS( AlignWidthDW + 1 );
QUEUE_PXRX_DMA_HOLD( __GlintTagGlyphData, AlignWidthDW );
TEST_DWORD_ALIGNED( pjSrc );
QUEUE_PXRX_DMA_BUFF( pjSrc, AlignWidthDW );
SEND_PXRX_DMA_BATCH;
pjSrc += lSrcDeltaDW;
}
if( AlignExtra ) {
ULONG dataWidth;
ULONG dataExtra;
dataWidth = ((((prcl->right - LeftEdge) * 3) + 3) & ~3) / 4; // dword aligned width in dwords, 1 dword aligned
dataExtra = AlignWidthDW - dataWidth; // extra dwords past end of image
DISPDBG((7, "Last scanline: %d + %d = %d pixels = %d + %d = %d dwords",
prcl->right - LeftEdge, AlignExtra, AlignWidth, dataWidth, dataExtra, AlignWidthDW));
ASSERTDD( (dataWidth + dataExtra) == AlignWidthDW, "pxrxCopyXfer24bpp: Last scanline does not add up!" );
WAIT_PXRX_DMA_DWORDS( AlignWidthDW + 5 );
QUEUE_PXRX_DMA_TAG( __GlintTagRectanglePosition, MAKEDWORD_XY(LeftEdge, prcl->bottom - 1) );
QUEUE_PXRX_DMA_TAG( __GlintTagRender2D, render2D | __RENDER2D_WIDTH(AlignWidth) | __RENDER2D_HEIGHT(1) );
TEST_DWORD_ALIGNED( pjSrc );
QUEUE_PXRX_DMA_HOLD( __GlintTagGlyphData, AlignWidthDW );
QUEUE_PXRX_DMA_BUFF( pjSrc, dataWidth ); // Send the partial scanline
while( dataExtra-- )
QUEUE_PXRX_DMA_DWORD( 0xDEADDEAD ); // Pad out to flush the data
// Resend download target to flush the remaining partial pixels ???
}
/**/
//@@END_DDKSPLIT
if( --count == 0 )
{
break;
}
prcl++;
}
// Reset the scissor maximums:
if( ppdev->cPelSize == GLINTDEPTH32 )
{
WAIT_PXRX_DMA_TAGS( 1 );
QUEUE_PXRX_DMA_TAG( __GlintTagScissorMaxXY, 0x7FFF7FFF );
}
SEND_PXRX_DMA_BATCH;
DISPDBG((DBGLVL, "pxrxCopyXfer24bpp return"));
}
/**************************************************************************\
*
* VOID pxrxCopyXfer8bppLge
*
* Downloads an 8bpp source bitmap to the destination, translating every
* source byte through the table in pxlo->pulXlate.
*
* Two strategies are used:
*
*   - A single rectangle that is exactly one scanline high is translated on
*     the host: each source byte is looked up in pulXlate and the results
*     are packed into dwords (4, 2 or 1 pixels per dword depending on
*     ppdev->cPelSize) which are queued as __GlintTagColor data.
*
*   - Everything else loads the translation table into the LUT (only when
*     the cached LUT type / palette uniqueness does not match pxlo) and
*     then streams raw source dwords. Each scanline is widened to dword
*     alignment on both edges and the user scissor trims the result back
*     to prcl->left .. prcl->right.
*
* Arguments:
*   ppdev   - physical device
*   psoSrc  - source surface; pvScan0 and lDelta are read
*   pptlSrc - source point that corresponds to prclDst's top-left corner
*   prclDst - unclipped destination rectangle; only left/top are used, to
*             derive the destination-to-source offset
*   prcl    - array of 'count' destination rectangles
*   count   - number of rectangles; must be at least 1 because a rectangle
*             is processed before the count is tested
*   pxlo    - translation object; pulXlate and iUniq are read
*
\**************************************************************************/
VOID pxrxCopyXfer8bppLge(
    PPDEV ppdev,
    SURFOBJ *psoSrc,
    POINTL *pptlSrc,
    RECTL *prclDst,
    RECTL *prcl,
    LONG count,
    XLATEOBJ *pxlo )
{
    ULONG config2D, render2D, lutMode, pixelSize;
    BOOL invalidLUT = FALSE;
    LONG dx, dy, cy;
    LONG lSrcDelta, lSrcDeltaDW, lTrueDelta, alignOff;
    ULONG AlignWidth, LeftEdge;
    BYTE* pjSrcScan0;
    ULONG* pjSrc;
    UINT_PTR startPos;
    LONG cPelInv;
    ULONG ul, i;
    LONG nRemainder;
//@@BEGIN_DDKSPLIT
#if USE_RLE_DOWNLOADS
    ULONG len, data, holdCount;
#endif
//@@END_DDKSPLIT
    ULONG *tagPtr;
    ULONG *pulXlate = pxlo->pulXlate;
    GLINT_DECL;

    // Arguments are passed in (left,top) -> (right,bottom) order so that
    // they line up with the "(x,y) -> (x,y)" format string.
    DISPDBG((DBGLVL, "pxrxCopyXfer8bpp(): src = (%d,%d) -> (%d,%d), "
                     "count = %d, palette id = %d",
                     prcl->left, prcl->top, prcl->right, prcl->bottom,
                     count, pxlo->iUniq));

    SET_WRITE_BUFFERS;

    // ---------------------------------------------------------------------
    // Fast path: one rectangle, one scanline. Translate on the host and
    // send the already-translated pixels as colour data.
    // ---------------------------------------------------------------------
    if( (count == 1) &&
        ((cy = (prcl->bottom - prcl->top)) == 1) )
    {
        ULONG width = prcl->right - prcl->left, extra;
        BYTE *srcPtr;

        config2D = __CONFIG2D_FBWRITE |
                   __CONFIG2D_EXTERNALSRC;
        render2D = __RENDER2D_INCX |
                   __RENDER2D_INCY |
                   __RENDER2D_OP_SYNCDATA |
                   __RENDER2D_SPANS;

        dx = pptlSrc->x - prclDst->left;
        dy = pptlSrc->y - prclDst->top; // Add to destination to get source

        lSrcDelta = psoSrc->lDelta;
        pjSrcScan0 = psoSrc->pvScan0;

        // Byte address of the first source pixel (1 byte per pixel)
        startPos = (((UINT_PTR) pjSrcScan0) +
                    ((prcl->top + dy) * lSrcDelta)) + (prcl->left + dx);
        srcPtr = (BYTE *) startPos;

        // Reserve room for the setup tags plus at most one data dword
        // per pixel (the 32bpp case).
        WAIT_PXRX_DMA_DWORDS( 7 + width );

        LOAD_CONFIG2D( config2D );
        QUEUE_PXRX_DMA_TAG( __GlintTagRectanglePosition,
                            MAKEDWORD_XY(prcl->left, prcl->top) );
        QUEUE_PXRX_DMA_TAG( __GlintTagRender2D, render2D |
                                                __RENDER2D_WIDTH(width) |
                                                __RENDER2D_HEIGHT(1) );

        if( ppdev->cPelSize == 0 ) // 8bpp
        {
            // Four translated pixels per dword; 'extra' pixels spill into
            // one final, partially filled dword.
            extra = width & 3;
            width >>= 2;

            if( extra )
            {
                QUEUE_PXRX_DMA_HOLD( __GlintTagColor, width + 1 );
                QUEUE_PXRX_DMA_BUFF_DELAYED( tagPtr, width + 1 );
            } else {
                QUEUE_PXRX_DMA_HOLD( __GlintTagColor, width );
                QUEUE_PXRX_DMA_BUFF_DELAYED( tagPtr, width );
            }

            DISPDBG((DBGLVL, "width was %d, is now %d + %d",
                             prcl->right - prcl->left, width, extra));

            for( i = 0; i < width; i++, srcPtr += 4 )
            {
                *(tagPtr++) = (pulXlate[srcPtr[3]] << 24) |
                              (pulXlate[srcPtr[2]] << 16) |
                              (pulXlate[srcPtr[1]] << 8) |
                               pulXlate[srcPtr[0]];
            }

            if( extra == 1 )
            {
                *(tagPtr++) = pulXlate[srcPtr[0]];
            }
            else if( extra == 2 )
            {
                *(tagPtr++) = (pulXlate[srcPtr[1]] << 8) |
                               pulXlate[srcPtr[0]];
            }
            else if (extra == 3)
            {
                *(tagPtr++) = (pulXlate[srcPtr[2]] << 16) |
                              (pulXlate[srcPtr[1]] << 8) |
                               pulXlate[srcPtr[0]];
            }
        }
        else if( ppdev->cPelSize == 1 ) // 16bpp
        {
            // Two translated pixels per dword
            extra = width & 1;
            width >>= 1;

            QUEUE_PXRX_DMA_HOLD( __GlintTagColor, width + extra );
            QUEUE_PXRX_DMA_BUFF_DELAYED( tagPtr, width + extra );

            DISPDBG((DBGLVL, "width was %d, is now %d + %d",
                             prcl->right - prcl->left, width, extra));

            for( i = 0; i < width; i++, srcPtr += 2 )
            {
                *(tagPtr++) = (pulXlate[srcPtr[1]] << 16) |
                               pulXlate[srcPtr[0]];
            }

            if( extra )
            {
                *(tagPtr++) = pulXlate[srcPtr[0]];
            }
        }
        else
        {
            // One translated pixel per dword
            QUEUE_PXRX_DMA_HOLD( __GlintTagColor, width );
            QUEUE_PXRX_DMA_BUFF_DELAYED( tagPtr, width );

            DISPDBG((DBGLVL, "width was %d, is now %d + %d",
                             prcl->right - prcl->left, width, 0));

            for( i = 0; i < width; i++ )
            {
                *(tagPtr++) = pulXlate[*(srcPtr++)];
            }
        }

        SEND_PXRX_DMA_BATCH;
        return;
    }

    // ---------------------------------------------------------------------
    // General path: translate through the LUT.
    // ---------------------------------------------------------------------

    // Set up the LUT table:
    if( (ppdev->PalLUTType != LUTCACHE_XLATE) ||
        (ppdev->iPalUniq != pxlo->iUniq) )
    {
        // Someone has hijacked the LUT so we need to invalidate it:
        ppdev->PalLUTType = LUTCACHE_XLATE;
        ppdev->iPalUniq = pxlo->iUniq;
        invalidLUT = TRUE;
    }
    else
    {
        DISPDBG((DBGLVL, "pxrxCopyXfer8bpp: reusing cached xlate"));
    }

    // One tag for the LUT mode, one for the LUT index (if reloading)
    WAIT_PXRX_DMA_TAGS( 1 + 1 );

    lutMode = glintInfo->lutMode & ~((3 << 2) | (1 << 4) | (7 << 8));
    lutMode |= (ppdev->cPelSize + 2) << 8;
    LOAD_LUTMODE( lutMode );

    if( invalidLUT )
    {
        LONG cEntries = 256;

        pulXlate = pxlo->pulXlate;
        QUEUE_PXRX_DMA_TAG( __PXRXTagLUTIndex, 0 );

        if( ppdev->cPelSize == 0 ) // 8bpp
        {
            WAIT_PXRX_DMA_TAGS( cEntries );

            do
            {
                // Replicate the entry into all four bytes of the dword
                ul = *(pulXlate++);
                ul |= ul << 8;
                ul |= ul << 16;
                QUEUE_PXRX_DMA_TAG( __PXRXTagLUTData, ul );
            } while( --cEntries );
        }
        else if( ppdev->cPelSize == 1 ) // 16bpp
        {
            WAIT_PXRX_DMA_TAGS( cEntries );

            do
            {
                // Replicate the entry into both halves of the dword
                ul = *(pulXlate++);
                ul |= ul << 16;
                QUEUE_PXRX_DMA_TAG( __PXRXTagLUTData, ul );
            } while( --cEntries );
        }
        else
        {
            // Entries are used as-is, so send the table as one held block
            WAIT_PXRX_DMA_DWORDS( 1 + cEntries );

            QUEUE_PXRX_DMA_HOLD( __PXRXTagLUTData, cEntries );
            QUEUE_PXRX_DMA_BUFF( pulXlate, cEntries );
        }
    }

    config2D = glintInfo->config2D & ~(__CONFIG2D_LOGOP_FORE_ENABLE |
                                       __CONFIG2D_LOGOP_BACK_ENABLE |
                                       __CONFIG2D_ENABLES);
    config2D |= __CONFIG2D_FBWRITE |
                __CONFIG2D_USERSCISSOR |
                __CONFIG2D_EXTERNALSRC |
                __CONFIG2D_LUTENABLE;
    render2D = __RENDER2D_INCX |
               __RENDER2D_INCY |
               __RENDER2D_OP_SYNCDATA |
               __RENDER2D_SPANS;

    WAIT_PXRX_DMA_TAGS( 3 );

    LOAD_CONFIG2D( config2D );

//@@BEGIN_DDKSPLIT
#if USE_RLE_DOWNLOADS
    QUEUE_PXRX_DMA_TAG( __GlintTagDownloadTarget, __GlintTagColor );
#endif
//@@END_DDKSPLIT

    cPelInv = 2 - ppdev->cPelSize;

    // Everything before the LUT runs at 8bpp
    // (the top bit is built from an unsigned constant: shifting a signed
    // 1 into bit 31 overflows 'int')
    pixelSize = ((ULONG)1 << 31) |
                (2 << 2) |
                (2 << 4) |
                (2 << 6) |
                (cPelInv << 8) |
                (cPelInv << 10) |
                (cPelInv << 12) |
                (cPelInv << 14);
    QUEUE_PXRX_DMA_TAG( __GlintTagPixelSize, pixelSize );

    dx = pptlSrc->x - prclDst->left;
    dy = pptlSrc->y - prclDst->top; // Add to destination to get source

    lSrcDelta = psoSrc->lDelta;
    pjSrcScan0 = psoSrc->pvScan0;

    while( TRUE )
    {
        DISPDBG((DBGLVL, "download to rect (%d,%d) to (%d,%d)",
                         prcl->left, prcl->top, prcl->right, prcl->bottom));

        // 8bpp => 1 pixel per byte => 4 pixels per dword

        // Assume source bitmap width is dword aligned
        ASSERTDD( (lSrcDelta & 3) == 0,
                  "pxrxCopyXfer8bpp: "
                  "SOURCE BITMAP WIDTH IS NOT DWORD ALIGNED!!!" );

        // pointer to first pixel, in pixels/bytes
        startPos = (((UINT_PTR) pjSrcScan0) +
                    ((prcl->top + dy) * lSrcDelta))
                   + (prcl->left + dx);

        // dword pointer to dword aligned first pixel
        pjSrc = (ULONG *) (startPos & ~3);

        if(NULL == pjSrc)
        {
            DISPDBG((ERRLVL, "ERROR: pxrxCopyXfer8bppLge "
                             "return because of pjSrc NULL"));

            // PixelSize and the user scissor have already been
            // reprogrammed above, so leave through the common exit which
            // restores them instead of returning directly.
            break;
        }

        alignOff = (ULONG)(startPos & 3);   // number of pixels past dword
                                            // alignment of a scanline
        LeftEdge = prcl->left - alignOff;   // dword aligned left edge in pixels
        AlignWidth = ((prcl->right - LeftEdge) + 3) & ~3; // dword aligned width
                                                          // in pixels
        cy = prcl->bottom - prcl->top;      // number of scanlines to do

        DISPDBG((DBGLVL, "pjSrcScan0 = 0x%08X, startPos = 0x%08X, "
                         "pjSrc = 0x%08X",
                         pjSrcScan0, startPos, pjSrc));
        DISPDBG((DBGLVL, "offset = %d pixels", alignOff));
        DISPDBG((DBGLVL, "Aligned rect = (%d -> %d) => %d pixels => %d dwords",
                         LeftEdge, LeftEdge + AlignWidth,
                         AlignWidth, AlignWidth >> 2));

        // Render the widened rectangle; the scissor clips it back to the
        // requested left/right edges.
        WAIT_PXRX_DMA_TAGS( 4 );

        QUEUE_PXRX_DMA_TAG( __GlintTagScissorMinXY,
                            MAKEDWORD_XY(prcl->left, 0) );
        QUEUE_PXRX_DMA_TAG( __GlintTagScissorMaxXY,
                            MAKEDWORD_XY(prcl->right, 0x7fff) );
        QUEUE_PXRX_DMA_TAG( __GlintTagRectanglePosition,
                            MAKEDWORD_XY(LeftEdge, prcl->top) );
        QUEUE_PXRX_DMA_TAG( __GlintTagRender2D, render2D |
                                                __RENDER2D_WIDTH(AlignWidth) |
                                                __RENDER2D_HEIGHT(cy) );
        SEND_PXRX_DMA_BATCH;

        AlignWidth >>= 2;                       // dword aligned width in dwords
        lSrcDeltaDW = lSrcDelta >> 2;           // scanline delta in dwords
                                                // (start to start)
        lTrueDelta = lSrcDeltaDW - AlignWidth;  // scanline delta in dwords
                                                // (end to start)
        DISPDBG((DBGLVL, "Delta = %d bytes = %d dwords -> %d - %d dwords",
                         lSrcDelta, lSrcDeltaDW, lTrueDelta, AlignWidth));

//@@BEGIN_DDKSPLIT
#if USE_RLE_DOWNLOADS
        // Do an RLE download:
        // Runs of 4 or more identical dwords are sent as an
        // RLData/RLCount pair; shorter runs are gathered under a colour
        // hold tag whose count is patched in once the group ends.
        tagPtr = NULL;
        do
        {
            WAIT_PXRX_DMA_TAGS( AlignWidth + 1 );

            nRemainder = AlignWidth;
            while( nRemainder-- )
            {
                TEST_DWORD_ALIGNED( pjSrc );
                data = *(pjSrc++);
                len = 1;

                TEST_DWORD_ALIGNED( pjSrc );
                while( nRemainder && (*pjSrc == data) )
                {
                    pjSrc++;
                    len++;
                    nRemainder--;
                    TEST_DWORD_ALIGNED( pjSrc );
                }

                if( len >= 4 )
                {
                    if( tagPtr )
                    {
                        *tagPtr = ASSEMBLE_PXRX_DMA_HOLD( __GlintTagColor,
                                                          holdCount );
                        tagPtr = NULL;
                    }

                    QUEUE_PXRX_DMA_INDEX2( __GlintTagRLData, __GlintTagRLCount );
                    QUEUE_PXRX_DMA_DWORD( data );
                    QUEUE_PXRX_DMA_DWORD( len );
                    len = 0;
                }
                else
                {
                    if( !tagPtr )
                    {
                        QUEUE_PXRX_DMA_DWORD_DELAYED( tagPtr );
                        holdCount = 0;
                    }

                    holdCount += len;
                    while( len-- )
                    {
                        QUEUE_PXRX_DMA_DWORD( data );
                    }
                }
            }

            if( tagPtr )
            {
                *tagPtr = ASSEMBLE_PXRX_DMA_HOLD( __GlintTagColor, holdCount );
                tagPtr = NULL;
            }

            pjSrc += lTrueDelta;
            SEND_PXRX_DMA_BATCH;
        } while( --cy > 0 );
#else
//@@END_DDKSPLIT
        // Do a raw download:
        while( TRUE )
        {
            DISPDBG((DBGLVL, "cy = %d", cy));

            WAIT_PXRX_DMA_DWORDS( AlignWidth + 1 );
            QUEUE_PXRX_DMA_HOLD( __GlintTagColor, AlignWidth );
            TEST_DWORD_ALIGNED( pjSrc );
            QUEUE_PXRX_DMA_BUFF( pjSrc, AlignWidth );
            SEND_PXRX_DMA_BATCH;

            // '<=' rather than '==' so that a degenerate (zero height)
            // rectangle cannot send the loop walking through memory;
            // this matches the termination test of the RLE variant.
            if( --cy <= 0 )
            {
                break;
            }

            pjSrc += lSrcDeltaDW;
        }
//@@BEGIN_DDKSPLIT
#endif
//@@END_DDKSPLIT

        if( --count == 0 )
        {
            break;
        }

        prcl++;
    }

    // Reset some defaults:
    WAIT_PXRX_DMA_TAGS( 2 );
    QUEUE_PXRX_DMA_TAG( __GlintTagPixelSize, cPelInv );
    if( ppdev->cPelSize == GLINTDEPTH32 )
        QUEUE_PXRX_DMA_TAG( __GlintTagScissorMaxXY, 0x7FFF7FFF );

    SEND_PXRX_DMA_BATCH;

    DISPDBG((DBGLVL, "pxrxCopyXfer8bpp return"));
}
//****************************************************************************
// FUNC: pxrxMemUpload
// ARGS: ppdev (I) - pointer to the physical device object
//       crcl (I) - number of destination clipping rectangles
//       prcl (I) - array of destination clipping rectangles
//       psoDst (I) - destination surface
//       pptlSrc (I) - offset into source surface
//       prclDst (I) - unclipped destination rectangle
// RETN: void
//----------------------------------------------------------------------------
// Copies each clip rectangle from the memory mapped framebuffer
// (ppdev->pjScreen) into the host memory surface psoDst, one scanline at a
// time with memcpy, after synchronising with the chip.
//
// pptlSrc is the source point that corresponds to the top-left corner of
// prclDst, so the source of a clip rectangle is found by adding the clip
// rectangle's offset from prclDst to pptlSrc (the same mapping that
// pxrxFifoUpload uses).
//****************************************************************************
VOID pxrxMemUpload(
    PPDEV ppdev,
    LONG crcl,
    RECTL *prcl,
    SURFOBJ *psoDst,
    POINTL *pptlSrc,
    RECTL *prclDst)
{
    BYTE *pDst, *pSrc;
    LONG dwScanLineSize, cySrc, lSrcOff, lSrcStride;
    LONG xSrc, ySrc;
    GLINT_DECL;

    // Make sure we're not performing other operations on the fb areas we want
    SYNC_WITH_GLINT;

    ASSERTDD(psoDst->iBitmapFormat == ppdev->iBitmapFormat,
             "Dest must be same colour depth as screen");
    ASSERTDD(crcl > 0, "Can't handle zero rectangles");

    // The source origin and stride depend only on the device state, so
    // work them out once rather than once per rectangle.

    // This gives an offset for offscreen DIBs (zero for primary rectangles)
    lSrcOff = ppdev->DstPixelOrigin +
              (ppdev->xyOffsetDst & 0xffff) +
              (ppdev->xyOffsetDst >> 16) * ppdev->DstPixelDelta;

    // Determine stride on whether we are blitting from the
    // primary or from an offscreen DIB
    if (( ppdev->DstPixelOrigin == 0 ) &&
        (ppdev->xyOffsetDst == 0) )
    {
        lSrcStride = ppdev->lDelta;
    }
    else
    {
        lSrcStride = ppdev->DstPixelDelta * ppdev->cjPelSize;
    }

    for(; --crcl >= 0; ++prcl)
    {
        // Source pixel that maps onto the top-left of this clip rectangle.
        // Using pptlSrc on its own is only right when the clip rectangle
        // starts at prclDst's top-left corner.
        xSrc = (LONG)pptlSrc->x + ((LONG)prcl->left - (LONG)prclDst->left);
        ySrc = (LONG)pptlSrc->y + ((LONG)prcl->top - (LONG)prclDst->top);

        // pSrc must point to mem mapped primary
        pSrc = (BYTE *)ppdev->pjScreen
               + (lSrcOff * ppdev->cjPelSize)
               + (xSrc * ppdev->cjPelSize)
               + (ySrc * lSrcStride);

        // pDst must point to the sysmem SURFOBJ
        pDst = (BYTE *)psoDst->pvScan0
               + ((LONG)prcl->left * ppdev->cjPelSize)
               + ((LONG)prcl->top * (LONG)psoDst->lDelta);

        // dwScanLineSize must have the right size to transfer in bytes
        dwScanLineSize = ((LONG)prcl->right - (LONG)prcl->left) * ppdev->cjPelSize;

        // Number of scan lines to transfer
        cySrc = prcl->bottom - prcl->top;

        // Do the copy
        while (--cySrc >= 0)
        {
            // memcpy(dst, src, size)
            memcpy(pDst, pSrc, dwScanLineSize);
            pDst += psoDst->lDelta; // add stride
            pSrc += lSrcStride;     // add stride
        }
    }
} // pxrxMemUpload
//****************************************************************************
// FUNC: pxrxFifoUpload
// ARGS: ppdev (I) - pointer to the physical device object
//       crcl (I) - number of destination clipping rectangles
//       prcl (I) - array of destination clipping rectangles
//       psoDst (I) - destination surface
//       pptlSrc (I) - offset into source surface
//       prclDst (I) - unclipped destination rectangle
// RETN: void
//----------------------------------------------------------------------------
// upload from on-chip source into host memory surface. Upload in spans
// (64-bit aligned) to minimise messages through the core and entries in the
// host out fifo.
//
// For every clip rectangle the source is widened on the left so that the
// uploaded ulongs line up with ulongs in the destination. Three read loops
// follow, chosen by the edge masks:
//   - both masks full: ulongs are stored straight into the destination
//   - one ulong per scan: a single combined mask is merged in
//   - otherwise: the first and last ulong of each scan are merged through
//     leftMask / rightMask and the middle ulongs are stored directly
// When a scan holds an odd number of ulongs one extra ulong is read from
// the FIFO and discarded.
//****************************************************************************
VOID pxrxFifoUpload(
    PPDEV ppdev,
    LONG crcl,
    RECTL *prcl,
    SURFOBJ *psoDst,
    POINTL *pptlSrc,
    RECTL *prclDst)
{
    LONG xDomSrc, xSubSrc, yStartSrc, cxSrc, cySrc;
    LONG culPerSrcScan;
    LONG culDstDelta;
    BOOL bRemPerSrcScan;
    ULONG *pulDst, *pulDstScan;
    ULONG leftMask, rightMask;
    LONG cul, ul;
    LONG cFifoSpaces;
    __GlintFilterModeFmat FilterMode;
    GLINT_DECL;

    WAIT_PXRX_DMA_TAGS(1);
    QUEUE_PXRX_DMA_TAG( __GlintTagFBDestReadMode, (glintInfo->fbDestMode | 0x103));
    SEND_PXRX_DMA_FORCE;

//@@BEGIN_DDKSPLIT
#if USE_RLE_UPLOADS
    // NB. using cxSrc >= 16 is slightly slower overall. These tests were empirically developed
    // from WB99 BG & HE benchmarks
    cxSrc = prcl->right - prcl->left;
    if(cxSrc >= 32 && (cxSrc < 80 || (cxSrc >= 128 && cxSrc < 256) || cxSrc == ppdev->cxScreen))
    {
        pxrxRLEFifoUpload(ppdev, crcl, prcl, psoDst, pptlSrc, prclDst);
        return;
    }
#endif //USE_RLE_UPLOADS
//@@END_DDKSPLIT

    DISPDBG((DBGLVL, "pxrxFifoUpload: prcl = (%d, %d -> %d, %d), "
                     "prclDst = (%d, %d -> %d, %d), ptlSrc(%d, %d), count = %d",
                     prcl->left, prcl->top, prcl->right, prcl->bottom,
                     prclDst->left, prclDst->top, prclDst->right,
                     prclDst->bottom, pptlSrc->x, pptlSrc->y, crcl));
    DISPDBG((DBGLVL, "pxrxFifoUpload: psoDst: cx = %d, cy = %d, "
                     "lDelta = %d, pvScan0=%P)",
                     psoDst->sizlBitmap.cx, psoDst->sizlBitmap.cy,
                     psoDst->lDelta, psoDst->pvScan0));
    DISPDBG((DBGLVL, "pxrxFifoUpload: xyOffsetDst = (%d, %d), "
                     "xyOffsetSrc = (%d, %d)",
                     ppdev->xyOffsetDst & 0xFFFF, ppdev->xyOffsetDst >> 16,
                     ppdev->xyOffsetSrc & 0xFFFF, ppdev->xyOffsetSrc >> 16));

    ASSERTDD(psoDst->iBitmapFormat == ppdev->iBitmapFormat,
             "Dest must be same colour depth as screen");
    ASSERTDD(crcl > 0, "Can't handle zero rectangles");

    WAIT_PXRX_DMA_TAGS(5);

    LOAD_CONFIG2D(__CONFIG2D_FBDESTREAD);
    SET_READ_BUFFERS;

    // enable filter mode so we can get Sync
    // and color messages on the output FIFO
    *(DWORD *)(&FilterMode) = 0;
    FilterMode.Synchronization = __GLINT_FILTER_TAG;
    FilterMode.Color = __GLINT_FILTER_DATA;
    QUEUE_PXRX_DMA_TAG(__GlintTagFilterMode, *(DWORD *)(&FilterMode));

    for(; --crcl >= 0; ++prcl)
    {
        DISPDBG((DBGLVL, "pxrxFifoUpload: dest prcl(%xh,%xh..%xh,%xh)",
                         prcl->left, prcl->top, prcl->right, prcl->bottom));

        // calculate pixel-aligned source
        xDomSrc = pptlSrc->x + prcl->left - prclDst->left;
        xSubSrc = pptlSrc->x + prcl->right - prclDst->left;
        yStartSrc = pptlSrc->y + prcl->top - prclDst->top;
        cySrc = prcl->bottom - prcl->top;

        DISPDBG((DBGLVL, "pxrxFifoUpload: src (%xh,%xh..%xh,%xh)",
                         xDomSrc, yStartSrc, xSubSrc, yStartSrc + cySrc));

        // will upload ulongs aligned to ulongs
        if (ppdev->cPelSize == GLINTDEPTH32)
        {
            cxSrc = xSubSrc - xDomSrc;
            culPerSrcScan = cxSrc;
            leftMask = 0xFFFFFFFF;
            rightMask = 0xFFFFFFFF;
        }
        else
        {
            if (ppdev->cPelSize == GLINTDEPTH16)
            {
                // 2 pixels per ulong, 16 bits of mask per pixel
                ULONG cPixFromUlongBoundary = prcl->left & 1;

                xDomSrc -= cPixFromUlongBoundary;
                cxSrc = xSubSrc - xDomSrc;
                culPerSrcScan = (xSubSrc - xDomSrc + 1) >> 1;
                leftMask = 0xFFFFFFFF << (cPixFromUlongBoundary << 4);
                rightMask = 0xFFFFFFFF >> (((xDomSrc - xSubSrc) & 1) << 4);
            }
            else
            {
                // 4 pixels per ulong, 8 bits of mask per pixel
                ULONG cPixFromUlongBoundary = prcl->left & 3;

                xDomSrc -= cPixFromUlongBoundary;
                cxSrc = xSubSrc - xDomSrc;
                culPerSrcScan = (xSubSrc - xDomSrc + 3) >> 2;
                leftMask = 0xFFFFFFFF << (cPixFromUlongBoundary << 3);
                rightMask = 0xFFFFFFFF >> (((xDomSrc - xSubSrc) & 3) << 3);
            }

            // We just want a single mask if the area to upload is less
            // than one word wide.
            if (culPerSrcScan == 1)
            {
                leftMask &= rightMask;
            }
        }

        // uploading 64 bit aligned source
        bRemPerSrcScan = culPerSrcScan & 1;

        // Work out where the destination data goes to
        culDstDelta = psoDst->lDelta >> 2;
        pulDst = ((ULONG *)psoDst->pvScan0) +
                 (prcl->left >> (2 - ppdev->cPelSize))
                 + culDstDelta * prcl->top;

        DISPDBG((DBGLVL, "pxrxFifoUpload: uploading aligned "
                         "src (%xh,%xh..%xh,%xh)",
                         xDomSrc, yStartSrc,
                         xDomSrc + cxSrc, yStartSrc + cySrc));

        // Render the rectangle
        WAIT_PXRX_DMA_TAGS(2);
        QUEUE_PXRX_DMA_TAG( __GlintTagRectanglePosition,
                            MAKEDWORD_XY(xDomSrc, yStartSrc));
        QUEUE_PXRX_DMA_TAG( __GlintTagRender2D,
                            __RENDER2D_OP_NORMAL |
                            __RENDER2D_SPANS |
                            __RENDER2D_INCY |
                            __RENDER2D_INCX |
                            __RENDER2D_WIDTH(cxSrc) |
                            __RENDER2D_HEIGHT(cySrc));
        SEND_PXRX_DMA_FORCE;

        // If the start and end masks are 0xffffffff, we can just upload
        // the words and put them directly into the destination. Otherwise,
        // or the first and last word on any scanline we have to mask
        // off any pixels that are outside the render area. We know the
        // glint will have 0 in the undesired right hand edge pixels, as
        // these were not in the render area. We dont know anything about
        // the destination though.
        if ((leftMask == 0xFFFFFFFF) && (rightMask == 0xFFFFFFFF))
        {
            DISPDBG((DBGLVL, "pxrxFifoUpload: no edge masks"));

            while (--cySrc >= 0)
            {
                pulDstScan = pulDst;
                pulDst += culDstDelta;

                DISPDBG((DBGLVL, "pxrxFifoUpload: uploading scan of %xh "
                                 "ulongs to %p (Remainder %xh)",
                                 culPerSrcScan, pulDstScan, bRemPerSrcScan));

                cul = culPerSrcScan;
                while(cul)
                {
                    // Read whatever is available, but never more than
                    // this scan still needs.
                    WAIT_OUTPUT_FIFO_NOT_EMPTY(cFifoSpaces);
                    if (cFifoSpaces > cul)
                    {
                        cFifoSpaces = cul;
                    }

                    cul -= cFifoSpaces;
                    while (--cFifoSpaces >= 0)
                    {
                        READ_OUTPUT_FIFO(ul);
                        DISPDBG((DBGLVL, "pxrxFifoUpload: read %08.8xh from "
                                         "output FIFO", ul));
                        *pulDstScan++ = ul;
                    }
                }

                if(bRemPerSrcScan)
                {
                    // Odd ulong count: read and drop the padding ulong
                    WAIT_OUTPUT_FIFO_NOT_EMPTY(cFifoSpaces);
                    READ_OUTPUT_FIFO(ul);
                    DISPDBG((DBGLVL, "pxrxFifoUpload: read remainder %08.8xh "
                                     "from output FIFO", ul));
                }
            }
        }
        else if(culPerSrcScan == 1)
        {
            DISPDBG((DBGLVL, "pxrxFifoUpload: single ulong per scan"));

            while (--cySrc >= 0)
            {
                WAIT_OUTPUT_FIFO_NOT_EMPTY(cFifoSpaces);
                READ_OUTPUT_FIFO(ul);
                DISPDBG((DBGLVL, "pxrxFifoUpload: "
                                 "read %08.8xh from output FIFO", ul));

                // leftMask contains both masks in this case
                *pulDst = (*pulDst & ~leftMask) | (ul & leftMask);

                ASSERTDD(bRemPerSrcScan, "one word per scan upload should "
                                         "always leave a remainder");

                WAIT_OUTPUT_FIFO_NOT_EMPTY(cFifoSpaces);
                READ_OUTPUT_FIFO(ul);
                DISPDBG((DBGLVL, "pxrxFifoUpload: read remainder %08.8xh "
                                 "from output FIFO", ul));

                pulDst += culDstDelta;
            }
        }
        else
        {
            DISPDBG((DBGLVL, "pxrxFifoUpload: scan with left & right edge "
                             "masks: %08.8x .. %08.8x", leftMask, rightMask));

            while (--cySrc >= 0)
            {
                pulDstScan = pulDst;
                pulDst += culDstDelta;

                DISPDBG((DBGLVL, "pxrxFifoUpload: uploading scan of %xh "
                                 "ulongs to %p", culPerSrcScan, pulDstScan));

                // get first ulong
                WAIT_OUTPUT_FIFO_NOT_EMPTY(cFifoSpaces);
                --cFifoSpaces;
                READ_OUTPUT_FIFO(ul);
                DISPDBG((DBGLVL, "pxrxFifoUpload: "
                                 "read %08.8xh from output FIFO", ul));

                // Merge first and advance afterwards. Doing both in one
                // expression reads and modifies pulDstScan with no
                // sequencing between the two, which is undefined.
                *pulDstScan = (*pulDstScan & ~leftMask) | (ul & leftMask);
                ++pulDstScan;

                // get middle ulongs
                cul = culPerSrcScan - 2;

                if (cul == 0)
                {
                    // With no middle ulongs the loop below never runs, and
                    // it is that loop which normally leaves a completed
                    // wait behind it. Wait here so the last ulong is not
                    // read from a FIFO that has not been checked.
                    WAIT_OUTPUT_FIFO_NOT_EMPTY(cFifoSpaces);
                }

                while (cul)
                {
                    if (cFifoSpaces > cul)
                    {
                        cFifoSpaces = cul;
                    }

                    cul -= cFifoSpaces;
                    while (--cFifoSpaces >= 0)
                    {
                        READ_OUTPUT_FIFO(ul);
                        DISPDBG((DBGLVL, "pxrxFifoUpload: "
                                         "read %08.8xh from output FIFO", ul));
                        *pulDstScan++ = ul;
                    }

                    // Covers both the next burst of middle ulongs and,
                    // on the final pass, the last ulong read below.
                    WAIT_OUTPUT_FIFO_NOT_EMPTY(cFifoSpaces);
                }

                // get last ulong
                READ_OUTPUT_FIFO(ul);
                DISPDBG((DBGLVL, "pxrxFifoUpload: "
                                 "read %08.8xh from output FIFO", ul));
                *pulDstScan = (*pulDstScan & ~rightMask) | (ul & rightMask);

                if(bRemPerSrcScan)
                {
                    // Odd ulong count: read and drop the padding ulong
                    WAIT_OUTPUT_FIFO_NOT_EMPTY(cFifoSpaces);
                    READ_OUTPUT_FIFO(ul);
                    DISPDBG((DBGLVL, "pxrxFifoUpload: read remainder "
                                     "%08.8xh from output FIFO", ul));
                }
            }
        }
    }

#if DBG
    // Debug builds send a Sync and drain the FIFO until it comes back
    cul = 0xaa55aa55;
    DISPDBG((DBGLVL, "pxrxFifoUpload: waiting for sync (id = %08.8xh)", cul));
    WAIT_PXRX_DMA_TAGS(1);
    QUEUE_PXRX_DMA_TAG(__GlintTagSync, cul);
    SEND_PXRX_DMA_FORCE;
    do
    {
        WAIT_OUTPUT_FIFO_READY;
        READ_OUTPUT_FIFO(ul);
        DISPDBG((DBGLVL, "pxrxFifoUpload: read %08.8xh from output FIFO", ul));
        if(ul != __GlintTagSync)
        {
            DISPDBG((ERRLVL,"pxrxFifoUpload: didn't read back sync!"));
        }
    }
    while(ul != __GlintTagSync);
    DISPDBG((DBGLVL, "pxrxFifoUpload: got sync"));
#endif

    // no need to initiate DMA with this tag - it will get flushed with the
    // next primitive and meanwhile will not affect local memory
    WAIT_PXRX_DMA_TAGS(1);
    QUEUE_PXRX_DMA_TAG(__GlintTagFilterMode, 0);
    SEND_PXRX_DMA_BATCH;

    GLINT_CORE_IDLE;

    DISPDBG((DBGLVL, "pxrxFifoUpload: done"));
}
//****************************************************************************
// VOID vGlintCopyBltBypassDownloadXlate8bpp
//
// using the bypass mechanism we can take advantage of write-combining
// which can be quicker than using the FIFO
// NB. supports 32bpp and 16bpp destinations
//
// Every source byte is looked up in pxlo->pulXlate and the result is written
// straight into the memory mapped framebuffer (ppdev->pjScreen), as a ULONG
// when ppdev->cPelSize is GLINTDEPTH32 and as a USHORT otherwise.
//
// ARGS: ppdev    - physical device
//       psoSrc   - 8bpp source surface; pvScan0 and lDelta are read
//       pptlSrc  - source point corresponding to prclDst's top-left corner
//       prclDst  - unclipped destination rectangle (left/top only)
//       prclClip - array of crclClip destination clip rectangles
//       crclClip - number of clip rectangles
//       pxlo     - translation object; pulXlate is read
//****************************************************************************
VOID vGlintCopyBltBypassDownloadXlate8bpp(
    PDEV *ppdev,
    SURFOBJ *psoSrc,
    POINTL *pptlSrc,
    RECTL *prclDst,
    RECTL *prclClip,
    LONG crclClip,
    XLATEOBJ *pxlo)
{
    BYTE *pjSrcScan0;
    LONG cjSrcDelta, xSrcOff, ySrcOff;
    ULONG *pulDstScan0;
    LONG culDstDelta, xDstOff;
    LONG cScans, cPixPerScan, c;
    // End-of-scan adjustments are signed: the source stride comes from
    // psoSrc->lDelta and an unsigned 32-bit value holding a negative
    // stride would turn into a huge forward step when added to a 64-bit
    // pointer.
    LONG cjSrcDeltaRem, cjDstDeltaRem;
    ULONG *aulXlate;
    BYTE *pjSrc;
    GLINT_DECL;

//@@BEGIN_DDKSPLIT
#if 0
    {
        SIZEL sizlDst;
        sizlDst.cx = prclClip->right - prclClip->left;
        sizlDst.cy = prclClip->bottom - prclClip->top;
        DISPDBG((DBGLVL, "vGlintCopyBltBypassDownloadXlate8bpp(): "
                         "cRects(%d) sizlDst(%d,%d)",
                         crclClip, sizlDst.cx, sizlDst.cy));
    }
#endif //DBG
//@@END_DDKSPLIT

    pjSrcScan0 = (BYTE *)psoSrc->pvScan0;
    cjSrcDelta = psoSrc->lDelta;

    // need to add arclClip[n].left to get xSrc
    xSrcOff = pptlSrc->x - prclDst->left;
    // need to add arclClip[n].top to get ySrc
    ySrcOff = pptlSrc->y - prclDst->top;

    pulDstScan0 = (ULONG *)ppdev->pjScreen;
    culDstDelta = ppdev->DstPixelDelta >> (2 - ppdev->cPelSize);
    xDstOff = ppdev->DstPixelOrigin + (ppdev->xyOffsetDst & 0xffff) +
              (ppdev->xyOffsetDst >> 16) * ppdev->DstPixelDelta;

    aulXlate = pxlo->pulXlate;

    SYNC_IF_CORE_BUSY;

    for (; --crclClip >= 0; ++prclClip)
    {
        cScans = prclClip->bottom - prclClip->top;
        cPixPerScan = prclClip->right - prclClip->left;

        // Bytes from the end of one source scan to the start of the next
        cjSrcDeltaRem = cjSrcDelta - cPixPerScan;

        // First source byte of the rectangle. Pointers start on the first
        // pixel and are post-incremented, so no pointer is ever formed
        // before the start of a buffer.
        pjSrc = pjSrcScan0 + xSrcOff + prclClip->left
                + ((prclClip->top + ySrcOff) * cjSrcDelta);

        if (ppdev->cPelSize == GLINTDEPTH32)
        {
            ULONG *pulDst;

            // Bytes from the end of one destination scan to the next
            cjDstDeltaRem = (culDstDelta - cPixPerScan) << 2;

            pulDst = pulDstScan0 + xDstOff + prclClip->left
                     + prclClip->top * culDstDelta;

            while (--cScans >= 0)
            {
                for (c = cPixPerScan; --c >= 0;)
                {
                    *pulDst++ = aulXlate[*pjSrc++];
                }

                pjSrc += cjSrcDeltaRem;
                // Step in bytes without casting the lvalue itself, which
                // is a compiler extension rather than standard C.
                pulDst = (ULONG *)((BYTE *)pulDst + cjDstDeltaRem);
            }
        }
        else // (GLINTDEPTH16)
        {
            USHORT *pusDst;

            // Bytes from the end of one destination scan to the next
            cjDstDeltaRem =
                (culDstDelta << 2) - (cPixPerScan << ppdev->cPelSize);

            pusDst = (USHORT *)pulDstScan0 + xDstOff + prclClip->left
                     + ((prclClip->top * culDstDelta) << 1);

            while (--cScans >= 0)
            {
                for (c = cPixPerScan; --c >= 0;)
                {
                    *pusDst++ = (USHORT)aulXlate[*pjSrc++];
                }

                pjSrc += cjSrcDeltaRem;
                pusDst = (USHORT *)((BYTE *)pusDst + cjDstDeltaRem);
            }
        }
    }
}
//@@BEGIN_DDKSPLIT
#if 0
/**************************************************************************\
*
* void pxrxMonoDownloadRLE
*
* Downloads 'cy' scanlines of dword data with __GlintTagBitMaskPattern as
* the download target, run-length encoding where it pays off:
*
*   - a run of 4 or more identical dwords is sent as an RLData/RLCount pair
*   - shorter runs are sent literally under a BitMaskPattern hold tag; the
*     tag slot is reserved first and its count is patched in when the
*     literal group ends
*
* When AlignWidth is 32 there is one dword per scanline and runs are
* searched for down the column; otherwise runs are searched for along
* each scanline of (AlignWidth >> 5) dwords.
*
* Arguments:
*   ppdev      - physical device
*   AlignWidth - width of the download in bits (32 bits per dword)
*   pjSrc      - dword pointer to the first source dword
*   lSrcDelta  - scanline step added to pjSrc; pjSrc is a ULONG pointer so
*                this is counted in dwords
*   cy         - number of scanlines
*
* NOTE: this routine sits inside an '#if 0' region and is not compiled.
*
\**************************************************************************/
void pxrxMonoDownloadRLE(
PPDEV ppdev,
ULONG AlignWidth,
ULONG *pjSrc,
LONG lSrcDelta,
LONG cy )
{
ULONG len, data, holdCount;
// Slot of the currently open hold tag, or NULL when no literal group is open
ULONG *tagPtr = NULL;
GLINT_DECL;
WAIT_PXRX_DMA_TAGS( 1 );
QUEUE_PXRX_DMA_TAG( __GlintTagDownloadTarget,
__GlintTagBitMaskPattern );
if( AlignWidth == 32 )
{
// NOTE(review): 'bits' is declared but never used in this branch
ULONG bits;
DISPDBG((DBGLVL, "Doing Single Word per scan download"));
WAIT_PXRX_DMA_DWORDS( cy + 1 );
while( cy-- )
{
TEST_DWORD_ALIGNED( pjSrc );
data = *pjSrc;
pjSrc += lSrcDelta;
len = 1;
TEST_DWORD_ALIGNED( pjSrc );
// Extend the run over following scanlines that hold the same dword
while( cy && (*pjSrc == data) )
{
pjSrc += lSrcDelta;
len++;
cy--;
TEST_DWORD_ALIGNED( pjSrc );
}
if( len >= 4 )
{
// Long run: close any open literal group, then send data + count
if( tagPtr )
{
*tagPtr = ASSEMBLE_PXRX_DMA_HOLD( __GlintTagBitMaskPattern,
holdCount );
tagPtr = NULL;
}
QUEUE_PXRX_DMA_INDEX2( __GlintTagRLData, __GlintTagRLCount );
QUEUE_PXRX_DMA_DWORD( data );
QUEUE_PXRX_DMA_DWORD( len );
len = 0;
}
else
{
// Short run: append the dwords literally to the open group,
// opening one (reserving its tag slot) if needed
if( !tagPtr )
{
QUEUE_PXRX_DMA_DWORD_DELAYED( tagPtr );
holdCount = 0;
}
holdCount += len;
while( len-- )
{
QUEUE_PXRX_DMA_DWORD( data );
}
}
}
// Patch the count of a literal group that is still open at the end
if( tagPtr )
{
*tagPtr = ASSEMBLE_PXRX_DMA_HOLD( __GlintTagBitMaskPattern,
holdCount );
tagPtr = NULL;
}
}
else
{
// Multiple 32 bit words per scanline. pjSrc is advanced across the
// whole scanline while it is encoded, so the step applied at the end
// of a line is the scanline delta minus the number of dwords consumed
// (AlignWidth >> 5).
// NOTE(review): the original comment here spoke of a width "in bytes"
// and of pjSrc being one LONG short of the end of the line; the code
// below subtracts a dword count and consumes every dword of the line,
// so that description does not match this implementation - confirm.
LONG nRemainder;
// NOTE(review): 'bits' is declared but never used in this branch
ULONG bits;
LONG lSrcDeltaScan = lSrcDelta - (AlignWidth >> 5);
DISPDBG((DBGLVL, "Doing Multiple Word per scan download"));
while( TRUE )
{
nRemainder = AlignWidth >> 5;
WAIT_PXRX_DMA_DWORDS( nRemainder + 1 );
while( nRemainder-- )
{
TEST_DWORD_ALIGNED( pjSrc );
data = *(pjSrc++);
len = 1;
TEST_DWORD_ALIGNED( pjSrc );
// Extend the run along the current scanline only
while( nRemainder && (*pjSrc == data) )
{
pjSrc++;
len++;
nRemainder--;
TEST_DWORD_ALIGNED( pjSrc );
}
if( len >= 4 )
{
// Long run: close any open literal group, then send data + count
if( tagPtr )
{
*tagPtr = ASSEMBLE_PXRX_DMA_HOLD(
__GlintTagBitMaskPattern,
holdCount );
tagPtr = NULL;
}
QUEUE_PXRX_DMA_INDEX2( __GlintTagRLData,
__GlintTagRLCount );
QUEUE_PXRX_DMA_DWORD( data );
QUEUE_PXRX_DMA_DWORD( len );
len = 0;
}
else
{
// Short run: append literally, opening a group if needed
if( !tagPtr )
{
QUEUE_PXRX_DMA_DWORD_DELAYED( tagPtr );
holdCount = 0;
}
holdCount += len;
while( len-- )
{
QUEUE_PXRX_DMA_DWORD( data );
}
}
}
// A literal group never spans scanlines: close it at the end of each line
if( tagPtr )
{
*tagPtr = ASSEMBLE_PXRX_DMA_HOLD( __GlintTagBitMaskPattern,
holdCount );
tagPtr = NULL;
}
// The last scanline is flushed by the SEND after the loop instead
if( --cy == 0 )
{
break;
}
SEND_PXRX_DMA_BATCH;
pjSrc += lSrcDeltaScan;
}
}
SEND_PXRX_DMA_BATCH;
}
//*********************************************************************************************
// FUNC: pxrxRLEFifoUpload
// ARGS: ppdev (I) - pointer to the physical device object
// crcl (I) - number of destination clipping rectangles
// prcl (I) - array of destination clipping rectangles
// psoDst (I) - destination surface
// pptlSrc (I) - offset into source surface
// prclDst (I) - unclipped destination rectangle
// RETN: void
//---------------------------------------------------------------------------------------------
// upload from on-chip source into host memory surface. Upload in spans (64-bit aligned) to
// minimise messages through the core and entries in the host out fifo. Upload is RLE encoded.
//*********************************************************************************************
VOID pxrxRLEFifoUpload(PPDEV ppdev, LONG crcl, RECTL *prcl, SURFOBJ *psoDst, POINTL *pptlSrc, RECTL *prclDst)
{
LONG xDomSrc, xSubSrc, yStartSrc, cxSrc, cySrc;
LONG culPerSrcScan;
LONG culDstDelta;
BOOL bRemPerSrcScan;
ULONG *pulDst, *pulDstScan;
ULONG leftMask, rightMask;
LONG cul, ul;
LONG cFifoSpaces;
ULONG RLECount, RLEData;
__GlintFilterModeFmat FilterMode;
GLINT_DECL;
DISPDBG((7, "pxrxFifoUpload: prcl = (%d, %d -> %d, %d), prclDst = (%d, %d -> %d, %d), ptlSrc(%d, %d), count = %d",
prcl->left, prcl->top, prcl->right, prcl->bottom,
prclDst->left, prclDst->top, prclDst->right, prclDst->bottom, pptlSrc->x, pptlSrc->y, crcl));
DISPDBG((7, "pxrxFifoUpload: psoDst: cx = %d, cy = %d, lDelta = %d, pvScan0=%P)",
psoDst->sizlBitmap.cx, psoDst->sizlBitmap.cy, psoDst->lDelta, psoDst->pvScan0));
DISPDBG((7, "pxrxFifoUpload: xyOffsetDst = (%d, %d), xyOffsetSrc = (%d, %d)",
ppdev->xyOffsetDst & 0xFFFF, ppdev->xyOffsetDst >> 16,
ppdev->xyOffsetSrc & 0xFFFF, ppdev->xyOffsetSrc >> 16));
ASSERTDD(psoDst->iBitmapFormat == ppdev->iBitmapFormat, "Dest must be same colour depth as screen");
ASSERTDD(crcl > 0, "Can't handle zero rectangles");
WAIT_PXRX_DMA_TAGS(6);
QUEUE_PXRX_DMA_TAG( __GlintTagRLEMask, 0xffffffff);
LOAD_CONFIG2D(__CONFIG2D_FBDESTREAD);
SET_READ_BUFFERS;
// enable filter mode so we can get Sync and color messages on the output FIFO
*(DWORD *)(&FilterMode) = 0;
FilterMode.Synchronization = __GLINT_FILTER_TAG;
FilterMode.Color = __GLINT_FILTER_DATA;
FilterMode.RLEHostOut = TRUE;
QUEUE_PXRX_DMA_TAG(__GlintTagFilterMode, *(DWORD*)(&FilterMode));
for(; --crcl >= 0; ++prcl)
{
DISPDBG((7, "pxrxFifoUpload: dest prcl(%xh,%xh..%xh,%xh)", prcl->left, prcl->top, prcl->right, prcl->bottom));
// calculate pixel-aligned source
xDomSrc = pptlSrc->x + prcl->left - prclDst->left;
xSubSrc = pptlSrc->x + prcl->right - prclDst->left;
yStartSrc = pptlSrc->y + prcl->top - prclDst->top;
cySrc = prcl->bottom - prcl->top;
DISPDBG((8, "pxrxFifoUpload: src (%xh,%xh..%xh,%xh)", xDomSrc, yStartSrc, xSubSrc, yStartSrc + cySrc));
// will upload ulongs aligned to ulongs
if (ppdev->cPelSize == GLINTDEPTH32)
{
cxSrc = xSubSrc - xDomSrc;
culPerSrcScan = cxSrc;
leftMask = 0xFFFFFFFF;
rightMask = 0xFFFFFFFF;
}
else
{
if (ppdev->cPelSize == GLINTDEPTH16)
{
ULONG cPixFromUlongBoundary = prcl->left & 1;
xDomSrc -= cPixFromUlongBoundary;
cxSrc = xSubSrc - xDomSrc;
culPerSrcScan = (xSubSrc - xDomSrc + 1) >> 1;
leftMask = 0xFFFFFFFF << (cPixFromUlongBoundary << 4);
rightMask = 0xFFFFFFFF >> (((xDomSrc - xSubSrc) & 1) << 4);
}
else
{
ULONG cPixFromUlongBoundary = prcl->left & 3;
xDomSrc -= cPixFromUlongBoundary;
cxSrc = xSubSrc - xDomSrc;
culPerSrcScan = (xSubSrc - xDomSrc + 3) >> 2;
leftMask = 0xFFFFFFFF << (cPixFromUlongBoundary << 3);
rightMask = 0xFFFFFFFF >> (((xDomSrc - xSubSrc) & 3) << 3);
}
// We just want a single mask if the area to upload is less than one word wide.
if (culPerSrcScan == 1)
leftMask &= rightMask;
}
// uploading 64 bit aligned source
bRemPerSrcScan = culPerSrcScan & 1;
// the remainder will be encoded in the run: it's simpler just to add it in now
// then check bRemPerSrcScan during the upload
DISPDBG((8, "pxrxFifoUpload: Adding remainder into culPerSrcScan for RLE"));
culPerSrcScan += bRemPerSrcScan;
// Work out where the destination data goes to
culDstDelta = psoDst->lDelta >> 2;
pulDst = ((ULONG *)psoDst->pvScan0) + (prcl->left >> (2 - ppdev->cPelSize)) + culDstDelta * prcl->top;
DISPDBG((8, "pxrxFifoUpload: uploading aligned src (%xh,%xh..%xh,%xh)", xDomSrc, yStartSrc,
xDomSrc + cxSrc, yStartSrc + cySrc));
// Render the rectangle
WAIT_PXRX_DMA_TAGS(2);
QUEUE_PXRX_DMA_TAG( __GlintTagRectanglePosition,MAKEDWORD_XY(xDomSrc, yStartSrc));
QUEUE_PXRX_DMA_TAG( __GlintTagRender2D, __RENDER2D_OP_NORMAL | __RENDER2D_SPANS |
__RENDER2D_INCY | __RENDER2D_INCX |
__RENDER2D_WIDTH(cxSrc) | __RENDER2D_HEIGHT(cySrc));
SEND_PXRX_DMA_FORCE;
// If the start and end masks are 0xffffffff, we can just upload the words and put them
// directly into the destination. Otherwise, on the first and last word of any scanline
// we have to mask off any pixels that are outside the render area. We know the glint will
// have 0 in the undesired right hand edge pixels, as these were not in the render area. We
// don't know anything about the destination though.
if (leftMask == 0xFFFFFFFF && rightMask == 0xFFFFFFFF)
{
DISPDBG((8, "pxrxFifoUpload: no edge masks"));
while (--cySrc >= 0)
{
pulDstScan = pulDst;
pulDst += culDstDelta;
DISPDBG((9, "pxrxFifoUpload: uploading scan of %xh ulongs to %p (Remainder %xh)",
culPerSrcScan, pulDstScan, bRemPerSrcScan));
cul = culPerSrcScan;
while(cul)
{
WAIT_OUTPUT_FIFO_COUNT(2);
READ_OUTPUT_FIFO(RLECount);
READ_OUTPUT_FIFO(RLEData);
DISPDBG((10, "pxrxFifoUpload: RLECount = %xh RLEData = 08.8xh", RLECount, RLEData));
cul -= RLECount;
if(cul == 0 && bRemPerSrcScan)
{
// discard the last ulong
--RLECount;
}
while(RLECount--)
{
DISPDBG((10, "pxrxFifoUpload: written ulong"));
*pulDstScan++ = RLEData;
}
}
}
}
else if(culPerSrcScan == 1)
{
DISPDBG((8, "pxrxFifoUpload: single ulong per scan"));
while (--cySrc >= 0)
{
// the remainder has already been added into culPerSrcScan so this can't happen
DISPDBG((ERRLVL,"pxrxFifoUpload: got single ulong per scan - but we always upload 64 bit quanta!"));
pulDst += culDstDelta;
}
}
else
{
DISPDBG((8, "pxrxFifoUpload: scan with left & right edge masks: %08.8x .. %08.8x", leftMask, rightMask));
while (--cySrc >= 0)
{
pulDstScan = pulDst;
pulDst += culDstDelta;
DISPDBG((9, "pxrxFifoUpload: uploading scan of %xh ulongs to %p", culPerSrcScan, pulDstScan));
cul = culPerSrcScan;
while(cul)
{
WAIT_OUTPUT_FIFO_COUNT(2);
READ_OUTPUT_FIFO(RLECount);
READ_OUTPUT_FIFO(RLEData);
DISPDBG((10, "pxrxFifoUpload: RLECount = %xh RLEData = %08.8xh", RLECount, RLEData));
if(cul - bRemPerSrcScan == 0)
{
DISPDBG((10, "pxrxFifoUpload: discarding last ulong"));
break;
}
if(culPerSrcScan - bRemPerSrcScan == 1)
{
// one pixel per scan
DISPDBG((10, "pxrxFifoUpload: written single pixel scan"));
*pulDstScan = (*pulDstScan & ~leftMask) | (RLEData & leftMask);
cul -= RLECount;
continue;
}
if(cul == culPerSrcScan)
{
DISPDBG((10, "pxrxFifoUpload: written left edge"));
*pulDstScan++ = (*pulDstScan & ~leftMask) | (RLEData & leftMask); // first ulong
--RLECount;
--cul;
}
cul -= RLECount;
if(cul == 0)
{
// this is the last run of the scan: process the last ulong separately in order
// to apply the right edge mask
RLECount -= 1 + bRemPerSrcScan;
}
else if(cul - bRemPerSrcScan == 0)
{
// this is the penultimate run of the scan and the last one will just include the
// remainder: process the last ulong separately in order to apply the right edge mask
--RLECount;
}
while(RLECount--)
{
DISPDBG((10, "pxrxFifoUpload: written middle ulong"));
*pulDstScan++ = RLEData;
}
if(cul == 0 || cul - bRemPerSrcScan == 0)
{
DISPDBG((10, "pxrxFifoUpload: written right edge"));
*pulDstScan = (*pulDstScan & ~rightMask) | (RLEData & rightMask); // last ulong
#if DBG
if(cul - bRemPerSrcScan == 0)
{
DISPDBG((10, "pxrxFifoUpload: discarding last ulong"));
}
#endif
}
}
}
}
}
#if DBG
cul = 0xaa55aa55;
DISPDBG((8, "pxrxFifoUpload: waiting for sync (id = %08.8xh)", cul));
WAIT_PXRX_DMA_TAGS(1);
QUEUE_PXRX_DMA_TAG(__GlintTagSync, cul);
SEND_PXRX_DMA_FORCE;
do
{
WAIT_OUTPUT_FIFO_READY;
READ_OUTPUT_FIFO(ul);
DISPDBG((8, "pxrxFifoUpload: read %08.8xh from output FIFO", ul));
if(ul != __GlintTagSync)
{
DISPDBG((ERRLVL,"pxrxFifoUpload: didn't read back sync!"));
}
}
while(ul != __GlintTagSync);
DISPDBG((8, "pxrxFifoUpload: got sync"));
#endif
// no need to initiate DMA with this tag - it will get flushed with the next primitive and
// meanwhile will not affect local memory
WAIT_PXRX_DMA_TAGS(1);
QUEUE_PXRX_DMA_TAG(__GlintTagFilterMode, 0);
SEND_PXRX_DMA_BATCH;
GLINT_CORE_IDLE;
DISPDBG((7, "pxrxFifoUpload: done"));
}
//****************************************************************************
// FUNC: vGlintCopyBltBypassDownload32bpp
// DESC: using the bypass mechanism we can take advantage of write-combining
//       which can be quicker than using the FIFO
//
//       Copies one ULONG per pixel from the source surface straight into the
//       frame buffer mapped at ppdev->pjScreen, once for every clip rectangle.
//
// ARGS: ppdev    - device whose frame buffer and destination offsets are used
//       psoSrc   - source surface (pvScan0 / lDelta are read)
//       pptlSrc  - source point corresponding to prclDst->left/top
//       prclDst  - unclipped destination rectangle (only left/top are read)
//       prclClip - array of destination rectangles to copy
//       crclClip - number of entries in prclClip
//****************************************************************************
VOID vGlintCopyBltBypassDownload32bpp(
    PDEV    *ppdev,
    SURFOBJ *psoSrc,
    POINTL  *pptlSrc,
    RECTL   *prclDst,
    RECTL   *prclClip,
    LONG     crclClip)
{
    ULONG  *pulSrcScan0;
    LONG    culSrcDelta, xSrcOff, ySrcOff;
    ULONG  *pulDstScan0;
    LONG    culDstDelta, xDstOff;
    LONG    cScans, cPixPerScan, c;
    // Signed: the source stride may be negative (bottom-up bitmap), and the
    // remainder is added to a pointer that may be wider than 32 bits.
    LONG    cjSrcDeltaRem, cjDstDeltaRem;
    ULONG  *pulSrc;
    ULONG  *pulDst;
    ULONG   tmp0, tmp1, tmp2;
    GLINT_DECL;

#if DBG && 0
    {
        SIZEL sizlDst;
        sizlDst.cx = prclClip->right - prclClip->left;
        sizlDst.cy = prclClip->bottom - prclClip->top;
        DISPDBG((-1, "vGlintCopyBltBypassDownload32bpp(): cRects(%d) sizlDst(%d,%d)", crclClip, sizlDst.cx, sizlDst.cy));
    }
#endif //DBG

    pulSrcScan0 = (ULONG *)psoSrc->pvScan0;
    culSrcDelta = psoSrc->lDelta >> 2;
    xSrcOff = pptlSrc->x - prclDst->left;   // need to add arclClip[n].left to get xSrc
    ySrcOff = pptlSrc->y - prclDst->top;    // need to add arclClip[n].top to get ySrc

    pulDstScan0 = (ULONG *)ppdev->pjScreen;
    culDstDelta = ppdev->DstPixelDelta >> (2 - ppdev->cPelSize);
    xDstOff = ppdev->DstPixelOrigin + (ppdev->xyOffsetDst & 0xffff) +
              (ppdev->xyOffsetDst >> 16) * ppdev->DstPixelDelta;

    SYNC_IF_CORE_BUSY;

    for (; --crclClip >= 0; ++prclClip)
    {
        cScans      = prclClip->bottom - prclClip->top;
        cPixPerScan = prclClip->right - prclClip->left;

        // After a scan has been copied the pointers have advanced by
        // cPixPerScan ULONGs; these are the byte distances still needed to
        // reach the same x position on the next scan.
        cjSrcDeltaRem = (culSrcDelta - cPixPerScan) * 4;
        cjDstDeltaRem = (culDstDelta - cPixPerScan) * 4;

        // calc source & destination address, -1 to allow for prefix-increment
        pulSrc = -1 + pulSrcScan0 + xSrcOff + prclClip->left
                 + ((prclClip->top + ySrcOff) * culSrcDelta);
        pulDst = -1 + pulDstScan0 + xDstOff + prclClip->left
                 + prclClip->top * culDstDelta;

        while (--cScans >= 0)
        {
#if defined(_X86_)
            // Four pixels per iteration. ebp is borrowed as a fourth data
            // register, so the locals are loaded before it is pushed and
            // stored back only after it has been popped.
            __asm
            {
                mov     edi, pulDst
                mov     ecx, cPixPerScan
                mov     esi, pulSrc
                shr     ecx, 2
                push    ebp
                test    ecx, ecx
                jle     EndOfLine
            LoopFours:
                mov     eax, [esi+4]
                mov     ebx, [esi+8]
                mov     edx, [esi+12]
                mov     ebp, [esi+16]
                add     esi, 16
                mov     [edi+4], eax
                mov     [edi+8], ebx
                add     edi, 16
                mov     [edi-4], edx
                dec     ecx
                mov     [edi], ebp
                jne     LoopFours
            EndOfLine:
                pop     ebp
                mov     pulSrc, esi
                mov     pulDst, edi
            }

            // do the remaining 0, 1, 2 or 3 pixels on this line
            switch (cPixPerScan & 3)
            {
            case 3:
                tmp0 = *++pulSrc;
                tmp1 = *++pulSrc;
                tmp2 = *++pulSrc;
                *++pulDst = tmp0;
                *++pulDst = tmp1;
                *++pulDst = tmp2;
                break;
            case 2:
                tmp0 = *++pulSrc;
                tmp1 = *++pulSrc;
                *++pulDst = tmp0;
                *++pulDst = tmp1;
                break;
            case 1:
                tmp0 = *++pulSrc;
                *++pulDst = tmp0;
                break;
            default:
                break;
            }
#else
            for (c = cPixPerScan; --c >= 0;)
            {
                *++pulDst = *++pulSrc;
            }
#endif
            // Step to the next scan using standard pointer arithmetic (no
            // cast-as-lvalue) with a signed byte offset.
            pulSrc = (ULONG *)((BYTE *)pulSrc + cjSrcDeltaRem);
            pulDst = (ULONG *)((BYTE *)pulDst + cjDstDeltaRem);
        }
    }
}
//****************************************************************************
// FUNC: vGlintCopyBltBypassDownload24bppTo32bpp
// DESC: using the bypass mechanism we can take advantage of write-combining
//       which can be quicker than using the FIFO
//
//       Expands 3-byte source pixels to one ULONG per pixel in the frame
//       buffer mapped at ppdev->pjScreen, once for every clip rectangle.
//       Source bytes are read individually, so no access is misaligned and
//       nothing beyond the third byte of a pixel is touched.
//
// ARGS: ppdev    - device whose frame buffer and destination offsets are used
//       psoSrc   - 24bpp source surface (pvScan0 / lDelta are read)
//       pptlSrc  - source point corresponding to prclDst->left/top
//       prclDst  - unclipped destination rectangle (only left/top are read)
//       prclClip - array of destination rectangles to copy
//       crclClip - number of entries in prclClip
//****************************************************************************
VOID vGlintCopyBltBypassDownload24bppTo32bpp(
    PDEV    *ppdev,
    SURFOBJ *psoSrc,
    POINTL  *pptlSrc,
    RECTL   *prclDst,
    RECTL   *prclClip,
    LONG     crclClip)
{
    BYTE   *pjSrcScan0;
    LONG    cjSrcDelta;
    LONG    xSrcOff, ySrcOff;
    ULONG  *pulDstScan0;
    LONG    culDstDelta, xDstOff;
    LONG    cScans, cPixPerScan, c;
    BYTE   *pjSrc;
    BYTE   *pj;
    ULONG  *pulDst, *puld;
    GLINT_DECL;

#if DBG && 0
    {
        SIZEL sizlDst;
        sizlDst.cx = prclClip->right - prclClip->left;
        sizlDst.cy = prclClip->bottom - prclClip->top;
        DISPDBG((-1, "vGlintCopyBltBypassDownload24bppTo32bpp(): cRects(%d) sizlDst(%d,%d)", crclClip, sizlDst.cx, sizlDst.cy));
    }
#endif //DBG

    pjSrcScan0 = (BYTE *)psoSrc->pvScan0;
    cjSrcDelta = psoSrc->lDelta;
    xSrcOff = pptlSrc->x - prclDst->left;   // need to add arclClip[n].left to get xSrc
    ySrcOff = pptlSrc->y - prclDst->top;    // need to add arclClip[n].top to get ySrc

    pulDstScan0 = (ULONG *)ppdev->pjScreen;
    culDstDelta = ppdev->DstPixelDelta >> (2 - ppdev->cPelSize);
    xDstOff = ppdev->DstPixelOrigin + (ppdev->xyOffsetDst & 0xffff) +
              (ppdev->xyOffsetDst >> 16) * ppdev->DstPixelDelta;

    SYNC_IF_CORE_BUSY;

    for (; --crclClip >= 0; ++prclClip)
    {
        cScans      = prclClip->bottom - prclClip->top;
        cPixPerScan = prclClip->right - prclClip->left;

        // The final pixel of each scan is stored unconditionally below, so an
        // empty rectangle must be skipped here or it would still write a pixel.
        if (cPixPerScan <= 0)
        {
            continue;
        }

        // byte offset of the first source pixel within its scan (3 bytes/pixel)
        c = (xSrcOff + prclClip->left) * 3;
        pjSrc = pjSrcScan0 + c + ((prclClip->top + ySrcOff) * cjSrcDelta);

        // -1 to allow for prefix-increment
        pulDst = -1 + pulDstScan0 + xDstOff + prclClip->left
                 + prclClip->top * culDstDelta;

        for (; --cScans >= 0; pjSrc += cjSrcDelta, pulDst += culDstDelta)
        {
            pj   = pjSrc;
            puld = pulDst;

            // All pixels but the last: build the ULONG from the three source
            // bytes (low byte first); byte 3 of the destination is written as 0.
            for (c = cPixPerScan - 1; --c >= 0; pj += 3)
            {
                *++puld = (ULONG)pj[0] | ((ULONG)pj[1] << 8) | ((ULONG)pj[2] << 16);
            }

            // Last pixel: only three bytes are stored, so byte 3 of this
            // destination ULONG keeps its previous contents.
            // NOTE(review): earlier pixels get byte 3 cleared - confirm whether
            // the difference matters for this frame buffer format.
            ++puld;
            *(USHORT *)puld = (USHORT)(pj[0] | (pj[1] << 8));
            ((BYTE *)puld)[2] = pj[2];
        }
    }
}
//****************************************************************************
// FUNC: vGlintCopyBltBypassDownload16bpp
// DESC: using the bypass mechanism we can take advantage of write-combining
//       which can be quicker than using the FIFO
//
//       Copies one USHORT per pixel from the source surface into the frame
//       buffer mapped at ppdev->pjScreen, once for every clip rectangle.
//       The destination is written as aligned ULONGs wherever possible; the
//       source is always read as USHORTs because its ULONG alignment need not
//       match the destination's.
//
// ARGS: ppdev    - device whose frame buffer and destination offsets are used
//       psoSrc   - 16bpp source surface (pvScan0 / lDelta are read)
//       pptlSrc  - source point corresponding to prclDst->left/top
//       prclDst  - unclipped destination rectangle (only left/top are read)
//       prclClip - array of destination rectangles to copy
//       crclClip - number of entries in prclClip
//****************************************************************************
VOID vGlintCopyBltBypassDownload16bpp(
    PDEV    *ppdev,
    SURFOBJ *psoSrc,
    POINTL  *pptlSrc,
    RECTL   *prclDst,
    RECTL   *prclClip,
    LONG     crclClip)
{
    ULONG  *pulSrcScan0;
    LONG    culSrcDelta, xSrcOff, ySrcOff;
    ULONG  *pulDstScan0;
    LONG    culDstDelta, xDstOff;
    LONG    cScans, cPixPerScan;
    USHORT *pusSrcRow;      // first source pixel of the current scan
    USHORT *pusDstRow;      // first destination pixel of the current scan
    GLINT_DECL;

#if DBG && 0
    {
        SIZEL sizlDst;
        sizlDst.cx = prclClip->right - prclClip->left;
        sizlDst.cy = prclClip->bottom - prclClip->top;
        DISPDBG((-1, "vGlintCopyBltBypassDownload16bpp(): cRects(%d) sizlDst(%d,%d)", crclClip, sizlDst.cx, sizlDst.cy));
    }
#endif //DBG

    pulSrcScan0 = (ULONG *)psoSrc->pvScan0;
    culSrcDelta = psoSrc->lDelta >> 2;
    xSrcOff = pptlSrc->x - prclDst->left;   // need to add arclClip[n].left to get xSrc
    ySrcOff = pptlSrc->y - prclDst->top;    // need to add arclClip[n].top to get ySrc

    pulDstScan0 = (ULONG *)ppdev->pjScreen;
    culDstDelta = ppdev->DstPixelDelta >> (2 - ppdev->cPelSize);
    xDstOff = ppdev->DstPixelOrigin + (ppdev->xyOffsetDst & 0xffff) +
              (ppdev->xyOffsetDst >> 16) * ppdev->DstPixelDelta;

    SYNC_IF_CORE_BUSY;

    for (; --crclClip >= 0; ++prclClip)
    {
        cScans      = prclClip->bottom - prclClip->top;
        cPixPerScan = prclClip->right - prclClip->left;

        // The leading/trailing single-pixel stores below do not check the
        // pixel count, so an empty rectangle must not reach them.
        if (cPixPerScan <= 0)
        {
            continue;
        }

        // x offsets are in pixels (USHORTs); scan deltas are in ULONGs, i.e.
        // two USHORTs each.
        pusSrcRow = (USHORT *)pulSrcScan0 + xSrcOff + prclClip->left;
        pusSrcRow += (prclClip->top + ySrcOff) * culSrcDelta * 2;
        pusDstRow = (USHORT *)pulDstScan0 + xDstOff + prclClip->left;
        pusDstRow += prclClip->top * culDstDelta * 2;

        for (; --cScans >= 0; pusSrcRow += culSrcDelta * 2, pusDstRow += culDstDelta * 2)
        {
            USHORT *pusSrcScan = pusSrcRow;
            USHORT *pusDstScan = pusDstRow;
            ULONG  *pulDstScan;
            LONG    cPix = cPixPerScan;
            LONG    cWords;

            if ((UINT_PTR)pusDstScan % sizeof(ULONG))
            {
                // we're not on a ulong boundary so write the first pixel of the scanline
                *pusDstScan++ = *pusSrcScan++;
                --cPix;
            }

            // write out the ulong-aligned words of the scanline; each one is
            // assembled from two source pixels, lower address in the low half
            pulDstScan = (ULONG *)pusDstScan;
            for (cWords = cPix / 2; --cWords >= 0; pusSrcScan += 2)
            {
                *pulDstScan++ = (ULONG)pusSrcScan[0] | ((ULONG)pusSrcScan[1] << 16);
            }

            // write any remaining pixel
            if (cPix % 2)
            {
                *(USHORT *)pulDstScan = *pusSrcScan;
            }
        }
    }
}
//****************************************************************************
// FUNC: vGlintCopyBltBypassDownloadXlate4bpp
// DESC: using the bypass mechanism we can take advantage of write-combining
//       which can be quicker than using the FIFO
//       NB. supports 32bpp and 16bpp destinations. Doesn't yet support 24bpp
//       destinations. No plans to add 8bpp support.
//
//       Each source byte holds two pixels, the high nybble first. Every
//       nybble is used as an index into pxlo->pulXlate and the looked-up
//       value is written to the frame buffer mapped at ppdev->pjScreen.
//       Nothing is written for destination depths other than 32bpp and 16bpp.
//
// ARGS: ppdev    - device whose frame buffer and destination offsets are used
//       psoSrc   - 4bpp source surface (pvScan0 / lDelta are read)
//       pptlSrc  - source point corresponding to prclDst->left/top
//       prclDst  - unclipped destination rectangle (only left/top are read)
//       prclClip - array of destination rectangles to copy
//       crclClip - number of entries in prclClip
//       pxlo     - translation object; pulXlate is indexed with values 0..15
//****************************************************************************
VOID vGlintCopyBltBypassDownloadXlate4bpp(
    PDEV     *ppdev,
    SURFOBJ  *psoSrc,
    POINTL   *pptlSrc,
    RECTL    *prclDst,
    RECTL    *prclClip,
    LONG      crclClip,
    XLATEOBJ *pxlo)
{
    BYTE   *pjSrcScan0;
    LONG    cjSrcDelta, xSrcOff, ySrcOff;
    ULONG  *pulDstScan0;
    LONG    culDstDelta, xDstOff;
    LONG    cScans, cPixPerScan, c;
    // Signed: the source stride may be negative (bottom-up bitmap), and the
    // remainder is added to a pointer that may be wider than 32 bits.
    LONG    cjSrcDeltaRem, cjDstDeltaRem;
    ULONG  *aulXlate;
    BOOL    bSrcLowNybble;
    BYTE   *pjSrc, j;
    GLINT_DECL;

#if DBG && 0
    {
        SIZEL sizlDst;
        sizlDst.cx = prclClip->right - prclClip->left;
        sizlDst.cy = prclClip->bottom - prclClip->top;
        DISPDBG((-1, "vGlintCopyBltBypassDownloadXlate4bpp(): cRects(%d) sizlDst(%d,%d)", crclClip, sizlDst.cx, sizlDst.cy));
    }
#endif //DBG

    pjSrcScan0 = (BYTE *)psoSrc->pvScan0;
    cjSrcDelta = psoSrc->lDelta;
    xSrcOff = pptlSrc->x - prclDst->left;   // need to add arclClip[n].left to get xSrc
    ySrcOff = pptlSrc->y - prclDst->top;    // need to add arclClip[n].top to get ySrc

    pulDstScan0 = (ULONG *)ppdev->pjScreen;
    culDstDelta = ppdev->DstPixelDelta >> (2 - ppdev->cPelSize);
    xDstOff = ppdev->DstPixelOrigin + (ppdev->xyOffsetDst & 0xffff) +
              (ppdev->xyOffsetDst >> 16) * ppdev->DstPixelDelta;

    aulXlate = pxlo->pulXlate;

    SYNC_IF_CORE_BUSY;

    for (; --crclClip >= 0; ++prclClip)
    {
        cScans      = prclClip->bottom - prclClip->top;
        cPixPerScan = prclClip->right - prclClip->left;

        // TRUE when the first source pixel is the low nybble of its byte
        bSrcLowNybble = (xSrcOff + prclClip->left) & 1;

        // source stride minus the number of source bytes consumed per scan
        cjSrcDeltaRem = cjSrcDelta - (cPixPerScan / 2 + ((cPixPerScan & 1) || bSrcLowNybble));

        // -1 to allow for prefix-increment
        pjSrc = -1 + pjSrcScan0 + (xSrcOff + prclClip->left) / 2
                + ((prclClip->top + ySrcOff) * cjSrcDelta);

        if (ppdev->cPelSize == GLINTDEPTH32)
        {
            ULONG *pulDst;

            cjDstDeltaRem = (culDstDelta - cPixPerScan) * 4;
            pulDst = -1 + pulDstScan0 + xDstOff + prclClip->left + prclClip->top * culDstDelta;

            if (bSrcLowNybble)
            {
                while (--cScans >= 0)
                {
                    // pixels straddle bytes: low nybble of one, high of the next
                    j = *++pjSrc;
                    for (c = cPixPerScan / 2; --c >= 0;)
                    {
                        *++pulDst = aulXlate[j & 0xf];
                        j = *++pjSrc;
                        *++pulDst = aulXlate[j >> 4];
                    }
                    if (cPixPerScan & 1)
                    {
                        *++pulDst = aulXlate[j & 0xf];
                    }
                    pjSrc += cjSrcDeltaRem;
                    pulDst = (ULONG *)((BYTE *)pulDst + cjDstDeltaRem);
                }
            }
            else
            {
                while (--cScans >= 0)
                {
                    for (c = cPixPerScan / 2; --c >= 0;)
                    {
                        j = *++pjSrc;
                        *++pulDst = aulXlate[j >> 4];
                        *++pulDst = aulXlate[j & 0xf];
                    }
                    if (cPixPerScan & 1)
                    {
                        j = *++pjSrc;
                        *++pulDst = aulXlate[j >> 4];
                    }
                    pjSrc += cjSrcDeltaRem;
                    pulDst = (ULONG *)((BYTE *)pulDst + cjDstDeltaRem);
                }
            }
        }
        else if (ppdev->cPelSize == GLINTDEPTH16)
        {
            USHORT *pusDst;

            // culDstDelta counts ULONGs per scan; pixels written are USHORTs
            cjDstDeltaRem = (culDstDelta << 2) - (cPixPerScan << ppdev->cPelSize);
            pusDst = -1 + (USHORT *)pulDstScan0 + xDstOff + prclClip->left
                     + prclClip->top * culDstDelta * 2;

            if (bSrcLowNybble)
            {
                while (--cScans >= 0)
                {
                    // pixels straddle bytes: low nybble of one, high of the next
                    j = *++pjSrc;
                    for (c = cPixPerScan / 2; --c >= 0;)
                    {
                        *++pusDst = (USHORT)aulXlate[j & 0xf];
                        j = *++pjSrc;
                        *++pusDst = (USHORT)aulXlate[j >> 4];
                    }
                    if (cPixPerScan & 1)
                    {
                        *++pusDst = (USHORT)aulXlate[j & 0xf];
                    }
                    pjSrc += cjSrcDeltaRem;
                    pusDst = (USHORT *)((BYTE *)pusDst + cjDstDeltaRem);
                }
            }
            else
            {
                while (--cScans >= 0)
                {
                    for (c = cPixPerScan / 2; --c >= 0;)
                    {
                        j = *++pjSrc;
                        *++pusDst = (USHORT)aulXlate[j >> 4];
                        *++pusDst = (USHORT)aulXlate[j & 0xf];
                    }
                    if (cPixPerScan & 1)
                    {
                        j = *++pjSrc;
                        *++pusDst = (USHORT)aulXlate[j >> 4];
                    }
                    pjSrc += cjSrcDeltaRem;
                    pusDst = (USHORT *)((BYTE *)pusDst + cjDstDeltaRem);
                }
            }
        }
    }
}
#endif
//@@END_DDKSPLIT