Windows NT 4.0 source code leak
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

1051 lines
33 KiB

/******************************Module*Header*******************************\
* Module Name: bltio.c
*
* Contains the low-level MM blt functions.
*
* Hopefully, if you're basing your display driver on this code, to
* support all of DrvBitBlt and DrvCopyBits, you'll only have to implement
* the following routines. You shouldn't have to modify much in
* 'bitblt.c'. I've tried to make these routines as few, modular, simple,
* and efficient as I could, while still accelerating as many calls as
* possible that would be cost-effective in terms of performance wins
* versus size and effort.
*
* Note: In the following, 'relative' coordinates refers to coordinates
* that haven't yet had the offscreen bitmap (DFB) offset applied.
* 'Absolute' coordinates have had the offset applied. For example,
* we may be told to blt to (1, 1) of the bitmap, but the bitmap may
* be sitting in offscreen memory starting at coordinate (0, 768) --
* (1, 1) would be the 'relative' start coordinate, and (1, 769)
* would be the 'absolute' start coordinate.
*
* Copyright (c) 1992-1995 Microsoft Corporation
*
\**************************************************************************/
#include "precomp.h"
/**************************************************************************
* VOID vMmFastPatRealize
*
* Realizes a pattern into offscreen memory.
*
* ppdev - Device instance.  Supplies the register base ('pjBase'), the
*         brush cache ('abe', 'iBrushCache', 'cBrushCache'), the screen
*         stride ('lDelta') and the bank used for host transfers
*         ('lXferBank').
* prb   - Brush realization whose 'aulPattern' bits are downloaded.  On
*         return 'prb->pbe' points at the cache entry that holds it.
*
* If the brush does not currently own a cache entry, the next entry is
* claimed round-robin.  The pattern is then written with three blts:
*
*   1. a host-to-screen transfer of the 8 pattern rows to the entry,
*   2. a screen-to-screen copy that repeats those rows one pattern width
*      to the right, and
*   3. a screen-to-screen copy that repeats the doubled rows further on
*      in the entry (see the note at the third blt).
*
* The cached copy uses its own row pitch of (2 * lDeltaSrc) bytes rather
* than the screen stride.
*
**************************************************************************/
VOID vMmFastPatRealize(
PDEV*   ppdev,
RBRUSH* prb)                // Points to brush realization structure
{
    BRUSHENTRY* pbe;
    LONG        iBrushCache;
    BYTE*       pjPattern;
    BYTE*       pjBase = ppdev->pjBase;
    LONG        lDeltaPat;
    LONG        lDeltaSrc;
    LONG        xCnt;
    LONG        yCnt;
    ULONG       ulDst;

    DISPDBG((10,"vFastPatRealize called"));

    pbe = prb->pbe;
    if ((pbe == NULL) || (pbe->prbVerify != prb))
    {
        // We have to allocate a new offscreen cache brush entry for
        // the brush.  Entries are handed out round-robin; overwriting
        // 'prbVerify' below means the brush that used to own this entry
        // will fail the check above the next time it is realized.

        iBrushCache = ppdev->iBrushCache;
        pbe         = &ppdev->abe[iBrushCache];

        iBrushCache++;
        if (iBrushCache >= ppdev->cBrushCache)
        {
            iBrushCache = 0;
        }

        ppdev->iBrushCache = iBrushCache;

        // Update our links:

        pbe->prbVerify = prb;
        prb->pbe       = pbe;
    }

    //
    // Download brush into cache
    //

    pjPattern = (PBYTE) &prb->aulPattern[0];    // Copy from brush buffer
    lDeltaPat = PELS_TO_BYTES(8);               // Stride of the source rows
    xCnt      = PELS_TO_BYTES(8);               // Bytes in one pattern row
    yCnt      = 8;                              // Rows in the pattern

    if (ppdev->cBitsPerPixel == 24)
    {
        lDeltaSrc = 32;         // same as PELS_TO_BYTES(8) for 32bpp
    }
    else
    {
        lDeltaSrc = lDeltaPat;  // PELS_TO_BYTES(8)
    }

    ulDst = (pbe->y * ppdev->lDelta) + PELS_TO_BYTES(pbe->x);

    ppdev->pfnBankMap(ppdev, ppdev->lXferBank);

    CP_MM_WAIT_FOR_BLT_COMPLETE(ppdev, pjBase);

    CP_MM_DST_Y_OFFSET(ppdev, pjBase, (lDeltaSrc * 2));
    CP_MM_XCNT(ppdev, pjBase, (xCnt - 1));
    CP_MM_YCNT(ppdev, pjBase, (yCnt - 1));
    CP_MM_BLT_MODE(ppdev, pjBase, SRC_CPU_DATA);
    CP_MM_ROP(ppdev, pjBase, CL_SRC_COPY);
    CP_MM_DST_ADDR_ABS(ppdev, pjBase, ulDst);
    CP_MM_START_BLT(ppdev, pjBase);

    // The engine is now waiting for host data; feed it the pattern rows.

    vImageTransfer(ppdev, pjPattern, lDeltaPat, xCnt, yCnt);

    //
    // Duplicate brush horizontally
    //
    // The destination pitch programmed for the download above is still
    // in effect, so only the source pitch has to be set here.
    //

    CP_MM_WAIT_FOR_BLT_COMPLETE(ppdev, pjBase);

    CP_MM_XCNT(ppdev, pjBase, (xCnt - 1));
    CP_MM_YCNT(ppdev, pjBase, (yCnt - 1));
    CP_MM_BLT_MODE(ppdev, pjBase, 0);
    CP_MM_SRC_Y_OFFSET(ppdev, pjBase, (lDeltaSrc * 2));
    CP_MM_SRC_ADDR(ppdev, pjBase, ulDst);
    CP_MM_DST_ADDR_ABS(ppdev, pjBase, (ulDst + lDeltaPat));
    CP_MM_START_BLT(ppdev, pjBase);

    //
    // Duplicate brush vertically
    //
    // For 24bpp the destination is 512 bytes on, i.e. 8 rows of the
    // 64-byte cache pitch.  The other depths use PELS_TO_BYTES(128),
    // which is the same 8 rows provided PELS_TO_BYTES is a plain
    // per-pel multiply -- TODO confirm against the macro definition.
    //

    CP_MM_WAIT_FOR_BLT_COMPLETE(ppdev, pjBase);

    CP_MM_SRC_Y_OFFSET(ppdev, pjBase, (lDeltaSrc * 2));
    CP_MM_DST_Y_OFFSET(ppdev, pjBase, (lDeltaSrc * 2));
    CP_MM_BLT_MODE(ppdev, pjBase, 0);
    CP_MM_XCNT(ppdev, pjBase, ((lDeltaSrc * 2) - 1));
    CP_MM_YCNT(ppdev, pjBase, (yCnt - 1));
    CP_MM_SRC_ADDR(ppdev, pjBase, ulDst);
    if (ppdev->cBitsPerPixel == 24)
    {
        CP_MM_DST_ADDR_ABS(ppdev, pjBase, (ulDst + 512)); // 128 * 4
    }
    else
    {
        CP_MM_DST_ADDR_ABS(ppdev, pjBase, (ulDst + PELS_TO_BYTES(128)));
    }
    CP_MM_START_BLT(ppdev, pjBase);

#if 0
    {
        ////////////////////////////////////////////////////////////////
        // DEBUG TILED PATTERNS
        //
        // The following code helps to debug patterns if you break the
        // realization code.  It copies the 2x2 tiled copy of the brush
        // to the visible screen.
        //

        POINTL ptl;
        RECTL  rcl;
        LONG   lDelta = ppdev->lDelta;

        ptl.x = pbe->x;
        ptl.y = pbe->y;

        rcl.left   = 10;
        rcl.right  = 10 + 16;
        rcl.top    = ppdev->cyScreen - 10 - 16;
        rcl.bottom = ppdev->cyScreen - 10;

        //
        // Make sure we can write to the video registers.
        //

        CP_MM_WAIT_FOR_BLT_COMPLETE(ppdev, pjBase);

        CP_MM_ROP(ppdev, pjBase, CL_SRC_COPY);
        CP_MM_SRC_Y_OFFSET(ppdev, pjBase, PELS_TO_BYTES(16));
        CP_MM_DST_Y_OFFSET(ppdev, pjBase, lDelta);

        //
        // Top to Bottom - Left to Right
        //

        CP_MM_BLT_MODE(ppdev, pjBase, ppdev->jModeColor);

        CP_MM_WAIT_FOR_BLT_COMPLETE(ppdev, pjBase);

        CP_MM_XCNT(ppdev, pjBase, (PELS_TO_BYTES(rcl.right - rcl.left) - 1));
        CP_MM_YCNT(ppdev, pjBase, (rcl.bottom - rcl.top - 1));

        CP_MM_SRC_ADDR(ppdev, pjBase, (0 + ((ptl.y) * lDelta) + PELS_TO_BYTES(ptl.x)));
        CP_MM_DST_ADDR_ABS(ppdev, pjBase, ((rcl.top * lDelta) + PELS_TO_BYTES(rcl.left)));
        CP_MM_START_BLT(ppdev, pjBase);
    }
#endif
}
/**************************************************************************
* VOID vMmFillPat
*
* Fills a list of rectangles with a realized pattern brush using the blt
* engine's 8x8 pattern-copy mode.
*
* The brush is (re)realized into the offscreen cache when its cache entry
* is missing or has been handed to another brush.  Each rectangle then
* takes two blts:
*
*   1. an 8x8 window of the cached brush, chosen from the rectangle's
*      position relative to the brush origin, is copied to the scratch
*      area at 'ppdev->ulAlignedPatternOffset';
*   2. that scratch copy is used as the pattern source for the fill.
*
**************************************************************************/
VOID vMmFillPat(
PDEV*           ppdev,
LONG            c,          // Can't be zero
RECTL*          prcl,       // Array of relative coordinate destination rects
ROP4            rop4,       // Bits 2..5 index the hardware mix table
RBRUSH_COLOR    rbc,        // rbc.prb is the pattern brush realization
POINTL*         pptlBrush)  // Brush origin used to align the pattern
{
    BYTE*       pjBase        = ppdev->pjBase;
    LONG        cjScreenPitch = ppdev->lDelta;
    ULONG       ulScratch     = ppdev->ulAlignedPatternOffset;
    ULONG       ulCachedBrush;
    BYTE        jMix;
    BYTE        jFillMode;
    BRUSHENTRY* pbeCached;      // Cache entry describing where the brush
                                //   bits live in off-screen memory

    DISPDBG((10,"vFillPat called"));

    ASSERTDD(c > 0, "Can't handle zero rectangles");
    ASSERTDD(ppdev->cBpp < 4, "vFillPat only works at 8bpp, 16bpp, and 24bpp");

    if ((rbc.prb->pbe != NULL) &&
        (rbc.prb->pbe->prbVerify == rbc.prb))
    {
        DISPDBG((5, " -- Brush cache hit on brush at (%d,%d)", rbc.prb->pbe->x, rbc.prb->pbe->y));
    }
    else
    {
        vMmFastPatRealize(ppdev, rbc.prb);
        DISPDBG((5, " -- Brush cache miss, put it at (%d,%d)", rbc.prb->pbe->x, rbc.prb->pbe->y));
    }

    pbeCached     = rbc.prb->pbe;
    ulCachedBrush = pbeCached->xy;

    jMix      = gajHwMixFromRop2[(rop4 >> 2) & 0xf];
    jFillMode = ppdev->jModeColor | ENABLE_8x8_PATTERN_COPY;

    //
    // Fill the list of rectangles
    //

    for (;;)
    {
        // Row and column of the brush that must land on the rectangle's
        // top-left pel.  The row is pre-multiplied by 16.

        ULONG ulRow = ((prcl->top  - pptlBrush->y) & 7) << 4;
        ULONG ulCol =  (prcl->left - pptlBrush->x) & 7;
        ULONG ulWindow;         // Byte offset of the 8x8 window in the cache
        LONG  cjSrcPitch;
        LONG  cjDstPitch;

        if (ppdev->cBitsPerPixel == 24)
        {
            ulWindow   = (ulRow * 4) + (ulCol * 3);
            cjSrcPitch = 64;
            cjDstPitch = 32;
        }
        else
        {
            ulWindow   = PELS_TO_BYTES(ulRow + ulCol);
            cjSrcPitch = PELS_TO_BYTES(16);
            cjDstPitch = PELS_TO_BYTES(8);
        }

        // Align the pattern to a new location

        CP_MM_WAIT_FOR_BLT_COMPLETE(ppdev, pjBase);

        CP_MM_BLT_MODE(ppdev, pjBase, 0);
        CP_MM_ROP(ppdev, pjBase, CL_SRC_COPY);
        CP_MM_SRC_Y_OFFSET(ppdev, pjBase, cjSrcPitch);
        CP_MM_DST_Y_OFFSET(ppdev, pjBase, cjDstPitch);
        CP_MM_SRC_ADDR(ppdev, pjBase, (ulCachedBrush + ulWindow));
        CP_MM_XCNT(ppdev, pjBase, (PELS_TO_BYTES(8) - 1));
        CP_MM_YCNT(ppdev, pjBase, (8 - 1));
        CP_MM_DST_ADDR_ABS(ppdev, pjBase, ulScratch);
        CP_MM_START_BLT(ppdev, pjBase);

        // Fill using the aligned pattern

        CP_MM_WAIT_FOR_BLT_COMPLETE(ppdev, pjBase);

        CP_MM_BLT_MODE(ppdev, pjBase, jFillMode);
        CP_MM_ROP(ppdev, pjBase, jMix);
        CP_MM_DST_Y_OFFSET(ppdev, pjBase, cjScreenPitch);
        CP_MM_SRC_ADDR(ppdev, pjBase, ulScratch);
        CP_MM_XCNT(ppdev, pjBase, (PELS_TO_BYTES(prcl->right - prcl->left) - 1));
        CP_MM_YCNT(ppdev, pjBase, (prcl->bottom - prcl->top - 1));
        CP_MM_DST_ADDR(ppdev, pjBase, ((prcl->top * cjScreenPitch) + PELS_TO_BYTES(prcl->left)));
        CP_MM_START_BLT(ppdev, pjBase);

        prcl++;
        if (--c == 0)
        {
            break;
        }
    }
}
/**************************************************************************
* VOID vMmFillSolid
*
* Does a solid fill to a list of rectangles.
*
* The colour is replicated to fill a dword at 1 and 2 bytes per pel, the
* engine is set up once for a colour-expanded pattern fill, and then only
* the extents and destination address are reprogrammed per rectangle.
*
**************************************************************************/
VOID vMmFillSolid(
PDEV*           ppdev,
LONG            c,          // Can't be zero
RECTL*          prcl,       // Array of relative coordinate destination rects
ROP4            rop4,       // Bits 2..5 index the hardware mix table
RBRUSH_COLOR    rbc,        // Drawing color is rbc.iSolidColor
POINTL*         pptlBrush)  // Not used
{
    BYTE*   pjBase  = ppdev->pjBase;
    LONG    cjPitch = ppdev->lDelta;
    ULONG   ulColor;
    BYTE    jMix;

    DISPDBG((10,"vFillSolid called"));

    ASSERTDD(c > 0, "Can't handle zero rectangles");

    // Replicate the colour across all four bytes for the narrow depths.

    ulColor = rbc.iSolidColor;

    switch (ppdev->cBpp)
    {
    case 1:
        ulColor |= ulColor << 8;
        ulColor |= ulColor << 16;
        break;

    case 2:
        ulColor |= ulColor << 16;
        break;

    default:
        break;
    }

    jMix = gajHwMixFromRop2[(rop4 >> 2) & 0xf];

    //
    // Make sure we can write to the video registers.
    //

    CP_MM_WAIT_FOR_BLT_COMPLETE(ppdev, pjBase);

    CP_MM_ROP(ppdev, pjBase, jMix);
    CP_MM_SRC_ADDR(ppdev, pjBase, ppdev->ulSolidColorOffset);
    CP_MM_DST_Y_OFFSET(ppdev, pjBase, cjPitch);
    CP_MM_BLT_MODE(ppdev, pjBase, ENABLE_COLOR_EXPAND |
                                  ENABLE_8x8_PATTERN_COPY |
                                  ppdev->jModeColor);
    CP_MM_FG_COLOR(ppdev, pjBase, ulColor);

    if (ppdev->flCaps & CAPS_IS_5436)
    {
        CP_MM_BLT_EXT_MODE(ppdev, pjBase, ENABLE_SOLID_FILL);
    }

    //
    // Fill the list of rectangles.  The engine is already idle for the
    // first one; every later rectangle waits for its predecessor.
    //

    for (;;)
    {
        CP_MM_XCNT(ppdev, pjBase, (PELS_TO_BYTES(prcl->right - prcl->left) - 1));
        CP_MM_YCNT(ppdev, pjBase, (prcl->bottom - prcl->top - 1));
        CP_MM_DST_ADDR(ppdev, pjBase, ((prcl->top * cjPitch) + PELS_TO_BYTES(prcl->left)));
        CP_MM_START_BLT(ppdev, pjBase);

        if (--c == 0)
        {
            break;
        }

        prcl++;

        CP_MM_WAIT_FOR_BLT_COMPLETE(ppdev, pjBase);
    }
}
/**************************************************************************
* VOID vMmCopyBlt
*
* Does a screen-to-screen blt of a list of rectangles.
*
* One direction is chosen for the whole list from the unclipped source
* point and destination rectangle.  The copy runs top-to-bottom,
* left-to-right unless the two overlap and the destination lies after the
* source, in which case it runs bottom-to-top, right-to-left and the
* addresses are those of the last byte of each rectangle.
*
**************************************************************************/
VOID vMmCopyBlt(
PDEV*   ppdev,
LONG    c,          // Can't be zero
RECTL*  prcl,       // Array of relative coordinates destination rectangles
ROP4    rop4,       // Low 4 bits index the hardware mix table
POINTL* pptlSrc,    // Original unclipped source point
RECTL*  prclDst)    // Original unclipped destination rectangle
{
    LONG    dxSrc;                          // Add to destination x to get source x
    LONG    dySrc;                          // Add to destination y to get source y
    LONG    lSrcBase = ppdev->xyOffset;
    BYTE*   pjBase   = ppdev->pjBase;
    LONG    cjPitch  = ppdev->lDelta;
    LONG    fForward;
    BYTE    jDirection;
    BYTE    jMix;

    DISPDBG((10,"vCopyBlt called"));

    ASSERTDD(c > 0, "Can't handle zero rectangles");

    //
    // The src-dst delta will be the same for all rectangles
    //

    dxSrc = pptlSrc->x - prclDst->left;
    dySrc = pptlSrc->y - prclDst->top;

    //
    // Make sure we can write to the video registers.
    //

    CP_MM_WAIT_FOR_BLT_COMPLETE(ppdev, pjBase);

    jMix = gajHwMixFromRop2[rop4 & 0xf];
    CP_MM_ROP(ppdev, pjBase, jMix);

    CP_MM_SRC_Y_OFFSET(ppdev, pjBase, cjPitch);
    CP_MM_DST_Y_OFFSET(ppdev, pjBase, cjPitch);

    //
    // The accelerator may not be as fast at doing right-to-left copies, so
    // only do them when the rectangles truly overlap:
    //

    fForward = (!OVERLAP(prclDst, pptlSrc) ||
                (prclDst->top < pptlSrc->y) ||
                ((prclDst->top == pptlSrc->y) && (prclDst->left <= pptlSrc->x)));

    if (fForward)
    {
        DISPDBG((12,"Top to Bottom - Left to Right"));
        jDirection = DIR_TBLR;
    }
    else
    {
        DISPDBG((12,"Bottom to Top - Right to Left"));
        jDirection = DIR_BTRL;
    }

    CP_MM_BLT_MODE(ppdev, pjBase, jDirection);

    for (;;)
    {
        CP_MM_XCNT(ppdev, pjBase, (PELS_TO_BYTES(prcl->right - prcl->left) - 1));
        CP_MM_YCNT(ppdev, pjBase, (prcl->bottom - prcl->top - 1));

        if (fForward)
        {
            // Addresses of the first byte of the top scan

            CP_MM_SRC_ADDR(ppdev, pjBase, (lSrcBase + ((prcl->top + dySrc) * cjPitch) + PELS_TO_BYTES(prcl->left + dxSrc)));
            CP_MM_DST_ADDR(ppdev, pjBase, ((prcl->top * cjPitch) + PELS_TO_BYTES(prcl->left)));
        }
        else
        {
            // Addresses of the last byte of the bottom scan

            CP_MM_SRC_ADDR(ppdev, pjBase, (lSrcBase + ((prcl->bottom - 1 + dySrc) * cjPitch) + PELS_TO_BYTES(prcl->right + dxSrc) - 1));
            CP_MM_DST_ADDR(ppdev, pjBase, (((prcl->bottom - 1) * cjPitch) + PELS_TO_BYTES(prcl->right) - 1));
        }

        CP_MM_START_BLT(ppdev, pjBase);

        if (--c == 0)
        {
            break;
        }

        prcl++;

        CP_MM_WAIT_FOR_BLT_COMPLETE(ppdev, pjBase);
    }
}
/******************************Public*Routine******************************\
* VOID vMmXfer1bpp
*
* Low-level routine used to transfer monochrome data to the screen using
* DWORD writes to the blt engine.
*
* This can handle opaque or transparent expansions.  It does opaque
* expansions by drawing the opaque rectangle first and then transparently
* expands the foreground bits.
*
* The routine makes up to two passes over the rectangle list:
*
*   1. Unless rop4 is 0xCCAA, every rectangle is first filled with the
*      background colour (pxlo->pulXlate[0]) using the background mix.
*   2. Every rectangle is then colour-expanded from the host with
*      transparency enabled, so only the foreground colour
*      (pxlo->pulXlate[1]) is drawn.  Each scan is widened to whole
*      source dwords and the extra bits at either end are masked off in
*      software before being written to the transfer port.
*
\**************************************************************************/
VOID vMmXfer1bpp(
PDEV*     ppdev,
LONG      c,          // Count of rectangles, can't be zero
RECTL*    prcl,       // List of destination rectangles, in relative
                      //   coordinates
ROP4      rop4,       // Actually had better be a rop3
SURFOBJ*  psoSrc,     // Source surface
POINTL*   pptlSrc,    // Original unclipped source point
RECTL*    prclDst,    // Original unclipped destination rectangle
XLATEOBJ* pxlo)       // Translate that provides color-expansion information
{
    ULONG*  pulXfer;
    ULONG*  pul;
    LONG    ix;
    LONG    iy;
    LONG    cxWidthInBytes;
    BYTE*   pjBits;
    POINTL  ptlDst;
    POINTL  ptlSrc;
    SIZEL   sizlDst;
    LONG    cxLeftMask;
    LONG    cxRightMask;
    ULONG   ulDstAddr;
    INT     nDwords;
    ULONG   ulLeftMask;
    ULONG   ulRightMask;
    BYTE*   pjBase    = ppdev->pjBase;
    LONG    lDelta    = ppdev->lDelta;
    LONG    lDeltaSrc = psoSrc->lDelta;
    LONG    cBpp      = ppdev->cBpp;
    ULONG   ulFgColor = pxlo->pulXlate[1];
    ULONG   ulBgColor = pxlo->pulXlate[0];
    LONG    dx;         // Add delta to destination to get source
    LONG    dy;         // Add delta to destination to get source

    // Since the hardware clipping on some of the Cirrus chips is broken, we
    // do the clipping by rounding out the edges to dword boundaries and then
    // doing the blt transparently.  In the event that we want the expansion
    // to be opaque, we do the opaquing blt in advance.  One side effect of
    // this is that the destination bits are no longer valid for processing
    // the rop.  This could probably be optimized by doing the edges separately
    // and then doing the middle section in one pass.  However, this is
    // complicated by a 5434 bug that breaks blts less than 10 pixels wide.

    ASSERTDD(c > 0, "Can't handle zero rectangles");
    ASSERTDD(((rop4 & 0xff00) == 0xcc00), "Expected foreground rop of 0xcc");

    //
    // The src-dst delta will be the same for all rectangles
    //

    dx = pptlSrc->x - prclDst->left;
    dy = pptlSrc->y - prclDst->top;

    // Replicate the colours across a full dword for the narrow depths.

    if (cBpp == 1)
    {
        ulFgColor = (ulFgColor << 8) | (ulFgColor & 0xff);
        ulBgColor = (ulBgColor << 8) | (ulBgColor & 0xff);
        ulFgColor = (ulFgColor << 16) | (ulFgColor & 0xffff);
        ulBgColor = (ulBgColor << 16) | (ulBgColor & 0xffff);
    }
    else if (cBpp == 2)
    {
        ulFgColor = (ulFgColor << 16) | (ulFgColor & 0xffff);
        ulBgColor = (ulBgColor << 16) | (ulBgColor & 0xffff);
    }

    pulXfer = ppdev->pulXfer;
    ppdev->pfnBankMap(ppdev, ppdev->lXferBank);

    CP_MM_WAIT_FOR_BLT_COMPLETE(ppdev, pjBase);
    CP_MM_DST_Y_OFFSET(ppdev, pjBase, lDelta);

    if (rop4 != 0xCCAA)
    {
        //
        // Opaque expansion: lay down the background first, using a
        // private cursor so 'c' and 'prcl' are intact for the second
        // pass.
        //

        LONG    lCnt     = c;
        RECTL*  prclTmp  = prcl;
        BYTE    jHwBgRop = gajHwMixFromRop2[rop4 & 0xf];

        CP_MM_ROP(ppdev, pjBase, jHwBgRop);
        CP_MM_FG_COLOR(ppdev, pjBase, ulBgColor);
        CP_MM_SRC_ADDR(ppdev, pjBase, ppdev->ulSolidColorOffset);
        CP_MM_BLT_MODE(ppdev, pjBase, ppdev->jModeColor |
                                      ENABLE_COLOR_EXPAND |
                                      ENABLE_8x8_PATTERN_COPY);

        do
        {
            // Calculate the size of the blt

            ptlDst.x = prclTmp->left;
            ptlDst.y = prclTmp->top;

            sizlDst.cx = prclTmp->right - ptlDst.x;
            sizlDst.cy = prclTmp->bottom - ptlDst.y;

            //
            // Fill the background rectangle with the background color
            //

            // Set the dest addresses

            ulDstAddr = (ptlDst.y * lDelta) + PELS_TO_BYTES(ptlDst.x);

            CP_MM_WAIT_FOR_BLT_COMPLETE(ppdev, pjBase);

            CP_MM_XCNT(ppdev, pjBase, PELS_TO_BYTES(sizlDst.cx) - 1);
            CP_MM_YCNT(ppdev, pjBase, sizlDst.cy - 1);
            CP_MM_DST_ADDR(ppdev, pjBase, ulDstAddr);

            // Start the blt operation

            CP_MM_START_BLT(ppdev, pjBase);

            prclTmp++;
        } while (--lCnt != 0);

        CP_MM_WAIT_FOR_BLT_COMPLETE(ppdev, pjBase);
    }

    // The background and transparency colours are both set to the
    // complement of the foreground colour.
    //
    // NOTE(review): CP_IO_XPAR_COLOR is the only CP_IO_* macro used in
    // this routine; every other register access goes through CP_MM_*.
    // Confirm this is intentional.

    CP_MM_FG_COLOR(ppdev, pjBase, ulFgColor);
    CP_MM_BG_COLOR(ppdev, pjBase, ~ulFgColor);
    CP_IO_XPAR_COLOR(ppdev, pjBase, ~ulFgColor);
    CP_MM_ROP(ppdev, pjBase, CL_SRC_COPY);
    CP_MM_BLT_MODE(ppdev, pjBase, ppdev->jModeColor |
                                  ENABLE_COLOR_EXPAND |
                                  ENABLE_TRANSPARENCY_COMPARE |
                                  SRC_CPU_DATA);

    do
    {
        // Calculate the size of the blt

        ptlDst.x = prcl->left;
        ptlDst.y = prcl->top;

        sizlDst.cx = prcl->right - ptlDst.x;
        sizlDst.cy = prcl->bottom - ptlDst.y;

        // Find the matching source point

        ptlSrc.x = prcl->left + dx;
        ptlSrc.y = prcl->top + dy;

        // Floor the source.
        // Extend the width by the amount required to floor to a dword boundary.
        // Set the size of the left mask.
        // Floor the dest, so it aligns with the floored source.

        if ((cxLeftMask = (ptlSrc.x & 31)))
        {
            sizlDst.cx += cxLeftMask;
            ptlSrc.x &= ~31;
            ptlDst.x -= cxLeftMask;
        }

        ulLeftMask = gaulLeftClipMask[cxLeftMask];

        // Ceil the cx to a dword boundary.

        if ((cxRightMask = (sizlDst.cx & 31)))
        {
            cxRightMask = 32 - cxRightMask;
            sizlDst.cx = (sizlDst.cx + 31) & ~31;
        }

        ulRightMask = gaulRightClipMask[cxRightMask];

        // A one-dword-wide blt applies both masks to the same dword.
        // Clearing 'ulRightMask' also selects the single-dword transfer
        // loop below.

        if (sizlDst.cx == 32)
        {
            ulLeftMask &= ulRightMask;
            ulRightMask = 0;
        }

        // Note: At this point sizlDst.cx is the width of the blt in pixels,
        //       floored to a dword boundary, and ceiled to a dword boundary.

        // Calculate the width in Bytes

        cxWidthInBytes = sizlDst.cx >> 3;

        // Calculate the number of Dwords and any remaining bytes

        nDwords = cxWidthInBytes >> 2;

        ASSERTDD(((cxWidthInBytes & 0x03) == 0),
                 "cxWidthInBytes is not a DWORD multiple");

        // Calculate the address of the source bitmap
        // This is to a byte boundary.

        pjBits  = (PBYTE) psoSrc->pvScan0;
        pjBits += ptlSrc.y * lDeltaSrc;
        pjBits += ptlSrc.x >> 3;

        ASSERTDD((((ULONG)pjBits & 0x03) == 0),
                 "pjBits not DWORD aligned like it should be");

        //
        // Blt the 1 bpp bitmap
        //

        ulDstAddr = (ptlDst.y * lDelta) + PELS_TO_BYTES(ptlDst.x);

        CP_MM_WAIT_FOR_BLT_COMPLETE(ppdev, pjBase);

        CP_MM_XCNT(ppdev, pjBase, PELS_TO_BYTES(sizlDst.cx) - 1);
        CP_MM_YCNT(ppdev, pjBase, sizlDst.cy - 1);

        //
        // The 542x chips require a write to the Src Address Register when
        // doing a host transfer with color expansion.  The value is
        // irrelevant, but the write is crucial.  This is documented in
        // the manual, not the errata.  Go figure.
        //

        CP_MM_SRC_ADDR(ppdev, pjBase, 0);
        CP_MM_DST_ADDR(ppdev, pjBase, ulDstAddr);

        CP_MM_START_BLT(ppdev, pjBase);

        //
        // Transfer the host bitmap.
        //

        if (ulRightMask)
        {
            //
            // Blt is > 1 DWORD wide (nDwords > 1)
            //

            for (iy = 0; iy < sizlDst.cy; iy++)
            {
                pul = (ULONG*) pjBits;

                // First dword of the scan, with the left mask applied

                WRITE_REGISTER_ULONG(pulXfer, (*((ULONG*)pul) & ulLeftMask));
                pul++;
                pulXfer++;

                // Interior dwords go out unmasked

                for (ix = 0; ix < (nDwords-2); ix++)
                {
                    WRITE_REGISTER_ULONG(pulXfer, (*((ULONG*)pul)));
                    pul++;
                    pulXfer++;
                }

                // Last dword of the scan, with the right mask applied

                WRITE_REGISTER_ULONG(pulXfer, (*((ULONG*)pul) & ulRightMask));
                pul++;
                pulXfer++;

                pjBits += lDeltaSrc;

                // Every scan starts again at the base of the transfer area

                pulXfer = ppdev->pulXfer;
                CP_MEMORY_BARRIER();    // Flush memory cache when we reset the address
            }
        }
        else
        {
            //
            // Blt is 1 DWORD wide (nDwords == 1)
            //

            for (iy = 0; iy < sizlDst.cy; iy++)
            {
                WRITE_REGISTER_ULONG(pulXfer, (*((ULONG*)pjBits) & ulLeftMask));
                pjBits += lDeltaSrc;
                CP_MEMORY_BARRIER();    // Flush memory cache
            }
        }

        prcl++;
    } while (--c != 0);
}
/******************************Public*Routine******************************\
* VOID vMmXfer4bpp
*
* Does a 4bpp transfer from a bitmap to the screen.
*
* NOTE: The screen must be 8bpp for this function to be called!
*
* The reason we implement this is that a lot of resources are kept as 4bpp,
* and used to initialize DFBs, some of which we of course keep off-screen.
*
* Each source byte holds two pels, the high nibble first. Every pel is
* looked up in pxlo->pulXlate and the resulting byte is staged in a small
* stack buffer, which is then pushed to the blt engine through the
* transfer area at ppdev->pulXfer. A scan wider than the buffer is sent
* in several batches.
*
\**************************************************************************/
// XLATE_BUFFER_SIZE defines the size of the stack-based buffer we use
// for doing the translate. Note that in general stack buffers should
// be kept as small as possible. The OS guarantees us only 8k for stack
// from GDI down to the display driver in low memory situations; if we
// ask for more, we'll access violate. Note also that at any time the
// stack buffer cannot be larger than a page (4k) -- otherwise we may
// miss touching the 'guard page' and access violate then too.
//
// The value must stay even: the aligned translate loop below always
// writes pels in pairs and relies on a full batch fitting exactly.
#define XLATE_BUFFER_SIZE 256
VOID vMmXfer4bpp(
PDEV* ppdev,
LONG c, // Count of rectangles, can't be zero
RECTL* prcl, // List of destination rectangles, in relative
// coordinates
ULONG rop4, // rop4; low 4 bits index the hardware mix table
SURFOBJ* psoSrc, // Source surface
POINTL* pptlSrc, // Original unclipped source point
RECTL* prclDst, // Original unclipped destination rectangle
XLATEOBJ* pxlo) // Translate that provides colour-expansion information
{
ULONG* pulXfer = ppdev->pulXfer;
BYTE* pjBase = ppdev->pjBase;
LONG lDelta = ppdev->lDelta;
ULONG ulDstAddr;
LONG dx;
LONG dy;
LONG cx;
LONG cy;
LONG lSrcDelta;
BYTE* pjSrcScan0;
BYTE* pjScan;
BYTE* pjSrc;
BYTE* pjDst;
LONG cxThis;
LONG cxToGo;
LONG xSrc;
LONG iLoop;
BYTE jSrc;
ULONG* pulXlate;
LONG cdwThis;
BYTE* pjBuf;
BYTE ajBuf[XLATE_BUFFER_SIZE];
ASSERTDD(ppdev->iBitmapFormat == BMF_8BPP, "Screen must be 8bpp");
ASSERTDD(psoSrc->iBitmapFormat == BMF_4BPP, "Source must be 4bpp");
ASSERTDD(c > 0, "Can't handle zero rectangles");
ASSERTDD(((rop4 & 0xff00) >> 8) == (rop4 & 0xff),
"Expect only a rop2");
DISPDBG((5, "vXfer4bpp: entry"));
dx = pptlSrc->x - prclDst->left;
dy = pptlSrc->y - prclDst->top; // Add to destination to get source
lSrcDelta = psoSrc->lDelta;
pjSrcScan0 = psoSrc->pvScan0;
// Set-up that is common to every rectangle: destination pitch, mix
// and host-sourced blt mode.
ppdev->pfnBankMap(ppdev, ppdev->lXferBank);
CP_MM_WAIT_FOR_BLT_COMPLETE(ppdev, pjBase);
CP_MM_DST_Y_OFFSET(ppdev, pjBase, lDelta);
CP_MM_ROP(ppdev, pjBase, gajHwMixFromRop2[rop4 & 0xf]);
CP_MM_BLT_MODE(ppdev, pjBase, SRC_CPU_DATA);
while(TRUE)
{
ulDstAddr = (prcl->top * lDelta) + PELS_TO_BYTES(prcl->left);
cx = prcl->right - prcl->left;
cy = prcl->bottom - prcl->top;
CP_MM_WAIT_FOR_BLT_COMPLETE(ppdev, pjBase);
CP_MM_XCNT(ppdev, pjBase, PELS_TO_BYTES(cx) - 1);
CP_MM_YCNT(ppdev, pjBase, cy - 1);
CP_MM_DST_ADDR(ppdev, pjBase, ulDstAddr);
pulXlate = pxlo->pulXlate;
xSrc = prcl->left + dx;
// First source byte of the rectangle; two pels per byte, hence the
// shift. Whether the first pel is the high or low nibble is decided
// by the low bit of 'xSrc' further down.
pjScan = pjSrcScan0 + (prcl->top + dy) * lSrcDelta + (xSrc >> 1);
CP_MM_START_BLT(ppdev, pjBase);
do {
pjSrc = pjScan;
cxToGo = cx; // # of pels per scan in 4bpp source
do {
cxThis = XLATE_BUFFER_SIZE;
// We can handle XLATE_BUFFER_SIZE number
// of pels in this xlate batch
cxToGo -= cxThis; // cxThis will be the actual number of
// pels we'll do in this xlate batch
if (cxToGo < 0)
cxThis += cxToGo;
pjDst = ajBuf; // Points to our temporary batch buffer
// We handle alignment ourselves because it's easy to
// do, rather than pay the cost of setting/resetting
// the scissors register:
if (xSrc & 1)
{
// When unaligned, we have to be careful not to read
// past the end of the 4bpp bitmap (that could
// potentially cause us to access violate):
iLoop = cxThis >> 1; // Each loop handles 2 pels;
// we'll handle odd pel
// separately
jSrc = *pjSrc;
// Each pass emits the low nibble of the current byte and the
// high nibble of the next one, leaving 'jSrc' and 'pjSrc' on
// the byte whose low nibble comes next.
while (iLoop-- != 0)
{
*pjDst++ = (BYTE) pulXlate[jSrc & 0xf];
jSrc = *(++pjSrc);
*pjDst++ = (BYTE) pulXlate[jSrc >> 4];
}
if (cxThis & 1)
*pjDst = (BYTE) pulXlate[jSrc & 0xf];
}
else
{
// When 'cxThis' is odd this rounds up, so one pel more than
// requested is translated into the buffer.
iLoop = (cxThis + 1) >> 1; // Each loop handles 2 pels
do {
jSrc = *pjSrc++;
*pjDst++ = (BYTE) pulXlate[jSrc >> 4];
*pjDst++ = (BYTE) pulXlate[jSrc & 0xf];
} while (--iLoop != 0);
}
// The number of bytes we'll transfer is equal to the number
// of pels we've processed in the batch. Since we're
// transferring dwords, we have to round up to get the dword
// count:
cdwThis = (cxThis + 3) >> 2;
pjBuf = ajBuf;
TRANSFER_DWORD_ALIGNED(ppdev, pulXfer, pjBuf, cdwThis);
} while (cxToGo > 0);
pjScan += lSrcDelta; // Advance to next source scan. Note
// that we could have computed the
// value to advance 'pjSrc' directly,
// but this method is less
// error-prone.
} while (--cy != 0);
if (--c == 0)
return;
prcl++;
}
}
/******************************Public*Routine******************************\
* VOID vMmXferNative
*
* Transfers a bitmap that is the same color depth as the display to
* the screen via the data transfer register, with no translation.
*
* The destination pitch, mix and host-sourced blt mode are programmed
* once.  For each rectangle the extents and destination address are set,
* the blt is started, and vImageTransfer() is handed the matching source
* rows to push to the engine.
*
\**************************************************************************/
VOID vMmXferNative(
PDEV*     ppdev,
LONG      c,          // Count of rectangles, can't be zero
RECTL*    prcl,       // Array of relative coordinates destination rectangles
ULONG     rop4,       // rop4; low 4 bits index the hardware mix table
SURFOBJ*  psoSrc,     // Source surface
POINTL*   pptlSrc,    // Original unclipped source point
RECTL*    prclDst,    // Original unclipped destination rectangle
XLATEOBJ* pxlo)       // Not used (only checked by an assert)
{
    BYTE*   pjBase = ppdev->pjBase;
    LONG    lDelta = ppdev->lDelta;
    ULONG   ulDstAddr;
    LONG    dx;
    LONG    dy;
    LONG    cx;
    LONG    cy;
    LONG    lSrcDelta;
    BYTE*   pjSrcScan0;
    BYTE*   pjSrc;
    LONG    cjSrc;

    ASSERTDD((pxlo == NULL) || (pxlo->flXlate & XO_TRIVIAL),
             "Can handle trivial xlate only");
    ASSERTDD(psoSrc->iBitmapFormat == ppdev->iBitmapFormat,
             "Source must be same color depth as screen");
    ASSERTDD(c > 0, "Can't handle zero rectangles");
    ASSERTDD(((rop4 & 0xff00) >> 8) == (rop4 & 0xff),
             "Expect only a rop2");

    dx = pptlSrc->x - prclDst->left;
    dy = pptlSrc->y - prclDst->top;     // Add to destination to get source

    lSrcDelta  = psoSrc->lDelta;
    pjSrcScan0 = psoSrc->pvScan0;

    ppdev->pfnBankMap(ppdev, ppdev->lXferBank);

    CP_MM_WAIT_FOR_BLT_COMPLETE(ppdev, pjBase);
    CP_MM_DST_Y_OFFSET(ppdev, pjBase, lDelta);
    CP_MM_ROP(ppdev, pjBase, gajHwMixFromRop2[rop4 & 0xf]);
    CP_MM_BLT_MODE(ppdev, pjBase, SRC_CPU_DATA);

    for (;;)
    {
        ulDstAddr = (prcl->top * lDelta) + PELS_TO_BYTES(prcl->left);
        cx = prcl->right - prcl->left;
        cy = prcl->bottom - prcl->top;

        CP_MM_WAIT_FOR_BLT_COMPLETE(ppdev, pjBase);

        CP_MM_XCNT(ppdev, pjBase, PELS_TO_BYTES(cx) - 1);
        CP_MM_YCNT(ppdev, pjBase, cy - 1);
        CP_MM_DST_ADDR(ppdev, pjBase, ulDstAddr);

        // Bytes per source row and the address of the first source pel

        cjSrc = PELS_TO_BYTES(cx);
        pjSrc = pjSrcScan0 + (prcl->top + dy) * lSrcDelta
                           + (PELS_TO_BYTES(prcl->left + dx));

        CP_MM_START_BLT(ppdev, pjBase);

        vImageTransfer(ppdev, pjSrc, lSrcDelta, cjSrc, cy);

        if (--c == 0)
        {
            break;
        }

        prcl++;
    }
}