/* Source: mirror of https://github.com/tongzx/nt5src -- 1019 lines, 32 KiB */
/******************************************************************************\
*
* $Workfile: bltio.c $
*
* Contains the low-level IO blt functions.
*
* Hopefully, if you're basing your display driver on this code, to
* support all of DrvBitBlt and DrvCopyBits, you'll only have to implement
* the following routines.  You shouldn't have to modify much in
* 'bitblt.c'.  I've tried to make these routines as few, modular, simple,
* and efficient as I could, while still accelerating as many calls as
* possible that would be cost-effective in terms of performance wins
* versus size and effort.
*
* Note: In the following, 'relative' coordinates refer to coordinates
*       that haven't yet had the offscreen bitmap (DFB) offset applied.
*       'Absolute' coordinates have had the offset applied.  For example,
*       we may be told to blt to (1, 1) of the bitmap, but the bitmap may
*       be sitting in offscreen memory starting at coordinate (0, 768) --
*       (1, 1) would be the 'relative' start coordinate, and (1, 769)
*       would be the 'absolute' start coordinate.
*
* Copyright (c) 1992-1995 Microsoft Corporation
* Copyright (c) 1996 Cirrus Logic, Inc.
*
* $Log: S:/projects/drivers/ntsrc/display/bltio.c_v $
*
*    Rev 1.2   Nov 07 1996 16:47:52   unknown
* Clean up CAPS flags
*
*    Rev 1.1   Oct 10 1996 15:36:10   unknown
*
*    Rev 1.1   12 Aug 1996 16:49:42   frido
* Removed unaccessed local parameters.
*
*    jl01   10-08-96  Do Transparent BLT w/o Solid Fill.  Refer to PDRs#5511/6817.
*
\******************************************************************************/

#include "precomp.h"

/**************************************************************************
|
|
* VOID vIoFastPatRealize
|
|
*
|
|
* Realizes a pattern into offscreen memory.
|
|
*
|
|
**************************************************************************/
|
|
|
|
VOID vIoFastPatRealize(
|
|
PDEV* ppdev,
|
|
RBRUSH* prb) // Points to brush realization structure
|
|
{
|
|
BRUSHENTRY* pbe;
|
|
LONG iBrushCache;
|
|
BYTE* pjPattern;
|
|
LONG cjPattern;
|
|
BYTE* pjPorts = ppdev->pjPorts;
|
|
LONG lDelta = ppdev->lDelta;
|
|
LONG lDeltaPat;
|
|
LONG xCnt;
|
|
LONG yCnt;
|
|
ULONG ulDst;
|
|
|
|
DISPDBG((10,"vFastPatRealize called"));
|
|
|
|
pbe = prb->pbe;
|
|
if ((pbe == NULL) || (pbe->prbVerify != prb))
|
|
{
|
|
// We have to allocate a new offscreen cache brush entry for
|
|
// the brush:
|
|
|
|
iBrushCache = ppdev->iBrushCache;
|
|
pbe = &ppdev->abe[iBrushCache];
|
|
|
|
iBrushCache++;
|
|
if (iBrushCache >= ppdev->cBrushCache)
|
|
iBrushCache = 0;
|
|
|
|
ppdev->iBrushCache = iBrushCache;
|
|
|
|
// Update our links:
|
|
|
|
pbe->prbVerify = prb;
|
|
prb->pbe = pbe;
|
|
}
|
|
|
|
//
|
|
// Download brush into cache
|
|
//
|
|
|
|
pjPattern = (PBYTE) &prb->aulPattern[0]; // Copy from brush buffer
|
|
cjPattern = PELS_TO_BYTES(TOTAL_BRUSH_SIZE);
|
|
|
|
lDeltaPat = PELS_TO_BYTES(8);
|
|
xCnt = PELS_TO_BYTES(8);
|
|
yCnt = 8;
|
|
|
|
ulDst = (pbe->y * ppdev->lDelta) + PELS_TO_BYTES(pbe->x);
|
|
|
|
ppdev->pfnBankMap(ppdev, ppdev->lXferBank);
|
|
|
|
CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
|
|
|
|
CP_IO_DST_Y_OFFSET(ppdev, pjPorts, (lDeltaPat * 2));
|
|
CP_IO_XCNT(ppdev, pjPorts, (xCnt - 1));
|
|
CP_IO_YCNT(ppdev, pjPorts, (yCnt - 1));
|
|
CP_IO_BLT_MODE(ppdev, pjPorts, SRC_CPU_DATA);
|
|
CP_IO_ROP(ppdev, pjPorts, CL_SRC_COPY);
|
|
CP_IO_DST_ADDR_ABS(ppdev, pjPorts, ulDst);
|
|
|
|
CP_IO_START_BLT(ppdev, pjPorts);
|
|
|
|
vImageTransfer(ppdev, pjPattern, lDeltaPat, xCnt, yCnt);
|
|
|
|
//
|
|
// Duplicate brush horizontally
|
|
//
|
|
|
|
CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
|
|
|
|
CP_IO_XCNT(ppdev, pjPorts, (xCnt - 1));
|
|
CP_IO_YCNT(ppdev, pjPorts, (yCnt - 1));
|
|
CP_IO_BLT_MODE(ppdev, pjPorts, 0);
|
|
CP_IO_SRC_Y_OFFSET(ppdev, pjPorts, (lDeltaPat * 2));
|
|
CP_IO_SRC_ADDR(ppdev, pjPorts, ulDst);
|
|
CP_IO_DST_ADDR_ABS(ppdev, pjPorts, (ulDst + lDeltaPat));
|
|
|
|
CP_IO_START_BLT(ppdev, pjPorts);
|
|
|
|
//
|
|
// Duplicate brush vertically
|
|
//
|
|
|
|
CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
|
|
|
|
CP_IO_SRC_Y_OFFSET(ppdev, pjPorts, (xCnt * 2));
|
|
CP_IO_DST_Y_OFFSET(ppdev, pjPorts, (xCnt * 2));
|
|
CP_IO_BLT_MODE(ppdev, pjPorts, 0);
|
|
CP_IO_XCNT(ppdev, pjPorts, ((xCnt * 2) - 1));
|
|
CP_IO_YCNT(ppdev, pjPorts, (yCnt - 1));
|
|
CP_IO_SRC_ADDR(ppdev, pjPorts, ulDst);
|
|
CP_IO_DST_ADDR_ABS(ppdev, pjPorts, (ulDst + PELS_TO_BYTES(128)));
|
|
CP_IO_START_BLT(ppdev, pjPorts);
|
|
|
|
#if 0
|
|
{
|
|
////////////////////////////////////////////////////////////////
|
|
// DEBUG TILED PATTERNS
|
|
//
|
|
// The following code helps to debug patterns if you break the
|
|
// realization code. It copies the 2x2 tiled copy of the brush
|
|
// to the visible screen.
|
|
//
|
|
|
|
POINTL ptl;
|
|
RECTL rcl;
|
|
|
|
ptl.x = pbe->x;
|
|
ptl.y = pbe->y;
|
|
|
|
rcl.left = 10;
|
|
rcl.right = 10 + 16;
|
|
rcl.top = ppdev->cyScreen - 10 - 16;
|
|
rcl.bottom = ppdev->cyScreen - 10;
|
|
|
|
{
|
|
LONG lDelta = ppdev->lDelta;
|
|
BYTE jHwRop;
|
|
BYTE jMode;
|
|
|
|
//
|
|
// Make sure we can write to the video registers.
|
|
//
|
|
|
|
CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
|
|
|
|
CP_IO_ROP(ppdev, pjPorts, CL_SRC_COPY);
|
|
CP_IO_SRC_Y_OFFSET(ppdev, pjPorts, PELS_TO_BYTES(16));
|
|
CP_IO_DST_Y_OFFSET(ppdev, pjPorts, lDelta);
|
|
|
|
{
|
|
//
|
|
// Top to Bottom - Left to Right
|
|
//
|
|
|
|
jMode |= DIR_TBLR;
|
|
CP_IO_BLT_MODE(ppdev, pjPorts, ppdev->jModeColor);
|
|
|
|
{
|
|
|
|
CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
|
|
|
|
CP_IO_XCNT(ppdev, pjPorts, (PELS_TO_BYTES(rcl.right - rcl.left) - 1));
|
|
CP_IO_YCNT(ppdev, pjPorts, (rcl.bottom - rcl.top - 1));
|
|
|
|
CP_IO_SRC_ADDR(ppdev, pjPorts, (0 + ((ptl.y) * lDelta) + PELS_TO_BYTES(ptl.x)));
|
|
CP_IO_DST_ADDR_ABS(ppdev, pjPorts, ((rcl.top * lDelta) + PELS_TO_BYTES(rcl.left)));
|
|
CP_IO_START_BLT(ppdev, pjPorts);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
}
|
|
|
|
/**************************************************************************
|
|
* VOID vIoFillPat
|
|
*
|
|
* This routine uses the pattern hardware to draw a patterned list of
|
|
* rectangles.
|
|
*
|
|
**************************************************************************/
|
|
|
|
VOID vIoFillPat(
|
|
PDEV* ppdev,
|
|
LONG c, // Can't be zero
|
|
RECTL* prcl, // Array of relative coordinate destination rects
|
|
ROP4 rop4, // Obvious?
|
|
RBRUSH_COLOR rbc, // Drawing color is rbc.iSolidColor
|
|
POINTL* pptlBrush) //
|
|
{
|
|
BYTE* pjPorts = ppdev->pjPorts;
|
|
LONG lDelta = ppdev->lDelta;
|
|
ULONG ulAlignedPatternOffset = ppdev->ulAlignedPatternOffset;
|
|
ULONG ulPatternAddrBase;
|
|
BYTE jHwRop;
|
|
BYTE jMode;
|
|
BRUSHENTRY* pbe; // Pointer to brush entry data, which is used
|
|
// for keeping track of the location and status
|
|
// of the pattern bits cached in off-screen
|
|
// memory
|
|
|
|
DISPDBG((10,"vFillPat called"));
|
|
|
|
ASSERTDD(c > 0, "Can't handle zero rectangles");
|
|
ASSERTDD(ppdev->cBpp < 3, "vFillPat only works at 8bpp and 16bpp");
|
|
|
|
if ((rbc.prb->pbe == NULL) ||
|
|
(rbc.prb->pbe->prbVerify != rbc.prb))
|
|
{
|
|
vIoFastPatRealize(ppdev, rbc.prb);
|
|
DISPDBG((5, " -- Brush cache miss, put it at (%d,%d)", rbc.prb->pbe->x, rbc.prb->pbe->y));
|
|
}
|
|
else
|
|
{
|
|
DISPDBG((5, " -- Brush cache hit on brush at (%d,%d)", rbc.prb->pbe->x, rbc.prb->pbe->y));
|
|
}
|
|
|
|
pbe = rbc.prb->pbe;
|
|
|
|
//
|
|
// Fill the list of rectangles
|
|
//
|
|
|
|
ulPatternAddrBase = pbe->xy;
|
|
jHwRop = gajHwMixFromRop2[(rop4 >> 2) & 0xf];
|
|
jMode = ppdev->jModeColor | ENABLE_8x8_PATTERN_COPY;
|
|
|
|
do {
|
|
ULONG offset = 0;
|
|
|
|
offset = PELS_TO_BYTES(
|
|
(((prcl->top-pptlBrush->y)&7) << 4)
|
|
+((prcl->left-pptlBrush->x)&7)
|
|
);
|
|
|
|
// align the pattern to a new location
|
|
|
|
CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
|
|
|
|
CP_IO_BLT_MODE(ppdev, pjPorts, 0);
|
|
CP_IO_ROP(ppdev, pjPorts, CL_SRC_COPY);
|
|
CP_IO_SRC_Y_OFFSET(ppdev, pjPorts, PELS_TO_BYTES(16));
|
|
CP_IO_DST_Y_OFFSET(ppdev, pjPorts, PELS_TO_BYTES(8));
|
|
CP_IO_SRC_ADDR(ppdev, pjPorts, (ulPatternAddrBase + offset));
|
|
CP_IO_XCNT(ppdev, pjPorts, (PELS_TO_BYTES(8) - 1));
|
|
CP_IO_YCNT(ppdev, pjPorts, (8 - 1));
|
|
CP_IO_DST_ADDR_ABS(ppdev, pjPorts, ulAlignedPatternOffset);
|
|
CP_IO_START_BLT(ppdev, pjPorts);
|
|
|
|
// fill using aligned pattern
|
|
|
|
CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
|
|
|
|
CP_IO_BLT_MODE(ppdev, pjPorts, jMode);
|
|
CP_IO_ROP(ppdev, pjPorts, jHwRop);
|
|
CP_IO_DST_Y_OFFSET(ppdev, pjPorts, lDelta);
|
|
CP_IO_SRC_ADDR(ppdev, pjPorts, ulAlignedPatternOffset);
|
|
CP_IO_XCNT(ppdev, pjPorts, (PELS_TO_BYTES(prcl->right - prcl->left) - 1));
|
|
CP_IO_YCNT(ppdev, pjPorts, (prcl->bottom - prcl->top - 1));
|
|
CP_IO_DST_ADDR(ppdev, pjPorts, ((prcl->top * lDelta) + PELS_TO_BYTES(prcl->left)));
|
|
CP_IO_START_BLT(ppdev, pjPorts);
|
|
|
|
prcl++;
|
|
|
|
} while (--c != 0);
|
|
}
|
|
|
|
|
|
/**************************************************************************
|
|
* VOID vIoFillSolid
|
|
*
|
|
* Does a solid fill to a list of rectangles.
|
|
*
|
|
**************************************************************************/
|
|
|
|
VOID vIoFillSolid(
|
|
PDEV* ppdev,
|
|
LONG c, // Can't be zero
|
|
RECTL* prcl, // Array of relative coordinate destination rects
|
|
ROP4 rop4, // Obvious?
|
|
RBRUSH_COLOR rbc, // Drawing color is rbc.iSolidColor
|
|
POINTL* pptlBrush) // Not used
|
|
{
|
|
BYTE* pjPorts = ppdev->pjPorts;
|
|
LONG lDelta = ppdev->lDelta;
|
|
LONG cBpp = ppdev->cBpp;
|
|
ULONG ulSolidColor;
|
|
BYTE jHwRop;
|
|
|
|
DISPDBG((10,"vFillSolid called"));
|
|
|
|
ASSERTDD(c > 0, "Can't handle zero rectangles");
|
|
|
|
ulSolidColor = rbc.iSolidColor;
|
|
|
|
if (cBpp == 1)
|
|
{
|
|
ulSolidColor |= ulSolidColor << 8;
|
|
ulSolidColor |= ulSolidColor << 16;
|
|
}
|
|
else if (cBpp == 2)
|
|
{
|
|
ulSolidColor |= ulSolidColor << 16;
|
|
}
|
|
|
|
jHwRop = gajHwMixFromRop2[(rop4 >> 2) & 0xf];
|
|
|
|
//
|
|
// Make sure we can write to the video registers.
|
|
//
|
|
|
|
CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
|
|
|
|
CP_IO_ROP(ppdev, pjPorts, jHwRop);
|
|
CP_IO_SRC_ADDR(ppdev, pjPorts, ppdev->ulSolidColorOffset);
|
|
CP_IO_DST_Y_OFFSET(ppdev, pjPorts, lDelta);
|
|
CP_IO_BLT_MODE(ppdev, pjPorts, ENABLE_COLOR_EXPAND |
|
|
ENABLE_8x8_PATTERN_COPY |
|
|
ppdev->jModeColor);
|
|
CP_IO_FG_COLOR(ppdev, pjPorts, ulSolidColor);
|
|
|
|
//
|
|
// Fill the list of rectangles
|
|
//
|
|
|
|
while (TRUE)
|
|
{
|
|
CP_IO_XCNT(ppdev, pjPorts, (PELS_TO_BYTES(prcl->right - prcl->left) - 1));
|
|
CP_IO_YCNT(ppdev, pjPorts, (prcl->bottom - prcl->top - 1));
|
|
CP_IO_DST_ADDR(ppdev, pjPorts, ((prcl->top * lDelta) + PELS_TO_BYTES(prcl->left)));
|
|
CP_IO_START_BLT(ppdev, pjPorts);
|
|
|
|
if (--c == 0)
|
|
return;
|
|
|
|
prcl++;
|
|
CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
|
|
}
|
|
}
|
|
|
|
|
|
/**************************************************************************
|
|
* VOID vIoCopyBlt
|
|
*
|
|
* Does a screen-to-screen blt of a list of rectangles.
|
|
*
|
|
**************************************************************************/
|
|
|
|
VOID vIoCopyBlt(
|
|
PDEV* ppdev,
|
|
LONG c, // Can't be zero
|
|
RECTL* prcl, // Array of relative coordinates destination rectangles
|
|
ROP4 rop4, // Obvious?
|
|
POINTL* pptlSrc, // Original unclipped source point
|
|
RECTL* prclDst) // Original unclipped destination rectangle
|
|
{
|
|
LONG dx;
|
|
LONG dy; // Add delta to destination to get source
|
|
|
|
LONG xyOffset = ppdev->xyOffset;
|
|
BYTE* pjPorts = ppdev->pjPorts;
|
|
LONG lDelta = ppdev->lDelta;
|
|
BYTE jHwRop;
|
|
|
|
DISPDBG((10,"vCopyBlt called"));
|
|
|
|
ASSERTDD(c > 0, "Can't handle zero rectangles");
|
|
|
|
//
|
|
// The src-dst delta will be the same for all rectangles
|
|
//
|
|
|
|
dx = pptlSrc->x - prclDst->left;
|
|
dy = pptlSrc->y - prclDst->top;
|
|
|
|
//
|
|
// Make sure we can write to the video registers.
|
|
//
|
|
|
|
CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
|
|
|
|
jHwRop = gajHwMixFromRop2[rop4 & 0xf];
|
|
CP_IO_ROP(ppdev, pjPorts, jHwRop);
|
|
|
|
CP_IO_SRC_Y_OFFSET(ppdev, pjPorts, lDelta);
|
|
CP_IO_DST_Y_OFFSET(ppdev, pjPorts, lDelta);
|
|
|
|
//
|
|
// The accelerator may not be as fast at doing right-to-left copies, so
|
|
// only do them when the rectangles truly overlap:
|
|
//
|
|
|
|
if (!OVERLAP(prclDst, pptlSrc) ||
|
|
(prclDst->top < pptlSrc->y) ||
|
|
((prclDst->top == pptlSrc->y) && (prclDst->left <= pptlSrc->x))
|
|
)
|
|
{
|
|
//
|
|
// Top to Bottom - Left to Right
|
|
//
|
|
|
|
DISPDBG((12,"Top to Bottom - Left to Right"));
|
|
|
|
CP_IO_BLT_MODE(ppdev, pjPorts, DIR_TBLR);
|
|
|
|
while (TRUE)
|
|
{
|
|
CP_IO_XCNT(ppdev, pjPorts, (PELS_TO_BYTES(prcl->right - prcl->left) - 1));
|
|
CP_IO_YCNT(ppdev, pjPorts, (prcl->bottom - prcl->top - 1));
|
|
|
|
CP_IO_SRC_ADDR(ppdev, pjPorts, (xyOffset + ((prcl->top + dy) * lDelta) + PELS_TO_BYTES(prcl->left + dx)));
|
|
CP_IO_DST_ADDR(ppdev, pjPorts, ((prcl->top * lDelta) + PELS_TO_BYTES(prcl->left)));
|
|
CP_IO_START_BLT(ppdev, pjPorts);
|
|
|
|
if (--c == 0)
|
|
return;
|
|
|
|
prcl++;
|
|
CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
//
|
|
// Bottom to Top - Right to Left
|
|
//
|
|
|
|
DISPDBG((12,"Bottom to Top - Right to Left"));
|
|
|
|
CP_IO_BLT_MODE(ppdev, pjPorts, DIR_BTRL);
|
|
|
|
while (TRUE)
|
|
{
|
|
CP_IO_XCNT(ppdev, pjPorts, (PELS_TO_BYTES(prcl->right - prcl->left) - 1));
|
|
CP_IO_YCNT(ppdev, pjPorts, (prcl->bottom - prcl->top - 1));
|
|
|
|
CP_IO_SRC_ADDR(ppdev, pjPorts, (xyOffset + ((prcl->bottom - 1 + dy) * lDelta) + PELS_TO_BYTES(prcl->right + dx) - 1));
|
|
CP_IO_DST_ADDR(ppdev, pjPorts, (((prcl->bottom - 1) * lDelta) + PELS_TO_BYTES(prcl->right) - 1));
|
|
CP_IO_START_BLT(ppdev, pjPorts);
|
|
|
|
if (--c == 0)
|
|
return;
|
|
|
|
prcl++;
|
|
CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
|
|
}
|
|
}
|
|
}
|
|
|
|
/******************************Public*Routine******************************\
|
|
* VOID vIoXfer1bpp
|
|
*
|
|
* Low-level routine used to transfer monochrome data to the screen using
|
|
* DWORD writes to the blt engine.
|
|
*
|
|
* This can handle opaque or transparent expansions. It does opaque
|
|
* expansions by drawing the opaque rectangle first and then transparently
|
|
* expands the foreground bits.
|
|
*
|
|
\**************************************************************************/
|
|
VOID vIoXfer1bpp(
|
|
PDEV* ppdev,
|
|
LONG c, // Count of rectangles, can't be zero
|
|
RECTL* prcl, // List of destination rectangles, in relative
|
|
// coordinates
|
|
ROP4 rop4, // Actually had better be a rop3
|
|
SURFOBJ* psoSrc, // Source surface
|
|
POINTL* pptlSrc, // Original unclipped source point
|
|
RECTL* prclDst, // Original unclipped destination rectangle
|
|
XLATEOBJ* pxlo) // Translate that provides color-expansion information
|
|
{
|
|
ULONG* pulXfer;
|
|
ULONG* pul;
|
|
LONG ix;
|
|
LONG iy;
|
|
LONG cxWidthInBytes;
|
|
BYTE* pjBits;
|
|
POINTL ptlDst;
|
|
POINTL ptlSrc;
|
|
SIZEL sizlDst;
|
|
LONG cxLeftMask;
|
|
LONG cxRightMask;
|
|
ULONG ulDstAddr;
|
|
INT nDwords;
|
|
ULONG ulLeftMask;
|
|
ULONG ulRightMask;
|
|
LONG dx;
|
|
LONG dy;
|
|
|
|
BYTE* pjPorts = ppdev->pjPorts;
|
|
LONG lDelta = ppdev->lDelta;
|
|
LONG lDeltaSrc = psoSrc->lDelta;
|
|
LONG cBpp = ppdev->cBpp;
|
|
ULONG ulFgColor = pxlo->pulXlate[1];
|
|
ULONG ulBgColor = pxlo->pulXlate[0];
|
|
|
|
// Since the hardware clipping on some of the Cirrus chips is broken, we
|
|
// do the clipping by rounding out the edges to dword boundaries and then
|
|
// doing the blt transparently. In the event that we want the expansion
|
|
// to be opaque, we do the opaquing blt in advance. One side effect of
|
|
// this is that the destination bits are no longer valid for processing
|
|
// the rop. This could probably be optimized by doing the edges seperately
|
|
// and then doing the middle section in one pass. However, this is
|
|
// complicated by a 5434 bug that breaks blts less than 10 pixels wide.
|
|
|
|
ASSERTDD(c > 0, "Can't handle zero rectangles");
|
|
ASSERTDD(((rop4 & 0xff00) == 0xcc00), "Expected foreground rop of 0xcc");
|
|
|
|
//
|
|
// The src-dst delta will be the same for all rectangles
|
|
//
|
|
|
|
dx = pptlSrc->x - prclDst->left;
|
|
dy = pptlSrc->y - prclDst->top;
|
|
|
|
if (cBpp == 1)
|
|
{
|
|
ulFgColor = (ulFgColor << 8) | (ulFgColor & 0xff);
|
|
ulBgColor = (ulBgColor << 8) | (ulBgColor & 0xff);
|
|
ulFgColor = (ulFgColor << 16) | (ulFgColor & 0xffff);
|
|
ulBgColor = (ulBgColor << 16) | (ulBgColor & 0xffff);
|
|
}
|
|
else if (cBpp == 2)
|
|
{
|
|
ulFgColor = (ulFgColor << 16) | (ulFgColor & 0xffff);
|
|
ulBgColor = (ulBgColor << 16) | (ulBgColor & 0xffff);
|
|
}
|
|
|
|
pulXfer = ppdev->pulXfer;
|
|
ppdev->pfnBankMap(ppdev, ppdev->lXferBank);
|
|
|
|
CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
|
|
CP_IO_DST_Y_OFFSET(ppdev, pjPorts, lDelta);
|
|
|
|
if (rop4 != 0xCCAA)
|
|
{
|
|
LONG lCnt = c;
|
|
RECTL* prclTmp = prcl;
|
|
BYTE jHwBgRop = gajHwMixFromRop2[rop4 & 0xf];
|
|
|
|
CP_IO_ROP(ppdev, pjPorts, jHwBgRop);
|
|
CP_IO_FG_COLOR(ppdev, pjPorts, ulBgColor);
|
|
CP_IO_SRC_ADDR(ppdev, pjPorts, ppdev->ulSolidColorOffset);
|
|
CP_IO_BLT_MODE(ppdev, pjPorts, ppdev->jModeColor |
|
|
ENABLE_COLOR_EXPAND |
|
|
ENABLE_8x8_PATTERN_COPY);
|
|
|
|
do
|
|
{
|
|
// calculate the size of the blt
|
|
|
|
ptlDst.x = prclTmp->left;
|
|
ptlDst.y = prclTmp->top;
|
|
sizlDst.cx = prclTmp->right - ptlDst.x;
|
|
sizlDst.cy = prclTmp->bottom - ptlDst.y;
|
|
|
|
//
|
|
// Fill the background rectangle with the background color
|
|
//
|
|
|
|
// Set the dest addresses
|
|
|
|
ulDstAddr = (ptlDst.y * lDelta) + PELS_TO_BYTES(ptlDst.x);
|
|
|
|
CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
|
|
|
|
//
|
|
// Tell the hardware how many bytes we'd like to write:
|
|
// sizlDst.cx * sizelDst.cy
|
|
//
|
|
CP_IO_XCNT(ppdev, pjPorts, PELS_TO_BYTES(sizlDst.cx) - 1);
|
|
CP_IO_YCNT(ppdev, pjPorts, sizlDst.cy - 1);
|
|
CP_IO_DST_ADDR(ppdev, pjPorts, ulDstAddr);
|
|
|
|
// Start the blt operation
|
|
|
|
CP_IO_START_BLT(ppdev, pjPorts);
|
|
prclTmp++;
|
|
} while (--lCnt != 0);
|
|
|
|
CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
|
|
}
|
|
|
|
CP_IO_FG_COLOR(ppdev, pjPorts, ulFgColor);
|
|
|
|
CP_IO_BG_COLOR(ppdev, pjPorts, ~ulFgColor);
|
|
CP_IO_XPAR_COLOR(ppdev, pjPorts, ~ulFgColor);
|
|
CP_IO_ROP(ppdev, pjPorts, CL_SRC_COPY);
|
|
CP_IO_BLT_MODE(ppdev, pjPorts, ppdev->jModeColor |
|
|
ENABLE_COLOR_EXPAND |
|
|
ENABLE_TRANSPARENCY_COMPARE |
|
|
SRC_CPU_DATA);
|
|
CP_IO_BLT_EXT_MODE(ppdev, pjPorts, 0); // jl01
|
|
|
|
do
|
|
{
|
|
// calculate the size of the blt
|
|
|
|
ptlDst.x = prcl->left;
|
|
ptlDst.y = prcl->top;
|
|
sizlDst.cx = prcl->right - ptlDst.x;
|
|
sizlDst.cy = prcl->bottom - ptlDst.y;
|
|
|
|
// calculate the number of dwords per scan line
|
|
|
|
ptlSrc.x = prcl->left + dx;
|
|
ptlSrc.y = prcl->top + dy;
|
|
|
|
// Floor the source.
|
|
// Extend the width by the amount required to floor to a dword boundary.
|
|
// Set the size of the left mask.
|
|
// Floor the dest, so it aligns with the floored source.
|
|
|
|
if ((cxLeftMask = (ptlSrc.x & 31)))
|
|
{
|
|
sizlDst.cx += cxLeftMask;
|
|
ptlSrc.x &= ~31;
|
|
ptlDst.x -= cxLeftMask;
|
|
}
|
|
|
|
ulLeftMask = gaulLeftClipMask[cxLeftMask];
|
|
|
|
// Ceil the cx to a dword boundary.
|
|
|
|
if (cxRightMask = (sizlDst.cx & 31))
|
|
{
|
|
cxRightMask = 32 - cxRightMask;
|
|
sizlDst.cx = (sizlDst.cx + 31) & ~31;
|
|
}
|
|
|
|
ulRightMask = gaulRightClipMask[cxRightMask];
|
|
|
|
if (sizlDst.cx == 32)
|
|
{
|
|
ulLeftMask &= ulRightMask;
|
|
ulRightMask = 0;
|
|
}
|
|
|
|
// Note: At this point sizlDst.cx is the width of the blt in pixels,
|
|
// floored to a dword boundary, and ceiled to a dword boundary.
|
|
|
|
// Calculate the width in Bytes
|
|
|
|
cxWidthInBytes = sizlDst.cx >> 3;
|
|
|
|
// Calculate the number of Dwords and any remaining bytes
|
|
|
|
nDwords = cxWidthInBytes >> 2;
|
|
|
|
ASSERTDD(((cxWidthInBytes & 0x03) == 0),
|
|
"cxWidthInBytes is not a DWORD multiple");
|
|
|
|
// Calculate the address of the source bitmap
|
|
// This is to a byte boundary.
|
|
|
|
pjBits = (PBYTE) psoSrc->pvScan0;
|
|
pjBits += ptlSrc.y * lDeltaSrc;
|
|
pjBits += ptlSrc.x >> 3;
|
|
|
|
ASSERTDD((((ULONG_PTR)pjBits & 0x03) == 0),
|
|
"pjBits not DWORD aligned like it should be");
|
|
|
|
//
|
|
// Blt the 1 bpp bitmap
|
|
//
|
|
|
|
ulDstAddr = (ptlDst.y * lDelta) + PELS_TO_BYTES(ptlDst.x);
|
|
|
|
CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
|
|
|
|
CP_IO_XCNT(ppdev, pjPorts, PELS_TO_BYTES(sizlDst.cx) - 1);
|
|
CP_IO_YCNT(ppdev, pjPorts, sizlDst.cy - 1);
|
|
|
|
//
|
|
// The 542x chips require a write to the Src Address Register when
|
|
// doing a host transfer with color expansion. The value is
|
|
// irrelevant, but the write is crucial. This is documented in
|
|
// the manual, not the errata. Go figure.
|
|
//
|
|
|
|
CP_IO_SRC_ADDR(ppdev, pjPorts, 0);
|
|
CP_IO_DST_ADDR(ppdev, pjPorts, ulDstAddr);
|
|
|
|
CP_IO_START_BLT(ppdev, pjPorts);
|
|
|
|
//
|
|
// Transfer the host bitmap.
|
|
//
|
|
if (ulRightMask)
|
|
{
|
|
//
|
|
// Blt is > 1 DWORD wide (nDwords > 1)
|
|
//
|
|
|
|
for (iy = 0; iy < sizlDst.cy; iy++)
|
|
{
|
|
pul = (ULONG*) pjBits;
|
|
|
|
//*pulXfer++ = *(((ULONG*)pul)++) & ulLeftMask;
|
|
WRITE_REGISTER_ULONG(pulXfer, (*((ULONG*)pul) & ulLeftMask));
|
|
pul++;
|
|
|
|
for (ix = 0; ix < (nDwords-2); ix++)
|
|
{
|
|
//*pulXfer++ = *(((ULONG*)pul)++);
|
|
WRITE_REGISTER_ULONG(pulXfer, (*((ULONG*)pul)));
|
|
pul++;
|
|
}
|
|
//*pulXfer++ = *(((ULONG*)pul)++) & ulRightMask;
|
|
WRITE_REGISTER_ULONG(pulXfer, (*((ULONG*)pul) & ulRightMask));
|
|
pul++;
|
|
|
|
pjBits += lDeltaSrc;
|
|
//pulXfer = ppdev->pulXfer;
|
|
CP_MEMORY_BARRIER(); // Flush memory cache when we reset the address
|
|
|
|
}
|
|
}
|
|
else
|
|
{
|
|
//
|
|
// Blt is 1 DWORD wide (nDwords == 1)
|
|
//
|
|
|
|
for (iy = 0; iy < sizlDst.cy; iy++)
|
|
{
|
|
//*pulXfer = *((ULONG*)pjBits) & ulLeftMask;
|
|
WRITE_REGISTER_ULONG(pulXfer, (*((ULONG*)pjBits) & ulLeftMask));
|
|
pjBits += lDeltaSrc;
|
|
CP_MEMORY_BARRIER(); // Flush memory cache
|
|
}
|
|
}
|
|
|
|
prcl++;
|
|
} while (--c != 0);
|
|
}
|
|
|
|
/******************************Public*Routine******************************\
|
|
* VOID vIoXfer4bpp
|
|
*
|
|
* Does a 4bpp transfer from a bitmap to the screen.
|
|
*
|
|
* NOTE: The screen must be 8bpp for this function to be called!
|
|
*
|
|
* The reason we implement this is that a lot of resources are kept as 4bpp,
|
|
* and used to initialize DFBs, some of which we of course keep off-screen.
|
|
*
|
|
\**************************************************************************/
|
|
|
|
// XLATE_BUFFER_SIZE defines the size of the stack-based buffer we use
|
|
// for doing the translate. Note that in general stack buffers should
|
|
// be kept as small as possible. The OS guarantees us only 8k for stack
|
|
// from GDI down to the display driver in low memory situations; if we
|
|
// ask for more, we'll access violate. Note also that at any time the
|
|
// stack buffer cannot be larger than a page (4k) -- otherwise we may
|
|
// miss touching the 'guard page' and access violate then too.
|
|
|
|
#define XLATE_BUFFER_SIZE 256
|
|
|
|
VOID vIoXfer4bpp(
|
|
PDEV* ppdev,
|
|
LONG c, // Count of rectangles, can't be zero
|
|
RECTL* prcl, // List of destination rectangles, in relative
|
|
// coordinates
|
|
ULONG rop4, // rop4
|
|
SURFOBJ* psoSrc, // Source surface
|
|
POINTL* pptlSrc, // Original unclipped source point
|
|
RECTL* prclDst, // Original unclipped destination rectangle
|
|
XLATEOBJ* pxlo) // Translate that provides colour-expansion information
|
|
{
|
|
ULONG* pulXfer = ppdev->pulXfer;
|
|
BYTE* pjPorts = ppdev->pjPorts;
|
|
LONG lDelta = ppdev->lDelta;
|
|
ULONG ulDstAddr;
|
|
LONG dx;
|
|
LONG dy;
|
|
LONG cx;
|
|
LONG cy;
|
|
LONG lSrcDelta;
|
|
BYTE* pjSrcScan0;
|
|
BYTE* pjScan;
|
|
BYTE* pjSrc;
|
|
BYTE* pjDst;
|
|
LONG cxThis;
|
|
LONG cxToGo;
|
|
LONG xSrc;
|
|
LONG iLoop;
|
|
BYTE jSrc;
|
|
ULONG* pulXlate;
|
|
LONG cdwThis;
|
|
BYTE* pjBuf;
|
|
BYTE ajBuf[XLATE_BUFFER_SIZE];
|
|
|
|
ASSERTDD(ppdev->iBitmapFormat == BMF_8BPP, "Screen must be 8bpp");
|
|
ASSERTDD(psoSrc->iBitmapFormat == BMF_4BPP, "Source must be 4bpp");
|
|
ASSERTDD(c > 0, "Can't handle zero rectangles");
|
|
ASSERTDD(((rop4 & 0xff00) >> 8) == (rop4 & 0xff),
|
|
"Expect only a rop2");
|
|
|
|
DISPDBG((5, "vXfer4bpp: entry"));
|
|
|
|
dx = pptlSrc->x - prclDst->left;
|
|
dy = pptlSrc->y - prclDst->top; // Add to destination to get source
|
|
|
|
lSrcDelta = psoSrc->lDelta;
|
|
pjSrcScan0 = psoSrc->pvScan0;
|
|
|
|
ppdev->pfnBankMap(ppdev, ppdev->lXferBank);
|
|
|
|
CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
|
|
|
|
CP_IO_DST_Y_OFFSET(ppdev, pjPorts, lDelta);
|
|
CP_IO_ROP(ppdev, pjPorts, gajHwMixFromRop2[rop4 & 0xf]);
|
|
CP_IO_BLT_MODE(ppdev, pjPorts, SRC_CPU_DATA);
|
|
|
|
while(TRUE)
|
|
{
|
|
ulDstAddr = (prcl->top * lDelta) + PELS_TO_BYTES(prcl->left);
|
|
cx = prcl->right - prcl->left;
|
|
cy = prcl->bottom - prcl->top;
|
|
|
|
CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
|
|
|
|
CP_IO_XCNT(ppdev, pjPorts, PELS_TO_BYTES(cx) - 1);
|
|
CP_IO_YCNT(ppdev, pjPorts, cy - 1);
|
|
CP_IO_DST_ADDR(ppdev, pjPorts, ulDstAddr);
|
|
|
|
pulXlate = pxlo->pulXlate;
|
|
xSrc = prcl->left + dx;
|
|
pjScan = pjSrcScan0 + (prcl->top + dy) * lSrcDelta + (xSrc >> 1);
|
|
|
|
CP_IO_START_BLT(ppdev, pjPorts);
|
|
|
|
do {
|
|
pjSrc = pjScan;
|
|
cxToGo = cx; // # of pels per scan in 4bpp source
|
|
do {
|
|
cxThis = XLATE_BUFFER_SIZE;
|
|
// We can handle XLATE_BUFFER_SIZE number
|
|
// of pels in this xlate batch
|
|
cxToGo -= cxThis; // cxThis will be the actual number of
|
|
// pels we'll do in this xlate batch
|
|
if (cxToGo < 0)
|
|
cxThis += cxToGo;
|
|
|
|
pjDst = ajBuf; // Points to our temporary batch buffer
|
|
|
|
// We handle alignment ourselves because it's easy to
|
|
// do, rather than pay the cost of setting/resetting
|
|
// the scissors register:
|
|
|
|
if (xSrc & 1)
|
|
{
|
|
// When unaligned, we have to be careful not to read
|
|
// past the end of the 4bpp bitmap (that could
|
|
// potentially cause us to access violate):
|
|
|
|
iLoop = cxThis >> 1; // Each loop handles 2 pels;
|
|
// we'll handle odd pel
|
|
// separately
|
|
jSrc = *pjSrc;
|
|
while (iLoop-- != 0)
|
|
{
|
|
*pjDst++ = (BYTE) pulXlate[jSrc & 0xf];
|
|
jSrc = *(++pjSrc);
|
|
*pjDst++ = (BYTE) pulXlate[jSrc >> 4];
|
|
}
|
|
|
|
if (cxThis & 1)
|
|
*pjDst = (BYTE) pulXlate[jSrc & 0xf];
|
|
}
|
|
else
|
|
{
|
|
iLoop = (cxThis + 1) >> 1; // Each loop handles 2 pels
|
|
do {
|
|
jSrc = *pjSrc++;
|
|
|
|
*pjDst++ = (BYTE) pulXlate[jSrc >> 4];
|
|
*pjDst++ = (BYTE) pulXlate[jSrc & 0xf];
|
|
|
|
} while (--iLoop != 0);
|
|
}
|
|
|
|
// The number of bytes we'll transfer is equal to the number
|
|
// of pels we've processed in the batch. Since we're
|
|
// transferring words, we have to round up to get the word
|
|
// count:
|
|
|
|
cdwThis = (cxThis + 3) >> 2;
|
|
pjBuf = ajBuf;
|
|
|
|
TRANSFER_DWORD_ALIGNED(ppdev, pulXfer, pjBuf, cdwThis);
|
|
|
|
} while (cxToGo > 0);
|
|
|
|
pjScan += lSrcDelta; // Advance to next source scan. Note
|
|
// that we could have computed the
|
|
// value to advance 'pjSrc' directly,
|
|
// but this method is less
|
|
// error-prone.
|
|
|
|
} while (--cy != 0);
|
|
|
|
if (--c == 0)
|
|
return;
|
|
|
|
prcl++;
|
|
}
|
|
}
|
|
|
|
/******************************Public*Routine******************************\
|
|
* VOID vIoXferNative
|
|
*
|
|
* Transfers a bitmap that is the same color depth as the display to
|
|
* the screen via the data transfer register, with no translation.
|
|
*
|
|
\**************************************************************************/
|
|
|
|
VOID vIoXferNative(
|
|
PDEV* ppdev,
|
|
LONG c, // Count of rectangles, can't be zero
|
|
RECTL* prcl, // Array of relative coordinates destination rectangles
|
|
ULONG rop4, // rop4
|
|
SURFOBJ* psoSrc, // Source surface
|
|
POINTL* pptlSrc, // Original unclipped source point
|
|
RECTL* prclDst, // Original unclipped destination rectangle
|
|
XLATEOBJ* pxlo) // Not used
|
|
{
|
|
ULONG* pulXfer = ppdev->pulXfer;
|
|
BYTE* pjPorts = ppdev->pjPorts;
|
|
LONG lDelta = ppdev->lDelta;
|
|
ULONG ulDstAddr;
|
|
LONG dx;
|
|
LONG dy;
|
|
LONG cx;
|
|
LONG cy;
|
|
LONG lSrcDelta;
|
|
BYTE* pjSrcScan0;
|
|
BYTE* pjSrc;
|
|
LONG cjSrc;
|
|
|
|
ASSERTDD((pxlo == NULL) || (pxlo->flXlate & XO_TRIVIAL),
|
|
"Can handle trivial xlate only");
|
|
ASSERTDD(psoSrc->iBitmapFormat == ppdev->iBitmapFormat,
|
|
"Source must be same color depth as screen");
|
|
ASSERTDD(c > 0, "Can't handle zero rectangles");
|
|
ASSERTDD(((rop4 & 0xff00) >> 8) == (rop4 & 0xff),
|
|
"Expect only a rop2");
|
|
|
|
dx = pptlSrc->x - prclDst->left;
|
|
dy = pptlSrc->y - prclDst->top; // Add to destination to get source
|
|
|
|
lSrcDelta = psoSrc->lDelta;
|
|
pjSrcScan0 = psoSrc->pvScan0;
|
|
|
|
ppdev->pfnBankMap(ppdev, ppdev->lXferBank);
|
|
|
|
CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
|
|
|
|
CP_IO_DST_Y_OFFSET(ppdev, pjPorts, lDelta);
|
|
CP_IO_ROP(ppdev, pjPorts, gajHwMixFromRop2[rop4 & 0xf]);
|
|
CP_IO_BLT_MODE(ppdev, pjPorts, SRC_CPU_DATA);
|
|
|
|
while(TRUE)
|
|
{
|
|
ulDstAddr = (prcl->top * lDelta) + PELS_TO_BYTES(prcl->left);
|
|
cx = prcl->right - prcl->left;
|
|
cy = prcl->bottom - prcl->top;
|
|
|
|
CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
|
|
|
|
CP_IO_XCNT(ppdev, pjPorts, PELS_TO_BYTES(cx) - 1);
|
|
CP_IO_YCNT(ppdev, pjPorts, cy - 1);
|
|
CP_IO_DST_ADDR(ppdev, pjPorts, ulDstAddr);
|
|
|
|
cjSrc = PELS_TO_BYTES(cx);
|
|
pjSrc = pjSrcScan0 + (prcl->top + dy) * lSrcDelta
|
|
+ (PELS_TO_BYTES(prcl->left + dx));
|
|
|
|
CP_IO_START_BLT(ppdev, pjPorts);
|
|
vImageTransfer(ppdev, pjSrc, lSrcDelta, cjSrc, cy);
|
|
|
|
if (--c == 0)
|
|
return;
|
|
|
|
prcl++;
|
|
}
|
|
}
|