mirror of https://github.com/tongzx/nt5src
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
505 lines
15 KiB
505 lines
15 KiB
/******************************Module*Header*******************************\
* Module Name: bltmga.c
*
* Contains the low-level blt functions.
*
* Hopefully, if you're basing your display driver on this code, to
* support all of DrvBitBlt and DrvCopyBits, you'll only have to implement
* the following routines.  You shouldn't have to modify much in
* 'bitblt.c'.  I've tried to make these routines as few, modular, simple,
* and efficient as I could, while still accelerating as many calls as
* possible that would be cost-effective in terms of performance wins
* versus size and effort.
*
* Note: In the following, 'relative' coordinates refers to coordinates
*       that haven't yet had the offscreen bitmap (DFB) offset applied.
*       'Absolute' coordinates have had the offset applied.  For example,
*       we may be told to blt to (1, 1) of the bitmap, but the bitmap may
*       be sitting in offscreen memory starting at coordinate (0, 768) --
*       (1, 1) would be the 'relative' start coordinate, and (1, 769)
*       would be the 'absolute' start coordinate.
*
* Copyright (c) 1992-1996 Microsoft Corporation
* Copyright (c) 1993-1996 Matrox Electronic Systems, Ltd.
\**************************************************************************/
|
|
|
|
#include "precomp.h"
|
|
|
|
/******************************Public*Routine******************************\
|
|
* VOID vMgaFillSolid
|
|
*
|
|
* Fills a list of rectangles with a solid colour.
|
|
*
|
|
\**************************************************************************/
|
|
|
|
VOID vMgaFillSolid( // Type FNFILL
|
|
PDEV* ppdev,
|
|
LONG c, // Can't be zero
|
|
RECTL* prcl, // List of rectangles to be filled, in relative
|
|
// coordinates
|
|
ULONG rop4, // Rop4
|
|
RBRUSH_COLOR rbc, // Drawing colour is rbc.iSolidColor
|
|
POINTL* pptlBrush) // Not used
|
|
{
|
|
BYTE* pjBase;
|
|
LONG xOffset;
|
|
LONG yOffset;
|
|
ULONG ulDwg;
|
|
ULONG ulHwMix;
|
|
|
|
pjBase = ppdev->pjBase;
|
|
xOffset = ppdev->xOffset;
|
|
yOffset = ppdev->yOffset;
|
|
|
|
if (rop4 == 0xf0f0) // PATCOPY
|
|
{
|
|
ulDwg = opcode_TRAP + atype_RPL + blockm_ON +
|
|
pattern_OFF + transc_BG_OPAQUE +
|
|
bop_SRCCOPY;
|
|
}
|
|
else
|
|
{
|
|
// The ROP3 is a combination of P and D only:
|
|
//
|
|
// ROP3 Mga ROP3 Mga ROP3 Mga ROP3 Mga
|
|
//
|
|
// 0x00 0 0x50 4 0xa0 8 0xf0 c
|
|
// 0x05 1 0x55 5 0xa5 9 0xf5 d
|
|
// 0x0a 2 0x5a 6 0xaa a 0xfa e
|
|
// 0x0f 3 0x5f 7 0xaf b 0xff f
|
|
|
|
ulHwMix = (rop4 & 0x03) + ((rop4 & 0x30) >> 2);
|
|
|
|
if (ulHwMix == MGA_WHITENESS)
|
|
{
|
|
rbc.iSolidColor = 0xffffffff;
|
|
ulDwg = opcode_TRAP + atype_RPL + blockm_ON +
|
|
pattern_OFF + transc_BG_OPAQUE +
|
|
bop_SRCCOPY;
|
|
}
|
|
else if (ulHwMix == MGA_BLACKNESS)
|
|
{
|
|
rbc.iSolidColor = 0;
|
|
ulDwg = opcode_TRAP + atype_RPL + blockm_ON +
|
|
pattern_OFF + transc_BG_OPAQUE +
|
|
bop_SRCCOPY;
|
|
}
|
|
else
|
|
{
|
|
ulDwg = opcode_TRAP + atype_RSTR + blockm_OFF +
|
|
pattern_OFF + transc_BG_OPAQUE +
|
|
(ulHwMix << 16);
|
|
}
|
|
}
|
|
|
|
if ((GET_CACHE_FLAGS(ppdev, (SIGN_CACHE | ARX_CACHE | PATTERN_CACHE))) ==
|
|
(SIGN_CACHE | ARX_CACHE | PATTERN_CACHE))
|
|
{
|
|
CHECK_FIFO_SPACE(pjBase, 6);
|
|
}
|
|
else
|
|
{
|
|
CHECK_FIFO_SPACE(pjBase, 15);
|
|
|
|
if (!(GET_CACHE_FLAGS(ppdev, SIGN_CACHE)))
|
|
{
|
|
CP_WRITE(pjBase, DWG_SGN, 0);
|
|
}
|
|
if (!(GET_CACHE_FLAGS(ppdev, ARX_CACHE)))
|
|
{
|
|
CP_WRITE(pjBase, DWG_AR1, 0);
|
|
CP_WRITE(pjBase, DWG_AR2, 0);
|
|
CP_WRITE(pjBase, DWG_AR4, 0);
|
|
CP_WRITE(pjBase, DWG_AR5, 0);
|
|
}
|
|
if (!(GET_CACHE_FLAGS(ppdev, PATTERN_CACHE)))
|
|
{
|
|
CP_WRITE(pjBase, DWG_SRC0, 0xFFFFFFFF);
|
|
CP_WRITE(pjBase, DWG_SRC1, 0xFFFFFFFF);
|
|
CP_WRITE(pjBase, DWG_SRC2, 0xFFFFFFFF);
|
|
CP_WRITE(pjBase, DWG_SRC3, 0xFFFFFFFF);
|
|
}
|
|
|
|
ppdev->HopeFlags = (SIGN_CACHE | ARX_CACHE | PATTERN_CACHE);
|
|
}
|
|
|
|
CP_WRITE(pjBase, DWG_FCOL, COLOR_REPLICATE(ppdev, rbc.iSolidColor));
|
|
CP_WRITE(pjBase, DWG_DWGCTL, ulDwg);
|
|
|
|
while(TRUE)
|
|
{
|
|
CP_WRITE(pjBase, DWG_FXLEFT, prcl->left + xOffset);
|
|
CP_WRITE(pjBase, DWG_FXRIGHT, prcl->right + xOffset);
|
|
CP_WRITE(pjBase, DWG_LEN, prcl->bottom - prcl->top);
|
|
CP_START(pjBase, DWG_YDST, prcl->top + yOffset);
|
|
|
|
if (--c == 0)
|
|
return;
|
|
|
|
prcl++;
|
|
CHECK_FIFO_SPACE(pjBase, 4);
|
|
}
|
|
}
|
|
|
|
/******************************Public*Routine******************************\
|
|
* VOID vMgaXfer1bpp
|
|
*
|
|
* This routine colour expands a monochrome bitmap.
|
|
*
|
|
\**************************************************************************/
|
|
|
|
VOID vMgaXfer1bpp( // Type FNXFER
|
|
PDEV* ppdev,
|
|
LONG c, // Count of rectangles, can't be zero
|
|
RECTL* prcl, // List of destination rectangles, in relative
|
|
// coordinates
|
|
ULONG rop4, // Foreground and background hardware mix
|
|
SURFOBJ* psoSrc, // Source surface
|
|
POINTL* pptlSrc, // Original unclipped source point
|
|
RECTL* prclDst, // Original unclipped destination rectangle
|
|
XLATEOBJ* pxlo) // Translate that provides colour-expansion information
|
|
{
|
|
BYTE* pjBase;
|
|
LONG xOffset;
|
|
LONG yOffset;
|
|
ULONG ulBitFlip;
|
|
LONG dx;
|
|
LONG dy;
|
|
BYTE* pjSrcScan0;
|
|
LONG lSrcDelta;
|
|
ULONG ulDwg;
|
|
ULONG ulHwMix;
|
|
ULONG* pulXlate;
|
|
LONG cxDst;
|
|
LONG cyDst;
|
|
LONG xAlign;
|
|
ULONG cFullLoops;
|
|
ULONG cRemLoops;
|
|
BYTE* pjDma;
|
|
ULONG* pulSrc;
|
|
ULONG cdSrc;
|
|
LONG lSrcSkip;
|
|
ULONG* pulDst;
|
|
LONG i;
|
|
BOOL bHwBug;
|
|
LONG cFifo;
|
|
|
|
ASSERTDD(((rop4 & 0xff00) >> 8) == (rop4 & 0xff),
|
|
"Expect only an opaquing rop");
|
|
|
|
pjBase = ppdev->pjBase;
|
|
xOffset = ppdev->xOffset;
|
|
yOffset = ppdev->yOffset;
|
|
|
|
ulBitFlip = 0;
|
|
|
|
dx = pptlSrc->x - prclDst->left;
|
|
dy = pptlSrc->y - prclDst->top; // Add to destination to get source
|
|
|
|
pjSrcScan0 = psoSrc->pvScan0;
|
|
lSrcDelta = psoSrc->lDelta;
|
|
|
|
if (rop4 == 0xcccc) // SRCCOPY
|
|
{
|
|
ulDwg = opcode_ILOAD+atype_RPL+blockm_OFF+bltmod_BMONO+
|
|
hbgr_SRC_WINDOWS+pattern_OFF+transc_BG_OPAQUE+bop_SRCCOPY;
|
|
}
|
|
else if ((rop4 == 0xb8b8) || (rop4 == 0xe2e2))
|
|
{
|
|
ulDwg = opcode_ILOAD+atype_RPL+blockm_OFF+bop_SRCCOPY+trans_0+
|
|
bltmod_BMONO+pattern_OFF+hbgr_SRC_WINDOWS+transc_BG_TRANSP;
|
|
|
|
// We special-cased 0xb8b8 and 0xe2e2 in bitblt.c:
|
|
|
|
if (rop4 == 0xb8b8)
|
|
{
|
|
// 0xb8 is weird because it says that the '1' bit is leave-alone,
|
|
// but the '0' bit is the destination color. The Millennium can
|
|
// only handle transparent blts when the '0' bit is leave-alone,
|
|
// so we flip the source bits before we give it to the Millennium.
|
|
//
|
|
// Since we're limited by the speed of the bus, this additional
|
|
// overhead of an extra XOR on every write won't be measurable.
|
|
|
|
ulBitFlip = (ULONG) -1;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
ulHwMix = rop4 & 0xf;
|
|
|
|
ulDwg = opcode_ILOAD+atype_RSTR+blockm_OFF+bltmod_BMONO+
|
|
hbgr_SRC_WINDOWS+pattern_OFF+transc_BG_OPAQUE+ (ulHwMix << 16);
|
|
}
|
|
|
|
pjDma = ppdev->pjBase + DMAWND;
|
|
pulXlate = pxlo->pulXlate;
|
|
|
|
CHECK_FIFO_SPACE(pjBase, 15);
|
|
|
|
CP_WRITE(pjBase, DWG_DWGCTL, ulDwg);
|
|
|
|
if (!(GET_CACHE_FLAGS(ppdev, SIGN_CACHE)))
|
|
{
|
|
CP_WRITE(pjBase, DWG_SGN, 0);
|
|
}
|
|
|
|
if (!(GET_CACHE_FLAGS(ppdev, ARX_CACHE)))
|
|
{
|
|
CP_WRITE(pjBase, DWG_AR5, 0);
|
|
}
|
|
|
|
// The SRC0 through SRC3 registers are trashed by the blt, and
|
|
// other ARx registers will be modified shortly, so signal it:
|
|
|
|
ppdev->HopeFlags = SIGN_CACHE;
|
|
|
|
CP_WRITE(pjBase, DWG_FCOL, COLOR_REPLICATE(ppdev, pulXlate[1]));
|
|
CP_WRITE(pjBase, DWG_BCOL, COLOR_REPLICATE(ppdev, pulXlate[0]));
|
|
|
|
while (TRUE)
|
|
{
|
|
cxDst = (prcl->right - prcl->left);
|
|
cyDst = (prcl->bottom - prcl->top);
|
|
|
|
CP_WRITE(pjBase, DWG_LEN, cyDst);
|
|
CP_WRITE(pjBase, DWG_YDST, prcl->top + yOffset);
|
|
CP_WRITE(pjBase, DWG_FXLEFT, prcl->left + xOffset);
|
|
CP_WRITE(pjBase, DWG_FXRIGHT, prcl->right + xOffset - 1);
|
|
|
|
xAlign = (prcl->left + dx) & 31;
|
|
|
|
bHwBug = ((cxDst >= 128) && (xAlign <= 15));
|
|
|
|
if (!bHwBug)
|
|
{
|
|
CP_WRITE(pjBase, DWG_SHIFT, 0);
|
|
CP_WRITE(pjBase, DWG_AR3, xAlign);
|
|
CP_START(pjBase, DWG_AR0, xAlign + cxDst - 1);
|
|
}
|
|
else
|
|
{
|
|
// We have to work around a hardware bug. Start 8 pels to
|
|
// the left of the original start.
|
|
|
|
CP_WRITE(pjBase, DWG_AR3, xAlign + 8);
|
|
CP_WRITE(pjBase, DWG_AR0, xAlign + cxDst + 31);
|
|
CP_START(pjBase, DWG_SHIFT, (24 << 16));
|
|
}
|
|
|
|
// We have to ensure that the command has been started before doing
|
|
// the BLT_WRITE_ON:
|
|
|
|
CHECK_FIFO_SPACE(pjBase, FIFOSIZE);
|
|
BLT_WRITE_ON(ppdev, pjBase);
|
|
|
|
// Point to the first dword of the source bitmap that is to be
|
|
// downloaded:
|
|
|
|
pulSrc = (ULONG*) (pjSrcScan0 + (((prcl->top + dy) * lSrcDelta
|
|
+ ((prcl->left + dx) >> 3)) & ~3L));
|
|
|
|
// Calculate the number of dwords to be moved per scanline. Since
|
|
// we align the starting dword on a dword boundary, we know that
|
|
// we cannot overflow the end of the bitmap:
|
|
|
|
cdSrc = (xAlign + cxDst + 31) >> 5;
|
|
|
|
lSrcSkip = lSrcDelta - (cdSrc << 2);
|
|
|
|
if (!(bHwBug) && (lSrcSkip == 0))
|
|
{
|
|
// It's rather frequent that there will be no scan-to-scan
|
|
// delta, and no hardware bug, so we can go full speed:
|
|
|
|
cdSrc *= cyDst;
|
|
|
|
cFullLoops = ((cdSrc - 1) / FIFOSIZE);
|
|
cRemLoops = ((cdSrc - 1) % FIFOSIZE) + 1;
|
|
|
|
pulDst = (ULONG*) pjDma;
|
|
|
|
if (cFullLoops > 0)
|
|
{
|
|
do {
|
|
CHECK_FIFO_SPACE(pjBase, FIFOSIZE);
|
|
|
|
for (i = FIFOSIZE; i != 0; i--)
|
|
{
|
|
CP_WRITE_DMA(ppdev, pulDst, *pulSrc ^ ulBitFlip);
|
|
pulSrc++;
|
|
}
|
|
} while (--cFullLoops != 0);
|
|
}
|
|
|
|
CHECK_FIFO_SPACE(pjBase, (LONG) cRemLoops);
|
|
|
|
do {
|
|
CP_WRITE_DMA(ppdev, pulDst, *pulSrc ^ ulBitFlip);
|
|
pulSrc++;
|
|
} while (--cRemLoops != 0);
|
|
}
|
|
else
|
|
{
|
|
// Okay, blt it the slow way:
|
|
|
|
cFifo = 0;
|
|
|
|
do {
|
|
pulDst = (ULONG*) pjDma;
|
|
|
|
if (bHwBug)
|
|
{
|
|
if (--cFifo < 0)
|
|
{
|
|
cFifo = FIFOSIZE - 1;
|
|
CHECK_FIFO_SPACE(pjBase, FIFOSIZE);
|
|
}
|
|
CP_WRITE_DMA(ppdev, pulDst, 0); // Account for hardware bug
|
|
}
|
|
|
|
for (i = cdSrc; i != 0; i--)
|
|
{
|
|
if (--cFifo < 0)
|
|
{
|
|
cFifo = FIFOSIZE - 1;
|
|
CHECK_FIFO_SPACE(pjBase, FIFOSIZE);
|
|
}
|
|
CP_WRITE_DMA(ppdev, pulDst, *pulSrc++ ^ ulBitFlip);
|
|
}
|
|
|
|
pulSrc = (ULONG*) ((BYTE*) pulSrc + lSrcSkip);
|
|
|
|
} while (--cyDst != 0);
|
|
}
|
|
|
|
BLT_WRITE_OFF(ppdev, pjBase);
|
|
|
|
if (--c == 0)
|
|
break;
|
|
|
|
prcl++;
|
|
CHECK_FIFO_SPACE(pjBase, 7);
|
|
}
|
|
}
|
|
|
|
/******************************Public*Routine******************************\
|
|
* VOID vMgaCopyBlt
|
|
*
|
|
* Does a screen-to-screen blt of a list of rectangles.
|
|
*
|
|
\**************************************************************************/
|
|
|
|
VOID vMgaCopyBlt( // Type FNCOPY
|
|
PDEV* ppdev,
|
|
LONG c, // Can't be zero
|
|
RECTL* prcl, // Array of relative coordinates destination rectangles
|
|
ULONG rop4, // Rop4
|
|
POINTL* pptlSrc, // Original unclipped source point
|
|
RECTL* prclDst) // Original unclipped destination rectangle
|
|
{
|
|
BYTE* pjBase;
|
|
LONG xOffset;
|
|
LONG yOffset;
|
|
LONG dx;
|
|
LONG dy;
|
|
FLONG flDirCode;
|
|
LONG lSignedPitch;
|
|
ULONG ulHwMix;
|
|
ULONG ulDwg;
|
|
LONG yDst;
|
|
LONG ySrc;
|
|
LONG cy;
|
|
LONG xSrc;
|
|
LONG lSignedWidth;
|
|
LONG lSrcStart;
|
|
|
|
pjBase = ppdev->pjBase;
|
|
xOffset = ppdev->xOffset;
|
|
yOffset = ppdev->yOffset;
|
|
|
|
dx = pptlSrc->x - prclDst->left;
|
|
dy = pptlSrc->y - prclDst->top; // Add to destination to get source
|
|
|
|
flDirCode = DRAWING_DIR_TBLR;
|
|
lSignedPitch = ppdev->cxMemory;
|
|
|
|
// If the destination and source rectangles overlap, we will have to
|
|
// tell the accelerator in which direction the copy should be done:
|
|
|
|
if (OVERLAP(prclDst, pptlSrc))
|
|
{
|
|
if (prclDst->left > pptlSrc->x)
|
|
{
|
|
flDirCode |= scanleft_RIGHT_TO_LEFT;
|
|
}
|
|
if (prclDst->top > pptlSrc->y)
|
|
{
|
|
flDirCode |= sdy_BOTTOM_TO_TOP;
|
|
lSignedPitch = -lSignedPitch;
|
|
}
|
|
}
|
|
|
|
if (rop4 == 0xcccc)
|
|
{
|
|
ulDwg = opcode_BITBLT + atype_RPL + blockm_OFF + bltmod_BFCOL +
|
|
pattern_OFF + transc_BG_OPAQUE + bop_SRCCOPY;
|
|
}
|
|
else
|
|
{
|
|
ulHwMix = rop4 & 0xf;
|
|
|
|
ulDwg = opcode_BITBLT + atype_RSTR + blockm_OFF + bltmod_BFCOL +
|
|
pattern_OFF + transc_BG_OPAQUE + (ulHwMix << 16);
|
|
}
|
|
|
|
// The SRC0 to SRC3 registers are probably trashed by the blt, and we
|
|
// may be using a different SGN:
|
|
|
|
ppdev->HopeFlags = 0;
|
|
|
|
CHECK_FIFO_SPACE(pjBase, 10);
|
|
|
|
CP_WRITE(pjBase, DWG_DWGCTL, ulDwg);
|
|
CP_WRITE(pjBase, DWG_SHIFT, 0);
|
|
CP_WRITE(pjBase, DWG_SGN, flDirCode);
|
|
CP_WRITE(pjBase, DWG_AR5, lSignedPitch);
|
|
|
|
while (TRUE)
|
|
{
|
|
CP_WRITE(pjBase, DWG_LEN, prcl->bottom - prcl->top);
|
|
CP_WRITE(pjBase, DWG_FXLEFT, prcl->left + xOffset);
|
|
CP_WRITE(pjBase, DWG_FXRIGHT, prcl->right + xOffset - 1);
|
|
|
|
yDst = yOffset + prcl->top;
|
|
ySrc = yOffset + prcl->top + dy;
|
|
|
|
if (flDirCode & sdy_BOTTOM_TO_TOP)
|
|
{
|
|
cy = prcl->bottom - prcl->top - 1;
|
|
yDst += cy;
|
|
ySrc += cy;
|
|
}
|
|
|
|
CP_WRITE(pjBase, DWG_YDST, yDst);
|
|
|
|
xSrc = xOffset + prcl->left + dx;
|
|
lSignedWidth = prcl->right - prcl->left - 1;
|
|
if (flDirCode & scanleft_RIGHT_TO_LEFT)
|
|
{
|
|
xSrc += lSignedWidth;
|
|
lSignedWidth = -lSignedWidth;
|
|
}
|
|
|
|
lSrcStart = ppdev->ulYDstOrg + (ySrc * ppdev->cxMemory) + xSrc;
|
|
CP_WRITE(pjBase, DWG_AR3, lSrcStart);
|
|
CP_START(pjBase, DWG_AR0, lSrcStart + lSignedWidth);
|
|
|
|
if (--c == 0)
|
|
break;
|
|
|
|
CHECK_FIFO_SPACE(pjBase, 6);
|
|
prcl++;
|
|
}
|
|
}
|
|
|