|
|
/******************************Module*Header*******************************\
* Module Name: bltmga.c
*
* Contains the low-level blt functions.
*
* Hopefully, if you're basing your display driver on this code, to
* support all of DrvBitBlt and DrvCopyBits, you'll only have to implement
* the following routines.  You shouldn't have to modify much in
* 'bitblt.c'.  I've tried to make these routines as few, modular, simple,
* and efficient as I could, while still accelerating as many calls as
* possible that would be cost-effective in terms of performance wins
* versus size and effort.
*
* Note: In the following, 'relative' coordinates refer to coordinates
*       that haven't yet had the offscreen bitmap (DFB) offset applied.
*       'Absolute' coordinates have had the offset applied.  For example,
*       we may be told to blt to (1, 1) of the bitmap, but the bitmap may
*       be sitting in offscreen memory starting at coordinate (0, 768) --
*       (1, 1) would be the 'relative' start coordinate, and (1, 769)
*       would be the 'absolute' start coordinate.
*
* Copyright (c) 1992-1996 Microsoft Corporation
* Copyright (c) 1993-1996 Matrox Electronic Systems, Ltd.
\**************************************************************************/
#include "precomp.h"
/******************************Public*Routine******************************\
* VOID vMgaFillSolid
*
* Paints every rectangle of a list with a single solid colour.
*
* The rop4 is either PATCOPY (0xf0f0) or another rop that combines only
* the pattern and the destination.  PATCOPY, and any mix that reduces to
* whiteness or blackness, is drawn as a block-mode replace; every other
* mix is drawn as a raster operation with block mode off.
*
* The rectangles arrive in relative coordinates; ppdev->xOffset and
* ppdev->yOffset are added before they are handed to the hardware.
*
\**************************************************************************/

VOID vMgaFillSolid(             // Type FNFILL
PDEV*           ppdev,
LONG            c,              // Number of rectangles; can't be zero
RECTL*          prcl,           // Rectangles to be filled, in relative
                                //   coordinates
ULONG           rop4,           // Rop4
RBRUSH_COLOR    rbc,            // Drawing colour is rbc.iSolidColor
POINTL*         pptlBrush)      // Not used
{
    BYTE*   pjBase  = ppdev->pjBase;
    LONG    xOrigin = ppdev->xOffset;
    LONG    yOrigin = ppdev->yOffset;
    ULONG   ulDwgCtl;
    ULONG   ulMix;

    // Start from the block-mode replace control word.  It is the right
    // answer for PATCOPY, and also for whiteness and blackness once the
    // colour has been forced below:

    ulDwgCtl = opcode_TRAP + atype_RPL + blockm_ON +
               pattern_OFF + transc_BG_OPAQUE +
               bop_SRCCOPY;

    if (rop4 != 0xf0f0)         // Anything other than PATCOPY
    {
        // The ROP3 is a combination of P and D only, so two bits from
        // each nibble are enough to form the Mga mix:
        //
        //      ROP3  Mga   ROP3  Mga   ROP3  Mga   ROP3  Mga
        //
        //      0x00  0     0x50  4     0xa0  8     0xf0  c
        //      0x05  1     0x55  5     0xa5  9     0xf5  d
        //      0x0a  2     0x5a  6     0xaa  a     0xfa  e
        //      0x0f  3     0x5f  7     0xaf  b     0xff  f

        ulMix = (rop4 & 0x03) + ((rop4 & 0x30) >> 2);

        if (ulMix == MGA_WHITENESS)
        {
            rbc.iSolidColor = 0xffffffff;
        }
        else if (ulMix == MGA_BLACKNESS)
        {
            rbc.iSolidColor = 0;
        }
        else
        {
            // A genuine mix with the destination: raster-op access type,
            // block mode off, and the mix in the 'bop' field:

            ulDwgCtl = opcode_TRAP + atype_RSTR + blockm_OFF +
                       pattern_OFF + transc_BG_OPAQUE +
                       (ulMix << 16);
        }
    }

    if ((GET_CACHE_FLAGS(ppdev, (SIGN_CACHE | ARX_CACHE | PATTERN_CACHE))) !=
                                (SIGN_CACHE | ARX_CACHE | PATTERN_CACHE))
    {
        // At least one cached register group is stale.  Reserve room for
        // the worst case (9 set-up writes plus the 6 that follow) and
        // reload only the groups that are not flagged as cached:

        CHECK_FIFO_SPACE(pjBase, 15);

        if ((GET_CACHE_FLAGS(ppdev, SIGN_CACHE)) == 0)
        {
            CP_WRITE(pjBase, DWG_SGN, 0);
        }

        if ((GET_CACHE_FLAGS(ppdev, ARX_CACHE)) == 0)
        {
            CP_WRITE(pjBase, DWG_AR1, 0);
            CP_WRITE(pjBase, DWG_AR2, 0);
            CP_WRITE(pjBase, DWG_AR4, 0);
            CP_WRITE(pjBase, DWG_AR5, 0);
        }

        if ((GET_CACHE_FLAGS(ppdev, PATTERN_CACHE)) == 0)
        {
            CP_WRITE(pjBase, DWG_SRC0, 0xFFFFFFFF);
            CP_WRITE(pjBase, DWG_SRC1, 0xFFFFFFFF);
            CP_WRITE(pjBase, DWG_SRC2, 0xFFFFFFFF);
            CP_WRITE(pjBase, DWG_SRC3, 0xFFFFFFFF);
        }

        ppdev->HopeFlags = (SIGN_CACHE | ARX_CACHE | PATTERN_CACHE);
    }
    else
    {
        // Everything is already cached; only the colour, the control
        // word and the first rectangle need FIFO room:

        CHECK_FIFO_SPACE(pjBase, 6);
    }

    CP_WRITE(pjBase, DWG_FCOL, COLOR_REPLICATE(ppdev, rbc.iSolidColor));
    CP_WRITE(pjBase, DWG_DWGCTL, ulDwgCtl);

    for (;;)
    {
        // Four writes per rectangle; the CP_START on YDST is issued last:

        CP_WRITE(pjBase, DWG_FXLEFT,  prcl->left   + xOrigin);
        CP_WRITE(pjBase, DWG_FXRIGHT, prcl->right  + xOrigin);
        CP_WRITE(pjBase, DWG_LEN,     prcl->bottom - prcl->top);
        CP_START(pjBase, DWG_YDST,    prcl->top    + yOrigin);

        if (--c == 0)
        {
            break;
        }

        prcl++;
        CHECK_FIFO_SPACE(pjBase, 4);
    }
}
/******************************Public*Routine******************************\
* VOID vMgaXfer1bpp
*
* This routine colour expands a monochrome bitmap.
*
* For each destination rectangle the routine programs an ILOAD operation
* in monochrome blt mode (bltmod_BMONO) and then feeds the source bitmap,
* one dword at a time, through the window at ppdev->pjBase + DMAWND.
* Source '1' bits are drawn with pxlo->pulXlate[1] (loaded into FCOL) and
* source '0' bits with pxlo->pulXlate[0] (loaded into BCOL).
*
* Three classes of rop4 are handled:
*
*   0xcccc          - opaque replace
*   0xb8b8, 0xe2e2  - transparent replace; for 0xb8b8 every source dword
*                     is inverted before it is written
*   anything else   - raster operation using the low nibble of the rop as
*                     the hardware mix
*
* On return ppdev->HopeFlags has been set to SIGN_CACHE.
*
\**************************************************************************/

VOID vMgaXfer1bpp(      // Type FNXFER
PDEV*       ppdev,
LONG        c,          // Count of rectangles, can't be zero
RECTL*      prcl,       // List of destination rectangles, in relative
                        //   coordinates
ULONG       rop4,       // Foreground and background hardware mix
SURFOBJ*    psoSrc,     // Source surface
POINTL*     pptlSrc,    // Original unclipped source point
RECTL*      prclDst,    // Original unclipped destination rectangle
XLATEOBJ*   pxlo)       // Translate that provides colour-expansion information
{
    BYTE*   pjBase;
    LONG    xOffset;
    LONG    yOffset;
    ULONG   ulBitFlip;      // XORed into every source dword: 0 or all ones
    LONG    dx;
    LONG    dy;
    BYTE*   pjSrcScan0;
    LONG    lSrcDelta;
    ULONG   ulDwg;
    ULONG   ulHwMix;
    ULONG*  pulXlate;
    LONG    cxDst;
    LONG    cyDst;
    LONG    xAlign;         // Bit offset of the first source pel within
                            //   its dword-aligned start (0..31)
    ULONG   cFullLoops;
    ULONG   cRemLoops;
    BYTE*   pjDma;
    ULONG*  pulSrc;
    ULONG   cdSrc;          // Dwords to transfer per scan (whole blt in
                            //   the fast path)
    LONG    lSrcSkip;       // Bytes from the end of one scan's transfer
                            //   to the start of the next scan's
    ULONG*  pulDst;
    LONG    i;
    BOOL    bHwBug;
    LONG    cFifo;          // Writes left before the FIFO must be
                            //   re-checked (slow path only)

    // Both bytes of the rop must be identical:

    ASSERTDD(((rop4 & 0xff00) >> 8) == (rop4 & 0xff),
             "Expect only an opaquing rop");

    pjBase  = ppdev->pjBase;
    xOffset = ppdev->xOffset;
    yOffset = ppdev->yOffset;

    ulBitFlip = 0;

    dx = pptlSrc->x - prclDst->left;
    dy = pptlSrc->y - prclDst->top;     // Add to destination to get source

    pjSrcScan0 = psoSrc->pvScan0;
    lSrcDelta  = psoSrc->lDelta;

    if (rop4 == 0xcccc)                 // SRCCOPY
    {
        ulDwg = opcode_ILOAD+atype_RPL+blockm_OFF+bltmod_BMONO+
                hbgr_SRC_WINDOWS+pattern_OFF+transc_BG_OPAQUE+bop_SRCCOPY;
    }
    else if ((rop4 == 0xb8b8) || (rop4 == 0xe2e2))
    {
        // Same replace operation as SRCCOPY, but with the background
        // transparent (transc_BG_TRANSP):

        ulDwg = opcode_ILOAD+atype_RPL+blockm_OFF+bop_SRCCOPY+trans_0+
                bltmod_BMONO+pattern_OFF+hbgr_SRC_WINDOWS+transc_BG_TRANSP;

        // We special-cased 0xb8b8 and 0xe2e2 in bitblt.c:

        if (rop4 == 0xb8b8)
        {
            // 0xb8 is weird because it says that the '1' bit is leave-alone,
            // but the '0' bit is the destination color.  The Millennium can
            // only handle transparent blts when the '0' bit is leave-alone,
            // so we flip the source bits before we give it to the Millennium.
            //
            // Since we're limited by the speed of the bus, this additional
            // overhead of an extra XOR on every write won't be measurable.

            ulBitFlip = (ULONG) -1;
        }
    }
    else
    {
        // Any other rop: the low nibble is used as the hardware mix and
        // is placed at bit 16 of the control word:

        ulHwMix = rop4 & 0xf;

        ulDwg = opcode_ILOAD+atype_RSTR+blockm_OFF+bltmod_BMONO+
                hbgr_SRC_WINDOWS+pattern_OFF+transc_BG_OPAQUE+
                (ulHwMix << 16);
    }

    pjDma    = ppdev->pjBase + DMAWND;
    pulXlate = pxlo->pulXlate;

    // Room for the set-up writes below (at most 5) plus the 7 writes for
    // the first rectangle:

    CHECK_FIFO_SPACE(pjBase, 15);

    CP_WRITE(pjBase, DWG_DWGCTL, ulDwg);

    if (!(GET_CACHE_FLAGS(ppdev, SIGN_CACHE)))
    {
        CP_WRITE(pjBase, DWG_SGN, 0);
    }

    if (!(GET_CACHE_FLAGS(ppdev, ARX_CACHE)))
    {
        CP_WRITE(pjBase, DWG_AR5, 0);
    }

    // The SRC0 through SRC3 registers are trashed by the blt, and
    // other ARx registers will be modified shortly, so signal it:

    ppdev->HopeFlags = SIGN_CACHE;

    // '1' bits expand to pulXlate[1], '0' bits to pulXlate[0]:

    CP_WRITE(pjBase, DWG_FCOL, COLOR_REPLICATE(ppdev, pulXlate[1]));
    CP_WRITE(pjBase, DWG_BCOL, COLOR_REPLICATE(ppdev, pulXlate[0]));

    while (TRUE)
    {
        cxDst = (prcl->right - prcl->left);
        cyDst = (prcl->bottom - prcl->top);

        // Note that FXRIGHT is written with the last pel inside the
        // rectangle (right - 1), not with the exclusive right edge:

        CP_WRITE(pjBase, DWG_LEN,     cyDst);
        CP_WRITE(pjBase, DWG_YDST,    prcl->top   + yOffset);
        CP_WRITE(pjBase, DWG_FXLEFT,  prcl->left  + xOffset);
        CP_WRITE(pjBase, DWG_FXRIGHT, prcl->right + xOffset - 1);

        // Bit position of the first source pel within its dword:

        xAlign = (prcl->left + dx) & 31;

        // The workaround below is needed for blts at least 128 pels wide
        // whose source starts in the low half of its dword:

        bHwBug = ((cxDst >= 128) && (xAlign <= 15));

        if (!bHwBug)
        {
            CP_WRITE(pjBase, DWG_SHIFT, 0);
            CP_WRITE(pjBase, DWG_AR3,   xAlign);
            CP_START(pjBase, DWG_AR0,   xAlign + cxDst - 1);
        }
        else
        {
            // We have to work around a hardware bug.  Start 8 pels to
            // the left of the original start.  (The slow path below also
            // sends one extra zero dword at the start of every scan in
            // this case.)

            CP_WRITE(pjBase, DWG_AR3,   xAlign + 8);
            CP_WRITE(pjBase, DWG_AR0,   xAlign + cxDst + 31);
            CP_START(pjBase, DWG_SHIFT, (24 << 16));
        }

        // We have to ensure that the command has been started before doing
        // the BLT_WRITE_ON:

        CHECK_FIFO_SPACE(pjBase, FIFOSIZE);
        BLT_WRITE_ON(ppdev, pjBase);

        // Point to the first dword of the source bitmap that is to be
        // downloaded:

        pulSrc = (ULONG*) (pjSrcScan0 + (((prcl->top + dy) * lSrcDelta
                                      + ((prcl->left + dx) >> 3)) & ~3L));

        // Calculate the number of dwords to be moved per scanline.  Since
        // we align the starting dword on a dword boundary, we know that
        // we cannot overflow the end of the bitmap:

        cdSrc = (xAlign + cxDst + 31) >> 5;

        lSrcSkip = lSrcDelta - (cdSrc << 2);

        if (!(bHwBug) && (lSrcSkip == 0))
        {
            // It's rather frequent that there will be no scan-to-scan
            // delta, and no hardware bug, so we can go full speed:
            // the dwords of all scans are contiguous in memory, so the
            // whole blt is treated as a single run of dwords.

            cdSrc *= cyDst;

            // Split the run into full FIFOSIZE batches and a final batch
            // of 1 to FIFOSIZE dwords, so the trailing do-while always
            // has at least one dword to write:

            cFullLoops = ((cdSrc - 1) / FIFOSIZE);
            cRemLoops  = ((cdSrc - 1) % FIFOSIZE) + 1;

            pulDst = (ULONG*) pjDma;

            if (cFullLoops > 0)
            {
                do {
                    CHECK_FIFO_SPACE(pjBase, FIFOSIZE);

                    for (i = FIFOSIZE; i != 0; i--)
                    {
                        CP_WRITE_DMA(ppdev, pulDst, *pulSrc ^ ulBitFlip);
                        pulSrc++;
                    }
                } while (--cFullLoops != 0);
            }

            CHECK_FIFO_SPACE(pjBase, (LONG) cRemLoops);

            do {
                CP_WRITE_DMA(ppdev, pulDst, *pulSrc ^ ulBitFlip);
                pulSrc++;
            } while (--cRemLoops != 0);
        }
        else
        {
            // Okay, blt it the slow way: one scan at a time, re-checking
            // the FIFO whenever cFifo runs out.  cFifo starts at zero so
            // the very first write triggers a check.

            cFifo = 0;

            do {
                // The destination pointer is reset to the start of the
                // window for every scan:

                pulDst = (ULONG*) pjDma;

                if (bHwBug)
                {
                    if (--cFifo < 0)
                    {
                        cFifo = FIFOSIZE - 1;
                        CHECK_FIFO_SPACE(pjBase, FIFOSIZE);
                    }
                    CP_WRITE_DMA(ppdev, pulDst, 0);    // Account for hardware bug
                }

                for (i = cdSrc; i != 0; i--)
                {
                    if (--cFifo < 0)
                    {
                        cFifo = FIFOSIZE - 1;
                        CHECK_FIFO_SPACE(pjBase, FIFOSIZE);
                    }
                    CP_WRITE_DMA(ppdev, pulDst, *pulSrc++ ^ ulBitFlip);
                }

                // Step over the part of the source scan that is not
                // downloaded, to reach the first dword of the next scan:

                pulSrc = (ULONG*) ((BYTE*) pulSrc + lSrcSkip);

            } while (--cyDst != 0);
        }

        BLT_WRITE_OFF(ppdev, pjBase);

        if (--c == 0)
            break;

        // Room for the 7 register writes of the next rectangle:

        prcl++;
        CHECK_FIFO_SPACE(pjBase, 7);
    }
}
/******************************Public*Routine******************************\
* VOID vMgaCopyBlt
*
* Screen-to-screen blt of a list of rectangles.
*
* All rectangles share the same source-to-destination displacement, which
* is derived from the original unclipped source point and destination
* rectangle.  When source and destination overlap, the copy direction is
* chosen from their relative positions and programmed into DWG_SGN, and
* the sign of the pitch written to DWG_AR5 follows the vertical direction.
*
* ppdev->HopeFlags is cleared, because this routine programs its own SGN
* and ARx values.
*
\**************************************************************************/

VOID vMgaCopyBlt(   // Type FNCOPY
PDEV*   ppdev,
LONG    c,          // Number of rectangles; can't be zero
RECTL*  prcl,       // Array of relative coordinates destination rectangles
ULONG   rop4,       // Rop4
POINTL* pptlSrc,    // Original unclipped source point
RECTL*  prclDst)    // Original unclipped destination rectangle
{
    BYTE*   pjBase  = ppdev->pjBase;
    LONG    xOrigin = ppdev->xOffset;
    LONG    yOrigin = ppdev->yOffset;
    LONG    dxSrc   = pptlSrc->x - prclDst->left;   // Add to destination
    LONG    dySrc   = pptlSrc->y - prclDst->top;    //   to get source
    FLONG   flDir   = DRAWING_DIR_TBLR;
    LONG    lPitch  = ppdev->cxMemory;
    ULONG   ulDwgCtl;
    BOOL    bBottomUp;
    BOOL    bRightToLeft;
    LONG    cyRect;
    LONG    lSpan;
    LONG    yDst;
    LONG    ySrc;
    LONG    xSrc;
    LONG    lSrcStart;

    // Only overlapping blts need a particular direction; the default is
    // DRAWING_DIR_TBLR with a positive pitch:

    if (OVERLAP(prclDst, pptlSrc))
    {
        if (pptlSrc->x < prclDst->left)
        {
            flDir |= scanleft_RIGHT_TO_LEFT;
        }

        if (pptlSrc->y < prclDst->top)
        {
            flDir |= sdy_BOTTOM_TO_TOP;
            lPitch = -lPitch;
        }
    }

    // The direction is the same for every rectangle, so test the bits
    // once here rather than on each pass through the loop:

    bRightToLeft = ((flDir & scanleft_RIGHT_TO_LEFT) != 0);
    bBottomUp    = ((flDir & sdy_BOTTOM_TO_TOP) != 0);

    if (rop4 != 0xcccc)
    {
        // Not SRCCOPY: the low nibble of the rop is the hardware mix,
        // placed at bit 16 of the control word:

        ulDwgCtl = opcode_BITBLT + atype_RSTR + blockm_OFF + bltmod_BFCOL +
                   pattern_OFF + transc_BG_OPAQUE + ((rop4 & 0xf) << 16);
    }
    else
    {
        ulDwgCtl = opcode_BITBLT + atype_RPL + blockm_OFF + bltmod_BFCOL +
                   pattern_OFF + transc_BG_OPAQUE + bop_SRCCOPY;
    }

    // The SRC0 to SRC3 registers are probably trashed by the blt, and we
    // may be using a different SGN:

    ppdev->HopeFlags = 0;

    // Four set-up writes plus six for the first rectangle:

    CHECK_FIFO_SPACE(pjBase, 10);

    CP_WRITE(pjBase, DWG_DWGCTL, ulDwgCtl);
    CP_WRITE(pjBase, DWG_SHIFT,  0);
    CP_WRITE(pjBase, DWG_SGN,    flDir);
    CP_WRITE(pjBase, DWG_AR5,    lPitch);

    for (;; prcl++)
    {
        cyRect = prcl->bottom - prcl->top;

        CP_WRITE(pjBase, DWG_LEN,     cyRect);
        CP_WRITE(pjBase, DWG_FXLEFT,  prcl->left  + xOrigin);
        CP_WRITE(pjBase, DWG_FXRIGHT, prcl->right + xOrigin - 1);

        // When copying upwards, start from the last scan of the
        // rectangle.  The source scan always sits 'dySrc' from the
        // destination scan:

        yDst = yOrigin + prcl->top;
        if (bBottomUp)
        {
            yDst += cyRect - 1;
        }
        ySrc = yDst + dySrc;

        CP_WRITE(pjBase, DWG_YDST, yDst);

        // Likewise, when copying right-to-left, start from the last pel
        // of the source scan and make the span negative:

        xSrc  = xOrigin + prcl->left + dxSrc;
        lSpan = prcl->right - prcl->left - 1;
        if (bRightToLeft)
        {
            xSrc += lSpan;
            lSpan = -lSpan;
        }

        // Linear address of the first source pel; AR3 receives the
        // start and the CP_START on AR0 receives the end:

        lSrcStart = ppdev->ulYDstOrg + (ySrc * ppdev->cxMemory) + xSrc;

        CP_WRITE(pjBase, DWG_AR3, lSrcStart);
        CP_START(pjBase, DWG_AR0, lSrcStart + lSpan);

        if (--c == 0)
        {
            break;
        }

        // Six writes for the next rectangle:

        CHECK_FIFO_SPACE(pjBase, 6);
    }
}
|