Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

966 lines
31 KiB

/******************************Module*Header*******************************\
* Module Name: bltmil.c
*
* Contains the low-level blt functions for the Millennium.
*
* Hopefully, if you're basing your display driver on this code, to
* support all of DrvBitBlt and DrvCopyBits, you'll only have to implement
* the following routines. You shouldn't have to modify much in
* 'bitblt.c'. I've tried to make these routines as few, modular, simple,
* and efficient as I could, while still accelerating as many calls as
* possible that would be cost-effective in terms of performance wins
* versus size and effort.
*
* Note: In the following, 'relative' coordinates refers to coordinates
* that haven't yet had the offscreen bitmap (DFB) offset applied.
* 'Absolute' coordinates have had the offset applied. For example,
* we may be told to blt to (1, 1) of the bitmap, but the bitmap may
* be sitting in offscreen memory starting at coordinate (0, 768) --
* (1, 1) would be the 'relative' start coordinate, and (1, 769)
* would be the 'absolute' start coordinate.
*
* Copyright (c) 1992-1996 Microsoft Corporation
* Copyright (c) 1993-1996 Matrox Electronic Systems, Ltd.
\**************************************************************************/
#include "precomp.h"
/******************************Public*Routine******************************\
* VOID vMilFillSolid
*
* Fills every rectangle of a list with one solid colour.
*
* ppdev     - Device; supplies the register base, the surface offset and
*             the bitmap format.
* c         - Number of rectangles in 'prcl'; must be at least one.
* prcl      - Rectangles to fill, in relative coordinates (the surface
*             offset is added here).
* rop4      - Rop4. 0xf0f0 (PATCOPY) is special-cased; anything else is
*             reduced to a 4-bit hardware mix of pattern and destination.
* rbc       - rbc.iSolidColor is the fill colour.
* pptlBrush - Pattern alignment; not referenced by this routine.
*
\**************************************************************************/
VOID vMilFillSolid(
PDEV*           ppdev,
LONG            c,
RECTL*          prcl,
ULONG           rop4,
RBRUSH_COLOR    rbc,
POINTL*         pptlBrush)
{
    BYTE*   pjBase;
    LONG    xOrg;
    LONG    yOrg;
    ULONG   ulCtl;
    ULONG   ulMix;
    ULONG   ulBlue;

    pjBase = ppdev->pjBase;
    xOrg   = ppdev->xOffset;
    yOrg   = ppdev->yOffset;

    // Room for DWGCTL, FCOL and the first rectangle's two registers.
    CHECK_FIFO_SPACE(pjBase, 4);

    ppdev->HopeFlags = (SIGN_CACHE | ARX_CACHE | PATTERN_CACHE);

    // Assume the block-mode opaque fill; the cases that cannot use it
    // replace the command below.
    ulCtl = (opcode_TRAP + blockm_ON + solid_SOLID +
             arzero_ZERO + sgnzero_ZERO + shftzero_ZERO +
             bop_SRCCOPY + pattern_OFF + transc_BG_OPAQUE);

    if (rop4 != 0xf0f0)
    {
        // The ROP3 combines P and D only, so bits 0-1 and 4-5 of the rop
        // carry all the information.  Packing them gives the Mga mix:
        //
        //   ROP3 Mga   ROP3 Mga   ROP3 Mga   ROP3 Mga
        //
        //   0x00  0    0x50  4    0xa0  8    0xf0  c
        //   0x05  1    0x55  5    0xa5  9    0xf5  d
        //   0x0a  2    0x5a  6    0xaa  a    0xfa  e
        //   0x0f  3    0x5f  7    0xaf  b    0xff  f

        ulMix = (rop4 & 0x03) + ((rop4 & 0x30) >> 2);

        if (ulMix == MGA_WHITENESS)
        {
            // Whiteness is just an opaque fill with all ones.
            rbc.iSolidColor = 0xffffffff;
        }
        else if (ulMix == MGA_BLACKNESS)
        {
            // Blackness is just an opaque fill with all zeroes.
            rbc.iSolidColor = 0x00000000;
        }
        else
        {
            // A genuine raster operation: block mode is turned off.
            ulCtl = (opcode_TRAP + blockm_OFF + atype_RSTR + solid_SOLID +
                     arzero_ZERO + sgnzero_ZERO + shftzero_ZERO +
                     pattern_OFF + transc_BG_OPAQUE +
                     (ulMix << 16));
        }
    }
    else if (ppdev->iBitmapFormat == BMF_24BPP)
    {
        // PATCOPY at 24bpp: block mode is used only for gray levels,
        // i.e. when the three low bytes of the colour are equal.
        ulBlue = rbc.iSolidColor & 0x000000ff;

        if ((((rbc.iSolidColor >> 8)  & 0x000000ff) == ulBlue) &&
            (((rbc.iSolidColor >> 16) & 0x000000ff) == ulBlue))
        {
            // Gray level: prepare the colour by shifting it up one byte
            // and repeating the low byte.
            rbc.iSolidColor = (rbc.iSolidColor << 8) | ulBlue;
        }
        else
        {
            // Not a gray level, so we can't use block mode.
            ulCtl = (opcode_TRAP + blockm_OFF + atype_RPL + solid_SOLID +
                     arzero_ZERO + sgnzero_ZERO + shftzero_ZERO +
                     bop_SRCCOPY + pattern_OFF + transc_BG_OPAQUE);
        }
    }

    CP_WRITE(pjBase, DWG_DWGCTL, ulCtl);
    CP_WRITE(pjBase, DWG_FCOL, COLOR_REPLICATE(ppdev, rbc.iSolidColor));

    for (;;)
    {
        CP_WRITE(pjBase, DWG_FXBNDRY,
                 (((prcl->right + xOrg) << bfxright_SHIFT) |
                  ((prcl->left  + xOrg) & bfxleft_MASK)));

        // ylength_MASK is not needed since coordinates are within range.
        // Writing YDSTLEN through CP_START kicks off the fill.
        CP_START(pjBase, DWG_YDSTLEN,
                 (((prcl->top + yOrg) << yval_SHIFT) |
                  (prcl->bottom - prcl->top)));

        if (--c == 0)
        {
            break;
        }

        // Two more registers for the next rectangle.
        CHECK_FIFO_SPACE(pjBase, 2);
        prcl++;
    }
}
/******************************Public*Routine******************************\
* VOID vMilPatRealize
*
* Download the Color Brush to the Color brush cache in the Storm offscreen
* memory. For 8, 16, and 32 bpp, we download an 8x8 brush; a special
* routine, vPatRealize24bpp, is used for 24bpp brushes. We'll use direct
* frame buffer access whenever possible.
*
* There are some hardware restrictions concerning the way that a pattern
* must be stored in memory:
* - the first pixel of the pattern must be stored so that the first pixel
* address mod 256 is 0, 8, 16, or 24;
* - each line of 8 pixels is stored continuously, but there must be a
* difference of 32 in the pixel addresses of successive pattern lines.
* This means that we will store patterns in the following way:
*
* +----+---------------+---------------+---------------+---------------+
* | | Pattern 0 | Pattern 1 | Pattern 2 | Pattern 3 |
* |Line| | |1 1 1 1 1 1 1 1|1 1 1 1 1 1 1 1|
* | |0 1 2 3 4 5 6 7|8 9 a b c d e f|0 1 2 3 4 5 6 7|8 9 a b c d e f|
* +----+---------------+---------------+---------------+---------------+
* | 0 |* * * * | X | o o|x x |
* | 1 | * * * *| X | o o | x x |
* | 2 |* * * * | X | o o | x x |
* | 3 | * * * *| X |o o | x x|
* | 4 |* * * * |X X X X X X X X| o o|x x |
* | 5 | * * * *| X | o o | x x |
* | 6 |* * * * | X | o o | x x |
* | 7 | * * * *| X |o o | x x|
* +----+---------------+---------------+---------------+---------------+
*
* where a given pixel address is
* FirstPixelAddress + Line*0x20 + Pattern*0x08 + xPat.
*
\**************************************************************************/
VOID vMilPatRealize(
PDEV*   ppdev,
RBRUSH* prb)
{
    BYTE*       pjBase;
    BRUSHENTRY* pbeSlot;
    LONG        iSlot;
    LONG        iNext;
    ULONG       cdRow;          // Dwords copied per pattern row
    ULONG       cdStride;       // Dwords between starts of successive rows
    ULONG       iRow;
    ULONG       iDword;
    ULONG*      pulSrc;
    ULONG*      pulRow;

    pjBase = ppdev->pjBase;

    // Take the current off-screen brush cache entry for this brush and
    // advance the device's cache index, wrapping to zero at the end of
    // the cache.

    iSlot   = ppdev->iBrushCache;
    pbeSlot = &ppdev->pbe[iSlot];

    iNext = iSlot + 1;
    if (iNext >= ppdev->cBrushCache)
    {
        iNext = 0;
    }
    ppdev->iBrushCache = iNext;

    // Link the cache entry and the brush realization to each other so
    // that a later check of 'prbVerify' can tell whether the entry still
    // belongs to this brush.

    pbeSlot->prbVerify       = prb;
    prb->apbe[IBOARD(ppdev)] = pbeSlot;

    // Source pattern bits are read sequentially.

    pulSrc = prb->aulPattern;

    // Each row copies 2 * cjHwPel dwords (8 * cjHwPel bytes).  Successive
    // destination rows start 8 * cjHwPel dwords (32 * cjHwPel bytes)
    // apart.  NOTE(review): presumably cjHwPel is the byte size of a
    // pixel, which makes this 8 pixels per row at the 32-pixel pitch
    // described in the routine header -- confirm.

    cdRow    = 2 * ppdev->cjHwPel;
    cdStride = 8 * ppdev->cjHwPel;

    pulRow = (ULONG*) (pbeSlot->pvScan0);

    START_DIRECT_ACCESS_STORM(ppdev, pjBase);

    for (iRow = 0; iRow < 8; iRow++)
    {
        for (iDword = 0; iDword < cdRow; iDword++)
        {
            pulRow[iDword] = pulSrc[iDword];
        }

        pulSrc += cdRow;
        pulRow += cdStride;
    }

    END_DIRECT_ACCESS_STORM(ppdev, pjBase);
}
/*****************************************************************************
* VOID vMilFillPat
*
* 8, 16, and 32bpp patterned colour fills for Storm, using the brush that
* has been downloaded to the off-screen brush cache.
*
* ppdev     - Device.
* c         - Number of rectangles in 'prcl'; must be at least one.
* prcl      - Rectangles to fill, in relative coordinates.
* rop4      - Rop4; a low byte of 0xf0 is treated as PATCOPY, anything
*             else is reduced to a 4-bit hardware mix.
* rbc       - rbc.prb points to the brush realization structure.
* pptlBrush - Pattern alignment: where the upper left pel of the brush
*             lands in the destination surface.
****************************************************************************/
VOID vMilFillPat(
PDEV*           ppdev,
LONG            c,
RECTL*          prcl,
ULONG           rop4,
RBRUSH_COLOR    rbc,
POINTL*         pptlBrush)
{
    BYTE*       pjBase;
    BRUSHENTRY* pbeCache;
    LONG        xOrg;
    LONG        yOrg;
    LONG        lSeaAdd;
    LONG        xDst;
    LONG        yDst;
    LONG        xPat;
    LONG        yPat;
    ULONG       ulSsa;
    ULONG       ulCtl;

    ASSERTDD(!(rbc.prb->fl & RBRUSH_2COLOR), "Can't do 2 colour brushes here");

    // If another brush has taken over our cache entry (or the brush was
    // never realized), download the pattern again and pick up the entry
    // it was given.

    pbeCache = rbc.prb->apbe[IBOARD(ppdev)];
    if (pbeCache->prbVerify != rbc.prb)
    {
        vMilPatRealize(ppdev, rbc.prb);
        pbeCache = rbc.prb->apbe[IBOARD(ppdev)];
    }

    pjBase  = ppdev->pjBase;
    xOrg    = ppdev->xOffset;
    yOrg    = ppdev->yOffset;
    lSeaAdd = ppdev->lPatSrcAdd;

    // AR5 and DWGCTL, plus four registers for the first rectangle.
    CHECK_FIFO_SPACE(pjBase, 6);

    CP_WRITE(pjBase, DWG_AR5, 32);      // Source (pattern) pitch.

    ppdev->HopeFlags = SIGN_CACHE;

    if ((rop4 & 0x000000FF) != 0x000000F0)
    {
        // A raster operation: pack bits 0-1 and 4-5 of the rop into the
        // 4-bit hardware mix.
        ulCtl = (opcode_BITBLT + atype_RSTR + sgnzero_ZERO +
                 shftzero_ZERO + bltmod_BFCOL + pattern_ON +
                 transc_BG_OPAQUE +
                 (((rop4 & 0x03) + ((rop4 & 0x30) >> 2))
                                 << 16));
    }
    else
    {
        // The rop is PATCOPY.
        ulCtl = (opcode_BITBLT + atype_RPL + sgnzero_ZERO +
                 shftzero_ZERO + bop_SRCCOPY +
                 bltmod_BFCOL + pattern_ON +
                 transc_BG_OPAQUE);
    }

    CP_WRITE(pjBase, DWG_DWGCTL, ulCtl);

    // The pattern setup is complete; now issue one blt per rectangle.

    for (;;)
    {
        // There is a problem with Storm.  We have to program:
        //   AR3: ssa
        //   AR0: sea, where sea<18:3> = ssa<18:3> and
        //                   sea< 2:0> = ssa< 2:0> + 2 for 8bpp;
        //                   sea< 2:0> = ssa< 2:0> + 4 for 16bpp;
        //                   sea< 2:0> = ssa< 2:0> + 6 for 32bpp.
        // The increment is taken from ppdev->lPatSrcAdd.

        xDst = prcl->left;
        yDst = prcl->top;

        // Position within the 8x8 brush of the rectangle's upper left
        // pel, given the brush origin.
        xPat = (xDst - pptlBrush->x) & 7;
        yPat = (yDst - pptlBrush->y) & 7;

        // Pattern rows are 32 (1 << 5) apart in the cache.
        ulSsa = pbeCache->ulLinear + (yPat << 5) + xPat;

        CP_WRITE(pjBase, DWG_AR3, ulSsa);
        CP_WRITE(pjBase, DWG_AR0, ((ulSsa & 0xfffffff8) |
                                   ((ulSsa + lSeaAdd) & 7)));

        CP_WRITE(pjBase, DWG_FXBNDRY,
                 (((prcl->right + xOrg - 1) << bfxright_SHIFT) |
                  ((xDst + xOrg) & bfxleft_MASK)));

        // ylength_MASK is not needed since coordinates are within range.
        CP_START(pjBase, DWG_YDSTLEN,
                 (((yDst + yOrg) << yval_SHIFT) |
                  (prcl->bottom - yDst)));

        if (--c == 0)
        {
            break;
        }

        // Four more registers for the next rectangle.
        CHECK_FIFO_SPACE(pjBase, 4);
        prcl++;
    }
}
/******************************Public*Routine******************************\
* VOID vMilXfer1bpp
*
* Colour-expands a monochrome bitmap onto the screen by streaming the
* source dwords to the chip through the DMA window.
*
* ppdev   - Device.
* c       - Number of rectangles in 'prcl'; must be at least one.
* prcl    - Destination rectangles, in relative coordinates.
* rop4    - Rop4; the foreground and background rops must be equal.
*           0xcccc, 0xb8b8 and 0xe2e2 have dedicated set-ups, every other
*           value is programmed from its low four bits.
* psoSrc  - Monochrome source surface (pvScan0 and lDelta are used).
* pptlSrc - Original unclipped source point.
* prclDst - Original unclipped destination rectangle.
* pxlo    - pulXlate[0] supplies the background colour and pulXlate[1]
*           the foreground colour.
*
\**************************************************************************/
VOID vMilXfer1bpp(          // Type FNXFER
PDEV*       ppdev,
LONG        c,              // Count of rectangles, can't be zero
RECTL*      prcl,           // List of destination rectangles, in relative
                            //   coordinates
ULONG       rop4,           // Foreground and background hardware mix
SURFOBJ*    psoSrc,         // Source surface
POINTL*     pptlSrc,        // Original unclipped source point
RECTL*      prclDst,        // Original unclipped destination rectangle
XLATEOBJ*   pxlo)           // Translate that provides colour-expansion
                            //   information
{
    LONG    xOffset;
    LONG    yOffset;
    ULONG   ulBitFlip;      // XORed into every source dword (0 or ~0)
    LONG    dx;
    LONG    dy;
    LONG    xSrc;
    LONG    ySrc;
    LONG    xDst;
    LONG    yDst;
    LONG    cxDst;
    LONG    cyDst;
    LONG    xSrcAlign;      // Bit offset of xSrc within its dword
    LONG    lSrcDelta;
    LONG    lSrcSkip;       // Bytes from end of one scan's data to the next
    LONG    i;
    LONG    k;
    LONG    cdSrc;
    LONG    cdSrcPerScan;
    ULONG   FCol;
    ULONG   BCol;
    BYTE*   pjDma;
    ULONG*  pulXlate;
    ULONG*  pulSrc;
    ULONG*  pulDst;
    BYTE*   pjSrcScan0;
    BYTE*   pjBase;

    ASSERTDD(((rop4 & 0xff00) >> 8) == (rop4 & 0xff),
             "Expect only an opaquing rop");

    pjBase     = ppdev->pjBase;
    xOffset    = ppdev->xOffset;
    yOffset    = ppdev->yOffset;
    ulBitFlip  = 0;

    dx = pptlSrc->x - prclDst->left;
    dy = pptlSrc->y - prclDst->top;     // Add to destination to get source

    pjSrcScan0 = psoSrc->pvScan0;
    lSrcDelta  = psoSrc->lDelta;
    pjDma      = pjBase + DMAWND;

    ppdev->HopeFlags = SIGN_CACHE;

    // Get the foreground and background colors.

    pulXlate = pxlo->pulXlate;
    FCol = COLOR_REPLICATE(ppdev, pulXlate[1]);
    BCol = COLOR_REPLICATE(ppdev, pulXlate[0]);

    // Five set-up registers below, plus up to five for the first
    // rectangle (CXLEFT, AR0, AR3, FXBNDRY, YDSTLEN).

    CHECK_FIFO_SPACE(pjBase, 10);

    if (rop4 == 0x0000CCCC)     // SRCCOPY
    {
        if (ppdev->iBitmapFormat == BMF_24BPP)
        {
            // Block mode is not used at 24bpp.
            CP_WRITE(pjBase, DWG_DWGCTL, (opcode_ILOAD + atype_RPL +
                                          sgnzero_ZERO + shftzero_ZERO +
                                          bop_SRCCOPY + bltmod_BMONOWF));
        }
        else
        {
            CP_WRITE(pjBase, DWG_DWGCTL, (opcode_ILOAD + blockm_ON +
                                          sgnzero_ZERO + shftzero_ZERO +
                                          bop_SRCCOPY + bltmod_BMONOWF));
        }
    }
    else if ((rop4 == 0xb8b8) || (rop4 == 0xe2e2))
    {
        // We special-cased 0xb8b8 and 0xe2e2 in bitblt.c:

        if (rop4 == 0xb8b8)
        {
            // 0xb8 is weird because it says that the '1' bit is leave-alone,
            // but the '0' bit is the destination color.  The Millennium can
            // only handle transparent blts when the '0' bit is leave-alone,
            // so we flip the source bits before we give it to the Millennium.
            //
            // Since we're limited by the speed of the bus, this additional
            // overhead of an extra XOR on every write won't be measurable.

            ulBitFlip = (ULONG) -1;
        }

        CP_WRITE(pjBase, DWG_DWGCTL, (opcode_ILOAD + atype_RPL + blockm_OFF +
                                      bop_SRCCOPY + trans_0 + bltmod_BMONO +
                                      pattern_OFF + hbgr_SRC_WINDOWS +
                                      transc_BG_TRANSP));
    }
    else
    {
        // Any other rop: the low four bits are programmed as the
        // hardware mix.
        CP_WRITE(pjBase, DWG_DWGCTL, (opcode_ILOAD + atype_RSTR +
                                      sgnzero_ZERO + shftzero_ZERO +
                                      ((rop4 & 0xf) << 16) +
                                      bltmod_BMONOWF));
    }

    CP_WRITE(pjBase, DWG_BCOL, BCol);
    CP_WRITE(pjBase, DWG_FCOL, FCol);
    CP_WRITE(pjBase, DWG_AR5, 0);
    CP_WRITE(pjBase, DWG_SGN, 0);

    while (TRUE)
    {
        cxDst = prcl->right - prcl->left;
        cyDst = prcl->bottom - prcl->top;
        xDst  = prcl->left + xOffset;
        yDst  = prcl->top + yOffset;
        ySrc  = prcl->top + dy;
        xSrc  = prcl->left + dx;

        // Since SSA (AR3) is always zero, we may have to clip the expanded
        // ILOAD using CXLEFT, and we'll have to modify FXLEFT accordingly.

        xSrcAlign = xSrc & 0x1F;
        if (xSrcAlign)
        {
            // We'll have to use clipping: the blt starts xSrcAlign pels
            // to the left of xDst and the left clip hides those pels.
            CP_WRITE(pjBase, DWG_CXLEFT, xDst);
        }

        // Number of pixels per line.
        CP_WRITE(pjBase, DWG_AR0, (cxDst - 1 + xSrcAlign));
        CP_WRITE(pjBase, DWG_AR3, 0);

        CP_WRITE(pjBase, DWG_FXBNDRY, (((xDst + cxDst - 1) << bfxright_SHIFT) |
                                       ((xDst - xSrcAlign) & bfxleft_MASK)));

        // ylength_MASK not needed since coordinates are within range
        CP_START(pjBase, DWG_YDSTLEN, ((yDst << yval_SHIFT) | cyDst));

        // Calculate the location of the source rectangle.  This points to the
        // first dword to be downloaded.  It is aligned on a dword boundary.
        // The first bit of interest in the first dword is at (xSrc & 0x1f).

        pulSrc = (ULONG*)(pjSrcScan0 + (ySrc * lSrcDelta)
                                     + ((xSrc & 0xFFFFFFE0) >> 3));

        CHECK_FIFO_SPACE(pjBase, FIFOSIZE);
        BLT_WRITE_ON(ppdev, pjBase);

        // Number of bytes, padded to the next dword, to be moved per
        // scanline.  Since we align the starting dword on a dword boundary,
        // we know that we cannot overflow the end of the bitmap.

        cdSrc = ((xSrcAlign + cxDst + 0x1F) & 0xFFFFFFE0) >> 3;
        lSrcSkip = lSrcDelta - cdSrc;

        if (lSrcSkip == 0)
        {
            // There is no line-to-line increment, we can go full speed.
            // Total number of dwords to be sent.

            cdSrc = cyDst * (cdSrc >> 2);

            // Send full FIFOSIZE bursts while more than FIFOSIZE dwords
            // remain ...

            while ((cdSrc -= FIFOSIZE) > 0)
            {
                pulDst = (ULONG*)pjDma;
                CHECK_FIFO_SPACE(pjBase, FIFOSIZE);
                for (i = FIFOSIZE; i != 0; i--)
                {
                    CP_WRITE_DMA(ppdev, pulDst++, *pulSrc++ ^ ulBitFlip);
                }
            }

            // ... then the remainder, which is between 1 and FIFOSIZE
            // dwords.

            pulDst = (ULONG*)pjDma;
            cdSrc += FIFOSIZE;
            CHECK_FIFO_SPACE(pjBase, cdSrc);
            for (i = cdSrc; i != 0; i--)
            {
                CP_WRITE_DMA(ppdev, pulDst++, *pulSrc++ ^ ulBitFlip);
            }
        }
        else
        {
            // We can't go full speed.
            // Number of full dwords to be moved on each scan.  We know that
            // we won't overflow the end of the bitmap with this.

            cdSrc >>= 2;
            cdSrcPerScan = cdSrc;

            for (k = cyDst; k != 0; k--)
            {
                pulDst = (ULONG*)pjDma;
                cdSrc = cdSrcPerScan;

                // Same burst-then-remainder scheme as above, one scan at
                // a time.

                while ((cdSrc -= FIFOSIZE) > 0)
                {
                    CHECK_FIFO_SPACE(pjBase, FIFOSIZE);
                    for (i = FIFOSIZE; i != 0; i--)
                    {
                        CP_WRITE_DMA(ppdev, pulDst++, *pulSrc++ ^ ulBitFlip);
                    }
                }

                cdSrc += FIFOSIZE;
                CHECK_FIFO_SPACE(pjBase, cdSrc);
                for (i = cdSrc; i != 0; i--)
                {
                    CP_WRITE_DMA(ppdev, pulDst++, *pulSrc++ ^ ulBitFlip);
                }

                // We're done with the current scan, go to the next one.
                pulSrc = (ULONG*) ((BYTE*) pulSrc + lSrcSkip);
            }
        }

        BLT_WRITE_OFF(ppdev, pjBase);

        if (xSrcAlign)
        {
            // Restore the clipping:
            CHECK_FIFO_SPACE(pjBase, 1);
            CP_WRITE(pjBase, DWG_CXLEFT, 0);
        }

        if (--c == 0)
            break;

        prcl++;

        // Up to five registers for the next rectangle.
        CHECK_FIFO_SPACE(pjBase, 5);
    }
}
/******************************Public*Routine******************************\
* LONG lSplitRcl
*
* WRAM-WRAM blts can't span banks.  Given a destination rectangle and the
* scans at which banks break, this routine cuts the rectangle into pieces
* and marks each piece as either fast (0) or slow (1).
*
* arclDst   - On entry, arclDst[0] is the rectangle to split.  On exit it
*             holds the resulting pieces, in order.
* ayBreak   - Break scans, in the same coordinate space as arclDst.
* cyBreak   - Number of entries in 'ayBreak'.
* dy        - Added to a destination scan to get the source scan.
* flDirCode - Not referenced by this routine.
* aiCmd     - Receives the 0/1 command for each piece written.
*
* Returns the number of pieces in 'arclDst' (1 when there are no breaks).
*
\**************************************************************************/
LONG lSplitRcl(
RECTL*  arclDst,
LONG*   ayBreak,
LONG    cyBreak,
LONG    dy,
ULONG   flDirCode,
LONG*   aiCmd)
{
    RECTL   rclWhole;           // Copy of the rectangle being split
    RECTL   rclPiece;           // Candidate piece
    LONG    cOut;               // Count of pieces / next output slot
    LONG    iIn;                // Slot of the rectangle being split
    LONG    iBrk;               // Current entry in ayBreak
    LONG    yBreak;
    LONG    yUnionTop;
    LONG    yUnionBottom;
    LONG    iLastCmd;           // Command of the last piece emitted

    ///////////////////////////////////////////////////////////////////////////////
    // See [WRN] comment below before changing this macro. This macro is
    // particular to this function.

#define NON_EMPTY_RECT(rcl) ((rcl.right > rcl.left) && (rcl.bottom > rcl.top))

    cOut     = 0;
    iIn      = 0;
    iLastCmd = 0;

    aiCmd[0] = 0;

    if (cyBreak == 0)
    {
        // No breaks at all: the single rectangle stays as it is.
        return 1;
    }

    for (iBrk = 0; ; iBrk++)
    {
        rclWhole = arclDst[iIn];
        yBreak   = ayBreak[iBrk];

        // Bounding scans of the union of the source and destination.

        yUnionTop    = min(rclWhole.top, rclWhole.top + dy);
        yUnionBottom = max(rclWhole.bottom, rclWhole.bottom + dy);

        if ((yBreak >= yUnionTop) && (yBreak < yUnionBottom))
        {
            // The break touches this rectangle: cut it into up to three
            // pieces.  Every candidate is stored into the next output
            // slot first and only counted if it is non-empty.
            //
            // [WRN] For the following, bottom could be less than top and
            // right could be less than left.  These should be considered
            // empty rectangles, and the macro above reflects this.

            rclPiece.left  = rclWhole.left;
            rclPiece.right = rclWhole.right;

            // Piece ending at (break - dy): fast.

            rclPiece.top    = rclWhole.top;
            rclPiece.bottom = min(rclWhole.bottom, (yBreak - dy));
            arclDst[cOut] = rclPiece;
            if (NON_EMPTY_RECT(rclPiece))
            {
                aiCmd[cOut] = 0;
                cOut++;
                iLastCmd = 0;
            }

            // Piece from (break - dy) through the break scan: slow.

            rclPiece.top    = max(rclWhole.top, (yBreak - dy));
            rclPiece.bottom = min(rclWhole.bottom, (yBreak + 1));
            arclDst[cOut] = rclPiece;
            if (NON_EMPTY_RECT(rclPiece))
            {
                aiCmd[cOut] = 1;
                cOut++;
                iLastCmd = 1;
            }

            // Piece starting after the break scan: fast.

            rclPiece.top    = max(rclWhole.top, (yBreak + 1));
            rclPiece.bottom = rclWhole.bottom;
            arclDst[cOut] = rclPiece;
            if (NON_EMPTY_RECT(rclPiece))
            {
                aiCmd[cOut] = 0;
                cOut++;
                iLastCmd = 0;
            }
        }
        else
        {
            // The break misses this rectangle: keep it untouched.
            cOut++;
        }

        // If we have run out of breaks, we're done.
        // Once the last rectangle is marked slow, it stays slow.

        if ((--cyBreak == 0) || (iLastCmd == 1))
        {
            break;
        }

        // The last piece becomes the rectangle to split at the next break.

        iIn = --cOut;
    }

    return cOut;
}
/******************************Public*Routine******************************\
* VOID vMilCopyBlt
*
* Does a screen-to-screen blt of a list of rectangles.
*
* ppdev   - Device.
* c       - Number of rectangles in 'prcl'; must be at least one.
* prcl    - Destination rectangles, in relative coordinates.
* rop4    - Rop4; 0xcccc is a straight copy, anything else is programmed
*           from its low four bits.
* pptlSrc - Original unclipped source point.
* prclDst - Original unclipped destination rectangle.
*
* A straight copy with dx == 0 and dy > 0 (an upward scroll) goes through
* the fast WRAM-WRAM path: each rectangle is split by lSplitRcl and the
* resulting pieces are drawn with either the fast or the regular command.
*
\**************************************************************************/
VOID vMilCopyBlt(   // Type FNCOPY
PDEV*   ppdev,
LONG    c,          // Can't be zero
RECTL*  prcl,       // Array of relative coordinates destination rectangles
ULONG   rop4,       // Rop4
POINTL* pptlSrc,    // Original unclipped source point
RECTL*  prclDst)    // Original unclipped destination rectangle
{
    BYTE*   pjBase;
    LONG    xOffset;
    LONG    yOffset;
    LONG    dx;
    LONG    dy;
    FLONG   flDirCode;
    LONG    lSignedPitch;
    ULONG   ulHwMix;
    ULONG   ulDwg;
    LONG    yDst;
    LONG    ySrc;
    LONG    cy;
    LONG    xSrc;
    LONG    lSignedWidth;
    LONG    lSrcStart;
    ULONG   ulDwgFast = 0;      // Non-zero only when the fast path applies
    LONG    cjPelSize;

    pjBase    = ppdev->pjBase;
    xOffset   = ppdev->xOffset;
    yOffset   = ppdev->yOffset;
    cjPelSize = ppdev->cjPelSize;

    dx = pptlSrc->x - prclDst->left;
    dy = pptlSrc->y - prclDst->top;     // Add to destination to get source

    flDirCode    = DRAWING_DIR_TBLR;
    lSignedPitch = ppdev->cxMemory;

    // If the destination and source rectangles overlap, we will have to
    // tell the accelerator in which direction the copy should be done:

    if (OVERLAP(prclDst, pptlSrc))
    {
        if (prclDst->left > pptlSrc->x)
        {
            flDirCode |= scanleft_RIGHT_TO_LEFT;
        }
        if (prclDst->top > pptlSrc->y)
        {
            flDirCode |= sdy_BOTTOM_TO_TOP;
            lSignedPitch = -lSignedPitch;
        }
    }

    if (rop4 == 0xcccc)
    {
        ulDwg = opcode_BITBLT | atype_RPL | blockm_OFF |
                bltmod_BFCOL | pattern_OFF | transc_BG_OPAQUE |
                bop_SRCCOPY | shftzero_ZERO | sgnzero_NO_ZERO;

        if ((dy > 0) && (dx == 0))
        {
            // We enable fast WRAM to WRAM blts only for upward scrolls.
            // We could enable it for more blts, but it has stringent
            // alignment requirements which aren't likely to be met unless
            // it's a vertical scroll.
            //
            // With dx == 0 and dy > 0 neither direction test above can
            // have fired, so this path always runs top-to-bottom,
            // left-to-right.

            ulDwgFast = opcode_FBITBLT | atype_RPL | blockm_OFF |
                        bltmod_BFCOL | pattern_OFF | transc_BG_OPAQUE |
                        bop_NOP | shftzero_ZERO | sgnzero_NO_ZERO;
        }
    }
    else
    {
        ulHwMix = rop4 & 0xf;
        ulDwg = opcode_BITBLT + atype_RSTR + blockm_OFF + bltmod_BFCOL +
                pattern_OFF + transc_BG_OPAQUE + (ulHwMix << 16);
    }

    // The SRC0 to SRC3 registers are probably trashed by the blt, and we
    // may be using a different SGN:

    ppdev->HopeFlags = 0;

    // SGN and AR5, plus up to six registers for the first rectangle.

    CHECK_FIFO_SPACE(pjBase, 8);
    CP_WRITE(pjBase, DWG_SGN, flDirCode);
    CP_WRITE(pjBase, DWG_AR5, lSignedPitch);

    // If the overhead for setting up the fast blt is too high, then we should
    // have a minimum size for prclDst.

    if (ulDwgFast)
    {
        RECTL   arclDst[1+(MAX_WRAM_BARRIERS*2)];
        LONG    aiCmd[1+(MAX_WRAM_BARRIERS*2)];
        LONG    ayBreak[MAX_WRAM_BARRIERS];
        LONG    cyBreak;
        RECTL*  prclPiece;      // Walks arclDst; deliberately does not reuse
                                //   the name of the 'prclDst' parameter
        LONG    crclDst;
        ULONG   aulCmd[2] = {ulDwgFast, ulDwg};     // Indexed by aiCmd[]
        LONG    i;

        cyBreak = ppdev->cyBreak;

        // The breaks are copied into a fixed-size local array, so there
        // must not be more of them than it can hold.

        ASSERTDD(cyBreak <= MAX_WRAM_BARRIERS,
                 "More WRAM breaks than ayBreak can hold");

        for (i = 0; i < cyBreak; i++)
        {
            // lSplitRcl deals in relative coordinates for the destination and
            // source rectangles, so convert the break locations to relative
            // coordinates, too:

            ayBreak[i] = ppdev->ayBreak[i] - yOffset;
        }

        while (TRUE)
        {
            arclDst[0] = *prcl;
            prclPiece = arclDst;

            // Split the rectangle at each ayBreak[i].  aiCmd[] tells, for
            // every resulting piece, whether to use the fast (0) or the
            // regular (1) command.

            crclDst = lSplitRcl(arclDst, ayBreak, cyBreak, dy, flDirCode, aiCmd);

            i = 0;
            while (TRUE)
            {
                LONG xRight;

                ASSERTDD((aiCmd[i] & ~1) == 0, "Only bit 0 of aiCmd[i] should be set.");

                CP_WRITE(pjBase, DWG_DWGCTL, aulCmd[aiCmd[i]]);

                xRight = prclPiece->right + xOffset - 1;

                ////////////////////////////////////////////////////////////////
                // The following code is a bugfix for the fast WRAM copies
                // Extend the right edge to a specific value and then
                // clip to the actual desired edge.

                CP_WRITE(pjBase, DWG_CXRIGHT, xRight);

                switch(cjPelSize)
                {
                case 1: xRight |= 0x40;
                        break;
                case 2: xRight |= 0x20;
                        break;
                case 4: xRight |= 0x10;
                        break;
                case 3: xRight = (((xRight * 3) + 2) | 0x40) / 3;
                        break;
                default:
                        // Any other pel size: leave the edge as it is.
                        break;
                }

                ////////////////////////////////////////////////////////////////

                CP_WRITE(pjBase, DWG_FXBNDRY,
                         (((xRight) << bfxright_SHIFT) |
                          ((prclPiece->left + xOffset) & bfxleft_MASK)));

                yDst = yOffset + prclPiece->top;
                ySrc = yOffset + prclPiece->top + dy;

                // ylength_MASK not is needed since coordinates are within range

                CP_WRITE(pjBase, DWG_YDSTLEN,
                         (((yDst) << yval_SHIFT) |
                          ((prclPiece->bottom - prclPiece->top))));

                xSrc         = xOffset + prclPiece->left + dx;
                lSignedWidth = prclPiece->right - prclPiece->left - 1;
                lSrcStart    = ppdev->ulYDstOrg + (ySrc * ppdev->cxMemory) + xSrc;

                CP_WRITE(pjBase, DWG_AR3, lSrcStart);
                CP_START(pjBase, DWG_AR0, lSrcStart + lSignedWidth);

                if (--crclDst == 0)
                    break;

                prclPiece++;
                i++;

                // Six registers per piece.
                CHECK_FIFO_SPACE(pjBase, 6);
            }

            if (--c == 0)
                break;

            prcl++;
            CHECK_FIFO_SPACE(pjBase, 6);
        }

        // Restore the clipping:

        CHECK_FIFO_SPACE(pjBase, 1);
        CP_WRITE(pjBase, DWG_CXRIGHT, (ppdev->cxMemory - 1));
    }
    else
    {
        CP_WRITE(pjBase, DWG_DWGCTL, ulDwg);

        while (TRUE)
        {
            CP_WRITE(pjBase, DWG_FXBNDRY,
                     (((prcl->right + xOffset - 1) << bfxright_SHIFT) |
                      ((prcl->left + xOffset) & bfxleft_MASK)));

            yDst = yOffset + prcl->top;
            ySrc = yOffset + prcl->top + dy;

            if (flDirCode & sdy_BOTTOM_TO_TOP)
            {
                // Start from the last scan when copying upwards.
                cy = prcl->bottom - prcl->top - 1;
                yDst += cy;
                ySrc += cy;
            }

            // ylength_MASK not is needed since coordinates are within range

            CP_WRITE(pjBase, DWG_YDSTLEN,
                     (((yDst) << yval_SHIFT) |
                      ((prcl->bottom - prcl->top))));

            xSrc         = xOffset + prcl->left + dx;
            lSignedWidth = prcl->right - prcl->left - 1;

            if (flDirCode & scanleft_RIGHT_TO_LEFT)
            {
                // Start from the rightmost pel and run backwards.
                xSrc += lSignedWidth;
                lSignedWidth = -lSignedWidth;
            }

            lSrcStart = ppdev->ulYDstOrg + (ySrc * ppdev->cxMemory) + xSrc;

            CP_WRITE(pjBase, DWG_AR3, lSrcStart);
            CP_START(pjBase, DWG_AR0, lSrcStart + lSignedWidth);

            if (--c == 0)
                break;

            prcl++;

            // Four registers per rectangle.
            CHECK_FIFO_SPACE(pjBase, 4);
        }
    }
}