|
|
/******************************Module*Header*******************************\
* Module Name: w32blt.c * * Contains the low-level memory-mapped IO blt functions. * * Hopefully, if you're basing your display driver on this code, to * support all of DrvBitBlt and DrvCopyBits, you'll only have to implement * the following routines. You shouldn't have to modify much in * 'bitblt.c'. I've tried to make these routines as few, modular, simple, * and efficient as I could, while still accelerating as many calls as * possible that would be cost-effective in terms of performance wins * versus size and effort. * * Note: In the following, 'relative' coordinates refer to coordinates * that haven't yet had the offscreen bitmap (DFB) offset applied. * 'Absolute' coordinates have had the offset applied. For example, * we may be told to blt to (1, 1) of the bitmap, but the bitmap may * be sitting in offscreen memory starting at coordinate (0, 768) -- * (1, 1) would be the 'relative' start coordinate, and (1, 769) * would be the 'absolute' start coordinate. * * Copyright (c) 1992-1996 Microsoft Corporation * \**************************************************************************/
#include "precomp.h"
/**************************************************************************
* All functions using the accelerator must... * Wait for the ACL queue to be empty before loading any of the registers. **************************************************************************/
/**************************************************************************
* The following tables are heinous, but required. The monochrome data * (also known as Mix-Map or Mask) expander interprets the data such that * the least significant bit of a byte is pixel 0 and the most significant * bit is pixel 7. This is backwards from the way monochrome data is * interpreted by Windows and Windows NT. Also, the expander will ONLY * do 1 to 8 expansion, so we need to replicate each bit by the number of * bytes per pel in the current color depth. **************************************************************************/
BYTE jReverse[] = { // Each element is the bitwise reverse of it's index.
//
// ie. 10000000 -> 00000001 and
// 10010100 -> 00101001.
0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0, 0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8, 0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8, 0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4, 0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4, 0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec, 0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc, 0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2, 0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2, 0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea, 0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa, 0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6, 0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6, 0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee, 0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe, 0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1, 0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1, 0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9, 0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9, 0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5, 0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5, 0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed, 0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd, 0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3, 0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3, 0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb, 0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb, 0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7, 0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7, 0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef, 0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff, };
WORD wReverse2x[] = { // Each element is the bit doubled bitwise reverse of it's index.
//
// ie. 10000000 -> 0000000000000011 and
// 10010100 -> 0000110011000011.
0x0000, 0xc000, 0x3000, 0xf000, 0x0c00, 0xcc00, 0x3c00, 0xfc00, 0x0300, 0xc300, 0x3300, 0xf300, 0x0f00, 0xcf00, 0x3f00, 0xff00, 0x00c0, 0xc0c0, 0x30c0, 0xf0c0, 0x0cc0, 0xccc0, 0x3cc0, 0xfcc0, 0x03c0, 0xc3c0, 0x33c0, 0xf3c0, 0x0fc0, 0xcfc0, 0x3fc0, 0xffc0, 0x0030, 0xc030, 0x3030, 0xf030, 0x0c30, 0xcc30, 0x3c30, 0xfc30, 0x0330, 0xc330, 0x3330, 0xf330, 0x0f30, 0xcf30, 0x3f30, 0xff30, 0x00f0, 0xc0f0, 0x30f0, 0xf0f0, 0x0cf0, 0xccf0, 0x3cf0, 0xfcf0, 0x03f0, 0xc3f0, 0x33f0, 0xf3f0, 0x0ff0, 0xcff0, 0x3ff0, 0xfff0, 0x000c, 0xc00c, 0x300c, 0xf00c, 0x0c0c, 0xcc0c, 0x3c0c, 0xfc0c, 0x030c, 0xc30c, 0x330c, 0xf30c, 0x0f0c, 0xcf0c, 0x3f0c, 0xff0c, 0x00cc, 0xc0cc, 0x30cc, 0xf0cc, 0x0ccc, 0xcccc, 0x3ccc, 0xfccc, 0x03cc, 0xc3cc, 0x33cc, 0xf3cc, 0x0fcc, 0xcfcc, 0x3fcc, 0xffcc, 0x003c, 0xc03c, 0x303c, 0xf03c, 0x0c3c, 0xcc3c, 0x3c3c, 0xfc3c, 0x033c, 0xc33c, 0x333c, 0xf33c, 0x0f3c, 0xcf3c, 0x3f3c, 0xff3c, 0x00fc, 0xc0fc, 0x30fc, 0xf0fc, 0x0cfc, 0xccfc, 0x3cfc, 0xfcfc, 0x03fc, 0xc3fc, 0x33fc, 0xf3fc, 0x0ffc, 0xcffc, 0x3ffc, 0xfffc, 0x0003, 0xc003, 0x3003, 0xf003, 0x0c03, 0xcc03, 0x3c03, 0xfc03, 0x0303, 0xc303, 0x3303, 0xf303, 0x0f03, 0xcf03, 0x3f03, 0xff03, 0x00c3, 0xc0c3, 0x30c3, 0xf0c3, 0x0cc3, 0xccc3, 0x3cc3, 0xfcc3, 0x03c3, 0xc3c3, 0x33c3, 0xf3c3, 0x0fc3, 0xcfc3, 0x3fc3, 0xffc3, 0x0033, 0xc033, 0x3033, 0xf033, 0x0c33, 0xcc33, 0x3c33, 0xfc33, 0x0333, 0xc333, 0x3333, 0xf333, 0x0f33, 0xcf33, 0x3f33, 0xff33, 0x00f3, 0xc0f3, 0x30f3, 0xf0f3, 0x0cf3, 0xccf3, 0x3cf3, 0xfcf3, 0x03f3, 0xc3f3, 0x33f3, 0xf3f3, 0x0ff3, 0xcff3, 0x3ff3, 0xfff3, 0x000f, 0xc00f, 0x300f, 0xf00f, 0x0c0f, 0xcc0f, 0x3c0f, 0xfc0f, 0x030f, 0xc30f, 0x330f, 0xf30f, 0x0f0f, 0xcf0f, 0x3f0f, 0xff0f, 0x00cf, 0xc0cf, 0x30cf, 0xf0cf, 0x0ccf, 0xcccf, 0x3ccf, 0xfccf, 0x03cf, 0xc3cf, 0x33cf, 0xf3cf, 0x0fcf, 0xcfcf, 0x3fcf, 0xffcf, 0x003f, 0xc03f, 0x303f, 0xf03f, 0x0c3f, 0xcc3f, 0x3c3f, 0xfc3f, 0x033f, 0xc33f, 0x333f, 0xf33f, 0x0f3f, 0xcf3f, 0x3f3f, 0xff3f, 0x00ff, 0xc0ff, 0x30ff, 0xf0ff, 0x0cff, 0xccff, 0x3cff, 0xfcff, 0x03ff, 0xc3ff, 
0x33ff, 0xf3ff, 0x0fff, 0xcfff, 0x3fff, 0xffff, };
ULONG aulLeadCnt[] = {0x0, 0x3, 0x2, 0x1};
FNLOWXFER* afnXferI_Narrow[16] = { NULL, vXferI_1_Byte, vXferI_2_Bytes, vXferI_3_Bytes };
FNLOWXFER* afnXferP_Narrow[16] = { NULL, vXferP_1_Byte, vXferP_2_Bytes, vXferP_3_Bytes };
/**************************************************************************
* * Realizes a pattern into offscreen memory. * **************************************************************************/
VOID vFastPatRealize( // Type FNFASTPATREALIZE
PDEV* ppdev, RBRUSH* prb, // Points to brush realization structure
POINTL* pptlBrush, // Ignored
BOOL bTransparent) // FALSE for normal patterns; TRUE for
// patterns with a mask when the background
// mix is LEAVE_ALONE.
{ BRUSHENTRY* pbe; LONG iBrushCache; ULONG ulOffset; BYTE* pjPattern; LONG culPattern; LONG cjPattern; BYTE* pjDst; ULONG ulDstOffset;
BYTE* pjBase = ppdev->pjBase;
DISPDBG((10,"vFastPatRealize called"));
//
// Make sure we can write to the video registers.
//
WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
pbe = prb->pbe; if ((pbe == NULL) || (pbe->prbVerify != prb)) { // We have to allocate a new offscreen cache brush entry for
// the brush:
iBrushCache = ppdev->iBrushCache; pbe = &ppdev->abe[iBrushCache];
iBrushCache++; if (iBrushCache >= ppdev->cBrushCache) iBrushCache = 0;
ppdev->iBrushCache = iBrushCache;
// Update our links:
pbe->prbVerify = prb; prb->pbe = pbe; }
prb->bTransparent = bTransparent;
ulDstOffset = ((pbe->y * ppdev->lDelta) + (pbe->x * ppdev->cBpp)); pjPattern = (PBYTE) &prb->aulPattern[0]; // Copy from brush buffer
cjPattern = PATTERN_SIZE * ppdev->cBpp; if ((ppdev->ulChipID != W32P) && (ppdev->ulChipID != ET6000)) { cjPattern *= 4; }
START_DIRECT_ACCESS(ppdev, pjBase);
if (!ppdev->bAutoBanking) { // Set the address where we're going to put the pattern data.
// All data transfers to video memory take place through aperature 0.
CP_MMU_BP0(ppdev, pjBase, ulDstOffset); pjDst = (PBYTE) ppdev->pjMmu0; } else { pjDst = ppdev->pjScreen + ulDstOffset; }
RtlCopyMemory(pjDst, pjPattern, cjPattern);
END_DIRECT_ACCESS(ppdev, pjBase); }
/**************************************************************************
* * Does a pattern fill to a list of rectangles. * **************************************************************************/
VOID vPatternFillScr( PDEV* ppdev, LONG c, // Can't be zero
RECTL* prcl, // Array of relative coordinate destination rects
ROP4 rop4, // Obvious?
RBRUSH_COLOR rbc, // Drawing color is rbc.iSolidColor
POINTL* pptlBrush) //
{ BYTE* pjBase = ppdev->pjBase; LONG lDelta = ppdev->lDelta; LONG cBpp = ppdev->cBpp; BOOL bTransparent; ULONG ulPatternAddrBase; ULONG cTile = 0; BRUSHENTRY* pbe; // Pointer to brush entry data, which is used
// for keeping track of the location and status
// of the pattern bits cached in off-screen
// memory
DISPDBG((10,"vPatternFillScr called"));
bTransparent = ((rop4 & 0xff) != (rop4 >> 8)); ASSERTDD(!bTransparent, "We don't handle transparent brushes yet.");
if ((ppdev->ulChipID != W32P) && (ppdev->ulChipID != ET6000)) { //
// Patterns are duplicated horizontally and vertically (4 tiles)
//
cTile = 1; // Look, it means one extra to the right
}
ASSERTDD(c > 0, "Can't handle zero rectangles");
if ((rbc.prb->pbe->prbVerify != rbc.prb)) { vFastPatRealize(ppdev, rbc.prb, NULL, FALSE); }
ASSERTDD(rbc.prb->bTransparent == bTransparent, "Not realized with correct transparency");
pbe = rbc.prb->pbe;
//
// Make sure we can write to the video registers.
//
WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
CP_FG_ROP(ppdev, pjBase, (rop4 >> 8)); CP_BK_ROP(ppdev, pjBase, (rop4 & 0xff)); CP_DST_Y_OFFSET(ppdev, pjBase, (lDelta - 1));
//
// ### precalc & store the PAT_Y_OFFSET const in the pdev
//
CP_PAT_WRAP(ppdev, pjBase, ppdev->w32PatternWrap); CP_PAT_Y_OFFSET(ppdev, pjBase, (((PATTERN_OFFSET * cBpp) << cTile) - 1));
//
// Fill the list of rectangles
//
ulPatternAddrBase = (pbe->y * lDelta) + (pbe->x * cBpp);
do { ULONG offset;
offset = cBpp * ( (((prcl->top-pptlBrush->y)&7) << (3+cTile)) + ((prcl->left-pptlBrush->x)&7) );
WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
CP_PAT_ADDR(ppdev, pjBase, (ulPatternAddrBase + offset));
CP_XCNT(ppdev, pjBase, (((prcl->right - prcl->left) * cBpp) - 1)); CP_YCNT(ppdev, pjBase, (prcl->bottom - prcl->top - 1));
// Set the blit destination address as the base address of MMU aperture 2
// Then start the accelerated operation by writing something to this
// aperture.
SET_DEST_ADDR(ppdev, ((prcl->top * lDelta) + (cBpp * prcl->left))); START_ACL(ppdev);
prcl++;
} while (--c != 0); }
/**************************************************************************
* * Does a solid fill to a list of rectangles. * **************************************************************************/
VOID vSolidFillScr( PDEV* ppdev, LONG c, // Can't be zero
RECTL* prcl, // Array of relative coordinate destination rects
ROP4 rop4, // Obvious?
RBRUSH_COLOR rbc, // Drawing color is rbc.iSolidColor
POINTL* pptlBrush) // Not used
{ BYTE* pjBase = ppdev->pjBase; LONG lDelta = ppdev->lDelta; LONG cBpp = ppdev->cBpp; ULONG ulSolidColor;
DISPDBG((10,"vSolidFillScr called"));
ASSERTDD(c > 0, "Can't handle zero rectangles"); ASSERTDD((ppdev->cBpp < 3), "vSolidFillScr only works for 8bpp and 16bpp");
// Make sure we can write to the video registers.
WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
CP_FG_ROP(ppdev, pjBase, (rop4 >> 8)); CP_BK_ROP(ppdev, pjBase, (rop4 & 0xff)); CP_DST_Y_OFFSET(ppdev, pjBase, (lDelta - 1)); CP_PAT_WRAP(ppdev, pjBase, SOLID_COLOR_PATTERN_WRAP); CP_PAT_Y_OFFSET(ppdev, pjBase, (SOLID_COLOR_PATTERN_OFFSET - 1)); CP_PAT_ADDR(ppdev, pjBase, ppdev->ulSolidColorOffset);
ulSolidColor = rbc.iSolidColor;
if (cBpp == 1) { ulSolidColor &= 0x000000FF; // We may get some extraneous data in the
ulSolidColor |= ulSolidColor << 8; } if (cBpp <= 2) { ulSolidColor &= 0x0000FFFF; ulSolidColor |= ulSolidColor << 16; }
// Set the color in offscreen memory
WAIT_FOR_IDLE_ACL(ppdev, pjBase);
if (ppdev->bAutoBanking) { *(PULONG)(ppdev->pjScreen + ppdev->ulSolidColorOffset) = ulSolidColor; } else { CP_MMU_BP0(ppdev, pjBase, ppdev->ulSolidColorOffset); CP_WRITE_MMU_DWORD(ppdev, 0, 0, ulSolidColor); }
//
// Fill the list of rectangles
//
do { WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
CP_XCNT(ppdev, pjBase, ((prcl->right - prcl->left) * cBpp - 1)); CP_YCNT(ppdev, pjBase, (prcl->bottom - prcl->top - 1));
// Set the blt destination address as the base address of MMU aperture 2
// Then start the accelerated operation by writing something to this
// aperture.
SET_DEST_ADDR(ppdev, ((prcl->top * lDelta) + (cBpp * prcl->left))); START_ACL(ppdev);
prcl++;
} while (--c != 0); }
VOID vSolidFillScr24( PDEV* ppdev, LONG c, // Can't be zero
RECTL* prcl, // Array of relative coordinate destination rects
ROP4 rop4, // Obvious?
RBRUSH_COLOR rbc, // Drawing color is rbc.iSolidColor
POINTL* pptlBrush) // Not used
{ BYTE* pjBase = ppdev->pjBase; LONG lDelta = ppdev->lDelta; ULONG ulSolidColor = rbc.iSolidColor;
DISPDBG((10,"vSolidFillScr24 called"));
ASSERTDD(c > 0, "Can't handle zero rectangles");
ASSERTDD((ppdev->cBpp == 3), "vSolidFillScr24 called when not in 24bpp mode");
ASSERTDD(((ppdev->ulChipID == W32P) || (ppdev->ulChipID == ET6000)), "24bpp solid fills only accelerated for w32p/ET6000");
#define CBPP 3
//
// Make sure we can write to the video registers.
//
WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
CP_FG_ROP(ppdev, pjBase, (rop4 >> 8)); CP_BK_ROP(ppdev, pjBase, (rop4 & 0xff)); CP_DST_Y_OFFSET(ppdev, pjBase, (lDelta - 1)); //
// This must be special cased for the ET6000. I'm not sure why it worked
// for the others, because we have a 3 byte wide pattern, but were setting the
// pattern wrap for a 4 byte wide pattern. We were also setting the Y_offset
// to be 3 when it should be 2, which really means 3 bytes per line. Strange.
//
// Anyway, I've left the code for the others in place and it will get executed
// for them.
//
CP_PAT_WRAP(ppdev, pjBase, SOLID_COLOR_PATTERN_WRAP_24BPP); // 1 line, 3 bytes per line
CP_PAT_Y_OFFSET(ppdev, pjBase, (SOLID_COLOR_PATTERN_OFFSET_24BPP - 1)); // indicates 3 bytes per line
CP_PAT_ADDR(ppdev, pjBase, ppdev->ulSolidColorOffset);
// Set the color in offscreen memory
WAIT_FOR_IDLE_ACL(ppdev, pjBase);
if (ppdev->bAutoBanking) { *(PULONG)(ppdev->pjScreen + ppdev->ulSolidColorOffset) = ulSolidColor; } else { CP_MMU_BP0(ppdev, pjBase, ppdev->ulSolidColorOffset); CP_WRITE_MMU_DWORD(ppdev, 0, 0, ulSolidColor); }
//
// We know that the ACL is idle now, so no wait
//
CP_PEL_DEPTH(ppdev, pjBase, HW_PEL_DEPTH_24BPP);
//
// Fill the list of rectangles
//
do { WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
if (ppdev->ulChipID == ET6000) { CP_XCNT(ppdev, pjBase, (((prcl->right - prcl->left) * CBPP) - 1)); } else { CP_XCNT(ppdev, pjBase, ((prcl->right - prcl->left - 1) * CBPP)); } CP_YCNT(ppdev, pjBase, (prcl->bottom - prcl->top - 1));
// Set the blt destination address as the base address of MMU aperture 2
// Then start the accelerated operation by writing something to this
// aperture.
SET_DEST_ADDR(ppdev, ((prcl->top * lDelta) + (CBPP * prcl->left))); START_ACL(ppdev);
prcl++;
} while (--c != 0);
// set pixel depth back to 1
WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase); CP_PEL_DEPTH(ppdev, pjBase, HW_PEL_DEPTH_8BPP); #undef CBPP
}
/**************************************************************************
* * Does a screen-to-screen blt of a list of rectangles. * **************************************************************************/
VOID vScrToScr( PDEV* ppdev, LONG c, // Can't be zero
RECTL* prcl, // Array of relative coordinates destination rectangles
ROP4 rop4, // Obvious?
POINTL* pptlSrc, // Original unclipped source point
RECTL* prclDst) // Original unclipped destination rectangle
{ LONG dx; LONG dy; // Add delta to destination to get source
LONG xyOffset = ppdev->xyOffset; BYTE* pjBase = ppdev->pjBase; LONG lDelta = ppdev->lDelta; LONG cBpp = ppdev->cBpp;
DISPDBG((10,"vScrToScr called"));
ASSERTDD(c > 0, "Can't handle zero rectangles");
//
// The src-dst delta will be the same for all rectangles
//
dx = pptlSrc->x - prclDst->left; dy = pptlSrc->y - prclDst->top;
//
// Make sure we can write to the video registers.
//
WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
CP_FG_ROP(ppdev, pjBase, (rop4 >> 8)); CP_BK_ROP(ppdev, pjBase, (rop4 & 0xff)); CP_SRC_WRAP(ppdev, pjBase, NO_PATTERN_WRAP); CP_SRC_Y_OFFSET(ppdev, pjBase, (lDelta - 1)); CP_DST_Y_OFFSET(ppdev, pjBase, (lDelta - 1));
// ### I don't think this is necessary - WAIT_FOR_IDLE_ACL(ppdev, pjBase);
//
// The accelerator may not be as fast at doing right-to-left copies, so
// only do them when the rectangles truly overlap:
//
if (!OVERLAP(prclDst, pptlSrc)) goto Top_Down_Left_To_Right;
if (prclDst->top <= pptlSrc->y) { if (prclDst->left <= pptlSrc->x) {
Top_Down_Left_To_Right:
//
// Top to Bottom - Left to Right
//
DISPDBG((12,"Top to Bottom - Left to Right"));
CP_XY_DIR(ppdev, pjBase, 0); // Top to Bottom - Left to Right
do {
WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
CP_XCNT(ppdev, pjBase, (cBpp * (prcl->right - prcl->left) - 1)); CP_YCNT(ppdev, pjBase, (prcl->bottom - prcl->top - 1));
CP_SRC_ADDR(ppdev, pjBase, (xyOffset + ((prcl->top + dy) * lDelta) + cBpp * (prcl->left + dx)));
// Set the blt destination address as the base address of MMU aperture 2
// Then start the accelerated operation by writing something to this
// aperture.
SET_DEST_ADDR(ppdev, ((prcl->top * lDelta) + (cBpp * prcl->left))); START_ACL(ppdev);
prcl++;
} while (--c != 0); } else { //
// Top to Bottom - Right to left
//
DISPDBG((12,"Top to Bottom - Right to left"));
CP_XY_DIR(ppdev, pjBase, RIGHT_TO_LEFT);
do {
WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
CP_XCNT(ppdev, pjBase, (cBpp * (prcl->right - prcl->left) - 1)); CP_YCNT(ppdev, pjBase, (prcl->bottom - prcl->top - 1));
CP_SRC_ADDR(ppdev, pjBase, (xyOffset + ((prcl->top + dy) * lDelta) + cBpp * (prcl->right + dx) - 1));
// Set the blt destination address as the base address of MMU aperture 2
// Then start the accelerated operation by writing something to this
// aperture.
SET_DEST_ADDR(ppdev, ((prcl->top * lDelta) + (cBpp * prcl->right) - 1)); START_ACL(ppdev);
prcl++;
} while (--c != 0); } } else { if (prclDst->left <= pptlSrc->x) { //
// Bottom to Top - Left to Right
//
DISPDBG((12,"Bottom to Top - Left to Right"));
CP_XY_DIR(ppdev, pjBase, BOTTOM_TO_TOP);
do {
WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
CP_XCNT(ppdev, pjBase, (cBpp * (prcl->right - prcl->left) - 1)); CP_YCNT(ppdev, pjBase, (prcl->bottom - prcl->top - 1));
CP_SRC_ADDR(ppdev, pjBase, (xyOffset + ((prcl->bottom - 1 + dy) * lDelta) + cBpp * (prcl->left + dx)));
// Set the blt destination address as the base address of MMU aperture 2
// Then start the accelerated operation by writing something to this
// aperture.
SET_DEST_ADDR(ppdev, (((prcl->bottom - 1) * lDelta) + (cBpp * prcl->left))); START_ACL(ppdev);
prcl++;
} while (--c != 0); } else { //
// Bottom to Top - Right to Left
//
DISPDBG((12,"Bottom to Top - Right to Left"));
CP_XY_DIR(ppdev, pjBase, (BOTTOM_TO_TOP | RIGHT_TO_LEFT));
do {
WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
CP_XCNT(ppdev, pjBase, (cBpp * (prcl->right - prcl->left) - 1)); CP_YCNT(ppdev, pjBase, (prcl->bottom - prcl->top - 1));
CP_SRC_ADDR(ppdev, pjBase, (xyOffset + ((prcl->bottom - 1 + dy) * lDelta) + cBpp * (prcl->right + dx) - 1));
// Set the blt destination address as the base address of MMU aperture 2
// Then start the accelerated operation by writing something to this
// aperture.
SET_DEST_ADDR(ppdev, (((prcl->bottom - 1) * lDelta) + cBpp * (prcl->right) - 1)); START_ACL(ppdev);
prcl++;
} while (--c != 0); } }
WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase); CP_XY_DIR(ppdev, pjBase, 0); // Top to Bottom - Left to Right
}
/**************************************************************************
* * Does a monochrome expansion to video memory. * * Make this Xfer1to8bpp and create another for Xfer1to16bpp? * **************************************************************************/
VOID vSlowXfer1bpp( // Type FNXFER
PDEV* ppdev, LONG c, // Count of rectangles, can't be zero
RECTL* prcl, // List of destination rectangles, in relative
// coordinates
ROP4 rop4, // Actually had better be a rop3
SURFOBJ* psoSrc, // Source surface
POINTL* pptlSrc, // Original unclipped source point
RECTL* prclDst, // Original unclipped destination rectangle
XLATEOBJ* pxlo) // Translate that provides color-expansion information
{ LONG dx; LONG dy; LONG lSrcDelta; BYTE* pjSrcScan0; BYTE* pjSrc; LONG cjSrc; LONG cjTrail; LONG culSrc; BYTE jFgRop3; BYTE jBgRop3; BOOL bW32p;
ULONG ulSolidColorOffset = ppdev->ulSolidColorOffset; BYTE* pjBase = ppdev->pjBase; LONG lDelta = ppdev->lDelta; LONG cBpp = ppdev->cBpp; ULONG ulFgColor = pxlo->pulXlate[1]; ULONG ulBgColor = pxlo->pulXlate[0];
LONG xyOffset = (ppdev->cBpp * ppdev->xOffset) + (ppdev->yOffset * ppdev->lDelta);
DISPDBG((10,"vSlowXfer1bpp called"));
DISPDBG((11,"rop4(%04x)", rop4));
ASSERTDD(c > 0, "Can't handle zero rectangles"); ASSERTDD(pptlSrc != NULL && psoSrc != NULL, "Can't have NULL sources"); ASSERTDD(ppdev->cBpp <= 2, "vSlowXfer1bpp doesn't work at 24 bpp");
bW32p = (ppdev->ulChipID == W32P);
WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
jFgRop3 = (BYTE)(rop4 >> 8); // point to src color where src is indicated
// point to pat color where src is indicated
if ((BYTE) rop4 != R3_NOP) { jBgRop3 = (BYTE)((rop4 & 0xc3) | ((rop4 & 0xf0) >> 2)); } else { jBgRop3 = (BYTE) rop4; }
DISPDBG((11,"jFgRop3(%04x), jBgRop3(%04x)", jFgRop3, jBgRop3));
CP_FG_ROP(ppdev, pjBase, jFgRop3); CP_BK_ROP(ppdev, pjBase, jBgRop3); CP_DST_Y_OFFSET(ppdev, pjBase, (lDelta - 1));
CP_PAT_WRAP(ppdev, pjBase, SOLID_COLOR_PATTERN_WRAP); CP_PAT_Y_OFFSET(ppdev, pjBase, (SOLID_COLOR_PATTERN_OFFSET - 1)); CP_SRC_WRAP(ppdev, pjBase, SOLID_COLOR_PATTERN_WRAP); CP_SRC_Y_OFFSET(ppdev, pjBase, (SOLID_COLOR_PATTERN_OFFSET - 1)); CP_PAT_ADDR(ppdev, pjBase, ulSolidColorOffset + 4); CP_SRC_ADDR(ppdev, pjBase, ulSolidColorOffset);
{ //
// Set the address where we're going to put the solid color data.
// All data transfers to video memory take place through aperature 0.
//
WAIT_FOR_IDLE_ACL(ppdev, pjBase);
CP_MMU_BP0(ppdev, pjBase, ppdev->ulSolidColorOffset);
//
// Set the color in offscreen memory
//
if (cBpp == 1) { ulFgColor |= ulFgColor << 8; ulBgColor |= ulBgColor << 8; } if (cBpp <= 2) { ulFgColor |= ulFgColor << 16; ulBgColor |= ulBgColor << 16; }
CP_WRITE_MMU_DWORD(ppdev, 0, 0, ulFgColor); CP_WRITE_MMU_DWORD(ppdev, 0, 4, ulBgColor); }
CP_ROUTING_CTRL(ppdev, pjBase, CPU_MIX_DATA);
dx = pptlSrc->x - prclDst->left; dy = pptlSrc->y - prclDst->top; // Add to destination to get source
pjSrcScan0 = psoSrc->pvScan0;
DISPDBG((2,"lSrcDelta(%x)", psoSrc->lDelta));
do { ULONG ulDst; RECTL rclSrc; RECTL rclDst; LONG xBitsPad; LONG xBitsUsed; LONG xBytesPad;
//
// load lSrcDelta inside the loop because we adjust it later.
//
lSrcDelta = psoSrc->lDelta;
rclDst = *prcl; rclSrc.left = rclDst.left + dx; rclSrc.right = rclDst.right + dx; rclSrc.top = rclDst.top + dy; rclSrc.bottom = rclDst.bottom + dy;
// x = prcl->left;
// y = prcl->top;
//
// Calculate number of bits used in first partial.
//
xBitsPad = rclSrc.left & 7; xBitsUsed = min((8-xBitsPad),(rclSrc.right-rclSrc.left)); xBytesPad = rclDst.left & 3;
if (xBitsPad != 0) // (0 < xBitsUsed < 8)
{
DISPDBG((2,"xBitsUsed(%d) xBitsPad(%d)", xBitsUsed, xBitsPad)); DISPDBG((2,"rclSrc(%d,%d,%d,%d) rclDst(%d,%d,%d,%d)", rclSrc.left, rclSrc.top, rclSrc.right, rclSrc.bottom, rclDst.left, rclDst.top, rclDst.right, rclDst.bottom));
WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
// Do the column of the first xBitsUsed pixels
if (!bW32p) { CP_BUS_SIZE(ppdev, pjBase, VIRTUAL_BUS_8_BIT); }
CP_XCNT(ppdev, pjBase, ((xBitsUsed * cBpp) - 1)); CP_YCNT(ppdev, pjBase, (rclDst.bottom - rclDst.top - 1));
pjSrc = pjSrcScan0 + rclSrc.top * lSrcDelta + (rclSrc.left >> 3);
ulDst = (rclDst.top * lDelta) + (cBpp * rclDst.left); ulDst += xyOffset;
if (bW32p) { // We will align the data ourselves.
CP_MIX_ADDR(ppdev, pjBase, 0); CP_MIX_Y_OFFSET(ppdev, pjBase, -1); }
CP_MMU_BP2(ppdev, pjBase, ulDst);
CP_DST_ADDR(ppdev, pjBase, ulDst);
if (bW32p) WAIT_FOR_BUSY_ACL(ppdev, pjBase);
if (cBpp == 1) { LONG i;
for (i = rclDst.bottom - rclDst.top; i; i--) { CP_WRITE_MMU_BYTE(ppdev, 2, 0, jReverse[(*pjSrc << xBitsPad) & 0xff]); pjSrc += lSrcDelta; } } else // if (cBpp == 2)
{ LONG i; WORD wTmp; BYTE * pjCvt = (BYTE *) &wTmp;
for (i = rclDst.bottom - rclDst.top; i; i--) { wTmp = wReverse2x[(*pjSrc << xBitsPad) & 0xff]; CP_WRITE_MMU_BYTE(ppdev, 2, 0, pjCvt[0]); if (xBitsUsed > 4) { CP_WRITE_MMU_BYTE(ppdev, 2, 1, pjCvt[1]); } pjSrc += lSrcDelta; } }
rclSrc.left += xBitsUsed; rclDst.left += xBitsUsed; }
// If the entire blt wasn't contained in the first partial byte,
// the we have to do the rest.
if (rclSrc.left < rclSrc.right) { DISPDBG((2,"rclSrc(%d,%d,%d,%d) rclDst(%d,%d,%d,%d)", rclSrc.left, rclSrc.top, rclSrc.right, rclSrc.bottom, rclDst.left, rclDst.top, rclDst.right, rclDst.bottom));
//
// Legend has it that we need a WAIT_FOR_IDLE_ACL, instead of just
// a WAIT_FOR_EMPTY_ACL_QUEUE, to prevent hanging W32
//
WAIT_FOR_IDLE_ACL(ppdev, pjBase);
if (!bW32p) { CP_BUS_SIZE(ppdev, pjBase, VIRTUAL_BUS_32_BIT); }
CP_XCNT(ppdev, pjBase, (cBpp * (rclDst.right - rclDst.left) - 1)); CP_YCNT(ppdev, pjBase, (rclDst.bottom - rclDst.top - 1));
cjSrc = (((rclSrc.right * cBpp) + 7) >> 3) - ((rclSrc.left * cBpp) >> 3); // # bytes to transfer
culSrc = (cjSrc >> 2); cjTrail = (cjSrc & 3);
DISPDBG((2,"cjSrc(%d)", cjSrc)); DISPDBG((2,"culSrc(%d)", culSrc)); DISPDBG((2,"cjTrail(%d)", cjTrail));
pjSrc = pjSrcScan0 + rclSrc.top * lSrcDelta + (rclSrc.left >> 3);
DISPDBG((2,"pjSrc(%x)", pjSrc));
ulDst = (rclDst.top * lDelta) + (cBpp * rclDst.left); ulDst += xyOffset;
if (bW32p) { // We will align the data ourselves.
CP_MIX_ADDR(ppdev, pjBase, 0); CP_MIX_Y_OFFSET(ppdev, pjBase, -1); } CP_MMU_BP2(ppdev, pjBase, ulDst);
CP_DST_ADDR(ppdev, pjBase, ulDst);
if (bW32p) WAIT_FOR_BUSY_ACL(ppdev, pjBase);
{ LONG i; LONG j;
if (cBpp == 1) { lSrcDelta -= cjSrc;
for (i = rclDst.bottom - rclDst.top; i; i--) { ULONG cjTmp = cjTrail; volatile BYTE * pjTmp; volatile ULONG * pulTmp;
DISPDBG((2,"pjSrc(%x)", pjSrc));
for (j = culSrc; j; j--) { ULONG ulTmp = 0;
ulTmp |= (ULONG)jReverse[*pjSrc++]; ulTmp |= (ULONG)jReverse[*pjSrc++] << 8; ulTmp |= (ULONG)jReverse[*pjSrc++] << 16; ulTmp |= (ULONG)jReverse[*pjSrc++] << 24; CP_WRITE_MMU_DWORD(ppdev, 2, 0, ulTmp);
DISPDBG((2,"Src(%08x) Tmp(%08x)", *((ULONG *)(pjSrc-4)), ulTmp )); }
if (bW32p) { int ndx = 0; while (cjTmp--) { CP_WRITE_MMU_BYTE(ppdev, 2, ndx, jReverse[*pjSrc]); pjSrc++; ndx++; } } else { if (cjTmp) { ULONG ulTmp = 0; if (cjTmp == 1) goto do_1_byte; if (cjTmp == 2) goto do_2_bytes;
//
// do all three bytes of the partial
//
ulTmp |= (ULONG)jReverse[pjSrc[2]] << 16; do_2_bytes: ulTmp |= (ULONG)jReverse[pjSrc[1]] << 8; do_1_byte: ulTmp |= (ULONG)jReverse[pjSrc[0]];
//*pulTmp = ulTmp;
CP_WRITE_MMU_DWORD(ppdev, 2, 0, ulTmp);
pjSrc += cjTmp; } }
pjSrc += lSrcDelta; } } else // if (cBpp == 2)
{ lSrcDelta -= (cjSrc + 1) >> 1;
for (i = rclDst.bottom - rclDst.top; i; i--) { ULONG cjTmp = cjTrail; int ndx = 0;
DISPDBG((2,"pjSrc(%x)", pjSrc));
for (j = culSrc; j; j--) { ULONG ulTmp;
ulTmp = (ULONG)wReverse2x[*pjSrc++]; ulTmp |= (ULONG)wReverse2x[*pjSrc++] << 16; CP_WRITE_MMU_DWORD(ppdev, 2, 0, ulTmp); }
if (bW32p) { while (cjTmp--) { WORD wCvt; BYTE * pjCvt = (BYTE *) &wCvt;
wCvt = wReverse2x[*pjSrc++]; CP_WRITE_MMU_BYTE(ppdev, 2, ndx, pjCvt[0]); ndx++; if (cjTmp) { CP_WRITE_MMU_BYTE(ppdev, 2, ndx, pjCvt[1]); ndx++; cjTmp--; } } } else { if (cjTmp) { ULONG ulTmp;
ulTmp = (ULONG)wReverse2x[pjSrc[0]]; ulTmp |= (ULONG)wReverse2x[pjSrc[1]] << 16; CP_WRITE_MMU_DWORD(ppdev, 2, 0, ulTmp);
pjSrc += (cjTmp+1) >> 1; } }
pjSrc += lSrcDelta; } } } }
prcl++; } while (--c != 0);
WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase); CP_ROUTING_CTRL(ppdev, pjBase, 0); if (!bW32p) { CP_BUS_SIZE(ppdev, pjBase, VIRTUAL_BUS_8_BIT); } }
VOID vXferBlt8i( PDEV* ppdev, LONG c, // Count of rectangles, can't be zero
RECTL* prcl, // Array of relative coordinates destination rectangles
ROP4 rop4, // Obvious?
SURFOBJ* psoSrc, // Source surface
POINTL* pptlSrc, // Original unclipped source point
RECTL* prclDst, // Original unclipped destination rectangle
XLATEOBJ* pxlo) // Not used
{ BYTE* pjBase = ppdev->pjBase; BYTE* pjSrcScan0 = (BYTE*) psoSrc->pvScan0; LONG lDeltaDst = ppdev->lDelta; LONG lDeltaSrc = psoSrc->lDelta; POINTL ptlSrc = *pptlSrc; RECTL rclDst = *prclDst; LONG cBpp = ppdev->cBpp; SIZEL sizlBlt; ULONG ulDstAddr; BYTE* pjSrc; INT ix, iy; LONG dx; LONG dy; // Add delta to destination to get source
LONG cjLead; LONG cjTrail; LONG culMiddle; LONG xyOffset = (cBpp * ppdev->xOffset) + (lDeltaDst * ppdev->yOffset);
//
// The src-dst delta will be the same for all rectangles
//
dx = ptlSrc.x - rclDst.left; dy = ptlSrc.y - rclDst.top;
// Note: Legend has it that if we don't wait for the ACL to become idle,
// then the code will hang on the W32, but not on the W32i.
//
// Since we do a WAIT_FOR_IDLE_ACL we don't need to
// WAIT_FOR_EMPTY_ACL_QUEUE
WAIT_FOR_IDLE_ACL(ppdev, pjBase); CP_ROUTING_CTRL(ppdev, pjBase, CPU_SOURCE_DATA); CP_FG_ROP(ppdev, pjBase, (rop4 >> 8)); CP_DST_Y_OFFSET(ppdev, pjBase, (lDeltaDst - 1));
do { // Calculate blt dimensions in bytes
sizlBlt.cx = cBpp * (prcl->right - prcl->left); sizlBlt.cy = prcl->bottom - prcl->top;
pjSrc = pjSrcScan0 + ((prcl->top + dy) * lDeltaSrc) + ((prcl->left + dx) * cBpp);
cjTrail = cjLead = (LONG)((ULONG_PTR)pjSrc); cjLead = aulLeadCnt[cjLead & 3]; if (cjLead < sizlBlt.cx) { cjTrail += sizlBlt.cx; cjTrail &= 3; culMiddle = (sizlBlt.cx - (cjLead + cjTrail)) >> 2; } else { cjLead = sizlBlt.cx; cjTrail = 0; culMiddle = 0; }
ASSERTDD(culMiddle >= 0, "vXferBlt8i: culMiddle < 0");
ulDstAddr = (prcl->top * lDeltaDst) + (prcl->left * cBpp) + (xyOffset);
if ((sizlBlt.cx - (cjLead + cjTrail)) & 3) DISPDBG((0, "WARNING: cx - (cjLead+cjTail) not multiple of 4"));
DISPDBG((8, "rclSrc(%d,%d,%d,%d)", prcl->left+dx, prcl->top+dy, prcl->right+dx, prcl->bottom+dy ));
DISPDBG((8, "rclDst(%d,%d,%d,%d)", prcl->left, prcl->top, prcl->right, prcl->bottom ));
DISPDBG((8, "pjSrc(%x) cx(%d) ulDstAddr(%xh) (%d,%d,%d)", pjSrc, sizlBlt.cx, ulDstAddr, cjLead, culMiddle, cjTrail ));
if (cjLead) { WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase); CP_XCNT(ppdev, pjBase, (cjLead - 1)); CP_YCNT(ppdev, pjBase, (sizlBlt.cy - 1)); CP_MMU_BP2(ppdev, pjBase, (ulDstAddr)); afnXferI_Narrow[cjLead](ppdev, pjSrc, 0, sizlBlt.cy, lDeltaSrc); }
if (cjTrail) { LONG cjOffset = cjLead + (culMiddle<<2); WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase); CP_XCNT(ppdev, pjBase, (cjTrail - 1)); CP_YCNT(ppdev, pjBase, (sizlBlt.cy - 1)); CP_MMU_BP2(ppdev, pjBase, (ulDstAddr+cjOffset)); afnXferI_Narrow[cjTrail](ppdev, (pjSrc+cjOffset), 0, sizlBlt.cy, lDeltaSrc); }
if (culMiddle) { LONG cjOffset = cjLead; WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase); CP_XCNT(ppdev, pjBase, ((culMiddle<<2) - 1)); CP_YCNT(ppdev, pjBase, (sizlBlt.cy - 1)); CP_BUS_SIZE(ppdev, pjBase, VIRTUAL_BUS_32_BIT); CP_MMU_BP2(ppdev, pjBase, (ulDstAddr+cjOffset)); vXfer_DWORDS(ppdev, (pjSrc+cjOffset), culMiddle, sizlBlt.cy, lDeltaSrc); WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase); CP_BUS_SIZE(ppdev, pjBase, VIRTUAL_BUS_8_BIT); }
prcl++; } while (--c != 0);
WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase); CP_ROUTING_CTRL(ppdev, pjBase, 0); }
//////////////////////////////////////////////////////////////////////
// vXferBlt8p
//
// Host-to-screen blt that feeds source bitmap data to the accelerator
// through the CPU.  Routing control is set to CPU_SOURCE_DATA for the
// duration of the call and restored to 0 on exit.
//
// Each rectangle is split into up to three pieces based on the low two
// bits of the source address: a narrow lead piece (byte count taken from
// aulLeadCnt[]), a run of whole DWORDs, and a narrow trail piece.  Each
// piece is issued as its own accelerator operation covering all 'cy'
// scans of the rectangle.  If the lead count is not smaller than the
// whole scan width, the entire rectangle is sent as a single narrow piece.
//
// Only the foreground rop (rop4 >> 8) is programmed here.

VOID vXferBlt8p(
    PDEV*       ppdev,
    LONG        c,          // Count of rectangles, can't be zero
    RECTL*      prcl,       // Array of relative coordinates destination rectangles
    ROP4        rop4,       // Rop; only the foreground byte (rop4 >> 8) is used
    SURFOBJ*    psoSrc,     // Source surface
    POINTL*     pptlSrc,    // Original unclipped source point
    RECTL*      prclDst,    // Original unclipped destination rectangle
    XLATEOBJ*   pxlo)       // Not used
{
    BYTE*   pjBase      = ppdev->pjBase;
    BYTE*   pjSrcScan0  = (BYTE*) psoSrc->pvScan0;
    LONG    lDeltaDst   = ppdev->lDelta;
    LONG    lDeltaSrc   = psoSrc->lDelta;
    LONG    cBpp        = ppdev->cBpp;
    SIZEL   sizlBlt;
    ULONG   ulDstAddr;
    BYTE*   pjSrc;
    LONG    dx;
    LONG    dy;         // Add delta to destination to get source
    LONG    iLeadNdx;
    LONG    cjLead;
    LONG    cjTrail;
    LONG    culMiddle;
    LONG    xyOffset    = (cBpp * ppdev->xOffset) +
                          (lDeltaDst * ppdev->yOffset);

    //
    // The src-dst delta will be the same for all rectangles
    //

    dx = pptlSrc->x - prclDst->left;
    dy = pptlSrc->y - prclDst->top;

    WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
    CP_ROUTING_CTRL(ppdev, pjBase, CPU_SOURCE_DATA);
    CP_FG_ROP(ppdev, pjBase, (rop4 >> 8));
    CP_DST_Y_OFFSET(ppdev, pjBase, (lDeltaDst - 1));
    CP_SRC_ADDR(ppdev, pjBase, 0);
    CP_SRC_Y_OFFSET(ppdev, pjBase, -1);

    do {
        // Calculate blt dimensions in bytes

        sizlBlt.cx = cBpp * (prcl->right - prcl->left);
        sizlBlt.cy = prcl->bottom - prcl->top;

        pjSrc = pjSrcScan0 + ((prcl->top + dy) * lDeltaSrc)
                           + ((prcl->left + dx) * cBpp);

        // Split the scan according to the low two bits of the source
        // address.  cjTrail temporarily holds the (truncated) source
        // address so that the end address modulo 4 can be derived from it.

        cjTrail = iLeadNdx = (LONG)((ULONG_PTR)pjSrc);
        iLeadNdx &= 3;
        cjLead = aulLeadCnt[iLeadNdx];

        if (cjLead < sizlBlt.cx)
        {
            cjTrail += sizlBlt.cx;
            cjTrail &= 3;
            culMiddle = (sizlBlt.cx - (cjLead + cjTrail)) >> 2;
        }
        else
        {
            // The whole scan fits in the lead piece

            cjLead = sizlBlt.cx;
            cjTrail = 0;
            culMiddle = 0;
        }

        ASSERTDD(culMiddle >= 0, "vXferBlt8p: culMiddle < 0");

        ulDstAddr = (prcl->top * lDeltaDst) + (prcl->left * cBpp) + (xyOffset);

        if ((sizlBlt.cx - (cjLead + cjTrail)) & 3)
            DISPDBG((0, "WARNING: cx - (cjLead+cjTail) not multiple of 4"));

        DISPDBG((8, "rclSrc(%d,%d,%d,%d)", prcl->left+dx, prcl->top+dy, prcl->right+dx, prcl->bottom+dy ));
        DISPDBG((8, "rclDst(%d,%d,%d,%d)", prcl->left, prcl->top, prcl->right, prcl->bottom ));
        DISPDBG((8, "pjSrc(%x) cx(%d) ulDstAddr(%xh) (%d,%d,%d)", pjSrc, sizlBlt.cx, ulDstAddr, cjLead, culMiddle, cjTrail ));

        // Lead piece.
        // NOTE(review): the original comment at each CP_DST_ADDR /
        // WAIT_FOR_BUSY_ACL pair below read "The next two turn off src
        // to dst alignment" -- confirm against the hardware documentation.

        if (cjLead)
        {
            WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
            CP_XCNT(ppdev, pjBase, (cjLead - 1));
            CP_YCNT(ppdev, pjBase, (sizlBlt.cy - 1));
            CP_DST_ADDR(ppdev, pjBase, (ulDstAddr));
            WAIT_FOR_BUSY_ACL(ppdev, pjBase);
            afnXferP_Narrow[cjLead](ppdev, pjSrc, 0, sizlBlt.cy, lDeltaSrc);
        }

        // Trail piece (issued before the middle, as in the original code)

        if (cjTrail)
        {
            LONG cjOffset = cjLead + (culMiddle<<2);

            WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
            CP_XCNT(ppdev, pjBase, (cjTrail - 1));
            CP_YCNT(ppdev, pjBase, (sizlBlt.cy - 1));
            CP_DST_ADDR(ppdev, pjBase, (ulDstAddr+cjOffset));
            WAIT_FOR_BUSY_ACL(ppdev, pjBase);
            afnXferP_Narrow[cjTrail](ppdev, (pjSrc+cjOffset), 0, sizlBlt.cy, lDeltaSrc);
        }

        // Middle piece: whole DWORDs

        if (culMiddle)
        {
            LONG cjOffset = cjLead;

            WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
            CP_XCNT(ppdev, pjBase, ((culMiddle<<2) - 1));
            CP_YCNT(ppdev, pjBase, (sizlBlt.cy - 1));
            CP_DST_ADDR(ppdev, pjBase, (ulDstAddr+cjOffset));
            WAIT_FOR_BUSY_ACL(ppdev, pjBase);
            vXfer_DWORDS(ppdev, (pjSrc+cjOffset), culMiddle, sizlBlt.cy, lDeltaSrc);
        }

        prcl++;
    } while (--c != 0);

    // Restore the routing control register before returning

    WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
    CP_ROUTING_CTRL(ppdev, pjBase, 0);
}
//////////////////////////////////////////////////////////////////////
// N DWORD low level blt routines for vXferNativeI and vXferNativeP
// A DWORD at a time
// Writes 'cy' scans of 'culX' DWORDs each from host memory to offset 0 of
// MMU aperture 2.  'pjSrc' is the first scan; successive scans are
// 'lDeltaSrc' bytes apart.

VOID vXfer_DWORDS(PPDEV ppdev, BYTE* pjSrc, LONG culX, LONG cy, LONG lDeltaSrc)
{
    BYTE*   pjBase = ppdev->pjBase; // not referenced directly; kept from the
                                    // original in case a macro expansion
                                    // expects it (macros defined elsewhere)
    BYTE*   pjScan;
    ULONG*  pulSrc;
    LONG    cScansLeft;
    LONG    culLeft;

    // We had better be in 32 bit virtual bus mode

    pjScan = pjSrc;
    for (cScansLeft = cy; cScansLeft > 0; cScansLeft--)
    {
        pulSrc = (ULONG*) pjScan;

        for (culLeft = culX; culLeft > 0; culLeft--)
        {
            CP_WRITE_MMU_DWORD(ppdev, 2, 0, *pulSrc);
            pulSrc++;
        }

        pjScan += lDeltaSrc;
    }
}
// A BYTE at a time
// Byte-wide counterpart of the DWORD transfer: writes 'cy' scans of
// (culX << 2) bytes each, one byte at a time, to offset 0 of MMU
// aperture 2.  Successive scans are 'lDeltaSrc' bytes apart.

VOID vXfer_BYTES(PPDEV ppdev, BYTE* pjSrc, LONG culX, LONG cy, LONG lDeltaSrc)
{
    BYTE*   pjBase = ppdev->pjBase; // not referenced directly; kept from the
                                    // original in case a macro expansion
                                    // expects it (macros defined elsewhere)
    BYTE*   pjScan;
    BYTE*   pjByte;
    LONG    cjPerScan = (culX << 2);
    LONG    cScansLeft;
    LONG    cjLeft;

    // We had better be in 8 bit virtual bus mode

    pjScan = pjSrc;
    for (cScansLeft = cy; cScansLeft > 0; cScansLeft--)
    {
        pjByte = pjScan;

        for (cjLeft = cjPerScan; cjLeft > 0; cjLeft--)
        {
            CP_WRITE_MMU_BYTE(ppdev, 2, 0, *pjByte);
            pjByte++;
        }

        pjScan += lDeltaSrc;
    }
}
//////////////////////////////////////////////////////////////////////
// Narrow low level blt routines for vXferNativeI
// Writes one byte per scan, for 'cy' scans, to offset 0 of MMU aperture 2.
// 'culX' is not used by this routine.

VOID vXferI_1_Byte(PPDEV ppdev, BYTE* pjSrc, LONG culX, LONG cy, LONG lDeltaSrc)
{
    BYTE*   pjBase = ppdev->pjBase; // not referenced directly; kept from the
                                    // original in case a macro expansion
                                    // expects it (macros defined elsewhere)
    LONG    cScansLeft;

    for (cScansLeft = cy; cScansLeft > 0; cScansLeft--)
    {
        CP_WRITE_MMU_BYTE(ppdev, 2, 0, *pjSrc);

        pjSrc += lDeltaSrc;
    }
}
// Writes two consecutive source bytes per scan, for 'cy' scans, as two
// separate byte writes to offset 0 of MMU aperture 2.  'culX' is not used
// by this routine.

VOID vXferI_2_Bytes(PPDEV ppdev, BYTE* pjSrc, LONG culX, LONG cy, LONG lDeltaSrc)
{
    BYTE*   pjBase = ppdev->pjBase; // not referenced directly; kept from the
                                    // original in case a macro expansion
                                    // expects it (macros defined elsewhere)
    LONG    cScansLeft;

    for (cScansLeft = cy; cScansLeft > 0; cScansLeft--)
    {
        CP_WRITE_MMU_BYTE(ppdev, 2, 0, pjSrc[0]);
        CP_WRITE_MMU_BYTE(ppdev, 2, 0, pjSrc[1]);

        pjSrc += lDeltaSrc;
    }
}
// Writes three consecutive source bytes per scan, for 'cy' scans, as three
// separate byte writes to offset 0 of MMU aperture 2.  'culX' is not used
// by this routine.

VOID vXferI_3_Bytes(PPDEV ppdev, BYTE* pjSrc, LONG culX, LONG cy, LONG lDeltaSrc)
{
    BYTE*   pjBase = ppdev->pjBase; // not referenced directly; kept from the
                                    // original in case a macro expansion
                                    // expects it (macros defined elsewhere)
    LONG    cScansLeft;

    for (cScansLeft = cy; cScansLeft > 0; cScansLeft--)
    {
        CP_WRITE_MMU_BYTE(ppdev, 2, 0, pjSrc[0]);
        CP_WRITE_MMU_BYTE(ppdev, 2, 0, pjSrc[1]);
        CP_WRITE_MMU_BYTE(ppdev, 2, 0, pjSrc[2]);

        pjSrc += lDeltaSrc;
    }
}
//////////////////////////////////////////////////////////////////////
// Narrow low level blt routines for vXferNativeP
// Writes one byte per scan, for 'cy' scans, to offset 'index' of MMU
// aperture 2.  Successive scans are 'lDeltaSrc' bytes apart in the source.

VOID vXferP_1_Byte(PPDEV ppdev, BYTE* pjSrc, LONG index, LONG cy, LONG lDeltaSrc)
{
    BYTE*   pjBase = ppdev->pjBase; // not referenced directly; kept from the
                                    // original in case a macro expansion
                                    // expects it (macros defined elsewhere)
    LONG    cScansLeft;

    for (cScansLeft = cy; cScansLeft > 0; cScansLeft--)
    {
        CP_WRITE_MMU_BYTE(ppdev, 2, index, *pjSrc);

        pjSrc += lDeltaSrc;
    }
}
// Writes two source bytes per scan, for 'cy' scans, as a single WORD write
// to offset 'index' of MMU aperture 2.  Successive scans are 'lDeltaSrc'
// bytes apart in the source.

VOID vXferP_2_Bytes(PPDEV ppdev, BYTE* pjSrc, LONG index, LONG cy, LONG lDeltaSrc)
{
    BYTE*   pjBase = ppdev->pjBase; // not referenced directly; kept from the
                                    // original in case a macro expansion
                                    // expects it (macros defined elsewhere)
    LONG    cScansLeft;

    for (cScansLeft = cy; cScansLeft > 0; cScansLeft--)
    {
        CP_WRITE_MMU_WORD(ppdev, 2, index, *((WORD*)pjSrc));

        pjSrc += lDeltaSrc;
    }
}
// Writes three source bytes per scan, for 'cy' scans, to MMU aperture 2
// starting at offset 'index'.  The three bytes go out as one BYTE write
// plus one WORD write; which comes first depends on whether 'index' is odd:
//
//   odd index:   BYTE at index,  then WORD at index+1
//   even index:  WORD at index,  then BYTE at index+2

VOID vXferP_3_Bytes(PPDEV ppdev, BYTE* pjSrc, LONG index, LONG cy, LONG lDeltaSrc)
{
    BYTE*   pjBase = ppdev->pjBase; // not referenced directly; kept from the
                                    // original in case a macro expansion
                                    // expects it (macros defined elsewhere)
    LONG    cScansLeft;
    LONG    fOddIndex = (index & 1);

    for (cScansLeft = cy; cScansLeft > 0; cScansLeft--)
    {
        if (fOddIndex)
        {
            CP_WRITE_MMU_BYTE(ppdev, 2, index, *pjSrc);
            CP_WRITE_MMU_WORD(ppdev, 2, index+1, *((WORD*)(pjSrc + 1)));
        }
        else
        {
            CP_WRITE_MMU_WORD(ppdev, 2, index, *((WORD*)pjSrc));
            CP_WRITE_MMU_BYTE(ppdev, 2, index+2, *(pjSrc + 2));
        }

        pjSrc += lDeltaSrc;
    }
}
// This routine was added to perform accelerated host to screen blts for the
// ET6000. The W32 had a path from host memory to display memory which allowed
// ROPs to be performed as the data was transferred. The ET6000 does not have
// that feature, so to provide accelerated host to screen support we must
// buffer each scanline of the source in offscreen memory and then perform
// a blt to move it into the appropriate area of display memory. This is
// much more efficient than hand coding each rop or punting to GDI.
// vXferET6000
//
// For every destination rectangle, copies the source bitmap one scan at a
// time into the offscreen blt buffer (ppdev->pohBltBuffer) through the
// frame buffer pointer ppdev->pjScreen, then programs a one-scan
// accelerator blt from that buffer to the destination.  The buffer offset
// is toggled by ppdev->lBltBufferPitch after every scan so that
// consecutive scans use alternating segments of the buffer.
//
// The high byte of rop4 is programmed as the foreground rop and the low
// byte as the background rop.

VOID vXferET6000(
    PDEV*       ppdev,
    LONG        c,          // Count of rectangles, can't be zero
    RECTL*      prcl,       // Array of relative coordinates destination rectangles
    ROP4        rop4,       // Foreground rop in high byte, background in low byte
    SURFOBJ*    psoSrc,     // Source surface
    POINTL*     pptlSrc,    // Original unclipped source point
    RECTL*      prclDst,    // Original unclipped destination rectangle
    XLATEOBJ*   pxlo)       // Not used
{
    BYTE*   pjBase      = ppdev->pjBase;
    BYTE*   pjSrcScan0  = (BYTE*) psoSrc->pvScan0;
    LONG    lDeltaDst   = ppdev->lDelta;
    LONG    lDeltaSrc   = psoSrc->lDelta;
    LONG    cBpp        = ppdev->cBpp;
    SIZEL   sizlBlt;
    ULONG   ulDstAddr;
    BYTE*   pjSrc;
    BYTE*   pjDst;
    BYTE*   pjTmp;
    LONG    ix;
    LONG    iy;
    LONG    dx;
    LONG    dy;         // Add delta to destination to get source
    LONG    iLeadNdx;
    LONG    cjLead;
    LONG    cjTrail;
    LONG    culMiddle;
    LONG    xyOffset    = (cBpp * ppdev->xOffset) +
                          (lDeltaDst * ppdev->yOffset);
    ULONG   ulBltBufferOffset = (cBpp * ppdev->pohBltBuffer->x) +
                                (lDeltaDst * ppdev->pohBltBuffer->y);
    ULONG   BltScanOffset = 0;

    //
    // The src-dst delta will be the same for all rectangles
    //

    dx = pptlSrc->x - prclDst->left;
    dy = pptlSrc->y - prclDst->top;

    // Static accelerator state, set once for all rectangles

    WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
    CP_FG_ROP(ppdev, pjBase, (rop4 >> 8));
    CP_BK_ROP(ppdev, pjBase, (rop4 & 0xff));
    CP_SRC_WRAP(ppdev, pjBase, NO_PATTERN_WRAP);
    CP_SRC_Y_OFFSET(ppdev, pjBase, (lDeltaDst - 1));
    CP_DST_Y_OFFSET(ppdev, pjBase, (lDeltaDst - 1));

    do {
        // Calculate blt dimensions in bytes

        sizlBlt.cx = cBpp * (prcl->right - prcl->left);
        sizlBlt.cy = prcl->bottom - prcl->top;

        pjSrc = pjSrcScan0 + ((prcl->top + dy) * lDeltaSrc)
                           + ((prcl->left + dx) * cBpp);

        pjTmp = pjSrc;

        // Split the scan into lead bytes, whole DWORDs and trail bytes
        // according to the low two bits of the source address.  cjTrail
        // temporarily holds the (truncated) source address.

        cjTrail = iLeadNdx = (LONG)((ULONG_PTR)pjSrc);
        iLeadNdx &= 3;
        cjLead = aulLeadCnt[iLeadNdx];

        if (cjLead < sizlBlt.cx)
        {
            cjTrail += sizlBlt.cx;
            cjTrail &= 3;
            culMiddle = (sizlBlt.cx - (cjLead + cjTrail)) >> 2;
        }
        else
        {
            // The whole scan fits in the lead piece

            cjLead = sizlBlt.cx;
            cjTrail = 0;
            culMiddle = 0;
        }

        ASSERTDD(culMiddle >= 0, "vXferET6000: culMiddle < 0");

        ulDstAddr = (prcl->top * lDeltaDst) + (prcl->left * cBpp) + (xyOffset);

        if ((sizlBlt.cx - (cjLead + cjTrail)) & 3)
            DISPDBG((0, "WARNING: cx - (cjLead+cjTail) not multiple of 4"));

        DISPDBG((8, "rclSrc(%d,%d,%d,%d)", prcl->left+dx, prcl->top+dy, prcl->right+dx, prcl->bottom+dy ));
        DISPDBG((8, "rclDst(%d,%d,%d,%d)", prcl->left, prcl->top, prcl->right, prcl->bottom ));
        DISPDBG((8, "pjSrc(%x) cx(%d) ulDstAddr(%xh) (%d,%d,%d)", pjSrc, sizlBlt.cx, ulDstAddr, cjLead, culMiddle, cjTrail ));

        for (iy = 0; iy < sizlBlt.cy; iy++)
        {
            // We'll first load the first scan line of
            // the BltBuffer and then load the second.  The second scan line
            // will be loaded into the BltBuffer while the first is still being
            // processed.  We'll alternate between the two segments of our
            // BltBuffer until all scans have been processed.

            pjDst = ppdev->pjScreen + ulBltBufferOffset + BltScanOffset;

            // Lead bytes

            for (ix = 0; ix < cjLead; ix++)
            {
                *pjDst++ = *pjTmp++;
            }

            // Whole DWORDs.  The pointers are advanced explicitly because
            // applying ++ to the result of a cast is not valid standard C.

            for (ix = 0; ix < culMiddle; ix++)
            {
                *((ULONG*)pjDst) = *((ULONG*)pjTmp);
                pjDst += sizeof(ULONG);
                pjTmp += sizeof(ULONG);
            }

            // Trail bytes

            for (ix = 0; ix < cjTrail; ix++)
            {
                *pjDst++ = *pjTmp++;
            }

            // Now that we've loaded our scanline into a segment of our BltBuffer,
            // we need to trigger an accelerator operation to transfer it into
            // visible screen memory.  Our static stuff will have already been setup
            // prior to entering any of our loops.

            WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
            CP_XCNT(ppdev, pjBase, (sizlBlt.cx - 1));
            CP_YCNT(ppdev, pjBase, 0);          // Only 1 scan at a time
            CP_SRC_ADDR(ppdev, pjBase, (ulBltBufferOffset + BltScanOffset));
            WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
            CP_DST_ADDR(ppdev, pjBase, ulDstAddr);

            // Switch to the other buffer segment and step to the next scan

            BltScanOffset ^= ppdev->lBltBufferPitch;
            pjTmp = (pjSrc += lDeltaSrc);
            ulDstAddr += lDeltaDst;
        } // next cy

        prcl++;
    } while (--c != 0);
}

/**************************************************************************
*
* Does a monochrome expansion to video memory.
*
**************************************************************************/
// Expands a 1bpp source bitmap to the screen.  The foreground and
// background colors (pxlo->pulXlate[1] and pxlo->pulXlate[0]) are written
// to the solid color area at ppdev->ulSolidColorOffset, and each scan of
// the monochrome source is copied into the offscreen blt buffer
// (ppdev->pohBltBuffer) and used from there as the mix map for a one-scan
// accelerator operation.  The buffer offset is toggled by
// ppdev->lBltBufferPitch after every scan.
//
// On exit the routing control register is set to 0x33 and the pel depth
// register to 0.

VOID vET6000SlowXfer1bpp(   // Type FNXFER
    PDEV*       ppdev,
    LONG        c,          // Count of rectangles, can't be zero
    RECTL*      prcl,       // List of destination rectangles, in relative
                            //   coordinates
    ROP4        rop4,       // Actually had better be a rop3
    SURFOBJ*    psoSrc,     // Source surface
    POINTL*     pptlSrc,    // Original unclipped source point
    RECTL*      prclDst,    // Original unclipped destination rectangle
    XLATEOBJ*   pxlo)       // Translate that provides color-expansion information
{
    LONG    dx;
    LONG    dy;
    LONG    lSrcDelta;
    BYTE*   pjSrcScan0;
    BYTE*   pjSrc;
    BYTE    jFgRop3;
    BYTE    jBgRop3;

    ULONG   ulSolidColorOffset = ppdev->ulSolidColorOffset;
    BYTE*   pjBase      = ppdev->pjBase;
    LONG    lDelta      = ppdev->lDelta;
    LONG    cBpp        = ppdev->cBpp;
    ULONG   ulFgColor   = pxlo->pulXlate[1];
    ULONG   ulBgColor   = pxlo->pulXlate[0];

    LONG    xyOffset    = (ppdev->cBpp * ppdev->xOffset) +
                          (ppdev->yOffset * ppdev->lDelta);
    LONG    lBltBuffer  = (ppdev->pohBltBuffer->x * ppdev->cBpp) +
                          (ppdev->pohBltBuffer->y * ppdev->lDelta);

    DISPDBG((10,"vET6000SlowXfer1bpp called"));
    DISPDBG((11,"rop4(%04x)", rop4));

    ASSERTDD(c > 0, "Can't handle zero rectangles");
    ASSERTDD(pptlSrc != NULL && psoSrc != NULL, "Can't have NULL sources");

    WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);

    // point to src color where src is indicated

    jFgRop3 = (BYTE)(rop4 >> 8);

    // point to pat color where src is indicated

    if ((BYTE) rop4 != R3_NOP)
    {
        jBgRop3 = (BYTE)((rop4 & 0xc3) | ((rop4 & 0xf0) >> 2));
    }
    else
    {
        jBgRop3 = (BYTE) rop4;
    }

    DISPDBG((11,"jFgRop3(%04x), jBgRop3(%04x)", jFgRop3, jBgRop3));

    CP_FG_ROP(ppdev, pjBase, jFgRop3);
    CP_BK_ROP(ppdev, pjBase, jBgRop3);
    CP_DST_Y_OFFSET(ppdev, pjBase, (lDelta - 1));

    // Source reads the foreground color and pattern reads the background
    // color, both out of the solid color area written below.

    CP_PAT_WRAP(ppdev, pjBase, SOLID_COLOR_PATTERN_WRAP);
    CP_PAT_Y_OFFSET(ppdev, pjBase, (SOLID_COLOR_PATTERN_OFFSET - 1));
    CP_SRC_WRAP(ppdev, pjBase, SOLID_COLOR_PATTERN_WRAP);
    CP_SRC_Y_OFFSET(ppdev, pjBase, (SOLID_COLOR_PATTERN_OFFSET - 1));
    CP_PAT_ADDR(ppdev, pjBase, ulSolidColorOffset + 4);
    CP_SRC_ADDR(ppdev, pjBase, ulSolidColorOffset);
    CP_PEL_DEPTH(ppdev, pjBase, (cBpp - 1) << 4);

    // Here we are going to load the foreground and background colors into
    // display memory.  We'll use the area for solid colors that we allocated
    // earlier.

    {
        // Replicate the color so that it fills a whole DWORD

        if (cBpp == 1)
        {
            ulFgColor &= 0x000000FF;    // We may get some extraneous data in the
            ulBgColor &= 0x000000FF;    //   unused portion of our color.  Clear it.
            ulFgColor |= ulFgColor << 8;
            ulBgColor |= ulBgColor << 8;
        }
        if (cBpp <= 2)
        {
            ulFgColor &= 0x0000FFFF;
            ulBgColor &= 0x0000FFFF;
            ulFgColor |= ulFgColor << 16;
            ulBgColor |= ulBgColor << 16;
        }

        // We don't want to change the colors if the accelerator is active, because
        // a previous operation might be using them.

        WAIT_FOR_IDLE_ACL(ppdev, pjBase);

        // Set the color in offscreen memory

        *(PULONG)(ppdev->pjScreen + ppdev->ulSolidColorOffset) = ulFgColor;
        *(PULONG)(ppdev->pjScreen + ppdev->ulSolidColorOffset + 4) = ulBgColor;
    }

    // This is the mix control register for the ET6000.  We are setting it to
    // use a mix ROP of 2, which specifies that a 0 in the mixmap selects the
    // background color and 1 selects the foreground color.  Bit 7 says that
    // we want bit 7 of each byte in our mix data to be pixel 0.  This should
    // be the way that NT wants it.  We also have to set our mask ROP so we
    // can get the data onto the screen.

    CP_ROUTING_CTRL(ppdev, pjBase, 0xB2);

    dx = pptlSrc->x - prclDst->left;
    dy = pptlSrc->y - prclDst->top;     // Add to destination to get source

    pjSrcScan0 = psoSrc->pvScan0;

    DISPDBG((2,"lSrcDelta(%x)", psoSrc->lDelta));

    do {
        ULONG   ulDst;
        RECTL   rclSrc;
        RECTL   rclDst;
        BYTE*   pjTmp;
        BYTE*   pjDst;
        LONG    cjScan;         // Mix-map bytes copied per scan
        LONG    iScan;
        long    lDwords, lBytes, lStart;
        int     cBitsToSkip;

        // Reload the source delta for every rectangle

        lSrcDelta = psoSrc->lDelta;

        rclDst = *prcl;
        rclSrc.left   = rclDst.left   + dx;
        rclSrc.right  = rclDst.right  + dx;
        rclSrc.top    = rclDst.top    + dy;
        rclSrc.bottom = rclDst.bottom + dy;

        DISPDBG((2,"rclSrc(%d,%d,%d,%d) rclDst(%d,%d,%d,%d)", rclSrc.left, rclSrc.top, rclSrc.right, rclSrc.bottom, rclDst.left, rclDst.top, rclDst.right, rclDst.bottom));

        WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);

        CP_XCNT(ppdev, pjBase, ((rclSrc.right - rclSrc.left) * cBpp) - 1);
        CP_YCNT(ppdev, pjBase, 0);      // 1 scan at a time

        // First source byte of the rectangle, and the number of leading
        // bits in that byte that lie to the left of the rectangle

        pjSrc = pjSrcScan0 + rclSrc.top * lSrcDelta + (rclSrc.left >> 3);
        cBitsToSkip = rclSrc.left % 8;
        pjTmp = pjSrc;

        ulDst = (rclDst.top * lDelta) + (cBpp * rclDst.left);
        ulDst += xyOffset;

        WAIT_FOR_IDLE_ACL(ppdev, pjBase);

        // We are going to transfer the mix map into our BltBuffer so
        // we can get it to the screen.

        CP_MIX_Y_OFFSET(ppdev, pjBase, 0);  // 1 scan at a time

        // We are using the rectangle dimensions to determine how many pixels per line to move.  This
        // fixes a bug exposed by the HCT when we had to clip a large temporary buffer and would draw
        // using data close to the end of the buffer.  We would get a protection exception depending on
        // whether we ran too close to the end of the buffer.  lSrcDelta will still be used when
        // stepping through the source bitmap, but not to determine how many pixels will be drawn.
        //
        // We're adding cBitsToSkip back into here because it's necessary to compute the correct number
        // of bytes to move.  We always round to the next byte.

        cjScan = ((rclSrc.right - rclSrc.left) + cBitsToSkip + 7) >> 3;    // Round up before shift.

        lDwords = cjScan / 4;
        lBytes  = cjScan % 4;
        lStart  = 0;

        // Here we are going to transfer the monochrome bitmap to the screen.
        // We'll double buffer it to get some more throughput.

        for (iScan = 0; iScan < (rclSrc.bottom - rclSrc.top); iScan++)
        {
            long ix;

            pjDst = ppdev->pjScreen + lBltBuffer + lStart;

            // Whole DWORDs.  The pointers are advanced explicitly because
            // applying ++ to the result of a cast is not valid standard C.

            for (ix = lDwords; ix != 0; ix--)
            {
                *((ULONG*)pjDst) = *((ULONG*)pjTmp);
                pjDst += sizeof(ULONG);
                pjTmp += sizeof(ULONG);
            }

            // Remaining bytes

            for (ix = lBytes; ix != 0; ix--)
            {
                *pjDst++ = *pjTmp++;
            }

            WAIT_FOR_IDLE_ACL(ppdev, pjBase);

            // We have to add in rclSrc.left mod 8 to compensate for the possibility
            // of starting to draw too soon in our bitmap.  This generally occurs when
            // clipping text or moving windows where we are only asked to draw
            // part of a monochrome bitmap.

            CP_MIX_ADDR(ppdev, pjBase, ((lBltBuffer + lStart) * 8) + cBitsToSkip);
            CP_DST_ADDR(ppdev, pjBase, ulDst);

            // Step to the next scan and switch to the other buffer segment

            pjTmp = (pjSrc += lSrcDelta);
            ulDst += lDelta;
            lStart ^= ppdev->lBltBufferPitch;
        }

        prcl++;
    } while (--c != 0);

    WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
    CP_ROUTING_CTRL(ppdev, pjBase, 0x33);
    CP_PEL_DEPTH(ppdev, pjBase, 0);
}
|