|
|
/******************************Module*Header*******************************\
* Module Name: blti32.c * * Contains the low-level I/O blt functions for the Mach32. * * Hopefully, if you're basing your display driver on this code, to * support all of DrvBitBlt and DrvCopyBits, you'll only have to implement * the following routines. You shouldn't have to modify much in * 'bitblt.c'. I've tried to make these routines as few, modular, simple, * and efficient as I could, while still accelerating as many calls as * possible that would be cost-effective in terms of performance wins * versus size and effort. * * Note: In the following, 'relative' coordinates refers to coordinates * that haven't yet had the offscreen bitmap (DFB) offset applied. * 'Absolute' coordinates have had the offset applied. For example, * we may be told to blt to (1, 1) of the bitmap, but the bitmap may * be sitting in offscreen memory starting at coordinate (0, 768) -- * (1, 1) would be the 'relative' start coordinate, and (1, 769) * would be the 'absolute' start coordinate'. * * Copyright (c) 1992-1995 Microsoft Corporation * \**************************************************************************/
#include "precomp.h"
/******************************Public*Routine******************************\
* VOID vI32FillSolid * * Fills a list of rectangles with a solid colour. * \**************************************************************************/
VOID vI32FillSolid( // Type FNFILL
PDEV* ppdev, LONG c, // Can't be zero
RECTL* prcl, // List of rectangles to be filled, in relative
// coordinates
ULONG rop4, // rop4
RBRUSH_COLOR rbc, // Drawing colour is rbc.iSolidColor
POINTL* pptlBrush) // Not used
{ BYTE* pjIoBase; LONG xOffset; LONG yOffset; LONG x;
ASSERTDD(c > 0, "Can't handle zero rectangles");
pjIoBase = ppdev->pjIoBase; xOffset = ppdev->xOffset; yOffset = ppdev->yOffset;
I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 8);
I32_OW(pjIoBase, FRGD_COLOR, rbc.iSolidColor); I32_OW(pjIoBase, ALU_FG_FN, gaul32HwMixFromRop2[(rop4 >> 2) & 0xf]); I32_OW(pjIoBase, DP_CONFIG, FG_COLOR_SRC_FG | WRITE | DRAW);
while (TRUE) { x = xOffset + prcl->left; I32_OW(pjIoBase, CUR_X, x); I32_OW(pjIoBase, DEST_X_START, x); I32_OW(pjIoBase, DEST_X_END, xOffset + prcl->right); I32_OW(pjIoBase, CUR_Y, yOffset + prcl->top);
vI32QuietDown(ppdev, pjIoBase);
I32_OW(pjIoBase, DEST_Y_END, yOffset + prcl->bottom);
if (--c == 0) return;
prcl++; I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 5); } }
/******************************Public*Routine******************************\
* VOID vI32FillPatMonochrome * * This routine uses the pattern hardware to draw a monochrome patterned * list of rectangles. * * See Blt_DS_P8x8_ENG_IO_66_D0 and Blt_DS_P8x8_ENG_IO_66_D1. * \**************************************************************************/
VOID vI32FillPatMonochrome( // Type FNFILL
PDEV* ppdev, LONG c, // Can't be zero
RECTL* prcl, // List of rectangles to be filled, in relative
// coordinates
ULONG rop4, // rop4
RBRUSH_COLOR rbc, // rbc.prb points to brush realization structure
POINTL* pptlBrush) // Pattern alignment
{ BYTE* pjIoBase; LONG xOffset; LONG yOffset; ULONG ulHwForeMix; BYTE* pjSrc; BYTE* pjDst; LONG xPattern; LONG yPattern; LONG xOld; LONG yOld; LONG iLeftShift; LONG iRightShift; LONG i; BYTE j; LONG xLeft; ULONG aulTmp[2]; WORD* pwPattern;
ASSERTDD(ppdev->iAsic == ASIC_68800_6 || ppdev->iAsic == ASIC_68800AX, "Wrong ASIC type for monochrome 8x8 patterns");
pjIoBase = ppdev->pjIoBase; xOffset = ppdev->xOffset; yOffset = ppdev->yOffset;
xPattern = (pptlBrush->x + xOffset) & 7; yPattern = (pptlBrush->y + yOffset) & 7;
// If the alignment isn't correct, we'll have to change it:
if ((xPattern != rbc.prb->ptlBrush.x) || (yPattern != rbc.prb->ptlBrush.y)) { // Remember that we've changed the alignment on our cached brush:
xOld = rbc.prb->ptlBrush.x; yOld = rbc.prb->ptlBrush.y;
rbc.prb->ptlBrush.x = xPattern; rbc.prb->ptlBrush.y = yPattern;
// Now do the alignment:
yPattern = (yOld - yPattern); iRightShift = (xPattern - xOld) & 7; iLeftShift = 8 - iRightShift;
pjSrc = (BYTE*) &rbc.prb->aulPattern[0]; pjDst = (BYTE*) &aulTmp[0];
for (i = 0; i < 8; i++) { j = *(pjSrc + (yPattern++ & 7)); *pjDst++ = (j << iLeftShift) | (j >> iRightShift); }
rbc.prb->aulPattern[0] = aulTmp[0]; rbc.prb->aulPattern[1] = aulTmp[1]; }
ulHwForeMix = gaul32HwMixFromRop2[(rop4 >> 2) & 0xf];
I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 16); I32_OW(pjIoBase, DP_CONFIG, FG_COLOR_SRC_FG | EXT_MONO_SRC_PATT | DRAW | WRITE); I32_OW(pjIoBase, ALU_FG_FN, ulHwForeMix); I32_OW(pjIoBase, ALU_BG_FN, ((rop4 & 0xff00) == 0xaa00) ? LEAVE_ALONE : ulHwForeMix);
I32_OW(pjIoBase, FRGD_COLOR, rbc.prb->ulForeColor); I32_OW(pjIoBase, BKGD_COLOR, rbc.prb->ulBackColor); I32_OW(pjIoBase, PATT_LENGTH, 128); I32_OW(pjIoBase, PATT_DATA_INDEX, 16);
pwPattern = (WORD*) &rbc.prb->aulPattern[0]; I32_OW(pjIoBase, PATT_DATA, *(pwPattern)); I32_OW(pjIoBase, PATT_DATA, *(pwPattern + 1)); I32_OW(pjIoBase, PATT_DATA, *(pwPattern + 2)); I32_OW(pjIoBase, PATT_DATA, *(pwPattern + 3));
while(TRUE) { xLeft = xOffset + prcl->left; I32_OW(pjIoBase, CUR_X, xLeft); I32_OW(pjIoBase, DEST_X_START, xLeft); I32_OW(pjIoBase, DEST_X_END, xOffset + prcl->right); I32_OW(pjIoBase, CUR_Y, yOffset + prcl->top); I32_OW(pjIoBase, DEST_Y_END, yOffset + prcl->bottom);
if (--c == 0) break;
prcl++; I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 5); } }
/******************************Public*Routine******************************\
* VOID vI32FillPatColor * * This routine uses the pattern hardware to draw a colour patterned list of * rectangles. * * See Blt_DS_PCOL_ENG_IO_F0_D0 and Blt_DS_PCOL_ENG_IO_F0_D1. * \**************************************************************************/
VOID vI32FillPatColor( // Type FNFILL
PDEV* ppdev, LONG c, // Can't be zero
RECTL* prcl, // List of rectangles to be filled, in relative
// coordinates
ULONG rop4, // rop4
RBRUSH_COLOR rbc, // rbc.prb points to brush realization structure
POINTL* pptlBrush) // Pattern alignment
{ BYTE* pjIoBase; LONG xOffset; LONG yOffset; ULONG ulHwMix; LONG xLeft; LONG xRight; LONG yTop; LONG cy; LONG cyVenetian; LONG cyRoll; WORD* pwPattern; LONG xPattern; LONG yPattern;
ASSERTDD(ppdev->iBitmapFormat == BMF_8BPP, "Colour patterns work only at 8bpp");
pjIoBase = ppdev->pjIoBase; xOffset = ppdev->xOffset; yOffset = ppdev->yOffset;
ulHwMix = gaul32HwMixFromRop2[(rop4 >> 2) & 0xf];
I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 9); I32_OW(pjIoBase, ALU_FG_FN, ulHwMix); I32_OW(pjIoBase, SRC_Y_DIR, 1); I32_OW(pjIoBase, PATT_LENGTH, 7); // 8 pixel wide pattern
while (TRUE) { xLeft = xOffset + prcl->left; xRight = xOffset + prcl->right; yTop = yOffset + prcl->top; cy = prcl->bottom - prcl->top;
xPattern = (xLeft - pptlBrush->x - xOffset) & 7; yPattern = (yTop - pptlBrush->y - yOffset) & 7;
if (ulHwMix == OVERPAINT) { cyVenetian = min(cy, 8); cyRoll = cy - cyVenetian; } else { cyVenetian = cy; cyRoll = 0; }
I32_OW(pjIoBase, DP_CONFIG, FG_COLOR_SRC_PATT | DATA_WIDTH | DRAW | WRITE); I32_OW(pjIoBase, PATT_INDEX, xPattern); I32_OW(pjIoBase, DEST_X_START, xLeft); I32_OW(pjIoBase, CUR_X, xLeft); I32_OW(pjIoBase, DEST_X_END, xRight); I32_OW(pjIoBase, CUR_Y, yTop);
do { // Each scan of the pattern is eight bytes:
pwPattern = (WORD*) ((BYTE*) &rbc.prb->aulPattern[0] + (yPattern << 3)); yPattern = (yPattern + 1) & 7;
I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 6); I32_OW(pjIoBase, PATT_DATA_INDEX, 0); // Reset index for download
I32_OW(pjIoBase, PATT_DATA, *(pwPattern)); I32_OW(pjIoBase, PATT_DATA, *(pwPattern + 1)); I32_OW(pjIoBase, PATT_DATA, *(pwPattern + 2)); I32_OW(pjIoBase, PATT_DATA, *(pwPattern + 3)); yTop++;
vI32QuietDown(ppdev, pjIoBase);
I32_OW(pjIoBase, DEST_Y_END, yTop);
} while (--cyVenetian != 0);
if (cyRoll != 0) { // When the ROP is PATCOPY, we can take advantage of the fact
// that we've just laid down an entire row of the pattern, and
// can do a 'rolling' screen-to-screen blt to draw the rest:
I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 7); I32_OW(pjIoBase, DP_CONFIG, FG_COLOR_SRC_BLIT | DATA_WIDTH | DRAW | WRITE); I32_OW(pjIoBase, M32_SRC_X, xLeft); I32_OW(pjIoBase, M32_SRC_X_START, xLeft); I32_OW(pjIoBase, M32_SRC_X_END, xRight); I32_OW(pjIoBase, M32_SRC_Y, yTop - 8); I32_OW(pjIoBase, CUR_Y, yTop);
vI32QuietDown(ppdev, pjIoBase);
I32_OW(pjIoBase, DEST_Y_END, yTop + cyRoll); }
if (--c == 0) break;
prcl++; I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 6); } }
/******************************Public*Routine******************************\
* VOID vI32Xfer1bpp * * This routine colour expands a monochrome bitmap, possibly with different * Rop2's for the foreground and background. It will be called in the * following cases: * * 1) To colour-expand the monochrome text buffer for the vFastText routine. * 2) To blt a 1bpp source with a simple Rop2 between the source and * destination. * 3) To blt a true Rop3 when the source is a 1bpp bitmap that expands to * white and black, and the pattern is a solid colour. * 4) To handle a true Rop4 that works out to be Rop2's between the pattern * and destination. * * Needless to say, making this routine fast can leverage a lot of * performance. * \**************************************************************************/
VOID vI32Xfer1bpp( // Type FNXFER
PDEV* ppdev, LONG c, // Count of rectangles, can't be zero
RECTL* prcl, // List of destination rectangles, in relative
// coordinates
ROP4 rop4, // rop4
SURFOBJ* psoSrc, // Source surface
POINTL* pptlSrc, // Original unclipped source point
RECTL* prclDst, // Original unclipped destination rectangle
XLATEOBJ* pxlo) // Translate that provides colour-expansion information
{ BYTE* pjIoBase; LONG xOffset; LONG yOffset; ULONG* pulXlate; ULONG ulHwForeMix; LONG dx; LONG dy; LONG lSrcDelta; BYTE* pjSrcScan0; LONG xLeft; LONG xRight; LONG yTop; LONG cy; LONG cx; LONG xBias; LONG culScan; LONG lSrcSkip; ULONG* pulSrc; LONG i; ULONG ulFifo;
ASSERTDD(c > 0, "Can't handle zero rectangles"); ASSERTDD(((rop4 & 0xff00) >> 8) == (rop4 & 0xff), "Expect only a rop2");
pjIoBase = ppdev->pjIoBase; xOffset = ppdev->xOffset; yOffset = ppdev->yOffset; ulFifo = 0;
ulHwForeMix = gaul32HwMixFromRop2[rop4 & 0xf]; pulXlate = pxlo->pulXlate; I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 12); I32_OW(pjIoBase, DP_CONFIG, (WORD)(FG_COLOR_SRC_FG | BG_COLOR_SRC_BG | BIT16 | EXT_MONO_SRC_HOST | DRAW | WRITE | LSB_FIRST) ); I32_OW(pjIoBase, ALU_FG_FN, (WORD) ulHwForeMix ); I32_OW(pjIoBase, ALU_BG_FN, (WORD) ulHwForeMix ); I32_OW(pjIoBase, BKGD_COLOR, (WORD) pulXlate[0]); I32_OW(pjIoBase, FRGD_COLOR, (WORD) pulXlate[1]);
dx = pptlSrc->x - prclDst->left; dy = pptlSrc->y - prclDst->top;
lSrcDelta = psoSrc->lDelta; pjSrcScan0 = psoSrc->pvScan0;
while (TRUE) { xLeft = prcl->left; xRight = prcl->right;
// The Mach32 'bit packs' monochrome transfers, but GDI gives
// us monochrome bitmaps whose scans are always dword aligned.
// Consequently, we use the Mach32's clip registers to make
// our transfers a multiple of 32 to match the dword alignment:
I32_OW(pjIoBase, EXT_SCISSOR_L, (SHORT) (xLeft + xOffset) ); I32_OW(pjIoBase, EXT_SCISSOR_R, (SHORT) (xRight + xOffset - 1) );
yTop = prcl->top; cy = prcl->bottom - yTop;
xBias = (xLeft + dx) & 31; // Floor
xLeft -= xBias; cx = (xRight - xLeft + 31) & ~31; // Ceiling
I32_OW(pjIoBase, CUR_X, (WORD) xLeft + xOffset ); I32_OW(pjIoBase, DEST_X_START, (WORD) xLeft + xOffset ); I32_OW(pjIoBase, DEST_X_END, (WORD) (xLeft + xOffset + cx) ); I32_OW(pjIoBase, CUR_Y, (WORD) yTop + yOffset );
I32_OW(pjIoBase, DEST_Y_END, (WORD) (yTop + yOffset + cy) );
pulSrc = (ULONG*) (pjSrcScan0 + (yTop + dy) * lSrcDelta + ((xLeft + dx) >> 3)); culScan = cx >> 5; lSrcSkip = lSrcDelta - (culScan << 2);
ASSERTDD(((ULONG_PTR)pulSrc & 3) == 0,"Source should be dword aligned");
do { i = culScan; do { I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 2); I32_OW(pjIoBase, PIX_TRANS, *((USHORT*) pulSrc) ); I32_OW(pjIoBase, PIX_TRANS, *((USHORT*) pulSrc + 1) ); pulSrc++;
} while (--i != 0);
pulSrc = (ULONG*) ((BYTE*) pulSrc + lSrcSkip);
} while (--cy != 0);
if (--c == 0) break;
prcl++; I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 7); }
// Don't forget to reset the clip register:
I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 2); I32_OW(pjIoBase, EXT_SCISSOR_L, (SHORT) 0 ); I32_OW(pjIoBase, EXT_SCISSOR_R, (SHORT) M32_MAX_SCISSOR ); }
/******************************Public*Routine******************************\
* VOID vI32XferNative * * Transfers a bitmap that is the same colour depth as the display to * the screen via the data transfer register, with no translation. * \**************************************************************************/
VOID vI32XferNative( // Type FNXFER
PDEV* ppdev, LONG c, // Count of rectangles, can't be zero
RECTL* prcl, // Array of relative coordinates destination rectangles
ULONG rop4, // rop4
SURFOBJ* psoSrc, // Source surface
POINTL* pptlSrc, // Original unclipped source point
RECTL* prclDst, // Original unclipped destination rectangle
XLATEOBJ* pxlo) // Not used
{ BYTE* pjIoBase; LONG xOffset; LONG yOffset; ULONG ulHwForeMix; LONG dx; LONG dy; LONG lSrcDelta; BYTE* pjSrcScan0; LONG xLeft; LONG xRight; LONG yTop; LONG cy; LONG cx; LONG xBias; ULONG* pulSrc; ULONG culScan; LONG lSrcSkip; LONG i; ULONG ulFifo;
ASSERTDD(c > 0, "Can't handle zero rectangles"); ASSERTDD(((rop4 & 0xff00) >> 8) == (rop4 & 0xff), "Expect only a rop2");
pjIoBase = ppdev->pjIoBase; xOffset = ppdev->xOffset; yOffset = ppdev->yOffset; ulFifo = 0;
ulHwForeMix = gaul32HwMixFromRop2[rop4 & 0xf]; I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 10); I32_OW(pjIoBase, DP_CONFIG, (WORD)(FG_COLOR_SRC_HOST | BIT16 | DRAW | WRITE | LSB_FIRST) ); I32_OW(pjIoBase, ALU_FG_FN, (WORD) ulHwForeMix ); I32_OW(pjIoBase, ALU_BG_FN, (WORD) ulHwForeMix );
dx = pptlSrc->x - prclDst->left; dy = pptlSrc->y - prclDst->top;
lSrcDelta = psoSrc->lDelta; pjSrcScan0 = psoSrc->pvScan0;
while (TRUE) { xLeft = prcl->left; xRight = prcl->right;
I32_OW(pjIoBase, EXT_SCISSOR_L, (SHORT) (xLeft + xOffset) ); I32_OW(pjIoBase, EXT_SCISSOR_R, (SHORT) (xRight + xOffset - 1) );
yTop = prcl->top; cy = prcl->bottom - yTop;
// We compute 'xBias' in order to dword-align the source pointer.
// This way, we don't have to do unaligned reads of the source,
// and we're guaranteed not to read even a byte past the end of
// the bitmap.
//
// Note that this bias works at 24bpp, too:
xBias = (xLeft + dx) & 3; // Floor
xLeft -= xBias; cx = (xRight - xLeft + 3) & ~3; // Ceiling
I32_OW(pjIoBase, CUR_X, (WORD) xLeft + xOffset ); I32_OW(pjIoBase, DEST_X_START, (WORD) xLeft + xOffset ); I32_OW(pjIoBase, DEST_X_END, (WORD) (xLeft + xOffset + cx) ); I32_OW(pjIoBase, CUR_Y, (WORD) yTop + yOffset );
I32_OW(pjIoBase, DEST_Y_END, (WORD) (yTop + yOffset + cy) );
pulSrc = (ULONG*) (pjSrcScan0 + (yTop + dy) * lSrcDelta + ((xLeft + dx) * ppdev->cjPelSize)); culScan = (cx * ppdev->cjPelSize) >> 2; lSrcSkip = lSrcDelta - (culScan << 2);
ASSERTDD(((ULONG_PTR)pulSrc & 3) == 0, "Source should be dword aligned");
if (cy && culScan) { do { i = culScan; do { I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 2); I32_OW(pjIoBase, PIX_TRANS, *((USHORT*) pulSrc) ); I32_OW(pjIoBase, PIX_TRANS, *((USHORT*) pulSrc + 1) ); pulSrc++;
} while (--i != 0);
pulSrc = (ULONG*) ((BYTE*) pulSrc + lSrcSkip);
} while (--cy != 0); } if (--c == 0) break;
prcl++; I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 7); }
// Don't forget to reset the clip register:
I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 2); I32_OW(pjIoBase, EXT_SCISSOR_L, (SHORT) 0 ); I32_OW(pjIoBase, EXT_SCISSOR_R, (SHORT) M32_MAX_SCISSOR ); }
/******************************Public*Routine******************************\
* VOID vI32Xfer4bpp * * Does a 4bpp transfer from a bitmap to the screen. * * The reason we implement this is that a lot of resources are kept as 4bpp, * and used to initialize DFBs, some of which we of course keep off-screen. * \**************************************************************************/
VOID vI32Xfer4bpp( // Type FNXFER
PDEV* ppdev, LONG c, // Count of rectangles, can't be zero
RECTL* prcl, // List of destination rectangles, in relative
// coordinates
ULONG rop4, // Rop4
SURFOBJ* psoSrc, // Source surface
POINTL* pptlSrc, // Original unclipped source point
RECTL* prclDst, // Original unclipped destination rectangle
XLATEOBJ* pxlo) // Translate that provides colour-expansion information
{ BYTE* pjIoBase; LONG xOffset; LONG yOffset; LONG cjPelSize; ULONG ulHwForeMix; LONG xLeft; LONG xRight; LONG yTop; LONG xBias; LONG dx; LONG dy; LONG cx; LONG cy; LONG lSrcDelta; BYTE* pjSrcScan0; BYTE* pjSrc; BYTE jSrc; ULONG* pulXlate; LONG i; USHORT uw; LONG cjSrc; LONG lSrcSkip; ULONG ulFifo;
ASSERTDD(psoSrc->iBitmapFormat == BMF_4BPP, "Source must be 4bpp"); ASSERTDD(c > 0, "Can't handle zero rectangles"); ASSERTDD(ppdev->iBitmapFormat != BMF_24BPP, "Can't handle 24bpp");
pjIoBase = ppdev->pjIoBase; xOffset = ppdev->xOffset; yOffset = ppdev->yOffset; cjPelSize = ppdev->cjPelSize; pulXlate = pxlo->pulXlate; ulFifo = 0;
dx = pptlSrc->x - prclDst->left; dy = pptlSrc->y - prclDst->top; // Add to destination to get source
lSrcDelta = psoSrc->lDelta; pjSrcScan0 = psoSrc->pvScan0;
ulHwForeMix = gaul32HwMixFromRop2[rop4 & 0xf]; I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 10); I32_OW(pjIoBase, DP_CONFIG, (WORD)(FG_COLOR_SRC_HOST | BIT16 | DRAW | WRITE | LSB_FIRST) ); I32_OW(pjIoBase, ALU_FG_FN, (WORD) ulHwForeMix ); I32_OW(pjIoBase, ALU_BG_FN, (WORD) ulHwForeMix );
while(TRUE) { xLeft = prcl->left; xRight = prcl->right;
I32_OW(pjIoBase, EXT_SCISSOR_L, (SHORT) (xLeft + xOffset) ); I32_OW(pjIoBase, EXT_SCISSOR_R, (SHORT) (xRight + xOffset - 1) );
yTop = prcl->top; cy = prcl->bottom - yTop;
// We compute 'xBias' in order to dword-align the source pointer.
// This way, we don't have to do unaligned reads of the source,
// and we're guaranteed not to read even a byte past the end of
// the bitmap.
//
// Note that this bias works at 24bpp, too:
xBias = (xLeft + dx) & 3; // Floor
xLeft -= xBias; cx = (xRight - xLeft + 3) & ~3; // Ceiling
I32_OW(pjIoBase, CUR_X, (WORD) xLeft + xOffset ); I32_OW(pjIoBase, DEST_X_START, (WORD) xLeft + xOffset ); I32_OW(pjIoBase, DEST_X_END, (WORD) (xLeft + xOffset + cx) ); I32_OW(pjIoBase, CUR_Y, (WORD) yTop + yOffset );
I32_OW(pjIoBase, DEST_Y_END, (WORD) (yTop + yOffset + cy) );
pjSrc = pjSrcScan0 + (yTop + dy) * lSrcDelta + ((xLeft + dx) >> 1); cjSrc = cx >> 1; // Number of source bytes touched
lSrcSkip = lSrcDelta - cjSrc;
if (cjPelSize == 1) { // This part handles 8bpp output:
do { i = cjSrc; do { jSrc = *pjSrc++; uw = (USHORT) (pulXlate[jSrc >> 4]); uw |= (USHORT) (pulXlate[jSrc & 0xf] << 8); I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 1); I32_OW(pjIoBase, PIX_TRANS, uw ); } while (--i != 0);
pjSrc += lSrcSkip; } while (--cy != 0); } else if (cjPelSize == 2) { // This part handles 16bpp output:
do { i = cjSrc; do { I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 2); jSrc = *pjSrc++; uw = (USHORT) (pulXlate[jSrc >> 4]); I32_OW(pjIoBase, PIX_TRANS, uw ); uw = (USHORT) (pulXlate[jSrc & 0xf]); I32_OW(pjIoBase, PIX_TRANS, uw ); } while (--i != 0);
pjSrc += lSrcSkip; } while (--cy != 0); }
if (--c == 0) break;
prcl++; I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 7); }
// Don't forget to reset the clip register:
I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 2); I32_OW(pjIoBase, EXT_SCISSOR_L, (SHORT) 0 ); I32_OW(pjIoBase, EXT_SCISSOR_R, (SHORT) M32_MAX_SCISSOR ); }
/******************************Public*Routine******************************\
* VOID vI32Xfer8bpp * * Does a 8bpp transfer from a bitmap to the screen. * * The reason we implement this is that a lot of resources are kept as 8bpp, * and used to initialize DFBs, some of which we of course keep off-screen. * \**************************************************************************/
VOID vI32Xfer8bpp( // Type FNXFER
PDEV* ppdev, LONG c, // Count of rectangles, can't be zero
RECTL* prcl, // List of destination rectangles, in relative
// coordinates
ULONG rop4, // Rop4
SURFOBJ* psoSrc, // Source surface
POINTL* pptlSrc, // Original unclipped source point
RECTL* prclDst, // Original unclipped destination rectangle
XLATEOBJ* pxlo) // Translate that provides colour-expansion information
{ BYTE* pjIoBase; LONG xOffset; LONG yOffset; LONG cjPelSize; ULONG ulHwForeMix; LONG xLeft; LONG xRight; LONG yTop; LONG xBias; LONG dx; LONG dy; LONG cx; LONG cy; LONG lSrcDelta; BYTE* pjSrcScan0; BYTE* pjSrc; ULONG* pulXlate; LONG i; USHORT uw; LONG cwSrc; LONG cxRem; LONG lSrcSkip; ULONG ulFifo;
ASSERTDD(psoSrc->iBitmapFormat == BMF_8BPP, "Source must be 8bpp"); ASSERTDD(c > 0, "Can't handle zero rectangles"); ASSERTDD(ppdev->iBitmapFormat != BMF_24BPP, "Can't handle 24bpp");
pjIoBase = ppdev->pjIoBase; xOffset = ppdev->xOffset; yOffset = ppdev->yOffset; cjPelSize = ppdev->cjPelSize; pulXlate = pxlo->pulXlate; ulFifo = 0;
dx = pptlSrc->x - prclDst->left; dy = pptlSrc->y - prclDst->top; // Add to destination to get source
lSrcDelta = psoSrc->lDelta; pjSrcScan0 = psoSrc->pvScan0;
ulHwForeMix = gaul32HwMixFromRop2[rop4 & 0xf]; I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 10); I32_OW(pjIoBase, DP_CONFIG, (WORD)(FG_COLOR_SRC_HOST | BIT16 | DRAW | WRITE | LSB_FIRST) ); I32_OW(pjIoBase, ALU_FG_FN, (WORD) ulHwForeMix ); I32_OW(pjIoBase, ALU_BG_FN, (WORD) ulHwForeMix );
while(TRUE) { xLeft = prcl->left; xRight = prcl->right;
I32_OW(pjIoBase, EXT_SCISSOR_L, (SHORT) (xLeft + xOffset) ); I32_OW(pjIoBase, EXT_SCISSOR_R, (SHORT) (xRight + xOffset - 1) );
yTop = prcl->top; cy = prcl->bottom - yTop;
// We compute 'xBias' in order to dword-align the source pointer.
// This way, we don't have to do unaligned reads of the source,
// and we're guaranteed not to read even a byte past the end of
// the bitmap.
//
// Note that this bias works at 24bpp, too:
xBias = (xLeft + dx) & 3; // Floor
xLeft -= xBias; cx = (xRight - xLeft + 3) & ~3; // Ceiling
I32_OW(pjIoBase, CUR_X, (WORD) xLeft + xOffset ); I32_OW(pjIoBase, DEST_X_START, (WORD) xLeft + xOffset ); I32_OW(pjIoBase, DEST_X_END, (WORD) (xLeft + xOffset + cx) ); I32_OW(pjIoBase, CUR_Y, (WORD) yTop + yOffset );
I32_OW(pjIoBase, DEST_Y_END, (WORD) (yTop + yOffset + cy) );
pjSrc = pjSrcScan0 + (yTop + dy) * lSrcDelta + (xLeft + dx); lSrcSkip = lSrcDelta - cx;
if (cjPelSize == 1) { // This part handles 8bpp output:
cwSrc = (cx >> 1); cxRem = (cx & 1);
do { for (i = cwSrc; i != 0; i--) { uw = (USHORT) (pulXlate[*pjSrc++]); uw |= (USHORT) (pulXlate[*pjSrc++] << 8); I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 1); I32_OW(pjIoBase, PIX_TRANS, uw ); }
if (cxRem > 0) { uw = (USHORT) (pulXlate[*pjSrc++]); I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 1); I32_OW(pjIoBase, PIX_TRANS, uw ); }
pjSrc += lSrcSkip; } while (--cy != 0); } else if (cjPelSize == 2) { // This part handles 16bpp output:
do { for (i = cx; i != 0; i--) { uw = (USHORT) (pulXlate[*pjSrc++]); I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 1); I32_OW(pjIoBase, PIX_TRANS, uw ); }
pjSrc += lSrcSkip; } while (--cy != 0); }
if (--c == 0) break;
prcl++; I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 7); }
// Don't forget to reset the clip register:
I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 2); I32_OW(pjIoBase, EXT_SCISSOR_L, (SHORT) 0 ); I32_OW(pjIoBase, EXT_SCISSOR_R, (SHORT) M32_MAX_SCISSOR ); }
/******************************Public*Routine******************************\
* VOID vI32CopyBlt * * Does a screen-to-screen blt of a list of rectangles. * \**************************************************************************/
VOID vI32CopyBlt( // Type FNCOPY
PDEV* ppdev, LONG c, // Can't be zero
RECTL* prcl, // Array of relative coordinates destination rectangles
ULONG rop4, // rop4
POINTL* pptlSrc, // Original unclipped source point
RECTL* prclDst) // Original unclipped destination rectangle
{ BYTE* pjIoBase; LONG xOffset; LONG yOffset; LONG dx; LONG dy; LONG xLeft; LONG yTop; LONG cx; LONG cy;
ASSERTDD(c > 0, "Can't handle zero rectangles"); ASSERTDD(((rop4 & 0xff00) >> 8) == (rop4 & 0xff), "Expect only a rop2");
pjIoBase = ppdev->pjIoBase; xOffset = ppdev->xOffset; yOffset = ppdev->yOffset;
I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 12);
I32_OW(pjIoBase, DP_CONFIG, FG_COLOR_SRC_BLIT | DRAW | WRITE); I32_OW(pjIoBase, ALU_FG_FN, gaul32HwMixFromRop2[rop4 & 0xf]);
dx = pptlSrc->x - prclDst->left; dy = pptlSrc->y - prclDst->top;
// The accelerator may not be as fast at doing right-to-left copies, so
// only do them when the rectangles truly overlap:
if (!OVERLAP(prclDst, pptlSrc)) { I32_OW(pjIoBase, SRC_Y_DIR, 1); goto Top_Down_Left_To_Right; }
I32_OW(pjIoBase, SRC_Y_DIR, (prclDst->top <= pptlSrc->y));
if (prclDst->top <= pptlSrc->y) { if (prclDst->left <= pptlSrc->x) {
Top_Down_Left_To_Right:
while (TRUE) { xLeft = xOffset + prcl->left + dx; // Destination coordinates
yTop = yOffset + prcl->top + dy; cx = prcl->right - prcl->left; cy = prcl->bottom - prcl->top;
I32_OW(pjIoBase, M32_SRC_X, xLeft); I32_OW(pjIoBase, M32_SRC_X_START, xLeft); I32_OW(pjIoBase, M32_SRC_X_END, xLeft + cx); I32_OW(pjIoBase, M32_SRC_Y, yTop);
xLeft -= dx; // Source coordinates
yTop -= dy;
I32_OW(pjIoBase, CUR_X, xLeft); I32_OW(pjIoBase, DEST_X_START, xLeft); I32_OW(pjIoBase, DEST_X_END, xLeft + cx); I32_OW(pjIoBase, CUR_Y, yTop);
vI32QuietDown(ppdev, pjIoBase);
I32_OW(pjIoBase, DEST_Y_END, yTop + cy);
if (--c == 0) break;
prcl++; I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 9); } } else { while (TRUE) { xLeft = xOffset + prcl->left + dx; // Destination coordinates
yTop = yOffset + prcl->top + dy; cx = prcl->right - prcl->left; cy = prcl->bottom - prcl->top;
I32_OW(pjIoBase, M32_SRC_X, xLeft + cx); I32_OW(pjIoBase, M32_SRC_X_START, xLeft + cx); I32_OW(pjIoBase, M32_SRC_X_END, xLeft); I32_OW(pjIoBase, M32_SRC_Y, yTop);
xLeft -= dx; // Source coordinates
yTop -= dy;
I32_OW(pjIoBase, CUR_X, xLeft + cx); I32_OW(pjIoBase, DEST_X_START, xLeft + cx); I32_OW(pjIoBase, DEST_X_END, xLeft); I32_OW(pjIoBase, CUR_Y, yTop);
vI32QuietDown(ppdev, pjIoBase);
I32_OW(pjIoBase, DEST_Y_END, yTop + cy);
if (--c == 0) break;
prcl++; I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 9); } } } else { if (prclDst->left <= pptlSrc->x) { while (TRUE) { xLeft = xOffset + prcl->left + dx; // Destination coordinates
yTop = yOffset + prcl->top + dy - 1; cx = prcl->right - prcl->left; cy = prcl->bottom - prcl->top;
I32_OW(pjIoBase, M32_SRC_X, xLeft); I32_OW(pjIoBase, M32_SRC_X_START, xLeft); I32_OW(pjIoBase, M32_SRC_X_END, xLeft + cx); I32_OW(pjIoBase, M32_SRC_Y, yTop + cy);
xLeft -= dx; // Source coordinates
yTop -= dy;
I32_OW(pjIoBase, CUR_X, xLeft); I32_OW(pjIoBase, DEST_X_START, xLeft); I32_OW(pjIoBase, DEST_X_END, xLeft + cx); I32_OW(pjIoBase, CUR_Y, yTop + cy);
vI32QuietDown(ppdev, pjIoBase);
I32_OW(pjIoBase, DEST_Y_END, yTop);
if (--c == 0) break;
prcl++; I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 9); } } else { while (TRUE) { xLeft = xOffset + prcl->left + dx; // Destination coordinates
yTop = yOffset + prcl->top + dy - 1; cx = prcl->right - prcl->left; cy = prcl->bottom - prcl->top;
I32_OW(pjIoBase, M32_SRC_X, xLeft + cx); I32_OW(pjIoBase, M32_SRC_X_START, xLeft + cx); I32_OW(pjIoBase, M32_SRC_X_END, xLeft); I32_OW(pjIoBase, M32_SRC_Y, yTop + cy);
xLeft -= dx; // Source coordinates
yTop -= dy;
I32_OW(pjIoBase, CUR_X, xLeft + cx); I32_OW(pjIoBase, DEST_X_START, xLeft + cx); I32_OW(pjIoBase, DEST_X_END, xLeft); I32_OW(pjIoBase, CUR_Y, yTop + cy);
vI32QuietDown(ppdev, pjIoBase);
I32_OW(pjIoBase, DEST_Y_END, yTop);
if (--c == 0) break;
prcl++; I32_CHECK_FIFO_SPACE(ppdev, pjIoBase, 9); } } } }
|