|
|
/******************************Module*Header*******************************\
* Module Name: bltm32.c
*
* Contains the low-level memory-mapped I/O blt functions for the Mach32.
*
* Hopefully, if you're basing your display driver on this code, to
* support all of DrvBitBlt and DrvCopyBits, you'll only have to implement
* the following routines.  You shouldn't have to modify much in
* 'bitblt.c'.  I've tried to make these routines as few, modular, simple,
* and efficient as I could, while still accelerating as many calls as
* possible that would be cost-effective in terms of performance wins
* versus size and effort.
*
* Note: In the following, 'relative' coordinates refer to coordinates
*       that haven't yet had the offscreen bitmap (DFB) offset applied.
*       'Absolute' coordinates have had the offset applied.  For example,
*       we may be told to blt to (1, 1) of the bitmap, but the bitmap may
*       be sitting in offscreen memory starting at coordinate (0, 768) --
*       (1, 1) would be the 'relative' start coordinate, and (1, 769)
*       would be the 'absolute' start coordinate.
*
* Copyright (c) 1992-1995 Microsoft Corporation
*
\**************************************************************************/
#include "precomp.h"
/******************************Public*Routine******************************\
* VOID vM32FillSolid
*
* Fills a list of rectangles with a solid colour.
*
* The colour, mix and data-path configuration are programmed once up
* front; every rectangle after that costs five register writes, the last
* of which is always DEST_Y_END.
*
\**************************************************************************/

VOID vM32FillSolid(             // Type FNFILL
PDEV*           ppdev,
LONG            c,              // Can't be zero
RECTL*          prcl,           // List of rectangles to be filled, in relative
                                //   coordinates
ULONG           rop4,           // rop4
RBRUSH_COLOR    rbc,            // Drawing colour is rbc.iSolidColor
POINTL*         pptlBrush)      // Not used
{
    BYTE*   pjMmBase;
    LONG    xOffset;
    LONG    yOffset;
    LONG    xLeft;

    ASSERTDD(c > 0, "Can't handle zero rectangles");

    pjMmBase = ppdev->pjMmBase;
    xOffset  = ppdev->xOffset;
    yOffset  = ppdev->yOffset;

    // Reserve room for the three set-up writes below plus the five
    // writes needed by the first rectangle:

    M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 8);

    M32_OW(pjMmBase, FRGD_COLOR, rbc.iSolidColor);
    M32_OW(pjMmBase, ALU_FG_FN,  gaul32HwMixFromRop2[(rop4 >> 2) & 0xf]);
    M32_OW(pjMmBase, DP_CONFIG,  FG_COLOR_SRC_FG | WRITE | DRAW);

    for (;;)
    {
        // Convert the relative rectangle to absolute coordinates as we
        // hand it to the hardware:

        xLeft = xOffset + prcl->left;

        M32_OW(pjMmBase, CUR_X,        xLeft);
        M32_OW(pjMmBase, DEST_X_START, xLeft);
        M32_OW(pjMmBase, DEST_X_END,   xOffset + prcl->right);
        M32_OW(pjMmBase, CUR_Y,        yOffset + prcl->top);

        // vM32QuietDown is always called right before the DEST_Y_END
        // write (presumably the write that starts the fill -- see the
        // definition of vM32QuietDown for the reason):

        vM32QuietDown(ppdev, pjMmBase);

        M32_OW(pjMmBase, DEST_Y_END,   yOffset + prcl->bottom);

        c--;
        if (c == 0)
        {
            break;
        }

        // Move on to the next rectangle, which needs five more writes:

        prcl++;
        M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 5);
    }
}
/******************************Public*Routine******************************\
* VOID vM32FillPatMonochrome
*
* This routine uses the pattern hardware to draw a monochrome patterned
* list of rectangles.
*
* The realized brush caches the alignment its bits currently have.  When
* the requested alignment differs, the eight pattern bytes are rotated in
* place in the realized brush and the new alignment is recorded there.
*
* See Blt_DS_P8x8_ENG_IO_66_D0 and Blt_DS_P8x8_ENG_IO_66_D1.
*
\**************************************************************************/

VOID vM32FillPatMonochrome(     // Type FNFILL
PDEV*           ppdev,
LONG            c,              // Can't be zero
RECTL*          prcl,           // List of rectangles to be filled, in relative
                                //   coordinates
ULONG           rop4,           // rop4
RBRUSH_COLOR    rbc,            // rbc.prb points to brush realization structure
POINTL*         pptlBrush)      // Pattern alignment
{
    BYTE*   pjMmBase;
    LONG    xOffset;
    LONG    yOffset;
    ULONG   ulHwForeMix;
    ULONG   ulHwBackMix;
    BYTE*   pjOldRows;
    BYTE*   pjNewRows;
    LONG    xAlign;
    LONG    yAlign;
    LONG    xPrev;
    LONG    yPrev;
    LONG    yFirstRow;
    LONG    cLeftShift;
    LONG    cRightShift;
    LONG    iRow;
    BYTE    jRow;
    LONG    xLeft;
    ULONG   aulRotated[2];
    WORD*   pwPattern;

    ASSERTDD(ppdev->iAsic == ASIC_68800_6 || ppdev->iAsic == ASIC_68800AX,
             "Wrong ASIC type for monochrome 8x8 patterns");

    pjMmBase = ppdev->pjMmBase;
    xOffset  = ppdev->xOffset;
    yOffset  = ppdev->yOffset;

    // The alignment we need, in absolute coordinates, modulo the 8x8
    // pattern size:

    xAlign = (pptlBrush->x + xOffset) & 7;
    yAlign = (pptlBrush->y + yOffset) & 7;

    if ((xAlign != rbc.prb->ptlBrush.x) || (yAlign != rbc.prb->ptlBrush.y))
    {
        // The cached brush bits are aligned differently.  Note the old
        // alignment, then record the one we are about to produce:

        xPrev = rbc.prb->ptlBrush.x;
        yPrev = rbc.prb->ptlBrush.y;

        rbc.prb->ptlBrush.x = xAlign;
        rbc.prb->ptlBrush.y = yAlign;

        // Vertical realignment picks a different starting row (may be
        // negative; it is masked with 7 when used).  Horizontal
        // realignment rotates every row byte right by 'cRightShift':

        yFirstRow   = yPrev - yAlign;
        cRightShift = (xAlign - xPrev) & 7;
        cLeftShift  = 8 - cRightShift;

        pjOldRows = (BYTE*) &rbc.prb->aulPattern[0];
        pjNewRows = (BYTE*) &aulRotated[0];

        for (iRow = 0; iRow < 8; iRow++)
        {
            jRow = pjOldRows[(yFirstRow + iRow) & 7];

            // Only the low byte of the expression survives the store,
            // which is what makes this a rotate:

            pjNewRows[iRow] = (jRow << cLeftShift) | (jRow >> cRightShift);
        }

        rbc.prb->aulPattern[0] = aulRotated[0];
        rbc.prb->aulPattern[1] = aulRotated[1];
    }

    ulHwForeMix = gaul32HwMixFromRop2[(rop4 >> 2) & 0xf];

    // When the background half of the rop4 is 0xaa the background
    // pixels are left untouched; otherwise they get the same mix as the
    // foreground:

    ulHwBackMix = ulHwForeMix;
    if ((rop4 & 0xff00) == 0xaa00)
    {
        ulHwBackMix = LEAVE_ALONE;
    }

    // Seven set-up writes, four pattern words, plus the five writes of
    // the first rectangle:

    M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 16);

    M32_OW(pjMmBase, DP_CONFIG, FG_COLOR_SRC_FG | EXT_MONO_SRC_PATT |
                                DRAW | WRITE);
    M32_OW(pjMmBase, ALU_FG_FN, ulHwForeMix);
    M32_OW(pjMmBase, ALU_BG_FN, ulHwBackMix);

    M32_OW(pjMmBase, FRGD_COLOR,      rbc.prb->ulForeColor);
    M32_OW(pjMmBase, BKGD_COLOR,      rbc.prb->ulBackColor);
    M32_OW(pjMmBase, PATT_LENGTH,     128);
    M32_OW(pjMmBase, PATT_DATA_INDEX, 16);

    // Download the 8 pattern bytes as four 16-bit words:

    pwPattern = (WORD*) &rbc.prb->aulPattern[0];

    M32_OW(pjMmBase, PATT_DATA, pwPattern[0]);
    M32_OW(pjMmBase, PATT_DATA, pwPattern[1]);
    M32_OW(pjMmBase, PATT_DATA, pwPattern[2]);
    M32_OW(pjMmBase, PATT_DATA, pwPattern[3]);

    for (;;)
    {
        xLeft = xOffset + prcl->left;

        M32_OW(pjMmBase, CUR_X,        xLeft);
        M32_OW(pjMmBase, DEST_X_START, xLeft);
        M32_OW(pjMmBase, DEST_X_END,   xOffset + prcl->right);
        M32_OW(pjMmBase, CUR_Y,        yOffset + prcl->top);
        M32_OW(pjMmBase, DEST_Y_END,   yOffset + prcl->bottom);

        c--;
        if (c == 0)
        {
            break;
        }

        prcl++;
        M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 5);
    }
}
/******************************Public*Routine******************************\
* VOID vM32FillPatColor
*
* This routine uses the pattern hardware to draw a colour patterned list of
* rectangles.
*
* Each rectangle is drawn one scan at a time, downloading the matching
* 8-byte pattern row before every scan.  When the mix is OVERPAINT only
* the first (up to) eight scans are drawn that way; the remainder is
* produced by a screen-to-screen blt that copies from eight scans above.
*
* See Blt_DS_PCOL_ENG_IO_F0_D0 and Blt_DS_PCOL_ENG_IO_F0_D1.
*
\**************************************************************************/

VOID vM32FillPatColor(          // Type FNFILL
PDEV*           ppdev,
LONG            c,              // Can't be zero
RECTL*          prcl,           // List of rectangles to be filled, in relative
                                //   coordinates
ULONG           rop4,           // rop4
RBRUSH_COLOR    rbc,            // rbc.prb points to brush realization structure
POINTL*         pptlBrush)      // Pattern alignment
{
    BYTE*   pjMmBase;
    LONG    xOffset;
    LONG    yOffset;
    ULONG   ulHwMix;
    LONG    xLeft;
    LONG    xRight;
    LONG    yScan;
    LONG    cyTotal;
    LONG    cyByScan;
    LONG    cyByCopy;
    WORD*   pwRow;
    LONG    xBrush;
    LONG    yBrush;

    ASSERTDD(ppdev->iBitmapFormat == BMF_8BPP,
             "Colour patterns work only at 8bpp");

    pjMmBase = ppdev->pjMmBase;
    xOffset  = ppdev->xOffset;
    yOffset  = ppdev->yOffset;

    ulHwMix = gaul32HwMixFromRop2[(rop4 >> 2) & 0xf];

    // Three one-time writes plus the six per-rectangle set-up writes of
    // the first rectangle:

    M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 9);

    M32_OW(pjMmBase, ALU_FG_FN,   ulHwMix);
    M32_OW(pjMmBase, SRC_Y_DIR,   1);
    M32_OW(pjMmBase, PATT_LENGTH, 7);       // 8 pixel wide pattern

    for (;;)
    {
        xLeft   = xOffset + prcl->left;
        xRight  = xOffset + prcl->right;
        yScan   = yOffset + prcl->top;
        cyTotal = prcl->bottom - prcl->top;

        // Position within the 8x8 brush of the rectangle's top-left
        // pixel:

        xBrush = (xLeft - pptlBrush->x - xOffset) & 7;
        yBrush = (yScan - pptlBrush->y - yOffset) & 7;

        // By default every scan is drawn individually.  Only a plain
        // overpaint taller than the pattern can have its tail copied:

        cyByScan = cyTotal;
        cyByCopy = 0;

        if ((ulHwMix == OVERPAINT) && (cyTotal > 8))
        {
            cyByScan = 8;
            cyByCopy = cyTotal - 8;
        }

        // DP_CONFIG has to be rewritten for every rectangle because the
        // copy phase below changes it:

        M32_OW(pjMmBase, DP_CONFIG,    FG_COLOR_SRC_PATT | DATA_WIDTH |
                                       DRAW | WRITE);
        M32_OW(pjMmBase, PATT_INDEX,   xBrush);
        M32_OW(pjMmBase, DEST_X_START, xLeft);
        M32_OW(pjMmBase, CUR_X,        xLeft);
        M32_OW(pjMmBase, DEST_X_END,   xRight);
        M32_OW(pjMmBase, CUR_Y,        yScan);

        do
        {
            // Each scan of the pattern is eight bytes:

            pwRow  = (WORD*) ((BYTE*) &rbc.prb->aulPattern[0]
                                    + (yBrush << 3));
            yBrush = (yBrush + 1) & 7;

            M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 6);

            M32_OW(pjMmBase, PATT_DATA_INDEX, 0);   // Reset index for download
            M32_OW(pjMmBase, PATT_DATA, pwRow[0]);
            M32_OW(pjMmBase, PATT_DATA, pwRow[1]);
            M32_OW(pjMmBase, PATT_DATA, pwRow[2]);
            M32_OW(pjMmBase, PATT_DATA, pwRow[3]);

            // Draw exactly one scan: the end row is one past the
            // current one.

            yScan++;

            vM32QuietDown(ppdev, pjMmBase);

            M32_OW(pjMmBase, DEST_Y_END, yScan);

            cyByScan--;

        } while (cyByScan != 0);

        if (cyByCopy != 0)
        {
            // When the ROP is PATCOPY, we can take advantage of the fact
            // that we've just laid down an entire row of the pattern, and
            // can do a 'rolling' screen-to-screen blt to draw the rest.
            // 'yScan' is now the first scan not yet drawn, and the source
            // starts eight scans above it:

            M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 7);

            M32_OW(pjMmBase, DP_CONFIG,       FG_COLOR_SRC_BLIT | DATA_WIDTH |
                                              DRAW | WRITE);
            M32_OW(pjMmBase, M32_SRC_X,       xLeft);
            M32_OW(pjMmBase, M32_SRC_X_START, xLeft);
            M32_OW(pjMmBase, M32_SRC_X_END,   xRight);
            M32_OW(pjMmBase, M32_SRC_Y,       yScan - 8);
            M32_OW(pjMmBase, CUR_Y,           yScan);

            vM32QuietDown(ppdev, pjMmBase);

            M32_OW(pjMmBase, DEST_Y_END,      yScan + cyByCopy);
        }

        c--;
        if (c == 0)
        {
            break;
        }

        // Room for the six set-up writes of the next rectangle:

        prcl++;
        M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 6);
    }
}
/******************************Public*Routine******************************\
* VOID vM32Xfer1bpp
*
* This routine colour expands a monochrome bitmap through the data
* transfer register.  A single Rop2 is expected (asserted below) and the
* same hardware mix is programmed for foreground and background pixels.
* It will be called in the following cases:
*
* 1) To colour-expand the monochrome text buffer for the vFastText routine.
* 2) To blt a 1bpp source with a simple Rop2 between the source and
*    destination.
* 3) To blt a true Rop3 when the source is a 1bpp bitmap that expands to
*    white and black, and the pattern is a solid colour.
* 4) To handle a true Rop4 that works out to be Rop2's between the pattern
*    and destination.
*
* Needless to say, making this routine fast can leverage a lot of
* performance.
*
\**************************************************************************/

VOID vM32Xfer1bpp(      // Type FNXFER
PDEV*       ppdev,
LONG        c,          // Count of rectangles, can't be zero
RECTL*      prcl,       // List of destination rectangles, in relative
                        //   coordinates
ROP4        rop4,       // rop4
SURFOBJ*    psoSrc,     // Source surface
POINTL*     pptlSrc,    // Original unclipped source point
RECTL*      prclDst,    // Original unclipped destination rectangle
XLATEOBJ*   pxlo)       // Translate that provides colour-expansion information
{
    BYTE*   pjMmBase;
    LONG    xOffset;
    LONG    yOffset;
    ULONG*  pulXlate;
    ULONG   ulHwForeMix;
    LONG    dx;
    LONG    dy;
    LONG    lSrcDelta;
    BYTE*   pjSrcScan0;
    LONG    xLeft;
    LONG    xRight;
    LONG    yTop;
    LONG    cy;
    LONG    cx;
    LONG    xBias;
    LONG    culScan;
    LONG    lSrcSkip;
    ULONG*  pulSrc;
    LONG    i;

    ASSERTDD(c > 0, "Can't handle zero rectangles");
    ASSERTDD(((rop4 & 0xff00) >> 8) == (rop4 & 0xff),
             "Expect only a rop2");

    pjMmBase = ppdev->pjMmBase;
    xOffset  = ppdev->xOffset;
    yOffset  = ppdev->yOffset;

    ulHwForeMix = gaul32HwMixFromRop2[rop4 & 0xf];
    pulXlate    = pxlo->pulXlate;

    // Five set-up writes plus the seven per-rectangle writes of the
    // first rectangle:

    M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 12);

    M32_OW(pjMmBase, DP_CONFIG, (WORD)(FG_COLOR_SRC_FG | BG_COLOR_SRC_BG | BIT16 |
                                EXT_MONO_SRC_HOST | DRAW | WRITE | LSB_FIRST) );
    M32_OW(pjMmBase, ALU_FG_FN, (WORD) ulHwForeMix );
    M32_OW(pjMmBase, ALU_BG_FN, (WORD) ulHwForeMix );

    // Entry 0 of the translate table is the background colour, entry 1
    // the foreground colour:

    M32_OW(pjMmBase, BKGD_COLOR, (WORD) pulXlate[0]);
    M32_OW(pjMmBase, FRGD_COLOR, (WORD) pulXlate[1]);

    dx = pptlSrc->x - prclDst->left;        // Add to destination to get source
    dy = pptlSrc->y - prclDst->top;

    lSrcDelta  = psoSrc->lDelta;
    pjSrcScan0 = psoSrc->pvScan0;

    while (TRUE)
    {
        xLeft  = prcl->left;
        xRight = prcl->right;

        // The Mach32 'bit packs' monochrome transfers, but GDI gives
        // us monochrome bitmaps whose scans are always dword aligned.
        // Consequently, we use the Mach32's clip registers to make
        // our transfers a multiple of 32 to match the dword alignment:

        M32_OW(pjMmBase, EXT_SCISSOR_L, (SHORT) (xLeft + xOffset) );
        M32_OW(pjMmBase, EXT_SCISSOR_R, (SHORT) (xRight + xOffset - 1) );

        yTop = prcl->top;
        cy   = prcl->bottom - yTop;

        xBias  = (xLeft + dx) & 31;             // Floor
        xLeft -= xBias;
        cx     = (xRight - xLeft + 31) & ~31;   // Ceiling

        // 'xLeft' may now be negative, so the whole sum (not just its
        // first operand) is narrowed to 16 bits before being written:

        M32_OW(pjMmBase, CUR_X,        (WORD) (xLeft + xOffset) );
        M32_OW(pjMmBase, DEST_X_START, (WORD) (xLeft + xOffset) );
        M32_OW(pjMmBase, DEST_X_END,   (WORD) (xLeft + xOffset + cx) );
        M32_OW(pjMmBase, CUR_Y,        (WORD) (yTop + yOffset) );

        M32_OW(pjMmBase, DEST_Y_END,   (WORD) (yTop + yOffset + cy) );

        // 'xLeft + dx' is a multiple of 32 pixels, so the byte offset
        // into the scan is a multiple of four:

        pulSrc   = (ULONG*) (pjSrcScan0 + (yTop + dy) * lSrcDelta
                                        + ((xLeft + dx) >> 3));
        culScan  = cx >> 5;
        lSrcSkip = lSrcDelta - (culScan << 2);

        ASSERTDD(((ULONG_PTR)pulSrc & 3) == 0, "Source should be dword aligned");

        do {
            i = culScan;
            do {
                // Each source dword goes out as two 16-bit writes, low
                // word first:

                M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 2);
                M32_OW(pjMmBase, PIX_TRANS, *((USHORT*) pulSrc) );
                M32_OW(pjMmBase, PIX_TRANS, *((USHORT*) pulSrc + 1) );
                pulSrc++;

            } while (--i != 0);

            pulSrc = (ULONG*) ((BYTE*) pulSrc + lSrcSkip);

        } while (--cy != 0);

        if (--c == 0)
            break;

        prcl++;
        M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 7);
    }

    // Don't forget to reset the clip register:

    M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 2);
    M32_OW(pjMmBase, EXT_SCISSOR_L, (SHORT) 0 );
    M32_OW(pjMmBase, EXT_SCISSOR_R, (SHORT) M32_MAX_SCISSOR );
}
/******************************Public*Routine******************************\
* VOID vM32XferNative
*
* Transfers a bitmap that is the same colour depth as the display to
* the screen via the data transfer register, with no translation.
*
* The transfer is widened so that it starts on a dword boundary of the
* source and covers a multiple of four pixels; the scissor registers are
* set to the true rectangle while the data is sent and are reset before
* returning.
*
\**************************************************************************/

VOID vM32XferNative(    // Type FNXFER
PDEV*       ppdev,
LONG        c,          // Count of rectangles, can't be zero
RECTL*      prcl,       // Array of relative coordinates destination rectangles
ULONG       rop4,       // rop4
SURFOBJ*    psoSrc,     // Source surface
POINTL*     pptlSrc,    // Original unclipped source point
RECTL*      prclDst,    // Original unclipped destination rectangle
XLATEOBJ*   pxlo)       // Not used
{
    BYTE*   pjMmBase;
    LONG    xOffset;
    LONG    yOffset;
    ULONG   ulHwForeMix;
    LONG    dx;
    LONG    dy;
    LONG    lSrcDelta;
    BYTE*   pjSrcScan0;
    LONG    xLeft;
    LONG    xRight;
    LONG    yTop;
    LONG    cy;
    LONG    cx;
    LONG    xBias;
    ULONG*  pulSrc;
    ULONG   culScan;
    LONG    lSrcSkip;
    LONG    i;

    ASSERTDD(c > 0, "Can't handle zero rectangles");
    ASSERTDD(((rop4 & 0xff00) >> 8) == (rop4 & 0xff),
             "Expect only a rop2");

    pjMmBase = ppdev->pjMmBase;
    xOffset  = ppdev->xOffset;
    yOffset  = ppdev->yOffset;

    ulHwForeMix = gaul32HwMixFromRop2[rop4 & 0xf];

    // Three set-up writes plus the seven per-rectangle writes of the
    // first rectangle:

    M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 10);

    M32_OW(pjMmBase, DP_CONFIG, (WORD)(FG_COLOR_SRC_HOST | BIT16 |
                                DRAW | WRITE | LSB_FIRST) );
    M32_OW(pjMmBase, ALU_FG_FN, (WORD) ulHwForeMix );
    M32_OW(pjMmBase, ALU_BG_FN, (WORD) ulHwForeMix );

    dx = pptlSrc->x - prclDst->left;        // Add to destination to get source
    dy = pptlSrc->y - prclDst->top;

    lSrcDelta  = psoSrc->lDelta;
    pjSrcScan0 = psoSrc->pvScan0;

    while (TRUE)
    {
        xLeft  = prcl->left;
        xRight = prcl->right;

        // Clip to the real destination; the transfer itself is widened
        // below:

        M32_OW(pjMmBase, EXT_SCISSOR_L, (SHORT) (xLeft + xOffset) );
        M32_OW(pjMmBase, EXT_SCISSOR_R, (SHORT) (xRight + xOffset - 1) );

        yTop = prcl->top;
        cy   = prcl->bottom - yTop;

        // We compute 'xBias' in order to dword-align the source pointer.
        // This way, we don't have to do unaligned reads of the source,
        // and we're guaranteed not to read even a byte past the end of
        // the bitmap.
        //
        // Note that this bias works at 24bpp, too:

        xBias  = (xLeft + dx) & 3;              // Floor
        xLeft -= xBias;
        cx     = (xRight - xLeft + 3) & ~3;     // Ceiling

        // 'xLeft' may now be negative, so the whole sum (not just its
        // first operand) is narrowed to 16 bits before being written:

        M32_OW(pjMmBase, CUR_X,        (WORD) (xLeft + xOffset) );
        M32_OW(pjMmBase, DEST_X_START, (WORD) (xLeft + xOffset) );
        M32_OW(pjMmBase, DEST_X_END,   (WORD) (xLeft + xOffset + cx) );
        M32_OW(pjMmBase, CUR_Y,        (WORD) (yTop + yOffset) );

        M32_OW(pjMmBase, DEST_Y_END,   (WORD) (yTop + yOffset + cy) );

        pulSrc   = (ULONG*) (pjSrcScan0 + (yTop + dy) * lSrcDelta
                                        + ((xLeft + dx) * ppdev->cjPelSize));
        culScan  = (cx * ppdev->cjPelSize) >> 2;
        lSrcSkip = lSrcDelta - (culScan << 2);

        ASSERTDD(((ULONG_PTR)pulSrc & 3) == 0, "Source should be dword aligned");

        do {
            i = culScan;
            do {
                // Each source dword goes out as two 16-bit writes, low
                // word first:

                M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 2);
                M32_OW(pjMmBase, PIX_TRANS, *((USHORT*) pulSrc) );
                M32_OW(pjMmBase, PIX_TRANS, *((USHORT*) pulSrc + 1) );
                pulSrc++;

            } while (--i != 0);

            pulSrc = (ULONG*) ((BYTE*) pulSrc + lSrcSkip);

        } while (--cy != 0);

        if (--c == 0)
            break;

        prcl++;
        M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 7);
    }

    // Don't forget to reset the clip register:

    M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 2);
    M32_OW(pjMmBase, EXT_SCISSOR_L, (SHORT) 0 );
    M32_OW(pjMmBase, EXT_SCISSOR_R, (SHORT) M32_MAX_SCISSOR );
}
/******************************Public*Routine******************************\
* VOID vM32Xfer4bpp
*
* Does a 4bpp transfer from a bitmap to the screen.
*
* The reason we implement this is that a lot of resources are kept as 4bpp,
* and used to initialize DFBs, some of which we of course keep off-screen.
*
* Every source nibble is translated through pxlo->pulXlate and sent via
* the data transfer register.  Only 8bpp and 16bpp destinations
* (cjPelSize of 1 or 2) have a transfer loop.
*
\**************************************************************************/

VOID vM32Xfer4bpp(      // Type FNXFER
PDEV*       ppdev,
LONG        c,          // Count of rectangles, can't be zero
RECTL*      prcl,       // List of destination rectangles, in relative
                        //   coordinates
ULONG       rop4,       // Rop4
SURFOBJ*    psoSrc,     // Source surface
POINTL*     pptlSrc,    // Original unclipped source point
RECTL*      prclDst,    // Original unclipped destination rectangle
XLATEOBJ*   pxlo)       // Translate that provides colour-expansion information
{
    BYTE*   pjMmBase;
    LONG    xOffset;
    LONG    yOffset;
    LONG    cjPelSize;
    ULONG   ulHwForeMix;
    LONG    xLeft;
    LONG    xRight;
    LONG    yTop;
    LONG    xBias;
    LONG    dx;
    LONG    dy;
    LONG    cx;
    LONG    cy;
    LONG    lSrcDelta;
    BYTE*   pjSrcScan0;
    BYTE*   pjSrc;
    BYTE    jSrc;
    ULONG*  pulXlate;
    LONG    i;
    USHORT  uw;
    LONG    cjSrc;
    LONG    lSrcSkip;

    ASSERTDD(psoSrc->iBitmapFormat == BMF_4BPP, "Source must be 4bpp");
    ASSERTDD(c > 0, "Can't handle zero rectangles");
    ASSERTDD(ppdev->iBitmapFormat != BMF_24BPP, "Can't handle 24bpp");

    pjMmBase  = ppdev->pjMmBase;
    xOffset   = ppdev->xOffset;
    yOffset   = ppdev->yOffset;
    cjPelSize = ppdev->cjPelSize;
    pulXlate  = pxlo->pulXlate;

    dx = pptlSrc->x - prclDst->left;
    dy = pptlSrc->y - prclDst->top;     // Add to destination to get source

    lSrcDelta  = psoSrc->lDelta;
    pjSrcScan0 = psoSrc->pvScan0;

    ulHwForeMix = gaul32HwMixFromRop2[rop4 & 0xf];

    // Three set-up writes plus the seven per-rectangle writes of the
    // first rectangle:

    M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 10);

    M32_OW(pjMmBase, DP_CONFIG, (WORD)(FG_COLOR_SRC_HOST | BIT16 |
                                DRAW | WRITE | LSB_FIRST) );
    M32_OW(pjMmBase, ALU_FG_FN, (WORD) ulHwForeMix );
    M32_OW(pjMmBase, ALU_BG_FN, (WORD) ulHwForeMix );

    while(TRUE)
    {
        xLeft  = prcl->left;
        xRight = prcl->right;

        // Clip to the real destination; the transfer itself is widened
        // below:

        M32_OW(pjMmBase, EXT_SCISSOR_L, (SHORT) (xLeft + xOffset) );
        M32_OW(pjMmBase, EXT_SCISSOR_R, (SHORT) (xRight + xOffset - 1) );

        yTop = prcl->top;
        cy   = prcl->bottom - yTop;

        // 'xBias' moves the start of the transfer left to a multiple of
        // four source pixels (a 2-byte boundary in the 4bpp source), and
        // 'cx' is rounded up to a multiple of four pixels.  That way
        // every source byte is consumed whole (both nibbles), and the
        // scissor set above discards the extra pixels:

        xBias  = (xLeft + dx) & 3;              // Floor
        xLeft -= xBias;
        cx     = (xRight - xLeft + 3) & ~3;     // Ceiling

        // 'xLeft' may now be negative, so the whole sum (not just its
        // first operand) is narrowed to 16 bits before being written:

        M32_OW(pjMmBase, CUR_X,        (WORD) (xLeft + xOffset) );
        M32_OW(pjMmBase, DEST_X_START, (WORD) (xLeft + xOffset) );
        M32_OW(pjMmBase, DEST_X_END,   (WORD) (xLeft + xOffset + cx) );
        M32_OW(pjMmBase, CUR_Y,        (WORD) (yTop + yOffset) );

        M32_OW(pjMmBase, DEST_Y_END,   (WORD) (yTop + yOffset + cy) );

        pjSrc    = pjSrcScan0 + (yTop + dy) * lSrcDelta
                              + ((xLeft + dx) >> 1);
        cjSrc    = cx >> 1;         // Number of source bytes touched
        lSrcSkip = lSrcDelta - cjSrc;

        // NOTE(review): no data is sent when cjPelSize is neither 1 nor
        // 2, although the blt has already been set up above -- confirm
        // that callers never reach here at other depths.

        if (cjPelSize == 1)
        {
            // This part handles 8bpp output.  The two pixels of each
            // source byte are packed into one 16-bit write, with the
            // high nibble's pixel in the low byte:

            do {
                i = cjSrc;
                do {
                    jSrc = *pjSrc++;
                    uw   = (USHORT) (pulXlate[jSrc >> 4]);
                    uw  |= (USHORT) (pulXlate[jSrc & 0xf] << 8);
                    M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 1);
                    M32_OW(pjMmBase, PIX_TRANS, uw );
                } while (--i != 0);

                pjSrc += lSrcSkip;
            } while (--cy != 0);
        }
        else if (cjPelSize == 2)
        {
            // This part handles 16bpp output.  Each source byte yields
            // two 16-bit writes, high nibble's pixel first:

            do {
                i = cjSrc;
                do {
                    M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 2);
                    jSrc = *pjSrc++;
                    uw   = (USHORT) (pulXlate[jSrc >> 4]);
                    M32_OW(pjMmBase, PIX_TRANS, uw );
                    uw   = (USHORT) (pulXlate[jSrc & 0xf]);
                    M32_OW(pjMmBase, PIX_TRANS, uw );
                } while (--i != 0);

                pjSrc += lSrcSkip;
            } while (--cy != 0);
        }

        if (--c == 0)
            break;

        prcl++;
        M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 7);
    }

    // Don't forget to reset the clip register:

    M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 2);
    M32_OW(pjMmBase, EXT_SCISSOR_L, (SHORT) 0 );
    M32_OW(pjMmBase, EXT_SCISSOR_R, (SHORT) M32_MAX_SCISSOR );
}
/******************************Public*Routine******************************\
* VOID vM32Xfer8bpp
*
* Does a 8bpp transfer from a bitmap to the screen.
*
* The reason we implement this is that a lot of resources are kept as 8bpp,
* and used to initialize DFBs, some of which we of course keep off-screen.
*
* Every source byte is translated through pxlo->pulXlate and sent via the
* data transfer register.  Only 8bpp and 16bpp destinations (cjPelSize of
* 1 or 2) have a transfer loop.
*
\**************************************************************************/

VOID vM32Xfer8bpp(      // Type FNXFER
PDEV*       ppdev,
LONG        c,          // Count of rectangles, can't be zero
RECTL*      prcl,       // List of destination rectangles, in relative
                        //   coordinates
ULONG       rop4,       // Rop4
SURFOBJ*    psoSrc,     // Source surface
POINTL*     pptlSrc,    // Original unclipped source point
RECTL*      prclDst,    // Original unclipped destination rectangle
XLATEOBJ*   pxlo)       // Translate that provides colour-expansion information
{
    BYTE*   pjMmBase;
    LONG    xOffset;
    LONG    yOffset;
    LONG    cjPelSize;
    ULONG   ulHwForeMix;
    LONG    xLeft;
    LONG    xRight;
    LONG    yTop;
    LONG    xBias;
    LONG    dx;
    LONG    dy;
    LONG    cx;
    LONG    cy;
    LONG    lSrcDelta;
    BYTE*   pjSrcScan0;
    BYTE*   pjSrc;
    ULONG*  pulXlate;
    LONG    i;
    USHORT  uw;
    LONG    cwSrc;
    LONG    lSrcSkip;

    ASSERTDD(psoSrc->iBitmapFormat == BMF_8BPP, "Source must be 8bpp");
    ASSERTDD(c > 0, "Can't handle zero rectangles");
    ASSERTDD(ppdev->iBitmapFormat != BMF_24BPP, "Can't handle 24bpp");

    pjMmBase  = ppdev->pjMmBase;
    xOffset   = ppdev->xOffset;
    yOffset   = ppdev->yOffset;
    cjPelSize = ppdev->cjPelSize;
    pulXlate  = pxlo->pulXlate;

    dx = pptlSrc->x - prclDst->left;
    dy = pptlSrc->y - prclDst->top;     // Add to destination to get source

    lSrcDelta  = psoSrc->lDelta;
    pjSrcScan0 = psoSrc->pvScan0;

    ulHwForeMix = gaul32HwMixFromRop2[rop4 & 0xf];

    // Three set-up writes plus the seven per-rectangle writes of the
    // first rectangle:

    M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 10);

    M32_OW(pjMmBase, DP_CONFIG, (WORD)(FG_COLOR_SRC_HOST | BIT16 |
                                DRAW | WRITE | LSB_FIRST) );
    M32_OW(pjMmBase, ALU_FG_FN, (WORD) ulHwForeMix );
    M32_OW(pjMmBase, ALU_BG_FN, (WORD) ulHwForeMix );

    while(TRUE)
    {
        xLeft  = prcl->left;
        xRight = prcl->right;

        // Clip to the real destination; the transfer itself is widened
        // below:

        M32_OW(pjMmBase, EXT_SCISSOR_L, (SHORT) (xLeft + xOffset) );
        M32_OW(pjMmBase, EXT_SCISSOR_R, (SHORT) (xRight + xOffset - 1) );

        yTop = prcl->top;
        cy   = prcl->bottom - yTop;

        // 'xBias' moves the start of the transfer left to a multiple of
        // four source pixels (four bytes of the 8bpp source), and 'cx'
        // is rounded up to a multiple of four pixels.  The scissor set
        // above discards the extra pixels:

        xBias  = (xLeft + dx) & 3;              // Floor
        xLeft -= xBias;
        cx     = (xRight - xLeft + 3) & ~3;     // Ceiling

        // 'xLeft' may now be negative, so the whole sum (not just its
        // first operand) is narrowed to 16 bits before being written:

        M32_OW(pjMmBase, CUR_X,        (WORD) (xLeft + xOffset) );
        M32_OW(pjMmBase, DEST_X_START, (WORD) (xLeft + xOffset) );
        M32_OW(pjMmBase, DEST_X_END,   (WORD) (xLeft + xOffset + cx) );
        M32_OW(pjMmBase, CUR_Y,        (WORD) (yTop + yOffset) );

        M32_OW(pjMmBase, DEST_Y_END,   (WORD) (yTop + yOffset + cy) );

        pjSrc    = pjSrcScan0 + (yTop + dy) * lSrcDelta + (xLeft + dx);
        lSrcSkip = lSrcDelta - cx;

        // NOTE(review): no data is sent when cjPelSize is neither 1 nor
        // 2, although the blt has already been set up above -- confirm
        // that callers never reach here at other depths.

        if (cjPelSize == 1)
        {
            // This part handles 8bpp output.  Two translated pixels are
            // packed into each 16-bit write, first pixel in the low
            // byte.  'cx' is always a multiple of four, so there is
            // never an odd pixel left over at the end of a scan:

            cwSrc = (cx >> 1);

            do {
                for (i = cwSrc; i != 0; i--)
                {
                    uw  = (USHORT) (pulXlate[*pjSrc++]);
                    uw |= (USHORT) (pulXlate[*pjSrc++] << 8);
                    M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 1);
                    M32_OW(pjMmBase, PIX_TRANS, uw );
                }

                pjSrc += lSrcSkip;
            } while (--cy != 0);
        }
        else if (cjPelSize == 2)
        {
            // This part handles 16bpp output, one 16-bit write per
            // source pixel:

            do {
                for (i = cx; i != 0; i--)
                {
                    uw = (USHORT) (pulXlate[*pjSrc++]);
                    M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 1);
                    M32_OW(pjMmBase, PIX_TRANS, uw );
                }

                pjSrc += lSrcSkip;
            } while (--cy != 0);
        }

        if (--c == 0)
            break;

        prcl++;
        M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 7);
    }

    // Don't forget to reset the clip register:

    M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 2);
    M32_OW(pjMmBase, EXT_SCISSOR_L, (SHORT) 0 );
    M32_OW(pjMmBase, EXT_SCISSOR_R, (SHORT) M32_MAX_SCISSOR );
}
/******************************Public*Routine******************************\
* VOID vM32CopyBlt
*
* Does a screen-to-screen blt of a list of rectangles.
*
* The copy direction is chosen once from the unclipped source point and
* destination rectangle: top-to-bottom, left-to-right unless the two
* overlap, in which case each axis is reversed when the destination lies
* after the source on that axis.  Every rectangle in the list is then
* copied in that direction.
*
* See Blt_DS_SS_ENG_IO_D0 and Blt_DS_SS_TLBR_ENG_IO_D1.
*
\**************************************************************************/

VOID vM32CopyBlt(   // Type FNCOPY
PDEV*   ppdev,
LONG    c,          // Can't be zero
RECTL*  prcl,       // Array of relative coordinates destination rectangles
ULONG   rop4,       // rop4
POINTL* pptlSrc,    // Original unclipped source point
RECTL*  prclDst)    // Original unclipped destination rectangle
{
    BYTE*   pjMmBase;
    LONG    xOffset;
    LONG    yOffset;
    LONG    dx;
    LONG    dy;
    LONG    fTopDown;
    LONG    fLeftToRight;
    LONG    xSrc;
    LONG    ySrc;
    LONG    xDst;
    LONG    yDst;
    LONG    cx;
    LONG    cy;
    LONG    xSrcStart;
    LONG    xSrcEnd;
    LONG    xDstStart;
    LONG    xDstEnd;
    LONG    ySrcStart;
    LONG    yDstStart;
    LONG    yDstEnd;

    ASSERTDD(c > 0, "Can't handle zero rectangles");
    ASSERTDD(((rop4 & 0xff00) >> 8) == (rop4 & 0xff),
             "Expect only a rop2");

    pjMmBase = ppdev->pjMmBase;
    xOffset  = ppdev->xOffset;
    yOffset  = ppdev->yOffset;

    // Three set-up writes plus the nine writes of the first rectangle:

    M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 12);

    M32_OW(pjMmBase, DP_CONFIG, FG_COLOR_SRC_BLIT | DRAW | WRITE);
    M32_OW(pjMmBase, ALU_FG_FN, gaul32HwMixFromRop2[rop4 & 0xf]);

    dx = pptlSrc->x - prclDst->left;        // Add to destination to get source
    dy = pptlSrc->y - prclDst->top;

    // The accelerator may not be as fast at doing right-to-left copies, so
    // only do them when the rectangles truly overlap:

    fTopDown     = 1;
    fLeftToRight = 1;

    if (OVERLAP(prclDst, pptlSrc))
    {
        fTopDown     = (prclDst->top  <= pptlSrc->y);
        fLeftToRight = (prclDst->left <= pptlSrc->x);
    }

    M32_OW(pjMmBase, SRC_Y_DIR, fTopDown);

    for (;;)
    {
        // 'prcl' is a destination rectangle; adding (dx, dy) gives the
        // matching source position:

        xDst = xOffset + prcl->left;
        yDst = yOffset + prcl->top;
        xSrc = xDst + dx;
        ySrc = yDst + dy;
        cx   = prcl->right  - prcl->left;
        cy   = prcl->bottom - prcl->top;

        if (fLeftToRight)
        {
            xSrcStart = xSrc;
            xSrcEnd   = xSrc + cx;
            xDstStart = xDst;
            xDstEnd   = xDst + cx;
        }
        else
        {
            // Right-to-left: start at the right edge and end at the
            // left edge:

            xSrcStart = xSrc + cx;
            xSrcEnd   = xSrc;
            xDstStart = xDst + cx;
            xDstEnd   = xDst;
        }

        if (fTopDown)
        {
            ySrcStart = ySrc;
            yDstStart = yDst;
            yDstEnd   = yDst + cy;
        }
        else
        {
            // Bottom-up: start on the last scan of the rectangle and
            // end one scan above its top:

            ySrcStart = ySrc + cy - 1;
            yDstStart = yDst + cy - 1;
            yDstEnd   = yDst - 1;
        }

        M32_OW(pjMmBase, M32_SRC_X,       xSrcStart);
        M32_OW(pjMmBase, M32_SRC_X_START, xSrcStart);
        M32_OW(pjMmBase, M32_SRC_X_END,   xSrcEnd);
        M32_OW(pjMmBase, M32_SRC_Y,       ySrcStart);

        M32_OW(pjMmBase, CUR_X,           xDstStart);
        M32_OW(pjMmBase, DEST_X_START,    xDstStart);
        M32_OW(pjMmBase, DEST_X_END,      xDstEnd);
        M32_OW(pjMmBase, CUR_Y,           yDstStart);

        vM32QuietDown(ppdev, pjMmBase);

        M32_OW(pjMmBase, DEST_Y_END,      yDstEnd);

        c--;
        if (c == 0)
        {
            break;
        }

        // Room for the nine writes of the next rectangle:

        prcl++;
        M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 9);
    }
}
|