/******************************Module*Header*******************************\ * Module Name: bltio.c * * Contains the low-level in/out blt functions. * * Hopefully, if you're basing your display driver on this code, to * support all of DrvBitBlt and DrvCopyBits, you'll only have to implement * the following routines. You shouldn't have to modify anything in * 'bitblt.c'. I've tried to make these routines as few, modular, simple, * and efficient as I could, while still accelerating as many calls as * possible that would be cost-effective in terms of performance wins * versus size and effort. * * Note: In the following, 'relative' coordinates refers to coordinates * that haven't yet had the offscreen bitmap (DFB) offset applied. * 'Absolute' coordinates have had the offset applied. For example, * we may be told to blt to (1, 1) of the bitmap, but the bitmap may * be sitting in offscreen memory starting at coordinate (0, 768) -- * (1, 1) would be the 'relative' start coordinate, and (1, 769) * would be the 'absolute' start coordinate'. * * Copyright (c) 1992-1994 Microsoft Corporation * \**************************************************************************/ #include "precomp.h" #if DBG // Useful aid for disabling any ATI extensions for debugging purposes: BOOL gb8514a = FALSE; #endif // DBG /******************************Public*Routine******************************\ * VOID vIoFillSolid * * Fills a list of rectangles with a solid colour. * \**************************************************************************/ VOID vIoFillSolid( // Type FNFILL PDEV* ppdev, LONG c, // Can't be zero RECTL* prcl, // List of rectangles to be filled, in relative // coordinates ULONG ulHwForeMix, // Hardware mix mode ULONG ulHwBackMix, // Not used RBRUSH_COLOR rbc, // Drawing colour is rbc.iSolidColor POINTL* pptlBrush) // Not used { ASSERTDD(c > 0, "Can't handle zero rectangles"); ASSERTDD(ulHwForeMix <= 15, "Weird hardware Rop"); // It's quite likely that we've just been called from GDI, so it's // even more likely that the accelerator's graphics engine has been // sitting around idle. Rather than doing a FIFO_WAIT(3) here and // then a FIFO_WAIT(5) before outputing the actual rectangle, // we can avoid an 'in' (which can be quite expensive, depending on // the card) by doing a single FIFO_WAIT(8) right off the bat: IO_FIFO_WAIT(ppdev, 8); IO_PIX_CNTL(ppdev, ALL_ONES); IO_FRGD_MIX(ppdev, FOREGROUND_COLOR | ulHwForeMix); IO_FRGD_COLOR(ppdev, rbc.iSolidColor); while(TRUE) { IO_CUR_X(ppdev, prcl->left); IO_CUR_Y(ppdev, prcl->top); IO_MAJ_AXIS_PCNT(ppdev, prcl->right - prcl->left - 1); IO_MIN_AXIS_PCNT(ppdev, prcl->bottom - prcl->top - 1); IO_CMD(ppdev, RECTANGLE_FILL | DRAWING_DIR_TBLRXM | DRAW | DIR_TYPE_XY | LAST_PIXEL_ON | MULTIPLE_PIXELS | WRITE); if (--c == 0) return; prcl++; IO_FIFO_WAIT(ppdev, 5); } } /******************************Public*Routine******************************\ * VOID vIoSlowPatRealize * * This routine transfers an 8x8 pattern to off-screen display memory, and * duplicates it to make a 64x64 cached realization which is then used by * vIoFillPatSlow as the basic building block for doing 'slow' pattern output * via repeated screen-to-screen blts. * \**************************************************************************/ VOID vIoSlowPatRealize( PDEV* ppdev, RBRUSH* prb, // Points to brush realization structure BOOL bTransparent) // FALSE for normal patterns; TRUE for // patterns with a mask when the background // mix is LEAVE_ALONE. { BRUSHENTRY* pbe; LONG iBrushCache; LONG x; LONG y; BYTE* pjSrc; BYTE* pjDst; BYTE jSrc; LONG i; WORD awBuf[8]; pbe = prb->pbe; if ((pbe == NULL) || (pbe->prbVerify != prb)) { // We have to allocate a new off-screen cache brush entry for // the brush: iBrushCache = ppdev->iBrushCache; pbe = &ppdev->abe[iBrushCache]; iBrushCache++; if (iBrushCache >= ppdev->cBrushCache) iBrushCache = 0; ppdev->iBrushCache = iBrushCache; // Update our links: pbe->prbVerify = prb; prb->pbe = pbe; } // Load some pointer variables onto the stack, so that we don't have // to keep dereferencing their pointers: x = pbe->x; y = pbe->y; prb->bTransparent = bTransparent; // I considered doing the colour expansion for 1bpp brushes in // software, but by letting the hardware do it, we don't have // to do as many OUTs to transfer the pattern. if (prb->fl & RBRUSH_2COLOR) { // We're going to do a colour-expansion ('across the plane') // bitblt of the 1bpp 8x8 pattern to the screen. if (!bTransparent) { IO_FIFO_WAIT(ppdev, 4); IO_FRGD_MIX(ppdev, FOREGROUND_COLOR | OVERPAINT); IO_BKGD_MIX(ppdev, BACKGROUND_COLOR | OVERPAINT); IO_FRGD_COLOR(ppdev, prb->ulForeColor); IO_BKGD_COLOR(ppdev, prb->ulBackColor); IO_FIFO_WAIT(ppdev, 5); } else { IO_FIFO_WAIT(ppdev, 7); IO_FRGD_MIX(ppdev, LOGICAL_1); IO_BKGD_MIX(ppdev, LOGICAL_0); } IO_PIX_CNTL(ppdev, CPU_DATA); IO_ABS_CUR_X(ppdev, x); IO_ABS_CUR_Y(ppdev, y); IO_MAJ_AXIS_PCNT(ppdev, 7); // Brush is 8 wide IO_MIN_AXIS_PCNT(ppdev, 7); // Brush is 8 high IO_GP_WAIT(ppdev); IO_CMD(ppdev, RECTANGLE_FILL | BUS_SIZE_16 | WAIT | DRAWING_DIR_TBLRXM | DRAW | LAST_PIXEL_ON | MULTIPLE_PIXELS | WRITE | BYTE_SWAP); CHECK_DATA_READY(ppdev); pjSrc = (BYTE*) &prb->aulPattern[0]; pjDst = (BYTE*) &awBuf[0]; // Convert in-line to nibble arrangment: // LATER: This should be done in DrvRealizeBrush! for (i = 8; i != 0; i--) { jSrc = *pjSrc; pjSrc += 2; // We had an extra byte on every row *pjDst++ = jSrc >> 3; *pjDst++ = jSrc + jSrc; } vDataPortOut(ppdev, &awBuf[0], 8); // Each word transferred comprises one row of the // pattern, and there are 8 rows in the pattern CHECK_DATA_COMPLETE(ppdev); } else { ASSERTDD(!bTransparent, "Shouldn't have been asked for transparency with a non-1bpp brush"); IO_FIFO_WAIT(ppdev, 6); IO_PIX_CNTL(ppdev, ALL_ONES); IO_FRGD_MIX(ppdev, SRC_CPU_DATA | OVERPAINT); IO_ABS_CUR_X(ppdev, x); IO_ABS_CUR_Y(ppdev, y); IO_MAJ_AXIS_PCNT(ppdev, 7); // Brush is 8 wide IO_MIN_AXIS_PCNT(ppdev, 7); // Brush is 8 high IO_GP_WAIT(ppdev); IO_CMD(ppdev, RECTANGLE_FILL | BUS_SIZE_16| WAIT | DRAWING_DIR_TBLRXM | DRAW | LAST_PIXEL_ON | SINGLE_PIXEL | WRITE | BYTE_SWAP); CHECK_DATA_READY(ppdev); vDataPortOut(ppdev, &prb->aulPattern[0], ((TOTAL_BRUSH_SIZE / 2) << ppdev->cPelSize)); CHECK_DATA_COMPLETE(ppdev); } // ÚÄÂÄÂÄÄÄÂÄÄÄÄÄÄÄÂÄ¿ // ³0³2³3 ³4 ³1³ We now have an 8x8 colour-expanded copy of // ÃÄÁÄÁÄÄÄÁÄÄÄÄÄÄÄÁÄ´ the pattern sitting in off-screen memory, // ³5 ³ represented here by square '0'. // ³ ³ // ³ ³ We're now going to expand the pattern to // ³ ³ 72x72 by repeatedly copying larger rectangles // ³ ³ in the indicated order, and doing a 'rolling' // ³ ³ blt to copy vertically. // ³ ³ // ÀÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÙ // Copy '1': IO_FIFO_WAIT(ppdev, 7); IO_PIX_CNTL(ppdev, ALL_ONES); IO_FRGD_MIX(ppdev, SRC_DISPLAY_MEMORY | OVERPAINT); // Note that 'maj_axis_pcnt' and 'min_axis_pcnt' are already // correct. IO_ABS_CUR_X(ppdev, x); IO_ABS_CUR_Y(ppdev, y); IO_ABS_DEST_X(ppdev, x + 64); IO_ABS_DEST_Y(ppdev, y); IO_CMD(ppdev, BITBLT | DRAW | DIR_TYPE_XY | WRITE | MULTIPLE_PIXELS | DRAWING_DIR_TBLRXM); // Copy '2': IO_FIFO_WAIT(ppdev, 8); IO_ABS_DEST_X(ppdev, x + 8); IO_ABS_DEST_Y(ppdev, y); IO_CMD(ppdev, BITBLT | DRAW | DIR_TYPE_XY | WRITE | MULTIPLE_PIXELS | DRAWING_DIR_TBLRXM); // Copy '3': IO_ABS_DEST_X(ppdev, x + 16); IO_ABS_DEST_Y(ppdev, y); IO_MAJ_AXIS_PCNT(ppdev, 15); IO_CMD(ppdev, BITBLT | DRAW | DIR_TYPE_XY | WRITE | MULTIPLE_PIXELS | DRAWING_DIR_TBLRXM); IO_ABS_DEST_X(ppdev, x + 32); // Copy '4': IO_FIFO_WAIT(ppdev, 8); IO_ABS_DEST_Y(ppdev, y); IO_MAJ_AXIS_PCNT(ppdev, 31); IO_CMD(ppdev, BITBLT | DRAW | DIR_TYPE_XY | WRITE | MULTIPLE_PIXELS | DRAWING_DIR_TBLRXM); // Copy '5': IO_ABS_DEST_X(ppdev, x); IO_ABS_DEST_Y(ppdev, y + 8); IO_MAJ_AXIS_PCNT(ppdev, 71); IO_MIN_AXIS_PCNT(ppdev, 63); IO_CMD(ppdev, BITBLT | DRAW | DIR_TYPE_XY | WRITE | MULTIPLE_PIXELS | DRAWING_DIR_TBLRXM); } /******************************Public*Routine******************************\ * VOID vIoFillPatSlow * * Uses the screen-to-screen blting ability of the accelerator to fill a * list of rectangles with a specified pattern. This routine is 'slow' * merely in the sense that it doesn't use any built-in hardware pattern * support that may be built into the accelerator. * \**************************************************************************/ VOID vIoFillPatSlow( // Type FNFILL PDEV* ppdev, LONG c, // Can't be zero RECTL* prcl, // List of rectangles to be filled, in relative // coordinates ULONG ulHwForeMix, // Hardware mix mode (foreground mix mode if // the brush has a mask) ULONG ulHwBackMix, // Not used (unless the brush has a mask, in // which case it's the background mix mode) RBRUSH_COLOR rbc, // rbc.prb points to brush realization structure POINTL* pptlBrush) // Pattern alignment { BOOL bTransparent; BOOL bExponential; LONG x; LONG y; LONG yTmp; LONG cxToGo; LONG cyToGo; LONG cxThis; LONG cyThis; LONG xOrg; LONG yOrg; LONG xBrush; LONG yBrush; LONG cyOriginal; BRUSHENTRY* pbe; // Pointer to brush entry data, which is used // for keeping track of the location and status // of the pattern bits cached in off-screen // memory // C'est dommage que je ne connais pas quoi je fais. ASSERTDD(c > 0, "Can't handle zero rectangles"); ASSERTDD(rbc.prb->pbe != NULL, "Unexpected Null pbe in vIoSlowPatBlt"); ASSERTDD(ulHwForeMix <= 15, "Weird hardware Rop"); ASSERTDD((ulHwForeMix == ulHwBackMix) || (ulHwBackMix == LEAVE_ALONE), "Only expect transparency from GDI for masked brushes"); bTransparent = (ulHwForeMix != ulHwBackMix); if ((rbc.prb->pbe->prbVerify != rbc.prb) || (rbc.prb->bTransparent != bTransparent)) { vIoSlowPatRealize(ppdev, rbc.prb, bTransparent); } ASSERTDD(rbc.prb->bTransparent == bTransparent, "Not realized with correct transparency"); if (!bTransparent) { IO_FIFO_WAIT(ppdev, 2); IO_PIX_CNTL(ppdev, ALL_ONES); IO_FRGD_MIX(ppdev, SRC_DISPLAY_MEMORY | ulHwForeMix); // We special case OVERPAINT mixes because we can implement // an exponential fill: every blt will double the size of // the current rectangle by using the portion of the pattern // that has already been done for this rectangle as the source. // // Note that there's no point in also checking for LOGICAL_0 // or LOGICAL_1 because those will be taken care of by the // solid fill routines, and I can't be bothered to check for // NOTNEW: bExponential = (ulHwForeMix == OVERPAINT); } else { IO_FIFO_WAIT(ppdev, 5); IO_PIX_CNTL(ppdev, DISPLAY_MEMORY); IO_FRGD_MIX(ppdev, FOREGROUND_COLOR | ulHwForeMix); IO_BKGD_MIX(ppdev, BACKGROUND_COLOR | LEAVE_ALONE); IO_FRGD_COLOR(ppdev, rbc.prb->ulForeColor); IO_RD_MASK(ppdev, 1); // Pick a plane, any plane bExponential = FALSE; } // Note that since we do our brush alignment calculations in // relative coordinates, we should keep the brush origin in // relative coordinates as well: xOrg = pptlBrush->x; yOrg = pptlBrush->y; pbe = rbc.prb->pbe; xBrush = pbe->x; yBrush = pbe->y; do { x = prcl->left; y = prcl->top; cxToGo = prcl->right - x; cyToGo = prcl->bottom - y; if ((cxToGo <= SLOW_BRUSH_DIMENSION) && (cyToGo <= SLOW_BRUSH_DIMENSION)) { IO_FIFO_WAIT(ppdev, 7); IO_ABS_CUR_X(ppdev, ((x - xOrg) & 7) + xBrush); IO_ABS_CUR_Y(ppdev, ((y - yOrg) & 7) + yBrush); IO_DEST_X(ppdev, x); IO_DEST_Y(ppdev, y); IO_MAJ_AXIS_PCNT(ppdev, cxToGo - 1); IO_MIN_AXIS_PCNT(ppdev, cyToGo - 1); IO_CMD(ppdev, BITBLT | DRAW | DIR_TYPE_XY | WRITE | MULTIPLE_PIXELS | DRAWING_DIR_TBLRXM); } else if (bExponential) { cyThis = SLOW_BRUSH_DIMENSION; cyToGo -= cyThis; if (cyToGo < 0) cyThis += cyToGo; cxThis = SLOW_BRUSH_DIMENSION; cxToGo -= cxThis; if (cxToGo < 0) cxThis += cxToGo; IO_FIFO_WAIT(ppdev, 7); IO_MAJ_AXIS_PCNT(ppdev, cxThis - 1); IO_MIN_AXIS_PCNT(ppdev, cyThis - 1); IO_DEST_X(ppdev, x); IO_DEST_Y(ppdev, y); IO_ABS_CUR_X(ppdev, ((x - xOrg) & 7) + xBrush); IO_ABS_CUR_Y(ppdev, ((y - yOrg) & 7) + yBrush); IO_CMD(ppdev, BITBLT | DRAW | DIR_TYPE_XY | WRITE | MULTIPLE_PIXELS | DRAWING_DIR_TBLRXM); IO_FIFO_WAIT(ppdev, 2); IO_CUR_X(ppdev, x); IO_CUR_Y(ppdev, y); x += cxThis; while (cxToGo > 0) { // First, expand out to the right, doubling our size // each time: cxToGo -= cxThis; if (cxToGo < 0) cxThis += cxToGo; IO_FIFO_WAIT(ppdev, 4); IO_MAJ_AXIS_PCNT(ppdev, cxThis - 1); IO_DEST_X(ppdev, x); IO_DEST_Y(ppdev, y); IO_CMD(ppdev, BITBLT | DRAW | DIR_TYPE_XY | WRITE | MULTIPLE_PIXELS | DRAWING_DIR_TBLRXM); x += cxThis; cxThis *= 2; } if (cyToGo > 0) { // Now do a 'rolling blt' to pattern the rest vertically: IO_FIFO_WAIT(ppdev, 5); IO_DEST_X(ppdev, prcl->left); IO_DEST_Y(ppdev, prcl->top + cyThis); IO_MAJ_AXIS_PCNT(ppdev, prcl->right - prcl->left - 1); IO_MIN_AXIS_PCNT(ppdev, cyToGo - 1); IO_CMD(ppdev, BITBLT | DRAW | DIR_TYPE_XY | WRITE | MULTIPLE_PIXELS | DRAWING_DIR_TBLRXM); } } else { // We handle arbitrary mixes simply by repeatedly tiling // our cached pattern over the entire rectangle: IO_FIFO_WAIT(ppdev, 2); IO_ABS_CUR_X(ppdev, ((x - xOrg) & 7) + xBrush); IO_ABS_CUR_Y(ppdev, ((y - yOrg) & 7) + yBrush); cyOriginal = cyToGo; // Have to remember for later... do { cxThis = SLOW_BRUSH_DIMENSION; cxToGo -= cxThis; if (cxToGo < 0) cxThis += cxToGo; IO_FIFO_WAIT(ppdev, 2); IO_MAJ_AXIS_PCNT(ppdev, cxThis - 1); IO_DEST_X(ppdev, x); x += cxThis; // Get ready for next column cyToGo = cyOriginal; // Have to reset for each new column yTmp = y; do { cyThis = SLOW_BRUSH_DIMENSION; cyToGo -= cyThis; if (cyToGo < 0) cyThis += cyToGo; IO_FIFO_WAIT(ppdev, 3); IO_DEST_Y(ppdev, yTmp); yTmp += cyThis; IO_MIN_AXIS_PCNT(ppdev, cyThis - 1); IO_CMD(ppdev, BITBLT | DRAW | DIR_TYPE_XY | WRITE | MULTIPLE_PIXELS | DRAWING_DIR_TBLRXM); } while (cyToGo > 0); } while (cxToGo > 0); } prcl++; } while (--c != 0); } /******************************Public*Routine******************************\ * VOID vIoXfer1bpp * * This routine colours expands a monochrome bitmap, possibly with different * Rop2's for the foreground and background. It will be called in the * following cases: * * 1) To colour-expand the monochrome text buffer for the vFastText routine. * 2) To blt a 1bpp source with a simple Rop2 between the source and * destination. * 3) To blt a true Rop3 when the source is a 1bpp bitmap that expands to * white and black, and the pattern is a solid colour. * 4) To handle a true Rop4 that works out to be Rop2's between the pattern * and destination. * * Needless to say, making this routine fast can leverage a lot of * performance. * \**************************************************************************/ VOID vIoXfer1bpp( // Type FNXFER PDEV* ppdev, LONG c, // Count of rectangles, can't be zero RECTL* prcl, // List of destination rectangles, in relative // coordinates ULONG ulHwForeMix,// Foreground hardware mix ULONG ulHwBackMix,// Background hardware mix SURFOBJ* psoSrc, // Source surface POINTL* pptlSrc, // Original unclipped source point RECTL* prclDst, // Original unclipped destination rectangle XLATEOBJ* pxlo) // Translate that provides colour-expansion information { LONG dxSrc; LONG dySrc; LONG cx; LONG cy; LONG lSrcDelta; BYTE* pjSrcScan0; BYTE* pjSrc; LONG cjSrc; LONG xLeft; LONG xRight; LONG yTop; LONG yBottom; LONG xRotateLeft; LONG cBitsNeededForFirstNibblePair; ASSERTDD(c > 0, "Can't handle zero rectangles"); ASSERTDD(ulHwForeMix <= 15, "Weird hardware Rop"); ASSERTDD(ulHwBackMix <= 15, "Weird hardware Rop"); ASSERTDD(pptlSrc != NULL && psoSrc != NULL, "Can't have NULL sources"); IO_FIFO_WAIT(ppdev, 5); IO_PIX_CNTL(ppdev, CPU_DATA); IO_BKGD_MIX(ppdev, BACKGROUND_COLOR | ulHwBackMix); IO_FRGD_MIX(ppdev, FOREGROUND_COLOR | ulHwForeMix); IO_BKGD_COLOR(ppdev, pxlo->pulXlate[0]); IO_FRGD_COLOR(ppdev, pxlo->pulXlate[1]); dxSrc = pptlSrc->x - prclDst->left; dySrc = pptlSrc->y - prclDst->top; // Add to destination to get source lSrcDelta = psoSrc->lDelta; pjSrcScan0 = psoSrc->pvScan0; do { IO_FIFO_WAIT(ppdev, 6); yBottom = prcl->bottom; yTop = prcl->top; xRight = prcl->right; xLeft = prcl->left; cBitsNeededForFirstNibblePair = 8 - (xLeft & 7); IO_SCISSORS_L(ppdev, xLeft); xLeft = (xLeft) & ~7; IO_SCISSORS_R(ppdev, xRight - 1); xRight = (xRight + 7) & ~7; IO_CUR_X(ppdev, xLeft); IO_CUR_Y(ppdev, yTop); cx = xRight - xLeft; cy = yBottom - yTop; IO_MAJ_AXIS_PCNT(ppdev, cx - 1); IO_MIN_AXIS_PCNT(ppdev, cy - 1); cjSrc = cx >> 3; // We'll be transferring WORDs, // but every word accounts for // 8 pels = 1 byte of the source pjSrc = pjSrcScan0 + (yTop + dySrc) * lSrcDelta + ((xLeft + dxSrc) >> 3); // Start is byte aligned xRotateLeft = (dxSrc) & 7; // Amount by which to rotate left IO_GP_WAIT(ppdev); IO_CMD(ppdev, RECTANGLE_FILL | BUS_SIZE_16| WAIT | DRAWING_DIR_TBLRXM | DRAW | LAST_PIXEL_ON | MULTIPLE_PIXELS | WRITE | BYTE_SWAP); CHECK_DATA_READY(ppdev); _asm { ; eax = scratch ; ebx = count of words output per scan ; ecx = amount to rotate left ; edx = port ; esi = source pointer ; edi = source delta between end of last scan and start of next mov ecx,xRotateLeft mov edx,PIX_TRANS mov esi,pjSrc mov edi,lSrcDelta sub edi,cjSrc test ecx,ecx jz UnrotatedScanLoop RotatedScanLoop: mov ebx,cjSrc cmp ecx,cBitsNeededForFirstNibblePair jge RotatedDontNeedFirstByte RotatedWordLoop: mov ah,[esi] RotatedDontNeedFirstByte: mov al,[esi + 1] shl eax,cl inc esi mov al,ah shr al,3 add ah,ah out dx,ax dec ebx jnz RotatedWordLoop add esi,edi dec cy jnz RotatedScanLoop jmp AllDone UnrotatedScanLoop: mov ebx,cjSrc UnrotatedWordLoop: mov ah,[esi] inc esi mov al,ah shr al,3 add ah,ah out dx,ax dec ebx jnz UnrotatedWordLoop add esi,edi dec cy jnz UnrotatedScanLoop AllDone: } CHECK_DATA_COMPLETE(ppdev); prcl++; } while (--c != 0); // We always have to reset the clipping: IO_FIFO_WAIT(ppdev, 2); IO_ABS_SCISSORS_L(ppdev, 0); IO_ABS_SCISSORS_R(ppdev, ppdev->cxMemory - 1); } /******************************Public*Routine******************************\ * VOID vIoXfer1bppPacked * * This is the same routine as 'vIoXfer1bpp', except that it takes * advantage of the ATI's packed bit transfers to improve speed. * * Needless to say, this routine can only be called when running * on an ATI adapter. * \**************************************************************************/ VOID vIoXfer1bppPacked( // Type FNXFER PDEV* ppdev, LONG c, // Count of rectangles, can't be zero RECTL* prcl, // List of destination rectangles, in relative // coordinates ULONG ulHwForeMix,// Foreground hardware mix ULONG ulHwBackMix,// Background hardware mix SURFOBJ* psoSrc, // Source surface POINTL* pptlSrc, // Original unclipped source point RECTL* prclDst, // Original unclipped destination rectangle XLATEOBJ* pxlo) // Translate that provides colour-expansion information { LONG dxSrc; LONG dySrc; LONG cy; LONG lSrcDelta; LONG lTmpDelta; BYTE* pjSrcScan0; BYTE* pjSrc; LONG cwSrc; LONG xLeft; LONG xRight; LONG yTop; LONG yBottom; LONG xBiasLeft; LONG xBiasRight; #if DBG { if (gb8514a) { vIoXfer1bpp(ppdev, c, prcl, ulHwForeMix, ulHwBackMix, psoSrc, pptlSrc, prclDst, pxlo); return; } } #endif // DBG ASSERTDD(c > 0, "Can't handle zero rectangles"); ASSERTDD(ulHwForeMix <= 15, "Weird hardware Rop"); ASSERTDD(ulHwBackMix <= 15, "Weird hardware Rop"); ASSERTDD(pptlSrc != NULL && psoSrc != NULL, "Can't have NULL sources"); while (INPW(EXT_FIFO_STATUS) & FOURTEEN_WORDS) ; OUT_WORD(ALU_FG_FN, ulHwForeMix); OUT_WORD(ALU_BG_FN, ulHwBackMix); OUT_WORD(FRGD_COLOR, pxlo->pulXlate[1]); OUT_WORD(BKGD_COLOR, pxlo->pulXlate[0]); // Add 'dxSrc' and 'dySrc' to a destination coordinate to get source. // Because we will be explicitly dealing with absolute destination // coordinates (we're not using the normal accelerator macros), we have // to explicitly account for the DFB offset: dxSrc = pptlSrc->x - (prclDst->left + ppdev->xOffset); dySrc = pptlSrc->y - (prclDst->top + ppdev->yOffset); lSrcDelta = psoSrc->lDelta; pjSrcScan0 = psoSrc->pvScan0; while (TRUE) { // Since we're not using the normal accelerator register macros, // we have to explicitly account for the DFB offset: yBottom = prcl->bottom + ppdev->yOffset; yTop = prcl->top + ppdev->yOffset; xRight = prcl->right + ppdev->xOffset; xLeft = prcl->left + ppdev->xOffset; // Make sure we're word aligned on the source, because we're // going to be transferring words and we don't want to risk // reading past the end of the bitmap: xBiasLeft = (xLeft + dxSrc) & 15; if (xBiasLeft != 0) { // Rev 3 ATI chips have goofy timing bugs on 66 MHz DX-2 // computers where some extended will not be correctly // set the first time. The extended scissors registers // have this problem, but setting them twice seems to work: OUT_WORD(EXT_SCISSOR_L, xLeft); OUT_WORD(EXT_SCISSOR_L, xLeft); xLeft -= xBiasLeft; } // The width has to be a word multiple: xBiasRight = (xRight - xLeft) & 15; if (xBiasRight != 0) { OUT_WORD(EXT_SCISSOR_R, xRight - 1); OUT_WORD(EXT_SCISSOR_R, xRight - 1); xRight += 16 - xBiasRight; } OUT_WORD(DP_CONFIG, FG_COLOR_SRC_FG | BG_COLOR_SRC_BG | DATA_ORDER | EXT_MONO_SRC_HOST | DRAW | WRITE | DATA_WIDTH); OUT_WORD(DEST_X_START, xLeft); OUT_WORD(CUR_X, xLeft); OUT_WORD(DEST_X_END, xRight); OUT_WORD(CUR_Y, yTop); OUT_WORD(DEST_Y_END, yBottom); cwSrc = (xRight - xLeft) / 16; // We'll be transferring WORDs pjSrc = pjSrcScan0 + (yTop + dySrc) * lSrcDelta + (xLeft + dxSrc) / 8; // Start is byte aligned (note // that we don't have to add // xBiasLeft) cy = yBottom - yTop; lTmpDelta = lSrcDelta - 2 * cwSrc; // To be safe, we make sure there are always as many free FIFO entries // as we'll transfer (note that this implementation isn't particularly // efficient, especially for short scans): _asm { ; eax = used for IN ; ebx = count of words remaining on current scan ; ecx = used for REP ; edx = used for IN and OUT ; esi = current source pointer ; edi = count of scans mov esi,pjSrc mov edi,cy Scan_Loop: mov ebx,cwSrc Batch_Loop: mov edx,EXT_FIFO_STATUS in ax,dx and eax,SIXTEEN_WORDS jnz short Batch_Loop mov edx,PIX_TRANS sub ebx,16 jle short Finish_Scan mov ecx,16 rep outsw jmp short Batch_Loop Finish_Scan: add ebx,16 mov ecx,ebx rep outsw add esi,lTmpDelta dec edi jnz Scan_Loop } if ((xBiasLeft | xBiasRight) != 0) { // Reset the clipping only if we used it: while (INPW(EXT_FIFO_STATUS) & FOUR_WORDS) ; OUT_WORD(EXT_SCISSOR_L, 0); OUT_WORD(EXT_SCISSOR_R, ppdev->cxMemory - 1); OUT_WORD(EXT_SCISSOR_L, 0); OUT_WORD(EXT_SCISSOR_R, ppdev->cxMemory - 1); } if (--c == 0) return; prcl++; // Do the wait for the next round now: while (INPW(EXT_FIFO_STATUS) & TEN_WORDS) ; } } /******************************Public*Routine******************************\ * VOID vIoXfer4bpp * * Does a 4bpp transfer from a bitmap to the screen. * * NOTE: The screen must be 8bpp for this function to be called! * * The reason we implement this is that a lot of resources are kept as 4bpp, * and used to initialize DFBs, some of which we of course keep off-screen. * \**************************************************************************/ // XLATE_BUFFER_SIZE defines the size of the stack-based buffer we use // for doing the translate. Note that in general stack buffers should // be kept as small as possible. The OS guarantees us only 8k for stack // from GDI down to the display driver in low memory situations; if we // ask for more, we'll access violate. Note also that at any time the // stack buffer cannot be larger than a page (4k) -- otherwise we may // miss touching the 'guard page' and access violate then too. #define XLATE_BUFFER_SIZE 256 VOID vIoXfer4bpp( // Type FNXFER PDEV* ppdev, LONG c, // Count of rectangles, can't be zero RECTL* prcl, // List of destination rectangles, in relative // coordinates ULONG ulHwForeMix,// Hardware mix ULONG ulHwBackMix,// Not used SURFOBJ* psoSrc, // Source surface POINTL* pptlSrc, // Original unclipped source point RECTL* prclDst, // Original unclipped destination rectangle XLATEOBJ* pxlo) // Translate that provides colour-expansion information { LONG dx; LONG dy; LONG cx; LONG cy; LONG lSrcDelta; BYTE* pjSrcScan0; BYTE* pjScan; BYTE* pjSrc; BYTE* pjDst; LONG cxThis; LONG cxToGo; LONG xSrc; LONG iLoop; BYTE jSrc; ULONG* pulXlate; BOOL bResetScissors; BYTE ajBuf[XLATE_BUFFER_SIZE]; ASSERTDD(ppdev->iBitmapFormat == BMF_8BPP, "Screen must be 8bpp"); ASSERTDD(psoSrc->iBitmapFormat == BMF_4BPP, "Source must be 4bpp"); ASSERTDD(c > 0, "Can't handle zero rectangles"); ASSERTDD(ulHwForeMix <= 15, "Weird hardware Rop"); dx = pptlSrc->x - prclDst->left; dy = pptlSrc->y - prclDst->top; // Add to destination to get source lSrcDelta = psoSrc->lDelta; pjSrcScan0 = psoSrc->pvScan0; IO_FIFO_WAIT(ppdev, 7); IO_PIX_CNTL(ppdev, ALL_ONES); IO_FRGD_MIX(ppdev, SRC_CPU_DATA | ulHwForeMix); while(TRUE) { cy = prcl->bottom - prcl->top; cx = prcl->right - prcl->left; bResetScissors = FALSE; if (cx & 1) { // When using word transfers, the 8514/A will 'byte wrap' // transfers of odd byte width, such that end words will // be split so that on byte is the end of one scan, and the // other byte is the start of the next scan. // // This complicates things too much, so we simply always do // word transfers of even byte width by making use of the // clipping register: bResetScissors = TRUE; IO_SCISSORS_R(ppdev, prcl->right - 1); IO_MAJ_AXIS_PCNT(ppdev, cx); } else { IO_MAJ_AXIS_PCNT(ppdev, cx - 1); } IO_MIN_AXIS_PCNT(ppdev, cy - 1); IO_CUR_X(ppdev, prcl->left); IO_CUR_Y(ppdev, prcl->top); pulXlate = pxlo->pulXlate; xSrc = prcl->left + dx; pjScan = pjSrcScan0 + (prcl->top + dy) * lSrcDelta + (xSrc >> 1); IO_GP_WAIT(ppdev); IO_CMD(ppdev, RECTANGLE_FILL | BUS_SIZE_16| WAIT | DRAWING_DIR_TBLRXM | DRAW | LAST_PIXEL_ON | SINGLE_PIXEL | WRITE | BYTE_SWAP); CHECK_DATA_READY(ppdev); do { pjSrc = pjScan; cxToGo = cx; // # of pels per scan in 4bpp source do { cxThis = XLATE_BUFFER_SIZE; // We can handle XLATE_BUFFER_SIZE number // of pels in this xlate batch cxToGo -= cxThis; // cxThis will be the actual number of // pels we'll do in this xlate batch if (cxToGo < 0) cxThis += cxToGo; pjDst = ajBuf; // Points to our temporary batch buffer // We handle alignment ourselves because it's easy to // do, rather than pay the cost of setting/resetting // the scissors register: if (xSrc & 1) { // When unaligned, we have to be careful not to read // past the end of the 4bpp bitmap (that could // potentially cause us to access violate): iLoop = cxThis >> 1; // Each loop handles 2 pels; // we'll handle odd pel // separately jSrc = *pjSrc; while (iLoop-- != 0) { *pjDst++ = (BYTE) pulXlate[jSrc & 0xf]; jSrc = *(++pjSrc); *pjDst++ = (BYTE) pulXlate[jSrc >> 4]; } if (cxThis & 1) *pjDst = (BYTE) pulXlate[jSrc & 0xf]; } else { iLoop = (cxThis + 1) >> 1; // Each loop handles 2 pels do { jSrc = *pjSrc++; *pjDst++ = (BYTE) pulXlate[jSrc >> 4]; *pjDst++ = (BYTE) pulXlate[jSrc & 0xf]; } while (--iLoop != 0); } // The number of bytes we'll transfer is equal to the number // of pels we've processed in the batch. Since we're // transferring words, we have to round up to get the word // count: vDataPortOut(ppdev, ajBuf, (cxThis + 1) >> 1); } while (cxToGo > 0); pjScan += lSrcDelta; // Advance to next source scan. Note // that we could have computed the // value to advance 'pjSrc' directly, // but this method is less // error-prone. } while (--cy != 0); CHECK_DATA_COMPLETE(ppdev); // Don't forget to restore the right scissors: if (bResetScissors) { IO_FIFO_WAIT(ppdev, 1); IO_ABS_SCISSORS_R(ppdev, ppdev->cxMemory - 1); } if (--c == 0) return; prcl++; IO_FIFO_WAIT(ppdev, 5); } } /******************************Public*Routine******************************\ * VOID vIoXferNative * * Transfers a bitmap that is the same colour depth as the display to * the screen via the data transfer register, with no palette translation. * \**************************************************************************/ VOID vIoXferNative( // Type FNXFER PDEV* ppdev, LONG c, // Count of rectangles, can't be zero RECTL* prcl, // Array of relative coordinates destination rectangles ULONG ulHwForeMix,// Hardware mix ULONG ulHwBackMix,// Not used SURFOBJ* psoSrc, // Source surface POINTL* pptlSrc, // Original unclipped source point RECTL* prclDst, // Original unclipped destination rectangle XLATEOBJ* pxlo) // Not used { LONG dx; LONG dy; LONG cx; LONG cy; LONG lSrcDelta; BYTE* pjSrcScan0; BYTE* pjSrc; LONG cwSrc; BOOL bResetScissors; LONG xLeft; LONG xRight; LONG yTop; ASSERTDD((pxlo == NULL) || (pxlo->flXlate & XO_TRIVIAL), "Can handle trivial xlate only"); ASSERTDD(psoSrc->iBitmapFormat == ppdev->iBitmapFormat, "Source must be same colour depth as screen"); ASSERTDD(c > 0, "Can't handle zero rectangles"); ASSERTDD(ulHwForeMix <= 15, "Weird hardware Rop"); dx = pptlSrc->x - prclDst->left; dy = pptlSrc->y - prclDst->top; // Add to destination to get source lSrcDelta = psoSrc->lDelta; pjSrcScan0 = psoSrc->pvScan0; IO_FIFO_WAIT(ppdev, 8); IO_PIX_CNTL(ppdev, ALL_ONES); IO_FRGD_MIX(ppdev, SRC_CPU_DATA | ulHwForeMix); while(TRUE) { bResetScissors = FALSE; IO_CUR_Y(ppdev, prcl->top); yTop = prcl->top; cy = prcl->bottom - prcl->top; IO_MIN_AXIS_PCNT(ppdev, cy - 1); xLeft = prcl->left; xRight = prcl->right; // Make sure we're word aligned on the source, because we're // going to be transferring words and we don't want to risk // reading past the end of the bitmap: if ((xLeft + dx) & 1) { IO_SCISSORS_L(ppdev, xLeft); xLeft--; bResetScissors = TRUE; } IO_CUR_X(ppdev, xLeft); cx = xRight - xLeft; if (cx & 1) { IO_SCISSORS_R(ppdev, xRight - 1); cx++; bResetScissors = TRUE; } IO_MAJ_AXIS_PCNT(ppdev, cx - 1); cwSrc = ((cx << ppdev->cPelSize) + 1) >> 1; pjSrc = pjSrcScan0 + (yTop + dy) * lSrcDelta + ((xLeft + dx) << ppdev->cPelSize); IO_GP_WAIT(ppdev); IO_CMD(ppdev, RECTANGLE_FILL | BUS_SIZE_16| WAIT | DRAWING_DIR_TBLRXM | DRAW | LAST_PIXEL_ON | SINGLE_PIXEL | WRITE | BYTE_SWAP); CHECK_DATA_READY(ppdev); do { vDataPortOut(ppdev, pjSrc, cwSrc); pjSrc += lSrcDelta; } while (--cy != 0); CHECK_DATA_COMPLETE(ppdev); if (bResetScissors) { IO_FIFO_WAIT(ppdev, 2); IO_ABS_SCISSORS_L(ppdev, 0); IO_ABS_SCISSORS_R(ppdev, ppdev->cxMemory - 1); } if (--c == 0) return; prcl++; IO_FIFO_WAIT(ppdev, 6); } } /******************************Public*Routine******************************\ * VOID vIoCopyBlt * * Does a screen-to-screen blt of a list of rectangles. * \**************************************************************************/ VOID vIoCopyBlt( // Type FNCOPY PDEV* ppdev, LONG c, // Can't be zero RECTL* prcl, // Array of relative coordinates destination rectangles ULONG ulHwMix, // Hardware mix POINTL* pptlSrc, // Original unclipped source point RECTL* prclDst) // Original unclipped destination rectangle { LONG dx; LONG dy; // Add delta to destination to get source LONG cx; LONG cy; // Size of current rectangle - 1 ASSERTDD(c > 0, "Can't handle zero rectangles"); ASSERTDD(ulHwMix <= 15, "Weird hardware Rop"); IO_FIFO_WAIT(ppdev, 2); IO_FRGD_MIX(ppdev, SRC_DISPLAY_MEMORY | ulHwMix); IO_PIX_CNTL(ppdev, ALL_ONES); dx = pptlSrc->x - prclDst->left; dy = pptlSrc->y - prclDst->top; // The accelerator may not be as fast at doing right-to-left copies, so // only do them when the rectangles truly overlap: if (!OVERLAP(prclDst, pptlSrc)) goto Top_Down_Left_To_Right; if (prclDst->top <= pptlSrc->y) { if (prclDst->left <= pptlSrc->x) { Top_Down_Left_To_Right: do { IO_FIFO_WAIT(ppdev, 7); cx = prcl->right - prcl->left - 1; IO_MAJ_AXIS_PCNT(ppdev, cx); IO_DEST_X(ppdev, prcl->left); IO_CUR_X(ppdev, prcl->left + dx); cy = prcl->bottom - prcl->top - 1; IO_MIN_AXIS_PCNT(ppdev, cy); IO_DEST_Y(ppdev, prcl->top); IO_CUR_Y(ppdev, prcl->top + dy); IO_CMD(ppdev, BITBLT | DRAW | DIR_TYPE_XY | WRITE | DRAWING_DIR_TBLRXM); prcl++; } while (--c != 0); } else { do { IO_FIFO_WAIT(ppdev, 7); cx = prcl->right - prcl->left - 1; IO_MAJ_AXIS_PCNT(ppdev, cx); IO_DEST_X(ppdev, prcl->left + cx); IO_CUR_X(ppdev, prcl->left + cx + dx); cy = prcl->bottom - prcl->top - 1; IO_MIN_AXIS_PCNT(ppdev, cy); IO_DEST_Y(ppdev, prcl->top); IO_CUR_Y(ppdev, prcl->top + dy); IO_CMD(ppdev, BITBLT | DRAW | DIR_TYPE_XY | WRITE | DRAWING_DIR_TBRLXM); prcl++; } while (--c != 0); } } else { if (prclDst->left <= pptlSrc->x) { do { IO_FIFO_WAIT(ppdev, 7); cx = prcl->right - prcl->left - 1; IO_MAJ_AXIS_PCNT(ppdev, cx); IO_DEST_X(ppdev, prcl->left); IO_CUR_X(ppdev, prcl->left + dx); cy = prcl->bottom - prcl->top - 1; IO_MIN_AXIS_PCNT(ppdev, cy); IO_DEST_Y(ppdev, prcl->top + cy); IO_CUR_Y(ppdev, prcl->top + cy + dy); IO_CMD(ppdev, BITBLT | DRAW | DIR_TYPE_XY | WRITE | DRAWING_DIR_BTLRXM); prcl++; } while (--c != 0); } else { do { IO_FIFO_WAIT(ppdev, 7); cx = prcl->right - prcl->left - 1; IO_MAJ_AXIS_PCNT(ppdev, cx); IO_DEST_X(ppdev, prcl->left + cx); IO_CUR_X(ppdev, prcl->left + cx + dx); cy = prcl->bottom - prcl->top - 1; IO_MIN_AXIS_PCNT(ppdev, cy); IO_DEST_Y(ppdev, prcl->top + cy); IO_CUR_Y(ppdev, prcl->top + cy + dy); IO_CMD(ppdev, BITBLT | DRAW | DIR_TYPE_XY | WRITE | DRAWING_DIR_BTRLXM); prcl++; } while (--c != 0); } } } /******************************Public*Routine******************************\ * VOID vIoMaskCopy * * This routine performs a screen-to-screen masked blt. * * NT has a new API called MaskBlt (which has also been added to Win4.0) * which allows an app to specify a monochrome mask on a colour blt. This * API is relatively cool because the programmer no longer has to do two * separate SRCAND and SRCPAINT calls to do transparency. We can accelerate * the call using the hardware, and there is no longer any chance of * 'flashing' occuring on the screen. * * Most often, the colour bitmap for MaskBlt is a compatible-bitmap that * we've already stashed in off-screen memory. We do the maskblt by * transferring the monochrome bitmap via the data transfer register, * and setting the foreground and background mixes to use the on-screen * bitmap as appropriate. * * If you can implement this call and accelerate it using your hardware, * please do. It is really useful for app developers and is a big win. * Plus, you'll have a head-start for Win4.0 (although the Win4.0 version * is simpler because they only allow 0xccaa or 0xaacc rops -- the * foreground and background mixes can only be OVERPAINT or LEAVE_ALONE). * \**************************************************************************/ VOID vIoMaskCopy( // Type FNMASK PDEV* ppdev, LONG c, // Can't be zero RECTL* prcl, // Array of relative coordinates destination // rectangles ULONG ulHwForeMix, // Foreground mix ULONG ulHwBackMix, // Background mix SURFOBJ* psoMsk, // Mask surface POINTL* pptlMsk, // Original unclipped mask source point SURFOBJ* psoSrc, // Not used POINTL* pptlSrc, // Original unclipped source point RECTL* prclDst, // Original unclipped destination rectangle ULONG iSolidColor, // Not used RBRUSH* prb, // Not used POINTL* pptlBrush, // Not used XLATEOBJ* pxlo) // Not used { LONG dxSrc; LONG dySrc; LONG dxMsk; LONG dyMsk; LONG cy; LONG lMskDelta; LONG lTmpDelta; BYTE* pjMskScan0; BYTE* pjMsk; LONG cwMsk; LONG xLeft; LONG xRight; LONG yTop; LONG yBottom; LONG xBiasLeft; LONG xBiasRight; ASSERTDD(c > 0, "Can't handle zero rectangles"); ASSERTDD(ulHwForeMix <= 15, "Weird hardware Rop"); ASSERTDD(ulHwBackMix <= 15, "Weird hardware Rop"); ASSERTDD(pptlMsk != NULL && psoMsk != NULL, "Can't have NULL masks"); ASSERTDD(psoMsk->iBitmapFormat == BMF_1BPP, "Mask has to be 1bpp"); ASSERTDD(!OVERLAP(prclDst, pptlSrc), "Source and dest can't overlap!"); while (INPW(EXT_FIFO_STATUS) & TWO_WORDS) ; OUT_WORD(ALU_FG_FN, ulHwForeMix); OUT_WORD(ALU_BG_FN, ulHwBackMix); dxSrc = pptlSrc->x - (prclDst->left + ppdev->xOffset); dySrc = pptlSrc->y - (prclDst->top + ppdev->yOffset); // Add to the absolute coordinate destination rectangle to // get the corresponding absolute coordinate source rectangle dxMsk = pptlMsk->x - (prclDst->left + ppdev->xOffset); dyMsk = pptlMsk->y - (prclDst->top + ppdev->yOffset); // Add to the absolute coordinate destination rectangle to // get the corresponding absolute coordinate mask rectangle lMskDelta = psoMsk->lDelta; pjMskScan0 = psoMsk->pvScan0; while (TRUE) { while (INPW(EXT_FIFO_STATUS) & FIFTEEN_WORDS) ; // Since we're not using the normal accelerator register macros, // we have to explicitly account for the DFB offset: yBottom = prcl->bottom + ppdev->yOffset; yTop = prcl->top + ppdev->yOffset; xRight = prcl->right + ppdev->xOffset; xLeft = prcl->left + ppdev->xOffset; // The start has to be word aligned: xBiasLeft = (xLeft + dxMsk) & 15; if (xBiasLeft != 0) { // Rev 3 ATI chips have goofy timing bugs on 66 MHz DX-2 // computers where some extended will not be correctly // set the first time. The extended scissors registers // have this problem, but setting them twice seems to work: OUT_WORD(EXT_SCISSOR_L, xLeft); OUT_WORD(EXT_SCISSOR_L, xLeft); xLeft -= xBiasLeft; } // The width has to be a word multiple: xBiasRight = (xRight - xLeft) & 15; if (xBiasRight != 0) { OUT_WORD(EXT_SCISSOR_R, xRight - 1); OUT_WORD(EXT_SCISSOR_R, xRight - 1); xRight += 16 - xBiasRight; } OUT_WORD(DP_CONFIG, FG_COLOR_SRC_BLIT | BG_COLOR_SRC_BLIT | DATA_ORDER | EXT_MONO_SRC_HOST | DRAW | WRITE | DATA_WIDTH); OUT_WORD(SRC_X, xLeft + dxSrc); OUT_WORD(SRC_X_START, xLeft + dxSrc); OUT_WORD(SRC_X_END, xRight + dxSrc); OUT_WORD(SRC_Y, yTop + dySrc); OUT_WORD(SRC_Y_DIR, TOP_TO_BOTTOM); OUT_WORD(DEST_X_START, xLeft); OUT_WORD(CUR_X, xLeft); OUT_WORD(DEST_X_END, xRight); OUT_WORD(CUR_Y, yTop); OUT_WORD(DEST_Y_END, yBottom); cwMsk = (xRight - xLeft) / 16; // We'll be transferring WORDs pjMsk = pjMskScan0 + (yTop + dyMsk) * lMskDelta + (xLeft + dxMsk) / 8; // Start is byte aligned (note // that we don't have to add // xBiasLeft) cy = yBottom - yTop; lTmpDelta = lMskDelta - 2 * cwMsk; // To be safe, we make sure there are always as many free FIFO entries // as we'll transfer (note that this implementation isn't particularly // efficient, especially for short scans): _asm { ; eax = used for IN ; ebx = count of words remaining on current scan ; ecx = used for REP ; edx = used for IN and OUT ; esi = current source pointer ; edi = count of scans mov esi,pjMsk mov edi,cy Scan_Loop: mov ebx,cwMsk Batch_Loop: mov edx,EXT_FIFO_STATUS in ax,dx and eax,SIXTEEN_WORDS jnz short Batch_Loop mov edx,PIX_TRANS sub ebx,16 jle short Finish_Scan mov ecx,16 rep outsw jmp short Batch_Loop Finish_Scan: add ebx,16 mov ecx,ebx rep outsw add esi,lTmpDelta dec edi jnz Scan_Loop } if ((xBiasLeft | xBiasRight) != 0) { // Reset the clipping only if we used it: while (INPW(EXT_FIFO_STATUS) & FOUR_WORDS) ; OUT_WORD(EXT_SCISSOR_L, 0); OUT_WORD(EXT_SCISSOR_R, ppdev->cxMemory - 1); OUT_WORD(EXT_SCISSOR_L, 0); OUT_WORD(EXT_SCISSOR_R, ppdev->cxMemory - 1); } if (--c == 0) return; prcl++; } } /******************************Public*Routine******************************\ * VOID vPutBits * * Copies the bits from the given surface to the screen, using the memory * aperture. Must be pre-clipped. * * LATER: Do we really need this routine? * \**************************************************************************/ VOID vPutBits( PDEV* ppdev, SURFOBJ* psoSrc, // Source surface RECTL* prclDst, // Destination rectangle in absolute coordinates! POINTL* pptlSrc) // Source point { LONG xOffset; LONG yOffset; // This is ugly. Oh well. xOffset = ppdev->xOffset; yOffset = ppdev->yOffset; ppdev->xOffset = 0; ppdev->yOffset = 0; vIoXferNative(ppdev, 1, prclDst, OVERPAINT, OVERPAINT, psoSrc, pptlSrc, prclDst, NULL); ppdev->xOffset = xOffset; ppdev->yOffset = yOffset; } /******************************Public*Routine******************************\ * VOID vGetBits * * Copies the bits to the given surface from the screen, using the data * transfer register. Must be pre-clipped. * \**************************************************************************/ VOID vGetBits( PDEV* ppdev, SURFOBJ* psoDst, // Destination surface RECTL* prclDst, // Destination rectangle POINTL* pptlSrc) // Source point in absolute coordinates! { LONG cx; LONG cy; LONG lDstDelta; BYTE* pjDst; DWORD wOdd; // Think of it as a WORD ULONG cwDst; ULONG cjEndByte; IO_FIFO_WAIT(ppdev, 7); IO_PIX_CNTL(ppdev, ALL_ONES); // LATER: Do we have to set FRGD_MIX? IO_FRGD_MIX(ppdev, SRC_CPU_DATA | OVERPAINT); IO_ABS_CUR_X(ppdev, pptlSrc->x); IO_ABS_CUR_Y(ppdev, pptlSrc->y); cx = prclDst->right - prclDst->left; cy = prclDst->bottom - prclDst->top; IO_MAJ_AXIS_PCNT(ppdev, cx - 1); IO_MIN_AXIS_PCNT(ppdev, cy - 1); IO_CMD(ppdev, RECTANGLE_FILL | BUS_SIZE_16| WAIT | DRAWING_DIR_TBLRXM | DRAW | LAST_PIXEL_ON | READ | BYTE_SWAP); lDstDelta = psoDst->lDelta; pjDst = (BYTE*) psoDst->pvScan0 + prclDst->top * lDstDelta + prclDst->left; cwDst = (cx >> 1); WAIT_FOR_DATA_AVAILABLE(ppdev); if ((cx & 1) == 0) { // Even destination scan length. Life is truly great. do { vDataPortIn(ppdev, pjDst, cwDst); pjDst += lDstDelta; } while (--cy != 0); } else { // Odd destination scan length. // // We have to be careful of this case because we want to do WORD // transfers, but we can't overwrite either the beginning or ending // of the scan. Note that since it's not legal to write a byte past // the end of the bitmap or a byte before the beginning of the bitmap // as that may cause an access violation, we cannot temporarily save // and restore any extra bytes in the destination bitmap. cjEndByte = cx - 1; // Byte offset from beginning of scan to // last byte in scan. This is the offset // to the odd byte that happens because // we're inputting WORDs but the length // of the destination scan is not a // multiple of two. while (TRUE) { vDataPortIn(ppdev, pjDst, cwDst); IO_PIX_TRANS_IN(ppdev, wOdd); *(pjDst + cjEndByte) = (BYTE) wOdd; if (--cy == 0) break; pjDst += lDstDelta; *(pjDst) = (BYTE) (wOdd >> 8); vDataPortIn(ppdev, pjDst + 1, cwDst); pjDst += lDstDelta; if (--cy == 0) break; } } }