Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

778 lines
27 KiB

/******************************Module*Header**********************************\
*
* *******************
* * GDI SAMPLE CODE *
* *******************
*
* Module Name: download.c
*
* Contains the upload and download routines.
*
* Copyright (c) 1994-1998 3Dlabs Inc. Ltd. All rights reserved.
* Copyright (c) 1995-1999 Microsoft Corporation. All rights reserved.
\*****************************************************************************/
#include "precomp.h"
#include "gdi.h"
//-----------------------------------------------------------------------------
//
// VOID vDownloadNative(GFNPB* ppb)
//
// Does a download of a native surface for a list of rectangles.
// Note: this download takes advantage of the Permedia 2 packed data read.
// Because of a Permedia 2 hardware limitation, we can only use the
// packed data download when the logic op is SRC_COPY or the destination
// is aligned to the packed data being downloaded. This will typically be
// when the surface is 32 bpp. Otherwise, we just do the regular download.
//
// Arguments needed from function block (GFNPB)
// ppdev-------PPDev
// psoSrc------Source surface (SURFOBJ)
// psurfDst----Destination surface
// pRects------Pointer to a list of rectangles to be downloaded
// lNumRects---Number of rectangles in the list
// prclDst-----Points to a RECTL structure that defines the rectangular area
// to be modified
// pptlSrc-----Original unclipped source point
// ulRop4------Raster operation; its low 4 bits select the logic op
//
//-----------------------------------------------------------------------------
VOID
vDownloadNative(GFNPB* ppb)
{
//
// Unpack the arguments from the function block
//
PDev* ppdev = ppb->ppdev;
Surf* psurfDst = ppb->psurfDst;
SURFOBJ* pSrcSurface = ppb->psoSrc;
RECTL* pRects = ppb->pRects;
RECTL* prclDst = ppb->prclDst;
POINTL* pptlSrc = ppb->pptlSrc;
BOOL bEnablePacked;
DWORD dwRenderBits = __RENDER_TRAPEZOID_PRIMITIVE
| __RENDER_SYNC_ON_HOST_DATA;
LONG lNumRects = ppb->lNumRects;
LONG lSrcStride;
//
// Adding (lXOffset, lYOffset) to a destination coordinate gives the
// matching source coordinate
//
LONG lXOffset = pptlSrc->x - prclDst->left;
LONG lYOffset = pptlSrc->y - prclDst->top;
//
// Only the low 4 bits of the rop4 are passed on to select the logic op
//
ULONG ulLogicOP = ulRop2ToLogicop(ppb->ulRop4 & 0xf);
ULONG* pBuffer;
//
// Note: Due to a hardware limitation, we can take advantage of the
// Permedia 2 PackedData copy only when the logic OP is SRC_COPY, or
// the destination is aligned to the packed data being downloaded.
// This will typically be when the surface is 32 bpp.
//
if ( (ulLogicOP == K_LOGICOP_COPY)
||(pSrcSurface->iBitmapFormat == BMF_32BPP) )
{
bEnablePacked = TRUE;
}
else
{
bEnablePacked = FALSE;
}
DBG_GDI((6, "vDownloadNative called, logicop=%d", ulLogicOP));
DBG_GDI((6, "source SURFOBJ=0x%x", pSrcSurface));
DBG_GDI((6, "pptlSrc(x, y)(%d, %d) logicop=%d",
pptlSrc->x, pptlSrc->y, ulLogicOP));
DBG_GDI((6, "prclDst(left, right, top, bottom)(%d, %d, %d, %d)",
prclDst->left, prclDst->right, prclDst->top, prclDst->bottom));
DBG_GDI((6, "lXOffset=%d, lYOffset=%d", lXOffset, lYOffset));
vCheckGdiContext(ppdev);
//
// Reserve room for the 5 tag/data pairs written below
//
InputBufferReserve(ppdev, 10, &pBuffer);
//
// Setup loop invariant state
//
pBuffer[0] = __Permedia2TagLogicalOpMode;
pBuffer[1] = P2_ENABLED_LOGICALOP(ulLogicOP);
pBuffer[2] = __Permedia2TagFBWindowBase;
pBuffer[3] = psurfDst->ulPixOffset;
pBuffer[4] = __Permedia2TagFBPixelOffset;
pBuffer[5] = 0;
pBuffer[6] = __Permedia2TagFBReadPixel;
pBuffer[7] = ppdev->cPelSize;
pBuffer[8] = __Permedia2TagdY;
pBuffer[9] = INTtoFIXED(1);
pBuffer += 10;
InputBufferCommit(ppdev, pBuffer);
//
// Loop all the rectangles to render
//
while( lNumRects-- )
{
//
// ulMask is used below both to extract a pixel's position inside its
// ULONG (x & ulMask) and to round a pixel count up to whole ULONGs
// ((x + ulMask) >> ulShift)
//
ULONG ulMask = ppdev->dwBppMask;
DWORD dwReadMode = PM_FBREADMODE_PARTIAL(psurfDst->ulPackedPP)
| LogicopReadDest[ulLogicOP];
ULONG ulStartXDom;
ULONG ulStartXSub;
//
// Source x, in pixels, that corresponds to the left edge of this rectangle
//
LONG lSrcLeft = lXOffset + pRects->left;
//
// Calculate the 3 bit 2's complement shift that is required to align
// the source pixels with the destination. This relative offset can be
// used to shift the downloaded data to the 32 bit destination alignment
// that packing requires. This enables you to read DWORD aligned data
// on the host despite the data not being aligned correctly for the
// packing.
//
ULONG ulOffset = ( (pRects->left & ulMask)
- (lSrcLeft & ulMask)) & 0x7;
DBG_GDI((6, "ulOffset = 0x%x", ulOffset));
DBG_GDI((6, "pRects(left, right, top, bottom)(%d, %d, %d, %d)",
pRects->left, pRects->right, pRects->top, pRects->bottom));
if ( (bEnablePacked == FALSE) && (ulOffset == 0) )
{
//
// As long as the source and dest are aligned, then we can still use
// the packed data copy, even with logic OPs
//
// NOTE(review): bEnablePacked is never reset to FALSE, so once it is
// set here it stays TRUE for all remaining rectangles. ulOffset looks
// like it only depends on lXOffset for a given mask, which would make
// this harmless -- confirm.
//
DBG_GDI((6, "Turn packed data on when src and dst are aligned"));
bEnablePacked = TRUE;
}
//
// Width and height of this rectangle in pixels. In the packed case
// ulWidth is overwritten below with a width in ULONGs.
//
ULONG ulWidth = pRects->right - pRects->left;
ULONG ulHeight = pRects->bottom - pRects->top;
ULONG ulDstLeft;
ULONG ulDstRight;
ULONG ulDstWidth;
LONG lSrcRight;
//
// ulSrcWidth and ulExtra are only assigned and only read in the packed
// case
//
ULONG ulSrcWidth;
ULONG ulExtra;
if ( bEnablePacked == TRUE )
{
ULONG ulShift = ppdev->bBppShift;
//
// Destination span in ULONG units: left edge rounded down, right edge
// rounded up
//
ulDstLeft = pRects->left >> ulShift;
ulDstRight = (pRects->right + ulMask) >> ulShift;
ulDstWidth = ulDstRight - ulDstLeft;
//
// Same conversion for the source span. From here on lSrcLeft is a ULONG
// index into the source scanline, not a pixel coordinate.
//
lSrcRight = (lSrcLeft + ulWidth + ulMask) >> ulShift;
lSrcLeft >>= ulShift;
ulSrcWidth = (ULONG)(lSrcRight - lSrcLeft);
//
// We need to convert from pixel coordinates to ULONG coordinates.
// Also, we need to set the destination width to the greater of the
// source width or destination width. If the destination width is
// greater than the source width, we need to remember this so that
// we can download an additional dummy value without reading past
// the end of the source data (which could result in an access
// fault).
//
if( ulDstWidth <= ulSrcWidth )
{
ulExtra = 0;
ulWidth = ulSrcWidth;
}
else
{
ulWidth = ulDstWidth;
ulExtra = 1;
}
dwReadMode |= (PM_FBREADMODE_RELATIVEOFFSET(ulOffset)
| PM_FBREADMODE_READSOURCE(__PERMEDIA_DISABLE)
| PM_FBREADMODE_PACKEDDATA(__PERMEDIA_ENABLE) );
ulStartXDom = INTtoFIXED(ulDstLeft);
ulStartXSub = INTtoFIXED(ulDstLeft + ulWidth);
}
else
{
//
// Non-packed download: the X extents stay in pixel coordinates
//
dwReadMode |= PM_FBREADMODE_RELATIVEOFFSET(0);
ulStartXDom = INTtoFIXED(pRects->left);
ulStartXSub = INTtoFIXED(pRects->right);
}
//
// Per-rectangle setup: 7 tag/data pairs, the last of which is the Render
// command
//
InputBufferReserve(ppdev, 14, &pBuffer);
pBuffer[0] = __Permedia2TagFBReadMode;
pBuffer[1] = dwReadMode;
pBuffer[2] = __Permedia2TagStartXDom;
pBuffer[3] = ulStartXDom;
pBuffer[4] = __Permedia2TagStartXSub;
pBuffer[5] = ulStartXSub;
//
// Test results show that it does not hurt to set this register when
// doing a non-packed download. If we moved this setting inside the
// "bEnablePacked == TRUE" case, we would need an extra
// InputBufferReserve/InputBufferCommit for the packed data download,
// which would hurt performance
//
pBuffer[6] = __Permedia2TagPackedDataLimits;
pBuffer[7] = PM_PACKEDDATALIMITS_OFFSET(ulOffset)
|(INTtoFIXED(pRects->left)
| pRects->right);
pBuffer[8] = __Permedia2TagStartY;
pBuffer[9] = INTtoFIXED(pRects->top);
pBuffer[10] = __Permedia2TagCount;
pBuffer[11] = ulHeight;
pBuffer[12] = __Permedia2TagRender;
pBuffer[13] = dwRenderBits;
pBuffer += 14;
InputBufferCommit(ppdev, pBuffer);
//
// Now feed the pixel data, one scanline per reserve/commit pair. Each
// scanline is a header dword (Color tag with ulWidth - 1 in the upper 16
// bits) followed by ulWidth data dwords.
//
if ( bEnablePacked == TRUE )
{
//
// Packed case: the source is read as raw ULONGs, so the stride is
// converted from bytes to ULONGs
//
ULONG* pulSrcStart = (ULONG*)(pSrcSurface->pvScan0);
lSrcStride = pSrcSurface->lDelta >> 2;
ULONG* pulSrc = (ULONG*)(pulSrcStart
+ ((lYOffset + pRects->top) * lSrcStride)
+ lSrcLeft);
ULONG* pulData = pulSrc;
while ( ulHeight-- )
{
ULONG ulTemp = ulSrcWidth;
ULONG* pulSrcTemp = pulData;
InputBufferReserve(ppdev, ulWidth + 1, &pBuffer);
pBuffer[0] = __Permedia2TagColor | ((ulWidth - 1) << 16);
pBuffer +=1;
while ( ulTemp-- )
{
*pBuffer++ = *pulSrcTemp++;
}
//
// When the destination needs one more ULONG than the source spans,
// send a dummy 0 instead of reading past the source data
//
if ( ulExtra )
{
*pBuffer++ = 0;
}
InputBufferCommit(ppdev, pBuffer);
pulData += lSrcStride;
}// while ( ulHeight-- )
}// PackedEnabled case
else if ( pSrcSurface->iBitmapFormat == BMF_16BPP )
{
//
// Non-packed 16 bpp: every source pixel is widened to its own ULONG.
// The stride is converted from bytes to USHORTs.
//
USHORT* psSrcStart = (USHORT*)(pSrcSurface->pvScan0);
lSrcStride = pSrcSurface->lDelta >> 1;
USHORT* psSrc = (USHORT*)(psSrcStart
+ ((lYOffset + pRects->top) * lSrcStride)
+ lSrcLeft);
USHORT* psData = psSrc;
while ( ulHeight-- )
{
ULONG ulTemp = ulWidth;
USHORT* psSrcTemp = psData;
InputBufferReserve(ppdev, ulWidth + 1, &pBuffer);
pBuffer[0] = __Permedia2TagColor | ((ulWidth - 1) << 16);
pBuffer +=1;
while ( ulTemp-- )
{
*pBuffer++ = (ULONG)(*psSrcTemp++);
}
InputBufferCommit(ppdev, pBuffer);
psData += lSrcStride;
}// while ( ulHeight-- )
}// 16 bpp non-packed case
else if ( pSrcSurface->iBitmapFormat == BMF_8BPP )
{
//
// Non-packed 8 bpp: every source byte is widened to its own ULONG.
// The stride is used as is, in bytes.
//
BYTE* pcSrcStart = (BYTE*)(pSrcSurface->pvScan0);
lSrcStride = pSrcSurface->lDelta;
BYTE* pcSrc = (BYTE*)(pcSrcStart
+ ((lYOffset + pRects->top) * lSrcStride)
+ lSrcLeft);
BYTE* pcData = pcSrc;
while ( ulHeight-- )
{
ULONG ulTemp = ulWidth;
BYTE* pcSrcTemp = pcData;
InputBufferReserve(ppdev, ulWidth + 1, &pBuffer);
pBuffer[0] = __Permedia2TagColor | ((ulWidth - 1) << 16);
pBuffer +=1;
while ( ulTemp-- )
{
*pBuffer++ = (ULONG)(*pcSrcTemp++);
}
InputBufferCommit(ppdev, pBuffer);
pcData += lSrcStride;
}// while ( ulHeight-- )
}// 8 bpp non-packed case
else
{
//
// DrvBitBlt checks
// if(psoSrc->iBitmapFormat == pb.ppdev->iBitmapFormat) before it
// allows a call to this function, so this ASSERT should never
// be hit. It will be if we implement 24 bpp download later.
//
// NOTE(review): by the time we get here the Render command with
// __RENDER_SYNC_ON_HOST_DATA has already been committed and no color
// data follows it -- confirm what the hardware does in that case.
//
ASSERTDD(0, "we don't handle it for now");
}
//
// Next rectangle
//
pRects++;
}// while( lNumRects-- )
}// vDownloadNative()
//-----------------------------------------------------------------------------
//
// VOID vDownload4Bpp(GFNPB* ppb)
//
// Does a download of a 4bpp surface for a list of rectangles.
//
// Arguments needed from function block (GFNPB)
// ppdev-------PPDev
// psoSrc------Source surface (SURFOBJ)
// psurfDst----Destination surface
// pxlo--------Translation object; its pulXlate table is indexed by each
// 4-bit source value
// pRects------Pointer to a list of rectangles to be downloaded
// lNumRects---Number of rectangles in the list
// prclDst-----Points to a RECTL structure that defines the rectangular area
// to be modified
// pptlSrc-----Original unclipped source point
//
//-----------------------------------------------------------------------------
//
// Run-time switch for the 4bpp download. Within this file it is only read
// by vDownload4Bpp, which is itself compiled out below.
//
ULONG gDownload4BppEnabled = 1;
//
// The whole of vDownload4Bpp is disabled by this #if 0.
//
// NOTE(review): the body uses SEND_PERMEDIA_DATA / WAIT_INPUT_FIFO and
// pPermedia->GetDMAPtr rather than the InputBufferReserve/InputBufferCommit
// calls used by the live code in this file, so it would presumably need
// porting before it could be re-enabled -- confirm.
//
#if 0
VOID
vDownload4Bpp(GFNPB* ppb)
{
PDev* ppdev = ppb->ppdev;
Surf* psurfDst = ppb->psurfDst;
RECTL* prcl = ppb->pRects;
LONG c = ppb->lNumRects;
RECTL* prclDst = ppb->prclDst;
POINTL* pptlSrc = ppb->pptlSrc;
DWORD dwRenderBits = __RENDER_TRAPEZOID_PRIMITIVE | __RENDER_SYNC_ON_HOST_DATA;
BYTE* pbSrcStart = (BYTE *) ppb->psoSrc->pvScan0;
LONG lSrcStride = ppb->psoSrc->lDelta;
// Relative offset is computed once, from the unclipped source point and
// the unclipped destination rectangle, not per clip rectangle
ULONG ulOffset = ((pptlSrc->x & 1) -
(prclDst->left & ppdev->dwBppMask)) & 0x7;
if(!gDownload4BppEnabled) return;
PERMEDIA_DECL_VARS;
PERMEDIA_DECL_INIT;
VALIDATE_GDI_CONTEXT;
// P2_CHECK_STATE;
P2_DEFAULT_FB_DEPTH;
// setup loop invariant state
WAIT_INPUT_FIFO(4);
SEND_PERMEDIA_DATA(LogicalOpMode, __PERMEDIA_DISABLE);
// Packed data reads are only turned on when cPelSize < 2
if(ppdev->cPelSize < 2)
{
SEND_PERMEDIA_DATA(FBReadMode, psurfDst->ulPackedPP |
PM_FBREADMODE_PACKEDDATA(__PERMEDIA_ENABLE) |
PM_FBREADMODE_RELATIVEOFFSET(ulOffset));
}
else
{
// Do we even need this at all???
SEND_PERMEDIA_DATA(FBReadMode, psurfDst->ulPackedPP);
}
SEND_PERMEDIA_DATA(FBWindowBase, psurfDst->ulPixOffset);
SEND_PERMEDIA_DATA(FBPixelOffset, 0);
DEXE_INPUT_FIFO();
while(c--) {
// Source position that corresponds to the top-left of this rectangle
LONG lSrcLeft = pptlSrc->x + (prcl->left - prclDst->left);
LONG lSrcTop = pptlSrc->y + (prcl->top - prclDst->top);
ASSERTDD(lSrcLeft >= 0, "ugh");
ASSERTDD(lSrcTop >= 0, "ugh");
// Render the rectangle
// left/right/width are in ULONG units: left rounded down, right rounded up
ULONG left = prcl->left >> ppdev->bBppShift;
ULONG right = (prcl->right + ppdev->dwBppMask) >> ppdev->bBppShift;
ULONG width = right - left;
ULONG count = prcl->bottom - prcl->top;
// One more FIFO entry is needed when PackedDataLimits is also sent
WAIT_INPUT_FIFO((ppdev->cPelSize < 2 ? 6 : 5));
SEND_PERMEDIA_DATA(StartXDom, left << 16);
SEND_PERMEDIA_DATA(StartXSub, right << 16);
if(ppdev->cPelSize < 2)
{
SEND_PERMEDIA_DATA(PackedDataLimits,
PM_PACKEDDATALIMITS_OFFSET(ulOffset)
| (prcl->left << 16) | prcl->right);
}
SEND_PERMEDIA_DATA(StartY, prcl->top << 16);
SEND_PERMEDIA_DATA(Count, count);
SEND_PERMEDIA_DATA(Render, dwRenderBits);
DEXE_INPUT_FIFO();
// Two 4-bit source pixels per byte, hence the >> 1 on the x coordinate
BYTE * srcScan = (BYTE *)(pbSrcStart + (lSrcTop * lSrcStride))
+ (lSrcLeft >> 1);
ULONG* aulXlate = ppb->pxlo->pulXlate;
// One DMA request per scanline: a Color tag header followed by `width`
// data dwords
while(count--)
{
LONG remaining = width;
ULONG* lp = pPermedia->GetDMAPtr(width+1);
BYTE* src = srcScan;
*lp++ = __Permedia2TagColor | ((width-1) << 16);
switch(ppdev->cPelSize)
{
case 0:
// Each output dword packs four translated pixels, 8 bits apart, taken
// from two source bytes
while(remaining-- > 0)
{
*lp++ = aulXlate[src[0] & 0x0F] |
(aulXlate[(src[0] & 0xF0) >> 4] << 8) |
(aulXlate[src[1] & 0xf] << 16) |
(aulXlate[(src[1] & 0xf0) >> 4] << 24);
src += 2;
}
break;
case 1:
// Each output dword packs two translated pixels, 16 bits apart, taken
// from one source byte. The first loop is unrolled eight times.
while(remaining >= 8)
{
remaining -= 8;
lp[0] = aulXlate[src[0] & 0x0F] | (aulXlate[(src[0] & 0xF0) >> 4] << 16);
lp[1] = aulXlate[src[1] & 0x0F] | (aulXlate[(src[1] & 0xF0) >> 4] << 16);
lp[2] = aulXlate[src[2] & 0x0F] | (aulXlate[(src[2] & 0xF0) >> 4] << 16);
lp[3] = aulXlate[src[3] & 0x0F] | (aulXlate[(src[3] & 0xF0) >> 4] << 16);
lp[4] = aulXlate[src[4] & 0x0F] | (aulXlate[(src[4] & 0xF0) >> 4] << 16);
lp[5] = aulXlate[src[5] & 0x0F] | (aulXlate[(src[5] & 0xF0) >> 4] << 16);
lp[6] = aulXlate[src[6] & 0x0F] | (aulXlate[(src[6] & 0xF0) >> 4] << 16);
lp[7] = aulXlate[src[7] & 0x0F] | (aulXlate[(src[7] & 0xF0) >> 4] << 16);
lp+=8;
src+=8;
}
while(remaining-- > 0)
{
*lp++ = aulXlate[src[0] & 0x0F] | (aulXlate[(src[0] & 0xF0) >> 4] << 16);
src++;
}
break;
case 2:
// One translated pixel per output dword.
// NOTE(review): every case here emits the low nibble before the high
// nibble, and an odd lSrcLeft starts on the high nibble. Standard 4bpp
// DIBs keep the leftmost pixel in the high nibble -- confirm the
// intended nibble order before re-enabling this code.
if(lSrcLeft & 1)
{
*lp++ = aulXlate[(src[0] & 0xf0) >> 4];
src++;
remaining--;
}
while(remaining >= 8)
{
remaining -= 8;
lp[0] = aulXlate[src[0] & 0x0F];
lp[1] = aulXlate[(src[0] & 0xf0) >> 4];
lp[2] = aulXlate[src[1] & 0x0F];
lp[3] = aulXlate[(src[1] & 0xf0) >> 4];
lp[4] = aulXlate[src[2] & 0x0F];
lp[5] = aulXlate[(src[2] & 0xf0) >> 4];
lp[6] = aulXlate[src[3] & 0x0F];
lp[7] = aulXlate[(src[3] & 0xf0) >> 4];
src+=4;
lp += 8;
}
while(remaining > 1)
{
remaining -= 2;
*lp++ = aulXlate[src[0] & 0x0F];
*lp++ = aulXlate[(src[0] & 0xf0) >> 4];
src++;
}
// A single trailing pixel comes from the low nibble of the next byte
if(remaining)
{
*lp++ = aulXlate[src[0] & 0xf];
}
break;
}
srcScan += lSrcStride;
pPermedia->DoneDMAPtr();
}
prcl++;
}
}// vDownload4Bpp()
#endif
//-----------------------------------------------------------------------------
//
// VOID vDownload24Bpp(GFNPB* ppb)
//
// Download routine for 24bpp surfaces and a list of rectangles.
// NOTE: the body of this function is currently compiled out (#if 0), so
// calling it does nothing. The arguments below are the ones read by the
// disabled implementation.
//
// Arguments needed from function block (GFNPB)
// ppdev-------PPDev
// psoSrc------Source surface (SURFOBJ)
// psurfDst----Destination surface
// pRects------Pointer to a list of rectangles to be downloaded
// lNumRects---Number of rectangles in the list
// prclDst-----Points to a RECTL structure that defines the rectangular area
// to be modified
// pptlSrc-----Original unclipped source point
//
//-----------------------------------------------------------------------------
VOID
vDownload24Bpp(GFNPB* ppb)
{
//
// Everything between the #if 0 below and its matching #endif is compiled
// out, so this function currently returns without reading ppb or touching
// the hardware.
//
#if 0
PDev* ppdev = ppb->ppdev;
Surf* psurfDst = ppb->psurfDst;
RECTL* prcl = ppb->pRects;
LONG c = ppb->lNumRects;
RECTL* prclDst = ppb->prclDst;
POINTL* pptlSrc = ppb->pptlSrc;
DWORD dwRenderBits = __RENDER_TRAPEZOID_PRIMITIVE
| __RENDER_SYNC_ON_HOST_DATA;
BYTE* pbSrcStart = (BYTE*)ppb->psoSrc->pvScan0;
LONG lSrcStride = ppb->psoSrc->lDelta;
// Relative offset is computed once, from the unclipped source point and
// the unclipped destination rectangle, not per clip rectangle
ULONG ulOffset = ((pptlSrc->x & ppdev->dwBppMask)
- (prclDst->left & ppdev->dwBppMask)) & 0x7;
PERMEDIA_DECL_VARS;
PERMEDIA_DECL_INIT;
VALIDATE_GDI_CONTEXT;
P2_CHECK_STATE;
P2_DEFAULT_FB_DEPTH;
// setup loop invariant state
WAIT_INPUT_FIFO(4);
SEND_PERMEDIA_DATA(LogicalOpMode, __PERMEDIA_DISABLE);
SEND_PERMEDIA_DATA(FBReadMode, psurfDst->ulPackedPP |
PM_FBREADMODE_PACKEDDATA(__PERMEDIA_ENABLE) |
PM_FBREADMODE_RELATIVEOFFSET(ulOffset));
SEND_PERMEDIA_DATA(FBWindowBase, psurfDst->ulPixOffset);
SEND_PERMEDIA_DATA(FBPixelOffset, 0);
DEXE_INPUT_FIFO();
while(c--)
{
// Source position that corresponds to the top-left of this rectangle
LONG lSrcLeft = pptlSrc->x + (prcl->left - prclDst->left);
LONG lSrcTop = pptlSrc->y + (prcl->top - prclDst->top);
ASSERTDD(lSrcLeft >= 0, "ugh");
ASSERTDD(lSrcTop >= 0, "ugh");
// Render the rectangle
// left/right/width are in ULONG units: left rounded down, right rounded up
ULONG left = prcl->left >> ppdev->bBppShift;
ULONG right = (prcl->right + ppdev->dwBppMask) >> ppdev->bBppShift;
ULONG width = right - left;
ULONG count = prcl->bottom - prcl->top;
WAIT_INPUT_FIFO(6);
SEND_PERMEDIA_DATA(StartXDom, left << 16);
SEND_PERMEDIA_DATA(StartXSub, right << 16);
SEND_PERMEDIA_DATA(PackedDataLimits,
PM_PACKEDDATALIMITS_OFFSET(ulOffset)
| (prcl->left << 16) | prcl->right);
SEND_PERMEDIA_DATA(StartY, prcl->top << 16);
SEND_PERMEDIA_DATA(Count, count);
SEND_PERMEDIA_DATA(Render, dwRenderBits);
DEXE_INPUT_FIFO();
ULONG * src = (ULONG *) (pbSrcStart + (lSrcTop * lSrcStride)
+ ((lSrcLeft & ~(ppdev->dwBppMask)) << ppdev->cPelSize));
// NOTE(review): the block download that would consume `src` is itself
// disabled by the nested #if 0. The fallback loop in the #else branch
// sends the constant 0 for every dword and never reads `src`, so it
// does not transfer any source pixels.
#if 0
BLKLD_INPUT_FIFO_LINES(__Permedia2TagColor, src, width, count, lSrcStride);
#else
while(count--)
{
ULONG i;
for(i=0; i<width; i++)
{
WAIT_INPUT_FIFO(1);
SEND_PERMEDIA_DATA(Color, 0);
EXE_INPUT_FIFO();
}
}
#endif
prcl++;
}
#endif
}// vDownload24Bpp()
//-----------------------------------------------------------------------------
//
// VOID vUploadNative(GFNPB* ppb)
//
// Does a VM-to-SM (video memory to system memory) copy of a list of
// rectangles.
//
// Arguments needed from function block (GFNPB)
// ppdev-------PPDev
// psurfSrc----Source surface
// psoDst------Destination surface (SURFOBJ)
// pRects------Pointer to a list of rectangles to be copied
// lNumRects---Number of rectangles in the list
// prclDst-----Points to a RECTL structure that defines the rectangular area
// to be modified
// pptlSrc-----Original unclipped source point
//
//-----------------------------------------------------------------------------
VOID
vUploadNative(GFNPB* ppb)
{
    //
    // Unpack the arguments from the function block
    //
    PDev*    ppdev      = ppb->ppdev;
    POINTL*  pptlSrc    = ppb->pptlSrc;
    RECTL*   prclDst    = ppb->prclDst;
    RECTL*   pRects     = ppb->pRects;
    Surf*    psurfSrc   = ppb->psurfSrc;
    SURFOBJ* psoDst     = ppb->psoDst;

    //
    // The destination starts at the SURFOBJ's first scan; the source starts
    // at the surface's byte offset from the screen base pointer
    //
    BYTE*    pbDstStart = (BYTE*)psoDst->pvScan0;
    BYTE*    pbSrcStart = (BYTE*)ppdev->pjScreen + psurfSrc->ulByteOffset;
    LONG     lDstStride = psoDst->lDelta;
    LONG     lSrcStride = psurfSrc->lDelta;
    LONG     lNumRects  = ppb->lNumRects;

    InputBufferSync(ppdev);

    DBG_GDI((6, "vUploadNative called"));

    //
    // A non-positive rectangle count is treated as "nothing to do" instead
    // of letting a post-decrement loop run until the counter wraps
    //
    for ( ; lNumRects > 0; lNumRects--, pRects++ )
    {
        LONG lWidthInBytes = (pRects->right - pRects->left)
                          << ppdev->cPelSize;
        LONG lHeight = pRects->bottom - pRects->top;

        //
        // Translate the destination rectangle origin back into source
        // coordinates
        //
        LONG lSrcX = pptlSrc->x + (pRects->left - prclDst->left);
        LONG lSrcY = pptlSrc->y + (pRects->top - prclDst->top);

        BYTE* pbSrc;
        BYTE* pbDst;

        //
        // Skip empty AND inverted rectangles. Testing only for "!= 0" would
        // let a negative height through, and the row loop below would then
        // iterate until the counter wrapped, copying far outside both
        // surfaces.
        //
        if ( (lWidthInBytes <= 0) || (lHeight <= 0) )
        {
            continue;
        }

        pbSrc = pbSrcStart + (lSrcX << ppdev->cPelSize);   // Offset in bytes
        pbSrc += (lSrcY * lSrcStride);                     // Add vertical offset

        pbDst = pbDstStart + (pRects->left << ppdev->cPelSize);
        pbDst += (pRects->top * lDstStride);

        //
        // "pbSrc" now points to the first byte to copy and "pbDst" to the
        // position that receives it. Copy row by row.
        //
        while ( lHeight-- > 0 )
        {
            LONG   lCount = lWidthInBytes;
            ULONG* pulSrc;
            ULONG* pulDst;

            //
            // Copy leading bytes one at a time until the source address is
            // DWORD aligned. Unaligned DWORD reads are twice as expensive
            // as aligned reads.
            //
            while ( (((ULONG_PTR)pbSrc & 0x3)) && (lCount > 0) )
            {
                *pbDst++ = *pbSrc++;
                lCount--;
            }

            //
            // The source is now DWORD aligned (or the row is finished), so
            // copy whole DWORDs until fewer than 4 bytes are left. The
            // destination may still be unaligned, hence the UNALIGNED
            // store.
            //
            pulSrc = (ULONG*)pbSrc;
            pulDst = (ULONG*)pbDst;

            while ( lCount >= 4 )
            {
                *(ULONG UNALIGNED*)pulDst++ = *pulSrc++;
                lCount -= 4;
            }

            //
            // Copy the last few left-over bytes
            //
            pbSrc = (BYTE*)pulSrc;
            pbDst = (BYTE*)pulDst;

            while ( lCount > 0 )
            {
                *pbDst++ = *pbSrc++;
                lCount--;
            }

            //
            // Both pointers have advanced by exactly lWidthInBytes; step
            // them to the start of the next row
            //
            pbSrc += (lSrcStride - lWidthInBytes);
            pbDst += (lDstStride - lWidthInBytes);
        }// while ( lHeight-- > 0 )
    }// for each rectangle
}// vUploadNative