|
|
/******************************Module*Header*******************************\
* Module Name: srcblt16.cxx * * This contains the bitmap simulation functions that blt to a 16 bit/pel * DIB surface. * * Created: 07-Feb-1991 19:27:49 * Author: Patrick Haluptzok patrickh * * Copyright (c) 1990-1999 Microsoft Corporation * \**************************************************************************/
#include "precomp.hxx"
// Turn off validations
#if 1
// Verification is compiled out on every build while the guard above is
// "#if 1": the VERIFY macros expand to nothing.
#define VERIFYS16D16(psb)
#define VERIFYS24D16(psb)
#define VERIFYS32D16(psb)
#else
// Not compiled while the guard above is "#if 1". Intended for checked
// builds: verify the RGB conversions after each blt.
VOID VERIFYS16D16(PBLTINFO psb)
{
    // Verification pass for a 16bpp -> 16bpp blt: every source pel is run
    // through pxlo->ulTranslate again and compared with the pel found in the
    // destination; any difference RIPs.
    //
    // We assume we are doing left to right top to bottom blting.
    // If it was on the same surface it would be the identity case.

    ASSERTGDI(psb->xDir == 1, "vSrcCopyS16D16 - direction not left to right");
    ASSERTGDI(psb->yDir == 1, "vSrcCopyS16D16 - direction not up to down");

    PUSHORT pusSrcRow = (PUSHORT) (psb->pjSrc + (2 * psb->xSrcStart));
    PUSHORT pusDstRow = (PUSHORT) (psb->pjDst + (2 * psb->xDstStart));
    ULONG   cx        = psb->cx;
    ULONG   cy        = psb->cy;
    XLATE  *pxlo      = psb->pxlo;
    ULONG   iPel;

    ASSERTGDI(cy != 0, "ERROR: Src Move cy == 0");

    for (;;)
    {
        // Compare one scan line, pel by pel.

        for (iPel = 0; iPel < cx; iPel++)
        {
            if (pusDstRow[iPel] !=
                (USHORT) (pxlo->ulTranslate((ULONG) pusSrcRow[iPel])))
            {
                RIP("RGB mis-match");
            }
        }

        if (--cy == 0)
        {
            break;
        }

        // Step both surfaces to the next scan line (deltas are in bytes).

        pusSrcRow = (PUSHORT) (((PBYTE) pusSrcRow) + psb->lDeltaSrc);
        pusDstRow = (PUSHORT) (((PBYTE) pusDstRow) + psb->lDeltaDst);
    }
}
VOID VERIFYS24D16(PBLTINFO psb)
{
    // Verification pass for a 24bpp -> 16bpp blt: each 3-byte source pel is
    // reassembled into a ULONG, translated with pxlo->ulTranslate and
    // compared with the destination pel; any difference RIPs.
    //
    // We assume we are doing left to right top to bottom blting.

    ASSERTGDI(psb->xDir == 1, "vSrcCopyS24D16 - direction not left to right");
    ASSERTGDI(psb->yDir == 1, "vSrcCopyS24D16 - direction not up to down");

    PBYTE   pjSrcRow  = psb->pjSrc + (3 * psb->xSrcStart);
    PUSHORT pusDstRow = (PUSHORT) (psb->pjDst + (2 * psb->xDstStart));
    ULONG   cx        = psb->cx;
    ULONG   cy        = psb->cy;
    XLATE  *pxlo      = psb->pxlo;
    PBYTE   pjPel;      // walks the source scan line 3 bytes at a time
    ULONG   ulPel;      // source pel assembled from its three bytes
    ULONG   iPel;

    ASSERTGDI(cy != 0, "ERROR: Src Move cy == 0");

    for (;;)
    {
        pjPel = pjSrcRow;

        for (iPel = 0; iPel < cx; iPel++)
        {
            // Byte 0 is the least significant byte of the pel.

            ulPel = ((ULONG) pjPel[2] << 16) |
                    ((ULONG) pjPel[1] << 8)  |
                     (ULONG) pjPel[0];

            if (pusDstRow[iPel] != (USHORT) (pxlo->ulTranslate(ulPel)))
            {
                RIP("RGB mis-match");
            }

            pjPel += 3;
        }

        if (--cy == 0)
        {
            break;
        }

        pjSrcRow += psb->lDeltaSrc;
        pusDstRow = (PUSHORT) (((PBYTE) pusDstRow) + psb->lDeltaDst);
    }
}
VOID VERIFYS32D16(PBLTINFO psb)
{
    // Verification pass for a 32bpp -> 16bpp blt: each source pel is
    // translated with pxlo->ulTranslate and compared with the destination
    // pel; any difference RIPs.  The translation of the most recent source
    // value is remembered so runs of equal pels translate only once.
    //
    // We assume we are doing left to right top to bottom blting.

    ASSERTGDI(psb->xDir == 1, "vSrcCopyS32D16 - direction not left to right");
    ASSERTGDI(psb->yDir == 1, "vSrcCopyS32D16 - direction not up to down");

    PULONG  pulSrcRow = (PULONG) (psb->pjSrc + (4 * psb->xSrcStart));
    PUSHORT pusDstRow = (PUSHORT) (psb->pjDst + (2 * psb->xDstStart));
    ULONG   cx        = psb->cx;
    ULONG   cy        = psb->cy;
    XLATE  *pxlo      = psb->pxlo;
    ULONG   ulLastSrcPel;   // last source value that was translated
    USHORT  usLastDstPel;   // its translation
    ULONG   iPel;

    ASSERTGDI(cy != 0, "ERROR: Src Move cy == 0");

    // Prime the one-entry cache with the first source pel.

    ulLastSrcPel = *pulSrcRow;
    usLastDstPel = (USHORT) (pxlo->ulTranslate(ulLastSrcPel));

    for (;;)
    {
        for (iPel = 0; iPel < cx; iPel++)
        {
            ULONG ulPel = pulSrcRow[iPel];

            if (ulPel != ulLastSrcPel)
            {
                ulLastSrcPel = ulPel;
                usLastDstPel = (USHORT) (pxlo->ulTranslate(ulLastSrcPel));
            }

            if (pusDstRow[iPel] != usLastDstPel)
            {
                RIP("RGB mis-match");
            }
        }

        if (--cy == 0)
        {
            break;
        }

        pulSrcRow = (PULONG) (((PBYTE) pulSrcRow) + psb->lDeltaSrc);
        pusDstRow = (PUSHORT) (((PBYTE) pusDstRow) + psb->lDeltaDst);
    }
}
#endif
/*******************Public*Routine*****************\
* vSrcCopyS1D16
*
* Expands a 1 bit/pel source into a 16 bit/pel destination.
*
* The destination scan line is treated as groups of 8 pixels
* (8 words = 16 bytes) and is processed in three passes:
*
*   1. The full groups of 8 Dst pixels.  We walk thru Src one
*      byte at a time (shifting two neighbouring Src bytes
*      together when Src and Dst are not bit aligned) and expand
*      each byte to 8 Dst words, written two pixels per dword.
*      The original author notes Dst is DWORD aligned here.
*
*   2. The starting pixels to the left of the first full group
*      (or the whole blt when it fits inside one group).
*
*   3. The ending pixels to the right of the last full group.
*
* The starting/ending pixels are expanded one bit at a time.
*
* pulXlate[0] is the colour written for a 0 bit and pulXlate[1]
* the colour written for a 1 bit.
*
* History:
*  17-Oct-1994 -by- Lingyun Wang [lingyunw]
* Wrote it.
*
\**************************************************/

// Expands the 8 pixels held in jSrc (most significant bit first) into the
// 16 bytes at pjDst.  Each pair of bits indexes aulTable, whose entries
// hold two ready-made 16 bit pixels.

static inline VOID vExpandByteS1D16(PBYTE pjDst, BYTE jSrc, const ULONG *aulTable)
{
    *(PULONG) (pjDst + 0)  = aulTable[(jSrc >> 6) & 0x03];
    *(PULONG) (pjDst + 4)  = aulTable[(jSrc >> 4) & 0x03];
    *(PULONG) (pjDst + 8)  = aulTable[(jSrc >> 2) & 0x03];
    *(PULONG) (pjDst + 12) = aulTable[jSrc & 0x03];
}

// Writes cPels pixels at pjDst, one word per pixel, taking the bits of
// jSrc from the most significant bit downwards.

static inline VOID vWriteBitsS1D16(PBYTE pjDst, BYTE jSrc, INT cPels, const USHORT *ausTable)
{
    PBYTE pjDstEnd = pjDst + cPels * 2;

    while (pjDst != pjDstEnd)
    {
        *(PUSHORT) pjDst = ausTable[(jSrc & 0x80) >> 7];

        jSrc <<= 1;
        pjDst += 2;
    }
}

VOID vSrcCopyS1D16(PBLTINFO psb)
{
    BYTE   jSrc;                // holds a source byte
    INT    iDst;                // pixel position within a group of 8 Dst words
    INT    iSrc;                // bit position within a Src byte
    PBYTE  pjDst;               // pointer to the Dst bytes
    PBYTE  pjSrc;               // pointer to the Src bytes
    LONG   cy;                  // number of rows
    LONG   cx;                  // number of pixels
    BYTE   alignL;              // left shift that lines Src bits up with Dst
    BYTE   alignR;              // complementary right shift (8 - alignL)
    LONG   cibytes;             // number of full groups of 8 Dst pixels
    BOOL   bNextByte;           // blt reaches into a following Dst group
    LONG   xDstEnd = psb->xDstStart + psb->cx;
    LONG   lDeltaDst;
    LONG   lDeltaSrc;
    USHORT ausTable[2];         // one pixel per Src bit
    ULONG  aulTable[4];         // two pixels per pair of Src bits
    INT    count;               // pixels in the partial group being written
    BOOL   bNextSrc = TRUE;     // starting pixels also need the next Src byte

    // We assume we are doing left to right top to bottom blting

    ASSERTGDI(psb->xDir == 1, "vSrcCopyS1D16 - direction not left to right");
    ASSERTGDI(psb->yDir == 1, "vSrcCopyS1D16 - direction not up to down");

    ASSERTGDI(psb->cy != 0, "ERROR: Src Move cy == 0");

    // Generate aulTable.  4 entries.
    // Each 2 bits of Src will be an index into aulTable, which yields the
    // 32 bit ULONG holding the two expanded pixels.  The leftmost pixel
    // (the higher of the two Src bits) goes in the low word.

    ULONG ulBack = (ULONG) (psb->pxlo->pulXlate[0]);
    ULONG ulFore = (ULONG) (psb->pxlo->pulXlate[1]);

    ulBack = (ulBack << 16) | ulBack;
    ulFore = (ulFore << 16) | ulFore;

    aulTable[0] = ulBack;                           // bits 0 0
    aulTable[1] = (ulFore << 16) | (ulBack >> 16);  // bits 0 1
    aulTable[2] = (ulBack << 16) | (ulFore >> 16);  // bits 1 0
    aulTable[3] = ulFore;                           // bits 1 1

    // Generate ausTable.
    // Two entries.  This table is used when dealing with the begin and
    // end parts.

    ausTable[0] = (USHORT) (psb->pxlo->pulXlate[0]);
    ausTable[1] = (USHORT) (psb->pxlo->pulXlate[1]);

    // Get Src and Dst start positions

    iSrc = psb->xSrcStart & 0x0007;
    iDst = psb->xDstStart & 0x0007;

    if (iSrc < iDst)
        alignL = 8 - (iDst - iSrc);
    else
        alignL = iSrc - iDst;

    alignR = 8 - alignL;

    cx = psb->cx;

    lDeltaDst = psb->lDeltaDst;
    lDeltaSrc = psb->lDeltaSrc;

    // TRUE if the blt extends past the group of 8 Dst pixels it starts in

    bNextByte = ((xDstEnd >> 3) != (psb->xDstStart >> 3));

    // Pass 1: the full groups of 8 Dst pixels.
    //
    // If Src and Dst are aligned, use a separate loop to obtain better
    // performance.  If not, we shift neighbouring Src bytes together to
    // match each group of 8 Dst pixels.

    if (bNextByte)
    {
        long  iStrideSrc;
        long  iStrideDst;
        PBYTE pjSrcEnd;

        // Get the first full group of 8 Dst words

        pjDst = psb->pjDst + 2 * ((psb->xDstStart + 7) & ~0x07);

        // Get the Src byte that matches the first full Dst group

        pjSrc = psb->pjSrc + ((psb->xSrcStart + ((8 - iDst) & 0x07)) >> 3);

        // Get the number of full groups of 8 words

        cibytes = (xDstEnd >> 3) - ((psb->xDstStart + 7) >> 3);

        // The increment from the end of the full groups on one scan line
        // to the first full group on the next scan line

        iStrideDst = lDeltaDst - cibytes * 16;
        iStrideSrc = lDeltaSrc - cibytes;

        cy = psb->cy;

        if (!alignL)
        {
            // Special case: Src bytes map directly onto Dst groups

            while (cy--)
            {
                pjSrcEnd = pjSrc + cibytes;

                while (pjSrc != pjSrcEnd)
                {
                    jSrc = *pjSrc++;

                    vExpandByteS1D16(pjDst, jSrc, aulTable);

                    pjDst += 16;
                }

                pjDst += iStrideDst;
                pjSrc += iStrideSrc;
            }
        }
        else
        {
            // General case: each Dst group takes the low bits of one Src
            // byte (carried in jRem) and the high bits of the next.

            BYTE jRem;  // remainder carried over from the previous Src byte

            while (cy--)
            {
                jRem = *pjSrc << alignL;

                pjSrcEnd = pjSrc + cibytes;

                while (pjSrc != pjSrcEnd)
                {
                    jSrc = ((*(++pjSrc)) >> alignR) | jRem;

                    vExpandByteS1D16(pjDst, jSrc, aulTable);

                    pjDst += 16;

                    // next remainder

                    jRem = *pjSrc << alignL;
                }

                // go to the beginning full group of the next scan line

                pjDst += iStrideDst;
                pjSrc += iStrideSrc;
            }
        }
    }

    // Pass 2: the starting pixels

    if (!bNextByte)
    {
        // The whole blt lies inside one Dst group; do all of it here.

        count = cx;
        bNextSrc = ((iSrc + cx) > 8);
    }
    else
    {
        count = 8 - iDst;
    }

    if ((iDst != 0) || !bNextByte)
    {
        pjDst = psb->pjDst + 2 * psb->xDstStart;
        pjSrc = psb->pjSrc + (psb->xSrcStart >> 3);

        cy = psb->cy;

        while (cy--)
        {
            // Bring the first Src pixel up to the top bit of jSrc.

            if (iSrc > iDst)
            {
                jSrc = *pjSrc << alignL;

                if (bNextSrc)
                {
                    // The run crosses into the following Src byte.

                    jSrc |= *(pjSrc + 1) >> alignR;
                }

                jSrc <<= iDst;
            }
            else
            {
                jSrc = *pjSrc << iSrc;
            }

            vWriteBitsS1D16(pjDst, jSrc, count, ausTable);

            pjDst += lDeltaDst;
            pjSrc += lDeltaSrc;
        }
    }

    // Pass 3: the ending pixels

    if ((xDstEnd & 0x0007) && bNextByte)
    {
        // Get the last partial group on the scan line

        pjDst = psb->pjDst + 2 * (xDstEnd & ~0x07);

        // Get the Src byte that holds the last Src pixel

        pjSrc = psb->pjSrc + ((psb->xSrcEnd - 1) >> 3);

        // Get the ending position in the last Src byte and Dst group

        iSrc = (psb->xSrcEnd - 1) & 0x0007;
        iDst = (xDstEnd - 1) & 0x0007;

        count = iDst + 1;

        cy = psb->cy;

        while (cy--)
        {
            if (iSrc >= iDst)
            {
                // All the ending pixels come from the last Src byte.

                jSrc = *pjSrc << alignL;
            }
            else
            {
                // The ending pixels start in the previous Src byte.

                jSrc = *(pjSrc - 1) << alignL;
                jSrc |= *pjSrc >> alignR;
            }

            vWriteBitsS1D16(pjDst, jSrc, count, ausTable);

            pjDst += lDeltaDst;
            pjSrc += lDeltaSrc;
        }
    }
}
/******************************Public*Routine******************************\
* vSrcCopyS4D16
*
* Copies a 4 bit/pel source to a 16 bit/pel destination.  Each source
* nibble indexes pulXlate; the low 16 bits of the entry become the
* destination pel.  The high nibble of a source byte is the even
* (leftmost) pel, the low nibble the odd pel.
*
* History:
*  06-Feb-1991 -by- Patrick Haluptzok patrickh
* Wrote it.
\**************************************************************************/

VOID vSrcCopyS4D16(PBLTINFO psb)
{
    // We assume we are doing left to right top to bottom blting

    ASSERTGDI(psb->xDir == 1, "vSrcCopyS4D16 - direction not left to right");
    ASSERTGDI(psb->yDir == 1, "vSrcCopyS4D16 - direction not up to down");

    BYTE    jSrc;               // source byte currently being unpacked
    LONG    xSrc;               // source x of the pel being written
    PUSHORT pusDst;
    PBYTE   pjSrc;
    PUSHORT pusDstRow = (PUSHORT) (psb->pjDst + (2 * psb->xDstStart));
    PBYTE   pjSrcRow  = psb->pjSrc + (psb->xSrcStart >> 1);
    ULONG   cy        = psb->cy;
    XLATE  *pxlo      = psb->pxlo;
    PULONG  pulXlate  = pxlo->pulXlate;

    ASSERTGDI(cy != 0, "ERROR: Src Move cy == 0");

    for (;;)
    {
        pusDst = pusDstRow;
        pjSrc  = pjSrcRow;
        xSrc   = psb->xSrcStart;

        // Starting on an odd pel: its byte has to be fetched up front
        // because the loop only fetches on even pels.

        if (xSrc & 0x00000001)
        {
            jSrc = *pjSrc++;
        }

        for (; xSrc != psb->xSrcEnd; ++xSrc)
        {
            if (xSrc & 0x00000001)
            {
                // Odd pel: low nibble of the byte already in hand.

                *pusDst++ = (USHORT) pulXlate[jSrc & 0x0F];
            }
            else
            {
                // Even pel: we need a new byte; use its high nibble.

                jSrc = *pjSrc++;
                *pusDst++ = (USHORT) pulXlate[((ULONG) jSrc) >> 4];
            }
        }

        if (--cy == 0)
        {
            break;
        }

        pjSrcRow += psb->lDeltaSrc;
        pusDstRow = (PUSHORT) (((PBYTE) pusDstRow) + psb->lDeltaDst);
    }
}
/******************************Public*Routine******************************\
* vSrcCopyS8D16
*
* Copies an 8 bit/pel source to a 16 bit/pel destination.  Each source
* byte indexes pulXlate to get the destination pel.  Pels are written a
* dword (two pels) at a time once the destination is dword aligned.
*
* History:
*  06-Feb-1991 -by- Patrick Haluptzok patrickh
* Wrote it.
\**************************************************************************/

VOID vSrcCopyS8D16(PBLTINFO psb)
{
    // We assume we are doing left to right top to bottom blting

    ASSERTGDI(psb->xDir == 1, "vSrcCopyS8D16 - direction not left to right");
    ASSERTGDI(psb->yDir == 1, "vSrcCopyS8D16 - direction not up to down");

    PBYTE  pjSrc    = psb->pjSrc + psb->xSrcStart;
    PBYTE  pjDst    = psb->pjDst + (2 * psb->xDstStart);
    LONG   cx       = psb->cx;
    LONG   cy       = psb->cy;
    XLATE *pxlo     = psb->pxlo;
    PULONG pulXlate = pxlo->pulXlate;

    // Byte distance from the end of one row's pels to the start of the next

    LONG   lSrcSkip = psb->lDeltaSrc - cx;
    LONG   lDstSkip = psb->lDeltaDst - (cx * 2);
    LONG   cLeft;               // pels still to do on this scan line

    ASSERTGDI(cy != 0, "ERROR: Src Move cy == 0");

    for (;;)
    {
        cLeft = cx;

        // Get 'dword' alignment on the destination:

        if (((ULONG_PTR) pjDst) & 2)
        {
            *((USHORT*) pjDst) = (USHORT) pulXlate[pjSrc[0]];
            pjDst += 2;
            pjSrc += 1;
            cLeft--;
        }

        // Now write pixels a dword at a time.  This is almost a 2x win
        // over doing word writes if we're writing to frame buffer memory
        // over the PCI bus on Pentium class systems, because the PCI
        // write throughput is so slow.  cLeft ends at -1 when one pel
        // remains and at -2 when none do.

        for (cLeft -= 2; cLeft >= 0; cLeft -= 2)
        {
            *((ULONG*) pjDst) = (pulXlate[pjSrc[0]]) | (pulXlate[pjSrc[1]] << 16);
            pjDst += 4;
            pjSrc += 2;
        }

        // Take care of the end alignment:

        if (cLeft & 1)
        {
            *((USHORT*) pjDst) = (USHORT) pulXlate[pjSrc[0]];
            pjDst += 2;
            pjSrc += 1;
        }

        if (--cy == 0)
        {
            break;
        }

        pjSrc += lSrcSkip;
        pjDst += lDstSkip;
    }
}
/******************************Public*Routine******************************\
* vSrcCopyS16D16
*
* Copies a 16 bit/pel source to a 16 bit/pel destination whose bitfields
* differ.  5-5-5 -> 5-6-5 and 5-6-5 -> 5-5-5 are done with inline shifts
* and masks, two pels per dword where possible; every other combination
* goes pel by pel through the function returned by
* pxlo->pfnXlateBetweenBitfields().
*
* History:
*  07-Feb-1991 -by- Patrick Haluptzok patrickh
* Wrote it.
\**************************************************************************/

VOID vSrcCopyS16D16(PBLTINFO psb)
{
    // We assume we are doing left to right top to bottom blting
    // If it was on the same surface it would be the identity case.

    ASSERTGDI(psb->xDir == 1, "vSrcCopyS16D16 - direction not left to right");
    ASSERTGDI(psb->yDir == 1, "vSrcCopyS16D16 - direction not up to down");

    PBYTE        pjSrc = psb->pjSrc + (2 * psb->xSrcStart);
    PBYTE        pjDst = psb->pjDst + (2 * psb->xDstStart);
    ULONG        cx    = psb->cx;
    ULONG        cy    = psb->cy;
    XLATE       *pxlo  = psb->pxlo;
    XEPALOBJ     palSrc(pxlo->ppalSrc);
    XEPALOBJ     palDst(pxlo->ppalDst);

    // Byte distance from the end of one row's pels to the start of the next

    LONG         lSrcSkip = psb->lDeltaSrc - (cx * 2);
    LONG         lDstSkip = psb->lDeltaDst - (cx * 2);
    PFN_pfnXlate pfnXlate;
    LONG         cLeft;         // pels still to do on this scan line
    USHORT       us;
    ULONG        ul;

    ASSERTGDI(cy != 0, "ERROR: Src Move cy == 0");

    // Optimize 5-5-5 to 5-6-5.  Blue stays put, red and green move up one
    // bit, and the new low green bit is a copy of the top green bit.

    if (palSrc.bIs555() && palDst.bIs565())
    {
        for (;;)
        {
            cLeft = cx;

            // Get dword alignment on the destination:

            if (((ULONG_PTR) pjDst) & 2)
            {
                us = *((USHORT*) pjSrc);

                *((USHORT*) pjDst) = ((us) & 0x001f)
                                   | ((us << 1) & 0xffc0)
                                   | ((us >> 4) & 0x0020);
                pjDst += 2;
                pjSrc += 2;
                cLeft--;
            }

            // Two pels per dword.  cLeft ends at -1 when one pel remains
            // and at -2 when none do.

            for (cLeft -= 2; cLeft >= 0; cLeft -= 2)
            {
                ul = *(UNALIGNED_DWORD_POINTER(pjSrc));

                *((ULONG*) pjDst) = ((ul) & 0x001f001f)
                                  | ((ul << 1) & 0xffc0ffc0)
                                  | ((ul >> 4) & 0x00200020);
                pjDst += 4;
                pjSrc += 4;
            }

            // Trailing odd pel:

            if (cLeft & 1)
            {
                us = *((USHORT*) pjSrc);

                *((USHORT*) pjDst) = ((us) & 0x001f)
                                   | ((us << 1) & 0xffc0)
                                   | ((us >> 4) & 0x0020);
                pjDst += 2;
                pjSrc += 2;
            }

            if (--cy == 0)
            {
                break;
            }

            pjSrc += lSrcSkip;
            pjDst += lDstSkip;
        }

        VERIFYS16D16(psb);
        return;
    }

    // Optimize 5-6-5 to 5-5-5.  Blue stays put, red and green move down
    // one bit, which drops the low green bit.

    if (palSrc.bIs565() && palDst.bIs555())
    {
        for (;;)
        {
            cLeft = cx;

            if (((ULONG_PTR) pjDst) & 2)
            {
                us = *((USHORT*) pjSrc);

                *((USHORT*) pjDst) = ((us) & 0x001f)
                                   | ((us >> 1) & 0x7fe0);
                pjDst += 2;
                pjSrc += 2;
                cLeft--;
            }

            for (cLeft -= 2; cLeft >= 0; cLeft -= 2)
            {
                ul = *(UNALIGNED_DWORD_POINTER(pjSrc));

                *((ULONG*) pjDst) = ((ul) & 0x001f001f)
                                  | ((ul >> 1) & 0x7fe07fe0);
                pjDst += 4;
                pjSrc += 4;
            }

            if (cLeft & 1)
            {
                us = *((USHORT*) pjSrc);

                *((USHORT*) pjDst) = ((us) & 0x001f)
                                   | ((us >> 1) & 0x7fe0);
                pjDst += 2;
                pjSrc += 2;
            }

            if (--cy == 0)
            {
                break;
            }

            pjSrc += lSrcSkip;
            pjDst += lDstSkip;
        }

        VERIFYS16D16(psb);
        return;
    }

    // Finally, fall back to the generic case:

    pfnXlate = pxlo->pfnXlateBetweenBitfields();

    for (;;)
    {
        cLeft = cx;

        do
        {
            *((USHORT*) pjDst) = (USHORT) pfnXlate(pxlo, *((USHORT*) pjSrc));
            pjDst += 2;
            pjSrc += 2;

        } while (--cLeft != 0);

        if (--cy == 0)
        {
            break;
        }

        pjSrc += lSrcSkip;
        pjDst += lDstSkip;
    }

    VERIFYS16D16(psb);
}
/******************************Public*Routine******************************\
* vSrcCopyS16D16Identity
*
* This is the special case no translate blting.  All the SmDn should have
* them if m==n.  Identity xlates only occur among matching format bitmaps.
*
* Each scan line is moved as one block of cx * 2 bytes, with
* vSrcAlignCopyMemory when psb->fSrcAlignedRd is set and RtlMoveMemory
* otherwise.
*
* History:
*  06-Feb-1991 -by- Patrick Haluptzok patrickh
* Wrote it.
\**************************************************************************/

VOID vSrcCopyS16D16Identity(PBLTINFO psb)
{
    PUSHORT pusSrcRow = (PUSHORT) (psb->pjSrc + (2 * psb->xSrcStart));
    PUSHORT pusDstRow = (PUSHORT) (psb->pjDst + (2 * psb->xDstStart));
    ULONG   cx        = psb->cx;
    ULONG   cy        = psb->cy;
    ULONG   cjRow;              // bytes to move per scan line

    ASSERTGDI(cy != 0, "ERROR: Src Move cy == 0");

    // For a right-to-left blt back both pointers up by cx - 1 pels so the
    // block move starts at the lowest address of the row (presumably the
    // start x then names the rightmost pel -- confirm against the caller).

    if (psb->xDir < 0)
    {
        pusSrcRow -= (cx - 1);
        pusDstRow -= (cx - 1);
    }

    cjRow = cx << 1;

    for (;;)
    {
        if (psb->fSrcAlignedRd)
        {
            vSrcAlignCopyMemory((PBYTE)pusDstRow,(PBYTE)pusSrcRow,cjRow);
        }
        else
        {
            RtlMoveMemory((PVOID)pusDstRow, (PVOID)pusSrcRow, cjRow);
        }

        if (--cy == 0)
        {
            break;
        }

        pusSrcRow = (PUSHORT) (((PBYTE) pusSrcRow) + psb->lDeltaSrc);
        pusDstRow = (PUSHORT) (((PBYTE) pusDstRow) + psb->lDeltaDst);
    }
}
/******************************Public*Routine******************************\
* vSrcCopyS24D16
*
* Copies a 24 bit/pel source to a 16 bit/pel bitfields destination.
*
* When the source palette is BGR and the destination is 5-6-5 or 5-5-5
* the pels are packed with inline shifts and masks, two pels per dword
* once the destination is dword aligned (hand written assembler on x86,
* portable C elsewhere).  Every other combination goes pel by pel
* through the function returned by pxlo->pfnXlateBetweenBitfields().
*
* Source bytes of a pel are read in the order blue (byte 0), green
* (byte 1), red (byte 2) in the optimized paths.
*
* History:
*  06-Feb-1991 -by- Patrick Haluptzok patrickh
* Wrote it.
\**************************************************************************/

VOID vSrcCopyS24D16(PBLTINFO psb)
{
    // We assume we are doing left to right top to bottom blting

    ASSERTGDI(psb->xDir == 1, "vSrcCopyS24D16 - direction not left to right");
    ASSERTGDI(psb->yDir == 1, "vSrcCopyS24D16 - direction not up to down");

    // These are our holding variables

    PBYTE        pjSrc = psb->pjSrc + (3 * psb->xSrcStart);
    PBYTE        pjDst = psb->pjDst + (2 * psb->xDstStart);
    ULONG        cx    = psb->cx;
    ULONG        cy    = psb->cy;

    // Byte distance from the end of one row's pels to the start of the next

    LONG         lSrcSkip = psb->lDeltaSrc - (cx * 3);
    LONG         lDstSkip = psb->lDeltaDst - (cx * 2);
    XLATE       *pxlo = psb->pxlo;
    XEPALOBJ     palSrc(pxlo->ppalSrc);
    XEPALOBJ     palDst(pxlo->ppalDst);
    PFN_pfnXlate pfnXlate;
    ULONG        ul;
    ULONG        ul0;
    ULONG        ul1;
    LONG         i;             // pels still to do on this scan line

    ASSERTGDI(cy != 0, "ERROR: Src Move cy == 0");
    ASSERTGDI(((pxlo->flXlate & (XO_TABLE | XO_TO_MONO)) == 0)
           && ((pxlo->flPrivate & XLATE_PAL_MANAGED) == 0),
           "ERROR: flXlate != 0 or flPrivate != 0");
    ASSERTGDI(((XEPALOBJ) pxlo->ppalDst).cEntries() == 0,
           "ERROR: cEntries != 0");
    ASSERTGDI(palDst.bIsBitfields(),
           "ERROR: destination not bitfields");

    if (palSrc.bIsBGR())
    {
        // First, try to optimize BGR to 5-6-5:

        if (palDst.bIs565())
        {
            while (1)
            {
                i = cx;

                // Get dword alignment on the destination:

                if (((ULONG_PTR) pjDst) & 2)
                {
                    ul = ((*(pjSrc) >> 3))
                       | ((*(pjSrc + 1) << 3) & 0x07e0)
                       | ((*(pjSrc + 2) << 8) & 0xf800);

                    *((USHORT*) pjDst) = (USHORT) ul;
                    pjDst += 2;
                    pjSrc += 3;
                    i--;
                }

                // Two pels (6 source bytes) per dword written.  i ends at
                // -1 when one pel remains and at -2 when none do.

            #if defined(_X86_)

                _asm {

                    mov     esi, pjSrc
                    mov     edi, pjDst
                    sub     i, 2
                    js      Done_565_Loop

                Middle_565_Loop:

                    movzx   eax, byte ptr [esi]
                    movzx   ebx, byte ptr [esi+1]
                    shr     eax, 3
                    shl     ebx, 3
                    movzx   edx, byte ptr [esi+2]
                    movzx   ecx, byte ptr [esi+3]
                    shl     edx, 8
                    shl     ecx, 13
                    or      eax, edx
                    or      ebx, ecx
                    movzx   edx, byte ptr [esi+4]
                    movzx   ecx, byte ptr [esi+5]
                    shl     edx, 19
                    shl     ecx, 24
                    or      eax, edx
                    or      ebx, ecx
                    and     eax, 0x07e0f81f
                    and     ebx, 0xf81f07e0
                    or      eax, ebx
                    add     esi, 6
                    mov     [edi], eax
                    add     edi, 4
                    sub     i, 2
                    jns     Middle_565_Loop

                Done_565_Loop:

                    mov     pjSrc, esi
                    mov     pjDst, edi
                }

            #else

                // Widen each byte to ULONG before shifting so that the
                // shift by 24 cannot overflow a signed int.

                for (i -= 2; i >= 0; i -= 2)
                {
                    ul0 = ((ULONG) *(pjSrc) >> 3)
                        | ((ULONG) *(pjSrc + 2) << 8)
                        | ((ULONG) *(pjSrc + 4) << 19);
                    ul1 = ((ULONG) *(pjSrc + 1) << 3)
                        | ((ULONG) *(pjSrc + 3) << 13)
                        | ((ULONG) *(pjSrc + 5) << 24);

                    *((ULONG*) pjDst) = (ul0 & 0x07e0f81f)
                                      | (ul1 & 0xf81f07e0);

                    pjDst += 4;
                    pjSrc += 6;
                }

            #endif

                // Trailing odd pel:

                if (i & 1)
                {
                    ul = ((*(pjSrc) >> 3))
                       | ((*(pjSrc + 1) << 3) & 0x07e0)
                       | ((*(pjSrc + 2) << 8) & 0xf800);

                    *((USHORT*) pjDst) = (USHORT) ul;
                    pjDst += 2;
                    pjSrc += 3;
                }

                if (--cy == 0)
                    break;

                pjSrc += lSrcSkip;
                pjDst += lDstSkip;
            }

            VERIFYS24D16(psb);
            return;
        }

        // Next, try to optimize BGR to 5-5-5:

        if (palDst.bIs555())
        {
            while (1)
            {
                i = cx;

                if (((ULONG_PTR) pjDst) & 2)
                {
                    ul = ((*(pjSrc) >> 3))
                       | ((*(pjSrc + 1) << 2) & 0x03e0)
                       | ((*(pjSrc + 2) << 7) & 0x7c00);

                    *((USHORT*) pjDst) = (USHORT) ul;
                    pjDst += 2;
                    pjSrc += 3;
                    i--;
                }

            #if defined(_X86_)

                _asm {

                    mov     esi, pjSrc
                    mov     edi, pjDst
                    sub     i, 2
                    js      Done_555_Loop

                Middle_555_Loop:

                    movzx   eax, byte ptr [esi]
                    movzx   ebx, byte ptr [esi+1]
                    shr     eax, 3
                    shl     ebx, 2
                    movzx   edx, byte ptr [esi+2]
                    movzx   ecx, byte ptr [esi+3]
                    shl     edx, 7
                    shl     ecx, 13
                    or      eax, edx
                    or      ebx, ecx
                    movzx   edx, byte ptr [esi+4]
                    movzx   ecx, byte ptr [esi+5]
                    shl     edx, 18
                    shl     ecx, 23
                    or      eax, edx
                    or      ebx, ecx
                    and     eax, 0x03e07c1f
                    and     ebx, 0x7c1f03e0
                    or      eax, ebx
                    add     esi, 6
                    mov     [edi], eax
                    add     edi, 4
                    sub     i, 2
                    jns     Middle_555_Loop

                Done_555_Loop:

                    mov     pjSrc, esi
                    mov     pjDst, edi
                }

            #else

                // Bytes are widened to ULONG before shifting, matching
                // the 5-6-5 path and the generic path below.

                for (i -= 2; i >= 0; i -= 2)
                {
                    ul0 = ((ULONG) *(pjSrc) >> 3)
                        | ((ULONG) *(pjSrc + 2) << 7)
                        | ((ULONG) *(pjSrc + 4) << 18);
                    ul1 = ((ULONG) *(pjSrc + 1) << 2)
                        | ((ULONG) *(pjSrc + 3) << 13)
                        | ((ULONG) *(pjSrc + 5) << 23);

                    *((ULONG*) pjDst) = (ul0 & 0x03e07c1f)
                                      | (ul1 & 0x7c1f03e0);

                    pjDst += 4;
                    pjSrc += 6;
                }

            #endif

                if (i & 1)
                {
                    ul = ((*(pjSrc) >> 3))
                       | ((*(pjSrc + 1) << 2) & 0x03e0)
                       | ((*(pjSrc + 2) << 7) & 0x7c00);

                    *((USHORT*) pjDst) = (USHORT) ul;
                    pjDst += 2;
                    pjSrc += 3;
                }

                if (--cy == 0)
                    break;

                pjSrc += lSrcSkip;
                pjDst += lDstSkip;
            }

            VERIFYS24D16(psb);
            return;
        }
    }

    // Finally, fall back to the generic case:

    pfnXlate = pxlo->pfnXlateBetweenBitfields();

    while (1)
    {
        i = cx;

        do
        {
            // Assemble the pel with byte 0 as the least significant byte.

            ul = ((ULONG) *(pjSrc))
               | ((ULONG) *(pjSrc + 1) << 8)
               | ((ULONG) *(pjSrc + 2) << 16);

            *((USHORT*) pjDst) = (USHORT) pfnXlate(pxlo, ul);
            pjDst += 2;
            pjSrc += 3;

        } while (--i != 0);

        if (--cy == 0)
            break;

        pjSrc += lSrcSkip;
        pjDst += lDstSkip;
    }

    VERIFYS24D16(psb);
}
/******************************Public*Routine******************************\
* vSrcCopyS32D16
*
* Copies a 32 bit/pel source to a 16 bit/pel destination.
*
* When the source palette is BGR and the destination is 5-6-5 or 5-5-5
* the pels are packed with inline shifts and masks, two pels per dword
* once the destination is dword aligned (hand written assembler on x86,
* portable C elsewhere).  Every other combination goes pel by pel
* through the function returned by pxlo->pfnXlateBetweenBitfields().
*
* In the optimized paths bytes 0, 1 and 2 of each 4-byte source pel are
* read as blue, green and red; byte 3 is ignored.
*
* History:
*  07-Feb-1991 -by- Patrick Haluptzok patrickh
* Wrote it.
\**************************************************************************/

VOID vSrcCopyS32D16(PBLTINFO psb)
{
    // We assume we are doing left to right top to bottom blting.

    ASSERTGDI(psb->xDir == 1, "vSrcCopyS32D16 - direction not left to right");
    ASSERTGDI(psb->yDir == 1, "vSrcCopyS32D16 - direction not up to down");

    // These are our holding variables

    PBYTE        pjSrc = psb->pjSrc + (4 * psb->xSrcStart);
    PBYTE        pjDst = psb->pjDst + (2 * psb->xDstStart);
    ULONG        cx    = psb->cx;
    ULONG        cy    = psb->cy;

    // Byte distance from the end of one row's pels to the start of the next

    LONG         lSrcSkip = psb->lDeltaSrc - (cx * 4);
    LONG         lDstSkip = psb->lDeltaDst - (cx * 2);
    XLATE       *pxlo = psb->pxlo;
    XEPALOBJ     palSrc(pxlo->ppalSrc);
    XEPALOBJ     palDst(pxlo->ppalDst);
    PFN_pfnXlate pfnXlate;
    ULONG        ul;
    ULONG        ul0;
    ULONG        ul1;
    LONG         i;             // pels still to do on this scan line

    ASSERTGDI(cy != 0, "ERROR: Src Move cy == 0");

    if (palSrc.bIsBGR())
    {
        // First, try to optimize BGR to 5-6-5:

        if (palDst.bIs565())
        {
            while (1)
            {
                i = cx;

                // Get dword alignment on the destination:

                if (((ULONG_PTR) pjDst) & 2)
                {
                    ul = ((*(pjSrc) >> 3))
                       | ((*(pjSrc + 1) << 3) & 0x07e0)
                       | ((*(pjSrc + 2) << 8) & 0xf800);

                    *((USHORT*) pjDst) = (USHORT) ul;
                    pjDst += 2;
                    pjSrc += 4;
                    i--;
                }

                // Two pels (8 source bytes) per dword written.  i ends at
                // -1 when one pel remains and at -2 when none do.

            #if defined(_X86_)

                _asm {

                    mov     esi, pjSrc
                    mov     edi, pjDst
                    sub     i, 2
                    js      Done_565_Loop

                Middle_565_Loop:

                    movzx   eax, byte ptr [esi]
                    movzx   ebx, byte ptr [esi+1]
                    shr     eax, 3
                    shl     ebx, 3
                    movzx   edx, byte ptr [esi+2]
                    movzx   ecx, byte ptr [esi+4]
                    shl     edx, 8
                    shl     ecx, 13
                    or      eax, edx
                    or      ebx, ecx
                    movzx   edx, byte ptr [esi+5]
                    movzx   ecx, byte ptr [esi+6]
                    shl     edx, 19
                    shl     ecx, 24
                    or      eax, edx
                    or      ebx, ecx
                    and     eax, 0x07e0f81f
                    and     ebx, 0xf81f07e0
                    or      eax, ebx
                    add     esi, 8
                    mov     [edi], eax
                    add     edi, 4
                    sub     i, 2
                    jns     Middle_565_Loop

                Done_565_Loop:

                    mov     pjSrc, esi
                    mov     pjDst, edi
                }

            #else

                // Widen each byte to ULONG before shifting so that the
                // shift by 24 cannot overflow a signed int.

                for (i -= 2; i >= 0; i -= 2)
                {
                    ul0 = ((ULONG) *(pjSrc) >> 3)
                        | ((ULONG) *(pjSrc + 2) << 8)
                        | ((ULONG) *(pjSrc + 5) << 19);
                    ul1 = ((ULONG) *(pjSrc + 1) << 3)
                        | ((ULONG) *(pjSrc + 4) << 13)
                        | ((ULONG) *(pjSrc + 6) << 24);

                    *((ULONG*) pjDst) = (ul0 & 0x07e0f81f)
                                      | (ul1 & 0xf81f07e0);

                    pjDst += 4;
                    pjSrc += 8;
                }

            #endif

                // Trailing odd pel:

                if (i & 1)
                {
                    ul = ((*(pjSrc) >> 3))
                       | ((*(pjSrc + 1) << 3) & 0x07e0)
                       | ((*(pjSrc + 2) << 8) & 0xf800);

                    *((USHORT*) pjDst) = (USHORT) ul;
                    pjDst += 2;
                    pjSrc += 4;
                }

                if (--cy == 0)
                    break;

                pjSrc += lSrcSkip;
                pjDst += lDstSkip;
            }

            VERIFYS32D16(psb);
            return;
        }

        // Next, try to optimize BGR to 5-5-5:

        if (palDst.bIs555())
        {
            while (1)
            {
                i = cx;

                if (((ULONG_PTR) pjDst) & 2)
                {
                    ul = ((*(pjSrc) >> 3))
                       | ((*(pjSrc + 1) << 2) & 0x03e0)
                       | ((*(pjSrc + 2) << 7) & 0x7c00);

                    *((USHORT*) pjDst) = (USHORT) ul;
                    pjDst += 2;
                    pjSrc += 4;
                    i--;
                }

            #if defined(_X86_)

                _asm {

                    mov     esi, pjSrc
                    mov     edi, pjDst
                    sub     i, 2
                    js      Done_555_Loop

                Middle_555_Loop:

                    movzx   eax, byte ptr [esi]
                    movzx   ebx, byte ptr [esi+1]
                    shr     eax, 3
                    shl     ebx, 2
                    movzx   edx, byte ptr [esi+2]
                    movzx   ecx, byte ptr [esi+4]
                    shl     edx, 7
                    shl     ecx, 13
                    or      eax, edx
                    or      ebx, ecx
                    movzx   edx, byte ptr [esi+5]
                    movzx   ecx, byte ptr [esi+6]
                    shl     edx, 18
                    shl     ecx, 23
                    or      eax, edx
                    or      ebx, ecx
                    and     eax, 0x03e07c1f
                    and     ebx, 0x7c1f03e0
                    or      eax, ebx
                    add     esi, 8
                    mov     [edi], eax
                    add     edi, 4
                    sub     i, 2
                    jns     Middle_555_Loop

                Done_555_Loop:

                    mov     pjSrc, esi
                    mov     pjDst, edi
                }

            #else

                // Bytes are widened to ULONG before shifting, matching
                // the 5-6-5 path above.

                for (i -= 2; i >= 0; i -= 2)
                {
                    ul0 = ((ULONG) *(pjSrc) >> 3)
                        | ((ULONG) *(pjSrc + 2) << 7)
                        | ((ULONG) *(pjSrc + 5) << 18);
                    ul1 = ((ULONG) *(pjSrc + 1) << 2)
                        | ((ULONG) *(pjSrc + 4) << 13)
                        | ((ULONG) *(pjSrc + 6) << 23);

                    *((ULONG*) pjDst) = (ul0 & 0x03e07c1f)
                                      | (ul1 & 0x7c1f03e0);

                    pjDst += 4;
                    pjSrc += 8;
                }

            #endif

                if (i & 1)
                {
                    ul = ((*(pjSrc) >> 3))
                       | ((*(pjSrc + 1) << 2) & 0x03e0)
                       | ((*(pjSrc + 2) << 7) & 0x7c00);

                    *((USHORT*) pjDst) = (USHORT) ul;
                    pjDst += 2;
                    pjSrc += 4;
                }

                if (--cy == 0)
                    break;

                pjSrc += lSrcSkip;
                pjDst += lDstSkip;
            }

            VERIFYS32D16(psb);
            return;
        }
    }

    // Finally, fall back to the generic case:

    pfnXlate = pxlo->pfnXlateBetweenBitfields();

    while (1)
    {
        i = cx;

        do
        {
            *((USHORT*) pjDst) = (USHORT) pfnXlate(pxlo, *((ULONG*) pjSrc));
            pjDst += 2;
            pjSrc += 4;

        } while (--i != 0);

        if (--cy == 0)
            break;

        pjSrc += lSrcSkip;
        pjDst += lDstSkip;
    }

    VERIFYS32D16(psb);
}
|