You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1218 lines
39 KiB
1218 lines
39 KiB
/*++
|
|
|
|
Copyright (c) 1996 - 1999 Microsoft Corporation
|
|
|
|
Module Name:
|
|
|
|
raster.c
|
|
|
|
Abstract:
|
|
|
|
The module contains the functions associated with transposing bitmaps.
|
|
This includes rotation of 1, 4, 8, and 24 bit formats as well as special
|
|
transformations of color formats for planar or vertical head devices.
|
|
|
|
Environment:
|
|
|
|
Windows NT Unidrv driver
|
|
|
|
Revision History:
|
|
|
|
12/15/96 -alvins-
|
|
Created
|
|
|
|
--*/
|
|
#include "raster.h"
|
|
#include "rmrender.h"
|
|
|
|
|
|
/*
|
|
* The transpose table: maps one byte into two longs, such that the
|
|
* 8 bits of the byte turn into 64 bits: each bit of the original is
|
|
* turned into one byte of output.
|
|
* THUS:
|
|
* Input byte: hgfedcba
|
|
* transposes into output bytes:
|
|
* 0000000a 0000000b 0000000c 0000000d
|
|
* 0000000e 0000000f 0000000g 0000000h
|
|
*
|
|
* The table is allocated at DrvEnableSurface time, thus ensuring that
|
|
* we do not allocate memory that we are not going to use.
|
|
*/
|
|
|
|
/* Bytes in a transpose table: 256 entries (one per input byte value), two DWORDs each. */
#define TABLE_SIZE (256 * 2 * sizeof( DWORD ))
|
|
|
|
/*
|
|
* We also need a similar table for colour separation. This one
|
|
* consists of 256 DWORDs, and is used to split the RGB(K) format
|
|
* input byte into an output DWORD with the two R bits in one byte,
|
|
* the two G bits in the next byte etc. Used for single pin colour
|
|
* printers, like the HP PaintJet.
|
|
* The table is generated according to the following rule:
|
|
*
|
|
* INPUT BYTE: KRGBkrgb
|
|
*
|
|
* OUTPUT DWORD: 000000Kk 000000Rr 000000Gg 000000Bb
|
|
*/
|
|
|
|
/* Bytes in the single-DWORD colour separation table: 256 entries, one DWORD each. */
#define SEP_TABLE_SIZE (256 * sizeof( DWORD ))
|
|
|
|
|
|
|
|
//*******************************************************
|
|
BOOL
|
|
bInitTrans (
|
|
PDEV *pPDev
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This function initializes the transpose tables. This is done to make
|
|
the table independent of whether the processor is big endian or little
|
|
endian since the data is generated by the processor that is going
|
|
to use it!
|
|
|
|
Arguments:
|
|
|
|
pPDev Pointer to PDEV structure
|
|
|
|
Return Value:
|
|
|
|
TRUE for success and FALSE for failure (MemAlloc failure)
|
|
|
|
--*/
|
|
{
|
|
/*
|
|
* Function to generate the transposition table. There is nothing
|
|
* difficult about generating the table. The only trick is the use
|
|
* of the union. This allows us to setup a DWORD table with the
|
|
* byte ordering of the hardware on which we are running. This is
|
|
* achieved by writing the data into the BYTE entry, then using
|
|
* the same memory as a DWORD to be put away into memory. The reason
|
|
* for using DWORDS is to get maximum benefit from memory references
|
|
* in the inner loop of the transpose functions.
|
|
* Note that the 8/24 bits per pel case is special, as we are shuffling
|
|
* bytes around, and thus do not need any tables. For this case,
|
|
* return TRUE without allocating any storage.
|
|
*/
|
|
|
|
register DWORD *pdw;
|
|
register int iShift, j;
|
|
|
|
int i;
|
|
PRASTERPDEV pRPDev = pPDev->pRasterPDEV;
|
|
|
|
union
|
|
{
|
|
BYTE b[ 8 ]; /* Exactly 64 bits */
|
|
DWORD dw[ 2 ]; /* Also exactly 64 bits */
|
|
} u;
|
|
|
|
|
|
if( pRPDev->sDevBPP == 8 || pRPDev->sDevBPP == 24)
|
|
{
|
|
pRPDev->pdwTrans = NULL;
|
|
|
|
return TRUE; /* Byte operations - no table needed */
|
|
}
|
|
|
|
if( !(pRPDev->pdwTrans = (DWORD *)MemAlloc( TABLE_SIZE )) )
|
|
return FALSE;
|
|
|
|
|
|
pdw = pRPDev->pdwTrans; /* Speedier access */
|
|
|
|
|
|
/*
|
|
* Colour requires different tables, as the pixel data consists of
|
|
* 4 bits which need to move in a single group.
|
|
*/
|
|
|
|
if( pRPDev->fDump & RES_DM_COLOR )
|
|
{
|
|
/*
|
|
* First generate the landscape to portrait transpose data.
|
|
* The only complication is maintaining 4 bit nibbles as a single
|
|
* entity.
|
|
*/
|
|
u.dw[0] = 0;
|
|
for (iShift = 0; iShift < 256; iShift++)
|
|
{
|
|
u.b[1] = (BYTE)((iShift >> 4) & 0x0f);
|
|
u.b[3] = (BYTE)(iShift & 0x0f);
|
|
*pdw = u.dw[0];
|
|
*(pdw+1) = u.dw[0] << 4;
|
|
pdw += 2;
|
|
}
|
|
/*
|
|
* There is an additional transpose operation that requires
|
|
* 4 bit pixel data be transformed to another format.
|
|
*/
|
|
pRPDev->pdwColrSep = (DWORD *)MemAlloc( (pRPDev->fDump & RES_DM_GDI) ?
|
|
SEP_TABLE_SIZE : TABLE_SIZE );
|
|
if( pRPDev->pdwColrSep == NULL )
|
|
{
|
|
MemFree((LPSTR)pRPDev->pdwTrans );
|
|
pRPDev->pdwTrans = 0;
|
|
|
|
return FALSE;
|
|
}
|
|
|
|
pdw = pRPDev->pdwColrSep; /* Speedier access */
|
|
|
|
if( pRPDev->fDump & RES_DM_GDI )
|
|
{
|
|
|
|
/*
|
|
* HP Paintjet type devices require separating the RGB pixels
|
|
* (2 per colour per byte) into bytes where the two bits for
|
|
* each color are consecutive.
|
|
*/
|
|
for( i = 0; i <= 0xff; i++ )
|
|
{
|
|
u.dw[ 0 ] = 0;
|
|
|
|
iShift = i;
|
|
if (!(pRPDev->fColorFormat & DC_OEM_BLACK))
|
|
{
|
|
//
|
|
// if required combine the RGB to CMY(K) conversion
|
|
//
|
|
if ( !(pRPDev->fColorFormat & DC_PRIMARY_RGB))
|
|
{
|
|
iShift = (~iShift) & 0x77;
|
|
if (pRPDev->fColorFormat & DC_EXTRACT_BLK)
|
|
{
|
|
if( (iShift & 0x07) == 0x07 )
|
|
iShift = (iShift & ~0x07) | 0x08;
|
|
|
|
if( (iShift & 0x70) == 0x70 )
|
|
iShift = (iShift & ~0x70) | 0x80;
|
|
}
|
|
}
|
|
else
|
|
iShift &= 0x77;
|
|
}
|
|
|
|
/* The two bits Bb */
|
|
u.b[ 3 ] = (BYTE)(((iShift >> 3) & 0x02) | (iShift & 0x1));
|
|
iShift >>= 1;
|
|
|
|
|
|
/* The two bits Gg */
|
|
u.b[ 2 ] = (BYTE)(((iShift >> 3) & 0x02) | (iShift & 0x1));
|
|
iShift >>= 1;
|
|
|
|
|
|
/* The two bits Rr */
|
|
u.b[ 1 ] = (BYTE)(((iShift >> 3) & 0x02) | (iShift & 0x1));
|
|
iShift >>= 1;
|
|
|
|
|
|
/* The two bits Kk */
|
|
u.b[ 0 ] = (BYTE)(((iShift >> 3) & 0x02) | (iShift & 0x1));
|
|
|
|
*pdw++ = u.dw[ 0 ]; /* Safe for posterity */
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
* The dot matrix case. Here we will call the relevant
|
|
* transpose function, but use the modified table below. This
|
|
* table will do the colour separation, and will result in the
|
|
* transpose operation splitting up the data for each head pass.
|
|
*/
|
|
|
|
for( i = 0; i <= 0xff; i++ )
|
|
{
|
|
/* Each bit of i goes into one byte of the output */
|
|
u.dw[ 0 ] = 0;
|
|
u.dw[ 1 ] = 0;
|
|
|
|
iShift = i;
|
|
if (!(pRPDev->fColorFormat & DC_OEM_BLACK))
|
|
{
|
|
//
|
|
// if required combine the RGB to CMY(K) conversion
|
|
//
|
|
if ( !(pRPDev->fColorFormat & DC_PRIMARY_RGB))
|
|
{
|
|
iShift = (~iShift) & 0x77;
|
|
if (pRPDev->fColorFormat & DC_EXTRACT_BLK)
|
|
{
|
|
if( (iShift & 0x07) == 0x07 )
|
|
iShift = (iShift & ~0x07) | 0x08;
|
|
|
|
if( (iShift & 0x70) == 0x70 )
|
|
iShift = (iShift & ~0x70) | 0x80;
|
|
}
|
|
}
|
|
else
|
|
iShift &= 0x77;
|
|
}
|
|
|
|
for( j = 8; --j >= 0; )
|
|
{
|
|
u.b[ j ] = (BYTE)(iShift & 0x1);
|
|
iShift >>= 1;
|
|
}
|
|
|
|
/* Store the result */
|
|
*pdw = u.dw[0];
|
|
*(pdw+1) = u.dw[1];
|
|
pdw += 2;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
* Monochrome case - simple transpositions.
|
|
*/
|
|
|
|
for( i = 0; i <= 0xff; i++ )
|
|
{
|
|
/* Each bit of i goes into one byte of the output */
|
|
iShift = i;
|
|
u.dw[ 0 ] = 0;
|
|
u.dw[ 1 ] = 0;
|
|
|
|
for( j = 8; --j >= 0; )
|
|
{
|
|
u.b[ j ] = (BYTE)(iShift & 0x1);
|
|
iShift >>= 1;
|
|
}
|
|
|
|
/* Store the result */
|
|
*pdw = u.dw[0];
|
|
*(pdw+1) = u.dw[1];
|
|
pdw += 2;
|
|
}
|
|
}
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
//*******************************************************
|
|
void
|
|
vTrans8x8 (
|
|
BYTE *pbIn,
|
|
RENDER *pRData
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Function to transpose the input array into the output array,
|
|
where the input data is to be considered 8 rows of bitmap data,
|
|
and the output area is dword aligned.
|
|
|
|
Arguments:
|
|
|
|
pbIn Pointer to input data buffer to transform
|
|
pRData Pointer to render structure containing all the
|
|
necessary information about transforming
|
|
|
|
Return Value:
|
|
|
|
none
|
|
|
|
--*/
|
|
{
|
|
/*
|
|
* The technique is quite simple, though not necessarily obvious.
|
|
* Take an 8 scan line by 8 bits block of data, and transform it
|
|
* into 8 bytes with bits in the scan line order, rather than
|
|
* along the scan line as supplied.
|
|
* To do this as quickly as possible, each byte to be converted
|
|
* is used as an index into a lookup table; each table entry is
|
|
* 64 bits long (a pair of longs above). These 64 bits are ORed
|
|
* with the running total of 64 bits (the two variables, dw0, dw1);
|
|
* shift the running total one bit left. Repeat this operation
|
|
* for the corresponding byte in the next scan line - this is
|
|
* the new table lookup index. Repeat for all 8 bytes in the 8
|
|
* scan lines being processed. Store the 64 bit temporary results
|
|
* in the output dword array. Move to the next byte in the
|
|
* scan line, and repeat the loop for this column.
|
|
*/
|
|
|
|
register DWORD dw0, dw1; /* Inner loop temporaries */
|
|
register BYTE *pbTemp;
|
|
register DWORD *pdw;
|
|
|
|
register int cbLine; /* Bytes per line in scan data */
|
|
register int i; /* Loop variable. */
|
|
|
|
|
|
int iWide; /* Pixels across the bitmap */
|
|
DWORD *pdwOut; /* Destination */
|
|
DWORD *pdwTrans; /* Local copy of output buffer */
|
|
|
|
|
|
/*
|
|
* Some initialisation: byte count, area limits, etc.
|
|
*/
|
|
|
|
|
|
cbLine = pRData->cbTLine;
|
|
pdwOut = pRData->pvTransBuf;
|
|
pdwTrans = pRData->Trans.pdwTransTab;
|
|
|
|
if( pRData->iTransHigh != 8 )
|
|
{
|
|
/* This can happen at the end of a page. */
|
|
|
|
vTrans8N( pbIn, pRData );
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
/*
|
|
* Scan across the lines in groups of 8 bits. In the case that the
|
|
* input is not a multiple of 8, we will produce a few extra
|
|
* bytes at the end; the caller should allow for this when allocating
|
|
* storage for pdwOut. The consequence is that the last few
|
|
* bytes will contain garbage; presumably the caller will not
|
|
* process them further.
|
|
*/
|
|
|
|
for( iWide = pRData->iTransWide; iWide > 0; iWide -= BBITS )
|
|
{
|
|
dw0 = 0;
|
|
dw1 = 0;
|
|
|
|
/*
|
|
* Loop DOWN the scanlines at the starting byte location,
|
|
* generating the transposed data as we go.
|
|
*/
|
|
|
|
for( i = BBITS, pbTemp = pbIn++; --i >= 0; pbTemp += cbLine )
|
|
{
|
|
dw0 <<= 1;
|
|
dw1 <<= 1;
|
|
pdw = pdwTrans + (*pbTemp << 1);
|
|
dw0 |= *pdw;
|
|
dw1 |= *(pdw + 1);
|
|
}
|
|
|
|
/* Store the two temporary values in the output buffer. */
|
|
*pdwOut = dw0;
|
|
*(pdwOut + 1) = dw1;
|
|
pdwOut += 2;
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
//*******************************************************
|
|
void
|
|
vTrans8N (
|
|
BYTE *pbIn,
|
|
RENDER *pRData
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Function to transpose the input array into the output array,
|
|
where the input data is to be considered N rows of bitmap data,
|
|
and the output area is byte aligned.
|
|
|
|
Arguments:
|
|
|
|
pbIn Pointer to input data buffer to transform
|
|
pRData Pointer to render structure containing all the
|
|
necessary information about transforming
|
|
|
|
Return Value:
|
|
|
|
none
|
|
|
|
--*/
|
|
{
|
|
/*
|
|
* The technique is quite simple, though not necessarily obvious.
|
|
* Take an 8 scan line by 8 bits block of data, and transform it
|
|
* into 8 bytes with bits in the scan line order, rather than
|
|
* along the scan line as supplied.
|
|
* To do this as quickly as possible, each byte to be converted
|
|
* is used as an index into a lookup table; each table entry is
|
|
* 64 bits long (a pair of longs above). These 64 bits are ORed
|
|
* with the running total of 64 bits (the two variables, dw0, dw1);
|
|
* shift the running total one bit left. Repeat this operation
|
|
* for the corresponding byte in the next scan line - this is
|
|
* the new table lookup index. Repeat for all 8 bytes in the 8
|
|
* scan lines being processed. Store the 64 bit temporary results
|
|
* in the output dword array. Move to the next byte in the
|
|
* scan line, and repeat the loop for this column.
|
|
* This function is based on the special 8 X 8 case (vTrans8x8).
|
|
* The significant differences are that the transposed data needs
|
|
* to be written byte at a time (instead of DWORD at a time),
|
|
* and that there are N scan lines to convert in each loop.
|
|
*/
|
|
|
|
DWORD dw0, dw1; /* Inner loop temporaries */
|
|
BYTE *pbTemp;
|
|
DWORD *pdw;
|
|
int cbLine; /* Bytes per line in scan data */
|
|
int i; /* Loop variable. */
|
|
int iBand; /* For moving down the scan lines */
|
|
|
|
int iSkip; /* Output interleave factor */
|
|
int iWide; /* Pixels across the bitmap */
|
|
|
|
BYTE *pbOut; /* Destination, local copy */
|
|
BYTE *pbBase; /* Start addr of 8 scan line group */
|
|
BYTE *pbOutTmp; /* For output loop */
|
|
|
|
DWORD *pdwTrans; /* Speedier access */
|
|
BOOL bOptimize = FALSE;
|
|
|
|
|
|
|
|
/*
|
|
* Set up the local variables from the RENDER structure passed in.
|
|
*/
|
|
|
|
cbLine = pRData->cbTLine;
|
|
iSkip = pRData->iTransSkip;
|
|
pbOut = pRData->pvTransBuf; /* Reserved for us! */
|
|
pdwTrans = pRData->Trans.pdwTransTab;
|
|
|
|
// if the translation table isn't inverting bits and the rows are DWORD aligned
|
|
// we can optimize the algorithm by initializing everything to white
|
|
// and then skipping the rotation of 32x8 white areas.
|
|
//
|
|
if (pdwTrans[0] == 0 && !(cbLine & 3) && pRData->iPassHigh == 1)
|
|
{
|
|
bOptimize = TRUE;
|
|
FillMemory (pbOut, pRData->iTransWide * iSkip, 0xff);
|
|
}
|
|
/*
|
|
* To ease MMU thrashing, we scan ACROSS the bitmap in 8 line
|
|
* groups. This results in closer memory references, and so less
|
|
* page faults and so faster execution. Hence, the outer most loop
|
|
* loops DOWN the scanlines. The next inner loop scans across groups
|
|
* of 8 scan lines at a time, while the inner most loop transposes
|
|
* one byte by 8 scan lines of bitmap image.
|
|
* Note that processing the data this way causes a slight increase
|
|
* in scattered memory addresses when writing the output data.
|
|
* There is no way to avoid one or the other memory references being
|
|
* scattered; however, the output area is smaller than the input
|
|
* input, so scattering here will be less severe to the MMU.
|
|
*/
|
|
for( iBand = pRData->iTransHigh; iBand >= BBITS; iBand -= BBITS )
|
|
{
|
|
/*
|
|
* Have selected the next group of 8 scan lines to process,
|
|
* so scan from left to right, transposing data in 8 x 8 bit
|
|
* groups. This is the size that can be done very quickly with
|
|
* a 32 bit environment.
|
|
*/
|
|
|
|
pbBase = pbIn;
|
|
pbIn += BBITS * cbLine; /* Next address */
|
|
|
|
pbOutTmp = pbOut;
|
|
++pbOut; /* Onto the next byte sequence */
|
|
|
|
for( iWide = pRData->iTransWide; iWide > 0; iWide -= BBITS )
|
|
{
|
|
//
|
|
// White space optimization. If input data is white then
|
|
// we don't have to output rotated data since the
|
|
// output buffer is already initialized to white.
|
|
//
|
|
if (bOptimize)
|
|
{
|
|
// test whether we are dword aligned so we can check
|
|
// 32x8 area for white
|
|
if (!((ULONG_PTR)pbBase & 3) && iWide >= DWBITS)
|
|
{
|
|
if (*(DWORD *)&pbBase[0] == -1 &&
|
|
*(DWORD *)&pbBase[cbLine] == -1 &&
|
|
*(DWORD *)&pbBase[cbLine*2] == -1 &&
|
|
*(DWORD *)&pbBase[cbLine*3] == -1 &&
|
|
*(DWORD *)&pbBase[cbLine*4] == -1 &&
|
|
*(DWORD *)&pbBase[cbLine*5] == -1 &&
|
|
*(DWORD *)&pbBase[cbLine*6] == -1 &&
|
|
*(DWORD *)&pbBase[cbLine*7] == -1)
|
|
{
|
|
pbBase += 4;
|
|
iWide -= BBITS * 3;
|
|
pbOutTmp += iSkip * DWBITS;
|
|
continue;
|
|
}
|
|
}
|
|
// check 8x8 area for white
|
|
else
|
|
{
|
|
if (pbBase[0] == (BYTE)-1 &&
|
|
pbBase[cbLine] == (BYTE)-1 &&
|
|
pbBase[cbLine*2] == (BYTE)-1 &&
|
|
pbBase[cbLine*3] == (BYTE)-1 &&
|
|
pbBase[cbLine*4] == (BYTE)-1 &&
|
|
pbBase[cbLine*5] == (BYTE)-1 &&
|
|
pbBase[cbLine*6] == (BYTE)-1 &&
|
|
pbBase[cbLine*7] == (BYTE)-1)
|
|
{
|
|
pbBase++;
|
|
pbOutTmp += iSkip * BBITS;
|
|
continue;
|
|
}
|
|
}
|
|
}
|
|
/*
|
|
* Process the bitmap byte at a time moving across, and
|
|
* 8 scan lines high. This corresponds to transposing an
|
|
* 8 x 8 bit array. We can do that quickly.
|
|
*/
|
|
pbTemp = pbBase++;
|
|
dw0 = 0;
|
|
dw1 = 0;
|
|
for( i = BBITS; --i >= 0; pbTemp += cbLine )
|
|
{
|
|
/* The INNER loop - the bit swapping operations */
|
|
dw0 <<= 1;
|
|
dw1 <<= 1;
|
|
pdw = pdwTrans + (*pbTemp << 1);
|
|
dw0 |= *pdw;
|
|
dw1 |= *(pdw + 1);
|
|
}
|
|
|
|
/* Store the two temporary values in the output buffer. */
|
|
*pbOutTmp = (BYTE)dw0;
|
|
|
|
pbOutTmp += iSkip;
|
|
dw0 >>= BBITS; /* One byte's worth */
|
|
*pbOutTmp = (BYTE)dw0;
|
|
|
|
pbOutTmp += iSkip;
|
|
dw0 >>= BBITS;
|
|
*pbOutTmp = (BYTE)dw0;
|
|
|
|
pbOutTmp += iSkip;
|
|
dw0 >>= BBITS;
|
|
*pbOutTmp = (BYTE)dw0;
|
|
|
|
pbOutTmp += iSkip;
|
|
*pbOutTmp = (BYTE)dw1;
|
|
|
|
pbOutTmp += iSkip;
|
|
dw1 >>= BBITS;
|
|
*pbOutTmp = (BYTE)dw1;
|
|
|
|
pbOutTmp += iSkip;
|
|
dw1 >>= BBITS;
|
|
*pbOutTmp = (BYTE)dw1;
|
|
|
|
pbOutTmp += iSkip;
|
|
dw1 >>= BBITS;
|
|
*pbOutTmp = (BYTE)dw1;
|
|
|
|
pbOutTmp += iSkip; /* Next chunk of output data */
|
|
}
|
|
}
|
|
|
|
/*
|
|
* There may be some scan lines remaining. If so, iBand will
|
|
* be > 0, and that indicates the number of output scan lines
|
|
* remaining.
|
|
*/
|
|
|
|
if( iBand > 0 )
|
|
{
|
|
|
|
/*
|
|
* This is basically the same as the stripped down version
|
|
* in the outer loop above. Note that the output data is still
|
|
* byte aligned, IT IS PRESUMED THAT THE 'MISSING' LINES ARE
|
|
* ZERO FILLED. This may not be what is desired - it is for
|
|
* transposing bits to output to a dot matrix printer where
|
|
* the page length is not a multiple of the number of pins.
|
|
* I don't know if that can ever happen.
|
|
*/
|
|
|
|
pbBase = pbIn;
|
|
pbOutTmp = pbOut;
|
|
|
|
for( iWide = pRData->iTransWide; iWide > 0; iWide -= BBITS )
|
|
{
|
|
/*
|
|
* Process the bitmap byte at a time moving across, and
|
|
* 8 scan lines high. This corresponds to transposing an
|
|
* 8 x 8 bit array. We can do that quickly.
|
|
*/
|
|
dw0 = 0;
|
|
dw1 = 0;
|
|
pbTemp = pbBase++;
|
|
|
|
/*
|
|
* The inner loop now only transposes as many scan lines
|
|
* as the bitmap actually contains - we must not run off
|
|
* the end of memory.
|
|
*/
|
|
|
|
for( i = iBand; --i >= 0; pbTemp += cbLine )
|
|
{
|
|
/* The INNER loop - the bit swapping operations */
|
|
dw0 <<= 1;
|
|
dw1 <<= 1;
|
|
pdw = pdwTrans + (*pbTemp << 1);
|
|
dw0 |= *pdw;
|
|
dw1 |= *(pdw + 1);
|
|
|
|
}
|
|
|
|
// white fill remaining bits
|
|
//
|
|
pdw = pdwTrans + (pRData->ubFillWhite << 1);
|
|
i = BBITS - iBand;
|
|
while (--i >= 0)
|
|
{
|
|
dw0 <<= 1;
|
|
dw1 <<= 1;
|
|
dw0 |= *pdw;
|
|
dw1 |= *(pdw + 1);
|
|
}
|
|
|
|
/* Store the two temporary values in the output buffer. */
|
|
pbTemp = pbOutTmp;
|
|
*pbTemp = (BYTE)dw0;
|
|
|
|
pbTemp += iSkip;
|
|
dw0 >>= BBITS; /* One byte's worth */
|
|
*pbTemp = (BYTE)dw0;
|
|
|
|
pbTemp += iSkip;
|
|
dw0 >>= BBITS;
|
|
*pbTemp = (BYTE)dw0;
|
|
|
|
pbTemp += iSkip;
|
|
dw0 >>= BBITS;
|
|
*pbTemp = (BYTE)dw0;
|
|
|
|
pbTemp += iSkip;
|
|
*pbTemp = (BYTE)dw1;
|
|
|
|
pbTemp += iSkip;
|
|
dw1 >>= BBITS;
|
|
*pbTemp = (BYTE)dw1;
|
|
|
|
pbTemp += iSkip;
|
|
dw1 >>= BBITS;
|
|
*pbTemp = (BYTE)dw1;
|
|
|
|
pbTemp += iSkip;
|
|
dw1 >>= BBITS;
|
|
*pbTemp = (BYTE)dw1;
|
|
|
|
pbOutTmp += BBITS * iSkip; /* Next chunk of output data */
|
|
}
|
|
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
/*
|
|
* Define the number of pels transposed per loop iteration. In the case
|
|
* of a colour bitmap, this is 2, since there are 4 bits per pel, thus
|
|
* 2 per byte.
|
|
*/
|
|
|
|
/* 4 bits per pel, so each input byte carries BBITS / 4 pels. */
#define PELS_PER_LOOP (BBITS / 4)
|
|
|
|
|
|
//*******************************************************
|
|
void
|
|
vTrans8N4BPP (
|
|
BYTE *pbIn,
|
|
RENDER *pRData
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Function to transpose the input array into the output array,
|
|
where the input data is to be considered N rows of bitmap data,
|
|
and the output area is byte aligned.
|
|
This version works on 4 bits per pel bitmaps (colour for us).
|
|
|
|
Arguments:
|
|
|
|
pbIn Pointer to input data buffer to transform
|
|
pRData Pointer to render structure containing all the
|
|
necessary information about transforming
|
|
|
|
Return Value:
|
|
|
|
none
|
|
|
|
--*/
|
|
{
|
|
/*
|
|
* The technique is quite simple, though not necessarily obvious.
|
|
* Take an 8 scan line by 8 bits block of data, and transform it
|
|
* into 8 bytes with bits in the scan line order, rather than
|
|
* along the scan line as supplied.
|
|
* To do this as quickly as possible, each byte to be converted
|
|
* is used as an index into a lookup table; each table entry is
|
|
* 64 bits long (a pair of longs above). These 64 bits are ORed
|
|
* with the running total of 64 bits (the two variables, dw0, dw1);
|
|
* shift the running total one bit left. Repeat this operation
|
|
* for the corresponding byte in the next scan line - this is
|
|
* the new table lookup index. Repeat for all 8 bytes in the 8
|
|
* scan lines being processed. Store the 64 bit temporary results
|
|
* in the output dword array. Move to the next byte in the
|
|
* scan line, and repeat the loop for this column.
|
|
* This function is based on the special 8 X 8 case (vTrans8x8).
|
|
* The significant differences are that the transposed data needs
|
|
* to be written byte at a time (instead of DWORD at a time),
|
|
* and that there are N scan lines to convert in each loop.
|
|
*/
|
|
|
|
register DWORD dw0, dw1; /* Inner loop temporaries */
|
|
register BYTE *pbTemp;
|
|
register DWORD *pdw;
|
|
|
|
register int cbLine; /* Bytes per line in scan data */
|
|
register int i; /* Loop variable. */
|
|
register int iBand; /* For moving down the scan lines */
|
|
|
|
int iSkip; /* Output interleave factor */
|
|
int iWide; /* Pixels across the bitmap */
|
|
|
|
DWORD *pdwOut; /* Destination, local copy */
|
|
BYTE *pbBase; /* Start addr of 8 scan line group */
|
|
DWORD *pdwOutTmp; /* For output loop */
|
|
|
|
DWORD *pdwTrans; /* Speedier access */
|
|
|
|
|
|
/*
|
|
* Set up the local variables from the RENDER structure passed in.
|
|
* See the above function for explanation of iSkip.
|
|
*/
|
|
|
|
cbLine = pRData->cbTLine;
|
|
iSkip = pRData->iTransSkip / DWBYTES;
|
|
pdwOut = pRData->pvTransBuf; /* Reserved for us! */
|
|
pdwTrans = pRData->Trans.pdwTransTab;
|
|
|
|
/*
|
|
* To ease MMU thrashing, we scan ACROSS the bitmap in 8 line
|
|
* groups. This results in closer memory references, and so less
|
|
* page faults and faster execution. Hence, the outer most loop
|
|
* loops DOWN the scanlines. Then next inner loop scans across groups
|
|
* of 8 scan lines at a time, while the inner most loop transposes
|
|
* one byte by 8 scan lines of bitmap image.
|
|
* Note that processing the data this way causes a slight increase
|
|
* in scattered memory addresses when writing the output data.
|
|
* There is no way to avoid one or the other memory references being
|
|
* scattered; however, the output area is smaller than the input
|
|
* input, so scattering here will be less severe on the MMU.
|
|
*/
|
|
|
|
|
|
for( iBand = pRData->iTransHigh; iBand >= BBITS; iBand -= BBITS )
|
|
{
|
|
|
|
/*
|
|
* Have selected the next group of 8 scan lines to process,
|
|
* so scan from left to right, transposing data in 8 x 8 bit
|
|
* groups. This is the size that can be done very quickly with
|
|
* a 32 bit environment.
|
|
*/
|
|
|
|
pbBase = pbIn;
|
|
pbIn += BBITS * cbLine; /* Next address */
|
|
|
|
pdwOutTmp = pdwOut;
|
|
++pdwOut; /* Onto the next byte sequence */
|
|
|
|
for( iWide = pRData->iTransWide; iWide > 0; iWide -= BBITS )
|
|
{
|
|
/*
|
|
* Process the bitmap byte at a time moving across, and
|
|
* 8 scan lines high. This corresponds to transposing an
|
|
* 8 x 8 pixel array. We can do that quickly.
|
|
*/
|
|
|
|
pbTemp = pbBase++;
|
|
|
|
dw0 = *(pdwTrans + 1 + (*pbTemp << 1));
|
|
pbTemp += cbLine;
|
|
dw0 |= *(pdwTrans + (*pbTemp << 1));
|
|
pbTemp += cbLine;
|
|
|
|
dw0 >>= 8;
|
|
|
|
dw0 |= *(pdwTrans + 1 + (*pbTemp << 1));
|
|
pbTemp += cbLine;
|
|
dw0 |= *(pdwTrans + (*pbTemp << 1));
|
|
pbTemp += cbLine;
|
|
|
|
dw1 = *(pdwTrans + 1 + (*pbTemp << 1));
|
|
pbTemp += cbLine;
|
|
dw1 |= *(pdwTrans + (*pbTemp << 1));
|
|
pbTemp += cbLine;
|
|
|
|
dw1 >>= 8;
|
|
|
|
dw1 |= *(pdwTrans + 1 + (*pbTemp << 1));
|
|
pbTemp += cbLine;
|
|
dw1 |= *(pdwTrans + (*pbTemp << 1));
|
|
|
|
*(WORD *)pdwOutTmp = (WORD)dw0;
|
|
*(((WORD *)pdwOutTmp)+1) = (WORD)dw1;
|
|
*(pdwOutTmp+iSkip) = (dw1 & 0xffff0000) | (dw0 >> 16);
|
|
pdwOutTmp += PELS_PER_LOOP * iSkip; /* Next chunk of output data */
|
|
}
|
|
|
|
}
|
|
|
|
/*
|
|
* There may be some scan lines remaining. If so, iBand will
|
|
* be > 0, and that indicates the number of output scan lines
|
|
* remaining.
|
|
*/
|
|
|
|
if( iBand > 0 )
|
|
{
|
|
|
|
/*
|
|
* This is basically the same as the stripped down version
|
|
* in the outer loop above. Note that the output data is still
|
|
* byte aligned, IT IS PRESUMED THAT THE 'MISSING' LINES ARE
|
|
* ZERO FILLED. This may not be what is desired - it is for
|
|
* transposing bits to output to a dot matrix printer where
|
|
* the page length is not a multiple of the number of pins.
|
|
* I don't know if that can ever happen.
|
|
*/
|
|
|
|
pbBase = pbIn;
|
|
pdwOutTmp = pdwOut;
|
|
|
|
for( iWide = pRData->iTransWide; iWide > 0; iWide -= BBITS )
|
|
{
|
|
/*
|
|
* Process the bitmap byte at a time moving across, and
|
|
* 8 scan lines high. This corresponds to transposing an
|
|
* 8 x 8 bit array. We can do that quickly.
|
|
*/
|
|
pbTemp = pbBase++;
|
|
|
|
dw0 = *(pdwTrans + 1 + (*pbTemp << 1));
|
|
dw1 = 0;
|
|
if (iBand > 1)
|
|
{
|
|
pbTemp += cbLine;
|
|
dw0 |= *(pdwTrans + (*pbTemp << 1));
|
|
dw0 >>= 8;
|
|
if (iBand > 2)
|
|
{
|
|
pbTemp += cbLine;
|
|
dw0 |= *(pdwTrans + 1 + (*pbTemp << 1));
|
|
if (iBand > 3)
|
|
{
|
|
pbTemp += cbLine;
|
|
dw0 |= *(pdwTrans + (*pbTemp << 1));
|
|
if (iBand > 4)
|
|
{
|
|
pbTemp += cbLine;
|
|
dw1 = *(pdwTrans + 1 + (*pbTemp << 1));
|
|
if (iBand > 5)
|
|
{
|
|
pbTemp += cbLine;
|
|
dw1 |= *(pdwTrans + (*pbTemp << 1));
|
|
dw1 >>= 8;
|
|
if (iBand > 6)
|
|
{
|
|
pbTemp += cbLine;
|
|
dw1 |= *(pdwTrans + 1 + (*pbTemp << 1));
|
|
}
|
|
}
|
|
else
|
|
dw1 >>= 8;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else
|
|
dw0 >>= 8;
|
|
|
|
*(WORD *)pdwOutTmp = (WORD)dw0;
|
|
*(((WORD *)pdwOutTmp)+1) = (WORD)dw1;
|
|
*(pdwOutTmp+iSkip) = (dw1 & 0xffff0000) | (dw0 >> 16);
|
|
|
|
pdwOutTmp += 2 * iSkip; /* Next chunk of output data */
|
|
}
|
|
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
//*******************************************************
|
|
void
|
|
vTransColSep (
|
|
register BYTE *pbIn,
|
|
RENDER *pRData
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Function to transpose the colour bits in a 4 Bits Per Pel colour
|
|
bitmap into an array of bytes, where the bytes are ordered in
|
|
the same way as the original bits. An example of this is provided
|
|
in the explanation for the SEP_TABLE_SIZE value at the top of this file.
|
|
|
|
Arguments:
|
|
|
|
pbIn Pointer to input data buffer to transform
|
|
pRData Pointer to render structure containing all the
|
|
necessary information about transforming
|
|
|
|
Return Value:
|
|
|
|
none
|
|
|
|
--*/
|
|
{
|
|
/*
|
|
* Operation is quite simple - pass along the input array byte
|
|
* at a time, and use each 4 byte group to generate a DWORD of
|
|
* output - placed in pdwOut. The previously generated translation
|
|
* table is especially formulated to do this job!
|
|
*
|
|
* NOTE: pdwOut and pbIn MAY POINT TO THE SAME ADDRESS! THERE IS
|
|
* NO OVERLAP IN OPERATIONS TO CAUSE CONFUSION.
|
|
*/
|
|
|
|
register DWORD dwTemp;
|
|
register DWORD *pdwSep;
|
|
|
|
int iI;
|
|
int iBlock;
|
|
DWORD *pdwOut; /* Destination - DWORD aligned */
|
|
DWORD dwWhite;
|
|
|
|
|
|
iBlock = pRData->cDWLine * pRData->iNumScans;
|
|
|
|
pdwSep = pRData->pdwColrSep; /* Colour separation table */
|
|
pdwOut = pRData->pvTransBuf; /* Where the data goes */
|
|
|
|
|
|
/* Loop through the line in 4 byte groups */
|
|
|
|
//
|
|
// calculate the white conversion value
|
|
//
|
|
dwWhite = *(pdwSep + 0x77);
|
|
dwWhite |= (dwWhite << 2) | (dwWhite << 4) | (dwWhite << 6);
|
|
|
|
//
|
|
// convert the data to planar including RGB to CMY(K)
|
|
//
|
|
for (iI = iBlock; --iI >= 0;)
|
|
{
|
|
if (*(DWORD *)pbIn == 0x77777777L)
|
|
{
|
|
*pdwOut++ = dwWhite;
|
|
}
|
|
else
|
|
{
|
|
dwTemp = *(pdwSep + *pbIn);
|
|
|
|
dwTemp <<= 2;
|
|
dwTemp |= *(pdwSep + pbIn[1]);
|
|
|
|
dwTemp <<= 2;
|
|
dwTemp |= *(pdwSep + pbIn[2]);
|
|
|
|
*pdwOut++ = (dwTemp << 2) | *(pdwSep + pbIn[3]);
|
|
}
|
|
pbIn += DWBYTES;
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
//*******************************************************
|
|
void
|
|
vTrans8BPP (
|
|
BYTE *pbIn,
|
|
RENDER *pRData
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
The transpose function for 8 bits per pel bitmaps. This is rather
|
|
easy, as all we do is shuffle bytes!
|
|
|
|
Arguments:
|
|
|
|
pbIn Pointer to input data buffer to transform
|
|
pRData Pointer to render structure containing all the
|
|
necessary information about transforming
|
|
|
|
Return Value:
|
|
|
|
none
|
|
|
|
--*/
|
|
{
|
|
|
|
/*
|
|
* Scan along the input bitmap, writing the data to the output
|
|
* in column order. This results in reduced MMU thrashing, as
|
|
* the output addresses are all limited to a much smaller range
|
|
* than the incoming addresses.
|
|
*/
|
|
|
|
register BYTE *pbBase; /* Scan along input bitmap */
|
|
register BYTE *pbOut; /* The output scan column pointer */
|
|
|
|
int iBand; /* Count down scan lines */
|
|
int iSkip; /* Offset between output bytes */
|
|
int iWide; /* Loop across the input scan line */
|
|
int cbLine; /* Bytes per input scan line */
|
|
|
|
BYTE *pbOutBase; /* Start of column of output data */
|
|
|
|
|
|
/*
|
|
* Set up the local copies (for faster access) of data passed in.
|
|
*/
|
|
|
|
cbLine = pRData->cbTLine;
|
|
iSkip = pRData->iTransSkip;
|
|
pbOutBase = pRData->pvTransBuf; /* Base output buffer address */
|
|
|
|
|
|
for( iBand = pRData->iTransHigh; iBand > 0; --iBand )
|
|
{
|
|
/*
|
|
* This loop processes scan lines in the input bitmap. As
|
|
* we progress across the scan line, the output data is written
|
|
* in column order.
|
|
*/
|
|
|
|
pbBase = pbIn;
|
|
pbIn += cbLine; /* Next scan line, DWORD aligned */
|
|
|
|
pbOut = pbOutBase;
|
|
++pbOutBase; /* One column across output area */
|
|
|
|
for( iWide = pRData->iTransWide; iWide > 0; iWide -= BBITS )
|
|
{
|
|
/*
|
|
* This loop traverses the input scan line, taking bytes
|
|
* and writing them to the output area in column order.
|
|
*/
|
|
|
|
*pbOut = *pbBase++;
|
|
pbOut += iSkip;
|
|
}
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
//*******************************************************
|
|
void
|
|
vTrans24BPP (
|
|
BYTE *pbIn,
|
|
RENDER *pRData
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
The transpose function for 8 bits per pel bitmaps. This is rather
|
|
easy, as all we do is shuffle bytes!
|
|
|
|
Arguments:
|
|
|
|
pbIn Pointer to input data buffer to transform
|
|
pRData Pointer to render structure containing all the
|
|
necessary information about transforming
|
|
|
|
Return Value:
|
|
|
|
none
|
|
|
|
--*/
|
|
{
|
|
/*
|
|
* Scan along the input bitmap, writing the data to the output
|
|
* in column order. This results in reduced MMU thrashing, as
|
|
* the output addresses are all limited to a much smaller range
|
|
* than the incoming addresses.
|
|
*/
|
|
|
|
register BYTE *pbBase; /* Scan along input bitmap */
|
|
register BYTE *pbOut; /* The output scan column pointer */
|
|
|
|
int iBand; /* Count down scan lines */
|
|
int iSkip; /* Offset between output bytes */
|
|
int iWide; /* Loop across the input scan line */
|
|
int iCol;
|
|
int iRow;
|
|
int cbLine; /* Bytes per input scan line */
|
|
int iBytesLeftOver;
|
|
|
|
BYTE *pbOutBase; /* Start of column of output data */
|
|
|
|
|
|
/*
|
|
* Set up the local copies (for faster access) of data passed in.
|
|
*/
|
|
|
|
iSkip = pRData->iTransSkip;
|
|
cbLine = pRData->cbTLine;
|
|
pbOutBase = pRData->pvTransBuf; /* Base output buffer address */
|
|
iCol = pRData->iTransWide/pRData->iBPP;
|
|
iRow = pRData->iTransHigh;
|
|
iBytesLeftOver = (pRData->iTransHigh *pRData->iBPP) % DWBITS;
|
|
|
|
for( iBand = iRow; iBand > 0; --iBand )
|
|
{
|
|
/*
|
|
* This loop processes scan lines in the input bitmap. As
|
|
* we progress across the scan line, the output data is written
|
|
* in column order.
|
|
*/
|
|
|
|
pbBase = pbIn;
|
|
pbIn += cbLine; /* Next scan line, DWORD aligned */
|
|
|
|
pbOut = pbOutBase;
|
|
pbOutBase+=3; /* One column across output area */
|
|
|
|
for( iWide = iCol; iWide > 0; --iWide )
|
|
{
|
|
/*
|
|
* This loop traverses the input scan line, taking bytes
|
|
* and writing them to the output area in column order.
|
|
*/
|
|
*pbOut = *pbBase++;
|
|
*(pbOut+1) = *pbBase++;
|
|
*(pbOut+2) = *pbBase++;
|
|
pbOut += iSkip;
|
|
}
|
|
}
|
|
|
|
return;
|
|
}
|
|
|