mirror of https://github.com/lianthony/NT4.0
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1175 lines
38 KiB
1175 lines
38 KiB
/**************************** Function Header ******************************
|
|
* transpos.c
|
|
* Functions associated with transposing bitmaps. Several flavours
|
|
* are available, to be used as appropriate in some special cases.
|
|
* The special cases are generally faster than the general method.
|
|
*
|
|
* HISTORY:
|
|
* 10:46 on Wed 27 Feb 1991 -by- Lindsay Harris [lindsayh]
|
|
* Added table creation at load time - big endian/little endian
|
|
*
|
|
* 14:24 on Wed 23 Jan 1991 -by- Lindsay Harris [lindsayh]
|
|
* Created it - initially for the 8 x 8 case.
|
|
*
|
|
 * Copyright (C) 1991 - 1993 Microsoft Corporation
|
|
*
|
|
***************************************************************************/
|
|
|
|
#include <stddef.h>
|
|
#include <windows.h>
|
|
#include <libproto.h>
|
|
#include <winddi.h>
|
|
#include "pdev.h"
|
|
#include "win30def.h"
|
|
#include "udmindrv.h"
|
|
#include "udpfm.h"
|
|
#include "uddevice.h"
|
|
#include "udrender.h"
|
|
#include "rasdd.h"
|
|
|
|
/*
|
|
* The transpose table: maps one byte into two longs, such that the
|
|
* 8 bits of the byte turn into 64 bits: each bit of the original is
|
|
* turned into one byte of output.
|
|
* THUS:
|
|
* Input byte: hgfedcba
|
|
* transposes into output bytes:
|
|
* 0000000a 0000000b 0000000c 0000000d
|
|
* 0000000e 0000000f 0000000g 0000000h
|
|
*
|
|
* The table is allocated at DrvEnableSurface time, thus ensuring that
|
|
* we do not allocate memory that we are not going to use.
|
|
*/
|
|
|
|
#define TABLE_SIZE (256 * 2 * sizeof( DWORD ))
|
|
|
|
/*
|
|
* We also need a similar table for colour separation. This one
|
|
* consists of 256 DWORDs, and is used to split the RGB(K) format
|
|
* input byte into an output DWORD with the two R bits in one byte,
|
|
* the two G bits in the next byte etc. Used for single pin colour
|
|
* printers, like the HP PaintJet.
|
|
* The table is generated according to the following rule:
|
|
*
|
|
* INPUT BYTE: KRGBkrgb
|
|
*
|
|
* OUTPUT DWORD: 000000Kk 000000Rr 000000Gg 000000Bb
|
|
*/
|
|
|
|
#define SEP_TABLE_SIZE (256 * sizeof( DWORD ))
|
|
|
|
|
|
|
|
/************************** Function Header *******************************
|
|
 * bInitTrans
|
|
* Initialise the transpose tables. This is done to make the tables
|
|
* independent of whether the processor is big endian or little endian,
|
|
* since the data is generated by the processor that is going to
|
|
* use it! There are still some minor questions of byte ordering,
|
|
* but nothing too major to resolve.
|
|
*
|
|
* RETURNS:
|
|
* TRUE/FALSE; FALSE for lack of storage for table.
|
|
*
|
|
* HISTORY:
|
|
* Friday December 3 1993 -by- Norman Hendley [normanh]
|
|
* Changed graphics mode check to RES_DM_GDI from nPins
|
|
*
|
|
* 10:52 on Wed 27 Feb 1991 -by- Lindsay Harris [lindsayh]
|
|
* Borrowed from program which generated original table.
|
|
*
|
|
**************************************************************************/
|
|
|
|
BOOL
|
|
bInitTrans( pPDev )
|
|
PDEV *pPDev;
|
|
{
|
|
|
|
/*
|
|
* Function to generate the transposition table. There is nothing
|
|
* difficult about generating the table. The only trick is the use
|
|
* of the union. This allows us to setup a DWORD table with the
|
|
* byte ordering of the hardware on which we are running. This is
|
|
* achieved by writing the data into the BYTE entry, then using
|
|
* the same memory as a DWORD to be put away into memory. The reason
|
|
* for using DWORDS is to get maximum benefit from memory references
|
|
* in the inner loop of the transpose functions.
|
|
* Storage space is allocated on the heap, and the address is
|
|
* stored in the PDEV.
|
|
* Note that the 8 bits per pel case is special, as we are shuffling
|
|
* bytes around, and thus do not need any tables. For this case,
|
|
* return TRUE without allocating any storage.
|
|
*
|
|
* Returns TRUE for success, FALSE meaning storage unavailable.
|
|
*/
|
|
|
|
register DWORD *pdw;
|
|
register int iShift, j;
|
|
|
|
int i;
|
|
|
|
#define pUDPDev ((UD_PDEV *)(pPDev->pUDPDev))
|
|
|
|
union
|
|
{
|
|
BYTE b[ 8 ]; /* Exactly 64 bits */
|
|
DWORD dw[ 2 ]; /* Also exactly 64 bits */
|
|
} u;
|
|
|
|
|
|
if( pUDPDev->sBitsPixel == 8 )
|
|
{
|
|
pPDev->pdwTrans = NULL;
|
|
|
|
return TRUE; /* Byte operations - no table needed */
|
|
}
|
|
|
|
if( !(pPDev->pdwTrans = (DWORD *)HeapAlloc( pPDev->hheap, 0, TABLE_SIZE )) )
|
|
return FALSE;
|
|
|
|
|
|
pdw = pPDev->pdwTrans; /* Speedier access */
|
|
|
|
|
|
/*
|
|
* Colour requires different tables, as the pixel data consists of
|
|
* 4 bits which need to move in a single group.
|
|
*/
|
|
|
|
if( pUDPDev->Resolution.fDump & RES_DM_COLOR )
|
|
{
|
|
/*
|
|
* First generate the landscape to portrait transpose data.
|
|
* The only complication is maintaining 4 bit nibbles as a single
|
|
* entity.
|
|
*/
|
|
|
|
for( iShift = 0; iShift < 256; iShift++ )
|
|
{
|
|
/*
|
|
* The low nibble goes into the highest byte address, the
|
|
* next nibble goes 4 bytes lower in memory.
|
|
*/
|
|
u.dw[ 0 ] = 0;
|
|
u.dw[ 1 ] = 0;
|
|
|
|
u.b[ 3 ] = (BYTE)((iShift >> 4) & 0x0f);
|
|
u.b[ 7 ] = (BYTE)(iShift & 0x0f);
|
|
|
|
/* Store the result */
|
|
|
|
*pdw++ = u.dw[ 0 ];
|
|
*pdw++ = u.dw[ 1 ];
|
|
}
|
|
|
|
/*
|
|
* There is an additional transpose operation required for single
|
|
* pin colour printers. The HP Paintjet typifies this class.
|
|
* This operation is required to separate the RGB pixels (2 of each
|
|
* colour per byte) into bytes that may be sent to the printer,
|
|
* such that all R bytes are sent in one go, followed by G etc.
|
|
* For multiple pin printers, this falls out of the standard
|
|
* transpose operations.
|
|
*/
|
|
|
|
|
|
|
|
if( pUDPDev->Resolution.fDump & RES_DM_GDI )
|
|
{
|
|
pPDev->pdwColrSep = (DWORD *)HeapAlloc( pPDev->hheap, 0, SEP_TABLE_SIZE );
|
|
if( pPDev->pdwColrSep == NULL )
|
|
{
|
|
HeapFree( pPDev->hheap, 0, (LPSTR)pPDev->pdwTrans );
|
|
pPDev->pdwTrans = 0;
|
|
|
|
return FALSE;
|
|
}
|
|
|
|
pdw = pPDev->pdwColrSep; /* Speedier access */
|
|
|
|
/*
|
|
* The explanation above for SEP_TABLE_SIZE explains what is
|
|
* taking place in the following loops.
|
|
*/
|
|
|
|
for( i = 0; i <= 0xff; i++ )
|
|
{
|
|
u.dw[ 0 ] = 0;
|
|
iShift = i & 0x77; /* Only use 3 bits per pel */
|
|
|
|
if( pUDPDev->fColorFormat & DC_EXTRACT_BLK )
|
|
{
|
|
if( pUDPDev->fColorFormat & DC_PRIMARY_RGB )
|
|
{
|
|
/*
|
|
* Whenever we have a 0 nibble, replace it with 8.
|
|
* This does the colour separation for us! The
|
|
* separation happens when the transpose happens.
|
|
*/
|
|
|
|
if( (iShift & 0x07) == 0 )
|
|
iShift |= 0x08;
|
|
|
|
if( (iShift & 0x70) == 0 )
|
|
iShift |= 0x80;
|
|
}
|
|
else
|
|
{
|
|
/* CMY - same idea, different conditions! */
|
|
if( (iShift & 0x07) == 0x07 )
|
|
iShift = (iShift & ~0x07) | 0x08;
|
|
|
|
if( (iShift & 0x70) == 0x70 )
|
|
iShift = (iShift & ~0x70) | 0x80;
|
|
}
|
|
}
|
|
|
|
/* The two bits Bb */
|
|
u.b[ 3 ] = (BYTE)(((iShift >> 3) & 0x02) | (iShift & 0x1));
|
|
iShift >>= 1;
|
|
|
|
|
|
/* The two bits Gg */
|
|
u.b[ 2 ] = (BYTE)(((iShift >> 3) & 0x02) | (iShift & 0x1));
|
|
iShift >>= 1;
|
|
|
|
|
|
/* The two bits Rr */
|
|
u.b[ 1 ] = (BYTE)(((iShift >> 3) & 0x02) | (iShift & 0x1));
|
|
iShift >>= 1;
|
|
|
|
|
|
/* The two bits Kk */
|
|
u.b[ 0 ] = (BYTE)(((iShift >> 3) & 0x02) | (iShift & 0x1));
|
|
|
|
*pdw++ = u.dw[ 0 ]; /* Safe for posterity */
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
* The dot matrix case. Here we will call the relevant
|
|
* transpose function, but use the modified table below. This
|
|
* table will do the colour separation, and will result in the
|
|
* transpose operation splitting up the data for each head pass.
|
|
*/
|
|
|
|
pPDev->pdwColrSep = (DWORD *)HeapAlloc( pPDev->hheap, 0, TABLE_SIZE );
|
|
if( pPDev->pdwColrSep == NULL )
|
|
{
|
|
HeapFree( pPDev->hheap, 0, (LPSTR)pPDev->pdwTrans );
|
|
pPDev->pdwTrans = 0;
|
|
|
|
return FALSE;
|
|
}
|
|
|
|
pdw = pPDev->pdwColrSep; /* Speedier access */
|
|
|
|
for( i = 0; i <= 0xff; i++ )
|
|
{
|
|
/* Each bit of i goes into one byte of the output */
|
|
u.dw[ 0 ] = 0;
|
|
u.dw[ 1 ] = 0;
|
|
|
|
iShift = i & 0x77; /* Only 3 bits per pel */
|
|
|
|
if( pUDPDev->fColorFormat & DC_EXTRACT_BLK )
|
|
{
|
|
if( pUDPDev->fColorFormat & DC_PRIMARY_RGB )
|
|
{
|
|
/*
|
|
* Whenever we have a 0 nibble, replace it with 8.
|
|
* This does the colour separation for us! The
|
|
* separation happens when the transpose happens.
|
|
*/
|
|
|
|
if( (iShift & 0x07) == 0 )
|
|
iShift |= 0x08;
|
|
|
|
if( (iShift & 0x70) == 0 )
|
|
iShift |= 0x80;
|
|
}
|
|
else
|
|
{
|
|
/* CMY - same idea, different conditions! */
|
|
if( (iShift & 0x07) == 0x07 )
|
|
iShift = (iShift & ~0x07) | 0x08;
|
|
|
|
if( (iShift & 0x70) == 0x70 )
|
|
iShift = (iShift & ~0x70) | 0x80;
|
|
}
|
|
}
|
|
|
|
for( j = 8; --j >= 0; )
|
|
{
|
|
u.b[ j ] = (BYTE)(iShift & 0x1);
|
|
iShift >>= 1;
|
|
}
|
|
|
|
/* Store the result */
|
|
|
|
*pdw++ = u.dw[ 0 ];
|
|
*pdw++ = u.dw[ 1 ];
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
* Monochrome case - simple transpositions.
|
|
*/
|
|
|
|
for( i = 0; i <= 0xff; i++ )
|
|
{
|
|
/* Each bit of i goes into one byte of the output */
|
|
iShift = i;
|
|
u.dw[ 0 ] = 0;
|
|
u.dw[ 1 ] = 0;
|
|
|
|
for( j = 8; --j >= 0; )
|
|
{
|
|
u.b[ j ] = (BYTE)(iShift & 0x1);
|
|
iShift >>= 1;
|
|
}
|
|
|
|
/* Store the result */
|
|
|
|
*pdw++ = u.dw[ 0 ];
|
|
*pdw++ = u.dw[ 1 ];
|
|
}
|
|
}
|
|
|
|
return TRUE;
|
|
}
|
|
#undef pUDPDev
|
|
|
|
|
|
/************************** Function Header *******************************
|
|
* vTrans8x8
|
|
* Function to transpose the input array into the output array,
|
|
* where the input data is to be considered 8 rows of bitmap data,
|
|
* and the output area is dword aligned.
|
|
*
|
|
* RETURNS:
|
|
* Nothing
|
|
*
|
|
* HISTORY:
|
|
* 14:27 on Wed 23 Jan 1991 -by- Lindsay Harris [lindsayh]
|
|
* First incarnation.
|
|
*
|
|
*************************************************************************/
|
|
|
|
void
|
|
vTrans8x8( pbIn, pRData )
|
|
BYTE *pbIn; /* Source */
|
|
RENDER *pRData; /* Rendering info */
|
|
{
|
|
/*
|
|
* The technique is quite simple, though not necessarily obvious.
|
|
* Take an 8 scan line by 8 bits block of data, and transform it
|
|
* into 8 bytes with bits in the scan line order, rather than
|
|
* along the scan line as supplied.
|
|
* To do this as quickly as possible, each byte to be converted
|
|
* is used as an index into a lookup table; each table entry is
|
|
* 64 bits long (a pair of longs above). These 64 bits are ORed
|
|
* with the running total of 64 bits (the two variables, dw0, dw1);
|
|
* shift the running total one bit left. Repeat this operation
|
|
* for the corresponding byte in the next scan line - this is
|
|
* the new table lookup index. Repeat for all 8 bytes in the 8
|
|
* scan lines being processed. Store the 64 bit temporary results
|
|
* in the output dword array. Move to the next byte in the
|
|
* scan line, and repeat the loop for this column.
|
|
*/
|
|
|
|
register DWORD dw0, dw1; /* Inner loop temporaries */
|
|
register BYTE *pbTemp;
|
|
register DWORD *pdw;
|
|
|
|
register int cbLine; /* Bytes per line in scan data */
|
|
register int i; /* Loop variable. */
|
|
|
|
|
|
int iWide; /* Pixels across the bitmap */
|
|
DWORD *pdwOut; /* Destination */
|
|
DWORD *pdwTrans; /* Local copy of output buffer */
|
|
|
|
|
|
/*
|
|
* Some initialisation: byte count, area limits, etc.
|
|
*/
|
|
|
|
|
|
cbLine = pRData->cbTLine;
|
|
pdwOut = pRData->pvTransBuf;
|
|
pdwTrans = pRData->Trans.pdwTransTab;
|
|
|
|
if( pRData->iTransHigh != 8 )
|
|
{
|
|
/* This can happen at the end of a page. */
|
|
|
|
vTrans8N( pbIn, pRData );
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
/*
|
|
* Scan across the lines in groups of 8 bits. In the case that the
|
|
* input is not a multiple of 8, we will produce a few extra
|
|
* bytes at the end; the caller should allow for this when allocating
|
|
* storage for pdwOut. The consequence is that the last few
|
|
* bytes will contain garbage; presumably the caller will not
|
|
* process them further.
|
|
*/
|
|
|
|
for( iWide = pRData->iTransWide; iWide > 0; iWide -= BBITS )
|
|
{
|
|
dw0 = 0;
|
|
dw1 = 0;
|
|
|
|
/*
|
|
* Loop DOWN the scanlines at the starting byte location,
|
|
* generating the transposed data as we go.
|
|
*/
|
|
|
|
for( i = BBITS, pbTemp = pbIn++; --i >= 0; pbTemp += cbLine )
|
|
{
|
|
dw0 <<= 1;
|
|
dw1 <<= 1;
|
|
pdw = pdwTrans + (*pbTemp << 1);
|
|
dw0 |= *pdw;
|
|
dw1 |= *(pdw + 1);
|
|
}
|
|
|
|
/* Store the two temporary values in the output buffer. */
|
|
*pdwOut = dw0;
|
|
*(pdwOut + 1) = dw1;
|
|
pdwOut += 2;
|
|
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
/************************** Function Header *******************************
|
|
* vTrans8N
|
|
* Function to transpose the input array into the output array,
|
|
* where the input data is to be considered N rows of bitmap data,
|
|
* and the output area is byte aligned.
|
|
*
|
|
* RETURNS:
|
|
* Nothing
|
|
*
|
|
* HISTORY:
|
|
* 16:34 on Mon 28 Jan 1991 -by- Lindsay Harris [lindsayh]
|
|
* First incarnation.
|
|
*
|
|
*************************************************************************/
|
|
|
|
void
|
|
vTrans8N( pbIn, pRData )
|
|
BYTE *pbIn; /* Source */
|
|
RENDER *pRData; /* Overall rendering info */
|
|
{
|
|
/*
|
|
* The technique is quite simple, though not necessarily obvious.
|
|
* Take an 8 scan line by 8 bits block of data, and transform it
|
|
* into 8 bytes with bits in the scan line order, rather than
|
|
* along the scan line as supplied.
|
|
* To do this as quickly as possible, each byte to be converted
|
|
* is used as an index into a lookup table; each table entry is
|
|
* 64 bits long (a pair of longs above). These 64 bits are ORed
|
|
* with the running total of 64 bits (the two variables, dw0, dw1);
|
|
* shift the running total one bit left. Repeat this operation
|
|
* for the corresponding byte in the next scan line - this is
|
|
* the new table lookup index. Repeat for all 8 bytes in the 8
|
|
* scan lines being processed. Store the 64 bit temporary results
|
|
* in the output dword array. Move to the next byte in the
|
|
* scan line, and repeat the loop for this column.
|
|
* This function is based on the special 8 X 8 case (vTrans8x8).
|
|
* The significant differences are that the transposed data needs
|
|
* to be written byte at a time (instead of DWORD at a time),
|
|
* and that there are N scan lines to convert in each loop.
|
|
*/
|
|
|
|
register DWORD dw0, dw1; /* Inner loop temporaries */
|
|
register BYTE *pbTemp;
|
|
register DWORD *pdw;
|
|
|
|
register int cbLine; /* Bytes per line in scan data */
|
|
register int i; /* Loop variable. */
|
|
register int iBand; /* For moving down the scan lines */
|
|
|
|
int iSkip; /* Output interleave factor */
|
|
int iWide; /* Pixels across the bitmap */
|
|
|
|
BYTE *pbOut; /* Destination, local copy */
|
|
BYTE *pbBase; /* Start addr of 8 scan line group */
|
|
BYTE *pbOutTmp; /* For output loop */
|
|
|
|
DWORD *pdwTrans; /* Speedier access */
|
|
|
|
|
|
|
|
/*
|
|
* Set up the local variables from the RENDER structure passed in.
|
|
*/
|
|
|
|
cbLine = pRData->cbTLine;
|
|
iSkip = pRData->iTransSkip;
|
|
pbOut = pRData->pvTransBuf; /* Reserved for us! */
|
|
pdwTrans = pRData->Trans.pdwTransTab;
|
|
|
|
/*
|
|
* To ease MMU thrashing, we scan ACROSS the bitmap in 8 line
|
|
* groups. This results in closer memory references, and so less
|
|
* page faults and so faster execution. Hence, the outer most loop
|
|
* loops DOWN the scanlines. The next inner loop scans across groups
|
|
* of 8 scan lines at a time, while the inner most loop transposes
|
|
* one byte by 8 scan lines of bitmap image.
|
|
* Note that processing the data this way causes a slight increase
|
|
* in scattered memory addresses when writing the output data.
|
|
* There is no way to avoid one or the other memory references being
|
|
* scattered; however, the output area is smaller than the input
|
|
* input, so scattering here will be less severe to the MMU.
|
|
*/
|
|
|
|
for( iBand = pRData->iTransHigh; iBand >= BBITS; iBand -= BBITS )
|
|
{
|
|
|
|
/*
|
|
* Have selected the next group of 8 scan lines to process,
|
|
* so scan from left to right, transposing data in 8 x 8 bit
|
|
* groups. This is the size that can be done very quickly with
|
|
* a 32 bit environment.
|
|
*/
|
|
|
|
pbBase = pbIn;
|
|
pbIn += BBITS * cbLine; /* Next address */
|
|
|
|
pbOutTmp = pbOut;
|
|
++pbOut; /* Onto the next byte sequence */
|
|
|
|
for( iWide = pRData->iTransWide; iWide > 0; iWide -= BBITS )
|
|
{
|
|
/*
|
|
* Process the bitmap byte at a time moving across, and
|
|
* 8 scan lines high. This corresponds to transposing an
|
|
* 8 x 8 bit array. We can do that quickly.
|
|
*/
|
|
dw0 = 0;
|
|
dw1 = 0;
|
|
pbTemp = pbBase++;
|
|
|
|
for( i = BBITS; --i >= 0; pbTemp += cbLine )
|
|
{
|
|
/* The INNER loop - the bit swapping operations */
|
|
dw0 <<= 1;
|
|
dw1 <<= 1;
|
|
pdw = pdwTrans + (*pbTemp << 1);
|
|
dw0 |= *pdw;
|
|
dw1 |= *(pdw + 1);
|
|
|
|
}
|
|
|
|
/* !!!LindsayH: Note that the following code is big endian/little endian
|
|
* sensitive, and currently works on the 80386 (which ever way that is).
|
|
* There are two alternatives to cure this problem: first is to have
|
|
* another function, with the order of byte extraction reversed; second
|
|
* is to offset the value in pbTemp, and change the sign of iSkip.
|
|
* There are disadvantages to both.
|
|
* FOR NOW, this is not a problem, and will be left as an exercise
|
|
* for the student.
|
|
*/
|
|
/* Store the two temporary values in the output buffer. */
|
|
pbTemp = pbOutTmp;
|
|
*pbTemp = (BYTE)dw0;
|
|
|
|
pbTemp += iSkip;
|
|
dw0 >>= BBITS; /* One byte's worth */
|
|
*pbTemp = (BYTE)dw0;
|
|
|
|
pbTemp += iSkip;
|
|
dw0 >>= BBITS;
|
|
*pbTemp = (BYTE)dw0;
|
|
|
|
pbTemp += iSkip;
|
|
dw0 >>= BBITS;
|
|
*pbTemp = (BYTE)dw0;
|
|
|
|
pbTemp += iSkip;
|
|
*pbTemp = (BYTE)dw1;
|
|
|
|
pbTemp += iSkip;
|
|
dw1 >>= BBITS;
|
|
*pbTemp = (BYTE)dw1;
|
|
|
|
pbTemp += iSkip;
|
|
dw1 >>= BBITS;
|
|
*pbTemp = (BYTE)dw1;
|
|
|
|
pbTemp += iSkip;
|
|
dw1 >>= BBITS;
|
|
*pbTemp = (BYTE)dw1;
|
|
|
|
pbOutTmp += BBITS * iSkip; /* Next chunk of output data */
|
|
}
|
|
|
|
}
|
|
|
|
/*
|
|
* There may be some scan lines remaining. If so, iBand will
|
|
* be > 0, and that indicates the number of output scan lines
|
|
* remaining.
|
|
*/
|
|
|
|
if( iBand > 0 )
|
|
{
|
|
|
|
/*
|
|
* This is basically the same as the stripped down version
|
|
* in the outer loop above. Note that the output data is still
|
|
* byte aligned, IT IS PRESUMED THAT THE 'MISSING' LINES ARE
|
|
* ZERO FILLED. This may not be what is desired - it is for
|
|
* transposing bits to output to a dot matrix printer where
|
|
* the page length is not a multiple of the number of pins.
|
|
* I don't know if that can ever happen.
|
|
*/
|
|
|
|
pbBase = pbIn;
|
|
pbOutTmp = pbOut;
|
|
|
|
for( iWide = pRData->iTransWide; iWide > 0; iWide -= BBITS )
|
|
{
|
|
/*
|
|
* Process the bitmap byte at a time moving across, and
|
|
* 8 scan lines high. This corresponds to transposing an
|
|
* 8 x 8 bit array. We can do that quickly.
|
|
*/
|
|
dw0 = 0;
|
|
dw1 = 0;
|
|
pbTemp = pbBase++;
|
|
|
|
/*
|
|
* The inner loop now only transposes as many scan lines
|
|
* as the bitmap actually contains - we must not run off
|
|
* the end of memory.
|
|
*/
|
|
|
|
for( i = iBand; --i >= 0; pbTemp += cbLine )
|
|
{
|
|
/* The INNER loop - the bit swapping operations */
|
|
dw0 <<= 1;
|
|
dw1 <<= 1;
|
|
pdw = pdwTrans + (*pbTemp << 1);
|
|
dw0 |= *pdw;
|
|
dw1 |= *(pdw + 1);
|
|
|
|
}
|
|
|
|
/* Zero fill the missing bits */
|
|
dw0 <<= BBITS - iBand;
|
|
dw1 <<= BBITS - iBand;
|
|
|
|
/* Store the two temporary values in the output buffer. */
|
|
pbTemp = pbOutTmp;
|
|
*pbTemp = (BYTE)dw0;
|
|
|
|
pbTemp += iSkip;
|
|
dw0 >>= BBITS; /* One byte's worth */
|
|
*pbTemp = (BYTE)dw0;
|
|
|
|
pbTemp += iSkip;
|
|
dw0 >>= BBITS;
|
|
*pbTemp = (BYTE)dw0;
|
|
|
|
pbTemp += iSkip;
|
|
dw0 >>= BBITS;
|
|
*pbTemp = (BYTE)dw0;
|
|
|
|
pbTemp += iSkip;
|
|
*pbTemp = (BYTE)dw1;
|
|
|
|
pbTemp += iSkip;
|
|
dw1 >>= BBITS;
|
|
*pbTemp = (BYTE)dw1;
|
|
|
|
pbTemp += iSkip;
|
|
dw1 >>= BBITS;
|
|
*pbTemp = (BYTE)dw1;
|
|
|
|
pbTemp += iSkip;
|
|
dw1 >>= BBITS;
|
|
*pbTemp = (BYTE)dw1;
|
|
|
|
pbOutTmp += BBITS * iSkip; /* Next chunk of output data */
|
|
}
|
|
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
/*
|
|
* Define the number of pels transposed per loop iteration. In the case
|
|
* of a colour bitmap, this is 2, since there are 4 bits per pel, thus
|
|
* 2 per byte.
|
|
*/
|
|
|
|
#define PELS_PER_LOOP (BBITS / 4)
|
|
|
|
|
|
/************************** Function Header *******************************
|
|
* vTrans8N4BPP
|
|
* Function to transpose the input array into the output array,
|
|
* where the input data is to be considered N rows of bitmap data,
|
|
* and the output area is byte aligned.
|
|
* This version works on 4 bits per pel bitmaps (colour for us).
|
|
*
|
|
* RETURNS:
|
|
* Nothing
|
|
*
|
|
* HISTORY:
|
|
* 15:20 on Tue 30 Jul 1991 -by- Lindsay Harris [lindsayh]
|
|
* First incarnation, based on vTrans8N.
|
|
*
|
|
*************************************************************************/
|
|
|
|
void
|
|
vTrans8N4BPP( pbIn, pRData )
|
|
BYTE *pbIn; /* Source */
|
|
RENDER *pRData; /* Overall rendering info */
|
|
{
|
|
/*
|
|
* The technique is quite simple, though not necessarily obvious.
|
|
* Take an 8 scan line by 8 bits block of data, and transform it
|
|
* into 8 bytes with bits in the scan line order, rather than
|
|
* along the scan line as supplied.
|
|
* To do this as quickly as possible, each byte to be converted
|
|
* is used as an index into a lookup table; each table entry is
|
|
* 64 bits long (a pair of longs above). These 64 bits are ORed
|
|
* with the running total of 64 bits (the two variables, dw0, dw1);
|
|
* shift the running total one bit left. Repeat this operation
|
|
* for the corresponding byte in the next scan line - this is
|
|
* the new table lookup index. Repeat for all 8 bytes in the 8
|
|
* scan lines being processed. Store the 64 bit temporary results
|
|
* in the output dword array. Move to the next byte in the
|
|
* scan line, and repeat the loop for this column.
|
|
* This function is based on the special 8 X 8 case (vTrans8x8).
|
|
* The significant differences are that the transposed data needs
|
|
* to be written byte at a time (instead of DWORD at a time),
|
|
* and that there are N scan lines to convert in each loop.
|
|
*/
|
|
|
|
register DWORD dw0, dw1; /* Inner loop temporaries */
|
|
register BYTE *pbTemp;
|
|
register DWORD *pdw;
|
|
|
|
register int cbLine; /* Bytes per line in scan data */
|
|
register int i; /* Loop variable. */
|
|
register int iBand; /* For moving down the scan lines */
|
|
|
|
int iSkip; /* Output interleave factor */
|
|
int iWide; /* Pixels across the bitmap */
|
|
|
|
DWORD *pdwOut; /* Destination, local copy */
|
|
BYTE *pbBase; /* Start addr of 8 scan line group */
|
|
DWORD *pdwOutTmp; /* For output loop */
|
|
|
|
DWORD *pdwTrans; /* Speedier access */
|
|
|
|
|
|
|
|
/*
|
|
* Set up the local variables from the RENDER structure passed in.
|
|
* See the above function for explanation of iSkip.
|
|
*/
|
|
|
|
cbLine = pRData->cbTLine;
|
|
iSkip = pRData->iTransSkip / DWBYTES;
|
|
pdwOut = pRData->pvTransBuf; /* Reserved for us! */
|
|
pdwTrans = pRData->Trans.pdwTransTab;
|
|
|
|
/*
|
|
* To ease MMU thrashing, we scan ACROSS the bitmap in 8 line
|
|
* groups. This results in closer memory references, and so less
|
|
* page faults and faster execution. Hence, the outer most loop
|
|
* loops DOWN the scanlines. Then next inner loop scans across groups
|
|
* of 8 scan lines at a time, while the inner most loop transposes
|
|
* one byte by 8 scan lines of bitmap image.
|
|
* Note that processing the data this way causes a slight increase
|
|
* in scattered memory addresses when writing the output data.
|
|
* There is no way to avoid one or the other memory references being
|
|
* scattered; however, the output area is smaller than the input
|
|
* input, so scattering here will be less severe on the MMU.
|
|
*/
|
|
|
|
|
|
for( iBand = pRData->iTransHigh; iBand >= BBITS; iBand -= BBITS )
|
|
{
|
|
|
|
/*
|
|
* Have selected the next group of 8 scan lines to process,
|
|
* so scan from left to right, transposing data in 8 x 8 bit
|
|
* groups. This is the size that can be done very quickly with
|
|
* a 32 bit environment.
|
|
*/
|
|
|
|
pbBase = pbIn;
|
|
pbIn += BBITS * cbLine; /* Next address */
|
|
|
|
pdwOutTmp = pdwOut;
|
|
++pdwOut; /* Onto the next byte sequence */
|
|
|
|
for( iWide = pRData->iTransWide; iWide > 0; iWide -= BBITS )
|
|
{
|
|
/*
|
|
* Process the bitmap byte at a time moving across, and
|
|
* 8 scan lines high. This corresponds to transposing an
|
|
* 8 x 8 pixel array. We can do that quickly.
|
|
*/
|
|
dw0 = 0;
|
|
dw1 = 0;
|
|
pbTemp = pbBase++;
|
|
|
|
for( i = BBITS; --i >= 0; pbTemp += cbLine )
|
|
{
|
|
/* The INNER loop - the bit swapping operations */
|
|
dw0 >>= 8;
|
|
dw1 >>= 8;
|
|
pdw = pdwTrans + (*pbTemp << 1);
|
|
dw0 |= *pdw << 4;
|
|
dw1 |= *(pdw + 1) << 4;
|
|
|
|
pbTemp += cbLine;
|
|
--i;
|
|
|
|
pdw = pdwTrans + (*pbTemp << 1);
|
|
dw0 |= *pdw;
|
|
dw1 |= *(pdw + 1);
|
|
|
|
}
|
|
|
|
/* Store the two temporary values in the output buffer. */
|
|
|
|
*pdwOutTmp = dw0;
|
|
*(pdwOutTmp + iSkip) = dw1;
|
|
|
|
pdwOutTmp += PELS_PER_LOOP * iSkip; /* Next chunk of output data */
|
|
}
|
|
|
|
}
|
|
|
|
/*
|
|
* There may be some scan lines remaining. If so, iBand will
|
|
* be > 0, and that indicates the number of output scan lines
|
|
* remaining.
|
|
*/
|
|
|
|
if( iBand > 0 )
|
|
{
|
|
|
|
/*
|
|
* This is basically the same as the stripped down version
|
|
* in the outer loop above. Note that the output data is still
|
|
* byte aligned, IT IS PRESUMED THAT THE 'MISSING' LINES ARE
|
|
* ZERO FILLED. This may not be what is desired - it is for
|
|
* transposing bits to output to a dot matrix printer where
|
|
* the page length is not a multiple of the number of pins.
|
|
* I don't know if that can ever happen.
|
|
*/
|
|
|
|
pbBase = pbIn;
|
|
pdwOutTmp = pdwOut;
|
|
|
|
for( iWide = pRData->iTransWide; iWide > 0; iWide -= BBITS )
|
|
{
|
|
/*
|
|
* Process the bitmap byte at a time moving across, and
|
|
* 8 scan lines high. This corresponds to transposing an
|
|
* 8 x 8 bit array. We can do that quickly.
|
|
*/
|
|
dw0 = 0;
|
|
dw1 = 0;
|
|
pbTemp = pbBase++;
|
|
|
|
/*
|
|
* The inner loop now only transposes as many scan lines
|
|
* as the bitmap actually contains - we must not run off
|
|
* the end of memory.
|
|
*/
|
|
|
|
for( i = iBand; --i >= 0; pbTemp += cbLine )
|
|
{
|
|
/* The INNER loop - the bit swapping operations. */
|
|
|
|
pdw = pdwTrans + (*pbTemp << 1);
|
|
|
|
if( (i ^ iBand) & 0x1 )
|
|
{
|
|
/* Every even time through the loop */
|
|
dw0 >>= 8;
|
|
dw1 >>= 8;
|
|
|
|
dw0 |= *pdw << 4;
|
|
dw1 |= *(pdw + 1) << 4;
|
|
}
|
|
else
|
|
{
|
|
/* Odd times through the loop */
|
|
dw0 |= *pdw;
|
|
dw1 |= *(pdw + 1);
|
|
}
|
|
|
|
}
|
|
|
|
/* Zero fill the missing bits */
|
|
dw0 >>= 8 * ((BBITS - iBand) / 2);
|
|
dw1 >>= 8 * ((BBITS - iBand) / 2);
|
|
|
|
/* Store the two temporary values in the output buffer. */
|
|
|
|
*pdwOutTmp = dw0;
|
|
*(pdwOutTmp + iSkip) = dw1;
|
|
|
|
pdwOutTmp += 2 * iSkip; /* Next chunk of output data */
|
|
}
|
|
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
/***************************** Function Header ******************************
|
|
* vTransColSep()
|
|
* Function to transpose the colour bits in a 4 Bits Per Pel colour
|
|
* bitmap into an array of bytes, where the bytes are ordered in
|
|
* the same way as the original bits. An example of this is provided
|
|
* in the explanation for the SEP_TABLE_SIZE value at the top of this file.
|
|
*
|
|
* RETURNS:
|
|
* Nothing
|
|
*
|
|
* HISTORY:
|
|
* 13:48 on Mon 10 Jun 1991 -by- Lindsay Harris [lindsayh]
|
|
* Time ZERO
|
|
*
|
|
***************************************************************************/
|
|
|
|
void
|
|
vTransColSep( pbIn, pRData )
|
|
register BYTE *pbIn; /* Source */
|
|
RENDER *pRData; /* Overall rendering info */
|
|
{
|
|
/*
|
|
* Operation is quite simple - pass along the input array byte
|
|
* at a time, and use each 4 byte group to generate a DWORD of
|
|
* output - placed in pdwOut. The previously generated translation
|
|
* table is especially formulated to do this job!
|
|
*
|
|
* NOTE: pdwOut and pbIn MAY POINT TO THE SAME ADDRESS! THERE IS
|
|
* NO OVERLAP IN OPERATIONS TO CAUSE CONFUSION.
|
|
*/
|
|
|
|
register DWORD dwTemp;
|
|
register DWORD *pdwSep;
|
|
|
|
int iI;
|
|
int iBlock;
|
|
DWORD *pdwOut; /* Destination - DWORD aligned */
|
|
|
|
|
|
iBlock = pRData->cDWLine * pRData->iNumScans;
|
|
|
|
pdwSep = pRData->pdwColrSep; /* Colour separation table */
|
|
pdwOut = pRData->pvTransBuf; /* Where the data goes */
|
|
|
|
|
|
/* Loop through the line in 4 byte groups */
|
|
for( iI = iBlock; --iI >= 0; )
|
|
{
|
|
|
|
dwTemp = *(pdwSep + *pbIn++);
|
|
|
|
dwTemp <<= 2;
|
|
dwTemp |= *(pdwSep + *pbIn++);
|
|
|
|
dwTemp <<= 2;
|
|
dwTemp |= *(pdwSep + *pbIn++);
|
|
|
|
*pdwOut++ = (dwTemp << 2) | *(pdwSep + *pbIn++);
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
/************************** Function Header ********************************
|
|
* vTrans8BPP
|
|
* The transpose function for 8 bits per pel bitmaps. This is rather
|
|
* easy, as all we do is shuffle bytes!
|
|
*
|
|
* RETURNS:
|
|
* Nothing
|
|
*
|
|
* HISTORY:
|
|
* 17:32 on Tue 03 Nov 1992 -by- Lindsay Harris [lindsayh]
|
|
* Initial version.
|
|
*
|
|
****************************************************************************/
|
|
|
|
void
|
|
vTrans8BPP( pbIn, pRData )
|
|
BYTE *pbIn; /* Source */
|
|
RENDER *pRData; /* Overall rendering info */
|
|
{
|
|
|
|
|
|
/*
|
|
* Scan along the input bitmap, writing the data to the output
|
|
* in column order. This results in reduced MMU thrashing, as
|
|
* the output addresses are all limited to a much smaller range
|
|
* than the incoming addresses.
|
|
*/
|
|
|
|
register BYTE *pbBase; /* Scan along input bitmap */
|
|
register BYTE *pbOut; /* The output scan column pointer */
|
|
|
|
int iBand; /* Count down scan lines */
|
|
int iSkip; /* Offset between output bytes */
|
|
int iWide; /* Loop across the input scan line */
|
|
int cbLine; /* Bytes per input scan line */
|
|
|
|
BYTE *pbOutBase; /* Start of column of output data */
|
|
|
|
|
|
/*
|
|
* Set up the local copies (for faster access) of data passed in.
|
|
*/
|
|
|
|
cbLine = pRData->cbTLine;
|
|
iSkip = pRData->iTransSkip;
|
|
pbOutBase = pRData->pvTransBuf; /* Base output buffer address */
|
|
|
|
|
|
for( iBand = pRData->iTransHigh; iBand > 0; --iBand )
|
|
{
|
|
/*
|
|
* This loop processes scan lines in the input bitmap. As
|
|
* we progress across the scan line, the output data is written
|
|
* in column order.
|
|
*/
|
|
|
|
pbBase = pbIn;
|
|
pbIn += cbLine; /* Next scan line, DWORD aligned */
|
|
|
|
pbOut = pbOutBase;
|
|
++pbOutBase; /* One column across output area */
|
|
|
|
for( iWide = pRData->iTransWide; iWide > 0; iWide -= BBITS )
|
|
{
|
|
/*
|
|
* This loop traverses the input scan line, taking bytes
|
|
* and writing them to the output area in column order.
|
|
*/
|
|
|
|
*pbOut = *pbBase++;
|
|
pbOut += iSkip;
|
|
}
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
/************************** Function Header ********************************
|
|
* vTrans24BPP
|
|
 * The transpose function for 24 bits per pel bitmaps. This is rather
|
|
* easy, as all we do is shuffle bytes!
|
|
*
|
|
* RETURNS:
|
|
* Nothing
|
|
*
|
|
* HISTORY:
|
|
* 17:32 on Tue 03 Nov 1992 -by- Lindsay Harris [lindsayh]
|
|
* Initial version.
|
|
*
|
|
****************************************************************************/
|
|
|
|
void
|
|
vTrans24BPP( pbIn, pRData )
|
|
BYTE *pbIn; /* Source */
|
|
RENDER *pRData; /* Overall rendering info */
|
|
{
|
|
|
|
|
|
/*
|
|
* Scan along the input bitmap, writing the data to the output
|
|
* in column order. This results in reduced MMU thrashing, as
|
|
* the output addresses are all limited to a much smaller range
|
|
* than the incoming addresses.
|
|
*/
|
|
|
|
register BYTE *pbBase; /* Scan along input bitmap */
|
|
register BYTE *pbOut; /* The output scan column pointer */
|
|
|
|
int iBand; /* Count down scan lines */
|
|
int iSkip; /* Offset between output bytes */
|
|
int iWide; /* Loop across the input scan line */
|
|
int iCol;
|
|
int iRow;
|
|
int cbLine; /* Bytes per input scan line */
|
|
int iBytesLeftOver;
|
|
|
|
BYTE *pbOutBase; /* Start of column of output data */
|
|
|
|
|
|
/*
|
|
* Set up the local copies (for faster access) of data passed in.
|
|
*/
|
|
|
|
iSkip = pRData->iTransSkip;
|
|
cbLine = pRData->cbTLine;
|
|
pbOutBase = pRData->pvTransBuf; /* Base output buffer address */
|
|
iCol = pRData->iTransWide/pRData->iBPP;
|
|
iRow = pRData->iTransHigh;
|
|
iBytesLeftOver = (pRData->iTransHigh *pRData->iBPP) % DWBITS;
|
|
|
|
for( iBand = iRow; iBand > 0; --iBand )
|
|
{
|
|
/*
|
|
* This loop processes scan lines in the input bitmap. As
|
|
* we progress across the scan line, the output data is written
|
|
* in column order.
|
|
*/
|
|
|
|
pbBase = pbIn;
|
|
pbIn += cbLine; /* Next scan line, DWORD aligned */
|
|
|
|
pbOut = pbOutBase;
|
|
pbOutBase+=3; /* One column across output area */
|
|
|
|
for( iWide = iCol; iWide > 0; --iWide )
|
|
{
|
|
/*
|
|
* This loop traverses the input scan line, taking bytes
|
|
* and writing them to the output area in column order.
|
|
* Since we are writing bytes three at a time iSkip
|
|
* points to the end of the first RGB triplet in the line.
|
|
* therefore we have to backup two bytes so pbOut will
|
|
* point to the beginning of the RGB triplet.
|
|
*/
|
|
|
|
*pbOut++ = *pbBase++;
|
|
*pbOut++ = *pbBase++;
|
|
*pbOut = *pbBase++;
|
|
pbOut += (iSkip-2);
|
|
}
|
|
}
|
|
|
|
return;
|
|
}
|
|
|