/**************************** Function Header ****************************** * transpos.c * Functions associated with transposing bitmaps. Several flavours * are available, to be used as appropriate in some special cases. * The special cases are generally faster than the general method. * * HISTORY: * 10:46 on Wed 27 Feb 1991 -by- Lindsay Harris [lindsayh] * Added table creation at load time - big endian/little endian * * 14:24 on Wed 23 Jan 1991 -by- Lindsay Harris [lindsayh] * Created it - initially for the 8 x 8 case. * * Copyright (C) 1991 - 1993 Micrososft Corporation * ***************************************************************************/ #include #include #include #include #include "pdev.h" #include "win30def.h" #include "udmindrv.h" #include "udpfm.h" #include "uddevice.h" #include "udrender.h" #include "rasdd.h" /* * The transpose table: maps one byte into two longs, such that the * 8 bits of the byte turn into 64 bits: each bit of the original is * turned into one byte of output. * THUS: * Input byte: hgfedcba * transposes into output bytes: * 0000000a 0000000b 0000000c 0000000d * 0000000e 0000000f 0000000g 0000000h * * The table is allocated at DrvEnableSurface time, thus ensuring that * we do not allocate memory that we are not going to use. */ #define TABLE_SIZE (256 * 2 * sizeof( DWORD )) /* * We also need a similar table for colour separation. This one * consists of 256 DWORDs, and is used to split the RGB(K) format * input byte into an output DWORD with the two R bits in one byte, * the two G bits in the next byte etc. Used for single pin colour * printers, like the HP PaintJet. * The table is generated according to the following rule: * * INPUT BYTE: KRGBkrgb * * OUTPUT DWORD: 000000Kk 000000Rr 000000Gg 000000Bb */ #define SEP_TABLE_SIZE (256 * sizeof( DWORD )) /************************** Function Header ******************************* * vInitTrans * Initialise the transpose tables. This is done to make the tables * independent of whether the processor is big endian or little endian, * since the data is generated by the processor that is going to * use it! There are still some minor questions of byte ordering, * but nothing too major to resolve. * * RETURNS: * TRUE/FALSE; FALSE for lack of storage for table. * * HISTORY: * Friday December 3 1993 -by- Norman Hendley [normanh] * Changed graphics mode check to RES_DM_GDI from nPins * * 10:52 on Wed 27 Feb 1991 -by- Lindsay Harris [lindsayh] * Borrowed from program which generated original table. * **************************************************************************/ BOOL bInitTrans( pPDev ) PDEV *pPDev; { /* * Function to generate the transposition table. There is nothing * difficult about generating the table. The only trick is the use * of the union. This allows us to setup a DWORD table with the * byte ordering of the hardware on which we are running. This is * achieved by writing the data into the BYTE entry, then using * the same memory as a DWORD to be put away into memory. The reason * for using DWORDS is to get maximum benefit from memory references * in the inner loop of the transpose functions. * Storage space is allocated on the heap, and the address is * stored in the PDEV. * Note that the 8 bits per pel case is special, as we are shuffling * bytes around, and thus do not need any tables. For this case, * return TRUE without allocating any storage. * * Returns TRUE for success, FALSE meaning storage unavailable. */ register DWORD *pdw; register int iShift, j; int i; #define pUDPDev ((UD_PDEV *)(pPDev->pUDPDev)) union { BYTE b[ 8 ]; /* Exactly 64 bits */ DWORD dw[ 2 ]; /* Also exactly 64 bits */ } u; if( pUDPDev->sBitsPixel == 8 ) { pPDev->pdwTrans = NULL; return TRUE; /* Byte operations - no table needed */ } if( !(pPDev->pdwTrans = (DWORD *)HeapAlloc( pPDev->hheap, 0, TABLE_SIZE )) ) return FALSE; pdw = pPDev->pdwTrans; /* Speedier access */ /* * Colour requires different tables, as the pixel data consists of * 4 bits which need to move in a single group. */ if( pUDPDev->Resolution.fDump & RES_DM_COLOR ) { /* * First generate the landscape to portrait transpose data. * The only complication is maintaining 4 bit nibbles as a single * entity. */ for( iShift = 0; iShift < 256; iShift++ ) { /* * The low nibble goes into the highest byte address, the * next nibble goes 4 bytes lower in memory. */ u.dw[ 0 ] = 0; u.dw[ 1 ] = 0; u.b[ 3 ] = (BYTE)((iShift >> 4) & 0x0f); u.b[ 7 ] = (BYTE)(iShift & 0x0f); /* Store the result */ *pdw++ = u.dw[ 0 ]; *pdw++ = u.dw[ 1 ]; } /* * There is an additional transpose operation required for single * pin colour printers. The HP Paintjet typifies this class. * This operation is required to separate the RGB pixels (2 of each * colour per byte) into bytes that may be sent to the printer, * such that all R bytes are sent in one go, followed by G etc. * For multiple pin printers, this falls out of the standard * transpose operations. */ if( pUDPDev->Resolution.fDump & RES_DM_GDI ) { pPDev->pdwColrSep = (DWORD *)HeapAlloc( pPDev->hheap, 0, SEP_TABLE_SIZE ); if( pPDev->pdwColrSep == NULL ) { HeapFree( pPDev->hheap, 0, (LPSTR)pPDev->pdwTrans ); pPDev->pdwTrans = 0; return FALSE; } pdw = pPDev->pdwColrSep; /* Speedier access */ /* * The explanation above for SEP_TABLE_SIZE explains what is * taking place in the following loops. */ for( i = 0; i <= 0xff; i++ ) { u.dw[ 0 ] = 0; iShift = i & 0x77; /* Only use 3 bits per pel */ if( pUDPDev->fColorFormat & DC_EXTRACT_BLK ) { if( pUDPDev->fColorFormat & DC_PRIMARY_RGB ) { /* * Whenever we have a 0 nibble, replace it with 8. * This does the colour separation for us! The * separation happens when the transpose happens. */ if( (iShift & 0x07) == 0 ) iShift |= 0x08; if( (iShift & 0x70) == 0 ) iShift |= 0x80; } else { /* CMY - same idea, different conditions! */ if( (iShift & 0x07) == 0x07 ) iShift = (iShift & ~0x07) | 0x08; if( (iShift & 0x70) == 0x70 ) iShift = (iShift & ~0x70) | 0x80; } } /* The two bits Bb */ u.b[ 3 ] = (BYTE)(((iShift >> 3) & 0x02) | (iShift & 0x1)); iShift >>= 1; /* The two bits Gg */ u.b[ 2 ] = (BYTE)(((iShift >> 3) & 0x02) | (iShift & 0x1)); iShift >>= 1; /* The two bits Rr */ u.b[ 1 ] = (BYTE)(((iShift >> 3) & 0x02) | (iShift & 0x1)); iShift >>= 1; /* The two bits Kk */ u.b[ 0 ] = (BYTE)(((iShift >> 3) & 0x02) | (iShift & 0x1)); *pdw++ = u.dw[ 0 ]; /* Safe for posterity */ } } else { /* * The dot matrix case. Here we will call the relevant * transpose function, but use the modified table below. This * table will do the colour separation, and will result in the * transpose operation splitting up the data for each head pass. */ pPDev->pdwColrSep = (DWORD *)HeapAlloc( pPDev->hheap, 0, TABLE_SIZE ); if( pPDev->pdwColrSep == NULL ) { HeapFree( pPDev->hheap, 0, (LPSTR)pPDev->pdwTrans ); pPDev->pdwTrans = 0; return FALSE; } pdw = pPDev->pdwColrSep; /* Speedier access */ for( i = 0; i <= 0xff; i++ ) { /* Each bit of i goes into one byte of the output */ u.dw[ 0 ] = 0; u.dw[ 1 ] = 0; iShift = i & 0x77; /* Only 3 bits per pel */ if( pUDPDev->fColorFormat & DC_EXTRACT_BLK ) { if( pUDPDev->fColorFormat & DC_PRIMARY_RGB ) { /* * Whenever we have a 0 nibble, replace it with 8. * This does the colour separation for us! The * separation happens when the transpose happens. */ if( (iShift & 0x07) == 0 ) iShift |= 0x08; if( (iShift & 0x70) == 0 ) iShift |= 0x80; } else { /* CMY - same idea, different conditions! */ if( (iShift & 0x07) == 0x07 ) iShift = (iShift & ~0x07) | 0x08; if( (iShift & 0x70) == 0x70 ) iShift = (iShift & ~0x70) | 0x80; } } for( j = 8; --j >= 0; ) { u.b[ j ] = (BYTE)(iShift & 0x1); iShift >>= 1; } /* Store the result */ *pdw++ = u.dw[ 0 ]; *pdw++ = u.dw[ 1 ]; } } } else { /* * Monochrome case - simple transpositions. */ for( i = 0; i <= 0xff; i++ ) { /* Each bit of i goes into one byte of the output */ iShift = i; u.dw[ 0 ] = 0; u.dw[ 1 ] = 0; for( j = 8; --j >= 0; ) { u.b[ j ] = (BYTE)(iShift & 0x1); iShift >>= 1; } /* Store the result */ *pdw++ = u.dw[ 0 ]; *pdw++ = u.dw[ 1 ]; } } return TRUE; } #undef pUDPDev /************************** Function Header ******************************* * vTrans8x8 * Function to transpose the input array into the output array, * where the input data is to be considered 8 rows of bitmap data, * and the output area is dword aligned. * * RETURNS: * Nothing * * HISTORY: * 14:27 on Wed 23 Jan 1991 -by- Lindsay Harris [lindsayh] * First incarnation. * *************************************************************************/ void vTrans8x8( pbIn, pRData ) BYTE *pbIn; /* Source */ RENDER *pRData; /* Rendering info */ { /* * The technique is quite simple, though not necessarily obvious. * Take an 8 scan line by 8 bits block of data, and transform it * into 8 bytes with bits in the scan line order, rather than * along the scan line as supplied. * To do this as quickly as possible, each byte to be converted * is used as an index into a lookup table; each table entry is * 64 bits long (a pair of longs above). These 64 bits are ORed * with the running total of 64 bits (the two variables, dw0, dw1); * shift the running total one bit left. Repeat this operation * for the corresponding byte in the next scan line - this is * the new table lookup index. Repeat for all 8 bytes in the 8 * scan lines being processed. Store the 64 bit temporary results * in the output dword array. Move to the next byte in the * scan line, and repeat the loop for this column. */ register DWORD dw0, dw1; /* Inner loop temporaries */ register BYTE *pbTemp; register DWORD *pdw; register int cbLine; /* Bytes per line in scan data */ register int i; /* Loop variable. */ int iWide; /* Pixels across the bitmap */ DWORD *pdwOut; /* Destination */ DWORD *pdwTrans; /* Local copy of output buffer */ /* * Some initialisation: byte count, area limits, etc. */ cbLine = pRData->cbTLine; pdwOut = pRData->pvTransBuf; pdwTrans = pRData->Trans.pdwTransTab; if( pRData->iTransHigh != 8 ) { /* This can happen at the end of a page. */ vTrans8N( pbIn, pRData ); return; } /* * Scan across the lines in groups of 8 bits. In the case that the * input is not a multiple of 8, we will produce a few extra * bytes at the end; the caller should allow for this when allocating * storage for pdwOut. The consequence is that the last few * bytes will contain garbage; presumably the caller will not * process them further. */ for( iWide = pRData->iTransWide; iWide > 0; iWide -= BBITS ) { dw0 = 0; dw1 = 0; /* * Loop DOWN the scanlines at the starting byte location, * generating the transposed data as we go. */ for( i = BBITS, pbTemp = pbIn++; --i >= 0; pbTemp += cbLine ) { dw0 <<= 1; dw1 <<= 1; pdw = pdwTrans + (*pbTemp << 1); dw0 |= *pdw; dw1 |= *(pdw + 1); } /* Store the two temporary values in the output buffer. */ *pdwOut = dw0; *(pdwOut + 1) = dw1; pdwOut += 2; } return; } /************************** Function Header ******************************* * vTrans8N * Function to transpose the input array into the output array, * where the input data is to be considered N rows of bitmap data, * and the output area is byte aligned. * * RETURNS: * Nothing * * HISTORY: * 16:34 on Mon 28 Jan 1991 -by- Lindsay Harris [lindsayh] * First incarnation. * *************************************************************************/ void vTrans8N( pbIn, pRData ) BYTE *pbIn; /* Source */ RENDER *pRData; /* Overall rendering info */ { /* * The technique is quite simple, though not necessarily obvious. * Take an 8 scan line by 8 bits block of data, and transform it * into 8 bytes with bits in the scan line order, rather than * along the scan line as supplied. * To do this as quickly as possible, each byte to be converted * is used as an index into a lookup table; each table entry is * 64 bits long (a pair of longs above). These 64 bits are ORed * with the running total of 64 bits (the two variables, dw0, dw1); * shift the running total one bit left. Repeat this operation * for the corresponding byte in the next scan line - this is * the new table lookup index. Repeat for all 8 bytes in the 8 * scan lines being processed. Store the 64 bit temporary results * in the output dword array. Move to the next byte in the * scan line, and repeat the loop for this column. * This function is based on the special 8 X 8 case (vTrans8x8). * The significant differences are that the transposed data needs * to be written byte at a time (instead of DWORD at a time), * and that there are N scan lines to convert in each loop. */ register DWORD dw0, dw1; /* Inner loop temporaries */ register BYTE *pbTemp; register DWORD *pdw; register int cbLine; /* Bytes per line in scan data */ register int i; /* Loop variable. */ register int iBand; /* For moving down the scan lines */ int iSkip; /* Output interleave factor */ int iWide; /* Pixels across the bitmap */ BYTE *pbOut; /* Destination, local copy */ BYTE *pbBase; /* Start addr of 8 scan line group */ BYTE *pbOutTmp; /* For output loop */ DWORD *pdwTrans; /* Speedier access */ /* * Set up the local variables from the RENDER structure passed in. */ cbLine = pRData->cbTLine; iSkip = pRData->iTransSkip; pbOut = pRData->pvTransBuf; /* Reserved for us! */ pdwTrans = pRData->Trans.pdwTransTab; /* * To ease MMU thrashing, we scan ACROSS the bitmap in 8 line * groups. This results in closer memory references, and so less * page faults and so faster execution. Hence, the outer most loop * loops DOWN the scanlines. The next inner loop scans across groups * of 8 scan lines at a time, while the inner most loop transposes * one byte by 8 scan lines of bitmap image. * Note that processing the data this way causes a slight increase * in scattered memory addresses when writing the output data. * There is no way to avoid one or the other memory references being * scattered; however, the output area is smaller than the input * input, so scattering here will be less severe to the MMU. */ for( iBand = pRData->iTransHigh; iBand >= BBITS; iBand -= BBITS ) { /* * Have selected the next group of 8 scan lines to process, * so scan from left to right, transposing data in 8 x 8 bit * groups. This is the size that can be done very quickly with * a 32 bit environment. */ pbBase = pbIn; pbIn += BBITS * cbLine; /* Next address */ pbOutTmp = pbOut; ++pbOut; /* Onto the next byte sequence */ for( iWide = pRData->iTransWide; iWide > 0; iWide -= BBITS ) { /* * Process the bitmap byte at a time moving across, and * 8 scan lines high. This corresponds to transposing an * 8 x 8 bit array. We can do that quickly. */ dw0 = 0; dw1 = 0; pbTemp = pbBase++; for( i = BBITS; --i >= 0; pbTemp += cbLine ) { /* The INNER loop - the bit swapping operations */ dw0 <<= 1; dw1 <<= 1; pdw = pdwTrans + (*pbTemp << 1); dw0 |= *pdw; dw1 |= *(pdw + 1); } /* !!!LindsayH: Note that the following code is big endian/little endian * sensitive, and currently works on the 80386 (which ever way that is). * There are two alternatives to cure this problem: first is to have * another function, with the order of byte extraction reversed; second * is to offset the value in pbTemp, and change the sign of iSkip. * There are disadvantages to both. * FOR NOW, this is not a problem, and will be left as an exercise * for the student. */ /* Store the two temporary values in the output buffer. */ pbTemp = pbOutTmp; *pbTemp = (BYTE)dw0; pbTemp += iSkip; dw0 >>= BBITS; /* One byte's worth */ *pbTemp = (BYTE)dw0; pbTemp += iSkip; dw0 >>= BBITS; *pbTemp = (BYTE)dw0; pbTemp += iSkip; dw0 >>= BBITS; *pbTemp = (BYTE)dw0; pbTemp += iSkip; *pbTemp = (BYTE)dw1; pbTemp += iSkip; dw1 >>= BBITS; *pbTemp = (BYTE)dw1; pbTemp += iSkip; dw1 >>= BBITS; *pbTemp = (BYTE)dw1; pbTemp += iSkip; dw1 >>= BBITS; *pbTemp = (BYTE)dw1; pbOutTmp += BBITS * iSkip; /* Next chunk of output data */ } } /* * There may be some scan lines remaining. If so, iBand will * be > 0, and that indicates the number of output scan lines * remaining. */ if( iBand > 0 ) { /* * This is basically the same as the stripped down version * in the outer loop above. Note that the output data is still * byte aligned, IT IS PRESUMED THAT THE 'MISSING' LINES ARE * ZERO FILLED. This may not be what is desired - it is for * transposing bits to output to a dot matrix printer where * the page length is not a multiple of the number of pins. * I don't know if that can ever happen. */ pbBase = pbIn; pbOutTmp = pbOut; for( iWide = pRData->iTransWide; iWide > 0; iWide -= BBITS ) { /* * Process the bitmap byte at a time moving across, and * 8 scan lines high. This corresponds to transposing an * 8 x 8 bit array. We can do that quickly. */ dw0 = 0; dw1 = 0; pbTemp = pbBase++; /* * The inner loop now only transposes as many scan lines * as the bitmap actually contains - we must not run off * the end of memory. */ for( i = iBand; --i >= 0; pbTemp += cbLine ) { /* The INNER loop - the bit swapping operations */ dw0 <<= 1; dw1 <<= 1; pdw = pdwTrans + (*pbTemp << 1); dw0 |= *pdw; dw1 |= *(pdw + 1); } /* Zero fill the missing bits */ dw0 <<= BBITS - iBand; dw1 <<= BBITS - iBand; /* Store the two temporary values in the output buffer. */ pbTemp = pbOutTmp; *pbTemp = (BYTE)dw0; pbTemp += iSkip; dw0 >>= BBITS; /* One byte's worth */ *pbTemp = (BYTE)dw0; pbTemp += iSkip; dw0 >>= BBITS; *pbTemp = (BYTE)dw0; pbTemp += iSkip; dw0 >>= BBITS; *pbTemp = (BYTE)dw0; pbTemp += iSkip; *pbTemp = (BYTE)dw1; pbTemp += iSkip; dw1 >>= BBITS; *pbTemp = (BYTE)dw1; pbTemp += iSkip; dw1 >>= BBITS; *pbTemp = (BYTE)dw1; pbTemp += iSkip; dw1 >>= BBITS; *pbTemp = (BYTE)dw1; pbOutTmp += BBITS * iSkip; /* Next chunk of output data */ } } return; } /* * Define the number of pels transposed per loop iteration. In the case * of a colour bitmap, this is 2, since there are 4 bits per pel, thus * 2 per byte. */ #define PELS_PER_LOOP (BBITS / 4) /************************** Function Header ******************************* * vTrans8N4BPP * Function to transpose the input array into the output array, * where the input data is to be considered N rows of bitmap data, * and the output area is byte aligned. * This version works on 4 bits per pel bitmaps (colour for us). * * RETURNS: * Nothing * * HISTORY: * 15:20 on Tue 30 Jul 1991 -by- Lindsay Harris [lindsayh] * First incarnation, based on vTrans8N. * *************************************************************************/ void vTrans8N4BPP( pbIn, pRData ) BYTE *pbIn; /* Source */ RENDER *pRData; /* Overall rendering info */ { /* * The technique is quite simple, though not necessarily obvious. * Take an 8 scan line by 8 bits block of data, and transform it * into 8 bytes with bits in the scan line order, rather than * along the scan line as supplied. * To do this as quickly as possible, each byte to be converted * is used as an index into a lookup table; each table entry is * 64 bits long (a pair of longs above). These 64 bits are ORed * with the running total of 64 bits (the two variables, dw0, dw1); * shift the running total one bit left. Repeat this operation * for the corresponding byte in the next scan line - this is * the new table lookup index. Repeat for all 8 bytes in the 8 * scan lines being processed. Store the 64 bit temporary results * in the output dword array. Move to the next byte in the * scan line, and repeat the loop for this column. * This function is based on the special 8 X 8 case (vTrans8x8). * The significant differences are that the transposed data needs * to be written byte at a time (instead of DWORD at a time), * and that there are N scan lines to convert in each loop. */ register DWORD dw0, dw1; /* Inner loop temporaries */ register BYTE *pbTemp; register DWORD *pdw; register int cbLine; /* Bytes per line in scan data */ register int i; /* Loop variable. */ register int iBand; /* For moving down the scan lines */ int iSkip; /* Output interleave factor */ int iWide; /* Pixels across the bitmap */ DWORD *pdwOut; /* Destination, local copy */ BYTE *pbBase; /* Start addr of 8 scan line group */ DWORD *pdwOutTmp; /* For output loop */ DWORD *pdwTrans; /* Speedier access */ /* * Set up the local variables from the RENDER structure passed in. * See the above function for explanation of iSkip. */ cbLine = pRData->cbTLine; iSkip = pRData->iTransSkip / DWBYTES; pdwOut = pRData->pvTransBuf; /* Reserved for us! */ pdwTrans = pRData->Trans.pdwTransTab; /* * To ease MMU thrashing, we scan ACROSS the bitmap in 8 line * groups. This results in closer memory references, and so less * page faults and faster execution. Hence, the outer most loop * loops DOWN the scanlines. Then next inner loop scans across groups * of 8 scan lines at a time, while the inner most loop transposes * one byte by 8 scan lines of bitmap image. * Note that processing the data this way causes a slight increase * in scattered memory addresses when writing the output data. * There is no way to avoid one or the other memory references being * scattered; however, the output area is smaller than the input * input, so scattering here will be less severe on the MMU. */ for( iBand = pRData->iTransHigh; iBand >= BBITS; iBand -= BBITS ) { /* * Have selected the next group of 8 scan lines to process, * so scan from left to right, transposing data in 8 x 8 bit * groups. This is the size that can be done very quickly with * a 32 bit environment. */ pbBase = pbIn; pbIn += BBITS * cbLine; /* Next address */ pdwOutTmp = pdwOut; ++pdwOut; /* Onto the next byte sequence */ for( iWide = pRData->iTransWide; iWide > 0; iWide -= BBITS ) { /* * Process the bitmap byte at a time moving across, and * 8 scan lines high. This corresponds to transposing an * 8 x 8 pixel array. We can do that quickly. */ dw0 = 0; dw1 = 0; pbTemp = pbBase++; for( i = BBITS; --i >= 0; pbTemp += cbLine ) { /* The INNER loop - the bit swapping operations */ dw0 >>= 8; dw1 >>= 8; pdw = pdwTrans + (*pbTemp << 1); dw0 |= *pdw << 4; dw1 |= *(pdw + 1) << 4; pbTemp += cbLine; --i; pdw = pdwTrans + (*pbTemp << 1); dw0 |= *pdw; dw1 |= *(pdw + 1); } /* Store the two temporary values in the output buffer. */ *pdwOutTmp = dw0; *(pdwOutTmp + iSkip) = dw1; pdwOutTmp += PELS_PER_LOOP * iSkip; /* Next chunk of output data */ } } /* * There may be some scan lines remaining. If so, iBand will * be > 0, and that indicates the number of output scan lines * remaining. */ if( iBand > 0 ) { /* * This is basically the same as the stripped down version * in the outer loop above. Note that the output data is still * byte aligned, IT IS PRESUMED THAT THE 'MISSING' LINES ARE * ZERO FILLED. This may not be what is desired - it is for * transposing bits to output to a dot matrix printer where * the page length is not a multiple of the number of pins. * I don't know if that can ever happen. */ pbBase = pbIn; pdwOutTmp = pdwOut; for( iWide = pRData->iTransWide; iWide > 0; iWide -= BBITS ) { /* * Process the bitmap byte at a time moving across, and * 8 scan lines high. This corresponds to transposing an * 8 x 8 bit array. We can do that quickly. */ dw0 = 0; dw1 = 0; pbTemp = pbBase++; /* * The inner loop now only transposes as many scan lines * as the bitmap actually contains - we must not run off * the end of memory. */ for( i = iBand; --i >= 0; pbTemp += cbLine ) { /* The INNER loop - the bit swapping operations. */ pdw = pdwTrans + (*pbTemp << 1); if( (i ^ iBand) & 0x1 ) { /* Every even time through the loop */ dw0 >>= 8; dw1 >>= 8; dw0 |= *pdw << 4; dw1 |= *(pdw + 1) << 4; } else { /* Odd times through the loop */ dw0 |= *pdw; dw1 |= *(pdw + 1); } } /* Zero fill the missing bits */ dw0 >>= 8 * ((BBITS - iBand) / 2); dw1 >>= 8 * ((BBITS - iBand) / 2); /* Store the two temporary values in the output buffer. */ *pdwOutTmp = dw0; *(pdwOutTmp + iSkip) = dw1; pdwOutTmp += 2 * iSkip; /* Next chunk of output data */ } } return; } /***************************** Function Header ****************************** * vTransColSep() * Function to transpose the colour bits in a 4 Bits Per Pel colour * bitmap into an array of bytes, where the bytes are ordered in * the same way as the original bits. An example of this is provided * in the explanation for the SEP_TABLE_SIZE value at the top of this file. * * RETURNS: * Nothing * * HISTORY: * 13:48 on Mon 10 Jun 1991 -by- Lindsay Harris [lindsayh] * Time ZERO * ***************************************************************************/ void vTransColSep( pbIn, pRData ) register BYTE *pbIn; /* Source */ RENDER *pRData; /* Overall rendering info */ { /* * Operation is quite simple - pass along the input array byte * at a time, and use each 4 byte group to generate a DWORD of * output - placed in pdwOut. The previously generated translation * table is especially formulated to do this job! * * NOTE: pdwOut and pbIn MAY POINT TO THE SAME ADDRESS! THERE IS * NO OVERLAP IN OPERATIONS TO CAUSE CONFUSION. */ register DWORD dwTemp; register DWORD *pdwSep; int iI; int iBlock; DWORD *pdwOut; /* Destination - DWORD aligned */ iBlock = pRData->cDWLine * pRData->iNumScans; pdwSep = pRData->pdwColrSep; /* Colour separation table */ pdwOut = pRData->pvTransBuf; /* Where the data goes */ /* Loop through the line in 4 byte groups */ for( iI = iBlock; --iI >= 0; ) { dwTemp = *(pdwSep + *pbIn++); dwTemp <<= 2; dwTemp |= *(pdwSep + *pbIn++); dwTemp <<= 2; dwTemp |= *(pdwSep + *pbIn++); *pdwOut++ = (dwTemp << 2) | *(pdwSep + *pbIn++); } return; } /************************** Function Header ******************************** * vTrans8BPP * The transpose function for 8 bits per pel bitmaps. This is rather * easy, as all we do is shuffle bytes! * * RETURNS: * Nothing * * HISTORY: * 17:32 on Tue 03 Nov 1992 -by- Lindsay Harris [lindsayh] * Initial version. * ****************************************************************************/ void vTrans8BPP( pbIn, pRData ) BYTE *pbIn; /* Source */ RENDER *pRData; /* Overall rendering info */ { /* * Scan along the input bitmap, writing the data to the output * in column order. This results in reduced MMU thrashing, as * the output addresses are all limited to a much smaller range * than the incoming addresses. */ register BYTE *pbBase; /* Scan along input bitmap */ register BYTE *pbOut; /* The output scan column pointer */ int iBand; /* Count down scan lines */ int iSkip; /* Offset between output bytes */ int iWide; /* Loop across the input scan line */ int cbLine; /* Bytes per input scan line */ BYTE *pbOutBase; /* Start of column of output data */ /* * Set up the local copies (for faster access) of data passed in. */ cbLine = pRData->cbTLine; iSkip = pRData->iTransSkip; pbOutBase = pRData->pvTransBuf; /* Base output buffer address */ for( iBand = pRData->iTransHigh; iBand > 0; --iBand ) { /* * This loop processes scan lines in the input bitmap. As * we progress across the scan line, the output data is written * in column order. */ pbBase = pbIn; pbIn += cbLine; /* Next scan line, DWORD aligned */ pbOut = pbOutBase; ++pbOutBase; /* One column across output area */ for( iWide = pRData->iTransWide; iWide > 0; iWide -= BBITS ) { /* * This loop traverses the input scan line, taking bytes * and writing them to the output area in column order. */ *pbOut = *pbBase++; pbOut += iSkip; } } return; } /************************** Function Header ******************************** * vTrans24BPP * The transpose function for 8 bits per pel bitmaps. This is rather * easy, as all we do is shuffle bytes! * * RETURNS: * Nothing * * HISTORY: * 17:32 on Tue 03 Nov 1992 -by- Lindsay Harris [lindsayh] * Initial version. * ****************************************************************************/ void vTrans24BPP( pbIn, pRData ) BYTE *pbIn; /* Source */ RENDER *pRData; /* Overall rendering info */ { /* * Scan along the input bitmap, writing the data to the output * in column order. This results in reduced MMU thrashing, as * the output addresses are all limited to a much smaller range * than the incoming addresses. */ register BYTE *pbBase; /* Scan along input bitmap */ register BYTE *pbOut; /* The output scan column pointer */ int iBand; /* Count down scan lines */ int iSkip; /* Offset between output bytes */ int iWide; /* Loop across the input scan line */ int iCol; int iRow; int cbLine; /* Bytes per input scan line */ int iBytesLeftOver; BYTE *pbOutBase; /* Start of column of output data */ /* * Set up the local copies (for faster access) of data passed in. */ iSkip = pRData->iTransSkip; cbLine = pRData->cbTLine; pbOutBase = pRData->pvTransBuf; /* Base output buffer address */ iCol = pRData->iTransWide/pRData->iBPP; iRow = pRData->iTransHigh; iBytesLeftOver = (pRData->iTransHigh *pRData->iBPP) % DWBITS; for( iBand = iRow; iBand > 0; --iBand ) { /* * This loop processes scan lines in the input bitmap. As * we progress across the scan line, the output data is written * in column order. */ pbBase = pbIn; pbIn += cbLine; /* Next scan line, DWORD aligned */ pbOut = pbOutBase; pbOutBase+=3; /* One column across output area */ for( iWide = iCol; iWide > 0; --iWide ) { /* * This loop traverses the input scan line, taking bytes * and writing them to the output area in column order. * Since we are writing bytes three at a time iSkip * points to the end of the first RGB triplet in the line. * therefore we have to backup two bytes so pbOut will * point to the beginning of the RGB triplet. */ *pbOut++ = *pbBase++; *pbOut++ = *pbBase++; *pbOut = *pbBase++; pbOut += (iSkip-2); } } return; }