Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1218 lines
39 KiB

  1. /*++
  2. Copyright (c) 1996 - 1999 Microsoft Corporation
  3. Module Name:
  4. raster.c
  5. Abstract:
  6. The module contains the functions associated with transposing bitmaps.
  7. This includes rotation of 1, 4, 8, and 24 bit formats as well as special
  8. transformations of color formats for planar or vertical head devices.
  9. Environment:
  10. Windows NT Unidrv driver
  11. Revision History:
  12. 12/15/96 -alvins-
  13. Created
  14. --*/
  15. #include "raster.h"
  16. #include "rmrender.h"
  /*
   *  Bit transpose table.
   *
   *  One entry per possible byte value; an entry is a pair of DWORDs
   *  (64 bits).  Each single bit of the source byte is widened into a
   *  whole byte of the entry, so for
   *
   *      input byte:     hgfedcba
   *
   *  the entry holds the eight bytes
   *
   *      0000000a 0000000b 0000000c 0000000d
   *      0000000e 0000000f 0000000g 0000000h
   *
   *  The storage is allocated at DrvEnableSurface time, so no memory
   *  is consumed unless the table is really going to be used.
   */

  #define TABLE_SIZE      (256 * 2 * sizeof(DWORD))

  /*
   *  Colour separation table.
   *
   *  256 entries of one DWORD each.  An RGB(K) input byte is split so
   *  that the two R bits land in one output byte, the two G bits in the
   *  next byte, and so on.  Needed for single pin colour printers such
   *  as the HP PaintJet.  Generation rule:
   *
   *      INPUT BYTE:     KRGBkrgb
   *
   *      OUTPUT DWORD:   000000Kk 000000Rr 000000Gg 000000Bb
   */

  #define SEP_TABLE_SIZE  (256 * sizeof(DWORD))
  44. //*******************************************************
  45. BOOL
  46. bInitTrans (
  47. PDEV *pPDev
  48. )
  49. /*++
  50. Routine Description:
  51. This function initializes the transpose tables. This is done to make
  52. the table independent of whether the processor is big endian or little
  53. endian since the data is generated by the processor that is going
  54. to use it!
  55. Arguments:
  56. pPDev Pointer to PDEV structure
  57. Return Value:
  58. TRUE for success and FALSE for failure (MemAlloc failure)
  59. --*/
  60. {
  61. /*
  62. * Function to generate the transposition table. There is nothing
  63. * difficult about generating the table. The only trick is the use
  64. * of the union. This allows us to setup a DWORD table with the
  65. * byte ordering of the hardware on which we are running. This is
  66. * achieved by writing the data into the BYTE entry, then using
  67. * the same memory as a DWORD to be put away into memory. The reason
  68. * for using DWORDS is to get maximum benefit from memory references
  69. * in the inner loop of the transpose functions.
  70. * Note that the 8/24 bits per pel case is special, as we are shuffling
  71. * bytes around, and thus do not need any tables. For this case,
  72. * return TRUE without allocating any storage.
  73. */
  74. register DWORD *pdw;
  75. register int iShift, j;
  76. int i;
  77. PRASTERPDEV pRPDev = pPDev->pRasterPDEV;
  78. union
  79. {
  80. BYTE b[ 8 ]; /* Exactly 64 bits */
  81. DWORD dw[ 2 ]; /* Also exactly 64 bits */
  82. } u;
  83. if( pRPDev->sDevBPP == 8 || pRPDev->sDevBPP == 24)
  84. {
  85. pRPDev->pdwTrans = NULL;
  86. return TRUE; /* Byte operations - no table needed */
  87. }
  88. if( !(pRPDev->pdwTrans = (DWORD *)MemAlloc( TABLE_SIZE )) )
  89. return FALSE;
  90. pdw = pRPDev->pdwTrans; /* Speedier access */
  91. /*
  92. * Colour requires different tables, as the pixel data consists of
  93. * 4 bits which need to move in a single group.
  94. */
  95. if( pRPDev->fDump & RES_DM_COLOR )
  96. {
  97. /*
  98. * First generate the landscape to portrait transpose data.
  99. * The only complication is maintaining 4 bit nibbles as a single
  100. * entity.
  101. */
  102. u.dw[0] = 0;
  103. for (iShift = 0; iShift < 256; iShift++)
  104. {
  105. u.b[1] = (BYTE)((iShift >> 4) & 0x0f);
  106. u.b[3] = (BYTE)(iShift & 0x0f);
  107. *pdw = u.dw[0];
  108. *(pdw+1) = u.dw[0] << 4;
  109. pdw += 2;
  110. }
  111. /*
  112. * There is an additional transpose operation that requires
  113. * 4 bit pixel data be transformed to another format.
  114. */
  115. pRPDev->pdwColrSep = (DWORD *)MemAlloc( (pRPDev->fDump & RES_DM_GDI) ?
  116. SEP_TABLE_SIZE : TABLE_SIZE );
  117. if( pRPDev->pdwColrSep == NULL )
  118. {
  119. MemFree((LPSTR)pRPDev->pdwTrans );
  120. pRPDev->pdwTrans = 0;
  121. return FALSE;
  122. }
  123. pdw = pRPDev->pdwColrSep; /* Speedier access */
  124. if( pRPDev->fDump & RES_DM_GDI )
  125. {
  126. /*
  127. * HP Paintjet type devices require separating the RGB pixels
  128. * (2 per colour per byte) into bytes where the two bits for
  129. * each color are consecutive.
  130. */
  131. for( i = 0; i <= 0xff; i++ )
  132. {
  133. u.dw[ 0 ] = 0;
  134. iShift = i;
  135. if (!(pRPDev->fColorFormat & DC_OEM_BLACK))
  136. {
  137. //
  138. // if required combine the RGB to CMY(K) conversion
  139. //
  140. if ( !(pRPDev->fColorFormat & DC_PRIMARY_RGB))
  141. {
  142. iShift = (~iShift) & 0x77;
  143. if (pRPDev->fColorFormat & DC_EXTRACT_BLK)
  144. {
  145. if( (iShift & 0x07) == 0x07 )
  146. iShift = (iShift & ~0x07) | 0x08;
  147. if( (iShift & 0x70) == 0x70 )
  148. iShift = (iShift & ~0x70) | 0x80;
  149. }
  150. }
  151. else
  152. iShift &= 0x77;
  153. }
  154. /* The two bits Bb */
  155. u.b[ 3 ] = (BYTE)(((iShift >> 3) & 0x02) | (iShift & 0x1));
  156. iShift >>= 1;
  157. /* The two bits Gg */
  158. u.b[ 2 ] = (BYTE)(((iShift >> 3) & 0x02) | (iShift & 0x1));
  159. iShift >>= 1;
  160. /* The two bits Rr */
  161. u.b[ 1 ] = (BYTE)(((iShift >> 3) & 0x02) | (iShift & 0x1));
  162. iShift >>= 1;
  163. /* The two bits Kk */
  164. u.b[ 0 ] = (BYTE)(((iShift >> 3) & 0x02) | (iShift & 0x1));
  165. *pdw++ = u.dw[ 0 ]; /* Safe for posterity */
  166. }
  167. }
  168. else
  169. {
  170. /*
  171. * The dot matrix case. Here we will call the relevant
  172. * transpose function, but use the modified table below. This
  173. * table will do the colour separation, and will result in the
  174. * transpose operation splitting up the data for each head pass.
  175. */
  176. for( i = 0; i <= 0xff; i++ )
  177. {
  178. /* Each bit of i goes into one byte of the output */
  179. u.dw[ 0 ] = 0;
  180. u.dw[ 1 ] = 0;
  181. iShift = i;
  182. if (!(pRPDev->fColorFormat & DC_OEM_BLACK))
  183. {
  184. //
  185. // if required combine the RGB to CMY(K) conversion
  186. //
  187. if ( !(pRPDev->fColorFormat & DC_PRIMARY_RGB))
  188. {
  189. iShift = (~iShift) & 0x77;
  190. if (pRPDev->fColorFormat & DC_EXTRACT_BLK)
  191. {
  192. if( (iShift & 0x07) == 0x07 )
  193. iShift = (iShift & ~0x07) | 0x08;
  194. if( (iShift & 0x70) == 0x70 )
  195. iShift = (iShift & ~0x70) | 0x80;
  196. }
  197. }
  198. else
  199. iShift &= 0x77;
  200. }
  201. for( j = 8; --j >= 0; )
  202. {
  203. u.b[ j ] = (BYTE)(iShift & 0x1);
  204. iShift >>= 1;
  205. }
  206. /* Store the result */
  207. *pdw = u.dw[0];
  208. *(pdw+1) = u.dw[1];
  209. pdw += 2;
  210. }
  211. }
  212. }
  213. else
  214. {
  215. /*
  216. * Monochrome case - simple transpositions.
  217. */
  218. for( i = 0; i <= 0xff; i++ )
  219. {
  220. /* Each bit of i goes into one byte of the output */
  221. iShift = i;
  222. u.dw[ 0 ] = 0;
  223. u.dw[ 1 ] = 0;
  224. for( j = 8; --j >= 0; )
  225. {
  226. u.b[ j ] = (BYTE)(iShift & 0x1);
  227. iShift >>= 1;
  228. }
  229. /* Store the result */
  230. *pdw = u.dw[0];
  231. *(pdw+1) = u.dw[1];
  232. pdw += 2;
  233. }
  234. }
  235. return TRUE;
  236. }
  237. //*******************************************************
  238. void
  239. vTrans8x8 (
  240. BYTE *pbIn,
  241. RENDER *pRData
  242. )
  243. /*++
  244. Routine Description:
  245. Function to transpose the input array into the output array,
  246. where the input data is to be considered 8 rows of bitmap data,
  247. and the output area is dword aligned.
  248. Arguments:
  249. pbIn Pointer to input data buffer to transform
  250. pRData Pointer to render structure containing all the
  251. necessary information about transforming
  252. Return Value:
  253. none
  254. --*/
  255. {
  256. /*
  257. * The technique is quite simple, though not necessarily obvious.
  258. * Take an 8 scan line by 8 bits block of data, and transform it
  259. * into 8 bytes with bits in the scan line order, rather than
  260. * along the scan line as supplied.
  261. * To do this as quickly as possible, each byte to be converted
  262. * is used as an index into a lookup table; each table entry is
  263. * 64 bits long (a pair of longs above). These 64 bits are ORed
  264. * with the running total of 64 bits (the two variables, dw0, dw1);
  265. * shift the running total one bit left. Repeat this operation
  266. * for the corresponding byte in the next scan line - this is
  267. * the new table lookup index. Repeat for all 8 bytes in the 8
  268. * scan lines being processed. Store the 64 bit temporary results
  269. * in the output dword array. Move to the next byte in the
  270. * scan line, and repeat the loop for this column.
  271. */
  272. register DWORD dw0, dw1; /* Inner loop temporaries */
  273. register BYTE *pbTemp;
  274. register DWORD *pdw;
  275. register int cbLine; /* Bytes per line in scan data */
  276. register int i; /* Loop variable. */
  277. int iWide; /* Pixels across the bitmap */
  278. DWORD *pdwOut; /* Destination */
  279. DWORD *pdwTrans; /* Local copy of output buffer */
  280. /*
  281. * Some initialisation: byte count, area limits, etc.
  282. */
  283. cbLine = pRData->cbTLine;
  284. pdwOut = pRData->pvTransBuf;
  285. pdwTrans = pRData->Trans.pdwTransTab;
  286. if( pRData->iTransHigh != 8 )
  287. {
  288. /* This can happen at the end of a page. */
  289. vTrans8N( pbIn, pRData );
  290. return;
  291. }
  292. /*
  293. * Scan across the lines in groups of 8 bits. In the case that the
  294. * input is not a multiple of 8, we will produce a few extra
  295. * bytes at the end; the caller should allow for this when allocating
  296. * storage for pdwOut. The consequence is that the last few
  297. * bytes will contain garbage; presumably the caller will not
  298. * process them further.
  299. */
  300. for( iWide = pRData->iTransWide; iWide > 0; iWide -= BBITS )
  301. {
  302. dw0 = 0;
  303. dw1 = 0;
  304. /*
  305. * Loop DOWN the scanlines at the starting byte location,
  306. * generating the transposed data as we go.
  307. */
  308. for( i = BBITS, pbTemp = pbIn++; --i >= 0; pbTemp += cbLine )
  309. {
  310. dw0 <<= 1;
  311. dw1 <<= 1;
  312. pdw = pdwTrans + (*pbTemp << 1);
  313. dw0 |= *pdw;
  314. dw1 |= *(pdw + 1);
  315. }
  316. /* Store the two temporary values in the output buffer. */
  317. *pdwOut = dw0;
  318. *(pdwOut + 1) = dw1;
  319. pdwOut += 2;
  320. }
  321. return;
  322. }
  323. //*******************************************************
  324. void
  325. vTrans8N (
  326. BYTE *pbIn,
  327. RENDER *pRData
  328. )
  329. /*++
  330. Routine Description:
  331. Function to transpose the input array into the output array,
  332. where the input data is to be considered N rows of bitmap data,
  333. and the output area is byte aligned.
  334. Arguments:
  335. pbIn Pointer to input data buffer to transform
  336. pRData Pointer to render structure containing all the
  337. necessary information about transforming
  338. Return Value:
  339. none
  340. --*/
  341. {
  342. /*
  343. * The technique is quite simple, though not necessarily obvious.
  344. * Take an 8 scan line by 8 bits block of data, and transform it
  345. * into 8 bytes with bits in the scan line order, rather than
  346. * along the scan line as supplied.
  347. * To do this as quickly as possible, each byte to be converted
  348. * is used as an index into a lookup table; each table entry is
  349. * 64 bits long (a pair of longs above). These 64 bits are ORed
  350. * with the running total of 64 bits (the two variables, dw0, dw1);
  351. * shift the running total one bit left. Repeat this operation
  352. * for the corresponding byte in the next scan line - this is
  353. * the new table lookup index. Repeat for all 8 bytes in the 8
  354. * scan lines being processed. Store the 64 bit temporary results
  355. * in the output dword array. Move to the next byte in the
  356. * scan line, and repeat the loop for this column.
  357. * This function is based on the special 8 X 8 case (vTrans8x8).
  358. * The significant differences are that the transposed data needs
  359. * to be written byte at a time (instead of DWORD at a time),
  360. * and that there are N scan lines to convert in each loop.
  361. */
  362. DWORD dw0, dw1; /* Inner loop temporaries */
  363. BYTE *pbTemp;
  364. DWORD *pdw;
  365. int cbLine; /* Bytes per line in scan data */
  366. int i; /* Loop variable. */
  367. int iBand; /* For moving down the scan lines */
  368. int iSkip; /* Output interleave factor */
  369. int iWide; /* Pixels across the bitmap */
  370. BYTE *pbOut; /* Destination, local copy */
  371. BYTE *pbBase; /* Start addr of 8 scan line group */
  372. BYTE *pbOutTmp; /* For output loop */
  373. DWORD *pdwTrans; /* Speedier access */
  374. BOOL bOptimize = FALSE;
  375. /*
  376. * Set up the local variables from the RENDER structure passed in.
  377. */
  378. cbLine = pRData->cbTLine;
  379. iSkip = pRData->iTransSkip;
  380. pbOut = pRData->pvTransBuf; /* Reserved for us! */
  381. pdwTrans = pRData->Trans.pdwTransTab;
  382. // if the translation table isn't inverting bits and the rows are DWORD aligned
  383. // we can optimize the algorithm by initializing everything to white
  384. // and then skipping the rotation of 32x8 white areas.
  385. //
  386. if (pdwTrans[0] == 0 && !(cbLine & 3) && pRData->iPassHigh == 1)
  387. {
  388. bOptimize = TRUE;
  389. FillMemory (pbOut, pRData->iTransWide * iSkip, 0xff);
  390. }
  391. /*
  392. * To ease MMU thrashing, we scan ACROSS the bitmap in 8 line
  393. * groups. This results in closer memory references, and so less
  394. * page faults and so faster execution. Hence, the outer most loop
  395. * loops DOWN the scanlines. The next inner loop scans across groups
  396. * of 8 scan lines at a time, while the inner most loop transposes
  397. * one byte by 8 scan lines of bitmap image.
  398. * Note that processing the data this way causes a slight increase
  399. * in scattered memory addresses when writing the output data.
  400. * There is no way to avoid one or the other memory references being
  401. * scattered; however, the output area is smaller than the input
  402. * input, so scattering here will be less severe to the MMU.
  403. */
  404. for( iBand = pRData->iTransHigh; iBand >= BBITS; iBand -= BBITS )
  405. {
  406. /*
  407. * Have selected the next group of 8 scan lines to process,
  408. * so scan from left to right, transposing data in 8 x 8 bit
  409. * groups. This is the size that can be done very quickly with
  410. * a 32 bit environment.
  411. */
  412. pbBase = pbIn;
  413. pbIn += BBITS * cbLine; /* Next address */
  414. pbOutTmp = pbOut;
  415. ++pbOut; /* Onto the next byte sequence */
  416. for( iWide = pRData->iTransWide; iWide > 0; iWide -= BBITS )
  417. {
  418. //
  419. // White space optimization. If input data is white then
  420. // we don't have to output rotated data since the
  421. // output buffer is already initialized to white.
  422. //
  423. if (bOptimize)
  424. {
  425. // test whether we are dword aligned so we can check
  426. // 32x8 area for white
  427. if (!((ULONG_PTR)pbBase & 3) && iWide >= DWBITS)
  428. {
  429. if (*(DWORD *)&pbBase[0] == -1 &&
  430. *(DWORD *)&pbBase[cbLine] == -1 &&
  431. *(DWORD *)&pbBase[cbLine*2] == -1 &&
  432. *(DWORD *)&pbBase[cbLine*3] == -1 &&
  433. *(DWORD *)&pbBase[cbLine*4] == -1 &&
  434. *(DWORD *)&pbBase[cbLine*5] == -1 &&
  435. *(DWORD *)&pbBase[cbLine*6] == -1 &&
  436. *(DWORD *)&pbBase[cbLine*7] == -1)
  437. {
  438. pbBase += 4;
  439. iWide -= BBITS * 3;
  440. pbOutTmp += iSkip * DWBITS;
  441. continue;
  442. }
  443. }
  444. // check 8x8 area for white
  445. else
  446. {
  447. if (pbBase[0] == (BYTE)-1 &&
  448. pbBase[cbLine] == (BYTE)-1 &&
  449. pbBase[cbLine*2] == (BYTE)-1 &&
  450. pbBase[cbLine*3] == (BYTE)-1 &&
  451. pbBase[cbLine*4] == (BYTE)-1 &&
  452. pbBase[cbLine*5] == (BYTE)-1 &&
  453. pbBase[cbLine*6] == (BYTE)-1 &&
  454. pbBase[cbLine*7] == (BYTE)-1)
  455. {
  456. pbBase++;
  457. pbOutTmp += iSkip * BBITS;
  458. continue;
  459. }
  460. }
  461. }
  462. /*
  463. * Process the bitmap byte at a time moving across, and
  464. * 8 scan lines high. This corresponds to transposing an
  465. * 8 x 8 bit array. We can do that quickly.
  466. */
  467. pbTemp = pbBase++;
  468. dw0 = 0;
  469. dw1 = 0;
  470. for( i = BBITS; --i >= 0; pbTemp += cbLine )
  471. {
  472. /* The INNER loop - the bit swapping operations */
  473. dw0 <<= 1;
  474. dw1 <<= 1;
  475. pdw = pdwTrans + (*pbTemp << 1);
  476. dw0 |= *pdw;
  477. dw1 |= *(pdw + 1);
  478. }
  479. /* Store the two temporary values in the output buffer. */
  480. *pbOutTmp = (BYTE)dw0;
  481. pbOutTmp += iSkip;
  482. dw0 >>= BBITS; /* One byte's worth */
  483. *pbOutTmp = (BYTE)dw0;
  484. pbOutTmp += iSkip;
  485. dw0 >>= BBITS;
  486. *pbOutTmp = (BYTE)dw0;
  487. pbOutTmp += iSkip;
  488. dw0 >>= BBITS;
  489. *pbOutTmp = (BYTE)dw0;
  490. pbOutTmp += iSkip;
  491. *pbOutTmp = (BYTE)dw1;
  492. pbOutTmp += iSkip;
  493. dw1 >>= BBITS;
  494. *pbOutTmp = (BYTE)dw1;
  495. pbOutTmp += iSkip;
  496. dw1 >>= BBITS;
  497. *pbOutTmp = (BYTE)dw1;
  498. pbOutTmp += iSkip;
  499. dw1 >>= BBITS;
  500. *pbOutTmp = (BYTE)dw1;
  501. pbOutTmp += iSkip; /* Next chunk of output data */
  502. }
  503. }
  504. /*
  505. * There may be some scan lines remaining. If so, iBand will
  506. * be > 0, and that indicates the number of output scan lines
  507. * remaining.
  508. */
  509. if( iBand > 0 )
  510. {
  511. /*
  512. * This is basically the same as the stripped down version
  513. * in the outer loop above. Note that the output data is still
  514. * byte aligned, IT IS PRESUMED THAT THE 'MISSING' LINES ARE
  515. * ZERO FILLED. This may not be what is desired - it is for
  516. * transposing bits to output to a dot matrix printer where
  517. * the page length is not a multiple of the number of pins.
  518. * I don't know if that can ever happen.
  519. */
  520. pbBase = pbIn;
  521. pbOutTmp = pbOut;
  522. for( iWide = pRData->iTransWide; iWide > 0; iWide -= BBITS )
  523. {
  524. /*
  525. * Process the bitmap byte at a time moving across, and
  526. * 8 scan lines high. This corresponds to transposing an
  527. * 8 x 8 bit array. We can do that quickly.
  528. */
  529. dw0 = 0;
  530. dw1 = 0;
  531. pbTemp = pbBase++;
  532. /*
  533. * The inner loop now only transposes as many scan lines
  534. * as the bitmap actually contains - we must not run off
  535. * the end of memory.
  536. */
  537. for( i = iBand; --i >= 0; pbTemp += cbLine )
  538. {
  539. /* The INNER loop - the bit swapping operations */
  540. dw0 <<= 1;
  541. dw1 <<= 1;
  542. pdw = pdwTrans + (*pbTemp << 1);
  543. dw0 |= *pdw;
  544. dw1 |= *(pdw + 1);
  545. }
  546. // white fill remaining bits
  547. //
  548. pdw = pdwTrans + (pRData->ubFillWhite << 1);
  549. i = BBITS - iBand;
  550. while (--i >= 0)
  551. {
  552. dw0 <<= 1;
  553. dw1 <<= 1;
  554. dw0 |= *pdw;
  555. dw1 |= *(pdw + 1);
  556. }
  557. /* Store the two temporary values in the output buffer. */
  558. pbTemp = pbOutTmp;
  559. *pbTemp = (BYTE)dw0;
  560. pbTemp += iSkip;
  561. dw0 >>= BBITS; /* One byte's worth */
  562. *pbTemp = (BYTE)dw0;
  563. pbTemp += iSkip;
  564. dw0 >>= BBITS;
  565. *pbTemp = (BYTE)dw0;
  566. pbTemp += iSkip;
  567. dw0 >>= BBITS;
  568. *pbTemp = (BYTE)dw0;
  569. pbTemp += iSkip;
  570. *pbTemp = (BYTE)dw1;
  571. pbTemp += iSkip;
  572. dw1 >>= BBITS;
  573. *pbTemp = (BYTE)dw1;
  574. pbTemp += iSkip;
  575. dw1 >>= BBITS;
  576. *pbTemp = (BYTE)dw1;
  577. pbTemp += iSkip;
  578. dw1 >>= BBITS;
  579. *pbTemp = (BYTE)dw1;
  580. pbOutTmp += BBITS * iSkip; /* Next chunk of output data */
  581. }
  582. }
  583. return;
  584. }
  585. /*
  586. * Define the number of pels transposed per loop iteration. In the case
  587. * of a colour bitmap, this is 2, since there are 4 bits per pel, thus
  588. * 2 per byte.
  589. */
  590. #define PELS_PER_LOOP (BBITS / 4)
  591. //*******************************************************
  592. void
  593. vTrans8N4BPP (
  594. BYTE *pbIn,
  595. RENDER *pRData
  596. )
  597. /*++
  598. Routine Description:
  599. Function to transpose the input array into the output array,
  600. where the input data is to be considered N rows of bitmap data,
  601. and the output area is byte aligned.
  602. This version works on 4 bits per pel bitmaps (colour for us).
  603. Arguments:
  604. pbIn Pointer to input data buffer to transform
  605. pRData Pointer to render structure containing all the
  606. necessary information about transforming
  607. Return Value:
  608. none
  609. --*/
  610. {
  611. /*
  612. * The technique is quite simple, though not necessarily obvious.
  613. * Take an 8 scan line by 8 bits block of data, and transform it
  614. * into 8 bytes with bits in the scan line order, rather than
  615. * along the scan line as supplied.
  616. * To do this as quickly as possible, each byte to be converted
  617. * is used as an index into a lookup table; each table entry is
  618. * 64 bits long (a pair of longs above). These 64 bits are ORed
  619. * with the running total of 64 bits (the two variables, dw0, dw1);
  620. * shift the running total one bit left. Repeat this operation
  621. * for the corresponding byte in the next scan line - this is
  622. * the new table lookup index. Repeat for all 8 bytes in the 8
  623. * scan lines being processed. Store the 64 bit temporary results
  624. * in the output dword array. Move to the next byte in the
  625. * scan line, and repeat the loop for this column.
  626. * This function is based on the special 8 X 8 case (vTrans8x8).
  627. * The significant differences are that the transposed data needs
  628. * to be written byte at a time (instead of DWORD at a time),
  629. * and that there are N scan lines to convert in each loop.
  630. */
  631. register DWORD dw0, dw1; /* Inner loop temporaries */
  632. register BYTE *pbTemp;
  633. register DWORD *pdw;
  634. register int cbLine; /* Bytes per line in scan data */
  635. register int i; /* Loop variable. */
  636. register int iBand; /* For moving down the scan lines */
  637. int iSkip; /* Output interleave factor */
  638. int iWide; /* Pixels across the bitmap */
  639. DWORD *pdwOut; /* Destination, local copy */
  640. BYTE *pbBase; /* Start addr of 8 scan line group */
  641. DWORD *pdwOutTmp; /* For output loop */
  642. DWORD *pdwTrans; /* Speedier access */
  643. /*
  644. * Set up the local variables from the RENDER structure passed in.
  645. * See the above function for explanation of iSkip.
  646. */
  647. cbLine = pRData->cbTLine;
  648. iSkip = pRData->iTransSkip / DWBYTES;
  649. pdwOut = pRData->pvTransBuf; /* Reserved for us! */
  650. pdwTrans = pRData->Trans.pdwTransTab;
  651. /*
  652. * To ease MMU thrashing, we scan ACROSS the bitmap in 8 line
  653. * groups. This results in closer memory references, and so less
  654. * page faults and faster execution. Hence, the outer most loop
  655. * loops DOWN the scanlines. Then next inner loop scans across groups
  656. * of 8 scan lines at a time, while the inner most loop transposes
  657. * one byte by 8 scan lines of bitmap image.
  658. * Note that processing the data this way causes a slight increase
  659. * in scattered memory addresses when writing the output data.
  660. * There is no way to avoid one or the other memory references being
  661. * scattered; however, the output area is smaller than the input
  662. * input, so scattering here will be less severe on the MMU.
  663. */
  664. for( iBand = pRData->iTransHigh; iBand >= BBITS; iBand -= BBITS )
  665. {
  666. /*
  667. * Have selected the next group of 8 scan lines to process,
  668. * so scan from left to right, transposing data in 8 x 8 bit
  669. * groups. This is the size that can be done very quickly with
  670. * a 32 bit environment.
  671. */
  672. pbBase = pbIn;
  673. pbIn += BBITS * cbLine; /* Next address */
  674. pdwOutTmp = pdwOut;
  675. ++pdwOut; /* Onto the next byte sequence */
  676. for( iWide = pRData->iTransWide; iWide > 0; iWide -= BBITS )
  677. {
  678. /*
  679. * Process the bitmap byte at a time moving across, and
  680. * 8 scan lines high. This corresponds to transposing an
  681. * 8 x 8 pixel array. We can do that quickly.
  682. */
  683. pbTemp = pbBase++;
  684. dw0 = *(pdwTrans + 1 + (*pbTemp << 1));
  685. pbTemp += cbLine;
  686. dw0 |= *(pdwTrans + (*pbTemp << 1));
  687. pbTemp += cbLine;
  688. dw0 >>= 8;
  689. dw0 |= *(pdwTrans + 1 + (*pbTemp << 1));
  690. pbTemp += cbLine;
  691. dw0 |= *(pdwTrans + (*pbTemp << 1));
  692. pbTemp += cbLine;
  693. dw1 = *(pdwTrans + 1 + (*pbTemp << 1));
  694. pbTemp += cbLine;
  695. dw1 |= *(pdwTrans + (*pbTemp << 1));
  696. pbTemp += cbLine;
  697. dw1 >>= 8;
  698. dw1 |= *(pdwTrans + 1 + (*pbTemp << 1));
  699. pbTemp += cbLine;
  700. dw1 |= *(pdwTrans + (*pbTemp << 1));
  701. *(WORD *)pdwOutTmp = (WORD)dw0;
  702. *(((WORD *)pdwOutTmp)+1) = (WORD)dw1;
  703. *(pdwOutTmp+iSkip) = (dw1 & 0xffff0000) | (dw0 >> 16);
  704. pdwOutTmp += PELS_PER_LOOP * iSkip; /* Next chunk of output data */
  705. }
  706. }
  707. /*
  708. * There may be some scan lines remaining. If so, iBand will
  709. * be > 0, and that indicates the number of output scan lines
  710. * remaining.
  711. */
  712. if( iBand > 0 )
  713. {
  714. /*
  715. * This is basically the same as the stripped down version
  716. * in the outer loop above. Note that the output data is still
  717. * byte aligned, IT IS PRESUMED THAT THE 'MISSING' LINES ARE
  718. * ZERO FILLED. This may not be what is desired - it is for
  719. * transposing bits to output to a dot matrix printer where
  720. * the page length is not a multiple of the number of pins.
  721. * I don't know if that can ever happen.
  722. */
  723. pbBase = pbIn;
  724. pdwOutTmp = pdwOut;
  725. for( iWide = pRData->iTransWide; iWide > 0; iWide -= BBITS )
  726. {
  727. /*
  728. * Process the bitmap byte at a time moving across, and
  729. * 8 scan lines high. This corresponds to transposing an
  730. * 8 x 8 bit array. We can do that quickly.
  731. */
  732. pbTemp = pbBase++;
  733. dw0 = *(pdwTrans + 1 + (*pbTemp << 1));
  734. dw1 = 0;
  735. if (iBand > 1)
  736. {
  737. pbTemp += cbLine;
  738. dw0 |= *(pdwTrans + (*pbTemp << 1));
  739. dw0 >>= 8;
  740. if (iBand > 2)
  741. {
  742. pbTemp += cbLine;
  743. dw0 |= *(pdwTrans + 1 + (*pbTemp << 1));
  744. if (iBand > 3)
  745. {
  746. pbTemp += cbLine;
  747. dw0 |= *(pdwTrans + (*pbTemp << 1));
  748. if (iBand > 4)
  749. {
  750. pbTemp += cbLine;
  751. dw1 = *(pdwTrans + 1 + (*pbTemp << 1));
  752. if (iBand > 5)
  753. {
  754. pbTemp += cbLine;
  755. dw1 |= *(pdwTrans + (*pbTemp << 1));
  756. dw1 >>= 8;
  757. if (iBand > 6)
  758. {
  759. pbTemp += cbLine;
  760. dw1 |= *(pdwTrans + 1 + (*pbTemp << 1));
  761. }
  762. }
  763. else
  764. dw1 >>= 8;
  765. }
  766. }
  767. }
  768. }
  769. else
  770. dw0 >>= 8;
  771. *(WORD *)pdwOutTmp = (WORD)dw0;
  772. *(((WORD *)pdwOutTmp)+1) = (WORD)dw1;
  773. *(pdwOutTmp+iSkip) = (dw1 & 0xffff0000) | (dw0 >> 16);
  774. pdwOutTmp += 2 * iSkip; /* Next chunk of output data */
  775. }
  776. }
  777. return;
  778. }
  779. //*******************************************************
  780. void
  781. vTransColSep (
  782. register BYTE *pbIn,
  783. RENDER *pRData
  784. )
  785. /*++
  786. Routine Description:
  787. Function to transpose the colour bits in a 4 Bits Per Pel colour
  788. bitmap into an array of bytes, where the bytes are ordered in
  789. the same way as the original bits. An example of this is provided
  790. in the explanation for the SEP_TABLE_SIZE value at the top of this file.
  791. Arguments:
  792. pbIn Pointer to input data buffer to transform
  793. pRData Pointer to render structure containing all the
  794. necessary information about transforming
  795. Return Value:
  796. none
  797. --*/
  798. {
  799. /*
  800. * Operation is quite simple - pass along the input array byte
  801. * at a time, and use each 4 byte group to generate a DWORD of
  802. * output - placed in pdwOut. The previously generated translation
  803. * table is especially formulated to do this job!
  804. *
  805. * NOTE: pdwOut and pbIn MAY POINT TO THE SAME ADDRESS! THERE IS
  806. * NO OVERLAP IN OPERATIONS TO CAUSE CONFUSION.
  807. */
  808. register DWORD dwTemp;
  809. register DWORD *pdwSep;
  810. int iI;
  811. int iBlock;
  812. DWORD *pdwOut; /* Destination - DWORD aligned */
  813. DWORD dwWhite;
  814. iBlock = pRData->cDWLine * pRData->iNumScans;
  815. pdwSep = pRData->pdwColrSep; /* Colour separation table */
  816. pdwOut = pRData->pvTransBuf; /* Where the data goes */
  817. /* Loop through the line in 4 byte groups */
  818. //
  819. // calculate the white conversion value
  820. //
  821. dwWhite = *(pdwSep + 0x77);
  822. dwWhite |= (dwWhite << 2) | (dwWhite << 4) | (dwWhite << 6);
  823. //
  824. // convert the data to planar including RGB to CMY(K)
  825. //
  826. for (iI = iBlock; --iI >= 0;)
  827. {
  828. if (*(DWORD *)pbIn == 0x77777777L)
  829. {
  830. *pdwOut++ = dwWhite;
  831. }
  832. else
  833. {
  834. dwTemp = *(pdwSep + *pbIn);
  835. dwTemp <<= 2;
  836. dwTemp |= *(pdwSep + pbIn[1]);
  837. dwTemp <<= 2;
  838. dwTemp |= *(pdwSep + pbIn[2]);
  839. *pdwOut++ = (dwTemp << 2) | *(pdwSep + pbIn[3]);
  840. }
  841. pbIn += DWBYTES;
  842. }
  843. return;
  844. }
  845. //*******************************************************
  846. void
  847. vTrans8BPP (
  848. BYTE *pbIn,
  849. RENDER *pRData
  850. )
  851. /*++
  852. Routine Description:
  853. The transpose function for 8 bits per pel bitmaps. This is rather
  854. easy, as all we do is shuffle bytes!
  855. Arguments:
  856. pbIn Pointer to input data buffer to transform
  857. pRData Pointer to render structure containing all the
  858. necessary information about transforming
  859. Return Value:
  860. none
  861. --*/
  862. {
  863. /*
  864. * Scan along the input bitmap, writing the data to the output
  865. * in column order. This results in reduced MMU thrashing, as
  866. * the output addresses are all limited to a much smaller range
  867. * than the incoming addresses.
  868. */
  869. register BYTE *pbBase; /* Scan along input bitmap */
  870. register BYTE *pbOut; /* The output scan column pointer */
  871. int iBand; /* Count down scan lines */
  872. int iSkip; /* Offset between output bytes */
  873. int iWide; /* Loop across the input scan line */
  874. int cbLine; /* Bytes per input scan line */
  875. BYTE *pbOutBase; /* Start of column of output data */
  876. /*
  877. * Set up the local copies (for faster access) of data passed in.
  878. */
  879. cbLine = pRData->cbTLine;
  880. iSkip = pRData->iTransSkip;
  881. pbOutBase = pRData->pvTransBuf; /* Base output buffer address */
  882. for( iBand = pRData->iTransHigh; iBand > 0; --iBand )
  883. {
  884. /*
  885. * This loop processes scan lines in the input bitmap. As
  886. * we progress across the scan line, the output data is written
  887. * in column order.
  888. */
  889. pbBase = pbIn;
  890. pbIn += cbLine; /* Next scan line, DWORD aligned */
  891. pbOut = pbOutBase;
  892. ++pbOutBase; /* One column across output area */
  893. for( iWide = pRData->iTransWide; iWide > 0; iWide -= BBITS )
  894. {
  895. /*
  896. * This loop traverses the input scan line, taking bytes
  897. * and writing them to the output area in column order.
  898. */
  899. *pbOut = *pbBase++;
  900. pbOut += iSkip;
  901. }
  902. }
  903. return;
  904. }
  905. //*******************************************************
  906. void
  907. vTrans24BPP (
  908. BYTE *pbIn,
  909. RENDER *pRData
  910. )
  911. /*++
  912. Routine Description:
  913. The transpose function for 8 bits per pel bitmaps. This is rather
  914. easy, as all we do is shuffle bytes!
  915. Arguments:
  916. pbIn Pointer to input data buffer to transform
  917. pRData Pointer to render structure containing all the
  918. necessary information about transforming
  919. Return Value:
  920. none
  921. --*/
  922. {
  923. /*
  924. * Scan along the input bitmap, writing the data to the output
  925. * in column order. This results in reduced MMU thrashing, as
  926. * the output addresses are all limited to a much smaller range
  927. * than the incoming addresses.
  928. */
  929. register BYTE *pbBase; /* Scan along input bitmap */
  930. register BYTE *pbOut; /* The output scan column pointer */
  931. int iBand; /* Count down scan lines */
  932. int iSkip; /* Offset between output bytes */
  933. int iWide; /* Loop across the input scan line */
  934. int iCol;
  935. int iRow;
  936. int cbLine; /* Bytes per input scan line */
  937. int iBytesLeftOver;
  938. BYTE *pbOutBase; /* Start of column of output data */
  939. /*
  940. * Set up the local copies (for faster access) of data passed in.
  941. */
  942. iSkip = pRData->iTransSkip;
  943. cbLine = pRData->cbTLine;
  944. pbOutBase = pRData->pvTransBuf; /* Base output buffer address */
  945. iCol = pRData->iTransWide/pRData->iBPP;
  946. iRow = pRData->iTransHigh;
  947. iBytesLeftOver = (pRData->iTransHigh *pRData->iBPP) % DWBITS;
  948. for( iBand = iRow; iBand > 0; --iBand )
  949. {
  950. /*
  951. * This loop processes scan lines in the input bitmap. As
  952. * we progress across the scan line, the output data is written
  953. * in column order.
  954. */
  955. pbBase = pbIn;
  956. pbIn += cbLine; /* Next scan line, DWORD aligned */
  957. pbOut = pbOutBase;
  958. pbOutBase+=3; /* One column across output area */
  959. for( iWide = iCol; iWide > 0; --iWide )
  960. {
  961. /*
  962. * This loop traverses the input scan line, taking bytes
  963. * and writing them to the output area in column order.
  964. */
  965. *pbOut = *pbBase++;
  966. *(pbOut+1) = *pbBase++;
  967. *(pbOut+2) = *pbBase++;
  968. pbOut += iSkip;
  969. }
  970. }
  971. return;
  972. }