Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1731 lines
33 KiB

  1. /*++
  2. Copyright (c) 1989 Microsoft Corporation
  3. Module Name:
  4. Mrcf.c
  5. Abstract:
  6. This module implements the Compress routines for the Double Space File System
  7. Author:
  8. Gary Kimura [GaryKi] 26-May-1993
  9. Revision History:
  10. --*/
  11. #include <ntos.h>
  12. #include <stdio.h>
  13. #include "mrcf.h"
  14. //
  15. // The debug macros
  16. //
  17. #ifdef MRCFDBG
  18. #define DbgDoit(X) {X;}
  19. #define ChPrint(b) (isprint(b) ? b : '.')
  20. #define DbgPrint printf
  21. #else
  22. #define DbgDoit(X) {NOTHING;}
  23. #endif // MRCFDBG
  24. //
  25. // Compress this much before each EOS
  26. //
  27. #define cbCOMPRESSCHUNK (512)
  28. //
  29. // Maximum back-pointer value, also used to indicate end of compressed stream!
  30. //
  31. #define wBACKPOINTERMAX (4415)
  32. //
  33. // bitsEND_OF_STREAM - bits that mark end of compressed stream (EOS)
  34. //
  35. // This pattern is used to indicate the end of a "chunk" in a compressed
  36. // data stream. The Compress code compresses up to 512 bytes, writes
  37. // this pattern, and continues.
  38. //
  39. // NOTE: This diagram is interpreted right to left.
  40. //
  41. // ? ---offset----
  42. //
  43. // ?.111-1111-1111-1.1.11
  44. //
  45. // \---7F---/ \----FF---/
  46. //
  47. // This is a 12-bit "match" code with a maximum offset.
  48. // NOTE: There is no length component!
  49. //
  50. // Define the EOS and also say how many bits it is.
  51. //
  52. #define bitsEND_OF_STREAM (0x7FFF)
  53. #define cbitsEND_OF_STREAM (15)
  54. //
  55. // MDSIGNATURE - Signature at start of each compressed block
  56. //
  57. // This 4-byte signature is used as a check to ensure that we
  58. // are decompressing data we compressed, and also to indicate
  59. // which compression method was used.
  60. //
  61. // NOTE: A compressed block consists of one or more "chunks", separated
  62. // by the bitsEND_OF_STREAM pattern.
  63. //
  64. // Byte Word
  65. // ----------- ---------
  66. // 0 1 2 3 0 1 Meaning
  67. // -- -- -- -- ---- ---- ----------------
  68. // 44 53 00 01 5344 0100 MaxCompression
  69. // 44 53 00 02 5344 0200 StandardCompression
  70. //
  71. // NOTE: The *WORD* values are listed to be clear about the
  72. // byte ordering!
  73. //
  74. typedef struct _MDSIGNATURE {
  75. //
  76. // Must be MD_STAMP
  77. //
  78. USHORT sigStamp;
  79. //
  80. // mdsSTANDARD or mdsMAX
  81. //
  82. USHORT sigType;
  83. } MDSIGNATURE;
  84. typedef MDSIGNATURE *PMDSIGNATURE;
  85. #define MD_STAMP 0x5344 // Signature stamp at start of compressed blk
  86. #define mdsSTANDARD 0x0200 // StandardCompressed block
  87. #define MASK_VALID_mds 0x0300 // All other bits must be zero
  88. //
  89. // Local procedure declarations and macros
  90. //
  91. #define min(a,b) (a < b ? a : b)
  92. //
  93. // PFNFINDMATCH - Lookup function type for XxxxCompression routines
  94. //
  95. typedef ULONG (*PFNFINDMATCH) (
  96. ULONG UncompressedIndex,
  97. PUCHAR UncompressedBuffer,
  98. ULONG UncompressedLength,
  99. PULONG MatchedStringIndex,
  100. PMRCF_STANDARD_COMPRESS WorkSpace
  101. );
  102. //
  103. // Local procedure prototypes
  104. //
  105. VOID
  106. MrcfSetBitBuffer (
  107. PUCHAR pb,
  108. ULONG cb,
  109. PMRCF_BIT_IO BitIo
  110. );
  111. VOID
  112. MrcfFillBitBuffer (
  113. PMRCF_BIT_IO BitIo
  114. );
  115. USHORT
  116. MrcfReadBit (
  117. PMRCF_BIT_IO BitIo
  118. );
  119. USHORT
  120. MrcfReadNBits (
  121. LONG cbits,
  122. PMRCF_BIT_IO BitIo
  123. );
  124. #ifdef DOUBLE_SPACE_WRITE
  125. ULONG
  126. MrcfDoCompress (
  127. PUCHAR CompressedBuffer,
  128. ULONG CompressedLength,
  129. PUCHAR UncompressedBuffer,
  130. ULONG UncompressedLength,
  131. PFNFINDMATCH FindMatchFunction,
  132. PMRCF_STANDARD_COMPRESS WorkSpace
  133. );
  134. ULONG
  135. MrcfCompressChunk (
  136. PUCHAR UncompressedBuffer,
  137. ULONG UncompressedIndex,
  138. ULONG UncompressedLength,
  139. PFNFINDMATCH FindMatchFunction,
  140. PMRCF_STANDARD_COMPRESS WorkSpace
  141. );
  142. ULONG
  143. MrcfFindMatchStandard (
  144. ULONG UncompressedIndex,
  145. PUCHAR UncompressedBuffer,
  146. ULONG UncompressedLength,
  147. PULONG MatchedStringIndex,
  148. PMRCF_STANDARD_COMPRESS WorkSpace
  149. );
  150. ULONG
  151. MrcfGetMatchLength (
  152. PUCHAR UncompressedBuffer,
  153. ULONG MatchIndex,
  154. ULONG CurrentIndex,
  155. ULONG UncompressedLength
  156. );
  157. BOOLEAN
  158. MrcfEncodeByte (
  159. UCHAR b,
  160. PMRCF_BIT_IO BitIo
  161. );
  162. BOOLEAN
  163. MrcfEncodeMatch (
  164. ULONG off,
  165. ULONG cb,
  166. PMRCF_BIT_IO BitIo
  167. );
  168. BOOLEAN
  169. MrcfWriteBit (
  170. ULONG bit,
  171. PMRCF_BIT_IO BitIo
  172. );
  173. BOOLEAN
  174. MrcfWriteNBits (
  175. ULONG abits,
  176. LONG cbits,
  177. PMRCF_BIT_IO BitIo
  178. );
  179. ULONG
  180. MrcfFlushBitBuffer (
  181. PMRCF_BIT_IO BitIo
  182. );
  183. #endif // DOUBLE_SPACE_WRITE
  184. //**** unconverted routines ****
  185. VOID
  186. MrcfDoInterMaxPairs (
  187. ULONG ibU,
  188. PUCHAR pbU,
  189. ULONG cbMatch,
  190. PVOID WorkSpace
  191. );
  192. ULONG
  193. MrcfDoMaxPairLookup (
  194. ULONG ibU,
  195. PUCHAR pbU,
  196. PVOID WorkSpace
  197. );
  198. ULONG
  199. MrcfFindMatchMax (
  200. ULONG ibU,
  201. PUCHAR pbU,
  202. ULONG cbU,
  203. PULONG piPrev,
  204. BOOLEAN fLast,
  205. PVOID WorkSpace
  206. );
  207. ULONG
  208. MrcfDecompress (
  209. PUCHAR UncompressedBuffer,
  210. ULONG UncompressedLength,
  211. PUCHAR CompressedBuffer,
  212. ULONG CompressedLength,
  213. PMRCF_DECOMPRESS WorkSpace
  214. )
  215. /*++
  216. Routine Description:
  217. This routine decompresses a buffer of StandardCompressed or MaxCompressed
  218. data.
  219. Arguments:
  220. UncompressedBuffer - buffer to receive uncompressed data
  221. UncompressedLength - length of UncompressedBuffer
  222. NOTE: UncompressedLength must be the EXACT length of the uncompressed
  223. data, as Decompress uses this information to detect
  224. when decompression is complete. If this value is
  225. incorrect, Decompress may crash!
  226. CompressedBuffer - buffer containing compressed data
  227. CompressedLength - length of CompressedBuffer
  228. WorkSpace - pointer to a private work area for use by this operation
  229. Return Value:
  230. ULONG - Returns the size of the decompressed data in bytes. Returns 0 if
  231. there was an error in the decompress.
  232. --*/
  233. {
  234. ULONG cbMatch; // Length of match string
  235. ULONG i; // Index in UncompressedBuffer to receive decoded data
  236. ULONG iMatch; // Index in UncompressedBuffer of matched string
  237. ULONG k; // Number of bits in length string
  238. ULONG off; // Offset from i in UncompressedBuffer of match string
  239. USHORT x; // Current bit being examined
  240. ULONG y;
  241. //
  242. // verify that compressed data starts with proper signature
  243. //
  244. if (CompressedLength < sizeof(MDSIGNATURE) || // Must have signature
  245. ((PMDSIGNATURE)CompressedBuffer)->sigStamp != MD_STAMP || // Stamp must be OK
  246. ((PMDSIGNATURE)CompressedBuffer)->sigType & (~MASK_VALID_mds)) { // Type must be OK
  247. return 0;
  248. }
  249. //
  250. // Skip over the valid signature
  251. //
  252. CompressedLength -= sizeof(MDSIGNATURE);
  253. CompressedBuffer += sizeof(MDSIGNATURE);
  254. //
  255. // Set up for decompress, start filling UncompressedBuffer at front
  256. //
  257. i = 0;
  258. //
  259. // Set statics to save parm passing
  260. //
  261. MrcfSetBitBuffer(CompressedBuffer,CompressedLength,&WorkSpace->BitIo);
  262. while (TRUE) {
  263. DbgDoit( DbgPrint("UncompressedOffset i = %3x ",i) );
  264. DbgDoit( DbgPrint("CompressedOffset = (%3x.%2x) ", WorkSpace->BitIo.cbBBInitial - WorkSpace->BitIo.cbBB, 16 - WorkSpace->BitIo.cbitsBB) );
  265. y = MrcfReadNBits(2,&WorkSpace->BitIo);
  266. //
  267. // Check if next 7 bits are a byte
  268. // 1 if 128..255 (0x80..0xff), 2 if 0..127 (0x00..0x7f)
  269. //
  270. if (y == 1 || y == 2) {
  271. ASSERTMSG("Don't exceed expected length ", i<UncompressedLength);
  272. UncompressedBuffer[i] = (UCHAR)((y == 1 ? 0x80 : 0) | MrcfReadNBits(7,&WorkSpace->BitIo));
  273. DbgDoit( DbgPrint("byte: %02x = '%c'\n",(USHORT)UncompressedBuffer[i],ChPrint(UncompressedBuffer[i])) );
  274. i++;
  275. } else {
  276. //
  277. // Have match sequence
  278. //
  279. DbgDoit( DbgPrint("offset(") );
  280. //
  281. // Get the offset
  282. //
  283. if (y == 0) {
  284. //
  285. // next 6 bits are offset
  286. //
  287. off = MrcfReadNBits(6,&WorkSpace->BitIo);
  288. DbgDoit( DbgPrint("%x): %x",6,off) );
  289. ASSERTMSG("offset 0 is invalid ", off != 0);
  290. } else {
  291. x = MrcfReadBit(&WorkSpace->BitIo);
  292. if (x == 0) {
  293. //
  294. // next 8 bits are offset-64 (0x40)
  295. //
  296. off = MrcfReadNBits(8, &WorkSpace->BitIo) + 64;
  297. DbgDoit( DbgPrint("%x): %x",8,off) );
  298. } else {
  299. //
  300. // next 12 bits are offset-320 (0x140)
  301. //
  302. off = MrcfReadNBits(12, &WorkSpace->BitIo) + 320;
  303. DbgDoit( DbgPrint("%x): %x",12,off) );
  304. if (off == wBACKPOINTERMAX) {
  305. //
  306. // EOS marker
  307. //
  308. DbgDoit( DbgPrint("; EOS\n") );
  309. if (i >= UncompressedLength) {
  310. //
  311. // Done with entire buffer
  312. //
  313. DbgDoit( DbgPrint("Uncompressed Length = %x\n",i) );
  314. return i;
  315. } else {
  316. //
  317. // More to do
  318. // Done with a 512-byte chunk
  319. //
  320. continue;
  321. }
  322. }
  323. }
  324. }
  325. ASSERTMSG("Don't exceed expected length ", i<UncompressedLength);
  326. ASSERTMSG("Cannot match before start of uncoded buffer! ", off <= i);
  327. //
  328. // Get the length - logarithmically encoded
  329. //
  330. for (k=0; (x=MrcfReadBit(&WorkSpace->BitIo)) == 0; k++) { NOTHING; }
  331. ASSERT(k <= 8);
  332. if (k == 0) {
  333. //
  334. // All matches at least 2 chars long
  335. //
  336. cbMatch = 2;
  337. } else {
  338. cbMatch = (1 << k) + 1 + MrcfReadNBits(k, &WorkSpace->BitIo);
  339. }
  340. DbgDoit( DbgPrint("; length=%x; '",cbMatch) );
  341. ASSERTMSG("Don't exceed buffer size ", (i - off + cbMatch - 1) <= UncompressedLength);
  342. //
  343. // Copy the matched string
  344. //
  345. iMatch = i - off;
  346. while ( (cbMatch > 0) && (i<UncompressedLength) ) {
  347. DbgDoit( DbgPrint("%c",ChPrint(UncompressedBuffer[iMatch])) );
  348. UncompressedBuffer[i++] = UncompressedBuffer[iMatch++];
  349. cbMatch--;
  350. }
  351. DbgDoit( DbgPrint("'\n") );
  352. ASSERTMSG("Should have copied it all ", cbMatch == 0);
  353. }
  354. }
  355. }
  356. //
  357. // Internal Support Routine
  358. //
  359. VOID
  360. MrcfSetBitBuffer (
  361. PUCHAR pb,
  362. ULONG cb,
  363. PMRCF_BIT_IO BitIo
  364. )
  365. /*++
  366. Routine Description:
  367. Set statics with coded buffer pointer and length
  368. Arguments:
  369. pb - pointer to compressed data buffer
  370. cb - length of compressed data buffer
  371. BitIo - Supplies a pointer to the bit buffer statics
  372. Return Value:
  373. None.
  374. --*/
  375. {
  376. BitIo->pbBB = pb;
  377. BitIo->cbBB = cb;
  378. BitIo->cbBBInitial = cb;
  379. BitIo->cbitsBB = 0;
  380. BitIo->abitsBB = 0;
  381. }
  382. //
  383. // Internal Support Routine
  384. //
  385. USHORT
  386. MrcfReadBit (
  387. PMRCF_BIT_IO BitIo
  388. )
  389. /*++
  390. Routine Description:
  391. Get next bit from bit buffer
  392. Arguments:
  393. BitIo - Supplies a pointer to the bit buffer statics
  394. Return Value:
  395. USHORT - Returns next bit (0 or 1)
  396. --*/
  397. {
  398. USHORT bit;
  399. //
  400. // Check if no bits available
  401. //
  402. if ((BitIo->cbitsBB) == 0) {
  403. MrcfFillBitBuffer(BitIo);
  404. }
  405. //
  406. // Decrement the bit count
  407. // get the bit, remove it, and return the bit
  408. //
  409. (BitIo->cbitsBB)--;
  410. bit = (BitIo->abitsBB) & 1;
  411. (BitIo->abitsBB) >>= 1;
  412. return bit;
  413. }
  414. //
  415. // Internal Support Routine
  416. //
  417. USHORT
  418. MrcfReadNBits (
  419. LONG cbits,
  420. PMRCF_BIT_IO BitIo
  421. )
  422. /*++
  423. Routine Description:
  424. Get next N bits from bit buffer
  425. Arguments:
  426. cbits - count of bits to get
  427. BitIo - Supplies a pointer to the bit buffer statics
  428. Return Value:
  429. USHORT - Returns next cbits bits.
  430. --*/
  431. {
  432. ULONG abits; // Bits to return
  433. LONG cbitsPart; // Partial count of bits
  434. ULONG cshift; // Shift count
  435. ULONG mask; // Mask
  436. //
  437. // Largest number of bits we should read at one time is 12 bits for
  438. // a 12-bit offset. The largest length field component that we
  439. // read is 8 bits. If this routine were used for some other purpose,
  440. // it can support up to 15 (NOT 16) bit reads, due to how the masking
  441. // code works.
  442. //
  443. ASSERT(cbits <= 12);
  444. //
  445. // No shift and no bits yet
  446. //
  447. cshift = 0;
  448. abits = 0;
  449. while (cbits > 0) {
  450. //
  451. // If not bits available get some bits
  452. //
  453. if ((BitIo->cbitsBB) == 0) {
  454. MrcfFillBitBuffer(BitIo);
  455. }
  456. //
  457. // Number of bits we can read
  458. //
  459. cbitsPart = min((BitIo->cbitsBB), cbits);
  460. //
  461. // Mask for bits we want, extract and store them
  462. //
  463. mask = (1 << cbitsPart) - 1;
  464. abits |= ((BitIo->abitsBB) & mask) << cshift;
  465. //
  466. // Remember the next chunk of bits
  467. //
  468. cshift = cbitsPart;
  469. //
  470. // Update bit buffer, move remaining bits down and
  471. // update count of bits left
  472. //
  473. (BitIo->abitsBB) >>= cbitsPart;
  474. (BitIo->cbitsBB) -= cbitsPart;
  475. //
  476. // Update count of bits left to read
  477. //
  478. cbits -= cbitsPart;
  479. }
  480. //
  481. // Return requested bits
  482. //
  483. return (USHORT)abits;
  484. }
  485. //
  486. // Internal Support Routine
  487. //
  488. VOID
  489. MrcfFillBitBuffer (
  490. PMRCF_BIT_IO BitIo
  491. )
  492. /*++
  493. Routine Description:
  494. Fill abitsBB from static bit buffer
  495. Arguments:
  496. BitIo - Supplies a pointer to the bit buffer statics
  497. Return Value:
  498. None.
  499. --*/
  500. {
  501. ASSERT((BitIo->cbitsBB) == 0);
  502. switch (BitIo->cbBB) {
  503. case 0:
  504. ASSERTMSG("no bits left in coded buffer!", FALSE);
  505. break;
  506. case 1:
  507. //
  508. // Get last byte and adjust count
  509. //
  510. BitIo->cbitsBB = 8;
  511. BitIo->abitsBB = *(BitIo->pbBB)++;
  512. BitIo->cbBB--;
  513. break;
  514. default:
  515. //
  516. // Get word and adjust count
  517. //
  518. BitIo->cbitsBB = 16;
  519. BitIo->abitsBB = *((USHORT *)(BitIo->pbBB))++;
  520. BitIo->cbBB -= 2;
  521. break;
  522. }
  523. }
  524. #ifdef DOUBLE_SPACE_WRITE
  525. ULONG
  526. MrcfStandardCompress (
  527. PUCHAR CompressedBuffer,
  528. ULONG CompressedLength,
  529. PUCHAR UncompressedBuffer,
  530. ULONG UncompressedLength,
  531. PMRCF_STANDARD_COMPRESS WorkSpace
  532. )
  533. /*++
  534. Routine Description:
  535. This routine compresses a buffer using the standard compression algorithm.
  536. Arguments:
  537. CompressedBuffer - buffer to receive compressed data
  538. CompressedLength - length of CompressedBuffer
  539. UncompressedBuffer - buffer containing uncompressed data
  540. UncompressedLength - length of UncompressedBuffer
  541. WorkSpace - pointer to a private work area for use by this operation
  542. Return Value:
  543. ULONG - Returns the size of the compressed data in bytes. Returns 0 if
  544. the data is not compressible
  545. --*/
  546. {
  547. ULONG i,j;
  548. //
  549. // Fill lookup tables with initial values
  550. //
  551. for (i=0; i<256; i++) {
  552. for (j = 0; j < cMAXSLOTS; j++) {
  553. WorkSpace->ltX[i][j] = ltUNUSED; // Mark offset look-up entries unused
  554. WorkSpace->abChar[i][j] = bRARE; // Mark match char entries unused
  555. }
  556. WorkSpace->abMRUX[i] = mruUNUSED; // MRU pointer = unused
  557. }
  558. //
  559. // Do compression, first set type and then do the compression
  560. //
  561. ((PMDSIGNATURE)CompressedBuffer)->sigType = mdsSTANDARD;
  562. return MrcfDoCompress( CompressedBuffer,
  563. CompressedLength,
  564. UncompressedBuffer,
  565. UncompressedLength,
  566. MrcfFindMatchStandard,
  567. WorkSpace );
  568. }
  569. //
  570. // Internal Support Routine
  571. //
  572. ULONG
  573. MrcfDoCompress (
  574. PUCHAR CompressedBuffer,
  575. ULONG CompressedLength,
  576. PUCHAR UncompressedBuffer,
  577. ULONG UncompressedLength,
  578. PFNFINDMATCH FindMatchFunction,
  579. PMRCF_STANDARD_COMPRESS WorkSpace
  580. )
  581. /*++
  582. Routine Description:
  583. This routine compresses a data buffer
  584. Arguments:
  585. CompressedBuffer - buffer to receive compressed data
  586. CompressedLength - length of CompressedBuffer
  587. UncompressedBuffer - buffer containing uncompressed data
  588. UncompressedLength - length of UncompressedBuffer
  589. FindMatchFunction - matching function
  590. WorkSpace - Supplies a pointer to the bit buffer statics
  591. Return Value:
  592. ULONG - Returns the size of the compressed data in bytes. Returns 0 if
  593. the data is not compressible
  594. --*/
  595. {
  596. ULONG cbDone; // Count of uncompressed bytes processed so far
  597. ULONG cb; // Count of bytes processed in a chunk
  598. ASSERT(CompressedLength >= UncompressedLength);
  599. //
  600. // Treat zero-length request specially as Not compressible
  601. //
  602. if (UncompressedLength == 0) { return 0; }
  603. //
  604. // Write signature to compressed data block
  605. //
  606. ((PMDSIGNATURE)CompressedBuffer)->sigStamp = MD_STAMP;
  607. CompressedLength -= sizeof(MDSIGNATURE);
  608. CompressedBuffer += sizeof(MDSIGNATURE);
  609. //
  610. // Set statics to save parm passing
  611. //
  612. MrcfSetBitBuffer(CompressedBuffer, CompressedLength, &WorkSpace->BitIo);
  613. //
  614. // Start with first chunk
  615. // and process entire buffer
  616. //
  617. cbDone = 0;
  618. while (cbDone < UncompressedLength) {
  619. //
  620. // Compress a chunk
  621. //
  622. cb = MrcfCompressChunk( UncompressedBuffer,
  623. cbDone,
  624. UncompressedLength,
  625. FindMatchFunction,
  626. WorkSpace );
  627. //
  628. // Check if we could not compress, i.e., Not compressible
  629. //
  630. if (cb == 0) { return 0; }
  631. cbDone += cb;
  632. if (FALSE) { //**** if (WorkSpace->fMaxCmp) {
  633. //
  634. // MAXCMP check
  635. //
  636. if ((cbDone < UncompressedLength) && (WorkSpace->BitIo.cbBB < 586)) { return 0; }
  637. } else {
  638. //
  639. // RCOMP check
  640. //
  641. //**** if (WorkSpace->BitIo.cbBB <= 586) { return 0; }
  642. }
  643. }
  644. ASSERT(cbDone == UncompressedLength);
  645. //
  646. // Make sure we saved some space
  647. //
  648. cb = sizeof(MDSIGNATURE) + MrcfFlushBitBuffer( &WorkSpace->BitIo );
  649. if (TRUE) { //**** if (!WorkSpace->fMaxCmp) {
  650. if (cb+8 >= UncompressedLength) { return 0; }
  651. }
  652. if (cb < UncompressedLength) {
  653. return cb;
  654. } else {
  655. return 0;
  656. }
  657. }
  658. //
  659. // Internal Support Routine
  660. //
  661. ULONG
  662. MrcfCompressChunk (
  663. PUCHAR UncompressedBuffer,
  664. ULONG UncompressedIndex,
  665. ULONG UncompressedLength,
  666. PFNFINDMATCH FindMatchFunction,
  667. PMRCF_STANDARD_COMPRESS WorkSpace
  668. )
  669. /*++
  670. Routine Description:
  671. This routine compresses a chunk of uncompressed data
  672. Arguments:
  673. UncompressedBuffer - buffer containing uncompressed data
  674. UncompressedIndex - index in UncompressedBuffer to start compressing (0 => first byte)
  675. UncompressedLength - length of UncompressedBuffer
  676. FindMatchFunction - matching function
  677. WorkSpace - Supplies a pointer to the bit buffer statics
  678. Return Value:
  679. ULONG - Returns the non-zero count of uncompressed bytes processed.
  680. Returns 0 if the data is not compressible
  681. --*/
  682. {
  683. UCHAR b1; // First byte of pair
  684. UCHAR b2; // Second byte of pair
  685. ULONG cbChunk; // Count of bytes in chunk to compress
  686. ULONG cbMatch; // Count of bytes matched
  687. ULONG cbUChunk; // Phony buffer length, for compressing this chunk
  688. BOOLEAN fLast; // TRUE if this is the last chunk
  689. ULONG i; // Index in byte stream being compressed
  690. ULONG iPrev; // Previous table entry
  691. ASSERT(UncompressedLength > 0);
  692. ASSERT(UncompressedBuffer != 0);
  693. ASSERT(UncompressedIndex < UncompressedLength);
  694. //
  695. // Only compress one chunk
  696. //
  697. cbChunk = min((UncompressedLength-UncompressedIndex),cbCOMPRESSCHUNK);
  698. //
  699. // Limit to chunk length
  700. //
  701. cbUChunk = UncompressedIndex + cbChunk;
  702. ASSERT(cbUChunk <= UncompressedLength);
  703. //
  704. // TRUE if last chunk of buffer
  705. //
  706. fLast = (cbUChunk == UncompressedLength);
  707. //
  708. // Limit to chunk length
  709. //
  710. UncompressedLength = cbUChunk;
  711. //
  712. // Scan each pair of bytes
  713. //
  714. //
  715. // First byte of input
  716. //
  717. b2 = UncompressedBuffer[UncompressedIndex];
  718. //
  719. // Process all bytes in chunk
  720. //
  721. for (i=UncompressedIndex+1; i<UncompressedLength; ) {
  722. //
  723. // Set Last byte, Next byte, and find a match
  724. //
  725. b1 = b2;
  726. b2 = UncompressedBuffer[i];
  727. cbMatch = (*FindMatchFunction)(i,UncompressedBuffer,UncompressedLength,&iPrev,WorkSpace);
  728. //
  729. // Check if we got match
  730. //
  731. if (cbMatch >= 2) {
  732. DbgDoit( DbgPrint("<Match>: '%c%c' at offset %x for length %x\n",
  733. ChPrint(UncompressedBuffer[i-1]),ChPrint(UncompressedBuffer[i]),i-1, cbMatch) );
  734. //
  735. // Pass offset and length, and check for failure (i.e., data incompressible)
  736. //
  737. if (!MrcfEncodeMatch(i-iPrev,cbMatch,&WorkSpace->BitIo)) {
  738. return 0;
  739. }
  740. //
  741. // Now we have to continue with the first pair of bytes
  742. // after the string we matched: mmmmmmm12
  743. //
  744. //
  745. // i is index of 2nd byte after match!
  746. //
  747. i += cbMatch;
  748. //
  749. // Check if at least 1 byte still to compress, if so
  750. // get 1st byte after match, for loop, otherwise put out EOS
  751. //
  752. if (i <= UncompressedLength) {
  753. b2 = UncompressedBuffer[i-1];
  754. } else {
  755. goto WriteEOS;
  756. }
  757. } else {
  758. //
  759. // No match found, Store one byte and continue, and check
  760. // for failure (i.e., data incompressible)
  761. //
  762. if (!MrcfEncodeByte(b1,&WorkSpace->BitIo)) {
  763. return 0;
  764. }
  765. //
  766. // Advance to next byte
  767. //
  768. i++;
  769. }
  770. }
  771. //
  772. // Store last byte, and again check for failure
  773. //
  774. if (!MrcfEncodeByte(b2,&WorkSpace->BitIo)) {
  775. return 0;
  776. }
  777. WriteEOS:
  778. //
  779. // write out EOS, and check for failure otherwise return how much
  780. // data we processed
  781. //
  782. if (!MrcfWriteNBits( bitsEND_OF_STREAM,
  783. cbitsEND_OF_STREAM,
  784. &WorkSpace->BitIo )) {
  785. return 0;
  786. } else {
  787. return cbChunk;
  788. }
  789. }
  790. //
  791. // Internal Support Routine
  792. //
  793. ULONG
  794. MrcfFindMatchStandard (
  795. ULONG UncompressedIndex,
  796. PUCHAR UncompressedBuffer,
  797. ULONG UncompressedLength,
  798. PULONG MatchedStringIndex,
  799. PMRCF_STANDARD_COMPRESS WorkSpace
  800. )
  801. /*++
  802. Routine Description:
  803. This routine does a standard compression lookup
  804. Arguments:
  805. UncompressedIndex - index into UncompressedBuffer[] of *2nd* byte of pair to match
  806. UncompressedBuffer - buffer containing uncompressed data
  807. UncompressedLength - length of UncompressedBuffer
  808. MatchedStringIndex - pointer to int to receive index of start of matched string
  809. WorkSpace - Supplies a pointer to the bit buffer statics
  810. Return Value:
  811. ULONG - Returns length of match. If the return value is >= 2 then
  812. *MatchedStringIndex = index of matched string (*2nd* byte in pair).
  813. Otherwise the match length is 0 or 1
  814. --*/
  815. {
  816. ULONG i;
  817. ULONG iMRU;
  818. ULONG iChar;
  819. ULONG iPrev;
  820. //
  821. // Are there exactly two bytes left? If so then do not check for match.
  822. //
  823. if (UncompressedIndex == (UncompressedLength-1)) { return 0; }
  824. //
  825. // 1st char is index to look-up tables
  826. //
  827. iChar = UncompressedBuffer[UncompressedIndex-1];
  828. //
  829. // Can't match if 1st MRU ent is unused
  830. //
  831. if (WorkSpace->abMRUX[iChar] != mruUNUSED) {
  832. for (i = 0; i < cMAXSLOTS; i++) {
  833. if (WorkSpace->abChar[iChar][i] == UncompressedBuffer[UncompressedIndex]) {
  834. iPrev = WorkSpace->ltX[iChar][i];
  835. WorkSpace->ltX[iChar][i] = UncompressedIndex;
  836. if ((UncompressedIndex - iPrev) >= wBACKPOINTERMAX) { return 0; }
  837. *MatchedStringIndex = iPrev;
  838. return MrcfGetMatchLength( UncompressedBuffer,
  839. iPrev,
  840. UncompressedIndex,
  841. UncompressedLength );
  842. }
  843. }
  844. }
  845. //
  846. // Cycle MRU index for char
  847. // Update char match table
  848. // Location of this char pair
  849. //
  850. iMRU = (WorkSpace->abMRUX[iChar] += 1) & (cMAXSLOTS - 1);
  851. WorkSpace->abChar[iChar][iMRU] = UncompressedBuffer[UncompressedIndex];
  852. WorkSpace->ltX[iChar][iMRU] = UncompressedIndex;
  853. return 0;
  854. }
  855. //
  856. // Internal Support Routine
  857. //
  858. ULONG
  859. MrcfGetMatchLength (
  860. PUCHAR UncompressedBuffer,
  861. ULONG MatchIndex,
  862. ULONG CurrentIndex,
  863. ULONG UncompressedLength
  864. )
  865. /*++
  866. Routine Description:
  867. Find length of matching strings
  868. Arguments:
  869. UncompressedBuffer - uncompressed data buffer
  870. MatchIndex - index of 2nd byte in UncompressedBuffer of match (MatchIndex < CurrentIndex)
  871. CurrentIndex - index of 2nd byte in UncompressedBuffer that is being compressed
  872. UncompressedLength - length of UncompressedBuffer
  873. Return Value:
  874. ULONG - Returns length of matching strings (0, or 2 or greater)
  875. --*/
  876. {
  877. ULONG cb;
  878. ASSERT(MatchIndex >= 0);
  879. ASSERT(MatchIndex < CurrentIndex);
  880. ASSERT(CurrentIndex < UncompressedLength);
  881. //
  882. // Point back to start of both strings
  883. //
  884. MatchIndex--;
  885. CurrentIndex--;
  886. //
  887. // No bytes matched, yet
  888. //
  889. cb = 0;
  890. //
  891. // Scan for end of match, or end of buffer
  892. //
  893. while ((CurrentIndex<UncompressedLength) && (UncompressedBuffer[MatchIndex] == UncompressedBuffer[CurrentIndex])) {
  894. MatchIndex++;
  895. CurrentIndex++;
  896. cb++;
  897. }
  898. return cb;
  899. }
  900. //
  901. // Internal Support Routine
  902. //
  903. BOOLEAN
  904. MrcfEncodeByte (
  905. UCHAR b,
  906. PMRCF_BIT_IO BitIo
  907. )
  908. /*++
  909. Routine Description:
  910. Write one byte to compressed bit stream
  911. Arguments:
  912. b - byte to write
  913. BitIo - Supplies a pointer to the bit buffer statics
  914. Return Value:
  915. BOOLEAN - TRUE if the bit stream was updated and FALSE if overran buffer
  916. --*/
  917. {
  918. ULONG abits;
  919. DbgDoit( DbgPrint("<MrcfEncodeByte>: byte=%02x '%c'\n",b,ChPrint(b)) );
  920. abits = ((b & 0x7F) << 2) | ((b < 128) ? 2 : 1);
  921. //
  922. // Write to bitstream
  923. //
  924. return MrcfWriteNBits(abits, 9, BitIo);
  925. }
  926. //
  927. // Internal Support Routine
  928. //
  929. BOOLEAN
  930. MrcfEncodeMatch (
  931. ULONG off,
  932. ULONG cb,
  933. PMRCF_BIT_IO BitIo
  934. )
  935. /*++
  936. Routine Description:
  937. Write a match to compressed bit stream
  938. Arguments:
  939. off - offset of match (must be greater than 0)
  940. cb - length of match (must be at least 2)
  941. BitIo - Supplies a pointer to the bit buffer statics
  942. Return Value:
  943. BOOLEAN - TRUE if the compress stream was updated and FALSE if overran buffer
  944. --*/
  945. {
  946. ULONG abits;
  947. ULONG cbits;
  948. ULONG cbSave;
  949. ULONG mask;
  950. ASSERT(off > 0);
  951. ASSERT(off < wBACKPOINTERMAX);
  952. ASSERT(cb >= 2);
  953. DbgDoit( DbgPrint("<MrcfEncodeMatch>: off=%x len=%x\n",off,cb) );
  954. //
  955. // Encode the match bits and offset portion
  956. //
  957. if (off < 64) {
  958. //
  959. // Use 6-bit offset encoding
  960. //
  961. abits = (off << 2) | 0x0; // .00 = <offset>+<6-bit>+<match>
  962. if (!MrcfWriteNBits(abits,6+2,BitIo)) {
  963. //
  964. // Opps overran the compression buffer
  965. //
  966. return FALSE;
  967. }
  968. } else if (off < 320) {
  969. //
  970. // Use 8-bit offset encoding
  971. //
  972. abits = ((off - 64) << 3) | 0x3; // 0.11 = <offset>+<8-bit>+<match>
  973. if (!MrcfWriteNBits(abits,8+3,BitIo)) {
  974. //
  975. // Opps overran the compression buffer
  976. //
  977. return FALSE;
  978. }
  979. } else { // (off >= 320)
  980. //
  981. // Use 12-bit offset encoding
  982. //
  983. abits = ((off - 320) << 3) | 0x7; // 1.11 = <offset>+<12-bit>+<match>
  984. if (!MrcfWriteNBits(abits,12+3,BitIo)) {
  985. //
  986. // Opps overran the compression buffer
  987. //
  988. return FALSE;
  989. }
  990. }
  991. //
  992. // Encode the length logarithmically
  993. //
  994. cb -= 1;
  995. cbSave = cb; // Save to get remainder later
  996. cbits = 0;
  997. while (cb > 1) {
  998. cbits++;
  999. //
  1000. // Put out another 0 for the length, and
  1001. // watch for buffer overflow
  1002. //
  1003. if (!MrcfWriteBit(0, BitIo)) {
  1004. return FALSE;
  1005. }
  1006. //
  1007. // Shift count right (avoid sign bit)
  1008. //
  1009. ((USHORT)cb) >>= 1;
  1010. }
  1011. //
  1012. // Terminate length bit string
  1013. //
  1014. if (!MrcfWriteBit(1, BitIo)) {
  1015. return FALSE;
  1016. }
  1017. if (cbits > 0) {
  1018. //
  1019. // Mask for bits we want, and get remainder
  1020. //
  1021. mask = (1 << cbits) - 1;
  1022. abits = cbSave & mask;
  1023. if (!MrcfWriteNBits(abits,cbits,BitIo)) {
  1024. return FALSE;
  1025. }
  1026. }
  1027. return TRUE;
  1028. }
  1029. //
  1030. // Internal Support Routine
  1031. //
  1032. BOOLEAN
  1033. MrcfWriteBit (
  1034. ULONG bit,
  1035. PMRCF_BIT_IO BitIo
  1036. )
  1037. /*++
  1038. Routine Description:
  1039. Write a bit to the bit buffer
  1040. Arguments:
  1041. bit - bit to write (0 or 1)
  1042. BitIo - Supplies a pointer to the bit buffer statics
  1043. Return Value:
  1044. BOOLEAN - returns TRUE if the compresed bit stream was updated and
  1045. FALSE if overran buffer.
  1046. --*/
  1047. {
  1048. ASSERT((bit == 0) || (bit == 1));
  1049. ASSERTMSG("Must be room for at least one bit ", (BitIo->cbitsBB) < 16);
  1050. DbgDoit( DbgPrint("<MrcfWriteBit>: bit=%x\n",bit) );
  1051. //
  1052. // Write one bit
  1053. //
  1054. (BitIo->abitsBB) |= bit << (BitIo->cbitsBB);
  1055. (BitIo->cbitsBB)++;
  1056. //
  1057. // Check if abitsBB is full and write compressed data buffer
  1058. //
  1059. if ((BitIo->cbitsBB) >= 16) {
  1060. return (MrcfFlushBitBuffer(BitIo) != 0);
  1061. } else {
  1062. return TRUE;
  1063. }
  1064. }
  1065. //
  1066. // Internal Support Routine
  1067. //
  1068. BOOLEAN
  1069. MrcfWriteNBits (
  1070. ULONG abits,
  1071. LONG cbits,
  1072. PMRCF_BIT_IO BitIo
  1073. )
  1074. /*++
  1075. Routine Description:
  1076. Write N bits to the bit buffer
  1077. Arguments:
  1078. abits - bits to write
  1079. cbits - count of bits write
  1080. BitIo - Supplies a pointer to the bit buffer statics
  1081. Return Value:
  1082. BOOLEAN - returns TRUE if the compressed bit stream was updated and
  1083. FALSE if overran buffer
  1084. --*/
  1085. {
  1086. LONG cbitsPart;
  1087. ULONG mask;
  1088. ASSERT(cbits > 0);
  1089. ASSERT(cbits <= 16);
  1090. ASSERTMSG("Must be room for at least one bit ", (BitIo->cbitsBB) < 16);
  1091. DbgDoit( DbgPrint("<MrcfWriteNBits>: bits=%04x count=%x\n",abits,cbits) );
  1092. while (cbits > 0) {
  1093. //
  1094. // Number of bits we can write
  1095. //
  1096. cbitsPart = min(16-(BitIo->cbitsBB), cbits);
  1097. mask = (1 << cbitsPart) - 1;
  1098. //
  1099. // Move part of bits to buffer
  1100. //
  1101. (BitIo->abitsBB) |= (abits & mask) << (BitIo->cbitsBB);
  1102. //
  1103. // Update count of bits written
  1104. //
  1105. (BitIo->cbitsBB) += cbitsPart;
  1106. //
  1107. // Check if buffer if full
  1108. //
  1109. if ((BitIo->cbitsBB) >= 16) {
  1110. //
  1111. // Write compressed data buffer
  1112. //
  1113. if (!MrcfFlushBitBuffer(BitIo)) {
  1114. return FALSE;
  1115. }
  1116. }
  1117. //
  1118. // Reduce number of bits left to write and move remaining bits over
  1119. //
  1120. cbits -= cbitsPart;
  1121. abits >>= cbitsPart;
  1122. }
  1123. return TRUE;
  1124. }
  1125. //
  1126. // Internal Support Routine
  1127. //
  1128. ULONG
  1129. MrcfFlushBitBuffer (
  1130. PMRCF_BIT_IO BitIo
  1131. )
  1132. /*++
  1133. Routine Description:
  1134. Write remaining bits to compressed data buffer
  1135. Arguments:
  1136. BitIo - Supplies a pointer to the bit buffer statics
  1137. Return Value:
  1138. ULONG - Returns total count of bytes written to the compressed data
  1139. buffer since the last call to MrcfSetBitBuffer(). Returns 0 if
  1140. overran buffer
  1141. --*/
  1142. {
  1143. ASSERT((BitIo->cbitsBB) >= 0);
  1144. ASSERT((BitIo->cbitsBB) <= 16);
  1145. //
  1146. // Move bits to the compressed data buffer
  1147. //
  1148. while ((BitIo->cbitsBB) > 0) {
  1149. //
  1150. // Process low and high half.
  1151. // Check if output buffer is out of room
  1152. //
  1153. if ((BitIo->cbBB) == 0) { return 0; }
  1154. //
  1155. // Store a byte, adjust the count, get high half, nd adjust
  1156. // count of bits remaining
  1157. //
  1158. *(BitIo->pbBB)++ = (UCHAR)((BitIo->abitsBB) & 0xFF);
  1159. (BitIo->cbBB)--;
  1160. (BitIo->abitsBB) >>= 8;
  1161. (BitIo->cbitsBB) -= 8;
  1162. }
  1163. //
  1164. // Reset bit buffer, "abitsBB >>= 8" guarantees abitsBB is clear
  1165. //
  1166. ASSERT((BitIo->abitsBB) == 0);
  1167. (BitIo->cbitsBB) = 0;
  1168. return (BitIo->cbBBInitial)-(BitIo->cbBB);
  1169. }
  1170. #endif // DOUBLE_SPACE_WRITE