Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

182 lines
5.5 KiB

  1. /*
  2. ** lzcomp.c - Routines used in Lempel-Ziv compression (a la 1977 article).
  3. **
  4. ** Author: DavidDi
  5. */
  6. // Headers
  7. ///////////
  8. #include "lz_common.h"
  9. #include "lz_buffers.h"
  10. #include "lz_header.h"
  11. #include "lzcommon.h"
  12. /*
  13. ** int LZEncode(int doshSource, int doshDest);
  14. **
  15. ** Compress input file into output file.
  16. **
  17. ** Arguments: doshSource - open DOS file handle of input file
  18. ** doshDest - open DOS file handle of output file
  19. **
  20. ** Returns: int - TRUE if compression was successful. One of the LZERROR_
  21. ** codes if the compression failed.
  22. **
  23. ** Globals:
  24. */
  25. INT LZEncode(INT doshSource, INT doshDest, PLZINFO pLZI)
  26. {
  27. INT i, len, f,
  28. iCurChar, // current ring buffer position
  29. iCurString, // start of current string in ring buffer
  30. iCodeBuf, // index of next open buffer position
  31. cbLastMatch; // length of last match
  32. BYTE byte, // temporary storage for next byte to write
  33. byteMask, // bit mask (and counter) for eight code units
  34. codeBuf[1 + 8 * MAX_LITERAL_LEN]; // temporary storage for encoded data
  35. #if 0
  36. pLZI->cbMaxMatchLen = LZ_MAX_MATCH_LEN;
  37. #else
  38. pLZI->cbMaxMatchLen = FIRST_MAX_MATCH_LEN;
  39. #endif
  40. ResetBuffers();
  41. pLZI->cblOutSize += HEADER_LEN;
  42. // Initialize encoding trees.
  43. if (!LZInitTree(pLZI)) {
  44. return( LZERROR_GLOBALLOC );
  45. }
  46. // CodeBuf[1..16] saves eight units of code, and CodeBuf[0] works as eight
  47. // flags. '1' representing that the unit is an unencoded letter (1 byte),
  48. // '0' a position-and-length pair (2 bytes). Thus, eight units require at
  49. // most 16 bytes of code, plus the one byte of flags.
  50. codeBuf[0] = (BYTE)0;
  51. byteMask = (BYTE)1;
  52. iCodeBuf = 1;
  53. iCurString = 0;
  54. iCurChar = RING_BUF_LEN - pLZI->cbMaxMatchLen;
  55. for (i = 0; i < RING_BUF_LEN - pLZI->cbMaxMatchLen; i++)
  56. pLZI->rgbyteRingBuf[i] = BUF_CLEAR_BYTE;
  57. // Read bytes into the last cbMaxMatchLen bytes of the buffer.
  58. for (len = 0; len < pLZI->cbMaxMatchLen && ((f = ReadByte(byte)) != END_OF_INPUT);
  59. len++)
  60. {
  61. if (f != TRUE) {
  62. return( f );
  63. }
  64. pLZI->rgbyteRingBuf[iCurChar + len] = byte;
  65. }
  66. // Insert the cbMaxMatchLen strings, each of which begins with one or more
  67. // 'space' characters. Note the order in which these strings are inserted.
  68. // This way, degenerate trees will be less likely to occur.
  69. for (i = 1; i <= pLZI->cbMaxMatchLen; i++)
  70. LZInsertNode(iCurChar - i, FALSE, pLZI);
  71. // Finally, insert the whole string just read. The global variables
  72. // cbCurMatch and iCurMatch are set.
  73. LZInsertNode(iCurChar, FALSE, pLZI);
  74. do // while (len > 0)
  75. {
  76. // cbCurMatch may be spuriously long near the end of text.
  77. if (pLZI->cbCurMatch > len)
  78. pLZI->cbCurMatch = len;
  79. if (pLZI->cbCurMatch <= MAX_LITERAL_LEN)
  80. {
  81. // This match isn't long enough to encode, so copy it directly.
  82. pLZI->cbCurMatch = 1;
  83. // Set 'one uncoded byte' bit flag.
  84. codeBuf[0] |= byteMask;
  85. // Write literal byte.
  86. codeBuf[iCodeBuf++] = pLZI->rgbyteRingBuf[iCurChar];
  87. }
  88. else
  89. {
  90. // This match is long enough to encode. Send its position and
  91. // length pair. N.b., pLZI->cbCurMatch > MAX_LITERAL_LEN.
  92. codeBuf[iCodeBuf++] = (BYTE)pLZI->iCurMatch;
  93. codeBuf[iCodeBuf++] = (BYTE)((pLZI->iCurMatch >> 4 & 0xf0) |
  94. (pLZI->cbCurMatch - (MAX_LITERAL_LEN + 1)));
  95. }
  96. // Shift mask left one bit.
  97. if ((byteMask <<= 1) == (BYTE)0)
  98. {
  99. // Send at most 8 units of code together.
  100. for (i = 0; i < iCodeBuf; i++)
  101. if ((f = WriteByte(codeBuf[i])) != TRUE) {
  102. return( f );
  103. }
  104. // Reset flags and mask.
  105. codeBuf[0] = (BYTE)0;
  106. byteMask = (BYTE)1;
  107. iCodeBuf = 1;
  108. }
  109. cbLastMatch = pLZI->cbCurMatch;
  110. for (i = 0; i < cbLastMatch && ((f = ReadByte(byte)) != END_OF_INPUT);
  111. i++)
  112. {
  113. if (f != TRUE) {
  114. return( f );
  115. }
  116. // Delete old string.
  117. LZDeleteNode(iCurString, pLZI);
  118. pLZI->rgbyteRingBuf[iCurString] = byte;
  119. // If the start position is near the end of buffer, extend the
  120. // buffer to make string comparison easier.
  121. if (iCurString < pLZI->cbMaxMatchLen - 1)
  122. pLZI->rgbyteRingBuf[iCurString + RING_BUF_LEN] = byte;
  123. // Increment position in ring buffer modulo RING_BUF_LEN.
  124. iCurString = (iCurString + 1) & (RING_BUF_LEN - 1);
  125. iCurChar = (iCurChar + 1) & (RING_BUF_LEN - 1);
  126. // Register the string in rgbyteRingBuf[r..r + cbMaxMatchLen - 1].
  127. LZInsertNode(iCurChar, FALSE, pLZI);
  128. }
  129. while (i++ < cbLastMatch)
  130. {
  131. // No need to read after the end of the input, but the buffer may
  132. // not be empty.
  133. LZDeleteNode(iCurString, pLZI);
  134. iCurString = (iCurString + 1) & (RING_BUF_LEN - 1);
  135. iCurChar = (iCurChar + 1) & (RING_BUF_LEN - 1);
  136. if (--len)
  137. LZInsertNode(iCurChar, FALSE, pLZI);
  138. }
  139. } while (len > 0); // until there is no input to process
  140. if (iCodeBuf > 1)
  141. // Send remaining code.
  142. for (i = 0; i < iCodeBuf; i++)
  143. if ((f = WriteByte(codeBuf[i])) != TRUE) {
  144. return( f );
  145. }
  146. // Flush output buffer to file.
  147. if ((f = FlushOutputBuffer(doshDest, pLZI)) != TRUE) {
  148. return( f );
  149. }
  150. LZFreeTree(pLZI);
  151. return(TRUE);
  152. }