Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

422 lines
11 KiB

  1. /*
  2. * encdata.c
  3. *
  4. * Encode a block into the output stream
  5. */
  6. #include "encoder.h"
  /*
   * Emit the literal stored at index `l` of the literal buffer, using its
   * main-tree code.
   *
   * The expansion relies on three names being in scope at the point of use:
   *   context - pointer to the encoder context
   *   l       - index into context->enc_LitData of the literal to emit
   *   c       - scratch variable that receives the literal value
   *
   * The body is wrapped in do { } while (0) so that `OUT_CHAR;` is a single
   * statement; without the wrapper only the first assignment would be
   * guarded if the macro were used as the body of an unbraced if/else.
   */
  #define OUT_CHAR \
      do { \
          c = context->enc_LitData[l]; \
          OUTPUT_BITS(context->enc_main_tree_len[c], context->enc_main_tree_code[c]); \
      } while (0)
  /*
   * Append the low N bits of X to the encoder's output bit stream.
   *
   * Requires a variable named `context` (pointer to the encoder context) in
   * the expansion scope.
   *
   * context->enc_bitcount counts the free bits remaining in
   * context->enc_bitbuf; new bits are placed directly below the bits already
   * queued.  Whenever 16 or fewer free bits remain, the top 16 queued bits
   * are written to the output buffer as two bytes, low byte first, and the
   * bit buffer is shifted up by 16.
   *
   * If the output cursor has reached enc_output_buffer_end, the macro sets
   * context->enc_output_overflow and rewinds the cursor to the start of the
   * buffer instead of writing past the end.
   *
   * N is evaluated more than once, so it must not have side effects.  The
   * shift amount is (enc_bitcount - N), so N must not exceed the current
   * enc_bitcount.
   *
   * Wrapped in do { } while (0) so an invocation followed by ';' is exactly
   * one statement and can be used as the body of an unbraced if/else.
   */
  #define OUTPUT_BITS(N,X) \
      do { \
          context->enc_bitbuf |= (((ulong) (X)) << (context->enc_bitcount-(N))); \
          context->enc_bitcount -= (N); \
          while (context->enc_bitcount <= 16) \
          { \
              if (context->enc_output_buffer_curpos >= context->enc_output_buffer_end) \
              { \
                  context->enc_output_overflow = true; \
                  context->enc_output_buffer_curpos = context->enc_output_buffer_start; \
              } \
              *context->enc_output_buffer_curpos++ = (byte) ((context->enc_bitbuf >> 16) & 255); \
              *context->enc_output_buffer_curpos++ = (byte) (context->enc_bitbuf >> 24); \
              context->enc_bitbuf <<= 16; \
              context->enc_bitcount += 16; \
          } \
      } while (0)
  30. /*
  31. * Given the initial state of the repeated offset buffers at
  32. * the beginning of this block, calculate the final state of the
  33. * repeated offset buffers after outputting this block as if it
  34. * were compressed data.
  35. *
  36. * First try to do it the quick way, by starting at the last
  37. * match and working backwards, to find three consecutive matches
  38. * which don't use repeated offsets. If this fails, we'll have
  39. * to take the initial state of the three offsets at the beginning
  40. * of the block, and evolve them to the end of the block.
  41. */
  42. void get_final_repeated_offset_states(t_encoder_context *context, ulong distances)
  43. {
  44. ulong MatchPos;
  45. signed long d; /* must be signed */
  46. byte consecutive;
  47. consecutive = 0;
  48. for (d = distances-1; d >= 0; d--)
  49. {
  50. if (context->enc_DistData[d] > 2)
  51. {
  52. /* NOT a repeated offset */
  53. consecutive++;
  54. /* do we have three consecutive non-repeated-offsets? */
  55. if (consecutive >= 3)
  56. break;
  57. }
  58. else
  59. {
  60. consecutive = 0;
  61. }
  62. }
  63. /*
  64. * If we didn't find three consecutive matches which
  65. * don't use repeated offsets, then we have to start
  66. * from the beginning and evolve the repeated offsets.
  67. *
  68. * Otherwise, we start at the first of the consecutive
  69. * matches.
  70. */
  71. if (consecutive < 3)
  72. {
  73. d = 0;
  74. }
  75. for (; d < (signed long) distances; d++)
  76. {
  77. MatchPos = context->enc_DistData[d];
  78. if (MatchPos == 0)
  79. {
  80. }
  81. else if (MatchPos <= 2)
  82. {
  83. ulong temp;
  84. temp = context->enc_repeated_offset_at_literal_zero[MatchPos];
  85. context->enc_repeated_offset_at_literal_zero[MatchPos] = context->enc_repeated_offset_at_literal_zero[0];
  86. context->enc_repeated_offset_at_literal_zero[0] = temp;
  87. }
  88. else
  89. {
  90. context->enc_repeated_offset_at_literal_zero[2] = context->enc_repeated_offset_at_literal_zero[1];
  91. context->enc_repeated_offset_at_literal_zero[1] = context->enc_repeated_offset_at_literal_zero[0];
  92. context->enc_repeated_offset_at_literal_zero[0] = MatchPos-2;
  93. }
  94. }
  95. }
  96. /*
  97. * Encode a block with no compression
  98. *
  99. * bufpos is the position in the file from which the first
  100. * literal in this block starts. To reference memory, we will
  101. * use enc_MemWindow[bufpos] (remember that enc_MemWindow is
  102. * moved backwards every time we copymem).
  103. *
  104. * Since this data was originally matched into the compressor,
  105. * our recent match offsets will have been changed; however,
  106. * since this is an uncompressed block, the decoder won't be
  107. * updating them. Therefore, we need to tell the decoder
  108. * the state of the match offsets after it has finished
  109. * decoding the uncompressed data - we store these in this
  110. * block.
  111. */
  112. void encode_uncompressed_block(t_encoder_context *context, ulong bufpos, ulong block_size)
  113. {
  114. int i;
  115. int j;
  116. bool block_size_odd;
  117. ulong val;
  118. /*
  119. * Align on a byte boundary
  120. */
  121. output_bits(context, context->enc_bitcount-16, 0);
  122. /*
  123. * Now output the contents of the repeated offset
  124. * buffers, since we need to preserve the state of
  125. * the encoder
  126. */
  127. for (i = 0; i < NUM_REPEATED_OFFSETS; i++)
  128. {
  129. val = context->enc_repeated_offset_at_literal_zero[i];
  130. for (j = 0; j < sizeof(long); j++)
  131. {
  132. *context->enc_output_buffer_curpos++ = (byte) val;
  133. val >>= 8;
  134. }
  135. }
  136. block_size_odd = block_size & 1;
  137. /*
  138. * Write out uncompressed data
  139. */
  140. while (block_size > 0)
  141. {
  142. *context->enc_output_buffer_curpos++ = context->enc_MemWindow[bufpos];
  143. bufpos++;
  144. block_size--;
  145. context->enc_input_running_total++;
  146. if (context->enc_input_running_total == CHUNK_SIZE)
  147. {
  148. perform_flush_output_callback(context);
  149. context->enc_num_block_splits = 0;
  150. }
  151. }
  152. /*
  153. * Add pad byte to keep the output word-aligned
  154. */
  155. if (block_size_odd)
  156. {
  157. *context->enc_output_buffer_curpos++ = 0;
  158. }
  159. context->enc_bitcount = 32;
  160. context->enc_bitbuf = 0;
  161. }
  162. /*
  163. * Estimate the size of the data in the buffer, in bytes
  164. */
  165. ulong estimate_compressed_block_size(t_encoder_context *context)
  166. {
  167. ulong block_size = 0; /* output size in bits */
  168. ulong i;
  169. byte mpslot;
  170. /* Estimation of tree size */
  171. block_size = 150*8;
  172. /* Tally bits to output characters */
  173. for (i = 0; i < NUM_CHARS; i++)
  174. block_size += (context->enc_main_tree_len[i]*context->enc_main_tree_freq[i]);
  175. /* Tally bits to output matches */
  176. for (mpslot = 0; mpslot < context->enc_num_position_slots; mpslot++)
  177. {
  178. long element;
  179. int primary;
  180. element = NUM_CHARS + (mpslot << NL_SHIFT);
  181. /* For primary == NUM_PRIMARY_LENGTHS we have secondary lengths */
  182. for (primary = 0; primary <= NUM_PRIMARY_LENGTHS; primary++)
  183. {
  184. block_size += ((context->enc_main_tree_len[element] + enc_extra_bits[mpslot]) *
  185. context->enc_main_tree_freq[element]);
  186. element++;
  187. }
  188. }
  189. for (i = 0; i < NUM_SECONDARY_LENGTHS; i++)
  190. block_size += (context->enc_secondary_tree_freq[i] * context->enc_secondary_tree_len[i]);
  191. /* round up */
  192. return (block_size+7) >> 3;
  193. }
  194. /*
  195. * Encode block with NO special encoding of the lower 3
  196. * position bits
  197. */
  198. void encode_verbatim_block(t_encoder_context *context, ulong literal_to_end_at)
  199. {
  200. ulong MatchPos;
  201. ulong d = 0;
  202. ulong l = 0;
  203. byte MatchLength;
  204. byte c;
  205. byte mpSlot;
  206. while (l < literal_to_end_at)
  207. {
  208. if (!IsMatch(l))
  209. {
  210. OUT_CHAR;
  211. l++;
  212. context->enc_input_running_total++;
  213. }
  214. else
  215. {
  216. /* Note, 0 means MatchLen=3, 1 means MatchLen=4, ... */
  217. MatchLength = context->enc_LitData[l++];
  218. /* Delta match pos */
  219. MatchPos = context->enc_DistData[d++];
  220. mpSlot = (byte) MP_SLOT(MatchPos);
  221. if (MatchLength < NUM_PRIMARY_LENGTHS)
  222. {
  223. OUTPUT_BITS(
  224. context->enc_main_tree_len[ NUM_CHARS+(mpSlot<<NL_SHIFT)+MatchLength],
  225. context->enc_main_tree_code[NUM_CHARS+(mpSlot<<NL_SHIFT)+MatchLength]
  226. );
  227. }
  228. else
  229. {
  230. OUTPUT_BITS(
  231. context->enc_main_tree_len [(NUM_CHARS+NUM_PRIMARY_LENGTHS)+(mpSlot<<NL_SHIFT)],
  232. context->enc_main_tree_code[(NUM_CHARS+NUM_PRIMARY_LENGTHS)+(mpSlot<<NL_SHIFT)]
  233. );
  234. OUTPUT_BITS(
  235. context->enc_secondary_tree_len[ MatchLength - NUM_PRIMARY_LENGTHS],
  236. context->enc_secondary_tree_code[MatchLength - NUM_PRIMARY_LENGTHS]
  237. );
  238. }
  239. if (enc_extra_bits[ mpSlot ])
  240. {
  241. OUTPUT_BITS(
  242. enc_extra_bits[mpSlot],
  243. MatchPos & enc_slot_mask[mpSlot]
  244. );
  245. }
  246. context->enc_input_running_total += (MatchLength+MIN_MATCH);
  247. }
  248. if (context->enc_input_running_total == CHUNK_SIZE)
  249. {
  250. perform_flush_output_callback(context);
  251. context->enc_num_block_splits = 0;
  252. }
  253. _ASSERTE (context->enc_input_running_total < CHUNK_SIZE);
  254. }
  255. }
  256. /*
  257. * aligned block encoding
  258. */
  259. void encode_aligned_block(t_encoder_context *context, ulong literal_to_end_at)
  260. {
  261. ulong MatchPos;
  262. byte MatchLength;
  263. byte c;
  264. byte mpSlot;
  265. byte Lower;
  266. ulong l = 0;
  267. ulong d = 0;
  268. while (l < literal_to_end_at)
  269. {
  270. if (!IsMatch(l))
  271. {
  272. OUT_CHAR;
  273. l++;
  274. context->enc_input_running_total++;
  275. }
  276. else
  277. {
  278. /* Note, 0 means MatchLen=3, 1 means MatchLen=4, ... */
  279. MatchLength = context->enc_LitData[l++];
  280. /* Delta match pos */
  281. MatchPos = context->enc_DistData[d++];
  282. mpSlot = (byte) MP_SLOT(MatchPos);
  283. if (MatchLength < NUM_PRIMARY_LENGTHS)
  284. {
  285. OUTPUT_BITS(
  286. context->enc_main_tree_len[ NUM_CHARS+(mpSlot<<NL_SHIFT)+MatchLength],
  287. context->enc_main_tree_code[NUM_CHARS+(mpSlot<<NL_SHIFT)+MatchLength]
  288. );
  289. }
  290. else
  291. {
  292. OUTPUT_BITS(
  293. context->enc_main_tree_len[ (NUM_CHARS+NUM_PRIMARY_LENGTHS)+(mpSlot<<NL_SHIFT)],
  294. context->enc_main_tree_code[(NUM_CHARS+NUM_PRIMARY_LENGTHS)+(mpSlot<<NL_SHIFT)]
  295. );
  296. OUTPUT_BITS(
  297. context->enc_secondary_tree_len[ MatchLength - NUM_PRIMARY_LENGTHS],
  298. context->enc_secondary_tree_code[MatchLength - NUM_PRIMARY_LENGTHS]
  299. );
  300. }
  301. if (enc_extra_bits[ mpSlot ] >= 3)
  302. {
  303. if (enc_extra_bits[ mpSlot ] > 3)
  304. {
  305. OUTPUT_BITS(
  306. enc_extra_bits[mpSlot] - 3,
  307. (MatchPos >> 3) & ( (1 << (enc_extra_bits[mpSlot]-3)) -1)
  308. );
  309. }
  310. Lower = (byte) (MatchPos & 7);
  311. OUTPUT_BITS(
  312. context->enc_aligned_tree_len[Lower],
  313. context->enc_aligned_tree_code[Lower]
  314. );
  315. }
  316. else if (enc_extra_bits[ mpSlot ])
  317. {
  318. OUTPUT_BITS(
  319. enc_extra_bits[mpSlot],
  320. MatchPos & enc_slot_mask[ mpSlot ]
  321. );
  322. }
  323. context->enc_input_running_total += (MatchLength+MIN_MATCH);
  324. }
  325. if (context->enc_input_running_total == CHUNK_SIZE)
  326. {
  327. perform_flush_output_callback(context);
  328. context->enc_num_block_splits = 0;
  329. }
  330. _ASSERTE (context->enc_input_running_total < CHUNK_SIZE);
  331. }
  332. }
  333. void perform_flush_output_callback(t_encoder_context *context)
  334. {
  335. long output_size;
  336. /*
  337. * Do this only if there is any input to account for, so we don't
  338. * end up outputting blocks where comp_size > 0 and uncmp_size = 0.
  339. */
  340. if (context->enc_input_running_total > 0)
  341. {
  342. flush_output_bit_buffer(context);
  343. output_size = (long)(context->enc_output_buffer_curpos - context->enc_output_buffer_start);
  344. if (output_size > 0)
  345. {
  346. (*context->enc_output_callback_function)(
  347. context->enc_fci_data,
  348. context->enc_output_buffer_start,
  349. (long) (context->enc_output_buffer_curpos - context->enc_output_buffer_start),
  350. context->enc_input_running_total
  351. );
  352. }
  353. }
  354. context->enc_input_running_total = 0;
  355. context->enc_output_buffer_curpos = context->enc_output_buffer_start;
  356. /* initialise bit buffer */
  357. context->enc_bitcount = 32;
  358. context->enc_bitbuf = 0;
  359. }