Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

494 lines
15 KiB

  1. /*
  2. * encdata.c
  3. *
  4. * Encode a block into the output stream
  5. */
  6. #include "encoder.h"
  7. /*
  8. * Macro to output bits into the encoding stream
  9. */
  10. #define OUTPUT_BITS(N,X) output_bits( context, (N), (X))
  11. void output_bits( t_encoder_context *context, ulong numbits, ulong value ) {
  12. context->enc_bitbuf |= ( value << ( context->enc_bitcount - numbits ));
  13. context->enc_bitcount -= (char)numbits;
  14. while ( context->enc_bitcount <= 16 ) {
  15. if ( context->enc_output_buffer_curpos >= context->enc_output_buffer_end ) {
  16. context->enc_output_buffer_curpos = context->enc_output_buffer_start;
  17. context->enc_output_overflow = true;
  18. }
  19. *context->enc_output_buffer_curpos++ = (byte)( context->enc_bitbuf >> 16 );
  20. *context->enc_output_buffer_curpos++ = (byte)( context->enc_bitbuf >> 24 );
  21. context->enc_bitbuf <<= 16;
  22. context->enc_bitcount += 16;
  23. }
  24. }
  #ifdef EXTRALONGMATCHES

  /* Shorthand that supplies the `context` variable in scope at the call site. */
  #define OUTPUT_EXTRA_LENGTH(X) output_extra_length( context, (X))

  /*
   * Write the extra match length as a prefix-coded, variable-width field.
   *
   *   prefix   payload   payload holds
   *   ------   -------   ----------------------------------
   *   0         8 bits   ExtraLength
   *   10       10 bits   ExtraLength - 2^8
   *   110      12 bits   ExtraLength - 2^8 - 2^10
   *   111      15 bits   ExtraLength (unbiased)
   *
   * 15 bits is the widest payload needed because a match cannot span a
   * 32K boundary; the ASSERT below checks that bound.
   *
   * context      encoder state, forwarded to output_bits()
   * ExtraLength  value to encode; must be below 32768
   */
  void output_extra_length( t_encoder_context *context, ulong ExtraLength )
  {
      ulong biased = ExtraLength;

      ASSERT( ExtraLength < 32768 );

      /* Shortest form: a 0 prefix bit followed by 8 payload bits. */
      if ( biased < ( 1 << 8 )) {
          OUTPUT_BITS( 9, biased );
          return;
      }

      /* Prefix 10 followed by 10 payload bits. */
      biased -= ( 1 << 8 );
      if ( biased < ( 1 << 10 )) {
          OUTPUT_BITS( 12, (( 1 << 11 ) | biased ));
          return;
      }

      /* Prefix 110 followed by 12 payload bits. */
      biased -= ( 1 << 10 );
      if ( biased < ( 1 << 12 )) {
          OUTPUT_BITS( 15, (( 1 << 14 ) | ( 1 << 13 ) | biased ));
          return;
      }

      /*
       * Prefix 111 followed by the raw 15-bit value.  This is split into
       * two calls because OUTPUT_BITS cannot take more than 16 bits at once.
       */
      OUTPUT_BITS( 3, 7 );
      OUTPUT_BITS( 15, ExtraLength );
  }

  #endif /* EXTRALONGMATCHES */
  62. /*
  63. * Given the initial state of the repeated offset buffers at
  64. * the beginning of this block, calculate the final state of the
  65. * repeated offset buffers after outputting this block as if it
  66. * were compressed data.
  67. *
  68. * First try to do it the quick way, by starting at the last
  69. * match and working backwards, to find three consecutive matches
  70. * which don't use repeated offsets. If this fails, we'll have
  71. * to take the initial state of the three offsets at the beginning
  72. * of the block, and evolve them to the end of the block.
  73. */
  74. void get_final_repeated_offset_states(t_encoder_context *context, ulong distances)
  75. {
  76. ulong MatchPos;
  77. signed long d; /* must be signed */
  78. byte consecutive;
  79. consecutive = 0;
  80. for (d = distances-1; d >= 0; d--)
  81. {
  82. if (context->enc_DistData[d] > 2)
  83. {
  84. /* NOT a repeated offset */
  85. consecutive++;
  86. /* do we have three consecutive non-repeated-offsets? */
  87. if (consecutive >= 3)
  88. break;
  89. }
  90. else
  91. {
  92. consecutive = 0;
  93. }
  94. }
  95. /*
  96. * If we didn't find three consecutive matches which
  97. * don't use repeated offsets, then we have to start
  98. * from the beginning and evolve the repeated offsets.
  99. *
  100. * Otherwise, we start at the first of the consecutive
  101. * matches.
  102. */
  103. if (consecutive < 3)
  104. {
  105. d = 0;
  106. }
  107. for (; d < (signed long) distances; d++)
  108. {
  109. MatchPos = context->enc_DistData[d];
  110. if (MatchPos == 0)
  111. {
  112. }
  113. else if (MatchPos <= 2)
  114. {
  115. ulong temp;
  116. temp = context->enc_repeated_offset_at_literal_zero[MatchPos];
  117. context->enc_repeated_offset_at_literal_zero[MatchPos] = context->enc_repeated_offset_at_literal_zero[0];
  118. context->enc_repeated_offset_at_literal_zero[0] = temp;
  119. }
  120. else
  121. {
  122. context->enc_repeated_offset_at_literal_zero[2] = context->enc_repeated_offset_at_literal_zero[1];
  123. context->enc_repeated_offset_at_literal_zero[1] = context->enc_repeated_offset_at_literal_zero[0];
  124. context->enc_repeated_offset_at_literal_zero[0] = MatchPos-2;
  125. }
  126. }
  127. }
  128. /*
  129. * Encode a block with no compression
  130. *
  131. * bufpos is the position in the file from which the first
  132. * literal in this block starts. To reference memory, we will
  133. * use enc_MemWindow[bufpos] (remember that enc_MemWindow is
  134. * moved backwards every time we copymem).
  135. *
  136. * Since this data was originally matched into the compressor,
  137. * our recent match offsets will have been changed; however,
  138. * since this is an uncompressed block, the decoder won't be
  139. * updating them. Therefore, we need to tell the decoder
  140. * the state of the match offsets after it has finished
  141. * decoding the uncompressed data - we store these in this
  142. * block.
  143. */
  144. void encode_uncompressed_block(t_encoder_context *context, ulong bufpos, ulong block_size)
  145. {
  146. int i;
  147. int j;
  148. bool block_size_odd;
  149. ulong val;
  150. /*
  151. * Align on a byte boundary
  152. */
  153. output_bits(context, context->enc_bitcount-16, 0);
  154. /*
  155. * Now output the contents of the repeated offset
  156. * buffers, since we need to preserve the state of
  157. * the encoder
  158. */
  159. for (i = 0; i < NUM_REPEATED_OFFSETS; i++)
  160. {
  161. val = context->enc_repeated_offset_at_literal_zero[i];
  162. for (j = 0; j < sizeof(long); j++)
  163. {
  164. *context->enc_output_buffer_curpos++ = (byte) val;
  165. val >>= 8;
  166. }
  167. }
  168. block_size_odd = block_size & 1;
  169. /*
  170. * Write out uncompressed data
  171. */
  172. while (block_size > 0)
  173. {
  174. *context->enc_output_buffer_curpos++ = context->enc_MemWindow[bufpos];
  175. bufpos++;
  176. block_size--;
  177. context->enc_input_running_total++;
  178. if (context->enc_input_running_total == CHUNK_SIZE)
  179. {
  180. perform_flush_output_callback(context);
  181. context->enc_num_block_splits = 0;
  182. }
  183. }
  184. /*
  185. * Add pad byte to keep the output word-aligned
  186. */
  187. if (block_size_odd)
  188. {
  189. *context->enc_output_buffer_curpos++ = 0;
  190. }
  191. context->enc_bitcount = 32;
  192. context->enc_bitbuf = 0;
  193. }
  194. /*
  195. * Estimate the size of the data in the buffer, in bytes
  196. */
  197. ulong estimate_compressed_block_size(t_encoder_context *context)
  198. {
  199. ulong block_size = 0; /* output size in bits */
  200. ulong i;
  201. ulong mpslot;
  202. /* Estimation of tree size */
  203. block_size = 150*8;
  204. /* Tally bits to output characters */
  205. for (i = 0; i < NUM_CHARS; i++)
  206. block_size += (context->enc_main_tree_len[i]*context->enc_main_tree_freq[i]);
  207. /* Tally bits to output matches */
  208. for (mpslot = 0; mpslot < context->enc_num_position_slots; mpslot++)
  209. {
  210. long element;
  211. int primary;
  212. element = NUM_CHARS + (mpslot << NL_SHIFT);
  213. /* For primary == NUM_PRIMARY_LENGTHS we have secondary lengths */
  214. for (primary = 0; primary <= NUM_PRIMARY_LENGTHS; primary++)
  215. {
  216. block_size += ((context->enc_main_tree_len[element] + enc_extra_bits[mpslot]) *
  217. context->enc_main_tree_freq[element]);
  218. element++;
  219. }
  220. }
  221. for (i = 0; i < NUM_SECONDARY_LENGTHS; i++)
  222. block_size += (context->enc_secondary_tree_freq[i] * context->enc_secondary_tree_len[i]);
  223. /* round up */
  224. return (block_size+7) >> 3;
  225. }
  226. /*
  227. * Encode block with NO special encoding of the lower 3
  228. * position bits
  229. */
  230. void encode_verbatim_block(t_encoder_context *context, ulong literal_to_end_at)
  231. {
  232. ulong MatchPos;
  233. ulong d = 0;
  234. ulong l = 0;
  235. ulong MatchLength;
  236. byte c;
  237. ulong mpSlot;
  238. for ( l = 0; l < literal_to_end_at; l++ ) {
  239. if (!IsMatch(l))
  240. {
  241. c = context->enc_LitData[l];
  242. OUTPUT_BITS(context->enc_main_tree_len[c], context->enc_main_tree_code[c]);
  243. context->enc_input_running_total++;
  244. }
  245. else
  246. {
  247. /* Note, 0 means MatchLen=3, 1 means MatchLen=4, ... */
  248. MatchLength = context->enc_LitData[l];
  249. /* Delta match pos */
  250. MatchPos = context->enc_DistData[d++];
  251. mpSlot = MP_SLOT(MatchPos);
  252. if (MatchLength < NUM_PRIMARY_LENGTHS)
  253. {
  254. OUTPUT_BITS(
  255. context->enc_main_tree_len[ NUM_CHARS+(mpSlot<<NL_SHIFT)+MatchLength],
  256. context->enc_main_tree_code[NUM_CHARS+(mpSlot<<NL_SHIFT)+MatchLength]
  257. );
  258. }
  259. else
  260. {
  261. OUTPUT_BITS(
  262. context->enc_main_tree_len [(NUM_CHARS+NUM_PRIMARY_LENGTHS)+(mpSlot<<NL_SHIFT)],
  263. context->enc_main_tree_code[(NUM_CHARS+NUM_PRIMARY_LENGTHS)+(mpSlot<<NL_SHIFT)]
  264. );
  265. OUTPUT_BITS(
  266. context->enc_secondary_tree_len[ MatchLength - NUM_PRIMARY_LENGTHS],
  267. context->enc_secondary_tree_code[MatchLength - NUM_PRIMARY_LENGTHS]
  268. );
  269. }
  270. if (enc_extra_bits[ mpSlot ])
  271. {
  272. OUTPUT_BITS(
  273. enc_extra_bits[mpSlot],
  274. MatchPos & enc_slot_mask[mpSlot]
  275. );
  276. }
  277. #ifdef EXTRALONGMATCHES
  278. if ( MatchLength == ( MAX_MATCH - MIN_MATCH )) {
  279. MatchLength += context->enc_ExtraLength[ l ];
  280. OUTPUT_EXTRA_LENGTH( context->enc_ExtraLength[ l ] );
  281. }
  282. #endif
  283. context->enc_input_running_total += (MatchLength+MIN_MATCH);
  284. }
  285. _ASSERTE (context->enc_input_running_total <= CHUNK_SIZE);
  286. if (context->enc_input_running_total == CHUNK_SIZE)
  287. {
  288. perform_flush_output_callback(context);
  289. context->enc_num_block_splits = 0;
  290. }
  291. _ASSERTE (context->enc_input_running_total < CHUNK_SIZE);
  292. }
  293. }
  294. /*
  295. * aligned block encoding
  296. */
  297. void encode_aligned_block(t_encoder_context *context, ulong literal_to_end_at)
  298. {
  299. ulong MatchPos;
  300. ulong MatchLength;
  301. byte c;
  302. ulong mpSlot;
  303. byte Lower;
  304. ulong l = 0;
  305. ulong d = 0;
  306. for ( l = 0; l < literal_to_end_at; l++ ) {
  307. if (!IsMatch(l))
  308. {
  309. c = context->enc_LitData[l];
  310. OUTPUT_BITS(context->enc_main_tree_len[c], context->enc_main_tree_code[c]);
  311. context->enc_input_running_total++;
  312. }
  313. else
  314. {
  315. /* Note, 0 means MatchLen=3, 1 means MatchLen=4, ... */
  316. MatchLength = context->enc_LitData[l];
  317. /* Delta match pos */
  318. MatchPos = context->enc_DistData[d++];
  319. mpSlot = MP_SLOT(MatchPos);
  320. if (MatchLength < NUM_PRIMARY_LENGTHS)
  321. {
  322. OUTPUT_BITS(
  323. context->enc_main_tree_len[ NUM_CHARS+(mpSlot<<NL_SHIFT)+MatchLength],
  324. context->enc_main_tree_code[NUM_CHARS+(mpSlot<<NL_SHIFT)+MatchLength]
  325. );
  326. }
  327. else
  328. {
  329. OUTPUT_BITS(
  330. context->enc_main_tree_len[ (NUM_CHARS+NUM_PRIMARY_LENGTHS)+(mpSlot<<NL_SHIFT)],
  331. context->enc_main_tree_code[(NUM_CHARS+NUM_PRIMARY_LENGTHS)+(mpSlot<<NL_SHIFT)]
  332. );
  333. OUTPUT_BITS(
  334. context->enc_secondary_tree_len[ MatchLength - NUM_PRIMARY_LENGTHS],
  335. context->enc_secondary_tree_code[MatchLength - NUM_PRIMARY_LENGTHS]
  336. );
  337. }
  338. if (enc_extra_bits[ mpSlot ] >= 3)
  339. {
  340. if (enc_extra_bits[ mpSlot ] > 3)
  341. {
  342. OUTPUT_BITS(
  343. enc_extra_bits[mpSlot] - 3,
  344. (MatchPos >> 3) & ( (1 << (enc_extra_bits[mpSlot]-3)) -1)
  345. );
  346. }
  347. Lower = (byte) (MatchPos & 7);
  348. OUTPUT_BITS(
  349. context->enc_aligned_tree_len[Lower],
  350. context->enc_aligned_tree_code[Lower]
  351. );
  352. }
  353. else if (enc_extra_bits[ mpSlot ])
  354. {
  355. OUTPUT_BITS(
  356. enc_extra_bits[mpSlot],
  357. MatchPos & enc_slot_mask[ mpSlot ]
  358. );
  359. }
  360. #ifdef EXTRALONGMATCHES
  361. if ( MatchLength == ( MAX_MATCH - MIN_MATCH )) {
  362. MatchLength += context->enc_ExtraLength[ l ];
  363. OUTPUT_EXTRA_LENGTH( context->enc_ExtraLength[ l ] );
  364. }
  365. #endif
  366. context->enc_input_running_total += (MatchLength+MIN_MATCH);
  367. }
  368. _ASSERTE (context->enc_input_running_total <= CHUNK_SIZE);
  369. if (context->enc_input_running_total == CHUNK_SIZE)
  370. {
  371. perform_flush_output_callback(context);
  372. context->enc_num_block_splits = 0;
  373. }
  374. _ASSERTE (context->enc_input_running_total < CHUNK_SIZE);
  375. }
  376. }
  377. void perform_flush_output_callback(t_encoder_context *context)
  378. {
  379. long output_size;
  380. /*
  381. * Do this only if there is any input to account for, so we don't
  382. * end up outputting blocks where comp_size > 0 and uncmp_size = 0.
  383. */
  384. if (context->enc_input_running_total > 0)
  385. {
  386. flush_output_bit_buffer(context);
  387. output_size = (ulong)(context->enc_output_buffer_curpos - context->enc_output_buffer_start);
  388. if (output_size > 0)
  389. {
  390. (*context->enc_output_callback_function)(
  391. context->enc_fci_data,
  392. context->enc_output_buffer_start,
  393. (ulong)(context->enc_output_buffer_curpos - context->enc_output_buffer_start),
  394. context->enc_input_running_total
  395. );
  396. }
  397. }
  398. context->enc_input_running_total = 0;
  399. context->enc_output_buffer_curpos = context->enc_output_buffer_start;
  400. /* initialise bit buffer */
  401. context->enc_bitcount = 32;
  402. context->enc_bitbuf = 0;
  403. }