Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

515 lines
15 KiB

  1. /*
  2. * stdenc.c
  3. *
  4. * Standard encoder
  5. */
  6. #include <string.h>
  7. #include <stdio.h>
  8. #include <crtdbg.h>
  9. #include "deflate.h"
  10. //
  11. // Update hash variable "h" with character c
  12. //
  13. #define UPDATE_HASH(h,c) \
  14. h = ((h) << STD_ENCODER_HASH_SHIFT) ^ (c);
  15. //
  16. // Insert a string into the hash chain at location bufpos
  17. //
  18. // Assertions check that we never attempt to insert near the end of the buffer
  19. // (since our hash value is based on values at bufpos, bufpos+1, bufpos+2) and
  20. // that our hash value is always valid for the bytes we are inserting.
  21. //
  22. #define INSERT_STRING(search,bufpos) \
  23. { \
  24. _ASSERT((bufpos + 2) < context->bufpos_end); \
  25. UPDATE_HASH(hash, window[bufpos+2]); \
  26. _ASSERT((unsigned int) STD_ENCODER_RECALCULATE_HASH(bufpos) == (unsigned int) (hash & STD_ENCODER_HASH_MASK)); \
  27. search = lookup[hash & STD_ENCODER_HASH_MASK]; \
  28. lookup[hash & STD_ENCODER_HASH_MASK] = (t_search_node) (bufpos); \
  29. prev[bufpos & WINDOW_MASK] = (t_search_node) search; \
  30. }
  31. #define CHECK_FLUSH_RECORDING_BUFFER() \
  32. if (recording_bitcount >= 16) \
  33. { \
  34. *recording_bufptr++ = (BYTE) recording_bitbuf; \
  35. *recording_bufptr++ = (BYTE) (recording_bitbuf >> 8); \
  36. recording_bitbuf >>= 16; \
  37. recording_bitcount -= 16; \
  38. }
  39. #define OUTPUT_RECORDING_DATA(count,data) \
  40. recording_bitbuf |= ((data) << recording_bitcount); \
  41. recording_bitcount += (count);
  42. //
  43. // Record unmatched symbol c
  44. //
  45. #define RECORD_CHAR(c) \
  46. context->outputting_block_num_literals++; \
  47. context->std_encoder->literal_tree_freq[c]++; \
  48. _ASSERT(context->std_encoder->recording_literal_tree_len[c] != 0); \
  49. OUTPUT_RECORDING_DATA(context->std_encoder->recording_literal_tree_len[c], context->std_encoder->recording_literal_tree_code[c]); \
  50. CHECK_FLUSH_RECORDING_BUFFER();
  51. //
  52. // Record a match with length match_len (>= MIN_MATCH) and displacement match_pos
  53. //
  54. #define RECORD_MATCH(match_len, match_pos) \
  55. { \
  56. int pos_slot = POS_SLOT(match_pos); \
  57. int len_slot = g_LengthLookup[match_len - MIN_MATCH]; \
  58. int item = (NUM_CHARS+1) + len_slot; \
  59. int extra_dist_bits = g_ExtraDistanceBits[pos_slot]; \
  60. int extra_len_bits = g_ExtraLengthBits[len_slot]; \
  61. _ASSERT(match_len >= MIN_MATCH && match_len <= MAX_MATCH); \
  62. _ASSERT(context->outputting_block_num_literals >= 0 && context->outputting_block_num_literals < STD_ENCODER_MAX_ITEMS); \
  63. _ASSERT(context->std_encoder->recording_literal_tree_len[item] != 0); \
  64. _ASSERT(context->std_encoder->recording_dist_tree_len[pos_slot] != 0); \
  65. context->outputting_block_num_literals++; \
  66. context->std_encoder->literal_tree_freq[(NUM_CHARS + 1) + len_slot]++; \
  67. context->std_encoder->dist_tree_freq[pos_slot]++; \
  68. OUTPUT_RECORDING_DATA(context->std_encoder->recording_literal_tree_len[item], context->std_encoder->recording_literal_tree_code[item]); \
  69. CHECK_FLUSH_RECORDING_BUFFER(); \
  70. if (extra_len_bits > 0) \
  71. { \
  72. OUTPUT_RECORDING_DATA(extra_len_bits, (match_len-MIN_MATCH) & ((1 << extra_len_bits)-1)); \
  73. CHECK_FLUSH_RECORDING_BUFFER(); \
  74. } \
  75. OUTPUT_RECORDING_DATA(context->std_encoder->recording_dist_tree_len[pos_slot], context->std_encoder->recording_dist_tree_code[pos_slot]); \
  76. CHECK_FLUSH_RECORDING_BUFFER(); \
  77. if (extra_dist_bits > 0) \
  78. { \
  79. OUTPUT_RECORDING_DATA(extra_dist_bits, match_pos & ((1 << extra_dist_bits)-1)); \
  80. CHECK_FLUSH_RECORDING_BUFFER(); \
  81. } \
  82. }
  83. #define FLUSH_RECORDING_BITBUF() \
  84. *recording_bufptr++ = (BYTE) recording_bitbuf; \
  85. *recording_bufptr++ = (BYTE) (recording_bitbuf >> 8);
  86. //
  87. // Verifies that all of the hash pointers in the hash table are correct, and that everything
  88. // in the same hash chain has the same hash value
  89. //
  90. #ifdef FULL_DEBUG
  91. #define VERIFY_HASHES(bufpos) StdEncoderVerifyHashes(context, bufpos)
  92. #else
  93. #define VERIFY_HASHES(bufpos) ;
  94. #endif
  95. static void StdEncoderMoveWindows(t_encoder_context *context);
  96. static int StdEncoderFindMatch(
  97. const BYTE * window,
  98. const USHORT * prev,
  99. long bufpos,
  100. long search,
  101. unsigned int * match_pos,
  102. int cutoff,
  103. int nice_length
  104. );
  105. void StdEncoderDeflate(
  106. t_encoder_context * context,
  107. int search_depth,
  108. int lazy_match_threshold,
  109. int good_length,
  110. int nice_length
  111. )
  112. {
  113. long bufpos;
  114. unsigned int hash;
  115. t_std_encoder * encoder = context->std_encoder;
  116. byte * window = encoder->window;
  117. t_search_node * prev = encoder->prev;
  118. t_search_node * lookup = encoder->lookup;
  119. unsigned long recording_bitbuf;
  120. int recording_bitcount;
  121. byte * recording_bufptr;
  122. byte * end_recording_bufptr;
  123. // restore literal/match bitmap variables
  124. end_recording_bufptr = &encoder->lit_dist_buffer[STD_ENCODER_LIT_DIST_BUFFER_SIZE-8];
  125. recording_bufptr = encoder->recording_bufptr;
  126. recording_bitbuf = encoder->recording_bitbuf;
  127. recording_bitcount = encoder->recording_bitcount;
  128. bufpos = context->bufpos;
  129. VERIFY_HASHES(bufpos);
  130. //
  131. // Recalculate our hash
  132. //
  133. // One disadvantage of the way we do our hashing is that matches are not permitted in the last
  134. // few characters near bufpos_end.
  135. //
  136. hash = 0;
  137. UPDATE_HASH(hash, window[bufpos]);
  138. UPDATE_HASH(hash, window[bufpos+1]);
  139. while (bufpos < context->bufpos_end)
  140. {
  141. int match_len;
  142. t_match_pos match_pos;
  143. t_match_pos search;
  144. if (context->bufpos_end - bufpos <= 3)
  145. {
  146. // don't insert any strings when we get close to the end of the buffer,
  147. // since we will end up using corrupted hash values (the data after bufpos_end
  148. // is undefined, and those bytes would be swept into the hash value if we
  149. // calculated a hash at bufpos_end-2, for example, since our hash value is
  150. // build from 3 consecutive characters in the buffer).
  151. match_len = 0;
  152. }
  153. else
  154. {
  155. INSERT_STRING(search,bufpos);
  156. // find a match at what we'll call position X
  157. if (search != 0)
  158. {
  159. match_len = StdEncoderFindMatch(window, prev, bufpos, search, &match_pos, search_depth, nice_length);
  160. // truncate match if we're too close to the end of the buffer
  161. if (bufpos + match_len > context->bufpos_end)
  162. match_len = context->bufpos_end - bufpos;
  163. }
  164. else
  165. {
  166. match_len = 0;
  167. }
  168. }
  169. if (match_len < MIN_MATCH)
  170. {
  171. // didn't find a match, so output unmatched char
  172. RECORD_CHAR(window[bufpos]);
  173. bufpos++;
  174. }
  175. else
  176. {
  177. // bufpos now points to X+1
  178. bufpos++;
  179. // is this match so good (long) that we should take it automatically without
  180. // checking X+1 ?
  181. if (match_len <= lazy_match_threshold)
  182. {
  183. int next_match_len;
  184. t_match_pos next_match_pos;
  185. // sets search
  186. INSERT_STRING(search,bufpos);
  187. // no, so check for a better match at X+1
  188. if (search != 0)
  189. {
  190. next_match_len = StdEncoderFindMatch(
  191. window,
  192. prev,
  193. bufpos,
  194. search,
  195. &next_match_pos,
  196. match_len < good_length ? search_depth : (search_depth >> 2),
  197. nice_length
  198. );
  199. // truncate match if we're too close to the end of the buffer
  200. // note: next_match_len could now be < MIN_MATCH
  201. if (bufpos + next_match_len > context->bufpos_end)
  202. next_match_len = context->bufpos_end - bufpos;
  203. }
  204. else
  205. {
  206. next_match_len = 0;
  207. }
  208. // right now X and X+1 are both inserted into the search tree
  209. if (next_match_len > match_len)
  210. {
  211. // since next_match_len > match_len, it can't be < MIN_MATCH here
  212. // match at X+1 is better, so output unmatched char at X
  213. RECORD_CHAR(window[bufpos-1]);
  214. // now output match at location X+1
  215. RECORD_MATCH(next_match_len, next_match_pos);
  216. // insert remainder of second match into search tree
  217. //
  218. // example: (*=inserted already)
  219. //
  220. // X X+1 X+2 X+3 X+4
  221. // * *
  222. // nextmatchlen=3
  223. // bufpos
  224. //
  225. // If next_match_len == 3, we want to perform 2
  226. // insertions (at X+2 and X+3). However, first we must
  227. // inc bufpos.
  228. //
  229. bufpos++; // now points to X+2
  230. match_len = next_match_len;
  231. goto insert;
  232. }
  233. else
  234. {
  235. // match at X is better, so take it
  236. RECORD_MATCH(match_len, match_pos);
  237. //
  238. // Insert remainder of first match into search tree, minus the first
  239. // two locations, which were inserted by the FindMatch() calls.
  240. //
  241. // For example, if match_len == 3, then we've inserted at X and X+1
  242. // already (and bufpos is now pointing at X+1), and now we need to insert
  243. // only at X+2.
  244. //
  245. match_len--;
  246. bufpos++; // now bufpos points to X+2
  247. goto insert;
  248. }
  249. }
  250. else /* match_length >= good_match */
  251. {
  252. // in assertion: bufpos points to X+1, location X inserted already
  253. // first match is so good that we're not even going to check at X+1
  254. RECORD_MATCH(match_len, match_pos);
  255. // insert remainder of match at X into search tree
  256. insert:
  257. if (context->bufpos_end - bufpos <= match_len)
  258. {
  259. bufpos += (match_len-1);
  260. }
  261. else
  262. {
  263. while (--match_len > 0)
  264. {
  265. t_match_pos ignore; // we're not interested in the search position
  266. INSERT_STRING(ignore,bufpos);
  267. bufpos++;
  268. }
  269. }
  270. }
  271. }
  272. // literal buffer or distance buffer filled up (or close to filling up)?
  273. if (context->outputting_block_num_literals >= STD_ENCODER_MAX_ITEMS-4 ||
  274. recording_bufptr >= end_recording_bufptr)
  275. {
  276. // yes, then we must output a block
  277. _ASSERT(context->outputting_block_num_literals <= STD_ENCODER_MAX_ITEMS);
  278. // flush our recording matches bit buffer
  279. FLUSH_RECORDING_BITBUF();
  280. StdEncoderOutputBlock(context);
  281. // did we output the whole block?
  282. if (context->state != STATE_NORMAL)
  283. break;
  284. // we did output the whole block, so reset literal encoding
  285. recording_bufptr = encoder->recording_bufptr;
  286. recording_bitbuf = encoder->recording_bitbuf;
  287. recording_bitcount = encoder->recording_bitcount;
  288. }
  289. } /* end ... while (bufpos < bufpos_end) */
  290. _ASSERT(bufpos <= context->bufpos_end);
  291. // save recording state
  292. encoder->recording_bufptr = recording_bufptr;
  293. encoder->recording_bitbuf = recording_bitbuf;
  294. encoder->recording_bitcount = recording_bitcount;
  295. context->bufpos = bufpos;
  296. VERIFY_HASHES(bufpos);
  297. if (context->bufpos == 2*WINDOW_SIZE)
  298. StdEncoderMoveWindows(context);
  299. }
  300. static int StdEncoderFindMatch(
  301. const BYTE * window,
  302. const USHORT * prev,
  303. long bufpos,
  304. long search,
  305. unsigned int * match_pos,
  306. int cutoff,
  307. int nice_length
  308. )
  309. {
  310. const BYTE * window_bufpos = &window[bufpos];
  311. long earliest; // how far back we can look
  312. int best_match = 0; // best match length found so far
  313. t_match_pos l_match_pos = 0;
  314. _ASSERT(bufpos >= 0 && bufpos < 2*WINDOW_SIZE);
  315. _ASSERT(search < bufpos);
  316. _ASSERT(STD_ENCODER_RECALCULATE_HASH(search) == STD_ENCODER_RECALCULATE_HASH(bufpos));
  317. earliest = bufpos - WINDOW_SIZE;
  318. _ASSERT(earliest >= 0);
  319. while (search > earliest)
  320. {
  321. _ASSERT(STD_ENCODER_RECALCULATE_HASH(search) == STD_ENCODER_RECALCULATE_HASH(bufpos));
  322. _ASSERT(search < bufpos);
  323. if (window_bufpos[best_match] == window[search + best_match])
  324. {
  325. int j;
  326. for (j = 0; j < MAX_MATCH; j++)
  327. {
  328. if (window_bufpos[j] != window[search+j])
  329. break;
  330. }
  331. if (j > best_match)
  332. {
  333. best_match = j;
  334. l_match_pos = search; // absolute position
  335. if (j > nice_length)
  336. break;
  337. }
  338. }
  339. if (--cutoff == 0)
  340. break;
  341. search = (long) prev[search & WINDOW_MASK];
  342. }
  343. // turn l_match_pos into relative position
  344. l_match_pos = bufpos - l_match_pos - 1;
  345. if (best_match == 3 && l_match_pos >= STD_ENCODER_MATCH3_DIST_THRESHOLD)
  346. return 0;
  347. _ASSERT(best_match < MIN_MATCH || l_match_pos < WINDOW_SIZE);
  348. *match_pos = l_match_pos;
  349. return best_match;
  350. }
  351. static void StdEncoderMoveWindows(t_encoder_context *context)
  352. {
  353. if (context->bufpos >= 2*WINDOW_SIZE)
  354. {
  355. int i;
  356. t_search_node *lookup = context->std_encoder->lookup;
  357. t_search_node *prev = context->std_encoder->prev;
  358. BYTE *window = context->std_encoder->window;
  359. VERIFY_HASHES(2*WINDOW_SIZE);
  360. memcpy(&window[0], &window[context->bufpos - WINDOW_SIZE], WINDOW_SIZE);
  361. for (i = 0; i < STD_ENCODER_HASH_TABLE_SIZE; i++)
  362. {
  363. long val = ((long) lookup[i]) - WINDOW_SIZE;
  364. if (val <= 0)
  365. lookup[i] = (t_search_node) 0;
  366. else
  367. lookup[i] = (t_search_node) val;
  368. }
  369. for (i = 0; i < WINDOW_SIZE; i++)
  370. {
  371. long val = ((long) prev[i]) - WINDOW_SIZE;
  372. if (val <= 0)
  373. prev[i] = (t_search_node) 0;
  374. else
  375. prev[i] = (t_search_node) val;
  376. }
  377. #ifdef FULL_DEBUG
  378. memset(&window[WINDOW_SIZE], 0, WINDOW_SIZE);
  379. #endif
  380. VERIFY_HASHES(2*WINDOW_SIZE);
  381. context->bufpos = WINDOW_SIZE;
  382. context->bufpos_end = context->bufpos;
  383. }
  384. }
  385. //
  386. // Zero the running frequency counts
  387. //
  388. // Also set freq[END_OF_BLOCK_CODE] = 1
  389. //
  390. void StdEncoderZeroFrequencyCounts(t_std_encoder *encoder)
  391. {
  392. _ASSERT(encoder != NULL);
  393. memset(encoder->literal_tree_freq, 0, sizeof(encoder->literal_tree_freq));
  394. memset(encoder->dist_tree_freq, 0, sizeof(encoder->dist_tree_freq));
  395. encoder->literal_tree_freq[END_OF_BLOCK_CODE] = 1;
  396. }
  397. void StdEncoderReset(t_encoder_context *context)
  398. {
  399. t_std_encoder *encoder = context->std_encoder;
  400. _ASSERT(encoder != NULL);
  401. memset(encoder->lookup, 0, sizeof(encoder->lookup));
  402. context->window_size = WINDOW_SIZE;
  403. context->bufpos = context->window_size;
  404. context->bufpos_end = context->bufpos;
  405. encoder->recording_bitbuf = 0;
  406. encoder->recording_bitcount = 0;
  407. encoder->recording_bufptr = encoder->lit_dist_buffer;
  408. DeflateInitRecordingTables(
  409. encoder->recording_literal_tree_len,
  410. encoder->recording_literal_tree_code,
  411. encoder->recording_dist_tree_len,
  412. encoder->recording_dist_tree_code
  413. );
  414. StdEncoderZeroFrequencyCounts(encoder);
  415. }
  416. BOOL StdEncoderInit(t_encoder_context *context)
  417. {
  418. context->std_encoder = (t_std_encoder *) LocalAlloc(LMEM_FIXED, sizeof(t_std_encoder));
  419. if (context->std_encoder == NULL)
  420. return FALSE;
  421. StdEncoderReset(context);
  422. return TRUE;
  423. }