Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

626 lines
20 KiB

  1. /*
  2. * fstenc.c
  3. *
  4. * Fast encoder
  5. *
  6. * This is a one pass encoder which uses predefined trees. However, since these are not the same
  7. * trees defined for a fixed block (we use better trees than that), we output a dynamic block header.
  8. */
  9. #include <string.h>
  10. #include <stdio.h>
  11. #include <crtdbg.h>
  12. #include "deflate.h"
  13. #include "fasttbl.h"
  14. //
  15. // For debugging purposes:
  16. //
  17. // Verifies that all of the hash pointers in the hash table are correct, and that everything
  18. // in the same hash chain has the same hash value
  19. //
  20. #ifdef FULL_DEBUG
  21. #define VERIFY_HASHES(bufpos) FastEncoderVerifyHashes(context, bufpos)
  22. #else
  23. #define VERIFY_HASHES(bufpos) ;
  24. #endif
  25. //
  26. // Update hash variable "h" with character c
  27. //
  28. #define UPDATE_HASH(h,c) \
  29. h = ((h) << FAST_ENCODER_HASH_SHIFT) ^ (c);
  30. //
  31. // Insert a string into the hash chain at location bufpos
  32. //
  33. #define INSERT_STRING(search,bufpos) \
  34. { \
  35. UPDATE_HASH(hash, window[bufpos+2]); \
  36. \
  37. _ASSERT((unsigned int) FAST_ENCODER_RECALCULATE_HASH(bufpos) == (unsigned int) (hash & FAST_ENCODER_HASH_MASK)); \
  38. \
  39. search = lookup[hash & FAST_ENCODER_HASH_MASK]; \
  40. lookup[hash & FAST_ENCODER_HASH_MASK] = (t_search_node) (bufpos); \
  41. prev[bufpos & FAST_ENCODER_WINDOW_MASK] = (t_search_node) (search); \
  42. }
  43. //
  44. // Output bits function which uses local variables for the bit buffer
  45. //
  46. #define LOCAL_OUTPUT_BITS(n, x) \
  47. { \
  48. bitbuf |= ((x) << bitcount); \
  49. bitcount += (n); \
  50. if (bitcount >= 16) \
  51. { \
  52. *output_curpos++ = (BYTE) bitbuf; \
  53. *output_curpos++ = (BYTE) (bitbuf >> 8); \
  54. bitcount -= 16; \
  55. bitbuf >>= 16; \
  56. } \
  57. }
  58. //
  59. // Output unmatched symbol c
  60. //
  61. #define OUTPUT_CHAR(c) \
  62. LOCAL_OUTPUT_BITS(g_FastEncoderLiteralCodeInfo[c] & 31, g_FastEncoderLiteralCodeInfo[c] >> 5);
  63. //
  64. // Output a match with length match_len (>= MIN_MATCH) and displacement match_pos
  65. //
  66. // Optimisation: unlike the other encoders, here we have an array of codes for each match
  67. // length (not just each match length slot), complete with all the extra bits filled in, in
  68. // a single array element.
  69. //
  70. // There are many advantages to doing this:
  71. //
  72. // 1. A single array lookup on g_FastEncoderLiteralCodeInfo, instead of separate array lookups
  73. // on g_LengthLookup (to get the length slot), g_FastEncoderLiteralTreeLength,
  74. // g_FastEncoderLiteralTreeCode, g_ExtraLengthBits, and g_BitMask
  75. //
  76. // 2. The array is an array of ULONGs, so no access penalty, unlike for accessing those USHORT
  77. // code arrays in the other encoders (although they could be made into ULONGs with some
  78. // modifications to the source).
  79. //
  80. // Note, if we could guarantee that code_len <= 16 always, then we could skip an if statement here.
  81. //
  82. // A completely different optimisation is used for the distance codes since, obviously, a table for
  83. // all 8192 distances combining their extra bits is not feasible. The distance codeinfo table is
  84. // made up of code[], len[] and # extra_bits for this code.
  85. //
  86. // The advantages are similar to the above; a ULONG array instead of a USHORT and BYTE array, better
  87. // cache locality, fewer memory operations.
  88. //
  89. #define OUTPUT_MATCH(match_len, match_pos) \
  90. { \
  91. int extra_bits; \
  92. int code_len; \
  93. ULONG code_info; \
  94. \
  95. _ASSERT(match_len >= MIN_MATCH && match_len <= MAX_MATCH); \
  96. \
  97. code_info = g_FastEncoderLiteralCodeInfo[(NUM_CHARS+1-MIN_MATCH)+match_len]; \
  98. code_len = code_info & 31; \
  99. _ASSERT(code_len != 0); \
  100. if (code_len <= 16) \
  101. { \
  102. LOCAL_OUTPUT_BITS(code_len, code_info >> 5); \
  103. } \
  104. else \
  105. { \
  106. LOCAL_OUTPUT_BITS(16, (code_info >> 5) & 65535); \
  107. LOCAL_OUTPUT_BITS(code_len-16, code_info >> (5+16)); \
  108. } \
  109. code_info = g_FastEncoderDistanceCodeInfo[POS_SLOT(match_pos)]; \
  110. LOCAL_OUTPUT_BITS(code_info & 15, code_info >> 8); \
  111. extra_bits = (code_info >> 4) & 15; \
  112. if (extra_bits != 0) LOCAL_OUTPUT_BITS(extra_bits, (match_pos) & g_BitMask[extra_bits]); \
  113. }
  114. //
  115. // This commented out code is the old way of doing things, which is what the other encoders use
  116. //
  117. #if 0
  118. #define OUTPUT_MATCH(match_len, match_pos) \
  119. { \
  120. int pos_slot = POS_SLOT(match_pos); \
  121. int len_slot = g_LengthLookup[match_len - MIN_MATCH]; \
  122. int extra_bits; \
  123. \
  124. _ASSERT(match_len >= MIN_MATCH && match_len <= MAX_MATCH); \
  125. _ASSERT(g_FastEncoderLiteralTreeLength[(NUM_CHARS+1)+len_slot] != 0); \
  126. _ASSERT(g_FastEncoderDistanceTreeLength[pos_slot] != 0); \
  127. \
  128. LOCAL_OUTPUT_BITS(g_FastEncoderLiteralTreeLength[(NUM_CHARS+1)+len_slot], g_FastEncoderLiteralTreeCode[(NUM_CHARS+1)+len_slot]); \
  129. extra_bits = g_ExtraLengthBits[len_slot]; \
  130. if (extra_bits != 0) LOCAL_OUTPUT_BITS(extra_bits, (match_len-MIN_MATCH) & g_BitMask[extra_bits]); \
  131. \
  132. LOCAL_OUTPUT_BITS(g_FastEncoderDistanceTreeLength[pos_slot], g_FastEncoderDistanceTreeCode[pos_slot]); \
  133. extra_bits = g_ExtraDistanceBits[pos_slot]; \
  134. if (extra_bits != 0) LOCAL_OUTPUT_BITS(extra_bits, (match_pos) & g_BitMask[extra_bits]); \
  135. }
  136. #endif
  137. //
  138. // Local function prototypes
  139. //
  140. static void FastEncoderMoveWindows(t_encoder_context *context);
  141. static int FastEncoderFindMatch(
  142. const BYTE * window,
  143. const USHORT * prev,
  144. long bufpos,
  145. long search,
  146. t_match_pos * match_pos,
  147. int cutoff,
  148. int nice_length
  149. );
  150. //
  151. // Output the block type and tree structure for our hard-coded trees.
  152. //
  153. // Functionally equivalent to:
  154. //
  155. // outputBits(context, 1, 1); // "final" block flag
  156. // outputBits(context, 2, BLOCKTYPE_DYNAMIC);
  157. // outputTreeStructure(context, g_FastEncoderLiteralTreeLength, g_FastEncoderDistanceTreeLength);
  158. //
  159. // However, all of the above has smartly been cached in global data, so we just memcpy().
  160. //
  161. void FastEncoderOutputPreamble(t_encoder_context *context)
  162. {
  163. #if 0
  164. // slow way:
  165. outputBits(context, 1+2, 1 | (BLOCKTYPE_DYNAMIC << 1));
  166. outputTreeStructure(context, g_FastEncoderLiteralTreeLength, g_FastEncoderDistanceTreeLength);
  167. #endif
  168. // make sure tree has been init
  169. _ASSERT(g_FastEncoderTreeLength > 0);
  170. // make sure we have enough space to output tree
  171. _ASSERT(context->output_curpos + g_FastEncoderTreeLength < context->output_endpos);
  172. // fast way:
  173. memcpy(context->output_curpos, g_FastEncoderTreeStructureData, g_FastEncoderTreeLength);
  174. context->output_curpos += g_FastEncoderTreeLength;
  175. // need to get final states of bitbuf and bitcount after outputting all that stuff
  176. context->bitbuf = g_FastEncoderPostTreeBitbuf;
  177. context->bitcount = g_FastEncoderPostTreeBitcount;
  178. }
  179. //
  180. // Fast encoder deflate function
  181. //
  182. void FastEncoderDeflate(
  183. t_encoder_context * context,
  184. int search_depth, // # hash links to traverse
  185. int lazy_match_threshold, // don't search @ X+1 if match length @ X is > lazy
  186. int good_length, // divide traversal depth by 4 if match length > good
  187. int nice_length // in match finder, if we find >= nice_length match, quit immediately
  188. )
  189. {
  190. long bufpos;
  191. unsigned int hash;
  192. unsigned long bitbuf;
  193. int bitcount;
  194. BYTE * output_curpos;
  195. t_fast_encoder *encoder = context->fast_encoder;
  196. byte * window = encoder->window; // make local copies of context variables
  197. t_search_node * prev = encoder->prev;
  198. t_search_node * lookup = encoder->lookup;
  199. //
  200. // If this is the first time in here (since last reset) then we need to output our dynamic
  201. // block header
  202. //
  203. if (encoder->fOutputBlockHeader == FALSE)
  204. {
  205. encoder->fOutputBlockHeader = TRUE;
  206. //
  207. // Watch out! Calls to outputBits() and outputTreeStructure() use the bit buffer
  208. // variables stored in the context, not our local cached variables.
  209. //
  210. FastEncoderOutputPreamble(context);
  211. }
  212. //
  213. // Copy bitbuf vars into local variables since we're now using OUTPUT_BITS macro.
  214. // Do not call anything that uses the context structure's bit buffer variables!
  215. //
  216. output_curpos = context->output_curpos;
  217. bitbuf = context->bitbuf;
  218. bitcount = context->bitcount;
  219. // copy bufpos into local variable
  220. bufpos = context->bufpos;
  221. VERIFY_HASHES(bufpos); // debug mode: verify that the hash table is correct
  222. // initialise the value of the hash
  223. // no problem if locations bufpos, bufpos+1 are invalid (not enough data), since we will
  224. // never insert using that hash value
  225. hash = 0;
  226. UPDATE_HASH(hash, window[bufpos]);
  227. UPDATE_HASH(hash, window[bufpos+1]);
  228. // while we haven't come to the end of the input, and we still aren't close to the end
  229. // of the output
  230. while (bufpos < context->bufpos_end && output_curpos < context->output_near_end_threshold)
  231. {
  232. int match_len;
  233. t_match_pos match_pos;
  234. t_match_pos search;
  235. VERIFY_HASHES(bufpos); // debugger: verify that hash table is correct
  236. if (context->bufpos_end - bufpos <= 3)
  237. {
  238. // The hash value becomes corrupt when we get within 3 characters of the end of the
  239. // input buffer, since the hash value is based on 3 characters. We just stop
  240. // inserting into the hash table at this point, and allow no matches.
  241. match_len = 0;
  242. }
  243. else
  244. {
  245. // insert string into hash table and return most recent location of same hash value
  246. INSERT_STRING(search,bufpos);
  247. // did we find a recent location of this hash value?
  248. if (search != 0)
  249. {
  250. // yes, now find a match at what we'll call position X
  251. match_len = FastEncoderFindMatch(window, prev, bufpos, search, &match_pos, search_depth, nice_length);
  252. // truncate match if we're too close to the end of the input buffer
  253. if (bufpos + match_len > context->bufpos_end)
  254. match_len = context->bufpos_end - bufpos;
  255. }
  256. else
  257. {
  258. // no most recent location found
  259. match_len = 0;
  260. }
  261. }
  262. if (match_len < MIN_MATCH)
  263. {
  264. // didn't find a match, so output unmatched char
  265. OUTPUT_CHAR(window[bufpos]);
  266. bufpos++;
  267. }
  268. else
  269. {
  270. // bufpos now points to X+1
  271. bufpos++;
  272. // is this match so good (long) that we should take it automatically without
  273. // checking X+1 ?
  274. if (match_len <= lazy_match_threshold)
  275. {
  276. int next_match_len;
  277. t_match_pos next_match_pos;
  278. // sets search
  279. INSERT_STRING(search,bufpos);
  280. // no, so check for a better match at X+1
  281. if (search != 0)
  282. {
  283. next_match_len = FastEncoderFindMatch(
  284. window,
  285. prev,
  286. bufpos,
  287. search,
  288. &next_match_pos,
  289. match_len < good_length ? search_depth : (search_depth >> 2),
  290. nice_length
  291. );
  292. // truncate match if we're too close to the end of the buffer
  293. // note: next_match_len could now be < MIN_MATCH
  294. if (bufpos + next_match_len > context->bufpos_end)
  295. next_match_len = context->bufpos_end - bufpos;
  296. }
  297. else
  298. {
  299. next_match_len = 0;
  300. }
  301. // right now X and X+1 are both inserted into the search tree
  302. if (next_match_len > match_len)
  303. {
  304. // since next_match_len > match_len, it can't be < MIN_MATCH here
  305. // match at X+1 is better, so output unmatched char at X
  306. OUTPUT_CHAR(window[bufpos-1]);
  307. // now output match at location X+1
  308. OUTPUT_MATCH(next_match_len, next_match_pos);
  309. // insert remainder of second match into search tree
  310. //
  311. // example: (*=inserted already)
  312. //
  313. // X X+1 X+2 X+3 X+4
  314. // * *
  315. // nextmatchlen=3
  316. // bufpos
  317. //
  318. // If next_match_len == 3, we want to perform 2
  319. // insertions (at X+2 and X+3). However, first we must
  320. // inc bufpos.
  321. //
  322. bufpos++; // now points to X+2
  323. match_len = next_match_len;
  324. goto insert;
  325. }
  326. else
  327. {
  328. // match at X is better, so take it
  329. OUTPUT_MATCH(match_len, match_pos);
  330. //
  331. // Insert remainder of first match into search tree, minus the first
  332. // two locations, which were inserted by the FindMatch() calls.
  333. //
  334. // For example, if match_len == 3, then we've inserted at X and X+1
  335. // already (and bufpos is now pointing at X+1), and now we need to insert
  336. // only at X+2.
  337. //
  338. match_len--;
  339. bufpos++; // now bufpos points to X+2
  340. goto insert;
  341. }
  342. }
  343. else /* match_length >= good_match */
  344. {
  345. // in assertion: bufpos points to X+1, location X inserted already
  346. // first match is so good that we're not even going to check at X+1
  347. OUTPUT_MATCH(match_len, match_pos);
  348. // insert remainder of match at X into search tree
  349. insert:
  350. if (context->bufpos_end - bufpos <= match_len)
  351. {
  352. bufpos += (match_len-1);
  353. }
  354. else
  355. {
  356. while (--match_len > 0)
  357. {
  358. t_match_pos ignore;
  359. INSERT_STRING(ignore,bufpos);
  360. bufpos++;
  361. }
  362. }
  363. }
  364. }
  365. } /* end ... while (bufpos < bufpos_end) */
  366. // store local variables back in context
  367. context->bufpos = bufpos;
  368. context->bitbuf = bitbuf;
  369. context->bitcount = bitcount;
  370. context->output_curpos = output_curpos;
  371. VERIFY_HASHES(bufpos); // debugger: verify that hash table is correct
  372. if (bufpos == context->bufpos_end)
  373. context->state = STATE_NORMAL;
  374. else
  375. context->state = STATE_OUTPUTTING_BLOCK;
  376. // slide the window if bufpos has reached 2*window size
  377. if (context->bufpos == 2*FAST_ENCODER_WINDOW_SIZE)
  378. FastEncoderMoveWindows(context);
  379. }
  380. static void FastEncoderMoveWindows(t_encoder_context *context)
  381. {
  382. t_search_node *lookup = context->fast_encoder->lookup;
  383. t_search_node *prev = context->fast_encoder->prev;
  384. BYTE *window = context->fast_encoder->window;
  385. int i;
  386. _ASSERT(context->bufpos == 2*FAST_ENCODER_WINDOW_SIZE);
  387. // verify that the hash table is correct
  388. VERIFY_HASHES(2*FAST_ENCODER_WINDOW_SIZE);
  389. memcpy(&window[0], &window[context->bufpos - FAST_ENCODER_WINDOW_SIZE], FAST_ENCODER_WINDOW_SIZE);
  390. // move all the hash pointers back
  391. // BUGBUG We are incurring a performance penalty since lookup[] is a USHORT array. Would be
  392. // nice to subtract from two locations at a time.
  393. for (i = 0; i < FAST_ENCODER_HASH_TABLE_SIZE; i++)
  394. {
  395. long val = ((long) lookup[i]) - FAST_ENCODER_WINDOW_SIZE;
  396. if (val <= 0) // too far away now? then set to zero
  397. lookup[i] = (t_search_node) 0;
  398. else
  399. lookup[i] = (t_search_node) val;
  400. }
  401. // prev[]'s are absolute pointers, not relative pointers, so we have to move them back too
  402. // making prev[]'s into relative pointers poses problems of its own
  403. for (i = 0; i < FAST_ENCODER_WINDOW_SIZE; i++)
  404. {
  405. long val = ((long) prev[i]) - FAST_ENCODER_WINDOW_SIZE;
  406. if (val <= 0)
  407. prev[i] = (t_search_node) 0;
  408. else
  409. prev[i] = (t_search_node) val;
  410. }
  411. #ifdef FULL_DEBUG
  412. // For debugging, wipe the window clean, so that if there is a bug in our hashing,
  413. // the hash pointers will now point to locations which are not valid for the hash value
  414. // (and will be caught by our ASSERTs).
  415. memset(&window[FAST_ENCODER_WINDOW_SIZE], 0, FAST_ENCODER_WINDOW_SIZE);
  416. #endif
  417. VERIFY_HASHES(2*FAST_ENCODER_WINDOW_SIZE); // debug: verify hash table is correct
  418. context->bufpos = FAST_ENCODER_WINDOW_SIZE;
  419. context->bufpos_end = context->bufpos;
  420. }
  421. //
  422. // Find match
  423. //
  424. // Returns match length found. A match length < MIN_MATCH means no match was found.
  425. //
  426. static int FastEncoderFindMatch(
  427. const BYTE * window, // window array
  428. const USHORT * prev, // prev ptr array
  429. long bufpos, // current buffer position
  430. long search, // where to start searching
  431. t_match_pos * match_pos, // return match position here
  432. int cutoff, // # links to traverse
  433. int nice_length // stop immediately if we find a match >= nice_length
  434. )
  435. {
  436. // make local copies of context variables
  437. long earliest;
  438. int best_match = 0; // best match length found so far
  439. t_match_pos l_match_pos = 0; // absolute match position of best match found
  440. BYTE want_char;
  441. _ASSERT(bufpos >= 0 && bufpos < 2*FAST_ENCODER_WINDOW_SIZE);
  442. _ASSERT(search < bufpos);
  443. _ASSERT(FAST_ENCODER_RECALCULATE_HASH(search) == FAST_ENCODER_RECALCULATE_HASH(bufpos));
  444. // the earliest we can look
  445. earliest = bufpos - FAST_ENCODER_WINDOW_SIZE;
  446. _ASSERT(earliest >= 0);
  447. // store window[bufpos + best_match]
  448. want_char = window[bufpos];
  449. while (search > earliest)
  450. {
  451. // make sure all our hash links are valid
  452. _ASSERT(FAST_ENCODER_RECALCULATE_HASH(search) == FAST_ENCODER_RECALCULATE_HASH(bufpos));
  453. // Start by checking the character that would allow us to increase the match
  454. // length by one. This improves performance quite a bit.
  455. if (window[search + best_match] == want_char)
  456. {
  457. int j;
  458. // Now make sure that all the other characters are correct
  459. for (j = 0; j < MAX_MATCH; j++)
  460. {
  461. if (window[bufpos+j] != window[search+j])
  462. break;
  463. }
  464. if (j > best_match)
  465. {
  466. best_match = j;
  467. l_match_pos = search; // absolute position
  468. if (j > nice_length)
  469. break;
  470. want_char = window[bufpos+j];
  471. }
  472. }
  473. if (--cutoff == 0)
  474. break;
  475. // make sure we're always going backwards
  476. _ASSERT(prev[search & FAST_ENCODER_WINDOW_MASK] < search);
  477. search = (long) prev[search & FAST_ENCODER_WINDOW_MASK];
  478. }
  479. // doesn't necessarily mean we found a match; best_match could be > 0 and < MIN_MATCH
  480. *match_pos = bufpos - l_match_pos - 1; // convert absolute to relative position
  481. // don't allow match length 3's which are too far away to be worthwhile
  482. if (best_match == 3 && *match_pos >= FAST_ENCODER_MATCH3_DIST_THRESHOLD)
  483. return 0;
  484. _ASSERT(best_match < MIN_MATCH || *match_pos < FAST_ENCODER_WINDOW_SIZE);
  485. return best_match;
  486. }
  487. void FastEncoderReset(t_encoder_context *context)
  488. {
  489. _ASSERT(context->fast_encoder != NULL);
  490. // zero hash table
  491. memset(context->fast_encoder->lookup, 0, sizeof(context->fast_encoder->lookup));
  492. context->window_size = FAST_ENCODER_WINDOW_SIZE;
  493. context->bufpos = FAST_ENCODER_WINDOW_SIZE;
  494. context->bufpos_end = context->bufpos;
  495. context->fast_encoder->fOutputBlockHeader = FALSE;
  496. }
  497. BOOL FastEncoderInit(t_encoder_context *context)
  498. {
  499. context->fast_encoder = (t_fast_encoder *) LocalAlloc(LMEM_FIXED, sizeof(t_fast_encoder));
  500. if (context->fast_encoder == NULL)
  501. return FALSE;
  502. FastEncoderReset(context);
  503. return TRUE;
  504. }
  505. //
  506. // Pregenerate the structure of the dynamic tree header which is output for
  507. // the fast encoder. Also record the final states of bitcount and bitbuf
  508. // after outputting.
  509. //
  510. void FastEncoderGenerateDynamicTreeEncoding(void)
  511. {
  512. t_encoder_context context;
  513. // Create a fake context with output pointers into our global data
  514. memset(&context, 0, sizeof(context));
  515. context.output_curpos = g_FastEncoderTreeStructureData;
  516. context.output_endpos = g_FastEncoderTreeStructureData + sizeof(g_FastEncoderTreeStructureData);
  517. context.output_near_end_threshold = context.output_endpos - 16;
  518. InitBitBuffer(&context);
  519. outputBits(&context, 1, 1); // "final" block flag
  520. outputBits(&context, 2, BLOCKTYPE_DYNAMIC);
  521. outputTreeStructure(
  522. &context,
  523. g_FastEncoderLiteralTreeLength,
  524. g_FastEncoderDistanceTreeLength
  525. );
  526. g_FastEncoderTreeLength = (int) (context.output_curpos - (BYTE *) g_FastEncoderTreeStructureData);
  527. g_FastEncoderPostTreeBitbuf = context.bitbuf;
  528. g_FastEncoderPostTreeBitcount = context.bitcount;
  529. }