Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

478 lines
11 KiB

  1. /*
  2. * optfmtch.c
  3. *
  4. * Match finder for the optimal parser
  5. */
  6. #include <string.h>
  7. #include <stdio.h>
  8. #include <crtdbg.h>
  9. #include "deflate.h"
  10. #define VERIFY_SEARCH_CODE(routine_name) \
  11. { \
  12. int debug_search; \
  13. for (debug_search = 0; debug_search < clen; debug_search++) \
  14. { \
  15. if (window[ptr+debug_search] != window[BufPos+debug_search]) \
  16. { \
  17. _RPT2( \
  18. _CRT_WARN, \
  19. routine_name \
  20. " char mismatch @%3d (clen=%d)\n", \
  21. debug_search, clen); \
  22. \
  23. _RPT3( \
  24. _CRT_WARN, \
  25. " ptr=%8d, bufpos=%8d, end_pos=%8d\n\n", \
  26. ptr, BufPos, end_pos); \
  27. _ASSERT(0); \
  28. } \
  29. } \
  30. }
  31. #define VERIFY_MULTI_TREE_SEARCH_CODE(routine_name) \
  32. _ASSERT(window[BufPos] == window[ptr]); \
  33. _ASSERT(window[BufPos+1] == window[ptr+1]);
  34. /*
  35. * Finds the closest matches of all possible lengths, MIN_MATCH <= x <= MAX_MATCH,
  36. * at position BufPos.
  37. *
  38. * The positions of each match location are stored in context->matchpos_table[]
  39. *
  40. * Returns the longest such match length found, or zero if no matches found.
  41. */
  42. int optimal_find_match(t_encoder_context *context, long BufPos)
  43. {
  44. ULONG ptr;
  45. ULONG a, b;
  46. t_search_node *small_ptr, *big_ptr;
  47. t_search_node *left = context->optimal_encoder->search_left;
  48. t_search_node *right = context->optimal_encoder->search_right;
  49. t_match_pos *matchpos_table = context->optimal_encoder->matchpos_table;
  50. BYTE *window = context->optimal_encoder->window;
  51. ULONG end_pos;
  52. int val; /* must be signed */
  53. int clen;
  54. int same;
  55. int match_length;
  56. int small_len, big_len;
  57. USHORT tree_to_use;
  58. /*
  59. * Retrieve root node of tree to search, and insert current node at
  60. * the root.
  61. */
  62. tree_to_use = *((USHORT UNALIGNED *) &window[BufPos]);
  63. ptr = context->optimal_encoder->search_tree_root[tree_to_use];
  64. context->optimal_encoder->search_tree_root[tree_to_use] = (t_search_node) BufPos;
  65. /*
  66. * end_pos is the furthest location back we will search for matches
  67. *
  68. * Remember that our window size is reduced by 3 bytes because of
  69. * our repeated offset codes.
  70. *
  71. * Since BufPos starts at WINDOW_SIZE when compression begins,
  72. * end_pos will never become negative.
  73. */
  74. end_pos = BufPos - (WINDOW_SIZE-4);
  75. /*
  76. * Root node is either NULL, or points to a really distant position.
  77. */
  78. if (ptr <= end_pos)
  79. {
  80. left[BufPos] = right[BufPos] = 0;
  81. return 0;
  82. }
  83. /*
  84. * confirmed length (no need to check the first clen chars in a search)
  85. *
  86. * note: clen is always equal to min(small_len, big_len)
  87. */
  88. clen = 2;
  89. /*
  90. * current best match length
  91. */
  92. match_length = 2;
  93. /*
  94. * longest match which is < our string
  95. */
  96. small_len = 2;
  97. /*
  98. * longest match which is > our string
  99. */
  100. big_len = 2;
  101. #ifdef _DEBUG
  102. VERIFY_MULTI_TREE_SEARCH_CODE("binary_search_findmatch()");
  103. #endif
  104. /*
  105. * pointers to nodes to check
  106. */
  107. small_ptr = &left[BufPos];
  108. big_ptr = &right[BufPos];
  109. do
  110. {
  111. /* compare bytes at current node */
  112. same = clen;
  113. #ifdef _DEBUG
  114. VERIFY_SEARCH_CODE("optimal_findmatch()")
  115. #endif
  116. /* don't need to check first clen characters */
  117. a = ptr + clen;
  118. b = BufPos + clen;
  119. while ((val = ((int) window[a++]) - ((int) window[b++])) == 0)
  120. {
  121. /* don't exceed MAX_MATCH */
  122. if (++same >= MAX_MATCH)
  123. goto long_match;
  124. }
  125. if (val < 0)
  126. {
  127. if (same > big_len)
  128. {
  129. if (same > match_length)
  130. {
  131. long_match:
  132. do
  133. {
  134. matchpos_table[++match_length] = BufPos-ptr-1;
  135. } while (match_length < same);
  136. if (same >= BREAK_LENGTH)
  137. {
  138. *small_ptr = left[ptr];
  139. *big_ptr = right[ptr];
  140. goto end_bsearch;
  141. }
  142. }
  143. big_len = same;
  144. clen = min(small_len, big_len);
  145. }
  146. *big_ptr = (t_search_node) ptr;
  147. big_ptr = &left[ptr];
  148. ptr = *big_ptr;
  149. }
  150. else
  151. {
  152. if (same > small_len)
  153. {
  154. if (same > match_length)
  155. {
  156. do
  157. {
  158. matchpos_table[++match_length] = BufPos-ptr-1;
  159. } while (match_length < same);
  160. if (same >= BREAK_LENGTH)
  161. {
  162. *small_ptr = left[ptr];
  163. *big_ptr = right[ptr];
  164. goto end_bsearch;
  165. }
  166. }
  167. small_len = same;
  168. clen = min(small_len, big_len);
  169. }
  170. *small_ptr = (t_search_node) ptr;
  171. small_ptr = &right[ptr];
  172. ptr = *small_ptr;
  173. }
  174. } while (ptr > end_pos); /* while we don't go too far backwards */
  175. *small_ptr = 0;
  176. *big_ptr = 0;
  177. end_bsearch:
  178. /*
  179. * If we have multiple search trees, we are already guaranteed
  180. * a minimum match length of 2 when we reach here.
  181. *
  182. * If we only have one tree, then we're not guaranteed anything.
  183. */
  184. if (match_length < MIN_MATCH)
  185. return 0;
  186. else
  187. return (long) match_length;
  188. }
  189. /*
  190. * Inserts the string at the current BufPos into the tree.
  191. *
  192. * Does not record all the best match lengths or otherwise attempt
  193. * to search for matches
  194. *
  195. * Similar to the above function.
  196. */
  197. void optimal_insert(t_encoder_context *context, long BufPos, long end_pos)
  198. {
  199. long ptr;
  200. ULONG a,b;
  201. t_search_node *small_ptr, *big_ptr;
  202. t_search_node *left = context->optimal_encoder->search_left;
  203. t_search_node *right = context->optimal_encoder->search_right;
  204. BYTE *window = context->optimal_encoder->window;
  205. int val;
  206. int small_len, big_len;
  207. int same;
  208. int clen;
  209. USHORT tree_to_use;
  210. tree_to_use = *((USHORT UNALIGNED *) &window[BufPos]);
  211. ptr = context->optimal_encoder->search_tree_root[tree_to_use];
  212. context->optimal_encoder->search_tree_root[tree_to_use] = (t_search_node) BufPos;
  213. if (ptr <= end_pos)
  214. {
  215. left[BufPos] = right[BufPos] = 0;
  216. return;
  217. }
  218. clen = 2;
  219. small_len = 2;
  220. big_len = 2;
  221. #ifdef _DEBUG
  222. VERIFY_MULTI_TREE_SEARCH_CODE("quick_insert_bsearch_findmatch()");
  223. #endif
  224. small_ptr = &left[BufPos];
  225. big_ptr = &right[BufPos];
  226. do
  227. {
  228. same = clen;
  229. a = ptr+clen;
  230. b = BufPos+clen;
  231. #ifdef _DEBUG
  232. VERIFY_SEARCH_CODE("quick_insert_bsearch_findmatch()")
  233. #endif
  234. while ((val = ((int) window[a++]) - ((int) window[b++])) == 0)
  235. {
  236. /*
  237. * Here we break on BREAK_LENGTH, not MAX_MATCH
  238. */
  239. if (++same >= BREAK_LENGTH)
  240. break;
  241. }
  242. if (val < 0)
  243. {
  244. if (same > big_len)
  245. {
  246. if (same >= BREAK_LENGTH)
  247. {
  248. *small_ptr = left[ptr];
  249. *big_ptr = right[ptr];
  250. return;
  251. }
  252. big_len = same;
  253. clen = min(small_len, big_len);
  254. }
  255. *big_ptr = (t_search_node) ptr;
  256. big_ptr = &left[ptr];
  257. ptr = *big_ptr;
  258. }
  259. else
  260. {
  261. if (same > small_len)
  262. {
  263. if (same >= BREAK_LENGTH)
  264. {
  265. *small_ptr = left[ptr];
  266. *big_ptr = right[ptr];
  267. return;
  268. }
  269. small_len = same;
  270. clen = min(small_len, big_len);
  271. }
  272. *small_ptr = (t_search_node) ptr;
  273. small_ptr = &right[ptr];
  274. ptr = *small_ptr;
  275. }
  276. } while (ptr > end_pos);
  277. *small_ptr = 0;
  278. *big_ptr = 0;
  279. }
  280. /*
  281. * Remove a node from the search tree; this is ONLY done for the last
  282. * BREAK_LENGTH symbols (see optenc.c). This is because we will have
  283. * inserted strings that contain undefined data (e.g. we're at the 4th
  284. * last byte from the file and binary_search_findmatch() a string into
  285. * the tree - everything from the 4th symbol onwards is invalid, and
  286. * would cause problems if it remained in the tree, so we have to
  287. * remove it).
  288. */
  289. void optimal_remove_node(t_encoder_context *context, long BufPos, ULONG end_pos)
  290. {
  291. ULONG ptr;
  292. ULONG left_node_pos;
  293. ULONG right_node_pos;
  294. USHORT tree_to_use;
  295. t_search_node *link;
  296. t_search_node *left = context->optimal_encoder->search_left;
  297. t_search_node *right = context->optimal_encoder->search_right;
  298. BYTE *window = context->optimal_encoder->window;
  299. /*
  300. * The root node of tree_to_use should equal BufPos, since that is
  301. * the most recent insertion into that tree - but if we never
  302. * inserted this string (because it was a near match or a long
  303. * string of zeroes), then we can't remove it.
  304. */
  305. tree_to_use = *((USHORT UNALIGNED *) &window[BufPos]);
  306. /*
  307. * If we never inserted this string, do not attempt to remove it
  308. */
  309. if (context->optimal_encoder->search_tree_root[tree_to_use] != BufPos)
  310. return;
  311. link = &context->optimal_encoder->search_tree_root[tree_to_use];
  312. /*
  313. * If the last occurence was too far away
  314. */
  315. if (*link <= end_pos)
  316. {
  317. *link = 0;
  318. left[BufPos] = right[BufPos] = 0;
  319. return;
  320. }
  321. /*
  322. * Most recent location of these chars
  323. */
  324. ptr = BufPos;
  325. /*
  326. * Most recent location of a string which is "less than" it
  327. */
  328. left_node_pos = left[ptr];
  329. if (left_node_pos <= end_pos)
  330. left_node_pos = left[ptr] = 0;
  331. /*
  332. * Most recent location of a string which is "greater than" it
  333. */
  334. right_node_pos = right[ptr];
  335. if (right_node_pos <= end_pos)
  336. right_node_pos = right[ptr] = 0;
  337. while (1)
  338. {
  339. /*
  340. * If left node position is greater than right node position
  341. * then follow the left node, since that is the more recent
  342. * insertion into the tree. Otherwise follow the right node.
  343. */
  344. if (left_node_pos > right_node_pos)
  345. {
  346. /*
  347. * If it's too far away, then store that it never happened
  348. */
  349. if (left_node_pos <= end_pos)
  350. left_node_pos = 0;
  351. ptr = *link = (t_search_node) left_node_pos;
  352. if (!ptr)
  353. break;
  354. left_node_pos = right[ptr];
  355. link = &right[ptr];
  356. }
  357. else
  358. {
  359. /*
  360. * If it's too far away, then store that it never happened
  361. */
  362. if (right_node_pos <= end_pos)
  363. right_node_pos = 0;
  364. ptr = *link = (t_search_node) right_node_pos;
  365. if (!ptr)
  366. break;
  367. right_node_pos = left[ptr];
  368. link = &left[ptr];
  369. }
  370. }
  371. }
  372. void removeNodes(t_encoder_context *context)
  373. {
  374. long i;
  375. // remove the most recent insertions into the hash table, since we had invalid data
  376. // sitting at the end of the window
  377. for (i = 0; i <= BREAK_LENGTH; i++)
  378. {
  379. if (context->bufpos-i-1 < WINDOW_SIZE)
  380. break;
  381. optimal_remove_node(context, context->bufpos-i-1, context->bufpos-WINDOW_SIZE+BREAK_LENGTH);
  382. }
  383. }
  384. //
  385. // Reinsert the tree nodes we removed previously
  386. //
  387. void reinsertRemovedNodes(t_encoder_context *context)
  388. {
  389. long j;
  390. for (j = BREAK_LENGTH; j > 0; j--)
  391. {
  392. if (context->bufpos - j > WINDOW_SIZE)
  393. {
  394. optimal_insert(
  395. context,
  396. context->bufpos - j,
  397. context->bufpos - j - WINDOW_SIZE + 4
  398. );
  399. }
  400. }
  401. }