Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

574 lines
15 KiB

  1. /*
  2. * bsearch.c
  3. *
  4. * Binary search for optimal encoder
  5. */
  6. #include "encoder.h"
  7. #define left context->enc_Left
  8. #define right context->enc_Right
  9. /*
  10. * Define this to force checking that all search locations visited
  11. * are valid.
  12. *
  13. * For debugging purposes only.
  14. */
  15. #ifdef _DEBUG
  16. #define VERIFY_SEARCHES
  17. #endif
  18. #define VERIFY_SEARCH_CODE(routine_name) \
  19. { \
  20. int debug_search; \
  21. for (debug_search = 0; debug_search < clen; debug_search++) \
  22. { \
  23. _ASSERTE( context->enc_MemWindow[ptr+debug_search] == context->enc_MemWindow[BufPos+debug_search]); \
  24. } \
  25. }
  26. #define VERIFY_MULTI_TREE_SEARCH_CODE(routine_name) \
  27. _ASSERTE (context->enc_MemWindow[BufPos] == context->enc_MemWindow[ptr]); \
  28. _ASSERTE (context->enc_MemWindow[BufPos+1] == context->enc_MemWindow[ptr+1]);
  29. /*
  30. * Finds the closest matches of all possible lengths, MIN_MATCH <= x <= MAX_MATCH,
  31. * at position BufPos.
  32. *
  33. * The positions of each match location are stored in context->enc_matchpos_table[]
  34. *
  35. * Returns the longest such match length found, or zero if no matches found.
  36. */
  37. #ifndef ASM_BSEARCH_FINDMATCH
  38. long binary_search_findmatch(t_encoder_context *context, long BufPos)
  39. {
  40. ulong ptr;
  41. ulong a, b;
  42. ulong *small_ptr, *big_ptr;
  43. ulong end_pos;
  44. int val; /* must be signed */
  45. int bytes_to_boundary;
  46. int clen;
  47. int same;
  48. int match_length;
  49. int small_len, big_len;
  50. int i, best_repeated_offset;
  51. #ifdef MULTIPLE_SEARCH_TREES
  52. ushort tree_to_use;
  53. /*
  54. * Retrieve root node of tree to search, and insert current node at
  55. * the root.
  56. */
  57. tree_to_use = *((ushort UNALIGNED *) &context->enc_MemWindow[BufPos]);
  58. ptr = context->enc_tree_root[tree_to_use];
  59. context->enc_tree_root[tree_to_use] = BufPos;
  60. #else
  61. ptr = context->enc_single_tree_root;
  62. context->enc_single_tree_root = BufPos;
  63. #endif
  64. /*
  65. * end_pos is the furthest location back we will search for matches
  66. *
  67. * Remember that our window size is reduced by 3 bytes because of
  68. * our repeated offset codes.
  69. *
  70. * Since BufPos starts at context->enc_window_size when compression begins,
  71. * end_pos will never become negative.
  72. */
  73. end_pos = BufPos - (context->enc_window_size-4);
  74. /*
  75. * Root node is either NULL, or points to a really distant position.
  76. */
  77. if (ptr <= end_pos)
  78. {
  79. left[BufPos] = right[BufPos] = 0;
  80. return 0;
  81. }
  82. #ifdef MULTIPLE_SEARCH_TREES
  83. /*
  84. * confirmed length (no need to check the first clen chars in a search)
  85. *
  86. * note: clen is always equal to min(small_len, big_len)
  87. */
  88. clen = 2;
  89. /*
  90. * current best match length
  91. */
  92. match_length = 2;
  93. /*
  94. * longest match which is < our string
  95. */
  96. small_len = 2;
  97. /*
  98. * longest match which is > our string
  99. */
  100. big_len = 2;
  101. /*
  102. * record match position for match length 2
  103. */
  104. context->enc_matchpos_table[2] = BufPos - ptr + 2;
  105. #ifdef VERIFY_SEARCHES
  106. VERIFY_MULTI_TREE_SEARCH_CODE("binary_search_findmatch()");
  107. #endif
  108. #else /* !MULTIPLE_SEARCH_TREES */
  109. clen = 0;
  110. match_length = 0;
  111. small_len = 0;
  112. big_len = 0;
  113. #endif /* MULTIPLE_SEARCH_TREES */
  114. /*
  115. * pointers to nodes to check
  116. */
  117. small_ptr = &left[BufPos];
  118. big_ptr = &right[BufPos];
  119. do
  120. {
  121. /* compare bytes at current node */
  122. same = clen;
  123. #ifdef VERIFY_SEARCHES
  124. VERIFY_SEARCH_CODE("binary_search_findmatch()")
  125. #endif
  126. /* don't need to check first clen characters */
  127. a = ptr + clen;
  128. b = BufPos + clen;
  129. while ((val = ((int) context->enc_MemWindow[a++]) - ((int) context->enc_MemWindow[b++])) == 0)
  130. {
  131. /* don't exceed MAX_MATCH */
  132. if (++same >= MAX_MATCH)
  133. goto long_match;
  134. }
  135. if (val < 0)
  136. {
  137. if (same > big_len)
  138. {
  139. if (same > match_length)
  140. {
  141. long_match:
  142. do
  143. {
  144. context->enc_matchpos_table[++match_length] = BufPos-ptr+(NUM_REPEATED_OFFSETS-1);
  145. } while (match_length < same);
  146. if (same >= BREAK_LENGTH)
  147. {
  148. *small_ptr = left[ptr];
  149. *big_ptr = right[ptr];
  150. goto end_bsearch;
  151. }
  152. }
  153. big_len = same;
  154. clen = min(small_len, big_len);
  155. }
  156. *big_ptr = ptr;
  157. big_ptr = &left[ptr];
  158. ptr = *big_ptr;
  159. }
  160. else
  161. {
  162. if (same > small_len)
  163. {
  164. if (same > match_length)
  165. {
  166. do
  167. {
  168. context->enc_matchpos_table[++match_length] = BufPos-ptr+(NUM_REPEATED_OFFSETS-1);
  169. } while (match_length < same);
  170. if (same >= BREAK_LENGTH)
  171. {
  172. *small_ptr = left[ptr];
  173. *big_ptr = right[ptr];
  174. goto end_bsearch;
  175. }
  176. }
  177. small_len = same;
  178. clen = min(small_len, big_len);
  179. }
  180. *small_ptr = ptr;
  181. small_ptr = &right[ptr];
  182. ptr = *small_ptr;
  183. }
  184. } while (ptr > end_pos); /* while we don't go too far backwards */
  185. *small_ptr = 0;
  186. *big_ptr = 0;
  187. end_bsearch:
  188. /*
  189. * If we have multiple search trees, we are already guaranteed
  190. * a minimum match length of 2 when we reach here.
  191. *
  192. * If we only have one tree, then we're not guaranteed anything.
  193. */
  194. #ifndef MULTIPLE_SEARCH_TREES
  195. if (match_length < MIN_MATCH)
  196. return 0;
  197. #endif
  198. /*
  199. * Check to see if any of our match lengths can
  200. * use repeated offsets.
  201. */
  202. /*
  203. * repeated offset 1
  204. */
  205. for (i = 0; i < match_length; i++)
  206. {
  207. if (context->enc_MemWindow[BufPos+i] != context->enc_MemWindow[BufPos-context->enc_last_matchpos_offset[0]+i])
  208. break;
  209. }
  210. /*
  211. * the longest repeated offset
  212. */
  213. best_repeated_offset = i;
  214. if (i >= MIN_MATCH)
  215. {
  216. /*
  217. * Yes, we can do a repeated offset for some match lengths; replace
  218. * their positions with the repeated offset position
  219. */
  220. do
  221. {
  222. context->enc_matchpos_table[i] = 0; /* first repeated offset position */
  223. } while (--i >= MIN_MATCH);
  224. /* A speed optimization to cope with long runs of bytes */
  225. if (best_repeated_offset > BREAK_LENGTH)
  226. goto quick_return;
  227. }
  228. /*
  229. * repeated offset 2
  230. */
  231. for (i = 0; i < match_length; i++)
  232. {
  233. if (context->enc_MemWindow[BufPos+i] != context->enc_MemWindow[BufPos-context->enc_last_matchpos_offset[1]+i])
  234. break;
  235. }
  236. /*
  237. * Does the second repeated offset provide a longer match?
  238. *
  239. * If so, leave the first repeated offset alone, but fill out the
  240. * difference in match lengths in the table with repeated offset 1.
  241. */
  242. if (i > best_repeated_offset)
  243. {
  244. do
  245. {
  246. context->enc_matchpos_table[++best_repeated_offset] = 1;
  247. } while (best_repeated_offset < i);
  248. }
  249. /*
  250. * repeated offset 3
  251. */
  252. for (i = 0; i < match_length; i++)
  253. {
  254. if (context->enc_MemWindow[BufPos+i] != context->enc_MemWindow[BufPos-context->enc_last_matchpos_offset[2]+i])
  255. break;
  256. }
  257. /*
  258. * Does the third repeated offset provide a longer match?
  259. */
  260. if (i > best_repeated_offset)
  261. {
  262. do
  263. {
  264. context->enc_matchpos_table[++best_repeated_offset] = 2;
  265. } while (best_repeated_offset < i);
  266. }
  267. quick_return:
  268. /*
  269. * Don't let a match cross a 32K boundary
  270. */
  271. bytes_to_boundary = (CHUNK_SIZE-1) - ((int) BufPos & (CHUNK_SIZE-1));
  272. if (match_length > bytes_to_boundary)
  273. {
  274. match_length = bytes_to_boundary;
  275. if (match_length < MIN_MATCH)
  276. match_length = 0;
  277. }
  278. return (long) match_length;
  279. }
  280. #endif
  281. /*
  282. * Inserts the string at the current BufPos into the tree.
  283. *
  284. * Does not record all the best match lengths or otherwise attempt
  285. * to search for matches
  286. *
  287. * Similar to the above function.
  288. */
  289. #ifndef ASM_QUICK_INSERT_BSEARCH_FINDMATCH
  290. void quick_insert_bsearch_findmatch(t_encoder_context *context, long BufPos, long end_pos)
  291. {
  292. long ptr;
  293. ulong a,b;
  294. ulong *small_ptr, *big_ptr;
  295. int val;
  296. int small_len, big_len;
  297. int same;
  298. int clen;
  299. #ifdef MULTIPLE_SEARCH_TREES
  300. ushort tree_to_use;
  301. tree_to_use = *((ushort UNALIGNED *) &context->enc_MemWindow[BufPos]);
  302. ptr = context->enc_tree_root[tree_to_use];
  303. context->enc_tree_root[tree_to_use] = BufPos;
  304. #else
  305. ptr = context->enc_single_tree_root;
  306. context->enc_single_tree_root = BufPos;
  307. #endif
  308. if (ptr <= end_pos)
  309. {
  310. left[BufPos] = right[BufPos] = 0;
  311. return;
  312. }
  313. #ifdef MULTIPLE_SEARCH_TREES
  314. clen = 2;
  315. small_len = 2;
  316. big_len = 2;
  317. #ifdef VERIFY_SEARCHES
  318. VERIFY_MULTI_TREE_SEARCH_CODE("quick_insert_bsearch_findmatch()");
  319. #endif
  320. #else
  321. clen = 0;
  322. small_len = 0;
  323. big_len = 0;
  324. #endif
  325. small_ptr = &left[BufPos];
  326. big_ptr = &right[BufPos];
  327. do
  328. {
  329. _ASSERTE ((ulong) ptr >= (ulong) (context->enc_RealLeft - context->enc_Left));
  330. same = clen;
  331. a = ptr+clen;
  332. b = BufPos+clen;
  333. #ifdef VERIFY_SEARCHES
  334. VERIFY_SEARCH_CODE("quick_insert_bsearch_findmatch()")
  335. #endif
  336. while ((val = ((int) context->enc_MemWindow[a++]) - ((int) context->enc_MemWindow[b++])) == 0)
  337. {
  338. /*
  339. * Here we break on BREAK_LENGTH, not MAX_MATCH
  340. */
  341. if (++same >= BREAK_LENGTH)
  342. break;
  343. }
  344. if (val < 0)
  345. {
  346. if (same > big_len)
  347. {
  348. if (same >= BREAK_LENGTH)
  349. {
  350. *small_ptr = left[ptr];
  351. *big_ptr = right[ptr];
  352. return;
  353. }
  354. big_len = same;
  355. clen = min(small_len, big_len);
  356. }
  357. *big_ptr = ptr;
  358. big_ptr = &left[ptr];
  359. ptr = *big_ptr;
  360. }
  361. else
  362. {
  363. if (same > small_len)
  364. {
  365. if (same >= BREAK_LENGTH)
  366. {
  367. *small_ptr = left[ptr];
  368. *big_ptr = right[ptr];
  369. return;
  370. }
  371. small_len = same;
  372. clen = min(small_len, big_len);
  373. }
  374. *small_ptr = ptr;
  375. small_ptr = &right[ptr];
  376. ptr = *small_ptr;
  377. }
  378. } while (ptr > end_pos);
  379. *small_ptr = 0;
  380. *big_ptr = 0;
  381. }
  382. #endif
  383. /*
  384. * Remove a node from the search tree; this is ONLY done for the last
  385. * BREAK_LENGTH symbols (see optenc.c). This is because we will have
  386. * inserted strings that contain undefined data (e.g. we're at the 4th
  387. * last byte of the file and binary_search_findmatch() inserts a string
  388. * into the tree - everything from the 4th symbol onwards is invalid,
  389. * and would cause problems if it remained in the tree, so we have to
  390. * remove it).
  391. */
  392. void binary_search_remove_node(t_encoder_context *context, long BufPos, ulong end_pos)
  393. {
  394. ulong ptr;
  395. ulong left_node_pos;
  396. ulong right_node_pos;
  397. ulong *link;
  398. #ifdef MULTIPLE_SEARCH_TREES
  399. ushort tree_to_use;
  400. /*
  401. * The root node of tree_to_use should equal BufPos, since that is
  402. * the most recent insertion into that tree - but if we never
  403. * inserted this string (because it was a near match or a long
  404. * string of zeroes), then we can't remove it.
  405. */
  406. tree_to_use = *((ushort UNALIGNED *) &context->enc_MemWindow[BufPos]);
  407. /*
  408. * If we never inserted this string, do not attempt to remove it
  409. */
  410. if (context->enc_tree_root[tree_to_use] != (ulong) BufPos)
  411. return;
  412. link = &context->enc_tree_root[tree_to_use];
  413. #else
  414. if (context->enc_single_tree_root != (ulong) BufPos)
  415. return;
  416. link = &context->enc_single_tree_root;
  417. #endif
  418. /*
  419. * If the last occurence was too far away
  420. */
  421. if (*link <= end_pos)
  422. {
  423. *link = 0;
  424. left[BufPos] = right[BufPos] = 0;
  425. return;
  426. }
  427. /*
  428. * Most recent location of these chars
  429. */
  430. ptr = BufPos;
  431. /*
  432. * Most recent location of a string which is "less than" it
  433. */
  434. left_node_pos = left[ptr];
  435. if (left_node_pos <= end_pos)
  436. left_node_pos = left[ptr] = 0;
  437. /*
  438. * Most recent location of a string which is "greater than" it
  439. */
  440. right_node_pos = right[ptr];
  441. if (right_node_pos <= end_pos)
  442. right_node_pos = right[ptr] = 0;
  443. while (1)
  444. {
  445. #ifdef VERIFY_SEARCHES
  446. _ASSERTE (left_node_pos < (ulong) BufPos);
  447. _ASSERTE (right_node_pos < (ulong) BufPos);
  448. #endif
  449. /*
  450. * If left node position is greater than right node position
  451. * then follow the left node, since that is the more recent
  452. * insertion into the tree. Otherwise follow the right node.
  453. */
  454. if (left_node_pos > right_node_pos)
  455. {
  456. /*
  457. * If it's too far away, then store that it never happened
  458. */
  459. if (left_node_pos <= end_pos)
  460. left_node_pos = 0;
  461. ptr = *link = left_node_pos;
  462. if (!ptr)
  463. break;
  464. left_node_pos = right[ptr];
  465. link = &right[ptr];
  466. }
  467. else
  468. {
  469. /*
  470. * If it's too far away, then store that it never happened
  471. */
  472. if (right_node_pos <= end_pos)
  473. right_node_pos = 0;
  474. ptr = *link = right_node_pos;
  475. if (!ptr)
  476. break;
  477. right_node_pos = left[ptr];
  478. link = &left[ptr];
  479. }
  480. }
  481. }