Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

586 lines
12 KiB

  1. /*
  2. * bsearch.c
  3. *
  4. * Binary search for optimal encoder
  5. */
  6. #include "encoder.h"
  7. #define left context->enc_Left
  8. #define right context->enc_Right
  9. /*
  10. * Define this to force checking that all search locations visited
  11. * are valid.
  12. *
  13. * For debugging purposes only.
  14. */
  15. #ifdef _DEBUG
  16. # define VERIFY_SEARCHES
  17. #endif
  18. #define VERIFY_SEARCH_CODE(routine_name) \
  19. { \
  20. int debug_search; \
  21. for (debug_search = 0; debug_search < clen; debug_search++) \
  22. { \
  23. if (context->enc_MemWindow[ptr+debug_search] != context->enc_MemWindow[BufPos+debug_search]) \
  24. { \
  25. _RPT2( \
  26. _CRT_WARN, \
  27. routine_name \
  28. " char mismatch @%3d (clen=%d)\n", \
  29. debug_search, clen); \
  30. \
  31. _RPT3( \
  32. _CRT_WARN, \
  33. " ptr=%8d, bufpos=%8d, end_pos=%8d\n\n", \
  34. ptr, BufPos, end_pos); \
  35. } \
  36. } \
  37. }
  38. #define VERIFY_MULTI_TREE_SEARCH_CODE(routine_name) \
  39. _ASSERTE (context->enc_MemWindow[BufPos] == context->enc_MemWindow[ptr]); \
  40. _ASSERTE (context->enc_MemWindow[BufPos+1] == context->enc_MemWindow[ptr+1]);
  41. /*
  42. * Finds the closest matches of all possible lengths, MIN_MATCH <= x <= MAX_MATCH,
  43. * at position BufPos.
  44. *
  45. * The positions of each match location are stored in context->enc_matchpos_table[]
  46. *
  47. * Returns the longest such match length found, or zero if no matches found.
  48. */
  49. #ifndef ASM_BSEARCH_FINDMATCH
  50. long binary_search_findmatch(t_encoder_context *context, long BufPos)
  51. {
  52. ulong ptr;
  53. ulong a, b;
  54. ulong *small_ptr, *big_ptr;
  55. ulong end_pos;
  56. int val; /* must be signed */
  57. int bytes_to_boundary;
  58. int clen;
  59. int same;
  60. int match_length;
  61. int small_len, big_len;
  62. int i, best_repeated_offset;
  63. #ifdef MULTIPLE_SEARCH_TREES
  64. ushort tree_to_use;
  65. /*
  66. * Retrieve root node of tree to search, and insert current node at
  67. * the root.
  68. */
  69. tree_to_use = *((ushort UNALIGNED *) &context->enc_MemWindow[BufPos]);
  70. ptr = context->enc_tree_root[tree_to_use];
  71. context->enc_tree_root[tree_to_use] = BufPos;
  72. #else
  73. ptr = context->enc_single_tree_root;
  74. context->enc_single_tree_root = BufPos;
  75. #endif
  76. /*
  77. * end_pos is the furthest location back we will search for matches
  78. *
  79. * Remember that our window size is reduced by 3 bytes because of
  80. * our repeated offset codes.
  81. *
  82. * Since BufPos starts at context->enc_window_size when compression begins,
  83. * end_pos will never become negative.
  84. */
  85. end_pos = BufPos - (context->enc_window_size-4);
  86. /*
  87. * Root node is either NULL, or points to a really distant position.
  88. */
  89. if (ptr <= end_pos)
  90. {
  91. left[BufPos] = right[BufPos] = 0;
  92. return 0;
  93. }
  94. #ifdef MULTIPLE_SEARCH_TREES
  95. /*
  96. * confirmed length (no need to check the first clen chars in a search)
  97. *
  98. * note: clen is always equal to min(small_len, big_len)
  99. */
  100. clen = 2;
  101. /*
  102. * current best match length
  103. */
  104. match_length = 2;
  105. /*
  106. * longest match which is < our string
  107. */
  108. small_len = 2;
  109. /*
  110. * longest match which is > our string
  111. */
  112. big_len = 2;
  113. /*
  114. * record match position for match length 2
  115. */
  116. context->enc_matchpos_table[2] = BufPos - ptr + 2;
  117. #ifdef VERIFY_SEARCHES
  118. VERIFY_MULTI_TREE_SEARCH_CODE("binary_search_findmatch()");
  119. #endif
  120. #else /* !MULTIPLE_SEARCH_TREES */
  121. clen = 0;
  122. match_length = 0;
  123. small_len = 0;
  124. big_len = 0;
  125. #endif /* MULTIPLE_SEARCH_TREES */
  126. /*
  127. * pointers to nodes to check
  128. */
  129. small_ptr = &left[BufPos];
  130. big_ptr = &right[BufPos];
  131. do
  132. {
  133. /* compare bytes at current node */
  134. same = clen;
  135. #ifdef VERIFY_SEARCHES
  136. VERIFY_SEARCH_CODE("binary_search_findmatch()")
  137. #endif
  138. /* don't need to check first clen characters */
  139. a = ptr + clen;
  140. b = BufPos + clen;
  141. while ((val = ((int) context->enc_MemWindow[a++]) - ((int) context->enc_MemWindow[b++])) == 0)
  142. {
  143. /* don't exceed MAX_MATCH */
  144. if (++same >= MAX_MATCH)
  145. goto long_match;
  146. }
  147. if (val < 0)
  148. {
  149. if (same > big_len)
  150. {
  151. if (same > match_length)
  152. {
  153. long_match:
  154. do
  155. {
  156. context->enc_matchpos_table[++match_length] = BufPos-ptr+(NUM_REPEATED_OFFSETS-1);
  157. } while (match_length < same);
  158. if (same >= BREAK_LENGTH)
  159. {
  160. *small_ptr = left[ptr];
  161. *big_ptr = right[ptr];
  162. goto end_bsearch;
  163. }
  164. }
  165. big_len = same;
  166. clen = (((small_len) < (big_len)) ? (small_len) : (big_len));
  167. }
  168. *big_ptr = ptr;
  169. big_ptr = &left[ptr];
  170. ptr = *big_ptr;
  171. }
  172. else
  173. {
  174. if (same > small_len)
  175. {
  176. if (same > match_length)
  177. {
  178. do
  179. {
  180. context->enc_matchpos_table[++match_length] = BufPos-ptr+(NUM_REPEATED_OFFSETS-1);
  181. } while (match_length < same);
  182. if (same >= BREAK_LENGTH)
  183. {
  184. *small_ptr = left[ptr];
  185. *big_ptr = right[ptr];
  186. goto end_bsearch;
  187. }
  188. }
  189. small_len = same;
  190. clen = (((small_len) < (big_len)) ? (small_len) : (big_len));
  191. }
  192. *small_ptr = ptr;
  193. small_ptr = &right[ptr];
  194. ptr = *small_ptr;
  195. }
  196. } while (ptr > end_pos); /* while we don't go too far backwards */
  197. *small_ptr = 0;
  198. *big_ptr = 0;
  199. end_bsearch:
  200. /*
  201. * If we have multiple search trees, we are already guaranteed
  202. * a minimum match length of 2 when we reach here.
  203. *
  204. * If we only have one tree, then we're not guaranteed anything.
  205. */
  206. #ifndef MULTIPLE_SEARCH_TREES
  207. if (match_length < MIN_MATCH)
  208. return 0;
  209. #endif
  210. /*
  211. * Check to see if any of our match lengths can
  212. * use repeated offsets.
  213. */
  214. /*
  215. * repeated offset 1
  216. */
  217. for (i = 0; i < match_length; i++)
  218. {
  219. if (context->enc_MemWindow[BufPos+i] != context->enc_MemWindow[BufPos-context->enc_last_matchpos_offset[0]+i])
  220. break;
  221. }
  222. /*
  223. * the longest repeated offset
  224. */
  225. best_repeated_offset = i;
  226. if (i >= MIN_MATCH)
  227. {
  228. /*
  229. * Yes, we can do a repeated offset for some match lengths; replace
  230. * their positions with the repeated offset position
  231. */
  232. do
  233. {
  234. context->enc_matchpos_table[i] = 0; /* first repeated offset position */
  235. } while (--i >= MIN_MATCH);
  236. /* A speed optimization to cope with long runs of bytes */
  237. if (best_repeated_offset > BREAK_LENGTH)
  238. goto quick_return;
  239. }
  240. /*
  241. * repeated offset 2
  242. */
  243. for (i = 0; i < match_length; i++)
  244. {
  245. if (context->enc_MemWindow[BufPos+i] != context->enc_MemWindow[BufPos-context->enc_last_matchpos_offset[1]+i])
  246. break;
  247. }
  248. /*
  249. * Does the second repeated offset provide a longer match?
  250. *
  251. * If so, leave the first repeated offset alone, but fill out the
  252. * difference in match lengths in the table with repeated offset 1.
  253. */
  254. if (i > best_repeated_offset)
  255. {
  256. do
  257. {
  258. context->enc_matchpos_table[++best_repeated_offset] = 1;
  259. } while (best_repeated_offset < i);
  260. }
  261. /*
  262. * repeated offset 3
  263. */
  264. for (i = 0; i < match_length; i++)
  265. {
  266. if (context->enc_MemWindow[BufPos+i] != context->enc_MemWindow[BufPos-context->enc_last_matchpos_offset[2]+i])
  267. break;
  268. }
  269. /*
  270. * Does the third repeated offset provide a longer match?
  271. */
  272. if (i > best_repeated_offset)
  273. {
  274. do
  275. {
  276. context->enc_matchpos_table[++best_repeated_offset] = 2;
  277. } while (best_repeated_offset < i);
  278. }
  279. quick_return:
  280. /*
  281. * Don't let a match cross a 32K boundary
  282. */
  283. bytes_to_boundary = (CHUNK_SIZE-1) - ((int) BufPos & (CHUNK_SIZE-1));
  284. if (match_length > bytes_to_boundary)
  285. {
  286. match_length = bytes_to_boundary;
  287. if (match_length < MIN_MATCH)
  288. match_length = 0;
  289. }
  290. return (long) match_length;
  291. }
  292. #endif
  293. /*
  294. * Inserts the string at the current BufPos into the tree.
  295. *
  296. * Does not record all the best match lengths or otherwise attempt
  297. * to search for matches
  298. *
  299. * Similar to the above function.
  300. */
  301. #ifndef ASM_QUICK_INSERT_BSEARCH_FINDMATCH
  302. void quick_insert_bsearch_findmatch(t_encoder_context *context, long BufPos, long end_pos)
  303. {
  304. long ptr;
  305. ulong a,b;
  306. ulong *small_ptr, *big_ptr;
  307. int val;
  308. int small_len, big_len;
  309. int same;
  310. int clen;
  311. #ifdef MULTIPLE_SEARCH_TREES
  312. ushort tree_to_use;
  313. tree_to_use = *((ushort UNALIGNED *) &context->enc_MemWindow[BufPos]);
  314. ptr = context->enc_tree_root[tree_to_use];
  315. context->enc_tree_root[tree_to_use] = BufPos;
  316. #else
  317. ptr = context->enc_single_tree_root;
  318. context->enc_single_tree_root = BufPos;
  319. #endif
  320. if (ptr <= end_pos)
  321. {
  322. left[BufPos] = right[BufPos] = 0;
  323. return;
  324. }
  325. #ifdef MULTIPLE_SEARCH_TREES
  326. clen = 2;
  327. small_len = 2;
  328. big_len = 2;
  329. #ifdef VERIFY_SEARCHES
  330. VERIFY_MULTI_TREE_SEARCH_CODE("quick_insert_bsearch_findmatch()");
  331. #endif
  332. #else
  333. clen = 0;
  334. small_len = 0;
  335. big_len = 0;
  336. #endif
  337. small_ptr = &left[BufPos];
  338. big_ptr = &right[BufPos];
  339. do
  340. {
  341. _ASSERTE ((ulong) ptr >= (ulong) (context->enc_RealLeft - context->enc_Left));
  342. same = clen;
  343. a = ptr+clen;
  344. b = BufPos+clen;
  345. #ifdef VERIFY_SEARCHES
  346. VERIFY_SEARCH_CODE("quick_insert_bsearch_findmatch()")
  347. #endif
  348. while ((val = ((int) context->enc_MemWindow[a++]) - ((int) context->enc_MemWindow[b++])) == 0)
  349. {
  350. /*
  351. * Here we break on BREAK_LENGTH, not MAX_MATCH
  352. */
  353. if (++same >= BREAK_LENGTH)
  354. break;
  355. }
  356. if (val < 0)
  357. {
  358. if (same > big_len)
  359. {
  360. if (same >= BREAK_LENGTH)
  361. {
  362. *small_ptr = left[ptr];
  363. *big_ptr = right[ptr];
  364. return;
  365. }
  366. big_len = same;
  367. clen = (((small_len) < (big_len)) ? (small_len) : (big_len));
  368. }
  369. *big_ptr = ptr;
  370. big_ptr = &left[ptr];
  371. ptr = *big_ptr;
  372. }
  373. else
  374. {
  375. if (same > small_len)
  376. {
  377. if (same >= BREAK_LENGTH)
  378. {
  379. *small_ptr = left[ptr];
  380. *big_ptr = right[ptr];
  381. return;
  382. }
  383. small_len = same;
  384. clen = (((small_len) < (big_len)) ? (small_len) : (big_len));
  385. }
  386. *small_ptr = ptr;
  387. small_ptr = &right[ptr];
  388. ptr = *small_ptr;
  389. }
  390. } while (ptr > end_pos);
  391. *small_ptr = 0;
  392. *big_ptr = 0;
  393. }
  394. #endif
  395. /*
  396. * Remove a node from the search tree; this is ONLY done for the last
  397. * BREAK_LENGTH symbols (see optenc.c). This is because we will have
  398. * inserted strings that contain undefined data (e.g. we're at the 4th
  399. * last byte from the file and binary_search_findmatch() a string into
  400. * the tree - everything from the 4th symbol onwards is invalid, and
  401. * would cause problems if it remained in the tree, so we have to
  402. * remove it).
  403. */
  404. void binary_search_remove_node(t_encoder_context *context, long BufPos, ulong end_pos)
  405. {
  406. ulong ptr;
  407. ulong left_node_pos;
  408. ulong right_node_pos;
  409. ulong *link;
  410. #ifdef MULTIPLE_SEARCH_TREES
  411. ushort tree_to_use;
  412. /*
  413. * The root node of tree_to_use should equal BufPos, since that is
  414. * the most recent insertion into that tree - but if we never
  415. * inserted this string (because it was a near match or a long
  416. * string of zeroes), then we can't remove it.
  417. */
  418. tree_to_use = *((ushort UNALIGNED *) &context->enc_MemWindow[BufPos]);
  419. /*
  420. * If we never inserted this string, do not attempt to remove it
  421. */
  422. if (context->enc_tree_root[tree_to_use] != (ulong) BufPos)
  423. return;
  424. link = &context->enc_tree_root[tree_to_use];
  425. #else
  426. if (context->enc_single_tree_root != (ulong) BufPos)
  427. return;
  428. link = &context->enc_single_tree_root;
  429. #endif
  430. /*
  431. * If the last occurence was too far away
  432. */
  433. if (*link <= end_pos)
  434. {
  435. *link = 0;
  436. left[BufPos] = right[BufPos] = 0;
  437. return;
  438. }
  439. /*
  440. * Most recent location of these chars
  441. */
  442. ptr = BufPos;
  443. /*
  444. * Most recent location of a string which is "less than" it
  445. */
  446. left_node_pos = left[ptr];
  447. if (left_node_pos <= end_pos)
  448. left_node_pos = left[ptr] = 0;
  449. /*
  450. * Most recent location of a string which is "greater than" it
  451. */
  452. right_node_pos = right[ptr];
  453. if (right_node_pos <= end_pos)
  454. right_node_pos = right[ptr] = 0;
  455. while (1)
  456. {
  457. #ifdef VERIFY_SEARCHES
  458. _ASSERTE (left_node_pos < (ulong) BufPos);
  459. _ASSERTE (right_node_pos < (ulong) BufPos);
  460. #endif
  461. /*
  462. * If left node position is greater than right node position
  463. * then follow the left node, since that is the more recent
  464. * insertion into the tree. Otherwise follow the right node.
  465. */
  466. if (left_node_pos > right_node_pos)
  467. {
  468. /*
  469. * If it's too far away, then store that it never happened
  470. */
  471. if (left_node_pos <= end_pos)
  472. left_node_pos = 0;
  473. ptr = *link = left_node_pos;
  474. if (!ptr)
  475. break;
  476. left_node_pos = right[ptr];
  477. link = &right[ptr];
  478. }
  479. else
  480. {
  481. /*
  482. * If it's too far away, then store that it never happened
  483. */
  484. if (right_node_pos <= end_pos)
  485. right_node_pos = 0;
  486. ptr = *link = right_node_pos;
  487. if (!ptr)
  488. break;
  489. right_node_pos = left[ptr];
  490. link = &left[ptr];
  491. }
  492. }
  493. }