Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

576 lines
13 KiB

  1. /*
  2. * encstats.c
  3. *
  4. * Routines for calculating statistics on a block of data which
  5. * has been compressed, but not yet output.
  6. *
  7. * These routines are used to determine which encoding method to use
  8. * to output the block.
  9. */
  10. #include "encoder.h"
  11. static void tally_aligned_bits(t_encoder_context *context, ulong dist_to_end_at)
  12. {
  13. ulong *dist_ptr;
  14. ulong i;
  15. ulong match_pos;
  16. /*
  17. * Tally the lower 3 bits
  18. */
  19. dist_ptr = context->enc_DistData;
  20. for (i = dist_to_end_at; i > 0; i--)
  21. {
  22. match_pos = *dist_ptr++;
  23. /*
  24. * Only for matches which have >= 3 extra bits
  25. */
  26. if (match_pos >= MPSLOT3_CUTOFF)
  27. context->enc_aligned_tree_freq[match_pos & 7]++;
  28. }
  29. }
  30. /*
  31. * Determine whether it is advantageous to use aligned block
  32. * encoding on the block.
  33. */
  34. lzx_block_type get_aligned_stats(t_encoder_context *context, ulong dist_to_end_at)
  35. {
  36. byte i;
  37. ulong total_L3 = 0;
  38. ulong largest_L3 = 0;
  39. memset(
  40. context->enc_aligned_tree_freq,
  41. 0,
  42. ALIGNED_NUM_ELEMENTS * sizeof(context->enc_aligned_tree_freq[0])
  43. );
  44. tally_aligned_bits(context, dist_to_end_at);
  45. for (i = 0; i < ALIGNED_NUM_ELEMENTS; i++)
  46. {
  47. if (context->enc_aligned_tree_freq[i] > largest_L3)
  48. largest_L3 = context->enc_aligned_tree_freq[i];
  49. total_L3 += context->enc_aligned_tree_freq[i];
  50. }
  51. /*
  52. * Do aligned offsets if the largest frequency accounts for 20%
  53. * or more (as opposed to 12.5% for non-aligned offset blocks).
  54. *
  55. * Not worthwhile to do aligned offsets if we have < 100 matches
  56. */
  57. if ((largest_L3 > total_L3/5) && dist_to_end_at >= 100)
  58. return BLOCKTYPE_ALIGNED;
  59. else
  60. return BLOCKTYPE_VERBATIM;
  61. }
  62. /*
  63. * Calculates the frequency of each literal, and returns the total
  64. * number of uncompressed bytes compressed in the block.
  65. */
  66. static ulong tally_frequency(
  67. t_encoder_context *context,
  68. ulong literal_to_start_at,
  69. ulong distance_to_start_at,
  70. ulong literal_to_end_at
  71. )
  72. {
  73. ulong i;
  74. ulong d;
  75. ulong compressed_bytes = 0;
  76. d = distance_to_start_at;
  77. for (i = literal_to_start_at; i < literal_to_end_at; i++)
  78. {
  79. if (!IsMatch(i))
  80. {
  81. /* Uncompressed symbol */
  82. context->enc_main_tree_freq[context->enc_LitData[i]]++;
  83. compressed_bytes++;
  84. }
  85. else
  86. {
  87. /* Match */
  88. if (context->enc_LitData[i] < NUM_PRIMARY_LENGTHS)
  89. {
  90. context->enc_main_tree_freq[ NUM_CHARS + (MP_SLOT(context->enc_DistData[d])<<NL_SHIFT) + context->enc_LitData[i]] ++;
  91. }
  92. else
  93. {
  94. context->enc_main_tree_freq[ (NUM_CHARS + NUM_PRIMARY_LENGTHS) + (MP_SLOT(context->enc_DistData[d])<<NL_SHIFT)] ++;
  95. context->enc_secondary_tree_freq[context->enc_LitData[i] - NUM_PRIMARY_LENGTHS] ++;
  96. }
  97. compressed_bytes += context->enc_LitData[i]+MIN_MATCH;
  98. d++;
  99. }
  100. }
  101. return compressed_bytes;
  102. }
  103. /*
  104. * Get statistics
  105. */
  106. ulong get_block_stats(
  107. t_encoder_context *context,
  108. ulong literal_to_start_at,
  109. ulong distance_to_start_at,
  110. ulong literal_to_end_at
  111. )
  112. {
  113. memset(
  114. context->enc_main_tree_freq,
  115. 0,
  116. MAIN_TREE_ELEMENTS * sizeof(context->enc_main_tree_freq[0])
  117. );
  118. memset(
  119. context->enc_secondary_tree_freq,
  120. 0,
  121. NUM_SECONDARY_LENGTHS * sizeof(context->enc_secondary_tree_freq[0])
  122. );
  123. return tally_frequency(
  124. context,
  125. literal_to_start_at,
  126. distance_to_start_at,
  127. literal_to_end_at
  128. );
  129. }
  130. /*
  131. * Update cumulative statistics
  132. */
  133. ulong update_cumulative_block_stats(
  134. t_encoder_context *context,
  135. ulong literal_to_start_at,
  136. ulong distance_to_start_at,
  137. ulong literal_to_end_at
  138. )
  139. {
  140. return tally_frequency(
  141. context,
  142. literal_to_start_at,
  143. distance_to_start_at,
  144. literal_to_end_at
  145. );
  146. }
  147. /*
  148. * Used in block splitting
  149. *
  150. * This routine calculates the "difference in composition" between
  151. * two different sections of compressed data.
  152. *
  153. * Resolution must be evenly divisible by STEP_SIZE, and must be
  154. * a power of 2.
  155. */
  156. #define RESOLUTION 1024
  157. /*
  158. * Threshold for determining if two blocks are different
  159. *
  160. * If enough consecutive blocks are this different, the block
  161. * splitter will start investigating, narrowing down the
  162. * area where the change occurs.
  163. *
  164. * It will then look for two areas which are
  165. * EARLY_BREAK_THRESHOLD (or more) different.
  166. *
  167. * If THRESHOLD is too small, it will force examination
  168. * of a lot of blocks, slowing down the compressor.
  169. *
  170. * The EARLY_BREAK_THRESHOLD is the more important value.
  171. */
  172. #define THRESHOLD 1400
  173. /*
  174. * Threshold for determining if two blocks are REALLY different
  175. */
  176. #define EARLY_BREAK_THRESHOLD 1700
  177. /*
  178. * Must be >= 8 because ItemType[] array is in bits
  179. *
  180. * Must be a power of 2.
  181. *
  182. * This is the step size used to narrow down the exact
  183. * best point to split the block.
  184. */
  185. #define STEP_SIZE 64
  186. /*
  187. * Minimum # literals required to perform block
  188. * splitting at all.
  189. */
  190. #define MIN_LITERALS_REQUIRED 6144
  191. /*
  192. * Minimum # literals we will allow to be its own block.
  193. *
  194. * We don't want to create blocks with too small numbers
  195. * of literals, otherwise the static tree output will
  196. * take up too much space.
  197. */
  198. #define MIN_LITERALS_IN_BLOCK 4096
  199. static const long square_table[17] =
  200. {
  201. 0,1,4,9,16,25,36,49,64,81,100,121,144,169,196,225,256
  202. };
  203. /*
  204. * log2(x) = x < 256 ? log2_table[x] : 8 + log2_table[(x >> 8)]
  205. *
  206. * log2(0) = 0
  207. * log2(1) = 1
  208. * log2(2) = 2
  209. * log2(3) = 2
  210. * log2(4) = 3
  211. * log2(255) = 8
  212. * log2(256) = 9
  213. * log2(511) = 9
  214. * log2(512) = 10
  215. *
  216. * It's not a real log2; it's off by one because we have
  217. * log2(0) = 0.
  218. */
  219. static const byte log2_table[256] =
  220. {
  221. 0,1,2,2,3,3,3,3,
  222. 4,4,4,4,4,4,4,4,
  223. 5,5,5,5,5,5,5,5,
  224. 5,5,5,5,5,5,5,5,
  225. 6,6,6,6,6,6,6,6,
  226. 6,6,6,6,6,6,6,6,
  227. 6,6,6,6,6,6,6,6,
  228. 6,6,6,6,6,6,6,6,
  229. 7,7,7,7,7,7,7,7,
  230. 7,7,7,7,7,7,7,7,
  231. 7,7,7,7,7,7,7,7,
  232. 7,7,7,7,7,7,7,7,
  233. 7,7,7,7,7,7,7,7,
  234. 7,7,7,7,7,7,7,7,
  235. 7,7,7,7,7,7,7,7,
  236. 7,7,7,7,7,7,7,7,
  237. 8,8,8,8,8,8,8,8,
  238. 8,8,8,8,8,8,8,8,
  239. 8,8,8,8,8,8,8,8,
  240. 8,8,8,8,8,8,8,8,
  241. 8,8,8,8,8,8,8,8,
  242. 8,8,8,8,8,8,8,8,
  243. 8,8,8,8,8,8,8,8,
  244. 8,8,8,8,8,8,8,8,
  245. 8,8,8,8,8,8,8,8,
  246. 8,8,8,8,8,8,8,8,
  247. 8,8,8,8,8,8,8,8,
  248. 8,8,8,8,8,8,8,8,
  249. 8,8,8,8,8,8,8,8,
  250. 8,8,8,8,8,8,8,8,
  251. 8,8,8,8,8,8,8,8,
  252. 8,8,8,8,8,8,8,8
  253. };
  254. /*
  255. * Return the difference between two sets of matches/distances
  256. */
  257. static ulong return_difference(
  258. t_encoder_context *context,
  259. ulong item_start1,
  260. ulong item_start2,
  261. ulong dist_at_1,
  262. ulong dist_at_2,
  263. ulong size
  264. )
  265. {
  266. ushort freq1[800];
  267. ushort freq2[800];
  268. ulong i;
  269. ulong cum_diff;
  270. int element;
  271. /*
  272. * Error! Too many main tree elements
  273. */
  274. if (MAIN_TREE_ELEMENTS >= (sizeof(freq1)/sizeof(freq1[0])))
  275. return 0;
  276. memset(freq1, 0, sizeof(freq1[0])*MAIN_TREE_ELEMENTS);
  277. memset(freq2, 0, sizeof(freq2[0])*MAIN_TREE_ELEMENTS);
  278. for (i = 0; i < size; i++)
  279. {
  280. if (!IsMatch(item_start1))
  281. {
  282. element = context->enc_LitData[item_start1];
  283. }
  284. else
  285. {
  286. if (context->enc_LitData[item_start1] < NUM_PRIMARY_LENGTHS)
  287. element = NUM_CHARS + (MP_SLOT(context->enc_DistData[dist_at_1])<<NL_SHIFT) + context->enc_LitData[item_start1];
  288. else
  289. element = (NUM_CHARS + NUM_PRIMARY_LENGTHS) + (MP_SLOT(context->enc_DistData[dist_at_1]) << NL_SHIFT);
  290. dist_at_1++;
  291. }
  292. item_start1++;
  293. freq1[element]++;
  294. if (!IsMatch(item_start2))
  295. {
  296. element = context->enc_LitData[item_start2];
  297. }
  298. else
  299. {
  300. if (context->enc_LitData[item_start2] < NUM_PRIMARY_LENGTHS)
  301. element = NUM_CHARS + (MP_SLOT(context->enc_DistData[dist_at_2])<<NL_SHIFT) + context->enc_LitData[item_start2];
  302. else
  303. element = (NUM_CHARS + NUM_PRIMARY_LENGTHS) + (MP_SLOT(context->enc_DistData[dist_at_2]) << NL_SHIFT);
  304. dist_at_2++;
  305. }
  306. item_start2++;
  307. freq2[element]++;
  308. }
  309. cum_diff = 0;
  310. for (i = 0; i < (ulong) MAIN_TREE_ELEMENTS; i++)
  311. {
  312. ulong log2a, log2b, diff;
  313. #define log2(x) ((x) < 256 ? log2_table[(x)] : 8+log2_table[(x) >> 8])
  314. log2a = (ulong) log2(freq1[i]);
  315. log2b = (ulong) log2(freq2[i]);
  316. /* diff = (log2a*log2a) - (log2b*log2b); */
  317. diff = square_table[log2a] - square_table[log2b];
  318. cum_diff += abs(diff);
  319. }
  320. return cum_diff;
  321. }
  322. /*
  323. * Calculates where and if a block of compressed data should be split.
  324. *
  325. * For example, if we have just compressed text data, audio data, and
  326. * more text data, then the composition of matches and unmatched
  327. * symbols will be different between the text data and audio data.
  328. * Therefore we force an end of block whenever the compressed data
  329. * looks like it's changing in composition.
  330. *
  331. * This routine currently cannot tell the difference between blocks
  332. * which should use aligned offsets, and blocks which should not.
  333. * However, there is little to be gained from looking for this change,
  334. * since it the match finder doesn't make an effort to look for
  335. * aligned offsets either.
  336. *
  337. * Returns whether we split the block or not.
  338. */
  339. bool split_block(
  340. t_encoder_context *context,
  341. ulong literal_to_start_at,
  342. ulong literal_to_end_at,
  343. ulong distance_to_end_at, /* corresponds to # distances at literal_to_end_at */
  344. ulong *split_at_literal,
  345. ulong *split_at_distance /* optional parameter (may be NULL) */
  346. )
  347. {
  348. ulong i, j, d;
  349. int nd;
  350. /*
  351. * num_dist_at_item[n] equals the cumulative number of matches
  352. * at literal "n / STEP_SIZE".
  353. */
  354. ushort num_dist_at_item[(MAX_LITERAL_ITEMS/STEP_SIZE)+8]; /* +8 is slop */
  355. /*
  356. * default return
  357. */
  358. *split_at_literal = literal_to_end_at;
  359. if (split_at_distance)
  360. *split_at_distance = distance_to_end_at;
  361. /* Not worth doing if we don't have many literals */
  362. if (literal_to_end_at - literal_to_start_at < MIN_LITERALS_REQUIRED)
  363. return false;
  364. /* Not allowed to split blocks any more, so we don't overflow MAX_GROWTH? */
  365. if (context->enc_num_block_splits >= MAX_BLOCK_SPLITS)
  366. return false;
  367. /*
  368. * Keep track of the number of distances (matches) we've had,
  369. * at each step of STEP_SIZE literals.
  370. *
  371. * Look at 8 items at a time, and ignore the last
  372. * 0..7 items if they exist.
  373. */
  374. nd = 0;
  375. d = 0;
  376. for (i = 0; i < (literal_to_end_at >> 3); i++)
  377. {
  378. /*
  379. * if (i % (STEP_SIZE >> 3)) == 0
  380. */
  381. if ((i & ((STEP_SIZE >> 3)-1)) == 0)
  382. num_dist_at_item[nd++] = (ushort) d;
  383. d += context->enc_ones[ context->enc_ItemType[i] ];
  384. }
  385. /*
  386. * Must be a multiple of STEP_SIZE
  387. */
  388. literal_to_start_at = (literal_to_start_at + (STEP_SIZE-1)) & (~(STEP_SIZE-1));
  389. /*
  390. * See where the change in composition occurs
  391. */
  392. for ( i = literal_to_start_at + 2*RESOLUTION;
  393. i < literal_to_end_at - 4*RESOLUTION;
  394. i += RESOLUTION)
  395. {
  396. /*
  397. * If there appears to be a significant variance in composition
  398. * between
  399. * ___________
  400. * / \
  401. * A B i X Y Z
  402. * \ \___/ /
  403. * \_______________/
  404. */
  405. if (
  406. return_difference(
  407. context,
  408. i,
  409. i+1*RESOLUTION,
  410. (ulong) num_dist_at_item[i/STEP_SIZE],
  411. (ulong) num_dist_at_item[(i+1*RESOLUTION)/STEP_SIZE],
  412. RESOLUTION) > THRESHOLD
  413. &&
  414. return_difference(
  415. context,
  416. i-RESOLUTION,
  417. i+2*RESOLUTION,
  418. (ulong) num_dist_at_item[(i-RESOLUTION)/STEP_SIZE],
  419. (ulong) num_dist_at_item[(i+2*RESOLUTION)/STEP_SIZE],
  420. RESOLUTION) > THRESHOLD
  421. &&
  422. return_difference(
  423. context,
  424. i-2*RESOLUTION,
  425. i+3*RESOLUTION,
  426. (ulong) num_dist_at_item[(i-2*RESOLUTION)/STEP_SIZE],
  427. (ulong) num_dist_at_item[(i+3*RESOLUTION)/STEP_SIZE],
  428. RESOLUTION) > THRESHOLD
  429. )
  430. {
  431. ulong max_diff = 0;
  432. ulong literal_split;
  433. /*
  434. * Narrow down the best place to split block
  435. *
  436. * This really could be done much better; we could end up
  437. * doing a lot of stepping;
  438. *
  439. * basically ((5/2 - 1/2) * RESOLUTION) / STEP_SIZE
  440. *
  441. * which is (2 * RESOLUTION) / STEP_SIZE,
  442. * which with RESOLUTION = 1024 and STEP_SIZE = 32,
  443. * equals 2048/32 = 64 steps.
  444. */
  445. for (j = i+RESOLUTION/2; j<i+(5*RESOLUTION)/2; j += STEP_SIZE)
  446. {
  447. ulong diff;
  448. diff = return_difference(
  449. context,
  450. j - RESOLUTION,
  451. j,
  452. (ulong) num_dist_at_item[(j-RESOLUTION)/STEP_SIZE],
  453. (ulong) num_dist_at_item[j/STEP_SIZE],
  454. RESOLUTION
  455. );
  456. /* Get largest difference */
  457. if (diff > max_diff)
  458. {
  459. /*
  460. * j should not be too small, otherwise we'll be outputting
  461. * a very small block
  462. */
  463. max_diff = diff;
  464. literal_split = j;
  465. }
  466. }
  467. /*
  468. * There could be multiple changes in the data in our literals,
  469. * so if we find something really weird, make sure we break the
  470. * block now, and not on some later change.
  471. */
  472. if (max_diff >= EARLY_BREAK_THRESHOLD &&
  473. (literal_split-literal_to_start_at) >= MIN_LITERALS_IN_BLOCK)
  474. {
  475. context->enc_num_block_splits++;
  476. *split_at_literal = literal_split;
  477. /*
  478. * Return the associated # distances, if required.
  479. * Since we split on a literal which is % STEP_SIZE, we
  480. * can read the # distances right off
  481. */
  482. if (split_at_distance)
  483. *split_at_distance = num_dist_at_item[literal_split/STEP_SIZE];
  484. return true;
  485. }
  486. }
  487. }
  488. /*
  489. * No good place found to split
  490. */
  491. return false;
  492. }