/* * optenc.c * * Optimal encoder * * BUGBUG Can improve compression by using the "redo" method of LZX; after the first 32K bytes, * reset the compressor but keep the tables, and start over. */ #include #include #include #include "deflate.h" // // If we get a match this good, take it automatically // // Note: FAST_DECISION_THRESHOLD can be set to anything; it's been set to BREAK_LENGTH // arbitrarily // #define FAST_DECISION_THRESHOLD BREAK_LENGTH // // After we have this many literals, create a tree to get updated statistical estimates // #define FIRST_TREE_UPDATE 1024 // // Verifies that all of the hash pointers in the hash table are correct, and that // the tree structure is valid. // #define DISABLE_VERIFY_HASHES #ifdef _DEBUG #ifndef DISABLE_VERIFY_HASHES #define VERIFY_HASHES(bufpos) verifyHashes(context, bufpos) #else #define VERIFY_HASHES(bufpos) ; #endif #else #define VERIFY_HASHES(bufpos) ; #endif #define CHECK_FLUSH_RECORDING_BUFFER() \ if (recording_bitcount >= 16) \ { \ *recording_bufptr++ = (BYTE) recording_bitbuf; \ *recording_bufptr++ = (BYTE) (recording_bitbuf >> 8); \ recording_bitbuf >>= 16; \ recording_bitcount -= 16; \ } #define OUTPUT_RECORDING_DATA(count,data) \ recording_bitbuf |= ((data) << recording_bitcount); \ recording_bitcount += (count); // // Record unmatched symbol c // #define RECORD_CHAR(c) \ context->outputting_block_num_literals++; \ encoder->literal_tree_freq[c]++; \ _ASSERT(encoder->recording_literal_tree_len[c] != 0); \ OUTPUT_RECORDING_DATA(encoder->recording_literal_tree_len[c], encoder->recording_literal_tree_code[c]); \ CHECK_FLUSH_RECORDING_BUFFER(); // // Record a match with length match_len (>= MIN_MATCH) and displacement match_pos // #define RECORD_MATCH(match_len, match_pos) \ { \ int pos_slot = POS_SLOT(match_pos); \ int len_slot = g_LengthLookup[match_len - MIN_MATCH]; \ int item = (NUM_CHARS+1) + len_slot; \ int extra_dist_bits = g_ExtraDistanceBits[pos_slot]; \ int extra_len_bits = g_ExtraLengthBits[len_slot]; \ _ASSERT(match_len >= MIN_MATCH && match_len <= MAX_MATCH); \ _ASSERT(context->outputting_block_num_literals >= 0 && context->outputting_block_num_literals < OPT_ENCODER_MAX_ITEMS); \ _ASSERT(encoder->recording_literal_tree_len[item] != 0); \ _ASSERT(encoder->recording_dist_tree_len[pos_slot] != 0); \ context->outputting_block_num_literals++; \ encoder->literal_tree_freq[(NUM_CHARS + 1) + len_slot]++; \ encoder->dist_tree_freq[pos_slot]++; \ OUTPUT_RECORDING_DATA(encoder->recording_literal_tree_len[item], encoder->recording_literal_tree_code[item]); \ CHECK_FLUSH_RECORDING_BUFFER(); \ if (extra_len_bits > 0) \ { \ OUTPUT_RECORDING_DATA(extra_len_bits, (match_len-MIN_MATCH) & ((1 << extra_len_bits)-1)); \ CHECK_FLUSH_RECORDING_BUFFER(); \ } \ OUTPUT_RECORDING_DATA(encoder->recording_dist_tree_len[pos_slot], encoder->recording_dist_tree_code[pos_slot]); \ CHECK_FLUSH_RECORDING_BUFFER(); \ if (extra_dist_bits > 0) \ { \ OUTPUT_RECORDING_DATA(extra_dist_bits, match_pos & ((1 << extra_dist_bits)-1)); \ CHECK_FLUSH_RECORDING_BUFFER(); \ } \ } #define FLUSH_RECORDING_BITBUF() \ *recording_bufptr++ = (BYTE) recording_bitbuf; \ *recording_bufptr++ = (BYTE) (recording_bitbuf >> 8); static void calculateUpdatedEstimates(t_encoder_context *context); static void OptimalEncoderMoveWindows(t_encoder_context *context); static int match_est(t_optimal_encoder *encoder, int match_length, unsigned int match_pos) { int dist_slot; int len_slot; // output match position len_slot = g_LengthLookup[match_length-MIN_MATCH]; dist_slot = POS_SLOT(match_pos); return encoder->literal_tree_len[NUM_CHARS + 1 + len_slot] + g_ExtraLengthBits[len_slot] + encoder->dist_tree_len[dist_slot] + g_ExtraDistanceBits[dist_slot]; } // // Create initial estimations to output each element // static void initOptimalEstimates(t_encoder_context *context) { int i, p; t_optimal_encoder *encoder = context->optimal_encoder; for (i = 0; i < NUM_CHARS; i++) encoder->literal_tree_len[i] = 8; p = NUM_CHARS+1; encoder->literal_tree_len[p] = 3; encoder->literal_tree_len[p+1] = 4; encoder->literal_tree_len[p+2] = 5; for (; p < MAX_LITERAL_TREE_ELEMENTS; p++) encoder->literal_tree_len[p] = 6; for (i = 0; i < MAX_DIST_TREE_ELEMENTS; i++) encoder->dist_tree_len[i] = (i/2)+1; } // // Fix optimal estimates; if bitlen == 0 it doesn't mean that the element takes 0 // bits to output, it means that the element didn't occur, so come up with some estimate. // static void fixOptimalEstimates(t_encoder_context *context) { int i; t_optimal_encoder *encoder = context->optimal_encoder; for (i = 0; i < NUM_CHARS; i++) { if (encoder->literal_tree_len[i] == 0) encoder->literal_tree_len[i] = 13; } for (i = NUM_CHARS+1; i < MAX_LITERAL_TREE_ELEMENTS; i++) { if (encoder->literal_tree_len[i] == 0) encoder->literal_tree_len[i] = 12; } for (i = 0; i < MAX_DIST_TREE_ELEMENTS; i++) { if (encoder->dist_tree_len[i] == 0) encoder->dist_tree_len[i] = 10; } } /* * Returns an estimation of how many bits it would take to output * a given character */ #define CHAR_EST(c) (numbits_t) (encoder->literal_tree_len[(c)]) /* * Returns an estimation of how many bits it would take to output * a given match. */ #define MATCH_EST(ml,mp,result) result = match_est(encoder, ml,mp); // // Returns whether the literal buffers are just about full // // Since we could output a large number of matches/chars in between these checks, we // have to be careful. // // BUGBUG should check after each item output, so we don't have to be so careful; this // means we will utilise more of the recording buffer // #define LITERAL_BUFFERS_FULL() \ (context->outputting_block_num_literals >= OPT_ENCODER_MAX_ITEMS-4-LOOK-MAX_MATCH || \ recording_bufptr + 3*(MAX_MATCH + LOOK) >= end_recording_bufptr) void OptimalEncoderDeflate(t_encoder_context *context) { unsigned long bufpos_end; unsigned long MatchPos; unsigned long i; int EncMatchLength; /* must be a signed number */ unsigned long bufpos; unsigned long recording_bitbuf; int recording_bitcount; byte * recording_bufptr; byte * end_recording_bufptr; t_optimal_encoder *encoder = context->optimal_encoder; _ASSERT(encoder != NULL); _ASSERT(context->state == STATE_NORMAL); // reinsert the up to BREAK_LENGTH nodes we removed the last time we exit this function VERIFY_HASHES(context->bufpos); reinsertRemovedNodes(context); VERIFY_HASHES(context->bufpos); // restore literal/match bitmap variables end_recording_bufptr = &encoder->lit_dist_buffer[OPT_ENCODER_LIT_DIST_BUFFER_SIZE-8]; recording_bufptr = encoder->recording_bufptr; recording_bitbuf = encoder->recording_bitbuf; recording_bitcount = encoder->recording_bitcount; bufpos = context->bufpos; bufpos_end = context->bufpos_end; /* * While we haven't reached the end of the data */ after_output_block: while (bufpos < bufpos_end) { // time to update our stats? if (context->outputting_block_num_literals >= encoder->next_tree_update) { encoder->next_tree_update += 1024; calculateUpdatedEstimates(context); fixOptimalEstimates(context); } // literal buffer or distance buffer filled up (or close to filling up)? if (LITERAL_BUFFERS_FULL()) break; /* * Search for matches of all different possible lengths, at bufpos */ EncMatchLength = optimal_find_match(context, bufpos); if (EncMatchLength < MIN_MATCH) { output_literal: /* * No match longer than 1 character exists in the history * window, so output the character at bufpos as a symbol. */ RECORD_CHAR(encoder->window[bufpos]); bufpos++; continue; } /* * Found a match. * * Make sure it cannot exceed the end of the buffer. */ if ((unsigned long) EncMatchLength + bufpos > bufpos_end) { EncMatchLength = bufpos_end - bufpos; /* * Oops, not enough for even a small match, so we * have to output a literal */ if (EncMatchLength < MIN_MATCH) goto output_literal; } if (EncMatchLength < FAST_DECISION_THRESHOLD) { /* * A match has been found that is between MIN_MATCH and * FAST_DECISION_THRESHOLD bytes in length. The following * algorithm is the optimal encoder that will determine the * most efficient order of matches and unmatched characters * over a span area defined by LOOK. * * The code is essentially a shortest path determination * algorithm. A stream of data can be encoded in a vast number * of different ways depending on the match lengths and offsets * chosen. The key to good compression ratios is to chose the * least expensive path. */ unsigned long span; unsigned long epos, bpos, NextPrevPos, MatchPos; t_decision_node *decision_node_ptr; t_decision_node *context_decision_node = encoder->decision_node; t_match_pos *matchpos_table = encoder->matchpos_table; long iterations; /* * Points to the end of the area covered by this match; the span * will continually be extended whenever we find more matches * later on. It will stop being extended when we reach a spot * where there are no matches, which is when we decide which * path to take to output the matches. */ span = bufpos + EncMatchLength; /* * The furthest position into which we will do our lookahead parsing */ epos = bufpos + LOOK; /* * Temporary bufpos variable */ bpos = bufpos; /* * Calculate the path to the next character if we output * an unmatched symbol. */ /* bits required to get here */ context_decision_node[1].numbits = CHAR_EST(encoder->window[bufpos]); /* where we came from */ context_decision_node[1].path = bufpos; /* bits required to get here */ context_decision_node[2].numbits = CHAR_EST(encoder->window[bufpos+1]) + context_decision_node[1].numbits; /* where we came from */ context_decision_node[2].path = bufpos+1; /* * For the match found, estimate the cost of encoding the match * for each possible match length, shortest offset combination. * * The cost, path and offset is stored at bufpos + Length. */ for (i = MIN_MATCH; i <= (unsigned long) EncMatchLength; i++) { /* * Get estimation of match cost given match length = i, * match position = matchpos_table[i], and store * the result in numbits[i] */ MATCH_EST(i, matchpos_table[i], context_decision_node[i].numbits); /* * Where we came from */ context_decision_node[i].path = bufpos; /* * Associated match position with this path */ context_decision_node[i].link = matchpos_table[i]; } /* * Set bit counter to zero at the start */ context_decision_node[0].numbits = 0; decision_node_ptr = &context_decision_node[-(long) bpos]; while (1) { numbits_t est, cum_numbits; bufpos++; /* * Set the proper repeated offset locations depending on the * shortest path to the location prior to searching for a * match. */ /* * The following is one of the two possible break points from * the inner encoding loop. This break will exit the loop if * a point is reached that no match can incorporate; i.e. a * character that does not match back to anything is a point * where all possible paths will converge and the longest one * can be chosen. */ if (span == bufpos) break; /* * Search for matches at bufpos */ EncMatchLength = optimal_find_match(context, bufpos); /* * Make sure that the match does not exceed the stop point */ if ((unsigned long) EncMatchLength + bufpos > bufpos_end) { EncMatchLength = bufpos_end - bufpos; if (EncMatchLength < MIN_MATCH) EncMatchLength = 0; } /* * If the match is very long or it exceeds epos (either * surpassing the LOOK area, or exceeding past the end of the * input buffer), then break the loop and output the path. */ if (EncMatchLength > FAST_DECISION_THRESHOLD || bufpos + (unsigned long) EncMatchLength >= epos) { MatchPos = matchpos_table[EncMatchLength]; decision_node_ptr[bufpos+EncMatchLength].link = MatchPos; decision_node_ptr[bufpos+EncMatchLength].path = bufpos; /* * Quickly insert data into the search tree without * returning match positions/lengths */ #ifndef INSERT_NEAR_LONG_MATCHES if (MatchPos == 3 && EncMatchLength > 16) { /* * If we found a match 1 character away and it's * length 16 or more, it's probably a string of * zeroes, so don't insert that into the search * engine, since doing so can slow things down * significantly! */ optimal_insert( context, bufpos + 1, bufpos - WINDOW_SIZE + 2 ); } else #endif { for (i = 1; i < (unsigned long) EncMatchLength; i++) optimal_insert( context, bufpos + i, bufpos + i - WINDOW_SIZE + 4 ); } bufpos += EncMatchLength; break; } /* * The following code will extend the area spanned by the * set of matches if the current match surpasses the end of * the span. A match of length two that is far is not * accepted, since it would normally be encoded as characters, * thus allowing the paths to converge. */ if (EncMatchLength >= 3) { if (span < (unsigned long) (bufpos + EncMatchLength)) { long end; long i; end = min(bufpos+EncMatchLength-bpos, LOOK-1); /* * These new positions are undefined for now, since we haven't * gone there yet, so put in the costliest value */ for (i = span-bpos+1; i <= end; i++) context_decision_node[i].numbits = (numbits_t) -1; span = bufpos + EncMatchLength; } } /* * The following code will iterate through all combinations * of match lengths for the current match. It will estimate * the cost of the path from the beginning of LOOK to * bufpos and to every locations spanned by the current * match. If the path through bufpos with the found matches * is estimated to take fewer number of bits to encode than * the previously found match, then the path to the location * is altered. * * The code relies on accurate estimation of the cost of * encoding a character or a match. Furthermore, it requires * a search engine that will store the smallest match offset * of each possible match length. * * A match of length one is simply treated as an unmatched * character. */ /* * Get the estimated number of bits required to encode the * path leading up to bufpos. */ cum_numbits = decision_node_ptr[bufpos].numbits; /* * Calculate the estimated cost of outputting the path through * bufpos and outputting the next character as an unmatched byte */ est = cum_numbits + CHAR_EST(encoder->window[bufpos]); /* * Check if it is more efficient to encode the next character * as an unmatched character rather than the previously found * match. If so, then update the cheapest path to bufpos + 1. * * What happens if est == numbits[bufpos-bpos+1]; i.e. it * works out as well to output a character as to output a * match? It's a tough call; however, we will push the * encoder to use matches where possible. */ if (est < decision_node_ptr[bufpos+1].numbits) { decision_node_ptr[bufpos+1].numbits = est; decision_node_ptr[bufpos+1].path = bufpos; } /* * Now, iterate through the remaining match lengths and * compare the new path to the existing. Change the path * if it is found to be more cost effective to go through * bufpos. */ for (i = MIN_MATCH; i <= (unsigned long) EncMatchLength; i++) { MATCH_EST(i, matchpos_table[i], est); est += cum_numbits; /* * If est == numbits[bufpos+i] we want to leave things * alone, since this will tend to force the matches * to be smaller in size, which is beneficial for most * data. */ if (est < decision_node_ptr[bufpos+i].numbits) { decision_node_ptr[bufpos+i].numbits = est; decision_node_ptr[bufpos+i].path = bufpos; decision_node_ptr[bufpos+i].link = matchpos_table[i]; } } } /* continue to loop through span of matches */ /* * Here bufpos == span, ie. a non-matchable character found. The * following code will output the path properly. */ /* * Unfortunately the path is stored in reverse; how to get from * where we are now, to get back to where it all started. * * Traverse the path back to the original starting position * of the LOOK span. Invert the path pointers in order to be * able to traverse back to the current position from the start. */ /* * Count the number of iterations we did, so when we go forwards * we'll do the same amount */ iterations = 0; NextPrevPos = decision_node_ptr[bufpos].path; do { unsigned long PrevPos; PrevPos = NextPrevPos; NextPrevPos = decision_node_ptr[PrevPos].path; decision_node_ptr[PrevPos].path = bufpos; bufpos = PrevPos; iterations++; } while (bufpos != bpos); /* * Traverse from the beginning of the LOOK span to the end of * the span along the stored path, outputting matches and * characters appropriately. */ do { if (decision_node_ptr[bufpos].path > bufpos+1) { /* * Path skips over more than 1 character; therefore it's a match */ RECORD_MATCH( decision_node_ptr[bufpos].path - bufpos, decision_node_ptr[ decision_node_ptr[bufpos].path ].link ); bufpos = decision_node_ptr[bufpos].path; } else { /* * Path goes to the next character; therefore it's a symbol */ RECORD_CHAR(encoder->window[bufpos]); bufpos++; } } while (--iterations != 0); } else /* EncMatchLength >= FAST_DECISION_THRESHOLD */ { /* * This code reflects a speed optimization that will always take * a match of length >= FAST_DECISION_THRESHOLD characters. */ /* * The position associated with the match we found */ MatchPos = encoder->matchpos_table[EncMatchLength]; /* * Quickly insert match substrings into search tree * (don't look for new matches; just insert the strings) */ #ifndef INSERT_NEAR_LONG_MATCHES if (MatchPos == 3 && EncMatchLength > 16) { optimal_insert( context, bufpos + 1, bufpos - WINDOW_SIZE + 2 ); } else #endif { for (i = 1; i < (unsigned long) EncMatchLength; i++) optimal_insert( context, bufpos + i, bufpos + i - WINDOW_SIZE + 1 ); } /* * Advance our position in the window */ bufpos += EncMatchLength; /* * Output the match */ RECORD_MATCH(EncMatchLength, MatchPos); } /* EncMatchLength >= FAST_DECISION_THRESHOLD */ } /* end while ... bufpos <= bufpos_end */ if (LITERAL_BUFFERS_FULL()) { _ASSERT(context->outputting_block_num_literals <= OPT_ENCODER_MAX_ITEMS); // flush our recording matches bit buffer FLUSH_RECORDING_BITBUF(); // BUGBUG Should check for failure result. Luckily the only failure condition is // that the tree didn't fit into 500 bytes, which is basically impossible anyway. (void) OptimalEncoderOutputBlock(context); // fix estimates for optimal parser fixOptimalEstimates(context); encoder->next_tree_update = FIRST_TREE_UPDATE; // did we output the whole block? if (context->state == STATE_NORMAL) { // reset literal recording recording_bufptr = encoder->recording_bufptr; recording_bitbuf = encoder->recording_bitbuf; recording_bitcount = encoder->recording_bitcount; goto after_output_block; } } // save recording state encoder->recording_bufptr = recording_bufptr; encoder->recording_bitbuf = recording_bitbuf; encoder->recording_bitcount = recording_bitcount; context->bufpos = bufpos; VERIFY_HASHES(bufpos); removeNodes(context); VERIFY_HASHES(bufpos); if (context->bufpos == 2*WINDOW_SIZE) OptimalEncoderMoveWindows(context); } // // Move the search windows when bufpos reaches 2*WINDOW_SIZE // static void OptimalEncoderMoveWindows(t_encoder_context *context) { long delta; int i; t_optimal_encoder *encoder = context->optimal_encoder; t_search_node *search_tree_root = encoder->search_tree_root; t_search_node *left = encoder->search_left; t_search_node *right = encoder->search_right; _ASSERT(context->bufpos == 2*WINDOW_SIZE); VERIFY_HASHES(context->bufpos); delta = context->bufpos - WINDOW_SIZE; memcpy(&encoder->window[0], &encoder->window[context->bufpos - WINDOW_SIZE], WINDOW_SIZE); for (i = 0; i < NUM_DIRECT_LOOKUP_TABLE_ELEMENTS; i++) { long val = ((long) search_tree_root[i]) - delta; if (val <= 0) search_tree_root[i] = (t_search_node) 0; else search_tree_root[i] = (t_search_node) val; _ASSERT(search_tree_root[i] < WINDOW_SIZE); } memcpy(&left[0], &left[context->bufpos - WINDOW_SIZE], sizeof(t_search_node)*WINDOW_SIZE); memcpy(&right[0], &right[context->bufpos - WINDOW_SIZE], sizeof(t_search_node)*WINDOW_SIZE); for (i = 0; i < WINDOW_SIZE; i++) { long val; // left val = ((long) left[i]) - delta; if (val <= 0) left[i] = (t_search_node) 0; else left[i] = (t_search_node) val; // right val = ((long) right[i]) - delta; if (val <= 0) right[i] = (t_search_node) 0; else right[i] = (t_search_node) val; } #ifdef _DEBUG // force any search table references to be invalid memset(&encoder->window[WINDOW_SIZE], 0, WINDOW_SIZE); #endif context->bufpos = WINDOW_SIZE; context->bufpos_end = context->bufpos; VERIFY_HASHES(context->bufpos); } // // Calculate the frequencies of all literal and distance codes, for tree-making, then // make the trees // static void calculateUpdatedEstimates(t_encoder_context *context) { USHORT code[MAX_LITERAL_TREE_ELEMENTS]; t_optimal_encoder *encoder = context->optimal_encoder; // create the trees, we're interested only in len[], not code[] // BUGBUG perf optimisation: make makeTree() not call MakeCode() in this situation makeTree( MAX_LITERAL_TREE_ELEMENTS, 15, encoder->literal_tree_freq, code, encoder->literal_tree_len ); makeTree( MAX_DIST_TREE_ELEMENTS, 15, encoder->dist_tree_freq, code, encoder->dist_tree_len ); } // // Zero the running frequency counts // // Also set freq[END_OF_BLOCK_CODE] = 1 // void OptimalEncoderZeroFrequencyCounts(t_optimal_encoder *encoder) { _ASSERT(encoder != NULL); memset(encoder->literal_tree_freq, 0, sizeof(encoder->literal_tree_freq)); memset(encoder->dist_tree_freq, 0, sizeof(encoder->dist_tree_freq)); encoder->literal_tree_freq[END_OF_BLOCK_CODE] = 1; } void OptimalEncoderReset(t_encoder_context *context) { t_optimal_encoder *encoder = context->optimal_encoder; _ASSERT(encoder != NULL); encoder->recording_bitbuf = 0; encoder->recording_bitcount = 0; encoder->recording_bufptr = encoder->lit_dist_buffer; context->window_size = WINDOW_SIZE; context->bufpos = context->window_size; context->bufpos_end = context->bufpos; DeflateInitRecordingTables( encoder->recording_literal_tree_len, encoder->recording_literal_tree_code, encoder->recording_dist_tree_len, encoder->recording_dist_tree_code ); // clear the search table memset( encoder->search_tree_root, 0, sizeof(encoder->search_tree_root) ); encoder->next_tree_update = FIRST_TREE_UPDATE; initOptimalEstimates(context); OptimalEncoderZeroFrequencyCounts(encoder); } BOOL OptimalEncoderInit(t_encoder_context *context) { context->optimal_encoder = (t_optimal_encoder *) LocalAlloc(LMEM_FIXED, sizeof(t_optimal_encoder)); if (context->optimal_encoder == NULL) return FALSE; OptimalEncoderReset(context); return TRUE; }