/*
 * fstenc.c
 *
 * Fast encoder
 *
 * This is a one pass encoder which uses predefined trees.  However, since these are not the
 * same trees defined for a fixed block (we use better trees than that), we output a dynamic
 * block header.
 */
#include <string.h>
#include <stdio.h>
#include <crtdbg.h>
#include "deflate.h"
#include "fasttbl.h"
// For debugging purposes:
// Verifies that all of the hash pointers in the hash table are correct, and that everything
// in the same hash chain has the same hash value.
//
// Exactly one definition is active: debug builds (_DEBUG) call the verifier, every other
// build expands to an empty statement.  The debug expansion refers to a variable named
// "context" at the point of use.
#ifdef _DEBUG
#define VERIFY_HASHES(bufpos) FastEncoderVerifyHashes(context, bufpos)
#else
#define VERIFY_HASHES(bufpos) ;
#endif
// Fold character c into hash variable h: shift h left by FAST_ENCODER_HASH_SHIFT bits, then
// XOR in c.  Note that the expansion already ends with a semicolon.
#define UPDATE_HASH(h,c) h = ((h) << FAST_ENCODER_HASH_SHIFT) ^ (c);
// Insert a string into the hash chain at location bufpos.
//
// On exit, "search" receives the previous head of the chain for this hash value, and bufpos
// becomes the new head (with prev[] for bufpos linking back to the old head).
//
// Operates on the caller's local variables hash, window, lookup and prev.  The macro folds
// window[bufpos+2] into hash, so hash is expected to already reflect window[bufpos] and
// window[bufpos+1] (the _ASSERT checks the result against a full recalculation).
#define INSERT_STRING(search,bufpos) \
{ \
    UPDATE_HASH(hash, window[(bufpos)+2]); \
    \
    _ASSERT((unsigned int) FAST_ENCODER_RECALCULATE_HASH(bufpos) == (unsigned int) (hash & FAST_ENCODER_HASH_MASK)); \
    \
    search = lookup[hash & FAST_ENCODER_HASH_MASK]; \
    lookup[hash & FAST_ENCODER_HASH_MASK] = (t_search_node) (bufpos); \
    prev[(bufpos) & FAST_ENCODER_WINDOW_MASK] = (t_search_node) (search); \
}
// Output bits function which uses local variables for the bit buffer.
//
// ORs x into the caller's local bitbuf at bit offset bitcount and advances bitcount by n
// (x is expected to carry no bits above bit n-1).  Once 16 or more bits are pending, the two
// low-order bytes are written, least significant first, through the caller's local
// output_curpos and removed from the bit buffer.
#define LOCAL_OUTPUT_BITS(n, x) \
{ \
    bitbuf |= ((x) << bitcount); \
    bitcount += (n); \
    if (bitcount >= 16) \
    { \
        *output_curpos++ = (BYTE) bitbuf; \
        *output_curpos++ = (BYTE) (bitbuf >> 8); \
        bitcount -= 16; \
        bitbuf >>= 16; \
    } \
}
// Emit unmatched (literal) symbol c.  The g_FastEncoderLiteralCodeInfo entry is split as:
// low 5 bits = number of bits to output, remaining upper bits = the bits themselves.
// Note that the expansion already ends with a semicolon.
#define OUTPUT_CHAR(c) LOCAL_OUTPUT_BITS(g_FastEncoderLiteralCodeInfo[c] & 31, g_FastEncoderLiteralCodeInfo[c] >> 5);
// Output a match with length match_len (>= MIN_MATCH) and displacement match_pos
// Optimisation: unlike the other encoders, here we have an array of codes for each match
// length (not just each match length slot), complete with all the extra bits filled in, in
// a single array element.
// There are many advantages to doing this:
// 1. A single array lookup on g_FastEncoderLiteralCodeInfo, instead of separate array lookups
// on g_LengthLookup (to get the length slot), g_FastEncoderLiteralTreeLength,
// g_FastEncoderLiteralTreeCode, g_ExtraLengthBits, and g_BitMask
// 2. The array is an array of ULONGs, so no access penalty, unlike for accessing those USHORT
// code arrays in the other encoders (although they could be made into ULONGs with some
// modifications to the source).
// Note, if we could guarantee that code_len <= 16 always, then we could skip an if statement here.
// A completely different optimisation is used for the distance codes since, obviously, a table for
// all 8192 distances combining their extra bits is not feasible. The distance codeinfo table is
// made up of code[], len[] and # extra_bits for this code.
// The advantages are similar to the above; a ULONG array instead of a USHORT and BYTE array, better
// cache locality, fewer memory operations.
// Relies on the caller's locals used by LOCAL_OUTPUT_BITS (bitbuf, bitcount, output_curpos).
#define OUTPUT_MATCH(match_len, match_pos) \
{ \
    int extra_bits; \
    int code_len; \
    ULONG code_info; \
    \
    _ASSERT((match_len) >= MIN_MATCH && (match_len) <= MAX_MATCH); \
    \
    /* length: one table entry per match length; low 5 bits = bit count, rest = bits */ \
    code_info = g_FastEncoderLiteralCodeInfo[(NUM_CHARS+1-MIN_MATCH)+(match_len)]; \
    code_len = code_info & 31; \
    _ASSERT(code_len != 0); \
    if (code_len <= 16) \
    { \
        LOCAL_OUTPUT_BITS(code_len, code_info >> 5); \
    } \
    else \
    { \
        /* more than 16 bits: emit the low 16 first, then the remainder */ \
        LOCAL_OUTPUT_BITS(16, (code_info >> 5) & 65535); \
        LOCAL_OUTPUT_BITS(code_len-16, code_info >> (5+16)); \
    } \
    /* distance: bits 0-3 = code length, bits 4-7 = # extra bits, bits 8 and up = code */ \
    code_info = g_FastEncoderDistanceCodeInfo[POS_SLOT(match_pos)]; \
    LOCAL_OUTPUT_BITS(code_info & 15, code_info >> 8); \
    extra_bits = (code_info >> 4) & 15; \
    if (extra_bits != 0) LOCAL_OUTPUT_BITS(extra_bits, (match_pos) & g_BitMask[extra_bits]); \
}
// This disabled code is the old way of doing things, which is what the other encoders use.
// It is kept for reference only; the #if 0 / #endif pair must each start their own line so
// that the preprocessor actually skips (and stops skipping) this region.
#if 0
#define OUTPUT_MATCH(match_len, match_pos) \
{ \
    int pos_slot = POS_SLOT(match_pos); \
    int len_slot = g_LengthLookup[match_len - MIN_MATCH]; \
    int extra_bits; \
    \
    _ASSERT(match_len >= MIN_MATCH && match_len <= MAX_MATCH); \
    _ASSERT(g_FastEncoderLiteralTreeLength[(NUM_CHARS+1)+len_slot] != 0); \
    _ASSERT(g_FastEncoderDistanceTreeLength[pos_slot] != 0); \
    \
    LOCAL_OUTPUT_BITS(g_FastEncoderLiteralTreeLength[(NUM_CHARS+1)+len_slot], g_FastEncoderLiteralTreeCode[(NUM_CHARS+1)+len_slot]); \
    extra_bits = g_ExtraLengthBits[len_slot]; \
    if (extra_bits != 0) LOCAL_OUTPUT_BITS(extra_bits, (match_len-MIN_MATCH) & g_BitMask[extra_bits]); \
    \
    LOCAL_OUTPUT_BITS(g_FastEncoderDistanceTreeLength[pos_slot], g_FastEncoderDistanceTreeCode[pos_slot]); \
    extra_bits = g_ExtraDistanceBits[pos_slot]; \
    if (extra_bits != 0) LOCAL_OUTPUT_BITS(extra_bits, (match_pos) & g_BitMask[extra_bits]); \
}
#endif
// Local function prototypes
// Slides the window once bufpos has reached 2*FAST_ENCODER_WINDOW_SIZE.
static void FastEncoderMoveWindows(t_encoder_context *context);

// Walks a hash chain looking for the longest match; returns the match length.
static int FastEncoderFindMatch(
    const BYTE *    window,
    const USHORT *  prev,
    long            bufpos,
    long            search,
    t_match_pos *   match_pos,
    int             cutoff,
    int             nice_length
);
// Output the block type and tree structure for our hard-coded trees.
// Functionally equivalent to:
// outputBits(context, 1, 1); // "final" block flag
// outputBits(context, 2, BLOCKTYPE_DYNAMIC);
// outputTreeStructure(context, g_FastEncoderLiteralTreeLength, g_FastEncoderDistanceTreeLength);
// However, all of the above has smartly been cached in global data, so we just memcpy().
// Preconditions (checked by _ASSERT only): the cached header has been generated
// (g_FastEncoderTreeLength > 0) and the output buffer has room for it.
// On return, context->output_curpos has advanced past the copied header and the context's
// bit buffer state is set to the cached post-header state.
void FastEncoderOutputPreamble(t_encoder_context *context)
{
#if 0
    // slow way:
    outputBits(context, 1+2, 1 | (BLOCKTYPE_DYNAMIC << 1));
    outputTreeStructure(context, g_FastEncoderLiteralTreeLength, g_FastEncoderDistanceTreeLength);
#endif

    // make sure tree has been init
    _ASSERT(g_FastEncoderTreeLength > 0);

    // make sure we have enough space to output tree
    _ASSERT(context->output_curpos + g_FastEncoderTreeLength < context->output_endpos);

    // fast way:
    memcpy(context->output_curpos, g_FastEncoderTreeStructureData, g_FastEncoderTreeLength);
    context->output_curpos += g_FastEncoderTreeLength;

    // need to get final states of bitbuf and bitcount after outputting all that stuff
    context->bitbuf = g_FastEncoderPostTreeBitbuf;
    context->bitcount = g_FastEncoderPostTreeBitcount;
}
// Fast encoder deflate function
//
// Encodes input from context->bufpos towards context->bufpos_end, stopping early when the
// output pointer reaches context->output_near_end_threshold.
//
// On the first call after a reset the cached dynamic block header is emitted first.
// On return the context's bufpos, bit buffer and output pointer are updated, and
// context->state is STATE_NORMAL if all input was consumed, STATE_OUTPUTTING_BLOCK otherwise.
// If bufpos has reached 2*FAST_ENCODER_WINDOW_SIZE the window is slid back.
//
// The macros used below (INSERT_STRING, OUTPUT_CHAR, OUTPUT_MATCH) operate directly on the
// locals hash, window, lookup, prev, bitbuf, bitcount and output_curpos, so those names must
// not be changed.
//
void FastEncoderDeflate(
    t_encoder_context * context,
    int                 search_depth,           // # hash links to traverse
    int                 lazy_match_threshold,   // don't search @ X+1 if match length @ X is > lazy
    int                 good_length,            // divide traversal depth by 4 if match length @ X is >= good
    int                 nice_length             // early-exit length handed to the match finder
)
{
    long            bufpos;
    unsigned int    hash;
    unsigned long   bitbuf;
    int             bitcount;
    BYTE *          output_curpos;
    t_fast_encoder *encoder = context->fast_encoder;
    byte *          window = encoder->window; // make local copies of context variables
    t_search_node * prev = encoder->prev;
    t_search_node * lookup = encoder->lookup;

    // If this is the first time in here (since last reset) then we need to output our dynamic
    // block header
    if (encoder->fOutputBlockHeader == FALSE)
    {
        encoder->fOutputBlockHeader = TRUE;

        // Watch out!  Calls to outputBits() and outputTreeStructure() use the bit buffer
        // variables stored in the context, not our local cached variables.
        FastEncoderOutputPreamble(context);
    }

    // Copy bitbuf vars into local variables since we're now using OUTPUT_BITS macro.
    // Do not call anything that uses the context structure's bit buffer variables!
    output_curpos = context->output_curpos;
    bitbuf = context->bitbuf;
    bitcount = context->bitcount;

    // copy bufpos into local variable
    bufpos = context->bufpos;

    VERIFY_HASHES(bufpos); // debug mode: verify that the hash table is correct

    // initialise the value of the hash
    // no problem if locations bufpos, bufpos+1 are invalid (not enough data), since we will
    // never insert using that hash value
    hash = 0;
    UPDATE_HASH(hash, window[bufpos]);
    UPDATE_HASH(hash, window[bufpos+1]);

    // while we haven't come to the end of the input, and we still aren't close to the end
    // of the output
    while (bufpos < context->bufpos_end && output_curpos < context->output_near_end_threshold)
    {
        int         match_len;
        t_match_pos match_pos;
        t_match_pos search;

        VERIFY_HASHES(bufpos); // debug mode: verify that hash table is correct

        if (context->bufpos_end - bufpos <= 3)
        {
            // The hash value becomes corrupt when we get within 3 characters of the end of the
            // input buffer, since the hash value is based on 3 characters.  We just stop
            // inserting into the hash table at this point, and allow no matches.
            match_len = 0;
        }
        else
        {
            // insert string into hash table and return most recent location of same hash value
            // (this is what gives "search" its value before it is tested below)
            INSERT_STRING(search, bufpos);

            // did we find a recent location of this hash value?
            if (search != 0)
            {
                // yes, now find a match at what we'll call position X
                match_len = FastEncoderFindMatch(window, prev, bufpos, search, &match_pos, search_depth, nice_length);

                // truncate match if we're too close to the end of the input buffer
                if (bufpos + match_len > context->bufpos_end)
                    match_len = (int) (context->bufpos_end - bufpos);
            }
            else
            {
                // no most recent location found
                match_len = 0;
            }
        }

        if (match_len < MIN_MATCH)
        {
            // didn't find a match, so output unmatched char
            OUTPUT_CHAR(window[bufpos]);
            bufpos++;
            continue;
        }

        // A match was found at X.  bufpos now points to X+1
        bufpos++;

        // is this match so good (long) that we should take it automatically without
        // checking X+1 ?
        if (match_len <= lazy_match_threshold)
        {
            int         next_match_len;
            t_match_pos next_match_pos;

            // insert X+1; sets search
            INSERT_STRING(search, bufpos);

            // no, so check for a better match at X+1
            if (search != 0)
            {
                next_match_len = FastEncoderFindMatch(
                    window,
                    prev,
                    bufpos,
                    search,
                    &next_match_pos,
                    match_len < good_length ? search_depth : (search_depth >> 2),
                    nice_length
                );

                // truncate match if we're too close to the end of the buffer
                // note: next_match_len could now be < MIN_MATCH
                if (bufpos + next_match_len > context->bufpos_end)
                    next_match_len = (int) (context->bufpos_end - bufpos);
            }
            else
            {
                next_match_len = 0;
            }

            // right now X and X+1 are both inserted into the search tree
            if (next_match_len > match_len)
            {
                // since next_match_len > match_len, it can't be < MIN_MATCH here

                // match at X+1 is better, so output unmatched char at X
                OUTPUT_CHAR(window[bufpos-1]);

                // now output match at location X+1
                OUTPUT_MATCH(next_match_len, next_match_pos);

                // insert remainder of second match into search tree
                //
                // example: (*=inserted already)
                //
                // X      X+1      X+2      X+3      X+4
                // *      *
                //        nextmatchlen=3
                //        bufpos
                //
                // If next_match_len == 3, we want to perform 2
                // insertions (at X+2 and X+3).  However, first we must
                // inc bufpos.
                bufpos++; // now points to X+2
                match_len = next_match_len;
            }
            else
            {
                // match at X is better, so take it
                OUTPUT_MATCH(match_len, match_pos);

                // Insert remainder of first match into search tree, minus the first
                // two locations, which were inserted above.
                //
                // For example, if match_len == 3, then we've inserted at X and X+1
                // already (and bufpos is now pointing at X+1), and now we need to insert
                // only at X+2.
                match_len--;
                bufpos++; // now bufpos points to X+2
            }
        }
        else
        {
            // match_len > lazy_match_threshold
            // bufpos points to X+1, location X inserted already

            // first match is so good that we're not even going to check at X+1
            OUTPUT_MATCH(match_len, match_pos);
        }

        // Insert the remainder of the chosen match into the search tree.  All three paths
        // above arrive here with bufpos at the first location still to be inserted and
        // match_len one greater than the number of insertions wanted.
        if (context->bufpos_end - bufpos <= match_len)
        {
            // too close to the end of the input to hash safely; just skip past the match
            bufpos += (match_len-1);
        }
        else
        {
            while (--match_len > 0)
            {
                t_match_pos ignore;

                INSERT_STRING(ignore, bufpos);
                bufpos++;
            }
        }
    } /* end ... while (bufpos < bufpos_end) */

    // store local variables back in context
    context->bufpos = bufpos;
    context->bitbuf = bitbuf;
    context->bitcount = bitcount;
    context->output_curpos = output_curpos;

    VERIFY_HASHES(bufpos); // debug mode: verify that hash table is correct

    if (bufpos == context->bufpos_end)
        context->state = STATE_NORMAL;
    else
        context->state = STATE_OUTPUTTING_BLOCK;

    // slide the window if bufpos has reached 2*window size
    if (context->bufpos == 2*FAST_ENCODER_WINDOW_SIZE)
        FastEncoderMoveWindows(context);
}
//
// Slide the window back by FAST_ENCODER_WINDOW_SIZE.
//
// Must only be called when context->bufpos == 2*FAST_ENCODER_WINDOW_SIZE (asserted).  The
// upper half of the window buffer is copied over the lower half, every lookup[] and prev[]
// entry is moved back by the same distance (entries that would land at or below zero become
// 0), and bufpos / bufpos_end are reset to FAST_ENCODER_WINDOW_SIZE.
//
static void FastEncoderMoveWindows(t_encoder_context *context)
{
    t_search_node *lookup = context->fast_encoder->lookup;
    t_search_node *prev = context->fast_encoder->prev;
    BYTE *window = context->fast_encoder->window;
    int slot;

    _ASSERT(context->bufpos == 2*FAST_ENCODER_WINDOW_SIZE);

    // bring the most recent window's worth of data down to the start of the buffer
    memcpy(window, window + (context->bufpos - FAST_ENCODER_WINDOW_SIZE), FAST_ENCODER_WINDOW_SIZE);

    // move all the hash pointers back
    // BUGBUG We are incurring a performance penalty since lookup[] is a USHORT array.  Would be
    // nice to subtract from two locations at a time.
    for (slot = 0; slot < FAST_ENCODER_HASH_TABLE_SIZE; slot++)
    {
        long moved = ((long) lookup[slot]) - FAST_ENCODER_WINDOW_SIZE;

        // too far away now? then set to zero
        lookup[slot] = (moved > 0) ? (t_search_node) moved : (t_search_node) 0;
    }

    // prev[]'s are absolute pointers, not relative pointers, so we have to move them back too
    // making prev[]'s into relative pointers poses problems of its own
    for (slot = 0; slot < FAST_ENCODER_WINDOW_SIZE; slot++)
    {
        long moved = ((long) prev[slot]) - FAST_ENCODER_WINDOW_SIZE;

        prev[slot] = (moved > 0) ? (t_search_node) moved : (t_search_node) 0;
    }

    VERIFY_HASHES(2*FAST_ENCODER_WINDOW_SIZE); // debug: verify hash table is correct

    context->bufpos = FAST_ENCODER_WINDOW_SIZE;
    context->bufpos_end = context->bufpos;
}
// Find match
// Returns match length found. A match length < MIN_MATCH means no match was found.
//
// Walks the hash chain starting at "search" (following prev[]) while candidates stay within
// FAST_ENCODER_WINDOW_SIZE of bufpos, and remembers the longest run of bytes equal to the
// bytes at bufpos.
//
// *match_pos is always written: it is (bufpos - best candidate position - 1), where the
// candidate position is 0 if nothing matched.  It is only meaningful when the return value
// is >= MIN_MATCH.
//
// Up to MAX_MATCH bytes are compared starting at bufpos and at each candidate with no
// end-of-input check here; callers truncate the returned length themselves.
//
static int FastEncoderFindMatch(
    const BYTE *    window,         // window array
    const USHORT *  prev,           // prev ptr array
    long            bufpos,         // current buffer position
    long            search,         // where to start searching
    t_match_pos *   match_pos,      // return match position here
    int             cutoff,         // max # chain entries to examine (values <= 0 behave like 1)
    int             nice_length     // stop immediately once a match longer than this is found
)
{
    long            earliest;
    int             best_match = 0;     // best match length found so far
    t_match_pos     l_match_pos = 0;    // absolute match position of best match found
    BYTE            want_char;

    // the earliest we can look
    earliest = bufpos - FAST_ENCODER_WINDOW_SIZE;
    _ASSERT(earliest >= 0);

    // store window[bufpos + best_match]
    want_char = window[bufpos];

    while (search > earliest)
    {
        // Start by checking the character that would allow us to increase the match
        // length by one.  This improves performance quite a bit.
        if (window[search + best_match] == want_char)
        {
            int j;

            // Now make sure that all the other characters are correct
            for (j = 0; j < MAX_MATCH; j++)
            {
                if (window[bufpos+j] != window[search+j])
                    break;
            }

            if (j > best_match)
            {
                best_match = j;
                l_match_pos = search; // absolute position

                if (j > nice_length)
                    break;

                want_char = window[bufpos+j];
            }
        }

        // "<= 0" rather than "== 0": a caller may pass cutoff == 0 (for example
        // search_depth >> 2 when search_depth < 4).  With "== 0" the counter would go
        // negative and never stop the walk, making the search depth effectively unlimited.
        if (--cutoff <= 0)
            break;

        // make sure we're always going backwards
        _ASSERT(prev[search & FAST_ENCODER_WINDOW_MASK] < search);

        search = (long) prev[search & FAST_ENCODER_WINDOW_MASK];
    }

    // doesn't necessarily mean we found a match; best_match could be > 0 and < MIN_MATCH
    *match_pos = bufpos - l_match_pos - 1; // convert absolute to relative position

    // don't allow match length 3's which are too far away to be worthwhile
    if (best_match == 3 && *match_pos >= FAST_ENCODER_MATCH3_DIST_THRESHOLD)
        return 0;

    _ASSERT(best_match < MIN_MATCH || *match_pos < FAST_ENCODER_WINDOW_SIZE);

    return best_match;
}
//
// Return the fast encoder to its initial state: empty hash table, bufpos and bufpos_end at
// FAST_ENCODER_WINDOW_SIZE, and the block header flagged as not yet output.
// context->fast_encoder must already be allocated (asserted).
//
void FastEncoderReset(t_encoder_context *context)
{
    t_fast_encoder *encoder = context->fast_encoder;

    _ASSERT(encoder != NULL);

    // zero hash table
    // NOTE(review): sizeof covers the whole table only if lookup is an array member of
    // t_fast_encoder rather than a pointer -- confirm against the struct declaration.
    memset(encoder->lookup, 0, sizeof(encoder->lookup));

    encoder->fOutputBlockHeader = FALSE;

    context->window_size = FAST_ENCODER_WINDOW_SIZE;
    context->bufpos = FAST_ENCODER_WINDOW_SIZE;
    context->bufpos_end = context->bufpos;
}
//
// Allocate the fast encoder state with LocalAlloc(LMEM_FIXED, ...), store it in
// context->fast_encoder and reset it.
//
// Returns TRUE on success.  If the allocation fails, context->fast_encoder is left NULL and
// FALSE is returned.
//
BOOL FastEncoderInit(t_encoder_context *context)
{
    t_fast_encoder *encoder = (t_fast_encoder *) LocalAlloc(LMEM_FIXED, sizeof(t_fast_encoder));

    context->fast_encoder = encoder;

    if (encoder != NULL)
    {
        FastEncoderReset(context);
        return TRUE;
    }

    return FALSE;
}
// Pregenerate the structure of the dynamic tree header which is output for
// the fast encoder. Also record the final states of bitcount and bitbuf
// after outputting.
void FastEncoderGenerateDynamicTreeEncoding(void) { t_encoder_context context;
// Create a fake context with output pointers into our global data
memset(&context, 0, sizeof(context)); context.output_curpos = g_FastEncoderTreeStructureData; context.output_endpos = g_FastEncoderTreeStructureData + sizeof(g_FastEncoderTreeStructureData); context.output_near_end_threshold = context.output_endpos - 16; InitBitBuffer(&context);
outputBits(&context, 1, 1); // "final" block flag
outputBits(&context, 2, BLOCKTYPE_DYNAMIC); outputTreeStructure( &context, g_FastEncoderLiteralTreeLength, g_FastEncoderDistanceTreeLength );
g_FastEncoderTreeLength = (int) (context.output_curpos - (BYTE *) g_FastEncoderTreeStructureData); g_FastEncoderPostTreeBitbuf = context.bitbuf; g_FastEncoderPostTreeBitcount = context.bitcount; }