|
|
/*
* @doc INTERNAL * * @module HASH.C -- RTF control word cache | * #ifdef'ed with RTF_HASHCACHE * * Owner: <nl> * Jon Matousek <nl> * * History: <nl> * 8/15/95 jonmat first hash-cache for RTF using Brent's Method. */ #include "_common.h"
#ifdef RTF_HASHCACHE
#include "hash.h"
ASSERTDATA
// Table of RTF control words that HashKeyword_Init() loads into the hash
// table. Declared extern here; the definition is not in this file.
extern KEYWORD rgKeyword[]; // All of the RTF control words.
// Number of leading keyword characters that HashKeyword_Key() sums to form
// the hash key.
#define MAX_INAME 3
// One record per inserted keyword:
//   token   - the KEYWORD this record refers to.
//   passBit - nonzero when HashKeyword_Fetch() must keep probing after a
//             miss on the slot holding this record.
typedef struct { const KEYWORD *token; BOOL passBit; } HashEntry;
// hashtbl - the HASHSIZE slots of the table; each slot is NULL (empty) or
//           points at a record inside 'storage'.
// storage - backing array of HashEntry records, allocated by
//           HashKeyword_Init() and handed out one at a time by
//           HashKeyword_Insert().
static HashEntry *(hashtbl[HASHSIZE]); static HashEntry *storage; // Dynamically alloc for cKeywords.
// Set to TRUE by HashKeyword_Init() once 'storage' has been allocated.
BOOL _rtfHashInited = FALSE;
// Forward declaration: HashKeyword_Insert() uses this before its definition.
static INT HashKeyword_Key( const CHAR *szKeyword );
/*
 *  HashKeyword_Insert()
 *
 *  @func
 *      Insert a KEYWORD into the RTF hash table.
 *  @comm
 *      This function uses the % operator for MOD
 *      in order to validate MOD257.
 *  @devnote
 *      The table is open-addressed. A keyword's first slot is
 *      key % HASHSIZE and its probe step is 1 + key % (HASHSIZE-1).
 *      While walking the new keyword's probe sequence we also consider,
 *      for every occupied slot we pass, moving the resident keyword
 *      further along *its* probe sequence and taking its slot, whenever
 *      that gives a lower combined probe count (the module header calls
 *      this Brent's Method).
 *
 *      HashKeyword_Fetch() stops probing at the first occupied slot whose
 *      passBit is clear, so every occupied slot that lies before a stored
 *      keyword on that keyword's probe sequence must have passBit set.
 *      passBit therefore describes the slot, not the keyword stored in it.
 */
VOID HashKeyword_Insert (
    const KEYWORD *token )//@parm pointer to KEYWORD token to insert.
{
    TRACEBEGIN(TRCSUBSYSDISP, TRCSCOPEINTERN, "HashKeyword_Insert");

    INT         index, step, position, cost, source, sink, index1, step1, temp;
    INT         hashKey;
    BOOL        tmpPassBit;
    static INT  totalKeys = 0;          // Records handed out from 'storage' so far.
    const CHAR  *szKeyword;
    HashEntry   *np;

    AssertSz ( _rtfHashInited, "forgot to call HashKeyword_Init()");
    AssertSz ( totalKeys <= HASHSIZE * 0.7, "prime not large enough to hold total keys");

    szKeyword = token->szKeyword;

    // Take the next unused record from the storage array.
    np = &storage[totalKeys++];
    np->token = token;

    // Compute the key once; both the first slot and the step derive from it.
    hashKey = HashKeyword_Key(szKeyword);
    index = hashKey % HASHSIZE;
    step = 1 + (hashKey % (HASHSIZE-1));

    position = 1;                       // Probes needed to reach the current slot.
    cost = HASHSIZE;                    // The max collisions for any.
    source = sink = index;

    while (hashtbl[index] != NULL)      // Find empty slot.
    {
        position++;                     // How many collisions.

        // For the keyword stored here, count how many probes it would take
        // to find it again if it were moved to the next empty slot on its
        // own probe sequence.
        temp = 1;
        step1 = 1 + (HashKeyword_Key(hashtbl[index]->token->szKeyword) % (HASHSIZE-1));
        index1 = (index + step1) % HASHSIZE;
        while (hashtbl[index1] != NULL)
        {
            index1 = (index1 + step1) % HASHSIZE;
            temp++;
        }

        // Incremental cost computation, minimizes average # of collisions
        // for both keywords.
        if (cost > position + temp)
        {
            source = index;
            sink = index1;
            cost = position + temp;
        }

        // There will be something stored beyond here, set the passBit.
        hashtbl[index]->passBit = 1;

        // Next index to search for empty slot.
        index = (index + step) % HASHSIZE;
    }

    if (position <= cost)
    {
        // Cheapest choice: use the empty slot at the end of our own probe
        // sequence; nothing is relocated.
        source = sink = index;
    }
    else
    {
        // The resident of 'source' is about to move to 'sink' along its own
        // probe sequence. Every occupied slot it skips over must tell
        // HashKeyword_Fetch() to keep probing, otherwise the relocated
        // keyword could no longer be found.
        step1 = 1 + (HashKeyword_Key(hashtbl[source]->token->szKeyword) % (HASHSIZE-1));
        for (index1 = (source + step1) % HASHSIZE;
             index1 != sink;
             index1 = (index1 + step1) % HASHSIZE)
        {
            hashtbl[index1]->passBit = 1;
        }
    }

    // Move the old resident (NULL when nothing is relocated) and store the
    // new record in the chosen slot.
    hashtbl[sink] = hashtbl[source];
    hashtbl[source] = np;

    // The pass bits belong to the slots, not to the records that just
    // traded places, so exchange them back (the original author's
    // "jOn hack").
    if (hashtbl[sink] && hashtbl[source])
    {
        tmpPassBit = hashtbl[sink]->passBit;
        hashtbl[sink]->passBit = hashtbl[source]->passBit;
        hashtbl[source]->passBit = tmpPassBit;
    }
}
/*
 *  static HashKeyword_Key()
 *
 *  @func
 *      Calculate the hash key.
 *  @comm
 *      The key is the sum of the first MAX_INAME characters of the
 *      keyword (fewer if the string is shorter), each taken as an
 *      unsigned character.
 *  @rdesc
 *      The hash Key for calculating the index and step.
 */
static INT HashKeyword_Key(
    const CHAR *szKeyword ) //@parm C string to create hash key for.
{
    TRACEBEGIN(TRCSUBSYSDISP, TRCSCOPEINTERN, "HashKeyword_Key");

    INT sum = 0;
    INT remaining = MAX_INAME;      // Characters still allowed into the sum.

    // Stop at the terminating 0 or after MAX_INAME characters,
    // whichever comes first.
    while (remaining > 0 && *szKeyword)
    {
        sum += (UCHAR) *szKeyword;
        szKeyword++;
        remaining--;
    }

    return sum;
}
/*
 *  HashKeyword_Fetch()
 *
 *  @func
 *      Look up a KEYWORD with the given szKeyword.
 *  @devnote
 *      We have a hash table of size 257. This allows for
 *      the use of very fast routines to calculate a MOD 257.
 *      This gives us a significant increase in performance
 *      over a binary search.
 *
 *      The first slot is probed on its own; the probe step is only
 *      computed if that slot misses and its passBit says to keep going.
 *      Probing ends at an empty slot or at an occupied slot whose
 *      passBit is clear.
 *  @rdesc
 *      A pointer to the KEYWORD, or NULL if not found.
 */
const KEYWORD *HashKeyword_Fetch (
    const CHAR *szKeyword ) //@parm C string to search for.
{
    TRACEBEGIN(TRCSUBSYSDISP, TRCSCOPEINTERN, "HashKeyword_Fetch");

    INT         slot, stride;
    INT         hashKey;
    CHAR        firstChar;
    HashEntry   *entry;
    const BYTE  *pchWanted;
    const BYTE  *pchStored;

    AssertSz( HASHSIZE == 257, "Remove custom MOD257.");

    firstChar = *szKeyword;
    hashKey = HashKeyword_Key(szKeyword);   // Basis for both slot and stride.

    // First slot to examine. The original author notes MOD257() stands in
    // for hashKey % HASHSIZE.
    slot = MOD257(hashKey);
    entry = hashtbl[slot];
    if ( entry == NULL )                    // Nothing stored here at all.
        return NULL;

    // Cheap reject on the first character, then a full compare that only
    // succeeds when both strings reach their terminating 0 together.
    pchStored = (const BYTE *)entry->token->szKeyword;
    if ( firstChar == *pchStored )
    {
        for ( pchWanted = (const BYTE *)szKeyword;
              *pchWanted == *pchStored;
              pchWanted++, pchStored++ )
        {
            if ( *pchWanted == 0 )
                return entry->token;
        }
    }

    if ( entry->passBit != 1 )              // No further entry to test.
        return NULL;

    // The original author notes this stands in for
    // 1 + (hashKey % (HASHSIZE-1)).
    stride = 1 + MOD257_1(hashKey);

    for (;;)
    {
        // Advance to the next slot on the probe sequence.
        slot += stride;
        slot = MOD257(slot);
        entry = hashtbl[slot];
        if ( entry == NULL )                // Ran off the end of the chain.
            break;

        // Same compare as for the first slot.
        pchStored = (const BYTE *)entry->token->szKeyword;
        if ( firstChar == *pchStored )
        {
            for ( pchWanted = (const BYTE *)szKeyword;
                  *pchWanted == *pchStored;
                  pchWanted++, pchStored++ )
            {
                if ( *pchWanted == 0 )
                    return entry->token;
            }
        }

        if ( !entry->passBit )              // Done searching?
            break;
    }

    return NULL;
}
/*
* HashKeyword_Init() * * @func * Load up and init the hash table with RTF control words. * @devnote * _rtfHashInited will be FALSE if anything here fails. */ VOID HashKeyword_Init( ) { TRACEBEGIN(TRCSUBSYSDISP, TRCSCOPEINTERN, "HashKeyword_Init");
extern SHORT cKeywords; // How many RTF keywords we currently recognize.
INT i;
AssertSz( _rtfHashInited == FALSE, "Only need to init this once.");
// Create enough storage for cKeywords
storage = (HashEntry *) PvAlloc( sizeof(HashEntry) * cKeywords, fZeroFill );
// Load in all of the RTF control words.
if ( storage ) { _rtfHashInited = TRUE;
for (i = 0; i < cKeywords; i++ ) { HashKeyword_Insert(&rgKeyword[i]); } #ifdef DEBUG // Make sure we can fetch all these keywords.
for (i = 0; i < cKeywords; i++ ) { AssertSz ( &rgKeyword[i] == HashKeyword_Fetch ( rgKeyword[i].szKeyword ), "Keyword Hash is not working."); } #endif
} }
#endif // RTF_HASHCACHE
|