|
|
/*
* pathash.h * * author: John R. Douceur * date: 5 May 1997 * * This header file defines structures, function prototypes, and macros for * the pat-hash table database. The code is object-oriented C, transliterated * from a C++ implementation. * * The pat-hash database is a combination of a dynamically sized, separately * chained hash table and a Patricia tree. The hash table dynamically grows * and shrinks as needed, and the workload of modifying the table size is * distributed evenly among the insertion or removal operations that cause * the growth or shrinkage. * * The insertion and removal operations manage both a hash table and a Patricia * tree, but the search routine uses only the hash table for performing the * search. The Patrica tree is present to support a scan operation, which * searches the database for all entries that match a given pattern, where the * pattern that is scanned may contain wildcards. * * Because this code is C, rather than C++, it is not possible to hide as * much of the implementation from the client code as one might wish. * Nonetheless, there is an attempt to isolate the client from some of the * implementation details through the use of macros. Below is described each * of the functions and macros necessary to use the pat-hash table. * */
#ifndef _INC_PATHASH
#define _INC_PATHASH
#ifdef __cplusplus
extern "C" { #endif
/*
* There are three basic structures employed: the PHTableEntry, the * PHTableGroup, and the PatHashTable. Ideally, these would be completely * hidden from the client, but the macro GetReferenceFromSpecificPatternHandle * requires knowledge of the structure's definition. It is strongly urged * that the client not directly refer to any of the fields of either of these * structures. To support the documentation of the accompanying pathash.c * file, these structures are annotated with internal comments, but these can * be ignored by the reader who wishes only to understand how to write client * code for the pat-hash table. * * The values stored in the pat-hash table are known as specific patterns, * where the term "specific" implies that the patterns do not contain * wildcards. The client refers to a pattern by its SpecificPatternHandle. * This is typedefed to a pointer to PHTableEntry, but this fact should be * ignored by the client, since it is an implementation detail. * */
//#include <stdlib.h>
//#include <malloc.h>
struct _PHTableEntry { // This is the element in which a specific pattern is stored. It is both
// a component of a hash chain (linked list) that is indexed by a hash
// table and also a component of a Patricia tree.
// hash table fields:
unsigned int hash; // hash value
struct _PHTableEntry *next; // pointer to next entry in linked list
// Patricia tree fields
int pivot_bit; // bit of key on which to branch
struct _PHTableEntry *children[2]; // pointers to child nodes
// general:
void *reference; // reference value supplied by client
char value[1]; // space for storing pattern value
};
typedef struct _PHTableEntry PHTableEntry;
struct _PHTableGroup { // The hash table that indexes the hash chain of entries is itself a
// linked list of structures called groups. Each group is a table of
// pointers to the hash chains of entries, and the group also contains
// a pointer to the previous group, meaning that the groups are backwardly
// linked. The groups are sized in powers of two, so, in addition to one
// special group of size one, there is a group of size one, a group of size
// two, a group of size four, a group of size eight, and so on, up to the
// number of groups necessary to hold the table.
struct _PHTableGroup *previous; // pointer to immediately smaller group
PHTableEntry *entry_list[1]; // space to hold table of chain pointers
};
typedef struct _PHTableGroup PHTableGroup;
struct _PatHashTable { int keybits; // number of bits in key
int keybytes; // number of bytes in key, calculated from keybits
int usage_ratio; // desired ratio of entries to hash chains
int usage_histeresis; // histeresis between insertion and removal resizes
int allocation_histeresis; // histeresis between insert and removal mallocs
int max_free_list_size; // maximum size of free entry list
PHTableGroup *initial_group; // pointer to first group to search
PHTableGroup *top_group; // pointer to largest group allocated
int allocation_exponent; // binary exponent of current allocation size
int size_exponent; // binary exponent of current group size
int extension_size; // number of slots in use in initial group
int population; // number of entries in database
PHTableEntry *root; // root of Patricia tree
PHTableEntry *free_list; // list of free (unused) entries
int free_list_size; // number of elements currently on free list
};
typedef struct _PatHashTable PatHashTable;
// The client uses SpecificPatternHandle to refer to values in the database.
typedef PHTableEntry *SpecificPatternHandle;
/*
* The client interface to the pat-hash table is provided by seven functions * and two macros. It is expected that the client will first instantiate a * database, either on the stack or the heap, and then insert specific patterns * with corresponding reference information into the database. The client can * then search the database for the specific patterns that were stored, and * it can scan the database for all specific patterns that match a general * pattern containing wildcards. * */
// A pat-hash table may be allocated on the stack simply by declaring a variable
// of type PatHashTable. To allocate it on the heap, the following macro
// returns a pointer to a new PatHashTable structure. If this macro is used, a
// corresponding call to free() must be made to deallocate the structure from
// the heap.
//
//#define NEW_PatHashTable ((PatHashTable *)malloc(sizeof(PatHashTable)))
#define AllocatePatHashTable(_ph) GpcAllocMem(&_ph, \
sizeof(PatHashTable), \ PathHashTag) #define FreePatHashTable(_ph) GpcFreeMem(_ph,PathHashTag)
// Since this is not C++, the PatHashTable structure is not self-constructing;
// therefore, the following constructor code must be called on the PatHashTable
// structure after it is allocated. The argument keybits specifies the size
// (in bits) of each pattern that will be stored in the database. The remaining
// arguments are parameters to the various control systems that govern the size
// of the database.
//
// The usage ratio is the target ratio of database entries to discrete hash
// chains, which is also the mean length of a hash chain: The minimum value
// is one; a larger value slightly decreases memory utilization and
// insertion/removal time at the expense of increasing search time. There is
// benefit to choosing a power of two for this value. Recommended values are
// 2 and 4.
//
// The usage histeresis is the histeresis between resizing operations due to
// insertions and removals. The minimum value is zero, providing no histeresis;
// in this case, if an insertion that causes a increase in table size is
// immediately followed by a removal, the table size will be decreased. Thus,
// a zero histeresis maintains low memory usage, but it engenders resizing
// chatter if insertions and removals are frequent.
//
// Allocation histeresis is the histeresis between allocation and deallocation
// of groups. A group is allocated immediately when it is required by a size
// increase in the table, but it is not necessarily deallocated immediately
// following a size decrease, if the allocation histeresis is set to a value
// greater than zero. Because groups are allocated in powers of two, the
// histeresis value is specified as a binary exponent. A value of 1 causes a
// group to be deallocated when the table is half of the size that will cause
// the group to be re-allocated. A value of 2 causes the group to be
// deallocated when the table is one quarter of the size that will cause the
// group to be re-allocated, and so forth.
//
// The maximum free list size determines the maximum number of elements that
// will be placed on a free list, rather than deallocated, when they are
// removed. Setting this value to zero keeps memory utilization low, but it
// can result in more frequent allocations and deallocation operations, which
// are expensive.
//
int constructPatHashTable( PatHashTable *phtable, int keybits, int usage_ratio, int usage_histeresis, int allocation_histeresis, int max_free_list_size);
// Since this is not C++, the PatHashTable structure is not self-destructing;
// therefore, the following destructor code must be called on the PatHashTable
// structure before it is deallocated.
//
void destructPatHashTable( PatHashTable *phtable);
// Once the PatHashTable structure has been allocated and constructed, patterns
// can be inserted into the database. Each pattern is passed as an array of
// bytes.
//
// Since the PatHashTable structure specifies the size of each pattern, it is
// theoretically possible for the insert routine to digest the submitted
// pattern and produce a hash value therefrom; however, general mechanisms for
// accomplishing this digestion are not very efficient. Therefore, the client
// is responsible for providing a digested form of its input as the chyme
// parameter. If the pattern is no bigger than an unsigned int, then the chyme
// can simply be equal to the pattern. If it is larger, then it should be set
// to something like the exclusive-or of the pattern's fields; however, care
// should be taken to ensure that two patterns are not likely to digest to the
// same chyme value, since this will substantially decrease the efficiency of
// the hash table. One common way of accomplishing this is by rotating the
// fields by varying amounts prior to the exclusive-or.
//
// The client also specifies a reference value, as a void pointer, that it
// wishes to associate with this pattern. When the pattern is installed, the
// insert routine returns a pointer to a SpecificPatternHandle. From the
// SpecificPatternHandle can be gotten the reference value via the macro
// GetReferenceFromSpecificPatternHandle.
//
// If the submitted pattern has already been installed in the database, then
// the insertion does not occur, and the SpecificPatternHandle of the
// previously installed pattern is returned.
//
SpecificPatternHandle insertPatHashTable( PatHashTable *phtable, char *pattern, unsigned int chyme, void *reference);
// This function removes a pattern from the pat-hash table. The pattern is
// specified by the SpecificPatternHandle that was returned by the insert
// routine. No checks are performed to insure that this is a valid handle.
//
void removePatHashTable( PatHashTable *phtable, SpecificPatternHandle sphandle);
// This function searches the database for the specific pattern that matches
// the given key, which is passed as an array of bytes. If a match is found,
// the SpecificPatternHandle of that matching specific pattern is returned.
// From the SpecificPatternHandle can be gotten the reference value via the
// macro GetReferenceFromSpecificPatternHandle. If no match is found, then a
// value of 0 is returned as the SpecificPatternHandle.
//
// As with the insert routine, the client is expected to provide a digested
// form of the key as the chyme argument to the routine. This chyme value
// must be calculated in the exact same way for the search routine as it is
// for the insert routine; otherwise, the search will not be able to find the
// matching pattern.
//
SpecificPatternHandle searchPatHashTable( PatHashTable *phtable, char *key, unsigned int chyme);
// The scan routine (described below) requires the client to supply a callback
// function to be called for each specific pattern that matches the supplied
// general pattern. The following typedef defines the ScanCallback function
// pointer, which specifies the prototype of the callback function that the
// client must provide. The client's callback function must accept a void
// pointer (which is a client-supplied context) and a SpecificPatternHandle.
// The return type of the client's callback function is void.
//
typedef void (*ScanCallback)(void *, SpecificPatternHandle);
// This function searches the database for all specific patterns that match a
// given general pattern. The general pattern is specified by a value and a
// mask. Each bit of the mask determines whether the bit position is specified
// or is a wildcard: A 1 in a mask bit indicates that the value of that bit is
// specified by the general pattern; a 0 indicates that the value of that bit
// is a wildcard. If a mask bit is 1, then the corresponding bit in the value
// field indicates the specified value of that bit. Value and mask fields are
// passed as arrays of bytes.
//
// For each specific pattern in the database that matches the supplied general
// pattern, a client-supplied callback function is called with the
// SpecificPatternHandle of the matching specific pattern. This callback
// function is also passed a context (as a void pointer) that is supplied by
// the client in the call to the scan routine.
//
void scanPatHashTable( PatHashTable *phtable, char *value, char *mask, void *context, ScanCallback func);
// To get the client-supplied reference value from a SpecificPatternHandle, the
// following macro should be used. The client should not make assumptions
// about the details of the PHTableEntry structure, nor should it even assume
// that the SpecificPatternHandle is a pointer to a PHTableEntry.
// Also, get the key pointer (value)
//
#define GetReferenceFromSpecificPatternHandle(sphandle) (sphandle)->reference
#define GetKeyPtrFromSpecificPatternHandle(sphandle) (sphandle)->value
// As described above in the comments on the constructor, if the allocation
// histeresis is non-zero, then the groups will not be deallocated as soon as
// they can be. Similarly, if max free list size is non-zero, then entries
// will not be deallocated as soon as they can be. Thus, unused pieces of
// memory may accumulate, up to a limit. If the client wishes to force the
// pat-hash table to release all of the memory that it currently can, then it
// should call the flush routine, which will deallocate all unneeded groups
// and entries.
//
void flushPatHashTable( PatHashTable *phtable);
#ifdef __cplusplus
} #endif
#endif /* _INC_PATHASH */
|