You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
834 lines
30 KiB
834 lines
30 KiB
/*
|
|
* pathash.c
|
|
*
|
|
* author: John R. Douceur
|
|
* date: 5 May 1997
|
|
*
|
|
* This source file provides functions that implement insertion, removal,
|
|
* search, scan, and flush operations on the pat-hash table database. The
|
|
* code is object-oriented C, transliterated from a C++ implementation.
|
|
*
|
|
* The pat-hash database is a combination of a dynamically sized, separately
|
|
* chained hash table and a Patricia tree. The hash table dynamically grows
|
|
* and shrinks as needed, and the workload of modifying the table size is
|
|
* distributed evenly among the insertion or removal operations that cause
|
|
* the growth or shrinkage.
|
|
*
|
|
* The insertion and removal operations manage both a hash table and a Patricia
|
|
* tree, but the search routine uses only the hash table for performing the
|
|
* search. The Patricia tree is present to support a scan operation, which
|
|
* searches the database for all entries that match a given pattern, where the
|
|
* pattern that is scanned may contain wildcards.
|
|
*
|
|
* None of the code or comments in this file needs to be understood by writers
|
|
* of client code; all explanatory information for clients is found in the
|
|
* associated header file, rhizome.h.
|
|
*
|
|
*/
|
|
|
|
#include "gpcpre.h"
|
|
|
|
// Multiplier applied to the client-supplied chyme value to produce the hash
// used by the insert and search routines (Fibonacci hash multiplier; see
// Knuth 6.4).
#define MAGIC_NUMBER 0x9e4155b9
|
|
|
|
// Allocates storage for one pat-hash table entry and stores the resulting
// pointer in _pe.  Each entry keeps its own copy of the pattern in its value
// array.  That array is the last field of PHTableEntry and is declared with a
// single element, so the request is enlarged by (keybytes - 1) bytes to let
// the array run past the declared end of the structure and hold
// phtable->keybytes bytes.
//
// The macro refers to a variable named phtable, which must be in scope
// wherever the macro is used.
//
// NOTE(review): allocations are tagged PathHashTag, whereas every GpcFreeMem
// call in this file passes PatHashTag -- confirm that both tags exist and
// that the difference is intentional.
//
#define NEW_PHTableEntry(_pe)                                           \
    GpcAllocMem(&_pe,                                                   \
                sizeof(PHTableEntry) + phtable->keybytes - 1,           \
                PathHashTag)
|
|
|
|
// Allocates storage for one pat-hash table group able to hold group_size
// hash chain heads, and stores the resulting pointer in _pg.  The entry_list
// array is the last field of PHTableGroup and is declared with a single
// element, so the request is enlarged by (group_size - 1) entry pointers to
// let the array run past the declared end of the structure.
//
// NOTE(review): allocations are tagged PathHashTag, whereas every GpcFreeMem
// call in this file passes PatHashTag -- confirm that both tags exist and
// that the difference is intentional.
//
#define NEW_PHTableGroup(group_size, _pg)                               \
    GpcAllocMem(&_pg,                                                   \
                sizeof(PHTableGroup) +                                  \
                    ((group_size) - 1) * sizeof(PHTableEntry *),        \
                PathHashTag)
|
|
|
|
// Yields (as 0 or 1) the indexed bit of a byte array, where bit 0 is the
// most-significant bit of byte 0, bit 7 is the least-significant bit of
// byte 0, bit 8 is the most-significant bit of byte 1, and so on.
//
// The selected byte is converted to unsigned char before it is shifted.
// The arrays handed to this macro are plain char, which may be signed;
// without the conversion a byte with its top bit set would be promoted to a
// negative int, and right-shifting a negative value is implementation-
// defined.
//
// The index argument is evaluated twice, so it must not have side effects,
// and it must not be negative.
//
#define BIT_OF(value, index) \
    ((((unsigned char)(value)[(index) >> 3]) >> (7 - ((index) & 0x7))) & 0x1)
|
|
|
|
// Following is a prototype for a static function that is used internally by
|
|
// the implementation of the pat-hash routines.
|
|
|
|
void
|
|
node_scan(
|
|
PatHashTable *phtable,
|
|
PHTableEntry *node,
|
|
int prev_bit,
|
|
char *value,
|
|
char *mask,
|
|
void *context,
|
|
ScanCallback func);
|
|
|
|
// Since this is not C++, the PatHashTable structure is not self-constructing;
|
|
// therefore, the following constructor code must be called on the PatHashTable
|
|
// structure after it is allocated. The argument keybits specifies the size
|
|
// (in bits) of each pattern that will be stored in the database. The usage
|
|
// ratio is the target ratio of database entries to discrete hash chains, which
|
|
// is also the mean length of a hash chain. The usage histeresis is the
|
|
// histeresis between resizing operations due to insertions and removals.
|
|
// Allocation histeresis is the histeresis between allocation and deallocation
|
|
// of groups, specified as a binary exponent. The maximum free list size
|
|
// determines the maximum number of elements that will be placed on a free
|
|
// list, rather than deallocated, when they are removed.
|
|
//
|
|
int
|
|
constructPatHashTable(
|
|
PatHashTable *phtable,
|
|
int keybits,
|
|
int usage_ratio,
|
|
int usage_histeresis,
|
|
int allocation_histeresis,
|
|
int max_free_list_size)
|
|
{
|
|
PHTableGroup *group;
|
|
phtable->keybits = keybits;
|
|
phtable->keybytes = (keybits - 1) / 8 + 1;
|
|
phtable->usage_ratio = usage_ratio;
|
|
phtable->usage_histeresis = usage_histeresis;
|
|
phtable->allocation_histeresis = allocation_histeresis;
|
|
phtable->max_free_list_size = max_free_list_size;
|
|
NEW_PHTableGroup(1, phtable->initial_group);
|
|
phtable->top_group = phtable->initial_group;
|
|
phtable->allocation_exponent = 0;
|
|
phtable->size_exponent = 0;
|
|
phtable->extension_size = 0;
|
|
phtable->population = 0;
|
|
phtable->root = 0;
|
|
phtable->free_list = 0;
|
|
phtable->free_list_size = 0;
|
|
NEW_PHTableGroup(1, group);
|
|
if (phtable->initial_group == 0 || group == 0)
|
|
{
|
|
// Memory could not be allocated for one of the two groups created by
|
|
// the constructor. Therefore, we return an indication of failure to
|
|
// the client.
|
|
|
|
// 286334 : Not so fast! Please free memory before leaving...
|
|
if (phtable->initial_group != 0) {
|
|
GpcFreeMem(phtable->initial_group, PatHashTag);
|
|
}
|
|
|
|
if (group != 0) {
|
|
GpcFreeMem(group, PatHashTag);
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
group->previous = 0;
|
|
group->entry_list[0] = 0;
|
|
phtable->initial_group->previous = group;
|
|
return 0;
|
|
}
|
|
|
|
// Since this is not C++, the PatHashTable structure is not self-destructing;
|
|
// therefore, the following destructor code must be called on the PatHashTable
|
|
// structure before it is deallocated.
|
|
//
|
|
void
|
|
destructPatHashTable(
|
|
PatHashTable *phtable)
|
|
{
|
|
PHTableGroup *group, *previous;
|
|
PHTableEntry *entry, *next;
|
|
int index, size;
|
|
// First, free all groups that are allocated but not currently used.
|
|
group = phtable->top_group;
|
|
while (group != phtable->initial_group)
|
|
{
|
|
previous = group->previous;
|
|
GpcFreeMem(group, PatHashTag);
|
|
group = previous;
|
|
}
|
|
// Then, free the entries in the initial group. Since not all fields
|
|
// in the initial group's table may be valid, only check those whose
|
|
// indices are less than the extension size.
|
|
for (index = phtable->extension_size - 1; index >= 0; index--)
|
|
{
|
|
entry = group->entry_list[index];
|
|
while (entry != 0)
|
|
{
|
|
next = entry->next;
|
|
GpcFreeMem(entry, PatHashTag);
|
|
entry = next;
|
|
}
|
|
}
|
|
// Then free the initial group.
|
|
previous = group->previous;
|
|
GpcFreeMem(group, PatHashTag);
|
|
group = previous;
|
|
// Scan through all remaining groups except the last one, freeing all
|
|
// entries in each group, and thereafter freeing the group.
|
|
size = 1 << (phtable->size_exponent - 1);
|
|
while (group->previous != 0)
|
|
{
|
|
for (index = size - 1; index >= 0; index--)
|
|
{
|
|
entry = group->entry_list[index];
|
|
while (entry != 0)
|
|
{
|
|
next = entry->next;
|
|
GpcFreeMem(entry, PatHashTag);
|
|
entry = next;
|
|
}
|
|
}
|
|
previous = group->previous;
|
|
GpcFreeMem(group, PatHashTag);
|
|
group = previous;
|
|
size >>= 1;
|
|
}
|
|
// The last group is special, since it has a size of one, but the logic
|
|
// used in the preceding loop would have calculated its size as zero.
|
|
// Rather than complicating the previous loop with a check for a single
|
|
// special case, we simply free the last group and its entries in the
|
|
// following code.
|
|
entry = group->entry_list[0];
|
|
while (entry != 0)
|
|
{
|
|
next = entry->next;
|
|
GpcFreeMem(entry, PatHashTag);
|
|
entry = next;
|
|
}
|
|
GpcFreeMem(group, PatHashTag);
|
|
// Finally, free all of the entries in the free list.
|
|
while (phtable->free_list != 0)
|
|
{
|
|
next = phtable->free_list->next;
|
|
GpcFreeMem(phtable->free_list, PatHashTag);
|
|
phtable->free_list = next;
|
|
}
|
|
}
|
|
|
|
// This function inserts a new specific pattern into the database, passed as
|
|
// an array of bytes. The client supplies a digested form of the pattern as
|
|
// the chyme argument.
|
|
//
|
|
// The client specifies a void pointer reference value to associate with the
|
|
// specific pattern. When the specific pattern is installed, the insert
|
|
// routine returns a pointer to a SpecificPatternHandle.
|
|
//
|
|
// If the submitted pattern has already been installed in the database, then
|
|
// the insertion does not occur, and the SpecificPatternHandle of the
|
|
// previously installed pattern is returned.
|
|
//
|
|
// The insertion routine inserts the new pattern into both the hash table and
|
|
// the Patricia tree, and the two insertions are almost completely independent
|
|
// except for the shared entry structure.
|
|
//
|
|
SpecificPatternHandle
|
|
insertPatHashTable(
|
|
PatHashTable *phtable,
|
|
char *pattern,
|
|
unsigned int chyme,
|
|
void *reference)
|
|
{
|
|
unsigned int hash, address, small_address, split_point;
|
|
PHTableGroup *group;
|
|
PHTableEntry **entry, *new_entry;
|
|
char *value;
|
|
int index, group_size, pivot_bit, bit_value;
|
|
// The first portion of this routine inserts the new pattern into the hash
|
|
// table. To begin, we determine whether the number of hash chains needs
|
|
// to be increased in order to maintain the desired usage ratio.
|
|
group_size = 1 << phtable->size_exponent;
|
|
if (phtable->population >=
|
|
(group_size + phtable->extension_size) * phtable->usage_ratio)
|
|
{
|
|
// The number of hash chains needs to be increased. So, determine
|
|
// whether the initial group is completely full.
|
|
if (phtable->extension_size == group_size)
|
|
{
|
|
// The initial group is completely full. So, determine whether
|
|
// all allocated groups are currently in use.
|
|
if (phtable->allocation_exponent == phtable->size_exponent)
|
|
{
|
|
// All allocated groups are currently in use. So, allocate
|
|
// a new group and set its previous pointer to point to the
|
|
// initial group. Update the allocation values of the structure
|
|
// to reflect the new allocation.
|
|
NEW_PHTableGroup(group_size << 1, group);
|
|
if (group == 0)
|
|
{
|
|
// Memory could not be allocated for the new group.
|
|
// Therefore, we return an indication of falure to the
|
|
// client.
|
|
return 0;
|
|
}
|
|
group->previous = phtable->initial_group;
|
|
phtable->top_group = group;
|
|
phtable->allocation_exponent++;
|
|
}
|
|
else
|
|
{
|
|
// Not all allocated groups are in use. So, scanning backward
|
|
// from the top group, find the group that immediately follows
|
|
// the initial group.
|
|
group = phtable->top_group;
|
|
while (group->previous != phtable->initial_group)
|
|
{
|
|
group = group->previous;
|
|
}
|
|
}
|
|
// We now have either a newly allocated group or a previously
|
|
// allocated group that immediately follows the initial group.
|
|
// Set this group to be the new initial group, and set the extension
|
|
// size to zero.
|
|
phtable->initial_group = group;
|
|
phtable->size_exponent++;
|
|
phtable->extension_size = 0;
|
|
}
|
|
else
|
|
{
|
|
// The initial group is not completely full. So, select the initial
|
|
// group.
|
|
group = phtable->initial_group;
|
|
}
|
|
// We now have a group that is not completely full, either because it
|
|
// wasn't completely full when the insert routine was entered, or
|
|
// because it has just been allocated. In either case, we now split
|
|
// a hash chain from a smaller group into two hash chains, one of which
|
|
// will be placed into an unused entry in the new group. The address
|
|
// of the hash chain to be split is determined by the extension size.
|
|
// First we find the group that contains this address.
|
|
group = group->previous;
|
|
address = phtable->extension_size;
|
|
while ((address & 0x1) == 0 && group->previous != 0)
|
|
{
|
|
address >>= 1;
|
|
group = group->previous;
|
|
}
|
|
// Then, we scan through the entry list at the given address for the
|
|
// appropriate split point. The entries are stored in sorted order,
|
|
// and we are essentially shifting one more bit into the address for
|
|
// this value, so the split point can be found by searching for the
|
|
// first entry with the bit set.
|
|
address >>= 1;
|
|
entry = &group->entry_list[address];
|
|
split_point = ((phtable->extension_size << 1) | 0x1)
|
|
<< (31 - phtable->size_exponent);
|
|
while (*entry != 0 && (*entry)->hash < split_point)
|
|
{
|
|
entry = &(*entry)->next;
|
|
}
|
|
// Now that we have found the split point, we move the split-off
|
|
// piece of the list to the new address, and increment the extension
|
|
// size.
|
|
phtable->initial_group->entry_list[phtable->extension_size] = *entry;
|
|
*entry = 0;
|
|
phtable->extension_size++;
|
|
}
|
|
// Now that the memory management aspects of the hash table insertion have
|
|
// been taken care of, we can perform the actual insertion. First, we find
|
|
// the address by hashing the chyme value.
|
|
group = phtable->initial_group;
|
|
hash = MAGIC_NUMBER * chyme;
|
|
address = hash >> (31 - phtable->size_exponent);
|
|
// There are two possible values for the address depending upon whether
|
|
// the hash chain pointer is below the extension size. If it is, then the
|
|
// larger (by one bit) address is used; otherwise, the smaller address is
|
|
// used.
|
|
small_address = address >> 1;
|
|
if ((int)small_address >= phtable->extension_size)
|
|
{
|
|
address = small_address;
|
|
group = group->previous;
|
|
}
|
|
// Next we find the group that contains this address.
|
|
while ((address & 0x1) == 0 && group->previous != 0)
|
|
{
|
|
address >>= 1;
|
|
group = group->previous;
|
|
}
|
|
// Then, we scan through the entry list at the given address for the first
|
|
// entry whose hash value is equal to or greater than the hash of the search
|
|
// key. The entries are stored in sorted order to improve the search speed.
|
|
address >>= 1;
|
|
entry = &group->entry_list[address];
|
|
while (*entry != 0 && (*entry)->hash < hash)
|
|
{
|
|
entry = &(*entry)->next;
|
|
}
|
|
// Now, we check all entries whose hash value matches that of the search
|
|
// key.
|
|
while (*entry != 0 && (*entry)->hash == hash)
|
|
{
|
|
// For each value whose hash matches, check the actual value to see
|
|
// if it matches the search key.
|
|
value = (*entry)->value;
|
|
for (index = phtable->keybytes-1; index >= 0; index--)
|
|
{
|
|
if (value[index] != pattern[index])
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
if (index < 0)
|
|
{
|
|
// A match is found, so we return the SpecificPatternHandle of the
|
|
// matching entry to the client.
|
|
return *entry;
|
|
}
|
|
entry = &(*entry)->next;
|
|
}
|
|
// A match was not found, so we insert the new entry into the hash chain.
|
|
// First we check to see if there is an entry avalable on the free list.
|
|
if (phtable->free_list != 0)
|
|
{
|
|
// There is an entry available on the free list, so grab it and
|
|
// decrement the size of the free list.
|
|
new_entry = phtable->free_list;
|
|
phtable->free_list = phtable->free_list->next;
|
|
phtable->free_list_size--;
|
|
}
|
|
else
|
|
{
|
|
// There is no entry available on the free list, so allocate a new one.
|
|
NEW_PHTableEntry(new_entry);
|
|
if (new_entry == 0)
|
|
{
|
|
// Memory could not be allocated for the new entry. Therefore,
|
|
// we return an indication of falure to the client.
|
|
return 0;
|
|
}
|
|
}
|
|
// Set the fields of the new entry to the appropriate information and add
|
|
// the entry to the hash chain.
|
|
new_entry->hash = hash;
|
|
new_entry->reference = reference;
|
|
new_entry->next = *entry;
|
|
for (index = phtable->keybytes - 1; index >= 0; index--)
|
|
{
|
|
new_entry->value[index] = pattern[index];
|
|
}
|
|
*entry = new_entry;
|
|
// The hash table insertion is now complete. Here we begin the insertion
|
|
// of the new entry into the Patricia tree. We have to treat an empty
|
|
// tree as a special case.
|
|
if (phtable->root == 0)
|
|
{
|
|
// The Patricia tree is empty, so we set the root to point to the new
|
|
// entry. This entry is special, since it serves only as a leaf of
|
|
// the Patricia search and not also as a branch node. A Patricia tree
|
|
// always contains one fewer branch node than the number of leaves.
|
|
// Since a leaf is determined by a pivot bit that is less than or equal
|
|
// to the pivot bit of the parent branch node, a pivot bit of -1 flags
|
|
// this node as always a leaf.
|
|
new_entry->pivot_bit = -1;
|
|
new_entry->children[0] = 0;
|
|
new_entry->children[1] = 0;
|
|
phtable->root = new_entry;
|
|
}
|
|
else
|
|
{
|
|
// The Patricia tree is not empty, so we proceed with the normal
|
|
// insertion process. Beginning at the root, scan through the tree
|
|
// according to the bits of the new pattern, until we reach a leaf.
|
|
entry = &phtable->root;
|
|
index = -1;
|
|
while ((*entry)->pivot_bit > index)
|
|
{
|
|
index = (*entry)->pivot_bit;
|
|
entry = &(*entry)->children[BIT_OF(pattern, index)];
|
|
}
|
|
// Now, compare the new pattern, bit by bit, to the pattern stored at
|
|
// the leaf, until a non-matching bit is found. There is no need to
|
|
// check for an exact match, since the hash insert above would have
|
|
// aborted if an exact match had been found.
|
|
value = (*entry)->value;
|
|
pivot_bit = 0;
|
|
while (BIT_OF(value, pivot_bit) == BIT_OF(pattern, pivot_bit))
|
|
{
|
|
pivot_bit++;
|
|
}
|
|
// Now, scan a second time through the tree, until finding either a leaf
|
|
// or a branch with a pivot bit greater than the bit of the non-match.
|
|
entry = &phtable->root;
|
|
index = -1;
|
|
while ((*entry)->pivot_bit > index && (*entry)->pivot_bit < pivot_bit)
|
|
{
|
|
index = (*entry)->pivot_bit;
|
|
entry = &(*entry)->children[BIT_OF(pattern, index)];
|
|
}
|
|
// This is the point at which the new branch must be inserted. Since
|
|
// each node is both a branch and a leaf, the new entry serves as the
|
|
// new branch, and one of its children points to itself as a leaf. The
|
|
// other child points to the remaining subtree below the insertion
|
|
// point.
|
|
bit_value = BIT_OF(value, pivot_bit);
|
|
new_entry->pivot_bit = pivot_bit;
|
|
new_entry->children[1 - bit_value] = new_entry;
|
|
new_entry->children[bit_value] = *entry;
|
|
*entry = new_entry;
|
|
}
|
|
// Having inserted the new entry in both the hash table and the Patricia
|
|
// tree, we increment the population and return the SpecificPatternHandle
|
|
// of the new entry.
|
|
phtable->population++;
|
|
return new_entry;
|
|
}
|
|
|
|
// This function removes a pattern from the pat-hash table. The pattern is
|
|
// specified by the SpecificPatternHandle that was returned by the insert
|
|
// routine. No checks are performed to insure that this is a valid handle.
|
|
//
|
|
// The removal routine removes the pattern from both the hash table and the
|
|
// Patricia tree, and the two removals are almost completely independent
|
|
// except for the shared entry structure.
|
|
//
|
|
void
|
|
removePatHashTable(
|
|
PatHashTable *phtable,
|
|
SpecificPatternHandle sphandle)
|
|
{
|
|
unsigned int hash, address, small_address;
|
|
PHTableGroup *group;
|
|
PHTableEntry **entry, **branch, **parent, *epoint, *bpoint;
|
|
char *value;
|
|
int index, group_size;
|
|
// The first portion of this routine removess the new pattern from the hash
|
|
// table. First, we find the address by hashing the chyme value.
|
|
group = phtable->initial_group;
|
|
hash = sphandle->hash;
|
|
address = hash >> (31 - phtable->size_exponent);
|
|
// There are two possible values for the address depending upon whether
|
|
// the hash chain pointer is below the extension size. If it is, then the
|
|
// larger (by one bit) address is used; otherwise, the smaller address is
|
|
// used.
|
|
small_address = address >> 1;
|
|
if ((int)small_address >= phtable->extension_size)
|
|
{
|
|
address = small_address;
|
|
group = group->previous;
|
|
}
|
|
// Next we find the group that contains this address.
|
|
while ((address & 0x1) == 0 && group->previous != 0)
|
|
{
|
|
address >>= 1;
|
|
group = group->previous;
|
|
}
|
|
// Then, we scan through the entry list at the given address for the entry
|
|
// that matches the given SpecificPatternHandle.
|
|
address >>= 1;
|
|
entry = &group->entry_list[address];
|
|
while (*entry != sphandle)
|
|
{
|
|
entry = &(*entry)->next;
|
|
}
|
|
// We then remove the entry from the hash chain and decrement the
|
|
// population.
|
|
*entry = sphandle->next;
|
|
phtable->population--;
|
|
// This completes the actual removal of the entry from the hash table, but
|
|
// we now have to determine whether to reduce the number of hash chains in
|
|
// order to maintain the desired usage ratio. Note that the usage
|
|
// histeresis is factored into the calculation.
|
|
group_size = 1 << phtable->size_exponent;
|
|
if (phtable->population + phtable->usage_histeresis <
|
|
(group_size + phtable->extension_size - 1) * phtable->usage_ratio)
|
|
{
|
|
// The number of hash chains needs to be reduced. So, we coalesce two
|
|
// hash chains into a single hash chain. The address of the hash chains
|
|
// is determined by the extension size. First we decrement the
|
|
// extension size and find the group that contains the address of the
|
|
// hash chain that is being retained.
|
|
phtable->extension_size--;
|
|
group = phtable->initial_group->previous;
|
|
address = phtable->extension_size;
|
|
while ((address & 0x1) == 0 && group->previous != 0)
|
|
{
|
|
address >>= 1;
|
|
group = group->previous;
|
|
}
|
|
// Then, we find the end of the entry list at the given address.
|
|
address >>= 1;
|
|
entry = &group->entry_list[address];
|
|
while (*entry != 0)
|
|
{
|
|
entry = &(*entry)->next;
|
|
}
|
|
// We then make the last entry in the hash chain point to the first
|
|
// entry in the other hash chain that is being coalesced. We do not
|
|
// need to update the group's pointer to the other hash chain, since
|
|
// it is now beyond the extension size, and it will thus never be seen.
|
|
*entry = phtable->initial_group->entry_list[phtable->extension_size];
|
|
// Now, we check to see whether a group has been completely emptied.
|
|
// We also check the size exponent, since even if we have just emptied
|
|
// the first non-special group, we do not remove it.
|
|
if (phtable->extension_size == 0 && phtable->size_exponent > 0)
|
|
{
|
|
// The initial group has just been completely emptied, so we set
|
|
// the previous group as the new initial group. Update all
|
|
// housekeeping information accordingly.
|
|
phtable->size_exponent--;
|
|
phtable->extension_size = group_size >> 1;
|
|
phtable->initial_group = phtable->initial_group->previous;
|
|
// We now determine whether we should deallocate a group. Note
|
|
// that the allocation histeresis is factored into the calculation.
|
|
if (phtable->size_exponent + phtable->allocation_histeresis <
|
|
phtable->allocation_exponent)
|
|
{
|
|
// We should deallocate a group, so we deallocate the top group.
|
|
phtable->allocation_exponent--;
|
|
group = phtable->top_group->previous;
|
|
GpcFreeMem(phtable->top_group, PatHashTag);
|
|
phtable->top_group = group;
|
|
}
|
|
}
|
|
}
|
|
// Now, the hash table removal operation is complete, including the memory
|
|
// management functions. Here we begin the removal of the entry from the
|
|
// Patricia tree. First, we scan through the tree according to the bits of
|
|
// the pattern being removed, until we reach a leaf. We keep track of the
|
|
// branch that immediately precedes the leaf, and we also note the parent
|
|
// of the pattern, in the latter's capacity as a branch node.
|
|
value = sphandle->value;
|
|
entry = &phtable->root;
|
|
branch = entry;
|
|
parent = 0;
|
|
index = -1;
|
|
while ((*entry)->pivot_bit > index)
|
|
{
|
|
if ((*entry) == sphandle)
|
|
{
|
|
parent = entry;
|
|
}
|
|
branch = entry;
|
|
index = (*entry)->pivot_bit;
|
|
entry = &(*entry)->children[BIT_OF(value, index)];
|
|
}
|
|
// We set the branch that points to the leaf to instead point to the child
|
|
// of the leaf that is not selected by the bit of the removed pattern, thus
|
|
// removing the branch from the tree.
|
|
epoint = *entry;
|
|
bpoint = *branch;
|
|
*branch = bpoint->children[1 - BIT_OF(value, index)];
|
|
// If the branch that was removed is also the leaf that contains the
|
|
// pattern, then the removal from the Patricia tree is complete. Otherwise,
|
|
// we replace the leaf that is being removed with the branch that is not
|
|
// being removed.
|
|
if (epoint != bpoint)
|
|
{
|
|
bpoint->pivot_bit = epoint->pivot_bit;
|
|
bpoint->children[0] = epoint->children[0];
|
|
bpoint->children[1] = epoint->children[1];
|
|
// In the case of the special node that is not a branch node, we do
|
|
// not update its parent to point to the replacing branch, since this
|
|
// node has no parent.
|
|
if (parent != 0)
|
|
{
|
|
*parent = bpoint;
|
|
}
|
|
}
|
|
// The removal from the Patricia tree is now complete. If appropriate, we
|
|
// place the removed entry onto the free list. If not, we simply free it.
|
|
if (phtable->free_list_size < phtable->max_free_list_size)
|
|
{
|
|
sphandle->next = phtable->free_list;
|
|
phtable->free_list = sphandle;
|
|
phtable->free_list_size++;
|
|
}
|
|
else
|
|
{
|
|
GpcFreeMem(sphandle, PatHashTag);
|
|
}
|
|
}
|
|
|
|
// This function searches the database for the specific pattern that matches
|
|
// the given key, which is passed as an array of bytes. The client supplies
|
|
// a digested form of the pattern as the chyme argument. If a match is found,
|
|
// the SpecificPatternHandle of that matching specific pattern is returned.
|
|
// If no match is found, then a value of 0 is returned.
|
|
//
|
|
// This search uses only the hash table; the Patricia tree is not used at all.
|
|
//
|
|
SpecificPatternHandle
|
|
searchPatHashTable(
|
|
PatHashTable *phtable,
|
|
char *key,
|
|
unsigned int chyme)
|
|
{
|
|
unsigned int hash, address, small_address;
|
|
PHTableGroup *group;
|
|
PHTableEntry *entry;
|
|
char *value;
|
|
int index;
|
|
// First, we find the address by hashing the chyme value.
|
|
group = phtable->initial_group;
|
|
hash = MAGIC_NUMBER * chyme;
|
|
address = hash >> (31 - phtable->size_exponent);
|
|
// There are two possible values for the address depending upon whether
|
|
// the hash chain pointer is below the extension size. If it is, then the
|
|
// larger (by one bit) address is used; otherwise, the smaller address is
|
|
// used.
|
|
small_address = address >> 1;
|
|
if ((int)small_address >= phtable->extension_size)
|
|
{
|
|
address = small_address;
|
|
group = group->previous;
|
|
}
|
|
// Next we find the group that contains this address.
|
|
while ((address & 0x1) == 0 && group->previous != 0)
|
|
{
|
|
address >>= 1;
|
|
group = group->previous;
|
|
}
|
|
// Then, we scan through the entry list at the given address for the first
|
|
// entry whose hash value is equal to or greater than the hash of the search
|
|
// key. The entries are stored in sorted order to improve the search speed.
|
|
address >>= 1;
|
|
entry = group->entry_list[address];
|
|
while (entry != 0 && entry->hash < hash)
|
|
{
|
|
entry = entry->next;
|
|
}
|
|
// Now, we check all entries whose hash value matches that of the search
|
|
// key.
|
|
while (entry != 0 && entry->hash == hash)
|
|
{
|
|
// For each value whose hash matches, check the actual value to see
|
|
// if it matches the search key.
|
|
value = entry->value;
|
|
for (index = phtable->keybytes-1; index >= 0; index--)
|
|
{
|
|
if (value[index] != key[index])
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
if (index < 0)
|
|
{
|
|
// A match is found, so we return the SpecificPatternHandle of the
|
|
// matching entry to the client.
|
|
return entry;
|
|
}
|
|
entry = entry->next;
|
|
}
|
|
// A match was not found, so we return a null pointer to the client.
|
|
return 0;
|
|
}
|
|
|
|
// This function searches the database for all specific patterns that match a
|
|
// given general pattern. The general pattern is specified by a value and a
|
|
// mask. For each specific pattern in the database that matches the supplied
|
|
// general pattern, a client-supplied callback function is called with the
|
|
// SpecificPatternHandle of the matching specific pattern. This callback
|
|
// function is also passed a context (as a void pointer) that is supplied by
|
|
// the client in the call to the scan routine.
|
|
//
|
|
// This scan uses only the Patricia tree; the hash table is not used at all.
|
|
//
|
|
void
|
|
scanPatHashTable(
|
|
PatHashTable *phtable,
|
|
char *value,
|
|
char *mask,
|
|
void *context,
|
|
ScanCallback func)
|
|
{
|
|
// Call the recursive node_scan routine, starting at the root of the
|
|
// Patricia tree.
|
|
if (phtable->root != 0)
|
|
{
|
|
node_scan(phtable, phtable->root, -1, value, mask, context, func);
|
|
}
|
|
}
|
|
|
|
// This function recursively scans the Patricia tree for all specific patterns
|
|
// that match a given general pattern.
|
|
void
|
|
node_scan(
|
|
PatHashTable *phtable,
|
|
PHTableEntry *node,
|
|
int prev_bit,
|
|
char *value,
|
|
char *mask,
|
|
void *context,
|
|
ScanCallback func)
|
|
{
|
|
int mask_bit, index;
|
|
// Partial recursion removal. The while loop takes the place of one of the
|
|
// recursive calls to node_scan(). We remain in the while loop while we
|
|
// are still examining branch nodes.
|
|
while (node->pivot_bit > prev_bit)
|
|
{
|
|
// For each branch node, determine which way(s) to branch based upon
|
|
// the bit of the general pattern. If the mask bit is a zero, then
|
|
// branch both ways, requiring a recursive call. If the mask bit is
|
|
// a one, then branch in the direction indicated by the value bit.
|
|
mask_bit = BIT_OF(mask, node->pivot_bit);
|
|
if (mask_bit == 0)
|
|
{
|
|
// The general pattern has a wildcard for this node's pivot bit,
|
|
// so we must branch both ways. We branch on child one through
|
|
// an actual recursive call.
|
|
node_scan(phtable, node->children[1], node->pivot_bit,
|
|
value, mask, context, func);
|
|
}
|
|
// We then branch either to the child selected by the value bit (if
|
|
// the mask bit is one) or to child zero (if the mask bit is zero).
|
|
prev_bit = node->pivot_bit;
|
|
node = node->children[BIT_OF(value, node->pivot_bit) & mask_bit];
|
|
}
|
|
// We have reached a leaf node. Examine its specific pattern to see if
|
|
// it matches the given general pattern. If it doesn't match, then just
|
|
// return; otherwise, call the client's callback function.
|
|
for (index = phtable->keybytes-1; index >= 0; index--)
|
|
{
|
|
if ((mask[index] & value[index]) !=
|
|
(mask[index] & node->value[index]))
|
|
{
|
|
return;
|
|
}
|
|
}
|
|
func(context, node);
|
|
}
|
|
|
|
// This function forces the pat-hash table to release all of the memory that
|
|
// it currently can, by deallocating all unneeded groups and entries.
|
|
//
|
|
void
|
|
flushPatHashTable(
|
|
PatHashTable *phtable)
|
|
{
|
|
PHTableGroup *group, *previous;
|
|
PHTableEntry *entry, *next;
|
|
// First, free all groups that are allocated but not currently used.
|
|
group = phtable->top_group;
|
|
while (group != phtable->initial_group)
|
|
{
|
|
previous = group->previous;
|
|
GpcFreeMem(group, PatHashTag);
|
|
group = previous;
|
|
}
|
|
phtable->top_group = phtable->initial_group;
|
|
phtable->allocation_exponent = phtable->size_exponent;
|
|
// Then, free all of the entries in the free list.
|
|
entry = phtable->free_list;
|
|
while (entry != 0)
|
|
{
|
|
next = entry->next;
|
|
GpcFreeMem(entry, PatHashTag);
|
|
entry = next;
|
|
}
|
|
phtable->free_list = 0;
|
|
phtable->free_list_size = 0;
|
|
}
|