Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

1009 lines
27 KiB

////////////////////////////////////////////////////////////////////////////////
//
// Filename : TRIE.H
// Purpose : Basic C MACROS/DEFS used by the Trie package
//
// Project : PQS
// Component: FE_CORE
//
// Author : dovh
//
// Log :
// MMM DD YYYY dovh Creation
// Dec 11 1996 DovH UNICODE Preparation: Convert char to TCHAR.
// Dec 1 1998 dovh Use HCFE_GlobalHandle
// Nov 2 1999 YairH Fix compilation errors.
// Nov 8 1999 urib Fix tabulation format.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef __TRIE_H__
#define __TRIE_H__
#pragma once
#include "comdefs.h"
#include "gtable.h"
#include "autoptr.h"
#include "excption.h"
#include "tracer.h"
DECLARE_TAG(s_tagTrie, "Trie")
//
// T R I E P A C K A G E F L A G S :
//
// Bit flags combined into the `options` argument of the trie routines below.
//
// No option bits set.
#define TRIE_DEFAULT 0X00000000L
// Compare characters through C::MapToUpper instead of comparing them directly.
#define TRIE_IGNORECASE 0X00000001L
// trie_Find search modes. trie_Find asserts that exactly one of the three is
// set, and that assert shifts them right by 4, 5 and 6 bits respectively, so
// the bit positions below must not change.
//   SHORTEST - stop at the first stored prefix of the search string.
//   LONGEST  - keep descending and report only the deepest stored prefix.
//   ALL      - report every stored prefix, up to the output buffer size.
#define TRIE_SHORTEST_MATCH 0X00000010L
#define TRIE_LONGEST_MATCH 0X00000020L
#define TRIE_ALL_MATCHES 0X00000040L
// Not referenced by the code visible in this header.
#define TRIE_FILTER_VERIFY 0X00000080L
// trie_Insert: replace the item of a string that is already present.
#define TRIE_EXCHANGE_ITEM 0X00000100L
// Not referenced by the code visible in this header.
#define TRIE_OUT_BUFFER_EMPTY 0X00000200L
// Initial suffixCount / suffixSize of a node built from a string. The size
// value is also the first capacity chosen when an empty suffix array grows
// and the capacity given to the root node by CTrie's constructor.
#define TRIE_NODE_SUFFIXCOUNT_INIT 0
#define TRIE_NODE_SUFFIXSIZE_INIT 2
// Expands to the definition of the g_trie_Sentinel object declared `extern`
// further down (presumably meant to appear in exactly one source file -
// TODO confirm against the users of this header).
#define DECLARE_TRIE_SENTINEL CTrieNode<BYTE> g_trie_Sentinel
//
// CTrieNode - one node of the trie.
//
// A node holds a zero terminated string fragment, an array of pointers to
// child nodes ("suffixes") that AddSuffix keeps ordered by leading character,
// and an optional pointer to the item stored for the key that ends at this
// node. T is the item type; C supplies the static MapToUpper() used for the
// case-insensitive comparisons.
//
// The address of g_trie_Sentinel (cast to CTrieNode<T, C>*) is stored as a
// child to mark "the key ending here is itself an entry"; that object is never
// deleted or recursed into by the node code.
//
// NOTE(review): the destructor deletes the child nodes and the string, and no
// copy constructor or assignment operator is declared, so copying a node
// would lead to a double delete.
//
template <class T, class C = CToUpper >
class CTrieNode
{
public:
// Node with an empty string and no suffix array.
CTrieNode();
// Node without a string whose suffix array has room for sSize children.
CTrieNode(short sSize);
// Node holding a copy of at most ulCharToCopy characters of NewString and
// pointing at NewItem. `options` is not used by the constructor body.
CTrieNode(
T* NewItem,
ULONG options,
const WCHAR* NewString,
ULONG ulCharToCopy = 0xffffffff);
// Deletes every non-sentinel child, the suffix array and the string.
// The item is not deleted here; see DeleteItem().
~CTrieNode();
// Deletes the items of the whole subtree rooted at this node.
void DeleteItem();
// Inserts newSuffix into the suffix array at `index`; with the default index
// the position is searched for so that leading characters stay ordered.
void
AddSuffix(
ULONG options,
CTrieNode<T, C>* newSuffix,
USHORT index = 0xffff
);
// Splits this node at charsMatched characters: a new prefix node takes this
// node's place in parent->suffix[index] and this node becomes its child.
void
SplitNode(
CTrieNode<T, C>* parent, // Parent of node
short index, // Index of node in parent->suffix
const WCHAR * NewString, // String sharing prefix with node->string
size_t charsMatched,
T* item, // Item associated with (sub)string
ULONG options // Insertion options
);
// Prints this node and its subtree with printf, indented by ulOffset spaces.
void Print(ULONG ulOffset);
// Compares s and t through C::MapToUpper; *matchCount receives the length of
// the common prefix. Returns 0 when the strings are equal, otherwise the
// difference of the first mismatching (upper-cased) characters.
int
trie_StrMatchIns(
const WCHAR * s,
const WCHAR * t,
size_t * matchCount
);
// Same as trie_StrMatchIns but compares the characters directly.
inline
int
trie_StrMatch(
const WCHAR * s,
const WCHAR * t,
size_t * matchCount
);
private:
// Grows the suffix array (called by AddSuffix when the array is full).
void DoubleSuffixArray();
public:
// Data members are public: CTrie and the sentinel handling access them
// directly.
short suffixCount; // Number of suffixes
short suffixSize; // Size of suffixes array
CTrieNode ** suffix; // Pointer to suffixes array
T* item; // Pointer to item corresponding to node
size_t charCount; // String length
WCHAR* string; // Zero terminated string
public:
};
// The shared sentinel node; defined through the DECLARE_TRIE_SENTINEL macro.
extern CTrieNode<BYTE> g_trie_Sentinel;
//
// CTrie - string-keyed trie built from CTrieNode<T, C> nodes.
//
// T is the item type associated with each key; C supplies the static
// MapToUpper() used when TRIE_IGNORECASE is passed in `options`.
//
// NOTE(review): the destructor deletes `root` and no copy constructor or
// assignment operator is declared, so copying a CTrie would lead to a double
// delete.
//
template <class T, class C = CToUpper >
class CTrie
{
public:
// fDeleteItemsOnDestruction: when true the destructor also deletes every item
// stored in the trie (via CTrieNode::DeleteItem); otherwise only the nodes
// are deleted.
CTrie(bool fDeleteItemsOnDestruction = false);
~CTrie();
// Adds `string` with `item`. Returns DICT_SUCCESS when the key was added and
// DICT_ITEM_ALREADY_PRESENT when it was already stored; in the latter case the
// stored item is reported through *pTrieItem (when pTrieItem is not NULL) and
// replaced by `item` only if TRIE_EXCHANGE_ITEM is set.
DictStatus
trie_Insert( // Insert string into trie
const WCHAR * string, // String key of item
unsigned long options, // Insertion flags
T * item, // Item to be inserted
T ** pTrieItem // Matching item already in trie
);
// Collects the items of stored keys that are prefixes of `string`, according
// to the TRIE_*_MATCH mode in `options`. Returns DICT_SUCCESS only when a
// stored key equals `string`, otherwise DICT_ITEM_NOT_FOUND (prefix matches
// may still have been written to outBuffer / *resultCount).
DictStatus
trie_Find(
const WCHAR * string, // A string
unsigned long options, // Search flags
short outBufferSize, // Max number of results wanted
T ** outBuffer, // Buffer to be filled with matching items
short * resultCount // Number of matching prefixes returned
);
// Prints the whole trie through CTrieNode::Print.
void Print();
private:
CTrieNode<T, C>* root; // Root node; allocated in the constructor
bool fDeleteItems; // Delete stored items in the destructor?
};
///////////////////////////////////////////////////////////////////////////////
// CTrieNode implementation
///////////////////////////////////////////////////////////////////////////////
//
// Default constructor: a node with no suffix array, no item and an empty
// (but allocated) string.
//
template <class T, class C >
inline CTrieNode<T, C>::CTrieNode() :
    suffixCount(0),
    suffixSize(0),
    suffix(NULL),
    item(NULL),
    charCount(0),
    string(NULL)
{
    // The string is always heap allocated so that the destructor can release
    // it unconditionally.
    WCHAR* pwcsEmpty = new WCHAR[1];
    *pwcsEmpty = L'\0';
    string = pwcsEmpty;
}
//
// Constructs a node without a string (string == NULL, charCount == 0) whose
// suffix array has sSize zeroed slots. CTrie uses this form for its root.
//
template <class T, class C >
inline CTrieNode<T, C>::CTrieNode(short sSize) :
    suffixCount(0),
    suffixSize(sSize),
    suffix(NULL),
    item(NULL),
    charCount(0),
    string(NULL)
{
    Assert(sSize > 0);

    const size_t cbSlots = suffixSize * sizeof(CTrieNode<T, C>*);
    suffix = new CTrieNode<T, C>*[suffixSize];
    memset(suffix, 0, cbSlots);
}
//
// Constructs a node that points at NewItem and owns a private copy of the
// first min(wcslen(NewString), ulCharToCopy) characters of NewString.
// The suffix array starts with TRIE_NODE_SUFFIXSIZE_INIT zeroed slots.
// `options` is accepted for symmetry with the other routines; the body does
// not read it.
//
template <class T, class C >
inline CTrieNode<T, C>::CTrieNode(
    T* NewItem,
    ULONG options,
    const WCHAR* NewString,
    ULONG ulCharToCopy) :
    suffixCount(TRIE_NODE_SUFFIXCOUNT_INIT),
    suffixSize(TRIE_NODE_SUFFIXSIZE_INIT),
    item(NewItem)
{
    // Number of characters to keep: the whole string unless the caller asked
    // for a shorter prefix.
    const size_t cchSource = wcslen(NewString);
    charCount = (cchSource < ulCharToCopy) ? cchSource : ulCharToCopy;

    // The auto pointer owns the copy until the suffix array allocation below
    // has succeeded, so the copy is released if that allocation throws.
    CAutoArrayPointer<WCHAR> apwcsNewString = new WCHAR[charCount + 1];
    string = apwcsNewString.Get();
    wcsncpy(string, NewString, charCount);
    string[charCount] = L'\0';

    suffix = new CTrieNode<T, C>*[suffixSize];
    memset(suffix, 0, suffixSize * sizeof(CTrieNode<T, C>*));

    // Nothing can fail any more: the node keeps the string.
    apwcsNewString.Detach();
}
//
// Deletes the item of this node and, recursively, the items of every
// non-sentinel node below it. The nodes themselves stay in place.
//
// The item pointer is reset after the delete so that a second call, or a
// lookup performed afterwards, never touches a freed item.
//
template <class T, class C >
inline void CTrieNode<T, C>::DeleteItem()
{
    for (short s = 0; s < suffixCount; s++)
    {
        // The sentinel is a shared marker object, not a real child.
        if (suffix[s] != (CTrieNode<T, C>*)&g_trie_Sentinel)
        {
            suffix[s]->DeleteItem();
        }
    }

    delete item;
    item = NULL;
}
//
// Destructor: deletes every non-sentinel child node, the suffix array and the
// string. The item is not deleted here (see DeleteItem).
//
template <class T, class C >
inline CTrieNode<T, C>::~CTrieNode()
{
    Trace(
        elInfo,
        s_tagTrie,(
        "CTrieNode:"
        "Released"));

    for (short s = 0; s < suffixCount; s++)
    {
        // The sentinel is a shared object that no node owns.
        if (suffix[s] != (CTrieNode<T, C>*)&g_trie_Sentinel)
        {
            delete suffix[s];
        }
    }
    delete[] suffix;

    // The string is always allocated with new WCHAR[...], so it has to be
    // released with the array form of delete.
    delete[] string;
}
//
// Case-sensitive comparison of two zero terminated strings.
//
//   s, t       - strings to compare.
//   matchCount - receives the number of leading characters on which s and t
//                agree (the full length when they are equal).
//
// Returns 0 when the strings are equal, otherwise the difference between the
// first pair of characters that differ (s minus t).
//
template <class T, class C >
inline
int
CTrieNode<T, C>::trie_StrMatch(
    const WCHAR * s,
    const WCHAR * t,
    size_t * matchCount
    )
{
    size_t cchCommon = 0;

    while (s[cchCommon] == t[cchCommon])
    {
        if (s[cchCommon] == L'\0')
        {
            // Both strings ended together: exact match.
            *matchCount = cchCommon;
            Assert( (s[cchCommon] - t[cchCommon]) == 0 );
            return 0;
        }
        ++cchCommon;
    }

    *matchCount = cchCommon;
    return s[cchCommon] - t[cchCommon];
} // end trie_StrMatch
//
// Case-insensitive comparison of two zero terminated strings; every character
// is passed through C::MapToUpper before it is compared.
//
//   s, t       - strings to compare.
//   matchCount - receives the number of leading characters on which s and t
//                agree (the full length when they are equal).
//
// Returns 0 when the strings are equal, otherwise the difference between the
// mapped values of the first pair of characters that differ (s minus t).
//
template <class T, class C >
inline
int
CTrieNode<T, C>::trie_StrMatchIns(
    const WCHAR * s,
    const WCHAR * t,
    size_t * matchCount
    )
{
    size_t cchCommon = 0;

    while (C::MapToUpper(s[cchCommon]) == C::MapToUpper(t[cchCommon]))
    {
        if (s[cchCommon] == L'\0')
        {
            // End of s reached while still equal: exact match.
            *matchCount = cchCommon;
            Assert ( (C::MapToUpper(s[cchCommon]) - C::MapToUpper(t[cchCommon])) == 0 );
            return 0;
        }
        ++cchCommon;
    }

    *matchCount = cchCommon;
    return C::MapToUpper(s[cchCommon]) - C::MapToUpper(t[cchCommon]);
} // end trie_StrMatchIns
/*++
Function AddSuffix:
    Store newSuffix as a child of this node, growing the suffix array first
    when it is full.
Routine Parameters:
    options   - TRIE_IGNORECASE makes the position search compare leading
                characters through C::MapToUpper.
    newSuffix - node to be added as a child of this node.
    index     - slot in the suffix array at which newSuffix is stored. The
                default 0xffff means "search": the slot of the first child
                whose leading character is not smaller than the leading
                character of newSuffix (or the end of the array).
Return value:
    None.
--*/
template <class T, class C >
inline void
CTrieNode<T, C>::AddSuffix(
    ULONG options,
    CTrieNode<T, C>* newSuffix,
    USHORT index
    )
{
    //
    // Grow the array before anything is modified, so a failed allocation
    // leaves the node untouched:
    //
    Assert(suffixCount <= suffixSize);
    if (suffixSize == suffixCount)
    {
        DoubleSuffixArray();
    }

    const bool fIgnoreCase = (options & TRIE_IGNORECASE) != 0;

    //
    // No position supplied: scan for the first child that does not sort
    // before newSuffix.
    //
    if (index == 0xffff)
    {
        index = 0;
        while (index < suffixCount)
        {
            const bool fChildSortsFirst = fIgnoreCase ?
                (C::MapToUpper(suffix[index]->string[0]) < C::MapToUpper(newSuffix->string[0])) :
                (suffix[index]->string[0] < newSuffix->string[0]);
            if (!fChildSortsFirst)
            {
                break;
            }
            index++;
        }
    }

#ifdef DEBUG
    if (fIgnoreCase)
    {
        Assert((index == 0 ) ||
               (index == suffixCount) ||
               (C::MapToUpper(suffix[index]->string[0]) > C::MapToUpper(newSuffix->string[0])));
    }
    else
    {
        Assert((index == 0 ) ||
               (index == suffixCount) ||
               (suffix[index]->string[0] > newSuffix->string[0]));
    }
#endif

    //
    // Open a gap at `index` by moving the tail of the array one slot to the
    // right (nothing moves when the new child goes at the end):
    //
    short slot = suffixCount;
    while (slot > index)
    {
        suffix[slot] = suffix[slot - 1];
        slot--;
    }
    suffixCount++;

    //
    // WARNING: do not add allocations after the next line. The caller may
    // still hold newSuffix in an automatic pointer; if something threw after
    // the node had stored it, it would be released twice - once by this
    // node's destructor and once by the automatic pointer.
    //
    suffix[index] = newSuffix;
} // end AddSuffix
/*++
Function SplitNode:
    NewString and this node's string share a non-empty common prefix of
    charsMatched characters which is a strict prefix of this node's string.
    A new prefix node holding the common prefix replaces this node in
    parent->suffix[index]; this node keeps only the rest of its string and
    becomes a child of the prefix node. The rest of NewString becomes a
    second child carrying `item`; when NewString is exactly the common
    prefix, the prefix node itself receives `item` and the sentinel is added
    as its first child instead.
Routine Parameters:
    parent       - parent of this node.
    index        - index of this node in parent->suffix.
    NewString    - string sharing a prefix with this node's string.
    charsMatched - length of the common prefix.
    item         - item associated with NewString.
    options      - insertion options (TRIE_IGNORECASE affects ordering).
Return value:
    None. If allocating the shortened string throws CMemoryException, this
    node's string and charCount are restored and the exception is rethrown;
    the parent is not modified in that case.
Log:
    Apr-14-1998 dovh - PerlWarn: change to ==:
        Assert( node = parent->suffix[ index ] );
--*/
template <class T, class C >
inline void
CTrieNode<T, C>::SplitNode(
    CTrieNode<T, C>* parent,    // Parent of node
    short index,                // Index of node in parent->suffix
    const WCHAR * NewString,    // String sharing prefix with node->string
    size_t charsMatched,
    T* item,                    // Item associated with (sub)string
    ULONG options               // Insertion options
    )
{
    //
    // Verify that the prefix common to NewString and this node's string is
    // a non-empty proper prefix of this node's string:
    //
    Assert( ( (options & TRIE_IGNORECASE) ?
              (C::MapToUpper(*string) == C::MapToUpper(*NewString)) :
              (*string == *NewString) )
          );
    Assert(charsMatched < wcslen(string));

    //
    // Set up the prefix node to replace this node as child of parent.
    // It is held by an auto pointer until it has been linked into the parent.
    //
    CAutoClassPointer<CTrieNode<T, C> > nodePrefix =
        new CTrieNode<T, C>(NULL, options, string, charsMatched);

    //
    // Add the part of NewString that follows the common prefix:
    //
    if ( wcslen(NewString) == charsMatched )
    {
        //
        // NewString is exactly the common prefix: the prefix node itself
        // represents the new entry, marked by the sentinel in slot 0.
        //
        nodePrefix->AddSuffix(0,(CTrieNode<T, C>*)&g_trie_Sentinel, 0);
        nodePrefix->item = item;
    }
    else
    {
        Assert( charsMatched < wcslen(NewString) );

        //
        // Allocate a node for the remainder of NewString and hand it over
        // to the prefix node:
        //
        CAutoClassPointer<CTrieNode<T, C> > strSuffix =
            new CTrieNode<T, C>(item, options, &NewString[charsMatched]);
        nodePrefix->AddSuffix(options, strSuffix.Get());
        strSuffix.Detach();
    }

    //
    // Shorten this node's string to the part after the common prefix. The
    // old values are remembered so they can be put back on failure.
    //
    WCHAR* pwcsCurrStr = string;
    size_t CurrCharCount = charCount;
    try
    {
        size_t newCharCount = charCount - charsMatched;
        Assert(newCharCount > 0);
        CAutoArrayPointer<WCHAR> apwcsNewStr = new WCHAR[newCharCount + 1];
        wcscpy(apwcsNewStr.Get(), &(string[charsMatched]));
        string = apwcsNewStr.Get();
        charCount = newCharCount;

        //
        // Add this node as a child of nodePrefix. AddSuffix orders children
        // by the first character of `string`, which is why the string had to
        // be replaced first.
        //
        nodePrefix->AddSuffix(options, this);
        apwcsNewStr.Detach();
        delete[] pwcsCurrStr;
    }
    catch (CMemoryException&)
    {
        string = pwcsCurrStr;
        charCount = CurrCharCount;
        // Rethrow the exception object that was caught; "throw e" would
        // throw a copy and slice any derived exception type.
        throw;
    }

    //
    // Replace this node by nodePrefix as the respective child of parent:
    //
    Assert( this == parent->suffix[ index ] );
    parent->suffix[ index ] = nodePrefix.Get();
    nodePrefix.Detach();
} // end trie_SplitNode
//
// Prints this node on its own line, preceded by ulOffset spaces, followed by
// all of its children indented four more spaces each level.
//
// The sentinel prints as "Sentinel". A node without a string (the root built
// by CTrieNode(short) has string == NULL) prints as "(null)" instead of
// handing a NULL pointer to printf, which is undefined behaviour.
// NOTE(review): "(null)" is believed to be what the MSVC runtime printed for
// a NULL %S argument - confirm if the exact output matters.
//
template <class T, class C >
inline void
CTrieNode<T, C>::Print(ULONG ulOffset)
{
    for (ULONG i = 0; i < ulOffset; i++)
    {
        printf(" ");
    }

    if (this == (CTrieNode<T, C>*)&g_trie_Sentinel)
    {
        printf("Sentinel\n");
    }
    else if (NULL == string)
    {
        printf("(null)\n");
    }
    else
    {
        printf("%S\n",string);
    }

    for (short k = 0; k < suffixCount; k++)
    {
        suffix[k]->Print(ulOffset + 4);
    }
}
//
// Grows the suffix array: an empty array gets TRIE_NODE_SUFFIXSIZE_INIT
// slots, any other array doubles in size. Existing child pointers keep their
// positions and the added slots are zeroed, matching what the constructors
// do for a fresh array.
//
// The node is modified only after the new array has been allocated, so an
// allocation failure leaves it unchanged.
//
template <class T, class C >
inline void
CTrieNode<T, C>::DoubleSuffixArray()
{
    Assert(suffixCount == suffixSize);
    // suffixSize is a short; doubling anything above 0x3fff would wrap.
    Assert(suffixSize <= 0x3fff);

    short newSize;
    if (0 == suffixSize)
    {
        newSize = TRIE_NODE_SUFFIXSIZE_INIT;
    }
    else
    {
        newSize = suffixSize * 2;
    }

    CTrieNode<T, C> ** newPSuffix = new CTrieNode<T, C>*[newSize];

    // A node built by the default constructor has suffix == NULL; memcpy must
    // not be given a NULL source, even for zero bytes.
    if ((suffix != NULL) && (suffixSize > 0))
    {
        memcpy(newPSuffix, suffix, suffixSize*sizeof(CTrieNode<T, C>*));
    }
    memset(newPSuffix + suffixSize,
           0,
           (newSize - suffixSize)*sizeof(CTrieNode<T, C>*));

    delete[] suffix;
    suffix = newPSuffix;
    suffixSize = newSize;
} // end trie_DoubleNode
///////////////////////////////////////////////////////////////////////////////
// CTrie implementation
///////////////////////////////////////////////////////////////////////////////
//
// Creates an empty trie: a root node without a string and with room for
// TRIE_NODE_SUFFIXSIZE_INIT children.
//
//   fDeleteItemsOnDestruction - remembered for the destructor, which deletes
//                               the stored items only when this is true.
//
template <class T, class C >
inline CTrie<T, C>::CTrie(bool fDeleteItemsOnDestruction) :
    root(new CTrieNode<T, C>(TRIE_NODE_SUFFIXSIZE_INIT)),
    fDeleteItems(fDeleteItemsOnDestruction)
{
}
//
// Destroys the trie. The stored items are deleted first when the trie was
// constructed with fDeleteItemsOnDestruction == true; the nodes are always
// deleted.
//
template <class T, class C >
inline CTrie<T, C>::~CTrie()
{
    CTrieNode<T, C>* const pRoot = root;

    // Items first: the node destructor does not touch them.
    if (fDeleteItems)
    {
        pRoot->DeleteItem();
    }

    // Deleting the root deletes the whole node tree.
    delete pRoot;
}
/*++
Function trie_Insert:
Insert a given string into the trie if it is not already a member of the trie.
Routine Parameters:
string - String key of item.
options - Insertion options. TRIE_IGNORECASE compares characters through
C::MapToUpper. If TRIE_EXCHANGE_ITEM is not set, the item is
inserted only if the string key is not already in the trie. If
TRIE_EXCHANGE_ITEM is set, an existing item is replaced by the
item argument, and the item that was stored before is returned
in the *pTrieItem argument.
item - New item to be inserted.
pTrieItem - May be NULL. Otherwise *pTrieItem is set to NULL on entry and,
if an item associated with string already exists, points to
that item upon return.
Return value:
DICT_SUCCESS if string was inserted successfully, else
DICT_ITEM_ALREADY_PRESENT (also when the existing item was exchanged).
Notes:
The descent into a child is done by restarting the for loop over the
children (see CASE I below); every path through the body of the outer
while loop returns, so the statements after that loop are never reached.
--*/
template <class T, class C >
inline DictStatus
CTrie<T, C>::trie_Insert( // Insert string into trie
const WCHAR * string, // String key of item
unsigned long options, // Insertion flags
T * item, // Item to be inserted
T ** pTrieItem // Matching item already in trie
)
{
// t is the node whose children are being scanned, c the child under
// examination.
CTrieNode<T, C> * t, * c;
int cmp = -1;
// subString is the part of string that has not been matched yet.
const WCHAR * subString = string;
// subStringSize is only read by the Asserts below.
size_t subStringSize = wcslen(subString);
size_t strIndex = 0;
size_t charsMatched = 0;
// Owns a freshly allocated node until AddSuffix has linked it into the trie.
CAutoClassPointer<CTrieNode<T, C> > apNewSuffix;
t = root;
if (pTrieItem != NULL)
{
*pTrieItem = NULL;
}
while (true)
{
short i = 0;
//
// Search in this level sorted alternatives list:
//
for ( i = 0;
i < t->suffixCount;
i++
)
{
c = t->suffix[i];
// Quick "skip check": compare only the leading characters. A sentinel
// child has an empty string, so it compares below any non-empty subString.
cmp = (options & TRIE_IGNORECASE) ?
(C::MapToUpper(*c->string) - C::MapToUpper(*subString)) :
(*c->string - *subString);
if ( cmp < 0)
{
continue;
}
if (cmp > 0)
{
//
// First character of c->string is already greater than that of subString,
// so no child can match; insert a copy of subString before
// c (== t->suffix[i]):
//
apNewSuffix = new CTrieNode<T, C>(item, options, subString);
t->AddSuffix(options, apNewSuffix.Get(), i);
apNewSuffix.Detach();
return DICT_SUCCESS;
}
else
{
// At least one character matched.
// subStringSize = _tcslen(subString);
cmp = (options & TRIE_IGNORECASE) ?
c->trie_StrMatchIns(c->string, subString, &charsMatched) :
c->trie_StrMatch(c->string, subString, &charsMatched);
Assert(charsMatched <= min(c->charCount, subStringSize));
if (cmp == 0)
{
// c->charCount (== subStringSize) characters matched
//
Assert(c->charCount == subStringSize);
// subString matched exactly:
//
// A node without children, or one whose first child is the sentinel,
// already represents an entry of the trie.
if ((c->suffixCount == 0) ||
(c->suffix[0] == (CTrieNode<T, C>*)&g_trie_Sentinel))
{
// string already present:
//
if (pTrieItem != NULL)
{
*pTrieItem = c->item;
}
if (options & TRIE_EXCHANGE_ITEM)
{
// Without pTrieItem the caller has no way to get at the replaced item.
Assert(pTrieItem!= NULL);
c->item = item;
}
return(DICT_ITEM_ALREADY_PRESENT);
}
else
{
//
// c is an inner node that was not an entry so far.
// Insert the NULL trie_Sentinel at the front of
// the c->suffix list; and terminate!
//
// c should point to new item!
//
c->AddSuffix(options, (CTrieNode<T, C>*)&g_trie_Sentinel, 0);
c->item = item;
return( DICT_SUCCESS );
}
}
else
{
//
// cmp != 0:
//
if (charsMatched == c->charCount)
{
// CASE I: c->string is shorter than subString,
// and all of c->string matched.
// Continue the search through the suffixes subtree:
//
strIndex += c->charCount;
Assert( strIndex < wcslen(string) );
subString = &string[strIndex];
subStringSize = wcslen(subString);
t = c;
// The continue below runs the for loop's i++ before the loop condition
// is tested again, so -1 restarts the scan at index 0 - now over the
// children of the new t.
i = -1;
continue;
}
else
{
// CASE II: the child c and subString have a common prefix which is
// a non-NULL strict prefix of c->string. Split c into the common
// prefix node (a new node which will replace c as a child of t),
// with two children: c (with a corresponding suffix); and a new
// node (with the respective suffix of subString);
//
c->SplitNode(
t, // Parent of node to split
i, // Index of node in parent->suffix
subString, // String sharing prefix with node->string
charsMatched,
item,
options
);
return( DICT_SUCCESS );
}
} // end if (cmp == 0)
} // end if (cmp > 0)
} // end for
//
// Either the new string was successfully inserted, in which case
// we would have returned already; or we reached the end of the
// suffix array:
//
//
// Insert a copy of subString at the end of t->suffix
// (i == t->suffixCount here):
//
apNewSuffix = new CTrieNode<T, C>(item, options, subString);
//
// Add item parameter to trie_NewNode!
//
t->AddSuffix(options, apNewSuffix.Get(), i);
apNewSuffix.Detach();
if (t->suffixCount == 1 && t->charCount != 0)
{
//
// First child of t and t is not the root of the trie
// (the root has charCount == 0);
// add a sentinel to t to designate
// that t->string is an actual item:
//
t->AddSuffix(options, (CTrieNode<T, C>*)&g_trie_Sentinel, 0);
}
return DICT_SUCCESS;
} // end while
// Not reached: every path through the loop body above returns.
Assert(0);
return(DICT_ITEM_NOT_FOUND);
} // end trie_Insert
/*++
Function trie_Find:
    Look up the stored keys that are prefixes of `string` (including
    `string` itself) and copy their items to outBuffer.
Routine Parameters:
    string        - the string to look up.
    options       - exactly one of TRIE_SHORTEST_MATCH, TRIE_LONGEST_MATCH
                    or TRIE_ALL_MATCHES, optionally combined with
                    TRIE_IGNORECASE.
                      SHORTEST - report the first stored prefix met on the
                                 way down and stop.
                      LONGEST  - report only the deepest stored prefix.
                      ALL      - report every stored prefix met, in order
                                 of increasing length, until outBuffer is
                                 full.
    outBufferSize - number of T* slots in outBuffer; must be > 0.
    outBuffer     - receives the matching items; zeroed on entry.
    resultCount   - receives the number of items written to outBuffer.
Return value:
    DICT_SUCCESS when a stored key with an item equals `string` exactly,
    otherwise DICT_ITEM_NOT_FOUND. Prefix matches may have been reported
    through outBuffer / *resultCount in either case.
--*/
template <class T, class C >
inline DictStatus
CTrie<T, C>::trie_Find(
    const WCHAR * string,       // A string
    unsigned long options,      // Search flags
    short outBufferSize,        // Max number of results wanted
    T ** outBuffer,             // Buffer to be filled with matching items
    short * resultCount         // Number of matching prefixes returned
    )
{
    CTrieNode<T, C> * node = root;
    CTrieNode<T, C> * child = NULL;
    int cmp = -1;
    DictStatus status = DICT_ITEM_NOT_FOUND;
    const WCHAR * subString = string;
    size_t strIndex = 0;
    size_t charsMatched = 0;
    int i;

    // at least one option matches:
    Assert( options &
            (TRIE_SHORTEST_MATCH | TRIE_LONGEST_MATCH | TRIE_ALL_MATCHES)
          );
    // at most one option matches:
    Assert ( ( ((options & TRIE_SHORTEST_MATCH)>>4) +
               ((options & TRIE_LONGEST_MATCH)>>5) +
               ((options & TRIE_ALL_MATCHES)>>6)
             ) == 1
           );

    //
    // Initialization:
    //
    Assert(outBufferSize > 0);
    Assert(outBuffer);
    // outBuffer is an array of T*, so that is the element size to clear.
    memset(outBuffer, 0, sizeof(T*) * outBufferSize);
    *resultCount = 0;

    while ( status != DICT_SUCCESS &&
            *resultCount < outBufferSize
          )
    {
        //
        // From the second pass on, `child` is the node whose whole string
        // matched in the previous pass: step over its characters and search
        // among its children.
        //
        if (child != NULL)
        {
            strIndex += child->charCount;
            subString = &string[strIndex];
            node = child;
        }

        //
        // Future: low & high can be improved by a partial binary search on
        // the first character of string in node->suffix:
        // if (node->suffixSize > threshold)
        //     trie_BinarySearch( &low, &high, subString[0]);
        //

        //
        // Search in this level sorted alternatives list for the first child
        // whose leading character is not below that of subString. When the
        // node has no children the loop body never runs and cmp keeps the
        // value from the previous pass.
        //
        for ( i = 0;
              i < node->suffixCount;
              i++
            )
        {
            // Quick "skip check":
            child = node->suffix[i];
            cmp = options & TRIE_IGNORECASE ?
                  C::MapToUpper(*child->string) - C::MapToUpper(*subString) :
                  *child->string - *subString;
            if ( cmp < 0 )
            {
                continue;
            }
            else
            {
                break;
            }
        } // end for
        Assert(cmp >= 0 || i == node->suffixCount);

        if (cmp != 0)
        {
            //
            // First character did not match => subString mismatched;
            // Bail out:
            //
            break; // From while loop!
        }

        //
        // cmp == 0 => first character matched;
        // Try to match more of subString:
        //
        cmp = (options & TRIE_IGNORECASE) ?
              child->trie_StrMatchIns(child->string, subString, &charsMatched) :
              child->trie_StrMatch(child->string, subString, &charsMatched);
        Assert(charsMatched <= min(child->charCount, MAX_PATTERN_LENGTH));

        if (charsMatched != child->charCount)
        {
            //
            // child->string did not match;
            // there are no more prefixes of string in trie
            //
            break; // From while loop!
        }

        //
        // Interesting case: all of child->string matched.
        //
        if (child->item != NULL)
        {
            //
            // Child represents a real item: put child->item into the
            // current result slot. For LONGEST the count is not advanced
            // here, so a deeper match overwrites the same slot.
            //
            outBuffer[*resultCount] = child->item;
            if (0 == cmp)
            {
                // The whole search string was consumed: exact match.
                status = DICT_SUCCESS;
            }
            if ( (options & TRIE_SHORTEST_MATCH) ==
                 TRIE_SHORTEST_MATCH
               )
            {
                // The slot is counted after the loop.
                break; // From while loop!
            }
            if ( (options & TRIE_ALL_MATCHES) ==
                 TRIE_ALL_MATCHES
               )
            {
                (*resultCount)++;
            }
        }

        //
        // Whether or not child carried an item, keep looking for matches:
        // the next pass descends into the subtree rooted at child.
        //
    } // end while

    //
    // SHORTEST / LONGEST leave their single result uncounted in the loop;
    // count it now if one was found.
    //
    if ( ((options & TRIE_LONGEST_MATCH)||
          (options & TRIE_SHORTEST_MATCH)) &&
         (outBuffer[*resultCount] != NULL))
    {
        (*resultCount)++;
    }
    return(status);
} // end trie_Find
//
// Prints the whole trie with printf, starting at the root with no
// indentation (see CTrieNode::Print).
//
template <class T, class C >
inline void
CTrie<T, C>::Print()
{
    const ULONG ulRootIndent = 0;
    root->Print(ulRootIndent);
}
#endif // __TRIE_H__