|
|
//+-------------------------------------------------------------------------
//
// Microsoft Windows
// Copyright (C) Microsoft Corporation, 1994 - 2000.
//
// File: strhash.cxx
//
// Contents: Hash table compressions of strings for large tables.
//
// Classes: CCompressedColHashString
//
// Functions:
//
// History: 03 May 1994 AlanW Created
//
//--------------------------------------------------------------------------
#include "pch.cxx"
#pragma hdrstop
#include <tblvarnt.hxx>
#include "strhash.hxx"
//+---------------------------------------------------------------------------
//
// Function: HashWSTR
//
// Synopsis: Hashes a WSTR and returns a value according to the format
// explained in the HashString call.
//
// Arguments: [pwszStr] - Pointer to the string.
// [nChar] - Number of characters in the string.
//
// Returns: A HashValue (formatted according to notes in HashString)
//
// History: 5-19-95 srikants Created
//
// Notes:
//
//----------------------------------------------------------------------------
inline ULONG CCompressedColHashString::HashWSTR( WCHAR const * pwszStr,
                                                 USHORT nChar )
{
    //
    // Fold every character into the running value with a shift-and-xor.
    //
    ULONG ulRet = 0;

    for ( ULONG i = 0; i < nChar; i++ )
    {
        WCHAR wch = pwszStr[i];
        ulRet = (ulRet << 1) ^ wch;
    }

    //
    // Mix the upper half into the lower half, keep the low 16 bits as the
    // hash proper, and place the character count at bit 17 and above.
    // Bit 16 (the format flag) is left 0, which marks a Unicode string.
    //
    // The count is taken from nChar rather than from the loop index: the
    // index is scoped to the for statement under standard C++ rules, and
    // its value on loop exit is always nChar.
    //
    // Only 15 bits are available for the count, so a count above 0x7FFF
    // loses its high bit in the shift.
    //
    ulRet = (ulRet >> 16) ^ ulRet;
    ulRet = (ulRet & 0xFFFF) | ((ULONG) nChar << 17);

    return ulRet;
}
//+---------------------------------------------------------------------------
//
// Function: HashSTR
//
// Synopsis: Hashes an ASCII string.
//
// Arguments: [pszStr] - Pointer to the string.
// [nChar] - Number of characters in the string.
//
// Returns: (Same as HashWSTR)
//
// History: 5-19-95 srikants Created
//
// Notes:
//
//----------------------------------------------------------------------------
inline ULONG CCompressedColHashString::HashSTR( CHAR const * pszStr,
                                                USHORT nChar )
{
    //
    // Fold every character into the running value with a shift-and-xor.
    // Each CHAR is read as an unsigned BYTE so that values with the high
    // bit set are not sign-extended into the hash.
    //
    ULONG ulRet = 0;

    for ( ULONG i = 0; i < nChar; i++ )
    {
        BYTE ch = (BYTE) pszStr[i];
        ulRet = (ulRet << 1) ^ ch;
    }

    //
    // Mix the upper half into the lower half, keep the low 16 bits as the
    // hash proper, place the character count at bit 17 and above, and set
    // bit 16 to mark the string as ASCII.
    //
    // The count is taken from nChar rather than from the loop index: the
    // index is scoped to the for statement under standard C++ rules, and
    // its value on loop exit is always nChar.
    //
    // Only 15 bits are available for the count, so a count above 0x7FFF
    // loses its high bit in the shift.
    //
    ulRet = (ulRet >> 16) ^ ulRet;
    ulRet = (ulRet & 0xFFFF) | ((ULONG) nChar << 17) | (1 << 16);

    return ulRet;
}
//const ULONG CCompressedColHashString::_cbDataWidth = sizeof (HashEntry);
//+-------------------------------------------------------------------------
//
// Method: CCompressedColHashString::HashString, public static
//
// Synopsis: Generic hash function for strings
//
// Arguments: [pbData] - pointer to the value to be hashed.
// [cbData] - size of pbData (may be some arbitrary large
// value if string is NUL terminated).
// [vtDataType] - type of string, VT_LPWSTR or VT_LPSTR
// [fNullTerminated] - Set to TRUE if the string is a NULL
// terminated string. FALSE o/w.
//
// Returns: ULONG - Hash value for the input data
//
// Notes: The returned hash value encodes the string length in
// characters and string format in the upper half of the
// returned DWORD. The format of the returned value is:
//
// +15 00+
// +-----------------------------------------------+
// | hash value (xor,shift of char values) |
// +--------------------------------------------+--+
// | character count | F|
// +--------------------------------------------+--+
// 31 17+16+
//
// where F = 0 if Unicode string, F = 1 if ASCII string
//
// As a side-effect, the string is copied to local storage,
// and a key to that storage is returned in rulCopyKey.
//
//--------------------------------------------------------------------------
ULONG CCompressedColHashString::HashString( BYTE *pbData,
                                            USHORT cbData,
                                            VARTYPE vtDataType,
                                            BOOL fNullTerminated )
{
    //
    // Wide strings: build a counted string descriptor, then hash it.
    //
    if ( VT_LPWSTR == vtDataType )
    {
        UNICODE_STRING ustr;

        if ( !fNullTerminated )
        {
            // A counted wide string must have an even byte length.
            Win4Assert( ( cbData & (USHORT) 0x1 ) == 0 );

            ustr.Buffer = (PWSTR) pbData;
            ustr.Length = cbData;
            ustr.MaximumLength = cbData;
        }
        else
        {
            // Let the runtime measure the string; cbData is not consulted.
            RtlInitUnicodeString( &ustr, (PWSTR) pbData );
        }

        // Length is in bytes; the hash routine wants characters.
        return HashWSTR( ustr.Buffer, ustr.Length/sizeof(WCHAR) );
    }

    //
    // Narrow strings: same approach, one byte per character.
    //
    if ( VT_LPSTR == vtDataType )
    {
        ANSI_STRING astr;

        if ( !fNullTerminated )
        {
            astr.Buffer = (CHAR *) pbData;
            astr.Length = cbData;
            astr.MaximumLength = cbData;
        }
        else
        {
            RtlInitAnsiString( &astr, (PSZ) pbData );
        }

        return HashSTR( astr.Buffer, astr.Length );
    }

    //
    // Any other variant type is a caller error.
    // PERFFIX - need to support VT_BSTR also?
    //
    Win4Assert(!"CCompressedColHashString::HashString called with bad type");
    THROW( CException( STATUS_INVALID_PARAMETER ) );

    return 0;
}
//+-------------------------------------------------------------------------
//
// Method: CCompressedColHashString::AddData, public
//
// Synopsis: Add a data entry to the hash table if it is not
// already there.
//
// Arguments: [pVarnt] - pointer to data item
// [pKey] - pointer to lookup key value
// [reIndicator] - returns an indicator variable for
// problems
//
// Returns: pKey is filled in with the index of the data item in
// the data array. reIndicator is filled with an indication
// of problems.
//
// Notes:
//
//--------------------------------------------------------------------------
VOID CCompressedColHashString::AddData( PROPVARIANT const * const pVarnt,
                                        ULONG* pKey,
                                        GetValueResult& reIndicator )
{
    if ( VT_EMPTY != pVarnt->vt )
    {
        //
        // A real string value: it must be an LPWSTR or LPSTR variant
        // whose pointer lives in the first word of the variant data.
        //
        CTableVariant *pTblVar = (CTableVariant *) pVarnt;

        Win4Assert( (pTblVar->vt == VT_LPWSTR || pTblVar->vt == VT_LPSTR) &&
                    pTblVar->VariantPointerInFirstWord( ) );

        USHORT cbData = (USHORT) pTblVar->VarDataSize();
        BYTE * pbData = (BYTE *) pTblVar->pwszVal;

        Win4Assert( cbData != 0 && pbData != NULL );

        // The variant's string is handed on as NULL terminated.
        _AddData( pbData, cbData, pTblVar->vt, pKey, TRUE );
    }
    else
    {
        //
        // VT_EMPTY is handled specially: it gets key 0 and nothing is
        // stored.
        //
        *pKey = 0;
    }

    reIndicator = GVRSuccess;
}
//+---------------------------------------------------------------------------
//
// Function: FindCountedWStr
//
// Synopsis: Finds the given string in the string store. It is assumed
// that there is no terminating NULL in the string. Instead,
// its length is passed.
//
// Arguments: [pwszStr] - Pointer to the string to be added.
// [cwcStr] - Count of the characters in the string.
//
// Returns: ULONG key or stridInvalid
//
// History: 7-17-95 dlee Created
//
//----------------------------------------------------------------------------
ULONG CCompressedColHashString::FindCountedWStr( WCHAR const *pwszStr,
                                                 ULONG cwcStr )
{
    Win4Assert( !_fOptimizeAscii );

    //
    // Convert the character count to a byte count. Both the count and
    // the resulting byte length are narrowed to 16 bits.
    //
    USHORT const cwcShort = (USHORT) cwcStr;
    USHORT cbData = cwcShort * sizeof(WCHAR);
    BYTE * pbData = (BYTE *) pwszStr;

    Win4Assert( cbData != 0 && pbData != NULL );

    // Look the string up as a counted (not NULL terminated) wide string.
    return _FindData( pbData, cbData, VT_LPWSTR, FALSE );
} //FindCountedWStr
//+---------------------------------------------------------------------------
//
// Function: AddCountedWStr
//
// Synopsis: Adds the given string to the string store. It is assumed
// that there is no terminating NULL in the string. Instead,
// its length is passed.
//
// Arguments: [pwszStr] - Pointer to the string to be added.
// [cwcStr] - Count of the characters in the string.
// [key] - OUTPUT - Id of the string
// [reIndicator] - GVRSuccess if successful. Failure code o/w
//
// History: 5-19-95 srikants Created
//
// Notes:
//
//----------------------------------------------------------------------------
VOID CCompressedColHashString::AddCountedWStr( WCHAR const *pwszStr,
                                               ULONG cwcStr,
                                               ULONG & key,
                                               GetValueResult & reIndicator )
{
    Win4Assert( !_fOptimizeAscii );

    //
    // Convert the character count to a byte count. Both the count and
    // the resulting byte length are narrowed to 16 bits.
    //
    USHORT const cwcShort = (USHORT) cwcStr;
    USHORT cbData = cwcShort * sizeof(WCHAR);
    BYTE * pbData = (BYTE *) pwszStr;

    Win4Assert( cbData != 0 && pbData != NULL );

    // Store the string as a counted (not NULL terminated) wide string.
    _AddData( pbData, cbData, VT_LPWSTR, &key, FALSE );

    reIndicator = GVRSuccess;
}
//+---------------------------------------------------------------------------
//
// Function: AddData
//
// Synopsis: Adds a NULL terminated string to the string store.
//
// Arguments: [pwszStr] - Pointer to a NULL terminated string.
// [key] - OUTPUT - key of the added string.
// [reIndicator] - Status indicator.
//
// History: 5-19-95 srikants Created
//
// Notes:
//
//----------------------------------------------------------------------------
VOID CCompressedColHashString::AddData( WCHAR const *pwszStr,
                                        ULONG & key,
                                        GetValueResult & reIndicator )
{
    //
    // Measure the NULL terminated string, then add it through the
    // counted-string path.
    //
    ULONG cwcLength = wcslen( pwszStr );

    AddCountedWStr( pwszStr, cwcLength, key, reIndicator );
}
//+-------------------------------------------------------------------------
//
// Method: CCompressedColHashString::_AddData, private
//
// Synopsis: Private helper for the public AddData method. Adds
// a data entry to the hash table (if it does not already
// exist).
//
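// Arguments: [pbData] - pointer to data item
// [cbDataSize] - size of data item
// [vt] - type of the string, VT_LPWSTR or VT_LPSTR
// [pKey] - pointer to lookup key value
// [fNullTerminated] - TRUE if the string is NULL terminated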
//
// Returns: pKey is filled in with the index of the data item in
// the data array.
//
// Notes:
//
//--------------------------------------------------------------------------
VOID CCompressedColHashString::_AddData( BYTE *pbData,
                                         USHORT cbDataSize,
                                         VARTYPE vt,
                                         ULONG* pKey,
                                         BOOL fNullTerminated )
{
    //
    // Create the table on first use. The test is on the allocator, not
    // on the item count: _FindData also creates the table lazily, so the
    // allocator can already exist while the table is still empty.
    // Growing again in that state would trip the assert in
    // _GrowHashTable and overwrite the existing allocator.
    //
    if ( 0 == _pAlloc )
    {
        _GrowHashTable();
    }

    //
    // The upper half of the hash holds the size/format word: bit 0 is
    // the ASCII flag and the remaining bits are the character count.
    // For an ASCII string the byte count is the character count; for a
    // Unicode string (flag clear) the word itself is count * 2, which is
    // the byte count.
    //
    ULONG ulHash = HashString( pbData, cbDataSize, vt, fNullTerminated );
    USHORT usSizeFmt = (USHORT) (ulHash >> 16);
    ULONG cbString = (usSizeFmt & 1) ? (usSizeFmt >> 1) : usSizeFmt;

    ulHash %= _cHashEntries;

    HASHKEY* pulHashChain = &(((HASHKEY *)_pAlloc->BufferAddr())[ulHash]);
    HashEntry* pNextData;

    //
    // Walk the bucket's chain looking for an entry with the same
    // size/format word and the same bytes.
    //
    while (*pulHashChain != 0)
    {
        pNextData = _IndexHashkey( *pulHashChain );

        if (usSizeFmt == pNextData->usSizeFmt)
        {
            BYTE* pbNextString =
                (BYTE*)_pAlloc->OffsetToPointer(pNextData->ulStringKey);

            if (memcmp(pbNextString, pbData, cbString) == 0)
            {
                //
                // Found the data item. Return its index.
                //
                *pKey = *pulHashChain;
                return;
            }
        }

        pulHashChain = &pNextData->ulHashChain;
    }

    //
    // Allocate memory for the new string and copy the contents from
    // the source buffer.
    //
    BYTE * pbNewData = (BYTE *) _pAlloc->Allocate( cbString );
    TBL_OFF ulKey = _pAlloc->PointerToOffset(pbNewData);
    RtlCopyMemory( pbNewData, pbData, cbString );

    //
    // The table may move in memory when we call AllocFixed.
    // Be sure we can address pulHashChain after that.
    //
    ULONG ulHashChainBase =
        (ULONG)((BYTE*)pulHashChain - _pAlloc->BufferAddr());
    pNextData = (struct HashEntry*) _pAlloc->AllocFixed();
    pulHashChain = (HASHKEY *) (_pAlloc->BufferAddr() + ulHashChainBase);

    //
    // NOTE: The fixed hash table at this point decides if it wants
    // to grow the fixed area, with a possible rehash of the
    // table to grow the number of buckets. With the code
    // below, the string hash table has no opportunity to
    // grow the number of hash buckets.
    //

    //
    // Now add the new data item. The data item consists of a HASHKEY
    // for the hash chain, followed by the size and format indicator,
    // and the key for the string in the variable data.
    //
    *pKey = *pulHashChain = ++_cDataItems;
    Win4Assert(_cDataItems != 0); // check for overflow

    pNextData->ulHashChain = 0;
    pNextData->usSizeFmt = usSizeFmt;
    pNextData->ulStringKey = ulKey;
}
//+-------------------------------------------------------------------------
//
// Method: CCompressedColHashString::_FindData, private
//
// Synopsis: Finds a data entry in the hash table.
//
// Arguments: [pbData] - pointer to data item
// [cbDataSize] - size of data item
// [vt] - type of the string, VT_LPWSTR or VT_LPSTR
// [fNullTerminated] - TRUE if the string is NULL terminated
//
// Returns: The key of the string or stridInvalid
//
// History: 7-17-95 dlee Created
//
//--------------------------------------------------------------------------
ULONG CCompressedColHashString::_FindData( BYTE * pbData,
                                           USHORT cbDataSize,
                                           VARTYPE vt,
                                           BOOL fNullTerminated )
{
    // Create the table on first use so the bucket array exists.
    if ( 0 == _pAlloc )
    {
        _GrowHashTable();
    }

    //
    // Split the hash: the upper half is the size/format word (bit 0 is
    // the ASCII flag, the rest is the character count), from which the
    // byte length to compare is derived.
    //
    ULONG ulBucket = HashString( pbData, cbDataSize, vt, fNullTerminated );
    USHORT usSizeFmt = (USHORT) (ulBucket >> 16);

    ULONG cbCompare;
    if ( usSizeFmt & 1 )
        cbCompare = usSizeFmt >> 1;
    else
        cbCompare = usSizeFmt;

    ulBucket %= _cHashEntries;

    //
    // Follow the bucket's chain until a match is found or it runs out.
    //
    HASHKEY * pLink = &(((HASHKEY *)_pAlloc->BufferAddr())[ulBucket]);

    for ( ; 0 != *pLink; )
    {
        HashEntry * pEntry = _IndexHashkey( *pLink );

        if ( usSizeFmt == pEntry->usSizeFmt )
        {
            BYTE * pbStored =
                (BYTE *) _pAlloc->OffsetToPointer( pEntry->ulStringKey );

            if ( memcmp( pbStored, pbData, cbCompare ) == 0 )
            {
                // Same size, format and bytes: this is the item's key.
                return *pLink;
            }
        }

        pLink = &pEntry->ulHashChain;
    }

    // couldn't find the string in the table
    return stridInvalid;
} //_FindData
//+-------------------------------------------------------------------------
//
// Method: CCompressedColHashString::GetData, public
//
// Synopsis: Retrieve a data value from the hash table.
//
// Arguments: [pVarnt] - pointer to a variant structure in which to
// return a pointer to the data
// [PreferredType] - preferred type of the result.
// [ulKey] - the lookup key value
// [PropId] - (unused) property id being retrieved.
//
// Returns: pVarnt is filled in with the data item from the hash table.
//
// Notes: The FreeVariant method must be called with the pVarnt
// structure as an argument when it is no longer needed.
//
//--------------------------------------------------------------------------
GetValueResult CCompressedColHashString::GetData( PROPVARIANT * pVarnt,
                                                  VARTYPE PreferredType,
                                                  ULONG ulKey,
                                                  PROPID PropId )
{
    Win4Assert(ulKey <= _cDataItems);

    // Key 0 is the reserved "no value" key.
    if (ulKey == 0)
    {
        pVarnt->vt = VT_EMPTY;
        return GVRNotAvailable;
    }

    //
    // Keys are 1-based indexes into the fixed rows. In the size/format
    // word, bit 0 is the ASCII flag and the rest is the character count;
    // cchSize adds one for the terminator that is appended on output.
    //
    HashEntry* pData = ((HashEntry*) _pAlloc->FirstRow()) + ulKey - 1;
    BOOL fAscii = (pData->usSizeFmt & 1) != 0;
    ULONG cchSize = (pData->usSizeFmt >> 1) + 1;

    //
    // Give out the data as an LPSTR only if that's what the caller
    // desires, and it's in the ascii range.
    //
    // The buffer size must follow the format actually written. Every
    // other combination, including a stored ASCII string with a
    // preferred type that is neither VT_LPSTR nor VT_LPWSTR, produces
    // wide characters and so needs sizeof(WCHAR) bytes per character.
    //
    BOOL fReturnAscii = (PreferredType == VT_LPSTR && fAscii);
    ULONG cbSize = fReturnAscii ? cchSize : cchSize * sizeof (WCHAR);

    // The buffer is requested in WCHARs, rounding the byte size up.
    BYTE* pbBuf = (BYTE*)_GetStringBuffer((cbSize+1) / sizeof (WCHAR));
    BYTE* pbSource = (BYTE*)_pAlloc->OffsetToPointer(pData->ulStringKey);

    if (fReturnAscii)
    {
        // Narrow to narrow: copy the bytes and terminate.
        RtlCopyMemory(pbBuf, pbSource, cbSize - 1);
        ((CHAR *)pbBuf)[cchSize - 1] = '\0';

        pVarnt->vt = VT_LPSTR;
        pVarnt->pszVal = (PSZ)pbBuf;
    }
    else
    {
        if (!fAscii)
        {
            // Wide to wide: copy everything but the terminator.
            RtlCopyMemory(pbBuf, pbSource, cbSize - sizeof(WCHAR));
        }
        else
        {
            // Narrow to wide: widen one character at a time.
            for (unsigned i=0; i<cchSize-1; i++)
            {
                ((WCHAR*)pbBuf)[i] = ((CHAR*)pbSource)[i];
            }
        }
        ((WCHAR *)pbBuf)[cchSize - 1] = L'\0';

        pVarnt->vt = VT_LPWSTR;
        pVarnt->pwszVal = (PWSTR)pbBuf;
    }

    return GVRSuccess;
}
//+---------------------------------------------------------------------------
//
// Function: GetData
//
// Synopsis: Copies a NULL terminated string into the pwszStr by looking
// up the string identified by "ulKey".
//
// Arguments: [ulKey] - Key of the string to lookup.
// [pwszStr] - Pointer to the buffer to copy to.
// [cwcStr] - On input, it contains the length of the buffer in
// WCHARs. On output, it has the length of the string
// copied INCLUDING the terminating NULL.
//
// Returns: GVR* code
//
// History: 5-19-95 srikants Created
//
// Notes:
//
//----------------------------------------------------------------------------
GetValueResult CCompressedColHashString::GetData( ULONG ulKey,
                                                  WCHAR * pwszStr,
                                                  ULONG & cwcStr )
{
    Win4Assert(ulKey <= _cDataItems);

    // Key 0 is the reserved "no value" key.
    if ( 0 == ulKey )
        return GVRNotAvailable;

    // Keys are 1-based indexes into the fixed rows.
    HashEntry * pEntry = ((HashEntry*) _pAlloc->FirstRow()) + ulKey - 1;

    // This overload only serves strings stored as Unicode.
    BOOL fAscii = (pEntry->usSizeFmt & 1) != 0;
    Win4Assert( !fAscii );

    //
    // cwcChars is the stored length; cwcNeeded adds the terminator.
    //
    ULONG cwcChars = pEntry->usSizeFmt >> 1;
    ULONG cwcNeeded = cwcChars + 1;

    if ( cwcStr < cwcNeeded )
        return GVRNotEnoughSpace;

    BYTE * pbStored = (BYTE *) _pAlloc->OffsetToPointer( pEntry->ulStringKey );

    RtlCopyMemory( pwszStr, pbStored, cwcChars * sizeof(WCHAR) );
    pwszStr[cwcChars] = L'\0';

    // Report the length copied, including the terminating NULL.
    cwcStr = cwcNeeded;

    return GVRSuccess;
}
//+---------------------------------------------------------------------------
//
// Function: GetCountedWStr
//
// Synopsis: Returns a pointer to a string which is NOT null terminated.
// The length of the string (in characters) is returned in
// cwcStr.
//
// Arguments: [ulKey] - String to lookup
// [cwcStr] - OUTPUT - length of the string in WCHARs.
//
// History: 5-19-95 srikants Created
//
// Notes:
//
//----------------------------------------------------------------------------
const WCHAR * CCompressedColHashString::GetCountedWStr( ULONG ulKey,
                                                        ULONG & cwcStr )
{
    Win4Assert(ulKey <= _cDataItems);

    // Key 0 has no string; cwcStr is left untouched in that case.
    if ( 0 == ulKey )
    {
        return 0;
    }

    // Keys are 1-based indexes into the fixed rows.
    HashEntry * pEntry = ((HashEntry*) _pAlloc->FirstRow()) + ulKey - 1;

    // Only strings stored as Unicode can be handed out as WCHARs.
    BOOL fAscii = (pEntry->usSizeFmt & 1) != 0;
    Win4Assert( !fAscii );

    BYTE * pbStored = (BYTE *) _pAlloc->OffsetToPointer( pEntry->ulStringKey );

    // properly aligned on word.
    Win4Assert( ( (TBL_OFF)pbStored & (TBL_OFF) 0x1 ) == 0 );

    // The character count sits above the format bit.
    cwcStr = (pEntry->usSizeFmt >> 1);

    return (const WCHAR *) pbStored;
}
//+-------------------------------------------------------------------------
//
// Method: CCompressedColHashStr::_GetStringBuffer, private
//
// Synopsis: Private helper for the public GetData method. Gets
// a string buffer of sufficient size to accommodate the
// request.
//
// Arguments: [cchString] - number of characters required in buffer
//
// Returns: pointer to a buffer of sufficient size
//
// Notes:
//
// History: 03 Mar 1995 Alanw Created
//
//--------------------------------------------------------------------------
PWSTR CCompressedColHashString::_GetStringBuffer( unsigned cchString )
{
    //
    // Use whichever of the two member buffers is not in use, trying
    // the first one before the second.
    //
    if ( !_Buf1.InUse() )
    {
        return _Buf1.Alloc( cchString );
    }

    if ( !_Buf2.InUse() )
    {
        return _Buf2.Alloc( cchString );
    }

    // Both are in use: fall back to a heap array of the requested size.
    return new WCHAR [ cchString ];
}
//+-------------------------------------------------------------------------
//
// Method: CCompressedColHashString::FreeVariant, public
//
// Synopsis: Free private data associated with a variant which had
// been filled in by the GetData method.
//
// Arguments: [pVarnt] - pointer to the variant
//
// Returns: Nothing
//
// Notes:
//
//--------------------------------------------------------------------------
void CCompressedColHashString::FreeVariant(PROPVARIANT * pVarnt)
{
    // An empty variant holds nothing to release.
    if ( VT_EMPTY == pVarnt->vt )
    {
        return;
    }

    Win4Assert(pVarnt->vt == VT_LPWSTR || pVarnt->vt == VT_LPSTR);

    //
    // Offer the pointer to each member buffer in turn; if neither
    // accepts it, release it with delete [].
    //
    if ( !_Buf1.FreeConditionally( pVarnt->pwszVal ) )
    {
        if ( !_Buf2.FreeConditionally( pVarnt->pwszVal ) )
        {
            delete [] pVarnt->pwszVal;
        }
    }

    pVarnt->pwszVal = 0; // To prevent accidental re-use
}
//+-------------------------------------------------------------------------
//
// Method: CCompressedColHashString::DataLength, public
//
// Synopsis: Returns the length, in characters, of a data item stored
// in the hash table.
//
// Arguments: [kData] - key to the data
//
// Returns: USHORT number of characters in the data item. Includes
// space for a terminating character. Scale
// this by the size of a character for byte count.
//
// Notes:
//
//--------------------------------------------------------------------------
USHORT CCompressedColHashString::DataLength(ULONG kData)
{
    // Key 0 is the reserved "no value" key and has no length.
    if ( 0 == kData )
    {
        return 0;
    }

    // Keys are 1-based indexes into the fixed rows.
    HashEntry * pEntry = ((HashEntry*) _pAlloc->FirstRow()) + kData - 1;

    // Character count sits above the format bit; add one for a terminator.
    return (pEntry->usSizeFmt >> 1) + 1;
}
//+-------------------------------------------------------------------------
//
// Method: CCompressedColHashString::_GrowHashTable, private
//
// Synopsis: Grow the space allocated to the hash table and data
// items.
//
// Arguments: - none -
//
// Returns: Nothing
//
// Notes: Called to allocate the initial data area. Unlike the
// like-named method in the fixed hash table, this is
// called only for the initial allocation of data. Data
// Items are not re-hashed after being added to the table.
//
//--------------------------------------------------------------------------
const unsigned HASH_TABLE_SIZE = 174; // Minimum hash table size
// avg. chain length is about
// 3 for a one-page table.
// NOTE: should be even to
// assure DWORD alignment of
// fixed data.
// _GrowHashTable uses this value as the
// bucket count (_cHashEntries) and,
// scaled by sizeof (HASHKEY), as an
// argument to the allocator it creates.
VOID CCompressedColHashString::_GrowHashTable( void )
{
    // The bucket count is fixed; existing items are never rehashed.
    _cHashEntries = HASH_TABLE_SIZE;

    // only called to initialize.
    Win4Assert(_cDataItems == 0 && _pAlloc == NULL);

    // The fixed-width rows of the allocator are HashEntry records.
    Win4Assert(_cbDataWidth == sizeof (HashEntry));

    //
    // Create the allocator, passing the row width and the byte size of
    // HASH_TABLE_SIZE hash keys.
    //
    _pAlloc = new CFixedVarAllocator( TRUE,
                                      TRUE,
                                      _cbDataWidth,
                                      HASH_TABLE_SIZE*sizeof (HASHKEY) );
}
|