windows-server-2003/inetsrv/query/bigtable/strhash.cxx

//+-------------------------------------------------------------------------
//
//  Microsoft Windows
//  Copyright (C) Microsoft Corporation, 1994 - 2000.
//
//  File:       strhash.cxx
//
//  Contents:   Hash table compressions of strings for large tables.
//
//  Classes:    CCompressedColHashString
//
//  Functions:
//
//  History:    03 May 1994     AlanW    Created
//
//--------------------------------------------------------------------------

#include "pch.cxx"
#pragma hdrstop

#include <tblvarnt.hxx>

#include "strhash.hxx"


//+---------------------------------------------------------------------------
//
//  Function:   HashWSTR
//
//  Synopsis:   Hashes a WSTR and returns a value according to the format
//              explained in the HashString call.
//
//  Arguments:  [pwszStr] -  Pointer to the string.
//              [nChar]   -  Number of characters in the string.
//
//  Returns:    A HashValue (formatted according to notes in HashString)
//
//  History:    5-19-95   srikants   Created
//
//  Notes:
//
//----------------------------------------------------------------------------
inline
ULONG CCompressedColHashString::HashWSTR( WCHAR const * pwszStr,
                                          USHORT nChar )
{
    //
    //  Fold every character into the accumulator with a shift-and-xor.
    //
    ULONG ulRet = 0;

    for ( ULONG i = 0; i < nChar; i++ )
    {
        ulRet = (ulRet << 1) ^ pwszStr[i];
    }

    //
    //  Mix the upper half of the accumulator into the lower half; only the
    //  low 16 bits survive into the result.
    //
    ulRet = (ulRet >> 16) ^ ulRet;

    //
    //  Result layout: bits 0..15 = hash, bit 16 = 0 (Unicode),
    //  bits 17 and up = character count.
    //
    //  The count is taken from nChar rather than from the loop index.  The
    //  index equals nChar when the loop ends, but under standard C++
    //  scoping it is not visible after the for statement, so reading it
    //  here only compiled with the old MSVC for-scope extension.
    //
    //  NOTE(review): assuming a 32-bit ULONG, the count field is 15 bits
    //  wide, so a count of 0x8000 or more loses its top bit.
    //
    ulRet = (ulRet & 0xFFFF) | ((ULONG) nChar << 17);
    return ulRet;
}

//+---------------------------------------------------------------------------
//
//  Function:   HashSTR
//
//  Synopsis:   Hashes an ASCII string.
//
//  Arguments:  [pszStr] -  Pointer to the string.
//              [nChar]  -  Number of characters in the string.
//
//  Returns:    (Same as HashWSTR)
//
//  History:    5-19-95   srikants   Created
//
//  Notes:
//
//----------------------------------------------------------------------------
inline
ULONG CCompressedColHashString::HashSTR( CHAR const * pszStr, USHORT nChar )
{
    //
    //  Fold every byte into the accumulator with a shift-and-xor.  Each
    //  character goes through BYTE so that a value of 0x80 or above cannot
    //  be sign-extended before the xor.
    //
    ULONG ulRet = 0;

    for ( ULONG i = 0; i < nChar; i++ )
    {
        ulRet = (ulRet << 1) ^ (BYTE) pszStr[i];
    }

    //
    //  Mix the upper half of the accumulator into the lower half; only the
    //  low 16 bits survive into the result.
    //
    ulRet = (ulRet >> 16) ^ ulRet;

    //
    //  Result layout: bits 0..15 = hash, bit 16 = 1 (ASCII string),
    //  bits 17 and up = character count.
    //
    //  The count is taken from nChar rather than from the loop index.  The
    //  index equals nChar when the loop ends, but under standard C++
    //  scoping it is not visible after the for statement, so reading it
    //  here only compiled with the old MSVC for-scope extension.
    //
    //  NOTE(review): assuming a 32-bit ULONG, the count field is 15 bits
    //  wide, so a count of 0x8000 or more loses its top bit.
    //
    ulRet = (ulRet & 0xFFFF) | ((ULONG) nChar << 17) | (1 << 16);
    return ulRet;
}

//const ULONG CCompressedColHashString::_cbDataWidth = sizeof (HashEntry);

//+-------------------------------------------------------------------------
//
//  Method:     CCompressedColHashString::HashString, public static
//
//  Synopsis:   Generic hash function for strings
//
//  Arguments:  [pbData] - pointer to the value to be hashed.
//              [cbData] - size of pbData (may be some arbitrary large
//                      value if the string is NUL terminated).
//              [vtDataType] - type of string, VT_LPWSTR or VT_LPSTR
//              [fNullTerminated] - Set to TRUE if the string is a NUL
//              terminated string, FALSE otherwise.
//
//  Returns:    ULONG - Hash value for the input data
//
//  Notes:      The returned hash value encodes the string length in
//              characters and string format in the upper half of the
//              returned DWORD.  The format of the returned value is:
//
//              +15                                           00+
//              +-----------------------------------------------+
//              |      hash value (xor,shift of char values)    |
//              +--------------------------------------------+--+
//              |      character count                       | F|
//              +--------------------------------------------+--+
//               31                                        17+16+
//
//              where F = 0 if Unicode string, F = 1 if ASCII string
//
//              HashString only computes the hash value; it does not copy
//              the string.  Copying into table storage is done by _AddData.
//
//--------------------------------------------------------------------------

ULONG CCompressedColHashString::HashString(
    BYTE *pbData,
    USHORT cbData,
    VARTYPE vtDataType,
    BOOL   fNullTerminated
)
{
    //
    //  Wide strings: build a counted description of the string, then hash
    //  the character count derived from its byte length.
    //
    if ( VT_LPWSTR == vtDataType )
    {
        UNICODE_STRING ustr;

        if ( !fNullTerminated )
        {
            Win4Assert( ( cbData & (USHORT) 0x1 ) == 0 );    // must be an even number
            ustr.Buffer = (PWSTR) pbData;
            ustr.Length = cbData;
            ustr.MaximumLength = cbData;
        }
        else
        {
            // cbData is ignored; the length comes from the terminator.
            RtlInitUnicodeString( &ustr, (PWSTR) pbData );
        }

        return HashWSTR( ustr.Buffer, ustr.Length / sizeof(WCHAR) );
    }

    //
    //  Narrow strings: same idea, the byte length is the character count.
    //
    if ( VT_LPSTR == vtDataType )
    {
        ANSI_STRING astr;

        if ( !fNullTerminated )
        {
            astr.Buffer = (CHAR *) pbData;
            astr.Length = cbData;
            astr.MaximumLength = cbData;
        }
        else
        {
            // cbData is ignored; the length comes from the terminator.
            RtlInitAnsiString( &astr, (PSZ) pbData );
        }

        return HashSTR( astr.Buffer, astr.Length );
    }

    //
    //  Any other variant type is a caller error.
    //  PERFFIX - need to support VT_BSTR also?
    //
    Win4Assert(!"CCompressedColHashString::HashString called with bad type");
    THROW( CException( STATUS_INVALID_PARAMETER ) );

    return 0;
}

//+-------------------------------------------------------------------------
//
//  Method:     CCompressedColHashString::AddData, public
//
//  Synopsis:   Add a data entry to the hash table if it is not
//              already there.
//
//  Arguments:  [pVarnt] - pointer to data item
//              [pKey] - pointer to lookup key value
//              [reIndicator] - returns an indicator variable for
//                      problems
//
//  Returns:    pKey is filled in with the index of the data item in
//              the data array.  reIndicator is filled with an indication
//              of problems.
//
//  Notes:
//
//--------------------------------------------------------------------------

VOID    CCompressedColHashString::AddData(
    PROPVARIANT const * const pVarnt,
    ULONG* pKey,
    GetValueResult& reIndicator
)
{
    if ( VT_EMPTY != pVarnt->vt )
    {
        //
        //  A real string: it is expected to be an LPWSTR or LPSTR variant
        //  whose pointer is stored in the first word.
        //
        CTableVariant *pVar = (CTableVariant *)pVarnt;
        Win4Assert((pVar->vt == VT_LPWSTR || pVar->vt == VT_LPSTR) &&
                 pVar->VariantPointerInFirstWord( ));

        BYTE *pbData = (BYTE *) pVar->pwszVal;
        USHORT cbData = (USHORT) pVar->VarDataSize();

        Win4Assert(cbData != 0 && pbData != NULL);

        // The string is passed on as NUL terminated.
        _AddData( pbData, cbData, pVar->vt, pKey, TRUE );
    }
    else
    {
        //
        //  VT_EMPTY is not stored; it is reported with key 0.
        //
        *pKey = 0;
    }

    reIndicator = GVRSuccess;
}

//+---------------------------------------------------------------------------
//
//  Function:   FindCountedWStr
//
//  Synopsis:   Finds the given string in the string store. It is assumed
//              that there is no terminating NULL in the string. Instead,
//              its length is passed.
//
//  Arguments:  [pwszStr]     - Pointer to the string to be found.
//              [cwcStr]      - Count of the characters in the string.
//
//  Returns:    ULONG key or stridInvalid
//
//  History:    7-17-95   dlee   Created
//
//----------------------------------------------------------------------------

ULONG CCompressedColHashString::FindCountedWStr(
    WCHAR const *pwszStr,
    ULONG cwcStr )
{
    Win4Assert( !_fOptimizeAscii );

    //
    //  Byte length of the counted string, narrowed to the 16-bit size that
    //  _FindData accepts.
    //  NOTE(review): a byte length above 0xFFFF is silently truncated here.
    //
    USHORT cbData = (USHORT) ( cwcStr * sizeof(WCHAR) );
    BYTE *pbData = (BYTE *) pwszStr ;

    Win4Assert(cbData != 0 && pbData != NULL);

    // FALSE: the string is counted, not NUL terminated.
    const ULONG stridFound = _FindData( pbData, cbData, VT_LPWSTR, FALSE );
    return stridFound;
} //FindCountedWStr

//+---------------------------------------------------------------------------
//
//  Function:   AddCountedWStr
//
//  Synopsis:   Adds the given string to the string store. It is assumed
//              that there is no terminating NULL in the string. Instead,
//              its length is passed.
//
//  Arguments:  [pwszStr]     - Pointer to the string to be added.
//              [cwcStr]      - Count of the characters in the string.
//              [key]         - OUTPUT - Id of the string
//              [reIndicator] - GVRSuccess if successful. Failure code o/w
//
//  History:    5-19-95   srikants   Created
//
//  Notes:
//
//----------------------------------------------------------------------------

VOID CCompressedColHashString::AddCountedWStr(
    WCHAR const *pwszStr,
    ULONG cwcStr,
    ULONG & key,
    GetValueResult & reIndicator
)
{
    Win4Assert( !_fOptimizeAscii );

    //
    //  Byte length of the counted string, narrowed to the 16-bit size that
    //  _AddData accepts.
    //  NOTE(review): a byte length above 0xFFFF is silently truncated here.
    //
    USHORT cbData = (USHORT) ( cwcStr * sizeof(WCHAR) );
    BYTE *pbData = (BYTE *) pwszStr ;

    Win4Assert(cbData != 0 && pbData != NULL);

    // FALSE: the string is counted, not NUL terminated.
    _AddData( pbData, cbData, VT_LPWSTR, &key, FALSE );

    reIndicator = GVRSuccess;
}

//+---------------------------------------------------------------------------
//
//  Function:   AddData
//
//  Synopsis:   Adds a NULL terminated string to the string store.
//
//  Arguments:  [pwszStr]     -  Pointer to a NULL terminated string.
//              [key]         -  OUTPUT - key of the added string.
//              [reIndicator] -  Status indicator.
//
//  History:    5-19-95   srikants   Created
//
//  Notes:
//
//----------------------------------------------------------------------------

VOID    CCompressedColHashString::AddData(
    WCHAR const *pwszStr,
    ULONG & key,
    GetValueResult & reIndicator
)
{
    //
    //  Measure the string (terminator excluded) and hand it to the
    //  counted-string entry point, which fills in key and reIndicator.
    //
    AddCountedWStr( pwszStr, (ULONG) wcslen( pwszStr ), key, reIndicator );
}

//+-------------------------------------------------------------------------
//
//  Method:     CCompressedColHashString::_AddData, private
//
//  Synopsis:   Private helper for the public AddData method.  Adds
//              a data entry to the hash table (if it does not already
//              exist).
//
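//  Arguments:  [pbData] - pointer to data item
//              [cbDataSize] - size of data item
//              [vt] - type of the string, VT_LPWSTR or VT_LPSTR
//              [pKey] - pointer to lookup key value
//              [fNullTerminated] - TRUE if the string is NUL terminated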
//
//  Returns:    pKey is filled in with the index of the data item in
//              the data array.
//
//  Notes:
//
//--------------------------------------------------------------------------

VOID    CCompressedColHashString::_AddData(
    BYTE *pbData,
    USHORT cbDataSize,
    VARTYPE vt,
    ULONG* pKey,
    BOOL   fNullTerminated
) {
    //
    //  Create the table on first use.  The test is on the allocator
    //  pointer, the same condition _FindData uses.  Testing the item count
    //  instead would call _GrowHashTable a second time whenever a lookup
    //  on an empty table had already created the allocator, and
    //  _GrowHashTable would then overwrite _pAlloc without releasing it.
    //
    if ( 0 == _pAlloc )
    {
        _GrowHashTable();
    }

    //
    //  The upper 16 bits of the hash carry (character count << 1) | format
    //  bit.  For an ASCII string (format bit set) the byte length is the
    //  count; for a Unicode string the format bit is clear, so the value
    //  itself is count * 2, i.e. the byte length.
    //
    ULONG ulHash = HashString( pbData, cbDataSize, vt, fNullTerminated );
    USHORT usSizeFmt = (USHORT) (ulHash >> 16);
    ULONG cbString = usSizeFmt & 1? usSizeFmt >> 1 : usSizeFmt;

    ulHash %= _cHashEntries;

    //
    //  Walk the bucket's chain looking for an entry with the same size and
    //  format whose bytes match.
    //
    HASHKEY* pulHashChain = &(((HASHKEY *)_pAlloc->BufferAddr())[ulHash]);
    HashEntry* pNextData;

    while (*pulHashChain != 0)
    {
        pNextData = _IndexHashkey( *pulHashChain );

        if (usSizeFmt == pNextData->usSizeFmt)
        {
            BYTE* pbNextString = (BYTE*)_pAlloc->OffsetToPointer(pNextData->ulStringKey);
            if (memcmp(pbNextString, pbData, cbString) == 0)
            {

                //
                //  Found the data item.  Return its index.
                //
                *pKey = *pulHashChain;
                return;
            }
        }
        pulHashChain = &pNextData->ulHashChain;
    }

    //
    //  Not found.  pulHashChain now addresses the terminating link of the
    //  chain.  Remember it as an offset from the buffer base before
    //  allocating anything, so it can be rebased afterwards.
    //
    //  The table may move in memory when we call AllocFixed.
    //  NOTE(review): whether Allocate can also relocate the buffer is not
    //  visible here; capturing the offset first is correct either way.
    //
    ULONG ulHashChainBase = (ULONG)((BYTE*)pulHashChain - _pAlloc->BufferAddr());

    //
    // Allocate memory for the new string and copy the contents from
    // the source buffer.
    //
    BYTE * pbNewData = (BYTE *) _pAlloc->Allocate( cbString );
    TBL_OFF ulKey = _pAlloc->PointerToOffset(pbNewData);
    RtlCopyMemory( pbNewData, pbData, cbString );

    pNextData = (struct HashEntry*) _pAlloc->AllocFixed();
    pulHashChain = (HASHKEY *) (_pAlloc->BufferAddr() + ulHashChainBase);

    //
    //  NOTE:  The fixed hash table at this point decides if it wants
    //          to grow the fixed area, with a possible rehash of the
    //          table to grow the number of buckets.  With the code
    //          below, the string hash table has no opportunity to
    //          grow the number of hash buckets.
    //

    //
    //  Now add the new data item.  The data item consists of a HASHKEY
    //  for the hash chain, followed by the size and format indicator,
    //  and the key for the string in the variable data.
    //

    *pKey = *pulHashChain = ++_cDataItems;
    Win4Assert(_cDataItems != 0);               // check for overflow
    pNextData->ulHashChain = 0;
    pNextData->usSizeFmt = usSizeFmt;
    pNextData->ulStringKey = ulKey;
}


//+-------------------------------------------------------------------------
//
//  Method:     CCompressedColHashString::_FindData, private
//
//  Synopsis:   Finds a data entry in the hash table.
//
//  Arguments:  [pbData] - pointer to data item
//              [cbDataSize] - size of data item
//              [vt] - type of the string, VT_LPWSTR or VT_LPSTR
//              [fNullTerminated] - TRUE if the string is NUL terminated
//
//  Returns:    The key of the string or stridInvalid
//
//  History:    7-17-95   dlee   Created
//
//--------------------------------------------------------------------------

ULONG CCompressedColHashString::_FindData(
    BYTE *   pbData,
    USHORT   cbDataSize,
    VARTYPE  vt,
    BOOL     fNullTerminated )
{
    // Create the (empty) table if nothing has allocated it yet.
    if ( 0 == _pAlloc )
    {
        _GrowHashTable();
    }

    //
    //  The upper 16 bits of the hash hold (character count << 1) | format
    //  bit; from that, derive the number of bytes to compare.
    //
    const ULONG ulFullHash = HashString( pbData, cbDataSize, vt, fNullTerminated );
    const USHORT usSizeFmt = (USHORT) (ulFullHash >> 16);

    ULONG cbString = usSizeFmt;
    if ( usSizeFmt & 1 )
    {
        cbString = usSizeFmt >> 1;
    }

    const ULONG iBucket = ulFullHash % _cHashEntries;
    HASHKEY * const aBuckets = (HASHKEY *) _pAlloc->BufferAddr();

    //
    //  Follow the chain hanging off the bucket until a link holds 0.
    //
    for ( HASHKEY * pLink = &aBuckets[iBucket]; 0 != *pLink; )
    {
        HashEntry * const pEntry = _IndexHashkey( *pLink );

        if ( usSizeFmt == pEntry->usSizeFmt )
        {
            BYTE * const pbStored = (BYTE*)_pAlloc->OffsetToPointer(pEntry->ulStringKey);

            if ( 0 == memcmp( pbStored, pbData, cbString ) )
            {
                // Same size, same format, same bytes: this is the key.
                return *pLink;
            }
        }

        pLink = &pEntry->ulHashChain;
    }

    // couldn't find the string in the table
    return stridInvalid;
} //_FindData


//+-------------------------------------------------------------------------
//
//  Method:     CCompressedColHashString::GetData, public
//
//  Synopsis:   Retrieve a data value from the hash table.
//
//  Arguments:  [pVarnt] - pointer to a variant structure in which to
//                      return a pointer to the data
//              [PreferredType] - preferred type of the result.
//              [ulKey] - the lookup key value
//              [PropId] - (unused) property id being retrieved.
//
//  Returns:    pVarnt is filled in with the data item from the hash table.
//
//  Notes:      The FreeVariant method must be called with the pVarnt
//              structure as an argument when it is no longer needed.
//
//--------------------------------------------------------------------------


GetValueResult  CCompressedColHashString::GetData(
    PROPVARIANT * pVarnt,
    VARTYPE PreferredType,
    ULONG ulKey,
    PROPID PropId
    )
{
    Win4Assert(ulKey <= _cDataItems);

    // Key 0 means "no value"; report it as an empty variant.
    if (ulKey == 0) {
        pVarnt->vt = VT_EMPTY;
        return GVRNotAvailable;
    }

    HashEntry* pData = ((HashEntry*) _pAlloc->FirstRow()) + ulKey - 1;
    BOOL fAscii = (pData->usSizeFmt & 1) != 0;

    // Stored character count plus one for the terminator added below.
    ULONG cchSize = (pData->usSizeFmt >> 1) + 1;

    //
    //  Give out the data as an LPSTR only if that's what the caller
    //  desires, and it's in the ascii range.
    //
    //  The buffer size is derived from this same decision.  Sizing it from
    //  PreferredType and fAscii separately produced a narrow-sized buffer
    //  for an ascii string with a preferred type other than VT_LPSTR or
    //  VT_LPWSTR, which the wide branch below then overran.
    //
    BOOL fReturnAnsi = (PreferredType == VT_LPSTR) && fAscii;
    ULONG cbSize = fReturnAnsi ? cchSize
                               : (ULONG) (cchSize * sizeof (WCHAR));

    // _GetStringBuffer takes a WCHAR count; round the byte size up.
    BYTE* pbBuf = (BYTE*)_GetStringBuffer((cbSize+1) / sizeof (WCHAR));
    BYTE* pbSource = (BYTE*)_pAlloc->OffsetToPointer(pData->ulStringKey);

    if (fReturnAnsi)
    {
        // Narrow in, narrow out: copy the bytes and terminate.
        RtlCopyMemory(pbBuf, pbSource, cchSize - 1);
        ((CHAR *)pbBuf)[cchSize - 1] = '\0';
        pVarnt->vt = VT_LPSTR;
        pVarnt->pszVal = (PSZ)pbBuf;
    }
    else
    {
        if (!fAscii) {
            // Wide in, wide out: copy the characters as they are.
            RtlCopyMemory(pbBuf, pbSource, (cchSize - 1) * sizeof(WCHAR));
        } else {
            //
            //  Narrow in, wide out: widen one byte at a time.  The source
            //  is read as BYTE so that a value of 0x80 or above is
            //  zero-extended rather than sign-extended.
            //
            for (unsigned i=0; i<cchSize-1; i++) {
                ((WCHAR*)pbBuf)[i] = pbSource[i];
            }
        }
        ((WCHAR *)pbBuf)[cchSize - 1] = L'\0';
        pVarnt->vt = VT_LPWSTR;
        pVarnt->pwszVal = (PWSTR)pbBuf;
    }
    return GVRSuccess;

}

//+---------------------------------------------------------------------------
//
//  Function:   GetData
//
//  Synopsis:   Copies a NULL terminated string into the pwszStr by looking
//              up the string identified by "ulKey".
//
//  Arguments:  [ulKey]   - Key of the string to lookup.
//              [pwszStr] - Pointer to the buffer to copy to.
//              [cwcStr]  - On input, it contains the length of the buffer in
//                          WCHARs. On output, it has the length of the string
//                          copied INCLUDING the terminating NULL.
//
//  Returns:    GVR* code
//
//  History:    5-19-95   srikants   Created
//
//  Notes:
//
//----------------------------------------------------------------------------
GetValueResult
CCompressedColHashString::GetData( ULONG ulKey,
                                   WCHAR * pwszStr,
                                   ULONG & cwcStr
                                 )
{
    Win4Assert(ulKey <= _cDataItems);

    if ( 0 != ulKey )
    {
        HashEntry* pData = ((HashEntry*) _pAlloc->FirstRow()) + ulKey - 1;
        BOOL fAscii = (pData->usSizeFmt & 1) != 0;
        Win4Assert( !fAscii );

        // Characters stored, and characters needed once terminated.
        const ULONG cwcStored = pData->usSizeFmt >> 1;
        const ULONG cwcNeeded = cwcStored + 1;

        if ( cwcStr >= cwcNeeded )
        {
            BYTE* pbSource = (BYTE*)_pAlloc->OffsetToPointer(pData->ulStringKey);

            RtlCopyMemory( pwszStr, pbSource, cwcStored * sizeof(WCHAR) );
            pwszStr[cwcStored] = L'\0';

            // Report the length copied, terminator included.
            cwcStr = cwcNeeded;
            return GVRSuccess;
        }

        // The caller's buffer cannot hold the string and its terminator.
        return GVRNotEnoughSpace;
    }

    // Key 0: nothing to copy.
    return GVRNotAvailable;
}

//+---------------------------------------------------------------------------
//
//  Function:   GetCountedWStr
//
//  Synopsis:   Returns a pointer to a string which is NOT null terminated.
//              The length of the string (in characters) is returned in
//              cwcStr.
//
//  Arguments:  [ulKey]  -  String to lookup
//              [cwcStr] -  OUTPUT - length of the string in WCHARs.
//
//  History:    5-19-95   srikants   Created
//
//  Notes:
//
//----------------------------------------------------------------------------

const WCHAR *
CCompressedColHashString::GetCountedWStr( ULONG ulKey,
                                          ULONG & cwcStr
                                        )
{
    Win4Assert(ulKey <= _cDataItems);

    // Key 0 yields no string; cwcStr is left untouched in that case.
    if ( 0 == ulKey )
    {
        return 0;
    }

    // Keys are 1-based indexes into the rows of fixed entries.
    HashEntry* pData = ((HashEntry*) _pAlloc->FirstRow()) + ulKey - 1;
    BOOL fAscii = (pData->usSizeFmt & 1) != 0;
    Win4Assert( !fAscii );

    BYTE* pbSource = (BYTE*)_pAlloc->OffsetToPointer(pData->ulStringKey);
    Win4Assert( ( (TBL_OFF)pbSource & (TBL_OFF) 0x1 ) == 0 );    // properly aligned on word.

    // The character count sits above the format bit.
    cwcStr = (pData->usSizeFmt >> 1);

    return (const WCHAR *) pbSource;
}

//+-------------------------------------------------------------------------
//
//  Method:     CCompressedColHashString::_GetStringBuffer, private
//
//  Synopsis:   Private helper for the public GetData method.  Gets
//              a string buffer of sufficient size to accommodate the
//              request.
//
//  Arguments:  [cchString] - number of characters required in buffer
//
//  Returns:    pointer to a buffer of sufficient size
//
//  Notes:
//
//  History:    03 Mar 1995     Alanw   Created
//
//--------------------------------------------------------------------------

PWSTR   CCompressedColHashString::_GetStringBuffer( unsigned cchString )
{
    //
    //  Try the two member buffers in order; fall back to the heap only
    //  when both are already in use.
    //
    if ( _Buf1.InUse() )
    {
        if ( _Buf2.InUse() )
        {
            return new WCHAR [ cchString ];
        }

        return _Buf2.Alloc(cchString);
    }

    return _Buf1.Alloc(cchString);
}

//+-------------------------------------------------------------------------
//
//  Method:     CCompressedColHashString::FreeVariant, public
//
//  Synopsis:   Free private data associated with a variant which had
//              been filled in by the GetData method.
//
//  Arguments:  [pVarnt] - pointer to the variant
//
//  Returns:    Nothing
//
//  Notes:
//
//--------------------------------------------------------------------------

void    CCompressedColHashString::FreeVariant(PROPVARIANT * pVarnt)
{
    // An empty variant owns nothing.
    if ( VT_EMPTY == pVarnt->vt )
    {
        return;
    }

    Win4Assert(pVarnt->vt == VT_LPWSTR || pVarnt->vt == VT_LPSTR);

    //
    //  Offer the pointer to each member buffer in turn.  If neither
    //  releases it, it is freed with delete [].
    //
    const bool fFromMemberBuffer = _Buf1.FreeConditionally( pVarnt->pwszVal ) ||
                                   _Buf2.FreeConditionally( pVarnt->pwszVal );

    if ( !fFromMemberBuffer )
    {
        delete [] pVarnt->pwszVal;
    }

    pVarnt->pwszVal = 0;            // To prevent accidental re-use
}


//+-------------------------------------------------------------------------
//
//  Method:     CCompressedColHashString::DataLength, public
//
//  Synopsis:   Returns the length, in characters, of the data item
//              identified by a key.
//
//  Arguments:  [kData] - key to the data
//
//  Returns:    USHORT number of characters in the data item.  Includes
//                      space for a terminating character.  Scale
//                      this by the size of a character for byte count.
//
//  Notes:
//
//--------------------------------------------------------------------------

USHORT  CCompressedColHashString::DataLength(ULONG kData)
{
    if ( 0 != kData )
    {
        // Keys are 1-based indexes into the rows of fixed entries.
        HashEntry* pEntry = ((HashEntry*) _pAlloc->FirstRow()) + kData - 1;

        // Character count sits above the format bit; add one for a terminator.
        return (pEntry->usSizeFmt >> 1) + 1;
    }

    // Key 0 has no data.
    return 0;
}


//+-------------------------------------------------------------------------
//
//  Method:     CCompressedColHashString::_GrowHashTable, private
//
//  Synopsis:   Grow the space allocated to the hash table and data
//              items.
//
//  Arguments:  - none -
//
//  Returns:    Nothing
//
//  Notes:      Called to allocate the initial data area.  Unlike the
//              like-named method in the fixed hash table, this is
//              called only for the initial allocation of data.  Data
//              Items are not re-hashed after being added to the table.
//
//--------------------------------------------------------------------------

const unsigned HASH_TABLE_SIZE = 174;   // Minimum hash table size
                                        // avg. chain length is about
                                        // 3 for a one-page table.
                                        //  NOTE: should be even to
                                        //      assure DWORD alignment of
                                        //      fixed data.

VOID CCompressedColHashString::_GrowHashTable( void )
{
    //
    //  The bucket count is fixed at HASH_TABLE_SIZE; this routine never
    //  rehashes, so no rehash flag is kept.
    //
    _cHashEntries = HASH_TABLE_SIZE;

    Win4Assert(_cDataItems == 0 && _pAlloc == NULL); // only called to initialize.
    Win4Assert(_cbDataWidth == sizeof (HashEntry));

    //
    //  Create the allocator, reserving room for HASH_TABLE_SIZE bucket
    //  heads and using _cbDataWidth as the width of each fixed entry.
    //
    _pAlloc = new CFixedVarAllocator( TRUE,
                                      TRUE,
                                      _cbDataWidth,
                                      HASH_TABLE_SIZE*sizeof (HASHKEY) );
}