windows-server-2003/inetsrv/query/bigtable/colhash.cxx


								//+-------------------------------------------------------------------------

								//

								//  Microsoft Windows

								//  Copyright (C) Microsoft Corporation, 1994 - 2000.

								//

								//  File:       colhash.cxx

								//

								//  Contents:   Hash table compressions for large tables.

								//

								//  Classes:    CCompressedColHash

								//

								//  Functions:  GuidHash - Hash function for GUIDs

								//

								//  History:    13 Apr 1994     AlanW    Created

								//

								//--------------------------------------------------------------------------


								#include "pch.cxx"

								#pragma hdrstop


								#include <objcur.hxx>

								#include <tblvarnt.hxx>


								#include "tabledbg.hxx"

								#include "colcompr.hxx"


								// Upper bound on the number of hash buckets; _NextHashSize clamps its
								// result to this value and growth checks stop rehashing once it is reached.
								const USHORT MAX_HASH_TABLE_SIZE = 32767;     // Maximum hash table size


								//+-------------------------------------------------------------------------

								//

								//  Function:   GuidHash, public

								//

								//  Synopsis:   Hash a GUID value for use in a hash table.

								//

								//  Arguments:  [pbData] - pointer to the value to be hashed.

								//              [cbData] - should be sizeof (GUID), unused

								//

								//  Returns:    ULONG - Hash value for the input GUID

								//

								//  Notes:      The hash function just xors a few selected fields out

								//              of the GUID structure.  It is intended to work well for

								//              both generated GUIDs (from UuidCreate) and administratively

								//              assigned GUIDs like OLE IIDs and CLSIDs.

								//

								//--------------------------------------------------------------------------


								ULONG GuidHash(
								    BYTE *pbData,
								    USHORT cbData
								) {
								    //
								    //  cbData is not consulted; the buffer is simply reinterpreted as a
								    //  (possibly unaligned) GUID.
								    //
								    UNALIGNED GUID *pGuid = (GUID *)pbData;

								    //
								    //  Start from the 32-bit Data1 field and fold in three bytes of
								    //  Data4, each at a different bit position.
								    //
								    ULONG ulHash = pGuid->Data1;
								    ulHash ^= (pGuid->Data4[0] << 16);
								    ulHash ^= (pGuid->Data4[6] << 8);
								    ulHash ^= (pGuid->Data4[7]);

								    return ulHash;
								}


								//+-------------------------------------------------------------------------

								//

								//  Method:     CCompressedColHash::DefaultHash, public static

								//

								//  Synopsis:   Generic hash function

								//

								//  Arguments:  [pbData] - pointer to the value to be hashed.

								//              [cbData] - size of pbData

								//

								//  Returns:    ULONG - Hash value for the input data

								//

								//--------------------------------------------------------------------------


								//static

								ULONG CCompressedColHash::DefaultHash(
								    BYTE *pbData,
								    USHORT cbData
								) {
								    //
								    //  Seed the hash with the length, then for every input byte shift
								    //  the running value left one bit and xor the byte in.
								    //
								    ULONG ulHash = cbData;

								    BYTE *pbEnd = pbData + cbData;
								    for (BYTE *pb = pbData; pb != pbEnd; ++pb) {
								        ulHash = (ulHash << 1) ^ *pb;
								    }

								    return ulHash;
								}


								//+-------------------------------------------------------------------------

								//

								//  Method:     CCompressedColHash::CCompressedColHash, public

								//

								//  Synopsis:   Constructor for a hash compressed column.

								//

								//  Arguments:  [vtData] - type of each data item

								//              [cbDataWidth] - size of each data item

								//              [pfnHashFunction] - pointer to hash function

								//

								//  Returns:    Nothing (constructors have no return value).

								//

								//  Notes:

								//

								//--------------------------------------------------------------------------


								CCompressedColHash::CCompressedColHash(
								    VARTYPE     vtData,
								    USHORT      cbDataWidth,
								    PFNHASH     pfnHashFunction) :
								        CCompressedCol(
								            vtData,                     // DataType
								            sizeof (HASHKEY),           // _cbKeyWidth
								            CCompressedCol::FixedHash   // _CompressionType
								        ),

								        //
								        //  No storage is allocated here: the table starts out empty and
								        //  the first call to _AddData allocates it via _GrowHashTable.
								        //
								        _cbDataWidth(cbDataWidth),
								        _pfnHash(pfnHashFunction),
								        _pHashTable(NULL), _cHashEntries(0),
								        _pDataItems(NULL), _cDataItems(0),
								        _fGrowthInProgress(FALSE),
								        _pData(NULL), _cbData(0),
								        _ulMemCounter(0)
								{
								    //
								    //  _maxChain is otherwise first written by _GrowHashTable.  Give it
								    //  a defined value here so the object is never observed with an
								    //  indeterminate chain statistic.  It is assigned in the body
								    //  rather than in the initializer list because the member's
								    //  declaration is not visible from this file.
								    //
								    _maxChain = 0;
								}


								CCompressedColHash::~CCompressedColHash( )
								{
								    //
								    //  Release the single buffer that backs both the hash buckets and
								    //  the data items, if one was ever allocated.
								    //
								    if (NULL != _pData) {
								        TblPageDealloc(_pData, _ulMemCounter);
								        _cbData = 0;
								        _pData = NULL;
								    }

								    //  Every byte charged to the memory counter should be gone by now.
								    Win4Assert(_ulMemCounter == 0);
								}


								//+-------------------------------------------------------------------------

								//

								//  Method:     CCompressedColHash::AddData, public

								//

								//  Synopsis:   Add a data entry to the hash table if it is not

								//              already there.

								//

								//  Arguments:  [pVarnt] - pointer to data item

								//              [pKey] - pointer to lookup key value

								//              [reIndicator] - returns an indicator variable for

								//                      problems

								//

								//  Returns:    pKey is filled in with the index of the data item in

								//              the data array.  reIndicator is filled with an indication

								//              of problems.

								//

								//  Notes:

								//

								//--------------------------------------------------------------------------


								VOID    CCompressedColHash::AddData(
								    PROPVARIANT const * const pVarnt,
								    ULONG* pKey,
								    GetValueResult& reIndicator
								) {
								    //
								    //  VT_EMPTY is never stored in the table; it is represented by
								    //  key 0.
								    //
								    if (VT_EMPTY == pVarnt->vt) {
								        *pKey = 0;
								        reIndicator = GVRSuccess;
								        return;
								    }

								    Win4Assert(pVarnt->vt == DataType);

								    //
								    //  View the variant as a CTableVariant to get at its size and
								    //  pointer-location helpers.
								    //
								    CTableVariant *pTblVar = (CTableVariant *)pVarnt;

								    const USHORT cbValue = (USHORT) pTblVar->VarDataSize();
								    Win4Assert(cbValue && cbValue == _cbDataWidth);

								    //
								    //  Locate the value bytes: the data pointer is held either in the
								    //  first word (pszVal) or the second word (blob.pBlobData) of the
								    //  variant.
								    //
								    BYTE *pbValue;
								    if (!pTblVar->VariantPointerInFirstWord( )) {
								        Win4Assert(pTblVar->VariantPointerInSecondWord( ));
								        pbValue = (BYTE *) pTblVar->blob.pBlobData;
								    } else {
								        pbValue = (BYTE *) pTblVar->pszVal;
								    }

								    _AddData(pbValue, cbValue, pKey);
								    reIndicator = GVRSuccess;
								}


								//+-------------------------------------------------------------------------

								//

								//  Method:     CCompressedColHash::_AddData, protected

								//

								//  Synopsis:   Helper for the public AddData method.  Adds

								//              a data entry to the hash table (if it does not already

								//              exist).

								//

								//  Arguments:  [pbData] - pointer to data item

								//              [cbDataSize] - size of data item

								//              [pKey] - pointer to lookup key value

								//

								//  Returns:    pKey is filled in with the index of the data item in

								//              the data array.

								//

								//  Notes:

								//

								//--------------------------------------------------------------------------


								VOID    CCompressedColHash::_AddData(
								    BYTE *pbData,
								    USHORT cbDataSize,
								    ULONG* pKey
								) {
								    Win4Assert(cbDataSize == _cbDataWidth);

								    //  First use: nothing is allocated yet, so create the initial table.
								    if (NULL == _pData)
								        _GrowHashTable();

								    //
								    //  Pick the bucket, then walk its chain.  Each stored item begins
								    //  with a HASHKEY link to the next item in the chain (0 terminates)
								    //  followed by the fixed-width value bytes.
								    //
								    const ULONG ulHash = _pfnHash(pbData, cbDataSize);
								    HASHKEY* pLink = &_pHashTable[ulHash % _cHashEntries];
								    USHORT cLinks = 0;

								    for (HASHKEY kItem = *pLink; kItem != 0; kItem = *pLink) {
								        ++cLinks;
								        HASHKEY* const pItem = _IndexHashkey( kItem );

								        if (0 == memcmp((BYTE *) (pItem + 1), pbData, cbDataSize)) {
								            //  Value is already present; hand back its existing key.
								            *pKey = kItem;
								            return;
								        }
								        pLink = pItem;
								    }

								    //  Remember the longest chain seen; _GrowHashTable consults it.
								    if (_maxChain < cLinks)
								        _maxChain = cLinks;

								    //
								    //  The new item would go immediately after the last stored item.
								    //
								    const size_t cbItem = sizeof (HASHKEY) + _cbDataWidth;
								    HASHKEY* pNewItem =
								        (HASHKEY *) ((BYTE *)_pDataItems + (_cDataItems) * cbItem);

								    const bool fOutOfRoom =
								        ((BYTE*)pNewItem + cbItem - (BYTE *)_pData) > (int) _cbData;

								    const bool fChainsTooLong =
								        _cDataItems > (ULONG) ( _cHashEntries * 3 ) &&
								        _cHashEntries < MAX_HASH_TABLE_SIZE &&
								        !_fGrowthInProgress;

								    if (fOutOfRoom || fChainsTooLong) {
								        //
								        //  Either the item does not fit or the buckets are overloaded.
								        //  Growing may move the buffer and rehash every item, which
								        //  invalidates pLink and pNewItem, so start over afterwards.
								        //
								        _GrowHashTable();
								        _AddData(pbData, cbDataSize, pKey);
								        return;
								    }

								    //
								    //  Append the item: hook it onto the end of the chain, terminate
								    //  its own link, and copy the value in after the link.  Keys are
								    //  1-based item indexes.
								    //
								    const HASHKEY kNew = ++_cDataItems;
								    *pLink = kNew;
								    *pKey = kNew;
								    Win4Assert(_cDataItems != 0);               // check for overflow

								    *pNewItem = 0;
								    RtlCopyMemory((BYTE *)(pNewItem + 1), pbData, _cbDataWidth);
								}


								//+-------------------------------------------------------------------------

								//

								//  Method:     CCompressedColHash::_Rehash, protected

								//

								//  Synopsis:   Helper function for the _GrowHashTable method.

								//              reinserts an existing item into the hash table.

								//

								//  Arguments:  [pbData] - pointer to data item

								//              [kData] - index to the data item in the table

								//

								//  Returns:    Nothing

								//

								//  Notes:

								//

								//--------------------------------------------------------------------------


								VOID    CCompressedColHash::_Rehash(
								    HASHKEY kData,
								    BYTE *pbData
								) {
								    Win4Assert(_pData != NULL && kData > 0 && kData <= _cDataItems);

								    //
								    //  Find the bucket for this value and run to the end of its chain.
								    //  No comparison is made along the way; the item is simply
								    //  appended.
								    //
								    const ULONG ulHash = _pfnHash(pbData, _cbDataWidth);
								    HASHKEY* pLink = &_pHashTable[ulHash % _cHashEntries];

								    USHORT cLinks = 0;
								    while (*pLink != 0) {
								        ++cLinks;
								        pLink = _IndexHashkey( *pLink );
								    }

								    if (_maxChain < cLinks)
								        _maxChain = cLinks;

								    //
								    //  Link item kData onto the tail and terminate its own link.  The
								    //  value bytes are expected to sit directly after the link word.
								    //
								    HASHKEY* const pItem = _IndexHashkey( kData );

								    *pLink = kData;
								    *pItem = 0;
								    Win4Assert((BYTE*)(pItem + 1) == pbData);
								}


								//+-------------------------------------------------------------------------

								//

								//  Method:     CCompressedColHash::GetData, public

								//

								//  Synopsis:   Retrieve a value from the hash table.

								//

								//  Arguments:  [pVarnt] - pointer to variant in which to return the data

								//              [PreferredType] - Preferred data type

								//              [ulKey] - the lookup key value

								//              [PropId] - (unused) property id being retrieved.

								//

								//  Returns:    pVarnt is filled with the result of the lookup.

								//

								//  Notes:      The PreferredType expresses the caller's preference only.

								//              This method is free to return whatever type is most

								//              convenient.

								//

								//              The returned data does not conform to any alignment

								//              restrictions on the data.

								//

								//--------------------------------------------------------------------------


								GetValueResult  CCompressedColHash::GetData(
								    PROPVARIANT * pVarnt,
								    VARTYPE PreferredType,
								    ULONG ulKey,
								    PROPID PropId
								) {
								    Win4Assert(PreferredType == DataType && ulKey >= 1 && ulKey <= _cDataItems);

								    //
								    //  Keys are 1-based item indexes; anything outside 1.._cDataItems
								    //  has no stored value.
								    //
								    if (ulKey < 1 || ulKey > _cDataItems) {
								        pVarnt->vt = VT_EMPTY;
								        return GVRNotAvailable;
								    }

								    //  Set the type before asking the variant where its pointer lives.
								    pVarnt->vt = DataType;

								    //
								    //  Step to item (ulKey-1), then past its leading HASHKEY link to
								    //  reach the value bytes.
								    //
								    const size_t cbItem = sizeof (HASHKEY) + _cbDataWidth;
								    BYTE *pbValue = ((BYTE *)_pDataItems + (ulKey-1) * cbItem) +
								                    sizeof (HASHKEY);

								    //
								    //  The variant is given a pointer straight into the table's
								    //  buffer; no copy of the value is made.
								    //
								    CTableVariant *pTblVar = (CTableVariant *)pVarnt;
								    if (pTblVar->VariantPointerInFirstWord( )) {
								        pTblVar->pszVal = (CHAR*)pbValue;
								    } else {
								        Win4Assert(pTblVar->VariantPointerInSecondWord( ));
								        pTblVar->blob.pBlobData = pbValue;
								    }

								    return GVRSuccess;
								}


								void    CCompressedColHash::FreeVariant(PROPVARIANT * pvarnt)
								{
								    //
								    //  Deliberately a no-op.  GetData fills variants with pointers into
								    //  this object's own buffer rather than separately allocated
								    //  memory, so presumably there is nothing for the caller to release.
								    //
								}


								//+-------------------------------------------------------------------------

								//

								//  Method:     CCompressedColHash::_GrowHashTable, protected

								//

								//  Synopsis:   Grow the space allocated to the hash table and data

								//              items.

								//

								//  Arguments:  - none -

								//

								//  Returns:    Nothing

								//

								//  Notes:      Also called to allocate the initial data area.

								//

								//              The number of hash buckets starts out at a low

								//              number, then is increased as the amount of data

								//              grows.  Data items must be rehashed when this occurs.

								//              Since items are identified by their offset in the

								//              data array, this must not change while rehashing.

								//

								//--------------------------------------------------------------------------


								// Bucket count _GrowHashTable uses for the very first allocation
								// (when _pData is still NULL).
								const unsigned MIN_HASH_TABLE_SIZE = 11;        // Minimum hash table size


								inline USHORT CCompressedColHash::_NextHashSize(
								    HASHKEY cItems,
								    USHORT cHash
								) {
								    //
								    //  Step the bucket count through n -> 2n+1 (always at least once)
								    //  until it is no smaller than the number of items or has reached
								    //  MAX_HASH_TABLE_SIZE.
								    //
								    //  The count is accumulated in a ULONG and the loop stops at the
								    //  cap.  Doing the doubling in a USHORT wraps and sticks at 65535,
								    //  which would never terminate for more than 65535 items.  The
								    //  cItems argument is used rather than reading _cDataItems; the
								    //  caller in _GrowHashTable passes _cDataItems, so the result is
								    //  unchanged for it.
								    //
								    ULONG cNext = cHash;

								    do {
								        cNext = cNext*2 + 1;
								    } while (cNext < cItems && cNext < MAX_HASH_TABLE_SIZE);

								    return  (cNext < MAX_HASH_TABLE_SIZE) ? (USHORT) cNext
								                                          : MAX_HASH_TABLE_SIZE;
								}


								VOID CCompressedColHash::_GrowHashTable( void )
								{
								    Win4Assert(!_fGrowthInProgress &&
								             "Recursive call to CCompressedColHash::_GrowHashTable");

								    //
								    //  While this flag is set, _AddData will not request growth on
								    //  account of long hash chains.
								    //
								    _fGrowthInProgress = TRUE;

								    //
								    //  Decide how many hash buckets the new layout will have.  The
								    //  default is to keep the current count (only the data area
								    //  grows); the first allocation starts at the minimum, and an
								    //  overloaded table is enlarged and rehashed.
								    //
								    USHORT cNewHashEntries = _cHashEntries;
								    int fRehash = FALSE;

								    if (_pData == NULL) {
								        cNewHashEntries = MIN_HASH_TABLE_SIZE;
								    } else if (_cHashEntries < MAX_HASH_TABLE_SIZE &&
								               (_cDataItems > (ULONG) _cHashEntries*2 ||
								                (_cDataItems > _cHashEntries && _maxChain > 3))) {
								        cNewHashEntries = _NextHashSize(_cDataItems, _cHashEntries);
								        fRehash = TRUE;
								        tbDebugOut((DEB_ITRACE, "Growing hash table, old,new sizes = %d,%d\n",
								                                        _cHashEntries, cNewHashEntries));
								    }

								    //
								    //  Compute the required size of the hash table and data.  The
								    //  bucket array is sized from the NEW bucket count: the data items
								    //  are relocated to just past cNewHashEntries buckets below, so
								    //  sizing from the old count could leave the buffer too small
								    //  after a rehash.  Room for four extra items is included.
								    //
								    ULONG cbSize = cNewHashEntries * sizeof(HASHKEY);
								    cbSize += (_cDataItems + 4) * (_cbDataWidth + sizeof (HASHKEY));
								    cbSize = TblPageGrowSize(cbSize, TRUE);
								    Win4Assert(cbSize > _cbData || (fRehash && cbSize == _cbData));

								    //
								    //  Obtain the new buffer.  The code below handles both outcomes of
								    //  the realloc: the same address coming back, or a different
								    //  block, in which case the old one is copied from and released
								    //  here.
								    //
								    BYTE *pbNewData;

								    if (_pData && cbSize < TBL_PAGE_MAX_SEGMENT_SIZE) {
								        pbNewData = (BYTE *)
								            TblPageRealloc(_pData, _ulMemCounter, cbSize, 0);
								    } else {
								        pbNewData =
								            (BYTE *)TblPageAlloc(cbSize, _ulMemCounter, TBL_SIG_COMPRESSED);
								    }

								    tbDebugOut((DEB_ITRACE, "New hash table at = %x\n", pbNewData));

								    if (_pData != NULL && !fRehash) {
								        //
								        //  Same bucket count: the layout is unchanged, so the old
								        //  contents can be carried over verbatim and the links remain
								        //  valid (they are item indexes, not addresses).
								        //
								        if (_pData != pbNewData) {
								            RtlCopyMemory(pbNewData, _pData, _cbData);
								            TblPageDealloc(_pData, _ulMemCounter, _cbData);
								            _pData = pbNewData;
								        }
								        _cbData = cbSize;
								        _pHashTable = (HASHKEY *) _pData;
								        _pDataItems = (BYTE *) (_pHashTable + _cHashEntries);
								    } else {
								        //
								        //  Initial allocation or rehash: the bucket array changes
								        //  size, so the data items shift and every chain is rebuilt.
								        //
								        BYTE *pOldDataItems = _pDataItems;
								        VOID *pOldData = _pData;
								        ULONG cbOldSize = _cbData;

								        _pData = pbNewData;
								        _cbData = cbSize;
								        _pHashTable = (HASHKEY *)_pData;
								        _cHashEntries = cNewHashEntries;
								        _pDataItems = (BYTE *) (_pHashTable + _cHashEntries);

								        //
								        //  Slide the items to their new position.  Source and
								        //  destination may overlap when the buffer was grown in
								        //  place, hence RtlMoveMemory.
								        //
								        if (pOldData != NULL)
								            RtlMoveMemory(_pDataItems,
								                          pOldDataItems,
								                          _cDataItems * (sizeof (HASHKEY) + _cbDataWidth));

								        RtlZeroMemory(_pHashTable, cNewHashEntries * sizeof (HASHKEY));
								        _maxChain = 0;

								        //
								        //  Now re-add all old data items to the hash table.  Items
								        //  keep their positions, so previously returned keys remain
								        //  valid.
								        //
								        pOldDataItems = _pDataItems;
								        for (HASHKEY i=1; i<=_cDataItems; i++) {
								            pOldDataItems += sizeof (HASHKEY);  // skip hash chain
								            _Rehash(i, pOldDataItems);
								            pOldDataItems += _cbDataWidth;      // skip data item
								        }

								        if (pOldData != NULL && pOldData != _pData)
								            TblPageDealloc(pOldData, _ulMemCounter, cbOldSize);
								    }

								    _fGrowthInProgress = FALSE;
								    return;
								}


								//+---------------------------------------------------------------------------

								//

								//  Function:   _ClearAll

								//

								//  Synopsis:   Method clears all the data in the "fixed width" part of the

								//              memory buffer.

								//

								//  Arguments:  (none)

								//

								//  History:    12-16-94   srikants   Created

								//

								//  Notes:

								//

								//----------------------------------------------------------------------------


								void CCompressedColHash::_ClearAll()

								{

								    RtlZeroMemory(_pHashTable, _cHashEntries * sizeof (HASHKEY));

								    RtlZeroMemory(_pDataItems, _cDataItems * _cbDataWidth );

								    _cDataItems = 0;

								}