You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
380 lines
9.8 KiB
380 lines
9.8 KiB
/*++
|
|
|
|
Copyright (c) 1998-2001 Microsoft Corporation
|
|
|
|
Module Name :
|
|
WordHash.h
|
|
|
|
Abstract:
|
|
LKRhash test harness: hash table of words
|
|
|
|
Author:
|
|
George V. Reilly (GeorgeRe) 06-Jan-1998
|
|
|
|
Environment:
|
|
Win32 - User Mode
|
|
|
|
Project:
|
|
LKRhash
|
|
|
|
Revision History:
|
|
|
|
--*/
|
|
|
|
#ifndef __WORDHASH_H__
|
|
#define __WORDHASH_H__
|
|
|
|
#define MAXKEYS 1000000
|
|
#define MAX_THREADS MAXIMUM_WAIT_OBJECTS
|
|
#define MAX_STRSIZE 300
|
|
|
|
#ifdef LKR_PUBLIC_API
|
|
|
|
typedef PLkrHashTable WordBaseTable;
|
|
#include <lkrhash.h>
|
|
|
|
#endif // LKR_PUBLIC_API
|
|
|
|
|
|
#ifndef __LKRHASH_NO_NAMESPACE__
|
|
#define LKRHASH_NS LKRhash
|
|
using namespace LKRhash;
|
|
#else
|
|
#define LKRHASH_NS
|
|
#endif // !__LKRHASH_NO_NAMESPACE__
|
|
|
|
#ifndef __HASHFN_NO_NAMESPACE__
|
|
using namespace HashFn;
|
|
#endif // !__HASHFN_NO_NAMESPACE__
|
|
|
|
|
|
#ifndef LKR_PUBLIC_API
|
|
typedef LKRHASH_NS::CLKRHashTable WordBaseTable;
|
|
// typedef LKRHASH_NS::CLKRLinearHashTable WordBaseTable;
|
|
#endif
|
|
|
|
|
|
|
|
// A string wrapper class that keeps track of the length of the string data.
|
|
// A more useful string class would refcount the data and have copy-on-write
|
|
// semantics (or use MFC's CString or STL's string classes).
|
|
|
|
class CStr
|
|
{
|
|
public:
|
|
const char* m_psz;
|
|
short m_cch;
|
|
bool m_fDynamic;
|
|
|
|
static int sm_cchMax;
|
|
|
|
CStr()
|
|
: m_psz(NULL),
|
|
m_cch(0),
|
|
m_fDynamic(false)
|
|
{
|
|
}
|
|
|
|
CStr(
|
|
const char* psz,
|
|
int cch,
|
|
bool fDynamic)
|
|
: m_psz(NULL),
|
|
m_cch(static_cast<short>(cch)),
|
|
m_fDynamic(fDynamic)
|
|
{
|
|
if (fDynamic)
|
|
Set(psz, cch);
|
|
else
|
|
m_psz = psz;
|
|
|
|
}
|
|
|
|
void Set(
|
|
const char* psz,
|
|
int cch)
|
|
{
|
|
delete [] const_cast<char*>(m_psz);
|
|
m_psz = new char[cch+1];
|
|
if (m_psz != NULL)
|
|
{
|
|
strcpy(const_cast<char*>(m_psz), psz);
|
|
m_cch = static_cast<short>(cch);
|
|
m_fDynamic = true;
|
|
sm_cchMax = max(m_cch, sm_cchMax);
|
|
}
|
|
else
|
|
{
|
|
m_cch = 0;
|
|
m_fDynamic = false;
|
|
}
|
|
}
|
|
|
|
~CStr()
|
|
{
|
|
if (m_fDynamic)
|
|
delete [] const_cast<char*>(m_psz);
|
|
}
|
|
};
|
|
|
|
#ifdef HASHTEST_STATIC_DATA
|
|
// length of longest string seen
|
|
int CStr::sm_cchMax = 0;
|
|
#endif
|
|
|
|
|
|
// a word from the data file, which contains one 'word' per line (may
|
|
// include spaces).
|
|
|
|
class CWord
|
|
{
|
|
public:
|
|
int m_cNotFound;
|
|
CStr m_str;
|
|
bool m_fInserted;
|
|
bool m_fIterated;
|
|
LONG m_cRefs;
|
|
LONG m_cInsertIfNotFounds;
|
|
int m_iIndex;
|
|
|
|
CWord()
|
|
: m_cNotFound(0),
|
|
m_fInserted(false),
|
|
m_fIterated(false),
|
|
m_cRefs(0),
|
|
m_cInsertIfNotFounds(0),
|
|
m_iIndex(-1)
|
|
{
|
|
}
|
|
|
|
~CWord()
|
|
{
|
|
IRTLASSERT(m_cRefs == 0);
|
|
#ifdef IRTLDEBUG
|
|
if (m_cRefs != 0)
|
|
IRTLTRACE(_TEXT("\"%hs\": %d, %d\n"), m_str.m_psz, m_cRefs,
|
|
m_cInsertIfNotFounds);
|
|
#endif
|
|
}
|
|
};
|
|
|
|
|
|
// globals
|
|
extern int g_nokeys ;
|
|
extern CWord g_wordtable[MAXKEYS];
|
|
|
|
const char*
|
|
LK_AddRefReasonCode2String(
|
|
LK_ADDREF_REASON lkar);
|
|
|
|
#define DO_REF_COUNT false
|
|
|
|
// A hash table of CWords, indexed by CStr*s.
|
|
class CWordHash
|
|
#ifndef LKR_PUBLIC_API
|
|
: public CTypedHashTable<CWordHash, CWord, const CStr*, DO_REF_COUNT, WordBaseTable>
|
|
#else
|
|
: public TypedLkrHashTable<CWordHash, CWord, const CStr*, DO_REF_COUNT>
|
|
#endif
|
|
{
|
|
public:
|
|
static bool sm_fCaseInsensitive;
|
|
static bool sm_fMemCmp;
|
|
static int sm_nLastChars;
|
|
static bool sm_fNonPagedAllocs;
|
|
static bool sm_fRefTrace;
|
|
static bool sm_fMultiKeys;
|
|
static bool sm_fUseLocks;
|
|
|
|
#ifndef LKR_PUBLIC_API
|
|
friend class CTypedHashTable<CWordHash, CWord, const CStr*,
|
|
DO_REF_COUNT, WordBaseTable>;
|
|
#else
|
|
friend class TypedLkrHashTable<CWordHash, CWord, const CStr*>;
|
|
#endif
|
|
|
|
static const CStr*
|
|
ExtractKey(const CWord* pKey)
|
|
{
|
|
return &pKey->m_str;
|
|
}
|
|
|
|
static DWORD
|
|
CalcKeyHash(const CStr* pstrKey)
|
|
{
|
|
const char* psz = pstrKey->m_psz;
|
|
|
|
// use only the last few chars instead of whole string?
|
|
if (sm_nLastChars > 0 && pstrKey->m_cch >= sm_nLastChars)
|
|
psz = pstrKey->m_psz + pstrKey->m_cch - sm_nLastChars;
|
|
|
|
IRTLASSERT(pstrKey->m_psz <= psz
|
|
&& psz < pstrKey->m_psz + pstrKey->m_cch);
|
|
|
|
if (sm_fCaseInsensitive)
|
|
return HashStringNoCase(psz, pstrKey->m_cch);
|
|
else
|
|
return HashString(psz, pstrKey->m_cch);
|
|
}
|
|
|
|
static int
|
|
CompareKeys(const CStr* pstrKey1, const CStr* pstrKey2)
|
|
{
|
|
int nCmp;
|
|
|
|
if (sm_fCaseInsensitive)
|
|
{
|
|
#if 1
|
|
// if (sm_fMultiKeys)
|
|
{
|
|
// Hack that works for ASCII data
|
|
nCmp = (pstrKey1->m_psz[0] & 0xDF)
|
|
- (pstrKey2->m_psz[0] & 0xDF);
|
|
|
|
if (nCmp != 0)
|
|
return nCmp;
|
|
}
|
|
#endif
|
|
// nCmp = pstrKey1->m_cch - pstrKey2->m_cch;
|
|
|
|
// if (nCmp != 0)
|
|
// return nCmp;
|
|
if (sm_fMemCmp)
|
|
return _memicmp(pstrKey1->m_psz, pstrKey2->m_psz,
|
|
pstrKey1->m_cch);
|
|
else
|
|
return _stricmp(pstrKey1->m_psz, pstrKey2->m_psz);
|
|
}
|
|
else
|
|
{
|
|
#if 1
|
|
// if (sm_fMultiKeys)
|
|
{
|
|
nCmp = pstrKey1->m_psz[0] - pstrKey2->m_psz[0];
|
|
|
|
if (nCmp != 0)
|
|
return nCmp;
|
|
}
|
|
#endif
|
|
// nCmp = pstrKey1->m_cch - pstrKey2->m_cch;
|
|
|
|
// if (nCmp != 0)
|
|
// return nCmp;
|
|
if (sm_fMemCmp)
|
|
return memcmp(pstrKey1->m_psz, pstrKey2->m_psz,
|
|
pstrKey1->m_cch);
|
|
else
|
|
return strcmp(pstrKey1->m_psz, pstrKey2->m_psz);
|
|
}
|
|
}
|
|
|
|
static LONG
|
|
AddRefRecord(CWord* pRec, LK_ADDREF_REASON lkar)
|
|
{
|
|
int nIncr = (lkar < 0) ? -1 : +1;
|
|
LONG cRefs = nIncr + InterlockedExchangeAdd(&pRec->m_cRefs, nIncr);
|
|
if (sm_fRefTrace)
|
|
IRTLTRACE(_TEXT("\tAddRef key(%d, %p: \"%hs\"), %hs (%s), = %d\n"),
|
|
pRec - g_wordtable, pRec, pRec->m_str.m_psz,
|
|
(lkar > 0) ? "+1" : "-1",
|
|
LKR_AddRefReasonAsString(lkar), cRefs);
|
|
IRTLASSERT(cRefs >= 0);
|
|
return cRefs;
|
|
}
|
|
|
|
CWordHash(
|
|
unsigned maxload, // Bound on avg chain length
|
|
size_t initsize, // Initial size of hash table.
|
|
size_t num_subtbls // #subordinate hash tables.
|
|
)
|
|
#ifndef LKR_PUBLIC_API
|
|
: CTypedHashTable<CWordHash, CWord, const CStr*,
|
|
DO_REF_COUNT, WordBaseTable>
|
|
("wordhash", maxload, initsize, num_subtbls,
|
|
sm_fMultiKeys, sm_fUseLocks
|
|
# ifdef LKRHASH_KERNEL_MODE
|
|
, sm_fNonPagedAllocs // use paged or NP pool
|
|
# endif
|
|
)
|
|
#else // LKR_PUBLIC_API
|
|
: TypedLkrHashTable<CWordHash, CWord, const CStr*>
|
|
("wordhash", (LK_TABLESIZE) initsize,
|
|
sm_fMultiKeys, sm_fUseLocks)
|
|
#endif // LKR_PUBLIC_API
|
|
{}
|
|
|
|
template <class _InputIterator>
|
|
CWordHash(
|
|
_InputIterator f,
|
|
_InputIterator l,
|
|
unsigned maxload, // Bound on avg chain length
|
|
size_t initsize, // Initial size of hash table.
|
|
size_t num_subtbls // #subordinate hash tables.
|
|
)
|
|
#ifndef LKR_PUBLIC_API
|
|
: CTypedHashTable<CWordHash, CWord, const CStr*,
|
|
DO_REF_COUNT, WordBaseTable>
|
|
(f, l, "wordhash", maxload, initsize, num_subtbls)
|
|
#else
|
|
: TypedLkrHashTable<CWordHash, CWord, const CStr*>
|
|
(f, l, "wordhash", (LK_TABLESIZE) initsize)
|
|
#endif
|
|
{}
|
|
|
|
static const TCHAR*
|
|
HashMethod()
|
|
{
|
|
TCHAR tszLast[20];
|
|
static TCHAR s_tsz[80];
|
|
|
|
if (sm_nLastChars > 0)
|
|
_stprintf(tszLast, _TEXT("last %d"), sm_nLastChars);
|
|
else
|
|
_tcscpy(tszLast, _TEXT("all"));
|
|
|
|
_stprintf(s_tsz, _TEXT("case-%ssensitive, %scmp, %s chars"),
|
|
sm_fCaseInsensitive ? _TEXT("in") : _TEXT(""),
|
|
sm_fMemCmp ? _TEXT("mem") : _TEXT("str"),
|
|
tszLast);
|
|
|
|
return s_tsz;
|
|
}
|
|
|
|
#ifdef LKR_PUBLIC_API
|
|
typedef CLKRHashTable BaseHashTable;
|
|
typedef BaseHashTable::TableLock TableLock;
|
|
typedef BaseHashTable::BucketLock BucketLock;
|
|
|
|
enum {
|
|
NODES_PER_CLUMP = BaseHashTable::NODES_PER_CLUMP,
|
|
};
|
|
|
|
static const TCHAR* ClassName()
|
|
{return _TEXT("PLkrHashTable");}
|
|
|
|
static LK_TABLESIZE NumSubTables(DWORD& rinitsize, DWORD& rnum_subtbls)
|
|
{ return BaseHashTable::NumSubTables(rinitsize, rnum_subtbls); }
|
|
|
|
int NumSubTables() const
|
|
{ return reinterpret_cast<BaseHashTable*>(m_plkr)->NumSubTables(); }
|
|
|
|
void SetTableLockSpinCount(WORD wSpins)
|
|
{ reinterpret_cast<BaseHashTable*>(m_plkr)->SetTableLockSpinCount(wSpins);}
|
|
|
|
void SetBucketLockSpinCount(WORD wSpins)
|
|
{ reinterpret_cast<BaseHashTable*>(m_plkr)->SetBucketLockSpinCount(wSpins);}
|
|
|
|
CLKRHashTableStats GetStatistics() const
|
|
{ return reinterpret_cast<BaseHashTable*>(m_plkr)->GetStatistics();}
|
|
|
|
#endif // LKR_PUBLIC_API
|
|
|
|
protected:
|
|
~CWordHash() {}
|
|
|
|
private:
|
|
CWordHash(const CWordHash&);
|
|
CWordHash& operator=(const CWordHash&);
|
|
};
|
|
|
|
#endif // __WORDHASH_H__
|