/*++ Copyright (c) 1998-2001 Microsoft Corporation Module Name : WordHash.h Abstract: LKRhash test harness: hash table of words Author: George V. Reilly (GeorgeRe) 06-Jan-1998 Environment: Win32 - User Mode Project: LKRhash Revision History: --*/ #ifndef __WORDHASH_H__ #define __WORDHASH_H__ #define MAXKEYS 1000000 #define MAX_THREADS MAXIMUM_WAIT_OBJECTS #define MAX_STRSIZE 300 #ifdef LKR_PUBLIC_API typedef PLkrHashTable WordBaseTable; #include #endif // LKR_PUBLIC_API #ifndef __LKRHASH_NO_NAMESPACE__ #define LKRHASH_NS LKRhash using namespace LKRhash; #else #define LKRHASH_NS #endif // !__LKRHASH_NO_NAMESPACE__ #ifndef __HASHFN_NO_NAMESPACE__ using namespace HashFn; #endif // !__HASHFN_NO_NAMESPACE__ #ifndef LKR_PUBLIC_API typedef LKRHASH_NS::CLKRHashTable WordBaseTable; // typedef LKRHASH_NS::CLKRLinearHashTable WordBaseTable; #endif // A string wrapper class that keeps track of the length of the string data. // A more useful string class would refcount the data and have copy-on-write // semantics (or use MFC's CString or STL's string classes). class CStr { public: const char* m_psz; short m_cch; bool m_fDynamic; static int sm_cchMax; CStr() : m_psz(NULL), m_cch(0), m_fDynamic(false) { } CStr( const char* psz, int cch, bool fDynamic) : m_psz(NULL), m_cch(static_cast(cch)), m_fDynamic(fDynamic) { if (fDynamic) Set(psz, cch); else m_psz = psz; } void Set( const char* psz, int cch) { delete [] const_cast(m_psz); m_psz = new char[cch+1]; if (m_psz != NULL) { strcpy(const_cast(m_psz), psz); m_cch = static_cast(cch); m_fDynamic = true; sm_cchMax = max(m_cch, sm_cchMax); } else { m_cch = 0; m_fDynamic = false; } } ~CStr() { if (m_fDynamic) delete [] const_cast(m_psz); } }; #ifdef HASHTEST_STATIC_DATA // length of longest string seen int CStr::sm_cchMax = 0; #endif // a word from the data file, which contains one 'word' per line (may // include spaces). class CWord { public: int m_cNotFound; CStr m_str; bool m_fInserted; bool m_fIterated; LONG m_cRefs; LONG m_cInsertIfNotFounds; int m_iIndex; CWord() : m_cNotFound(0), m_fInserted(false), m_fIterated(false), m_cRefs(0), m_cInsertIfNotFounds(0), m_iIndex(-1) { } ~CWord() { IRTLASSERT(m_cRefs == 0); #ifdef IRTLDEBUG if (m_cRefs != 0) IRTLTRACE(_TEXT("\"%hs\": %d, %d\n"), m_str.m_psz, m_cRefs, m_cInsertIfNotFounds); #endif } }; // globals extern int g_nokeys ; extern CWord g_wordtable[MAXKEYS]; const char* LK_AddRefReasonCode2String( LK_ADDREF_REASON lkar); #define DO_REF_COUNT false // A hash table of CWords, indexed by CStr*s. class CWordHash #ifndef LKR_PUBLIC_API : public CTypedHashTable #else : public TypedLkrHashTable #endif { public: static bool sm_fCaseInsensitive; static bool sm_fMemCmp; static int sm_nLastChars; static bool sm_fNonPagedAllocs; static bool sm_fRefTrace; static bool sm_fMultiKeys; static bool sm_fUseLocks; #ifndef LKR_PUBLIC_API friend class CTypedHashTable; #else friend class TypedLkrHashTable; #endif static const CStr* ExtractKey(const CWord* pKey) { return &pKey->m_str; } static DWORD CalcKeyHash(const CStr* pstrKey) { const char* psz = pstrKey->m_psz; // use only the last few chars instead of whole string? if (sm_nLastChars > 0 && pstrKey->m_cch >= sm_nLastChars) psz = pstrKey->m_psz + pstrKey->m_cch - sm_nLastChars; IRTLASSERT(pstrKey->m_psz <= psz && psz < pstrKey->m_psz + pstrKey->m_cch); if (sm_fCaseInsensitive) return HashStringNoCase(psz, pstrKey->m_cch); else return HashString(psz, pstrKey->m_cch); } static int CompareKeys(const CStr* pstrKey1, const CStr* pstrKey2) { int nCmp; if (sm_fCaseInsensitive) { #if 1 // if (sm_fMultiKeys) { // Hack that works for ASCII data nCmp = (pstrKey1->m_psz[0] & 0xDF) - (pstrKey2->m_psz[0] & 0xDF); if (nCmp != 0) return nCmp; } #endif // nCmp = pstrKey1->m_cch - pstrKey2->m_cch; // if (nCmp != 0) // return nCmp; if (sm_fMemCmp) return _memicmp(pstrKey1->m_psz, pstrKey2->m_psz, pstrKey1->m_cch); else return _stricmp(pstrKey1->m_psz, pstrKey2->m_psz); } else { #if 1 // if (sm_fMultiKeys) { nCmp = pstrKey1->m_psz[0] - pstrKey2->m_psz[0]; if (nCmp != 0) return nCmp; } #endif // nCmp = pstrKey1->m_cch - pstrKey2->m_cch; // if (nCmp != 0) // return nCmp; if (sm_fMemCmp) return memcmp(pstrKey1->m_psz, pstrKey2->m_psz, pstrKey1->m_cch); else return strcmp(pstrKey1->m_psz, pstrKey2->m_psz); } } static LONG AddRefRecord(CWord* pRec, LK_ADDREF_REASON lkar) { int nIncr = (lkar < 0) ? -1 : +1; LONG cRefs = nIncr + InterlockedExchangeAdd(&pRec->m_cRefs, nIncr); if (sm_fRefTrace) IRTLTRACE(_TEXT("\tAddRef key(%d, %p: \"%hs\"), %hs (%s), = %d\n"), pRec - g_wordtable, pRec, pRec->m_str.m_psz, (lkar > 0) ? "+1" : "-1", LKR_AddRefReasonAsString(lkar), cRefs); IRTLASSERT(cRefs >= 0); return cRefs; } CWordHash( unsigned maxload, // Bound on avg chain length size_t initsize, // Initial size of hash table. size_t num_subtbls // #subordinate hash tables. ) #ifndef LKR_PUBLIC_API : CTypedHashTable ("wordhash", maxload, initsize, num_subtbls, sm_fMultiKeys, sm_fUseLocks # ifdef LKRHASH_KERNEL_MODE , sm_fNonPagedAllocs // use paged or NP pool # endif ) #else // LKR_PUBLIC_API : TypedLkrHashTable ("wordhash", (LK_TABLESIZE) initsize, sm_fMultiKeys, sm_fUseLocks) #endif // LKR_PUBLIC_API {} template CWordHash( _InputIterator f, _InputIterator l, unsigned maxload, // Bound on avg chain length size_t initsize, // Initial size of hash table. size_t num_subtbls // #subordinate hash tables. ) #ifndef LKR_PUBLIC_API : CTypedHashTable (f, l, "wordhash", maxload, initsize, num_subtbls) #else : TypedLkrHashTable (f, l, "wordhash", (LK_TABLESIZE) initsize) #endif {} static const TCHAR* HashMethod() { TCHAR tszLast[20]; static TCHAR s_tsz[80]; if (sm_nLastChars > 0) _stprintf(tszLast, _TEXT("last %d"), sm_nLastChars); else _tcscpy(tszLast, _TEXT("all")); _stprintf(s_tsz, _TEXT("case-%ssensitive, %scmp, %s chars"), sm_fCaseInsensitive ? _TEXT("in") : _TEXT(""), sm_fMemCmp ? _TEXT("mem") : _TEXT("str"), tszLast); return s_tsz; } #ifdef LKR_PUBLIC_API typedef CLKRHashTable BaseHashTable; typedef BaseHashTable::TableLock TableLock; typedef BaseHashTable::BucketLock BucketLock; enum { NODES_PER_CLUMP = BaseHashTable::NODES_PER_CLUMP, }; static const TCHAR* ClassName() {return _TEXT("PLkrHashTable");} static LK_TABLESIZE NumSubTables(DWORD& rinitsize, DWORD& rnum_subtbls) { return BaseHashTable::NumSubTables(rinitsize, rnum_subtbls); } int NumSubTables() const { return reinterpret_cast(m_plkr)->NumSubTables(); } void SetTableLockSpinCount(WORD wSpins) { reinterpret_cast(m_plkr)->SetTableLockSpinCount(wSpins);} void SetBucketLockSpinCount(WORD wSpins) { reinterpret_cast(m_plkr)->SetBucketLockSpinCount(wSpins);} CLKRHashTableStats GetStatistics() const { return reinterpret_cast(m_plkr)->GetStatistics();} #endif // LKR_PUBLIC_API protected: ~CWordHash() {} private: CWordHash(const CWordHash&); CWordHash& operator=(const CWordHash&); }; #endif // __WORDHASH_H__