/*++ Copyright (c) 1998-2002 Microsoft Corporation Module Name : hashfn.h Abstract: Declares and defines a collection of overloaded hash functions. It is strongly suggested that you use these functions with LKRhash. Author: George V. Reilly (GeorgeRe) 06-Jan-1998 Environment: Win32 - User Mode Project: Internet Information Server RunTime Library Revision History: Paul McDaniel (paulmcd) Feb-05-1999 Trimmed for kernel mode and C (not C++) --*/ #ifndef __HASHFN_H__ #define __HASHFN_H__ #include #include extern WCHAR FastUpcaseChars[256]; #define UPCASE_UNICODE_CHAR( wc ) \ (wc < 256 ? FastUpcaseChars[(UCHAR)(wc)] : RtlUpcaseUnicodeChar(wc)) // Produce a scrambled, randomish number in the range 0 to RANDOM_PRIME-1. // Applying this to the results of the other hash functions is likely to // produce a much better distribution, especially for the identity hash // functions such as Hash(char c), where records will tend to cluster at // the low end of the hashtable otherwise. LKRhash applies this internally // to all hash signatures for exactly this reason. // __inline ULONG // HashScramble(ULONG dwHash) // { // // Here are 10 primes slightly greater than 10^9 // // 1000000007, 1000000009, 1000000021, 1000000033, 1000000087, // // 1000000093, 1000000097, 1000000103, 1000000123, 1000000181. // // // default value for "scrambling constant" // const ULONG RANDOM_CONSTANT = 314159269UL; // // large prime number, also used for scrambling // const ULONG RANDOM_PRIME = 1000000007UL; // // return (RANDOM_CONSTANT * dwHash) % RANDOM_PRIME ; // } // // Given M = A % B, A and B unsigned 32-bit integers greater than zero, // there are no values of A or B which yield M = 2^32-1. Why? Because // M must be less than B. // #define HASH_INVALID_SIGNATURE ULONG_MAX // No number in 0..2^31-1 maps to this number after it has been // scrambled by HashRandomizeBits #define HASH_INVALID_SIGNATURE 31678523 // Faster scrambling function suggested by Eric Jacobsen __inline ULONG HashRandomizeBits(ULONG dw) { const ULONG dwLo = ((dw * 1103515245 + 12345) >> 16); const ULONG dwHi = ((dw * 69069 + 1) & 0xffff0000); const ULONG dw2 = dwHi | dwLo; ASSERT(dw2 != HASH_INVALID_SIGNATURE); return dw2; } // Small prime number used as a multiplier in the supplied hash functions #define HASH_MULTIPLIER 101 #undef HASH_SHIFT_MULTIPLY #ifdef HASH_SHIFT_MULTIPLY // 127 = 2^7 - 1 is prime # define HASH_MULTIPLY(dw) (((dw) << 7) - (dw)) #else # define HASH_MULTIPLY(dw) ((dw) * HASH_MULTIPLIER) #endif // Fast, simple hash function that tends to give a good distribution. // Apply HashScramble to the result if you're using this for something // other than LKHash. __inline ULONG HashStringA( const char* psz, ULONG dwHash) { // force compiler to use unsigned arithmetic const unsigned char* upsz = (const unsigned char*) psz; for ( ; *upsz != '\0'; ++upsz) dwHash = HASH_MULTIPLY(dwHash) + *upsz; return dwHash; } // Unicode version of above __inline ULONG HashStringW( const wchar_t* pwsz, ULONG dwHash) { for ( ; *pwsz != L'\0'; ++pwsz) dwHash = HASH_MULTIPLY(dwHash) + *pwsz; return dwHash; } __inline ULONG HashCharW( WCHAR UnicodeChar, ULONG Hash ) { return HASH_MULTIPLY(Hash) + UnicodeChar; } // Quick-'n'-dirty case-insensitive string hash function. // Make sure that you follow up with _stricmp or _mbsicmp. You should // also cache the length of strings and check those first. Caching // an uppercase version of a string can help too. // Again, apply HashScramble to the result if using with something other // than LKHash. // Note: this is not really adequate for MBCS strings. __inline ULONG HashStringNoCaseA( const char* psz, ULONG dwHash) { const unsigned char* upsz = (const unsigned char*) psz; for ( ; *upsz != '\0'; ++upsz) dwHash = HASH_MULTIPLY(dwHash) + (*upsz & 0xDF); // strip off lowercase bit return dwHash; } // Unicode version of above __inline ULONG HashStringNoCaseW( const wchar_t* pwsz, ULONG dwHash) { for ( ; *pwsz != L'\0'; ++pwsz) dwHash = HASH_MULTIPLY(dwHash) + UPCASE_UNICODE_CHAR(*pwsz); return dwHash; } __inline ULONG HashStringsNoCaseW( const wchar_t* pwsz1, const wchar_t* pwsz2, ULONG dwHash) { for ( ; *pwsz1 != L'\0'; ++pwsz1) dwHash = HASH_MULTIPLY(dwHash) + UPCASE_UNICODE_CHAR(*pwsz1); for ( ; *pwsz2 != L'\0'; ++pwsz2) dwHash = HASH_MULTIPLY(dwHash) + UPCASE_UNICODE_CHAR(*pwsz2); return dwHash; } __inline ULONG HashCharNoCaseW( WCHAR UnicodeChar, ULONG Hash ) { return HASH_MULTIPLY(Hash) + UPCASE_UNICODE_CHAR(UnicodeChar); } // HashBlob returns the hash of a blob of arbitrary binary data. // // Warning: HashBlob is generally not the right way to hash a class object. // Consider: // class CFoo { // public: // char m_ch; // double m_d; // char* m_psz; // }; // // inline ULONG Hash(const CFoo& rFoo) // { return HashBlob(&rFoo, sizeof(CFoo)); } // // This is the wrong way to hash a CFoo for two reasons: (a) there will be // a 7-byte gap between m_ch and m_d imposed by the alignment restrictions // of doubles, which will be filled with random data (usually non-zero for // stack variables), and (b) it hashes the address (rather than the // contents) of the string m_psz. Similarly, // // bool operator==(const CFoo& rFoo1, const CFoo& rFoo2) // { return memcmp(&rFoo1, &rFoo2, sizeof(CFoo)) == 0; } // // does the wrong thing. Much better to do this: // // ULONG Hash(const CFoo& rFoo) // { // return HashString(rFoo.m_psz, // 37 * Hash(rFoo.m_ch) + Hash(rFoo.m_d)); // } // // Again, apply HashScramble if using with something other than LKHash. __inline ULONG HashBlob( PUCHAR pb, ULONG cb, ULONG dwHash) { while (cb-- > 0) dwHash = HASH_MULTIPLY(dwHash) + *pb++; return dwHash; } // ======= // // paulmcd: a bunch snipped due to use of overloading, not allowed in C // // ======= __inline ULONG HashDouble(double dbl) { int nExponent; double dblMantissa; if (dbl == 0.0) return 0; dblMantissa = frexp(dbl, &nExponent); // 0.5 <= |mantissa| < 1.0 return (ULONG) ((2.0 * fabs(dblMantissa) - 1.0) * UINT_MAX); } __inline ULONG HashFloat(float f) { return HashDouble((double) f); } #endif // __HASHFN_H__