Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

249 lines
6.2 KiB

  1. /*++
  2. Copyright (c) 1998-2001 Microsoft Corporation
  3. Module Name :
  4. hashfn.h
  5. Abstract:
  6. Declares and defines a collection of overloaded hash functions.
  7. It is strongly suggested that you use these functions with LKRhash.
  8. Author:
  9. George V. Reilly (GeorgeRe) 06-Jan-1998
  10. Environment:
  11. Win32 - User Mode
  12. Project:
  13. Internet Information Server RunTime Library
  14. Revision History:
  15. Paul McDaniel (paulmcd) Feb-05-1999 Trimmed for kernel mode
  16. and C (not C++)
  17. --*/
  18. #ifndef __HASHFN_H__
  19. #define __HASHFN_H__
  20. #include <math.h>
  21. #include <limits.h>
  22. // Produce a scrambled, randomish number in the range 0 to RANDOM_PRIME-1.
  23. // Applying this to the results of the other hash functions is likely to
  24. // produce a much better distribution, especially for the identity hash
  25. // functions such as Hash(char c), where records will tend to cluster at
  26. // the low end of the hashtable otherwise. LKRhash applies this internally
  27. // to all hash signatures for exactly this reason.
  28. // __inline ULONG
  29. // HashScramble(ULONG dwHash)
  30. // {
  31. // // Here are 10 primes slightly greater than 10^9
  32. // // 1000000007, 1000000009, 1000000021, 1000000033, 1000000087,
  33. // // 1000000093, 1000000097, 1000000103, 1000000123, 1000000181.
  34. //
  35. // // default value for "scrambling constant"
  36. // const ULONG RANDOM_CONSTANT = 314159269UL;
  37. // // large prime number, also used for scrambling
  38. // const ULONG RANDOM_PRIME = 1000000007UL;
  39. //
  40. // return (RANDOM_CONSTANT * dwHash) % RANDOM_PRIME ;
  41. // }
  42. //
  43. // Given M = A % B, A and B unsigned 32-bit integers greater than zero,
  44. // there are no values of A or B which yield M = 2^32-1. Why? Because
  45. // M must be less than B.
  46. // #define HASH_INVALID_SIGNATURE ULONG_MAX
  47. // No number in 0..2^31-1 maps to this number after it has been
  48. // scrambled by HashRandomizeBits
  49. #define HASH_INVALID_SIGNATURE 31678523
  50. // Faster scrambling function suggested by Eric Jacobsen
  51. __inline ULONG
  52. HashRandomizeBits(ULONG dw)
  53. {
  54. const ULONG dwLo = ((dw * 1103515245 + 12345) >> 16);
  55. const ULONG dwHi = ((dw * 69069 + 1) & 0xffff0000);
  56. const ULONG dw2 = dwHi | dwLo;
  57. ASSERT(dw2 != HASH_INVALID_SIGNATURE);
  58. return dw2;
  59. }
  // HASH_MULTIPLIER is the small prime that the hash functions in this
  // header use as their per-element multiplier.
  #define HASH_MULTIPLIER 101

  // HASH_MULTIPLY(x) multiplies the running hash by the chosen factor.
  // HASH_SHIFT_MULTIPLY is explicitly undefined just below, so the plain
  // multiplication by HASH_MULTIPLIER is the variant that is compiled.
  // The alternative computes x * 127 as (x << 7) - x; 127 = 2^7 - 1 is prime.
  #undef HASH_SHIFT_MULTIPLY
  #if defined(HASH_SHIFT_MULTIPLY)
  # define HASH_MULTIPLY(x) (((x) << 7) - (x))
  #else
  # define HASH_MULTIPLY(x) ((x) * HASH_MULTIPLIER)
  #endif
  69. // Fast, simple hash function that tends to give a good distribution.
  70. // Apply HashScramble to the result if you're using this for something
  71. // other than LKHash.
  72. __inline ULONG
  73. HashStringA(
  74. const char* psz,
  75. ULONG dwHash)
  76. {
  77. // force compiler to use unsigned arithmetic
  78. const unsigned char* upsz = (const unsigned char*) psz;
  79. for ( ; *upsz != '\0'; ++upsz)
  80. dwHash = HASH_MULTIPLY(dwHash) + *upsz;
  81. return dwHash;
  82. }
  83. // Unicode version of above
  84. __inline ULONG
  85. HashStringW(
  86. const wchar_t* pwsz,
  87. ULONG dwHash)
  88. {
  89. for ( ; *pwsz != L'\0'; ++pwsz)
  90. dwHash = HASH_MULTIPLY(dwHash) + *pwsz;
  91. return dwHash;
  92. }
  93. __inline ULONG
  94. HashCharW(
  95. WCHAR UnicodeChar,
  96. ULONG Hash
  97. )
  98. {
  99. return HASH_MULTIPLY(Hash) + UnicodeChar;
  100. }
  101. // Quick-'n'-dirty case-insensitive string hash function.
  102. // Make sure that you follow up with _stricmp or _mbsicmp. You should
  103. // also cache the length of strings and check those first. Caching
  104. // an uppercase version of a string can help too.
  105. // Again, apply HashScramble to the result if using with something other
  106. // than LKHash.
  107. // Note: this is not really adequate for MBCS strings.
  108. __inline ULONG
  109. HashStringNoCaseA(
  110. const char* psz,
  111. ULONG dwHash)
  112. {
  113. const unsigned char* upsz = (const unsigned char*) psz;
  114. for ( ; *upsz != '\0'; ++upsz)
  115. dwHash = HASH_MULTIPLY(dwHash)
  116. + (*upsz & 0xDF); // strip off lowercase bit
  117. return dwHash;
  118. }
  119. // Unicode version of above
  120. __inline ULONG
  121. HashStringNoCaseW(
  122. const wchar_t* pwsz,
  123. ULONG dwHash)
  124. {
  125. for ( ; *pwsz != L'\0'; ++pwsz)
  126. dwHash = HASH_MULTIPLY(dwHash) + RtlUpcaseUnicodeChar(*pwsz);
  127. return dwHash;
  128. }
  129. __inline ULONG
  130. HashCharNoCaseW(
  131. WCHAR UnicodeChar,
  132. ULONG Hash
  133. )
  134. {
  135. return HASH_MULTIPLY(Hash) + RtlUpcaseUnicodeChar(UnicodeChar);
  136. }
  137. // HashBlob returns the hash of a blob of arbitrary binary data.
  138. //
  139. // Warning: HashBlob is generally not the right way to hash a class object.
  140. // Consider:
  141. // class CFoo {
  142. // public:
  143. // char m_ch;
  144. // double m_d;
  145. // char* m_psz;
  146. // };
  147. //
  148. // inline ULONG Hash(const CFoo& rFoo)
  149. // { return HashBlob(&rFoo, sizeof(CFoo)); }
  150. //
  151. // This is the wrong way to hash a CFoo for two reasons: (a) there will be
  152. // a 7-byte gap between m_ch and m_d imposed by the alignment restrictions
  153. // of doubles, which will be filled with random data (usually non-zero for
  154. // stack variables), and (b) it hashes the address (rather than the
  155. // contents) of the string m_psz. Similarly,
  156. //
  157. // bool operator==(const CFoo& rFoo1, const CFoo& rFoo2)
  158. // { return memcmp(&rFoo1, &rFoo2, sizeof(CFoo)) == 0; }
  159. //
  160. // does the wrong thing. Much better to do this:
  161. //
  162. // ULONG Hash(const CFoo& rFoo)
  163. // {
  164. // return HashString(rFoo.m_psz,
  165. // 37 * Hash(rFoo.m_ch) + Hash(rFoo.m_d));
  166. // }
  167. //
  168. // Again, apply HashScramble if using with something other than LKRhash.
  169. __inline ULONG
  170. HashBlob(
  171. PUCHAR pb,
  172. ULONG cb,
  173. ULONG dwHash)
  174. {
  175. while (cb-- > 0)
  176. dwHash = HASH_MULTIPLY(dwHash) + *pb++;
  177. return dwHash;
  178. }
  179. // ======= <snip>
  180. //
  181. // paulmcd: a bunch snipped due to use of overloading, not allowed in C
  182. //
  183. // ======= <snip>
  184. __inline ULONG HashDouble(double dbl)
  185. {
  186. int nExponent;
  187. double dblMantissa;
  188. if (dbl == 0.0)
  189. return 0;
  190. dblMantissa = frexp(dbl, &nExponent);
  191. // 0.5 <= |mantissa| < 1.0
  192. return (ULONG) ((2.0 * fabs(dblMantissa) - 1.0) * UINT_MAX);
  193. }
  194. __inline ULONG HashFloat(float f)
  195. { return HashDouble((double) f); }
  196. #endif // __HASHFN_H__