Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

284 lines
6.7 KiB

  1. /*++
  2. Copyright (c) 1998-2000 Microsoft Corporation
  3. Module Name :
  4. hashfn.h
  5. Abstract:
  6. Declares and defines a collection of overloaded hash functions.
  7. It is strongly suggested that you use these functions with LKRHash.
  8. Author:
  9. George V. Reilly (GeorgeRe) 06-Jan-1998
  10. Environment:
  11. Win32 - User Mode
  12. Project:
  13. Internet Information Server RunTime Library
  14. Revision History:
  15. --*/
  16. #ifndef __HASHFN_H__
  17. #define __HASHFN_H__
  18. #ifdef __HASHFN_NAMESPACE__
  19. namespace HashFn {
  20. #endif // __HASHFN_NAMESPACE__
  21. // Produce a scrambled, randomish number in the range 0 to RANDOM_PRIME-1.
  22. // Applying this to the results of the other hash functions is likely to
  23. // produce a much better distribution, especially for the identity hash
  24. // functions such as Hash(char c), where records will tend to cluster at
  25. // the low end of the hashtable otherwise. LKRHash applies this internally
  26. // to all hash signatures for exactly this reason.
  27. inline DWORD
  28. HashScramble(DWORD dwHash)
  29. {
  30. // Here are 10 primes slightly greater than 10^9
  31. // 1000000007, 1000000009, 1000000021, 1000000033, 1000000087,
  32. // 1000000093, 1000000097, 1000000103, 1000000123, 1000000181.
  33. // default value for "scrambling constant"
  34. const DWORD RANDOM_CONSTANT = 314159269UL;
  35. // large prime number, also used for scrambling
  36. const DWORD RANDOM_PRIME = 1000000007UL;
  37. return (RANDOM_CONSTANT * dwHash) % RANDOM_PRIME ;
  38. }
  39. // Faster scrambling function suggested by Eric Jacobsen
  40. inline DWORD
  41. RandomizeBits(DWORD dw)
  42. {
  43. return (((dw * 1103515245 + 12345) >> 16)
  44. | ((dw * 69069 + 1) & 0xffff0000));
  45. }
  46. // Small prime number used as a multiplier in the supplied hash functions
  47. const DWORD HASH_MULTIPLIER = 101;
  48. #undef HASH_SHIFT_MULTIPLY
  49. #ifdef HASH_SHIFT_MULTIPLY
  50. # define HASH_MULTIPLY(dw) (((dw) << 5) - (dw))
  51. #else
  52. # define HASH_MULTIPLY(dw) ((dw) * HASH_MULTIPLIER)
  53. #endif
  54. // Fast, simple hash function that tends to give a good distribution.
  55. // Apply HashScramble to the result if you're using this for something
  56. // other than LKRHash.
  57. inline DWORD
  58. HashString(
  59. const char* psz,
  60. DWORD dwHash = 0)
  61. {
  62. // force compiler to use unsigned arithmetic
  63. const unsigned char* upsz = (const unsigned char*) psz;
  64. for ( ; *upsz; ++upsz)
  65. dwHash = HASH_MULTIPLY(dwHash) + *upsz;
  66. return dwHash;
  67. }
  68. // Unicode version of above
  69. inline DWORD
  70. HashString(
  71. const wchar_t* pwsz,
  72. DWORD dwHash = 0)
  73. {
  74. for ( ; *pwsz; ++pwsz)
  75. dwHash = HASH_MULTIPLY(dwHash) + *pwsz;
  76. return dwHash;
  77. }
  78. // Quick-'n'-dirty case-insensitive string hash function.
  79. // Make sure that you follow up with _stricmp or _mbsicmp. You should
  80. // also cache the length of strings and check those first. Caching
  81. // an uppercase version of a string can help too.
  82. // Again, apply HashScramble to the result if using with something other
  83. // than LKRHash.
  84. // Note: this is not really adequate for MBCS strings.
  85. inline DWORD
  86. HashStringNoCase(
  87. const char* psz,
  88. DWORD dwHash = 0)
  89. {
  90. const unsigned char* upsz = (const unsigned char*) psz;
  91. for ( ; *upsz; ++upsz)
  92. dwHash = HASH_MULTIPLY(dwHash)
  93. + (*upsz & 0xDF); // strip off lowercase bit
  94. return dwHash;
  95. }
  96. // Unicode version of above
  97. inline DWORD
  98. HashStringNoCase(
  99. const wchar_t* pwsz,
  100. DWORD dwHash = 0)
  101. {
  102. for ( ; *pwsz; ++pwsz)
  103. dwHash = HASH_MULTIPLY(dwHash) + (*pwsz & 0xFFDF);
  104. return dwHash;
  105. }
  106. // HashBlob returns the hash of a blob of arbitrary binary data.
  107. //
  108. // Warning: HashBlob is generally not the right way to hash a class object.
  109. // Consider:
  110. // class CFoo {
  111. // public:
  112. // char m_ch;
  113. // double m_d;
  114. // char* m_psz;
  115. // };
  116. //
  117. // inline DWORD Hash(const CFoo& rFoo)
  118. // { return HashBlob(&rFoo, sizeof(CFoo)); }
  119. //
  120. // This is the wrong way to hash a CFoo for two reasons: (a) there will be
  121. // a 7-byte gap between m_ch and m_d imposed by the alignment restrictions
  122. // of doubles, which will be filled with random data (usually non-zero for
  123. // stack variables), and (b) it hashes the address (rather than the
  124. // contents) of the string m_psz. Similarly,
  125. //
  126. // bool operator==(const CFoo& rFoo1, const CFoo& rFoo2)
  127. // { return memcmp(&rFoo1, &rFoo2, sizeof(CFoo)) == 0; }
  128. //
  129. // does the wrong thing. Much better to do this:
  130. //
  131. // DWORD Hash(const CFoo& rFoo)
  132. // {
  133. // return HashString(rFoo.m_psz,
  134. // HASH_MULTIPLIER * Hash(rFoo.m_ch)
  135. // + Hash(rFoo.m_d));
  136. // }
  137. //
  138. // Again, apply HashScramble if using with something other than LKRHash.
  139. inline DWORD
  140. HashBlob(
  141. const void* pv,
  142. size_t cb,
  143. DWORD dwHash = 0)
  144. {
  145. LPBYTE pb = static_cast<LPBYTE>(const_cast<void*>(pv));
  146. while (cb-- > 0)
  147. dwHash = HASH_MULTIPLY(dwHash) + *pb++;
  148. return dwHash;
  149. }
  150. //
  151. // Overloaded hash functions for all the major builtin types.
  152. // Again, apply HashScramble to result if using with something other than
  153. // LKRHash.
  154. //
  155. inline DWORD Hash(const char* psz)
  156. { return HashString(psz); }
  157. inline DWORD Hash(const unsigned char* pusz)
  158. { return HashString(reinterpret_cast<const char*>(pusz)); }
  159. inline DWORD Hash(const signed char* pssz)
  160. { return HashString(reinterpret_cast<const char*>(pssz)); }
  161. inline DWORD Hash(const wchar_t* pwsz)
  162. { return HashString(pwsz); }
  163. inline DWORD
  164. Hash(
  165. const GUID* pguid,
  166. DWORD dwHash = 0)
  167. {
  168. DWORD* pdw = reinterpret_cast<DWORD*>(const_cast<GUID*>(pguid));
  169. dwHash = HASH_MULTIPLY(dwHash) + *pdw++;
  170. dwHash = HASH_MULTIPLY(dwHash) + *pdw++;
  171. dwHash = HASH_MULTIPLY(dwHash) + *pdw++;
  172. dwHash = HASH_MULTIPLY(dwHash) + *pdw;
  173. return dwHash;
  174. }
  175. // Identity hash functions: scalar values map to themselves
  176. inline DWORD Hash(char c)
  177. { return c; }
  178. inline DWORD Hash(unsigned char uc)
  179. { return uc; }
  180. inline DWORD Hash(signed char sc)
  181. { return sc; }
  182. inline DWORD Hash(short sh)
  183. { return sh; }
  184. inline DWORD Hash(unsigned short ush)
  185. { return ush; }
  186. inline DWORD Hash(int i)
  187. { return i; }
  188. inline DWORD Hash(unsigned int u)
  189. { return u; }
  190. inline DWORD Hash(long l)
  191. { return l; }
  192. inline DWORD Hash(unsigned long ul)
  193. { return ul; }
  194. inline DWORD Hash(float f)
  195. {
  196. // be careful of rounding errors when computing keys
  197. union {
  198. float f;
  199. DWORD dw;
  200. } u;
  201. u.f = f;
  202. return u.dw;
  203. }
  204. inline DWORD Hash(double dbl)
  205. {
  206. // be careful of rounding errors when computing keys
  207. union {
  208. double dbl;
  209. DWORD dw[2];
  210. } u;
  211. u.dbl = dbl;
  212. return u.dw[0] * HASH_MULTIPLIER + u.dw[1];
  213. }
  214. #ifdef __HASHFN_NAMESPACE__
  215. }
  216. #endif // __HASHFN_NAMESPACE__
  217. #endif // __HASHFN_H__