Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

361 lines
9.1 KiB

  1. /*++
  2. Copyright (c) 1998-2000 Microsoft Corporation
  3. Module Name :
  4. HashFn.h
  5. Abstract:
  6. Declares and defines a collection of overloaded hash functions.
  7. It is strongly suggested that you use these functions with LKRhash.
  8. Author:
  9. George V. Reilly (GeorgeRe) 06-Jan-1998
  10. Environment:
  11. Win32 - User Mode
  12. Project:
  13. Internet Information Server RunTime Library
  14. Revision History:
  15. --*/
  16. #ifndef __HASHFN_H__
  17. #define __HASHFN_H__
  18. #include <math.h>
  19. #include <limits.h>
  20. #ifndef __HASHFN_NO_NAMESPACE__
  21. namespace HashFn {
  22. #endif // !__HASHFN_NO_NAMESPACE__
  23. // Produce a scrambled, randomish number in the range 0 to RANDOM_PRIME-1.
  24. // Applying this to the results of the other hash functions is likely to
  25. // produce a much better distribution, especially for the identity hash
  26. // functions such as Hash(char c), where records will tend to cluster at
  27. // the low end of the hashtable otherwise. LKRhash applies this internally
  28. // to all hash signatures for exactly this reason.
  29. inline DWORD
  30. HashScramble(DWORD dwHash)
  31. {
  32. // Here are 10 primes slightly greater than 10^9
  33. // 1000000007, 1000000009, 1000000021, 1000000033, 1000000087,
  34. // 1000000093, 1000000097, 1000000103, 1000000123, 1000000181.
  35. // default value for "scrambling constant"
  36. const DWORD RANDOM_CONSTANT = 314159269UL;
  37. // large prime number, also used for scrambling
  38. const DWORD RANDOM_PRIME = 1000000007UL;
  39. return (RANDOM_CONSTANT * dwHash) % RANDOM_PRIME ;
  40. }
  // Reserved signature value that marks "no valid hash".
  enum {
      // Per the original author's note: no number in 0..2^31-1 maps to this
      // value after it has been scrambled by HashFn::HashRandomizeBits.
      HASH_INVALID_SIGNATURE = 31678523

      // Alternative for signatures scrambled by HashScramble:
      //   HASH_INVALID_SIGNATURE = ULONG_MAX
      // Given M = A % B, with A and B unsigned 32-bit integers greater than
      // zero, no values of A or B yield M = 2^32-1, because M must be less
      // than B.
  };
  50. // Faster scrambling function suggested by Eric Jacobsen
  51. inline DWORD
  52. HashRandomizeBits(DWORD dw)
  53. {
  54. return (((dw * 1103515245 + 12345) >> 16)
  55. | ((dw * 69069 + 1) & 0xffff0000));
  56. }
  57. #undef HASH_SHIFT_MULTIPLY
  58. #ifdef HASH_SHIFT_MULTIPLY
  59. inline DWORD
  60. HASH_MULTIPLY(
  61. DWORD dw)
  62. {
  63. // 127 = 2^7 - 1 is prime
  64. return (dw << 7) - dw;
  65. }
  66. #else // !HASH_SHIFT_MULTIPLY
  67. inline DWORD
  68. HASH_MULTIPLY(
  69. DWORD dw)
  70. {
  71. // Small prime number used as a multiplier in the supplied hash functions
  72. const DWORD HASH_MULTIPLIER = 101;
  73. return dw * HASH_MULTIPLIER;
  74. }
  75. #endif // !HASH_SHIFT_MULTIPLY
  76. // Fast, simple hash function that tends to give a good distribution.
  77. // Apply HashScramble to the result if you're using this for something
  78. // other than LKRhash.
  79. inline DWORD
  80. HashString(
  81. const char* psz,
  82. DWORD dwHash = 0)
  83. {
  84. // force compiler to use unsigned arithmetic
  85. const unsigned char* upsz = (const unsigned char*) psz;
  86. for ( ; *upsz != '\0'; ++upsz)
  87. dwHash = HASH_MULTIPLY(dwHash) + *upsz;
  88. return dwHash;
  89. }
  90. // --------------------------------------------------------
  91. // Compute a hash value from an input string of any type, i.e.
  92. // the input is just treated as a sequence of bytes.
  93. // Based on a hash function originally proposed by J. Zobel.
  94. // Author: Paul Larson, 1999, [email protected]
  95. // --------------------------------------------------------
  96. inline DWORD
  97. HashString2(
  98. const char* pszInputKey, // ptr to input - any type is OK
  99. DWORD dwHash = 314159269) // Initial seed for hash function
  100. {
  101. // Initialize dwHash to a reasonably large constant so very
  102. // short keys won't get mapped to small values. Virtually any
  103. // large odd constant will do.
  104. const unsigned char* upsz = (const unsigned char*) pszInputKey;
  105. for ( ; *upsz != '\0'; ++upsz)
  106. dwHash ^= (dwHash << 11) + (dwHash << 5) + (dwHash >> 2) + *upsz;
  107. return (dwHash & 0x7FFFFFFF);
  108. }
  109. // Unicode version of above
  110. inline DWORD
  111. HashString(
  112. const wchar_t* pwsz,
  113. DWORD dwHash = 0)
  114. {
  115. for ( ; *pwsz != L'\0'; ++pwsz)
  116. dwHash = HASH_MULTIPLY(dwHash) + *pwsz;
  117. return dwHash;
  118. }
  119. // Quick-'n'-dirty case-insensitive string hash function.
  120. // Make sure that you follow up with _stricmp or _mbsicmp. You should
  121. // also cache the length of strings and check those first. Caching
  122. // an uppercase version of a string can help too.
  123. // Again, apply HashScramble to the result if using with something other
  124. // than LKRhash.
  125. // Note: this is not really adequate for MBCS strings.
  126. inline DWORD
  127. HashStringNoCase(
  128. const char* psz,
  129. DWORD dwHash = 0)
  130. {
  131. const unsigned char* upsz = (const unsigned char*) psz;
  132. for ( ; *upsz != '\0'; ++upsz)
  133. dwHash = HASH_MULTIPLY(dwHash)
  134. + (*upsz & 0xDF); // strip off lowercase bit
  135. return dwHash;
  136. }
  137. // Unicode version of above
  138. inline DWORD
  139. HashStringNoCase(
  140. const wchar_t* pwsz,
  141. DWORD dwHash = 0)
  142. {
  143. for ( ; *pwsz != L'\0'; ++pwsz)
  144. dwHash = HASH_MULTIPLY(dwHash) + (*pwsz & 0xFFDF);
  145. return dwHash;
  146. }
  147. // HashBlob returns the hash of a blob of arbitrary binary data.
  148. //
  149. // Warning: HashBlob is generally not the right way to hash a class object.
  150. // Consider:
  151. // class CFoo {
  152. // public:
  153. // char m_ch;
  154. // double m_d;
  155. // char* m_psz;
  156. // };
  157. //
  158. // inline DWORD Hash(const CFoo& rFoo)
  159. // { return HashBlob(&rFoo, sizeof(CFoo)); }
  160. //
  161. // This is the wrong way to hash a CFoo for two reasons: (a) there will be
  162. // a 7-byte gap between m_ch and m_d imposed by the alignment restrictions
  163. // of doubles, which will be filled with random data (usually non-zero for
  164. // stack variables), and (b) it hashes the address (rather than the
  165. // contents) of the string m_psz. Similarly,
  166. //
  167. // bool operator==(const CFoo& rFoo1, const CFoo& rFoo2)
  168. // { return memcmp(&rFoo1, &rFoo2, sizeof(CFoo)) == 0; }
  169. //
  170. // does the wrong thing. Much better to do this:
  171. //
  172. // DWORD Hash(const CFoo& rFoo)
  173. // {
  174. // return HashString(rFoo.m_psz,
  175. // HASH_MULTIPLIER * Hash(rFoo.m_ch)
  176. // + Hash(rFoo.m_d));
  177. // }
  178. //
  179. // Again, apply HashScramble if using with something other than LKRhash.
  180. inline DWORD
  181. HashBlob(
  182. const void* pv,
  183. size_t cb,
  184. DWORD dwHash = 0)
  185. {
  186. const BYTE* pb = static_cast<const BYTE*>(pv);
  187. while (cb-- > 0)
  188. dwHash = HASH_MULTIPLY(dwHash) + *pb++;
  189. return dwHash;
  190. }
  191. // --------------------------------------------------------
  192. // Compute a hash value from an input string of any type, i.e.
  193. // the input is just treated as a sequence of bytes.
  194. // Based on a hash function originally proposed by J. Zobel.
  195. // Author: Paul Larson, 1999, [email protected]
  196. // --------------------------------------------------------
  197. inline DWORD
  198. HashBlob2(
  199. const void* pvInputKey, // ptr to input - any type is OK
  200. size_t cbKeyLen, // length of input key in bytes
  201. DWORD dwHash = 314159269) // Initial seed for hash function
  202. {
  203. // Initialize dwHash to a reasonably large constant so very
  204. // short keys won't get mapped to small values. Virtually any
  205. // large odd constant will do.
  206. const BYTE* pb = static_cast<const BYTE*>(pvInputKey);
  207. const BYTE* pbSentinel = pb + cbKeyLen;
  208. for ( ; pb < pbSentinel; ++pb)
  209. dwHash ^= (dwHash << 11) + (dwHash << 5) + (dwHash >> 2) + *pb;
  210. return (dwHash & 0x7FFFFFFF);
  211. }
  212. //
  213. // Overloaded hash functions for all the major builtin types.
  214. // Again, apply HashScramble to result if using with something other than
  215. // LKRhash.
  216. //
  217. inline DWORD Hash(const char* psz)
  218. { return HashString(psz); }
  219. inline DWORD Hash(const unsigned char* pusz)
  220. { return HashString(reinterpret_cast<const char*>(pusz)); }
  221. inline DWORD Hash(const signed char* pssz)
  222. { return HashString(reinterpret_cast<const char*>(pssz)); }
  223. inline DWORD Hash(const wchar_t* pwsz)
  224. { return HashString(pwsz); }
  225. inline DWORD
  226. Hash(
  227. const GUID* pguid,
  228. DWORD dwHash = 0)
  229. {
  230. dwHash += * reinterpret_cast<const DWORD*>(pguid);
  231. return dwHash;
  232. }
  233. // Identity hash functions: scalar values map to themselves
  234. inline DWORD Hash(char c)
  235. { return c; }
  236. inline DWORD Hash(unsigned char uc)
  237. { return uc; }
  238. inline DWORD Hash(signed char sc)
  239. { return sc; }
  240. inline DWORD Hash(short sh)
  241. { return sh; }
  242. inline DWORD Hash(unsigned short ush)
  243. { return ush; }
  244. inline DWORD Hash(int i)
  245. { return i; }
  246. inline DWORD Hash(unsigned int u)
  247. { return u; }
  248. inline DWORD Hash(long l)
  249. { return l; }
  250. inline DWORD Hash(unsigned long ul)
  251. { return ul; }
  252. inline DWORD Hash(double dbl)
  253. {
  254. if (dbl == 0.0)
  255. return 0;
  256. int nExponent;
  257. double dblMantissa = frexp(dbl, &nExponent);
  258. // 0.5 <= |mantissa| < 1.0
  259. return (DWORD) ((2.0 * fabs(dblMantissa) - 1.0) * ULONG_MAX);
  260. }
  261. inline DWORD Hash(float f)
  262. { return Hash((double) f); }
  263. inline DWORD Hash(unsigned __int64 ull)
  264. {
  265. union {
  266. unsigned __int64 _ull;
  267. DWORD dw[2];
  268. } u = {ull};
  269. return HASH_MULTIPLY(u.dw[0]) + u.dw[1];
  270. }
  271. inline DWORD Hash(__int64 ll)
  272. { return Hash((unsigned __int64) ll); }
  273. #ifndef __HASHFN_NO_NAMESPACE__
  274. }
  275. #endif // !__HASHFN_NO_NAMESPACE__
  276. #endif // __HASHFN_H__