Leaked source code of Windows Server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

409 lines
10 KiB

  1. /*++
  2. Copyright (c) 1997-2002 Microsoft Corporation
  3. Module Name :
  4. HashFn.h
  5. Abstract:
  6. Declares and defines a collection of overloaded hash functions.
  7. It is strongly suggested that you use these functions with LKRhash.
  8. Author:
  9. George V. Reilly (GeorgeRe) 06-Jan-1998
  10. Environment:
  11. Win32 - User Mode
  12. Project:
  13. LKRhash
  14. Revision History:
  15. --*/
  16. #ifndef __HASHFN_H__
  17. #define __HASHFN_H__
  18. #include <math.h>
  19. #include <limits.h>
  20. #ifndef __HASHFN_NO_NAMESPACE__
  21. namespace HashFn {
  22. #endif // !__HASHFN_NO_NAMESPACE__
  // HASHFN_FORCEINLINE: inlining specifier placed in front of every hash
  // function in this header.  Falls back to plain 'inline' unless the
  // compiler identifies itself as MSVC with _MSC_VER >= 1200.
  #if !defined(_MSC_VER) || (_MSC_VER < 1200)
  # define HASHFN_FORCEINLINE inline
  #else
  // The __forceinline keyword is new to VC6
  # define HASHFN_FORCEINLINE __forceinline
  #endif
  29. // Produce a scrambled, randomish number in the range 0 to RANDOM_PRIME-1.
  30. // Applying this to the results of the other hash functions is likely to
  31. // produce a much better distribution, especially for the identity hash
  32. // functions such as Hash(char c), where records will tend to cluster at
  33. // the low end of the hashtable otherwise. LKRhash applies this internally
  34. // to all hash signatures for exactly this reason.
  35. HASHFN_FORCEINLINE
  36. DWORD
  37. HashScramble(DWORD dwHash)
  38. {
  39. // Here are 10 primes slightly greater than 10^9
  40. // 1000000007, 1000000009, 1000000021, 1000000033, 1000000087,
  41. // 1000000093, 1000000097, 1000000103, 1000000123, 1000000181.
  42. // default value for "scrambling constant"
  43. const DWORD RANDOM_CONSTANT = 314159269UL;
  44. // large prime number, also used for scrambling
  45. const DWORD RANDOM_PRIME = 1000000007UL;
  46. return (RANDOM_CONSTANT * dwHash) % RANDOM_PRIME ;
  47. }
  // Reserved "no such hash" signature.
  //
  // Per the original author's note, no number in 0..2^31-1 maps to
  // 31678523 after it has been scrambled by HashFn::HashRandomizeBits.
  //
  // Alternative for numbers scrambled by HashScramble (kept for reference,
  // not enabled):
  //     HASH_INVALID_SIGNATURE = ULONG_MAX
  // Given M = A % B, with A and B unsigned 32-bit integers greater than
  // zero, no values of A or B yield M = 2^32-1, because M must be less
  // than B.
  enum {
      HASH_INVALID_SIGNATURE = 31678523
  };
  57. // Faster scrambling function suggested by Eric Jacobsen
  58. HASHFN_FORCEINLINE
  59. DWORD
  60. HashRandomizeBits(DWORD dw)
  61. {
  62. const DWORD dwLo = ((dw * 1103515245 + 12345) >> 16);
  63. const DWORD dwHi = ((dw * 69069 + 1) & 0xffff0000);
  64. const DWORD dw2 = dwHi | dwLo;
  65. IRTLASSERT(dw2 != HASH_INVALID_SIGNATURE);
  66. return dw2;
  67. }
  68. #undef HASH_SHIFT_MULTIPLY
  69. #ifdef HASH_SHIFT_MULTIPLY
  70. HASHFN_FORCEINLINE
  71. DWORD
  72. HASH_MULTIPLY(
  73. DWORD dw)
  74. {
  75. // 127 = 2^7 - 1 is prime
  76. return (dw << 7) - dw;
  77. }
  78. #else // !HASH_SHIFT_MULTIPLY
  79. HASHFN_FORCEINLINE
  80. DWORD
  81. HASH_MULTIPLY(
  82. DWORD dw)
  83. {
  84. // Small prime number used as a multiplier in the supplied hash functions
  85. const DWORD HASH_MULTIPLIER = 101;
  86. return dw * HASH_MULTIPLIER;
  87. }
  88. #endif // !HASH_SHIFT_MULTIPLY
  89. // Fast, simple hash function that tends to give a good distribution.
  90. // Apply HashRandomizeBits to the result if you're using this for something
  91. // other than LKRhash.
  92. HASHFN_FORCEINLINE
  93. DWORD
  94. HashString(
  95. const char* psz,
  96. DWORD dwHash = 0)
  97. {
  98. // force compiler to use unsigned arithmetic
  99. const unsigned char* upsz = (const unsigned char*) psz;
  100. for ( ; *upsz != '\0'; ++upsz)
  101. dwHash = HASH_MULTIPLY(dwHash) + *upsz;
  102. return dwHash;
  103. }
  104. // --------------------------------------------------------
  105. // Compute a hash value from an input string of any type, i.e.
  106. // the input is just treated as a sequence of bytes.
  107. // Based on a hash function originally proposed by J. Zobel.
  108. // Author: Paul Larson, 1999, [email protected]
  109. // --------------------------------------------------------
  110. HASHFN_FORCEINLINE
  111. DWORD
  112. HashString2(
  113. const char* pszInputKey, // ptr to input - any type is OK
  114. DWORD dwHash = 314159269) // Initial seed for hash function
  115. {
  116. // Initialize dwHash to a reasonably large constant so very
  117. // short keys won't get mapped to small values. Virtually any
  118. // large odd constant will do.
  119. const unsigned char* upsz = (const unsigned char*) pszInputKey;
  120. for ( ; *upsz != '\0'; ++upsz)
  121. dwHash ^= (dwHash << 11) + (dwHash << 5) + (dwHash >> 2) + *upsz;
  122. return (dwHash & 0x7FFFFFFF);
  123. }
  124. // Unicode version of above
  125. HASHFN_FORCEINLINE
  126. DWORD
  127. HashString(
  128. const wchar_t* pwsz,
  129. DWORD dwHash = 0)
  130. {
  131. for ( ; *pwsz != L'\0'; ++pwsz)
  132. dwHash = HASH_MULTIPLY(dwHash) + *pwsz;
  133. return dwHash;
  134. }
  135. // Quick-'n'-dirty case-insensitive string hash function.
  136. // Make sure that you follow up with _stricmp or _mbsicmp. You should
  137. // also cache the length of strings and check those first. Caching
  138. // an uppercase version of a string can help too.
  139. // Again, apply HashRandomizeBits to the result if using with something other
  140. // than LKRhash.
  141. // Note: this is not really adequate for MBCS strings.
  142. HASHFN_FORCEINLINE
  143. DWORD
  144. HashStringNoCase(
  145. const char* psz,
  146. DWORD dwHash = 0)
  147. {
  148. const unsigned char* upsz = (const unsigned char*) psz;
  149. for ( ; *upsz != '\0'; ++upsz)
  150. {
  151. dwHash = HASH_MULTIPLY(dwHash) + toupper(*upsz);
  152. }
  153. return dwHash;
  154. }
  155. // Unicode version of above
  156. HASHFN_FORCEINLINE
  157. DWORD
  158. HashStringNoCase(
  159. const wchar_t* pwsz,
  160. DWORD dwHash = 0)
  161. {
  162. for ( ; *pwsz != L'\0'; ++pwsz)
  163. {
  164. #ifdef LKRHASH_KERNEL_MODE
  165. dwHash = HASH_MULTIPLY(dwHash) + RtlUpcaseUnicodeChar(*pwsz);
  166. #else
  167. dwHash = HASH_MULTIPLY(dwHash) + towupper(*pwsz);
  168. #endif
  169. }
  170. return dwHash;
  171. }
  172. // HashBlob returns the hash of a blob of arbitrary binary data.
  173. //
  174. // Warning: HashBlob is generally not the right way to hash a class object.
  175. // Consider:
  176. // class CFoo {
  177. // public:
  178. // char m_ch;
  179. // double m_d;
  180. // char* m_psz;
  181. // };
  182. //
  183. // inline DWORD Hash(const CFoo& rFoo)
  184. // { return HashBlob(&rFoo, sizeof(CFoo)); }
  185. //
  186. // This is the wrong way to hash a CFoo for two reasons: (a) there will be
  187. // a 7-byte gap between m_ch and m_d imposed by the alignment restrictions
  188. // of doubles, which will be filled with random data (usually non-zero for
  189. // stack variables), and (b) it hashes the address (rather than the
  190. // contents) of the string m_psz. Similarly,
  191. //
  192. // bool operator==(const CFoo& rFoo1, const CFoo& rFoo2)
  193. // { return memcmp(&rFoo1, &rFoo2, sizeof(CFoo)) == 0; }
  194. //
  195. // does the wrong thing. Much better to do this:
  196. //
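  // DWORD Hash(const CFoo& rFoo)
  // {
  //     // Combine the member hashes with HASH_MULTIPLY(); the multiplier
  //     // constant itself is local to HASH_MULTIPLY and is not visible here.
  //     return HashString(rFoo.m_psz,
  //                       HASH_MULTIPLY(Hash(rFoo.m_ch))
  //                       + Hash(rFoo.m_d));
  // }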
  203. //
  204. // Again, apply HashRandomizeBits if using with something other than LKRhash.
  205. HASHFN_FORCEINLINE
  206. DWORD
  207. HashBlob(
  208. const void* pv,
  209. size_t cb,
  210. DWORD dwHash = 0)
  211. {
  212. const BYTE* pb = static_cast<const BYTE*>(pv);
  213. while (cb-- > 0)
  214. dwHash = HASH_MULTIPLY(dwHash) + *pb++;
  215. return dwHash;
  216. }
  217. // --------------------------------------------------------
  218. // Compute a hash value from an input string of any type, i.e.
  219. // the input is just treated as a sequence of bytes.
  220. // Based on a hash function originally proposed by J. Zobel.
  221. // Author: Paul Larson, 1999, [email protected]
  222. // --------------------------------------------------------
  223. HASHFN_FORCEINLINE
  224. DWORD
  225. HashBlob2(
  226. const void* pvInputKey, // ptr to input - any type is OK
  227. size_t cbKeyLen, // length of input key in bytes
  228. DWORD dwHash = 314159269) // Initial seed for hash function
  229. {
  230. // Initialize dwHash to a reasonably large constant so very
  231. // short keys won't get mapped to small values. Virtually any
  232. // large odd constant will do.
  233. const BYTE* pb = static_cast<const BYTE*>(pvInputKey);
  234. const BYTE* pbSentinel = pb + cbKeyLen;
  235. for ( ; pb < pbSentinel; ++pb)
  236. dwHash ^= (dwHash << 11) + (dwHash << 5) + (dwHash >> 2) + *pb;
  237. return (dwHash & 0x7FFFFFFF);
  238. }
  239. //
  240. // Overloaded hash functions for all the major builtin types.
  241. // Again, apply HashRandomizeBits to result if using with something other than
  242. // LKRhash.
  243. //
  244. HASHFN_FORCEINLINE
  245. DWORD Hash(const char* psz)
  246. { return HashString(psz); }
  247. HASHFN_FORCEINLINE
  248. DWORD Hash(const unsigned char* pusz)
  249. { return HashString(reinterpret_cast<const char*>(pusz)); }
  250. HASHFN_FORCEINLINE
  251. DWORD Hash(const signed char* pssz)
  252. { return HashString(reinterpret_cast<const char*>(pssz)); }
  253. HASHFN_FORCEINLINE
  254. DWORD Hash(const wchar_t* pwsz)
  255. { return HashString(pwsz); }
  256. HASHFN_FORCEINLINE
  257. DWORD
  258. Hash(
  259. const GUID* pguid,
  260. DWORD dwHash = 0)
  261. {
  262. dwHash += * reinterpret_cast<const DWORD*>(pguid);
  263. return dwHash;
  264. }
  265. // Identity hash functions: scalar values map to themselves
  266. HASHFN_FORCEINLINE
  267. DWORD Hash(char c)
  268. { return c; }
  269. HASHFN_FORCEINLINE
  270. DWORD Hash(unsigned char uc)
  271. { return uc; }
  272. HASHFN_FORCEINLINE
  273. DWORD Hash(signed char sc)
  274. { return sc; }
  275. HASHFN_FORCEINLINE
  276. DWORD Hash(short sh)
  277. { return sh; }
  278. HASHFN_FORCEINLINE
  279. DWORD Hash(unsigned short ush)
  280. { return ush; }
  281. HASHFN_FORCEINLINE
  282. DWORD Hash(int i)
  283. { return i; }
  284. HASHFN_FORCEINLINE
  285. DWORD Hash(unsigned int u)
  286. { return u; }
  287. HASHFN_FORCEINLINE
  288. DWORD Hash(long l)
  289. { return l; }
  290. HASHFN_FORCEINLINE
  291. DWORD Hash(unsigned long ul)
  292. { return ul; }
  293. HASHFN_FORCEINLINE
  294. DWORD Hash(double dbl)
  295. {
  296. if (dbl == 0.0)
  297. return 0;
  298. int nExponent;
  299. double dblMantissa = frexp(dbl, &nExponent);
  300. // 0.5 <= |mantissa| < 1.0
  301. return (DWORD) ((2.0 * fabs(dblMantissa) - 1.0) * ULONG_MAX);
  302. }
  303. HASHFN_FORCEINLINE
  304. DWORD Hash(float f)
  305. { return Hash((double) f); }
  306. HASHFN_FORCEINLINE
  307. DWORD Hash(unsigned __int64 ull)
  308. {
  309. union {
  310. unsigned __int64 _ull;
  311. DWORD dw[2];
  312. } u = {ull};
  313. return HASH_MULTIPLY(u.dw[0]) + u.dw[1];
  314. }
  315. HASHFN_FORCEINLINE
  316. DWORD Hash(__int64 ll)
  317. { return Hash((unsigned __int64) ll); }
  318. #ifndef __HASHFN_NO_NAMESPACE__
  319. }
  320. #endif // !__HASHFN_NO_NAMESPACE__
  321. #endif // __HASHFN_H__