Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

271 lines
6.9 KiB

  1. /*++
  2. Copyright (c) 1998-2002 Microsoft Corporation
  3. Module Name :
  4. hashfn.h
  5. Abstract:
  6. Declares and defines a collection of overloaded hash functions.
  7. It is strongly suggested that you use these functions with LKRhash.
  8. Author:
  9. George V. Reilly (GeorgeRe) 06-Jan-1998
  10. Environment:
  11. Win32 - User Mode
  12. Project:
  13. Internet Information Server RunTime Library
  14. Revision History:
  15. Paul McDaniel (paulmcd) Feb-05-1999 Trimmed for kernel mode
  16. and C (not C++)
  17. --*/
  18. #ifndef __HASHFN_H__
  19. #define __HASHFN_H__
  20. #include <math.h>
  21. #include <limits.h>
  22. extern WCHAR FastUpcaseChars[256];
  // Upper-case a single Unicode character. Values below 256 are looked up
  // in the FastUpcaseChars table; everything else is passed to
  // RtlUpcaseUnicodeChar. The argument is parenthesized at every use so
  // that a compound argument (for example `c & 0xFF`) is compared against
  // 256 as a whole instead of being re-associated by operator precedence.
  // NOTE: wc is evaluated more than once; do not pass an expression with
  // side effects.
  #define UPCASE_UNICODE_CHAR( wc ) \
      ((wc) < 256 ? FastUpcaseChars[(UCHAR)(wc)] : RtlUpcaseUnicodeChar(wc))
  25. // Produce a scrambled, randomish number in the range 0 to RANDOM_PRIME-1.
  26. // Applying this to the results of the other hash functions is likely to
  27. // produce a much better distribution, especially for the identity hash
  28. // functions such as Hash(char c), where records will tend to cluster at
  29. // the low end of the hashtable otherwise. LKRhash applies this internally
  30. // to all hash signatures for exactly this reason.
  31. // __inline ULONG
  32. // HashScramble(ULONG dwHash)
  33. // {
  34. // // Here are 10 primes slightly greater than 10^9
  35. // // 1000000007, 1000000009, 1000000021, 1000000033, 1000000087,
  36. // // 1000000093, 1000000097, 1000000103, 1000000123, 1000000181.
  37. //
  38. // // default value for "scrambling constant"
  39. // const ULONG RANDOM_CONSTANT = 314159269UL;
  40. // // large prime number, also used for scrambling
  41. // const ULONG RANDOM_PRIME = 1000000007UL;
  42. //
  43. // return (RANDOM_CONSTANT * dwHash) % RANDOM_PRIME ;
  44. // }
  45. //
  46. // Given M = A % B, A and B unsigned 32-bit integers greater than zero,
  47. // there are no values of A or B which yield M = 2^32-1. Why? Because
  48. // M must be less than B.
  49. // #define HASH_INVALID_SIGNATURE ULONG_MAX
  50. // No number in 0..2^31-1 maps to this number after it has been
  51. // scrambled by HashRandomizeBits
  52. #define HASH_INVALID_SIGNATURE 31678523
  53. // Faster scrambling function suggested by Eric Jacobsen
  54. __inline ULONG
  55. HashRandomizeBits(ULONG dw)
  56. {
  57. const ULONG dwLo = ((dw * 1103515245 + 12345) >> 16);
  58. const ULONG dwHi = ((dw * 69069 + 1) & 0xffff0000);
  59. const ULONG dw2 = dwHi | dwLo;
  60. ASSERT(dw2 != HASH_INVALID_SIGNATURE);
  61. return dw2;
  62. }
  // Multiplier applied to the running hash by the hash functions in this
  // file; a small prime.
  #define HASH_MULTIPLIER 101

  // HASH_MULTIPLY(dw) advances a running hash value by one step.
  // HASH_SHIFT_MULTIPLY is explicitly undefined here, so the plain
  // multiply-by-HASH_MULTIPLIER form is always the one selected. The
  // alternative shift/subtract form multiplies by 127 (2^7 - 1, a prime).
  #undef HASH_SHIFT_MULTIPLY

  #ifndef HASH_SHIFT_MULTIPLY
  # define HASH_MULTIPLY(dw) ((dw) * HASH_MULTIPLIER)
  #else
  # define HASH_MULTIPLY(dw) (((dw) << 7) - (dw))
  #endif
  72. // Fast, simple hash function that tends to give a good distribution.
  73. // Apply HashScramble to the result if you're using this for something
  74. // other than LKHash.
  75. __inline ULONG
  76. HashStringA(
  77. const char* psz,
  78. ULONG dwHash)
  79. {
  80. // force compiler to use unsigned arithmetic
  81. const unsigned char* upsz = (const unsigned char*) psz;
  82. for ( ; *upsz != '\0'; ++upsz)
  83. dwHash = HASH_MULTIPLY(dwHash) + *upsz;
  84. return dwHash;
  85. }
  86. // Unicode version of above
  87. __inline ULONG
  88. HashStringW(
  89. const wchar_t* pwsz,
  90. ULONG dwHash)
  91. {
  92. for ( ; *pwsz != L'\0'; ++pwsz)
  93. dwHash = HASH_MULTIPLY(dwHash) + *pwsz;
  94. return dwHash;
  95. }
  96. __inline ULONG
  97. HashCharW(
  98. WCHAR UnicodeChar,
  99. ULONG Hash
  100. )
  101. {
  102. return HASH_MULTIPLY(Hash) + UnicodeChar;
  103. }
  104. // Quick-'n'-dirty case-insensitive string hash function.
  105. // Make sure that you follow up with _stricmp or _mbsicmp. You should
  106. // also cache the length of strings and check those first. Caching
  107. // an uppercase version of a string can help too.
  108. // Again, apply HashScramble to the result if using with something other
  109. // than LKHash.
  110. // Note: this is not really adequate for MBCS strings.
  111. __inline ULONG
  112. HashStringNoCaseA(
  113. const char* psz,
  114. ULONG dwHash)
  115. {
  116. const unsigned char* upsz = (const unsigned char*) psz;
  117. for ( ; *upsz != '\0'; ++upsz)
  118. dwHash = HASH_MULTIPLY(dwHash)
  119. + (*upsz & 0xDF); // strip off lowercase bit
  120. return dwHash;
  121. }
  122. // Unicode version of above
  123. __inline ULONG
  124. HashStringNoCaseW(
  125. const wchar_t* pwsz,
  126. ULONG dwHash)
  127. {
  128. for ( ; *pwsz != L'\0'; ++pwsz)
  129. dwHash = HASH_MULTIPLY(dwHash) + UPCASE_UNICODE_CHAR(*pwsz);
  130. return dwHash;
  131. }
  132. __inline ULONG
  133. HashStringsNoCaseW(
  134. const wchar_t* pwsz1,
  135. const wchar_t* pwsz2,
  136. ULONG dwHash)
  137. {
  138. for ( ; *pwsz1 != L'\0'; ++pwsz1)
  139. dwHash = HASH_MULTIPLY(dwHash) + UPCASE_UNICODE_CHAR(*pwsz1);
  140. for ( ; *pwsz2 != L'\0'; ++pwsz2)
  141. dwHash = HASH_MULTIPLY(dwHash) + UPCASE_UNICODE_CHAR(*pwsz2);
  142. return dwHash;
  143. }
  144. __inline ULONG
  145. HashCharNoCaseW(
  146. WCHAR UnicodeChar,
  147. ULONG Hash
  148. )
  149. {
  150. return HASH_MULTIPLY(Hash) + UPCASE_UNICODE_CHAR(UnicodeChar);
  151. }
  152. // HashBlob returns the hash of a blob of arbitrary binary data.
  153. //
  154. // Warning: HashBlob is generally not the right way to hash a class object.
  155. // Consider:
  156. // class CFoo {
  157. // public:
  158. // char m_ch;
  159. // double m_d;
  160. // char* m_psz;
  161. // };
  162. //
  163. // inline ULONG Hash(const CFoo& rFoo)
  164. // { return HashBlob(&rFoo, sizeof(CFoo)); }
  165. //
  166. // This is the wrong way to hash a CFoo for two reasons: (a) there will be
  167. // a 7-byte gap between m_ch and m_d imposed by the alignment restrictions
  168. // of doubles, which will be filled with random data (usually non-zero for
  169. // stack variables), and (b) it hashes the address (rather than the
  170. // contents) of the string m_psz. Similarly,
  171. //
  172. // bool operator==(const CFoo& rFoo1, const CFoo& rFoo2)
  173. // { return memcmp(&rFoo1, &rFoo2, sizeof(CFoo)) == 0; }
  174. //
  175. // does the wrong thing. Much better to do this:
  176. //
  177. // ULONG Hash(const CFoo& rFoo)
  178. // {
  179. // return HashString(rFoo.m_psz,
  180. // 37 * Hash(rFoo.m_ch) + Hash(rFoo.m_d));
  181. // }
  182. //
  183. // Again, apply HashScramble if using with something other than LKRhash.
  184. __inline ULONG
  185. HashBlob(
  186. PUCHAR pb,
  187. ULONG cb,
  188. ULONG dwHash)
  189. {
  190. while (cb-- > 0)
  191. dwHash = HASH_MULTIPLY(dwHash) + *pb++;
  192. return dwHash;
  193. }
  194. // ======= <snip>
  195. //
  196. // paulmcd: a bunch snipped due to use of overloading, not allowed in C
  197. //
  198. // ======= <snip>
  199. __inline ULONG HashDouble(double dbl)
  200. {
  201. int nExponent;
  202. double dblMantissa;
  203. if (dbl == 0.0)
  204. return 0;
  205. dblMantissa = frexp(dbl, &nExponent);
  206. // 0.5 <= |mantissa| < 1.0
  207. return (ULONG) ((2.0 * fabs(dblMantissa) - 1.0) * UINT_MAX);
  208. }
  209. __inline ULONG HashFloat(float f)
  210. { return HashDouble((double) f); }
  211. #endif // __HASHFN_H__