Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

261 lines
6.0 KiB

  1. /*++
  2. Copyright (c) 1998 Microsoft Corporation
  3. Module Name :
  4. hashfn.h
  5. Abstract:
  6. Declares and defines a collection of overloaded hash functions.
  7. It is strongly suggested that you use these functions with LKRHash.
  8. Author:
  9. George V. Reilly (GeorgeRe) 06-Jan-1998
  10. Environment:
  11. Win32 - User Mode
  12. Project:
  13. Internet Information Server RunTime Library
  14. Revision History:
  15. --*/
  16. #ifndef __HASHFN_H__
  17. #define __HASHFN_H__
  18. #ifdef __HASHFN_NAMESPACE__
  19. namespace HashFn {
  20. #endif // __HASHFN_NAMESPACE__
  21. // Produce a scrambled, randomish number in the range 0 to RANDOM_PRIME-1.
  22. // Applying this to the results of the other hash functions is likely to
  23. // produce a much better distribution, especially for the identity hash
  24. // functions such as Hash(char c), where records will tend to cluster at
  25. // the low end of the hashtable otherwise. LKRHash applies this internally
  26. // to all hash signatures for exactly this reason.
  27. inline DWORD
  28. HashScramble(DWORD dwHash)
  29. {
  30. // Here are 10 primes slightly greater than 10^9
  31. // 1000000007, 1000000009, 1000000021, 1000000033, 1000000087,
  32. // 1000000093, 1000000097, 1000000103, 1000000123, 1000000181.
  33. // default value for "scrambling constant"
  34. const DWORD RANDOM_CONSTANT = 314159269UL;
  35. // large prime number, also used for scrambling
  36. const DWORD RANDOM_PRIME = 1000000007UL;
  37. return (RANDOM_CONSTANT * dwHash) % RANDOM_PRIME ;
  38. }
  39. // Fast, simple hash function that tends to give a good distribution.
  40. // Apply HashScramble to the result if you're using this for something
  41. // other than LKRHash.
  42. inline DWORD
  43. HashString(
  44. const char* psz,
  45. DWORD dwHash = 0)
  46. {
  47. // force compiler to use unsigned arithmetic
  48. const unsigned char* upsz = (const unsigned char*) psz;
  49. for ( ; *upsz; ++upsz)
  50. dwHash = 37 * dwHash + *upsz;
  51. return dwHash;
  52. }
  53. // Unicode version of above
  54. inline DWORD
  55. HashString(
  56. const wchar_t* pwsz,
  57. DWORD dwHash = 0)
  58. {
  59. for ( ; *pwsz; ++pwsz)
  60. dwHash = 37 * dwHash + *pwsz;
  61. return dwHash;
  62. }
  63. // Quick-'n'-dirty case-insensitive string hash function.
  64. // Make sure that you follow up with _stricmp or _mbsicmp. You should
  65. // also cache the length of strings and check those first. Caching
  66. // an uppercase version of a string can help too.
  67. // Again, apply HashScramble to the result if using with something other
  68. // than LKRHash.
  69. // Note: this is not really adequate for MBCS strings.
  70. inline DWORD
  71. HashStringNoCase(
  72. const char* psz,
  73. DWORD dwHash = 0)
  74. {
  75. const unsigned char* upsz = (const unsigned char*) psz;
  76. for ( ; *upsz; ++upsz)
  77. dwHash = 37 * dwHash + (*upsz & 0xDF); // strip off lowercase bit
  78. return dwHash;
  79. }
  80. // Unicode version of above
  81. inline DWORD
  82. HashStringNoCase(
  83. const wchar_t* pwsz,
  84. DWORD dwHash = 0)
  85. {
  86. for ( ; *pwsz; ++pwsz)
  87. dwHash = 37 * dwHash + (*pwsz & 0xFFDF);
  88. return dwHash;
  89. }
  90. // HashBlob returns the hash of a blob of arbitrary binary data.
  91. //
  92. // Warning: HashBlob is generally not the right way to hash a class object.
  93. // Consider:
  94. // class CFoo {
  95. // public:
  96. // char m_ch;
  97. // double m_d;
  98. // char* m_psz;
  99. // };
  100. //
  101. // inline DWORD Hash(const CFoo& rFoo)
  102. // { return HashBlob(&rFoo, sizeof(CFoo)); }
  103. //
  104. // This is the wrong way to hash a CFoo for two reasons: (a) there will be
  105. // a 7-byte gap between m_ch and m_d imposed by the alignment restrictions
  106. // of doubles, which will be filled with random data (usually non-zero for
  107. // stack variables), and (b) it hashes the address (rather than the
  108. // contents) of the string m_psz. Similarly,
  109. //
  110. // bool operator==(const CFoo& rFoo1, const CFoo& rFoo2)
  111. // { return memcmp(&rFoo1, &rFoo2, sizeof(CFoo)) == 0; }
  112. //
  113. // does the wrong thing. Much better to do this:
  114. //
  115. // DWORD Hash(const CFoo& rFoo)
  116. // {
  117. // return HashString(rFoo.m_psz,
  118. // 37 * Hash(rFoo.m_ch) + Hash(rFoo.m_d));
  119. // }
  120. //
  121. // Again, apply HashScramble if using with something other than LKRHash.
  122. inline DWORD
  123. HashBlob(
  124. const void* pv,
  125. size_t cb,
  126. DWORD dwHash = 0)
  127. {
  128. LPBYTE pb = static_cast<LPBYTE>(const_cast<void*>(pv));
  129. while (cb-- > 0)
  130. dwHash = 37 * dwHash + *pb++;
  131. return dwHash;
  132. }
  133. //
  134. // Overloaded hash functions for all the major builtin types.
  135. // Again, apply HashScramble to result if using with something other than
  136. // LKRHash.
  137. //
  138. inline DWORD Hash(const char* psz)
  139. { return HashString(psz); }
  140. inline DWORD Hash(const unsigned char* pusz)
  141. { return HashString(reinterpret_cast<const char*>(pusz)); }
  142. inline DWORD Hash(const signed char* pssz)
  143. { return HashString(reinterpret_cast<const char*>(pssz)); }
  144. inline DWORD Hash(const wchar_t* pwsz)
  145. { return HashString(pwsz); }
  146. inline DWORD
  147. Hash(
  148. const GUID* pguid,
  149. DWORD dwHash = 0)
  150. {
  151. DWORD* pdw = reinterpret_cast<DWORD*>(const_cast<GUID*>(pguid));
  152. dwHash = 37 * dwHash + *pdw++;
  153. dwHash = 37 * dwHash + *pdw++;
  154. dwHash = 37 * dwHash + *pdw++;
  155. dwHash = 37 * dwHash + *pdw;
  156. return dwHash;
  157. }
  158. // Identity hash functions: scalar values map to themselves
  159. inline DWORD Hash(char c)
  160. { return c; }
  161. inline DWORD Hash(unsigned char uc)
  162. { return uc; }
  163. inline DWORD Hash(signed char sc)
  164. { return sc; }
  165. inline DWORD Hash(short sh)
  166. { return sh; }
  167. inline DWORD Hash(unsigned short ush)
  168. { return ush; }
  169. inline DWORD Hash(int i)
  170. { return i; }
  171. inline DWORD Hash(unsigned int u)
  172. { return u; }
  173. inline DWORD Hash(long l)
  174. { return l; }
  175. inline DWORD Hash(unsigned long ul)
  176. { return ul; }
  177. inline DWORD Hash(float f)
  178. {
  179. // be careful of rounding errors when computing keys
  180. union {
  181. float f;
  182. DWORD dw;
  183. } u;
  184. u.f = f;
  185. return u.dw;
  186. }
  187. inline DWORD Hash(double dbl)
  188. {
  189. // be careful of rounding errors when computing keys
  190. union {
  191. double dbl;
  192. DWORD dw[2];
  193. } u;
  194. u.dbl = dbl;
  195. return u.dw[0] * 37 + u.dw[1];
  196. }
  197. #ifdef __HASHFN_NAMESPACE__
  198. }
  199. #endif // __HASHFN_NAMESPACE__
  200. #endif // __HASHFN_H__