Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

215 lines
4.9 KiB

  1. /*++
  2. Copyright (c) 1998 Microsoft Corporation
  3. Module Name :
  4. hashfn.h
  5. Abstract:
  6. Declares and defines a collection of overloaded hash functions.
  7. It is strongly suggested that you use these functions with LKHash.
  8. Author:
  9. George V. Reilly (GeorgeRe) 06-Jan-1998
  10. Environment:
  11. Win32 - User Mode
  12. Project:
  13. Internet Information Server RunTime Library
  14. Revision History:
  15. Paul McDaniel (paulmcd) Feb-05-1999 Trimmed for kernel mode
  16. and C (not C++)
  17. --*/
  18. #ifndef __HASHFN_H__
  19. #define __HASHFN_H__
  20. // Produce a scrambled, randomish number in the range 0 to RANDOM_PRIME-1.
  21. // Applying this to the results of the other hash functions is likely to
  22. // produce a much better distribution, especially for the identity hash
  23. // functions such as Hash(char c), where records will tend to cluster at
  24. // the low end of the hashtable otherwise. LKHash applies this internally
  25. // to all hash signatures for exactly this reason.
  26. __inline ULONG
  27. HashScramble(ULONG dwHash)
  28. {
  29. // Here are 10 primes slightly greater than 10^9
  30. // 1000000007, 1000000009, 1000000021, 1000000033, 1000000087,
  31. // 1000000093, 1000000097, 1000000103, 1000000123, 1000000181.
  32. // default value for "scrambling constant"
  33. const ULONG RANDOM_CONSTANT = 314159269UL;
  34. // large prime number, also used for scrambling
  35. const ULONG RANDOM_PRIME = 1000000007UL;
  36. return (RANDOM_CONSTANT * dwHash) % RANDOM_PRIME ;
  37. }
  38. // Fast, simple hash function that tends to give a good distribution.
  39. // Apply HashScramble to the result if you're using this for something
  40. // other than LKHash.
  41. __inline ULONG
  42. HashStringA(
  43. const char* psz,
  44. ULONG dwHash)
  45. {
  46. for ( ; *psz; ++psz)
  47. dwHash = 37 * dwHash + *psz;
  48. return dwHash;
  49. }
  50. // Unicode version of above
  51. __inline ULONG
  52. HashStringW(
  53. const wchar_t* pwsz,
  54. ULONG dwHash)
  55. {
  56. for ( ; *pwsz; ++pwsz)
  57. dwHash = 37 * dwHash + *pwsz;
  58. return dwHash;
  59. }
  60. __inline ULONG
  61. HashCharW(
  62. WCHAR UnicodeChar,
  63. ULONG Hash
  64. )
  65. {
  66. Hash = 37 * Hash + UnicodeChar;
  67. return Hash;
  68. }
  69. // Quick-'n'-dirty case-insensitive string hash function.
  70. // Make sure that you follow up with _stricmp or _mbsicmp. You should
  71. // also cache the length of strings and check those first. Caching
  72. // an uppercase version of a string can help too.
  73. // Again, apply HashScramble to the result if using with something other
  74. // than LKHash.
  75. // Note: this is not really adequate for MBCS strings.
  76. __inline ULONG
  77. HashStringNoCaseA(
  78. const char* psz,
  79. ULONG dwHash)
  80. {
  81. for ( ; *psz; ++psz)
  82. dwHash = 37 * dwHash + (*psz & 0xDF); // strip off lowercase bit
  83. return dwHash;
  84. }
  85. // Unicode version of above
  86. __inline ULONG
  87. HashStringNoCaseW(
  88. const wchar_t* pwsz,
  89. ULONG dwHash)
  90. {
  91. for ( ; *pwsz; ++pwsz)
  92. dwHash = 37 * dwHash + (*pwsz & 0xFFDF);
  93. return dwHash;
  94. }
  95. __inline ULONG
  96. HashCharNoCaseW(
  97. WCHAR UnicodeChar,
  98. ULONG Hash
  99. )
  100. {
  101. Hash = 37 * Hash + (UnicodeChar & 0xFFDF);
  102. return Hash;
  103. }
  // HashBlob returns the hash of a blob of arbitrary binary data.
  //
  // Warning: HashBlob is generally not the right way to hash a class object.
  // (The examples below are written in C++ and use overloaded names such as
  // Hash and HashString; this C version of the header provides only the
  // explicitly named functions.)
  // Consider:
  //
  //     class CFoo {
  //     public:
  //         char   m_ch;
  //         double m_d;
  //         char*  m_psz;
  //     };
  //
  //     inline ULONG Hash(const CFoo& rFoo)
  //     { return HashBlob(&rFoo, sizeof(CFoo)); }
  //
  // This is the wrong way to hash a CFoo for two reasons: (a) there will be
  // a 7-byte gap between m_ch and m_d imposed by the alignment restrictions
  // of doubles, which will be filled with random data (usually non-zero for
  // stack variables), and (b) it hashes the address (rather than the
  // contents) of the string m_psz.  Similarly,
  //
  //     bool operator==(const CFoo& rFoo1, const CFoo& rFoo2)
  //     { return memcmp(&rFoo1, &rFoo2, sizeof(CFoo)) == 0; }
  //
  // does the wrong thing.  Much better to do this:
  //
  //     ULONG Hash(const CFoo& rFoo)
  //     {
  //         return HashString(rFoo.m_psz,
  //                           37 * Hash(rFoo.m_ch) + Hash(rFoo.m_d));
  //     }
  //
  // Again, apply HashScramble if using with something other than LKHash.
  136. __inline ULONG
  137. HashBlob(
  138. PUCHAR pb,
  139. ULONG cb,
  140. ULONG dwHash)
  141. {
  142. while (cb-- > 0)
  143. dwHash = 37 * dwHash + *pb++;
  144. return dwHash;
  145. }
  146. // ======= <snip>
  147. //
  148. // paulmcd: a bunch snipped due to use of overloading, not allowed in C
  149. //
  150. // ======= <snip>
  151. __inline ULONG HashFloat(float f)
  152. {
  153. // be careful of rounding errors when computing keys
  154. union {
  155. float f;
  156. ULONG dw;
  157. } u;
  158. u.f = f;
  159. return u.dw;
  160. }
  161. __inline ULONG HashDouble(double dbl)
  162. {
  163. // be careful of rounding errors when computing keys
  164. union {
  165. double dbl;
  166. ULONG dw[2];
  167. } u;
  168. u.dbl = dbl;
  169. return u.dw[0] * 37 + u.dw[1];
  170. }
  171. #endif // __HASHFN_H__