Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

206 lines
7.4 KiB

  1. /*++
  2. 1998 Seagate Software, Inc. All rights reserved
  3. Module Name:
  4. Wsbhash.cpp
  5. Abstract:
  6. Some functions for hashing text strings and creating DB keys from
  7. file path names.
  8. NOTE: Since no one needed this code by the time I got it done, it
  9. hasn't been tested!
  10. Author:
  11. Ron White [ronw] 25-Apr-1997
  12. Revision History:
  13. --*/
  14. #include "stdafx.h"
  15. // This pseudorandom permutation table (used by the SimpleHash function below)
  16. // is taken from the article referenced in the comments for that function.
  17. static UCHAR perm_table[] = {
  18. 1, 87, 49, 12, 176, 178, 102, 166, 121, 193, 6, 84, 249, 230, 44, 163,
  19. 14, 197, 213, 181, 161, 85, 218, 80, 64, 239, 24, 226, 236, 142, 38, 200,
  20. 110, 177, 104, 103, 141, 253, 255, 50, 77, 101, 81, 18, 45, 96, 31, 222,
  21. 25, 107, 190, 70, 86, 237, 240, 34, 72, 242, 20, 214, 244, 227, 149, 235,
  22. 97, 234, 57, 22, 60, 250, 82, 175, 208, 5, 127, 199, 111, 62, 135, 248,
  23. 174, 169, 211, 58, 66, 154, 106, 195, 245, 171, 17, 187, 182, 179, 0, 243,
  24. 132, 56, 148, 75, 128, 133, 158, 100, 130, 126, 91, 13, 153, 246, 216, 219,
  25. 119, 68, 223, 78, 83, 88, 201, 99, 122, 11, 92, 32, 136, 114, 52, 10,
  26. 138, 30, 48, 183, 156, 35, 61, 26, 143, 74, 251, 94, 129, 162, 63, 152,
  27. 170, 7, 115, 167, 241, 206, 3, 150, 55, 59, 151, 220, 90, 53, 23, 131,
  28. 125, 173, 15, 238, 79, 95, 89, 16, 105, 137, 225, 224, 217, 160, 37, 123,
  29. 118, 73, 2, 157, 46, 116, 9, 145, 134, 228, 207, 212, 202, 215, 69, 229,
  30. 27, 188, 67, 124, 168, 252, 42, 4, 29, 108, 21, 247, 19, 205, 39, 203,
  31. 233, 40, 186, 147, 198, 192, 155, 33, 164, 191, 98, 204, 165, 180, 117, 76,
  32. 140, 36, 210, 172, 41, 54, 159, 8, 185, 232, 113, 196, 231, 47, 146, 120,
  33. 51, 65, 28, 144, 254, 221, 93, 189, 194, 139, 112, 43, 71, 109, 184, 209
  34. };
  35. // Local functions
  36. static HRESULT ProgressiveHash(WCHAR* pWstring, ULONG nChars, UCHAR* pKey,
  37. ULONG keySize, ULONG* pKeyCount);
  38. static UCHAR SimpleHash(UCHAR* pString, ULONG count);
  39. // ProgressiveHash - hash a wide-character string into a byte key of a given
  40. // maximum size. The string is limited to 32K characters (64K bytes) and the
  41. // key size must be at least 16.
  42. //
  43. // The algorithm starts out merely XORing the two bytes of each character into a
  44. // single byte in the key. If it must use the last 15 bytes of the key, it begins
  45. // using the SimpleHash function to hash progressively larger (doubling) chuncks
  46. // of the string into a single byte.
  47. //
  48. // This method is used to try and preserve as much information about short strings
  49. // as possible; to preserve, to some extent, the sort order of strings; and to
  50. // compress long strings into a reasonably sized key. It is assumed (perhaps
  51. // incorrectly) that many of the characters will be ANSI characters an so the
  52. // XOR of the bytes in the initial part of the string won't lose any information.
  53. static HRESULT ProgressiveHash(WCHAR* pWstring, ULONG nChars, UCHAR* pKey,
  54. ULONG keySize, ULONG* pKeyCount)
  55. {
  56. HRESULT hr = S_OK;
  57. try {
  58. ULONG chunk; // Current chunk size
  59. ULONG headSize;
  60. ULONG keyIndex = 0; // Current index into the key
  61. UCHAR* pBytes; // Byte pointer into the string
  62. ULONG remains; // Bytes remaining in the string
  63. // Check arguments
  64. WsbAffirm(NULL != pWstring, E_POINTER);
  65. WsbAffirm(NULL != pKey, E_POINTER);
  66. remains = nChars * 2;
  67. WsbAffirm(65536 >= remains, E_INVALIDARG);
  68. WsbAffirm(15 < keySize, E_INVALIDARG);
  69. // Do the non-progressive part
  70. pBytes = (UCHAR*)pWstring;
  71. headSize = keySize - 15;
  72. while (remains > 0 && keyIndex < headSize) {
  73. pKey[keyIndex++] = (UCHAR) ( *pBytes ^ *(pBytes + 1) );
  74. pBytes += 2;
  75. remains -= 2;
  76. }
  77. // Do the progressive part
  78. chunk = 4;
  79. while (remains > 0) {
  80. if (chunk > remains) {
  81. chunk = remains;
  82. }
  83. pKey[keyIndex++] = SimpleHash(pBytes, chunk);
  84. pBytes += chunk;
  85. remains -= chunk;
  86. chunk *= 2;
  87. }
  88. if (NULL != pKeyCount) {
  89. *pKeyCount = keyIndex;
  90. }
  91. } WsbCatch(hr);
  92. return(hr);
  93. }
  94. // SimpleHash - hash a string of bytes into a single byte.
  95. //
  96. // This algorithm and the permutation table come from the article "Fast Hashing
  97. // of Variable-Length Text Strings" in the June 1990 (33, 6) issue of Communications
  98. // of the ACM (CACM).
  99. // NOTE: For a hash value larger than one byte, the article suggests hashing the
  100. // original string with this function to get one byte, adding 1 (mod 256) to the
  101. // first byte of the string and hashing the new string with this function to get
  102. // the second byte, etc.
  103. static UCHAR SimpleHash(UCHAR* pString, ULONG count)
  104. {
  105. int h = 0;
  106. for (ULONG i = 0; i < count; i++) {
  107. h = perm_table[h ^ pString[i]];
  108. }
  109. return((UCHAR)h);
  110. }
  111. // SquashFilepath - compress a file path name into a (possibly) shorter key.
  112. //
  113. // This function splits the key into a path part (about 3/4 of the initial
  114. // bytes of the key) and a file name part (the rest of the key). For each
  115. // part it uses the ProgressiveHash function to compress the substring.
  116. // This function attempts to preserve enough information in the key that keys
  117. // will be sorted in approximately the same order as the original path names
  118. // and it is unlikely (though not impossible) that two different paths would
  119. // result in the same key. Both of these are dependent on the size of the key.
  120. // A reasonable size is probably 128 bytes, which gives 96 bytes for the path
  121. // and 32 bytes for the file name. A key size of 64 or less will fail because
  122. // the file name part will be too small for the Progressive Hash function.
  123. HRESULT SquashFilepath(WCHAR* pWstring, UCHAR* pKey, ULONG keySize)
  124. {
  125. HRESULT hr = S_OK;
  126. try {
  127. ULONG keyIndex;
  128. ULONG nChars;
  129. WCHAR* pFilename;
  130. ULONG pathKeySize;
  131. // Check arguments
  132. WsbAffirm(NULL != pWstring, E_POINTER);
  133. WsbAffirm(NULL != pKey, E_POINTER);
  134. WsbAffirm(60 < keySize, E_INVALIDARG);
  135. // Calculate some initial values
  136. pFilename = wcsrchr(pWstring, WCHAR('\\'));
  137. if (NULL == pFilename) {
  138. nChars = 0;
  139. pFilename = pWstring;
  140. } else {
  141. nChars = (ULONG)(pFilename - pWstring);
  142. pFilename++;
  143. }
  144. pathKeySize = (keySize / 4) * 3;
  145. // Compress the path
  146. if (0 < nChars) {
  147. WsbAffirmHr(ProgressiveHash(pWstring, nChars, pKey, pathKeySize,
  148. &keyIndex));
  149. } else {
  150. keyIndex = 0;
  151. }
  152. // Fill the rest of the path part of the key with zeros
  153. for ( ; keyIndex < pathKeySize; keyIndex++) {
  154. pKey[keyIndex] = 0;
  155. }
  156. // Compress the file name
  157. nChars = wcslen(pFilename);
  158. if (0 < nChars) {
  159. WsbAffirmHr(ProgressiveHash(pFilename, nChars, &pKey[keyIndex],
  160. keySize - pathKeySize, &keyIndex));
  161. keyIndex += pathKeySize;
  162. }
  163. // Fill the rest of the file name part of the key with zeros
  164. for ( ; keyIndex < keySize; keyIndex++) {
  165. pKey[keyIndex] = 0;
  166. }
  167. } WsbCatch(hr);
  168. return(hr);
  169. }