Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

323 lines
15 KiB

  1. /*
  2. * pathash.h
  3. *
  4. * author: John R. Douceur
  5. * date: 5 May 1997
  6. *
  7. * This header file defines structures, function prototypes, and macros for
  8. * the pat-hash table database. The code is object-oriented C, transliterated
  9. * from a C++ implementation.
  10. *
  11. * The pat-hash database is a combination of a dynamically sized, separately
  12. * chained hash table and a Patricia tree. The hash table dynamically grows
  13. * and shrinks as needed, and the workload of modifying the table size is
  14. * distributed evenly among the insertion or removal operations that cause
  15. * the growth or shrinkage.
  16. *
  17. * The insertion and removal operations manage both a hash table and a Patricia
  18. * tree, but the search routine uses only the hash table for performing the
  19. * search. The Patricia tree is present to support a scan operation, which
  20. * searches the database for all entries that match a given pattern, where the
  21. * pattern that is scanned may contain wildcards.
  22. *
  23. * Because this code is C, rather than C++, it is not possible to hide as
  24. * much of the implementation from the client code as one might wish.
  25. * Nonetheless, there is an attempt to isolate the client from some of the
  26. * implementation details through the use of macros. Below is described each
  27. * of the functions and macros necessary to use the pat-hash table.
  28. *
  29. */
  30. #ifndef _INC_PATHASH
  31. #define _INC_PATHASH
  32. #ifdef __cplusplus
  33. extern "C" {
  34. #endif
  35. /*
  36. * There are three basic structures employed: the PHTableEntry, the
  37. * PHTableGroup, and the PatHashTable. Ideally, these would be completely
  38. * hidden from the client, but the macro GetReferenceFromSpecificPatternHandle
  39. * requires knowledge of the structure's definition. It is strongly urged
  40. * that the client not directly refer to any of the fields of any of these
  41. * structures. To support the documentation of the accompanying pathash.c
  42. * file, these structures are annotated with internal comments, but these can
  43. * be ignored by the reader who wishes only to understand how to write client
  44. * code for the pat-hash table.
  45. *
  46. * The values stored in the pat-hash table are known as specific patterns,
  47. * where the term "specific" implies that the patterns do not contain
  48. * wildcards. The client refers to a pattern by its SpecificPatternHandle.
  49. * This is typedefed to a pointer to PHTableEntry, but this fact should be
  50. * ignored by the client, since it is an implementation detail.
  51. *
  52. */
  53. //#include <stdlib.h>
  54. //#include <malloc.h>
  // An entry is the element in which one specific pattern is stored.  Each
  // entry is both a component of a hash chain (a linked list indexed by the
  // hash table) and a component of the Patricia tree.
  //
  // NOTE(review): value is declared with a single element but is described as
  // "space for storing pattern value"; presumably pathash.c over-allocates each
  // entry so that the whole key fits behind the structure -- confirm there
  // before changing this declaration, since sizeof(PHTableEntry) may be relied
  // upon by the allocation code.
  struct _PHTableEntry
  {
      // hash table fields:
      unsigned int hash;                  // hash value
      struct _PHTableEntry *next;         // pointer to next entry in linked list

      // Patricia tree fields:
      int pivot_bit;                      // bit of key on which to branch
      struct _PHTableEntry *children[2];  // pointers to child nodes

      // general:
      void *reference;                    // reference value supplied by client
      char value[1];                      // space for storing pattern value
  };

  typedef struct _PHTableEntry PHTableEntry;
  71. struct _PHTableGroup
  72. {
  73. // The hash table that indexes the hash chain of entries is itself a
  74. // linked list of structures called groups. Each group is a table of
  75. // pointers to the hash chains of entries, and the group also contains
  76. // a pointer to the previous group, meaning that the groups are backwardly
  77. // linked. The groups are sized in powers of two, so, in addition to one
  78. // special group of size one, there is a group of size one, a group of size
  79. // two, a group of size four, a group of size eight, and so on, up to the
  80. // number of groups necessary to hold the table.
  81. struct _PHTableGroup *previous; // pointer to immediately smaller group
  82. PHTableEntry *entry_list[1]; // space to hold table of chain pointers
  83. };
  84. typedef struct _PHTableGroup PHTableGroup;
  85. struct _PatHashTable
  86. {
  87. int keybits; // number of bits in key
  88. int keybytes; // number of bytes in key, calculated from keybits
  89. int usage_ratio; // desired ratio of entries to hash chains
  90. int usage_histeresis; // histeresis between insertion and removal resizes
  91. int allocation_histeresis; // histeresis between insert and removal mallocs
  92. int max_free_list_size; // maximum size of free entry list
  93. PHTableGroup *initial_group; // pointer to first group to search
  94. PHTableGroup *top_group; // pointer to largest group allocated
  95. int allocation_exponent; // binary exponent of current allocation size
  96. int size_exponent; // binary exponent of current group size
  97. int extension_size; // number of slots in use in initial group
  98. int population; // number of entries in database
  99. PHTableEntry *root; // root of Patricia tree
  100. PHTableEntry *free_list; // list of free (unused) entries
  101. int free_list_size; // number of elements currently on free list
  102. };
  103. typedef struct _PatHashTable PatHashTable;
  104. // The client uses SpecificPatternHandle to refer to values in the database.
  105. typedef PHTableEntry *SpecificPatternHandle;
  106. /*
  107. * The client interface to the pat-hash table is provided by seven functions
  108. * and four macros. It is expected that the client will first instantiate a
  109. * database, either on the stack or the heap, and then insert specific patterns
  110. * with corresponding reference information into the database. The client can
  111. * then search the database for the specific patterns that were stored, and
  112. * it can scan the database for all specific patterns that match a general
  113. * pattern containing wildcards.
  114. *
  115. */
  // A pat-hash table may be allocated on the stack simply by declaring a
  // variable of type PatHashTable.  To allocate one dynamically, use
  // AllocatePatHashTable(_ph): it expands to a GpcAllocMem call that is given
  // the address of _ph, the size of a PatHashTable, and the PathHashTag tag.
  // If this macro is used, a corresponding FreePatHashTable(_ph) must be made
  // to deallocate the structure; it expands to GpcFreeMem with the same tag.
  //
  // _ph must be an lvalue of type PatHashTable *, because its address is taken.
  //
  // NOTE(review): GpcAllocMem, GpcFreeMem and PathHashTag are defined outside
  // this header; presumably GpcAllocMem stores the new pointer into _ph and
  // leaves it null on failure -- confirm against their definitions.
  //
  #define AllocatePatHashTable(_ph) GpcAllocMem(&(_ph), \
                                                sizeof(PatHashTable), \
                                                PathHashTag)
  #define FreePatHashTable(_ph) GpcFreeMem((_ph), PathHashTag)
  127. // Since this is not C++, the PatHashTable structure is not self-constructing;
  128. // therefore, the following constructor code must be called on the PatHashTable
  129. // structure after it is allocated. The argument keybits specifies the size
  130. // (in bits) of each pattern that will be stored in the database. The remaining
  131. // arguments are parameters to the various control systems that govern the size
  132. // of the database.
  133. //
  134. // The usage ratio is the target ratio of database entries to discrete hash
  135. // chains, which is also the mean length of a hash chain: The minimum value
  136. // is one; a larger value slightly decreases memory utilization and
  137. // insertion/removal time at the expense of increasing search time. There is
  138. // benefit to choosing a power of two for this value. Recommended values are
  139. // 2 and 4.
  140. //
  141. // The usage histeresis is the histeresis between resizing operations due to
  142. // insertions and removals. The minimum value is zero, providing no histeresis;
  143. // in this case, if an insertion that causes a increase in table size is
  144. // immediately followed by a removal, the table size will be decreased. Thus,
  145. // a zero histeresis maintains low memory usage, but it engenders resizing
  146. // chatter if insertions and removals are frequent.
  147. //
  148. // Allocation histeresis is the histeresis between allocation and deallocation
  149. // of groups. A group is allocated immediately when it is required by a size
  150. // increase in the table, but it is not necessarily deallocated immediately
  151. // following a size decrease, if the allocation histeresis is set to a value
  152. // greater than zero. Because groups are allocated in powers of two, the
  153. // histeresis value is specified as a binary exponent. A value of 1 causes a
  154. // group to be deallocated when the table is half of the size that will cause
  155. // the group to be re-allocated. A value of 2 causes the group to be
  156. // deallocated when the table is one quarter of the size that will cause the
  157. // group to be re-allocated, and so forth.
  158. //
  159. // The maximum free list size determines the maximum number of elements that
  160. // will be placed on a free list, rather than deallocated, when they are
  161. // removed. Setting this value to zero keeps memory utilization low, but it
  162. // can result in more frequent allocations and deallocation operations, which
  163. // are expensive.
  164. //
  165. int
  166. constructPatHashTable(
  167. PatHashTable *phtable,
  168. int keybits,
  169. int usage_ratio,
  170. int usage_histeresis,
  171. int allocation_histeresis,
  172. int max_free_list_size);
  173. // Since this is not C++, the PatHashTable structure is not self-destructing;
  174. // therefore, the following destructor code must be called on the PatHashTable
  175. // structure before it is deallocated.
  176. //
  177. void
  178. destructPatHashTable(
  179. PatHashTable *phtable);
  180. // Once the PatHashTable structure has been allocated and constructed, patterns
  181. // can be inserted into the database. Each pattern is passed as an array of
  182. // bytes.
  183. //
  184. // Since the PatHashTable structure specifies the size of each pattern, it is
  185. // theoretically possible for the insert routine to digest the submitted
  186. // pattern and produce a hash value therefrom; however, general mechanisms for
  187. // accomplishing this digestion are not very efficient. Therefore, the client
  188. // is responsible for providing a digested form of its input as the chyme
  189. // parameter. If the pattern is no bigger than an unsigned int, then the chyme
  190. // can simply be equal to the pattern. If it is larger, then it should be set
  191. // to something like the exclusive-or of the pattern's fields; however, care
  192. // should be taken to ensure that two patterns are not likely to digest to the
  193. // same chyme value, since this will substantially decrease the efficiency of
  194. // the hash table. One common way of accomplishing this is by rotating the
  195. // fields by varying amounts prior to the exclusive-or.
  196. //
  197. // The client also specifies a reference value, as a void pointer, that it
  198. // wishes to associate with this pattern. When the pattern is installed, the
  199. // insert routine returns a pointer to a SpecificPatternHandle. From the
  200. // SpecificPatternHandle can be gotten the reference value via the macro
  201. // GetReferenceFromSpecificPatternHandle.
  202. //
  203. // If the submitted pattern has already been installed in the database, then
  204. // the insertion does not occur, and the SpecificPatternHandle of the
  205. // previously installed pattern is returned.
  206. //
  207. SpecificPatternHandle
  208. insertPatHashTable(
  209. PatHashTable *phtable,
  210. char *pattern,
  211. unsigned int chyme,
  212. void *reference);
  213. // This function removes a pattern from the pat-hash table. The pattern is
  214. // specified by the SpecificPatternHandle that was returned by the insert
  215. // routine. No checks are performed to insure that this is a valid handle.
  216. //
  217. void
  218. removePatHashTable(
  219. PatHashTable *phtable,
  220. SpecificPatternHandle sphandle);
  221. // This function searches the database for the specific pattern that matches
  222. // the given key, which is passed as an array of bytes. If a match is found,
  223. // the SpecificPatternHandle of that matching specific pattern is returned.
  224. // From the SpecificPatternHandle can be gotten the reference value via the
  225. // macro GetReferenceFromSpecificPatternHandle. If no match is found, then a
  226. // value of 0 is returned as the SpecificPatternHandle.
  227. //
  228. // As with the insert routine, the client is expected to provide a digested
  229. // form of the key as the chyme argument to the routine. This chyme value
  230. // must be calculated in the exact same way for the search routine as it is
  231. // for the insert routine; otherwise, the search will not be able to find the
  232. // matching pattern.
  233. //
  234. SpecificPatternHandle
  235. searchPatHashTable(
  236. PatHashTable *phtable,
  237. char *key,
  238. unsigned int chyme);
  239. // The scan routine (described below) requires the client to supply a callback
  240. // function to be called for each specific pattern that matches the supplied
  241. // general pattern. The following typedef defines the ScanCallback function
  242. // pointer, which specifies the prototype of the callback function that the
  243. // client must provide. The client's callback function must accept a void
  244. // pointer (which is a client-supplied context) and a SpecificPatternHandle.
  245. // The return type of the client's callback function is void.
  246. //
  247. typedef void (*ScanCallback)(void *, SpecificPatternHandle);
  248. // This function searches the database for all specific patterns that match a
  249. // given general pattern. The general pattern is specified by a value and a
  250. // mask. Each bit of the mask determines whether the bit position is specified
  251. // or is a wildcard: A 1 in a mask bit indicates that the value of that bit is
  252. // specified by the general pattern; a 0 indicates that the value of that bit
  253. // is a wildcard. If a mask bit is 1, then the corresponding bit in the value
  254. // field indicates the specified value of that bit. Value and mask fields are
  255. // passed as arrays of bytes.
  256. //
  257. // For each specific pattern in the database that matches the supplied general
  258. // pattern, a client-supplied callback function is called with the
  259. // SpecificPatternHandle of the matching specific pattern. This callback
  260. // function is also passed a context (as a void pointer) that is supplied by
  261. // the client in the call to the scan routine.
  262. //
  263. void
  264. scanPatHashTable(
  265. PatHashTable *phtable,
  266. char *value,
  267. char *mask,
  268. void *context,
  269. ScanCallback func);
  // To get the client-supplied reference value from a SpecificPatternHandle,
  // the following macro should be used.  The client should not make
  // assumptions about the details of the PHTableEntry structure, nor should it
  // even assume that the SpecificPatternHandle is a pointer to a PHTableEntry.
  // GetKeyPtrFromSpecificPatternHandle likewise yields the key pointer (the
  // stored pattern value).
  //
  // Both expansions are fully parenthesized so that they compose safely inside
  // larger expressions; each still designates the underlying field, so
  // existing uses (including use as an lvalue) are unaffected.
  //
  #define GetReferenceFromSpecificPatternHandle(sphandle) ((sphandle)->reference)
  #define GetKeyPtrFromSpecificPatternHandle(sphandle) ((sphandle)->value)
  278. // As described above in the comments on the constructor, if the allocation
  279. // histeresis is non-zero, then the groups will not be deallocated as soon as
  280. // they can be. Similarly, if max free list size is non-zero, then entries
  281. // will not be deallocated as soon as they can be. Thus, unused pieces of
  282. // memory may accumulate, up to a limit. If the client wishes to force the
  283. // pat-hash table to release all of the memory that it currently can, then it
  284. // should call the flush routine, which will deallocate all unneeded groups
  285. // and entries.
  286. //
  287. void
  288. flushPatHashTable(
  289. PatHashTable *phtable);
  290. #ifdef __cplusplus
  291. }
  292. #endif
  293. #endif /* _INC_PATHASH */