Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

684 lines
15 KiB

  1. ///////////////////////////////////////////////////////////////////////////////
  2. //
  3. // Copyright (c) 1997, Microsoft Corp. All rights reserved.
  4. //
  5. // FILE
  6. //
  7. // hashtbl.h
  8. //
  9. // SYNOPSIS
  10. //
  11. // This file describes the hash_table template class.
  12. //
  13. // MODIFICATION HISTORY
  14. //
  15. // 09/23/1997 Original version.
  16. //
  17. ///////////////////////////////////////////////////////////////////////////////
  18. #ifndef _HASHTBL_H_
  19. #define _HASHTBL_H_
  20. #include <algorithm>
  21. #include <functional>
  22. #include <string>
  23. #include <iasapi.h>
  24. #include <nocopy.h>
  25. //////////
  26. // TEMPLATE STRUCT identity
  27. //////////
  // Function object that hands back a copy of whatever it is given.
  //
  // The argument_type / result_type typedefs are spelled out directly rather
  // than inherited from std::unary_function, because that base class was
  // deprecated in C++11 and removed from the standard library in C++17.
  template <class T>
  struct identity
  {
     typedef T argument_type;
     typedef T result_type;

     // Returns a copy of 'value'.
     T operator()(const T& value) const
     {
        return value;
     }
  };
  36. ///////////////////////////////////////////////////////////////////////////////
  37. //
  38. // CLASS
  39. //
  40. // Caster<Type1, Type2>
  41. //
  42. // DESCRIPTION
  43. //
  44. // Function class that casts references from Type1 to Type2. Used for
  45. // the hash table default parameters.
  46. //
  47. ///////////////////////////////////////////////////////////////////////////////
  // Function object that views a Type1 object as a (const) Type2.
  //
  // The argument_type / result_type typedefs are declared directly instead of
  // being inherited from std::unary_function, which was removed in C++17.
  //
  // NOTE: the result is a reference. This is only safe when the conversion
  // binds directly to the argument (Type1 is Type2, or derives from it). If
  // converting Type1 to Type2 has to create a temporary, the returned
  // reference refers to that temporary and dangles as soon as the call
  // returns.
  template <class Type1, class Type2>
  class Caster
  {
  public:
     typedef Type1        argument_type;
     typedef const Type2& result_type;

     // User-provided so that const instances can be default-initialized.
     Caster() {}

     // Returns 'source' viewed as a const Type2&.
     const Type2& operator()(const Type1& source) const
     {
        return source;
     }
  };
  58. ///////////////////////////////////////////////////////////////////////////////
  59. //
  60. // CLASS
  61. //
  62. // ExtractFirst<T>
  63. //
  64. // DESCRIPTION
  65. //
  66. // Function class that extracts the first item from a pair. Useful for
  67. // setting up an STL style map where the first item in the pair is the key.
  68. //
  69. ///////////////////////////////////////////////////////////////////////////////
  // Function object that returns a reference to the 'first' member of a
  // pair-like object T. T must expose a nested 'first_type' and a data member
  // named 'first'.
  //
  // 'typename' is required in front of T::first_type because it is a
  // dependent type name. The typedefs are declared directly rather than
  // inherited from std::unary_function, which was removed in C++17.
  template <class T>
  class ExtractFirst
  {
  public:
     typedef T                             argument_type;
     typedef const typename T::first_type& result_type;

     // Returns a reference to pair.first; no copy is made.
     const typename T::first_type& operator()(const T& pair) const
     {
        return pair.first;
     }
  };
  79. //////////
  80. // I'm putting all the hash functions inside a namespace since hash
  81. // is such a common identifier.
  82. //////////
  83. namespace hash_util
  84. {
  85. ///////////////////////////////////////////////////////////////////////////////
  86. //
  87. // FUNCTION
  88. //
  89. // hash(const std::basic_string<E>& str)
  90. //
  91. // DESCRIPTION
  92. //
  93. // Function to compute a hash value for an STL string.
  94. //
  95. ///////////////////////////////////////////////////////////////////////////////
  96. template <class E>
  97. inline ULONG hash(const std::basic_string<E>& key)
  98. {
  99. return IASHashBytes((CONST BYTE*)key.data(), key.length() * sizeof(E));
  100. }
  101. ///////////////////////////////////////////////////////////////////////////////
  102. //
  103. // FUNCTION
  104. //
  105. // hash(ULONG key)
  106. //
  107. // and
  108. //
  109. // hash(LONG key)
  110. //
  111. // DESCRIPTION
  112. //
  113. // Functions to compute a hash value for a 32-bit integer.
  114. // Uses Robert Jenkins' 32-bit mix function.
  115. //
  116. ///////////////////////////////////////////////////////////////////////////////
  117. inline ULONG hash(ULONG key)
  118. {
  119. key += (key << 12);
  120. key ^= (key >> 22);
  121. key += (key << 4);
  122. key ^= (key >> 9);
  123. key += (key << 10);
  124. key ^= (key >> 2);
  125. key += (key << 7);
  126. key ^= (key >> 12);
  127. return key;
  128. }
  129. inline ULONG hash(LONG key)
  130. {
  131. return hash((ULONG)key);
  132. }
  133. ///////////////////////////////////////////////////////////////////////////////
  134. //
  135. // FUNCTION
  136. //
  137. // hash(const T* key)
  138. //
  139. // DESCRIPTION
  140. //
  141. // Function to compute a hash value for a pointer.
  142. // Implements Knuth's multiplicative hash with a bit shift to account for
  143. // address alignment.
  144. //
  145. ///////////////////////////////////////////////////////////////////////////////
  146. template <class T>
  147. inline ULONG hash(const T* key)
  148. {
  149. return 2654435761 * ((unsigned long)key >> 3);
  150. }
  151. //////////
  152. // Overloadings of the above to hash strings.
  153. //////////
  154. inline ULONG hash<char>(const char* key)
  155. {
  156. return IASHashBytes((CONST BYTE*)key,
  157. key ? strlen(key) : 0);
  158. }
  159. inline ULONG hash<wchar_t>(const wchar_t* key)
  160. {
  161. return IASHashBytes((CONST BYTE*)key,
  162. key ? wcslen(key) * sizeof(wchar_t) : 0);
  163. }
  164. ///////////////////////////////////////////////////////////////////////////////
  165. //
  166. // CLASS
  167. //
  168. // Hasher
  169. //
  170. // DESCRIPTION
  171. //
  172. // Function class that uses the 'default' hash functions defined above.
  173. //
  174. ///////////////////////////////////////////////////////////////////////////////
  175. template <class _Ty>
  176. struct Hasher
  177. : public std::unary_function<_Ty, ULONG>
  178. {
  179. ULONG operator()(const _Ty& _X) const
  180. {
  181. return hash(_X);
  182. }
  183. };
  184. ///////////////////////////////////////////////////////////////////////////////
  185. //
  186. // CLASS
  187. //
  188. // ObjectHasher
  189. //
  190. // DESCRIPTION
  191. //
  192. // Function class that invokes a bound 'hash' method.
  193. //
  194. ///////////////////////////////////////////////////////////////////////////////
  195. template <class _Ty>
  196. struct ObjectHasher
  197. : public std::unary_function<_Ty, ULONG>
  198. {
  199. ULONG operator()(const _Ty& _X) const
  200. {
  201. return _X.hash();
  202. }
  203. };
  204. } // hash_util
  205. ///////////////////////////////////////////////////////////////////////////////
  206. //
  207. // CLASS
  208. //
  209. // hash_table<Key, Hasher, Value, Extractor, KeyMatch>
  210. //
  211. // DESCRIPTION
  212. //
  213. // Implements a general-purpose hash table. This can implement a map, a
  214. // set, or a hybrid depending on how Key, Value, and Extractor are
  215. // specified. Note that the default parameters for Value and Extractor
  216. // implement a set.
  217. //
  218. // NOTES
  219. //
  220. // Although I used similar nomenclature, this is not an STL collection.
  221. // In particular, the iterator does not conform to the STL guidelines.
  222. //
  223. // This class is not thread safe.
  224. //
  225. ///////////////////////////////////////////////////////////////////////////////
  226. template <
  227. class Key,
  228. class Hasher = hash_util::ObjectHasher<Key>,
  229. class Value = Key,
  230. class Extractor = Caster<Value, Key>,
  231. class KeyMatch = std::equal_to<Key>
  232. >
  233. class hash_table : NonCopyable
  234. {
  235. public:
  236. typedef hash_table<Key, Hasher, Value, Extractor, KeyMatch> table_type;
  237. typedef Key key_type;
  238. typedef Value value_type;
  239. protected:
  240. //////////
  241. // Singly-linked list node.
  242. //////////
  243. struct Node
  244. {
  245. Node* next; // Next node in the list (is NULL for last item).
  246. value_type value; // Value stored in this node.
  247. Node(const value_type& _V) : value(_V) {}
  248. // Erase the node immediately following this.
  249. void erase_next()
  250. {
  251. Node* node = next;
  252. next = next->next;
  253. delete node;
  254. }
  255. };
  256. //////////
  257. //
  258. // Singly-linked list. This is not intended to be a general-purpose class;
  259. // it is only intended to serve as a bucket in a hash table.
  260. //
  261. // Note: I have intentionally NOT deleted the list nodes in the destructor.
  262. // This is to support the hash_table grow() method.
  263. //
  264. //////////
  265. struct SList
  266. {
  267. Node* head; // The first node in the list (if any).
  268. SList() : head(NULL) {}
  269. // Delete all nodes in the list.
  270. void clear()
  271. {
  272. while (head) pop_front();
  273. }
  274. // Remove a node from the front of the list.
  275. void pop_front()
  276. {
  277. ((Node*)&head)->erase_next();
  278. }
  279. // Add a node to the front of the list.
  280. void push_front(Node* node)
  281. {
  282. node->next = head;
  283. head = node;
  284. }
  285. };
  286. public:
  287. //////////
  288. //
  289. // Hash table iterator.
  290. //
  291. // Note: This iterator is NOT safe. If the hash table is resized, the
  292. // iterator will no longer be valid.
  293. //
  294. //////////
  295. class const_iterator
  296. {
  297. public:
  298. const_iterator(SList* _first, SList* _end)
  299. : node(_first->head), bucket(_first), end(_end)
  300. {
  301. find_node();
  302. }
  303. const value_type& operator*() const
  304. {
  305. return node->value;
  306. }
  307. const value_type* operator->() const
  308. {
  309. return &**this;
  310. }
  311. void operator++()
  312. {
  313. node = node->next;
  314. find_node();
  315. }
  316. bool more() const
  317. {
  318. return bucket != end;
  319. }
  320. protected:
  321. friend table_type;
  322. Node* MyNode() const
  323. {
  324. return node;
  325. }
  326. // Advance until we're on a node or we've reached the end.
  327. void find_node()
  328. {
  329. while (!node && ++bucket != end)
  330. {
  331. node = bucket->head;
  332. }
  333. }
  334. Node* node; // The node under the iterator.
  335. SList* bucket; // The current bucket.
  336. SList* end; // The end of the bucket array.
  337. };
  338. typedef const_iterator iterator;
  339. //////////
  340. // Constructor.
  341. //////////
  342. hash_table(size_t size = 16,
  343. const Hasher& h = Hasher(),
  344. const Extractor& e = Extractor(),
  345. const KeyMatch& k = KeyMatch())
  346. : buckets(1),
  347. entries(0),
  348. hasher(h),
  349. extractor(e),
  350. key_match(k)
  351. {
  352. // Set buckets to smallest power of 2 greater than or equal to size.
  353. while (buckets < size) buckets <<= 1;
  354. table = new SList[buckets];
  355. // Calculate the hash mask.
  356. mask = buckets - 1;
  357. }
  358. //////////
  359. // Destructor.
  360. //////////
  361. ~hash_table()
  362. {
  363. clear();
  364. delete[] table;
  365. }
  366. //////////
  367. // Return an iterator positioned at the start of the hash table.
  368. //////////
  369. const_iterator begin() const
  370. {
  371. return const_iterator(table, table + buckets);
  372. }
  373. //////////
  374. // Clear all entries from the hash table.
  375. //////////
  376. void clear()
  377. {
  378. if (!empty())
  379. {
  380. for (size_t i=0; i<buckets; i++)
  381. {
  382. table[i].clear();
  383. }
  384. entries = 0;
  385. }
  386. }
  387. bool empty() const
  388. {
  389. return entries == 0;
  390. }
  391. //////////
  392. // Erase all entries matching the given key. Returns the number of entries
  393. // erased.
  394. //////////
  395. size_t erase(const key_type& key)
  396. {
  397. size_t erased = 0;
  398. Node* node = (Node*)&(get_bucket(key).head);
  399. while (node->next)
  400. {
  401. if (key_match(extractor(node->next->value), key))
  402. {
  403. node->erase_next();
  404. ++erased;
  405. }
  406. else
  407. {
  408. node = node->next;
  409. }
  410. }
  411. entries -= erased;
  412. return erased;
  413. }
  414. //////////
  415. // Erases the entry under the current iterator.
  416. //////////
  417. void erase(iterator& it)
  418. {
  419. // Only look in the bucket indicated by the iterator.
  420. Node* node = (Node*)&(it.bucket->head);
  421. while (node->next)
  422. {
  423. // Look for a pointer match -- not a key match.
  424. if (node->next == it.node)
  425. {
  426. // Advance the iterator to a valid node ...
  427. ++it;
  428. // ... then delete the current one.
  429. node->erase_next();
  430. break;
  431. }
  432. node = node->next;
  433. }
  434. }
  435. //////////
  436. // Search the hash table for the first entry matching key.
  437. //////////
  438. const value_type* find(const key_type& key) const
  439. {
  440. return search_bucket(get_bucket(key), key);
  441. }
  442. //////////
  443. // Insert a new entry into the hash table ONLY if the key is unique. Returns
  444. // true if successful, false otherwise.
  445. //////////
  446. bool insert(const value_type& value)
  447. {
  448. reserve_space();
  449. SList& b = get_bucket(extractor(value));
  450. if (search_bucket(b, extractor(value))) return false;
  451. b.push_front(new Node(value));
  452. add_entry();
  453. return true;
  454. }
  455. //////////
  456. // Insert a new entry into the hash table without checking uniqueness.
  457. //////////
  458. void multi_insert(const value_type& value)
  459. {
  460. reserve_space();
  461. get_bucket(extractor(value)).push_front(new Node(value));
  462. add_entry();
  463. }
  464. //////////
  465. // Inserts the entry if the key is unique. Otherwise, overwrites the first
  466. // entry found with a matching key. Returns true if an entry was
  467. // overwritten, false otherwise.
  468. //////////
  469. bool overwrite(const value_type& value)
  470. {
  471. reserve_space();
  472. SList& b = get_bucket(extractor(value));
  473. const value_type* existing = search_bucket(b, extractor(value));
  474. if (existing)
  475. {
  476. // We can get away with modifying the value in place, because we
  477. // know the hash value must be the same. I destroy the old value
  478. // and construct a new one inplace, so that Value doesn't need an
  479. // assignment operator.
  480. existing->~value_type();
  481. new ((void*)existing) value_type(value);
  482. return true;
  483. }
  484. b.push_front(new Node(value));
  485. add_entry();
  486. return false;
  487. }
  488. //////////
  489. // Return the number of entries in the hash table.
  490. //////////
  491. size_t size() const
  492. {
  493. return entries;
  494. }
  495. protected:
  496. //////////
  497. // Increment the entries count.
  498. //////////
  499. void add_entry()
  500. {
  501. ++entries;
  502. }
  503. //////////
  504. // Grow the hash table as needed. We have to separate reserve_space and
  505. // add_entry to make the collection exception safe (since there will be
  506. // an intervening new).
  507. //////////
  508. void reserve_space()
  509. {
  510. if (entries >= buckets) grow();
  511. }
  512. //////////
  513. // Return the bucket for a given key.
  514. //////////
  515. SList& get_bucket(const key_type& key) const
  516. {
  517. return table[hasher(key) & mask];
  518. }
  519. //////////
  520. // Increase the capacity of the hash table.
  521. //////////
  522. void grow()
  523. {
  524. // We must allocate the memory first to be exception-safe.
  525. SList* newtbl = new SList[buckets << 1];
  526. // Initialize an iterator for the old table ...
  527. const_iterator i = begin();
  528. // ... then swap in the new table.
  529. std::swap(table, newtbl);
  530. buckets <<= 1;
  531. mask = buckets - 1;
  532. // Iterate through the old and insert the entries into the new.
  533. while (i.more())
  534. {
  535. Node* node = i.MyNode();
  536. // Increment the iterator ...
  537. ++i;
  538. // ... before we clobber the node's next pointer.
  539. get_bucket(extractor(node->value)).push_front(node);
  540. }
  541. // Delete the old table.
  542. delete[] newtbl;
  543. }
  544. //////////
  545. // Search a bucket for a specified key.
  546. //////////
  547. const value_type* search_bucket(SList& bucket, const key_type& key) const
  548. {
  549. Node* node = bucket.head;
  550. while (node)
  551. {
  552. if (key_match(extractor(node->value), key))
  553. {
  554. return &node->value;
  555. }
  556. node = node->next;
  557. }
  558. return NULL;
  559. }
  560. size_t buckets; // The number of buckets in the hash table.
  561. size_t mask; // Bit mask used for reducing hash values.
  562. size_t entries; // The number of entries in the hash table.
  563. SList* table; // An array of buckets.
  564. Hasher hasher; // Used to hash keys.
  565. Extractor extractor; // Used to convert values to keys.
  566. KeyMatch key_match; // Used to test keys for equality.
  567. };
  568. #endif // _HASHTBL_H_