Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

4383 lines
106 KiB

  1. #ifndef _DHT_HXX_INCLUDED
  2. #define _DHT_HXX_INCLUDED
  3. #pragma warning ( disable : 4200 ) // we allow zero sized arrays
  4. // asserts
  5. //
  6. // #define DHTAssert to point to your favorite assert function per #include
  7. #ifdef DHTAssert
  8. #else // !DHTAssert
  9. #define DHTAssert Assert
  10. #endif // DHTAssert
  11. #include <sync.hxx>
  12. #ifdef DEBUG
  13. // turns on unique names for bucket reader/writer locks (adds 60 bytes per BUCKET)
  14. #define UNIQUE_BUCKET_NAMES
  15. #ifdef UNIQUE_BUCKET_NAMES
  16. #include <stdio.h>
  17. #endif // UNIQUE_BUCKET_NAMES
  18. #endif
  19. namespace DHT {
  20. /////////////////////////////////////////////////////////////////////////////////////////
  21. // CDynamicHashTable
  22. //
  23. // Implements a dynamically resizable hash table of entries stored using a unique key
  24. //
  25. // CKey = class representing keys used to identify entries in the hash table
  26. // CEntry = class representing entries stored in the hash table
  27. // (required copy-constructor)
  28. template< class CKey, class CEntry >
  29. class CDynamicHashTable
  30. {
  31. public:
  32. // counter type (uses native word size of machine)
  33. typedef ULONG_PTR NativeCounter;
  34. // class controlling the Key and Entry for each entry in the hash table
  35. //
  36. // NOTE: All member functions must be defined by the user per instance
  37. // of this template. These functions must be defined after the
  38. // template definition. Declaring these functions to be inline
  39. // will allow full optimization by the compiler!
class CKeyEntry
{
    public:

        //  produces the hash value for the specified key.  this hash
        //  function should produce numbers as uniformly as possible over
        //  as large a range as possible for good performance
        static NativeCounter Hash( const CKey& key );

        //  produces the hash value for this entry's key.  this hash
        //  function should produce the same number as the above function
        //  for the same key
        NativeCounter Hash() const;

        //  returns fTrue if this entry matches the given key.  this way,
        //  the key doesn't necessarily have to be stored in the hash table
        //  entry
        //
        //  e.g.:  CEntry can be PBF and key can be IFMP/PGNO where the
        //  actual IFMP/PGNO is stored in the BF structure.  this would
        //  ruin cache locality, of course, but it would use less memory
        //
        //  note that the entry could also contain some kind of hash value
        //  for the key allowing some weeding out of entries before jumping
        //  off to the full structure for a full comparison.  an example
        //  of this would be the SPAIRs from SORT
        BOOL FEntryMatchesKey( const CKey& key ) const;

        //  sets the contained entry to the given entry
        void SetEntry( const CEntry& entry );

        //  gets the contained entry
        void GetEntry( CEntry* const pentry ) const;

    public:

        //  the contained entry (the only data member; the key is implicit)
        CEntry m_entry;

        //  construction/destruction/assignment are intentionally not
        //  allowed:  CKeyEntry objects live only inside raw BUCKET memory
        //  and are manipulated via SetEntry/GetEntry
        ~CKeyEntry();   //  not allowed

    private:

        CKeyEntry();    //  not allowed
        CKeyEntry *operator =( const CKeyEntry & );     //  not allowed
};
//  API Error Codes
//
//  returned by the Err* member functions; errSuccess is zero so errors
//  can be tested with a simple comparison against errSuccess
enum ERR
{
    errSuccess,             //  success
    errOutOfMemory,         //  not enough memory
    errInvalidParameter,    //  bad argument to function
    errEntryNotFound,       //  entry was not found
    errNoCurrentEntry,      //  currency is invalid
    errKeyDuplicate,        //  cannot insert because key already exists
};
  85. // API Lock Context
  86. class CLock;
  87. public:
  88. CDynamicHashTable( const NativeCounter rankDHTrwlBucket );
  89. ~CDynamicHashTable();
  90. ERR ErrInit( const double dblLoadFactor,
  91. const double dblUniformity,
  92. const NativeCounter cBucketMinimum = 0 );
  93. void Term();
  94. void ReadLockKey( const CKey& key, CLock* const plock );
  95. void ReadUnlockKey( CLock* const plock );
  96. void WriteLockKey( const CKey& key, CLock* const plock );
  97. void WriteUnlockKey( CLock* const plock );
  98. ERR ErrRetrieveEntry( CLock* const plock, CEntry* const pentry );
  99. ERR ErrReplaceEntry( CLock* const plock, const CEntry& entry );
  100. ERR ErrInsertEntry( CLock* const plock, const CEntry& entry );
  101. ERR ErrDeleteEntry( CLock* const plock );
  102. void BeginHashScan( CLock* const plock );
  103. void BeginHashScanFromKey( const CKey& key, CLock* const plock );
  104. ERR ErrMoveNext( CLock* const plock, BOOL* const pfNewBucket = NULL );
  105. void EndHashScan( CLock* const plock );
  106. #ifdef DHT_STATS
  107. long CBucketOverflow() const { return m_cBucketOverflowInsert; }
  108. long CBucketSplit() const { return m_cBucketSplit; }
  109. long CBucketMerge() const { return m_cBucketMerge; }
  110. long CDirectorySplit() const { return m_cDirSplit; }
  111. long CDirectoryMerge() const { return m_cDirMerge; }
  112. long CStateTransition() const { return m_cTransition; }
  113. long CPolicySelection() const { return m_cSelection; }
  114. long CSplitContend() const { return m_cSplitContend; }
  115. long CMergeContend() const { return m_cMergeContend; }
  116. #else // !DHT_STATS
  117. long CBucketOverflow() const { return 0; }
  118. long CBucketSplit() const { return 0; }
  119. long CBucketMerge() const { return 0; }
  120. long CDirectorySplit() const { return 0; }
  121. long CDirectoryMerge() const { return 0; }
  122. long CStateTransition() const { return 0; }
  123. long CPolicySelection() const { return 0; }
  124. long CSplitContend() const { return 0; }
  125. long CMergeContend() const { return 0; }
  126. #endif // DHT_STATS
  127. private:
//  possible states for the hash-table
//
//  DANGER!  DANGER!  DANGER WILL ROBINSON!
//
//  DO NOT CHANGE THE ENUMERATION VALUES!  CODE IS DEPENDENT ON THEM BEING AS THEY ARE!
//  (specifically, "stateCur >> 4" is used to test for >= 16, i.e. stateSplit/stateMerge;
//  NcDIRIGetBucket/NcDIRIGetBucketMax index m_dirptrs[] with that expression, so only
//  stateSplit (16) and stateMerge (17) select the backup directory-pointer copy)
//
//  DANGER!  DANGER!  DANGER WILL ROBINSON!
enum ENUMSTATE
{
    stateNil                = 0,
    stateShrinkFromGrow     = 1,
    stateShrinkFromGrow2    = 2,
    stateGrowFromShrink     = 3,
    stateGrowFromShrink2    = 4,
    stateSplitFromGrow      = 5,
    stateSplitFromGrow2     = 6,
    stateGrowFromSplit      = 7,
    stateGrowFromSplit2     = 8,
    stateMergeFromShrink    = 9,
    stateMergeFromShrink2   = 10,
    stateShrinkFromMerge    = 11,
    stateShrinkFromMerge2   = 12,
    stateUnused             = 13,
    stateGrow               = 14,
    stateShrink             = 15,
    stateSplit              = 16,
    stateMerge              = 17,
};
//  Constants

enum { cbitByte = 8 };                                              //  bits per byte
enum { cbitNativeCounter = sizeof( NativeCounter ) * cbitByte };    //  bits per NativeCounter (32 or 64)
//  BUCKET
//
//  - this is the individual unit of allocation for each logical bucket
//  - each BUCKET contains several CKeyEntry objects packed together
//  - BUCKETs are chained together to make up the entire logical bucket
struct BUCKET
{
    public:

        //  read-write-lock/prev-ptr
        //
        //  in the primary BUCKET (allocated as a part of an array), this is the read-write-lock
        //  (placement-constructed into the raw byte buffer by ErrDIRInitBucketArray)
        //  in secondary BUCKETs, this is the prev-ptr for reverse traversal
        union
        {
            BYTE    m_rgbRWL[ sizeof( OSSYNC::CReaderWriterLock ) ];
            BUCKET  *m_pBucketPrev;
        };

        //  next/end pointer
        //
        //  when this points outside of the array of buckets, it points to the next BUCKET
        //  when this points inside of the array of buckets, it points to the first free entry
        union
        {
            BYTE        *m_pb;
            BUCKET      *m_pBucketNext;
            CKeyEntry   *m_pEntryLast;
        };

        //  array of entries (it will contain 'load-factor' entries)
        //  zero-sized array:  the real extent is determined by the allocation
        //  size (m_cbBucket); see "#pragma warning ( disable : 4200 )" above
        CKeyEntry       m_rgEntry[];

    public:

        //  return the properly typed CReaderWriterLock
        //  (C-style cast also strips const, so the lock is usable from
        //  const member functions of the hash table)
        OSSYNC::CReaderWriterLock& CRWL() const
        {
            return (OSSYNC::CReaderWriterLock &)m_rgbRWL;
        }
};
  194. typedef BUCKET* PBUCKET;
//  BUCKETPool
//
//  pool of BUCKET structures (reservation system for bucket split/merge)
//
//  m_cReserve is a "credit" counter:  FPOOLReserve takes credit with an
//  atomic decrement; a negative result means the list has no spare BUCKET
//  backing the reservation and one must be allocated (FPOOLReserve_)
class BUCKETPool
{
    public:

        PBUCKET             m_pReserve;     //  list of BUCKET structures available for reservation
        long                m_cReserve;     //  number of BUCKET structures available to be reserved
        OSSYNC::CSemaphore  m_semReserve;   //  protection for reservation ptrs
#ifdef _WIN64
        BYTE                m_rgbRsvd[ 40 ];    //  padding (presumably to pad the structure out to a cache line -- TODO confirm)
#else  //  !_WIN64
        BYTE                m_rgbRsvd[ 20 ];    //  padding (presumably to pad the structure out to a cache line -- TODO confirm)
#endif  //  _WIN64

    public:

        //  construct an empty pool
        BUCKETPool()
            :   m_semReserve( CSyncBasicInfo( "CDynamicHashTable::BUCKETPool::m_semReserve" ) )
        {
            //  initialize vars
            m_pReserve = NULL;
            m_cReserve = 0;

            //  prepare the semaphore to have 1 owner
            //  (it is used as a mutex protecting m_pReserve; Acquire/Release
            //  pairs below take and return that single count)
            m_semReserve.Release();

#ifdef DEBUG
            memset( m_rgbRsvd, 0, sizeof( m_rgbRsvd ) );
#endif  //  DEBUG
        }

        //  terminate:  free every BUCKET still parked on the free list
        ~BUCKETPool()
        {
            while ( m_pReserve )
            {
                PBUCKET pBucket;

                pBucket = m_pReserve;
                m_pReserve = m_pReserve->m_pBucketNext;
                MEMFree( pBucket );
            }
            m_cReserve = 0;
        }

        //  reserve a BUCKET structure
        //
        //  "allocate" a bucket from the list by decrementing the counter of available buckets
        //  if the counter went below zero, we need add a bucket to the list now (or fail)
        //  to make sure we can honor the request later
        BOOL FPOOLReserve( const NativeCounter cbBucket )
        {
            //  reserve a bucket using the counter
            //  (fast path:  credit was available, no lock taken)
            if ( AtomicDecrement( (long*)&m_cReserve ) >= 0 )
            {
                return fTrue;
            }

            //  reserve a bucket from the heap
            else
            {
                return FPOOLReserve_( cbBucket );
            }
        }

        //  slow path of FPOOLReserve:  back the reservation with a fresh heap
        //  allocation, or undo the reservation on failure
        BOOL FPOOLReserve_( const NativeCounter cbBucket )
        {
            //  at this point, we need to increment m_cReserve for 1 of 2 reasons:
            //      the allocation will succeed and we will add the new bucket to the list
            //      the allocation will fail and we can't leave without "deallocating" the bucket
            AtomicIncrement( (long*)&m_cReserve );

            //  we need to allocate a bucket and add it to the list (to back the reservation we want)
            const PBUCKET pBucket = PBUCKET( PvMEMAlloc( cbBucket ) );
            if ( pBucket )
            {
                //  add the bucket to the list (m_semReserve serializes list edits)
                m_semReserve.Acquire();
                pBucket->m_pBucketNext = m_pReserve;
                m_pReserve = pBucket;
                m_semReserve.Release();

                //  reservation succeeded
                return fTrue;
            }

            //  the allocation failed so the reservation cannot succeed
            return fFalse;
        }

        //  commit a reservation:  pop a BUCKET off the free list
        //  (a prior successful FPOOLReserve guarantees the list is non-empty)
        BUCKET *PbucketPOOLCommit()
        {
            PBUCKET pBucketReserve;

            //  assign a bucket to the reservation
            m_semReserve.Acquire();
            pBucketReserve = m_pReserve;
            DHTAssert( pBucketReserve );
            m_pReserve = m_pReserve->m_pBucketNext;
            m_semReserve.Release();

            //  return the bucket
            return pBucketReserve;
        }

        //  release the reservation (the reserved BUCKET stays on the list
        //  for a future reservation)
        void POOLUnreserve()
        {
            //  "deallocate" the bucket that was previously reserved
            AtomicIncrement( (long*)&m_cReserve );
        }
};
//  HOTSTUFF
//
//  "hot" elements of the hash-table (hashed to array of size 2*cProcessor elems)
//  spreading these frequently-written counters across per-slot structures
//  reduces write contention between processors
//
//  32 bytes on WIN32
//  64 bytes on WIN64
//
struct HOTSTUFF
{
    public:

        NativeCounter           m_cEntry;       //  counter for entries
        NativeCounter           m_cOp;          //  counter for inserts/deletes
        OSSYNC::CMeteredSection m_cms;          //  metered section for changing states
#ifdef _WIN64
        BYTE                    m_rgbRsvd[ 24 ];    //  alignment padding
#else  //  !_WIN64
        BYTE                    m_rgbRsvd[ 12 ];    //  alignment padding
#endif  //  _WIN64
        BUCKETPool              m_bucketpool;   //  pool of BUCKET blobs

        HOTSTUFF()
            :   m_cms()
        {
            m_cEntry = 0;
            m_cOp = 0;

#ifdef DEBUG
            memset( m_rgbRsvd, 0, sizeof( m_rgbRsvd ) );
#endif  //  DEBUG
        }
};
//  DIRPTRS
//
//  containment for the directory pointers
//  these pointers control the use of the directory itself (m_rgrgBucket)
//
//  the hash table will always have a minimum of 2 buckets (0 and 1) in the directory
//
//  buckets are stored in dynamically allocated arrays which are pointed to by the directory
//  each array is 2 times larger than the previous array (exponential growth)
//  e.g. the Nth array (m_rgrgBucket[N]) contains 2^N contiguous buckets
//  NOTE: the 0th array is special in that it contains an extra element making its total 2 elements
//        (normally, 2^0 == 1 element; this is done for magical reasons to be explained later)
//  thus, the total number of entries for a given N is:
//          N
//    1  +  SUM 2^i  -->  1 + [ 2^(N+1) - 1 ]  -->  2^(N+1)
//         i=0
//
//  we know the total number of distinct hash values is a power of 2 (it must fit into a NativeCounter)
//  we can represent this with 2^M where M is the number of bits in a NativeCounter
//  therefore, assuming the above system of exponential growth,
//  we know that we can store the total number of hash buckets required at any given time so long as N = M
//  in other words, N = # of bits in NativeCounter --> sizeof( NativeCounter ) * 8
//
//  therefore, we can statically allocate the array of bucket arrays
//  and, we can use LOG2 to compute the bucket address of any given hash value
//  (exceptions: DIRILog2( 0 ) => 0, 0 and DIRILog2( 1 ) => 0, 1)
//
//  for an explanation of m_cBucketMax and m_cBucket you should read the paper on
//  Dynamic Hashing by Per-Ake Larson
//
//  160 bytes on WIN32 (5 cache lines)
//  320 bytes on WIN64 (10 cache lines)
struct DIRPTRS
{
    NativeCounter   m_cBucketMax;   //  half-way to last bucket in split iteration (2^(n-1))
    NativeCounter   m_cBucket;      //  destination of next split (0 to 2^(n-1)), must add to m_cBucketMax
#ifdef _WIN64
    BYTE            m_rgbRsvd[ 16 ];    //  alignment padding
#else  //  !_WIN64
    BYTE            m_rgbRsvd[ 8 ];     //  alignment padding
#endif  //  _WIN64
};
//  CLock
//
//  - lock context for read/write/scan operations on the hash-table
//  - tracks currency within a bucket
//  - access is restricted to the dynamic-hash-table
public:

class CLock
{
    friend class CDynamicHashTable< CKey, CEntry >;

    public:

        //  possible states for a lock context (class CLock)
        enum ENUMLOCKSTATE
        {
            lsNil   = 0,    //  lock is not used
            lsRead  = 1,    //  lock is being used to read a particular CKeyEntry object
            lsWrite = 2,    //  lock is being used to write a particular CKeyEntry object
            lsScan  = 3,    //  lock is being used to scan the hash-table
        };

    public:

        CLock()
        {
            //  NOTE(review): only m_ls and m_pBucketHead are initialized here;
            //  the remaining members (m_fInsertOrDelete, m_phs, cursor ptrs)
            //  appear to be set when a lock is acquired -- confirm against the
            //  lock-acquisition code
            m_ls = lsNil;
            m_pBucketHead = NULL;
        }

        ~CLock()
        {
            //  the lock must have been released before destruction
            DHTAssert( m_pBucketHead == NULL );
        }

    private:

        //  lock state
        ENUMLOCKSTATE   m_ls;                   //  current state of this lock context
        BOOL            m_fInsertOrDelete;      //  set when the locked bucket was modified

        //  HOTSTUFF pointer
        HOTSTUFF        *m_phs;

#ifdef DEBUG
        //  debug-only parameters
        CKey            m_key;                  //  track the key that should be locked
#endif

        //  ptr to the first BUCKET (the primary bucket whose r/w-lock is held)
        BUCKET          *m_pBucketHead;

        //  ptr to the current BUCKET
        BUCKET          *m_pBucket;             //  current BUCKET

        //  ISAM-style cursor on current BUCKET (m_pBucket)
        CKeyEntry       *m_pEntryPrev;          //  previous entry
        CKeyEntry       *m_pEntry;              //  current entry
        CKeyEntry       *m_pEntryNext;          //  next entry

        //  current bucket (used in scan-mode only)
        NativeCounter   m_iBucket;              //  current bucket
};
  412. /////////////////////////////////////////////////////////////////////////////////////////
  413. //
  414. // state machine
  415. //
  416. const int UiSTEnter( HOTSTUFF **pphs )
  417. {
  418. // hash to the HOTSTUFF structure
  419. *pphs = HOTSTUFFHash();
  420. // enter the metered section
  421. return ( *pphs )->m_cms.Enter();
  422. }
//  leave the state machine
//
//  group must be the value returned by the matching UiSTEnter() call, and
//  phs the HOTSTUFF it returned
void STLeave( const int group, HOTSTUFF *phs )
{
    phs->m_cms.Leave( group );
}
//  returns the current state of the hash-table's state machine
const ENUMSTATE EsSTGetState() const
{
    return m_stateCur;
}
//  initiate a transition to the desired state
//
//  publishes the new state and resets the completion count, then partitions
//  every HOTSTUFF metered section; each partition invokes STCompletion_()
//  with this table, and the last one to drain performs the per-state
//  completion work (see STCompletion)
void STTransition( const ENUMSTATE esNew )
{
    //  initiate a transition to the desired state
    m_stateCur = esNew;
    m_cCompletions = 0;
    for ( NativeCounter ihs = 0; ihs < m_chs; ihs++ )
    {
        m_rghs[ ihs ].m_cms.Partition( OSSYNC::CMeteredSection::PFNPARTITIONCOMPLETE( STCompletion_ ), DWORD_PTR( this ) );
    }
}
//  static trampoline for the metered-section partition callback:  forwards
//  to the member completion routine on the given table instance
static void STCompletion_( CDynamicHashTable< CKey, CEntry >* pdht )
{
    pdht->STCompletion();
}
//  one HOTSTUFF metered section has finished its partition; when the last
//  one completes, run the current state's completion function and chain
//  into the next state (if any)
void STCompletion()
{
    //  state transition table, indexed by ENUMSTATE:  the optional completion
    //  function for the state and the state to transition to next
    //  (stateNil == 0 means "no next state")
    typedef void (CDynamicHashTable< CKey, CEntry >::*PfnCompletion)();
    struct StateTransitionTable
    {
        PfnCompletion   m_pfnCompletion;
        ENUMSTATE       m_stNext;
    };
    static const StateTransitionTable rgstt[] =
    {
        /*  stateNil                */  {   NULL,                       stateNil,               },
        /*  stateShrinkFromGrow     */  {   NULL,                       stateShrinkFromGrow2,   },
        /*  stateShrinkFromGrow2    */  {   NULL,                       stateShrink,            },
        /*  stateGrowFromShrink     */  {   NULL,                       stateGrowFromShrink2,   },
        /*  stateGrowFromShrink2    */  {   NULL,                       stateGrow,              },
        /*  stateSplitFromGrow      */  {   NULL,                       stateSplitFromGrow2,    },
        /*  stateSplitFromGrow2     */  {   STCompletionCopyDir,        stateSplit,             },
        /*  stateGrowFromSplit      */  {   NULL,                       stateGrowFromSplit2,    },
        /*  stateGrowFromSplit2     */  {   NULL,                       stateGrow,              },
        /*  stateMergeFromShrink    */  {   NULL,                       stateMergeFromShrink2,  },
        /*  stateMergeFromShrink2   */  {   STCompletionCopyDir,        stateMerge,             },
        /*  stateShrinkFromMerge    */  {   NULL,                       stateShrinkFromMerge2,  },
        /*  stateShrinkFromMerge2   */  {   NULL,                       stateShrink,            },
        /*  stateUnused             */  {   NULL,                       stateNil,               },
        /*  stateGrow               */  {   STCompletionGrowShrink,     stateNil,               },
        /*  stateShrink             */  {   STCompletionGrowShrink,     stateNil,               },
        /*  stateSplit              */  {   STCompletionSplit,          stateGrowFromSplit,     },
        /*  stateMerge              */  {   STCompletionMerge,          stateShrinkFromMerge,   },
    };

    //  all metered sections have transitioned to the new state
    //  (only the thread that performs the final increment proceeds)
    if ( NativeCounter( AtomicIncrement( &m_cCompletions ) ) >= m_chs )
    {
        STATStateTransition();

        //  save the current state as it may change as a side-effect of
        //  calling the completion function
        const ENUMSTATE esCurrent = EsSTGetState();

        //  if there is a completion function for this state then call it
        if ( rgstt[ esCurrent ].m_pfnCompletion )
        {
            (this->*rgstt[ esCurrent ].m_pfnCompletion)();
        }

        //  if there is a next state then immediately begin the transition to that state
        if ( rgstt[ esCurrent ].m_stNext )
        {
            STTransition( rgstt[ esCurrent ].m_stNext );
        }
    }
}
//  completion function for stateSplitFromGrow2/stateMergeFromShrink2:
//  snapshot the live directory pointers into the backup slot, which
//  stateSplit/stateMerge readers select via "state >> 4"
void STCompletionCopyDir()
{
    //  backup the bucket ptrs for use during the split/merge process
    memcpy( &m_dirptrs[ 1 ], &m_dirptrs[ 0 ], sizeof( DIRPTRS ) );
}
//  completion function for stateGrow/stateShrink:  the state machine has
//  settled, so allow a new maintenance policy to be selected
void STCompletionGrowShrink()
{
    //  enable the selection of a new maintenance policy
    m_semPolicy.Release();
}
//  completion function for stateSplit:  perform the directory split
void STCompletionSplit()
{
    //  split the directory
    DIRISplit();
}
//  completion function for stateMerge:  perform the directory merge
void STCompletionMerge()
{
    //  merge the directory
    DIRIMerge();
}
  514. /////////////////////////////////////////////////////////////////////////////////////////
  515. //
  516. // directory
  517. //
  518. // initialize the directory, possible allocating some buckets
  519. ERR ErrDIRInit( const NativeCounter cLoadFactor, const NativeCounter cbucketMin )
  520. {
  521. ERR err;
  522. NativeCounter iExponent;
  523. NativeCounter iRemainder;
  524. // check params
  525. if ( cLoadFactor < 1 )
  526. {
  527. return errInvalidParameter;
  528. }
  529. // setup the main paramters
  530. m_cLoadFactor = cLoadFactor;
  531. // calculate the bucket size, accounting for:
  532. //
  533. // - bucket header
  534. // - enough room for twice the load factor to eliminate overflow
  535. // buckets with uniform hashing
  536. // - room for an additional entry to give us some flexibility in
  537. // our actual load factor to reduce maintenance overhead
  538. // - cache line alignment of the bucket
  539. m_cbBucket = sizeof( BUCKET ) + ( cLoadFactor * 2 + 1 ) * sizeof( CKeyEntry );
  540. m_cbBucket = ( ( m_cbBucket + cbCacheLine - 1 ) / cbCacheLine ) * cbCacheLine;
  541. // calculate the number of entries we can fit into a single bucket
  542. // NOTE: this may be larger than intended because we rounded the bucket size up the nearest cache-line
  543. m_centryBucket = ( m_cbBucket - sizeof( BUCKET ) ) / sizeof( CKeyEntry );
  544. // calculate the minimum number of buckets using the following lower-bounds:
  545. // cbucketMin (user parameter)
  546. // # of processors (make sure we have atleast 1 bucket/proc as an attempt to minimize contention)
  547. // 2 (hash table assumes atleast 2 buckets)
  548. m_cbucketMin = max( cbucketMin, NativeCounter( OSSYNC::OSSyncGetProcessorCountMax() ) );
  549. m_cbucketMin = max( m_cbucketMin, 2 );
  550. // align the minimum number of buckets to the next highest power of 2 (unless it's already a power of 2)
  551. DIRILog2( m_cbucketMin, &iExponent, &iRemainder );
  552. if ( iRemainder )
  553. {
  554. if ( ++iExponent >= cbitNativeCounter )
  555. {
  556. return errInvalidParameter; // could not round up without overflowing
  557. }
  558. }
  559. m_cbucketMin = 1 << iExponent;
  560. // setup the directory pointers
  561. m_dirptrs[ 0 ].m_cBucketMax = m_cbucketMin / 2;
  562. m_dirptrs[ 0 ].m_cBucket = m_cbucketMin / 2;
  563. // SPECIAL CASE: allocate 2 entries for the first bucket array
  564. // (we always do this because we always have atleast 2 buckets)
  565. err = ErrDIRInitBucketArray( 2, 0, &m_rgrgBucket[ 0 ] );
  566. if ( errSuccess != err )
  567. {
  568. return err;
  569. }
  570. // allocate memory for all other initial bucket arrays
  571. for ( iExponent = 1; ( NativeCounter( 1 ) << iExponent ) < m_cbucketMin; iExponent++ )
  572. {
  573. err = ErrDIRInitBucketArray( 1 << iExponent, 1 << iExponent, &m_rgrgBucket[ iExponent ] );
  574. if ( errSuccess != err )
  575. {
  576. return err;
  577. }
  578. }
  579. // clear the second set of directory ptrs
  580. memset( &m_dirptrs[ 1 ], 0, sizeof( DIRPTRS ) );
  581. return errSuccess;
  582. }
  583. // cleanup all memory by destructing it then freeing it
  584. void DIRTerm()
  585. {
  586. NativeCounter iExponent;
  587. // SPECIAL CASE: term the first bucket array (contains 2 entries)
  588. // (we will always do this because the hash-table will always contain atleast 2 entries)
  589. if ( m_rgrgBucket[ 0 ] )
  590. {
  591. DIRTermBucketArray( m_rgrgBucket[ 0 ], 2 );
  592. m_rgrgBucket[ 0 ] = NULL;
  593. }
  594. // term all other bucket arrays
  595. for ( iExponent = 1; iExponent < cbitNativeCounter; iExponent++ )
  596. {
  597. if ( m_rgrgBucket[ iExponent ] )
  598. {
  599. DIRTermBucketArray( m_rgrgBucket[ iExponent ], 1 << iExponent );
  600. m_rgrgBucket[ iExponent ] = NULL;
  601. }
  602. }
  603. // reset both copies of the directory pointers
  604. memset( m_dirptrs, 0, sizeof( m_dirptrs ) );
  605. }
//  lock a key for read operations
//
//  hashes the key to its bucket and enters the bucket's r/w-lock as a
//  reader; if the bucket moved (split/merge) while we were blocked on the
//  lock, re-hash once and lock the new bucket
void DIRReadLockKey( const ENUMSTATE esCurrent, const CKey &key, CLock * const plock ) const
{
    NativeCounter   iHash;
    NativeCounter   iBucket;
    NativeCounter   cBucketBefore;
    NativeCounter   cBucketAfter;
    NativeCounter   cBucketMax;

    //  verify the lock context:  must be in read mode and not already hold a bucket
    DHTAssert( FBKTRead( plock ) );
    DHTAssert( plock->m_pBucketHead == NULL );

#ifdef DEBUG
    //  remember the key we are locking (for later assertions)
    plock->m_key = key;
#endif

    //  hash to the bucket we want (this may require a retry in grow/shrink mode)
    iHash = CKeyEntry::Hash( key );
    plock->m_pBucketHead = PbucketDIRIHash( esCurrent, iHash, &iBucket, &cBucketBefore );

    //  acquire the lock as a reader
    plock->m_pBucketHead->CRWL().EnterAsReader();

    //  the entry may have moved as the result of a bucket split/merge that
    //  ran between hashing and acquiring the lock:  compare the split pointer
    //  before and after, and check whether our bucket lies in the affected
    //  range (first range: buckets split since we hashed; second range:
    //  their split images — note && binds tighter than ||)
    cBucketAfter = NcDIRIGetBucket( esCurrent );
    cBucketMax = NcDIRIGetBucketMax( esCurrent );
    if ( cBucketBefore != cBucketAfter &&
        ( cBucketBefore <= iBucket && iBucket < cBucketAfter ||
          cBucketMax + cBucketAfter <= iBucket && iBucket < cBucketMax + cBucketBefore ) )
    {
        //  unlock the old bucket
        plock->m_pBucketHead->CRWL().LeaveAsReader();

        //  hash to the bucket we want (this cannot fail more than once)
        plock->m_pBucketHead = PbucketDIRIHash( esCurrent, iHash );

        //  lock the new bucket
        plock->m_pBucketHead->CRWL().EnterAsReader();
    }

    //  we should now have the correct bucket locked
    DHTAssert( plock->m_pBucketHead == PbucketDIRIHash( esCurrent, iHash ) );
}
  643. // unlock the current read-locked key
  644. void DIRReadUnlockKey( CLock * const plock ) const
  645. {
  646. // verify the lock
  647. DHTAssert( FBKTRead( plock ) );
  648. DHTAssert( plock->m_pBucketHead != NULL );
  649. // release the lock
  650. plock->m_pBucketHead->CRWL().LeaveAsReader();
  651. plock->m_pBucketHead = NULL;
  652. }
//  lock a key for read/write operations
//
//  identical to DIRReadLockKey except the bucket's r/w-lock is entered as
//  a writer; the same one-shot re-hash handles a split/merge that ran while
//  we were blocked acquiring the lock
void DIRWriteLockKey( const ENUMSTATE esCurrent, const CKey &key, CLock * const plock ) const
{
    NativeCounter   iHash;
    NativeCounter   iBucket;
    NativeCounter   cBucketBefore;
    NativeCounter   cBucketAfter;
    NativeCounter   cBucketMax;

    //  verify the lock context:  write or scan mode, no bucket held yet
    DHTAssert( FBKTWrite( plock ) || FBKTScan( plock ) );
    DHTAssert( plock->m_pBucketHead == NULL );

#ifdef DEBUG
    //  remember the key we are locking (for later assertions)
    plock->m_key = key;
#endif

    //  hash to the bucket we want (this may require a retry in grow/shrink mode)
    iHash = CKeyEntry::Hash( key );
    plock->m_pBucketHead = PbucketDIRIHash( esCurrent, iHash, &iBucket, &cBucketBefore );

    //  acquire the lock as a writer
    plock->m_pBucketHead->CRWL().EnterAsWriter();

    //  the entry may have moved as the result of a bucket split/merge;
    //  detect by comparing the split pointer before and after the blocking
    //  lock acquire (see DIRReadLockKey for the range logic;
    //  && binds tighter than ||)
    cBucketAfter = NcDIRIGetBucket( esCurrent );
    cBucketMax = NcDIRIGetBucketMax( esCurrent );
    if ( cBucketBefore != cBucketAfter &&
        ( cBucketBefore <= iBucket && iBucket < cBucketAfter ||
          cBucketMax + cBucketAfter <= iBucket && iBucket < cBucketMax + cBucketBefore ) )
    {
        //  unlock the old bucket
        plock->m_pBucketHead->CRWL().LeaveAsWriter();

        //  hash to the bucket we want (this cannot fail more than once)
        plock->m_pBucketHead = PbucketDIRIHash( esCurrent, iHash );

        //  lock the new bucket
        plock->m_pBucketHead->CRWL().EnterAsWriter();
    }

    //  we should now have the correct bucket locked
    DHTAssert( plock->m_pBucketHead == PbucketDIRIHash( esCurrent, iHash ) );
}
  690. // unlock the current write-locked key
  691. void DIRWriteUnlockKey( CLock * const plock ) const
  692. {
  693. // verify the lock
  694. DHTAssert( FBKTWrite( plock ) );
  695. DHTAssert( plock->m_pBucketHead != NULL );
  696. // release the lock
  697. plock->m_pBucketHead->CRWL().LeaveAsWriter();
  698. plock->m_pBucketHead = NULL;
  699. }
//  initialize an array of buckets
//
//  allocates one contiguous chunk holding cbucketAlloc buckets of m_cbBucket
//  bytes each, placement-constructs each bucket's r/w-lock, and marks each
//  bucket empty.  on success, *prgbBucket receives the raw allocation; on
//  failure it is set to NULL and errOutOfMemory is returned.
//  ibucketFirst is used (in this code) only to generate unique lock names
//  under UNIQUE_BUCKET_NAMES
ERR ErrDIRInitBucketArray( const NativeCounter   cbucketAlloc,
                           const NativeCounter   ibucketFirst,
                           BYTE** const          prgbBucket )
{
#ifdef UNIQUE_BUCKET_NAMES
    char            *psz;
#endif  //  UNIQUE_BUCKET_NAMES
    NativeCounter   cb;
    BYTE            *rgb;
    NativeCounter   ibucket;

    DHTAssert( cbucketAlloc > 0 );
    DHTAssert( prgbBucket );

    //  calculate the size (in bytes) of the new bucket array
    //  NOTE(review): no overflow check on this multiply -- confirm callers
    //  bound cbucketAlloc
#ifdef UNIQUE_BUCKET_NAMES
    cb = cbucketAlloc * ( m_cbBucket + 60 );    //  add 60 extra bytes per bucket for a unique name (for the bucket's r/w-lock)
#else
    cb = cbucketAlloc * m_cbBucket;
#endif

    //  allocate the new bucket array
    rgb = (BYTE*)PvMEMAlloc( cb );
    if ( !rgb )
    {
        *prgbBucket = NULL;
        return errOutOfMemory;
    }

    //  initialize each bucket within the new array
    for ( ibucket = 0; ibucket < cbucketAlloc; ibucket++ )
    {
        //  efficiency variables
        PBUCKET const pbucket = PBUCKET( rgb + ( ibucket * m_cbBucket ) );

        //  construct the r/w-lock in place inside the bucket's byte buffer
#ifdef UNIQUE_BUCKET_NAMES
        //  the name buffers live in the tail of the allocation, after all buckets
        psz = (char*)( rgb + ( cbucketAlloc * m_cbBucket ) + ( ibucket * 60 ) );
        sprintf( psz, "CDynamicHashTable::BUCKET[0x%016I64X]::m_rwlBucket", QWORD( ibucketFirst + ibucket ) );
        DHTAssert( strlen( psz ) < 60 );
        new( &pbucket->CRWL() ) OSSYNC::CReaderWriterLock( CLockBasicInfo( CSyncBasicInfo( psz ), int( m_rankDHTrwlBucket ), 0 ) );
#else  //  !UNIQUE_BUCKET_NAMES
        new( &pbucket->CRWL() ) OSSYNC::CReaderWriterLock( CLockBasicInfo( CSyncBasicInfo( "CDynamicHashTable::BUCKET::m_rwlBucket" ), int( m_rankDHTrwlBucket ), 0 ) );
#endif  //  UNIQUE_BUCKET_NAMES

        //  make the bucket empty (m_pb == NULL means no entries and no chain)
        pbucket->m_pb = NULL;
    }

    *prgbBucket = rgb;
    return errSuccess;
}
  746. // uninitialize an array of buckets
  747. void DIRTermBucketArray( BYTE* const rgbBucket,
  748. const NativeCounter cbucketTerm )
  749. {
  750. NativeCounter ibucket;
  751. PBUCKET pbucketNext;
  752. // destroy each bucket in the array
  753. DHTAssert( rgbBucket );
  754. for ( ibucket = 0; ibucket < cbucketTerm; ibucket++ )
  755. {
  756. // efficiency variables
  757. PBUCKET pbucket = PBUCKET( rgbBucket + ( ibucket * m_cbBucket ) );
  758. // destruct the r/w-lock in place without freeing memory
  759. pbucket->CRWL().CReaderWriterLock::~CReaderWriterLock();
  760. // free all chained buckets (don't touch the first one because its part of rgbucket[])
  761. pbucket = PbucketBKTNext( pbucket );
  762. while ( pbucket )
  763. {
  764. pbucketNext = PbucketBKTNext( pbucket );
  765. MEMFree( pbucket );
  766. pbucket = pbucketNext;
  767. }
  768. }
  769. MEMFree( rgbBucket );
  770. }
  771. // split the directory
  772. void DIRISplit()
  773. {
  774. // we are executing the current policy (which is to split) and should be in this known state
  775. DHTAssert( m_dirptrs[ 0 ].m_cBucketMax > 0 );
  776. DHTAssert( m_dirptrs[ 0 ].m_cBucket == m_dirptrs[ 0 ].m_cBucketMax );
  777. // update the directory
  778. // NOTE: we do NOT allocate space here; this is deferred until BKTISplit() when we're sure we need it
  779. m_dirptrs[ 0 ].m_cBucketMax = m_dirptrs[ 0 ].m_cBucketMax * 2;
  780. m_dirptrs[ 0 ].m_cBucket = 0;
  781. STATSplitDirectory();
  782. }
//  merge the directory
//
//  runs as the stateMerge completion:  the merge iteration must have
//  consumed every bucket (m_cBucket == 0), so free the highest bucket
//  array and halve the directory's range
void DIRIMerge()
{
    //  we are executing the current policy (which is to merge) and should be in this known state
    DHTAssert( m_dirptrs[ 0 ].m_cBucketMax > 1 );   //  we should not be at the last split-level ( == 1 )
    DHTAssert( m_dirptrs[ 0 ].m_cBucket == 0 );

    //  free the bucket array that is no longer being used (the last one in the directory)
    //  NOTE: we can guarantee that it isn't in use because m_cBucket == 0 AND we can't grow (we're in stateMerge)
    //        that means that everyone trying to hash to this bucket will be re-routed to the low-order bucket instead
    NativeCounter   iExponent;
    NativeCounter   iRemainder;
    //  m_cBucketMax is a power of 2, so iExponent indexes its bucket array
    //  and iRemainder must be 0
    DIRILog2( m_dirptrs[ 0 ].m_cBucketMax, &iExponent, &iRemainder );
    DHTAssert( NativeCounter( 1 ) << iExponent == m_dirptrs[ 0 ].m_cBucketMax );
    DHTAssert( 0 == iRemainder );

    //  NOTE: the bucket array may not have been allocated because we defer its allocation until BKTISplit
    if ( m_rgrgBucket[ iExponent ] )
    {
        DIRTermBucketArray( m_rgrgBucket[ iExponent ], m_dirptrs[ 0 ].m_cBucketMax );
        m_rgrgBucket[ iExponent ] = NULL;
    }

#ifdef DEBUG
    //  verify that no higher-order bucket arrays exist
    while ( ++iExponent < cbitNativeCounter )
    {
        DHTAssert( !m_rgrgBucket[ iExponent ] );
    }
#endif  //  DEBUG

    //  update the directory:  halve the range and restart the split
    //  iteration at its end (fully-split position)
    m_dirptrs[ 0 ].m_cBucketMax = m_dirptrs[ 0 ].m_cBucketMax / 2;
    m_dirptrs[ 0 ].m_cBucket = m_dirptrs[ 0 ].m_cBucketMax;

    STATMergeDirectory();
}
// compute the log2 of the given value in terms of an exponent and an integer remainder
//
// the pair ( *piExponent, *piRemainder ) is the directory address of bucket
// iValue: the exponent selects a bucket array in m_rgrgBucket and the
// remainder is the offset within that array
//
// NOTE: iMaskLast is deliberately seeded with 1 (not 0), so iValue == 1
//       yields ( 0, 1 ) rather than ( 1, 0 ); i.e. bucket addresses 0 and 1
//       BOTH resolve to bucket array 0 -- this matches the allocation scheme
//       used by BKTISplit / DIRIMerge
void DIRILog2( const NativeCounter iValue,
               NativeCounter* const piExponent,
               NativeCounter* const piRemainder ) const
{
    NativeCounter iExponent;
    NativeCounter iMask;
    NativeCounter iMaskLast;
    iExponent = 0;
    iMaskLast = 1;
    iMask = 1;
    // grow an all-ones mask ( 2^( iExponent + 1 ) - 1 ) until it covers iValue
    while ( iMask < iValue )
    {
        iExponent++;
        iMaskLast = iMask;
        iMask = ( iMask << 1 ) + 1;
    }
    DHTAssert( iExponent < cbitNativeCounter );
    *piExponent = iExponent;
    // keep only the bits of iValue below the computed exponent
    *piRemainder = iMaskLast & iValue;
}
  836. // get the correct copy of cBucketMax
  837. const NativeCounter NcDIRIGetBucketMax( const ENUMSTATE esCurrent ) const
  838. {
  839. return m_dirptrs[ esCurrent >> 4 ].m_cBucketMax;
  840. }
  841. // get the correct copy of cBucket
  842. const NativeCounter NcDIRIGetBucket( const ENUMSTATE esCurrent ) const
  843. {
  844. return m_dirptrs[ esCurrent >> 4 ].m_cBucket;
  845. }
  846. // resolve a bucket address to a bucket pointer
  847. PBUCKET const PbucketDIRIResolve( const NativeCounter ibucketIndex,
  848. const NativeCounter ibucketOffset ) const
  849. {
  850. BYTE* const pb = m_rgrgBucket[ ibucketIndex ]; // get ptr to one of the bucket arrays
  851. const NativeCounter ibOffset = ibucketOffset * m_cbBucket; // get byte offset within bucket array
  852. DHTAssert( NULL != pb );
  853. return PBUCKET( pb + ibOffset ); // return a typed ptr to the individual bucket within array
  854. }
  855. // hash to a bucket
  856. const PBUCKET PbucketDIRIHash( const ENUMSTATE esCurrent,
  857. const NativeCounter iHash,
  858. NativeCounter* const piBucket,
  859. NativeCounter* const pcBucket ) const
  860. {
  861. NativeCounter& iBucket = *piBucket;
  862. NativeCounter& cBucket = *pcBucket;
  863. NativeCounter cBucketMax;
  864. NativeCounter iExponent;
  865. NativeCounter iRemainder;
  866. // load some of the directory pointers
  867. cBucket = NcDIRIGetBucket( esCurrent );
  868. cBucketMax = NcDIRIGetBucketMax( esCurrent );
  869. // normalize the given hash value to the range of active buckets
  870. iBucket = iHash & ( ( cBucketMax - 1 ) + cBucketMax );
  871. if ( iBucket >= cBucketMax + cBucket )
  872. {
  873. iBucket -= cBucketMax;
  874. }
  875. // convert the normalized hash value to a bucket address
  876. DIRILog2( iBucket, &iExponent, &iRemainder );
  877. // return the bucket
  878. return PbucketDIRIResolve( iExponent, iRemainder );
  879. }
  880. const PBUCKET PbucketDIRIHash( const ENUMSTATE esCurrent,
  881. const NativeCounter iHash ) const
  882. {
  883. NativeCounter iBucket;
  884. NativeCounter cBucket;
  885. return PbucketDIRIHash( esCurrent, iHash, &iBucket, &cBucket );
  886. }
  887. /////////////////////////////////////////////////////////////////////////////////////////
  888. //
  889. // scan operations
  890. //
// move from the current hash-bucket to the next hash-bucket that contains
// at least 1 entry; position currency on that entry
//
// any bucket still write-locked from the previous scan step is unlocked
// first (triggering amortized table maintenance if that step inserted or
// deleted); returns errSuccess with the new bucket write-locked, or
// errNoCurrentEntry once the scan has passed the last active bucket
ERR ErrSCANMoveNext( CLock *const plock )
{
    DHTAssert( plock->m_pEntryPrev == NULL );
    DHTAssert( plock->m_pEntry == NULL );
    DHTAssert( plock->m_pEntryNext == NULL );
    // unlock the current bucket
    if ( plock->m_pBucketHead )
    {
        plock->m_pBucketHead->CRWL().LeaveAsWriter();
        plock->m_pBucketHead = NULL;
        // we performed an insert or delete while holding the write lock
        if ( plock->m_fInsertOrDelete )
        {
            // perform amortized maintenance on the table
            MaintainTable( plock->m_phs );
        }
    }
    // enter the state machine
    const int iGroup = UiSTEnter( &plock->m_phs );
    const ENUMSTATE esCurrent = EsSTGetState();
    // the number of active buckets is cBucketMax + cBucket
    while ( plock->m_iBucket + 1 < NcDIRIGetBucketMax( esCurrent ) + NcDIRIGetBucket( esCurrent ) )
    {
        // we have not scanned the last bucket yet
        // advance the bucket index
        plock->m_iBucket++;
        // hash to the bucket and lock it
        plock->m_pBucketHead = PbucketDIRIHash( esCurrent, plock->m_iBucket );
        plock->m_pBucketHead->CRWL().EnterAsWriter();
        // re-check the active-bucket count now that we hold the lock; a
        // concurrent merge may have retired the bucket we just hashed to
        if ( plock->m_iBucket < NcDIRIGetBucketMax( esCurrent ) + NcDIRIGetBucket( esCurrent ) )
        {
            // bucket address is OK (did not move)
            if ( plock->m_pBucketHead->m_pb != NULL )
            {
                // current bucket contains at least 1 entry
                // setup the currency on the first entry
                plock->m_pBucket = plock->m_pBucketHead;
                plock->m_pEntry = &plock->m_pBucketHead->m_rgEntry[0];
                // stop the loop (keeping the write lock)
                break;
            }
            // current bucket is empty
        }
        else
        {
            DHTAssert( stateShrink == esCurrent );
            // the current bucket disappeared because it was merged into a lower bucket
            DHTAssert( plock->m_iBucket >= NcDIRIGetBucketMax( esCurrent ) );
            DHTAssert( PbucketDIRIHash( esCurrent, plock->m_iBucket ) ==
                       PbucketDIRIHash( esCurrent, plock->m_iBucket - NcDIRIGetBucketMax( esCurrent ) ) );
            // make sure the current entry ptr is reset
            DHTAssert( !plock->m_pEntry );
        }
        // release the bucket lock (bucket should be empty since it was merged)
        DHTAssert( !plock->m_pBucketHead->m_pb );
        plock->m_pBucketHead->CRWL().LeaveAsWriter();
        plock->m_pBucketHead = NULL;
    }
    // leave the state machine
    STLeave( iGroup, plock->m_phs );
    // return the result
    DHTAssert( !plock->m_pEntry || plock->m_pBucketHead );
    return plock->m_pEntry ? errSuccess : errNoCurrentEntry;
}
  956. /////////////////////////////////////////////////////////////////////////////////////////
  957. //
  958. // bucket operations
  959. //
  960. // returns fTrue if the lock context is in read mode
  961. const BOOL FBKTRead( CLock *const plock ) const
  962. {
  963. return plock->m_ls == CLock::lsRead;
  964. }
  965. // returns fTrue if the lock context is in write mode
  966. const BOOL FBKTWrite( CLock *const plock ) const
  967. {
  968. return plock->m_ls == CLock::lsWrite;
  969. }
  970. // returns fTrue if the lock context is in scan-forward mode
  971. const BOOL FBKTScan( CLock *const plock ) const
  972. {
  973. return plock->m_ls == CLock::lsScan;
  974. }
  975. // returns the entry after last entry in the BUCKET or entry 0 if no entries exist
  976. CKeyEntry *PentryBKTNextMost( const PBUCKET pBucket ) const
  977. {
  978. const BYTE *pb = pBucket->m_pb;
  979. if ( BOOL( ( pb >= (BYTE*)&pBucket->m_rgEntry[ 0 ] ) &
  980. ( pb < (BYTE*)&pBucket->m_rgEntry[ m_centryBucket ] ) ) )
  981. {
  982. // we are in the last bucket
  983. return (CKeyEntry*)pb + 1;
  984. }
  985. else if ( NULL == pb )
  986. {
  987. // the bucket is empty
  988. return &pBucket->m_rgEntry[ 0 ];
  989. }
  990. // the bucket is full
  991. return &pBucket->m_rgEntry[ m_centryBucket ];
  992. }
  993. // returns the next BUCKET or NULL if no other BUCKETs exist
  994. PBUCKET PbucketBKTNext( const PBUCKET pBucket ) const
  995. {
  996. const BYTE *pb = pBucket->m_pb;
  997. if ( BOOL( ( pb <= (BYTE*)pBucket - m_cbBucket ) |
  998. ( pb >= (BYTE*)pBucket + m_cbBucket ) ) )
  999. {
  1000. // m_pBucketNext is either the next BUCKET or NULL
  1001. DHTAssert( !pb || PBUCKET( pb )->m_pBucketPrev == pBucket );
  1002. return PBUCKET( pb );
  1003. }
  1004. // m_pBucketNext is invalid (m_pEntryLast is valid instead)
  1005. return NULL;
  1006. }
// seek to the entry corresponding to the given key
// on success, currency is set on the matching entry (plock->m_pEntry);
// otherwise, plock->m_pEntry is left NULL (no current entry)
void BKTSeek( CLock *const plock, const CKey &key ) const
{
    // pre-init our currency assuming we will hit a hot path
    plock->m_pBucket = plock->m_pBucketHead;
    plock->m_pEntryPrev = NULL;
    plock->m_pEntryNext = NULL;
    // HOT PATH:
    //
    // if the next/end pointer is within the head bucket then we know
    // that all entries are in the head bucket. if we find the entry
    // for this key then set our currency to point to it otherwise set
    // our currency to no current entry
    CKeyEntry* const pEntryLast = plock->m_pBucketHead->m_pEntryLast;
    // unsigned-difference trick: true only when m_pEntryLast points inside
    // the head bucket; NULL (0) and chained-bucket pointers both produce an
    // unsigned difference >= m_cbBucket
    if ( DWORD_PTR( pEntryLast ) - DWORD_PTR( plock->m_pBucketHead ) < m_cbBucket )
    {
        // scan every occupied entry in the head bucket (the do/while is
        // safe because a non-NULL m_pEntryLast implies at least one entry)
        CKeyEntry* pEntry = plock->m_pBucketHead->m_rgEntry;
        do
        {
            if ( pEntry->FEntryMatchesKey( key ) )
            {
                plock->m_pEntry = pEntry;
                return;
            }
        }
        while ( ++pEntry <= pEntryLast );
        plock->m_pEntry = NULL;
    }
    // HOT PATH:
    //
    // if the next/end pointer is NULL then the bucket is empty and we know
    // that we will not find the key. set our currency to no current entry
    else if ( !pEntryLast )
    {
        plock->m_pEntry = NULL;
    }
    // if the next/end pointer points outside of the head bucket then
    // overflow buckets exist; perform a full chain search
    else
    {
        BKTISeek( plock, key );
    }
}
  1052. void BKTISeek( CLock *const plock, const CKey &key ) const
  1053. {
  1054. PBUCKET pBucket;
  1055. PBUCKET pBucketPrev;
  1056. CKeyEntry *pEntryThis;
  1057. CKeyEntry *pEntryMost;
  1058. DHTAssert( FBKTRead( plock ) || FBKTWrite( plock ) );
  1059. DHTAssert( plock->m_pBucketHead != NULL );
  1060. // start the scan on the first bucket
  1061. pBucket = plock->m_pBucketHead;
  1062. do
  1063. {
  1064. // scan the current BUCKET
  1065. pEntryThis = &pBucket->m_rgEntry[ 0 ];
  1066. pEntryMost = PentryBKTNextMost( pBucket );
  1067. while ( pEntryThis < pEntryMost )
  1068. {
  1069. // query the entry against the given key for a match
  1070. // (assume we will be more likely to not find it)
  1071. if ( !pEntryThis->FEntryMatchesKey( key ) )
  1072. {
  1073. // nop
  1074. }
  1075. else
  1076. {
  1077. // the key exists; setup our currency around it
  1078. goto SetupCurrency;
  1079. }
  1080. // move to the next entry
  1081. pEntryThis++;
  1082. }
  1083. // move to the next BUCKET
  1084. pBucketPrev = pBucket;
  1085. pBucket = PbucketBKTNext( pBucket );
  1086. }
  1087. while ( pBucket );
  1088. // move back to the last BUCKET and reset the entry ptr
  1089. pBucket = pBucketPrev;
  1090. pEntryThis = NULL;
  1091. SetupCurrency:
  1092. // setup the currency in the lock context
  1093. // we will not allow moving next/prev, so we setup the next/prev ptrs accordingly
  1094. plock->m_pBucket = pBucket;
  1095. plock->m_pEntryPrev = NULL;
  1096. plock->m_pEntry = pEntryThis;
  1097. plock->m_pEntryNext = NULL;
  1098. }
  1099. #ifdef DEBUG
  1100. // get a pointer to the current entry
  1101. // if currency is before-first or after-last, then NULL is returned
  1102. void BKTGetEntry( CLock *const plock, CKeyEntry **ppKeyEntry ) const
  1103. {
  1104. DHTAssert( FBKTRead( plock ) || FBKTWrite( plock ) );
  1105. DHTAssert( plock->m_pBucketHead != NULL );
  1106. DHTAssert( plock->m_pBucket != NULL );
  1107. *ppKeyEntry = plock->m_pEntry;
  1108. return;
  1109. }
  1110. #endif
  1111. // get the current entry
  1112. // if currency is before-first or after-last, errEntryNotFound is returned
  1113. const ERR ErrBKTGetEntry( CLock *const plock, CEntry *pentry ) const
  1114. {
  1115. DHTAssert( FBKTRead( plock ) || FBKTWrite( plock ) || FBKTScan( plock ) );
  1116. DHTAssert( plock->m_pBucketHead != NULL );
  1117. DHTAssert( plock->m_pBucket != NULL );
  1118. if ( plock->m_pEntry )
  1119. {
  1120. // we are on an entry
  1121. plock->m_pEntry->GetEntry( pentry );
  1122. return errSuccess;
  1123. }
  1124. // we are not on an entry
  1125. return errEntryNotFound;
  1126. }
  1127. // replace the current entry (destruct old entry, contruct new entry)
  1128. // if currency is before-first or after-last, then errNoCurrentEntry is returned
  1129. const ERR ErrBKTReplaceEntry( CLock *const plock, const CEntry &entry ) const
  1130. {
  1131. DHTAssert( FBKTWrite( plock ) || FBKTScan( plock ) );
  1132. DHTAssert( plock->m_pBucketHead != NULL );
  1133. DHTAssert( plock->m_pBucket != NULL );
  1134. if ( plock->m_pEntry )
  1135. {
  1136. // we are on an entry
  1137. // copy the new entry over it
  1138. plock->m_pEntry->SetEntry( entry );
  1139. return errSuccess;
  1140. }
  1141. // we are not on an entry
  1142. return errNoCurrentEntry;
  1143. }
// insert an entry at the end of the logical bucket
// if memory is short, errOutOfMemory is returned
// if the currency is already on a matching entry, errKeyDuplicate is returned
// otherwise, errSuccess is returned and currency is set on the new entry
//
// the DEBUG blocks catalogue every BUCKET in the chain before the insert and
// verify afterwards that no BUCKET was leaked and that the entry count grew
// by exactly one
const ERR ErrBKTInsertEntry( CLock *const plock, const CEntry &entry )
{
    DHTAssert( FBKTWrite( plock ) );
    DHTAssert( plock->m_pBucketHead != NULL );
    DHTAssert( plock->m_pBucket != NULL );
    if ( plock->m_pEntry )
    {
        // we are pointing to the key we locked, so it must already exist
        return errKeyDuplicate;
    }
#ifdef DEBUG
    // catalogue every BUCKET currently in the chain (plus a slot for the
    // bucket we may allocate below); best-effort -- skipped on OOM
    PBUCKET *rgBucketCheck = NULL, pbucketTX;
    size_t cBucketCheck = 0, iT;
    pbucketTX = plock->m_pBucketHead;
    while ( pbucketTX )
    {
        cBucketCheck++;
        pbucketTX = PbucketBKTNext( pbucketTX );
    }
    cBucketCheck++; // account for newly allocated bucket
    rgBucketCheck = (PBUCKET *)PvMEMAlloc( cBucketCheck * sizeof( PBUCKET ) );
    if ( NULL != rgBucketCheck )
    {
        iT = 0;
        pbucketTX = plock->m_pBucketHead;
        while ( pbucketTX )
        {
            rgBucketCheck[ iT++ ] = pbucketTX;
            pbucketTX = PbucketBKTNext( pbucketTX );
        }
        rgBucketCheck[ iT++ ] = NULL; // new bucket
    }
    // count the number of entries we will be handling
    size_t cEntriesTotal = 0;
    PBUCKET pbktT, pbktNextT;
    pbktT = plock->m_pBucketHead;
    if ( pbktT->m_pb != NULL )
    {
        while ( pbktT )
        {
            pbktNextT = PbucketBKTNext( pbktT );
            if ( pbktNextT )
            {
                // full bucket
                cEntriesTotal += size_t( m_centryBucket );
            }
            else
            {
                // partial bucket (not empty)
                cEntriesTotal += 1 + ( pbktT->m_pEntryLast - &pbktT->m_rgEntry[0] );
            }
            pbktT = pbktNextT;
        }
    }
#endif
    // cursor for insert
    PBUCKET pBucketThis = plock->m_pBucket;
    CKeyEntry *pEntryThis;
    // efficiency variable
    PBUCKET pBucketT;
    // move to the last entry in the last bucket
    pBucketT = PbucketBKTNext( pBucketThis );
    while ( pBucketT )
    {
        pBucketThis = pBucketT;
        pBucketT = PbucketBKTNext( pBucketT );
    }
    pEntryThis = PentryBKTNextMost( pBucketThis );
    if ( pEntryThis != &pBucketThis->m_rgEntry[ m_centryBucket ] )
    {
        // there are available entries left in the last bucket
        // nop
    }
    else
    {
        // there are no entries left in the last bucket
        // allocate a new bucket
        pBucketT = (BUCKET *)PvMEMAlloc( m_cbBucket );
        if ( !pBucketT )
        {
            // we ran out of memory when allocating the new BUCKET
#ifdef DEBUG
            // free memory from the start of this function
            if ( NULL != rgBucketCheck )
            {
                MEMFree( rgBucketCheck );
            }
#endif
            return errOutOfMemory;
        }
        STATInsertOverflowBucket();
#ifdef DEBUG
        // put the new bucket in our list
        if ( NULL != rgBucketCheck )
        {
            DHTAssert( rgBucketCheck[cBucketCheck-1] == NULL );
            rgBucketCheck[cBucketCheck-1] = pBucketT;
        }
#endif
        // chain the new BUCKET
        pBucketThis->m_pBucketNext = pBucketT;
        pBucketT->m_pBucketPrev = pBucketThis;
        // use the first entry of the new BUCKET
        pBucketThis = pBucketT;
        pEntryThis = &pBucketT->m_rgEntry[0];
    }
    // copy the entry
    pEntryThis->SetEntry( entry );
    // update the last entry pointer (m_pEntryLast aliases m_pBucketNext/m_pb)
    pBucketThis->m_pEntryLast = pEntryThis;
    // move the currency to the new entry
    plock->m_pBucket = pBucketThis;
    plock->m_pEntry = pEntryThis;
#ifdef DEBUG
    if ( NULL != rgBucketCheck )
    {
        // check each catalogued bucket to see if it is still there
        pbucketTX = plock->m_pBucketHead;
        DHTAssert( pbucketTX );
        // find and remove all buckets found in the destination bucket from our list
        while ( pbucketTX )
        {
            for ( iT = 0; iT < cBucketCheck; iT++ )
            {
                if ( rgBucketCheck[iT] == pbucketTX )
                {
                    rgBucketCheck[iT] = NULL;
                    break;
                }
            }
            DHTAssert( iT < cBucketCheck ); // if this goes off, we somehow got a bucket
                                            // into the chain that shouldn't be there
                                            // (it is a bucket we never catalogued!)
            pbucketTX = PbucketBKTNext( pbucketTX );
        }
        // the list should now be empty -- verify this
        for ( iT = 0; iT < cBucketCheck; iT++ )
        {
            // if this goes off, rgBucketCheck[iT] contains a bucket that was abandoned without
            // being freed!
            DHTAssert( rgBucketCheck[iT] == NULL );
        }
        // free the list
        MEMFree( rgBucketCheck );
    }
    // make sure the number of entries has not changed since we started
    size_t cEntriesAfterwards = 0;
    pbktT = plock->m_pBucketHead;
    if ( pbktT->m_pb != NULL )
    {
        while ( pbktT )
        {
            pbktNextT = PbucketBKTNext( pbktT );
            if ( pbktNextT )
            {
                // full bucket
                cEntriesAfterwards += size_t( m_centryBucket );
            }
            else
            {
                // partial bucket (not empty)
                cEntriesAfterwards += 1 + ( pbktT->m_pEntryLast - &pbktT->m_rgEntry[0] );
            }
            pbktT = pbktNextT;
        }
    }
    // entry counters should match ( +1 is for the inserted entry )
    DHTAssert( cEntriesAfterwards == cEntriesTotal + 1 );
#endif
    return errSuccess;
}
// delete the current entry
// if currency is before-first or after-last, then errNoCurrentEntry is returned
// if the entry is not the last in the logical bucket, the last entry is promoted
// to fill in the hole
// should a BUCKET become empty, it will be released immediately
//
// the DEBUG blocks catalogue every BUCKET in the chain before the delete and
// verify afterwards that no BUCKET was leaked and that the entry count shrank
// by exactly one
const ERR ErrBKTDeleteEntry( CLock *const plock )
{
    DHTAssert( FBKTWrite( plock ) || FBKTScan( plock ) );
    DHTAssert( plock->m_pBucketHead != NULL );
    DHTAssert( plock->m_pBucket != NULL );
    if ( !plock->m_pEntry )
    {
        // we do not have a current entry
        return errNoCurrentEntry;
    }
#ifdef DEBUG
    // catalogue every BUCKET currently in the chain; best-effort -- skipped on OOM
    PBUCKET *rgBucketCheck = NULL;
    PBUCKET pbucketT;
    size_t cBucketCheck = 0, iT;
    pbucketT = plock->m_pBucketHead;
    while ( pbucketT )
    {
        cBucketCheck++;
        pbucketT = PbucketBKTNext( pbucketT );
    }
    rgBucketCheck = (PBUCKET *)PvMEMAlloc( cBucketCheck * sizeof( PBUCKET ) );
    if ( NULL != rgBucketCheck )
    {
        iT = 0;
        pbucketT = plock->m_pBucketHead;
        while ( pbucketT )
        {
            rgBucketCheck[ iT++ ] = pbucketT;
            pbucketT = PbucketBKTNext( pbucketT );
        }
    }
    // count the number of entries we will be handling
    size_t cEntriesTotal = 0;
    PBUCKET pbktT, pbktNextT;
    pbktT = plock->m_pBucketHead;
    if ( pbktT->m_pb != NULL )
    {
        while ( pbktT )
        {
            pbktNextT = PbucketBKTNext( pbktT );
            if ( pbktNextT )
            {
                // full bucket
                cEntriesTotal += size_t( m_centryBucket );
            }
            else
            {
                // partial bucket (not empty)
                cEntriesTotal += 1 + ( pbktT->m_pEntryLast - &pbktT->m_rgEntry[0] );
            }
            pbktT = pbktNextT;
        }
    }
#endif
    // we have a valid entry
    PBUCKET pBucketThis = plock->m_pBucket;
    CKeyEntry *pEntryThis = plock->m_pEntry;
    PBUCKET pBucketFree = NULL; // used later if we free a BUCKET structure
    if ( pEntryThis != pBucketThis->m_pEntryLast )
    {
        // we are not deleting the last entry in the bucket
        // promote the last entry to fill in the spot left by the entry we are deleting
        // move to the last bucket
        PBUCKET pBucketT = PbucketBKTNext( pBucketThis );
        while ( pBucketT )
        {
            pBucketThis = pBucketT;
            pBucketT = PbucketBKTNext( pBucketT );
        }
        // move to the last entry in the last BUCKET
        pEntryThis = pBucketThis->m_pEntryLast;
        // copy the entry over the one being deleted
        plock->m_pEntry->SetEntry( pEntryThis->m_entry );
    }
    // update the currency to show that we are no longer on an entry
    plock->m_pEntry = NULL;
    // we are now pointing to the last entry in the last bucket
    // (via pBucketThis/pEntryThis), and that entry needs to be
    // "deleted" from the bucket
    // update the next/end ptr to reflect this deletion
    if ( pEntryThis != &pBucketThis->m_rgEntry[0] )
    {
        // entries still remain in the last bucket
        DHTAssert( pBucketThis->m_pEntryLast == pEntryThis );
        pBucketThis->m_pEntryLast--; // pEntryThis - 1;
#ifdef DEBUG
        // jump to the validation code
        goto DoValidation;
#endif
        return errSuccess;
    }
    // no entries remain in the last bucket
    if ( pBucketThis == plock->m_pBucketHead )
    {
        // this bucket is empty, but we cannot release it because it is part of the bucket array
        // instead, we mark it as being empty
        pBucketThis->m_pb = NULL;
#ifdef DEBUG
        // jump to the validation code
        goto DoValidation;
#endif
        return errSuccess;
    }
    // we can free the last bucket
    pBucketFree = pBucketThis;
    // unchain it
    DHTAssert( pBucketThis->m_pBucketPrev->m_pBucketNext == pBucketThis );
    pBucketThis = pBucketThis->m_pBucketPrev;
    // the previous bucket is now full and last: point its next/end ptr back
    // at its own final entry
    pBucketThis->m_pEntryLast = &pBucketThis->m_rgEntry[ m_centryBucket - 1 ];
    // free it
    MEMFree( pBucketFree );
    if ( plock->m_pBucket == pBucketFree )
    {
        // our currency was on the last bucket which is now invalid
        // move to the previous bucket (which is now the NEW last BUCKET)
        plock->m_pBucket = pBucketThis;
    }
    STATDeleteOverflowBucket();
#ifdef DEBUG
    // check each catalogued bucket to see if it is still there
DoValidation:
    if ( NULL != rgBucketCheck )
    {
        pbucketT = plock->m_pBucketHead;
        DHTAssert( pbucketT );
        // find and remove all buckets found in the destination bucket from our list
        while ( pbucketT )
        {
            for ( iT = 0; iT < cBucketCheck; iT++ )
            {
                if ( rgBucketCheck[iT] == pbucketT )
                {
                    rgBucketCheck[iT] = NULL;
                    break;
                }
            }
            DHTAssert( iT < cBucketCheck ); // if this goes off, we somehow got a bucket
                                            // into the chain that shouldn't be there
                                            // (it is a bucket we never catalogued!)
            pbucketT = PbucketBKTNext( pbucketT );
        }
        // remove pBucketFree from rgBucketCheck
        if ( pBucketFree )
        {
            for ( iT = 0; iT < cBucketCheck; iT++ )
            {
                if ( rgBucketCheck[iT] == pBucketFree )
                {
                    rgBucketCheck[iT] = NULL;
                    break;
                }
            }
            DHTAssert( iT < cBucketCheck ); // if this goes off, we freed a bucket that
                                            // was never catalogued! we should only be freeing
                                            // buckets that were in the original catalogue!
        }
        // the list should now be empty -- verify this
        for ( iT = 0; iT < cBucketCheck; iT++ )
        {
            // if this goes off, rgBucketCheck[iT] contains a bucket that was abandoned without
            // being freed!
            DHTAssert( rgBucketCheck[iT] == NULL );
        }
        // free the list
        MEMFree( rgBucketCheck );
    }
    // make sure the number of entries has not changed since we started
    size_t cEntriesAfterwards = 0;
    pbktT = plock->m_pBucketHead;
    if ( pbktT->m_pb != NULL )
    {
        while ( pbktT )
        {
            pbktNextT = PbucketBKTNext( pbktT );
            if ( pbktNextT )
            {
                // full bucket
                cEntriesAfterwards += size_t( m_centryBucket );
            }
            else
            {
                // partial bucket (not empty)
                cEntriesAfterwards += 1 + ( pbktT->m_pEntryLast - &pbktT->m_rgEntry[0] );
            }
            pbktT = pbktNextT;
        }
    }
    // entry counters should match ( -1 is for the deleted entry )
    DHTAssert( cEntriesAfterwards == cEntriesTotal - 1 );
#endif
    return errSuccess;
}
// split to a new bucket
//
// performs one step of table growth: splits the next unsplit bucket
// (address cBucket) into itself and a new bucket at cBucketMax + cBucket,
// allocating the destination bucket array on demand; backs off silently on
// lock contention or out-of-memory so the growth can be retried later
void BKTISplit( HOTSTUFF* const phs )
{
    // NOTE: from our perspective, we are in the grow state
    // however, the current state may be set to something else due to a pending transition
    // read the directory pointers
    const NativeCounter cBucketMax = NcDIRIGetBucketMax( stateGrow );
    const NativeCounter cBucket = NcDIRIGetBucket( stateGrow );
    if ( cBucketMax + cBucket >= m_cBucketPreferred || cBucket == cBucketMax )
    {
        return; // the requested growth is complete
    }
    // we need to reserve memory now to ensure that the growth will succeed
    // (BKTIDoSplit will commit or unreserve this reservation later)
    if ( !phs->m_bucketpool.FPOOLReserve( m_cbBucket ) )
    {
        return;
    }
    // get the source bucket
    const PBUCKET pbucketGrowSrc = PbucketDIRIHash( stateGrow, cBucket );
    // try to get the lock; never block -- contention just postpones this step
    if ( pbucketGrowSrc->CRWL().FWritersQuiesced() ||
         !pbucketGrowSrc->CRWL().FTryEnterAsWriter() )
    {
        STATSplitContention();
        phs->m_bucketpool.POOLUnreserve();
        return;
    }
    // having a write-lock on the source bucket means no one else attempting to split can
    // be farther along than us at this moment unless they completed the growth already
    // see whether or not m_cBucket changed while we were trying to get here
    // if it stayed the same, we were the first ones to split this bucket
    // if it changed, we were not first; instead, someone else managed to split AFTER
    // we read m_cBucket but BEFORE we could do the split ourselves
    if ( cBucket != NcDIRIGetBucket( stateGrow ) )
    {
        DHTAssert( cBucket < NcDIRIGetBucket( stateGrow ) );
        pbucketGrowSrc->CRWL().LeaveAsWriter();
        phs->m_bucketpool.POOLUnreserve();
        return;
    }
    // get the destination bucket (may not be allocated yet so we cannot use PbucketDIRIHash)
    NativeCounter iExponent;
    NativeCounter iRemainder;
    DIRILog2( cBucketMax + cBucket, &iExponent, &iRemainder );
    // extract the address of the bucket
    if ( !m_rgrgBucket[ iExponent ] )
    {
        // allocate a new bucket array to hold 2^iExponent buckets for this entry
        if ( ErrDIRInitBucketArray( cBucketMax, cBucketMax, &m_rgrgBucket[ iExponent ] ) != errSuccess )
        {
            pbucketGrowSrc->CRWL().LeaveAsWriter();
            phs->m_bucketpool.POOLUnreserve();
            return;
        }
    }
    DHTAssert( m_rgrgBucket[ iExponent ] );
    // get the destination bucket
    const PBUCKET pbucketGrowDst = PbucketDIRIResolve( iExponent, iRemainder );
    // lock the destination bucket (no possibility of contention here: the
    // bucket is not yet addressable by anyone hashing into the table)
    pbucketGrowDst->CRWL().FTryEnterAsWriter();
    // increase m_cBucket (we cannot turn back after this point)
    // anyone who hashes to the new bucket will be queued up until the growth is complete
    DHTAssert( cBucket == NcDIRIGetBucket( stateGrow ) );
    m_dirptrs[ 0 ].m_cBucket++;
    // do the growth work
    BKTIDoSplit( phs, pbucketGrowSrc, pbucketGrowDst, cBucket );
    // release the write-locks
    pbucketGrowSrc->CRWL().LeaveAsWriter();
    pbucketGrowDst->CRWL().LeaveAsWriter();
}
// merge two existing buckets into one
//
// performs one step of table shrinkage: merges the highest active bucket
// (address cBucket - 1 + cBucketMax) back into its low-order counterpart
// (address cBucket - 1); backs off silently on lock contention or
// out-of-memory so the shrinkage can be retried later
void BKTIMerge( HOTSTUFF* const phs )
{
    // NOTE: from our perspective, we are in the shrink state
    // however, the current state may be set to something else due to a pending transition
    // read the directory pointers
    const NativeCounter cBucketMax = NcDIRIGetBucketMax( stateShrink );
    NativeCounter cBucket = NcDIRIGetBucket( stateShrink );
    if ( cBucketMax + cBucket <= m_cBucketPreferred || cBucket == 0 )
    {
        return; // the requested shrinkage is complete
    }
    cBucket--; // the bucket we are merging is really 1 below cBucket
    // we need to reserve memory now to ensure that the shrinkage will succeed
    // (BKTIDoMerge will commit or unreserve this reservation later)
    if ( !phs->m_bucketpool.FPOOLReserve( m_cbBucket ) )
    {
        return;
    }
    // get the destination bucket
    const PBUCKET pbucketShrinkDst = PbucketDIRIHash( stateShrink, cBucket );
    // try to get the lock; never block -- contention just postpones this step
    if ( pbucketShrinkDst->CRWL().FWritersQuiesced() ||
         !pbucketShrinkDst->CRWL().FTryEnterAsWriter() )
    {
        STATMergeContention();
        phs->m_bucketpool.POOLUnreserve();
        return;
    }
    // having a write-lock on the destination bucket means no one else attempting to merge can
    // be farther along than us at this moment unless they completed the shrinkage already
    // see whether or not m_cBucket changed while we were trying to get here
    // if it stayed the same, we were the first ones to merge this bucket
    // if it changed, we were not first; instead, someone else managed to merge AFTER
    // we read m_cBucket but BEFORE we could do the merge ourselves
    if ( cBucket + 1 != NcDIRIGetBucket( stateShrink ) )
    {
        DHTAssert( cBucket + 1 > NcDIRIGetBucket( stateShrink ) );
        pbucketShrinkDst->CRWL().LeaveAsWriter();
        phs->m_bucketpool.POOLUnreserve();
        return;
    }
    // convert the source address (cBucket + cBucketMax) to a bucket address
    NativeCounter iExponent;
    NativeCounter iRemainder;
    DIRILog2( cBucket + NcDIRIGetBucketMax( stateShrink ), &iExponent, &iRemainder );
    // extract the address of the bucket
    const PBUCKET pbucketShrinkSrc = PbucketDIRIResolve( iExponent, iRemainder );
    // try to get the lock
    if ( pbucketShrinkSrc->CRWL().FWritersQuiesced() ||
         !pbucketShrinkSrc->CRWL().FTryEnterAsWriter() )
    {
        STATMergeContention();
        pbucketShrinkDst->CRWL().LeaveAsWriter();
        phs->m_bucketpool.POOLUnreserve();
        return;
    }
    // decrease m_cBucket (we cannot turn back after this point)
    // anyone who hashes to the destination bucket will be queued up until
    // the merge is complete
    // no one will be able to hash to the source bucket
    DHTAssert( cBucket + 1 == NcDIRIGetBucket( stateShrink ) );
    m_dirptrs[ 0 ].m_cBucket--;
    // do the shrinkage work
    BKTIDoMerge( phs, pbucketShrinkSrc, pbucketShrinkDst );
    // release the write-locks
    pbucketShrinkDst->CRWL().LeaveAsWriter();
    pbucketShrinkSrc->CRWL().LeaveAsWriter();
}
// work-horse for splitting a bucket (linear-hashing grow step)
//
// Redistributes the entries of one source bucket chain between the source
// bucket and the newly activated (empty) destination bucket: an entry stays
// in the source iff its hash, masked to the doubled table size, still equals
// iHashSrc. Overflow BUCKETs emptied by the move are recycled into an avail
// list and reused before the caller's single reserved pool bucket is
// committed; leftover avail buckets are freed. The caller must hold the
// write locks on both chains and must have reserved one bucket in
// phs->m_bucketpool (committed here at most once, else unreserved by caller's
// contract — see the tail of this function).
//
//  phs           - per-processor HOTSTUFF context (bucket pool / statistics)
//  pBucketSrcSrc - head of the source bucket chain (fixed bucket of the table)
//  pBucketDst    - head of the destination chain (fixed bucket, must be empty)
//  iHashSrc      - masked hash value identifying entries that stay in source
void BKTIDoSplit( HOTSTUFF* const phs,
                  PBUCKET pBucketSrcSrc,
                  PBUCKET pBucketDst,
                  const NativeCounter iHashSrc )
{
#ifdef DEBUG
    PBUCKET pBucketSrcSrcOriginal = pBucketSrcSrc;
    PBUCKET pBucketDstOriginal = pBucketDst;
    size_t cEntriesTotal = 0, cEntriesTotalRunning = 0;
    PBUCKET pbktT, pbktNextT;

    // catalog each BUCKET structure and make sure they end up in the
    // source chain, the destination chain, or the avail list by the end
    PBUCKET *rgBucketCheck = NULL, pbucketTX;
    size_t cBucketCheck = 0, iT;

    pbucketTX = pBucketSrcSrc;
    while ( pbucketTX )
    {
        cBucketCheck++;
        pbucketTX = PbucketBKTNext( pbucketTX );
    }
    pbucketTX = pBucketDst;
    DHTAssert( PbucketBKTNext( pbucketTX ) == NULL );
    while ( pbucketTX )
    {
        cBucketCheck++;
        pbucketTX = PbucketBKTNext( pbucketTX );
    }
    cBucketCheck++;     // account for bucket from heap
    rgBucketCheck = (PBUCKET *)PvMEMAlloc( cBucketCheck * sizeof( PBUCKET ) );
    if ( NULL != rgBucketCheck )
    {
        iT = 0;
        pbucketTX = pBucketSrcSrc;
        while ( pbucketTX )
        {
            rgBucketCheck[ iT++ ] = pbucketTX;
            pbucketTX = PbucketBKTNext( pbucketTX );
        }
        pbucketTX = pBucketDst;
        while ( pbucketTX )
        {
            rgBucketCheck[ iT++ ] = pbucketTX;
            pbucketTX = PbucketBKTNext( pbucketTX );
        }
        rgBucketCheck[ iT++ ] = NULL;   // heap bucket
        DHTAssert( iT == cBucketCheck );
    }

    // count the number of entries that are in the source bucket
    pbktT = pBucketSrcSrc;
    if ( pbktT->m_pb != NULL )
    {
        while ( pbktT )
        {
            pbktNextT = PbucketBKTNext( pbktT );
            if ( pbktNextT )
            {
                // full bucket
                cEntriesTotal += size_t( m_centryBucket );
            }
            else
            {
                // partial bucket (not empty)
                cEntriesTotal += 1 + ( pbktT->m_pEntryLast - &pbktT->m_rgEntry[0] );
            }
            pbktT = pbktNextT;
        }
    }
#endif

    // cursor for reading entries
    PBUCKET pBucketNextSrc;
    CKeyEntry *pEntryThisSrc;
    CKeyEntry *pEntryMostSrc;

    // cursors for writing entries
    // index 0 is for the SrcDst cursor (entries whose src and dst is the source bucket)
    // index 1 is for the Dst cursor (entries whose dst is the destination bucket)
    PBUCKET pBucketThis[2];
    CKeyEntry *pEntryThis[2];
    CKeyEntry *pEntryMost[2];
    CKeyEntry *pEntryLast[2];
    size_t iIndex;

    // extra buckets (emptied source overflow buckets available for reuse)
    PBUCKET pBucketAvail = NULL;

    // remember if we used the bucket from the heap (the caller's reservation)
    BOOL fBucketFromHeap = fFalse;

    // used for hashing
    NativeCounter iHashMask;

    DHTAssert( pBucketSrcSrc );
    DHTAssert( pBucketDst );
    DHTAssert( pBucketDst->m_pb == NULL );

    // calculate the hash-mask (prevent wraparound)
    // mask = 2 * cBucketMax - 1, computed without overflowing the high bit
    DHTAssert( NcDIRIGetBucketMax( stateGrow ) > 0 );
    iHashMask = ( NcDIRIGetBucketMax( stateGrow ) - 1 ) + NcDIRIGetBucketMax( stateGrow );

    // prepare the read cursor
    pBucketNextSrc = PbucketBKTNext( pBucketSrcSrc );
    pEntryThisSrc = &pBucketSrcSrc->m_rgEntry[ 0 ];
    pEntryMostSrc = PentryBKTNextMost( pBucketSrcSrc );

    // prepare the src-dst write cursor
    pBucketThis[ 0 ] = pBucketSrcSrc;
    pEntryThis[ 0 ] = &pBucketSrcSrc->m_rgEntry[ 0 ];
    pEntryMost[ 0 ] = &pBucketSrcSrc->m_rgEntry[ m_centryBucket ];
    pEntryLast[ 0 ] = NULL;

    // prepare the dst write cursor
    pBucketThis[ 1 ] = pBucketDst;
    pEntryThis[ 1 ] = &pBucketDst->m_rgEntry[ 0 ];
    pEntryMost[ 1 ] = &pBucketDst->m_rgEntry[ m_centryBucket ];
    pEntryLast[ 1 ] = NULL;

    // iterate over all entries in the source bucket
    // INVARIANT: the write cursors never overtake the read cursor, so an
    // entry is always copied to a slot at or before the one it was read from
    while ( fTrue )
    {
        // check the read (src) cursor
        if ( pEntryThisSrc < pEntryMostSrc )
        {
            // nop
        }
        else if ( NULL == pBucketNextSrc )
        {
            // all entries have been exhausted
            break;
        }
        else
        {
            // all entries in the current bucket have been exhausted
            if ( pBucketSrcSrc != pBucketThis[ 0 ] )
            {
                // the bucket we are leaving is completely empty and the
                // SrcDst pointer is not using it
                // we need to put it into the available bucket list
                // the bucket ordering should be like this:
                //    pBucketThis[0] (src/dst bucket)
                //    pBucketSrcSrc (src bucket)
                //    pBucketNextSrc (next src bucket)
                DHTAssert( pBucketThis[ 0 ]->m_pBucketNext == pBucketSrcSrc );
                DHTAssert( pBucketSrcSrc->m_pBucketNext == pBucketNextSrc );
                DHTAssert( pBucketNextSrc->m_pBucketPrev == pBucketSrcSrc );
                DHTAssert( pBucketSrcSrc->m_pBucketPrev == pBucketThis[ 0 ] );

                // update the bucket links to "remove" the free bucket
                pBucketThis[ 0 ]->m_pBucketNext = pBucketNextSrc;
                pBucketNextSrc->m_pBucketPrev = pBucketThis[ 0 ];

                // add the bucket to the avail list (singly linked via m_pBucketNext)
                pBucketSrcSrc->m_pBucketNext = pBucketAvail;
                pBucketAvail = pBucketSrcSrc;
            }

            // move to the next bucket
            pEntryThisSrc = &pBucketNextSrc->m_rgEntry[ 0 ];
            pEntryMostSrc = PentryBKTNextMost( pBucketNextSrc );
            pBucketSrcSrc = pBucketNextSrc;
            pBucketNextSrc = PbucketBKTNext( pBucketNextSrc );
        }

        // calculate the hash value: 0 -> entry stays in source, 1 -> moves to dst
        iIndex = BOOL( ( pEntryThisSrc->Hash() & iHashMask ) != iHashSrc );
        DHTAssert( iIndex == 0 || iIndex == 1 );
#ifdef DEBUG
        cEntriesTotalRunning++;
#endif  // DEBUG

        // check the write (src/dst or dst) cursor
        if ( pEntryThis[ iIndex ] < pEntryMost[ iIndex ] )
        {
            // nop
        }
        else
        {
            // all entries in the current cursor's bucket are exhausted
            if ( 0 == iIndex )
            {
                // the src/dst cursor will always have a next bucket
                // (it trails the read cursor through the same chain)
                DHTAssert( pBucketThis[ 0 ]->m_pBucketNext->m_pBucketPrev == pBucketThis[ 0 ] );
                pBucketThis[ 0 ] = pBucketThis[ 0 ]->m_pBucketNext;

                // setup the entry ptrs
                pEntryThis[ 0 ] = &pBucketThis[ 0 ]->m_rgEntry[ 0 ];
                pEntryMost[ 0 ] = &pBucketThis[ 0 ]->m_rgEntry[ m_centryBucket ];
            }
            else
            {
                // the dst cursor must allocate a new bucket
                if ( pBucketAvail )
                {
                    // get a bucket from the avail list
                    const PBUCKET pBucketNew = pBucketAvail;
                    pBucketAvail = pBucketAvail->m_pBucketNext;

                    // chain it
                    pBucketThis[ 1 ]->m_pBucketNext = pBucketNew;
                    pBucketNew->m_pBucketPrev = pBucketThis[ 1 ];

                    // move to it
                    pBucketThis[ 1 ] = pBucketNew;
                }
                else
                {
                    // get a bucket from the reservation pool
                    // (the caller reserved exactly one, so this happens at most once)
                    DHTAssert( !fBucketFromHeap );
                    fBucketFromHeap = fTrue;

                    // allocate it
                    const PBUCKET pBucketReserve = phs->m_bucketpool.PbucketPOOLCommit();
                    DHTAssert( pBucketReserve );
                    STATInsertOverflowBucket();
#ifdef DEBUG
                    // add the heap bucket to our catalog of buckets
                    if ( NULL != rgBucketCheck )
                    {
                        DHTAssert( NULL == rgBucketCheck[ cBucketCheck - 1 ] );
                        rgBucketCheck[ cBucketCheck - 1 ] = pBucketReserve;
                    }
#endif  // DEBUG

                    // chain it
                    pBucketThis[ 1 ]->m_pBucketNext = pBucketReserve;
                    pBucketReserve->m_pBucketPrev = pBucketThis[ 1 ];

                    // move to it
                    pBucketThis[ 1 ] = pBucketReserve;
                }

                // setup the entry ptrs
                pEntryThis[ 1 ] = &pBucketThis[ 1 ]->m_rgEntry[ 0 ];
                pEntryMost[ 1 ] = &pBucketThis[ 1 ]->m_rgEntry[ m_centryBucket ];
            }
        }

        // copy the entry
        pEntryThis[ iIndex ]->SetEntry( pEntryThisSrc->m_entry );

        // advance the write (src/dst or dst) cursor
        pEntryLast[ iIndex ] = pEntryThis[ iIndex ];
        pEntryThis[ iIndex ]++;

        // advance the read (src) cursor
        pEntryThisSrc++;
    }

    if ( pBucketSrcSrc == pBucketThis[ 0 ] )
    {
        // nop
    }
    else
    {
        // the last bucket of the src bucket is no longer needed
        // the bucket ordering should be like this:
        //    pBucketThis[0] (src/dst bucket)
        //    pBucketSrcSrc (src bucket)
        //    << NOTHING >>
        DHTAssert( pBucketThis[ 0 ]->m_pBucketNext == pBucketSrcSrc );
        DHTAssert( pBucketSrcSrc->m_pBucketPrev == pBucketThis[ 0 ] );

        // free the bucket
        MEMFree( pBucketSrcSrc );
        STATDeleteOverflowBucket();
#ifdef DEBUG
        // remove the bucket from the bucket-catalog
        if ( NULL != rgBucketCheck )
        {
            for ( iT = 0; iT < cBucketCheck; iT++ )
            {
                if ( rgBucketCheck[iT] == pBucketSrcSrc )
                {
                    rgBucketCheck[iT] = NULL;
                    break;
                }
            }
            DHTAssert( iT < cBucketCheck );     // the bucket better be in the bucket-catalog!
        }
#endif  // DEBUG
    }

    // update the next/end ptrs for the src/dst cursor and the dst cursor
    pBucketThis[ 0 ]->m_pEntryLast = pEntryLast[ 0 ];
    pBucketThis[ 1 ]->m_pEntryLast = pEntryLast[ 1 ];

#ifdef DEBUG
    if ( NULL != rgBucketCheck )
    {
        // check each catalogued bucket to see if it is in the pBucketSrcSrc, pBucketDst, or pBucketAvail

        // find and remove all buckets in pBucketSrcSrc
        pbucketTX = pBucketSrcSrcOriginal;
        DHTAssert( pbucketTX );
        while ( pbucketTX )
        {
            for ( iT = 0; iT < cBucketCheck; iT++ )
            {
                if ( rgBucketCheck[iT] == pbucketTX )
                {
                    rgBucketCheck[iT] = NULL;
                    break;
                }
            }
            DHTAssert( iT < cBucketCheck ); // if this goes off, we somehow added a bucket to the
                                            // SOURCE CHAIN -- THIS SHOULD NEVER HAPPEN! also, we
                                            // never catalogued the bucket!
            pbucketTX = PbucketBKTNext( pbucketTX );
        }

        // find and remove all buckets in pBucketDst
        pbucketTX = pBucketDstOriginal;
        DHTAssert( pbucketTX );
        while ( pbucketTX )
        {
            for ( iT = 0; iT < cBucketCheck; iT++ )
            {
                if ( rgBucketCheck[iT] == pbucketTX )
                {
                    rgBucketCheck[iT] = NULL;
                    break;
                }
            }
            DHTAssert( iT < cBucketCheck ); // if this goes off, we added a bucket to the destination
                                            // chain, but it was never catalogued! first question: where
                                            // did the bucket come from if didn't catalogue it???
            pbucketTX = PbucketBKTNext( pbucketTX );
        }

        // find and remove all buckets in pBucketAvail
        pbucketTX = pBucketAvail;
        while ( pbucketTX )
        {
            for ( iT = 0; iT < cBucketCheck; iT++ )
            {
                if ( rgBucketCheck[iT] == pbucketTX )
                {
                    rgBucketCheck[iT] = NULL;
                    break;
                }
            }
            DHTAssert( iT < cBucketCheck ); // if this goes off, we have a free bucket that was never
                                            // catalogued! where did it come from?
                                            // NOTE: this is not a memleak, it is a "we-never-catalogued-it"
                                            //       problem; the memory will be freed later in this function
            pbucketTX = pbucketTX->m_pBucketNext;
        }

        // the list should now be empty -- verify this
        for ( iT = 0; iT < cBucketCheck; iT++ )
        {
            // if this goes off, rgBucketCheck[iT] contains a bucket that was abandoned without
            // being freed!
            DHTAssert( rgBucketCheck[iT] == NULL );
        }

        // free the list
        MEMFree( rgBucketCheck );
    }

    size_t cEntriesAfterwards = 0;

    // make sure the number of entries we processed matches the number of entries we started with
    DHTAssert( cEntriesTotal == cEntriesTotalRunning );

    // make sure we have all the entries we started with
    pbktT = pBucketSrcSrcOriginal;
    if ( pbktT->m_pb != NULL )
    {
        while ( pbktT )
        {
            pbktNextT = PbucketBKTNext( pbktT );
            if ( pbktNextT )
            {
                // full bucket
                cEntriesAfterwards += size_t( m_centryBucket );
            }
            else
            {
                // partial bucket (not empty)
                cEntriesAfterwards += 1 + ( pbktT->m_pEntryLast - &pbktT->m_rgEntry[0] );
            }
            pbktT = pbktNextT;
        }
    }
    pbktT = pBucketDstOriginal;
    if ( pbktT->m_pb != NULL )
    {
        while ( pbktT )
        {
            pbktNextT = PbucketBKTNext( pbktT );
            if ( pbktNextT )
            {
                // full bucket
                cEntriesAfterwards += size_t( m_centryBucket );
            }
            else
            {
                // partial bucket (not empty)
                cEntriesAfterwards += 1 + ( pbktT->m_pEntryLast - &pbktT->m_rgEntry[0] );
            }
            pbktT = pbktNextT;
        }
    }
    DHTAssert( cEntriesAfterwards == cEntriesTotal );
#endif

    // free the avail list (any recycled buckets the dst cursor did not consume)
    while ( pBucketAvail )
    {
        PBUCKET pBucketT;

        pBucketT = pBucketAvail;
        pBucketAvail = pBucketAvail->m_pBucketNext;
        MEMFree( pBucketT );
        STATDeleteOverflowBucket();
    }

    if ( !fBucketFromHeap )
    {
        phs->m_bucketpool.POOLUnreserve();  // cancel the heap reservation (we never used it)
    }

    STATSplitBucket();
}
// work-horse for merging a bucket (linear-hashing shrink step)
//
// Appends every entry of the source bucket chain to the tail of the
// destination bucket chain, then marks the source's fixed bucket empty
// (m_pb = NULL). Source overflow BUCKETs are spliced wholesale onto the
// destination chain rather than copied entry-by-entry; only the entries in
// the source's fixed bucket are copied. The caller must hold write locks on
// both chains and must have reserved one bucket in phs->m_bucketpool
// (committed here at most once, otherwise unreserved at the end).
//
//  phs        - per-processor HOTSTUFF context (bucket pool / statistics)
//  pBucketSrc - fixed bucket of the source chain being retired
//  pBucketDst - fixed bucket of the destination chain
void BKTIDoMerge( HOTSTUFF* const phs,
                  PBUCKET pBucketSrc,
                  PBUCKET pBucketDst )
{
#ifdef DEBUG
    // catalog each BUCKET structure and make sure they end up in the destination bucket
    PBUCKET pBucketDstOriginal = pBucketDst;
    PBUCKET *rgBucketCheck = NULL, pbucketT;
    size_t cBucketCheck = 0, iT;

    pbucketT = pBucketSrc;
    while ( pbucketT )
    {
        cBucketCheck++;
        pbucketT = PbucketBKTNext( pbucketT );
    }
    pbucketT = pBucketDst;
    while ( pbucketT )
    {
        cBucketCheck++;
        pbucketT = PbucketBKTNext( pbucketT );
    }
    cBucketCheck++;     // account for bucket from heap
    rgBucketCheck = (PBUCKET *)PvMEMAlloc( cBucketCheck * sizeof( PBUCKET ) );
    if ( NULL != rgBucketCheck )
    {
        iT = 0;
        pbucketT = pBucketSrc;
        while ( pbucketT )
        {
            rgBucketCheck[ iT++ ] = pbucketT;
            pbucketT = PbucketBKTNext( pbucketT );
        }
        pbucketT = pBucketDst;
        while ( pbucketT )
        {
            rgBucketCheck[ iT++ ] = pbucketT;
            pbucketT = PbucketBKTNext( pbucketT );
        }
        rgBucketCheck[ iT++ ] = NULL;   // heap bucket
        DHTAssert( iT == cBucketCheck );
    }

    // count the number of entries we will be handling
    size_t cEntriesTotal = 0;
    PBUCKET pbktT, pbktNextT;

    pbktT = pBucketSrc;
    if ( pbktT->m_pb != NULL )
    {
        while ( pbktT )
        {
            pbktNextT = PbucketBKTNext( pbktT );
            if ( pbktNextT )
            {
                // full bucket
                cEntriesTotal += size_t( m_centryBucket );
            }
            else
            {
                // partial bucket (not empty)
                cEntriesTotal += 1 + ( pbktT->m_pEntryLast - &pbktT->m_rgEntry[0] );
            }
            pbktT = pbktNextT;
        }
    }
    pbktT = pBucketDst;
    if ( pbktT->m_pb != NULL )
    {
        while ( pbktT )
        {
            pbktNextT = PbucketBKTNext( pbktT );
            if ( pbktNextT )
            {
                // full bucket
                cEntriesTotal += size_t( m_centryBucket );
            }
            else
            {
                // partial bucket (not empty)
                cEntriesTotal += 1 + ( pbktT->m_pEntryLast - &pbktT->m_rgEntry[0] );
            }
            pbktT = pbktNextT;
        }
    }
#endif

    // read (src) cursor
    CKeyEntry *pEntryThisSrc;
    CKeyEntry *pEntryMostSrc;

    // write (dst) cursor
    CKeyEntry *pEntryThisDst;
    CKeyEntry *pEntryMostDst;

    // remember if we have moved to the last bucket or not
    // (m_pEntryLast is only valid/settable on the final bucket of a chain)
    BOOL fSetEndPtr;

    // remember if we allocated a bucket from the heap
    BOOL fBucketFromHeap = fFalse;

    // efficiency variables
    PBUCKET pBucketT;

    // move to the end of the dst bucket
    pBucketT = PbucketBKTNext( pBucketDst );
    while ( pBucketT )
    {
        pBucketDst = pBucketT;
        pBucketT = PbucketBKTNext( pBucketT );
    }
    pEntryThisDst = PentryBKTNextMost( pBucketDst );
    pEntryMostDst = &pBucketDst->m_rgEntry[ m_centryBucket ];

    if ( !PbucketBKTNext( pBucketSrc ) )
    {
        // the src bucket does not have extra bucket structures

        // setup the src cursor for a partial pass
        pEntryThisSrc = &pBucketSrc->m_rgEntry[ 0 ];
        pEntryMostSrc = PentryBKTNextMost( pBucketSrc );

        // we are not appending buckets from the src bucket, so we will be setting the
        // end ptr of the dst bucket iff we add entries from the src bucket
        fSetEndPtr = BOOL( pEntryThisSrc < pEntryMostSrc );
    }
    else
    {
        // the src bucket has extra bucket structures

        // attach the extra bucket structures to the dst bucket
        // (splice -- the entries in them are not copied individually)
        pBucketDst->m_pBucketNext = pBucketSrc->m_pBucketNext;
        pBucketDst->m_pBucketNext->m_pBucketPrev = pBucketDst;

        // setup the src cursor for a full pass over the first src bucket
        pEntryThisSrc = &pBucketSrc->m_rgEntry[ 0 ];
        pEntryMostSrc = &pBucketSrc->m_rgEntry[ m_centryBucket ];

        // we are appending buckets from the src bucket, so we will not be setting the
        // end ptr of the dst bucket because we are no longer in the last bucket
        // of the dst bucket chain
        fSetEndPtr = fFalse;
    }

    // copy the entries in the src bucket
    while ( pEntryThisSrc < pEntryMostSrc )
    {
        // check the dst cursor
        if ( pEntryThisDst < pEntryMostDst )
        {
            // nop
        }
        else
        {
            // all entries in the dst bucket are exhausted
            if ( !fSetEndPtr )
            {
                // we are not in the last bucket of the dst bucket because there is no end ptr
                pBucketT = PbucketBKTNext( pBucketDst );
                DHTAssert( pBucketT );
                do
                {
                    pBucketDst = pBucketT;
                    pBucketT = PbucketBKTNext( pBucketT );
                }
                while ( pBucketT );

                // setup the dst cursor
                pEntryThisDst = pBucketDst->m_pEntryLast + 1;
                pEntryMostDst = &pBucketDst->m_rgEntry[ m_centryBucket ];

                // we are now able to set the end ptr because we are in the last bucket
                // of the dst bucket
                fSetEndPtr = fTrue;

                // restart the loop
                continue;
            }

            // we were at the last bucket in the dst bucket
            // get a bucket from the heap reservation pool
            // (the caller reserved exactly one, so this happens at most once)
            DHTAssert( !fBucketFromHeap );
            fBucketFromHeap = fTrue;

            // commit the reservation now
            pBucketT = phs->m_bucketpool.PbucketPOOLCommit();
            DHTAssert( pBucketT );
            STATInsertOverflowBucket();

            // chain the heap bucket
            pBucketDst->m_pBucketNext = pBucketT;
            pBucketT->m_pBucketPrev = pBucketDst;

            // setup the dst cursor
            pBucketDst = pBucketT;
            pEntryThisDst = &pBucketDst->m_rgEntry[ 0 ];
            pEntryMostDst = &pBucketDst->m_rgEntry[ m_centryBucket ];
#ifdef DEBUG
            // add the heap bucket to our catalog of buckets
            if ( NULL != rgBucketCheck )
            {
                DHTAssert( rgBucketCheck[cBucketCheck - 1] == NULL );
                rgBucketCheck[cBucketCheck - 1] = pBucketT;
            }
#endif  // DEBUG
        }

        // copy the entry
        pEntryThisDst->SetEntry( pEntryThisSrc->m_entry );

        // advance the cursors
        pEntryThisSrc++;
        pEntryThisDst++;
    }

    // mark the src bucket as empty
    pBucketSrc->m_pb = NULL;

    if ( fSetEndPtr )
    {
        // set the end of the destination bucket
        DHTAssert( pEntryThisDst != &pBucketDst->m_rgEntry[ 0 ] );
        pBucketDst->m_pEntryLast = pEntryThisDst - 1;
    }
    else
    {
        // we do not need to set the end ptr of the dst bucket
        // (the spliced-on tail already carries a valid m_pEntryLast)
        // nop
    }

    if ( !fBucketFromHeap )
    {
        // cancel the unused heap reservation
        phs->m_bucketpool.POOLUnreserve();
    }

#ifdef DEBUG
    if ( NULL != rgBucketCheck )
    {
        // check each catalogued bucket to see if it is in the pBucketDst bucket
        pbucketT = pBucketDstOriginal;
        DHTAssert( pbucketT );

        // find and remove all buckets found in the destination bucket from our list
        while ( pbucketT )
        {
            for ( iT = 0; iT < cBucketCheck; iT++ )
            {
                if ( rgBucketCheck[iT] == pbucketT )
                {
                    rgBucketCheck[iT] = NULL;
                    break;
                }
            }
            DHTAssert( iT < cBucketCheck );     // if this goes off, we somehow got a bucket
                                                // into the chain that shouldn't be there
                                                // (it is a bucket we never catalogued!)
            pbucketT = PbucketBKTNext( pbucketT );
        }

        // find and remove pBucketSrc from our list
        for ( iT = 0; iT < cBucketCheck; iT++ )
        {
            if ( rgBucketCheck[iT] == pBucketSrc )
            {
                rgBucketCheck[iT] = NULL;
                break;
            }
        }
        DHTAssert( iT < cBucketCheck );     // if this goes off, somehow the FIXED source bucket
                                            // got removed from our catalogue OR pBucketSrc was
                                            // changed (which should never happen)

        // the list should now be empty -- verify this
        for ( iT = 0; iT < cBucketCheck; iT++ )
        {
            // if this goes off, rgBucketCheck[iT] contains a bucket that was abandoned without
            // being freed!
            DHTAssert( rgBucketCheck[iT] == NULL );
        }

        // free the list
        MEMFree( rgBucketCheck );
    }

    // make sure the number of entries has not changed since we started
    size_t cEntriesAfterwards = 0;
    pbktT = pBucketDstOriginal;
    if ( pbktT->m_pb != NULL )
    {
        while ( pbktT )
        {
            pbktNextT = PbucketBKTNext( pbktT );
            if ( pbktNextT )
            {
                // full bucket
                cEntriesAfterwards += size_t( m_centryBucket );
            }
            else
            {
                // partial bucket (not empty)
                cEntriesAfterwards += 1 + ( pbktT->m_pEntryLast - &pbktT->m_rgEntry[0] );
            }
            pbktT = pbktNextT;
        }
    }
    DHTAssert( cEntriesAfterwards == cEntriesTotal );
#endif

    STATMergeBucket();
}
  2315. /////////////////////////////////////////////////////////////////////////////////////////
  2316. //
  2317. // mechanisms for implementing the dynamic-hash-table policies
  2318. //
  2319. // hash to the correct HOTSTUFF element
  2320. HOTSTUFF *HOTSTUFFHash() const
  2321. {
  2322. return m_rghs + OSSYNC::OSSyncGetCurrentProcessor();
  2323. }
  2324. // statistics
  2325. void STATInsertEntry( HOTSTUFF* const phs )
  2326. {
  2327. AtomicExchangeAddPointer( (void**)&phs->m_cEntry, (void*)1 );
  2328. phs->m_cOp++;
  2329. }
  2330. void STATDeleteEntry( HOTSTUFF* const phs )
  2331. {
  2332. AtomicExchangeAddPointer( (void**)&phs->m_cEntry, (void*)-1 );
  2333. phs->m_cOp++;
  2334. }
  2335. void STATInsertOverflowBucket()
  2336. {
  2337. #ifdef DHT_STATS
  2338. m_cBucketOverflowInsert++;
  2339. #endif // DHT_STATS
  2340. }
  2341. void STATDeleteOverflowBucket()
  2342. {
  2343. #ifdef DHT_STATS
  2344. m_cBucketOverflowDelete++;
  2345. #endif // DHT_STATS
  2346. }
  2347. void STATSplitBucket()
  2348. {
  2349. #ifdef DHT_STATS
  2350. m_cBucketSplit++;
  2351. #endif // DHT_STATS
  2352. }
  2353. void STATMergeBucket()
  2354. {
  2355. #ifdef DHT_STATS
  2356. m_cBucketMerge++;
  2357. #endif // DHT_STATS
  2358. }
  2359. void STATSplitDirectory()
  2360. {
  2361. #ifdef DHT_STATS
  2362. m_cDirSplit++;
  2363. #endif // DHT_STATS
  2364. }
  2365. void STATMergeDirectory()
  2366. {
  2367. #ifdef DHT_STATS
  2368. m_cDirMerge++;
  2369. #endif // DHT_STATS
  2370. }
  2371. void STATStateTransition()
  2372. {
  2373. #ifdef DHT_STATS
  2374. m_cTransition++;
  2375. #endif // DHT_STATS
  2376. }
  2377. void STATPolicySelection()
  2378. {
  2379. #ifdef DHT_STATS
  2380. m_cSelection++;
  2381. #endif // DHT_STATS
  2382. }
  2383. void STATSplitContention()
  2384. {
  2385. #ifdef DHT_STATS
  2386. m_cSplitContend++;
  2387. #endif // DHT_STATS
  2388. }
  2389. void STATMergeContention()
  2390. {
  2391. #ifdef DHT_STATS
  2392. m_cMergeContend++;
  2393. #endif // DHT_STATS
  2394. }
  2395. // amortized table maintenance
  2396. void PerformMaintenance()
  2397. {
  2398. // enter the state machine
  2399. HOTSTUFF* phs;
  2400. const int iGroup = UiSTEnter( &phs );
  2401. const ENUMSTATE esCurrent = EsSTGetState();
  2402. // carry out the current policy
  2403. if ( esCurrent == stateGrow )
  2404. {
  2405. BKTISplit( phs );
  2406. }
  2407. else if ( esCurrent == stateShrink )
  2408. {
  2409. BKTIMerge( phs );
  2410. }
  2411. // leave the state machine
  2412. STLeave( iGroup, phs );
  2413. }
// Examine the table-wide statistics and choose the next grow/shrink policy.
//
// Caller must own m_semPolicy. On exit the semaphore has been handed off:
// either released directly here, or passed to STTransition (which is
// presumed to release it as part of the state change -- confirm against
// STTransition's definition).
//
//  phs - the caller's HOTSTUFF context; its op count gates this call
void SelectMaintenancePolicy( HOTSTUFF* const phs )
{
    // collect information on the current state of the hash table
    const ENUMSTATE esCurrent = EsSTGetState();
    const NativeCounter cBucketMax = NcDIRIGetBucketMax( esCurrent );
    const NativeCounter cBucket = NcDIRIGetBucket( esCurrent );
    const NativeCounter cBucketActive = cBucketMax + cBucket;
    const NativeCounter cOpLocal = phs->m_cOp;

    // compute the current entry count and op count and reset the op count
    // (unsynchronized reads of per-processor counters; totals are approximate)
    NativeCounter cEntry = 0;
    NativeCounter cOp = 0;
    for ( NativeCounter ihs = 0; ihs < m_chs; ihs++ )
    {
        cEntry += m_rghs[ ihs ].m_cEntry;
        cOp += m_rghs[ ihs ].m_cOp;
        m_rghs[ ihs ].m_cOp = 0;
    }

    // compute the ideal entry count (load factor * active buckets)
    const NativeCounter cEntryIdeal = m_cLoadFactor * cBucketActive;

    // compute the max entry count (capacity of the fixed buckets)
    const NativeCounter cEntryMax = m_centryBucket * cBucketActive;

    // determine our current flexibility in the entry count
    // NOTE(review): NativeCounter is unsigned -- if m_cLoadFactor > m_centryBucket
    // or cEntryIdeal > cEntryMax / 2 these subtractions would wrap to a huge
    // value; presumably initialization guarantees m_centryBucket >= 2 * m_cLoadFactor -- confirm
    const NativeCounter cEntryFlexibility = max( m_centryBucket - m_cLoadFactor, cEntryMax / 2 - cEntryIdeal );

    // determine our current threshold sensitivity
    const NativeCounter cOpSensitivity = max( 1, cEntryFlexibility / 2 );

    // approximate the local (per-HOTSTUFF) threshold sensitivity
    // NOTE(review): divides by cOpLocal; the caller (MaintainTable) only calls
    // here when phs->m_cOp > m_cOpSensitivity, which keeps cOpLocal non-zero,
    // and cOp >= cOpLocal keeps ratio >= 1
    const NativeCounter ratio = ( cOp + cOpLocal - 1 ) / cOpLocal;
    const NativeCounter cOpSensitivityLocal = max( 1, cOpSensitivity / ratio );

    // compute the preferred entry count: clamp the observed count into the
    // band [ideal - slack, ideal + slack] where slack = flexibility - sensitivity
    NativeCounter cEntryPreferred = cEntry;
    if ( cEntryIdeal + ( cEntryFlexibility - cOpSensitivity ) < cEntry )
    {
        cEntryPreferred = cEntry - ( cEntryFlexibility - cOpSensitivity );
    }
    else if ( cEntryIdeal > cEntry + ( cEntryFlexibility - cOpSensitivity ) )
    {
        cEntryPreferred = cEntry + ( cEntryFlexibility - cOpSensitivity );
    }

    // compute the preferred bucket count (never below the configured minimum)
    const NativeCounter cBucketPreferred = max( m_cbucketMin, ( cEntryPreferred + m_cLoadFactor - 1 ) / m_cLoadFactor );

    // determine the new policy; a direction change or hitting the end of a
    // split/merge pass requires a state transition
    ENUMSTATE esNew = stateNil;
    if ( esCurrent == stateGrow )
    {
        if ( cBucketPreferred < cBucketActive )
        {
            esNew = stateShrinkFromGrow;
        }
        else if ( cBucketPreferred > cBucketActive )
        {
            if ( cBucket == cBucketMax )
            {
                // the grow pass split every bucket; double the directory
                esNew = stateSplitFromGrow;
            }
        }
    }
    else
    {
        DHTAssert( esCurrent == stateShrink );
        if ( cBucketPreferred < cBucketActive )
        {
            if ( cBucket == 0 )
            {
                // the shrink pass merged every bucket; halve the directory
                esNew = stateMergeFromShrink;
            }
        }
        else if ( cBucketPreferred > cBucketActive )
        {
            esNew = stateGrowFromShrink;
        }
    }

    // enact the new policy (write only on change to avoid dirtying the line)
    if ( m_cOpSensitivity != cOpSensitivityLocal )
    {
        m_cOpSensitivity = cOpSensitivityLocal;
    }
    if ( m_cBucketPreferred != cBucketPreferred )
    {
        m_cBucketPreferred = cBucketPreferred;
    }
    if ( esNew )
    {
        STTransition( esNew );
    }
    else
    {
        m_semPolicy.Release();
    }

    STATPolicySelection();
}
// Opportunistically maintain the table after an operation: possibly select
// a new grow/shrink policy, then perform one unit of amortized split/merge
// work if the active bucket count is off its preferred value.
//
//  phs - the caller's HOTSTUFF context (its op count drives the threshold)
void MaintainTable( HOTSTUFF* const phs )
{
    // decide on a new policy if we may have breached one of our
    // thresholds
    //
    // double-checked acquire: test the threshold before taking the policy
    // semaphore (cheap common case) and again after, since a concurrent
    // policy selection resets the op counters; m_semPolicy.CAvail() screens
    // out contenders before the try-acquire. Note the short-circuit order
    // matters -- FTryAcquire has a side effect.
    if ( phs->m_cOp > m_cOpSensitivity &&
         m_semPolicy.CAvail() &&
         m_semPolicy.FTryAcquire() )
    {
        if ( phs->m_cOp > m_cOpSensitivity )
        {
            // hands off m_semPolicy (released inside, possibly via STTransition)
            SelectMaintenancePolicy( phs );
        }
        else
        {
            // lost the race; someone else already selected a policy
            m_semPolicy.Release();
        }
    }

    // perform amortized work on the table as necessary
    // (both state copies are checked since we do not know the current state)
    if ( NcDIRIGetBucketMax( stateGrow ) + NcDIRIGetBucket( stateGrow ) < m_cBucketPreferred ||
         m_cBucketPreferred < NcDIRIGetBucketMax( stateShrink ) + NcDIRIGetBucket( stateShrink ) )
    {
        PerformMaintenance();
    }
}
  2528. public:
  2529. // calculate the address of the aligned block and store its offset (for free)
  2530. static void* PvMEMIAlign( void* const pv, const size_t cbAlign )
  2531. {
  2532. // round up to the nearest cache line
  2533. // NOTE: this formula always forces an offset of at least 1 byte
  2534. const ULONG_PTR ulp = ULONG_PTR( pv );
  2535. const ULONG_PTR ulpAligned = ( ( ulp + cbAlign ) / cbAlign ) * cbAlign;
  2536. const ULONG_PTR ulpOffset = ulpAligned - ulp;
  2537. DHTAssert( ulpOffset > 0 );
  2538. DHTAssert( ulpOffset <= cbAlign );
  2539. DHTAssert( ulpOffset == BYTE( ulpOffset ) ); // must fit into a single BYTE
  2540. // store the offset
  2541. BYTE *const pbAligned = (BYTE*)ulpAligned;
  2542. pbAligned[ -1 ] = BYTE( ulpOffset );
  2543. // return the aligned block
  2544. return (void*)pbAligned;
  2545. }
  2546. // retrieve the original unaligned block of memory from the aligned block
  2547. static void* PvMEMIUnalign( void* const pv )
  2548. {
  2549. // read the offset of the real block
  2550. BYTE *const pbAligned = (BYTE*)pv;
  2551. const BYTE bOffset = pbAligned[ -1 ];
  2552. DHTAssert( bOffset > 0 );
  2553. // return the real unaligned block
  2554. return (void*)( pbAligned - bOffset );
  2555. }
  2556. // allocate memory
  2557. static void* PvMEMAlloc( const size_t cbSize, const size_t cbAlign = cbCacheLine )
  2558. {
  2559. void* const pv = new BYTE[ cbSize + cbAlign ];
  2560. if ( pv )
  2561. {
  2562. return PvMEMIAlign( pv, cbAlign );
  2563. }
  2564. return NULL;
  2565. }
  2566. // free memory
  2567. static void MEMFree( void* const pv )
  2568. {
  2569. if ( pv )
  2570. {
  2571. delete [] ((BYTE*)PvMEMIUnalign( pv ));
  2572. }
  2573. }
private:

    //  never written (set once at init time, read-only thereafter)

    NativeCounter       m_cLoadFactor;          //  preferred number of entries in a bucket at any given time
    NativeCounter       m_centryBucket;         //  maximum number of entries per bucket
    NativeCounter       m_cbBucket;             //  size in bytes of a bucket (rounded up to the nearest full cache-line)
    NativeCounter       m_rankDHTrwlBucket;     //  rank of the reader/writer lock on each bucket
    HOTSTUFF            *m_rghs;                //  array of HOTSTUFF objects (hashed per processor)
    NativeCounter       m_chs;                  //  size of HOTSTUFF array
    NativeCounter       m_cbucketMin;           //  minimum number of buckets in the hash-table

    //  padding -- presumably keeps the next group on its own cache-line
    //  to avoid false sharing (sizes differ per word size)
#ifdef _WIN64
    BYTE                m_rgbRsvdNever[ 8 ];
#else  //  !_WIN64
    BYTE                m_rgbRsvdNever[ 4 ];
#endif  //  _WIN64

    //  rarely written

    DIRPTRS             m_dirptrs[ 2 ];         //  directory pointers (2 copies)
    BYTE                *m_rgrgBucket[ cbitNativeCounter ];     //  directory (array of arrays of buckets)

    //  no padding necessary

    //  often written

    NativeCounter       m_cOpSensitivity;       //  used to regulate policy changes
    NativeCounter       m_cBucketPreferred;     //  preferred table size
    ENUMSTATE           m_stateCur;             //  current state

#ifdef _WIN64
    BYTE                m_rgbRsvdOften[ 44 ];
#else  //  !_WIN64
    BYTE                m_rgbRsvdOften[ 20 ];
#endif  //  _WIN64

    //  always written (second only to HOTSTUFF members)

    OSSYNC::CSemaphore  m_semPolicy;            //  used to serialize policy changes
    long                m_cCompletions;         //  counts the number of metered-section completions

#ifdef _WIN64
    BYTE                m_rgbRsvdAlways[ 52 ];
#else  //  !_WIN64
    BYTE                m_rgbRsvdAlways[ 24 ];
#endif  //  _WIN64

#ifdef DHT_STATS

    //  performance statistics (maintained only in DHT_STATS builds)

    long                m_cBucketOverflowInsert;    //  count of overflow bucket allocations
    long                m_cBucketOverflowDelete;    //  count of overflow bucket deletions
    long                m_cBucketSplit;             //  count of bucket split operations
    long                m_cBucketMerge;             //  count of bucket merge operations
    long                m_cDirSplit;                //  count of directory split operations
    long                m_cDirMerge;                //  count of directory merge operations
    long                m_cTransition;              //  count of state transitions
    long                m_cSelection;               //  count of policy selections
    long                m_cSplitContend;            //  count of split contentions
    long                m_cMergeContend;            //  count of merge contentions

#ifdef _WIN64
    BYTE                m_rgbRsvdPerf[ 24 ];
#else  //  !_WIN64
    BYTE                m_rgbRsvdPerf[ 24 ];
#endif  //  _WIN64

#endif  //  DHT_STATS

#ifdef DEBUG
    BOOL                m_fInit;                //  initialization flag (asserted by every public method)
#endif  //  DEBUG
};
  2631. /////////////////////////////////////////////////////////////////////////////////////
  2632. //
  2633. // CDynamicHashTable< CKey, CEntry >
  2634. //
  2635. /////////////////////////////////////////////////////////////////////////////////////
//  ctor
//
//  rankDHTrwlBucket - deadlock rank assigned to every bucket's reader/writer lock
//
//  NOTE:  the table is NOT usable after construction -- ErrInit must be called;
//         the ctor only captures the lock rank and primes the policy semaphore
template< class CKey, class CEntry >
inline CDynamicHashTable< CKey, CEntry >::
CDynamicHashTable( const NativeCounter rankDHTrwlBucket )
    :   m_semPolicy( CSyncBasicInfo( "CDynamicHashTable::m_semPolicy" ) )
{
#ifdef DEBUG
    m_fInit = fFalse;

    //  zero-out this memory so the debugger won't print garbage
    memset( m_rgbRsvdNever, 0, sizeof( m_rgbRsvdNever ) );
    memset( m_rgbRsvdOften, 0, sizeof( m_rgbRsvdOften ) );
    memset( m_rgbRsvdAlways, 0, sizeof( m_rgbRsvdAlways ) );
#ifdef DHT_STATS
    memset( m_rgbRsvdPerf, 0, sizeof( m_rgbRsvdPerf ) );
#endif  //  DHT_STATS
#endif

    //  we should be on a 32-bit or 64-bit system
#ifdef _WIN64
    DHTAssert( 8 == sizeof( NativeCounter ) );
#else  //  _!WIN64
    DHTAssert( 4 == sizeof( NativeCounter ) );
#endif  //  _WIN64

    //  capture the rank for each bucket
    m_rankDHTrwlBucket = rankDHTrwlBucket;

    //  prepare each semaphore so it can have 1 owner
    //  (the semaphore presumably starts with no available counts -- this
    //  Release makes exactly one acquire possible)
    m_semPolicy.Release();
}
//  dtor
//
//  intentionally empty:  resources are released by Term(), which is safe to
//  call even on a partially initialized table -- not by the destructor
template< class CKey, class CEntry >
inline CDynamicHashTable< CKey, CEntry >::
~CDynamicHashTable()
{
}
//  initializes the dynamic hash table.  if the table cannot be initialized,
//  errOutOfMemory will be returned
//
//  dblLoadFactor   - preferred entries per bucket; multiplied by dblUniformity
//                    to produce the load factor handed to the directory
//  dblUniformity   - fudge factor for the expected uniformity of the hash values
//  cBucketMinimum  - initial/minimum preferred bucket count
template< class CKey, class CEntry >
inline CDynamicHashTable< CKey, CEntry >::ERR CDynamicHashTable< CKey, CEntry >::
ErrInit( const double dblLoadFactor,
         const double dblUniformity,
         const NativeCounter cBucketMinimum )
{
    ERR             err;
    NativeCounter   ihs;

    DHTAssert( !m_fInit );

    //  initialize all data by its cache-line grouping

    //  never written
    m_cLoadFactor   = 0;
    m_centryBucket  = 0;
    m_cbBucket      = 0;
    m_rghs          = NULL;
    m_chs           = OSSYNC::OSSyncGetProcessorCountMax();     //  one HOTSTUFF slot per processor
    m_cbucketMin    = 0;

    //  rarely written
    memset( m_dirptrs, 0, sizeof( m_dirptrs ) );
    memset( m_rgrgBucket, 0, sizeof( m_rgrgBucket ) );

    //  often written
    m_cOpSensitivity    = 0;
    m_cBucketPreferred  = cBucketMinimum;

    //  NOTE: we cannot start in stateFreeze because we must go through the "halfway" completion
    //  function so that we copy the directory ptrs safely
    m_stateCur = stateGrow;

    //  always written
    m_cCompletions = 0;

#ifdef DHT_STATS
    //  performance statistics
    m_cBucketOverflowInsert = 0;
    m_cBucketOverflowDelete = 0;
    m_cBucketSplit          = 0;
    m_cBucketMerge          = 0;
    m_cDirSplit             = 0;
    m_cDirMerge             = 0;
    m_cTransition           = 0;
    m_cSelection            = 0;
    m_cSplitContend         = 0;
    m_cMergeContend         = 0;
#endif  //  DHT_STATS

    //  allocate the HOTSTUFF array (cache-line aligned via PvMEMAlloc)
    m_rghs = (HOTSTUFF*)PvMEMAlloc( m_chs * sizeof( HOTSTUFF ), cbCacheLine );
    if ( !m_rghs )
    {
        err = errOutOfMemory;
        goto HandleError;
    }

    //  construct the HOTSTUFF objects in place (placement new)
    for ( ihs = 0; ihs < m_chs; ihs++ )
    {
        new( m_rghs + ihs ) HOTSTUFF();
    }

    //  initialize the directory
    err = ErrDIRInit( NativeCounter( dblLoadFactor * dblUniformity ), cBucketMinimum );
    if ( err != errSuccess )
    {
        goto HandleError;
    }

#ifdef DEBUG
    m_fInit = fTrue;
#endif  //  DEBUG
    return errSuccess;

HandleError:
    DHTAssert( err != errSuccess );
    //  Term is safe on a partially initialized table and undoes everything above
    Term();
    return err;
}
  2739. // terminates the dynamic hash table. this function can be called even if the
  2740. // hash table has never been initialized or is only partially initialized
  2741. //
  2742. // NOTE: any data stored in the table at this time will be lost!
  2743. template< class CKey, class CEntry >
  2744. inline void CDynamicHashTable< CKey, CEntry >::
  2745. Term()
  2746. {
  2747. #ifdef DEBUG
  2748. m_fInit = fFalse;
  2749. #endif // DEBUG
  2750. // term the directory
  2751. DIRTerm();
  2752. if ( NULL != m_rghs )
  2753. {
  2754. // delete the HOTSTUFF aray
  2755. while ( m_chs )
  2756. {
  2757. // destruct the object
  2758. m_chs--;
  2759. m_rghs[ m_chs ].HOTSTUFF::~HOTSTUFF();
  2760. }
  2761. MEMFree( m_rghs );
  2762. m_rghs = NULL;
  2763. }
  2764. }
  2765. // acquires a read lock on the specified key and returns the lock in the
  2766. // provided lock context
  2767. template< class CKey, class CEntry >
  2768. inline void CDynamicHashTable< CKey, CEntry >::
  2769. ReadLockKey( const CKey& key, CLock* const plock )
  2770. {
  2771. DHTAssert( m_fInit );
  2772. // verify the lock
  2773. DHTAssert( plock->m_ls == CLock::lsNil );
  2774. // initialize the lock
  2775. plock->m_ls = CLock::lsRead;
  2776. // enter the state machine
  2777. const int iGroup = UiSTEnter( &plock->m_phs );
  2778. const ENUMSTATE esCurrent = EsSTGetState();
  2779. // read-lock the key through the directory
  2780. DIRReadLockKey( esCurrent, key, plock );
  2781. // try to seek to the key (sets up currency)
  2782. BKTSeek( plock, key );
  2783. // leave the state machine
  2784. STLeave( iGroup, plock->m_phs );
  2785. }
  2786. // releases the read lock in the specified lock context
  2787. template< class CKey, class CEntry >
  2788. inline void CDynamicHashTable< CKey, CEntry >::
  2789. ReadUnlockKey( CLock* const plock )
  2790. {
  2791. DHTAssert( m_fInit );
  2792. // verify the lock
  2793. DHTAssert( FBKTRead( plock ) );
  2794. DHTAssert( plock->m_pBucketHead != NULL );
  2795. DHTAssert( plock->m_pBucketHead->CRWL().FReader() );
  2796. // unlock the key through the directory
  2797. DIRReadUnlockKey( plock );
  2798. // reset the lock
  2799. plock->m_ls = CLock::lsNil;
  2800. }
  2801. // acquires a write lock on the specified key and returns the lock in the
  2802. // provided lock context
  2803. template< class CKey, class CEntry >
  2804. inline void CDynamicHashTable< CKey, CEntry >::
  2805. WriteLockKey( const CKey& key, CLock* const plock )
  2806. {
  2807. DHTAssert( m_fInit );
  2808. // verify the lock
  2809. DHTAssert( plock->m_ls == CLock::lsNil );
  2810. // initialize the lock
  2811. plock->m_ls = CLock::lsWrite;
  2812. plock->m_fInsertOrDelete = fFalse;
  2813. // enter the state machine
  2814. const int iGroup = UiSTEnter( &plock->m_phs );
  2815. const ENUMSTATE esCurrent = EsSTGetState();
  2816. // write-lock the key through the directory
  2817. DIRWriteLockKey( esCurrent, key, plock );
  2818. // try to seek to the key (sets up currency)
  2819. BKTSeek( plock, key );
  2820. // leave the state machine
  2821. STLeave( iGroup, plock->m_phs );
  2822. }
  2823. // releases the write lock in the specified lock context
  2824. template< class CKey, class CEntry >
  2825. inline void CDynamicHashTable< CKey, CEntry >::
  2826. WriteUnlockKey( CLock* const plock )
  2827. {
  2828. DHTAssert( m_fInit );
  2829. // verify the lock
  2830. DHTAssert( FBKTWrite( plock ) );
  2831. DHTAssert( plock->m_pBucketHead != NULL );
  2832. DHTAssert( plock->m_pBucketHead->CRWL().FWriter() );
  2833. // unlock the key through the directory
  2834. DIRWriteUnlockKey( plock );
  2835. // we performed an insert or delete while holding the write lock
  2836. if ( plock->m_fInsertOrDelete )
  2837. {
  2838. // perform amortized maintenance on the table
  2839. MaintainTable( plock->m_phs );
  2840. }
  2841. // reset the lock
  2842. plock->m_ls = CLock::lsNil;
  2843. plock->m_fInsertOrDelete = fFalse;
  2844. }
  2845. // retrieves the entry corresponding to the key locked by the specified lock
  2846. // context. if there is no entry for this key, errEntryNotFound will be
  2847. // returned
  2848. template< class CKey, class CEntry >
  2849. inline CDynamicHashTable< CKey, CEntry >::ERR CDynamicHashTable< CKey, CEntry >::
  2850. ErrRetrieveEntry( CLock* const plock, CEntry* const pentry )
  2851. {
  2852. DHTAssert( m_fInit );
  2853. // verify the lock
  2854. DHTAssert( FBKTRead( plock ) || FBKTWrite( plock ) || FBKTScan( plock ) );
  2855. DHTAssert( plock->m_pBucketHead != NULL );
  2856. #ifdef DEBUG
  2857. if ( FBKTRead( plock ) )
  2858. {
  2859. DHTAssert( plock->m_pBucketHead->CRWL().FReader() );
  2860. }
  2861. else
  2862. {
  2863. DHTAssert( plock->m_pBucketHead->CRWL().FWriter() );
  2864. }
  2865. if ( FBKTRead( plock ) || FBKTWrite( plock ) )
  2866. {
  2867. CKeyEntry *pKeyEntry;
  2868. BKTGetEntry( plock, &pKeyEntry );
  2869. DHTAssert( pKeyEntry ? pKeyEntry->FEntryMatchesKey( plock->m_key ) : fTrue );
  2870. }
  2871. #endif
  2872. // get the entry
  2873. return ErrBKTGetEntry( plock, pentry );
  2874. }
  2875. // replaces the entry corresponding to the key locked by the specified lock
  2876. // context. the key for the new entry must match the key for the old entry.
  2877. // if there is no entry for this key, errNoCurrentEntry will be returned
  2878. template< class CKey, class CEntry >
  2879. inline CDynamicHashTable< CKey, CEntry >::ERR CDynamicHashTable< CKey, CEntry >::
  2880. ErrReplaceEntry( CLock* const plock, const CEntry& entry )
  2881. {
  2882. DHTAssert( m_fInit );
  2883. // verify the lock
  2884. DHTAssert( FBKTWrite( plock ) || FBKTScan( plock ) );
  2885. DHTAssert( plock->m_pBucketHead != NULL );
  2886. DHTAssert( plock->m_pBucketHead->CRWL().FWriter() );
  2887. #ifdef DEBUG
  2888. if ( FBKTWrite( plock ) )
  2889. {
  2890. CKeyEntry *pKeyEntry;
  2891. BKTGetEntry( plock, &pKeyEntry );
  2892. DHTAssert( pKeyEntry ? pKeyEntry->FEntryMatchesKey( plock->m_key ) : fTrue );
  2893. DHTAssert( ((CKeyEntry &)entry).FEntryMatchesKey( plock->m_key ) );
  2894. }
  2895. #endif
  2896. // replace the entry
  2897. return ErrBKTReplaceEntry( plock, entry );
  2898. }
  2899. // inserts a new entry corresponding to the key locked by the specified lock
  2900. // context. if there is already an entry with this key in the table,
  2901. // errKeyDuplicate will be returned. if the new entry cannot be inserted,
  2902. // errOutOfMemory will be returned
  2903. template< class CKey, class CEntry >
  2904. inline CDynamicHashTable< CKey, CEntry >::ERR CDynamicHashTable< CKey, CEntry >::
  2905. ErrInsertEntry( CLock* const plock, const CEntry& entry )
  2906. {
  2907. DHTAssert( m_fInit );
  2908. // verify the lock
  2909. DHTAssert( FBKTWrite( plock ) );
  2910. DHTAssert( plock->m_pBucketHead != NULL );
  2911. DHTAssert( plock->m_pBucketHead->CRWL().FWriter() );
  2912. /// DHTAssert( ((CKeyEntry &)entry).FEntryMatchesKey( plock->m_key ) );
  2913. // insert the entry
  2914. const ERR err = ErrBKTInsertEntry( plock, entry );
  2915. if ( errSuccess == err )
  2916. {
  2917. // maintain our stats
  2918. STATInsertEntry( plock->m_phs );
  2919. // we have performed an insert
  2920. plock->m_fInsertOrDelete = fTrue;
  2921. }
  2922. return err;
  2923. }
  2924. // deletes the entry corresponding to the key locked by the specified lock
  2925. // context. if there is no entry for this key, errNoCurrentEntry will be
  2926. // returned
  2927. template< class CKey, class CEntry >
  2928. inline CDynamicHashTable< CKey, CEntry >::ERR CDynamicHashTable< CKey, CEntry >::
  2929. ErrDeleteEntry( CLock* const plock )
  2930. {
  2931. DHTAssert( m_fInit );
  2932. // verify the lock
  2933. DHTAssert( FBKTWrite( plock ) || FBKTScan( plock ) );
  2934. DHTAssert( plock->m_pBucketHead != NULL );
  2935. DHTAssert( plock->m_pBucketHead->CRWL().FWriter() );
  2936. #ifdef DEBUG
  2937. if ( FBKTWrite( plock ) )
  2938. {
  2939. CKeyEntry *pKeyEntry;
  2940. BKTGetEntry( plock, &pKeyEntry );
  2941. DHTAssert( pKeyEntry ? pKeyEntry->FEntryMatchesKey( plock->m_key ) : fTrue );
  2942. }
  2943. #endif
  2944. if ( FBKTScan( plock ) )
  2945. {
  2946. // prepare the next-entry ptr so we can move-next after the delete
  2947. // if we are deleting the last entry in the bucket, make this NULL
  2948. // to force the cursor to move into the next hash bucket
  2949. DHTAssert( plock->m_pBucket != NULL );
  2950. DHTAssert( plock->m_pEntryNext == NULL );
  2951. plock->m_pEntryNext = ( plock->m_pEntry != plock->m_pBucket->m_pEntryLast ) ? plock->m_pEntry : NULL;
  2952. }
  2953. // delete the entry
  2954. const ERR err = ErrBKTDeleteEntry( plock );
  2955. if ( errSuccess == err )
  2956. {
  2957. // maintain our stats
  2958. STATDeleteEntry( plock->m_phs );
  2959. // we have performed a delete
  2960. plock->m_fInsertOrDelete = fTrue;
  2961. }
  2962. return err;
  2963. }
  2964. // sets up the specified lock context in preparation for scanning all entries
  2965. // in the hash table by physical storage order (i.e. not by key value order)
  2966. //
  2967. // NOTE: caller MUST terminate scan with EndHashScan to release any outstanding locks
  2968. template< class CKey, class CEntry >
  2969. inline void CDynamicHashTable< CKey, CEntry >::
  2970. BeginHashScan( CLock* const plock )
  2971. {
  2972. DHTAssert( m_fInit );
  2973. // verify the lock
  2974. DHTAssert( plock->m_ls == CLock::lsNil );
  2975. // initialize the lock to start scanning at the first bucket (it may be empty!)
  2976. plock->m_ls = CLock::lsScan;
  2977. plock->m_fInsertOrDelete = fFalse;
  2978. plock->m_iBucket = 0;
  2979. // enter the state machine
  2980. const int iGroup = UiSTEnter( &plock->m_phs );
  2981. const ENUMSTATE esCurrent = EsSTGetState();
  2982. // hash to the bucket we want (this may require a retry in grow/shrink mode)
  2983. DHTAssert( plock->m_pBucketHead == NULL );
  2984. plock->m_pBucketHead = PbucketDIRIHash( esCurrent, plock->m_iBucket );
  2985. // acquire the lock as a writer
  2986. plock->m_pBucketHead->CRWL().EnterAsWriter();
  2987. // NOTE: do not retry the hash function here because bucket 0 will never disappear
  2988. // leave the state machine
  2989. STLeave( iGroup, plock->m_phs );
  2990. // set up the currency as before-first
  2991. plock->m_pBucket = plock->m_pBucketHead;
  2992. plock->m_pEntryPrev = NULL;
  2993. plock->m_pEntry = NULL;
  2994. plock->m_pEntryNext = plock->m_pBucketHead->m_pb != NULL ? &plock->m_pBucketHead->m_rgEntry[0] : NULL;
  2995. }
//  sets up the specified lock context in preparation for scanning all entries
//  in the hash table by physical storage order (i.e. not by key value order),
//  starting at the bucket that owns the given key
//
//  NOTE: caller MUST terminate scan with EndHashScan to release any outstanding locks
template< class CKey, class CEntry >
inline void CDynamicHashTable< CKey, CEntry >::
BeginHashScanFromKey( const CKey& key, CLock* const plock )
{
    NativeCounter   cBucket;
    NativeCounter   cBucketMax;
    NativeCounter   iHash;

    DHTAssert( m_fInit );

    //  verify the lock
    DHTAssert( plock->m_ls == CLock::lsNil );

    //  initialize the lock
    plock->m_ls = CLock::lsScan;
    plock->m_fInsertOrDelete = fFalse;

    //  enter the state machine
    const int iGroup = UiSTEnter( &plock->m_phs );
    const ENUMSTATE esCurrent = EsSTGetState();

    //  write-lock the key through the directory
    DIRWriteLockKey( esCurrent, key, plock );

    //  calculate the current bucket configuration
    //
    //  NOTES ON WHY THIS WILL WORK:
    //
    //  cBucket may increase/decrease if we are in grow/shrink mode, but this won't effect the
    //      calculation below unless it grows ahead of OR shrinks behind the bucket at iHash;
    //      since we have the bucket at iHash locked, it cannot grow/shrink
    //  cBucketMax cannot change unless we are in split mode, and even then we will be reading from the
    //      COPY of the cBucketMax -- not the real cBucketMax which is changing
    cBucket = NcDIRIGetBucket( esCurrent );
    cBucketMax = NcDIRIGetBucketMax( esCurrent );
    DHTAssert( cBucketMax != 0 );

    //  calculate the hash value and normalize it within the limits of the current bucket configuration:
    //  mask to the range [0, 2*cBucketMax) (the mask form presumes cBucketMax is a
    //  power of 2 -- consistent with the DIRILog2 decomposition below), then fold
    //  indexes past the last live bucket back into the low half
    iHash = CKeyEntry::Hash( key );
    iHash = iHash & ( ( cBucketMax - 1 ) + cBucketMax );
    if ( iHash >= cBucketMax + cBucket )
        iHash -= cBucketMax;

    //  remember which bucket we locked
    plock->m_iBucket = iHash;

#ifdef DEBUG
    {
        //  verify that we have the correct bucket locked using only iHash
        NativeCounter   iExponent;
        NativeCounter   iRemainder;
        DIRILog2( iHash, &iExponent, &iRemainder );
        const PBUCKET pbucketT = PbucketDIRIResolve( iExponent, iRemainder );
        DHTAssert( pbucketT == plock->m_pBucketHead );
        DHTAssert( pbucketT->CRWL().FWriter() );
    }
#endif  //  DEBUG

    //  leave the state machine
    STLeave( iGroup, plock->m_phs );

    //  set up the currency as before-first
    plock->m_pBucket = plock->m_pBucketHead;
    plock->m_pEntryPrev = NULL;
    plock->m_pEntry = NULL;
    plock->m_pEntryNext = plock->m_pBucketHead->m_pb != NULL ? &plock->m_pBucketHead->m_rgEntry[0] : NULL;
}
  3056. // moves the specified lock context to the next entry in the hash table by
  3057. // physical storage order. if the end of the index is reached,
  3058. // errNoCurrentEntry is returned.
  3059. template< class CKey, class CEntry >
  3060. inline CDynamicHashTable< CKey, CEntry >::ERR CDynamicHashTable< CKey, CEntry >::
  3061. ErrMoveNext( CLock* const plock, BOOL* const pfNewBucket )
  3062. {
  3063. DHTAssert( m_fInit );
  3064. // verify the lock
  3065. DHTAssert( FBKTScan( plock ) );
  3066. DHTAssert( plock->m_pEntryPrev == NULL );
  3067. // move to the next entry in this bucket
  3068. if ( plock->m_pEntry )
  3069. {
  3070. // we are already on an existing entry
  3071. if ( plock->m_pEntry + 1 < PentryBKTNextMost( plock->m_pBucket ) )
  3072. {
  3073. // we have not reached the end of the current BUCKET
  3074. plock->m_pEntry++;
  3075. }
  3076. else
  3077. {
  3078. // we are at the end of the current BUCKET
  3079. plock->m_pBucket = PbucketBKTNext( plock->m_pBucket );
  3080. if ( plock->m_pBucket )
  3081. {
  3082. // we moved to the next BUCKET
  3083. plock->m_pEntry = &plock->m_pBucket->m_rgEntry[0];
  3084. }
  3085. else
  3086. {
  3087. // there are no more BUCKET structures in this chain
  3088. plock->m_pEntry = NULL;
  3089. }
  3090. }
  3091. }
  3092. else
  3093. {
  3094. // we are not on an entry (before-first or after-last)
  3095. plock->m_pEntry = plock->m_pEntryNext;
  3096. }
  3097. plock->m_pEntryNext = NULL;
  3098. if ( plock->m_pEntry != NULL )
  3099. {
  3100. // we moved to an entry successfully
  3101. DHTAssert( plock->m_pBucket );
  3102. if ( pfNewBucket )
  3103. {
  3104. *pfNewBucket = fFalse;
  3105. }
  3106. return errSuccess;
  3107. }
  3108. // try to move to the next hash-bucket
  3109. if ( pfNewBucket )
  3110. {
  3111. *pfNewBucket = fTrue;
  3112. }
  3113. return ErrSCANMoveNext( plock );
  3114. }
  3115. // terminates a scan by releasing all outstanding locks and reset the lock context
  3116. template< class CKey, class CEntry >
  3117. inline void CDynamicHashTable< CKey, CEntry >::
  3118. EndHashScan( CLock* const plock )
  3119. {
  3120. DHTAssert( m_fInit );
  3121. // verify the lock
  3122. DHTAssert( FBKTScan( plock ) );
  3123. DHTAssert( plock->m_pEntryPrev == NULL );
  3124. if ( plock->m_pBucketHead != NULL )
  3125. {
  3126. // unlock the current bucket
  3127. plock->m_pBucketHead->CRWL().LeaveAsWriter();
  3128. plock->m_pBucketHead = NULL;
  3129. // we performed an insert or delete while holding the write lock
  3130. if ( plock->m_fInsertOrDelete )
  3131. {
  3132. // perform amortized maintenance on the table
  3133. MaintainTable( plock->m_phs );
  3134. }
  3135. }
  3136. // reset the lock
  3137. plock->m_ls = CLock::lsNil;
  3138. plock->m_fInsertOrDelete = fFalse;
  3139. }
  3140. }; // namespace DHT
  3141. using namespace DHT;
#endif  //  _DHT_HXX_INCLUDED