Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

4391 lines
110 KiB

  1. #ifndef _DHT_HXX_INCLUDED
  2. #define _DHT_HXX_INCLUDED
  3. #pragma warning ( disable : 4200 ) // we allow zero sized arrays
  4. // asserts
  5. //
  6. // #define DHTAssert to point to your favorite assert function per #include
  7. #ifdef DHTAssert
  8. #else // !DHTAssert
  9. #define DHTAssert Assert
  10. #endif // DHTAssert
  11. #include <sync.hxx>
  12. #ifdef DEBUG
  13. // turns on unique names for bucket reader/writer locks (adds 60 bytes per BUCKET)
  14. #define UNIQUE_BUCKET_NAMES
  15. #ifdef UNIQUE_BUCKET_NAMES
  16. #include <stdio.h>
  17. #endif // UNIQUE_BUCKET_NAMES
  18. #endif
  19. #ifdef DEBUGGER_EXTENSION
  20. class CPRINTF;
  21. #endif
  22. namespace DHT {
  23. /////////////////////////////////////////////////////////////////////////////////////////
  24. // CDynamicHashTable
  25. //
  26. // Implements a dynamically resizable hash table of entries stored using a unique key
  27. //
  28. // CKey = class representing keys used to identify entries in the hash table
  29. // CEntry = class representing entries stored in the hash table
  30. // (required copy-constructor)
  31. template< class CKey, class CEntry >
  32. class CDynamicHashTable
  33. {
  34. public:
  35. // counter type (uses native word size of machine)
  36. typedef ULONG_PTR NativeCounter;
  37. // class controlling the Key and Entry for each entry in the hash table
  38. //
  39. // NOTE: All member functions must be defined by the user per instance
  40. // of this template. These functions must be defined after the
  41. // template definition. Declaring these functions to be inline
  42. // will allow full optimization by the compiler!
  43. class CKeyEntry
  44. {
  45. public:
  46. // produces the hash value for the specified key. this hash
  47. // function should produce numbers as uniformly as possible over
  48. // as large a range as possible for good performance
  49. static NativeCounter Hash( const CKey& key );
  50. // produces the hash value for this entry's key. this hash
  51. // function should produce the same number as the above function
  52. // for the same key
  53. NativeCounter Hash() const;
  54. // returns fTrue if this entry matches the given key. this way,
  55. // the key doesn't necessarily have to be stored in the hash table
  56. // entry
  57. //
  58. // e.g.: CEntry can be PBF and key can be IFMP/PGNO where the
  59. // actual IFMP/PGNO is stored in the BF structure. this would
  60. // ruin cache locality, of course, but it would use less memory
  61. //
  62. // note that the entry could also contain some kind of hash value
  63. // for the key allowing some weeding out of entries before jumping
  64. // off to the full structure for a full comparison. an example
  65. // of this would be the SPAIRs from SORT
  66. BOOL FEntryMatchesKey( const CKey& key ) const;
  67. // sets the contained entry to the given entry
  68. void SetEntry( const CEntry& entry );
  69. // gets the contained entry
  70. void GetEntry( CEntry* const pentry ) const;
  71. public:
  72. CEntry m_entry;
  73. ~CKeyEntry(); // not allowed
  74. private:
  75. CKeyEntry(); // not allowed
  76. CKeyEntry *operator =( const CKeyEntry & ); // not allowed
  77. };
  78. // API Error Codes
  79. enum ERR
  80. {
  81. errSuccess, // success
  82. errOutOfMemory, // not enough memory
  83. errInvalidParameter, // bad argument to function
  84. errEntryNotFound, // entry was not found
  85. errNoCurrentEntry, // currency is invalid
  86. errKeyDuplicate, // cannot insert because key already exists
  87. };
  88. // API Lock Context
  89. class CLock;
  90. public:
  91. CDynamicHashTable( const NativeCounter rankDHTrwlBucket );
  92. ~CDynamicHashTable();
  93. ERR ErrInit( const double dblLoadFactor,
  94. const double dblUniformity,
  95. const NativeCounter cBucketMinimum = 0 );
  96. void Term();
  97. void ReadLockKey( const CKey& key, CLock* const plock );
  98. void ReadUnlockKey( CLock* const plock );
  99. void WriteLockKey( const CKey& key, CLock* const plock );
  100. void WriteUnlockKey( CLock* const plock );
  101. ERR ErrRetrieveEntry( CLock* const plock, CEntry* const pentry );
  102. ERR ErrReplaceEntry( CLock* const plock, const CEntry& entry );
  103. ERR ErrInsertEntry( CLock* const plock, const CEntry& entry );
  104. ERR ErrDeleteEntry( CLock* const plock );
  105. void BeginHashScan( CLock* const plock );
  106. void BeginHashScanFromKey( const CKey& key, CLock* const plock );
  107. ERR ErrMoveNext( CLock* const plock, BOOL* const pfNewBucket = NULL );
  108. void EndHashScan( CLock* const plock );
  109. #ifdef DEBUGGER_EXTENSION
  110. VOID Dump( CPRINTF * pcprintf, const DWORD_PTR dwOffset = 0 ) const;
  111. VOID Scan( CPRINTF * pcprintf, VOID * pv ) const;
  112. #endif
  113. #ifdef DHT_STATS
  114. long CBucketOverflow() const { return m_cBucketOverflowInsert; }
  115. long CBucketSplit() const { return m_cBucketSplit; }
  116. long CBucketMerge() const { return m_cBucketMerge; }
  117. long CDirectorySplit() const { return m_cDirSplit; }
  118. long CDirectoryMerge() const { return m_cDirMerge; }
  119. long CStateTransition() const { return m_cTransition; }
  120. long CPolicySelection() const { return m_cSelection; }
  121. long CSplitContend() const { return m_cSplitContend; }
  122. long CMergeContend() const { return m_cMergeContend; }
  123. #else // !DHT_STATS
  124. long CBucketOverflow() const { return 0; }
  125. long CBucketSplit() const { return 0; }
  126. long CBucketMerge() const { return 0; }
  127. long CDirectorySplit() const { return 0; }
  128. long CDirectoryMerge() const { return 0; }
  129. long CStateTransition() const { return 0; }
  130. long CPolicySelection() const { return 0; }
  131. long CSplitContend() const { return 0; }
  132. long CMergeContend() const { return 0; }
  133. #endif // DHT_STATS
  134. private:
  135. // possible states for the hash-table
  136. //
  137. // DANGER! DANGER! DANGER WILL ROBINSON!
  138. //
  139. // DO NOT CHANGE THE ENUMATION VALUES! CODE IS DEPENDANT ON THEM BEING AS THEY ARE!
  140. // (specifically, I do "stateCur >> 4" to test for 0x10000 so I can see if we are splitting)
  141. //
  142. // DANGER! DANGER! DANGER WILL ROBINSON!
  143. enum ENUMSTATE
  144. {
  145. stateNil = 0,
  146. stateShrinkFromGrow = 1,
  147. stateShrinkFromGrow2 = 2,
  148. stateGrowFromShrink = 3,
  149. stateGrowFromShrink2 = 4,
  150. stateSplitFromGrow = 5,
  151. stateSplitFromGrow2 = 6,
  152. stateGrowFromSplit = 7,
  153. stateGrowFromSplit2 = 8,
  154. stateMergeFromShrink = 9,
  155. stateMergeFromShrink2 = 10,
  156. stateShrinkFromMerge = 11,
  157. stateShrinkFromMerge2 = 12,
  158. stateUnused = 13,
  159. stateGrow = 14,
  160. stateShrink = 15,
  161. stateSplit = 16,
  162. stateMerge = 17,
  163. };
  164. // Constants
  165. enum { cbitByte = 8 }; // bits per byte
  166. enum { cbitNativeCounter = sizeof( NativeCounter ) * cbitByte }; // bits per NativeCounter
  167. // BUCKET
  168. //
  169. // - this is the individual unit of allocation for each logical bucket
  170. // - each BUCKET contains several CKeyEntry objects packed together
  171. // - BUCKETs are chained together to make up the entire logical bucket
  172. struct BUCKET
  173. {
  174. public:
  175. // read-write-lock/prev-ptr
  176. // in the primary BUCKET (allocated as a part of an array), this is the read-write-lock
  177. // in secondary BUCKETs, this is the prev-ptr for reverse traversal
  178. union
  179. {
  180. BYTE m_rgbRWL[ sizeof( OSSYNC::CReaderWriterLock ) ];
  181. BUCKET *m_pBucketPrev;
  182. };
  183. // next/end pointer
  184. // when this points outside of the array of buckets, it points to the next BUCKET
  185. // when this points inside of the array of buckets, it points to the first free entry
  186. union
  187. {
  188. BYTE *m_pb;
  189. BUCKET *m_pBucketNext;
  190. CKeyEntry *m_pEntryLast;
  191. };
  192. // array of entries (it will contain 'load-factor' entries)
  193. CKeyEntry m_rgEntry[];
  194. public:
  195. // return the properly typed CReaderWriterLock
  196. OSSYNC::CReaderWriterLock& CRWL() const
  197. {
  198. return (OSSYNC::CReaderWriterLock &)m_rgbRWL;
  199. }
  200. };
  201. typedef BUCKET* PBUCKET;
  202. // BUCKETPool
  203. //
  204. // pool of BUCKET structures (reservation system for bucket split/merge)
  205. class BUCKETPool
  206. {
  207. public:
  208. PBUCKET m_pReserve; // list of BUCKET structures available for reservation
  209. long m_cReserve; // number of BUCKET structures available to be reserved
  210. OSSYNC::CSemaphore m_semReserve; // protection for reservation ptrs
  211. #ifdef _WIN64
  212. BYTE m_rgbRsvd[ 40 ];
  213. #else // !_WIN64
  214. BYTE m_rgbRsvd[ 20 ];
  215. #endif // _WIN64
  216. public:
  217. BUCKETPool()
  218. : m_semReserve( CSyncBasicInfo( "CDynamicHashTable::BUCKETPool::m_semReserve" ) )
  219. {
  220. // initialize vars
  221. m_pReserve = NULL;
  222. m_cReserve = 0;
  223. // prepare the semaphore to have 1 owner
  224. m_semReserve.Release();
  225. #ifdef DEBUG
  226. memset( m_rgbRsvd, 0, sizeof( m_rgbRsvd ) );
  227. #endif // DEBUG
  228. }
  229. // terminate
  230. ~BUCKETPool()
  231. {
  232. while ( m_pReserve )
  233. {
  234. PBUCKET pBucket;
  235. pBucket = m_pReserve;
  236. m_pReserve = m_pReserve->m_pBucketNext;
  237. MEMFree( pBucket );
  238. }
  239. m_cReserve = 0;
  240. }
  241. // reserve a BUCKET structure
  242. // "allocate" a bucket from the list by decrementing the counter of available buckets
  243. // if the counter went below zero, we need add a bucket to the list now (or fail)
  244. // to make sure we can honor the request later
  245. BOOL FPOOLReserve( const NativeCounter cbBucket )
  246. {
  247. // reserve a bucket using the counter
  248. if ( AtomicDecrement( (long*)&m_cReserve ) >= 0 )
  249. {
  250. return fTrue;
  251. }
  252. // reserve a bucket from the heap
  253. else
  254. {
  255. return FPOOLReserve_( cbBucket );
  256. }
  257. }
  258. BOOL FPOOLReserve_( const NativeCounter cbBucket )
  259. {
  260. // at this point, we need to increment m_cReserve for 1 of 2 reasons:
  261. // the allocation will succeed and we will add the new bucket to the list
  262. // the allocation will fail and we can't leave without "deallocating" the bucket
  263. AtomicIncrement( (long*)&m_cReserve );
  264. // we need to allocate a bucket and add it to the list (to back the reservation we want)
  265. const PBUCKET pBucket = PBUCKET( PvMEMAlloc( cbBucket ) );
  266. if ( pBucket )
  267. {
  268. // add the bucket to the list
  269. m_semReserve.Acquire();
  270. pBucket->m_pBucketNext = m_pReserve;
  271. m_pReserve = pBucket;
  272. m_semReserve.Release();
  273. // reservation succeeded
  274. return fTrue;
  275. }
  276. // the allocation failed so the reservation cannot succeed
  277. return fFalse;
  278. }
  279. // commit a reservation
  280. BUCKET *PbucketPOOLCommit()
  281. {
  282. PBUCKET pBucketReserve;
  283. // assign a bucket to the reservation
  284. m_semReserve.Acquire();
  285. pBucketReserve = m_pReserve;
  286. DHTAssert( pBucketReserve );
  287. m_pReserve = m_pReserve->m_pBucketNext;
  288. m_semReserve.Release();
  289. // return the bucket
  290. return pBucketReserve;
  291. }
  292. // release the reservation
  293. void POOLUnreserve()
  294. {
  295. // "deallocate" the bucket that was previously reserved
  296. AtomicIncrement( (long*)&m_cReserve );
  297. }
  298. };
  299. // HOTSTUFF
  300. //
  301. // "hot" elements of the hash-table (hashed to array of size 2*cProcessor elems)
  302. //
  303. // 32 bytes on WIN32
  304. // 64 bytes on WIN64
  305. //
  306. struct HOTSTUFF
  307. {
  308. public:
  309. NativeCounter m_cEntry; // counter for entries
  310. NativeCounter m_cOp; // counter for inserts/deletes
  311. OSSYNC::CMeteredSection m_cms; // metered section for changing states
  312. #ifdef _WIN64
  313. BYTE m_rgbRsvd[ 24 ]; // alignment padding
  314. #else // !_WIN64
  315. BYTE m_rgbRsvd[ 12 ]; // alignment padding
  316. #endif // _WIN64
  317. BUCKETPool m_bucketpool; // pool of BUCKET blobs
  318. HOTSTUFF()
  319. : m_cms()
  320. {
  321. m_cEntry = 0;
  322. m_cOp = 0;
  323. #ifdef DEBUG
  324. memset( m_rgbRsvd, 0, sizeof( m_rgbRsvd ) );
  325. #endif // DEBUG
  326. }
  327. };
  328. // DIRPTRS
  329. //
  330. // containment for the directory pointers
  331. // these pointers control the use of the directory itself (m_rgrgBucket)
  332. //
  333. // the hash table will always have a minimum of 2 buckets (0 and 1) in the directory
  334. //
  335. // buckets are stored in dynamically allocated arrays which are pointed to by the directory
  336. // each array is 2 times larger than the previous array (exponential growth)
  337. // e.g. the Nth array (m_rgrgBucket[N]) contains 2^N contiguous buckets
  338. // NOTE: the 0th array is special in that it contains an extra element making its total 2 elements
  339. // (normally, 2^0 == 1 element; this is done for magical reasons to be explained later)
  340. // thus, the total number of entries for a given N is:
  341. // N
  342. // 1 + SUM 2^i --> 1 + [ 2^(N+1) - 1 ] --> 2^(N+1)
  343. // i=0
  344. //
  345. // we know the total number of distinct hash values is a power of 2 (it must fit into a NativeCounter)
  346. // we can represent this with 2^M where M is the number of bits in a NativeCounter
  347. // therefore, assuming the above system of exponential growth,
  348. // we know that we can store the total number of hash buckets required at any given time so long as N = M
  349. // in other words, N = # of bits in NativeCounter --> sizeof( NativeCounter ) * 8
  350. //
  351. // therefore, we can statically allocate the array of bucket arrays
  352. // and, we can use LOG2 to compute the bucket address of any given hash value
  353. // (exceptions: DIRILog2( 0 ) => 0, 0 and DIRILog2( 1 ) => 0, 1)
  354. //
  355. // for an explaination of m_cBucketMax and m_cBucket you should read the paper on
  356. // Dynamic Hashing by Per Ake Larson
  357. //
  358. // 160 bytes on WIN32 (5 cache lines)
  359. // 320 bytes on WIN64 (10 cache lines)
  360. struct DIRPTRS
  361. {
  362. NativeCounter m_cBucketMax; // half-way to last bucket in split iteration (2^(n-1))
  363. NativeCounter m_cBucket; // destination of next split (0 to 2^(n-1)), must add to m_cBucketMax
  364. #ifdef _WIN64
  365. BYTE m_rgbRsvd[ 16 ]; // alignment padding
  366. #else // !_WIN64
  367. BYTE m_rgbRsvd[ 8 ]; // alignment padding
  368. #endif // _WIN64
  369. };
  370. // CLock
  371. //
  372. // - lock context for read/write/scan operations on the hash-table
  373. // - tracks currency within a bucket
  374. // - access is restricted to the dynamic-hash-table
  375. public:
  376. class CLock
  377. {
  378. friend class CDynamicHashTable< CKey, CEntry >;
  379. public:
  380. // possible states for a lock context (class CLock)
  381. enum ENUMLOCKSTATE
  382. {
  383. lsNil = 0, // lock is not used
  384. lsRead = 1, // lock is being used to read a particular CKeyEntry object
  385. lsWrite = 2, // lock is being used to write a particular CKeyEntry object
  386. lsScan = 3, // lock is being used to scan the hash-table
  387. };
  388. public:
  389. CLock()
  390. {
  391. m_ls = lsNil;
  392. m_pBucketHead = NULL;
  393. }
  394. ~CLock()
  395. {
  396. DHTAssert( m_pBucketHead == NULL );
  397. }
  398. private:
  399. // lock state
  400. ENUMLOCKSTATE m_ls; // current state of this lock context
  401. BOOL m_fInsertOrDelete;
  402. // HOTSTUFF pointer
  403. HOTSTUFF *m_phs;
  404. #ifdef DEBUG
  405. // debug-only parameters
  406. CKey m_key; // track the key that should be locked
  407. #endif
  408. // ptr to the first BUCKET
  409. BUCKET *m_pBucketHead;
  410. // ptr to the current BUCKET
  411. BUCKET *m_pBucket; // current BUCKET
  412. // ISAM-style cursor on current BUCKET (m_pBucket)
  413. CKeyEntry *m_pEntryPrev; // previous entry
  414. CKeyEntry *m_pEntry; // current entry
  415. CKeyEntry *m_pEntryNext; // next entry
  416. // current bucket (used in scan-mode only)
  417. NativeCounter m_iBucket; // current bucket
  418. };
  419. /////////////////////////////////////////////////////////////////////////////////////////
  420. //
  421. // state machine
  422. //
  423. const int UiSTEnter( HOTSTUFF **pphs )
  424. {
  425. // hash to the HOTSTUFF structure
  426. *pphs = HOTSTUFFHash();
  427. // enter the metered section
  428. return ( *pphs )->m_cms.Enter();
  429. }
  430. void STLeave( const int group, HOTSTUFF *phs )
  431. {
  432. phs->m_cms.Leave( group );
  433. }
  434. const ENUMSTATE EsSTGetState() const
  435. {
  436. return m_stateCur;
  437. }
  438. void STTransition( const ENUMSTATE esNew )
  439. {
  440. // initiate a transition to the desired state
  441. m_stateCur = esNew;
  442. m_cCompletions = 0;
  443. for ( NativeCounter ihs = 0; ihs < m_chs; ihs++ )
  444. {
  445. m_rghs[ ihs ].m_cms.Partition( OSSYNC::CMeteredSection::PFNPARTITIONCOMPLETE( STCompletion_ ), DWORD_PTR( this ) );
  446. }
  447. }
  448. static void STCompletion_( CDynamicHashTable< CKey, CEntry >* pdht )
  449. {
  450. pdht->STCompletion();
  451. }
  452. void STCompletion()
  453. {
  454. // state transition table
  455. typedef void (CDynamicHashTable< CKey, CEntry >::*PfnCompletion)();
  456. struct StateTransitionTable
  457. {
  458. PfnCompletion m_pfnCompletion;
  459. ENUMSTATE m_stNext;
  460. };
  461. static const StateTransitionTable rgstt[] =
  462. {
  463. /* stateNil */ { NULL, stateNil, },
  464. /* stateShrinkFromGrow */ { NULL, stateShrinkFromGrow2, },
  465. /* stateShrinkFromGrow2 */ { NULL, stateShrink, },
  466. /* stateGrowFromShrink */ { NULL, stateGrowFromShrink2, },
  467. /* stateGrowFromShrink2 */ { NULL, stateGrow, },
  468. /* stateSplitFromGrow */ { NULL, stateSplitFromGrow2, },
  469. /* stateSplitFromGrow2 */ { STCompletionCopyDir, stateSplit, },
  470. /* stateGrowFromSplit */ { NULL, stateGrowFromSplit2, },
  471. /* stateGrowFromSplit2 */ { NULL, stateGrow, },
  472. /* stateMergeFromShrink */ { NULL, stateMergeFromShrink2, },
  473. /* stateMergeFromShrink2 */ { STCompletionCopyDir, stateMerge, },
  474. /* stateShrinkFromMerge */ { NULL, stateShrinkFromMerge2, },
  475. /* stateShrinkFromMerge2 */ { NULL, stateShrink, },
  476. /* stateUnused */ { NULL, stateNil, },
  477. /* stateGrow */ { STCompletionGrowShrink, stateNil, },
  478. /* stateShrink */ { STCompletionGrowShrink, stateNil, },
  479. /* stateSplit */ { STCompletionSplit, stateGrowFromSplit, },
  480. /* stateMerge */ { STCompletionMerge, stateShrinkFromMerge, },
  481. };
  482. // all metered sections have transitioned to the new state
  483. if ( NativeCounter( AtomicIncrement( &m_cCompletions ) ) >= m_chs )
  484. {
  485. STATStateTransition();
  486. // save the current state as it may change as a side-effect of
  487. // calling the completion function
  488. const ENUMSTATE esCurrent = EsSTGetState();
  489. // if there is a completion function for this state then call it
  490. if ( rgstt[ esCurrent ].m_pfnCompletion )
  491. {
  492. (this->*rgstt[ esCurrent ].m_pfnCompletion)();
  493. }
  494. // if there is a next state then immediately begin the transition to that state
  495. if ( rgstt[ esCurrent ].m_stNext )
  496. {
  497. STTransition( rgstt[ esCurrent ].m_stNext );
  498. }
  499. }
  500. }
  501. void STCompletionCopyDir()
  502. {
  503. // backup the bucket ptrs for use during the split/merge process
  504. memcpy( &m_dirptrs[ 1 ], &m_dirptrs[ 0 ], sizeof( DIRPTRS ) );
  505. }
  506. void STCompletionGrowShrink()
  507. {
  508. // enable the selection of a new maintenance policy
  509. m_semPolicy.Release();
  510. }
  511. void STCompletionSplit()
  512. {
  513. // split the directory
  514. DIRISplit();
  515. }
  516. void STCompletionMerge()
  517. {
  518. // merge the directory
  519. DIRIMerge();
  520. }
  521. /////////////////////////////////////////////////////////////////////////////////////////
  522. //
  523. // directory
  524. //
  525. // initialize the directory, possible allocating some buckets
  526. ERR ErrDIRInit( const NativeCounter cLoadFactor, const NativeCounter cbucketMin )
  527. {
  528. ERR err;
  529. NativeCounter iExponent;
  530. NativeCounter iRemainder;
  531. // check params
  532. if ( cLoadFactor < 1 )
  533. {
  534. return errInvalidParameter;
  535. }
  536. // setup the main paramters
  537. m_cLoadFactor = cLoadFactor;
  538. // calculate the bucket size, accounting for:
  539. //
  540. // - bucket header
  541. // - enough room for twice the load factor to eliminate overflow
  542. // buckets with uniform hashing
  543. // - room for an additional entry to give us some flexibility in
  544. // our actual load factor to reduce maintenance overhead
  545. // - cache line alignment of the bucket
  546. m_cbBucket = sizeof( BUCKET ) + ( cLoadFactor * 2 + 1 ) * sizeof( CKeyEntry );
  547. m_cbBucket = ( ( m_cbBucket + cbCacheLine - 1 ) / cbCacheLine ) * cbCacheLine;
  548. // calculate the number of entries we can fit into a single bucket
  549. // NOTE: this may be larger than intended because we rounded the bucket size up the nearest cache-line
  550. m_centryBucket = ( m_cbBucket - sizeof( BUCKET ) ) / sizeof( CKeyEntry );
  551. // calculate the minimum number of buckets using the following lower-bounds:
  552. // cbucketMin (user parameter)
  553. // # of processors (make sure we have atleast 1 bucket/proc as an attempt to minimize contention)
  554. // 2 (hash table assumes atleast 2 buckets)
  555. m_cbucketMin = max( cbucketMin, NativeCounter( OSSYNC::OSSyncGetProcessorCountMax() ) );
  556. m_cbucketMin = max( m_cbucketMin, 2 );
  557. // align the minimum number of buckets to the next highest power of 2 (unless it's already a power of 2)
  558. DIRILog2( m_cbucketMin, &iExponent, &iRemainder );
  559. if ( iRemainder )
  560. {
  561. if ( ++iExponent >= cbitNativeCounter )
  562. {
  563. return errInvalidParameter; // could not round up without overflowing
  564. }
  565. }
  566. m_cbucketMin = 1 << iExponent;
  567. // setup the directory pointers
  568. m_dirptrs[ 0 ].m_cBucketMax = m_cbucketMin / 2;
  569. m_dirptrs[ 0 ].m_cBucket = m_cbucketMin / 2;
  570. // SPECIAL CASE: allocate 2 entries for the first bucket array
  571. // (we always do this because we always have atleast 2 buckets)
  572. err = ErrDIRInitBucketArray( 2, 0, &m_rgrgBucket[ 0 ] );
  573. if ( errSuccess != err )
  574. {
  575. return err;
  576. }
  577. // allocate memory for all other initial bucket arrays
  578. for ( iExponent = 1; ( NativeCounter( 1 ) << iExponent ) < m_cbucketMin; iExponent++ )
  579. {
  580. err = ErrDIRInitBucketArray( 1 << iExponent, 1 << iExponent, &m_rgrgBucket[ iExponent ] );
  581. if ( errSuccess != err )
  582. {
  583. return err;
  584. }
  585. }
  586. // clear the second set of directory ptrs
  587. memset( &m_dirptrs[ 1 ], 0, sizeof( DIRPTRS ) );
  588. return errSuccess;
  589. }
  590. // cleanup all memory by destructing it then freeing it
  591. void DIRTerm()
  592. {
  593. NativeCounter iExponent;
  594. // SPECIAL CASE: term the first bucket array (contains 2 entries)
  595. // (we will always do this because the hash-table will always contain atleast 2 entries)
  596. if ( m_rgrgBucket[ 0 ] )
  597. {
  598. DIRTermBucketArray( m_rgrgBucket[ 0 ], 2 );
  599. m_rgrgBucket[ 0 ] = NULL;
  600. }
  601. // term all other bucket arrays
  602. for ( iExponent = 1; iExponent < cbitNativeCounter; iExponent++ )
  603. {
  604. if ( m_rgrgBucket[ iExponent ] )
  605. {
  606. DIRTermBucketArray( m_rgrgBucket[ iExponent ], 1 << iExponent );
  607. m_rgrgBucket[ iExponent ] = NULL;
  608. }
  609. }
  610. // reset both copies of the directory pointers
  611. memset( m_dirptrs, 0, sizeof( m_dirptrs ) );
  612. }
  613. // lock a key for read operations
  614. void DIRReadLockKey( const ENUMSTATE esCurrent, const CKey &key, CLock * const plock ) const
  615. {
  616. NativeCounter iHash;
  617. NativeCounter iBucket;
  618. NativeCounter cBucketBefore;
  619. NativeCounter cBucketAfter;
  620. NativeCounter cBucketMax;
  621. // verify the lock
  622. DHTAssert( FBKTRead( plock ) );
  623. DHTAssert( plock->m_pBucketHead == NULL );
  624. #ifdef DEBUG
  625. // remember the key we are locking
  626. plock->m_key = key;
  627. #endif
  628. // hash to the bucket we want (this may require a retry in grow/shrink mode)
  629. iHash = CKeyEntry::Hash( key );
  630. plock->m_pBucketHead = PbucketDIRIHash( esCurrent, iHash, &iBucket, &cBucketBefore );
  631. // acquire the lock as a reader
  632. plock->m_pBucketHead->CRWL().EnterAsReader();
  633. // the entry may have moved as the result of a bucket split/merge
  634. cBucketAfter = NcDIRIGetBucket( esCurrent );
  635. cBucketMax = NcDIRIGetBucketMax( esCurrent );
  636. if ( cBucketBefore != cBucketAfter &&
  637. ( cBucketBefore <= iBucket && iBucket < cBucketAfter ||
  638. cBucketMax + cBucketAfter <= iBucket && iBucket < cBucketMax + cBucketBefore ) )
  639. {
  640. // unlock the old bucket
  641. plock->m_pBucketHead->CRWL().LeaveAsReader();
  642. // hash to the bucket we want (this cannot fail more than once)
  643. plock->m_pBucketHead = PbucketDIRIHash( esCurrent, iHash );
  644. // lock the new bucket
  645. plock->m_pBucketHead->CRWL().EnterAsReader();
  646. }
  647. // we should now have the correct bucket locked
  648. DHTAssert( plock->m_pBucketHead == PbucketDIRIHash( esCurrent, iHash ) );
  649. }
  650. // unlock the current read-locked key
  651. void DIRReadUnlockKey( CLock * const plock ) const
  652. {
  653. // verify the lock
  654. DHTAssert( FBKTRead( plock ) );
  655. DHTAssert( plock->m_pBucketHead != NULL );
  656. // release the lock
  657. plock->m_pBucketHead->CRWL().LeaveAsReader();
  658. plock->m_pBucketHead = NULL;
  659. }
  660. // lock a key for read/write operations
  661. void DIRWriteLockKey( const ENUMSTATE esCurrent, const CKey &key, CLock * const plock ) const
  662. {
  663. NativeCounter iHash;
  664. NativeCounter iBucket;
  665. NativeCounter cBucketBefore;
  666. NativeCounter cBucketAfter;
  667. NativeCounter cBucketMax;
  668. // verify the lock
  669. DHTAssert( FBKTWrite( plock ) || FBKTScan( plock ) );
  670. DHTAssert( plock->m_pBucketHead == NULL );
  671. #ifdef DEBUG
  672. // remember the key we are locking
  673. plock->m_key = key;
  674. #endif
  675. // hash to the bucket we want (this may require a retry in grow/shrink mode)
  676. iHash = CKeyEntry::Hash( key );
  677. plock->m_pBucketHead = PbucketDIRIHash( esCurrent, iHash, &iBucket, &cBucketBefore );
  678. // acquire the lock as a writer
  679. plock->m_pBucketHead->CRWL().EnterAsWriter();
  680. // the entry may have moved as the result of a bucket split/merge
  681. cBucketAfter = NcDIRIGetBucket( esCurrent );
  682. cBucketMax = NcDIRIGetBucketMax( esCurrent );
  683. if ( cBucketBefore != cBucketAfter &&
  684. ( cBucketBefore <= iBucket && iBucket < cBucketAfter ||
  685. cBucketMax + cBucketAfter <= iBucket && iBucket < cBucketMax + cBucketBefore ) )
  686. {
  687. // unlock the old bucket
  688. plock->m_pBucketHead->CRWL().LeaveAsWriter();
  689. // hash to the bucket we want (this cannot fail more than once)
  690. plock->m_pBucketHead = PbucketDIRIHash( esCurrent, iHash );
  691. // lock the new bucket
  692. plock->m_pBucketHead->CRWL().EnterAsWriter();
  693. }
  694. // we should now have the correct bucket locked
  695. DHTAssert( plock->m_pBucketHead == PbucketDIRIHash( esCurrent, iHash ) );
  696. }
  697. // unlock the current write-locked key
  698. void DIRWriteUnlockKey( CLock * const plock ) const
  699. {
  700. // verify the lock
  701. DHTAssert( FBKTWrite( plock ) );
  702. DHTAssert( plock->m_pBucketHead != NULL );
  703. // release the lock
  704. plock->m_pBucketHead->CRWL().LeaveAsWriter();
  705. plock->m_pBucketHead = NULL;
  706. }
  707. // initalize an array of buckets
  708. ERR ErrDIRInitBucketArray( const NativeCounter cbucketAlloc,
  709. const NativeCounter ibucketFirst,
  710. BYTE** const prgbBucket )
  711. {
  712. #ifdef UNIQUE_BUCKET_NAMES
  713. char *psz;
  714. #endif // UNIQUE_BUCKET_NAMES
  715. NativeCounter cb;
  716. BYTE *rgb;
  717. NativeCounter ibucket;
  718. DHTAssert( cbucketAlloc > 0 );
  719. DHTAssert( prgbBucket );
  720. // calculate the size (in bytes) of the new bucket array
  721. #ifdef UNIQUE_BUCKET_NAMES
  722. cb = cbucketAlloc * ( m_cbBucket + 60 ); // add 60 extra bytes per bucket for a unique name (for the bucket's r/w-lock)
  723. #else
  724. cb = cbucketAlloc * m_cbBucket;
  725. #endif
  726. // allocate the new bucket array
  727. rgb = (BYTE*)PvMEMAlloc( cb );
  728. if ( !rgb )
  729. {
  730. *prgbBucket = NULL;
  731. return errOutOfMemory;
  732. }
  733. // initialize each bucket within the new array
  734. for ( ibucket = 0; ibucket < cbucketAlloc; ibucket++ )
  735. {
  736. // efficiency variables
  737. PBUCKET const pbucket = PBUCKET( rgb + ( ibucket * m_cbBucket ) );
  738. // construct the r/w-lock
  739. #ifdef UNIQUE_BUCKET_NAMES
  740. psz = (char*)( rgb + ( cbucketAlloc * m_cbBucket ) + ( ibucket * 60 ) );
  741. sprintf( psz, "CDynamicHashTable::BUCKET[0x%016I64X]::m_rwlBucket", QWORD( ibucketFirst + ibucket ) );
  742. DHTAssert( strlen( psz ) < 60 );
  743. new( &pbucket->CRWL() ) OSSYNC::CReaderWriterLock( CLockBasicInfo( CSyncBasicInfo( psz ), int( m_rankDHTrwlBucket ), 0 ) );
  744. #else // !UNIQUE_BUCKET_NAMES
  745. new( &pbucket->CRWL() ) OSSYNC::CReaderWriterLock( CLockBasicInfo( CSyncBasicInfo( "CDynamicHashTable::BUCKET::m_rwlBucket" ), int( m_rankDHTrwlBucket ), 0 ) );
  746. #endif // UNIQUE_BUCKET_NAMES
  747. // make the bucket empty
  748. pbucket->m_pb = NULL;
  749. }
  750. *prgbBucket = rgb;
  751. return errSuccess;
  752. }
  753. // uninitialize an array of buckets
  754. void DIRTermBucketArray( BYTE* const rgbBucket,
  755. const NativeCounter cbucketTerm )
  756. {
  757. NativeCounter ibucket;
  758. PBUCKET pbucketNext;
  759. // destroy each bucket in the array
  760. DHTAssert( rgbBucket );
  761. for ( ibucket = 0; ibucket < cbucketTerm; ibucket++ )
  762. {
  763. // efficiency variables
  764. PBUCKET pbucket = PBUCKET( rgbBucket + ( ibucket * m_cbBucket ) );
  765. // destruct the r/w-lock in place without freeing memory
  766. pbucket->CRWL().CReaderWriterLock::~CReaderWriterLock();
  767. // free all chained buckets (don't touch the first one because its part of rgbucket[])
  768. pbucket = PbucketBKTNext( pbucket );
  769. while ( pbucket )
  770. {
  771. pbucketNext = PbucketBKTNext( pbucket );
  772. MEMFree( pbucket );
  773. pbucket = pbucketNext;
  774. }
  775. }
  776. MEMFree( rgbBucket );
  777. }
  778. // split the directory
  779. void DIRISplit()
  780. {
  781. // we are executing the current policy (which is to split) and should be in this known state
  782. DHTAssert( m_dirptrs[ 0 ].m_cBucketMax > 0 );
  783. DHTAssert( m_dirptrs[ 0 ].m_cBucket == m_dirptrs[ 0 ].m_cBucketMax );
  784. // update the directory
  785. // NOTE: we do NOT allocate space here; this is deferred until BKTISplit() when we're sure we need it
  786. m_dirptrs[ 0 ].m_cBucketMax = m_dirptrs[ 0 ].m_cBucketMax * 2;
  787. m_dirptrs[ 0 ].m_cBucket = 0;
  788. STATSplitDirectory();
  789. }
  790. // merge the directory
  791. void DIRIMerge()
  792. {
  793. // we are executing the current policy (which is to split) and should be in this known state
  794. DHTAssert( m_dirptrs[ 0 ].m_cBucketMax > 1 ); // we should not be at the last split-level ( == 1 )
  795. DHTAssert( m_dirptrs[ 0 ].m_cBucket == 0 );
  796. // free the bucket array that is no longer being used (the last one in the directory)
  797. // NOTE: we can guarantee that it isn't in use because m_cBucket == 0 AND we can't grow (we're in stateMerge)
  798. // that means that everyone trying to hash to this bucket will be re-routed to the low-order bucket instead
  799. NativeCounter iExponent;
  800. NativeCounter iRemainder;
  801. DIRILog2( m_dirptrs[ 0 ].m_cBucketMax, &iExponent, &iRemainder );
  802. DHTAssert( NativeCounter( 1 ) << iExponent == m_dirptrs[ 0 ].m_cBucketMax );
  803. DHTAssert( 0 == iRemainder );
  804. // NOTE: the bucket array may not have been allocated because we defer its allocation until BKTISplit
  805. if ( m_rgrgBucket[ iExponent ] )
  806. {
  807. DIRTermBucketArray( m_rgrgBucket[ iExponent ], m_dirptrs[ 0 ].m_cBucketMax );
  808. m_rgrgBucket[ iExponent ] = NULL;
  809. }
  810. #ifdef DEBUG
  811. // verify that no higher-order bucket arrays exist
  812. while ( ++iExponent < cbitNativeCounter )
  813. {
  814. DHTAssert( !m_rgrgBucket[ iExponent ] );
  815. }
  816. #endif // DEBUG
  817. // update the directory
  818. m_dirptrs[ 0 ].m_cBucketMax = m_dirptrs[ 0 ].m_cBucketMax / 2;
  819. m_dirptrs[ 0 ].m_cBucket = m_dirptrs[ 0 ].m_cBucketMax;
  820. STATMergeDirectory();
  821. }
  822. // computer the log2 of the given value in terms of an exponent and an integer remainder
  823. void DIRILog2( const NativeCounter iValue,
  824. NativeCounter* const piExponent,
  825. NativeCounter* const piRemainder ) const
  826. {
  827. NativeCounter iExponent;
  828. NativeCounter iMask;
  829. NativeCounter iMaskLast;
  830. iExponent = 0;
  831. iMaskLast = 1;
  832. iMask = 1;
  833. while ( iMask < iValue )
  834. {
  835. iExponent++;
  836. iMaskLast = iMask;
  837. iMask = ( iMask << 1 ) + 1;
  838. }
  839. DHTAssert( iExponent < cbitNativeCounter );
  840. *piExponent = iExponent;
  841. *piRemainder = iMaskLast & iValue;
  842. }
  843. // get the correct copy of cBucketMax
  844. const NativeCounter NcDIRIGetBucketMax( const ENUMSTATE esCurrent ) const
  845. {
  846. return m_dirptrs[ esCurrent >> 4 ].m_cBucketMax;
  847. }
  848. // get the correct copy of cBucket
  849. const NativeCounter NcDIRIGetBucket( const ENUMSTATE esCurrent ) const
  850. {
  851. return m_dirptrs[ esCurrent >> 4 ].m_cBucket;
  852. }
  853. // resolve a bucket address to a bucket pointer
  854. PBUCKET const PbucketDIRIResolve( const NativeCounter ibucketIndex,
  855. const NativeCounter ibucketOffset ) const
  856. {
  857. BYTE* const pb = m_rgrgBucket[ ibucketIndex ]; // get ptr to one of the bucket arrays
  858. const NativeCounter ibOffset = ibucketOffset * m_cbBucket; // get byte offset within bucket array
  859. DHTAssert( NULL != pb );
  860. return PBUCKET( pb + ibOffset ); // return a typed ptr to the individual bucket within array
  861. }
  862. // hash to a bucket
  863. const PBUCKET PbucketDIRIHash( const ENUMSTATE esCurrent,
  864. const NativeCounter iHash,
  865. NativeCounter* const piBucket,
  866. NativeCounter* const pcBucket ) const
  867. {
  868. NativeCounter& iBucket = *piBucket;
  869. NativeCounter& cBucket = *pcBucket;
  870. NativeCounter cBucketMax;
  871. NativeCounter iExponent;
  872. NativeCounter iRemainder;
  873. // load some of the directory pointers
  874. cBucket = NcDIRIGetBucket( esCurrent );
  875. cBucketMax = NcDIRIGetBucketMax( esCurrent );
  876. // normalize the given hash value to the range of active buckets
  877. iBucket = iHash & ( ( cBucketMax - 1 ) + cBucketMax );
  878. if ( iBucket >= cBucketMax + cBucket )
  879. {
  880. iBucket -= cBucketMax;
  881. }
  882. // convert the normalized hash value to a bucket address
  883. DIRILog2( iBucket, &iExponent, &iRemainder );
  884. // return the bucket
  885. return PbucketDIRIResolve( iExponent, iRemainder );
  886. }
  887. const PBUCKET PbucketDIRIHash( const ENUMSTATE esCurrent,
  888. const NativeCounter iHash ) const
  889. {
  890. NativeCounter iBucket;
  891. NativeCounter cBucket;
  892. return PbucketDIRIHash( esCurrent, iHash, &iBucket, &cBucket );
  893. }
/////////////////////////////////////////////////////////////////////////////////////////
//
//  scan operations
//

// move from the current hash-bucket to the next hash-bucket that contains
// at least 1 entry; position currency on that entry
//
// on entry, the lock currency must be reset (no prev/current/next entry);
// returns errSuccess with the new bucket write-locked and currency on its
// first entry, or errNoCurrentEntry once all buckets have been scanned
ERR ErrSCANMoveNext( CLock *const plock )
{
    DHTAssert( plock->m_pEntryPrev == NULL );
    DHTAssert( plock->m_pEntry == NULL );
    DHTAssert( plock->m_pEntryNext == NULL );

    // unlock the current bucket
    if ( plock->m_pBucketHead )
    {
        plock->m_pBucketHead->CRWL().LeaveAsWriter();
        plock->m_pBucketHead = NULL;

        // we performed an insert or delete while holding the write lock
        if ( plock->m_fInsertOrDelete )
        {
            // perform amortized maintenance on the table
            MaintainTable( plock->m_phs );
        }
    }

    // enter the state machine
    const int iGroup = UiSTEnter( &plock->m_phs );
    const ENUMSTATE esCurrent = EsSTGetState();

    while ( plock->m_iBucket + 1 < NcDIRIGetBucketMax( esCurrent ) + NcDIRIGetBucket( esCurrent ) )
    {
        // we have not scanned the last bucket yet

        // advance the bucket index
        plock->m_iBucket++;

        // hash to the bucket and lock it
        plock->m_pBucketHead = PbucketDIRIHash( esCurrent, plock->m_iBucket );
        plock->m_pBucketHead->CRWL().EnterAsWriter();

        // re-check the range now that we hold the write-lock: the bucket may
        // have been merged away between hashing and lock acquisition
        if ( plock->m_iBucket < NcDIRIGetBucketMax( esCurrent ) + NcDIRIGetBucket( esCurrent ) )
        {
            // bucket address is OK (did not move)
            if ( plock->m_pBucketHead->m_pb != NULL )
            {
                // current bucket contains at least 1 entry

                // setup the currency on the first entry
                plock->m_pBucket = plock->m_pBucketHead;
                plock->m_pEntry = &plock->m_pBucketHead->m_rgEntry[0];

                // stop the loop
                break;
            }
            // current bucket is empty
        }
        else
        {
            DHTAssert( stateShrink == esCurrent );

            // the current bucket disappeared because it was merged into a lower bucket
            DHTAssert( plock->m_iBucket >= NcDIRIGetBucketMax( esCurrent ) );
            DHTAssert( PbucketDIRIHash( esCurrent, plock->m_iBucket ) ==
                       PbucketDIRIHash( esCurrent, plock->m_iBucket - NcDIRIGetBucketMax( esCurrent ) ) );

            // make sure the current entry ptr is reset
            DHTAssert( !plock->m_pEntry );
        }

        // release the bucket lock (bucket should be empty since it was merged)
        DHTAssert( !plock->m_pBucketHead->m_pb );
        plock->m_pBucketHead->CRWL().LeaveAsWriter();
        plock->m_pBucketHead = NULL;
    }

    // leave the state machine
    STLeave( iGroup, plock->m_phs );

    // return the result
    DHTAssert( !plock->m_pEntry || plock->m_pBucketHead );
    return plock->m_pEntry ? errSuccess : errNoCurrentEntry;
}
  963. /////////////////////////////////////////////////////////////////////////////////////////
  964. //
  965. // bucket operations
  966. //
  967. // returns fTrue if the lock context is in read mode
  968. const BOOL FBKTRead( CLock *const plock ) const
  969. {
  970. return plock->m_ls == CLock::lsRead;
  971. }
  972. // returns fTrue if the lock context is in write mode
  973. const BOOL FBKTWrite( CLock *const plock ) const
  974. {
  975. return plock->m_ls == CLock::lsWrite;
  976. }
  977. // returns fTrue if the lock context is in scan-forward mode
  978. const BOOL FBKTScan( CLock *const plock ) const
  979. {
  980. return plock->m_ls == CLock::lsScan;
  981. }
  982. // returns the entry after last entry in the BUCKET or entry 0 if no entries exist
  983. CKeyEntry *PentryBKTNextMost( const PBUCKET pBucket ) const
  984. {
  985. const BYTE *pb = pBucket->m_pb;
  986. if ( BOOL( ( pb >= (BYTE*)&pBucket->m_rgEntry[ 0 ] ) &
  987. ( pb < (BYTE*)&pBucket->m_rgEntry[ m_centryBucket ] ) ) )
  988. {
  989. // we are in the last bucket
  990. return (CKeyEntry*)pb + 1;
  991. }
  992. else if ( NULL == pb )
  993. {
  994. // the bucket is empty
  995. return &pBucket->m_rgEntry[ 0 ];
  996. }
  997. // the bucket is full
  998. return &pBucket->m_rgEntry[ m_centryBucket ];
  999. }
  1000. // returns the next BUCKET or NULL if no other BUCKETs exist
  1001. PBUCKET PbucketBKTNext( const PBUCKET pBucket ) const
  1002. {
  1003. const BYTE *pb = pBucket->m_pb;
  1004. if ( BOOL( ( pb <= (BYTE*)pBucket - m_cbBucket ) |
  1005. ( pb >= (BYTE*)pBucket + m_cbBucket ) ) )
  1006. {
  1007. // m_pBucketNext is either the next BUCKET or NULL
  1008. DHTAssert( !pb || PBUCKET( pb )->m_pBucketPrev == pBucket );
  1009. return PBUCKET( pb );
  1010. }
  1011. // m_pBucketNext is invalid (m_pEntryLast is valid instead)
  1012. return NULL;
  1013. }
// try to seek to the entry corresponding to the given key
// if found, plock->m_pEntry is set to the matching entry
// if not, plock->m_pEntry is set to NULL (no current entry)
void BKTSeek( CLock *const plock, const CKey &key ) const
{
    // pre-init our currency assuming we will hit a hot path
    plock->m_pBucket = plock->m_pBucketHead;
    plock->m_pEntryPrev = NULL;
    plock->m_pEntryNext = NULL;

    // HOT PATH:
    //
    // if the next/end pointer is within the head bucket then we know
    // that all entries are in the head bucket. if we find the entry
    // for this key then set our currency to point to it otherwise set
    // our currency to no current entry
    //
    // NOTE: the subtraction is done in unsigned (DWORD_PTR) math, so a NULL
    // m_pEntryLast wraps to a huge value and correctly fails this range test
    CKeyEntry* const pEntryLast = plock->m_pBucketHead->m_pEntryLast;
    if ( DWORD_PTR( pEntryLast ) - DWORD_PTR( plock->m_pBucketHead ) < m_cbBucket )
    {
        CKeyEntry* pEntry = plock->m_pBucketHead->m_rgEntry;
        do
        {
            if ( pEntry->FEntryMatchesKey( key ) )
            {
                plock->m_pEntry = pEntry;
                return;
            }
        }
        while ( ++pEntry <= pEntryLast );
        plock->m_pEntry = NULL;
    }

    // HOT PATH:
    //
    // if the next/end pointer is NULL then we know that we will not
    // find the key. set our currency to no current entry
    else if ( !pEntryLast )
    {
        plock->m_pEntry = NULL;
    }

    // if the next/end pointer points outside of the head bucket then
    // perform a full chain search
    else
    {
        BKTISeek( plock, key );
    }
}
// full-chain seek helper for BKTSeek: walks every BUCKET in the chain
// looking for the given key; sets plock->m_pEntry to the matching entry,
// or to NULL (with plock->m_pBucket on the last chain bucket) if not found
void BKTISeek( CLock *const plock, const CKey &key ) const
{
    PBUCKET pBucket;
    PBUCKET pBucketPrev;
    CKeyEntry *pEntryThis;
    CKeyEntry *pEntryMost;

    DHTAssert( FBKTRead( plock ) || FBKTWrite( plock ) );
    DHTAssert( plock->m_pBucketHead != NULL );

    // start the scan on the first bucket
    pBucket = plock->m_pBucketHead;
    do
    {
        // scan the current BUCKET
        pEntryThis = &pBucket->m_rgEntry[ 0 ];
        pEntryMost = PentryBKTNextMost( pBucket );
        while ( pEntryThis < pEntryMost )
        {
            // query the entry against the given key for a match
            // (assume we will be more likely to not find it)
            if ( !pEntryThis->FEntryMatchesKey( key ) )
            {
                // nop
            }
            else
            {
                // the key exists; setup our currency around it
                goto SetupCurrency;
            }

            // move to the next entry
            pEntryThis++;
        }

        // move to the next BUCKET
        // (pBucketPrev is always assigned before use: the head bucket is
        // non-NULL, so this outer loop body runs at least once)
        pBucketPrev = pBucket;
        pBucket = PbucketBKTNext( pBucket );
    }
    while ( pBucket );

    // key not found: move back to the last BUCKET and reset the entry ptr
    pBucket = pBucketPrev;
    pEntryThis = NULL;

SetupCurrency:
    // setup the currency in the lock context
    // we will not allow moving next/prev, so we setup the next/prev ptrs accordingly
    plock->m_pBucket = pBucket;
    plock->m_pEntryPrev = NULL;
    plock->m_pEntry = pEntryThis;
    plock->m_pEntryNext = NULL;
}
  1106. #ifdef DEBUG
  1107. // get a pointer to the current entry
  1108. // if currency is before-first or after-last, then NULL is returned
  1109. void BKTGetEntry( CLock *const plock, CKeyEntry **ppKeyEntry ) const
  1110. {
  1111. DHTAssert( FBKTRead( plock ) || FBKTWrite( plock ) );
  1112. DHTAssert( plock->m_pBucketHead != NULL );
  1113. DHTAssert( plock->m_pBucket != NULL );
  1114. *ppKeyEntry = plock->m_pEntry;
  1115. return;
  1116. }
  1117. #endif
  1118. // get the current entry
  1119. // if currency is before-first or after-last, errEntryNotFound is returned
  1120. const ERR ErrBKTGetEntry( CLock *const plock, CEntry *pentry ) const
  1121. {
  1122. DHTAssert( FBKTRead( plock ) || FBKTWrite( plock ) || FBKTScan( plock ) );
  1123. DHTAssert( plock->m_pBucketHead != NULL );
  1124. DHTAssert( plock->m_pBucket != NULL );
  1125. if ( plock->m_pEntry )
  1126. {
  1127. // we are on an entry
  1128. plock->m_pEntry->GetEntry( pentry );
  1129. return errSuccess;
  1130. }
  1131. // we are not on an entry
  1132. return errEntryNotFound;
  1133. }
  1134. // replace the current entry (destruct old entry, contruct new entry)
  1135. // if currency is before-first or after-last, then errNoCurrentEntry is returned
  1136. const ERR ErrBKTReplaceEntry( CLock *const plock, const CEntry &entry ) const
  1137. {
  1138. DHTAssert( FBKTWrite( plock ) || FBKTScan( plock ) );
  1139. DHTAssert( plock->m_pBucketHead != NULL );
  1140. DHTAssert( plock->m_pBucket != NULL );
  1141. if ( plock->m_pEntry )
  1142. {
  1143. // we are on an entry
  1144. // copy the new entry over it
  1145. plock->m_pEntry->SetEntry( entry );
  1146. return errSuccess;
  1147. }
  1148. // we are not on an entry
  1149. return errNoCurrentEntry;
  1150. }
// insert an entry at the end of the logical bucket
// if the locked key already exists, errKeyDuplicate is returned
// if memory is short, errOutOfMemory is returned
// otherwise, errSuccess is returned and currency is left on the new entry
const ERR ErrBKTInsertEntry( CLock *const plock, const CEntry &entry )
{
    DHTAssert( FBKTWrite( plock ) );
    DHTAssert( plock->m_pBucketHead != NULL );
    DHTAssert( plock->m_pBucket != NULL );

    if ( plock->m_pEntry )
    {
        // we are pointing to the key we locked, so it must already exist
        return errKeyDuplicate;
    }

#ifdef DEBUG
    // DEBUG: catalogue every BUCKET in the chain (plus one slot for the
    // bucket we may allocate below) so we can verify afterwards that no
    // bucket was leaked or smuggled into the chain
    PBUCKET *rgBucketCheck = NULL, pbucketTX;
    size_t cBucketCheck = 0, iT;

    pbucketTX = plock->m_pBucketHead;
    while ( pbucketTX )
    {
        cBucketCheck++;
        pbucketTX = PbucketBKTNext( pbucketTX );
    }
    cBucketCheck++;     // account for newly allocated bucket

    rgBucketCheck = (PBUCKET *)PvMEMAlloc( cBucketCheck * sizeof( PBUCKET ) );
    if ( NULL != rgBucketCheck )
    {
        iT = 0;
        pbucketTX = plock->m_pBucketHead;
        while ( pbucketTX )
        {
            rgBucketCheck[ iT++ ] = pbucketTX;
            pbucketTX = PbucketBKTNext( pbucketTX );
        }
        rgBucketCheck[ iT++ ] = NULL;   // new bucket
    }

    // DEBUG: count the number of entries we will be handling
    size_t cEntriesTotal = 0;
    PBUCKET pbktT, pbktNextT;
    pbktT = plock->m_pBucketHead;
    if ( pbktT->m_pb != NULL )
    {
        while ( pbktT )
        {
            pbktNextT = PbucketBKTNext( pbktT );
            if ( pbktNextT )
            {
                // full bucket
                cEntriesTotal += size_t( m_centryBucket );
            }
            else
            {
                // partial bucket (not empty)
                cEntriesTotal += 1 + ( pbktT->m_pEntryLast - &pbktT->m_rgEntry[0] );
            }
            pbktT = pbktNextT;
        }
    }
#endif

    // cursor for insert
    PBUCKET pBucketThis = plock->m_pBucket;
    CKeyEntry *pEntryThis;

    // efficiency variable
    PBUCKET pBucketT;

    // move to the last entry in the last bucket
    pBucketT = PbucketBKTNext( pBucketThis );
    while ( pBucketT )
    {
        pBucketThis = pBucketT;
        pBucketT = PbucketBKTNext( pBucketT );
    }
    pEntryThis = PentryBKTNextMost( pBucketThis );

    if ( pEntryThis != &pBucketThis->m_rgEntry[ m_centryBucket ] )
    {
        // there are available entries left in the last bucket
        // nop
    }
    else
    {
        // there are no entries left in the last bucket

        // allocate a new bucket
        pBucketT = (BUCKET *)PvMEMAlloc( m_cbBucket );
        if ( !pBucketT )
        {
            // we ran out of memory when allocating the new BUCKET
#ifdef DEBUG
            // free memory from the start of this function
            if ( NULL != rgBucketCheck )
            {
                MEMFree( rgBucketCheck );
            }
#endif
            return errOutOfMemory;
        }
        STATInsertOverflowBucket();

#ifdef DEBUG
        // put the new bucket in our list
        if ( NULL != rgBucketCheck )
        {
            DHTAssert( rgBucketCheck[cBucketCheck-1] == NULL );
            rgBucketCheck[cBucketCheck-1] = pBucketT;
        }
#endif

        // chain the new BUCKET
        pBucketThis->m_pBucketNext = pBucketT;
        pBucketT->m_pBucketPrev = pBucketThis;

        // use the first entry of the new BUCKET
        pBucketThis = pBucketT;
        pEntryThis = &pBucketT->m_rgEntry[0];
    }

    // copy the entry
    pEntryThis->SetEntry( entry );

    // update the last entry pointer
    pBucketThis->m_pEntryLast = pEntryThis;

    // move the currency to the new entry
    plock->m_pBucket = pBucketThis;
    plock->m_pEntry = pEntryThis;

#ifdef DEBUG
    if ( NULL != rgBucketCheck )
    {
        // check each catalogued bucket to see if it is still there
        pbucketTX = plock->m_pBucketHead;
        DHTAssert( pbucketTX );

        // find and remove all buckets found in the destination chain from our list
        while ( pbucketTX )
        {
            for ( iT = 0; iT < cBucketCheck; iT++ )
            {
                if ( rgBucketCheck[iT] == pbucketTX )
                {
                    rgBucketCheck[iT] = NULL;
                    break;
                }
            }
            DHTAssert( iT < cBucketCheck );     // if this goes off, we somehow got a bucket
                                                // into the chain that shouldn't be there
                                                // (it is a bucket we never catalogued!)
            pbucketTX = PbucketBKTNext( pbucketTX );
        }

        // the list should now be empty -- verify this
        for ( iT = 0; iT < cBucketCheck; iT++ )
        {
            // if this goes off, rgBucketCheck[iT] contains a bucket that was abandoned without
            // being freed!
            DHTAssert( rgBucketCheck[iT] == NULL );
        }

        // free the list
        MEMFree( rgBucketCheck );
    }

    // make sure the number of entries has not changed since we started
    size_t cEntriesAfterwards = 0;
    pbktT = plock->m_pBucketHead;
    if ( pbktT->m_pb != NULL )
    {
        while ( pbktT )
        {
            pbktNextT = PbucketBKTNext( pbktT );
            if ( pbktNextT )
            {
                // full bucket
                cEntriesAfterwards += size_t( m_centryBucket );
            }
            else
            {
                // partial bucket (not empty)
                cEntriesAfterwards += 1 + ( pbktT->m_pEntryLast - &pbktT->m_rgEntry[0] );
            }
            pbktT = pbktNextT;
        }
    }

    // entry counters should match ( +1 is for the inserted entry )
    DHTAssert( cEntriesAfterwards == cEntriesTotal + 1 );
#endif

    return errSuccess;
}
// delete the current entry
// if currency is before-first or after-last, then errNoCurrentEntry is returned
// if the entry is not the last in the logical bucket, the last entry is promoted
// to fill in the hole
// should a BUCKET become empty, it will be released immediately
const ERR ErrBKTDeleteEntry( CLock *const plock )
{
    DHTAssert( FBKTWrite( plock ) || FBKTScan( plock ) );
    DHTAssert( plock->m_pBucketHead != NULL );
    DHTAssert( plock->m_pBucket != NULL );

    if ( !plock->m_pEntry )
    {
        // we do not have a current entry
        return errNoCurrentEntry;
    }

#ifdef DEBUG
    // DEBUG: catalogue every BUCKET in the chain so we can verify afterwards
    // that each one was either kept in the chain or freed -- never leaked
    PBUCKET *rgBucketCheck = NULL;
    PBUCKET pbucketT;
    size_t cBucketCheck = 0, iT;

    pbucketT = plock->m_pBucketHead;
    while ( pbucketT )
    {
        cBucketCheck++;
        pbucketT = PbucketBKTNext( pbucketT );
    }

    rgBucketCheck = (PBUCKET *)PvMEMAlloc( cBucketCheck * sizeof( PBUCKET ) );
    if ( NULL != rgBucketCheck )
    {
        iT = 0;
        pbucketT = plock->m_pBucketHead;
        while ( pbucketT )
        {
            rgBucketCheck[ iT++ ] = pbucketT;
            pbucketT = PbucketBKTNext( pbucketT );
        }
    }

    // DEBUG: count the number of entries we will be handling
    size_t cEntriesTotal = 0;
    PBUCKET pbktT, pbktNextT;
    pbktT = plock->m_pBucketHead;
    if ( pbktT->m_pb != NULL )
    {
        while ( pbktT )
        {
            pbktNextT = PbucketBKTNext( pbktT );
            if ( pbktNextT )
            {
                // full bucket
                cEntriesTotal += size_t( m_centryBucket );
            }
            else
            {
                // partial bucket (not empty)
                cEntriesTotal += 1 + ( pbktT->m_pEntryLast - &pbktT->m_rgEntry[0] );
            }
            pbktT = pbktNextT;
        }
    }
#endif

    // we have a valid entry
    PBUCKET pBucketThis = plock->m_pBucket;
    CKeyEntry *pEntryThis = plock->m_pEntry;
    PBUCKET pBucketFree = NULL;     // used later if we free a BUCKET structure

    if ( pEntryThis != pBucketThis->m_pEntryLast )
    {
        // we are not deleting the last entry in the bucket;
        // promote the last entry to fill in the spot left by the entry we are deleting

        // move to the last bucket
        PBUCKET pBucketT = PbucketBKTNext( pBucketThis );
        while ( pBucketT )
        {
            pBucketThis = pBucketT;
            pBucketT = PbucketBKTNext( pBucketT );
        }

        // move to the last entry in the last BUCKET
        pEntryThis = pBucketThis->m_pEntryLast;

        // copy the entry
        plock->m_pEntry->SetEntry( pEntryThis->m_entry );
    }

    // update the currency to show that we are no longer on an entry
    plock->m_pEntry = NULL;

    // we are now pointing to the last entry in the last bucket
    // (via pBucketThis/pEntryThis), and that entry needs to be
    // "deleted" from the bucket

    // update the next/end ptr to reflect this deletion
    if ( pEntryThis != &pBucketThis->m_rgEntry[0] )
    {
        // entries still remain in the last bucket
        DHTAssert( pBucketThis->m_pEntryLast == pEntryThis );
        pBucketThis->m_pEntryLast--;    // pEntryThis - 1;
#ifdef DEBUG
        // jump to the validation code
        goto DoValidation;
#endif
        return errSuccess;
    }

    // no entries remain in the last bucket
    if ( pBucketThis == plock->m_pBucketHead )
    {
        // this bucket is empty, but we cannot release it because it is part of the bucket array
        // instead, we mark it as being empty
        pBucketThis->m_pb = NULL;
#ifdef DEBUG
        // jump to the validation code
        goto DoValidation;
#endif
        return errSuccess;
    }

    // we can free the last bucket
    pBucketFree = pBucketThis;

    // unchain it: the previous bucket becomes the new last bucket, and it is
    // full by definition, so its m_pEntryLast points at its final entry slot
    DHTAssert( pBucketThis->m_pBucketPrev->m_pBucketNext == pBucketThis );
    pBucketThis = pBucketThis->m_pBucketPrev;
    pBucketThis->m_pEntryLast = &pBucketThis->m_rgEntry[ m_centryBucket - 1 ];

    // free it
    MEMFree( pBucketFree );

    if ( plock->m_pBucket == pBucketFree )
    {
        // our currency was on the last bucket which is now invalid;
        // move to the previous bucket (which is now the NEW last BUCKET)
        plock->m_pBucket = pBucketThis;
    }
    STATDeleteOverflowBucket();

#ifdef DEBUG
    // check each catalogued bucket to see if it is still there
DoValidation:
    if ( NULL != rgBucketCheck )
    {
        pbucketT = plock->m_pBucketHead;
        DHTAssert( pbucketT );

        // find and remove all buckets found in the destination chain from our list
        while ( pbucketT )
        {
            for ( iT = 0; iT < cBucketCheck; iT++ )
            {
                if ( rgBucketCheck[iT] == pbucketT )
                {
                    rgBucketCheck[iT] = NULL;
                    break;
                }
            }
            DHTAssert( iT < cBucketCheck );     // if this goes off, we somehow got a bucket
                                                // into the chain that shouldn't be there
                                                // (it is a bucket we never catalogued!)
            pbucketT = PbucketBKTNext( pbucketT );
        }

        // remove pBucketFree from rgBucketCheck
        if ( pBucketFree )
        {
            for ( iT = 0; iT < cBucketCheck; iT++ )
            {
                if ( rgBucketCheck[iT] == pBucketFree )
                {
                    rgBucketCheck[iT] = NULL;
                    break;
                }
            }
            DHTAssert( iT < cBucketCheck );     // if this goes off, we freed a bucket that
                                                // was never catalogued! we should only be freeing
                                                // buckets that were in the original catalogue!
        }

        // the list should now be empty -- verify this
        for ( iT = 0; iT < cBucketCheck; iT++ )
        {
            // if this goes off, rgBucketCheck[iT] contains a bucket that was abandoned without
            // being freed!
            DHTAssert( rgBucketCheck[iT] == NULL );
        }

        // free the list
        MEMFree( rgBucketCheck );
    }

    // make sure the number of entries has not changed since we started
    size_t cEntriesAfterwards = 0;
    pbktT = plock->m_pBucketHead;
    if ( pbktT->m_pb != NULL )
    {
        while ( pbktT )
        {
            pbktNextT = PbucketBKTNext( pbktT );
            if ( pbktNextT )
            {
                // full bucket
                cEntriesAfterwards += size_t( m_centryBucket );
            }
            else
            {
                // partial bucket (not empty)
                cEntriesAfterwards += 1 + ( pbktT->m_pEntryLast - &pbktT->m_rgEntry[0] );
            }
            pbktT = pbktNextT;
        }
    }

    // entry counters should match ( -1 is for the deleted entry )
    DHTAssert( cEntriesAfterwards == cEntriesTotal - 1 );
#endif

    return errSuccess;
}
// split to a new bucket: opportunistically grow the table by activating
// one more bucket; gives up silently on contention or memory shortage
//
// NOTE: from our perspective, we are in the grow state; however, the
// current state may be set to something else due to a pending transition
void BKTISplit( HOTSTUFF* const phs )
{
    // read the directory pointers
    const NativeCounter cBucketMax = NcDIRIGetBucketMax( stateGrow );
    const NativeCounter cBucket = NcDIRIGetBucket( stateGrow );

    if ( cBucketMax + cBucket >= m_cBucketPreferred || cBucket == cBucketMax )
    {
        return;     // the requested growth is complete
    }

    // we need to reserve memory now to ensure that the growth will succeed
    // (BKTIDoSplit will commit or unreserve this reservation later)
    if ( !phs->m_bucketpool.FPOOLReserve( m_cbBucket ) )
    {
        return;
    }

    // get the source bucket
    const PBUCKET pbucketGrowSrc = PbucketDIRIHash( stateGrow, cBucket );

    // try to get the lock; growth is opportunistic, so back off on contention
    if ( pbucketGrowSrc->CRWL().FWritersQuiesced() ||
         !pbucketGrowSrc->CRWL().FTryEnterAsWriter() )
    {
        STATSplitContention();
        phs->m_bucketpool.POOLUnreserve();
        return;
    }

    // having a write-lock on the source bucket means no one else attempting to split can
    // be farther along than us at this moment unless they completed the growth already

    // see whether or not m_cBucket changed while we were trying to get here:
    // if it stayed the same, we were the first ones to split this bucket;
    // if it changed, someone else managed to split AFTER we read m_cBucket
    // but BEFORE we could do the split ourselves
    if ( cBucket != NcDIRIGetBucket( stateGrow ) )
    {
        DHTAssert( cBucket < NcDIRIGetBucket( stateGrow ) );
        pbucketGrowSrc->CRWL().LeaveAsWriter();
        phs->m_bucketpool.POOLUnreserve();
        return;
    }

    // get the destination bucket (may not be allocated yet so we cannot use PbucketDIRIHash)
    NativeCounter iExponent;
    NativeCounter iRemainder;
    DIRILog2( cBucketMax + cBucket, &iExponent, &iRemainder );

    // extract the address of the bucket
    if ( !m_rgrgBucket[ iExponent ] )
    {
        // allocate a new bucket array to hold 2^iExponent buckets for this entry
        if ( ErrDIRInitBucketArray( cBucketMax, cBucketMax, &m_rgrgBucket[ iExponent ] ) != errSuccess )
        {
            pbucketGrowSrc->CRWL().LeaveAsWriter();
            phs->m_bucketpool.POOLUnreserve();
            return;
        }
    }
    DHTAssert( m_rgrgBucket[ iExponent ] );

    // get the destination bucket
    const PBUCKET pbucketGrowDst = PbucketDIRIResolve( iExponent, iRemainder );

    // lock the destination bucket (no possibility of contention here)
    // NOTE(review): the return value is deliberately ignored -- m_cBucket has
    // not yet been incremented, so presumably no other thread can hash to
    // this bucket and the try-acquire always succeeds; confirm that invariant
    pbucketGrowDst->CRWL().FTryEnterAsWriter();

    // increase m_cBucket (we cannot turn back after this point)
    // anyone who hashes to the new bucket will be queued up until the growth is complete
    DHTAssert( cBucket == NcDIRIGetBucket( stateGrow ) );
    m_dirptrs[ 0 ].m_cBucket++;

    // do the growth work
    BKTIDoSplit( phs, pbucketGrowSrc, pbucketGrowDst, cBucket );

    // release the write-locks
    pbucketGrowSrc->CRWL().LeaveAsWriter();
    pbucketGrowDst->CRWL().LeaveAsWriter();
}
// merge two existing buckets into one: opportunistically shrink the table
// by folding the highest active bucket back into its low-order buddy;
// gives up silently on contention or memory shortage
//
// NOTE: from our perspective, we are in the shrink state; however, the
// current state may be set to something else due to a pending transition
void BKTIMerge( HOTSTUFF* const phs )
{
    // read the directory pointers
    const NativeCounter cBucketMax = NcDIRIGetBucketMax( stateShrink );
    NativeCounter cBucket = NcDIRIGetBucket( stateShrink );

    if ( cBucketMax + cBucket <= m_cBucketPreferred || cBucket == 0 )
    {
        return;     // the requested shrinkage is complete
    }
    cBucket--;      // the bucket we are merging is really 1 below cBucket

    // we need to reserve memory now to ensure that the shrinkage will succeed
    // (BKTIDoMerge will commit or unreserve this reservation later)
    if ( !phs->m_bucketpool.FPOOLReserve( m_cbBucket ) )
    {
        return;
    }

    // get the destination bucket
    const PBUCKET pbucketShrinkDst = PbucketDIRIHash( stateShrink, cBucket );

    // try to get the lock; shrinkage is opportunistic, so back off on contention
    if ( pbucketShrinkDst->CRWL().FWritersQuiesced() ||
         !pbucketShrinkDst->CRWL().FTryEnterAsWriter() )
    {
        STATMergeContention();
        phs->m_bucketpool.POOLUnreserve();
        return;
    }

    // having a write-lock on the destination bucket means no one else attempting to merge can
    // be farther along than us at this moment unless they completed the shrinkage already

    // see whether or not m_cBucket changed while we were trying to get here:
    // if it stayed the same, we were the first ones to merge this bucket;
    // if it changed, someone else managed to merge AFTER we read m_cBucket
    // but BEFORE we could do the merge ourselves
    if ( cBucket + 1 != NcDIRIGetBucket( stateShrink ) )
    {
        DHTAssert( cBucket + 1 > NcDIRIGetBucket( stateShrink ) );
        pbucketShrinkDst->CRWL().LeaveAsWriter();
        phs->m_bucketpool.POOLUnreserve();
        return;
    }

    // convert cBucket to a bucket address (the source is the high-order
    // buddy bucket at cBucket + cBucketMax)
    NativeCounter iExponent;
    NativeCounter iRemainder;
    DIRILog2( cBucket + NcDIRIGetBucketMax( stateShrink ), &iExponent, &iRemainder );

    // extract the address of the bucket
    const PBUCKET pbucketShrinkSrc = PbucketDIRIResolve( iExponent, iRemainder );

    // try to get the lock
    if ( pbucketShrinkSrc->CRWL().FWritersQuiesced() ||
         !pbucketShrinkSrc->CRWL().FTryEnterAsWriter() )
    {
        STATMergeContention();
        pbucketShrinkDst->CRWL().LeaveAsWriter();
        phs->m_bucketpool.POOLUnreserve();
        return;
    }

    // decrease m_cBucket (we cannot turn back after this point)
    // anyone who hashes to the destination bucket will be queued up until
    // the merge is complete
    // no one will be able to hash to the source bucket
    DHTAssert( cBucket + 1 == NcDIRIGetBucket( stateShrink ) );
    m_dirptrs[ 0 ].m_cBucket--;

    // do the shrinkage work
    BKTIDoMerge( phs, pbucketShrinkSrc, pbucketShrinkDst );

    // release the write-locks
    pbucketShrinkDst->CRWL().LeaveAsWriter();
    pbucketShrinkSrc->CRWL().LeaveAsWriter();
}
// work-horse for spliting a bucket
//
// distributes every entry of the write-locked source chain between two
// chains: entries whose masked hash still equals iHashSrc stay in the
// source bucket; all others are copied to the write-locked (and initially
// empty) destination bucket.  overflow BUCKET structures emptied out of
// the source chain are recycled into the destination chain before the
// reserved heap bucket is committed; the reservation made by the caller
// in phs->m_bucketpool is committed only if actually used, and is
// cancelled otherwise.
void BKTIDoSplit( HOTSTUFF* const phs,
                  PBUCKET pBucketSrcSrc,
                  PBUCKET pBucketDst,
                  const NativeCounter iHashSrc )
{
#ifdef DEBUG
    PBUCKET pBucketSrcSrcOriginal = pBucketSrcSrc;
    PBUCKET pBucketDstOriginal = pBucketDst;
    size_t cEntriesTotal = 0, cEntriesTotalRunning = 0;
    PBUCKET pbktT, pbktNextT;

    // catalog each BUCKET structure and make sure they end up in the destination bucket
    PBUCKET *rgBucketCheck = NULL, pbucketTX;
    size_t cBucketCheck = 0, iT;

    pbucketTX = pBucketSrcSrc;
    while ( pbucketTX )
    {
        cBucketCheck++;
        pbucketTX = PbucketBKTNext( pbucketTX );
    }
    pbucketTX = pBucketDst;
    // the destination chain must consist of exactly one bucket on entry
    DHTAssert( PbucketBKTNext( pbucketTX ) == NULL );
    while ( pbucketTX )
    {
        cBucketCheck++;
        pbucketTX = PbucketBKTNext( pbucketTX );
    }
    cBucketCheck++; // account for bucket from heap
    rgBucketCheck = (PBUCKET *)PvMEMAlloc( cBucketCheck * sizeof( PBUCKET ) );
    if ( NULL != rgBucketCheck )
    {
        iT = 0;
        pbucketTX = pBucketSrcSrc;
        while ( pbucketTX )
        {
            rgBucketCheck[ iT++ ] = pbucketTX;
            pbucketTX = PbucketBKTNext( pbucketTX );
        }
        pbucketTX = pBucketDst;
        while ( pbucketTX )
        {
            rgBucketCheck[ iT++ ] = pbucketTX;
            pbucketTX = PbucketBKTNext( pbucketTX );
        }
        rgBucketCheck[ iT++ ] = NULL; // heap bucket
        DHTAssert( iT == cBucketCheck );
    }

    // count the number of entries that are in the source bucket
    pbktT = pBucketSrcSrc;
    if ( pbktT->m_pb != NULL )
    {
        while ( pbktT )
        {
            pbktNextT = PbucketBKTNext( pbktT );
            if ( pbktNextT )
            {
                // full bucket
                cEntriesTotal += size_t( m_centryBucket );
            }
            else
            {
                // partial bucket (not empty)
                cEntriesTotal += 1 + ( pbktT->m_pEntryLast - &pbktT->m_rgEntry[0] );
            }
            pbktT = pbktNextT;
        }
    }
#endif

    // cursor for reading entries
    PBUCKET pBucketNextSrc;
    CKeyEntry *pEntryThisSrc;
    CKeyEntry *pEntryMostSrc;

    // cursors for writing entries
    // index 0 is for the SrcDst cursor (entries whose src and dst is the source bucket)
    // index 1 is for the Dst cursor (entries whose dst is the destination bucket)
    PBUCKET pBucketThis[2];
    CKeyEntry *pEntryThis[2];
    CKeyEntry *pEntryMost[2];
    CKeyEntry *pEntryLast[2];
    size_t iIndex;

    // extra buckets (BUCKET structures freed from the source chain,
    // recycled before committing the heap reservation)
    PBUCKET pBucketAvail = NULL;

    // remember if we used the bucket from the heap
    BOOL fBucketFromHeap = fFalse;

    // used for hashing
    NativeCounter iHashMask;

    DHTAssert( pBucketSrcSrc );
    DHTAssert( pBucketDst );
    DHTAssert( pBucketDst->m_pb == NULL );

    // calculate the hash-mask (prevent wraparound)
    iHashMask = ( NcDIRIGetBucketMax( stateGrow ) - 1 ) + NcDIRIGetBucketMax( stateGrow );
    DHTAssert( NcDIRIGetBucketMax( stateGrow ) > 0 );

    // prepare the read cursor
    pBucketNextSrc = PbucketBKTNext( pBucketSrcSrc );
    pEntryThisSrc = &pBucketSrcSrc->m_rgEntry[ 0 ];
    pEntryMostSrc = PentryBKTNextMost( pBucketSrcSrc );

    // prepare the src-dst write cursor
    pBucketThis[ 0 ] = pBucketSrcSrc;
    pEntryThis[ 0 ] = &pBucketSrcSrc->m_rgEntry[ 0 ];
    pEntryMost[ 0 ] = &pBucketSrcSrc->m_rgEntry[ m_centryBucket ];
    pEntryLast[ 0 ] = NULL;

    // prepare the dst write cursor
    pBucketThis[ 1 ] = pBucketDst;
    pEntryThis[ 1 ] = &pBucketDst->m_rgEntry[ 0 ];
    pEntryMost[ 1 ] = &pBucketDst->m_rgEntry[ m_centryBucket ];
    pEntryLast[ 1 ] = NULL;

    // iterate over all entries in the source bucket
    while ( fTrue )
    {
        // check the read (src) cursor
        if ( pEntryThisSrc < pEntryMostSrc )
        {
            // nop
        }
        else if ( NULL == pBucketNextSrc )
        {
            // all entries have been exhausted
            break;
        }
        else
        {
            // all entries in the current bucket have been exhausted
            if ( pBucketSrcSrc != pBucketThis[ 0 ] )
            {
                // the bucket we are leaving is completely empty and the
                // SrcDst pointer is not using it
                // we need to put it into the available bucket list
                // the bucket ordering should be like this:
                // pBucketThis[0] (src/dst bucket)
                // pBucketSrcSrc (src bucket)
                // pBucketNextSrc (next src bucket)
                DHTAssert( pBucketThis[ 0 ]->m_pBucketNext == pBucketSrcSrc );
                DHTAssert( pBucketSrcSrc->m_pBucketNext == pBucketNextSrc );
                DHTAssert( pBucketNextSrc->m_pBucketPrev == pBucketSrcSrc );
                DHTAssert( pBucketSrcSrc->m_pBucketPrev == pBucketThis[ 0 ] );
                // update the bucket links to "remove" the free bucket
                pBucketThis[ 0 ]->m_pBucketNext = pBucketNextSrc;
                pBucketNextSrc->m_pBucketPrev = pBucketThis[ 0 ];
                // add the bucket to the avail list (singly linked via m_pBucketNext)
                pBucketSrcSrc->m_pBucketNext = pBucketAvail;
                pBucketAvail = pBucketSrcSrc;
            }
            // move to the next bucket
            pEntryThisSrc = &pBucketNextSrc->m_rgEntry[ 0 ];
            pEntryMostSrc = PentryBKTNextMost( pBucketNextSrc );
            pBucketSrcSrc = pBucketNextSrc;
            pBucketNextSrc = PbucketBKTNext( pBucketNextSrc );
        }

        // calculate the hash value
        // iIndex == 0 --> entry stays in the source bucket
        // iIndex == 1 --> entry moves to the destination bucket
        iIndex = BOOL( ( pEntryThisSrc->Hash() & iHashMask ) != iHashSrc );
        DHTAssert( iIndex == 0 || iIndex == 1 );
#ifdef DEBUG
        cEntriesTotalRunning++;
#endif // DEBUG
        // check the write (src/dst or dst) cursor
        if ( pEntryThis[ iIndex ] < pEntryMost[ iIndex ] )
        {
            // nop
        }
        else
        {
            // all entries in the current cursor's bucket are exhausted
            if ( 0 == iIndex )
            {
                // the src/dst cursor will always have a next bucket
                // (the write cursor can never pass the read cursor)
                DHTAssert( pBucketThis[ 0 ]->m_pBucketNext->m_pBucketPrev == pBucketThis[ 0 ] );
                pBucketThis[ 0 ] = pBucketThis[ 0 ]->m_pBucketNext;
                // setup the entry ptrs
                pEntryThis[ 0 ] = &pBucketThis[ 0 ]->m_rgEntry[ 0 ];
                pEntryMost[ 0 ] = &pBucketThis[ 0 ]->m_rgEntry[ m_centryBucket ];
            }
            else
            {
                // the dst cursor must allocate a new bucket
                if ( pBucketAvail )
                {
                    // get a bucket from the avail list
                    const PBUCKET pBucketNew = pBucketAvail;
                    pBucketAvail = pBucketAvail->m_pBucketNext;
                    // chain it
                    pBucketThis[ 1 ]->m_pBucketNext = pBucketNew;
                    pBucketNew->m_pBucketPrev = pBucketThis[ 1 ];
                    // move to it
                    pBucketThis[ 1 ] = pBucketNew;
                }
                else
                {
                    // get a bucket from the reservation pool
                    // (this can happen at most once per split)
                    DHTAssert( !fBucketFromHeap );
                    fBucketFromHeap = fTrue;
                    // allocate it
                    const PBUCKET pBucketReserve = phs->m_bucketpool.PbucketPOOLCommit();
                    DHTAssert( pBucketReserve );
                    STATInsertOverflowBucket();
#ifdef DEBUG
                    // add the heap bucket to our catalog of buckets
                    if ( NULL != rgBucketCheck )
                    {
                        DHTAssert( NULL == rgBucketCheck[ cBucketCheck - 1 ] );
                        rgBucketCheck[ cBucketCheck - 1 ] = pBucketReserve;
                    }
#endif // DEBUG
                    // chain it
                    pBucketThis[ 1 ]->m_pBucketNext = pBucketReserve;
                    pBucketReserve->m_pBucketPrev = pBucketThis[ 1 ];
                    // move to it
                    pBucketThis[ 1 ] = pBucketReserve;
                }
                // setup the entry ptrs
                pEntryThis[ 1 ] = &pBucketThis[ 1 ]->m_rgEntry[ 0 ];
                pEntryMost[ 1 ] = &pBucketThis[ 1 ]->m_rgEntry[ m_centryBucket ];
            }
        }

        // copy the entry
        pEntryThis[ iIndex ]->SetEntry( pEntryThisSrc->m_entry );

        // advance the write (src/dst or dst) cursor
        pEntryLast[ iIndex ] = pEntryThis[ iIndex ];
        pEntryThis[ iIndex ]++;

        // advance the read (src) cursor
        pEntryThisSrc++;
    }

    if ( pBucketSrcSrc == pBucketThis[ 0 ] )
    {
        // nop
    }
    else
    {
        // the last bucket of the src bucket is no longer needed
        // the bucket ordering should be like this:
        // pBucketThis[0] (src/dst bucket)
        // pBucketSrcSrc (src bucket)
        // << NOTHING >>
        DHTAssert( pBucketThis[ 0 ]->m_pBucketNext == pBucketSrcSrc );
        DHTAssert( pBucketSrcSrc->m_pBucketPrev == pBucketThis[ 0 ] );
        // free the bucket
        MEMFree( pBucketSrcSrc );
        STATDeleteOverflowBucket();
#ifdef DEBUG
        // remove the bucket from the bucket-catalog
        // (pointer-value comparison only -- the bucket was just freed)
        if ( NULL != rgBucketCheck )
        {
            for ( iT = 0; iT < cBucketCheck; iT++ )
            {
                if ( rgBucketCheck[iT] == pBucketSrcSrc )
                {
                    rgBucketCheck[iT] = NULL;
                    break;
                }
            }
            DHTAssert( iT < cBucketCheck ); // the bucket better be in the bucket-catalog!
        }
#endif // DEBUG
    }

    // update the next/end ptrs for the src/dst cursor and the dst cursor
    pBucketThis[ 0 ]->m_pEntryLast = pEntryLast[ 0 ];
    pBucketThis[ 1 ]->m_pEntryLast = pEntryLast[ 1 ];

#ifdef DEBUG
    if ( NULL != rgBucketCheck )
    {
        // check each catalogued bucket to see if it is in the pBucketSrcSrc, pBucketDst, or pBucketAvail
        // find and remove all buckets in pBucketSrcSrc
        pbucketTX = pBucketSrcSrcOriginal;
        DHTAssert( pbucketTX );
        while ( pbucketTX )
        {
            for ( iT = 0; iT < cBucketCheck; iT++ )
            {
                if ( rgBucketCheck[iT] == pbucketTX )
                {
                    rgBucketCheck[iT] = NULL;
                    break;
                }
            }
            DHTAssert( iT < cBucketCheck ); // if this goes off, we somehow added a bucket to the
                                            // SOURCE CHAIN -- THIS SHOULD NEVER HAPPEN! also, we
                                            // never catalogued the bucket!
            pbucketTX = PbucketBKTNext( pbucketTX );
        }
        // find and remove all buckets in pBucketDst
        pbucketTX = pBucketDstOriginal;
        DHTAssert( pbucketTX );
        while ( pbucketTX )
        {
            for ( iT = 0; iT < cBucketCheck; iT++ )
            {
                if ( rgBucketCheck[iT] == pbucketTX )
                {
                    rgBucketCheck[iT] = NULL;
                    break;
                }
            }
            DHTAssert( iT < cBucketCheck ); // if this goes off, we added a bucket to the destination
                                            // chain, but it was never catalogued! first question: where
                                            // did the bucket come from if didn't catalogue it???
            pbucketTX = PbucketBKTNext( pbucketTX );
        }
        // find and remove all buckets in pBucketAvail
        pbucketTX = pBucketAvail;
        while ( pbucketTX )
        {
            for ( iT = 0; iT < cBucketCheck; iT++ )
            {
                if ( rgBucketCheck[iT] == pbucketTX )
                {
                    rgBucketCheck[iT] = NULL;
                    break;
                }
            }
            DHTAssert( iT < cBucketCheck ); // if this goes off, we have a free bucket that was never
                                            // catalogued! where did it come from?
                                            // NOTE: this is not a memleak, it is a "we-never-catalogued-it"
                                            // problem; the memory will be freed later in this function
            pbucketTX = pbucketTX->m_pBucketNext;
        }
        // the list should now be empty -- verify this
        for ( iT = 0; iT < cBucketCheck; iT++ )
        {
            // if this goes off, rgBucketCheck[iT] contains a bucket that was abandoned without
            // being freed!
            DHTAssert( rgBucketCheck[iT] == NULL );
        }
        // free the list
        MEMFree( rgBucketCheck );
    }

    size_t cEntriesAfterwards = 0;
    // make sure the number of entries we processed matches the number of entries we started with
    DHTAssert( cEntriesTotal == cEntriesTotalRunning );
    // make sure we have all the entries we started with
    pbktT = pBucketSrcSrcOriginal;
    if ( pbktT->m_pb != NULL )
    {
        while ( pbktT )
        {
            pbktNextT = PbucketBKTNext( pbktT );
            if ( pbktNextT )
            {
                // full bucket
                cEntriesAfterwards += size_t( m_centryBucket );
            }
            else
            {
                // partial bucket (not empty)
                cEntriesAfterwards += 1 + ( pbktT->m_pEntryLast - &pbktT->m_rgEntry[0] );
            }
            pbktT = pbktNextT;
        }
    }
    pbktT = pBucketDstOriginal;
    if ( pbktT->m_pb != NULL )
    {
        while ( pbktT )
        {
            pbktNextT = PbucketBKTNext( pbktT );
            if ( pbktNextT )
            {
                // full bucket
                cEntriesAfterwards += size_t( m_centryBucket );
            }
            else
            {
                // partial bucket (not empty)
                cEntriesAfterwards += 1 + ( pbktT->m_pEntryLast - &pbktT->m_rgEntry[0] );
            }
            pbktT = pbktNextT;
        }
    }
    DHTAssert( cEntriesAfterwards == cEntriesTotal );
#endif

    // free the avail list (buckets that were recycled but never reused)
    while ( pBucketAvail )
    {
        PBUCKET pBucketT;
        pBucketT = pBucketAvail;
        pBucketAvail = pBucketAvail->m_pBucketNext;
        MEMFree( pBucketT );
        STATDeleteOverflowBucket();
    }

    if ( !fBucketFromHeap )
    {
        phs->m_bucketpool.POOLUnreserve(); // cancel the heap reservation (we never used it)
    }

    STATSplitBucket();
}
// work-horse for shrinking a bucket
//
// appends every entry of the write-locked source bucket chain to the end
// of the write-locked destination bucket chain, then marks the source
// bucket empty (m_pb = NULL).  overflow buckets hanging off the source
// chain are re-linked wholesale onto the destination chain; at most one
// additional bucket is committed from the reservation the caller made in
// phs->m_bucketpool (the reservation is cancelled if unused).
void BKTIDoMerge( HOTSTUFF* const phs,
                  PBUCKET pBucketSrc,
                  PBUCKET pBucketDst )
{
#ifdef DEBUG
    // catalog each BUCKET structure and make sure they end up in the destination bucket
    PBUCKET pBucketDstOriginal = pBucketDst;
    PBUCKET *rgBucketCheck = NULL, pbucketT;
    size_t cBucketCheck = 0, iT;

    pbucketT = pBucketSrc;
    while ( pbucketT )
    {
        cBucketCheck++;
        pbucketT = PbucketBKTNext( pbucketT );
    }
    pbucketT = pBucketDst;
    while ( pbucketT )
    {
        cBucketCheck++;
        pbucketT = PbucketBKTNext( pbucketT );
    }
    cBucketCheck++; // account for bucket from heap
    rgBucketCheck = (PBUCKET *)PvMEMAlloc( cBucketCheck * sizeof( PBUCKET ) );
    if ( NULL != rgBucketCheck )
    {
        iT = 0;
        pbucketT = pBucketSrc;
        while ( pbucketT )
        {
            rgBucketCheck[ iT++ ] = pbucketT;
            pbucketT = PbucketBKTNext( pbucketT );
        }
        pbucketT = pBucketDst;
        while ( pbucketT )
        {
            rgBucketCheck[ iT++ ] = pbucketT;
            pbucketT = PbucketBKTNext( pbucketT );
        }
        rgBucketCheck[ iT++ ] = NULL; // heap bucket
        DHTAssert( iT == cBucketCheck );
    }

    // count the number of entries we will be handling
    size_t cEntriesTotal = 0;
    PBUCKET pbktT, pbktNextT;
    pbktT = pBucketSrc;
    if ( pbktT->m_pb != NULL )
    {
        while ( pbktT )
        {
            pbktNextT = PbucketBKTNext( pbktT );
            if ( pbktNextT )
            {
                // full bucket
                cEntriesTotal += size_t( m_centryBucket );
            }
            else
            {
                // partial bucket (not empty)
                cEntriesTotal += 1 + ( pbktT->m_pEntryLast - &pbktT->m_rgEntry[0] );
            }
            pbktT = pbktNextT;
        }
    }
    pbktT = pBucketDst;
    if ( pbktT->m_pb != NULL )
    {
        while ( pbktT )
        {
            pbktNextT = PbucketBKTNext( pbktT );
            if ( pbktNextT )
            {
                // full bucket
                cEntriesTotal += size_t( m_centryBucket );
            }
            else
            {
                // partial bucket (not empty)
                cEntriesTotal += 1 + ( pbktT->m_pEntryLast - &pbktT->m_rgEntry[0] );
            }
            pbktT = pbktNextT;
        }
    }
#endif

    // read (src) cursor
    CKeyEntry *pEntryThisSrc;
    CKeyEntry *pEntryMostSrc;

    // write (dst) cursor
    CKeyEntry *pEntryThisDst;
    CKeyEntry *pEntryMostDst;

    // remember if we have moved to the last bucket or not
    BOOL fSetEndPtr;

    // remember if we allocated a bucket from the heap
    BOOL fBucketFromHeap = fFalse;

    // efficiency variables
    PBUCKET pBucketT;

    // move to the end of the dst bucket
    pBucketT = PbucketBKTNext( pBucketDst );
    while ( pBucketT )
    {
        pBucketDst = pBucketT;
        pBucketT = PbucketBKTNext( pBucketT );
    }
    pEntryThisDst = PentryBKTNextMost( pBucketDst );
    pEntryMostDst = &pBucketDst->m_rgEntry[ m_centryBucket ];

    if ( !PbucketBKTNext( pBucketSrc ) )
    {
        // the src bucket does not have extra bucket structures
        // setup the src cursor for a partial pass
        pEntryThisSrc = &pBucketSrc->m_rgEntry[ 0 ];
        pEntryMostSrc = PentryBKTNextMost( pBucketSrc );
        // we are not appending buckets from the src bucket, so we will be setting the
        // end ptr of the dst bucket iff we add entries from the src bucket
        fSetEndPtr = BOOL( pEntryThisSrc < pEntryMostSrc );
    }
    else
    {
        // the src bucket has extra bucket structures
        // attach the extra bucket structures to the dst bucket
        // (re-linked wholesale; their entries are not copied individually)
        pBucketDst->m_pBucketNext = pBucketSrc->m_pBucketNext;
        pBucketDst->m_pBucketNext->m_pBucketPrev = pBucketDst;
        // setup the src cursor for a full pass over the first src bucket
        pEntryThisSrc = &pBucketSrc->m_rgEntry[ 0 ];
        pEntryMostSrc = &pBucketSrc->m_rgEntry[ m_centryBucket ];
        // we are appending buckets from the src bucket, so we will not be setting the
        // end ptr of the dst bucket because we are no longer in the last bucket
        // of the dst bucket chain
        fSetEndPtr = fFalse;
    }

    // copy the entries in the src bucket
    while ( pEntryThisSrc < pEntryMostSrc )
    {
        // check the dst cursor
        if ( pEntryThisDst < pEntryMostDst )
        {
            // nop
        }
        else
        {
            // all entries in the dst bucket are exhausted
            if ( !fSetEndPtr )
            {
                // we are not in the last bucket of the dst bucket because there is no end ptr
                // walk to the true end of the (now extended) dst chain
                pBucketT = PbucketBKTNext( pBucketDst );
                DHTAssert( pBucketT );
                do
                {
                    pBucketDst = pBucketT;
                    pBucketT = PbucketBKTNext( pBucketT );
                }
                while ( pBucketT );
                // setup the dst cursor
                pEntryThisDst = pBucketDst->m_pEntryLast + 1;
                pEntryMostDst = &pBucketDst->m_rgEntry[ m_centryBucket ];
                // we are now able to set the end ptr because we are in the last bucket
                // of the dst bucket
                fSetEndPtr = fTrue;
                // restart the loop
                continue;
            }
            // we were at the last bucket in the dst bucket
            // get a bucket from the heap reservation pool
            // (this can happen at most once per merge)
            DHTAssert( !fBucketFromHeap );
            fBucketFromHeap = fTrue;
            // commit the reservation now
            pBucketT = phs->m_bucketpool.PbucketPOOLCommit();
            DHTAssert( pBucketT );
            STATInsertOverflowBucket();
            // chain the heap bucket
            pBucketDst->m_pBucketNext = pBucketT;
            pBucketT->m_pBucketPrev = pBucketDst;
            // setup the dst cursor
            pBucketDst = pBucketT;
            pEntryThisDst = &pBucketDst->m_rgEntry[ 0 ];
            pEntryMostDst = &pBucketDst->m_rgEntry[ m_centryBucket ];
#ifdef DEBUG
            // add the heap bucket to our catalog of buckets
            if ( NULL != rgBucketCheck )
            {
                DHTAssert( rgBucketCheck[cBucketCheck - 1] == NULL );
                rgBucketCheck[cBucketCheck - 1] = pBucketT;
            }
#endif // DEBUG
        }

        // copy the entry
        pEntryThisDst->SetEntry( pEntryThisSrc->m_entry );

        // advance the cursors
        pEntryThisSrc++;
        pEntryThisDst++;
    }

    // mark the src bucket as empty
    pBucketSrc->m_pb = NULL;

    if ( fSetEndPtr )
    {
        // set the end of the destination bucket
        DHTAssert( pEntryThisDst != &pBucketDst->m_rgEntry[ 0 ] );
        pBucketDst->m_pEntryLast = pEntryThisDst - 1;
    }
    else
    {
        // we do not need to set the end ptr of the dst bucket
        // nop
    }

    if ( !fBucketFromHeap )
    {
        // cancel the unused heap reservation
        phs->m_bucketpool.POOLUnreserve();
    }

#ifdef DEBUG
    if ( NULL != rgBucketCheck )
    {
        // check each catalogued bucket to see if it is in the pBucketDst bucket
        pbucketT = pBucketDstOriginal;
        DHTAssert( pbucketT );
        // find an remove all buckets found in the destiantion bucket from our list
        while ( pbucketT )
        {
            for ( iT = 0; iT < cBucketCheck; iT++ )
            {
                if ( rgBucketCheck[iT] == pbucketT )
                {
                    rgBucketCheck[iT] = NULL;
                    break;
                }
            }
            DHTAssert( iT < cBucketCheck ); // if this goes off, we somehow got a bucket
                                            // into the chain that shouldn't be there
                                            // (it is a bucket we never catalogued!)
            pbucketT = PbucketBKTNext( pbucketT );
        }
        // find an remove pBucketSrc from our list
        for ( iT = 0; iT < cBucketCheck; iT++ )
        {
            if ( rgBucketCheck[iT] == pBucketSrc )
            {
                rgBucketCheck[iT] = NULL;
                break;
            }
        }
        DHTAssert( iT < cBucketCheck ); // if this goes off, somehow the FIXED source bucket
                                        // got removed from our catalogue OR pBucketSrc was
                                        // changed (which should never happen)
        // the list should now be empty -- verify this
        for ( iT = 0; iT < cBucketCheck; iT++ )
        {
            // if this goes off, rgBucketCheck[iT] contains a bucket that was abandoned without
            // being freed!
            DHTAssert( rgBucketCheck[iT] == NULL );
        }
        // free the list
        MEMFree( rgBucketCheck );
    }

    // make sure the number of entries has not changed since we started
    size_t cEntriesAfterwards = 0;
    pbktT = pBucketDstOriginal;
    if ( pbktT->m_pb != NULL )
    {
        while ( pbktT )
        {
            pbktNextT = PbucketBKTNext( pbktT );
            if ( pbktNextT )
            {
                // full bucket
                cEntriesAfterwards += size_t( m_centryBucket );
            }
            else
            {
                // partial bucket (not empty)
                cEntriesAfterwards += 1 + ( pbktT->m_pEntryLast - &pbktT->m_rgEntry[0] );
            }
            pbktT = pbktNextT;
        }
    }
    DHTAssert( cEntriesAfterwards == cEntriesTotal );
#endif

    STATMergeBucket();
}
  2322. /////////////////////////////////////////////////////////////////////////////////////////
  2323. //
  2324. // mechanisms for implementing the dynamic-hash-table policies
  2325. //
  2326. // hash to the correct HOTSTUFF element
  2327. HOTSTUFF *HOTSTUFFHash() const
  2328. {
  2329. return m_rghs + OSSYNC::OSSyncGetCurrentProcessor();
  2330. }
// statistics

// account for one entry inserted into the table
void STATInsertEntry( HOTSTUFF* const phs )
{
    // atomically add 1 to the per-HOTSTUFF entry count
    AtomicExchangeAddPointer( (void**)&phs->m_cEntry, (void*)1 );
    // NOTE(review): m_cOp is bumped without synchronization -- presumably an
    // intentionally approximate per-processor operation counter; confirm
    phs->m_cOp++;
}
// account for one entry removed from the table
void STATDeleteEntry( HOTSTUFF* const phs )
{
    // atomically subtract 1 from the per-HOTSTUFF entry count
    AtomicExchangeAddPointer( (void**)&phs->m_cEntry, (void*)-1 );
    // approximate (unsynchronized) operation counter -- see STATInsertEntry
    phs->m_cOp++;
}
// count one overflow-bucket allocation (DHT_STATS builds only; no-op otherwise)
void STATInsertOverflowBucket()
{
#ifdef DHT_STATS
    m_cBucketOverflowInsert++;
#endif // DHT_STATS
}
// count one overflow-bucket deallocation (DHT_STATS builds only; no-op otherwise)
void STATDeleteOverflowBucket()
{
#ifdef DHT_STATS
    m_cBucketOverflowDelete++;
#endif // DHT_STATS
}
// count one completed bucket split (DHT_STATS builds only; no-op otherwise)
void STATSplitBucket()
{
#ifdef DHT_STATS
    m_cBucketSplit++;
#endif // DHT_STATS
}
// count one completed bucket merge (DHT_STATS builds only; no-op otherwise)
void STATMergeBucket()
{
#ifdef DHT_STATS
    m_cBucketMerge++;
#endif // DHT_STATS
}
// count one directory split (DHT_STATS builds only; no-op otherwise)
void STATSplitDirectory()
{
#ifdef DHT_STATS
    m_cDirSplit++;
#endif // DHT_STATS
}
// count one directory merge (DHT_STATS builds only; no-op otherwise)
void STATMergeDirectory()
{
#ifdef DHT_STATS
    m_cDirMerge++;
#endif // DHT_STATS
}
// count one state-machine transition (DHT_STATS builds only; no-op otherwise)
void STATStateTransition()
{
#ifdef DHT_STATS
    m_cTransition++;
#endif // DHT_STATS
}
// count one maintenance-policy selection (DHT_STATS builds only; no-op otherwise)
void STATPolicySelection()
{
#ifdef DHT_STATS
    m_cSelection++;
#endif // DHT_STATS
}
// count one split abandoned due to lock contention (DHT_STATS builds only)
void STATSplitContention()
{
#ifdef DHT_STATS
    m_cSplitContend++;
#endif // DHT_STATS
}
// count one merge abandoned due to lock contention (DHT_STATS builds only)
void STATMergeContention()
{
#ifdef DHT_STATS
    m_cMergeContend++;
#endif // DHT_STATS
}
  2402. // amortized table maintenance
  2403. void PerformMaintenance()
  2404. {
  2405. // enter the state machine
  2406. HOTSTUFF* phs;
  2407. const int iGroup = UiSTEnter( &phs );
  2408. const ENUMSTATE esCurrent = EsSTGetState();
  2409. // carry out the current policy
  2410. if ( esCurrent == stateGrow )
  2411. {
  2412. BKTISplit( phs );
  2413. }
  2414. else if ( esCurrent == stateShrink )
  2415. {
  2416. BKTIMerge( phs );
  2417. }
  2418. // leave the state machine
  2419. STLeave( iGroup, phs );
  2420. }
// choose the next maintenance policy (grow/shrink/transition) from the
// table's current size and recent operation rate
//
// called with m_semPolicy held; ownership of the semaphore is either
// released here or handed to STTransition (which presumably releases it
// after the state change -- confirm against STTransition's definition)
void SelectMaintenancePolicy( HOTSTUFF* const phs )
{
    // collect information on the current state of the hash table
    const ENUMSTATE esCurrent = EsSTGetState();
    const NativeCounter cBucketMax = NcDIRIGetBucketMax( esCurrent );
    const NativeCounter cBucket = NcDIRIGetBucket( esCurrent );
    const NativeCounter cBucketActive = cBucketMax + cBucket;
    const NativeCounter cOpLocal = phs->m_cOp;

    // compute the current entry count and op count and reset the op count
    NativeCounter cEntry = 0;
    NativeCounter cOp = 0;
    for ( NativeCounter ihs = 0; ihs < m_chs; ihs++ )
    {
        cEntry += m_rghs[ ihs ].m_cEntry;
        cOp += m_rghs[ ihs ].m_cOp;
        m_rghs[ ihs ].m_cOp = 0;
    }

    // compute the ideal entry count
    const NativeCounter cEntryIdeal = m_cLoadFactor * cBucketActive;

    // compute the max entry count
    const NativeCounter cEntryMax = m_centryBucket * cBucketActive;

    // determine our current flexibility in the entry count
    // NOTE(review): unsigned subtraction -- assumes m_centryBucket >= m_cLoadFactor
    // and cEntryMax / 2 >= cEntryIdeal (i.e. load factor <= half a bucket); confirm
    // these invariants are enforced at construction
    const NativeCounter cEntryFlexibility = max( m_centryBucket - m_cLoadFactor, cEntryMax / 2 - cEntryIdeal );

    // determine our current threshold sensitivity
    const NativeCounter cOpSensitivity = max( 1, cEntryFlexibility / 2 );

    // approximate the local (per-HOTSTUFF) threshold sensitivity
    // NOTE(review): divides by cOpLocal -- relies on the caller (MaintainTable)
    // only invoking us when phs->m_cOp > m_cOpSensitivity >= 0, so cOpLocal > 0
    const NativeCounter ratio = ( cOp + cOpLocal - 1 ) / cOpLocal;
    const NativeCounter cOpSensitivityLocal = max( 1, cOpSensitivity / ratio );

    // compute the preferred entry count, clamped so that a single policy
    // change never moves more than (flexibility - sensitivity) entries
    NativeCounter cEntryPreferred = cEntry;
    if ( cEntryIdeal + ( cEntryFlexibility - cOpSensitivity ) < cEntry )
    {
        cEntryPreferred = cEntry - ( cEntryFlexibility - cOpSensitivity );
    }
    else if ( cEntryIdeal > cEntry + ( cEntryFlexibility - cOpSensitivity ) )
    {
        cEntryPreferred = cEntry + ( cEntryFlexibility - cOpSensitivity );
    }

    // compute the preferred bucket count (never below the configured minimum)
    const NativeCounter cBucketPreferred = max( m_cbucketMin, ( cEntryPreferred + m_cLoadFactor - 1 ) / m_cLoadFactor );

    // determine the new policy
    ENUMSTATE esNew = stateNil;
    if ( esCurrent == stateGrow )
    {
        if ( cBucketPreferred < cBucketActive )
        {
            // growing but the table should shrink
            esNew = stateShrinkFromGrow;
        }
        else if ( cBucketPreferred > cBucketActive )
        {
            if ( cBucket == cBucketMax )
            {
                // current directory level is full -- split the directory first
                esNew = stateSplitFromGrow;
            }
        }
    }
    else
    {
        DHTAssert( esCurrent == stateShrink );
        if ( cBucketPreferred < cBucketActive )
        {
            if ( cBucket == 0 )
            {
                // current directory level is empty -- merge the directory first
                esNew = stateMergeFromShrink;
            }
        }
        else if ( cBucketPreferred > cBucketActive )
        {
            // shrinking but the table should grow
            esNew = stateGrowFromShrink;
        }
    }

    // enact the new policy (write only on change to avoid dirtying the cache line)
    if ( m_cOpSensitivity != cOpSensitivityLocal )
    {
        m_cOpSensitivity = cOpSensitivityLocal;
    }
    if ( m_cBucketPreferred != cBucketPreferred )
    {
        m_cBucketPreferred = cBucketPreferred;
    }
    if ( esNew )
    {
        STTransition( esNew );
    }
    else
    {
        m_semPolicy.Release();
    }

    STATPolicySelection();
}
// periodic hook called on the operation path: re-evaluates the resize
// policy when the op-count threshold is breached and performs one unit
// of amortized split/merge work when the table is not at its preferred size
void MaintainTable( HOTSTUFF* const phs )
{
    // decide on a new policy if we may have breached one of our
    // thresholds
    // (double-checked: test, try-acquire the policy semaphore, then
    // re-test -- SelectMaintenancePolicy resets every m_cOp, so a
    // concurrent selection can invalidate the first test)
    if ( phs->m_cOp > m_cOpSensitivity &&
         m_semPolicy.CAvail() &&
         m_semPolicy.FTryAcquire() )
    {
        if ( phs->m_cOp > m_cOpSensitivity )
        {
            // SelectMaintenancePolicy releases (or hands off) m_semPolicy
            SelectMaintenancePolicy( phs );
        }
        else
        {
            m_semPolicy.Release();
        }
    }

    // perform amortized work on the table as necessary
    // (i.e. when the active bucket count differs from the preferred count
    // in the direction of the current grow/shrink state)
    if ( NcDIRIGetBucketMax( stateGrow ) + NcDIRIGetBucket( stateGrow ) < m_cBucketPreferred ||
         m_cBucketPreferred < NcDIRIGetBucketMax( stateShrink ) + NcDIRIGetBucket( stateShrink ) )
    {
        PerformMaintenance();
    }
}
  2535. public:
  2536. // calculate the address of the aligned block and store its offset (for free)
  2537. static void* PvMEMIAlign( void* const pv, const size_t cbAlign )
  2538. {
  2539. // round up to the nearest cache line
  2540. // NOTE: this formula always forces an offset of at least 1 byte
  2541. const ULONG_PTR ulp = ULONG_PTR( pv );
  2542. const ULONG_PTR ulpAligned = ( ( ulp + cbAlign ) / cbAlign ) * cbAlign;
  2543. const ULONG_PTR ulpOffset = ulpAligned - ulp;
  2544. DHTAssert( ulpOffset > 0 );
  2545. DHTAssert( ulpOffset <= cbAlign );
  2546. DHTAssert( ulpOffset == BYTE( ulpOffset ) ); // must fit into a single BYTE
  2547. // store the offset
  2548. BYTE *const pbAligned = (BYTE*)ulpAligned;
  2549. pbAligned[ -1 ] = BYTE( ulpOffset );
  2550. // return the aligned block
  2551. return (void*)pbAligned;
  2552. }
  2553. // retrieve the original unaligned block of memory from the aligned block
  2554. static void* PvMEMIUnalign( void* const pv )
  2555. {
  2556. // read the offset of the real block
  2557. BYTE *const pbAligned = (BYTE*)pv;
  2558. const BYTE bOffset = pbAligned[ -1 ];
  2559. DHTAssert( bOffset > 0 );
  2560. // return the real unaligned block
  2561. return (void*)( pbAligned - bOffset );
  2562. }
  2563. // allocate memory
  2564. static void* PvMEMAlloc( const size_t cbSize, const size_t cbAlign = cbCacheLine )
  2565. {
  2566. void* const pv = new BYTE[ cbSize + cbAlign ];
  2567. if ( pv )
  2568. {
  2569. return PvMEMIAlign( pv, cbAlign );
  2570. }
  2571. return NULL;
  2572. }
  2573. // free memory
  2574. static void MEMFree( void* const pv )
  2575. {
  2576. if ( pv )
  2577. {
  2578. delete [] ((BYTE*)PvMEMIUnalign( pv ));
  2579. }
  2580. }
private:
    // Members are grouped by write frequency; each group is followed by a
    // reserved byte array whose size differs per pointer width — presumably
    // padding groups apart to limit cache-line contention (see cbCacheLine
    // use in ErrInit) — TODO confirm sizes against target cache-line size.

    // ----- never written after ErrInit -----
    NativeCounter       m_cLoadFactor;       // preferred number of entries in a bucket at any given time
    NativeCounter       m_centryBucket;      // maximum number of entries per bucket
    NativeCounter       m_cbBucket;          // size in bytes of a bucket (rounded up to the nearest full cache-line)
    NativeCounter       m_rankDHTrwlBucket;  // deadlock-detection rank of the reader/writer lock on each bucket
    HOTSTUFF            *m_rghs;             // array of HOTSTUFF objects (hashed per processor)
    NativeCounter       m_chs;               // size of HOTSTUFF array
    NativeCounter       m_cbucketMin;        // minimum number of buckets in the hash-table
#ifdef _WIN64
    BYTE                m_rgbRsvdNever[ 8 ];
#else // !_WIN64
    BYTE                m_rgbRsvdNever[ 4 ];
#endif // _WIN64

    // ----- rarely written -----
    DIRPTRS             m_dirptrs[ 2 ];      // directory pointers (2 copies, selected by table state)
    BYTE                *m_rgrgBucket[ cbitNativeCounter ]; // directory (array of arrays of buckets;
                                             // one slot per bit of NativeCounter)
    // no padding necessary

    // ----- often written -----
    NativeCounter       m_cOpSensitivity;    // used to regulate policy changes
    NativeCounter       m_cBucketPreferred;  // preferred table size
    ENUMSTATE           m_stateCur;          // current state of the grow/shrink state machine
#ifdef _WIN64
    BYTE                m_rgbRsvdOften[ 44 ];
#else // !_WIN64
    BYTE                m_rgbRsvdOften[ 20 ];
#endif // _WIN64

    // ----- always written (second only to HOTSTUFF members) -----
    OSSYNC::CSemaphore  m_semPolicy;         // used to serialize policy changes
    long                m_cCompletions;      // counts the number of metered-section completions
#ifdef _WIN64
    BYTE                m_rgbRsvdAlways[ 52 ];
#else // !_WIN64
    BYTE                m_rgbRsvdAlways[ 24 ];
#endif // _WIN64

#ifdef DHT_STATS
    // performance statistics
    long                m_cBucketOverflowInsert; // count of overflow bucket allocations
    long                m_cBucketOverflowDelete; // count of overflow bucket deletions
    long                m_cBucketSplit;          // count of bucket split operations
    long                m_cBucketMerge;          // count of bucket merge operations
    long                m_cDirSplit;             // count of directory split operations
    long                m_cDirMerge;             // count of directory merge operations
    long                m_cTransition;           // count of state transitions
    long                m_cSelection;            // count of policy selections
    long                m_cSplitContend;         // count of split contentions
    long                m_cMergeContend;         // count of merge contentions
#ifdef _WIN64
    BYTE                m_rgbRsvdPerf[ 24 ];
#else // !_WIN64
    BYTE                m_rgbRsvdPerf[ 24 ];
#endif // _WIN64
#endif // DHT_STATS

#ifdef DEBUG
    BOOL                m_fInit;             // initialization flag (set only when ErrInit succeeds)
#endif // DEBUG
};
  2638. /////////////////////////////////////////////////////////////////////////////////////
  2639. //
  2640. // CDynamicHashTable< CKey, CEntry >
  2641. //
  2642. /////////////////////////////////////////////////////////////////////////////////////
  2643. // ctor
  2644. template< class CKey, class CEntry >
  2645. inline CDynamicHashTable< CKey, CEntry >::
  2646. CDynamicHashTable( const NativeCounter rankDHTrwlBucket )
  2647. : m_semPolicy( CSyncBasicInfo( "CDynamicHashTable::m_semPolicy" ) )
  2648. {
  2649. #ifdef DEBUG
  2650. m_fInit = fFalse;
  2651. // zero-out this memory so the debugger won't print garbage
  2652. memset( m_rgbRsvdNever, 0, sizeof( m_rgbRsvdNever ) );
  2653. memset( m_rgbRsvdOften, 0, sizeof( m_rgbRsvdOften ) );
  2654. memset( m_rgbRsvdAlways, 0, sizeof( m_rgbRsvdAlways ) );
  2655. #ifdef DHT_STATS
  2656. memset( m_rgbRsvdPerf, 0, sizeof( m_rgbRsvdPerf ) );
  2657. #endif // DHT_STATS
  2658. #endif
  2659. // we should be on a 32-bit or 64-bit system
  2660. #ifdef _WIN64
  2661. DHTAssert( 8 == sizeof( NativeCounter ) );
  2662. #else // _!WIN64
  2663. DHTAssert( 4 == sizeof( NativeCounter ) );
  2664. #endif // _WIN64
  2665. // capture the rank for each bucket
  2666. m_rankDHTrwlBucket = rankDHTrwlBucket;
  2667. // prepare each semaphore so it can have 1 owner
  2668. m_semPolicy.Release();
  2669. }
// dtor -- intentionally empty: all teardown (directory, HOTSTUFF array)
// is performed by Term(), which the owner is expected to call
template< class CKey, class CEntry >
inline CDynamicHashTable< CKey, CEntry >::
~CDynamicHashTable()
{
}
// initializes the dynamic hash table. if the table cannot be initialized,
// errOutOfMemory will be returned
//
//  dblLoadFactor   -- preferred number of entries per bucket
//  dblUniformity   -- scale factor reflecting expected hash quality; the
//                     product dblLoadFactor * dblUniformity is handed to
//                     the directory as the effective per-bucket target
//  cBucketMinimum  -- floor on the bucket count (also the initial
//                     preferred size)
//
// on failure the table is cleaned up via Term() and may be re-initialized
template< class CKey, class CEntry >
inline typename CDynamicHashTable< CKey, CEntry >::ERR CDynamicHashTable< CKey, CEntry >::
ErrInit(    const double        dblLoadFactor,
            const double        dblUniformity,
            const NativeCounter cBucketMinimum )
{
    ERR             err;
    NativeCounter   ihs;

    DHTAssert( !m_fInit );

    // initialize all data by its cache-line grouping

    // never written
    m_cLoadFactor = 0;
    m_centryBucket = 0;
    m_cbBucket = 0;
    m_rghs = NULL;
    // one HOTSTUFF context per possible processor
    m_chs = OSSYNC::OSSyncGetProcessorCountMax();
    m_cbucketMin = 0;

    // rarely written
    memset( m_dirptrs, 0, sizeof( m_dirptrs ) );
    memset( m_rgrgBucket, 0, sizeof( m_rgrgBucket ) );

    // often written
    m_cOpSensitivity = 0;
    m_cBucketPreferred = cBucketMinimum;
    // NOTE: we cannot start in stateFreeze because we must go through the "halfway" completion
    // function so that we copy the directory ptrs safely
    m_stateCur = stateGrow;

    // always written
    m_cCompletions = 0;

#ifdef DHT_STATS
    // performance statistics
    m_cBucketOverflowInsert = 0;
    m_cBucketOverflowDelete = 0;
    m_cBucketSplit = 0;
    m_cBucketMerge = 0;
    m_cDirSplit = 0;
    m_cDirMerge = 0;
    m_cTransition = 0;
    m_cSelection = 0;
    m_cSplitContend = 0;
    m_cMergeContend = 0;
#endif // DHT_STATS

    // allocate the HOTSTUFF array (cache-line aligned, via PvMEMAlloc, so
    // per-processor counters don't share cache lines)
    m_rghs = (HOTSTUFF*)PvMEMAlloc( m_chs * sizeof( HOTSTUFF ), cbCacheLine );
    if ( !m_rghs )
    {
        err = errOutOfMemory;
        goto HandleError;
    }

    // construct the HOTSTUFF objects in place (raw memory from PvMEMAlloc)
    for ( ihs = 0; ihs < m_chs; ihs++ )
    {
        new( m_rghs + ihs ) HOTSTUFF();
    }

    // initialize the directory
    err = ErrDIRInit( NativeCounter( dblLoadFactor * dblUniformity ), cBucketMinimum );
    if ( err != errSuccess )
    {
        goto HandleError;
    }

#ifdef DEBUG
    m_fInit = fTrue;
#endif // DEBUG
    return errSuccess;

HandleError:
    DHTAssert( err != errSuccess );
    // Term handles a partially initialized table
    Term();
    return err;
}
  2746. // terminates the dynamic hash table. this function can be called even if the
  2747. // hash table has never been initialized or is only partially initialized
  2748. //
  2749. // NOTE: any data stored in the table at this time will be lost!
  2750. template< class CKey, class CEntry >
  2751. inline void CDynamicHashTable< CKey, CEntry >::
  2752. Term()
  2753. {
  2754. #ifdef DEBUG
  2755. m_fInit = fFalse;
  2756. #endif // DEBUG
  2757. // term the directory
  2758. DIRTerm();
  2759. if ( NULL != m_rghs )
  2760. {
  2761. // delete the HOTSTUFF aray
  2762. while ( m_chs )
  2763. {
  2764. // destruct the object
  2765. m_chs--;
  2766. m_rghs[ m_chs ].HOTSTUFF::~HOTSTUFF();
  2767. }
  2768. MEMFree( m_rghs );
  2769. m_rghs = NULL;
  2770. }
  2771. }
// acquires a read lock on the specified key and returns the lock in the
// provided lock context; pair with ReadUnlockKey. on return the context's
// currency is positioned on the key's entry if one exists
template< class CKey, class CEntry >
inline void CDynamicHashTable< CKey, CEntry >::
ReadLockKey( const CKey& key, CLock* const plock )
{
    DHTAssert( m_fInit );
    // verify the lock context is not already in use
    DHTAssert( plock->m_ls == CLock::lsNil );
    // initialize the lock
    plock->m_ls = CLock::lsRead;
    // enter the state machine for the duration of the bucket lookup
    // (UiSTEnter/STLeave bracket the table-state-sensitive work)
    const int iGroup = UiSTEnter( &plock->m_phs );
    const ENUMSTATE esCurrent = EsSTGetState();
    // read-lock the key's bucket through the directory
    DIRReadLockKey( esCurrent, key, plock );
    // try to seek to the key (sets up currency)
    BKTSeek( plock, key );
    // leave the state machine
    STLeave( iGroup, plock->m_phs );
}
// releases the read lock in the specified lock context and resets the
// context so it may be reused
template< class CKey, class CEntry >
inline void CDynamicHashTable< CKey, CEntry >::
ReadUnlockKey( CLock* const plock )
{
    DHTAssert( m_fInit );
    // verify the context holds a read lock on a bucket
    DHTAssert( FBKTRead( plock ) );
    DHTAssert( plock->m_pBucketHead != NULL );
    DHTAssert( plock->m_pBucketHead->CRWL().FReader() );
    // unlock the key through the directory
    DIRReadUnlockKey( plock );
    // reset the lock
    plock->m_ls = CLock::lsNil;
}
// acquires a write lock on the specified key and returns the lock in the
// provided lock context; pair with WriteUnlockKey. on return the context's
// currency is positioned on the key's entry if one exists
template< class CKey, class CEntry >
inline void CDynamicHashTable< CKey, CEntry >::
WriteLockKey( const CKey& key, CLock* const plock )
{
    DHTAssert( m_fInit );
    // verify the lock context is not already in use
    DHTAssert( plock->m_ls == CLock::lsNil );
    // initialize the lock; the insert/delete flag is consumed at unlock
    // time to decide whether table maintenance is warranted
    plock->m_ls = CLock::lsWrite;
    plock->m_fInsertOrDelete = fFalse;
    // enter the state machine for the duration of the bucket lookup
    const int iGroup = UiSTEnter( &plock->m_phs );
    const ENUMSTATE esCurrent = EsSTGetState();
    // write-lock the key's bucket through the directory
    DIRWriteLockKey( esCurrent, key, plock );
    // try to seek to the key (sets up currency)
    BKTSeek( plock, key );
    // leave the state machine
    STLeave( iGroup, plock->m_phs );
}
// releases the write lock in the specified lock context, performing any
// pending table maintenance if an insert/delete occurred under the lock,
// and resets the context for reuse
template< class CKey, class CEntry >
inline void CDynamicHashTable< CKey, CEntry >::
WriteUnlockKey( CLock* const plock )
{
    DHTAssert( m_fInit );
    // verify the context holds a write lock on a bucket
    DHTAssert( FBKTWrite( plock ) );
    DHTAssert( plock->m_pBucketHead != NULL );
    DHTAssert( plock->m_pBucketHead->CRWL().FWriter() );
    // unlock the key through the directory
    DIRWriteUnlockKey( plock );
    // we performed an insert or delete while holding the write lock;
    // maintenance runs only after the bucket lock has been released
    if ( plock->m_fInsertOrDelete )
    {
        // perform amortized maintenance on the table
        MaintainTable( plock->m_phs );
    }
    // reset the lock
    plock->m_ls = CLock::lsNil;
    plock->m_fInsertOrDelete = fFalse;
}
// retrieves the entry corresponding to the key locked by the specified lock
// context (valid under a read, write, or scan lock). if there is no entry
// for this key, errEntryNotFound will be returned
template< class CKey, class CEntry >
inline typename CDynamicHashTable< CKey, CEntry >::ERR CDynamicHashTable< CKey, CEntry >::
ErrRetrieveEntry( CLock* const plock, CEntry* const pentry )
{
    DHTAssert( m_fInit );
    // verify the lock: any lock type is acceptable for retrieval
    DHTAssert( FBKTRead( plock ) || FBKTWrite( plock ) || FBKTScan( plock ) );
    DHTAssert( plock->m_pBucketHead != NULL );
#ifdef DEBUG
    // the bucket lock mode must match the lock type (scan locks write)
    if ( FBKTRead( plock ) )
    {
        DHTAssert( plock->m_pBucketHead->CRWL().FReader() );
    }
    else
    {
        DHTAssert( plock->m_pBucketHead->CRWL().FWriter() );
    }
    // for keyed locks, the current entry (if any) must match the locked key
    if ( FBKTRead( plock ) || FBKTWrite( plock ) )
    {
        CKeyEntry *pKeyEntry;
        BKTGetEntry( plock, &pKeyEntry );
        DHTAssert( pKeyEntry ? pKeyEntry->FEntryMatchesKey( plock->m_key ) : fTrue );
    }
#endif
    // get the entry
    return ErrBKTGetEntry( plock, pentry );
}
// replaces the entry corresponding to the key locked by the specified lock
// context (requires a write or scan lock). the key for the new entry must
// match the key for the old entry. if there is no entry for this key,
// errNoCurrentEntry will be returned
template< class CKey, class CEntry >
inline typename CDynamicHashTable< CKey, CEntry >::ERR CDynamicHashTable< CKey, CEntry >::
ErrReplaceEntry( CLock* const plock, const CEntry& entry )
{
    DHTAssert( m_fInit );
    // verify the lock: replacement mutates, so a writer lock is required
    DHTAssert( FBKTWrite( plock ) || FBKTScan( plock ) );
    DHTAssert( plock->m_pBucketHead != NULL );
    DHTAssert( plock->m_pBucketHead->CRWL().FWriter() );
#ifdef DEBUG
    // for keyed write locks, both the current entry and the replacement
    // must hash-match the locked key (scan locks have no key to check)
    if ( FBKTWrite( plock ) )
    {
        CKeyEntry *pKeyEntry;
        BKTGetEntry( plock, &pKeyEntry );
        DHTAssert( pKeyEntry ? pKeyEntry->FEntryMatchesKey( plock->m_key ) : fTrue );
        DHTAssert( ((CKeyEntry &)entry).FEntryMatchesKey( plock->m_key ) );
    }
#endif
    // replace the entry
    return ErrBKTReplaceEntry( plock, entry );
}
// inserts a new entry corresponding to the key locked by the specified lock
// context (requires a write lock). if there is already an entry with this
// key in the table, errKeyDuplicate will be returned. if the new entry
// cannot be inserted, errOutOfMemory will be returned
template< class CKey, class CEntry >
inline typename CDynamicHashTable< CKey, CEntry >::ERR CDynamicHashTable< CKey, CEntry >::
ErrInsertEntry( CLock* const plock, const CEntry& entry )
{
    DHTAssert( m_fInit );
    // verify the lock
    DHTAssert( FBKTWrite( plock ) );
    DHTAssert( plock->m_pBucketHead != NULL );
    DHTAssert( plock->m_pBucketHead->CRWL().FWriter() );
    // NOTE(review): the entry/key match assert below is deliberately
    // disabled (unlike ErrReplaceEntry) — confirm whether callers may
    // insert entries whose key differs from the locked key
    /// DHTAssert( ((CKeyEntry &)entry).FEntryMatchesKey( plock->m_key ) );
    // insert the entry
    const ERR err = ErrBKTInsertEntry( plock, entry );
    if ( errSuccess == err )
    {
        // maintain our stats (per-processor entry/op counters)
        STATInsertEntry( plock->m_phs );
        // remember that we performed an insert so WriteUnlockKey /
        // EndHashScan will run table maintenance
        plock->m_fInsertOrDelete = fTrue;
    }
    return err;
}
// deletes the entry corresponding to the key locked by the specified lock
// context (requires a write or scan lock). if there is no entry for this
// key, errNoCurrentEntry will be returned
template< class CKey, class CEntry >
inline typename CDynamicHashTable< CKey, CEntry >::ERR CDynamicHashTable< CKey, CEntry >::
ErrDeleteEntry( CLock* const plock )
{
    DHTAssert( m_fInit );
    // verify the lock: deletion mutates, so a writer lock is required
    DHTAssert( FBKTWrite( plock ) || FBKTScan( plock ) );
    DHTAssert( plock->m_pBucketHead != NULL );
    DHTAssert( plock->m_pBucketHead->CRWL().FWriter() );
#ifdef DEBUG
    // for keyed write locks, the current entry (if any) must match the key
    if ( FBKTWrite( plock ) )
    {
        CKeyEntry *pKeyEntry;
        BKTGetEntry( plock, &pKeyEntry );
        DHTAssert( pKeyEntry ? pKeyEntry->FEntryMatchesKey( plock->m_key ) : fTrue );
    }
#endif
    if ( FBKTScan( plock ) )
    {
        // prepare the next-entry ptr so we can move-next after the delete
        // (the delete itself will disturb the currency). if we are deleting
        // the last entry in the bucket, make this NULL to force the cursor
        // to move into the next hash bucket
        DHTAssert( plock->m_pBucket != NULL );
        DHTAssert( plock->m_pEntryNext == NULL );
        plock->m_pEntryNext = ( plock->m_pEntry != plock->m_pBucket->m_pEntryLast ) ? plock->m_pEntry : NULL;
    }
    // delete the entry
    const ERR err = ErrBKTDeleteEntry( plock );
    if ( errSuccess == err )
    {
        // maintain our stats (per-processor entry/op counters)
        STATDeleteEntry( plock->m_phs );
        // remember that we performed a delete so WriteUnlockKey /
        // EndHashScan will run table maintenance
        plock->m_fInsertOrDelete = fTrue;
    }
    return err;
}
// sets up the specified lock context in preparation for scanning all entries
// in the hash table by physical storage order (i.e. not by key value order),
// starting at bucket 0 with before-first currency
//
// NOTE: caller MUST terminate scan with EndHashScan to release any outstanding locks
template< class CKey, class CEntry >
inline void CDynamicHashTable< CKey, CEntry >::
BeginHashScan( CLock* const plock )
{
    DHTAssert( m_fInit );
    // verify the lock context is not already in use
    DHTAssert( plock->m_ls == CLock::lsNil );
    // initialize the lock to start scanning at the first bucket (it may be empty!)
    plock->m_ls = CLock::lsScan;
    plock->m_fInsertOrDelete = fFalse;
    plock->m_iBucket = 0;
    // enter the state machine
    const int iGroup = UiSTEnter( &plock->m_phs );
    const ENUMSTATE esCurrent = EsSTGetState();
    // hash to the bucket we want (this may require a retry in grow/shrink mode)
    DHTAssert( plock->m_pBucketHead == NULL );
    plock->m_pBucketHead = PbucketDIRIHash( esCurrent, plock->m_iBucket );
    // acquire the lock as a writer (scans always use write locks so they
    // can delete/replace as they go)
    plock->m_pBucketHead->CRWL().EnterAsWriter();
    // NOTE: do not retry the hash function here because bucket 0 will never disappear
    // leave the state machine
    STLeave( iGroup, plock->m_phs );
    // set up the currency as before-first; m_pb == NULL means the bucket
    // holds no entries, so there is no next entry to stage
    plock->m_pBucket = plock->m_pBucketHead;
    plock->m_pEntryPrev = NULL;
    plock->m_pEntry = NULL;
    plock->m_pEntryNext = plock->m_pBucketHead->m_pb != NULL ? &plock->m_pBucketHead->m_rgEntry[0] : NULL;
}
// sets up the specified lock context in preparation for scanning all entries
// in the hash table by physical storage order (i.e. not by key value order),
// starting at the bucket that owns the given key, with before-first currency
//
// NOTE: caller MUST terminate scan with EndHashScan to release any outstanding locks
template< class CKey, class CEntry >
inline void CDynamicHashTable< CKey, CEntry >::
BeginHashScanFromKey( const CKey& key, CLock* const plock )
{
    NativeCounter   cBucket;
    NativeCounter   cBucketMax;
    NativeCounter   iHash;

    DHTAssert( m_fInit );
    // verify the lock context is not already in use
    DHTAssert( plock->m_ls == CLock::lsNil );
    // initialize the lock
    plock->m_ls = CLock::lsScan;
    plock->m_fInsertOrDelete = fFalse;
    // enter the state machine
    const int iGroup = UiSTEnter( &plock->m_phs );
    const ENUMSTATE esCurrent = EsSTGetState();
    // write-lock the key through the directory
    DIRWriteLockKey( esCurrent, key, plock );
    // calculate the current bucket configuration
    //
    // NOTES ON WHY THIS WILL WORK:
    //
    // cBucket may increase/decrease if we are in grow/shrink mode, but this won't effect the
    // calculation below unless it grows ahead of OR shrinks behind the bucket at iHash;
    // since we have the bucket at iHash locked, it cannot grow/shrink
    // cBucketMax cannot change unless we are in split mode, and even then we will be reading from the
    // COPY of the cBucketMax -- not the real cBucketMax which is changing
    cBucket = NcDIRIGetBucket( esCurrent );
    cBucketMax = NcDIRIGetBucketMax( esCurrent );
    DHTAssert( cBucketMax != 0 );
    // calculate the hash value and normalize it within the limits of the
    // current bucket configuration: mask to the [0, 2*cBucketMax) range,
    // then fold back any bucket that has not yet been split
    iHash = CKeyEntry::Hash( key );
    iHash = iHash & ( ( cBucketMax - 1 ) + cBucketMax );
    if ( iHash >= cBucketMax + cBucket )
        iHash -= cBucketMax;
    // remember which bucket we locked
    plock->m_iBucket = iHash;
#ifdef DEBUG
    {
        // verify that we have the correct bucket locked using only iHash
        NativeCounter   iExponent;
        NativeCounter   iRemainder;
        DIRILog2( iHash, &iExponent, &iRemainder );
        const PBUCKET pbucketT = PbucketDIRIResolve( iExponent, iRemainder );
        DHTAssert( pbucketT == plock->m_pBucketHead );
        DHTAssert( pbucketT->CRWL().FWriter() );
    }
#endif // DEBUG
    // leave the state machine
    STLeave( iGroup, plock->m_phs );
    // set up the currency as before-first; m_pb == NULL means the bucket
    // holds no entries, so there is no next entry to stage
    plock->m_pBucket = plock->m_pBucketHead;
    plock->m_pEntryPrev = NULL;
    plock->m_pEntry = NULL;
    plock->m_pEntryNext = plock->m_pBucketHead->m_pb != NULL ? &plock->m_pBucketHead->m_rgEntry[0] : NULL;
}
  3063. // moves the specified lock context to the next entry in the hash table by
  3064. // physical storage order. if the end of the index is reached,
  3065. // errNoCurrentEntry is returned.
  3066. template< class CKey, class CEntry >
  3067. inline typename CDynamicHashTable< CKey, CEntry >::ERR CDynamicHashTable< CKey, CEntry >::
  3068. ErrMoveNext( CLock* const plock, BOOL* const pfNewBucket )
  3069. {
  3070. DHTAssert( m_fInit );
  3071. // verify the lock
  3072. DHTAssert( FBKTScan( plock ) );
  3073. DHTAssert( plock->m_pEntryPrev == NULL );
  3074. // move to the next entry in this bucket
  3075. if ( plock->m_pEntry )
  3076. {
  3077. // we are already on an existing entry
  3078. if ( plock->m_pEntry + 1 < PentryBKTNextMost( plock->m_pBucket ) )
  3079. {
  3080. // we have not reached the end of the current BUCKET
  3081. plock->m_pEntry++;
  3082. }
  3083. else
  3084. {
  3085. // we are at the end of the current BUCKET
  3086. plock->m_pBucket = PbucketBKTNext( plock->m_pBucket );
  3087. if ( plock->m_pBucket )
  3088. {
  3089. // we moved to the next BUCKET
  3090. plock->m_pEntry = &plock->m_pBucket->m_rgEntry[0];
  3091. }
  3092. else
  3093. {
  3094. // there are no more BUCKET structures in this chain
  3095. plock->m_pEntry = NULL;
  3096. }
  3097. }
  3098. }
  3099. else
  3100. {
  3101. // we are not on an entry (before-first or after-last)
  3102. plock->m_pEntry = plock->m_pEntryNext;
  3103. }
  3104. plock->m_pEntryNext = NULL;
  3105. if ( plock->m_pEntry != NULL )
  3106. {
  3107. // we moved to an entry successfully
  3108. DHTAssert( plock->m_pBucket );
  3109. if ( pfNewBucket )
  3110. {
  3111. *pfNewBucket = fFalse;
  3112. }
  3113. return errSuccess;
  3114. }
  3115. // try to move to the next hash-bucket
  3116. if ( pfNewBucket )
  3117. {
  3118. *pfNewBucket = fTrue;
  3119. }
  3120. return ErrSCANMoveNext( plock );
  3121. }
// terminates a scan by releasing all outstanding locks and resetting the
// lock context; safe to call whether or not the scan still holds a bucket
template< class CKey, class CEntry >
inline void CDynamicHashTable< CKey, CEntry >::
EndHashScan( CLock* const plock )
{
    DHTAssert( m_fInit );
    // verify the lock
    DHTAssert( FBKTScan( plock ) );
    DHTAssert( plock->m_pEntryPrev == NULL );
    if ( plock->m_pBucketHead != NULL )
    {
        // unlock the current bucket
        plock->m_pBucketHead->CRWL().LeaveAsWriter();
        plock->m_pBucketHead = NULL;
        // we performed an insert or delete while holding the write lock;
        // maintenance runs only after the bucket lock has been released
        if ( plock->m_fInsertOrDelete )
        {
            // perform amortized maintenance on the table
            MaintainTable( plock->m_phs );
        }
    }
    // reset the lock
    plock->m_ls = CLock::lsNil;
    plock->m_fInsertOrDelete = fFalse;
}
  3147. }; // namespace DHT
  3148. using namespace DHT;
#endif // _DHT_HXX_INCLUDED