/*++ Copyright (c) 1998-2002 Microsoft Corporation Module Name : locks.cpp Abstract: A collection of locks for multithreaded access to data structures Author: George V. Reilly (GeorgeRe) 06-Jan-1998 Environment: Win32 - User Mode Project: LKRhash Revision History: --*/ #include "precomp.hxx" #define DLL_IMPLEMENTATION #define IMPLEMENTATION_EXPORT #include #include "_locks.h" //------------------------------------------------------------------------ // Not all Win32 platforms support all the functions we want. Set up dummy // thunks and use GetProcAddress to find their addresses at runtime. typedef BOOL (WINAPI * PFN_SWITCH_TO_THREAD)( VOID ); static BOOL WINAPI FakeSwitchToThread( VOID) { return FALSE; } PFN_SWITCH_TO_THREAD g_pfnSwitchToThread = NULL; typedef BOOL (WINAPI * PFN_TRY_ENTER_CRITICAL_SECTION)( IN OUT LPCRITICAL_SECTION lpCriticalSection ); static BOOL WINAPI FakeTryEnterCriticalSection( LPCRITICAL_SECTION /*lpCriticalSection*/) { return FALSE; } PFN_TRY_ENTER_CRITICAL_SECTION g_pfnTryEnterCritSec = NULL; typedef DWORD (WINAPI * PFN_SET_CRITICAL_SECTION_SPIN_COUNT)( LPCRITICAL_SECTION lpCriticalSection, DWORD dwSpinCount ); static DWORD WINAPI FakeSetCriticalSectionSpinCount( LPCRITICAL_SECTION /*lpCriticalSection*/, DWORD /*dwSpinCount*/) { // For faked critical sections, the previous spin count is just ZERO! return 0; } PFN_SET_CRITICAL_SECTION_SPIN_COUNT g_pfnSetCSSpinCount = NULL; DWORD g_cProcessors = 0; BOOL g_fLocksInitialized = FALSE; CSimpleLock g_lckInit; BOOL Locks_Initialize() { if (!g_fLocksInitialized) { g_lckInit.Enter(); if (!g_fLocksInitialized) { // load kernel32 and get NT-specific entry points HMODULE hKernel32 = GetModuleHandle(TEXT("kernel32.dll")); if (hKernel32 != NULL) { g_pfnSwitchToThread = (PFN_SWITCH_TO_THREAD) GetProcAddress(hKernel32, "SwitchToThread"); g_pfnTryEnterCritSec = (PFN_TRY_ENTER_CRITICAL_SECTION) GetProcAddress(hKernel32, "TryEnterCriticalSection"); g_pfnSetCSSpinCount = (PFN_SET_CRITICAL_SECTION_SPIN_COUNT) GetProcAddress(hKernel32, "SetCriticalSectionSpinCount"); } if (g_pfnSwitchToThread == NULL) g_pfnSwitchToThread = FakeSwitchToThread; if (g_pfnTryEnterCritSec == NULL) g_pfnTryEnterCritSec = FakeTryEnterCriticalSection; if (g_pfnSetCSSpinCount == NULL) g_pfnSetCSSpinCount = FakeSetCriticalSectionSpinCount; g_cProcessors = NumProcessors(); Lock_AtomicExchange((LONG*) &g_fLocksInitialized, TRUE); } g_lckInit.Leave(); } return TRUE; } BOOL Locks_Cleanup() { return TRUE; } #ifdef __LOCKS_NAMESPACE__ namespace Locks { #endif // __LOCKS_NAMESPACE__ #define LOCK_DEFAULT_SPIN_DATA(CLASS) \ WORD CLASS::sm_wDefaultSpinCount = LOCK_DEFAULT_SPINS; \ double CLASS::sm_dblDfltSpinAdjFctr = 0.5 #ifdef LOCK_INSTRUMENTATION # define LOCK_STATISTICS_DATA(CLASS) \ LONG CLASS::sm_cTotalLocks = 0; \ LONG CLASS::sm_cContendedLocks = 0; \ LONG CLASS::sm_nSleeps = 0; \ LONGLONG CLASS::sm_cTotalSpins = 0; \ LONG CLASS::sm_nReadLocks = 0; \ LONG CLASS::sm_nWriteLocks = 0 # define LOCK_STATISTICS_DUMMY_IMPLEMENTATION(CLASS) \ CLockStatistics CLASS::Statistics() const \ {return CLockStatistics();} \ CGlobalLockStatistics CLASS::GlobalStatistics() \ {return CGlobalLockStatistics();} \ void CLASS::ResetGlobalStatistics() \ {} # define LOCK_STATISTICS_REAL_IMPLEMENTATION(CLASS) \ \ /* Per-lock statistics */ \ CLockStatistics \ CLASS::Statistics() const \ { \ CLockStatistics ls; \ \ ls.m_nContentions = m_nContentions; \ ls.m_nSleeps = m_nSleeps; \ ls.m_nContentionSpins = m_nContentionSpins; \ if (m_nContentions > 0) \ ls.m_nAverageSpins = m_nContentionSpins / m_nContentions;\ else \ ls.m_nAverageSpins = 0; \ ls.m_nReadLocks = m_nReadLocks; \ ls.m_nWriteLocks = m_nWriteLocks; \ _tcscpy(ls.m_tszName, m_tszName); \ \ return ls; \ } \ \ \ /* Global statistics for CLASS */ \ CGlobalLockStatistics \ CLASS::GlobalStatistics() \ { \ CGlobalLockStatistics gls; \ \ gls.m_cTotalLocks = sm_cTotalLocks; \ gls.m_cContendedLocks = sm_cContendedLocks; \ gls.m_nSleeps = sm_nSleeps; \ gls.m_cTotalSpins = sm_cTotalSpins; \ if (sm_cContendedLocks > 0) \ gls.m_nAverageSpins = static_cast(sm_cTotalSpins / \ sm_cContendedLocks);\ else \ gls.m_nAverageSpins = 0; \ gls.m_nReadLocks = sm_nReadLocks; \ gls.m_nWriteLocks = sm_nWriteLocks; \ \ return gls; \ } \ \ \ /* Reset global statistics for CLASS */ \ void \ CLASS::ResetGlobalStatistics() \ { \ sm_cTotalLocks = 0; \ sm_cContendedLocks = 0; \ sm_nSleeps = 0; \ sm_cTotalSpins = 0; \ sm_nReadLocks = 0; \ sm_nWriteLocks = 0; \ } // Note: we are not using Interlocked operations for the shared // statistical counters. We'll lose perfect accuracy, but we'll // gain by reduced bus synchronization traffic. # define LOCK_INSTRUMENTATION_PROLOG() \ ++sm_cContendedLocks; \ LONG cTotalSpins = 0; \ WORD cSleeps = 0 // Don't need InterlockedIncrement or InterlockedExchangeAdd for // member variables, as the lock is now locked by this thread. # define LOCK_INSTRUMENTATION_EPILOG() \ ++m_nContentions; \ m_nSleeps += cSleeps; \ m_nContentionSpins += cTotalSpins; \ sm_nSleeps += cSleeps; \ sm_cTotalSpins += cTotalSpins #else // !LOCK_INSTRUMENTATION # define LOCK_STATISTICS_DATA(CLASS) # define LOCK_STATISTICS_DUMMY_IMPLEMENTATION(CLASS) # define LOCK_STATISTICS_REAL_IMPLEMENTATION(CLASS) # define LOCK_INSTRUMENTATION_PROLOG() # define LOCK_INSTRUMENTATION_EPILOG() #endif // !LOCK_INSTRUMENTATION //------------------------------------------------------------------------ // Function: RandomBackoffFactor // Synopsis: A fudge factor to help avoid synchronization problems //------------------------------------------------------------------------ double RandomBackoffFactor() { static const double s_aFactors[] = { 1.020, 0.965, 0.890, 1.065, 1.025, 1.115, 0.940, 0.995, 1.050, 1.080, 0.915, 0.980, 1.010, }; const int nFactors = sizeof(s_aFactors) / sizeof(s_aFactors[0]); // Alternatives for nRand include a static counter // or the low DWORD of QueryPerformanceCounter(). DWORD nRand = ::GetCurrentThreadId(); return s_aFactors[nRand % nFactors]; } //------------------------------------------------------------------------ // Function: SwitchOrSleep // Synopsis: If possible, yields the thread with SwitchToThread. If that // doesn't work, calls Sleep. //------------------------------------------------------------------------ void SwitchOrSleep( DWORD dwSleepMSec) { #ifdef LOCKS_SWITCH_TO_THREAD if (!g_pfnSwitchToThread()) #endif Sleep(dwSleepMSec); } // CSmallSpinLock static member variables LOCK_DEFAULT_SPIN_DATA(CSmallSpinLock); #ifdef LOCK_SMALL_SPIN_INSTRUMENTATION LOCK_STATISTICS_DATA(CSmallSpinLock); LOCK_STATISTICS_REAL_IMPLEMENTATION(CSmallSpinLock); #endif // LOCK_SMALL_SPIN_INSTRUMENTATION //------------------------------------------------------------------------ // Function: CSmallSpinLock::_LockSpin // Synopsis: Acquire an exclusive lock. Blocks until acquired. //------------------------------------------------------------------------ void CSmallSpinLock::_LockSpin() { #ifdef LOCK_SMALL_SPIN_INSTRUMENTATION LOCK_INSTRUMENTATION_PROLOG(); #endif // LOCK_SMALL_SPIN_INSTRUMENTATION DWORD dwSleepTime = 0; LONG cBaseSpins = sm_wDefaultSpinCount; LONG cBaseSpins2 = static_cast(cBaseSpins * RandomBackoffFactor()); // This lock cannot be acquired recursively. Attempting to do so will // deadlock this thread forever. Use CSpinLock instead if you need that // kind of lock. if (m_lTid == _CurrentThreadId()) { IRTLASSERT( !"CSmallSpinLock: Illegally attempted to acquire lock recursively"); DebugBreak(); } while (!_TryLock()) { // Only spin on a multiprocessor machine and then only if // spinning is enabled if (g_cProcessors > 1 && cBaseSpins != LOCK_DONT_SPIN) { LONG cSpins = cBaseSpins2; // Check no more than cBaseSpins2 times then yield. // It is important not to use the InterlockedExchange in the // inner loop in order to minimize system memory bus traffic. while (m_lTid != 0) { if (--cSpins < 0) { #ifdef LOCK_SMALL_SPIN_INSTRUMENTATION cTotalSpins += cBaseSpins2; ++cSleeps; #endif // LOCK_SMALL_SPIN_INSTRUMENTATION SwitchOrSleep(dwSleepTime) ; // Backoff algorithm: reduce (or increase) busy wait time cBaseSpins2 = (int) (cBaseSpins2 * sm_dblDfltSpinAdjFctr); // LOCK_MINIMUM_SPINS <= cBaseSpins2 <= LOCK_MAXIMUM_SPINS cBaseSpins2 = min(LOCK_MAXIMUM_SPINS, cBaseSpins2); cBaseSpins2 = max(cBaseSpins2, LOCK_MINIMUM_SPINS); cSpins = cBaseSpins2; // Using Sleep(0) leads to the possibility of priority // inversion. Sleep(0) only yields the processor if // there's another thread of the same priority that's // ready to run. If a high-priority thread is trying to // acquire the lock, which is held by a low-priority // thread, then the low-priority thread may never get // scheduled and hence never free the lock. NT attempts // to avoid priority inversions by temporarily boosting // the priority of low-priority runnable threads, but the // problem can still occur if there's a medium-priority // thread that's always runnable. If Sleep(1) is used, // then the thread unconditionally yields the CPU. We // only do this for the second and subsequent even // iterations, since a millisecond is a long time to wait // if the thread can be scheduled in again sooner // (~100,000 instructions). // Avoid priority inversion: 0, 1, 0, 1,... dwSleepTime = !dwSleepTime; } else { Lock_Yield(); } } // Lock is now available, but we still need to do the // InterlockedExchange to atomically grab it for ourselves. #ifdef LOCK_SMALL_SPIN_INSTRUMENTATION cTotalSpins += cBaseSpins2 - cSpins; #endif // LOCK_SMALL_SPIN_INSTRUMENTATION } // On a 1P machine, busy waiting is a waste of time else { #ifdef LOCK_SMALL_SPIN_INSTRUMENTATION ++cSleeps; #endif // LOCK_SMALL_SPIN_INSTRUMENTATION SwitchOrSleep(dwSleepTime); // Avoid priority inversion: 0, 1, 0, 1,... dwSleepTime = !dwSleepTime; } } #ifdef LOCK_SMALL_SPIN_INSTRUMENTATION LOCK_INSTRUMENTATION_EPILOG(); #endif // LOCK_SMALL_SPIN_INSTRUMENTATION } // CSpinLock static member variables LOCK_DEFAULT_SPIN_DATA(CSpinLock); LOCK_STATISTICS_DATA(CSpinLock); LOCK_STATISTICS_REAL_IMPLEMENTATION(CSpinLock); //------------------------------------------------------------------------ // Function: CSpinLock::_LockSpin // Synopsis: Acquire an exclusive lock. Blocks until acquired. //------------------------------------------------------------------------ void CSpinLock::_LockSpin() { LOCK_INSTRUMENTATION_PROLOG(); DWORD dwSleepTime = 0; bool fAcquiredLock = false; LONG cBaseSpins = sm_wDefaultSpinCount; cBaseSpins = static_cast(cBaseSpins * RandomBackoffFactor()); while (!fAcquiredLock) { // Only spin on a multiprocessor machine and then only if // spinning is enabled if (g_cProcessors > 1 && sm_wDefaultSpinCount != LOCK_DONT_SPIN) { LONG cSpins = cBaseSpins; // Check no more than cBaseSpins times then yield while (m_lTid != 0) { if (--cSpins < 0) { #ifdef LOCK_INSTRUMENTATION cTotalSpins += cBaseSpins; ++cSleeps; #endif // LOCK_INSTRUMENTATION SwitchOrSleep(dwSleepTime) ; // Backoff algorithm: reduce (or increase) busy wait time cBaseSpins = (int) (cBaseSpins * sm_dblDfltSpinAdjFctr); // LOCK_MINIMUM_SPINS <= cBaseSpins <= LOCK_MAXIMUM_SPINS cBaseSpins = min(LOCK_MAXIMUM_SPINS, cBaseSpins); cBaseSpins = max(cBaseSpins, LOCK_MINIMUM_SPINS); cSpins = cBaseSpins; // Avoid priority inversion: 0, 1, 0, 1,... dwSleepTime = !dwSleepTime; } else { Lock_Yield(); } } // Lock is now available, but we still need to atomically // update m_cOwners and m_nThreadId to grab it for ourselves. #ifdef LOCK_INSTRUMENTATION cTotalSpins += cBaseSpins - cSpins; #endif // LOCK_INSTRUMENTATION } // on a 1P machine, busy waiting is a waste of time else { #ifdef LOCK_INSTRUMENTATION ++cSleeps; #endif // LOCK_INSTRUMENTATION SwitchOrSleep(dwSleepTime); // Avoid priority inversion: 0, 1, 0, 1,... dwSleepTime = !dwSleepTime; } // Is the lock unowned? if (_TryLock()) fAcquiredLock = true; // got the lock } IRTLASSERT((m_lTid & OWNER_MASK) > 0 && (m_lTid & THREAD_MASK) == _CurrentThreadId()); LOCK_INSTRUMENTATION_EPILOG(); } // CCritSec static member variables LOCK_DEFAULT_SPIN_DATA(CCritSec); LOCK_STATISTICS_DATA(CCritSec); LOCK_STATISTICS_DUMMY_IMPLEMENTATION(CCritSec); bool CCritSec::TryWriteLock() { IRTLASSERT(g_pfnTryEnterCritSec != NULL); return g_pfnTryEnterCritSec(&m_cs) ? true : false; } //------------------------------------------------------------------------ // Function: CCritSec::SetSpinCount // Synopsis: This function is used to call the appropriate underlying // functions to set the spin count for the supplied critical // section. The original function is supposed to be exported out // of kernel32.dll from NT 4.0 SP3. If the func is not available // from the dll, we will use a fake function. // // Arguments: // lpCriticalSection // Points to the critical section object. // // dwSpinCount // Supplies the spin count for the critical section object. For UP // systems, the spin count is ignored and the critical section spin // count is set to 0. For MP systems, if contention occurs, instead of // waiting on a semaphore associated with the critical section, the // calling thread will spin for spin count iterations before doing the // hard wait. If the critical section becomes free during the spin, a // wait is avoided. // // Returns: // The previous spin count for the critical section is returned. //------------------------------------------------------------------------ DWORD CCritSec::SetSpinCount( LPCRITICAL_SECTION pcs, DWORD dwSpinCount) { IRTLASSERT(g_pfnSetCSSpinCount != NULL); return g_pfnSetCSSpinCount(pcs, dwSpinCount); } // CFakeLock static member variables LOCK_DEFAULT_SPIN_DATA(CFakeLock); LOCK_STATISTICS_DATA(CFakeLock); LOCK_STATISTICS_DUMMY_IMPLEMENTATION(CFakeLock); // CReaderWriterLock static member variables LOCK_DEFAULT_SPIN_DATA(CReaderWriterLock); LOCK_STATISTICS_DATA(CReaderWriterLock); LOCK_STATISTICS_REAL_IMPLEMENTATION(CReaderWriterLock); void CReaderWriterLock::_LockSpin( bool fWrite) { LOCK_INSTRUMENTATION_PROLOG(); DWORD dwSleepTime = 0; LONG cBaseSpins = static_cast(sm_wDefaultSpinCount * RandomBackoffFactor()); LONG cSpins = cBaseSpins; for (;;) { if (g_cProcessors < 2 || sm_wDefaultSpinCount == LOCK_DONT_SPIN) cSpins = 1; // must loop once to call _TryRWLock for (int i = cSpins; --i >= 0; ) { bool fLock = fWrite ? _TryWriteLock() : _TryReadLock(); if (fLock) { #ifdef LOCK_INSTRUMENTATION cTotalSpins += (cSpins - i - 1); #endif // LOCK_INSTRUMENTATION goto locked; } Lock_Yield(); } #ifdef LOCK_INSTRUMENTATION cTotalSpins += cBaseSpins; ++cSleeps; #endif // LOCK_INSTRUMENTATION SwitchOrSleep(dwSleepTime) ; dwSleepTime = !dwSleepTime; // Avoid priority inversion: 0, 1, 0, 1,... // Backoff algorithm: reduce (or increase) busy wait time cBaseSpins = (int) (cBaseSpins * sm_dblDfltSpinAdjFctr); // LOCK_MINIMUM_SPINS <= cBaseSpins <= LOCK_MAXIMUM_SPINS cBaseSpins = min(LOCK_MAXIMUM_SPINS, cBaseSpins); cBaseSpins = max(cBaseSpins, LOCK_MINIMUM_SPINS); cSpins = cBaseSpins; } locked: IRTLASSERT(fWrite ? IsWriteLocked() : IsReadLocked()); LOCK_INSTRUMENTATION_EPILOG(); } // CReaderWriterLock2 static member variables LOCK_DEFAULT_SPIN_DATA(CReaderWriterLock2); LOCK_STATISTICS_DATA(CReaderWriterLock2); LOCK_STATISTICS_REAL_IMPLEMENTATION(CReaderWriterLock2); void CReaderWriterLock2::_WriteLockSpin() { // Add ourselves to the queue of waiting writers for (LONG l = m_lRW; !_CmpExch(l + SL_WRITER_INCR, l); l = m_lRW) { Lock_Yield(); } _LockSpin(true); } void CReaderWriterLock2::_LockSpin( bool fWrite) { LOCK_INSTRUMENTATION_PROLOG(); DWORD dwSleepTime = 0; LONG cBaseSpins = static_cast(sm_wDefaultSpinCount * RandomBackoffFactor()); LONG cSpins = cBaseSpins; for (;;) { if (g_cProcessors < 2 || sm_wDefaultSpinCount == LOCK_DONT_SPIN) cSpins = 1; // must loop once to call _TryRWLock for (int i = cSpins; --i >= 0; ) { bool fLock = fWrite ? _TryWriteLock(0) : _TryReadLock(); if (fLock) { #ifdef LOCK_INSTRUMENTATION cTotalSpins += (cSpins - i - 1); #endif // LOCK_INSTRUMENTATION goto locked; } Lock_Yield(); } #ifdef LOCK_INSTRUMENTATION cTotalSpins += cBaseSpins; ++cSleeps; #endif // LOCK_INSTRUMENTATION SwitchOrSleep(dwSleepTime) ; dwSleepTime = !dwSleepTime; // Avoid priority inversion: 0, 1, 0, 1,... // Backoff algorithm: reduce (or increase) busy wait time cBaseSpins = (int) (cBaseSpins * sm_dblDfltSpinAdjFctr); // LOCK_MINIMUM_SPINS <= cBaseSpins <= LOCK_MAXIMUM_SPINS cBaseSpins = min(LOCK_MAXIMUM_SPINS, cBaseSpins); cBaseSpins = max(cBaseSpins, LOCK_MINIMUM_SPINS); cSpins = cBaseSpins; } locked: IRTLASSERT(fWrite ? IsWriteLocked() : IsReadLocked()); LOCK_INSTRUMENTATION_EPILOG(); } // CReaderWriterLock3 static member variables LOCK_DEFAULT_SPIN_DATA(CReaderWriterLock3); LOCK_STATISTICS_DATA(CReaderWriterLock3); LOCK_STATISTICS_REAL_IMPLEMENTATION(CReaderWriterLock3); void CReaderWriterLock3::_WriteLockSpin() { // Add ourselves to the queue of waiting writers for (LONG l = m_lRW; !_CmpExch(l + SL_WRITER_INCR, l); l = m_lRW) { Lock_Yield(); } _LockSpin(SPIN_WRITE); } void CReaderWriterLock3::_LockSpin( SPIN_TYPE st) { LOCK_INSTRUMENTATION_PROLOG(); DWORD dwSleepTime = 0; LONG cBaseSpins = static_cast(sm_wDefaultSpinCount * RandomBackoffFactor()); LONG cSpins = cBaseSpins; for (;;) { if (g_cProcessors < 2 || sm_wDefaultSpinCount == LOCK_DONT_SPIN) cSpins = 1; // must loop once to call _TryRWLock for (int i = cSpins; --i >= 0; ) { bool fLock; if (st == SPIN_WRITE) fLock = _TryWriteLock(0); else if (st == SPIN_READ) fLock = _TryReadLock(); else { IRTLASSERT(st == SPIN_READ_RECURSIVE); fLock = _TryReadLockRecursive(); } if (fLock) { #ifdef LOCK_INSTRUMENTATION cTotalSpins += (cSpins - i - 1); #endif // LOCK_INSTRUMENTATION goto locked; } Lock_Yield(); } #ifdef LOCK_INSTRUMENTATION cTotalSpins += cBaseSpins; ++cSleeps; #endif // LOCK_INSTRUMENTATION SwitchOrSleep(dwSleepTime) ; dwSleepTime = !dwSleepTime; // Avoid priority inversion: 0, 1, 0, 1,... // Backoff algorithm: reduce (or increase) busy wait time cBaseSpins = (int) (cBaseSpins * sm_dblDfltSpinAdjFctr); // LOCK_MINIMUM_SPINS <= cBaseSpins <= LOCK_MAXIMUM_SPINS cBaseSpins = min(LOCK_MAXIMUM_SPINS, cBaseSpins); cBaseSpins = max(cBaseSpins, LOCK_MINIMUM_SPINS); cSpins = cBaseSpins; } locked: IRTLASSERT((st == SPIN_WRITE) ? IsWriteLocked() : IsReadLocked()); LOCK_INSTRUMENTATION_EPILOG(); } #ifdef __LOCKS_NAMESPACE__ } #endif // __LOCKS_NAMESPACE__