Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

691 lines
18 KiB

  1. /*++
  2. Copyright (C) Microsoft Corporation, 1997 - 1999
  3. Module Name:
  4. bcache.cxx
  5. Abstract:
  6. RPC's buffer cache implementation
  7. Author:
  8. Mario Goertzel [MarioGo]
  9. Revision History:
  10. MarioGo 9/7/1997 Bits 'n pieces
  11. KamenM 5/15/2001 Rewrite the paged bcache implementation
  12. GrigoriK 6/2002 Rewrite bcache to work with read-only page heap
  13. eliminating paged bcache infrastructure.
  14. --*/
  15. #include <precomp.hxx>
  ////////////////////////////////////////////////////////////
  // (Internal) Perf counters
  //
  // Uncomment the define below to compile the counters in. When the
  // define is absent INC_STAT() expands to nothing.
  //#define BUFFER_CACHE_STATS

  #if !defined(BUFFER_CACHE_STATS)

  #define INC_STAT(x)

  #else

  LONG cAllocs = 0;        // bumped on entry to BCACHE::Allocate
  LONG cFrees = 0;         // bumped in BCACHE::Free
  LONG cAllocsMissed = 0;  // bumped when Allocate finds the thread cache empty
  LONG cFreesBack = 0;     // bumped in BCACHE::FreeHelper

  #define INC_STAT(x) InterlockedIncrement(&x)

  #endif
  28. ////////////////////////////////////////////////////////////
  29. typedef BCACHE_STATE *PBCTLS;
  30. ////////////////////////////////////////////////////////////
  31. // Default hints
  32. CONST BUFFER_CACHE_HINTS gCacheHints[4] =
  33. {
  34. // 64 bits and WOW6432 use larger message size
  35. #if defined(_WIN64) || defined(USE_LPC6432)
  36. {1, 4, 512}, // LRPC message size and small calls
  37. #else
  38. {1, 4, 256}, // LRPC message size and small calls
  39. #endif
  40. {1, 3, 1024}, // Default CO receive size
  41. {1, 3, 4096+44}, // Default UDP receive size
  42. {1, 3, 5840} // Maximum CO fragment size
  43. };
  44. BUFFER_CACHE_HINTS *pHints = (BUFFER_CACHE_HINTS *)gCacheHints;
  45. BCacheMode gBCacheMode = BCacheModeCached;
  46. BCACHE *gBufferCache;
  47. BCACHE::BCACHE( OUT RPC_STATUS &status)
  48. // The default process heap lock spin count. This lock is held only
  49. // for a very short time while pushing/poping into a singly linked list.
  50. // PERF: move to a user-mode slist implementation if available.
  51. : _csBufferCacheLock(&status, TRUE, 4000)
  52. {
  53. DWORD Type;
  54. DWORD DataSize;
  55. DWORD Value;
  56. if (status != RPC_S_OK)
  57. return;
  58. // Compute the per cache size default buffer cache cap.
  59. // This only matters for the default mode.
  60. UINT cCapBytes = 20 * 1024; // Start at 20KB for UP workstations.
  61. if (gfServerPlatform) cCapBytes *= 2; // *2 for servers
  62. if (gNumberOfProcessors > 1) cCapBytes *= 2; // *2 for MP boxes
  63. for (int i = 0; i < 4; i++)
  64. {
  65. _bcGlobalState[i].cBlocks= 0;
  66. _bcGlobalState[i].pList = 0;
  67. if (gBCacheMode == BCacheModeDirect)
  68. {
  69. _bcGlobalStats[i].cBufferCacheCap = 0;
  70. _bcGlobalStats[i].cAllocationHits = 0;
  71. _bcGlobalStats[i].cAllocationMisses = 0;
  72. }
  73. else
  74. {
  75. _bcGlobalStats[i].cBufferCacheCap = cCapBytes / pHints[i].cSize;
  76. // We keeps stats on process wide cache hits and misses from the
  77. // cache. We initially give credit for 2x allocations required
  78. // to load the cache. Any adjustments to the cap, up only, occur
  79. // in ::FreeHelper.
  80. _bcGlobalStats[i].cAllocationHits = _bcGlobalStats[i].cBufferCacheCap * 2*8;
  81. _bcGlobalStats[i].cAllocationMisses = 0;
  82. }
  83. }
  84. return;
  85. }
  86. BCACHE::~BCACHE()
  87. {
  88. // There should be only one BCACHE object that lives forever.
  89. // This destructor will be called iff _csBufferCacheLock could
  90. // not be initialized in the constructor. We do not need to do
  91. // anything since the body of constructor did not execute.
  92. }
  93. PVOID
  94. BCACHE::Allocate(CONST size_t cSize)
  95. {
  96. PBUFFER pBuffer;
  97. int index;
  98. INC_STAT(cAllocs);
  99. // In direct bcache mode try to allocate from heap. We favor
  100. // full release over speed in order to catch the offenders
  101. // who touch memory after releasing it.
  102. if (gBCacheMode == BCacheModeDirect)
  103. {
  104. return(AllocHelper(cSize,
  105. -1, // Index
  106. 0 // per thread cache
  107. ));
  108. }
  109. // Find the right bucket, if any. Binary search.
  110. if (cSize <= pHints[1].cSize)
  111. {
  112. if (cSize <= pHints[0].cSize)
  113. {
  114. index = 0;
  115. }
  116. else
  117. {
  118. index = 1;
  119. }
  120. }
  121. else
  122. {
  123. if (cSize <= pHints[2].cSize)
  124. {
  125. index = 2;
  126. }
  127. else
  128. {
  129. if (cSize <= pHints[3].cSize)
  130. {
  131. index = 3;
  132. }
  133. else
  134. {
  135. return(AllocHelper(cSize,
  136. -1, // Index
  137. 0 // per thread cache
  138. ));
  139. }
  140. }
  141. }
  142. // Try the per-thread cache, this is the 90% case
  143. THREAD *pThread = RpcpGetThreadPointer();
  144. ASSERT(pThread);
  145. PBCTLS pbctls = pThread->BufferCache;
  146. if (pbctls[index].pList)
  147. {
  148. // we shouldn't have anything in the thread cache in paged bcache mode
  149. ASSERT(gBCacheMode == BCacheModeCached);
  150. ASSERT(pbctls[index].cBlocks);
  151. pBuffer = pbctls[index].pList;
  152. pbctls[index].pList = pBuffer->pNext;
  153. pbctls[index].cBlocks--;
  154. pBuffer->index = index + 1;
  155. LogEvent(SU_BCACHE, EV_BUFFER_OUT, pBuffer, 0, index, 1, 2);
  156. return((PVOID)(pBuffer + 1));
  157. }
  158. // This is the 10% case
  159. INC_STAT(cAllocsMissed);
  160. return(AllocHelper(cSize, index, pbctls));
  161. }
  162. PVOID
  163. BCACHE::AllocHelper(
  164. IN size_t cSize,
  165. IN INT index,
  166. PBCTLS pbctls
  167. )
  168. /*++
  169. Routine Description:
  170. Called by BCACHE::Alloc on either large buffers (index == -1)
  171. or when the per-thread cache is empty.
  172. Arguments:
  173. cSize - Size of the block to allocate.
  174. index - The bucket index for this size of block
  175. pbctls - The per-thread cache, NULL iff index == -1.
  176. Return Value:
  177. 0 - out of memory
  178. non-zero - A pointer to a block at least 'cSize' bytes long. The returned
  179. pointer is to the user portion of the block.
  180. --*/
  181. {
  182. PBUFFER pBuffer = NULL;
  183. LIST_ENTRY *CurrentListEntry;
  184. BOOL fFoundUncommittedSegment;
  185. ULONG TargetSegmentSize;
  186. PVOID SegmentStartAddress;
  187. PVOID pTemp;
  188. BOOL Result;
  189. // Large buffers are a special case. Go dirrectly to the heap.
  190. if (index == -1)
  191. {
  192. pBuffer = AllocBlock(cSize);
  193. if (pBuffer)
  194. {
  195. LogEvent(SU_BCACHE, EV_BUFFER_OUT, pBuffer, 0, index, 1, 2);
  196. return((PVOID(pBuffer + 1)));
  197. }
  198. LogEvent(SU_BCACHE, EV_BUFFER_FAIL, 0, 0, index, 1);
  199. return(0);
  200. }
  201. // Try to allocate a process cached buffer
  202. // loop to avoid taking the mutex in the empty list case.
  203. // This allows us to opportunistically take it in the
  204. // non-empty list case only.
  205. do
  206. {
  207. if (0 == _bcGlobalState[index].pList)
  208. {
  209. // Looks like there are no global buffer available, allocate
  210. // a new buffer.
  211. ASSERT(IsBufferSizeAligned(sizeof(BUFFER_HEAD)));
  212. cSize = pHints[index].cSize + sizeof(BUFFER_HEAD);
  213. pBuffer = (PBUFFER) new BYTE[cSize];
  214. if (!pBuffer)
  215. {
  216. LogEvent(SU_BCACHE, EV_BUFFER_FAIL, 0, 0, index, 1);
  217. return(0);
  218. }
  219. _bcGlobalStats[index].cAllocationMisses++;
  220. break;
  221. }
  222. _csBufferCacheLock.Request();
  223. if (_bcGlobalState[index].pList)
  224. {
  225. ASSERT(_bcGlobalState[index].cBlocks);
  226. pBuffer = _bcGlobalState[index].pList;
  227. _bcGlobalState[index].cBlocks--;
  228. _bcGlobalStats[index].cAllocationHits++;
  229. ASSERT(pbctls[index].pList == NULL);
  230. ASSERT(pbctls[index].cBlocks == 0);
  231. PBUFFER pkeep = pBuffer;
  232. UINT cBlocksMoved = 0;
  233. while (pkeep->pNext && cBlocksMoved < pHints[index].cLowWatermark)
  234. {
  235. pkeep = pkeep->pNext;
  236. cBlocksMoved++;
  237. }
  238. pbctls[index].cBlocks = cBlocksMoved;
  239. _bcGlobalState[index].cBlocks -= cBlocksMoved;
  240. _bcGlobalStats[index].cAllocationHits += cBlocksMoved;
  241. // Now we have the head of the list to move to this
  242. // thread (pBuffer->pNext) and the tail (pkeep).
  243. // Block counts in the global state and thread state have
  244. // already been updated.
  245. pbctls[index].pList = pBuffer->pNext;
  246. ASSERT(pkeep->pNext || _bcGlobalState[index].cBlocks == 0);
  247. _bcGlobalState[index].pList = pkeep->pNext;
  248. // Break the link (if any) between the new per thread list
  249. // and the blocks which will remain in the process list.
  250. pkeep->pNext = NULL;
  251. }
  252. _csBufferCacheLock.Clear();
  253. }
  254. while (NULL == pBuffer );
  255. ASSERT(pBuffer);
  256. ASSERT(IsBufferAligned(pBuffer));
  257. pBuffer->index = index + 1;
  258. LogEvent(SU_BCACHE, EV_BUFFER_OUT, pBuffer, 0, index, 1, 2);
  259. return((PVOID(pBuffer + 1)));
  260. }
  261. VOID
  262. BCACHE::Free(PVOID p)
  263. /*++
  264. Routine Description:
  265. The fast (common) free path. For large blocks it just deletes them. For
  266. small blocks that are inserted into the thread cache. If the thread
  267. cache is too large it calls FreeHelper().
  268. Arguments:
  269. p - The pointer to free.
  270. Return Value:
  271. None
  272. --*/
  273. {
  274. PBUFFER pBuffer = ((PBUFFER )p - 1);
  275. INT index;
  276. ASSERT(((pBuffer->index >= 1) && (pBuffer->index <= 4)) || (pBuffer->index == -1));
  277. index = pBuffer->index - 1;
  278. LogEvent(SU_BCACHE, EV_BUFFER_IN, pBuffer, 0, index, 1, 1);
  279. INC_STAT(cFrees);
  280. if (index >= 0)
  281. {
  282. // Free to thread cache
  283. THREAD *pThread = RpcpGetThreadPointer();
  284. if (NULL == pThread)
  285. {
  286. // No thread cache available - free to process cache.
  287. FreeBuffers(pBuffer, index, 1);
  288. return;
  289. }
  290. PBCTLS pbctls = pThread->BufferCache;
  291. pBuffer->pNext = pbctls[index].pList;
  292. pbctls[index].pList = pBuffer;
  293. pbctls[index].cBlocks++;
  294. if (pbctls[index].cBlocks >= pHints[index].cHighWatermark)
  295. {
  296. // 10% case - Too many blocks in the thread cache, free to process cache
  297. FreeHelper(p, index, pbctls);
  298. }
  299. }
  300. else
  301. {
  302. FreeBlock(pBuffer);
  303. }
  304. return;
  305. }
  306. VOID
  307. BCACHE::FreeHelper(PVOID p, INT index, PBCTLS pbctls)
  308. /*++
  309. Routine Description:
  310. Called only by Free(). Separate code to avoid unneeded saves/
  311. restores in the Free() function. Called when too many
  312. blocks are in a thread cache bucket.
  313. Arguments:
  314. p - The pointer being freed, used if pbctls is NULL
  315. index - The bucket index of this block
  316. pbctls - A pointer to the thread cache structure. If
  317. NULL the this thread has no cache (yet) p should
  318. be directly freed.
  319. Return Value:
  320. None
  321. --*/
  322. {
  323. ASSERT(pbctls[index].cBlocks == pHints[index].cHighWatermark);
  324. INC_STAT(cFreesBack);
  325. // First, build the list to free from the TLS cache
  326. // Note: We free the buffers at the *end* of the per thread cache. This helps
  327. // keep a set of buffers near this thread and (with luck) associated processor.
  328. PBUFFER ptail = pbctls[index].pList;
  329. // pbctls[index].pList contains the new keep list. (aka pBuffer)
  330. // ptail is the pointer to the *end* of the keep list.
  331. // ptail->pNext will be the head of the list to free.
  332. // One element already in keep list.
  333. ASSERT(pHints[index].cLowWatermark >= 1);
  334. for (unsigned i = 1; i < pHints[index].cLowWatermark; i++)
  335. {
  336. ptail = ptail->pNext; // Move up in the free list
  337. ASSERT(ptail);
  338. }
  339. // Save the list to free and break the link between keep list and free list.
  340. PBUFFER pfree = ptail->pNext;
  341. ptail->pNext = NULL;
  342. // Thread cache now contains on low watermark elements.
  343. pbctls[index].cBlocks = pHints[index].cLowWatermark;
  344. // Now we need to free the extra buffers to the process cache
  345. FreeBuffers(pfree, index, pHints[index].cHighWatermark - pHints[index].cLowWatermark);
  346. return;
  347. }
  348. VOID
  349. BCACHE::FreeBuffers(PBUFFER pBuffers, INT index, UINT cBuffers)
  350. /*++
  351. Routine Description:
  352. Frees a set of buffers to the global (process) cache. Maybe called when a
  353. thread has exceeded the number of buffers is wants to cache or when a
  354. thread doesn't have a thread cache but we still need to free a buffer.
  355. Arguments:
  356. pBuffers - A linked list of buffers which need to be freed.
  357. cBuffers - A count of the buffers to be freed.
  358. Return Value:
  359. None
  360. --*/
  361. {
  362. PBUFFER pfree = pBuffers;
  363. BOOL Result;
  364. PVOID Allocation;
  365. // Special case for the freeing without a TLS blob. We're freeing just
  366. // one buffer but it's next pointer may not be NULL.
  367. if (cBuffers == 1)
  368. {
  369. pfree->pNext = 0;
  370. }
  371. // Find the end of the to free list
  372. PBUFFER ptail = pfree;
  373. while(ptail->pNext)
  374. {
  375. ptail = ptail->pNext;
  376. }
  377. // We have a set of cBuffers buffers starting with pfree and ending with
  378. // ptail that need to move into the process wide cache now.
  379. _csBufferCacheLock.Request();
  380. // If we have too many free buffers or the cache is off we'll throw away these extra buffers.
  381. if ((_bcGlobalState[index].cBlocks >= _bcGlobalStats[index].cBufferCacheCap)
  382. || (gBCacheMode == BCacheModeDirect))
  383. {
  384. // It looks like we have too many buffers or the cache is off. We can either increase the buffer
  385. // cache cap or really free the buffers.
  386. if ((_bcGlobalStats[index].cAllocationHits > _bcGlobalStats[index].cAllocationMisses * 8)
  387. || (gBCacheMode == BCacheModeDirect))
  388. {
  389. // Cache hit rate looks good or we don't want cache, we're going to really free the buffers.
  390. // Don't hold the lock while actually freeing to the heap.
  391. _csBufferCacheLock.Clear();
  392. PBUFFER psave;
  393. while(pfree)
  394. {
  395. psave = pfree->pNext;
  396. delete pfree;
  397. pfree = psave;
  398. }
  399. return;
  400. }
  401. // Hit rate looks BAD. Time to bump up the buffer cache cap.
  402. UINT cNewCap = _bcGlobalStats[index].cBufferCacheCap;
  403. cNewCap = min(cNewCap + 32, cNewCap * 2);
  404. _bcGlobalStats[index].cBufferCacheCap = cNewCap;
  405. // Start keeping new stats, start with a balanced ratio of hits to misses.
  406. // We'll get at least (cBlocks + cfree) more hits before the next new miss.
  407. _bcGlobalStats[index].cAllocationHits = 8 * cNewCap;
  408. _bcGlobalStats[index].cAllocationMisses = 0;
  409. // Drop into regular free path, we're going to keep these buffers.
  410. }
  411. _csBufferCacheLock.VerifyOwned();
  412. ptail->pNext = _bcGlobalState[index].pList;
  413. _bcGlobalState[index].pList = pfree;
  414. _bcGlobalState[index].cBlocks += cBuffers;
  415. _csBufferCacheLock.Clear();
  416. return;
  417. }
  418. void
  419. BCACHE::ThreadDetach(THREAD *pThread)
  420. /*++
  421. Routine Description:
  422. Called when a thread dies. Moves any cached buffes into
  423. the process wide cache.
  424. Arguments:
  425. pThread - The thread object of the thread which is dying.
  426. Return Value:
  427. None
  428. --*/
  429. {
  430. PBCTLS pbctls = pThread->BufferCache;
  431. INT index;
  432. // CacheLevelOff mode has no thread cache.
  433. if (gBCacheMode == BCacheModeDirect)
  434. {
  435. ASSERT(pbctls[0].pList == 0);
  436. ASSERT(pbctls[1].pList == 0);
  437. ASSERT(pbctls[2].pList == 0);
  438. ASSERT(pbctls[3].pList == 0);
  439. }
  440. for (index = 0; index < 4; index++)
  441. {
  442. if (pbctls[index].pList)
  443. {
  444. ASSERT(pbctls[index].cBlocks);
  445. FreeBuffers(pbctls[index].pList, index, pbctls[index].cBlocks);
  446. pbctls[index].pList = 0;
  447. pbctls[index].cBlocks = 0;
  448. }
  449. ASSERT(pbctls[index].pList == 0);
  450. ASSERT(pbctls[index].cBlocks == 0);
  451. }
  452. }
  453. PBUFFER
  454. BCACHE::AllocBlock(
  455. IN size_t cBytes
  456. )
  457. /*++
  458. Routine Description:
  459. Allocates a buffer directly from the RPC heap.
  460. In page heap mode allocates buffers from read-only RPC page heap.
  461. Notes:
  462. Designed with 4Kb and 8Kb pages in mind. Assumes address space
  463. is allocated 64Kb at a time.
  464. Arguments:
  465. cBytes - The size of allocation needed.
  466. Return Value:
  467. null - out of Vm
  468. non-null - a pointer to a buffer of cBytes rounded up to page size.
  469. --*/
  470. {
  471. PBUFFER p;
  472. size_t BytesToAllocate;
  473. PVOID pT;
  474. ASSERT(IsBufferSizeAligned(sizeof(BUFFER_HEAD)));
  475. BytesToAllocate = cBytes + sizeof(BUFFER_HEAD);
  476. p = (PBUFFER) new BYTE[BytesToAllocate];
  477. if (p)
  478. {
  479. p->index = -1;
  480. p->size = BytesToAllocate;
  481. }
  482. return (p);
  483. }
  484. VOID
  485. BCACHE::FreeBlock(
  486. IN PBUFFER pBuffer
  487. )
  488. /*++
  489. Routine Description:
  490. Frees a buffer allocated by AllocBlock
  491. Arguments:
  492. ptr - The buffer to free
  493. Return Value:
  494. None
  495. --*/
  496. {
  497. delete [] pBuffer;
  498. return;
  499. }