Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

2542 lines
67 KiB

  1. /*++
  2. Copyright (c) 1990 Microsoft Corporation
  3. Module Name:
  4. wsmanage.c
  5. Abstract:
  6. This module contains routines which manage the set of active working
  7. set lists.
  8. Working set management is accomplished by a parallel group of actions
  9. 1. Writing modified pages.
  10. 2. Trimming working sets by :
  11. a) Aging pages by turning off access bits and incrementing age
  12. counts for pages which haven't been accessed.
  13. b) Estimating the number of unused pages in a working set and
  14. keeping a global count of that estimate.
  15. c) When getting tight on memory, replacing rather than adding
  16. pages in a working set when a fault occurs in a working set
  17. that has a significant proportion of unused pages.
  18. d) When memory is tight, reducing (trimming) working sets which
  19. are above their maximum towards their minimum. This is done
  20. especially if there are a large number of available pages
  21. in it.
  22. The metrics are set such that writing modified pages is typically
  23. accomplished before trimming working sets, however, under certain cases
  24. where modified pages are being generated at a very high rate, working
  25. set trimming will be initiated to free up more pages.
  26. Once a process has had its working set raised above the minimum
  27. specified, the process is put on the Working Set Expanded list and
  28. is now eligible for trimming. Note that at this time the FLINK field
  29. in the WorkingSetExpansionLink has an address value.
  30. Author:
  31. Lou Perazzoli (loup) 10-Apr-1990
  32. Landy Wang (landyw) 02-Jun-1997
  33. Revision History:
  34. --*/
  35. #include "mi.h"
  36. #ifdef ALLOC_PRAGMA
  37. #pragma alloc_text(INIT, MiAdjustWorkingSetManagerParameters)
  38. #pragma alloc_text(PAGE, MmIsMemoryAvailable)
  39. #endif
  40. KEVENT MiWaitForEmptyEvent;
  41. BOOLEAN MiWaitingForWorkingSetEmpty;
  42. LOGICAL MiReplacing = FALSE;
  43. extern ULONG MmStandbyRePurposed;
  44. ULONG MiLastStandbyRePurposed;
  45. extern ULONG MiActiveVerifies;
  46. PFN_NUMBER MmPlentyFreePages = 400;
  47. PFN_NUMBER MmPlentyFreePagesValue;
  48. #define MI_MAXIMUM_AGING_SHIFT 7
  49. ULONG MiAgingShift = 4;
  50. ULONG MiEstimationShift = 5;
  51. PFN_NUMBER MmTotalClaim = 0;
  52. PFN_NUMBER MmTotalEstimatedAvailable = 0;
  53. LARGE_INTEGER MiLastAdjustmentOfClaimParams;
  54. //
  55. // Sixty seconds.
  56. //
  57. const LARGE_INTEGER MmClaimParameterAdjustUpTime = {60 * 1000 * 1000 * 10, 0};
  58. //
  59. // 2 seconds.
  60. //
  61. const LARGE_INTEGER MmClaimParameterAdjustDownTime = {2 * 1000 * 1000 * 10, 0};
  62. LOGICAL MiHardTrim = FALSE;
  63. WSLE_NUMBER MiMaximumWslesPerSweep = (1024 * 1024 * 1024) / PAGE_SIZE;
  64. #define MI_MAXIMUM_SAMPLE 8192
  65. #define MI_MINIMUM_SAMPLE 64
  66. #define MI_MINIMUM_SAMPLE_SHIFT 7
  67. #if DBG
  68. PETHREAD MmWorkingSetThread;
  69. #endif
  70. //
  71. // Number of times to retry when the target working set's mutex is not
  72. // readily available.
  73. //
  74. ULONG MiWsRetryCount = 5;
  75. typedef struct _MMWS_TRIM_CRITERIA {
  76. UCHAR NumPasses;
  77. UCHAR TrimAge;
  78. UCHAR DoAging;
  79. UCHAR TrimAllPasses;
  80. PFN_NUMBER DesiredFreeGoal;
  81. PFN_NUMBER NewTotalClaim;
  82. PFN_NUMBER NewTotalEstimatedAvailable;
  83. } MMWS_TRIM_CRITERIA, *PMMWS_TRIM_CRITERIA;
  84. LOGICAL
  85. MiCheckAndSetSystemTrimCriteria (
  86. IN OUT PMMWS_TRIM_CRITERIA Criteria
  87. );
  88. LOGICAL
  89. MiCheckSystemTrimEndCriteria (
  90. IN OUT PMMWS_TRIM_CRITERIA Criteria,
  91. IN KIRQL OldIrql
  92. );
  93. WSLE_NUMBER
  94. MiDetermineWsTrimAmount (
  95. IN PMMWS_TRIM_CRITERIA Criteria,
  96. IN PMMSUPPORT VmSupport
  97. );
  98. VOID
  99. MiAgePagesAndEstimateClaims (
  100. LOGICAL EmptyIt
  101. );
  102. VOID
  103. MiAdjustClaimParameters (
  104. IN LOGICAL EnoughPages
  105. );
  106. VOID
  107. MiRearrangeWorkingSetExpansionList (
  108. VOID
  109. );
  110. VOID
  111. MiAdjustWorkingSetManagerParameters (
  112. IN LOGICAL WorkStation
  113. )
  114. /*++
  115. Routine Description:
  116. This function is called from MmInitSystem to adjust the working set manager
  117. trim algorithms based on system type and size.
  118. Arguments:
  119. WorkStation - TRUE if this is a workstation, FALSE if not.
  120. Return Value:
  121. None.
  122. Environment:
  123. Kernel mode, INIT time only.
  124. --*/
  125. {
  126. if (WorkStation && MmNumberOfPhysicalPages <= 257*1024*1024/PAGE_SIZE) {
  127. MiAgingShift = 3;
  128. MiEstimationShift = 4;
  129. }
  130. else {
  131. MiAgingShift = 5;
  132. MiEstimationShift = 6;
  133. }
  134. if (MmNumberOfPhysicalPages >= 63*1024*1024/PAGE_SIZE) {
  135. MmPlentyFreePages *= 2;
  136. }
  137. MmPlentyFreePagesValue = MmPlentyFreePages;
  138. MiWaitingForWorkingSetEmpty = FALSE;
  139. KeInitializeEvent (&MiWaitForEmptyEvent, NotificationEvent, TRUE);
  140. }
  141. VOID
  142. MiObtainFreePages (
  143. VOID
  144. )
  145. /*++
  146. Routine Description:
  147. This function examines the size of the modified list and the
  148. total number of pages in use because of working set increments
  149. and obtains pages by writing modified pages and/or reducing
  150. working sets.
  151. Arguments:
  152. None.
  153. Return Value:
  154. None.
  155. Environment:
  156. Kernel mode, APCs disabled, working set and PFN mutexes held.
  157. --*/
  158. {
  159. //
  160. // Check to see if there are enough modified pages to institute a
  161. // write.
  162. //
  163. if (MmModifiedPageListHead.Total >= MmModifiedWriteClusterSize) {
  164. //
  165. // Start the modified page writer.
  166. //
  167. KeSetEvent (&MmModifiedPageWriterEvent, 0, FALSE);
  168. }
  169. //
  170. // See if there are enough working sets above the minimum
  171. // threshold to make working set trimming worthwhile.
  172. //
  173. if ((MmPagesAboveWsMinimum > MmPagesAboveWsThreshold) ||
  174. (MmAvailablePages < 5)) {
  175. //
  176. // Start the working set manager to reduce working sets.
  177. //
  178. KeSetEvent (&MmWorkingSetManagerEvent, 0, FALSE);
  179. }
  180. }
  181. LOGICAL
  182. MmIsMemoryAvailable (
  183. IN PFN_NUMBER PagesDesired
  184. )
  185. /*++
  186. Routine Description:
  187. This function checks whether there are sufficient available pages based
  188. on the caller's request. If currently active pages are needed to satisfy
  189. this request and non-useful ones can be taken, then trimming is initiated
  190. here to do so.
  191. Arguments:
  192. PagesRequested - Supplies the number of pages desired.
  193. Return Value:
  194. TRUE if sufficient pages exist to satisfy the request.
  195. FALSE if not.
  196. Environment:
  197. Kernel mode, PASSIVE_LEVEL.
  198. --*/
  199. {
  200. LOGICAL Status;
  201. PFN_NUMBER PageTarget;
  202. PFN_NUMBER PagePlentyTarget;
  203. ULONG i;
  204. PFN_NUMBER CurrentAvailablePages;
  205. PFN_NUMBER CurrentTotalClaim;
  206. ASSERT (KeGetCurrentIrql () == PASSIVE_LEVEL);
  207. CurrentAvailablePages = MmAvailablePages;
  208. //
  209. // If twice the pages that the caller asked for are available
  210. // without trimming anything, return TRUE.
  211. //
  212. PageTarget = PagesDesired * 2;
  213. if (CurrentAvailablePages >= PageTarget) {
  214. return TRUE;
  215. }
  216. CurrentTotalClaim = MmTotalClaim;
  217. //
  218. // If there are few pages available or claimable, we adjust to do
  219. // a hard trim.
  220. //
  221. if (CurrentAvailablePages + CurrentTotalClaim < PagesDesired) {
  222. MiHardTrim = TRUE;
  223. }
  224. //
  225. // Active pages must be trimmed to satisfy this request and it is believed
  226. // that non-useful pages can be taken to accomplish this.
  227. //
  228. // Set the PagePlentyTarget to 125% of the readlist size and kick it off.
  229. // Our actual trim goal will be 150% of the PagePlentyTarget.
  230. //
  231. PagePlentyTarget = PagesDesired + (PagesDesired >> 2);
  232. MmPlentyFreePages = PagePlentyTarget;
  233. KeSetEvent (&MmWorkingSetManagerEvent, 0, FALSE);
  234. Status = FALSE;
  235. for (i = 0; i < 10; i += 1) {
  236. KeDelayExecutionThread (KernelMode, FALSE, (PLARGE_INTEGER)&Mm30Milliseconds);
  237. if (MmAvailablePages >= PagesDesired) {
  238. Status = TRUE;
  239. break;
  240. }
  241. }
  242. MmPlentyFreePages = MmPlentyFreePagesValue;
  243. MiHardTrim = FALSE;
  244. return Status;
  245. }
  246. LOGICAL
  247. MiAttachAndLockWorkingSet (
  248. IN PMMSUPPORT VmSupport
  249. )
  250. /*++
  251. Routine Description:
  252. This function attaches to the proper address space and acquires the
  253. relevant working set mutex for the address space being trimmed.
  254. If successful, this routine returns with APCs blocked as well.
  255. On failure, this routine returns without any APCs blocked, no working
  256. set mutex acquired and no address space attached to.
  257. Arguments:
  258. VmSupport - Supplies the working set to attach to and lock.
  259. Return Value:
  260. TRUE if successful, FALSE if not.
  261. Environment:
  262. Kernel mode, PASSIVE_LEVEL.
  263. --*/
  264. {
  265. ULONG count;
  266. KIRQL OldIrql;
  267. PEPROCESS ProcessToTrim;
  268. LOGICAL Attached;
  269. PMM_SESSION_SPACE SessionSpace;
  270. ASSERT (KeGetCurrentIrql () == PASSIVE_LEVEL);
  271. if (VmSupport == &MmSystemCacheWs) {
  272. ASSERT (VmSupport->Flags.SessionSpace == 0);
  273. ASSERT (VmSupport->Flags.TrimHard == 0);
  274. //
  275. // System cache,
  276. //
  277. if (KeTryToAcquireGuardedMutex (&VmSupport->WorkingSetMutex) == FALSE) {
  278. //
  279. // System working set mutex was not granted, don't trim
  280. // the system cache.
  281. //
  282. return FALSE;
  283. }
  284. MM_SYSTEM_WS_LOCK_TIMESTAMP ();
  285. return TRUE;
  286. }
  287. if (VmSupport->Flags.SessionSpace == 0) {
  288. ProcessToTrim = CONTAINING_RECORD (VmSupport, EPROCESS, Vm);
  289. ASSERT ((ProcessToTrim->Flags & PS_PROCESS_FLAGS_VM_DELETED) == 0);
  290. //
  291. // Attach to the process in preparation for trimming.
  292. //
  293. Attached = 0;
  294. if (ProcessToTrim != PsInitialSystemProcess) {
  295. Attached = KeForceAttachProcess (&ProcessToTrim->Pcb);
  296. if (Attached == 0) {
  297. return FALSE;
  298. }
  299. if (ProcessToTrim->Flags & PS_PROCESS_FLAGS_OUTSWAP_ENABLED) {
  300. //
  301. // We have effectively performed an inswap of the process
  302. // due to the force attach. Mark the process (and session)
  303. // accordingly.
  304. //
  305. ASSERT ((ProcessToTrim->Flags & PS_PROCESS_FLAGS_OUTSWAPPED) == 0);
  306. LOCK_EXPANSION (OldIrql);
  307. PS_CLEAR_BITS (&ProcessToTrim->Flags,
  308. PS_PROCESS_FLAGS_OUTSWAP_ENABLED);
  309. if ((ProcessToTrim->Flags & PS_PROCESS_FLAGS_IN_SESSION) &&
  310. (VmSupport->Flags.SessionLeader == 0)) {
  311. ASSERT (MmSessionSpace->ProcessOutSwapCount >= 1);
  312. MmSessionSpace->ProcessOutSwapCount -= 1;
  313. }
  314. UNLOCK_EXPANSION (OldIrql);
  315. }
  316. }
  317. //
  318. // Attempt to acquire the working set mutex. If the
  319. // lock cannot be acquired, skip over this process.
  320. //
  321. count = 0;
  322. do {
  323. if (KeTryToAcquireGuardedMutex (&VmSupport->WorkingSetMutex) != FALSE) {
  324. ASSERT (VmSupport->WorkingSetExpansionLinks.Flink == MM_WS_TRIMMING);
  325. LOCK_WS_TIMESTAMP (ProcessToTrim);
  326. return TRUE;
  327. }
  328. KeDelayExecutionThread (KernelMode, FALSE, (PLARGE_INTEGER)&MmShortTime);
  329. count += 1;
  330. } while (count < MiWsRetryCount);
  331. //
  332. // Could not get the lock, skip this process.
  333. //
  334. if (Attached) {
  335. KeDetachProcess ();
  336. }
  337. return FALSE;
  338. }
  339. SessionSpace = CONTAINING_RECORD (VmSupport, MM_SESSION_SPACE, Vm);
  340. //
  341. // Attach directly to the session space to be trimmed.
  342. //
  343. MiAttachSession (SessionSpace);
  344. //
  345. // Try for the session working set mutex.
  346. //
  347. if (KeTryToAcquireGuardedMutex (&VmSupport->WorkingSetMutex) == FALSE) {
  348. //
  349. // This session space's working set mutex was not
  350. // granted, don't trim it.
  351. //
  352. MiDetachSession ();
  353. return FALSE;
  354. }
  355. return TRUE;
  356. }
  357. VOID
  358. MiDetachAndUnlockWorkingSet (
  359. IN PMMSUPPORT VmSupport
  360. )
  361. /*++
  362. Routine Description:
  363. This function detaches from the target address space and releases the
  364. relevant working set mutex for the address space that was trimmed.
  365. Arguments:
  366. VmSupport - Supplies the working set to detach from and unlock.
  367. Return Value:
  368. None.
  369. Environment:
  370. Kernel mode, APC_LEVEL.
  371. --*/
  372. {
  373. PEPROCESS ProcessToTrim;
  374. ASSERT (KeAreAllApcsDisabled () == TRUE);
  375. UNLOCK_WORKING_SET (VmSupport);
  376. if (VmSupport == &MmSystemCacheWs) {
  377. ASSERT (VmSupport->Flags.SessionSpace == 0);
  378. }
  379. else if (VmSupport->Flags.SessionSpace == 0) {
  380. ProcessToTrim = CONTAINING_RECORD (VmSupport, EPROCESS, Vm);
  381. ASSERT (KeGetCurrentIrql () == PASSIVE_LEVEL);
  382. if (ProcessToTrim != PsInitialSystemProcess) {
  383. KeDetachProcess ();
  384. }
  385. }
  386. else {
  387. MiDetachSession ();
  388. }
  389. return;
  390. }
  391. VOID
  392. MmWorkingSetManager (
  393. VOID
  394. )
  395. /*++
  396. Routine Description:
  397. Implements the NT working set manager thread. When the number
  398. of free pages becomes critical and ample pages can be obtained by
  399. reducing working sets, the working set manager's event is set, and
  400. this thread becomes active.
  401. Arguments:
  402. None.
  403. Return Value:
  404. None.
  405. Environment:
  406. Kernel mode.
  407. --*/
  408. {
  409. PLIST_ENTRY ListEntry;
  410. WSLE_NUMBER Trim;
  411. KIRQL OldIrql;
  412. PMMSUPPORT VmSupport;
  413. LARGE_INTEGER CurrentTime;
  414. LOGICAL DoTrimming;
  415. MMWS_TRIM_CRITERIA TrimCriteria;
  416. static ULONG Initialized = 0;
  417. PERFINFO_WSMANAGE_DECL();
  418. if (Initialized == 0) {
  419. PsGetCurrentThread()->MemoryMaker = 1;
  420. Initialized = 1;
  421. }
  422. #if DBG
  423. MmWorkingSetThread = PsGetCurrentThread ();
  424. #endif
  425. ASSERT (MmIsAddressValid (MmSessionSpace) == FALSE);
  426. PERFINFO_WSMANAGE_CHECK();
  427. //
  428. // Set the trim criteria: If there are plenty of pages, the existing
  429. // sets are aged and FALSE is returned to signify no trim is necessary.
  430. // Otherwise, the working set expansion list is ordered so the best
  431. // candidates for trimming are placed at the front and TRUE is returned.
  432. //
  433. DoTrimming = MiCheckAndSetSystemTrimCriteria (&TrimCriteria);
  434. if (DoTrimming) {
  435. //
  436. // Clear the deferred entry list to free up some pages.
  437. //
  438. MiDeferredUnlockPages (0);
  439. KeQuerySystemTime (&CurrentTime);
  440. ASSERT (MmIsAddressValid (MmSessionSpace) == FALSE);
  441. LOCK_EXPANSION (OldIrql);
  442. while (!IsListEmpty (&MmWorkingSetExpansionHead.ListHead)) {
  443. //
  444. // Remove the entry at the head and trim it.
  445. //
  446. ListEntry = RemoveHeadList (&MmWorkingSetExpansionHead.ListHead);
  447. VmSupport = CONTAINING_RECORD (ListEntry,
  448. MMSUPPORT,
  449. WorkingSetExpansionLinks);
  450. //
  451. // Note that other routines that set this bit must remove the
  452. // entry from the expansion list first.
  453. //
  454. ASSERT (VmSupport->WorkingSetExpansionLinks.Flink != MM_WS_TRIMMING);
  455. //
  456. // Check to see if we've been here before.
  457. //
  458. if (VmSupport->LastTrimTime.QuadPart == CurrentTime.QuadPart) {
  459. InsertHeadList (&MmWorkingSetExpansionHead.ListHead,
  460. &VmSupport->WorkingSetExpansionLinks);
  461. //
  462. // If we aren't finished we may sleep in this call.
  463. //
  464. if (MiCheckSystemTrimEndCriteria (&TrimCriteria, OldIrql)) {
  465. //
  466. // No more pages are needed so we're done.
  467. //
  468. break;
  469. }
  470. //
  471. // Start a new round of trimming.
  472. //
  473. KeQuerySystemTime (&CurrentTime);
  474. continue;
  475. }
  476. //
  477. // Only attach if the working set is worth examining. This is
  478. // not just an optimization, as care must be taken not to attempt
  479. // an attach to a process which is a candidate for being currently
  480. // (or already) swapped out because if we attach to a page
  481. // directory that is in transition it's all over.
  482. //
  483. if ((VmSupport->WorkingSetSize <= MM_PROCESS_COMMIT_CHARGE) &&
  484. (VmSupport != &MmSystemCacheWs) &&
  485. (VmSupport->Flags.SessionSpace == 0)) {
  486. InsertTailList (&MmWorkingSetExpansionHead.ListHead,
  487. &VmSupport->WorkingSetExpansionLinks);
  488. continue;
  489. }
  490. VmSupport->LastTrimTime = CurrentTime;
  491. VmSupport->WorkingSetExpansionLinks.Flink = MM_WS_TRIMMING;
  492. VmSupport->WorkingSetExpansionLinks.Blink = NULL;
  493. UNLOCK_EXPANSION (OldIrql);
  494. if (MiAttachAndLockWorkingSet (VmSupport) == TRUE) {
  495. //
  496. // Determine how many pages to trim from this working set.
  497. //
  498. Trim = MiDetermineWsTrimAmount (&TrimCriteria, VmSupport);
  499. //
  500. // If there's something to trim...
  501. //
  502. if ((Trim != 0) &&
  503. ((TrimCriteria.TrimAllPasses > TrimCriteria.NumPasses) ||
  504. (MmAvailablePages < TrimCriteria.DesiredFreeGoal))) {
  505. //
  506. // We haven't reached our goal, so trim now.
  507. //
  508. PERFINFO_WSMANAGE_TOTRIM(Trim);
  509. Trim = MiTrimWorkingSet (Trim,
  510. VmSupport,
  511. TrimCriteria.TrimAge);
  512. PERFINFO_WSMANAGE_ACTUALTRIM(Trim);
  513. }
  514. //
  515. // Estimating the current claim is always done here by taking a
  516. // sample of the working set. Aging is only done if the trim
  517. // pass warrants it (ie: the first pass only).
  518. //
  519. MiAgeAndEstimateAvailableInWorkingSet (
  520. VmSupport,
  521. TrimCriteria.DoAging,
  522. NULL,
  523. &TrimCriteria.NewTotalClaim,
  524. &TrimCriteria.NewTotalEstimatedAvailable);
  525. MiDetachAndUnlockWorkingSet (VmSupport);
  526. LOCK_EXPANSION (OldIrql);
  527. }
  528. else {
  529. //
  530. // Unable to attach to the working set presumably because
  531. // some other thread has it locked. Set the ForceTrim flag
  532. // so it will be trimmed later by whoever owns it (or whoever
  533. // tries to insert the next entry).
  534. //
  535. LOCK_EXPANSION (OldIrql);
  536. VmSupport->Flags.ForceTrim = 1;
  537. }
  538. ASSERT (VmSupport->WorkingSetExpansionLinks.Flink == MM_WS_TRIMMING);
  539. if (VmSupport->WorkingSetExpansionLinks.Blink == NULL) {
  540. //
  541. // Reinsert this working set at the tail of the list.
  542. //
  543. InsertTailList (&MmWorkingSetExpansionHead.ListHead,
  544. &VmSupport->WorkingSetExpansionLinks);
  545. }
  546. else {
  547. //
  548. // The process is terminating - the value in the blink
  549. // is the address of an event to set.
  550. //
  551. ASSERT (VmSupport != &MmSystemCacheWs);
  552. VmSupport->WorkingSetExpansionLinks.Flink = MM_WS_NOT_LISTED;
  553. KeSetEvent ((PKEVENT)VmSupport->WorkingSetExpansionLinks.Blink,
  554. 0,
  555. FALSE);
  556. }
  557. }
  558. MmTotalClaim = TrimCriteria.NewTotalClaim;
  559. MmTotalEstimatedAvailable = TrimCriteria.NewTotalEstimatedAvailable;
  560. PERFINFO_WSMANAGE_TRIMEND_CLAIMS(&TrimCriteria);
  561. UNLOCK_EXPANSION (OldIrql);
  562. }
  563. //
  564. // If memory is critical and there are modified pages to be written
  565. // (presumably because we've just trimmed them), then signal the
  566. // modified page writer.
  567. //
  568. if ((MmAvailablePages < MmMinimumFreePages) ||
  569. (MmModifiedPageListHead.Total >= MmModifiedPageMaximum)) {
  570. KeSetEvent (&MmModifiedPageWriterEvent, 0, FALSE);
  571. }
  572. return;
  573. }
  574. LOGICAL
  575. MiCheckAndSetSystemTrimCriteria (
  576. IN PMMWS_TRIM_CRITERIA Criteria
  577. )
  578. /*++
  579. Routine Description:
  580. Decide whether to trim, age or adjust claim estimations at this time.
  581. Arguments:
  582. Criteria - Supplies a pointer to the trim criteria information. Various
  583. fields in this structure are set as needed by this routine.
  584. Return Value:
  585. TRUE if the caller should initiate trimming, FALSE if not.
  586. Environment:
  587. Kernel mode. No locks held. APC level or below.
  588. This is called at least once per second on entry to MmWorkingSetManager.
  589. --*/
  590. {
  591. KIRQL OldIrql;
  592. PFN_NUMBER Available;
  593. ULONG StandbyRemoved;
  594. ULONG StandbyTemp;
  595. ULONG WsRetryCount;
  596. PERFINFO_WSMANAGE_DECL();
  597. PERFINFO_WSMANAGE_CHECK();
  598. //
  599. // See if an empty-all-working-sets request has been queued to us.
  600. //
  601. WsRetryCount = MiWsRetryCount;
  602. if (MiWaitingForWorkingSetEmpty == TRUE) {
  603. MiWsRetryCount = 1;
  604. MiAgePagesAndEstimateClaims (TRUE);
  605. LOCK_EXPANSION (OldIrql);
  606. KeSetEvent (&MiWaitForEmptyEvent, 0, FALSE);
  607. MiWaitingForWorkingSetEmpty = FALSE;
  608. UNLOCK_EXPANSION (OldIrql);
  609. MiReplacing = FALSE;
  610. MiWsRetryCount = WsRetryCount;
  611. return FALSE;
  612. }
  613. //
  614. // Check the number of pages available to see if any trimming (or aging)
  615. // is really required.
  616. //
  617. Available = MmAvailablePages;
  618. StandbyRemoved = MmStandbyRePurposed;
  619. //
  620. // If the counter wrapped, it's ok to just ignore it this time around.
  621. //
  622. if (StandbyRemoved <= MiLastStandbyRePurposed) {
  623. MiLastStandbyRePurposed = StandbyRemoved;
  624. StandbyRemoved = 0;
  625. }
  626. else {
  627. //
  628. // The value is nonzero, we need to synchronize so we get a coordinated
  629. // snapshot of both values.
  630. //
  631. LOCK_PFN (OldIrql);
  632. Available = MmAvailablePages;
  633. StandbyRemoved = MmStandbyRePurposed;
  634. UNLOCK_PFN (OldIrql);
  635. if (StandbyRemoved <= MiLastStandbyRePurposed) {
  636. MiLastStandbyRePurposed = StandbyRemoved;
  637. StandbyRemoved = 0;
  638. }
  639. else {
  640. StandbyTemp = StandbyRemoved;
  641. StandbyRemoved -= MiLastStandbyRePurposed;
  642. MiLastStandbyRePurposed = StandbyTemp;
  643. }
  644. }
  645. PERFINFO_WSMANAGE_STARTLOG_CLAIMS();
  646. //
  647. // If we're low on pages, or we've been replacing within a given
  648. // working set, or we've been cannibalizing a large number of standby
  649. // pages, then trim now.
  650. //
  651. if ((Available <= MmPlentyFreePages) ||
  652. (MiReplacing == TRUE) ||
  653. (StandbyRemoved >= (Available >> 2))) {
  654. //
  655. // Inform our caller to start trimming since we're below
  656. // plenty pages - order the list so the bigger working sets are
  657. // in front so our caller trims those first.
  658. //
  659. Criteria->NumPasses = 0;
  660. Criteria->DesiredFreeGoal = MmPlentyFreePages + (MmPlentyFreePages / 2);
  661. Criteria->NewTotalClaim = 0;
  662. Criteria->NewTotalEstimatedAvailable = 0;
  663. //
  664. // If more than 25% of the available pages were recycled standby
  665. // pages, then trim more aggresively in an attempt to get more of the
  666. // cold pages into standby for the next pass.
  667. //
  668. if (StandbyRemoved >= (Available >> 2)) {
  669. Criteria->TrimAllPasses = TRUE;
  670. }
  671. else {
  672. Criteria->TrimAllPasses = FALSE;
  673. }
  674. //
  675. // Start trimming the bigger working sets first.
  676. //
  677. MiRearrangeWorkingSetExpansionList ();
  678. #if DBG
  679. if (MmDebug & MM_DBG_WS_EXPANSION) {
  680. DbgPrint("\nMM-wsmanage: Desired = %ld, Avail %ld\n",
  681. Criteria->DesiredFreeGoal, MmAvailablePages);
  682. }
  683. #endif
  684. PERFINFO_WSMANAGE_WILLTRIM_CLAIMS(Criteria);
  685. //
  686. // No need to lock synchronize the MiReplacing clearing as it
  687. // gets set every time a page replacement happens anyway.
  688. //
  689. MiReplacing = FALSE;
  690. return TRUE;
  691. }
  692. //
  693. // If there is an overwhelming surplus of memory and this is a big
  694. // server then don't even bother aging at this point.
  695. //
  696. if (Available > MM_ENORMOUS_LIMIT) {
  697. //
  698. // Note the claim and estimated available are not cleared so they
  699. // may contain stale values, but at this level it doesn't really
  700. // matter.
  701. //
  702. return FALSE;
  703. }
  704. //
  705. // Don't trim but do age unused pages and estimate
  706. // the amount available in working sets.
  707. //
  708. MiAgePagesAndEstimateClaims (FALSE);
  709. MiAdjustClaimParameters (TRUE);
  710. PERFINFO_WSMANAGE_TRIMACTION (PERFINFO_WS_ACTION_RESET_COUNTER);
  711. PERFINFO_WSMANAGE_DUMPENTRIES_CLAIMS ();
  712. return FALSE;
  713. }
  714. LOGICAL
  715. MiCheckSystemTrimEndCriteria (
  716. IN PMMWS_TRIM_CRITERIA Criteria,
  717. IN KIRQL OldIrql
  718. )
  719. /*++
  720. Routine Description:
  721. Check the ending criteria. If we're not done, delay for a little
  722. bit to let the modified writes catch up.
  723. Arguments:
  724. Criteria - Supplies the trim criteria information.
  725. OldIrql - Supplies the old IRQL to lower to if the expansion lock needs
  726. to be released.
  727. Return Value:
  728. TRUE if trimming can be stopped, FALSE otherwise.
  729. Environment:
  730. Kernel mode. Expansion lock held. APC level or below.
  731. --*/
  732. {
  733. LOGICAL FinishedTrimming;
  734. PERFINFO_WSMANAGE_DECL();
  735. PERFINFO_WSMANAGE_CHECK();
  736. if ((MmAvailablePages > Criteria->DesiredFreeGoal) ||
  737. (Criteria->NumPasses >= MI_MAX_TRIM_PASSES)) {
  738. //
  739. // We have enough pages or we trimmed as many as we're going to get.
  740. //
  741. return TRUE;
  742. }
  743. //
  744. // Update the global claim and estimate before we wait.
  745. //
  746. MmTotalClaim = Criteria->NewTotalClaim;
  747. MmTotalEstimatedAvailable = Criteria->NewTotalEstimatedAvailable;
  748. //
  749. // We don't have enough pages - give the modified page writer
  750. // 10 milliseconds to catch up. The wait is also important because a
  751. // thread may have the system cache locked but has been preempted
  752. // by the balance set manager due to its higher priority. We must
  753. // give this thread a shot at running so it can release the system
  754. // cache lock (all the trimmable pages may reside in the system cache).
  755. //
  756. UNLOCK_EXPANSION (OldIrql);
  757. KeDelayExecutionThread (KernelMode,
  758. FALSE,
  759. (PLARGE_INTEGER)&MmShortTime);
  760. PERFINFO_WSMANAGE_WAITFORWRITER_CLAIMS();
  761. //
  762. // Check again to see if we've met the criteria to stop trimming.
  763. //
  764. if (MmAvailablePages > Criteria->DesiredFreeGoal) {
  765. //
  766. // Now we have enough pages so break out.
  767. //
  768. FinishedTrimming = TRUE;
  769. }
  770. else {
  771. //
  772. // We don't have enough pages so let's do another pass.
  773. // Go get the next working set list which is probably the
  774. // one we put back before we gave up the processor.
  775. //
  776. FinishedTrimming = FALSE;
  777. if (Criteria->NumPasses == 0) {
  778. MiAdjustClaimParameters (FALSE);
  779. }
  780. Criteria->NumPasses += 1;
  781. Criteria->NewTotalClaim = 0;
  782. Criteria->NewTotalEstimatedAvailable = 0;
  783. PERFINFO_WSMANAGE_TRIMACTION(PERFINFO_WS_ACTION_FORCE_TRIMMING_PROCESS);
  784. }
  785. LOCK_EXPANSION (OldIrql);
  786. return FinishedTrimming;
  787. }
  788. WSLE_NUMBER
  789. MiDetermineWsTrimAmount (
  790. PMMWS_TRIM_CRITERIA Criteria,
  791. PMMSUPPORT VmSupport
  792. )
  793. /*++
  794. Routine Description:
  795. Determine whether this process should be trimmed.
  796. Arguments:
  797. Criteria - Supplies the trim criteria information.
  798. VmSupport - Supplies the working set information for the candidate.
  799. Return Value:
  800. TRUE if trimming should be done on this process, FALSE if not.
  801. Environment:
  802. Kernel mode. Expansion lock held. APC level or below.
  803. --*/
  804. {
  805. PMMWSL WorkingSetList;
  806. WSLE_NUMBER MaxTrim;
  807. WSLE_NUMBER Trim;
  808. LOGICAL OutswapEnabled;
  809. PEPROCESS ProcessToTrim;
  810. PMM_SESSION_SPACE SessionSpace;
  811. WorkingSetList = VmSupport->VmWorkingSetList;
  812. MaxTrim = VmSupport->WorkingSetSize;
  813. if (MaxTrim <= WorkingSetList->FirstDynamic) {
  814. return 0;
  815. }
  816. OutswapEnabled = FALSE;
  817. if (VmSupport == &MmSystemCacheWs) {
  818. PERFINFO_WSMANAGE_TRIMWS (NULL, NULL, VmSupport);
  819. }
  820. else if (VmSupport->Flags.SessionSpace == 0) {
  821. ProcessToTrim = CONTAINING_RECORD (VmSupport, EPROCESS, Vm);
  822. if (ProcessToTrim->Flags & PS_PROCESS_FLAGS_OUTSWAP_ENABLED) {
  823. OutswapEnabled = TRUE;
  824. }
  825. if (VmSupport->Flags.MinimumWorkingSetHard == 1) {
  826. if (MaxTrim <= VmSupport->MinimumWorkingSetSize) {
  827. return 0;
  828. }
  829. OutswapEnabled = FALSE;
  830. }
  831. PERFINFO_WSMANAGE_TRIMWS (ProcessToTrim, NULL, VmSupport);
  832. }
  833. else {
  834. if (VmSupport->Flags.TrimHard == 1) {
  835. OutswapEnabled = TRUE;
  836. }
  837. SessionSpace = CONTAINING_RECORD(VmSupport,
  838. MM_SESSION_SPACE,
  839. Vm);
  840. PERFINFO_WSMANAGE_TRIMWS (NULL, SessionSpace, VmSupport);
  841. }
  842. if (OutswapEnabled == FALSE) {
  843. //
  844. // Don't trim the cache or non-swapped sessions or processes
  845. // below their minimum.
  846. //
  847. MaxTrim -= VmSupport->MinimumWorkingSetSize;
  848. }
  849. switch (Criteria->NumPasses) {
  850. case 0:
  851. Trim = VmSupport->Claim >>
  852. ((VmSupport->Flags.MemoryPriority == MEMORY_PRIORITY_FOREGROUND)
  853. ? MI_FOREGROUND_CLAIM_AVAILABLE_SHIFT
  854. : MI_BACKGROUND_CLAIM_AVAILABLE_SHIFT);
  855. Criteria->TrimAge = MI_PASS0_TRIM_AGE;
  856. Criteria->DoAging = TRUE;
  857. break;
  858. case 1:
  859. Trim = VmSupport->Claim >>
  860. ((VmSupport->Flags.MemoryPriority == MEMORY_PRIORITY_FOREGROUND)
  861. ? MI_FOREGROUND_CLAIM_AVAILABLE_SHIFT
  862. : MI_BACKGROUND_CLAIM_AVAILABLE_SHIFT);
  863. Criteria->TrimAge = MI_PASS1_TRIM_AGE;
  864. Criteria->DoAging = FALSE;
  865. break;
  866. case 2:
  867. Trim = VmSupport->Claim;
  868. Criteria->TrimAge = MI_PASS2_TRIM_AGE;
  869. Criteria->DoAging = FALSE;
  870. break;
  871. case 3:
  872. Trim = VmSupport->EstimatedAvailable;
  873. Criteria->TrimAge = MI_PASS3_TRIM_AGE;
  874. Criteria->DoAging = FALSE;
  875. break;
  876. default:
  877. Trim = VmSupport->EstimatedAvailable;
  878. Criteria->TrimAge = MI_PASS3_TRIM_AGE;
  879. Criteria->DoAging = FALSE;
  880. if (MiHardTrim == TRUE || MmAvailablePages < MM_HIGH_LIMIT + 64) {
  881. if (VmSupport->WorkingSetSize > VmSupport->MinimumWorkingSetSize) {
  882. Trim = (VmSupport->WorkingSetSize - VmSupport->MinimumWorkingSetSize) >> 2;
  883. if (Trim == 0) {
  884. Trim = VmSupport->WorkingSetSize - VmSupport->MinimumWorkingSetSize;
  885. }
  886. }
  887. Criteria->TrimAge = MI_PASS4_TRIM_AGE;
  888. Criteria->DoAging = TRUE;
  889. }
  890. break;
  891. }
  892. if (Trim > MaxTrim) {
  893. Trim = MaxTrim;
  894. }
  895. #if DBG
  896. if ((MmDebug & MM_DBG_WS_EXPANSION) && (Trim != 0)) {
  897. if (VmSupport->Flags.SessionSpace == 0) {
  898. ProcessToTrim = CONTAINING_RECORD (VmSupport, EPROCESS, Vm);
  899. DbgPrint(" Trimming Process %16s, WS %6d, Trimming %5d ==> %5d\n",
  900. ProcessToTrim ? ProcessToTrim->ImageFileName : (PUCHAR)"System Cache",
  901. VmSupport->WorkingSetSize,
  902. Trim,
  903. VmSupport->WorkingSetSize-Trim);
  904. }
  905. else {
  906. SessionSpace = CONTAINING_RECORD (VmSupport,
  907. MM_SESSION_SPACE,
  908. Vm);
  909. DbgPrint(" Trimming Session 0x%x (id %d), WS %6d, Trimming %5d ==> %5d\n",
  910. SessionSpace,
  911. SessionSpace->SessionId,
  912. VmSupport->WorkingSetSize,
  913. Trim,
  914. VmSupport->WorkingSetSize-Trim);
  915. }
  916. }
  917. #endif
  918. return Trim;
  919. }
  920. VOID
  921. MiAgePagesAndEstimateClaims (
  922. LOGICAL EmptyIt
  923. )
  924. /*++
  925. Routine Description:
  926. Walk through the sets on the working set expansion list.
  927. Either age pages and estimate the claim (number of pages they aren't using),
  928. or empty the working set.
  929. Arguments:
  930. EmptyIt - Supplies TRUE to empty the working set,
  931. FALSE to just age and estimate it.
  932. Return Value:
  933. None.
  934. Environment:
  935. Kernel mode, APCs disabled. PFN lock NOT held.
  936. --*/
  937. {
  938. WSLE_NUMBER WslesScanned;
  939. PMMSUPPORT VmSupport;
  940. PMMSUPPORT FirstSeen;
  941. LOGICAL SystemCacheSeen;
  942. KIRQL OldIrql;
  943. PLIST_ENTRY ListEntry;
  944. PFN_NUMBER NewTotalClaim;
  945. PFN_NUMBER NewTotalEstimatedAvailable;
  946. ULONG LoopCount;
  947. FirstSeen = NULL;
  948. SystemCacheSeen = FALSE;
  949. LoopCount = 0;
  950. WslesScanned = 0;
  951. NewTotalClaim = 0;
  952. NewTotalEstimatedAvailable = 0;
  953. ASSERT (MmIsAddressValid (MmSessionSpace) == FALSE);
  954. LOCK_EXPANSION (OldIrql);
  955. while (!IsListEmpty (&MmWorkingSetExpansionHead.ListHead)) {
  956. ASSERT (MmIsAddressValid (MmSessionSpace) == FALSE);
  957. //
  958. // Remove the entry at the head, try to lock it, if we can lock it
  959. // then age some pages and estimate the number of available pages.
  960. //
  961. ListEntry = RemoveHeadList (&MmWorkingSetExpansionHead.ListHead);
  962. VmSupport = CONTAINING_RECORD (ListEntry,
  963. MMSUPPORT,
  964. WorkingSetExpansionLinks);
  965. if (VmSupport == &MmSystemCacheWs) {
  966. if (SystemCacheSeen == TRUE) {
  967. //
  968. // Seen this one already.
  969. //
  970. FirstSeen = VmSupport;
  971. }
  972. SystemCacheSeen = TRUE;
  973. }
  974. ASSERT (VmSupport->WorkingSetExpansionLinks.Flink != MM_WS_TRIMMING);
  975. if (VmSupport == FirstSeen) {
  976. InsertHeadList (&MmWorkingSetExpansionHead.ListHead,
  977. &VmSupport->WorkingSetExpansionLinks);
  978. break;
  979. }
  980. if ((VmSupport->WorkingSetSize <= MM_PROCESS_COMMIT_CHARGE) &&
  981. (VmSupport != &MmSystemCacheWs) &&
  982. (VmSupport->Flags.SessionSpace == 0)) {
  983. //
  984. // Only attach if the working set is worth examining. This is
  985. // not just an optimization, as care must be taken not to attempt
  986. // an attach to a process which is a candidate for being currently
  987. // (or already) swapped out because if we attach to a page
  988. // directory that is in transition it's all over.
  989. //
  990. // Since this one is at the minimum where a racing swapout
  991. // thread can be processing it in parallel, just reinsert this
  992. // working set at the tail of the list.
  993. //
  994. InsertTailList (&MmWorkingSetExpansionHead.ListHead,
  995. &VmSupport->WorkingSetExpansionLinks);
  996. goto skip;
  997. }
  998. VmSupport->WorkingSetExpansionLinks.Flink = MM_WS_TRIMMING;
  999. VmSupport->WorkingSetExpansionLinks.Blink = NULL;
  1000. UNLOCK_EXPANSION (OldIrql);
  1001. if (FirstSeen == NULL) {
  1002. FirstSeen = VmSupport;
  1003. }
  1004. if (MiAttachAndLockWorkingSet (VmSupport) == TRUE) {
  1005. if (EmptyIt == FALSE) {
  1006. MiAgeAndEstimateAvailableInWorkingSet (VmSupport,
  1007. TRUE,
  1008. &WslesScanned,
  1009. &NewTotalClaim,
  1010. &NewTotalEstimatedAvailable);
  1011. }
  1012. else {
  1013. MiEmptyWorkingSet (VmSupport, FALSE);
  1014. }
  1015. MiDetachAndUnlockWorkingSet (VmSupport);
  1016. }
  1017. LOCK_EXPANSION (OldIrql);
  1018. ASSERT (VmSupport->WorkingSetExpansionLinks.Flink == MM_WS_TRIMMING);
  1019. if (VmSupport->WorkingSetExpansionLinks.Blink == NULL) {
  1020. //
  1021. // Reinsert this working set at the tail of the list.
  1022. //
  1023. InsertTailList (&MmWorkingSetExpansionHead.ListHead,
  1024. &VmSupport->WorkingSetExpansionLinks);
  1025. }
  1026. else {
  1027. //
  1028. // The process is terminating - the value in the blink
  1029. // is the address of an event to set.
  1030. //
  1031. ASSERT (VmSupport != &MmSystemCacheWs);
  1032. VmSupport->WorkingSetExpansionLinks.Flink = MM_WS_NOT_LISTED;
  1033. KeSetEvent ((PKEVENT)VmSupport->WorkingSetExpansionLinks.Blink,
  1034. 0,
  1035. FALSE);
  1036. }
  1037. skip:
  1038. //
  1039. // The initial working set that was chosen for FirstSeen may have
  1040. // been trimmed down under its minimum and been removed from the
  1041. // ExpansionHead links. It is possible that the system cache is not
  1042. // on the links either. This check detects this extremely rare
  1043. // situation so that the system does not spin forever.
  1044. //
  1045. LoopCount += 1;
  1046. if (LoopCount > 200) {
  1047. if (MmSystemCacheWs.WorkingSetExpansionLinks.Blink == NULL) {
  1048. break;
  1049. }
  1050. }
  1051. }
  1052. UNLOCK_EXPANSION (OldIrql);
  1053. if (EmptyIt == FALSE) {
  1054. MmTotalClaim = NewTotalClaim;
  1055. MmTotalEstimatedAvailable = NewTotalEstimatedAvailable;
  1056. }
  1057. }
  1058. VOID
  1059. MiAgeAndEstimateAvailableInWorkingSet (
  1060. IN PMMSUPPORT VmSupport,
  1061. IN LOGICAL DoAging,
  1062. IN PWSLE_NUMBER WslesScanned,
  1063. IN OUT PPFN_NUMBER TotalClaim,
  1064. IN OUT PPFN_NUMBER TotalEstimatedAvailable
  1065. )
  1066. /*++
  1067. Routine Description:
  1068. Age pages (clear the access bit or if the page hasn't been
  1069. accessed, increment the age) for a portion of the working
  1070. set. Also, walk through a sample of the working set
  1071. building a set of counts of how old the pages are.
  1072. The counts are used to create a claim of the amount
  1073. the system can steal from this process if memory
  1074. becomes tight.
  1075. Arguments:
  1076. VmSupport - Supplies the VM support structure to age and estimate.
  1077. DoAging - TRUE if pages are to be aged. Regardless, the pages will be
  1078. added to the availability estimation.
  1079. WslesScanned - Total numbers of WSLEs scanned on this sweep, used as a
  1080. control to prevent excessive aging on large systems with
  1081. many processes.
  1082. TotalClaim - Supplies a pointer to system wide claim to update.
  1083. TotalEstimatedAvailable - Supplies a pointer to system wide estimate
  1084. to update.
  1085. Return Value:
  1086. None
  1087. Environment:
  1088. Kernel mode, APCs disabled, working set mutex. PFN lock NOT held.
  1089. --*/
  1090. {
  1091. LOGICAL RecalculateShift;
  1092. WSLE_NUMBER LastEntry;
  1093. WSLE_NUMBER StartEntry;
  1094. WSLE_NUMBER FirstDynamic;
  1095. WSLE_NUMBER CurrentEntry;
  1096. PMMWSL WorkingSetList;
  1097. PMMWSLE Wsle;
  1098. PMMPTE PointerPte;
  1099. WSLE_NUMBER NumberToExamine;
  1100. WSLE_NUMBER Claim;
  1101. ULONG Estimate;
  1102. ULONG SampledAgeCounts[MI_USE_AGE_COUNT] = {0};
  1103. MI_NEXT_ESTIMATION_SLOT_CONST NextConst;
  1104. WSLE_NUMBER SampleSize;
  1105. WSLE_NUMBER AgeSize;
  1106. ULONG CounterShift;
  1107. WSLE_NUMBER Temp;
  1108. ULONG i;
  1109. WorkingSetList = VmSupport->VmWorkingSetList;
  1110. Wsle = WorkingSetList->Wsle;
  1111. AgeSize = 0;
  1112. LastEntry = WorkingSetList->LastEntry;
  1113. FirstDynamic = WorkingSetList->FirstDynamic;
  1114. if (DoAging == TRUE) {
  1115. //
  1116. // Clear the used bits or increment the age of a portion of the
  1117. // working set.
  1118. //
  1119. // Try to walk the entire working set every 2^MI_AGE_AGING_SHIFT
  1120. // seconds.
  1121. //
  1122. if (VmSupport->WorkingSetSize > WorkingSetList->FirstDynamic) {
  1123. NumberToExamine = (VmSupport->WorkingSetSize - WorkingSetList->FirstDynamic) >> MiAgingShift;
  1124. //
  1125. // Bigger machines can easily have working sets that span
  1126. // terabytes so limit the absolute walk.
  1127. //
  1128. if (NumberToExamine > MI_MAXIMUM_SAMPLE) {
  1129. NumberToExamine = MI_MAXIMUM_SAMPLE;
  1130. }
  1131. //
  1132. // In addition to large working sets, bigger machines may also
  1133. // have huge numbers of processes - checking the aggregate number
  1134. // of working set list entries scanned prevents this situation
  1135. // from triggering excessive scanning.
  1136. //
  1137. if ((WslesScanned != NULL) &&
  1138. (*WslesScanned >= MiMaximumWslesPerSweep)) {
  1139. NumberToExamine = 64;
  1140. }
  1141. AgeSize = NumberToExamine;
  1142. CurrentEntry = VmSupport->NextAgingSlot;
  1143. if (CurrentEntry > LastEntry || CurrentEntry < FirstDynamic) {
  1144. CurrentEntry = FirstDynamic;
  1145. }
  1146. if (Wsle[CurrentEntry].u1.e1.Valid == 0) {
  1147. MI_NEXT_VALID_AGING_SLOT(CurrentEntry, FirstDynamic, LastEntry, Wsle);
  1148. }
  1149. while (NumberToExamine != 0) {
  1150. PointerPte = MiGetPteAddress (Wsle[CurrentEntry].u1.VirtualAddress);
  1151. if (MI_GET_ACCESSED_IN_PTE(PointerPte) == 1) {
  1152. MI_SET_ACCESSED_IN_PTE(PointerPte, 0);
  1153. MI_RESET_WSLE_AGE(PointerPte, &Wsle[CurrentEntry]);
  1154. }
  1155. else {
  1156. MI_INC_WSLE_AGE(PointerPte, &Wsle[CurrentEntry]);
  1157. }
  1158. NumberToExamine -= 1;
  1159. MI_NEXT_VALID_AGING_SLOT(CurrentEntry, FirstDynamic, LastEntry, Wsle);
  1160. }
  1161. VmSupport->NextAgingSlot = CurrentEntry + 1; // Start here next time
  1162. }
  1163. }
  1164. //
  1165. // Estimate the number of unused pages in the working set.
  1166. //
  1167. // The working set may have shrunk or the non-paged portion may have
  1168. // grown since the last time. Put the next counter at the FirstDynamic
  1169. // if so.
  1170. //
  1171. CurrentEntry = VmSupport->NextEstimationSlot;
  1172. if (CurrentEntry > LastEntry || CurrentEntry < FirstDynamic) {
  1173. CurrentEntry = FirstDynamic;
  1174. }
  1175. //
  1176. // When aging, walk the entire working set every 2^MiEstimationShift
  1177. // seconds.
  1178. //
  1179. CounterShift = 0;
  1180. SampleSize = 0;
  1181. if (VmSupport->WorkingSetSize > WorkingSetList->FirstDynamic) {
  1182. RecalculateShift = FALSE;
  1183. SampleSize = VmSupport->WorkingSetSize - WorkingSetList->FirstDynamic;
  1184. NumberToExamine = SampleSize >> MiEstimationShift;
  1185. //
  1186. // Bigger machines may have huge numbers of processes - checking the
  1187. // aggregate number of working set list entries scanned prevents this
  1188. // situation from triggering excessive scanning.
  1189. //
  1190. if ((WslesScanned != NULL) &&
  1191. (*WslesScanned >= MiMaximumWslesPerSweep)) {
  1192. RecalculateShift = TRUE;
  1193. }
  1194. else if (NumberToExamine > MI_MAXIMUM_SAMPLE) {
  1195. //
  1196. // Bigger machines can easily have working sets that span
  1197. // terabytes so limit the absolute walk.
  1198. //
  1199. NumberToExamine = MI_MAXIMUM_SAMPLE;
  1200. Temp = SampleSize >> MI_MINIMUM_SAMPLE_SHIFT;
  1201. SampleSize = MI_MAXIMUM_SAMPLE;
  1202. //
  1203. // Calculate the necessary counter shift to estimate pages
  1204. // in use.
  1205. //
  1206. for ( ; Temp != 0; Temp = Temp >> 1) {
  1207. CounterShift += 1;
  1208. }
  1209. }
  1210. else if (NumberToExamine >= MI_MINIMUM_SAMPLE) {
  1211. //
  1212. // Ensure that NumberToExamine is at least the minimum size.
  1213. //
  1214. SampleSize = NumberToExamine;
  1215. CounterShift = MiEstimationShift;
  1216. }
  1217. else if (SampleSize > MI_MINIMUM_SAMPLE) {
  1218. RecalculateShift = TRUE;
  1219. }
  1220. if (RecalculateShift == TRUE) {
  1221. Temp = SampleSize >> MI_MINIMUM_SAMPLE_SHIFT;
  1222. SampleSize = MI_MINIMUM_SAMPLE;
  1223. //
  1224. // Calculate the necessary counter shift to estimate pages
  1225. // in use.
  1226. //
  1227. for ( ; Temp != 0; Temp = Temp >> 1) {
  1228. CounterShift += 1;
  1229. }
  1230. }
  1231. ASSERT (SampleSize != 0);
  1232. MI_CALC_NEXT_ESTIMATION_SLOT_CONST(NextConst, WorkingSetList);
  1233. StartEntry = FirstDynamic;
  1234. if (Wsle[CurrentEntry].u1.e1.Valid == 0) {
  1235. MI_NEXT_VALID_ESTIMATION_SLOT (CurrentEntry,
  1236. StartEntry,
  1237. FirstDynamic,
  1238. LastEntry,
  1239. NextConst,
  1240. Wsle);
  1241. }
  1242. for (i = 0; i < SampleSize; i += 1) {
  1243. PointerPte = MiGetPteAddress (Wsle[CurrentEntry].u1.VirtualAddress);
  1244. if (MI_GET_ACCESSED_IN_PTE(PointerPte) == 0) {
  1245. MI_UPDATE_USE_ESTIMATE (PointerPte,
  1246. &Wsle[CurrentEntry],
  1247. SampledAgeCounts);
  1248. }
  1249. if (i == NumberToExamine - 1) {
  1250. //
  1251. // Start estimation here next time.
  1252. //
  1253. VmSupport->NextEstimationSlot = CurrentEntry + 1;
  1254. }
  1255. MI_NEXT_VALID_ESTIMATION_SLOT (CurrentEntry,
  1256. StartEntry,
  1257. FirstDynamic,
  1258. LastEntry,
  1259. NextConst,
  1260. Wsle);
  1261. }
  1262. }
  1263. if (SampleSize < AgeSize) {
  1264. SampleSize = AgeSize;
  1265. }
  1266. if (WslesScanned != NULL) {
  1267. *WslesScanned += SampleSize;
  1268. }
  1269. Estimate = MI_CALCULATE_USAGE_ESTIMATE(SampledAgeCounts, CounterShift);
  1270. Claim = VmSupport->Claim + MI_CLAIM_INCR;
  1271. if (Claim > Estimate) {
  1272. Claim = Estimate;
  1273. }
  1274. VmSupport->Claim = Claim;
  1275. VmSupport->EstimatedAvailable = Estimate;
  1276. PERFINFO_WSMANAGE_DUMPWS(VmSupport, SampledAgeCounts);
  1277. VmSupport->GrowthSinceLastEstimate = 0;
  1278. *TotalClaim += Claim >> ((VmSupport->Flags.MemoryPriority == MEMORY_PRIORITY_FOREGROUND)
  1279. ? MI_FOREGROUND_CLAIM_AVAILABLE_SHIFT
  1280. : MI_BACKGROUND_CLAIM_AVAILABLE_SHIFT);
  1281. *TotalEstimatedAvailable += Estimate;
  1282. return;
  1283. }
  1284. ULONG MiClaimAdjustmentThreshold[8] = { 0, 0, 4000, 8000, 12000, 24000, 32000, 32000};
  1285. VOID
  1286. MiAdjustClaimParameters (
  1287. IN LOGICAL EnoughPages
  1288. )
  1289. /*++
  1290. Routine Description:
  1291. Adjust the rate at which we walk through working sets. If we have
  1292. enough pages (we aren't trimming pages that aren't considered young),
  1293. then we check to see whether we should decrease the aging rate and
  1294. vice versa.
  1295. The limits for the aging rate are 1/8 and 1/128 of the working sets.
  1296. This means that the finest age granularities are 8 to 128 seconds in
  1297. these cases. With the current 2 bit counter, at the low end we would
  1298. start trimming pages > 16 seconds old and at the high end > 4 minutes.
  1299. Arguments:
  1300. EnoughPages - Supplies whether to increase the rate or decrease it.
  1301. Return Value:
  1302. None.
  1303. Environment:
  1304. Kernel mode.
  1305. --*/
  1306. {
  1307. LARGE_INTEGER CurrentTime;
  1308. KeQuerySystemTime (&CurrentTime);
  1309. if (EnoughPages == TRUE &&
  1310. ((MmTotalClaim + MmAvailablePages) > MiClaimAdjustmentThreshold[MiAgingShift])) {
  1311. //
  1312. // Don't adjust the rate too frequently, don't go over the limit, and
  1313. // make sure there are enough claimed and/or available.
  1314. //
  1315. if (((CurrentTime.QuadPart - MiLastAdjustmentOfClaimParams.QuadPart) >
  1316. MmClaimParameterAdjustUpTime.QuadPart) &&
  1317. (MiAgingShift < MI_MAXIMUM_AGING_SHIFT ) ) {
  1318. //
  1319. // Set the time only when we change the rate.
  1320. //
  1321. MiLastAdjustmentOfClaimParams.QuadPart = CurrentTime.QuadPart;
  1322. MiAgingShift += 1;
  1323. MiEstimationShift += 1;
  1324. }
  1325. }
  1326. else if ((EnoughPages == FALSE) ||
  1327. (MmTotalClaim + MmAvailablePages) < MiClaimAdjustmentThreshold[MiAgingShift - 1]) {
  1328. //
  1329. // Don't adjust the rate down too frequently.
  1330. //
  1331. if ((CurrentTime.QuadPart - MiLastAdjustmentOfClaimParams.QuadPart) >
  1332. MmClaimParameterAdjustDownTime.QuadPart) {
  1333. //
  1334. // Always set the time so we don't adjust up too soon after
  1335. // a 2nd pass trim.
  1336. //
  1337. MiLastAdjustmentOfClaimParams.QuadPart = CurrentTime.QuadPart;
  1338. //
  1339. // Don't go under the limit.
  1340. //
  1341. if (MiAgingShift > 3) {
  1342. MiAgingShift -= 1;
  1343. MiEstimationShift -= 1;
  1344. }
  1345. }
  1346. }
  1347. }
  1348. #define MM_WS_REORG_BUCKETS_MAX 7
  1349. #if DBG
  1350. ULONG MiSessionIdleBuckets[MM_WS_REORG_BUCKETS_MAX];
  1351. #endif
  1352. VOID
  1353. MiRearrangeWorkingSetExpansionList (
  1354. VOID
  1355. )
  1356. /*++
  1357. Routine Description:
  1358. This function arranges the working set list into different
  1359. groups based upon the claim. This is done so the working set
  1360. trimming will take place on fat processes first.
  1361. The working sets are sorted into buckets and then linked back up.
  1362. Swapped out sessions and processes are put at the front.
  1363. Arguments:
  1364. None.
  1365. Return Value:
  1366. None.
  1367. Environment:
  1368. Kernel mode, no locks held.
  1369. --*/
  1370. {
  1371. KIRQL OldIrql;
  1372. PLIST_ENTRY ListEntry;
  1373. PMMSUPPORT VmSupport;
  1374. int Size;
  1375. int PreviousNonEmpty;
  1376. int NonEmpty;
  1377. LIST_ENTRY ListHead[MM_WS_REORG_BUCKETS_MAX];
  1378. LARGE_INTEGER CurrentTime;
  1379. LARGE_INTEGER SessionIdleTime;
  1380. ULONG IdleTime;
  1381. PMM_SESSION_SPACE SessionGlobal;
  1382. KeQuerySystemTime (&CurrentTime);
  1383. if (IsListEmpty (&MmWorkingSetExpansionHead.ListHead)) {
  1384. return;
  1385. }
  1386. for (Size = 0 ; Size < MM_WS_REORG_BUCKETS_MAX; Size++) {
  1387. InitializeListHead (&ListHead[Size]);
  1388. }
  1389. LOCK_EXPANSION (OldIrql);
  1390. while (!IsListEmpty (&MmWorkingSetExpansionHead.ListHead)) {
  1391. ListEntry = RemoveHeadList (&MmWorkingSetExpansionHead.ListHead);
  1392. VmSupport = CONTAINING_RECORD(ListEntry,
  1393. MMSUPPORT,
  1394. WorkingSetExpansionLinks);
  1395. if (VmSupport->Flags.TrimHard == 1) {
  1396. ASSERT (VmSupport->Flags.SessionSpace == 1);
  1397. SessionGlobal = CONTAINING_RECORD (VmSupport,
  1398. MM_SESSION_SPACE,
  1399. Vm);
  1400. SessionIdleTime.QuadPart = CurrentTime.QuadPart - SessionGlobal->LastProcessSwappedOutTime.QuadPart;
  1401. #if DBG
  1402. if (MmDebug & MM_DBG_SESSIONS) {
  1403. DbgPrint ("Mm: Session %d heavily trim/aged - all its processes (%d) swapped out %d seconds ago\n",
  1404. SessionGlobal->SessionId,
  1405. SessionGlobal->ReferenceCount,
  1406. (ULONG)(SessionIdleTime.QuadPart / 10000000));
  1407. }
  1408. #endif
  1409. if (SessionIdleTime.QuadPart < 0) {
  1410. //
  1411. // The administrator has moved the system time backwards.
  1412. // Give this session a fresh start.
  1413. //
  1414. SessionIdleTime.QuadPart = 0;
  1415. KeQuerySystemTime (&SessionGlobal->LastProcessSwappedOutTime);
  1416. }
  1417. IdleTime = (ULONG) (SessionIdleTime.QuadPart / 10000000);
  1418. }
  1419. else {
  1420. IdleTime = 0;
  1421. }
  1422. if (VmSupport->Flags.MemoryPriority == MEMORY_PRIORITY_FOREGROUND) {
  1423. //
  1424. // Put the foreground processes at the end of the list,
  1425. // to give them priority.
  1426. //
  1427. Size = 6;
  1428. }
  1429. else {
  1430. if (VmSupport->Claim > 400) {
  1431. Size = 0;
  1432. }
  1433. else if (IdleTime > 30) {
  1434. Size = 0;
  1435. #if DBG
  1436. MiSessionIdleBuckets[Size] += 1;
  1437. #endif
  1438. }
  1439. else if (VmSupport->Claim > 200) {
  1440. Size = 1;
  1441. }
  1442. else if (IdleTime > 20) {
  1443. Size = 1;
  1444. #if DBG
  1445. MiSessionIdleBuckets[Size] += 1;
  1446. #endif
  1447. }
  1448. else if (VmSupport->Claim > 100) {
  1449. Size = 2;
  1450. }
  1451. else if (IdleTime > 10) {
  1452. Size = 2;
  1453. #if DBG
  1454. MiSessionIdleBuckets[Size] += 1;
  1455. #endif
  1456. }
  1457. else if (VmSupport->Claim > 50) {
  1458. Size = 3;
  1459. }
  1460. else if (IdleTime) {
  1461. Size = 3;
  1462. #if DBG
  1463. MiSessionIdleBuckets[Size] += 1;
  1464. #endif
  1465. }
  1466. else if (VmSupport->Claim > 25) {
  1467. Size = 4;
  1468. }
  1469. else {
  1470. Size = 5;
  1471. #if DBG
  1472. if (VmSupport->Flags.SessionSpace == 1) {
  1473. MiSessionIdleBuckets[Size] += 1;
  1474. }
  1475. #endif
  1476. }
  1477. }
  1478. #if DBG
  1479. if (MmDebug & MM_DBG_WS_EXPANSION) {
  1480. DbgPrint("MM-rearrange: TrimHard = %d, WS Size = 0x%x, Claim 0x%x, Bucket %d\n",
  1481. VmSupport->Flags.TrimHard,
  1482. VmSupport->WorkingSetSize,
  1483. VmSupport->Claim,
  1484. Size);
  1485. }
  1486. #endif //DBG
  1487. //
  1488. // Note: this reverses the bucket order each time we
  1489. // reorganize the lists. This may be good or bad -
  1490. // if you change it you may want to think about it.
  1491. //
  1492. InsertHeadList (&ListHead[Size],
  1493. &VmSupport->WorkingSetExpansionLinks);
  1494. }
  1495. //
  1496. // Find the first non-empty list.
  1497. //
  1498. for (NonEmpty = 0 ; NonEmpty < MM_WS_REORG_BUCKETS_MAX ; NonEmpty += 1) {
  1499. if (!IsListEmpty (&ListHead[NonEmpty])) {
  1500. break;
  1501. }
  1502. }
  1503. //
  1504. // Put the head of first non-empty list at the beginning
  1505. // of the MmWorkingSetExpansion list.
  1506. //
  1507. MmWorkingSetExpansionHead.ListHead.Flink = ListHead[NonEmpty].Flink;
  1508. ListHead[NonEmpty].Flink->Blink = &MmWorkingSetExpansionHead.ListHead;
  1509. PreviousNonEmpty = NonEmpty;
  1510. //
  1511. // Link the rest of the lists together.
  1512. //
  1513. for (NonEmpty += 1; NonEmpty < MM_WS_REORG_BUCKETS_MAX; NonEmpty += 1) {
  1514. if (!IsListEmpty (&ListHead[NonEmpty])) {
  1515. ListHead[PreviousNonEmpty].Blink->Flink = ListHead[NonEmpty].Flink;
  1516. ListHead[NonEmpty].Flink->Blink = ListHead[PreviousNonEmpty].Blink;
  1517. PreviousNonEmpty = NonEmpty;
  1518. }
  1519. }
  1520. //
  1521. // Link the tail of last non-empty to the MmWorkingSetExpansion list.
  1522. //
  1523. MmWorkingSetExpansionHead.ListHead.Blink = ListHead[PreviousNonEmpty].Blink;
  1524. ListHead[PreviousNonEmpty].Blink->Flink = &MmWorkingSetExpansionHead.ListHead;
  1525. UNLOCK_EXPANSION (OldIrql);
  1526. return;
  1527. }
  1528. VOID
  1529. MmEmptyAllWorkingSets (
  1530. VOID
  1531. )
  1532. /*++
  1533. Routine Description:
  1534. This routine attempts to empty all the working sets on the
  1535. expansion list.
  1536. Arguments:
  1537. None.
  1538. Return Value:
  1539. None.
  1540. Environment:
  1541. Kernel mode. No locks held. APC level or below.
  1542. --*/
  1543. {
  1544. KIRQL OldIrql;
  1545. ASSERT (KeGetCurrentIrql () <= APC_LEVEL);
  1546. ASSERT (PsGetCurrentThread () != MmWorkingSetThread);
  1547. //
  1548. // For session working sets, we cannot attach directly to the session
  1549. // space to be trimmed because it would result in session space
  1550. // references by other threads in this process to the attached session
  1551. // instead of the (currently) correct one. In fact, we cannot even queue
  1552. // this to a worker thread because the working set manager
  1553. // (who shares the same page directory) may be attaching or
  1554. // detaching from a session (any session). So this must be queued
  1555. // to the working set manager.
  1556. //
  1557. LOCK_EXPANSION (OldIrql);
  1558. if (MiWaitingForWorkingSetEmpty == FALSE) {
  1559. MiWaitingForWorkingSetEmpty = TRUE;
  1560. KeClearEvent (&MiWaitForEmptyEvent);
  1561. }
  1562. UNLOCK_EXPANSION (OldIrql);
  1563. KeSetEvent (&MmWorkingSetManagerEvent, 0, FALSE);
  1564. KeWaitForSingleObject (&MiWaitForEmptyEvent,
  1565. WrVirtualMemory,
  1566. KernelMode,
  1567. FALSE,
  1568. (PLARGE_INTEGER)0);
  1569. return;
  1570. }
  1571. //
  1572. // This is deliberately initialized to 1 and only cleared when we have
  1573. // initialized enough of the system working set to support a trim.
  1574. //
  1575. LONG MiTrimInProgressCount = 1;
  1576. ULONG MiTrimAllPageFaultCount;
  1577. LOGICAL
  1578. MmTrimAllSystemPagableMemory (
  1579. IN LOGICAL PurgeTransition
  1580. )
  1581. /*++
  1582. Routine Description:
  1583. This routine unmaps all pagable system memory. This does not unmap user
  1584. memory or locked down kernel memory. Thus, the memory being unmapped
  1585. resides in paged pool, pagable kernel/driver code & data, special pool
  1586. and the system cache.
  1587. Note that pages with a reference count greater than 1 are skipped (ie:
  1588. they remain valid, as they are assumed to be locked down). This prevents
  1589. us from unmapping all of the system cache entries, etc.
  1590. Non-locked down kernel stacks must be outpaged by modifying the balance
  1591. set manager to operate in conjunction with a support routine. This is not
  1592. done here.
  1593. Arguments:
  1594. PurgeTransition - Supplies whether to purge all the clean pages from the
  1595. transition list.
  1596. Return Value:
  1597. TRUE if accomplished, FALSE if not.
  1598. Environment:
  1599. Kernel mode. APC_LEVEL or below.
  1600. --*/
  1601. {
  1602. return MiTrimAllSystemPagableMemory (MI_SYSTEM_GLOBAL, PurgeTransition);
  1603. }
  1604. #if DBG
  1605. LOGICAL
  1606. MmTrimProcessMemory (
  1607. IN LOGICAL PurgeTransition
  1608. )
  1609. /*++
  1610. Routine Description:
  1611. This routine unmaps all of the current process' user memory.
  1612. Arguments:
  1613. PurgeTransition - Supplies whether to purge all the clean pages from the
  1614. transition list.
  1615. Return Value:
  1616. TRUE if accomplished, FALSE if not.
  1617. Environment:
  1618. Kernel mode. APC_LEVEL or below.
  1619. --*/
  1620. {
  1621. return MiTrimAllSystemPagableMemory (MI_USER_LOCAL, PurgeTransition);
  1622. }
  1623. #endif
  1624. LOGICAL
  1625. MiTrimAllSystemPagableMemory (
  1626. IN ULONG MemoryType,
  1627. IN LOGICAL PurgeTransition
  1628. )
  1629. /*++
  1630. Routine Description:
  1631. This routine unmaps all pagable memory of the type specified.
  1632. Note that pages with a reference count greater than 1 are skipped (ie:
  1633. they remain valid, as they are assumed to be locked down). This prevents
  1634. us from unmapping all of the system cache entries, etc.
  1635. Non-locked down kernel stacks must be outpaged by modifying the balance
  1636. set manager to operate in conjunction with a support routine. This is not
  1637. done here.
  1638. Arguments:
  1639. MemoryType - Supplies the type of memory to unmap.
  1640. PurgeTransition - Supplies whether to purge all the clean pages from the
  1641. transition list.
  1642. Return Value:
  1643. TRUE if accomplished, FALSE if not.
  1644. Environment:
  1645. Kernel mode. APC_LEVEL or below.
  1646. --*/
  1647. {
  1648. LOGICAL Status;
  1649. KIRQL OldIrql;
  1650. PMMSUPPORT VmSupport;
  1651. WSLE_NUMBER PagesInUse;
  1652. LOGICAL LockAvailable;
  1653. PETHREAD CurrentThread;
  1654. PEPROCESS Process;
  1655. PMM_SESSION_SPACE SessionGlobal;
  1656. #if defined(_X86_)
  1657. ULONG flags;
  1658. #endif
  1659. //
  1660. // It's ok to check this without acquiring the system WS lock.
  1661. //
  1662. if (MemoryType == MI_SYSTEM_GLOBAL) {
  1663. if (MiTrimAllPageFaultCount == MmSystemCacheWs.PageFaultCount) {
  1664. return FALSE;
  1665. }
  1666. }
  1667. else if (MemoryType == MI_USER_LOCAL) {
  1668. }
  1669. else {
  1670. ASSERT (MemoryType == MI_SESSION_LOCAL);
  1671. }
  1672. //
  1673. // Working set mutexes will be acquired which require APC_LEVEL or below.
  1674. //
  1675. if (KeGetCurrentIrql () > APC_LEVEL) {
  1676. return FALSE;
  1677. }
  1678. //
  1679. // Just return if it's too early during system initialization or if
  1680. // another thread/processor is racing here to do the work for us.
  1681. //
  1682. if (InterlockedIncrement (&MiTrimInProgressCount) > 1) {
  1683. InterlockedDecrement (&MiTrimInProgressCount);
  1684. return FALSE;
  1685. }
  1686. #if defined(_X86_)
  1687. _asm {
  1688. pushfd
  1689. pop eax
  1690. mov flags, eax
  1691. }
  1692. if ((flags & EFLAGS_INTERRUPT_MASK) == 0) {
  1693. InterlockedDecrement (&MiTrimInProgressCount);
  1694. return FALSE;
  1695. }
  1696. #endif
  1697. #if defined(_AMD64_)
  1698. if ((GetCallersEflags () & EFLAGS_IF_MASK) == 0) {
  1699. InterlockedDecrement (&MiTrimInProgressCount);
  1700. return FALSE;
  1701. }
  1702. #endif
  1703. CurrentThread = PsGetCurrentThread ();
  1704. //
  1705. // Don't acquire mutexes if the thread is at priority 0 (ie: zeropage
  1706. // thread) because this priority is not boosted - so a preemption that
  1707. // occurs after a WS mutex is acquired can result in the thread never
  1708. // running again and then all the other threads will be denied the mutex.
  1709. //
  1710. if (CurrentThread->Tcb.Priority == 0) {
  1711. InterlockedDecrement (&MiTrimInProgressCount);
  1712. return FALSE;
  1713. }
  1714. //
  1715. // If the WS mutex is not readily available then just return.
  1716. //
  1717. if (MemoryType == MI_SYSTEM_GLOBAL) {
  1718. Process = NULL;
  1719. VmSupport = &MmSystemCacheWs;
  1720. if (KeTryToAcquireGuardedMutex (&VmSupport->WorkingSetMutex) == FALSE) {
  1721. InterlockedDecrement (&MiTrimInProgressCount);
  1722. return FALSE;
  1723. }
  1724. MM_SYSTEM_WS_LOCK_TIMESTAMP ();
  1725. }
  1726. else if (MemoryType == MI_USER_LOCAL) {
  1727. Process = PsGetCurrentProcessByThread (CurrentThread);
  1728. VmSupport = &Process->Vm;
  1729. if (KeTryToAcquireGuardedMutex (&VmSupport->WorkingSetMutex) == FALSE) {
  1730. InterlockedDecrement (&MiTrimInProgressCount);
  1731. return FALSE;
  1732. }
  1733. LOCK_WS_TIMESTAMP (Process);
  1734. //
  1735. // If the process is exiting then just return.
  1736. //
  1737. if (Process->Flags & PS_PROCESS_FLAGS_VM_DELETED) {
  1738. UNLOCK_WS (Process);
  1739. InterlockedDecrement (&MiTrimInProgressCount);
  1740. return FALSE;
  1741. }
  1742. ASSERT (!MI_IS_WS_UNSAFE(Process));
  1743. }
  1744. else {
  1745. ASSERT (MemoryType == MI_SESSION_LOCAL);
  1746. Process = PsGetCurrentProcessByThread (CurrentThread);
  1747. if (((Process->Flags & PS_PROCESS_FLAGS_IN_SESSION) == 0) ||
  1748. (Process->Vm.Flags.SessionLeader == 1)) {
  1749. InterlockedDecrement (&MiTrimInProgressCount);
  1750. return FALSE;
  1751. }
  1752. SessionGlobal = SESSION_GLOBAL (MmSessionSpace);
  1753. //
  1754. // If the WS mutex is not readily available then just return.
  1755. //
  1756. VmSupport = &SessionGlobal->Vm;
  1757. if (KeTryToAcquireGuardedMutex (&VmSupport->WorkingSetMutex) == FALSE) {
  1758. InterlockedDecrement (&MiTrimInProgressCount);
  1759. return FALSE;
  1760. }
  1761. }
  1762. Status = FALSE;
  1763. //
  1764. // If the expansion lock is not available then just return.
  1765. //
  1766. LockAvailable = KeTryToAcquireSpinLock (&MmExpansionLock, &OldIrql);
  1767. if (LockAvailable == FALSE) {
  1768. goto Bail;
  1769. }
  1770. MM_SET_EXPANSION_OWNER ();
  1771. if (VmSupport->WorkingSetExpansionLinks.Flink <= MM_WS_SWAPPED_OUT) {
  1772. UNLOCK_EXPANSION (OldIrql);
  1773. goto Bail;
  1774. }
  1775. RemoveEntryList (&VmSupport->WorkingSetExpansionLinks);
  1776. VmSupport->WorkingSetExpansionLinks.Flink = MM_WS_TRIMMING;
  1777. VmSupport->WorkingSetExpansionLinks.Blink = NULL;
  1778. if (MemoryType == MI_SYSTEM_GLOBAL) {
  1779. MiTrimAllPageFaultCount = VmSupport->PageFaultCount;
  1780. }
  1781. PagesInUse = VmSupport->WorkingSetSize;
  1782. //
  1783. // There are 2 issues here that are carefully dealt with :
  1784. //
  1785. // 1. APCs must be disabled while any resources are held to prevent
  1786. // suspend APCs from deadlocking the system.
  1787. //
  1788. // 2. Once the working set has been marked MM_WS_TRIMMING,
  1789. // either the thread must not be preempted or the working
  1790. // set mutex must be held throughout. Otherwise a high priority thread
  1791. // can fault on a system code and data address and the two pages will
  1792. // thrash forever (at high priority) because no system working set
  1793. // expansion is allowed while TRIMMING is set.
  1794. //
  1795. // Thus, the decision was to hold the working set mutex throughout.
  1796. //
  1797. UNLOCK_EXPANSION (OldIrql);
  1798. MiEmptyWorkingSet (VmSupport, FALSE);
  1799. LOCK_EXPANSION (OldIrql);
  1800. ASSERT (VmSupport->WorkingSetExpansionLinks.Flink == MM_WS_TRIMMING);
  1801. if (VmSupport->WorkingSetExpansionLinks.Blink == NULL) {
  1802. //
  1803. // Reinsert this working set at the tail of the list.
  1804. //
  1805. InsertTailList (&MmWorkingSetExpansionHead.ListHead,
  1806. &VmSupport->WorkingSetExpansionLinks);
  1807. }
  1808. else {
  1809. //
  1810. // The process is terminating - the value in the blink
  1811. // is the address of an event to set.
  1812. //
  1813. ASSERT (VmSupport != &MmSystemCacheWs);
  1814. VmSupport->WorkingSetExpansionLinks.Flink = MM_WS_NOT_LISTED;
  1815. KeSetEvent ((PKEVENT)VmSupport->WorkingSetExpansionLinks.Blink,
  1816. 0,
  1817. FALSE);
  1818. }
  1819. UNLOCK_EXPANSION (OldIrql);
  1820. Status = TRUE;
  1821. Bail:
  1822. UNLOCK_WORKING_SET (VmSupport);
  1823. ASSERT (KeGetCurrentIrql() <= APC_LEVEL);
  1824. if ((PurgeTransition == TRUE) && (Status == TRUE)) {
  1825. MiPurgeTransitionList ();
  1826. }
  1827. InterlockedDecrement (&MiTrimInProgressCount);
  1828. return Status;
  1829. }