Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

2815 lines
73 KiB

  1. /*++
  2. Copyright (c) 1990 Microsoft Corporation
  3. Module Name:
  4. wsmanage.c
  5. Abstract:
  6. This module contains routines which manage the set of active working
  7. set lists.
  8. Working set management is accomplished by a parallel group of actions
  9. 1. Writing modified pages.
  10. 2. Trimming working sets by :
  11. a) Aging pages by turning off access bits and incrementing age
  12. counts for pages which haven't been accessed.
  13. b) Estimating the number of unused pages in a working set and
  14. keeping a global count of that estimate.
  15. c) When getting tight on memory, replacing rather than adding
  16. pages in a working set when a fault occurs in a working set
  17. that has a significant proportion of unused pages.
  18. d) When memory is tight, reducing (trimming) working sets which
  19. are above their maximum towards their minimum. This is done
  20. especially if there are a large number of available pages
  21. in it.
  22. The metrics are set such that writing modified pages is typically
  23. accomplished before trimming working sets, however, under certain cases
  24. where modified pages are being generated at a very high rate, working
  25. set trimming will be initiated to free up more pages.
  26. When the first thread in a process is created, the memory management
  27. system is notified that working set expansion is allowed. This
  28. is noted by changing the FLINK field of the WorkingSetExpansionLink
  29. entry in the process control block from MM_NO_WS_EXPANSION to
  30. MM_ALLOW_WS_EXPANSION. As threads fault, the working set is eligible
  31. for expansion if ample pages exist (MmAvailablePages is high enough).
  32. Once a process has had its working set raised above the minimum
  33. specified, the process is put on the Working Set Expanded list and
  34. is now eligible for trimming. Note that at this time the FLINK field
  35. in the WorkingSetExpansionLink has an address value.
  36. When working set trimming is initiated, a process is removed from the
  37. list (the expansion lock guards this list) and the FLINK field is set
  38. to MM_NO_WS_EXPANSION, also, the BLINK field is set to
  39. MM_WS_EXPANSION_IN_PROGRESS. The BLINK field value indicates to
  40. the MmCleanUserAddressSpace function that working set trimming is
  41. in progress for this process and it should wait until it completes.
  42. This is accomplished by creating an event, putting the address of the
  43. event in the BLINK field and then releasing the expansion lock and
  44. waiting on the event atomically. When working set trimming is
  45. complete, the BLINK field is no longer MM_WS_EXPANSION_IN_PROGRESS
  46. indicating that the event should be set.
  47. Author:
  48. Lou Perazzoli (loup) 10-Apr-1990
  49. Landy Wang (landyw) 02-Jun-1997
  50. Revision History:
  51. --*/
  52. #include "mi.h"
  53. #ifdef ALLOC_PRAGMA
  54. #pragma alloc_text(INIT, MiAdjustWorkingSetManagerParameters)
  55. #pragma alloc_text(PAGE, MmIsMemoryAvailable)
  56. #pragma alloc_text(PAGEVRFY, MmTrimSessionMemory)
  57. #if !DBG
  58. #pragma alloc_text(PAGEVRFY, MmTrimProcessMemory)
  59. #endif
  60. #endif
  61. KEVENT MiWaitForEmptyEvent;
  62. BOOLEAN MiWaitingForWorkingSetEmpty;
  63. LOGICAL MiReplacing = FALSE;
  64. extern ULONG MiStandbyRemoved;
  65. PFN_NUMBER MmMoreThanEnoughFreePages = 1000;
  66. #define MI_MAXIMUM_AGING_SHIFT 7
  67. ULONG MiAgingShift = 4;
  68. ULONG MiEstimationShift = 5;
  69. PFN_NUMBER MmTotalClaim = 0;
  70. PFN_NUMBER MmTotalEstimatedAvailable = 0;
  71. LARGE_INTEGER MiLastAdjustmentOfClaimParams;
  72. //
  73. // Sixty seconds.
  74. //
  75. const LARGE_INTEGER MmClaimParameterAdjustUpTime = {60 * 1000 * 1000 * 10, 0};
  76. //
  77. // 2 seconds.
  78. //
  79. const LARGE_INTEGER MmClaimParameterAdjustDownTime = {2 * 1000 * 1000 * 10, 0};
  80. ULONG MmPlentyFreePages = 400;
  81. ULONG MmEnormousFreePages = 20000;
  82. ULONG MmPlentyFreePagesValue;
  83. LOGICAL MiHardTrim = FALSE;
  84. WSLE_NUMBER MiMaximumWslesPerSweep = (1024 * 1024 * 1024) / PAGE_SIZE;
  85. #define MI_MAXIMUM_SAMPLE 8192
  86. #define MI_MINIMUM_SAMPLE 64
  87. #define MI_MINIMUM_SAMPLE_SHIFT 7
  88. #if DBG
  89. PETHREAD MmWorkingSetThread;
  90. #endif
  91. //
  92. // Number of times to retry when the target working set's mutex is not
  93. // readily available.
  94. //
  95. ULONG MiWsRetryCount = 5;
  96. typedef struct _MMWS_TRIM_CRITERIA {
  97. ULONG NumPasses;
  98. ULONG TrimAge;
  99. PFN_NUMBER DesiredFreeGoal;
  100. PFN_NUMBER NewTotalClaim;
  101. PFN_NUMBER NewTotalEstimatedAvailable;
  102. LOGICAL DoAging;
  103. LOGICAL TrimAllPasses;
  104. } MMWS_TRIM_CRITERIA, *PMMWS_TRIM_CRITERIA;
  105. LOGICAL
  106. MiCheckAndSetSystemTrimCriteria (
  107. IN OUT PMMWS_TRIM_CRITERIA Criteria
  108. );
  109. LOGICAL
  110. MiCheckSystemTrimEndCriteria (
  111. IN OUT PMMWS_TRIM_CRITERIA Criteria,
  112. IN KIRQL OldIrql
  113. );
  114. WSLE_NUMBER
  115. MiDetermineWsTrimAmount (
  116. IN PMMWS_TRIM_CRITERIA Criteria,
  117. IN PMMSUPPORT VmSupport
  118. );
  119. VOID
  120. MiAgePagesAndEstimateClaims (
  121. LOGICAL EmptyIt
  122. );
  123. VOID
  124. MiAdjustClaimParameters (
  125. IN LOGICAL EnoughPages
  126. );
  127. VOID
  128. MiRearrangeWorkingSetExpansionList (
  129. VOID
  130. );
  131. VOID
  132. MiAdjustWorkingSetManagerParameters (
  133. IN LOGICAL WorkStation
  134. )
  135. /*++
  136. Routine Description:
  137. This function is called from MmInitSystem to adjust the working set manager
  138. trim algorithms based on system type and size.
  139. Arguments:
  140. WorkStation - TRUE if this is a workstation, FALSE if not.
  141. Return Value:
  142. None.
  143. Environment:
  144. Kernel mode, INIT time only.
  145. --*/
  146. {
  147. if (WorkStation && MmNumberOfPhysicalPages <= 257*1024*1024/PAGE_SIZE) {
  148. MiAgingShift = 3;
  149. MiEstimationShift = 4;
  150. }
  151. else {
  152. MiAgingShift = 5;
  153. MiEstimationShift = 6;
  154. }
  155. if (MmNumberOfPhysicalPages >= 63*1024*1024/PAGE_SIZE) {
  156. MmPlentyFreePages *= 2;
  157. }
  158. MmPlentyFreePagesValue = MmPlentyFreePages;
  159. MiWaitingForWorkingSetEmpty = FALSE;
  160. KeInitializeEvent (&MiWaitForEmptyEvent, NotificationEvent, TRUE);
  161. }
  162. VOID
  163. MiObtainFreePages (
  164. VOID
  165. )
  166. /*++
  167. Routine Description:
  168. This function examines the size of the modified list and the
  169. total number of pages in use because of working set increments
  170. and obtains pages by writing modified pages and/or reducing
  171. working sets.
  172. Arguments:
  173. None.
  174. Return Value:
  175. None.
  176. Environment:
  177. Kernel mode, APCs disabled, working set and PFN mutexes held.
  178. --*/
  179. {
  180. //
  181. // Check to see if there are enough modified pages to institute a
  182. // write.
  183. //
  184. if (MmModifiedPageListHead.Total >= MmModifiedWriteClusterSize) {
  185. //
  186. // Start the modified page writer.
  187. //
  188. KeSetEvent (&MmModifiedPageWriterEvent, 0, FALSE);
  189. }
  190. //
  191. // See if there are enough working sets above the minimum
  192. // threshold to make working set trimming worthwhile.
  193. //
  194. if ((MmPagesAboveWsMinimum > MmPagesAboveWsThreshold) ||
  195. (MmAvailablePages < 5)) {
  196. //
  197. // Start the working set manager to reduce working sets.
  198. //
  199. KeSetEvent (&MmWorkingSetManagerEvent, 0, FALSE);
  200. }
  201. }
  202. LOGICAL
  203. MmIsMemoryAvailable (
  204. IN ULONG PagesDesired
  205. )
  206. /*++
  207. Routine Description:
  208. This function checks whether there are sufficient available pages based
  209. on the caller's request. If currently active pages are needed to satisfy
  210. this request and non-useful ones can be taken, then trimming is initiated
  211. here to do so.
  212. Arguments:
  213. PagesRequested - Supplies the number of pages desired.
  214. Return Value:
  215. TRUE if sufficient pages exist to satisfy the request.
  216. FALSE if not.
  217. Environment:
  218. Kernel mode, PASSIVE_LEVEL.
  219. --*/
  220. {
  221. LOGICAL Status;
  222. ULONG PageTarget;
  223. ULONG PagePlentyTarget;
  224. ULONG i;
  225. ULONG CurrentAvailablePages;
  226. PFN_NUMBER CurrentTotalClaim;
  227. ASSERT (KeGetCurrentIrql () == PASSIVE_LEVEL);
  228. CurrentAvailablePages = MmAvailablePages;
  229. //
  230. // If twice the pages that the caller asked for are available
  231. // without trimming anything, return TRUE.
  232. //
  233. PageTarget = PagesDesired * 2;
  234. if (CurrentAvailablePages >= PageTarget) {
  235. return TRUE;
  236. }
  237. CurrentTotalClaim = MmTotalClaim;
  238. //
  239. // If there are few pages available or claimable, we adjust to do
  240. // a hard trim.
  241. //
  242. if (CurrentAvailablePages + CurrentTotalClaim < PagesDesired) {
  243. MiHardTrim = TRUE;
  244. }
  245. //
  246. // Active pages must be trimmed to satisfy this request and it is believed
  247. // that non-useful pages can be taken to accomplish this.
  248. //
  249. // Set the PagePlentyTarget to 125% of the readlist size and kick it off.
  250. // Our actual trim goal will be 150% of the PagePlentyTarget.
  251. //
  252. PagePlentyTarget = PagesDesired + (PagesDesired >> 2);
  253. MmPlentyFreePages = PagePlentyTarget;
  254. KeSetEvent (&MmWorkingSetManagerEvent, 0, FALSE);
  255. Status = FALSE;
  256. for (i = 0; i < 10; i += 1) {
  257. KeDelayExecutionThread (KernelMode, FALSE, (PLARGE_INTEGER)&Mm30Milliseconds);
  258. if (MmAvailablePages >= PagesDesired) {
  259. Status = TRUE;
  260. break;
  261. }
  262. }
  263. MmPlentyFreePages = MmPlentyFreePagesValue;
  264. MiHardTrim = FALSE;
  265. return Status;
  266. }
  267. LOGICAL
  268. MiAttachAndLockWorkingSet (
  269. IN PMMSUPPORT VmSupport,
  270. IN PLOGICAL InformSessionOfRelease
  271. )
  272. /*++
  273. Routine Description:
  274. This function attaches to the proper address space and acquires the
  275. relevant working set mutex for the address space being trimmed.
  276. If successful, this routine returns with APCs blocked as well.
  277. On failure, this routine returns without any APCs blocked, no working
  278. set mutex acquired and no address space attached to.
  279. Arguments:
  280. VmSupport - Supplies the working set to attach to and lock.
  281. Return Value:
  282. TRUE on success.
  283. FALSE if not.
  284. Environment:
  285. Kernel mode, PASSIVE_LEVEL.
  286. --*/
  287. {
  288. ULONG count;
  289. KIRQL OldIrql;
  290. PEPROCESS ProcessToTrim;
  291. LOGICAL Attached;
  292. PMM_SESSION_SPACE SessionSpace;
  293. ASSERT (KeGetCurrentIrql () == PASSIVE_LEVEL);
  294. *InformSessionOfRelease = FALSE;
  295. if (VmSupport == &MmSystemCacheWs) {
  296. ASSERT (VmSupport->Flags.SessionSpace == 0);
  297. ASSERT (VmSupport->Flags.TrimHard == 0);
  298. //
  299. // System cache,
  300. //
  301. KeRaiseIrql (APC_LEVEL, &OldIrql);
  302. if (!ExTryToAcquireResourceExclusiveLite (&MmSystemWsLock)) {
  303. //
  304. // System working set mutex was not granted, don't trim
  305. // the system cache.
  306. //
  307. KeLowerIrql (OldIrql);
  308. return FALSE;
  309. }
  310. MmSystemLockOwner = PsGetCurrentThread ();
  311. return TRUE;
  312. }
  313. if (VmSupport->Flags.SessionSpace == 0) {
  314. ProcessToTrim = CONTAINING_RECORD (VmSupport, EPROCESS, Vm);
  315. ASSERT ((ProcessToTrim->Flags & PS_PROCESS_FLAGS_VM_DELETED) == 0);
  316. //
  317. // Attach to the process in preparation for trimming.
  318. //
  319. Attached = 0;
  320. if (ProcessToTrim != PsInitialSystemProcess) {
  321. Attached = KeForceAttachProcess (&ProcessToTrim->Pcb);
  322. if (Attached == 0) {
  323. return FALSE;
  324. }
  325. if (ProcessToTrim->Flags & PS_PROCESS_FLAGS_OUTSWAP_ENABLED) {
  326. ASSERT ((ProcessToTrim->Flags & PS_PROCESS_FLAGS_OUTSWAPPED) == 0);
  327. if ((ProcessToTrim->Flags & PS_PROCESS_FLAGS_IN_SESSION ) &&
  328. (VmSupport->Flags.SessionLeader == 0)) {
  329. *InformSessionOfRelease = TRUE;
  330. }
  331. }
  332. }
  333. //
  334. // Attempt to acquire the working set mutex. If the
  335. // lock cannot be acquired, skip over this process.
  336. //
  337. count = 0;
  338. do {
  339. if (ExTryToAcquireFastMutex(&ProcessToTrim->WorkingSetLock) != FALSE) {
  340. ASSERT (VmSupport->Flags.BeingTrimmed == 1);
  341. return TRUE;
  342. }
  343. KeDelayExecutionThread (KernelMode, FALSE, (PLARGE_INTEGER)&MmShortTime);
  344. count += 1;
  345. } while (count < MiWsRetryCount);
  346. //
  347. // Could not get the lock, skip this process.
  348. //
  349. if (*InformSessionOfRelease == TRUE) {
  350. LOCK_EXPANSION (OldIrql);
  351. ASSERT (ProcessToTrim->Flags & PS_PROCESS_FLAGS_OUTSWAP_ENABLED);
  352. PS_CLEAR_BITS (&ProcessToTrim->Flags, PS_PROCESS_FLAGS_OUTSWAP_ENABLED);
  353. ASSERT (MmSessionSpace->ProcessOutSwapCount >= 1);
  354. MmSessionSpace->ProcessOutSwapCount -= 1;
  355. UNLOCK_EXPANSION (OldIrql);
  356. *InformSessionOfRelease = FALSE;
  357. }
  358. if (Attached) {
  359. KeDetachProcess ();
  360. }
  361. return FALSE;
  362. }
  363. SessionSpace = CONTAINING_RECORD (VmSupport, MM_SESSION_SPACE, Vm);
  364. //
  365. // Attach directly to the session space to be trimmed.
  366. //
  367. MiAttachSession (SessionSpace);
  368. //
  369. // Try for the session working set mutex.
  370. //
  371. KeRaiseIrql (APC_LEVEL, &OldIrql);
  372. if (!ExTryToAcquireResourceExclusiveLite (&SessionSpace->WsLock)) {
  373. //
  374. // This session space's working set mutex was not
  375. // granted, don't trim it.
  376. //
  377. KeLowerIrql (OldIrql);
  378. MiDetachSession ();
  379. return FALSE;
  380. }
  381. MM_SET_SESSION_RESOURCE_OWNER (PsGetCurrentThread ());
  382. return TRUE;
  383. }
  384. VOID
  385. MiDetachAndUnlockWorkingSet (
  386. IN PMMSUPPORT VmSupport,
  387. IN LOGICAL InformSessionOfRelease
  388. )
  389. /*++
  390. Routine Description:
  391. This function detaches from the target address space and releases the
  392. relevant working set mutex for the address space that was trimmed.
  393. Arguments:
  394. VmSupport - Supplies the working set to detach from and unlock.
  395. Return Value:
  396. None.
  397. Environment:
  398. Kernel mode, APC_LEVEL.
  399. --*/
  400. {
  401. KIRQL OldIrql;
  402. PEPROCESS ProcessToTrim;
  403. ASSERT (KeGetCurrentIrql () == APC_LEVEL);
  404. if (VmSupport == &MmSystemCacheWs) {
  405. ASSERT (VmSupport->Flags.SessionSpace == 0);
  406. UNLOCK_SYSTEM_WS (PASSIVE_LEVEL);
  407. return;
  408. }
  409. if (VmSupport->Flags.SessionSpace == 0) {
  410. ProcessToTrim = CONTAINING_RECORD (VmSupport, EPROCESS, Vm);
  411. UNLOCK_WS (ProcessToTrim);
  412. ASSERT (KeGetCurrentIrql () == PASSIVE_LEVEL);
  413. if (InformSessionOfRelease == TRUE) {
  414. LOCK_EXPANSION (OldIrql);
  415. ASSERT (ProcessToTrim->Flags & PS_PROCESS_FLAGS_OUTSWAP_ENABLED);
  416. PS_CLEAR_BITS (&ProcessToTrim->Flags, PS_PROCESS_FLAGS_OUTSWAP_ENABLED);
  417. ASSERT (MmSessionSpace->ProcessOutSwapCount >= 1);
  418. MmSessionSpace->ProcessOutSwapCount -= 1;
  419. UNLOCK_EXPANSION (OldIrql);
  420. }
  421. if (ProcessToTrim != PsInitialSystemProcess) {
  422. KeDetachProcess ();
  423. }
  424. return;
  425. }
  426. UNLOCK_SESSION_SPACE_WS (PASSIVE_LEVEL);
  427. MiDetachSession ();
  428. return;
  429. }
  430. VOID
  431. MmWorkingSetManager (
  432. VOID
  433. )
  434. /*++
  435. Routine Description:
  436. Implements the NT working set manager thread. When the number
  437. of free pages becomes critical and ample pages can be obtained by
  438. reducing working sets, the working set manager's event is set, and
  439. this thread becomes active.
  440. Arguments:
  441. None.
  442. Return Value:
  443. None.
  444. Environment:
  445. Kernel mode.
  446. --*/
  447. {
  448. PLIST_ENTRY ListEntry;
  449. WSLE_NUMBER Trim;
  450. KIRQL OldIrql;
  451. PMMSUPPORT VmSupport;
  452. LARGE_INTEGER CurrentTime;
  453. LOGICAL DoTrimming;
  454. LOGICAL InformSessionOfRelease;
  455. MMWS_TRIM_CRITERIA TrimCriteria;
  456. static ULONG Initialized = 0;
  457. PERFINFO_WSMANAGE_DECL();
  458. if (Initialized == 0) {
  459. PsGetCurrentThread()->MemoryMaker = 1;
  460. Initialized = 1;
  461. }
  462. #if DBG
  463. MmWorkingSetThread = PsGetCurrentThread ();
  464. #endif
  465. ASSERT (MmIsAddressValid (MmSessionSpace) == FALSE);
  466. PERFINFO_WSMANAGE_CHECK();
  467. //
  468. // Set the trim criteria: If there are plenty of pages, the existing
  469. // sets are aged and FALSE is returned to signify no trim is necessary.
  470. // Otherwise, the working set expansion list is ordered so the best
  471. // candidates for trimming are placed at the front and TRUE is returned.
  472. //
  473. DoTrimming = MiCheckAndSetSystemTrimCriteria (&TrimCriteria);
  474. if (DoTrimming) {
  475. //
  476. // Clear the deferred entry list to free up some pages.
  477. //
  478. MiDeferredUnlockPages (0);
  479. KeQuerySystemTime (&CurrentTime);
  480. ASSERT (MmIsAddressValid (MmSessionSpace) == FALSE);
  481. LOCK_EXPANSION (OldIrql);
  482. while (!IsListEmpty (&MmWorkingSetExpansionHead.ListHead)) {
  483. //
  484. // Remove the entry at the head and trim it.
  485. //
  486. ListEntry = RemoveHeadList (&MmWorkingSetExpansionHead.ListHead);
  487. VmSupport = CONTAINING_RECORD (ListEntry,
  488. MMSUPPORT,
  489. WorkingSetExpansionLinks);
  490. //
  491. // Note that other routines that set this bit must remove the
  492. // entry from the expansion list first.
  493. //
  494. ASSERT (VmSupport->Flags.BeingTrimmed == 0);
  495. //
  496. // Check to see if we've been here before.
  497. //
  498. if ((*(PLARGE_INTEGER)&VmSupport->LastTrimTime).QuadPart ==
  499. (*(PLARGE_INTEGER)&CurrentTime).QuadPart) {
  500. InsertHeadList (&MmWorkingSetExpansionHead.ListHead,
  501. &VmSupport->WorkingSetExpansionLinks);
  502. //
  503. // If we aren't finished we may sleep in this call.
  504. //
  505. if (MiCheckSystemTrimEndCriteria (&TrimCriteria, OldIrql)) {
  506. //
  507. // No more pages are needed so we're done.
  508. //
  509. break;
  510. }
  511. //
  512. // Start a new round of trimming.
  513. //
  514. KeQuerySystemTime (&CurrentTime);
  515. continue;
  516. }
  517. if ((VmSupport->WorkingSetSize > 3) ||
  518. ((VmSupport->Flags.TrimHard == 1) && (VmSupport->WorkingSetSize != 0))) {
  519. //
  520. // This working set is worth examining.
  521. //
  522. }
  523. else {
  524. InsertTailList (&MmWorkingSetExpansionHead.ListHead,
  525. &VmSupport->WorkingSetExpansionLinks);
  526. continue;
  527. }
  528. VmSupport->LastTrimTime = CurrentTime;
  529. VmSupport->Flags.BeingTrimmed = 1;
  530. VmSupport->WorkingSetExpansionLinks.Flink = MM_NO_WS_EXPANSION;
  531. VmSupport->WorkingSetExpansionLinks.Blink =
  532. MM_WS_EXPANSION_IN_PROGRESS;
  533. UNLOCK_EXPANSION (OldIrql);
  534. if (MiAttachAndLockWorkingSet (VmSupport, &InformSessionOfRelease) == FALSE) {
  535. LOCK_EXPANSION (OldIrql);
  536. VmSupport->Flags.AllowWorkingSetAdjustment = MM_FORCE_TRIM;
  537. goto DoneWithWorkingSet;
  538. }
  539. //
  540. // Determine how many pages to trim from this working set.
  541. //
  542. Trim = MiDetermineWsTrimAmount (&TrimCriteria, VmSupport);
  543. //
  544. // If there's something to trim...
  545. //
  546. if ((Trim != 0) &&
  547. ((TrimCriteria.TrimAllPasses > TrimCriteria.NumPasses) ||
  548. (MmAvailablePages < TrimCriteria.DesiredFreeGoal))) {
  549. //
  550. // We haven't reached our goal, so trim now.
  551. //
  552. PERFINFO_WSMANAGE_TOTRIM(Trim);
  553. Trim = MiTrimWorkingSet (Trim,
  554. VmSupport,
  555. TrimCriteria.TrimAge);
  556. PERFINFO_WSMANAGE_ACTUALTRIM(Trim);
  557. }
  558. //
  559. // Estimating the current claim is always done here by taking a
  560. // sample of the working set. Aging is only done if the trim
  561. // pass warrants it (ie: the first pass only).
  562. //
  563. MiAgeAndEstimateAvailableInWorkingSet (
  564. VmSupport,
  565. TrimCriteria.DoAging,
  566. NULL,
  567. &TrimCriteria.NewTotalClaim,
  568. &TrimCriteria.NewTotalEstimatedAvailable);
  569. MiDetachAndUnlockWorkingSet (VmSupport, InformSessionOfRelease);
  570. LOCK_EXPANSION (OldIrql);
  571. DoneWithWorkingSet:
  572. ASSERT (VmSupport->Flags.BeingTrimmed == 1);
  573. VmSupport->Flags.BeingTrimmed = 0;
  574. ASSERT (VmSupport->WorkingSetExpansionLinks.Flink == MM_NO_WS_EXPANSION);
  575. if (VmSupport->WorkingSetExpansionLinks.Blink ==
  576. MM_WS_EXPANSION_IN_PROGRESS) {
  577. //
  578. // If the working set size is still above the minimum,
  579. // add this back at the tail of the list.
  580. //
  581. InsertTailList (&MmWorkingSetExpansionHead.ListHead,
  582. &VmSupport->WorkingSetExpansionLinks);
  583. }
  584. else {
  585. //
  586. // The value in the blink is the address of an event
  587. // to set.
  588. //
  589. ASSERT (VmSupport != &MmSystemCacheWs);
  590. KeSetEvent ((PKEVENT)VmSupport->WorkingSetExpansionLinks.Blink,
  591. 0,
  592. FALSE);
  593. }
  594. }
  595. MmTotalClaim = TrimCriteria.NewTotalClaim;
  596. MmTotalEstimatedAvailable = TrimCriteria.NewTotalEstimatedAvailable;
  597. PERFINFO_WSMANAGE_TRIMEND_CLAIMS(&TrimCriteria);
  598. UNLOCK_EXPANSION (OldIrql);
  599. }
  600. MiStandbyRemoved = 0;
  601. //
  602. // If memory is critical and there are modified pages to be written
  603. // (presumably because we've just trimmed them), then signal the
  604. // modified page writer.
  605. //
  606. if ((MmAvailablePages < MmMinimumFreePages) ||
  607. (MmModifiedPageListHead.Total >= MmModifiedPageMaximum)) {
  608. KeSetEvent (&MmModifiedPageWriterEvent, 0, FALSE);
  609. }
  610. return;
  611. }
  612. LOGICAL
  613. MiCheckAndSetSystemTrimCriteria (
  614. IN PMMWS_TRIM_CRITERIA Criteria
  615. )
  616. /*++
  617. Routine Description:
  618. Decide whether to trim, age or adjust claim estimations at this time.
  619. Arguments:
  620. Criteria - Supplies a pointer to the trim criteria information. Various
  621. fields in this structure are set as needed by this routine.
  622. Return Value:
  623. TRUE if the caller should initiate trimming, FALSE if not.
  624. Environment:
  625. Kernel mode. No locks held. APC level or below.
  626. --*/
  627. {
  628. KIRQL OldIrql;
  629. PFN_NUMBER Available;
  630. ULONG StandbyRemoved;
  631. ULONG WsRetryCount;
  632. PERFINFO_WSMANAGE_DECL();
  633. PERFINFO_WSMANAGE_CHECK();
  634. //
  635. // See if an empty-all-working-sets request has been queued to us.
  636. //
  637. WsRetryCount = MiWsRetryCount;
  638. if (MiWaitingForWorkingSetEmpty == TRUE) {
  639. MiWsRetryCount = 1;
  640. MiAgePagesAndEstimateClaims (TRUE);
  641. LOCK_EXPANSION (OldIrql);
  642. KeSetEvent (&MiWaitForEmptyEvent, 0, FALSE);
  643. MiWaitingForWorkingSetEmpty = FALSE;
  644. UNLOCK_EXPANSION (OldIrql);
  645. MiReplacing = FALSE;
  646. MiWsRetryCount = WsRetryCount;
  647. return FALSE;
  648. }
  649. //
  650. // Check the number of pages available to see if any trimming (or aging)
  651. // is really required.
  652. //
  653. Available = MmAvailablePages;
  654. StandbyRemoved = MiStandbyRemoved;
  655. if (StandbyRemoved != 0) {
  656. //
  657. // The value is nonzero, we need to synchronize so we get a coordinated
  658. // snapshot of both values.
  659. //
  660. LOCK_PFN (OldIrql);
  661. Available = MmAvailablePages;
  662. StandbyRemoved = MiStandbyRemoved;
  663. UNLOCK_PFN (OldIrql);
  664. }
  665. PERFINFO_WSMANAGE_STARTLOG_CLAIMS();
  666. //
  667. // If we've low on pages, or we've been replacing within a given
  668. // working set, or we've been cannibalizing a large number of standby
  669. // pages, then trim now.
  670. //
  671. if ((Available <= MmPlentyFreePages) ||
  672. (MiReplacing == TRUE) ||
  673. (StandbyRemoved >= (Available >> 2))) {
  674. //
  675. // Inform our caller to start trimming since we're below
  676. // plenty pages - order the list so the bigger working sets are
  677. // in front so our caller trims those first.
  678. //
  679. Criteria->NumPasses = 0;
  680. Criteria->DesiredFreeGoal = MmPlentyFreePages + (MmPlentyFreePages / 2);
  681. Criteria->NewTotalClaim = 0;
  682. Criteria->NewTotalEstimatedAvailable = 0;
  683. //
  684. // If more than 25% of the available pages were recycled standby
  685. // pages, then trim more aggresively in an attempt to get more of the
  686. // cold pages into standby for the next pass.
  687. //
  688. if (StandbyRemoved >= (Available >> 2)) {
  689. Criteria->TrimAllPasses = TRUE;
  690. }
  691. else {
  692. Criteria->TrimAllPasses = FALSE;
  693. }
  694. //
  695. // Start trimming the bigger working sets first.
  696. //
  697. MiRearrangeWorkingSetExpansionList ();
  698. #if DBG
  699. if (MmDebug & MM_DBG_WS_EXPANSION) {
  700. DbgPrint("\nMM-wsmanage: Desired = %ld, Avail %ld\n",
  701. Criteria->DesiredFreeGoal, MmAvailablePages);
  702. }
  703. #endif
  704. PERFINFO_WSMANAGE_WILLTRIM_CLAIMS(Criteria);
  705. //
  706. // No need to lock synchronize the MiReplacing clearing as it
  707. // gets set every time a page replacement happens anyway.
  708. //
  709. MiReplacing = FALSE;
  710. return TRUE;
  711. }
  712. //
  713. // If there is an overwhelming surplus of memory and this is a big
  714. // server then don't even bother aging at this point.
  715. //
  716. if (Available > MmEnormousFreePages) {
  717. //
  718. // Note the claim and estimated available are not cleared so they
  719. // may contain stale values, but at tihs level it doesn't really
  720. // matter.
  721. //
  722. return FALSE;
  723. }
  724. //
  725. // Don't trim but do age unused pages and estimate
  726. // the amount available in working sets.
  727. //
  728. MiAgePagesAndEstimateClaims (FALSE);
  729. MiAdjustClaimParameters (TRUE);
  730. PERFINFO_WSMANAGE_TRIMACTION (PERFINFO_WS_ACTION_RESET_COUNTER);
  731. PERFINFO_WSMANAGE_DUMPENTRIES_CLAIMS ();
  732. return FALSE;
  733. }
  734. LOGICAL
  735. MiCheckSystemTrimEndCriteria (
  736. IN PMMWS_TRIM_CRITERIA Criteria,
  737. IN KIRQL OldIrql
  738. )
  739. /*++
  740. Routine Description:
  741. Check the ending criteria. If we're not done, delay for a little
  742. bit to let the modified writes catch up.
  743. Arguments:
  744. Criteria - Supplies the trim criteria information.
  745. OldIrql - Supplies the old IRQL to lower to if the expansion lock needs
  746. to be released.
  747. Return Value:
  748. TRUE if trimming can be stopped, FALSE otherwise.
  749. Environment:
  750. Kernel mode. Expansion lock held. APC level or below.
  751. --*/
  752. {
  753. LOGICAL FinishedTrimming;
  754. PERFINFO_WSMANAGE_DECL();
  755. PERFINFO_WSMANAGE_CHECK();
  756. if ((MmAvailablePages > Criteria->DesiredFreeGoal) ||
  757. (Criteria->NumPasses >= MI_MAX_TRIM_PASSES)) {
  758. //
  759. // We have enough pages or we trimmed as many as we're going to get.
  760. //
  761. return TRUE;
  762. }
  763. //
  764. // Update the global claim and estimate before we wait.
  765. //
  766. MmTotalClaim = Criteria->NewTotalClaim;
  767. MmTotalEstimatedAvailable = Criteria->NewTotalEstimatedAvailable;
  768. //
  769. // We don't have enough pages - give the modified page writer
  770. // 10 milliseconds to catch up. The wait is also important because a
  771. // thread may have the system cache locked but has been preempted
  772. // by the balance set manager due to its higher priority. We must
  773. // give this thread a shot at running so it can release the system
  774. // cache lock (all the trimmable pages may reside in the system cache).
  775. //
  776. UNLOCK_EXPANSION (OldIrql);
  777. KeDelayExecutionThread (KernelMode,
  778. FALSE,
  779. (PLARGE_INTEGER)&MmShortTime);
  780. PERFINFO_WSMANAGE_WAITFORWRITER_CLAIMS();
  781. //
  782. // Check again to see if we've met the criteria to stop trimming.
  783. //
  784. if (MmAvailablePages > Criteria->DesiredFreeGoal) {
  785. //
  786. // Now we have enough pages so break out.
  787. //
  788. FinishedTrimming = TRUE;
  789. }
  790. else {
  791. //
  792. // We don't have enough pages so let's do another pass.
  793. // Go get the next working set list which is probably the
  794. // one we put back before we gave up the processor.
  795. //
  796. FinishedTrimming = FALSE;
  797. if (Criteria->NumPasses == 0) {
  798. MiAdjustClaimParameters (FALSE);
  799. }
  800. Criteria->NumPasses += 1;
  801. Criteria->NewTotalClaim = 0;
  802. Criteria->NewTotalEstimatedAvailable = 0;
  803. PERFINFO_WSMANAGE_TRIMACTION(PERFINFO_WS_ACTION_FORCE_TRIMMING_PROCESS);
  804. }
  805. LOCK_EXPANSION (OldIrql);
  806. return FinishedTrimming;
  807. }
  808. WSLE_NUMBER
  809. MiDetermineWsTrimAmount (
  810. PMMWS_TRIM_CRITERIA Criteria,
  811. PMMSUPPORT VmSupport
  812. )
  813. /*++
  814. Routine Description:
  815. Determine whether this process should be trimmed.
  816. Arguments:
  817. Criteria - Supplies the trim criteria information.
  818. VmSupport - Supplies the working set information for the candidate.
  819. Return Value:
  820. TRUE if trimming should be done on this process, FALSE if not.
  821. Environment:
  822. Kernel mode. Expansion lock held. APC level or below.
  823. --*/
  824. {
  825. PMMWSL WorkingSetList;
  826. WSLE_NUMBER MaxTrim;
  827. WSLE_NUMBER Trim;
  828. LOGICAL OutswapEnabled;
  829. PEPROCESS ProcessToTrim;
  830. PMM_SESSION_SPACE SessionSpace;
  831. WorkingSetList = VmSupport->VmWorkingSetList;
  832. MaxTrim = VmSupport->WorkingSetSize;
  833. if (MaxTrim <= WorkingSetList->FirstDynamic) {
  834. return 0;
  835. }
  836. OutswapEnabled = FALSE;
  837. if (VmSupport == &MmSystemCacheWs) {
  838. PERFINFO_WSMANAGE_TRIMWS (NULL, NULL, VmSupport);
  839. }
  840. else if (VmSupport->Flags.SessionSpace == 0) {
  841. ProcessToTrim = CONTAINING_RECORD (VmSupport, EPROCESS, Vm);
  842. if (ProcessToTrim->Flags & PS_PROCESS_FLAGS_OUTSWAP_ENABLED) {
  843. OutswapEnabled = TRUE;
  844. }
  845. PERFINFO_WSMANAGE_TRIMWS (ProcessToTrim, NULL, VmSupport);
  846. }
  847. else {
  848. if (VmSupport->Flags.TrimHard == 1) {
  849. OutswapEnabled = TRUE;
  850. }
  851. SessionSpace = CONTAINING_RECORD(VmSupport,
  852. MM_SESSION_SPACE,
  853. Vm);
  854. PERFINFO_WSMANAGE_TRIMWS (NULL, SessionSpace, VmSupport);
  855. }
  856. if (OutswapEnabled == FALSE) {
  857. //
  858. // Don't trim the cache or non-swapped sessions or processes
  859. // below their minimum.
  860. //
  861. MaxTrim -= VmSupport->MinimumWorkingSetSize;
  862. }
  863. switch (Criteria->NumPasses) {
  864. case 0:
  865. Trim = VmSupport->Claim >>
  866. ((VmSupport->Flags.MemoryPriority == MEMORY_PRIORITY_FOREGROUND)
  867. ? MI_FOREGROUND_CLAIM_AVAILABLE_SHIFT
  868. : MI_BACKGROUND_CLAIM_AVAILABLE_SHIFT);
  869. Criteria->TrimAge = MI_PASS0_TRIM_AGE;
  870. Criteria->DoAging = TRUE;
  871. break;
  872. case 1:
  873. Trim = VmSupport->Claim >>
  874. ((VmSupport->Flags.MemoryPriority == MEMORY_PRIORITY_FOREGROUND)
  875. ? MI_FOREGROUND_CLAIM_AVAILABLE_SHIFT
  876. : MI_BACKGROUND_CLAIM_AVAILABLE_SHIFT);
  877. Criteria->TrimAge = MI_PASS1_TRIM_AGE;
  878. Criteria->DoAging = FALSE;
  879. break;
  880. case 2:
  881. Trim = VmSupport->Claim;
  882. Criteria->TrimAge = MI_PASS2_TRIM_AGE;
  883. Criteria->DoAging = FALSE;
  884. break;
  885. case 3:
  886. Trim = VmSupport->EstimatedAvailable;
  887. Criteria->TrimAge = MI_PASS3_TRIM_AGE;
  888. Criteria->DoAging = FALSE;
  889. break;
  890. default:
  891. Trim = VmSupport->EstimatedAvailable;
  892. Criteria->TrimAge = MI_PASS3_TRIM_AGE;
  893. Criteria->DoAging = FALSE;
  894. if (MiHardTrim == TRUE || MmAvailablePages < MM_HIGH_LIMIT + 64) {
  895. if (VmSupport->WorkingSetSize > VmSupport->MinimumWorkingSetSize) {
  896. Trim = (VmSupport->WorkingSetSize - VmSupport->MinimumWorkingSetSize) >> 2;
  897. if (Trim == 0) {
  898. Trim = VmSupport->WorkingSetSize - VmSupport->MinimumWorkingSetSize;
  899. }
  900. }
  901. Criteria->TrimAge = MI_PASS4_TRIM_AGE;
  902. Criteria->DoAging = TRUE;
  903. }
  904. break;
  905. }
  906. if (Trim > MaxTrim) {
  907. Trim = MaxTrim;
  908. }
  909. #if DBG
  910. if ((MmDebug & MM_DBG_WS_EXPANSION) && (Trim != 0)) {
  911. if (VmSupport->Flags.SessionSpace == 0) {
  912. ProcessToTrim = CONTAINING_RECORD (VmSupport, EPROCESS, Vm);
  913. DbgPrint(" Trimming Process %16s, WS %6d, Trimming %5d ==> %5d\n",
  914. ProcessToTrim ? ProcessToTrim->ImageFileName : (PUCHAR)"System Cache",
  915. VmSupport->WorkingSetSize,
  916. Trim,
  917. VmSupport->WorkingSetSize-Trim);
  918. }
  919. else {
  920. SessionSpace = CONTAINING_RECORD (VmSupport,
  921. MM_SESSION_SPACE,
  922. Vm);
  923. DbgPrint(" Trimming Session 0x%x (id %d), WS %6d, Trimming %5d ==> %5d\n",
  924. SessionSpace,
  925. SessionSpace->SessionId,
  926. VmSupport->WorkingSetSize,
  927. Trim,
  928. VmSupport->WorkingSetSize-Trim);
  929. }
  930. }
  931. #endif
  932. return Trim;
  933. }
  934. VOID
  935. MiAgePagesAndEstimateClaims (
  936. LOGICAL EmptyIt
  937. )
  938. /*++
  939. Routine Description:
  940. Walk through the sets on the working set expansion list.
  941. Either age pages and estimate the claim (number of pages they aren't using),
  942. or empty the working set.
  943. Arguments:
  944. EmptyIt - Supplies TRUE to empty the working set,
  945. FALSE to just age and estimate it.
  946. Return Value:
  947. None.
  948. Environment:
  949. Kernel mode, APCs disabled. PFN lock NOT held.
  950. --*/
  951. {
  952. WSLE_NUMBER WslesScanned;
  953. PMMSUPPORT VmSupport;
  954. PMMSUPPORT FirstSeen;
  955. LOGICAL SystemCacheSeen;
  956. KIRQL OldIrql;
  957. PLIST_ENTRY ListEntry;
  958. PFN_NUMBER NewTotalClaim;
  959. PFN_NUMBER NewTotalEstimatedAvailable;
  960. LOGICAL InformSessionOfRelease;
  961. ULONG LoopCount;
  962. FirstSeen = NULL;
  963. SystemCacheSeen = FALSE;
  964. NewTotalClaim = 0;
  965. NewTotalEstimatedAvailable = 0;
  966. LoopCount = 0;
  967. WslesScanned = 0;
  968. ASSERT (MmIsAddressValid (MmSessionSpace) == FALSE);
  969. LOCK_EXPANSION (OldIrql);
  970. while (!IsListEmpty (&MmWorkingSetExpansionHead.ListHead)) {
  971. ASSERT (MmIsAddressValid (MmSessionSpace) == FALSE);
  972. //
  973. // Remove the entry at the head, try to lock it, if we can lock it
  974. // then age some pages and estimate the number of available pages.
  975. //
  976. ListEntry = RemoveHeadList (&MmWorkingSetExpansionHead.ListHead);
  977. VmSupport = CONTAINING_RECORD (ListEntry,
  978. MMSUPPORT,
  979. WorkingSetExpansionLinks);
  980. if (VmSupport == &MmSystemCacheWs) {
  981. if (SystemCacheSeen == TRUE) {
  982. //
  983. // Seen this one already.
  984. //
  985. FirstSeen = VmSupport;
  986. }
  987. SystemCacheSeen = TRUE;
  988. }
  989. ASSERT (VmSupport->Flags.BeingTrimmed == 0);
  990. if (VmSupport == FirstSeen) {
  991. InsertHeadList (&MmWorkingSetExpansionHead.ListHead,
  992. &VmSupport->WorkingSetExpansionLinks);
  993. break;
  994. }
  995. VmSupport->Flags.BeingTrimmed = 1;
  996. VmSupport->WorkingSetExpansionLinks.Flink = MM_NO_WS_EXPANSION;
  997. VmSupport->WorkingSetExpansionLinks.Blink =
  998. MM_WS_EXPANSION_IN_PROGRESS;
  999. UNLOCK_EXPANSION (OldIrql);
  1000. if (FirstSeen == NULL) {
  1001. FirstSeen = VmSupport;
  1002. }
  1003. if (MiAttachAndLockWorkingSet (VmSupport, &InformSessionOfRelease) == TRUE) {
  1004. if (EmptyIt == FALSE) {
  1005. MiAgeAndEstimateAvailableInWorkingSet (VmSupport,
  1006. TRUE,
  1007. &WslesScanned,
  1008. &NewTotalClaim,
  1009. &NewTotalEstimatedAvailable);
  1010. }
  1011. else {
  1012. MiEmptyWorkingSet (VmSupport, FALSE);
  1013. }
  1014. MiDetachAndUnlockWorkingSet (VmSupport, InformSessionOfRelease);
  1015. }
  1016. LOCK_EXPANSION (OldIrql);
  1017. ASSERT (VmSupport->Flags.BeingTrimmed == 1);
  1018. VmSupport->Flags.BeingTrimmed = 0;
  1019. ASSERT (VmSupport->WorkingSetExpansionLinks.Flink == MM_NO_WS_EXPANSION);
  1020. if (VmSupport->WorkingSetExpansionLinks.Blink ==
  1021. MM_WS_EXPANSION_IN_PROGRESS) {
  1022. //
  1023. // If the working set size is still above the minimum,
  1024. // add this back at the tail of the list.
  1025. //
  1026. InsertTailList (&MmWorkingSetExpansionHead.ListHead,
  1027. &VmSupport->WorkingSetExpansionLinks);
  1028. }
  1029. else {
  1030. //
  1031. // The value in the blink is the address of an event
  1032. // to set.
  1033. //
  1034. ASSERT (VmSupport != &MmSystemCacheWs);
  1035. KeSetEvent ((PKEVENT)VmSupport->WorkingSetExpansionLinks.Blink,
  1036. 0,
  1037. FALSE);
  1038. }
  1039. //
  1040. // The initial working set that was chosen for FirstSeen may have
  1041. // been trimmed down under its minimum and been removed from the
  1042. // ExpansionHead links. It is possible that the system cache is not
  1043. // on the links either. This check detects this extremely rare
  1044. // situation so that the system does not spin forever.
  1045. //
  1046. LoopCount += 1;
  1047. if (LoopCount > 200) {
  1048. if (MmSystemCacheWs.WorkingSetExpansionLinks.Blink == MM_WS_EXPANSION_IN_PROGRESS) {
  1049. break;
  1050. }
  1051. }
  1052. }
  1053. UNLOCK_EXPANSION (OldIrql);
  1054. if (EmptyIt == FALSE) {
  1055. MmTotalClaim = NewTotalClaim;
  1056. MmTotalEstimatedAvailable = NewTotalEstimatedAvailable;
  1057. }
  1058. }
  1059. VOID
  1060. MiAgeAndEstimateAvailableInWorkingSet (
  1061. IN PMMSUPPORT VmSupport,
  1062. IN LOGICAL DoAging,
  1063. IN PWSLE_NUMBER WslesScanned,
  1064. IN OUT PPFN_NUMBER TotalClaim,
  1065. IN OUT PPFN_NUMBER TotalEstimatedAvailable
  1066. )
  1067. /*++
  1068. Routine Description:
  1069. Age pages (clear the access bit or if the page hasn't been
  1070. accessed, increment the age) for a portion of the working
  1071. set. Also, walk through a sample of the working set
  1072. building a set of counts of how old the pages are.
  1073. The counts are used to create a claim of the amount
  1074. the system can steal from this process if memory
  1075. becomes tight.
  1076. Arguments:
  1077. VmSupport - Supplies the VM support structure to age and estimate.
  1078. DoAging - TRUE if pages are to be aged. Regardless, the pages will be
  1079. added to the availability estimation.
  1080. WslesScanned - Total numbers of WSLEs scanned on this sweep, used as a
  1081. control to prevent excessive aging on large systems with
  1082. many processes.
  1083. TotalClaim - Supplies a pointer to system wide claim to update.
  1084. TotalEstimatedAvailable - Supplies a pointer to system wide estimate
  1085. to update.
  1086. Return Value:
  1087. None
  1088. Environment:
  1089. Kernel mode, APCs disabled, working set mutex. PFN lock NOT held.
  1090. --*/
  1091. {
  1092. LOGICAL RecalculateShift;
  1093. WSLE_NUMBER LastEntry;
  1094. WSLE_NUMBER StartEntry;
  1095. WSLE_NUMBER FirstDynamic;
  1096. WSLE_NUMBER CurrentEntry;
  1097. PMMWSL WorkingSetList;
  1098. PMMWSLE Wsle;
  1099. PMMPTE PointerPte;
  1100. WSLE_NUMBER NumberToExamine;
  1101. WSLE_NUMBER Claim;
  1102. ULONG Estimate;
  1103. ULONG SampledAgeCounts[MI_USE_AGE_COUNT] = {0};
  1104. MI_NEXT_ESTIMATION_SLOT_CONST NextConst;
  1105. WSLE_NUMBER SampleSize;
  1106. WSLE_NUMBER AgeSize;
  1107. ULONG CounterShift;
  1108. WSLE_NUMBER Temp;
  1109. ULONG i;
  1110. WorkingSetList = VmSupport->VmWorkingSetList;
  1111. Wsle = WorkingSetList->Wsle;
  1112. AgeSize = 0;
  1113. ASSERT ((VmSupport != &MmSystemCacheWs) || (PsGetCurrentThread() == MmSystemLockOwner));
  1114. LastEntry = WorkingSetList->LastEntry;
  1115. FirstDynamic = WorkingSetList->FirstDynamic;
  1116. if (DoAging == TRUE) {
  1117. //
  1118. // Clear the used bits or increment the age of a portion of the
  1119. // working set.
  1120. //
  1121. // Try to walk the entire working set every 2^MI_AGE_AGING_SHIFT
  1122. // seconds.
  1123. //
  1124. if (VmSupport->WorkingSetSize > WorkingSetList->FirstDynamic) {
  1125. NumberToExamine = (VmSupport->WorkingSetSize - WorkingSetList->FirstDynamic) >> MiAgingShift;
  1126. //
  1127. // Bigger machines can easily have working sets that span
  1128. // terabytes so limit the absolute walk.
  1129. //
  1130. if (NumberToExamine > MI_MAXIMUM_SAMPLE) {
  1131. NumberToExamine = MI_MAXIMUM_SAMPLE;
  1132. }
  1133. //
  1134. // In addition to large working sets, bigger machines may also
  1135. // have huge numbers of processes - checking the aggregate number
  1136. // of working set list entries scanned prevents this situation
  1137. // from triggering excessive scanning.
  1138. //
  1139. if ((WslesScanned != NULL) &&
  1140. (*WslesScanned >= MiMaximumWslesPerSweep)) {
  1141. NumberToExamine = 64;
  1142. }
  1143. AgeSize = NumberToExamine;
  1144. CurrentEntry = VmSupport->NextAgingSlot;
  1145. if (CurrentEntry > LastEntry || CurrentEntry < FirstDynamic) {
  1146. CurrentEntry = FirstDynamic;
  1147. }
  1148. if (Wsle[CurrentEntry].u1.e1.Valid == 0) {
  1149. MI_NEXT_VALID_AGING_SLOT(CurrentEntry, FirstDynamic, LastEntry, Wsle);
  1150. }
  1151. while (NumberToExamine != 0) {
  1152. PointerPte = MiGetPteAddress (Wsle[CurrentEntry].u1.VirtualAddress);
  1153. if (MI_GET_ACCESSED_IN_PTE(PointerPte) == 1) {
  1154. MI_SET_ACCESSED_IN_PTE(PointerPte, 0);
  1155. MI_RESET_WSLE_AGE(PointerPte, &Wsle[CurrentEntry]);
  1156. }
  1157. else {
  1158. MI_INC_WSLE_AGE(PointerPte, &Wsle[CurrentEntry]);
  1159. }
  1160. NumberToExamine -= 1;
  1161. MI_NEXT_VALID_AGING_SLOT(CurrentEntry, FirstDynamic, LastEntry, Wsle);
  1162. }
  1163. VmSupport->NextAgingSlot = CurrentEntry + 1; // Start here next time
  1164. }
  1165. }
  1166. //
  1167. // Estimate the number of unused pages in the working set.
  1168. //
  1169. // The working set may have shrunk or the non-paged portion may have
  1170. // grown since the last time. Put the next counter at the FirstDynamic
  1171. // if so.
  1172. //
  1173. CurrentEntry = VmSupport->NextEstimationSlot;
  1174. if (CurrentEntry > LastEntry || CurrentEntry < FirstDynamic) {
  1175. CurrentEntry = FirstDynamic;
  1176. }
  1177. //
  1178. // When aging, walk the entire working set every 2^MiEstimationShift
  1179. // seconds.
  1180. //
  1181. CounterShift = 0;
  1182. SampleSize = 0;
  1183. if (VmSupport->WorkingSetSize > WorkingSetList->FirstDynamic) {
  1184. RecalculateShift = FALSE;
  1185. SampleSize = VmSupport->WorkingSetSize - WorkingSetList->FirstDynamic;
  1186. NumberToExamine = SampleSize >> MiEstimationShift;
  1187. //
  1188. // Bigger machines may have huge numbers of processes - checking the
  1189. // aggregate number of working set list entries scanned prevents this
  1190. // situation from triggering excessive scanning.
  1191. //
  1192. if ((WslesScanned != NULL) &&
  1193. (*WslesScanned >= MiMaximumWslesPerSweep)) {
  1194. RecalculateShift = TRUE;
  1195. }
  1196. else if (NumberToExamine > MI_MAXIMUM_SAMPLE) {
  1197. //
  1198. // Bigger machines can easily have working sets that span
  1199. // terabytes so limit the absolute walk.
  1200. //
  1201. NumberToExamine = MI_MAXIMUM_SAMPLE;
  1202. Temp = SampleSize >> MI_MINIMUM_SAMPLE_SHIFT;
  1203. SampleSize = MI_MAXIMUM_SAMPLE;
  1204. //
  1205. // Calculate the necessary counter shift to estimate pages
  1206. // in use.
  1207. //
  1208. for ( ; Temp != 0; Temp = Temp >> 1) {
  1209. CounterShift += 1;
  1210. }
  1211. }
  1212. else if (NumberToExamine >= MI_MINIMUM_SAMPLE) {
  1213. //
  1214. // Ensure that NumberToExamine is at least the minimum size.
  1215. //
  1216. SampleSize = NumberToExamine;
  1217. CounterShift = MiEstimationShift;
  1218. }
  1219. else if (SampleSize > MI_MINIMUM_SAMPLE) {
  1220. RecalculateShift = TRUE;
  1221. }
  1222. if (RecalculateShift == TRUE) {
  1223. Temp = SampleSize >> MI_MINIMUM_SAMPLE_SHIFT;
  1224. SampleSize = MI_MINIMUM_SAMPLE;
  1225. //
  1226. // Calculate the necessary counter shift to estimate pages
  1227. // in use.
  1228. //
  1229. for ( ; Temp != 0; Temp = Temp >> 1) {
  1230. CounterShift += 1;
  1231. }
  1232. }
  1233. ASSERT (SampleSize != 0);
  1234. MI_CALC_NEXT_ESTIMATION_SLOT_CONST(NextConst, WorkingSetList);
  1235. StartEntry = FirstDynamic;
  1236. if (Wsle[CurrentEntry].u1.e1.Valid == 0) {
  1237. MI_NEXT_VALID_ESTIMATION_SLOT (CurrentEntry,
  1238. StartEntry,
  1239. FirstDynamic,
  1240. LastEntry,
  1241. NextConst,
  1242. Wsle);
  1243. }
  1244. for (i = 0; i < SampleSize; i += 1) {
  1245. PointerPte = MiGetPteAddress (Wsle[CurrentEntry].u1.VirtualAddress);
  1246. if (MI_GET_ACCESSED_IN_PTE(PointerPte) == 0) {
  1247. MI_UPDATE_USE_ESTIMATE (PointerPte,
  1248. &Wsle[CurrentEntry],
  1249. SampledAgeCounts);
  1250. }
  1251. if (i == NumberToExamine - 1) {
  1252. //
  1253. // Start estimation here next time.
  1254. //
  1255. VmSupport->NextEstimationSlot = CurrentEntry + 1;
  1256. }
  1257. MI_NEXT_VALID_ESTIMATION_SLOT (CurrentEntry,
  1258. StartEntry,
  1259. FirstDynamic,
  1260. LastEntry,
  1261. NextConst,
  1262. Wsle);
  1263. }
  1264. }
  1265. if (SampleSize < AgeSize) {
  1266. SampleSize = AgeSize;
  1267. }
  1268. if (WslesScanned != NULL) {
  1269. *WslesScanned += SampleSize;
  1270. }
  1271. Estimate = MI_CALCULATE_USAGE_ESTIMATE(SampledAgeCounts, CounterShift);
  1272. Claim = VmSupport->Claim + MI_CLAIM_INCR;
  1273. if (Claim > Estimate) {
  1274. Claim = Estimate;
  1275. }
  1276. VmSupport->Claim = Claim;
  1277. VmSupport->EstimatedAvailable = Estimate;
  1278. PERFINFO_WSMANAGE_DUMPWS(VmSupport, SampledAgeCounts);
  1279. VmSupport->GrowthSinceLastEstimate = 0;
  1280. *TotalClaim += Claim >> ((VmSupport->Flags.MemoryPriority == MEMORY_PRIORITY_FOREGROUND)
  1281. ? MI_FOREGROUND_CLAIM_AVAILABLE_SHIFT
  1282. : MI_BACKGROUND_CLAIM_AVAILABLE_SHIFT);
  1283. *TotalEstimatedAvailable += Estimate;
  1284. return;
  1285. }
  1286. ULONG MiClaimAdjustmentThreshold[8] = { 0, 0, 4000, 8000, 12000, 24000, 32000, 32000};
  1287. VOID
  1288. MiAdjustClaimParameters (
  1289. IN LOGICAL EnoughPages
  1290. )
  1291. /*++
  1292. Routine Description:
  1293. Adjust the rate at which we walk through working sets. If we have
  1294. enough pages (we aren't trimming pages that aren't considered young),
  1295. then we check to see whether we should decrease the aging rate and
  1296. vice versa.
  1297. The limits for the aging rate are 1/8 and 1/128 of the working sets.
  1298. This means that the finest age granularities are 8 to 128 seconds in
  1299. these cases. With the current 2 bit counter, at the low end we would
  1300. start trimming pages > 16 seconds old and at the high end > 4 minutes.
  1301. Arguments:
  1302. EnoughPages - Supplies whether to increase the rate or decrease it.
  1303. Return Value:
  1304. None.
  1305. Environment:
  1306. Kernel mode.
  1307. --*/
  1308. {
  1309. LARGE_INTEGER CurrentTime;
  1310. KeQuerySystemTime (&CurrentTime);
  1311. if (EnoughPages == TRUE &&
  1312. ((MmTotalClaim + MmAvailablePages) > MiClaimAdjustmentThreshold[MiAgingShift])) {
  1313. //
  1314. // Don't adjust the rate too frequently, don't go over the limit, and
  1315. // make sure there are enough claimed and/or available.
  1316. //
  1317. if (((CurrentTime.QuadPart - MiLastAdjustmentOfClaimParams.QuadPart) >
  1318. MmClaimParameterAdjustUpTime.QuadPart) &&
  1319. (MiAgingShift < MI_MAXIMUM_AGING_SHIFT ) ) {
  1320. //
  1321. // Set the time only when we change the rate.
  1322. //
  1323. MiLastAdjustmentOfClaimParams.QuadPart = CurrentTime.QuadPart;
  1324. MiAgingShift += 1;
  1325. MiEstimationShift += 1;
  1326. }
  1327. }
  1328. else if ((EnoughPages == FALSE) ||
  1329. (MmTotalClaim + MmAvailablePages) < MiClaimAdjustmentThreshold[MiAgingShift - 1]) {
  1330. //
  1331. // Don't adjust the rate down too frequently.
  1332. //
  1333. if ((CurrentTime.QuadPart - MiLastAdjustmentOfClaimParams.QuadPart) >
  1334. MmClaimParameterAdjustDownTime.QuadPart) {
  1335. //
  1336. // Always set the time so we don't adjust up too soon after
  1337. // a 2nd pass trim.
  1338. //
  1339. MiLastAdjustmentOfClaimParams.QuadPart = CurrentTime.QuadPart;
  1340. //
  1341. // Don't go under the limit.
  1342. //
  1343. if (MiAgingShift > 3) {
  1344. MiAgingShift -= 1;
  1345. MiEstimationShift -= 1;
  1346. }
  1347. }
  1348. }
  1349. }
  1350. #define MM_WS_REORG_BUCKETS_MAX 7
  1351. #if DBG
  1352. ULONG MiSessionIdleBuckets[MM_WS_REORG_BUCKETS_MAX];
  1353. #endif
  1354. VOID
  1355. MiRearrangeWorkingSetExpansionList (
  1356. VOID
  1357. )
  1358. /*++
  1359. Routine Description:
  1360. This function arranges the working set list into different
  1361. groups based upon the claim. This is done so the working set
  1362. trimming will take place on fat processes first.
  1363. The working sets are sorted into buckets and then linked back up.
  1364. Swapped out sessions and processes are put at the front.
  1365. Arguments:
  1366. None.
  1367. Return Value:
  1368. None.
  1369. Environment:
  1370. Kernel mode, no locks held.
  1371. --*/
  1372. {
  1373. KIRQL OldIrql;
  1374. PLIST_ENTRY ListEntry;
  1375. PMMSUPPORT VmSupport;
  1376. int Size;
  1377. int PreviousNonEmpty;
  1378. int NonEmpty;
  1379. LIST_ENTRY ListHead[MM_WS_REORG_BUCKETS_MAX];
  1380. LARGE_INTEGER CurrentTime;
  1381. LARGE_INTEGER SessionIdleTime;
  1382. ULONG IdleTime;
  1383. PMM_SESSION_SPACE SessionGlobal;
  1384. KeQuerySystemTime (&CurrentTime);
  1385. if (IsListEmpty (&MmWorkingSetExpansionHead.ListHead)) {
  1386. return;
  1387. }
  1388. for (Size = 0 ; Size < MM_WS_REORG_BUCKETS_MAX; Size++) {
  1389. InitializeListHead (&ListHead[Size]);
  1390. }
  1391. LOCK_EXPANSION (OldIrql);
  1392. while (!IsListEmpty (&MmWorkingSetExpansionHead.ListHead)) {
  1393. ListEntry = RemoveHeadList (&MmWorkingSetExpansionHead.ListHead);
  1394. VmSupport = CONTAINING_RECORD(ListEntry,
  1395. MMSUPPORT,
  1396. WorkingSetExpansionLinks);
  1397. if (VmSupport->Flags.TrimHard == 1) {
  1398. ASSERT (VmSupport->Flags.SessionSpace == 1);
  1399. SessionGlobal = CONTAINING_RECORD (VmSupport,
  1400. MM_SESSION_SPACE,
  1401. Vm);
  1402. SessionIdleTime.QuadPart = CurrentTime.QuadPart - SessionGlobal->LastProcessSwappedOutTime.QuadPart;
  1403. #if DBG
  1404. if (MmDebug & MM_DBG_SESSIONS) {
  1405. DbgPrint ("Mm: Session %d heavily trim/aged - all its processes (%d) swapped out %d seconds ago\n",
  1406. SessionGlobal->SessionId,
  1407. SessionGlobal->ReferenceCount,
  1408. (ULONG)(SessionIdleTime.QuadPart / 10000000));
  1409. }
  1410. #endif
  1411. if (SessionIdleTime.QuadPart < 0) {
  1412. //
  1413. // The administrator has moved the system time backwards.
  1414. // Give this session a fresh start.
  1415. //
  1416. SessionIdleTime.QuadPart = 0;
  1417. KeQuerySystemTime (&SessionGlobal->LastProcessSwappedOutTime);
  1418. }
  1419. IdleTime = (ULONG) (SessionIdleTime.QuadPart / 10000000);
  1420. }
  1421. else {
  1422. IdleTime = 0;
  1423. }
  1424. if (VmSupport->Flags.MemoryPriority == MEMORY_PRIORITY_FOREGROUND) {
  1425. //
  1426. // Put the foreground processes at the end of the list,
  1427. // to give them priority.
  1428. //
  1429. Size = 6;
  1430. }
  1431. else {
  1432. if (VmSupport->Claim > 400) {
  1433. Size = 0;
  1434. }
  1435. else if (IdleTime > 30) {
  1436. Size = 0;
  1437. #if DBG
  1438. MiSessionIdleBuckets[Size] += 1;
  1439. #endif
  1440. }
  1441. else if (VmSupport->Claim > 200) {
  1442. Size = 1;
  1443. }
  1444. else if (IdleTime > 20) {
  1445. Size = 1;
  1446. #if DBG
  1447. MiSessionIdleBuckets[Size] += 1;
  1448. #endif
  1449. }
  1450. else if (VmSupport->Claim > 100) {
  1451. Size = 2;
  1452. }
  1453. else if (IdleTime > 10) {
  1454. Size = 2;
  1455. #if DBG
  1456. MiSessionIdleBuckets[Size] += 1;
  1457. #endif
  1458. }
  1459. else if (VmSupport->Claim > 50) {
  1460. Size = 3;
  1461. }
  1462. else if (IdleTime) {
  1463. Size = 3;
  1464. #if DBG
  1465. MiSessionIdleBuckets[Size] += 1;
  1466. #endif
  1467. }
  1468. else if (VmSupport->Claim > 25) {
  1469. Size = 4;
  1470. }
  1471. else {
  1472. Size = 5;
  1473. #if DBG
  1474. if (VmSupport->Flags.SessionSpace == 1) {
  1475. MiSessionIdleBuckets[Size] += 1;
  1476. }
  1477. #endif
  1478. }
  1479. }
  1480. #if DBG
  1481. if (MmDebug & MM_DBG_WS_EXPANSION) {
  1482. DbgPrint("MM-rearrange: TrimHard = %d, WS Size = 0x%x, Claim 0x%x, Bucket %d\n",
  1483. VmSupport->Flags.TrimHard,
  1484. VmSupport->WorkingSetSize,
  1485. VmSupport->Claim,
  1486. Size);
  1487. }
  1488. #endif //DBG
  1489. //
  1490. // Note: this reverses the bucket order each time we
  1491. // reorganize the lists. This may be good or bad -
  1492. // if you change it you may want to think about it.
  1493. //
  1494. InsertHeadList (&ListHead[Size],
  1495. &VmSupport->WorkingSetExpansionLinks);
  1496. }
  1497. //
  1498. // Find the first non-empty list.
  1499. //
  1500. for (NonEmpty = 0 ; NonEmpty < MM_WS_REORG_BUCKETS_MAX ; NonEmpty += 1) {
  1501. if (!IsListEmpty (&ListHead[NonEmpty])) {
  1502. break;
  1503. }
  1504. }
  1505. //
  1506. // Put the head of first non-empty list at the beginning
  1507. // of the MmWorkingSetExpansion list.
  1508. //
  1509. MmWorkingSetExpansionHead.ListHead.Flink = ListHead[NonEmpty].Flink;
  1510. ListHead[NonEmpty].Flink->Blink = &MmWorkingSetExpansionHead.ListHead;
  1511. PreviousNonEmpty = NonEmpty;
  1512. //
  1513. // Link the rest of the lists together.
  1514. //
  1515. for (NonEmpty += 1; NonEmpty < MM_WS_REORG_BUCKETS_MAX; NonEmpty += 1) {
  1516. if (!IsListEmpty (&ListHead[NonEmpty])) {
  1517. ListHead[PreviousNonEmpty].Blink->Flink = ListHead[NonEmpty].Flink;
  1518. ListHead[NonEmpty].Flink->Blink = ListHead[PreviousNonEmpty].Blink;
  1519. PreviousNonEmpty = NonEmpty;
  1520. }
  1521. }
  1522. //
  1523. // Link the tail of last non-empty to the MmWorkingSetExpansion list.
  1524. //
  1525. MmWorkingSetExpansionHead.ListHead.Blink = ListHead[PreviousNonEmpty].Blink;
  1526. ListHead[PreviousNonEmpty].Blink->Flink = &MmWorkingSetExpansionHead.ListHead;
  1527. UNLOCK_EXPANSION (OldIrql);
  1528. return;
  1529. }
  1530. VOID
  1531. MmEmptyAllWorkingSets (
  1532. VOID
  1533. )
  1534. /*++
  1535. Routine Description:
  1536. This routine attempts to empty all the working sets on the
  1537. expansion list.
  1538. Arguments:
  1539. None.
  1540. Return Value:
  1541. None.
  1542. Environment:
  1543. Kernel mode. No locks held. APC level or below.
  1544. --*/
  1545. {
  1546. KIRQL OldIrql;
  1547. ASSERT (KeGetCurrentIrql () <= APC_LEVEL);
  1548. ASSERT (PsGetCurrentThread () != MmWorkingSetThread);
  1549. //
  1550. // For Hydra, we cannot attach directly to the session space to be
  1551. // trimmed because it would result in session space references by
  1552. // other threads in this process to the attached session instead
  1553. // of the (currently) correct one. In fact, we cannot even queue
  1554. // this to a worker thread because the working set manager
  1555. // (who shares the same page directory) may be attaching or
  1556. // detaching from a session (any session). So this must be queued
  1557. // to the working set manager.
  1558. //
  1559. LOCK_EXPANSION (OldIrql);
  1560. if (MiWaitingForWorkingSetEmpty == FALSE) {
  1561. MiWaitingForWorkingSetEmpty = TRUE;
  1562. KeClearEvent (&MiWaitForEmptyEvent);
  1563. }
  1564. UNLOCK_EXPANSION (OldIrql);
  1565. KeSetEvent (&MmWorkingSetManagerEvent, 0, FALSE);
  1566. KeWaitForSingleObject (&MiWaitForEmptyEvent,
  1567. WrVirtualMemory,
  1568. KernelMode,
  1569. FALSE,
  1570. (PLARGE_INTEGER)0);
  1571. return;
  1572. }
  1573. //
  1574. // This is deliberately initialized to 1 and only cleared when we have
  1575. // initialized enough of the system working set to support a trim.
  1576. //
  1577. LONG MiTrimInProgressCount = 1;
  1578. ULONG MiTrimAllPageFaultCount;
  1579. LOGICAL
  1580. MmTrimAllSystemPagableMemory (
  1581. IN LOGICAL PurgeTransition
  1582. )
  1583. /*++
  1584. Routine Description:
  1585. This routine unmaps all pagable system memory. This does not unmap user
  1586. memory or locked down kernel memory. Thus, the memory being unmapped
  1587. resides in paged pool, pagable kernel/driver code & data, special pool
  1588. and the system cache.
  1589. Note that pages with a reference count greater than 1 are skipped (ie:
  1590. they remain valid, as they are assumed to be locked down). This prevents
  1591. us from unmapping all of the system cache entries, etc.
  1592. Non-locked down kernel stacks must be outpaged by modifying the balance
  1593. set manager to operate in conjunction with a support routine. This is not
  1594. done here.
  1595. Arguments:
  1596. PurgeTransition - Supplies whether to purge all the clean pages from the
  1597. transition list.
  1598. Return Value:
  1599. TRUE if accomplished, FALSE if not.
  1600. Environment:
  1601. Kernel mode. APC_LEVEL or below.
  1602. --*/
  1603. {
  1604. KIRQL OldIrql;
  1605. KIRQL OldIrql2;
  1606. PLIST_ENTRY Next;
  1607. PMMSUPPORT VmSupport;
  1608. WSLE_NUMBER PagesInUse;
  1609. LOGICAL LockAvailable;
  1610. PETHREAD CurrentThread;
  1611. #if defined(_X86_)
  1612. ULONG flags;
  1613. #endif
  1614. //
  1615. // It's ok to check this without acquiring the system WS lock.
  1616. //
  1617. if (MiTrimAllPageFaultCount == MmSystemCacheWs.PageFaultCount) {
  1618. return FALSE;
  1619. }
  1620. //
  1621. // Working set mutexes will be acquired which require APC_LEVEL or below.
  1622. //
  1623. if (KeGetCurrentIrql() > APC_LEVEL) {
  1624. return FALSE;
  1625. }
  1626. //
  1627. // Just return if it's too early during system initialization or if
  1628. // another thread/processor is racing here to do the work for us.
  1629. //
  1630. if (InterlockedIncrement (&MiTrimInProgressCount) > 1) {
  1631. InterlockedDecrement (&MiTrimInProgressCount);
  1632. return FALSE;
  1633. }
  1634. #if defined(_X86_)
  1635. _asm {
  1636. pushfd
  1637. pop eax
  1638. mov flags, eax
  1639. }
  1640. if ((flags & EFLAGS_INTERRUPT_MASK) == 0) {
  1641. InterlockedDecrement (&MiTrimInProgressCount);
  1642. return FALSE;
  1643. }
  1644. #endif
  1645. LockAvailable = KeTryToAcquireSpinLock (&MmExpansionLock, &OldIrql);
  1646. if (LockAvailable == FALSE) {
  1647. InterlockedDecrement (&MiTrimInProgressCount);
  1648. return FALSE;
  1649. }
  1650. MM_SET_EXPANSION_OWNER ();
  1651. CurrentThread = PsGetCurrentThread();
  1652. //
  1653. // If the system cache resource is owned by this thread then don't bother
  1654. // trying to trim now. Note that checking the MmSystemLockOwner is not
  1655. // sufficient as this flag is cleared just before actually releasing it.
  1656. //
  1657. if ((CurrentThread == MmSystemLockOwner) ||
  1658. (ExTryToAcquireResourceExclusiveLite(&MmSystemWsLock) == FALSE)) {
  1659. UNLOCK_EXPANSION (OldIrql);
  1660. InterlockedDecrement (&MiTrimInProgressCount);
  1661. return FALSE;
  1662. }
  1663. Next = MmWorkingSetExpansionHead.ListHead.Flink;
  1664. while (Next != &MmWorkingSetExpansionHead.ListHead) {
  1665. if (Next == &MmSystemCacheWs.WorkingSetExpansionLinks) {
  1666. break;
  1667. }
  1668. Next = Next->Flink;
  1669. }
  1670. if (Next != &MmSystemCacheWs.WorkingSetExpansionLinks) {
  1671. ExReleaseResourceLite(&MmSystemWsLock);
  1672. UNLOCK_EXPANSION (OldIrql);
  1673. InterlockedDecrement (&MiTrimInProgressCount);
  1674. return FALSE;
  1675. }
  1676. RemoveEntryList (Next);
  1677. VmSupport = &MmSystemCacheWs;
  1678. VmSupport->WorkingSetExpansionLinks.Flink = MM_NO_WS_EXPANSION;
  1679. VmSupport->WorkingSetExpansionLinks.Blink = MM_WS_EXPANSION_IN_PROGRESS;
  1680. ASSERT (VmSupport->Flags.BeingTrimmed == 0);
  1681. VmSupport->Flags.BeingTrimmed = 1;
  1682. MiTrimAllPageFaultCount = VmSupport->PageFaultCount;
  1683. PagesInUse = VmSupport->WorkingSetSize;
  1684. //
  1685. // There are 2 issues here that are carefully dealt with :
  1686. //
  1687. // 1. APCs must be disabled while any resources are held to prevent
  1688. // suspend APCs from deadlocking the system.
  1689. // 2. Once the system cache has been marked MM_WS_EXPANSION_IN_PROGRESS,
  1690. // either the thread must not be preempted or the system cache working
  1691. // set mutex must be held throughout. Otherwise a high priority thread
  1692. // can fault on a system code and data address and the two pages will
  1693. // thrash forever (at high priority) because no system working set
  1694. // expansion is allowed while MM_WS_EXPANSION_IN_PROGRESS is set.
  1695. // The decision was to hold the system working set mutex throughout.
  1696. //
  1697. MmSystemLockOwner = PsGetCurrentThread ();
  1698. UNLOCK_EXPANSION (APC_LEVEL);
  1699. MiEmptyWorkingSet (VmSupport, FALSE);
  1700. LOCK_EXPANSION (OldIrql2);
  1701. ASSERT (OldIrql2 == APC_LEVEL);
  1702. ASSERT (VmSupport->WorkingSetExpansionLinks.Flink == MM_NO_WS_EXPANSION);
  1703. ASSERT (VmSupport->Flags.BeingTrimmed == 1);
  1704. VmSupport->Flags.BeingTrimmed = 0;
  1705. ASSERT (VmSupport->WorkingSetExpansionLinks.Blink ==
  1706. MM_WS_EXPANSION_IN_PROGRESS);
  1707. InsertTailList (&MmWorkingSetExpansionHead.ListHead,
  1708. &VmSupport->WorkingSetExpansionLinks);
  1709. UNLOCK_EXPANSION (APC_LEVEL);
  1710. //
  1711. // Since MiEmptyWorkingSet will attempt to recursively acquire and release
  1712. // the MmSystemWsLock, the MmSystemLockOwner field may get cleared.
  1713. // This means here the resource must be explicitly released instead of
  1714. // using UNLOCK_SYSTEM_WS.
  1715. //
  1716. MmSystemLockOwner = NULL;
  1717. ExReleaseResourceLite (&MmSystemWsLock);
  1718. KeLowerIrql (OldIrql);
  1719. ASSERT (KeGetCurrentIrql() <= APC_LEVEL);
  1720. if (PurgeTransition == TRUE) {
  1721. MiPurgeTransitionList ();
  1722. }
  1723. InterlockedDecrement (&MiTrimInProgressCount);
  1724. return TRUE;
  1725. }
  1726. LOGICAL
  1727. MmTrimProcessMemory (
  1728. IN LOGICAL PurgeTransition
  1729. )
  1730. /*++
  1731. Routine Description:
  1732. This routine unmaps all of the current process' user memory.
  1733. Arguments:
  1734. PurgeTransition - Supplies whether to purge all the clean pages from the
  1735. transition list.
  1736. Return Value:
  1737. TRUE if accomplished, FALSE if not.
  1738. Environment:
  1739. Kernel mode. APC_LEVEL or below.
  1740. --*/
  1741. {
  1742. WSLE_NUMBER Last;
  1743. KIRQL OldIrql;
  1744. PLIST_ENTRY Next;
  1745. PMMSUPPORT VmSupport;
  1746. LOGICAL LockAvailable;
  1747. PEPROCESS Process;
  1748. WSLE_NUMBER LastFreed;
  1749. PMMWSL WorkingSetList;
  1750. PMMWSLE Wsle;
  1751. PMMPTE PointerPte;
  1752. WSLE_NUMBER Entry;
  1753. #if defined(_X86_)
  1754. ULONG flags;
  1755. #endif
  1756. //
  1757. // Working set mutexes will be acquired which require APC_LEVEL or below.
  1758. //
  1759. if (KeGetCurrentIrql() > APC_LEVEL) {
  1760. return FALSE;
  1761. }
  1762. #if defined(_X86_)
  1763. _asm {
  1764. pushfd
  1765. pop eax
  1766. mov flags, eax
  1767. }
  1768. if ((flags & EFLAGS_INTERRUPT_MASK) == 0) {
  1769. return FALSE;
  1770. }
  1771. #endif
  1772. //
  1773. // Just return if it's too early during system initialization or if
  1774. // another thread/processor is racing here to do the work for us.
  1775. //
  1776. if (InterlockedIncrement (&MiTrimInProgressCount) > 1) {
  1777. InterlockedDecrement (&MiTrimInProgressCount);
  1778. return FALSE;
  1779. }
  1780. Process = PsGetCurrentProcess ();
  1781. VmSupport = &Process->Vm;
  1782. //
  1783. // If the WS mutex is not readily available then just return.
  1784. //
  1785. if (ExTryToAcquireFastMutex (&Process->WorkingSetLock) == FALSE) {
  1786. InterlockedDecrement (&MiTrimInProgressCount);
  1787. return FALSE;
  1788. }
  1789. //
  1790. // If the process is exiting then just return.
  1791. //
  1792. if (Process->Flags & PS_PROCESS_FLAGS_VM_DELETED) {
  1793. UNLOCK_WS (Process);
  1794. InterlockedDecrement (&MiTrimInProgressCount);
  1795. return FALSE;
  1796. }
  1797. ASSERT (!MI_IS_WS_UNSAFE(Process));
  1798. //
  1799. // If the expansion lock is not available then just return.
  1800. //
  1801. LockAvailable = KeTryToAcquireSpinLock (&MmExpansionLock, &OldIrql);
  1802. if (LockAvailable == FALSE) {
  1803. UNLOCK_WS (Process);
  1804. InterlockedDecrement (&MiTrimInProgressCount);
  1805. return FALSE;
  1806. }
  1807. ASSERT (OldIrql == APC_LEVEL);
  1808. MM_SET_EXPANSION_OWNER ();
  1809. Next = MmWorkingSetExpansionHead.ListHead.Flink;
  1810. while (Next != &MmWorkingSetExpansionHead.ListHead) {
  1811. if (Next == &VmSupport->WorkingSetExpansionLinks) {
  1812. break;
  1813. }
  1814. Next = Next->Flink;
  1815. }
  1816. if (Next != &VmSupport->WorkingSetExpansionLinks) {
  1817. UNLOCK_EXPANSION (OldIrql);
  1818. UNLOCK_WS (Process);
  1819. InterlockedDecrement (&MiTrimInProgressCount);
  1820. return FALSE;
  1821. }
  1822. RemoveEntryList (Next);
  1823. VmSupport->WorkingSetExpansionLinks.Flink = MM_NO_WS_EXPANSION;
  1824. VmSupport->WorkingSetExpansionLinks.Blink = MM_WS_EXPANSION_IN_PROGRESS;
  1825. ASSERT (VmSupport->Flags.BeingTrimmed == 0);
  1826. VmSupport->Flags.BeingTrimmed = 1;
  1827. UNLOCK_EXPANSION (APC_LEVEL);
  1828. //
  1829. // There are 2 issues here that are carefully dealt with :
  1830. //
  1831. // 1. APCs must be disabled while any resources are held to prevent
  1832. // suspend APCs from deadlocking the system.
  1833. // 2. Once the working set has been marked MM_WS_EXPANSION_IN_PROGRESS,
  1834. // the working set mutex must be held throughout, otherwise a high
  1835. // priority thread can fault on a code and data address and the two
  1836. // pages will thrash forever (at high priority) because no working set
  1837. // expansion is allowed while MM_WS_EXPANSION_IN_PROGRESS is set.
  1838. //
  1839. WorkingSetList = VmSupport->VmWorkingSetList;
  1840. Wsle = WorkingSetList->Wsle;
  1841. //
  1842. // Attempt to remove the pages starting at the bottom.
  1843. //
  1844. LastFreed = WorkingSetList->LastEntry;
  1845. for (Entry = WorkingSetList->FirstDynamic; Entry <= LastFreed; Entry += 1) {
  1846. if (Wsle[Entry].u1.e1.Valid != 0) {
  1847. PointerPte = MiGetPteAddress (Wsle[Entry].u1.VirtualAddress);
  1848. MiFreeWsle (Entry, VmSupport, PointerPte);
  1849. }
  1850. }
  1851. MiRemoveWorkingSetPages (WorkingSetList, VmSupport);
  1852. WorkingSetList->NextSlot = WorkingSetList->FirstDynamic;
  1853. //
  1854. // Attempt to remove the pages from the front to the end.
  1855. //
  1856. //
  1857. // Reorder the free list.
  1858. //
  1859. Last = 0;
  1860. Entry = WorkingSetList->FirstDynamic;
  1861. LastFreed = WorkingSetList->LastInitializedWsle;
  1862. while (Entry <= LastFreed) {
  1863. if (Wsle[Entry].u1.e1.Valid == 0) {
  1864. if (Last == 0) {
  1865. WorkingSetList->FirstFree = Entry;
  1866. }
  1867. else {
  1868. Wsle[Last].u1.Long = Entry << MM_FREE_WSLE_SHIFT;
  1869. }
  1870. Last = Entry;
  1871. }
  1872. Entry += 1;
  1873. }
  1874. if (Last != 0) {
  1875. Wsle[Last].u1.Long = WSLE_NULL_INDEX << MM_FREE_WSLE_SHIFT; // End of list.
  1876. }
  1877. LOCK_EXPANSION (OldIrql);
  1878. ASSERT (OldIrql == APC_LEVEL);
  1879. ASSERT (VmSupport->WorkingSetExpansionLinks.Flink == MM_NO_WS_EXPANSION);
  1880. ASSERT (VmSupport->Flags.BeingTrimmed == 1);
  1881. VmSupport->Flags.BeingTrimmed = 0;
  1882. ASSERT (VmSupport->WorkingSetExpansionLinks.Blink ==
  1883. MM_WS_EXPANSION_IN_PROGRESS);
  1884. InsertTailList (&MmWorkingSetExpansionHead.ListHead,
  1885. &VmSupport->WorkingSetExpansionLinks);
  1886. UNLOCK_EXPANSION (APC_LEVEL);
  1887. UNLOCK_WS (Process);
  1888. ASSERT (KeGetCurrentIrql() <= APC_LEVEL);
  1889. if (PurgeTransition == TRUE) {
  1890. MiPurgeTransitionList ();
  1891. }
  1892. InterlockedDecrement (&MiTrimInProgressCount);
  1893. return TRUE;
  1894. }
  1895. LOGICAL
  1896. MmTrimSessionMemory (
  1897. IN LOGICAL PurgeTransition
  1898. )
  1899. /*++
  1900. Routine Description:
  1901. This routine unmaps all of the current session's virtual addresses.
  1902. Arguments:
  1903. PurgeTransition - Supplies whether to purge all the clean pages from the
  1904. transition list.
  1905. Return Value:
  1906. TRUE if accomplished, FALSE if not.
  1907. Environment:
  1908. Kernel mode. APC_LEVEL or below.
  1909. --*/
  1910. {
  1911. KIRQL OldIrqlWs;
  1912. KIRQL OldIrql;
  1913. PLIST_ENTRY Next;
  1914. PMMSUPPORT VmSupport;
  1915. LOGICAL LockAvailable;
  1916. PEPROCESS Process;
  1917. PETHREAD Thread;
  1918. PMM_SESSION_SPACE SessionGlobal;
  1919. #if defined(_X86_)
  1920. ULONG flags;
  1921. #endif
  1922. //
  1923. // Working set mutexes will be acquired which require APC_LEVEL or below.
  1924. //
  1925. if (KeGetCurrentIrql() > APC_LEVEL) {
  1926. return FALSE;
  1927. }
  1928. #if defined(_X86_)
  1929. _asm {
  1930. pushfd
  1931. pop eax
  1932. mov flags, eax
  1933. }
  1934. if ((flags & EFLAGS_INTERRUPT_MASK) == 0) {
  1935. return FALSE;
  1936. }
  1937. #endif
  1938. //
  1939. // Just return if it's too early during system initialization or if
  1940. // another thread/processor is racing here to do the work for us.
  1941. //
  1942. if (InterlockedIncrement (&MiTrimInProgressCount) > 1) {
  1943. InterlockedDecrement (&MiTrimInProgressCount);
  1944. return FALSE;
  1945. }
  1946. Thread = PsGetCurrentThread ();
  1947. Process = PsGetCurrentProcessByThread (Thread);
  1948. if (((Process->Flags & PS_PROCESS_FLAGS_IN_SESSION) == 0) ||
  1949. (Process->Vm.Flags.SessionLeader == 1)) {
  1950. InterlockedDecrement (&MiTrimInProgressCount);
  1951. return FALSE;
  1952. }
  1953. //
  1954. // If the WS mutex is not readily available then just return.
  1955. //
  1956. SessionGlobal = SESSION_GLOBAL(MmSessionSpace);
  1957. KeRaiseIrql (APC_LEVEL, &OldIrqlWs);
  1958. //
  1959. // Check for the working set resource being owned by the current thread
  1960. // because the resource package allows recursive acquires.
  1961. //
  1962. if (MmSessionSpace->WorkingSetLockOwner == Thread) {
  1963. KeLowerIrql (OldIrqlWs);
  1964. InterlockedDecrement (&MiTrimInProgressCount);
  1965. return FALSE;
  1966. }
  1967. if (ExTryToAcquireResourceExclusiveLite (&MmSessionSpace->WsLock) == FALSE) {
  1968. KeLowerIrql (OldIrqlWs);
  1969. InterlockedDecrement (&MiTrimInProgressCount);
  1970. return FALSE;
  1971. }
  1972. MM_SET_SESSION_RESOURCE_OWNER (Thread);
  1973. VmSupport = &SessionGlobal->Vm;
  1974. //
  1975. // If the expansion lock is not available then just return.
  1976. //
  1977. LockAvailable = KeTryToAcquireSpinLock (&MmExpansionLock, &OldIrql);
  1978. if (LockAvailable == FALSE) {
  1979. UNLOCK_SESSION_SPACE_WS (OldIrqlWs);
  1980. InterlockedDecrement (&MiTrimInProgressCount);
  1981. return FALSE;
  1982. }
  1983. ASSERT (OldIrql == APC_LEVEL);
  1984. MM_SET_EXPANSION_OWNER ();
  1985. Next = MmWorkingSetExpansionHead.ListHead.Flink;
  1986. while (Next != &MmWorkingSetExpansionHead.ListHead) {
  1987. if (Next == &VmSupport->WorkingSetExpansionLinks) {
  1988. break;
  1989. }
  1990. Next = Next->Flink;
  1991. }
  1992. if (Next != &VmSupport->WorkingSetExpansionLinks) {
  1993. UNLOCK_EXPANSION (OldIrql);
  1994. UNLOCK_SESSION_SPACE_WS (OldIrqlWs);
  1995. InterlockedDecrement (&MiTrimInProgressCount);
  1996. return FALSE;
  1997. }
  1998. RemoveEntryList (Next);
  1999. VmSupport->WorkingSetExpansionLinks.Flink = MM_NO_WS_EXPANSION;
  2000. VmSupport->WorkingSetExpansionLinks.Blink = MM_WS_EXPANSION_IN_PROGRESS;
  2001. ASSERT (VmSupport->Flags.BeingTrimmed == 0);
  2002. VmSupport->Flags.BeingTrimmed = 1;
  2003. UNLOCK_EXPANSION (APC_LEVEL);
  2004. //
  2005. // There are 2 issues here that are carefully dealt with :
  2006. //
  2007. // 1. APCs must be disabled while any resources are held to prevent
  2008. // suspend APCs from deadlocking the system.
  2009. // 2. Once the working set has been marked MM_WS_EXPANSION_IN_PROGRESS,
  2010. // the working set mutex must be held throughout, otherwise a high
  2011. // priority thread can fault on a code and data address and the two
  2012. // pages will thrash forever (at high priority) because no working set
  2013. // expansion is allowed while MM_WS_EXPANSION_IN_PROGRESS is set.
  2014. //
  2015. MiEmptyWorkingSet (VmSupport, FALSE);
  2016. LOCK_EXPANSION (OldIrql);
  2017. ASSERT (OldIrql == APC_LEVEL);
  2018. ASSERT (VmSupport->WorkingSetExpansionLinks.Flink == MM_NO_WS_EXPANSION);
  2019. ASSERT (VmSupport->Flags.BeingTrimmed == 1);
  2020. VmSupport->Flags.BeingTrimmed = 0;
  2021. ASSERT (VmSupport->WorkingSetExpansionLinks.Blink ==
  2022. MM_WS_EXPANSION_IN_PROGRESS);
  2023. InsertTailList (&MmWorkingSetExpansionHead.ListHead,
  2024. &VmSupport->WorkingSetExpansionLinks);
  2025. UNLOCK_EXPANSION (APC_LEVEL);
  2026. //
  2027. // Since MiEmptyWorkingSet will attempt to recursively acquire and release
  2028. // the session space ws mutex, the owner field may get cleared.
  2029. // This means here the resource must be explicitly released instead of
  2030. // using UNLOCK_SESSION_SPACE_WS.
  2031. //
  2032. MmSessionSpace->WorkingSetLockOwner = NULL;
  2033. ExReleaseResourceLite (&MmSessionSpace->WsLock);
  2034. KeLowerIrql (OldIrqlWs);
  2035. ASSERT (KeGetCurrentIrql() <= APC_LEVEL);
  2036. if (PurgeTransition == TRUE) {
  2037. MiPurgeTransitionList ();
  2038. }
  2039. InterlockedDecrement (&MiTrimInProgressCount);
  2040. return TRUE;
  2041. }