Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

477 lines
13 KiB

  1. /*++
  2. Copyright (c) 1989 Microsoft Corporation
  3. Module Name:
  4. wrtfault.c
  5. Abstract:
  6. This module contains the copy on write routine for memory management.
  7. Author:
  8. Lou Perazzoli (loup) 10-Apr-1989
  9. Landy Wang (landyw) 02-June-1997
  10. Revision History:
  11. --*/
  12. #include "mi.h"
  13. LOGICAL
  14. FASTCALL
  15. MiCopyOnWrite (
  16. IN PVOID FaultingAddress,
  17. IN PMMPTE PointerPte
  18. )
  19. /*++
  20. Routine Description:
  21. This routine performs a copy on write operation for the specified
  22. virtual address.
  In the normal case a new page is obtained with MiRemoveAnyPage, one page
  of data is copied into it through a hyperspace mapping, the PTE is
  rewritten to reference the new page, the TB entry for the faulting
  address is flushed and the share count of the original page is
  decremented.
  For an address at or above MmSessionBase that lies inside a session
  image whose ImageLoading field is nonzero, no copy is made: the PTE is
  rewritten in place as writable, dirty and accessed with copy on write
  cleared, the session TB entry is flushed and FALSE is returned.
  For a process address whose captured PTE does not have the copy on write
  bit set, the page is still copied but the write, dirty, accessed and
  copy on write bits of the PTE are left as captured.
  23. Arguments:
  24. FaultingAddress - Supplies the virtual address which caused the fault.
  25. PointerPte - Supplies the pointer to the PTE which caused the page fault.
  26. Return Value:
  27. Returns TRUE if the page was actually split, FALSE if not.
  FALSE is returned without copying the page when:
  - the address is in a session image that is being loaded (the PTE is
  made writable in place instead),
  - a fork is in progress for a process address and
  MiWaitForForkToComplete returns TRUE, or
  - fewer than MM_HIGH_LIMIT pages are available and
  MiEnsureAvailablePageOrWait returns nonzero (a wait was performed).
  28. Environment:
  29. Kernel mode, APCs disabled, working set mutex held.
  30. --*/
  31. {
  32. MMPTE TempPte;
  33. PFN_NUMBER PageFrameIndex;
  34. PFN_NUMBER NewPageIndex;
  35. PULONG CopyTo;
  36. PULONG CopyFrom;
  37. KIRQL OldIrql;
  38. PMMPFN Pfn1;
  39. PEPROCESS CurrentProcess;
  40. PMMCLONE_BLOCK CloneBlock;
  41. PMMCLONE_DESCRIPTOR CloneDescriptor;
  42. WSLE_NUMBER WorkingSetIndex;
  43. LOGICAL FakeCopyOnWrite;
  44. PMMWSL WorkingSetList;
  45. PVOID SessionSpace;
  46. PLIST_ENTRY NextEntry;
  47. PIMAGE_ENTRY_IN_SESSION Image;
  48. //
  49. // This is called from MmAccessFault, the PointerPte is valid
  50. // and the working set mutex ensures it cannot change state.
  51. //
  52. // Capture the PTE contents to TempPte.
  53. //
  54. TempPte = *PointerPte;
  55. ASSERT (TempPte.u.Hard.Valid == 1);
  56. PageFrameIndex = MI_GET_PAGE_FRAME_FROM_PTE (&TempPte);
  57. Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);
  58. //
  59. // Check to see if this is a prototype PTE with copy on write enabled.
  60. //
  // FakeCopyOnWrite is only set to TRUE on the process address path below;
  // when TRUE the page is copied but the PTE protection bits are not changed.
  61. FakeCopyOnWrite = FALSE;
  62. CurrentProcess = PsGetCurrentProcess ();
  63. CloneBlock = NULL;
  // Addresses at or above MmSessionBase are handled as session space and use
  // the session working set list; all other addresses use the process
  // working set list. SessionSpace being non-NULL selects the session
  // variants of the later steps.
  64. if (FaultingAddress >= (PVOID) MmSessionBase) {
  65. WorkingSetList = MmSessionSpace->Vm.VmWorkingSetList;
  66. ASSERT (Pfn1->u3.e1.PrototypePte == 1);
  67. SessionSpace = (PVOID) MmSessionSpace;
  68. MM_SESSION_SPACE_WS_LOCK_ASSERT ();
  // The session image list is only searched when ImageLoadingCount is
  // nonzero.
  69. if (MmSessionSpace->ImageLoadingCount != 0) {
  70. NextEntry = MmSessionSpace->ImageList.Flink;
  71. while (NextEntry != &MmSessionSpace->ImageList) {
  72. Image = CONTAINING_RECORD (NextEntry, IMAGE_ENTRY_IN_SESSION, Link);
  // The range check is inclusive at both ends.
  73. if ((FaultingAddress >= Image->Address) &&
  74. (FaultingAddress <= Image->LastAddress)) {
  75. if (Image->ImageLoading) {
  76. ASSERT (Pfn1->u3.e1.PrototypePte == 1);
  77. TempPte.u.Hard.CopyOnWrite = 0;
  78. TempPte.u.Hard.Write = 1;
  79. //
  80. // The page is no longer copy on write, update the PTE
  81. // setting both the dirty bit and the accessed bit.
  82. //
  83. // Even though the page's current backing is the image
  84. // file, the modified writer will convert it to
  85. // pagefile backing when it notices the change later.
  86. //
  87. MI_SET_PTE_DIRTY (TempPte);
  88. MI_SET_ACCESSED_IN_PTE (&TempPte, 1);
  89. MI_WRITE_VALID_PTE_NEW_PROTECTION (PointerPte, TempPte);
  90. //
  91. // The TB entry must be flushed as the valid PTE with
  92. // the dirty bit clear has been fetched into the TB. If
  93. // it isn't flushed, another fault is generated as the
  94. // dirty bit is not set in the cached TB entry.
  95. //
  96. MI_FLUSH_SINGLE_SESSION_TB (FaultingAddress);
  // No page was split on this path, so FALSE is returned.
  97. return FALSE;
  98. }
  // The address is inside this image but the image is not loading:
  // stop searching and fall through to the normal copy path.
  99. break;
  100. }
  101. NextEntry = NextEntry->Flink;
  102. }
  103. }
  104. #if 0
  105. //
  106. // This ASSERT is triggered if the session image came from removable
  107. // media (ie: a special CD install, etc) so it cannot be enabled.
  108. //
  109. ASSERT (Pfn1->u3.e1.Modified == 0);
  110. #endif
  111. }
  112. else {
  113. WorkingSetList = MmWorkingSetList;
  114. SessionSpace = NULL;
  115. //
  116. // If a fork operation is in progress, block until the fork is
  117. // completed, then retry the whole operation as the state of
  118. // everything may have changed between when the mutexes were
  119. // released and reacquired.
  120. //
  121. if (CurrentProcess->ForkInProgress != NULL) {
  122. if (MiWaitForForkToComplete (CurrentProcess) == TRUE) {
  123. return FALSE;
  124. }
  125. }
  126. if (TempPte.u.Hard.CopyOnWrite == 0) {
  127. //
  128. // This is a fork page which is being made private in order
  129. // to change the protection of the page.
  130. // Do not make the page writable.
  131. //
  132. FakeCopyOnWrite = TRUE;
  133. }
  134. }
  // Locate the working set list entry for the faulting address, passing the
  // WsIndex stored in the PFN element. The resulting index is handed to
  // MiInitializeCopyOnWritePfn below. This is done before the PFN lock is
  // acquired.
  135. WorkingSetIndex = MiLocateWsle (FaultingAddress,
  136. WorkingSetList,
  137. Pfn1->u1.WsIndex);
  138. //
  139. // The page must be copied into a new page.
  140. //
  141. LOCK_PFN (OldIrql);
  // MiEnsureAvailablePageOrWait is only called when MmAvailablePages is below
  // MM_HIGH_LIMIT. HYDRA_PROCESS is passed in place of the current process
  // for session space faults.
  142. if ((MmAvailablePages < MM_HIGH_LIMIT) &&
  143. (MiEnsureAvailablePageOrWait (SessionSpace != NULL ? HYDRA_PROCESS : CurrentProcess, NULL, OldIrql))) {
  144. //
  145. // A wait operation was performed to obtain an available
  146. // page and the working set mutex and PFN lock have
  147. // been released and various things may have changed for
  148. // the worse. Rather than examine all the conditions again,
  149. // return and if things are still proper, the fault will
  150. // be taken again.
  151. //
  152. UNLOCK_PFN (OldIrql);
  153. return FALSE;
  154. }
  155. //
  156. // This must be a prototype PTE. Perform the copy on write.
  157. //
  // On checked (DBG) builds a non-prototype page is only reported; execution
  // continues either way.
  158. #if DBG
  159. if (Pfn1->u3.e1.PrototypePte == 0) {
  160. DbgPrint ("writefault - PTE indicates cow but not protopte\n");
  161. MiFormatPte (PointerPte);
  162. MiFormatPfn (Pfn1);
  163. }
  164. #endif
  165. //
  166. // A page is being copied and made private, the global state of
  167. // the shared page needs to be updated at this point on certain
  168. // hardware. This is done by ORing the dirty bit into the modify bit in
  169. // the PFN element.
  170. //
  171. // Note that a session page cannot be dirty (no POSIX-style forking is
  172. // supported for these drivers).
  173. //
  174. if (SessionSpace != NULL) {
  175. ASSERT ((TempPte.u.Hard.Valid == 1) && (TempPte.u.Hard.Write == 0));
  176. ASSERT (!MI_IS_PTE_DIRTY (TempPte));
  // Session pages take their page color from the session structure.
  177. NewPageIndex = MiRemoveAnyPage (MI_GET_PAGE_COLOR_FROM_SESSION(MmSessionSpace));
  178. }
  179. else {
  180. MI_CAPTURE_DIRTY_BIT_TO_PFN (PointerPte, Pfn1);
  // Save Pfn1->PteAddress as the clone block pointer; it is passed to
  // MiLocateCloneAddress after the copy has been made.
  181. CloneBlock = (PMMCLONE_BLOCK) Pfn1->PteAddress;
  182. //
  183. // Get a new page with the same color as this page.
  184. //
  185. NewPageIndex = MiRemoveAnyPage (
  186. MI_PAGE_COLOR_PTE_PROCESS(PageFrameIndex,
  187. &CurrentProcess->NextPageColor));
  188. }
  189. MiInitializeCopyOnWritePfn (NewPageIndex,
  190. PointerPte,
  191. WorkingSetIndex,
  192. SessionSpace);
  // The PFN lock is released for the page copy and PTE update; it is
  // reacquired further down before the share count is decremented.
  193. UNLOCK_PFN (OldIrql);
  // The counter is updated with an interlocked operation because the PFN
  // lock is no longer held here.
  194. InterlockedIncrement ((PLONG) &MmInfoCounters.CopyOnWriteCount);
  195. #if defined(_MIALT4K_)
  196. //
  197. // Avoid accessing user space as it may potentially
  198. // cause a page fault on the alternate table.
  199. //
  200. CopyFrom = KSEG_ADDRESS (PageFrameIndex);
  201. #else
  // The source of the copy is the page-aligned faulting virtual address,
  // which is still mapped by the old PTE at this point.
  202. CopyFrom = (PULONG) PAGE_ALIGN (FaultingAddress);
  203. #endif
  // Map the new page into hyperspace so it can be written, copy one page
  // (PAGE_SIZE bytes) from the source, then remove the mapping.
  204. CopyTo = (PULONG) MiMapPageInHyperSpace (CurrentProcess,
  205. NewPageIndex,
  206. &OldIrql);
  207. RtlCopyMemory (CopyTo, CopyFrom, PAGE_SIZE);
  208. PERFINFO_PRIVATE_COPY_ON_WRITE(CopyFrom, PAGE_SIZE);
  209. MiUnmapPageInHyperSpace (CurrentProcess, CopyTo, OldIrql);
  210. if (!FakeCopyOnWrite) {
  211. //
  212. // If the page was really a copy on write page, make it
  213. // accessed, dirty and writable. Also, clear the copy-on-write
  214. // bit in the PTE.
  215. //
  216. MI_SET_PTE_DIRTY (TempPte);
  217. TempPte.u.Hard.Write = 1;
  218. MI_SET_ACCESSED_IN_PTE (&TempPte, 1);
  219. TempPte.u.Hard.CopyOnWrite = 0;
  220. }
  221. //
  222. // Regardless of whether the page was really a copy on write,
  223. // the frame field of the PTE must be updated.
  224. //
  225. TempPte.u.Hard.PageFrameNumber = NewPageIndex;
  226. //
  227. // If the modify bit is set in the PFN database for the
  228. // page, the data cache must be flushed. This is due to the
  229. // fact that this process may have been cloned and the cache
  230. // still contains stale data destined for the page we are
  231. // going to remove.
  232. //
  // NOTE(review): no data cache flush follows in the visible code; the
  // comment above may be stale - confirm before relying on it.
  233. ASSERT (TempPte.u.Hard.Valid == 1);
  234. MI_WRITE_VALID_PTE_NEW_PAGE (PointerPte, TempPte);
  235. //
  236. // Flush the TB entry for this page.
  237. //
  238. if (SessionSpace == NULL) {
  239. KeFlushSingleTb (FaultingAddress, FALSE);
  240. //
  241. // Increment the number of private pages.
  242. //
  243. CurrentProcess->NumberOfPrivatePages += 1;
  244. }
  245. else {
  246. MI_FLUSH_SINGLE_SESSION_TB (FaultingAddress);
  247. ASSERT (Pfn1->u3.e1.PrototypePte == 1);
  248. }
  249. //
  250. // Decrement the share count for the page which was copied
  251. // as this PTE no longer refers to it.
  252. //
  253. LOCK_PFN (OldIrql);
  // Pfn1 and PageFrameIndex still describe the original (shared) page.
  254. MiDecrementShareCount (Pfn1, PageFrameIndex);
  // For process addresses only, look up the clone descriptor for the clone
  // block saved earlier; when one is found, drop a reference on the block.
  255. if (SessionSpace == NULL) {
  256. CloneDescriptor = MiLocateCloneAddress (CurrentProcess,
  257. (PVOID)CloneBlock);
  258. if (CloneDescriptor != NULL) {
  259. //
  260. // Decrement the reference count for the clone block,
  261. // note that this could release and reacquire the mutexes.
  262. //
  263. MiDecrementCloneBlockReference (CloneDescriptor,
  264. CloneBlock,
  265. CurrentProcess,
  266. OldIrql);
  267. }
  268. }
  269. UNLOCK_PFN (OldIrql);
  // The page was copied and the PTE now references the new page.
  270. return TRUE;
  271. }
  272. #if !defined(NT_UP) || defined (_IA64_)
  273. VOID
  274. MiSetDirtyBit (
  275. IN PVOID FaultingAddress,
  276. IN PMMPTE PointerPte,
  277. IN ULONG PfnHeld
  278. )
  279. /*++
  280. Routine Description:
  281. This routine sets dirty in the specified PTE and the modify bit in the
  282. corresponding PFN element. If any page file space is allocated, it
  283. is deallocated.
  The PTE is always updated (dirty and accessed set). The PFN element is
  only touched, and page file space only released, when PfnHeld is
  nonzero. The routine finishes by calling KeFillEntryTb for the faulting
  address.
  This routine is only compiled when NT_UP is not defined or when _IA64_
  is defined.
  284. Arguments:
  285. FaultingAddress - Supplies the faulting address.
  286. PointerPte - Supplies a pointer to the corresponding valid PTE.
  287. PfnHeld - Supplies TRUE if the PFN lock is already held.
  This routine does not acquire the PFN lock itself; when PfnHeld is zero
  the PFN database is left unchanged.
  288. Return Value:
  289. None.
  290. Environment:
  291. Kernel mode, APCs disabled, Working set mutex held.
  292. --*/
  293. {
  294. MMPTE TempPte;
  295. PFN_NUMBER PageFrameIndex;
  296. PMMPFN Pfn1;
  297. //
  298. // The page is NOT copy on write, update the PTE setting both the
  299. // dirty bit and the accessed bit. Note, that as this PTE is in
  300. // the TB, the TB must be flushed.
  301. //
  // The PTE is modified in a local copy and then written back in one step.
  302. TempPte = *PointerPte;
  303. MI_SET_PTE_DIRTY (TempPte);
  304. MI_SET_ACCESSED_IN_PTE (&TempPte, 1);
  305. MI_WRITE_VALID_PTE_NEW_PROTECTION (PointerPte, TempPte);
  306. //
  307. // Check state of PFN lock and if not held, don't update PFN database.
  308. //
  309. if (PfnHeld) {
  // The frame number is read back from the PTE that was just written.
  310. PageFrameIndex = MI_GET_PAGE_FRAME_FROM_PTE (PointerPte);
  311. Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);
  312. //
  313. // Set the modified field in the PFN database, also, if the physical
  314. // page is currently in a paging file, free up the page file space
  315. // as the contents are now worthless.
  316. //
  // Page file space is only released when the original PTE is not in
  // prototype format and no write of the page is currently in progress.
  317. if ((Pfn1->OriginalPte.u.Soft.Prototype == 0) &&
  318. (Pfn1->u3.e1.WriteInProgress == 0)) {
  319. //
  320. // This page is in page file format, deallocate the page file space.
  321. //
  322. MiReleasePageFileSpace (Pfn1->OriginalPte);
  323. //
  324. // Change original PTE to indicate no page file space is reserved,
  325. // otherwise the space will be deallocated when the PTE is
  326. // deleted.
  327. //
  328. Pfn1->OriginalPte.u.Soft.PageFileHigh = 0;
  329. }
  // The modified bit is set whether or not page file space was released.
  // NOTE(review): 0x17 is presumably a call-site identifier consumed by
  // MI_SET_MODIFIED - confirm against the macro definition.
  330. MI_SET_MODIFIED (Pfn1, 1, 0x17);
  331. }
  332. //
  333. // The TB entry must be flushed as the valid PTE with the dirty bit clear
  334. // has been fetched into the TB. If it isn't flushed, another fault
  335. // is generated as the dirty bit is not set in the cached TB entry.
  336. //
  // NOTE(review): the comment above says "flushed" but the call below is
  // KeFillEntryTb, not a flush routine; presumably it refreshes the TB entry
  // for the address from the updated PTE - confirm.
  337. KeFillEntryTb (FaultingAddress);
  338. return;
  339. }
  340. #endif