Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

2223 lines
60 KiB

  1. /*++
  2. Copyright (c) 1990 Microsoft Corporation
  3. Module Name:
  4. cc.h
  5. Abstract:
  6. This module is a header file for the Memory Management based cache
  7. management routines for the common Cache subsystem.
  8. Author:
  9. Tom Miller [TomM] 4-May-1990
  10. Revision History:
  11. --*/
  12. #ifndef _CCh_
  13. #define _CCh_
  14. #pragma warning(disable:4214) // bit field types other than int
  15. #pragma warning(disable:4201) // nameless struct/union
  16. #pragma warning(disable:4127) // condition expression is constant
  17. #pragma warning(disable:4115) // named type definition in parentheses
  18. #include <ntos.h>
  19. #include <NtIoLogc.h>
  20. #ifdef MEMPRINT
  21. #include <memprint.h>
  22. #endif
//
// Define macros to acquire and release cache manager locks.
//
// All three locks (master, Vacb, work queue) are global queued
// spinlocks addressed by their LockQueue indices. The OldIrql
// argument is a pointer that receives the previous IRQL. The
// AtDpcLevel/FromDpcLevel forms operate on the current processor's
// lock queue entry and (per the Ke naming convention) are for callers
// already at DISPATCH_LEVEL.
//

#define CcAcquireMasterLock( OldIrql ) \
    *( OldIrql ) = KeAcquireQueuedSpinLock( LockQueueMasterLock )

#define CcReleaseMasterLock( OldIrql ) \
    KeReleaseQueuedSpinLock( LockQueueMasterLock, OldIrql )

#define CcAcquireMasterLockAtDpcLevel() \
    KeAcquireQueuedSpinLockAtDpcLevel( &KeGetCurrentPrcb()->LockQueue[LockQueueMasterLock] )

#define CcReleaseMasterLockFromDpcLevel() \
    KeReleaseQueuedSpinLockFromDpcLevel( &KeGetCurrentPrcb()->LockQueue[LockQueueMasterLock] )

//
// Same pattern for the lock protecting the Vacb structures.
//

#define CcAcquireVacbLock( OldIrql ) \
    *( OldIrql ) = KeAcquireQueuedSpinLock( LockQueueVacbLock )

#define CcReleaseVacbLock( OldIrql ) \
    KeReleaseQueuedSpinLock( LockQueueVacbLock, OldIrql )

#define CcAcquireVacbLockAtDpcLevel() \
    KeAcquireQueuedSpinLockAtDpcLevel( &KeGetCurrentPrcb()->LockQueue[LockQueueVacbLock] )

#define CcReleaseVacbLockFromDpcLevel() \
    KeReleaseQueuedSpinLockFromDpcLevel( &KeGetCurrentPrcb()->LockQueue[LockQueueVacbLock] )

//
// Same pattern for the lock protecting the work queues.
//

#define CcAcquireWorkQueueLock( OldIrql ) \
    *( OldIrql ) = KeAcquireQueuedSpinLock( LockQueueWorkQueueLock )

#define CcReleaseWorkQueueLock( OldIrql ) \
    KeReleaseQueuedSpinLock( LockQueueWorkQueueLock, OldIrql )

#define CcAcquireWorkQueueLockAtDpcLevel() \
    KeAcquireQueuedSpinLockAtDpcLevel( &KeGetCurrentPrcb()->LockQueue[LockQueueWorkQueueLock] )

#define CcReleaseWorkQueueLockFromDpcLevel() \
    KeReleaseQueuedSpinLockFromDpcLevel( &KeGetCurrentPrcb()->LockQueue[LockQueueWorkQueueLock] )
//
// This turns on the Bcb list debugging in a debug system. Set value
// to 0 to turn off.
//
// **** Note it must currently be turned off because the routines in
// pinsup.c that manipulate this list need to be changed to do the
// right thing for Obcbs. Right now they get messed up by inserting Obcbs
// (which may not be large enough among other things) into the global
// list. Ideally each place gets some code to insert the underlying
// Bcbs into the list if they are not already there.
//

#if DBG
#define LIST_DBG 0
#endif

#include <FsRtl.h>

//
// Peek at number of available pages. (This is only an extern
// declaration; the variable itself is defined and maintained by the
// Memory Manager.)
//

extern PFN_NUMBER MmAvailablePages;
//
// Define our node type codes. These CSHORT values are stored in the
// NodeTypeCode field at the start of the cache structures defined in
// this header, so a pointer can be identified as the structure it
// claims to be.
//

#define CACHE_NTC_SHARED_CACHE_MAP       (0x2FF)
#define CACHE_NTC_PRIVATE_CACHE_MAP      (0x2FE)
#define CACHE_NTC_BCB                    (0x2FD)
#define CACHE_NTC_DEFERRED_WRITE         (0x2FC)
#define CACHE_NTC_MBCB                   (0x2FB)
#define CACHE_NTC_OBCB                   (0x2FA)
#define CACHE_NTC_MBCB_GRANDE            (0x2F9)
  79. //
  80. // The following definitions are used to generate meaningful blue bugcheck
  81. // screens. On a bugcheck the file system can output 4 ulongs of useful
  82. // information. The first ulong will have encoded in it a source file id
  83. // (in the high word) and the line number of the bugcheck (in the low word).
  84. // The other values can be whatever the caller of the bugcheck routine deems
  85. // necessary.
  86. //
  87. // Each individual file that calls bugcheck needs to have defined at the
  88. // start of the file a constant called BugCheckFileId with one of the
  89. // CACHE_BUG_CHECK_ values defined below and then use CcBugCheck to bugcheck
  90. // the system.
  91. //
  92. #define CACHE_BUG_CHECK_CACHEDAT (0x00010000)
  93. #define CACHE_BUG_CHECK_CACHESUB (0x00020000)
  94. #define CACHE_BUG_CHECK_COPYSUP (0x00030000)
  95. #define CACHE_BUG_CHECK_FSSUP (0x00040000)
  96. #define CACHE_BUG_CHECK_LAZYRITE (0x00050000)
  97. #define CACHE_BUG_CHECK_LOGSUP (0x00060000)
  98. #define CACHE_BUG_CHECK_MDLSUP (0x00070000)
  99. #define CACHE_BUG_CHECK_PINSUP (0x00080000)
  100. #define CACHE_BUG_CHECK_VACBSUP (0x00090000)
  101. #define CcBugCheck(A,B,C) { KeBugCheckEx(CACHE_MANAGER, BugCheckFileId | __LINE__, A, B, C ); }
//
// Define maximum View Size. (These constants are currently chosen so
// as to be exactly a page worth of PTEs.)
//

#define DEFAULT_CREATE_MODULO ((ULONG)(0x00100000))
#define DEFAULT_EXTEND_MODULO ((ULONG)(0x00100000))

//
// For non FO_RANDOM_ACCESS files, define how far we go before unmapping
// views.
//

#define SEQUENTIAL_MAP_LIMIT ((ULONG)(0x00080000))
//
// Define some constants to drive read ahead and write behind.
//

//
// Set max read ahead. Even though some drivers, such as AT, break up transfers >= 128kb,
// we need to permit enough readahead to satisfy plausible cached read operation while
// preventing denial of service attacks.
//
// This value used to be set to 64k. When doing cached reads in larger units (128k), we
// would never be bringing in enough data to keep the user from blocking. 8mb is
// arbitrarily chosen to be greater than plausible RAID bandwidth and user operation size
// by a factor of 3-4.
//

#define MAX_READ_AHEAD (8 * 1024 * 1024)

//
// Set maximum write behind / lazy write (most drivers break up transfers >= 64kb).
//

#define MAX_WRITE_BEHIND (MM_MAXIMUM_DISK_IO_SIZE)
//
// Set a throttle for charging a given write against the total number of dirty
// pages in the system, for the purpose of seeing when we should invoke write
// throttling.
//
// This must be the same as the throttle used for seeing when we must flush
// temporary files in the lazy writer. On the back of the envelope, here
// is why:
//
//   RDP = Regular File Dirty Pages
//   TDP = Temporary File Dirty Pages
//   CWT = Charged Write Throttle
//         -> the maximum we will charge a user with when we see if
//            he should be throttled
//   TWT = Temporary Write Throttle
//         -> if we can't write this many pages, we must write temp data
//   DPT = Dirty Page Threshold
//         -> the limit when write throttling kicks in
//
//   PTD = Pages To Dirty
//   CDP = Charged Dirty Pages
//
// Now, CDP = Min( PTD, CWT ).
//
// Excluding other effects, we throttle when:
//   #0 (RDP + TDP) + CDP >= DPT
//
// To write temporary data, we must cause:
//   #1 (RDP + TDP) + TWT >= DPT
//
// To release the throttle, we must eventually cause:
//   #2 (RDP + TDP) + CDP < DPT
//
// Now, imagine TDP >> RDP (perhaps RDP == 0) and CDP == CWT for a particular
// throttled write.
//
// If CWT > TWT, as we drive RDP to zero (we never defer writing regular
// data except for hotspots or other very temporary conditions), it is clear
// that we may never trigger the writing of temporary data (#1) but also
// never release the throttle (#2). Simply, we would be willing to charge
// for more dirty pages than we would be willing to guarantee are available
// to dirty. Hence, potential deadlock.
//
// CWT < TWT I leave aside for the moment. This would mean we try not to
// allow temporary data to accumulate to the point that writes throttle as
// a result. Perhaps this would even be better than CWT == TWT.
//
// It is legitimate to ask if throttling temporary data writes should be relaxed
// if we see a large amount of dirty temp data accumulate (and it would be very
// easy to keep track of this). I don't claim to know the best answer to this,
// but for now the attempt to avoid temporary data writes at all costs still
// fits the reasonable operation mix, and we will only penalize the outside
// oddcase with a little more throttle/release.
//

#define WRITE_CHARGE_THRESHOLD (64 * PAGE_SIZE)

//
// Define constants to control zeroing of file data: one constant to control
// how much data we will actually zero ahead in the cache, and another to
// control what the maximum transfer size is that we will use to write zeros.
//

#define MAX_ZERO_TRANSFER (PAGE_SIZE * 128)
#define MIN_ZERO_TRANSFER (0x10000)
#define MAX_ZEROS_IN_CACHE (0x10000)
//
// Definitions for multi-level Vacb structure. The primary definition is the
// VACB_LEVEL_SHIFT. In a multi-level Vacb structure, each level in the tree of
// pointers has 2 ** VACB_LEVEL_SHIFT pointers.
//
// For test, this value may be set as low as 4 (no lower), a value of 10 corresponds
// to a convenient block size of 4KB. (If set to 2, CcExtendVacbArray will try to
// "push" the Vacb array allocated within the SharedCacheMap, and later someone will
// try to deallocate the middle of the SharedCacheMap. At 3, the
// MBCB_BITMAP_INITIAL_SIZE is larger than MBCB_BITMAP_BLOCK_SIZE.
// NOTE(review): the original comment compared MBCB_BITMAP_BLOCK_SIZE
// with itself, which cannot have been the intent; the comparison above
// is the reviewer's reconstruction -- confirm against mdlsup/cachesub.)
//
// There is a bit of a trick as we make the jump to the multilevel structure in that
// we need a real fixed reference count.
//

#define VACB_LEVEL_SHIFT (7)

//
// This is how many bytes of pointers are at each level. This is the size for both
// the Vacb array and (optional) Bcb listheads. It does not include the reference
// block.
//

#define VACB_LEVEL_BLOCK_SIZE ((1 << VACB_LEVEL_SHIFT) * sizeof(PVOID))

//
// This is the last index for a level.
//

#define VACB_LAST_INDEX_FOR_LEVEL ((1 << VACB_LEVEL_SHIFT) - 1)

//
// This is the size of file which can be handled in a single level.
//

#define VACB_SIZE_OF_FIRST_LEVEL (1 << (VACB_OFFSET_SHIFT + VACB_LEVEL_SHIFT))

//
// This is the maximum number of levels it takes to support 63-bits. It is
// used for routines that must remember a path.
//

#define VACB_NUMBER_OF_LEVELS (((63 - VACB_OFFSET_SHIFT)/VACB_LEVEL_SHIFT) + 1)

//
// Define the reference structure for multilevel Vacb trees. Two
// separate counts are kept per level; SpecialReference presumably
// tracks the VACB_SPECIAL_REFERENCE-style references defined later in
// this header -- confirm in vacbsup.c.
//

typedef struct _VACB_LEVEL_REFERENCE {

    LONG Reference;
    LONG SpecialReference;

} VACB_LEVEL_REFERENCE, *PVACB_LEVEL_REFERENCE;

//
// Define the size of a bitmap allocated for a bitmap range, in bytes.
//

#define MBCB_BITMAP_BLOCK_SIZE (VACB_LEVEL_BLOCK_SIZE)

//
// Define how many bytes of a file are covered by an Mbcb bitmap range,
// at a bit for each page.
//

#define MBCB_BITMAP_RANGE (MBCB_BITMAP_BLOCK_SIZE * 8 * PAGE_SIZE)

//
// Define the initial size of the Mbcb bitmap that is self-contained in the Mbcb.
//

#define MBCB_BITMAP_INITIAL_SIZE (2 * sizeof(BITMAP_RANGE))
//
// Define constants controlling when the Bcb list is broken into a
// pendaflex-style array of listheads, and how the correct listhead
// is found. Begin when file size exceeds 2MB, and cover 512KB per
// listhead. At 512KB per listhead, the BcbListArray is the same
// size as the Vacb array, i.e., it doubles the size.
//
// The code handling these Bcb lists in the Vacb package contains
// assumptions that the size is the same as that of the Vacb pointers.
// Future work could undo this, but until then the size and shift
// below cannot change. There really isn't a good reason to want to
// anyway.
//
// Note that by definition a flat vacb array cannot fail to find an
// exact match when searching for the listhead - this is only a
// complication of the sparse structure.
//

#define BEGIN_BCB_LIST_ARRAY (0x200000)
#define SIZE_PER_BCB_LIST (VACB_MAPPING_GRANULARITY * 2)
#define BCB_LIST_SHIFT (VACB_OFFSET_SHIFT + 1)

//
// Return the Bcb listhead to use for file offset OFF in shared cache
// map SCM. Streams at or below BEGIN_BCB_LIST_ARRAY, or without
// MODIFIED_WRITE_DISABLED, use the single embedded BcbList. Larger
// single-level maps index the listhead array that sits immediately
// after the Vacb pointer array (hence the cast of Vacbs and the bias
// by SectionSize before shifting). Multi-level maps defer to
// CcGetBcbListHeadLargeOffset, passing FAILSUCC through (presumably
// controlling fail/succeed behavior when the listhead's level is not
// materialized in the sparse tree -- see vacbsup.c).
//
#define GetBcbListHead(SCM,OFF,FAILSUCC) (                                                         \
    (((SCM)->SectionSize.QuadPart > BEGIN_BCB_LIST_ARRAY) &&                                       \
     FlagOn((SCM)->Flags, MODIFIED_WRITE_DISABLED)) ?                                              \
    (((SCM)->SectionSize.QuadPart > VACB_SIZE_OF_FIRST_LEVEL) ?                                    \
     CcGetBcbListHeadLargeOffset((SCM),(OFF),(FAILSUCC)) :                                         \
     (((OFF) >= (SCM)->SectionSize.QuadPart) ? &(SCM)->BcbList :                                   \
      ((PLIST_ENTRY)((SCM)->Vacbs) + (((SCM)->SectionSize.QuadPart + (OFF)) >> BCB_LIST_SHIFT)))) : \
    &(SCM)->BcbList                                                                                \
)
  277. //
  278. // Macros to lock/unlock a Vacb level as Bcbs are inserted/deleted
  279. //
  280. #define CcLockVacbLevel(SCM,OFF) { \
  281. if (((SCM)->SectionSize.QuadPart > VACB_SIZE_OF_FIRST_LEVEL) && \
  282. FlagOn(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED)) { \
  283. CcAdjustVacbLevelLockCount((SCM),(OFF), +1);} \
  284. }
  285. #define CcUnlockVacbLevel(SCM,OFF) { \
  286. if (((SCM)->SectionSize.QuadPart > VACB_SIZE_OF_FIRST_LEVEL) && \
  287. FlagOn(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED)) { \
  288. CcAdjustVacbLevelLockCount((SCM),(OFF), -1);} \
  289. }
//
// NOISE_BITS defines how many bits are masked off when testing for
// sequential reads. This allows the reader to skip up to 7 bytes
// for alignment purposes, and we still consider the next read to be
// sequential. Starting and ending addresses are masked by this pattern
// before comparison.
//

#define NOISE_BITS (0x7)

//
// Define some constants to drive the Lazy Writer. The delays are in
// 100-nanosecond units (see NANO_FULL_SECOND below): the idle delay
// is one second, the collision delay one tenth of a second.
//

#define LAZY_WRITER_IDLE_DELAY ((LONG)(10000000))
#define LAZY_WRITER_COLLISION_DELAY ((LONG)(1000000))

//
// The wait is in 100 nanosecond units, so 10,000,000 = 1 second.
//

#define NANO_FULL_SECOND ((LONGLONG)10000000)

//
// The following target should best be a power of 2.
//

#define LAZY_WRITER_MAX_AGE_TARGET ((ULONG)(8))

//
// Requeue information hint for the lazy writer.
//

#define CC_REQUEUE 35422
//
// The global Cache Manager debug level variable, its values are:
//
//   0x00000000  Always gets printed (used when about to bug check)
//
//   0x00000001  FsSup
//   0x00000002  CacheSub
//   0x00000004  CopySup
//   0x00000008  PinSup
//
//   0x00000010  MdlSup
//   0x00000020  LazyRite
//   0x00000040
//   0x00000080
//
//   0x00000100  Trace all Mm calls
//
// NOTE(review): a lowercase macro named `mm` can silently collide with
// ordinary identifiers in any translation unit including this header;
// renaming would require touching all users, so it is only flagged here.
//
#define mm (0x100)
  333. //
  334. // Miscellaneous support macros.
  335. //
  336. // ULONG
  337. // FlagOn (
  338. // IN ULONG Flags,
  339. // IN ULONG SingleFlag
  340. // );
  341. //
  342. // BOOLEAN
  343. // BooleanFlagOn (
  344. // IN ULONG Flags,
  345. // IN ULONG SingleFlag
  346. // );
  347. //
  348. // VOID
  349. // SetFlag (
  350. // IN ULONG Flags,
  351. // IN ULONG SingleFlag
  352. // );
  353. //
  354. // VOID
  355. // ClearFlag (
  356. // IN ULONG Flags,
  357. // IN ULONG SingleFlag
  358. // );
  359. //
  360. // ULONG
  361. // QuadAlign (
  362. // IN ULONG Pointer
  363. // );
  364. //
  365. #define FlagOn(F,SF) ( \
  366. (((F) & (SF))) \
  367. )
  368. #define BooleanFlagOn(F,SF) ( \
  369. (BOOLEAN)(((F) & (SF)) != 0) \
  370. )
  371. #define SetFlag(F,SF) { \
  372. (F) |= (SF); \
  373. }
  374. #define ClearFlag(F,SF) { \
  375. (F) &= ~(SF); \
  376. }
  377. #define QuadAlign(P) ( \
  378. ((((P)) + 7) & (-8)) \
  379. )
//
// Turn on pseudo-asserts if CC_FREE_ASSERTS is defined: in free (non-DBG)
// builds with CC_FREE_ASSERTS defined, ASSERT/ASSERTMSG are redefined to
// print the failing file/line/expression and break into the debugger,
// instead of compiling away.
//

#if (!DBG && defined( CC_FREE_ASSERTS ))
#undef ASSERT
#undef ASSERTMSG
#define ASSERT(exp) \
    ((exp) ? TRUE : \
             (DbgPrint( "%s:%d %s\n",__FILE__,__LINE__,#exp ), \
              DbgBreakPoint(), \
              TRUE))
#define ASSERTMSG(msg,exp) \
    ((exp) ? TRUE : \
             (DbgPrint( "%s:%d %s %s\n",__FILE__,__LINE__,msg,#exp ), \
              DbgBreakPoint(), \
              TRUE))
#endif
//
// Define the Virtual Address Control Block, which controls all mapping
// performed by the Cache Manager.
//

//
// First some constants.
//

#define PREALLOCATED_VACBS (4)

//
// Virtual Address Control Block
//

typedef struct _VACB {

    //
    // Base Address for this control block.
    //
    PVOID BaseAddress;

    //
    // Pointer to the Shared Cache Map using this Vacb.
    //
    struct _SHARED_CACHE_MAP *SharedCacheMap;

    //
    // Overlay for remembering mapped offset within the Shared Cache Map,
    // and the count of the number of times this Vacb is in use.
    //
    union {

        //
        // File Offset within Shared Cache Map
        //
        LARGE_INTEGER FileOffset;

        //
        // Count of number of times this Vacb is in use. The size of this
        // count is calculated to be adequate, while never large enough to
        // overwrite nonzero bits of the FileOffset, which is a multiple
        // of VACB_MAPPING_GRANULARITY.
        //
        USHORT ActiveCount;

    } Overlay;

    //
    // Entry for the VACB reuse list
    //
    LIST_ENTRY LruList;

} VACB, *PVACB;
//
// These define special flag values that are overloaded as PVACB. They cause
// certain special behavior, currently only in the case of multilevel structures.
// Any PVACB >= VACB_SPECIAL_FIRST_VALID is one of these sentinels, not a
// real Vacb pointer.
//

#define VACB_SPECIAL_REFERENCE ((PVACB) ~0)
#define VACB_SPECIAL_DEREFERENCE ((PVACB) ~1)

#define VACB_SPECIAL_FIRST_VALID VACB_SPECIAL_DEREFERENCE

//
// ULONG views of the ReadAheadActive/ReadAheadEnabled bit fields in
// PRIVATE_CACHE_MAP_FLAGS below (bits 16 and 17, above the 16-bit
// NodeTypeCode overlay), for use with the interlocked flag macros.
//

#define PRIVATE_CACHE_MAP_READ_AHEAD_ACTIVE 0x10000
#define PRIVATE_CACHE_MAP_READ_AHEAD_ENABLED 0x20000
typedef struct _PRIVATE_CACHE_MAP_FLAGS {

    //
    // Overlaid with NodeTypeCode in the containing PRIVATE_CACHE_MAP
    // union; must never be used as flag space.
    //
    ULONG DontUse : 16;

    //
    // This flag says read ahead is currently active, which means either
    // a file system call to CcReadAhead is still determining if the
    // desired data is already resident, or else a request to do read ahead
    // has been queued to a worker thread.
    //
    ULONG ReadAheadActive : 1;

    //
    // Flag to say whether read ahead is currently enabled for this
    // FileObject/PrivateCacheMap. On read misses it is enabled, on
    // read ahead hits it will be disabled. Initially disabled.
    //
    ULONG ReadAheadEnabled : 1;

    //
    // Remaining bits are unused.
    //
    ULONG Available : 14;

} PRIVATE_CACHE_MAP_FLAGS;
  465. #define CC_SET_PRIVATE_CACHE_MAP(PrivateCacheMap, Flags) \
  466. RtlInterlockedSetBitsDiscardReturn (&PrivateCacheMap->UlongFlags, Flags);
  467. #define CC_CLEAR_PRIVATE_CACHE_MAP(PrivateCacheMap, Feature) \
  468. RtlInterlockedAndBitsDiscardReturn (&PrivateCacheMap->UlongFlags, (ULONG)~Feature);
//
// The Private Cache Map is a structure pointed to by the File Object, whenever
// a file is opened with caching enabled (default). There is one per cached
// open; all private maps for a file are linked to its SHARED_CACHE_MAP.
//

typedef struct _PRIVATE_CACHE_MAP {

    //
    // Type and size of this record. The flag bits share storage with
    // the node type (see PRIVATE_CACHE_MAP_FLAGS.DontUse), which is why
    // all three views live in one anonymous union.
    //
    union {

        CSHORT NodeTypeCode;
        PRIVATE_CACHE_MAP_FLAGS Flags;
        ULONG UlongFlags;
    };

    //
    // Read Ahead mask formed from Read Ahead granularity - 1.
    // Private Cache Map ReadAheadSpinLock controls access to this field.
    //
    ULONG ReadAheadMask;

    //
    // Pointer to FileObject for this PrivateCacheMap.
    //
    PFILE_OBJECT FileObject;

    //
    // READ AHEAD CONTROL
    //
    // Read ahead history for determining when read ahead might be
    // beneficial.
    //
    LARGE_INTEGER FileOffset1;
    LARGE_INTEGER BeyondLastByte1;
    LARGE_INTEGER FileOffset2;
    LARGE_INTEGER BeyondLastByte2;

    //
    // Current read ahead requirements.
    //
    // Array element 0 is optionally used for recording remaining bytes
    // required for satisfying a large Mdl read.
    //
    // Array element 1 is used for predicted read ahead.
    //
    LARGE_INTEGER ReadAheadOffset[2];
    ULONG ReadAheadLength[2];

    //
    // SpinLock controlling access to following fields.
    //
    KSPIN_LOCK ReadAheadSpinLock;

    //
    // Links for list of all PrivateCacheMaps linked to the same
    // SharedCacheMap.
    //
    LIST_ENTRY PrivateLinks;

} PRIVATE_CACHE_MAP;

typedef PRIVATE_CACHE_MAP *PPRIVATE_CACHE_MAP;
//
// The Shared Cache Map is a per-file structure pointed to indirectly by
// each File Object. The File Object points to a pointer in a single
// FS-private structure for the file (Fcb). The SharedCacheMap maps the
// first part of the file for common access by all callers.
//

//
// OpenCount log Reasons/Actions -- debug instrumentation, compiled in
// only when OPEN_COUNT_LOG is defined.
//

#if OPEN_COUNT_LOG

typedef struct _CC_OPEN_COUNT_LOG_ENTRY {
    ULONG Action;
    ULONG Reason;
} CC_OPEN_COUNT_LOG_ENTRY;

//
// Fixed-size circular log: Next is the next slot to write, Size is the
// wrap point (number of entries in Log).
//
typedef struct _CC_OPEN_COUNT_LOG {
    USHORT Next;
    USHORT Size;
    CC_OPEN_COUNT_LOG_ENTRY Log[48];
} CC_OPEN_COUNT_LOG;

//
// Append an (ACTION, REASON) pair to the circular log, wrapping Next
// back to zero when it reaches Size.
//
#define CcAddOpenToLog( LOG, ACTION, REASON ) {     \
    (LOG)->Log[(LOG)->Next].Action = (ACTION);      \
    (LOG)->Log[(LOG)->Next].Reason = (REASON);      \
    (LOG)->Next += 1;                               \
    if ((LOG)->Next == (LOG)->Size) {               \
        (LOG)->Next = 0;                            \
    }                                               \
}

#else  // OPEN_COUNT_LOG

#define CcAddOpenToLog( LOG, ACTION, REASON )

#endif // OPEN_COUNT_LOG
  552. #define CcIncrementOpenCount( SCM, REASON ) { \
  553. (SCM)->OpenCount += 1; \
  554. if (REASON != 0) { \
  555. CcAddOpenToLog( &(SCM)->OpenCountLog, REASON, 1 ); \
  556. } \
  557. }
  558. #define CcDecrementOpenCount( SCM, REASON ) { \
  559. (SCM)->OpenCount -= 1; \
  560. if (REASON != 0) { \
  561. CcAddOpenToLog( &(SCM)->OpenCountLog, REASON, -1 ); \
  562. } \
  563. }
typedef struct _SHARED_CACHE_MAP {

    //
    // Type and size of this record.
    //
    CSHORT NodeTypeCode;
    CSHORT NodeByteSize;

    //
    // Number of times this file has been opened cached.
    //
    ULONG OpenCount;

    //
    // Actual size of file, primarily for restricting Read Ahead. Initialized
    // on creation and maintained by extend and truncate operations.
    //
    // NOTE: This field may never be moved, thanks to the late DavidGoe,
    //       who should have written this comment himself :-( cache.h
    //       exports a macro which "knows" that FileSize is the second
    //       longword in the Cache Map!
    //
    LARGE_INTEGER FileSize;

    //
    // Bcb Listhead. The BcbList is ordered by descending
    // FileOffsets, to optimize misses in the sequential I/O case.
    // Synchronized by the BcbSpinLock.
    //
    LIST_ENTRY BcbList;

    //
    // Size of section created.
    //
    LARGE_INTEGER SectionSize;

    //
    // ValidDataLength for file, as currently stored by the file system.
    // Synchronized by the BcbSpinLock or exclusive access by FileSystem.
    //
    LARGE_INTEGER ValidDataLength;

    //
    // Goal for ValidDataLength, when current dirty data is written.
    // Synchronized by the BcbSpinLock or exclusive access by FileSystem.
    //
    LARGE_INTEGER ValidDataGoal;

    //
    // Pointer to a contiguous array of Vacb pointers which control mapping
    // to this file, along with Vacbs (currently) for a 1MB file.
    // Synchronized by CcVacbSpinLock.
    //
    PVACB InitialVacbs[PREALLOCATED_VACBS];
    PVACB * Vacbs;

    //
    // Referenced pointer to original File Object on which the SharedCacheMap
    // was created.
    //
    PFILE_OBJECT FileObject;

    //
    // Describe Active Vacb and Page for copysup optimizations.
    //
    volatile PVACB ActiveVacb;

    //
    // Virtual address needing zero to end of page.
    //
    volatile PVOID NeedToZero;
    ULONG ActivePage;
    ULONG NeedToZeroPage;

    //
    // Fields for synchronizing on active requests.
    //
    KSPIN_LOCK ActiveVacbSpinLock;
    ULONG VacbActiveCount;

    //
    // Number of dirty pages in this SharedCacheMap. Used to trigger
    // write behind. Synchronized by CcMasterSpinLock.
    //
    ULONG DirtyPages;

    //
    // THE NEXT TWO FIELDS MUST BE ADJACENT, TO SUPPORT
    // SHARED_CACHE_MAP_LIST_CURSOR!
    //
    // Links for Global SharedCacheMap List.
    //
    LIST_ENTRY SharedCacheMapLinks;

    //
    // Shared Cache Map flags (defined below).
    //
    ULONG Flags;

    //
    // Status variable set by creator of SharedCacheMap.
    //
    NTSTATUS Status;

    //
    // Mask Bcb for this SharedCacheMap, if there is one.
    // Synchronized by the BcbSpinLock.
    //
    struct _MBCB *Mbcb;

    //
    // Pointer to the common Section Object used by the file system.
    //
    PVOID Section;

    //
    // This event pointer is used to handle creation collisions.
    // If a second thread tries to call CcInitializeCacheMap for the
    // same file, while BeingCreated (below) is TRUE, then that thread
    // will allocate an event store it here (if not already allocated),
    // and wait on it. The first creator will set this event when it
    // is done. The event is not deleted until CcUninitializedCacheMap
    // is called, to avoid possible race conditions. (Note that normally
    // the event never has to be allocated.)
    //
    PKEVENT CreateEvent;

    //
    // This points to an event used to wait for active count to go to zero.
    //
    PKEVENT WaitOnActiveCount;

    //
    // These two fields control the writing of large metadata
    // streams. The first field gives a target for the current
    // flush interval, and the second field stores the end of
    // the last flush that occurred on this file.
    //
    ULONG PagesToWrite;
    LONGLONG BeyondLastFlush;

    //
    // Pointer to structure of routines used by the Lazy Writer to Acquire
    // and Release the file for Lazy Write and Close, to avoid deadlocks,
    // and the context to call them with.
    //
    PCACHE_MANAGER_CALLBACKS Callbacks;
    PVOID LazyWriteContext;

    //
    // Listhead of all PrivateCacheMaps linked to this SharedCacheMap.
    //
    LIST_ENTRY PrivateList;

    //
    // Log handle specified for this shared cache map, for support of routines
    // in logsup.c.
    //
    PVOID LogHandle;

    //
    // Callback routine specified for flushing to Lsn.
    //
    PFLUSH_TO_LSN FlushToLsnRoutine;

    //
    // Dirty Page Threshold for this stream.
    //
    ULONG DirtyPageThreshold;

    //
    // Lazy Writer pass count. Used by the Lazy Writer for
    // no modified write streams, which are not serviced on
    // every pass in order to avoid contention with foreground
    // activity.
    //
    ULONG LazyWritePassCount;

    //
    // This event pointer is used to allow a file system to be notified when
    // the deletion of a shared cache map occurs.
    //
    // This has to be provided here because the cache manager may decide to
    // "Lazy Delete" the shared cache map, and some network file systems
    // will want to know when the lazy delete completes.
    //
    PCACHE_UNINITIALIZE_EVENT UninitializeEvent;

    //
    // This Vacb pointer is needed for keeping the NeedToZero virtual address
    // valid.
    //
    PVACB NeedToZeroVacb;

    //
    // Spinlock for synchronizing the Mbcb and Bcb lists - must be acquired
    // before CcMasterSpinLock. This spinlock also synchronizes ValidDataGoal
    // and ValidDataLength, as described above.
    //
    KSPIN_LOCK BcbSpinLock;

    PVOID Reserved;

    //
    // This is an event which may be used for the WaitOnActiveCount event. We
    // avoid overhead by only "activating" it when it is needed.
    //
    KEVENT Event;

    EX_PUSH_LOCK VacbPushLock;

    //
    // Preallocate one PrivateCacheMap to reduce pool allocations.
    //
    PRIVATE_CACHE_MAP PrivateCacheMap;

#if OPEN_COUNT_LOG
    //
    // Instrument reasons for OpenCount.
    //
    CC_OPEN_COUNT_LOG OpenCountLog;
#endif

} SHARED_CACHE_MAP;

typedef SHARED_CACHE_MAP *PSHARED_CACHE_MAP;
//
// Shared Cache Map Flags (stored in SHARED_CACHE_MAP.Flags).
//

//
// Read ahead has been disabled on this file.
//

#define DISABLE_READ_AHEAD 0x0001

//
// Write behind has been disabled on this file.
//

#define DISABLE_WRITE_BEHIND 0x0002

//
// This flag indicates whether CcInitializeCacheMap was called with
// PinAccess = TRUE.
//

#define PIN_ACCESS 0x0004

//
// (No flag is defined here for 0x0008.)
//
// This flag indicates that a truncate is required when OpenCount
// goes to 0.
//

#define TRUNCATE_REQUIRED 0x0010

//
// This flag indicates that a LazyWrite request is queued.
//

#define WRITE_QUEUED 0x0020

//
// This flag indicates that we have never seen anyone cache
// the file except for with FO_SEQUENTIAL_ONLY, so we should
// tell MM to quickly dump pages when we unmap.
//

#define ONLY_SEQUENTIAL_ONLY_SEEN 0x0040

//
// Active Page is locked.
//

#define ACTIVE_PAGE_IS_DIRTY 0x0080

//
// Flag to say that a create is in progress.
//

#define BEING_CREATED 0x0100

//
// Flag to say that modified write was disabled on the section.
//

#define MODIFIED_WRITE_DISABLED 0x0200

//
// Flag that indicates if a lazy write ever occurred on this file.
//

#define LAZY_WRITE_OCCURRED 0x0400

//
// Flag that indicates this structure is only a cursor, only the
// SharedCacheMapLinks and Flags are valid!
//

#define IS_CURSOR 0x0800

//
// Flag that indicates that we have seen someone cache this file
// and specify FO_RANDOM_ACCESS. This will deactivate our cache
// working set trim assist.
//

#define RANDOM_ACCESS_SEEN 0x1000

//
// Flag indicating that the stream is private write. This disables
// non-aware flush/purge.
//

#define PRIVATE_WRITE 0x2000

//
// This flag indicates that a read ahead request is queued. (The
// original comment here was a copy of the WRITE_QUEUED text.)
//

#define READ_AHEAD_QUEUED 0x4000

//
// This flag indicates that CcMapAndCopy() forced a remote write
// to be write through while writes were throttled. This tells
// CcUninitializeCacheMap() to force a lazy close of the file
// and CcWriteBehind() to force an update of the valid data
// length.
//

#define FORCED_WRITE_THROUGH 0x8000

//
// This flag indicates that Mm is waiting for the data section being used
// by Cc at this time to go away so that the file can be opened as an image
// section. If this flag is set during CcWriteBehind, we will flush the
// entire file and try to tear down the shared cache map.
//

#define WAITING_FOR_TEARDOWN 0x10000
  835. //
  836. // Cursor structure for traversing the SharedCacheMap lists. Anyone
  837. // scanning these lists must verify that the IS_CURSOR flag is clear
  838. // before looking at other SharedCacheMap fields.
  839. //
  840. typedef struct _SHARED_CACHE_MAP_LIST_CURSOR {
  841. //
  842. // Links for Global SharedCacheMap List
  843. //
  844. LIST_ENTRY SharedCacheMapLinks;
  845. //
  846. // Shared Cache Map flags, IS_CURSOR must be set.
  847. //
  848. ULONG Flags;
  849. } SHARED_CACHE_MAP_LIST_CURSOR, *PSHARED_CACHE_MAP_LIST_CURSOR;
  850. #ifndef KDEXT
  851. //
  852. // Bitmap Range structure. For small files there is just one embedded in the
  853. // Mbcb. For large files there may be many of these linked to the Mbcb.
  854. //
  855. typedef struct _BITMAP_RANGE {
  856. //
  857. // Links for the list of bitmap ranges off the Mbcb.
  858. //
  859. LIST_ENTRY Links;
  860. //
  861. // Base page (FileOffset / PAGE_SIZE) represented by this range.
  862. // (Size is a fixed maximum.)
  863. //
  864. LONGLONG BasePage;
  865. //
  866. // First and Last dirty pages relative to the BasePage.
  867. //
  868. ULONG FirstDirtyPage;
  869. ULONG LastDirtyPage;
  870. //
  871. // Number of dirty pages in this range.
  872. //
  873. ULONG DirtyPages;
  874. //
  875. // Pointer to the bitmap for this range.
  876. //
  877. PULONG Bitmap;
  878. } BITMAP_RANGE, *PBITMAP_RANGE;
  879. #endif
  880. //
  881. // This structure is a "mask" Bcb. For fast simple write operations,
  882. // a mask Bcb is used so that we basically only have to set bits to remember
  883. // where the dirty data is.
  884. //
  885. typedef struct _MBCB {
  886. //
  887. // Type and size of this record
  888. //
  889. CSHORT NodeTypeCode;
  890. CSHORT NodeIsInZone;
  891. //
  892. // This field is used as a scratch area for the Lazy Writer to
  893. // guide how much he will write each time he wakes up.
  894. //
  895. ULONG PagesToWrite;
  896. //
  897. // Number of dirty pages (set bits) in the bitmap below.
  898. //
  899. ULONG DirtyPages;
  900. //
  901. // Reserved for alignment.
  902. //
  903. ULONG Reserved;
  904. //
  905. // ListHead of Bitmap ranges.
  906. //
  907. LIST_ENTRY BitmapRanges;
  908. //
  909. // This is a hint on where to resume writing, since we will not
  910. // always write all of the dirty data at once.
  911. //
  912. LONGLONG ResumeWritePage;
  913. //
  914. // Initial three embedded Bitmap ranges. For a file up to 2MB, only the
  915. // first range is used, and the rest of the Mbcb contains bits for 2MB of
  916. // dirty pages (4MB on Alpha). For larger files, all three ranges may
  917. // be used to describe external bitmaps.
  918. //
  919. BITMAP_RANGE BitmapRange1;
  920. BITMAP_RANGE BitmapRange2;
  921. BITMAP_RANGE BitmapRange3;
  922. } MBCB;
  923. typedef MBCB *PMBCB;
  924. //
  925. // This is the Buffer Control Block structure for representing data which
  926. // is "pinned" in memory by one or more active requests and/or dirty. This
  927. // structure is created the first time that a call to CcPinFileData specifies
  928. // a particular integral range of pages. It is deallocated whenever the Pin
  929. // Count reaches 0 and the Bcb is not Dirty.
  930. //
  931. // NOTE: The first four fields must be the same as the PUBLIC_BCB.
  932. //
  933. typedef struct _BCB {
  934. union {
  935. //
  936. // To ensure QuadAlign (sizeof (BCB)) >= QuadAlign (sizeof (MBCB))
  937. // so that they can share the same pool blocks.
  938. //
  939. MBCB Dummy;
  940. struct {
  941. //
  942. // Type and size of this record
  943. //
  944. CSHORT NodeTypeCode;
  945. //
  946. // Flags
  947. //
  948. BOOLEAN Dirty;
  949. BOOLEAN Reserved;
  950. //
  951. // Byte FileOffset and and length of entire buffer
  952. //
  953. ULONG ByteLength;
  954. LARGE_INTEGER FileOffset;
  955. //
  956. // Links for BcbList in SharedCacheMap
  957. //
  958. LIST_ENTRY BcbLinks;
  959. //
  960. // Byte FileOffset of last byte in buffer (used for searching)
  961. //
  962. LARGE_INTEGER BeyondLastByte;
  963. //
  964. // Oldest Lsn (if specified) when this buffer was set dirty.
  965. //
  966. LARGE_INTEGER OldestLsn;
  967. //
  968. // Most recent Lsn specified when this buffer was set dirty.
  969. // The FlushToLsnRoutine is called with this Lsn.
  970. //
  971. LARGE_INTEGER NewestLsn;
  972. //
  973. // Pointer to Vacb via which this Bcb is mapped.
  974. //
  975. PVACB Vacb;
  976. #if LIST_DBG
  977. //
  978. // Links and caller addresses for the global Bcb list (for debug only)
  979. //
  980. LIST_ENTRY CcBcbLinks;
  981. PVOID CallerAddress;
  982. PVOID CallersCallerAddress;
  983. #endif
  984. //
  985. // Count of threads actively using this Bcb to process a request.
  986. // This must be manipulated under protection of the BcbListSpinLock
  987. // in the SharedCacheMap.
  988. //
  989. ULONG PinCount;
  990. //
  991. // Resource to synchronize buffer access. Pinning Readers and all Writers
  992. // of the described buffer take out shared access (synchronization of
  993. // buffer modifications is strictly up to the caller). Note that pinning
  994. // readers do not declare if they are going to modify the buffer or not.
  995. // Anyone writing to disk takes out exclusive access, to prevent the buffer
  996. // from changing while it is being written out.
  997. //
  998. ERESOURCE Resource;
  999. //
  1000. // Pointer to SharedCacheMap for this Bcb.
  1001. //
  1002. PSHARED_CACHE_MAP SharedCacheMap;
  1003. //
  1004. // This is the Base Address at which the buffer can be seen in
  1005. // system space. All access to buffer data should go through this
  1006. // address.
  1007. //
  1008. PVOID BaseAddress;
  1009. };
  1010. };
  1011. } BCB;
  1012. #ifndef KDEXT
  1013. typedef BCB *PBCB;
  1014. #endif
  1015. //
  1016. // This is the Overlap Buffer Control Block structure for representing data which
  1017. // is "pinned" in memory and must be represented by multiple Bcbs due to overlaps.
  1018. //
  1019. // NOTE: The first four fields must be the same as the PUBLIC_BCB.
  1020. //
  1021. typedef struct _OBCB {
  1022. //
  1023. // Type and size of this record
  1024. //
  1025. CSHORT NodeTypeCode;
  1026. CSHORT NodeByteSize;
  1027. //
  1028. // Byte FileOffset and and length of entire buffer
  1029. //
  1030. ULONG ByteLength;
  1031. LARGE_INTEGER FileOffset;
  1032. //
  1033. // Vector of Bcb pointers.
  1034. //
  1035. PBCB Bcbs[ANYSIZE_ARRAY];
  1036. } OBCB;
  1037. typedef OBCB *POBCB;
  1038. //
  1039. // Struct for remembering deferred writes for later posting.
  1040. //
  1041. typedef struct _DEFERRED_WRITE {
  1042. //
  1043. // Type and size of this record
  1044. //
  1045. CSHORT NodeTypeCode;
  1046. CSHORT NodeByteSize;
  1047. //
  1048. // The file to be written.
  1049. //
  1050. PFILE_OBJECT FileObject;
  1051. //
  1052. // Number of bytes the caller intends to write
  1053. //
  1054. ULONG BytesToWrite;
  1055. //
  1056. // Links for the deferred write queue.
  1057. //
  1058. LIST_ENTRY DeferredWriteLinks;
  1059. //
  1060. // If this event pointer is not NULL, then this event will
  1061. // be signalled when the write is ok, rather than calling
  1062. // the PostRoutine below.
  1063. //
  1064. PKEVENT Event;
  1065. //
  1066. // The posting routine and its parameters
  1067. //
  1068. PCC_POST_DEFERRED_WRITE PostRoutine;
  1069. PVOID Context1;
  1070. PVOID Context2;
  1071. BOOLEAN LimitModifiedPages;
  1072. } DEFERRED_WRITE, *PDEFERRED_WRITE;
  1073. //
  1074. // Struct controlling the Lazy Writer algorithms
  1075. //
  1076. typedef struct _LAZY_WRITER {
  1077. //
  1078. // Work queue.
  1079. //
  1080. LIST_ENTRY WorkQueue;
  1081. //
  1082. // Dpc and Timer Structures used for activating periodic scan when active.
  1083. //
  1084. KDPC ScanDpc;
  1085. KTIMER ScanTimer;
  1086. //
  1087. // Boolean to say whether Lazy Writer scan is active or not.
  1088. //
  1089. BOOLEAN ScanActive;
  1090. //
  1091. // Boolean indicating if there is any other reason for Lazy Writer to
  1092. // wake up.
  1093. //
  1094. BOOLEAN OtherWork;
  1095. } LAZY_WRITER;
  1096. #ifndef KDEXT
  1097. //
  1098. // Work queue entry for the worker threads, with an enumerated
  1099. // function code.
  1100. //
  1101. typedef enum _WORKER_FUNCTION {
  1102. Noop = 0,
  1103. ReadAhead,
  1104. WriteBehind,
  1105. LazyWriteScan,
  1106. EventSet
  1107. } WORKER_FUNCTION;
  1108. #endif
//
// A single unit of work posted to the cache manager worker threads; the
// Function field selects which arm of the Parameters union is valid.
//

typedef struct _WORK_QUEUE_ENTRY {

    //
    // List entry for our work queues.
    //

    LIST_ENTRY WorkQueueLinks;

    //
    // Define a union to contain function-specific parameters.
    //

    union {

        //
        // Read parameters (for read ahead)
        //

        struct {
            PFILE_OBJECT FileObject;
        } Read;

        //
        // Write parameters (for write behind)
        //

        struct {
            PSHARED_CACHE_MAP SharedCacheMap;
        } Write;

        //
        // Set event parameters (for queue checks)
        //

        struct {
            PKEVENT Event;
        } Event;

    } Parameters;

    //
    // Function code for this entry: a WORKER_FUNCTION value stored
    // in a UCHAR.
    //

    UCHAR Function;

} WORK_QUEUE_ENTRY, *PWORK_QUEUE_ENTRY;
  1142. //
  1143. // This is a structure apended to the end of an MDL
  1144. //
  1145. typedef struct _MDL_WRITE {
  1146. //
  1147. // This field is for the use of the Server to stash anything interesting
  1148. //
  1149. PVOID ServerContext;
  1150. //
  1151. // This is the resource to release when the write is complete.
  1152. //
  1153. PERESOURCE Resource;
  1154. //
  1155. // This is thread caller's thread, and the thread that must release
  1156. // the resource.
  1157. //
  1158. ERESOURCE_THREAD Thread;
  1159. //
  1160. // This links all the pending MDLs through the shared cache map.
  1161. //
  1162. LIST_ENTRY MdlLinks;
  1163. } MDL_WRITE, *PMDL_WRITE;
  1164. //
  1165. // Common Private routine definitions for the Cache Manager
  1166. //
  1167. VOID
  1168. CcGetActiveVacb (
  1169. IN PSHARED_CACHE_MAP SharedCacheMap,
  1170. OUT PVACB *Vacb,
  1171. OUT PULONG Page,
  1172. OUT PULONG Dirty
  1173. );
  1174. VOID
  1175. CcSetActiveVacb (
  1176. IN PSHARED_CACHE_MAP SharedCacheMap,
  1177. IN OUT PVACB *Vacb,
  1178. IN ULONG Page,
  1179. IN ULONG Dirty
  1180. );
  1181. //
  1182. // We trim out the previous macro-forms of Get/Set (nondpc) so that we can page
  1183. // more cache manager code that otherwise does not acquire spinlocks.
  1184. //
  1185. #define GetActiveVacb(SCM,IRQ,V,P,D) CcGetActiveVacb((SCM),&(V),&(P),&(D))
  1186. #define SetActiveVacb(SCM,IRQ,V,P,D) CcSetActiveVacb((SCM),&(V),(P),(D))
  1187. #define GetActiveVacbAtDpcLevel(SCM,V,P,D) { \
  1188. ExAcquireSpinLockAtDpcLevel(&(SCM)->ActiveVacbSpinLock); \
  1189. (V) = (SCM)->ActiveVacb; \
  1190. if ((V) != NULL) { \
  1191. (P) = (SCM)->ActivePage; \
  1192. (SCM)->ActiveVacb = NULL; \
  1193. (D) = (SCM)->Flags & ACTIVE_PAGE_IS_DIRTY; \
  1194. } \
  1195. ExReleaseSpinLockFromDpcLevel(&(SCM)->ActiveVacbSpinLock); \
  1196. }
  1197. //
  1198. // Gather the common work of charging and deducting dirty page counts. When
  1199. // write hysteresis was being considered during Windows XP, this also helped
  1200. // gather up the activation of that throttle.
  1201. //
  1202. #define CcDeductDirtyPages( S, P ) \
  1203. CcTotalDirtyPages -= (P); \
  1204. (S)->DirtyPages -= (P);
  1205. #define CcChargeMaskDirtyPages( S, M, B, P ) \
  1206. CcTotalDirtyPages += (P); \
  1207. (M)->DirtyPages += (P); \
  1208. (B)->DirtyPages += (P); \
  1209. (S)->DirtyPages += (P);
  1210. #define CcChargePinDirtyPages( S, P ) \
  1211. CcTotalDirtyPages += (P); \
  1212. (S)->DirtyPages += (P);
  1213. VOID
  1214. CcPostDeferredWrites (
  1215. );
  1216. BOOLEAN
  1217. CcPinFileData (
  1218. IN PFILE_OBJECT FileObject,
  1219. IN PLARGE_INTEGER FileOffset,
  1220. IN ULONG Length,
  1221. IN BOOLEAN ReadOnly,
  1222. IN BOOLEAN WriteOnly,
  1223. IN ULONG Flags,
  1224. OUT PBCB *Bcb,
  1225. OUT PVOID *BaseAddress,
  1226. OUT PLARGE_INTEGER BeyondLastByte
  1227. );
//
// Action CcUnpinFileData should apply as it releases its reference on
// the Bcb.  (Names suggest: drop a pin count, drop a plain reference,
// or mark the data clean -- confirm against the CcUnpinFileData
// implementation.)
//

typedef enum {
    UNPIN,
    UNREF,
    SET_CLEAN
} UNMAP_ACTIONS;
  1233. VOID
  1234. FASTCALL
  1235. CcUnpinFileData (
  1236. IN OUT PBCB Bcb,
  1237. IN BOOLEAN ReadOnly,
  1238. IN UNMAP_ACTIONS UnmapAction
  1239. );
  1240. VOID
  1241. FASTCALL
  1242. CcDeallocateBcb (
  1243. IN PBCB Bcb
  1244. );
  1245. VOID
  1246. FASTCALL
  1247. CcPerformReadAhead (
  1248. IN PFILE_OBJECT FileObject
  1249. );
  1250. VOID
  1251. CcSetDirtyInMask (
  1252. IN PSHARED_CACHE_MAP SharedCacheMap,
  1253. IN PLARGE_INTEGER FileOffset,
  1254. IN ULONG Length
  1255. );
  1256. VOID
  1257. FASTCALL
  1258. CcWriteBehind (
  1259. IN PSHARED_CACHE_MAP SharedCacheMap,
  1260. IN PIO_STATUS_BLOCK IoStatus
  1261. );
//
// ZeroFlags bits passed to CcMapAndRead/CcMapAndCopy, describing which
// pages of the transfer may be zero-initialized -- TODO confirm exact
// semantics at the callers.
//

#define ZERO_FIRST_PAGE 1
#define ZERO_MIDDLE_PAGES 2
#define ZERO_LAST_PAGE 4
  1265. BOOLEAN
  1266. CcMapAndRead(
  1267. IN PSHARED_CACHE_MAP SharedCacheMap,
  1268. IN PLARGE_INTEGER FileOffset,
  1269. IN ULONG Length,
  1270. IN ULONG ZeroFlags,
  1271. IN BOOLEAN Wait,
  1272. IN PVOID BaseAddress
  1273. );
  1274. VOID
  1275. CcFreeActiveVacb (
  1276. IN PSHARED_CACHE_MAP SharedCacheMap,
  1277. IN PVACB ActiveVacb OPTIONAL,
  1278. IN ULONG ActivePage,
  1279. IN ULONG PageIsDirty
  1280. );
  1281. VOID
  1282. CcMapAndCopy(
  1283. IN PSHARED_CACHE_MAP SharedCacheMap,
  1284. IN PVOID UserBuffer,
  1285. IN PLARGE_INTEGER FileOffset,
  1286. IN ULONG Length,
  1287. IN ULONG ZeroFlags,
  1288. IN PFILE_OBJECT FileObject
  1289. );
  1290. VOID
  1291. CcScanDpc (
  1292. IN PKDPC Dpc,
  1293. IN PVOID DeferredContext,
  1294. IN PVOID SystemArgument1,
  1295. IN PVOID SystemArgument2
  1296. );
  1297. VOID
  1298. CcScheduleLazyWriteScan (
  1299. IN BOOLEAN FastScan
  1300. );
  1301. VOID
  1302. CcStartLazyWriter (
  1303. IN PVOID NotUsed
  1304. );
//
// Allocate and free WORK_QUEUE_ENTRY structures via the per-processor
// (Twilight) lookaside list.
//

#define CcAllocateWorkQueueEntry() \
    (PWORK_QUEUE_ENTRY)ExAllocateFromPPLookasideList(LookasideTwilightList)

#define CcFreeWorkQueueEntry(_entry_) \
    ExFreeToPPLookasideList(LookasideTwilightList, (_entry_))
  1309. VOID
  1310. FASTCALL
  1311. CcPostWorkQueue (
  1312. IN PWORK_QUEUE_ENTRY WorkQueueEntry,
  1313. IN PLIST_ENTRY WorkQueue
  1314. );
  1315. VOID
  1316. CcWorkerThread (
  1317. PVOID ExWorkQueueItem
  1318. );
  1319. VOID
  1320. FASTCALL
  1321. CcDeleteSharedCacheMap (
  1322. IN PSHARED_CACHE_MAP SharedCacheMap,
  1323. IN KIRQL ListIrql,
  1324. IN ULONG ReleaseFile
  1325. );
  1326. //
  1327. // This exception filter handles STATUS_IN_PAGE_ERROR correctly
  1328. //
  1329. LONG
  1330. CcCopyReadExceptionFilter(
  1331. IN PEXCEPTION_POINTERS ExceptionPointer,
  1332. IN PNTSTATUS ExceptionCode
  1333. );
  1334. //
  1335. // Exception filter for Worker Threads in lazyrite.c
  1336. //
  1337. LONG
  1338. CcExceptionFilter (
  1339. IN NTSTATUS ExceptionCode
  1340. );
  1341. #ifdef CCDBG
  1342. VOID
  1343. CcDump (
  1344. IN PVOID Ptr
  1345. );
  1346. #endif
  1347. //
  1348. // Vacb routines
  1349. //
  1350. VOID
  1351. CcInitializeVacbs(
  1352. );
  1353. PVOID
  1354. CcGetVirtualAddressIfMapped (
  1355. IN PSHARED_CACHE_MAP SharedCacheMap,
  1356. IN LONGLONG FileOffset,
  1357. OUT PVACB *Vacb,
  1358. OUT PULONG ReceivedLength
  1359. );
  1360. PVOID
  1361. CcGetVirtualAddress (
  1362. IN PSHARED_CACHE_MAP SharedCacheMap,
  1363. IN LARGE_INTEGER FileOffset,
  1364. OUT PVACB *Vacb,
  1365. OUT PULONG ReceivedLength
  1366. );
  1367. VOID
  1368. FASTCALL
  1369. CcFreeVirtualAddress (
  1370. IN PVACB Vacb
  1371. );
  1372. VOID
  1373. CcReferenceFileOffset (
  1374. IN PSHARED_CACHE_MAP SharedCacheMap,
  1375. IN LARGE_INTEGER FileOffset
  1376. );
  1377. VOID
  1378. CcDereferenceFileOffset (
  1379. IN PSHARED_CACHE_MAP SharedCacheMap,
  1380. IN LARGE_INTEGER FileOffset
  1381. );
  1382. VOID
  1383. CcWaitOnActiveCount (
  1384. IN PSHARED_CACHE_MAP SharedCacheMap
  1385. );
  1386. NTSTATUS
  1387. FASTCALL
  1388. CcCreateVacbArray (
  1389. IN PSHARED_CACHE_MAP SharedCacheMap,
  1390. IN LARGE_INTEGER NewSectionSize
  1391. );
  1392. NTSTATUS
  1393. CcExtendVacbArray (
  1394. IN PSHARED_CACHE_MAP SharedCacheMap,
  1395. IN LARGE_INTEGER NewSectionSize
  1396. );
  1397. BOOLEAN
  1398. FASTCALL
  1399. CcUnmapVacbArray (
  1400. IN PSHARED_CACHE_MAP SharedCacheMap,
  1401. IN PLARGE_INTEGER FileOffset OPTIONAL,
  1402. IN ULONG Length,
  1403. IN BOOLEAN UnmapBehind
  1404. );
  1405. VOID
  1406. CcAdjustVacbLevelLockCount (
  1407. IN PSHARED_CACHE_MAP SharedCacheMap,
  1408. IN LONGLONG FileOffset,
  1409. IN LONG Adjustment
  1410. );
  1411. PLIST_ENTRY
  1412. CcGetBcbListHeadLargeOffset (
  1413. IN PSHARED_CACHE_MAP SharedCacheMap,
  1414. IN LONGLONG FileOffset,
  1415. IN BOOLEAN FailToSuccessor
  1416. );
  1417. ULONG
  1418. CcPrefillVacbLevelZone (
  1419. IN ULONG NumberNeeded,
  1420. OUT PKIRQL OldIrql,
  1421. IN ULONG NeedBcbListHeads
  1422. );
  1423. VOID
  1424. CcDrainVacbLevelZone (
  1425. );
  1426. //
  1427. // Define references to global data
  1428. //
  1429. extern KSPIN_LOCK CcBcbSpinLock;
  1430. extern LIST_ENTRY CcCleanSharedCacheMapList;
  1431. extern SHARED_CACHE_MAP_LIST_CURSOR CcDirtySharedCacheMapList;
  1432. extern SHARED_CACHE_MAP_LIST_CURSOR CcLazyWriterCursor;
  1433. extern GENERAL_LOOKASIDE CcTwilightLookasideList;
  1434. extern ULONG CcNumberWorkerThreads;
  1435. extern ULONG CcNumberActiveWorkerThreads;
  1436. extern LIST_ENTRY CcIdleWorkerThreadList;
  1437. extern LIST_ENTRY CcExpressWorkQueue;
  1438. extern LIST_ENTRY CcRegularWorkQueue;
  1439. extern LIST_ENTRY CcPostTickWorkQueue;
  1440. extern BOOLEAN CcQueueThrottle;
  1441. extern ULONG CcIdleDelayTick;
  1442. extern LARGE_INTEGER CcNoDelay;
  1443. extern LARGE_INTEGER CcFirstDelay;
  1444. extern LARGE_INTEGER CcIdleDelay;
  1445. extern LARGE_INTEGER CcCollisionDelay;
  1446. extern LARGE_INTEGER CcTargetCleanDelay;
  1447. extern LAZY_WRITER LazyWriter;
  1448. extern ULONG_PTR CcNumberVacbs;
  1449. extern PVACB CcVacbs;
  1450. extern PVACB CcBeyondVacbs;
  1451. extern LIST_ENTRY CcVacbLru;
  1452. extern LIST_ENTRY CcVacbFreeList;
  1453. extern KSPIN_LOCK CcDeferredWriteSpinLock;
  1454. extern LIST_ENTRY CcDeferredWrites;
  1455. extern ULONG CcDirtyPageThreshold;
  1456. extern ULONG CcDirtyPageTarget;
  1457. extern ULONG CcDirtyPagesLastScan;
  1458. extern ULONG CcPagesYetToWrite;
  1459. extern ULONG CcPagesWrittenLastTime;
  1460. extern ULONG CcThrottleLastTime;
  1461. extern ULONG CcDirtyPageHysteresisThreshold;
  1462. extern PSHARED_CACHE_MAP CcSingleDirtySourceDominant;
  1463. extern ULONG CcAvailablePagesThreshold;
  1464. extern ULONG CcTotalDirtyPages;
  1465. extern ULONG CcTune;
  1466. extern LONG CcAggressiveZeroCount;
  1467. extern LONG CcAggressiveZeroThreshold;
  1468. extern ULONG CcLazyWriteHotSpots;
  1469. extern MM_SYSTEMSIZE CcCapturedSystemSize;
  1470. extern ULONG CcMaxVacbLevelsSeen;
  1471. extern ULONG CcVacbLevelEntries;
  1472. extern PVACB *CcVacbLevelFreeList;
  1473. extern ULONG CcVacbLevelWithBcbsEntries;
  1474. extern PVACB *CcVacbLevelWithBcbsFreeList;
  1475. //
  1476. // Macros for allocating and deallocating Vacb levels - CcVacbSpinLock must
  1477. // be acquired.
  1478. //
  1479. _inline PVACB *CcAllocateVacbLevel (
  1480. IN LOGICAL AllocatingBcbListHeads
  1481. )
  1482. {
  1483. PVACB *ReturnEntry;
  1484. if (AllocatingBcbListHeads) {
  1485. ReturnEntry = CcVacbLevelWithBcbsFreeList;
  1486. CcVacbLevelWithBcbsFreeList = (PVACB *)*ReturnEntry;
  1487. CcVacbLevelWithBcbsEntries -= 1;
  1488. } else {
  1489. ReturnEntry = CcVacbLevelFreeList;
  1490. CcVacbLevelFreeList = (PVACB *)*ReturnEntry;
  1491. CcVacbLevelEntries -= 1;
  1492. }
  1493. *ReturnEntry = NULL;
  1494. ASSERT(RtlCompareMemory(ReturnEntry, ReturnEntry + 1, VACB_LEVEL_BLOCK_SIZE - sizeof(PVACB)) ==
  1495. (VACB_LEVEL_BLOCK_SIZE - sizeof(PVACB)));
  1496. return ReturnEntry;
  1497. }
_inline VOID CcDeallocateVacbLevel (
    IN PVACB *Entry,
    IN LOGICAL DeallocatingBcbListHeads
    )

//
// Return a Vacb level block to the front of the appropriate free list,
// using the block's first pointer as the free-list link, and bump that
// list's count.  CcVacbSpinLock must be held (see the comment above
// CcAllocateVacbLevel).
//
// N.B. CcAllocateVacbLevel ASSERTs that everything past the first
//      pointer of a free block is zero, so callers presumably clear the
//      block before freeing it -- confirm at call sites.
//

{
    if (DeallocatingBcbListHeads) {
        *Entry = (PVACB)CcVacbLevelWithBcbsFreeList;
        CcVacbLevelWithBcbsFreeList = Entry;
        CcVacbLevelWithBcbsEntries += 1;
    } else {
        *Entry = (PVACB)CcVacbLevelFreeList;
        CcVacbLevelFreeList = Entry;
        CcVacbLevelEntries += 1;
    }
}
  1513. //
  1514. // Export the macros for inspecting the reference counts for
  1515. // the multilevel Vacb array.
  1516. //
  1517. _inline
  1518. PVACB_LEVEL_REFERENCE
  1519. VacbLevelReference (
  1520. IN PSHARED_CACHE_MAP SharedCacheMap,
  1521. IN PVACB *VacbArray,
  1522. IN ULONG Level
  1523. )
  1524. {
  1525. return (PVACB_LEVEL_REFERENCE)
  1526. ((PCHAR)VacbArray +
  1527. VACB_LEVEL_BLOCK_SIZE +
  1528. (Level != 0?
  1529. 0 : (FlagOn( SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED )?
  1530. VACB_LEVEL_BLOCK_SIZE : 0)));
  1531. }
  1532. _inline
  1533. ULONG
  1534. IsVacbLevelReferenced (
  1535. IN PSHARED_CACHE_MAP SharedCacheMap,
  1536. IN PVACB *VacbArray,
  1537. IN ULONG Level
  1538. )
  1539. {
  1540. PVACB_LEVEL_REFERENCE VacbReference = VacbLevelReference( SharedCacheMap, VacbArray, Level );
  1541. return VacbReference->Reference | VacbReference->SpecialReference;
  1542. }
  1543. //
  1544. // Here is a page of macros stolen directly from Pinball...
  1545. //
  1546. //
  1547. // The following macros are used to establish the semantics needed
  1548. // to do a return from within a try-finally clause. As a rule every
// try clause must end with a label called try_exit. For example,
  1550. //
  1551. // try {
  1552. // :
  1553. // :
  1554. //
  1555. // try_exit: NOTHING;
  1556. // } finally {
  1557. //
  1558. // :
  1559. // :
  1560. // }
  1561. //
  1562. // Every return statement executed inside of a try clause should use the
  1563. // try_return macro. If the compiler fully supports the try-finally construct
  1564. // then the macro should be
  1565. //
  1566. // #define try_return(S) { return(S); }
  1567. //
  1568. // If the compiler does not support the try-finally construct then the macro
  1569. // should be
  1570. //
  1571. // #define try_return(S) { S; goto try_exit; }
  1572. //
// Goto-based return-from-try, per the discussion above (requires a
// try_exit label at the end of the try clause).
#define try_return(S) { S; goto try_exit; }
//
// Debug tracing support.  When CCDBG is defined these macros print
// indented trace output gated by CcDebugTraceLevel; the CCDBG_LOCK
// variants additionally serialize output under CcDebugTraceLock.  In
// free builds they compile away to nothing.
//
// NOTE(review): _i is a LONG assigned (ULONG)PsGetCurrentThread(), which
// truncates the thread pointer on 64-bit platforms.  Debug-only output,
// but presumably intended just as a thread tag -- confirm.
//

#ifdef CCDBG

extern LONG CcDebugTraceLevel;
extern LONG CcDebugTraceIndent;

#ifndef CCDBG_LOCK

#define DebugTrace(INDENT,LEVEL,X,Y) { \
    LONG _i; \
    if (((LEVEL) == 0) || (CcDebugTraceLevel & (LEVEL))) { \
        _i = (ULONG)PsGetCurrentThread(); \
        DbgPrint("%08lx:",_i); \
        if ((INDENT) < 0) { \
            CcDebugTraceIndent += (INDENT); \
        } \
        if (CcDebugTraceIndent < 0) { \
            CcDebugTraceIndent = 0; \
        } \
        for (_i=0; _i<CcDebugTraceIndent; _i+=1) { \
            DbgPrint(" "); \
        } \
        DbgPrint(X,Y); \
        if ((INDENT) > 0) { \
            CcDebugTraceIndent += (INDENT); \
        } \
    } \
}

#define DebugTrace2(INDENT,LEVEL,X,Y,Z) { \
    LONG _i; \
    if (((LEVEL) == 0) || (CcDebugTraceLevel & (LEVEL))) { \
        _i = (ULONG)PsGetCurrentThread(); \
        DbgPrint("%08lx:",_i); \
        if ((INDENT) < 0) { \
            CcDebugTraceIndent += (INDENT); \
        } \
        if (CcDebugTraceIndent < 0) { \
            CcDebugTraceIndent = 0; \
        } \
        for (_i=0; _i<CcDebugTraceIndent; _i+=1) { \
            DbgPrint(" "); \
        } \
        DbgPrint(X,Y,Z); \
        if ((INDENT) > 0) { \
            CcDebugTraceIndent += (INDENT); \
        } \
    } \
}

#define DebugDump(STR,LEVEL,PTR) { \
    LONG _i; \
    VOID CcDump(); \
    if (((LEVEL) == 0) || (CcDebugTraceLevel & (LEVEL))) { \
        _i = (ULONG)PsGetCurrentThread(); \
        DbgPrint("%08lx:",_i); \
        DbgPrint(STR); \
        if (PTR != NULL) {CcDump(PTR);} \
        DbgBreakPoint(); \
    } \
}

#else // ndef CCDBG_LOCK

extern KSPIN_LOCK CcDebugTraceLock;

//
// Locked variants: identical to the above, but the whole print sequence
// is bracketed by CcDebugTraceLock so multi-processor output does not
// interleave.
//

#define DebugTrace(INDENT,LEVEL,X,Y) { \
    LONG _i; \
    KIRQL _oldIrql; \
    if (((LEVEL) == 0) || (CcDebugTraceLevel & (LEVEL))) { \
        _i = (ULONG)PsGetCurrentThread(); \
        ExAcquireSpinLock( &CcDebugTraceLock, &_oldIrql ); \
        DbgPrint("%08lx:",_i); \
        if ((INDENT) < 0) { \
            CcDebugTraceIndent += (INDENT); \
        } \
        if (CcDebugTraceIndent < 0) { \
            CcDebugTraceIndent = 0; \
        } \
        for (_i=0; _i<CcDebugTraceIndent; _i+=1) { \
            DbgPrint(" "); \
        } \
        DbgPrint(X,Y); \
        if ((INDENT) > 0) { \
            CcDebugTraceIndent += (INDENT); \
        } \
        ExReleaseSpinLock( &CcDebugTraceLock, _oldIrql ); \
    } \
}

#define DebugTrace2(INDENT,LEVEL,X,Y,Z) { \
    LONG _i; \
    KIRQL _oldIrql; \
    if (((LEVEL) == 0) || (CcDebugTraceLevel & (LEVEL))) { \
        _i = (ULONG)PsGetCurrentThread(); \
        ExAcquireSpinLock( &CcDebugTraceLock, &_oldIrql ); \
        DbgPrint("%08lx:",_i); \
        if ((INDENT) < 0) { \
            CcDebugTraceIndent += (INDENT); \
        } \
        if (CcDebugTraceIndent < 0) { \
            CcDebugTraceIndent = 0; \
        } \
        for (_i=0; _i<CcDebugTraceIndent; _i+=1) { \
            DbgPrint(" "); \
        } \
        DbgPrint(X,Y,Z); \
        if ((INDENT) > 0) { \
            CcDebugTraceIndent += (INDENT); \
        } \
        ExReleaseSpinLock( &CcDebugTraceLock, _oldIrql ); \
    } \
}

#define DebugDump(STR,LEVEL,PTR) { \
    LONG _i; \
    KIRQL _oldIrql; \
    VOID CcDump(); \
    if (((LEVEL) == 0) || (CcDebugTraceLevel & (LEVEL))) { \
        _i = (ULONG)PsGetCurrentThread(); \
        ExAcquireSpinLock( &CcDebugTraceLock, &_oldIrql ); \
        DbgPrint("%08lx:",_i); \
        DbgPrint(STR); \
        if (PTR != NULL) {CcDump(PTR);} \
        DbgBreakPoint(); \
        ExReleaseSpinLock( &CcDebugTraceLock, _oldIrql ); \
    } \
}

#endif // else ndef CCDBG_LOCK

#else

#undef CCDBG_LOCK

#define DebugTrace(INDENT,LEVEL,X,Y) {NOTHING;}
#define DebugTrace2(INDENT,LEVEL,X,Y,Z) {NOTHING;}
#define DebugDump(STR,LEVEL,PTR) {NOTHING;}

#endif // CCDBG
  1698. //
  1699. // Global list of pinned Bcbs which may be examined for debug purposes
  1700. //
  1701. #if DBG
  1702. extern ULONG CcBcbCount;
  1703. extern LIST_ENTRY CcBcbList;
  1704. #endif
FORCEINLINE
VOID
CcInsertIntoCleanSharedCacheMapList (
    IN PSHARED_CACHE_MAP SharedCacheMap
    )

//
// Queue a shared cache map onto the tail of the global clean list.
// When a kernel debugger is attached, first sanity-check the map: per
// the DbgPrint text below, a map with OpenCount == 0 and no dirty pages
// is not expected to be going onto the clean list, so break in.
//

{
    if (KdDebuggerEnabled &&
        (KdDebuggerNotPresent == FALSE) &&
        SharedCacheMap->OpenCount == 0 &&
        SharedCacheMap->DirtyPages == 0) {

        DbgPrint( "CC: SharedCacheMap->OpenCount == 0 && DirtyPages == 0 && going onto CleanList!\n" );
        DbgBreakPoint();
    }

    InsertTailList( &CcCleanSharedCacheMapList,
                    &SharedCacheMap->SharedCacheMapLinks );
}
  1721. #endif // _CCh_