Source code of Windows XP (NT5)
/*++

Copyright (c) 1990  Microsoft Corporation

Module Name:

    cc.h

Abstract:

    This module is a header file for the Memory Management based cache
    management routines for the common Cache subsystem.

Author:

    Tom Miller      [TomM]      4-May-1990

Revision History:

--*/

#ifndef _CCh_
#define _CCh_

#pragma warning(disable:4214)   // bit field types other than int
#pragma warning(disable:4201)   // nameless struct/union
#pragma warning(disable:4127)   // condition expression is constant
#pragma warning(disable:4115)   // named type definition in parentheses

#include <ntos.h>
#include <NtIoLogc.h>

#ifdef MEMPRINT
#include <memprint.h>
#endif

//
//  Define macros to acquire and release cache manager locks.
//

#define CcAcquireMasterLock( OldIrql ) \
    *( OldIrql ) = KeAcquireQueuedSpinLock( LockQueueMasterLock )

#define CcReleaseMasterLock( OldIrql ) \
    KeReleaseQueuedSpinLock( LockQueueMasterLock, OldIrql )

#define CcAcquireMasterLockAtDpcLevel() \
    KeAcquireQueuedSpinLockAtDpcLevel( &KeGetCurrentPrcb()->LockQueue[LockQueueMasterLock] )

#define CcReleaseMasterLockFromDpcLevel() \
    KeReleaseQueuedSpinLockFromDpcLevel( &KeGetCurrentPrcb()->LockQueue[LockQueueMasterLock] )

#define CcAcquireVacbLock( OldIrql ) \
    *( OldIrql ) = KeAcquireQueuedSpinLock( LockQueueVacbLock )

#define CcReleaseVacbLock( OldIrql ) \
    KeReleaseQueuedSpinLock( LockQueueVacbLock, OldIrql )

#define CcAcquireVacbLockAtDpcLevel() \
    KeAcquireQueuedSpinLockAtDpcLevel( &KeGetCurrentPrcb()->LockQueue[LockQueueVacbLock] )

#define CcReleaseVacbLockFromDpcLevel() \
    KeReleaseQueuedSpinLockFromDpcLevel( &KeGetCurrentPrcb()->LockQueue[LockQueueVacbLock] )

#define CcAcquireWorkQueueLock( OldIrql ) \
    *( OldIrql ) = KeAcquireQueuedSpinLock( LockQueueWorkQueueLock )

#define CcReleaseWorkQueueLock( OldIrql ) \
    KeReleaseQueuedSpinLock( LockQueueWorkQueueLock, OldIrql )

#define CcAcquireWorkQueueLockAtDpcLevel() \
    KeAcquireQueuedSpinLockAtDpcLevel( &KeGetCurrentPrcb()->LockQueue[LockQueueWorkQueueLock] )

#define CcReleaseWorkQueueLockFromDpcLevel() \
    KeReleaseQueuedSpinLockFromDpcLevel( &KeGetCurrentPrcb()->LockQueue[LockQueueWorkQueueLock] )
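//
//  Illustrative usage sketch (not part of the original header): the paired
//  acquire/release macros wrap the kernel's queued spinlock API, so a typical
//  caller brackets a short critical section as below.  The routine name
//  CcpExampleTouchGlobalState is hypothetical.
//
//      VOID
//      CcpExampleTouchGlobalState (
//          )
//      {
//          KIRQL OldIrql;
//
//          CcAcquireMasterLock( &OldIrql );
//
//          //  ... examine or update state covered by the master lock ...
//
//          CcReleaseMasterLock( OldIrql );
//      }
//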
//
//  This turns on the Bcb list debugging in a debug system.  Set value
//  to 0 to turn off.
//
//  **** Note it must currently be turned off because the routines in
//       pinsup.c that manipulate this list need to be changed to do the
//       right thing for Obcbs.  Right now they get messed up by inserting Obcbs
//       (which may not be large enough among other things) into the global
//       list.  Ideally each place gets some code to insert the underlying
//       Bcbs into the list if they are not already there.
//

#if DBG
#define LIST_DBG 0
#endif

#include <FsRtl.h>

//
//  Peek at number of available pages.
//

extern PFN_COUNT MmAvailablePages;

//
//  Define our node type codes.
//

#define CACHE_NTC_SHARED_CACHE_MAP       (0x2FF)
#define CACHE_NTC_PRIVATE_CACHE_MAP      (0x2FE)
#define CACHE_NTC_BCB                    (0x2FD)
#define CACHE_NTC_DEFERRED_WRITE         (0x2FC)
#define CACHE_NTC_MBCB                   (0x2FB)
#define CACHE_NTC_OBCB                   (0x2FA)
#define CACHE_NTC_MBCB_GRANDE            (0x2F9)

//
//  The following definitions are used to generate meaningful blue bugcheck
//  screens.  On a bugcheck the file system can output 4 ulongs of useful
//  information.  The first ulong will have encoded in it a source file id
//  (in the high word) and the line number of the bugcheck (in the low word).
//  The other values can be whatever the caller of the bugcheck routine deems
//  necessary.
//
//  Each individual file that calls bugcheck needs to have defined at the
//  start of the file a constant called BugCheckFileId with one of the
//  CACHE_BUG_CHECK_ values defined below and then use CcBugCheck to bugcheck
//  the system.
//

#define CACHE_BUG_CHECK_CACHEDAT         (0x00010000)
#define CACHE_BUG_CHECK_CACHESUB         (0x00020000)
#define CACHE_BUG_CHECK_COPYSUP          (0x00030000)
#define CACHE_BUG_CHECK_FSSUP            (0x00040000)
#define CACHE_BUG_CHECK_LAZYRITE         (0x00050000)
#define CACHE_BUG_CHECK_LOGSUP           (0x00060000)
#define CACHE_BUG_CHECK_MDLSUP           (0x00070000)
#define CACHE_BUG_CHECK_PINSUP           (0x00080000)
#define CACHE_BUG_CHECK_VACBSUP          (0x00090000)

#define CcBugCheck(A,B,C) { KeBugCheckEx(CACHE_MANAGER, BugCheckFileId | __LINE__, A, B, C ); }
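//
//  Illustrative sketch (not part of the original header): a file such as
//  pinsup.c would define its file id once at the top and then raise a
//  bugcheck with three ulongs of context:
//
//      #define BugCheckFileId CACHE_BUG_CHECK_PINSUP
//
//      ...
//
//      CcBugCheck( (ULONG_PTR)Bcb, Bcb->PinCount, 0 );
//
//  The first bugcheck parameter then packs the file id in the high word and
//  __LINE__ in the low word, so 0x0008002A would mean pinsup.c, line 42.
//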
//
//  Define maximum View Size.  (These constants are currently chosen so
//  as to be exactly a page worth of PTEs.)
//

#define DEFAULT_CREATE_MODULO            ((ULONG)(0x00100000))
#define DEFAULT_EXTEND_MODULO            ((ULONG)(0x00100000))

//
//  For non FO_RANDOM_ACCESS files, define how far we go before unmapping
//  views.
//

#define SEQUENTIAL_MAP_LIMIT             ((ULONG)(0x00080000))

//
//  Define some constants to drive read ahead and write behind.
//
//  Set max read ahead.  Even though some drivers, such as AT, break up transfers >= 128kb,
//  we need to permit enough readahead to satisfy plausible cached read operations while
//  preventing denial of service attacks.
//
//  This value used to be set to 64k.  When doing cached reads in larger units (128k), we
//  would never be bringing in enough data to keep the user from blocking.  8mb is
//  arbitrarily chosen to be greater than plausible RAID bandwidth and user operation size
//  by a factor of 3-4.
//

#define MAX_READ_AHEAD                   (8 * 1024 * 1024)

//
//  Set maximum write behind / lazy write (most drivers break up transfers >= 64kb).
//

#define MAX_WRITE_BEHIND                 (MM_MAXIMUM_DISK_IO_SIZE)
//
//  Set a throttle for charging a given write against the total number of dirty
//  pages in the system, for the purpose of seeing when we should invoke write
//  throttling.
//
//  This must be the same as the throttle used for seeing when we must flush
//  temporary files in the lazy writer.  On the back of the envelope, here
//  is why:
//
//      RDP = Regular File Dirty Pages
//      TDP = Temporary File Dirty Pages
//      CWT = Charged Write Throttle
//            -> the maximum we will charge a user with when we see if
//               he should be throttled
//      TWT = Temporary Write Throttle
//            -> if we can't write this many pages, we must write temp data
//      DPT = Dirty Page Threshold
//            -> the limit when write throttling kicks in
//
//      PTD = Pages To Dirty
//      CDP = Charged Dirty Pages
//
//  Now, CDP = Min( PTD, CWT ).
//
//  Excluding other effects, we throttle when:
//      #0  (RDP + TDP) + CDP >= DPT
//
//  To write temporary data, we must cause:
//      #1  (RDP + TDP) + TWT >= DPT
//
//  To release the throttle, we must eventually cause:
//      #2  (RDP + TDP) + CDP < DPT
//
//  Now, imagine TDP >> RDP (perhaps RDP == 0) and CDP == CWT for a particular
//  throttled write.
//
//  If CWT > TWT, as we drive RDP to zero (we never defer writing regular
//  data except for hotspots or other very temporary conditions), it is clear
//  that we may never trigger the writing of temporary data (#1) but also
//  never release the throttle (#2).  Simply, we would be willing to charge
//  for more dirty pages than we would be willing to guarantee are available
//  to dirty.  Hence, potential deadlock.
//
//  CWT < TWT I leave aside for the moment.  This would mean we try not to
//  allow temporary data to accumulate to the point that writes throttle as
//  a result.  Perhaps this would even be better than CWT == TWT.
//
//  It is legitimate to ask if throttling temporary data writes should be relaxed
//  if we see a large amount of dirty temp data accumulate (and it would be very
//  easy to keep track of this).  I don't claim to know the best answer to this,
//  but for now the attempt to avoid temporary data writes at all costs still
//  fits the reasonable operation mix, and we will only penalize the outside
//  oddcase with a little more throttle/release.
//

#define WRITE_CHARGE_THRESHOLD           (64 * PAGE_SIZE)
//
//  Define constants to control zeroing of file data: one constant to control
//  how much data we will actually zero ahead in the cache, and another to
//  control what the maximum transfer size is that we will use to write zeros.
//

#define MAX_ZERO_TRANSFER                (PAGE_SIZE * 128)
#define MIN_ZERO_TRANSFER                (0x10000)
#define MAX_ZEROS_IN_CACHE               (0x10000)
//
//  Definitions for multi-level Vacb structure.  The primary definition is the
//  VACB_LEVEL_SHIFT.  In a multi-level Vacb structure, each level in the tree
//  of pointers has 2 ** VACB_LEVEL_SHIFT pointers.
//
//  For test, this value may be set as low as 4 (no lower); a value of 10 corresponds
//  to a convenient block size of 4KB.  (If set to 2, CcExtendVacbArray will try to
//  "push" the Vacb array allocated within the SharedCacheMap, and later someone will
//  try to deallocate the middle of the SharedCacheMap.  At 3, MBCB_BITMAP_INITIAL_SIZE
//  is larger than MBCB_BITMAP_BLOCK_SIZE.)
//
//  There is a bit of a trick as we make the jump to the multilevel structure in that
//  we need a real fixed reference count.
//

#define VACB_LEVEL_SHIFT                 (7)

//
//  This is how many bytes of pointers are at each level.  This is the size for both
//  the Vacb array and (optional) Bcb listheads.  It does not include the reference
//  block.
//

#define VACB_LEVEL_BLOCK_SIZE            ((1 << VACB_LEVEL_SHIFT) * sizeof(PVOID))

//
//  This is the last index for a level.
//

#define VACB_LAST_INDEX_FOR_LEVEL        ((1 << VACB_LEVEL_SHIFT) - 1)

//
//  This is the size of file which can be handled in a single level.
//

#define VACB_SIZE_OF_FIRST_LEVEL         (1 << (VACB_OFFSET_SHIFT + VACB_LEVEL_SHIFT))

//
//  This is the maximum number of levels it takes to support 63-bits.  It is
//  used for routines that must remember a path.
//

#define VACB_NUMBER_OF_LEVELS            (((63 - VACB_OFFSET_SHIFT)/VACB_LEVEL_SHIFT) + 1)
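//
//  Worked example (not part of the original header), assuming the usual
//  values VACB_OFFSET_SHIFT == 18 (256KB views) and VACB_LEVEL_SHIFT == 7
//  on a 32-bit build:
//
//      VACB_LEVEL_BLOCK_SIZE     = 128 * sizeof(PVOID) = 512 bytes
//      VACB_SIZE_OF_FIRST_LEVEL  = 1 << (18 + 7)       = 32MB
//      VACB_NUMBER_OF_LEVELS     = ((63 - 18) / 7) + 1 = 7
//
//  so a file up to 32MB needs only the flat first-level array, and even a
//  maximal 63-bit offset is reachable through at most 7 levels of pointers.
//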
//
//  Define the reference structure for multilevel Vacb trees.
//

typedef struct _VACB_LEVEL_REFERENCE {

    LONG Reference;
    LONG SpecialReference;

} VACB_LEVEL_REFERENCE, *PVACB_LEVEL_REFERENCE;

//
//  Define the size of a bitmap allocated for a bitmap range, in bytes.
//

#define MBCB_BITMAP_BLOCK_SIZE           (VACB_LEVEL_BLOCK_SIZE)

//
//  Define how many bytes of a file are covered by an Mbcb bitmap range,
//  at a bit for each page.
//

#define MBCB_BITMAP_RANGE                (MBCB_BITMAP_BLOCK_SIZE * 8 * PAGE_SIZE)

//
//  Define the initial size of the Mbcb bitmap that is self-contained in the Mbcb.
//

#define MBCB_BITMAP_INITIAL_SIZE         (2 * sizeof(BITMAP_RANGE))
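//
//  Illustrative sketch (not part of the original header): mapping a file
//  offset to its dirty bit under these definitions.  Each bitmap range
//  covers MBCB_BITMAP_RANGE bytes of the file, and within a range one bit
//  describes one page.  The helper below is hypothetical.
//
//      VOID
//      CcpExampleLocateDirtyBit (
//          IN LONGLONG FileOffset,
//          OUT PLONGLONG BasePage,
//          OUT PULONG BitIndex
//          )
//      {
//          //  Page number within the file, split into the range's base
//          //  page and the bit position relative to that base.
//
//          LONGLONG Page = FileOffset / PAGE_SIZE;
//
//          *BasePage = Page - (Page % (MBCB_BITMAP_BLOCK_SIZE * 8));
//          *BitIndex = (ULONG)(Page - *BasePage);
//      }
//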
//
//  Define constants controlling when the Bcb list is broken into a
//  pendaflex-style array of listheads, and how the correct listhead
//  is found.  Begin when file size exceeds 2MB, and cover 512KB per
//  listhead.  At 512KB per listhead, the BcbListArray is the same
//  size as the Vacb array, i.e., it doubles the size.
//
//  The code handling these Bcb lists in the Vacb package contains
//  assumptions that the size is the same as that of the Vacb pointers.
//  Future work could undo this, but until then the size and shift
//  below cannot change.  There really isn't a good reason to want to
//  anyway.
//
//  Note that by definition a flat vacb array cannot fail to find an
//  exact match when searching for the listhead - this is only a
//  complication of the sparse structure.
//

#define BEGIN_BCB_LIST_ARRAY             (0x200000)
#define SIZE_PER_BCB_LIST                (VACB_MAPPING_GRANULARITY * 2)
#define BCB_LIST_SHIFT                   (VACB_OFFSET_SHIFT + 1)

#define GetBcbListHead(SCM,OFF,FAILSUCC) (                                                              \
    (((SCM)->SectionSize.QuadPart > BEGIN_BCB_LIST_ARRAY) &&                                            \
     FlagOn((SCM)->Flags, MODIFIED_WRITE_DISABLED)) ?                                                   \
      (((SCM)->SectionSize.QuadPart > VACB_SIZE_OF_FIRST_LEVEL) ?                                       \
        CcGetBcbListHeadLargeOffset((SCM),(OFF),(FAILSUCC)) :                                           \
        (((OFF) >= (SCM)->SectionSize.QuadPart) ? &(SCM)->BcbList :                                     \
          ((PLIST_ENTRY)((SCM)->Vacbs) + (((SCM)->SectionSize.QuadPart + (OFF)) >> BCB_LIST_SHIFT)))) : \
    &(SCM)->BcbList                                                                                     \
)
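//
//  Illustrative note (not part of the original header): for a stream with
//  modified write disabled and SectionSize between 2MB and
//  VACB_SIZE_OF_FIRST_LEVEL, the listheads live immediately after the Vacb
//  pointers in the same allocation, which is why the macro indexes from
//  (SCM)->Vacbs.  A lookup reads like this:
//
//      PLIST_ENTRY BcbListHead;
//
//      BcbListHead = GetBcbListHead( SharedCacheMap,
//                                    FileOffset.QuadPart,
//                                    TRUE );
//
//  The third parameter only matters in the sparse (multilevel) case, where
//  CcGetBcbListHeadLargeOffset may fail over to the successor listhead.
//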
//
//  Macros to lock/unlock a Vacb level as Bcbs are inserted/deleted.
//

#define CcLockVacbLevel(SCM,OFF) {                                      \
    if (((SCM)->SectionSize.QuadPart > VACB_SIZE_OF_FIRST_LEVEL) &&     \
        FlagOn((SCM)->Flags, MODIFIED_WRITE_DISABLED)) {                \
        CcAdjustVacbLevelLockCount((SCM),(OFF), +1);}                   \
}

#define CcUnlockVacbLevel(SCM,OFF) {                                    \
    if (((SCM)->SectionSize.QuadPart > VACB_SIZE_OF_FIRST_LEVEL) &&     \
        FlagOn((SCM)->Flags, MODIFIED_WRITE_DISABLED)) {                \
        CcAdjustVacbLevelLockCount((SCM),(OFF), -1);}                   \
}

//
//  NOISE_BITS defines how many bits are masked off when testing for
//  sequential reads.  This allows the reader to skip up to 7 bytes
//  for alignment purposes, and we still consider the next read to be
//  sequential.  Starting and ending addresses are masked by this pattern
//  before comparison.
//

#define NOISE_BITS                       (0x7)
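//
//  Illustrative sketch (not part of the original header): the sequential
//  read test masks NOISE_BITS off both the end of the previous read and the
//  start of the new one, so small alignment skips still count as sequential.
//  The helper name is hypothetical.
//
//      BOOLEAN
//      CcpExampleIsSequential (
//          IN LONGLONG BeyondLastByteOfPreviousRead,
//          IN LONGLONG FileOffsetOfNewRead
//          )
//      {
//          return (BOOLEAN)((BeyondLastByteOfPreviousRead & ~(LONGLONG)NOISE_BITS) ==
//                           (FileOffsetOfNewRead & ~(LONGLONG)NOISE_BITS));
//      }
//
//  For example, a previous read ending at offset 0x1003 followed by a read
//  starting at 0x1000 (or 0x1007) still compares equal after masking.
//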
//
//  Define some constants to drive the Lazy Writer.
//

#define LAZY_WRITER_IDLE_DELAY           ((LONG)(10000000))
#define LAZY_WRITER_COLLISION_DELAY      ((LONG)(1000000))

//
//  The following target should best be a power of 2.
//

#define LAZY_WRITER_MAX_AGE_TARGET       ((ULONG)(8))

//
//  Requeue information hint for the lazy writer.
//

#define CC_REQUEUE                       35422

//
//  The global Cache Manager debug level variable, its values are:
//
//      0x00000000      Always gets printed (used when about to bug check)
//
//      0x00000001      FsSup
//      0x00000002      CacheSub
//      0x00000004      CopySup
//      0x00000008      PinSup
//
//      0x00000010      MdlSup
//      0x00000020      LazyRite
//      0x00000040
//      0x00000080
//
//      0x00000100      Trace all Mm calls
//

#define mm (0x100)

//
//  Miscellaneous support macros.
//
//      ULONG
//      FlagOn (
//          IN ULONG Flags,
//          IN ULONG SingleFlag
//          );
//
//      BOOLEAN
//      BooleanFlagOn (
//          IN ULONG Flags,
//          IN ULONG SingleFlag
//          );
//
//      VOID
//      SetFlag (
//          IN ULONG Flags,
//          IN ULONG SingleFlag
//          );
//
//      VOID
//      ClearFlag (
//          IN ULONG Flags,
//          IN ULONG SingleFlag
//          );
//
//      ULONG
//      QuadAlign (
//          IN ULONG Pointer
//          );
//

#define FlagOn(F,SF) (      \
    (((F) & (SF)))          \
)

#define BooleanFlagOn(F,SF) (           \
    (BOOLEAN)(((F) & (SF)) != 0)        \
)

#define SetFlag(F,SF) {     \
    (F) |= (SF);            \
}

#define ClearFlag(F,SF) {   \
    (F) &= ~(SF);           \
}

#define QuadAlign(P) (          \
    ((((P)) + 7) & (-8))        \
)
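//
//  Illustrative note (not part of the original header): QuadAlign rounds up
//  to the next 8-byte boundary, e.g. QuadAlign(1) == 8, QuadAlign(8) == 8,
//  QuadAlign(13) == 16.  The flag macros are used throughout, e.g.:
//
//      if (FlagOn( SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED )) {
//
//          SetFlag( SharedCacheMap->Flags, WRITE_QUEUED );
//      }
//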
//
//  Turn on pseudo-asserts if CC_FREE_ASSERTS is defined.
//

#if (!DBG && defined( CC_FREE_ASSERTS ))
#undef ASSERT
#undef ASSERTMSG
#define ASSERT(exp)                                                   \
    ((exp) ? TRUE :                                                   \
             (DbgPrint( "%s:%d %s\n",__FILE__,__LINE__,#exp ),        \
              DbgBreakPoint(),                                        \
              TRUE))
#define ASSERTMSG(msg,exp)                                            \
    ((exp) ? TRUE :                                                   \
             (DbgPrint( "%s:%d %s %s\n",__FILE__,__LINE__,msg,#exp ), \
              DbgBreakPoint(),                                        \
              TRUE))
#endif

//
//  Define the Virtual Address Control Block, which controls all mapping
//  performed by the Cache Manager.
//
//  First some constants.
//

#define PREALLOCATED_VACBS               (4)

//
//  Virtual Address Control Block
//

typedef struct _VACB {

    //
    //  Base Address for this control block.
    //

    PVOID BaseAddress;

    //
    //  Pointer to the Shared Cache Map using this Vacb.
    //

    struct _SHARED_CACHE_MAP *SharedCacheMap;

    //
    //  Overlay for remembering mapped offset within the Shared Cache Map,
    //  and the count of the number of times this Vacb is in use.
    //

    union {

        //
        //  File Offset within Shared Cache Map
        //

        LARGE_INTEGER FileOffset;

        //
        //  Count of number of times this Vacb is in use.  The size of this
        //  count is calculated to be adequate, while never large enough to
        //  overwrite nonzero bits of the FileOffset, which is a multiple
        //  of VACB_MAPPING_GRANULARITY.
        //

        USHORT ActiveCount;

    } Overlay;

    //
    //  Entry for the VACB reuse list
    //

    LIST_ENTRY LruList;

} VACB, *PVACB;
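//
//  Illustrative note (not part of the original header): the Overlay union
//  works because a mapped Vacb's FileOffset is always a multiple of
//  VACB_MAPPING_GRANULARITY (typically 256KB), so its low-order bits are
//  zero and the USHORT ActiveCount can live there without corrupting the
//  offset.  A sketch of the view lookup math, assuming the flat
//  (single-level) Vacb array:
//
//      PVACB Vacb;
//      ULONG VacbIndex;
//
//      VacbIndex = (ULONG)( FileOffset.QuadPart >> VACB_OFFSET_SHIFT );
//      Vacb = SharedCacheMap->Vacbs[VacbIndex];
//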
//
//  These define special flag values that are overloaded as PVACB.  They cause
//  certain special behavior, currently only in the case of multilevel structures.
//

#define VACB_SPECIAL_REFERENCE           ((PVACB) ~0)
#define VACB_SPECIAL_DEREFERENCE         ((PVACB) ~1)

#define VACB_SPECIAL_FIRST_VALID         VACB_SPECIAL_DEREFERENCE

#define PRIVATE_CACHE_MAP_READ_AHEAD_ACTIVE      0x10000
#define PRIVATE_CACHE_MAP_READ_AHEAD_ENABLED     0x20000

typedef struct _PRIVATE_CACHE_MAP_FLAGS {

    ULONG DontUse : 16;         //  Overlaid with NodeTypeCode

    //
    //  This flag says read ahead is currently active, which means either
    //  a file system call to CcReadAhead is still determining if the
    //  desired data is already resident, or else a request to do read ahead
    //  has been queued to a worker thread.
    //

    ULONG ReadAheadActive : 1;

    //
    //  Flag to say whether read ahead is currently enabled for this
    //  FileObject/PrivateCacheMap.  On read misses it is enabled; on
    //  read ahead hits it will be disabled.  Initially disabled.
    //

    ULONG ReadAheadEnabled : 1;

    ULONG Available : 14;

} PRIVATE_CACHE_MAP_FLAGS;

#define CC_SET_PRIVATE_CACHE_MAP(PrivateCacheMap, Flags) \
    RtlInterlockedSetBitsDiscardReturn (&PrivateCacheMap->UlongFlags, Flags);

#define CC_CLEAR_PRIVATE_CACHE_MAP(PrivateCacheMap, Feature) \
    RtlInterlockedAndBitsDiscardReturn (&PrivateCacheMap->UlongFlags, (ULONG)~Feature);
//
//  The Private Cache Map is a structure pointed to by the File Object, whenever
//  a file is opened with caching enabled (default).
//

typedef struct _PRIVATE_CACHE_MAP {

    //
    //  Type and size of this record
    //

    union {

        CSHORT NodeTypeCode;
        PRIVATE_CACHE_MAP_FLAGS Flags;
        ULONG UlongFlags;
    };

    //
    //  Read Ahead mask formed from Read Ahead granularity - 1.
    //  Private Cache Map ReadAheadSpinLock controls access to this field.
    //

    ULONG ReadAheadMask;

    //
    //  Pointer to FileObject for this PrivateCacheMap.
    //

    PFILE_OBJECT FileObject;

    //
    //  READ AHEAD CONTROL
    //
    //  Read ahead history for determining when read ahead might be
    //  beneficial.
    //

    LARGE_INTEGER FileOffset1;
    LARGE_INTEGER BeyondLastByte1;

    LARGE_INTEGER FileOffset2;
    LARGE_INTEGER BeyondLastByte2;

    //
    //  Current read ahead requirements.
    //
    //  Array element 0 is optionally used for recording remaining bytes
    //  required for satisfying a large Mdl read.
    //
    //  Array element 1 is used for predicted read ahead.
    //

    LARGE_INTEGER ReadAheadOffset[2];
    ULONG ReadAheadLength[2];

    //
    //  SpinLock controlling access to following fields
    //

    KSPIN_LOCK ReadAheadSpinLock;

    //
    //  Links for list of all PrivateCacheMaps linked to the same
    //  SharedCacheMap.
    //

    LIST_ENTRY PrivateLinks;

} PRIVATE_CACHE_MAP;

typedef PRIVATE_CACHE_MAP *PPRIVATE_CACHE_MAP;

//
//  The Shared Cache Map is a per-file structure pointed to indirectly by
//  each File Object.  The File Object points to a pointer in a single
//  FS-private structure for the file (Fcb).  The SharedCacheMap maps the
//  first part of the file for common access by all callers.
//

//
//  OpenCount log Reasons/Actions
//

#if OPEN_COUNT_LOG
typedef struct _CC_OPEN_COUNT_LOG_ENTRY {
    ULONG Action;
    ULONG Reason;
} CC_OPEN_COUNT_LOG_ENTRY;

typedef struct _CC_OPEN_COUNT_LOG {
    USHORT Next;
    USHORT Size;
    CC_OPEN_COUNT_LOG_ENTRY Log[48];
} CC_OPEN_COUNT_LOG;

#define CcAddOpenToLog( LOG, ACTION, REASON ) {     \
    (LOG)->Log[(LOG)->Next].Action = (ACTION);      \
    (LOG)->Log[(LOG)->Next].Reason = (REASON);      \
    (LOG)->Next += 1;                               \
    if ((LOG)->Next == (LOG)->Size) {               \
        (LOG)->Next = 0;                            \
    }                                               \
}
#else   //  OPEN_COUNT_LOG
#define CcAddOpenToLog( LOG, ACTION, REASON )
#endif  //  OPEN_COUNT_LOG

#define CcIncrementOpenCount( SCM, REASON ) {                   \
    (SCM)->OpenCount += 1;                                      \
    if (REASON != 0) {                                          \
        CcAddOpenToLog( &(SCM)->OpenCountLog, REASON, 1 );      \
    }                                                           \
}

#define CcDecrementOpenCount( SCM, REASON ) {                   \
    (SCM)->OpenCount -= 1;                                      \
    if (REASON != 0) {                                          \
        CcAddOpenToLog( &(SCM)->OpenCountLog, REASON, -1 );     \
    }                                                           \
}
typedef struct _SHARED_CACHE_MAP {

    //
    //  Type and size of this record
    //

    CSHORT NodeTypeCode;
    CSHORT NodeByteSize;

    //
    //  Number of times this file has been opened cached.
    //

    ULONG OpenCount;

    //
    //  Actual size of file, primarily for restricting Read Ahead.  Initialized
    //  on creation and maintained by extend and truncate operations.
    //
    //  NOTE:   This field may never be moved, thanks to the late DavidGoe,
    //          who should have written this comment himself :-(  cache.h
    //          exports a macro which "knows" that FileSize is the second
    //          longword in the Cache Map!
    //

    LARGE_INTEGER FileSize;

    //
    //  Bcb Listhead.  The BcbList is ordered by descending
    //  FileOffsets, to optimize misses in the sequential I/O case.
    //  Synchronized by the BcbSpinLock.
    //

    LIST_ENTRY BcbList;

    //
    //  Size of section created.
    //

    LARGE_INTEGER SectionSize;

    //
    //  ValidDataLength for file, as currently stored by the file system.
    //  Synchronized by the BcbSpinLock or exclusive access by FileSystem.
    //

    LARGE_INTEGER ValidDataLength;

    //
    //  Goal for ValidDataLength, when current dirty data is written.
    //  Synchronized by the BcbSpinLock or exclusive access by FileSystem.
    //

    LARGE_INTEGER ValidDataGoal;

    //
    //  Pointer to a contiguous array of Vacb pointers which control mapping
    //  to this file, along with Vacbs (currently) for a 1MB file.
    //  Synchronized by CcVacbSpinLock.
    //

    PVACB InitialVacbs[PREALLOCATED_VACBS];
    PVACB * Vacbs;

    //
    //  Referenced pointer to original File Object on which the SharedCacheMap
    //  was created.
    //

    PFILE_OBJECT FileObject;

    //
    //  Describe Active Vacb and Page for copysup optimizations.
    //

    volatile PVACB ActiveVacb;

    //
    //  Virtual address needing zero to end of page
    //

    volatile PVOID NeedToZero;

    ULONG ActivePage;
    ULONG NeedToZeroPage;

    //
    //  Fields for synchronizing on active requests.
    //

    KSPIN_LOCK ActiveVacbSpinLock;
    ULONG VacbActiveCount;

    //
    //  Number of dirty pages in this SharedCacheMap.  Used to trigger
    //  write behind.  Synchronized by CcMasterSpinLock.
    //

    ULONG DirtyPages;

    //
    //  THE NEXT TWO FIELDS MUST BE ADJACENT, TO SUPPORT
    //  SHARED_CACHE_MAP_LIST_CURSOR!
    //
    //  Links for Global SharedCacheMap List
    //

    LIST_ENTRY SharedCacheMapLinks;

    //
    //  Shared Cache Map flags (defined below)
    //

    ULONG Flags;

    //
    //  Status variable set by creator of SharedCacheMap
    //

    NTSTATUS Status;

    //
    //  Mask Bcb for this SharedCacheMap, if there is one.
    //  Synchronized by the BcbSpinLock.
    //

    struct _MBCB *Mbcb;

    //
    //  Pointer to the common Section Object used by the file system.
    //

    PVOID Section;

    //
    //  This event pointer is used to handle creation collisions.
    //  If a second thread tries to call CcInitializeCacheMap for the
    //  same file, while BeingCreated (below) is TRUE, then that thread
    //  will allocate an event, store it here (if not already allocated),
    //  and wait on it.  The first creator will set this event when it
    //  is done.  The event is not deleted until CcUninitializeCacheMap
    //  is called, to avoid possible race conditions.  (Note that normally
    //  the event never has to be allocated.)
    //

    PKEVENT CreateEvent;

    //
    //  This points to an event used to wait for active count to go to zero
    //

    PKEVENT WaitOnActiveCount;

    //
    //  These two fields control the writing of large metadata
    //  streams.  The first field gives a target for the current
    //  flush interval, and the second field stores the end of
    //  the last flush that occurred on this file.
    //

    ULONG PagesToWrite;
    LONGLONG BeyondLastFlush;

    //
    //  Pointer to structure of routines used by the Lazy Writer to Acquire
    //  and Release the file for Lazy Write and Close, to avoid deadlocks,
    //  and the context to call them with.
    //

    PCACHE_MANAGER_CALLBACKS Callbacks;
    PVOID LazyWriteContext;

    //
    //  Listhead of all PrivateCacheMaps linked to this SharedCacheMap.
    //

    LIST_ENTRY PrivateList;

    //
    //  Log handle specified for this shared cache map, for support of routines
    //  in logsup.c
    //

    PVOID LogHandle;

    //
    //  Callback routine specified for flushing to Lsn.
    //

    PFLUSH_TO_LSN FlushToLsnRoutine;

    //
    //  Dirty Page Threshold for this stream
    //

    ULONG DirtyPageThreshold;

    //
    //  Lazy Writer pass count.  Used by the Lazy Writer for
    //  no modified write streams, which are not serviced on
    //  every pass in order to avoid contention with foreground
    //  activity.
    //

    ULONG LazyWritePassCount;

    //
    //  This event pointer is used to allow a file system to be notified upon
    //  the deletion of a shared cache map.
    //
    //  This has to be provided here because the cache manager may decide to
    //  "Lazy Delete" the shared cache map, and some network file systems
    //  will want to know when the lazy delete completes.
    //

    PCACHE_UNINITIALIZE_EVENT UninitializeEvent;

    //
    //  This Vacb pointer is needed for keeping the NeedToZero virtual address
    //  valid.
    //

    PVACB NeedToZeroVacb;

    //
    //  Spinlock for synchronizing the Mbcb and Bcb lists - must be acquired
    //  before CcMasterSpinLock.  This spinlock also synchronizes ValidDataGoal
    //  and ValidDataLength, as described above.
    //

    KSPIN_LOCK BcbSpinLock;

    PVOID Reserved;

    //
    //  This is an event which may be used for the WaitOnActiveCount event.  We
    //  avoid overhead by only "activating" it when it is needed.
    //

    KEVENT Event;

    EX_PUSH_LOCK VacbPushLock;

    //
    //  Preallocate one PrivateCacheMap to reduce pool allocations.
    //

    PRIVATE_CACHE_MAP PrivateCacheMap;

#if OPEN_COUNT_LOG
    //
    //  Instrument reasons for OpenCount
    //

    CC_OPEN_COUNT_LOG OpenCountLog;
#endif

} SHARED_CACHE_MAP;

typedef SHARED_CACHE_MAP *PSHARED_CACHE_MAP;
//
//  Shared Cache Map Flags
//

//
//  Read ahead has been disabled on this file.
//

#define DISABLE_READ_AHEAD               0x0001

//
//  Write behind has been disabled on this file.
//

#define DISABLE_WRITE_BEHIND             0x0002

//
//  This flag indicates whether CcInitializeCacheMap was called with
//  PinAccess = TRUE.
//

#define PIN_ACCESS                       0x0004

//
//  This flag indicates that a truncate is required when OpenCount
//  goes to 0.
//

#define TRUNCATE_REQUIRED                0x0010

//
//  This flag indicates that a LazyWrite request is queued.
//

#define WRITE_QUEUED                     0x0020

//
//  This flag indicates that we have never seen anyone cache
//  the file except for with FO_SEQUENTIAL_ONLY, so we should
//  tell MM to quickly dump pages when we unmap.
//

#define ONLY_SEQUENTIAL_ONLY_SEEN        0x0040

//
//  The Active Page is dirty (and hence stays locked).
//

#define ACTIVE_PAGE_IS_DIRTY             0x0080

//
//  Flag to say that a create is in progress.
//

#define BEING_CREATED                    0x0100

//
//  Flag to say that modified write was disabled on the section.
//

#define MODIFIED_WRITE_DISABLED          0x0200

//
//  Flag that indicates if a lazy write ever occurred on this file.
//

#define LAZY_WRITE_OCCURRED              0x0400

//
//  Flag that indicates this structure is only a cursor; only the
//  SharedCacheMapLinks and Flags are valid!
//

#define IS_CURSOR                        0x0800

//
//  Flag that indicates that we have seen someone cache this file
//  and specify FO_RANDOM_ACCESS.  This will deactivate our cache
//  working set trim assist.
//

#define RANDOM_ACCESS_SEEN               0x1000

//
//  Flag indicating that the stream is private write.  This disables
//  non-aware flush/purge.
//

#define PRIVATE_WRITE                    0x2000

//
//  Cursor structure for traversing the SharedCacheMap lists.  Anyone
//  scanning these lists must verify that the IS_CURSOR flag is clear
//  before looking at other SharedCacheMap fields.
//

typedef struct _SHARED_CACHE_MAP_LIST_CURSOR {

    //
    //  Links for Global SharedCacheMap List
    //

    LIST_ENTRY SharedCacheMapLinks;

    //
    //  Shared Cache Map flags, IS_CURSOR must be set.
    //

    ULONG Flags;

} SHARED_CACHE_MAP_LIST_CURSOR, *PSHARED_CACHE_MAP_LIST_CURSOR;
#ifndef KDEXT
//
//  Bitmap Range structure.  For small files there is just one embedded in the
//  Mbcb.  For large files there may be many of these linked to the Mbcb.
//

typedef struct _BITMAP_RANGE {

    //
    //  Links for the list of bitmap ranges off the Mbcb.
    //

    LIST_ENTRY Links;

    //
    //  Base page (FileOffset / PAGE_SIZE) represented by this range.
    //  (Size is a fixed maximum.)
    //

    LONGLONG BasePage;

    //
    //  First and Last dirty pages relative to the BasePage.
    //

    ULONG FirstDirtyPage;
    ULONG LastDirtyPage;

    //
    //  Number of dirty pages in this range.
    //

    ULONG DirtyPages;

    //
    //  Pointer to the bitmap for this range.
    //

    PULONG Bitmap;

} BITMAP_RANGE, *PBITMAP_RANGE;
#endif

//
//  This structure is a "mask" Bcb.  For fast simple write operations,
//  a mask Bcb is used so that we basically only have to set bits to remember
//  where the dirty data is.
//

typedef struct _MBCB {

    //
    //  Type and size of this record
    //

    CSHORT NodeTypeCode;
    CSHORT NodeIsInZone;

    //
    //  This field is used as a scratch area for the Lazy Writer to
    //  guide how much he will write each time he wakes up.
    //

    ULONG PagesToWrite;

    //
    //  Number of dirty pages (set bits) in the bitmap below.
    //

    ULONG DirtyPages;

    //
    //  Reserved for alignment.
    //

    ULONG Reserved;

    //
    //  ListHead of Bitmap ranges.
    //

    LIST_ENTRY BitmapRanges;

    //
    //  This is a hint on where to resume writing, since we will not
    //  always write all of the dirty data at once.
    //

    LONGLONG ResumeWritePage;

    //
    //  Initial three embedded Bitmap ranges.  For a file up to 2MB, only the
    //  first range is used, and the rest of the Mbcb contains bits for 2MB of
    //  dirty pages (4MB on Alpha).  For larger files, all three ranges may
    //  be used to describe external bitmaps.
    //

    BITMAP_RANGE BitmapRange1;
    BITMAP_RANGE BitmapRange2;
    BITMAP_RANGE BitmapRange3;

} MBCB;

typedef MBCB *PMBCB;
//
//  This is the Buffer Control Block structure for representing data which
//  is "pinned" in memory by one or more active requests and/or dirty.  This
//  structure is created the first time that a call to CcPinFileData specifies
//  a particular integral range of pages.  It is deallocated whenever the Pin
//  Count reaches 0 and the Bcb is not Dirty.
//
//  NOTE: The first four fields must be the same as the PUBLIC_BCB.
//

typedef struct _BCB {

    union {

        //
        //  To ensure QuadAlign (sizeof (BCB)) >= QuadAlign (sizeof (MBCB))
        //  so that they can share the same pool blocks.
        //

        MBCB Dummy;

        struct {

            //
            //  Type and size of this record
            //

            CSHORT NodeTypeCode;

            //
            //  Flags
            //

            BOOLEAN Dirty;
            BOOLEAN Reserved;

            //
            //  Byte FileOffset and length of entire buffer
            //

            ULONG ByteLength;
            LARGE_INTEGER FileOffset;

            //
            //  Links for BcbList in SharedCacheMap
            //

            LIST_ENTRY BcbLinks;

            //
            //  Byte FileOffset of last byte in buffer (used for searching)
            //

            LARGE_INTEGER BeyondLastByte;

            //
            //  Oldest Lsn (if specified) when this buffer was set dirty.
            //

            LARGE_INTEGER OldestLsn;

            //
            //  Most recent Lsn specified when this buffer was set dirty.
            //  The FlushToLsnRoutine is called with this Lsn.
            //

            LARGE_INTEGER NewestLsn;

            //
            //  Pointer to Vacb via which this Bcb is mapped.
            //

            PVACB Vacb;

#if LIST_DBG
            //
            //  Links and caller addresses for the global Bcb list (for debug only)
            //

            LIST_ENTRY CcBcbLinks;
            PVOID CallerAddress;
            PVOID CallersCallerAddress;
#endif

            //
            //  Count of threads actively using this Bcb to process a request.
            //  This must be manipulated under protection of the BcbListSpinLock
            //  in the SharedCacheMap.
            //

            ULONG PinCount;

            //
            //  Resource to synchronize buffer access.  Pinning Readers and all Writers
            //  of the described buffer take out shared access (synchronization of
            //  buffer modifications is strictly up to the caller).  Note that pinning
            //  readers do not declare if they are going to modify the buffer or not.
            //  Anyone writing to disk takes out exclusive access, to prevent the buffer
            //  from changing while it is being written out.
            //

            ERESOURCE Resource;

            //
            //  Pointer to SharedCacheMap for this Bcb.
            //

            PSHARED_CACHE_MAP SharedCacheMap;

            //
            //  This is the Base Address at which the buffer can be seen in
            //  system space.  All access to buffer data should go through this
            //  address.
            //

            PVOID BaseAddress;
        };
    };

} BCB;

#ifndef KDEXT
typedef BCB *PBCB;
#endif
//
//  This is the Overlap Buffer Control Block structure for representing data which
//  is "pinned" in memory and must be represented by multiple Bcbs due to overlaps.
//
//  NOTE: The first four fields must be the same as the PUBLIC_BCB.
//

typedef struct _OBCB {

    //
    //  Type and size of this record
    //

    CSHORT NodeTypeCode;
    CSHORT NodeByteSize;

    //
    //  Byte FileOffset and length of entire buffer
    //

    ULONG ByteLength;
    LARGE_INTEGER FileOffset;

    //
    //  Vector of Bcb pointers.
    //

    PBCB Bcbs[ANYSIZE_ARRAY];

} OBCB;

typedef OBCB *POBCB;

//
//  Struct for remembering deferred writes for later posting.
//

typedef struct _DEFERRED_WRITE {

    //
    //  Type and size of this record
    //

    CSHORT NodeTypeCode;
    CSHORT NodeByteSize;

    //
    //  The file to be written.
    //

    PFILE_OBJECT FileObject;

    //
    //  Number of bytes the caller intends to write
    //

    ULONG BytesToWrite;

    //
    //  Links for the deferred write queue.
    //

    LIST_ENTRY DeferredWriteLinks;

    //
    //  If this event pointer is not NULL, then this event will
    //  be signalled when the write is ok, rather than calling
    //  the PostRoutine below.
    //

    PKEVENT Event;

    //
    //  The posting routine and its parameters
    //

    PCC_POST_DEFERRED_WRITE PostRoutine;
    PVOID Context1;
    PVOID Context2;

    BOOLEAN LimitModifiedPages;

} DEFERRED_WRITE, *PDEFERRED_WRITE;
//
//  Struct controlling the Lazy Writer algorithms
//

typedef struct _LAZY_WRITER {

    //
    //  Work queue.
    //

    LIST_ENTRY WorkQueue;

    //
    //  Dpc and Timer Structures used for activating periodic scan when active.
    //

    KDPC ScanDpc;
    KTIMER ScanTimer;

    //
    //  Boolean to say whether Lazy Writer scan is active or not.
    //

    BOOLEAN ScanActive;

    //
    //  Boolean indicating if there is any other reason for Lazy Writer to
    //  wake up.
    //

    BOOLEAN OtherWork;

} LAZY_WRITER;

#ifndef KDEXT
//
//  Work queue entry for the worker threads, with an enumerated
//  function code.
//

typedef enum _WORKER_FUNCTION {
    Noop = 0,
    ReadAhead,
    WriteBehind,
    LazyWriteScan,
    EventSet
} WORKER_FUNCTION;
#endif

typedef struct _WORK_QUEUE_ENTRY {

    //
    //  List entry for our work queues.
    //

    LIST_ENTRY WorkQueueLinks;

    //
    //  Define a union to contain function-specific parameters.
    //

    union {

        //
        //  Read parameters (for read ahead)
        //

        struct {
            PFILE_OBJECT FileObject;
        } Read;

        //
        //  Write parameters (for write behind)
        //

        struct {
            PSHARED_CACHE_MAP SharedCacheMap;
        } Write;

        //
        //  Set event parameters (for queue checks)
        //

        struct {
            PKEVENT Event;
        } Event;

    } Parameters;

    //
    //  Function code for this entry:
    //

    UCHAR Function;

} WORK_QUEUE_ENTRY, *PWORK_QUEUE_ENTRY;
//
//  This is a structure appended to the end of an MDL.
//

typedef struct _MDL_WRITE {

    //
    //  This field is for the use of the Server to stash anything interesting
    //

    PVOID ServerContext;

    //
    //  This is the resource to release when the write is complete.
    //

    PERESOURCE Resource;

    //
    //  This is the caller's thread, and the thread that must release
    //  the resource.
    //

    ERESOURCE_THREAD Thread;

    //
    //  This links all the pending MDLs through the shared cache map.
    //

    LIST_ENTRY MdlLinks;

} MDL_WRITE, *PMDL_WRITE;
//
//  Common Private routine definitions for the Cache Manager
//

VOID
CcGetActiveVacb (
    IN PSHARED_CACHE_MAP SharedCacheMap,
    OUT PVACB *Vacb,
    OUT PULONG Page,
    OUT PULONG Dirty
    );

VOID
CcSetActiveVacb (
    IN PSHARED_CACHE_MAP SharedCacheMap,
    IN OUT PVACB *Vacb,
    IN ULONG Page,
    IN ULONG Dirty
    );

//
//  We trim out the previous macro-forms of Get/Set (nondpc) so that we can page
//  more cache manager code that otherwise does not acquire spinlocks.
//

#define GetActiveVacb(SCM,IRQ,V,P,D) CcGetActiveVacb((SCM),&(V),&(P),&(D))
#define SetActiveVacb(SCM,IRQ,V,P,D) CcSetActiveVacb((SCM),&(V),(P),(D))

#define GetActiveVacbAtDpcLevel(SCM,V,P,D) {                        \
    ExAcquireSpinLockAtDpcLevel(&(SCM)->ActiveVacbSpinLock);        \
    (V) = (SCM)->ActiveVacb;                                        \
    if ((V) != NULL) {                                              \
        (P) = (SCM)->ActivePage;                                    \
        (SCM)->ActiveVacb = NULL;                                   \
        (D) = (SCM)->Flags & ACTIVE_PAGE_IS_DIRTY;                  \
    }                                                               \
    ExReleaseSpinLockFromDpcLevel(&(SCM)->ActiveVacbSpinLock);      \
}

//
//  Gather the common work of charging and deducting dirty page counts.  When
//  write hysteresis was being considered during Windows XP, this also helped
//  gather up the activation of that throttle.
//

#define CcDeductDirtyPages( S, P )                  \
    CcTotalDirtyPages -= (P);                       \
    (S)->DirtyPages -= (P);

#define CcChargeMaskDirtyPages( S, M, B, P )        \
    CcTotalDirtyPages += (P);                       \
    (M)->DirtyPages += (P);                         \
    (B)->DirtyPages += (P);                         \
    (S)->DirtyPages += (P);

#define CcChargePinDirtyPages( S, P )               \
    CcTotalDirtyPages += (P);                       \
    (S)->DirtyPages += (P);
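//
//  Illustrative note (not part of the original header): these macros expand
//  to unbraced multi-statement bodies, so callers invoke them as complete
//  statements while holding the master lock (which synchronizes the dirty
//  page counts), e.g. when dirtying pages through a mask Bcb:
//
//      CcAcquireMasterLock( &OldIrql );
//
//      CcChargeMaskDirtyPages( SharedCacheMap, Mbcb, BitmapRange, Pages );
//
//      CcReleaseMasterLock( OldIrql );
//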
VOID
CcPostDeferredWrites (
    );

BOOLEAN
CcPinFileData (
    IN PFILE_OBJECT FileObject,
    IN PLARGE_INTEGER FileOffset,
    IN ULONG Length,
    IN BOOLEAN ReadOnly,
    IN BOOLEAN WriteOnly,
    IN ULONG Flags,
    OUT PBCB *Bcb,
    OUT PVOID *BaseAddress,
    OUT PLARGE_INTEGER BeyondLastByte
    );

typedef enum {
    UNPIN,
    UNREF,
    SET_CLEAN
} UNMAP_ACTIONS;

VOID
FASTCALL
CcUnpinFileData (
    IN OUT PBCB Bcb,
    IN BOOLEAN ReadOnly,
    IN UNMAP_ACTIONS UnmapAction
    );

VOID
FASTCALL
CcDeallocateBcb (
    IN PBCB Bcb
    );

VOID
FASTCALL
CcPerformReadAhead (
    IN PFILE_OBJECT FileObject
    );

VOID
CcSetDirtyInMask (
    IN PSHARED_CACHE_MAP SharedCacheMap,
    IN PLARGE_INTEGER FileOffset,
    IN ULONG Length
    );

VOID
FASTCALL
CcWriteBehind (
    IN PSHARED_CACHE_MAP SharedCacheMap,
    IN PIO_STATUS_BLOCK IoStatus
    );

#define ZERO_FIRST_PAGE                  1
#define ZERO_MIDDLE_PAGES                2
#define ZERO_LAST_PAGE                   4

BOOLEAN
CcMapAndRead(
    IN PSHARED_CACHE_MAP SharedCacheMap,
    IN PLARGE_INTEGER FileOffset,
    IN ULONG Length,
    IN ULONG ZeroFlags,
    IN BOOLEAN Wait,
    IN PVOID BaseAddress
    );

VOID
CcFreeActiveVacb (
    IN PSHARED_CACHE_MAP SharedCacheMap,
    IN PVACB ActiveVacb OPTIONAL,
    IN ULONG ActivePage,
    IN ULONG PageIsDirty
    );

VOID
CcMapAndCopy(
    IN PSHARED_CACHE_MAP SharedCacheMap,
    IN PVOID UserBuffer,
    IN PLARGE_INTEGER FileOffset,
    IN ULONG Length,
    IN ULONG ZeroFlags,
    IN PFILE_OBJECT FileObject
    );

VOID
CcScanDpc (
    IN PKDPC Dpc,
    IN PVOID DeferredContext,
    IN PVOID SystemArgument1,
    IN PVOID SystemArgument2
    );

VOID
CcScheduleLazyWriteScan (
    IN BOOLEAN FastScan
    );

VOID
CcStartLazyWriter (
    IN PVOID NotUsed
    );

#define CcAllocateWorkQueueEntry() \
    (PWORK_QUEUE_ENTRY)ExAllocateFromPPLookasideList(LookasideTwilightList)

#define CcFreeWorkQueueEntry(_entry_) \
    ExFreeToPPLookasideList(LookasideTwilightList, (_entry_))

VOID
FASTCALL
CcPostWorkQueue (
    IN PWORK_QUEUE_ENTRY WorkQueueEntry,
    IN PLIST_ENTRY WorkQueue
    );

VOID
CcWorkerThread (
    PVOID ExWorkQueueItem
    );

VOID
FASTCALL
CcDeleteSharedCacheMap (
    IN PSHARED_CACHE_MAP SharedCacheMap,
    IN KIRQL ListIrql,
    IN ULONG ReleaseFile
    );
//
//  This exception filter handles STATUS_IN_PAGE_ERROR correctly
//

LONG
CcCopyReadExceptionFilter(
    IN PEXCEPTION_POINTERS ExceptionPointer,
    IN PNTSTATUS ExceptionCode
    );

//
//  Exception filter for Worker Threads in lazyrite.c
//

LONG
CcExceptionFilter (
    IN NTSTATUS ExceptionCode
    );

#ifdef CCDBG
VOID
CcDump (
    IN PVOID Ptr
    );
#endif

//
//  Vacb routines
//

VOID
CcInitializeVacbs(
    );

PVOID
CcGetVirtualAddressIfMapped (
    IN PSHARED_CACHE_MAP SharedCacheMap,
    IN LONGLONG FileOffset,
    OUT PVACB *Vacb,
    OUT PULONG ReceivedLength
    );

PVOID
CcGetVirtualAddress (
    IN PSHARED_CACHE_MAP SharedCacheMap,
    IN LARGE_INTEGER FileOffset,
    OUT PVACB *Vacb,
    OUT PULONG ReceivedLength
    );

VOID
FASTCALL
CcFreeVirtualAddress (
    IN PVACB Vacb
    );

VOID
CcReferenceFileOffset (
    IN PSHARED_CACHE_MAP SharedCacheMap,
    IN LARGE_INTEGER FileOffset
    );

VOID
CcDereferenceFileOffset (
    IN PSHARED_CACHE_MAP SharedCacheMap,
    IN LARGE_INTEGER FileOffset
    );

VOID
CcWaitOnActiveCount (
    IN PSHARED_CACHE_MAP SharedCacheMap
    );

NTSTATUS
FASTCALL
CcCreateVacbArray (
    IN PSHARED_CACHE_MAP SharedCacheMap,
    IN LARGE_INTEGER NewSectionSize
    );

NTSTATUS
CcExtendVacbArray (
    IN PSHARED_CACHE_MAP SharedCacheMap,
    IN LARGE_INTEGER NewSectionSize
    );

BOOLEAN
FASTCALL
CcUnmapVacbArray (
    IN PSHARED_CACHE_MAP SharedCacheMap,
    IN PLARGE_INTEGER FileOffset OPTIONAL,
    IN ULONG Length,
    IN BOOLEAN UnmapBehind
    );

VOID
CcAdjustVacbLevelLockCount (
    IN PSHARED_CACHE_MAP SharedCacheMap,
    IN LONGLONG FileOffset,
    IN LONG Adjustment
    );

PLIST_ENTRY
CcGetBcbListHeadLargeOffset (
    IN PSHARED_CACHE_MAP SharedCacheMap,
    IN LONGLONG FileOffset,
    IN BOOLEAN FailToSuccessor
    );

ULONG
CcPrefillVacbLevelZone (
    IN ULONG NumberNeeded,
    OUT PKIRQL OldIrql,
    IN ULONG NeedBcbListHeads
    );

VOID
CcDrainVacbLevelZone (
    );

//
//  Define references to global data
//

extern KSPIN_LOCK CcBcbSpinLock;
extern LIST_ENTRY CcCleanSharedCacheMapList;
extern SHARED_CACHE_MAP_LIST_CURSOR CcDirtySharedCacheMapList;
extern SHARED_CACHE_MAP_LIST_CURSOR CcLazyWriterCursor;
extern GENERAL_LOOKASIDE CcTwilightLookasideList;
extern ULONG CcNumberWorkerThreads;
extern ULONG CcNumberActiveWorkerThreads;
extern LIST_ENTRY CcIdleWorkerThreadList;
extern LIST_ENTRY CcExpressWorkQueue;
extern LIST_ENTRY CcRegularWorkQueue;
extern LIST_ENTRY CcPostTickWorkQueue;
extern BOOLEAN CcQueueThrottle;
extern ULONG CcIdleDelayTick;
extern LARGE_INTEGER CcNoDelay;
extern LARGE_INTEGER CcFirstDelay;
extern LARGE_INTEGER CcIdleDelay;
extern LARGE_INTEGER CcCollisionDelay;
extern LARGE_INTEGER CcTargetCleanDelay;
extern LAZY_WRITER LazyWriter;
extern ULONG_PTR CcNumberVacbs;
extern PVACB CcVacbs;
extern PVACB CcBeyondVacbs;
extern LIST_ENTRY CcVacbLru;
extern LIST_ENTRY CcVacbFreeList;
extern KSPIN_LOCK CcDeferredWriteSpinLock;
extern LIST_ENTRY CcDeferredWrites;
extern ULONG CcDirtyPageThreshold;
extern ULONG CcDirtyPageTarget;
extern ULONG CcDirtyPagesLastScan;
extern ULONG CcPagesYetToWrite;
extern ULONG CcPagesWrittenLastTime;
extern ULONG CcThrottleLastTime;
extern ULONG CcDirtyPageHysteresisThreshold;
extern PSHARED_CACHE_MAP CcSingleDirtySourceDominant;
extern ULONG CcAvailablePagesThreshold;
extern ULONG CcTotalDirtyPages;
extern ULONG CcTune;
extern LONG CcAggressiveZeroCount;
extern LONG CcAggressiveZeroThreshold;
extern ULONG CcLazyWriteHotSpots;
extern MM_SYSTEMSIZE CcCapturedSystemSize;
extern ULONG CcMaxVacbLevelsSeen;
extern ULONG CcVacbLevelEntries;
extern PVACB *CcVacbLevelFreeList;
extern ULONG CcVacbLevelWithBcbsEntries;
extern PVACB *CcVacbLevelWithBcbsFreeList;
//
//  Macros for allocating and deallocating Vacb levels - CcVacbSpinLock must
//  be acquired.
//

_inline
PVACB *
CcAllocateVacbLevel (
    IN LOGICAL AllocatingBcbListHeads
    )

{
    PVACB *ReturnEntry;

    if (AllocatingBcbListHeads) {
        ReturnEntry = CcVacbLevelWithBcbsFreeList;
        CcVacbLevelWithBcbsFreeList = (PVACB *)*ReturnEntry;
        CcVacbLevelWithBcbsEntries -= 1;
    } else {
        ReturnEntry = CcVacbLevelFreeList;
        CcVacbLevelFreeList = (PVACB *)*ReturnEntry;
        CcVacbLevelEntries -= 1;
    }

    *ReturnEntry = NULL;

    ASSERT(RtlCompareMemory(ReturnEntry, ReturnEntry + 1, VACB_LEVEL_BLOCK_SIZE - sizeof(PVACB)) ==
           (VACB_LEVEL_BLOCK_SIZE - sizeof(PVACB)));

    return ReturnEntry;
}

_inline
VOID
CcDeallocateVacbLevel (
    IN PVACB *Entry,
    IN LOGICAL DeallocatingBcbListHeads
    )

{
    if (DeallocatingBcbListHeads) {
        *Entry = (PVACB)CcVacbLevelWithBcbsFreeList;
        CcVacbLevelWithBcbsFreeList = Entry;
        CcVacbLevelWithBcbsEntries += 1;
    } else {
        *Entry = (PVACB)CcVacbLevelFreeList;
        CcVacbLevelFreeList = Entry;
        CcVacbLevelEntries += 1;
    }
}

//
//  Export the macros for inspecting the reference counts for
//  the multilevel Vacb array.
//

_inline
PVACB_LEVEL_REFERENCE
VacbLevelReference (
    IN PSHARED_CACHE_MAP SharedCacheMap,
    IN PVACB *VacbArray,
    IN ULONG Level
    )

{
    return (PVACB_LEVEL_REFERENCE)
           ((PCHAR)VacbArray +
            VACB_LEVEL_BLOCK_SIZE +
            (Level != 0 ?
               0 : (FlagOn( SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED ) ?
                      VACB_LEVEL_BLOCK_SIZE : 0)));
}

_inline
ULONG
IsVacbLevelReferenced (
    IN PSHARED_CACHE_MAP SharedCacheMap,
    IN PVACB *VacbArray,
    IN ULONG Level
    )

{
    PVACB_LEVEL_REFERENCE VacbReference = VacbLevelReference( SharedCacheMap, VacbArray, Level );

    return VacbReference->Reference | VacbReference->SpecialReference;
}
//
//  Here is a page of macros stolen directly from Pinball...
//
//  The following macros are used to establish the semantics needed
//  to do a return from within a try-finally clause.  As a rule every
//  try clause must end with a label called try_exit.  For example,
//
//      try {
//              :
//              :
//
//      try_exit: NOTHING;
//      } finally {
//
//              :
//              :
//      }
//
//  Every return statement executed inside of a try clause should use the
//  try_return macro.  If the compiler fully supports the try-finally construct
//  then the macro should be
//
//      #define try_return(S)  { return(S); }
//
//  If the compiler does not support the try-finally construct then the macro
//  should be
//
//      #define try_return(S)  { S; goto try_exit; }
//

#define try_return(S) { S; goto try_exit; }
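//
//  Illustrative usage sketch (not part of the original header): every return
//  inside the try body goes through try_return so that control always
//  reaches the finally clause via the try_exit label.
//
//      try {
//
//          if (!NT_SUCCESS( Status )) {
//
//              try_return( Status );
//          }
//
//          Status = STATUS_SUCCESS;
//
//      try_exit: NOTHING;
//      } finally {
//
//          //  ... cleanup that must run on every path ...
//      }
//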
#ifdef CCDBG

extern LONG CcDebugTraceLevel;
extern LONG CcDebugTraceIndent;

#ifndef CCDBG_LOCK

#define DebugTrace(INDENT,LEVEL,X,Y) {                          \
    LONG _i;                                                    \
    if (((LEVEL) == 0) || (CcDebugTraceLevel & (LEVEL))) {      \
        _i = (ULONG)PsGetCurrentThread();                       \
        DbgPrint("%08lx:",_i);                                  \
        if ((INDENT) < 0) {                                     \
            CcDebugTraceIndent += (INDENT);                     \
        }                                                       \
        if (CcDebugTraceIndent < 0) {                           \
            CcDebugTraceIndent = 0;                             \
        }                                                       \
        for (_i=0; _i<CcDebugTraceIndent; _i+=1) {              \
            DbgPrint(" ");                                      \
        }                                                       \
        DbgPrint(X,Y);                                          \
        if ((INDENT) > 0) {                                     \
            CcDebugTraceIndent += (INDENT);                     \
        }                                                       \
    }                                                           \
}

#define DebugTrace2(INDENT,LEVEL,X,Y,Z) {                       \
    LONG _i;                                                    \
    if (((LEVEL) == 0) || (CcDebugTraceLevel & (LEVEL))) {      \
        _i = (ULONG)PsGetCurrentThread();                       \
        DbgPrint("%08lx:",_i);                                  \
        if ((INDENT) < 0) {                                     \
            CcDebugTraceIndent += (INDENT);                     \
        }                                                       \
        if (CcDebugTraceIndent < 0) {                           \
            CcDebugTraceIndent = 0;                             \
        }                                                       \
        for (_i=0; _i<CcDebugTraceIndent; _i+=1) {              \
            DbgPrint(" ");                                      \
        }                                                       \
        DbgPrint(X,Y,Z);                                        \
        if ((INDENT) > 0) {                                     \
            CcDebugTraceIndent += (INDENT);                     \
        }                                                       \
    }                                                           \
}

#define DebugDump(STR,LEVEL,PTR) {                              \
    LONG _i;                                                    \
    VOID CcDump();                                              \
    if (((LEVEL) == 0) || (CcDebugTraceLevel & (LEVEL))) {      \
        _i = (ULONG)PsGetCurrentThread();                       \
        DbgPrint("%08lx:",_i);                                  \
        DbgPrint(STR);                                          \
        if (PTR != NULL) {CcDump(PTR);}                         \
        DbgBreakPoint();                                        \
    }                                                           \
}

#else   //  ndef CCDBG_LOCK

extern KSPIN_LOCK CcDebugTraceLock;

#define DebugTrace(INDENT,LEVEL,X,Y) {                          \
    LONG _i;                                                    \
    KIRQL _oldIrql;                                             \
    if (((LEVEL) == 0) || (CcDebugTraceLevel & (LEVEL))) {      \
        _i = (ULONG)PsGetCurrentThread();                       \
        ExAcquireSpinLock( &CcDebugTraceLock, &_oldIrql );      \
        DbgPrint("%08lx:",_i);                                  \
        if ((INDENT) < 0) {                                     \
            CcDebugTraceIndent += (INDENT);                     \
        }                                                       \
        if (CcDebugTraceIndent < 0) {                           \
            CcDebugTraceIndent = 0;                             \
        }                                                       \
        for (_i=0; _i<CcDebugTraceIndent; _i+=1) {              \
            DbgPrint(" ");                                      \
        }                                                       \
        DbgPrint(X,Y);                                          \
        if ((INDENT) > 0) {                                     \
            CcDebugTraceIndent += (INDENT);                     \
        }                                                       \
        ExReleaseSpinLock( &CcDebugTraceLock, _oldIrql );       \
    }                                                           \
}

#define DebugTrace2(INDENT,LEVEL,X,Y,Z) {                       \
    LONG _i;                                                    \
    KIRQL _oldIrql;                                             \
    if (((LEVEL) == 0) || (CcDebugTraceLevel & (LEVEL))) {      \
        _i = (ULONG)PsGetCurrentThread();                       \
        ExAcquireSpinLock( &CcDebugTraceLock, &_oldIrql );      \
        DbgPrint("%08lx:",_i);                                  \
        if ((INDENT) < 0) {                                     \
            CcDebugTraceIndent += (INDENT);                     \
        }                                                       \
        if (CcDebugTraceIndent < 0) {                           \
            CcDebugTraceIndent = 0;                             \
        }                                                       \
        for (_i=0; _i<CcDebugTraceIndent; _i+=1) {              \
            DbgPrint(" ");                                      \
        }                                                       \
        DbgPrint(X,Y,Z);                                        \
        if ((INDENT) > 0) {                                     \
            CcDebugTraceIndent += (INDENT);                     \
        }                                                       \
        ExReleaseSpinLock( &CcDebugTraceLock, _oldIrql );       \
    }                                                           \
}

#define DebugDump(STR,LEVEL,PTR) {                              \
    LONG _i;                                                    \
    KIRQL _oldIrql;                                             \
    VOID CcDump();                                              \
    if (((LEVEL) == 0) || (CcDebugTraceLevel & (LEVEL))) {      \
        _i = (ULONG)PsGetCurrentThread();                       \
        ExAcquireSpinLock( &CcDebugTraceLock, &_oldIrql );      \
        DbgPrint("%08lx:",_i);                                  \
        DbgPrint(STR);                                          \
        if (PTR != NULL) {CcDump(PTR);}                         \
        DbgBreakPoint();                                        \
        ExReleaseSpinLock( &CcDebugTraceLock, _oldIrql );       \
    }                                                           \
}

#endif  //  else ndef CCDBG_LOCK

#else

#undef CCDBG_LOCK

#define DebugTrace(INDENT,LEVEL,X,Y)     {NOTHING;}
#define DebugTrace2(INDENT,LEVEL,X,Y,Z)  {NOTHING;}
#define DebugDump(STR,LEVEL,PTR)         {NOTHING;}

#endif  // CCDBG

//
//  Global list of pinned Bcbs which may be examined for debug purposes
//

#if DBG
extern ULONG CcBcbCount;
extern LIST_ENTRY CcBcbList;
#endif

#endif  //  _CCh_