Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

293 lines
7.2 KiB

  1. /*++
  2. Module Name:
  3. i64cache.c
  4. Abstract:
  5. Merced (IA64 processor) has level0 Instruction and Data cache. Level 1 is
  6. unified Cache. All the caches in level0 and level1 are writeback caches.
  7. Hardware ensures coherency in both instruction and data cache for DMA
  8. transfers.
  9. Level0 Instruction and Data caches are not coherent with respect to self
  10. modifying or cross modifying code. Also for PIO transfers hardware does not
  11. ensure coherency. Software has to ensure coherency for self or cross
  12. modifying code as well as PIO transfers.
  13. Author:
  14. Bernard Lint
  15. M. Jayakumar (Muthurajan.Jayakumar@intel.com)
  16. Environment:
  17. Kernel mode
  18. Revision History:
  19. --*/
  20. #include "halp.h"
  21. #include "i64fw.h"
  22. ULONG CacheFlushStride = 64; // Default value is the one for Itanium
  23. VOID
  24. HalpInitCacheInfo(
  25. ULONG Stride
  26. )
  27. /*++
  28. Routine Description:
  29. This sets the stride used for FC instructions.
  30. Arguments:
  31. Stride - New stride value.
  32. Return Value:
  33. None.
  34. --*/
  35. {
  36. //
  37. // Perform a number of consistency checks on the argument. If any of them
  38. // fail we will leave CacheFlushStride at the default.
  39. //
  40. // Since the source of this value is a PAL call done by the loader and
  41. // passed in the loader block we always stand the risk that the loader
  42. // will be out of date and we will get some garbage from uninitialized
  43. // memory.
  44. //
  45. //
  46. // The stride value must be a power of 2.
  47. //
  48. if ((Stride & (Stride - 1)) != 0) {
  49. return;
  50. }
  51. //
  52. // The Itanium architecture specifies a minimum of 32
  53. //
  54. if (Stride < 32) {
  55. return;
  56. }
  57. CacheFlushStride = Stride;
  58. }
  59. VOID
  60. HalSweepIcache (
  61. )
  62. /*++
  63. Routine Description:
  64. This function sweeps the entire I cache on the processor which it runs.
  65. Arguments:
  66. None.
  67. Return Value:
  68. None.
  69. NOTE: Anyone modifying the code for HalSweepIcache should note that
  70. HalSweepIcache CANNOT USE the FC instruction (or any routine that uses FC
  71. instruction, for example, HalSweepIcacheRange).
  72. This is because FC can generate page faults and if HalSweepIcache raises its
  73. IRQL (for avoiding context switch) then page faults will not be tolerated at
  74. a raied IRQL.
  75. --*/
  76. {
  77. //
  78. // Calls SAL_FLUSH to flush the single processor I cache on which it runs
  79. // and the platform cache, if any.
  80. // Calls PAL_FLUSH to flush only the processor I cache on which it runs.
  81. // PAL_FLUSH does not flush the platform cache.
  82. // The decision to choose PAL_FLUSH or SAL_FLUSH is made using a
  83. // interlockedCompareExchange to a semaphore.This allows only one processor
  84. // to call SAL_FLUSH and other processors to call PAL_FLUSH. This avoids
  85. // unnecessary overhead of flushing the platform cache multiple times.
  86. // The assumption in using InterlockedCompareExchange is that by the time
  87. // the CPU which grabs the semaphore comes out after doing the SAL_FLUSH,
  88. // all other CPUs at least have entered their PAL_FLUSH. If this assumption
  89. // is voilated, the platform cache will be flushed multiple times.
  90. // Functionally nothing fails.
  91. SAL_PAL_RETURN_VALUES rv = {0};
  92. HalpPalCall(PAL_CACHE_FLUSH, FLUSH_COHERENT,0,0,&rv);
  93. }
  94. VOID
  95. HalSweepDcache (
  96. )
  97. /*++
  98. Routine Description:
  99. This function sweeps the entire D cache on ths processor which it runs.
  100. Arguments:
  101. None.
  102. Return Value:
  103. None.
  104. NOTE: Anyone modifying this code for HalSweepDcache should note that
  105. HalSweepDcache CANNOT USE FC instruction (or any routine that uses FC
  106. instruction,for example,HalSweepDcacheRange).
  107. This is because FC can generate page faults and if HalSweepDcache raises its
  108. IRQL (for avoiding context switch) then page faults will not be tolerated at
  109. a raied IRQL.
  110. --*/
  111. {
  112. //
  113. // Calls SAL_FLUSH to flush the single processor D cache on which it runs
  114. // and the platform cache, if any.
  115. // Calls PAL_FLUSH to flush only the processor D cache on which it runs.
  116. // PAL_FLUSH does not flush the platform cache.
  117. // The decision to choose PAL_FLUSH or SAL_FLUSH is made using a
  118. // interlockedCompareExchange to a semaphore.This allows only one processor
  119. // to call SAL_FLUSH and other processors to call PAL_FLUSH. This avoids
  120. // unnecessary overhead of flushing the platform cache multiple times.
  121. // The assumption in using InterlockedCompareExchange is that by the time
  122. // the CPU which grabs the semaphore comes out after doing the SAL_FLUSH,
  123. // all other CPUs at least have entered their PAL_FLUSH. If this assumption
  124. // is violated, the platform cache will be flushed multiple times.
  125. // Functionally nothing fails.
  126. //
  127. //
  128. SAL_PAL_RETURN_VALUES rv = {0};
  129. HalpSalCall(SAL_CACHE_FLUSH,FLUSH_DATA_CACHE,0,0,0,0,0,0,&rv);
  130. }
  131. VOID
  132. HalSweepCacheRange (
  133. IN PVOID BaseAddress,
  134. IN SIZE_T Length
  135. )
  136. /*++
  137. Routine Description:
  138. This function sweeps the range of address in the I cache throughout the
  139. system.
  140. Arguments:
  141. BaseAddress - Supplies the starting virtual address of a range of
  142. virtual addresses that are to be flushed from the data cache.
  143. Length - Supplies the length of the range of virtual addresses
  144. that are to be flushed from the data cache.
  145. Return Value:
  146. None.
  147. PS: HalSweepCacheRange just flushes the cache. It does not synchrnoize the
  148. I-Fetch pipeline with the flush operation. To Achieve pipeline flush also,
  149. one has to call KeSweepCacheRange.
  150. --*/
  151. {
  152. ULONGLONG SweepAddress, LastAddress;
  153. //
  154. // Do we need to prevent a context switch? No. We will allow context
  155. // switching in between fc.
  156. // Flush the specified range of virtual addresses from the primary
  157. // instruction cache.
  158. //
  159. //
  160. // Since Merced hardware aligns the address on cache line boundary for
  161. // flush cache instruction we don't have to align it ourselves. However
  162. // the boundary cases are much easier to get right if we just align it.
  163. //
  164. SweepAddress = ((ULONGLONG)BaseAddress & ~((ULONGLONG)CacheFlushStride - 1));
  165. LastAddress = (ULONGLONG)BaseAddress + Length;
  166. do {
  167. __fc((__int64)SweepAddress);
  168. SweepAddress += CacheFlushStride;
  169. } while (SweepAddress < LastAddress);
  170. }
  171. VOID
  172. HalSweepDcacheRange (
  173. IN PVOID BaseAddress,
  174. IN SIZE_T Length
  175. )
  176. /*++
  177. Routine Description:
  178. This function sweeps the range of address in the D cache throughout the
  179. system.
  180. Arguments:
  181. BaseAddress - Supplies the starting virtual address of a range of
  182. virtual addresses that are to be flushed from the data cache.
  183. Length - Supplies the length of the range of virtual addresses
  184. that are to be flushed from the data cache.
  185. Return Value:
  186. None.
  187. PS: HalSweepCacheRange just flushes the cache. It does not synchrnoizes the
  188. I-Fetch pipeline with the flush operation. To Achieve pipeline flush also,
  189. one has to call KeSweepCacheRange.
  190. --*/
  191. {
  192. HalSweepCacheRange(BaseAddress,Length);
  193. }