/*++

Module Name:

    flush.c

Abstract:

    This module implements IA64 machine dependent kernel functions to flush
    the data and instruction caches and to flush I/O buffers.

Author:

    07-Mar-1996

    Bernard Lint
    M. Jayakumar (Muthurajan.Jayakumar@intel.com)

Environment:

    Kernel mode only.

Revision History:

--*/

#include "ki.h"
#include "kxia64.h"

//
// Prefetch-visibility probe state.
//
// ProbePalVisibilitySupport starts out as 1 and is only ever written once
// (reset to 0) by KiSyncPrefetchVisible after the first probe; from then on
// it is only read. NeedPalVisibilitySupport starts out as 1 and is cleared
// by that same probe when PAL reports that no support is needed, so that
// later calls can skip the PAL call entirely.
//

ULONG ProbePalVisibilitySupport=1;
ULONG NeedPalVisibilitySupport=1;

extern KSPIN_LOCK KiCacheFlushLock;

//
// Define forward referenced prototypes.
//

VOID
KiSweepDcacheTarget (
    IN PULONG SignalDone,
    IN PVOID Parameter1,
    IN PVOID Parameter2,
    IN PVOID Parameter3
    );

VOID
KiSweepIcacheTarget (
    IN PULONG SignalDone,
    IN PVOID Parameter1,
    IN PVOID Parameter2,
    IN PVOID Parameter3
    );

VOID
KiFlushIoBuffersTarget (
    IN PKIPI_CONTEXT SignalDone,
    IN PVOID Mdl,
    IN PVOID ReadOperation,
    IN PVOID DmaOperation
    );

VOID
KiSyncCacheTarget(
    IN PKIPI_CONTEXT SignalDone,
    IN PVOID Parameter1,
    IN PVOID Parameter2,
    IN PVOID Parameter3
    );

ULONG_PTR
KiSyncMC_DrainTarget(
    );

ULONG_PTR
KiSyncMC_Drain(
    IN BOOLEAN AllProcessors,
    IN PVOID BaseAddress,
    IN ULONG Length
    );

ULONG_PTR
KiSyncPrefetchVisibleTarget(
    );

ULONG_PTR
KiSyncPrefetchVisible (
    IN BOOLEAN AllProcessors,
    IN PVOID BaseAddress,
    IN ULONG Length
    );

VOID
KiSyncCacheTarget (
    IN PKIPI_CONTEXT SignalDone,
    IN PVOID Parameter1,
    IN PVOID Parameter2,
    IN PVOID Parameter3
    )

/*++

Routine Description:

    IPI target routine that synchronizes the instruction fetch pipeline of
    the processor it runs on. It is intended to be run by every processor
    in response to an IPI sent after a cache flush.

    Per the original author's note, the RFI executed by each processor when
    it leaves the IPI provides the serialization needed after the sync.i so
    that further instruction prefetches wait until the sync completes.

    On uniprocessor (NT_UP) builds this routine does nothing.

Arguments:

    SignalDone - Supplies the packet context that is passed to
        KiIpiSignalPacketDone once the requested operation has been
        performed.

    Parameter1 - Parameter3 - Not used.

Return Value:

    None.

--*/

{

    UNREFERENCED_PARAMETER (Parameter1);
    UNREFERENCED_PARAMETER (Parameter2);
    UNREFERENCED_PARAMETER (Parameter3);

#if defined(NT_UP)

    UNREFERENCED_PARAMETER (SignalDone);

#else

    //
    // Synchronize the local instruction stream, then tell the sender
    // that this processor is finished.
    //

    __synci();
    KiIpiSignalPacketDone(SignalDone);

#endif

}

VOID
KeSweepIcache (
    IN BOOLEAN AllProcessors
    )

/*++

Routine Description:

    Sweeps the instruction cache on the current processor and, on
    multiprocessor builds, on every processor in the set
    KeActiveProcessors & PCR->NotMember (presumably every active processor
    other than the current one).

    The whole operation is performed while holding KiCacheFlushLock.

    NOTE(review): the previous header carried an N.B. describing PowerPC
    flash-invalidate behavior as the reason for doing the MP flush in
    software. That text appears to be inherited from another port; confirm
    the actual IA64 rationale.

Arguments:

    AllProcessors - Not used by this implementation.

Return Value:

    None.

--*/

{

#if !defined(NT_UP)

    KAFFINITY Targets;
    KIRQL PreviousIrql;

#endif

    UNREFERENCED_PARAMETER (AllProcessors);

    ASSERT(KeGetCurrentIrql() <= DISPATCH_LEVEL);

#if !defined(NT_UP)

    //
    // Cache flush is not MP safe yet, so serialize all sweeps with the
    // cache flush spinlock.
    //

    KeAcquireSpinLock(&KiCacheFlushLock, &PreviousIrql);

#endif

    //
    // Sweep the instruction cache of the current processor.
    //

    HalSweepIcache();

#if !defined(NT_UP)

    //
    // Hand the sweep to the remaining processors, if there are any, and
    // stall until all of them have finished sweeping their instruction
    // caches.
    //

    Targets = KeActiveProcessors & PCR->NotMember;
    if (Targets != 0) {
        KiIpiSendPacket(Targets,
                        KiSweepIcacheTarget,
                        NULL,
                        NULL,
                        NULL);

        KiIpiStallOnPacketTargets(Targets);
    }

    //
    // Drop the lock, which also restores the IRQL saved at acquisition.
    //

    KeReleaseSpinLock(&KiCacheFlushLock, PreviousIrql);

#endif

}

#undef KeSweepCurrentIcache

VOID
KeSweepCurrentIcache(
    )

/*++

Routine Description:

    Makes the instruction cache of the current processor coherent with the
    data cache by calling HalSweepIcache. It is generally called by the
    debugger.

Arguments:

    None.

Return Value:

    None.

Note:

    This routine only works correctly when it is called on the same
    processor that modified the instruction memory.

    This routine can be called at high IRQL.

--*/

{

    HalSweepIcache();
}

VOID
KiSweepIcacheTarget (
    IN PULONG SignalDone,
    IN PVOID Parameter1,
    IN PVOID Parameter2,
    IN PVOID Parameter3
    )

/*++

Routine Description:

    IPI target routine that sweeps the instruction cache of the processor
    it runs on and then signals the sender that it is done.

    On uniprocessor (NT_UP) builds this routine does nothing.

    NOTE(review): SignalDone is typed PULONG here while the other IPI
    targets declared in this file take PKIPI_CONTEXT; confirm whether the
    difference is intentional.

Arguments:

    SignalDone - Supplies the value that is passed to
        KiIpiSignalPacketDone once the sweep has been performed.

    Parameter1 - Parameter3 - Not used.

Return Value:

    None.

--*/

{

    UNREFERENCED_PARAMETER (Parameter1);
    UNREFERENCED_PARAMETER (Parameter2);
    UNREFERENCED_PARAMETER (Parameter3);

#if defined(NT_UP)

    UNREFERENCED_PARAMETER (SignalDone);

#else

    //
    // Sweep first, then release the source processor.
    //

    HalSweepIcache();
    KiIpiSignalPacketDone(SignalDone);

#endif

}

VOID
KeSweepDcache (
    IN BOOLEAN AllProcessors
    )

/*++

Routine Description:

    Sweeps the data cache on the current processor and, on multiprocessor
    builds, on every processor in the set
    KeActiveProcessors & PCR->NotMember (presumably every active processor
    other than the current one).

    The whole operation is performed while holding KiCacheFlushLock.

    NOTE(review): the previous header carried an N.B. stating that PowerPC
    keeps caches coherent but the request is still broadcast because the
    address range is unknown. That text appears to be inherited from
    another port; confirm the actual IA64 rationale.

Arguments:

    AllProcessors - Not used by this implementation.

Return Value:

    None.

--*/

{

#if !defined(NT_UP)

    KAFFINITY Targets;
    KIRQL PreviousIrql;

#endif

    UNREFERENCED_PARAMETER (AllProcessors);

    ASSERT(KeGetCurrentIrql() <= DISPATCH_LEVEL);

#if !defined(NT_UP)

    //
    // Cache flush is not MP safe yet, so serialize all sweeps with the
    // cache flush spinlock.
    //

    KeAcquireSpinLock(&KiCacheFlushLock, &PreviousIrql);

#endif

    //
    // Sweep the data cache of the current processor.
    //

    HalSweepDcache();

#if !defined(NT_UP)

    //
    // Hand the sweep to the remaining processors, if there are any, and
    // stall until all of them have finished sweeping their data caches.
    //

    Targets = KeActiveProcessors & PCR->NotMember;
    if (Targets != 0) {
        KiIpiSendPacket(Targets,
                        KiSweepDcacheTarget,
                        NULL,
                        NULL,
                        NULL);

        KiIpiStallOnPacketTargets(Targets);
    }

    //
    // Drop the lock, which also restores the IRQL saved at acquisition.
    //

    KeReleaseSpinLock(&KiCacheFlushLock, PreviousIrql);

#endif

}

VOID
KiSweepDcacheTarget (
    IN PULONG SignalDone,
    IN PVOID Parameter1,
    IN PVOID Parameter2,
    IN PVOID Parameter3
    )

/*++

Routine Description:

    IPI target routine that sweeps the data cache of the processor it runs
    on and then signals the sender that it is done.

    On uniprocessor (NT_UP) builds this routine does nothing.

    NOTE(review): SignalDone is typed PULONG here while the other IPI
    targets declared in this file take PKIPI_CONTEXT; confirm whether the
    difference is intentional.

Arguments:

    SignalDone - Supplies the value that is passed to
        KiIpiSignalPacketDone once the sweep has been performed.

    Parameter1 - Parameter3 - Not used.

Return Value:

    None.

--*/

{

    UNREFERENCED_PARAMETER (Parameter1);
    UNREFERENCED_PARAMETER (Parameter2);
    UNREFERENCED_PARAMETER (Parameter3);

#if defined(NT_UP)

    UNREFERENCED_PARAMETER (SignalDone);

#else

    //
    // Sweep first, then release the source processor.
    //

    HalSweepDcache();
    KiIpiSignalPacketDone(SignalDone);

#endif

}

ULONG_PTR
KiSyncMC_DrainTarget(
    )

/*++

Routine Description:

    Target routine that issues PAL_MC_DRAIN on the processor it runs on.
    Per the original description, this drains prefetches, demand
    references and pending fc cache line evictions on that processor.

Arguments:

    None.

Return Value:

    The status returned by HalCallPal. Checked builds assert that it is
    PAL_STATUS_SUCCESS.

--*/

{

    ULONG_PTR PalStatus;

    PalStatus = HalCallPal(PAL_MC_DRAIN, 0, 0, 0, 0, 0, 0, 0);

    ASSERT(PalStatus == PAL_STATUS_SUCCESS);

    return PalStatus;
}

VOID
KeSweepCacheRange (
    IN BOOLEAN AllProcessors,
    IN PVOID BaseAddress,
    IN ULONG Length
    )

/*++

Routine Description:

    Flushes a range of virtual addresses from both the instruction and the
    data cache.

    Regardless of how long the range is, this routine must not be
    "optimized" into a call to KeSweepIcache or KeSweepDcache: each of
    those sweeps only one of the two caches, while callers of this routine
    expect both to be flushed.

    No IPI is sent and no processor binding is done here; the routine only
    calls HalSweepCacheRange and then synchronizes the local instruction
    stream.

Arguments:

    AllProcessors - Not used.

    BaseAddress - Supplies a pointer to the base of the range that is
        flushed.

    Length - Supplies the length of the range that is flushed.

Return Value:

    None.

--*/

{

    UNREFERENCED_PARAMETER (AllProcessors);

    //
    // IRQL is deliberately not raised to synchronization level: the flush
    // may be context switched and does not have to run on one processor
    // from start to finish.
    //

    HalSweepCacheRange(BaseAddress,Length);

    //
    // Synchronize the instruction prefetch pipe of the local processor.
    //

    __synci();
    __isrlz();
}

VOID
KeSweepIcacheRange (
    IN BOOLEAN AllProcessors,
    IN PVOID BaseAddress,
    IN SIZE_T Length
    )

/*++

Routine Description:

    Flushes a range of virtual addresses from the primary instruction
    cache.

    Design note from the original author: when the range is larger than
    the instruction cache one could call HalSweepIcache instead, which
    calls SAL to flush the entire cache. SAL does not handle MP flushing,
    so that path needs an IPI to run the SAL flush on every processor,
    whereas HalSweepIcacheRange uses the fc instruction, which takes care
    of MP by itself. The two costs have to be weighed against each other.

Arguments:

    AllProcessors - Not used.

    BaseAddress - Supplies a pointer to the base of the range that is
        flushed.

    Length - Supplies the length of the range that is flushed.

Return Value:

    None.

Note:

    For performance reasons this routine may one day call KeSweepIcache to
    flush the full cache when the requested range is very large.

--*/

{

    UNREFERENCED_PARAMETER (AllProcessors);

    //
    // IRQL is deliberately not raised to synchronization level: the flush
    // may be context switched and does not have to run on one processor
    // from start to finish.
    //

    HalSweepIcacheRange(BaseAddress,Length);

    //
    // Synchronize the instruction prefetch pipe of the local processor.
    //

    __synci();
    __isrlz();
}

VOID
KeSweepCurrentIcacheRange (
    IN PVOID BaseAddress,
    IN SIZE_T Length
    )

/*++

Routine Description:

    Flushes a range of virtual addresses from the primary instruction
    cache on the current processor, with IRQL raised to HIGH_LEVEL for the
    duration of the flush.

    The kernel debugger uses this after modifying memory, in case the
    instruction stream was changed. Per the original author, "fc" is used
    rather than the SAL cache flush call so that SAL is not called during
    phase 0.

Arguments:

    BaseAddress - Supplies a pointer to the base of the range that is
        flushed.

    Length - Supplies the length of the range that is flushed.

Return Value:

    None.

--*/

{

    KIRQL PreviousIrql;

    KeRaiseIrql(HIGH_LEVEL, &PreviousIrql);

    HalSweepIcacheRange(BaseAddress,Length);

    //
    // Synchronize the instruction prefetch pipe of the local processor
    // before the IRQL is restored.
    //

    __synci();
    __isrlz();

    KeLowerIrql(PreviousIrql);
}

VOID
KeSweepDcacheRange (
    IN BOOLEAN AllProcessors,
    IN PVOID BaseAddress,
    IN ULONG Length
    )

/*++

Routine Description:

    Flushes a range of virtual addresses from the primary data cache.

    Design note from the original author: when the range is larger than
    the data cache one could call HalSweepDcache instead, which calls SAL
    to flush the entire cache. SAL does not handle MP flushing, so that
    path needs an IPI to run the SAL flush on every processor, whereas
    HalSweepDcacheRange uses the fc instruction, which takes care of MP by
    itself. The two costs have to be weighed against each other.

Arguments:

    AllProcessors - Not used.

    BaseAddress - Supplies a pointer to the base of the range that is
        flushed.

    Length - Supplies the length of the range that is flushed.

Return Value:

    None.

Note:

    For performance reasons this routine may one day call KeSweepDcache to
    flush the full cache when the requested range is very large.

--*/

{

    UNREFERENCED_PARAMETER (AllProcessors);

    //
    // IRQL is deliberately not raised to synchronization level: the flush
    // may be context switched and does not have to run on one processor
    // from start to finish.
    //

    HalSweepDcacheRange(BaseAddress,Length);

    //
    // Synchronize the instruction prefetch pipe of the local processor.
    //

    __synci();
    __isrlz();
}

ULONG_PTR
KiSyncMC_Drain (
    IN BOOLEAN AllProcessors,
    IN PVOID BaseAddress,
    IN ULONG Length
    )

/*++

Routine Description:

    Runs KiSyncMC_DrainTarget, which issues PAL_MC_DRAIN, through
    KeIpiGenericCall. Per the original description this drains
    prefetches, demand references and pending fc cache line evictions on
    all the processors in the system.

Arguments:

    AllProcessors - Not used.

    BaseAddress - Not used.

    Length - Not used.

Return Value:

    The value returned by KeIpiGenericCall, i.e. the function value of
    KiSyncMC_DrainTarget. Checked builds assert that it is
    PAL_STATUS_SUCCESS.

Note:

    This is used when changing attributes of WB pages to UC pages.

--*/

{

    ULONG_PTR DrainStatus;

    UNREFERENCED_PARAMETER (AllProcessors);
    UNREFERENCED_PARAMETER (BaseAddress);
    UNREFERENCED_PARAMETER (Length);

    //
    // KeIpiGenericCall hands back the ULONG_PTR function value of the
    // worker routine it was given.
    //

    DrainStatus = KeIpiGenericCall(
                      (PKIPI_BROADCAST_WORKER)KiSyncMC_DrainTarget,
                      (ULONG_PTR)NULL);

    ASSERT(DrainStatus == PAL_STATUS_SUCCESS);

    return DrainStatus;
}

ULONG_PTR
KiSyncPrefetchVisibleTarget(
    )

/*++

Routine Description:

    Target routine that issues PAL_PREFETCH_VISIBILITY on the processor it
    runs on.

Arguments:

    None.

Return Value:

    The status returned by HalCallPal. Checked builds assert that it is
    not PAL_STATUS_ERROR.

--*/

{

    ULONG_PTR PalStatus;

    PalStatus = HalCallPal(PAL_PREFETCH_VISIBILITY, 0, 0, 0, 0, 0, 0, 0);

    ASSERT(PalStatus != PAL_STATUS_ERROR);

    return PalStatus;
}

ULONG_PTR
KiSyncPrefetchVisible (
    IN BOOLEAN AllProcessors,
    IN PVOID BaseAddress,
    IN ULONG Length
    )

/*++

Routine Description:

    Issues PAL_PREFETCH_VISIBILITY so that all pending prefetches become
    visible to subsequent fc instructions, or does nothing on processor
    implementations that do not need PAL support for disabling prefetch in
    the architectural sequence.

    The first call (ProbePalVisibilitySupport == 1) makes the PAL call on
    the current processor to find out whether support is needed, records
    the answer in NeedPalVisibilitySupport and clears the probe flag.
    When support is needed, the PAL call is then run through
    KeIpiGenericCall. Later calls (ProbePalVisibilitySupport == 0) use the
    recorded answer and either return immediately or go straight to
    KeIpiGenericCall.

Arguments:

    AllProcessors - Not used.

    BaseAddress - Not used.

    Length - Not used.

Return Value:

    Status of the PAL call:

         0  Success
         1  Call not needed
        -3  Error returned

    PAL_STATUS_ERROR is also produced when ProbePalVisibilitySupport holds
    a value other than 0 or 1. Checked builds assert that the result is
    not PAL_STATUS_ERROR.

Note:

    This is used when changing attributes of WB pages to UC pages.

--*/

{

    ULONG ProbeState;
    ULONG_PTR PalStatus;

    UNREFERENCED_PARAMETER (AllProcessors);
    UNREFERENCED_PARAMETER (BaseAddress);
    UNREFERENCED_PARAMETER (Length);

    //
    // Read the probe flag exactly once and decide on that snapshot.
    //

    ProbeState = ProbePalVisibilitySupport;

    if (ProbeState == 0) {

        //
        // The probe has already been done; act on the recorded answer.
        //

        if (NeedPalVisibilitySupport != 0) {
            PalStatus = KeIpiGenericCall(
                            (PKIPI_BROADCAST_WORKER)KiSyncPrefetchVisibleTarget,
                            (ULONG_PTR)NULL);

        } else {
            PalStatus = PAL_STATUS_SUPPORT_NOT_NEEDED;
        }

    } else if (ProbeState == 1) {

        //
        // First call: probe on the current processor and retire the probe
        // flag so that the check is not repeated.
        //

        PalStatus = KiSyncPrefetchVisibleTarget();

        ASSERT(PalStatus != PAL_STATUS_ERROR);

        ProbePalVisibilitySupport = 0;

        if (PalStatus == PAL_STATUS_SUPPORT_NOT_NEEDED) {

            //
            // Remember that PAL support is not required. PalStatus
            // already holds the value to return.
            //

            NeedPalVisibilitySupport = 0;

        } else {
            PalStatus = KeIpiGenericCall(
                            (PKIPI_BROADCAST_WORKER)KiSyncPrefetchVisibleTarget,
                            (ULONG_PTR)NULL);
        }

    } else {

        //
        // The probe flag holds an unexpected value.
        //

        PalStatus = (ULONG_PTR) PAL_STATUS_ERROR;
    }

    ASSERT(PalStatus != PAL_STATUS_ERROR);

    return PalStatus;
}

VOID
KeSweepCacheRangeWithDrain (
    IN BOOLEAN AllProcessors,
    IN PVOID BaseAddress,
    IN ULONG Length
    )

/*++

Routine Description:

    Performs, in this order:

        1. KiSyncPrefetchVisible  - make pending prefetches visible,
        2. KeSweepCacheRange      - flush the specified range,
        3. KiSyncMC_Drain         - drain via PAL_MC_DRAIN.

Arguments:

    AllProcessors - Passed through to the three routines above, each of
        which ignores it.

    BaseAddress - Supplies a pointer to the base of the range that is
        flushed and drained.

    Length - Supplies the length of the range that is flushed and drained.

Return Value:

    None. Checked builds assert that the prefetch-visibility step did not
    return PAL_STATUS_ERROR and that the drain step returned
    PAL_STATUS_SUCCESS.

Note:

    This is used when changing attributes of WB pages to UC pages.

--*/

{

    ULONG_PTR StepStatus;

    StepStatus = KiSyncPrefetchVisible(AllProcessors, BaseAddress, Length);

    ASSERT(StepStatus != PAL_STATUS_ERROR);

    KeSweepCacheRange(AllProcessors, BaseAddress, Length);

    StepStatus = KiSyncMC_Drain(AllProcessors, BaseAddress, Length);

    ASSERT(StepStatus == PAL_STATUS_SUCCESS);
}