Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

1001 lines
24 KiB

/*++
Copyright (c) 1989 Microsoft Corporation
Module Name:
compress.c
Abstract:
This module contains the routines that allow hardware to
transparently compress physical memory.
Author:
Landy Wang (landyw) 21-Oct-2000
Revision History:
--*/
#include "mi.h"
#if defined (_MI_COMPRESSION)
Enable the #if 0 code in cmdat3.c to allow Ratio specification.
//
// Compression public interface.
//
#define MM_PHYSICAL_MEMORY_PRODUCED_VIA_COMPRESSION 0x1
//
// Callback supplied by the compression driver in its registration context.
// Memory management invokes it (see MiArmCompressionInterrupt) with the
// threshold, expressed in bytes, that the hardware should be programmed with.
// A failure status returned from this callback is treated as fatal by
// MiArmCompressionInterrupt.
//
typedef
NTSTATUS
(*PMM_SET_COMPRESSION_THRESHOLD) (
IN ULONGLONG CompressionByteThreshold
);
//
// Registration context passed by a compression driver to
// MmRegisterCompressionDevice and MmDeregisterCompressionDevice.
//
typedef struct _MM_COMPRESSION_CONTEXT {
// Must equal MM_COMPRESSION_VERSION_CURRENT or registration is rejected.
ULONG Version;
// Size of this structure as seen by the caller; registration is rejected
// if it is smaller than sizeof (MM_COMPRESSION_CONTEXT).
ULONG SizeInBytes;
// Number of bytes the hardware reserves for itself; converted to pages
// (divided by PAGE_SIZE) and charged against available memory.
ULONGLONG ReservedBytes;
// Driver callback used to program the interrupt threshold; must be non-NULL.
PMM_SET_COMPRESSION_THRESHOLD SetCompressionThreshold;
} MM_COMPRESSION_CONTEXT, *PMM_COMPRESSION_CONTEXT;
#define MM_COMPRESSION_VERSION_INITIAL 1
#define MM_COMPRESSION_VERSION_CURRENT 1
NTSTATUS
MmRegisterCompressionDevice (
IN PMM_COMPRESSION_CONTEXT Context
);
NTSTATUS
MmDeregisterCompressionDevice (
IN PMM_COMPRESSION_CONTEXT Context
);
//
// This defaults to 75% but can be overridden in the registry. At this
// percentage of *real* physical memory in use, an interrupt is generated so
// that memory management can zero pages to make more memory available.
//
#define MI_DEFAULT_COMPRESSION_THRESHOLD 75
//
// Threshold percentage.  MmRegisterCompressionDevice substitutes
// MI_DEFAULT_COMPRESSION_THRESHOLD when this is zero and clamps it to 100.
//
ULONG MmCompressionThresholdRatio;
//
// Subtracted from MmNumberOfPhysicalPages (together with the compression
// overhead) when MiArmCompressionInterrupt computes the real page count.
// Not modified anywhere in this module.
//
PFN_NUMBER MiNumberOfCompressionPages;
//
// Threshold callback of the registered compression driver, or NULL when no
// device is registered.
//
PMM_SET_COMPRESSION_THRESHOLD MiSetCompressionThreshold;
#if DBG
//
// Checked builds only: the IRQL at which MmMakeCompressibleMemory was last
// entered above DISPATCH_LEVEL, used to assert that it never changes.
//
KIRQL MiCompressionIrql;
#endif
//
// Note there is also code in dynmem.c that is dependent on this #define.
//
#if defined (_MI_COMPRESSION_SUPPORTED_)
//
// Diagnostic counters for the compression paths in this module.  They are
// incremented without interlocked operations.
//
typedef struct _MI_COMPRESSION_INFO {
ULONG IsrPageProcessed; // pages zeroed by MmMakeCompressibleMemory above DISPATCH_LEVEL
ULONG DpcPageProcessed; // pages moved to the zeroed list by MiMakeCompressibleMemoryAtDispatch
ULONG IsrForcedDpc; // follow-up DPCs successfully queued after the raised-IRQL path zeroed pages
ULONG IsrFailedDpc; // follow-up DPC insertions for which KeInsertQueueDpc returned FALSE
ULONG IsrRan; // times the raised-IRQL path acquired the PFN lock
ULONG DpcRan; // entries into MiMakeCompressibleMemoryAtDispatch
ULONG DpcsFired; // invocations of MiCompressionDispatch
ULONG IsrSkippedZeroedPage; // pages the raised-IRQL path found already marked as zeroed
ULONG DpcSkippedZeroedPage; // pages the dispatch path found already marked as zeroed
ULONG PfnForcedDpcInsert; // DPCs queued because the PFN lock was not taken at raised IRQL
ULONG PfnFailedDpcInsert; // as above, but KeInsertQueueDpc returned FALSE
} MI_COMPRESSION_INFO, *PMI_COMPRESSION_INFO;
MI_COMPRESSION_INFO MiCompressionInfo; // LWFIX - temp remove.
//
// Pages currently charged for the hardware's reserved ranges.  Increased by
// MmRegisterCompressionDevice and decreased by MmDeregisterCompressionDevice
// with the PFN lock held.
//
PFN_NUMBER MiCompressionOverHeadInPages;
//
// Nonpaged pool array of DPCs, one per processor, each targeted at its
// processor and running MiCompressionDispatch.  Published on the first
// successful registration; this module never frees the published array.
//
PKDPC MiCompressionDpcArray;
//
// Number of entries in MiCompressionDpcArray.
//
CCHAR MiCompressionProcessors;
VOID
MiCompressionDispatch (
IN PKDPC Dpc,
IN PVOID DeferredContext,
IN PVOID SystemArgument1,
IN PVOID SystemArgument2
);
PVOID
MiMapCompressionInHyperSpace (
IN PFN_NUMBER PageFrameIndex
);
VOID
MiUnmapCompressionInHyperSpace (
VOID
);
SIZE_T
MiMakeCompressibleMemoryAtDispatch (
IN SIZE_T NumberOfBytes OPTIONAL
);
NTSTATUS
MmRegisterCompressionDevice (
    IN PMM_COMPRESSION_CONTEXT Context
    )

/*++

Routine Description:

    A compression driver calls this routine to tell memory management that
    compression hardware is present.  The routine validates the context,
    charges the pages reserved by the hardware against both the resident
    available and the available page counts, builds one high importance DPC
    per processor and records the driver's threshold callback.

Arguments:

    Context - Supplies the compression context pointer.  Version must be
              MM_COMPRESSION_VERSION_CURRENT, SizeInBytes must cover the
              whole structure and SetCompressionThreshold must be non-NULL.

Return Value:

    STATUS_SUCCESS if the device was registered.

    STATUS_INVALID_PARAMETER_1 if the context fails validation.

    STATUS_NOT_SUPPORTED if MmDynamicPfn is zero.

    STATUS_INSUFFICIENT_RESOURCES if the reserved pages cannot be charged
    or the DPC array cannot be allocated.

Environment:

    Kernel mode, PASSIVE_LEVEL.

--*/

{
    KIRQL PreviousIrql;
    PFN_NUMBER ReservedPages;
    CCHAR Index;
    CCHAR ProcessorCount;
    PKDPC DpcArray;
    PKDPC Dpc;

    ASSERT (KeGetCurrentIrql () == PASSIVE_LEVEL);

    //
    // Reject contexts built against a different version or a short layout.
    //

    if ((Context->Version != MM_COMPRESSION_VERSION_CURRENT) ||
        (Context->SizeInBytes < sizeof (MM_COMPRESSION_CONTEXT))) {

        return STATUS_INVALID_PARAMETER_1;
    }

    //
    // The driver is expected to hot-add memory afterwards; if that cannot
    // succeed there is no point in registering.
    //

    if (MmDynamicPfn == 0) {
        return STATUS_NOT_SUPPORTED;
    }

    //
    // Without a threshold callback the hardware cannot raise a configurable
    // interrupt, which is a requirement.
    //

    if (Context->SetCompressionThreshold == NULL) {
        return STATUS_INVALID_PARAMETER_1;
    }

    //
    // ReservedBytes is the total the underlying hardware keeps for itself,
    // for example:
    //
    //  1. translation tables which are 1/64 of the fictional RAM total.
    //
    //  2. the first MB of memory, which is never compressed.
    //
    //  3. an L3 which is never compressed.
    //
    // The sum of all such ranges is converted to pages here.
    //

    ReservedPages = (PFN_COUNT)(Context->ReservedBytes / PAGE_SIZE);

    //
    // Unsynchronized early checks - these are repeated below with the PFN
    // lock held before any charge is actually made.
    //

    if (MmResidentAvailablePages < (SPFN_NUMBER) ReservedPages) {
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    if (MmAvailablePages < ReservedPages) {

        //
        // Trim every working set once and look again before giving up.
        //

        MmEmptyAllWorkingSets ();

        if (MmAvailablePages < ReservedPages) {
            return STATUS_INSUFFICIENT_RESOURCES;
        }
    }

    //
    // Servicing the compression interrupt is critical, so build a DPC for
    // every processor in the system.
    //

    ProcessorCount = KeNumberProcessors;

    DpcArray = ExAllocatePoolWithTag (NonPagedPool,
                                      ProcessorCount * sizeof (KDPC),
                                      'pDmM');

    if (DpcArray == NULL) {
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    for (Index = 0; Index < ProcessorCount; Index += 1) {

        Dpc = &DpcArray[Index];

        KeInitializeDpc (Dpc, MiCompressionDispatch, NULL);

        //
        // High importance makes the DPC queue at the head.
        //

        KeSetImportanceDpc (Dpc, HighImportance);

        KeSetTargetProcessorDpc (Dpc, Index);
    }

    LOCK_PFN (PreviousIrql);

    //
    // Normalize the ratio: zero selects the default, anything above 100
    // is clamped.
    //

    if (MmCompressionThresholdRatio > 100) {
        MmCompressionThresholdRatio = 100;
    }
    else if (MmCompressionThresholdRatio == 0) {
        MmCompressionThresholdRatio = MI_DEFAULT_COMPRESSION_THRESHOLD;
    }

    //
    // Repeat the resource checks now that the counts cannot change.
    //

    if ((MmResidentAvailablePages < (SPFN_NUMBER) ReservedPages) ||
        (MmAvailablePages < ReservedPages)) {

        UNLOCK_PFN (PreviousIrql);
        ExFreePool (DpcArray);
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    MI_DECREMENT_RESIDENT_AVAILABLE (ReservedPages,
                                     MM_RESAVAIL_ALLOCATE_COMPRESSION);

    MmAvailablePages -= (PFN_COUNT) ReservedPages;

    //
    // Taking these pages may have crossed a memory event threshold.
    //

    MiNotifyMemoryEvents ();

    //
    // Keep a private record of the charge so that a driver which
    // deregisters improperly cannot cause an overcommit.
    //

    MiCompressionOverHeadInPages += ReservedPages;

    ASSERT (MiNumberOfCompressionPages == 0);
    ASSERT (MiSetCompressionThreshold == NULL);

    MiSetCompressionThreshold = Context->SetCompressionThreshold;

    //
    // Publish the DPC array only if none was published before; otherwise
    // the freshly built array is redundant and is released below once the
    // lock has been dropped.
    //

    if (MiCompressionDpcArray == NULL) {
        MiCompressionDpcArray = DpcArray;
        MiCompressionProcessors = ProcessorCount;
        DpcArray = NULL;
    }

    UNLOCK_PFN (PreviousIrql);

    if (DpcArray != NULL) {
        ExFreePool (DpcArray);
    }

    return STATUS_SUCCESS;
}
NTSTATUS
MiArmCompressionInterrupt (
    VOID
    )

/*++

Routine Description:

    Programs the compression hardware, through the registered driver
    callback, with the byte threshold derived from the current real page
    count and MmCompressionThresholdRatio.

Arguments:

    None.

Return Value:

    STATUS_SUCCESS if no compression device is registered, otherwise the
    status returned by the driver callback.  A failing callback status
    results in a bugcheck.

Environment:

    Kernel mode, PFN lock held.

--*/

{
    NTSTATUS ArmStatus;
    PFN_NUMBER UncompressedPages;
    ULONGLONG ThresholdInBytes;

    MM_PFN_LOCK_ASSERT();

    ArmStatus = STATUS_SUCCESS;

    if (MiSetCompressionThreshold != NULL) {

        //
        // Real pages are whatever is left once the compression pages and
        // the hardware overhead are taken out of the physical page total.
        //

        UncompressedPages = MmNumberOfPhysicalPages - MiNumberOfCompressionPages - MiCompressionOverHeadInPages;

        //
        // Take the configured percentage in pages first, then widen and
        // scale to bytes.
        //

        ThresholdInBytes = (UncompressedPages * MmCompressionThresholdRatio) / 100;
        ThresholdInBytes *= PAGE_SIZE;

        //
        // Note the driver is called with the PFN lock held !
        //

        ArmStatus = (*MiSetCompressionThreshold) (ThresholdInBytes);

        if (!NT_SUCCESS (ArmStatus)) {

            //
            // There is no recovery if the hardware cannot be armed.
            //

            KeBugCheckEx (MEMORY_MANAGEMENT,
                          0x61941,
                          MmNumberOfPhysicalPages,
                          UncompressedPages,
                          MmCompressionThresholdRatio);
        }
    }

    return ArmStatus;
}
NTSTATUS
MmDeregisterCompressionDevice (
    IN PMM_COMPRESSION_CONTEXT Context
    )

/*++

Routine Description:

    A compression driver calls this routine when the compression hardware
    is being removed.  The pages that were charged for the hardware's
    reserved ranges are handed back and the threshold callback is cleared.

    Note the compression driver must have already SUCCESSFULLY
    called MmRemovePhysicalMemoryEx.

Arguments:

    Context - Supplies the compression context pointer.  Only ReservedBytes
              is examined.

Return Value:

    STATUS_SUCCESS if the reserved pages were returned and the callback
    was cleared.

    STATUS_INVALID_PARAMETER if the context describes more reserved pages
    than are currently charged.

Environment:

    Kernel mode, PASSIVE_LEVEL.

--*/

{
    KIRQL PreviousIrql;
    PFN_COUNT PagesReturned;

    ASSERT (KeGetCurrentIrql () == PASSIVE_LEVEL);

    PagesReturned = (PFN_COUNT)(Context->ReservedBytes / PAGE_SIZE);

    LOCK_PFN (PreviousIrql);

    if (PagesReturned <= MiCompressionOverHeadInPages) {

        MmAvailablePages += PagesReturned;

        //
        // Signal applications if freeing these pages caused a threshold
        // cross.
        //

        MiNotifyMemoryEvents ();

        //
        // A well behaved driver returns exactly what it registered.
        //

        ASSERT (MiCompressionOverHeadInPages == PagesReturned);

        MiCompressionOverHeadInPages -= PagesReturned;

        MiSetCompressionThreshold = NULL;

        UNLOCK_PFN (PreviousIrql);

        MI_INCREMENT_RESIDENT_AVAILABLE (PagesReturned,
                                         MM_RESAVAIL_FREE_COMPRESSION);

        return STATUS_SUCCESS;
    }

    //
    // The caller claims more overhead than was ever charged - refuse so the
    // available page counts are not inflated.
    //

    UNLOCK_PFN (PreviousIrql);

    return STATUS_INVALID_PARAMETER;
}
VOID
MiCompressionDispatch (
    IN PKDPC Dpc,
    IN PVOID DeferredContext,
    IN PVOID SystemArgument1,
    IN PVOID SystemArgument2
    )

/*++

Routine Description:

    DPC routine queued by MmMakeCompressibleMemory.  It counts the
    invocation and hands the request to MiMakeCompressibleMemoryAtDispatch.

Arguments:

    Dpc - Supplies a pointer to a control object of type DPC.  Unused.

    DeferredContext - Unused.

    SystemArgument1 - Supplies the number of bytes to make compressible.

    SystemArgument2 - Unused.

Return Value:

    None.

Environment:

    Kernel mode.  DISPATCH_LEVEL.

--*/

{
    UNREFERENCED_PARAMETER (Dpc);
    UNREFERENCED_PARAMETER (DeferredContext);
    UNREFERENCED_PARAMETER (SystemArgument2);

    MiCompressionInfo.DpcsFired += 1;

    //
    // The byte count travels in the first system argument.
    //

    MiMakeCompressibleMemoryAtDispatch ((SIZE_T) SystemArgument1);
}
SIZE_T
MmMakeCompressibleMemory (
    IN SIZE_T NumberOfBytes OPTIONAL
    )

/*++

Routine Description:

    This routine attempts to zero free and standby pages so that hardware
    compression can reclaim the physical memory.

    At or below DISPATCH_LEVEL the work is handed directly to
    MiMakeCompressibleMemoryAtDispatch.

    Above DISPATCH_LEVEL on multiprocessor builds the routine tries to take
    the PFN lock without spinning.  If that fails a DPC is queued to every
    processor and nothing is done here.  If it succeeds the pages are zeroed
    in place and marked, and a single DPC is queued to finish moving them to
    the zeroed list.

    Above DISPATCH_LEVEL on uniprocessor builds a DPC is always queued.

Arguments:

    NumberOfBytes - Supplies the number of bytes to make compressible.
                    Zero indicates as much as possible.

Return Value:

    Returns the number of bytes made compressible by this call.  Zero is
    returned whenever the work was deferred to a DPC.

Environment:

    Kernel mode.  Any IRQL as this is called from device interrupt service
    routines.

--*/

{
    KIRQL OldIrql;
    BOOLEAN Queued;
#if !defined(NT_UP)
    PFN_NUMBER PageFrameIndex;
    MMLISTS MemoryList;
    PMMPFNLIST ListHead;
    PMMPFN Pfn1;
    CCHAR Processor;
    PFN_NUMBER Total;
    PVOID ZeroBase;
    PKPRCB Prcb;
    PFN_NUMBER RequestedPages;
    PFN_NUMBER ActualPages;
    PKSPIN_LOCK_QUEUE LockQueuePfn;
#endif

    //
    // LWFIX: interlocked add in the request size above so overlapping
    // requests can be processed.
    //

    OldIrql = KeGetCurrentIrql();

    if (OldIrql <= DISPATCH_LEVEL) {
        return MiMakeCompressibleMemoryAtDispatch (NumberOfBytes);
    }

#if defined(NT_UP)

    //
    // In uniprocessor configurations, there is no indication as to whether
    // the PFN lock is owned because the uniprocessor kernel macros these
    // into merely IRQL raises.  Therefore this routine must be conservative
    // when called above DISPATCH_LEVEL and assume the lock is owned and just
    // always queue a DPC in these cases.
    //

    Queued = KeInsertQueueDpc (MiCompressionDpcArray,
                               (PVOID) NumberOfBytes,
                               NULL);

    if (Queued == TRUE) {
        MiCompressionInfo.PfnForcedDpcInsert += 1;
    }
    else {
        MiCompressionInfo.PfnFailedDpcInsert += 1;
    }

    return 0;

#else

#if DBG

    //
    // Make sure this interrupt always comes in at the same device IRQL.
    //

    ASSERT ((MiCompressionIrql == 0) || (OldIrql == MiCompressionIrql));
    MiCompressionIrql = OldIrql;
#endif

    Prcb = KeGetCurrentPrcb();

    RequestedPages = NumberOfBytes >> PAGE_SHIFT;
    ActualPages = 0;

    MemoryList = FreePageList;

    LockQueuePfn = &Prcb->LockQueue[LockQueuePfnLock];

    if (KeTryToAcquireQueuedSpinLockAtRaisedIrql (LockQueuePfn) == FALSE) {

        //
        // Unable to acquire the spinlock, queue a DPC to every processor
        // to pick it up instead.
        //

        for (Processor = 0; Processor < MiCompressionProcessors; Processor += 1) {

            Queued = KeInsertQueueDpc (MiCompressionDpcArray + Processor,
                                       (PVOID) NumberOfBytes,
                                       NULL);

            if (Queued == TRUE) {
                MiCompressionInfo.PfnForcedDpcInsert += 1;
            }
            else {
                MiCompressionInfo.PfnFailedDpcInsert += 1;
            }
        }

        return 0;
    }

    MiCompressionInfo.IsrRan += 1;

    //
    // Run the free and standby lists and zero the pages.
    //

    while (MemoryList <= StandbyPageList) {

        //
        // Always locate the list head through the table.  The list heads
        // are separately named objects, so stepping a pointer from one to
        // the next would depend on how they happen to be laid out.
        //

        ListHead = MmPageLocationList[MemoryList];

        Total = ListHead->Total;

        PageFrameIndex = ListHead->Flink;

        while (Total != 0) {

            //
            // Transition pages may need restoration which requires a
            // hyperspace mapping plus control area deletion actions all of
            // which occur at DISPATCH_LEVEL.  So if we're at device IRQL,
            // only do the minimum and queue the rest.  The pages are left
            // on their list here; only their contents and flags change.
            //

            Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);

            if ((Pfn1->u3.e1.InPageError == 1) &&
                (Pfn1->u3.e1.ReadInProgress == 1)) {

                //
                // This page is already zeroed so skip it.
                //

                MiCompressionInfo.IsrSkippedZeroedPage += 1;
            }
            else {

                //
                // Zero the page directly now instead of waiting for the low
                // priority zeropage thread to get a slice.  Note that the
                // slower mapping and zeroing routines are used here because
                // the faster ones are for the zeropage thread only.
                // Maybe we should change this someday.
                //

                ZeroBase = MiMapCompressionInHyperSpace (PageFrameIndex);

                KeZeroPages (ZeroBase, PAGE_SIZE);

                MiUnmapCompressionInHyperSpace ();

                ASSERT (Pfn1->u3.e2.ReferenceCount == 0);

                //
                // Overload ReadInProgress to signify that collided faults
                // that occur before the PTE is completely restored will
                // know to delay and retry until the page (and PTE) are
                // updated.  Both bits set together is the "already zeroed"
                // marker tested above and by the dispatch level routine.
                //

                Pfn1->u3.e1.InPageError = 1;
                ASSERT (Pfn1->u3.e1.ReadInProgress == 0);
                Pfn1->u3.e1.ReadInProgress = 1;

                ActualPages += 1;

                if (ActualPages == RequestedPages) {

                    //
                    // The request is satisfied - force the outer loop to
                    // terminate as well.
                    //

                    MemoryList = StandbyPageList;
                    break;
                }
            }

            Total -= 1;
            PageFrameIndex = Pfn1->u1.Flink;
        }

        MemoryList += 1;
    }

    if (ActualPages != 0) {

        //
        // Rearm the interrupt as pages have now been zeroed.
        //

        MiArmCompressionInterrupt ();
    }

    KeReleaseQueuedSpinLockFromDpcLevel (LockQueuePfn);

    if (ActualPages != 0) {

        //
        // Pages were zeroed - queue a DPC to the current processor to
        // move them to the zero list.  Note this is not critical path so
        // don't bother sending a DPC to every processor for this case.
        //

        MiCompressionInfo.IsrPageProcessed += (ULONG)ActualPages;

        Processor = (CCHAR) KeGetCurrentProcessorNumber ();

        //
        // Ensure a hot-added processor scenario just works.  The DPC array
        // holds exactly MiCompressionProcessors entries, so the highest
        // valid index is one less than that count; a processor beyond the
        // array borrows the last entry instead of indexing past the end of
        // the pool allocation.
        //

        if (Processor >= MiCompressionProcessors) {
            Processor = (CCHAR) (MiCompressionProcessors - 1);
        }

        Queued = KeInsertQueueDpc (MiCompressionDpcArray + Processor,
                                   (PVOID) NumberOfBytes,
                                   NULL);

        if (Queued == TRUE) {
            MiCompressionInfo.IsrForcedDpc += 1;
        }
        else {
            MiCompressionInfo.IsrFailedDpc += 1;
        }
    }

    return (ActualPages << PAGE_SHIFT);

#endif
}
SIZE_T
MiMakeCompressibleMemoryAtDispatch (
    IN SIZE_T NumberOfBytes OPTIONAL
    )

/*++

Routine Description:

    This routine removes pages from the free and standby lists, zeroes any
    that were not already zeroed by the raised IRQL path in
    MmMakeCompressibleMemory, and inserts them on the zeroed page list so
    that hardware compression can reclaim the physical memory.  The
    compression interrupt is rearmed before returning.

Arguments:

    NumberOfBytes - Supplies the number of bytes to make compressible.
                    Zero indicates as much as possible.

Return Value:

    Returns the number of bytes made compressible.

Environment:

    Kernel mode.  DISPATCH_LEVEL.

--*/

{
    KIRQL OldIrql;
    PFN_NUMBER PageFrameIndex;
    PFN_NUMBER PageFrameIndex2;
    PVOID ZeroBase;
    PMMPFN Pfn1;
    MMLISTS MemoryList;
    PMMPFNLIST ListHead;
    PFN_NUMBER RequestedPages;
    PFN_NUMBER ActualPages;
    LOGICAL NeedToZero;

    //
    // NOTE(review): MmMakeCompressibleMemory forwards here whenever the
    // caller's IRQL is at or below DISPATCH_LEVEL, which is wider than this
    // assertion allows - confirm which of the two is intended.
    //

    ASSERT (KeGetCurrentIrql () == DISPATCH_LEVEL);

    RequestedPages = NumberOfBytes >> PAGE_SHIFT;
    ActualPages = 0;

    MemoryList = FreePageList;

    MiCompressionInfo.DpcRan += 1;

    LOCK_PFN2 (OldIrql);

    //
    // Run the free and standby lists and zero the pages.
    //

    while (MemoryList <= StandbyPageList) {

        //
        // Always locate the list head through the table.  The list heads
        // are separately named objects, so stepping a pointer from one to
        // the next would depend on how they happen to be laid out.
        //

        ListHead = MmPageLocationList[MemoryList];

        while (ListHead->Total != 0) {

            //
            // Before removing the page from the head of the list (which will
            // zero the flag bits), snap whether it's been zeroed by our ISR
            // or whether we need to zero it here.
            //

            PageFrameIndex = ListHead->Flink;
            Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);

            NeedToZero = TRUE;

            if ((Pfn1->u3.e1.InPageError == 1) && (Pfn1->u3.e1.ReadInProgress == 1)) {
                MiCompressionInfo.DpcSkippedZeroedPage += 1;
                NeedToZero = FALSE;
            }

            //
            // Transition pages may need restoration which requires a
            // hyperspace mapping plus control area deletion actions all of
            // which occur at DISPATCH_LEVEL.  Since we're at DISPATCH_LEVEL
            // now, go ahead and do it.
            //

            PageFrameIndex2 = MiRemovePageFromList (ListHead);
            ASSERT (PageFrameIndex == PageFrameIndex2);

            //
            // Zero the page directly now instead of waiting for the low
            // priority zeropage thread to get a slice.  Note that the
            // slower mapping and zeroing routines are used here because
            // the faster ones are for the zeropage thread only.
            // Maybe we should change this someday.
            //

            if (NeedToZero == TRUE) {
                ZeroBase = MiMapCompressionInHyperSpace (PageFrameIndex);

                KeZeroPages (ZeroBase, PAGE_SIZE);

                MiUnmapCompressionInHyperSpace ();
            }

            ASSERT (Pfn1->u2.ShareCount == 0);
            ASSERT (Pfn1->u3.e2.ReferenceCount == 0);

            MiInsertPageInList (&MmZeroedPageListHead, PageFrameIndex);

            //
            // We have changed (zeroed) the contents of this page.
            // If memory mirroring is in progress, the bitmap must be updated.
            //

            if (MiMirroringActive == TRUE) {
                RtlSetBit (MiMirrorBitMap2, (ULONG)PageFrameIndex);
            }

            MiCompressionInfo.DpcPageProcessed += 1;
            ActualPages += 1;

            if (ActualPages == RequestedPages) {

                //
                // The request is satisfied - force the outer loop to
                // terminate as well.
                //

                MemoryList = StandbyPageList;
                break;
            }
        }

        MemoryList += 1;
    }

    //
    // Rearm the interrupt.  This is done unconditionally, even if no page
    // was processed by this call.
    //

    MiArmCompressionInterrupt ();

    UNLOCK_PFN2 (OldIrql);

    return (ActualPages << PAGE_SHIFT);
}
PVOID
MiMapCompressionInHyperSpace (
    IN PFN_NUMBER PageFrameIndex
    )

/*++

Routine Description:

    Maps the given physical page through the single PTE that is set aside
    for compression page mapping and returns the virtual address it is
    visible at.

    The PTE is guaranteed to always be available since the PFN lock is held.

Arguments:

    PageFrameIndex - Supplies the physical page number to map.  Must not be
                     zero.

Return Value:

    Returns the virtual address where the specified physical page was
    mapped.

Environment:

    Kernel mode, PFN lock held, any IRQL.

--*/

{
    MMPTE MappingPte;
    PMMPTE CompressionPte;
    PVOID MappedVa;

    ASSERT (PageFrameIndex != 0);

    //
    // Start from the valid PTE template and point it at the target frame.
    //

    MappingPte = ValidPtePte;
    MappingPte.u.Hard.PageFrameNumber = PageFrameIndex;

    //
    // Both the modified and accessed bits must already be set so that the
    // hardware never has a reason to write this PTE.
    //

    ASSERT (MappingPte.u.Hard.Dirty == 1);
    ASSERT (MappingPte.u.Hard.Accessed == 1);

    //
    // The reserved PTE must be free - mappings are never nested.
    //

    CompressionPte = MiGetPteAddress (COMPRESSION_MAPPING_PTE);
    ASSERT (CompressionPte->u.Long == 0);

    MI_WRITE_VALID_PTE (CompressionPte, MappingPte);

    //
    // Flush only the current processor's TB entry as no context switch
    // can occur while this mapping is in use.
    //

    MappedVa = (PVOID) (ULONG_PTR) COMPRESSION_MAPPING_PTE;
    KeFlushSingleTb (MappedVa, FALSE);

    return (PVOID) MiGetVirtualAddressMappedByPte (CompressionPte);
}
__forceinline
VOID
MiUnmapCompressionInHyperSpace (
    VOID
    )

/*++

Routine Description:

    Tears down the mapping established by MiMapCompressionInHyperSpace by
    writing a zero PTE into the slot reserved for compression page mapping.
    No TB flush is issued here.

Arguments:

    None.

Return Value:

    None.

Environment:

    Kernel mode, PFN lock held, any IRQL.

--*/

{
    PMMPTE CompressionPte;

    CompressionPte = MiGetPteAddress (COMPRESSION_MAPPING_PTE);

    //
    // There must be a live mapping to remove.
    //

    ASSERT (CompressionPte->u.Long != 0);

    MI_WRITE_INVALID_PTE (CompressionPte, ZeroPte);
}
#else
//
// Stub compiled when _MI_COMPRESSION_SUPPORTED_ is not defined: registration
// is always refused with STATUS_NOT_SUPPORTED and the context is ignored.
//
NTSTATUS
MmRegisterCompressionDevice (
IN PMM_COMPRESSION_CONTEXT Context
)
{
UNREFERENCED_PARAMETER (Context);
return STATUS_NOT_SUPPORTED;
}
//
// Stub compiled when _MI_COMPRESSION_SUPPORTED_ is not defined: there is
// never anything to deregister, so STATUS_NOT_SUPPORTED is returned and the
// context is ignored.
//
NTSTATUS
MmDeregisterCompressionDevice (
IN PMM_COMPRESSION_CONTEXT Context
)
{
UNREFERENCED_PARAMETER (Context);
return STATUS_NOT_SUPPORTED;
}
//
// Stub compiled when _MI_COMPRESSION_SUPPORTED_ is not defined: no memory is
// made compressible, so the request is ignored and zero bytes are reported.
//
SIZE_T
MmMakeCompressibleMemory (
IN SIZE_T NumberOfBytes OPTIONAL
)
{
UNREFERENCED_PARAMETER (NumberOfBytes);
return 0;
}
//
// Stub compiled when _MI_COMPRESSION_SUPPORTED_ is not defined: there is no
// interrupt to arm.  Note this returns STATUS_NOT_SUPPORTED, whereas the
// supported build returns STATUS_SUCCESS when no device is registered.
//
NTSTATUS
MiArmCompressionInterrupt (
VOID
)
{
return STATUS_NOT_SUPPORTED;
}
#endif
#endif