Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

648 lines
17 KiB

/*++
Copyright (c) 2000 Microsoft Corporation
Module Name:
sratnuma.c
Abstract:
This module contains functions which support static NUMA configurations
as provided by the ACPI SRAT "Static Resource Affinity Table".
Author:
Peter L Johnston (peterj) 2-Jul-2000
Environment:
Kernel mode only.
Revision History:
--*/
#include "halp.h"
#include "acpitabl.h"
#include "xxacpi.h"
#if !defined(NT_UP)
//
// ROUNDUP_TO_NEXT(base, size)
//
// Advances base to the next multiple of size that is strictly greater
// than base (an already aligned base still moves forward by size).
// size must be a power of two.
//
// The alignment mask is computed in ULONG_PTR so that a size whose type
// is narrower than a pointer (e.g. an unsigned 32-bit constant on a 64-bit
// build) cannot zero-extend and strip the upper address bits.
//
#define ROUNDUP_TO_NEXT(base, size) \
    ((((ULONG_PTR)(base)) + (size)) & ~(((ULONG_PTR)(size)) - 1))
//
// The following routine is external but only used by NUMA support
// at the moment.
//
// Given a processor number it hands back that processor's APIC ID
// through ApicId.  Callers in this module treat any status that fails
// NT_SUCCESS() as "no APIC ID available for this processor".
//
NTSTATUS
HalpGetApicIdByProcessorNumber(
IN UCHAR Processor,
IN OUT USHORT *ApicId
);
//
// Prototypes for alloc pragmas.
//
// The forward declaration lets the alloc_text pragma below name the
// routine before its definition.  When ALLOC_PRAGMA is defined the
// routine is placed in the section named INIT.
// NOTE(review): INIT is presumably discarded once system initialization
// completes, so the routine must not be called after that -- confirm.
//
VOID
HalpNumaInitializeStaticConfiguration(
IN PLOADER_PARAMETER_BLOCK
);
#if defined(ALLOC_PRAGMA)
#pragma alloc_text(INIT,HalpNumaInitializeStaticConfiguration)
#endif
//
// NEXT_ENTRY(base)
//
// Yields a PUCHAR addressing the byte that follows the variable-length
// entry at base, i.e. base advanced by its own Length field.  The
// argument is fully parenthesized so that an expression such as
// NEXT_ENTRY(Entry + 1) casts the whole expression rather than only its
// first operand.
//
#define NEXT_ENTRY(base) (((PUCHAR)(base)) + (base)->Length)
//
// Number of processors the static NUMA configuration can describe:
// 64 on _WIN64 builds, 32 otherwise.  It sizes the per-processor arrays
// in HALPSRAT_STATIC_NUMA_CONFIG; initialization abandons NUMA support
// if the SRAT lists more enabled processors than this.
//
#if defined(_WIN64)
#define HAL_MAX_PROCESSORS 64
#else
#define HAL_MAX_PROCESSORS 32
#endif
//
// Static NUMA configuration distilled from the SRAT by
// HalpNumaInitializeStaticConfiguration.
//
typedef struct _STATIC_NUMA_CONFIG {
// APIC ID of each enabled processor entry found in the SRAT (on IA64
// builds this is ApicId << 8 | SApicEid).  Entries [0, ProcessorCount)
// are valid.
USHORT ProcessorApicId[HAL_MAX_PROCESSORS];
// Proximity domain of the processor at the same index in
// ProcessorApicId.
UCHAR ProcessorProximity[HAL_MAX_PROCESSORS];
// Distinct proximity domains seen in the SRAT.  The index of a domain
// in this array is the logical node number handed to the kernel.
// Entries [0, NodeCount) are valid.
UCHAR ProximityId[MAXIMUM_CCNUMA_NODES];
// Number of valid entries in ProximityId.
UCHAR NodeCount;
// Number of valid entries in ProcessorApicId / ProcessorProximity.
UCHAR ProcessorCount;
} HALPSRAT_STATIC_NUMA_CONFIG, *PHALPSRAT_STATIC_NUMA_CONFIG;
//
// Module state, all established by HalpNumaInitializeStaticConfiguration.
//
// Mapped static NUMA configuration block.
PHALPSRAT_STATIC_NUMA_CONFIG HalpNumaConfig;
// The ACPI SRAT, or NULL when no table was found or the static NUMA
// configuration could not be built from it.
PACPI_SRAT HalpAcpiSrat;
// Base page number of each memory range, sorted ascending.  Element 0
// is forced to 0 and the element after the last range is all ones.
PULONG_PTR HalpNumaMemoryRanges;
// Node number of the memory range at the same index in
// HalpNumaMemoryRanges.
PUCHAR HalpNumaMemoryNode;
// Index of the range that satisfied the most recent
// HalpNumaQueryPageToNode call; the next lookup starts from here.
ULONG HalpNumaLastRangeIndex;
ULONG
HalpNumaQueryPageToNode(
    IN ULONG_PTR PhysicalPageNumber
    )
/*++

Routine Description:

    Locate the memory range containing a physical page and report the
    node that range belongs to.  The lookup begins at the range that
    matched the previous call and walks linearly from there.

Arguments:

    PhysicalPageNumber  Provides the page number.

Return Value:

    Returns the node number for the page.

--*/
{
    PULONG_PTR RangeBase = HalpNumaMemoryRanges;
    ULONG Slot = HalpNumaLastRangeIndex;

    if (PhysicalPageNumber < RangeBase[Slot]) {

        //
        // The page lies below the cached range: step down until a
        // range whose base is not above the page is reached.
        //

        Slot -= 1;
        while (PhysicalPageNumber < RangeBase[Slot]) {
            Slot -= 1;
        }

    } else {

        //
        // The page lies at or above the cached range: step up while
        // the following range also starts at or below the page.
        //

        while (RangeBase[Slot + 1] <= PhysicalPageNumber) {
            Slot += 1;
        }
    }

    //
    // Remember where we ended up for the next query.
    //

    HalpNumaLastRangeIndex = Slot;

    return HalpNumaMemoryNode[Slot];
}
NTSTATUS
HalpNumaQueryProcessorNode(
    IN ULONG ProcessorNumber,
    OUT PUSHORT Identifier,
    OUT PUCHAR Node
    )
/*++

Routine Description:

    Report the identifier and the logical node number of a processor.

Arguments:

    ProcessorNumber  Supplies the processor number (truncated to UCHAR
                     for the APIC ID lookup).

    Identifier       Receives the processor's APIC ID.  This should
                     probably be the ACPI Id but there is no way to get
                     that yet.

    Node             Receives the node number on success.

Return Value:

    STATUS_SUCCESS if the node was found.
    The status from HalpGetApicIdByProcessorNumber if that call fails.
    STATUS_NOT_FOUND if the APIC ID is not in the table built from the
    SRAT, or its proximity domain is not in the proximity ID table.

--*/
{
    PHALPSRAT_STATIC_NUMA_CONFIG Config;
    NTSTATUS LookupStatus;
    USHORT LocalApicId;
    UCHAR Cpu;
    UCHAR NodeIndex;

    LookupStatus = HalpGetApicIdByProcessorNumber((UCHAR)ProcessorNumber,
                                                  &LocalApicId);
    if (!NT_SUCCESS(LookupStatus)) {
        return LookupStatus;
    }

    *Identifier = LocalApicId;

    //
    // The node number is the position, within the proximity ID array,
    // of the proximity domain recorded for this processor.
    //

    Config = HalpNumaConfig;

    for (Cpu = 0; Cpu < Config->ProcessorCount; Cpu++) {

        if (Config->ProcessorApicId[Cpu] != LocalApicId) {
            continue;
        }

        for (NodeIndex = 0; NodeIndex < Config->NodeCount; NodeIndex++) {
            if (Config->ProximityId[NodeIndex] ==
                Config->ProcessorProximity[Cpu]) {

                *Node = NodeIndex;
                return STATUS_SUCCESS;
            }
        }
    }

    //
    // Either the APIC ID is unknown (the BIOS MP tables and the SRAT
    // disagree) or the processor's proximity domain is missing from the
    // proximity ID table, which would be an internal error because that
    // table is built from the proximity IDs in the SRAT.
    //

    return STATUS_NOT_FOUND;
}
VOID
HalpNumaInitializeStaticConfiguration(
    IN PLOADER_PARAMETER_BLOCK LoaderBlock
    )
/*++

Routine Description:

    This routine reads the ACPI Static Resource Affinity Table to build
    a picture of the system's NUMA configuration.  This information is
    saved in the HalpNumaConfig structure in a form which is optimal for
    the OS's use.

    If the table is absent or malformed, contains no enabled memory or
    no enabled processor entries, describes more nodes or processors
    than are supported, or the configuration space cannot be allocated
    or mapped, HalpAcpiSrat is left NULL and the machine is treated as
    non-NUMA.

Arguments:

    LoaderBlock supplies a pointer to the system loader parameter block.

Return Value:

    None.

--*/
{
    ULONG MemoryDescriptorCount;
    UCHAR ProcessorCount;
    PACPI_SRAT_ENTRY SratEntry;
    PACPI_SRAT_ENTRY SratEnd;
    ULONG i, j;
    BOOLEAN Swapped;
    PHYSICAL_ADDRESS Base;
    ULONG_PTR p;
    ULONG_PTR Phys;
    USHORT BootApicId;

    HalpAcpiSrat = HalpGetAcpiTablePhase0(LoaderBlock, ACPI_SRAT_SIGNATURE);
    if (HalpAcpiSrat == NULL) {
        return;
    }

    //
    // The Static Resource Affinity Table (SRAT) exists.
    //
    // Scan it to determine the number of memory descriptors then
    // allocate memory to contain the tables needed to hold the
    // system's NUMA configuration.
    //

    MemoryDescriptorCount = 0;
    ProcessorCount = 0;
    SratEnd = (PACPI_SRAT_ENTRY)(((PUCHAR)HalpAcpiSrat) +
                                 HalpAcpiSrat->Header.Length);

    for (SratEntry = (PACPI_SRAT_ENTRY)(HalpAcpiSrat + 1);
         SratEntry < SratEnd;
         SratEntry = (PACPI_SRAT_ENTRY)NEXT_ENTRY(SratEntry)) {

        if (SratEntry->Length == 0) {

            //
            // A zero length entry would never advance the scan.  The
            // table is malformed; turn this into a non-numa machine
            // rather than spinning here forever.  Because this pass
            // visits every entry, the second pass below needs no
            // matching check.
            //

            HalpAcpiSrat = NULL;
            return;
        }

        switch (SratEntry->Type) {
        case SratMemory:
            if (SratEntry->MemoryAffinity.Flags.Enabled == 1) {
                MemoryDescriptorCount++;
            }
            break;
        case SratProcessorLocalAPIC:
            if (SratEntry->ApicAffinity.Flags.Enabled == 1) {
                ProcessorCount++;
            }
            break;
        }
    }

    if ((MemoryDescriptorCount == 0) || (ProcessorCount == 0)) {

        //
        // Can't handle the case where there is either no memory or no
        // processors in the table.  Turn this into a non-numa
        // machine.
        //

        HalpAcpiSrat = NULL;
        return;
    }

    //
    // HalpNumaConfig format:
    //
    // HalpNumaConfig->
    //      USHORT ProcessorApicId[HAL_MAX_PROCESSORS];
    //      UCHAR ProcessorProximity[HAL_MAX_PROCESSORS];
    //      UCHAR ProximityIds[MAXIMUM_CCNUMA_NODES];
    //      UCHAR NodeCount;
    //      -pad- to 128 byte boundary
    // HalpNumaMemoryNode->
    //      UCHAR MemoryRangeProximityId[NumberOfMemoryRanges];
    //      -pad to ULONG_PTR alignment-
    // HalpNumaMemoryRanges->
    //      ULONG_PTR MemoryRangeBasePage[NumberOfMemoryRanges];
    //
    // This format has been selected to maximize cache hits while
    // searching the ranges.  Specifically, the size of the ranges
    // array is kept to a minimum.
    //
    // NOTE: This code does not account for the length of the memory
    // ranges specified in the SRAT table.  Instead it treats each
    // memory range as extending to the next specified memory range.
    // The rationale is that the code shouldn't be asked about pages
    // not found in the SRAT table and if we are to return something
    // for these pages it might as well be the NUMA node associated
    // with the pages from the previous range.
    //

    //
    // Calculate number of pages required to hold the needed structures.
    // The extra terms cover the table terminator, the 128 byte round up
    // and the ULONG_PTR alignment padding.
    //

    i = MemoryDescriptorCount * (sizeof(ULONG_PTR) + sizeof(UCHAR)) +
        sizeof(HALPSRAT_STATIC_NUMA_CONFIG) + 2 * sizeof(ULONG_PTR) +
        128 + sizeof(ULONG_PTR);
    i += PAGE_SIZE - 1;
    i >>= PAGE_SHIFT;

    Phys = (ULONG_PTR)HalpAllocPhysicalMemory(LoaderBlock,
                                              MAXIMUM_PHYSICAL_ADDRESS,
                                              i,
                                              FALSE);
    if (Phys == 0) {

        //
        // Allocation failed, the system will not be able to run
        // as a NUMA system,.... actually the system will probably
        // not run far at all.
        //

        DbgPrint("HAL NUMA Initialization failed, could not allocate %d pages\n",
                 i);
        HalpAcpiSrat = NULL;
        return;
    }

    //
    // Map every page that was allocated.  Both the RtlZeroMemory call
    // and the tables laid out below span all i pages, so mapping only
    // the first page would touch unmapped addresses whenever the
    // configuration needs more than one page.
    //

    Base.QuadPart = (ULONG_PTR)Phys;
#if !defined(_IA64_)
    HalpNumaConfig = HalpMapPhysicalMemory(Base, i);
#else
    HalpNumaConfig = HalpMapPhysicalMemory(Base, i, MmCached);
#endif
    if (HalpNumaConfig == NULL) {

        //
        // Couldn't map the allocation, give up.
        //

        HalpAcpiSrat = NULL;
        return;
    }
    RtlZeroMemory(HalpNumaConfig, i * PAGE_SIZE);

    //
    // MemoryRangeProximity is an array of UCHARs starting at the next
    // 128 byte boundary.
    //

    p = ROUNDUP_TO_NEXT((HalpNumaConfig + 1), 128);
    HalpNumaMemoryNode = (PUCHAR)p;

    //
    // NumaMemoryRanges is an array of ULONG_PTRs starting at the next
    // ULONG_PTR boundary.
    //

    p += (MemoryDescriptorCount + sizeof(ULONG_PTR)) & ~(sizeof(ULONG_PTR) - 1);
    HalpNumaMemoryRanges = (PULONG_PTR)p;

    //
    // Rescan the SRAT entries filling in the HalpNumaConfig structure.
    //

    ProcessorCount = 0;
    MemoryDescriptorCount = 0;

    for (SratEntry = (PACPI_SRAT_ENTRY)(HalpAcpiSrat + 1);
         SratEntry < SratEnd;
         SratEntry = (PACPI_SRAT_ENTRY)NEXT_ENTRY(SratEntry)) {

        //
        // Does this entry belong to a proximity domain not previously
        // seen?  If so, we have a new node.
        //
        // NOTE(review): this check runs before the entry's type and
        // Enabled flag are examined, so disabled entries (and entries of
        // other types) also contribute proximity domains -- confirm that
        // this is intended.
        //

        for (i = 0; i < HalpNumaConfig->NodeCount; i++) {
            if (SratEntry->ProximityDomain == HalpNumaConfig->ProximityId[i]) {
                break;
            }
        }

        if (i == HalpNumaConfig->NodeCount) {

            //
            // This is an ID we haven't seen before.  New Node.
            //

            if (HalpNumaConfig->NodeCount >= MAXIMUM_CCNUMA_NODES) {

                //
                // We support a limited number of nodes, make this machine
                // not NUMA.  (Yes, we should free the config space
                // we allocated,... but this is an error when it happens
                // so I'm not worrying about it.  peterj).
                //

                HalpAcpiSrat = NULL;
                return;
            }
            HalpNumaConfig->ProximityId[i] = SratEntry->ProximityDomain;
            HalpNumaConfig->NodeCount++;
        }

        switch (SratEntry->Type) {
        case SratProcessorLocalAPIC:
            if (SratEntry->ApicAffinity.Flags.Enabled == 0) {

                //
                // This processor is not enabled, skip it.
                //

                continue;
            }
            if (ProcessorCount == HAL_MAX_PROCESSORS) {

                //
                // Can't handle any more processors.  Turn this
                // into a non-numa machine.
                //

                HalpAcpiSrat = NULL;
                return;
            }
            HalpNumaConfig->ProcessorApicId[ProcessorCount] =
#if defined(_IA64_)
                SratEntry->ApicAffinity.ApicId << 8 |
                (SratEntry->ApicAffinity.SApicEid);
#else
                SratEntry->ApicAffinity.ApicId;
#endif
            HalpNumaConfig->ProcessorProximity[ProcessorCount] =
                SratEntry->ProximityDomain;
            ProcessorCount++;
            break;

        case SratMemory:
            if (SratEntry->MemoryAffinity.Flags.Enabled == 0) {

                //
                // This memory is not enabled, skip it.
                //

                continue;
            }

            //
            // Save the proximity and the base page for this range.
            //

            HalpNumaMemoryNode[MemoryDescriptorCount] =
                SratEntry->ProximityDomain;
            Base = SratEntry->MemoryAffinity.Base;
            Base.QuadPart >>= PAGE_SHIFT;
#if !defined(_WIN64)
            ASSERT(Base.u.HighPart == 0);
#endif
            HalpNumaMemoryRanges[MemoryDescriptorCount] = (ULONG_PTR) Base.QuadPart;

            //
            // Explicitly ignore the entry's MemoryAffinity.Length as
            // the code treats anything up to the next greatest range
            // as associated with this entry.
            //

            MemoryDescriptorCount++;
            break;
        }
    }

    HalpNumaConfig->ProcessorCount = ProcessorCount;

    //
    // Make sure processor 0 is always in 'logical' node 0.  This
    // is achieved by making sure the proximity Id for the first
    // processor is always the first proximity Id in the table.
    //
    // The APIC ID is received in a real USHORT rather than through a
    // cast of a ULONG's address, so the value read back does not depend
    // on byte order, and the loop index below no longer doubles as the
    // APIC ID.
    //

    BootApicId = 0;
    if (!NT_SUCCESS(HalpGetApicIdByProcessorNumber(0, &BootApicId))) {

        //
        // Couldn't find the ApicId of processor 0?  Not quite
        // sure what to do, I suspect the MP table's APIC IDs
        // don't match the SRAT's.
        //

        DbgPrint("HAL No APIC ID for boot processor.\n");
    }

    for (j = 0; j < ProcessorCount; j++) {
        if (HalpNumaConfig->ProcessorApicId[j] == BootApicId) {
            UCHAR Proximity = HalpNumaConfig->ProcessorProximity[j];
            for (i = 0; i < HalpNumaConfig->NodeCount; i++) {
                if (HalpNumaConfig->ProximityId[i] == Proximity) {
                    HalpNumaConfig->ProximityId[i] =
                        HalpNumaConfig->ProximityId[0];
                    HalpNumaConfig->ProximityId[0] = Proximity;
                    break;
                }
            }
            break;
        }
    }

    //
    // Sort the memory ranges.  There shouldn't be very many
    // so a bubble sort should suffice.
    //

    j = MemoryDescriptorCount - 1;
    do {
        Swapped = FALSE;
        for (i = 0; i < j; i++) {
            ULONG_PTR t;
            UCHAR td;

            t = HalpNumaMemoryRanges[i];
            if (t > HalpNumaMemoryRanges[i+1]) {
                Swapped = TRUE;
                HalpNumaMemoryRanges[i] = HalpNumaMemoryRanges[i+1];
                HalpNumaMemoryRanges[i+1] = t;

                //
                // Keep the proximity domain in sync with the base.
                //

                td = HalpNumaMemoryNode[i];
                HalpNumaMemoryNode[i] = HalpNumaMemoryNode[i+1];
                HalpNumaMemoryNode[i+1] = td;
            }
        }

        //
        // The highest value is now at the top so cut it from the sort.
        //

        j--;
    } while (Swapped == TRUE);

    //
    // When searching the memory descriptors to find out which domain
    // a page is in, we don't care about gaps, we'll never be asked
    // for a page in a gap, so, if two descriptors refer to the same
    // domain, merge them in place.
    //

    j = 0;
    for (i = 1; i < MemoryDescriptorCount; i++) {
        if (HalpNumaMemoryNode[j] !=
            HalpNumaMemoryNode[i]) {
            j++;
            HalpNumaMemoryNode[j] = HalpNumaMemoryNode[i];
            HalpNumaMemoryRanges[j] = HalpNumaMemoryRanges[i];
            continue;
        }
    }
    MemoryDescriptorCount = j + 1;

    //
    // Terminate the table with ~0 which won't actually correspond to
    // any domain but will always be higher than any valid value.
    //

    HalpNumaMemoryRanges[MemoryDescriptorCount] = (ULONG_PTR) ~0I64;

    //
    // And the base of the lowest range should be 0 even if there
    // are no pages that low.
    //

    HalpNumaMemoryRanges[0] = 0;

    //
    // Convert the proximity IDs in the memory node array to
    // node number.  Node number is the index of the matching
    // entry in proximity ID array.
    //

    for (i = 0; i < MemoryDescriptorCount; i++) {
        for (j = 0; j < HalpNumaConfig->NodeCount; j++) {
            if (HalpNumaMemoryNode[i] == HalpNumaConfig->ProximityId[j]) {
                HalpNumaMemoryNode[i] = (UCHAR)j;
                break;
            }
        }
    }
}
#endif
NTSTATUS
HalpGetAcpiStaticNumaTopology(
    HAL_NUMA_TOPOLOGY_INTERFACE * NumaInfo
    )
/*++

Routine Description:

    Hand the kernel the static NUMA topology derived from the SRAT: the
    node count plus the processor-to-node and page-to-node query
    routines.

Arguments:

    NumaInfo  Receives the node count and the query routine pointers.

Return Value:

    STATUS_SUCCESS if the topology was supplied.
    STATUS_INVALID_LEVEL if no SRAT based configuration exists, or on
    builds where NT_UP is defined.

--*/
{
#if defined(NT_UP)

    //
    // The static NUMA support in this module is compiled out when
    // NT_UP is defined, so there is nothing to report.
    //

    return STATUS_INVALID_LEVEL;

#else

    //
    // This routine is never called unless this ACPI HAL found
    // a Static Resource Affinity Table (SRAT).  But just in case ...
    //

    if (HalpAcpiSrat != NULL) {

        //
        // Fill in the data structure for the kernel.
        //

        NumaInfo->PageToNode = HalpNumaQueryPageToNode;
        NumaInfo->QueryProcessorNode = HalpNumaQueryProcessorNode;
        NumaInfo->NumberOfNodes = HalpNumaConfig->NodeCount;

        return STATUS_SUCCESS;
    }

    return STATUS_INVALID_LEVEL;

#endif
}