You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
648 lines
17 KiB
648 lines
17 KiB
/*++
|
|
|
|
Copyright (c) 2000 Microsoft Corporation
|
|
|
|
Module Name:
|
|
|
|
sratnuma.c
|
|
|
|
Abstract:
|
|
|
|
This module contains functions which support static NUMA configurations
|
|
as provided by the ACPI SRAT "Static Resource Affinity Table".
|
|
|
|
Author:
|
|
|
|
Peter L Johnston (peterj) 2-Jul-2000
|
|
|
|
Environment:
|
|
|
|
Kernel mode only.
|
|
|
|
Revision History:
|
|
|
|
--*/
|
|
|
|
#include "halp.h"
|
|
#include "acpitabl.h"
|
|
#include "xxacpi.h"
|
|
|
|
#if !defined(NT_UP)
|
|
|
|
//
// Advance Address to the following multiple of Alignment.  The full
// Alignment is added before masking, so an address that is already
// aligned still moves forward by one whole Alignment.  The masking only
// yields a multiple of Alignment when Alignment is a power of two.
//

#define ROUNDUP_TO_NEXT(Address, Alignment)                         \
    ((((ULONG_PTR)(Address)) + (Alignment)) & ~((Alignment) - 1))
|
|
|
|
//
|
|
// The following routine is external but only used by NUMA support
|
|
// at the moment.
|
|
//
|
|
|
|
NTSTATUS
|
|
HalpGetApicIdByProcessorNumber(
|
|
IN UCHAR Processor,
|
|
IN OUT USHORT *ApicId
|
|
);
|
|
|
|
//
|
|
// Prototypes for alloc pragmas.
|
|
//
|
|
|
|
VOID
|
|
HalpNumaInitializeStaticConfiguration(
|
|
IN PLOADER_PARAMETER_BLOCK
|
|
);
|
|
|
|
|
|
#if defined(ALLOC_PRAGMA)
|
|
#pragma alloc_text(INIT,HalpNumaInitializeStaticConfiguration)
|
|
#endif
|
|
|
|
//
// Given a pointer to a variable-length table entry, yield the byte
// address immediately following it (the entry's address plus its
// Length field).  The argument is fully parenthesized so that an
// expression such as (Entry + 1) is cast as a whole rather than having
// the cast bind only to its first operand.
//

#define NEXT_ENTRY(base) (((PUCHAR)(base)) + (base)->Length)
|
|
|
|
//
// Capacity of the per-processor arrays kept by this module: 64 entries
// when building for Win64, 32 otherwise.
//

#ifdef _WIN64
#define HAL_MAX_PROCESSORS 64
#else
#define HAL_MAX_PROCESSORS 32
#endif
|
|
|
|
typedef struct _STATIC_NUMA_CONFIG {
|
|
USHORT ProcessorApicId[HAL_MAX_PROCESSORS];
|
|
UCHAR ProcessorProximity[HAL_MAX_PROCESSORS];
|
|
UCHAR ProximityId[MAXIMUM_CCNUMA_NODES];
|
|
UCHAR NodeCount;
|
|
UCHAR ProcessorCount;
|
|
} HALPSRAT_STATIC_NUMA_CONFIG, *PHALPSRAT_STATIC_NUMA_CONFIG;
|
|
|
|
PHALPSRAT_STATIC_NUMA_CONFIG HalpNumaConfig;
|
|
PACPI_SRAT HalpAcpiSrat;
|
|
PULONG_PTR HalpNumaMemoryRanges;
|
|
PUCHAR HalpNumaMemoryNode;
|
|
ULONG HalpNumaLastRangeIndex;
|
|
|
|
ULONG
|
|
HalpNumaQueryPageToNode(
|
|
IN ULONG_PTR PhysicalPageNumber
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Search the memory range descriptors to determine the node
|
|
this page exists on.
|
|
|
|
Arguments:
|
|
|
|
PhysicalPageNumber Provides the page number.
|
|
|
|
Return Value:
|
|
|
|
Returns the node number for the page.
|
|
|
|
--*/
|
|
|
|
{
|
|
ULONG Index = HalpNumaLastRangeIndex;
|
|
|
|
//
|
|
// Starting in the same range as the last page returned,
|
|
// look for this page.
|
|
//
|
|
|
|
if (PhysicalPageNumber >= HalpNumaMemoryRanges[Index]) {
|
|
|
|
//
|
|
// Search upwards.
|
|
//
|
|
|
|
while (PhysicalPageNumber >= HalpNumaMemoryRanges[Index+1]) {
|
|
Index++;
|
|
}
|
|
|
|
} else {
|
|
|
|
//
|
|
// Search downwards.
|
|
//
|
|
|
|
do {
|
|
Index--;
|
|
} while (PhysicalPageNumber < HalpNumaMemoryRanges[Index]);
|
|
}
|
|
|
|
HalpNumaLastRangeIndex = Index;
|
|
return HalpNumaMemoryNode[Index];
|
|
}
|
|
|
|
NTSTATUS
|
|
HalpNumaQueryProcessorNode(
|
|
IN ULONG ProcessorNumber,
|
|
OUT PUSHORT Identifier,
|
|
OUT PUCHAR Node
|
|
)
|
|
{
|
|
NTSTATUS Status;
|
|
USHORT ApicId;
|
|
UCHAR Proximity;
|
|
UCHAR i, j;
|
|
|
|
//
|
|
// Get the APIC Id for this processor.
|
|
//
|
|
|
|
Status = HalpGetApicIdByProcessorNumber((UCHAR)ProcessorNumber, &ApicId);
|
|
if (!NT_SUCCESS(Status)) {
|
|
return Status;
|
|
}
|
|
|
|
//
|
|
// Return the APIC Id as the Identifier. This should probably
|
|
// be the ACPI Id but we don't have a way to get that yet.
|
|
//
|
|
|
|
*Identifier = ApicId;
|
|
|
|
//
|
|
// Find the node this processor belongs to. The node is the
|
|
// index into the array of Proximity Ids for the entry corresponding
|
|
// to the Proximity Id of this processor.
|
|
//
|
|
|
|
for (i = 0; i < HalpNumaConfig->ProcessorCount; i++) {
|
|
if (HalpNumaConfig->ProcessorApicId[i] == ApicId) {
|
|
Proximity = HalpNumaConfig->ProcessorProximity[i];
|
|
for (j = 0; j < HalpNumaConfig->NodeCount; j++) {
|
|
if (HalpNumaConfig->ProximityId[j] == Proximity) {
|
|
*Node = j;
|
|
return STATUS_SUCCESS;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
//
|
|
// Didn't find this processor in the known set of APIC IDs, this
|
|
// would indicate a mismatch between the BIOS MP tables and the
|
|
// SRAT, or, didn't find the proximity for this processor in the
|
|
// table of proximity IDs. This would be an internal error as
|
|
// this array is build from the set of proximity IDs in the SRAT.
|
|
//
|
|
|
|
return STATUS_NOT_FOUND;
|
|
}
|
|
|
|
VOID
|
|
HalpNumaInitializeStaticConfiguration(
|
|
IN PLOADER_PARAMETER_BLOCK LoaderBlock
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine reads the ACPI Static Resource Affinity Table to build
|
|
a picture of the system's NUMA configuration. This information is
|
|
saved in the HalpNumaConfig structure in a form which is optimal for
|
|
the OS's use.
|
|
|
|
Arguments:
|
|
|
|
LoaderBlock supplies a pointer to the system loader parameter block.
|
|
|
|
Return Value:
|
|
|
|
None.
|
|
|
|
--*/
|
|
|
|
{
|
|
ULONG MemoryDescriptorCount;
|
|
UCHAR ProcessorCount;
|
|
PACPI_SRAT_ENTRY SratEntry;
|
|
PACPI_SRAT_ENTRY SratEnd;
|
|
ULONG i, j;
|
|
BOOLEAN Swapped;
|
|
PHYSICAL_ADDRESS Base;
|
|
ULONG_PTR p;
|
|
ULONG_PTR Phys;
|
|
|
|
HalpAcpiSrat = HalpGetAcpiTablePhase0(LoaderBlock, ACPI_SRAT_SIGNATURE);
|
|
if (HalpAcpiSrat == NULL) {
|
|
return;
|
|
}
|
|
|
|
//
|
|
// The Static Resource Affinity Table (SRAT) exists.
|
|
//
|
|
// Scan it to determine the number of memory descriptors then
|
|
// allocate memory to contain the tables needed to hold the
|
|
// system's NUMA configuration.
|
|
//
|
|
|
|
MemoryDescriptorCount = 0;
|
|
ProcessorCount = 0;
|
|
SratEnd = (PACPI_SRAT_ENTRY)(((PUCHAR)HalpAcpiSrat) +
|
|
HalpAcpiSrat->Header.Length);
|
|
for (SratEntry = (PACPI_SRAT_ENTRY)(HalpAcpiSrat + 1);
|
|
SratEntry < SratEnd;
|
|
SratEntry = (PACPI_SRAT_ENTRY)NEXT_ENTRY(SratEntry)) {
|
|
switch (SratEntry->Type) {
|
|
case SratMemory:
|
|
if (SratEntry->MemoryAffinity.Flags.Enabled == 1) {
|
|
|
|
MemoryDescriptorCount++;
|
|
}
|
|
break;
|
|
case SratProcessorLocalAPIC:
|
|
|
|
if (SratEntry->ApicAffinity.Flags.Enabled == 1) {
|
|
|
|
ProcessorCount++;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
|
|
if ((MemoryDescriptorCount == 0) || (ProcessorCount == 0)) {
|
|
//
|
|
// Can't handle the case where there is either no memory or no
|
|
// processors in the table. Turn this into a non-numa
|
|
// machine.
|
|
//
|
|
|
|
HalpAcpiSrat = NULL;
|
|
return;
|
|
}
|
|
|
|
//
|
|
// HalpNumaConfig format:
|
|
//
|
|
// HalpNumaConfig->
|
|
// USHORT ProcessorApicId[HAL_MAX_PROCESSORS];
|
|
// UCHAR ProcessorProximity[HAL_MAX_PROCESSORS];
|
|
// UCHAR ProximityIds[MAXIMUM_CCNUMA_NODES];
|
|
// UCHAR NodeCount;
|
|
// -pad- to 128 byte boundary
|
|
// HalpNumaMemoryNode->
|
|
// UCHAR MemoryRangeProximityId[NumberOfMemoryRanges];
|
|
// -pad to ULONG_PTR alignment-
|
|
// HalpNumaMemoryRanges->
|
|
// ULONG_PTR MemoryRangeBasePage[NumberOfMemoryRanges];
|
|
//
|
|
// This format has been selected to maximize cache hits while
|
|
// searching the ranges. Specifically, the size of the ranges
|
|
// array is kept to a minumum.
|
|
//
|
|
// NOTE: This code does not account for the length of the memory
|
|
// ranges specified in the SRAT table. Instead it treats each
|
|
// memory range as extending to the next specified memory range.
|
|
// The rationale is that the code shouldn't be asked about pages
|
|
// not found in the SRAT table and if we are to return something
|
|
// for these pages it might as well be the NUMA node associated
|
|
// with the pages from the previous range.
|
|
//
|
|
|
|
//
|
|
// Calculate number of pages required to hold the needed structures.
|
|
//
|
|
|
|
i = MemoryDescriptorCount * (sizeof(ULONG_PTR) + sizeof(UCHAR)) +
|
|
sizeof(HALPSRAT_STATIC_NUMA_CONFIG) + 2 * sizeof(ULONG_PTR) +
|
|
128 + sizeof(ULONG_PTR);
|
|
i += PAGE_SIZE - 1;
|
|
i >>= PAGE_SHIFT;
|
|
|
|
Phys = (ULONG_PTR)HalpAllocPhysicalMemory(LoaderBlock,
|
|
MAXIMUM_PHYSICAL_ADDRESS,
|
|
i,
|
|
FALSE);
|
|
if (Phys == 0) {
|
|
|
|
//
|
|
// Allocation failed, the system will not be able to run
|
|
// as a NUMA system,.... actually the system will probably
|
|
// not run far at all.
|
|
//
|
|
|
|
DbgPrint("HAL NUMA Initialization failed, could not allocate %d pages\n",
|
|
i);
|
|
|
|
HalpAcpiSrat = NULL;
|
|
return;
|
|
}
|
|
Base.QuadPart = (ULONG_PTR)Phys;
|
|
|
|
#if !defined(_IA64_)
|
|
|
|
HalpNumaConfig = HalpMapPhysicalMemory(Base, 1);
|
|
|
|
#else
|
|
|
|
HalpNumaConfig = HalpMapPhysicalMemory(Base, 1, MmCached);
|
|
|
|
#endif
|
|
|
|
if (HalpNumaConfig == NULL) {
|
|
|
|
//
|
|
// Couldn't map the allocation, give up.
|
|
//
|
|
|
|
HalpAcpiSrat = NULL;
|
|
return;
|
|
}
|
|
RtlZeroMemory(HalpNumaConfig, i * PAGE_SIZE);
|
|
|
|
//
|
|
// MemoryRangeProximity is an array of UCHARs starting at the next
|
|
// 128 byte boundary.
|
|
//
|
|
|
|
p = ROUNDUP_TO_NEXT((HalpNumaConfig + 1), 128);
|
|
HalpNumaMemoryNode = (PUCHAR)p;
|
|
|
|
//
|
|
// NumaMemoryRanges is an array of ULONG_PTRs starting at the next
|
|
// ULONG_PTR boundary.
|
|
//
|
|
|
|
p += (MemoryDescriptorCount + sizeof(ULONG_PTR)) & ~(sizeof(ULONG_PTR) - 1);
|
|
HalpNumaMemoryRanges = (PULONG_PTR)p;
|
|
|
|
//
|
|
// Rescan the SRAT entries filling in the HalpNumaConfig structure.
|
|
//
|
|
|
|
ProcessorCount = 0;
|
|
MemoryDescriptorCount = 0;
|
|
|
|
for (SratEntry = (PACPI_SRAT_ENTRY)(HalpAcpiSrat + 1);
|
|
SratEntry < SratEnd;
|
|
SratEntry = (PACPI_SRAT_ENTRY)NEXT_ENTRY(SratEntry)) {
|
|
|
|
//
|
|
// Does this entry belong to a proximity domain not previously
|
|
// seen? If so, we have a new node.
|
|
//
|
|
|
|
for (i = 0; i < HalpNumaConfig->NodeCount; i++) {
|
|
if (SratEntry->ProximityDomain == HalpNumaConfig->ProximityId[i]) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (i == HalpNumaConfig->NodeCount) {
|
|
|
|
//
|
|
// This is an ID we haven't seen before. New Node.
|
|
//
|
|
|
|
if (HalpNumaConfig->NodeCount >= MAXIMUM_CCNUMA_NODES) {
|
|
|
|
//
|
|
// We support a limited number of nodes, make this machine
|
|
// not NUMA. (Yes, we should free the config space
|
|
// we allocated,... but this is an error when it happens
|
|
// so I'm not worrying about it. peterj).
|
|
//
|
|
|
|
HalpAcpiSrat = NULL;
|
|
return;
|
|
}
|
|
HalpNumaConfig->ProximityId[i] = SratEntry->ProximityDomain;
|
|
HalpNumaConfig->NodeCount++;
|
|
}
|
|
|
|
switch (SratEntry->Type) {
|
|
case SratProcessorLocalAPIC:
|
|
|
|
if (SratEntry->ApicAffinity.Flags.Enabled == 0) {
|
|
|
|
//
|
|
// This processor is not enabled, skip it.
|
|
//
|
|
|
|
continue;
|
|
}
|
|
if (ProcessorCount == HAL_MAX_PROCESSORS) {
|
|
|
|
//
|
|
// Can't handle any more processors. Turn this
|
|
// into a non-numa machine.
|
|
//
|
|
|
|
HalpAcpiSrat = NULL;
|
|
return;
|
|
}
|
|
HalpNumaConfig->ProcessorApicId[ProcessorCount] =
|
|
|
|
#if defined(_IA64_)
|
|
|
|
SratEntry->ApicAffinity.ApicId << 8 |
|
|
(SratEntry->ApicAffinity.SApicEid);
|
|
|
|
#else
|
|
|
|
SratEntry->ApicAffinity.ApicId;
|
|
|
|
#endif
|
|
|
|
HalpNumaConfig->ProcessorProximity[ProcessorCount] =
|
|
SratEntry->ProximityDomain;
|
|
ProcessorCount++;
|
|
break;
|
|
case SratMemory:
|
|
|
|
if (SratEntry->MemoryAffinity.Flags.Enabled == 0) {
|
|
|
|
//
|
|
// This memory is not enabled, skip it.
|
|
//
|
|
|
|
continue;
|
|
}
|
|
|
|
//
|
|
// Save the proximity and the base page for this range.
|
|
//
|
|
|
|
HalpNumaMemoryNode[MemoryDescriptorCount] =
|
|
SratEntry->ProximityDomain;
|
|
Base = SratEntry->MemoryAffinity.Base;
|
|
Base.QuadPart >>= PAGE_SHIFT;
|
|
|
|
#if !defined(_WIN64)
|
|
ASSERT(Base.u.HighPart == 0);
|
|
#endif
|
|
|
|
HalpNumaMemoryRanges[MemoryDescriptorCount] = (ULONG_PTR) Base.QuadPart;
|
|
|
|
//
|
|
// Explicitly ignore the entry's MemoryAffinity.Length as
|
|
// the code treats anything up to the next greatest range
|
|
// as associated with this entry.
|
|
//
|
|
|
|
MemoryDescriptorCount++;
|
|
break;
|
|
}
|
|
}
|
|
|
|
HalpNumaConfig->ProcessorCount = ProcessorCount;
|
|
|
|
//
|
|
// Make sure processor 0 is always in 'logical' node 0. This
|
|
// is achieved by making sure the proximity Id for the first
|
|
// processor is always the first proximity Id in the table.
|
|
//
|
|
|
|
i = 0;
|
|
if (!NT_SUCCESS(HalpGetApicIdByProcessorNumber(0, (PUSHORT)&i))) {
|
|
|
|
//
|
|
// Couldn't find the ApicId of processor 0? Not quite
|
|
// sure what to do, I suspect the MP table's APIC IDs
|
|
// don't match the SRAT's.
|
|
//
|
|
|
|
DbgPrint("HAL No APIC ID for boot processor.\n");
|
|
}
|
|
|
|
for (j = 0; j < ProcessorCount; j++) {
|
|
if (HalpNumaConfig->ProcessorApicId[j] == (USHORT)i) {
|
|
UCHAR Proximity = HalpNumaConfig->ProcessorProximity[j];
|
|
for (i = 0; i < HalpNumaConfig->NodeCount; i++) {
|
|
if (HalpNumaConfig->ProximityId[i] == Proximity) {
|
|
HalpNumaConfig->ProximityId[i] =
|
|
HalpNumaConfig->ProximityId[0];
|
|
HalpNumaConfig->ProximityId[0] = Proximity;
|
|
break;
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
|
|
//
|
|
// Sort the memory ranges. There shouldn't be very many
|
|
// so a bubble sort should suffice.
|
|
//
|
|
|
|
j = MemoryDescriptorCount - 1;
|
|
do {
|
|
Swapped = FALSE;
|
|
for (i = 0; i < j; i++) {
|
|
|
|
ULONG_PTR t;
|
|
UCHAR td;
|
|
|
|
t = HalpNumaMemoryRanges[i];
|
|
if (t > HalpNumaMemoryRanges[i+1]) {
|
|
Swapped = TRUE;
|
|
HalpNumaMemoryRanges[i] = HalpNumaMemoryRanges[i+1];
|
|
HalpNumaMemoryRanges[i+1] = t;
|
|
|
|
//
|
|
// Keep the proximity domain in sync with the base.
|
|
//
|
|
|
|
td = HalpNumaMemoryNode[i];
|
|
HalpNumaMemoryNode[i] = HalpNumaMemoryNode[i+1];
|
|
HalpNumaMemoryNode[i+1] = td;
|
|
}
|
|
}
|
|
|
|
//
|
|
// The highest value is now at the top so cut it from the sort.
|
|
//
|
|
|
|
j--;
|
|
} while (Swapped == TRUE);
|
|
|
|
//
|
|
// When searching the memory descriptors to find out which domain
|
|
// a page is in, we don't care about gaps, we'll never be asked
|
|
// for a page in a gap, so, if two descriptors refer to the same
|
|
// domain, merge them in place.
|
|
//
|
|
|
|
j = 0;
|
|
for (i = 1; i < MemoryDescriptorCount; i++) {
|
|
if (HalpNumaMemoryNode[j] !=
|
|
HalpNumaMemoryNode[i]) {
|
|
j++;
|
|
HalpNumaMemoryNode[j] = HalpNumaMemoryNode[i];
|
|
HalpNumaMemoryRanges[j] = HalpNumaMemoryRanges[i];
|
|
continue;
|
|
}
|
|
}
|
|
|
|
MemoryDescriptorCount = j + 1;
|
|
|
|
//
|
|
// Terminate the table with ~0 which won't actually correspond to
|
|
// any domain but will always be higher than any valid value.
|
|
//
|
|
|
|
HalpNumaMemoryRanges[MemoryDescriptorCount] = (ULONG_PTR) ~0I64;
|
|
|
|
//
|
|
// And the base of the lowest range should be 0 even if there
|
|
// are no pages that low.
|
|
//
|
|
|
|
HalpNumaMemoryRanges[0] = 0;
|
|
|
|
//
|
|
// Convert the proximity IDs in the memory node array to
|
|
// node number. Node number is the index of the matching
|
|
// entry in proximity ID array.
|
|
//
|
|
|
|
for (i= 0; i < MemoryDescriptorCount; i++) {
|
|
for (j = 0; j < HalpNumaConfig->NodeCount; j++) {
|
|
if (HalpNumaMemoryNode[i] == HalpNumaConfig->ProximityId[j]) {
|
|
HalpNumaMemoryNode[i] = (UCHAR)j;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
#endif
|
|
|
|
NTSTATUS
|
|
HalpGetAcpiStaticNumaTopology(
|
|
HAL_NUMA_TOPOLOGY_INTERFACE * NumaInfo
|
|
)
|
|
{
|
|
#if !defined(NT_UP)
|
|
|
|
//
|
|
// This routine is never called unless this ACPI HAL found
|
|
// a Static Resource Affinity Table (SRAT). But just in case ...
|
|
//
|
|
|
|
if (HalpAcpiSrat == NULL) {
|
|
return STATUS_INVALID_LEVEL;
|
|
}
|
|
|
|
//
|
|
// Fill in the data structure for the kernel.
|
|
//
|
|
|
|
NumaInfo->NumberOfNodes = HalpNumaConfig->NodeCount;
|
|
NumaInfo->QueryProcessorNode = HalpNumaQueryProcessorNode;
|
|
NumaInfo->PageToNode = HalpNumaQueryPageToNode;
|
|
return STATUS_SUCCESS;
|
|
|
|
#else
|
|
|
|
return STATUS_INVALID_LEVEL;
|
|
|
|
#endif
|
|
}
|
|
|