/*++

Copyright (c) 2000  Microsoft Corporation

Module Name:

Abstract:

    This module contains functions which support static NUMA
    configurations as provided by the ACPI SRAT "Static Resource
    Affinity Table".

Author:

    Peter L Johnston (peterj) 2-Jul-2000

Environment:

    Kernel mode only.

Revision History:

--*/
#include "halp.h"
#include "acpitabl.h"
#include "xxacpi.h"
#if !defined(NT_UP)
//
// ROUNDUP_TO_NEXT(base, size)
//
// Advance base to the next multiple of size and yield it as a ULONG_PTR.
// size must be a power of two.  A base that is already a multiple of
// size is still advanced by a full size (hence "NEXT").
//
// size is widened to ULONG_PTR before the mask is formed so that a
// 32-bit unsigned size cannot zero-extend and clear the upper half of
// a 64-bit address.
//

#define ROUNDUP_TO_NEXT(base, size) \
    ((((ULONG_PTR)(base)) + ((ULONG_PTR)(size))) & ~(((ULONG_PTR)(size)) - 1))
//
// This routine is implemented outside this module.  For the time
// being the NUMA support is its only user.
//

NTSTATUS
HalpGetApicIdByProcessorNumber(
    IN     UCHAR   Processor,
    IN OUT USHORT *ApicId
    );
//
// Forward declaration required so the alloc_text pragma can name
// the routine.
//

VOID
HalpNumaInitializeStaticConfiguration(
    IN PLOADER_PARAMETER_BLOCK
    );
//
// When alloc pragmas are enabled, place the static configuration
// routine in the INIT code section.  The conditional is closed right
// after the pragma so that nothing else in the file depends on
// ALLOC_PRAGMA.
//

#if defined(ALLOC_PRAGMA)
#pragma alloc_text(INIT,HalpNumaInitializeStaticConfiguration)
#endif
//
// NEXT_ENTRY(base)
//
// Yield a byte pointer to the entry that follows base, where base
// points to a structure carrying its own size in a Length field.
// The argument is fully parenthesized so that expressions such as
// NEXT_ENTRY(p + 1) are cast as a whole before the length is added.
//

#define NEXT_ENTRY(base) (((PUCHAR)(base)) + (base)->Length)
#if defined(_WIN64)
//
// Static NUMA configuration built from the SRAT by
// HalpNumaInitializeStaticConfiguration.
//

PHALPSRAT_STATIC_NUMA_CONFIG HalpNumaConfig;

//
// The SRAT itself.  Reset to NULL whenever static NUMA support
// cannot be set up.
//

PACPI_SRAT HalpAcpiSrat;

//
// Base page number of each memory range, in ascending order and
// terminated with an all-ones entry.
//

PULONG_PTR HalpNumaMemoryRanges;

//
// Node number of each entry in HalpNumaMemoryRanges.
//

PUCHAR HalpNumaMemoryNode;

//
// Index of the range found by the most recent page to node query,
// used as the starting point of the next query.
//

ULONG HalpNumaLastRangeIndex;
ULONG
HalpNumaQueryPageToNode(
    IN ULONG_PTR PhysicalPageNumber
    )

/*++

Routine Description:

    Search the memory range descriptors to determine the node this
    page exists on.

Arguments:

    PhysicalPageNumber  Provides the page number.

Return Value:

    Returns the node number for the page.

--*/

{
    ULONG Range;

    //
    // Begin with the range that satisfied the previous query and walk
    // from there towards the range containing this page.
    //

    Range = HalpNumaLastRangeIndex;

    if (PhysicalPageNumber < HalpNumaMemoryRanges[Range]) {

        //
        // The page lies below the cached range, step down until a
        // range base at or below the page is reached.
        //

        do {
            Range -= 1;
        } while (HalpNumaMemoryRanges[Range] > PhysicalPageNumber);

    } else {

        //
        // The page lies in or above the cached range, step up while
        // the following range also starts at or below the page.
        //

        while (HalpNumaMemoryRanges[Range + 1] <= PhysicalPageNumber) {
            Range += 1;
        }
    }

    //
    // Remember where the search ended for the benefit of the next
    // caller.
    //

    HalpNumaLastRangeIndex = Range;

    return HalpNumaMemoryNode[Range];
}
NTSTATUS
HalpNumaQueryProcessorNode(
    IN ULONG ProcessorNumber,
    OUT PUSHORT Identifier,
    OUT PUCHAR Node
    )

/*++

Routine Description:

    Look up the APIC Id of a processor and the NUMA node that
    processor belongs to, using the configuration held in
    HalpNumaConfig.

Arguments:

    ProcessorNumber  Supplies the number of the processor to look up.

    Identifier       Receives the APIC Id of the processor.

    Node             Receives the node number of the processor.  Only
                     written when STATUS_SUCCESS is returned.

Return Value:

    STATUS_SUCCESS if the node was found.

    The failure status from HalpGetApicIdByProcessorNumber if the APIC
    Id could not be obtained.

    STATUS_NOT_FOUND if the processor's APIC Id or its proximity Id is
    not present in the NUMA configuration.

--*/

{
    NTSTATUS Status;
    USHORT ApicId;
    UCHAR Proximity;
    UCHAR i, j;

    //
    // Get the APIC Id for this processor.
    //

    Status = HalpGetApicIdByProcessorNumber((UCHAR)ProcessorNumber, &ApicId);

    if (!NT_SUCCESS(Status)) {
        return Status;
    }

    //
    // Return the APIC Id as the Identifier.   This should probably
    // be the ACPI Id but we don't have a way to get that yet.
    //

    *Identifier = ApicId;

    //
    // Find the node this processor belongs to.   The node is the
    // index into the array of Proximity Ids for the entry corresponding
    // to the Proximity Id of this processor.
    //

    for (i = 0; i < HalpNumaConfig->ProcessorCount; i++) {
        if (HalpNumaConfig->ProcessorApicId[i] == ApicId) {
            Proximity = HalpNumaConfig->ProcessorProximity[i];
            for (j = 0; j < HalpNumaConfig->NodeCount; j++) {
                if (HalpNumaConfig->ProximityId[j] == Proximity) {
                    *Node = j;
                    return STATUS_SUCCESS;
                }
            }
        }
    }

    //
    // Didn't find this processor in the known set of APIC IDs, this
    // would indicate a mismatch between the BIOS MP tables and the
    // SRAT, or, didn't find the proximity for this processor in the
    // table of proximity IDs.   This would be an internal error as
    // this array is built from the set of proximity IDs in the SRAT.
    //

    return STATUS_NOT_FOUND;
}
VOID HalpNumaInitializeStaticConfiguration( IN PLOADER_PARAMETER_BLOCK LoaderBlock )
Routine Description:
This routine reads the ACPI Static Resource Affinity Table to build a picture of the system's NUMA configuration. This information is saved in the HalpNumaConfig structure in a form which is optimal for the OS's use.
LoaderBlock supplies a pointer to the system loader parameter block.
Return Value:
{ ULONG MemoryDescriptorCount; UCHAR ProcessorCount; PACPI_SRAT_ENTRY SratEntry; PACPI_SRAT_ENTRY SratEnd; ULONG i, j; BOOLEAN Swapped; PHYSICAL_ADDRESS Base; ULONG_PTR p; ULONG_PTR Phys;
HalpAcpiSrat = HalpGetAcpiTablePhase0(LoaderBlock, ACPI_SRAT_SIGNATURE); if (HalpAcpiSrat == NULL) { return; }
// The Static Resource Affinity Table (SRAT) exists.
// Scan it to determine the number of memory descriptors then
// allocate memory to contain the tables needed to hold the
// system's NUMA configuration.
MemoryDescriptorCount = 0; ProcessorCount = 0; SratEnd = (PACPI_SRAT_ENTRY)(((PUCHAR)HalpAcpiSrat) + HalpAcpiSrat->Header.Length); for (SratEntry = (PACPI_SRAT_ENTRY)(HalpAcpiSrat + 1); SratEntry < SratEnd; SratEntry = (PACPI_SRAT_ENTRY)NEXT_ENTRY(SratEntry)) { switch (SratEntry->Type) { case SratMemory: if (SratEntry->MemoryAffinity.Flags.Enabled == 1) {
MemoryDescriptorCount++; } break; case SratProcessorLocalAPIC:
if (SratEntry->ApicAffinity.Flags.Enabled == 1) {
ProcessorCount++; } break; } }
if ((MemoryDescriptorCount == 0) || (ProcessorCount == 0)) { //
// Can't handle the case where there is either no memory or no
// processors in the table. Turn this into a non-numa
// machine.
HalpAcpiSrat = NULL; return; }
// HalpNumaConfig format:
// HalpNumaConfig->
// UCHAR ProcessorProximity[HAL_MAX_PROCESSORS];
// UCHAR NodeCount;
// -pad- to 128 byte boundary
// HalpNumaMemoryNode->
// UCHAR MemoryRangeProximityId[NumberOfMemoryRanges];
// -pad to ULONG_PTR alignment-
// HalpNumaMemoryRanges->
// ULONG_PTR MemoryRangeBasePage[NumberOfMemoryRanges];
// This format has been selected to maximize cache hits while
// searching the ranges. Specifically, the size of the ranges
// array is kept to a minumum.
// NOTE: This code does not account for the length of the memory
// ranges specified in the SRAT table. Instead it treats each
// memory range as extending to the next specified memory range.
// The rationale is that the code shouldn't be asked about pages
// not found in the SRAT table and if we are to return something
// for these pages it might as well be the NUMA node associated
// with the pages from the previous range.
// Calculate number of pages required to hold the needed structures.
i = MemoryDescriptorCount * (sizeof(ULONG_PTR) + sizeof(UCHAR)) + sizeof(HALPSRAT_STATIC_NUMA_CONFIG) + 2 * sizeof(ULONG_PTR) + 128 + sizeof(ULONG_PTR); i += PAGE_SIZE - 1; i >>= PAGE_SHIFT;
Phys = (ULONG_PTR)HalpAllocPhysicalMemory(LoaderBlock, MAXIMUM_PHYSICAL_ADDRESS, i, FALSE); if (Phys == 0) {
// Allocation failed, the system will not be able to run
// as a NUMA system,.... actually the system will probably
// not run far at all.
DbgPrint("HAL NUMA Initialization failed, could not allocate %d pages\n", i);
HalpAcpiSrat = NULL; return; } Base.QuadPart = (ULONG_PTR)Phys;
#if !defined(_IA64_)
HalpNumaConfig = HalpMapPhysicalMemory(Base, 1);
HalpNumaConfig = HalpMapPhysicalMemory(Base, 1, MmCached);
if (HalpNumaConfig == NULL) {
// Couldn't map the allocation, give up.
HalpAcpiSrat = NULL; return; } RtlZeroMemory(HalpNumaConfig, i * PAGE_SIZE);
// MemoryRangeProximity is an array of UCHARs starting at the next
// 128 byte boundary.
p = ROUNDUP_TO_NEXT((HalpNumaConfig + 1), 128); HalpNumaMemoryNode = (PUCHAR)p;
// NumaMemoryRanges is an array of ULONG_PTRs starting at the next
// ULONG_PTR boundary.
p += (MemoryDescriptorCount + sizeof(ULONG_PTR)) & ~(sizeof(ULONG_PTR) - 1); HalpNumaMemoryRanges = (PULONG_PTR)p;
// Rescan the SRAT entries filling in the HalpNumaConfig structure.
ProcessorCount = 0; MemoryDescriptorCount = 0;
for (SratEntry = (PACPI_SRAT_ENTRY)(HalpAcpiSrat + 1); SratEntry < SratEnd; SratEntry = (PACPI_SRAT_ENTRY)NEXT_ENTRY(SratEntry)) {
// Does this entry belong to a proximity domain not previously
// seen? If so, we have a new node.
for (i = 0; i < HalpNumaConfig->NodeCount; i++) { if (SratEntry->ProximityDomain == HalpNumaConfig->ProximityId[i]) { break; } }
if (i == HalpNumaConfig->NodeCount) {
// This is an ID we haven't seen before. New Node.
if (HalpNumaConfig->NodeCount >= MAXIMUM_CCNUMA_NODES) {
// We support a limited number of nodes, make this machine
// not NUMA. (Yes, we should free the config space
// we allocated,... but this is an error when it happens
// so I'm not worrying about it. peterj).
HalpAcpiSrat = NULL; return; } HalpNumaConfig->ProximityId[i] = SratEntry->ProximityDomain; HalpNumaConfig->NodeCount++; }
switch (SratEntry->Type) { case SratProcessorLocalAPIC:
if (SratEntry->ApicAffinity.Flags.Enabled == 0) {
// This processor is not enabled, skip it.
continue; } if (ProcessorCount == HAL_MAX_PROCESSORS) {
// Can't handle any more processors. Turn this
// into a non-numa machine.
HalpAcpiSrat = NULL; return; } HalpNumaConfig->ProcessorApicId[ProcessorCount] =
#if defined(_IA64_)
SratEntry->ApicAffinity.ApicId << 8 | (SratEntry->ApicAffinity.SApicEid);
HalpNumaConfig->ProcessorProximity[ProcessorCount] = SratEntry->ProximityDomain; ProcessorCount++; break; case SratMemory:
if (SratEntry->MemoryAffinity.Flags.Enabled == 0) {
// This memory is not enabled, skip it.
continue; }
// Save the proximity and the base page for this range.
HalpNumaMemoryNode[MemoryDescriptorCount] = SratEntry->ProximityDomain; Base = SratEntry->MemoryAffinity.Base; Base.QuadPart >>= PAGE_SHIFT;
#if !defined(_WIN64)
ASSERT(Base.u.HighPart == 0); #endif
HalpNumaMemoryRanges[MemoryDescriptorCount] = (ULONG_PTR) Base.QuadPart;
// Explicitly ignore the entry's MemoryAffinity.Length as
// the code treats anything up to the next greatest range
// as associated with this entry.
MemoryDescriptorCount++; break; } }
HalpNumaConfig->ProcessorCount = ProcessorCount;
// Make sure processor 0 is always in 'logical' node 0. This
// is achieved by making sure the proximity Id for the first
// processor is always the first proximity Id in the table.
i = 0; if (!NT_SUCCESS(HalpGetApicIdByProcessorNumber(0, (PUSHORT)&i))) {
// Couldn't find the ApicId of processor 0? Not quite
// sure what to do, I suspect the MP table's APIC IDs
// don't match the SRAT's.
DbgPrint("HAL No APIC ID for boot processor.\n"); }
for (j = 0; j < ProcessorCount; j++) { if (HalpNumaConfig->ProcessorApicId[j] == (USHORT)i) { UCHAR Proximity = HalpNumaConfig->ProcessorProximity[j]; for (i = 0; i < HalpNumaConfig->NodeCount; i++) { if (HalpNumaConfig->ProximityId[i] == Proximity) { HalpNumaConfig->ProximityId[i] = HalpNumaConfig->ProximityId[0]; HalpNumaConfig->ProximityId[0] = Proximity; break; } } break; } }
// Sort the memory ranges. There shouldn't be very many
// so a bubble sort should suffice.
j = MemoryDescriptorCount - 1; do { Swapped = FALSE; for (i = 0; i < j; i++) { ULONG_PTR t; UCHAR td; t = HalpNumaMemoryRanges[i]; if (t > HalpNumaMemoryRanges[i+1]) { Swapped = TRUE; HalpNumaMemoryRanges[i] = HalpNumaMemoryRanges[i+1]; HalpNumaMemoryRanges[i+1] = t;
// Keep the proximity domain in sync with the base.
td = HalpNumaMemoryNode[i]; HalpNumaMemoryNode[i] = HalpNumaMemoryNode[i+1]; HalpNumaMemoryNode[i+1] = td; } }
// The highest value is now at the top so cut it from the sort.
j--; } while (Swapped == TRUE);
// When searching the memory descriptors to find out which domain
// a page is in, we don't care about gaps, we'll never be asked
// for a page in a gap, so, if two descriptors refer to the same
// domain, merge them in place.
j = 0; for (i = 1; i < MemoryDescriptorCount; i++) { if (HalpNumaMemoryNode[j] != HalpNumaMemoryNode[i]) { j++; HalpNumaMemoryNode[j] = HalpNumaMemoryNode[i]; HalpNumaMemoryRanges[j] = HalpNumaMemoryRanges[i]; continue; } }
MemoryDescriptorCount = j + 1;
// Terminate the table with ~0 which won't actually correspond to
// any domain but will always be higher than any valid value.
HalpNumaMemoryRanges[MemoryDescriptorCount] = (ULONG_PTR) ~0I64;
// And the base of the lowest range should be 0 even if there
// are no pages that low.
HalpNumaMemoryRanges[0] = 0;
// Convert the proximity IDs in the memory node array to
// node number. Node number is the index of the matching
// entry in proximity ID array.
for (i= 0; i < MemoryDescriptorCount; i++) { for (j = 0; j < HalpNumaConfig->NodeCount; j++) { if (HalpNumaMemoryNode[i] == HalpNumaConfig->ProximityId[j]) { HalpNumaMemoryNode[i] = (UCHAR)j; break; } } } }
NTSTATUS HalpGetAcpiStaticNumaTopology( HAL_NUMA_TOPOLOGY_INTERFACE * NumaInfo ) { #if !defined(NT_UP)
// This routine is never called unless this ACPI HAL found
// a Static Resource Affinity Table (SRAT). But just in case ...
if (HalpAcpiSrat == NULL) { return STATUS_INVALID_LEVEL; }
// Fill in the data structure for the kernel.
NumaInfo->NumberOfNodes = HalpNumaConfig->NodeCount; NumaInfo->QueryProcessorNode = HalpNumaQueryProcessorNode; NumaInfo->PageToNode = HalpNumaQueryPageToNode; return STATUS_SUCCESS;