Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

606 lines
15 KiB

  1. /*++
  2. Copyright (c) 2000 Microsoft Corporation
  3. Module Name:
  4. sratnuma.c
  5. Abstract:
  6. This module contains functions which support static NUMA configurations
  7. as provided by the ACPI SRAT "Static Resource Affinity Table".
  8. Author:
  9. Peter L Johnston (peterj) 2-Jul-2000
  10. Environment:
  11. Kernel mode only.
  12. Revision History:
  13. --*/
  14. #include "halp.h"
  15. #include "acpitabl.h"
  16. #include "xxacpi.h"
  17. #if !defined(NT_UP)
  18. #define ROUNDUP_TO_NEXT(base, size) \
  19. ((((ULONG_PTR)(base)) + (size)) & ~((size) - 1))
  20. //
  21. // The following routine is external but only used by NUMA support
  22. // at the moment.
  23. //
  24. NTSTATUS
  25. HalpGetApicIdByProcessorNumber(
  26. IN UCHAR Processor,
  27. IN OUT USHORT *ApicId
  28. );
  29. //
  30. // Prototypes for alloc pragmas.
  31. //
  32. VOID
  33. HalpNumaInitializeStaticConfiguration(
  34. IN PLOADER_PARAMETER_BLOCK
  35. );
  36. #if defined(ALLOC_PRAGMA)
  37. #pragma alloc_text(INIT,HalpNumaInitializeStaticConfiguration)
  38. #endif
  39. #define NEXT_ENTRY(base) (((PUCHAR)base) + (base)->Length)
  40. #define HAL_MAX_NODES 8
  41. #if defined(_WIN64)
  42. #define HAL_MAX_PROCESSORS 64
  43. #else
  44. #define HAL_MAX_PROCESSORS 32
  45. #endif
  46. typedef struct _STATIC_NUMA_CONFIG {
  47. USHORT ProcessorApicId[HAL_MAX_PROCESSORS];
  48. UCHAR ProcessorProximity[HAL_MAX_PROCESSORS];
  49. UCHAR ProximityId[HAL_MAX_NODES];
  50. UCHAR NodeCount;
  51. UCHAR ProcessorCount;
  52. } HALPSRAT_STATIC_NUMA_CONFIG, *PHALPSRAT_STATIC_NUMA_CONFIG;
  53. PHALPSRAT_STATIC_NUMA_CONFIG HalpNumaConfig;
  54. PACPI_SRAT HalpAcpiSrat;
  55. PULONG_PTR HalpNumaMemoryRanges;
  56. PUCHAR HalpNumaMemoryNode;
  57. ULONG HalpNumaLastRangeIndex;
  58. ULONG
  59. HalpNumaQueryPageToNode(
  60. IN ULONG_PTR PhysicalPageNumber
  61. )
  62. /*++
  63. Routine Description:
  64. Search the memory range descriptors to determine the node
  65. this page exists on.
  66. Arguments:
  67. PhysicalPageNumber Provides the page number.
  68. Return Value:
  69. Returns the node number for the page.
  70. --*/
  71. {
  72. ULONG Index = HalpNumaLastRangeIndex;
  73. //
  74. // Starting in the same range as the last page returned,
  75. // look for this page.
  76. //
  77. if (PhysicalPageNumber >= HalpNumaMemoryRanges[Index]) {
  78. //
  79. // Search upwards.
  80. //
  81. while (PhysicalPageNumber >= HalpNumaMemoryRanges[Index+1]) {
  82. Index++;
  83. }
  84. } else {
  85. //
  86. // Search downwards.
  87. //
  88. do {
  89. Index--;
  90. } while (PhysicalPageNumber < HalpNumaMemoryRanges[Index]);
  91. }
  92. HalpNumaLastRangeIndex = Index;
  93. return HalpNumaMemoryNode[Index];
  94. }
  95. NTSTATUS
  96. HalpNumaQueryProcessorNode(
  97. IN ULONG ProcessorNumber,
  98. OUT PUSHORT Identifier,
  99. OUT PUCHAR Node
  100. )
  101. {
  102. NTSTATUS Status;
  103. USHORT ApicId;
  104. UCHAR Proximity;
  105. UCHAR i, j;
  106. //
  107. // Get the APIC Id for this processor.
  108. //
  109. Status = HalpGetApicIdByProcessorNumber((UCHAR)ProcessorNumber, &ApicId);
  110. if (!NT_SUCCESS(Status)) {
  111. return Status;
  112. }
  113. //
  114. // Return the APIC Id as the Identifier. This should probably
  115. // be the ACPI Id but we don't have a way to get that yet.
  116. //
  117. *Identifier = ApicId;
  118. //
  119. // Find the node this processor belongs to. The node is the
  120. // index into the array of Proximity Ids for the entry corresponding
  121. // to the Proximity Id of this processor.
  122. //
  123. for (i = 0; i < HalpNumaConfig->ProcessorCount; i++) {
  124. if (HalpNumaConfig->ProcessorApicId[i] == ApicId) {
  125. Proximity = HalpNumaConfig->ProcessorProximity[i];
  126. for (j = 0; j < HalpNumaConfig->NodeCount; j++) {
  127. if (HalpNumaConfig->ProximityId[j] == Proximity) {
  128. *Node = j;
  129. return STATUS_SUCCESS;
  130. }
  131. }
  132. }
  133. }
  134. //
  135. // Didn't find this processor in the known set of APIC IDs, this
  136. // would indicate a mismatch between the BIOS MP tables and the
  137. // SRAT, or, didn't find the proximity for this processor in the
  138. // table of proximity IDs. This would be an internal error as
  139. // this array is build from the set of proximity IDs in the SRAT.
  140. //
  141. return STATUS_NOT_FOUND;
  142. }
  143. VOID
  144. HalpNumaInitializeStaticConfiguration(
  145. IN PLOADER_PARAMETER_BLOCK LoaderBlock
  146. )
  147. /*++
  148. Routine Description:
  149. This routine reads the ACPI Static Resource Affinity Table to build
  150. a picture of the system's NUMA configuration. This information is
  151. saved in the HalpNumaConfig structure in a form which is optimal for
  152. the OS's use.
  153. Arguments:
  154. LoaderBlock supplies a pointer to the system loader parameter block.
  155. Return Value:
  156. None.
  157. --*/
  158. {
  159. ULONG NodeCount = 0;
  160. ULONG MemoryDescriptorCount = 0;
  161. UCHAR ProcessorCount = 0;
  162. PACPI_SRAT_ENTRY SratEntry;
  163. PACPI_SRAT_ENTRY SratEnd;
  164. ULONG i, j;
  165. BOOLEAN Swapped;
  166. PHYSICAL_ADDRESS Base;
  167. ULONG_PTR p;
  168. PVOID Phys;
  169. HalpAcpiSrat = HalpGetAcpiTablePhase0(LoaderBlock, ACPI_SRAT_SIGNATURE);
  170. if (HalpAcpiSrat == NULL) {
  171. return;
  172. }
  173. //
  174. // The Static Resource Affinity Table (SRAT) exists.
  175. //
  176. // Scan it to determine the number of memory descriptors then
  177. // allocate memory to contain the tables needed to hold the
  178. // system's NUMA configuration.
  179. //
  180. SratEnd = (PACPI_SRAT_ENTRY)(((PUCHAR)HalpAcpiSrat) +
  181. HalpAcpiSrat->Header.Length);
  182. for (SratEntry = (PACPI_SRAT_ENTRY)(HalpAcpiSrat + 1);
  183. SratEntry < SratEnd;
  184. SratEntry = (PACPI_SRAT_ENTRY)NEXT_ENTRY(SratEntry)) {
  185. switch (SratEntry->Type) {
  186. case SratMemory:
  187. MemoryDescriptorCount++;
  188. break;
  189. }
  190. }
  191. //
  192. // HalpNumaConfig format:
  193. //
  194. // HalpNumaConfig->
  195. // USHORT ProcessorApicId[HAL_MAX_PROCESSORS];
  196. // UCHAR ProcessorProximity[HAL_MAX_PROCESSORS];
  197. // UCHAR ProximityIds[HAL_MAX_NODES];
  198. // UCHAR NodeCount;
  199. // -pad- to 128 byte boundary
  200. // HalpNumaMemoryNode->
  201. // UCHAR MemoryRangeProximityId[NumberOfMemoryRanges];
  202. // -pad to ULONG_PTR alignment-
  203. // HalpNumaMemoryRanges->
  204. // ULONG_PTR MemoryRangeBasePage[NumberOfMemoryRanges];
  205. //
  206. // This format has been selected to maximize cache hits while
  207. // searching the ranges. Specifically, the size of the ranges
  208. // array is kept to a minumum.
  209. //
  210. //
  211. // Calculate number of pages required to hold the needed structures.
  212. //
  213. i = MemoryDescriptorCount * (sizeof(ULONG_PTR) + sizeof(UCHAR)) +
  214. sizeof(HALPSRAT_STATIC_NUMA_CONFIG) + 2 * sizeof(ULONG) + 128;
  215. i += PAGE_SIZE - 1;
  216. i >>= PAGE_SHIFT;
  217. Phys = (PVOID)HalpAllocPhysicalMemory(LoaderBlock,
  218. MAXIMUM_PHYSICAL_ADDRESS,
  219. i,
  220. FALSE);
  221. if (Phys == NULL) {
  222. //
  223. // Allocation failed, the system will not be able to run
  224. // as a NUMA system,.... actually the system will probably
  225. // not run far at all.
  226. //
  227. DbgPrint("HAL NUMA Initialization failed, could not allocate %d pages\n",
  228. i);
  229. HalpAcpiSrat = NULL;
  230. return;
  231. }
  232. Base.QuadPart = (ULONG_PTR)Phys;
  233. #if !defined(_IA64_)
  234. HalpNumaConfig = HalpMapPhysicalMemory(Base, 1);
  235. #else
  236. HalpNumaConfig = HalpMapPhysicalMemory(Base, 1, MmCached);
  237. #endif
  238. if (HalpNumaConfig == NULL) {
  239. //
  240. // Couldn't map the allocation, give up.
  241. //
  242. HalpAcpiSrat = NULL;
  243. return;
  244. }
  245. RtlZeroMemory(HalpNumaConfig, i * PAGE_SIZE);
  246. //
  247. // MemoryRangeProximity is an array of UCHARs starting at the next
  248. // 128 byte boundary.
  249. //
  250. p = ROUNDUP_TO_NEXT((HalpNumaConfig + 1), 128);
  251. HalpNumaMemoryNode = (PUCHAR)p;
  252. //
  253. // NumaMemoryRanges is an array of ULONG_PTRs starting at the next
  254. // ULONG_PTR boundary.
  255. //
  256. p += (MemoryDescriptorCount + sizeof(ULONG_PTR)) & ~(sizeof(ULONG_PTR) - 1);
  257. HalpNumaMemoryRanges = (PULONG_PTR)p;
  258. //
  259. // Rescan the SRAT entries filling in the HalpNumaConfig structure.
  260. //
  261. ProcessorCount = 0;
  262. MemoryDescriptorCount = 0;
  263. for (SratEntry = (PACPI_SRAT_ENTRY)(HalpAcpiSrat + 1);
  264. SratEntry < SratEnd;
  265. SratEntry = (PACPI_SRAT_ENTRY)NEXT_ENTRY(SratEntry)) {
  266. //
  267. // Does this entry belong to a proximity domain not previously
  268. // seen? If so, we have a new node.
  269. //
  270. for (i = 0; i < HalpNumaConfig->NodeCount; i++) {
  271. if (SratEntry->ProximityDomain == HalpNumaConfig->ProximityId[i]) {
  272. break;
  273. }
  274. }
  275. if (i == HalpNumaConfig->NodeCount) {
  276. //
  277. // This is an ID we haven't seen before. New Node.
  278. //
  279. if (HalpNumaConfig->NodeCount >= 8) {
  280. //
  281. // We support a maximum of 8 nodes, make this machine
  282. // not NUMA. (Yes, we should free the config space
  283. // we allocated,... but this is an error when it happens
  284. // so I'm not worrying about it. peterj).
  285. //
  286. HalpAcpiSrat = NULL;
  287. return;
  288. }
  289. HalpNumaConfig->ProximityId[i] = SratEntry->ProximityDomain;
  290. HalpNumaConfig->NodeCount++;
  291. }
  292. switch (SratEntry->Type) {
  293. case SratProcessorLocalAPIC:
  294. if (SratEntry->ApicAffinity.Flags.Enabled == 0) {
  295. //
  296. // This processor is not enabled, skip it.
  297. //
  298. continue;
  299. }
  300. if (ProcessorCount == HAL_MAX_PROCESSORS) {
  301. //
  302. // Can't handle any more processors. Turn this
  303. // into a non-numa machine.
  304. //
  305. HalpAcpiSrat = NULL;
  306. return;
  307. }
  308. HalpNumaConfig->ProcessorApicId[ProcessorCount] =
  309. #if defined(_IA64_)
  310. SratEntry->ApicAffinity.ApicId << 8 |
  311. (SratEntry->ApicAffinity.SApicEid);
  312. #else
  313. SratEntry->ApicAffinity.ApicId |
  314. (SratEntry->ApicAffinity.SApicEid << 8);
  315. #endif
  316. HalpNumaConfig->ProcessorProximity[ProcessorCount] =
  317. SratEntry->ProximityDomain;
  318. ProcessorCount++;
  319. break;
  320. case SratMemory:
  321. //
  322. // Save the proximity and the base page for this range.
  323. //
  324. HalpNumaMemoryNode[MemoryDescriptorCount] =
  325. SratEntry->ProximityDomain;
  326. Base = SratEntry->MemoryAffinity.Base;
  327. Base.QuadPart >>= PAGE_SHIFT;
  328. ASSERT(Base.u.HighPart == 0);
  329. // N.B. This does NOT work for 64 bit systems, those systems
  330. // should keep both halves of the base address.
  331. HalpNumaMemoryRanges[MemoryDescriptorCount] = Base.u.LowPart;
  332. MemoryDescriptorCount++;
  333. break;
  334. }
  335. }
  336. HalpNumaConfig->ProcessorCount = ProcessorCount;
  337. //
  338. // Make sure processor 0 is always in 'logical' node 0. This
  339. // is achieved by making sure the proximity Id for the first
  340. // processor is always the first proximity Id in the table.
  341. //
  342. i = 0;
  343. if (!NT_SUCCESS(HalpGetApicIdByProcessorNumber(0, (PUSHORT)&i))) {
  344. //
  345. // Couldn't find the ApicId of processor 0? Not quite
  346. // sure what to do, I suspect the MP table's APIC IDs
  347. // don't match the SRAT's.
  348. //
  349. DbgPrint("HAL No APIC ID for boot processor.\n");
  350. }
  351. for (j = 0; j < ProcessorCount; j++) {
  352. if (HalpNumaConfig->ProcessorApicId[j] == (USHORT)i) {
  353. UCHAR Proximity = HalpNumaConfig->ProcessorProximity[j];
  354. for (i = 0; i < HalpNumaConfig->NodeCount; i++) {
  355. if (HalpNumaConfig->ProximityId[i] == Proximity) {
  356. HalpNumaConfig->ProximityId[i] =
  357. HalpNumaConfig->ProximityId[0];
  358. HalpNumaConfig->ProximityId[0] = Proximity;
  359. break;
  360. }
  361. }
  362. break;
  363. }
  364. }
  365. //
  366. // Sort the memory ranges. There shouldn't be very many
  367. // so a bubble sort should suffice.
  368. //
  369. j = MemoryDescriptorCount - 1;
  370. do {
  371. Swapped = FALSE;
  372. for (i = 0; i < j; i++) {
  373. ULONG_PTR t;
  374. UCHAR td;
  375. t = HalpNumaMemoryRanges[i];
  376. if (t > HalpNumaMemoryRanges[i+1]) {
  377. Swapped = TRUE;
  378. HalpNumaMemoryRanges[i] = HalpNumaMemoryRanges[i+1];
  379. HalpNumaMemoryRanges[i+1] = t;
  380. //
  381. // Keep the proximity domain in sync with the base.
  382. //
  383. td = HalpNumaMemoryNode[i];
  384. HalpNumaMemoryNode[i] = HalpNumaMemoryNode[i+1];
  385. HalpNumaMemoryNode[i+1] = td;
  386. }
  387. }
  388. //
  389. // The highest value is now at the top so cut it from the sort.
  390. //
  391. j--;
  392. } while (Swapped == TRUE);
  393. //
  394. // When searching the memory descriptors to find out which domain
  395. // a page is in, we don't care about gaps, we'll never be asked
  396. // for a page in a gap, so, if two descriptors refer to the same
  397. // domain, merge them in place.
  398. //
  399. j = 0;
  400. for (i = 1; i < MemoryDescriptorCount; i++) {
  401. if (HalpNumaMemoryNode[j] !=
  402. HalpNumaMemoryNode[i]) {
  403. j++;
  404. HalpNumaMemoryNode[j] = HalpNumaMemoryNode[i];
  405. HalpNumaMemoryRanges[j] = HalpNumaMemoryRanges[i];
  406. continue;
  407. }
  408. }
  409. MemoryDescriptorCount = j + 1;
  410. //
  411. // The highest page number on an x86 (32 bit) system with PAE
  412. // making physical addresses 36 bits, is (1 << (36-12)) - 1, ie
  413. // 0x00ffffff. Terminate the table with 0xffffffff which won't
  414. // actually correspond to any domain but will always be higher
  415. // than any valid value.
  416. //
  417. HalpNumaMemoryRanges[MemoryDescriptorCount] = 0xffffffff;
  418. //
  419. // And the base of the lowest range should be 0 even if there
  420. // are no pages that low.
  421. //
  422. HalpNumaMemoryRanges[0] = 0;
  423. //
  424. // Convert the proximity IDs in the memory node array to
  425. // node number. Node number is the index of the matching
  426. // entry in proximity ID array.
  427. //
  428. for (i= 0; i < MemoryDescriptorCount; i++) {
  429. for (j = 0; j < HalpNumaConfig->NodeCount; j++) {
  430. if (HalpNumaMemoryNode[i] == HalpNumaConfig->ProximityId[j]) {
  431. HalpNumaMemoryNode[i] = (UCHAR)j;
  432. break;
  433. }
  434. }
  435. }
  436. }
  437. #endif
  438. NTSTATUS
  439. HalpGetAcpiStaticNumaTopology(
  440. HAL_NUMA_TOPOLOGY_INTERFACE * NumaInfo
  441. )
  442. {
  443. #if !defined(NT_UP)
  444. //
  445. // This routine is never called unless this ACPI HAL found
  446. // a Static Resource Affinity Table (SRAT). But just in case ...
  447. //
  448. if (HalpAcpiSrat == NULL) {
  449. return STATUS_INVALID_LEVEL;
  450. }
  451. //
  452. // Fill in the data structure for the kernel.
  453. //
  454. NumaInfo->NumberOfNodes = HalpNumaConfig->NodeCount;
  455. NumaInfo->QueryProcessorNode = HalpNumaQueryProcessorNode;
  456. NumaInfo->PageToNode = HalpNumaQueryPageToNode;
  457. return STATUS_SUCCESS;
  458. #else
  459. return STATUS_INVALID_LEVEL;
  460. #endif
  461. }