Leaked source code of windows server 2003

648 lines
17 KiB

  1. /*++
  2. Copyright (c) 2000 Microsoft Corporation
  3. Module Name:
  4. sratnuma.c
  5. Abstract:
  6. This module contains functions which support static NUMA configurations
  7. as provided by the ACPI SRAT "Static Resource Affinity Table".
  8. Author:
  9. Peter L Johnston (peterj) 2-Jul-2000
  10. Environment:
  11. Kernel mode only.
  12. Revision History:
  13. --*/
  14. #include "halp.h"
  15. #include "acpitabl.h"
  16. #include "xxacpi.h"
  17. #if !defined(NT_UP)
  18. #define ROUNDUP_TO_NEXT(base, size) \
  19. ((((ULONG_PTR)(base)) + (size)) & ~((size) - 1))
  20. //
  21. // The following routine is external but only used by NUMA support
  22. // at the moment.
  23. //
  24. NTSTATUS
  25. HalpGetApicIdByProcessorNumber(
  26. IN UCHAR Processor,
  27. IN OUT USHORT *ApicId
  28. );
  29. //
  30. // Prototypes for alloc pragmas.
  31. //
  32. VOID
  33. HalpNumaInitializeStaticConfiguration(
  34. IN PLOADER_PARAMETER_BLOCK
  35. );
  36. #if defined(ALLOC_PRAGMA)
  37. #pragma alloc_text(INIT,HalpNumaInitializeStaticConfiguration)
  38. #endif
  39. #define NEXT_ENTRY(base) (((PUCHAR)base) + (base)->Length)
  40. #if defined(_WIN64)
  41. #define HAL_MAX_PROCESSORS 64
  42. #else
  43. #define HAL_MAX_PROCESSORS 32
  44. #endif
  45. typedef struct _STATIC_NUMA_CONFIG {
  46. USHORT ProcessorApicId[HAL_MAX_PROCESSORS];
  47. UCHAR ProcessorProximity[HAL_MAX_PROCESSORS];
  48. UCHAR ProximityId[MAXIMUM_CCNUMA_NODES];
  49. UCHAR NodeCount;
  50. UCHAR ProcessorCount;
  51. } HALPSRAT_STATIC_NUMA_CONFIG, *PHALPSRAT_STATIC_NUMA_CONFIG;
  52. PHALPSRAT_STATIC_NUMA_CONFIG HalpNumaConfig;
  53. PACPI_SRAT HalpAcpiSrat;
  54. PULONG_PTR HalpNumaMemoryRanges;
  55. PUCHAR HalpNumaMemoryNode;
  56. ULONG HalpNumaLastRangeIndex;
  57. ULONG
  58. HalpNumaQueryPageToNode(
  59. IN ULONG_PTR PhysicalPageNumber
  60. )
  61. /*++
  62. Routine Description:
  63. Search the memory range descriptors to determine the node
  64. this page exists on.
  65. Arguments:
  66. PhysicalPageNumber Provides the page number.
  67. Return Value:
  68. Returns the node number for the page.
  69. --*/
  70. {
  71. ULONG Index = HalpNumaLastRangeIndex;
  72. //
  73. // Starting in the same range as the last page returned,
  74. // look for this page.
  75. //
  76. if (PhysicalPageNumber >= HalpNumaMemoryRanges[Index]) {
  77. //
  78. // Search upwards.
  79. //
  80. while (PhysicalPageNumber >= HalpNumaMemoryRanges[Index+1]) {
  81. Index++;
  82. }
  83. } else {
  84. //
  85. // Search downwards.
  86. //
  87. do {
  88. Index--;
  89. } while (PhysicalPageNumber < HalpNumaMemoryRanges[Index]);
  90. }
  91. HalpNumaLastRangeIndex = Index;
  92. return HalpNumaMemoryNode[Index];
  93. }
  94. NTSTATUS
  95. HalpNumaQueryProcessorNode(
  96. IN ULONG ProcessorNumber,
  97. OUT PUSHORT Identifier,
  98. OUT PUCHAR Node
  99. )
  100. {
  101. NTSTATUS Status;
  102. USHORT ApicId;
  103. UCHAR Proximity;
  104. UCHAR i, j;
  105. //
  106. // Get the APIC Id for this processor.
  107. //
  108. Status = HalpGetApicIdByProcessorNumber((UCHAR)ProcessorNumber, &ApicId);
  109. if (!NT_SUCCESS(Status)) {
  110. return Status;
  111. }
  112. //
  113. // Return the APIC Id as the Identifier. This should probably
  114. // be the ACPI Id but we don't have a way to get that yet.
  115. //
  116. *Identifier = ApicId;
  117. //
  118. // Find the node this processor belongs to. The node is the
  119. // index into the array of Proximity Ids for the entry corresponding
  120. // to the Proximity Id of this processor.
  121. //
  122. for (i = 0; i < HalpNumaConfig->ProcessorCount; i++) {
  123. if (HalpNumaConfig->ProcessorApicId[i] == ApicId) {
  124. Proximity = HalpNumaConfig->ProcessorProximity[i];
  125. for (j = 0; j < HalpNumaConfig->NodeCount; j++) {
  126. if (HalpNumaConfig->ProximityId[j] == Proximity) {
  127. *Node = j;
  128. return STATUS_SUCCESS;
  129. }
  130. }
  131. }
  132. }
  133. //
  134. // Didn't find this processor in the known set of APIC IDs, this
  135. // would indicate a mismatch between the BIOS MP tables and the
  136. // SRAT, or, didn't find the proximity for this processor in the
  137. // table of proximity IDs. This would be an internal error as
  138. // this array is build from the set of proximity IDs in the SRAT.
  139. //
  140. return STATUS_NOT_FOUND;
  141. }
  142. VOID
  143. HalpNumaInitializeStaticConfiguration(
  144. IN PLOADER_PARAMETER_BLOCK LoaderBlock
  145. )
  146. /*++
  147. Routine Description:
  148. This routine reads the ACPI Static Resource Affinity Table to build
  149. a picture of the system's NUMA configuration. This information is
  150. saved in the HalpNumaConfig structure in a form which is optimal for
  151. the OS's use.
  152. Arguments:
  153. LoaderBlock supplies a pointer to the system loader parameter block.
  154. Return Value:
  155. None.
  156. --*/
  157. {
  158. ULONG MemoryDescriptorCount;
  159. UCHAR ProcessorCount;
  160. PACPI_SRAT_ENTRY SratEntry;
  161. PACPI_SRAT_ENTRY SratEnd;
  162. ULONG i, j;
  163. BOOLEAN Swapped;
  164. PHYSICAL_ADDRESS Base;
  165. ULONG_PTR p;
  166. ULONG_PTR Phys;
  167. HalpAcpiSrat = HalpGetAcpiTablePhase0(LoaderBlock, ACPI_SRAT_SIGNATURE);
  168. if (HalpAcpiSrat == NULL) {
  169. return;
  170. }
  171. //
  172. // The Static Resource Affinity Table (SRAT) exists.
  173. //
  174. // Scan it to determine the number of memory descriptors then
  175. // allocate memory to contain the tables needed to hold the
  176. // system's NUMA configuration.
  177. //
  178. MemoryDescriptorCount = 0;
  179. ProcessorCount = 0;
  180. SratEnd = (PACPI_SRAT_ENTRY)(((PUCHAR)HalpAcpiSrat) +
  181. HalpAcpiSrat->Header.Length);
  182. for (SratEntry = (PACPI_SRAT_ENTRY)(HalpAcpiSrat + 1);
  183. SratEntry < SratEnd;
  184. SratEntry = (PACPI_SRAT_ENTRY)NEXT_ENTRY(SratEntry)) {
  185. switch (SratEntry->Type) {
  186. case SratMemory:
  187. if (SratEntry->MemoryAffinity.Flags.Enabled == 1) {
  188. MemoryDescriptorCount++;
  189. }
  190. break;
  191. case SratProcessorLocalAPIC:
  192. if (SratEntry->ApicAffinity.Flags.Enabled == 1) {
  193. ProcessorCount++;
  194. }
  195. break;
  196. }
  197. }
  198. if ((MemoryDescriptorCount == 0) || (ProcessorCount == 0)) {
  199. //
  200. // Can't handle the case where there is either no memory or no
  201. // processors in the table. Turn this into a non-numa
  202. // machine.
  203. //
  204. HalpAcpiSrat = NULL;
  205. return;
  206. }
  207. //
  208. // HalpNumaConfig format:
  209. //
  210. // HalpNumaConfig->
  211. // USHORT ProcessorApicId[HAL_MAX_PROCESSORS];
  212. // UCHAR ProcessorProximity[HAL_MAX_PROCESSORS];
  213. // UCHAR ProximityIds[MAXIMUM_CCNUMA_NODES];
  214. // UCHAR NodeCount;
  215. // -pad- to 128 byte boundary
  216. // HalpNumaMemoryNode->
  217. // UCHAR MemoryRangeProximityId[NumberOfMemoryRanges];
  218. // -pad to ULONG_PTR alignment-
  219. // HalpNumaMemoryRanges->
  220. // ULONG_PTR MemoryRangeBasePage[NumberOfMemoryRanges];
  221. //
  222. // This format has been selected to maximize cache hits while
  223. // searching the ranges. Specifically, the size of the ranges
  224. // array is kept to a minumum.
  225. //
  226. // NOTE: This code does not account for the length of the memory
  227. // ranges specified in the SRAT table. Instead it treats each
  228. // memory range as extending to the next specified memory range.
  229. // The rationale is that the code shouldn't be asked about pages
  230. // not found in the SRAT table and if we are to return something
  231. // for these pages it might as well be the NUMA node associated
  232. // with the pages from the previous range.
  233. //
  234. //
  235. // Calculate number of pages required to hold the needed structures.
  236. //
  237. i = MemoryDescriptorCount * (sizeof(ULONG_PTR) + sizeof(UCHAR)) +
  238. sizeof(HALPSRAT_STATIC_NUMA_CONFIG) + 2 * sizeof(ULONG_PTR) +
  239. 128 + sizeof(ULONG_PTR);
  240. i += PAGE_SIZE - 1;
  241. i >>= PAGE_SHIFT;
  242. Phys = (ULONG_PTR)HalpAllocPhysicalMemory(LoaderBlock,
  243. MAXIMUM_PHYSICAL_ADDRESS,
  244. i,
  245. FALSE);
  246. if (Phys == 0) {
  247. //
  248. // Allocation failed, the system will not be able to run
  249. // as a NUMA system,.... actually the system will probably
  250. // not run far at all.
  251. //
  252. DbgPrint("HAL NUMA Initialization failed, could not allocate %d pages\n",
  253. i);
  254. HalpAcpiSrat = NULL;
  255. return;
  256. }
  257. Base.QuadPart = (ULONG_PTR)Phys;
  258. #if !defined(_IA64_)
  259. HalpNumaConfig = HalpMapPhysicalMemory(Base, 1);
  260. #else
  261. HalpNumaConfig = HalpMapPhysicalMemory(Base, 1, MmCached);
  262. #endif
  263. if (HalpNumaConfig == NULL) {
  264. //
  265. // Couldn't map the allocation, give up.
  266. //
  267. HalpAcpiSrat = NULL;
  268. return;
  269. }
  270. RtlZeroMemory(HalpNumaConfig, i * PAGE_SIZE);
  271. //
  272. // MemoryRangeProximity is an array of UCHARs starting at the next
  273. // 128 byte boundary.
  274. //
  275. p = ROUNDUP_TO_NEXT((HalpNumaConfig + 1), 128);
  276. HalpNumaMemoryNode = (PUCHAR)p;
  277. //
  278. // NumaMemoryRanges is an array of ULONG_PTRs starting at the next
  279. // ULONG_PTR boundary.
  280. //
  281. p += (MemoryDescriptorCount + sizeof(ULONG_PTR)) & ~(sizeof(ULONG_PTR) - 1);
  282. HalpNumaMemoryRanges = (PULONG_PTR)p;
  283. //
  284. // Rescan the SRAT entries filling in the HalpNumaConfig structure.
  285. //
  286. ProcessorCount = 0;
  287. MemoryDescriptorCount = 0;
  288. for (SratEntry = (PACPI_SRAT_ENTRY)(HalpAcpiSrat + 1);
  289. SratEntry < SratEnd;
  290. SratEntry = (PACPI_SRAT_ENTRY)NEXT_ENTRY(SratEntry)) {
  291. //
  292. // Does this entry belong to a proximity domain not previously
  293. // seen? If so, we have a new node.
  294. //
  295. for (i = 0; i < HalpNumaConfig->NodeCount; i++) {
  296. if (SratEntry->ProximityDomain == HalpNumaConfig->ProximityId[i]) {
  297. break;
  298. }
  299. }
  300. if (i == HalpNumaConfig->NodeCount) {
  301. //
  302. // This is an ID we haven't seen before. New Node.
  303. //
  304. if (HalpNumaConfig->NodeCount >= MAXIMUM_CCNUMA_NODES) {
  305. //
  306. // We support a limited number of nodes, make this machine
  307. // not NUMA. (Yes, we should free the config space
  308. // we allocated,... but this is an error when it happens
  309. // so I'm not worrying about it. peterj).
  310. //
  311. HalpAcpiSrat = NULL;
  312. return;
  313. }
  314. HalpNumaConfig->ProximityId[i] = SratEntry->ProximityDomain;
  315. HalpNumaConfig->NodeCount++;
  316. }
  317. switch (SratEntry->Type) {
  318. case SratProcessorLocalAPIC:
  319. if (SratEntry->ApicAffinity.Flags.Enabled == 0) {
  320. //
  321. // This processor is not enabled, skip it.
  322. //
  323. continue;
  324. }
  325. if (ProcessorCount == HAL_MAX_PROCESSORS) {
  326. //
  327. // Can't handle any more processors. Turn this
  328. // into a non-numa machine.
  329. //
  330. HalpAcpiSrat = NULL;
  331. return;
  332. }
  333. HalpNumaConfig->ProcessorApicId[ProcessorCount] =
  334. #if defined(_IA64_)
  335. SratEntry->ApicAffinity.ApicId << 8 |
  336. (SratEntry->ApicAffinity.SApicEid);
  337. #else
  338. SratEntry->ApicAffinity.ApicId;
  339. #endif
  340. HalpNumaConfig->ProcessorProximity[ProcessorCount] =
  341. SratEntry->ProximityDomain;
  342. ProcessorCount++;
  343. break;
  344. case SratMemory:
  345. if (SratEntry->MemoryAffinity.Flags.Enabled == 0) {
  346. //
  347. // This memory is not enabled, skip it.
  348. //
  349. continue;
  350. }
  351. //
  352. // Save the proximity and the base page for this range.
  353. //
  354. HalpNumaMemoryNode[MemoryDescriptorCount] =
  355. SratEntry->ProximityDomain;
  356. Base = SratEntry->MemoryAffinity.Base;
  357. Base.QuadPart >>= PAGE_SHIFT;
  358. #if !defined(_WIN64)
  359. ASSERT(Base.u.HighPart == 0);
  360. #endif
  361. HalpNumaMemoryRanges[MemoryDescriptorCount] = (ULONG_PTR) Base.QuadPart;
  362. //
  363. // Explicitly ignore the entry's MemoryAffinity.Length as
  364. // the code treats anything up to the next greatest range
  365. // as associated with this entry.
  366. //
  367. MemoryDescriptorCount++;
  368. break;
  369. }
  370. }
  371. HalpNumaConfig->ProcessorCount = ProcessorCount;
  372. //
  373. // Make sure processor 0 is always in 'logical' node 0. This
  374. // is achieved by making sure the proximity Id for the first
  375. // processor is always the first proximity Id in the table.
  376. //
  377. i = 0;
  378. if (!NT_SUCCESS(HalpGetApicIdByProcessorNumber(0, (PUSHORT)&i))) {
  379. //
  380. // Couldn't find the ApicId of processor 0? Not quite
  381. // sure what to do, I suspect the MP table's APIC IDs
  382. // don't match the SRAT's.
  383. //
  384. DbgPrint("HAL No APIC ID for boot processor.\n");
  385. }
  386. for (j = 0; j < ProcessorCount; j++) {
  387. if (HalpNumaConfig->ProcessorApicId[j] == (USHORT)i) {
  388. UCHAR Proximity = HalpNumaConfig->ProcessorProximity[j];
  389. for (i = 0; i < HalpNumaConfig->NodeCount; i++) {
  390. if (HalpNumaConfig->ProximityId[i] == Proximity) {
  391. HalpNumaConfig->ProximityId[i] =
  392. HalpNumaConfig->ProximityId[0];
  393. HalpNumaConfig->ProximityId[0] = Proximity;
  394. break;
  395. }
  396. }
  397. break;
  398. }
  399. }
  400. //
  401. // Sort the memory ranges. There shouldn't be very many
  402. // so a bubble sort should suffice.
  403. //
  404. j = MemoryDescriptorCount - 1;
  405. do {
  406. Swapped = FALSE;
  407. for (i = 0; i < j; i++) {
  408. ULONG_PTR t;
  409. UCHAR td;
  410. t = HalpNumaMemoryRanges[i];
  411. if (t > HalpNumaMemoryRanges[i+1]) {
  412. Swapped = TRUE;
  413. HalpNumaMemoryRanges[i] = HalpNumaMemoryRanges[i+1];
  414. HalpNumaMemoryRanges[i+1] = t;
  415. //
  416. // Keep the proximity domain in sync with the base.
  417. //
  418. td = HalpNumaMemoryNode[i];
  419. HalpNumaMemoryNode[i] = HalpNumaMemoryNode[i+1];
  420. HalpNumaMemoryNode[i+1] = td;
  421. }
  422. }
  423. //
  424. // The highest value is now at the top so cut it from the sort.
  425. //
  426. j--;
  427. } while (Swapped == TRUE);
  428. //
  429. // When searching the memory descriptors to find out which domain
  430. // a page is in, we don't care about gaps, we'll never be asked
  431. // for a page in a gap, so, if two descriptors refer to the same
  432. // domain, merge them in place.
  433. //
  434. j = 0;
  435. for (i = 1; i < MemoryDescriptorCount; i++) {
  436. if (HalpNumaMemoryNode[j] !=
  437. HalpNumaMemoryNode[i]) {
  438. j++;
  439. HalpNumaMemoryNode[j] = HalpNumaMemoryNode[i];
  440. HalpNumaMemoryRanges[j] = HalpNumaMemoryRanges[i];
  441. continue;
  442. }
  443. }
  444. MemoryDescriptorCount = j + 1;
  445. //
  446. // Terminate the table with ~0 which won't actually correspond to
  447. // any domain but will always be higher than any valid value.
  448. //
  449. HalpNumaMemoryRanges[MemoryDescriptorCount] = (ULONG_PTR) ~0I64;
  450. //
  451. // And the base of the lowest range should be 0 even if there
  452. // are no pages that low.
  453. //
  454. HalpNumaMemoryRanges[0] = 0;
  455. //
  456. // Convert the proximity IDs in the memory node array to
  457. // node number. Node number is the index of the matching
  458. // entry in proximity ID array.
  459. //
  460. for (i= 0; i < MemoryDescriptorCount; i++) {
  461. for (j = 0; j < HalpNumaConfig->NodeCount; j++) {
  462. if (HalpNumaMemoryNode[i] == HalpNumaConfig->ProximityId[j]) {
  463. HalpNumaMemoryNode[i] = (UCHAR)j;
  464. break;
  465. }
  466. }
  467. }
  468. }
  469. #endif
  470. NTSTATUS
  471. HalpGetAcpiStaticNumaTopology(
  472. HAL_NUMA_TOPOLOGY_INTERFACE * NumaInfo
  473. )
  474. {
  475. #if !defined(NT_UP)
  476. //
  477. // This routine is never called unless this ACPI HAL found
  478. // a Static Resource Affinity Table (SRAT). But just in case ...
  479. //
  480. if (HalpAcpiSrat == NULL) {
  481. return STATUS_INVALID_LEVEL;
  482. }
  483. //
  484. // Fill in the data structure for the kernel.
  485. //
  486. NumaInfo->NumberOfNodes = HalpNumaConfig->NodeCount;
  487. NumaInfo->QueryProcessorNode = HalpNumaQueryProcessorNode;
  488. NumaInfo->PageToNode = HalpNumaQueryPageToNode;
  489. return STATUS_SUCCESS;
  490. #else
  491. return STATUS_INVALID_LEVEL;
  492. #endif
  493. }