Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

763 lines
17 KiB

  1. /*++
  2. Copyright (c) 2000 Microsoft Corporation
  3. Module Name:
  4. numa.c
  5. Abstract:
  6. This module implements Win32 Non Uniform Memory Architecture
  7. information APIs.
  8. Author:
  9. Peter Johnston (peterj) 21-Sep-2000
  10. Revision History:
  11. --*/
  12. #include "basedll.h"
  13. BOOL
  14. WINAPI
  15. GetNumaHighestNodeNumber(
  16. PULONG HighestNodeNumber
  17. )
  18. /*++
  19. Routine Description:
  20. Return the (current) highest numbered node in the system.
  21. Arguments:
  22. HighestNodeNumber Supplies a pointer to receive the number of
  23. last (highest) node in the system.
  24. Return Value:
  25. TRUE unless something impossible happened.
  26. --*/
  27. {
  28. NTSTATUS Status;
  29. ULONG ReturnedSize;
  30. ULONGLONG Information;
  31. PSYSTEM_NUMA_INFORMATION Numa;
  32. Numa = (PSYSTEM_NUMA_INFORMATION)&Information;
  33. Status = NtQuerySystemInformation(SystemNumaProcessorMap,
  34. Numa,
  35. sizeof(Information),
  36. &ReturnedSize);
  37. if (!NT_SUCCESS(Status)) {
  38. //
  39. // This can't possibly happen. Attempt to handle it
  40. // gracefully.
  41. //
  42. BaseSetLastNTError(Status);
  43. return FALSE;
  44. }
  45. if (ReturnedSize < sizeof(ULONG)) {
  46. //
  47. // Nor can this.
  48. //
  49. SetLastError(ERROR_INVALID_PARAMETER);
  50. return FALSE;
  51. }
  52. //
  53. // Return the number of nodes in the system.
  54. //
  55. *HighestNodeNumber = Numa->HighestNodeNumber;
  56. return TRUE;
  57. }
  58. BOOL
  59. WINAPI
  60. GetNumaProcessorNode(
  61. UCHAR Processor,
  62. PUCHAR NodeNumber
  63. )
  64. /*++
  65. Routine Description:
  66. Return the Node number for a given processor.
  67. Arguments:
  68. Processor Supplies the processor number.
  69. NodeNumber Supplies a pointer to the UCHAR to receive the
  70. node number this processor belongs to.
  71. Return Value:
  72. Returns the Node number for the node this processor belongs to.
  73. Returns 0xFF if the processor doesn't exist.
  74. --*/
  75. {
  76. ULONGLONG Mask;
  77. NTSTATUS Status;
  78. ULONG ReturnedSize;
  79. UCHAR Node;
  80. SYSTEM_NUMA_INFORMATION Map;
  81. //
  82. // If the requested processor number is not reasonable, return
  83. // error value.
  84. //
  85. if (Processor >= MAXIMUM_PROCESSORS) {
  86. SetLastError(ERROR_INVALID_PARAMETER);
  87. return FALSE;
  88. }
  89. //
  90. // Get the Node -> Processor Affinity map from the system.
  91. //
  92. Status = NtQuerySystemInformation(SystemNumaProcessorMap,
  93. &Map,
  94. sizeof(Map),
  95. &ReturnedSize);
  96. if (!NT_SUCCESS(Status)) {
  97. //
  98. // This can't happen,... but try to stay sane if possible.
  99. //
  100. BaseSetLastNTError(Status);
  101. return FALSE;
  102. }
  103. //
  104. // Look thru the nodes returned for the node in which the
  105. // requested processor's affinity is non-zero.
  106. //
  107. Mask = 1 << Processor;
  108. for (Node = 0; Node <= Map.HighestNodeNumber; Node++) {
  109. if ((Map.ActiveProcessorsAffinityMask[Node] & Mask) != 0) {
  110. *NodeNumber = Node;
  111. return TRUE;
  112. }
  113. }
  114. //
  115. // Didn't find this processor in any node, return error value.
  116. //
  117. SetLastError(ERROR_INVALID_PARAMETER);
  118. return FALSE;
  119. }
  120. BOOL
  121. WINAPI
  122. GetNumaNodeProcessorMask(
  123. UCHAR Node,
  124. PULONGLONG ProcessorMask
  125. )
  126. /*++
  127. Routine Description:
  128. This routine is used to obtain the bitmask of processors for a
  129. given node.
  130. Arguments:
  131. Node Supplies the Node number for which the set of
  132. processors is returned.
  133. ProcessorMask Pointer to a ULONGLONG to receivethe bitmask of
  134. processors on this node.
  135. Return Value:
  136. TRUE is the Node number was reasonable, FALSE otherwise.
  137. --*/
  138. {
  139. NTSTATUS Status;
  140. ULONG ReturnedSize;
  141. SYSTEM_NUMA_INFORMATION Map;
  142. //
  143. // Get the node -> processor mask table from the system.
  144. //
  145. Status = NtQuerySystemInformation(SystemNumaProcessorMap,
  146. &Map,
  147. sizeof(Map),
  148. &ReturnedSize);
  149. if (!NT_SUCCESS(Status)) {
  150. //
  151. // This can't possibly have happened.
  152. //
  153. BaseSetLastNTError(Status);
  154. return FALSE;
  155. }
  156. //
  157. // If the requested node doesn't exist, return a zero processor
  158. // mask.
  159. //
  160. if (Node > Map.HighestNodeNumber) {
  161. SetLastError(ERROR_INVALID_PARAMETER);
  162. return FALSE;
  163. }
  164. //
  165. // Return the processor mask for the requested node.
  166. //
  167. *ProcessorMask = Map.ActiveProcessorsAffinityMask[Node];
  168. return TRUE;
  169. }
  170. BOOL
  171. WINAPI
  172. GetNumaProcessorMap(
  173. PSYSTEM_NUMA_INFORMATION Map,
  174. ULONG Length,
  175. PULONG ReturnedLength
  176. )
  177. /*++
  178. Routine Description:
  179. Query the system for the NUMA processor map.
  180. Arguments:
  181. Map Supplies a pointer to a stucture into which the
  182. Node to Processor layout is copied.
  183. Length Size of Map (ie max size to copy).
  184. ReturnedLength Number of bytes returned in Map.
  185. Return Value:
  186. TRUE unless something bad happened, FALSE otherwise.
  187. --*/
  188. {
  189. NTSTATUS Status;
  190. ULONG ReturnedSize;
  191. RtlZeroMemory(Map, Length);
  192. //
  193. // Fill in the user's buffer with the system Node -> Processor map.
  194. //
  195. Status = NtQuerySystemInformation(SystemNumaProcessorMap,
  196. Map,
  197. Length,
  198. ReturnedLength);
  199. if (!NT_SUCCESS(Status)) {
  200. BaseSetLastNTError(Status);
  201. return FALSE;
  202. }
  203. return TRUE;
  204. }
  205. BOOL
  206. WINAPI
  207. GetNumaAvailableMemory(
  208. PSYSTEM_NUMA_INFORMATION Memory,
  209. ULONG Length,
  210. PULONG ReturnedLength
  211. )
  212. /*++
  213. Routine Description:
  214. Query the system for the NUMA processor map.
  215. Arguments:
  216. Memory Supplies a pointer to a stucture into which the
  217. per node available memory data is copied.
  218. Length Size of data (ie max size to copy).
  219. ReturnedLength Nomber of bytes returned in Memory.
  220. Return Value:
  221. Returns the length of the data returned.
  222. --*/
  223. {
  224. NTSTATUS Status;
  225. ULONG ReturnedSize;
  226. RtlZeroMemory(Memory, Length);
  227. //
  228. // Fill in the user's buffer with the per node available
  229. // memory table.
  230. //
  231. Status = NtQuerySystemInformation(SystemNumaAvailableMemory,
  232. Memory,
  233. Length,
  234. ReturnedLength);
  235. if (!NT_SUCCESS(Status)) {
  236. BaseSetLastNTError(Status);
  237. return FALSE;
  238. }
  239. return TRUE;
  240. }
  241. BOOL
  242. WINAPI
  243. GetNumaAvailableMemoryNode(
  244. UCHAR Node,
  245. PULONGLONG AvailableBytes
  246. )
  247. /*++
  248. Routine Description:
  249. This routine returns the (aproximate) amount of memory available
  250. on a given node.
  251. Arguments:
  252. Node Node number for which available memory count is
  253. needed.
  254. AvailableBytes Supplies a pointer to a ULONGLONG in which the
  255. number of bytes of available memory will be
  256. returned.
  257. Return Value:
  258. TRUE is this call was successful, FALSE otherwise.
  259. --*/
  260. {
  261. NTSTATUS Status;
  262. ULONG ReturnedSize;
  263. SYSTEM_NUMA_INFORMATION Memory;
  264. //
  265. // Get the per node available memory table from the system.
  266. //
  267. Status = NtQuerySystemInformation(SystemNumaAvailableMemory,
  268. &Memory,
  269. sizeof(Memory),
  270. &ReturnedSize);
  271. if (!NT_SUCCESS(Status)) {
  272. BaseSetLastNTError(Status);
  273. return FALSE;
  274. }
  275. //
  276. // If the requested node doesn't exist, it doesn't have any
  277. // available memory either.
  278. //
  279. if (Node > Memory.HighestNodeNumber) {
  280. SetLastError(ERROR_INVALID_PARAMETER);
  281. return FALSE;
  282. }
  283. //
  284. // Return the amount of available memory on the requested node.
  285. //
  286. *AvailableBytes = Memory.AvailableMemory[Node];
  287. return TRUE;
  288. }
  289. //
  290. // NumaVirtualQueryNode
  291. //
  292. // SORT_SIZE defines the number of elements to be sorted before merging.
  293. //
  294. #define SORT_SIZE 64
  295. typedef struct {
  296. PMEMORY_WORKING_SET_BLOCK Low;
  297. PMEMORY_WORKING_SET_BLOCK Limit;
  298. } MERGELIST, *PMERGELIST;
  299. static
  300. VOID
  301. numaSortWSInfo(
  302. PMERGELIST List
  303. )
  304. {
  305. //
  306. // A simple bubble sort for small data sets.
  307. //
  308. PMEMORY_WORKING_SET_BLOCK High;
  309. PMEMORY_WORKING_SET_BLOCK Low;
  310. MEMORY_WORKING_SET_BLOCK Temp;
  311. for (Low = List->Low; Low < List->Limit; Low++) {
  312. for (High = Low + 1; High <= List->Limit; High++) {
  313. if (Low->VirtualPage > High->VirtualPage) {
  314. Temp = *High;
  315. *High = *Low;
  316. *Low = Temp;
  317. }
  318. }
  319. }
  320. }
  321. ULONGLONG
  322. WINAPI
  323. NumaVirtualQueryNode(
  324. IN ULONG NumberOfRanges,
  325. IN PULONG_PTR RangeList,
  326. OUT PULONG_PTR VirtualPageAndNode,
  327. IN SIZE_T MaximumOutputLength
  328. )
  329. /*++
  330. Routine Description:
  331. Determine the nodes for pages in the ranges described by the input
  332. RangeList.
  333. Arguments:
  334. NumberOfRanges Supplies the number of ranges in the range list.
  335. RangeList Points to a list of ULONG_PTRs which, in pairs,
  336. describe the lower and upper bounds of the pages
  337. for which node information is required.
  338. VirtualPageAndNode Points to the result buffer. The result buffer
  339. will be filled with one entry for each page that
  340. is found to fall within the ranges specified in
  341. RangeList.
  342. MaximumOutputLength Defines the maximum amount of data (in bytes) to
  343. be places in the result set.
  344. Return Value:
  345. Returns the number of entries in the result set.
  346. --*/
  347. {
  348. ULONGLONG PagesReturned = 0;
  349. PULONG_PTR Range;
  350. ULONG i;
  351. ULONG j;
  352. NTSTATUS Status;
  353. MEMORY_WORKING_SET_INFORMATION Info0;
  354. HANDLE Process = NtCurrentProcess();
  355. PMEMORY_WORKING_SET_INFORMATION Info = &Info0;
  356. SIZE_T ReturnedLength;
  357. ULONG_PTR NumberOfLists;
  358. PMEMORY_WORKING_SET_INFORMATION MergedList;
  359. PMERGELIST MergeList;
  360. PMERGELIST List;
  361. MERGELIST List0;
  362. typedef union {
  363. ULONG_PTR Raw;
  364. MEMORY_WORKING_SET_BLOCK WsBlock;
  365. } RAWWSBLOCK, *PRAWWSBLOCK;
  366. RAWWSBLOCK Result;
  367. RAWWSBLOCK MaxInterest;
  368. RAWWSBLOCK MinInterest;
  369. RAWWSBLOCK MaskLow;
  370. RAWWSBLOCK MaskHigh;
  371. SetLastError(NO_ERROR);
  372. //
  373. // Determine the max and min pages of interest.
  374. //
  375. Range = RangeList;
  376. MinInterest.Raw = (ULONG_PTR)-1;
  377. MaxInterest.Raw = 0;
  378. for (i = 0; i < NumberOfRanges; i++) {
  379. if (*Range < MinInterest.Raw) {
  380. MinInterest.Raw = *Range;
  381. }
  382. Range++;
  383. if (*Range > MaxInterest.Raw) {
  384. MaxInterest.Raw = *Range;
  385. }
  386. Range++;
  387. }
  388. //
  389. // Trim out any garbage.
  390. //
  391. Result.Raw = 0;
  392. Result.WsBlock.VirtualPage = MinInterest.WsBlock.VirtualPage;
  393. MinInterest = Result;
  394. Result.WsBlock.VirtualPage = MaxInterest.WsBlock.VirtualPage;
  395. MaxInterest = Result;
  396. if (MinInterest.Raw > MaxInterest.Raw) {
  397. return 0;
  398. }
  399. //
  400. // Ask for the working set once, with only enough space to get
  401. // the number of entries in the working set list.
  402. //
  403. Status = NtQueryVirtualMemory(Process,
  404. NULL,
  405. MemoryWorkingSetInformation,
  406. &Info0,
  407. sizeof(Info0),
  408. &ReturnedLength);
  409. if (Status != STATUS_INFO_LENGTH_MISMATCH) {
  410. BaseSetLastNTError(Status);
  411. return 0;
  412. }
  413. if (Info->NumberOfEntries == 0) {
  414. return 0;
  415. }
  416. //
  417. // Bump the entry count by some margin in case a few pages get added
  418. // before we ask again.
  419. //
  420. i = sizeof(Info0) + (Info->NumberOfEntries + 100) *
  421. sizeof(MEMORY_WORKING_SET_BLOCK);
  422. //
  423. // Get memory to read the process's working set information into.
  424. //
  425. Info = RtlAllocateHeap(RtlProcessHeap(), MAKE_TAG(TMP_TAG), i);
  426. if (!Info) {
  427. SetLastError(ERROR_NOT_ENOUGH_MEMORY);
  428. return 0;
  429. }
  430. Status = NtQueryVirtualMemory(Process,
  431. NULL,
  432. MemoryWorkingSetInformation,
  433. Info,
  434. i,
  435. &ReturnedLength);
  436. if (!NT_SUCCESS(Status)) {
  437. RtlFreeHeap(RtlProcessHeap(), 0, Info);
  438. BaseSetLastNTError(Status);
  439. return 0;
  440. }
  441. //
  442. // Make the comparisons easier. Or, more specifically, make
  443. // the comparisons ignore any page offsets in the Range info.
  444. //
  445. MaskLow.Raw = (ULONG_PTR)-1;
  446. MaskHigh.Raw = 0;
  447. //
  448. // For each entry returned, check to see if the entry is within
  449. // a requested range.
  450. //
  451. // We assume the number of working set entries will exceed the
  452. // number of ranges requested which means it is more efficient
  453. // to make one pass over the working set information.
  454. //
  455. for (i = 0; i < Info->NumberOfEntries; i++) {
  456. MaskHigh.WsBlock.VirtualPage = Info->WorkingSetInfo[i].VirtualPage;
  457. if ((MaskHigh.Raw < MinInterest.Raw) ||
  458. (MaskHigh.Raw > MaxInterest.Raw)) {
  459. //
  460. // This page is not interesting, skip it.
  461. //
  462. continue;
  463. }
  464. MaskLow.WsBlock.VirtualPage = MaskHigh.WsBlock.VirtualPage;
  465. Range = RangeList;
  466. for (j = 0; j < NumberOfRanges; j++) {
  467. if ((MaskLow.Raw >= *Range) &&
  468. (MaskHigh.Raw <= *(Range+1))) {
  469. //
  470. // Match.
  471. //
  472. // Coalesce interesting entries towards the beginning
  473. // os the WSInfo array.
  474. //
  475. Info->WorkingSetInfo[PagesReturned] = Info->WorkingSetInfo[i];
  476. PagesReturned++;
  477. break;
  478. }
  479. Range += 2;
  480. }
  481. }
  482. //
  483. // The pages of interest are now collected at the front of the
  484. // set of data returned by the system. Sort this and merge the
  485. // results into the caller's buffer.
  486. //
  487. Info->NumberOfEntries = (ULONG)PagesReturned;
  488. //
  489. // Divide the sort into a number of smaller sort lists.
  490. //
  491. NumberOfLists = (Info->NumberOfEntries / SORT_SIZE) + 1;
  492. //
  493. // Allocate memory for list management of the sorts (for the merge).
  494. //
  495. MergeList = RtlAllocateHeap(RtlProcessHeap(),
  496. MAKE_TAG(TMP_TAG),
  497. NumberOfLists * sizeof(MERGELIST));
  498. if (!MergeList) {
  499. //
  500. // Couldn't allocate memory for merge copy, do bubble sort in
  501. // place. Slow but will work.
  502. //
  503. List0.Low = &Info->WorkingSetInfo[0];
  504. List0.Limit = &Info->WorkingSetInfo[Info->NumberOfEntries - 1];
  505. numaSortWSInfo(&List0);
  506. NumberOfLists = 1;
  507. MergeList = &List0;
  508. } else {
  509. //
  510. // Sort each of the smaller lists.
  511. //
  512. List = MergeList;
  513. for (i = 0; i < Info->NumberOfEntries; i += SORT_SIZE) {
  514. ULONG_PTR j = i + SORT_SIZE - 1;
  515. if (j >= Info->NumberOfEntries) {
  516. j = Info->NumberOfEntries - 1;
  517. }
  518. List->Low = &Info->WorkingSetInfo[i];
  519. List->Limit = &Info->WorkingSetInfo[j];
  520. numaSortWSInfo(List);
  521. List++;
  522. }
  523. }
  524. //
  525. // Trim the result set to what will fit in the structure supplied
  526. // by the caller.
  527. //
  528. if ((PagesReturned * sizeof(ULONG_PTR)) > MaximumOutputLength) {
  529. PagesReturned = MaximumOutputLength / sizeof(ULONG_PTR);
  530. }
  531. //
  532. // Merge each list into the result array.
  533. //
  534. for (i = 0; i < PagesReturned; i++) {
  535. //
  536. // Look at each of the lists and choose the lowest element.
  537. //
  538. PMERGELIST NewLow = NULL;
  539. for (j = 0; j < NumberOfLists; j++) {
  540. //
  541. // If this list has been exhausted, skip it.
  542. //
  543. if (MergeList[j].Low > MergeList[j].Limit) {
  544. continue;
  545. }
  546. //
  547. // If no list has been selected as the new low, OR,
  548. // if this list has a lower element than the currently
  549. // selected low element, select it.
  550. //
  551. if ((NewLow == NULL) ||
  552. (MergeList[j].Low->VirtualPage < NewLow->Low->VirtualPage)) {
  553. NewLow = &MergeList[j];
  554. }
  555. }
  556. //
  557. // Take the selected low element and place it on the output list
  558. // then increment the low pointer for the list it was removed from.
  559. //
  560. Result.Raw = 0;
  561. Result.WsBlock.VirtualPage = NewLow->Low->VirtualPage;
  562. Result.Raw |= NewLow->Low->Node;
  563. *VirtualPageAndNode++ = Result.Raw;
  564. NewLow->Low++;
  565. }
  566. //
  567. // Free allocated memory and return the number of pages in the
  568. // result set.
  569. //
  570. if (MergeList != &List0) {
  571. RtlFreeHeap(RtlProcessHeap(), 0, MergeList);
  572. }
  573. RtlFreeHeap(RtlProcessHeap(), 0, Info);
  574. return PagesReturned;
  575. }