Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

3517 lines
131 KiB

  1. /*++
  2. Copyright (c) 1997-1999 Microsoft Corporation
  3. Module Name:
  4. Mca.c
  5. Abstract:
  6. Machine Check Architecture interface
  7. Author:
  8. AlanWar
  9. Environment:
  10. Kernel mode
  11. Revision History:
  12. --*/
  13. #pragma warning(disable:4206) // translation unit empty
  14. #include "wmikmp.h"
  15. #include <mce.h>
  16. #include "hal.h"
  17. #include "ntiologc.h"
  18. #define MCA_EVENT_INSTANCE_NAME L"McaEvent"
  19. #define MCA_UNDEFINED_CPU 0xffffffff
  20. #if defined(_IA64_)
  21. #define SAL_30_ERROR_REVISION 0x0002
  22. #define HalpGetFwMceLogProcessorNumber( /* PERROR_RECORD_HEADER */ _Log ) \
  23. ((UCHAR) (_Log)->TimeStamp.Reserved )
  24. #endif
  25. #if defined(_X86_) || defined(_AMD64_)
  26. #define HalpGetFwMceLogProcessorNumber( /* PMCA_EXCEPTION */ _Log ) \
  27. ( (_Log)->ProcessorNumber )
  28. typedef MCA_EXCEPTION ERROR_LOGRECORD, *PERROR_LOGRECORD;
  29. typedef MCA_EXCEPTION ERROR_RECORD_HEADER, *PERROR_RECORD_HEADER;
  30. #endif
  31. //
  32. // Types of corrected errors that are tracked
  33. //
  34. typedef enum
  35. {
  36. SingleBitEcc,
  37. CpuCache,
  38. CpuTlb,
  39. CpuBus,
  40. CpuRegFile
  41. } MCECORRECTEDTYPE, *PMCECORRECTEDTYPE;
  42. typedef struct
  43. {
  44. LIST_ENTRY List;
  45. MCECORRECTEDTYPE Type;
  46. USHORT Counter;
  47. USHORT Flags;
  48. LARGE_INTEGER Timestamp;
  49. union
  50. {
  51. //
  52. // For SingleBitEcc type, indicates physical address of page
  53. // where error occurred
  54. //
  55. PHYSICAL_ADDRESS SingleBitEccAddress;
  56. //
  57. // For Cpu* types, indicates cpu on which the error
  58. // occurred
  59. //
  60. ULONG CpuId;
  61. };
  62. } MCECORRECTEDEVENT, *PMCECORRECTEDEVENT;
  63. #define CORRECTED_MCE_EVENT_BUSY 0x0001
  64. BOOLEAN WmipMceEventDelivery(
  65. IN PVOID Reserved,
  66. IN KERNEL_MCE_DELIVERY_OPERATION Operation,
  67. IN PVOID Argument2
  68. );
  69. BOOLEAN WmipMceDelivery(
  70. IN PVOID Reserved,
  71. IN KERNEL_MCE_DELIVERY_OPERATION Operation,
  72. IN PVOID Argument2
  73. );
  74. void WmipMceWorkerRoutine(
  75. IN PVOID Context // Not Used
  76. );
  77. NTSTATUS WmipGetLogFromHal(
  78. HAL_QUERY_INFORMATION_CLASS InfoClass,
  79. PVOID Token,
  80. PWNODE_SINGLE_INSTANCE *Wnode,
  81. PERROR_LOGRECORD *Mca,
  82. PULONG McaSize,
  83. ULONG MaxSize,
  84. LPGUID Guid
  85. );
  86. NTSTATUS WmipRegisterMcaHandler(
  87. ULONG Phase
  88. );
  89. NTSTATUS WmipBuildMcaCmcEvent(
  90. OUT PWNODE_SINGLE_INSTANCE Wnode,
  91. IN LPGUID EventGuid,
  92. IN PERROR_LOGRECORD McaCmcEvent,
  93. IN ULONG McaCmcSize
  94. );
  95. NTSTATUS WmipGetRawMCAInfo(
  96. OUT PUCHAR Buffer,
  97. IN OUT PULONG BufferSize
  98. );
  99. NTSTATUS WmipWriteMCAEventLogEvent(
  100. PUCHAR Event
  101. );
  102. NTSTATUS WmipSetupWaitForWbem(
  103. void
  104. );
  105. void WmipIsWbemRunningDispatch(
  106. IN PKDPC Dpc,
  107. IN PVOID DeferredContext, // Not Used
  108. IN PVOID SystemArgument1, // Not Used
  109. IN PVOID SystemArgument2 // Not Used
  110. );
  111. void WmipPollingDpcRoutine(
  112. IN PKDPC Dpc,
  113. IN PVOID DeferredContext, // MCEQUERYINFO
  114. IN PVOID SystemArgument1, // New polling interval
  115. IN PVOID SystemArgument2 // Not used
  116. );
  117. void WmipIsWbemRunningWorker(
  118. PVOID Context
  119. );
  120. BOOLEAN WmipCheckIsWbemRunning(
  121. void
  122. );
  123. void WmipProcessPrevMcaLogs(
  124. void
  125. );
  126. void WmipFreeCorrectedMCEEvent(
  127. PMCECORRECTEDEVENT Event
  128. );
  129. PMCECORRECTEDEVENT WmipAllocCorrectedMCEEvent(
  130. MCECORRECTEDTYPE Type
  131. );
  132. NTSTATUS WmipTrackCorrectedMCE(
  133. IN MCECORRECTEDTYPE Type,
  134. IN PERROR_RECORD_HEADER Record,
  135. #if defined(_IA64_)
  136. IN PERROR_SECTION_HEADER Section,
  137. #endif
  138. OUT ULONG *LogToEventlog
  139. );
  140. #ifdef ALLOC_PRAGMA
  141. #pragma alloc_text(PAGE,WmipAllocCorrectedMCEEvent)
  142. #pragma alloc_text(PAGE,WmipFreeCorrectedMCEEvent)
  143. #pragma alloc_text(PAGE,WmipTrackCorrectedMCE)
  144. #pragma alloc_text(PAGE,WmipRegisterMcaHandler)
  145. #pragma alloc_text(PAGE,WmipMceWorkerRoutine)
  146. #pragma alloc_text(PAGE,WmipGetLogFromHal)
  147. #pragma alloc_text(PAGE,WmipBuildMcaCmcEvent)
  148. #pragma alloc_text(PAGE,WmipGetRawMCAInfo)
  149. #pragma alloc_text(PAGE,WmipWriteMCAEventLogEvent)
  150. #pragma alloc_text(PAGE,WmipGenerateMCAEventlog)
  151. #pragma alloc_text(PAGE,WmipIsWbemRunningWorker)
  152. #pragma alloc_text(PAGE,WmipCheckIsWbemRunning)
  153. #pragma alloc_text(PAGE,WmipSetupWaitForWbem)
  154. #pragma alloc_text(PAGE,WmipProcessPrevMcaLogs)
  155. #endif
  156. //
  157. // Set to TRUE when the registry indicates that popups should be
  158. // disabled. HKLM\System\CurrentControlSet\Control\WMI\DisableMCAPopups
  159. //
  160. ULONG WmipDisableMCAPopups;
  161. //
  162. // Guids for the various RAW MCA/CMC/CPE events
  163. //
  164. GUID WmipMSMCAEvent_CPUErrorGuid = MSMCAEvent_CPUErrorGuid;
  165. GUID WmipMSMCAEvent_MemoryErrorGuid = MSMCAEvent_MemoryErrorGuid;
  166. GUID WmipMSMCAEvent_PCIBusErrorGuid = MSMCAEvent_PCIBusErrorGuid;
  167. GUID WmipMSMCAEvent_PCIComponentErrorGuid = MSMCAEvent_PCIComponentErrorGuid;
  168. GUID WmipMSMCAEvent_SystemEventErrorGuid = MSMCAEvent_SystemEventErrorGuid;
  169. GUID WmipMSMCAEvent_SMBIOSErrorGuid = MSMCAEvent_SMBIOSErrorGuid;
  170. GUID WmipMSMCAEvent_PlatformSpecificErrorGuid = MSMCAEvent_PlatformSpecificErrorGuid;
  171. GUID WmipMSMCAEvent_InvalidErrorGuid = MSMCAEvent_InvalidErrorGuid;
  172. GUID WmipMSMCAEvent_MemoryPageRemoved = MSMCAEvent_MemoryPageRemovedGuid;
  173. //
  174. // GUIDs for the different error sections within a MCA
  175. //
  176. #if defined(_IA64_)
  177. GUID WmipErrorProcessorGuid = ERROR_PROCESSOR_GUID;
  178. GUID WmipErrorMemoryGuid = ERROR_MEMORY_GUID;
  179. GUID WmipErrorPCIBusGuid = ERROR_PCI_BUS_GUID;
  180. GUID WmipErrorPCIComponentGuid = ERROR_PCI_COMPONENT_GUID;
  181. GUID WmipErrorSELGuid = ERROR_SYSTEM_EVENT_LOG_GUID;
  182. GUID WmipErrorSMBIOSGuid = ERROR_SMBIOS_GUID;
  183. GUID WmipErrorSpecificGuid = ERROR_PLATFORM_SPECIFIC_GUID;
  184. #endif
  185. //
  186. // Each type of MCE has a control structure that is used to determine
  187. // whether to poll or wait for an interrupt to determine when to query
  188. // for the logs. This is needed since we can get a callback from the
  189. // HAL at high IRQL to inform us that a MCE log is available.
  190. // Additionally the Ke timer used for polling will call us at DPC level.
  191. // So in the case of an interrupt we will queue a DPC. Within the DPC
  192. // routine we will queue a work item so that we can get back to
  193. // passive level and be able to call the hal to get the logs (Can only
  194. // call hal at passive). The DPC and work item routines are common so a
  195. // MCEQUERYINFO struct is passed around so that it can operate on the
  196. // correct log type. Note that this implies that there may be multiple
  197. // work items querying the hal for different log types at the same
  198. // time. In addition this struct also contains useful log related
  199. // information including the maximum log size (as reported by the HAL),
  200. // the token that must be passed to the HAL when querying for the
  201. // logs and the HAL InfoClass to use when querying for the logs.
  202. //
  203. // PollFrequency keeps track of the number of seconds before initiating a
  204. // query. If it is 0 (HAL_CPE_DISABLED / HAL_CMC_DISABLED) then no
  205. // polling occurs and if it is -1 (HAL_CPE_INTERRUPTS_BASED /
  206. // HAL_CMC_INTERRUPTS_BASED) then no polling occurs either. There is
  207. // only one work item active for each log type and this is enforced via
  208. // ItemsOutstanding in that only whenever it transitions from 0 to 1 is
  209. // the work item queued.
  210. //
  211. #define DEFAULT_MAX_MCA_SIZE 0x1000
  212. #define DEFAULT_MAX_CMC_SIZE 0x1000
  213. #define DEFAULT_MAX_CPE_SIZE 0x1000
  214. typedef struct
  215. {
  216. HAL_QUERY_INFORMATION_CLASS InfoClass; // HAL Info class to use in MCE query
  217. ULONG PollFrequency; // Polling Frequency in seconds
  218. PVOID Token; // HAL Token to use in MCE Queries
  219. LONG ItemsOutstanding; // Number of interrupts or poll requests to process
  220. ULONG MaxSize; // Max size for log (as reported by HAL)
  221. GUID WnodeGuid; // GUID to use for the raw data event
  222. GUID SwitchToPollGuid; // GUID to use to fire event for switching to polled mode
  223. NTSTATUS SwitchToPollErrorCode; // Eventlog error code that indicates a switch to polled mode
  224. ULONG WorkerInProgress; // Set to 1 if worker routine is running
  225. KSPIN_LOCK DpcLock;
  226. KDPC DeliveryDpc; // DPC to handle delivery
  227. KTIMER PollingTimer; // KTIMER used for polling
  228. KDPC PollingDpc; // DPC to use for polling
  229. WORK_QUEUE_ITEM WorkItem; // Work item used to query for log
  230. } MCEQUERYINFO, *PMCEQUERYINFO;
  231. MCEQUERYINFO WmipMcaQueryInfo =
  232. {
  233. HalMcaLogInformation,
  234. HAL_MCA_INTERRUPTS_BASED, // Corrected MCA are delivered by interrupts
  235. NULL,
  236. 0,
  237. DEFAULT_MAX_MCA_SIZE,
  238. MSMCAInfo_RawMCAEventGuid
  239. };
  240. MCEQUERYINFO WmipCmcQueryInfo =
  241. {
  242. HalCmcLogInformation,
  243. HAL_CMC_DISABLED,
  244. NULL,
  245. 0,
  246. DEFAULT_MAX_CMC_SIZE,
  247. MSMCAInfo_RawCMCEventGuid,
  248. MSMCAEvent_SwitchToCMCPollingGuid,
  249. MCA_WARNING_CMC_THRESHOLD_EXCEEDED,
  250. 0
  251. };
  252. MCEQUERYINFO WmipCpeQueryInfo =
  253. {
  254. HalCpeLogInformation,
  255. HAL_CPE_DISABLED,
  256. NULL,
  257. 0,
  258. DEFAULT_MAX_CPE_SIZE,
  259. MSMCAInfo_RawCorrectedPlatformEventGuid,
  260. MSMCAEvent_SwitchToCPEPollingGuid,
  261. MCA_WARNING_CPE_THRESHOLD_EXCEEDED,
  262. 0
  263. };
  264. //
  265. // Used for waiting until WBEM is ready to receive events
  266. //
  267. KTIMER WmipIsWbemRunningTimer;
  268. KDPC WmipIsWbemRunningDpc;
  269. WORK_QUEUE_ITEM WmipIsWbemRunningWorkItem;
  270. LIST_ENTRY WmipWaitingMCAEvents = {&WmipWaitingMCAEvents, &WmipWaitingMCAEvents};
  271. #define WBEM_STATUS_UNKNOWN 0 // Polling process for waiting is not started
  272. #define WBEM_IS_RUNNING 1 // WBEM is currently running
  273. #define WAITING_FOR_WBEM 2 // Polling process for waiting is started
  274. UCHAR WmipIsWbemRunningFlag;
  275. #ifdef ALLOC_DATA_PRAGMA
  276. #pragma data_seg("PAGEDATA")
  277. #endif
  278. //
  279. // MCA information obtained at boot and holds the MCA that caused the
  280. // system to bugcheck on the previous boot
  281. //
  282. ULONG WmipRawMCASize;
  283. PMSMCAInfo_RawMCAData WmipRawMCA;
  284. //
  285. // Status of the MCE registration process
  286. //
  287. #define MCE_STATE_UNINIT 0
  288. #define MCE_STATE_REGISTERED 1
  289. #define MCE_STATE_RUNNING 2
  290. #define MCE_STATE_ERROR -1
  291. ULONG WmipMCEState;
  292. //
  293. // Configurable parameters for managing thresholds for eventlog
  294. // suppression and recovery action for corrected MCE
  295. //
  296. //
  297. // Interval within which multiple identical errors will be reported as
  298. // a single error to the system eventlog. Can be configured under
  299. // HKLM\System\CurrentControlSet\Control\WMI\CoalesceCorrectedErrorInterval
  300. // A value of 0 will cause no coalesce of identical errors
  301. //
  302. ULONG WmipCoalesceCorrectedErrorInterval = 5000;
  303. //
  304. // Number of single bit ecc errors that can occur in the same page
  305. // before it is attempted to map out the page. Can be configured under :
  306. // HKLM\System\CurrentControlSet\Control\WMI\SingleBitEccErrorThreshold
  307. // A value of 0 will cause no attempt to map out pages
  308. //
  309. ULONG WmipSingleBitEccErrorThreshold = 6;
  310. //
  311. // Maximum number of MCE events being tracked at one time. If there is
  312. // more than this limit then the oldest ones are recycled. Can be
  313. // configured under :
  314. // HKLM\System\CurrentControlSet\Control\WMI\MaxCorrectedMCEOutstanding
  315. // A value of 0 will disable tracking of corrected errors
  316. //
  317. ULONG WmipMaxCorrectedMCEOutstanding = 5;
  318. //
  319. // List of corrected MCE that are being tracked
  320. //
  321. LIST_ENTRY WmipCorrectedMCEHead = {&WmipCorrectedMCEHead, &WmipCorrectedMCEHead};
  322. ULONG WmipCorrectedMCECount;
  323. //
  324. // Counter of maximum eventlog entries generated by any source. Can be
  325. // configured under:
  326. // HKLM\System\CurrentControlSet\Control\WMI\MaxCorrectedEventlogs
  327. //
  328. ULONG WmipCorrectedEventlogCounter = 20;
  329. //
  330. // Check if WBEM is already running and if not check if we've already
  331. // kicked off the timer that will wait for wbem to start
  332. //
  333. #define WmipIsWbemRunning() ((WmipIsWbemRunningFlag == WBEM_IS_RUNNING) ? \
  334. TRUE : \
  335. FALSE)
  336. void WmipInsertQueueMCEDpc(
  337. PMCEQUERYINFO QueryInfo
  338. );
  339. NTSTATUS WmipWriteToEventlog(
  340. NTSTATUS ErrorCode,
  341. NTSTATUS FinalStatus
  342. )
  343. {
  344. PIO_ERROR_LOG_PACKET ErrLog;
  345. NTSTATUS Status;
  346. ErrLog = IoAllocateErrorLogEntry(WmipServiceDeviceObject,
  347. sizeof(IO_ERROR_LOG_PACKET));
  348. if (ErrLog != NULL) {
  349. //
  350. // Fill it in and write it out as a single string.
  351. //
  352. ErrLog->ErrorCode = ErrorCode;
  353. ErrLog->FinalStatus = FinalStatus;
  354. ErrLog->StringOffset = 0;
  355. ErrLog->NumberOfStrings = 0;
  356. IoWriteErrorLogEntry(ErrLog);
  357. Status = STATUS_SUCCESS;
  358. } else {
  359. Status = STATUS_INSUFFICIENT_RESOURCES;
  360. }
  361. return(Status);
  362. }
  363. NTSTATUS WmipFireOffWmiEvent(
  364. LPGUID Guid,
  365. ULONG DataSize,
  366. PVOID DataPtr
  367. )
  368. {
  369. PVOID Ptr;
  370. PWNODE_SINGLE_INSTANCE Wnode;
  371. PWCHAR Wptr;
  372. ULONG RoundedDataSize;
  373. NTSTATUS Status;
  374. RoundedDataSize = (DataSize + 1) & ~1;
  375. Wnode = ExAllocatePoolWithTag(NonPagedPool,
  376. sizeof(WNODE_SINGLE_INSTANCE) +
  377. RoundedDataSize +
  378. sizeof(USHORT) +
  379. sizeof(MCA_EVENT_INSTANCE_NAME),
  380. WmipMCAPoolTag);
  381. if (Wnode != NULL)
  382. {
  383. Wnode->WnodeHeader.BufferSize = sizeof(WNODE_SINGLE_INSTANCE) +
  384. sizeof(USHORT) +
  385. RoundedDataSize +
  386. sizeof(MCA_EVENT_INSTANCE_NAME);
  387. Wnode->WnodeHeader.Guid = *Guid;
  388. Wnode->WnodeHeader.Flags = WNODE_FLAG_SINGLE_INSTANCE |
  389. WNODE_FLAG_EVENT_ITEM;
  390. KeQuerySystemTime(&Wnode->WnodeHeader.TimeStamp);
  391. Wnode->DataBlockOffset = sizeof(WNODE_SINGLE_INSTANCE);
  392. Wnode->SizeDataBlock = DataSize;
  393. if (DataPtr != NULL)
  394. {
  395. Ptr = OffsetToPtr(Wnode, Wnode->DataBlockOffset);
  396. memcpy(Ptr, DataPtr, DataSize);
  397. }
  398. Wnode->OffsetInstanceName = sizeof(WNODE_SINGLE_INSTANCE) + RoundedDataSize;
  399. Wptr = (PWCHAR)OffsetToPtr(Wnode, Wnode->OffsetInstanceName);
  400. *Wptr++ = sizeof(MCA_EVENT_INSTANCE_NAME);
  401. RtlCopyMemory(Wptr,
  402. MCA_EVENT_INSTANCE_NAME,
  403. sizeof(MCA_EVENT_INSTANCE_NAME));
  404. Status = IoWMIWriteEvent(Wnode);
  405. if (! NT_SUCCESS(Status))
  406. {
  407. ExFreePool(Wnode);
  408. }
  409. }
  410. else {
  411. Status = STATUS_INSUFFICIENT_RESOURCES;
  412. }
  413. return(Status);
  414. }
  415. NTSTATUS WmipBuildMcaCmcEvent(
  416. OUT PWNODE_SINGLE_INSTANCE Wnode,
  417. IN LPGUID EventGuid,
  418. IN PERROR_LOGRECORD McaCmcEvent,
  419. IN ULONG McaCmcSize
  420. )
  421. /*++
  422. Routine Description:
  423. This routine will take a MCA or CMC log and build a
  424. WNODE_EVENT_ITEM for it.
  425. This routine may be called at DPC
  426. Arguments:
  427. Wnode is the wnode buffer in which to build the event
  428. EventGuid is the guid to use in the event wnode
  429. McaCmcEvent is the MCA, CMC or CPE data payload to put into the
  430. event
  431. McaCmcSize is the size of the event data
  432. Return Value:
  433. NT status code
  434. --*/
  435. {
  436. PMSMCAInfo_RawCMCEvent Ptr;
  437. ULONG Size;
  438. PAGED_CODE();
  439. Size = McaCmcSize + FIELD_OFFSET(MSMCAInfo_RawCMCEvent,
  440. Records) +
  441. FIELD_OFFSET(MSMCAInfo_Entry, Data);
  442. RtlZeroMemory(Wnode, sizeof(WNODE_SINGLE_INSTANCE));
  443. Wnode->WnodeHeader.BufferSize = Size + sizeof(WNODE_SINGLE_INSTANCE);
  444. Wnode->WnodeHeader.ProviderId = IoWMIDeviceObjectToProviderId(WmipServiceDeviceObject);
  445. KeQuerySystemTime(&Wnode->WnodeHeader.TimeStamp);
  446. Wnode->WnodeHeader.Guid = *EventGuid;
  447. Wnode->WnodeHeader.Flags = WNODE_FLAG_SINGLE_INSTANCE |
  448. WNODE_FLAG_EVENT_ITEM |
  449. WNODE_FLAG_STATIC_INSTANCE_NAMES;
  450. Wnode->DataBlockOffset = FIELD_OFFSET(WNODE_SINGLE_INSTANCE,
  451. VariableData);
  452. Wnode->SizeDataBlock = Size;
  453. Ptr = (PMSMCAInfo_RawCMCEvent)&Wnode->VariableData;
  454. Ptr->Count = 1; // 1 Record in this event
  455. Ptr->Records[0].Length = McaCmcSize; // Size of log record in bytes
  456. if (McaCmcEvent != NULL)
  457. {
  458. RtlCopyMemory(Ptr->Records[0].Data, McaCmcEvent, McaCmcSize);
  459. }
  460. return(STATUS_SUCCESS);
  461. }
  462. NTSTATUS WmipQueryLogAndFireEvent(
  463. PMCEQUERYINFO QueryInfo
  464. )
  465. /*++
  466. Routine Description:
  467. Utility routine that will query the hal for a log and then if one
  468. is returned successfully then will fire the appropriate WMI events
  469. Arguments:
  470. QueryInfo is a pointer to the MCEQUERYINFO for the type of log that
  471. needs to be queried.
  472. Return Value:
  473. --*/
  474. {
  475. PWNODE_SINGLE_INSTANCE Wnode;
  476. NTSTATUS Status, Status2;
  477. ULONG Size;
  478. PERROR_LOGRECORD Log;
  479. PAGED_CODE();
  480. //
  481. // Call HAL to get the log
  482. //
  483. Status = WmipGetLogFromHal(QueryInfo->InfoClass,
  484. QueryInfo->Token,
  485. &Wnode,
  486. &Log,
  487. &Size,
  488. QueryInfo->MaxSize,
  489. &QueryInfo->WnodeGuid);
  490. if (NT_SUCCESS(Status))
  491. {
  492. //
  493. // Look at the event and fire it off as WMI events that
  494. // will generate eventlog events
  495. //
  496. WmipGenerateMCAEventlog((PUCHAR)Log,
  497. Size,
  498. FALSE);
  499. //
  500. // Fire the log off as a WMI event
  501. //
  502. Status2 = IoWMIWriteEvent(Wnode);
  503. if (! NT_SUCCESS(Status2))
  504. {
  505. //
  506. // IoWMIWriteEvent will free the wnode back to pool,
  507. // but not if it fails
  508. //
  509. ExFreePool(Wnode);
  510. }
  511. WmipDebugPrintEx((DPFLTR_WMICORE_ID,
  512. DPFLTR_MCA_LEVEL,
  513. "WMI: MCE Event fired to WMI -> %x\n",
  514. Status));
  515. } else {
  516. WmipDebugPrintEx((DPFLTR_WMICORE_ID,
  517. DPFLTR_MCA_LEVEL,
  518. "WMI: MCE Event for %p not available %x\n",
  519. QueryInfo, Status));
  520. }
  521. return(Status);
  522. }
  523. void WmipMceWorkerRoutine(
  524. IN PVOID Context // MCEQUERYINFO
  525. )
  526. /*++
  527. Routine Description:
  528. Worker routine that handles polling for corrected MCA, CMC and CPE
  529. logs from the HAL and then firing them as WMI events.
  530. Arguments:
  531. Context is a pointer to the MCEQUERYINFO for the type of log that
  532. needs to be queried.
  533. Return Value:
  534. --*/
  535. {
  536. PMCEQUERYINFO QueryInfo = (PMCEQUERYINFO)Context;
  537. NTSTATUS Status;
  538. ULONG i;
  539. LONG x, Count;
  540. PAGED_CODE();
  541. WmipDebugPrintEx((DPFLTR_WMICORE_ID, DPFLTR_MCA_LEVEL,
  542. "WMI: WmipMceWorkerRoutine %p enter\n",
  543. QueryInfo));
  544. //
  545. // If the worker is already in progress then we just exit
  546. //
  547. WmipEnterSMCritSection();
  548. if (QueryInfo->WorkerInProgress == 0)
  549. {
  550. QueryInfo->WorkerInProgress = 1;
  551. WmipLeaveSMCritSection();
  552. } else {
  553. WmipLeaveSMCritSection();
  554. WmipDebugPrintEx((DPFLTR_WMICORE_ID, DPFLTR_MCA_LEVEL,
  555. "WMI: WmipMceWorkerRoutine %p in progress\n",
  556. QueryInfo));
  557. return;
  558. }
  559. //
  560. // Check to see if access has already been disabled
  561. //
  562. if (QueryInfo->PollFrequency != HAL_MCE_DISABLED)
  563. {
  564. //
  565. // We get all of the records by calling into the hal and querying
  566. // for the logs until the hal returns an error or we've
  567. // retrieved 256 records. We want to protect ourselves from the
  568. // case where a repeated corrected error would cause the loop
  569. // to be infinite.
  570. //
  571. i = 0;
  572. do
  573. {
  574. //
  575. // Remember how many corrected errors we have received up until
  576. // this point. We guarantee that we've handled them up
  577. // until this point
  578. //
  579. Count = QueryInfo->ItemsOutstanding;
  580. Status = WmipQueryLogAndFireEvent(QueryInfo);
  581. } while ((NT_SUCCESS(Status) && (i++ < 256)));
  582. //
  583. // Reset counter back to 0, but check if any errors
  584. // had occured while we were processing. If so we go
  585. // back and make sure they are handled. Note that this
  586. // could cause a new worker thread to be created while we
  587. // are still processing these, but that is ok since we only
  588. // allow one worker thread to run at one time.
  589. //
  590. WmipEnterSMCritSection();
  591. x = InterlockedExchange(&QueryInfo->ItemsOutstanding,
  592. 0);
  593. if ((x > Count) && (i < 257))
  594. {
  595. //
  596. // Since there are still more corrected errors to
  597. // process, queue a new DPC to cause a new worker
  598. // routine to be run.
  599. //
  600. WmipInsertQueueMCEDpc(QueryInfo);
  601. }
  602. QueryInfo->WorkerInProgress = 0;
  603. WmipLeaveSMCritSection();
  604. }
  605. }
  606. void WmipMceDispatchRoutine(
  607. PMCEQUERYINFO QueryInfo
  608. )
  609. {
  610. ULONG x;
  611. //
  612. // Increment the number of items that are outstanding for this info
  613. // class. If the number of items outstanding transitions from 0 to
  614. // 1 then this implies that a work item for this info class needs
  615. // to be queued
  616. //
  617. x = InterlockedIncrement(&QueryInfo->ItemsOutstanding);
  618. WmipDebugPrintEx((DPFLTR_WMICORE_ID, DPFLTR_MCA_LEVEL,
  619. "WMI: WmipMceDispatchRoutine %p transition to %d\n",
  620. QueryInfo,
  621. x));
  622. if (x == 1)
  623. {
  624. ExQueueWorkItem(&QueryInfo->WorkItem,
  625. DelayedWorkQueue);
  626. }
  627. }
  628. void WmipMceDpcRoutine(
  629. IN PKDPC Dpc,
  630. IN PVOID DeferredContext, // Not Used
  631. IN PVOID SystemArgument1, // MCEQUERYINFO
  632. IN PVOID SystemArgument2 // Not used
  633. )
  634. {
  635. UNREFERENCED_PARAMETER (Dpc);
  636. UNREFERENCED_PARAMETER (DeferredContext);
  637. UNREFERENCED_PARAMETER (SystemArgument2);
  638. WmipDebugPrintEx((DPFLTR_WMICORE_ID, DPFLTR_MCA_LEVEL,
  639. "WMI: WmipMceDpcRoutine %p Enter\n",
  640. SystemArgument1));
  641. WmipMceDispatchRoutine((PMCEQUERYINFO)SystemArgument1);
  642. }
  643. void WmipPollingDpcRoutine(
  644. IN PKDPC Dpc,
  645. IN PVOID DeferredContext, // MCEQUERYINFO
  646. IN PVOID SystemArgument1, // New polling Interval
  647. IN PVOID SystemArgument2 // Not used
  648. )
  649. {
  650. PMCEQUERYINFO QueryInfo = (PMCEQUERYINFO)DeferredContext;
  651. LARGE_INTEGER li;
  652. ULONG PollingInterval = PtrToUlong(SystemArgument1);
  653. UNREFERENCED_PARAMETER (Dpc);
  654. UNREFERENCED_PARAMETER (SystemArgument2);
  655. if (QueryInfo->PollFrequency == HAL_MCE_INTERRUPTS_BASED)
  656. {
  657. //
  658. // HAL has instructed us to switch into polled mode and has
  659. // informed us of the new polling interval.
  660. //
  661. QueryInfo->PollFrequency = PollingInterval;
  662. li.QuadPart = -1 * (QueryInfo->PollFrequency * 1000000000);
  663. KeSetTimerEx(&QueryInfo->PollingTimer,
  664. li,
  665. QueryInfo->PollFrequency * 1000,
  666. &QueryInfo->PollingDpc);
  667. //
  668. // Make a note in the eventlog that this has occured.
  669. //
  670. WmipWriteToEventlog(QueryInfo->SwitchToPollErrorCode,
  671. STATUS_SUCCESS
  672. );
  673. //
  674. // Inform any WMI consumers that the switch has occured
  675. //
  676. WmipFireOffWmiEvent(&QueryInfo->SwitchToPollGuid,
  677. 0,
  678. NULL);
  679. } else {
  680. //
  681. // Our timer fired so we need to poll
  682. //
  683. WmipMceDispatchRoutine(QueryInfo);
  684. }
  685. }
  686. BOOLEAN WmipMceDelivery(
  687. IN PVOID Reserved,
  688. IN KERNEL_MCE_DELIVERY_OPERATION Operation,
  689. IN PVOID Argument2
  690. )
  691. /*++
  692. Routine Description:
  693. This routine is called by the HAL when a CMC or CPE occurs. It is called
  694. at high irql
  695. Arguments:
  696. Operation is the operation that the HAL is instructing us to do
  697. Reserved is the CMC token
  698. Parameter for operation specified.
  699. For CmcSwitchToPolledMode and CpeSwitchToPolledMode, Parameter
  700. specifies the number of seconds to between polling.
  701. Return Value:
  702. TRUE to indicate that we handled the delivery
  703. --*/
  704. {
  705. PMCEQUERYINFO QueryInfo;
  706. BOOLEAN ret;
  707. WmipDebugPrintEx((DPFLTR_WMICORE_ID, DPFLTR_MCA_LEVEL,
  708. "WMI: MceDelivery Operation %d(%p)\n",
  709. Operation, Argument2));
  710. //
  711. // First figure out which type of MCE we are dealing with
  712. //
  713. switch (Operation)
  714. {
  715. case CmcAvailable:
  716. case CmcSwitchToPolledMode:
  717. {
  718. QueryInfo = &WmipCmcQueryInfo;
  719. break;
  720. }
  721. case CpeAvailable:
  722. case CpeSwitchToPolledMode:
  723. {
  724. QueryInfo = &WmipCpeQueryInfo;
  725. break;
  726. }
  727. case McaAvailable:
  728. {
  729. QueryInfo = &WmipMcaQueryInfo;
  730. break;
  731. }
  732. default:
  733. {
  734. WmipAssert(FALSE);
  735. return(FALSE);
  736. }
  737. }
  738. //
  739. // Next determine what action to perform
  740. //
  741. switch (Operation)
  742. {
  743. case CmcAvailable:
  744. case CpeAvailable:
  745. case McaAvailable:
  746. {
  747. //
  748. // Store the HAL token which is needed to retrieve the logs from
  749. // the hal
  750. //
  751. QueryInfo->Token = Reserved;
  752. //
  753. // If we are ready to handle the logs and we are dealing with thse
  754. // logs on an interrupt basis, then go ahead and queue a DPC to handle
  755. // processing the log
  756. //
  757. if ((WmipMCEState == MCE_STATE_RUNNING) &&
  758. (QueryInfo->PollFrequency == HAL_MCE_INTERRUPTS_BASED))
  759. {
  760. KeAcquireSpinLockAtDpcLevel(&QueryInfo->DpcLock);
  761. KeInsertQueueDpc(&QueryInfo->DeliveryDpc,
  762. QueryInfo,
  763. NULL);
  764. KeReleaseSpinLockFromDpcLevel(&QueryInfo->DpcLock);
  765. ret = TRUE;
  766. } else {
  767. ret = FALSE;
  768. }
  769. break;
  770. }
  771. case CmcSwitchToPolledMode:
  772. case CpeSwitchToPolledMode:
  773. {
  774. KeInsertQueueDpc(&QueryInfo->PollingDpc,
  775. Argument2,
  776. NULL);
  777. ret = TRUE;
  778. break;
  779. }
  780. default:
  781. {
  782. ret = FALSE;
  783. break;
  784. }
  785. }
  786. return(ret);
  787. }
  788. BOOLEAN WmipMceEventDelivery(
  789. IN PVOID Reserved,
  790. IN KERNEL_MCE_DELIVERY_OPERATION Operation,
  791. IN PVOID Argument2
  792. )
  793. /*++
  794. Routine Description:
  795. This routine is called by the HAL when a situation occurs between
  796. the HAL and SAL interface. It is called at high irql
  797. Arguments:
  798. Reserved has the Operation and EventType
  799. Argument2 has the SAL return code
  800. Return Value:
  801. --*/
  802. {
  803. USHORT MceOperation;
  804. LONGLONG SalStatus;
  805. ULONG MceType;
  806. PMCEQUERYINFO QueryInfo;
  807. WmipDebugPrintEx((DPFLTR_WMICORE_ID, DPFLTR_MCA_LEVEL,
  808. "WMI: MCEDelivery %p %d %p\n",
  809. Reserved,
  810. Operation,
  811. Argument2
  812. ));
  813. MceOperation = KERNEL_MCE_OPERATION(Reserved);
  814. MceType = KERNEL_MCE_EVENTTYPE(Reserved);
  815. SalStatus = (LONGLONG)Argument2;
  816. //
  817. // If the hal is notifying us that a GetStateInfo failed with
  818. // SalStatus == -15 then we need to retry our query later
  819. //
  820. if ((MceOperation == KERNEL_MCE_OPERATION_GET_STATE_INFO) &&
  821. (Operation == MceNotification) &&
  822. (SalStatus == (LONGLONG)-15))
  823. {
  824. WmipDebugPrintEx((DPFLTR_WMICORE_ID, DPFLTR_MCA_LEVEL,
  825. "WMI: Sal is asking us to retry getstateinfo for type %x\n",
  826. MceType));
  827. switch(MceType)
  828. {
  829. case KERNEL_MCE_EVENTTYPE_CMC:
  830. {
  831. QueryInfo = &WmipCmcQueryInfo;
  832. break;
  833. }
  834. case KERNEL_MCE_EVENTTYPE_CPE:
  835. {
  836. QueryInfo = &WmipCpeQueryInfo;
  837. break;
  838. }
  839. default:
  840. {
  841. QueryInfo = NULL;
  842. }
  843. }
  844. if (QueryInfo != NULL)
  845. {
  846. //
  847. // If CMC or CPE are interrupt based then queue up a new
  848. // DPC for performing the query. If polling based then
  849. // there are no worries, we just wait for the next polling
  850. // interval.
  851. //
  852. if ((WmipMCEState == MCE_STATE_RUNNING) &&
  853. (QueryInfo->PollFrequency == HAL_MCE_INTERRUPTS_BASED))
  854. {
  855. KeAcquireSpinLockAtDpcLevel(&QueryInfo->DpcLock);
  856. KeInsertQueueDpc(&QueryInfo->DeliveryDpc,
  857. QueryInfo,
  858. NULL);
  859. KeReleaseSpinLockFromDpcLevel(&QueryInfo->DpcLock);
  860. }
  861. }
  862. }
  863. return(FALSE);
  864. }
  865. void WmipProcessPrevMcaLogs(
  866. void
  867. )
  868. /*++
  869. Routine Description:
  870. This routine will flush out any of the previous MCA logs and then
  871. hang onto them for WMI to report.
  872. Arguments:
  873. Return Value:
  874. --*/
  875. {
  876. NTSTATUS status;
  877. PERROR_LOGRECORD log;
  878. PMSMCAInfo_RawMCAEvent event;
  879. ULONG size;
  880. PWNODE_SINGLE_INSTANCE wnode;
  881. LIST_ENTRY list;
  882. ULONG prevLogCount;
  883. PMSMCAInfo_Entry record;
  884. ULONG sizeNeeded;
  885. PAGED_CODE();
  886. InitializeListHead(&list);
  887. sizeNeeded = sizeof(ULONG); // Need space for count of records
  888. prevLogCount = 0;
  889. do
  890. {
  891. //
  892. // Read a MCA log out of the HAL
  893. //
  894. status = WmipGetLogFromHal(HalMcaLogInformation,
  895. WmipMcaQueryInfo.Token,
  896. &wnode,
  897. &log,
  898. &size,
  899. WmipMcaQueryInfo.MaxSize,
  900. &WmipMcaQueryInfo.WnodeGuid);
  901. if (NT_SUCCESS(status))
  902. {
  903. //
  904. // Previous logs have a ErrorSeverity of Fatal since they
  905. // were fatal and brought down the system in last boot.
  906. // keep track of how much memory we will need
  907. //
  908. prevLogCount++;
  909. // Need space for record length and
  910. // record padded to DWORD
  911. sizeNeeded += sizeof(ULONG) + ((size +3)&~3);
  912. InsertTailList(&list, (PLIST_ENTRY)wnode);
  913. WmipGenerateMCAEventlog((PUCHAR)log,
  914. size,
  915. TRUE);
  916. }
  917. } while (NT_SUCCESS(status));
  918. if (! IsListEmpty(&list))
  919. {
  920. //
  921. // We have collected a set of previous logs, so we need to
  922. // build the buffer containing the aggregation of those logs.
  923. // The buffer will correspond to the entire MOF structure for
  924. // the MSMCAInfo_RawMCAData class
  925. //
  926. WmipRawMCA = (PMSMCAInfo_RawMCAData)ExAllocatePoolWithTag(PagedPool,
  927. sizeNeeded,
  928. WmipMCAPoolTag);
  929. //
  930. // Fill in the count of logs that follow
  931. //
  932. if (WmipRawMCA != NULL)
  933. {
  934. WmipRawMCA->Count = prevLogCount;
  935. }
  936. //
  937. // Loop over all previous logs
  938. //
  939. WmipRawMCASize = sizeNeeded;
  940. record = &WmipRawMCA->Records[0];
  941. while (! IsListEmpty(&list))
  942. {
  943. wnode = (PWNODE_SINGLE_INSTANCE)RemoveHeadList(&list);
  944. if (WmipRawMCA != NULL)
  945. {
  946. //
  947. // Get the log back from within the wnode
  948. //
  949. event = (PMSMCAInfo_RawMCAEvent)OffsetToPtr(wnode, wnode->DataBlockOffset);
  950. //
  951. // Copy the log data into our buffer. Note that we
  952. // assume there will only be 1 record within the event
  953. //
  954. size = event->Records[0].Length;
  955. record->Length = size;
  956. RtlCopyMemory(&record->Data[0], &event->Records[0].Data[0], size);
  957. size = FIELD_OFFSET(MSMCAInfo_Entry, Data) + (size +3)&~3;
  958. record = (PMSMCAInfo_Entry)((PUCHAR)record + size);
  959. }
  960. ExFreePool(wnode);
  961. }
  962. }
  963. }
  964. //#define TEST_EARLY_CPE
  965. #ifdef TEST_EARLY_CPE
  966. void WmipTestEarlyCPE(
  967. void
  968. )
  969. {
  970. //
  971. // Test code to generate a previous MCA without having
  972. // had generate one previously
  973. //
  974. PERROR_SMBIOS s;
  975. UCHAR Buffer[0x400];
  976. PERROR_RECORD_HEADER rh;
  977. PERROR_SECTION_HEADER sh;
  978. #define ERROR_SMBIOS_GUID \
  979. { 0xe429faf5, 0x3cb7, 0x11d4, { 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81 }}
  980. ERROR_DEVICE_GUID ErrorSmbiosGuid = ERROR_SMBIOS_GUID;
  981. rh = (PERROR_RECORD_HEADER)Buffer;
  982. rh->Id = 0x12345678;
  983. rh->Revision.Revision = 0x0200;
  984. rh->Valid.Valid = 0;
  985. rh->TimeStamp.TimeStamp = 0x2001031900165323;
  986. sh = (PERROR_SECTION_HEADER)((PUCHAR)rh + sizeof(ERROR_RECORD_HEADER));
  987. memset(sh, 0, sizeof(Buffer));
  988. sh->Revision.Revision = 0x0200;
  989. sh->RecoveryInfo.RecoveryInfo = 0;
  990. sh->Length = sizeof(ERROR_SMBIOS);
  991. sh->Guid = ErrorSmbiosGuid;
  992. s = (PERROR_SMBIOS)sh;
  993. s->Valid.Valid = 0;
  994. s->Valid.EventType = 1;
  995. s->EventType = 0xa0;
  996. rh->Length = sizeof(ERROR_RECORD_HEADER) + sh->Length;
  997. HalSetSystemInformation(HalCpeLog,
  998. rh->Length,
  999. rh);
  1000. }
  1001. #endif
  1002. void WmipInsertQueueMCEDpc(
  1003. PMCEQUERYINFO QueryInfo
  1004. )
  1005. {
  1006. KIRQL OldIrql;
  1007. KeAcquireSpinLock(&QueryInfo->DpcLock,
  1008. &OldIrql);
  1009. KeInsertQueueDpc(&QueryInfo->DeliveryDpc,
  1010. QueryInfo,
  1011. NULL);
  1012. KeReleaseSpinLock(&QueryInfo->DpcLock,
  1013. OldIrql);
  1014. }
  1015. NTSTATUS WmipRegisterMcaHandler(
  1016. ULONG Phase
  1017. )
  1018. /*++
  1019. Routine Description:
  1020. This routine will register a kernel MCA and CMC handler with the
  1021. hal
  1022. Arguments:
  1023. Return Value:
  1024. NT status code
  1025. --*/
  1026. {
  1027. KERNEL_ERROR_HANDLER_INFO KernelMcaHandlerInfo;
  1028. NTSTATUS Status;
  1029. HAL_ERROR_INFO HalErrorInfo;
  1030. ULONG ReturnSize;
  1031. LARGE_INTEGER li;
  1032. PAGED_CODE();
  1033. if (Phase == 0)
  1034. {
  1035. //
  1036. // Phase 0 initialization is done before device drivers are
  1037. // loaded so that the kernel can register its kernel error
  1038. // handler before any driver gets a chance to do so.
  1039. //
  1040. //
  1041. // Validate registry values
  1042. //
  1043. if (WmipCorrectedEventlogCounter == 0)
  1044. {
  1045. //
  1046. // set corrected eventlog counter to -1 to indicate that no
  1047. // eventlog suppression should occur
  1048. //
  1049. WmipCorrectedEventlogCounter = 0xffffffff;
  1050. }
  1051. //
  1052. // Get the size of the logs and any polling/interrupt policies
  1053. //
  1054. HalErrorInfo.Version = HAL_ERROR_INFO_VERSION;
  1055. Status = HalQuerySystemInformation(HalErrorInformation,
  1056. sizeof(HAL_ERROR_INFO),
  1057. &HalErrorInfo,
  1058. &ReturnSize);
  1059. if ((NT_SUCCESS(Status)) &&
  1060. (ReturnSize >= sizeof(HAL_ERROR_INFO)))
  1061. {
  1062. //
  1063. // Initialize MCA QueryInfo structure
  1064. //
  1065. if (HalErrorInfo.McaMaxSize != 0)
  1066. {
  1067. WmipMcaQueryInfo.MaxSize = HalErrorInfo.McaMaxSize;
  1068. }
  1069. WmipMcaQueryInfo.Token = (PVOID)(ULONG_PTR) HalErrorInfo.McaKernelToken;
  1070. //
  1071. // Initialize DPC and Workitem for processing
  1072. //
  1073. KeInitializeDpc(&WmipMcaQueryInfo.DeliveryDpc,
  1074. WmipMceDpcRoutine,
  1075. NULL);
  1076. KeInitializeDpc(&WmipMcaQueryInfo.PollingDpc,
  1077. WmipPollingDpcRoutine,
  1078. &WmipMcaQueryInfo);
  1079. ExInitializeWorkItem(&WmipMcaQueryInfo.WorkItem,
  1080. WmipMceWorkerRoutine,
  1081. &WmipMcaQueryInfo);
  1082. //
  1083. // Initialize CMC QueryInfo structure
  1084. //
  1085. if (HalErrorInfo.CmcMaxSize != 0)
  1086. {
  1087. WmipCmcQueryInfo.MaxSize = HalErrorInfo.CmcMaxSize;
  1088. }
  1089. WmipCmcQueryInfo.PollFrequency = HalErrorInfo.CmcPollingInterval;
  1090. WmipCmcQueryInfo.Token = (PVOID)(ULONG_PTR) HalErrorInfo.CmcKernelToken;
  1091. //
  1092. // Initialize DPC and Workitem for processing
  1093. //
  1094. KeInitializeSpinLock(&WmipCmcQueryInfo.DpcLock);
  1095. KeInitializeDpc(&WmipCmcQueryInfo.DeliveryDpc,
  1096. WmipMceDpcRoutine,
  1097. NULL);
  1098. KeInitializeDpc(&WmipCmcQueryInfo.PollingDpc,
  1099. WmipPollingDpcRoutine,
  1100. &WmipCmcQueryInfo);
  1101. ExInitializeWorkItem(&WmipCmcQueryInfo.WorkItem,
  1102. WmipMceWorkerRoutine,
  1103. &WmipCmcQueryInfo);
  1104. KeInitializeTimerEx(&WmipCmcQueryInfo.PollingTimer,
  1105. NotificationTimer);
  1106. //
  1107. // Initialize CPE QueryInfo structure
  1108. //
  1109. if (HalErrorInfo.CpeMaxSize != 0)
  1110. {
  1111. WmipCpeQueryInfo.MaxSize = HalErrorInfo.CpeMaxSize;
  1112. }
  1113. WmipCpeQueryInfo.PollFrequency = HalErrorInfo.CpePollingInterval;
  1114. WmipCpeQueryInfo.Token = (PVOID)(ULONG_PTR) HalErrorInfo.CpeKernelToken;
  1115. //
  1116. // Initialize DPC and Workitem for processing
  1117. //
  1118. KeInitializeSpinLock(&WmipCpeQueryInfo.DpcLock);
  1119. KeInitializeDpc(&WmipCpeQueryInfo.DeliveryDpc,
  1120. WmipMceDpcRoutine,
  1121. NULL);
  1122. KeInitializeDpc(&WmipCpeQueryInfo.PollingDpc,
  1123. WmipPollingDpcRoutine,
  1124. &WmipCpeQueryInfo);
  1125. ExInitializeWorkItem(&WmipCpeQueryInfo.WorkItem,
  1126. WmipMceWorkerRoutine,
  1127. &WmipCpeQueryInfo);
  1128. KeInitializeTimerEx(&WmipCpeQueryInfo.PollingTimer,
  1129. NotificationTimer);
  1130. //
  1131. // Register our CMC and MCA callbacks. And if interrupt driven CPE
  1132. // callbacks are enabled register them too
  1133. //
  1134. KernelMcaHandlerInfo.Version = KERNEL_ERROR_HANDLER_VERSION;
  1135. KernelMcaHandlerInfo.KernelMcaDelivery = WmipMceDelivery;
  1136. KernelMcaHandlerInfo.KernelCmcDelivery = WmipMceDelivery;
  1137. KernelMcaHandlerInfo.KernelCpeDelivery = WmipMceDelivery;
  1138. KernelMcaHandlerInfo.KernelMceDelivery = WmipMceEventDelivery;
  1139. Status = HalSetSystemInformation(HalKernelErrorHandler,
  1140. sizeof(KERNEL_ERROR_HANDLER_INFO),
  1141. &KernelMcaHandlerInfo);
  1142. if (NT_SUCCESS(Status))
  1143. {
  1144. WmipMCEState = MCE_STATE_REGISTERED;
  1145. #ifdef TEST_EARLY_CPE
  1146. WmipTestEarlyCPE();
  1147. #endif
  1148. } else {
  1149. WmipMCEState = (ULONG) MCE_STATE_ERROR;
  1150. WmipDebugPrintEx((DPFLTR_WMICORE_ID,
  1151. DPFLTR_MCA_LEVEL | DPFLTR_ERROR_LEVEL,
  1152. "WMI: Error %x registering MCA error handlers\n",
  1153. Status));
  1154. }
  1155. }
  1156. } else if (WmipMCEState != MCE_STATE_ERROR) {
  1157. //
  1158. // Phase 1 initialization is done after all of the boot drivers
  1159. // have loaded and have had a chance to register for WMI event
  1160. // notifications. At this point it is safe to go ahead and send
  1161. // wmi events for MCA, CMC, CPE, etc
  1162. //
  1163. // If there were any MCA logs generated prior to boot then get
  1164. // them out of the HAL and process them. Do this before
  1165. // starting any polling since the SAL likes to have the
  1166. // previous MCA records removed before being polled for CPE and
  1167. // CMC
  1168. //
  1169. #if 0
  1170. // DEBUG
  1171. //
  1172. // Test code to generate a previous MCA without having
  1173. // had generate one previously
  1174. //
  1175. {
  1176. PERROR_SMBIOS s;
  1177. UCHAR Buffer[0x400];
  1178. PERROR_RECORD_HEADER rh;
  1179. PERROR_SECTION_HEADER sh;
  1180. #define ERROR_SMBIOS_GUID \
  1181. { 0xe429faf5, 0x3cb7, 0x11d4, { 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81 }}
  1182. ERROR_DEVICE_GUID ErrorSmbiosGuid = ERROR_SMBIOS_GUID;
  1183. rh = (PERROR_RECORD_HEADER)Buffer;
  1184. rh->Id = 0x12345678;
  1185. rh->Revision.Revision = 0x0200;
  1186. rh->Valid.Valid = 0;
  1187. rh->TimeStamp.TimeStamp = 0x2001031900165323;
  1188. sh = (PERROR_SECTION_HEADER)((PUCHAR)rh + sizeof(ERROR_RECORD_HEADER));
  1189. memset(sh, 0, sizeof(Buffer));
  1190. sh->Revision.Revision = 0x0200;
  1191. sh->RecoveryInfo.RecoveryInfo = 0;
  1192. sh->Length = sizeof(ERROR_SMBIOS);
  1193. sh->Guid = ErrorSmbiosGuid;
  1194. s = (PERROR_SMBIOS)sh;
  1195. s->Valid.Valid = 0;
  1196. s->Valid.EventType = 1;
  1197. s->EventType = 0xa0;
  1198. rh->Length = sizeof(ERROR_RECORD_HEADER) + sh->Length;
  1199. WmipGenerateMCAEventlog(Buffer,
  1200. rh->Length,
  1201. TRUE);
  1202. }
  1203. // DEBUG
  1204. #endif
  1205. HalErrorInfo.Version = HAL_ERROR_INFO_VERSION;
  1206. Status = HalQuerySystemInformation(HalErrorInformation,
  1207. sizeof(HAL_ERROR_INFO),
  1208. &HalErrorInfo,
  1209. &ReturnSize);
  1210. if ((NT_SUCCESS(Status)) &&
  1211. (ReturnSize >= sizeof(HAL_ERROR_INFO)))
  1212. {
  1213. if (HalErrorInfo.McaPreviousEventsCount != 0)
  1214. {
  1215. //
  1216. // We need to flush out any previous MCA logs and then
  1217. // make them available via WMI
  1218. //
  1219. WmipProcessPrevMcaLogs();
  1220. }
  1221. }
  1222. //
  1223. // Establish polling timer for CMC, if needed
  1224. //
  1225. if ((WmipCmcQueryInfo.PollFrequency != HAL_CMC_DISABLED) &&
  1226. (WmipCmcQueryInfo.PollFrequency != HAL_CMC_INTERRUPTS_BASED))
  1227. {
  1228. li.QuadPart = -1 * (WmipCmcQueryInfo.PollFrequency * 1000000000);
  1229. KeSetTimerEx(&WmipCmcQueryInfo.PollingTimer,
  1230. li,
  1231. WmipCmcQueryInfo.PollFrequency * 1000,
  1232. &WmipCmcQueryInfo.PollingDpc);
  1233. } else if (WmipCmcQueryInfo.PollFrequency == HAL_CMC_INTERRUPTS_BASED) {
  1234. //
  1235. // CMC is interrupt based so we need to kick off an attempt
  1236. // to read any CMC that had previously occured
  1237. //
  1238. WmipInsertQueueMCEDpc(&WmipCmcQueryInfo);
  1239. }
  1240. //
  1241. // Establish polling timer for Cpe, if needed
  1242. //
  1243. if ((WmipCpeQueryInfo.PollFrequency != HAL_CPE_DISABLED) &&
  1244. (WmipCpeQueryInfo.PollFrequency != HAL_CPE_INTERRUPTS_BASED))
  1245. {
  1246. li.QuadPart = -1 * (WmipCpeQueryInfo.PollFrequency * 1000000000);
  1247. KeSetTimerEx(&WmipCpeQueryInfo.PollingTimer,
  1248. li,
  1249. WmipCpeQueryInfo.PollFrequency * 1000,
  1250. &WmipCpeQueryInfo.PollingDpc);
  1251. } else if (WmipCpeQueryInfo.PollFrequency == HAL_CPE_INTERRUPTS_BASED) {
  1252. //
  1253. // Cpe is interrupt based so we need to kick off an attempt
  1254. // to read any Cpe that had previously occured
  1255. //
  1256. WmipInsertQueueMCEDpc(&WmipCpeQueryInfo);
  1257. }
  1258. //
  1259. // Flag that we are now able to start firing events
  1260. //
  1261. WmipMCEState = MCE_STATE_RUNNING;
  1262. Status = STATUS_SUCCESS;
  1263. }
  1264. else {
  1265. Status = STATUS_UNSUCCESSFUL;
  1266. }
  1267. return(Status);
  1268. }
  1269. NTSTATUS WmipGetRawMCAInfo(
  1270. OUT PUCHAR Buffer,
  1271. IN OUT PULONG BufferSize
  1272. )
  1273. /*++
  1274. Routine Description:
  1275. Return raw MCA log that was already retrieved from hal
  1276. Arguments:
  1277. Return Value:
  1278. NT status code
  1279. --*/
  1280. {
  1281. NTSTATUS status;
  1282. PAGED_CODE();
  1283. if (WmipRawMCA != NULL)
  1284. {
  1285. //
  1286. // THere are logs so copy over all of the logs
  1287. //
  1288. if (*BufferSize >= WmipRawMCASize)
  1289. {
  1290. RtlCopyMemory(Buffer, WmipRawMCA, WmipRawMCASize);
  1291. status = STATUS_SUCCESS;
  1292. } else {
  1293. status = STATUS_BUFFER_TOO_SMALL;
  1294. }
  1295. *BufferSize = WmipRawMCASize;
  1296. } else {
  1297. //
  1298. // There are no logs so return no records
  1299. //
  1300. if (*BufferSize >= sizeof(ULONG))
  1301. {
  1302. *(PULONG)Buffer = 0;
  1303. status = STATUS_SUCCESS;
  1304. } else {
  1305. status = STATUS_BUFFER_TOO_SMALL;
  1306. }
  1307. *BufferSize = sizeof(ULONG);
  1308. }
  1309. return(status);
  1310. }
  1311. NTSTATUS WmipGetLogFromHal(
  1312. IN HAL_QUERY_INFORMATION_CLASS InfoClass,
  1313. IN PVOID Token,
  1314. IN OUT PWNODE_SINGLE_INSTANCE *Wnode,
  1315. OUT PERROR_LOGRECORD *Mca,
  1316. OUT PULONG McaSize,
  1317. IN ULONG MaxSize,
  1318. IN LPGUID Guid
  1319. )
  1320. /*++
  1321. Routine Description:
  1322. This routine will call the HAL to get a log and possibly build a
  1323. wnode event for it.
  1324. Arguments:
  1325. InfoClass is the HalInformationClass that specifies the log
  1326. information to retrieve
  1327. Token is the HAL token for the log type
  1328. *Wnode returns a pointer to a WNODE_EVENT_ITEM containing the log
  1329. information if Wnode is not NULL
  1330. *Mca returns a pointer to the log read from the hal. It may point
  1331. into the memory pointed to by *Wnode
  1332. *McaSize returns with the size of the log infomration.
  1333. MaxSize has the maximum size to allocate for the log data
  1334. Guid points to the guid to use if a Wnode is built
  1335. Return Value:
  1336. NT status code
  1337. --*/
  1338. {
  1339. NTSTATUS Status;
  1340. PERROR_LOGRECORD Log;
  1341. PWNODE_SINGLE_INSTANCE WnodeSI;
  1342. PULONG Ptr;
  1343. ULONG Size, LogSize, WnodeSize;
  1344. PAGED_CODE();
  1345. //
  1346. // If we are reading directly into a wnode then set this up
  1347. //
  1348. if (Wnode != NULL)
  1349. {
  1350. WnodeSize = FIELD_OFFSET(WNODE_SINGLE_INSTANCE, VariableData) +
  1351. 2 * sizeof(ULONG);
  1352. } else {
  1353. WnodeSize = 0;
  1354. }
  1355. //
  1356. // Allocate a buffer to store the log reported from the hal. Note
  1357. // that this must be in non paged pool as per the HAL.
  1358. //
  1359. Size = MaxSize + WnodeSize;
  1360. Ptr = ExAllocatePoolWithTag(NonPagedPool,
  1361. Size,
  1362. WmipMCAPoolTag);
  1363. if (Ptr != NULL)
  1364. {
  1365. Log = (PERROR_LOGRECORD)((PUCHAR)Ptr + WnodeSize);
  1366. LogSize = Size - WnodeSize;
  1367. *(PVOID *)Log = Token;
  1368. Status = HalQuerySystemInformation(InfoClass,
  1369. LogSize,
  1370. Log,
  1371. &LogSize);
  1372. if (Status == STATUS_BUFFER_TOO_SMALL)
  1373. {
  1374. //
  1375. // If our buffer was too small then the Hal lied to us when
  1376. // it told us the maximum buffer size. This is ok as we'll
  1377. // handle this situation by reallocating and trying again
  1378. //
  1379. ExFreePool(Log);
  1380. //
  1381. // Reallocate the buffer and call the hal to get the log
  1382. //
  1383. Size = LogSize + WnodeSize;
  1384. Ptr = ExAllocatePoolWithTag(NonPagedPool,
  1385. Size,
  1386. WmipMCAPoolTag);
  1387. if (Ptr != NULL)
  1388. {
  1389. Log = (PERROR_LOGRECORD)((PUCHAR)Ptr + WnodeSize);
  1390. LogSize = Size - WnodeSize;
  1391. *(PVOID *)Log = Token;
  1392. Status = HalQuerySystemInformation(InfoClass,
  1393. LogSize,
  1394. Log,
  1395. &LogSize);
  1396. //
  1397. // The hal gave us a buffer size needed that was too
  1398. // small, so lets stop right here and let him know]
  1399. //
  1400. WmipAssert(Status != STATUS_BUFFER_TOO_SMALL);
  1401. } else {
  1402. Status = STATUS_INSUFFICIENT_RESOURCES;
  1403. }
  1404. }
  1405. if (NT_SUCCESS(Status))
  1406. {
  1407. //
  1408. // We sucessfully read the data from the hal so build up
  1409. // output buffers.
  1410. //
  1411. if (Wnode != NULL)
  1412. {
  1413. //
  1414. // Caller requested buffer returned within a WNODE, so
  1415. // build up the wnode around the log data
  1416. //
  1417. WnodeSI = (PWNODE_SINGLE_INSTANCE)Ptr;
  1418. Status = WmipBuildMcaCmcEvent(WnodeSI,
  1419. Guid,
  1420. NULL,
  1421. LogSize);
  1422. *Wnode = WnodeSI;
  1423. }
  1424. *Mca = Log;
  1425. *McaSize = LogSize;
  1426. }
  1427. if ((! NT_SUCCESS(Status)) && (Ptr != NULL))
  1428. {
  1429. //
  1430. // If the function failed, but we have an allocated buffer
  1431. // then clean it up
  1432. //
  1433. ExFreePool(Ptr);
  1434. }
  1435. } else {
  1436. Status = STATUS_INSUFFICIENT_RESOURCES;
  1437. }
  1438. return(Status);
  1439. }
  1440. //
  1441. // Unlink and free a buffer to contain the corrected event information.
  1442. // Assumes that the SM Critical section is held
  1443. //
  1444. void WmipFreeCorrectedMCEEvent(
  1445. PMCECORRECTEDEVENT Event
  1446. )
  1447. {
  1448. PAGED_CODE();
  1449. RemoveEntryList(&Event->List);
  1450. WmipCorrectedMCECount--;
  1451. WmipDebugPrintEx((DPFLTR_WMICORE_ID,
  1452. DPFLTR_MCA_LEVEL,
  1453. "WMI: MCE event %p for type %d freed\n",
  1454. Event,
  1455. Event->Type));
  1456. ExFreePool(Event);
  1457. }
  1458. PMCECORRECTEDEVENT WmipAllocCorrectedMCEEvent(
  1459. MCECORRECTEDTYPE Type
  1460. )
  1461. /*++
  1462. Routine Description:
  1463. This routine will allocate and initialize a MCECORRECTEDEVENT
  1464. structure for a new corrected mce event that the kernel is
  1465. tracking. The routine ensures that only a fixed limit of corrected
  1466. MCE events are allocated and if the limit is exceeded, then the
  1467. oldest entry is recycled.
  1468. This routine assumes that the WmipSMCriticalSection is held
  1469. Arguments:
  1470. Type is the type of corrected MCE event
  1471. Return Value:
  1472. pointer to MCECORRECTEDEVENT stucture or NULL if an entry could not
  1473. be allocated
  1474. --*/
  1475. {
  1476. PMCECORRECTEDEVENT Event, EventX;
  1477. LARGE_INTEGER OldestTime;
  1478. PLIST_ENTRY List;
  1479. PAGED_CODE();
  1480. if (WmipMaxCorrectedMCEOutstanding != 0)
  1481. {
  1482. if ((WmipCorrectedMCECount < WmipMaxCorrectedMCEOutstanding) ||
  1483. (IsListEmpty(&WmipCorrectedMCEHead)))
  1484. {
  1485. //
  1486. // Allocate a new event from pool
  1487. //
  1488. Event = (PMCECORRECTEDEVENT)ExAllocatePoolWithTag(PagedPool,
  1489. sizeof(MCECORRECTEDEVENT),
  1490. WmipMCAPoolTag);
  1491. if (Event != NULL)
  1492. {
  1493. WmipCorrectedMCECount++;
  1494. }
  1495. } else {
  1496. //
  1497. // There are already enough mce being tracked, so pick the
  1498. // oldest and recycle
  1499. //
  1500. List = WmipCorrectedMCEHead.Flink;
  1501. Event = CONTAINING_RECORD(List,
  1502. MCECORRECTEDEVENT,
  1503. List);
  1504. OldestTime = Event->Timestamp;
  1505. List = List->Flink;
  1506. while (List != &WmipCorrectedMCEHead)
  1507. {
  1508. EventX = CONTAINING_RECORD(List,
  1509. MCECORRECTEDEVENT,
  1510. List);
  1511. if (EventX->Timestamp.QuadPart < OldestTime.QuadPart)
  1512. {
  1513. Event = EventX;
  1514. OldestTime = EventX->Timestamp;
  1515. }
  1516. List = List->Flink;
  1517. }
  1518. RemoveEntryList(&Event->List);
  1519. }
  1520. } else {
  1521. Event = NULL;
  1522. }
  1523. if (Event != NULL)
  1524. {
  1525. Event->Type = Type;
  1526. Event->Counter = 1;
  1527. Event->Flags = 0;
  1528. KeQuerySystemTime(&Event->Timestamp);
  1529. InsertHeadList(&WmipCorrectedMCEHead,
  1530. &Event->List);
  1531. }
  1532. return(Event);
  1533. }
  1534. NTSTATUS WmipTrackCorrectedMCE(
  1535. IN MCECORRECTEDTYPE Type,
  1536. IN PERROR_RECORD_HEADER Record,
  1537. #if defined(_IA64_)
  1538. IN PERROR_SECTION_HEADER Section,
  1539. #endif
  1540. OUT ULONG *LogToEventlog
  1541. )
  1542. {
  1543. PLIST_ENTRY List;
  1544. PMCECORRECTEDEVENT Event;
  1545. LARGE_INTEGER DeltaTime;
  1546. PAGED_CODE();
  1547. //
  1548. // By default we'll always want an eventlog entry for corrected
  1549. // errors
  1550. //
  1551. switch(Type)
  1552. {
  1553. case CpuCache:
  1554. case CpuTlb:
  1555. case CpuBus:
  1556. case CpuRegFile:
  1557. {
  1558. LARGE_INTEGER CurrentTime;
  1559. ULONG CpuId;
  1560. //
  1561. // We got a corrected CPU cache error. If this happended on
  1562. // this CPU before within a certain time window then we
  1563. // want to suppress the eventlog message
  1564. //
  1565. CpuId = HalpGetFwMceLogProcessorNumber(Record);
  1566. KeQuerySystemTime(&CurrentTime);
  1567. WmipEnterSMCritSection();
  1568. List = WmipCorrectedMCEHead.Flink;
  1569. while (List != &WmipCorrectedMCEHead)
  1570. {
  1571. Event = CONTAINING_RECORD(List,
  1572. MCECORRECTEDEVENT,
  1573. List);
  1574. if ((Type == Event->Type) &&
  1575. (CpuId == Event->CpuId))
  1576. {
  1577. //
  1578. // We have seen a cpu error on this cpu before,
  1579. // check if it was within the time interval
  1580. //
  1581. DeltaTime.QuadPart = (CurrentTime.QuadPart -
  1582. Event->Timestamp.QuadPart) /
  1583. 1000;
  1584. if ( (ULONG)DeltaTime.QuadPart <= WmipCoalesceCorrectedErrorInterval)
  1585. {
  1586. //
  1587. // Since it is within the interval, we suppress
  1588. // the event
  1589. //
  1590. *LogToEventlog = 0;
  1591. } else {
  1592. //
  1593. // Since it is not within the interval we do
  1594. // not suppress the event, but do need to
  1595. // update the time that the last error occurred
  1596. //
  1597. Event->Timestamp = CurrentTime;
  1598. }
  1599. goto CpuDone;
  1600. }
  1601. List = List->Flink;
  1602. }
  1603. //
  1604. // This appears to be the first time we've seen
  1605. // this physical address. Build an event structure
  1606. // for it and put it on the watch list
  1607. //
  1608. Event = WmipAllocCorrectedMCEEvent(Type);
  1609. if (Event != NULL)
  1610. {
  1611. Event->CpuId = CpuId;
  1612. WmipDebugPrintEx((DPFLTR_WMICORE_ID,
  1613. DPFLTR_MCA_LEVEL,
  1614. "WMI: MCE event %p for type %d, cpuid %d added\n",
  1615. Event,
  1616. Event->Type,
  1617. Event->CpuId));
  1618. }
  1619. CpuDone:
  1620. WmipLeaveSMCritSection();
  1621. break;
  1622. }
  1623. case SingleBitEcc:
  1624. {
  1625. #if defined(_IA64_)
  1626. PERROR_MEMORY Memory;
  1627. LARGE_INTEGER BytesRemoved;
  1628. PHYSICAL_ADDRESS Address;
  1629. NTSTATUS Status;
  1630. //
  1631. // We got a single bit ECC error. See if the physical
  1632. // address for it is already on the list and if so bump the
  1633. // counter and possibly try to remove the physical memory
  1634. // form the system. If not then create a new entry for the
  1635. // error.
  1636. //
  1637. Memory = (PERROR_MEMORY)Section;
  1638. if (Memory->Valid.PhysicalAddress == 1)
  1639. {
  1640. //
  1641. // Round down the the nearest page boundry since we are
  1642. // tracking errors on a page basis. This means that 2
  1643. // errors at different addresses in the same page are
  1644. // considered 2 instances of the same error
  1645. //
  1646. Address.QuadPart = (Memory->PhysicalAddress) & ~(PAGE_SIZE-1);
  1647. WmipEnterSMCritSection();
  1648. List = WmipCorrectedMCEHead.Flink;
  1649. while (List != &WmipCorrectedMCEHead)
  1650. {
  1651. Event = CONTAINING_RECORD(List,
  1652. MCECORRECTEDEVENT,
  1653. List);
  1654. if ((Type == Event->Type) &&
  1655. ((Event->Flags & CORRECTED_MCE_EVENT_BUSY) == 0) &&
  1656. (Address.QuadPart == Event->SingleBitEccAddress.QuadPart))
  1657. {
  1658. //
  1659. // Don't report multiple errors for the same
  1660. // page ever, but update to the current
  1661. // timestamp
  1662. //
  1663. *LogToEventlog = 0;
  1664. KeQuerySystemTime(&Event->Timestamp);
  1665. if ((WmipSingleBitEccErrorThreshold != 0) &&
  1666. (++Event->Counter >= WmipSingleBitEccErrorThreshold))
  1667. {
  1668. //
  1669. // We have crossed the threshold so lets
  1670. // attempt to map out the memory.
  1671. // Mark the entry as busy and release the
  1672. // critical section since mapping out the
  1673. // memory may take a long time.
  1674. //
  1675. Event->Flags |= CORRECTED_MCE_EVENT_BUSY;
  1676. WmipLeaveSMCritSection();
  1677. //
  1678. // MmMarkPhysicalMmemoryAsBad
  1679. // requires that the address and
  1680. // size be page aligned
  1681. //
  1682. BytesRemoved.QuadPart = PAGE_SIZE;
  1683. Status = MmMarkPhysicalMemoryAsBad(&Address,
  1684. &BytesRemoved);
  1685. WmipDebugPrintEx((DPFLTR_WMICORE_ID, DPFLTR_MCA_LEVEL,
  1686. "WMI: Physical Address %p removal -> %x\n",
  1687. Address.QuadPart,
  1688. Status));
  1689. if (NT_SUCCESS(Status))
  1690. {
  1691. //
  1692. // Fire off a wmi event to announce
  1693. // that the memory has been mapped out
  1694. //
  1695. WmipFireOffWmiEvent(&WmipMSMCAEvent_MemoryPageRemoved,
  1696. sizeof(PHYSICAL_ADDRESS),
  1697. &Address);
  1698. //
  1699. // SInce mapping succeeded, we do not
  1700. // expect to see the physical address
  1701. // again so we can remove it from the
  1702. // list of tracked MCE
  1703. //
  1704. WmipEnterSMCritSection();
  1705. WmipFreeCorrectedMCEEvent(Event);
  1706. } else {
  1707. Event->Flags &= ~CORRECTED_MCE_EVENT_BUSY;
  1708. WmipEnterSMCritSection();
  1709. }
  1710. }
  1711. goto MemoryDone;
  1712. }
  1713. List = List->Flink;
  1714. }
  1715. //
  1716. // This appears to be the first time we've seen
  1717. // this physical address. Build an event structure
  1718. // for it and put it on the watch list
  1719. //
  1720. Event = WmipAllocCorrectedMCEEvent(Type);
  1721. if (Event != NULL)
  1722. {
  1723. Event->SingleBitEccAddress = Address;
  1724. WmipDebugPrintEx((DPFLTR_WMICORE_ID,
  1725. DPFLTR_MCA_LEVEL,
  1726. "WMI: MCE event %p for type %d, physaddr %I64x added\n",
  1727. Event,
  1728. Event->Type,
  1729. Event->SingleBitEccAddress.QuadPart));
  1730. }
  1731. MemoryDone:
  1732. WmipLeaveSMCritSection();
  1733. }
  1734. #endif
  1735. break;
  1736. }
  1737. default:
  1738. {
  1739. WmipAssert(FALSE);
  1740. }
  1741. }
  1742. return(STATUS_SUCCESS);
  1743. }
  //
  // Values for the cpu error state that is stepped through while a
  // processor error section is examined. The enumeration has no tag and
  // no typedef name, only the enumerators themselves are used, so it is
  // declared as a plain enum. A typedef without a name declares nothing
  // and only produces a compiler warning.
  //
  enum
  {
      CpuStateCheckCache = 0,
      CpuStateCheckTLB = 1,
      CpuStateCheckBus = 2,
      CpuStateCheckRegFile = 3,
      CpuStateCheckMS = 4
  };
  1752. void WmipGenerateMCAEventlog(
  1753. PUCHAR ErrorLog,
  1754. ULONG ErrorLogSize,
  1755. BOOLEAN IsFatal
  1756. )
  1757. {
  1758. PERROR_RECORD_HEADER RecordHeader;
  1759. #if defined(_IA64_)
  1760. PERROR_SECTION_HEADER SectionHeader;
  1761. PERROR_MODINFO ModInfo;
  1762. #endif
  1763. NTSTATUS Status = STATUS_INVALID_PARAMETER;
  1764. PWCHAR w;
  1765. ULONG BufferSize;
  1766. PUCHAR Buffer, RawPtr = NULL;
  1767. PWNODE_SINGLE_INSTANCE Wnode;
  1768. PMSMCAEvent_Header Header;
  1769. PAGED_CODE();
  1770. RecordHeader = (PERROR_RECORD_HEADER)ErrorLog;
  1771. //
  1772. // Allocate a buffer large enough to accomodate any type of MCA.
  1773. // Right now the largest is MSMCAEvent_MemoryError. If this changes
  1774. // then this code should be updated
  1775. //
  1776. BufferSize = ((sizeof(WNODE_SINGLE_INSTANCE) +
  1777. (sizeof(USHORT) + sizeof(MCA_EVENT_INSTANCE_NAME)) +7) & ~7) +
  1778. sizeof(MSMCAEvent_MemoryError) +
  1779. ErrorLogSize;
  1780. //
  1781. // Allocate a buffer to build the event
  1782. //
  1783. Buffer = ExAllocatePoolWithTag(PagedPool,
  1784. BufferSize,
  1785. WmipMCAPoolTag);
  1786. if (Buffer != NULL)
  1787. {
  1788. //
  1789. // Fill in the common fields of the WNODE
  1790. //
  1791. Wnode = (PWNODE_SINGLE_INSTANCE)Buffer;
  1792. Wnode->WnodeHeader.BufferSize = BufferSize;
  1793. Wnode->WnodeHeader.Linkage = 0;
  1794. WmiInsertTimestamp(&Wnode->WnodeHeader);
  1795. Wnode->WnodeHeader.Flags = WNODE_FLAG_SINGLE_INSTANCE |
  1796. WNODE_FLAG_EVENT_ITEM;
  1797. Wnode->OffsetInstanceName = sizeof(WNODE_SINGLE_INSTANCE);
  1798. Wnode->DataBlockOffset = ((sizeof(WNODE_SINGLE_INSTANCE) +
  1799. (sizeof(USHORT) + sizeof(MCA_EVENT_INSTANCE_NAME)) +7) & ~7);
  1800. w = (PWCHAR)OffsetToPtr(Wnode, Wnode->OffsetInstanceName);
  1801. *w++ = sizeof(MCA_EVENT_INSTANCE_NAME);
  1802. wcscpy(w, MCA_EVENT_INSTANCE_NAME);
  1803. //
  1804. // Fill in the common fields of the event data
  1805. //
  1806. Header = (PMSMCAEvent_Header)OffsetToPtr(Wnode, Wnode->DataBlockOffset);
  1807. Header->Cpu = MCA_UNDEFINED_CPU; // assume CPU will be undefined
  1808. Header->AdditionalErrors = 0;
  1809. Header->LogToEventlog = 1;
  1810. #if defined(_IA64_)
  1811. if ((ErrorLogSize < sizeof(ERROR_RECORD_HEADER)) ||
  1812. (RecordHeader->Revision.Major != ERROR_MAJOR_REVISION_SAL_03_00) ||
  1813. (RecordHeader->Length > ErrorLogSize))
  1814. {
  1815. //
  1816. // Record header is not SAL 3.0 compliant so we do not try
  1817. // to interpert the record. It is not compliant for one of
  1818. // these reasons:
  1819. //
  1820. // 1. The error record size is not large enough to contain
  1821. // the entire error record header.
  1822. // 2. The Major revision number does not match the major
  1823. // revision number expected by the code. Note that the
  1824. // minor revision number is not checked since changes to
  1825. // the minor revision number do not affect the format of
  1826. // the error record or sections.
  1827. // 3. The error record size as specified in the error
  1828. // record header does not match the size obtained from
  1829. // the firmware.
  1830. //
  1831. WmipDebugPrintEx((DPFLTR_WMICORE_ID, DPFLTR_MCA_LEVEL,
  1832. "WMI: Invalid MCA Record revision %x or size %d at %p\n"
  1833. "do !mca %p to dump MCA record\n",
  1834. RecordHeader->Revision,
  1835. RecordHeader->Length,
  1836. RecordHeader,
  1837. RecordHeader));
  1838. #endif
  1839. Status = STATUS_INVALID_PARAMETER;
  1840. #if defined(_IA64_)
  1841. } else {
  1842. ULONG SizeUsed;
  1843. ULONG CpuErrorState = CpuStateCheckCache;
  1844. ULONG CpuErrorIndex = 0;
  1845. BOOLEAN AdvanceSection;
  1846. BOOLEAN FirstError;
  1847. //
  1848. // Valid 3.0 record, gather the record id and severity from
  1849. // the header
  1850. //
  1851. Header->RecordId = RecordHeader->Id;
  1852. Header->ErrorSeverity = RecordHeader->ErrorSeverity;
  1853. Header->Cpu = HalpGetFwMceLogProcessorNumber(RecordHeader);
  1854. //
  1855. // Use the error severity value in the record header to
  1856. // determine if the error was fatal. If the value is
  1857. // ErrorRecoverable then assume that the error was fatal
  1858. // since the HAL will change this value to ErrorCorrected
  1859. //
  1860. IsFatal = (RecordHeader->ErrorSeverity != ErrorCorrected ? TRUE : FALSE);
  1861. //
  1862. // Loop over all sections within the record.
  1863. //
  1864. // CONSIDER: Is it possible to have a record that only has a record
  1865. // header and no sections
  1866. //
  1867. SizeUsed = sizeof(ERROR_RECORD_HEADER);
  1868. ModInfo = NULL;
  1869. FirstError = TRUE;
  1870. while (SizeUsed < ErrorLogSize)
  1871. {
  1872. //
  1873. // Advance to the next section in the record
  1874. //
  1875. SectionHeader = (PERROR_SECTION_HEADER)(ErrorLog + SizeUsed);
  1876. AdvanceSection = TRUE;
  1877. Header->AdditionalErrors++;
  1878. //
  1879. // First validate that this is a valid section
  1880. //
  1881. if (((SizeUsed + sizeof(ERROR_SECTION_HEADER)) > ErrorLogSize) ||
  1882. (SectionHeader->Revision.Revision != SAL_30_ERROR_REVISION) ||
  1883. ((SizeUsed + SectionHeader->Length) > ErrorLogSize))
  1884. {
  1885. //
  1886. // Not valid section header so we'll give up on
  1887. // the whole record. This could be because
  1888. //
  1889. // 1. There is not enough room in the buffer passed
  1890. // by the FW for a complete section header
  1891. // 2. The section header revision is not correct
  1892. // 3. There is not enough room in the buffer passed
  1893. // by the FW for the complete section
  1894. //
  1895. WmipDebugPrintEx((DPFLTR_WMICORE_ID, DPFLTR_MCA_LEVEL,
  1896. "WMI: Invalid MCA SectionHeader revision %d or length %d at %p\n"
  1897. "do !mca %p to dump MCA record\n",
  1898. SectionHeader->Revision,
  1899. SectionHeader->Length,
  1900. SectionHeader,
  1901. RecordHeader));
  1902. //
  1903. // We'll break out of the loop since we don't know how to
  1904. // move on to the next MCA section since we don't
  1905. // understand any format previous to 3.0
  1906. //
  1907. Status = STATUS_INVALID_PARAMETER;
  1908. break;
  1909. } else {
  1910. //
  1911. // Now determine what type of section we have got. This is
  1912. // determined by looking at the guid in the section header.
  1913. // Each section type has a unique guid value
  1914. //
  1915. if (IsEqualGUID(&SectionHeader->Guid, &WmipErrorProcessorGuid))
  1916. {
  1917. //
  1918. // Build event for CPU eventlog MCA
  1919. //
  1920. PMSMCAEvent_CPUError Event;
  1921. PERROR_PROCESSOR Processor;
  1922. SIZE_T TotalSectionSize;
  1923. WmipAssert( sizeof(MSMCAEvent_MemoryError) >=
  1924. sizeof(MSMCAEvent_CPUError) );
  1925. WmipDebugPrintEx((DPFLTR_WMICORE_ID, DPFLTR_MCA_LEVEL,
  1926. "WMI: MCA Section %p indicates processor error\n",
  1927. SectionHeader));
  1928. //
  1929. // Validate that the section length is large
  1930. // enough to accomodate all of the information
  1931. // that it declares
  1932. //
  1933. if (SectionHeader->Length >= sizeof(ERROR_PROCESSOR))
  1934. {
  1935. Event = (PMSMCAEvent_CPUError)Header;
  1936. Processor = (PERROR_PROCESSOR)SectionHeader;
  1937. //
  1938. // Assume we won't be able to determine the
  1939. // various additional information from the
  1940. // error logs
  1941. //
  1942. if (FirstError)
  1943. {
  1944. Event->Type = IsFatal ? MCA_ERROR_CPU :
  1945. MCA_WARNING_CPU;
  1946. Event->MajorErrorType = (ULONG)0xffffffff;
  1947. Event->Level = (ULONG)0xffffffff;
  1948. Event->CacheOp = (ULONG)0xffffffff;
  1949. Event->CacheMesi = (ULONG)0xffffffff;
  1950. Event->TLBOp = (ULONG)0xffffffff;
  1951. Event->BusType = (ULONG)0xffffffff;
  1952. Event->BusSev = (ULONG)0xffffffff;
  1953. Event->RegFileId = (ULONG)0xffffffff;
  1954. Event->RegFileOp = (ULONG)0xffffffff;
  1955. Event->MSSid = (ULONG)0xffffffff;
  1956. Event->MSOp = (ULONG)0xffffffff;
  1957. Event->MSArrayId = (ULONG)0xffffffff;
  1958. Event->MSIndex = (ULONG)0xffffffff;
  1959. }
  1960. //
  1961. // Validate that section is large enough to
  1962. // handle all specified ERROR_MODINFO
  1963. // structs
  1964. //
  1965. TotalSectionSize = sizeof(ERROR_PROCESSOR) +
  1966. ((Processor->Valid.CacheCheckNum +
  1967. Processor->Valid.TlbCheckNum +
  1968. Processor->Valid.BusCheckNum +
  1969. Processor->Valid.RegFileCheckNum +
  1970. Processor->Valid.MsCheckNum) *
  1971. sizeof(ERROR_MODINFO));
  1972. if (SectionHeader->Length >= TotalSectionSize)
  1973. {
  1974. //
  1975. // Initialize pointer to the current ERROR_MOFINFO
  1976. //
  1977. if (ModInfo == NULL)
  1978. {
  1979. ModInfo = (PERROR_MODINFO)((PUCHAR)Processor +
  1980. sizeof(ERROR_PROCESSOR));
  1981. } else {
  1982. ModInfo++;
  1983. }
  1984. switch (CpuErrorState)
  1985. {
  1986. case CpuStateCheckCache:
  1987. {
  1988. ERROR_CACHE_CHECK Check;
  1989. if (Processor->Valid.CacheCheckNum > CpuErrorIndex)
  1990. {
  1991. //
  1992. // We have a cache error that we need to
  1993. // handle.
  1994. // Advance to next error in the section,
  1995. // but don't advance the section
  1996. //
  1997. WmipDebugPrintEx((DPFLTR_WMICORE_ID, DPFLTR_MCA_LEVEL,
  1998. "WMI: MCA ModInfo %p indicates cache error index %d\n",
  1999. ModInfo,
  2000. CpuErrorIndex));
  2001. if (! IsFatal)
  2002. {
  2003. WmipTrackCorrectedMCE(CpuCache,
  2004. RecordHeader,
  2005. SectionHeader,
  2006. &Header->LogToEventlog);
  2007. }
  2008. CpuErrorIndex++;
  2009. AdvanceSection = FALSE;
  2010. if (FirstError)
  2011. {
  2012. Event->Type = IsFatal ? MCA_ERROR_CACHE :
  2013. MCA_WARNING_CACHE;
  2014. Event->MajorErrorType = MCACpuCacheError;
  2015. if (ModInfo->Valid.CheckInfo == 1)
  2016. {
  2017. Check.CacheCheck = ModInfo->CheckInfo.CheckInfo;
  2018. Event->Level = (ULONG)Check.Level;
  2019. Event->CacheOp = (ULONG)Check.Operation;
  2020. if (Check.MESIValid == 1)
  2021. {
  2022. Event->CacheMesi = (ULONG)Check.MESI;
  2023. }
  2024. }
  2025. }
  2026. break;
  2027. } else {
  2028. CpuErrorState = CpuStateCheckTLB;
  2029. CpuErrorIndex = 0;
  2030. // Fall through and see if there are any
  2031. // TLB errors
  2032. }
  2033. }
  2034. case CpuStateCheckTLB:
  2035. {
  2036. ERROR_TLB_CHECK Check;
  2037. if (Processor->Valid.TlbCheckNum > CpuErrorIndex)
  2038. {
  2039. //
  2040. // We have a cache error that we need to
  2041. // handle.
  2042. // Advance to next error in the section,
  2043. // but don't advance the section
  2044. //
  2045. WmipDebugPrintEx((DPFLTR_WMICORE_ID, DPFLTR_MCA_LEVEL,
  2046. "WMI: MCA ModInfo %p indicates TLB error index %d\n",
  2047. ModInfo,
  2048. CpuErrorIndex));
  2049. if (! IsFatal)
  2050. {
  2051. WmipTrackCorrectedMCE(CpuTlb,
  2052. RecordHeader,
  2053. SectionHeader,
  2054. &Header->LogToEventlog);
  2055. }
  2056. CpuErrorIndex++;
  2057. AdvanceSection = FALSE;
  2058. if (FirstError)
  2059. {
  2060. Event->Type = IsFatal ? MCA_ERROR_TLB :
  2061. MCA_WARNING_TLB;
  2062. Event->MajorErrorType = MCACpuTlbError;
  2063. if (ModInfo->Valid.CheckInfo == 1)
  2064. {
  2065. Check.TlbCheck = ModInfo->CheckInfo.CheckInfo;
  2066. Event->Level = (ULONG)Check.Level;
  2067. Event->TLBOp = (ULONG)Check.Operation;
  2068. }
  2069. }
  2070. break;
  2071. } else {
  2072. CpuErrorState = CpuStateCheckBus;
  2073. CpuErrorIndex = 0;
  2074. // Fall through and see if there are any
  2075. // CPU Bus errors
  2076. }
  2077. }
  2078. case CpuStateCheckBus:
  2079. {
  2080. ERROR_BUS_CHECK Check;
  2081. if (Processor->Valid.BusCheckNum > CpuErrorIndex)
  2082. {
  2083. //
  2084. // We have a cache error that we need to
  2085. // handle.
  2086. // Advance to next error in the section,
  2087. // but don't advance the section
  2088. //
  2089. WmipDebugPrintEx((DPFLTR_WMICORE_ID, DPFLTR_MCA_LEVEL,
  2090. "WMI: MCA ModInfo %p indicates bus error index %d\n",
  2091. ModInfo,
  2092. CpuErrorIndex));
  2093. if (! IsFatal)
  2094. {
  2095. WmipTrackCorrectedMCE(CpuBus,
  2096. RecordHeader,
  2097. SectionHeader,
  2098. &Header->LogToEventlog);
  2099. }
  2100. CpuErrorIndex++;
  2101. AdvanceSection = FALSE;
  2102. if (FirstError)
  2103. {
  2104. Event->Type = IsFatal ? MCA_ERROR_CPU_BUS :
  2105. MCA_WARNING_CPU_BUS;
  2106. Event->MajorErrorType = MCACpuBusError;
  2107. if (ModInfo->Valid.CheckInfo == 1)
  2108. {
  2109. Check.BusCheck = ModInfo->CheckInfo.CheckInfo;
  2110. Event->BusType = (ULONG)Check.Type;
  2111. Event->BusSev = (ULONG)Check.Severity;
  2112. }
  2113. }
  2114. break;
  2115. } else {
  2116. CpuErrorState = CpuStateCheckRegFile;
  2117. CpuErrorIndex = 0;
  2118. // Fall through and see if there are any
  2119. // REG FILE errors
  2120. }
  2121. }
  2122. case CpuStateCheckRegFile:
  2123. {
  2124. ERROR_REGFILE_CHECK Check;
  2125. if (Processor->Valid.RegFileCheckNum > CpuErrorIndex)
  2126. {
  2127. //
  2128. // We have a cache error that we need to
  2129. // handle.
  2130. // Advance to next error in the section,
  2131. // but don't advance the section
  2132. //
  2133. WmipDebugPrintEx((DPFLTR_WMICORE_ID, DPFLTR_MCA_LEVEL,
  2134. "WMI: MCA ModInfo %p indicates reg file error index %d\n",
  2135. ModInfo,
  2136. CpuErrorIndex));
  2137. if (! IsFatal)
  2138. {
  2139. WmipTrackCorrectedMCE(CpuRegFile,
  2140. RecordHeader,
  2141. SectionHeader,
  2142. &Header->LogToEventlog);
  2143. }
  2144. CpuErrorIndex++;
  2145. AdvanceSection = FALSE;
  2146. if (FirstError)
  2147. {
  2148. Event->Type = IsFatal ? MCA_ERROR_REGISTER_FILE :
  2149. MCA_WARNING_REGISTER_FILE;
  2150. Event->MajorErrorType = MCACpuRegFileError;
  2151. if (ModInfo->Valid.CheckInfo == 1)
  2152. {
  2153. Check.RegFileCheck = ModInfo->CheckInfo.CheckInfo;
  2154. Event->RegFileOp = (ULONG)Check.Operation;
  2155. Event->RegFileId = (ULONG)Check.Identifier;
  2156. }
  2157. }
  2158. break;
  2159. } else {
  2160. CpuErrorState = CpuStateCheckMS;
  2161. CpuErrorIndex = 0;
  2162. // Fall through and see if there are any
  2163. // Micro Architecture errors
  2164. }
  2165. }
  2166. case CpuStateCheckMS:
  2167. {
  2168. ERROR_MS_CHECK Check;
  2169. if (Processor->Valid.MsCheckNum > CpuErrorIndex)
  2170. {
  2171. //
  2172. // We have a cache error that we need to
  2173. // handle.
  2174. // Advance to next error in the section,
  2175. // but don't advance the section
  2176. //
  2177. WmipDebugPrintEx((DPFLTR_WMICORE_ID, DPFLTR_MCA_LEVEL,
  2178. "WMI: MCA ModInfo %p indicates MAS error index %d\n",
  2179. ModInfo,
  2180. CpuErrorIndex));
  2181. CpuErrorIndex++;
  2182. AdvanceSection = FALSE;
  2183. if (FirstError)
  2184. {
  2185. Event->Type = IsFatal ? MCA_ERROR_MAS :
  2186. MCA_WARNING_MAS;
  2187. Event->MajorErrorType = MCACpuMSError;
  2188. if (ModInfo->Valid.CheckInfo == 1)
  2189. {
  2190. Check.MsCheck = ModInfo->CheckInfo.CheckInfo;
  2191. Event->MSOp = (ULONG)Check.Operation;
  2192. Event->MSSid = (ULONG)Check.StructureIdentifier;
  2193. Event->Level = (ULONG)Check.Level;
  2194. Event->MSArrayId = (ULONG)Check.ArrayId;
  2195. if (Check.IndexValid == 1)
  2196. {
  2197. Event->MSIndex = (ULONG)Check.Index;
  2198. }
  2199. }
  2200. }
  2201. break;
  2202. } else {
  2203. if (! FirstError)
  2204. {
  2205. //
  2206. // There are no more errors left in the
  2207. // error section so we don't want to
  2208. // generate anything.
  2209. WmipDebugPrintEx((DPFLTR_WMICORE_ID, DPFLTR_MCA_LEVEL,
  2210. "WMI: MCA ModInfo %p indicates no error index %d\n",
  2211. ModInfo,
  2212. CpuErrorIndex));
  2213. Header->AdditionalErrors--;
  2214. goto DontGenerate;
  2215. }
  2216. }
  2217. }
  2218. }
  2219. if (FirstError)
  2220. {
  2221. Event->Size = ErrorLogSize;
  2222. RawPtr = Event->RawRecord;
  2223. //
  2224. // Finish filling in WNODE fields
  2225. //
  2226. Wnode->WnodeHeader.Guid = WmipMSMCAEvent_CPUErrorGuid;
  2227. Wnode->SizeDataBlock = FIELD_OFFSET(MSMCAEvent_CPUError,
  2228. RawRecord) +
  2229. ErrorLogSize;
  2230. }
  2231. Status = STATUS_SUCCESS;
  2232. }
  2233. } else {
  2234. WmipDebugPrintEx((DPFLTR_WMICORE_ID, DPFLTR_MCA_LEVEL,
  2235. "WMI: MCA Processor Error Section %p has invalid size %d\n",
  2236. SectionHeader,
  2237. SectionHeader->Length));
  2238. Status = STATUS_INVALID_PARAMETER;
  2239. break;
  2240. }
  2241. } else if (IsEqualGUID(&SectionHeader->Guid, &WmipErrorMemoryGuid)) {
  2242. //
  2243. // Build event for MEMORY error eventlog MCA
  2244. //
  2245. PMSMCAEvent_MemoryError Event;
  2246. PERROR_MEMORY Memory;
  2247. WmipDebugPrintEx((DPFLTR_WMICORE_ID, DPFLTR_MCA_LEVEL,
  2248. "WMI: MCA Section %p indicates memory error\n",
  2249. SectionHeader));
  2250. Status = STATUS_SUCCESS;
  2251. if (FirstError)
  2252. {
  2253. //
  2254. // Ensure the record contains all of the
  2255. // fields that it is supposed to
  2256. //
  2257. if (SectionHeader->Length >= sizeof(ERROR_MEMORY))
  2258. {
  2259. Event = (PMSMCAEvent_MemoryError)Header;
  2260. Memory = (PERROR_MEMORY)SectionHeader;
  2261. //
  2262. // Take note of any recoverable single
  2263. // bit ECC errors. This may even cause
  2264. // the memory to be mapped out
  2265. //
  2266. if (! IsFatal)
  2267. {
  2268. WmipTrackCorrectedMCE(SingleBitEcc,
  2269. RecordHeader,
  2270. SectionHeader,
  2271. &Header->LogToEventlog);
  2272. }
  2273. //
  2274. // Fill in the data from the MCA within the WMI event
  2275. //
  2276. if ((Memory->Valid.PhysicalAddress == 1) &&
  2277. (Memory->Valid.AddressMask == 1) &&
  2278. (Memory->Valid.Card == 1) &&
  2279. (Memory->Valid.Module == 1))
  2280. {
  2281. Event->Type = IsFatal ? MCA_ERROR_MEM_1_2_5_4 :
  2282. MCA_WARNING_MEM_1_2_5_4;
  2283. } else if ((Memory->Valid.PhysicalAddress == 1) &&
  2284. (Memory->Valid.AddressMask == 1) &&
  2285. (Memory->Valid.Module == 1))
  2286. {
  2287. Event->Type = IsFatal ? MCA_ERROR_MEM_1_2_5 :
  2288. MCA_WARNING_MEM_1_2_5;
  2289. } else if (Memory->Valid.PhysicalAddress == 1)
  2290. {
  2291. Event->Type = IsFatal ? MCA_ERROR_MEM_1_2:
  2292. MCA_WARNING_MEM_1_2;
  2293. } else {
  2294. Event->Type = IsFatal ? MCA_ERROR_MEM_UNKNOWN:
  2295. MCA_WARNING_MEM_UNKNOWN;
  2296. }
  2297. Event->VALIDATION_BITS = Memory->Valid.Valid;
  2298. Event->MEM_ERROR_STATUS = Memory->ErrorStatus.Status;
  2299. Event->MEM_PHYSICAL_ADDR = Memory->PhysicalAddress;
  2300. Event->MEM_PHYSICAL_MASK = Memory->PhysicalAddressMask;
  2301. Event->RESPONDER_ID = Memory->ResponderId;
  2302. Event->TARGET_ID = Memory->TargetId;
  2303. Event->REQUESTOR_ID = Memory->RequestorId;
  2304. Event->BUS_SPECIFIC_DATA = Memory->BusSpecificData;
  2305. Event->MEM_NODE = Memory->Node;
  2306. Event->MEM_CARD = Memory->Card;
  2307. Event->MEM_BANK = Memory->Bank;
  2308. Event->xMEM_DEVICE = Memory->Device;
  2309. Event->MEM_MODULE = Memory->Module;
  2310. Event->MEM_ROW = Memory->Row;
  2311. Event->MEM_COLUMN = Memory->Column;
  2312. Event->MEM_BIT_POSITION = Memory->BitPosition;
  2313. Event->Size = ErrorLogSize;
  2314. RawPtr = Event->RawRecord;
  2315. //
  2316. // Finish filling in WNODE fields
  2317. //
  2318. Wnode->WnodeHeader.Guid = WmipMSMCAEvent_MemoryErrorGuid;
  2319. Wnode->SizeDataBlock = FIELD_OFFSET(MSMCAEvent_MemoryError,
  2320. RawRecord) +
  2321. ErrorLogSize;
  2322. } else {
  2323. WmipDebugPrintEx((DPFLTR_WMICORE_ID, DPFLTR_MCA_LEVEL,
  2324. "WMI: MCA Memory Error Section %p has invalid size %d\n",
  2325. SectionHeader,
  2326. SectionHeader->Length));
  2327. Status = STATUS_INVALID_PARAMETER;
  2328. break;
  2329. }
  2330. }
  2331. } else if (IsEqualGUID(&SectionHeader->Guid, &WmipErrorPCIBusGuid)) {
  2332. //
  2333. // Build event for PCI Component MCA
  2334. //
  2335. PMSMCAEvent_PCIBusError Event;
  2336. PERROR_PCI_BUS PciBus;
  2337. NTSTATUS PCIBusErrorTypes[] = {
  2338. MCA_WARNING_PCI_BUS_PARITY,
  2339. MCA_ERROR_PCI_BUS_PARITY,
  2340. MCA_WARNING_PCI_BUS_SERR,
  2341. MCA_ERROR_PCI_BUS_SERR,
  2342. MCA_WARNING_PCI_BUS_MASTER_ABORT,
  2343. MCA_ERROR_PCI_BUS_MASTER_ABORT,
  2344. MCA_WARNING_PCI_BUS_TIMEOUT,
  2345. MCA_ERROR_PCI_BUS_TIMEOUT,
  2346. MCA_WARNING_PCI_BUS_PARITY,
  2347. MCA_ERROR_PCI_BUS_PARITY,
  2348. MCA_WARNING_PCI_BUS_PARITY,
  2349. MCA_ERROR_PCI_BUS_PARITY,
  2350. MCA_WARNING_PCI_BUS_PARITY,
  2351. MCA_ERROR_PCI_BUS_PARITY
  2352. };
  2353. NTSTATUS PCIBusErrorTypesNoInfo[] = {
  2354. MCA_WARNING_PCI_BUS_PARITY_NO_INFO,
  2355. MCA_ERROR_PCI_BUS_PARITY_NO_INFO,
  2356. MCA_WARNING_PCI_BUS_SERR_NO_INFO,
  2357. MCA_ERROR_PCI_BUS_SERR_NO_INFO,
  2358. MCA_WARNING_PCI_BUS_MASTER_ABORT_NO_INFO,
  2359. MCA_ERROR_PCI_BUS_MASTER_ABORT_NO_INFO,
  2360. MCA_WARNING_PCI_BUS_TIMEOUT_NO_INFO,
  2361. MCA_ERROR_PCI_BUS_TIMEOUT_NO_INFO,
  2362. MCA_WARNING_PCI_BUS_PARITY_NO_INFO,
  2363. MCA_ERROR_PCI_BUS_PARITY_NO_INFO,
  2364. MCA_WARNING_PCI_BUS_PARITY_NO_INFO,
  2365. MCA_ERROR_PCI_BUS_PARITY_NO_INFO,
  2366. MCA_WARNING_PCI_BUS_PARITY_NO_INFO,
  2367. MCA_ERROR_PCI_BUS_PARITY_NO_INFO
  2368. };
  2369. WmipAssert( sizeof(MSMCAEvent_MemoryError) >=
  2370. sizeof(MSMCAEvent_PCIBusError) );
  2371. WmipDebugPrintEx((DPFLTR_WMICORE_ID, DPFLTR_MCA_LEVEL,
  2372. "WMI: MCA Section %p indicates PCI Bus error\n",
  2373. SectionHeader));
  2374. Status = STATUS_SUCCESS;
  2375. if (FirstError)
  2376. {
  2377. if (SectionHeader->Length >= sizeof(ERROR_PCI_BUS))
  2378. {
  2379. Event = (PMSMCAEvent_PCIBusError)Header;
  2380. PciBus = (PERROR_PCI_BUS)SectionHeader;
  2381. //
  2382. // Fill in the data from the MCA within the WMI event
  2383. //
  2384. if ((PciBus->Type.Type >= PciBusDataParityError) &&
  2385. (PciBus->Type.Type <= PciCommandParityError))
  2386. {
  2387. if ((PciBus->Valid.CmdType == 1) &&
  2388. (PciBus->Valid.Address == 1) &&
  2389. (PciBus->Valid.Id == 1))
  2390. {
  2391. Event->Type = PCIBusErrorTypes[(2 * (PciBus->Type.Type-1)) +
  2392. (IsFatal ? 1 : 0)];
  2393. } else {
  2394. Event->Type = PCIBusErrorTypesNoInfo[(2 * (PciBus->Type.Type-1)) +
  2395. (IsFatal ? 1 : 0)];
  2396. }
  2397. } else {
  2398. Event->Type = IsFatal ? MCA_ERROR_PCI_BUS_UNKNOWN :
  2399. MCA_WARNING_PCI_BUS_UNKNOWN;
  2400. }
  2401. Event->VALIDATION_BITS = PciBus->Valid.Valid;
  2402. Event->PCI_BUS_ERROR_STATUS = PciBus->ErrorStatus.Status;
  2403. Event->PCI_BUS_ADDRESS = PciBus->Address;
  2404. Event->PCI_BUS_DATA = PciBus->Data;
  2405. Event->PCI_BUS_CMD = PciBus->CmdType;
  2406. Event->PCI_BUS_REQUESTOR_ID = PciBus->RequestorId;
  2407. Event->PCI_BUS_RESPONDER_ID = PciBus->ResponderId;
  2408. Event->PCI_BUS_TARGET_ID = PciBus->TargetId;
  2409. Event->PCI_BUS_ERROR_TYPE = PciBus->Type.Type;
  2410. Event->PCI_BUS_ID_BusNumber = PciBus->Id.BusNumber;
  2411. Event->PCI_BUS_ID_SegmentNumber = PciBus->Id.SegmentNumber;
  2412. Event->Size = ErrorLogSize;
  2413. RawPtr = Event->RawRecord;
  2414. //
  2415. // Finish filling in WNODE fields
  2416. //
  2417. Wnode->WnodeHeader.Guid = WmipMSMCAEvent_PCIBusErrorGuid;
  2418. Wnode->SizeDataBlock = FIELD_OFFSET(MSMCAEvent_PCIBusError,
  2419. RawRecord) +
  2420. ErrorLogSize;
  2421. } else {
  2422. WmipDebugPrintEx((DPFLTR_WMICORE_ID, DPFLTR_MCA_LEVEL,
  2423. "WMI: PCI Bus Error Section %p has invalid size %d\n",
  2424. SectionHeader,
  2425. SectionHeader->Length));
  2426. Status = STATUS_INVALID_PARAMETER;
  2427. break;
  2428. }
  2429. }
  2430. } else if (IsEqualGUID(&SectionHeader->Guid, &WmipErrorPCIComponentGuid)) {
  2431. //
  2432. // Build event for PCI Component MCA
  2433. //
  2434. PMSMCAEvent_PCIComponentError Event;
  2435. PERROR_PCI_COMPONENT PciComp;
  2436. WmipDebugPrintEx((DPFLTR_WMICORE_ID, DPFLTR_MCA_LEVEL,
  2437. "WMI: MCA Section %p indicates PCI Component error\n",
  2438. SectionHeader));
  2439. WmipAssert( sizeof(MSMCAEvent_MemoryError) >=
  2440. sizeof(MSMCAEvent_PCIComponentError) );
  2441. Status = STATUS_SUCCESS;
  2442. if (FirstError)
  2443. {
  2444. if (SectionHeader->Length >= sizeof(ERROR_PCI_COMPONENT))
  2445. {
  2446. Event = (PMSMCAEvent_PCIComponentError)Header;
  2447. PciComp = (PERROR_PCI_COMPONENT)SectionHeader;
  2448. //
  2449. // Fill in the data from the MCA within the WMI event
  2450. //
  2451. Event->Type = IsFatal ? MCA_ERROR_PCI_DEVICE :
  2452. MCA_WARNING_PCI_DEVICE;
  2453. Event->VALIDATION_BITS = PciComp->Valid.Valid;
  2454. Event->PCI_COMP_ERROR_STATUS = PciComp->ErrorStatus.Status;
  2455. Event->PCI_COMP_INFO_VendorId = (USHORT)PciComp->Info.VendorId;
  2456. Event->PCI_COMP_INFO_DeviceId = (USHORT)PciComp->Info.DeviceId;
  2457. Event->PCI_COMP_INFO_ClassCodeInterface = PciComp->Info.ClassCodeInterface;
  2458. Event->PCI_COMP_INFO_ClassCodeSubClass = PciComp->Info.ClassCodeSubClass;
  2459. Event->PCI_COMP_INFO_ClassCodeBaseClass = PciComp->Info.ClassCodeBaseClass;
  2460. Event->PCI_COMP_INFO_FunctionNumber = (UCHAR)PciComp->Info.FunctionNumber;
  2461. Event->PCI_COMP_INFO_DeviceNumber = (UCHAR)PciComp->Info.DeviceNumber;
  2462. Event->PCI_COMP_INFO_BusNumber = (UCHAR)PciComp->Info.BusNumber;
  2463. Event->PCI_COMP_INFO_SegmentNumber = (UCHAR)PciComp->Info.SegmentNumber;
  2464. Event->Size = ErrorLogSize;
  2465. RawPtr = Event->RawRecord;
  2466. //
  2467. // Finish filling in WNODE fields
  2468. //
  2469. Wnode->WnodeHeader.Guid = WmipMSMCAEvent_PCIComponentErrorGuid;
  2470. Wnode->SizeDataBlock = FIELD_OFFSET(MSMCAEvent_PCIComponentError,
  2471. RawRecord) +
  2472. ErrorLogSize;
  2473. } else {
  2474. WmipDebugPrintEx((DPFLTR_WMICORE_ID, DPFLTR_MCA_LEVEL,
  2475. "WMI: PCI Component Error Section %p has invalid size %d\n",
  2476. SectionHeader,
  2477. SectionHeader->Length));
  2478. Status = STATUS_INVALID_PARAMETER;
  2479. break;
  2480. }
  2481. }
  2482. } else if (IsEqualGUID(&SectionHeader->Guid, &WmipErrorSELGuid)) {
  2483. //
  2484. // Build event for System Eventlog MCA
  2485. //
  2486. PMSMCAEvent_SystemEventError Event;
  2487. PERROR_SYSTEM_EVENT_LOG Sel;
  2488. WmipAssert( sizeof(MSMCAEvent_MemoryError) >=
  2489. sizeof(MSMCAEvent_SystemEventError) );
  2490. WmipDebugPrintEx((DPFLTR_WMICORE_ID, DPFLTR_MCA_LEVEL,
  2491. "WMI: MCA Section %p indicates SEL error\n",
  2492. SectionHeader));
  2493. Status = STATUS_SUCCESS;
  2494. if (FirstError)
  2495. {
  2496. if (SectionHeader->Length >= sizeof(ERROR_SYSTEM_EVENT_LOG))
  2497. {
  2498. Event = (PMSMCAEvent_SystemEventError)Header;
  2499. Sel = (PERROR_SYSTEM_EVENT_LOG)SectionHeader;
  2500. //
  2501. // Fill in the data from the MCA within the WMI event
  2502. //
  2503. Event->Type = IsFatal ? MCA_ERROR_SYSTEM_EVENT :
  2504. MCA_WARNING_SYSTEM_EVENT;
  2505. Event->VALIDATION_BITS = Sel->Valid.Valid;
  2506. Event->SEL_RECORD_ID = Sel->RecordId;
  2507. Event->SEL_RECORD_TYPE = Sel->RecordType;
  2508. Event->SEL_TIME_STAMP = Sel->TimeStamp;
  2509. Event->SEL_GENERATOR_ID = Sel->GeneratorId;
  2510. Event->SEL_EVM_REV = Sel->EVMRevision;
  2511. Event->SEL_SENSOR_TYPE = Sel->SensorType;
  2512. Event->SEL_SENSOR_NUM = Sel->SensorNumber;
  2513. Event->SEL_EVENT_DIR_TYPE = Sel->EventDir;
  2514. Event->SEL_DATA1 = Sel->Data1;
  2515. Event->SEL_DATA2 = Sel->Data2;
  2516. Event->SEL_DATA3 = Sel->Data3;
  2517. Event->Size = ErrorLogSize;
  2518. RawPtr = Event->RawRecord;
  2519. //
  2520. // Finish filling in WNODE fields
  2521. //
  2522. Wnode->WnodeHeader.Guid = WmipMSMCAEvent_SystemEventErrorGuid;
  2523. Wnode->SizeDataBlock = FIELD_OFFSET(MSMCAEvent_SystemEventError,
  2524. RawRecord) +
  2525. ErrorLogSize;
  2526. } else {
  2527. WmipDebugPrintEx((DPFLTR_WMICORE_ID, DPFLTR_MCA_LEVEL,
  2528. "WMI: System Eventlog Error Section %p has invalid size %d\n",
  2529. SectionHeader,
  2530. SectionHeader->Length));
  2531. Status = STATUS_INVALID_PARAMETER;
  2532. break;
  2533. }
  2534. }
  2535. } else if (IsEqualGUID(&SectionHeader->Guid, &WmipErrorSMBIOSGuid)) {
  2536. //
  2537. // Build event for SMBIOS MCA
  2538. //
  2539. PMSMCAEvent_SMBIOSError Event;
  2540. PERROR_SMBIOS Smbios;
  2541. WmipAssert( sizeof(MSMCAEvent_MemoryError) >=
  2542. sizeof(MSMCAEvent_SMBIOSError) );
  2543. WmipDebugPrintEx((DPFLTR_WMICORE_ID, DPFLTR_MCA_LEVEL,
  2544. "WMI: MCA Section %p indicates smbios error\n",
  2545. SectionHeader));
  2546. Status = STATUS_SUCCESS;
  2547. if (FirstError)
  2548. {
  2549. if (SectionHeader->Length >= sizeof(ERROR_SMBIOS))
  2550. {
  2551. Event = (PMSMCAEvent_SMBIOSError)Header;
  2552. Smbios = (PERROR_SMBIOS)SectionHeader;
  2553. //
  2554. // Fill in the data from the MCA within the WMI event
  2555. //
  2556. Event->Type = IsFatal ? MCA_ERROR_SMBIOS :
  2557. MCA_WARNING_SMBIOS;
  2558. Event->VALIDATION_BITS = Smbios->Valid.Valid;
  2559. Event->SMBIOS_EVENT_TYPE = Smbios->EventType;
  2560. Event->Size = ErrorLogSize;
  2561. RawPtr = Event->RawRecord;
  2562. //
  2563. // Finish filling in WNODE fields
  2564. //
  2565. Wnode->WnodeHeader.Guid = WmipMSMCAEvent_SMBIOSErrorGuid;
  2566. Wnode->SizeDataBlock = FIELD_OFFSET(MSMCAEvent_SMBIOSError,
  2567. RawRecord) +
  2568. ErrorLogSize;
  2569. } else {
  2570. WmipDebugPrintEx((DPFLTR_WMICORE_ID, DPFLTR_MCA_LEVEL,
  2571. "WMI: SMBIOS Error Section %p has invalid size %d\n",
  2572. SectionHeader,
  2573. SectionHeader->Length));
  2574. Status = STATUS_INVALID_PARAMETER;
  2575. break;
  2576. }
  2577. }
  2578. } else if (IsEqualGUID(&SectionHeader->Guid, &WmipErrorSpecificGuid)) {
  2579. //
  2580. // Build event for Platform Specific MCA
  2581. //
  2582. PMSMCAEvent_PlatformSpecificError Event;
  2583. PERROR_PLATFORM_SPECIFIC Specific;
  2584. WmipAssert( sizeof(MSMCAEvent_MemoryError) >=
  2585. sizeof(MSMCAEvent_PlatformSpecificError) );
  2586. WmipDebugPrintEx((DPFLTR_WMICORE_ID, DPFLTR_MCA_LEVEL,
  2587. "WMI: MCA Section %p indicates platform specific error\n",
  2588. SectionHeader));
  2589. Status = STATUS_SUCCESS;
  2590. if (FirstError)
  2591. {
  2592. if (SectionHeader->Length >= sizeof(ERROR_PLATFORM_SPECIFIC))
  2593. {
  2594. Event = (PMSMCAEvent_PlatformSpecificError)Header;
  2595. Specific = (PERROR_PLATFORM_SPECIFIC)SectionHeader;
  2596. //
  2597. // Fill in the data from the MCA within the WMI event
  2598. //
  2599. Event->Type = IsFatal ? MCA_ERROR_PLATFORM_SPECIFIC :
  2600. MCA_WARNING_PLATFORM_SPECIFIC;
  2601. Event->VALIDATION_BITS = Specific->Valid.Valid;
  2602. Event->PLATFORM_ERROR_STATUS = Specific->ErrorStatus.Status;
  2603. #if 0
  2604. // TODO: Wait until we figure this out
  2605. Event->PLATFORM_REQUESTOR_ID = Specific->;
  2606. Event->PLATFORM_RESPONDER_ID = Specific->;
  2607. Event->PLATFORM_TARGET_ID = Specific->;
  2608. Event->PLATFORM_BUS_SPECIFIC_DATA = Specific->;
  2609. Event->OEM_COMPONENT_ID = Specific->[16];
  2610. #endif
  2611. Event->Size = ErrorLogSize;
  2612. RawPtr = Event->RawRecord;
  2613. //
  2614. // Finish filling in WNODE fields
  2615. //
  2616. Wnode->WnodeHeader.Guid = WmipMSMCAEvent_PlatformSpecificErrorGuid;
  2617. Wnode->SizeDataBlock = FIELD_OFFSET(MSMCAEvent_PlatformSpecificError,
  2618. RawRecord) +
  2619. ErrorLogSize;
  2620. } else {
  2621. WmipDebugPrintEx((DPFLTR_WMICORE_ID, DPFLTR_MCA_LEVEL,
  2622. "WMI: Platform specific Error Section %p has invalid size %d\n",
  2623. SectionHeader,
  2624. SectionHeader->Length));
  2625. Status = STATUS_INVALID_PARAMETER;
  2626. break;
  2627. }
  2628. }
  2629. } else {
  2630. //
  2631. // We don't recognize the guid, so we use a very generic
  2632. // eventlog message for it
  2633. //
  2634. WmipDebugPrintEx((DPFLTR_WMICORE_ID, DPFLTR_MCA_LEVEL,
  2635. "WMI: Unknown Error GUID at %p\n",
  2636. &SectionHeader->Guid));
  2637. //
  2638. // If we've already analyzed an error then we
  2639. // don't really care that this one can't be
  2640. // analyzed
  2641. //
  2642. if (FirstError)
  2643. {
  2644. Status = STATUS_INVALID_PARAMETER;
  2645. }
  2646. }
  2647. }
  2648. //
  2649. // Advance to the next section within the Error record
  2650. //
  2651. DontGenerate:
  2652. if (AdvanceSection)
  2653. {
  2654. SizeUsed += SectionHeader->Length;
  2655. ModInfo = NULL;
  2656. }
  2657. //
  2658. // If we've successfully parsed an error section then
  2659. // we want to remember that. Only the first error gets
  2660. // analyzed while we calculate the number of additional
  2661. // errors following
  2662. //
  2663. if (NT_SUCCESS(Status))
  2664. {
  2665. FirstError = FALSE;
  2666. }
  2667. }
  2668. }
  2669. #endif
  2670. //
  2671. // If we were not able to build a specific event type then
  2672. // we fallback and fire a generic one
  2673. //
  2674. if (! NT_SUCCESS(Status))
  2675. {
  2676. //
  2677. // Build event for Unknown MCA
  2678. //
  2679. PMSMCAEvent_InvalidError Event;
  2680. WmipAssert( sizeof(MSMCAEvent_MemoryError) >=
  2681. sizeof(MSMCAEvent_InvalidError) );
  2682. Event = (PMSMCAEvent_InvalidError)Header;
  2683. //
  2684. // Fill in the data from the MCA within the WMI event
  2685. //
  2686. if (Header->Cpu == MCA_UNDEFINED_CPU)
  2687. {
  2688. Event->Type = IsFatal ? MCA_ERROR_UNKNOWN_NO_CPU :
  2689. MCA_WARNING_UNKNOWN_NO_CPU;
  2690. } else {
  2691. Event->Type = IsFatal ? MCA_ERROR_UNKNOWN :
  2692. MCA_WARNING_UNKNOWN;
  2693. }
  2694. Event->Size = ErrorLogSize;
  2695. RawPtr = Event->RawRecord;
  2696. //
  2697. // Finish filling in WNODE fields
  2698. //
  2699. Wnode->WnodeHeader.Guid = WmipMSMCAEvent_InvalidErrorGuid;
  2700. Wnode->SizeDataBlock = FIELD_OFFSET(MSMCAEvent_InvalidError,
  2701. RawRecord) +
  2702. ErrorLogSize;
  2703. }
  2704. //
  2705. // Adjust the Error event count
  2706. //
  2707. if (Header->AdditionalErrors > 0)
  2708. {
  2709. Header->AdditionalErrors--;
  2710. }
  2711. //
  2712. // Put the entire MCA record into the event
  2713. //
  2714. RtlCopyMemory(RawPtr,
  2715. RecordHeader,
  2716. ErrorLogSize);
  2717. if ((! IsFatal) && (Header->LogToEventlog == 1))
  2718. {
  2719. if (WmipCorrectedEventlogCounter != 0)
  2720. {
  2721. //
  2722. // Since this is a corrected error that is getting
  2723. // logged to the eventlog we need to account for it
  2724. //
  2725. if ((WmipCorrectedEventlogCounter != 0xffffffff) &&
  2726. (--WmipCorrectedEventlogCounter == 0))
  2727. {
  2728. WmipWriteToEventlog(MCA_INFO_NO_MORE_CORRECTED_ERROR_LOGS,
  2729. STATUS_SUCCESS);
  2730. }
  2731. } else {
  2732. //
  2733. // We have exceeded the limit of corrected errors that
  2734. // we are allowed to write into the eventlog, so we
  2735. // just suppress it
  2736. //
  2737. Header->LogToEventlog = 0;
  2738. }
  2739. }
  2740. //
  2741. // Now go and fire off the event
  2742. //
  2743. if ((WmipDisableMCAPopups == 0) &&
  2744. (Header->LogToEventlog != 0))
  2745. {
  2746. IoRaiseInformationalHardError(STATUS_MCA_OCCURED,
  2747. NULL,
  2748. NULL);
  2749. }
  2750. if ((Header->LogToEventlog == 1) ||
  2751. (WmipIsWbemRunning()))
  2752. {
  2753. //
  2754. // Only fire off a WMI event if we want to log to the
  2755. // eventlog or WBEM is up and running
  2756. //
  2757. Status = WmipWriteMCAEventLogEvent((PUCHAR)Wnode);
  2758. }
  2759. if (! NT_SUCCESS(Status))
  2760. {
  2761. ExFreePool(Wnode);
  2762. }
  2763. } else {
  2764. //
  2765. // Not enough memory to do a full MCA event so lets just do a
  2766. // generic one
  2767. //
  2768. WmipWriteToEventlog(IsFatal ? MCA_WARNING_UNKNOWN_NO_CPU :
  2769. MCA_ERROR_UNKNOWN_NO_CPU,
  2770. STATUS_INSUFFICIENT_RESOURCES);
  2771. }
  2772. }
  2773. NTSTATUS WmipWriteMCAEventLogEvent(
  2774. PUCHAR Event
  2775. )
  2776. {
  2777. PWNODE_HEADER Wnode = (PWNODE_HEADER)Event;
  2778. NTSTATUS Status;
  2779. PAGED_CODE();
  2780. WmipEnterSMCritSection();
  2781. if (WmipIsWbemRunning() ||
  2782. WmipCheckIsWbemRunning())
  2783. {
  2784. //
  2785. // We know WBEM is running so we can just fire off our event
  2786. //
  2787. WmipLeaveSMCritSection();
  2788. Status = IoWMIWriteEvent(Event);
  2789. } else {
  2790. //
  2791. // WBEM is not currently running and so startup a timer that
  2792. // will keep polling it
  2793. //
  2794. if (WmipIsWbemRunningFlag == WBEM_STATUS_UNKNOWN)
  2795. {
  2796. //
  2797. // No one has kicked off the waiting process for wbem so we
  2798. // do that here. Note we need to maintain the critical
  2799. // section to guard angainst another thread that might be
  2800. // trying to startup the waiting process as well. Note that
  2801. // if the setup fails we want to stay in the unknown state
  2802. // so that the next time an event is fired we can retry
  2803. // waiting for wbem
  2804. //
  2805. Status = WmipSetupWaitForWbem();
  2806. if (NT_SUCCESS(Status))
  2807. {
  2808. WmipIsWbemRunningFlag = WAITING_FOR_WBEM;
  2809. }
  2810. }
  2811. Wnode->ClientContext = Wnode->BufferSize;
  2812. InsertTailList(&WmipWaitingMCAEvents,
  2813. (PLIST_ENTRY)Event);
  2814. WmipLeaveSMCritSection();
  2815. Status = STATUS_SUCCESS;
  2816. }
  2817. return(Status);
  2818. }
  2819. ULONG WmipWbemMinuteWait = 1;
  2820. NTSTATUS WmipSetupWaitForWbem(
  2821. void
  2822. )
  2823. {
  2824. LARGE_INTEGER TimeOut;
  2825. NTSTATUS Status;
  2826. PAGED_CODE();
  2827. WmipDebugPrintEx((DPFLTR_WMICORE_ID, DPFLTR_MCA_LEVEL,
  2828. "WMI: SetupWaitForWbem starting\n"));
  2829. //
  2830. // Initialize a kernel time to fire periodically so we can
  2831. // check if WBEM has started or not
  2832. //
  2833. KeInitializeTimer(&WmipIsWbemRunningTimer);
  2834. KeInitializeDpc(&WmipIsWbemRunningDpc,
  2835. WmipIsWbemRunningDispatch,
  2836. NULL);
  2837. ExInitializeWorkItem(&WmipIsWbemRunningWorkItem,
  2838. WmipIsWbemRunningWorker,
  2839. NULL);
  2840. TimeOut.HighPart = -1;
  2841. TimeOut.LowPart = -1 * (WmipWbemMinuteWait * 60 * 1000 * 10000); // 1 minutes
  2842. KeSetTimer(&WmipIsWbemRunningTimer,
  2843. TimeOut,
  2844. &WmipIsWbemRunningDpc);
  2845. Status = STATUS_SUCCESS;
  2846. return(Status);
  2847. }
  2848. void WmipIsWbemRunningDispatch(
  2849. IN PKDPC Dpc,
  2850. IN PVOID DeferredContext, // Not Used
  2851. IN PVOID SystemArgument1, // Not Used
  2852. IN PVOID SystemArgument2 // Not Used
  2853. )
  2854. {
  2855. UNREFERENCED_PARAMETER (Dpc);
  2856. UNREFERENCED_PARAMETER (DeferredContext);
  2857. UNREFERENCED_PARAMETER (SystemArgument1);
  2858. UNREFERENCED_PARAMETER (SystemArgument2);
  2859. ExQueueWorkItem(&WmipIsWbemRunningWorkItem,
  2860. DelayedWorkQueue);
  2861. }
  2862. void WmipIsWbemRunningWorker(
  2863. PVOID Context
  2864. )
  2865. {
  2866. LARGE_INTEGER TimeOut;
  2867. PAGED_CODE();
  2868. UNREFERENCED_PARAMETER (Context);
  2869. if (! WmipCheckIsWbemRunning())
  2870. {
  2871. //
  2872. // WBEM is not yet started, so timeout in another minute to
  2873. // check again
  2874. //
  2875. WmipDebugPrintEx((DPFLTR_WMICORE_ID, DPFLTR_MCA_LEVEL,
  2876. "WMI: IsWbemRunningWorker starting -> WBEM not started\n"));
  2877. TimeOut.HighPart = -1;
  2878. TimeOut.LowPart = (ULONG)(-1 * (1 *60 *1000 *10000)); // 1 minutes
  2879. KeSetTimer(&WmipIsWbemRunningTimer,
  2880. TimeOut,
  2881. &WmipIsWbemRunningDpc);
  2882. } else {
  2883. WmipDebugPrintEx((DPFLTR_WMICORE_ID, DPFLTR_MCA_LEVEL,
  2884. "WMI: WbemRunningWorker found wbem started\n"));
  2885. }
  2886. }
  2887. BOOLEAN WmipCheckIsWbemRunning(
  2888. void
  2889. )
  2890. {
  2891. OBJECT_ATTRIBUTES Obj;
  2892. UNICODE_STRING Name;
  2893. HANDLE Handle;
  2894. LARGE_INTEGER TimeOut;
  2895. BOOLEAN IsWbemRunning = FALSE;
  2896. NTSTATUS Status;
  2897. PWNODE_HEADER Wnode;
  2898. PAGED_CODE();
  2899. RtlInitUnicodeString(&Name,
  2900. L"\\BaseNamedObjects\\WBEM_ESS_OPEN_FOR_BUSINESS");
  2901. InitializeObjectAttributes(
  2902. &Obj,
  2903. &Name,
  2904. FALSE,
  2905. NULL,
  2906. NULL
  2907. );
  2908. Status = ZwOpenEvent(
  2909. &Handle,
  2910. SYNCHRONIZE,
  2911. &Obj
  2912. );
  2913. if (NT_SUCCESS(Status))
  2914. {
  2915. TimeOut.QuadPart = 0;
  2916. Status = ZwWaitForSingleObject(Handle,
  2917. FALSE,
  2918. &TimeOut);
  2919. if (Status == STATUS_SUCCESS)
  2920. {
  2921. IsWbemRunning = TRUE;
  2922. //
  2923. // We've determined that WBEM is running so now lets see if
  2924. // another thread has made that dermination as well. If not
  2925. // then we can flush the MCA event queue and set the flag
  2926. // that WBEM is running
  2927. //
  2928. WmipEnterSMCritSection();
  2929. if (WmipIsWbemRunningFlag != WBEM_IS_RUNNING)
  2930. {
  2931. //
  2932. // Flush the list of all MCA events waiting to be fired
  2933. //
  2934. while (! IsListEmpty(&WmipWaitingMCAEvents))
  2935. {
  2936. Wnode = (PWNODE_HEADER)RemoveHeadList(&WmipWaitingMCAEvents);
  2937. WmipLeaveSMCritSection();
  2938. Wnode->BufferSize = Wnode->ClientContext;
  2939. Wnode->Linkage = 0;
  2940. Status = IoWMIWriteEvent(Wnode);
  2941. if (! NT_SUCCESS(Status))
  2942. {
  2943. ExFreePool(Wnode);
  2944. }
  2945. WmipEnterSMCritSection();
  2946. }
  2947. WmipDebugPrintEx((DPFLTR_WMICORE_ID, DPFLTR_MCA_LEVEL,
  2948. "WMI: WBEM is Running and queus flushed\n"));
  2949. WmipIsWbemRunningFlag = WBEM_IS_RUNNING;
  2950. }
  2951. WmipLeaveSMCritSection();
  2952. }
  2953. ZwClose(Handle);
  2954. }
  2955. return(IsWbemRunning);
  2956. }
  2957. #ifdef ALLOC_DATA_PRAGMA
  2958. #pragma data_seg()
  2959. #endif