Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1284 lines
44 KiB

  1. /*++
  2. Copyright (c) 2000 Microsoft Corporation
  3. Module Name:
  4. gdisup.c
  5. Abstract:
  6. This is the NT Watchdog driver implementation.
  7. This module implements support routines for
  8. watchdog in win32k.
  9. Author:
  10. Michael Maciesowicz (mmacie) 05-May-2000
  11. Environment:
  12. Kernel mode only.
  13. Notes:
  14. This module cannot be moved to win32k since routines defined here can
  15. be called at any time and it is possible that win32k may not be mapped
  16. into running process space at this time (e.g. TS session).
  17. Revision History:
  18. --*/
  19. //
  20. // TODO: This module needs major rework.
  21. //
  22. // 1. We should eliminate all global variables from here and move them into
  23. // GDI context structure.
  24. //
  25. // 2. We should extract generic logging routines
  26. // (e.g. WdWriteErrorLogEntry(pdo, className), WdWriteEventToRegistry(...),
  27. // WdBreakPoint(...)) so we can use them for any device class, not just Display.
  28. //
  29. // 3. We should use IoAllocateWorkItem - we could drop some globals then.
  30. //
  31. #include "videoprt.h"
  32. #include "gdisup.h"
  33. #ifdef ALLOC_PRAGMA
  34. #pragma alloc_text (PAGE, WdpBugCheckStuckDriver)
  35. #endif
  36. WD_BUGCHECK_DATA
  37. g_WdpBugCheckData = {0, 0, 0, 0, 0};
  38. WORK_QUEUE_ITEM
  39. g_WdpWorkQueueItem;
  40. LONG
  41. g_lWdpDisplayHandlerState = WD_HANDLER_IDLE;
  42. //
  43. // Undocumented export from kernel to create Minidump
  44. //
  45. ULONG
  46. KeCapturePersistentThreadState(
  47. PCONTEXT pContext,
  48. PETHREAD pThread,
  49. ULONG ulBugCheckCode,
  50. ULONG_PTR ulpBugCheckParam1,
  51. ULONG_PTR ulpBugCheckParam2,
  52. ULONG_PTR ulpBugCheckParam3,
  53. ULONG_PTR ulpBugCheckParam4,
  54. PVOID pvDump
  55. );
  56. //
  57. // defined dump.c
  58. //
  59. ULONG
  60. pVpAppendSecondaryMinidumpData(
  61. PVOID pvSecondaryData,
  62. ULONG ulSecondaryDataSize,
  63. PVOID pvDump
  64. );
  65. WATCHDOGAPI
  66. VOID
  67. WdDdiWatchdogDpcCallback(
  68. IN PKDPC pDpc,
  69. IN PVOID pDeferredContext,
  70. IN PVOID pSystemArgument1,
  71. IN PVOID pSystemArgument2
  72. )
  73. /*++
  74. Routine Description:
  75. This function is a DPC callback routine for GDI watchdog. It is only
  76. called when GDI watchdog times out before it is cancelled. It schedules
  77. a work item to bugcheck the machine in the context of system worker
  78. thread.
  79. Arguments:
  80. pDpc - Supplies a pointer to a DPC object.
  81. pDeferredContext - Supplies a pointer to a GDI defined context.
  82. pSystemArgument1 - Supplies a pointer to a spinning thread object (PKTHREAD).
  83. pSystemArgument2 - Supplies a pointer to a watchdog object (PDEFERRED_WATCHDOG).
  84. Return Value:
  85. None.
  86. --*/
  87. {
  88. //
  89. // Make sure we handle only one event at the time.
  90. //
  91. // Note: Timeout and recovery events for the same watchdog object are
  92. // synchronized already in timer DPC.
  93. //
  94. if (InterlockedCompareExchange(&g_lWdpDisplayHandlerState,
  95. WD_HANDLER_BUSY,
  96. WD_HANDLER_IDLE) == WD_HANDLER_IDLE)
  97. {
  98. g_WdpBugCheckData.ulBugCheckCode = THREAD_STUCK_IN_DEVICE_DRIVER;
  99. g_WdpBugCheckData.ulpBugCheckParameter1 = (ULONG_PTR)pSystemArgument1;
  100. g_WdpBugCheckData.ulpBugCheckParameter2 = (ULONG_PTR)pSystemArgument2;
  101. g_WdpBugCheckData.ulpBugCheckParameter3 = (ULONG_PTR)pDeferredContext;
  102. g_WdpBugCheckData.ulpBugCheckParameter4++;
  103. ExInitializeWorkItem(&g_WdpWorkQueueItem, WdpBugCheckStuckDriver, &g_WdpBugCheckData);
  104. ExQueueWorkItem(&g_WdpWorkQueueItem, CriticalWorkQueue);
  105. }
  106. else
  107. {
  108. //
  109. // Resume watchdog event processing.
  110. //
  111. WdCompleteEvent(pSystemArgument2, (PKTHREAD)pSystemArgument1);
  112. }
  113. return;
  114. } // WdDdiWatchdogDpcCallback()
  115. //
  116. // We'll support a string name up to 80 characters.
  117. //
  118. VOID
  119. WdpBugCheckStuckDriver(
  120. IN PVOID pvContext
  121. )
  122. /*++
  123. Routine Description:
  124. This function is a worker callback routine for GDI watchdog DPC.
  125. Arguments:
  126. pvContext - Supplies a pointer to a watchdog defined context.
  127. Return Value:
  128. None.
  129. --*/
  130. {
  131. static BOOLEAN s_bFirstTime = TRUE;
  132. static BOOLEAN s_bDbgBreak = FALSE;
  133. static BOOLEAN s_bEventLogged = FALSE;
  134. static ULONG s_ulTrapOnce = WD_DEFAULT_TRAP_ONCE;
  135. static ULONG s_ulDisableBugcheck = WD_DEFAULT_DISABLE_BUGCHECK;
  136. static ULONG s_ulBreakPointDelay = WD_GDI_STRESS_BREAK_POINT_DELAY;
  137. static ULONG s_ulCurrentBreakPointDelay = WD_GDI_STRESS_BREAK_POINT_DELAY;
  138. static ULONG s_ulBreakCount = 0;
  139. static ULONG s_ulEventCount = 0;
  140. static ULONG s_ulEaRecovery = 0;
  141. static ULONG s_ulFullRecovery = 0;
  142. PWD_BUGCHECK_DATA pBugCheckData;
  143. PKTHREAD pThread;
  144. PDEFERRED_WATCHDOG pWatch;
  145. PUNICODE_STRING pUnicodeDriverName;
  146. PDEVICE_OBJECT pFdo;
  147. PDEVICE_OBJECT pPdo;
  148. PWD_GDI_DPC_CONTEXT pDpcContext;
  149. NTSTATUS ntStatus;
  150. WD_EVENT_TYPE lastEvent;
  151. BOOLEAN Recovered = FALSE;
  152. UNICODE_STRING UnicodeString;
  153. PWCHAR Buffer[81];
  154. PAGED_CODE();
  155. ASSERT(NULL != pvContext);
  156. pBugCheckData = (PWD_BUGCHECK_DATA)pvContext;
  157. pThread = (PKTHREAD)(pBugCheckData->ulpBugCheckParameter1);
  158. pWatch = (PDEFERRED_WATCHDOG)(pBugCheckData->ulpBugCheckParameter2);
  159. pDpcContext = (PWD_GDI_DPC_CONTEXT)(pBugCheckData->ulpBugCheckParameter3);
  160. ASSERT(NULL != pDpcContext);
  161. //
  162. // Note: pThread is NULL for recovery events.
  163. //
  164. ASSERT(NULL != pWatch);
  165. //
  166. // In the case where we try to recover from an EA, we will want to use
  167. // the display driver name in the Hard Error message. However, after
  168. // recovering it is remotely possible that GDI may have release the
  169. // watchdog, and the string is no longer valid. Therefor let's make a
  170. // copy of the display driver name on the stack.
  171. //
  172. ASSERT(pDpcContext->DisplayDriverName.Length <= (sizeof(Buffer) - sizeof(WCHAR)));
  173. UnicodeString.Buffer = (PWSTR)Buffer;
  174. UnicodeString.Length = min(pDpcContext->DisplayDriverName.Length, sizeof(Buffer) - sizeof(WCHAR));
  175. UnicodeString.MaximumLength = UnicodeString.Length + sizeof(WCHAR);
  176. RtlCopyMemory(UnicodeString.Buffer,
  177. pDpcContext->DisplayDriverName.Buffer,
  178. UnicodeString.Length);
  179. //
  180. // We guaranteed that length is less then our buffer size, so we know we
  181. // always have room to NULL terminate the string.
  182. //
  183. UnicodeString.Buffer[UnicodeString.Length / sizeof(WCHAR)] = UNICODE_NULL;
  184. pUnicodeDriverName = &UnicodeString;
  185. pFdo = WdGetDeviceObject(pWatch);
  186. pPdo = WdGetLowestDeviceObject(pWatch);
  187. ASSERT(NULL != pFdo);
  188. ASSERT(NULL != pPdo);
  189. lastEvent = WdGetLastEvent(pWatch);
  190. ASSERT((WdTimeoutEvent == lastEvent) || (WdRecoveryEvent == lastEvent));
  191. //
  192. // Grab configuration data from the registry on first timeout.
  193. //
  194. if (TRUE == s_bFirstTime)
  195. {
  196. ULONG ulDefaultTrapOnce = WD_DEFAULT_TRAP_ONCE;
  197. ULONG ulDefaultDisableBugcheck = WD_DEFAULT_DISABLE_BUGCHECK;
  198. ULONG ulDefaultBreakPointDelay = WD_GDI_STRESS_BREAK_POINT_DELAY;
  199. ULONG ulDefaultBreakCount = 0;
  200. ULONG ulDefaultEventCount = 0;
  201. ULONG ulDefaultEaRecovery = 0;
  202. ULONG ulDefaultFullRecovery = 0;
  203. RTL_QUERY_REGISTRY_TABLE queryTable[] =
  204. {
  205. {NULL, RTL_QUERY_REGISTRY_DIRECT, L"TrapOnce", &s_ulTrapOnce, REG_DWORD, &ulDefaultTrapOnce, 4},
  206. {NULL, RTL_QUERY_REGISTRY_DIRECT, L"DisableBugcheck", &s_ulDisableBugcheck, REG_DWORD, &ulDefaultDisableBugcheck, 4},
  207. {NULL, RTL_QUERY_REGISTRY_DIRECT, L"BreakPointDelay", &s_ulBreakPointDelay, REG_DWORD, &ulDefaultBreakPointDelay, 4},
  208. {NULL, RTL_QUERY_REGISTRY_DIRECT, L"BreakCount", &s_ulBreakCount, REG_DWORD, &ulDefaultBreakCount, 4},
  209. {NULL, RTL_QUERY_REGISTRY_DIRECT, L"EaRecovery", &s_ulEaRecovery, REG_DWORD, &ulDefaultEaRecovery, 4},
  210. {NULL, RTL_QUERY_REGISTRY_DIRECT, L"FullRecovery", &s_ulFullRecovery, REG_DWORD, &ulDefaultFullRecovery, 4},
  211. {NULL, 0, NULL}
  212. };
  213. //
  214. // Get configurable values and accumulated statistics from registry.
  215. //
  216. RtlQueryRegistryValues(RTL_REGISTRY_ABSOLUTE,
  217. WD_KEY_WATCHDOG_DISPLAY,
  218. queryTable,
  219. NULL,
  220. NULL);
  221. //
  222. // Rolling down counter to workaround GDI slowness in some stress cases.
  223. //
  224. s_ulCurrentBreakPointDelay = s_ulBreakPointDelay;
  225. #if !defined(_X86_) && !defined(_IA64_)
  226. //
  227. // For now, only recover on x86 and ia64.
  228. //
  229. s_ulEaRecovery = 0;
  230. #endif
  231. }
  232. //
  233. // Handle current event.
  234. //
  235. if (WdTimeoutEvent == lastEvent)
  236. {
  237. //
  238. // Timeout.
  239. //
  240. ULONG ulDebuggerNotPresent;
  241. BOOLEAN bBreakIn;
  242. ASSERT(NULL != pThread);
  243. ulDebuggerNotPresent = 1;
  244. bBreakIn = FALSE;
  245. KdRefreshDebuggerNotPresent();
  246. if ((TRUE == KD_DEBUGGER_ENABLED) && (FALSE == KD_DEBUGGER_NOT_PRESENT))
  247. {
  248. //
  249. // Give a chance to debug a spinning code if kernel debugger is connected.
  250. //
  251. ulDebuggerNotPresent = 0;
  252. if ((0 == s_ulTrapOnce) || (FALSE == s_bDbgBreak))
  253. {
  254. //
  255. // Print out info to debugger and break in if we timed out enought times already.
  256. // Hopefuly one day GDI becomes fast enough and we won't have to set any delays.
  257. //
  258. if (0 == s_ulCurrentBreakPointDelay)
  259. {
  260. s_ulCurrentBreakPointDelay = s_ulBreakPointDelay;
  261. DbgPrint("\n");
  262. DbgPrint("*******************************************************************************\n");
  263. DbgPrint("* *\n");
  264. DbgPrint("* The watchdog detected a timeout condition. We broke into the debugger to *\n");
  265. DbgPrint("* allow a chance for debugging this failure. *\n");
  266. DbgPrint("* *\n");
  267. DbgPrint("* Normally the system will try to recover from this failure and return to a *\n");
  268. DbgPrint("* VGA graphics mode. To disable the recovery feature edit the videoprt *\n");
  269. DbgPrint("* variable VpDisableRecovery. This will allow you to debug your driver. *\n");
  270. DbgPrint("* i.e. execute ed videoprt!VpDisableRecovery 1. *\n");
  271. DbgPrint("* *\n");
  272. DbgPrint("* Intercepted bugcheck code and arguments are listed below this message. *\n");
  273. DbgPrint("* You can use them the same way as you would in case of the actual break, *\n");
  274. DbgPrint("* i.e. execute .thread Arg1 then kv to identify an offending thread. *\n");
  275. DbgPrint("* *\n");
  276. DbgPrint("*******************************************************************************\n");
  277. DbgPrint("\n");
  278. DbgPrint("*** Intercepted Fatal System Error: 0x%08X\n", pBugCheckData->ulBugCheckCode);
  279. DbgPrint(" (0x%p,0x%p,0x%p,0x%p)\n\n",
  280. pBugCheckData->ulpBugCheckParameter1,
  281. pBugCheckData->ulpBugCheckParameter2,
  282. pBugCheckData->ulpBugCheckParameter3,
  283. pBugCheckData->ulpBugCheckParameter4);
  284. DbgPrint("Driver at fault: %ws\n\n", pUnicodeDriverName->Buffer);
  285. bBreakIn = TRUE;
  286. s_bDbgBreak = TRUE;
  287. s_ulBreakCount++;
  288. }
  289. else
  290. {
  291. DbgPrint("Watchdog: Timeout in %ws. Break in %d\n",
  292. pUnicodeDriverName->Buffer,
  293. s_ulCurrentBreakPointDelay);
  294. s_ulCurrentBreakPointDelay--;
  295. }
  296. }
  297. //
  298. // Make sure we won't bugcheck if we have kernel debugger connected.
  299. //
  300. s_ulDisableBugcheck = 1;
  301. }
  302. else if (0 == s_ulDisableBugcheck)
  303. {
  304. s_ulBreakCount++;
  305. }
  306. //
  307. // Log error (only once unless we recover).
  308. //
  309. if ((FALSE == s_bEventLogged) && ((TRUE == bBreakIn) || ulDebuggerNotPresent))
  310. {
  311. PIO_ERROR_LOG_PACKET pIoErrorLogPacket;
  312. ULONG ulPacketSize;
  313. USHORT usNumberOfStrings;
  314. PWCHAR wszDeviceClass = L"display";
  315. ULONG ulClassSize = sizeof (L"display");
  316. ulPacketSize = sizeof (IO_ERROR_LOG_PACKET);
  317. usNumberOfStrings = 0;
  318. //
  319. // For event log message:
  320. //
  321. // %1 = fixed device description (this is set by event log itself)
  322. // %2 = string 1 = device class starting in lower case
  323. // %3 = string 2 = driver name
  324. //
  325. if ((ulPacketSize + ulClassSize) <= ERROR_LOG_MAXIMUM_SIZE)
  326. {
  327. ulPacketSize += ulClassSize;
  328. usNumberOfStrings++;
  329. //
  330. // We're looking at MaximumLength since it includes terminating UNICODE_NULL.
  331. //
  332. if ((ulPacketSize + pUnicodeDriverName->MaximumLength) <= ERROR_LOG_MAXIMUM_SIZE)
  333. {
  334. ulPacketSize += pUnicodeDriverName->MaximumLength;
  335. usNumberOfStrings++;
  336. }
  337. }
  338. pIoErrorLogPacket = IoAllocateErrorLogEntry(pFdo, (UCHAR)ulPacketSize);
  339. if (pIoErrorLogPacket)
  340. {
  341. pIoErrorLogPacket->MajorFunctionCode = 0;
  342. pIoErrorLogPacket->RetryCount = 0;
  343. pIoErrorLogPacket->DumpDataSize = 0;
  344. pIoErrorLogPacket->NumberOfStrings = usNumberOfStrings;
  345. pIoErrorLogPacket->StringOffset = (USHORT)FIELD_OFFSET(IO_ERROR_LOG_PACKET, DumpData);
  346. pIoErrorLogPacket->EventCategory = 0;
  347. pIoErrorLogPacket->ErrorCode = IO_ERR_THREAD_STUCK_IN_DEVICE_DRIVER;
  348. pIoErrorLogPacket->UniqueErrorValue = 0;
  349. pIoErrorLogPacket->FinalStatus = STATUS_SUCCESS;
  350. pIoErrorLogPacket->SequenceNumber = 0;
  351. pIoErrorLogPacket->IoControlCode = 0;
  352. pIoErrorLogPacket->DeviceOffset.QuadPart = 0;
  353. if (usNumberOfStrings > 0)
  354. {
  355. RtlCopyMemory(&(pIoErrorLogPacket->DumpData[0]),
  356. wszDeviceClass,
  357. ulClassSize);
  358. if (usNumberOfStrings > 1)
  359. {
  360. RtlCopyMemory((PUCHAR)&(pIoErrorLogPacket->DumpData[0]) + ulClassSize,
  361. pUnicodeDriverName->Buffer,
  362. pUnicodeDriverName->MaximumLength);
  363. }
  364. }
  365. IoWriteErrorLogEntry(pIoErrorLogPacket);
  366. s_bEventLogged = TRUE;
  367. }
  368. }
  369. //
  370. // Write reliability info into registry. Setting ShutdownEventPending will trigger winlogon
  371. // to run savedump where we're doing our boot-time handling of watchdog events for DrWatson.
  372. //
  373. // Note: We are only allowed to set ShutdownEventPending, savedump is the only component
  374. // allowed to clear this value. Even if we recover from watchdog timeout we'll keep this
  375. // value set, savedump will be able to figure out if we recovered or not.
  376. //
  377. if (TRUE == s_bFirstTime)
  378. {
  379. ULONG ulValue = 1;
  380. //
  381. // Set ShutdownEventPending flag.
  382. //
  383. ntStatus = RtlWriteRegistryValue(RTL_REGISTRY_ABSOLUTE,
  384. WD_KEY_RELIABILITY,
  385. L"ShutdownEventPending",
  386. REG_DWORD,
  387. &ulValue,
  388. sizeof (ulValue));
  389. if (NT_SUCCESS(ntStatus))
  390. {
  391. pVpFlushRegistry(WD_KEY_RELIABILITY);
  392. }
  393. else
  394. {
  395. //
  396. // Reliability key should be always reliable there.
  397. //
  398. ASSERT(FALSE);
  399. }
  400. }
  401. //
  402. // Write watchdog event info into registry.
  403. //
  404. if ((0 == s_ulTrapOnce) || (TRUE == s_bFirstTime))
  405. {
  406. //
  407. // Is Watchdog\Display key already there?
  408. //
  409. ntStatus = RtlCheckRegistryKey(RTL_REGISTRY_ABSOLUTE,
  410. WD_KEY_WATCHDOG_DISPLAY);
  411. if (!NT_SUCCESS(ntStatus))
  412. {
  413. //
  414. // Is Watchdog key already there?
  415. //
  416. ntStatus = RtlCheckRegistryKey(RTL_REGISTRY_ABSOLUTE,
  417. WD_KEY_WATCHDOG);
  418. if (!NT_SUCCESS(ntStatus))
  419. {
  420. //
  421. // Create a new key.
  422. //
  423. ntStatus = RtlCreateRegistryKey(RTL_REGISTRY_ABSOLUTE,
  424. WD_KEY_WATCHDOG);
  425. }
  426. if (NT_SUCCESS(ntStatus))
  427. {
  428. //
  429. // Create a new key.
  430. //
  431. ntStatus = RtlCreateRegistryKey(RTL_REGISTRY_ABSOLUTE,
  432. WD_KEY_WATCHDOG_DISPLAY);
  433. }
  434. }
  435. if (NT_SUCCESS(ntStatus))
  436. {
  437. PVOID pvPropertyBuffer;
  438. ULONG ulLength;
  439. ULONG ulValue;
  440. //
  441. // Set values maintained by watchdog.
  442. //
  443. ulValue = 1;
  444. RtlWriteRegistryValue(RTL_REGISTRY_ABSOLUTE,
  445. WD_KEY_WATCHDOG_DISPLAY,
  446. L"EventFlag",
  447. REG_DWORD,
  448. &ulValue,
  449. sizeof (ulValue));
  450. s_ulEventCount++;
  451. RtlWriteRegistryValue(RTL_REGISTRY_ABSOLUTE,
  452. WD_KEY_WATCHDOG_DISPLAY,
  453. L"EventCount",
  454. REG_DWORD,
  455. &s_ulEventCount,
  456. sizeof (s_ulEventCount));
  457. RtlWriteRegistryValue(RTL_REGISTRY_ABSOLUTE,
  458. WD_KEY_WATCHDOG_DISPLAY,
  459. L"BreakCount",
  460. REG_DWORD,
  461. &s_ulBreakCount,
  462. sizeof (s_ulBreakCount));
  463. ulValue = !s_ulDisableBugcheck;
  464. RtlWriteRegistryValue(RTL_REGISTRY_ABSOLUTE,
  465. WD_KEY_WATCHDOG_DISPLAY,
  466. L"BugcheckTriggered",
  467. REG_DWORD,
  468. &ulValue,
  469. sizeof (ulValue));
  470. RtlWriteRegistryValue(RTL_REGISTRY_ABSOLUTE,
  471. WD_KEY_WATCHDOG_DISPLAY,
  472. L"DebuggerNotPresent",
  473. REG_DWORD,
  474. &ulDebuggerNotPresent,
  475. sizeof (ulDebuggerNotPresent));
  476. RtlWriteRegistryValue(RTL_REGISTRY_ABSOLUTE,
  477. WD_KEY_WATCHDOG_DISPLAY,
  478. L"DriverName",
  479. REG_SZ,
  480. pUnicodeDriverName->Buffer,
  481. pUnicodeDriverName->MaximumLength);
  482. //
  483. // Delete other values in case allocation or property read fails.
  484. //
  485. RtlDeleteRegistryValue(RTL_REGISTRY_ABSOLUTE,
  486. WD_KEY_WATCHDOG_DISPLAY,
  487. L"DeviceClass");
  488. RtlDeleteRegistryValue(RTL_REGISTRY_ABSOLUTE,
  489. WD_KEY_WATCHDOG_DISPLAY,
  490. L"DeviceDescription");
  491. RtlDeleteRegistryValue(RTL_REGISTRY_ABSOLUTE,
  492. WD_KEY_WATCHDOG_DISPLAY,
  493. L"DeviceFriendlyName");
  494. RtlDeleteRegistryValue(RTL_REGISTRY_ABSOLUTE,
  495. WD_KEY_WATCHDOG_DISPLAY,
  496. L"HardwareID");
  497. RtlDeleteRegistryValue(RTL_REGISTRY_ABSOLUTE,
  498. WD_KEY_WATCHDOG_DISPLAY,
  499. L"Manufacturer");
  500. //
  501. // Allocate buffer for device properties reads.
  502. //
  503. // Note: Legacy devices don't have PDOs and we can't query properties
  504. // for them. Calling IoGetDeviceProperty() with FDO upsets Verifier.
  505. // In legacy case lowest device object is the same as FDO, we check
  506. // against this and if this is the case we won't allocate property
  507. // buffer and we'll skip the next block.
  508. //
  509. if (pFdo != pPdo)
  510. {
  511. pvPropertyBuffer = ExAllocatePoolWithTag(PagedPool,
  512. WD_MAX_PROPERTY_SIZE,
  513. WD_TAG);
  514. }
  515. else
  516. {
  517. pvPropertyBuffer = NULL;
  518. }
  519. if (pvPropertyBuffer)
  520. {
  521. //
  522. // Read and save device properties.
  523. //
  524. ntStatus = IoGetDeviceProperty(pPdo,
  525. DevicePropertyClassName,
  526. WD_MAX_PROPERTY_SIZE,
  527. pvPropertyBuffer,
  528. &ulLength);
  529. if (NT_SUCCESS(ntStatus))
  530. {
  531. RtlWriteRegistryValue(RTL_REGISTRY_ABSOLUTE,
  532. WD_KEY_WATCHDOG_DISPLAY,
  533. L"DeviceClass",
  534. REG_SZ,
  535. pvPropertyBuffer,
  536. ulLength);
  537. }
  538. ntStatus = IoGetDeviceProperty(pPdo,
  539. DevicePropertyDeviceDescription,
  540. WD_MAX_PROPERTY_SIZE,
  541. pvPropertyBuffer,
  542. &ulLength);
  543. if (NT_SUCCESS(ntStatus))
  544. {
  545. RtlWriteRegistryValue(RTL_REGISTRY_ABSOLUTE,
  546. WD_KEY_WATCHDOG_DISPLAY,
  547. L"DeviceDescription",
  548. REG_SZ,
  549. pvPropertyBuffer,
  550. ulLength);
  551. }
  552. ntStatus = IoGetDeviceProperty(pPdo,
  553. DevicePropertyFriendlyName,
  554. WD_MAX_PROPERTY_SIZE,
  555. pvPropertyBuffer,
  556. &ulLength);
  557. if (NT_SUCCESS(ntStatus))
  558. {
  559. RtlWriteRegistryValue(RTL_REGISTRY_ABSOLUTE,
  560. WD_KEY_WATCHDOG_DISPLAY,
  561. L"DeviceFriendlyName",
  562. REG_SZ,
  563. pvPropertyBuffer,
  564. ulLength);
  565. }
  566. ntStatus = IoGetDeviceProperty(pPdo,
  567. DevicePropertyHardwareID,
  568. WD_MAX_PROPERTY_SIZE,
  569. pvPropertyBuffer,
  570. &ulLength);
  571. if (NT_SUCCESS(ntStatus))
  572. {
  573. RtlWriteRegistryValue(RTL_REGISTRY_ABSOLUTE,
  574. WD_KEY_WATCHDOG_DISPLAY,
  575. L"HardwareID",
  576. REG_MULTI_SZ,
  577. pvPropertyBuffer,
  578. ulLength);
  579. }
  580. ntStatus = IoGetDeviceProperty(pPdo,
  581. DevicePropertyManufacturer,
  582. WD_MAX_PROPERTY_SIZE,
  583. pvPropertyBuffer,
  584. &ulLength);
  585. if (NT_SUCCESS(ntStatus))
  586. {
  587. RtlWriteRegistryValue(RTL_REGISTRY_ABSOLUTE,
  588. WD_KEY_WATCHDOG_DISPLAY,
  589. L"Manufacturer",
  590. REG_SZ,
  591. pvPropertyBuffer,
  592. ulLength);
  593. }
  594. //
  595. // Release property buffer.
  596. //
  597. ExFreePool(pvPropertyBuffer);
  598. pvPropertyBuffer = NULL;
  599. }
  600. }
  601. //
  602. // Flush registry in case we're going to break in / bugcheck or if this is first time.
  603. //
  604. if ((TRUE == s_bFirstTime) || (TRUE == bBreakIn) || (0 == s_ulDisableBugcheck))
  605. {
  606. pVpFlushRegistry(WD_KEY_WATCHDOG_DISPLAY);
  607. }
  608. }
  609. //
  610. // Track the device object which is responsible for the bugcheck EA.
  611. //
  612. VpBugcheckDeviceObject = pVpGetFdo(pPdo);
  613. //
  614. // Bugcheck machine without kernel debugger connected and with bugcheck EA enabled.
  615. // Bugcheck EA is enabled on SKUs below Server.
  616. //
  617. if (1 == ulDebuggerNotPresent)
  618. {
  619. if (s_ulEaRecovery)
  620. {
  621. Recovered = WdpInjectExceptionIntoThread(pThread, pDpcContext);
  622. }
  623. if ((0 == s_ulDisableBugcheck) && (FALSE == Recovered))
  624. {
  625. KeBugCheckEx(pBugCheckData->ulBugCheckCode,
  626. pBugCheckData->ulpBugCheckParameter1,
  627. pBugCheckData->ulpBugCheckParameter2,
  628. (ULONG_PTR)pUnicodeDriverName,
  629. pBugCheckData->ulpBugCheckParameter4);
  630. }
  631. }
  632. else
  633. {
  634. if (TRUE == bBreakIn)
  635. {
  636. DbgBreakPoint();
  637. if (s_ulEaRecovery && (VpDisableRecovery == FALSE))
  638. {
  639. Recovered = WdpInjectExceptionIntoThread(pThread, pDpcContext);
  640. }
  641. }
  642. }
  643. }
  644. else
  645. {
  646. if (FALSE == s_ulEaRecovery) {
  647. //
  648. // Recovery - knock down EventFlag in registry and update statics.
  649. //
  650. RtlDeleteRegistryValue(RTL_REGISTRY_ABSOLUTE,
  651. WD_KEY_WATCHDOG_DISPLAY,
  652. L"EventFlag");
  653. }
  654. s_bEventLogged = FALSE;
  655. s_ulCurrentBreakPointDelay = s_ulBreakPointDelay;
  656. }
  657. //
  658. // Reenable event processing in this module.
  659. //
  660. s_bFirstTime = FALSE;
  661. InterlockedExchange(&g_lWdpDisplayHandlerState, WD_HANDLER_IDLE);
  662. //
  663. // Dereference objects and resume watchdog event processing.
  664. //
  665. ObDereferenceObject(pFdo);
  666. ObDereferenceObject(pPdo);
  667. WdCompleteEvent(pWatch, pThread);
  668. //
  669. // If we Recovered then raise a hard error notifing the user
  670. // of the situation. We do this here because the raise hard error
  671. // is synchronous and waits for user input. So we'll raise the hard
  672. // error after everything else is done.
  673. //
  674. if (Recovered) {
  675. static ULONG ulHardErrorInProgress = FALSE;
  676. //
  677. // If we hang and recover several times, don't allow more than
  678. // one dialog to appear on the screen. Only allow the dialog
  679. // to pop up again, after the user has hit "ok".
  680. //
  681. if (InterlockedCompareExchange(&ulHardErrorInProgress,
  682. TRUE,
  683. FALSE) == FALSE) {
  684. ULONG Response;
  685. ExRaiseHardError(0xC0000415, //STATUS_HUNG_DISPLAY_DRIVER_THREAD
  686. 1,
  687. 1,
  688. (PULONG_PTR)&pUnicodeDriverName,
  689. OptionOk,
  690. &Response);
  691. InterlockedExchange(&ulHardErrorInProgress, FALSE);
  692. }
  693. }
  694. return;
  695. } // WdpBugCheckStuckDriver()
  696. VOID
  697. WdpKernelApc(
  698. IN PKAPC pApc,
  699. OUT PKNORMAL_ROUTINE *pNormalRoutine,
  700. IN OUT PVOID pvNormalContext,
  701. IN OUT PVOID *ppvSystemArgument1,
  702. IN OUT PVOID *ppvSystemArgument2
  703. )
  704. /*++
  705. Routine Description:
  706. This APC runs in the context of spinning thread and is responsible
  707. for raising THREAD_STUCK exception.
  708. Arguments:
  709. pApc - Not used.
  710. pNormalRoutine - Not used.
  711. pvNormalContext - Not used.
  712. ppvSystemArgument1 - Supplies a pointer to WD_GDI_CONTEXT_DATA.
  713. ppvSystemArgument2 - Not used.
  714. Return Value:
  715. None.
  716. --*/
  717. {
  718. PKEVENT pInjectionEvent;
  719. CONTEXT Context;
  720. PWD_GDI_CONTEXT_DATA pContextData = (PWD_GDI_CONTEXT_DATA)*ppvSystemArgument1;
  721. ULONG_PTR ulpImageStart;
  722. ULONG_PTR ulpImageStop;
  723. PETHREAD pThread;
  724. NTSTATUS ntStatus;
  725. PLDEV pldev;
  726. ASSERT(NULL != ppvSystemArgument1);
  727. UNREFERENCED_PARAMETER(pApc);
  728. UNREFERENCED_PARAMETER(pNormalRoutine);
  729. UNREFERENCED_PARAMETER(pvNormalContext);
  730. UNREFERENCED_PARAMETER(ppvSystemArgument2);
  731. pInjectionEvent = pContextData->pInjectionEvent;
  732. pldev = *pContextData->ppldevDrivers;
  733. pThread = PsGetCurrentThread();
  734. //
  735. // Initialize the context.
  736. //
  737. RtlZeroMemory(&Context, sizeof (Context));
  738. Context.ContextFlags = CONTEXT_ALL;
  739. //
  740. // Get the kernel context for this thread.
  741. //
  742. if (NT_SUCCESS(PsGetContextThread(pThread, &Context, KernelMode)))
  743. {
  744. //
  745. // We can safely touch the pldev's (which live in session space)
  746. // because this thread came from a process that has the session
  747. // space mapped in.
  748. //
  749. while (pldev)
  750. {
  751. if (pldev->pGdiDriverInfo)
  752. {
  753. ulpImageStart = (ULONG_PTR)pldev->pGdiDriverInfo->ImageAddress;
  754. ulpImageStop = ulpImageStart + (ULONG_PTR)pldev->pGdiDriverInfo->ImageLength - 1;
  755. //
  756. // Modify the context to inject a fault into the thread
  757. // when it starts running again (after APC returns).
  758. //
  759. #if defined (_X86_)
  760. if ((Context.Eip >= ulpImageStart) && (Context.Eip <= ulpImageStop))
  761. {
  762. //
  763. // Capture the context so we can use it to create a mini-dump
  764. //
  765. pContextData->ulDumpSize = KeCapturePersistentThreadState(
  766. &Context,
  767. pThread,
  768. pContextData->pBugCheckData->ulBugCheckCode,
  769. pContextData->pBugCheckData->ulpBugCheckParameter1,
  770. pContextData->pBugCheckData->ulpBugCheckParameter2,
  771. pContextData->pBugCheckData->ulpBugCheckParameter3,
  772. pContextData->pBugCheckData->ulpBugCheckParameter4,
  773. pContextData->pvDump);
  774. //
  775. // We should decrement the stack pointer, and store the
  776. // return address to "fake" a call instruction. However,
  777. // this is not allowed. So instead, lets just put the
  778. // return address in the current stack location. This isn't
  779. // quite right, but should make the stack unwind code happier
  780. // then if we do nothing.
  781. //
  782. //Context.Esp -= 4;
  783. //*((PULONG)Context.Esp) = context.Eip;
  784. Context.Eip = (ULONG)WdpRaiseExceptionInThread;
  785. //
  786. // Set the modified context record.
  787. //
  788. Context.ContextFlags = CONTEXT_CONTROL;
  789. PsSetContextThread(pThread, &Context, KernelMode);
  790. pContextData->bRecoveryAttempted = TRUE;
  791. break;
  792. }
  793. #elif defined (_IA64_)
  794. if ((Context.StIIP >= ulpImageStart) &&
  795. (Context.StIIP <= ulpImageStop))
  796. {
  797. FRAME_MARKER cfm;
  798. PULONGLONG pullTemp = (PULONGLONG)WdpRaiseExceptionInThread;
  799. ULONGLONG RsBSP;
  800. ULONGLONG StIFS;
  801. //
  802. // Capture the context so we can use it to create a mini-dump
  803. //
  804. pContextData->ulDumpSize = KeCapturePersistentThreadState(
  805. &Context,
  806. pThread,
  807. pContextData->pBugCheckData->ulBugCheckCode,
  808. pContextData->pBugCheckData->ulpBugCheckParameter1,
  809. pContextData->pBugCheckData->ulpBugCheckParameter2,
  810. pContextData->pBugCheckData->ulpBugCheckParameter3,
  811. pContextData->pBugCheckData->ulpBugCheckParameter4,
  812. pContextData->pvDump);
  813. RsBSP = Context.RsBSP;
  814. StIFS = Context.StIFS;
  815. //
  816. // We have to unwind one level up to so any preserved registers, BrRp and PFS
  817. // are set correctly.
  818. //
  819. {
  820. ULONGLONG TargetGp;
  821. PRUNTIME_FUNCTION FunctionEntry;
  822. FRAME_POINTERS EstablisherFrame;
  823. BOOLEAN InFunction;
  824. FunctionEntry = RtlLookupFunctionEntry(Context.StIIP,
  825. &ulpImageStart,
  826. &TargetGp);
  827. if (FunctionEntry)
  828. {
  829. Context.StIIP = RtlVirtualUnwind(ulpImageStart,
  830. Context.StIIP,
  831. FunctionEntry,
  832. &Context,
  833. &InFunction,
  834. &EstablisherFrame,
  835. NULL);
  836. //
  837. // Set the return address.
  838. //
  839. Context.BrRp = (Context.StIIP+0x10) & ~(ULONGLONG)0xf;
  840. }
  841. }
  842. //
  843. // Restore orignal BSP and IFS
  844. //
  845. Context.RsBSP = RsBSP;
  846. Context.StIFS = StIFS;
  847. //
  848. // Emulate the call.
  849. //
  850. Context.StIIP = *pullTemp;
  851. Context.IntGp = *(pullTemp+1);
  852. Context.StIPSR &= ~((ULONGLONG) 3 << PSR_RI);
  853. //
  854. // Set the modified context record.
  855. //
  856. Context.ContextFlags = CONTEXT_FULL;
  857. PsSetContextThread(pThread, &Context, KernelMode);
  858. pContextData->bRecoveryAttempted = TRUE;
  859. break;
  860. }
  861. #endif // per platform
  862. }
  863. pldev = pldev->pldevNext;
  864. }
  865. if (pContextData->pvDump && pContextData->ulDumpSize)
  866. {
  867. KBUGCHECK_SECONDARY_DUMP_DATA SecondaryData;
  868. ULONG ulDumpSize = pContextData->ulDumpSize;
  869. //
  870. // Write the data to disk (without secondary data)
  871. //
  872. pVpWriteFile(L"\\SystemRoot\\MEMORY.DMP",
  873. pContextData->pvDump,
  874. pContextData->ulDumpSize);
  875. //
  876. // Try to collect secondary data and rewrite dump with it
  877. //
  878. RtlZeroMemory(&SecondaryData, sizeof(SecondaryData));
  879. pVpGeneralBugcheckHandler(&SecondaryData);
  880. if (SecondaryData.OutBuffer &&
  881. SecondaryData.OutBufferLength)
  882. {
  883. pContextData->ulDumpSize = pVpAppendSecondaryMinidumpData(
  884. SecondaryData.OutBuffer,
  885. SecondaryData.OutBufferLength,
  886. pContextData->pvDump);
  887. if (pContextData->ulDumpSize > ulDumpSize) {
  888. pVpWriteFile(L"\\SystemRoot\\MEMORY.DMP",
  889. pContextData->pvDump,
  890. pContextData->ulDumpSize);
  891. }
  892. }
  893. }
  894. //
  895. // Single our event so the caller knows we did something.
  896. //
  897. KeSetEvent(pInjectionEvent, 0, FALSE);
  898. }
  899. } // WdpKernelApc()
  900. BOOLEAN
  901. WdpInjectExceptionIntoThread(
  902. PKTHREAD pThread,
  903. PWD_GDI_DPC_CONTEXT pDpcContext
  904. )
  905. /*++
  906. Routine Description:
  907. This routine schedules APC to run in the spinning thread's context.
  908. Arguments:
  909. pThread - Supplies a pointer to the spinning thread.
  910. ppvSystemArgument1 - Supplies a pointer to WD_GDI_DPC_CONTEXT.
  911. Return Value:
  912. None.
  913. --*/
  914. {
  915. KAPC Apc;
  916. KEVENT InjectionEvent;
  917. WD_GDI_CONTEXT_DATA ContextData;
  918. ASSERT(NULL != pThread);
  919. ASSERT(NULL != pDpcContext);
  920. //
  921. // Prepare all needed data for minidump creation
  922. //
  923. RtlZeroMemory(&ContextData, sizeof(ContextData));
  924. ContextData.pThread = pThread;
  925. ContextData.pInjectionEvent = &InjectionEvent;
  926. ContextData.ppldevDrivers = pDpcContext->ppldevDrivers;
  927. ContextData.bRecoveryAttempted = FALSE;
  928. ContextData.pBugCheckData = &g_WdpBugCheckData;
  929. ContextData.pvDump = ExAllocatePoolWithTag(PagedPool,
  930. TRIAGE_DUMP_SIZE + 0x1000, // XXX olegk - why 1000? why not 2*TRIAGE_DUMP_SIZE?
  931. VP_TAG);
  932. KeInitializeEvent(&InjectionEvent, NotificationEvent, FALSE);
  933. KeInitializeApc(&Apc,
  934. pThread,
  935. OriginalApcEnvironment,
  936. WdpKernelApc,
  937. NULL,
  938. NULL,
  939. KernelMode,
  940. NULL);
  941. if (KeInsertQueueApc(&Apc, &ContextData, NULL, 0))
  942. {
  943. NTSTATUS Status;
  944. LARGE_INTEGER Timeout;
  945. Timeout.QuadPart = -(LONGLONG)80000000L; // 8 sec
  946. Status = KeWaitForSingleObject(&InjectionEvent,
  947. Executive,
  948. KernelMode,
  949. FALSE,
  950. &Timeout);
  951. if (Status != STATUS_SUCCESS)
  952. {
  953. KeBugCheckEx(ContextData.pBugCheckData->ulBugCheckCode,
  954. ContextData.pBugCheckData->ulpBugCheckParameter1,
  955. ContextData.pBugCheckData->ulpBugCheckParameter2,
  956. (ULONG_PTR)&pDpcContext->DisplayDriverName,
  957. ContextData.pBugCheckData->ulpBugCheckParameter4);
  958. }
  959. KeClearEvent(&InjectionEvent); // BUGBUG: Is this required?
  960. }
  961. if (ContextData.pvDump) {
  962. ExFreePool(ContextData.pvDump);
  963. }
  964. return TRUE;
  965. } // WdpInjectExceptionIntoThread()
  966. VOID
  967. WdpRaiseExceptionInThread()
  968. /*++
  969. Routine Description:
  970. This routine raises THREAD_STUCK exception in the spinning thread's context.
  971. Arguments:
  972. None.
  973. Return Value:
  974. None.
  975. --*/
  976. {
  977. ExRaiseStatus(WD_SE_THREAD_STUCK);
  978. } // WdpRaiseExceptionInThread()
  979. PDEVICE_OBJECT
  980. pVpGetFdo(
  981. PDEVICE_OBJECT pPdo
  982. )
  983. /*++
  984. Routine Description:
  985. Return the FDO that goes with this PDO.
  986. Arguments:
  987. pPdo - the PDO for which you want to find the FDO.
  988. Returns:
  989. the FDO associated with the PDO.
  990. --*/
  991. {
  992. PFDO_EXTENSION CurrFdo = FdoHead;
  993. while (CurrFdo) {
  994. if (CurrFdo->PhysicalDeviceObject == pPdo) {
  995. return CurrFdo->FunctionalDeviceObject;
  996. }
  997. CurrFdo = CurrFdo->NextFdoExtension;
  998. }
  999. return NULL;
  1000. }