Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1246 lines
40 KiB

  1. /*++
  2. Copyright (c) 2000 Microsoft Corporation
  3. Module Name:
  4. gdisup.c
  5. Abstract:
  6. This is the NT Watchdog driver implementation.
  7. Author:
  8. Michael Maciesowicz (mmacie) 05-May-2000
  9. Environment:
  10. Kernel mode only.
  11. Notes:
  12. This module cannot be moved to win32k since routines defined here can
  13. be called at any time and it is possible that win32k may not be mapped
  14. into running process space at this time (e.g. TS session).
  15. Revision History:
  16. --*/
  17. //
  18. // TODO: This module needs major rework.
  19. //
  20. // 1. We should eliminate all global variables from here and move them into
  21. // GDI context structure.
  22. //
  23. // 2. We should extract generic logging routines
  24. // (e.g. WdWriteErrorLogEntry(pdo, className), WdWriteEventToRegistry(...),
  25. // WdBreakPoint(...)) so we can use them for any device class, not just Display.
  26. //
  27. // 3. We should use IoAllocateWorkItem - we could drop some globals then.
  28. //
  29. #include "wd.h"
  30. #include "ntiodump.h"
  31. //
  32. // Undocumented export from kernel to create Minidump
  33. //
  34. ULONG
  35. KeCapturePersistentThreadState(
  36. PCONTEXT pContext,
  37. PETHREAD pThread,
  38. ULONG ulBugCheckCode,
  39. ULONG_PTR ulpBugCheckParam1,
  40. ULONG_PTR ulpBugCheckParam2,
  41. ULONG_PTR ulpBugCheckParam3,
  42. ULONG_PTR ulpBugCheckParam4,
  43. PVOID pvDump
  44. );
  45. NTSTATUS
  46. PsSetContextThread(
  47. IN PETHREAD Thread,
  48. IN PCONTEXT ThreadContext,
  49. IN KPROCESSOR_MODE Mode
  50. );
  51. NTSTATUS
  52. PsGetContextThread(
  53. IN PETHREAD Thread,
  54. IN OUT PCONTEXT ThreadContext,
  55. IN KPROCESSOR_MODE Mode
  56. );
  57. typedef enum _KAPC_ENVIRONMENT {
  58. OriginalApcEnvironment,
  59. AttachedApcEnvironment,
  60. CurrentApcEnvironment,
  61. InsertApcEnvironment
  62. } KAPC_ENVIRONMENT;
  63. NTKERNELAPI
  64. VOID
  65. KeInitializeApc (
  66. IN PRKAPC Apc,
  67. IN PRKTHREAD Thread,
  68. IN KAPC_ENVIRONMENT Environment,
  69. IN PKKERNEL_ROUTINE KernelRoutine,
  70. IN PKRUNDOWN_ROUTINE RundownRoutine OPTIONAL,
  71. IN PKNORMAL_ROUTINE NormalRoutine OPTIONAL,
  72. IN KPROCESSOR_MODE ProcessorMode OPTIONAL,
  73. IN PVOID NormalContext OPTIONAL
  74. );
  75. NTKERNELAPI
  76. BOOLEAN
  77. KeInsertQueueApc (
  78. IN PRKAPC Apc,
  79. IN PVOID SystemArgument1,
  80. IN PVOID SystemArgument2,
  81. IN KPRIORITY Increment
  82. );
  83. #define WD_HANDLER_IDLE 0
  84. #define WD_HANDLER_BUSY 1
  85. #define WD_GDI_STRESS_BREAK_POINT_DELAY 15
  86. typedef struct _BUGCHECK_DATA
  87. {
  88. ULONG ulBugCheckCode;
  89. ULONG_PTR ulpBugCheckParameter1;
  90. ULONG_PTR ulpBugCheckParameter2;
  91. ULONG_PTR ulpBugCheckParameter3;
  92. ULONG_PTR ulpBugCheckParameter4;
  93. } BUGCHECK_DATA, *PBUGCHECK_DATA;
  94. VOID
  95. WdBugCheckStuckDriver(
  96. IN PVOID Context
  97. );
  98. VOID
  99. VpNotifyEaData(
  100. PDEVICE_OBJECT DeviceObject,
  101. PVOID pvDump
  102. );
  103. #ifdef ALLOC_PRAGMA
  104. #pragma alloc_text (PAGE, WdBugCheckStuckDriver)
  105. #endif
  106. #if defined(_IA64_)
  107. #define PSR_RI 41
  108. #define PSR_CPL 32
  109. typedef struct _FRAME_MARKER {
  110. union {
  111. struct {
  112. ULONGLONG sof : 7;
  113. ULONGLONG sol : 7;
  114. ULONGLONG sor : 4;
  115. ULONGLONG rrbgr : 7;
  116. ULONGLONG rrbfr : 7;
  117. ULONGLONG rrbpr : 6;
  118. } f;
  119. ULONGLONG Ulong64;
  120. } u;
  121. } FRAME_MARKER;
  122. #endif
  123. BOOLEAN
  124. WdDisableRecovery = FALSE;
  125. BUGCHECK_DATA
  126. g_WdBugCheckData = {0, 0, 0, 0, 0};
  127. WORK_QUEUE_ITEM
  128. g_WdWorkQueueItem;
  129. LONG
  130. g_lDisplayHandlerState = WD_HANDLER_IDLE;
  131. //
  132. // TODO:
  133. //
  134. // This structure is defined in ntexapi.h. Find a way to include
  135. // this here.
  136. //
  137. typedef struct _PIMAGE_EXPORT_DIRECTORY IMAGE_EXPORT_DIRECTORY, *PIMAGE_EXPORT_DIRECTORY;
  138. typedef struct _SYSTEM_GDI_DRIVER_INFORMATION {
  139. UNICODE_STRING DriverName;
  140. PVOID ImageAddress;
  141. PVOID SectionPointer;
  142. PVOID EntryPoint;
  143. PIMAGE_EXPORT_DIRECTORY ExportSectionPointer;
  144. ULONG ImageLength;
  145. } SYSTEM_GDI_DRIVER_INFORMATION, *PSYSTEM_GDI_DRIVER_INFORMATION;
  146. typedef enum _HARDERROR_RESPONSE_OPTION {
  147. OptionAbortRetryIgnore,
  148. OptionOk,
  149. OptionOkCancel,
  150. OptionRetryCancel,
  151. OptionYesNo,
  152. OptionYesNoCancel,
  153. OptionShutdownSystem,
  154. OptionOkNoWait,
  155. OptionCancelTryContinue
  156. } HARDERROR_RESPONSE_OPTION;
  157. NTKERNELAPI
  158. NTSTATUS
  159. ExRaiseHardError(
  160. IN NTSTATUS ErrorStatus,
  161. IN ULONG NumberOfParameters,
  162. IN ULONG UnicodeStringParameterMask,
  163. IN PULONG_PTR Parameters,
  164. IN ULONG ValidResponseOptions,
  165. OUT PULONG Response
  166. );
  167. //
  168. // TODO:
  169. //
  170. // Find a way to share the same LDEV structure used by GDI.
  171. //
  172. typedef struct _LDEV {
  173. struct _LDEV *pldevNext; // link to the next LDEV in list
  174. struct _LDEV *pldevPrev; // link to the previous LDEV in list
  175. PSYSTEM_GDI_DRIVER_INFORMATION pGdiDriverInfo; // Driver module handle.
  176. } LDEV, *PLDEV;
  177. //
  178. // TODO:
  179. //
  180. // this structure is defined here, and in gre\os.cxx. We need to find
  181. // the proper .h file to put it in.
  182. //
  183. typedef struct _WATCHDOG_DPC_CONTEXT
  184. {
  185. PLDEV *ppldevDrivers;
  186. HANDLE hDriver;
  187. UNICODE_STRING DisplayDriverName;
  188. } WATCHDOG_DPC_CONTEXT, *PWATCHDOG_DPC_CONTEXT;
  189. WATCHDOGAPI
  190. VOID
  191. WdDdiWatchdogDpcCallback(
  192. IN PKDPC pDpc,
  193. IN PVOID pDeferredContext,
  194. IN PVOID pSystemArgument1,
  195. IN PVOID pSystemArgument2
  196. )
  197. /*++
  198. Routine Description:
  199. This function is a DPC callback routine for GDI watchdog. It is only
  200. called when GDI watchdog times out before it is cancelled. It schedules
  201. a work item to bugcheck the machine in the context of system worker
  202. thread.
  203. Arguments:
  204. pDpc - Supplies a pointer to a DPC object.
  205. pDeferredContext - Supplies a pointer to a GDI defined context.
  206. pSystemArgument1 - Supplies a pointer to a spinning thread object (PKTHREAD).
  207. pSystemArgument2 - Supplies a pointer to a watchdog object (PDEFERRED_WATCHDOG).
  208. Return Value:
  209. None.
  210. --*/
  211. {
  212. //
  213. // Make sure we handle only one event at the time.
  214. //
  215. // Note: Timeout and recovery events for the same watchdog object are
  216. // synchronized already in timer DPC.
  217. //
  218. if (InterlockedCompareExchange(&g_lDisplayHandlerState,
  219. WD_HANDLER_BUSY,
  220. WD_HANDLER_IDLE) == WD_HANDLER_IDLE)
  221. {
  222. g_WdBugCheckData.ulBugCheckCode = THREAD_STUCK_IN_DEVICE_DRIVER;
  223. g_WdBugCheckData.ulpBugCheckParameter1 = (ULONG_PTR)(pSystemArgument1);
  224. g_WdBugCheckData.ulpBugCheckParameter2 = (ULONG_PTR)(pSystemArgument2);
  225. g_WdBugCheckData.ulpBugCheckParameter3 = (ULONG_PTR)(pDeferredContext);
  226. g_WdBugCheckData.ulpBugCheckParameter4++;
  227. ExInitializeWorkItem(&g_WdWorkQueueItem, WdBugCheckStuckDriver, &g_WdBugCheckData);
  228. ExQueueWorkItem(&g_WdWorkQueueItem, CriticalWorkQueue);
  229. }
  230. else
  231. {
  232. //
  233. // Resume watchdog event processing.
  234. //
  235. WdCompleteEvent(pSystemArgument2, (PKTHREAD)pSystemArgument1);
  236. }
  237. return;
  238. } // WdDdiWatchdogDpcCallback()
  //
  // Builds a software exception code: Severity in bits 31-30, bit 29 always
  // set, Facility starting at bit 16, Exception in the low bits.
  //
  // Every operand is parenthesized and converted to ULONG before shifting so
  // that expression arguments group correctly and a severity of 2 or 3 does
  // not overflow a signed int when shifted left by 30.
  //
  #define MAKESOFTWAREEXCEPTION(Severity, Facility, Exception) \
      ((ULONG) ((((ULONG)(Severity)) << 30) | (((ULONG)1) << 29) | (((ULONG)(Facility)) << 16) | ((ULONG)(Exception))))

  //
  // Exception raised in a thread found spinning inside a display driver.
  //
  #define SE_THREAD_STUCK MAKESOFTWAREEXCEPTION(3,0,1)
  242. VOID
  243. RaiseExceptionInThread(
  244. VOID
  245. )
  246. {
  247. ExRaiseStatus(SE_THREAD_STUCK);
  248. }
  249. typedef struct _WATCHDOG_CONTEXT_DATA
  250. {
  251. PKEVENT pInjectionEvent;
  252. PKTHREAD pThread;
  253. PLDEV *ppldevDrivers;
  254. PWATCHDOG_DPC_CONTEXT pWatchdogContext;
  255. BOOLEAN bRecoveryAttempted;
  256. PBUGCHECK_DATA pBugCheckData;
  257. PVOID pvDump;
  258. ULONG ulDumpSize;
  259. } WATCHDOG_CONTEXT_DATA, *PWATCHDOG_CONTEXT_DATA;
  260. VOID
  261. WatchdogKernelApc(
  262. IN PKAPC Apc,
  263. OUT PKNORMAL_ROUTINE *NormalRoutine,
  264. IN OUT PVOID NormalContext,
  265. IN OUT PVOID *SystemArgument1,
  266. IN OUT PVOID *SystemArgument2
  267. )
  268. {
  269. PKEVENT pInjectionEvent;
  270. CONTEXT Context;
  271. PWATCHDOG_CONTEXT_DATA pContextData = (PWATCHDOG_CONTEXT_DATA) *SystemArgument1;
  272. ULONG_PTR ImageStart;
  273. ULONG_PTR ImageStop;
  274. PETHREAD pThread;
  275. NTSTATUS Status;
  276. PLDEV pldev;
  277. UNREFERENCED_PARAMETER (Apc);
  278. UNREFERENCED_PARAMETER (NormalRoutine);
  279. pInjectionEvent = pContextData->pInjectionEvent;
  280. pldev = *pContextData->ppldevDrivers;
  281. pThread = PsGetCurrentThread();
  282. //
  283. // Initialize the context.
  284. //
  285. memset(&Context, 0, sizeof(Context));
  286. Context.ContextFlags = CONTEXT_FULL;
  287. //
  288. // get the kernel context for this thread
  289. //
  290. if (NT_SUCCESS(PsGetContextThread(pThread, &Context, KernelMode))) {
  291. //
  292. // Capture the context so we can use it in a minidump.
  293. //
  294. pContextData->ulDumpSize = KeCapturePersistentThreadState(
  295. &Context,
  296. pThread,
  297. pContextData->pBugCheckData->ulBugCheckCode,
  298. pContextData->pBugCheckData->ulpBugCheckParameter1,
  299. pContextData->pBugCheckData->ulpBugCheckParameter2,
  300. pContextData->pBugCheckData->ulpBugCheckParameter3,
  301. pContextData->pBugCheckData->ulpBugCheckParameter4,
  302. pContextData->pvDump);
  303. //
  304. // We can safely touch the pldev's (which live in session space)
  305. // because this thread came from a process that has the session
  306. // space mapped in.
  307. //
  308. while (pldev) {
  309. if (pldev->pGdiDriverInfo) {
  310. ImageStart = (ULONG_PTR)pldev->pGdiDriverInfo->ImageAddress;
  311. ImageStop = ImageStart + (ULONG_PTR)pldev->pGdiDriverInfo->ImageLength - 1;
  312. //
  313. // Modify the context to inject a fault into the thread
  314. // when it starts running again (after APC returns)
  315. //
  316. #if defined(_X86_)
  317. if ((Context.Eip >= ImageStart) && (Context.Eip <= ImageStop)) {
  318. Context.Eip = (ULONG)RaiseExceptionInThread;
  319. //
  320. // set the modified context record
  321. //
  322. Context.ContextFlags = CONTEXT_CONTROL;
  323. PsSetContextThread(pThread, &Context, KernelMode);
  324. pContextData->bRecoveryAttempted = TRUE;
  325. break;
  326. }
  327. #elif defined(_IA64_)
  328. if ((Context.StIIP >= ImageStart) && (Context.StIIP <= ImageStop)) {
  329. FRAME_MARKER Cfm;
  330. PULONGLONG pullTemp = (PULONGLONG)RaiseExceptionInThread;
  331. //
  332. // Set the return address
  333. //
  334. Context.BrRp = Context.StIIP;
  335. //
  336. // Update the frame markers
  337. //
  338. Context.RsPFS = Context.StIFS & 0x3FFFFFFFFFi64;
  339. Context.RsPFS |= (Context.ApEC & (0x3fi64 << 52));
  340. Context.RsPFS |= (((Context.StIPSR >> PSR_CPL) & 0x3) << 62);
  341. Cfm.u.Ulong64 = Context.StIFS;
  342. Cfm.u.f.sof -= Cfm.u.f.sol;
  343. Cfm.u.f.sol = 0;
  344. Cfm.u.f.sor = 0;
  345. Cfm.u.f.rrbgr = 0;
  346. Cfm.u.f.rrbfr = 0;
  347. Cfm.u.f.rrbpr = 0;
  348. Context.StIFS = Cfm.u.Ulong64;
  349. Context.StIFS |= 0x8000000000000000;
  350. //
  351. // Emulate the call
  352. //
  353. Context.StIIP = *pullTemp;
  354. Context.IntGp = *(pullTemp+1);
  355. Context.StIPSR &= ~((ULONGLONG) 3 << PSR_RI);
  356. //
  357. // set the modified context record
  358. //
  359. Context.ContextFlags = CONTEXT_CONTROL;
  360. PsSetContextThread(pThread, &Context, KernelMode);
  361. pContextData->bRecoveryAttempted = TRUE;
  362. break;
  363. }
  364. #endif
  365. }
  366. pldev = pldev->pldevNext;
  367. }
  368. //
  369. // Notify the videoprt of the device object, and context of the
  370. // thread causing the EA.
  371. //
  372. VpNotifyEaData(pContextData->pWatchdogContext->hDriver,
  373. pContextData->ulDumpSize ? pContextData->pvDump : NULL);
  374. //
  375. // Single our event so the caller knows we did something
  376. //
  377. KeSetEvent(pInjectionEvent, 0, FALSE);
  378. }
  379. }
  380. BOOLEAN
  381. WatchdogInjectExceptionIntoThread(
  382. PKTHREAD pThread,
  383. PWATCHDOG_DPC_CONTEXT pWdContext
  384. )
  385. {
  386. KAPC Apc;
  387. KEVENT InjectionEvent;
  388. WATCHDOG_CONTEXT_DATA ContextData;
  389. //
  390. // Prepare all needed data for minidump creation
  391. //
  392. RtlZeroMemory(&ContextData, sizeof(ContextData));
  393. ContextData.pThread = pThread;
  394. ContextData.pInjectionEvent = &InjectionEvent;
  395. ContextData.ppldevDrivers = pWdContext->ppldevDrivers;
  396. ContextData.pWatchdogContext = pWdContext;
  397. ContextData.bRecoveryAttempted = FALSE;
  398. ContextData.pvDump = ExAllocatePoolWithTag(PagedPool,
  399. TRIAGE_DUMP_SIZE + 0x1000, // XXX olegk - why 1000? why not 2*TRIAGE_DUMP_SIZE?
  400. WD_TAG);
  401. ContextData.pBugCheckData = &g_WdBugCheckData;
  402. KeInitializeEvent(&InjectionEvent, NotificationEvent, FALSE);
  403. KeInitializeApc(&Apc,
  404. pThread,
  405. OriginalApcEnvironment,
  406. WatchdogKernelApc,
  407. NULL,
  408. NULL,
  409. KernelMode,
  410. NULL);
  411. if (KeInsertQueueApc(&Apc, &ContextData, NULL, 0))
  412. {
  413. KeWaitForSingleObject(&InjectionEvent,
  414. Executive,
  415. KernelMode,
  416. FALSE,
  417. NULL);
  418. //
  419. // We need this wait because ContextData.bRecoveryAttempted is
  420. // set in the APC and we need to wait on the result.
  421. //
  422. KeClearEvent(&InjectionEvent);
  423. }
  424. return ContextData.bRecoveryAttempted;
  425. }
  426. VOID
  427. WdBugCheckStuckDriver(
  428. IN PVOID pContext
  429. )
  430. /*++
  431. Routine Description:
  432. This function is a worker callback routine for GDI watchdog DPC.
  433. Arguments:
  434. pContext - Supplies a pointer to a watchdog defined context.
  435. Return Value:
  436. None.
  437. --*/
  438. {
  439. static BOOLEAN s_bFirstTime = TRUE;
  440. static BOOLEAN s_bDbgBreak = FALSE;
  441. static BOOLEAN s_bEventLogged = FALSE;
  442. static ULONG s_ulTrapOnce = WD_DEFAULT_TRAP_ONCE;
  443. static ULONG s_ulDisableBugcheck = WD_DEFAULT_DISABLE_BUGCHECK;
  444. static ULONG s_ulBreakPointDelay = WD_GDI_STRESS_BREAK_POINT_DELAY;
  445. static ULONG s_ulCurrentBreakPointDelay = WD_GDI_STRESS_BREAK_POINT_DELAY;
  446. static ULONG s_ulBreakCount = 0;
  447. static ULONG s_ulEventCount = 0;
  448. static ULONG s_ulEaRecovery = 0;
  449. static ULONG s_ulFullRecovery = 0;
  450. PBUGCHECK_DATA pBugCheckData;
  451. PKTHREAD pThread;
  452. PDEFERRED_WATCHDOG pWatch;
  453. PUNICODE_STRING pUnicodeDriverName;
  454. PDEVICE_OBJECT pFdo;
  455. PDEVICE_OBJECT pPdo;
  456. NTSTATUS ntStatus;
  457. WD_EVENT_TYPE lastEvent;
  458. PWATCHDOG_DPC_CONTEXT WatchdogContext;
  459. BOOLEAN Recovered = FALSE;
  460. PAGED_CODE();
  461. ASSERT(NULL != pContext);
  462. pBugCheckData = (PBUGCHECK_DATA)pContext;
  463. WatchdogContext = (PWATCHDOG_DPC_CONTEXT)pBugCheckData->ulpBugCheckParameter3;
  464. pThread = (PKTHREAD)(pBugCheckData->ulpBugCheckParameter1);
  465. pWatch = (PDEFERRED_WATCHDOG)(pBugCheckData->ulpBugCheckParameter2);
  466. pUnicodeDriverName = &WatchdogContext->DisplayDriverName;
  467. //
  468. // Note: pThread is NULL for recovery events.
  469. //
  470. ASSERT(NULL != pWatch);
  471. ASSERT(NULL != pUnicodeDriverName);
  472. pFdo = WdGetDeviceObject(pWatch);
  473. pPdo = WdGetLowestDeviceObject(pWatch);
  474. ASSERT(NULL != pFdo);
  475. ASSERT(NULL != pPdo);
  476. lastEvent = WdGetLastEvent(pWatch);
  477. ASSERT((WdTimeoutEvent == lastEvent) || (WdRecoveryEvent == lastEvent));
  478. //
  479. // Grab configuration data from the registry on first timeout.
  480. //
  481. if (TRUE == s_bFirstTime)
  482. {
  483. ULONG ulDefaultTrapOnce = WD_DEFAULT_TRAP_ONCE;
  484. ULONG ulDefaultDisableBugcheck = WD_DEFAULT_DISABLE_BUGCHECK;
  485. ULONG ulDefaultBreakPointDelay = WD_GDI_STRESS_BREAK_POINT_DELAY;
  486. ULONG ulDefaultBreakCount = 0;
  487. ULONG ulDefaultEventCount = 0;
  488. ULONG ulDefaultEaRecovery = 0;
  489. ULONG ulDefaultFullRecovery = 0;
  490. RTL_QUERY_REGISTRY_TABLE queryTable[] =
  491. {
  492. {NULL, RTL_QUERY_REGISTRY_DIRECT, L"TrapOnce", &s_ulTrapOnce, REG_DWORD, &ulDefaultTrapOnce, 4},
  493. {NULL, RTL_QUERY_REGISTRY_DIRECT, L"DisableBugcheck", &s_ulDisableBugcheck, REG_DWORD, &ulDefaultDisableBugcheck, 4},
  494. {NULL, RTL_QUERY_REGISTRY_DIRECT, L"BreakPointDelay", &s_ulBreakPointDelay, REG_DWORD, &ulDefaultBreakPointDelay, 4},
  495. {NULL, RTL_QUERY_REGISTRY_DIRECT, L"BreakCount", &s_ulBreakCount, REG_DWORD, &ulDefaultBreakCount, 4},
  496. {NULL, RTL_QUERY_REGISTRY_DIRECT, L"EventCount", &s_ulEventCount, REG_DWORD, &ulDefaultEventCount, 4},
  497. {NULL, RTL_QUERY_REGISTRY_DIRECT, L"EaRecovery", &s_ulEaRecovery, REG_DWORD, &ulDefaultEaRecovery, 4},
  498. {NULL, RTL_QUERY_REGISTRY_DIRECT, L"FullRecovery", &s_ulFullRecovery, REG_DWORD, &ulDefaultFullRecovery, 4},
  499. {NULL, 0, NULL}
  500. };
  501. //
  502. // Get configurable values and accumulated statistics from registry.
  503. //
  504. RtlQueryRegistryValues(RTL_REGISTRY_ABSOLUTE,
  505. WD_KEY_WATCHDOG_DISPLAY,
  506. queryTable,
  507. NULL,
  508. NULL);
  509. //
  510. // Rolling down counter to workaround GDI slowness in some stress cases.
  511. //
  512. s_ulCurrentBreakPointDelay = s_ulBreakPointDelay;
  513. #if !defined(_X86_)
  514. //
  515. // For now, don't try to recover on non-x86 platforms
  516. //
  517. s_ulEaRecovery = FALSE;
  518. #endif
  519. }
  520. //
  521. // Handle current event.
  522. //
  523. if (WdTimeoutEvent == lastEvent)
  524. {
  525. //
  526. // Timeout.
  527. //
  528. ULONG ulDebuggerNotPresent;
  529. BOOLEAN bBreakIn;
  530. ASSERT(NULL != pThread);
  531. ulDebuggerNotPresent = 1;
  532. bBreakIn = FALSE;
  533. if ((TRUE == KD_DEBUGGER_ENABLED) && (FALSE == KD_DEBUGGER_NOT_PRESENT))
  534. {
  535. //
  536. // Give a chance to debug a spinning code if kernel debugger is connected.
  537. //
  538. ulDebuggerNotPresent = 0;
  539. if ((0 == s_ulTrapOnce) || (FALSE == s_bDbgBreak))
  540. {
  541. //
  542. // Print out info to debugger and break in if we timed out enought times already.
  543. // Hopefuly one day GDI becomes fast enough and we won't have to set any delays.
  544. //
  545. if (0 == s_ulCurrentBreakPointDelay)
  546. {
  547. s_ulCurrentBreakPointDelay = s_ulBreakPointDelay;
  548. DbgPrint("\n");
  549. DbgPrint("*******************************************************************************\n");
  550. DbgPrint("* *\n");
  551. DbgPrint("* The watchdog detected a timeout condition. We broke into the debugger to *\n");
  552. DbgPrint("* allow a chance for debugging this failure. *\n");
  553. DbgPrint("* *\n");
  554. DbgPrint("* Normally the system will try to recover from this failure and return to a *\n");
  555. DbgPrint("* VGA graphics mode. To disable the recovery feature edit the videoprt *\n");
  556. DbgPrint("* variable VpDisableRecovery. This will allow you to debug your driver. *\n");
  557. DbgPrint("* i.e. execute ed watchdog!WdDisableRecovery 1. *\n");
  558. DbgPrint("* *\n");
  559. DbgPrint("* Intercepted bugcheck code and arguments are listed below this message. *\n");
  560. DbgPrint("* You can use them the same way as you would in case of the actual break, *\n");
  561. DbgPrint("* i.e. execute .thread Arg1 then kv to identify an offending thread. *\n");
  562. DbgPrint("* *\n");
  563. DbgPrint("*******************************************************************************\n");
  564. DbgPrint("\n");
  565. DbgPrint("*** Intercepted Fatal System Error: 0x%08X\n", pBugCheckData->ulBugCheckCode);
  566. DbgPrint(" (0x%p,0x%p,0x%p,0x%p)\n\n",
  567. pBugCheckData->ulpBugCheckParameter1,
  568. pBugCheckData->ulpBugCheckParameter2,
  569. pBugCheckData->ulpBugCheckParameter3,
  570. pBugCheckData->ulpBugCheckParameter4);
  571. DbgPrint("Driver at fault: %ws\n\n", pUnicodeDriverName->Buffer);
  572. bBreakIn = TRUE;
  573. s_bDbgBreak = TRUE;
  574. s_ulBreakCount++;
  575. }
  576. else
  577. {
  578. DbgPrint("Watchdog: Timeout in %ws. Break in %d\n",
  579. pUnicodeDriverName->Buffer,
  580. s_ulCurrentBreakPointDelay);
  581. s_ulCurrentBreakPointDelay--;
  582. }
  583. }
  584. //
  585. // Make sure we won't bugcheck if we have kernel debugger connected.
  586. //
  587. s_ulDisableBugcheck = 1;
  588. }
  589. else if (0 == s_ulDisableBugcheck)
  590. {
  591. s_ulBreakCount++;
  592. }
  593. //
  594. // Log error (only once unless we recover).
  595. //
  596. if ((FALSE == s_bEventLogged) && ((TRUE == bBreakIn) || ulDebuggerNotPresent))
  597. {
  598. PIO_ERROR_LOG_PACKET pIoErrorLogPacket;
  599. ULONG ulPacketSize;
  600. USHORT usNumberOfStrings;
  601. PWCHAR wszDeviceClass = L"display";
  602. ULONG ulClassSize = sizeof (L"display");
  603. ulPacketSize = sizeof (IO_ERROR_LOG_PACKET);
  604. usNumberOfStrings = 0;
  605. //
  606. // For event log message:
  607. //
  608. // %1 = fixed device description (this is set by event log itself)
  609. // %2 = string 1 = device class starting in lower case
  610. // %3 = string 2 = driver name
  611. //
  612. if ((ulPacketSize + ulClassSize) <= ERROR_LOG_MAXIMUM_SIZE)
  613. {
  614. ulPacketSize += ulClassSize;
  615. usNumberOfStrings++;
  616. //
  617. // We're looking at MaximumLength since it includes terminating UNICODE_NULL.
  618. //
  619. if ((ulPacketSize + pUnicodeDriverName->MaximumLength) <= ERROR_LOG_MAXIMUM_SIZE)
  620. {
  621. ulPacketSize += pUnicodeDriverName->MaximumLength;
  622. usNumberOfStrings++;
  623. }
  624. }
  625. pIoErrorLogPacket = IoAllocateErrorLogEntry(pFdo, (UCHAR)ulPacketSize);
  626. if (pIoErrorLogPacket)
  627. {
  628. pIoErrorLogPacket->MajorFunctionCode = 0;
  629. pIoErrorLogPacket->RetryCount = 0;
  630. pIoErrorLogPacket->DumpDataSize = 0;
  631. pIoErrorLogPacket->NumberOfStrings = usNumberOfStrings;
  632. pIoErrorLogPacket->StringOffset = (USHORT)FIELD_OFFSET(IO_ERROR_LOG_PACKET, DumpData);
  633. pIoErrorLogPacket->EventCategory = 0;
  634. pIoErrorLogPacket->ErrorCode = IO_ERR_THREAD_STUCK_IN_DEVICE_DRIVER;
  635. pIoErrorLogPacket->UniqueErrorValue = 0;
  636. pIoErrorLogPacket->FinalStatus = STATUS_SUCCESS;
  637. pIoErrorLogPacket->SequenceNumber = 0;
  638. pIoErrorLogPacket->IoControlCode = 0;
  639. pIoErrorLogPacket->DeviceOffset.QuadPart = 0;
  640. if (usNumberOfStrings > 0)
  641. {
  642. RtlCopyMemory(&(pIoErrorLogPacket->DumpData[0]),
  643. wszDeviceClass,
  644. ulClassSize);
  645. if (usNumberOfStrings > 1)
  646. {
  647. RtlCopyMemory((PUCHAR)&(pIoErrorLogPacket->DumpData[0]) + ulClassSize,
  648. pUnicodeDriverName->Buffer,
  649. pUnicodeDriverName->MaximumLength);
  650. }
  651. }
  652. IoWriteErrorLogEntry(pIoErrorLogPacket);
  653. s_bEventLogged = TRUE;
  654. }
  655. }
  656. //
  657. // Write reliability info into registry. Setting ShutdownEventPending will trigger winlogon
  658. // to run savedump where we're doing our boot-time handling of watchdog events for DrWatson.
  659. //
  660. // Note: We are only allowed to set ShutdownEventPending, savedump is the only component
  661. // allowed to clear this value. Even if we recover from watchdog timeout we'll keep this
  662. // value set, savedump will be able to figure out if we recovered or not.
  663. //
  664. if (TRUE == s_bFirstTime)
  665. {
  666. ULONG ulValue = 1;
  667. //
  668. // Set ShutdownEventPending flag.
  669. //
  670. ntStatus = RtlWriteRegistryValue(RTL_REGISTRY_ABSOLUTE,
  671. WD_KEY_RELIABILITY,
  672. L"ShutdownEventPending",
  673. REG_DWORD,
  674. &ulValue,
  675. sizeof (ulValue));
  676. if(NT_SUCCESS(ntStatus))
  677. {
  678. WdFlushRegistryKey(pWatch, WD_KEY_RELIABILITY);
  679. }
  680. else
  681. {
  682. //
  683. // Reliability key should be always reliable there.
  684. //
  685. ASSERT(FALSE);
  686. }
  687. }
  688. //
  689. // Write watchdog event info into registry.
  690. //
  691. if ((0 == s_ulTrapOnce) || (TRUE == s_bFirstTime))
  692. {
  693. //
  694. // Is Watchdog\Display key already there?
  695. //
  696. ntStatus = RtlCheckRegistryKey(RTL_REGISTRY_ABSOLUTE,
  697. WD_KEY_WATCHDOG_DISPLAY);
  698. if (!NT_SUCCESS(ntStatus))
  699. {
  700. //
  701. // Is Watchdog key already there?
  702. //
  703. ntStatus = RtlCheckRegistryKey(RTL_REGISTRY_ABSOLUTE,
  704. WD_KEY_WATCHDOG);
  705. if (!NT_SUCCESS(ntStatus))
  706. {
  707. //
  708. // Create a new key.
  709. //
  710. ntStatus = RtlCreateRegistryKey(RTL_REGISTRY_ABSOLUTE,
  711. WD_KEY_WATCHDOG);
  712. }
  713. if (NT_SUCCESS(ntStatus))
  714. {
  715. //
  716. // Create a new key.
  717. //
  718. ntStatus = RtlCreateRegistryKey(RTL_REGISTRY_ABSOLUTE,
  719. WD_KEY_WATCHDOG_DISPLAY);
  720. }
  721. }
  722. if (NT_SUCCESS(ntStatus))
  723. {
  724. PVOID pvPropertyBuffer;
  725. ULONG ulLength;
  726. ULONG ulValue;
  727. //
  728. // Set values maintained by watchdog.
  729. //
  730. ulValue = 1;
  731. RtlWriteRegistryValue(RTL_REGISTRY_ABSOLUTE,
  732. WD_KEY_WATCHDOG_DISPLAY,
  733. L"EventFlag",
  734. REG_DWORD,
  735. &ulValue,
  736. sizeof (ulValue));
  737. s_ulEventCount++;
  738. RtlWriteRegistryValue(RTL_REGISTRY_ABSOLUTE,
  739. WD_KEY_WATCHDOG_DISPLAY,
  740. L"EventCount",
  741. REG_DWORD,
  742. &s_ulEventCount,
  743. sizeof (s_ulEventCount));
  744. RtlWriteRegistryValue(RTL_REGISTRY_ABSOLUTE,
  745. WD_KEY_WATCHDOG_DISPLAY,
  746. L"BreakCount",
  747. REG_DWORD,
  748. &s_ulBreakCount,
  749. sizeof (s_ulBreakCount));
  750. ulValue = !s_ulDisableBugcheck;
  751. RtlWriteRegistryValue(RTL_REGISTRY_ABSOLUTE,
  752. WD_KEY_WATCHDOG_DISPLAY,
  753. L"BugcheckTriggered",
  754. REG_DWORD,
  755. &ulValue,
  756. sizeof (ulValue));
  757. RtlWriteRegistryValue(RTL_REGISTRY_ABSOLUTE,
  758. WD_KEY_WATCHDOG_DISPLAY,
  759. L"DebuggerNotPresent",
  760. REG_DWORD,
  761. &ulDebuggerNotPresent,
  762. sizeof (ulDebuggerNotPresent));
  763. RtlWriteRegistryValue(RTL_REGISTRY_ABSOLUTE,
  764. WD_KEY_WATCHDOG_DISPLAY,
  765. L"DriverName",
  766. REG_SZ,
  767. pUnicodeDriverName->Buffer,
  768. pUnicodeDriverName->MaximumLength);
  769. //
  770. // Delete other values in case allocation or property read fails.
  771. //
  772. RtlDeleteRegistryValue(RTL_REGISTRY_ABSOLUTE,
  773. WD_KEY_WATCHDOG_DISPLAY,
  774. L"DeviceClass");
  775. RtlDeleteRegistryValue(RTL_REGISTRY_ABSOLUTE,
  776. WD_KEY_WATCHDOG_DISPLAY,
  777. L"DeviceDescription");
  778. RtlDeleteRegistryValue(RTL_REGISTRY_ABSOLUTE,
  779. WD_KEY_WATCHDOG_DISPLAY,
  780. L"DeviceFriendlyName");
  781. RtlDeleteRegistryValue(RTL_REGISTRY_ABSOLUTE,
  782. WD_KEY_WATCHDOG_DISPLAY,
  783. L"HardwareID");
  784. RtlDeleteRegistryValue(RTL_REGISTRY_ABSOLUTE,
  785. WD_KEY_WATCHDOG_DISPLAY,
  786. L"Manufacturer");
  787. //
  788. // Allocate buffer for device properties reads.
  789. //
  790. // Note: Legacy devices don't have PDOs and we can't query properties
  791. // for them. Calling IoGetDeviceProperty() with FDO upsets Verifier.
  792. // In legacy case lowest device object is the same as FDO, we check
  793. // against this and if this is the case we won't allocate property
  794. // buffer and we'll skip the next block.
  795. //
  796. if (pFdo != pPdo)
  797. {
  798. pvPropertyBuffer = ExAllocatePoolWithTag(PagedPool,
  799. WD_MAX_PROPERTY_SIZE,
  800. WD_TAG);
  801. }
  802. else
  803. {
  804. pvPropertyBuffer = NULL;
  805. }
  806. if (pvPropertyBuffer)
  807. {
  808. //
  809. // Read and save device properties.
  810. //
  811. ntStatus = IoGetDeviceProperty(pPdo,
  812. DevicePropertyClassName,
  813. WD_MAX_PROPERTY_SIZE,
  814. pvPropertyBuffer,
  815. &ulLength);
  816. if (NT_SUCCESS(ntStatus))
  817. {
  818. RtlWriteRegistryValue(RTL_REGISTRY_ABSOLUTE,
  819. WD_KEY_WATCHDOG_DISPLAY,
  820. L"DeviceClass",
  821. REG_SZ,
  822. pvPropertyBuffer,
  823. ulLength);
  824. }
  825. ntStatus = IoGetDeviceProperty(pPdo,
  826. DevicePropertyDeviceDescription,
  827. WD_MAX_PROPERTY_SIZE,
  828. pvPropertyBuffer,
  829. &ulLength);
  830. if (NT_SUCCESS(ntStatus))
  831. {
  832. RtlWriteRegistryValue(RTL_REGISTRY_ABSOLUTE,
  833. WD_KEY_WATCHDOG_DISPLAY,
  834. L"DeviceDescription",
  835. REG_SZ,
  836. pvPropertyBuffer,
  837. ulLength);
  838. }
  839. ntStatus = IoGetDeviceProperty(pPdo,
  840. DevicePropertyFriendlyName,
  841. WD_MAX_PROPERTY_SIZE,
  842. pvPropertyBuffer,
  843. &ulLength);
  844. if (NT_SUCCESS(ntStatus))
  845. {
  846. RtlWriteRegistryValue(RTL_REGISTRY_ABSOLUTE,
  847. WD_KEY_WATCHDOG_DISPLAY,
  848. L"DeviceFriendlyName",
  849. REG_SZ,
  850. pvPropertyBuffer,
  851. ulLength);
  852. }
  853. ntStatus = IoGetDeviceProperty(pPdo,
  854. DevicePropertyHardwareID,
  855. WD_MAX_PROPERTY_SIZE,
  856. pvPropertyBuffer,
  857. &ulLength);
  858. if (NT_SUCCESS(ntStatus))
  859. {
  860. RtlWriteRegistryValue(RTL_REGISTRY_ABSOLUTE,
  861. WD_KEY_WATCHDOG_DISPLAY,
  862. L"HardwareID",
  863. REG_MULTI_SZ,
  864. pvPropertyBuffer,
  865. ulLength);
  866. }
  867. ntStatus = IoGetDeviceProperty(pPdo,
  868. DevicePropertyManufacturer,
  869. WD_MAX_PROPERTY_SIZE,
  870. pvPropertyBuffer,
  871. &ulLength);
  872. if (NT_SUCCESS(ntStatus))
  873. {
  874. RtlWriteRegistryValue(RTL_REGISTRY_ABSOLUTE,
  875. WD_KEY_WATCHDOG_DISPLAY,
  876. L"Manufacturer",
  877. REG_SZ,
  878. pvPropertyBuffer,
  879. ulLength);
  880. }
  881. //
  882. // Release property buffer.
  883. //
  884. ExFreePool(pvPropertyBuffer);
  885. pvPropertyBuffer = NULL;
  886. }
  887. if (TRUE == s_bFirstTime)
  888. {
  889. //
  890. // Knock down Shutdown flag. Videoprt always sets this value upon receiving
  891. // IRP_MN_SHUTDOWN. If the value is not there on the next boot we will know
  892. // that user rebooted dirty.
  893. //
  894. // TODO: Drop it (and the stuff in videoprt) once we have NtQueryLastShutDownType()
  895. // API implemented.
  896. //
  897. ulValue = 0;
  898. RtlWriteRegistryValue(RTL_REGISTRY_ABSOLUTE,
  899. WD_KEY_WATCHDOG_DISPLAY,
  900. L"Shutdown",
  901. REG_DWORD,
  902. &ulValue,
  903. sizeof (ulValue));
  904. }
  905. }
  906. //
  907. // Flush registry in case we're going to break in / bugcheck or if this is first time.
  908. //
  909. if ((TRUE == s_bFirstTime) || (TRUE == bBreakIn) || (0 == s_ulDisableBugcheck))
  910. {
  911. WdFlushRegistryKey(pWatch, WD_KEY_WATCHDOG_DISPLAY);
  912. }
  913. }
  914. //
  915. // Notify the videoprt of the device object causing the failure.
  916. //
  917. VpNotifyEaData(WatchdogContext->hDriver, NULL);
  918. //
  919. // Bugcheck machine without kernel debugger connected and with bugcheck EA enabled.
  920. // Bugcheck EA is enabled on SKUs below Server.
  921. //
  922. if (1 == ulDebuggerNotPresent)
  923. {
  924. if (s_ulEaRecovery)
  925. {
  926. Recovered = WatchdogInjectExceptionIntoThread(pThread, WatchdogContext);
  927. }
  928. if ((0 == s_ulDisableBugcheck) && (FALSE == Recovered))
  929. {
  930. KeBugCheckEx(pBugCheckData->ulBugCheckCode,
  931. pBugCheckData->ulpBugCheckParameter1,
  932. pBugCheckData->ulpBugCheckParameter2,
  933. (ULONG_PTR)pUnicodeDriverName,
  934. pBugCheckData->ulpBugCheckParameter4);
  935. }
  936. }
  937. else
  938. {
  939. if (TRUE == bBreakIn)
  940. {
  941. DbgBreakPoint();
  942. if (s_ulEaRecovery && (WdDisableRecovery == FALSE))
  943. {
  944. Recovered = WatchdogInjectExceptionIntoThread(pThread, WatchdogContext);
  945. }
  946. }
  947. }
  948. }
  949. else
  950. {
  951. //
  952. // Recovery - knock down EventFlag in registry and update statics.
  953. //
  954. RtlDeleteRegistryValue(RTL_REGISTRY_ABSOLUTE,
  955. WD_KEY_WATCHDOG_DISPLAY,
  956. L"EventFlag");
  957. s_bEventLogged = FALSE;
  958. s_ulCurrentBreakPointDelay = s_ulBreakPointDelay;
  959. }
  960. //
  961. // Reenable event processing in this module.
  962. //
  963. s_bFirstTime = FALSE;
  964. InterlockedExchange(&g_lDisplayHandlerState, WD_HANDLER_IDLE);
  965. //
  966. // Dereference objects and resume watchdog event processing.
  967. //
  968. ObDereferenceObject(pFdo);
  969. ObDereferenceObject(pPdo);
  970. WdCompleteEvent(pWatch, pThread);
  971. //
  972. // If we Recovered then raise a hard error notifing the user
  973. // of the situation. We do this here because the raise hard error
  974. // is synchronous and waits for user input. So we'll raise the hard
  975. // error after everything else is done.
  976. //
  977. if (Recovered) {
  978. static ULONG ulHardErrorInProgress = FALSE;
  979. //
  980. // If we hang and recover several times, don't allow more than
  981. // one dialog to appear on the screen. Only allow the dialog
  982. // to pop up again, after the user has hit "ok".
  983. //
  984. if (InterlockedCompareExchange(&ulHardErrorInProgress,
  985. TRUE,
  986. FALSE) == FALSE) {
  987. ULONG Response;
  988. ExRaiseHardError(0xC0000415, //STATUS_HUNG_DISPLAY_DRIVER_THREAD
  989. 1,
  990. 1,
  991. (PULONG_PTR)&pUnicodeDriverName,
  992. OptionOk,
  993. &Response);
  994. InterlockedExchange(&ulHardErrorInProgress, FALSE);
  995. }
  996. }
  997. return;
  998. } // WdBugCheckStuckDriver()