Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1474 lines
38 KiB

  1. /*++
  2. Copyright (c) 1996 Microsoft Corporation
  3. Module Name:
  4. dminit.c
  5. Abstract:
  6. Contains the initialization code for the Cluster Database Manager
  7. Author:
  8. John Vert (jvert) 24-Apr-1996
  9. Revision History:
  10. --*/
  11. #include "dmp.h"
  12. //
  13. // Global Data
  14. //
  // Handle to the root of the cluster database; opened by DmInitialize on
  // DmpClusterParametersKeyName under HKEY_LOCAL_MACHINE and watched by the
  // registry flusher thread.
  15. HKEY DmpRoot;
  // List of DMKEY records; initialized in DmInitialize and walked by
  // DmpInvalidateKeys.
  16. LIST_ENTRY KeyList;
  // Initialized in DmInitialize together with KeyList.
  // NOTE(review): presumably guards KeyList -- confirm against the key
  // open/close code.
  17. CRITICAL_SECTION KeyLock;
  // Root key of the cluster database, obtained from DmGetRootKey in
  // DmpOpenKeys.
  18. HDMKEY DmClusterParametersKey;
  // The following well-known subkeys are opened by DmpOpenKeys through
  // DmpKeyTable.
  19. HDMKEY DmResourcesKey;
  20. HDMKEY DmResourceTypesKey;
  21. HDMKEY DmGroupsKey;
  22. HDMKEY DmNodesKey;
  23. HDMKEY DmNetworksKey;
  24. HDMKEY DmNetInterfacesKey;
  25. HDMKEY DmQuorumKey;
  // Auto-reset event created in DmInitialize; signalled and closed in
  // DmShutdown.
  26. HANDLE ghQuoLogOpenEvent=NULL;
  // Lock on the database root.  Initialized with INITIALIZE_LOCK in
  // DmInitialize; taken around registry notification setup and checkpointing.
  27. #if NO_SHARED_LOCKS
  28. CRITICAL_SECTION gLockDmpRoot;
  29. #else
  30. RTL_RESOURCE gLockDmpRoot;
  31. #endif
  // Set to TRUE by DmUpdateFormNewCluster when not all nodes are up; cleared
  // by DmShutdown once the quorum log is closed.
  32. BOOL gbIsQuoLoggingOn=FALSE;
  33. HANDLE ghDiskManTimer=NULL;//disk management timer
  34. PFM_RESOURCE gpQuoResource=NULL; //set when DMFormNewCluster is completed
  35. HANDLE ghCheckpointTimer = NULL; //timer for periodic checkpointing
  36. BOOL gbDmInited = FALSE; //set to TRUE when all phases of dm initialization are over
  // Quorum log handle, defined in another module.
  37. extern HLOG ghQuoLog;
  38. BOOL gbDmpShutdownUpdates = FALSE;
  39. //define public cluster key value names
  40. const WCHAR cszPath[]= CLUSREG_NAME_QUORUM_PATH;
  41. const WCHAR cszMaxQuorumLogSize[]=CLUSREG_NAME_QUORUM_MAX_LOG_SIZE;
  42. const WCHAR cszParameters[] = CLUSREG_KEYNAME_PARAMETERS;
  43. //other const strings
  44. const WCHAR cszQuoFileName[]=L"quolog.log";
  45. const WCHAR cszQuoTombStoneFile[]=L"quotomb.stn";
  46. const WCHAR cszTmpQuoTombStoneFile[]=L"quotomb.tmp";
  // Dispatch table handed to GumReceiveUpdates by DmJoin and DmFormNewCluster.
  // NOTE(review): the leading number in each entry is presumably the argument
  // count of the routine -- confirm against GUM_DISPATCH_ENTRY.
  47. GUM_DISPATCH_ENTRY DmGumDispatchTable[] = {
  48. {3, (PGUM_DISPATCH_ROUTINE1)DmpUpdateCreateKey},
  49. {4, (PGUM_DISPATCH_ROUTINE1)DmpUpdateSetSecurity}
  50. };
  51. //
  52. // Global data for interfacing with registry watcher thread
  53. //
  // Thread handle plus the "stop" and "restart" events; all three are created
  // in DmpStartFlusher and closed in DmpShutdownFlusher.
  54. HANDLE hDmpRegistryFlusher=NULL;
  55. HANDLE hDmpRegistryEvent=NULL;
  56. HANDLE hDmpRegistryRestart=NULL;
  // Thread routine of the registry flusher (defined below).
  57. DWORD
  58. DmpRegistryFlusher(
  59. IN LPVOID lpThreadParameter
  60. );
  61. //
  62. // Local function prototypes
  63. //
  64. VOID
  65. DmpInvalidateKeys(
  66. VOID
  67. );
  68. VOID
  69. DmpReopenKeys(
  70. VOID
  71. );
  72. DWORD
  73. DmpLoadHive(
  74. IN LPCWSTR Path
  75. );
  // Pairs a global key handle with the name of the subkey it is opened on.
  // NOTE(review): Name only ever points at string constants here; LPCWSTR
  // would describe that better -- confirm the parameter type of DmOpenKey
  // before changing it.
  76. typedef struct _DMP_KEY_DEF {
  77. HDMKEY *pKey;
  78. LPWSTR Name;
  79. } DMP_KEY_DEF;
  // Table iterated by DmpOpenKeys to open every standard cluster subkey.
  80. DMP_KEY_DEF DmpKeyTable[] = {
  81. {&DmResourcesKey, CLUSREG_KEYNAME_RESOURCES},
  82. {&DmResourceTypesKey, CLUSREG_KEYNAME_RESOURCE_TYPES},
  83. {&DmQuorumKey, CLUSREG_KEYNAME_QUORUM},
  84. {&DmGroupsKey, CLUSREG_KEYNAME_GROUPS},
  85. {&DmNodesKey, CLUSREG_KEYNAME_NODES},
  86. {&DmNetworksKey, CLUSREG_KEYNAME_NETWORKS},
  87. {&DmNetInterfacesKey, CLUSREG_KEYNAME_NETINTERFACES}};
  88. DWORD
  89. DmInitialize(
  90. VOID
  91. )
  92. /*++
  93. Routine Description:
  94. Inits the config database manager
  95. Arguments:
  96. None
  97. Return Value:
  98. ERROR_SUCCESS if successful
  99. Win32 error code otherwise
  100. --*/
  101. {
  102. BOOL Success;
  103. DWORD Status = ERROR_SUCCESS;
  104. DWORD dwOut;
  105. ClRtlLogPrint(LOG_NOISE,"[DM] Initialization\n");
  106. InitializeListHead(&KeyList);
  107. InitializeCriticalSection(&KeyLock);
  108. //create a critical section for locking the database while checkpointing
  109. INITIALIZE_LOCK(gLockDmpRoot);
  110. //create a named event that is used for waiting for quorum resource
  111. //to go online
  112. ghQuoLogOpenEvent = CreateEvent(NULL, FALSE, FALSE, NULL);
  113. if (!ghQuoLogOpenEvent)
  114. {
  115. CL_UNEXPECTED_ERROR((Status = GetLastError()));
  116. goto FnExit;
  117. }
  118. Success = DmpInitNotify();
  119. CL_ASSERT(Success);
  120. if (!Success)
  121. {
  122. Status = GetLastError();
  123. goto FnExit;
  124. }
  125. //find out if the databasecopy was in progresss on last death
  126. DmpGetDwordFromClusterServer(L"ClusterDatabaseCopyInProgress", &dwOut, 0);
  127. LoadClusterDatabase:
  128. //
  129. // Open key to root of cluster.
  130. //
  131. Status = RegOpenKeyW(HKEY_LOCAL_MACHINE,
  132. DmpClusterParametersKeyName,
  133. &DmpRoot);
  134. //
  135. // If the key was not found, go load the database.
  136. //
  137. if (Status == ERROR_FILE_NOT_FOUND) {
  138. WCHAR Path[MAX_PATH];
  139. WCHAR BkpPath[MAX_PATH];
  140. WCHAR *p;
  141. Status = GetModuleFileName(NULL, Path, MAX_PATH);
  142. if (Status == 0) {
  143. Status = GetLastError();
  144. ClRtlLogPrint(LOG_CRITICAL,
  145. "[DM] Couldn't find cluster database, status=%1!u!\n",
  146. Status);
  147. goto FnExit;
  148. }
  149. //get the name of the cluster database
  150. p=wcsrchr(Path, L'\\');
  151. if (p == NULL)
  152. {
  153. Status = ERROR_FILE_NOT_FOUND;
  154. CL_UNEXPECTED_ERROR(Status);
  155. goto FnExit;
  156. }
  157. //see if we should load the hive from the old one or the bkp file
  158. *p = L'\0';
  159. wcscpy(BkpPath, Path);
  160. #ifdef OLD_WAY
  161. wcscat(Path, L"\\CLUSDB");
  162. wcscat(BkpPath, L"\\CLUSTER_DATABASE_TMPBKP_NAME");
  163. #else // OLD_WAY
  164. wcscat(Path, L"\\"CLUSTER_DATABASE_NAME );
  165. wcscat(BkpPath, L"\\"CLUSTER_DATABASE_TMPBKP_NAME);
  166. #endif // OLD_WAY
  167. if (dwOut)
  168. {
  169. //the backip file must exist
  170. ClRtlLogPrint(LOG_NOISE,
  171. "[DM] DmInitialize:: DatabaseCopy was in progress on last death, get hive from %1!ws!!\n",
  172. BkpPath);
  173. //set file attributes of the BkpPath
  174. if (!SetFileAttributes(BkpPath, FILE_ATTRIBUTE_NORMAL))
  175. {
  176. Status = GetLastError();
  177. ClRtlLogPrint(LOG_UNUSUAL,
  178. "[DM] DmInitialize:: SetFileAttrib on BkpPath %1!ws! failed, Status=%2!u!\n",
  179. BkpPath, Status);
  180. goto FnExit;
  181. }
  182. //copyfilex preserves the attributes on the original file
  183. if (!CopyFileEx(BkpPath, Path, NULL, NULL, NULL, 0))
  184. {
  185. Status = GetLastError();
  186. ClRtlLogPrint(LOG_CRITICAL,
  187. "[DM] DmInitialize:: Databasecopy was in progress,Failed to copy %1!ws! to %2!ws!, Status=%3!u!\n",
  188. BkpPath, Path, Status);
  189. //set the file attribute on the backup, so that
  190. //nobody mucks with it without knowing what they are
  191. //doing
  192. SetFileAttributes(BkpPath, FILE_ATTRIBUTE_HIDDEN|FILE_ATTRIBUTE_READONLY);
  193. goto FnExit;
  194. }
  195. //now we can reset the DatabaseCopyInProgress value in the registry
  196. //set databaseCopyInProgress key to FALSE
  197. //This will flush the key as well
  198. Status = DmpSetDwordInClusterServer( L"ClusterDatabaseCopyInProgress", 0);
  199. if (Status != ERROR_SUCCESS)
  200. {
  201. ClRtlLogPrint(LOG_CRITICAL,
  202. "[DM] DmInitialize:: Failed to reset ClusterDatabaseCopyInProgress, Status=%1!u!\n",
  203. Status);
  204. goto FnExit;
  205. }
  206. //Now we can delete the backup path, since the key has been flushed
  207. if (!DeleteFileW(BkpPath))
  208. {
  209. ClRtlLogPrint(LOG_CRITICAL,
  210. "[DM] DmInitialize:: Failed to delete the backup when it wasnt needed,Status=%1!u!\n",
  211. GetLastError());
  212. //this is not fatal so we ignore the error
  213. }
  214. }
  215. else
  216. {
  217. //the backup file might exist
  218. //this is true when safe copy makes a backup but hasnt
  219. //set the value DatabaseCopyInProgress in the registry
  220. //if it does delete it
  221. //set file attributes of the BkpPath
  222. if (!SetFileAttributes(BkpPath, FILE_ATTRIBUTE_NORMAL))
  223. {
  224. //errors are not fatal, we just ignore them
  225. //this may fail because the path doesnt exist
  226. }
  227. //Now we can delete the backup path, since the key has been flushed
  228. //this is not fatal so we ignore the error
  229. if (DeleteFileW(BkpPath))
  230. {
  231. ClRtlLogPrint(LOG_NOISE,
  232. "[DM] DmInitialize:: Deleted the unneeded backup of the cluster database\n");
  233. }
  234. }
  235. Status = DmpLoadHive(Path);
  236. if (Status != ERROR_SUCCESS)
  237. {
  238. ClRtlLogPrint(LOG_CRITICAL,
  239. "[DM] Couldn't load cluster database\n");
  240. CsLogEventData(LOG_CRITICAL,
  241. DM_DATABASE_CORRUPT_OR_MISSING,
  242. sizeof(Status),
  243. &Status);
  244. goto FnExit;
  245. }
  246. Status = RegOpenKeyW(HKEY_LOCAL_MACHINE,
  247. DmpClusterParametersKeyName,
  248. &DmpRoot);
  249. //
  250. // HACKHACK John Vert (jvert) 6/3/1997
  251. // There is a bug in the registry with refresh
  252. // where the Parent field in the root cell doesn't
  253. // get flushed to disk, so it gets blasted if we
  254. // do a refresh. Then we crash in unload. So flush
  255. // out the registry to disk here to make sure the
  256. // right Parent field gets written to disk.
  257. //
  258. if (Status == ERROR_SUCCESS) {
  259. DWORD Dummy=0;
  260. //
  261. // Make something dirty in the root
  262. //
  263. RegSetValueEx(DmpRoot,
  264. L"Valid",
  265. 0,
  266. REG_DWORD,
  267. (PBYTE)&Dummy,
  268. sizeof(Dummy));
  269. RegDeleteValue(DmpRoot, L"Valid");
  270. Status = RegFlushKey(DmpRoot);
  271. }
  272. } else {
  273. //if the hive is already loaded we unload and reload it again
  274. //to make sure that it is loaded with the right flags and
  275. //also to make sure that the backup copy is used in case
  276. //of failures
  277. ClRtlLogPrint(LOG_CRITICAL,
  278. "[DM] DmInitialize: The hive was loaded- rollback, unload and reload again\n");
  279. //BUGBUG:: currently the unload flushes the hive, ideally we
  280. //would like to unload it without flushing it
  281. //This way a part transaction wont be a part of the hive
  282. //However, if somebody messes with the cluster hive using
  283. //regedt32 and if reg_no_lazy flush is not specified, some
  284. //changes might get flushed to the hive.
  285. //We can try and do the rollback in any case,
  286. //the rollback will fail if the registry wasnt loaded with the
  287. //reg_no_lazy_flush flag.
  288. //unload it and then proceed to reload it
  289. //this will take care of situations where a half baked clusdb
  290. //gets loaded because of failures
  291. Status = DmRollbackRegistry();
  292. if (Status != ERROR_SUCCESS)
  293. {
  294. //we ignore the error
  295. Status = ERROR_SUCCESS;
  296. }
  297. RegCloseKey(DmpRoot);
  298. Status = DmpUnloadHive();
  299. if (Status != ERROR_SUCCESS)
  300. {
  301. ClRtlLogPrint(LOG_CRITICAL,
  302. "[DM] DmInitialize: DmpUnloadHive failed, Status=%1!u!\n",
  303. Status);
  304. goto FnExit;
  305. }
  306. goto LoadClusterDatabase;
  307. }
  308. if (Status != ERROR_SUCCESS) {
  309. CL_UNEXPECTED_ERROR(Status);
  310. goto FnExit;
  311. }
  312. //
  313. // Create the registry watcher thread
  314. //
  315. Status = DmpStartFlusher();
  316. if (Status != ERROR_SUCCESS) {
  317. goto FnExit;
  318. }
  319. //
  320. // Open the cluster keys
  321. //
  322. Status = DmpOpenKeys(MAXIMUM_ALLOWED);
  323. if (Status != ERROR_SUCCESS) {
  324. CL_UNEXPECTED_ERROR( Status );
  325. goto FnExit;
  326. }
  327. FnExit:
  328. return(Status);
  329. }//DmInitialize
  330. DWORD
  331. DmpRegistryFlusher(
  332. IN LPVOID lpThreadParameter
  333. )
  334. /*++
  335. Routine Description:
  336. Registry watcher thread for explicitly flushing changes.
  337. Arguments:
  338. lpThreadParameter - not used
  339. Return Value:
  340. None.
  341. --*/
  342. {
  343. DWORD Status;
  344. HANDLE hEvent;
  345. HANDLE hTimer;
  346. HANDLE WaitArray[4];
  347. LARGE_INTEGER DueTime;
  348. BOOL Dirty = FALSE;
  349. //
  350. // Create a notification event and a delayed timer for lazy flushing.
  351. //
  352. hEvent = CreateEvent(NULL, TRUE, FALSE, NULL);
  353. if (hEvent == NULL) {
  354. Status = GetLastError();
  355. ClRtlLogPrint(LOG_CRITICAL,
  356. "[DM] DmpRegistryFlusher couldn't create notification event %1!d!\n",
  357. Status);
  358. goto error_exit1;
  359. }
  360. hTimer = CreateWaitableTimer(NULL, FALSE, NULL);
  361. if (hTimer == NULL) {
  362. Status = GetLastError();
  363. ClRtlLogPrint(LOG_CRITICAL,
  364. "[DM] DmpRegistryFlusher couldn't create notification event %1!d!\n",
  365. Status);
  366. goto error_exit2;
  367. }
  368. WaitArray[0] = hDmpRegistryEvent;
  369. WaitArray[1] = hEvent;
  370. WaitArray[2] = hTimer;
  371. WaitArray[3] = hDmpRegistryRestart;
  372. while (TRUE) {
  373. //
  374. // Set up a registry notification on DmpRoot. We acquire the lock here to
  375. // make sure that rollback or install is not messing with the database
  376. // while we are trying to get a notification.
  377. //
  378. ACQUIRE_EXCLUSIVE_LOCK(gLockDmpRoot);
  379. Status = RegNotifyChangeKeyValue(DmpRoot,
  380. TRUE,
  381. REG_LEGAL_CHANGE_FILTER,
  382. hEvent,
  383. TRUE);
  384. RELEASE_LOCK(gLockDmpRoot);
  385. if (Status != ERROR_SUCCESS) {
  386. ClRtlLogPrint(LOG_CRITICAL,
  387. "[DM] DmpRegistryFlusher couldn't register for notifications %1!d!\n",
  388. Status);
  389. break;
  390. }
  391. //
  392. // Wait for something to happen.
  393. //
  394. Status = WaitForMultipleObjects(4,
  395. WaitArray,
  396. FALSE,
  397. (DWORD)-1);
  398. switch (Status) {
  399. case 0:
  400. ClRtlLogPrint(LOG_NOISE,"[DM] DmpRegistryFlusher: got 0\r\n");
  401. //
  402. // We have been asked to stop, clean up and exit
  403. //
  404. Status = ERROR_SUCCESS;
  405. if (Dirty) {
  406. //
  407. // Make sure any changes that we haven't gotten around to flushing
  408. // get flushed now.
  409. //
  410. DmCommitRegistry();
  411. }
  412. ClRtlLogPrint(LOG_NOISE,"[DM] DmpRegistryFlusher: exiting\r\n");
  413. goto error_exit3;
  414. break;
  415. case 1:
  416. //
  417. // A registry change has occurred. Set our timer to
  418. // go off in 5 seconds. At that point we will do the
  419. // actual flush.
  420. //
  421. //ClRtlLogPrint(LOG_NOISE,"[DM] DmpRegistryFlusher: got 1\r\n");
  422. DueTime.QuadPart = -5 * 10 * 1000 * 1000;
  423. if (!SetWaitableTimer(hTimer,
  424. &DueTime,
  425. 0,
  426. NULL,
  427. NULL,
  428. FALSE)) {
  429. //
  430. // Some error occurred, go ahead and flush now.
  431. //
  432. Status = GetLastError();
  433. ClRtlLogPrint(LOG_CRITICAL,
  434. "[DM] DmpRegistryFlusher failed to set lazy flush timer %1!d!\n",
  435. Status);
  436. #if DBG
  437. CL_ASSERT(FALSE);
  438. #endif
  439. DmCommitRegistry();
  440. Dirty = FALSE;
  441. } else {
  442. Dirty = TRUE;
  443. }
  444. break;
  445. case 2:
  446. //
  447. // The lazy flush timer has gone off, commit the registry now.
  448. //
  449. //ClRtlLogPrint(LOG_NOISE,"[DM] DmpRegistryFlusher: got 2\r\n");
  450. DmCommitRegistry();
  451. Dirty = FALSE;
  452. break;
  453. case 3:
  454. //
  455. // DmpRoot has been changed, simply restart the loop with the new handle.
  456. //
  457. ClRtlLogPrint(LOG_NOISE,"[DM] DmpRegistryFlusher: restarting\n");
  458. break;
  459. default:
  460. //
  461. // Something very odd has happened
  462. //
  463. ClRtlLogPrint(LOG_CRITICAL,
  464. "[DM] DmpRegistryFlusher got error %1!d! from WaitForMultipleObjects\n",
  465. Status);
  466. goto error_exit3;
  467. }
  468. }
  469. error_exit3:
  470. CloseHandle(hTimer);
  471. error_exit2:
  472. CloseHandle(hEvent);
  473. error_exit1:
  474. if (Status != ERROR_SUCCESS) {
  475. ClRtlLogPrint(LOG_CRITICAL,
  476. "[DM] DmpRegistryFlusher exiting abnormally, status %1!d!\n",
  477. Status);
  478. }
  479. return(Status);
  480. }
  481. DWORD
  482. DmJoin(
  483. IN RPC_BINDING_HANDLE RpcBinding,
  484. OUT DWORD *StartSeq
  485. )
  486. /*++
  487. Routine Description:
  488. Performs the join and synchronization process for the
  489. database manager.
  490. Arguments:
  491. RpcBinding - Supplies an RPC binding handle to the Join Master
  492. Return Value:
  493. ERROR_SUCCESS if successful
  494. Win32 error otherwise.
  495. --*/
  496. {
  497. DWORD Status;
  498. DWORD GumSequence;
  499. DWORD CurrentSequence;
  500. //
  501. // Register our update handler.
  502. //
  503. GumReceiveUpdates(TRUE,
  504. GumUpdateRegistry,
  505. DmpUpdateHandler,
  506. DmWriteToQuorumLog,
  507. sizeof(DmGumDispatchTable)/sizeof(GUM_DISPATCH_ENTRY),
  508. DmGumDispatchTable,
  509. NULL);
  510. retry:
  511. CurrentSequence = DmpGetRegistrySequence();
  512. Status = GumBeginJoinUpdate(GumUpdateRegistry, &GumSequence);
  513. if (Status != ERROR_SUCCESS) {
  514. ClRtlLogPrint(LOG_CRITICAL,
  515. "[DM] GumBeginJoinUpdate failed %1!d!\n",
  516. Status);
  517. return(Status);
  518. }
  519. /*
  520. if (CurrentSequence == GumSequence) {
  521. //
  522. // Our registry sequence already matches. No need to slurp
  523. // down a new copy.
  524. //
  525. ClRtlLogPrint(LOG_NOISE,
  526. "[DM] DmJoin: registry database is up-to-date\n");
  527. } else
  528. */
  529. //SS: always get the database irrespective of the sequence numbers
  530. //this is because transactions may be lost in the log file due
  531. //to the fact that it is not write through and because of certain
  532. //race conditions in down notifications vs gum failure conditions.
  533. {
  534. ClRtlLogPrint(LOG_NOISE,
  535. "[DM] DmJoin: getting new registry database\n");
  536. Status = DmpSyncDatabase(RpcBinding, NULL);
  537. if (Status != ERROR_SUCCESS) {
  538. ClRtlLogPrint(LOG_UNUSUAL,
  539. "[DM] DmJoin: DmpSyncDatabase failed %1!d!\n",
  540. Status);
  541. return(Status);
  542. }
  543. }
  544. //
  545. // Issue GUM join update
  546. //
  547. Status = GumEndJoinUpdate(GumSequence,
  548. GumUpdateRegistry,
  549. DmUpdateJoin,
  550. 0,
  551. NULL);
  552. if (Status == ERROR_CLUSTER_DATABASE_SEQMISMATCH) {
  553. ClRtlLogPrint(LOG_UNUSUAL,
  554. "[DM] GumEndJoinUpdate with sequence %1!d! failed with a sequence mismatch\n",
  555. GumSequence);
  556. goto retry;
  557. } else if (Status != ERROR_SUCCESS) {
  558. ClRtlLogPrint(LOG_CRITICAL,
  559. "[DM] GumEndJoinUpdate with sequence %1!d! failed with status %2!d!\n",
  560. GumSequence,
  561. Status);
  562. return(Status);
  563. }
  564. *StartSeq = GumSequence;
  565. return(ERROR_SUCCESS);
  566. } // DmJoin
  567. /*
  568. DWORD
  569. DmFormNewCluster(
  570. VOID
  571. )
  572. {
  573. DWORD Status;
  574. //
  575. // Set the current GUM sequence to be one more than the one in the registry.
  576. //
  577. // SS: this will be the one to be used for the next gum transaction,
  578. // it should be one than the current as the logger discards the first of
  579. // every record the same transaction number to resolve changes made when the
  580. // locker/logger node dies in the middle of a transaction
  581. GumSetCurrentSequence(GumUpdateRegistry, (DmpGetRegistrySequence()+1));
  582. return(ERROR_SUCCESS);
  583. } // DmFormNewCluster
  584. */
  585. DWORD
  586. DmFormNewCluster(
  587. VOID
  588. )
  589. /*++
  590. Routine Description:
  591. This routine sets the gum sequence number from the registry before
  592. logs are unrolled and prepares the quorum object for quorum logging.
  593. It also hooks events for node up/down notifications.
  594. Arguments:
  595. None.
  596. Return Value:
  597. ERROR_SUCCESS if successful.
  598. A Win32 error code on failure.
  599. --*/
  600. {
  601. DWORD dwError=ERROR_SUCCESS;
  602. //
  603. // Set the current GUM sequence to be one more than the one in the registry.
  604. //
  605. // SS: this will be the one to be used for the next gum transaction,
  606. // it should be one than the current as the logger discards the first of
  607. // every record the same transaction number to resolve changes made when the
  608. // locker/logger node dies in the middle of a transaction
  609. GumSetCurrentSequence(GumUpdateRegistry, (DmpGetRegistrySequence()+1));
  610. //
  611. // Register our update handler.
  612. //
  613. GumReceiveUpdates(FALSE,
  614. GumUpdateRegistry,
  615. DmpUpdateHandler,
  616. DmWriteToQuorumLog,
  617. sizeof(DmGumDispatchTable)/sizeof(GUM_DISPATCH_ENTRY),
  618. DmGumDispatchTable,
  619. NULL);
  620. //hook the callback for node related notification with the event processor
  621. if (dwError = DmpHookEventHandler())
  622. {
  623. ClRtlLogPrint(LOG_UNUSUAL,
  624. "[DM] DmUpdateFormNewCluster: DmpHookEventHandler failed 0x!08lx!\r\n",
  625. dwError);
  626. goto FnExit;
  627. };
  628. //get the quorum resource and hook the callback for notification on quorum resource
  629. if (dwError = DmpHookQuorumNotify())
  630. {
  631. ClRtlLogPrint(LOG_UNUSUAL,
  632. "[DM] DmUpdateFormNewCluster: DmpHookQuorumNotify failed 0x%1!08lx!\r\n",
  633. dwError);
  634. goto FnExit;
  635. };
  636. //SS: if this procedure is successfully completed gpQuoResource is NON NULL.
  637. FnExit:
  638. return(dwError);
  639. } // DmUpdateFormNewCluster
  640. DWORD
  641. DmUpdateFormNewCluster(
  642. VOID
  643. )
  644. /*++
  645. Routine Description:
  646. This routine updates the cluster registry after the quorum resource has
  647. been arbitrated as part of forming a new cluster. The database manager
  648. is expected to read logs or do whatever it needs to update the current
  649. state of the registry - presumably using logs that are written to the
  650. quorum resource. This implies that the quorum resource represents some
  651. form of stable storage.
  652. Arguments:
  653. None.
  654. Return Value:
  655. ERROR_SUCCESS if successful.
  656. A Win32 error code on failure.
  657. --*/
  658. {
  659. DWORD dwError=ERROR_SUCCESS;
  660. BOOL bAreAllNodesUp = TRUE; //assume all nodes are up
  661. //since we havent been logging as yet, take a checkpoint
  662. if (ghQuoLog)
  663. {
  664. //get a checkpoint database
  665. ClRtlLogPrint(LOG_NOISE,
  666. "[DM] DmUpdateFormNewCluster - taking a checkpoint\r\n");
  667. //
  668. // Chittur Subbaraman (chitturs) - 6/3/99
  669. //
  670. // Make sure the gLockDmpRoot is held before LogCheckPoint is called
  671. // so as to maintain the ordering between this lock and the log lock.
  672. //
  673. ACQUIRE_SHARED_LOCK(gLockDmpRoot);
  674. dwError = LogCheckPoint(ghQuoLog, TRUE, NULL, 0);
  675. RELEASE_LOCK(gLockDmpRoot);
  676. if (dwError != ERROR_SUCCESS)
  677. {
  678. ClRtlLogPrint(LOG_CRITICAL,
  679. "[DM] DmUpdateFormNewCluster - Failed to take a checkpoint in the log file\r\n");
  680. CL_UNEXPECTED_ERROR(dwError);
  681. }
  682. }
  683. //if all nodes are not up, turn quorum logging on
  684. if ((dwError = OmEnumObjects(ObjectTypeNode, DmpNodeObjEnumCb, &bAreAllNodesUp, NULL))
  685. != ERROR_SUCCESS)
  686. {
  687. ClRtlLogPrint(LOG_UNUSUAL,
  688. "[DM] DmUpdateFormNewCluster : OmEnumObjects returned 0x%1!08lx!\r\n",
  689. dwError);
  690. goto FnExit;
  691. }
  692. if (!bAreAllNodesUp)
  693. {
  694. ClRtlLogPrint(LOG_NOISE,
  695. "[DM] DmUpdateFormNewCluster - some node down\r\n");
  696. gbIsQuoLoggingOn = TRUE;
  697. }
  698. //add a timer to monitor disk space, should be done after we have formed.
  699. ghDiskManTimer = CreateWaitableTimer(NULL, FALSE, NULL);
  700. if (!ghDiskManTimer)
  701. {
  702. CL_LOGFAILURE(dwError = GetLastError());
  703. goto FnExit;
  704. }
  705. AddTimerActivity(ghDiskManTimer, DISKSPACE_MANAGE_INTERVAL, 1, DmpDiskManage, NULL);
  706. gbDmInited = TRUE;
  707. FnExit:
  708. return (dwError);
  709. } // DmFormNewCluster
  710. /****
  711. @func DWORD | DmPauseDiskManTimer| The disk manager timer activity to monitor
  712. space on the quorum disk is set to a puased state.
  713. @rdesc Returns ERROR_SUCCESS on success. Else returns the error code.
  714. @comm This is called while the quorum resource is being changed.
  715. @xref <f DmRestartDiskManTimer>
  716. ****/
  717. DWORD DmPauseDiskManTimer()
  718. {
  719. DWORD dwError=ERROR_SUCCESS;
  720. if (ghDiskManTimer)
  721. dwError = PauseTimerActivity(ghDiskManTimer);
  722. return(dwError);
  723. }
  724. /****
  725. @func DWORD | DmRestartDiskManTimer| This disk manager activity to monitor
  726. space on the quorum disk is set back to activated state.
  727. @rdesc Returns ERROR_SUCCESS on success. Else returns the error code.
  728. @comm This is called after the quorum resource has been changed.
  729. @xref <f DmPauseDiskManTimer>
  730. ****/
  731. DWORD DmRestartDiskManTimer()
  732. {
  733. DWORD dwError=ERROR_SUCCESS;
  734. if (ghDiskManTimer)
  735. dwError = UnpauseTimerActivity(ghDiskManTimer);
  736. return(dwError);
  737. }
  738. /****
  739. @func DWORD | DmRollChanges| This waits for the quorum resource to come online at
  740. initialization when a cluster is being formed. The changes in the quorum
  741. log file are applied to the local cluster database.
  742. @rdesc Returns ERROR_SUCCESS on success. Else returns the error code.
  743. @comm This allows for partitions in time.
  744. @xref
  745. ****/
  746. DWORD DmRollChanges()
  747. {
  748. DWORD dwError=ERROR_SUCCESS;
  749. //before applying the changes validate that this quorum resource is the real one
  750. if ((dwError = DmpChkQuoTombStone()) != ERROR_SUCCESS)
  751. {
  752. ClRtlLogPrint(LOG_UNUSUAL,
  753. "[DM] DmRollChanges: DmpChkQuoTombStone() failed 0x%1!08lx!\r\n",
  754. dwError);
  755. goto FnExit;
  756. }
  757. if ((dwError = DmpApplyChanges()) != ERROR_SUCCESS)
  758. {
  759. ClRtlLogPrint(LOG_UNUSUAL,
  760. "[DM] DmRollChanges: DmpApplyChanges() failed 0x%1!08lx!\r\n",
  761. dwError);
  762. goto FnExit;
  763. }
  764. //ss: this is here since lm doesnt know about the ownership of quorum
  765. //disks today
  766. //call DmpCheckSpace
  767. if ((dwError = DmpCheckDiskSpace()) != ERROR_SUCCESS)
  768. {
  769. ClRtlLogPrint(LOG_UNUSUAL,
  770. "[DM] DmRollChanges: DmpCheckDiskSpace() failed 0x%1!08lx!\r\n",
  771. dwError);
  772. goto FnExit;
  773. }
  774. FnExit:
  775. return(dwError);
  776. }
  777. DWORD DmShutdown()
  778. {
  779. DWORD dwError;
  780. ClRtlLogPrint(LOG_NOISE,
  781. "[Dm] DmShutdown\r\n");
  782. //this will close the timer handle
  783. if (ghDiskManTimer) RemoveTimerActivity(ghDiskManTimer);
  784. if (gpQuoResource)
  785. {
  786. // DmFormNewCluster() completed
  787. //
  788. // Deregister from any further GUM updates
  789. //
  790. //GumIgnoreUpdates(GumUpdateRegistry, DmpUpdateHandler);
  791. }
  792. //unhook the callback for notification on quorum resource
  793. if (dwError = DmpUnhookQuorumNotify())
  794. {
  795. //just log the error as we are shutting down
  796. ClRtlLogPrint(LOG_UNUSUAL,
  797. "[DM] DmShutdown: DmpUnhookQuorumNotify failed 0x%1!08lx!\r\n",
  798. dwError);
  799. }
  800. //if the quorum log is open close it
  801. if (ghQuoLog)
  802. {
  803. LogClose(ghQuoLog);
  804. ghQuoLog = NULL;
  805. //dont try and log after this
  806. gbIsQuoLoggingOn = FALSE;
  807. }
  808. //close the event created for notification of the quorum resource to
  809. //go online
  810. if (ghQuoLogOpenEvent)
  811. {
  812. //wait any thread blocked on this
  813. SetEvent(ghQuoLogOpenEvent);
  814. CloseHandle(ghQuoLogOpenEvent);
  815. ghQuoLogOpenEvent = NULL;
  816. }
  817. //
  818. // Shut down the registry flusher thread.
  819. //
  820. DmpShutdownFlusher();
  821. return(dwError);
  822. }
  823. DWORD
  824. DmpStartFlusher(
  825. VOID
  826. )
  827. /*++
  828. Routine Description:
  829. Starts up a new registry flusher thread.
  830. Arguments:
  831. None.
  832. Return Value:
  833. ERROR_SUCCESS if successful
  834. Win32 error code otherwise
  835. --*/
  836. {
  837. DWORD ThreadId;
  838. ClRtlLogPrint(LOG_NOISE,"[DM] DmpStartFlusher: Entry\r\n");
  839. if (!hDmpRegistryFlusher)
  840. {
  841. hDmpRegistryEvent = CreateEventW(NULL,FALSE,FALSE,NULL);
  842. if (hDmpRegistryEvent == NULL) {
  843. return(GetLastError());
  844. }
  845. hDmpRegistryRestart = CreateEventW(NULL,FALSE,FALSE,NULL);
  846. if (hDmpRegistryRestart == NULL) {
  847. CloseHandle(hDmpRegistryEvent);
  848. return(GetLastError());
  849. }
  850. hDmpRegistryFlusher = CreateThread(NULL,
  851. 0,
  852. DmpRegistryFlusher,
  853. NULL,
  854. 0,
  855. &ThreadId);
  856. if (hDmpRegistryFlusher == NULL) {
  857. CloseHandle(hDmpRegistryRestart);
  858. CloseHandle(hDmpRegistryEvent);
  859. return(GetLastError());
  860. }
  861. ClRtlLogPrint(LOG_NOISE,"[DM] DmpStartFlusher: thread created\r\n");
  862. }
  863. return(ERROR_SUCCESS);
  864. }
  865. VOID
  866. DmpShutdownFlusher(
  867. VOID
  868. )
  869. /*++
  870. Routine Description:
  871. Cleanly shutsdown the registry flusher thread.
  872. Arguments:
  873. None.
  874. Return Value:
  875. None.
  876. --*/
  877. {
  878. ClRtlLogPrint(LOG_NOISE,"[DM] DmpShutdownFlusher: Entry\r\n");
  879. if (hDmpRegistryFlusher) {
  880. ClRtlLogPrint(LOG_NOISE,"[DM] DmpShutdownFlusher: Setting event\r\n");
  881. SetEvent(hDmpRegistryEvent);
  882. WaitForSingleObject(hDmpRegistryFlusher, INFINITE);
  883. CloseHandle(hDmpRegistryFlusher);
  884. hDmpRegistryFlusher = NULL;
  885. CloseHandle(hDmpRegistryEvent);
  886. CloseHandle(hDmpRegistryRestart);
  887. hDmpRegistryEvent = NULL;
  888. hDmpRegistryRestart = NULL;
  889. }
  890. }
  891. VOID
  892. DmpRestartFlusher(
  893. VOID
  894. )
  895. /*++
  896. Routine Description:
  897. Restarts the registry flusher thread if DmpRoot is being changed.
  898. N.B. In order for this to work correctly, gLockDmpRoot MUST be held!
  899. Arguments:
  900. None.
  901. Return Value:
  902. None.
  903. --*/
  904. {
  905. ClRtlLogPrint(LOG_NOISE,"[DM] DmpRestartFlusher: Entry\r\n");
  906. #if NO_SHARED_LOCKS
  907. CL_ASSERT(HandleToUlong(gLockDmpRoot.OwningThread) == GetCurrentThreadId());
  908. #else
  909. CL_ASSERT(HandleToUlong(gLockDmpRoot.ExclusiveOwnerThread) == GetCurrentThreadId());
  910. #endif
  911. SetEvent(hDmpRegistryRestart);
  912. }
  913. DWORD
  914. DmUpdateJoinCluster(
  915. VOID
  916. )
  917. /*++
  918. Routine Description:
  919. This routine is called after a node has successfully joined a cluster.
  920. It allows the DM to hook callbacks for node up/down notifications and for
  921. quorum resource change notification.
  922. Arguments:
  923. None.
  924. Return Value:
  925. ERROR_SUCCESS if successful.
  926. A Win32 error code on failure.
  927. --*/
  928. {
  929. DWORD dwError=ERROR_SUCCESS;
  930. ClRtlLogPrint(LOG_NOISE,
  931. "[DM] DmUpdateJoinCluster: Begin.\r\n");
  932. //hook the notification for node up/down so we can keep track of whether logging
  933. //should be on or off.
  934. if (dwError = DmpHookEventHandler())
  935. {
  936. //BUGBUG SS: do we log this or return this error code
  937. ClRtlLogPrint(LOG_UNUSUAL,
  938. "[DM] DmUpdateJoinCluster: DmpHookEventHandler failed 0x%1!08lx!\r\n",
  939. dwError);
  940. }
  941. //hook the callback for notification on quorum resource
  942. if (dwError = DmpHookQuorumNotify())
  943. {
  944. ClRtlLogPrint(LOG_UNUSUAL,
  945. "[DM] DmUpdateJoinCluster: DmpHookQuorumNotify failed 0x%1!08lx!\r\n",
  946. dwError);
  947. goto FnExit;
  948. }
  949. if ((dwError = DmpCheckDiskSpace()) != ERROR_SUCCESS)
  950. {
  951. ClRtlLogPrint(LOG_UNUSUAL,
  952. "[DM] DmUpdateJoinCluster: DmpCheckDiskSpace() failed 0x%1!08lx!\r\n",
  953. dwError);
  954. goto FnExit;
  955. }
  956. //add a timer to monitor disk space, should be done after we have joined.
  957. ghDiskManTimer = CreateWaitableTimer(NULL, FALSE, NULL);
  958. if (!ghDiskManTimer)
  959. {
  960. CL_LOGFAILURE(dwError = GetLastError());
  961. goto FnExit;
  962. }
  963. //register a periodic timer
  964. AddTimerActivity(ghDiskManTimer, DISKSPACE_MANAGE_INTERVAL, 1, DmpDiskManage, NULL);
  965. gbDmInited = TRUE;
  966. FnExit:
  967. return(dwError);
  968. } // DmUpdateJoinCluster
  969. DWORD
  970. DmpOpenKeys(
  971. IN REGSAM samDesired
  972. )
  973. /*++
  974. Routine Description:
  975. Opens all the standard cluster registry keys. If any of the
  976. keys are already opened, they will be closed and reopened.
  977. Arguments:
  978. samDesired - Supplies the access that the keys will be opened with.
  979. Return Value:
  980. ERROR_SUCCESS if successful.
  981. Win32 error code otherwise.
  982. --*/
  983. {
  984. DWORD i;
  985. DWORD Status;
  986. DmClusterParametersKey = DmGetRootKey( MAXIMUM_ALLOWED );
  987. if ( DmClusterParametersKey == NULL ) {
  988. Status = GetLastError();
  989. CL_UNEXPECTED_ERROR(Status);
  990. return(Status);
  991. }
  992. for (i=0;
  993. i<sizeof(DmpKeyTable)/sizeof(DMP_KEY_DEF);
  994. i++) {
  995. *DmpKeyTable[i].pKey = DmOpenKey(DmClusterParametersKey,
  996. DmpKeyTable[i].Name,
  997. samDesired);
  998. if (*DmpKeyTable[i].pKey == NULL) {
  999. Status = GetLastError();
  1000. ClRtlLogPrint(LOG_CRITICAL,
  1001. "[DM] Failed to open key %1!ws!, status %2!u!\n",
  1002. DmpKeyTable[i].Name,
  1003. Status);
  1004. CL_UNEXPECTED_ERROR( Status );
  1005. return(Status);
  1006. }
  1007. }
  1008. return(ERROR_SUCCESS);
  1009. }
  1010. VOID
  1011. DmpInvalidateKeys(
  1012. VOID
  1013. )
  1014. /*++
  1015. Routine Description:
  1016. Invalidates all open cluster registry keys.
  1017. Arguments:
  1018. None.
  1019. Return Value:
  1020. None.
  1021. --*/
  1022. {
  1023. PLIST_ENTRY ListEntry;
  1024. PDMKEY Key;
  1025. ListEntry = KeyList.Flink;
  1026. while (ListEntry != &KeyList) {
  1027. Key = CONTAINING_RECORD(ListEntry,
  1028. DMKEY,
  1029. ListEntry);
  1030. if (!Key->hKey)
  1031. {
  1032. ClRtlLogPrint(LOG_CRITICAL,
  1033. "[DM] DmpInvalidateKeys %1!ws! Key was deleted since last reopen but not closed\n",
  1034. Key->Name);
  1035. ClRtlLogPrint(LOG_CRITICAL,
  1036. "[DM] THIS MAY BE A KEY LEAK !!\r\n");
  1037. }
  1038. else
  1039. {
  1040. RegCloseKey(Key->hKey);
  1041. Key->hKey = NULL;
  1042. }
  1043. ListEntry = ListEntry->Flink;
  1044. }
  1045. }
  1046. VOID
  1047. DmpReopenKeys(
  1048. VOID
  1049. )
  1050. /*++
  1051. Routine Description:
  1052. Reopens all the keys that were invalidated by DmpInvalidateKeys
  1053. Arguments:
  1054. None
  1055. Return Value:
  1056. None.
  1057. --*/
  1058. {
  1059. PLIST_ENTRY ListEntry;
  1060. PDMKEY Key;
  1061. DWORD Status;
  1062. ListEntry = KeyList.Flink;
  1063. while (ListEntry != &KeyList) {
  1064. Key = CONTAINING_RECORD(ListEntry,
  1065. DMKEY,
  1066. ListEntry);
  1067. CL_ASSERT(Key->hKey == NULL);
  1068. Status = RegOpenKeyEx(DmpRoot,
  1069. Key->Name,
  1070. 0,
  1071. Key->GrantedAccess,
  1072. &Key->hKey);
  1073. if (Status != ERROR_SUCCESS) {
  1074. ClRtlLogPrint(LOG_CRITICAL,"[DM] Could not reopen key %1!ws! error %2!d!\n",Key->Name,Status);
  1075. // if the error is file not found, then the key was deleted while the handle
  1076. // was open. Set the key to NULL
  1077. // If the key is used after delete, it should be validated
  1078. if (Status == ERROR_FILE_NOT_FOUND)
  1079. Key->hKey = NULL;
  1080. else
  1081. CL_UNEXPECTED_ERROR(Status);
  1082. }
  1083. ListEntry = ListEntry->Flink;
  1084. }
  1085. }
  1086. DWORD
  1087. DmpGetRegistrySequence(
  1088. VOID
  1089. )
  1090. /*++
  1091. Routine Description:
  1092. Returns the current registry sequence stored in the registry.
  1093. Arguments:
  1094. None.
  1095. Return Value:
  1096. The current registry sequence.
  1097. --*/
  1098. {
  1099. DWORD Length;
  1100. DWORD Type;
  1101. DWORD Sequence;
  1102. DWORD Status;
  1103. Length = sizeof(Sequence);
  1104. Status = RegQueryValueExW(DmpRoot,
  1105. CLUSREG_NAME_CLUS_REG_SEQUENCE,
  1106. 0,
  1107. &Type,
  1108. (LPBYTE)&Sequence,
  1109. &Length);
  1110. if (Status != ERROR_SUCCESS) {
  1111. ClRtlLogPrint(LOG_UNUSUAL, "[DM] DmpGetRegistrySequence failed %1!u!\n",Status);
  1112. Sequence = 0;
  1113. }
  1114. return(Sequence);
  1115. }
  1116. DWORD DmWaitQuorumResOnline()
  1117. /*++
  1118. Routine Description:
  1119. Waits for quorum resource to come online. Used for quorum logging.
  1120. Arguments:
  1121. None
  1122. Return Value:
  1123. returns ERROR_SUCCESS - if the online event is signaled and the quorum
  1124. notification callback is called. Else returns the wait status.
  1125. --*/
  1126. {
  1127. //wait for the quorum resource to go online
  1128. //give it a minute
  1129. DWORD dwError;
  1130. if (ghQuoLogOpenEvent)
  1131. //dwError = WaitForSingleObject(ghQuoOnlineEvent, 60000*10);
  1132. dwError = WaitForSingleObject(ghQuoLogOpenEvent, INFINITE);
  1133. switch(dwError)
  1134. {
  1135. case WAIT_OBJECT_0:
  1136. //everything is fine
  1137. dwError = ERROR_SUCCESS;
  1138. break;
  1139. case WAIT_TIMEOUT:
  1140. //couldnt roll the changes
  1141. ClRtlLogPrint(LOG_UNUSUAL,
  1142. "[DM] DmRollChanges: Timed out waiting on dmInitEvent\r\n");
  1143. break;
  1144. case WAIT_FAILED:
  1145. CL_ASSERT(dwError != WAIT_FAILED);
  1146. ClRtlLogPrint(LOG_UNUSUAL,
  1147. "[DM] DmRollChanges: wait on dmInitEventfailed failed 0x%1!08lx!\r\n",
  1148. GetLastError());
  1149. break;
  1150. }
  1151. return(dwError);
  1152. }
  1153. VOID DmShutdownUpdates(
  1154. VOID
  1155. )
  1156. /*++
  1157. Routine Description:
  1158. Shutdown DM GUM updates.
  1159. Arguments:
  1160. None
  1161. Return Value:
  1162. None.
  1163. --*/
  1164. {
  1165. gbDmpShutdownUpdates = TRUE;
  1166. }