Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

3005 lines
104 KiB

  1. /*++
  2. Copyright (c) 1996 Microsoft Corporation
  3. Module Name:
  4. dmlog.c
  5. Abstract:
  6. Contains the quorum logging related functions for
  7. the cluster registry.
  8. Author:
  9. Sunita Shrivastava (sunitas) 24-Apr-1996
  10. Revision History:
  11. --*/
  12. #include "dmp.h"
  13. #include "tchar.h"
  14. #include "clusudef.h"
  15. /****
  16. @doc EXTERNAL INTERFACES CLUSSVC DM
  17. ****/
  18. //global static data
  19. HLOG ghQuoLog=NULL; //pointer to the quorum log
  20. DWORD gbIsQuoResOnline = FALSE;
  21. DWORD gbNeedToCheckPoint = FALSE;
  22. DWORD gbIsQuoResEnoughSpace = TRUE;
  23. HLOG ghNewQuoLog = NULL; //pointer to the new quorum resource
  24. //global data
  25. extern HANDLE ghQuoLogOpenEvent;
  26. extern BOOL gbIsQuoLoggingOn;
  27. extern HANDLE ghDiskManTimer;
  28. extern HANDLE ghCheckpointTimer;
  29. extern PFM_RESOURCE gpQuoResource; //set when DmFormNewCluster is complete
  30. extern BOOL gbDmInited;
  31. #if NO_SHARED_LOCKS
  32. extern CRITICAL_SECTION gLockDmpRoot;
  33. #else
  34. extern RTL_RESOURCE gLockDmpRoot;
  35. #endif
  36. //forward definitions
  37. void DmpLogCheckPointCb();
  38. /****
  39. @func DWORD | DmPrepareQuorumResChange| When the quorum resource is changed,
  40. the FM invokes this api on the owner node of the new quorum resource
  41. to create a new quorum log file.
  42. @parm IN PVOID | pResource | The new quorum resource.
  43. @parm IN LPCWSTR | lpszPath | The path for temporary cluster files.
  44. @parm IN DWORD | dwMaxQuoLogSize | The maximum size limit for the quorum log file.
  45. @comm When a quorum resource is changed, the fm calls this funtion before it
  46. updates the quorum resource. If a new log file needs to be created,
  47. a checkpoint is taken.
  48. @rdesc Returns a result code. ERROR_SUCCESS on success.
  49. @xref <f DmSwitchToNewQuorumLog>
  50. ****/
  51. DWORD DmPrepareQuorumResChange(
  52. IN PVOID pResource,
  53. IN LPCWSTR lpszPath,
  54. IN DWORD dwMaxQuoLogSize)
  55. {
  56. DWORD dwError=ERROR_SUCCESS;
  57. PFM_RESOURCE pNewQuoRes;
  58. WCHAR szFileName1[MAX_PATH]; //for new quorum log,for tombstonefile
  59. LSN FirstLsn;
  60. WCHAR szFileName2[MAX_PATH]; //for old quorum log, for temp tombstone
  61. DWORD dwCurLogSize;
  62. DWORD dwMaxLogSize;
  63. DWORD dwChkPtSequence;
  64. WIN32_FIND_DATA FindData;
  65. QfsHANDLE hSrchTmpFiles;
  66. pNewQuoRes = (PFM_RESOURCE)pResource;
  67. ClRtlLogPrint(LOG_NOISE,
  68. "[DM] DmPrepareQuorumResChange - Entry\r\n");
  69. //the resource is already online at this point
  70. //if the directory doesnt exist create it
  71. dwError = QfsClRtlCreateDirectory(lpszPath);
  72. if (dwError != ERROR_SUCCESS)
  73. {
  74. ClRtlLogPrint(LOG_NOISE,
  75. "[DM] DmPrepareQuorumResChange - Failed to create directory, Status=%1!u!\r\n",
  76. dwError);
  77. goto FnExit;
  78. }
  79. lstrcpyW(szFileName1, lpszPath);
  80. lstrcatW(szFileName1, cszQuoFileName);
  81. //if the log file is open here
  82. //this implies that the new quorum resource is the on the same node
  83. //as the old one
  84. if (ghQuoLog)
  85. {
  86. LogGetInfo(ghQuoLog, szFileName2, &dwCurLogSize, &dwMaxLogSize);
  87. //if the file is the same as the new log file, simply set the size
  88. if (!lstrcmpiW(szFileName2, szFileName1))
  89. {
  90. LogSetInfo(ghQuoLog, dwMaxQuoLogSize);
  91. ghNewQuoLog = ghQuoLog;
  92. goto FnExit;
  93. }
  94. }
  95. //delele all the quorum logging related files
  96. //delete the log if it exits
  97. QfsDeleteFile(szFileName1);
  98. //delete all checkpoint files
  99. lstrcpyW(szFileName2, lpszPath);
  100. lstrcatW(szFileName2, L"*.tmp");
  101. hSrchTmpFiles = QfsFindFirstFile(szFileName2, & FindData);
  102. if (QfsIsHandleValid(hSrchTmpFiles))
  103. {
  104. lstrcpyW(szFileName2, lpszPath);
  105. lstrcatW(szFileName2, FindData.cFileName);
  106. QfsDeleteFile(szFileName2);
  107. while (QfsFindNextFile( hSrchTmpFiles, & FindData))
  108. {
  109. lstrcpyW(szFileName2, lpszPath);
  110. lstrcatW(szFileName2, FindData.cFileName);
  111. QfsDeleteFile(szFileName2);
  112. }
  113. QfsFindClose(hSrchTmpFiles);
  114. }
  115. dwError = QfsSetFileSecurityInfo(lpszPath,
  116. GENERIC_ALL, GENERIC_ALL, 0);
  117. if (dwError != ERROR_SUCCESS)
  118. {
  119. ClRtlLogPrint(LOG_NOISE,
  120. "[DM] DmPrepareQuorumResChange - ClRtlSetObjSecurityInfo Failed, Status=%1!u!\r\n",
  121. dwError);
  122. goto FnExit;
  123. }
  124. //open the new log file
  125. ClRtlLogPrint(LOG_NOISE,
  126. "[DM] DmPrepareQuorumResChange: the name of the quorum file is %1!ls!\r\n",
  127. szFileName1);
  128. //open the log file
  129. ghNewQuoLog = LogCreate(szFileName1, dwMaxQuoLogSize,
  130. (PLOG_GETCHECKPOINT_CALLBACK)DmpGetSnapShotCb, NULL,
  131. TRUE, &FirstLsn);
  132. if (!ghNewQuoLog)
  133. {
  134. dwError = GetLastError();
  135. ClRtlLogPrint(LOG_UNUSUAL,
  136. "[DM] DmPrepareQuorumResChange: Quorum log could not be opened, error = %1!u!\r\n",
  137. dwError);
  138. CsLogEventData1( LOG_CRITICAL,
  139. CS_DISKWRITE_FAILURE,
  140. sizeof(dwError),
  141. &dwError,
  142. szFileName1 );
  143. CsInconsistencyHalt(ERROR_QUORUMLOG_OPEN_FAILED);
  144. }
  145. //create a checkpoint in the new place
  146. dwError = DmpGetSnapShotCb(lpszPath, NULL, szFileName1, &dwChkPtSequence);
  147. if (dwError != ERROR_SUCCESS)
  148. {
  149. CL_LOGFAILURE(dwError);
  150. CsInconsistencyHalt(ERROR_QUORUMLOG_OPEN_FAILED);
  151. goto FnExit;
  152. }
  153. dwError = LogCheckPoint(ghNewQuoLog, TRUE, szFileName1, dwChkPtSequence);
  154. if (dwError != ERROR_SUCCESS)
  155. {
  156. ClRtlLogPrint(LOG_NOISE,
  157. "[DM] DmPrepareQuorumResChange - failed to take chkpoint, error = %1!u!\r\n",
  158. dwError);
  159. goto FnExit;
  160. }
  161. ClRtlLogPrint(LOG_NOISE,
  162. "[DM] DmPrepareQuorumResChange - checkpoint taken\r\n");
  163. //
  164. // Call the checkpoint manager to copy over any checkpoint files
  165. //
  166. if ( !( CsNoQuorum ) || ( gpQuoResource->State == ClusterResourceOnline ) )
  167. {
  168. dwError = CpCopyCheckpointFiles(lpszPath, FALSE);
  169. if (dwError != ERROR_SUCCESS)
  170. {
  171. goto FnExit;
  172. }
  173. } else
  174. {
  175. ClRtlLogPrint(LOG_NOISE,
  176. "[DM] DmPrepareQuorumResChange: Skip copying checkpoint files from old quorum, FixQuorum=%1!u!, QuoState=%2!u!...\n",
  177. CsNoQuorum,
  178. gpQuoResource->State);
  179. }
  180. //create the tombstone and tmp file names
  181. lstrcpyW(szFileName1, lpszPath);
  182. lstrcatW(szFileName1, cszQuoTombStoneFile);
  183. lstrcpyW(szFileName2, lpszPath);
  184. lstrcatW(szFileName2, cszTmpQuoTombStoneFile);
  185. //rename the quorum tomstone file,it if it exists
  186. if (!QfsMoveFileEx(szFileName1, szFileName2,
  187. MOVEFILE_REPLACE_EXISTING|MOVEFILE_WRITE_THROUGH))
  188. {
  189. //this may fail if the tombstone doesnt exist, ignore error
  190. ClRtlLogPrint(LOG_UNUSUAL,
  191. "[DM] DmPrepareQuorumResChange:tombstone doesnt exist,movefilexW failed, error=0x%1!08lx!\r\n",
  192. GetLastError());
  193. }
  194. FnExit:
  195. if (dwError != ERROR_SUCCESS)
  196. {
  197. //if not sucess, clean up the new file
  198. if (ghNewQuoLog)
  199. {
  200. LogClose(ghNewQuoLog);
  201. ghNewQuoLog = NULL;
  202. }
  203. ClRtlLogPrint(LOG_NOISE,
  204. "[DM] DmPrepareQuorumResChange - Exit, error=0x%1!08lx!\r\n",
  205. dwError);
  206. } else {
  207. ClRtlLogPrint(LOG_NOISE,
  208. "[DM] DmPrepareQuorumResChange - Exit, status=0x%1!08lx!\r\n",
  209. dwError);
  210. }
  211. return(dwError);
  212. } // DmPrepareQuorumResChange
  213. /****
  214. @func void | DmDwitchToNewQuorumLog| This is called to switch to a new
  215. quorum log when the quorum resource is changed.
  216. @comm When a quorum resource is successfully changed, this function is
  217. to switch quorum logs. The synchronous notifications for the old resource
  218. are unhooked and those for the new resource file are hooked.
  219. @rdesc Returns a result code. ERROR_SUCCESS on success.
  220. @xref <f DmSwitchToNewQuorumLog>
  221. ****/
  222. void DmSwitchToNewQuorumLog(
  223. IN LPCWSTR lpszQuoLogPath,
  224. IN DWORD dwNewQuorumResourceCharacteristics)
  225. {
  226. WCHAR szTmpQuoTombStone[MAX_PATH];
  227. DWORD dwError = ERROR_SUCCESS;
  228. ClRtlLogPrint(LOG_NOISE,
  229. "[DM] DmSwitchQuorumLogs - Entry\r\n");
  230. //unhook notifications with the old quorum resource
  231. DmpUnhookQuorumNotify();
  232. //ask the dm to register with the new quorum resource
  233. DmpHookQuorumNotify();
  234. //if the new log file exists... this is the owner of the new quorum resource.
  235. //the new log file may be the same as the old one
  236. if (ghNewQuoLog)
  237. {
  238. if (ghQuoLog && (ghQuoLog != ghNewQuoLog))
  239. {
  240. LogClose(ghQuoLog);
  241. //take another checkpoint to the new quorum file,
  242. //so that the last few updates make into it
  243. if ((dwError = LogCheckPoint(ghNewQuoLog, TRUE, NULL, 0))
  244. != ERROR_SUCCESS)
  245. {
  246. ClRtlLogPrint(LOG_CRITICAL,
  247. "[DM] DmSwitchQuorumLogs - Failed to take a checkpoint\r\n");
  248. CL_UNEXPECTED_ERROR(dwError);
  249. }
  250. ClRtlLogPrint(LOG_NOISE,
  251. "[DM] DmSwitchQuorumLogs - taken checkpoint\r\n");
  252. ghQuoLog = NULL;
  253. }
  254. ghQuoLog = ghNewQuoLog;
  255. ghNewQuoLog = NULL;
  256. // if the old tombstome was replace by a tmp file at the beginning
  257. //of change quorum resource delete it now
  258. //get the tmp file for the new quorum resource
  259. lstrcpyW(szTmpQuoTombStone, lpszQuoLogPath);
  260. lstrcatW(szTmpQuoTombStone, cszTmpQuoTombStoneFile);
  261. QfsDeleteFile(szTmpQuoTombStone);
  262. }
  263. else
  264. {
  265. //if the old log file is open, owner of the old quorum resource
  266. if (ghQuoLog)
  267. {
  268. LogClose(ghQuoLog);
  269. ghQuoLog = NULL;
  270. }
  271. }
  272. if (FmDoesQuorumAllowLogging(dwNewQuorumResourceCharacteristics) != ERROR_SUCCESS)
  273. {
  274. //this is not enough to ensure the dm logging will cease
  275. //the ghQuoLog parameter must be NULL
  276. CsNoQuorumLogging = TRUE;
  277. if (ghQuoLog)
  278. {
  279. LogClose(ghQuoLog);
  280. ghQuoLog = NULL;
  281. }
  282. } else if ( !CsUserTurnedOffQuorumLogging )
  283. {
  284. //
  285. // If the user did not turn off quorum logging explicitly, then turn it back on since
  286. // the new quorum resource is not local quorum.
  287. //
  288. CsNoQuorumLogging = FALSE;
  289. }
  290. ClRtlLogPrint(LOG_NOISE,
  291. "[DM] DmSwitchQuorumLogs - Exit!\r\n");
  292. return;
  293. }
  294. /****
  295. @func DWORD | DmReinstallTombStone| If the change to a new quorum
  296. resource fails, the new log is closed and the tombstone is
  297. reinstalled.
  298. @parm IN LPCWSTR | lpszQuoLogPath | The path for maintenance cluster files.
  299. @comm The old quorum log file is deleted and a tomstone file is created in its
  300. place. If this tombstone file is detected in the quorum path, the node
  301. is not allowed to do a form. It must do a join to find about the new
  302. quorum resource from the node that knows about the most recent quorum
  303. resource.
  304. @rdesc Returns a result code. ERROR_SUCCESS on success.
  305. @xref <f DmSwitchToNewQuorumLog>
  306. ****/
  307. DWORD DmReinstallTombStone(
  308. IN LPCWSTR lpszQuoLogPath
  309. )
  310. {
  311. DWORD dwError=ERROR_SUCCESS;
  312. WCHAR szQuoTombStone[MAX_PATH];
  313. WCHAR szTmpQuoTombStone[MAX_PATH];
  314. ClRtlLogPrint(LOG_NOISE,
  315. "[DM] DmReinstallTombStone - Entry\r\n");
  316. if (ghNewQuoLog)
  317. {
  318. //get the tmp file for the new quorum resource
  319. lstrcpyW(szTmpQuoTombStone, lpszQuoLogPath);
  320. lstrcatW(szTmpQuoTombStone, cszTmpQuoTombStoneFile);
  321. //create the tombstone file or replace the previous one with a new one
  322. lstrcpyW(szQuoTombStone, lpszQuoLogPath);
  323. lstrcatW(szQuoTombStone, cszQuoTombStoneFile);
  324. //restore the tombstone
  325. if (!QfsMoveFileEx(szTmpQuoTombStone, szQuoTombStone,
  326. MOVEFILE_REPLACE_EXISTING|MOVEFILE_WRITE_THROUGH))
  327. {
  328. //this may fail if the tombstone doesnt exist, ignore error
  329. ClRtlLogPrint(LOG_UNUSUAL,
  330. "[DM] DmReinstallTombStone :Warning-MoveFileExW failed, error=0x%1!08lx!\r\n",
  331. GetLastError());
  332. }
  333. // if this is not the same as the old log file, close it
  334. if (ghNewQuoLog != ghQuoLog)
  335. {
  336. LogClose(ghNewQuoLog);
  337. }
  338. ghNewQuoLog = NULL;
  339. }
  340. return(dwError);
  341. }
  342. /****
  343. @func DWORD | DmCompleteQuorumResChange| This is called on the quorum resource
  344. if the old quorum log file is not the same as the new one.
  345. @parm IN PVOID | pOldQuoRes | The new quorum resource.
  346. @parm IN LPCWSTR | lpszPath | The path for temporary cluster files.
  347. @parm IN DWORD | dwMaxQuoLogSize | The maximum size limit for the quorum log file.
  348. @comm The old quorum log file is deleted and a tomstone file is created in its
  349. place. If this tombstone file is detected in the quorum path, the node
  350. is not allowed to do a form. It must do a join to find about the new
  351. quorum resource from the node that knows about the most recent quorum
  352. resource.
  353. @rdesc Returns a result code. ERROR_SUCCESS on success.
  354. @xref <f DmSwitchToNewQuorumLog>
  355. ****/
  356. DWORD DmCompleteQuorumResChange(
  357. IN LPCWSTR lpszOldQuoResId,
  358. IN LPCWSTR lpszOldQuoLogPath
  359. )
  360. {
  361. DWORD dwError=ERROR_SUCCESS;
  362. WCHAR szOldQuoFileName[MAX_PATH];
  363. QfsHANDLE hTombStoneFile;
  364. WCHAR szQuorumTombStone[MAX_PATH];
  365. PQUO_TOMBSTONE pTombStone = NULL;
  366. DWORD dwBytesWritten;
  367. WIN32_FIND_DATA FindData;
  368. QfsHANDLE hSrchTmpFiles;
  369. ClRtlLogPrint(LOG_NOISE,
  370. "[DM] DmCompleteQuorumResChange - Entry\r\n");
  371. //the old log file name
  372. lstrcpyW(szOldQuoFileName, lpszOldQuoLogPath);
  373. lstrcatW(szOldQuoFileName, cszQuoFileName);
  374. //create the tombstone file or replace the previous one with a new one
  375. lstrcpyW(szQuorumTombStone, lpszOldQuoLogPath);
  376. lstrcatW(szQuorumTombStone, cszQuoTombStoneFile);
  377. pTombStone = LocalAlloc(LMEM_FIXED, sizeof(QUO_TOMBSTONE));
  378. if (!pTombStone)
  379. {
  380. CL_LOGFAILURE(ERROR_NOT_ENOUGH_MEMORY);
  381. CsLogEvent(LOG_UNUSUAL, DM_TOMBSTONECREATE_FAILED);
  382. goto DelOldFiles;
  383. }
  384. hTombStoneFile = QfsCreateFile(szQuorumTombStone,
  385. GENERIC_READ | GENERIC_WRITE,
  386. FILE_SHARE_READ|FILE_SHARE_WRITE,
  387. NULL,
  388. CREATE_ALWAYS,
  389. 0,
  390. NULL);
  391. if (!QfsIsHandleValid(hTombStoneFile) )
  392. {
  393. //dont return failure
  394. CL_LOGFAILURE(dwError);
  395. CsLogEvent(LOG_UNUSUAL, DM_TOMBSTONECREATE_FAILED);
  396. goto DelOldFiles;
  397. }
  398. //write the old quorum path to it.
  399. lstrcpyn(pTombStone->szOldQuoResId, lpszOldQuoResId, MAXSIZE_RESOURCEID);
  400. lstrcpy(pTombStone->szOldQuoLogPath, lpszOldQuoLogPath);
  401. //write the tombstones
  402. if (! QfsWriteFile(hTombStoneFile, pTombStone, sizeof(QUO_TOMBSTONE),
  403. &dwBytesWritten, NULL))
  404. {
  405. CL_LOGFAILURE(GetLastError());
  406. CsLogEvent(LOG_UNUSUAL, DM_TOMBSTONECREATE_FAILED);
  407. goto DelOldFiles;
  408. }
  409. CL_ASSERT(dwBytesWritten == sizeof(QUO_TOMBSTONE));
  410. ClRtlLogPrint(LOG_NOISE,
  411. "[DM] DmCompleteQuorumResChange: tombstones written\r\n");
  412. DelOldFiles:
  413. //
  414. //delete the old quorum files
  415. //
  416. if (!QfsDeleteFile(szOldQuoFileName))
  417. CL_LOGFAILURE(GetLastError());
  418. //delele other tmp files in there
  419. lstrcpyW(szOldQuoFileName, lpszOldQuoLogPath);
  420. lstrcatW(szOldQuoFileName, L"*.tmp");
  421. hSrchTmpFiles = QfsFindFirstFile(szOldQuoFileName, & FindData);
  422. if (QfsIsHandleValid(hSrchTmpFiles))
  423. {
  424. lstrcpyW(szQuorumTombStone, lpszOldQuoLogPath);
  425. lstrcatW(szQuorumTombStone, FindData.cFileName);
  426. QfsDeleteFile(szQuorumTombStone);
  427. while (QfsFindNextFile( hSrchTmpFiles, & FindData))
  428. {
  429. lstrcpyW(szQuorumTombStone, lpszOldQuoLogPath);
  430. lstrcatW(szQuorumTombStone, FindData.cFileName);
  431. QfsDeleteFile(szQuorumTombStone);
  432. }
  433. QfsFindClose(hSrchTmpFiles);
  434. }
  435. //
  436. // Clean up the old registry checkpoint files
  437. //
  438. CpCompleteQuorumChange(lpszOldQuoLogPath);
  439. QfsCloseHandleIfValid(hTombStoneFile);
  440. if (pTombStone) LocalFree(pTombStone);
  441. return(dwError);
  442. }
  443. /****
  444. @func DWORD | DmWriteToQuorumLog| When a transaction to the cluster database
  445. is completed successfully, this function is invoked.
  446. @parm DWORD | dwSequence | The sequnece number of the transaction.
  447. @parm PVOID | pData | A pointer to a record data.
  448. @parm DWORD | dwSize | The size of the record data in bytes.
  449. @rdesc Returns a result code. ERROR_SUCCESS on success.
  450. @xref
  451. ****/
  452. DWORD WINAPI DmWriteToQuorumLog(
  453. IN DWORD dwGumDispatch,
  454. IN DWORD dwSequence,
  455. IN DWORD dwType,
  456. IN PVOID pData,
  457. IN DWORD dwSize)
  458. {
  459. DWORD dwError=ERROR_SUCCESS;
  460. //dmupdate is coming before the DmUpdateJoinCluster is called.
  461. //at this point we are not the owner of quorum in any case
  462. if (!gpQuoResource)
  463. goto FnExit;
  464. ClRtlLogPrint(LOG_NOISE,
  465. "[DM] DmWriteToQuorumLog Entry Seq#=%1!u! Type=%2!u! Size=%3!u!\r\n",
  466. dwSequence, dwType, dwSize);
  467. //
  468. // Chittur Subbaraman (chitturs) - 6/3/99
  469. //
  470. // Make sure the gLockDmpRoot is held before LogCheckPoint is called
  471. // so as to maintain the ordering between this lock and the log lock.
  472. //
  473. ACQUIRE_SHARED_LOCK(gLockDmpRoot);
  474. //if I am the owner of the quorum logs, just write the record
  475. if (gbIsQuoLoggingOn && ghQuoLog && gbIsQuoResOnline && AMIOWNEROFQUORES(gpQuoResource))
  476. {
  477. if (dwGumDispatch == PRE_GUM_DISPATCH)
  478. {
  479. //make sure the logger has enough space to commit this else
  480. //refuse this GUM transaction
  481. dwError = LogCommitSize(ghQuoLog, RMRegistryMgr, dwSize);
  482. if (dwError != ERROR_SUCCESS)
  483. {
  484. if (dwError == ERROR_CLUSTERLOG_NOT_ENOUGH_SPACE)
  485. {
  486. //map error
  487. CL_LOGCLUSERROR(LM_DISKSPACE_LOW_WATERMARK);
  488. gbIsQuoResEnoughSpace = FALSE;
  489. }
  490. }
  491. else
  492. {
  493. if (!gbIsQuoResEnoughSpace) gbIsQuoResEnoughSpace = TRUE;
  494. }
  495. }
  496. else if (dwGumDispatch == POST_GUM_DISPATCH)
  497. {
  498. if (LogWrite(ghQuoLog, dwSequence, TTCompleteXsaction, RMRegistryMgr,
  499. dwType, pData, dwSize) == NULL_LSN)
  500. {
  501. dwError = GetLastError();
  502. ClRtlLogPrint(LOG_UNUSUAL,
  503. "[DM] DmWriteToQuorumLog failed, error=0x%1!08lx!\r\n",
  504. dwError);
  505. }
  506. }
  507. }
  508. RELEASE_LOCK(gLockDmpRoot);
  509. FnExit:
  510. return (dwError);
  511. }
  512. /****
  513. @func DWORD | DmpChkQuoTombStone| This checks the quorum logs to ensure
  514. that it is the most recent one before rolling in the changes.
  515. @rdesc Returns a result code. ERROR_SUCCESS on success.
  516. @comm This looks for the tombstone file and if one exists. It checks if this
  517. quorum file is marked as dead in there.
  518. @xref <f FmSetQuorumResource>
  519. ****/
  520. DWORD DmpChkQuoTombStone()
  521. {
  522. DWORD dwError=ERROR_SUCCESS;
  523. WCHAR szQuorumLogPath[MAX_PATH];
  524. WCHAR szQuorumTombStone[MAX_PATH];
  525. QfsHANDLE hTombStoneFile = QfsINVALID_HANDLE_VALUE;
  526. PQUO_TOMBSTONE pTombStone = NULL;
  527. DWORD dwBytesRead;
  528. ClRtlLogPrint(LOG_NOISE,
  529. "[DM] DmpChkQuoTombStone - Entry\r\n");
  530. dwError = DmGetQuorumLogPath(szQuorumLogPath, sizeof(szQuorumLogPath));
  531. if (dwError)
  532. {
  533. ClRtlLogPrint(LOG_UNUSUAL,
  534. "[DM] DmpChkQuoTombStone - DmGetQuorumLogPath failed,error=0x%1!08lx!\n",
  535. dwError);
  536. goto FnExit;
  537. }
  538. lstrcpyW(szQuorumTombStone, szQuorumLogPath);
  539. lstrcatW(szQuorumTombStone, L"\\quotomb.stn");
  540. pTombStone = LocalAlloc(LMEM_FIXED, sizeof(QUO_TOMBSTONE));
  541. if (!pTombStone)
  542. {
  543. dwError = ERROR_NOT_ENOUGH_MEMORY;
  544. goto FnExit;
  545. }
  546. hTombStoneFile = QfsCreateFile(szQuorumTombStone,
  547. GENERIC_READ | GENERIC_WRITE,
  548. FILE_SHARE_READ|FILE_SHARE_WRITE,
  549. NULL,
  550. OPEN_EXISTING,
  551. 0,
  552. NULL);
  553. if (!QfsIsHandleValid(hTombStoneFile) )
  554. {
  555. //there is no tombstone file, not a problem-we can proceed with the form
  556. goto FnExit;
  557. }
  558. //found a tombstone file
  559. //read the file
  560. if (! QfsReadFile(hTombStoneFile, pTombStone, sizeof(QUO_TOMBSTONE),
  561. &dwBytesRead, NULL))
  562. {
  563. ClRtlLogPrint(LOG_UNUSUAL,
  564. "[DM] DmpChkQuoTombStone - Couldn't read the tombstone,error=0x%1!08lx!\n",
  565. dwError);
  566. //dont return an error, we can proceed with form??
  567. goto FnExit;
  568. }
  569. if (dwBytesRead != sizeof(QUO_TOMBSTONE))
  570. {
  571. ClRtlLogPrint(LOG_UNUSUAL,
  572. "[DM] DmpChkQuoTombStone - Couldn't read the entire tombstone\r\n");
  573. //dont return an error, we can proceed with form??
  574. goto FnExit;
  575. }
  576. if ((!lstrcmpW(OmObjectId(gpQuoResource), pTombStone->szOldQuoResId))
  577. && (!lstrcmpiW(szQuorumLogPath, pTombStone->szOldQuoLogPath)))
  578. {
  579. ClRtlLogPrint(LOG_UNUSUAL,
  580. "[DM] DmpChkQuoTombStone:A tombstone for this resource, and quorum log file was found here.\r\n");
  581. ClRtlLogPrint(LOG_UNUSUAL,
  582. "[DM] DmpChkQuoTombStone:This is node is only allowed to do a join, make sure another node forms\r\n");
  583. //log something into the eventlog
  584. CL_LOGCLUSERROR(SERVICE_MUST_JOIN);
  585. //we exit with succes because this is by design and we dont want
  586. //clusprxy to retry starting unnecessarily
  587. ExitProcess(dwError);
  588. goto FnExit;
  589. }
  590. else
  591. {
  592. ClRtlLogPrint(LOG_UNUSUAL,
  593. "[DM] DmpChkQuoTombStone: Bogus TombStone ??\r\n");
  594. #if DBG
  595. if (IsDebuggerPresent())
  596. DebugBreak();
  597. #endif
  598. goto FnExit;
  599. }
  600. FnExit:
  601. QfsCloseHandleIfValid(hTombStoneFile);
  602. if (pTombStone) LocalFree(pTombStone);
  603. ClRtlLogPrint(LOG_NOISE,
  604. "[DM] DmpChkQuoTombStone: Exit, returning 0x%1!08lx!\r\n",
  605. dwError);
  606. return(dwError);
  607. }
  608. /****
  609. @func DWORD | DmpApplyChanges| When dm is notified that the cluster form is
  610. occuring, it calls DmpApplyChanges to apply the quorum logs to the
  611. cluster database.
  612. @rdesc Returns a result code. ERROR_SUCCESS on success.
  613. @comm This opens the quorum file. Note that it doesnt close the quorum file.
  614. @xref
  615. ****/
  616. DWORD DmpApplyChanges()
  617. {
  618. LSN FirstLsn;
  619. DWORD dwErr = ERROR_SUCCESS;
  620. DWORD dwSequence;
  621. DM_LOGSCAN_CONTEXT DmAppliedChangeContext;
  622. if (ghQuoLog == NULL)
  623. {
  624. return(ERROR_QUORUMLOG_OPEN_FAILED);
  625. }
  626. //find the current sequence number from the registry
  627. dwSequence = DmpGetRegistrySequence();
  628. ClRtlLogPrint(LOG_NOISE,
  629. "[DM] DmpApplyChanges: The current registry sequence number %1!d!\r\n",
  630. dwSequence);
  631. // upload a database if the current sequence number is lower or equal to
  632. // the one in the database OR if the user is forcing a restore database
  633. // operation.
  634. // find the lsn of the record from which we need to start applying changes
  635. // if null there are no changes to apply
  636. dwErr = DmpLogFindStartLsn(ghQuoLog, &FirstLsn, &dwSequence);
  637. if (dwErr != ERROR_SUCCESS)
  638. {
  639. ClRtlLogPrint(LOG_NOISE,
  640. "[DM] DmpApplyChanges: DmpLogFindStartLsn failed, error=0x%1!08lx!\r\n",
  641. dwErr);
  642. goto FnExit;
  643. }
  644. //dwSequence now contains the current sequence number in the registry
  645. DmAppliedChangeContext.dwSequence = dwSequence;
  646. if (FirstLsn != NULL_LSN)
  647. {
  648. ClRtlLogPrint(LOG_NOISE,
  649. "[DM] DmpApplyChanges: The LSN of the record to apply changes from 0x%1!08lx!\r\n",
  650. FirstLsn);
  651. if (dwErr = LogScan(ghQuoLog, FirstLsn, TRUE,(PLOG_SCAN_CALLBACK)DmpLogApplyChangesCb,
  652. &DmAppliedChangeContext) != ERROR_SUCCESS)
  653. {
  654. ClRtlLogPrint(LOG_UNUSUAL,
  655. "[DM] DmpApplyChanges: LogScan failed, error=0x%1!08lx!\r\n",
  656. dwErr);
  657. }
  658. //if the more changes have been applied
  659. if (DmAppliedChangeContext.dwSequence != dwSequence)
  660. {
  661. //set the gum sequence number to the trid that has been applied
  662. GumSetCurrentSequence(GumUpdateRegistry, DmAppliedChangeContext.dwSequence);
  663. //update the registry with this sequence number
  664. DmpUpdateSequence();
  665. //set the gum sequence number to one higher for the next transaction
  666. GumSetCurrentSequence(GumUpdateRegistry,
  667. (DmAppliedChangeContext.dwSequence + 1));
  668. ClRtlLogPrint(LOG_NOISE,
  669. "[DM] DmpApplyChanges: Gum sequnce number set to = %1!d!\r\n",
  670. (DmAppliedChangeContext.dwSequence + 1));
  671. }
  672. }
  673. FnExit:
  674. ClRtlLogPrint(LOG_NOISE,
  675. "[DM] DmpApplyChanges: Exit, returning 0x%1!08lx!\r\n",
  676. dwErr);
  677. return(dwErr);
  678. }
  679. /****
  680. @func DWORD | DmpFindStartLsn| Uploads the last checkpoint from the
  681. quorum and returns the LSN of the record from which the changes
  682. should be applied.
  683. @parm IN HLOG | hQuoLog | the log file handle.
  684. @parm OUT LSN *| pStartScanLsn | Returns the LSN of the record in the
  685. quorum log from which changes must be applied is returned here.
  686. NULL_LSN is returned if no changes need to be applied.
  687. @parm IN OUT LPDWORD | *pdwSequence | Should be set to the current sequence
  688. number is the cluster registry. If a new chkpoint is uploaded, the
  689. sequence number corresponding to that is returned.
  690. @rdesc Returns ERROR_SUCCESS if a valid LSN is returned. This may be NULL_LSN.
  691. Returns the error code if the database cannot be uploaded from the last chkpoint
  692. or if something horrible happens.
  693. @comm This finds the last valid check point in the log file. The data
  694. base is synced with this checkpoint and the gum sequence number is
  695. set to one plus the sequence number of that checkpoint. If no
  696. checkpoint record is found, a checkpoint is taken and NULL_LSN is
  697. returned.
  698. @xref
  699. ****/
  700. DWORD DmpLogFindStartLsn(
  701. IN HLOG hQuoLog,
  702. OUT LSN *pStartScanLsn,
  703. IN OUT LPDWORD pdwSequence)
  704. {
  705. LSN ChkPtLsn;
  706. LSN StartScanLsn;
  707. DWORD dwChkPtSequence=0;
  708. DWORD dwError = ERROR_SUCCESS;
  709. WCHAR szChkPtFileName[LOG_MAX_FILENAME_LENGTH];
  710. DM_LOGSCAN_CONTEXT DmAppliedChangeContext;
  711. *pStartScanLsn = NULL_LSN;
  712. ChkPtLsn = NULL_LSN;
  713. //read the last check point record if any and the transaction id till that
  714. //checkpoint
  715. dwError = LogGetLastChkPoint(hQuoLog, szChkPtFileName, &dwChkPtSequence,
  716. &ChkPtLsn);
  717. if (dwError != ERROR_SUCCESS)
  718. {
  719. //no chk point record found
  720. ClRtlLogPrint(LOG_UNUSUAL,
  721. "[DM] DmpLogFindStartLsn: LogGetLastChkPoint failed, error=0x%1!08lx!\r\n",
  722. dwError );
  723. // this can happen either due to the fact that the log file was just created,
  724. // and hence there is no checkpoint or because log file was messed up
  725. // and the mount process corrected it but removed the checkpoint.
  726. // If it is the second case, then logpmountlog should put something in the
  727. // event log
  728. if (dwError == ERROR_CLUSTERLOG_CHKPOINT_NOT_FOUND)
  729. {
  730. //
  731. // Chittur Subbaraman (chitturs) - 6/3/99
  732. //
  733. // Make sure the gLockDmpRoot is held before LogCheckPoint is called
  734. // so as to maintain the ordering between this lock and the log lock.
  735. //
  736. ACQUIRE_SHARED_LOCK(gLockDmpRoot);
  737. //take a checkpoint, so that this doesnt happen the next time
  738. dwError = LogCheckPoint(hQuoLog, TRUE, NULL, 0);
  739. RELEASE_LOCK(gLockDmpRoot);
  740. if (dwError != ERROR_SUCCESS)
  741. {
  742. //check point could not be taken
  743. ClRtlLogPrint(LOG_UNUSUAL,
  744. "[DM] DmpLogFindStartLsn: Checkpoint on first form failed, error=0x%1!08lx!\r\n",
  745. dwError );
  746. goto FnExit;
  747. }
  748. }
  749. else
  750. {
  751. //there were other errors
  752. goto FnExit;
  753. }
  754. }
  755. else
  756. {
  757. //found check point record
  758. ClRtlLogPrint(LOG_NOISE,
  759. "[DM] DmpLogFindStartLsn: LogGetLastChkPt rets, Seq#=%1!d! ChkPtLsn=0x%2!08lx!\r\n",
  760. dwChkPtSequence, ChkPtLsn);
  761. //
  762. // Chittur Subbaraman (chitturs) - 10/18/98
  763. //
  764. // If the user is forcing a database restore from backup, then
  765. // do not check whether the current sequence number in the registry
  766. // is younger than the checkpoint sequence number in the quorum log.
  767. // Just, go ahead and load the checkpoint from restored database.
  768. //
  769. if ( CsDatabaseRestore == TRUE )
  770. {
  771. ClRtlLogPrint(LOG_NOISE,
  772. "[DM] DmpLogFindStartLsn: User forcing a chkpt upload from quorum log...\r\n");
  773. }
  774. else
  775. {
  776. //if the sequence number is greater than the check point sequence number
  777. //plus one, that implies..that only changes from that sequence number
  778. //need to be applied.(this node may not have been the first one to die)
  779. //We dont always apply the database because if logging is mostly off
  780. //and the two nodes die simultaneosly we want to prevent losing all the
  781. //changes
  782. //else if the checkpoint sequence is one below the current
  783. //current sequence number, then the locker node could have died after updating
  784. //get the current checkpoint irrespective of what the current sequence number is
  785. //this is because a checkpoint with the same sequence number may have
  786. //a change that is different from whats there in the current registry.
  787. //if node 'a'(locker and logger dies in the middle of logging trid=x+1,
  788. //the other node,'b' will take over logging and checkpoint the database
  789. //at trid=x. If 'a' comes back up, it needs to throw aways its x+1 change
  790. //and apply changes from the log from chk pt x.
  791. if (*pdwSequence > (dwChkPtSequence + 1))
  792. {
  793. //the current sequence number is less than or equal to chkpt Seq + 1
  794. ClRtlLogPrint(LOG_NOISE,
  795. "[DM] DmpLogFindStartLsn: ChkPt not applied, search for next seq\r\n");
  796. DmAppliedChangeContext.dwSequence = *pdwSequence;
  797. DmAppliedChangeContext.StartLsn = NULL_LSN;
  798. //find the LSN from which to apply changes
  799. if (dwError = LogScan(ghQuoLog, ChkPtLsn, TRUE,(PLOG_SCAN_CALLBACK)DmpLogFindStartLsnCb,
  800. &DmAppliedChangeContext) != ERROR_SUCCESS)
  801. {
  802. ClRtlLogPrint(LOG_UNUSUAL,
  803. "[DM] DmpLogFindStartLsn: LogScan failed, no changes will be applied, error=0x%1!08lx!\r\n",
  804. dwError);
  805. goto FnExit;
  806. }
  807. *pStartScanLsn = DmAppliedChangeContext.StartLsn;
  808. goto FnExit;
  809. }
  810. }
  811. //
  812. // The current registry sequence number is less than or equal
  813. // to chkpt Seq + 1 OR the user is forcing a database restore
  814. // from the backup area.
  815. //
  816. ClRtlLogPrint(LOG_NOISE,
  817. "[DM] DmpLogFindStartLsn: Uploading chkpt from quorum log\r\n");
  818. //make sure that no keys are added to the key list because of opens/creates
  819. ACQUIRE_EXCLUSIVE_LOCK(gLockDmpRoot);
  820. //hold the key lock as well
  821. EnterCriticalSection(&KeyLock);
  822. //invalidate all open keys
  823. DmpInvalidateKeys();
  824. if ((dwError = DmInstallDatabase(szChkPtFileName, NULL, FALSE)) != ERROR_SUCCESS)
  825. {
  826. //couldnt install the database
  827. //bad !
  828. ClRtlLogPrint(LOG_UNUSUAL,
  829. "[DM] DmpLogFindStartLsn: DmpInstallDatabase failed, error=0x%1!08lx!\r\n",
  830. dwError);
  831. CsLogEventData( LOG_CRITICAL,
  832. DM_CHKPOINT_UPLOADFAILED,
  833. sizeof(dwError),
  834. &dwError );
  835. DmpReopenKeys();
  836. //release the locks
  837. LeaveCriticalSection(&KeyLock);
  838. RELEASE_LOCK(gLockDmpRoot);
  839. goto FnExit;
  840. }
  841. else
  842. {
  843. //the current sequence number is less than or equal to chkpt Seq + 1
  844. ClRtlLogPrint(LOG_NOISE,
  845. "[DM] DmpLogFindStartLsn: chkpt uploaded from quorum log\r\n");
  846. //since we downloaded the database, we should start
  847. //aplying changes from ChkPtLsn
  848. *pStartScanLsn = ChkPtLsn;
  849. *pdwSequence = dwChkPtSequence;
  850. //set the gum sequence number to be the next one
  851. //ss: the next logged transaction shouldnt have the same
  852. //transaction id
  853. GumSetCurrentSequence(GumUpdateRegistry, (dwChkPtSequence+1));
  854. //reopen the keys
  855. DmpReopenKeys();
  856. //release the locks
  857. LeaveCriticalSection(&KeyLock);
  858. RELEASE_LOCK(gLockDmpRoot);
  859. goto FnExit;
  860. }
  861. }
  862. FnExit:
  863. ClRtlLogPrint(LOG_NOISE,
  864. "[DM] DmpLogFindStartLsn: LSN=0x%1!08lx!, returning 0x%2!08lx!\r\n",
  865. *pStartScanLsn, dwError);
  866. return(dwError);
  867. }
  868. /****
  869. @func DWORD | DmpLogFindStartLsnCb| The callback tries to find the first record
  870. with a transaction id that is larger than the sequence number of the
  871. local database.
  872. @parm PVOID | pContext| A pointer to a DM_STARTLSN_CONTEXT structure.
  873. @parm LSN | Lsn| The LSN of the record.
  874. @parm RMID | Resource | The resource manager for this transaction.
  875. @parm RMID | ResourceType | The resource manager for this transaction.
  876. @parm TRID | Transaction | The transaction number of this record.
  877. @parm PVOID | pLogData | The log data for this record.
  878. @parm DWORD | DataLength | The length of the record.
  879. @rdesc Returns TRUE to continue scan. FALSE to stop.
  880. @comm This function returns true if the sequence number of the record
  881. being scanned is higher than the seqence number passed in the context.
  882. @xref <f DmpLogFindStartLsn> <f LogScan>
  883. ****/
  884. BOOL WINAPI DmpLogFindStartLsnCb(
  885. IN PVOID pContext,
  886. IN LSN Lsn,
  887. IN RMID Resource,
  888. IN RMTYPE ResourceFlags,
  889. IN TRID Transaction,
  890. IN TRTYPE TrType,
  891. IN const PVOID pLogData,
  892. IN DWORD DataLength)
  893. {
  894. PDM_LOGSCAN_CONTEXT pDmStartLsnContext= (PDM_LOGSCAN_CONTEXT) pContext;
  895. CL_ASSERT(pDmStartLsnContext);
  896. if (Transaction > (int)pDmStartLsnContext->dwSequence)
  897. {
  898. pDmStartLsnContext->StartLsn = Lsn;
  899. return (FALSE);
  900. }
  901. return(TRUE);
  902. }
  903. /****
  904. @func DWORD | DmpHookQuorumNotify| This hooks a callback to be invoked whenever
  905. the state of the quorum resource changes.
  906. @rdesc Returns a result code. ERROR_SUCCESS on success.
  907. @comm This is used to monitor the state of
  908. @xref
  909. ****/
  910. DWORD DmpHookQuorumNotify()
  911. {
  912. DWORD dwError = ERROR_SUCCESS;
  913. if (dwError = FmFindQuorumResource(&gpQuoResource))
  914. {
  915. ClRtlLogPrint(LOG_UNUSUAL,
  916. "[DM] DmUpdateFormNewCluster: FmFindQuorumResource failed, error=0x%1!08lx!\r\n",
  917. dwError);
  918. goto FnExit;
  919. }
  920. dwError = OmRegisterNotify(gpQuoResource, NULL,
  921. NOTIFY_RESOURCE_POSTONLINE| NOTIFY_RESOURCE_PREOFFLINE |
  922. NOTIFY_RESOURCE_OFFLINEPENDING | NOTIFY_RESOURCE_POSTOFFLINE |
  923. NOTIFY_RESOURCE_FAILED,
  924. DmpQuoObjNotifyCb);
  925. FnExit:
  926. return(dwError);
  927. }
  928. /****
  929. @func DWORD | DmpUnhookQuorumNotify| This unhooks the callback function
  930. that is registered with the object.
  931. @parm PVOID | pContext| A pointer to a DMLOGRECORD structure.
  932. @parm PVOID | pObject| A pointer to quorum resource object.
  933. @parm DWORD | dwNotification| A pointer to a DMLOGRECORD structure.
  934. @rdesc Returns a result code. ERROR_SUCCESS on success.
  935. @xref
  936. ****/
  937. DWORD DmpUnhookQuorumNotify()
  938. {
  939. DWORD dwError = ERROR_SUCCESS;
  940. if (gpQuoResource)
  941. {
  942. dwError = OmDeregisterNotify(gpQuoResource, DmpQuoObjNotifyCb);
  943. OmDereferenceObject(gpQuoResource);
  944. }
  945. return(ERROR_SUCCESS);
  946. }
  947. /****
  948. @func DWORD | DmpQuoObjNotifyCb| This is a callback that is called on
  949. change of state on quorum resource.
  950. @parm PVOID | pContext| A pointer to a DMLOGRECORD structure.
  951. @parm PVOID | pObject| A pointer to quorum resource object.
  952. @parm DWORD | dwNotification| A pointer to a DMLOGRECORD structure.
  953. @rdesc Returns a result code. ERROR_SUCCESS on success.
  954. @xref
  955. ****/
  956. void DmpQuoObjNotifyCb(
  957. IN PVOID pContext,
  958. IN PVOID pObject,
  959. IN DWORD dwNotification)
  960. {
  961. switch(dwNotification)
  962. {
  963. case NOTIFY_RESOURCE_POSTONLINE:
  964. gbIsQuoResOnline = TRUE;
  965. ClRtlLogPrint(LOG_NOISE,
  966. "[DM] DmpQuoObjNotifyCb: Quorum resource is online\r\n");
  967. //if this is the owner of the quorum resource
  968. //and the log is not open, open the log
  969. if (AMIOWNEROFQUORES(gpQuoResource) && !CsNoQuorumLogging)
  970. {
  971. //ToDo: the quorum file name should be obtained from the setup
  972. //for now obtain the value from the cluster registry.
  973. WCHAR szQuorumFileName[MAX_PATH];
  974. LSN FirstLsn;
  975. DWORD dwError;
  976. DWORD dwType;
  977. DWORD dwLength;
  978. DWORD dwMaxQuoLogSize;
  979. DWORD bForceReset = FALSE;
  980. ULONG OldHardErrorValue;
  981. //bug# :106647
  982. //SS: HACKHACK disabling hard error pop ups so that disk corruption
  983. //is caught somewhere else..
  984. //atleast the pop-ups must be disabled for the whole process !
  985. //me thinks this is covering up the problem of disk corruption
  986. //disk corruption should not occur!
  987. RtlSetThreadErrorMode(RTL_ERRORMODE_FAILCRITICALERRORS,
  988. &OldHardErrorValue);
  989. ClRtlLogPrint(LOG_NOISE,
  990. "[DM] DmpQuoObjNotifyCb: Own quorum resource, try open the quorum log\r\n");
  991. if (DmGetQuorumLogPath(szQuorumFileName, sizeof(szQuorumFileName)) != ERROR_SUCCESS)
  992. {
  993. ClRtlLogPrint(LOG_NOISE,
  994. "[DM] DmpQuoObjNotifyCb: Quorum log file is not configured\r\n");
  995. }
  996. else
  997. {
  998. BOOL fSetSecurity = FALSE;
  999. QfsHANDLE hFindFile = QfsINVALID_HANDLE_VALUE;
  1000. WIN32_FIND_DATA FindData;
  1001. hFindFile = QfsFindFirstFile( szQuorumFileName, &FindData );
  1002. if ( !QfsIsHandleValid(hFindFile) )
  1003. {
  1004. dwError = GetLastError();
  1005. ClRtlLogPrint(LOG_NOISE,
  1006. "[DM] DmpQuoObjNotifyCb: FindFirstFile on path %1!ws! failed, Error=%2!d! !!!\n",
  1007. szQuorumFileName,
  1008. dwError);
  1009. if ( dwError == ERROR_PATH_NOT_FOUND )
  1010. {
  1011. fSetSecurity = TRUE;
  1012. }
  1013. } else
  1014. {
  1015. QfsFindClose( hFindFile );
  1016. }
  1017. //if the directory doesnt exist create it
  1018. dwError = QfsClRtlCreateDirectory(szQuorumFileName);
  1019. if (dwError != ERROR_SUCCESS)
  1020. {
  1021. ClRtlLogPrint(LOG_CRITICAL,
  1022. "[DM] DmpQuoObjNotifyCb: Failed to create directory %1!ws!, error=0x%2!08lx!...\n",
  1023. szQuorumFileName,
  1024. dwError);
  1025. CL_UNEXPECTED_ERROR(dwError);
  1026. CsInconsistencyHalt(dwError);
  1027. }
  1028. if ( fSetSecurity == TRUE )
  1029. {
  1030. ClRtlLogPrint(LOG_NOISE,
  1031. "[DM] DmpQuoObjNotifyCb: Attempting to set security on directory %1!ws!...\n",
  1032. szQuorumFileName);
  1033. dwError = QfsSetFileSecurityInfo( szQuorumFileName,
  1034. GENERIC_ALL, // for Admins
  1035. GENERIC_ALL, // for Owner
  1036. 0 ); // for Everyone
  1037. if ( dwError != ERROR_SUCCESS )
  1038. {
  1039. ClRtlLogPrint(LOG_CRITICAL,
  1040. "[DM] DmpQuoObjNotifyCb: ClRtlSetObjSecurityInfo failed for file %1!ws!, Status=%2!u!\r\n",
  1041. szQuorumFileName,
  1042. dwError);
  1043. CL_LOGFAILURE( dwError );
  1044. CsInconsistencyHalt( dwError );
  1045. }
  1046. }
  1047. DmGetQuorumLogMaxSize(&dwMaxQuoLogSize);
  1048. // If the resource monitor dies and comes back up, this can happen
  1049. if (ghQuoLog != NULL)
  1050. {
  1051. HLOG hQuoLog;
  1052. //
  1053. // Make sure the ghQuoLog variable is NULLed out with lock held exclusively BEFORE the log
  1054. // is closed. This will prevent race cases in which another thread reads the ghQuoLog variable and
  1055. // assumes blindly that the log is open.
  1056. //
  1057. ACQUIRE_EXCLUSIVE_LOCK( gLockDmpRoot );
  1058. hQuoLog = ghQuoLog;
  1059. ghQuoLog = NULL;
  1060. RELEASE_LOCK( gLockDmpRoot );
  1061. LogClose( hQuoLog );
  1062. }
  1063. if (gbIsQuoLoggingOn) gbNeedToCheckPoint = TRUE;
  1064. //
  1065. // Chittur Subbaraman (chitturs) - 10/16/98
  1066. //
  1067. // Check whether you need to restore the database from a
  1068. // user-supplied backup directory to the quorum disk. This
  1069. // restore operation is done only once when the Dm has
  1070. // not been fully initialized. Note that this function
  1071. // is called whenever the state of the quorum resource
  1072. // changes but the restore operation is only done once.
  1073. //
  1074. if ( ( gbDmInited == FALSE ) &&
  1075. ( CsDatabaseRestore == TRUE ) )
  1076. {
  1077. ClRtlLogPrint(LOG_NOISE,
  1078. "[DM] DmpQuoObjNotifyCb: Beginning DB restoration from %1!ws!...\r\n",
  1079. CsDatabaseRestorePath);
  1080. if ( ( dwError = DmpRestoreClusterDatabase ( szQuorumFileName ) )
  1081. != ERROR_SUCCESS )
  1082. {
  1083. ClRtlLogPrint(LOG_UNUSUAL,
  1084. "[DM] DmpQuoObjNotifyCb: DB restore operation from %1!ws! failed! Error=0x%2!08lx!\r\n",
  1085. CsDatabaseRestorePath,
  1086. dwError);
  1087. CL_LOGFAILURE( dwError );
  1088. CsDatabaseRestore = FALSE;
  1089. CsInconsistencyHalt( dwError );
  1090. }
  1091. ClRtlLogPrint(LOG_NOISE,
  1092. "[DM] DmpQuoObjNotifyCb: DB restoration from %1!ws! successful...\r\n",
  1093. CsDatabaseRestorePath);
  1094. CL_LOGCLUSINFO( SERVICE_CLUSTER_DATABASE_RESTORE_SUCCESSFUL );
  1095. }
  1096. lstrcat(szQuorumFileName, cszQuoFileName);
  1097. ClRtlLogPrint(LOG_NOISE,
  1098. "[DM] DmpQuoObjNotifyCb: the name of the quorum file is %1!ls!\r\n",
  1099. szQuorumFileName);
  1100. //
  1101. // Chittur Subbaraman (chitturs) - 12/4/99
  1102. //
  1103. // If the quorum log file is found to be missing or corrupt,
  1104. // reset it only under the following conditions, else
  1105. // fail the log creation and halt the node.
  1106. //
  1107. // (1) A freshly formed cluster,
  1108. // (2) The user has chosen to reset the log since the user
  1109. // does not have a backup.
  1110. // (3) After the quorum resource has successfully come
  1111. // online on this node and the DM has been initialized
  1112. // successfully. This is because the sanity of the
  1113. // quorum log file has already been verified at
  1114. // initialization and the chances of the quorum log
  1115. // missing or getting corrputed after that are not
  1116. // so high (due to it being held open by the cluster
  1117. // service) and so it is not worth halting the node
  1118. // during run-time.
  1119. //
  1120. if ((CsFirstRun && !CsUpgrade) ||
  1121. (CsResetQuorumLog) ||
  1122. (gbDmInited == TRUE))
  1123. {
  1124. ClRtlLogPrint(LOG_NOISE,
  1125. "[DM] DmpQuoObjNotifyCb: Will try to reset Quorum log if file not found or if corrupt\r\n");
  1126. bForceReset = TRUE;
  1127. }
  1128. // open the log file
  1129. ghQuoLog = LogCreate(szQuorumFileName, dwMaxQuoLogSize,
  1130. (PLOG_GETCHECKPOINT_CALLBACK)DmpGetSnapShotCb, NULL,
  1131. bForceReset, &FirstLsn);
  1132. if (!ghQuoLog)
  1133. {
  1134. dwError = GetLastError();
  1135. ClRtlLogPrint(LOG_UNUSUAL,
  1136. "[DM] DmpQuoObjNotifyCb: Quorum log could not be opened, error = 0x%1!08lx!\r\n",
  1137. dwError);
  1138. CL_LOGFAILURE(dwError);
  1139. CsInconsistencyHalt(ERROR_QUORUMLOG_OPEN_FAILED);
  1140. }
  1141. else
  1142. {
  1143. ClRtlLogPrint(LOG_NOISE,
  1144. "[DM] DmpQuoObjNotifyCb: Quorum log opened\r\n");
  1145. }
  1146. if (gbNeedToCheckPoint && ghQuoLog)
  1147. {
  1148. //take a checkpoint and set the flag to FALSE.
  1149. gbNeedToCheckPoint = FALSE;
  1150. //get a checkpoint database
  1151. ClRtlLogPrint(LOG_NOISE,
  1152. "[DM] DmpQuoObjNotifyCb - taking a checkpoint\r\n");
  1153. //
  1154. // Chittur Subbaraman (chitturs) - 6/3/99
  1155. //
  1156. // Make sure the gLockDmpRoot is held before LogCheckPoint is called
  1157. // so as to maintain the ordering between this lock and the log lock.
  1158. //
  1159. ACQUIRE_SHARED_LOCK(gLockDmpRoot);
  1160. dwError = LogCheckPoint(ghQuoLog, TRUE, NULL, 0);
  1161. RELEASE_LOCK(gLockDmpRoot);
  1162. if (dwError != ERROR_SUCCESS)
  1163. {
  1164. ClRtlLogPrint(LOG_CRITICAL,
  1165. "[DM] DmpEventHandler - Failed to take a checkpoint in the log file, error = 0x%1!08lx!\r\n",
  1166. dwError);
  1167. CL_UNEXPECTED_ERROR(dwError);
  1168. CsInconsistencyHalt(dwError);
  1169. }
  1170. }
  1171. //if the checkpoint timer doesnt already exist
  1172. //check if the timer has already been created - we might
  1173. // get two post online notifications
  1174. //and dont cause a timer leak
  1175. if (!ghCheckpointTimer)
  1176. {
  1177. ghCheckpointTimer = CreateWaitableTimer(NULL, FALSE, NULL);
  1178. if (!ghCheckpointTimer)
  1179. {
  1180. CL_UNEXPECTED_ERROR(dwError = GetLastError());
  1181. }
  1182. else
  1183. {
  1184. DWORD dwCheckpointInterval;
  1185. dwError = DmpGetCheckpointInterval(&dwCheckpointInterval);
  1186. CL_ASSERT(dwError == ERROR_SUCCESS);
  1187. //add a timer to take periodic checkpoints
  1188. AddTimerActivity(ghCheckpointTimer, dwCheckpointInterval,
  1189. 1, DmpCheckpointTimerCb, &ghQuoLog);
  1190. }
  1191. }
  1192. }
  1193. //SS:completion of hack, revert to enabling pop-ups
  1194. RtlSetThreadErrorMode(OldHardErrorValue, NULL);
  1195. }
  1196. if (ghQuoLogOpenEvent)
  1197. {
  1198. //this is the first notification after the form
  1199. //allow the initialization to continue after rolling
  1200. //back the changes
  1201. SetEvent(ghQuoLogOpenEvent);
  1202. }
  1203. break;
  1204. case NOTIFY_RESOURCE_FAILED:
  1205. case NOTIFY_RESOURCE_PREOFFLINE:
  1206. case NOTIFY_RESOURCE_OFFLINEPENDING:
  1207. ClRtlLogPrint(LOG_NOISE,
  1208. "[DM] DmpQuoObjNotifyCb: Quorum resource offline/offlinepending/preoffline\r\n");
  1209. gbIsQuoResOnline = FALSE;
  1210. if (ghQuoLog)
  1211. {
  1212. HLOG hQuoLog;
  1213. //stop the checkpoint timer
  1214. if (ghCheckpointTimer)
  1215. {
  1216. RemoveTimerActivity(ghCheckpointTimer);
  1217. ghCheckpointTimer = NULL;
  1218. }
  1219. //
  1220. // Make sure the ghQuoLog variable is NULLed out with lock held exclusively BEFORE the log
  1221. // is closed. This will prevent race cases in which another thread reads the ghQuoLog variable and
  1222. // assumes blindly that the log is open.
  1223. //
  1224. ACQUIRE_EXCLUSIVE_LOCK( gLockDmpRoot );
  1225. hQuoLog = ghQuoLog;
  1226. ghQuoLog = NULL;
  1227. RELEASE_LOCK( gLockDmpRoot );
  1228. LogClose( hQuoLog );
  1229. //dont try and log after this
  1230. gbIsQuoLoggingOn = FALSE;
  1231. }
  1232. if (ghQuoLogOpenEvent)
  1233. {
  1234. //this is the first notification after the form
  1235. //allow the initialization to continue after rolling
  1236. //back the changes
  1237. SetEvent(ghQuoLogOpenEvent);
  1238. }
  1239. break;
  1240. }
  1241. }
  1242. /****
  1243. @func DWORD | DmpHookEventHandler| This hooks a callback to be invoked whenever
  1244. the state of the quorum resource changes.
  1245. @rdesc Returns a result code. ERROR_SUCCESS on success.
  1246. @comm This is used to monitor the state of nodes and turn quorum logging on or off.
  1247. @xref
  1248. ****/
  1249. DWORD DmpHookEventHandler()
  1250. {
  1251. DWORD dwError;
  1252. dwError = EpRegisterEventHandler(CLUSTER_EVENT_ALL,DmpEventHandler);
  1253. if (dwError != ERROR_SUCCESS)
  1254. {
  1255. ClRtlLogPrint(LOG_UNUSUAL,
  1256. "[DM] DmHookEventHandler: EpRegisterEventHandler failed, error=0x%1!08lx!\r\n",
  1257. dwError);
  1258. CL_UNEXPECTED_ERROR( dwError );
  1259. }
  1260. return(dwError);
  1261. }
  1262. /****
  1263. @func DWORD | DmpEventHandler| This routine handles events for the Cluster
  1264. Database Manager.
  1265. @parm CLUSTER_EVENT | Event | The event to be processed. Only one event at a time.
  1266. If the event is not handled, return ERROR_SUCCESS.
  1267. @parm PVOID| pContext | A pointer to context associated with the particular event.
  1268. @rdesc Returns ERROR_SUCCESS else a Win32 error code on other errors.
  1269. @comm This is used to monitor the state of nodes and turn quorum logging on or off.
  1270. @xref
  1271. ****/
  1272. DWORD WINAPI DmpEventHandler(
  1273. IN CLUSTER_EVENT Event,
  1274. IN PVOID pContext
  1275. )
  1276. {
  1277. DWORD dwError=ERROR_SUCCESS;
  1278. BOOL bAreAllNodesUp;
  1279. switch ( Event ) {
  1280. case CLUSTER_EVENT_NODE_UP:
  1281. bAreAllNodesUp = TRUE;
  1282. if ((dwError = OmEnumObjects(ObjectTypeNode, DmpNodeObjEnumCb, &bAreAllNodesUp, NULL))
  1283. != ERROR_SUCCESS)
  1284. {
  1285. ClRtlLogPrint(LOG_UNUSUAL,
  1286. "[DM]DmpEventHandler : OmEnumObjects returned, error=0x%1!08lx!\r\n",
  1287. dwError);
  1288. }
  1289. else
  1290. {
  1291. if (bAreAllNodesUp)
  1292. {
  1293. ClRtlLogPrint(LOG_NOISE,
  1294. "[DM] DmpEventHandler - node is up, turning quorum logging off\r\n");
  1295. gbIsQuoLoggingOn = FALSE;
  1296. }
  1297. }
  1298. break;
  1299. case CLUSTER_EVENT_NODE_DOWN:
  1300. if (!gbIsQuoLoggingOn)
  1301. {
  1302. HANDLE hThread = NULL;
  1303. DWORD dwThreadId;
  1304. //
  1305. // Chittur Subbaraman (chitturs) - 7/23/99
  1306. //
  1307. // Create a new thread to handle the checkpointing on a
  1308. // node down. This is necessary since we don't want the
  1309. // DM node down handler to be blocked in any fashion. If
  1310. // it is blocked since FmCheckQuorumState couldn't get the
  1311. // quorum group lock and some other thread got the group
  1312. // lock and is waiting for the GUM lock, then we have
  1313. // an immediate deadlock. Only after this node down
  1314. // handler finishes, any subsequent future node down
  1315. // processing can be started.
  1316. //
  1317. ClRtlLogPrint(LOG_NOISE,
  1318. "[DM] DmpEventHandler - Node is down, turn quorum logging on...\r\n");
  1319. gbIsQuoLoggingOn = TRUE;
  1320. ClRtlLogPrint(LOG_NOISE,
  1321. "[DM] DmpEventHandler - Create thread to handle node down event...\r\n");
  1322. hThread = CreateThread( NULL,
  1323. 0,
  1324. DmpHandleNodeDownEvent,
  1325. NULL,
  1326. 0,
  1327. &dwThreadId );
  1328. if ( hThread == NULL )
  1329. {
  1330. dwError = GetLastError();
  1331. ClRtlLogPrint(LOG_CRITICAL,
  1332. "[DM] DmpEventHandler - Unable to create thread to handle node down event. Error=0x%1!08lx!\r\n",
  1333. dwError);
  1334. CsInconsistencyHalt( dwError );
  1335. }
  1336. CloseHandle( hThread );
  1337. }
  1338. break;
  1339. case CLUSTER_EVENT_NODE_CHANGE:
  1340. break;
  1341. case CLUSTER_EVENT_NODE_ADDED:
  1342. break;
  1343. case CLUSTER_EVENT_NODE_DELETED:
  1344. break;
  1345. case CLUSTER_EVENT_NODE_JOIN:
  1346. break;
  1347. }
  1348. return(dwError);
  1349. } // DmpEventHandler
  1350. /****
  1351. @func DWORD | DmpNodeObjEnumCb| This is a callback that is called when node
  1352. objects are enumberate by the dm.
  1353. @parm PVOID | pContext| A pointer to a DMLOGRECORD structure.
  1354. @parm PVOID | pObject| A pointer to quorum resource object.
  1355. @parm DWORD | dwNotification| A pointer to a DMLOGRECORD structure.
  1356. @rdesc Returns a result code. ERROR_SUCCESS on success.
  1357. @xref
  1358. ****/
  1359. BOOL DmpNodeObjEnumCb(IN BOOL *pbAreAllNodesUp, IN PVOID pContext2,
  1360. IN PVOID pNode, IN LPCWSTR szName)
  1361. {
  1362. if ((NmGetNodeState(pNode) != ClusterNodeUp) &&
  1363. (NmGetNodeState(pNode) != ClusterNodePaused))
  1364. *pbAreAllNodesUp = FALSE;
  1365. //if any of the nodes is down fall out
  1366. return(*pbAreAllNodesUp);
  1367. }
  1368. /****
  1369. @func BOOL | DmpGetSnapShotCb| This callback is invoked when the logger
  1370. is asked to take a checkpoint record for the cluster registry.
  1371. @parm PVOID| pContext | The checkpoint context passed into LogCreate.
  1372. @parm LPWSTR | szChkPtFile | The name of the file in which to take a checkpoint.
  1373. @parm LPDWORD | pdwChkPtSequence | The sequence number related with this
  1374. checkpoint is returned in this.
  1375. @rdesc Returns a result code. ERROR_SUCCESS on success. If the file corresponding
  1376. to this checkpoint already exists, it will return ERROR_ALREADY_EXISTS and
  1377. szChkPtFile will be set to the name of the file.
  1378. @comm LogCheckPoint() calls this function when the log manager is asked to checkpoint the
  1379. dm database.
  1380. @xref
  1381. ****/
  1382. DWORD WINAPI DmpGetSnapShotCb(IN LPCWSTR szPathName, IN PVOID pContext,
  1383. OUT LPWSTR szChkPtFile, OUT LPDWORD pdwChkPtSequence)
  1384. {
  1385. DWORD dwError = ERROR_SUCCESS;
  1386. WCHAR szFilePrefix[MAX_PATH] = L"chkpt";
  1387. WCHAR szTempFile[MAX_PATH] = L"";
  1388. ACQUIRE_SHARED_LOCK( gLockDmpRoot );
  1389. szChkPtFile[0] = L'\0';
  1390. //
  1391. // Chittur Subbaraman (chitturs) - 5/1/2000
  1392. //
  1393. // Checkpoint file name is based on registry sequence number. It is possible that two
  1394. // or more consecutive calls to this function to take checkpoints may read the same
  1395. // registry sequence number. Thus, if DmGetDatabase fails for some reason, it is possible
  1396. // that an existing checkpoint file will get corrupted. Thus, even though the quorum log
  1397. // marks a 'start checkpoint record' and an 'end checkpoint record', it could turn out
  1398. // to be useless if this function manages to corrupt an existing checkpoint file. To solve
  1399. // this problem, we first generate a temp file, take a cluster hive snapshot as this temp
  1400. // file, then atomically move the temp file to the final checkpoint file using the MoveFileEx
  1401. // function.
  1402. //
  1403. //
  1404. // Create a new unique temp file name
  1405. //
  1406. if ( !QfsGetTempFileName( szPathName, szFilePrefix, 0, szTempFile ) )
  1407. {
  1408. dwError = GetLastError();
  1409. ClRtlLogPrint(LOG_UNUSUAL,
  1410. "[LM] DmpGetSnapShotCb: Failed to generate a temp file name, PathName=%1!ls!, FilePrefix=%2!ls!, Error=0x%3!08lx!\r\n",
  1411. szPathName, szFilePrefix, dwError);
  1412. goto FnExit;
  1413. }
  1414. dwError = DmCommitRegistry(); // Ensure up-to-date snapshot
  1415. if ( dwError != ERROR_SUCCESS )
  1416. {
  1417. ClRtlLogPrint(LOG_CRITICAL,
  1418. "[LM] DmpGetSnapShotCb: DmCommitRegistry() failed, Error=0x%1!08lx!\r\n",
  1419. dwError);
  1420. goto FnExit;
  1421. }
  1422. dwError = DmGetDatabase( DmpRoot, szTempFile );
  1423. ClRtlLogPrint(LOG_NOISE,
  1424. "[DM] DmpGetSnapShotCb: DmpGetDatabase returned 0x%1!08lx!\r\n",
  1425. dwError);
  1426. if ( dwError == ERROR_SUCCESS )
  1427. {
  1428. *pdwChkPtSequence = DmpGetRegistrySequence();
  1429. //
  1430. // Create a checkpoint file name based on the registry sequence number
  1431. //
  1432. if ( !QfsGetTempFileName( szPathName, szFilePrefix, *pdwChkPtSequence, szChkPtFile ) )
  1433. {
  1434. dwError = GetLastError();
  1435. ClRtlLogPrint(LOG_UNUSUAL,
  1436. "[LM] DmpGetSnapShotCb: Failed to generate a chkpt file name, PathName=%1!ls!, FilePrefix=%2!ls!, Error=0x%3!08lx!\r\n",
  1437. szPathName, szFilePrefix, dwError);
  1438. //
  1439. // Reset the file name to null, as this information will be used to determine
  1440. // if the checkpoint was taken
  1441. //
  1442. szChkPtFile[0] = L'\0';
  1443. goto FnExit;
  1444. }
  1445. ClRtlLogPrint(LOG_NOISE,
  1446. "[LM] DmpGetSnapshotCb: Checkpoint file name=%1!ls! Seq#=%2!d!\r\n",
  1447. szChkPtFile, *pdwChkPtSequence);
  1448. if ( !QfsMoveFileEx( szTempFile, szChkPtFile, MOVEFILE_REPLACE_EXISTING | MOVEFILE_WRITE_THROUGH ) )
  1449. {
  1450. dwError = GetLastError();
  1451. ClRtlLogPrint(LOG_UNUSUAL,
  1452. "[LM] DmpGetSnapShotCb: Failed to move the temp file to checkpoint file, TempFileName=%1!ls!, ChkPtFileName=%2!ls!, Error=0x%3!08lx!\r\n",
  1453. szTempFile, szChkPtFile, dwError);
  1454. //
  1455. // Reset the file name to null, as this information will be used to determine
  1456. // if the checkpoint was taken
  1457. //
  1458. szChkPtFile[0] = L'\0';
  1459. goto FnExit;
  1460. }
  1461. }
  1462. FnExit:
  1463. RELEASE_LOCK(gLockDmpRoot);
  1464. if ( dwError != ERROR_SUCCESS )
  1465. {
  1466. QfsDeleteFile( szTempFile );
  1467. }
  1468. return ( dwError );
  1469. }
  1470. /****
  1471. @func BOOL WINAPI | DmpLogApplyChangesCb| This callback walks through the records in
  1472. the quorum logs and applies changes to the local database.
  1473. @parm PVOID | pContext | The event to be processed. Only one event at a time.
  1474. If the event is not handled, return ERROR_SUCCESS.
  1475. @parm LSN | Lsn | Lsn of the record.
  1476. @parm RMID | Resource | The resource id of the entity that logged this record.
  1477. @parm RMTYPE | ResourceType | The record type that is specific to the resource id.
  1478. @parm TRID | Transaction | The sequence number of the transaction.
  1479. @parm const PVOID | pLogData | A pointer to the record data.
  1480. @parm DWORD | DataLength | The length of the data in bytes.
  1481. @rdesc Returns TRUE to continue scan else returns FALSE.
  1482. @comm This function is called at initialization when a cluster is being formed to apply
  1483. transactions from the quorum log to the local cluster database.
  1484. @xref
  1485. ****/
  1486. BOOL WINAPI DmpLogApplyChangesCb(
  1487. IN PVOID pContext,
  1488. IN LSN Lsn,
  1489. IN RMID Resource,
  1490. IN RMTYPE ResourceType,
  1491. IN TRID Transaction,
  1492. IN TRTYPE TransactionType,
  1493. IN const PVOID pLogData,
  1494. IN DWORD DataLength)
  1495. {
  1496. DWORD Status;
  1497. PDM_LOGSCAN_CONTEXT pDmAppliedChangeContext = (PDM_LOGSCAN_CONTEXT) pContext;
  1498. TRSTATE trXsactionState;
  1499. BOOL bRet = TRUE;
  1500. CL_ASSERT(pDmAppliedChangeContext);
  1501. //if the resource id is not the same as dm..ignore..go to the next one
  1502. switch(TransactionType)
  1503. {
  1504. case TTStartXsaction:
  1505. Status = LogFindXsactionState(ghQuoLog, Lsn, Transaction, &trXsactionState);
  1506. if (Status != ERROR_SUCCESS)
  1507. {
  1508. //there was an error
  1509. ClRtlLogPrint(LOG_NOISE, "[DM] DmpLogApplyChangesCb ::LogFindXsaction failed, error=0x%1!08lx!\r\n",
  1510. Status);
  1511. //assume unknown state
  1512. CL_LOGFAILURE(Status);
  1513. trXsactionState = XsactionUnknown;
  1514. }
  1515. //if the transaction is successful apply it, else continue
  1516. if (trXsactionState == XsactionCommitted)
  1517. {
  1518. Status = LogScanXsaction(ghQuoLog, Lsn, Transaction, DmpApplyTransactionCb,
  1519. NULL);
  1520. if (Status != ERROR_SUCCESS)
  1521. {
  1522. ClRtlLogPrint(LOG_NOISE,
  1523. "[DM] DmpLogApplyChangesCb :LogScanTransaction for committed record failed, error=0x%1!08lx!\r\n",
  1524. Status);
  1525. bRet = FALSE;
  1526. CL_LOGFAILURE(Status);
  1527. break;
  1528. }
  1529. pDmAppliedChangeContext->dwSequence = Transaction;
  1530. }
  1531. else
  1532. {
  1533. ClRtlLogPrint(LOG_NOISE, "[DM] TransactionState = %1!u!\r\n",
  1534. trXsactionState);
  1535. }
  1536. break;
  1537. case TTCompleteXsaction:
  1538. bRet = DmpApplyTransactionCb(NULL, Lsn, Resource, ResourceType,
  1539. Transaction, pLogData, DataLength);
  1540. pDmAppliedChangeContext->dwSequence = Transaction;
  1541. break;
  1542. default:
  1543. CL_ASSERT(FALSE);
  1544. }
  1545. return(bRet);
  1546. }
  1547. BOOL WINAPI DmpApplyTransactionCb(
  1548. IN PVOID pContext,
  1549. IN LSN Lsn,
  1550. IN RMID Resource,
  1551. IN RMTYPE ResourceType,
  1552. IN TRID TransactionId,
  1553. IN const PVOID pLogData,
  1554. IN DWORD dwDataLength)
  1555. {
  1556. DWORD Status;
  1557. switch(ResourceType)
  1558. {
  1559. case DmUpdateCreateKey:
  1560. ClRtlLogPrint(LOG_NOISE,"[DM] DmpLogScanCb::DmUpdateCreateKey\n");
  1561. //SS: we dont care at this point as to where the update originated
  1562. Status = DmpUpdateCreateKey(FALSE,
  1563. GET_ARG(pLogData,0),
  1564. GET_ARG(pLogData,1),
  1565. GET_ARG(pLogData,2));
  1566. break;
  1567. case DmUpdateDeleteKey:
  1568. ClRtlLogPrint(LOG_NOISE,"[DM] DmUpdateDeleteKey \n");
  1569. Status = DmpUpdateDeleteKey(FALSE,
  1570. (PDM_DELETE_KEY_UPDATE)((PBYTE)pLogData));
  1571. break;
  1572. case DmUpdateSetValue:
  1573. ClRtlLogPrint(LOG_NOISE,"[DM] DmUpdateSetValue \n");
  1574. Status = DmpUpdateSetValue(FALSE,
  1575. (PDM_SET_VALUE_UPDATE)((PBYTE)pLogData));
  1576. break;
  1577. case DmUpdateDeleteValue:
  1578. ClRtlLogPrint(LOG_NOISE,"[DM] DmUpdateDeleteValue\n");
  1579. Status = DmpUpdateDeleteValue(FALSE,
  1580. (PDM_DELETE_VALUE_UPDATE)((PBYTE)pLogData));
  1581. break;
  1582. case DmUpdateJoin:
  1583. ClRtlLogPrint(LOG_UNUSUAL,"[DM] DmUpdateJoin\n");
  1584. Status = ERROR_SUCCESS;
  1585. break;
  1586. default:
  1587. ClRtlLogPrint(LOG_UNUSUAL,"[DM] DmpLogScanCb:uType = %1!u!\r\n",
  1588. ResourceType);
  1589. Status = ERROR_INVALID_DATA;
  1590. CL_UNEXPECTED_ERROR(ERROR_INVALID_DATA);
  1591. break;
  1592. }
  1593. return(TRUE);
  1594. }
  1595. /****
  1596. @func WORD| DmpLogCheckPtCb| A callback fn for DM
  1597. to take a checkpoint to the log if the quorum
  1598. resource is online on this node.
  1599. @rdesc Returns ERROR_SUCCESS for success, else returns the error code.
  1600. @comm This callback is called when the quorum resource
  1601. is online on this node. Since the quorum resource
  1602. synchronous callbacks are called before the resource
  1603. state changes are propagated, if the quorum is online
  1604. the log must be open.
  1605. @xref
  1606. ****/
  1607. void DmpLogCheckPointCb()
  1608. {
  1609. DWORD dwError;
  1610. //
  1611. // Chittur Subbaraman (chitturs) - 9/22/99
  1612. //
  1613. // If the quorum logging switch is off, don't do anything.
  1614. //
  1615. if (CsNoQuorumLogging) return;
  1616. //once it is online the log file should be open
  1617. //SS:BUGS: should we log something in the eventlog
  1618. if (ghQuoLog)
  1619. {
  1620. //
  1621. // Chittur Subbaraman (chitturs) - 6/3/99
  1622. //
  1623. // Make sure the gLockDmpRoot is held before LogCheckPoint is called
  1624. // so as to maintain the ordering between this lock and the log lock.
  1625. //
  1626. ACQUIRE_SHARED_LOCK(gLockDmpRoot);
  1627. //get a checkpoint database
  1628. dwError = LogCheckPoint(ghQuoLog, TRUE, NULL, 0);
  1629. RELEASE_LOCK(gLockDmpRoot);
  1630. if (dwError != ERROR_SUCCESS)
  1631. {
  1632. ClRtlLogPrint(LOG_CRITICAL,
  1633. "[DM] DmpLogCheckPointCb - Failed to take a checkpoint in the log file, error=0x%1!08lx!\r\n",
  1634. dwError);
  1635. CL_UNEXPECTED_ERROR(dwError);
  1636. }
  1637. ClRtlLogPrint(LOG_NOISE,
  1638. "[DM] DmpLogCheckPointCb - taken checkpoint\r\n");
  1639. }
  1640. else
  1641. {
  1642. CsInconsistencyHalt(ERROR_QUORUMLOG_OPEN_FAILED);
  1643. }
  1644. }
  1645. /****
  1646. @func WORD| DmGetQuorumLogPath| Reads the quorum log file path configured in
  1647. the registry during setup.
  1648. @parm LPWSTR | szQuorumLogPath | A pointer to a wide string of size MAX_PATH.
  1649. @parm DWORD | dwSize | The size of szQuorumLogPath in bytes.
  1650. @rdesc Returns ERROR_SUCCESS for success, else returns the error code.
  1651. @comm If the quorum resource is not cabaple of logging this should not be set.
  1652. @xref
  1653. ****/
  1654. DWORD DmGetQuorumLogPath(LPWSTR szQuorumLogPath, DWORD dwSize)
  1655. {
  1656. DWORD Status;
  1657. Status = DmQuerySz( DmQuorumKey,
  1658. cszPath,
  1659. &szQuorumLogPath,
  1660. &dwSize,
  1661. &dwSize);
  1662. if (Status != ERROR_SUCCESS) {
  1663. ClRtlLogPrint(LOG_UNUSUAL, "[DM] DmGetQuorumLogPath failed, error=%1!u!\n", Status);
  1664. goto FnExit;
  1665. }
  1666. FnExit:
  1667. return(Status);
  1668. }
  1669. /****
  1670. @func WORD| DmpGetCheckpointInterval| Reads the checkpoint interval
  1671. from the registry, else returns the default.
  1672. @parm LPDWORD | pdwCheckpointInterval | A pointer to DWORD where
  1673. the checkpoint interval, in secs, is returned.
  1674. @rdesc Returns ERROR_SUCCESS for success, else returns the error code.
  1675. @comm The default checkpoint interval is 4 hours. The registry must be configured
  1676. in units of hours.
  1677. @xref
  1678. ****/
  1679. DWORD DmpGetCheckpointInterval(
  1680. OUT LPDWORD pdwCheckpointInterval)
  1681. {
  1682. DWORD dwDefCheckpointInterval = DEFAULT_CHECKPOINT_INTERVAL;
  1683. DWORD dwStatus = ERROR_SUCCESS;
  1684. dwStatus = DmQueryDword( DmQuorumKey,
  1685. CLUSREG_NAME_CHECKPOINT_INTERVAL,
  1686. pdwCheckpointInterval,
  1687. &dwDefCheckpointInterval);
  1688. if (dwStatus != ERROR_SUCCESS) {
  1689. ClRtlLogPrint(LOG_UNUSUAL, "[DM] DmGetCheckpointInterval Failed, error=%1!u!\n",
  1690. dwStatus);
  1691. goto FnExit;
  1692. }
  1693. //the checkpoint interval cant be less than 1 hour or more than 1 day
  1694. if ((*pdwCheckpointInterval < 1) || (*pdwCheckpointInterval>24))
  1695. *pdwCheckpointInterval = DEFAULT_CHECKPOINT_INTERVAL;
  1696. //convert to msecs
  1697. *pdwCheckpointInterval = *pdwCheckpointInterval * 60 * 60 * 1000;
  1698. FnExit:
  1699. return(dwStatus);
  1700. }
  1701. /****
  1702. @func WORD| DmGetQuorumLogMaxSize| Reads the quorum log file max size.
  1703. @parm LPDWORD | pdwMaxLogSize| A pointer to a dword containing the size.
  1704. @rdesc Returns ERROR_SUCCESS for success, else returns the error code.
  1705. @comm If the quorum resource is not cabaple of logging this should not be set.
  1706. @xref
  1707. ****/
  1708. DWORD DmGetQuorumLogMaxSize(LPDWORD pdwMaxLogSize)
  1709. {
  1710. DWORD Status;
  1711. DWORD dwDefaultLogMaxSize = CLUSTER_QUORUM_DEFAULT_MAX_LOG_SIZE;
  1712. Status = DmQueryDword( DmQuorumKey,
  1713. cszMaxQuorumLogSize,
  1714. pdwMaxLogSize,
  1715. &dwDefaultLogMaxSize);
  1716. if (Status != ERROR_SUCCESS) {
  1717. ClRtlLogPrint(LOG_UNUSUAL, "[DM] DmGetQuorumLogMaxSize failed, error=%1!u!\n",Status);
  1718. }
  1719. return(Status);
  1720. }
  1721. /****
  1722. @func DWORD | DmpCheckDiskSpace| Called to check for the disk space
  1723. on the quorum resource after it is brought online and logs are rolled up.
  1724. @rdesc ERROR_SUCCESS if successful. Win32 error code if something horrible happened.
  1725. @comm This function checks if there is enough disk space and sets up
  1726. a periodic timer to monitor the disk space.
  1727. @xref <f DmpDiskManage>
  1728. ****/
  1729. DWORD DmpCheckDiskSpace()
  1730. {
  1731. DWORD dwError = ERROR_SUCCESS;
  1732. WCHAR szQuoLogPathName[MAX_PATH];
  1733. ULARGE_INTEGER liNumTotalBytes;
  1734. ULARGE_INTEGER liNumFreeBytes;
  1735. //if you own the quorum resource, try to check the size
  1736. if (gpQuoResource && AMIOWNEROFQUORES(gpQuoResource) && gbIsQuoResOnline)
  1737. {
  1738. //get the path
  1739. if ((dwError = DmGetQuorumLogPath(szQuoLogPathName, sizeof(szQuoLogPathName)))
  1740. != ERROR_SUCCESS)
  1741. {
  1742. ClRtlLogPrint(LOG_NOISE,
  1743. "[DM] DmpCheckDiskSpace: Quorum log file is not configured, error=%1!u!\r\n",
  1744. dwError);
  1745. //log something in the event log
  1746. CL_LOGFAILURE(dwError);
  1747. goto FnExit;
  1748. }
  1749. //check the minimum space on the quorum disk
  1750. if (!QfsGetDiskFreeSpaceEx(szQuoLogPathName, &liNumFreeBytes, &liNumTotalBytes,
  1751. NULL))
  1752. {
  1753. dwError = GetLastError();
  1754. ClRtlLogPrint(LOG_NOISE,
  1755. "[DM] DmpCheckDiskSpace: GetDiskFreeSpace returned error=0x%1!08lx!\r\n",
  1756. dwError);
  1757. goto FnExit;
  1758. }
  1759. //if not available, log something in the event log and bail out
  1760. if ((liNumFreeBytes.HighPart == 0) &&
  1761. (liNumFreeBytes.LowPart < DISKSPACE_INIT_MINREQUIRED))
  1762. {
  1763. CL_LOGCLUSWARNING(LM_DISKSPACE_HIGH_WATERMARK);
  1764. dwError = ERROR_CLUSTERLOG_NOT_ENOUGH_SPACE;
  1765. goto FnExit;
  1766. }
  1767. }
  1768. FnExit:
  1769. return(dwError);
  1770. }
  1771. /****
  1772. @func DWORD | DmpDiskManage | This is the callback registered to perform
  1773. periodic disk check functions on the quorum resource.
  1774. @comm If the disk space has dipped below the lowwatermark, this gracefully
  1775. shuts the cluster service. If the disk space dips below the high
  1776. watermark, it sends an alert to registered recipients.
  1777. @xref <f DmpCheckDiskSpace>
  1778. ****/
  1779. void WINAPI DmpDiskManage(
  1780. IN HANDLE hTimer,
  1781. IN PVOID pContext)
  1782. {
  1783. DWORD dwError;
  1784. WCHAR szQuoLogPathName[MAX_PATH];
  1785. ULARGE_INTEGER liNumTotalBytes;
  1786. ULARGE_INTEGER liNumFreeBytes;
  1787. static DWORD dwNumWarnings=0;
  1788. if (!gpQuoResource || (!AMIOWNEROFQUORES(gpQuoResource)) ||
  1789. (!gbIsQuoResOnline || (CsNoQuorumLogging)))
  1790. {
  1791. //the owner of the quorum resource checks the disk space
  1792. //the quorum disk shouldnt go offline
  1793. //skip checking if no quorum logging is required
  1794. return;
  1795. }
  1796. //get the path
  1797. if ((dwError = DmGetQuorumLogPath(szQuoLogPathName, sizeof(szQuoLogPathName)))
  1798. != ERROR_SUCCESS)
  1799. {
  1800. ClRtlLogPrint(LOG_NOISE,
  1801. "[DM] DmpDiskManage: Quorum log file is not configured, error=%1!u!\r\n",
  1802. dwError);
  1803. //log something in the event log
  1804. CL_UNEXPECTED_ERROR(dwError);
  1805. goto FnExit;
  1806. }
  1807. //check the minimum space on the quorum disk
  1808. if (!QfsGetDiskFreeSpaceEx(szQuoLogPathName, &liNumFreeBytes, &liNumTotalBytes,
  1809. NULL))
  1810. {
  1811. dwError = GetLastError();
  1812. ClRtlLogPrint(LOG_NOISE,
  1813. "[DM] DmpDiskManage: GetDiskFreeSpace returned error=0x%1!08lx!\r\n",
  1814. dwError);
  1815. CL_LOGFAILURE(dwError);
  1816. goto FnExit;
  1817. }
  1818. if ((liNumFreeBytes.HighPart == 0) &&
  1819. (liNumFreeBytes.LowPart < DISKSPACE_LOW_WATERMARK))
  1820. {
  1821. //reached the low water mark
  1822. dwNumWarnings++;
  1823. //ss: we can control the rate at which we put things in the
  1824. //event log but once every five minutes is not bad.
  1825. //ss: post an event ???
  1826. ClRtlLogPrint(LOG_NOISE,
  1827. "[DM] DmpDiskManage: GetDiskFreeSpace - Not enough disk space, Avail=0x%1!08lx!\r\n",
  1828. liNumFreeBytes.LowPart);
  1829. CL_LOGCLUSWARNING(LM_DISKSPACE_LOW_WATERMARK);
  1830. }
  1831. else
  1832. {
  1833. gbIsQuoResEnoughSpace = TRUE;
  1834. dwNumWarnings = 0;
  1835. }
  1836. FnExit:
  1837. return;
  1838. }
  1839. /****
  1840. @func DWORD | DmpCheckpointTimerCb | This is the callback registered to perform
  1841. periodic checkpointing on the quorum log.
  1842. @parm IN HANDLE| hTimer| The timer associated with checkpointing interval.
  1843. @parm IN PVOID | pContext | A pointer to the handle for the quorum log file.
  1844. @comm This helps in backups. If you want to take a cluster backup by making
  1845. a copy of the quorum.log and checkpoint files, then if both nodes have
  1846. been up for a long time both the files can be old. By taking a periodic
  1847. checkpoint we guarantee that they are not more than n hours old.
  1848. ****/
  1849. void WINAPI DmpCheckpointTimerCb(
  1850. IN HANDLE hTimer,
  1851. IN PVOID pContext)
  1852. {
  1853. HLOG hQuoLog;
  1854. DWORD dwError;
  1855. //
  1856. // Chittur Subbaraman (chitturs) - 6/3/99
  1857. //
  1858. // Make sure the gLockDmpRoot is held before LogCheckPoint is called
  1859. // so as to maintain the ordering between this lock and the log lock.
  1860. // In addition, we want to read the pContext safely. This is because
  1861. // pContext is a pointer to the log and could change via the SetClusterQuorumResource
  1862. // API.
  1863. //
  1864. ACQUIRE_SHARED_LOCK(gLockDmpRoot);
  1865. hQuoLog = *((HLOG *)pContext);
  1866. if (hQuoLog && gbDmInited)
  1867. {
  1868. //get a checkpoint database
  1869. ClRtlLogPrint(LOG_NOISE,
  1870. "[DM]DmpCheckpointTimerCb- taking a checkpoint\r\n");
  1871. dwError = LogReset(hQuoLog);
  1872. if (dwError != ERROR_SUCCESS)
  1873. {
  1874. ClRtlLogPrint(LOG_CRITICAL,
  1875. "[DM]DmpCheckpointTimerCb - Failed to reset log, error=%1!u!\r\n",
  1876. dwError);
  1877. CL_UNEXPECTED_ERROR(dwError);
  1878. }
  1879. }
  1880. RELEASE_LOCK(gLockDmpRoot);
  1881. }
  1882. /****
  1883. @func DWORD | DmBackupClusterDatabase | Take a fresh checkpoint and
  1884. copy the quorum log and the checkpoint file to the supplied
  1885. path name. This function is called with gQuoLock held.
  1886. @parm IN LPCWSTR | lpszPathName | The directory path name where the
  1887. files have to be backed up. This path must be visible to the
  1888. node on which the quorum resource is online (i.e., this node
  1889. in this case).
  1890. @comm This function first takes a fresh checkpoint, updates the quorum
  1891. log file and then copies the two files to a backup area.
  1892. @rdesc Returns a Win32 error code on failure. ERROR_SUCCESS on success.
  1893. @xref <f DmpLogCheckpointAndBackup> <f DmpRestoreClusterDatabase>
  1894. ****/
  1895. DWORD DmBackupClusterDatabase(
  1896. IN LPCWSTR lpszPathName)
  1897. {
  1898. QfsHANDLE hFindFile = QfsINVALID_HANDLE_VALUE;
  1899. WIN32_FIND_DATA FindData;
  1900. DWORD status = ERROR_SUCCESS;
  1901. LPWSTR szDestPathName = NULL;
  1902. DWORD dwLen;
  1903. //
  1904. // Chittur Subbaraman (chitturs) - 10/12/98
  1905. //
  1906. dwLen = lstrlenW( lpszPathName );
  1907. //
  1908. // It is safer to use dynamic memory allocation for user-supplied
  1909. // path since we don't want to put restrictions on the user
  1910. // on the length of the path that can be supplied. However, as
  1911. // far as our own quorum disk path is concerned, it is system-dependent
  1912. // and static memory allocation for that would suffice.
  1913. //
  1914. szDestPathName = (LPWSTR) LocalAlloc ( LMEM_FIXED,
  1915. ( dwLen + 5 ) *
  1916. sizeof ( WCHAR ) );
  1917. if ( szDestPathName == NULL )
  1918. {
  1919. status = GetLastError();
  1920. ClRtlLogPrint(LOG_NOISE,
  1921. "[DM] DmBackupClusterDatabase: Error %1!d! in allocating memory for %2!ws! !!!\n",
  1922. status,
  1923. lpszPathName);
  1924. CL_LOGFAILURE( status );
  1925. goto FnExit;
  1926. }
  1927. lstrcpyW( szDestPathName, lpszPathName );
  1928. //
  1929. // If the client-supplied path is not already terminated with '\',
  1930. // then add it.
  1931. //
  1932. if ( szDestPathName [dwLen-1] != L'\\' )
  1933. {
  1934. szDestPathName [dwLen++] = L'\\';
  1935. }
  1936. //
  1937. // Add a wild character at the end to search for any file in the
  1938. // supplied directory
  1939. //
  1940. szDestPathName[dwLen++] = L'*';
  1941. szDestPathName[dwLen] = L'\0';
  1942. //
  1943. // Find out whether you can access the supplied path by
  1944. // trying to find some file in the directory.
  1945. //
  1946. hFindFile = QfsFindFirstFile( szDestPathName, &FindData );
  1947. if ( !QfsIsHandleValid(hFindFile) )
  1948. {
  1949. status = GetLastError();
  1950. ClRtlLogPrint(LOG_NOISE,
  1951. "[DM] DmBackupClusterDatabase: Supplied path %1!ws! does not exist, Error=%2!d! !!!\n",
  1952. szDestPathName,
  1953. status);
  1954. goto FnExit;
  1955. }
  1956. //
  1957. // Check whether the log is open. It must be since we already
  1958. // verified that the quorum resource is online on this node and
  1959. // quorum logging is turned on.
  1960. //
  1961. if ( ghQuoLog )
  1962. {
  1963. //
  1964. // Remove the '*' so the same variable can be used.
  1965. //
  1966. szDestPathName [dwLen-1] = L'\0';
  1967. ClRtlLogPrint(LOG_NOISE,
  1968. "[DM] DmBackupClusterDatabase: Attempting to take a checkpoint and then backup to %1!ws!..\n",
  1969. szDestPathName);
  1970. //
  1971. // The gLockDmpRoot needs to be acquired here since otherwise
  1972. // you will get the log lock in the LogCheckPoint( )
  1973. // function and someone else could get the gLockDmpRoot.
  1974. // After you get the log lock, you also try to acquire
  1975. // the gLockDmpRoot in the function DmCommitRegistry.
  1976. // This is a potential deadlock situation and is avoided here.
  1977. //
  1978. ACQUIRE_SHARED_LOCK(gLockDmpRoot);
  1979. status = DmpLogCheckpointAndBackup ( ghQuoLog, szDestPathName );
  1980. RELEASE_LOCK(gLockDmpRoot);
  1981. if ( status == ERROR_SUCCESS )
  1982. {
  1983. ClRtlLogPrint(LOG_NOISE,
  1984. "[DM] DmBackupClusterDatabase: Successfully finished backing up to %1!ws!...\n",
  1985. szDestPathName);
  1986. }
  1987. } else
  1988. {
  1989. ClRtlLogPrint(LOG_UNUSUAL,
  1990. "[DM] DmBackupClusterDatabase: Quorum log could not be opened...\r\n");
  1991. status = ERROR_QUORUMLOG_OPEN_FAILED;
  1992. }
  1993. FnExit:
  1994. QfsFindCloseIfValid ( hFindFile );
  1995. LocalFree ( szDestPathName );
  1996. return ( status );
  1997. }
  1998. /****
  1999. @func DWORD | DmpLogCheckpointAndBackup | Takes a checkpoint, updates the
  2000. quorum log and then copies the files to the supplied path. This
  2001. function is called with the gQuoLock and the gLockDmpRoot held.
  2002. @parm IN HLOG | hLogFile | An identifier for the quorum log file.
  2003. @parm IN LPWSTR | lpszPathName | The path for storing the quorum log
  2004. file, the recent checkpoint file, and the resource registry
  2005. checkpoint files. This path must be visible from this node.
  2006. @comm Called by DmpBackupQuorumLog() to take a checkpoint and then
  2007. take a backup of the cluster database including resource
  2008. registry checkpoint files.
  2009. @rdesc Returns a Win32 error code on failure. ERROR_SUCCESS on success.
  2010. @xref <f DmBackupClusterDatabase>
  2011. ****/
  2012. DWORD DmpLogCheckpointAndBackup(
  2013. IN HLOG hLogFile,
  2014. IN LPWSTR lpszPathName)
  2015. {
  2016. DWORD dwError;
  2017. DWORD dwLen;
  2018. WCHAR szChkPointFilePrefix[MAX_PATH];
  2019. WCHAR szQuoLogPathName[MAX_PATH];
  2020. LPWSTR szDestFileName = NULL;
  2021. WCHAR szSourceFileName[MAX_PATH];
  2022. LPWSTR szDestPathName = NULL;
  2023. LPWSTR lpChkPointFileNameStart;
  2024. LSN Lsn;
  2025. TRID Transaction;
  2026. QfsHANDLE hFile = QfsINVALID_HANDLE_VALUE;
  2027. //
  2028. // Chittur Subbaraman (chitturs) - 10/12/1998
  2029. //
  2030. //
  2031. // Initiate a checkpoint process. Allow a log file reset, if necessary.
  2032. //
  2033. if ( ( dwError = LogCheckPoint( hLogFile, TRUE, NULL, 0 ) )
  2034. != ERROR_SUCCESS )
  2035. {
  2036. ClRtlLogPrint(LOG_UNUSUAL,
  2037. "[DM] DmpLogCheckpointAndBackup::Callback failed to return a checkpoint. Error=%1!u!\r\n",
  2038. dwError);
  2039. CL_LOGFAILURE( dwError );
  2040. LogClose( hLogFile );
  2041. goto FnExit;
  2042. }
  2043. //
  2044. // Get the name of the most recent checkpoint file
  2045. //
  2046. szChkPointFilePrefix[0] = TEXT('\0');
  2047. if ( ( dwError = LogGetLastChkPoint( hLogFile, szChkPointFilePrefix, &Transaction, &Lsn ) )
  2048. != ERROR_SUCCESS )
  2049. {
  2050. ClRtlLogPrint(LOG_UNUSUAL,
  2051. "[DM] DmpLogCheckpointAndBackup::No check point found in the log file. Error=%1!u!\r\n",
  2052. dwError);
  2053. CL_LOGFAILURE( dwError );
  2054. LogClose( hLogFile );
  2055. goto FnExit;
  2056. }
  2057. dwError = DmGetQuorumLogPath( szQuoLogPathName, sizeof( szQuoLogPathName ) );
  2058. if ( dwError != ERROR_SUCCESS )
  2059. {
  2060. dwError = GetLastError();
  2061. ClRtlLogPrint(LOG_UNUSUAL,
  2062. "[DM] DmpLogCheckpointAndBackup::DmGetQuorumLogPath failed, Error = %1!d!\r\n",
  2063. dwError);
  2064. CL_LOGFAILURE( dwError );
  2065. goto FnExit;
  2066. }
  2067. //
  2068. // It is safer to use dynamic memory allocation for user-supplied
  2069. // path since we don't want to put restrictions on the user
  2070. // on the length of the path that can be supplied. However, as
  2071. // far as our own quorum disk path is concerned, it is system-dependent
  2072. // and static memory allocation for that would suffice.
  2073. //
  2074. szDestPathName = (LPWSTR) LocalAlloc ( LMEM_FIXED,
  2075. ( lstrlenW ( lpszPathName ) + 1 ) *
  2076. sizeof ( WCHAR ) );
  2077. if ( szDestPathName == NULL )
  2078. {
  2079. dwError = GetLastError();
  2080. ClRtlLogPrint(LOG_NOISE,
  2081. "[DM] DmpLogCheckpointAndBackup: Error %1!d! in allocating memory for %2!ws! !!!\n",
  2082. dwError,
  2083. lpszPathName);
  2084. CL_LOGFAILURE( dwError );
  2085. goto FnExit;
  2086. }
  2087. //
  2088. // Get the user-supplied destination path name
  2089. //
  2090. lstrcpyW( szDestPathName, lpszPathName );
  2091. szDestFileName = (LPWSTR) LocalAlloc ( LMEM_FIXED,
  2092. ( lstrlenW ( szDestPathName ) + 1 + LOG_MAX_FILENAME_LENGTH ) *
  2093. sizeof ( WCHAR ) );
  2094. if ( szDestFileName == NULL )
  2095. {
  2096. dwError = GetLastError();
  2097. ClRtlLogPrint(LOG_NOISE,
  2098. "[DM] DmpLogCheckpointAndBackup: Error %1!d! in allocating memory for chkpt file name !!!\n",
  2099. dwError);
  2100. CL_LOGFAILURE( dwError );
  2101. goto FnExit;
  2102. }
  2103. //
  2104. // Make an attempt to delete the CLUSBACKUP.DAT file
  2105. //
  2106. lstrcpyW( szDestFileName, szDestPathName );
  2107. lstrcatW( szDestFileName, L"CLUSBACKUP.DAT" );
  2108. //
  2109. // Set the file attribute to normal. Continue even if you
  2110. // fail in this step, but log an error. (Note that you are
  2111. // countering the case in which a destination file with
  2112. // the same name exists in the backup directory already and
  2113. // you are trying to delete it.)
  2114. //
  2115. if ( !QfsSetFileAttributes( szDestFileName, FILE_ATTRIBUTE_NORMAL ) )
  2116. {
  2117. dwError = GetLastError();
  2118. if ( dwError != ERROR_FILE_NOT_FOUND )
  2119. {
  2120. ClRtlLogPrint(LOG_UNUSUAL,
  2121. "[DM] DmpLogCheckpointAndBackup::Error in changing %1!ws! attribute to NORMAL, Error = %2!d!\n",
  2122. szDestFileName,
  2123. dwError);
  2124. }
  2125. }
  2126. if ( !QfsDeleteFile( szDestFileName ) )
  2127. {
  2128. dwError = GetLastError();
  2129. if ( dwError != ERROR_FILE_NOT_FOUND )
  2130. {
  2131. ClRtlLogPrint(LOG_UNUSUAL,
  2132. "[DM] DmpLogCheckpointAndBackup::CLUSBACKUP.DAT exists, but can't delete it, Error = %1!d!\n",
  2133. dwError);
  2134. CL_LOGFAILURE( dwError );
  2135. goto FnExit;
  2136. }
  2137. }
  2138. //
  2139. // Just get the checkpoint file name without any path added.
  2140. // Note that szQuoLogPathName includes the '\'
  2141. //
  2142. dwLen = lstrlenW ( szQuoLogPathName );
  2143. lpChkPointFileNameStart = &szChkPointFilePrefix[dwLen];
  2144. //
  2145. // Now, create the path-included destination file name
  2146. //
  2147. lstrcpyW( szDestFileName, szDestPathName );
  2148. lstrcatW( szDestFileName, lpChkPointFileNameStart );
  2149. //
  2150. // And, the path-included source file name
  2151. //
  2152. lstrcpyW( szSourceFileName, szChkPointFilePrefix );
  2153. //
  2154. // Set the file attribute to normal. Continue even if you
  2155. // fail in this step, but log an error. (Note that you are
  2156. // countering the case in which a destination file with
  2157. // the same name exists in the backup directory already and
  2158. // you are trying to overwrite it.)
  2159. //
  2160. if ( !QfsSetFileAttributes( szDestFileName, FILE_ATTRIBUTE_NORMAL ) )
  2161. {
  2162. dwError = GetLastError();
  2163. if ( dwError != ERROR_FILE_NOT_FOUND )
  2164. {
  2165. ClRtlLogPrint(LOG_UNUSUAL,
  2166. "[DM] DmpLogCheckpointAndBackup::Error in changing %1!ws! attribute to NORMAL, Error = %2!d!\n",
  2167. szDestFileName,
  2168. dwError);
  2169. }
  2170. }
  2171. //
  2172. // Copy the checkpoint file to the destination
  2173. //
  2174. dwError = QfsClRtlCopyFileAndFlushBuffers( szSourceFileName, szDestFileName );
  2175. if ( !dwError )
  2176. {
  2177. dwError = GetLastError();
  2178. ClRtlLogPrint(LOG_UNUSUAL,
  2179. "[DM] DmpLogCheckpointAndBackup::Unable to copy file %1!ws! to %2!ws!, Error = %3!d!\n",
  2180. szSourceFileName,
  2181. szDestFileName,
  2182. dwError);
  2183. CL_LOGFAILURE( dwError );
  2184. goto FnExit;
  2185. }
  2186. //
  2187. // Set the file attribute to read-only. Continue even if you
  2188. // fail in this step, but log an error.
  2189. //
  2190. if ( !QfsSetFileAttributes( szDestFileName, FILE_ATTRIBUTE_READONLY ) )
  2191. {
  2192. dwError = GetLastError();
  2193. ClRtlLogPrint(LOG_UNUSUAL,
  2194. "[DM] DmpLogCheckpointAndBackup::Error in changing %1!ws! attribute to READONLY, Error = %2!d!\n",
  2195. szDestFileName,
  2196. dwError);
  2197. }
  2198. //
  2199. // Now, create the path-included destination file name
  2200. //
  2201. lstrcpyW( szDestFileName, szDestPathName );
  2202. lstrcatW( szDestFileName, cszQuoFileName );
  2203. //
  2204. // And, the path-included source file name
  2205. //
  2206. lstrcpyW( szSourceFileName, szQuoLogPathName );
  2207. lstrcatW( szSourceFileName, cszQuoFileName );
  2208. //
  2209. // Set the destination file attribute to normal. Continue even if you
  2210. // fail in this step, but log an error. (Note that you are
  2211. // countering the case in which a destination file with
  2212. // the same name exists in the backup directory already and
  2213. // you are trying to overwrite it.)
  2214. //
  2215. if ( !QfsSetFileAttributes( szDestFileName, FILE_ATTRIBUTE_NORMAL ) )
  2216. {
  2217. dwError = GetLastError();
  2218. if ( dwError != ERROR_FILE_NOT_FOUND )
  2219. {
  2220. ClRtlLogPrint(LOG_UNUSUAL,
  2221. "[DM] DmpLogCheckpointAndBackup::Error in changing %1!ws! attribute to NORMAL, Error = %2!d!\n",
  2222. szDestFileName,
  2223. dwError);
  2224. }
  2225. }
  2226. //
  2227. // Copy the quorum log file to the destination
  2228. //
  2229. dwError = QfsCopyFile( szSourceFileName, szDestFileName, FALSE );
  2230. if ( !dwError )
  2231. {
  2232. dwError = GetLastError();
  2233. ClRtlLogPrint(LOG_UNUSUAL,
  2234. "[DM] DmpLogCheckpointAndBackup::Unable to copy file %1!ws! to %2!ws!, Error = %3!d!\n",
  2235. szSourceFileName,
  2236. szDestFileName,
  2237. dwError);
  2238. CL_LOGFAILURE( dwError );
  2239. goto FnExit;
  2240. }
  2241. //
  2242. // Set the destination file attribute to read-only. Continue even
  2243. // if you fail in this step, but log an error
  2244. //
  2245. if ( !QfsSetFileAttributes( szDestFileName, FILE_ATTRIBUTE_READONLY ) )
  2246. {
  2247. dwError = GetLastError();
  2248. ClRtlLogPrint(LOG_UNUSUAL,
  2249. "[DM] DmpLogCheckpointAndBackup::Error in changing %1!ws! attribute to READONLY, Error = %2!d!\n",
  2250. szDestFileName,
  2251. dwError);
  2252. }
  2253. //
  2254. // Now copy the resource chkpt files to the destination. Note that
  2255. // we call this function with both gQuoLock and gLockDmpRoot held.
  2256. // The former lock prevents any checkpoint being read or written
  2257. // via CppReadCheckpoint() and CppWriteCheckpoint() while the
  2258. // following function is executing.
  2259. //
  2260. // Note: However, the CpDeleteRegistryCheckPoint() function is
  2261. // unprotected and poses a potential danger here.
  2262. //
  2263. // Note: Also, currently the following function returns ERROR_SUCCESS
  2264. // in all cases.
  2265. //
  2266. dwError = CpCopyCheckpointFiles( szDestPathName, TRUE );
  2267. if (dwError != ERROR_SUCCESS)
  2268. {
  2269. ClRtlLogPrint(LOG_UNUSUAL,
  2270. "[DM] DmpLogCheckpointAndBackup::Unable to copy resource checkpoint files, Error = %1!d!\n",
  2271. dwError);
  2272. goto FnExit;
  2273. }
  2274. //
  2275. // Now create an empty READONLY, HIDDEN, file in the destination
  2276. // directory which marks the successful ending of the backup.
  2277. //
  2278. lstrcpyW( szDestFileName, szDestPathName );
  2279. lstrcatW( szDestFileName, L"CLUSBACKUP.DAT");
  2280. hFile = QfsCreateFile(szDestFileName,
  2281. GENERIC_READ | GENERIC_WRITE,
  2282. 0,
  2283. NULL,
  2284. CREATE_NEW,
  2285. FILE_ATTRIBUTE_HIDDEN | FILE_ATTRIBUTE_READONLY,
  2286. NULL );
  2287. if ( !QfsIsHandleValid(hFile) )
  2288. {
  2289. dwError = GetLastError();
  2290. CL_LOGFAILURE( dwError );
  2291. goto FnExit;
  2292. }
  2293. dwError = ERROR_SUCCESS;
  2294. FnExit:
  2295. LocalFree ( szDestFileName );
  2296. LocalFree ( szDestPathName );
  2297. QfsCloseHandleIfValid ( hFile );
  2298. return ( dwError );
  2299. }
  2300. /****
  2301. @func DWORD | DmpRestoreClusterDatabase | Copy the quorum log and all the
  2302. checkpoint files from CsDatabaseRestorePath to the
  2303. quorum log path in the quorum disk.
  2304. @parm IN LPCWSTR | lpszQuoLogPathName | The quorum directory path
  2305. where the backed up files have to be copied to.
  2306. @rdesc Returns a Win32 error code on failure. ERROR_SUCCESS on success.
  2307. @xref <f CppRestoreCpFiles> <f DmBackupClusterDatabase>
  2308. ****/
  2309. DWORD DmpRestoreClusterDatabase(
  2310. IN LPCWSTR lpszQuoLogPathName )
  2311. {
  2312. QfsHANDLE hFindFile = QfsINVALID_HANDLE_VALUE;
  2313. WIN32_FIND_DATA FindData;
  2314. DWORD status;
  2315. WCHAR szDestFileName[MAX_PATH];
  2316. LPWSTR szSourceFileName = NULL;
  2317. LPWSTR szSourcePathName = NULL;
  2318. DWORD dwLen;
  2319. WCHAR szChkptFileNameStart[4];
  2320. WCHAR szTempFileName[MAX_PATH];
  2321. //
  2322. // Chittur Subbaraman (chitturs) - 10/20/98
  2323. //
  2324. dwLen = lstrlenW ( CsDatabaseRestorePath );
  2325. //
  2326. // It is safer to use dynamic memory allocation for user-supplied
  2327. // path since we don't want to put restrictions on the user
  2328. // on the length of the path that can be supplied. However, as
  2329. // far as our own quorum disk path is concerned, it is system-dependent
  2330. // and static memory allocation for that would suffice.
  2331. //
  2332. szSourcePathName = (LPWSTR) LocalAlloc ( LMEM_FIXED,
  2333. ( dwLen + 25 ) *
  2334. sizeof ( WCHAR ) );
  2335. if ( szSourcePathName == NULL )
  2336. {
  2337. status = GetLastError();
  2338. ClRtlLogPrint(LOG_NOISE,
  2339. "[DM] DmpRestoreClusterDatabase: Error %1!d! in allocating memory for %2!ws! !!!\n",
  2340. status,
  2341. CsDatabaseRestorePath);
  2342. CL_LOGFAILURE( status );
  2343. goto FnExit;
  2344. }
  2345. lstrcpyW ( szSourcePathName, CsDatabaseRestorePath );
  2346. //
  2347. // If the client-supplied path is not already terminated with '\',
  2348. // then add it.
  2349. //
  2350. if ( szSourcePathName [dwLen-1] != L'\\' )
  2351. {
  2352. szSourcePathName [dwLen++] = L'\\';
  2353. szSourcePathName[dwLen] = L'\0';
  2354. }
  2355. lstrcatW ( szSourcePathName, L"CLUSBACKUP.DAT" );
  2356. //
  2357. // Try to find the CLUSBACKUP.DAT file in the directory
  2358. //
  2359. hFindFile = QfsFindFirstFile( szSourcePathName, &FindData );
  2360. //
  2361. // Reuse the source path name variable
  2362. //
  2363. szSourcePathName[dwLen] = L'\0';
  2364. if ( !QfsIsHandleValid(hFindFile) )
  2365. {
  2366. status = GetLastError();
  2367. if ( status != ERROR_FILE_NOT_FOUND )
  2368. {
  2369. ClRtlLogPrint(LOG_NOISE,
  2370. "[DM] DmpRestoreClusterDatabase: Path %1!ws! unavailable, Error = %2!d! !!!\n",
  2371. szSourcePathName,
  2372. status);
  2373. } else
  2374. {
  2375. status = ERROR_DATABASE_BACKUP_CORRUPT;
  2376. ClRtlLogPrint(LOG_NOISE,
  2377. "[DM] DmpRestoreClusterDatabase: Backup procedure not fully successful, can't restore DB, Error = %1!d! !!!\n",
  2378. status);
  2379. }
  2380. CL_LOGFAILURE( status );
  2381. goto FnExit;
  2382. }
  2383. QfsFindClose ( hFindFile );
  2384. szSourcePathName[dwLen++] = L'*';
  2385. szSourcePathName[dwLen] = L'\0';
  2386. //
  2387. // Try to find any file in the directory
  2388. //
  2389. hFindFile = QfsFindFirstFile( szSourcePathName, &FindData );
  2390. //
  2391. // Reuse the source path name variable
  2392. //
  2393. szSourcePathName[dwLen-1] = L'\0';
  2394. if ( !QfsIsHandleValid(hFindFile) )
  2395. {
  2396. status = GetLastError();
  2397. ClRtlLogPrint(LOG_NOISE,
  2398. "[DM] DmpRestoreClusterDatabase: Error %2!d! in trying to find file in path %1!ws!\r\n",
  2399. szSourcePathName,
  2400. status);
  2401. CL_LOGFAILURE( status );
  2402. goto FnExit;
  2403. }
  2404. szSourceFileName = (LPWSTR) LocalAlloc ( LMEM_FIXED,
  2405. ( lstrlenW ( szSourcePathName ) + 1 + LOG_MAX_FILENAME_LENGTH ) *
  2406. sizeof ( WCHAR ) );
  2407. if ( szSourceFileName == NULL )
  2408. {
  2409. status = GetLastError();
  2410. ClRtlLogPrint(LOG_NOISE,
  2411. "[DM] DmpRestoreClusterDatabase: Error %1!d! in allocating memory for source file name !!!\n",
  2412. status);
  2413. CL_LOGFAILURE( status );
  2414. goto FnExit;
  2415. }
  2416. status = ERROR_SUCCESS;
  2417. //
  2418. // Now, find and copy all relevant files from the backup area
  2419. // to the quorum disk. Note that only one of the copied chk*.tmp
  2420. // files will be used as the valid checkpoint. However, we copy
  2421. // all chk*.tmp files to make this implementation simple and
  2422. // straightforward to comprehend.
  2423. //
  2424. while ( status == ERROR_SUCCESS )
  2425. {
  2426. if ( FindData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY )
  2427. {
  2428. if ( FindData.cFileName[0] == L'.' )
  2429. {
  2430. if ( FindData.cFileName[1] == L'\0' ||
  2431. FindData.cFileName[1] == L'.' && FindData.cFileName[2] == L'\0' )
  2432. {
  2433. goto skip;
  2434. }
  2435. }
  2436. //
  2437. // Since the found file is infact a directory, check
  2438. // whether it is one of the resource checkpoint directories.
  2439. // If so copy the relevant checkpoint files to the quorum
  2440. // disk.
  2441. //
  2442. if ( ( status = CpRestoreCheckpointFiles( szSourcePathName,
  2443. FindData.cFileName,
  2444. lpszQuoLogPathName ) )
  2445. != ERROR_SUCCESS )
  2446. {
  2447. ClRtlLogPrint(LOG_NOISE,
  2448. "[DM] DmpRestoreClusterDatabase: Error %1!d! in copying resource cp files !!!\n",
  2449. status);
  2450. CL_LOGFAILURE( status );
  2451. goto FnExit;
  2452. }
  2453. } else
  2454. {
  2455. lstrcpyW ( szTempFileName, FindData.cFileName );
  2456. szTempFileName[3] = L'\0';
  2457. mbstowcs( szChkptFileNameStart, "chk", 4 );
  2458. if ( ( lstrcmpW ( szTempFileName, szChkptFileNameStart ) == 0 )
  2459. ||
  2460. ( lstrcmpW ( FindData.cFileName, cszQuoFileName ) == 0 ) )
  2461. {
  2462. lstrcpyW( szSourceFileName, szSourcePathName );
  2463. lstrcatW( szSourceFileName, FindData.cFileName );
  2464. lstrcpyW( szDestFileName, lpszQuoLogPathName );
  2465. lstrcatW( szDestFileName, FindData.cFileName );
  2466. status = QfsCopyFile( szSourceFileName, szDestFileName, FALSE );
  2467. if ( !status )
  2468. {
  2469. status = GetLastError();
  2470. ClRtlLogPrint(LOG_UNUSUAL,
  2471. "[DM] DmpRestoreClusterDatabase: Unable to copy file %1!ws! to %2!ws!, Error = %3!d!\n",
  2472. szSourceFileName,
  2473. szDestFileName,
  2474. status);
  2475. CL_LOGFAILURE( status );
  2476. goto FnExit;
  2477. }
  2478. //
  2479. // Set the file attribute to normal. There is no reason
  2480. // to fail in this step since the quorum disk is ours
  2481. // and we succeeded in copying the file.
  2482. //
  2483. if ( !QfsSetFileAttributes( szDestFileName, FILE_ATTRIBUTE_NORMAL ) )
  2484. {
  2485. status = GetLastError();
  2486. ClRtlLogPrint(LOG_UNUSUAL,
  2487. "[DM] DmpLogCheckpointAndBackup::Error in changing %1!ws! attribute to NORMAL, error = %2!u!\n",
  2488. szDestFileName,
  2489. status);
  2490. CL_LOGFAILURE( status );
  2491. goto FnExit;
  2492. }
  2493. }
  2494. }
  2495. skip:
  2496. if ( QfsFindNextFile( hFindFile, &FindData ) )
  2497. {
  2498. status = ERROR_SUCCESS;
  2499. } else
  2500. {
  2501. status = GetLastError();
  2502. }
  2503. }
  2504. if ( status == ERROR_NO_MORE_FILES )
  2505. {
  2506. status = ERROR_SUCCESS;
  2507. } else
  2508. {
  2509. ClRtlLogPrint(LOG_UNUSUAL,
  2510. "[DM] DmpRestoreClusterDatabase: FindNextFile failed! Error = %1!u!\n",
  2511. status);
  2512. }
  2513. FnExit:
  2514. QfsFindCloseIfValid ( hFindFile );
  2515. LocalFree ( szSourceFileName );
  2516. LocalFree ( szSourcePathName );
  2517. return ( status );
  2518. }
  2519. /****
  2520. @func DWORD | DmpHandleNodeDownEvent | Handle the node down event
  2521. for DM.
  2522. @parm IN LPVOID | NotUsed | Unused parameter.
  2523. @rdesc Returns ERROR_SUCCESS.
  2524. @xref <f DmpEventHandler>
  2525. ****/
  2526. DWORD DmpHandleNodeDownEvent(
  2527. IN LPVOID NotUsed )
  2528. {
  2529. //
  2530. // Chittur Subbaraman (chitturs) - 7/23/99
  2531. //
  2532. // This function handles the DM node down processing as a separate
  2533. // thread. The reasons for creating this thread are outlined in
  2534. // DmpEventHandler.
  2535. //
  2536. ClRtlLogPrint(LOG_NOISE,
  2537. "[DM] DmpHandleNodeDownEvent - Entry...\r\n");
  2538. //
  2539. // SS: I am not the owner of the quorum resource as yet, but I might
  2540. // be after rearbitration, in that case, just set a flag saying we
  2541. // need to checkpoint. It will be looked at when the quorum resource
  2542. // comes online. The following function in FM checks if the
  2543. // quorum is online on this node and if it is, it calls
  2544. // the checkpoint callback function. If it is not, it sets the
  2545. // global boolean variable passed to TRUE.
  2546. //
  2547. FmCheckQuorumState( DmpLogCheckPointCb, &gbNeedToCheckPoint );
  2548. ClRtlLogPrint(LOG_NOISE,
  2549. "[DM] DmpHandleNodeDownEvent - Exit...\r\n");
  2550. return( ERROR_SUCCESS );
  2551. }