Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

3030 lines
102 KiB

  1. /*++
  2. Copyright (c) 1996 Microsoft Corporation
  3. Module Name:
  4. dmlog.c
  5. Abstract:
  6. Contains the quorum logging related functions for
  7. the cluster registry.
  8. Author:
  9. Sunita Shrivastava (sunitas) 24-Apr-1996
  10. Revision History:
  11. --*/
  12. #include "dmp.h"
  13. #include "tchar.h"
  14. #include "clusudef.h"
  15. /****
  16. @doc EXTERNAL INTERFACES CLUSSVC DM
  17. ****/
  18. //global static data
  19. HLOG ghQuoLog=NULL; //pointer to the quorum log
  20. DWORD gbIsQuoResOnline = FALSE;
  21. DWORD gbNeedToCheckPoint = FALSE;
  22. DWORD gbIsQuoResEnoughSpace = TRUE;
  23. HLOG ghNewQuoLog = NULL; //pointer to the new quorum resource
  24. //global data
  25. extern HANDLE ghQuoLogOpenEvent;
  26. extern BOOL gbIsQuoLoggingOn;
  27. extern HANDLE ghDiskManTimer;
  28. extern HANDLE ghCheckpointTimer;
  29. extern PFM_RESOURCE gpQuoResource; //set when DmFormNewCluster is complete
  30. extern BOOL gbDmInited;
  31. #if NO_SHARED_LOCKS
  32. extern CRITICAL_SECTION gLockDmpRoot;
  33. #else
  34. extern RTL_RESOURCE gLockDmpRoot;
  35. #endif
  36. //forward definitions
  37. void DmpLogCheckPointCb();
  38. /****
  39. @func DWORD | DmPrepareQuorumResChange| When the quorum resource is changed,
  40. the FM invokes this api on the owner node of the new quorum resource
  41. to create a new quorum log file.
  42. @parm IN PVOID | pResource | The new quorum resource.
  43. @parm IN LPCWSTR | lpszPath | The path for temporary cluster files.
  44. @parm IN DWORD | dwMaxQuoLogSize | The maximum size limit for the quorum log file.
  45. @comm When a quorum resource is changed, the fm calls this funtion before it
  46. updates the quorum resource. If a new log file needs to be created,
  47. a checkpoint is taken.
  48. @rdesc Returns a result code. ERROR_SUCCESS on success.
  49. @xref <f DmSwitchToNewQuorumLog>
  50. ****/
  51. DWORD DmPrepareQuorumResChange(
  52. IN PVOID pResource,
  53. IN LPCWSTR lpszPath,
  54. IN DWORD dwMaxQuoLogSize)
  55. {
  56. DWORD dwError=ERROR_SUCCESS;
  57. PFM_RESOURCE pNewQuoRes;
  58. WCHAR szFileName1[MAX_PATH]; //for new quorum log,for tombstonefile
  59. LSN FirstLsn;
  60. WCHAR szFileName2[MAX_PATH]; //for old quorum log, for temp tombstone
  61. DWORD dwCurLogSize;
  62. DWORD dwMaxLogSize;
  63. DWORD dwChkPtSequence;
  64. WIN32_FIND_DATA FindData;
  65. HANDLE hSrchTmpFiles;
  66. HANDLE hDirectory;
  67. pNewQuoRes = (PFM_RESOURCE)pResource;
  68. ClRtlLogPrint(LOG_NOISE,
  69. "[DM] DmPrepareQuorumResChange - Entry\r\n");
  70. //the resource is already online at this point
  71. //if the directory doesnt exist create it
  72. dwError = ClRtlCreateDirectory(lpszPath);
  73. if (dwError != ERROR_SUCCESS)
  74. {
  75. ClRtlLogPrint(LOG_NOISE,
  76. "[DM] DmPrepareQuorumResChange - Failed to create directory, Status=%1!u!\r\n",
  77. dwError);
  78. goto FnExit;
  79. }
  80. lstrcpyW(szFileName1, lpszPath);
  81. lstrcatW(szFileName1, cszQuoFileName);
  82. //if the log file is open here
  83. //this implies that the new quorum resource is the on the same node
  84. //as the old one
  85. if (ghQuoLog)
  86. {
  87. LogGetInfo(ghQuoLog, szFileName2, &dwCurLogSize, &dwMaxLogSize);
  88. //if the file is the same as the new log file, simply set the size
  89. if (!lstrcmpiW(szFileName2, szFileName1))
  90. {
  91. LogSetInfo(ghQuoLog, dwMaxQuoLogSize);
  92. ghNewQuoLog = ghQuoLog;
  93. goto CopyCpFiles;
  94. }
  95. }
  96. //delele all the quorum logging related files
  97. //delete the log if it exits
  98. DeleteFile(szFileName1);
  99. //delete all checkpoint files
  100. lstrcpyW(szFileName2, lpszPath);
  101. lstrcatW(szFileName2, L"*.tmp");
  102. hSrchTmpFiles = FindFirstFileW(szFileName2, & FindData);
  103. if (hSrchTmpFiles != INVALID_HANDLE_VALUE)
  104. {
  105. lstrcpyW(szFileName2, lpszPath);
  106. lstrcatW(szFileName2, FindData.cFileName);
  107. DeleteFile(szFileName2);
  108. while (FindNextFile( hSrchTmpFiles, & FindData))
  109. {
  110. lstrcpyW(szFileName2, lpszPath);
  111. lstrcatW(szFileName2, FindData.cFileName);
  112. DeleteFile(szFileName2);
  113. }
  114. FindClose(hSrchTmpFiles);
  115. }
  116. //set the security attributes for the file
  117. hDirectory = CreateFile(lpszPath,
  118. GENERIC_READ|WRITE_DAC|READ_CONTROL,
  119. 0,
  120. NULL,
  121. OPEN_ALWAYS,
  122. FILE_FLAG_BACKUP_SEMANTICS,
  123. NULL);
  124. if (hDirectory == INVALID_HANDLE_VALUE)
  125. {
  126. dwError = GetLastError();
  127. ClRtlLogPrint(LOG_NOISE,
  128. "[DM] DmPrepareQuorumResChange - Failed to create file, Status=%1!u!\r\n",
  129. dwError);
  130. goto FnExit;
  131. }
  132. dwError = ClRtlSetObjSecurityInfo(hDirectory, SE_FILE_OBJECT,
  133. GENERIC_ALL, GENERIC_ALL, GENERIC_READ);
  134. CloseHandle(hDirectory);
  135. if (dwError != ERROR_SUCCESS)
  136. {
  137. ClRtlLogPrint(LOG_NOISE,
  138. "[DM] DmPrepareQuorumResChange - ClRtlSetObjSecurityInfo Failed, Status=%1!u!\r\n",
  139. dwError);
  140. goto FnExit;
  141. }
  142. //open the new log file
  143. ClRtlLogPrint(LOG_NOISE,
  144. "[DM] DmPrepareQuorumResChange: the name of the quorum file is %1!ls!\r\n",
  145. szFileName1);
  146. //open the log file
  147. ghNewQuoLog = LogCreate(szFileName1, dwMaxQuoLogSize,
  148. (PLOG_GETCHECKPOINT_CALLBACK)DmpGetSnapShotCb, NULL,
  149. TRUE, &FirstLsn);
  150. if (!ghNewQuoLog)
  151. {
  152. dwError = GetLastError();
  153. ClRtlLogPrint(LOG_UNUSUAL,
  154. "[DM] DmPrepareQuorumResChange: Quorum log could not be opened, error = %1!u!\r\n",
  155. dwError);
  156. CsLogEventData1( LOG_CRITICAL,
  157. CS_DISKWRITE_FAILURE,
  158. sizeof(dwError),
  159. &dwError,
  160. szFileName1 );
  161. CsInconsistencyHalt(ERROR_QUORUMLOG_OPEN_FAILED);
  162. }
  163. //create a checkpoint in the new place
  164. dwError = DmpGetSnapShotCb(lpszPath, NULL, szFileName1, &dwChkPtSequence);
  165. if (dwError != ERROR_SUCCESS)
  166. {
  167. CL_LOGFAILURE(dwError);
  168. CsInconsistencyHalt(ERROR_QUORUMLOG_OPEN_FAILED);
  169. goto FnExit;
  170. }
  171. dwError = LogCheckPoint(ghNewQuoLog, TRUE, szFileName1, dwChkPtSequence);
  172. if (dwError != ERROR_SUCCESS)
  173. {
  174. ClRtlLogPrint(LOG_NOISE,
  175. "[DM] DmPrepareQuorumResChange - failed to take chkpoint, error = %1!u!\r\n",
  176. dwError);
  177. goto FnExit;
  178. }
  179. ClRtlLogPrint(LOG_NOISE,
  180. "[DM] DmPrepareQuorumResChange - checkpoint taken\r\n");
  181. CopyCpFiles:
  182. //
  183. // Call the checkpoint manager to copy over any checkpoint files
  184. //
  185. dwError = CpCopyCheckpointFiles(lpszPath, FALSE);
  186. if (dwError != ERROR_SUCCESS)
  187. {
  188. goto FnExit;
  189. }
  190. //create the tombstone and tmp file names
  191. lstrcpyW(szFileName1, lpszPath);
  192. lstrcatW(szFileName1, cszQuoTombStoneFile);
  193. lstrcpyW(szFileName2, lpszPath);
  194. lstrcatW(szFileName2, cszTmpQuoTombStoneFile);
  195. //rename the quorum tomstone file,it if it exists
  196. if (!MoveFileExW(szFileName1, szFileName2,
  197. MOVEFILE_REPLACE_EXISTING|MOVEFILE_WRITE_THROUGH))
  198. {
  199. //this may fail if the tombstone doesnt exist, ignore error
  200. ClRtlLogPrint(LOG_UNUSUAL,
  201. "[DM] DmPrepareQuorumResChange:tombstone doesnt exist,movefilexW failed, error=0x%1!08lx!\r\n",
  202. GetLastError());
  203. }
  204. FnExit:
  205. if (dwError != ERROR_SUCCESS)
  206. {
  207. //if not sucess, clean up the new file
  208. if (ghNewQuoLog)
  209. {
  210. LogClose(ghNewQuoLog);
  211. ghNewQuoLog = NULL;
  212. }
  213. ClRtlLogPrint(LOG_NOISE,
  214. "[DM] DmPrepareQuorumResChange - Exit, error=0x%1!08lx!\r\n",
  215. dwError);
  216. } else {
  217. ClRtlLogPrint(LOG_NOISE,
  218. "[DM] DmPrepareQuorumResChange - Exit, status=0x%1!08lx!\r\n",
  219. dwError);
  220. }
  221. return(dwError);
  222. } // DmPrepareQuorumResChange
  223. /****
  224. @func void | DmDwitchToNewQuorumLog| This is called to switch to a new
  225. quorum log when the quorum resource is changed.
  226. @comm When a quorum resource is successfully changed, this function is
  227. to switch quorum logs. The synchronous notifications for the old resource
  228. are unhooked and those for the new resource file are hooked.
  229. @rdesc Returns a result code. ERROR_SUCCESS on success.
  230. @xref <f DmSwitchToNewQuorumLog>
  231. ****/
  232. void DmSwitchToNewQuorumLog(
  233. IN LPCWSTR lpszQuoLogPath)
  234. {
  235. WCHAR szTmpQuoTombStone[MAX_PATH];
  236. DWORD dwError = ERROR_SUCCESS;
  237. ClRtlLogPrint(LOG_NOISE,
  238. "[DM] DmSwitchQuorumLogs - Entry\r\n");
  239. //unhook notifications with the old quorum resource
  240. DmpUnhookQuorumNotify();
  241. //ask the dm to register with the new quorum resource
  242. DmpHookQuorumNotify();
  243. //if the new log file exists... this is the owner of the new quorum resource.
  244. //the new log file may be the same as the old one
  245. if (ghNewQuoLog)
  246. {
  247. if (ghQuoLog && (ghQuoLog != ghNewQuoLog))
  248. {
  249. LogClose(ghQuoLog);
  250. //take another checkpoint to the new quorum file,
  251. //so that the last few updates make into it
  252. if ((dwError = LogCheckPoint(ghNewQuoLog, TRUE, NULL, 0))
  253. != ERROR_SUCCESS)
  254. {
  255. ClRtlLogPrint(LOG_CRITICAL,
  256. "[DM] DmSwitchQuorumLogs - Failed to take a checkpoint\r\n");
  257. CL_UNEXPECTED_ERROR(dwError);
  258. }
  259. ClRtlLogPrint(LOG_NOISE,
  260. "[DM] DmSwitchQuorumLogs - taken checkpoint\r\n");
  261. ghQuoLog = NULL;
  262. }
  263. ghQuoLog = ghNewQuoLog;
  264. ghNewQuoLog = NULL;
  265. // if the old tombstome was replace by a tmp file at the beginning
  266. //of change quorum resource delete it now
  267. //get the tmp file for the new quorum resource
  268. lstrcpyW(szTmpQuoTombStone, lpszQuoLogPath);
  269. lstrcatW(szTmpQuoTombStone, cszTmpQuoTombStoneFile);
  270. DeleteFile(szTmpQuoTombStone);
  271. }
  272. else
  273. {
  274. //if the old log file is open, owner of the old quorum resource
  275. if (ghQuoLog)
  276. {
  277. LogClose(ghQuoLog);
  278. ghQuoLog = NULL;
  279. }
  280. }
  281. if (FmDoesQuorumAllowLogging() != ERROR_SUCCESS)
  282. {
  283. //this is not enough to ensure the dm logging will cease
  284. //the ghQuoLog parameter must be NULL
  285. CsNoQuorumLogging = TRUE;
  286. if (ghQuoLog)
  287. {
  288. LogClose(ghQuoLog);
  289. ghQuoLog = NULL;
  290. }
  291. } else if ( !CsUserTurnedOffQuorumLogging )
  292. {
  293. //
  294. // If the user did not turn off quorum logging explicitly, then turn it back on since
  295. // the new quorum resource is not local quorum.
  296. //
  297. CsNoQuorumLogging = FALSE;
  298. }
  299. ClRtlLogPrint(LOG_NOISE,
  300. "[DM] DmSwitchQuorumLogs - Exit!\r\n");
  301. return;
  302. }
  303. /****
  304. @func DWORD | DmReinstallTombStone| If the change to a new quorum
  305. resource fails, the new log is closed and the tombstone is
  306. reinstalled.
  307. @parm IN LPCWSTR | lpszQuoLogPath | The path for maintenance cluster files.
  308. @comm The old quorum log file is deleted and a tomstone file is created in its
  309. place. If this tombstone file is detected in the quorum path, the node
  310. is not allowed to do a form. It must do a join to find about the new
  311. quorum resource from the node that knows about the most recent quorum
  312. resource.
  313. @rdesc Returns a result code. ERROR_SUCCESS on success.
  314. @xref <f DmSwitchToNewQuorumLog>
  315. ****/
  316. DWORD DmReinstallTombStone(
  317. IN LPCWSTR lpszQuoLogPath
  318. )
  319. {
  320. DWORD dwError=ERROR_SUCCESS;
  321. WCHAR szQuoTombStone[MAX_PATH];
  322. WCHAR szTmpQuoTombStone[MAX_PATH];
  323. ClRtlLogPrint(LOG_NOISE,
  324. "[DM] DmReinstallTombStone - Entry\r\n");
  325. if (ghNewQuoLog)
  326. {
  327. //get the tmp file for the new quorum resource
  328. lstrcpyW(szTmpQuoTombStone, lpszQuoLogPath);
  329. lstrcatW(szTmpQuoTombStone, cszTmpQuoTombStoneFile);
  330. //create the tombstone file or replace the previous one with a new one
  331. lstrcpyW(szQuoTombStone, lpszQuoLogPath);
  332. lstrcatW(szQuoTombStone, cszQuoTombStoneFile);
  333. //restore the tombstone
  334. if (!MoveFileExW(szTmpQuoTombStone, szQuoTombStone,
  335. MOVEFILE_REPLACE_EXISTING|MOVEFILE_WRITE_THROUGH))
  336. {
  337. //this may fail if the tombstone doesnt exist, ignore error
  338. ClRtlLogPrint(LOG_UNUSUAL,
  339. "[DM] DmReinstallTombStone :Warning-MoveFileExW failed, error=0x%1!08lx!\r\n",
  340. GetLastError());
  341. }
  342. // if this is not the same as the old log file, close it
  343. if (ghNewQuoLog != ghQuoLog)
  344. {
  345. LogClose(ghNewQuoLog);
  346. }
  347. ghNewQuoLog = NULL;
  348. }
  349. return(dwError);
  350. }
  351. /****
  352. @func DWORD | DmCompleteQuorumResChange| This is called on the quorum resource
  353. if the old quorum log file is not the same as the new one.
  354. @parm IN PVOID | pOldQuoRes | The new quorum resource.
  355. @parm IN LPCWSTR | lpszPath | The path for temporary cluster files.
  356. @parm IN DWORD | dwMaxQuoLogSize | The maximum size limit for the quorum log file.
  357. @comm The old quorum log file is deleted and a tomstone file is created in its
  358. place. If this tombstone file is detected in the quorum path, the node
  359. is not allowed to do a form. It must do a join to find about the new
  360. quorum resource from the node that knows about the most recent quorum
  361. resource.
  362. @rdesc Returns a result code. ERROR_SUCCESS on success.
  363. @xref <f DmSwitchToNewQuorumLog>
  364. ****/
  365. DWORD DmCompleteQuorumResChange(
  366. IN LPCWSTR lpszOldQuoResId,
  367. IN LPCWSTR lpszOldQuoLogPath
  368. )
  369. {
  370. DWORD dwError=ERROR_SUCCESS;
  371. WCHAR szOldQuoFileName[MAX_PATH];
  372. HANDLE hTombStoneFile;
  373. WCHAR szQuorumTombStone[MAX_PATH];
  374. PQUO_TOMBSTONE pTombStone = NULL;
  375. DWORD dwBytesWritten;
  376. WIN32_FIND_DATA FindData;
  377. HANDLE hSrchTmpFiles;
  378. ClRtlLogPrint(LOG_NOISE,
  379. "[DM] DmCompleteQuorumResChange - Entry\r\n");
  380. //the old log file name
  381. lstrcpyW(szOldQuoFileName, lpszOldQuoLogPath);
  382. lstrcatW(szOldQuoFileName, cszQuoFileName);
  383. //create the tombstone file or replace the previous one with a new one
  384. lstrcpyW(szQuorumTombStone, lpszOldQuoLogPath);
  385. lstrcatW(szQuorumTombStone, cszQuoTombStoneFile);
  386. pTombStone = LocalAlloc(LMEM_FIXED, sizeof(QUO_TOMBSTONE));
  387. if (!pTombStone)
  388. {
  389. CL_LOGFAILURE(ERROR_NOT_ENOUGH_MEMORY);
  390. CsLogEvent(LOG_UNUSUAL, DM_TOMBSTONECREATE_FAILED);
  391. goto DelOldFiles;
  392. }
  393. hTombStoneFile = CreateFileW(szQuorumTombStone,
  394. GENERIC_READ | GENERIC_WRITE,
  395. FILE_SHARE_READ|FILE_SHARE_WRITE,
  396. NULL,
  397. CREATE_ALWAYS,
  398. 0,
  399. NULL);
  400. if (hTombStoneFile == INVALID_HANDLE_VALUE)
  401. {
  402. //dont return failure
  403. CL_LOGFAILURE(GetLastError());
  404. CsLogEvent(LOG_UNUSUAL, DM_TOMBSTONECREATE_FAILED);
  405. goto DelOldFiles;
  406. }
  407. //write the old quorum path to it.
  408. lstrcpyn(pTombStone->szOldQuoResId, lpszOldQuoResId, MAXSIZE_RESOURCEID);
  409. lstrcpy(pTombStone->szOldQuoLogPath, lpszOldQuoLogPath);
  410. //write the tombstones
  411. if (! WriteFile(hTombStoneFile, pTombStone, sizeof(QUO_TOMBSTONE),
  412. &dwBytesWritten, NULL))
  413. {
  414. CL_LOGFAILURE(GetLastError());
  415. CsLogEvent(LOG_UNUSUAL, DM_TOMBSTONECREATE_FAILED);
  416. goto DelOldFiles;
  417. }
  418. CL_ASSERT(dwBytesWritten == sizeof(QUO_TOMBSTONE));
  419. ClRtlLogPrint(LOG_NOISE,
  420. "[DM] DmCompleteQuorumResChange: tombstones written\r\n");
  421. DelOldFiles:
  422. //
  423. //delete the old quorum files
  424. //
  425. if (!DeleteFile(szOldQuoFileName))
  426. CL_LOGFAILURE(GetLastError());
  427. //delele other tmp files in there
  428. lstrcpyW(szOldQuoFileName, lpszOldQuoLogPath);
  429. lstrcatW(szOldQuoFileName, L"*.tmp");
  430. hSrchTmpFiles = FindFirstFileW(szOldQuoFileName, & FindData);
  431. if (hSrchTmpFiles != INVALID_HANDLE_VALUE)
  432. {
  433. lstrcpyW(szQuorumTombStone, lpszOldQuoLogPath);
  434. lstrcatW(szQuorumTombStone, FindData.cFileName);
  435. DeleteFile(szQuorumTombStone);
  436. while (FindNextFile( hSrchTmpFiles, & FindData))
  437. {
  438. lstrcpyW(szQuorumTombStone, lpszOldQuoLogPath);
  439. lstrcatW(szQuorumTombStone, FindData.cFileName);
  440. DeleteFile(szQuorumTombStone);
  441. }
  442. FindClose(hSrchTmpFiles);
  443. }
  444. //
  445. // Clean up the old registry checkpoint files
  446. //
  447. CpCompleteQuorumChange(lpszOldQuoLogPath);
  448. if (hTombStoneFile != INVALID_HANDLE_VALUE)
  449. CloseHandle(hTombStoneFile);
  450. if (pTombStone) LocalFree(pTombStone);
  451. return(dwError);
  452. }
  453. /****
  454. @func DWORD | DmWriteToQuorumLog| When a transaction to the cluster database
  455. is completed successfully, this function is invoked.
  456. @parm DWORD | dwSequence | The sequnece number of the transaction.
  457. @parm PVOID | pData | A pointer to a record data.
  458. @parm DWORD | dwSize | The size of the record data in bytes.
  459. @rdesc Returns a result code. ERROR_SUCCESS on success.
  460. @xref
  461. ****/
  462. DWORD WINAPI DmWriteToQuorumLog(
  463. IN DWORD dwGumDispatch,
  464. IN DWORD dwSequence,
  465. IN DWORD dwType,
  466. IN PVOID pData,
  467. IN DWORD dwSize)
  468. {
  469. DWORD dwError=ERROR_SUCCESS;
  470. //dmupdate is coming before the DmUpdateJoinCluster is called.
  471. //at this point we are not the owner of quorum in any case
  472. if (!gpQuoResource)
  473. goto FnExit;
  474. ClRtlLogPrint(LOG_NOISE,
  475. "[DM] DmWriteToQuorumLog Entry Seq#=%1!u! Type=%2!u! Size=%3!u!\r\n",
  476. dwSequence, dwType, dwSize);
  477. //
  478. // Chittur Subbaraman (chitturs) - 6/3/99
  479. //
  480. // Make sure the gLockDmpRoot is held before LogCheckPoint is called
  481. // so as to maintain the ordering between this lock and the log lock.
  482. //
  483. ACQUIRE_SHARED_LOCK(gLockDmpRoot);
  484. //if I am the owner of the quorum logs, just write the record
  485. if (gbIsQuoLoggingOn && ghQuoLog && gbIsQuoResOnline && AMIOWNEROFQUORES(gpQuoResource))
  486. {
  487. if (dwGumDispatch == PRE_GUM_DISPATCH)
  488. {
  489. //make sure the logger has enough space to commit this else
  490. //refuse this GUM transaction
  491. dwError = LogCommitSize(ghQuoLog, RMRegistryMgr, dwSize);
  492. if (dwError != ERROR_SUCCESS)
  493. {
  494. if (dwError == ERROR_CLUSTERLOG_NOT_ENOUGH_SPACE)
  495. {
  496. //map error
  497. CL_LOGCLUSERROR(LM_DISKSPACE_LOW_WATERMARK);
  498. gbIsQuoResEnoughSpace = FALSE;
  499. }
  500. }
  501. else
  502. {
  503. if (!gbIsQuoResEnoughSpace) gbIsQuoResEnoughSpace = TRUE;
  504. }
  505. }
  506. else if (dwGumDispatch == POST_GUM_DISPATCH)
  507. {
  508. if (LogWrite(ghQuoLog, dwSequence, TTCompleteXsaction, RMRegistryMgr,
  509. dwType, pData, dwSize) == NULL_LSN)
  510. {
  511. dwError = GetLastError();
  512. ClRtlLogPrint(LOG_UNUSUAL,
  513. "[DM] DmWriteToQuorumLog failed, error=0x%1!08lx!\r\n",
  514. dwError);
  515. }
  516. }
  517. }
  518. RELEASE_LOCK(gLockDmpRoot);
  519. FnExit:
  520. return (dwError);
  521. }
  522. /****
  523. @func DWORD | DmpChkQuoTombStone| This checks the quorum logs to ensure
  524. that it is the most recent one before rolling in the changes.
  525. @rdesc Returns a result code. ERROR_SUCCESS on success.
  526. @comm This looks for the tombstone file and if one exists. It checks if this
  527. quorum file is marked as dead in there.
  528. @xref <f FmSetQuorumResource>
  529. ****/
  530. DWORD DmpChkQuoTombStone()
  531. {
  532. DWORD dwError=ERROR_SUCCESS;
  533. WCHAR szQuorumLogPath[MAX_PATH];
  534. WCHAR szQuorumTombStone[MAX_PATH];
  535. HANDLE hTombStoneFile = INVALID_HANDLE_VALUE;
  536. PQUO_TOMBSTONE pTombStone = NULL;
  537. DWORD dwBytesRead;
  538. ClRtlLogPrint(LOG_NOISE,
  539. "[DM] DmpChkQuoTombStone - Entry\r\n");
  540. dwError = DmGetQuorumLogPath(szQuorumLogPath, sizeof(szQuorumLogPath));
  541. if (dwError)
  542. {
  543. ClRtlLogPrint(LOG_UNUSUAL,
  544. "[DM] DmpChkQuoTombStone - DmGetQuorumLogPath failed,error=0x%1!08lx!\n",
  545. dwError);
  546. goto FnExit;
  547. }
  548. lstrcpyW(szQuorumTombStone, szQuorumLogPath);
  549. lstrcatW(szQuorumTombStone, L"\\quotomb.stn");
  550. pTombStone = LocalAlloc(LMEM_FIXED, sizeof(QUO_TOMBSTONE));
  551. if (!pTombStone)
  552. {
  553. dwError = ERROR_NOT_ENOUGH_MEMORY;
  554. goto FnExit;
  555. }
  556. hTombStoneFile = CreateFileW(szQuorumTombStone,
  557. GENERIC_READ | GENERIC_WRITE,
  558. FILE_SHARE_READ|FILE_SHARE_WRITE,
  559. NULL,
  560. OPEN_EXISTING,
  561. 0,
  562. NULL);
  563. if (hTombStoneFile == INVALID_HANDLE_VALUE)
  564. {
  565. //there is no tombstone file, not a problem-we can proceed with the form
  566. goto FnExit;
  567. }
  568. //found a tombstone file
  569. //read the file
  570. if (! ReadFile(hTombStoneFile, pTombStone, sizeof(QUO_TOMBSTONE),
  571. &dwBytesRead, NULL))
  572. {
  573. ClRtlLogPrint(LOG_UNUSUAL,
  574. "[DM] DmpChkQuoTombStone - Couldn't read the tombstone,error=0x%1!08lx!\n",
  575. dwError);
  576. //dont return an error, we can proceed with form??
  577. goto FnExit;
  578. }
  579. if (dwBytesRead != sizeof(QUO_TOMBSTONE))
  580. {
  581. ClRtlLogPrint(LOG_UNUSUAL,
  582. "[DM] DmpChkQuoTombStone - Couldn't read the entire tombstone\r\n");
  583. //dont return an error, we can proceed with form??
  584. goto FnExit;
  585. }
  586. if ((!lstrcmpW(OmObjectId(gpQuoResource), pTombStone->szOldQuoResId))
  587. && (!lstrcmpiW(szQuorumLogPath, pTombStone->szOldQuoLogPath)))
  588. {
  589. ClRtlLogPrint(LOG_UNUSUAL,
  590. "[DM] DmpChkQuoTombStone:A tombstone for this resource, and quorum log file was found here.\r\n");
  591. ClRtlLogPrint(LOG_UNUSUAL,
  592. "[DM] DmpChkQuoTombStone:This is node is only allowed to do a join, make sure another node forms\r\n");
  593. //log something into the eventlog
  594. CL_LOGCLUSERROR(SERVICE_MUST_JOIN);
  595. //we exit with succes because this is by design and we dont want
  596. //clusprxy to retry starting unnecessarily
  597. ExitProcess(dwError);
  598. goto FnExit;
  599. }
  600. else
  601. {
  602. ClRtlLogPrint(LOG_UNUSUAL,
  603. "[DM] DmpChkQuoTombStone: Bogus TombStone ??\r\n");
  604. #if DBG
  605. if (IsDebuggerPresent())
  606. DebugBreak();
  607. #endif
  608. goto FnExit;
  609. }
  610. FnExit:
  611. if (hTombStoneFile != INVALID_HANDLE_VALUE)
  612. CloseHandle(hTombStoneFile);
  613. if (pTombStone) LocalFree(pTombStone);
  614. ClRtlLogPrint(LOG_NOISE,
  615. "[DM] DmpChkQuoTombStone: Exit, returning 0x%1!08lx!\r\n",
  616. dwError);
  617. return(dwError);
  618. }
  619. /****
  620. @func DWORD | DmpApplyChanges| When dm is notified that the cluster form is
  621. occuring, it calls DmpApplyChanges to apply the quorum logs to the
  622. cluster database.
  623. @rdesc Returns a result code. ERROR_SUCCESS on success.
  624. @comm This opens the quorum file. Note that it doesnt close the quorum file.
  625. @xref
  626. ****/
  627. DWORD DmpApplyChanges()
  628. {
  629. LSN FirstLsn;
  630. DWORD dwErr = ERROR_SUCCESS;
  631. DWORD dwSequence;
  632. DM_LOGSCAN_CONTEXT DmAppliedChangeContext;
  633. if (ghQuoLog == NULL)
  634. {
  635. return(ERROR_QUORUMLOG_OPEN_FAILED);
  636. }
  637. //find the current sequence number from the registry
  638. dwSequence = DmpGetRegistrySequence();
  639. ClRtlLogPrint(LOG_NOISE,
  640. "[DM] DmpApplyChanges: The current registry sequence number %1!d!\r\n",
  641. dwSequence);
  642. // upload a database if the current sequence number is lower or equal to
  643. // the one in the database OR if the user is forcing a restore database
  644. // operation.
  645. // find the lsn of the record from which we need to start applying changes
  646. // if null there are no changes to apply
  647. dwErr = DmpLogFindStartLsn(ghQuoLog, &FirstLsn, &dwSequence);
  648. if (dwErr != ERROR_SUCCESS)
  649. {
  650. ClRtlLogPrint(LOG_NOISE,
  651. "[DM] DmpApplyChanges: DmpLogFindStartLsn failed, error=0x%1!08lx!\r\n",
  652. dwErr);
  653. goto FnExit;
  654. }
  655. //dwSequence now contains the current sequence number in the registry
  656. DmAppliedChangeContext.dwSequence = dwSequence;
  657. if (FirstLsn != NULL_LSN)
  658. {
  659. ClRtlLogPrint(LOG_NOISE,
  660. "[DM] DmpApplyChanges: The LSN of the record to apply changes from 0x%1!08lx!\r\n",
  661. FirstLsn);
  662. if (dwErr = LogScan(ghQuoLog, FirstLsn, TRUE,(PLOG_SCAN_CALLBACK)DmpLogApplyChangesCb,
  663. &DmAppliedChangeContext) != ERROR_SUCCESS)
  664. {
  665. ClRtlLogPrint(LOG_UNUSUAL,
  666. "[DM] DmpApplyChanges: LogScan failed, error=0x%1!08lx!\r\n",
  667. dwErr);
  668. }
  669. //if the more changes have been applied
  670. if (DmAppliedChangeContext.dwSequence != dwSequence)
  671. {
  672. //set the gum sequence number to the trid that has been applied
  673. GumSetCurrentSequence(GumUpdateRegistry, DmAppliedChangeContext.dwSequence);
  674. //update the registry with this sequence number
  675. DmpUpdateSequence();
  676. //set the gum sequence number to one higher for the next transaction
  677. GumSetCurrentSequence(GumUpdateRegistry,
  678. (DmAppliedChangeContext.dwSequence + 1));
  679. ClRtlLogPrint(LOG_NOISE,
  680. "[DM] DmpApplyChanges: Gum sequnce number set to = %1!d!\r\n",
  681. (DmAppliedChangeContext.dwSequence + 1));
  682. }
  683. }
  684. FnExit:
  685. ClRtlLogPrint(LOG_NOISE,
  686. "[DM] DmpApplyChanges: Exit, returning 0x%1!08lx!\r\n",
  687. dwErr);
  688. return(dwErr);
  689. }
  690. /****
  691. @func DWORD | DmpFindStartLsn| Uploads the last checkpoint from the
  692. quorum and returns the LSN of the record from which the changes
  693. should be applied.
  694. @parm IN HLOG | hQuoLog | the log file handle.
  695. @parm OUT LSN *| pStartScanLsn | Returns the LSN of the record in the
  696. quorum log from which changes must be applied is returned here.
  697. NULL_LSN is returned if no changes need to be applied.
  698. @parm IN OUT LPDWORD | *pdwSequence | Should be set to the current sequence
  699. number is the cluster registry. If a new chkpoint is uploaded, the
  700. sequence number corresponding to that is returned.
  701. @rdesc Returns ERROR_SUCCESS if a valid LSN is returned. This may be NULL_LSN.
  702. Returns the error code if the database cannot be uploaded from the last chkpoint
  703. or if something horrible happens.
  704. @comm This finds the last valid check point in the log file. The data
  705. base is synced with this checkpoint and the gum sequence number is
  706. set to one plus the sequence number of that checkpoint. If no
  707. checkpoint record is found, a checkpoint is taken and NULL_LSN is
  708. returned.
  709. @xref
  710. ****/
  711. DWORD DmpLogFindStartLsn(
  712. IN HLOG hQuoLog,
  713. OUT LSN *pStartScanLsn,
  714. IN OUT LPDWORD pdwSequence)
  715. {
  716. LSN ChkPtLsn;
  717. LSN StartScanLsn;
  718. DWORD dwChkPtSequence=0;
  719. DWORD dwError = ERROR_SUCCESS;
  720. WCHAR szChkPtFileName[LOG_MAX_FILENAME_LENGTH];
  721. DM_LOGSCAN_CONTEXT DmAppliedChangeContext;
  722. *pStartScanLsn = NULL_LSN;
  723. ChkPtLsn = NULL_LSN;
  724. //read the last check point record if any and the transaction id till that
  725. //checkpoint
  726. dwError = LogGetLastChkPoint(hQuoLog, szChkPtFileName, &dwChkPtSequence,
  727. &ChkPtLsn);
  728. if (dwError != ERROR_SUCCESS)
  729. {
  730. //no chk point record found
  731. ClRtlLogPrint(LOG_UNUSUAL,
  732. "[DM] DmpLogFindStartLsn: LogGetLastChkPoint failed, error=0x%1!08lx!\r\n",
  733. dwError );
  734. // this can happen either due to the fact that the log file was just created,
  735. // and hence there is no checkpoint or because log file was messed up
  736. // and the mount process corrected it but removed the checkpoint.
  737. // If it is the second case, then logpmountlog should put something in the
  738. // event log
  739. if (dwError == ERROR_CLUSTERLOG_CHKPOINT_NOT_FOUND)
  740. {
  741. //
  742. // Chittur Subbaraman (chitturs) - 6/3/99
  743. //
  744. // Make sure the gLockDmpRoot is held before LogCheckPoint is called
  745. // so as to maintain the ordering between this lock and the log lock.
  746. //
  747. ACQUIRE_SHARED_LOCK(gLockDmpRoot);
  748. //take a checkpoint, so that this doesnt happen the next time
  749. dwError = LogCheckPoint(hQuoLog, TRUE, NULL, 0);
  750. RELEASE_LOCK(gLockDmpRoot);
  751. if (dwError != ERROR_SUCCESS)
  752. {
  753. //check point could not be taken
  754. ClRtlLogPrint(LOG_UNUSUAL,
  755. "[DM] DmpLogFindStartLsn: Checkpoint on first form failed, error=0x%1!08lx!\r\n",
  756. dwError );
  757. goto FnExit;
  758. }
  759. }
  760. else
  761. {
  762. //there were other errors
  763. goto FnExit;
  764. }
  765. }
  766. else
  767. {
  768. //found check point record
  769. ClRtlLogPrint(LOG_NOISE,
  770. "[DM] DmpLogFindStartLsn: LogGetLastChkPt rets, Seq#=%1!d! ChkPtLsn=0x%2!08lx!\r\n",
  771. dwChkPtSequence, ChkPtLsn);
  772. //
  773. // Chittur Subbaraman (chitturs) - 10/18/98
  774. //
  775. // If the user is forcing a database restore from backup, then
  776. // do not check whether the current sequence number in the registry
  777. // is younger than the checkpoint sequence number in the quorum log.
  778. // Just, go ahead and load the checkpoint from restored database.
  779. //
  780. if ( CsDatabaseRestore == TRUE )
  781. {
  782. ClRtlLogPrint(LOG_NOISE,
  783. "[DM] DmpLogFindStartLsn: User forcing a chkpt upload from quorum log...\r\n");
  784. }
  785. else
  786. {
  787. //if the sequence number is greater than the check point sequence number
  788. //plus one, that implies..that only changes from that sequence number
  789. //need to be applied.(this node may not have been the first one to die)
  790. //We dont always apply the database because if logging is mostly off
  791. //and the two nodes die simultaneosly we want to prevent losing all the
  792. //changes
  793. //else if the checkpoint sequence is one below the current
  794. //current sequence number, then the locker node could have died after updating
  795. //get the current checkpoint irrespective of what the current sequence number is
  796. //this is because a checkpoint with the same sequence number may have
  797. //a change that is different from whats there in the current registry.
  798. //if node 'a'(locker and logger dies in the middle of logging trid=x+1,
  799. //the other node,'b' will take over logging and checkpoint the database
  800. //at trid=x. If 'a' comes back up, it needs to throw aways its x+1 change
  801. //and apply changes from the log from chk pt x.
  802. if (*pdwSequence > (dwChkPtSequence + 1))
  803. {
  804. //the current sequence number is less than or equal to chkpt Seq + 1
  805. ClRtlLogPrint(LOG_NOISE,
  806. "[DM] DmpLogFindStartLsn: ChkPt not applied, search for next seq\r\n");
  807. DmAppliedChangeContext.dwSequence = *pdwSequence;
  808. DmAppliedChangeContext.StartLsn = NULL_LSN;
  809. //find the LSN from which to apply changes
  810. if (dwError = LogScan(ghQuoLog, ChkPtLsn, TRUE,(PLOG_SCAN_CALLBACK)DmpLogFindStartLsnCb,
  811. &DmAppliedChangeContext) != ERROR_SUCCESS)
  812. {
  813. ClRtlLogPrint(LOG_UNUSUAL,
  814. "[DM] DmpLogFindStartLsn: LogScan failed, no changes will be applied, error=0x%1!08lx!\r\n",
  815. dwError);
  816. goto FnExit;
  817. }
  818. *pStartScanLsn = DmAppliedChangeContext.StartLsn;
  819. goto FnExit;
  820. }
  821. }
  822. //
  823. // The current registry sequence number is less than or equal
  824. // to chkpt Seq + 1 OR the user is forcing a database restore
  825. // from the backup area.
  826. //
  827. ClRtlLogPrint(LOG_NOISE,
  828. "[DM] DmpLogFindStartLsn: Uploading chkpt from quorum log\r\n");
  829. //make sure that no keys are added to the key list because of opens/creates
  830. ACQUIRE_EXCLUSIVE_LOCK(gLockDmpRoot);
  831. //hold the key lock as well
  832. EnterCriticalSection(&KeyLock);
  833. //invalidate all open keys
  834. DmpInvalidateKeys();
  835. if ((dwError = DmInstallDatabase(szChkPtFileName, NULL, FALSE)) != ERROR_SUCCESS)
  836. {
  837. //couldnt install the database
  838. //bad !
  839. ClRtlLogPrint(LOG_UNUSUAL,
  840. "[DM] DmpLogFindStartLsn: DmpInstallDatabase failed, error=0x%1!08lx!\r\n",
  841. dwError);
  842. CsLogEventData( LOG_CRITICAL,
  843. DM_CHKPOINT_UPLOADFAILED,
  844. sizeof(dwError),
  845. &dwError );
  846. DmpReopenKeys();
  847. //release the locks
  848. LeaveCriticalSection(&KeyLock);
  849. RELEASE_LOCK(gLockDmpRoot);
  850. goto FnExit;
  851. }
  852. else
  853. {
  854. //the current sequence number is less than or equal to chkpt Seq + 1
  855. ClRtlLogPrint(LOG_NOISE,
  856. "[DM] DmpLogFindStartLsn: chkpt uploaded from quorum log\r\n");
  857. //since we downloaded the database, we should start
  858. //aplying changes from ChkPtLsn
  859. *pStartScanLsn = ChkPtLsn;
  860. *pdwSequence = dwChkPtSequence;
  861. //set the gum sequence number to be the next one
  862. //ss: the next logged transaction shouldnt have the same
  863. //transaction id
  864. GumSetCurrentSequence(GumUpdateRegistry, (dwChkPtSequence+1));
  865. //reopen the keys
  866. DmpReopenKeys();
  867. //release the locks
  868. LeaveCriticalSection(&KeyLock);
  869. RELEASE_LOCK(gLockDmpRoot);
  870. goto FnExit;
  871. }
  872. }
  873. FnExit:
  874. ClRtlLogPrint(LOG_NOISE,
  875. "[DM] DmpLogFindStartLsn: LSN=0x%1!08lx!, returning 0x%2!08lx!\r\n",
  876. *pStartScanLsn, dwError);
  877. return(dwError);
  878. }
  879. /****
  880. @func DWORD | DmpLogFindStartLsnCb| The callback tries to find the first record
  881. with a transaction id that is larger than the sequence number of the
  882. local database.
  883. @parm PVOID | pContext| A pointer to a DM_STARTLSN_CONTEXT structure.
  884. @parm LSN | Lsn| The LSN of the record.
  885. @parm RMID | Resource | The resource manager for this transaction.
  886. @parm RMID | ResourceType | The resource manager for this transaction.
  887. @parm TRID | Transaction | The transaction number of this record.
  888. @parm PVOID | pLogData | The log data for this record.
  889. @parm DWORD | DataLength | The length of the record.
  890. @rdesc Returns TRUE to continue scan. FALSE to stop.
  891. @comm This function returns true if the sequence number of the record
  892. being scanned is higher than the seqence number passed in the context.
  893. @xref <f DmpLogFindStartLsn> <f LogScan>
  894. ****/
  895. BOOL WINAPI DmpLogFindStartLsnCb(
  896. IN PVOID pContext,
  897. IN LSN Lsn,
  898. IN RMID Resource,
  899. IN RMTYPE ResourceFlags,
  900. IN TRID Transaction,
  901. IN TRTYPE TrType,
  902. IN const PVOID pLogData,
  903. IN DWORD DataLength)
  904. {
  905. PDM_LOGSCAN_CONTEXT pDmStartLsnContext= (PDM_LOGSCAN_CONTEXT) pContext;
  906. CL_ASSERT(pDmStartLsnContext);
  907. if (Transaction > (int)pDmStartLsnContext->dwSequence)
  908. {
  909. pDmStartLsnContext->StartLsn = Lsn;
  910. return (FALSE);
  911. }
  912. return(TRUE);
  913. }
  914. /****
  915. @func DWORD | DmpHookQuorumNotify| This hooks a callback to be invoked whenever
  916. the state of the quorum resource changes.
  917. @rdesc Returns a result code. ERROR_SUCCESS on success.
  918. @comm This is used to monitor the state of
  919. @xref
  920. ****/
  921. DWORD DmpHookQuorumNotify()
  922. {
  923. DWORD dwError = ERROR_SUCCESS;
  924. if (dwError = FmFindQuorumResource(&gpQuoResource))
  925. {
  926. ClRtlLogPrint(LOG_UNUSUAL,
  927. "[DM] DmUpdateFormNewCluster: FmFindQuorumResource failed, error=0x%1!08lx!\r\n",
  928. dwError);
  929. goto FnExit;
  930. }
  931. dwError = OmRegisterNotify(gpQuoResource, NULL,
  932. NOTIFY_RESOURCE_POSTONLINE| NOTIFY_RESOURCE_PREOFFLINE |
  933. NOTIFY_RESOURCE_OFFLINEPENDING | NOTIFY_RESOURCE_POSTOFFLINE |
  934. NOTIFY_RESOURCE_FAILED,
  935. DmpQuoObjNotifyCb);
  936. FnExit:
  937. return(dwError);
  938. }
  939. /****
  940. @func DWORD | DmpUnhookQuorumNotify| This unhooks the callback function
  941. that is registered with the object.
  942. @parm PVOID | pContext| A pointer to a DMLOGRECORD structure.
  943. @parm PVOID | pObject| A pointer to quorum resource object.
  944. @parm DWORD | dwNotification| A pointer to a DMLOGRECORD structure.
  945. @rdesc Returns a result code. ERROR_SUCCESS on success.
  946. @xref
  947. ****/
  948. DWORD DmpUnhookQuorumNotify()
  949. {
  950. DWORD dwError = ERROR_SUCCESS;
  951. if (gpQuoResource)
  952. {
  953. dwError = OmDeregisterNotify(gpQuoResource, DmpQuoObjNotifyCb);
  954. OmDereferenceObject(gpQuoResource);
  955. }
  956. return(ERROR_SUCCESS);
  957. }
  958. /****
  959. @func DWORD | DmpQuoObjNotifyCb| This is a callback that is called on
  960. change of state on quorum resource.
  961. @parm PVOID | pContext| A pointer to a DMLOGRECORD structure.
  962. @parm PVOID | pObject| A pointer to quorum resource object.
  963. @parm DWORD | dwNotification| A pointer to a DMLOGRECORD structure.
  964. @rdesc Returns a result code. ERROR_SUCCESS on success.
  965. @xref
  966. ****/
  967. void DmpQuoObjNotifyCb(
  968. IN PVOID pContext,
  969. IN PVOID pObject,
  970. IN DWORD dwNotification)
  971. {
  972. switch(dwNotification)
  973. {
  974. case NOTIFY_RESOURCE_POSTONLINE:
  975. gbIsQuoResOnline = TRUE;
  976. ClRtlLogPrint(LOG_NOISE,
  977. "[DM] DmpQuoObjNotifyCb: Quorum resource is online\r\n");
  978. //if this is the owner of the quorum resource
  979. //and the log is not open, open the log
  980. if (AMIOWNEROFQUORES(gpQuoResource) && !CsNoQuorumLogging)
  981. {
  982. //ToDo: the quorum file name should be obtained from the setup
  983. //for now obtain the value from the cluster registry.
  984. PTEB CurrentTeb;
  985. WCHAR szQuorumFileName[MAX_PATH];
  986. LSN FirstLsn;
  987. DWORD dwError;
  988. DWORD dwType;
  989. DWORD dwLength;
  990. DWORD dwMaxQuoLogSize;
  991. DWORD bForceReset = FALSE;
  992. DWORD OldHardErrorValue;
  993. //bug# :106647
  994. //SS: HACKHACK disabling hard error pop ups so that disk corruption
  995. //is caught somewhere else..
  996. //atleast the pop-ups must be disabled for the whole process !
  997. //me thinks this is covering up the problem of disk corruption
  998. //disk corruption should not occur!
  999. CurrentTeb = NtCurrentTeb();
  1000. OldHardErrorValue = CurrentTeb->HardErrorsAreDisabled;
  1001. CurrentTeb->HardErrorsAreDisabled = 1;
  1002. ClRtlLogPrint(LOG_NOISE,
  1003. "[DM] DmpQuoObjNotifyCb: Own quorum resource, try open the quorum log\r\n");
  1004. if (DmGetQuorumLogPath(szQuorumFileName, sizeof(szQuorumFileName)) != ERROR_SUCCESS)
  1005. {
  1006. ClRtlLogPrint(LOG_NOISE,
  1007. "[DM] DmpQuoObjNotifyCb: Quorum log file is not configured\r\n");
  1008. }
  1009. else
  1010. {
  1011. BOOL fSetSecurity = FALSE;
  1012. HANDLE hFindFile = INVALID_HANDLE_VALUE;
  1013. WIN32_FIND_DATA FindData;
  1014. hFindFile = FindFirstFile( szQuorumFileName, &FindData );
  1015. if ( hFindFile == INVALID_HANDLE_VALUE )
  1016. {
  1017. dwError = GetLastError();
  1018. ClRtlLogPrint(LOG_NOISE,
  1019. "[DM] DmpQuoObjNotifyCb: FindFirstFile on path %1!ws! failed, Error=%2!d! !!!\n",
  1020. szQuorumFileName,
  1021. dwError);
  1022. if ( dwError == ERROR_PATH_NOT_FOUND )
  1023. {
  1024. fSetSecurity = TRUE;
  1025. }
  1026. } else
  1027. {
  1028. FindClose( hFindFile );
  1029. }
  1030. //if the directory doesnt exist create it
  1031. dwError = ClRtlCreateDirectory(szQuorumFileName);
  1032. if (dwError != ERROR_SUCCESS)
  1033. {
  1034. ClRtlLogPrint(LOG_CRITICAL,
  1035. "[DM] DmpQuoObjNotifyCb: Failed to open quorum file: %1!ws!, error=0x%2!08lx!\r\n",
  1036. szQuorumFileName,
  1037. dwError);
  1038. CL_UNEXPECTED_ERROR(dwError);
  1039. CsInconsistencyHalt(dwError);
  1040. }
  1041. if ( fSetSecurity == TRUE )
  1042. {
  1043. HANDLE hFile;
  1044. ClRtlLogPrint(LOG_NOISE,
  1045. "[DM] DmpQuoObjNotifyCb: Attempting to set security on directory %1!ws!...\r\n",
  1046. szQuorumFileName);
  1047. //
  1048. // Open the newly created directory object with rights to modify DACL
  1049. // in the object's SD.
  1050. //
  1051. hFile = CreateFile( szQuorumFileName,
  1052. GENERIC_READ | WRITE_DAC | READ_CONTROL, // for setting DACL
  1053. 0,
  1054. NULL,
  1055. OPEN_EXISTING,
  1056. FILE_FLAG_BACKUP_SEMANTICS, // for directory open
  1057. NULL );
  1058. if ( hFile == INVALID_HANDLE_VALUE )
  1059. {
  1060. dwError = GetLastError();
  1061. ClRtlLogPrint(LOG_CRITICAL,
  1062. "[DM] DmpQuoObjNotifyCb: Failed to open directory %1!ws!, Status=%2!u!...\r\n",
  1063. szQuorumFileName,
  1064. dwError);
  1065. CL_LOGFAILURE( dwError );
  1066. CsInconsistencyHalt( dwError );
  1067. }
  1068. //
  1069. // Set DACL on the file handle object granting full rights only to admin
  1070. // and owner.
  1071. //
  1072. dwError = ClRtlSetObjSecurityInfo( hFile,
  1073. SE_FILE_OBJECT,
  1074. GENERIC_ALL, // for Admins
  1075. GENERIC_ALL, // for Owner
  1076. 0 ); // for Everyone
  1077. CloseHandle( hFile );
  1078. if ( dwError != ERROR_SUCCESS )
  1079. {
  1080. ClRtlLogPrint(LOG_CRITICAL,
  1081. "[DM] DmpQuoObjNotifyCb: ClRtlSetObjSecurityInfo failed for file %1!ws!, Status=%2!u!\r\n",
  1082. szQuorumFileName,
  1083. dwError);
  1084. CL_LOGFAILURE( dwError );
  1085. CsInconsistencyHalt( dwError );
  1086. }
  1087. }
  1088. DmGetQuorumLogMaxSize(&dwMaxQuoLogSize);
  1089. // If the resource monitor dies and comes back up, this can happen
  1090. if (ghQuoLog != NULL)
  1091. {
  1092. LogClose(ghQuoLog);
  1093. if (gbIsQuoLoggingOn) gbNeedToCheckPoint = TRUE;
  1094. }
  1095. //
  1096. // Chittur Subbaraman (chitturs) - 10/16/98
  1097. //
  1098. // Check whether you need to restore the database from a
  1099. // user-supplied backup directory to the quorum disk. This
  1100. // restore operation is done only once when the Dm has
  1101. // not been fully initialized. Note that this function
  1102. // is called whenever the state of the quorum resource
  1103. // changes but the restore operation is only done once.
  1104. //
  1105. if ( ( gbDmInited == FALSE ) &&
  1106. ( CsDatabaseRestore == TRUE ) )
  1107. {
  1108. ClRtlLogPrint(LOG_NOISE,
  1109. "[DM] DmpQuoObjNotifyCb: Beginning DB restoration from %1!ws!...\r\n",
  1110. CsDatabaseRestorePath);
  1111. if ( ( dwError = DmpRestoreClusterDatabase ( szQuorumFileName ) )
  1112. != ERROR_SUCCESS )
  1113. {
  1114. ClRtlLogPrint(LOG_UNUSUAL,
  1115. "[DM] DmpQuoObjNotifyCb: DB restore operation from %1!ws! failed! Error=0x%2!08lx!\r\n",
  1116. CsDatabaseRestorePath,
  1117. dwError);
  1118. CL_LOGFAILURE( dwError );
  1119. CsDatabaseRestore = FALSE;
  1120. CsInconsistencyHalt( dwError );
  1121. }
  1122. ClRtlLogPrint(LOG_NOISE,
  1123. "[DM] DmpQuoObjNotifyCb: DB restoration from %1!ws! successful...\r\n",
  1124. CsDatabaseRestorePath);
  1125. CL_LOGCLUSINFO( SERVICE_CLUSTER_DATABASE_RESTORE_SUCCESSFUL );
  1126. }
  1127. lstrcat(szQuorumFileName, cszQuoFileName);
  1128. ClRtlLogPrint(LOG_NOISE,
  1129. "[DM] DmpQuoObjNotifyCb: the name of the quorum file is %1!ls!\r\n",
  1130. szQuorumFileName);
  1131. //
  1132. // Chittur Subbaraman (chitturs) - 12/4/99
  1133. //
  1134. // If the quorum log file is found to be missing or corrupt,
  1135. // reset it only under the following conditions, else
  1136. // fail the log creation and halt the node.
  1137. //
  1138. // (1) A freshly formed cluster,
  1139. // (2) The user has chosen to reset the log since the user
  1140. // does not have a backup.
  1141. // (3) After the quorum resource has successfully come
  1142. // online on this node and the DM has been initialized
  1143. // successfully. This is because the sanity of the
  1144. // quorum log file has already been verified at
  1145. // initialization and the chances of the quorum log
  1146. // missing or getting corrputed after that are not
  1147. // so high (due to it being held open by the cluster
  1148. // service) and so it is not worth halting the node
  1149. // during run-time.
  1150. //
  1151. if ((CsFirstRun && !CsUpgrade) ||
  1152. (CsResetQuorumLog) ||
  1153. (gbDmInited == TRUE))
  1154. {
  1155. ClRtlLogPrint(LOG_NOISE,
  1156. "[DM] DmpQuoObjNotifyCb: Will try to reset Quorum log if file not found or if corrupt\r\n");
  1157. bForceReset = TRUE;
  1158. }
  1159. // open the log file
  1160. ghQuoLog = LogCreate(szQuorumFileName, dwMaxQuoLogSize,
  1161. (PLOG_GETCHECKPOINT_CALLBACK)DmpGetSnapShotCb, NULL,
  1162. bForceReset, &FirstLsn);
  1163. if (!ghQuoLog)
  1164. {
  1165. dwError = GetLastError();
  1166. ClRtlLogPrint(LOG_UNUSUAL,
  1167. "[DM] DmpQuoObjNotifyCb: Quorum log could not be opened, error = 0x%1!08lx!\r\n",
  1168. dwError);
  1169. CL_LOGFAILURE(dwError);
  1170. CsInconsistencyHalt(ERROR_QUORUMLOG_OPEN_FAILED);
  1171. }
  1172. else
  1173. {
  1174. ClRtlLogPrint(LOG_NOISE,
  1175. "[DM] DmpQuoObjNotifyCb: Quorum log opened\r\n");
  1176. }
  1177. if (gbNeedToCheckPoint && ghQuoLog)
  1178. {
  1179. //take a checkpoint and set the flag to FALSE.
  1180. gbNeedToCheckPoint = FALSE;
  1181. //get a checkpoint database
  1182. ClRtlLogPrint(LOG_NOISE,
  1183. "[DM] DmpQuoObjNotifyCb - taking a checkpoint\r\n");
  1184. //
  1185. // Chittur Subbaraman (chitturs) - 6/3/99
  1186. //
  1187. // Make sure the gLockDmpRoot is held before LogCheckPoint is called
  1188. // so as to maintain the ordering between this lock and the log lock.
  1189. //
  1190. ACQUIRE_SHARED_LOCK(gLockDmpRoot);
  1191. dwError = LogCheckPoint(ghQuoLog, TRUE, NULL, 0);
  1192. RELEASE_LOCK(gLockDmpRoot);
  1193. if (dwError != ERROR_SUCCESS)
  1194. {
  1195. ClRtlLogPrint(LOG_CRITICAL,
  1196. "[DM] DmpEventHandler - Failed to take a checkpoint in the log file, error = 0x%1!08lx!\r\n",
  1197. dwError);
  1198. CL_UNEXPECTED_ERROR(dwError);
  1199. CsInconsistencyHalt(dwError);
  1200. }
  1201. }
  1202. //if the checkpoint timer doesnt already exist
  1203. //check if the timer has already been created - we might
  1204. // get two post online notifications
  1205. //and dont cause a timer leak
  1206. if (!ghCheckpointTimer)
  1207. {
  1208. ghCheckpointTimer = CreateWaitableTimer(NULL, FALSE, NULL);
  1209. if (!ghCheckpointTimer)
  1210. {
  1211. CL_UNEXPECTED_ERROR(dwError = GetLastError());
  1212. }
  1213. else
  1214. {
  1215. DWORD dwCheckpointInterval;
  1216. dwError = DmpGetCheckpointInterval(&dwCheckpointInterval);
  1217. CL_ASSERT(dwError == ERROR_SUCCESS);
  1218. //add a timer to take periodic checkpoints
  1219. AddTimerActivity(ghCheckpointTimer, dwCheckpointInterval,
  1220. 1, DmpCheckpointTimerCb, &ghQuoLog);
  1221. }
  1222. }
  1223. }
  1224. //SS:completion of hack, revert to enabling pop-ups
  1225. CurrentTeb->HardErrorsAreDisabled = OldHardErrorValue;
  1226. }
  1227. if (ghQuoLogOpenEvent)
  1228. {
  1229. //this is the first notification after the form
  1230. //allow the initialization to continue after rolling
  1231. //back the changes
  1232. SetEvent(ghQuoLogOpenEvent);
  1233. }
  1234. break;
  1235. case NOTIFY_RESOURCE_FAILED:
  1236. case NOTIFY_RESOURCE_PREOFFLINE:
  1237. case NOTIFY_RESOURCE_OFFLINEPENDING:
  1238. ClRtlLogPrint(LOG_NOISE,
  1239. "[DM] DmpQuoObjNotifyCb: Quorum resource offline/offlinepending/preoffline\r\n");
  1240. gbIsQuoResOnline = FALSE;
  1241. if (ghQuoLog)
  1242. {
  1243. //stop the checkpoint timer
  1244. if (ghCheckpointTimer)
  1245. {
  1246. RemoveTimerActivity(ghCheckpointTimer);
  1247. ghCheckpointTimer = NULL;
  1248. }
  1249. LogClose(ghQuoLog);
  1250. ghQuoLog = NULL;
  1251. //dont try and log after this
  1252. gbIsQuoLoggingOn = FALSE;
  1253. }
  1254. if (ghQuoLogOpenEvent)
  1255. {
  1256. //this is the first notification after the form
  1257. //allow the initialization to continue after rolling
  1258. //back the changes
  1259. SetEvent(ghQuoLogOpenEvent);
  1260. }
  1261. break;
  1262. }
  1263. }
  1264. /****
  1265. @func DWORD | DmpHookEventHandler| This hooks a callback to be invoked whenever
  1266. the state of the quorum resource changes.
  1267. @rdesc Returns a result code. ERROR_SUCCESS on success.
  1268. @comm This is used to monitor the state of nodes and turn quorum logging on or off.
  1269. @xref
  1270. ****/
  1271. DWORD DmpHookEventHandler()
  1272. {
  1273. DWORD dwError;
  1274. dwError = EpRegisterEventHandler(CLUSTER_EVENT_ALL,DmpEventHandler);
  1275. if (dwError != ERROR_SUCCESS)
  1276. {
  1277. ClRtlLogPrint(LOG_UNUSUAL,
  1278. "[DM] DmHookEventHandler: EpRegisterEventHandler failed, error=0x%1!08lx!\r\n",
  1279. dwError);
  1280. CL_UNEXPECTED_ERROR( dwError );
  1281. }
  1282. return(dwError);
  1283. }
  1284. /****
  1285. @func DWORD | DmpEventHandler| This routine handles events for the Cluster
  1286. Database Manager.
  1287. @parm CLUSTER_EVENT | Event | The event to be processed. Only one event at a time.
  1288. If the event is not handled, return ERROR_SUCCESS.
  1289. @parm PVOID| pContext | A pointer to context associated with the particular event.
  1290. @rdesc Returns ERROR_SUCCESS else a Win32 error code on other errors.
  1291. @comm This is used to monitor the state of nodes and turn quorum logging on or off.
  1292. @xref
  1293. ****/
  1294. DWORD WINAPI DmpEventHandler(
  1295. IN CLUSTER_EVENT Event,
  1296. IN PVOID pContext
  1297. )
  1298. {
  1299. DWORD dwError=ERROR_SUCCESS;
  1300. BOOL bAreAllNodesUp;
  1301. switch ( Event ) {
  1302. case CLUSTER_EVENT_NODE_UP:
  1303. bAreAllNodesUp = TRUE;
  1304. if ((dwError = OmEnumObjects(ObjectTypeNode, DmpNodeObjEnumCb, &bAreAllNodesUp, NULL))
  1305. != ERROR_SUCCESS)
  1306. {
  1307. ClRtlLogPrint(LOG_UNUSUAL,
  1308. "[DM]DmpEventHandler : OmEnumObjects returned, error=0x%1!08lx!\r\n",
  1309. dwError);
  1310. }
  1311. else
  1312. {
  1313. if (bAreAllNodesUp)
  1314. {
  1315. ClRtlLogPrint(LOG_NOISE,
  1316. "[DM] DmpEventHandler - node is up, turning quorum logging off\r\n");
  1317. gbIsQuoLoggingOn = FALSE;
  1318. }
  1319. }
  1320. break;
  1321. case CLUSTER_EVENT_NODE_DOWN:
  1322. if (!gbIsQuoLoggingOn)
  1323. {
  1324. HANDLE hThread = NULL;
  1325. DWORD dwThreadId;
  1326. //
  1327. // Chittur Subbaraman (chitturs) - 7/23/99
  1328. //
  1329. // Create a new thread to handle the checkpointing on a
  1330. // node down. This is necessary since we don't want the
  1331. // DM node down handler to be blocked in any fashion. If
  1332. // it is blocked since FmCheckQuorumState couldn't get the
  1333. // quorum group lock and some other thread got the group
  1334. // lock and is waiting for the GUM lock, then we have
  1335. // an immediate deadlock. Only after this node down
  1336. // handler finishes, any subsequent future node down
  1337. // processing can be started.
  1338. //
  1339. ClRtlLogPrint(LOG_NOISE,
  1340. "[DM] DmpEventHandler - Node is down, turn quorum logging on...\r\n");
  1341. gbIsQuoLoggingOn = TRUE;
  1342. ClRtlLogPrint(LOG_NOISE,
  1343. "[DM] DmpEventHandler - Create thread to handle node down event...\r\n");
  1344. hThread = CreateThread( NULL,
  1345. 0,
  1346. DmpHandleNodeDownEvent,
  1347. NULL,
  1348. 0,
  1349. &dwThreadId );
  1350. if ( hThread == NULL )
  1351. {
  1352. dwError = GetLastError();
  1353. ClRtlLogPrint(LOG_CRITICAL,
  1354. "[DM] DmpEventHandler - Unable to create thread to handle node down event. Error=0x%1!08lx!\r\n",
  1355. dwError);
  1356. CsInconsistencyHalt( dwError );
  1357. }
  1358. CloseHandle( hThread );
  1359. }
  1360. break;
  1361. case CLUSTER_EVENT_NODE_CHANGE:
  1362. break;
  1363. case CLUSTER_EVENT_NODE_ADDED:
  1364. break;
  1365. case CLUSTER_EVENT_NODE_DELETED:
  1366. break;
  1367. case CLUSTER_EVENT_NODE_JOIN:
  1368. break;
  1369. }
  1370. return(dwError);
  1371. } // DmpEventHandler
  1372. /****
  1373. @func DWORD | DmpNodeObjEnumCb| This is a callback that is called when node
  1374. objects are enumberate by the dm.
  1375. @parm PVOID | pContext| A pointer to a DMLOGRECORD structure.
  1376. @parm PVOID | pObject| A pointer to quorum resource object.
  1377. @parm DWORD | dwNotification| A pointer to a DMLOGRECORD structure.
  1378. @rdesc Returns a result code. ERROR_SUCCESS on success.
  1379. @xref
  1380. ****/
  1381. BOOL DmpNodeObjEnumCb(IN BOOL *pbAreAllNodesUp, IN PVOID pContext2,
  1382. IN PVOID pNode, IN LPCWSTR szName)
  1383. {
  1384. if ((NmGetNodeState(pNode) != ClusterNodeUp) &&
  1385. (NmGetNodeState(pNode) != ClusterNodePaused))
  1386. *pbAreAllNodesUp = FALSE;
  1387. //if any of the nodes is down fall out
  1388. return(*pbAreAllNodesUp);
  1389. }
  1390. /****
  1391. @func BOOL | DmpGetSnapShotCb| This callback is invoked when the logger
  1392. is asked to take a checkpoint record for the cluster registry.
  1393. @parm PVOID| pContext | The checkpoint context passed into LogCreate.
  1394. @parm LPWSTR | szChkPtFile | The name of the file in which to take a checkpoint.
  1395. @parm LPDWORD | pdwChkPtSequence | The sequence number related with this
  1396. checkpoint is returned in this.
  1397. @rdesc Returns a result code. ERROR_SUCCESS on success. If the file corresponding
  1398. to this checkpoint already exists, it will return ERROR_ALREADY_EXISTS and
  1399. szChkPtFile will be set to the name of the file.
  1400. @comm LogCheckPoint() calls this function when the log manager is asked to checkpoint the
  1401. dm database.
  1402. @xref
  1403. ****/
  1404. DWORD WINAPI DmpGetSnapShotCb(IN LPCWSTR szPathName, IN PVOID pContext,
  1405. OUT LPWSTR szChkPtFile, OUT LPDWORD pdwChkPtSequence)
  1406. {
  1407. DWORD dwError = ERROR_SUCCESS;
  1408. WCHAR szFilePrefix[MAX_PATH] = L"chkpt";
  1409. WCHAR szTempFile[MAX_PATH] = L"";
  1410. ACQUIRE_SHARED_LOCK( gLockDmpRoot );
  1411. szChkPtFile[0] = L'\0';
  1412. //
  1413. // Chittur Subbaraman (chitturs) - 5/1/2000
  1414. //
  1415. // Checkpoint file name is based on registry sequence number. It is possible that two
  1416. // or more consecutive calls to this function to take checkpoints may read the same
  1417. // registry sequence number. Thus, if DmGetDatabase fails for some reason, it is possible
  1418. // that an existing checkpoint file will get corrupted. Thus, even though the quorum log
  1419. // marks a 'start checkpoint record' and an 'end checkpoint record', it could turn out
  1420. // to be useless if this function manages to corrupt an existing checkpoint file. To solve
  1421. // this problem, we first generate a temp file, take a cluster hive snapshot as this temp
  1422. // file, then atomically move the temp file to the final checkpoint file using the MoveFileEx
  1423. // function.
  1424. //
  1425. //
  1426. // Create a new unique temp file name
  1427. //
  1428. if ( !GetTempFileNameW( szPathName, szFilePrefix, 0, szTempFile ) )
  1429. {
  1430. dwError = GetLastError();
  1431. ClRtlLogPrint(LOG_UNUSUAL,
  1432. "[LM] DmpGetSnapShotCb: Failed to generate a temp file name, PathName=%1!ls!, FilePrefix=%2!ls!, Error=0x%3!08lx!\r\n",
  1433. szPathName, szFilePrefix, dwError);
  1434. goto FnExit;
  1435. }
  1436. dwError = DmCommitRegistry(); // Ensure up-to-date snapshot
  1437. if ( dwError != ERROR_SUCCESS )
  1438. {
  1439. ClRtlLogPrint(LOG_CRITICAL,
  1440. "[LM] DmpGetSnapShotCb: DmCommitRegistry() failed, Error=0x%1!08lx!\r\n",
  1441. dwError);
  1442. goto FnExit;
  1443. }
  1444. dwError = DmGetDatabase( DmpRoot, szTempFile );
  1445. ClRtlLogPrint(LOG_NOISE,
  1446. "[DM] DmpGetSnapShotCb: DmpGetDatabase returned 0x%1!08lx!\r\n",
  1447. dwError);
  1448. if ( dwError == ERROR_SUCCESS )
  1449. {
  1450. *pdwChkPtSequence = DmpGetRegistrySequence();
  1451. //
  1452. // Create a checkpoint file name based on the registry sequence number
  1453. //
  1454. if ( !GetTempFileNameW( szPathName, szFilePrefix, *pdwChkPtSequence, szChkPtFile ) )
  1455. {
  1456. dwError = GetLastError();
  1457. ClRtlLogPrint(LOG_UNUSUAL,
  1458. "[LM] DmpGetSnapShotCb: Failed to generate a chkpt file name, PathName=%1!ls!, FilePrefix=%2!ls!, Error=0x%3!08lx!\r\n",
  1459. szPathName, szFilePrefix, dwError);
  1460. //
  1461. // Reset the file name to null, as this information will be used to determine
  1462. // if the checkpoint was taken
  1463. //
  1464. szChkPtFile[0] = L'\0';
  1465. goto FnExit;
  1466. }
  1467. ClRtlLogPrint(LOG_NOISE,
  1468. "[LM] DmpGetSnapshotCb: Checkpoint file name=%1!ls! Seq#=%2!d!\r\n",
  1469. szChkPtFile, *pdwChkPtSequence);
  1470. if ( !MoveFileEx( szTempFile, szChkPtFile, MOVEFILE_REPLACE_EXISTING | MOVEFILE_WRITE_THROUGH ) )
  1471. {
  1472. dwError = GetLastError();
  1473. ClRtlLogPrint(LOG_UNUSUAL,
  1474. "[LM] DmpGetSnapShotCb: Failed to move the temp file to checkpoint file, TempFileName=%1!ls!, ChkPtFileName=%2!ls!, Error=0x%3!08lx!\r\n",
  1475. szTempFile, szChkPtFile, dwError);
  1476. //
  1477. // Reset the file name to null, as this information will be used to determine
  1478. // if the checkpoint was taken
  1479. //
  1480. szChkPtFile[0] = L'\0';
  1481. goto FnExit;
  1482. }
  1483. }
  1484. FnExit:
  1485. RELEASE_LOCK(gLockDmpRoot);
  1486. if ( dwError != ERROR_SUCCESS )
  1487. {
  1488. DeleteFileW( szTempFile );
  1489. }
  1490. return ( dwError );
  1491. }
  1492. /****
  1493. @func BOOL WINAPI | DmpLogApplyChangesCb| This callback walks through the records in
  1494. the quorum logs and applies changes to the local database.
  1495. @parm PVOID | pContext | The event to be processed. Only one event at a time.
  1496. If the event is not handled, return ERROR_SUCCESS.
  1497. @parm LSN | Lsn | Lsn of the record.
  1498. @parm RMID | Resource | The resource id of the entity that logged this record.
  1499. @parm RMTYPE | ResourceType | The record type that is specific to the resource id.
  1500. @parm TRID | Transaction | The sequence number of the transaction.
  1501. @parm const PVOID | pLogData | A pointer to the record data.
  1502. @parm DWORD | DataLength | The length of the data in bytes.
  1503. @rdesc Returns TRUE to continue scan else returns FALSE.
  1504. @comm This function is called at initialization when a cluster is being formed to apply
  1505. transactions from the quorum log to the local cluster database.
  1506. @xref
  1507. ****/
  1508. BOOL WINAPI DmpLogApplyChangesCb(
  1509. IN PVOID pContext,
  1510. IN LSN Lsn,
  1511. IN RMID Resource,
  1512. IN RMTYPE ResourceType,
  1513. IN TRID Transaction,
  1514. IN TRTYPE TransactionType,
  1515. IN const PVOID pLogData,
  1516. IN DWORD DataLength)
  1517. {
  1518. DWORD Status;
  1519. PDM_LOGSCAN_CONTEXT pDmAppliedChangeContext = (PDM_LOGSCAN_CONTEXT) pContext;
  1520. TRSTATE trXsactionState;
  1521. BOOL bRet = TRUE;
  1522. CL_ASSERT(pDmAppliedChangeContext);
  1523. //if the resource id is not the same as dm..ignore..go to the next one
  1524. switch(TransactionType)
  1525. {
  1526. case TTStartXsaction:
  1527. Status = LogFindXsactionState(ghQuoLog, Lsn, Transaction, &trXsactionState);
  1528. if (Status != ERROR_SUCCESS)
  1529. {
  1530. //there was an error
  1531. ClRtlLogPrint(LOG_NOISE, "[DM] DmpLogApplyChangesCb ::LogFindXsaction failed, error=0x%1!08lx!\r\n",
  1532. Status);
  1533. //assume unknown state
  1534. CL_LOGFAILURE(Status);
  1535. trXsactionState = XsactionUnknown;
  1536. }
  1537. //if the transaction is successful apply it, else continue
  1538. if (trXsactionState == XsactionCommitted)
  1539. {
  1540. Status = LogScanXsaction(ghQuoLog, Lsn, Transaction, DmpApplyTransactionCb,
  1541. NULL);
  1542. if (Status != ERROR_SUCCESS)
  1543. {
  1544. ClRtlLogPrint(LOG_NOISE,
  1545. "[DM] DmpLogApplyChangesCb :LogScanTransaction for committed record failed, error=0x%1!08lx!\r\n",
  1546. Status);
  1547. bRet = FALSE;
  1548. CL_LOGFAILURE(Status);
  1549. break;
  1550. }
  1551. pDmAppliedChangeContext->dwSequence = Transaction;
  1552. }
  1553. else
  1554. {
  1555. ClRtlLogPrint(LOG_NOISE, "[DM] TransactionState = %1!u!\r\n",
  1556. trXsactionState);
  1557. }
  1558. break;
  1559. case TTCompleteXsaction:
  1560. bRet = DmpApplyTransactionCb(NULL, Lsn, Resource, ResourceType,
  1561. Transaction, pLogData, DataLength);
  1562. pDmAppliedChangeContext->dwSequence = Transaction;
  1563. break;
  1564. default:
  1565. CL_ASSERT(FALSE);
  1566. }
  1567. return(bRet);
  1568. }
  1569. BOOL WINAPI DmpApplyTransactionCb(
  1570. IN PVOID pContext,
  1571. IN LSN Lsn,
  1572. IN RMID Resource,
  1573. IN RMTYPE ResourceType,
  1574. IN TRID TransactionId,
  1575. IN const PVOID pLogData,
  1576. IN DWORD dwDataLength)
  1577. {
  1578. DWORD Status;
  1579. switch(ResourceType)
  1580. {
  1581. case DmUpdateCreateKey:
  1582. ClRtlLogPrint(LOG_NOISE,"[DM] DmpLogScanCb::DmUpdateCreateKey\n");
  1583. //SS: we dont care at this point as to where the update originated
  1584. Status = DmpUpdateCreateKey(FALSE,
  1585. GET_ARG(pLogData,0),
  1586. GET_ARG(pLogData,1),
  1587. GET_ARG(pLogData,2));
  1588. break;
  1589. case DmUpdateDeleteKey:
  1590. ClRtlLogPrint(LOG_NOISE,"[DM] DmUpdateDeleteKey \n");
  1591. Status = DmpUpdateDeleteKey(FALSE,
  1592. (PDM_DELETE_KEY_UPDATE)((PBYTE)pLogData));
  1593. break;
  1594. case DmUpdateSetValue:
  1595. ClRtlLogPrint(LOG_NOISE,"[DM] DmUpdateSetValue \n");
  1596. Status = DmpUpdateSetValue(FALSE,
  1597. (PDM_SET_VALUE_UPDATE)((PBYTE)pLogData));
  1598. break;
  1599. case DmUpdateDeleteValue:
  1600. ClRtlLogPrint(LOG_NOISE,"[DM] DmUpdateDeleteValue\n");
  1601. Status = DmpUpdateDeleteValue(FALSE,
  1602. (PDM_DELETE_VALUE_UPDATE)((PBYTE)pLogData));
  1603. break;
  1604. case DmUpdateJoin:
  1605. ClRtlLogPrint(LOG_UNUSUAL,"[DM] DmUpdateJoin\n");
  1606. Status = ERROR_SUCCESS;
  1607. break;
  1608. default:
  1609. ClRtlLogPrint(LOG_UNUSUAL,"[DM] DmpLogScanCb:uType = %1!u!\r\n",
  1610. ResourceType);
  1611. Status = ERROR_INVALID_DATA;
  1612. CL_UNEXPECTED_ERROR(ERROR_INVALID_DATA);
  1613. break;
  1614. }
  1615. return(TRUE);
  1616. }
  1617. /****
  1618. @func WORD| DmpLogCheckPtCb| A callback fn for DM
  1619. to take a checkpoint to the log if the quorum
  1620. resource is online on this node.
  1621. @rdesc Returns ERROR_SUCCESS for success, else returns the error code.
  1622. @comm This callback is called when the quorum resource
  1623. is online on this node. Since the quorum resource
  1624. synchronous callbacks are called before the resource
  1625. state changes are propagated, if the quorum is online
  1626. the log must be open.
  1627. @xref
  1628. ****/
  1629. void DmpLogCheckPointCb()
  1630. {
  1631. DWORD dwError;
  1632. //
  1633. // Chittur Subbaraman (chitturs) - 9/22/99
  1634. //
  1635. // If the quorum logging switch is off, don't do anything.
  1636. //
  1637. if (CsNoQuorumLogging) return;
  1638. //once it is online the log file should be open
  1639. //SS:BUGS: should we log something in the eventlog
  1640. if (ghQuoLog)
  1641. {
  1642. //
  1643. // Chittur Subbaraman (chitturs) - 6/3/99
  1644. //
  1645. // Make sure the gLockDmpRoot is held before LogCheckPoint is called
  1646. // so as to maintain the ordering between this lock and the log lock.
  1647. //
  1648. ACQUIRE_SHARED_LOCK(gLockDmpRoot);
  1649. //get a checkpoint database
  1650. dwError = LogCheckPoint(ghQuoLog, TRUE, NULL, 0);
  1651. RELEASE_LOCK(gLockDmpRoot);
  1652. if (dwError != ERROR_SUCCESS)
  1653. {
  1654. ClRtlLogPrint(LOG_CRITICAL,
  1655. "[DM] DmpLogCheckPointCb - Failed to take a checkpoint in the log file, error=0x%1!08lx!\r\n",
  1656. dwError);
  1657. CL_UNEXPECTED_ERROR(dwError);
  1658. }
  1659. ClRtlLogPrint(LOG_NOISE,
  1660. "[DM] DmpLogCheckPointCb - taken checkpoint\r\n");
  1661. }
  1662. else
  1663. {
  1664. CsInconsistencyHalt(ERROR_QUORUMLOG_OPEN_FAILED);
  1665. }
  1666. }
  1667. /****
  1668. @func WORD| DmGetQuorumLogPath| Reads the quorum log file path configured in
  1669. the registry during setup.
  1670. @parm LPWSTR | szQuorumLogPath | A pointer to a wide string of size MAX_PATH.
  1671. @parm DWORD | dwSize | The size of szQuorumLogPath in bytes.
  1672. @rdesc Returns ERROR_SUCCESS for success, else returns the error code.
  1673. @comm If the quorum resource is not cabaple of logging this should not be set.
  1674. @xref
  1675. ****/
  1676. DWORD DmGetQuorumLogPath(LPWSTR szQuorumLogPath, DWORD dwSize)
  1677. {
  1678. DWORD Status;
  1679. Status = DmQuerySz( DmQuorumKey,
  1680. cszPath,
  1681. &szQuorumLogPath,
  1682. &dwSize,
  1683. &dwSize);
  1684. if (Status != ERROR_SUCCESS) {
  1685. ClRtlLogPrint(LOG_UNUSUAL, "[DM] DmGetQuorumLogPath failed, error=%1!u!\n", Status);
  1686. goto FnExit;
  1687. }
  1688. FnExit:
  1689. return(Status);
  1690. }
  1691. /****
  1692. @func WORD| DmpGetCheckpointInterval| Reads the checkpoint interval
  1693. from the registry, else returns the default.
  1694. @parm LPDWORD | pdwCheckpointInterval | A pointer to DWORD where
  1695. the checkpoint interval, in secs, is returned.
  1696. @rdesc Returns ERROR_SUCCESS for success, else returns the error code.
  1697. @comm The default checkpoint interval is 4 hours. The registry must be configured
  1698. in units of hours.
  1699. @xref
  1700. ****/
  1701. DWORD DmpGetCheckpointInterval(
  1702. OUT LPDWORD pdwCheckpointInterval)
  1703. {
  1704. DWORD dwDefCheckpointInterval = DEFAULT_CHECKPOINT_INTERVAL;
  1705. DWORD dwStatus = ERROR_SUCCESS;
  1706. dwStatus = DmQueryDword( DmQuorumKey,
  1707. CLUSREG_NAME_CHECKPOINT_INTERVAL,
  1708. pdwCheckpointInterval,
  1709. &dwDefCheckpointInterval);
  1710. if (dwStatus != ERROR_SUCCESS) {
  1711. ClRtlLogPrint(LOG_UNUSUAL, "[DM] DmGetCheckpointInterval Failed, error=%1!u!\n",
  1712. dwStatus);
  1713. goto FnExit;
  1714. }
  1715. //the checkpoint interval cant be less than 1 hour or more than 1 day
  1716. if ((*pdwCheckpointInterval < 1) || (*pdwCheckpointInterval>24))
  1717. *pdwCheckpointInterval = DEFAULT_CHECKPOINT_INTERVAL;
  1718. //convert to msecs
  1719. *pdwCheckpointInterval = *pdwCheckpointInterval * 60 * 60 * 1000;
  1720. FnExit:
  1721. return(dwStatus);
  1722. }
  1723. /****
  1724. @func WORD| DmGetQuorumLogMaxSize| Reads the quorum log file max size.
  1725. @parm LPDWORD | pdwMaxLogSize| A pointer to a dword containing the size.
  1726. @rdesc Returns ERROR_SUCCESS for success, else returns the error code.
  1727. @comm If the quorum resource is not cabaple of logging this should not be set.
  1728. @xref
  1729. ****/
  1730. DWORD DmGetQuorumLogMaxSize(LPDWORD pdwMaxLogSize)
  1731. {
  1732. DWORD Status;
  1733. DWORD dwDefaultLogMaxSize = CLUSTER_QUORUM_DEFAULT_MAX_LOG_SIZE;
  1734. Status = DmQueryDword( DmQuorumKey,
  1735. cszMaxQuorumLogSize,
  1736. pdwMaxLogSize,
  1737. &dwDefaultLogMaxSize);
  1738. if (Status != ERROR_SUCCESS) {
  1739. ClRtlLogPrint(LOG_UNUSUAL, "[DM] DmGetQuorumLogMaxSize failed, error=%1!u!\n",Status);
  1740. }
  1741. return(Status);
  1742. }
  1743. /****
  1744. @func DWORD | DmpCheckDiskSpace| Called to check for the disk space
  1745. on the quorum resource after it is brought online and logs are rolled up.
  1746. @rdesc ERROR_SUCCESS if successful. Win32 error code if something horrible happened.
  1747. @comm This function checks if there is enough disk space and sets up
  1748. a periodic timer to monitor the disk space.
  1749. @xref <f DmpDiskManage>
  1750. ****/
  1751. DWORD DmpCheckDiskSpace()
  1752. {
  1753. DWORD dwError = ERROR_SUCCESS;
  1754. WCHAR szQuoLogPathName[MAX_PATH];
  1755. ULARGE_INTEGER liNumTotalBytes;
  1756. ULARGE_INTEGER liNumFreeBytes;
  1757. //if you own the quorum resource, try to check the size
  1758. if (gpQuoResource && AMIOWNEROFQUORES(gpQuoResource) && gbIsQuoResOnline)
  1759. {
  1760. //get the path
  1761. if ((dwError = DmGetQuorumLogPath(szQuoLogPathName, sizeof(szQuoLogPathName)))
  1762. != ERROR_SUCCESS)
  1763. {
  1764. ClRtlLogPrint(LOG_NOISE,
  1765. "[DM] DmpCheckDiskSpace: Quorum log file is not configured, error=%1!u!\r\n",
  1766. dwError);
  1767. //log something in the event log
  1768. CL_LOGFAILURE(dwError);
  1769. goto FnExit;
  1770. }
  1771. //check the minimum space on the quorum disk
  1772. if (!GetDiskFreeSpaceEx(szQuoLogPathName, &liNumFreeBytes, &liNumTotalBytes,
  1773. NULL))
  1774. {
  1775. dwError = GetLastError();
  1776. ClRtlLogPrint(LOG_NOISE,
  1777. "[DM] DmpCheckDiskSpace: GetDiskFreeSpace returned error=0x%1!08lx!\r\n",
  1778. dwError);
  1779. goto FnExit;
  1780. }
  1781. //if not available, log something in the event log and bail out
  1782. if ((liNumFreeBytes.HighPart == 0) &&
  1783. (liNumFreeBytes.LowPart < DISKSPACE_INIT_MINREQUIRED))
  1784. {
  1785. CL_LOGCLUSWARNING(LM_DISKSPACE_HIGH_WATERMARK);
  1786. dwError = ERROR_CLUSTERLOG_NOT_ENOUGH_SPACE;
  1787. goto FnExit;
  1788. }
  1789. }
  1790. FnExit:
  1791. return(dwError);
  1792. }
  1793. /****
  1794. @func DWORD | DmpDiskManage | This is the callback registered to perform
  1795. periodic disk check functions on the quorum resource.
  1796. @comm If the disk space has dipped below the lowwatermark, this gracefully
  1797. shuts the cluster service. If the disk space dips below the high
  1798. watermark, it sends an alert to registered recipients.
  1799. @xref <f DmpCheckDiskSpace>
  1800. ****/
  1801. void WINAPI DmpDiskManage(
  1802. IN HANDLE hTimer,
  1803. IN PVOID pContext)
  1804. {
  1805. DWORD dwError;
  1806. WCHAR szQuoLogPathName[MAX_PATH];
  1807. ULARGE_INTEGER liNumTotalBytes;
  1808. ULARGE_INTEGER liNumFreeBytes;
  1809. static DWORD dwNumWarnings=0;
  1810. if (!gpQuoResource || (!AMIOWNEROFQUORES(gpQuoResource)) ||
  1811. (!gbIsQuoResOnline || (CsNoQuorumLogging)))
  1812. {
  1813. //the owner of the quorum resource checks the disk space
  1814. //the quorum disk shouldnt go offline
  1815. //skip checking if no quorum logging is required
  1816. return;
  1817. }
  1818. //get the path
  1819. if ((dwError = DmGetQuorumLogPath(szQuoLogPathName, sizeof(szQuoLogPathName)))
  1820. != ERROR_SUCCESS)
  1821. {
  1822. ClRtlLogPrint(LOG_NOISE,
  1823. "[DM] DmpDiskManage: Quorum log file is not configured, error=%1!u!\r\n",
  1824. dwError);
  1825. //log something in the event log
  1826. CL_UNEXPECTED_ERROR(dwError);
  1827. goto FnExit;
  1828. }
  1829. //check the minimum space on the quorum disk
  1830. if (!GetDiskFreeSpaceEx(szQuoLogPathName, &liNumFreeBytes, &liNumTotalBytes,
  1831. NULL))
  1832. {
  1833. dwError = GetLastError();
  1834. ClRtlLogPrint(LOG_NOISE,
  1835. "[DM] DmpDiskManage: GetDiskFreeSpace returned error=0x%1!08lx!\r\n",
  1836. dwError);
  1837. CL_LOGFAILURE(dwError);
  1838. goto FnExit;
  1839. }
  1840. if ((liNumFreeBytes.HighPart == 0) &&
  1841. (liNumFreeBytes.LowPart < DISKSPACE_LOW_WATERMARK))
  1842. {
  1843. //reached the low water mark
  1844. dwNumWarnings++;
  1845. //ss: we can control the rate at which we put things in the
  1846. //event log but once every five minutes is not bad.
  1847. //ss: post an event ???
  1848. ClRtlLogPrint(LOG_NOISE,
  1849. "[DM] DmpDiskManage: GetDiskFreeSpace - Not enough disk space, Avail=0x%1!08lx!\r\n",
  1850. liNumFreeBytes.LowPart);
  1851. CL_LOGCLUSWARNING(LM_DISKSPACE_LOW_WATERMARK);
  1852. }
  1853. else
  1854. {
  1855. gbIsQuoResEnoughSpace = TRUE;
  1856. dwNumWarnings = 0;
  1857. }
  1858. FnExit:
  1859. return;
  1860. }
  1861. /****
  1862. @func DWORD | DmpCheckpointTimerCb | This is the callback registered to perform
  1863. periodic checkpointing on the quorum log.
  1864. @parm IN HANDLE| hTimer| The timer associated with checkpointing interval.
  1865. @parm IN PVOID | pContext | A pointer to the handle for the quorum log file.
  1866. @comm This helps in backups. If you want to take a cluster backup by making
  1867. a copy of the quorum.log and checkpoint files, then if both nodes have
  1868. been up for a long time both the files can be old. By taking a periodic
  1869. checkpoint we guarantee that they are not more than n hours old.
  1870. ****/
  1871. void WINAPI DmpCheckpointTimerCb(
  1872. IN HANDLE hTimer,
  1873. IN PVOID pContext)
  1874. {
  1875. HLOG hQuoLog;
  1876. DWORD dwError;
  1877. hQuoLog = *((HLOG *)pContext);
  1878. if (hQuoLog && gbDmInited)
  1879. {
  1880. //get a checkpoint database
  1881. ClRtlLogPrint(LOG_NOISE,
  1882. "[DM]DmpCheckpointTimerCb- taking a checkpoint\r\n");
  1883. //
  1884. // Chittur Subbaraman (chitturs) - 6/3/99
  1885. //
  1886. // Make sure the gLockDmpRoot is held before LogCheckPoint is called
  1887. // so as to maintain the ordering between this lock and the log lock.
  1888. //
  1889. ACQUIRE_SHARED_LOCK(gLockDmpRoot);
  1890. dwError = LogReset(hQuoLog);
  1891. RELEASE_LOCK(gLockDmpRoot);
  1892. if (dwError != ERROR_SUCCESS)
  1893. {
  1894. ClRtlLogPrint(LOG_CRITICAL,
  1895. "[DM]DmpCheckpointTimerCb - Failed to reset log, error=%1!u!\r\n",
  1896. dwError);
  1897. CL_UNEXPECTED_ERROR(dwError);
  1898. }
  1899. }
  1900. }
  1901. /****
  1902. @func DWORD | DmBackupClusterDatabase | Take a fresh checkpoint and
  1903. copy the quorum log and the checkpoint file to the supplied
  1904. path name. This function is called with gQuoLock held.
  1905. @parm IN LPCWSTR | lpszPathName | The directory path name where the
  1906. files have to be backed up. This path must be visible to the
  1907. node on which the quorum resource is online (i.e., this node
  1908. in this case).
  1909. @comm This function first takes a fresh checkpoint, updates the quorum
  1910. log file and then copies the two files to a backup area.
  1911. @rdesc Returns a Win32 error code on failure. ERROR_SUCCESS on success.
  1912. @xref <f DmpLogCheckpointAndBackup> <f DmpRestoreClusterDatabase>
  1913. ****/
  1914. DWORD DmBackupClusterDatabase(
  1915. IN LPCWSTR lpszPathName)
  1916. {
  1917. HANDLE hFindFile = INVALID_HANDLE_VALUE;
  1918. WIN32_FIND_DATA FindData;
  1919. DWORD status = ERROR_SUCCESS;
  1920. LPWSTR szDestPathName = NULL;
  1921. DWORD dwLen;
  1922. //
  1923. // Chittur Subbaraman (chitturs) - 10/12/98
  1924. //
  1925. dwLen = lstrlenW( lpszPathName );
  1926. //
  1927. // It is safer to use dynamic memory allocation for user-supplied
  1928. // path since we don't want to put restrictions on the user
  1929. // on the length of the path that can be supplied. However, as
  1930. // far as our own quorum disk path is concerned, it is system-dependent
  1931. // and static memory allocation for that would suffice.
  1932. //
  1933. szDestPathName = (LPWSTR) LocalAlloc ( LMEM_FIXED,
  1934. ( dwLen + 5 ) *
  1935. sizeof ( WCHAR ) );
  1936. if ( szDestPathName == NULL )
  1937. {
  1938. status = GetLastError();
  1939. ClRtlLogPrint(LOG_NOISE,
  1940. "[DM] DmBackupClusterDatabase: Error %1!d! in allocating memory for %2!ws! !!!\n",
  1941. status,
  1942. lpszPathName);
  1943. CL_LOGFAILURE( status );
  1944. goto FnExit;
  1945. }
  1946. lstrcpyW( szDestPathName, lpszPathName );
  1947. //
  1948. // If the client-supplied path is not already terminated with '\',
  1949. // then add it.
  1950. //
  1951. if ( szDestPathName [dwLen-1] != L'\\' )
  1952. {
  1953. szDestPathName [dwLen++] = L'\\';
  1954. }
  1955. //
  1956. // Add a wild character at the end to search for any file in the
  1957. // supplied directory
  1958. //
  1959. szDestPathName[dwLen++] = L'*';
  1960. szDestPathName[dwLen] = L'\0';
  1961. //
  1962. // Find out whether you can access the supplied path by
  1963. // trying to find some file in the directory.
  1964. //
  1965. hFindFile = FindFirstFile( szDestPathName, &FindData );
  1966. if ( hFindFile == INVALID_HANDLE_VALUE )
  1967. {
  1968. status = GetLastError();
  1969. ClRtlLogPrint(LOG_NOISE,
  1970. "[DM] DmBackupClusterDatabase: Supplied path %1!ws! does not exist, Error=%2!d! !!!\n",
  1971. szDestPathName,
  1972. status);
  1973. goto FnExit;
  1974. }
  1975. //
  1976. // Check whether the log is open. It must be since we already
  1977. // verified that the quorum resource is online on this node and
  1978. // quorum logging is turned on.
  1979. //
  1980. if ( ghQuoLog )
  1981. {
  1982. //
  1983. // Remove the '*' so the same variable can be used.
  1984. //
  1985. szDestPathName [dwLen-1] = L'\0';
  1986. ClRtlLogPrint(LOG_NOISE,
  1987. "[DM] DmBackupClusterDatabase: Attempting to take a checkpoint and then backup to %1!ws!..\n",
  1988. szDestPathName);
  1989. //
  1990. // The gLockDmpRoot needs to be acquired here since otherwise
  1991. // you will get the log lock in the LogCheckPoint( )
  1992. // function and someone else could get the gLockDmpRoot.
  1993. // After you get the log lock, you also try to acquire
  1994. // the gLockDmpRoot in the function DmCommitRegistry.
  1995. // This is a potential deadlock situation and is avoided here.
  1996. //
  1997. ACQUIRE_SHARED_LOCK(gLockDmpRoot);
  1998. status = DmpLogCheckpointAndBackup ( ghQuoLog, szDestPathName );
  1999. RELEASE_LOCK(gLockDmpRoot);
  2000. if ( status == ERROR_SUCCESS )
  2001. {
  2002. ClRtlLogPrint(LOG_NOISE,
  2003. "[DM] DmBackupClusterDatabase: Successfully finished backing up to %1!ws!...\n",
  2004. szDestPathName);
  2005. }
  2006. } else
  2007. {
  2008. ClRtlLogPrint(LOG_UNUSUAL,
  2009. "[DM] DmBackupClusterDatabase: Quorum log could not be opened...\r\n");
  2010. status = ERROR_QUORUMLOG_OPEN_FAILED;
  2011. }
  2012. FnExit:
  2013. if ( hFindFile != INVALID_HANDLE_VALUE )
  2014. {
  2015. FindClose ( hFindFile );
  2016. }
  2017. LocalFree ( szDestPathName );
  2018. return ( status );
  2019. }
  2020. /****
  2021. @func DWORD | DmpLogCheckpointAndBackup | Takes a checkpoint, updates the
  2022. quorum log and then copies the files to the supplied path. This
  2023. function is called with the gQuoLock and the gLockDmpRoot held.
  2024. @parm IN HLOG | hLogFile | An identifier for the quorum log file.
  2025. @parm IN LPWSTR | lpszPathName | The path for storing the quorum log
  2026. file, the recent checkpoint file, and the resource registry
  2027. checkpoint files. This path must be visible from this node.
  2028. @comm Called by DmpBackupQuorumLog() to take a checkpoint and then
  2029. take a backup of the cluster database including resource
  2030. registry checkpoint files.
  2031. @rdesc Returns a Win32 error code on failure. ERROR_SUCCESS on success.
  2032. @xref <f DmBackupClusterDatabase>
  2033. ****/
  2034. DWORD DmpLogCheckpointAndBackup(
  2035. IN HLOG hLogFile,
  2036. IN LPWSTR lpszPathName)
  2037. {
  2038. DWORD dwError;
  2039. DWORD dwLen;
  2040. WCHAR szChkPointFilePrefix[MAX_PATH];
  2041. WCHAR szQuoLogPathName[MAX_PATH];
  2042. LPWSTR szDestFileName = NULL;
  2043. WCHAR szSourceFileName[MAX_PATH];
  2044. LPWSTR szDestPathName = NULL;
  2045. LPWSTR lpChkPointFileNameStart;
  2046. LSN Lsn;
  2047. TRID Transaction;
  2048. HANDLE hFile = INVALID_HANDLE_VALUE;
  2049. //
  2050. // Chittur Subbaraman (chitturs) - 10/12/1998
  2051. //
  2052. //
  2053. // Initiate a checkpoint process. Allow a log file reset, if necessary.
  2054. //
  2055. if ( ( dwError = LogCheckPoint( hLogFile, TRUE, NULL, 0 ) )
  2056. != ERROR_SUCCESS )
  2057. {
  2058. ClRtlLogPrint(LOG_UNUSUAL,
  2059. "[DM] DmpLogCheckpointAndBackup::Callback failed to return a checkpoint. Error=%1!u!\r\n",
  2060. dwError);
  2061. CL_LOGFAILURE( dwError );
  2062. LogClose( hLogFile );
  2063. goto FnExit;
  2064. }
  2065. //
  2066. // Get the name of the most recent checkpoint file
  2067. //
  2068. szChkPointFilePrefix[0] = TEXT('\0');
  2069. if ( ( dwError = LogGetLastChkPoint( hLogFile, szChkPointFilePrefix, &Transaction, &Lsn ) )
  2070. != ERROR_SUCCESS )
  2071. {
  2072. ClRtlLogPrint(LOG_UNUSUAL,
  2073. "[DM] DmpLogCheckpointAndBackup::No check point found in the log file. Error=%1!u!\r\n",
  2074. dwError);
  2075. CL_LOGFAILURE( dwError );
  2076. LogClose( hLogFile );
  2077. goto FnExit;
  2078. }
  2079. dwError = DmGetQuorumLogPath( szQuoLogPathName, sizeof( szQuoLogPathName ) );
  2080. if ( dwError != ERROR_SUCCESS )
  2081. {
  2082. dwError = GetLastError();
  2083. ClRtlLogPrint(LOG_UNUSUAL,
  2084. "[DM] DmpLogCheckpointAndBackup::DmGetQuorumLogPath failed, Error = %1!d!\r\n",
  2085. dwError);
  2086. CL_LOGFAILURE( dwError );
  2087. goto FnExit;
  2088. }
  2089. //
  2090. // It is safer to use dynamic memory allocation for user-supplied
  2091. // path since we don't want to put restrictions on the user
  2092. // on the length of the path that can be supplied. However, as
  2093. // far as our own quorum disk path is concerned, it is system-dependent
  2094. // and static memory allocation for that would suffice.
  2095. //
  2096. szDestPathName = (LPWSTR) LocalAlloc ( LMEM_FIXED,
  2097. ( lstrlenW ( lpszPathName ) + 1 ) *
  2098. sizeof ( WCHAR ) );
  2099. if ( szDestPathName == NULL )
  2100. {
  2101. dwError = GetLastError();
  2102. ClRtlLogPrint(LOG_NOISE,
  2103. "[DM] DmpLogCheckpointAndBackup: Error %1!d! in allocating memory for %2!ws! !!!\n",
  2104. dwError,
  2105. lpszPathName);
  2106. CL_LOGFAILURE( dwError );
  2107. goto FnExit;
  2108. }
  2109. //
  2110. // Get the user-supplied destination path name
  2111. //
  2112. lstrcpyW( szDestPathName, lpszPathName );
  2113. szDestFileName = (LPWSTR) LocalAlloc ( LMEM_FIXED,
  2114. ( lstrlenW ( szDestPathName ) + 1 + LOG_MAX_FILENAME_LENGTH ) *
  2115. sizeof ( WCHAR ) );
  2116. if ( szDestFileName == NULL )
  2117. {
  2118. dwError = GetLastError();
  2119. ClRtlLogPrint(LOG_NOISE,
  2120. "[DM] DmpLogCheckpointAndBackup: Error %1!d! in allocating memory for chkpt file name !!!\n",
  2121. dwError);
  2122. CL_LOGFAILURE( dwError );
  2123. goto FnExit;
  2124. }
  2125. //
  2126. // Make an attempt to delete the CLUSBACKUP.DAT file
  2127. //
  2128. lstrcpyW( szDestFileName, szDestPathName );
  2129. lstrcatW( szDestFileName, L"CLUSBACKUP.DAT" );
  2130. //
  2131. // Set the file attribute to normal. Continue even if you
  2132. // fail in this step, but log an error. (Note that you are
  2133. // countering the case in which a destination file with
  2134. // the same name exists in the backup directory already and
  2135. // you are trying to delete it.)
  2136. //
  2137. if ( !SetFileAttributes( szDestFileName, FILE_ATTRIBUTE_NORMAL ) )
  2138. {
  2139. dwError = GetLastError();
  2140. if ( dwError != ERROR_FILE_NOT_FOUND )
  2141. {
  2142. ClRtlLogPrint(LOG_UNUSUAL,
  2143. "[DM] DmpLogCheckpointAndBackup::Error in changing %1!ws! attribute to NORMAL, Error = %2!d!\n",
  2144. szDestFileName,
  2145. dwError);
  2146. }
  2147. }
  2148. if ( !DeleteFile( szDestFileName ) )
  2149. {
  2150. dwError = GetLastError();
  2151. if ( dwError != ERROR_FILE_NOT_FOUND )
  2152. {
  2153. ClRtlLogPrint(LOG_UNUSUAL,
  2154. "[DM] DmpLogCheckpointAndBackup::CLUSBACKUP.DAT exists, but can't delete it, Error = %1!d!\n",
  2155. dwError);
  2156. CL_LOGFAILURE( dwError );
  2157. goto FnExit;
  2158. }
  2159. }
  2160. //
  2161. // Just get the checkpoint file name without any path added.
  2162. // Note that szQuoLogPathName includes the '\'
  2163. //
  2164. dwLen = lstrlenW ( szQuoLogPathName );
  2165. lpChkPointFileNameStart = &szChkPointFilePrefix[dwLen];
  2166. //
  2167. // Now, create the path-included destination file name
  2168. //
  2169. lstrcpyW( szDestFileName, szDestPathName );
  2170. lstrcatW( szDestFileName, lpChkPointFileNameStart );
  2171. //
  2172. // And, the path-included source file name
  2173. //
  2174. lstrcpyW( szSourceFileName, szChkPointFilePrefix );
  2175. //
  2176. // Set the file attribute to normal. Continue even if you
  2177. // fail in this step, but log an error. (Note that you are
  2178. // countering the case in which a destination file with
  2179. // the same name exists in the backup directory already and
  2180. // you are trying to overwrite it.)
  2181. //
  2182. if ( !SetFileAttributes( szDestFileName, FILE_ATTRIBUTE_NORMAL ) )
  2183. {
  2184. dwError = GetLastError();
  2185. if ( dwError != ERROR_FILE_NOT_FOUND )
  2186. {
  2187. ClRtlLogPrint(LOG_UNUSUAL,
  2188. "[DM] DmpLogCheckpointAndBackup::Error in changing %1!ws! attribute to NORMAL, Error = %2!d!\n",
  2189. szDestFileName,
  2190. dwError);
  2191. }
  2192. }
  2193. //
  2194. // Copy the checkpoint file to the destination
  2195. //
  2196. dwError = CopyFileW( szSourceFileName, szDestFileName, FALSE );
  2197. if ( !dwError )
  2198. {
  2199. dwError = GetLastError();
  2200. ClRtlLogPrint(LOG_UNUSUAL,
  2201. "[DM] DmpLogCheckpointAndBackup::Unable to copy file %1!ws! to %2!ws!, Error = %3!d!\n",
  2202. szSourceFileName,
  2203. szDestFileName,
  2204. dwError);
  2205. CL_LOGFAILURE( dwError );
  2206. goto FnExit;
  2207. }
  2208. //
  2209. // Set the file attribute to read-only. Continue even if you
  2210. // fail in this step, but log an error.
  2211. //
  2212. if ( !SetFileAttributes( szDestFileName, FILE_ATTRIBUTE_READONLY ) )
  2213. {
  2214. dwError = GetLastError();
  2215. ClRtlLogPrint(LOG_UNUSUAL,
  2216. "[DM] DmpLogCheckpointAndBackup::Error in changing %1!ws! attribute to READONLY, Error = %2!d!\n",
  2217. szDestFileName,
  2218. dwError);
  2219. }
  2220. //
  2221. // Now, create the path-included destination file name
  2222. //
  2223. lstrcpyW( szDestFileName, szDestPathName );
  2224. lstrcatW( szDestFileName, cszQuoFileName );
  2225. //
  2226. // And, the path-included source file name
  2227. //
  2228. lstrcpyW( szSourceFileName, szQuoLogPathName );
  2229. lstrcatW( szSourceFileName, cszQuoFileName );
  2230. //
  2231. // Set the destination file attribute to normal. Continue even if you
  2232. // fail in this step, but log an error. (Note that you are
  2233. // countering the case in which a destination file with
  2234. // the same name exists in the backup directory already and
  2235. // you are trying to overwrite it.)
  2236. //
  2237. if ( !SetFileAttributes( szDestFileName, FILE_ATTRIBUTE_NORMAL ) )
  2238. {
  2239. dwError = GetLastError();
  2240. if ( dwError != ERROR_FILE_NOT_FOUND )
  2241. {
  2242. ClRtlLogPrint(LOG_UNUSUAL,
  2243. "[DM] DmpLogCheckpointAndBackup::Error in changing %1!ws! attribute to NORMAL, Error = %2!d!\n",
  2244. szDestFileName,
  2245. dwError);
  2246. }
  2247. }
  2248. //
  2249. // Copy the quorum log file to the destination
  2250. //
  2251. dwError = CopyFileW( szSourceFileName, szDestFileName, FALSE );
  2252. if ( !dwError )
  2253. {
  2254. dwError = GetLastError();
  2255. ClRtlLogPrint(LOG_UNUSUAL,
  2256. "[DM] DmpLogCheckpointAndBackup::Unable to copy file %1!ws! to %2!ws!, Error = %3!d!\n",
  2257. szSourceFileName,
  2258. szDestFileName,
  2259. dwError);
  2260. CL_LOGFAILURE( dwError );
  2261. goto FnExit;
  2262. }
  2263. //
  2264. // Set the destination file attribute to read-only. Continue even
  2265. // if you fail in this step, but log an error
  2266. //
  2267. if ( !SetFileAttributes( szDestFileName, FILE_ATTRIBUTE_READONLY ) )
  2268. {
  2269. dwError = GetLastError();
  2270. ClRtlLogPrint(LOG_UNUSUAL,
  2271. "[DM] DmpLogCheckpointAndBackup::Error in changing %1!ws! attribute to READONLY, Error = %2!d!\n",
  2272. szDestFileName,
  2273. dwError);
  2274. }
  2275. //
  2276. // Now copy the resource chkpt files to the destination. Note that
  2277. // we call this function with both gQuoLock and gLockDmpRoot held.
  2278. // The former lock prevents any checkpoint being read or written
  2279. // via CppReadCheckpoint() and CppWriteCheckpoint() while the
  2280. // following function is executing.
  2281. //
  2282. // Note: However, the CpDeleteRegistryCheckPoint() function is
  2283. // unprotected and poses a potential danger here.
  2284. //
  2285. // Note: Also, currently the following function returns ERROR_SUCCESS
  2286. // in all cases.
  2287. //
  2288. dwError = CpCopyCheckpointFiles( szDestPathName, TRUE );
  2289. if (dwError != ERROR_SUCCESS)
  2290. {
  2291. ClRtlLogPrint(LOG_UNUSUAL,
  2292. "[DM] DmpLogCheckpointAndBackup::Unable to copy resource checkpoint files, Error = %1!d!\n",
  2293. dwError);
  2294. goto FnExit;
  2295. }
  2296. //
  2297. // Now create an empty READONLY, HIDDEN, file in the destination
  2298. // directory which marks the successful ending of the backup.
  2299. //
  2300. lstrcpyW( szDestFileName, szDestPathName );
  2301. lstrcatW( szDestFileName, L"CLUSBACKUP.DAT");
  2302. hFile = CreateFileW( szDestFileName,
  2303. GENERIC_READ | GENERIC_WRITE,
  2304. 0,
  2305. NULL,
  2306. CREATE_NEW,
  2307. FILE_ATTRIBUTE_HIDDEN | FILE_ATTRIBUTE_READONLY,
  2308. NULL );
  2309. if ( hFile == INVALID_HANDLE_VALUE )
  2310. {
  2311. dwError = GetLastError();
  2312. CL_LOGFAILURE( dwError );
  2313. goto FnExit;
  2314. }
  2315. dwError = ERROR_SUCCESS;
  2316. FnExit:
  2317. LocalFree ( szDestFileName );
  2318. LocalFree ( szDestPathName );
  2319. if ( hFile != INVALID_HANDLE_VALUE )
  2320. {
  2321. CloseHandle ( hFile );
  2322. }
  2323. return ( dwError );
  2324. }
  2325. /****
  2326. @func DWORD | DmpRestoreClusterDatabase | Copy the quorum log and all the
  2327. checkpoint files from CsDatabaseRestorePath to the
  2328. quorum log path in the quorum disk.
  2329. @parm IN LPCWSTR | lpszQuoLogPathName | The quorum directory path
  2330. where the backed up files have to be copied to.
  2331. @rdesc Returns a Win32 error code on failure. ERROR_SUCCESS on success.
  2332. @xref <f CppRestoreCpFiles> <f DmBackupClusterDatabase>
  2333. ****/
  2334. DWORD DmpRestoreClusterDatabase(
  2335. IN LPCWSTR lpszQuoLogPathName )
  2336. {
  2337. HANDLE hFindFile = INVALID_HANDLE_VALUE;
  2338. WIN32_FIND_DATA FindData;
  2339. DWORD status;
  2340. WCHAR szDestFileName[MAX_PATH];
  2341. LPWSTR szSourceFileName = NULL;
  2342. LPWSTR szSourcePathName = NULL;
  2343. DWORD dwLen;
  2344. WCHAR szChkptFileNameStart[4];
  2345. WCHAR szTempFileName[MAX_PATH];
  2346. //
  2347. // Chittur Subbaraman (chitturs) - 10/20/98
  2348. //
  2349. dwLen = lstrlenW ( CsDatabaseRestorePath );
  2350. //
  2351. // It is safer to use dynamic memory allocation for user-supplied
  2352. // path since we don't want to put restrictions on the user
  2353. // on the length of the path that can be supplied. However, as
  2354. // far as our own quorum disk path is concerned, it is system-dependent
  2355. // and static memory allocation for that would suffice.
  2356. //
  2357. szSourcePathName = (LPWSTR) LocalAlloc ( LMEM_FIXED,
  2358. ( dwLen + 25 ) *
  2359. sizeof ( WCHAR ) );
  2360. if ( szSourcePathName == NULL )
  2361. {
  2362. status = GetLastError();
  2363. ClRtlLogPrint(LOG_NOISE,
  2364. "[DM] DmpRestoreClusterDatabase: Error %1!d! in allocating memory for %2!ws! !!!\n",
  2365. status,
  2366. CsDatabaseRestorePath);
  2367. CL_LOGFAILURE( status );
  2368. goto FnExit;
  2369. }
  2370. lstrcpyW ( szSourcePathName, CsDatabaseRestorePath );
  2371. //
  2372. // If the client-supplied path is not already terminated with '\',
  2373. // then add it.
  2374. //
  2375. if ( szSourcePathName [dwLen-1] != L'\\' )
  2376. {
  2377. szSourcePathName [dwLen++] = L'\\';
  2378. szSourcePathName[dwLen] = L'\0';
  2379. }
  2380. lstrcatW ( szSourcePathName, L"CLUSBACKUP.DAT" );
  2381. //
  2382. // Try to find the CLUSBACKUP.DAT file in the directory
  2383. //
  2384. hFindFile = FindFirstFile( szSourcePathName, &FindData );
  2385. //
  2386. // Reuse the source path name variable
  2387. //
  2388. szSourcePathName[dwLen] = L'\0';
  2389. if ( hFindFile == INVALID_HANDLE_VALUE )
  2390. {
  2391. status = GetLastError();
  2392. if ( status != ERROR_FILE_NOT_FOUND )
  2393. {
  2394. ClRtlLogPrint(LOG_NOISE,
  2395. "[DM] DmpRestoreClusterDatabase: Path %1!ws! unavailable, Error = %2!d! !!!\n",
  2396. szSourcePathName,
  2397. status);
  2398. } else
  2399. {
  2400. status = ERROR_DATABASE_BACKUP_CORRUPT;
  2401. ClRtlLogPrint(LOG_NOISE,
  2402. "[DM] DmpRestoreClusterDatabase: Backup procedure not fully successful, can't restore DB, Error = %1!d! !!!\n",
  2403. status);
  2404. }
  2405. CL_LOGFAILURE( status );
  2406. goto FnExit;
  2407. }
  2408. FindClose ( hFindFile );
  2409. szSourcePathName[dwLen++] = L'*';
  2410. szSourcePathName[dwLen] = L'\0';
  2411. //
  2412. // Try to find any file in the directory
  2413. //
  2414. hFindFile = FindFirstFile( szSourcePathName, &FindData );
  2415. //
  2416. // Reuse the source path name variable
  2417. //
  2418. szSourcePathName[dwLen-1] = L'\0';
  2419. if ( hFindFile == INVALID_HANDLE_VALUE )
  2420. {
  2421. status = GetLastError();
  2422. ClRtlLogPrint(LOG_NOISE,
  2423. "[DM] DmpRestoreClusterDatabase: Error %2!d! in trying to find file in path %1!ws!\r\n",
  2424. szSourcePathName,
  2425. status);
  2426. CL_LOGFAILURE( status );
  2427. goto FnExit;
  2428. }
  2429. szSourceFileName = (LPWSTR) LocalAlloc ( LMEM_FIXED,
  2430. ( lstrlenW ( szSourcePathName ) + 1 + LOG_MAX_FILENAME_LENGTH ) *
  2431. sizeof ( WCHAR ) );
  2432. if ( szSourceFileName == NULL )
  2433. {
  2434. status = GetLastError();
  2435. ClRtlLogPrint(LOG_NOISE,
  2436. "[DM] DmpRestoreClusterDatabase: Error %1!d! in allocating memory for source file name !!!\n",
  2437. status);
  2438. CL_LOGFAILURE( status );
  2439. goto FnExit;
  2440. }
  2441. status = ERROR_SUCCESS;
  2442. //
  2443. // Now, find and copy all relevant files from the backup area
  2444. // to the quorum disk. Note that only one of the copied chk*.tmp
  2445. // files will be used as the valid checkpoint. However, we copy
  2446. // all chk*.tmp files to make this implementation simple and
  2447. // straightforward to comprehend.
  2448. //
  2449. while ( status == ERROR_SUCCESS )
  2450. {
  2451. if ( FindData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY )
  2452. {
  2453. if ( FindData.cFileName[0] == L'.' )
  2454. {
  2455. if ( FindData.cFileName[1] == L'\0' ||
  2456. FindData.cFileName[1] == L'.' && FindData.cFileName[2] == L'\0' )
  2457. {
  2458. goto skip;
  2459. }
  2460. }
  2461. //
  2462. // Since the found file is infact a directory, check
  2463. // whether it is one of the resource checkpoint directories.
  2464. // If so copy the relevant checkpoint files to the quorum
  2465. // disk.
  2466. //
  2467. if ( ( status = CpRestoreCheckpointFiles( szSourcePathName,
  2468. FindData.cFileName,
  2469. lpszQuoLogPathName ) )
  2470. != ERROR_SUCCESS )
  2471. {
  2472. ClRtlLogPrint(LOG_NOISE,
  2473. "[DM] DmpRestoreClusterDatabase: Error %1!d! in copying resource cp files !!!\n",
  2474. status);
  2475. CL_LOGFAILURE( status );
  2476. goto FnExit;
  2477. }
  2478. } else
  2479. {
  2480. lstrcpyW ( szTempFileName, FindData.cFileName );
  2481. szTempFileName[3] = L'\0';
  2482. mbstowcs( szChkptFileNameStart, "chk", 4 );
  2483. if ( ( lstrcmpW ( szTempFileName, szChkptFileNameStart ) == 0 )
  2484. ||
  2485. ( lstrcmpW ( FindData.cFileName, cszQuoFileName ) == 0 ) )
  2486. {
  2487. lstrcpyW( szSourceFileName, szSourcePathName );
  2488. lstrcatW( szSourceFileName, FindData.cFileName );
  2489. lstrcpyW( szDestFileName, lpszQuoLogPathName );
  2490. lstrcatW( szDestFileName, FindData.cFileName );
  2491. status = CopyFileW( szSourceFileName, szDestFileName, FALSE );
  2492. if ( !status )
  2493. {
  2494. status = GetLastError();
  2495. ClRtlLogPrint(LOG_UNUSUAL,
  2496. "[DM] DmpRestoreClusterDatabase: Unable to copy file %1!ws! to %2!ws!, Error = %3!d!\n",
  2497. szSourceFileName,
  2498. szDestFileName,
  2499. status);
  2500. CL_LOGFAILURE( status );
  2501. goto FnExit;
  2502. }
  2503. //
  2504. // Set the file attribute to normal. There is no reason
  2505. // to fail in this step since the quorum disk is ours
  2506. // and we succeeded in copying the file.
  2507. //
  2508. if ( !SetFileAttributes( szDestFileName, FILE_ATTRIBUTE_NORMAL ) )
  2509. {
  2510. status = GetLastError();
  2511. ClRtlLogPrint(LOG_UNUSUAL,
  2512. "[DM] DmpLogCheckpointAndBackup::Error in changing %1!ws! attribute to NORMAL, error = %2!u!\n",
  2513. szDestFileName,
  2514. status);
  2515. CL_LOGFAILURE( status );
  2516. goto FnExit;
  2517. }
  2518. }
  2519. }
  2520. skip:
  2521. if ( FindNextFile( hFindFile, &FindData ) )
  2522. {
  2523. status = ERROR_SUCCESS;
  2524. } else
  2525. {
  2526. status = GetLastError();
  2527. }
  2528. }
  2529. if ( status == ERROR_NO_MORE_FILES )
  2530. {
  2531. status = ERROR_SUCCESS;
  2532. } else
  2533. {
  2534. ClRtlLogPrint(LOG_UNUSUAL,
  2535. "[DM] DmpRestoreClusterDatabase: FindNextFile failed! Error = %1!u!\n",
  2536. status);
  2537. }
  2538. FnExit:
  2539. if ( hFindFile != INVALID_HANDLE_VALUE )
  2540. {
  2541. FindClose ( hFindFile );
  2542. }
  2543. LocalFree ( szSourceFileName );
  2544. LocalFree ( szSourcePathName );
  2545. return ( status );
  2546. }
  2547. /****
  2548. @func DWORD | DmpHandleNodeDownEvent | Handle the node down event
  2549. for DM.
  2550. @parm IN LPVOID | NotUsed | Unused parameter.
  2551. @rdesc Returns ERROR_SUCCESS.
  2552. @xref <f DmpEventHandler>
  2553. ****/
  2554. DWORD DmpHandleNodeDownEvent(
  2555. IN LPVOID NotUsed )
  2556. {
  2557. //
  2558. // Chittur Subbaraman (chitturs) - 7/23/99
  2559. //
  2560. // This function handles the DM node down processing as a separate
  2561. // thread. The reasons for creating this thread are outlined in
  2562. // DmpEventHandler.
  2563. //
  2564. ClRtlLogPrint(LOG_NOISE,
  2565. "[DM] DmpHandleNodeDownEvent - Entry...\r\n");
  2566. //
  2567. // SS: I am not the owner of the quorum resource as yet, but I might
  2568. // be after rearbitration, in that case, just set a flag saying we
  2569. // need to checkpoint. It will be looked at when the quorum resource
  2570. // comes online. The following function in FM checks if the
  2571. // quorum is online on this node and if it is, it calls
  2572. // the checkpoint callback function. If it is not, it sets the
  2573. // global boolean variable passed to TRUE.
  2574. //
  2575. FmCheckQuorumState( DmpLogCheckPointCb, &gbNeedToCheckPoint );
  2576. ClRtlLogPrint(LOG_NOISE,
  2577. "[DM] DmpHandleNodeDownEvent - Exit...\r\n");
  2578. return( ERROR_SUCCESS );
  2579. }