Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

625 lines
20 KiB

  1. //-----------------------------------------------------------------------
  2. //
  3. // File: recovery.cxx
  4. //
  5. // Contents: This file contains the part of the CDFSVOL interface which
  6. // deals with recovery stuff. All functions in here handle
  7. // recovering from a failed DFSM operation. These functions
  8. // return an error code only if something went seriously wrong
  9. // while performing recovery. Most of
  10. // the errors that they may encounter are not passed back. Look
  11. // at individual functions for further details about all of this.
  12. //
  13. // History: 09-Feb-93 SudK Created.
  14. // 15-April-93 SudK Cleanup/CodeReview.
  15. //
  16. //-----------------------------------------------------------------------
  17. #include "headers.hxx"
  18. #pragma hdrstop
  19. #include <cdfsvol.hxx>
  20. //+------------------------------------------------------------------------
  21. //
  22. // Method: CDfsVolume::RecoverFromFailure
  23. //
  24. // Synopsis: This method assumes that the RecoveryState has already been
  25. // read and is in the private section of the class. It uses the
  26. // recovery state to figure out the operation that we were in and
  27. // then calls the appropriate recovery method to do the recovery
  28. // stuff for each operation individually.
  29. //
  30. // Arguments: NONE
  31. //
  32. // Returns:
  33. //
  34. // Notes:
  35. //
  36. // History: 09-Feb-1993 SudK Created.
  37. //
  38. //-------------------------------------------------------------------------
  39. DWORD
  40. CDfsVolume::RecoverFromFailure(void)
  41. {
  42. DWORD dwErr = ERROR_SUCCESS;
  43. ULONG operation, operState;
  44. IDfsVolInlineDebOut((DEB_TRACE, "CDfsVolume::RecoveryFromFailure()\n"));
  45. operation = DFS_GET_RECOVERY_STATE(_RecoveryState);
  46. operState = DFS_GET_OPER_STAGE(_RecoveryState);
  47. switch (operation) {
  48. case DFS_RECOVERY_STATE_CREATING:
  49. dwErr = RecoverFromCreation(operState);
  50. break;
  51. case DFS_RECOVERY_STATE_ADD_SERVICE:
  52. dwErr = RecoverFromAddService(operState);
  53. break;
  54. case DFS_RECOVERY_STATE_REMOVE_SERVICE:
  55. dwErr = RecoverFromRemoveService(operState);
  56. break;
  57. case DFS_RECOVERY_STATE_DELETE:
  58. dwErr = RecoverFromDelete(operState);
  59. break;
  60. case DFS_RECOVERY_STATE_MOVE:
  61. dwErr = RecoverFromMove(operState);
  62. break;
  63. default:
  64. //
  65. // This is yet another place where we would like to raise an EVENT
  66. // since the volume object is corrupt. This could never have happened.
  67. //
  68. dwErr = NERR_DfsVolumeDataCorrupt;
  69. IDfsVolInlineDebOut((DEB_ERROR, "Unrecognised RecoveryCode: %08lx\n", operation));
  70. LogMessage(DEB_ERROR,
  71. &(_peid.Prefix.Buffer),
  72. 1,
  73. DFS_UNRECOGNISED_RECOVERY_CODE_MSG);
  74. }
  75. IDfsVolInlineDebOut((DEB_TRACE, "CDfsVolume::RecoveryFromFailure() exit\n"));
  76. return(dwErr);
  77. }
  78. //+------------------------------------------------------------------------
  79. //
  80. // Method: CDfsVolume::RecoverFromCreation
  81. //
  82. // Synopsis: This method handles the recovery stuff if an operation failed
  83. // during creation. This should be called only if that has been
  84. // determined. This method merely backs out the entire operation.
  85. // It attempts to contact each service of the parent and requests
  86. // it to delete the ExitPoint that MIGHT have been created there
  87. // during the actual operation. All errors while going through
  88. // such ExitPt deletion operations are ignored. If this method
  89. // is unable to delete an ExitPoint information it assumes that
  90. // at worst it would have created a Knowledge inconsistency which
  91. // would get resolved by knowledge sync.
  92. //
  93. // Arguments: [OperStage] -- The stage in which the operation crashed.
  94. //
  95. // Returns:
  96. //
  97. // Notes:
  98. //
  99. // History: 09-Feb-1993 SudK Created.
  100. //
  101. //-------------------------------------------------------------------------
  102. DWORD
  103. CDfsVolume::RecoverFromCreation(ULONG OperStage)
  104. {
  105. DWORD dwErr = ERROR_SUCCESS;
  106. CDfsService *pDfsService = NULL;
  107. CDfsVolume *parent = NULL;
  108. CDfsServiceList *svcList = NULL;
  109. IDfsVolInlineDebOut((DEB_TRACE, "IDfsVol::RecoveryFromCreation()\n"));
  110. //
  111. // Ofcourse we log an event now since we have determined the precise
  112. // recovery that we are going to do. The admin should know that such
  113. // a recovery did occur.
  114. //
  115. LogMessage(DEB_ERROR,
  116. &(_peid.Prefix.Buffer),
  117. 1,
  118. DFS_RECOVERED_FROM_CREATION_MSG);
  119. //
  120. // First we go ahead and request parent machine to delete the ExitPoints
  121. // and then we will delete the object itself. We are basically backing
  122. // off the entire operation.
  123. //
  124. dwErr = GetParent(&parent);
  125. if (dwErr != ERROR_SUCCESS) {
  126. LogMessage(DEB_TRACE, nullPtr, 0, DFS_CANT_GET_PARENT_MSG);
  127. dwErr = NERR_DfsVolumeDataCorrupt;
  128. }
  129. if (dwErr == ERROR_SUCCESS) {
  130. svcList = &(parent->_DfsSvcList);
  131. pDfsService = svcList->GetFirstService();
  132. while (pDfsService!=NULL) {
  133. dwErr = pDfsService->DeleteExitPoint(&_peid, _EntryType);
  134. //
  135. // Well we failed to delete one of the exit points. Now what do
  136. // we do. We really are not going to let this stop our progress
  137. // from recovering. Remember that all we are interested in is not
  138. // to have TOO FEW exit points at a server. If a server has too
  139. // many exit points due to this action of ours it is OK. So lets
  140. // go forth and explore where no DFSM has explored before.
  141. //
  142. if (dwErr != ERROR_SUCCESS) {
  143. IDfsVolInlineDebOut((
  144. DEB_ERROR, "Unable to delete exitPoint %ws at %ws\n",
  145. _peid.Prefix.Buffer, pDfsService->GetServiceName()));
  146. }
  147. pDfsService = svcList->GetNextService(pDfsService);
  148. }
  149. if (parent!=NULL)
  150. parent->Release();
  151. //
  152. // Once the recovery is done we delete the object itself.
  153. //
  154. DeleteObject();
  155. dwErr = ERROR_SUCCESS;
  156. }
  157. IDfsVolInlineDebOut((DEB_TRACE, "CDfsVolume::RecoveryFromCreation() exit\n"));
  158. return(dwErr);
  159. }
  160. //+------------------------------------------------------------------------
  161. //
  162. // Method: CDfsVolume::RecoverFromAddService
  163. //
  164. // Synopsis: This method handles the recovery stuff if an operation failed
  165. // during an AddService operation. This should be called only if
  166. // that has already been determined. This method backs out the
  167. // entire operation irrespective of where we were during the
  168. // operation. If necessary it will attempt to do a DeleteLocalVol
  169. // on the service (if it is in SVCLIST_UPDATED state). It goes
  170. // on to delete the Service from the ServiceList if it exists.
  171. // At worst we would have left a knowledge inconsistency if we are
  172. // unable to delete the Service at remote server. But that is easy
  173. // to fix and knowledge inconsistency checks will take care of
  174. // that.
  175. //
  176. // Arguments: [OperStage] -- The stage in which the operation crashed.
  177. //
  178. // Returns:
  179. //
  180. // Notes:
  181. //
  182. // History: 09-Feb-1993 SudK Created.
  183. //
  184. //-------------------------------------------------------------------------
  185. DWORD
  186. CDfsVolume::RecoverFromAddService(ULONG OperStage)
  187. {
  188. DWORD dwErr = ERROR_SUCCESS;
  189. CDfsService *pService;
  190. IDfsVolInlineDebOut((DEB_TRACE, "CDfsVolume::RecoveryFromAddService()\n"));
  191. //
  192. // Ofcourse we log an event now since we have determined the precise
  193. // recovery that we are going to do. The admin should know that such
  194. // a recovery did occur.
  195. //
  196. LogMessage(DEB_ERROR,
  197. &(_peid.Prefix.Buffer),
  198. 1,
  199. DFS_RECOVERED_FROM_ADDSERVICE_MSG);
  200. //
  201. // If there is no recovery Service we are out of luck basically.
  202. // So we just return.
  203. //
  204. ASSERT(_pRecoverySvc != NULL);
  205. if (_pRecoverySvc == NULL) {
  206. LogMessage(DEB_ERROR,
  207. &(_peid.Prefix.Buffer),
  208. 1,
  209. DFS_INCONSISTENT_RECOVERY_ARGS_MSG);
  210. dwErr = ERROR_SUCCESS;
  211. _Recover.SetOperationDone();
  212. return(dwErr);
  213. }
  214. //
  215. // Now if we have a recoverySvc available we still need to get a handle to
  216. // the service actually in the SvcList. So let us do that. That is the
  217. // one in which we are really interested at this point.
  218. //
  219. dwErr = _DfsSvcList.GetService(_pRecoverySvc->GetReplicaInfo(), &pService);
  220. if (dwErr == ERROR_SUCCESS) {
  221. switch(OperStage) {
  222. case DFS_OPER_STAGE_SVCLIST_UPDATED:
  223. //
  224. // We may have gotten to informing the service or maybe not but who
  225. // cares lets go ahead and try to delete the service anyway.
  226. //
  227. dwErr = pService->DeleteLocalVolume(&_peid);
  228. dwErr = ERROR_SUCCESS;
  229. //
  230. // Fall Through here.
  231. //
  232. case DFS_OPER_STAGE_START:
  233. //
  234. // Once again here we ignore the error code because if the service
  235. // does not exist at all in the list who cares.
  236. //
  237. dwErr = _DfsSvcList.DeleteService(pService);
  238. ASSERT(dwErr == ERROR_SUCCESS);
  239. dwErr = ERROR_SUCCESS;
  240. break;
  241. default:
  242. //
  243. // Unexpected State. LogEvent.
  244. //
  245. LogMessage( DEB_ERROR,
  246. &(_peid.Prefix.Buffer),
  247. 1,
  248. DFS_UNKNOWN_RECOVERY_STATE_MSG);
  249. break;
  250. }
  251. }
  252. else {
  253. //
  254. // This means that we could not find the service in the SvcList. There
  255. // is nothing much that we can do now so we put a message and return
  256. //
  257. IDfsVolInlineDebOut((DEB_ERROR, "Could not find the service in SvcList\n of %ws in RecoverFromAddService",_peid.Prefix.Buffer));
  258. dwErr = ERROR_SUCCESS;
  259. }
  260. _Recover.SetOperationDone();
  261. IDfsVolInlineDebOut((DEB_TRACE, "CDfsVolume::RecoveryFromAddService() exit\n"));
  262. return(dwErr);
  263. }
  264. //+------------------------------------------------------------------------
  265. //
  266. // Method: CDfsVolume::RecoverFromRemoveService
  267. //
  268. // Synopsis: This method handles the recovery stuff if an operation failed
  269. // during a RemoveService operation. It should be called only
  270. // if this has already been determined. This method also tries
  271. // to roll forward the entire operation. If the ServiceList does
  272. // not even have the relevant service in it that means we did
  273. // manage to finish the operation but however, did not get to
  274. // changing the recovery properties. Well that is OK.
  275. //
  276. // Arguments: [OperStage] -- The stage in which the operation crashed.
  277. //
  278. // History: 09-Feb-1993 SudK Created.
  279. //
  280. //-------------------------------------------------------------------------
  281. DWORD
  282. CDfsVolume::RecoverFromRemoveService(ULONG OperStage)
  283. {
  284. DWORD dwErr = ERROR_SUCCESS;
  285. PWCHAR ErrorStrs[3];
  286. CDfsService *pService;
  287. IDfsVolInlineDebOut((DEB_TRACE, "CDfsVolume::RecoveryFromRemoveService()\n"));
  288. //
  289. // Ofcourse we log an event now since we have determined the precise
  290. // recovery that we are going to do. The admin should know that such
  291. // a recovery did occur.
  292. //
  293. ASSERT(_pRecoverySvc != NULL);
  294. if (_pRecoverySvc == NULL) {
  295. LogMessage(DEB_ERROR,
  296. &(_peid.Prefix.Buffer),
  297. 1,
  298. DFS_INCONSISTENT_RECOVERY_ARGS_MSG);
  299. //
  300. // We will return SUCCESS here.
  301. //
  302. dwErr = ERROR_SUCCESS;
  303. _Recover.SetOperationDone();
  304. return(dwErr);
  305. }
  306. LogMessage(DEB_ERROR,
  307. &(_peid.Prefix.Buffer),
  308. 1,
  309. DFS_RECOVERED_FROM_REMOVESERVICE_MSG);
  310. //
  311. // Now if we have a recoverySvc available we still need to get a handle to
  312. // the service actually in the SvcList. So let us do that. That is the
  313. // one in which we are really interested at this point.
  314. //
  315. dwErr = _DfsSvcList.GetService(_pRecoverySvc->GetReplicaInfo(), &pService);
  316. //
  317. // If we go in here it means that we never deleted the service from the
  318. // volume object at all. The best that we could have done is to inform
  319. // the remote service to delete the localvolume information.
  320. // We will roll the operation forward anyways.
  321. //
  322. if (dwErr == ERROR_SUCCESS) {
  323. switch (OperStage) {
  324. case DFS_OPER_STAGE_START:
  325. //
  326. // We dont know if we informed the service or not. So we do that
  327. // now in order to roll the operation forward.
  328. //
  329. dwErr = pService->DeleteLocalVolume(&_peid);
  330. if (dwErr != ERROR_SUCCESS) {
  331. ErrorStrs[0] = _peid.Prefix.Buffer;
  332. ErrorStrs[1] = pService->GetServiceName();
  333. LogMessage( DEB_ERROR,
  334. ErrorStrs,
  335. 2,
  336. DFS_CANT_CREATE_LOCAL_VOLUME_MSG);
  337. }
  338. dwErr = ERROR_SUCCESS;
  339. //
  340. // Fall through here and roll the rest of operation forward.
  341. //
  342. case DFS_OPER_STAGE_INFORMED_SERVICE:
  343. //
  344. // We may not have gotten to deleting the service from disk itself
  345. // so we roll this operation forward by doing that.
  346. //
  347. dwErr = _DfsSvcList.DeleteService(pService);
  348. ASSERT(dwErr == ERROR_SUCCESS);
  349. dwErr = ERROR_SUCCESS;
  350. break;
  351. default:
  352. {
  353. //
  354. // Unexpected State. LogEvent.
  355. //
  356. LogMessage( DEB_ERROR,
  357. &(_peid.Prefix.Buffer),
  358. 1,
  359. DFS_UNKNOWN_RECOVERY_STATE_MSG);
  360. }
  361. break;
  362. }
  363. }
  364. else {
  365. //
  366. // We never found this service so that means we are actually done with
  367. // the operation. So let us go ahead and return SUCCESS and set
  368. // recovery properties appropriately.
  369. //
  370. IDfsVolInlineDebOut((DEB_ERROR, "Could not find the service in SvcList\n of %ws in RecoverFromRemoveSvc",_peid.Prefix.Buffer));
  371. dwErr = ERROR_SUCCESS;
  372. }
  373. _Recover.SetOperationDone();
  374. IDfsVolInlineDebOut((DEB_TRACE, "CDfsVolume::RecoveryFromRemoveService() exit\n"));
  375. return(dwErr);
  376. }
  377. //+------------------------------------------------------------------------
  378. //
  379. // Method: CDfsVolume::RecoverFromDelete
  380. //
  381. // Synopsis: This handles recover from failure during a delete operation.
  382. // If we are in the OPER_START stage of the operation then we
  383. // will roll back the operation by attempting to inform the
  384. // relevant service (ONLY ONE EXISTS) to CreateLocalVol info.
  385. // If we are in any other state we will roll-forward the entire
  386. // operation. We will attempt to delete ExitPoints at each of
  387. // the services of the parent and then delete the object itself.
  388. //
  389. // Arguments: [OperStage] -- The stage in which the operation crashed.
  390. //
  391. // Returns:
  392. //
  393. // Notes:
  394. //
  395. // History: 09-Feb-1993 SudK Created.
  396. //
  397. //-------------------------------------------------------------------------
  398. DWORD
  399. CDfsVolume::RecoverFromDelete(ULONG OperStage)
  400. {
  401. DWORD dwErr = ERROR_SUCCESS;
  402. CDfsService *pDfsService = NULL;
  403. CDfsVolume *parent = NULL;
  404. CDfsServiceList *svcList = NULL;
  405. IDfsVolInlineDebOut((DEB_TRACE, "CDfsVolume::RecoveryFromDelete()\n"));
  406. //
  407. // Ofcourse we log an event now since we have determined the precise
  408. // recovery that we are going to do. The admin should know that such
  409. // a recovery did occur.
  410. //
  411. LogMessage(DEB_ERROR,
  412. &(_peid.Prefix.Buffer),
  413. 1,
  414. DFS_RECOVERED_FROM_DELETE_MSG);
  415. //
  416. // I think recovery from this operation is a big problem. We may not be
  417. // able to avoid going over the net to fix any problems with this operation.
  418. //
  419. switch (OperStage) {
  420. case DFS_OPER_STAGE_INFORMED_SERVICE:
  421. //
  422. // we might have informed some parent as well. Anyway, we can either
  423. // ignore this or attempt to delete exit point info at the parents.
  424. // If we want to delete the exit point we do it here. IGNORE ALL ERRORS.
  425. //
  426. dwErr = GetParent(&parent);
  427. if (dwErr != ERROR_SUCCESS) {
  428. LogMessage(DEB_TRACE, nullPtr, 0, DFS_CANT_GET_PARENT_MSG);
  429. dwErr = NERR_DfsVolumeDataCorrupt; //Is this error code OK?
  430. return(dwErr);
  431. }
  432. svcList = &(parent->_DfsSvcList);
  433. pDfsService = svcList->GetFirstService();
  434. while (pDfsService!=NULL) {
  435. dwErr = pDfsService->DeleteExitPoint(&_peid, _EntryType);
  436. pDfsService = svcList->GetNextService(pDfsService);
  437. }
  438. dwErr = ERROR_SUCCESS; // Ignore all errors here: Raid 455283
  439. if (parent!=NULL)
  440. parent->Release();
  441. //
  442. // Fall Through Here.
  443. //
  444. case DFS_OPER_STAGE_INFORMED_PARENT:
  445. //
  446. // We already have informed the services to delete local volume info
  447. // and have also informed ATLEAST one parent to delete the exit point.
  448. // Well as far as all remote operations are concerned we are done. So
  449. // we only need to delete the local object and then we are done. At
  450. // the same time we invalidate this instance by setting _Deleted = TRUE.
  451. //
  452. DeleteObject();
  453. _Deleted = TRUE;
  454. break;
  455. case DFS_OPER_STAGE_START:
  456. //
  457. // In this case we will just remove the recovery state and assume
  458. // that nothing ever happened. Even if we did get to some service and
  459. // inform it to delete local volume it is not a problem - we will
  460. // detect such a service since it will refuse to accept serivce requests
  461. // However, we will attempt to Create back any local volume information
  462. // at the service that is involved.
  463. //
  464. pDfsService = _DfsSvcList.GetFirstService();
  465. //
  466. // Remember that there should only be one service otherwise we would
  467. // have never got this far in this operation.
  468. //
  469. if (pDfsService != NULL) {
  470. dwErr = pDfsService->CreateLocalVolume(&_peid, _EntryType);
  471. dwErr = ERROR_SUCCESS;
  472. }
  473. _Recover.SetOperationDone();
  474. break;
  475. default:
  476. //
  477. // Unexpected State. LogEvent.
  478. //
  479. LogMessage( DEB_ERROR,
  480. &(_peid.Prefix.Buffer),
  481. 1,
  482. DFS_UNKNOWN_RECOVERY_STATE_MSG);
  483. _Recover.SetOperationDone();
  484. }
  485. IDfsVolInlineDebOut((DEB_TRACE, "CDfsVolume::RecoveryFromDelete() exit\n"));
  486. return(dwErr);
  487. }
  488. //+------------------------------------------------------------------------
  489. //
  490. // Method: CDfsVolume::RecoverFromMove
  491. //
  492. // Synopsis: Handles recovery from failure during a MOVE operation.
  493. //
  494. // Arguments: [OperStage] -- The stage in which the operation crashed.
  495. //
  496. // Returns:
  497. //
  498. // Notes:
  499. //
  500. // History: 09-Feb-1993 SudK Created.
  501. //
  502. //-------------------------------------------------------------------------
  503. DWORD
  504. CDfsVolume::RecoverFromMove(ULONG OperStage)
  505. {
  506. DWORD dwErr = ERROR_SUCCESS;
  507. IDfsVolInlineDebOut((DEB_TRACE, "CDfsVolume::RecoveryFromMove()\n"));
  508. //
  509. // Ofcourse we log an event now since we have determined the precise
  510. // recovery that we are going to do. The admin should know that such
  511. // a recovery did occur.
  512. //
  513. LogMessage(DEB_ERROR,
  514. &(_peid.Prefix.Buffer),
  515. 1,
  516. DFS_RECOVERED_FROM_MOVE_MSG);
  517. IDfsVolInlineDebOut((DEB_TRACE, "CDfsVolume::RecoveryFromMove() exit\n"));
  518. return(dwErr);
  519. }