Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

649 lines
19 KiB

  1. /*++
  2. Copyright (c) 2000 Microsoft Corporation
  3. Module Name:
  4. replay.c
  5. Abstract:
  6. Implements replay of records during replica recovery
  7. Author:
  8. Ahmed Mohamed (ahmedm) 1-Feb-2000
  9. Revision History:
  10. --*/
  11. #include <nt.h>
  12. #include <ntdef.h>
  13. #include <ntrtl.h>
  14. #include <nturtl.h>
  15. #include <windows.h>
  16. #include <stdio.h>
  17. #include <ntddvol.h>
  18. #include <string.h>
  19. #include <assert.h>
  20. #include "fs.h"
  21. #include "fsp.h"
  22. #include "fsutil.h"
  23. #include <strsafe.h>
  24. NTSTATUS
  25. fs_replay_create(VolInfo_t *volinfo, fs_log_rec_t *lrec, int nid, int mid)
  26. {
  27. NTSTATUS err;
  28. fs_create_msg_t msg;
  29. fs_create_reply_t reply;
  30. WCHAR name[MAXPATH];
  31. int name_sz = sizeof(name);
  32. HANDLE vfd = FS_GET_VOL_HANDLE(volinfo, mid);
  33. fs_log_rec_t myRec;
  34. name[0] = '\0';
  35. memcpy(&msg.xid, lrec->id, sizeof(msg.xid));
  36. msg.flags = lrec->flags;
  37. msg.attr = lrec->attrib;
  38. // note: use id instead of fs_id since we don't have fs_id till
  39. // a prepare has committed.
  40. FsLogReplay(("fs_replay_create: try %I64x:%I64x\n", lrec->id[0],
  41. lrec->id[1]));
  42. err = xFsGetPathById(vfd, &lrec->id, name, &name_sz);
  43. if (err == STATUS_SUCCESS) {
  44. IO_STATUS_BLOCK ios;
  45. msg.name = xFsBuildRelativePath(volinfo, mid, name);
  46. msg.name_len = (USHORT) wcslen(msg.name);
  47. msg.fnum = INVALID_FHANDLE_T;
  48. ios.Information = sizeof(reply);
  49. err = FspCreate(volinfo, NULL, nid, (PVOID) &msg, sizeof(msg),
  50. (PVOID) &reply, &ios.Information, (PVOID)&myRec);
  51. }
  52. FsLogReplay(("fs_replay_create: %S err %x\n", name, err));
  53. return err;
  54. }
  55. NTSTATUS
  56. fs_replay_setattr(VolInfo_t *volinfo, fs_log_rec_t *lrec, int nid, int mid)
  57. {
  58. NTSTATUS err;
  59. fs_setattr_msg_t msg;
  60. WCHAR name[MAXPATH];
  61. int name_sz = sizeof(name);
  62. HANDLE vfd = FS_GET_VOL_HANDLE(volinfo, nid);
  63. fs_log_rec_t myRec;
  64. // find path for fs_id
  65. FsLogReplay(("fs_replay_setattr: try %I64x:%I64x\n", lrec->fs_id[0],
  66. lrec->fs_id[1]));
  67. err = xFsGetPathById(vfd, &lrec->fs_id, name, &name_sz);
  68. if (err == STATUS_SUCCESS) {
  69. IO_STATUS_BLOCK ios;
  70. ios.Information = 0;
  71. // todo: we need to read current attr from master and apply it into nid disk.
  72. // FileAttributes are not enough, we could have time changes which need to be
  73. // in sync in all disks.
  74. memcpy(&msg.xid, lrec->id, sizeof(msg.xid));
  75. msg.fs_id = &lrec->fs_id;
  76. msg.name = xFsBuildRelativePath(volinfo, nid, name);
  77. msg.name_len = (USHORT) wcslen(msg.name);
  78. memset(&msg.attr, 0, sizeof(msg.attr));
  79. msg.attr.FileAttributes = lrec->attrib;
  80. err = FspSetAttr2(volinfo, NULL, nid, (PVOID) &msg, sizeof(msg),
  81. NULL, &ios.Information, (PVOID)&myRec);
  82. }
  83. FsLogReplay(("replay_setattr: %I64x err %x\n",
  84. lrec->fs_id[0], err));
  85. return err;
  86. }
  87. NTSTATUS
  88. fs_replay_mkdir(VolInfo_t *volinfo, fs_log_rec_t *lrec, int nid, int mid)
  89. {
  90. NTSTATUS err;
  91. fs_create_msg_t msg;
  92. WCHAR name[MAXPATH];
  93. int name_sz = sizeof(name);
  94. HANDLE vfd = FS_GET_VOL_HANDLE(volinfo, mid);
  95. fs_log_rec_t myRec;
  96. name[0] = '\0';
  97. // note: use id instead of fs_id since we don't have fs_id till
  98. // a prepare has committed.
  99. FsLogReplay(("fs_replay_mkdir: %I64x:%I64x\n", lrec->id[0],
  100. lrec->id[1]));
  101. err = xFsGetPathById(vfd, &lrec->id, name, &name_sz);
  102. if (err == STATUS_SUCCESS) {
  103. IO_STATUS_BLOCK ios;
  104. ios.Information = 0;
  105. memcpy(&msg.xid, lrec->id, sizeof(msg.xid));
  106. msg.name = xFsBuildRelativePath(volinfo, mid, name);
  107. msg.name_len = (USHORT) wcslen(msg.name);
  108. msg.flags = lrec->flags;
  109. msg.attr = lrec->attrib;
  110. err = FspMkDir(volinfo, NULL, nid, (PVOID) &msg, sizeof(msg),
  111. NULL, &ios.Information, (PVOID)&myRec);
  112. }
  113. FsLogReplay(("Replay Mkdir %S err %x\n", name, err));
  114. return err;
  115. }
  116. NTSTATUS
  117. fs_replay_remove(VolInfo_t *volinfo, fs_log_rec_t *lrec, int nid, int mid)
  118. {
  119. NTSTATUS err;
  120. fs_remove_msg_t msg;
  121. // we find the objectid in the old replica, since file is already delete in master
  122. HANDLE ovfd = FS_GET_VOL_HANDLE(volinfo, nid);
  123. WCHAR name[MAXPATH];
  124. int name_sz = sizeof(name);
  125. fs_log_rec_t myRec;
  126. name[0] = '\0';
  127. FsLogReplay(("fs_relay_remove: %I64x:%I64x\n", lrec->fs_id[0],
  128. lrec->fs_id[1]));
  129. err = xFsGetPathById(ovfd, &lrec->fs_id, name, &name_sz);
  130. if (err == STATUS_SUCCESS) {
  131. IO_STATUS_BLOCK ios;
  132. ios.Information = 0;
  133. memcpy(&msg.xid, lrec->id, sizeof(msg.xid));
  134. msg.fs_id = &lrec->fs_id;
  135. msg.name = xFsBuildRelativePath(volinfo, nid, name);
  136. msg.name_len = (USHORT) wcslen(msg.name);
  137. err = FspRemove(volinfo, NULL, nid, (PVOID) &msg, sizeof(msg),
  138. NULL, &ios.Information, (PVOID)&myRec);
  139. }
  140. FsLogReplay(("Replay remove %S err %x\n", name, err));
  141. return err;
  142. }
  143. NTSTATUS
  144. fs_replay_rename(VolInfo_t *volinfo, fs_log_rec_t *lrec, int nid, int mid)
  145. {
  146. NTSTATUS err;
  147. fs_rename_msg_t msg;
  148. HANDLE vfd = FS_GET_VOL_HANDLE(volinfo,mid);
  149. HANDLE ovfd = FS_GET_VOL_HANDLE(volinfo, nid);
  150. WCHAR old_name[MAXPATH];
  151. WCHAR new_name[MAXPATH];
  152. int old_name_sz = sizeof(old_name);
  153. int new_name_sz = sizeof(new_name);
  154. fs_log_rec_t myRec;
  155. new_name[0] = old_name[0] = '\0';
  156. FsLogReplay(("fs_relay_rename: %I64x:%I64x\n", lrec->fs_id[0],
  157. lrec->fs_id[1]));
  158. // get old name
  159. err = xFsGetPathById(ovfd, &lrec->fs_id, old_name, &old_name_sz);
  160. if (err == STATUS_SUCCESS) {
  161. IO_STATUS_BLOCK ios;
  162. ios.Information = 0;
  163. // get the new name
  164. err = xFsGetPathById(vfd, &lrec->fs_id, new_name, &new_name_sz);
  165. if (err == STATUS_OBJECT_PATH_NOT_FOUND) {
  166. NTSTATUS e;
  167. // if we can't find file in the master disk, we must
  168. // rename the file, pick a name based on file id
  169. StringCchPrintfW(new_name, MAXPATH, L"%s%I64x%I64x", old_name,
  170. lrec->fs_id[0],lrec->fs_id[1]);
  171. new_name_sz = wcslen(new_name);
  172. err = STATUS_SUCCESS;
  173. mid = nid;
  174. }
  175. if (err == STATUS_SUCCESS) {
  176. memcpy(&msg.xid, lrec->id, sizeof(msg.xid));
  177. msg.fs_id = &lrec->fs_id;
  178. msg.sname = xFsBuildRelativePath(volinfo, nid, old_name);
  179. msg.sname_len = (USHORT) wcslen(msg.sname);
  180. msg.dname = xFsBuildRelativePath(volinfo, mid, new_name);
  181. msg.dname_len = (USHORT) wcslen(msg.dname);
  182. err = FspRename(volinfo, NULL, nid, (PVOID) &msg, sizeof(msg),
  183. NULL, &ios.Information, (PVOID)&myRec);
  184. }
  185. }
  186. FsLogReplay(("Replay rename %S -> %S err %x\n", old_name, new_name, err));
  187. return err;
  188. }
  189. NTSTATUS
  190. fs_replay_write(VolInfo_t *volinfo, fs_log_rec_t *lrec, int nid, int mid)
  191. {
  192. NTSTATUS err;
  193. IO_STATUS_BLOCK ios;
  194. HANDLE shdl = INVALID_HANDLE_VALUE;
  195. HANDLE dhdl = INVALID_HANDLE_VALUE;
  196. char *buf = NULL;
  197. fs_io_msg_t msg;
  198. HANDLE ovfd = FS_GET_VOL_HANDLE(volinfo, nid);
  199. HANDLE vfd = FS_GET_VOL_HANDLE(volinfo, mid);
  200. fs_log_rec_t myRec;
  201. FsLogReplay(("fs_replay_write: %I64x:%I64x\n", lrec->fs_id[0],
  202. lrec->fs_id[1]));
  203. // get the new file first
  204. err = xFsGetHandleById(vfd, &lrec->fs_id, FILE_READ_EA|FILE_GENERIC_READ, &shdl);
  205. if (err == STATUS_SUCCESS) {
  206. LARGE_INTEGER off;
  207. IO_STATUS_BLOCK ios2;
  208. ios2.Information = 0;
  209. // get old file
  210. err = xFsGetHandleById(ovfd, &lrec->fs_id, FILE_READ_EA|FILE_GENERIC_WRITE, &dhdl);
  211. if (err != STATUS_SUCCESS) {
  212. // this is a very bad error, must abort now
  213. FsLogReplay(("Aborting replay_write err %x\n", err));
  214. err = STATUS_TRANSACTION_ABORTED;
  215. goto done;
  216. }
  217. // we need to read the new data from the sfd first
  218. if (lrec->length > 0) {
  219. // allocate buf
  220. buf = VirtualAlloc(NULL, lrec->length, MEM_COMMIT, PAGE_READWRITE);
  221. if (buf == NULL) {
  222. FsLogError(("Unable to allocate write buffer to replay\n"));
  223. err = STATUS_TRANSACTION_ABORTED;
  224. goto done;
  225. }
  226. off.LowPart = lrec->offset;
  227. off.HighPart = 0;
  228. // read local data. xxx: what if the file is locked?
  229. err = NtReadFile(shdl, NULL, NULL, NULL, &ios, buf,
  230. lrec->length, &off, NULL);
  231. if (err == STATUS_PENDING) {
  232. EventWait(shdl);
  233. err = ios.Status;
  234. }
  235. if (err != STATUS_SUCCESS) {
  236. FsLogReplay(("Read failed for replay 0x%x\n", err));
  237. err = STATUS_TRANSACTION_ABORTED;
  238. goto done;
  239. }
  240. } else {
  241. buf = NULL;
  242. ios.Information = 0;
  243. }
  244. memcpy(&msg.xid, lrec->id, sizeof(msg.xid));
  245. msg.fs_id = &lrec->fs_id;
  246. msg.offset = lrec->offset;
  247. msg.size = (UINT32)ios.Information;
  248. msg.buf = buf;
  249. msg.context = (PVOID) dhdl;
  250. msg.fnum = INVALID_FHANDLE_T;
  251. err = FspWrite(volinfo, NULL, nid, (PVOID) &msg, sizeof(msg), NULL, &ios2.Information, (PVOID)&myRec);
  252. // check if we have the same size, otherwise abort
  253. if ((ULONG)ios2.Information != lrec->length) {
  254. FsLogError(("Write sz mismatch, %d expected %d\n", (ULONG)ios2.Information, lrec->length));
  255. err = STATUS_TRANSACTION_ABORTED;
  256. }
  257. } else if (err != STATUS_OBJECT_PATH_NOT_FOUND) {
  258. err = STATUS_TRANSACTION_ABORTED;
  259. }
  260. done:
  261. if (buf != NULL) {
  262. VirtualFree(buf, 0, MEM_RELEASE);
  263. }
  264. if (shdl != INVALID_HANDLE_VALUE)
  265. xFsClose(shdl);
  266. if (dhdl != INVALID_HANDLE_VALUE)
  267. xFsClose(dhdl);
  268. FsLogReplay(("Replay write offset %d len %d err %x\n",
  269. lrec->offset, lrec->length, err));
  270. return err;
  271. }
  272. FsReplayHandler_t FsReplayCallTable[] = {
  273. fs_replay_create,
  274. fs_replay_setattr,
  275. fs_replay_write,
  276. fs_replay_mkdir,
  277. fs_replay_remove,
  278. fs_replay_rename
  279. };
  280. NTSTATUS
  281. FsReplayFid(VolInfo_t *volinfo, UserInfo_t *uinfo, int nid, int mid)
  282. {
  283. int i;
  284. // WCHAR path[MAXPATH];
  285. // WCHAR *name;
  286. int name_len;
  287. NTSTATUS err = STATUS_SUCCESS;
  288. // Open on replica nid all currently open files.
  289. for (i = 0; i < FsTableSize; i++) {
  290. HANDLE fd;
  291. UINT32 disp, share, access, flags;
  292. if (uinfo->Table[i].Flags == 0) {
  293. continue;
  294. }
  295. if (uinfo->Table[i].Fd[nid] != INVALID_HANDLE_VALUE) {
  296. continue;
  297. }
  298. // Perform replays on completely open handles only. partially opened handles
  299. // should be taken care of by the send*() functions.
  300. //
  301. if (uinfo->Table[i].hState != HandleStateOpened) {
  302. continue;
  303. }
  304. #if 0
  305. // todo: this should be in a for loop
  306. fd = uinfo->Table[i].Fd[mid];
  307. if (fd == INVALID_HANDLE_VALUE)
  308. continue;
  309. // get path name
  310. name_len = sizeof(path);
  311. err = xFsGetHandlePath(fd, path, &name_len);
  312. if (err != STATUS_SUCCESS) {
  313. FsLogReplay(("FsReplayFid %d failed on handlpath %x\n",
  314. mid, err));
  315. // todo: the master might have failed, we should just
  316. // try to go to a differnet replica if possible
  317. return err;
  318. }
  319. // issue open against nid, but first get filename from master
  320. name = xFsBuildRelativePath(volinfo, mid, path);
  321. #endif
  322. DecodeCreateParam(uinfo->Table[i].Flags, &flags, &disp, &share, &access);
  323. err = xFsOpen(&fd, FS_GET_VOL_HANDLE(volinfo, nid),
  324. uinfo->Table[i].FileName, wcslen(uinfo->Table[i].FileName),
  325. access, share, 0);
  326. if (err != STATUS_SUCCESS) {
  327. FsLogReplay(("FsReplayFid mid %d nid %d open file '%S' failed %x\n",
  328. mid, nid, uinfo->Table[i].FileName, err));
  329. // Cleanup all open handles we have before returning an
  330. // error. We cleanup this node later, so that's ok.
  331. return err;
  332. }
  333. FsLogReplay(("FsReplayFid mid %d nid %d file '%S' flags %x\n",
  334. mid, nid, uinfo->Table[i].FileName, uinfo->Table[i].Flags));
  335. // we now add the open handle to the nid slot
  336. FS_SET_USER_HANDLE(uinfo, nid, i, fd);
  337. // todo: issue locks
  338. }
  339. return err;
  340. }
  341. NTSTATUS
  342. FsReplayXid(VolInfo_t *volinfo, int nid, PVOID arg, int action, int mid)
  343. {
  344. fs_log_rec_t *p = (fs_log_rec_t *) arg;
  345. NTSTATUS err = ERROR_SUCCESS;
  346. fs_id_t *fs_id;
  347. HANDLE vhdl;
  348. vhdl = FS_GET_VOL_HANDLE(volinfo, nid);
  349. if (vhdl == INVALID_HANDLE_VALUE) {
  350. FsLogUndo(("FsUndoXid Failed to get crs handle %d\n",
  351. nid));
  352. return STATUS_TRANSACTION_ABORTED;
  353. }
  354. vhdl = FS_GET_VOL_HANDLE(volinfo, mid);
  355. if (vhdl == INVALID_HANDLE_VALUE) {
  356. FsLogReplay(("FsReplayXid Failed to get crs handle %d\n",
  357. mid));
  358. return STATUS_TRANSACTION_ABORTED;
  359. }
  360. // note: use id instead of fs_id since we don't have fs_id till
  361. // a prepare has committed.
  362. fs_id = &p->id;
  363. FsLogReplay(("Replay cmd %d mid %d nid %d objid %I64x:%I64x\n", p->command,
  364. mid, nid,
  365. (*fs_id)[0], (*fs_id)[1]));
  366. err = FsReplayCallTable[p->command](volinfo, p, nid, mid);
  367. FsLogReplay(("Replay Status %x\n", err));
  368. return err;
  369. }
  370. NTSTATUS
  371. FsQueryXid(VolInfo_t *volinfo, int nid, PVOID arg, int action, int mid)
  372. {
  373. fs_log_rec_t *p = (fs_log_rec_t *) arg;
  374. NTSTATUS err = ERROR_SUCCESS;
  375. fs_id_t *fs_id;
  376. HANDLE vhdl;
  377. WCHAR name[MAXPATH];
  378. int name_sz = sizeof(name);
  379. ASSERT(nid == mid);
  380. vhdl = FS_GET_VOL_HANDLE(volinfo, nid);
  381. if (vhdl == INVALID_HANDLE_VALUE) {
  382. FsLogUndo(("FsUndoXid Failed to get crs handle %d\n",
  383. nid));
  384. return STATUS_TRANSACTION_ABORTED;
  385. }
  386. fs_id = &p->fs_id;
  387. FsLogReplay(("Query cmd %d nid %d objid %I64x:%I64x\n", p->command,
  388. nid, (*fs_id)[0], (*fs_id)[1]));
  389. switch(p->command) {
  390. case FS_CREATE:
  391. case FS_MKDIR:
  392. // issue a lookup,
  393. // note: use id instead of fs_id since we don't have fs_id till
  394. // a prepare has committed.
  395. fs_id = &p->id;
  396. err = xFsGetPathById(vhdl, fs_id, name, &name_sz);
  397. if (err == STATUS_OBJECT_PATH_NOT_FOUND)
  398. err = STATUS_CANCELLED;
  399. break;
  400. case FS_REMOVE:
  401. err = xFsGetPathById(vhdl, fs_id, name, &name_sz);
  402. if (err == STATUS_OBJECT_PATH_NOT_FOUND)
  403. err = STATUS_SUCCESS;
  404. else if (err == STATUS_SUCCESS)
  405. err = STATUS_CANCELLED;
  406. break;
  407. default:
  408. // can't make any determination
  409. err = STATUS_NOT_FOUND;
  410. break;
  411. }
  412. FsLogReplay(("Commit Status %x\n", err));
  413. return err;
  414. }
  415. ////////////////////////// Recovery Callback ////////////////////////////
  416. NTSTATUS
  417. WINAPI
  418. FsCrsCallback(PVOID hd, int nid, CrsRecord_t *arg, int action, int mid)
  419. {
  420. NTSTATUS err = STATUS_SUCCESS;
  421. VolInfo_t *volinfo = (VolInfo_t *) hd;
  422. switch(action) {
  423. case CRS_ACTION_REPLAY:
  424. err = FsReplayXid(volinfo, nid, arg, action, mid);
  425. break;
  426. case CRS_ACTION_UNDO:
  427. err = FsUndoXid(volinfo, nid, arg, action, mid);
  428. break;
  429. case CRS_ACTION_QUERY:
  430. err = FsQueryXid(volinfo, nid, arg, action, mid);
  431. break;
  432. case CRS_ACTION_DONE:
  433. FsLogReplay(("Vol %S done recovery nid %d mid %d\n",
  434. volinfo->Root, nid, mid));
  435. // we now need to walk our current open table and join this new replica.
  436. {
  437. UserInfo_t *u = volinfo->UserList;
  438. for (; u != NULL; u = u->Next) {
  439. err = FsReplayFid(volinfo, u, nid, mid);
  440. if (err != STATUS_SUCCESS)
  441. break;
  442. }
  443. }
  444. break;
  445. case CRS_ACTION_COPY:
  446. FsLogReplay(("FullCopy Disk%d -> Disk%d\n", mid, nid));
  447. //
  448. // We need to open new directory handles instead of using current ones. Otherwise,
  449. // our enum on directory might not be consistent
  450. //
  451. if (0) {
  452. WCHAR path[MAXPATH];
  453. HANDLE mvfd, ovfd;
  454. UINT32 disp;
  455. // open root volume directory
  456. disp = FILE_OPEN;
  457. StringCchPrintfW(path, MAXPATH, L"\\??\\%s\\%s\\", FS_GET_VOL_NAME(volinfo, mid), volinfo->Root);
  458. err = xFsCreate(&mvfd, NULL, path, wcslen(path),
  459. FILE_DIRECTORY_FILE|FILE_SYNCHRONOUS_IO_ALERT,
  460. 0,
  461. FILE_SHARE_READ|FILE_SHARE_WRITE,
  462. &disp,
  463. FILE_GENERIC_READ|FILE_GENERIC_WRITE|FILE_GENERIC_EXECUTE,
  464. NULL, 0);
  465. if (err != STATUS_SUCCESS) {
  466. FsLogReplay(("Failed to open mid %d '%S' err %x\n", mid, path, err));
  467. return err;
  468. }
  469. // open root volume directory
  470. disp = FILE_OPEN;
  471. StringCchPrintfW(path, MAXPATH, L"\\??\\%s\\%s\\", FS_GET_VOL_NAME(volinfo, nid), volinfo->Root);
  472. err = xFsCreate(&ovfd, NULL, path, wcslen(path),
  473. FILE_DIRECTORY_FILE|FILE_SYNCHRONOUS_IO_ALERT,
  474. 0,
  475. FILE_SHARE_READ|FILE_SHARE_WRITE,
  476. &disp,
  477. FILE_GENERIC_READ|FILE_GENERIC_WRITE|FILE_GENERIC_EXECUTE,
  478. NULL, 0);
  479. if (err != STATUS_SUCCESS) {
  480. xFsClose(mvfd);
  481. FsLogReplay(("Failed to open nid %d '%S' err %x\n", mid, path, err));
  482. return err;
  483. }
  484. err = xFsCopyTree(mvfd, ovfd);
  485. xFsClose(mvfd);
  486. xFsClose(ovfd);
  487. } else {
  488. err = xFsCopyTree(FS_GET_VOL_HANDLE(volinfo, mid),
  489. FS_GET_VOL_HANDLE(volinfo,nid));
  490. }
  491. FsLogReplay(("SlowStart Crs%d status %x\n", nid, err));
  492. break;
  493. default:
  494. FsLogReplay(("Unknown action %d\n", action));
  495. ASSERT(FALSE);
  496. }
  497. return err;
  498. }