Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

621 lines
15 KiB

  1. /*++
  2. Copyright (c) 2000 Microsoft Corporation
  3. Module Name:
  4. replay.c
  5. Abstract:
  6. Implements replay of records during replica recovery
  7. Author:
  8. Ahmed Mohamed (ahmedm) 1-Feb-2000
  9. Revision History:
  10. --*/
  11. #include <nt.h>
  12. #include <ntdef.h>
  13. #include <ntrtl.h>
  14. #include <nturtl.h>
  15. #include <windows.h>
  16. #include <stdio.h>
  17. #include <ntddvol.h>
  18. #include <string.h>
  19. #include <assert.h>
  20. #include "fs.h"
  21. #include "fsp.h"
  22. #include "fsutil.h"
  23. NTSTATUS
  24. fs_replay_create(VolInfo_t *volinfo, fs_log_rec_t *lrec, int nid, int mid)
  25. {
  26. NTSTATUS err;
  27. fs_create_msg_t msg;
  28. fs_create_reply_t reply;
  29. WCHAR name[MAXPATH];
  30. int name_sz = sizeof(name);
  31. HANDLE vfd = FS_GET_VOL_HANDLE(volinfo, mid);
  32. name[0] = '\0';
  33. memcpy(&msg.xid, lrec->id, sizeof(msg.xid));
  34. msg.flags = lrec->flags;
  35. msg.attr = lrec->attrib;
  36. // note: use id instead of fs_id since we don't have fs_id till
  37. // a prepare has committed.
  38. FsLogReplay(("fs_replay_create: try %I64x:%I64x\n", lrec->id[0],
  39. lrec->id[1]));
  40. err = xFsGetPathById(vfd, &lrec->id, name, &name_sz);
  41. if (err == STATUS_SUCCESS) {
  42. IO_STATUS_BLOCK ios;
  43. msg.name = xFsBuildRelativePath(volinfo, mid, name);
  44. msg.name_len = (USHORT) wcslen(msg.name);
  45. msg.fnum = INVALID_FHANDLE_T;
  46. ios.Information = sizeof(reply);
  47. err = FspCreate(volinfo, NULL, nid, (PVOID) &msg, sizeof(msg),
  48. (PVOID) &reply, &ios.Information);
  49. }
  50. FsLogReplay(("fs_replay_create: %S err %x\n", name, err));
  51. return err;
  52. }
  53. NTSTATUS
  54. fs_replay_setattr(VolInfo_t *volinfo, fs_log_rec_t *lrec, int nid, int mid)
  55. {
  56. NTSTATUS err;
  57. fs_setattr_msg_t msg;
  58. WCHAR name[MAXPATH];
  59. int name_sz = sizeof(name);
  60. HANDLE vfd = FS_GET_VOL_HANDLE(volinfo, nid);
  61. // find path for fs_id
  62. FsLogReplay(("fs_replay_setattr: try %I64x:%I64x\n", lrec->fs_id[0],
  63. lrec->fs_id[1]));
  64. err = xFsGetPathById(vfd, &lrec->fs_id, name, &name_sz);
  65. if (err == STATUS_SUCCESS) {
  66. IO_STATUS_BLOCK ios;
  67. ios.Information = 0;
  68. // todo: we need to read current attr from master and apply it into nid disk.
  69. // FileAttributes are not enough, we could have time changes which need to be
  70. // in sync in all disks.
  71. memcpy(&msg.xid, lrec->id, sizeof(msg.xid));
  72. msg.fs_id = &lrec->fs_id;
  73. msg.name = xFsBuildRelativePath(volinfo, nid, name);
  74. msg.name_len = (USHORT) wcslen(msg.name);
  75. memset(&msg.attr, 0, sizeof(msg.attr));
  76. msg.attr.FileAttributes = lrec->attrib;
  77. err = FspSetAttr2(volinfo, NULL, nid, (PVOID) &msg, sizeof(msg),
  78. NULL, &ios.Information);
  79. }
  80. FsLogReplay(("replay_setattr: %I64x err %x\n",
  81. lrec->fs_id[0], err));
  82. return err;
  83. }
  84. NTSTATUS
  85. fs_replay_mkdir(VolInfo_t *volinfo, fs_log_rec_t *lrec, int nid, int mid)
  86. {
  87. NTSTATUS err;
  88. fs_create_msg_t msg;
  89. WCHAR name[MAXPATH];
  90. int name_sz = sizeof(name);
  91. HANDLE vfd = FS_GET_VOL_HANDLE(volinfo, mid);
  92. name[0] = '\0';
  93. // note: use id instead of fs_id since we don't have fs_id till
  94. // a prepare has committed.
  95. FsLogReplay(("fs_replay_mkdir: %I64x:%I64x\n", lrec->id[0],
  96. lrec->id[1]));
  97. err = xFsGetPathById(vfd, &lrec->id, name, &name_sz);
  98. if (err == STATUS_SUCCESS) {
  99. IO_STATUS_BLOCK ios;
  100. ios.Information = 0;
  101. memcpy(&msg.xid, lrec->id, sizeof(msg.xid));
  102. msg.name = xFsBuildRelativePath(volinfo, mid, name);
  103. msg.name_len = (USHORT) wcslen(msg.name);
  104. msg.flags = lrec->flags;
  105. msg.attr = lrec->attrib;
  106. err = FspMkDir(volinfo, NULL, nid, (PVOID) &msg, sizeof(msg),
  107. NULL, &ios.Information);
  108. }
  109. FsLogReplay(("Replay Mkdir %S err %x\n", name, err));
  110. return err;
  111. }
  112. NTSTATUS
  113. fs_replay_remove(VolInfo_t *volinfo, fs_log_rec_t *lrec, int nid, int mid)
  114. {
  115. NTSTATUS err;
  116. fs_remove_msg_t msg;
  117. // we find the objectid in the old replica, since file is already delete in master
  118. HANDLE ovfd = FS_GET_VOL_HANDLE(volinfo, nid);
  119. WCHAR name[MAXPATH];
  120. int name_sz = sizeof(name);
  121. name[0] = '\0';
  122. FsLogReplay(("fs_relay_remove: %I64x:%I64x\n", lrec->fs_id[0],
  123. lrec->fs_id[1]));
  124. err = xFsGetPathById(ovfd, &lrec->fs_id, name, &name_sz);
  125. if (err == STATUS_SUCCESS) {
  126. IO_STATUS_BLOCK ios;
  127. ios.Information = 0;
  128. memcpy(&msg.xid, lrec->id, sizeof(msg.xid));
  129. msg.fs_id = &lrec->fs_id;
  130. msg.name = xFsBuildRelativePath(volinfo, nid, name);
  131. msg.name_len = (USHORT) wcslen(msg.name);
  132. err = FspRemove(volinfo, NULL, nid, (PVOID) &msg, sizeof(msg),
  133. NULL, &ios.Information);
  134. }
  135. FsLogReplay(("Replay remove %S err %x\n", name, err));
  136. return err;
  137. }
  138. NTSTATUS
  139. fs_replay_rename(VolInfo_t *volinfo, fs_log_rec_t *lrec, int nid, int mid)
  140. {
  141. NTSTATUS err;
  142. fs_rename_msg_t msg;
  143. HANDLE vfd = FS_GET_VOL_HANDLE(volinfo,mid);
  144. HANDLE ovfd = FS_GET_VOL_HANDLE(volinfo, nid);
  145. WCHAR old_name[MAXPATH];
  146. WCHAR new_name[MAXPATH];
  147. int old_name_sz = sizeof(old_name);
  148. int new_name_sz = sizeof(new_name);
  149. new_name[0] = old_name[0] = '\0';
  150. FsLogReplay(("fs_relay_rename: %I64x:%I64x\n", lrec->fs_id[0],
  151. lrec->fs_id[1]));
  152. // get old name
  153. err = xFsGetPathById(ovfd, &lrec->fs_id, old_name, &old_name_sz);
  154. if (err == STATUS_SUCCESS) {
  155. IO_STATUS_BLOCK ios;
  156. ios.Information = 0;
  157. // get the new name
  158. err = xFsGetPathById(vfd, &lrec->fs_id, new_name, &new_name_sz);
  159. if (err == STATUS_OBJECT_PATH_NOT_FOUND) {
  160. NTSTATUS e;
  161. // if we can't find file in the master disk, we must
  162. // rename the file, pick a name based on file id
  163. swprintf(new_name, L"%S%I64x%I64x", old_name,
  164. lrec->fs_id[0],lrec->fs_id[1]);
  165. new_name_sz = wcslen(new_name);
  166. err = STATUS_SUCCESS;
  167. mid = nid;
  168. }
  169. if (err == STATUS_SUCCESS) {
  170. memcpy(&msg.xid, lrec->id, sizeof(msg.xid));
  171. msg.fs_id = &lrec->fs_id;
  172. msg.sname = xFsBuildRelativePath(volinfo, nid, old_name);
  173. msg.sname_len = (USHORT) wcslen(msg.sname);
  174. msg.dname = xFsBuildRelativePath(volinfo, mid, new_name);
  175. msg.dname_len = (USHORT) wcslen(msg.dname);
  176. err = FspRename(volinfo, NULL, nid, (PVOID) &msg, sizeof(msg),
  177. NULL, &ios.Information);
  178. }
  179. }
  180. FsLogReplay(("Replay rename %S -> %S err %x\n", old_name, new_name, err));
  181. return err;
  182. }
  183. NTSTATUS
  184. fs_replay_write(VolInfo_t *volinfo, fs_log_rec_t *lrec, int nid, int mid)
  185. {
  186. NTSTATUS err;
  187. IO_STATUS_BLOCK ios;
  188. HANDLE shdl = INVALID_HANDLE_VALUE;
  189. HANDLE dhdl = INVALID_HANDLE_VALUE;
  190. char *buf = NULL;
  191. fs_io_msg_t msg;
  192. HANDLE ovfd = FS_GET_VOL_HANDLE(volinfo, nid);
  193. HANDLE vfd = FS_GET_VOL_HANDLE(volinfo, mid);
  194. FsLogReplay(("fs_replay_write: %I64x:%I64x\n", lrec->fs_id[0],
  195. lrec->fs_id[1]));
  196. // get the new file first
  197. err = xFsGetHandleById(vfd, &lrec->fs_id, FILE_GENERIC_READ, &shdl);
  198. if (err == STATUS_SUCCESS) {
  199. LARGE_INTEGER off;
  200. IO_STATUS_BLOCK ios2;
  201. ios2.Information = 0;
  202. // get old file
  203. err = xFsGetHandleById(ovfd, &lrec->fs_id, FILE_GENERIC_WRITE, &dhdl);
  204. if (err != STATUS_SUCCESS) {
  205. // this is a very bad error, must abort now
  206. FsLogReplay(("Aborting replay_write err %x\n", err));
  207. err = STATUS_TRANSACTION_ABORTED;
  208. goto done;
  209. }
  210. // we need to read the new data from the sfd first
  211. if (lrec->length > 0) {
  212. // allocate buf
  213. buf = VirtualAlloc(NULL, lrec->length, MEM_RESERVE|MEM_COMMIT,
  214. PAGE_READWRITE);
  215. if (buf == NULL) {
  216. FsLogError(("Unable to allocate write buffer to replay\n"));
  217. err = STATUS_TRANSACTION_ABORTED;
  218. goto done;
  219. }
  220. off.LowPart = lrec->offset;
  221. off.HighPart = 0;
  222. // read local data. xxx: what if the file is locked?
  223. err = NtReadFile(shdl, NULL, NULL, NULL, &ios, buf,
  224. lrec->length, &off, NULL);
  225. if (err == STATUS_PENDING) {
  226. EventWait(shdl);
  227. }
  228. if (ios.Status != STATUS_SUCCESS) {
  229. FsLogReplay(("Read failed for replay %x\n", ios.Status));
  230. err = STATUS_TRANSACTION_ABORTED;
  231. goto done;
  232. }
  233. } else {
  234. buf = NULL;
  235. ios.Information = 0;
  236. }
  237. memcpy(&msg.xid, lrec->id, sizeof(msg.xid));
  238. msg.fs_id = &lrec->fs_id;
  239. msg.offset = lrec->offset;
  240. msg.size = (UINT32)ios.Information;
  241. msg.buf = buf;
  242. msg.context = (PVOID) dhdl;
  243. msg.fnum = INVALID_FHANDLE_T;
  244. err = FspWrite(volinfo, NULL, nid, (PVOID) &msg, sizeof(msg), NULL, &ios2.Information);
  245. // check if we have the same size, otherwise abort
  246. if ((ULONG)ios2.Information != lrec->length) {
  247. FsLogError(("Write sz mismatch, %d expected %d\n", (ULONG)ios2.Information, lrec->length));
  248. err = STATUS_TRANSACTION_ABORTED;
  249. }
  250. }
  251. done:
  252. if (buf != NULL) {
  253. VirtualFree(buf, 0, MEM_DECOMMIT|MEM_RELEASE);
  254. }
  255. if (shdl != INVALID_HANDLE_VALUE)
  256. xFsClose(shdl);
  257. if (dhdl != INVALID_HANDLE_VALUE)
  258. xFsClose(dhdl);
  259. FsLogReplay(("Replay write offset %d len %d err %x\n",
  260. lrec->offset, lrec->length, err));
  261. return err;
  262. }
  263. FsReplayHandler_t FsReplayCallTable[] = {
  264. fs_replay_create,
  265. fs_replay_setattr,
  266. fs_replay_write,
  267. fs_replay_mkdir,
  268. fs_replay_remove,
  269. fs_replay_rename
  270. };
  271. NTSTATUS
  272. FsReplayFid(VolInfo_t *volinfo, UserInfo_t *uinfo, int nid, int mid)
  273. {
  274. int i;
  275. WCHAR path[MAXPATH];
  276. WCHAR *name;
  277. int name_len;
  278. NTSTATUS err = STATUS_SUCCESS;
  279. // Open on replica nid all currently open files.
  280. for (i = 0; i < FsTableSize; i++) {
  281. HANDLE fd;
  282. UINT32 disp, share, access, flags;
  283. // todo: this should be in a for loop
  284. fd = uinfo->Table[i].Fd[mid];
  285. if (fd == INVALID_HANDLE_VALUE)
  286. continue;
  287. // get path name
  288. name_len = sizeof(path);
  289. err = xFsGetHandlePath(fd, path, &name_len);
  290. if (err != STATUS_SUCCESS) {
  291. FsLogReplay(("FsReplayFid %d failed on handlpath %x\n",
  292. mid, err));
  293. // todo: the master might have failed, we should just
  294. // try to go to a differnet replica if possible
  295. return err;
  296. }
  297. // issue open against nid, but first get filename from master
  298. name = xFsBuildRelativePath(volinfo, mid, path);
  299. DecodeCreateParam(uinfo->Table[i].Flags, &flags, &disp, &share, &access);
  300. err = xFsOpen(&fd, FS_GET_VOL_HANDLE(volinfo, nid),
  301. name, wcslen(name),
  302. access, share, 0);
  303. if (err != STATUS_SUCCESS) {
  304. FsLogReplay(("FsReplayFid mid %d nid %d open file '%S' '%S' failed %x\n",
  305. mid, nid, name, path, err));
  306. // Cleanup all open handles we have before returning an
  307. // error. We cleanup this node later, so that's ok.
  308. return err;
  309. }
  310. // we now add the open handle to the nid slot
  311. FS_SET_USER_HANDLE(uinfo, nid, i, fd);
  312. // todo: issue locks
  313. }
  314. return err;
  315. }
  316. NTSTATUS
  317. FsReplayXid(VolInfo_t *volinfo, int nid, PVOID arg, int action, int mid)
  318. {
  319. fs_log_rec_t *p = (fs_log_rec_t *) arg;
  320. NTSTATUS err = ERROR_SUCCESS;
  321. fs_id_t *fs_id;
  322. HANDLE vhdl;
  323. vhdl = FS_GET_VOL_HANDLE(volinfo, nid);
  324. if (vhdl == INVALID_HANDLE_VALUE) {
  325. FsLogUndo(("FsUndoXid Failed to get crs handle %d\n",
  326. nid));
  327. return STATUS_TRANSACTION_ABORTED;
  328. }
  329. vhdl = FS_GET_VOL_HANDLE(volinfo, mid);
  330. if (vhdl == INVALID_HANDLE_VALUE) {
  331. FsLogReplay(("FsReplayXid Failed to get crs handle %d\n",
  332. mid));
  333. return STATUS_TRANSACTION_ABORTED;
  334. }
  335. // note: use id instead of fs_id since we don't have fs_id till
  336. // a prepare has committed.
  337. fs_id = &p->id;
  338. FsLogReplay(("Replay cmd %d mid %d nid %d objid %I64x:%I64x\n", p->command,
  339. mid, nid,
  340. (*fs_id)[0], (*fs_id)[1]));
  341. err = FsReplayCallTable[p->command](volinfo, p, nid, mid);
  342. FsLogReplay(("Replay Status %x\n", err));
  343. return err;
  344. }
  345. NTSTATUS
  346. FsQueryXid(VolInfo_t *volinfo, int nid, PVOID arg, int action, int mid)
  347. {
  348. fs_log_rec_t *p = (fs_log_rec_t *) arg;
  349. NTSTATUS err = ERROR_SUCCESS;
  350. fs_id_t *fs_id;
  351. HANDLE vhdl;
  352. WCHAR name[MAXPATH];
  353. int name_sz = sizeof(name);
  354. ASSERT(nid == mid);
  355. vhdl = FS_GET_VOL_HANDLE(volinfo, nid);
  356. if (vhdl == INVALID_HANDLE_VALUE) {
  357. FsLogUndo(("FsUndoXid Failed to get crs handle %d\n",
  358. nid));
  359. return STATUS_TRANSACTION_ABORTED;
  360. }
  361. fs_id = &p->fs_id;
  362. FsLogReplay(("Query cmd %d nid %d objid %I64x:%I64x\n", p->command,
  363. nid, (*fs_id)[0], (*fs_id)[1]));
  364. switch(p->command) {
  365. case FS_CREATE:
  366. case FS_MKDIR:
  367. // issue a lookup,
  368. // note: use id instead of fs_id since we don't have fs_id till
  369. // a prepare has committed.
  370. fs_id = &p->id;
  371. err = xFsGetPathById(vhdl, fs_id, name, &name_sz);
  372. if (err == STATUS_OBJECT_PATH_NOT_FOUND)
  373. err = STATUS_CANCELLED;
  374. break;
  375. case FS_REMOVE:
  376. err = xFsGetPathById(vhdl, fs_id, name, &name_sz);
  377. if (err == STATUS_OBJECT_PATH_NOT_FOUND)
  378. err = STATUS_SUCCESS;
  379. else if (err == STATUS_SUCCESS)
  380. err = STATUS_CANCELLED;
  381. break;
  382. default:
  383. // can't make any determination
  384. err = STATUS_NOT_FOUND;
  385. break;
  386. }
  387. FsLogReplay(("Commit Status %x\n", err));
  388. return err;
  389. }
  390. ////////////////////////// Recovery Callback ////////////////////////////
  391. NTSTATUS
  392. WINAPI
  393. FsCrsCallback(PVOID hd, int nid, CrsRecord_t *arg, int action, int mid)
  394. {
  395. NTSTATUS err = STATUS_SUCCESS;
  396. VolInfo_t *volinfo = (VolInfo_t *) hd;
  397. switch(action) {
  398. case CRS_ACTION_REPLAY:
  399. err = FsReplayXid(volinfo, nid, arg, action, mid);
  400. break;
  401. case CRS_ACTION_UNDO:
  402. err = FsUndoXid(volinfo, nid, arg, action, mid);
  403. break;
  404. case CRS_ACTION_QUERY:
  405. err = FsQueryXid(volinfo, nid, arg, action, mid);
  406. break;
  407. case CRS_ACTION_DONE:
  408. FsLogReplay(("Vol %S done recovery nid %d mid %d\n",
  409. volinfo->Root, nid, mid));
  410. // we now need to walk our current open table and join this new replica.
  411. {
  412. UserInfo_t *u = volinfo->UserList;
  413. for (; u != NULL; u = u->Next) {
  414. err = FsReplayFid(volinfo, u, nid, mid);
  415. if (err != STATUS_SUCCESS)
  416. break;
  417. }
  418. }
  419. break;
  420. case CRS_ACTION_COPY:
  421. FsLogReplay(("FullCopy Disk%d -> Disk%d\n", mid, nid));
  422. //
  423. // We need to open new directory handles instead of using current ones. Otherwise,
  424. // our enum on directory might not be consistent
  425. //
  426. if (0) {
  427. WCHAR path[MAXPATH];
  428. HANDLE mvfd, ovfd;
  429. UINT32 disp;
  430. // open root volume directory
  431. disp = FILE_OPEN;
  432. swprintf(path, L"\\??\\%s\\%s\\", FS_GET_VOL_NAME(volinfo, mid), volinfo->Root);
  433. err = xFsCreate(&mvfd, NULL, path, wcslen(path),
  434. FILE_DIRECTORY_FILE|FILE_SYNCHRONOUS_IO_ALERT,
  435. 0,
  436. FILE_SHARE_READ|FILE_SHARE_WRITE,
  437. &disp,
  438. FILE_GENERIC_READ|FILE_GENERIC_WRITE|FILE_GENERIC_EXECUTE,
  439. NULL, 0);
  440. if (err != STATUS_SUCCESS) {
  441. FsLogReplay(("Failed to open mid %d '%S' err %x\n", mid, path, err));
  442. return err;
  443. }
  444. // open root volume directory
  445. disp = FILE_OPEN;
  446. swprintf(path, L"\\??\\%s\\%s\\", FS_GET_VOL_NAME(volinfo, nid), volinfo->Root);
  447. err = xFsCreate(&ovfd, NULL, path, wcslen(path),
  448. FILE_DIRECTORY_FILE|FILE_SYNCHRONOUS_IO_ALERT,
  449. 0,
  450. FILE_SHARE_READ|FILE_SHARE_WRITE,
  451. &disp,
  452. FILE_GENERIC_READ|FILE_GENERIC_WRITE|FILE_GENERIC_EXECUTE,
  453. NULL, 0);
  454. if (err != STATUS_SUCCESS) {
  455. xFsClose(mvfd);
  456. FsLogReplay(("Failed to open nid %d '%S' err %x\n", mid, path, err));
  457. return err;
  458. }
  459. err = xFsCopyTree(mvfd, ovfd);
  460. xFsClose(mvfd);
  461. xFsClose(ovfd);
  462. } else {
  463. err = xFsCopyTree(FS_GET_VOL_HANDLE(volinfo, mid),
  464. FS_GET_VOL_HANDLE(volinfo,nid));
  465. }
  466. FsLogReplay(("SlowStart Crs%d status %x\n", nid, err));
  467. break;
  468. default:
  469. FsLogReplay(("Unknown action %d\n", action));
  470. ASSERT(FALSE);
  471. }
  472. return err;
  473. }