Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

3293 lines
73 KiB

  1. /*++
  2. Copyright (c) 2000 Microsoft Corporation
  3. Module Name:
  4. fs.c
  5. Abstract:
  6. Implements filesystem operations
  7. Author:
  8. Ahmed Mohamed (ahmedm) 1-Feb-2000
  9. Revision History:
  10. --*/
  11. #include <nt.h>
  12. #include <ntdef.h>
  13. #include <ntrtl.h>
  14. #include <nturtl.h>
  15. #include <windows.h>
  16. #include <stdio.h>
  17. #include <string.h>
  18. #include <assert.h>
  19. #include "fs.h"
  20. #include "crs.h"
  21. #include "fsp.h"
  22. #include "fsutil.h"
  23. // Locking order: ulock followed by qlock
  24. ////////////////////////////////////////////////////////////////////////////
  25. UINT32
  26. get_attributes(DWORD a)
  27. {
  28. UINT32 attr = 0;
  29. if (a & FILE_ATTRIBUTE_READONLY) attr |= ATTR_READONLY;
  30. if (a & FILE_ATTRIBUTE_HIDDEN) attr |= ATTR_HIDDEN;
  31. if (a & FILE_ATTRIBUTE_SYSTEM) attr |= ATTR_SYSTEM;
  32. if (a & FILE_ATTRIBUTE_ARCHIVE) attr |= ATTR_ARCHIVE;
  33. if (a & FILE_ATTRIBUTE_DIRECTORY) attr |= ATTR_DIRECTORY;
  34. if (a & FILE_ATTRIBUTE_COMPRESSED) attr |= ATTR_COMPRESSED;
  35. if (a & FILE_ATTRIBUTE_OFFLINE) attr |= ATTR_OFFLINE;
  36. return attr;
  37. }
  38. DWORD
  39. unget_attributes(UINT32 attr)
  40. {
  41. DWORD a = 0;
  42. if (attr & ATTR_READONLY) a |= FILE_ATTRIBUTE_READONLY;
  43. if (attr & ATTR_HIDDEN) a |= FILE_ATTRIBUTE_HIDDEN;
  44. if (attr & ATTR_SYSTEM) a |= FILE_ATTRIBUTE_SYSTEM;
  45. if (attr & ATTR_ARCHIVE) a |= FILE_ATTRIBUTE_ARCHIVE;
  46. if (attr & ATTR_DIRECTORY) a |= FILE_ATTRIBUTE_DIRECTORY;
  47. if (attr & ATTR_COMPRESSED) a |= FILE_ATTRIBUTE_COMPRESSED;
  48. if (attr & ATTR_OFFLINE) a |= FILE_ATTRIBUTE_OFFLINE;
  49. return a;
  50. }
  51. DWORD
  52. unget_disp(UINT32 flags)
  53. {
  54. switch (flags & FS_DISP_MASK) {
  55. case DISP_DIRECTORY:
  56. case DISP_CREATE_NEW: return FILE_CREATE;
  57. case DISP_CREATE_ALWAYS: return FILE_OPEN_IF;
  58. case DISP_OPEN_EXISTING: return FILE_OPEN;
  59. case DISP_OPEN_ALWAYS: return FILE_OPEN_IF;
  60. case DISP_TRUNCATE_EXISTING: return FILE_OVERWRITE;
  61. default: return 0;
  62. }
  63. }
  64. DWORD
  65. unget_access(UINT32 flags)
  66. {
  67. DWORD win32_access = (flags & FS_DISP_MASK) == DISP_DIRECTORY ?
  68. FILE_GENERIC_READ|FILE_GENERIC_WRITE : FILE_READ_ATTRIBUTES | FILE_WRITE_ATTRIBUTES;
  69. if (flags & ACCESS_READ) win32_access |= FILE_GENERIC_READ;
  70. if (flags & ACCESS_WRITE) win32_access |= FILE_GENERIC_WRITE;
  71. win32_access |= FILE_READ_EA | FILE_WRITE_EA;
  72. return win32_access;
  73. }
  74. DWORD
  75. unget_share(UINT32 flags)
  76. {
  77. // we always open read shared because this simplifies recovery.
  78. DWORD win32_share = FILE_SHARE_READ;
  79. if (flags & SHARE_READ) win32_share |= FILE_SHARE_READ;
  80. if (flags & SHARE_WRITE) win32_share |= FILE_SHARE_WRITE;
  81. return win32_share;
  82. }
  83. DWORD
  84. unget_flags(UINT32 flags)
  85. {
  86. DWORD x;
  87. x = 0;
  88. if ((flags & FS_DISP_MASK) == DISP_DIRECTORY) {
  89. x = FILE_DIRECTORY_FILE|FILE_SYNCHRONOUS_IO_ALERT;
  90. } else {
  91. // I don't think I can tell without doing a query first, so don't!
  92. // x = FILE_NON_DIRECTORY_FILE;
  93. }
  94. if ((flags & FS_CACHE_MASK) == CACHE_WRITE_THROUGH) {
  95. x |= FILE_WRITE_THROUGH;
  96. }
  97. if ((flags & FS_CACHE_MASK) == CACHE_NO_BUFFERING) {
  98. x |= FILE_NO_INTERMEDIATE_BUFFERING;
  99. }
  100. return x;
  101. }
  102. void
  103. DecodeCreateParam(UINT32 uflags, UINT32 *flags, UINT32 *disp, UINT32 *share, UINT32 *access)
  104. {
  105. *flags = unget_flags(uflags);
  106. *disp = unget_disp(uflags);
  107. *share = unget_share(uflags);
  108. *access = unget_access(uflags);
  109. }
  110. /********************************************************************/
  111. NTSTATUS
  112. FspAllocatePrivateHandle(UserInfo_t *p, fhandle_t *fid)
  113. {
  114. int i;
  115. NTSTATUS err = STATUS_NO_MORE_FILES;
  116. LockEnter(p->Lock);
  117. for (i = 0; i < FsTableSize; i++) {
  118. if (p->Table[i].Flags == 0) {
  119. p->Table[i].Flags = ATTR_SYMLINK; // place marker
  120. err = STATUS_SUCCESS;
  121. break;
  122. }
  123. }
  124. LockExit(p->Lock);
  125. *fid = (fhandle_t) i;
  126. return err;
  127. }
  128. void
  129. FspFreeHandle(UserInfo_t *p, fhandle_t fnum)
  130. {
  131. FsLog(("FreeHandle %d\n", fnum));
  132. ASSERT(fnum != INVALID_FHANDLE_T);
  133. LockEnter(p->Lock);
  134. p->Table[fnum].Flags = 0;
  135. LockExit(p->Lock);
  136. }
  137. /*********************************************************** */
  138. void
  139. FspEvict(VolInfo_t *p, ULONG mask, BOOLEAN flag)
  140. {
  141. DWORD err;
  142. void FspCloseVolume(VolInfo_t *vol, ULONG AliveSet);
  143. ULONG set;
  144. // must be called with update lock held
  145. while (mask != 0) {
  146. FsArbLog(("FspEvict Entry: WSet %x Rset %x ASet %x set %x\n",
  147. p->WriteSet, p->ReadSet, p->AliveSet, mask));
  148. if (flag == FALSE) {
  149. // we just need to close the volume and return since
  150. // these replicas are not yet added to the aliveset and crs doesn't know
  151. // about them
  152. FspCloseVolume(p, mask);
  153. break;
  154. }
  155. LockEnter(p->qLock);
  156. // clear nid
  157. p->AliveSet &= ~mask;
  158. set = p->AliveSet;
  159. LockExit(p->qLock);
  160. // close nid handles <crs, vol, open files>
  161. FspCloseVolume(p, mask);
  162. mask = 0;
  163. err = CrsStart(p->CrsHdl, set, p->DiskListSz,
  164. &p->WriteSet, &p->ReadSet, &mask);
  165. if (mask == 0 && err == ERROR_WRITE_PROTECT) {
  166. // we have no quorum
  167. if (p->Event) {
  168. SetEvent(p->Event);
  169. }
  170. }
  171. }
  172. FsArbLog(("FspEvict Exit: vol %S WSet %x RSet %x ASet %x\n",
  173. p->Root, p->WriteSet, p->ReadSet, p->AliveSet));
  174. }
  175. void
  176. FspJoin(VolInfo_t *p, ULONG mask)
  177. {
  178. DWORD err;
  179. ULONG set = 0;
  180. // must be called with update lock
  181. if (mask != 0) {
  182. FsArbLog(("FspJoin Entry: WSet %x Rset %x ASet %x set %x\n",
  183. p->WriteSet, p->ReadSet, p->AliveSet, mask));
  184. // grab lock now
  185. LockEnter(p->qLock);
  186. p->AliveSet |= mask;
  187. set = p->AliveSet;
  188. LockExit(p->qLock);
  189. mask = 0;
  190. err = CrsStart(p->CrsHdl, set, p->DiskListSz,
  191. &p->WriteSet, &p->ReadSet, &mask);
  192. if (mask != 0) {
  193. // we need to evict dead members
  194. FspEvict(p, mask, TRUE);
  195. }
  196. if (err == ERROR_WRITE_PROTECT) {
  197. // we have no quorum
  198. if (p->Event) {
  199. SetEvent(p->Event);
  200. }
  201. }
  202. }
  203. FsArbLog(("FspJoin Exit: WSet %x Rset %x ASet %x\n",
  204. p->WriteSet, p->ReadSet, set));
  205. }
  206. void
  207. FspInitAnswers(IO_STATUS_BLOCK *ios, PVOID *rbuf, char *r, int sz)
  208. {
  209. int i;
  210. for (i = 0; i < FsMaxNodes; i++) {
  211. ios[i].Status = STATUS_HOST_UNREACHABLE;
  212. if (rbuf) {
  213. rbuf[i] = r;
  214. r += sz;
  215. }
  216. }
  217. }
  218. int
  219. FspCheckAnswers(VolInfo_t *vol, IO_STATUS_BLOCK *ios, PVOID *rbuf, UINT32 sz)
  220. {
  221. int i;
  222. int nums, numf, lasts;
  223. ULONG masks, maskf;
  224. lasts = 0;
  225. nums = numf = 0;
  226. masks = maskf = 0;
  227. for (i = 0; i < FsMaxNodes; i++) {
  228. if (ios[i].Status == STATUS_HOST_UNREACHABLE) {
  229. continue;
  230. }
  231. if (lasts == 0) {
  232. lasts = i;
  233. }
  234. if (ios[i].Status == STATUS_SUCCESS) {
  235. nums++;
  236. masks |= (1 << i);
  237. if (ios[lasts].Information != ios[i].Information) {
  238. FsLog(("Success node %d inconsistent with node %d!!!\n",
  239. lasts, i));
  240. }
  241. } else if (ios[i].Status == STATUS_CONNECTION_DISCONNECTED ||
  242. ios[i].Status == STATUS_BAD_NETWORK_PATH ||
  243. // this maps to may network errors
  244. RtlNtStatusToDosError(ios[i].Status) == ERROR_UNEXP_NET_ERR ||
  245. ios[i].Status == STATUS_VOLUME_DISMOUNTED) {
  246. ios[i].Status = STATUS_MEDIA_WRITE_PROTECTED;
  247. // evict any replica that lost connectivity
  248. FspEvict(vol, (ULONG)(1 << i), TRUE);
  249. if (lasts == i) {
  250. lasts = 0;
  251. }
  252. } else {
  253. numf++;
  254. maskf |= (1 << i);
  255. }
  256. }
  257. if (numf == 0 || nums == 0) {
  258. return lasts;
  259. }
  260. FsLog(("Nodes inconsistency success %x,%d failure %x,%d!!!\n",
  261. masks, nums, maskf, numf));
  262. // We need to evict whomever is smaller
  263. if (numf > nums) {
  264. FspEvict(vol, masks, TRUE);
  265. for (i = 0; i < FsMaxNodes; i++) {
  266. if (maskf & (1 << i)) {
  267. lasts = i;
  268. break;
  269. }
  270. }
  271. } else {
  272. FspEvict(vol, maskf, TRUE);
  273. for (i = 0; i < FsMaxNodes; i++) {
  274. if (masks & (1 << i)) {
  275. lasts = i;
  276. break;
  277. }
  278. }
  279. }
  280. FsLog(("Take result of node %d\n", lasts));
  281. return lasts;
  282. }
  283. //////////////////////////////////////////////////////////////////////////////////////
  284. NTSTATUS
  285. FspCreate(VolInfo_t *vinfo, UserInfo_t *uinfo, int nid,
  286. PVOID args, ULONG len, PVOID rbuf, ULONG_PTR *rlen)
  287. {
  288. // each file has a name stream that contains its crs log. We first
  289. // must open the parent crs log, issue a prepare on it. Create the new file
  290. // and then issuing a commit or abort on parent crs log. We also, have
  291. // to issue joins for each new crs handle that we get for the new file or
  292. // opened file. Note, this open may cause the file to enter recovery
  293. fs_create_msg_t *msg = (fs_create_msg_t *)args;
  294. NTSTATUS err, status;
  295. UINT32 disp, share, access, flags;
  296. fs_log_rec_t lrec;
  297. PVOID seq;
  298. fs_ea_t x;
  299. HANDLE fd;
  300. HANDLE vfd = FS_GET_VOL_HANDLE(vinfo, nid);
  301. fs_create_reply_t *rmsg = (fs_create_reply_t *)rbuf;
  302. PVOID crs_hd = FS_GET_CRS_HANDLE(vinfo, nid);
  303. fs_id_t *fid;
  304. DecodeCreateParam(msg->flags, &flags, &disp, &share, &access);
  305. FsInitEa(&x);
  306. memset(&lrec.fs_id, 0, sizeof(lrec.fs_id));
  307. lrec.command = FS_CREATE;
  308. lrec.flags = msg->flags;
  309. lrec.attrib = msg->attr;
  310. seq = CrsPrepareRecord(crs_hd, (PVOID) &lrec, msg->xid);
  311. if (seq == 0) {
  312. FsLog(("create: Unable to prepare log record!, open readonly\n"));
  313. return STATUS_MEDIA_WRITE_PROTECTED;
  314. }
  315. // set fid
  316. {
  317. fs_log_rec_t *p = (PVOID) seq;
  318. memcpy(p->fs_id, p->id, sizeof(fs_id_t));
  319. FsInitEaFid(&x, fid);
  320. memcpy(fid, p->id, sizeof(fs_id_t));
  321. }
  322. err = xFsCreate(&fd, vfd, msg->name, msg->name_len,
  323. flags, msg->attr, share, &disp, access,
  324. (PVOID) &x, sizeof(x));
  325. xFsLog(("create: %S err %x access %x disp %x\n", msg->name,
  326. err, access, disp));
  327. CrsCommitOrAbort(crs_hd, seq, err == STATUS_SUCCESS &&
  328. (disp == FILE_CREATED ||
  329. disp == FILE_OVERWRITTEN));
  330. if (err == STATUS_SUCCESS) {
  331. // we need to get the file id, no need to do this, for debug only
  332. err = xFsQueryObjectId(fd, (PVOID) fid);
  333. if (err != STATUS_SUCCESS) {
  334. FsLog(("Failed to get fileid %x\n", err));
  335. err = STATUS_SUCCESS;
  336. }
  337. }
  338. #ifdef FS_ASYNC
  339. BindNotificationPort(comport, fd, (PVOID) fdnum);
  340. #endif
  341. if (uinfo != NULL && msg->fnum != INVALID_FHANDLE_T) {
  342. FS_SET_USER_HANDLE(uinfo, nid, msg->fnum, fd);
  343. } else {
  344. xFsClose(fd);
  345. }
  346. ASSERT(rmsg != NULL);
  347. memcpy(&rmsg->fid, fid, sizeof(fs_id_t));
  348. rmsg->action = (USHORT)disp;
  349. rmsg->access = (USHORT)access;
  350. *rlen = sizeof(*rmsg);
  351. FsLog(("Create '%S' nid %d fid %d handle %x oid %I64x:%I64x\n",
  352. msg->name,
  353. nid, msg->fnum, fd,
  354. rmsg->fid[0], rmsg->fid[1]));
  355. return err;
  356. }
  357. NTSTATUS
  358. FspOpen(VolInfo_t *vinfo, UserInfo_t *uinfo, int nid,
  359. PVOID args, ULONG len, PVOID rbuf, ULONG_PTR *rlen)
  360. {
  361. // same as create except disp is allows open only and
  362. // no crs logging
  363. fs_create_msg_t *msg = (fs_create_msg_t *)args;
  364. NTSTATUS err, status;
  365. UINT32 disp, share, access, flags;
  366. HANDLE fd;
  367. HANDLE vfd = FS_GET_VOL_HANDLE(vinfo, nid);
  368. fs_create_reply_t *rmsg = (fs_create_reply_t *)rbuf;
  369. ASSERT(rmsg != NULL);
  370. DecodeCreateParam(msg->flags, &flags, &disp, &share, &access);
  371. disp = FILE_OPEN;
  372. err = xFsCreate(&fd, vfd, msg->name, msg->name_len,
  373. flags, msg->attr, share, &disp, access,
  374. NULL, 0);
  375. xFsLog(("open: %S err %x access %x disp %x\n", msg->name,
  376. err, access, disp));
  377. if (err == STATUS_SUCCESS) {
  378. ASSERT(disp != FILE_CREATED && disp != FILE_OVERWRITTEN);
  379. // we need to get the file id, no need to do this, for debug only
  380. err = xFsQueryObjectId(fd, (PVOID) &rmsg->fid);
  381. if (err != STATUS_SUCCESS) {
  382. FsLog(("Open '%S' failed to get fileid %x\n",
  383. msg->name, err));
  384. err = STATUS_SUCCESS;
  385. }
  386. }
  387. #ifdef FS_ASYNC
  388. BindNotificationPort(comport, fd, (PVOID) fdnum);
  389. #endif
  390. if (uinfo != NULL && msg->fnum != INVALID_FHANDLE_T) {
  391. FS_SET_USER_HANDLE(uinfo, nid, msg->fnum, fd);
  392. } else {
  393. xFsClose(fd);
  394. }
  395. rmsg->action = (USHORT)disp;
  396. rmsg->access = (USHORT)access;
  397. *rlen = sizeof(*rmsg);
  398. FsLog(("Open '%S' nid %d fid %d handle %x oid %I64x:%I64x\n",
  399. msg->name,
  400. nid, msg->fnum, fd,
  401. rmsg->fid[0], rmsg->fid[1]));
  402. return err;
  403. }
  404. NTSTATUS
  405. FspSetAttr(VolInfo_t *vinfo, UserInfo_t *uinfo, int nid,
  406. PVOID args, ULONG len, PVOID rbuf, ULONG_PTR *rlen)
  407. {
  408. fs_setattr_msg_t *msg = (fs_setattr_msg_t *)args;
  409. NTSTATUS err;
  410. fs_log_rec_t lrec;
  411. PVOID seq;
  412. PVOID crs_hd = FS_GET_CRS_HANDLE(vinfo, nid);
  413. HANDLE fd = FS_GET_USER_HANDLE(uinfo, nid, msg->fnum);
  414. lrec.command = FS_SETATTR;
  415. memcpy((PVOID) lrec.fs_id, (PVOID) msg->fs_id, sizeof(fs_id_t));
  416. lrec.attrib = msg->attr.FileAttributes;
  417. if ((seq = CrsPrepareRecord(crs_hd, (PVOID) &lrec, msg->xid)) == 0) {
  418. return STATUS_MEDIA_WRITE_PROTECTED;
  419. }
  420. // can be async ?
  421. err = xFsSetAttr(fd, &msg->attr);
  422. CrsCommitOrAbort(crs_hd, seq, err == STATUS_SUCCESS);
  423. return err;
  424. }
  425. NTSTATUS
  426. FspSetAttr2(VolInfo_t *vinfo, UserInfo_t *uinfo, int nid,
  427. PVOID args, ULONG len, PVOID rbuf, ULONG_PTR *rlen)
  428. {
  429. fs_setattr_msg_t *msg = (fs_setattr_msg_t *)args;
  430. HANDLE fd = INVALID_HANDLE_VALUE;
  431. HANDLE vfd = FS_GET_VOL_HANDLE(vinfo, nid);
  432. PVOID crs_hd = FS_GET_CRS_HANDLE(vinfo, nid);
  433. NTSTATUS err;
  434. fs_log_rec_t lrec;
  435. PVOID seq;
  436. assert(len == sizeof(*msg));
  437. // must be sync in order to close file
  438. err = xFsOpenWA(&fd, vfd, msg->name, msg->name_len);
  439. if (err == STATUS_SUCCESS) {
  440. err = xFsQueryObjectId(fd, (PVOID) &lrec.fs_id);
  441. }
  442. if (err == STATUS_SUCCESS) {
  443. lrec.command = FS_SETATTR;
  444. lrec.attrib = msg->attr.FileAttributes;
  445. if ((seq = CrsPrepareRecord(crs_hd, (PVOID) &lrec, msg->xid)) != 0) {
  446. err = xFsSetAttr(fd, &msg->attr);
  447. CrsCommitOrAbort(crs_hd, seq, err == STATUS_SUCCESS);
  448. } else {
  449. return STATUS_MEDIA_WRITE_PROTECTED;
  450. }
  451. }
  452. if (fd != INVALID_HANDLE_VALUE)
  453. xFsClose(fd);
  454. xFsLog(("setattr2 nid %d '%S' err %x\n", nid, msg->name, err));
  455. return err;
  456. }
  457. NTSTATUS
  458. FspLookup(VolInfo_t *vinfo, UserInfo_t *uinfo, int nid,
  459. PVOID args, ULONG len, PVOID rbuf, ULONG_PTR *rlen)
  460. {
  461. fs_lookup_msg_t *msg = (fs_lookup_msg_t *) args;
  462. HANDLE vfd = FS_GET_VOL_HANDLE(vinfo, nid);
  463. FILE_NETWORK_OPEN_INFORMATION *attr = (FILE_NETWORK_OPEN_INFORMATION *)rbuf;
  464. ASSERT(*rlen == sizeof(*attr));
  465. return xFsQueryAttrName(vfd, msg->name, msg->name_len, attr);
  466. }
  467. NTSTATUS
  468. FspGetAttr(VolInfo_t *vinfo, UserInfo_t *uinfo, int nid,
  469. PVOID args, ULONG len, PVOID rbuf, ULONG_PTR *rlen)
  470. {
  471. fhandle_t handle = *(fhandle_t *) args;
  472. HANDLE fd = FS_GET_USER_HANDLE(uinfo, nid, handle);
  473. FILE_NETWORK_OPEN_INFORMATION *attr = (FILE_NETWORK_OPEN_INFORMATION *)rbuf;
  474. ASSERT(*rlen == sizeof(*attr));
  475. return xFsQueryAttr(fd, attr);
  476. }
  477. NTSTATUS
  478. FspClose(VolInfo_t *vinfo, UserInfo_t *uinfo, int nid,
  479. PVOID args, ULONG len, PVOID rbuf, ULONG_PTR *rlen)
  480. {
  481. fhandle_t handle = *(fhandle_t *) args;
  482. HANDLE fd;
  483. NTSTATUS err;
  484. if (uinfo != NULL && handle != INVALID_FHANDLE_T)
  485. fd = FS_GET_USER_HANDLE(uinfo, nid, handle);
  486. else
  487. fd = FS_GET_VOL_HANDLE(vinfo, nid);
  488. FsLog(("Closing nid %d fid %d handle %x\n", nid, handle, fd));
  489. err = xFsClose(fd);
  490. if (err != STATUS_SUCCESS)
  491. // return err;
  492. err = STATUS_SUCCESS; // don't evict a node due to this
  493. if (uinfo != NULL && handle != INVALID_FHANDLE_T) {
  494. FS_SET_USER_HANDLE(uinfo, nid, handle, INVALID_HANDLE_VALUE);
  495. } else {
  496. FS_SET_VOL_HANDLE(vinfo, nid, INVALID_HANDLE_VALUE);
  497. }
  498. return err;
  499. }
  500. NTSTATUS
  501. FspReadDir(VolInfo_t *vinfo, UserInfo_t *uinfo, int nid,
  502. PVOID args, ULONG len, PVOID rbuf,
  503. ULONG_PTR *entries_found)
  504. {
  505. fs_io_msg_t *msg = (fs_io_msg_t *)args;
  506. int i;
  507. NTSTATUS e = STATUS_SUCCESS;
  508. int size = (int) msg->size;
  509. int cookie = (int) msg->cookie;
  510. HANDLE dir;
  511. dirinfo_t *buffer = (dirinfo_t *)msg->buf;
  512. xFsLog(("DirLoad: size %d\n", size));
  513. if (uinfo != NULL && msg->fnum != INVALID_FHANDLE_T)
  514. dir = FS_GET_USER_HANDLE(uinfo, nid, msg->fnum);
  515. else
  516. dir = FS_GET_VOL_HANDLE(vinfo, nid);
  517. *entries_found = 0;
  518. for(i = 0; size >= sizeof(dirinfo_t) ; i+=PAGESIZE) {
  519. // this must come from the source if we are to do async readdir
  520. char buf[PAGESIZE];
  521. int sz;
  522. sz = min(PAGESIZE, size);
  523. e = xFsReadDir(dir, buf, &sz, (cookie == 0) ? TRUE : FALSE);
  524. if (e == STATUS_SUCCESS) {
  525. PFILE_DIRECTORY_INFORMATION p;
  526. p = (PFILE_DIRECTORY_INFORMATION) buf;
  527. while (size >= sizeof(dirinfo_t)) {
  528. char *foo;
  529. int k;
  530. k = p->FileNameLength/2;
  531. p->FileName[k] = L'\0';
  532. wcscpy(buffer->name, p->FileName);
  533. buffer->attribs.file_size = p->EndOfFile.QuadPart;
  534. buffer->attribs.alloc_size = p->AllocationSize.QuadPart;
  535. buffer->attribs.create_time = p->CreationTime.QuadPart;
  536. buffer->attribs.access_time = p->LastAccessTime.QuadPart;
  537. buffer->attribs.mod_time = p->LastWriteTime.QuadPart;
  538. buffer->attribs.attributes = p->FileAttributes;
  539. buffer->cookie = ++cookie;
  540. buffer++;
  541. size -= sizeof(dirinfo_t);
  542. (*entries_found)++;
  543. if (p->NextEntryOffset == 0)
  544. break;
  545. foo = (char *) p;
  546. foo += p->NextEntryOffset;
  547. p = (PFILE_DIRECTORY_INFORMATION) foo;
  548. }
  549. }
  550. else {
  551. break;
  552. }
  553. }
  554. return e;
  555. }
  556. NTSTATUS
  557. FspMkDir(VolInfo_t *vinfo, UserInfo_t *uinfo, int nid,
  558. PVOID args, ULONG len, PVOID rbuf, ULONG_PTR *rlen)
  559. {
  560. fs_create_msg_t *msg = (fs_create_msg_t *)args;
  561. NTSTATUS err;
  562. HANDLE fd;
  563. fs_log_rec_t lrec;
  564. PVOID seq;
  565. fs_ea_t x;
  566. PVOID crs_hd = FS_GET_CRS_HANDLE(vinfo, nid);
  567. HANDLE vfd = FS_GET_VOL_HANDLE(vinfo, nid);
  568. fs_id_t *fid;
  569. UINT32 disp, share, access, flags;
  570. FsInitEa(&x);
  571. memset(&lrec.fs_id, 0, sizeof(lrec.fs_id));
  572. lrec.command = FS_MKDIR;
  573. lrec.attrib = msg->attr;
  574. lrec.flags = msg->flags;
  575. if ((seq = CrsPrepareRecord(crs_hd, (PVOID) &lrec, msg->xid)) == 0) {
  576. return STATUS_MEDIA_WRITE_PROTECTED;
  577. }
  578. // set fid
  579. {
  580. fs_log_rec_t *p = (PVOID) seq;
  581. memcpy(p->fs_id, p->id, sizeof(fs_id_t));
  582. FsInitEaFid(&x, fid);
  583. // set fs_id of the file
  584. memcpy(fid, p->id, sizeof(fs_id_t));
  585. }
  586. // decode attributes
  587. DecodeCreateParam(msg->flags, &flags, &disp, &share, &access);
  588. // always sync call
  589. err = xFsCreate(&fd, vfd, msg->name, msg->name_len, flags,
  590. msg->attr, share, &disp, access,
  591. (PVOID) &x, sizeof(x));
  592. FsLog(("Mkdir '%S' %x: cflags %x flags:%x attr:%x share:%x disp:%x access:%x\n",
  593. msg->name, err, msg->flags,
  594. flags, msg->attr, share, disp, access));
  595. CrsCommitOrAbort(crs_hd, seq, err == STATUS_SUCCESS &&
  596. (disp == FILE_CREATED ||
  597. disp == FILE_OVERWRITTEN));
  598. if (err == STATUS_SUCCESS) {
  599. // return fid
  600. if (rbuf != NULL) {
  601. ASSERT(*rlen == sizeof(fs_id_t));
  602. memcpy(rbuf, fid, sizeof(fs_id_t));
  603. }
  604. xFsClose(fd);
  605. }
  606. return err;
  607. }
  608. NTSTATUS
  609. FspRemove(VolInfo_t *vinfo, UserInfo_t *uinfo, int nid,
  610. PVOID args, ULONG len, PVOID rbuf, ULONG_PTR *rlen)
  611. {
  612. fs_remove_msg_t *msg = (fs_remove_msg_t *)args;
  613. NTSTATUS err;
  614. fs_log_rec_t lrec;
  615. PVOID seq;
  616. PVOID crs_hd = FS_GET_CRS_HANDLE(vinfo, nid);
  617. HANDLE vfd = FS_GET_VOL_HANDLE(vinfo, nid);
  618. HANDLE fd;
  619. *rlen = 0;
  620. // next three statements to obtain name -> fs_id
  621. err = xFsOpenRA(&fd, vfd, msg->name, msg->name_len);
  622. if (err != STATUS_SUCCESS) {
  623. return err;
  624. }
  625. // get object id
  626. err = xFsQueryObjectId(fd, (PVOID) &lrec.fs_id);
  627. xFsClose(fd);
  628. lrec.command = FS_REMOVE;
  629. if (err != STATUS_SUCCESS) {
  630. return err;
  631. }
  632. if ((seq = CrsPrepareRecord(crs_hd, (PVOID) &lrec, msg->xid)) == 0) {
  633. return STATUS_MEDIA_WRITE_PROTECTED;
  634. }
  635. err = xFsDelete(vfd, msg->name, msg->name_len);
  636. CrsCommitOrAbort(crs_hd, seq, err == STATUS_SUCCESS);
  637. xFsLog(("Rm nid %d '%S' %x\n", nid, msg->name, err));
  638. return err;
  639. }
  640. NTSTATUS
  641. FspRename(VolInfo_t *vinfo, UserInfo_t *uinfo, int nid,
  642. PVOID args, ULONG len, PVOID rbuf, ULONG_PTR *rlen)
  643. {
  644. fs_rename_msg_t *msg = (fs_rename_msg_t *)args;
  645. NTSTATUS err;
  646. fs_log_rec_t lrec;
  647. PVOID seq;
  648. PVOID crs_hd = FS_GET_CRS_HANDLE(vinfo, nid);
  649. HANDLE vfd = FS_GET_VOL_HANDLE(vinfo, nid);
  650. HANDLE fd;
  651. lrec.command = FS_RENAME;
  652. err = xFsOpen(&fd, vfd, msg->sname, msg->sname_len,
  653. STANDARD_RIGHTS_REQUIRED| SYNCHRONIZE |
  654. FILE_READ_EA |
  655. FILE_READ_ATTRIBUTES | FILE_WRITE_ATTRIBUTES,
  656. FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE,
  657. 0);
  658. if (err != STATUS_SUCCESS) {
  659. return err;
  660. }
  661. // get file id
  662. err = xFsQueryObjectId(fd, (PVOID) &lrec.fs_id);
  663. if (err == STATUS_SUCCESS) {
  664. if ((seq = CrsPrepareRecord(crs_hd, (PVOID) &lrec, msg->xid)) != 0) {
  665. err = xFsRename(fd, vfd, msg->dname, msg->dname_len);
  666. CrsCommitOrAbort(crs_hd, seq, err == STATUS_SUCCESS);
  667. } else {
  668. err = STATUS_MEDIA_WRITE_PROTECTED;
  669. }
  670. } else {
  671. xFsLog(("Failed to obtain fsid %x\n", err));
  672. }
  673. xFsClose(fd);
  674. xFsLog(("Mv nid %d %S -> %S err %x\n", nid, msg->sname, msg->dname,
  675. err));
  676. return err;
  677. }
  678. NTSTATUS
  679. FspWrite(VolInfo_t *vinfo, UserInfo_t *uinfo, int nid,
  680. PVOID args, ULONG len, PVOID rbuf, ULONG_PTR *rlen)
  681. {
  682. NTSTATUS err;
  683. IO_STATUS_BLOCK ios;
  684. LARGE_INTEGER off;
  685. ULONG key;
  686. fs_io_msg_t *msg = (fs_io_msg_t *)args;
  687. fs_log_rec_t lrec;
  688. PVOID seq;
  689. PVOID crs_hd = FS_GET_CRS_HANDLE(vinfo, nid);
  690. HANDLE fd;
  691. if (uinfo != NULL && msg->fnum != INVALID_FHANDLE_T)
  692. fd = FS_GET_USER_HANDLE(uinfo, nid, msg->fnum);
  693. else
  694. fd = (HANDLE) msg->context;
  695. lrec.command = FS_WRITE;
  696. memcpy(lrec.fs_id, (PVOID) msg->fs_id, sizeof(fs_id_t));
  697. lrec.offset = msg->offset;
  698. lrec.length = msg->size;
  699. if ((seq = CrsPrepareRecord(crs_hd, (PVOID) &lrec, msg->xid)) == 0) {
  700. return STATUS_MEDIA_WRITE_PROTECTED;
  701. }
  702. // Write ops
  703. xFsLog(("Write %d len %d off %d\n", nid, msg->size, msg->offset));
  704. off.LowPart = msg->offset;
  705. off.HighPart = 0;
  706. key = FS_BUILD_LOCK_KEY((uinfo ? uinfo->Uid : 0), nid, msg->fnum);
  707. if (msg->size > 0) {
  708. err = NtWriteFile(fd, NULL, NULL, (PVOID) NULL, &ios,
  709. msg->buf, msg->size, &off, &key);
  710. } else {
  711. FILE_END_OF_FILE_INFORMATION x;
  712. x.EndOfFile = off;
  713. err = NtSetInformationFile(fd, &ios,
  714. (char *) &x, sizeof(x),
  715. FileEndOfFileInformation);
  716. }
  717. if (err == STATUS_PENDING) {
  718. EventWait(fd);
  719. err = ios.Status;
  720. }
  721. *rlen = ios.Information;
  722. CrsCommitOrAbort(crs_hd, seq, err == STATUS_SUCCESS);
  723. return err;
  724. }
  725. NTSTATUS
  726. FspRead(VolInfo_t *vinfo, UserInfo_t *uinfo, int nid,
  727. PVOID args, ULONG sz, PVOID rbuf, ULONG_PTR *rlen)
  728. {
  729. fs_io_msg_t *msg = (fs_io_msg_t *)args;
  730. NTSTATUS err;
  731. IO_STATUS_BLOCK ios;
  732. LARGE_INTEGER off;
  733. HANDLE fd = FS_GET_USER_HANDLE(uinfo, nid, msg->fnum);
  734. ULONG key;
  735. assert(sz == sizeof(*msg));
  736. // Read ops
  737. off.LowPart = msg->offset;
  738. off.HighPart = 0;
  739. key = FS_BUILD_LOCK_KEY(uinfo->Uid, nid, msg->fnum);
  740. ios.Information = 0;
  741. err = NtReadFile(fd, NULL, NULL, NULL,
  742. &ios, msg->buf, msg->size, &off, &key);
  743. if (err == STATUS_PENDING) {
  744. EventWait(fd);
  745. err = ios.Status;
  746. }
  747. *rlen = ios.Information;
  748. xFsLog(("fs_read err %x sz %d\n", err, *rlen));
  749. return err;
  750. }
  751. NTSTATUS
  752. FspFlush(VolInfo_t *vinfo, UserInfo_t *uinfo, int nid,
  753. PVOID args, ULONG sz, PVOID rbuf, ULONG_PTR *rlen)
  754. {
  755. fhandle_t fnum = *(fhandle_t *)args;
  756. IO_STATUS_BLOCK ios;
  757. HANDLE fd;
  758. ASSERT(sz == sizeof(fhandle_t));
  759. *rlen = 0;
  760. if (uinfo != NULL && fnum != INVALID_FHANDLE_T) {
  761. fd = FS_GET_USER_HANDLE(uinfo, nid, fnum);
  762. } else {
  763. fd = FS_GET_VOL_HANDLE(vinfo, nid);
  764. }
  765. return NtFlushBuffersFile(fd, &ios);
  766. }
  767. NTSTATUS
  768. FspLock(VolInfo_t *vinfo, UserInfo_t *uinfo, int nid,
  769. PVOID args, ULONG sz, PVOID rbuf, ULONG_PTR *rlen)
  770. {
  771. fs_lock_msg_t *msg = (fs_lock_msg_t *)args;
  772. NTSTATUS err;
  773. IO_STATUS_BLOCK ios;
  774. LARGE_INTEGER offset, len;
  775. BOOLEAN wait, shared;
  776. ULONG key = FS_BUILD_LOCK_KEY(uinfo->Uid, nid, msg->fnum);
  777. assert(sz == sizeof(*msg));
  778. // xxx: need to log
  779. FsLog(("Lock %d off %d len %d flags %x\n", msg->fnum, msg->offset, msg->length,
  780. msg->flags));
  781. offset.LowPart = msg->offset;
  782. offset.HighPart = 0;
  783. len.LowPart = msg->length;
  784. len.HighPart = 0;
  785. // todo: need to be async, if we are the owner node and failnow is false, then
  786. // we should pass in the context and the completion port responses back
  787. // to the user
  788. wait = (BOOLEAN) ((msg->flags & FS_LOCK_WAIT) ? TRUE : FALSE);
  789. // todo: this can cause lots of headache, never wait.
  790. wait = FALSE;
  791. shared = (BOOLEAN) ((msg->flags & FS_LOCK_SHARED) ? FALSE : TRUE);
  792. err = NtLockFile(uinfo->Table[msg->fnum].Fd[nid],
  793. NULL, NULL, (PVOID) NULL, &ios,
  794. &offset, &len,
  795. key, wait, shared);
  796. // xxx: Need to log in software only
  797. *rlen = 0;
  798. FsLog(("Lock err %x\n", err));
  799. return err;
  800. }
  801. NTSTATUS
  802. FspUnlock(VolInfo_t *vinfo, UserInfo_t *uinfo, int nid,
  803. PVOID args, ULONG sz, PVOID rbuf, ULONG_PTR *rlen)
  804. {
  805. fs_lock_msg_t *msg = (fs_lock_msg_t *)args;
  806. NTSTATUS err;
  807. IO_STATUS_BLOCK ios;
  808. LARGE_INTEGER offset, len;
  809. ULONG key = FS_BUILD_LOCK_KEY(uinfo->Uid, nid, msg->fnum);
  810. assert(sz == sizeof(*msg));
  811. // xxx: need to log
  812. xFsLog(("Unlock %d off %d len %d\n", msg->fnum, msg->offset, msg->length));
  813. offset.LowPart = msg->offset;
  814. offset.HighPart = 0;
  815. len.LowPart = msg->length;
  816. len.HighPart = 0;
  817. // always sync I think
  818. err = NtUnlockFile(uinfo->Table[msg->fnum].Fd[nid], &ios, &offset, &len, key);
  819. // xxx: need to log in software only
  820. FsLog(("Unlock err %x\n", err));
  821. *rlen = 0;
  822. return err;
  823. }
  824. NTSTATUS
  825. FspStatFs(VolInfo_t *vinfo, UserInfo_t *uinfo, int nid,
  826. PVOID args, ULONG sz, PVOID rbuf, ULONG_PTR *rlen)
  827. {
  828. fs_attr_t *msg = (fs_attr_t *)args;
  829. NTSTATUS err;
  830. IO_STATUS_BLOCK ios;
  831. FILE_FS_SIZE_INFORMATION fsinfo;
  832. HANDLE vfd = FS_GET_VOL_HANDLE(vinfo, nid);
  833. assert(sz == sizeof(*msg));
  834. // xxx: need to log
  835. lstrcpyn(msg->fs_name, "FsCrs", MAX_FS_NAME_LEN);
  836. err = NtQueryVolumeInformationFile(vfd, &ios,
  837. (PVOID) &fsinfo,
  838. sizeof(fsinfo),
  839. FileFsSizeInformation);
  840. if (err == STATUS_SUCCESS) {
  841. msg->total_units = fsinfo.TotalAllocationUnits.QuadPart;
  842. msg->free_units = fsinfo.AvailableAllocationUnits.QuadPart;
  843. msg->sectors_per_unit = fsinfo.SectorsPerAllocationUnit;
  844. msg->bytes_per_sector = fsinfo.BytesPerSector;
  845. }
  846. *rlen = 0;
  847. return err;
  848. }
  849. NTSTATUS
  850. FspCheckFs(VolInfo_t *vinfo, UserInfo_t *uinfo, int nid,
  851. PVOID args, ULONG sz, PVOID rbuf, ULONG_PTR *rlen)
  852. {
  853. NTSTATUS err;
  854. IO_STATUS_BLOCK ios;
  855. FILE_FS_SIZE_INFORMATION fsinfo;
  856. HANDLE vfd = FS_GET_VOL_HANDLE(vinfo, nid);
  857. PVOID crshdl = FS_GET_CRS_HANDLE(vinfo, nid);
  858. err = NtQueryVolumeInformationFile(vfd, &ios,
  859. (PVOID) &fsinfo,
  860. sizeof(fsinfo),
  861. FileFsSizeInformation);
  862. // We need to issue crsflush to flush last write
  863. CrsFlush(crshdl);
  864. if (err == STATUS_SUCCESS) {
  865. HANDLE notifyfd = FS_GET_VOL_NOTIFY_HANDLE(vinfo, nid);
  866. if (WaitForSingleObject(notifyfd, 0) == WAIT_OBJECT_0) {
  867. // reload notification again
  868. FindNextChangeNotification(notifyfd);
  869. }
  870. } else {
  871. FsLog(("FsReserve failed nid %d err %x\n", nid, err));
  872. }
  873. *rlen = 0;
  874. return err;
  875. }
  876. NTSTATUS
  877. FspGetRoot(VolInfo_t *vinfo, UserInfo_t *uinfo, int nid,
  878. PVOID args, ULONG sz, PVOID rbuf, ULONG_PTR *rlen)
  879. {
  880. LPWSTR vname = FS_GET_VOL_NAME(vinfo, nid);
  881. swprintf(rbuf, L"\\\\?\\%s\\%s",vname,vinfo->Root);
  882. FsLog(("FspGetRoot '%S'\n", rbuf));
  883. return STATUS_SUCCESS;
  884. }
  885. /////////////////////////////////////////////////////////////////////////////////////
  886. BOOLEAN FsReadOnly = FALSE;
  887. int
  888. SendAvailRequest(fs_handler_t callback, VolInfo_t *vol, UserInfo_t *uinfo,
  889. PVOID msg, ULONG len, PVOID *rbuf, ULONG rsz, IO_STATUS_BLOCK *ios)
  890. {
  891. ULONG mask;
  892. int i;
  893. DWORD count = 0;
  894. if (vol == NULL)
  895. return ERROR_INVALID_HANDLE;
  896. // lock volume for update
  897. LockEnter(vol->uLock);
  898. // issue update for each replica
  899. i = 0;
  900. for (mask = vol->ReadSet; mask != 0; mask = mask >> 1, i++) {
  901. if (mask & 0x1) {
  902. count++;
  903. ios[i].Information = rsz;
  904. ios[i].Status = callback(vol, uinfo, i,
  905. msg, len,
  906. rbuf ? rbuf[i] : NULL,
  907. &ios[i].Information);
  908. }
  909. }
  910. // process ios and evict replicas that don't agree with majority
  911. if ((!FsReadOnly && CRS_QUORUM(count, vol->DiskListSz)) || (FsReadOnly && vol->ReadSet != 0))
  912. i = FspCheckAnswers(vol, ios, rbuf, rsz);
  913. else {
  914. i = 0;
  915. ios[0].Status = STATUS_MEDIA_WRITE_PROTECTED;
  916. ios[0].Information = count; // return number in current read set
  917. }
  918. // unlock volume
  919. LockExit(vol->uLock);
  920. return i;
  921. }
  922. int
  923. SendRequest(fs_handler_t callback, UserInfo_t *uinfo,
  924. PVOID msg, ULONG len, PVOID *rbuf, ULONG rsz, IO_STATUS_BLOCK *ios)
  925. {
  926. ULONG mask;
  927. int i;
  928. VolInfo_t *vol = uinfo->VolInfo;
  929. if (vol == NULL)
  930. return ERROR_INVALID_HANDLE;
  931. // lock volume for update
  932. LockEnter(vol->uLock);
  933. // issue update for each replica
  934. i = 0;
  935. for (mask = vol->WriteSet; mask != 0; mask = mask >> 1, i++) {
  936. if (mask & 0x1) {
  937. ios[i].Information = rsz;
  938. ios[i].Status = callback(vol, uinfo, i,
  939. msg, len,
  940. rbuf ? rbuf[i] : NULL,
  941. &ios[i].Information);
  942. }
  943. }
  944. // process ios and evict replicas that don't agree with majority
  945. if (vol->WriteSet != 0)
  946. i = FspCheckAnswers(vol, ios, rbuf, rsz);
  947. else {
  948. i = 0;
  949. ios[0].Status = STATUS_MEDIA_WRITE_PROTECTED;
  950. ios[0].Information = 0;
  951. }
  952. // unlock volume
  953. LockExit(vol->uLock);
  954. return i;
  955. }
  956. NTSTATUS
  957. SendReadRequest(fs_handler_t callback, UserInfo_t *uinfo,
  958. PVOID msg, ULONG len, PVOID rbuf, ULONG rsz, IO_STATUS_BLOCK *ios)
  959. {
  960. ULONG mask;
  961. int i;
  962. VolInfo_t *vol = uinfo->VolInfo;
  963. if (vol == NULL)
  964. return ERROR_INVALID_HANDLE;
  965. // lock volume for update
  966. LockEnter(vol->uLock);
  967. // issue update for each replica
  968. i = 0;
  969. for (mask = vol->ReadSet; mask != 0; mask = mask >> 1, i++) {
  970. if (mask & 0x1) {
  971. ios->Information = rsz;
  972. ios->Status = callback(vol, uinfo, i,
  973. msg, len, rbuf, &ios->Information);
  974. if (ios->Status == STATUS_CONNECTION_DISCONNECTED ||
  975. ios->Status == STATUS_VOLUME_DISMOUNTED) {
  976. // mark replica as invalid
  977. FspEvict(vol, (ULONG)(1 << i), TRUE);
  978. // reload mask again
  979. mask = vol->ReadSet;
  980. } else {
  981. break;
  982. }
  983. }
  984. }
  985. // process ios and evict replicas that don't agree with majority
  986. if (vol->ReadSet == 0) {
  987. ios->Status = STATUS_MEDIA_WRITE_PROTECTED;
  988. ios->Information = 0;
  989. }
  990. // unlock volume
  991. LockExit(vol->uLock);
  992. return STATUS_SUCCESS;
  993. }
  994. ///////////////////////////////////////////////////////////////////////////////
  995. DWORD
  996. FsCreate(
  997. PVOID fshdl,
  998. LPWSTR name,
  999. USHORT namelen,
  1000. UINT32 flags,
  1001. fattr_t* fattr,
  1002. fhandle_t* phandle,
  1003. UINT32 *action
  1004. )
  1005. {
  1006. UserInfo_t *uinfo = (UserInfo_t *) fshdl;
  1007. NTSTATUS err;
  1008. fs_create_reply_t nfd[FsMaxNodes];
  1009. IO_STATUS_BLOCK status[FsMaxNodes];
  1010. PVOID rbuf[FsMaxNodes];
  1011. fs_create_msg_t msg;
  1012. fhandle_t fdnum;
  1013. ASSERT(uinfo != NULL);
  1014. xFsLog(("FsDT::create(%S, 0x%08X, 0x%08X, 0x%08d)\n",
  1015. name, flags, fattr, namelen));
  1016. if (!phandle) return ERROR_INVALID_PARAMETER;
  1017. *phandle = INVALID_FHANDLE_T;
  1018. if (!name) return ERROR_INVALID_PARAMETER;
  1019. if (flags != (FLAGS_MASK & flags)) {
  1020. return ERROR_INVALID_PARAMETER;
  1021. }
  1022. if (action != NULL)
  1023. *action = flags & FS_ACCESS_MASK;
  1024. // if we are doing a directory, open locally
  1025. // todo: this should be merged with other case, if
  1026. // we are doing an existing open, then no need to
  1027. // issue update and log it, but we have to do
  1028. // mcast in order for the close to work.
  1029. if (namelen > 0) {
  1030. if (*name == L'\\') {
  1031. name++;
  1032. namelen--;
  1033. }
  1034. if (name[namelen-1] == L'\\') {
  1035. namelen--;
  1036. name[namelen] = L'\0';
  1037. }
  1038. }
  1039. memset(&msg.xid, 0, sizeof(msg.xid));
  1040. msg.name = name;
  1041. msg.name_len = namelen;
  1042. msg.flags = flags;
  1043. msg.attr = 0;
  1044. if (fattr) {
  1045. msg.attr = unget_attributes(fattr->attributes);
  1046. }
  1047. FspInitAnswers(status, rbuf, (char *) nfd, sizeof(nfd[0]));
  1048. // allocate a new handle
  1049. err = FspAllocatePrivateHandle(uinfo, &fdnum);
  1050. if (err == STATUS_SUCCESS) {
  1051. int sid;
  1052. msg.fnum = fdnum;
  1053. // Set flags in advance to sync with replay
  1054. uinfo->Table[fdnum].Flags = flags;
  1055. if (namelen < 2 ||
  1056. ((flags & FS_DISP_MASK) == DISP_DIRECTORY) ||
  1057. (unget_disp(flags) == FILE_OPEN)) {
  1058. sid = SendAvailRequest(FspOpen, uinfo->VolInfo,
  1059. uinfo,
  1060. (PVOID) &msg, sizeof(msg),
  1061. rbuf, sizeof(nfd[0]),
  1062. status);
  1063. } else {
  1064. sid = SendRequest(FspCreate,
  1065. uinfo,
  1066. (PVOID) &msg, sizeof(msg),
  1067. rbuf, sizeof(nfd[0]),
  1068. status);
  1069. }
  1070. if (action != NULL) {
  1071. if (!(nfd[sid].access & FILE_GENERIC_WRITE))
  1072. flags &= ~ACCESS_WRITE;
  1073. *action = flags | nfd[sid].action;
  1074. }
  1075. err = status[sid].Status;
  1076. if (err == STATUS_SUCCESS) {
  1077. fs_id_t *fid = FS_GET_FID_HANDLE(uinfo, fdnum);
  1078. // set file id
  1079. memcpy((PVOID) fid, (PVOID) nfd[sid].fid, sizeof(fs_id_t));
  1080. FsLog(("File id %I64x:%I64x\n", (*fid)[0], (*fid)[1]));
  1081. // todo: bind handles to completion port if we do async
  1082. } else {
  1083. // free handle
  1084. FspFreeHandle(uinfo, fdnum);
  1085. }
  1086. }
  1087. // todo: need to set fid
  1088. *phandle = fdnum;
  1089. FsLog(("create: return fd %d err %x\n", *phandle, err));
  1090. return RtlNtStatusToDosError(err);
  1091. }
  1092. void
  1093. BuildFileAttr(FILE_BASIC_INFORMATION *attr, fattr_t *fattr)
  1094. {
  1095. memset(attr, 0, sizeof(*attr));
  1096. if (fattr->create_time != INVALID_UINT64)
  1097. attr->CreationTime.QuadPart = fattr->create_time;
  1098. if (fattr->mod_time != INVALID_UINT64)
  1099. attr->LastWriteTime.QuadPart = fattr->mod_time;
  1100. if (fattr->access_time != INVALID_UINT64)
  1101. attr->LastAccessTime.QuadPart = fattr->access_time;
  1102. if (fattr->attributes != INVALID_UINT32)
  1103. attr->FileAttributes = unget_attributes(fattr->attributes);
  1104. }
  1105. DWORD
  1106. FsSetAttr(
  1107. PVOID fshdl,
  1108. fhandle_t handle,
  1109. fattr_t* attr
  1110. )
  1111. {
  1112. UserInfo_t *uinfo = (UserInfo_t *)fshdl;
  1113. fs_setattr_msg_t msg;
  1114. int sid;
  1115. IO_STATUS_BLOCK status[FsMaxNodes];
  1116. if (!attr || handle == INVALID_FHANDLE_T)
  1117. return ERROR_INVALID_PARAMETER;
  1118. // todo: get file id
  1119. memset(&msg.xid, 0, sizeof(msg.xid));
  1120. msg.fs_id = FS_GET_FID_HANDLE(uinfo, handle);
  1121. BuildFileAttr(&msg.attr, attr);
  1122. msg.fnum = handle;
  1123. FspInitAnswers(status, NULL, NULL, 0);
  1124. sid = SendRequest(FspSetAttr, uinfo,
  1125. (char *)&msg, sizeof(msg),
  1126. NULL, 0,
  1127. status);
  1128. return RtlNtStatusToDosError(status[sid].Status);
  1129. }
  1130. DWORD
  1131. FsSetAttr2(
  1132. PVOID fshdl,
  1133. LPWSTR name,
  1134. USHORT name_len,
  1135. fattr_t* attr
  1136. )
  1137. {
  1138. UserInfo_t *uinfo = (UserInfo_t *) fshdl;
  1139. fs_setattr_msg_t msg;
  1140. int sid;
  1141. IO_STATUS_BLOCK status[FsMaxNodes];
  1142. if (!attr || !name)
  1143. return ERROR_INVALID_PARAMETER;
  1144. if (*name == '\\') {
  1145. name++;
  1146. name_len--;
  1147. }
  1148. // todo: locate file id
  1149. memset(&msg.xid, 0, sizeof(msg.xid));
  1150. msg.name = name;
  1151. msg.name_len = name_len;
  1152. BuildFileAttr(&msg.attr, attr);
  1153. FspInitAnswers(status, NULL, NULL, 0);
  1154. sid = SendRequest(FspSetAttr2, uinfo,
  1155. (char *)&msg, sizeof(msg),
  1156. NULL, 0,
  1157. status);
  1158. return RtlNtStatusToDosError(status[sid].Status);
  1159. }
  1160. DWORD
  1161. FsLookup(
  1162. PVOID fshdl,
  1163. LPWSTR name,
  1164. USHORT name_len,
  1165. fattr_t* fattr
  1166. )
  1167. {
  1168. fs_lookup_msg_t msg;
  1169. int err;
  1170. IO_STATUS_BLOCK ios;
  1171. FILE_NETWORK_OPEN_INFORMATION attr;
  1172. FsLog(("Lookup name '%S' %x\n", name, fattr));
  1173. if (!fattr) return ERROR_INVALID_PARAMETER;
  1174. if (*name == '\\') {
  1175. name++;
  1176. name_len--;
  1177. }
  1178. msg.name = name;
  1179. msg.name_len = name_len;
  1180. err = SendReadRequest(FspLookup, (UserInfo_t *)fshdl,
  1181. (PVOID) &msg, sizeof(msg),
  1182. (PVOID) &attr, sizeof(attr),
  1183. &ios);
  1184. err = ios.Status;
  1185. if (ios.Status == STATUS_SUCCESS) {
  1186. fattr->file_size = attr.EndOfFile.QuadPart;
  1187. fattr->alloc_size = attr.AllocationSize.QuadPart;
  1188. fattr->create_time = *(TIME64 *)&attr.CreationTime;
  1189. fattr->access_time = *(TIME64 *)&attr.LastAccessTime;
  1190. fattr->mod_time = *(TIME64 *)&attr.LastWriteTime;
  1191. fattr->attributes = get_attributes(attr.FileAttributes);
  1192. }
  1193. FsLog(("Lookup: return %x\n", err));
  1194. return RtlNtStatusToDosError(err);
  1195. }
  1196. DWORD
  1197. FsGetAttr(
  1198. PVOID fshdl,
  1199. fhandle_t handle,
  1200. fattr_t* fattr
  1201. )
  1202. {
  1203. int err;
  1204. IO_STATUS_BLOCK ios;
  1205. FILE_NETWORK_OPEN_INFORMATION attr;
  1206. xFsLog(("Getattr fid '%d' %x\n", handle, fattr));
  1207. if (!fattr) return ERROR_INVALID_PARAMETER;
  1208. err = SendReadRequest(FspGetAttr, (UserInfo_t *)fshdl,
  1209. (PVOID) &handle, sizeof(handle),
  1210. (PVOID) &attr, sizeof(attr),
  1211. &ios);
  1212. err = ios.Status;
  1213. if (err == STATUS_SUCCESS) {
  1214. fattr->file_size = attr.EndOfFile.QuadPart;
  1215. fattr->alloc_size = attr.AllocationSize.QuadPart;
  1216. fattr->create_time = *(TIME64 *)&attr.CreationTime;
  1217. fattr->access_time = *(TIME64 *)&attr.LastAccessTime;
  1218. fattr->mod_time = *(TIME64 *)&attr.LastWriteTime;
  1219. fattr->attributes =attr.FileAttributes;
  1220. }
  1221. FsLog(("Getattr: return %d\n", err));
  1222. return RtlNtStatusToDosError(err);
  1223. }
  1224. DWORD
  1225. FsClose(
  1226. PVOID fshdl,
  1227. fhandle_t handle
  1228. )
  1229. {
  1230. int sid, err;
  1231. IO_STATUS_BLOCK status[FsMaxNodes];
  1232. UserInfo_t *uinfo;
  1233. if (handle == INVALID_FHANDLE_T) return ERROR_INVALID_PARAMETER;
  1234. if (handle >= FsTableSize) return ERROR_INVALID_PARAMETER;
  1235. FsLog(("Close: fid %d\n", handle));
  1236. FspInitAnswers(status, NULL, NULL, 0);
  1237. uinfo = (UserInfo_t *) fshdl;
  1238. sid = SendAvailRequest(FspClose, uinfo->VolInfo, uinfo,
  1239. (PVOID) &handle, sizeof(handle),
  1240. NULL, 0,
  1241. status);
  1242. err = status[sid].Status;
  1243. if (err == STATUS_SUCCESS) {
  1244. // need to free this handle slot
  1245. FspFreeHandle((UserInfo_t *) fshdl, handle);
  1246. }
  1247. FsLog(("Close: fid %d err %x\n", handle, err));
  1248. return RtlNtStatusToDosError(err);
  1249. }
  1250. DWORD
  1251. FsWrite(
  1252. PVOID fshdl,
  1253. fhandle_t handle,
  1254. UINT32 offset,
  1255. UINT16 *pcount,
  1256. void* buffer,
  1257. PVOID context
  1258. )
  1259. {
  1260. DWORD err;
  1261. IO_STATUS_BLOCK status[FsMaxNodes];
  1262. int i, sid;
  1263. fs_io_msg_t msg;
  1264. UserInfo_t *uinfo = (UserInfo_t *) fshdl;
  1265. if (!pcount || handle == INVALID_FHANDLE_T) return ERROR_INVALID_PARAMETER;
  1266. FsLog(("Write %d offset %d count %d\n", handle, offset, *pcount));
  1267. i = (int) offset;
  1268. if (i < 0) {
  1269. offset = 0;
  1270. (*pcount)--;
  1271. }
  1272. // todo: locate file id
  1273. memset(&msg.xid, 0, sizeof(msg.xid));
  1274. msg.fs_id = FS_GET_FID_HANDLE(uinfo, handle);
  1275. msg.offset = offset;
  1276. msg.size = (UINT32) *pcount;
  1277. msg.buf = buffer;
  1278. msg.context = context;
  1279. msg.fnum = handle;
  1280. FspInitAnswers(status, NULL, NULL, 0);
  1281. sid = SendRequest(FspWrite, (UserInfo_t *)fshdl,
  1282. (PVOID) &msg, sizeof(msg),
  1283. NULL, 0,
  1284. status);
  1285. err = status[sid].Status;
  1286. *pcount = (USHORT) status[sid].Information;
  1287. FsLog(("write: return %x\n", err));
  1288. return RtlNtStatusToDosError(err);
  1289. }
  1290. DWORD
  1291. FsRead(
  1292. PVOID fshdl,
  1293. fhandle_t handle,
  1294. UINT32 offset,
  1295. UINT16* pcount,
  1296. void* buffer,
  1297. PVOID context
  1298. )
  1299. {
  1300. NTSTATUS err;
  1301. IO_STATUS_BLOCK ios;
  1302. fs_io_msg_t msg;
  1303. memset(&msg.xid, 0, sizeof(msg.xid));
  1304. msg.offset = offset;
  1305. msg.buf = buffer;
  1306. msg.size = (UINT32) *pcount;
  1307. msg.context = context;
  1308. msg.fnum = handle;
  1309. FsLog(("read: %x fd %d sz %d\n", context, handle, msg.size));
  1310. err = SendReadRequest(FspRead, (UserInfo_t *)fshdl,
  1311. (PVOID) &msg, sizeof(msg),
  1312. NULL, 0,
  1313. &ios);
  1314. err = ios.Status;
  1315. if (err == STATUS_END_OF_FILE) {
  1316. *pcount = 0;
  1317. return ERROR_SUCCESS;
  1318. }
  1319. err = RtlNtStatusToDosError(err);
  1320. *pcount = (USHORT) ios.Information;
  1321. FsLog(("read: %x return %x sz %d\n", context, err, *pcount));
  1322. return err;
  1323. #if 0
  1324. #ifdef FS_ASYNC
  1325. return ERROR_IO_PENDING; //err;
  1326. #else
  1327. return ERROR_SUCCESS;
  1328. #endif
  1329. #endif
  1330. }
  1331. DWORD
  1332. FsReadDir(
  1333. PVOID fshdl,
  1334. fhandle_t dir,
  1335. UINT32 cookie,
  1336. dirinfo_t* buffer,
  1337. UINT32 size,
  1338. UINT32 *entries_found
  1339. )
  1340. {
  1341. fs_io_msg_t msg;
  1342. int err;
  1343. IO_STATUS_BLOCK ios;
  1344. FsLog(("read_dir: cookie %d buf %x entries %x\n", cookie, buffer, entries_found));
  1345. if (!entries_found || !buffer) return ERROR_INVALID_PARAMETER;
  1346. msg.cookie = cookie;
  1347. msg.buf = (PVOID) buffer;
  1348. msg.size = size;
  1349. msg.fnum = dir;
  1350. err = SendReadRequest(FspReadDir, (UserInfo_t *)fshdl,
  1351. (PVOID) &msg, sizeof(msg),
  1352. NULL, 0,
  1353. &ios);
  1354. err = ios.Status;
  1355. *entries_found = (UINT32) ios.Information;
  1356. xFsLog(("read_dir: err %d entries %d\n", err, *entries_found));
  1357. return RtlNtStatusToDosError(err);
  1358. }
  1359. DWORD
  1360. FsRemove(
  1361. PVOID fshdl,
  1362. LPWSTR name,
  1363. USHORT name_len
  1364. )
  1365. {
  1366. fs_remove_msg_t msg;
  1367. int err, sid;
  1368. IO_STATUS_BLOCK status[FsMaxNodes];
  1369. if (*name == L'\\') {
  1370. name++;
  1371. name_len--;
  1372. }
  1373. memset(&msg.xid, 0, sizeof(msg.xid));
  1374. msg.name = name;
  1375. msg.name_len = name_len;
  1376. FspInitAnswers(status, NULL, NULL, 0);
  1377. sid = SendRequest(FspRemove, (UserInfo_t *) fshdl,
  1378. (PVOID *)&msg, sizeof(msg),
  1379. NULL, 0,
  1380. status);
  1381. err = status[sid].Status;
  1382. return RtlNtStatusToDosError(err);
  1383. }
  1384. DWORD
  1385. FsRename(
  1386. PVOID fshdl,
  1387. LPWSTR from_name,
  1388. USHORT from_name_len,
  1389. LPWSTR to_name,
  1390. USHORT to_name_len
  1391. )
  1392. {
  1393. int err, sid;
  1394. fs_rename_msg_t msg;
  1395. IO_STATUS_BLOCK status[FsMaxNodes];
  1396. if (!from_name || !to_name)
  1397. return ERROR_INVALID_PARAMETER;
  1398. if (*from_name == L'\\') {
  1399. from_name++;
  1400. from_name_len--;
  1401. }
  1402. if (*to_name == L'\\') {
  1403. to_name++;
  1404. to_name_len--;
  1405. }
  1406. if (*from_name == L'\0' || *to_name == L'\0')
  1407. return ERROR_INVALID_PARAMETER;
  1408. FsLog(("rename %S -> %S,%d\n", from_name, to_name,to_name_len));
  1409. memset(&msg.xid, 0, sizeof(msg.xid));
  1410. msg.sname = from_name;
  1411. msg.sname_len = from_name_len;
  1412. msg.dname = to_name;
  1413. msg.dname_len = to_name_len;
  1414. FspInitAnswers(status, NULL, NULL, 0);
  1415. sid = SendRequest(FspRename, (UserInfo_t *) fshdl,
  1416. (PVOID) &msg, sizeof(msg),
  1417. NULL, 0,
  1418. status);
  1419. err = status[sid].Status;
  1420. return RtlNtStatusToDosError(err);
  1421. }
  1422. DWORD
  1423. FsMkDir(
  1424. PVOID fshdl,
  1425. LPWSTR name,
  1426. USHORT name_len,
  1427. fattr_t* attr
  1428. )
  1429. {
  1430. int err, sid;
  1431. IO_STATUS_BLOCK status[FsMaxNodes];
  1432. fs_id_t ids[FsMaxNodes];
  1433. PVOID *rbuf[FsMaxNodes];
  1434. fs_create_msg_t msg;
  1435. // XXX: we ignore attr for now...
  1436. if (!name) return ERROR_INVALID_PARAMETER;
  1437. if (*name == L'\\') {
  1438. name++;
  1439. name_len--;
  1440. }
  1441. memset(&msg.xid, 0, sizeof(msg.xid));
  1442. msg.attr = (attr != NULL ? unget_attributes(attr->attributes) :
  1443. FILE_ATTRIBUTE_DIRECTORY);
  1444. msg.flags = DISP_DIRECTORY | SHARE_READ | SHARE_WRITE;
  1445. msg.name = name;
  1446. msg.name_len = name_len;
  1447. FspInitAnswers(status, (PVOID *)rbuf, (PVOID) ids, sizeof(ids[0]));
  1448. sid = SendRequest(FspMkDir, (UserInfo_t *) fshdl,
  1449. (PVOID) &msg, sizeof(msg),
  1450. (PVOID *)rbuf, sizeof(ids[0]),
  1451. status);
  1452. err = status[sid].Status;
  1453. // todo: insert pathname and file id into hash table
  1454. return RtlNtStatusToDosError(err);
  1455. }
  1456. DWORD
  1457. FsFlush(
  1458. PVOID fshdl,
  1459. fhandle_t handle
  1460. )
  1461. {
  1462. NTSTATUS status;
  1463. int sid;
  1464. IO_STATUS_BLOCK ios[FsMaxNodes];
  1465. FspInitAnswers(ios, NULL, NULL, 0);
  1466. sid = SendRequest(FspFlush, (UserInfo_t *) fshdl,
  1467. (PVOID) &handle, sizeof(handle),
  1468. NULL, 0,
  1469. ios);
  1470. status = ios[sid].Status;
  1471. FsLog(("Flush %d err %x\n", handle, status));
  1472. if (status == STATUS_PENDING) {
  1473. status = STATUS_SUCCESS;
  1474. }
  1475. return RtlNtStatusToDosError(status);
  1476. }
  1477. DWORD
  1478. FsLock(PVOID fshdl, fhandle_t handle, ULONG offset, ULONG length, ULONG flags,
  1479. PVOID context)
  1480. {
  1481. fs_lock_msg_t msg;
  1482. int err, sid;
  1483. IO_STATUS_BLOCK status[FsMaxNodes];
  1484. if (handle == INVALID_FHANDLE_T)
  1485. return ERROR_INVALID_PARAMETER;
  1486. memset(&msg.xid, 0, sizeof(msg.xid));
  1487. msg.offset = offset;
  1488. msg.length = length;
  1489. msg.flags = flags;
  1490. msg.fnum = handle;
  1491. FsLog(("Lock fid %d off %d len %d\n", msg.fnum, offset, length));
  1492. FspInitAnswers(status, NULL, NULL, 0);
  1493. sid = SendRequest(FspLock, (UserInfo_t *) fshdl,
  1494. (PVOID)&msg, sizeof(msg),
  1495. NULL, 0,
  1496. status);
  1497. err = status[sid].Status;
  1498. FsLog(("Lock fid %d err %x\n", msg.fnum, err));
  1499. return RtlNtStatusToDosError(err);
  1500. }
  1501. DWORD
  1502. FsUnlock(PVOID fshdl, fhandle_t handle, ULONG offset, ULONG length)
  1503. {
  1504. fs_lock_msg_t msg;
  1505. int err, sid;
  1506. IO_STATUS_BLOCK status[FsMaxNodes];
  1507. if (handle == INVALID_FHANDLE_T)
  1508. return ERROR_INVALID_PARAMETER;
  1509. memset(&msg.xid, 0, sizeof(msg.xid));
  1510. msg.offset = offset;
  1511. msg.length = length;
  1512. msg.fnum = handle;
  1513. FsLog(("Unlock fid %d off %d len %d\n", handle, offset, length));
  1514. FspInitAnswers(status, NULL, NULL, 0);
  1515. sid = SendRequest(FspUnlock, (UserInfo_t *) fshdl,
  1516. (PVOID)&msg, sizeof(msg),
  1517. NULL, 0,
  1518. status);
  1519. err = status[sid].Status;
  1520. return RtlNtStatusToDosError(err);
  1521. }
  1522. DWORD
  1523. FsStatFs(
  1524. PVOID fshdl,
  1525. fs_attr_t* attr
  1526. )
  1527. {
  1528. DWORD err;
  1529. IO_STATUS_BLOCK ios;
  1530. if (!attr) return ERROR_INVALID_PARAMETER;
  1531. err = SendReadRequest(FspStatFs, (UserInfo_t *) fshdl,
  1532. (PVOID) attr, sizeof(*attr),
  1533. NULL, 0,
  1534. &ios);
  1535. err = ios.Status;
  1536. return RtlNtStatusToDosError(err);
  1537. }
  1538. DWORD
  1539. FsGetRoot(PVOID fshdl, LPWSTR fullpath)
  1540. {
  1541. DWORD err;
  1542. IO_STATUS_BLOCK ios;
  1543. if (!fullpath || !fshdl) return ERROR_INVALID_PARAMETER;
  1544. // use local replica instead
  1545. if ((((UserInfo_t *)fshdl)->VolInfo->FsCtx->Root)) {
  1546. swprintf(fullpath, L"\\\\?\\%s\\%s",
  1547. (((UserInfo_t *)fshdl)->VolInfo->FsCtx->Root),
  1548. (((UserInfo_t *)fshdl)->VolInfo->Root));
  1549. FsLog(("FspGetRoot '%S'\n", fullpath));
  1550. err = STATUS_SUCCESS;
  1551. } else {
  1552. err = SendReadRequest(FspGetRoot, (UserInfo_t *) fshdl,
  1553. NULL, 0,
  1554. (PVOID)fullpath, 0,
  1555. &ios);
  1556. err = ios.Status;
  1557. }
  1558. return RtlNtStatusToDosError(err);
  1559. }
  1560. static FsDispatchTable gDisp = {
  1561. 0x100,
  1562. FsCreate,
  1563. FsLookup,
  1564. FsSetAttr,
  1565. FsSetAttr2,
  1566. FsGetAttr,
  1567. FsClose,
  1568. FsWrite,
  1569. FsRead,
  1570. FsReadDir,
  1571. FsStatFs,
  1572. FsRemove,
  1573. FsRename,
  1574. FsMkDir,
  1575. FsRemove,
  1576. FsFlush,
  1577. FsLock,
  1578. FsUnlock,
  1579. FsGetRoot
  1580. };
  1581. //////////////////////////////////////////////////////////////
  1582. DWORD
  1583. FsInit(PVOID resHdl, PVOID *Hdl)
  1584. {
  1585. DWORD status;
  1586. FsCtx_t *ctx;
  1587. // This should be a compile check instead of runtime check
  1588. ASSERT(sizeof(fs_log_rec_t) == CRS_RECORD_SZ);
  1589. ASSERT(sizeof(fs_log_rec_t) == sizeof(CrsRecord_t));
  1590. if (Hdl == NULL) {
  1591. return ERROR_INVALID_PARAMETER;
  1592. }
  1593. FsLog(("FsInit:\n"));
  1594. // allocate a context
  1595. ctx = (FsCtx_t *) MemAlloc(sizeof(*ctx));
  1596. if (ctx == NULL) {
  1597. return ERROR_NOT_ENOUGH_MEMORY;
  1598. }
  1599. // initialize configuration table and other global state
  1600. memset(ctx, 0, sizeof(*ctx));
  1601. // local path
  1602. ctx->Root = NULL;
  1603. LockInit(ctx->Lock);
  1604. ctx->reshdl = resHdl;
  1605. *Hdl = (PVOID) ctx;
  1606. // we need to mount the IPC share now
  1607. status = FsRegister((PVOID)ctx, L"IPC$", L"dummy", NULL, 0, &ctx->ipcHdl);
  1608. if (status == ERROR_SUCCESS) {
  1609. // Init. volume
  1610. VolInfo_t *vinfo = (VolInfo_t *)ctx->ipcHdl;
  1611. ASSERT(vinfo != NULL);
  1612. // use node zero
  1613. vinfo->Fd[0] = INVALID_HANDLE_VALUE;
  1614. vinfo->ReadSet = 0;
  1615. vinfo->AliveSet = 0;
  1616. } else {
  1617. FsLog(("FsInit: failed to register ipc share %d\n", status));
  1618. // free memory
  1619. MemFree(ctx);
  1620. *Hdl = NULL;
  1621. }
  1622. return status;
  1623. }
  1624. void
  1625. FspFreeSession(SessionInfo_t *s)
  1626. {
  1627. UserInfo_t *u;
  1628. int i, j;
  1629. u = &s->TreeCtx;
  1630. FsLog(("Session free uid %d tid %d ref %d\n", u->Uid, u->Tid, u->RefCnt));
  1631. LockEnter(u->Lock);
  1632. if (u->VolInfo != NULL) {
  1633. UserInfo_t **p;
  1634. VolInfo_t *v = u->VolInfo;
  1635. LockExit(u->Lock);
  1636. // remove from vollist now
  1637. LockEnter(v->uLock);
  1638. p = &v->UserList;
  1639. while (*p != NULL) {
  1640. if (*p == u) {
  1641. // found it
  1642. *p = u->Next;
  1643. FsLog(("Remove uinfo %x,%x from vol %x %S\n", u, u->Next,
  1644. v->UserList, v->Root));
  1645. break;
  1646. }
  1647. p = &(*p)->Next;
  1648. }
  1649. LockExit(v->uLock);
  1650. // relock again
  1651. LockEnter(u->Lock);
  1652. }
  1653. // Close all user handles
  1654. for (i = 0; i < FsTableSize; i++) {
  1655. if (u->Table[i].Flags) {
  1656. FsLog(("Close slot %d %x\n", i, u->Table[i].Flags));
  1657. FsClose((PVOID) u, (fhandle_t)i);
  1658. }
  1659. }
  1660. // sap volptr
  1661. u->VolInfo = NULL;
  1662. LockExit(u->Lock);
  1663. DeleteCriticalSection(&u->Lock);
  1664. // free memory now, don't free u since it's part of s
  1665. MemFree(s);
  1666. }
  1667. void
  1668. FspCloseVolume(VolInfo_t *vol, ULONG AliveSet)
  1669. {
  1670. DWORD i;
  1671. // clear arbitrate state now
  1672. vol->Arbitrate.State = ARB_STATE_IDLE;
  1673. // Close crs and root handles, by evicting our alive set
  1674. // close nid handles <crs, vol, open files>
  1675. for (i = 0; i < FsMaxNodes; i++) {
  1676. if (AliveSet & (1 << i)) {
  1677. if (vol->CrsHdl[i]) {
  1678. CrsClose(vol->CrsHdl[i]);
  1679. vol->CrsHdl[i] = NULL;
  1680. }
  1681. FindCloseChangeNotification(vol->NotifyFd[i]);
  1682. vol->NotifyFd[i] = INVALID_HANDLE_VALUE;
  1683. xFsClose(vol->Fd[i]);
  1684. vol->Fd[i] = INVALID_HANDLE_VALUE;
  1685. // need to close all user handles now
  1686. {
  1687. UserInfo_t *u;
  1688. for (u = vol->UserList; u; u = u->Next) {
  1689. DWORD j;
  1690. FsLog(("Lock user %x root %S\n", u, vol->Root));
  1691. LockEnter(u->Lock);
  1692. // close all handles for this node
  1693. for (j = 0; j < FsTableSize; j++) {
  1694. if (u->Table[j].Fd[i] != INVALID_HANDLE_VALUE) {
  1695. FsLog(("Close fid %d\n", j));
  1696. xFsClose(u->Table[j].Fd[i]);
  1697. u->Table[j].Fd[i] = INVALID_HANDLE_VALUE;
  1698. }
  1699. }
  1700. LockExit(u->Lock);
  1701. FsLog(("Unlock user %x\n", u));
  1702. }
  1703. }
  1704. }
  1705. }
  1706. }
  1707. // call this when we are deleting resource and we need to get ride of
  1708. // our IPC reference to directory
  1709. void
  1710. FsEnd(PVOID Hdl)
  1711. {
  1712. FsCtx_t *ctx = (FsCtx_t *) Hdl;
  1713. VolInfo_t *p;
  1714. if (!ctx)
  1715. return;
  1716. LockEnter(ctx->Lock);
  1717. p = (VolInfo_t *)ctx->ipcHdl;
  1718. if (p) {
  1719. xFsClose(p->Fd[0]);
  1720. p->Fd[0] = INVALID_HANDLE_VALUE;
  1721. p->ReadSet = 0;
  1722. p->AliveSet = 0;
  1723. }
  1724. LockExit(ctx->Lock);
  1725. }
  1726. void
  1727. FsExit(PVOID Hdl)
  1728. {
  1729. // flush all state
  1730. FsCtx_t *ctx = (FsCtx_t *) Hdl;
  1731. VolInfo_t *p;
  1732. SessionInfo_t *s;
  1733. LogonInfo_t *log;
  1734. LockEnter(ctx->Lock);
  1735. while (s = ctx->SessionList) {
  1736. ctx->SessionList = s->Next;
  1737. // free this session now
  1738. FspFreeSession(s);
  1739. }
  1740. while (p = ctx->VolList) {
  1741. ctx->VolList = p->Next;
  1742. ctx->VolListSz--;
  1743. // free this volume now
  1744. FspCloseVolume(p, p->AliveSet);
  1745. MemFree(p);
  1746. }
  1747. while (log = ctx->LogonList) {
  1748. ctx->LogonList = log->Next;
  1749. // free token
  1750. CloseHandle(log->Token);
  1751. MemFree(log);
  1752. }
  1753. // now we free our structure
  1754. LockExit(ctx->Lock);
  1755. MemFree(ctx);
  1756. }
  1757. // adds a new share to list of trees available
  1758. DWORD
  1759. FsRegister(PVOID Hdl, LPWSTR root, LPWSTR share,
  1760. LPWSTR disklist[], DWORD len, PVOID *vHdl)
  1761. {
  1762. FsCtx_t *ctx = (FsCtx_t *) Hdl;
  1763. VolInfo_t *p;
  1764. // check limit
  1765. if (len >= FsMaxNodes) {
  1766. return ERROR_TOO_MANY_NAMES;
  1767. }
  1768. if (root == NULL || share == NULL || (wcslen(share) > (MAX_PATH - 5))) {
  1769. return ERROR_INVALID_PARAMETER;
  1770. }
  1771. // add a new volume to the list of volume. path is an array
  1772. // of directories. Note: The order of this list MUST be the
  1773. // same in all nodes since it also determines the disk id
  1774. // this is a simple check and assume one thread is calling this function
  1775. LockEnter(ctx->Lock);
  1776. // update our ipc context
  1777. if (ctx->ipcHdl) {
  1778. NTSTATUS status;
  1779. UINT32 disp = FILE_OPEN;
  1780. HANDLE vfd;
  1781. WCHAR path[MAX_PATH];
  1782. p = (VolInfo_t *)ctx->ipcHdl;
  1783. if (p->Fd[0] != INVALID_HANDLE_VALUE)
  1784. xFsClose(p->Fd[0]);
  1785. p->Fd[0] = INVALID_HANDLE_VALUE;
  1786. p->ReadSet = 0;
  1787. p->AliveSet = 0;
  1788. // set local path
  1789. ctx->Root = share;
  1790. // update our ipc handle now
  1791. FsLog(("FsRegister: ipc share '%S'\n", share));
  1792. // open our local ipc path
  1793. wcscpy(path, L"\\??\\");
  1794. wcscat(path, share);
  1795. wcscat(path, L"\\");
  1796. status = xFsCreate(&vfd, NULL, path, wcslen(path),
  1797. FILE_DIRECTORY_FILE|FILE_SYNCHRONOUS_IO_ALERT,
  1798. 0,
  1799. FILE_SHARE_READ|FILE_SHARE_WRITE,
  1800. &disp,
  1801. FILE_GENERIC_READ|FILE_GENERIC_WRITE|FILE_GENERIC_EXECUTE,
  1802. NULL, 0);
  1803. if (status == STATUS_SUCCESS) {
  1804. // our root must have already been created and secured.
  1805. ASSERT(disp != FILE_CREATED);
  1806. // use node zero
  1807. p->Fd[0] = vfd;
  1808. p->ReadSet = 0x1;
  1809. p->AliveSet = 0x1;
  1810. } else {
  1811. FsLog(("Fsregister: '%S' failed to open %x\n", share, status));
  1812. LockExit(ctx->Lock);
  1813. return RtlNtStatusToDosError(status);
  1814. }
  1815. }
  1816. // find the volume share
  1817. for (p = ctx->VolList; p != NULL; p = p->Next) {
  1818. if (!wcscmp(root, p->Root)) {
  1819. LockEnter(p->uLock);
  1820. break;
  1821. }
  1822. }
  1823. LockExit(ctx->Lock);
  1824. if (p == NULL) {
  1825. p = (VolInfo_t *)MemAlloc(sizeof(*p));
  1826. if (p == NULL) {
  1827. return ERROR_NOT_ENOUGH_MEMORY;
  1828. }
  1829. memset(p, 0, sizeof(*p));
  1830. LockInit(p->uLock);
  1831. LockInit(p->qLock);
  1832. // We don't need to walk the list again to check if a register has happened because
  1833. // this is serialized in nodequorum.c
  1834. LockEnter(ctx->Lock);
  1835. p->Tid = (USHORT)++ctx->VolListSz;
  1836. p->Next = ctx->VolList;
  1837. ctx->VolList = p;
  1838. p->FsCtx = ctx;
  1839. // lock the volume
  1840. LockEnter(p->uLock);
  1841. LockExit(ctx->Lock);
  1842. p->Label = L"Cluster Quorum";
  1843. }
  1844. p->Root = root;
  1845. if (disklist) {
  1846. DWORD i;
  1847. for (i = 1; i < FsMaxNodes; i++)
  1848. p->DiskList[i] = disklist[i];
  1849. }
  1850. p->DiskListSz = len;
  1851. FsLog(("FsRegister Tid %d Share '%S' %d disks\n", p->Tid, root, len));
  1852. // drop the volume lock
  1853. LockExit(p->uLock);
  1854. *vHdl = (PVOID) p;
  1855. return ERROR_SUCCESS;
  1856. }
  1857. SessionInfo_t *
  1858. FspAllocateSession()
  1859. {
  1860. SessionInfo_t *s;
  1861. UserInfo_t *u;
  1862. int i;
  1863. // add user to our tree and initialize handle tables
  1864. s = (SessionInfo_t *)MemAlloc(sizeof(*s));
  1865. if (s != NULL) {
  1866. memset(s, 0, sizeof(*s));
  1867. u = &s->TreeCtx;
  1868. LockInit(u->Lock);
  1869. // init handle table
  1870. for (i = 0; i < FsTableSize; i++) {
  1871. int j;
  1872. for (j = 0; j < FsMaxNodes; j++) {
  1873. FS_SET_USER_HANDLE(u, j, i, INVALID_HANDLE_VALUE);
  1874. }
  1875. }
  1876. }
  1877. return s;
  1878. }
  1879. // binds a session to a specific tree/share
  1880. DWORD
  1881. FsMount(PVOID Hdl, LPWSTR root_name, USHORT uid, USHORT *tid)
  1882. {
  1883. FsCtx_t *ctx = (FsCtx_t *) Hdl;
  1884. SessionInfo_t *s = NULL, *ns;
  1885. VolInfo_t *p;
  1886. DWORD err = ERROR_SUCCESS;
  1887. *tid = 0;
  1888. // allocate new ns
  1889. ns = FspAllocateSession();
  1890. if (ns == NULL) {
  1891. return ERROR_NOT_ENOUGH_MEMORY;
  1892. }
  1893. LockEnter(ctx->Lock);
  1894. // locate share
  1895. for (p = ctx->VolList; p != NULL; p = p->Next) {
  1896. if (!_wcsicmp(root_name, p->Root)) {
  1897. FsLog(("Mount share '%S' tid %d\n", p->Root, p->Tid));
  1898. break;
  1899. }
  1900. }
  1901. if (p != NULL) {
  1902. *tid = p->Tid;
  1903. for (s = ctx->SessionList; s != NULL; s = s->Next) {
  1904. if (s->TreeCtx.Uid == uid && s->TreeCtx.Tid == p->Tid) {
  1905. break;
  1906. }
  1907. }
  1908. if (s == NULL) {
  1909. UserInfo_t *u = &ns->TreeCtx;
  1910. // insert into session list
  1911. ns->Next = ctx->SessionList;
  1912. ctx->SessionList = ns;
  1913. FsLog(("Bind uid %d -> tid %d <%x,%x>\n", uid, p->Tid,
  1914. u, p->UserList));
  1915. u->RefCnt++;
  1916. u->Uid = uid;
  1917. u->Tid = p->Tid;
  1918. u->VolInfo = p;
  1919. // insert user_info into volume list
  1920. LockEnter(p->uLock);
  1921. FsLog(("Add <%x,%x>\n", u, p->UserList));
  1922. u->Next = p->UserList;
  1923. p->UserList = u;
  1924. LockExit(p->uLock);
  1925. } else {
  1926. // we already have this session opened, increment refcnt
  1927. s->TreeCtx.RefCnt++;
  1928. // free ns
  1929. MemFree(ns);
  1930. }
  1931. } else {
  1932. err = ERROR_BAD_NET_NAME;
  1933. }
  1934. LockExit(ctx->Lock);
  1935. return (err);
  1936. }
  1937. // This function is also a CloseSession
  1938. void
  1939. FsDisMount(PVOID Hdl, USHORT uid, USHORT tid)
  1940. {
  1941. FsCtx_t *ctx = (FsCtx_t *) Hdl;
  1942. SessionInfo_t *s, **last;
  1943. // lookup tree and close all user handles
  1944. s = NULL;
  1945. LockEnter(ctx->Lock);
  1946. last = &ctx->SessionList;
  1947. while (*last != NULL) {
  1948. UserInfo_t *u = &(*last)->TreeCtx;
  1949. if (u->Uid == uid && u->Tid == tid) {
  1950. ASSERT(u->RefCnt > 0);
  1951. u->RefCnt--;
  1952. if (u->RefCnt == 0) {
  1953. FsLog(("Dismount uid %d tid %d <%x,%x>\n", uid, tid,
  1954. u, *last));
  1955. s = *last;
  1956. *last = s->Next;
  1957. }
  1958. break;
  1959. }
  1960. last = &(*last)->Next;
  1961. }
  1962. LockExit(ctx->Lock);
  1963. if (s != NULL) {
  1964. FspFreeSession(s);
  1965. }
  1966. }
  1967. // todo: I am not using the token for now, but need to use it for all
  1968. // io operations
  1969. DWORD
  1970. FsLogonUser(PVOID Hdl, HANDLE token, LUID logonid, USHORT *uid)
  1971. {
  1972. FsCtx_t *ctx = (FsCtx_t *) Hdl;
  1973. LogonInfo_t *s;
  1974. int i;
  1975. // add user to our tree and initialize handle tables
  1976. s = (LogonInfo_t *)MemAlloc(sizeof(*s));
  1977. if (s == NULL) {
  1978. return ERROR_NOT_ENOUGH_MEMORY;
  1979. }
  1980. memset(s, 0, sizeof(*s));
  1981. s->Token = token;
  1982. s->LogOnId = logonid;
  1983. LockEnter(ctx->Lock);
  1984. s->Next = ctx->LogonList;
  1985. ctx->LogonList = s;
  1986. LockExit(ctx->Lock);
  1987. *uid = (USHORT) logonid.LowPart;
  1988. FsLog(("Logon %d,%d, uid %d\n", logonid.HighPart, logonid.LowPart, *uid));
  1989. return (ERROR_SUCCESS);
  1990. }
  1991. void
  1992. FsLogoffUser(PVOID Hdl, LUID logonid)
  1993. {
  1994. FsCtx_t *ctx = (FsCtx_t *) Hdl;
  1995. LogonInfo_t *s;
  1996. USHORT uid;
  1997. LockEnter(ctx->Lock);
  1998. for (s = ctx->LogonList; s != NULL; s = s->Next) {
  1999. if (s->LogOnId.LowPart == logonid.LowPart &&
  2000. s->LogOnId.HighPart == logonid.HighPart) {
  2001. uid = (USHORT) logonid.LowPart;
  2002. break;
  2003. }
  2004. }
  2005. if (s != NULL) {
  2006. SessionInfo_t **last;
  2007. FsLog(("Logoff user %d\n", uid));
  2008. // Flush all user trees
  2009. last = &ctx->SessionList;
  2010. while (*last != NULL) {
  2011. UserInfo_t *u = &(*last)->TreeCtx;
  2012. if (u->Uid == uid) {
  2013. SessionInfo_t *ss = *last;
  2014. // remove session and free it now
  2015. *last = ss->Next;
  2016. FspFreeSession(ss);
  2017. } else {
  2018. last = &(*last)->Next;
  2019. }
  2020. }
  2021. }
  2022. LockExit(ctx->Lock);
  2023. }
  2024. FsDispatchTable*
  2025. FsGetHandle(PVOID Hdl, USHORT tid, USHORT uid, PVOID *fshdl)
  2026. {
  2027. FsCtx_t *ctx = (FsCtx_t *) Hdl;
  2028. SessionInfo_t *s;
  2029. // locate tid,uid in session list
  2030. LockEnter(ctx->Lock);
  2031. for (s = ctx->SessionList; s != NULL; s = s->Next) {
  2032. if (s->TreeCtx.Uid == uid && s->TreeCtx.Tid == tid) {
  2033. *fshdl = (PVOID *) &s->TreeCtx;
  2034. LockExit(ctx->Lock);
  2035. return &gDisp;
  2036. }
  2037. }
  2038. LockExit(ctx->Lock);
  2039. *fshdl = NULL;
  2040. return NULL;
  2041. }
  2042. //////////////////////////////////// Arb/Release ///////////////////////////////
  2043. DWORD
  2044. FspOpenReplica(VolInfo_t *p, DWORD id, HANDLE *CrsHdl, HANDLE *Fd, HANDLE *notifyFd,
  2045. FspArbitrate_t *arb)
  2046. {
  2047. WCHAR path[MAXPATH];
  2048. UINT32 disp = FILE_OPEN_IF;
  2049. NTSTATUS err;
  2050. swprintf(path, L"\\\\?\\%s\\crs.log", p->DiskList[id]);
  2051. err = CrsOpen(FsCrsCallback, (PVOID) p, (USHORT)id,
  2052. path, FsCrsNumSectors,
  2053. CrsHdl);
  2054. if (err == ERROR_SUCCESS && CrsHdl != NULL) {
  2055. // got it
  2056. // open root volume directory
  2057. swprintf(path, L"\\??\\%s\\%s\\", p->DiskList[id], p->Root);
  2058. err = xFsCreate(Fd, NULL, path, wcslen(path),
  2059. FILE_DIRECTORY_FILE|FILE_SYNCHRONOUS_IO_ALERT,
  2060. 0,
  2061. FILE_SHARE_READ|FILE_SHARE_WRITE,
  2062. &disp,
  2063. FILE_GENERIC_READ|FILE_GENERIC_WRITE|FILE_GENERIC_EXECUTE,
  2064. NULL, 0);
  2065. if (err == STATUS_SUCCESS) {
  2066. // check if we are part of arb.
  2067. if (arb != NULL) {
  2068. // get quorum lock
  2069. LockEnter(p->qLock);
  2070. if (arb->State == ARB_STATE_BUSY) {
  2071. arb->Count++;
  2072. arb->Set |= (1 << id);
  2073. if (arb->Event && CRS_QUORUM(arb->Count, p->DiskListSz)) {
  2074. // first time only
  2075. SetEvent(arb->Event);
  2076. arb->Event = NULL;
  2077. }
  2078. // note it is safe to touch this because our parent thread already
  2079. // locked the updates out and is wait for us to finish
  2080. p->Fd[id] = *Fd;
  2081. ASSERT(p->CrsHdl[id] == NULL);
  2082. p->CrsHdl[id] = *CrsHdl;
  2083. LockExit(p->qLock);
  2084. FsLog(("Add Replica %d\n", id));
  2085. } else {
  2086. LockExit(p->qLock);
  2087. FsLog(("Stale open %d\n", id));
  2088. CrsClose(*CrsHdl);
  2089. xFsClose(*Fd);
  2090. err = ERROR_SEM_TIMEOUT;
  2091. }
  2092. }
  2093. if (err == ERROR_SUCCESS) {
  2094. FsArbLog(("Mounted %S\n", path));
  2095. swprintf(path, L"\\\\?\\%s\\", p->DiskList[id]);
  2096. // scan the tree to break any current oplocks on dead nodes
  2097. xFsTouchTree(*Fd);
  2098. // we now queue notification changes to force srv to contact client
  2099. *notifyFd = FindFirstChangeNotificationW(path, FALSE, FILE_NOTIFY_CHANGE_EA);
  2100. // if part of arb, set it now
  2101. if (arb != NULL) {
  2102. p->NotifyFd[id] = *notifyFd;
  2103. }
  2104. if (*notifyFd != INVALID_HANDLE_VALUE) {
  2105. int i;
  2106. for (i = 0; i < FsMaxNodes; i++) {
  2107. FindNextChangeNotification(*notifyFd);
  2108. }
  2109. } else {
  2110. FsArbLog(("Failed to register notification %d\n", GetLastError()));
  2111. }
  2112. }
  2113. } else {
  2114. FsArbLog(("Failed to mount root '%S' %x\n", path, err));
  2115. // close CrsHandle
  2116. CrsClose(*CrsHdl);
  2117. }
  2118. } else if (err == ERROR_LOCK_VIOLATION || err == ERROR_SHARING_VIOLATION) {
  2119. FsArbLog(("Replica '%S' already locked\n", path));
  2120. } else {
  2121. FsArbLog(("Replica '%S' probe failed %d\n", path, err));
  2122. }
  2123. return err;
  2124. }
  // Argument block passed to ProbeThread: identifies one replica of one volume.
  typedef struct {
      VolInfo_t *vol;   // volume the replica belongs to
      DWORD id;         // replica index, used to index the volume's DiskList
  }FspProbeReplicaId_t;
  2129. DWORD WINAPI
  2130. ProbeThread(LPVOID arg)
  2131. {
  2132. FspProbeReplicaId_t *probe = (FspProbeReplicaId_t *) arg;
  2133. DWORD i = probe->id;
  2134. VolInfo_t *p = probe->vol;
  2135. FspArbitrate_t *arb = &p->Arbitrate;
  2136. NTSTATUS err;
  2137. HANDLE crshdl, fshdl, notifyhdl;
  2138. DWORD retry_cnt;
  2139. // set our priority
  2140. SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL);
  2141. for (retry_cnt = 0; retry_cnt < 8; retry_cnt++) {
  2142. err = FspOpenReplica(p, i, &crshdl, &fshdl, &notifyhdl, arb);
  2143. if (err == ERROR_SUCCESS) {
  2144. // got it, we are done
  2145. break;
  2146. }
  2147. // handle error
  2148. if (err == ERROR_BAD_NETPATH || err == ERROR_REM_NOT_LIST || err == ERROR_SEM_TIMEOUT) {
  2149. // don't retry just bail out now
  2150. break;
  2151. } else {
  2152. BOOLEAN flag = FALSE;
  2153. // we try again as long as we are not cancelled and no quorum is reached
  2154. LockEnter(p->qLock);
  2155. if (arb->State == ARB_STATE_BUSY && !CRS_QUORUM(arb->Count, p->DiskListSz)) {
  2156. flag = TRUE;
  2157. }
  2158. // drop lock
  2159. LockExit(p->qLock);
  2160. // if cancelled we are out of here
  2161. if (flag == FALSE)
  2162. break;
  2163. // retry in 5 seconds again
  2164. Sleep(5 * 1000);
  2165. }
  2166. }
  2167. return 0;
  2168. }
  2169. ULONG
  2170. FspFindMissingReplicas(VolInfo_t *p, ULONG set)
  2171. {
  2172. ULONG FoundSet = 0;
  2173. DWORD i, err;
  2174. HANDLE crshdl, fshdl, notifyfd;
  2175. if (set == 0)
  2176. return 0;
  2177. for (i = 1; i < FsMaxNodes; i++) {
  2178. if (p->DiskList[i] == NULL)
  2179. continue;
  2180. if (!(set & (1 << i))) {
  2181. // drop the lock
  2182. LockExit(p->uLock);
  2183. err = FspOpenReplica(p, i, &crshdl, &fshdl, &notifyfd, NULL);
  2184. // get the lock
  2185. LockEnter(p->uLock);
  2186. if (err == STATUS_SUCCESS) {
  2187. if (p->CrsHdl[i] == NULL) {
  2188. p->NotifyFd[i] = notifyfd;
  2189. p->Fd[i] = fshdl;
  2190. p->CrsHdl[i] = crshdl;
  2191. FoundSet |= (1 << i);
  2192. } else {
  2193. // someone beat us to it, close ours
  2194. CrsClose(crshdl);
  2195. xFsClose(fshdl);
  2196. FindCloseChangeNotification(notifyfd);
  2197. }
  2198. }
  2199. }
  2200. }
  2201. if (FoundSet != 0)
  2202. FsArbLog(("New replica set after probe %x\n", FoundSet));
  2203. return FoundSet;
  2204. }
  2205. DWORD WINAPI
  2206. FspArbitrateThread(LPVOID arg)
  2207. {
  2208. VolInfo_t *p = (VolInfo_t *) arg;
  2209. FspArbitrate_t *arb = &p->Arbitrate;
  2210. HANDLE hdl[FsMaxNodes];
  2211. DWORD i, count = 0, err;
  2212. ULONG ReplicaSet;
  2213. DWORD Sequence;
  2214. FspProbeReplicaId_t Ids[FsMaxNodes];
  2215. FspProbeReplicaId_t *r;
  2216. BOOLEAN flag;
  2217. // set our priority
  2218. SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL);
  2219. // if we arb then no update can be going on now
  2220. LockEnter(p->uLock);
  2221. // our parent already stored this for us here
  2222. ReplicaSet = arb->Set;
  2223. arb->Set = 0;
  2224. FsArbLog(("ArbitrateThread begin %x\n", ReplicaSet));
  2225. // we now start a thread for each replica and do the probe in parallel
  2226. for (i = 1; i < FsMaxNodes; i++) {
  2227. if (p->DiskList[i] == NULL)
  2228. continue;
  2229. if (ReplicaSet & (1 << i))
  2230. continue;
  2231. r = &Ids[i];
  2232. r->vol = p;
  2233. r->id = i;
  2234. hdl[count] = CreateThread(NULL, 0, &ProbeThread, (LPVOID) r, 0, NULL);
  2235. if (hdl[count] != NULL) {
  2236. count++;
  2237. } else {
  2238. FsArbLog(("Unable to create thread to probe replica %d\n", i));
  2239. ProbeThread((LPVOID) r);
  2240. }
  2241. }
  2242. // we now wait
  2243. WaitForMultipleObjects(count, hdl, TRUE, INFINITE);
  2244. // Close the handles
  2245. for (i = 0; i < count; i++)
  2246. CloseHandle(hdl[i]);
  2247. flag = FALSE;
  2248. // grab lock
  2249. LockEnter(p->qLock);
  2250. if (arb->State != ARB_STATE_BUSY) {
  2251. flag = TRUE;
  2252. }
  2253. LockExit(p->qLock);
  2254. if (flag == TRUE) {
  2255. // we got cancelled, we undo what we just did and get out
  2256. if (arb->Set) {
  2257. // tell evict this not part of alive set
  2258. FspEvict(p, arb->Set, FALSE);
  2259. }
  2260. err = ERROR_CANCELLED;
  2261. goto exit;
  2262. }
  2263. count = arb->Count;
  2264. ReplicaSet = arb->Set;
  2265. FsArbLog(("ArbitrateThread working %x\n", ReplicaSet));
  2266. p->WriteSet = p->ReadSet = 0;
  2267. // check if we have a majority
  2268. if (CRS_QUORUM(count, p->DiskListSz)) {
  2269. FsArbLog(("I own quorum %d,%d set %x\n",count, p->DiskListSz, ReplicaSet));
  2270. // we need to join crs replicas
  2271. FspJoin(p, ReplicaSet);
  2272. if (p->WriteSet != 0 || p->ReadSet != 0) {
  2273. // remember event to signal if we lose quorum again
  2274. p->Event = arb->Event;
  2275. err = ERROR_SUCCESS;
  2276. } else {
  2277. // we lost the quorum
  2278. err = ERROR_WRITE_PROTECT;
  2279. }
  2280. } else {
  2281. FspEvict(p, ReplicaSet, FALSE);
  2282. err = ERROR_PATH_NOT_FOUND;
  2283. }
  2284. exit:
  2285. // clear the arb state
  2286. arb->State = ARB_STATE_IDLE;
  2287. // unlock volume
  2288. LockExit(p->uLock);
  2289. return err;
  2290. }
  2291. DWORD
  2292. FsIsQuorum(PVOID vHdl)
  2293. {
  2294. VolInfo_t *p = (VolInfo_t *)vHdl;
  2295. DWORD err = ERROR_INVALID_PARAMETER, count;
  2296. if (p) {
  2297. // Read write and avail sets. If we have a majority
  2298. // in avail set and wset is zero, we return pending.
  2299. // if wset is non-zero we return success, otherwise
  2300. // return failure
  2301. LockEnter(p->qLock);
  2302. if (p->Arbitrate.State == ARB_STATE_BUSY) {
  2303. count = p->Arbitrate.Count;
  2304. } else {
  2305. ULONG mask = p->AliveSet;
  2306. count = 0;
  2307. for (mask = p->AliveSet; mask ; mask = mask >> 1) {
  2308. if (mask & 0x1) {
  2309. count++;
  2310. }
  2311. }
  2312. }
  2313. if (CRS_QUORUM(count, p->DiskListSz))
  2314. err = ERROR_SUCCESS;
  2315. else
  2316. err = ERROR_BUSY;
  2317. LockExit(p->qLock);
  2318. }
  2319. return err;
  2320. }
  2321. DWORD
  2322. FsArbitrate(PVOID vHdl, HANDLE event, HANDLE *wait_event)
  2323. {
  2324. VolInfo_t *p = (VolInfo_t *)vHdl;
  2325. NTSTATUS err;
  2326. HANDLE hdl;
  2327. if (p) {
  2328. FspArbitrate_t *arb;
  2329. // lock volume
  2330. LockEnter(p->qLock);
  2331. arb = &p->Arbitrate;
  2332. if (p->AliveSet != 0) {
  2333. // we must have already arb. before, just bail out
  2334. LockExit(p->qLock);
  2335. return ERROR_SUCCESS;
  2336. }
  2337. if (arb->State == ARB_STATE_CANCEL) {
  2338. // there is already a pending arb, just return busy
  2339. LockExit(p->qLock);
  2340. return ERROR_CANCELLED;
  2341. }
  2342. if (arb->State == ARB_STATE_BUSY) {
  2343. // report current status
  2344. if (CRS_QUORUM(p->Arbitrate.Count, p->DiskListSz))
  2345. err = ERROR_SUCCESS;
  2346. else
  2347. err = ERROR_PATH_BUSY;
  2348. LockExit(p->qLock);
  2349. return err;
  2350. }
  2351. ASSERT(arb->State == ARB_STATE_IDLE);
  2352. arb->State = ARB_STATE_BUSY;
  2353. arb->Event = event;
  2354. arb->Set = p->AliveSet; // store alive set here
  2355. arb->Count = 0;
  2356. FsArbLog(("FsArb: queueing thread\n"));
  2357. // clear event
  2358. ResetEvent(event);
  2359. // drop lock
  2360. LockExit(p->qLock);
  2361. // we start a thread to do the arbitrate and return pending
  2362. hdl = CreateThread(NULL, 0, &FspArbitrateThread, (LPVOID) p, 0, NULL);
  2363. if (hdl != NULL) {
  2364. if (*wait_event != NULL) {
  2365. CloseHandle(*wait_event);
  2366. }
  2367. *wait_event = hdl;
  2368. err = ERROR_IO_PENDING;
  2369. } else {
  2370. // clear the state, no need for a lock here
  2371. arb->State = ARB_STATE_IDLE;
  2372. FsLogError(("FsArb: failed %d queueing thread\n", GetLastError()));
  2373. err = ERROR_INVALID_PARAMETER;
  2374. }
  2375. } else {
  2376. err = ERROR_INVALID_PARAMETER;
  2377. }
  2378. return err;
  2379. }
  2380. DWORD
  2381. FsCancelArbitration(PVOID vHdl)
  2382. {
  2383. VolInfo_t *p = (VolInfo_t *)vHdl;
  2384. FspArbitrate_t *arb;
  2385. DWORD err = ERROR_INVALID_PARAMETER;
  2386. if (p != NULL) {
  2387. LockEnter(p->qLock);
  2388. arb = &p->Arbitrate;
  2389. if (arb->State == ARB_STATE_BUSY) {
  2390. // check if we already got quorum
  2391. if (CRS_QUORUM(arb->Count, p->DiskListSz)) {
  2392. arb->Event = NULL; // no need to signal it
  2393. err = ERROR_SUCCESS;
  2394. } else {
  2395. FsArbLog(("FsCancelArbitration\n"));
  2396. arb->State = ARB_STATE_CANCEL;
  2397. err = ERROR_CANCELLED;
  2398. }
  2399. } else if (arb->State == ARB_STATE_IDLE) {
  2400. // we might already have quorum
  2401. err = (p->AliveSet) ? ERROR_SUCCESS : ERROR_CANCELLED;
  2402. } else {
  2403. err = ERROR_SUCCESS;
  2404. }
  2405. LockExit(p->qLock);
  2406. }
  2407. return err;
  2408. }
  2409. DWORD
  2410. FsRelease(PVOID vHdl)
  2411. {
  2412. DWORD i;
  2413. VolInfo_t *p = (VolInfo_t *)vHdl;
  2414. NTSTATUS err;
  2415. if (p) {
  2416. ULONG set;
  2417. // lock volume
  2418. LockEnter(p->uLock);
  2419. LockEnter(p->qLock);
  2420. set = p->AliveSet;
  2421. p->AliveSet = 0;
  2422. p->Event = 0;
  2423. LockExit(p->qLock);
  2424. FsArbLog(("FsRelease %S AliveSet %x\n", p->Root, set));
  2425. FspCloseVolume(p, set);
  2426. p->WriteSet = 0;
  2427. p->ReadSet = 0;
  2428. FsArbLog(("FsRelease %S done\n", p->Root));
  2429. // unlock volume
  2430. LockExit(p->uLock);
  2431. err = ERROR_SUCCESS;
  2432. } else {
  2433. err = ERROR_INVALID_PARAMETER;
  2434. }
  2435. return err;
  2436. }
  2437. DWORD
  2438. FsReserve(PVOID vhdl)
  2439. {
  2440. VolInfo_t *p = (VolInfo_t *)vhdl;
  2441. NTSTATUS err;
  2442. // check if there is a new replica online
  2443. if (p) {
  2444. ULONG ReplicaSet;
  2445. LockEnter(p->qLock);
  2446. if (p->Arbitrate.State != ARB_STATE_IDLE) {
  2447. // we are busy, just return success
  2448. LockExit(p->qLock);
  2449. return ERROR_SUCCESS;
  2450. }
  2451. ReplicaSet = p->AliveSet;
  2452. // drop lock now
  2453. LockExit(p->qLock);
  2454. // get update lock, do a try only if we can't do bother and try again latter
  2455. if (!LockTryEnter(p->uLock))
  2456. return ERROR_SUCCESS;
  2457. ReplicaSet = FspFindMissingReplicas(p, ReplicaSet);
  2458. // we found new disks
  2459. if (ReplicaSet > 0) {
  2460. // Add new finds
  2461. FspJoin(p, ReplicaSet);
  2462. }
  2463. LockExit(p->uLock);
  2464. }
  2465. if (p) {
  2466. // check each crs handle to be valid
  2467. IO_STATUS_BLOCK ios[FsMaxNodes];
  2468. DWORD sid;
  2469. FspInitAnswers(ios, NULL, NULL, 0);
  2470. sid = SendAvailRequest(FspCheckFs, p, NULL,
  2471. NULL, 0, NULL, 0, ios);
  2472. if (ios[sid].Status == STATUS_MEDIA_WRITE_PROTECTED &&
  2473. ios[sid].Information > 0)
  2474. err = ERROR_SUCCESS;
  2475. else
  2476. err = RtlNtStatusToDosError(ios[sid].Status);
  2477. } else {
  2478. err = ERROR_INVALID_PARAMETER;
  2479. }
  2480. if (err != ERROR_SUCCESS)
  2481. FsLogError(("FsReserve vol '%x' failed 0x%x\n", p, err));
  2482. return err;
  2483. }
  2484. DWORD
  2485. FsIsOnline(PVOID vHdl)
  2486. {
  2487. VolInfo_t *p = (VolInfo_t *)vHdl;
  2488. DWORD err = ERROR_INVALID_PARAMETER, count;
  2489. if (p) {
  2490. // Read write and avail sets. If we have a majority
  2491. // in avail set and wset is zero, we return pending.
  2492. // if wset is non-zero we return success, otherwise
  2493. // return failure
  2494. LockEnter(p->uLock);
  2495. ASSERT(p->DiskListSz != (DWORD)-1);
  2496. if (p->WriteSet > 0 || p->ReadSet > 0)
  2497. err = ERROR_SUCCESS;
  2498. else {
  2499. LockEnter(p->qLock);
  2500. if (p->Arbitrate.State == ARB_STATE_BUSY)
  2501. err = ERROR_IO_PENDING;
  2502. else {
  2503. ULONG mask = p->AliveSet;
  2504. count = 0;
  2505. for (mask = p->AliveSet; mask ; mask = mask >> 1) {
  2506. if (mask & 0x1) {
  2507. count++;
  2508. }
  2509. }
  2510. if (CRS_QUORUM(count, p->DiskListSz) || count > 0)
  2511. err = ERROR_IO_PENDING;
  2512. else
  2513. err = ERROR_BUSY;
  2514. }
  2515. LockExit(p->qLock);
  2516. }
  2517. LockExit(p->uLock);
  2518. }
  2519. return err;
  2520. }
  2521. DWORD
  2522. FsUpdateReplicaSet(PVOID vhdl, LPWSTR new_path[], DWORD new_len)
  2523. {
  2524. VolInfo_t *p = (VolInfo_t *)vhdl;
  2525. NTSTATUS err;
  2526. DWORD i, j;
  2527. ULONG evict_mask, add_mask;
  2528. if (p == NULL) {
  2529. return ERROR_INVALID_PARAMETER;
  2530. }
  2531. if (new_len >= FsMaxNodes) {
  2532. return ERROR_TOO_MANY_NAMES;
  2533. }
  2534. LockEnter(p->uLock);
  2535. // Find which current replicas are in the new set, and keep them
  2536. // We skip the IPC share, since it's local
  2537. evict_mask = 0;
  2538. for (j=1; j < FsMaxNodes; j++) {
  2539. BOOLEAN found;
  2540. if (p->DiskList[j] == NULL)
  2541. continue;
  2542. found = FALSE;
  2543. for (i=1; i < FsMaxNodes; i++) {
  2544. if (new_path[i] != NULL && wcscmp(new_path[i], p->DiskList[j]) == 0) {
  2545. // keep this replica
  2546. found = TRUE;
  2547. break;
  2548. }
  2549. }
  2550. if (found == FALSE) {
  2551. // This replica is evicted from the new set, add to evict set mask
  2552. evict_mask |= (1 << j);
  2553. FsArbLog(("FsUpdateReplicaSet evict replica # %d '%S' set 0x%x\n",
  2554. j, p->DiskList[j], evict_mask));
  2555. }
  2556. }
  2557. // At this point we have all the replicas in the current and new sets. We now need
  2558. // to find replicas that are in the new set but missing from current set.
  2559. add_mask = 0;
  2560. for (i=1; i < FsMaxNodes; i++) {
  2561. BOOLEAN found;
  2562. if (new_path[i] == NULL)
  2563. continue;
  2564. found = FALSE;
  2565. for (j=1; j < FsMaxNodes; j++) {
  2566. if (p->DiskList[j] != NULL && wcscmp(new_path[i], p->DiskList[j]) == 0) {
  2567. // keep this replica
  2568. found = TRUE;
  2569. break;
  2570. }
  2571. }
  2572. if (found == FALSE) {
  2573. add_mask |= (1 << i);
  2574. FsArbLog(("FsUpdateReplicaSet adding replica # %d '%S' set 0x%x\n",
  2575. i, new_path[i], add_mask));
  2576. }
  2577. }
  2578. // we now update our disklist with new disklist
  2579. for (i = 1; i < FsMaxNodes; i++) {
  2580. if ((evict_mask & 1 << i) || (add_mask & (1 << i)))
  2581. FsArbLog(("FsUpdateReplicat %d: %S -> %S\n",
  2582. i, p->DiskList[i], new_path[i]));
  2583. p->DiskList[i] = new_path[i];
  2584. }
  2585. p->DiskListSz = new_len;
  2586. // If we are alive, apply changes
  2587. if (p->WriteSet != 0 || p->ReadSet != 0) {
  2588. // At this point we evict old replicas
  2589. if (evict_mask != 0)
  2590. FspEvict(p, evict_mask, TRUE);
  2591. // check if there is a new replica online
  2592. if (add_mask > 0) {
  2593. ULONG ReplicaSet = 0;
  2594. // try to get the lock
  2595. if (LockTryEnter(p->qLock)) {
  2596. ReplicaSet = p->AliveSet;
  2597. LockExit(p->qLock);
  2598. }
  2599. ReplicaSet = FspFindMissingReplicas(p, ReplicaSet);
  2600. // we found new disks
  2601. if (ReplicaSet > 0) {
  2602. FspJoin(p, ReplicaSet);
  2603. }
  2604. }
  2605. }
  2606. LockExit(p->uLock);
  2607. return ERROR_SUCCESS;
  2608. }