Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

5088 lines
145 KiB

  1. /*++
  2. Copyright (c) 2000 Microsoft Corporation
  3. Module Name:
  4. fs.c
  5. Abstract:
  6. Implements filesystem operations
  7. Author:
  8. Ahmed Mohamed (ahmedm) 1-Feb-2000
  9. Revision History:
  10. --*/
  11. #include <nt.h>
  12. #include <ntdef.h>
  13. #include <ntrtl.h>
  14. #include <nturtl.h>
  15. #include <windows.h>
  16. #include <stdio.h>
  17. #include <string.h>
  18. #include <assert.h>
  19. #include "fs.h"
  20. #include "crs.h"
  21. #include "fsp.h"
  22. #include "fsutil.h"
  23. #include <strsafe.h>
  24. #include "clstrcmp.h"
  25. #include "Clusudef.h"
  26. #include <Align.h>
  27. #include <Ntddnfs.h>
  28. #include <Clusapi.h>
  29. // For testing only
  30. // VOID
  31. // ClRtlLogWmi(PCHAR FormatString);
  32. DWORD
  33. CrspNextLogRecord(CrsInfo_t *info, CrsRecord_t *seq,
  34. CrsRecord_t *lrec, BOOLEAN this_flag);
  35. VOID
  36. MajorityNodeSetCallLostquorumCallback(PVOID arg);
  37. ULONG
  38. FspFindMissingReplicas(VolInfo_t *p, ULONG set);
  39. void
  40. FspCloseVolume(VolInfo_t *vol, ULONG AliveSet);
  41. // CRS returns Win32 errors, so need to add them too here.
  42. #define IsNetworkFailure(x) \
  43. (((x) == STATUS_CONNECTION_DISCONNECTED)||\
  44. ((x) == STATUS_BAD_NETWORK_PATH)||\
  45. ((x) == STATUS_IO_TIMEOUT)||\
  46. ((x) == STATUS_VOLUME_DISMOUNTED)||\
  47. ((x) == STATUS_REMOTE_NOT_LISTENING)||\
  48. ((x) == ERROR_BAD_NETPATH)||\
  49. ((x) == ERROR_UNEXP_NET_ERR)||\
  50. ((x) == ERROR_NETNAME_DELETED)||\
  51. ((x) == ERROR_SEM_TIMEOUT)||\
  52. ((x) == ERROR_NOT_READY)||\
  53. ((x) == ERROR_REM_NOT_LIST)||\
  54. (RtlNtStatusToDosError(x) == ERROR_BAD_NETPATH)||\
  55. (RtlNtStatusToDosError(x) == ERROR_UNEXP_NET_ERR)||\
  56. (RtlNtStatusToDosError(x) == ERROR_NETNAME_DELETED))
  57. char Mystaticchangebuff[sizeof(FILE_NOTIFY_INFORMATION) + 16];
  58. IO_STATUS_BLOCK MystaticIoStatusBlock;
  59. VOID CALLBACK
  60. FsNotifyCallback(
  61. IN PVOID par,
  62. IN BOOLEAN isFired
  63. )
  64. {
  65. WaitRegArg_t *wReg=(WaitRegArg_t *)par;
  66. VolInfo_t *vol=(VolInfo_t *)wReg->vol;
  67. HANDLE regHdl;
  68. NTSTATUS status;
  69. if (wReg == NULL) {
  70. FsLog(("FsNotifyCallback(): Exiting...\n"));
  71. return;
  72. }
  73. FsLog(("FsNotifyCallback: Enqueing Change notification for Fd:0x%x\n", wReg->notifyFd));
  74. status = NtNotifyChangeDirectoryFile(wReg->notifyFd,
  75. vol->NotifyChangeEvent[wReg->id],
  76. NULL,
  77. NULL,
  78. &MystaticIoStatusBlock,
  79. &Mystaticchangebuff,
  80. sizeof(Mystaticchangebuff),
  81. FILE_NOTIFY_CHANGE_EA,
  82. (BOOLEAN)FALSE
  83. );
  84. if (!NT_SUCCESS(status)) {
  85. FsLog(("FsNotifyCallback: Failed to enque change notify, status 0x%x\n", status));
  86. FsLog(("FsNotifyCallback: Deregistering wait notification, nid:%d\n", wReg->id));
  87. LockEnter(vol->ArbLock);
  88. regHdl = vol->WaitRegHdl[wReg->id];
  89. vol->WaitRegHdl[wReg->id] = INVALID_HANDLE_VALUE;
  90. LockExit(vol->ArbLock);
  91. if ((regHdl != INVALID_HANDLE_VALUE)&&(!UnregisterWaitEx(regHdl, NULL))) {
  92. FsLog(("FsNotifyCallback: UnregisterWaitEx() failed, status %d\n", GetLastError()));
  93. }
  94. }
  95. }
  96. ////////////////////////////////////////////////////////////////////////////
  97. UINT32
  98. get_attributes(DWORD a)
  99. {
  100. UINT32 attr = 0;
  101. if (a & FILE_ATTRIBUTE_READONLY) attr |= ATTR_READONLY;
  102. if (a & FILE_ATTRIBUTE_HIDDEN) attr |= ATTR_HIDDEN;
  103. if (a & FILE_ATTRIBUTE_SYSTEM) attr |= ATTR_SYSTEM;
  104. if (a & FILE_ATTRIBUTE_ARCHIVE) attr |= ATTR_ARCHIVE;
  105. if (a & FILE_ATTRIBUTE_DIRECTORY) attr |= ATTR_DIRECTORY;
  106. if (a & FILE_ATTRIBUTE_COMPRESSED) attr |= ATTR_COMPRESSED;
  107. if (a & FILE_ATTRIBUTE_OFFLINE) attr |= ATTR_OFFLINE;
  108. return attr;
  109. }
  110. DWORD
  111. unget_attributes(UINT32 attr)
  112. {
  113. DWORD a = 0;
  114. if (attr & ATTR_READONLY) a |= FILE_ATTRIBUTE_READONLY;
  115. if (attr & ATTR_HIDDEN) a |= FILE_ATTRIBUTE_HIDDEN;
  116. if (attr & ATTR_SYSTEM) a |= FILE_ATTRIBUTE_SYSTEM;
  117. if (attr & ATTR_ARCHIVE) a |= FILE_ATTRIBUTE_ARCHIVE;
  118. if (attr & ATTR_DIRECTORY) a |= FILE_ATTRIBUTE_DIRECTORY;
  119. if (attr & ATTR_COMPRESSED) a |= FILE_ATTRIBUTE_COMPRESSED;
  120. if (attr & ATTR_OFFLINE) a |= FILE_ATTRIBUTE_OFFLINE;
  121. return a;
  122. }
  123. DWORD
  124. unget_disp(UINT32 flags)
  125. {
  126. switch (flags & FS_DISP_MASK) {
  127. case DISP_DIRECTORY:
  128. case DISP_CREATE_NEW: return FILE_CREATE;
  129. case DISP_CREATE_ALWAYS: return FILE_OPEN_IF;
  130. case DISP_OPEN_EXISTING: return FILE_OPEN;
  131. case DISP_OPEN_ALWAYS: return FILE_OPEN_IF;
  132. case DISP_TRUNCATE_EXISTING: return FILE_OVERWRITE;
  133. default: return 0;
  134. }
  135. }
  136. DWORD
  137. unget_access(UINT32 flags)
  138. {
  139. DWORD win32_access = (flags & FS_DISP_MASK) == DISP_DIRECTORY ?
  140. FILE_GENERIC_READ|FILE_GENERIC_WRITE : FILE_READ_ATTRIBUTES | FILE_WRITE_ATTRIBUTES;
  141. if (flags & ACCESS_READ) win32_access |= FILE_GENERIC_READ;
  142. if (flags & ACCESS_WRITE) win32_access |= FILE_GENERIC_WRITE;
  143. win32_access |= FILE_READ_EA | FILE_WRITE_EA;
  144. return win32_access;
  145. }
  146. DWORD
  147. unget_share(UINT32 flags)
  148. {
  149. // we always open read shared because this simplifies recovery.
  150. DWORD win32_share = FILE_SHARE_READ;
  151. if (flags & SHARE_READ) win32_share |= FILE_SHARE_READ;
  152. if (flags & SHARE_WRITE) win32_share |= FILE_SHARE_WRITE;
  153. return win32_share;
  154. }
  155. DWORD
  156. unget_flags(UINT32 flags)
  157. {
  158. DWORD x;
  159. x = 0;
  160. if ((flags & FS_DISP_MASK) == DISP_DIRECTORY) {
  161. x = FILE_DIRECTORY_FILE|FILE_SYNCHRONOUS_IO_ALERT;
  162. } else {
  163. // I don't think I can tell without doing a query first, so don't!
  164. // x = FILE_NON_DIRECTORY_FILE;
  165. }
  166. if ((flags & FS_CACHE_MASK) == CACHE_WRITE_THROUGH) {
  167. x |= FILE_WRITE_THROUGH;
  168. }
  169. if ((flags & FS_CACHE_MASK) == CACHE_NO_BUFFERING) {
  170. x |= FILE_NO_INTERMEDIATE_BUFFERING;
  171. }
  172. return x;
  173. }
  174. void
  175. DecodeCreateParam(UINT32 uflags, UINT32 *flags, UINT32 *disp, UINT32 *share, UINT32 *access)
  176. {
  177. *flags = unget_flags(uflags);
  178. *disp = unget_disp(uflags);
  179. *share = unget_share(uflags);
  180. *access = unget_access(uflags);
  181. }
  182. /********************************************************************/
  183. NTSTATUS
  184. FspAllocatePrivateHandle(UserInfo_t *p, fhandle_t *fid)
  185. {
  186. int i;
  187. NTSTATUS err = STATUS_NO_MORE_FILES;
  188. int j;
  189. LockEnter(p->Lock);
  190. // Don't use entry 0, functions might interpret this as error.
  191. for (i = 1; i < FsTableSize; i++) {
  192. if (p->Table[i].Flags == 0) {
  193. p->Table[i].Flags = ATTR_SYMLINK; // place marker
  194. err = STATUS_SUCCESS;
  195. // Reset all the handle values
  196. for(j=0;j<FsMaxNodes;j++) {
  197. p->Table[i].Fd[j] = INVALID_HANDLE_VALUE;
  198. }
  199. p->Table[i].FileName = NULL;
  200. p->Table[i].hState = HandleStateAssigned;
  201. break;
  202. }
  203. }
  204. LockExit(p->Lock);
  205. *fid = (fhandle_t) i;
  206. return err;
  207. }
  208. void
  209. FspFreeHandle(UserInfo_t *p, fhandle_t fnum)
  210. {
  211. int i;
  212. FsLog(("FreeHandle %d\n", fnum));
  213. ASSERT(fnum != INVALID_FHANDLE_T);
  214. LockEnter(p->Lock);
  215. p->Table[fnum].Flags = 0;
  216. // Close any open handles in the Fd.
  217. for(i=0;i<FsMaxNodes;i++) {
  218. if (p->Table[fnum].Fd[i] != INVALID_HANDLE_VALUE) {
  219. xFsClose(p->Table[fnum].Fd[i]);
  220. p->Table[fnum].Fd[i] = INVALID_HANDLE_VALUE;
  221. }
  222. }
  223. // Deallocate the file name.
  224. if (p->Table[fnum].FileName != NULL) {
  225. LocalFree(p->Table[fnum].FileName);
  226. p->Table[fnum].FileName = NULL;
  227. }
  228. p->Table[fnum].hState = HandleStateInit;
  229. LockExit(p->Lock);
  230. }
  231. /*********************************************************** */
  232. void
  233. FspEvict(VolInfo_t *p, ULONG mask, BOOLEAN full_evict)
  234. /*++
  235. This function should be called with writer's lock held.
  236. FspJoin() & FspEvict() are the only functions which can modify VolInfo->State.
  237. */
  238. {
  239. DWORD err;
  240. ULONG set;
  241. // Only evict those shares that are still in the alive set.
  242. mask = (mask & p->AliveSet);
  243. FsArbLog(("FspEvict Entry: WSet %x Rset %x ASet %x EvictMask %x\n",
  244. p->WriteSet, p->ReadSet, p->AliveSet, mask));
  245. while (mask != 0) {
  246. if (full_evict == FALSE) {
  247. // we just need to close the volume and return since
  248. // these replicas are not yet added to the aliveset and crs doesn't know
  249. // about them
  250. FspCloseVolume(p, mask);
  251. break;
  252. }
  253. // clear nid
  254. p->AliveSet &= ~mask;
  255. set = p->AliveSet;
  256. // close nid handles <crs, vol, open files>
  257. FspCloseVolume(p, mask);
  258. mask = 0;
  259. err = CrsStart(p->CrsHdl, set, p->DiskListSz,
  260. &p->WriteSet, &p->ReadSet, &mask);
  261. if (mask == 0) {
  262. // Now update the MNS state.
  263. if (p->WriteSet) {
  264. p->State = VolumeStateOnlineReadWrite;
  265. }
  266. else if (p->ReadSet) {
  267. p->State = VolumeStateOnlineReadonly;
  268. }
  269. else {
  270. p->State = VolumeStateInit;
  271. }
  272. }
  273. }
  274. FsArbLog(("FspEvict Exit: vol %S WSet %x RSet %x ASet %x\n",
  275. p->Root, p->WriteSet, p->ReadSet, p->AliveSet));
  276. }
  277. void
  278. FspJoin(VolInfo_t *p, ULONG mask)
  279. /*++
  280. This function should be called with writer's lock held.
  281. FspJoin() & FspEvict() are the only functions which can modify VolInfo->State.
  282. */
  283. {
  284. DWORD err;
  285. ULONG set=0;
  286. DWORD i;
  287. // Join only those shares that are not already in the AliveSet.
  288. mask = (mask & (~p->AliveSet));
  289. FsArbLog(("FspJoin Entry: WSet %x Rset %x ASet %x JoinMask %x\n",
  290. p->WriteSet, p->ReadSet, p->AliveSet, mask));
  291. if (mask != 0) {
  292. p->AliveSet |= mask;
  293. set = p->AliveSet;
  294. // Mark the share state to be online, if they fail in CrsStart, they would
  295. // set to offline in FspEvict()
  296. //
  297. for(i=1;i<FsMaxNodes;i++) {
  298. if (set & (1<<i)) {
  299. p->ShareState[i] = SHARE_STATE_ONLINE;
  300. }
  301. }
  302. mask = 0;
  303. err = CrsStart(p->CrsHdl, set, p->DiskListSz,
  304. &p->WriteSet, &p->ReadSet, &mask);
  305. if (mask != 0) {
  306. // we need to evict dead members
  307. FspEvict(p, mask, TRUE);
  308. }
  309. // Now update the MNS state.
  310. if (p->WriteSet) {
  311. p->State = VolumeStateOnlineReadWrite;
  312. }
  313. else if (p->ReadSet) {
  314. p->State = VolumeStateOnlineReadonly;
  315. }
  316. else {
  317. p->State = VolumeStateInit;
  318. }
  319. }
  320. FsArbLog(("FspJoin Exit: vol %S WSet %x Rset %x ASet %x\n",
  321. p->Root, p->WriteSet, p->ReadSet, p->AliveSet));
  322. }
  323. void
  324. FspInitAnswers(IO_STATUS_BLOCK *ios, PVOID *rbuf, char *r, int sz)
  325. {
  326. int i;
  327. for (i = 0; i < FsMaxNodes; i++) {
  328. ios[i].Status = STATUS_HOST_UNREACHABLE;
  329. if (rbuf) {
  330. rbuf[i] = r;
  331. r += sz;
  332. }
  333. }
  334. }
  335. //////////////////////////////////////////////////////////////////////////////////////
  336. NTSTATUS
  337. FspCreate(VolInfo_t *vinfo, UserInfo_t *uinfo, int nid,
  338. PVOID args, ULONG len, PVOID rbuf, ULONG_PTR *rlen, PVOID rec)
  339. {
  340. // each file has a name stream that contains its crs log. We first
  341. // must open the parent crs log, issue a prepare on it. Create the new file
  342. // and then issuing a commit or abort on parent crs log. We also, have
  343. // to issue joins for each new crs handle that we get for the new file or
  344. // opened file. Note, this open may cause the file to enter recovery
  345. fs_create_msg_t *msg = (fs_create_msg_t *)args;
  346. NTSTATUS err=STATUS_SUCCESS, status;
  347. UINT32 disp, share, access, flags;
  348. fs_log_rec_t lrec;
  349. PVOID seq;
  350. fs_ea_t x;
  351. HANDLE fd;
  352. HANDLE vfd = FS_GET_VOL_HANDLE(vinfo, nid);
  353. fs_create_reply_t *rmsg = (fs_create_reply_t *)rbuf;
  354. PVOID crs_hd = FS_GET_CRS_HANDLE(vinfo, nid);
  355. fs_id_t *fid;
  356. ULONG retVal;
  357. // This has to work with replays, if fd is not INVALID_HANDLE_VALUE
  358. // return success immediately. This is because replaying a successful open/create
  359. // might change the disposition.
  360. //
  361. if (uinfo && (msg->fnum != INVALID_FHANDLE_T) &&
  362. (uinfo->Table[msg->fnum].Fd[nid] != INVALID_HANDLE_VALUE)) {
  363. FsLog(("Create '%S' already open nid %u fid %u handle 0x%x\n",
  364. msg->name, nid, msg->fnum,
  365. uinfo->Table[msg->fnum].Fd[nid]));
  366. return err;
  367. }
  368. DecodeCreateParam(msg->flags, &flags, &disp, &share, &access);
  369. FsInitEa(&x);
  370. memset(&lrec.fs_id, 0, sizeof(lrec.fs_id));
  371. lrec.command = FS_CREATE;
  372. lrec.flags = msg->flags;
  373. lrec.attrib = msg->attr;
  374. seq = CrsPrepareRecord(crs_hd, (PVOID) &lrec, msg->xid, &retVal);
  375. if (seq == 0) {
  376. FsLog(("create: Unable to prepare log record!, open readonly\n"));
  377. return retVal;
  378. }
  379. // set fid
  380. {
  381. fs_log_rec_t *p = (PVOID) seq;
  382. memcpy(p->fs_id, p->id, sizeof(fs_id_t));
  383. FsInitEaFid(&x, fid);
  384. memcpy(fid, p->id, sizeof(fs_id_t));
  385. }
  386. err = xFsCreate(&fd, vfd, msg->name, msg->name_len,
  387. flags, msg->attr, share, &disp, access,
  388. (PVOID) &x, sizeof(x));
  389. xFsLog(("create: %S err %x access %x disp %x\n", msg->name,
  390. err, access, disp));
  391. CrsCommitOrAbort(crs_hd, seq, err == STATUS_SUCCESS &&
  392. (disp == FILE_CREATED ||
  393. disp == FILE_OVERWRITTEN));
  394. if (err == STATUS_SUCCESS) {
  395. // we need to get the file id, no need to do this, for debug only
  396. err = xFsQueryObjectId(fd, (PVOID) fid);
  397. if (err != STATUS_SUCCESS) {
  398. FsLog(("Failed to get fileid %x\n", err));
  399. err = STATUS_SUCCESS;
  400. }
  401. // Copy the crs record.
  402. *(fs_log_rec_t *)rec = lrec;
  403. }
  404. #ifdef FS_ASYNC
  405. BindNotificationPort(comport, fd, (PVOID) fdnum);
  406. #endif
  407. if (uinfo != NULL && msg->fnum != INVALID_FHANDLE_T) {
  408. FS_SET_USER_HANDLE(uinfo, nid, msg->fnum, fd);
  409. } else {
  410. xFsClose(fd);
  411. }
  412. ASSERT(rmsg != NULL);
  413. memcpy(&rmsg->fid, fid, sizeof(fs_id_t));
  414. rmsg->action = (USHORT)disp;
  415. rmsg->access = (USHORT)access;
  416. *rlen = sizeof(*rmsg);
  417. FsLog(("Create '%S' nid %d fid %d handle %x oid %I64x:%I64x\n",
  418. msg->name,
  419. nid, msg->fnum, fd,
  420. rmsg->fid[0], rmsg->fid[1]));
  421. return err;
  422. }
  423. NTSTATUS
  424. FspOpen(VolInfo_t *vinfo, UserInfo_t *uinfo, int nid,
  425. PVOID args, ULONG len, PVOID rbuf, ULONG_PTR *rlen)
  426. {
  427. // same as create except disp is allows open only and
  428. // no crs logging
  429. fs_create_msg_t *msg = (fs_create_msg_t *)args;
  430. NTSTATUS err=STATUS_SUCCESS, status;
  431. UINT32 disp, share, access, flags;
  432. HANDLE fd;
  433. HANDLE vfd = FS_GET_VOL_HANDLE(vinfo, nid);
  434. fs_create_reply_t *rmsg = (fs_create_reply_t *)rbuf;
  435. ASSERT(rmsg != NULL);
  436. // This has to work with replays, if fd is not INVALID_HANDLE_VALUE
  437. // return success immediately. This is because replaying a successful open/create
  438. // might change the disposition.
  439. //
  440. if (uinfo && (msg->fnum != INVALID_FHANDLE_T) &&
  441. (uinfo->Table[msg->fnum].Fd[nid] != INVALID_HANDLE_VALUE)) {
  442. FsLog(("Open '%S' already open nid %u fid %u handle 0x%x\n",
  443. msg->name, nid, msg->fnum,
  444. uinfo->Table[msg->fnum].Fd[nid]));
  445. return err;
  446. }
  447. DecodeCreateParam(msg->flags, &flags, &disp, &share, &access);
  448. disp = FILE_OPEN;
  449. err = xFsCreate(&fd, vfd, msg->name, msg->name_len,
  450. flags, msg->attr, share, &disp, access,
  451. NULL, 0);
  452. xFsLog(("open: %S err %x access %x disp %x\n", msg->name,
  453. err, access, disp));
  454. if (err == STATUS_SUCCESS) {
  455. ASSERT(disp != FILE_CREATED && disp != FILE_OVERWRITTEN);
  456. // we need to get the file id, no need to do this, for debug only
  457. err = xFsQueryObjectId(fd, (PVOID) &rmsg->fid);
  458. if (err != STATUS_SUCCESS) {
  459. FsLog(("Open '%S' failed to get fileid %x\n",
  460. msg->name, err));
  461. err = STATUS_SUCCESS;
  462. }
  463. }
  464. #ifdef FS_ASYNC
  465. BindNotificationPort(comport, fd, (PVOID) fdnum);
  466. #endif
  467. if (uinfo != NULL && msg->fnum != INVALID_FHANDLE_T) {
  468. FS_SET_USER_HANDLE(uinfo, nid, msg->fnum, fd);
  469. } else {
  470. xFsClose(fd);
  471. }
  472. rmsg->action = (USHORT)disp;
  473. rmsg->access = (USHORT)access;
  474. *rlen = sizeof(*rmsg);
  475. FsLog(("Open '%S' nid %d fid %d handle %x oid %I64x:%I64x\n",
  476. msg->name,
  477. nid, msg->fnum, fd,
  478. rmsg->fid[0], rmsg->fid[1]));
  479. return err;
  480. }
  481. NTSTATUS
  482. FspSetAttr(VolInfo_t *vinfo, UserInfo_t *uinfo, int nid,
  483. PVOID args, ULONG len, PVOID rbuf, ULONG_PTR *rlen, PVOID rec)
  484. {
  485. fs_setattr_msg_t *msg = (fs_setattr_msg_t *)args;
  486. NTSTATUS err;
  487. fs_log_rec_t lrec;
  488. PVOID seq;
  489. PVOID crs_hd = FS_GET_CRS_HANDLE(vinfo, nid);
  490. HANDLE fd = FS_GET_USER_HANDLE(uinfo, nid, msg->fnum);
  491. ULONG retVal;
  492. lrec.command = FS_SETATTR;
  493. memcpy((PVOID) lrec.fs_id, (PVOID) msg->fs_id, sizeof(fs_id_t));
  494. lrec.attrib = msg->attr.FileAttributes;
  495. if ((seq = CrsPrepareRecord(crs_hd, (PVOID) &lrec, msg->xid, &retVal)) == 0) {
  496. return retVal;
  497. }
  498. // can be async ?
  499. err = xFsSetAttr(fd, &msg->attr);
  500. CrsCommitOrAbort(crs_hd, seq, err == STATUS_SUCCESS);
  501. if (err == STATUS_SUCCESS) {
  502. // copy the crs record.
  503. *(fs_log_rec_t *)rec = lrec;
  504. }
  505. return err;
  506. }
  507. NTSTATUS
  508. FspSetAttr2(VolInfo_t *vinfo, UserInfo_t *uinfo, int nid,
  509. PVOID args, ULONG len, PVOID rbuf, ULONG_PTR *rlen, PVOID rec)
  510. {
  511. fs_setattr_msg_t *msg = (fs_setattr_msg_t *)args;
  512. HANDLE fd = INVALID_HANDLE_VALUE;
  513. HANDLE vfd = FS_GET_VOL_HANDLE(vinfo, nid);
  514. PVOID crs_hd = FS_GET_CRS_HANDLE(vinfo, nid);
  515. NTSTATUS err;
  516. fs_log_rec_t lrec;
  517. PVOID seq;
  518. ULONG retVal;
  519. assert(len == sizeof(*msg));
  520. // must be sync in order to close file
  521. err = xFsOpenWA(&fd, vfd, msg->name, msg->name_len);
  522. if (err == STATUS_SUCCESS) {
  523. err = xFsQueryObjectId(fd, (PVOID) &lrec.fs_id);
  524. }
  525. if (err == STATUS_SUCCESS) {
  526. lrec.command = FS_SETATTR;
  527. lrec.attrib = msg->attr.FileAttributes;
  528. if ((seq = CrsPrepareRecord(crs_hd, (PVOID) &lrec, msg->xid, &retVal)) != 0) {
  529. err = xFsSetAttr(fd, &msg->attr);
  530. CrsCommitOrAbort(crs_hd, seq, err == STATUS_SUCCESS);
  531. if (err == STATUS_SUCCESS) {
  532. // copy the crs record.
  533. *(fs_log_rec_t *)rec = lrec;
  534. }
  535. } else {
  536. return retVal;
  537. }
  538. }
  539. if (fd != INVALID_HANDLE_VALUE)
  540. xFsClose(fd);
  541. xFsLog(("setattr2 nid %d '%S' err %x\n", nid, msg->name, err));
  542. return err;
  543. }
  544. NTSTATUS
  545. FspLookup(VolInfo_t *vinfo, UserInfo_t *uinfo, int nid,
  546. PVOID args, ULONG len, PVOID rbuf, ULONG_PTR *rlen)
  547. {
  548. fs_lookup_msg_t *msg = (fs_lookup_msg_t *) args;
  549. HANDLE vfd = FS_GET_VOL_HANDLE(vinfo, nid);
  550. FILE_NETWORK_OPEN_INFORMATION *attr = (FILE_NETWORK_OPEN_INFORMATION *)rbuf;
  551. ASSERT(*rlen == sizeof(*attr));
  552. return xFsQueryAttrName(vfd, msg->name, msg->name_len, attr);
  553. }
  554. NTSTATUS
  555. FspGetAttr(VolInfo_t *vinfo, UserInfo_t *uinfo, int nid,
  556. PVOID args, ULONG len, PVOID rbuf, ULONG_PTR *rlen)
  557. {
  558. fhandle_t handle = *(fhandle_t *) args;
  559. HANDLE fd = FS_GET_USER_HANDLE(uinfo, nid, handle);
  560. FILE_NETWORK_OPEN_INFORMATION *attr = (FILE_NETWORK_OPEN_INFORMATION *)rbuf;
  561. ASSERT(*rlen == sizeof(*attr));
  562. return xFsQueryAttr(fd, attr);
  563. }
  564. NTSTATUS
  565. FspClose(VolInfo_t *vinfo, UserInfo_t *uinfo, int nid,
  566. PVOID args, ULONG len, PVOID rbuf, ULONG_PTR *rlen)
  567. {
  568. fhandle_t handle = *(fhandle_t *) args;
  569. HANDLE fd;
  570. NTSTATUS err=STATUS_SUCCESS;
  571. if (uinfo != NULL && handle != INVALID_FHANDLE_T)
  572. fd = FS_GET_USER_HANDLE(uinfo, nid, handle);
  573. else
  574. fd = FS_GET_VOL_HANDLE(vinfo, nid);
  575. FsLog(("Closing nid %d fid %d handle %x\n", nid, handle, fd));
  576. if (fd != INVALID_HANDLE_VALUE) {
  577. err = xFsClose(fd);
  578. }
  579. // Map failures to success. Shares shouldn't be evicted because of this,
  580. if (err != STATUS_SUCCESS) {
  581. FsLogError(("Close nid %d fid %d handle 0x%x returns 0x%x\n", nid, handle, fd, err));
  582. err = STATUS_SUCCESS;
  583. }
  584. if (uinfo != NULL && handle != INVALID_FHANDLE_T) {
  585. FS_SET_USER_HANDLE(uinfo, nid, handle, INVALID_HANDLE_VALUE);
  586. } else {
  587. FS_SET_VOL_HANDLE(vinfo, nid, INVALID_HANDLE_VALUE);
  588. }
  589. return err;
  590. }
  591. NTSTATUS
  592. FspReadDir(VolInfo_t *vinfo, UserInfo_t *uinfo, int nid,
  593. PVOID args, ULONG len, PVOID rbuf,
  594. ULONG_PTR *entries_found)
  595. {
  596. fs_io_msg_t *msg = (fs_io_msg_t *)args;
  597. int i;
  598. NTSTATUS e = STATUS_SUCCESS;
  599. int size = (int) msg->size;
  600. int cookie = (int) msg->cookie;
  601. HANDLE dir;
  602. dirinfo_t *buffer = (dirinfo_t *)msg->buf;
  603. xFsLog(("DirLoad: size %d\n", size));
  604. if (uinfo != NULL && msg->fnum != INVALID_FHANDLE_T)
  605. dir = FS_GET_USER_HANDLE(uinfo, nid, msg->fnum);
  606. else
  607. dir = FS_GET_VOL_HANDLE(vinfo, nid);
  608. *entries_found = 0;
  609. for(i = 0; size >= sizeof(dirinfo_t) ; i+=PAGESIZE) {
  610. // this must come from the source if we are to do async readdir
  611. char buf[PAGESIZE];
  612. int sz;
  613. sz = min(PAGESIZE, size);
  614. e = xFsReadDir(dir, buf, &sz, (cookie == 0) ? TRUE : FALSE);
  615. if (e == STATUS_SUCCESS) {
  616. PFILE_DIRECTORY_INFORMATION p;
  617. p = (PFILE_DIRECTORY_INFORMATION) buf;
  618. while (size >= sizeof(dirinfo_t)) {
  619. char *foo;
  620. int k;
  621. k = p->FileNameLength/sizeof(WCHAR);
  622. p->FileName[k] = L'\0';
  623. // name is a WCHAR array of size MAX_PATH.
  624. StringCchCopyW(buffer->name, MAX_PATH, p->FileName);
  625. buffer->attribs.file_size = p->EndOfFile.QuadPart;
  626. buffer->attribs.alloc_size = p->AllocationSize.QuadPart;
  627. buffer->attribs.create_time = p->CreationTime.QuadPart;
  628. buffer->attribs.access_time = p->LastAccessTime.QuadPart;
  629. buffer->attribs.mod_time = p->LastWriteTime.QuadPart;
  630. buffer->attribs.attributes = p->FileAttributes;
  631. buffer->cookie = ++cookie;
  632. buffer++;
  633. size -= sizeof(dirinfo_t);
  634. (*entries_found)++;
  635. if (p->NextEntryOffset == 0)
  636. break;
  637. foo = (char *) p;
  638. foo += p->NextEntryOffset;
  639. p = (PFILE_DIRECTORY_INFORMATION) foo;
  640. }
  641. }
  642. else {
  643. break;
  644. }
  645. }
  646. return e;
  647. }
  648. NTSTATUS
  649. FspMkDir(VolInfo_t *vinfo, UserInfo_t *uinfo, int nid,
  650. PVOID args, ULONG len, PVOID rbuf, ULONG_PTR *rlen, PVOID rec)
  651. {
  652. fs_create_msg_t *msg = (fs_create_msg_t *)args;
  653. NTSTATUS err;
  654. HANDLE fd;
  655. fs_log_rec_t lrec;
  656. PVOID seq;
  657. fs_ea_t x;
  658. PVOID crs_hd = FS_GET_CRS_HANDLE(vinfo, nid);
  659. HANDLE vfd = FS_GET_VOL_HANDLE(vinfo, nid);
  660. fs_id_t *fid;
  661. UINT32 disp, share, access, flags;
  662. ULONG retVal;
  663. FsInitEa(&x);
  664. memset(&lrec.fs_id, 0, sizeof(lrec.fs_id));
  665. lrec.command = FS_MKDIR;
  666. lrec.attrib = msg->attr;
  667. lrec.flags = msg->flags;
  668. if ((seq = CrsPrepareRecord(crs_hd, (PVOID) &lrec, msg->xid, &retVal)) == 0) {
  669. return retVal;
  670. }
  671. // set fid
  672. {
  673. fs_log_rec_t *p = (PVOID) seq;
  674. memcpy(p->fs_id, p->id, sizeof(fs_id_t));
  675. FsInitEaFid(&x, fid);
  676. // set fs_id of the file
  677. memcpy(fid, p->id, sizeof(fs_id_t));
  678. }
  679. // decode attributes
  680. DecodeCreateParam(msg->flags, &flags, &disp, &share, &access);
  681. // always sync call
  682. err = xFsCreate(&fd, vfd, msg->name, msg->name_len, flags,
  683. msg->attr, share, &disp, access,
  684. (PVOID) &x, sizeof(x));
  685. FsLog(("Mkdir '%S' %x: cflags %x flags:%x attr:%x share:%x disp:%x access:%x\n",
  686. msg->name, err, msg->flags,
  687. flags, msg->attr, share, disp, access));
  688. CrsCommitOrAbort(crs_hd, seq, err == STATUS_SUCCESS &&
  689. (disp == FILE_CREATED ||
  690. disp == FILE_OVERWRITTEN));
  691. if (err == STATUS_SUCCESS) {
  692. // return fid
  693. if (rbuf != NULL) {
  694. ASSERT(*rlen == sizeof(fs_id_t));
  695. memcpy(rbuf, fid, sizeof(fs_id_t));
  696. }
  697. xFsClose(fd);
  698. // copy the crs record.
  699. *(fs_log_rec_t *)rec = lrec;
  700. }
  701. return err;
  702. }
  703. NTSTATUS
  704. FspRemove(VolInfo_t *vinfo, UserInfo_t *uinfo, int nid,
  705. PVOID args, ULONG len, PVOID rbuf, ULONG_PTR *rlen, PVOID rec)
  706. {
  707. fs_remove_msg_t *msg = (fs_remove_msg_t *)args;
  708. NTSTATUS err;
  709. fs_log_rec_t lrec;
  710. PVOID seq;
  711. PVOID crs_hd = FS_GET_CRS_HANDLE(vinfo, nid);
  712. HANDLE vfd = FS_GET_VOL_HANDLE(vinfo, nid);
  713. HANDLE fd;
  714. ULONG retVal;
  715. *rlen = 0;
  716. // next three statements to obtain name -> fs_id
  717. err = xFsOpenRA(&fd, vfd, msg->name, msg->name_len);
  718. if (err != STATUS_SUCCESS) {
  719. return err;
  720. }
  721. // get object id
  722. err = xFsQueryObjectId(fd, (PVOID) &lrec.fs_id);
  723. xFsClose(fd);
  724. lrec.command = FS_REMOVE;
  725. if (err != STATUS_SUCCESS) {
  726. return err;
  727. }
  728. if ((seq = CrsPrepareRecord(crs_hd, (PVOID) &lrec, msg->xid, &retVal)) == 0) {
  729. return retVal;
  730. }
  731. err = xFsDelete(vfd, msg->name, msg->name_len);
  732. CrsCommitOrAbort(crs_hd, seq, err == STATUS_SUCCESS);
  733. if (err == STATUS_SUCCESS) {
  734. // copy the crs record.
  735. *(fs_log_rec_t *)rec = lrec;
  736. }
  737. xFsLog(("Rm nid %d '%S' %x\n", nid, msg->name, err));
  738. return err;
  739. }
  740. NTSTATUS
  741. FspRename(VolInfo_t *vinfo, UserInfo_t *uinfo, int nid,
  742. PVOID args, ULONG len, PVOID rbuf, ULONG_PTR *rlen, PVOID rec)
  743. {
  744. fs_rename_msg_t *msg = (fs_rename_msg_t *)args;
  745. NTSTATUS err;
  746. fs_log_rec_t lrec;
  747. PVOID seq;
  748. PVOID crs_hd = FS_GET_CRS_HANDLE(vinfo, nid);
  749. HANDLE vfd = FS_GET_VOL_HANDLE(vinfo, nid);
  750. HANDLE fd;
  751. ULONG retVal;
  752. lrec.command = FS_RENAME;
  753. err = xFsOpen(&fd, vfd, msg->sname, msg->sname_len,
  754. STANDARD_RIGHTS_REQUIRED| SYNCHRONIZE |
  755. FILE_READ_EA |
  756. FILE_READ_ATTRIBUTES | FILE_WRITE_ATTRIBUTES,
  757. FILE_SHARE_READ, // | FILE_SHARE_WRITE | FILE_SHARE_DELETE,
  758. 0);
  759. if (err != STATUS_SUCCESS) {
  760. return err;
  761. }
  762. // get file id
  763. err = xFsQueryObjectId(fd, (PVOID) &lrec.fs_id);
  764. if (err == STATUS_SUCCESS) {
  765. if ((seq = CrsPrepareRecord(crs_hd, (PVOID) &lrec, msg->xid, &retVal)) != 0) {
  766. err = xFsRename(fd, vfd, msg->dname, msg->dname_len);
  767. CrsCommitOrAbort(crs_hd, seq, err == STATUS_SUCCESS);
  768. if (err == STATUS_SUCCESS) {
  769. // copy the crs record.
  770. *(fs_log_rec_t *)rec = lrec;
  771. }
  772. } else {
  773. err = retVal;
  774. }
  775. } else {
  776. xFsLog(("Failed to obtain fsid %x\n", err));
  777. }
  778. xFsClose(fd);
  779. xFsLog(("Mv nid %d %S -> %S err %x\n", nid, msg->sname, msg->dname,
  780. err));
  781. return err;
  782. }
  783. NTSTATUS
  784. FspWrite(VolInfo_t *vinfo, UserInfo_t *uinfo, int nid,
  785. PVOID args, ULONG len, PVOID rbuf, ULONG_PTR *rlen, PVOID rec)
  786. {
  787. NTSTATUS err;
  788. IO_STATUS_BLOCK ios;
  789. LARGE_INTEGER off;
  790. ULONG key;
  791. fs_io_msg_t *msg = (fs_io_msg_t *)args;
  792. fs_log_rec_t lrec;
  793. PVOID seq;
  794. PVOID crs_hd = FS_GET_CRS_HANDLE(vinfo, nid);
  795. HANDLE fd;
  796. ULONG retVal;
  797. if (uinfo != NULL && msg->fnum != INVALID_FHANDLE_T)
  798. fd = FS_GET_USER_HANDLE(uinfo, nid, msg->fnum);
  799. else
  800. fd = (HANDLE) msg->context;
  801. lrec.command = FS_WRITE;
  802. memcpy(lrec.fs_id, (PVOID) msg->fs_id, sizeof(fs_id_t));
  803. lrec.offset = msg->offset;
  804. lrec.length = msg->size;
  805. if ((seq = CrsPrepareRecord(crs_hd, (PVOID) &lrec, msg->xid, &retVal)) == 0) {
  806. return retVal;
  807. }
  808. // Write ops
  809. xFsLog(("Write %d fd %p len %d off %d\n", nid, fd, msg->size, msg->offset));
  810. off.LowPart = msg->offset;
  811. off.HighPart = 0;
  812. key = FS_BUILD_LOCK_KEY((uinfo ? uinfo->Uid : 0), nid, msg->fnum);
  813. if (msg->size > 0) {
  814. err = NtWriteFile(fd, NULL, NULL, (PVOID) NULL, &ios,
  815. msg->buf, msg->size, &off, &key);
  816. } else {
  817. FILE_END_OF_FILE_INFORMATION x;
  818. x.EndOfFile = off;
  819. ios.Information = 0;
  820. err = NtSetInformationFile(fd, &ios,
  821. (char *) &x, sizeof(x),
  822. FileEndOfFileInformation);
  823. }
  824. if (err == STATUS_PENDING) {
  825. EventWait(fd);
  826. err = ios.Status;
  827. }
  828. *rlen = ios.Information;
  829. CrsCommitOrAbort(crs_hd, seq, err == STATUS_SUCCESS);
  830. if (err == STATUS_SUCCESS) {
  831. // copy the crs record.
  832. *(fs_log_rec_t *)rec = lrec;
  833. }
  834. xFsLog(("fs_write%d err %x sz %d\n", nid, err, *rlen));
  835. return err;
  836. }
  837. NTSTATUS
  838. FspRead(VolInfo_t *vinfo, UserInfo_t *uinfo, int nid,
  839. PVOID args, ULONG sz, PVOID rbuf, ULONG_PTR *rlen)
  840. {
  841. fs_io_msg_t *msg = (fs_io_msg_t *)args;
  842. NTSTATUS err;
  843. IO_STATUS_BLOCK ios;
  844. LARGE_INTEGER off;
  845. HANDLE fd = FS_GET_USER_HANDLE(uinfo, nid, msg->fnum);
  846. ULONG key;
  847. assert(sz == sizeof(*msg));
  848. // Read ops
  849. off.LowPart = msg->offset;
  850. off.HighPart = 0;
  851. key = FS_BUILD_LOCK_KEY(uinfo->Uid, nid, msg->fnum);
  852. ios.Information = 0;
  853. err = NtReadFile(fd, NULL, NULL, NULL,
  854. &ios, msg->buf, msg->size, &off, &key);
  855. if (err == STATUS_PENDING) {
  856. EventWait(fd);
  857. err = ios.Status;
  858. }
  859. *rlen = ios.Information;
  860. xFsLog(("fs_read%d err %x sz %d\n", nid, err, *rlen));
  861. return err;
  862. }
  863. NTSTATUS
  864. FspFlush(VolInfo_t *vinfo, UserInfo_t *uinfo, int nid,
  865. PVOID args, ULONG sz, PVOID rbuf, ULONG_PTR *rlen, PVOID rec)
  866. {
  867. fhandle_t fnum = *(fhandle_t *)args;
  868. IO_STATUS_BLOCK ios;
  869. HANDLE fd;
  870. ASSERT(sz == sizeof(fhandle_t));
  871. *rlen = 0;
  872. if (uinfo != NULL && fnum != INVALID_FHANDLE_T) {
  873. fd = FS_GET_USER_HANDLE(uinfo, nid, fnum);
  874. } else {
  875. fd = FS_GET_VOL_HANDLE(vinfo, nid);
  876. }
  877. return NtFlushBuffersFile(fd, &ios);
  878. }
  879. NTSTATUS
  880. FspLock(VolInfo_t *vinfo, UserInfo_t *uinfo, int nid,
  881. PVOID args, ULONG sz, PVOID rbuf, ULONG_PTR *rlen, PVOID rec)
  882. {
  883. fs_lock_msg_t *msg = (fs_lock_msg_t *)args;
  884. NTSTATUS err;
  885. IO_STATUS_BLOCK ios;
  886. LARGE_INTEGER offset, len;
  887. BOOLEAN wait, shared;
  888. ULONG key = FS_BUILD_LOCK_KEY(uinfo->Uid, nid, msg->fnum);
  889. assert(sz == sizeof(*msg));
  890. // xxx: need to log
  891. FsLog(("Lock %d off %d len %d flags %x\n", msg->fnum, msg->offset, msg->length,
  892. msg->flags));
  893. offset.LowPart = msg->offset;
  894. offset.HighPart = 0;
  895. len.LowPart = msg->length;
  896. len.HighPart = 0;
  897. // todo: need to be async, if we are the owner node and failnow is false, then
  898. // we should pass in the context and the completion port responses back
  899. // to the user
  900. wait = (BOOLEAN) ((msg->flags & FS_LOCK_WAIT) ? TRUE : FALSE);
  901. // todo: this can cause lots of headache, never wait.
  902. wait = FALSE;
  903. shared = (BOOLEAN) ((msg->flags & FS_LOCK_SHARED) ? FALSE : TRUE);
  904. err = NtLockFile(uinfo->Table[msg->fnum].Fd[nid],
  905. NULL, NULL, (PVOID) NULL, &ios,
  906. &offset, &len,
  907. key, wait, shared);
  908. // xxx: Need to log in software only
  909. *rlen = 0;
  910. FsLog(("Lock err %x\n", err));
  911. return err;
  912. }
  913. NTSTATUS
  914. FspUnlock(VolInfo_t *vinfo, UserInfo_t *uinfo, int nid,
  915. PVOID args, ULONG sz, PVOID rbuf, ULONG_PTR *rlen, PVOID rec)
  916. {
  917. fs_lock_msg_t *msg = (fs_lock_msg_t *)args;
  918. NTSTATUS err;
  919. IO_STATUS_BLOCK ios;
  920. LARGE_INTEGER offset, len;
  921. ULONG key = FS_BUILD_LOCK_KEY(uinfo->Uid, nid, msg->fnum);
  922. assert(sz == sizeof(*msg));
  923. // xxx: need to log
  924. xFsLog(("Unlock %d off %d len %d\n", msg->fnum, msg->offset, msg->length));
  925. offset.LowPart = msg->offset;
  926. offset.HighPart = 0;
  927. len.LowPart = msg->length;
  928. len.HighPart = 0;
  929. // always sync I think
  930. err = NtUnlockFile(uinfo->Table[msg->fnum].Fd[nid], &ios, &offset, &len, key);
  931. // xxx: need to log in software only
  932. FsLog(("Unlock err %x\n", err));
  933. *rlen = 0;
  934. return err;
  935. }
  936. NTSTATUS
  937. FspStatFs(VolInfo_t *vinfo, UserInfo_t *uinfo, int nid,
  938. PVOID args, ULONG sz, PVOID rbuf, ULONG_PTR *rlen)
  939. {
  940. fs_attr_t *msg = (fs_attr_t *)args;
  941. NTSTATUS err;
  942. IO_STATUS_BLOCK ios;
  943. FILE_FS_SIZE_INFORMATION fsinfo;
  944. HANDLE vfd = FS_GET_VOL_HANDLE(vinfo, nid);
  945. assert(sz == sizeof(*msg));
  946. // xxx: need to log
  947. lstrcpyn(msg->fs_name, "FsCrs", MAX_FS_NAME_LEN);
  948. err = NtQueryVolumeInformationFile(vfd, &ios,
  949. (PVOID) &fsinfo,
  950. sizeof(fsinfo),
  951. FileFsSizeInformation);
  952. if (err == STATUS_SUCCESS) {
  953. msg->total_units = fsinfo.TotalAllocationUnits.QuadPart;
  954. msg->free_units = fsinfo.AvailableAllocationUnits.QuadPart;
  955. msg->sectors_per_unit = fsinfo.SectorsPerAllocationUnit;
  956. msg->bytes_per_sector = fsinfo.BytesPerSector;
  957. }
  958. *rlen = 0;
  959. return err;
  960. }
  961. NTSTATUS
  962. FspCheckFs(VolInfo_t *vinfo, UserInfo_t *uinfo, int nid,
  963. PVOID args, ULONG sz, PVOID rbuf, ULONG_PTR *rlen)
  964. {
  965. NTSTATUS err;
  966. IO_STATUS_BLOCK ios;
  967. FILE_FS_SIZE_INFORMATION fsinfo;
  968. HANDLE vfd = FS_GET_VOL_HANDLE(vinfo, nid);
  969. PVOID crshdl = FS_GET_CRS_HANDLE(vinfo, nid);
  970. err = NtQueryVolumeInformationFile(vfd, &ios,
  971. (PVOID) &fsinfo,
  972. sizeof(fsinfo),
  973. FileFsSizeInformation);
  974. // We need to issue crsflush to flush last write
  975. CrsFlush(crshdl);
  976. if (err == STATUS_SUCCESS) {
  977. #if 0
  978. HANDLE notifyfd = FS_GET_VOL_NOTIFY_HANDLE(vinfo, nid);
  979. // Just an additional thing.
  980. if (WaitForSingleObject(notifyfd, 0) == WAIT_OBJECT_0) {
  981. // reload notification again
  982. #if 1
  983. NtNotifyChangeDirectoryFile(notifyfd,
  984. vinfo->NotifyChangeEvent[nid],
  985. NULL,
  986. NULL,
  987. &MystaticIoStatusBlock,
  988. &Mystaticchangebuff,
  989. sizeof(Mystaticchangebuff),
  990. FILE_NOTIFY_CHANGE_EA,
  991. (BOOLEAN)FALSE
  992. );
  993. #else
  994. FindNextChangeNotification(notifyfd);
  995. #endif
  996. }
  997. #endif
  998. } else {
  999. FsLog(("FsReserve failed nid %d err %x\n", nid, err));
  1000. }
  1001. *rlen = 0;
  1002. return err;
  1003. }
  1004. NTSTATUS
  1005. FspGetRoot(VolInfo_t *vinfo, UserInfo_t *uinfo, int nid,
  1006. PVOID args, ULONG sz, PVOID rbuf, ULONG_PTR *rlen)
  1007. {
  1008. LPWSTR vname = FS_GET_VOL_NAME(vinfo, nid);
  1009. // I know rbuf is 8192 WCHARS, see FileNameDest field of JobBuf_t structure.
  1010. // Use MAX_PATH.
  1011. StringCchPrintfW(rbuf, MAX_PATH, L"\\\\?\\%s\\%s",vname,vinfo->Root);
  1012. FsLog(("FspGetRoot '%S'\n", rbuf));
  1013. return STATUS_SUCCESS;
  1014. }
  1015. /////////////////////////////////////////////////////////////////////////////////////
  1016. VOID
  1017. TryAvailRequest(fs_handler_t callback, VolInfo_t *vol, UserInfo_t *uinfo,
  1018. PVOID msg, ULONG len, PVOID *rbuf, ULONG rsz, IO_STATUS_BLOCK *ios)
  1019. /*
  1020. This is similar to SendAvailRequest(), but on failure this would just evict the shares.
  1021. */
  1022. {
  1023. ULONG mask;
  1024. int i;
  1025. DWORD counts=0, countf=0;
  1026. ULONG masks=0, maskf=0;
  1027. ULONG rets=0, retf=0, ret=0;
  1028. ULONG evict_set=0;
  1029. NTSTATUS statusf;
  1030. // Grab Shared Lock
  1031. RwLockShared(&vol->Lock);
  1032. for (mask = vol->ReadSet, i = 0; mask != 0; mask = mask >> 1, i++) {
  1033. if (mask & 0x1) {
  1034. ios[i].Information = rsz;
  1035. ios[i].Status = callback(vol, uinfo, i,
  1036. msg, len,
  1037. rbuf ? rbuf[i] : NULL,
  1038. &ios[i].Information);
  1039. if (ios[i].Status == STATUS_SUCCESS) {
  1040. counts++;
  1041. masks |= (1<<i);
  1042. rets = i;
  1043. }
  1044. else if (IsNetworkFailure(ios[i].Status)) {
  1045. evict_set |= (1<<i);
  1046. }
  1047. else {
  1048. countf++;
  1049. maskf |= (1<<i);
  1050. statusf = ios[i].Status;
  1051. retf = i;
  1052. }
  1053. }
  1054. }
  1055. evict_set |= maskf;
  1056. if (evict_set) {
  1057. RwUnlockShared(&vol->Lock);
  1058. RwLockExclusive(&vol->Lock);
  1059. FspEvict(vol, evict_set, TRUE);
  1060. RwUnlockExclusive(&vol->Lock);
  1061. }
  1062. else {
  1063. RwUnlockShared(&vol->Lock);
  1064. }
  1065. return;
  1066. }
  1067. int
  1068. SendAvailRequest(fs_handler_t callback, VolInfo_t *vol, UserInfo_t *uinfo,
  1069. PVOID msg, ULONG len, PVOID *rbuf, ULONG rsz, IO_STATUS_BLOCK *ios)
  1070. {
  1071. ULONG mask;
  1072. int i;
  1073. DWORD counts=0, countf=0;
  1074. ULONG masks=0, maskf=0;
  1075. ULONG rets=0, retf=0, ret=0;
  1076. ULONG evict_set=0;
  1077. NTSTATUS statusf;
  1078. if (vol == NULL)
  1079. return ERROR_INVALID_HANDLE;
  1080. Retry:
  1081. mask = counts = countf = masks = maskf = rets = retf = ret = evict_set = 0;
  1082. WaitForArbCompletion(vol);
  1083. // Check for the going away flag.
  1084. if (vol->GoingAway) {
  1085. ios[0].Status = STATUS_DEVICE_NOT_READY;
  1086. ios[0].Information = 0;
  1087. return 0;
  1088. }
  1089. // Grab Shared Lock
  1090. RwLockShared(&vol->Lock);
  1091. // issue update for each replica
  1092. i = 0;
  1093. for (mask = vol->ReadSet; mask != 0; mask = mask >> 1, i++) {
  1094. if (mask & 0x1) {
  1095. ios[i].Information = rsz;
  1096. ios[i].Status = callback(vol, uinfo, i,
  1097. msg, len,
  1098. rbuf ? rbuf[i] : NULL,
  1099. &ios[i].Information);
  1100. if (ios[i].Status == STATUS_SUCCESS) {
  1101. counts++;
  1102. masks |= (1<<i);
  1103. rets = i;
  1104. }
  1105. else if (IsNetworkFailure(ios[i].Status)) {
  1106. evict_set |= (1<<i);
  1107. }
  1108. else {
  1109. countf++;
  1110. maskf |= (1<<i);
  1111. statusf = ios[i].Status;
  1112. retf = i;
  1113. }
  1114. }
  1115. }
  1116. // Logic:
  1117. // 1. Shares in the evict set have to be evicted.
  1118. // 2. If countf > counts, evict masks, and viceversa.
  1119. //
  1120. // New logic:
  1121. // 1. counts or countf have to be majority.
  1122. // 2. If 1 is correct evict shares in minority.
  1123. // 3. If 1 is wrong. evict shares in evict_set and start arbitration.
  1124. //
  1125. if (CRS_QUORUM(counts, vol->DiskListSz)) {
  1126. evict_set |= maskf;
  1127. ios[0].Status = STATUS_SUCCESS;
  1128. ios[0].Information = counts;
  1129. ret = rets;
  1130. } else if (CRS_QUORUM(countf, vol->DiskListSz)) {
  1131. evict_set |= masks;
  1132. ios[0].Status = statusf;
  1133. ios[0].Information = countf;
  1134. ret = retf;
  1135. } else {
  1136. HANDLE cleanup, arbThread;
  1137. PVOID arb;
  1138. // evict the shares in the evict set and restart arbitration.
  1139. RwUnlockShared(&vol->Lock);
  1140. RwLockExclusive(&vol->Lock);
  1141. FspEvict(vol, evict_set, TRUE);
  1142. RwUnlockExclusive(&vol->Lock);
  1143. arb = FsArbitrate(vol, &cleanup, &arbThread);
  1144. // FsLog(("SendAvailRequest() starting arbitration %x\n", arb));
  1145. ASSERT((arb != NULL));
  1146. SetEvent(cleanup);
  1147. CloseHandle(arbThread);
  1148. goto Retry;
  1149. }
  1150. // FsLog(("SendAvailRequest() exititng evict_set %x\n", evict_set));
  1151. if (evict_set) {
  1152. RwUnlockShared(&vol->Lock);
  1153. RwLockExclusive(&vol->Lock);
  1154. FspEvict(vol, evict_set, TRUE);
  1155. RwUnlockExclusive(&vol->Lock);
  1156. }
  1157. else {
  1158. RwUnlockShared(&vol->Lock);
  1159. }
  1160. return ret;
  1161. }
  1162. int
  1163. SendRequest(fs_handler1_t callback, UserInfo_t *uinfo,
  1164. PVOID msg, ULONG len, PVOID *rbuf, ULONG rsz, IO_STATUS_BLOCK *ios)
  1165. {
  1166. ULONG mask;
  1167. int i, j;
  1168. VolInfo_t *vol = uinfo->VolInfo;
  1169. DWORD counts=0, countf=0;
  1170. ULONG masks=0, maskf=0;
  1171. ULONG rets=0, retf=0, ret=0;
  1172. ULONG evict_set=0;
  1173. NTSTATUS statusf;
  1174. CrsRecord_t crsRec, crsRec1;
  1175. if (vol == NULL)
  1176. return ERROR_INVALID_HANDLE;
  1177. RtlZeroMemory(&crsRec, sizeof(crsRec));
  1178. Retry:
  1179. WaitForArbCompletion(vol);
  1180. // Check for the going away flag.
  1181. if (vol->GoingAway) {
  1182. ios[0].Status = STATUS_DEVICE_NOT_READY;
  1183. ios[0].Information = 0;
  1184. return 0;
  1185. }
  1186. // lock volume for update
  1187. RwLockShared(&vol->Lock);
  1188. if(FsIsOnlineReadWrite((PVOID)vol) != ERROR_SUCCESS) {
  1189. HANDLE cleanup, arbThread;
  1190. PVOID arb;
  1191. // Start arbitration.
  1192. RwUnlockShared(&vol->Lock);
  1193. arb = FsArbitrate(vol, &cleanup, &arbThread);
  1194. ASSERT((arb != NULL));
  1195. SetEvent(cleanup);
  1196. CloseHandle(arbThread);
  1197. goto Retry;
  1198. }
  1199. // Since we are in a retry loop verify that our last attempt at update failed before
  1200. // proceeding.
  1201. //
  1202. // Try to access the crs log record, and check the state field.
  1203. //
  1204. if (crsRec.hdr.epoch) {
  1205. for (i=0;i<FsMaxNodes;i++) {
  1206. if (vol->WriteSet & (1<<i)) {
  1207. DWORD retVal;
  1208. retVal = CrspNextLogRecord(vol->CrsHdl[i], &crsRec, &crsRec1, TRUE);
  1209. if ((retVal != ERROR_SUCCESS)||(!(crsRec1.hdr.state & CRS_COMMIT))) {
  1210. // The previous update did not suceed.
  1211. // zero crsRec and continue.
  1212. RtlZeroMemory(&crsRec, sizeof(crsRec));
  1213. break;
  1214. }
  1215. else {
  1216. // The last update did suceed.
  1217. // Return replica index on which it suceeded last time and which is also in
  1218. // the current write set.
  1219. //
  1220. for (j=0;j<FsMaxNodes;j++) {
  1221. if ((masks & vol->WriteSet) & (1<<j)) {
  1222. RwUnlockShared(&vol->Lock);
  1223. return j;
  1224. }
  1225. }
  1226. RwUnlockShared(&vol->Lock);
  1227. return i;
  1228. }
  1229. }
  1230. }
  1231. }
  1232. mask = counts = countf = masks = maskf = rets = retf = ret = evict_set = 0;
  1233. // issue update for each replica
  1234. i = 0;
  1235. for (mask = vol->WriteSet; mask != 0; mask = mask >> 1, i++) {
  1236. if (mask & 0x1) {
  1237. ios[i].Information = rsz;
  1238. ios[i].Status = callback(vol, uinfo, i,
  1239. msg, len,
  1240. rbuf ? rbuf[i] : NULL,
  1241. &ios[i].Information, (PVOID)&crsRec);
  1242. if (ios[i].Status == STATUS_SUCCESS) {
  1243. counts++;
  1244. masks |= (1<<i);
  1245. rets = i;
  1246. }
  1247. else if (IsNetworkFailure(ios[i].Status)) {
  1248. evict_set |= (1<<i);
  1249. }
  1250. else {
  1251. countf++;
  1252. maskf |= (1<<i);
  1253. statusf = ios[i].Status;
  1254. retf = i;
  1255. }
  1256. }
  1257. }
  1258. // Logic:
  1259. // 1. Shares in the evict set have to be evicted.
  1260. // 2. If countf > counts. evict masks and viceversa.
  1261. //
  1262. // New logic:
  1263. // 1. counts or countf have to be majority.
  1264. // 2. If 1 is correct evict shares in minority.
  1265. // 3. If 1 is wrong. evict shares in evict_set and start arbitration.
  1266. //
  1267. if (CRS_QUORUM(counts, vol->DiskListSz)) {
  1268. evict_set |= maskf;
  1269. ios[0].Status = STATUS_SUCCESS;
  1270. ios[0].Information = counts;
  1271. ret = rets;
  1272. } else if (CRS_QUORUM(countf, vol->DiskListSz)) {
  1273. evict_set |= masks;
  1274. ios[0].Status = statusf;
  1275. ios[0].Information = countf;
  1276. ret = retf;
  1277. } else {
  1278. HANDLE cleanup, arbThread;
  1279. PVOID arb;
  1280. // evict the shares in the evict set and restart arbitration.
  1281. RwUnlockShared(&vol->Lock);
  1282. RwLockExclusive(&vol->Lock);
  1283. FspEvict(vol, evict_set, TRUE);
  1284. RwUnlockExclusive(&vol->Lock);
  1285. arb = FsArbitrate(vol, &cleanup, &arbThread);
  1286. ASSERT((arb != NULL));
  1287. SetEvent(cleanup);
  1288. CloseHandle(arbThread);
  1289. goto Retry;
  1290. }
  1291. // evict the shares in the evict set.
  1292. if (evict_set) {
  1293. RwUnlockShared(&vol->Lock);
  1294. RwLockExclusive(&vol->Lock);
  1295. FspEvict(vol, evict_set, TRUE);
  1296. RwUnlockExclusive(&vol->Lock);
  1297. }
  1298. else {
  1299. RwUnlockShared(&vol->Lock);
  1300. }
  1301. return ret;
  1302. }
  1303. NTSTATUS
  1304. SendReadRequest(fs_handler_t callback, UserInfo_t *uinfo,
  1305. PVOID msg, ULONG len, PVOID rbuf, ULONG rsz, IO_STATUS_BLOCK *ios)
  1306. {
  1307. ULONG mask;
  1308. int i;
  1309. VolInfo_t *vol = uinfo->VolInfo;
  1310. DWORD counts=0, countf=0;
  1311. ULONG masks=0, maskf=0;
  1312. ULONG evict_set=0;
  1313. NTSTATUS statusf;
  1314. if (vol == NULL)
  1315. return ERROR_INVALID_HANDLE;
  1316. Retry:
  1317. mask = counts = countf = masks = maskf = evict_set = 0;
  1318. WaitForArbCompletion(vol);
  1319. // Check for the going away flag.
  1320. if (vol->GoingAway) {
  1321. ios[0].Status = STATUS_DEVICE_NOT_READY;
  1322. ios[0].Information = 0;
  1323. return 0;
  1324. }
  1325. // Lock volume for update
  1326. RwLockShared(&vol->Lock);
  1327. #if 0
  1328. // Volume has to be online in readonly mode atleast for this to suceed.
  1329. if (FsIsOnlineReadonly((PVOID)vol) != ERROR_SUCCESS) {
  1330. ios[0].Status = STATUS_DEVICE_NOT_READY;
  1331. ios[0].Information = 0;
  1332. RwUnlockShared(&vol->Lock);
  1333. return 0;
  1334. }
  1335. #endif
  1336. // issue update for each replica
  1337. i = 0;
  1338. for (mask = vol->ReadSet; mask != 0; mask = mask >> 1, i++) {
  1339. if (mask & 0x1) {
  1340. ios->Information = rsz;
  1341. ios->Status = callback(vol, uinfo, i,
  1342. msg, len, rbuf, &ios->Information);
  1343. if (ios->Status == STATUS_SUCCESS) {
  1344. counts++;
  1345. masks |= (1<<i);
  1346. break;
  1347. }
  1348. else if (IsNetworkFailure(ios->Status)) {
  1349. evict_set |= (1<<i);
  1350. }
  1351. else {
  1352. countf++;
  1353. maskf |= (1<<i);
  1354. statusf = ios->Status;
  1355. }
  1356. }
  1357. }
  1358. // Logic:
  1359. // 1. Evict evict_set.
  1360. // 2. if counts > 0. Evict maskf.
  1361. //
  1362. // New Logic:
  1363. // 1. If couns > 0 add maskf to evict_set.
  1364. // 2.
  1365. if (counts > 0) {
  1366. evict_set |= maskf;
  1367. //ios[0].Status = STATUS_SUCCESS;
  1368. //ios[0].Information = 0;
  1369. }
  1370. else if (countf > 0) {
  1371. // ios->Status = statusf;
  1372. // ios->Information = countf;
  1373. }
  1374. else {
  1375. HANDLE cleanup, arbThread;
  1376. PVOID arb;
  1377. // evict the shares in the evict set and restart arbitration.
  1378. RwUnlockShared(&vol->Lock);
  1379. RwLockExclusive(&vol->Lock);
  1380. FspEvict(vol, evict_set, TRUE);
  1381. RwUnlockExclusive(&vol->Lock);
  1382. arb = FsArbitrate(vol, &cleanup, &arbThread);
  1383. ASSERT((arb != NULL));
  1384. SetEvent(cleanup);
  1385. CloseHandle(arbThread);
  1386. goto Retry;
  1387. }
  1388. if (evict_set) {
  1389. RwUnlockShared(&vol->Lock);
  1390. RwLockExclusive(&vol->Lock);
  1391. FspEvict(vol, evict_set, TRUE);
  1392. RwUnlockExclusive(&vol->Lock);
  1393. }
  1394. else {
  1395. RwUnlockShared(&vol->Lock);
  1396. }
  1397. return 0;
  1398. }
  1399. ///////////////////////////////////////////////////////////////////////////////
  1400. DWORD
  1401. FsCreate(
  1402. PVOID fshdl,
  1403. LPWSTR name,
  1404. USHORT namelen,
  1405. UINT32 flags,
  1406. fattr_t* fattr,
  1407. fhandle_t* phandle,
  1408. UINT32 *action
  1409. )
  1410. {
  1411. UserInfo_t *uinfo = (UserInfo_t *) fshdl;
  1412. NTSTATUS err=STATUS_SUCCESS;
  1413. fs_create_reply_t nfd[FsMaxNodes];
  1414. IO_STATUS_BLOCK status[FsMaxNodes];
  1415. PVOID rbuf[FsMaxNodes];
  1416. fs_create_msg_t msg;
  1417. fhandle_t fdnum=INVALID_FHANDLE_T;
  1418. ASSERT(uinfo != NULL);
  1419. xFsLog(("FsDT::create(%S, 0x%08X, 0x%08X, 0x%08d)\n",
  1420. name, flags, fattr, namelen));
  1421. if (!phandle) return ERROR_INVALID_PARAMETER;
  1422. *phandle = INVALID_FHANDLE_T;
  1423. if (!name) return ERROR_INVALID_PARAMETER;
  1424. if (flags != (FLAGS_MASK & flags)) {
  1425. return ERROR_INVALID_PARAMETER;
  1426. }
  1427. if (action != NULL)
  1428. *action = flags & FS_ACCESS_MASK;
  1429. // if we are doing a directory, open locally
  1430. // todo: this should be merged with other case, if
  1431. // we are doing an existing open, then no need to
  1432. // issue update and log it, but we have to do
  1433. // mcast in order for the close to work.
  1434. if (namelen > 0) {
  1435. if (*name == L'\\') {
  1436. name++;
  1437. namelen--;
  1438. }
  1439. if (name[namelen-1] == L'\\') {
  1440. namelen--;
  1441. name[namelen] = L'\0';
  1442. }
  1443. }
  1444. memset(&msg.xid, 0, sizeof(msg.xid));
  1445. msg.name = name;
  1446. msg.name_len = namelen;
  1447. msg.flags = flags;
  1448. msg.attr = 0;
  1449. if (fattr) {
  1450. msg.attr = unget_attributes(fattr->attributes);
  1451. }
  1452. FspInitAnswers(status, rbuf, (char *) nfd, sizeof(nfd[0]));
  1453. // allocate a new handle
  1454. err = FspAllocatePrivateHandle(uinfo, &fdnum);
  1455. if (err == STATUS_SUCCESS) {
  1456. int sid;
  1457. // Copy the filename to the table entry here. Has to work with retrys.
  1458. // Copy the file name.
  1459. uinfo->Table[fdnum].FileName = LocalAlloc(0, (namelen +1) * sizeof(WCHAR));
  1460. if (uinfo->Table[fdnum].FileName == NULL) {
  1461. err = GetLastError();
  1462. goto Finally;
  1463. }
  1464. if ((err = StringCchCopyW(uinfo->Table[fdnum].FileName, namelen+1, name)) != S_OK) {
  1465. LocalFree(uinfo->Table[fdnum].FileName);
  1466. uinfo->Table[fdnum].FileName = NULL;
  1467. goto Finally;
  1468. }
  1469. msg.fnum = fdnum;
  1470. // Set flags in advance to sync with replay
  1471. uinfo->Table[fdnum].Flags = flags;
  1472. if (namelen < 2 ||
  1473. ((flags & FS_DISP_MASK) == DISP_DIRECTORY) ||
  1474. (unget_disp(flags) == FILE_OPEN)) {
  1475. sid = SendAvailRequest(FspOpen, uinfo->VolInfo,
  1476. uinfo,
  1477. (PVOID) &msg, sizeof(msg),
  1478. rbuf, sizeof(nfd[0]),
  1479. status);
  1480. } else {
  1481. sid = SendRequest(FspCreate,
  1482. uinfo,
  1483. (PVOID) &msg, sizeof(msg),
  1484. rbuf, sizeof(nfd[0]),
  1485. status);
  1486. }
  1487. // Test
  1488. // FsLog(("FsCreate: Debug sid: %d flags: 0x%x action: 0x%x\n", sid, flags, nfd[sid].action));
  1489. if (action != NULL) {
  1490. if (!(nfd[sid].access & FILE_GENERIC_WRITE))
  1491. flags &= ~ACCESS_WRITE;
  1492. *action = flags | nfd[sid].action;
  1493. }
  1494. err = status[sid].Status;
  1495. if (err == STATUS_SUCCESS) {
  1496. fs_id_t *fid = FS_GET_FID_HANDLE(uinfo, fdnum);
  1497. // set file id
  1498. memcpy((PVOID) fid, (PVOID) nfd[sid].fid, sizeof(fs_id_t));
  1499. FsLog(("File id %I64x:%I64x\n", (*fid)[0], (*fid)[1]));
  1500. uinfo->Table[fdnum].hState = HandleStateOpened;
  1501. // todo: bind handles to completion port if we do async
  1502. } else {
  1503. // free handle
  1504. FspFreeHandle(uinfo, fdnum);
  1505. fdnum = INVALID_FHANDLE_T;
  1506. }
  1507. }
  1508. Finally:
  1509. // todo: need to set fid
  1510. if (err == STATUS_SUCCESS) {
  1511. *phandle = fdnum;
  1512. }
  1513. else {
  1514. if (fdnum != INVALID_FHANDLE_T) {
  1515. FspFreeHandle(uinfo, fdnum);
  1516. }
  1517. }
  1518. FsLog(("create: return fd %d err %x action 0x%x\n", *phandle, err, action? *action:0));
  1519. return RtlNtStatusToDosError(err);
  1520. }
  1521. void
  1522. BuildFileAttr(FILE_BASIC_INFORMATION *attr, fattr_t *fattr)
  1523. {
  1524. memset(attr, 0, sizeof(*attr));
  1525. if (fattr->create_time != INVALID_UINT64)
  1526. attr->CreationTime.QuadPart = fattr->create_time;
  1527. if (fattr->mod_time != INVALID_UINT64)
  1528. attr->LastWriteTime.QuadPart = fattr->mod_time;
  1529. if (fattr->access_time != INVALID_UINT64)
  1530. attr->LastAccessTime.QuadPart = fattr->access_time;
  1531. if (fattr->attributes != INVALID_UINT32)
  1532. attr->FileAttributes = unget_attributes(fattr->attributes);
  1533. }
  1534. DWORD
  1535. FsSetAttr(
  1536. PVOID fshdl,
  1537. fhandle_t handle,
  1538. fattr_t* attr
  1539. )
  1540. {
  1541. UserInfo_t *uinfo = (UserInfo_t *)fshdl;
  1542. fs_setattr_msg_t msg;
  1543. int sid;
  1544. IO_STATUS_BLOCK status[FsMaxNodes];
  1545. if (!attr || handle == INVALID_FHANDLE_T)
  1546. return ERROR_INVALID_PARAMETER;
  1547. // todo: get file id
  1548. memset(&msg.xid, 0, sizeof(msg.xid));
  1549. msg.fs_id = FS_GET_FID_HANDLE(uinfo, handle);
  1550. BuildFileAttr(&msg.attr, attr);
  1551. msg.fnum = handle;
  1552. FspInitAnswers(status, NULL, NULL, 0);
  1553. sid = SendRequest(FspSetAttr, uinfo,
  1554. (char *)&msg, sizeof(msg),
  1555. NULL, 0,
  1556. status);
  1557. return RtlNtStatusToDosError(status[sid].Status);
  1558. }
  1559. DWORD
  1560. FsSetAttr2(
  1561. PVOID fshdl,
  1562. LPWSTR name,
  1563. USHORT name_len,
  1564. fattr_t* attr
  1565. )
  1566. {
  1567. UserInfo_t *uinfo = (UserInfo_t *) fshdl;
  1568. fs_setattr_msg_t msg;
  1569. int sid;
  1570. IO_STATUS_BLOCK status[FsMaxNodes];
  1571. if (!attr || !name)
  1572. return ERROR_INVALID_PARAMETER;
  1573. if (*name == '\\') {
  1574. name++;
  1575. name_len--;
  1576. }
  1577. // todo: locate file id
  1578. memset(&msg.xid, 0, sizeof(msg.xid));
  1579. msg.name = name;
  1580. msg.name_len = name_len;
  1581. BuildFileAttr(&msg.attr, attr);
  1582. FspInitAnswers(status, NULL, NULL, 0);
  1583. sid = SendRequest(FspSetAttr2, uinfo,
  1584. (char *)&msg, sizeof(msg),
  1585. NULL, 0,
  1586. status);
  1587. return RtlNtStatusToDosError(status[sid].Status);
  1588. }
  1589. DWORD
  1590. FsLookup(
  1591. PVOID fshdl,
  1592. LPWSTR name,
  1593. USHORT name_len,
  1594. fattr_t* fattr
  1595. )
  1596. {
  1597. fs_lookup_msg_t msg;
  1598. int err;
  1599. IO_STATUS_BLOCK ios;
  1600. FILE_NETWORK_OPEN_INFORMATION attr;
  1601. FsLog(("Lookup name '%S' %x\n", name, fattr));
  1602. if (!fattr) return ERROR_INVALID_PARAMETER;
  1603. if (*name == '\\') {
  1604. name++;
  1605. name_len--;
  1606. }
  1607. msg.name = name;
  1608. msg.name_len = name_len;
  1609. err = SendReadRequest(FspLookup, (UserInfo_t *)fshdl,
  1610. (PVOID) &msg, sizeof(msg),
  1611. (PVOID) &attr, sizeof(attr),
  1612. &ios);
  1613. err = ios.Status;
  1614. if (ios.Status == STATUS_SUCCESS) {
  1615. fattr->file_size = attr.EndOfFile.QuadPart;
  1616. fattr->alloc_size = attr.AllocationSize.QuadPart;
  1617. fattr->create_time = *(TIME64 *)&attr.CreationTime;
  1618. fattr->access_time = *(TIME64 *)&attr.LastAccessTime;
  1619. fattr->mod_time = *(TIME64 *)&attr.LastWriteTime;
  1620. fattr->attributes = get_attributes(attr.FileAttributes);
  1621. }
  1622. FsLog(("Lookup: return %x\n", err));
  1623. return RtlNtStatusToDosError(err);
  1624. }
  1625. DWORD
  1626. FsGetAttr(
  1627. PVOID fshdl,
  1628. fhandle_t handle,
  1629. fattr_t* fattr
  1630. )
  1631. {
  1632. int err;
  1633. IO_STATUS_BLOCK ios;
  1634. FILE_NETWORK_OPEN_INFORMATION attr;
  1635. xFsLog(("Getattr fid '%d' %x\n", handle, fattr));
  1636. if (!fattr) return ERROR_INVALID_PARAMETER;
  1637. err = SendReadRequest(FspGetAttr, (UserInfo_t *)fshdl,
  1638. (PVOID) &handle, sizeof(handle),
  1639. (PVOID) &attr, sizeof(attr),
  1640. &ios);
  1641. err = ios.Status;
  1642. if (err == STATUS_SUCCESS) {
  1643. fattr->file_size = attr.EndOfFile.QuadPart;
  1644. fattr->alloc_size = attr.AllocationSize.QuadPart;
  1645. fattr->create_time = *(TIME64 *)&attr.CreationTime;
  1646. fattr->access_time = *(TIME64 *)&attr.LastAccessTime;
  1647. fattr->mod_time = *(TIME64 *)&attr.LastWriteTime;
  1648. fattr->attributes =attr.FileAttributes;
  1649. }
  1650. FsLog(("Getattr: return %d\n", err));
  1651. return RtlNtStatusToDosError(err);
  1652. }
  1653. DWORD
  1654. FsClose(
  1655. PVOID fshdl,
  1656. fhandle_t handle
  1657. )
  1658. {
  1659. int sid, err;
  1660. IO_STATUS_BLOCK status[FsMaxNodes];
  1661. UserInfo_t *uinfo;
  1662. if (handle == INVALID_FHANDLE_T) return ERROR_INVALID_PARAMETER;
  1663. if (handle >= FsTableSize) return ERROR_INVALID_PARAMETER;
  1664. FsLog(("Close: fid %d\n", handle));
  1665. FspInitAnswers(status, NULL, NULL, 0);
  1666. uinfo = (UserInfo_t *) fshdl;
  1667. sid = SendAvailRequest(FspClose, uinfo->VolInfo, uinfo,
  1668. (PVOID) &handle, sizeof(handle),
  1669. NULL, 0,
  1670. status);
  1671. err = status[sid].Status;
  1672. if (err == STATUS_SUCCESS) {
  1673. // need to free this handle slot
  1674. FspFreeHandle((UserInfo_t *) fshdl, handle);
  1675. }
  1676. FsLog(("Close: fid %d err %x\n", handle, err));
  1677. return RtlNtStatusToDosError(err);
  1678. }
  1679. DWORD
  1680. FsWrite(
  1681. PVOID fshdl,
  1682. fhandle_t handle,
  1683. UINT32 offset,
  1684. UINT16 *pcount,
  1685. void* buffer,
  1686. PVOID context
  1687. )
  1688. {
  1689. DWORD err;
  1690. IO_STATUS_BLOCK status[FsMaxNodes];
  1691. int i, sid;
  1692. fs_io_msg_t msg;
  1693. UserInfo_t *uinfo = (UserInfo_t *) fshdl;
  1694. if (!pcount || handle == INVALID_FHANDLE_T) return ERROR_INVALID_PARAMETER;
  1695. FsLog(("Write %d offset %d count %d\n", handle, offset, *pcount));
  1696. i = (int) offset;
  1697. if (i < 0) {
  1698. offset = 0;
  1699. (*pcount)--;
  1700. }
  1701. // todo: locate file id
  1702. memset(&msg.xid, 0, sizeof(msg.xid));
  1703. msg.fs_id = FS_GET_FID_HANDLE(uinfo, handle);
  1704. msg.offset = offset;
  1705. msg.size = (UINT32) *pcount;
  1706. msg.buf = buffer;
  1707. msg.context = context;
  1708. msg.fnum = handle;
  1709. FspInitAnswers(status, NULL, NULL, 0);
  1710. sid = SendRequest(FspWrite, (UserInfo_t *)fshdl,
  1711. (PVOID) &msg, sizeof(msg),
  1712. NULL, 0,
  1713. status);
  1714. err = status[sid].Status;
  1715. *pcount = (USHORT) status[sid].Information;
  1716. FsLog(("write: return %x\n", err));
  1717. return RtlNtStatusToDosError(err);
  1718. }
  1719. DWORD
  1720. FsRead(
  1721. PVOID fshdl,
  1722. fhandle_t handle,
  1723. UINT32 offset,
  1724. UINT16* pcount,
  1725. void* buffer,
  1726. PVOID context
  1727. )
  1728. {
  1729. NTSTATUS err;
  1730. IO_STATUS_BLOCK ios;
  1731. fs_io_msg_t msg;
  1732. memset(&msg.xid, 0, sizeof(msg.xid));
  1733. msg.offset = offset;
  1734. msg.buf = buffer;
  1735. msg.size = (UINT32) *pcount;
  1736. msg.context = context;
  1737. msg.fnum = handle;
  1738. FsLog(("read: %x fd %d sz %d\n", context, handle, msg.size));
  1739. err = SendReadRequest(FspRead, (UserInfo_t *)fshdl,
  1740. (PVOID) &msg, sizeof(msg),
  1741. NULL, 0,
  1742. &ios);
  1743. err = ios.Status;
  1744. if (err == STATUS_END_OF_FILE) {
  1745. *pcount = 0;
  1746. return ERROR_SUCCESS;
  1747. }
  1748. err = RtlNtStatusToDosError(err);
  1749. *pcount = (USHORT) ios.Information;
  1750. FsLog(("read: %x return %x sz %d\n", context, err, *pcount));
  1751. return err;
  1752. #if 0
  1753. #ifdef FS_ASYNC
  1754. return ERROR_IO_PENDING; //err;
  1755. #else
  1756. return ERROR_SUCCESS;
  1757. #endif
  1758. #endif
  1759. }
  1760. DWORD
  1761. FsReadDir(
  1762. PVOID fshdl,
  1763. fhandle_t dir,
  1764. UINT32 cookie,
  1765. dirinfo_t* buffer,
  1766. UINT32 size,
  1767. UINT32 *entries_found
  1768. )
  1769. {
  1770. fs_io_msg_t msg;
  1771. int err;
  1772. IO_STATUS_BLOCK ios;
  1773. FsLog(("read_dir: cookie %d buf %x entries %x\n", cookie, buffer, entries_found));
  1774. if (!entries_found || !buffer) return ERROR_INVALID_PARAMETER;
  1775. msg.cookie = cookie;
  1776. msg.buf = (PVOID) buffer;
  1777. msg.size = size;
  1778. msg.fnum = dir;
  1779. err = SendReadRequest(FspReadDir, (UserInfo_t *)fshdl,
  1780. (PVOID) &msg, sizeof(msg),
  1781. NULL, 0,
  1782. &ios);
  1783. err = ios.Status;
  1784. *entries_found = (UINT32) ios.Information;
  1785. xFsLog(("read_dir: err %d entries %d\n", err, *entries_found));
  1786. return RtlNtStatusToDosError(err);
  1787. }
  1788. DWORD
  1789. FsRemove(
  1790. PVOID fshdl,
  1791. LPWSTR name,
  1792. USHORT name_len
  1793. )
  1794. {
  1795. fs_remove_msg_t msg;
  1796. int err, sid;
  1797. IO_STATUS_BLOCK status[FsMaxNodes];
  1798. if (*name == L'\\') {
  1799. name++;
  1800. name_len--;
  1801. }
  1802. memset(&msg.xid, 0, sizeof(msg.xid));
  1803. msg.name = name;
  1804. msg.name_len = name_len;
  1805. FspInitAnswers(status, NULL, NULL, 0);
  1806. sid = SendRequest(FspRemove, (UserInfo_t *) fshdl,
  1807. (PVOID *)&msg, sizeof(msg),
  1808. NULL, 0,
  1809. status);
  1810. err = status[sid].Status;
  1811. return RtlNtStatusToDosError(err);
  1812. }
  1813. DWORD
  1814. FsRename(
  1815. PVOID fshdl,
  1816. LPWSTR from_name,
  1817. USHORT from_name_len,
  1818. LPWSTR to_name,
  1819. USHORT to_name_len
  1820. )
  1821. {
  1822. int err, sid;
  1823. fs_rename_msg_t msg;
  1824. IO_STATUS_BLOCK status[FsMaxNodes];
  1825. if (!from_name || !to_name)
  1826. return ERROR_INVALID_PARAMETER;
  1827. if (*from_name == L'\\') {
  1828. from_name++;
  1829. from_name_len--;
  1830. }
  1831. if (*to_name == L'\\') {
  1832. to_name++;
  1833. to_name_len--;
  1834. }
  1835. if (*from_name == L'\0' || *to_name == L'\0')
  1836. return ERROR_INVALID_PARAMETER;
  1837. FsLog(("rename %S -> %S,%d\n", from_name, to_name,to_name_len));
  1838. memset(&msg.xid, 0, sizeof(msg.xid));
  1839. msg.sname = from_name;
  1840. msg.sname_len = from_name_len;
  1841. msg.dname = to_name;
  1842. msg.dname_len = to_name_len;
  1843. FspInitAnswers(status, NULL, NULL, 0);
  1844. sid = SendRequest(FspRename, (UserInfo_t *) fshdl,
  1845. (PVOID) &msg, sizeof(msg),
  1846. NULL, 0,
  1847. status);
  1848. err = status[sid].Status;
  1849. return RtlNtStatusToDosError(err);
  1850. }
  1851. DWORD
  1852. FsMkDir(
  1853. PVOID fshdl,
  1854. LPWSTR name,
  1855. USHORT name_len,
  1856. fattr_t* attr
  1857. )
  1858. {
  1859. int err, sid;
  1860. IO_STATUS_BLOCK status[FsMaxNodes];
  1861. fs_id_t ids[FsMaxNodes];
  1862. PVOID *rbuf[FsMaxNodes];
  1863. fs_create_msg_t msg;
  1864. // XXX: we ignore attr for now...
  1865. if (!name) return ERROR_INVALID_PARAMETER;
  1866. if (*name == L'\\') {
  1867. name++;
  1868. name_len--;
  1869. }
  1870. memset(&msg.xid, 0, sizeof(msg.xid));
  1871. msg.attr = (attr != NULL ? unget_attributes(attr->attributes) :
  1872. FILE_ATTRIBUTE_DIRECTORY);
  1873. msg.flags = DISP_DIRECTORY | SHARE_READ | SHARE_WRITE;
  1874. msg.name = name;
  1875. msg.name_len = name_len;
  1876. FspInitAnswers(status, (PVOID *)rbuf, (PVOID) ids, sizeof(ids[0]));
  1877. sid = SendRequest(FspMkDir, (UserInfo_t *) fshdl,
  1878. (PVOID) &msg, sizeof(msg),
  1879. (PVOID *)rbuf, sizeof(ids[0]),
  1880. status);
  1881. err = status[sid].Status;
  1882. // todo: insert pathname and file id into hash table
  1883. return RtlNtStatusToDosError(err);
  1884. }
  1885. DWORD
  1886. FsFlush(
  1887. PVOID fshdl,
  1888. fhandle_t handle
  1889. )
  1890. {
  1891. NTSTATUS status;
  1892. int sid;
  1893. IO_STATUS_BLOCK ios[FsMaxNodes];
  1894. FspInitAnswers(ios, NULL, NULL, 0);
  1895. sid = SendRequest(FspFlush, (UserInfo_t *) fshdl,
  1896. (PVOID) &handle, sizeof(handle),
  1897. NULL, 0,
  1898. ios);
  1899. status = ios[sid].Status;
  1900. FsLog(("Flush %d err %x\n", handle, status));
  1901. if (status == STATUS_PENDING) {
  1902. status = STATUS_SUCCESS;
  1903. }
  1904. return RtlNtStatusToDosError(status);
  1905. }
  1906. DWORD
  1907. FsLock(PVOID fshdl, fhandle_t handle, ULONG offset, ULONG length, ULONG flags,
  1908. PVOID context)
  1909. {
  1910. fs_lock_msg_t msg;
  1911. int err, sid;
  1912. IO_STATUS_BLOCK status[FsMaxNodes];
  1913. if (handle == INVALID_FHANDLE_T)
  1914. return ERROR_INVALID_PARAMETER;
  1915. memset(&msg.xid, 0, sizeof(msg.xid));
  1916. msg.offset = offset;
  1917. msg.length = length;
  1918. msg.flags = flags;
  1919. msg.fnum = handle;
  1920. FsLog(("Lock fid %d off %d len %d\n", msg.fnum, offset, length));
  1921. FspInitAnswers(status, NULL, NULL, 0);
  1922. sid = SendRequest(FspLock, (UserInfo_t *) fshdl,
  1923. (PVOID)&msg, sizeof(msg),
  1924. NULL, 0,
  1925. status);
  1926. err = status[sid].Status;
  1927. FsLog(("Lock fid %d err %x\n", msg.fnum, err));
  1928. return RtlNtStatusToDosError(err);
  1929. }
  1930. DWORD
  1931. FsUnlock(PVOID fshdl, fhandle_t handle, ULONG offset, ULONG length)
  1932. {
  1933. fs_lock_msg_t msg;
  1934. int err, sid;
  1935. IO_STATUS_BLOCK status[FsMaxNodes];
  1936. if (handle == INVALID_FHANDLE_T)
  1937. return ERROR_INVALID_PARAMETER;
  1938. memset(&msg.xid, 0, sizeof(msg.xid));
  1939. msg.offset = offset;
  1940. msg.length = length;
  1941. msg.fnum = handle;
  1942. FsLog(("Unlock fid %d off %d len %d\n", handle, offset, length));
  1943. FspInitAnswers(status, NULL, NULL, 0);
  1944. sid = SendRequest(FspUnlock, (UserInfo_t *) fshdl,
  1945. (PVOID)&msg, sizeof(msg),
  1946. NULL, 0,
  1947. status);
  1948. err = status[sid].Status;
  1949. return RtlNtStatusToDosError(err);
  1950. }
  1951. DWORD
  1952. FsStatFs(
  1953. PVOID fshdl,
  1954. fs_attr_t* attr
  1955. )
  1956. {
  1957. DWORD err;
  1958. IO_STATUS_BLOCK ios;
  1959. if (!attr) return ERROR_INVALID_PARAMETER;
  1960. err = SendReadRequest(FspStatFs, (UserInfo_t *) fshdl,
  1961. (PVOID) attr, sizeof(*attr),
  1962. NULL, 0,
  1963. &ios);
  1964. err = ios.Status;
  1965. return RtlNtStatusToDosError(err);
  1966. }
  1967. DWORD
  1968. FsGetRoot(PVOID fshdl, LPWSTR fullpath)
  1969. {
  1970. DWORD err;
  1971. IO_STATUS_BLOCK ios;
  1972. if (!fullpath || !fshdl) return ERROR_INVALID_PARAMETER;
  1973. // use local replica instead
  1974. if ((((UserInfo_t *)fshdl)->VolInfo->LocalPath)) {
  1975. StringCchPrintfW(fullpath, MAX_PATH, L"\\\\?\\%s\\%s",
  1976. (((UserInfo_t *)fshdl)->VolInfo->LocalPath),
  1977. (((UserInfo_t *)fshdl)->VolInfo->Root));
  1978. FsLog(("FspGetRoot '%S'\n", fullpath));
  1979. err = STATUS_SUCCESS;
  1980. } else {
  1981. err = SendReadRequest(FspGetRoot, (UserInfo_t *) fshdl,
  1982. NULL, 0,
  1983. (PVOID)fullpath, 0,
  1984. &ios);
  1985. err = ios.Status;
  1986. }
  1987. return RtlNtStatusToDosError(err);
  1988. }
  1989. UINT32* FsGetFilePointerFromHandle(
  1990. PVOID *fshdl,
  1991. fhandle_t handle
  1992. )
  1993. {
  1994. UserInfo_t* u = (UserInfo_t *) fshdl;
  1995. return FS_GET_USER_HANDLE_OFFSET(u, handle);
  1996. }
  1997. DWORD
  1998. FsConnect(PVOID resHdl, DWORD pid)
  1999. {
  2000. UserInfo_t *u=(UserInfo_t *)resHdl;
  2001. VolInfo_t *vol=u->VolInfo;
  2002. HANDLE pHdl=NULL;
  2003. HANDLE regHdl=NULL;
  2004. DWORD status=ERROR_SUCCESS;
  2005. FsLog(("FsConnect: pid %d\n", pid));
  2006. // Get exclusive lock.
  2007. RwLockExclusive(&vol->Lock);
  2008. if((pHdl = OpenProcess(PROCESS_ALL_ACCESS,
  2009. FALSE,
  2010. pid)) == NULL) {
  2011. status = GetLastError();
  2012. FsLogError(("FsConnect: OpenProcess(%d) returns, %d\n", pid, status));
  2013. goto error_exit;
  2014. }
  2015. if (!RegisterWaitForSingleObject(&regHdl,
  2016. pHdl,
  2017. FsForceClose,
  2018. (PVOID)vol,
  2019. INFINITE,
  2020. WT_EXECUTEONLYONCE|WT_EXECUTEDEFAULT)) {
  2021. status = GetLastError();
  2022. regHdl = NULL;
  2023. FsLogError(("FsConnect: RegisterWaitForSingleObject() returns, %d\n", status));
  2024. goto error_exit;
  2025. }
  2026. error_exit:
  2027. if (status == ERROR_SUCCESS) {
  2028. // Paranoid Check.
  2029. if (vol->ClussvcTerminationHandle != INVALID_HANDLE_VALUE) {
  2030. UnregisterWaitEx(vol->ClussvcTerminationHandle, INVALID_HANDLE_VALUE);
  2031. }
  2032. if (vol->ClussvcProcess != INVALID_HANDLE_VALUE) {
  2033. CloseHandle(vol->ClussvcProcess);
  2034. }
  2035. vol->ClussvcProcess = pHdl;
  2036. vol->ClussvcTerminationHandle = regHdl;
  2037. }
  2038. else {
  2039. if (regHdl != NULL) {
  2040. UnregisterWaitEx(regHdl, INVALID_HANDLE_VALUE);
  2041. }
  2042. if (pHdl != NULL) {
  2043. CloseHandle(pHdl);
  2044. }
  2045. }
  2046. RwUnlockExclusive(&vol->Lock);
  2047. return status;
  2048. }
  2049. static FsDispatchTable gDisp = {
  2050. 0x100,
  2051. FsCreate,
  2052. FsLookup,
  2053. FsSetAttr,
  2054. FsSetAttr2,
  2055. FsGetAttr,
  2056. FsClose,
  2057. FsWrite,
  2058. FsRead,
  2059. FsReadDir,
  2060. FsStatFs,
  2061. FsRemove,
  2062. FsRename,
  2063. FsMkDir,
  2064. FsRemove,
  2065. FsFlush,
  2066. FsLock,
  2067. FsUnlock,
  2068. FsGetRoot,
  2069. FsConnect
  2070. };
  2071. //////////////////////////////////////////////////////////////
  2072. DWORD
  2073. FsInit(PVOID resHdl, PVOID *Hdl)
  2074. {
  2075. DWORD status=ERROR_SUCCESS;
  2076. FsCtx_t *ctx;
  2077. // This should be a compile check instead of runtime check
  2078. ASSERT(sizeof(fs_log_rec_t) == CRS_RECORD_SZ);
  2079. ASSERT(sizeof(fs_log_rec_t) == sizeof(CrsRecord_t));
  2080. if (Hdl == NULL) {
  2081. return ERROR_INVALID_PARAMETER;
  2082. }
  2083. FsLog(("FsInit:\n"));
  2084. // allocate a context
  2085. ctx = (FsCtx_t *) MemAlloc(sizeof(*ctx));
  2086. if (ctx == NULL) {
  2087. return ERROR_NOT_ENOUGH_MEMORY;
  2088. }
  2089. // initialize configuration table and other global state
  2090. memset(ctx, 0, sizeof(*ctx));
  2091. // local path
  2092. // Not needed.
  2093. // ctx->Root = NULL;
  2094. LockInit(ctx->Lock);
  2095. ctx->reshdl = resHdl;
  2096. *Hdl = (PVOID) ctx;
  2097. return status;
  2098. }
  2099. void
  2100. FspFreeSession(SessionInfo_t *s)
  2101. {
  2102. UserInfo_t *u;
  2103. int i, j;
  2104. u = &s->TreeCtx;
  2105. FsLog(("Session free uid %d tid %d ref %d\n", u->Uid, u->Tid, u->RefCnt));
  2106. LockEnter(u->Lock);
  2107. if (u->VolInfo != NULL) {
  2108. UserInfo_t **p;
  2109. VolInfo_t *v = u->VolInfo;
  2110. LockExit(u->Lock);
  2111. // remove from vollist now
  2112. RwLockExclusive(&v->Lock);
  2113. p = &v->UserList;
  2114. while (*p != NULL) {
  2115. if (*p == u) {
  2116. // found it
  2117. *p = u->Next;
  2118. FsLog(("Remove uinfo %x,%x from vol %x %S\n", u, u->Next,
  2119. v->UserList, v->Root));
  2120. break;
  2121. }
  2122. p = &(*p)->Next;
  2123. }
  2124. RwUnlockExclusive(&v->Lock);
  2125. // relock again
  2126. LockEnter(u->Lock);
  2127. }
  2128. // Close all user handles
  2129. for (i = 0; i < FsTableSize; i++) {
  2130. if (u->Table[i].Flags) {
  2131. FsLog(("Close slot %d %x\n", i, u->Table[i].Flags));
  2132. // Cannot call FsClose(), GoingAway Flag might be already set.
  2133. // Close the handles individually.
  2134. // FsClose((PVOID) u, (fhandle_t)i);
  2135. FspFreeHandle(u, (fhandle_t)i);
  2136. }
  2137. }
  2138. // sap volptr
  2139. u->VolInfo = NULL;
  2140. LockExit(u->Lock);
  2141. DeleteCriticalSection(&u->Lock);
  2142. // free memory now, don't free u since it's part of s
  2143. MemFree(s);
  2144. }
  2145. void
  2146. FspCloseVolume(VolInfo_t *vol, ULONG AliveSet)
  2147. {
  2148. DWORD i;
  2149. HANDLE regHdl;
  2150. // Close crs and root handles, by evicting our alive set
  2151. // close nid handles <crs, vol, open files>
  2152. for (i = 0; i < FsMaxNodes; i++) {
  2153. if (AliveSet & (1 << i)) {
  2154. vol->ShareState[i] = SHARE_STATE_OFFLINE;
  2155. if (vol->CrsHdl[i]) {
  2156. CrsClose(vol->CrsHdl[i]);
  2157. vol->CrsHdl[i] = NULL;
  2158. }
  2159. LockEnter(vol->ArbLock);
  2160. regHdl = vol->WaitRegHdl[i];
  2161. vol->WaitRegHdl[i] = INVALID_HANDLE_VALUE;
  2162. LockExit(vol->ArbLock);
  2163. if (regHdl != INVALID_HANDLE_VALUE) {
  2164. UnregisterWaitEx(regHdl, INVALID_HANDLE_VALUE);
  2165. }
  2166. if (vol->NotifyFd[i] != INVALID_HANDLE_VALUE) {
  2167. FindCloseChangeNotification(vol->NotifyFd[i]);
  2168. vol->NotifyFd[i] = INVALID_HANDLE_VALUE;
  2169. }
  2170. if (vol->Fd[i] != INVALID_HANDLE_VALUE) {
  2171. xFsClose(vol->Fd[i]);
  2172. vol->Fd[i] = INVALID_HANDLE_VALUE;
  2173. }
  2174. // need to close all user handles now
  2175. {
  2176. UserInfo_t *u;
  2177. for (u = vol->UserList; u; u = u->Next) {
  2178. DWORD j;
  2179. FsLog(("Lock user %x root %S\n", u, vol->Root));
  2180. LockEnter(u->Lock);
  2181. // close all handles for this node
  2182. for (j = 0; j < FsTableSize; j++) {
  2183. if (u->Table[j].Fd[i] != INVALID_HANDLE_VALUE) {
  2184. FsLog(("Close fid %d\n", j));
  2185. xFsClose(u->Table[j].Fd[i]);
  2186. u->Table[j].Fd[i] = INVALID_HANDLE_VALUE;
  2187. }
  2188. }
  2189. LockExit(u->Lock);
  2190. FsLog(("Unlock user %x\n", u));
  2191. }
  2192. }
  2193. // Close the tree connection handle.
  2194. if (vol->TreeConnHdl[i] != INVALID_HANDLE_VALUE) {
  2195. xFsClose(vol->TreeConnHdl[i]);
  2196. vol->TreeConnHdl[i] = INVALID_HANDLE_VALUE;
  2197. }
  2198. }
  2199. }
  2200. }
  2201. // call this when we are deleting resource and we need to get ride of
  2202. // our IPC reference to directory
  2203. void
  2204. FsEnd(PVOID Hdl)
  2205. {
  2206. FsCtx_t *ctx = (FsCtx_t *) Hdl;
  2207. VolInfo_t *p;
  2208. #if 0
  2209. if (!ctx)
  2210. return;
  2211. LockEnter(ctx->Lock);
  2212. p = (VolInfo_t *)ctx->ipcHdl;
  2213. if (p) {
  2214. xFsClose(p->Fd[0]);
  2215. p->Fd[0] = INVALID_HANDLE_VALUE;
  2216. p->ReadSet = 0;
  2217. p->AliveSet = 0;
  2218. }
  2219. LockExit(ctx->Lock);
  2220. #else
  2221. return;
  2222. #endif
  2223. }
  2224. void
  2225. FsExit(PVOID Hdl)
  2226. {
  2227. // flush all state
  2228. FsCtx_t *ctx = (FsCtx_t *) Hdl;
  2229. VolInfo_t *p;
  2230. SessionInfo_t *s;
  2231. LogonInfo_t *log;
  2232. LockEnter(ctx->Lock);
  2233. // There shouldn't be any sessions, volumes or logon info right now. If there is
  2234. // Just remove it and log a warning.
  2235. //
  2236. while (s = ctx->SessionList) {
  2237. FsLogError(("FsExit: Active session at exit, Tid=%d Uid=%d\n", s->TreeCtx.Tid, s->TreeCtx.Uid));
  2238. ctx->SessionList = s->Next;
  2239. // free this session now
  2240. FspFreeSession(s);
  2241. }
  2242. while (p = ctx->VolList) {
  2243. ctx->VolList = p->Next;
  2244. ctx->VolListSz--;
  2245. // Unregister this volume. There should not be any here now.
  2246. FsLogError(("FsExit Active volume at exit, Root=%S\n", p->Root));
  2247. RwLockExclusive(&p->Lock);
  2248. FspCloseVolume(p, p->AliveSet);
  2249. RwUnlockExclusive(&p->Lock);
  2250. RwLockDelete(&p->Lock);
  2251. MemFree(p);
  2252. }
  2253. while (log = ctx->LogonList) {
  2254. ctx->LogonList = log->Next;
  2255. FsLogError(("FsExit: Active Logon at exit, Uid=%d\n", log->LogOnId.LowPart));
  2256. // free token
  2257. if (log->Token) {
  2258. CloseHandle(log->Token);
  2259. }
  2260. MemFree(log);
  2261. }
  2262. // now we free our structure
  2263. LockExit(ctx->Lock);
  2264. LockDestroy(ctx->Lock);
  2265. MemFree(ctx);
  2266. }
  2267. // adds a new share to list of trees available
  2268. DWORD
  2269. FsRegister(PVOID Hdl, LPWSTR root, LPWSTR local_path,
  2270. LPWSTR disklist[], DWORD len, DWORD ArbTime, PVOID *vHdl)
  2271. {
  2272. FsCtx_t *ctx = (FsCtx_t *) Hdl;
  2273. VolInfo_t *p;
  2274. NTSTATUS status=ERROR_SUCCESS;
  2275. UINT32 disp = FILE_OPEN;
  2276. HANDLE vfd;
  2277. WCHAR path[MAX_PATH];
  2278. DWORD ndx;
  2279. // check limit
  2280. if (len >= FsMaxNodes) {
  2281. return ERROR_TOO_MANY_NAMES;
  2282. }
  2283. if (root == NULL || local_path == NULL || (wcslen(local_path) > (MAX_PATH - 5))) {
  2284. return ERROR_INVALID_PARAMETER;
  2285. }
  2286. // add a new volume to the list of volume. path is an array
  2287. // of directories. Note: The order of this list MUST be the
  2288. // same in all nodes since it also determines the disk id
  2289. // this is a simple check and assume one thread is calling this function
  2290. LockEnter(ctx->Lock);
  2291. // find the volume share
  2292. for (p = ctx->VolList; p != NULL; p = p->Next) {
  2293. if (!wcscmp(root, p->Root)) {
  2294. FsLog(("FsRegister: %S already registered Tid %d\n", root, p->Tid));
  2295. LockExit(ctx->Lock);
  2296. return ERROR_SUCCESS;
  2297. }
  2298. }
  2299. p = (VolInfo_t *)MemAlloc(sizeof(*p));
  2300. if (p == NULL) {
  2301. return ERROR_NOT_ENOUGH_MEMORY;
  2302. }
  2303. memset(p, 0, sizeof(*p));
  2304. // Open the root of our local share. store it in Fd[0].
  2305. StringCchCopyW(path, MAX_PATH, L"\\??\\");
  2306. StringCchCatW(path, (MAX_PATH - wcslen(path)-1), local_path);
  2307. StringCchCatW(path, (MAX_PATH - wcslen(path)-1), L"\\");
  2308. status = xFsCreate(&vfd, NULL, path, wcslen(path),
  2309. FILE_DIRECTORY_FILE|FILE_SYNCHRONOUS_IO_ALERT,
  2310. 0,
  2311. FILE_SHARE_READ|FILE_SHARE_WRITE,
  2312. &disp,
  2313. FILE_GENERIC_READ|FILE_GENERIC_WRITE|FILE_GENERIC_EXECUTE,
  2314. NULL, 0);
  2315. if (status == STATUS_SUCCESS) {
  2316. // our root must have already been created and secured.
  2317. ASSERT(disp != FILE_CREATED);
  2318. p->Fd[0] = vfd;
  2319. } else {
  2320. FsLog(("Fsregister: Failed to open share root %S status=%x\n", path, status));
  2321. LockExit(ctx->Lock);
  2322. MemFree(p);
  2323. return RtlNtStatusToDosError(status);
  2324. }
  2325. RwLockInit(&p->Lock);
  2326. // lock the volume
  2327. RwLockExclusive(&p->Lock);
  2328. p->Tid = (USHORT)++ctx->VolListSz;
  2329. p->Next = ctx->VolList;
  2330. ctx->VolList = p;
  2331. p->FsCtx = ctx;
  2332. LockExit(ctx->Lock);
  2333. p->Label = L"Cluster Quorum";
  2334. p->State = VolumeStateInit;
  2335. p->Root = root;
  2336. p->LocalPath = local_path;
  2337. p->ArbTime = ArbTime;
  2338. if (disklist) {
  2339. for (ndx = 1; ndx < FsMaxNodes; ndx++) {
  2340. p->DiskList[ndx] = disklist[ndx];
  2341. }
  2342. }
  2343. p->DiskListSz = len;
  2344. LockInit(p->ArbLock);
  2345. p->AllArbsCompleteEvent = CreateEvent(NULL, TRUE, TRUE, NULL);
  2346. p->NumArbsInProgress = 0;
  2347. p->GoingAway = FALSE;
  2348. // Initialize all handles to INVALID_HANDLE_VALUE.
  2349. for (ndx=0;ndx<FsMaxNodes;ndx++) {
  2350. p->Fd[ndx] = INVALID_HANDLE_VALUE;
  2351. p->NotifyFd[ndx] = INVALID_HANDLE_VALUE;
  2352. p->TreeConnHdl[ndx] = INVALID_HANDLE_VALUE;
  2353. p->WaitRegHdl[ndx] = INVALID_HANDLE_VALUE;
  2354. p->NotifyChangeEvent[ndx] = CreateEventW(NULL, FALSE, FALSE, NULL);
  2355. if (p->NotifyChangeEvent[ndx] == NULL) {
  2356. status = GetLastError();
  2357. break;
  2358. }
  2359. }
  2360. // This handles would be valid only after connect.
  2361. p->ClussvcTerminationHandle = INVALID_HANDLE_VALUE;
  2362. p->ClussvcProcess = INVALID_HANDLE_VALUE;
  2363. FsLog(("FsRegister Tid %d Share '%S' %d disks\n", p->Tid, root, len));
  2364. // drop the volume lock
  2365. RwUnlockExclusive(&p->Lock);
  2366. *vHdl = (PVOID) p;
  2367. if ((status != ERROR_SUCCESS) && p) {
  2368. if (p->AllArbsCompleteEvent) {
  2369. CloseHandle(p->AllArbsCompleteEvent);
  2370. }
  2371. if (p->Fd[0] != INVALID_HANDLE_VALUE) {
  2372. CloseHandle(p->Fd[0]);
  2373. }
  2374. for(ndx=0;ndx<FsMaxNodes;ndx++) {
  2375. if (p->NotifyChangeEvent[ndx] != NULL) {
  2376. CloseHandle(p->NotifyChangeEvent[ndx]);
  2377. }
  2378. }
  2379. RwLockDelete(&p->Lock);
  2380. MemFree(p);
  2381. }
  2382. return status;
  2383. }
  2384. SessionInfo_t *
  2385. FspAllocateSession()
  2386. {
  2387. SessionInfo_t *s;
  2388. UserInfo_t *u;
  2389. int i;
  2390. // add user to our tree and initialize handle tables
  2391. s = (SessionInfo_t *)MemAlloc(sizeof(*s));
  2392. if (s != NULL) {
  2393. memset(s, 0, sizeof(*s));
  2394. u = &s->TreeCtx;
  2395. LockInit(u->Lock);
  2396. // init handle table
  2397. for (i = 0; i < FsTableSize; i++) {
  2398. int j;
  2399. for (j = 0; j < FsMaxNodes; j++) {
  2400. u->Table[i].Fd[j] = INVALID_HANDLE_VALUE;
  2401. }
  2402. u->Table[i].hState = HandleStateInit;
  2403. }
  2404. }
  2405. return s;
  2406. }
  2407. // binds a session to a specific tree/share
  2408. DWORD
  2409. FsMount(PVOID Hdl, LPWSTR root_name, USHORT uid, USHORT *tid)
  2410. {
  2411. FsCtx_t *ctx = (FsCtx_t *) Hdl;
  2412. SessionInfo_t *s = NULL, *ns;
  2413. VolInfo_t *p;
  2414. DWORD err = ERROR_SUCCESS;
  2415. *tid = 0;
  2416. // allocate new ns
  2417. ns = FspAllocateSession();
  2418. if (ns == NULL) {
  2419. return ERROR_NOT_ENOUGH_MEMORY;
  2420. }
  2421. LockEnter(ctx->Lock);
  2422. // locate share
  2423. for (p = ctx->VolList; p != NULL; p = p->Next) {
  2424. if (!ClRtlStrICmp(root_name, p->Root)) {
  2425. FsLog(("Mount share '%S' tid %d\n", p->Root, p->Tid));
  2426. break;
  2427. }
  2428. }
  2429. if (p != NULL) {
  2430. *tid = p->Tid;
  2431. for (s = ctx->SessionList; s != NULL; s = s->Next) {
  2432. if (s->TreeCtx.Uid == uid && s->TreeCtx.Tid == p->Tid) {
  2433. break;
  2434. }
  2435. }
  2436. if (s == NULL) {
  2437. UserInfo_t *u = &ns->TreeCtx;
  2438. // insert into session list
  2439. ns->Next = ctx->SessionList;
  2440. ctx->SessionList = ns;
  2441. FsLog(("Bind uid %d -> tid %d <%x,%x>\n", uid, p->Tid,
  2442. u, p->UserList));
  2443. u->RefCnt++;
  2444. u->Uid = uid;
  2445. u->Tid = p->Tid;
  2446. u->VolInfo = p;
  2447. // insert user_info into volume list
  2448. RwLockExclusive(&p->Lock);
  2449. FsLog(("Add <%x,%x>\n", u, p->UserList));
  2450. u->Next = p->UserList;
  2451. p->UserList = u;
  2452. RwUnlockExclusive(&p->Lock);
  2453. } else {
  2454. // we already have this session opened, increment refcnt
  2455. s->TreeCtx.RefCnt++;
  2456. // free ns
  2457. MemFree(ns);
  2458. }
  2459. } else {
  2460. err = ERROR_BAD_NET_NAME;
  2461. }
  2462. LockExit(ctx->Lock);
  2463. return (err);
  2464. }
  2465. // This function is also a CloseSession
  2466. void
  2467. FsDisMount(PVOID Hdl, USHORT uid, USHORT tid)
  2468. {
  2469. FsCtx_t *ctx = (FsCtx_t *) Hdl;
  2470. SessionInfo_t *s, **last;
  2471. // lookup tree and close all user handles
  2472. s = NULL;
  2473. LockEnter(ctx->Lock);
  2474. last = &ctx->SessionList;
  2475. while (*last != NULL) {
  2476. UserInfo_t *u = &(*last)->TreeCtx;
  2477. if (u->Uid == uid && u->Tid == tid) {
  2478. ASSERT(u->RefCnt > 0);
  2479. u->RefCnt--;
  2480. if (u->RefCnt == 0) {
  2481. FsLog(("Dismount uid %d tid %d <%x,%x>\n", uid, tid,
  2482. u, *last));
  2483. s = *last;
  2484. *last = s->Next;
  2485. }
  2486. break;
  2487. }
  2488. last = &(*last)->Next;
  2489. }
  2490. LockExit(ctx->Lock);
  2491. if (s != NULL) {
  2492. FspFreeSession(s);
  2493. }
  2494. }
  2495. // todo: I am not using the token for now, but need to use it for all
  2496. // io operations
  2497. DWORD
  2498. FsLogonUser(PVOID Hdl, HANDLE token, LUID logonid, USHORT *uid)
  2499. {
  2500. FsCtx_t *ctx = (FsCtx_t *) Hdl;
  2501. LogonInfo_t *s;
  2502. int i;
  2503. // add user to our tree and initialize handle tables
  2504. s = (LogonInfo_t *)MemAlloc(sizeof(*s));
  2505. if (s == NULL) {
  2506. return ERROR_NOT_ENOUGH_MEMORY;
  2507. }
  2508. memset(s, 0, sizeof(*s));
  2509. s->Token = token;
  2510. s->LogOnId = logonid;
  2511. LockEnter(ctx->Lock);
  2512. s->Next = ctx->LogonList;
  2513. ctx->LogonList = s;
  2514. LockExit(ctx->Lock);
  2515. *uid = (USHORT) logonid.LowPart;
  2516. FsLog(("Logon %d,%d, uid %d\n", logonid.HighPart, logonid.LowPart, *uid));
  2517. return (ERROR_SUCCESS);
  2518. }
  2519. void
  2520. FsLogoffUser(PVOID Hdl, LUID logonid)
  2521. {
  2522. FsCtx_t *ctx = (FsCtx_t *) Hdl;
  2523. LogonInfo_t *s, **pps;
  2524. USHORT uid;
  2525. LockEnter(ctx->Lock);
  2526. for (s = ctx->LogonList, pps=&ctx->LogonList; s != NULL; s = s->Next) {
  2527. if (s->LogOnId.LowPart == logonid.LowPart &&
  2528. s->LogOnId.HighPart == logonid.HighPart) {
  2529. uid = (USHORT) logonid.LowPart;
  2530. // Remove the logon info.
  2531. *pps = s->Next;
  2532. break;
  2533. }
  2534. pps = &s->Next;
  2535. }
  2536. if (s != NULL) {
  2537. SessionInfo_t **last;
  2538. FsLog(("Logoff user %d\n", uid));
  2539. // Flush all user trees
  2540. last = &ctx->SessionList;
  2541. while (*last != NULL) {
  2542. UserInfo_t *u = &(*last)->TreeCtx;
  2543. if (u->Uid == uid) {
  2544. SessionInfo_t *ss = *last;
  2545. // remove session and free it now
  2546. *last = ss->Next;
  2547. FspFreeSession(ss);
  2548. } else {
  2549. last = &(*last)->Next;
  2550. }
  2551. }
  2552. MemFree(s);
  2553. }
  2554. LockExit(ctx->Lock);
  2555. }
  2556. FsDispatchTable*
  2557. FsGetHandle(PVOID Hdl, USHORT tid, USHORT uid, PVOID *fshdl)
  2558. {
  2559. FsCtx_t *ctx = (FsCtx_t *) Hdl;
  2560. SessionInfo_t *s;
  2561. // locate tid,uid in session list
  2562. LockEnter(ctx->Lock);
  2563. for (s = ctx->SessionList; s != NULL; s = s->Next) {
  2564. if (s->TreeCtx.Uid == uid && s->TreeCtx.Tid == tid) {
  2565. *fshdl = (PVOID *) &s->TreeCtx;
  2566. LockExit(ctx->Lock);
  2567. return &gDisp;
  2568. }
  2569. }
  2570. LockExit(ctx->Lock);
  2571. *fshdl = NULL;
  2572. return NULL;
  2573. }
  2574. //////////////////////////////////// Arb/Release ///////////////////////////////
  2575. DWORD
  2576. FspOpenReplica(VolInfo_t *p, DWORD id, LPWSTR myAddr, HANDLE *CrsHdl, HANDLE *Fd, HANDLE *notifyFd, HANDLE *WaitRegHdl)
  2577. {
  2578. WCHAR path[MAXPATH];
  2579. UINT32 disp = FILE_OPEN_IF;
  2580. NTSTATUS err=STATUS_SUCCESS;
  2581. // StringCchPrintfW(path, MAXPATH, L"\\\\?\\%s\\crs.log", p->DiskList[id]);
  2582. // Format: \Device\LanmanRedirector\<ip addr>\shareGuid$\crs.log
  2583. //
  2584. StringCchPrintfW(path, MAXPATH, L"%ws\\%ws\\%ws\\crs.log", MNS_REDIRECTOR, myAddr, p->Root);
  2585. err = CrsOpen(FsCrsCallback, (PVOID) p, (USHORT)id,
  2586. path, FsCrsNumSectors,
  2587. CrsHdl);
  2588. if (err == ERROR_SUCCESS && CrsHdl != NULL) {
  2589. // got it
  2590. // open root volume directory
  2591. // StringCchPrintfW(path, MAXPATH, L"\\??\\%s\\%s\\", p->DiskList[id], p->Root);
  2592. // Format: \Device\LanmanRedirector\<ip addr>\shareGuid$\shareGuid$\
  2593. //
  2594. StringCchPrintfW(path, MAXPATH, L"%ws\\%ws\\%ws\\%ws\\", MNS_REDIRECTOR, myAddr, p->Root, p->Root);
  2595. err = xFsCreate(Fd, NULL, path, wcslen(path),
  2596. FILE_DIRECTORY_FILE|FILE_SYNCHRONOUS_IO_ALERT,
  2597. 0,
  2598. FILE_SHARE_READ|FILE_SHARE_WRITE,
  2599. &disp,
  2600. FILE_GENERIC_READ|FILE_GENERIC_WRITE|FILE_GENERIC_EXECUTE,
  2601. NULL, 0);
  2602. if (err == STATUS_SUCCESS) {
  2603. FsArbLog(("Mounted %S\n", path));
  2604. // StringCchPrintfW(path, MAXPATH, L"\\\\?\\%s\\", p->DiskList[id]);
  2605. // Format: \Device\LanmanRedirector\<ip addr>\shareGuid$\
  2606. //
  2607. StringCchPrintfW(path, MAXPATH, L"%ws\\%ws\\%ws\\", MNS_REDIRECTOR, myAddr, p->Root);
  2608. // scan the tree to break any current oplocks on dead nodes
  2609. err = xFsTouchTree(*Fd);
  2610. if (!NT_SUCCESS(err)) {
  2611. CrsClose(*CrsHdl);
  2612. xFsClose(*Fd);
  2613. *CrsHdl = NULL;
  2614. *Fd = INVALID_HANDLE_VALUE;
  2615. return err;
  2616. }
  2617. #if 1
  2618. // Directly use NT api.
  2619. err = xFsOpenEx(notifyFd,
  2620. NULL,
  2621. path,
  2622. wcslen(path),
  2623. (ACCESS_MASK)FILE_LIST_DIRECTORY|SYNCHRONIZE,
  2624. FILE_SHARE_READ|FILE_SHARE_WRITE|FILE_SHARE_DELETE,
  2625. FILE_DIRECTORY_FILE | FILE_OPEN_FOR_BACKUP_INTENT
  2626. );
  2627. if (NT_SUCCESS(err)) {
  2628. err = NtNotifyChangeDirectoryFile(*notifyFd,
  2629. p->NotifyChangeEvent[id],
  2630. NULL,
  2631. NULL,
  2632. &MystaticIoStatusBlock,
  2633. &Mystaticchangebuff,
  2634. sizeof(Mystaticchangebuff),
  2635. FILE_NOTIFY_CHANGE_EA,
  2636. (BOOLEAN)FALSE
  2637. );
  2638. if (!NT_SUCCESS(err)) {
  2639. FindCloseChangeNotification(*notifyFd);
  2640. *notifyFd = INVALID_HANDLE_VALUE;
  2641. }
  2642. }
  2643. #else
  2644. // we now queue notification changes to force srv to contact client
  2645. *notifyFd = FindFirstChangeNotificationW(path, FALSE, FILE_NOTIFY_CHANGE_EA);
  2646. #endif
  2647. FsArbLog(("NtNotifyChangeDirectoryFile(%ws) returns 0x%x FD: %p\n", path, err, *notifyFd));
  2648. // Register wait.
  2649. if (*notifyFd != INVALID_HANDLE_VALUE) {
  2650. p->WaitRegArgs[id].notifyFd = *notifyFd;
  2651. p->WaitRegArgs[id].vol = p;
  2652. p->WaitRegArgs[id].id = id;
  2653. if (!RegisterWaitForSingleObject(WaitRegHdl,
  2654. p->NotifyChangeEvent[id],
  2655. FsNotifyCallback,
  2656. (PVOID)(&p->WaitRegArgs[id]),
  2657. INFINITE,
  2658. WT_EXECUTEINWAITTHREAD)) {
  2659. err = GetLastError();
  2660. FsArbLog(("RegisterWaitForSingleObject(0x%x) returned %d\n", *notifyFd, err));
  2661. FindCloseChangeNotification(*notifyFd);
  2662. *notifyFd = INVALID_HANDLE_VALUE;
  2663. }
  2664. }
  2665. if (*notifyFd != INVALID_HANDLE_VALUE) {
  2666. int i;
  2667. // Since we have a valid file handle, map err to success.
  2668. err = ERROR_SUCCESS;
  2669. // Just register 8 extra notifications. That way if this does not work
  2670. // we would not flood the redirector. 8 since we can have max 8 node
  2671. // cluster in Windows Server 2003.
  2672. //
  2673. for (i = 0; i < 8; i++) {
  2674. #if 1
  2675. NtNotifyChangeDirectoryFile(*notifyFd,
  2676. p->NotifyChangeEvent[id],
  2677. NULL,
  2678. NULL,
  2679. &MystaticIoStatusBlock,
  2680. &Mystaticchangebuff,
  2681. sizeof(Mystaticchangebuff),
  2682. FILE_NOTIFY_CHANGE_EA,
  2683. (BOOLEAN)FALSE
  2684. );
  2685. #else
  2686. FindNextChangeNotification(*notifyFd);
  2687. #endif
  2688. }
  2689. } else {
  2690. FsArbLog(("Failed to register notification %d\n", err));
  2691. xFsClose(*Fd);
  2692. CrsClose(*CrsHdl);
  2693. *CrsHdl = NULL;
  2694. *Fd = INVALID_HANDLE_VALUE;
  2695. }
  2696. } else {
  2697. FsArbLog(("Failed to mount root '%S' %x\n", path, err));
  2698. CrsClose(*CrsHdl);
  2699. *CrsHdl = NULL;
  2700. }
  2701. } else if (err == ERROR_LOCK_VIOLATION || err == ERROR_SHARING_VIOLATION) {
  2702. FsArbLog(("Replica '%S' already locked\n", path));
  2703. } else {
  2704. // FsArbLog(("Replica '%S' probe failed 0x%x\n", path, err));
  2705. }
  2706. // If we successfully arbitrated for the quorum set the Share State field.
  2707. if (err == ERROR_SUCCESS) {
  2708. p->ShareState[id] = SHARE_STATE_ARBITRATED;
  2709. }
  2710. return err;
  2711. }
  2712. typedef struct {
  2713. FspArbitrate_t *arb;
  2714. DWORD id;
  2715. }FspProbeReplicaId_t;
  2716. typedef struct {
  2717. AddrList_t *addrList;
  2718. DWORD addrId;
  2719. }FspProbeAddr_t;
  2720. DWORD WINAPI
  2721. ProbeThread(LPVOID arg)
  2722. {
  2723. FspProbeAddr_t *probe = (FspProbeAddr_t *) arg;
  2724. FspArbitrate_t *arb = probe->addrList->arb;
  2725. DWORD i = probe->addrList->NodeId;
  2726. VolInfo_t *p = arb->vol;
  2727. NTSTATUS status=STATUS_SUCCESS;
  2728. HANDLE crshdl, fshdl, notifyhdl, waitRegHdl, treeConnHdl=INVALID_HANDLE_VALUE;
  2729. WCHAR path[MAX_PATH];
  2730. LPWSTR myAddr=probe->addrList->Addr[probe->addrId];
  2731. // set our priority
  2732. SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL);
  2733. FsArbLog(("Probe thread for Replica %d Addr %ws\n", i, myAddr));
  2734. while (TRUE) {
  2735. // Open tree connection. This has to be done inside the try loop because
  2736. // it might fail during first attempt.
  2737. if (treeConnHdl == INVALID_HANDLE_VALUE) {
  2738. StringCchPrintfW(path, MAX_PATH, L"%ws\\%ws", myAddr, p->Root);
  2739. status = CreateTreeConnection(path, &treeConnHdl);
  2740. FsArbLog(("CreateTreeConnection(%ws) returned 0x%x hdl 0x%x\n", path, status, treeConnHdl));
  2741. if ((!NT_SUCCESS(status))||(treeConnHdl == INVALID_HANDLE_VALUE)) {
  2742. // set status to something that won't map to a network failure.
  2743. // We need to check for arbitration terminations and other cases below.
  2744. //
  2745. status = ERROR_LOCK_VIOLATION;
  2746. treeConnHdl = INVALID_HANDLE_VALUE;
  2747. goto Retry;
  2748. }
  2749. // Sleep.
  2750. Sleep(MNS_LOCK_DELAY * probe->addrId);
  2751. }
  2752. status = FspOpenReplica(p, i, myAddr, &crshdl, &fshdl, &notifyhdl, &waitRegHdl);
  2753. Retry:
  2754. if (status == ERROR_SUCCESS) {
  2755. EnterCriticalSection(&arb->Lock);
  2756. FsArbLog(("Probe Thread probe replica %d suceeded, ShareSet %x\n", i, (arb->NewAliveSet|(1<<i))));
  2757. arb->CrsHdl[i] = crshdl;
  2758. arb->Fd[i] = fshdl;
  2759. arb->NotifyFd[i] = notifyhdl;
  2760. arb->WaitRegHdl[i] = waitRegHdl;
  2761. arb->TreeConnHdl[i] = treeConnHdl;
  2762. arb->NewAliveSet |= (1 << i);
  2763. arb->Count++;
  2764. if (CRS_QUORUM(arb->Count, arb->DiskListSz)) {
  2765. SetEvent(arb->GotQuorumEvent);
  2766. }
  2767. LeaveCriticalSection(&arb->Lock);
  2768. break;
  2769. }
  2770. else if ((p->ShareState[i] == SHARE_STATE_ARBITRATED)||(p->GoingAway)) {
  2771. // Don't increment the count here, do it in ProbeNodeThread() to prevent.
  2772. // multiple increments.
  2773. //
  2774. // Check for the the go away flag.
  2775. #if 0
  2776. // Some other thread managed to get the share. Consider it to be success.
  2777. EnterCriticalSection(&arb->Lock);
  2778. FsArbLog(("Some other thread managed to win arbitration for the share, consider success.\n"));
  2779. arb->Count++;
  2780. if (CRS_QUORUM(arb->Count, arb->DiskListSz)) {
  2781. SetEvent(arb->GotQuorumEvent);
  2782. }
  2783. LeaveCriticalSection(&arb->Lock);
  2784. #endif
  2785. break;
  2786. }
  2787. else {
  2788. // If arbitration has been cancelled, bail out.
  2789. EnterCriticalSection(&arb->Lock);
  2790. if (arb->State != ARB_STATE_BUSY) {
  2791. LeaveCriticalSection(&arb->Lock);
  2792. break;
  2793. }
  2794. LeaveCriticalSection(&arb->Lock);
  2795. }
  2796. if ((status != ERROR_LOCK_VIOLATION) &&
  2797. (status != ERROR_SHARING_VIOLATION) &&
  2798. IsNetworkFailure(status)) {
  2799. xFsClose(treeConnHdl);
  2800. treeConnHdl = INVALID_HANDLE_VALUE;
  2801. }
  2802. // retry in 5 seconds again
  2803. Sleep(5 * 1000);
  2804. }
  2805. if ((status != STATUS_SUCCESS) && (treeConnHdl != INVALID_HANDLE_VALUE)) {
  2806. xFsClose(treeConnHdl);
  2807. }
  2808. return status;
  2809. }
  2810. DWORD WINAPI
  2811. ProbeNodeThread(LPVOID arg)
  2812. {
  2813. FspProbeReplicaId_t *probe=(FspProbeReplicaId_t *) arg;
  2814. FspArbitrate_t *arb=probe->arb;
  2815. AddrList_t aList;
  2816. NTSTATUS status;
  2817. DWORD ndx;
  2818. HANDLE hdls[MAX_ADDR_NUM];
  2819. FspProbeAddr_t probeAddr[MAX_ADDR_NUM];
  2820. DWORD hdlCount=0;
  2821. RtlZeroMemory(&aList, sizeof(aList));
  2822. aList.arb = probe->arb;
  2823. aList.NodeId = probe->id;
  2824. if ((status = GetTargetNodeAddresses(&aList)) != STATUS_SUCCESS) {
  2825. FsArbLog(("Failed to get node %u ip addresses, status 0x%x\n", probe->id, status));
  2826. return status;
  2827. }
  2828. if (aList.AddrSz == 0) {
  2829. FsArbLog(("Failed to get any target ipaddress, falling back on nodename\n"));
  2830. status = GetNodeName(aList.NodeId, aList.Addr[0]);
  2831. if (status == ERROR_SUCCESS) {
  2832. aList.AddrSz++;
  2833. }
  2834. }
  2835. for (ndx = 0; ndx < aList.AddrSz;ndx++) {
  2836. probeAddr[ndx].addrId = ndx;
  2837. aList.arb = probe->arb;
  2838. probeAddr[ndx].addrList = &aList;
  2839. hdls[ndx] = CreateThread(NULL, 0, ProbeThread, (LPVOID)(&probeAddr[ndx]), 0, NULL);
  2840. ASSERT(hdls[ndx] != NULL);
  2841. }
  2842. // Wait for the threads to complete.
  2843. if (aList.AddrSz) {
  2844. WaitForMultipleObjects(aList.AddrSz, hdls, TRUE, INFINITE);
  2845. }
  2846. // Handle the case where, the probe thread has got the share. The arbitrate threads have
  2847. // exited, but count has not been incremented.
  2848. //
  2849. EnterCriticalSection(&arb->Lock);
  2850. if ((!(arb->NewAliveSet & (1 << probe->id)))&&
  2851. (arb->vol->ShareState[probe->id] == SHARE_STATE_ARBITRATED)) {
  2852. arb->Count++;
  2853. if (CRS_QUORUM(arb->Count, arb->DiskListSz)) {
  2854. SetEvent(arb->GotQuorumEvent);
  2855. }
  2856. }
  2857. LeaveCriticalSection(&arb->Lock);
  2858. // Close all thread handles.
  2859. for (ndx = 0; ndx < aList.AddrSz;ndx++) {
  2860. CloseHandle(hdls[ndx]);
  2861. }
  2862. return 0;
  2863. }
  2864. DWORD WINAPI
  2865. VerifyThread(LPVOID arg)
  2866. /*
  2867. This function is called during arbitration to check the health of my owned shares.
  2868. */
  2869. {
  2870. FspProbeReplicaId_t *probe = (FspProbeReplicaId_t *) arg;
  2871. FspArbitrate_t *arb = probe->arb;
  2872. DWORD i = probe->id;
  2873. VolInfo_t *p = arb->vol;
  2874. ULONG_PTR rlen=0;
  2875. NTSTATUS status;
  2876. // set our priority
  2877. SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL);
  2878. FsArbLog(("Verify Thread for Replica %d\n", i));
  2879. while(TRUE) {
  2880. status = FspCheckFs(p, NULL, i, NULL, 0, NULL, &rlen);
  2881. if(status == STATUS_SUCCESS) {
  2882. EnterCriticalSection(&arb->Lock);
  2883. FsArbLog(("Verify Thread probe replica %d suceeded, ShareSet %x\n", i, (arb->NewAliveSet|(1<<i))));
  2884. arb->NewAliveSet |= (1<<i);
  2885. arb->Count++;
  2886. if (CRS_QUORUM(arb->Count, arb->DiskListSz)) {
  2887. SetEvent(arb->GotQuorumEvent);
  2888. }
  2889. LeaveCriticalSection(&arb->Lock);
  2890. break;
  2891. }
  2892. else if ((status != ERROR_LOCK_VIOLATION) &&
  2893. (status != ERROR_SHARING_VIOLATION) &&
  2894. IsNetworkFailure(status)) {
  2895. // No need to continue probing after these errors.
  2896. break;
  2897. } else if (p->GoingAway) {
  2898. break;
  2899. }
  2900. else {
  2901. // If arbitration has been cancelled, bail out.
  2902. EnterCriticalSection(&arb->Lock);
  2903. if (arb->State != ARB_STATE_BUSY) {
  2904. LeaveCriticalSection(&arb->Lock);
  2905. break;
  2906. }
  2907. LeaveCriticalSection(&arb->Lock);
  2908. }
  2909. // Sleep for 5 secs.
  2910. Sleep(5 * 1000);
  2911. }
  2912. return 0;
  2913. }
  2914. ULONG
  2915. FspFindMissingReplicas(VolInfo_t *p, ULONG set)
  2916. /*++
  2917. This should be called with exclusive lock held.
  2918. */
  2919. {
  2920. ULONG FoundSet = 0;
  2921. DWORD i, err;
  2922. HANDLE crshdl, fshdl, notifyfd;
  2923. // Just return here. No need to do anything.
  2924. // Trampoline functions would take care of this.
  2925. //if (set == 0)
  2926. return 0;
  2927. #if 0
  2928. for (i = 1; i < FsMaxNodes; i++) {
  2929. if (p->DiskList[i] == NULL)
  2930. continue;
  2931. if (!(set & (1 << i))) {
  2932. err = FspOpenReplica(p, i, &crshdl, &fshdl, &notifyfd);
  2933. if (err == STATUS_SUCCESS) {
  2934. if (p->CrsHdl[i] == NULL) {
  2935. p->NotifyFd[i] = notifyfd;
  2936. p->Fd[i] = fshdl;
  2937. p->CrsHdl[i] = crshdl;
  2938. FoundSet |= (1 << i);
  2939. } else {
  2940. // someone beat us to it, close ours
  2941. CrsClose(crshdl);
  2942. xFsClose(fshdl);
  2943. FindCloseChangeNotification(notifyfd);
  2944. }
  2945. }
  2946. }
  2947. }
  2948. if (FoundSet != 0)
  2949. FsArbLog(("New replica set after probe %x\n", FoundSet));
  2950. return FoundSet;
  2951. #endif
  2952. }
  2953. DWORD WINAPI
  2954. FspArbitrateThread(LPVOID arg)
  2955. {
  2956. FspArbitrate_t *arb = (FspArbitrate_t *)arg;
  2957. HANDLE hdl[FsMaxNodes];
  2958. DWORD i, count = 0, err;
  2959. FspProbeReplicaId_t Ids[FsMaxNodes];
  2960. BOOLEAN flag;
  2961. DWORD count1=0, count2=0;
  2962. IO_STATUS_BLOCK ios[FsMaxNodes];
  2963. FsArbLog(("ArbitrateThread begin\n"));
  2964. // set our priority
  2965. SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL);
  2966. // Before starting arbitration verify the health of existing shares. That would
  2967. // minimize chances of failure. This would clear up the stale handles.
  2968. //
  2969. FspInitAnswers(ios, NULL, NULL, 0);
  2970. TryAvailRequest(FspCheckFs, arb->vol, NULL, NULL, 0, NULL, 0, ios);
  2971. // Grab the reader's lock.
  2972. EnterCriticalSection(&arb->Lock);
  2973. RwLockShared(&arb->vol->Lock);
  2974. // Now copy rest of the arbitration stuff from volume info.
  2975. arb->OrigAliveSet = arb->vol->AliveSet;
  2976. arb->NewAliveSet = 0;
  2977. arb->Count = 0;
  2978. arb->DiskListSz = arb->vol->DiskListSz;
  2979. FsArbLog(("ArbitrateThread current AliveSet=%x\n", arb->OrigAliveSet));
  2980. // Get the epoch number, any member in the read set would do.
  2981. if (arb->vol->ReadSet) {
  2982. for (i=1;i<FsMaxNodes;i++) {
  2983. if (arb->vol->ReadSet & (1 << i)) {
  2984. arb->epoch = CrsGetEpoch(arb->vol->CrsHdl[i]);
  2985. break;
  2986. }
  2987. }
  2988. }
  2989. else {
  2990. arb->epoch = 0;
  2991. }
  2992. arb->State = ARB_STATE_BUSY;
  2993. LeaveCriticalSection(&arb->Lock);
  2994. // we now start a thread for each replica and do the probe in parallel
  2995. for (i = 1; i < FsMaxNodes; i++) {
  2996. if (arb->vol->DiskList[i] == NULL)
  2997. continue;
  2998. Ids[i].arb = arb;
  2999. Ids[i].id = i;
  3000. if (arb->OrigAliveSet & (1 << i)) {
  3001. hdl[count] = CreateThread(NULL, 0, VerifyThread, (LPVOID)(&Ids[i]), 0, NULL);
  3002. }
  3003. else {
  3004. hdl[count] = CreateThread(NULL, 0, ProbeNodeThread, (LPVOID)(&Ids[i]), 0, NULL);
  3005. }
  3006. if (hdl[count] != NULL) {
  3007. count++;
  3008. } else {
  3009. FsArbLog(("Unable to create thread to probe replica %d\n", i));
  3010. ProbeThread((LPVOID) &Ids[i]);
  3011. }
  3012. }
  3013. // we now wait
  3014. err = WaitForMultipleObjects(count, hdl, TRUE, arb->vol->ArbTime);
  3015. if (err == WAIT_TIMEOUT) {
  3016. EnterCriticalSection(&arb->Lock);
  3017. // Make the arb threads exit, with whatever they have got. Do this only if
  3018. // the main thread hasn't already cancelled the arbitration.
  3019. // Expected state and their implications:
  3020. // 1. ARB_STATE_IDLE ==> Not possible.
  3021. // 2. ARB_STATE_BUSY ==> Either the main thread is still waiting or has already returned
  3022. // with success. In any case make the rest of the arb threads exit.
  3023. // 3. ARB_STATE_CANCEL ==> Main thread has cancelled the arbitration. We should cleanup
  3024. // even if we ultimately get quorum. Arb threads have already
  3025. // begun exiting.
  3026. //
  3027. if (arb->State == ARB_STATE_BUSY) {
  3028. arb->State = ARB_STATE_IDLE;
  3029. }
  3030. LeaveCriticalSection(&arb->Lock);
  3031. WaitForMultipleObjects(count, hdl, TRUE, INFINITE);
  3032. }
  3033. // Close the handles
  3034. for (i = 0; i < count; i++)
  3035. CloseHandle(hdl[i]);
  3036. // Signal the wait Event, if we haven't done so already.
  3037. SetEvent(arb->GotQuorumEvent);
  3038. // Now wait for cleanup event. This signals the fact that the main thread has left.
  3039. WaitForSingleObject(arb->CleanupEvent, INFINITE);
  3040. // If we have been cancelled in between, or arbitration failed.
  3041. // Close the handles we grabbed during arbitration.
  3042. // Then get writers lock and evict the AliveSet.
  3043. EnterCriticalSection(&arb->Lock);
  3044. if ((arb->State == ARB_STATE_CANCEL)||(!CRS_QUORUM(arb->Count, arb->DiskListSz))) {
  3045. for (i=1;i<FsMaxNodes;i++) {
  3046. if ((arb->NewAliveSet & (~arb->OrigAliveSet)) & (1 << i)) {
  3047. CrsClose(arb->CrsHdl[i]);
  3048. UnregisterWaitEx(arb->WaitRegHdl[i], INVALID_HANDLE_VALUE);
  3049. FindCloseChangeNotification(arb->NotifyFd[i]);
  3050. xFsClose(arb->Fd[i]);
  3051. xFsClose(arb->TreeConnHdl[i]);
  3052. arb->vol->ShareState[i] = SHARE_STATE_OFFLINE;
  3053. }
  3054. }
  3055. if (arb->OrigAliveSet) {
  3056. crs_epoch_t newEpoch;
  3057. // Exit Reader's lock, get writer's lock.
  3058. RwUnlockShared(&arb->vol->Lock);
  3059. RwLockExclusive(&arb->vol->Lock);
  3060. // We released the read lock and got write lock, if in between something
  3061. // changed, don't do anything. Check the epoch.
  3062. if (arb->vol->ReadSet) {
  3063. for(i=1;i<FsMaxNodes;i++) {
  3064. if(arb->vol->ReadSet & (1 << i)) {
  3065. newEpoch = CrsGetEpoch(arb->vol->CrsHdl[i]);
  3066. break;
  3067. }
  3068. }
  3069. }
  3070. else {
  3071. newEpoch = 0;
  3072. }
  3073. if (newEpoch == arb->epoch) {
  3074. FspEvict(arb->vol, arb->OrigAliveSet, TRUE);
  3075. }
  3076. RwUnlockExclusive(&arb->vol->Lock);
  3077. }
  3078. else {
  3079. RwUnlockShared(&arb->vol->Lock);
  3080. }
  3081. // Invoke the lost quorum callback.
  3082. // Logic: If the GoingAway flag is already set don't call the lost quorum
  3083. // callback, shutdown is already in progress. If the flag is not set
  3084. // call the lost quorum callback.
  3085. //
  3086. // NOTE: The GoingAway flag should only be set if we call the lost quorum callback.
  3087. // else clussvc might decide to retry arbitration.
  3088. //
  3089. if (arb->vol->GoingAway == FALSE) {
  3090. MajorityNodeSetCallLostquorumCallback(arb->vol->FsCtx->reshdl);
  3091. }
  3092. }
  3093. else {
  3094. // Arbitration suceeded. Get Writer's lock and Join the New shares if any.
  3095. RwUnlockShared(&arb->vol->Lock);
  3096. RwLockExclusive(&arb->vol->Lock);
  3097. // Evict the shares that we had originally but were unable to verify.
  3098. FspEvict(arb->vol, (~arb->NewAliveSet) & arb->OrigAliveSet, TRUE);
  3099. // Now add the new shares.
  3100. for (i=1;i<FsMaxNodes;i++) {
  3101. if ((arb->NewAliveSet & (~arb->OrigAliveSet)) & (1 << i)) {
  3102. if (arb->vol->AliveSet & (1 << i)) {
  3103. CrsClose(arb->CrsHdl[i]);
  3104. UnregisterWaitEx(arb->WaitRegHdl[i], INVALID_HANDLE_VALUE);
  3105. FindCloseChangeNotification(arb->NotifyFd[i]);
  3106. xFsClose(arb->Fd[i]);
  3107. xFsClose(arb->TreeConnHdl[i]);
  3108. }
  3109. else {
  3110. arb->vol->CrsHdl[i] = arb->CrsHdl[i];
  3111. arb->vol->Fd[i] = arb->Fd[i];
  3112. arb->vol->NotifyFd[i] = arb->NotifyFd[i];
  3113. arb->vol->WaitRegHdl[i] = arb->WaitRegHdl[i];
  3114. arb->vol->TreeConnHdl[i] = arb->TreeConnHdl[i];
  3115. }
  3116. }
  3117. }
  3118. FspJoin(arb->vol, arb->NewAliveSet & (~arb->OrigAliveSet));
  3119. // Now the ultimate test, check for quorum. If not there, evict all the shares.
  3120. // The FsReserve thread would make the callback in Resmon.
  3121. // No use trying to signal the main thread it has already gone back.
  3122. // NOTE: This should be a rare scenario. Arbitration was able to grab a majority
  3123. // of shares but was unable to join them, which is odd.
  3124. //
  3125. // [RajDas] 607258, since the reserve thread is working in parallel, it might
  3126. // have grabbed some shares. We should map that case to success.
  3127. // The MNS arbitrating thread however would not return success
  3128. // if arb->count is not majority.
  3129. // The assumption here is, whoever grabbed the shares other than the arbitrating
  3130. // threads would be able to successfully join the shares.
  3131. //
  3132. for (i=1;i<FsMaxNodes;i++) {
  3133. if (arb->NewAliveSet & (1<<i)) {
  3134. count1++;
  3135. }
  3136. if (arb->vol->ReadSet & (1<<i)) {
  3137. count2++;
  3138. }
  3139. }
  3140. if (!CRS_QUORUM((arb->Count - count1 + count2), arb->DiskListSz)) {
  3141. FspEvict(arb->vol, arb->vol->AliveSet, TRUE);
  3142. RwUnlockExclusive(&arb->vol->Lock);
  3143. // Invoke the lost quorum callback.
  3144. // Logic: If the GoingAway flag is already set don't call the lost quorum
  3145. // callback, shutdown is already in progress. If the flag is not set
  3146. // call the lost quorum callback.
  3147. //
  3148. // NOTE: The GoingAway flag should only be set if we call the lost quorum callback.
  3149. // else clussvc might decide to retry arbitration.
  3150. //
  3151. if (arb->vol->GoingAway == FALSE) {
  3152. MajorityNodeSetCallLostquorumCallback(arb->vol->FsCtx->reshdl);
  3153. }
  3154. } else {
  3155. RwUnlockExclusive(&arb->vol->Lock);
  3156. }
  3157. }
  3158. LeaveCriticalSection(&arb->Lock);
  3159. // Signal end of arbitration.
  3160. ArbitrationEnd((PVOID)arb->vol);
  3161. // Now cleanup the fields in arb. and free the structure.
  3162. CloseHandle(arb->CleanupEvent);
  3163. CloseHandle(arb->GotQuorumEvent);
  3164. DeleteCriticalSection(&arb->Lock);
  3165. LocalFree(arb);
  3166. return 0;
  3167. }
  3168. PVOID
  3169. FsArbitrate(PVOID arg, HANDLE *Cleanup, HANDLE *ArbThread)
  3170. /*++
  3171. This routine is reentrant, i.e. it can be called multiple number of times, at the same
  3172. time.
  3173. */
  3174. {
  3175. VolInfo_t *p = (VolInfo_t *)arg;
  3176. DWORD err=ERROR_SUCCESS;
  3177. FspArbitrate_t *arb=NULL;
  3178. if (p) {
  3179. if (!(arb = LocalAlloc(LMEM_ZEROINIT|LMEM_FIXED, sizeof(FspArbitrate_t)))) {
  3180. err = GetLastError();
  3181. FsArbLog(("FsArb: Failed to allocate memory, status=%d\n", err));
  3182. goto error_exit;
  3183. }
  3184. arb->State = ARB_STATE_IDLE;
  3185. arb->vol = p;
  3186. InitializeCriticalSection(&arb->Lock);
  3187. if ((arb->CleanupEvent = CreateEvent(NULL, FALSE, FALSE, NULL)) == NULL) {
  3188. err = GetLastError();
  3189. FsArbLog(("FsArb: Failed to create cleanup event, status=%d\n", err));
  3190. LocalFree(arb);
  3191. goto error_exit;
  3192. }
  3193. if ((arb->GotQuorumEvent = CreateEvent(NULL, FALSE, FALSE, NULL)) == NULL) {
  3194. err = GetLastError();
  3195. FsArbLog(("FsArb: Failed to create notify event, status=%d\n", err));
  3196. CloseHandle(arb->CleanupEvent);
  3197. LocalFree(arb);
  3198. goto error_exit;
  3199. }
  3200. // The rest of the fields in arb comes from the voulme info and should only be
  3201. // accessed while holding the shared lock, let the arbitrate thread do it.
  3202. //
  3203. FsArbLog(("FsArb: Creating arbitration thread\n"));
  3204. #if 0
  3205. // Start the arbitration thread, close the previous handle.
  3206. if (*ArbThread != NULL) {
  3207. CloseHandle(*ArbThread);
  3208. *ArbThread = NULL;
  3209. }
  3210. #endif
  3211. *ArbThread = CreateThread(NULL, 0, FspArbitrateThread, (LPVOID) arb, 0, NULL);
  3212. if (*ArbThread == NULL) {
  3213. err = GetLastError();
  3214. FsLogError(("FsArb: Failed to create arbitration thread status=%d\n", err));
  3215. CloseHandle(arb->CleanupEvent);
  3216. CloseHandle(arb->GotQuorumEvent);
  3217. LocalFree(arb);
  3218. goto error_exit;
  3219. }
  3220. }
  3221. else {
  3222. err = ERROR_INVALID_PARAMETER;
  3223. }
  3224. error_exit:
  3225. if (err != ERROR_SUCCESS) {
  3226. arb = NULL;
  3227. }
  3228. else {
  3229. *Cleanup = arb->CleanupEvent;
  3230. ArbitrationStart((PVOID)p);
  3231. }
  3232. SetLastError(err);
  3233. return (PVOID)arb;
  3234. }
  3235. DWORD
  3236. FsCompleteArbitration(PVOID arg, DWORD delta)
  3237. {
  3238. DWORD err;
  3239. FspArbitrate_t *arb=(FspArbitrate_t *)arg;
  3240. err = WaitForSingleObject(arb->GotQuorumEvent, delta);
  3241. ASSERT((err == WAIT_OBJECT_0)||(err == WAIT_TIMEOUT));
  3242. EnterCriticalSection(&arb->Lock);
  3243. if (CRS_QUORUM(arb->Count, arb->DiskListSz)) {
  3244. err = ERROR_SUCCESS;
  3245. }
  3246. else {
  3247. // Abandon this arbitration. This would make the probe/verify threads to exit.
  3248. arb->State = ARB_STATE_CANCEL;
  3249. err = ERROR_CANCELLED;
  3250. }
  3251. LeaveCriticalSection(&arb->Lock);
  3252. // Set the cleanup event, the arbitrate thread would clean everything up.
  3253. SetEvent(arb->CleanupEvent);
  3254. return err;
  3255. }
  3256. DWORD
  3257. FsRelease(PVOID vHdl)
  3258. /*++
  3259. Check if anybody is using this volume then fail the request.
  3260. */
  3261. {
  3262. DWORD i;
  3263. VolInfo_t *p = (VolInfo_t *)vHdl;
  3264. FsCtx_t *ctx = p->FsCtx;
  3265. NTSTATUS err;
  3266. if (p) {
  3267. ULONG set;
  3268. // lock volume
  3269. ASSERT(ctx != NULL);
  3270. // Grab the FS lock and then grab the vol lock in exclusive mode. This is just to
  3271. // throw away any slackers. There shouldn't be anybody accessing the volume at this
  3272. // moment anyway.
  3273. // Set the flag
  3274. p->GoingAway = TRUE;
  3275. LockEnter(ctx->Lock);
  3276. RwLockExclusive(&p->Lock);
  3277. if (p->UserList) {
  3278. FsArbLog(("FsRelease: Volume with Tid=%d in use by user %d\n", p->Tid, p->UserList->Uid));
  3279. RwUnlockExclusive(&p->Lock);
  3280. LockExit(ctx->Lock);
  3281. return ERROR_BUSY;
  3282. }
  3283. // Evict the Shares.
  3284. set = p->AliveSet;
  3285. FsArbLog(("FsRelease %S AliveSet %x\n", p->Root, set));
  3286. FspEvict(p, p->AliveSet, TRUE);
  3287. FsArbLog(("FsRelease %S done\n", p->Root));
  3288. // unlock volume
  3289. RwUnlockExclusive(&p->Lock);
  3290. RwLockDelete(&p->Lock);
  3291. //Close the root handle.
  3292. xFsClose(p->Fd[0]);
  3293. // Remove this volume from the file system context & free the memory.
  3294. ctx = p->FsCtx;
  3295. if (ctx->VolList == p) {
  3296. ctx->VolList = p->Next;
  3297. ctx->VolListSz--;
  3298. }
  3299. else {
  3300. VolInfo_t *last=ctx->VolList;
  3301. while ((last->Next != p) && last) {
  3302. last = last->Next;
  3303. }
  3304. if (last != NULL) {
  3305. last->Next = p->Next;
  3306. ctx->VolListSz--;
  3307. }
  3308. else {
  3309. FsLogError(("FsRelease: Volume not in FsContext VolumeList Vol root=%S\n", p->Root));
  3310. }
  3311. }
  3312. LockDestroy(p->ArbLock);
  3313. CloseHandle(p->AllArbsCompleteEvent);
  3314. // Deregister the clussvc termination registration.
  3315. if (p->ClussvcTerminationHandle != INVALID_HANDLE_VALUE) {
  3316. UnregisterWaitEx(p->ClussvcTerminationHandle, INVALID_HANDLE_VALUE);
  3317. }
  3318. if (p->ClussvcProcess != INVALID_HANDLE_VALUE) {
  3319. CloseHandle(p->ClussvcProcess);
  3320. }
  3321. for (i=0;i<FsMaxNodes;i++) {
  3322. if (p->NotifyChangeEvent[i] != NULL) {
  3323. CloseHandle(p->NotifyChangeEvent[i]);
  3324. }
  3325. }
  3326. MemFree(p);
  3327. LockExit(ctx->Lock);
  3328. err = ERROR_SUCCESS;
  3329. } else {
  3330. err = ERROR_INVALID_PARAMETER;
  3331. }
  3332. return err;
  3333. }
  3334. VOID
  3335. FsForceClose(
  3336. IN PVOID par,
  3337. IN BOOLEAN isFired
  3338. )
  3339. {
  3340. VolInfo_t *vol=(VolInfo_t *)par;
  3341. DWORD ndx;
  3342. if (vol == NULL) {
  3343. FsLogError(("FsForceClose: Exiting...\n"));
  3344. return;
  3345. }
  3346. FsLogError(("FsForceClose: Force terminating volume 0x%x, root %S, AliveSet 0x%x\n", vol, vol->Root, vol->AliveSet));
  3347. vol->GoingAway = TRUE;
  3348. for(ndx=1;ndx<FsMaxNodes;ndx++) {
  3349. if (vol->AliveSet & (1 << ndx)) {
  3350. CrsForceClose(vol->CrsHdl[ndx]);
  3351. }
  3352. }
  3353. // The rest of the handles need to be closed too.
  3354. // At this point I don't care for locks, clussvc has exited. Close all the
  3355. // user handles ASAP.
  3356. //
  3357. {
  3358. UserInfo_t *user=vol->UserList;
  3359. while (user != NULL) {
  3360. for(ndx=0;ndx<FsTableSize;ndx++) {
  3361. if (user->Table[ndx].hState != HandleStateInit) {
  3362. FspFreeHandle(user, (fhandle_t)ndx);
  3363. }
  3364. }
  3365. user = user->Next;
  3366. }
  3367. }
  3368. }
  3369. DWORD
  3370. FsReserve(PVOID vhdl)
  3371. {
  3372. VolInfo_t *p = (VolInfo_t *)vhdl;
  3373. DWORD err=ERROR_SUCCESS;
  3374. DWORD NewAliveSet;
  3375. PVOID CrsHdl;
  3376. HANDLE Fd;
  3377. HANDLE NotifyFd;
  3378. HANDLE WaitRegHdl;
  3379. HANDLE TreeConnHdl;
  3380. static DWORD LastProbed=1;
  3381. DWORD i, j, ndx;
  3382. IO_STATUS_BLOCK ios[FsMaxNodes];
  3383. DWORD sid;
  3384. AddrList_t nodeAddr;
  3385. // check if there is a new replica online
  3386. // FsLog(("FsReserve: Enter LastProbed=%d AliveSet=%x\n", LastProbed, p->AliveSet));
  3387. if ((p == NULL)||(p->GoingAway)) {
  3388. return ERROR_SHUTDOWN_IN_PROGRESS;
  3389. }
  3390. RwLockShared(&p->Lock);
  3391. // Probe for missing shares, one share at a time. In circular order.
  3392. for(i=1;i<=FsMaxNodes;i++) {
  3393. j = (LastProbed + i)%FsMaxNodes;
  3394. // FsLog(("FsReserve: Debug i=%d LastProbed=%d AliveSet=0x%x\n", j, LastProbed, p->AliveSet));
  3395. if (j == 0) {
  3396. continue;
  3397. }
  3398. if (p->DiskList[j] == NULL) {
  3399. continue;
  3400. }
  3401. if (p->AliveSet & (1 << j)) {
  3402. continue;
  3403. }
  3404. LastProbed = j;
  3405. RtlZeroMemory(&nodeAddr, sizeof(nodeAddr));
  3406. nodeAddr.NodeId = j;
  3407. err = GetTargetNodeAddresses(&nodeAddr);
  3408. if (err != ERROR_SUCCESS) {
  3409. continue;
  3410. }
  3411. // Now try them one by one.
  3412. for (ndx=0;ndx<nodeAddr.AddrSz;ndx++) {
  3413. LPWSTR myAddr=nodeAddr.Addr[ndx];
  3414. WCHAR path[MAX_PATH];
  3415. StringCchPrintfW(path, MAX_PATH, L"%ws\\%ws", myAddr, p->Root);
  3416. err = CreateTreeConnection(path, &TreeConnHdl);
  3417. if (err != STATUS_SUCCESS) {
  3418. continue;
  3419. }
  3420. err = FspOpenReplica(p, j, myAddr, &CrsHdl, &Fd, &NotifyFd, &WaitRegHdl);
  3421. if (err == STATUS_SUCCESS) {
  3422. // Join this replica and exit.
  3423. FsLog(("FsReserve: Got new Replica %d, AliveSet 0x%x, Joining\n", j, p->AliveSet));
  3424. RwUnlockShared(&p->Lock);
  3425. RwLockExclusive(&p->Lock);
  3426. if (p->AliveSet & (1 << j)) {
  3427. // GET OUT!!!!
  3428. FsLogError(("FsReserve: New share already in AliveSet=%x Id=%d\n", p->AliveSet, j));
  3429. CrsClose(CrsHdl);
  3430. xFsClose(Fd);
  3431. UnregisterWaitEx(WaitRegHdl, INVALID_HANDLE_VALUE);
  3432. FindCloseChangeNotification(NotifyFd);
  3433. xFsClose(TreeConnHdl);
  3434. }
  3435. else {
  3436. p->CrsHdl[j] = CrsHdl;
  3437. p->NotifyFd[j] = NotifyFd;
  3438. p->WaitRegHdl[j] = WaitRegHdl;
  3439. p->Fd[j] = Fd;
  3440. p->TreeConnHdl[j] = TreeConnHdl;
  3441. FspJoin(p, (1 << j));
  3442. }
  3443. RwUnlockExclusive(&p->Lock);
  3444. RwLockShared(&p->Lock);
  3445. break;
  3446. } else {
  3447. xFsClose(TreeConnHdl);
  3448. }
  3449. }
  3450. // FsLog(("FsReserve: Probed Replica=%d\n", LastProbed));
  3451. break;
  3452. }
  3453. RwUnlockShared(&p->Lock);
  3454. // check each crs handle to be valid
  3455. FspInitAnswers(ios, NULL, NULL, 0);
  3456. sid = SendAvailRequest(FspCheckFs, p, NULL,
  3457. NULL, 0, NULL, 0, ios);
  3458. err = RtlNtStatusToDosError(ios[sid].Status);
  3459. // Check if the volume is online atleast in readonly mode.
  3460. err = FsIsOnlineReadonly(p);
  3461. return err;
  3462. }
  3463. DWORD
  3464. FsIsOnlineReadWrite(PVOID vHdl)
  3465. {
  3466. VolInfo_t *p = (VolInfo_t *)vHdl;
  3467. DWORD err = ERROR_INVALID_PARAMETER;
  3468. if (p) {
  3469. // Just grab the reader lock & get the state.
  3470. RwLockShared(&p->Lock);
  3471. if (p->State == VolumeStateOnlineReadWrite) {
  3472. err = ERROR_SUCCESS;
  3473. }
  3474. else {
  3475. err = ERROR_RESOURCE_NOT_ONLINE;
  3476. }
  3477. RwUnlockShared(&p->Lock);
  3478. }
  3479. return err;
  3480. }
  3481. DWORD
  3482. FsIsOnlineReadonly(PVOID vHdl)
  3483. {
  3484. VolInfo_t *p = (VolInfo_t *)vHdl;
  3485. DWORD err = ERROR_INVALID_PARAMETER;
  3486. if (p) {
  3487. // Just grab the reader lock & get the state.
  3488. RwLockShared(&p->Lock);
  3489. if ((p->State == VolumeStateOnlineReadWrite)||
  3490. (p->State == VolumeStateOnlineReadonly)) {
  3491. err = ERROR_SUCCESS;
  3492. }
  3493. else {
  3494. err = ERROR_RESOURCE_NOT_ONLINE;
  3495. }
  3496. RwUnlockShared(&p->Lock);
  3497. }
  3498. return err;
  3499. }
  3500. DWORD
  3501. FsUpdateReplicaSet(PVOID vhdl, LPWSTR new_path[], DWORD new_len)
  3502. {
  3503. VolInfo_t *p = (VolInfo_t *)vhdl;
  3504. DWORD err=ERROR_SUCCESS;
  3505. DWORD i, j;
  3506. ULONG evict_mask, add_mask;
  3507. if (p == NULL) {
  3508. return ERROR_INVALID_PARAMETER;
  3509. }
  3510. if (new_len >= FsMaxNodes) {
  3511. return ERROR_TOO_MANY_NAMES;
  3512. }
  3513. RwLockExclusive(&p->Lock);
  3514. // Find which current replicas are in the new set, and keep them
  3515. // We skip the IPC share, since it's local
  3516. evict_mask = 0;
  3517. for (j=1; j < FsMaxNodes; j++) {
  3518. BOOLEAN found;
  3519. if (p->DiskList[j] == NULL)
  3520. continue;
  3521. found = FALSE;
  3522. for (i=1; i < FsMaxNodes; i++) {
  3523. if (new_path[i] != NULL && wcscmp(new_path[i], p->DiskList[j]) == 0) {
  3524. // keep this replica
  3525. found = TRUE;
  3526. break;
  3527. }
  3528. }
  3529. if (found == FALSE) {
  3530. // This replica is evicted from the new set, add to evict set mask
  3531. evict_mask |= (1 << j);
  3532. FsArbLog(("FsUpdateReplicaSet evict replica # %d '%S' set 0x%x\n",
  3533. j, p->DiskList[j], evict_mask));
  3534. }
  3535. }
  3536. // At this point we have all the replicas in the current and new sets. We now need
  3537. // to find replicas that are in the new set but missing from current set.
  3538. add_mask = 0;
  3539. for (i=1; i < FsMaxNodes; i++) {
  3540. BOOLEAN found;
  3541. if (new_path[i] == NULL)
  3542. continue;
  3543. found = FALSE;
  3544. for (j=1; j < FsMaxNodes; j++) {
  3545. if (p->DiskList[j] != NULL && wcscmp(new_path[i], p->DiskList[j]) == 0) {
  3546. // keep this replica
  3547. found = TRUE;
  3548. break;
  3549. }
  3550. }
  3551. if (found == FALSE) {
  3552. add_mask |= (1 << i);
  3553. FsArbLog(("FsUpdateReplicaSet adding replica # %d '%S' set 0x%x\n",
  3554. i, new_path[i], add_mask));
  3555. }
  3556. }
  3557. // we now update our disklist with new disklist
  3558. for (i = 1; i < FsMaxNodes; i++) {
  3559. if ((evict_mask & 1 << i) || (add_mask & (1 << i)))
  3560. FsArbLog(("FsUpdateReplicat %d: %S -> %S\n",
  3561. i, p->DiskList[i], new_path[i]));
  3562. p->DiskList[i] = new_path[i];
  3563. }
  3564. p->DiskListSz = new_len;
  3565. // If we are alive, apply changes
  3566. if (p->WriteSet != 0 || p->ReadSet != 0) {
  3567. // At this point we evict old replicas
  3568. if (evict_mask != 0)
  3569. FspEvict(p, evict_mask, TRUE);
  3570. // check if there is a new replica online
  3571. if (add_mask > 0) {
  3572. ULONG ReplicaSet = 0;
  3573. ReplicaSet = p->AliveSet;
  3574. ReplicaSet = FspFindMissingReplicas(p, ReplicaSet);
  3575. // we found new disks
  3576. if (ReplicaSet > 0) {
  3577. FspJoin(p, ReplicaSet);
  3578. }
  3579. }
  3580. }
  3581. RwUnlockExclusive(&p->Lock);
  3582. return err;
  3583. }
  3584. VOID
  3585. ArbitrationStart(PVOID arg)
  3586. {
  3587. VolInfo_t *vol=(VolInfo_t *)arg;
  3588. if (vol == NULL) {
  3589. return;
  3590. }
  3591. LockEnter(vol->ArbLock);
  3592. vol->NumArbsInProgress++;
  3593. if (vol->NumArbsInProgress==1) {
  3594. ResetEvent(vol->AllArbsCompleteEvent);
  3595. }
  3596. LockExit(vol->ArbLock);
  3597. }
  3598. VOID
  3599. ArbitrationEnd(PVOID arg)
  3600. {
  3601. VolInfo_t *vol=(VolInfo_t *)arg;
  3602. if (vol == NULL) {
  3603. return;
  3604. }
  3605. LockEnter(vol->ArbLock);
  3606. vol->NumArbsInProgress--;
  3607. if (vol->NumArbsInProgress == 0) {
  3608. SetEvent(vol->AllArbsCompleteEvent);
  3609. }
  3610. LockExit(vol->ArbLock);
  3611. }
  3612. VOID
  3613. WaitForArbCompletion(PVOID arg)
  3614. {
  3615. VolInfo_t *vol=(VolInfo_t *)arg;
  3616. if (vol == NULL) {
  3617. return;
  3618. }
  3619. WaitForSingleObject(vol->AllArbsCompleteEvent, INFINITE);
  3620. }
  3621. BOOL
  3622. IsArbInProgress(PVOID arg)
  3623. {
  3624. VolInfo_t *vol=(VolInfo_t *)arg;
  3625. BOOL ret=FALSE;
  3626. if (vol == NULL) {
  3627. return ret;
  3628. }
  3629. LockEnter(vol->ArbLock);
  3630. ret = (vol->NumArbsInProgress > 0);
  3631. LockExit(vol->ArbLock);
  3632. return ret;
  3633. }
  3634. NTSTATUS
  3635. CreateTreeConnection(LPWSTR path, HANDLE *Fd)
  3636. {
  3637. NTSTATUS status=STATUS_SUCCESS;
  3638. IO_STATUS_BLOCK ioStatus;
  3639. UNICODE_STRING uStr;
  3640. OBJECT_ATTRIBUTES objAttr;
  3641. PFILE_FULL_EA_INFORMATION EaBuffer=NULL, Ea=NULL;
  3642. USHORT TransportNameSize=0;
  3643. ULONG EaBufferSize=0;
  3644. UCHAR EaNameTransportNameSize;
  3645. WCHAR lPath[MAX_PATH];
  3646. EaNameTransportNameSize = (UCHAR) (ROUND_UP_COUNT(
  3647. strlen(EA_NAME_TRANSPORT) + sizeof(CHAR),
  3648. ALIGN_WCHAR
  3649. ) - sizeof(CHAR));
  3650. TransportNameSize = (USHORT)(wcslen(MNS_TRANSPORT) * sizeof(WCHAR));
  3651. EaBufferSize += ROUND_UP_COUNT(
  3652. FIELD_OFFSET(FILE_FULL_EA_INFORMATION, EaName[0]) +
  3653. EaNameTransportNameSize + sizeof(CHAR) +
  3654. TransportNameSize,
  3655. ALIGN_DWORD
  3656. );
  3657. EaBuffer = LocalAlloc(LMEM_FIXED|LMEM_ZEROINIT, EaBufferSize);
  3658. if (EaBuffer == NULL) {
  3659. status = STATUS_NO_MEMORY;
  3660. goto error_exit;
  3661. }
  3662. Ea = EaBuffer;
  3663. StringCbCopyA(Ea->EaName, EaBufferSize, EA_NAME_TRANSPORT);
  3664. Ea->EaNameLength = EaNameTransportNameSize;
  3665. StringCbCopyW(
  3666. (LPWSTR) &(Ea->EaName[EaNameTransportNameSize + sizeof(CHAR)]),
  3667. EaBufferSize,
  3668. MNS_TRANSPORT
  3669. );
  3670. Ea->EaValueLength = TransportNameSize;
  3671. Ea->Flags = 0;
  3672. Ea->NextEntryOffset = 0;
  3673. // Remove back slashes at the start of the path. <dest ip addr>\shareGuid$
  3674. while (*path == L'\\') {
  3675. path++;
  3676. }
  3677. status = StringCchPrintfW(lPath, MAX_PATH, L"%ws\\%ws", MNS_REDIRECTOR, path);
  3678. if (status != S_OK) {
  3679. goto error_exit;
  3680. }
  3681. uStr.Buffer = lPath;
  3682. uStr.Length = (USHORT)(wcslen(lPath) * sizeof(WCHAR));
  3683. uStr.MaximumLength = MAX_PATH * sizeof(WCHAR);
  3684. InitializeObjectAttributes(&objAttr, &uStr, OBJ_CASE_INSENSITIVE, NULL, NULL);
  3685. *Fd = INVALID_HANDLE_VALUE;
  3686. status = NtCreateFile(
  3687. Fd,
  3688. SYNCHRONIZE|FILE_READ_DATA|FILE_WRITE_DATA,
  3689. &objAttr,
  3690. &ioStatus,
  3691. 0,
  3692. FILE_ATTRIBUTE_NORMAL,
  3693. FILE_SHARE_READ|FILE_SHARE_WRITE,
  3694. FILE_OPEN,
  3695. FILE_SYNCHRONOUS_IO_ALERT|FILE_CREATE_TREE_CONNECTION,
  3696. EaBuffer,
  3697. EaBufferSize
  3698. );
  3699. error_exit:
  3700. if (NT_SUCCESS(status)) {
  3701. status = STATUS_SUCCESS;
  3702. }
  3703. else {
  3704. *Fd = INVALID_HANDLE_VALUE;
  3705. }
  3706. if (EaBuffer) {
  3707. LocalFree(EaBuffer);
  3708. }
  3709. return status;
  3710. }
  3711. DWORD
  3712. IsNodeConnected(HKEY hClusKey, LPWSTR netName, DWORD nid, BOOL *isConnected)
  3713. {
  3714. DWORD status=ERROR_SUCCESS;
  3715. HKEY hIntfsKey=NULL, hIntfKey=NULL;
  3716. WCHAR intName[MAX_PATH], netName1[MAX_PATH], nodeId[20];
  3717. FILETIME fileTime;
  3718. DWORD size, type;
  3719. DWORD ndx;
  3720. LONG tnid;
  3721. *isConnected = FALSE;
  3722. status = RegOpenKeyExW(hClusKey, CLUSREG_KEYNAME_NETINTERFACES, 0, KEY_READ, &hIntfsKey);
  3723. if (status != ERROR_SUCCESS) {
  3724. goto error_exit;
  3725. }
  3726. for (ndx=0;TRUE;ndx++) {
  3727. size = MAX_PATH;
  3728. status = RegEnumKeyExW(hIntfsKey, ndx, intName, &size, NULL, NULL, 0, &fileTime);
  3729. if (status != ERROR_SUCCESS) {
  3730. break;
  3731. }
  3732. status = RegOpenKeyExW(hIntfsKey, intName, 0, KEY_READ, &hIntfKey);
  3733. if (status != ERROR_SUCCESS) {
  3734. break;
  3735. }
  3736. size = MAX_PATH;
  3737. status = RegQueryValueExW(hIntfKey, CLUSREG_NAME_NETIFACE_NETWORK, NULL, &type, (LPBYTE)netName1, &size);
  3738. if (status != ERROR_SUCCESS) {
  3739. break;
  3740. }
  3741. if (wcscmp(netName, netName1)) {
  3742. // Wrong network, Close interface key and continue.
  3743. RegCloseKey(hIntfKey);
  3744. hIntfKey = NULL;
  3745. continue;
  3746. }
  3747. size = 20;
  3748. status = RegQueryValueExW(hIntfKey, CLUSREG_NAME_NETIFACE_NODE, NULL, &type, (LPBYTE)nodeId, &size);
  3749. if (status != ERROR_SUCCESS) {
  3750. break;
  3751. }
  3752. tnid = wcstol(nodeId, NULL, 10);
  3753. if (tnid != nid) {
  3754. // Wrong node, close interface key and continue.
  3755. RegCloseKey(hIntfKey);
  3756. hIntfKey = NULL;
  3757. continue;
  3758. }
  3759. // The node is connected.
  3760. *isConnected = TRUE;
  3761. break;
  3762. }
  3763. // This is the only expected error.
  3764. if (status == ERROR_NO_MORE_ITEMS) {
  3765. status = ERROR_SUCCESS;
  3766. }
  3767. error_exit:
  3768. if (hIntfKey) {
  3769. RegCloseKey(hIntfKey);
  3770. }
  3771. if (hIntfsKey) {
  3772. RegCloseKey(hIntfsKey);
  3773. }
  3774. return status;
  3775. }
  3776. DWORD
  3777. GetLocalNodeId(HKEY hClusKey)
  3778. {
  3779. WCHAR nodeName[MAX_PATH], nodeId[MAX_PATH], cName[MAX_PATH];
  3780. DWORD ndx;
  3781. HKEY hNodesKey=NULL, hNodeKey=NULL;
  3782. DWORD nId=0, size, type;
  3783. DWORD status=ERROR_SUCCESS;
  3784. FILETIME fileTime;
  3785. status = RegOpenKeyExW(hClusKey, CLUSREG_KEYNAME_NODES, 0, KEY_READ, &hNodesKey);
  3786. if (status != ERROR_SUCCESS) {
  3787. goto error_exit;
  3788. }
  3789. size = MAX_PATH;
  3790. if (!GetComputerNameW(cName, &size)) {
  3791. status = GetLastError();
  3792. goto error_exit;
  3793. }
  3794. for (ndx=0;TRUE;ndx++) {
  3795. size = MAX_PATH;
  3796. status = RegEnumKeyExW(hNodesKey, ndx, nodeId, &size, NULL, NULL, 0, &fileTime);
  3797. if (status != ERROR_SUCCESS) {
  3798. break;
  3799. }
  3800. status = RegOpenKeyExW(hNodesKey, nodeId, 0, KEY_READ, &hNodeKey);
  3801. if (status != ERROR_SUCCESS) {
  3802. break;
  3803. }
  3804. size = MAX_PATH;
  3805. status = RegQueryValueExW(hNodeKey, CLUSREG_NAME_NODE_NAME, NULL, &type, (LPBYTE)nodeName, &size);
  3806. if (status != ERROR_SUCCESS) {
  3807. break;
  3808. }
  3809. if (wcscmp(cName, nodeName)) {
  3810. RegCloseKey(hNodeKey);
  3811. hNodeKey = NULL;
  3812. continue;
  3813. }
  3814. // Match.
  3815. nId = wcstol(nodeId, NULL, 10);
  3816. break;
  3817. }
  3818. error_exit:
  3819. if (hNodeKey) {
  3820. RegCloseKey(hNodeKey);
  3821. }
  3822. if (hNodesKey) {
  3823. RegCloseKey(hNodesKey);
  3824. }
  3825. SetLastError(status);
  3826. return nId;
  3827. }
  3828. DWORD
  3829. GetNodeName(DWORD nodeId, LPWSTR nodeName)
  3830. {
  3831. WCHAR nName[MAX_PATH], nId[MAX_PATH];
  3832. DWORD status=ERROR_SUCCESS;
  3833. HKEY hNodesKey=NULL, hNodeKey=NULL, hClusKey=NULL;
  3834. DWORD size, type, ndx, id;
  3835. FILETIME fileTime;
  3836. if ((status = RegOpenKeyExW(HKEY_LOCAL_MACHINE, CLUSREG_KEYNAME_CLUSTER, 0, KEY_READ, &hClusKey)) != ERROR_SUCCESS) {
  3837. goto error_exit;
  3838. }
  3839. status = RegOpenKeyExW(hClusKey, CLUSREG_KEYNAME_NODES, 0, KEY_READ, &hNodesKey);
  3840. if (status != ERROR_SUCCESS) {
  3841. goto error_exit;
  3842. }
  3843. for (ndx=0;TRUE;ndx++) {
  3844. size = MAX_PATH;
  3845. status = RegEnumKeyExW(hNodesKey, ndx, nId, &size, NULL, NULL, 0, &fileTime);
  3846. if (status != ERROR_SUCCESS) {
  3847. break;
  3848. }
  3849. id = wcstol(nId, NULL, 10);
  3850. if (id != nodeId) {
  3851. // Wrong node
  3852. continue;
  3853. }
  3854. status = RegOpenKeyExW(hNodesKey, nId, 0, KEY_READ, &hNodeKey);
  3855. if (status != ERROR_SUCCESS) {
  3856. break;
  3857. }
  3858. size = MAX_PATH;
  3859. status = RegQueryValueExW(hNodeKey, CLUSREG_NAME_NODE_NAME, NULL, &type, (LPBYTE)nName, &size);
  3860. if (status != ERROR_SUCCESS) {
  3861. break;
  3862. }
  3863. // This is a bit of cheating. I know nodeName is of size MAX_PATH.
  3864. StringCchCopyW(nodeName, MAX_PATH, nName);
  3865. break;
  3866. }
  3867. error_exit:
  3868. if (hNodeKey) {
  3869. RegCloseKey(hNodeKey);
  3870. }
  3871. if (hNodesKey) {
  3872. RegCloseKey(hNodesKey);
  3873. }
  3874. if (hClusKey) {
  3875. RegCloseKey(hClusKey);
  3876. }
  3877. return status;
  3878. }
  3879. DWORD
  3880. GetTargetNodeAddresses(AddrList_t *addrList)
  3881. {
  3882. ULONG lid, tnid;
  3883. LPWSTR networkGuids[MAX_ADDR_NUM];
  3884. DWORD ndx, ndx1, size, type, role, pri;
  3885. DWORD status=ERROR_SUCCESS;
  3886. // HCLUSTER hCluster=NULL;
  3887. HKEY hClusKey=NULL;
  3888. HKEY hNetsKey=NULL, hNetKey=NULL;
  3889. HKEY hIntfsKey=NULL, hIntfKey=NULL;
  3890. FILETIME fileTime;
  3891. WCHAR netName[MAX_PATH], intfName[MAX_PATH], nodeId[20], intAddr[MAX_ADDR_SIZE];
  3892. BOOL isConnected;
  3893. for (ndx=0;ndx<MAX_ADDR_NUM;ndx++) {
  3894. networkGuids[ndx] = NULL;
  3895. }
  3896. #if 0
  3897. // get the local node id.
  3898. ndx = 20;
  3899. if ((status = GetClusterNodeId(NULL, nodeId, &ndx)) != ERROR_SUCCESS) {
  3900. goto error_exit;
  3901. }
  3902. lid = wcstol(nodeId, NULL, 10);
  3903. #endif
  3904. // Enumearte all the networks and put the guids in the array according to their
  3905. // priorities. Remove the networks which are for client access only or ones to which
  3906. // the local node is not directly connected.
  3907. //
  3908. #if 0
  3909. if ((hCluster = OpenCluster(NULL)) == NULL) {
  3910. status = GetLastError();
  3911. goto error_exit;
  3912. }
  3913. #endif
  3914. if ((status = RegOpenKeyExW(HKEY_LOCAL_MACHINE, CLUSREG_KEYNAME_CLUSTER, 0, KEY_READ, &hClusKey)) != ERROR_SUCCESS) {
  3915. goto error_exit;
  3916. }
  3917. if ((lid = GetLocalNodeId(hClusKey)) == 0) {
  3918. status = GetLastError();
  3919. goto error_exit;
  3920. }
  3921. status = RegOpenKeyExW(hClusKey, CLUSREG_KEYNAME_NETWORKS, 0, KEY_READ, &hNetsKey);
  3922. if (status != ERROR_SUCCESS) {
  3923. goto error_exit;
  3924. }
  3925. for (ndx = 0;TRUE;ndx++) {
  3926. size = MAX_PATH;
  3927. status = RegEnumKeyExW(hNetsKey, ndx, netName, &size, NULL, NULL, 0, &fileTime);
  3928. if (status != ERROR_SUCCESS) {
  3929. break;
  3930. }
  3931. // Open the network GUID.
  3932. status = RegOpenKeyExW(hNetsKey, netName, 0, KEY_READ, &hNetKey);
  3933. if (status != ERROR_SUCCESS) {
  3934. break;
  3935. }
  3936. // Check that the network is for internal access.
  3937. size = sizeof(DWORD);
  3938. status = RegQueryValueExW(hNetKey, CLUSREG_NAME_NET_ROLE, NULL, &type, (LPBYTE)&role, &size);
  3939. if (status != ERROR_SUCCESS) {
  3940. break;
  3941. }
  3942. if (!(role & ClusterNetworkRoleInternalUse)) {
  3943. RegCloseKey(hNetKey);
  3944. hNetKey = NULL;
  3945. continue;
  3946. }
  3947. // Now check that the local node is connected to the network.
  3948. status = IsNodeConnected(hClusKey, netName, lid, &isConnected);
  3949. if (status != ERROR_SUCCESS) {
  3950. break;
  3951. }
  3952. if (!isConnected) {
  3953. RegCloseKey(hNetKey);
  3954. hNetKey = NULL;
  3955. continue;
  3956. }
  3957. // Query the network priority.
  3958. size = sizeof(DWORD);
  3959. status = RegQueryValueExW(hNetKey, CLUSREG_NAME_NET_PRIORITY, NULL, &type, (LPBYTE)&pri, &size);
  3960. if (status != ERROR_SUCCESS) {
  3961. break;
  3962. }
  3963. // Only consider networks with priorities 0<->(MAX_ADDR_NUM-1) included.
  3964. if (pri >= MAX_ADDR_NUM) {
  3965. RegCloseKey(hNetKey);
  3966. hNetKey = NULL;
  3967. continue;
  3968. }
  3969. size = (wcslen(netName) + 1) * sizeof(WCHAR);
  3970. networkGuids[pri] = HeapAlloc(GetProcessHeap(), 0, size);
  3971. if (networkGuids[pri] == NULL) {
  3972. status = GetLastError();
  3973. break;
  3974. }
  3975. status = StringCbCopyW(networkGuids[pri], size, netName);
  3976. if (status != S_OK) {
  3977. break;
  3978. }
  3979. RegCloseKey(hNetKey);
  3980. hNetKey = NULL;
  3981. }
  3982. // These are the only 2 exit conditions tolerated.
  3983. if ((status != ERROR_SUCCESS)&&(status != ERROR_NO_MORE_ITEMS)) {
  3984. goto error_exit;
  3985. }
  3986. status = ERROR_SUCCESS;
  3987. // Now enumerate the interfaces and get the ip addresses of the target node corresponding
  3988. // to the networks.
  3989. status = RegOpenKeyExW(hClusKey, CLUSREG_KEYNAME_NETINTERFACES, 0, KEY_READ, &hIntfsKey);
  3990. if (status != ERROR_SUCCESS) {
  3991. goto error_exit;
  3992. }
  3993. for (ndx1=0;ndx1<MAX_ADDR_NUM;ndx1++) {
  3994. if (networkGuids[ndx1] == NULL) {
  3995. continue;
  3996. }
  3997. for (ndx=0;TRUE;ndx++) {
  3998. size = MAX_PATH;
  3999. status = RegEnumKeyExW(hIntfsKey, ndx, intfName, &size, NULL, NULL, 0, &fileTime);
  4000. if (status != ERROR_SUCCESS) {
  4001. break;
  4002. }
  4003. status = RegOpenKeyExW(hIntfsKey, intfName, 0, KEY_READ, &hIntfKey);
  4004. if (status != ERROR_SUCCESS) {
  4005. break;
  4006. }
  4007. size = MAX_PATH;
  4008. status = RegQueryValueExW(hIntfKey, CLUSREG_NAME_NETIFACE_NETWORK, NULL, &type, (LPBYTE)netName, &size);
  4009. if (status != ERROR_SUCCESS) {
  4010. break;
  4011. }
  4012. if (wcscmp(netName, networkGuids[ndx1])) {
  4013. // Wrong network, close key and continue.
  4014. RegCloseKey(hIntfKey);
  4015. hIntfKey = NULL;
  4016. continue;
  4017. }
  4018. size = 20;
  4019. status = RegQueryValueExW(hIntfKey, CLUSREG_NAME_NETIFACE_NODE, NULL, &type, (LPBYTE)nodeId, &size);
  4020. if (status != ERROR_SUCCESS) {
  4021. break;
  4022. }
  4023. tnid = wcstol(nodeId, NULL, 10);
  4024. // If wrong target node, or I have already go MAX_ADDR_NUM addresses to the target,
  4025. // don't bother.
  4026. if ((tnid != addrList->NodeId)||(addrList->AddrSz >= MAX_ADDR_NUM)) {
  4027. // Wrong node or max target addr reached, close key and continue.
  4028. RegCloseKey(hIntfKey);
  4029. hIntfKey = NULL;
  4030. continue;
  4031. }
  4032. // Copy the ipaddress from the network interface key to the addrlist.
  4033. size = MAX_ADDR_SIZE;
  4034. status = RegQueryValueExW(hIntfKey, CLUSREG_NAME_NETIFACE_ADDRESS, NULL, &type, (LPBYTE)intAddr, &size);
  4035. if (status != ERROR_SUCCESS) {
  4036. break;
  4037. }
  4038. StringCchCopyW(addrList->Addr[addrList->AddrSz], MAX_ADDR_SIZE, intAddr);
  4039. addrList->AddrSz++;
  4040. RegCloseKey(hIntfKey);
  4041. hIntfKey = NULL;
  4042. }
  4043. if ((status != ERROR_SUCCESS)&&(status != ERROR_NO_MORE_ITEMS)) {
  4044. goto error_exit;
  4045. }
  4046. status = ERROR_SUCCESS;
  4047. // Just to be sure close the interfaces key and reopen it.
  4048. RegCloseKey(hIntfsKey);
  4049. hIntfsKey = NULL;
  4050. status = RegOpenKeyExW(hClusKey, CLUSREG_KEYNAME_NETINTERFACES, 0, KEY_READ, &hIntfsKey);
  4051. if (status != ERROR_SUCCESS) {
  4052. goto error_exit;
  4053. }
  4054. }
  4055. error_exit:
  4056. // Just testing.
  4057. // FsLog(("Node %u addresses, Sz:%u\n", addrList->NodeId, addrList->AddrSz));
  4058. // for (ndx=0;ndx<addrList->AddrSz;ndx++) {
  4059. // FsLog(("addr[%u]=%ws\n", ndx, addrList->Addr[ndx]));
  4060. // }
  4061. // This is the only tolerated error
  4062. if (status == ERROR_NO_MORE_ITEMS) {
  4063. status = ERROR_SUCCESS;
  4064. }
  4065. for(ndx=0;ndx<MAX_ADDR_NUM;ndx++) {
  4066. if (networkGuids[ndx] != NULL) {
  4067. HeapFree(GetProcessHeap(), 0, networkGuids[ndx]);
  4068. }
  4069. }
  4070. if (hIntfKey) {
  4071. RegCloseKey(hIntfKey);
  4072. }
  4073. if (hIntfsKey) {
  4074. RegCloseKey(hIntfsKey);
  4075. }
  4076. if (hNetKey) {
  4077. RegCloseKey(hNetKey);
  4078. }
  4079. if (hNetsKey) {
  4080. RegCloseKey(hNetsKey);
  4081. }
  4082. if (hClusKey) {
  4083. RegCloseKey(hClusKey);
  4084. }
  4085. #if 0
  4086. if (hCluster) {
  4087. CloseCluster(hCluster);
  4088. }
  4089. #endif
  4090. return status;
  4091. }
  4092. VOID
  4093. FsSignalShutdown(PVOID arg)
  4094. {
  4095. VolInfo_t *vol=(VolInfo_t *)arg;
  4096. if (vol) {
  4097. FsLog(("Vol '%S' going away\n", vol->Root));
  4098. vol->GoingAway = TRUE;
  4099. }
  4100. }
  4101. #if USE_RTL_RESOURCE
  4102. // Use RTL implementation of Reader-Writer Lock. Not as efficient as the RPC one.
  4103. // Defined in Fsp.h
  4104. #else
  4105. // This is the Reader Writer lock api, copied from CSharedLock class of RPC.
  4106. DWORD
  4107. RwLockInit(RwLock *lock)
  4108. {
  4109. DWORD status=ERROR_SUCCESS;
  4110. // ClRtlLogWmi("RwLockInit() Enter\n");
  4111. InitializeCriticalSection(&lock->lock);
  4112. if (!lock->hevent) {
  4113. lock->hevent = CreateEvent(NULL, FALSE, FALSE, NULL);
  4114. }
  4115. if (!lock->hevent) {
  4116. status = GetLastError();
  4117. DeleteCriticalSection(&lock->lock);
  4118. return status;
  4119. }
  4120. lock->readers = 0;
  4121. lock->writers = 0;
  4122. // ClRtlLogWmi("RwLockInit() Exit\n");
  4123. return status;
  4124. }
  4125. VOID
  4126. RwLockDelete(RwLock *lock)
  4127. {
  4128. // ClRtlLogWmi("RwLockDelete() Enter\n");
  4129. DeleteCriticalSection(&lock->lock);
  4130. if (lock->hevent) {
  4131. CloseHandle(lock->hevent);
  4132. lock->hevent = 0;
  4133. }
  4134. lock->readers = 0;
  4135. lock->writers = 0;
  4136. // ClRtlLogWmi("RwLockDelete() Exit\n");
  4137. }
  4138. VOID
  4139. RwLockShared(RwLock *lock)
  4140. {
  4141. CHAR arr[200];
  4142. ASSERT(lock->hevent != 0);
  4143. sprintf(arr, "RwLockShared(readers=%d, writers=%d) Enter\n", lock->readers, lock->writers);
  4144. // ClRtlLogWmi(arr);
  4145. InterlockedIncrement(&lock->readers);
  4146. if (lock->writers) {
  4147. if (InterlockedDecrement(&lock->readers) == 0) {
  4148. SetEvent(lock->hevent);
  4149. }
  4150. EnterCriticalSection(&lock->lock);
  4151. InterlockedIncrement(&lock->readers);
  4152. LeaveCriticalSection(&lock->lock);
  4153. }
  4154. sprintf(arr, "RwLockShared(readers=%d, writers=%d) Exit\n", lock->readers, lock->writers);
  4155. // ClRtlLogWmi(arr);
  4156. }
  4157. VOID
  4158. RwUnlockShared(RwLock *lock)
  4159. {
  4160. CHAR arr[200];
  4161. ASSERT(lock->readers > 0);
  4162. ASSERT(lock->hevent != 0);
  4163. sprintf(arr, "RwUnlockShared(readers=%d, writers=%d) Enter\n", lock->readers, lock->writers);
  4164. // ClRtlLogWmi(arr);
  4165. if ((InterlockedDecrement(&lock->readers) == 0)&&lock->writers) {
  4166. SetEvent(lock->hevent);
  4167. }
  4168. sprintf(arr, "RwUnlockShared(readers=%d, writers=%d) Exit\n", lock->readers, lock->writers);
  4169. // ClRtlLogWmi(arr);
  4170. }
  4171. VOID
  4172. RwLockExclusive(RwLock *lock)
  4173. {
  4174. CHAR arr[200];
  4175. ASSERT(lock->hevent != 0);
  4176. sprintf(arr, "RwLockExclusive(readers=%d, writers=%d) Enter\n", lock->readers, lock->writers);
  4177. // ClRtlLogWmi(arr);
  4178. EnterCriticalSection(&lock->lock);
  4179. lock->writers++;
  4180. while (lock->readers) {
  4181. WaitForSingleObject(lock->hevent, INFINITE);
  4182. }
  4183. sprintf(arr, "RwLockExclusive(readers=%d, writers=%d) Exit\n", lock->readers, lock->writers);
  4184. // ClRtlLogWmi(arr);
  4185. }
  4186. VOID
  4187. RwUnlockExclusive(RwLock *lock)
  4188. {
  4189. CHAR arr[200];
  4190. ASSERT(lock->writers > 0);
  4191. ASSERT(lock->hevent != 0);
  4192. sprintf(arr, "RwUnlockExclusive(readers=%d, writers=%d) Enter\n", lock->readers, lock->writers);
  4193. // ClRtlLogWmi(arr);
  4194. lock->writers--;
  4195. LeaveCriticalSection(&lock->lock);
  4196. sprintf(arr, "RwUnlockExclusive(readers=%d, writers=%d) Exit\n", lock->readers, lock->writers);
  4197. // ClRtlLogWmi(arr);
  4198. }
  4199. #endif