Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

380 lines
12 KiB

  1. /*++
  2. Copyright (c) 1996 Microsoft Corporation
  3. Module Name:
  4. send.c
  5. Abstract:
  6. APIs for the client side of the checkpoint manager
  7. Author:
  8. John Vert (jvert) 1/14/1997
  9. Revision History:
  10. --*/
  11. #include "cpp.h"
  12. CL_NODE_ID
  13. CppGetQuorumNodeId(
  14. VOID
  15. )
  16. /*++
  17. Routine Description:
  18. Returns the node ID of the node owning the quorum resource.
  19. Arguments:
  20. None.
  21. Return Value:
  22. ERROR_SUCCESS if successful
  23. Win32 error code otherwise
  24. --*/
  25. {
  26. PFM_RESOURCE QuorumResource;
  27. DWORD Status;
  28. DWORD NodeId;
  29. Status = FmFindQuorumResource(&QuorumResource);
  30. if (Status != ERROR_SUCCESS) {
  31. return((DWORD)-1);
  32. }
  33. NodeId = FmFindQuorumOwnerNodeId(QuorumResource);
  34. OmDereferenceObject(QuorumResource);
  35. return(NodeId);
  36. }
  37. DWORD
  38. CpSaveDataFile(
  39. IN PFM_RESOURCE Resource,
  40. IN DWORD dwCheckpointId,
  41. IN LPCWSTR lpszFileName,
  42. IN BOOLEAN fCryptoCheckpoint
  43. )
  44. /*++
  45. Routine Description:
  46. This function checkpoints arbitrary data for the specified resource. The data is stored on the quorum
  47. disk to ensure that it survives partitions in time. Any node in the cluster may save or retrieve
  48. checkpointed data.
  49. Arguments:
  50. Resource - Supplies the resource associated with this data.
  51. dwCheckpointId - Supplies the unique checkpoint ID describing this data. The caller is responsible
  52. for ensuring the uniqueness of the checkpoint ID.
  53. lpszFileName - Supplies the name of the file with the checkpoint data.
  54. fCryptoCheckpoint - Indicates if the checkpoint is a crypto checkpoint.
  55. Return Value:
  56. ERROR_SUCCESS if successful
  57. Win32 error code otherwise
  58. --*/
  59. {
  60. CL_NODE_ID OwnerNode;
  61. DWORD Status;
  62. do {
  63. OwnerNode = CppGetQuorumNodeId();
  64. ClRtlLogPrint(LOG_NOISE,
  65. "[CP] CpSaveData: checkpointing data id %1!d! to quorum node %2!d!\n",
  66. dwCheckpointId,
  67. OwnerNode);
  68. if (OwnerNode == NmLocalNodeId) {
  69. Status = CppWriteCheckpoint(Resource,
  70. dwCheckpointId,
  71. lpszFileName,
  72. fCryptoCheckpoint);
  73. } else {
  74. HANDLE hFile;
  75. FILE_PIPE FilePipe;
  76. hFile = CreateFileW(lpszFileName,
  77. GENERIC_READ | GENERIC_WRITE,
  78. 0,
  79. NULL,
  80. OPEN_ALWAYS,
  81. 0,
  82. NULL);
  83. if (hFile == INVALID_HANDLE_VALUE) {
  84. Status = GetLastError();
  85. ClRtlLogPrint(LOG_CRITICAL,
  86. "[CP] CpSaveData: failed to open data file %1!ws! error %2!d!\n",
  87. lpszFileName,
  88. Status);
  89. } else {
  90. DmInitFilePipe(&FilePipe, hFile);
  91. try {
  92. if (fCryptoCheckpoint) {
  93. Status = CpDepositCryptoCheckpoint(Session[OwnerNode],
  94. OmObjectId(Resource),
  95. dwCheckpointId,
  96. FilePipe.Pipe);
  97. } else {
  98. Status = CpDepositCheckpoint(Session[OwnerNode],
  99. OmObjectId(Resource),
  100. dwCheckpointId,
  101. FilePipe.Pipe);
  102. }
  103. } except (I_RpcExceptionFilter(RpcExceptionCode())) {
  104. ClRtlLogPrint(LOG_CRITICAL,
  105. "[CP] CpSaveData - s_CpDepositCheckpoint from node %1!d! raised status %2!d!\n",
  106. OwnerNode,
  107. GetExceptionCode());
  108. Status = ERROR_HOST_NODE_NOT_RESOURCE_OWNER;
  109. }
  110. DmFreeFilePipe(&FilePipe);
  111. CloseHandle(hFile);
  112. }
  113. }
  114. if (Status == ERROR_HOST_NODE_NOT_RESOURCE_OWNER) {
  115. //
  116. // This node no longer owns the quorum resource, retry.
  117. //
  118. ClRtlLogPrint(LOG_UNUSUAL,
  119. "[CP] CpSaveData: quorum owner %1!d! no longer owner\n",
  120. OwnerNode);
  121. }
  122. } while ( Status == ERROR_HOST_NODE_NOT_RESOURCE_OWNER );
  123. return(Status);
  124. }
  125. DWORD
  126. CpDeleteCheckpointFile(
  127. IN PFM_RESOURCE Resource,
  128. IN DWORD dwCheckpointId,
  129. IN OPTIONAL LPCWSTR lpszQuorumPath
  130. )
  131. /*++
  132. Routine Description:
  133. This function removes the checkpoint file correspoinding to the
  134. checkpoint id for a given resource from the given directory.
  135. Arguments:
  136. Resource - Supplies the resource associated with this data.
  137. dwCheckpointId - Supplies the unique checkpoint ID describing this data. The caller is responsible
  138. for ensuring the uniqueness of the checkpoint ID.
  139. lpszQuorumPath - Supplies the path of the cluster files on a quorum device.
  140. Return Value:
  141. ERROR_SUCCESS if successful
  142. Win32 error code otherwise
  143. --*/
  144. {
  145. CL_NODE_ID OwnerNode;
  146. DWORD Status;
  147. do {
  148. OwnerNode = CppGetQuorumNodeId();
  149. ClRtlLogPrint(LOG_NOISE,
  150. "[CP] CpDeleteDataFile: removing checkpoint file for id %1!d! at quorum node %2!d!\n",
  151. dwCheckpointId,
  152. OwnerNode);
  153. if (OwnerNode == NmLocalNodeId)
  154. {
  155. Status = CppDeleteCheckpointFile(Resource, dwCheckpointId, lpszQuorumPath);
  156. }
  157. else
  158. {
  159. Status = CpDeleteCheckpoint(Session[OwnerNode],
  160. OmObjectId(Resource),
  161. dwCheckpointId,
  162. lpszQuorumPath);
  163. //talking to an old server, cant perform this function
  164. //ignore the error
  165. if (Status == RPC_S_PROCNUM_OUT_OF_RANGE)
  166. Status = ERROR_SUCCESS;
  167. }
  168. if (Status == ERROR_HOST_NODE_NOT_RESOURCE_OWNER) {
  169. //
  170. // This node no longer owns the quorum resource, retry.
  171. //
  172. ClRtlLogPrint(LOG_UNUSUAL,
  173. "[CP] CpSaveData: quorum owner %1!d! no longer owner\n",
  174. OwnerNode);
  175. }
  176. } while ( Status == ERROR_HOST_NODE_NOT_RESOURCE_OWNER );
  177. return(Status);
  178. }
  179. DWORD
  180. CpGetDataFile(
  181. IN PFM_RESOURCE Resource,
  182. IN DWORD dwCheckpointId,
  183. IN LPCWSTR lpszFileName,
  184. IN BOOLEAN fCryptoCheckpoint
  185. )
  186. /*++
  187. Routine Description:
  188. This function retrieves checkpoint data for the specified resource. The data must
  189. have been saved by CpSaveData. Any node in the cluster may save or retrieve
  190. checkpointed data.
  191. Arguments:
  192. Resource - Supplies the resource associated with this data.
  193. dwCheckpointId - Supplies the unique checkpoint ID describing this data. The caller is
  194. responsible for ensuring the uniqueness of the checkpoint ID.
  195. lpszFileName - Supplies the filename where the data should be retrieved.
  196. fCryptoCheckpoint - Indicates if the checkpoint is a crypto checkpoint.
  197. Return Value:
  198. ERROR_SUCCESS if successful
  199. Win32 error code otherwise
  200. --*/
  201. {
  202. CL_NODE_ID OwnerNode;
  203. DWORD Status;
  204. DWORD Count = 60;
  205. RetryRetrieveChkpoint:
  206. OwnerNode = CppGetQuorumNodeId();
  207. ClRtlLogPrint(LOG_NOISE,
  208. "[CP] CpGetDataFile: restoring data id %1!d! from quorum node %2!d!\n",
  209. dwCheckpointId,
  210. OwnerNode);
  211. if (OwnerNode == NmLocalNodeId) {
  212. Status = CppReadCheckpoint(Resource,
  213. dwCheckpointId,
  214. lpszFileName,
  215. fCryptoCheckpoint);
  216. } else {
  217. HANDLE hFile;
  218. FILE_PIPE FilePipe;
  219. hFile = CreateFileW(lpszFileName,
  220. GENERIC_READ | GENERIC_WRITE,
  221. 0,
  222. NULL,
  223. CREATE_ALWAYS,
  224. 0,
  225. NULL);
  226. if (hFile == INVALID_HANDLE_VALUE) {
  227. Status = GetLastError();
  228. ClRtlLogPrint(LOG_CRITICAL,
  229. "[CP] CpGetDataFile: failed to create new file %1!ws! error %2!d!\n",
  230. lpszFileName,
  231. Status);
  232. } else {
  233. DmInitFilePipe(&FilePipe, hFile);
  234. try {
  235. if (fCryptoCheckpoint) {
  236. Status = CpRetrieveCryptoCheckpoint(Session[OwnerNode],
  237. OmObjectId(Resource),
  238. dwCheckpointId,
  239. FilePipe.Pipe);
  240. } else {
  241. Status = CpRetrieveCheckpoint(Session[OwnerNode],
  242. OmObjectId(Resource),
  243. dwCheckpointId,
  244. FilePipe.Pipe);
  245. }
  246. } except (I_RpcExceptionFilter(RpcExceptionCode())) {
  247. ClRtlLogPrint(LOG_CRITICAL,
  248. "[CP] CpGetData - s_CpRetrieveCheckpoint from node %1!d! raised status %2!d!\n",
  249. OwnerNode,
  250. GetExceptionCode());
  251. CL_UNEXPECTED_ERROR( GetExceptionCode() );
  252. Status = ERROR_HOST_NODE_NOT_RESOURCE_OWNER;
  253. }
  254. DmFreeFilePipe(&FilePipe);
  255. CloseHandle(hFile);
  256. }
  257. }
  258. if (Status == ERROR_HOST_NODE_NOT_RESOURCE_OWNER) {
  259. //
  260. // This node no longer owns the quorum resource, retry.
  261. //
  262. ClRtlLogPrint(LOG_UNUSUAL,
  263. "[CP] CpGetData: quorum owner %1!d! no longer owner\n",
  264. OwnerNode);
  265. goto RetryRetrieveChkpoint;
  266. }
  267. else if ((Status == ERROR_ACCESS_DENIED) ||
  268. (Status == ERROR_INVALID_FUNCTION) ||
  269. (Status == ERROR_NOT_READY) ||
  270. (Status == RPC_X_INVALID_PIPE_OPERATION) ||
  271. (Status == ERROR_BUSY) ||
  272. (Status == ERROR_SWAPERROR))
  273. {
  274. //if the quorum resource offline suddenly
  275. ClRtlLogPrint(LOG_UNUSUAL,
  276. "[CP] CpGetData: quorum resource went offline in the middle, Count=%1!u!\n",
  277. Count);
  278. //we dont prevent the quorum resource from going offline if some resource
  279. //is blocked for a long time in its online/offline thread- this is because
  280. //some resources(like dtc)try to enumerate resources in the quorum group
  281. //we increase the timeout to give cp a chance to retrieve the checkpoint
  282. //while the quorum group is being moved or failed over
  283. if (Count--)
  284. {
  285. Sleep(1000);
  286. goto RetryRetrieveChkpoint;
  287. }
  288. }
  289. if (Status != ERROR_SUCCESS) {
  290. WCHAR string[16];
  291. wsprintfW(&(string[0]), L"%u", Status);
  292. ClRtlLogPrint(LOG_CRITICAL,
  293. "[CP] CpGetDataFile - failed to retrieve checkpoint %1!d! error %2!d!\n",
  294. dwCheckpointId,
  295. Status);
  296. CL_LOGCLUSERROR2(CP_RESTORE_REGISTRY_FAILURE, OmObjectName(Resource), string);
  297. #if DBG
  298. if (IsDebuggerPresent())
  299. DebugBreak();
  300. #endif
  301. }
  302. return(Status);
  303. }