Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

726 lines
20 KiB

  1. /*++
  2. Copyright (c) 1997 Microsoft Corporation
  3. Module Name:
  4. reslist.c
  5. Abstract:
  6. Cluster resource list processing routines.
  7. Author:
  8. Rod Gamache (rodga) 21-Apr-1997
  9. Revision History:
  10. --*/
  11. #include "fmp.h"
  12. //
  13. // Global data
  14. //
  15. //
  16. // Local function prototypes
  17. //
  18. BOOL FmpCheckResourcesToOnline(
  19. IN PRESOURCE_ENUM pResEnum
  20. );
  21. DWORD
  22. FmpAddResourceEntry(
  23. IN OUT PRESOURCE_ENUM *Enum,
  24. IN LPDWORD Allocated,
  25. IN PFM_RESOURCE Resource
  26. );
  27. DWORD
  28. FmpGetResourceList(
  29. OUT PRESOURCE_ENUM *ReturnEnum,
  30. IN PFM_GROUP Group
  31. )
  32. /*++
  33. Routine Description:
  34. Enumerates all the list of all resources in the Group and returns their
  35. state.
  36. Arguments:
  37. ReturnEnum - Returns the requested objects.
  38. Resource - Supplies the resource to filter. (i.e. if you supply this, you
  39. get a list of resources within that Resource)
  40. If not present, all resources are returned.
  41. Return Value:
  42. ERROR_SUCCESS if successful.
  43. Win32 error code on error.
  44. Notes:
  45. This routine should be called with the LocalGroupLock held.
  46. --*/
  47. {
  48. DWORD status;
  49. PRESOURCE_ENUM resourceEnum = NULL;
  50. DWORD allocated;
  51. PFM_RESOURCE resource;
  52. PLIST_ENTRY listEntry;
  53. allocated = ENUM_GROW_SIZE;
  54. resourceEnum = LocalAlloc(LMEM_FIXED, RESOURCE_SIZE(ENUM_GROW_SIZE));
  55. if ( resourceEnum == NULL ) {
  56. status = ERROR_NOT_ENOUGH_MEMORY;
  57. goto error_exit;
  58. }
  59. ZeroMemory( resourceEnum, RESOURCE_SIZE(ENUM_GROW_SIZE) );
  60. //set the contains quorum to -1, if the quorum is present
  61. // in this group then the containsquorum is set to the index
  62. // of the quorum resource
  63. // The quorum resource should be brought offline last and be
  64. // brought online first so that the registry replication data
  65. // can be flushed
  66. resourceEnum->ContainsQuorum = -1;
  67. //resourceEnum->EntryCount = 0;
  68. //
  69. // Enumerate all resources in the group.
  70. //
  71. for ( listEntry = Group->Contains.Flink;
  72. listEntry != &(Group->Contains);
  73. listEntry = listEntry->Flink ) {
  74. resource = CONTAINING_RECORD( listEntry,
  75. FM_RESOURCE,
  76. ContainsLinkage );
  77. status = FmpAddResourceEntry( &resourceEnum,
  78. &allocated,
  79. resource );
  80. if ( status != ERROR_SUCCESS ) {
  81. FmpDeleteResourceEnum( resourceEnum );
  82. goto error_exit;
  83. }
  84. //check if the resource is a quorum resource
  85. if (resource->QuorumResource)
  86. resourceEnum->ContainsQuorum = resourceEnum->EntryCount - 1;
  87. resourceEnum->Entry[resourceEnum->EntryCount-1].State = resource->PersistentState;
  88. }
  89. *ReturnEnum = resourceEnum;
  90. return(ERROR_SUCCESS);
  91. error_exit:
  92. *ReturnEnum = NULL;
  93. return(status);
  94. } // FmpGetResourceList
  95. DWORD
  96. FmpOnlineResourceList(
  97. IN PRESOURCE_ENUM ResourceEnum,
  98. IN PFM_GROUP pGroup
  99. )
  100. /*++
  101. Routine Description:
  102. Brings online all resources in the Enum list.
  103. Arguments:
  104. ResourceEnum - The list of resources to bring online.
  105. pGroup - the group with which the resources are associated.
  106. Returns:
  107. ERROR_SUCCESS if successful.
  108. Win32 error code on failure.
  109. --*/
  110. {
  111. PFM_RESOURCE resource;
  112. DWORD status;
  113. DWORD returnStatus = ERROR_SUCCESS;
  114. DWORD i;
  115. BOOL bResourcesToOnline;
  116. //
  117. // If the cluster service is shutting and this is not the quorum group,
  118. // then fail immediately. Otherwise, try to bring the quorum online first.
  119. //
  120. if ( FmpShutdown &&
  121. ResourceEnum->ContainsQuorum == -1 ) {
  122. return(ERROR_INVALID_STATE);
  123. }
  124. //find out if atleast one resource in the list necessitates coming being brought online
  125. bResourcesToOnline = FmpCheckResourcesToOnline(ResourceEnum);
  126. if (bResourcesToOnline)
  127. {
  128. //log an event saying we are trying on online a group
  129. FmpLogGroupInfoEvent1( FM_EVENT_GROUP_START_ONLINE, OmObjectName(pGroup));
  130. }
  131. // if the quorum resource is contained in here, bring it online first
  132. if (ResourceEnum->ContainsQuorum >= 0)
  133. {
  134. CL_ASSERT((DWORD)ResourceEnum->ContainsQuorum < ResourceEnum->EntryCount);
  135. resource = OmReferenceObjectById( ObjectTypeResource,
  136. ResourceEnum->Entry[ResourceEnum->ContainsQuorum].Id );
  137. // the resource should not vanish, we are holding the group lock after all
  138. CL_ASSERT(resource != NULL);
  139. //
  140. // If we fail to find a resource, then just continue
  141. //
  142. if ( resource != NULL ) {
  143. ClRtlLogPrint(LOG_NOISE,
  144. "[FM] FmpOnlineResourceList: Previous quorum resource state for %1!ws! is %2!u!\r\n",
  145. OmObjectId(resource), ResourceEnum->Entry[ResourceEnum->ContainsQuorum].State);
  146. if ( (ResourceEnum->Entry[ResourceEnum->ContainsQuorum].State == ClusterResourceOnline) ||
  147. (ResourceEnum->Entry[ResourceEnum->ContainsQuorum].State == ClusterResourceFailed) ) {
  148. //
  149. // Now bring the resource online if that is it's current state.
  150. //
  151. ClRtlLogPrint(LOG_NOISE,
  152. "[FM] FmpOnlineResourceList: trying to bring quorum resource %1!ws! online, state %2!u!\n",
  153. OmObjectId(resource),
  154. resource->State);
  155. status = FmpOnlineResource( resource, FALSE );
  156. if ( status != ERROR_SUCCESS ) {
  157. returnStatus = status;
  158. }
  159. }
  160. OmDereferenceObject( resource );
  161. }
  162. }
  163. // SS::: TODO what happens to the persistent state of the
  164. // other resources - is it handled correctly - note that this is
  165. // called on moving a group
  166. // Will the restart policy do the right thing in terms of bringing
  167. // them online
  168. // if the quorum resource has failed, dont bother trying
  169. // to bring the rest of the resourcess online
  170. if ((returnStatus != ERROR_SUCCESS) && (returnStatus != ERROR_IO_PENDING))
  171. {
  172. //retry after a while
  173. FmpSubmitRetryOnline(ResourceEnum, pGroup);
  174. goto FnExit;
  175. }
  176. // bring online all of the other resources
  177. for ( i = 0; i < ResourceEnum->EntryCount; i++ ) {
  178. resource = OmReferenceObjectById( ObjectTypeResource,
  179. ResourceEnum->Entry[i].Id );
  180. //
  181. // If we fail to find a resource, then just continue.
  182. //
  183. if ( resource == NULL ) {
  184. status = ERROR_RESOURCE_NOT_FOUND;
  185. continue;
  186. }
  187. //quorum resource has already been handled
  188. if (resource->QuorumResource)
  189. {
  190. OmDereferenceObject(resource);
  191. continue;
  192. }
  193. ClRtlLogPrint(LOG_NOISE,
  194. "[FM] FmpOnlineResourceList: Previous resource state for %1!ws! is %2!u!\r\n",
  195. OmObjectId(resource), ResourceEnum->Entry[i].State);
  196. if ( (ResourceEnum->Entry[i].State == ClusterResourceOnline) ||
  197. (ResourceEnum->Entry[i].State == ClusterResourceFailed) ) {
  198. //
  199. // Now bring the resource online if that is it's current state.
  200. //
  201. ClRtlLogPrint(LOG_NOISE,
  202. "[FM] FmpOnlineResourceList: trying to bring resource %1!ws! online\n",
  203. OmObjectId(resource));
  204. status = FmpOnlineResource( resource, FALSE );
  205. //overwrite the return status only if it is success
  206. //else the first error is returned
  207. if ( returnStatus == ERROR_SUCCESS ) {
  208. returnStatus = status;
  209. }
  210. //if this resource didnt come online because the quorum resource
  211. //didnt come online, dont bother bringing the other resources online
  212. //just a waste of time
  213. if (status == ERROR_QUORUM_RESOURCE_ONLINE_FAILED)
  214. {
  215. //submit a timer callback to try and bring these resources
  216. //online
  217. FmpSubmitRetryOnline(ResourceEnum, pGroup);
  218. OmDereferenceObject( resource );
  219. break;
  220. }
  221. }
  222. OmDereferenceObject( resource );
  223. }
  224. FnExit:
  225. if (returnStatus == ERROR_IO_PENDING)
  226. {
  227. CL_ASSERT(bResourcesToOnline);
  228. pGroup->dwStructState |= FM_GROUP_STRUCT_MARKED_FOR_COMPLETION_EVENT;
  229. //the failed or success event will get logged later on
  230. }
  231. else if (returnStatus == ERROR_SUCCESS)
  232. {
  233. if (bResourcesToOnline)
  234. FmpLogGroupInfoEvent1( FM_EVENT_GROUP_COMPLETE_ONLINE, OmObjectName(pGroup));
  235. }
  236. else
  237. {
  238. //SS: log an event to say that the online process failed
  239. if (bResourcesToOnline)
  240. FmpLogGroupInfoEvent1( FM_EVENT_GROUP_FAILED_ONLINE_OFFLINE, OmObjectName(pGroup));
  241. }
  242. ClRtlLogPrint(LOG_NOISE,
  243. "[FM] FmpOnlineResourceList: Exit, status=%1!u!\r\n",
  244. returnStatus);
  245. return(returnStatus);
  246. } // FmpOnlineResourceList
  247. DWORD
  248. FmpOfflineResourceList(
  249. IN PRESOURCE_ENUM ResourceEnum,
  250. IN BOOL Restore
  251. )
  252. /*++
  253. Routine Description:
  254. Takes offline all resources in the Enum list.
  255. Arguments:
  256. ResourceEnum - The list of resources to take offline.
  257. Restore - TRUE if we should set the resource back to it's previous state
  258. Returns:
  259. ERROR_SUCCESS if successful.
  260. Win32 error code on failure.
  261. --*/
  262. {
  263. PFM_RESOURCE resource;
  264. DWORD status=ERROR_SUCCESS;
  265. DWORD returnStatus = ERROR_SUCCESS;
  266. DWORD i;
  267. CLUSTER_RESOURCE_STATE prevState;
  268. // offline all resources except the quorum resource
  269. for ( i = 0; i < ResourceEnum->EntryCount; i++ ) {
  270. resource = OmReferenceObjectById( ObjectTypeResource,
  271. ResourceEnum->Entry[i].Id );
  272. if ( resource == NULL ) {
  273. return(ERROR_RESOURCE_NOT_FOUND);
  274. }
  275. //quorum resource is brought offline last
  276. if (resource->QuorumResource)
  277. {
  278. OmDereferenceObject(resource);
  279. continue;
  280. }
  281. //
  282. // Now take the Resource offline, if we own it.
  283. //
  284. if ( resource->Group->OwnerNode == NmLocalNode ) {
  285. prevState = resource->State;
  286. status = FmpOfflineResource( resource, FALSE );
  287. if ( Restore ) {
  288. //FmpPropagateResourceState( resource, prevState );
  289. //resource->State = prevState;
  290. }
  291. }
  292. OmDereferenceObject( resource );
  293. if ( (status != ERROR_SUCCESS) &&
  294. (status != ERROR_IO_PENDING) ) {
  295. return(status);
  296. }
  297. if ( status == ERROR_IO_PENDING ) {
  298. returnStatus = ERROR_IO_PENDING;
  299. }
  300. }
  301. // bring the quorum resource offline now
  302. // This allows other resources to come offline and save their checkpoints
  303. // The quorum resource offline should block till the resources have
  304. // finished saving the checkpoint
  305. if ((ResourceEnum->ContainsQuorum >= 0) && (returnStatus == ERROR_SUCCESS))
  306. {
  307. CL_ASSERT((DWORD)ResourceEnum->ContainsQuorum < ResourceEnum->EntryCount);
  308. resource = OmReferenceObjectById( ObjectTypeResource,
  309. ResourceEnum->Entry[ResourceEnum->ContainsQuorum].Id );
  310. if ( resource == NULL ) {
  311. return(ERROR_RESOURCE_NOT_FOUND);
  312. }
  313. //
  314. // Now take the Resource offline, if we own it.
  315. //
  316. if ( resource->Group->OwnerNode == NmLocalNode ) {
  317. status = FmpOfflineResource( resource, FALSE );
  318. }
  319. OmDereferenceObject( resource );
  320. if ( (status != ERROR_SUCCESS) &&
  321. (status != ERROR_IO_PENDING) ) {
  322. return(status);
  323. }
  324. if ( status == ERROR_IO_PENDING ) {
  325. returnStatus = ERROR_IO_PENDING;
  326. }
  327. }
  328. return(returnStatus);
  329. } // FmpOfflineResourceList
  330. DWORD
  331. FmpTerminateResourceList(
  332. PRESOURCE_ENUM ResourceEnum
  333. )
  334. /*++
  335. Routine Description:
  336. Terminates all resources in the Enum list.
  337. Arguments:
  338. ResourceEnum - The list of resources to take offline.
  339. Returns:
  340. ERROR_SUCCESS if successful.
  341. Win32 error code on failure.
  342. --*/
  343. {
  344. PFM_RESOURCE resource;
  345. DWORD i;
  346. for ( i = 0; i < ResourceEnum->EntryCount; i++ ) {
  347. resource = OmReferenceObjectById( ObjectTypeResource,
  348. ResourceEnum->Entry[i].Id );
  349. if ( resource == NULL ) {
  350. return(ERROR_RESOURCE_NOT_FOUND);
  351. }
  352. //
  353. // Now take the Resource offline, if we own it.
  354. //
  355. if ( resource->Group->OwnerNode == NmLocalNode ) {
  356. FmpTerminateResource( resource );
  357. }
  358. OmDereferenceObject( resource );
  359. }
  360. //for now we dont care about the return
  361. return(ERROR_SUCCESS);
  362. } // FmpTerminateResourceList
  363. DWORD
  364. FmpAddResourceEntry(
  365. IN OUT PRESOURCE_ENUM *Enum,
  366. IN LPDWORD Allocated,
  367. IN PFM_RESOURCE Resource
  368. )
  369. /*++
  370. Routine Description:
  371. Worker routine for the enumeration of Resources.
  372. This routine adds the specified Resource to the list that is being
  373. generated.
  374. Arguments:
  375. Enum - The Resource Enumeration list. Can be an output if a new list is
  376. allocated.
  377. Allocated - The number of entries allocated.
  378. Resource - The Resource object being enumerated.
  379. Returns:
  380. ERROR_SUCCESS - if successful.
  381. A Win32 error code on failure.
  382. --*/
  383. {
  384. PRESOURCE_ENUM resourceEnum;
  385. PRESOURCE_ENUM newEnum;
  386. DWORD newAllocated;
  387. DWORD index;
  388. LPWSTR newId;
  389. resourceEnum = *Enum;
  390. if ( resourceEnum->EntryCount >= *Allocated ) {
  391. //
  392. // Time to grow the RESOURCE_ENUM
  393. //
  394. newAllocated = *Allocated + ENUM_GROW_SIZE;
  395. newEnum = LocalAlloc(LMEM_FIXED, RESOURCE_SIZE(newAllocated));
  396. if ( newEnum == NULL ) {
  397. return(ERROR_NOT_ENOUGH_MEMORY);
  398. }
  399. CopyMemory(newEnum, resourceEnum, RESOURCE_SIZE(*Allocated));
  400. *Allocated = newAllocated;
  401. *Enum = newEnum;
  402. LocalFree(resourceEnum);
  403. resourceEnum = newEnum;
  404. }
  405. //
  406. // Initialize new entry
  407. //
  408. newId = LocalAlloc(LMEM_FIXED, (lstrlenW(OmObjectId(Resource))+1) * sizeof(WCHAR));
  409. if ( newId == NULL ) {
  410. return(ERROR_NOT_ENOUGH_MEMORY);
  411. }
  412. lstrcpyW(newId, OmObjectId(Resource));
  413. resourceEnum->Entry[resourceEnum->EntryCount].Id = newId;
  414. ++resourceEnum->EntryCount;
  415. return(ERROR_SUCCESS);
  416. } // FmpAddResourceEntry
  417. VOID
  418. FmpDeleteResourceEnum(
  419. IN PRESOURCE_ENUM Enum
  420. )
  421. /*++
  422. Routine Description:
  423. This routine deletes an RESOURCE_ENUM and associated name strings.
  424. Arguments:
  425. Enum - The RESOURCE_ENUM to delete. This pointer can be NULL.
  426. Returns:
  427. None.
  428. Notes:
  429. This routine will take a NULL input pointer and just return.
  430. --*/
  431. {
  432. PRESOURCE_ENUM_ENTRY enumEntry;
  433. DWORD i;
  434. if ( Enum == NULL ) {
  435. return;
  436. }
  437. for ( i = 0; i < Enum->EntryCount; i++ ) {
  438. enumEntry = &Enum->Entry[i];
  439. LocalFree(enumEntry->Id);
  440. }
  441. LocalFree(Enum);
  442. return;
  443. } // FmpDeleteResourceEnum
  444. DWORD FmpSubmitRetryOnline(
  445. IN PRESOURCE_ENUM pResourceEnum,
  446. IN PFM_GROUP pGroup)
  447. {
  448. PFM_RESLIST_ONLINE_RETRY_INFO pFmOnlineRetryInfo;
  449. PRESOURCE_ENUM_ENTRY enumEntry;
  450. DWORD dwSizeofResourceEnum;
  451. DWORD dwStatus = ERROR_SUCCESS;
  452. DWORD i;
  453. DWORD dwSize;
  454. //there is nothing to do
  455. if (pResourceEnum->EntryCount < 1)
  456. goto FnExit;
  457. dwSizeofResourceEnum = sizeof(RESOURCE_ENUM) - sizeof(RESOURCE_ENUM_ENTRY) +
  458. (sizeof(RESOURCE_ENUM_ENTRY) * pResourceEnum->EntryCount);
  459. pFmOnlineRetryInfo = LocalAlloc(LMEM_FIXED,
  460. (sizeof(FM_RESLIST_ONLINE_RETRY_INFO) - sizeof(RESOURCE_ENUM) +
  461. dwSizeofResourceEnum));
  462. if (!pFmOnlineRetryInfo)
  463. {
  464. dwStatus = ERROR_NOT_ENOUGH_MEMORY;
  465. CL_UNEXPECTED_ERROR(dwStatus);
  466. goto FnExit;
  467. }
  468. //SS: use the group field for logging
  469. //reference the group object
  470. if (pGroup)
  471. OmReferenceObject(pGroup);
  472. pFmOnlineRetryInfo->pGroup = pGroup;
  473. memcpy(&(pFmOnlineRetryInfo->ResourceEnum), pResourceEnum, dwSizeofResourceEnum);
  474. // allocate memory for Resource ID's and copy them from pResourceEnum
  475. for ( i = 0; i < pResourceEnum->EntryCount; i++ ) {
  476. enumEntry = &pResourceEnum->Entry[i];
  477. pFmOnlineRetryInfo->ResourceEnum.Entry[i].Id = NULL;
  478. dwSize = (lstrlenW(enumEntry->Id) +1)*sizeof(WCHAR);
  479. pFmOnlineRetryInfo->ResourceEnum.Entry[i].Id = (LPWSTR)(LocalAlloc(LMEM_FIXED,dwSize));
  480. if (!pFmOnlineRetryInfo->ResourceEnum.Entry[i].Id )
  481. {
  482. dwStatus = ERROR_NOT_ENOUGH_MEMORY;
  483. CL_UNEXPECTED_ERROR(dwStatus);
  484. goto FnExit;
  485. }
  486. CopyMemory(pFmOnlineRetryInfo->ResourceEnum.Entry[i].Id, enumEntry->Id, dwSize);
  487. }
  488. dwStatus = FmpQueueTimerActivity(FM_TIMER_RESLIST_ONLINE_RETRY,
  489. FmpReslistOnlineRetryCb, pFmOnlineRetryInfo );
  490. FnExit:
  491. return(dwStatus);
  492. }
  493. /****
  494. @func DWORD | FmpCheckResourcesToOnline| This routine walks a
  495. resource list and returns TRUE, if atleast one of the resources
  496. in the group must be brought online.
  497. @parm IN PRESOURCE_ENUM | pResEnum | A pointer to a list of resources
  498. in the group.
  499. @comm This is called from FmpOnlineResourceList() to determine if the
  500. group info events should be logged. For groups that have no
  501. resources to be onlined, we should not log the starting online event.
  502. This routine must be called with the group lock held. It is called
  503. by FmpOnlineResourceList().
  504. @rdesc Returns TRUE if atleast one of the resources in the list must be onlined.
  505. ****/
  506. BOOL FmpCheckResourcesToOnline(
  507. IN PRESOURCE_ENUM pResEnum
  508. )
  509. {
  510. PFM_RESOURCE pResource;
  511. DWORD i;
  512. BOOL bRet = FALSE;
  513. for ( i = 0; i < pResEnum->EntryCount; i++ )
  514. {
  515. pResource = OmReferenceObjectById( ObjectTypeResource,
  516. pResEnum->Entry[i].Id );
  517. //
  518. // If we fail to find a resource, then just continue.
  519. //
  520. if ( pResource == NULL ) {
  521. ClRtlLogPrint(LOG_NOISE,
  522. "[FM] FmpCheckResourcesToOnline: Resource for ResId %1!ws! not found.\n",
  523. pResEnum->Entry[i].Id);
  524. continue;
  525. }
  526. //check if this is the quorum resource
  527. if (pResource->QuorumResource)
  528. {
  529. //if the quorum resource is in the group, it must be brought online
  530. //irrespective of its state
  531. bRet = TRUE;
  532. OmDereferenceObject(pResource);
  533. break;
  534. }
  535. if ( (pResEnum->Entry[i].State == ClusterResourceOnline) ||
  536. (pResEnum->Entry[i].State == ClusterResourceFailed) )
  537. {
  538. bRet = TRUE;
  539. OmDereferenceObject(pResource);
  540. break;
  541. }
  542. OmDereferenceObject(pResource);
  543. }
  544. return(bRet);
  545. }