Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

3952 lines
134 KiB

  1. /*++
  2. Copyright(c) 1998,99 Microsoft Corporation
  3. Module Name:
  4. load.c
  5. Abstract:
  6. Windows Load Balancing Service (WLBS)
  7. Driver - load balancing algorithm
  8. Author:
  9. bbain
  10. ToDo:
  11. Kernel mode queue mgt
  12. Fail safe mode (single server for everything)
  13. --*/
  14. #ifdef KERNEL_MODE
  15. #include <ntddk.h>
  16. #include "log.h"
  17. #include "univ.h"
  18. #include "main.h" // added for multiple nic
  19. static ULONG log_module_id = LOG_MODULE_LOAD;
  20. #else
  21. #include <stdlib.h>
  22. #include <windows.h>
  23. #endif
  24. #include <stdio.h>
  25. #include "wlbsparm.h"
  26. #include "params.h"
  27. #include "wlbsiocl.h"
  28. #include "wlbsip.h"
  29. #include "load.h"
  30. //
  31. // For WPP Event Tracing
  32. //
  33. #include "trace.h" // for event tracing
  34. #include "load.tmh" // for event tracing
  35. #ifndef KERNEL_MODE
  36. #define UNIV_PRINT(s) { printf s ; printf ("\n"); }
  37. #define Univ_ulong_to_str(x, y, z) (y)
  38. #define LOG_MSG(c,s)
  39. #define LOG_MSG1(c,s,d1)
  40. #define LOG_MSG2(c,s,d1,d2)
  41. #define LOG_MSG3(c,s,d1,d2,d3)
  42. #define LOG_MSG4(c,s,d1,d2,d3,d4)
  43. #else
  44. #define malloc(s) ExAllocatePoolWithTag (NonPagedPool, s, UNIV_POOL_TAG)
  45. #define free(s) ExFreePool (s)
  46. #endif
  47. //extern CVY_PARAMS univ_params;
  48. //#define univ_params ( * (lp -> params))
  49. void Bin_state_print(PBIN_STATE bp, ULONG my_host_id);
  50. void Load_conn_kill(PLOAD_CTXT lp, PBIN_STATE bp); /* v1.32B */
  51. // static WCHAR buf [256]; /* string buffer (V1.1.2) */
  52. /* CONSTANTS */
  53. #if 0 /* v2.06 */
  54. #define BIN_ALL_ONES ((MAP_T)-1) /* bin map state for 64 ones (v2.04) */
  55. #endif
  56. #define BIN_ALL_ONES ((MAP_T)(0xFFFFFFFFFFFFFFF)) /* bin map state for 60 ones (v2.04) */
  57. /* FUNCTIONS */
  58. /* Byte offset of a field in a structure of the specified type: */
  59. #define CVY_FIELD_OFFSET(type, field) ((LONG_PTR)&(((type *)0)->field))
  60. /*
  61. * Address of the base of the structure given its type, field name, and the
  62. * address of a field or field offset within the structure:
  63. */
  64. #define STRUCT_PTR(address, type, field) ((type *)( \
  65. (PCHAR)(address) - \
  66. (PCHAR)CVY_FIELD_OFFSET(type, field)))
  67. /*
  68. * Function: Load_teaming_consistency_notify
  69. * Description: This function is called to notify a team in which this adapter
  70. * might be participating whether the teaming configuration in the
  71. * heartbeats is consistent or not. Inconsistent configuration
  72. * results in the entire team being marked inactive - meaning that
  73. * no adapter in the team will handle any traffic, except to the DIP.
  74. * Parameters: member - a pointer to the team membership information for this adapter.
  75. * consistent - a boolean indicating the polarity of teaming consistency.
  76. * Returns: Nothing.
  77. * Author: shouse, 3.29.01
  78. * Notes: In order to check to see whether or not this adapter is part of a team,
  79. * we need to look into the team member information for this adapter. This
  80. * access should be locked, but for performance reasons, we will only lock
  81. * and check for sure if we "think" we're part of a team. Worst case is that
  82. * we are in the process of joining a team and we missed this check - no
  83. * matter, we'll notify them when/if we see this again.
  84. */
  85. VOID Load_teaming_consistency_notify (IN PBDA_MEMBER member, IN BOOL consistent) {
  86. /* Make sure that the membership information points to something. */
  87. ASSERT(member);
  88. /* We can check without locking to keep the common case minimally expensive. If we do think
  89. we're part of a team, then we'll grab the lock and make sure. If our first indication is
  90. that we're not part of a team, then just bail out and if we actually are part of a team,
  91. we'll be through here again later to notify our team if necessary. */
  92. if (!member->active) return;
  93. NdisAcquireSpinLock(&univ_bda_teaming_lock);
  94. /* If we are an active member of a BDA team, then notify our team of our state. */
  95. if (member->active) {
  96. /* Assert that the team actually points to something. */
  97. ASSERT(member->bda_team);
  98. /* Assert that the member ID is valid. */
  99. ASSERT(member->member_id <= CVY_BDA_MAXIMUM_MEMBER_ID);
  100. if (consistent) {
  101. UNIV_PRINT(("Load_teaming_consistency_notify: Consistent configuration detected."));
  102. /* Mark this member as consistent. */
  103. member->bda_team->consistency_map |= (1 << member->member_id);
  104. } else {
  105. UNIV_PRINT(("Load_teaming_consistency_notify: Inconsistent configuration detected."));
  106. /* Mark this member as inconsistent. */
  107. member->bda_team->consistency_map &= ~(1 << member->member_id);
  108. /* Inactivate the team. */
  109. member->bda_team->active = FALSE;
  110. }
  111. }
  112. NdisReleaseSpinLock(&univ_bda_teaming_lock);
  113. }
/*
 * Function: Load_teaming_consistency_check
 * Description: This function is used to check our teaming configuration against the
 *              teaming configuration received in a remote heartbeat. It does little
 *              more than check the equality of two DWORDS, however, if this is our
 *              first notification of bad configuration, it prints a few debug state-
 *              ments as well.
 * Parameters: bAlreadyKnown - a boolean indication of whether or not we have already detected bad configuration.
 *             If the misconfiguration is already known, no additional logging is done.
 *             member - a pointer to the team member structure for this adapter.
 *             myConfig - a DWORD containing the teaming "code" for me.
 *             theirConfig - a DWORD containing the teaming "code" received in the heartbeat from them.
 * Returns: BOOLEAN (as ULONG) - TRUE means the configuration is consistent, FALSE indicates that it is not.
 * Author: shouse, 3.29.01
 * Notes: In order to check to see whether or not this adapter is part of a team,
 *        we need to look into the team member information for this adapter. This
 *        access should be locked, but for performance reasons, we will only lock
 *        and check for sure if we "think" we're part of a team. Worst case is that
 *        we are in the process of joining a team and we missed this check - no
 *        matter, we'll check again on the next heartbeat.
 */
ULONG Load_teaming_consistency_check (IN BOOLEAN bAlreadyKnown, IN PBDA_MEMBER member, IN ULONG myConfig, IN ULONG theirConfig) {

    /* We can check without locking to keep the common case minimally expensive. If we do think
       we're part of a team, then we'll grab the lock and make sure. If our first indication is
       that we're not part of a team, then just bail out and if we actually are part of a team,
       we'll be through here again later to check the consistency. */
    if (!member->active) return TRUE;

    NdisAcquireSpinLock(&univ_bda_teaming_lock);

    /* If we are part of a BDA team, check the BDA teaming configuration consistency. */
    if (member->active) {
        /* NOTE(review): the lock is released here, before the comparisons below -
           from this point on only the function arguments (myConfig/theirConfig)
           are consulted, so no shared team state is read unlocked. */
        NdisReleaseSpinLock(&univ_bda_teaming_lock);

        /* If the bi-directional affinity teaming configurations don't match, do something about it. */
        if (myConfig != theirConfig) {
            /* Log the detailed field-by-field breakdown only on the first
               detection, to avoid flooding the debug output. */
            if (!bAlreadyKnown) {
                UNIV_PRINT(("Bad teaming configuration detected: Mine=0x%08x, Theirs=0x%08x", myConfig, theirConfig));

                /* Report whether or not the teaming active flags are consistent. */
                if ((myConfig & CVY_BDA_TEAMING_CODE_ACTIVE_MASK) != (theirConfig & CVY_BDA_TEAMING_CODE_ACTIVE_MASK)) {
                    UNIV_PRINT(("Teaming active flags do not match: Mine=%d, Theirs=%d",
                                (myConfig & CVY_BDA_TEAMING_CODE_ACTIVE_MASK) >> CVY_BDA_TEAMING_CODE_ACTIVE_OFFSET,
                                (theirConfig & CVY_BDA_TEAMING_CODE_ACTIVE_MASK) >> CVY_BDA_TEAMING_CODE_ACTIVE_OFFSET));
                }

                /* Report whether or not the master flags are consistent. */
                if ((myConfig & CVY_BDA_TEAMING_CODE_MASTER_MASK) != (theirConfig & CVY_BDA_TEAMING_CODE_MASTER_MASK)) {
                    UNIV_PRINT(("Master/slave settings do not match: Mine=%d, Theirs=%d",
                                (myConfig & CVY_BDA_TEAMING_CODE_MASTER_MASK) >> CVY_BDA_TEAMING_CODE_MASTER_OFFSET,
                                (theirConfig & CVY_BDA_TEAMING_CODE_MASTER_MASK) >> CVY_BDA_TEAMING_CODE_MASTER_OFFSET));
                }

                /* Report whether or not the reverse hashing flags are consistent. */
                if ((myConfig & CVY_BDA_TEAMING_CODE_HASHING_MASK) != (theirConfig & CVY_BDA_TEAMING_CODE_HASHING_MASK)) {
                    UNIV_PRINT(("Reverse hashing flags do not match: Mine=%d, Theirs=%d",
                                (myConfig & CVY_BDA_TEAMING_CODE_HASHING_MASK) >> CVY_BDA_TEAMING_CODE_HASHING_OFFSET,
                                (theirConfig & CVY_BDA_TEAMING_CODE_HASHING_MASK) >> CVY_BDA_TEAMING_CODE_HASHING_OFFSET));
                }

                /* Report whether or not the number of team members is consistent. */
                if ((myConfig & CVY_BDA_TEAMING_CODE_NUM_MEMBERS_MASK) != (theirConfig & CVY_BDA_TEAMING_CODE_NUM_MEMBERS_MASK)) {
                    UNIV_PRINT(("Numbers of team members do not match: Mine=%d, Theirs=%d",
                                (myConfig & CVY_BDA_TEAMING_CODE_NUM_MEMBERS_MASK) >> CVY_BDA_TEAMING_CODE_NUM_MEMBERS_OFFSET,
                                (theirConfig & CVY_BDA_TEAMING_CODE_NUM_MEMBERS_MASK) >> CVY_BDA_TEAMING_CODE_NUM_MEMBERS_OFFSET));
                }

                /* Report whether or not the team membership lists are consistent. */
                if ((myConfig & CVY_BDA_TEAMING_CODE_MEMBERS_MASK) != (theirConfig & CVY_BDA_TEAMING_CODE_MEMBERS_MASK)) {
                    UNIV_PRINT(("Participating members lists do not match: Mine=0x%04x, Theirs=0x%04x",
                                (myConfig & CVY_BDA_TEAMING_CODE_MEMBERS_MASK) >> CVY_BDA_TEAMING_CODE_MEMBERS_OFFSET,
                                (theirConfig & CVY_BDA_TEAMING_CODE_MEMBERS_MASK) >> CVY_BDA_TEAMING_CODE_MEMBERS_OFFSET));
                }
            }

            return FALSE;
        }

        return TRUE;
    }

    /* Not an active team member after all - drop the lock and report consistent. */
    NdisReleaseSpinLock(&univ_bda_teaming_lock);

    return TRUE;
}
  187. /*
  188. * Function: Load_teaming_code_create
  189. * Description: This function pieces together the ULONG code that represents the configuration
  190. * of bi-directional affinity teaming on this adapter. If the adapter is not part
  191. * of a team, then the code is zero.
  192. * Parameters: code - a pointer to a ULONG that will receive the 32-bit code word.
  193. * member - a pointer to the team member structure for this adapter.
  194. * Returns: Nothing.
  195. * Author: shouse, 3.29.01
  196. * Notes: In order to check to see whether or not this adapter is part of a team,
  197. * we need to look into the team member information for this adapter. This
  198. * access should be locked, but for performance reasons, we will only lock
  199. * and check for sure if we "think" we're part of a team. Worst case is that
  200. * we are in the process of joining a team and we missed this check - no
  201. * matter, we'll be through here the next time er send a heartbeat anyway.
  202. */
  203. VOID Load_teaming_code_create (OUT PULONG code, IN PBDA_MEMBER member) {
  204. /* Assert that the code actually points to something. */
  205. ASSERT(code);
  206. /* Assert that the membership information actually points to something. */
  207. ASSERT(member);
  208. /* Reset the code. */
  209. *code = 0;
  210. /* We can check without locking to keep the common case minimally expensive. If we do think
  211. we're part of a team, then we'll grab the lock and make sure. If our first indication is
  212. that we're not part of a team, then just bail out and if we actually are part of a team,
  213. we'll be through here again later to generate the code next time we send a heartbeat. */
  214. if (!member->active) return;
  215. NdisAcquireSpinLock(&univ_bda_teaming_lock);
  216. /* If we are in a team, fill in the team configuration information. */
  217. if (member->active) {
  218. /* Assert that the team actually points to something. */
  219. ASSERT(member->bda_team);
  220. /* Add configuration information for teaming at each timeout. */
  221. CVY_BDA_TEAMING_CODE_CREATE(*code,
  222. member->active,
  223. member->master,
  224. member->reverse_hash,
  225. member->bda_team->membership_count,
  226. member->bda_team->membership_fingerprint);
  227. }
  228. NdisReleaseSpinLock(&univ_bda_teaming_lock);
  229. }
  230. /*
  231. * Function: Load_add_reference
  232. * Description: This function adds a reference to the load module of a given adapter.
  233. * Parameters: pLoad - a pointer to the load module to reference.
  234. * Returns: ULONG - The incremented value.
  235. * Author: shouse, 3.29.01
  236. * Notes:
  237. */
  238. ULONG Load_add_reference (IN PLOAD_CTXT pLoad) {
  239. /* Assert that the load pointer actually points to something. */
  240. ASSERT(pLoad);
  241. /* Increment the reference count. */
  242. return NdisInterlockedIncrement(&pLoad->ref_count);
  243. }
  244. /*
  245. * Function: Load_release_reference
  246. * Description: This function releases a reference on the load module of a given adapter.
  247. * Parameters: pLoad - a pointer to the load module to dereference.
  248. * Returns: ULONG - The decremented value.
  249. * Author: shouse, 3.29.01
  250. * Notes:
  251. */
  252. ULONG Load_release_reference (IN PLOAD_CTXT pLoad) {
  253. /* Assert that the load pointer actually points to something. */
  254. ASSERT(pLoad);
  255. /* Decrement the reference count. */
  256. return NdisInterlockedDecrement(&pLoad->ref_count);
  257. }
  258. /*
  259. * Function: Load_get_reference_count
  260. * Description: This function returns the current reference count on a given adapter.
  261. * Parameters: pLoad - a pointer to the load module to check.
  262. * Returns: ULONG - The current reference count.
  263. * Author: shouse, 3.29.01
  264. * Notes:
  265. */
  266. ULONG Load_get_reference_count (IN PLOAD_CTXT pLoad) {
  267. /* Assert that the load pointer actually points to something. */
  268. ASSERT(pLoad);
  269. /* Return the reference count. */
  270. return pLoad->ref_count;
  271. }
  272. /* Hash routine is based on a public-domain Tiny Encryption Algorithm (TEA) by
  273. David Wheeler and Roger Needham at the Computer Laboratory of Cambridge
  274. University. For reference, please consult
  275. http://vader.brad.ac.uk/tea/tea.shtml */
  276. ULONG Map (
  277. ULONG v1,
  278. ULONG v2) /* v2.06: removed range parameter */
  279. {
  280. ULONG y = v1,
  281. z = v2,
  282. sum = 0;
  283. const ULONG a = 0x67; //key [0];
  284. const ULONG b = 0xdf; //key [1];
  285. const ULONG c = 0x40; //key [2];
  286. const ULONG d = 0xd3; //key [3];
  287. const ULONG delta = 0x9E3779B9;
  288. //
  289. // Unroll the loop to improve performance
  290. //
  291. sum += delta;
  292. y += (z << 4) + a ^ z + sum ^ (z >> 5) + b;
  293. z += (y << 4) + c ^ y + sum ^ (y >> 5) + d;
  294. sum += delta;
  295. y += (z << 4) + a ^ z + sum ^ (z >> 5) + b;
  296. z += (y << 4) + c ^ y + sum ^ (y >> 5) + d;
  297. sum += delta;
  298. y += (z << 4) + a ^ z + sum ^ (z >> 5) + b;
  299. z += (y << 4) + c ^ y + sum ^ (y >> 5) + d;
  300. sum += delta;
  301. y += (z << 4) + a ^ z + sum ^ (z >> 5) + b;
  302. z += (y << 4) + c ^ y + sum ^ (y >> 5) + d;
  303. sum += delta;
  304. y += (z << 4) + a ^ z + sum ^ (z >> 5) + b;
  305. z += (y << 4) + c ^ y + sum ^ (y >> 5) + d;
  306. sum += delta;
  307. y += (z << 4) + a ^ z + sum ^ (z >> 5) + b;
  308. z += (y << 4) + c ^ y + sum ^ (y >> 5) + d;
  309. sum += delta;
  310. y += (z << 4) + a ^ z + sum ^ (z >> 5) + b;
  311. z += (y << 4) + c ^ y + sum ^ (y >> 5) + d;
  312. sum += delta;
  313. y += (z << 4) + a ^ z + sum ^ (z >> 5) + b;
  314. z += (y << 4) + c ^ y + sum ^ (y >> 5) + d;
  315. return y ^ z;
  316. } /* end Map */
BOOLEAN Bin_targ_map_get(
    PLOAD_CTXT lp,
    PBIN_STATE binp,   /* ptr. to bin state */
    ULONG my_host_id,
    PMAP_T pmap)       /* ptr. to target map */
/*
  Get target map for this host
  returns BOOLEAN:
    TRUE  => valid target map is returned via pmap
    FALSE => error occurred; no target map returned
*/
{
    ULONG remsz,        /* remainder size */
          loadsz,       /* size of a load partition */
          first_bit;    /* first bit position of load partition */
    MAP_T targ_map;     /* bit map of load bins for this host */
    ULONG tot_load = 0; /* total of load percentages */
    ULONG * pload_list; /* ptr. to list of load balance percentages */
    WCHAR num [20];
    PMAIN_CTXT ctxtp = CONTAINING_RECORD (lp, MAIN_CTXT, load);

    pload_list = binp->load_amt;

    if (binp->mode == CVY_SINGLE)
    {
        /* Single-server mode: the entire bin map goes to exactly one host -
           the one with the numerically smallest (i.e. highest) priority. */
        ULONG max_pri;  /* highest priority */
        ULONG i;

        first_bit = 0;

        /* compute max priority; initialized above the valid priority range */
        max_pri = CVY_MAX_HOSTS + 1;

        for (i=0; i<CVY_MAX_HOSTS; i++)
        {
            tot_load += pload_list[i];  /* v2.1 */

            if (pload_list[i] != 0)
            {
                //
                // If another host has the same priority as this host, do not converge
                //
                if (i!= my_host_id && pload_list[i] == pload_list[my_host_id])
                {
                    /* Log the duplicate-priority condition only once. */
                    if (!(lp->dup_sspri))
                    {
                        UNIV_PRINT(("Host %d: duplicate single svr priorities detected", my_host_id));
                        Univ_ulong_to_str (pload_list[my_host_id], num, 10);
                        LOG_MSG(MSG_ERROR_SINGLE_DUP, num);
                        lp->dup_sspri = TRUE;
                    }

                    /* 1.03: return error, which inhibits convergence; note that
                       rule will be automatically reinstated when duplicate server
                       priorities are eliminated */
                    return FALSE;
                }

                if ( pload_list[i] <= max_pri )
                {
                    max_pri = pload_list[i];
                }
            }
        }

        binp->tot_load = tot_load;  /* v2.1 */

        /* now determine if we are the highest priority host: if so we own
           every bin, otherwise none */
        if (pload_list[my_host_id] == max_pri)
        {
            loadsz = CVY_MAXBINS;
            targ_map = BIN_ALL_ONES;  /* v2.05 */
        }
        else
        {
            loadsz = 0;
            targ_map = 0;  /* v2.05 */
        }
    }
    else  /* load balanced */
    {
        ULONG i, j;
        ULONG partsz[CVY_MAX_HOSTS+1];
                    /* new partition size per host */
        ULONG cur_partsz[CVY_MAX_HOSTS+1];
                    /* current partition size per host (v2.05) */
        ULONG cur_host[CVY_MAXBINS];
                    /* current host for each bin (v2.05) */
        ULONG tot_partsz;  /* sum of partition sizes */
        ULONG donor;       /* current donor host (v2.05) */
        ULONG cur_nbins;   /* current # bins (v2.05) */

        /* setup current partition sizes and bin to host mapping from current map (v2.05) */
        cur_nbins = 0;

        /* slot CVY_MAX_HOSTS acts as a pseudo-host that owns all orphan bins */
        for (j=0; j<CVY_MAXBINS; j++)
            cur_host[j] = CVY_MAX_HOSTS;  /* all bins are initially orphans */

        for (i=0; i<CVY_MAX_HOSTS; i++)
        {
            ULONG count = 0L;
            MAP_T cmap = binp->cur_map[i];

            tot_load += pload_list[i];  /* folded into this loop v2.1 */

            /* walk the host's current map bit by bit, stopping early once the
               remaining map is empty */
            for (j=0; j<CVY_MAXBINS && cmap != ((MAP_T)0); j++)
            {
                /* if host i has bin j and it's not a duplicate, set up the mapping */
                if ((cmap & ((MAP_T)0x1)) != ((MAP_T)0) && cur_host[j] == CVY_MAX_HOSTS)
                {
                    count++;
                    cur_host[j] = i;
                }
                cmap >>= 1;
            }

            cur_partsz[i] = count;
            cur_nbins += count;
        }

        /* defensive clamp - the per-bin dedup above should make this impossible */
        if (cur_nbins > CVY_MAXBINS)
        {
            UNIV_PRINT(("Bin_targ_map_get: error - too many bins found"));
            LOG_MSG(MSG_ERROR_INTERNAL, MSG_NONE);
            cur_nbins = CVY_MAXBINS;
        }

        /* if there are orphan bins, give them to pseudo-host CVY_MAX_HOSTS for now (v2.05) */
        if (cur_nbins < CVY_MAXBINS)
            cur_partsz[CVY_MAX_HOSTS] = CVY_MAXBINS - cur_nbins;
        else
            cur_partsz[CVY_MAX_HOSTS] = 0;

        /* compute total load */
        binp->tot_load = tot_load;  /* v2.06 */

        /* now compute tentative partition sizes and remainder after initially
           dividing up partitions among hosts */
        tot_partsz = 0;
        first_bit = 0;

        for (i=0; i<CVY_MAX_HOSTS; i++)
        {
            /* each host's share of bins is proportional to its load weight */
            if (tot_load > 0)
                partsz[i] = CVY_MAXBINS * pload_list[i] / tot_load;
            else
                partsz[i] = 0;

            tot_partsz += partsz[i];
        }

        remsz = CVY_MAXBINS - tot_partsz;

        /* check for zero total load */
        if (tot_partsz == 0)
        {
            * pmap = 0;
            return TRUE;
        }

        /* first dole out remainder bits to hosts that currently have bins (this
           minimizes the number of bins that have to move) v2.05 */
        if (remsz > 0)
        {
            for (i=0; i<CVY_MAX_HOSTS && remsz > 0; i++)
                if (cur_partsz[i] > 0 && pload_list[i] > 0)
                {
                    partsz[i]++;
                    remsz--;
                }
        }

        /* now dole out remainder bits to hosts that currently have no bins (to maintain
           the target load balance) v2.05 */
        if (remsz > 0)
        {
            for (i=0; i<CVY_MAX_HOSTS && remsz > 0; i++)
                if (cur_partsz[i] == 0 && pload_list[i] > 0)
                {
                    partsz[i]++;
                    remsz--;
                }
        }

        /* now dole out remainder bits among non-zero partitions round robin */
        i = 0;
        while (remsz > 0)
        {
            if (pload_list[i] > 0)
            {
                partsz[i]++;
                remsz--;
            }

            i++;
            if (i == CVY_MAX_HOSTS)
                i = 0;
        }

        /* reallocate bins to target hosts to match new partition sizes (v2.05):
           hosts whose target is larger than their current holding receive bins
           from "donor" hosts that hold more than their target */
        donor = 0;
        partsz[CVY_MAX_HOSTS] = 0;  /* pseudo-host needs no bins */

        for (i=0; i<CVY_MAX_HOSTS; i++)
        {
            ULONG rcvrsz;   /* current receiver's target partition */
            ULONG donorsz;  /* current donor's target partition size */

            /* find and give this host some bins */
            rcvrsz = partsz[i];

            while (rcvrsz > cur_partsz[i])
            {
                /* find a host with too many bins */
                for (; donor < CVY_MAX_HOSTS; donor++)
                    if (partsz[donor] < cur_partsz[donor])
                        break;

                /* if donor is pseudo-host and it's out of bins, give it more bins
                   to keep algorithm from looping; this should never happen */
                if (donor >= CVY_MAX_HOSTS && cur_partsz[donor] == 0)
                {
                    UNIV_PRINT(("Bin_targ_map_get: error - no donor bins"));
                    LOG_MSG(MSG_ERROR_INTERNAL, MSG_NONE);
                    cur_partsz[donor] = CVY_MAXBINS;
                }

                /* now find the donor's bins and give them to the target host */
                donorsz = partsz[donor];  /* donor's target bin count */

                for (j=0; j<CVY_MAXBINS; j++)
                {
                    if (cur_host[j] == donor)
                    {
                        cur_host[j] = i;
                        cur_partsz[donor]--;
                        cur_partsz[i]++;

                        /* if this donor has no more to give, go find the next donor;
                           if this receiver needs no more, go on to next receiver */
                        if (donorsz == cur_partsz[donor] || rcvrsz == cur_partsz[i])
                            break;
                    }
                }

                /* if no bin was found, log a fatal error and exit */
                if (j == CVY_MAXBINS)
                {
                    UNIV_PRINT(("Bin_targ_map_get: error - no bin found"));
                    LOG_MSG(MSG_ERROR_INTERNAL, MSG_NONE);
                    break;
                }
            }
        }

        /* finally, compute bit mask for this host (v2.05) */
        targ_map = 0;

        for (j=0; j<CVY_MAXBINS; j++)
        {
            /* every bin should have a real owner by now; repair and log if not */
            if (cur_host[j] == CVY_MAX_HOSTS)
            {
                UNIV_PRINT(("Bin_targ_map_get: error - incomplete mapping"));
                LOG_MSG(MSG_ERROR_INTERNAL, MSG_NONE);
                cur_host[j] = 0;
            }

            if (cur_host[j] == my_host_id)
                targ_map |= ((MAP_T)1) << j;
        }
    }

    * pmap = targ_map;

    return TRUE;
} /* end Bin_targ_map_get */
  551. BOOLEAN Bin_map_check(
  552. ULONG tot_load, /* total load percentage (v2.06) */
  553. PMAP_T pbin_map) /* bin map for all hosts */
  554. {
  555. MAP_T tot_map, /* total map for all hosts */
  556. ovr_map, /* overlap map between hosts */
  557. exp_tot_map; /* expected total map */
  558. ULONG i;
  559. /* compute expected total map (2.04) */
  560. if (tot_load == 0) /* v2.06 */
  561. return TRUE;
  562. else
  563. exp_tot_map = BIN_ALL_ONES;
  564. /* compute total map and overlap map */
  565. tot_map = ovr_map = 0;
  566. for (i=0; i<CVY_MAX_HOSTS; i++)
  567. {
  568. ovr_map |= (pbin_map[i] & tot_map);
  569. tot_map |= pbin_map[i];
  570. }
  571. if (tot_map == exp_tot_map && ovr_map == 0)
  572. return TRUE;
  573. else
  574. return FALSE;
  575. } /* end Bin_map_check */
  576. BOOLEAN Bin_map_covering(
  577. ULONG tot_load, /* total load percentage (v2.06) */
  578. PMAP_T pbin_map) /* bin map for all hosts */
  579. {
  580. MAP_T tot_map, /* total map for all hosts */
  581. exp_tot_map; /* expected total map */
  582. ULONG i;
  583. /* compute expected total map (v2.04) */
  584. if (tot_load == 0) /* v2.06 */
  585. return TRUE;
  586. else
  587. exp_tot_map = BIN_ALL_ONES;
  588. /* compute total map and overlap map */
  589. tot_map = 0;
  590. for (i=0; i<CVY_MAX_HOSTS; i++)
  591. {
  592. tot_map |= pbin_map[i];
  593. }
  594. if (tot_map == exp_tot_map)
  595. return TRUE;
  596. else
  597. return FALSE;
  598. } /* end Bin_map_covering */
void Bin_state_init(
    PLOAD_CTXT lp,
    PBIN_STATE binp,    /* ptr. to bin state */
    ULONG index,        /* index of bin state */
    ULONG my_host_id,
    ULONG mode,
    ULONG prot,
    BOOLEAN equal_bal,  /* TRUE => balance equally across hosts */
    USHORT affinity,
    ULONG load_amt)     /* this host's load percentage if unequal */
/*
  Initialize bin state for a port group
*/
{
    ULONG i;  /* loop variable */
    PMAIN_CTXT ctxtp = CONTAINING_RECORD (lp, MAIN_CTXT, load);

    /* Sanity-check the argument combination: equal balancing is meaningless in
       single-server mode, a single-server priority must fit in the host range,
       and the bin index must be in bounds. */
    if ((equal_bal && mode == CVY_SINGLE) ||
        (mode == CVY_SINGLE && load_amt > CVY_MAX_HOSTS) ||
        index >= CVY_MAXBINS)
    {
        UNIV_ASSERT(FALSE);  // This should never happen
    }

    /* Stamp the structure with its magic code and record the configuration. */
    binp->code = CVY_BINCODE;  /* (bbain 8/19/99) */

    binp->equal_bal = equal_bal;
    binp->affinity = affinity;
    binp->index = index;
    binp->compatible = TRUE;
    binp->mode = mode;
    binp->prot = prot;

    /* initialize target and new load maps */
    binp->targ_map = 0;
    binp->all_idle_map = BIN_ALL_ONES;
    binp->cmap = 0;  /* v2.1 */

    for (i=0; i<CVY_MAX_HOSTS; i++)
    {
        binp->new_map[i] = 0;
        binp->cur_map[i] = 0;
        binp->chk_map[i] = 0;
        binp->idle_map[i] = BIN_ALL_ONES;
    }

    /* initialize load percentages for all hosts; equal balancing overrides
       the caller-supplied percentage */
    if (equal_bal)
    {
        load_amt = CVY_EQUAL_LOAD;
    }

    binp->tot_load = load_amt;

    /* only this host's slot carries a nonzero load amount initially; remote
       hosts' amounts are learned later from their heartbeats */
    for (i=0; i<CVY_MAX_HOSTS; i++)
    {
        if (i == my_host_id)
        {
            binp->orig_load_amt =
            binp->load_amt[i] = load_amt;
        }
        else
            binp->load_amt[i] = 0;
    }

    /* initialize requesting state to no requests active and all bins local or none */
    binp->snd_bins = 0;
    binp->rcv_bins = 0;
    binp->rdy_bins = 0;

    binp->idle_bins = BIN_ALL_ONES;  /* we are initially idle */

    /* perform first initialization only once (v2.06): connection counts and
       the connection queue persist across re-initializations of the bin state */
    if (!(binp->initialized))
    {
        binp->tconn = 0;

        for (i=0; i<CVY_MAXBINS; i++)
        {
            binp->nconn[i] = 0;
        }

        Queue_init(&(binp->connq));

        binp->initialized = TRUE;
    }
} /* end Bin_state_init */
  672. BOOLEAN Bin_converge(
  673. PLOAD_CTXT lp,
  674. PBIN_STATE binp, /* ptr. to bin state */
  675. ULONG my_host_id)
  676. /*
  677. Explicitly attempt to converge new port group state
  678. returns BOOL:
  679. TRUE => all hosts have consistent new state for converging
  680. FALSE => parameter error or inconsistent convergence state
  681. */
  682. {
  683. MAP_T orphan_map; /* map of orphans that this host will now own */
  684. ULONG i;
  685. /* determine new target load map; 1.03: return in error if no map generated */
  686. if (!Bin_targ_map_get(lp, binp, my_host_id, &(binp->targ_map)))
  687. return FALSE;
  688. /* compute map of all currently orphan bins; note that all duplicates are
  689. considered to be orphans */
  690. orphan_map = 0;
  691. for (i=0; i<CVY_MAX_HOSTS; i++)
  692. orphan_map |= binp->cur_map[i];
  693. orphan_map = ~orphan_map;
  694. /* update our new map to include all current bins and orphans that are in the
  695. target set */
  696. binp->new_map[my_host_id] = binp->cmap | /* v2.1 */
  697. (binp->targ_map & orphan_map); /* 1.03 */
  698. /* check that new load maps are consistent and covering */
  699. return Bin_map_check(binp->tot_load, binp->new_map); /* v2.06 */
  700. } /* end Bin_converge */
void Bin_converge_commit(
    PLOAD_CTXT lp,
    PBIN_STATE binp,  /* ptr. to bin state */
    ULONG my_host_id)
/*
  Commit to new port group state
*/
{
    ULONG i;
    PMAIN_CTXT ctxtp = CONTAINING_RECORD (lp, MAIN_CTXT, load);

    /* check that new load maps are consistent and covering; log the problem
       only on its first occurrence, then commit anyway */
    if (!(Bin_map_check(binp->tot_load, binp->new_map)))  /* v2.06 */
    {
        if (!(lp->bad_map))
        {
            UNIV_PRINT(("Bin_converge_commit: bad new map"));
            LOG_MSG1(MSG_ERROR_INTERNAL, MSG_NONE, (ULONG_PTR)binp->new_map);
            lp->bad_map = TRUE;
        }
    }

    /* commit to new current maps */
    for (i=0; i<CVY_MAX_HOSTS; i++)
    {
        binp->chk_map[i] =
        binp->cur_map[i] = binp->new_map[i];
    }

    /* setup new send/rcv bins, and new ready to ship bins; note that ready to
       ship bins are cleared from the current map */
    binp->rdy_bins = binp->cur_map[my_host_id] & ~(binp->targ_map);  /* 1.03 */
    binp->cur_map[my_host_id] &= ~(binp->rdy_bins);
    binp->rcv_bins = binp->targ_map & ~(binp->cur_map[my_host_id]);
    binp->cmap = binp->cur_map[my_host_id];  /* v2.1 */

#if 0
    /* simulation output generator (2.05) - disabled debug code that counts and
       prints the bins being lost and the bins this host will end up with */
    {
        ULONG lcount = 0L;
        ULONG ncount = 0L;
        MAP_T bins = binp->rdy_bins;

        for (i=0; i<CVY_MAXBINS && bins != 0; i++, bins >>= 1)
            if ((bins & ((MAP_T)0x1)) != ((MAP_T)0))
                lcount++;

        bins = binp->targ_map;

        for (i=0; i<CVY_MAXBINS && bins != 0; i++, bins >>= 1)
            if ((bins & ((MAP_T)0x1)) != ((MAP_T)0))
                ncount++;

        printf("Connverge at host %d pg %d: losing %d, will have %d bins\n", my_host_id, binp->index,
               lcount, ncount);
    }
#endif
} /* end Bin_converge_commit */
BOOLEAN Bin_host_update(
    PLOAD_CTXT      lp,
    PBIN_STATE      binp,            /* ptr. to bin state */
    ULONG           my_host_id,      /* my host's id MINUS one */
    BOOLEAN         converging,      /* TRUE => we are converging now */
    BOOLEAN         rem_converging,  /* TRUE => remote host is converging */
    ULONG           rem_host,        /* remote host's id MINUS one */
    MAP_T           cur_map,         /* remote host's current map or 0 if host died */
    MAP_T           new_map,         /* remote host's new map if converging */
    MAP_T           idle_map,        /* remote host's idle map */
    MAP_T           rdy_bins,        /* bins that host is ready to send; ignored
                                        if converging to prevent bin transfers */
    ULONG           pkt_count,       /* remote host's packet count */
    ULONG           load_amt)        /* remote host's load percentage */
/*
  Update this host's view of a remote host's state for one port group.

  returns BOOLEAN:
    TRUE  => if not converging, normal return;
             otherwise, all hosts have consistent state for converging
    FALSE => parameter error or inconsistent convergence state

  function:
    Updates the port group's per-host maps and attempts to converge new states
    if in convergence mode.  Called when a ping (heartbeat) message is received
    or when a host is considered to have died (in which case cur_map is 0).
    Handles the case of newly discovered hosts.  Idempotent: can be called
    multiple times with the same information.
*/
{
    ULONG       i;
    PMAIN_CTXT  ctxtp = CONTAINING_RECORD (lp, MAIN_CTXT, load);

    /* reject out-of-range host ids and self-updates */
    if (rem_host >= CVY_MAX_HOSTS || rem_host == my_host_id)
    {
        UNIV_PRINT(("Bin_host_update: parameter error"));
        LOG_MSG2(MSG_ERROR_INTERNAL, MSG_NONE, rem_host+1, my_host_id+1);
        return FALSE;
    }

    UNIV_ASSERT(binp->code == CVY_BINCODE);  /* (bbain 8/19/99) */

#if 0 /* v2.06 */
    /* update current load balance information */
    if (binp->equal_bal && load_amt > 0)
    {
        load_amt = CVY_EQUAL_LOAD;
    }
#endif

    /* change load percentage if load changed */
    if (load_amt != binp->load_amt[rem_host])
    {
#if 0 /* v2.06 */
        binp->tot_load += (load_amt - binp->load_amt[rem_host]);
#endif
        binp->load_amt[rem_host] = load_amt;
    }

    /* check for non-overlapping maps: no bin may be owned by two hosts at once */
    if ((binp->cmap & cur_map) != 0)  /* v2.1 */
    {
        /* if we have received fewer packets than the other host or have a higher host id,
           remove duplicates from current map; this uses a heuristic that a newly joining
           host that was subnetted probably did not receive packets; we are trying to avoid
           having two hosts answer to the same client while minimizing disruption of service
           (v1.32B) */
        if (lp->send_msg.pkt_count < pkt_count ||
            (lp->send_msg.pkt_count == pkt_count && rem_host < my_host_id))
        {
            MAP_T   dup_map;

            /* yield the contested bins and drop our connections for this group */
            dup_map = binp->cmap & cur_map;  /* v2.1 */
            binp->cur_map[my_host_id] &= ~dup_map;
            binp->cmap = binp->cur_map[my_host_id];  /* v2.1 */
            Load_conn_kill(lp, binp);
        }

        if (!converging && !rem_converging)
        {
            /* log the overlap only once per episode */
            if (!(lp->overlap_maps))
            {
                UNIV_PRINT(("Host %d: two hosts with overlapping maps detected %d.", my_host_id, binp->index));
                LOG_MSG2(MSG_WARN_OVERLAP, MSG_NONE, my_host_id+1, binp->index);
                lp->overlap_maps = TRUE;
            }

            /* force convergence if in normal operations */
            return FALSE;
        }
    }

    /* now update remote host's current map */
    binp->cur_map[rem_host] = cur_map;

    /* update idle map and calculate new global idle map if it's changed */
    if (binp->idle_map[rem_host] != idle_map)
    {
        MAP_T   saved_map = binp->all_idle_map;
        MAP_T   new_idle_map = BIN_ALL_ONES;
        MAP_T   tmp_map;

        binp->idle_map[rem_host] = idle_map;

        /* compute new idle map: bins idle in every host other than this one */
        for (i=0; i<CVY_MAX_HOSTS; i++)
            if (i != my_host_id)
                new_idle_map &= binp->idle_map[i];

        binp->all_idle_map = new_idle_map;

        /* see which locally owned bins have gone idle in all other hosts */
        tmp_map = new_idle_map & (~saved_map) & binp->cmap;  /* v2.1 */
        if (tmp_map != 0)
        {
            UNIV_PRINT(("Host %d pg %d: detected new all idle %08x for local bins",
                        my_host_id, binp->index, tmp_map));
        }

        /* and which locally owned bins just became non-idle somewhere */
        tmp_map = saved_map & (~new_idle_map) & binp->cmap;  /* v2.1 */
        if (tmp_map != 0)
        {
            UNIV_PRINT(("Host %d pg %d: detected new non-idle %08x for local bins",
                        my_host_id, binp->index, tmp_map));
        }
    }

    /* 1.03: eliminated else clause */

    /* if we are not converging AND other host not converging, exchange bins;
       convergence must now be complete for both hosts */
    if (!converging)
    {
        if (!rem_converging) {  /* 1.03: reorganized code to exchange bins only when both
                                   hosts are not converging to avoid using stale bins */

            MAP_T   new_bins;  /* incoming bins from the remote host */

            /* check to see if remote host has received some bins from us:
               anything now in his current map is no longer ours to give */
            binp->rdy_bins &= (~cur_map);

            /* check to see if we can receive some bins */
            new_bins = binp->rcv_bins & rdy_bins;
            if (new_bins != 0)
            {
                /* sanity check: incoming bins must not already be owned locally */
                if ((binp->cmap & new_bins) != 0)  /* v2.1 */
                {
                    if (!(lp->err_rcving_bins))
                    {
                        UNIV_PRINT(("Bin_host_update: receiving bins already own"));
                        LOG_MSG2(MSG_ERROR_INTERNAL, MSG_NONE, binp->cur_map[my_host_id], new_bins);
                        lp->err_rcving_bins = TRUE;
                    }
                }

                binp->cur_map[my_host_id] |= new_bins;
                binp->rcv_bins &= ~new_bins;
                binp->cmap = binp->cur_map[my_host_id];  /* v2.1 */

                UNIV_PRINT(("====== host %d pg %d: received %08x ; cur now %08x",
                            my_host_id, binp->index, new_bins, binp->cur_map[my_host_id]));
            }

            /* do consistency check that all bins are covered */
            binp->chk_map[rem_host]   = cur_map | rdy_bins;
            binp->chk_map[my_host_id] = binp->cmap | binp->rdy_bins;  /* v2.1 */

            if (!Bin_map_covering(binp->tot_load, binp->chk_map))  /* v2.06 */
            {
                if (!(lp->err_orphans))
                {
#if 0
                    UNIV_PRINT(("Host %d: orphan bins detected", my_host_id));
                    LOG_MSG1(MSG_ERROR_INTERNAL, MSG_NONE, my_host_id+1);
#endif
                    lp->err_orphans = TRUE;
                }
            }
        }

        return TRUE;
    }

    /* otherwise, store proposed new load map and try to converge current host data */
    else
    {
        binp->chk_map[rem_host] =
        binp->new_map[rem_host] = new_map;

        return Bin_converge(lp, binp, my_host_id);
    }
} /* end Bin_host_update */
void Bin_state_print(
    PBIN_STATE      binp,        /* ptr. to bin state */
    ULONG           my_host_id)
/*
  Debug-only dump of a port group's bin state: the target/current/new ownership
  maps, load-balancing mode and amounts, and the send/receive/ready bin-transfer
  masks for this host.
*/
{
#if 0
    ULONG   i;
#endif

    UNIV_PRINT(("hst %d binp %x: maps: targ %x cur %x new %x; eq %d mode %d amt %d tot %d; bins: snd %x rcv %x rdy %x",
                my_host_id, binp, binp->targ_map, binp->cur_map[my_host_id], binp->new_map[my_host_id],
                binp->equal_bal, binp->mode, binp->load_amt[my_host_id],
                binp->tot_load, binp->snd_bins, binp->rcv_bins, binp->rdy_bins));

#if 0
    /* optional verbose dump: per-host maps/loads and per-bin request/state/connection counts */
    for (i=0; i<CVY_MAX_HOSTS; i++)
    {
        UNIV_PRINT(("host %d: cur map %x new %x load_amt %d", i+1, binp->cur_map[i],
                    binp->new_map[i], binp->load_amt[i]));
    }

    for (i=0; i<CVY_MAXBINS; i++)
    {
        UNIV_PRINT(("bin %d: req_host %d bin_state %d nconn %d", i, binp->req_host[i],
                    binp->bin_state[i], binp->nconn[i]));
    }
#endif
} /* end Bin_state_print */
void Load_conn_kill(
    PLOAD_CTXT      lp,
    PBIN_STATE      bp)
/*
  Kill all connections in a port group (v1.32B).

  Drains the port group's connection queue.  If a cleanup timeout is configured,
  connections are marked dirty and parked on the load context's dirty queue so
  stale TCP/IP state cannot service future traffic; otherwise they are cleared
  immediately and their descriptors returned to the free queue.  Resets the
  group's per-bin and total connection counts and marks all bins idle.
*/
{
    PCONN_ENTRY ep;          /* ptr. to connection entry */
    PCONN_DESCR dp;          /* ptr. to connection descriptor */
    QUEUE *     qp;          /* ptr. to bin's connection queue */
    QUEUE *     dqp;         /* ptr. to dirty queue */
    QUEUE *     fqp;         /* ptr. to free queue */
    LONG        count[CVY_MAXBINS];
                             /* count of cleaned up connections per bin for checking */
    ULONG       i;
    BOOLEAN     err_bin;     /* bin id error detected */
    BOOLEAN     err_count;   /* connection count error detected */
    PMAIN_CTXT  ctxtp = CONTAINING_RECORD (lp, MAIN_CTXT, load);

    err_bin =
    err_count = FALSE;

    qp  = &(bp->connq);
    dqp = &(lp->conn_dirtyq);
    fqp = &(lp->conn_freeq);

    for (i=0; i<CVY_MAXBINS; i++)
        count[i] = 0;

#ifdef TRACE_DIRTY
    DbgPrint ("marking connections as dirty");
#endif

    /* remove connections from bin queue and either make dirty or cleanup */
    ep = (PCONN_ENTRY)Queue_deq(qp);
    while (ep != NULL)
    {
        UNIV_ASSERT (ep->code == CVY_ENTRCODE);  /* (bbain 8/19/99) */

        if (ep->bin >= CVY_MAXBINS)
        {
            /* corrupt bin id: log only the first occurrence */
            if (!err_bin)
            {
                UNIV_PRINT(("Load_conn_kill: bad bin id"));
                LOG_MSG2(MSG_ERROR_INTERNAL, MSG_NONE, ep->bin, CVY_MAXBINS);
                err_bin = TRUE;
            }
        }
        else
        {
            count[ep->bin]++;
        }

        /* make connection and bin dirty if we don't have a zero timeout period so that they
           will not be handled by TCP/IP anymore; this avoids allowing TCP/IP's now stale
           connection state from handling packets for newer connections should traffic be
           directed to this host in the future */
        if (lp->cln_timeout > 0)
        {
            ep->dirty = TRUE;
            Queue_enq(dqp, &(ep->blink));
            lp->dirty_bin[ep->bin] = TRUE;
            lp->cln_waiting = TRUE;
        }

        /* otherwise, just cleanup the connection */
        else
        {
            CVY_CONN_CLEAR(ep);          /* v2.06 */

            Link_unlink(&(ep->rlink));   /* V2.1.5 */

            /* if entry is not in the hash table, free the descriptor */
            if (ep->alloc)
            {
                dp = STRUCT_PTR(ep, CONN_DESCR, entry);
                UNIV_ASSERT (dp->code == CVY_DESCCODE);  /* (bbain 8/19/99) */
                Link_unlink(&(dp->link));
                Queue_enq(fqp, &(dp->link));
            }
        }

        ep = (PCONN_ENTRY)Queue_deq(qp);
    }

    /* now make bins idle; cross-check the drained counts against nconn */
    for (i=0; i<CVY_MAXBINS; i++)
    {
        if (bp->nconn[i] != count[i])
        {
            if (!err_count)
            {
                UNIV_PRINT(("Load_conn_kill: bad connection count %d %d bin %d", bp->nconn[i], (LONG)count[i], i));
                /* KXF 2.1.1 - removed after tripped up at MSFT a few times */
#if 0
                LOG_MSG3(MSG_ERROR_INTERNAL, MSG_NONE, bp->nconn[i], (LONG)count[i], i);
#endif
                err_count = TRUE;
            }
        }

        bp->nconn[i] = 0;
    }

    /* deduct this group's connections from the load context total, clamping at 0 */
    lp->nconn -= bp->tconn;  /* v2.1 */
    if (lp->nconn < 0)
        lp->nconn = 0;

    bp->tconn = 0;  /* v2.06 */

    bp->idle_bins = BIN_ALL_ONES;

    /* if at least one connection is dirty, restart cleanup timeout period */
    if (lp->cln_waiting)
    {
#ifdef TRACE_DIRTY
        DbgPrint ("setting cleanup timeout");
#endif
        lp->cur_time = 0;
    }
    else
    {
#ifdef TRACE_DIRTY
        DbgPrint ("no dirty connections found");
#endif
    }
} /* end Load_conn_kill */
void Load_conn_cleanup(
    PLOAD_CTXT      lp)
/*
  Clean up all dirty connections (v1.32B).

  Drains the dirty connection queue populated by Load_conn_kill: clears each
  entry's connection state and dirty flag, unlinks it from the recovery list,
  and returns allocated descriptors to the free queue.  Finally clears all
  per-bin dirty flags.
*/
{
    PCONN_ENTRY ep;          /* ptr. to connection entry */
    PCONN_DESCR dp;          /* ptr. to connection descriptor */
    QUEUE *     fqp;         /* ptr. to free queue */
    QUEUE *     dqp;         /* ptr. to dirty queue */
    BOOLEAN     err_bin;     /* bin id error detected */
    ULONG       i;
    PMAIN_CTXT  ctxtp = CONTAINING_RECORD (lp, MAIN_CTXT, load);

    err_bin = FALSE;

    dqp = &(lp->conn_dirtyq);
    fqp = &(lp->conn_freeq);

#ifdef TRACE_DIRTY
    DbgPrint ("cleaning up dirty connections");
#endif

    /* dequeue and clean up all connections on dirty connection queue */
    ep = (PCONN_ENTRY)Queue_deq(dqp);
    while (ep != NULL)
    {
        UNIV_ASSERT (ep->code == CVY_ENTRCODE);  /* (bbain 8/19/99) */

        if (ep->bin >= CVY_MAXBINS)
        {
            /* corrupt bin id: log only the first occurrence */
            if (!err_bin)
            {
                UNIV_PRINT(("Load_conn_cleanup: bad bin id"));
                LOG_MSG2(MSG_ERROR_INTERNAL, MSG_NONE, ep->bin, CVY_MAXBINS);
                err_bin = TRUE;
            }
        }

        CVY_CONN_CLEAR(ep);

        ep->dirty = FALSE;

        Link_unlink(&(ep->rlink));  /* V2.1.5 */

        /* if entry is not in the hash table, free the descriptor */
        if (ep->alloc)
        {
            dp = STRUCT_PTR(ep, CONN_DESCR, entry);
            UNIV_ASSERT (dp->code == CVY_DESCCODE);  /* (bbain 8/19/99) */
            Link_unlink(&(dp->link));
            Queue_enq(fqp, &(dp->link));
        }

        ep = (PCONN_ENTRY)Queue_deq(dqp);
    }

    /* clear all dirty bin flags */
    for (i=0; i<CVY_MAXBINS; i++)
        lp->dirty_bin[i] = FALSE;
} /* end Load_conn_cleanup */
void Load_stop(
    PLOAD_CTXT      lp)
/*
  Deactivate the load module: kill all connections in every port rule, zero the
  advertised heartbeat maps so peers see this host handling no load, force
  convergence, and mark the module inactive until Load_start is called again.
  No-op if the module is not active.  Runs under the load context lock.
*/
{
    ULONG       i;
    IRQLEVEL    irql;
    PMAIN_CTXT  ctxtp = CONTAINING_RECORD (lp, MAIN_CTXT, load);

    UNIV_ASSERT(lp->code == CVY_LOADCODE);  /* (bbain 8/19/99) */

    if (!(lp->active))
        return;

    LOCK_ENTER(&(lp->lock), &irql);

    /* make connections for all rules dirty so they will not be handled */
    for (i=0; i<lp->send_msg.nrules; i++)
    {
        PBIN_STATE  bp;  /* ptr. to bin state */

        bp = &(lp->pg_state[i]);
        UNIV_ASSERT(bp->code == CVY_BINCODE);  /* (bbain 8/21/99) */

        Load_conn_kill(lp, bp);  /* (v1.32B) */

        /* advertise that we are not handling any load in case a ping is sent out */
        lp->send_msg.cur_map[i]  = 0;
        lp->send_msg.new_map[i]  = 0;
        lp->send_msg.idle_map[i] = BIN_ALL_ONES;
        lp->send_msg.rdy_bins[i] = 0;
        lp->send_msg.load_amt[i] = 0;
    }

    lp->send_msg.state = HST_CVG;  /* force convergence (v2.1) */

    /* go inactive until restarted */
    lp->active = FALSE;
    lp->nconn  = 0;  /* v2.1 */

    LOCK_EXIT(&(lp->lock), irql);
} /* end Load_stop */
void Load_start(  /* (v1.32B) */
    PLOAD_CTXT      lp)
/*
  Activate the load module: pull this host's configuration from the parameter
  block, reset per-host/ping state and error-latch flags, initialize the bin
  state for every configured port rule plus a trailing default single-server
  rule, export the initial state into the heartbeat send message, and enter
  convergence.  No-op if already active; performs Load_init first if the
  context was never initialized.
*/
{
    ULONG       i;
    BOOLEAN     ret;
    PMAIN_CTXT  ctxtp = CONTAINING_RECORD (lp, MAIN_CTXT, load);
    WCHAR       me[20];

    if (!(lp->initialized))
        Load_init(lp, & ctxtp -> params);

    UNIV_ASSERT(lp->code == CVY_LOADCODE);  /* (bbain 8/19/99) */

    if (lp->active)
        return;

    /* host_priority is 1-based in the parameters; internal ids are 0-based */
    lp->my_host_id =(* (lp->params)).host_priority - 1;

    lp->ping_map =
    lp->host_map = 1 << lp->my_host_id;

    lp->last_hmap = 0;  /* bbain RTM RC1 6/23/99 */

    for (i=0; i<CVY_MAX_HOSTS; i++)
    {
        lp->nmissed_pings[i] = 0;
    }

    lp->min_missed_pings = (* (lp->params)).alive_tolerance;
    lp->cln_timeout      = (* (lp->params)).cleanup_delay;
    lp->def_timeout      = (* (lp->params)).alive_period;
    lp->stable_map       = 0;
    lp->consistent       = TRUE;

    /* Intiialize the bad teaming configuration detected flag. */
    lp->bad_team_config  = FALSE;

    /* reset all one-shot error/warning latches */
    lp->dup_hosts        = FALSE;
    lp->dup_sspri        = FALSE;
    lp->bad_map          = FALSE;
    lp->overlap_maps     = FALSE;
    lp->err_rcving_bins  = FALSE;
    lp->err_orphans      = FALSE;
    lp->bad_num_rules    = FALSE;
    lp->alloc_inhibited  = FALSE;
    lp->alloc_failed     = FALSE;
    lp->bad_defrule      = FALSE;

    lp->scale_client     = (BOOLEAN)(* (lp->params)).scale_client;
    lp->my_stable_ct     = 0;
    lp->all_stable_ct    = 0;
    lp->min_stable_ct    = lp->min_missed_pings;

    lp->dscr_per_alloc   = (* (lp->params)).dscr_per_alloc;
    lp->max_dscr_allocs  = (* (lp->params)).max_dscr_allocs;

    lp->pkt_count        = 0;  /* 1.32B */

    /* initialize port group bin states; add a default rule at the end */
    if ((* (lp->params)).num_rules >= (CVY_MAX_RULES - 1))
    {
        UNIV_PRINT(("Load_start: too many rules; using max possible."));
        lp->send_msg.nrules = (USHORT)CVY_MAX_RULES;
    }
    else
        lp->send_msg.nrules = (USHORT)((* (lp->params)).num_rules) + 1;

    for (i=0; i<lp->send_msg.nrules; i++)
    {
        PBIN_STATE  bp;  /* ptr. to bin state */
        PCVY_RULE   rp;  /* ptr. to rules array */

        bp = &(lp->pg_state[i]);
        rp = &((* (lp->params)).port_rules[i]);

        if (i == (((ULONG)lp->send_msg.nrules) - 1))

            /* initialize bin state for default rule to single server with
               host priority */
            Bin_state_init(lp, bp, i, lp->my_host_id, CVY_SINGLE, CVY_TCP_UDP,
                           FALSE, (USHORT)0, (* (lp->params)).host_priority);

        else if (rp->mode == CVY_SINGLE)
            Bin_state_init(lp, bp, i, lp->my_host_id, rp->mode, rp->protocol,
                           FALSE, (USHORT)0, rp->mode_data.single.priority);

        else if (rp->mode == CVY_MULTI)
            Bin_state_init(lp, bp, i, lp->my_host_id, rp->mode, rp->protocol,
                           (BOOLEAN)(rp->mode_data.multi.equal_load),
                           rp->mode_data.multi.affinity,
                           (rp->mode_data.multi.equal_load ?
                            CVY_EQUAL_LOAD : rp->mode_data.multi.load));

        /* handle CVY_NEVER mode as multi-server.  the check for
           those modes is done before attempting to hash to the bin in
           Load_packet_check and Load_conn_advise so bin distribution plays
           no role in the behavior, but simply allows the rule to be valid
           across all of the operational servers */
        else
            Bin_state_init(lp, bp, i, lp->my_host_id, rp->mode, rp->protocol,
                           TRUE, (USHORT)0, CVY_EQUAL_LOAD);

        ret = Bin_converge(lp, bp, lp->my_host_id);
        if (!ret)
        {
            UNIV_PRINT(("Load_start: initial convergence inconsistent"));
            LOG_MSG(MSG_ERROR_INTERNAL, MSG_NONE);
        }

        /* export current port group state to send msg; the default rule
           carries rule code 0 */
        if (i == (((ULONG)(lp->send_msg.nrules)) - 1))
            lp->send_msg.rcode[i]= 0;
        else
            lp->send_msg.rcode[i]= rp->code;

        lp->send_msg.cur_map[i]  = bp->cmap;  /* v2.1 */
        lp->send_msg.new_map[i]  = bp->new_map[lp->my_host_id];
        lp->send_msg.idle_map[i] = bp->idle_bins;
        lp->send_msg.rdy_bins[i] = bp->rdy_bins;
        lp->send_msg.load_amt[i] = bp->load_amt[lp->my_host_id];

        /* ###### for keynote - ramkrish */
        lp->send_msg.pg_rsvd1[i] = (ULONG)bp->all_idle_map;
    }

    /* initialize send msg */
    lp->send_msg.host_id   = (USHORT)(lp->my_host_id);
    lp->send_msg.master_id = (USHORT)(lp->my_host_id);
    lp->send_msg.hcode     = lp->params->install_date;
    lp->send_msg.pkt_count = lp->pkt_count;  /* 1.32B */

    Univ_ulong_to_str (lp->my_host_id+1, me, 10);

    /* Tracking convergence - Starting convergence because this host is joining the cluster. */
    LOG_MSGS(MSG_INFO_CONVERGING_NEW_MEMBER, me, me);
    TRACE_CONVERGENCE("Initiating convergence on host %d. Reason: Host %d is joining the cluster.", lp->my_host_id+1, lp->my_host_id+1);

    /* Tracking convergence - Starting convergence. */
    lp->send_msg.state = HST_CVG;

    /* activate module */
    lp->active = TRUE;
} /* end Load_start */
void Load_init(
    PLOAD_CTXT      lp,
    PCVY_PARAMS     params)
/*
  One-time initialization of a load context: sets up the hashed connection
  entry table, the per-hash connection queues, the free/dirty/recovery queues,
  the statically allocated connection descriptors, and the dirty-bin and
  descriptor-allocation bookkeeping.  Idempotent with respect to the heavy
  setup (guarded by lp->initialized); always re-initializes the lock and the
  reference count.  Does NOT activate the module (v1.32B) - see Load_start.
*/
{
    ULONG       i;
    PMAIN_CTXT  ctxtp = CONTAINING_RECORD (lp, MAIN_CTXT, load);

    TRACE_INFO("-> Load_init: lp=0x%p, param=0x%p", lp, params);

    LOCK_INIT(&(lp->lock));

    if (!(lp->initialized))
    {
        lp->code = CVY_LOADCODE;  /* (bbain 8/19/99) */

        /* initialize hashed connection descriptors and queues */
        for (i=0; i<CVY_MAX_CHASH; i++)
        {
            PCONN_ENTRY     ep;

            ep = &(lp->hashed_conn[i]);

            ep->code  = CVY_ENTRCODE;  /* (bbain 8/19/99) */
            ep->alloc = FALSE;         /* embedded in table, not heap-allocated */
            ep->dirty = FALSE;         /* v1.32B */
            CVY_CONN_CLEAR(ep);

            Link_init(&(ep->blink));
            Link_init(&(ep->rlink));   /* V2.1.5 */

            Queue_init(&(lp->connq[i]));
        }

        /* initialize connection free and dirty queues; free descriptors */
        Queue_init(&(lp->conn_freeq));
        Queue_init(&(lp->conn_dirtyq));  /* v1.32B */
        Queue_init(&(lp->conn_rcvryq));  /* V2.1.5 */

        /* seed the free queue with the statically allocated descriptors */
        for (i=0; i<CVY_INIT_QCONN; i++)
        {
            lp->conn_descr[i].code = CVY_DESCCODE;  /* (bbain 8/19/99) */
            Link_init(&(lp->conn_descr[i].link));

            lp->conn_descr[i].entry.code  = CVY_ENTRCODE;  /* (bbain 8/21/99) */
            lp->conn_descr[i].entry.alloc = TRUE;
            lp->conn_descr[i].entry.dirty = FALSE;  /* v1.32B */
            CVY_CONN_CLEAR(&(lp->conn_descr[i].entry));

            Link_init(&(lp->conn_descr[i].entry.blink));
            Link_init(&(lp->conn_descr[i].entry.rlink));  /* V2.1.5 */

            Queue_enq(&(lp->conn_freeq), &(lp->conn_descr[i].link));
        }

        /* (v1.32B) */
        for (i=0; i<CVY_MAXBINS; i++)
            lp->dirty_bin[i] = FALSE;

        lp->cln_waiting = FALSE;
        lp->def_timeout =
        lp->cur_timeout = params -> alive_period;
        lp->nqalloc     = 0;
        lp->nconn       = 0;  /* v2.1 */
        lp->active      = FALSE;
        lp->initialized = TRUE;

        /* clear list of descriptor queue allocations (bbain 2/25/99) */
        for (i=0; i<CVY_MAX_MAX_DSCR_ALLOCS; i++)
            lp->qalloc_list[i] = (PCONN_DESCR)NULL;

        lp -> params = params;
    }
    else
    {
        UNIV_ASSERT(lp->code == CVY_LOADCODE);  /* (bbain 8/19/99) */
    }

    /* Initialize the reference count on this load module. */
    lp->ref_count = 0;

    /* don't start module (v1.32B) */

    TRACE_INFO("<- Load_init");
} /* end Load_init */
  1304. void Load_cleanup( /* (bbain 2/25/99) */
  1305. PLOAD_CTXT lp)
  1306. {
  1307. ULONG i;
  1308. PMAIN_CTXT ctxtp = CONTAINING_RECORD (lp, MAIN_CTXT, load);
  1309. UNIV_ASSERT(lp->code == CVY_LOADCODE); /* (bbain 8/19/99) */
  1310. /* free all descriptor queue allocations */
  1311. if (lp->nqalloc > CVY_MAX_MAX_DSCR_ALLOCS)
  1312. lp->nqalloc = CVY_MAX_MAX_DSCR_ALLOCS;
  1313. for (i=0; i<lp->nqalloc; i++)
  1314. if (lp->qalloc_list[i] != (PCONN_DESCR)NULL)
  1315. free((PVOID)(lp->qalloc_list[i]));
  1316. } /* end Load_cleanup */
  1317. void Load_convergence_start(
  1318. PLOAD_CTXT lp)
  1319. {
  1320. PMAIN_CTXT ctxtp = CONTAINING_RECORD (lp, MAIN_CTXT, load);
  1321. lp->consistent = TRUE; /* 1.03 */
  1322. /* setup initial convergence state */
  1323. lp->send_msg.state = HST_CVG;
  1324. lp->stable_map = 0;
  1325. lp->my_stable_ct = 0;
  1326. lp->all_stable_ct = 0;
  1327. lp->send_msg.master_id = (USHORT)(lp->my_host_id);
  1328. } /* end Load_convergence_start */
  1329. void Load_msg_rcv(
  1330. PLOAD_CTXT lp,
  1331. PPING_MSG pmsg) /* ptr. to ping message */
  1332. {
  1333. ULONG i;
  1334. BOOLEAN consistent;
  1335. ULONG my_host;
  1336. ULONG rem_host;
  1337. ULONG saved_map; /* saved host map */
  1338. PPING_MSG sendp; /* ptr. to my send message */
  1339. IRQLEVEL irql;
  1340. WCHAR me[20];
  1341. WCHAR them[20];
  1342. ULONG map;
  1343. PMAIN_CTXT ctxtp = CONTAINING_RECORD (lp, MAIN_CTXT, load);
  1344. /* Used for tracking convergence and event logging. */
  1345. BOOLEAN bInconsistentMaster = FALSE;
  1346. BOOLEAN bInconsistentTeaming = FALSE;
  1347. BOOLEAN bInconsistentPortRules = FALSE;
  1348. UNIV_ASSERT(lp->code == CVY_LOADCODE);
  1349. TRACE_HB("Recv HB from host %d", (ULONG) pmsg->host_id + 1);
  1350. if (!(lp->active))
  1351. return;
  1352. my_host = lp->my_host_id;
  1353. rem_host = (ULONG) pmsg->host_id;
  1354. Univ_ulong_to_str (my_host+1, me, 10);
  1355. Univ_ulong_to_str (rem_host+1, them, 10);
  1356. sendp = &(lp->send_msg);
  1357. if (rem_host >= CVY_MAX_HOSTS)
  1358. return;
  1359. LOCK_ENTER(&(lp->lock), &irql);
  1360. /* filter out packets broadcast by this host */
  1361. if(rem_host == my_host)
  1362. {
  1363. /* if this packet was really from another host, we have duplicate host ids */
  1364. if (sendp->hcode != pmsg->hcode)
  1365. {
  1366. if (!(lp->dup_hosts))
  1367. {
  1368. UNIV_PRINT(("Duplicate host ids detected."));
  1369. LOG_MSG(MSG_ERROR_HOST_ID, me);
  1370. lp->dup_hosts = TRUE;
  1371. }
  1372. /* Tracking convergence - Starting convergence because duplicate host IDs were detected in the cluster. */
  1373. if (sendp->state == HST_NORMAL) {
  1374. LOG_MSGS(MSG_INFO_CONVERGING_DUPLICATE_HOST_ID, me, them);
  1375. TRACE_CONVERGENCE("Initiating convergence on host %d. Reason: Host %d is configured with the same host ID.", my_host+1, rem_host+1);
  1376. }
  1377. /* Tracking convergence - Starting convergence. */
  1378. Load_convergence_start(lp);
  1379. }
  1380. /* just update ping and host maps for us */
  1381. lp->ping_map |= (1 << my_host);
  1382. lp->host_map |= (1 << my_host);
  1383. LOCK_EXIT(&(lp->lock), irql);
  1384. return;
  1385. }
  1386. if (sendp->nrules != pmsg->nrules)
  1387. {
  1388. if (!(lp->bad_num_rules))
  1389. {
  1390. UNIV_PRINT(("Host %d: Hosts have diff # rules.", my_host));
  1391. LOG_MSG2(MSG_ERROR_RULES_MISMATCH, them, sendp->nrules, pmsg->nrules);
  1392. lp->bad_num_rules = TRUE;
  1393. }
  1394. /* Tracking convergence - Starting convergence because the number of port rules on this host and the remote host do not match. */
  1395. if (sendp->state == HST_NORMAL) {
  1396. LOG_MSGS(MSG_INFO_CONVERGING_NUM_RULES, me, them);
  1397. TRACE_CONVERGENCE("Initiating convergence on host %d. Reason: Host %d is configured with a conflicting number of port rules.", my_host+1, rem_host+1);
  1398. }
  1399. /* Tracking convergence - Starting convergence. */
  1400. Load_convergence_start(lp);
  1401. /* just update ping and host maps for remote host (bbain 2/17/99) */
  1402. lp->ping_map |= (1 << rem_host);
  1403. lp->host_map |= (1 << rem_host);
  1404. LOCK_EXIT(&(lp->lock), irql);
  1405. return;
  1406. }
  1407. /* update mastership and see if consistent */
  1408. if (rem_host < sendp->master_id)
  1409. sendp->master_id = (USHORT)rem_host;
  1410. consistent = sendp->master_id == pmsg->master_id; /* 1.03 */
  1411. /* For the purposes of logging the reason for convergence, note this inconsistency. */
  1412. if (!consistent) bInconsistentMaster = TRUE;
  1413. /* update ping and host maps to include remote host */
  1414. lp->ping_map |= (1 << rem_host);
  1415. saved_map = lp->host_map;
  1416. lp->host_map |= (1 << rem_host);
  1417. /* handle host convergence */
  1418. if (sendp->state != HST_NORMAL)
  1419. {
  1420. /* if master, update stable map for remote host */
  1421. if (sendp->master_id == my_host)
  1422. {
  1423. if (pmsg->state == HST_STABLE)
  1424. {
  1425. lp->stable_map |= (1 << rem_host);
  1426. }
  1427. else
  1428. {
  1429. lp->stable_map &= ~(1 << rem_host);
  1430. lp->all_stable_ct = 0;
  1431. }
  1432. }
  1433. /* otherwise, update state if have global stable convergence and the current
  1434. master has signalled completion by returning to the normal state; note
  1435. that we must do this prior to updating port group states */
  1436. else if (rem_host == sendp->master_id && pmsg->state == HST_NORMAL)
  1437. {
  1438. if (sendp->state == HST_STABLE)
  1439. {
  1440. sendp->state = HST_NORMAL;
  1441. /* Notify our BDA team that this cluster is consistently configured.
  1442. If we are not part of a BDA team, this call is essentially a no-op. */
  1443. Load_teaming_consistency_notify(&ctxtp->bda_teaming, TRUE);
  1444. /* Reset the bad teaming configuration detected flag if we are converged. */
  1445. lp->bad_team_config = FALSE;
  1446. lp->dup_hosts = FALSE;
  1447. lp->dup_sspri = FALSE;
  1448. lp->bad_map = FALSE;
  1449. lp->overlap_maps = FALSE;
  1450. lp->err_rcving_bins = FALSE;
  1451. lp->err_orphans = FALSE;
  1452. lp->bad_num_rules = FALSE;
  1453. lp->pkt_count = 0; /* v1.32B */
  1454. for (i=0; i<sendp->nrules; i++)
  1455. {
  1456. PBIN_STATE bp;
  1457. bp = &(lp->pg_state[i]);
  1458. bp->compatible = TRUE; /* 1.03 */
  1459. Bin_converge_commit(lp, bp, my_host);
  1460. UNIV_PRINT(("Host %d pg %d: new cur map %x idle %x all %x",
  1461. my_host, i, bp->cur_map[my_host], bp->idle_bins,
  1462. bp->all_idle_map));
  1463. #if 0 /* 1.03: only update ping message in Load_timeout to avoid locking send */
  1464. /* export current port group state */
  1465. sendp->cur_map[i] = bp->cmap; /* v2.1 */
  1466. sendp->new_map[i] = bp->new_map[my_host];
  1467. sendp->idle_map[i] = bp->idle_bins;
  1468. sendp->rdy_bins[i] = bp->rdy_bins;
  1469. sendp->load_amt[i] = bp->load_amt[my_host];
  1470. #endif
  1471. }
  1472. #if 0
  1473. sendp->pkt_count = lp->pkt_count; /* 1.32B */
  1474. #endif
  1475. UNIV_PRINT(("Host %d: converged as slave", my_host));
  1476. /* log convergence completion if host map changed (bbain RTM RC1 6/23/99) */
  1477. Load_hosts_query (lp, TRUE, & map);
  1478. lp->last_hmap = lp->host_map;
  1479. }
  1480. else
  1481. {
  1482. /* Tracking convergence - Starting convergence because the DEFAULT host prematurely ended convergence. In this case, we
  1483. are guaranteed to already be in the HST_CVG state, and because this message can be misleading in some circumstances,
  1484. we do not log an event. For instance, due to timing issues, when a host joins a cluster he can receive a HST_NORMAL
  1485. heartbeat from the DEFAULT host while it is still in the HST_CVG state simply because that heartbeat left the DEFAULT
  1486. host before it received our first heartbeat, which initiated convergence. */
  1487. TRACE_CONVERGENCE("Initiating convergence on host %d. Reason: Host %d, the DEFAULT host, prematurely terminated convergence.", my_host+1, rem_host+1);
  1488. /* Tracking convergence - Starting convergence. */
  1489. Load_convergence_start(lp);
  1490. }
  1491. }
  1492. }
  1493. /* Compare the teaming configuration of this host with the remote host. If the
  1494. two are inconsitent and we are part of a team, we will initiate convergence. */
  1495. if (!Load_teaming_consistency_check(lp->bad_team_config, &ctxtp->bda_teaming, sendp->teaming, pmsg->teaming)) {
  1496. /* Only log an event if the teaming configuration was, but is now not, consistent. */
  1497. if (!lp->bad_team_config) {
  1498. /* Note that we saw this. */
  1499. lp->bad_team_config = TRUE;
  1500. /* Log the event. */
  1501. LOG_MSG(MSG_ERROR_BDA_BAD_TEAM_CONFIG, them);
  1502. }
  1503. /* Notify the team that this cluster is NOT consistently configured. */
  1504. Load_teaming_consistency_notify(&ctxtp->bda_teaming, FALSE);
  1505. /* Mark the heartbeats inconsistent to force and retain convergence. */
  1506. consistent = FALSE;
  1507. /* For the purposes of logging the reason for convergence, note this inconsistency. */
  1508. bInconsistentTeaming = TRUE;
  1509. }
  1510. /* update port group state */
  1511. for (i=0; i<sendp->nrules; i++)
  1512. {
  1513. BOOLEAN ret;
  1514. PBIN_STATE bp;
  1515. bp = &lp->pg_state[i];
  1516. /* if rule codes don't match, print message and handle incompatibility (1.03: note
  1517. that we previously marked rule invalid, which would stop processing) */
  1518. if (sendp->rcode[i] != pmsg->rcode[i])
  1519. {
  1520. /* 1.03: if rule was peviously compatible, print message */
  1521. if (bp->compatible)
  1522. {
  1523. PCVY_RULE rp;
  1524. UNIV_PRINT(("Host %d pg %d: rule codes do not match.", lp->my_host_id, i));
  1525. /* bbain 8/27/99 */
  1526. LOG_MSG4(MSG_ERROR_RULES_MISMATCH, them, rem_host, i, sendp->rcode[i], pmsg->rcode[i]);
  1527. /* Get the port rule information for this rule. */
  1528. rp = &lp->params->port_rules[i];
  1529. /* Check to see if this is an issue with a win2k host in a cluster utilizing virtual clusters. */
  1530. if ((rp->virtual_ip_addr != CVY_ALL_VIP_NUMERIC_VALUE) && ((sendp->rcode[i] ^ ~rp->virtual_ip_addr) == pmsg->rcode[i])) {
  1531. UNIV_PRINT((" ** A Windows 2000 or NT4 host MAY be participating in a cluster utilizing virtual cluster support."));
  1532. LOG_MSG(MSG_WARN_VIRTUAL_CLUSTERS, MSG_NONE);
  1533. }
  1534. bp->compatible = FALSE;
  1535. }
  1536. /* 1.03: mark rule inconsistent to force and continue convergence */
  1537. consistent = FALSE;
  1538. /* For the purposes of logging the reason for convergence, note this inconsistency. */
  1539. bInconsistentPortRules = TRUE;
  1540. /* don't update bin state */
  1541. continue;
  1542. }
  1543. ret = Bin_host_update(lp, bp, my_host, (BOOLEAN)(sendp->state != HST_NORMAL),
  1544. (BOOLEAN)(pmsg->state != HST_NORMAL),
  1545. rem_host, pmsg->cur_map[i], pmsg->new_map[i],
  1546. pmsg->idle_map[i], pmsg->rdy_bins[i],
  1547. pmsg->pkt_count, pmsg->load_amt[i]);
  1548. #if 0 /* 1.03: only update ping message in Load_timeout to avoid locking send */
  1549. /* export current port group state */
  1550. sendp->cur_map[i] = bp->cmap; /* v2.1 */
  1551. sendp->new_map[i] = bp->new_map[my_host];
  1552. sendp->idle_map[i] = bp->idle_bins;
  1553. sendp->rdy_bins[i] = bp->rdy_bins;
  1554. sendp->load_amt[i] = bp->load_amt[my_host];
  1555. #endif
  1556. if (!ret)
  1557. consistent = FALSE;
  1558. }
  1559. /* update our consistency state */
  1560. lp->consistent = consistent;
  1561. /* if we are in normal operation and we discover a new host or a host goes into
  1562. convergence or we discover an inconsistency, go into convergence */
  1563. if (sendp->state == HST_NORMAL)
  1564. {
  1565. if (lp->host_map != saved_map || pmsg->state == HST_CVG || !consistent)
  1566. {
  1567. /* If a host has joined the cluster, or if inconsistent teaming configuration or port
  1568. rules were detected, then we need to log an event. However, we segregate the
  1569. inconsistent master host flag because it is set by the initiating host in MANY
  1570. occasions, so we want to log the most specific reason(s) for convergence if
  1571. possible and only report the inconsistent master detection only if nothing more
  1572. specific can be deduced. */
  1573. if (lp->host_map != saved_map || bInconsistentTeaming || bInconsistentPortRules) {
  1574. /* If the host maps are different, then we know that the host from which we received
  1575. this packet is joining the cluster because the ONLY operation on the host map in
  1576. this function is to ADD a remote host to our map. Otherwise, if the map has not
  1577. changed, then an inconsistent configuration got us into the branch. */
  1578. if (lp->host_map != saved_map) {
  1579. /* Tracking convergence - Starting convergence because another host is joining the cluster. */
  1580. LOG_MSGS(MSG_INFO_CONVERGING_NEW_MEMBER, me, them);
  1581. TRACE_CONVERGENCE("Initiating convergence on host %d. Reason: Host %d is joining the cluster.", my_host+1, rem_host+1);
  1582. } else if (bInconsistentTeaming || bInconsistentPortRules) {
  1583. /* Tracking convergence - Starting convergence because inconsistent configuration was detected. */
  1584. LOG_MSGS(MSG_INFO_CONVERGING_BAD_CONFIG, me, them);
  1585. TRACE_CONVERGENCE("Initiating convergence on host %d. Reason: Host %d has conflicting configuration.", my_host+1, rem_host+1);
  1586. }
  1587. /* If we have nothing better to report, report convergence for an unspecific reason. */
  1588. } else if (bInconsistentMaster || pmsg->state == HST_CVG) {
  1589. /* Tracking convergence - Starting convergence for unknown reasons. */
  1590. LOG_MSGS(MSG_INFO_CONVERGING_UNKNOWN, me, them);
  1591. TRACE_CONVERGENCE("Initiating convergence on host %d. Reason: Host %d is converging for an unknown reason.", my_host+1, rem_host+1);
  1592. }
  1593. /* Tracking convergence - Starting convergence. */
  1594. Load_convergence_start(lp);
  1595. }
  1596. }
  1597. /* otherwise, if we are in convergence and we see an inconsistency, just restart
  1598. our local convergence */
  1599. else
  1600. {
  1601. /* update our consistency state; if we didn't see consistent information,
  1602. restart this host's convergence */
  1603. if (!consistent)
  1604. {
  1605. /* Tracking convergence - Starting convergence because inconsistent configuration was detected.
  1606. This keeps hosts in a state of convergence when hosts are inconsistently configured. However,
  1607. since the cluster is already in a state of convergece (HST_CVG or HST_STABLE), don't log an
  1608. event, which may confuse a user. */
  1609. TRACE_CONVERGENCE("Initiating convergence on host %d. Reason: Host %d has conflicting configuration.", my_host+1, rem_host+1);
  1610. /* Tracking convergence - Starting convergence. */
  1611. sendp->state = HST_CVG;
  1612. lp->my_stable_ct = 0;
  1613. lp->stable_map &= ~(1 << my_host);
  1614. lp->all_stable_ct = 0;
  1615. }
  1616. }
  1617. LOCK_EXIT(&(lp->lock), irql);
  1618. } /* end Load_msg_rcv */
  1619. PPING_MSG Load_snd_msg_get(
  1620. PLOAD_CTXT lp)
  1621. {
  1622. return &(lp->send_msg);
  1623. } /* end Load_snd_msg_get */
/* Periodic heartbeat timer handler for the load module.
 *
 * Detects dead hosts via missed pings, advances the convergence state
 * machine (HST_CVG -> HST_STABLE -> HST_NORMAL), and refreshes the
 * outgoing ping message with current port-group state.
 *
 * Parameters:
 *   lp          - load module context
 *   new_timeout - out (optional): next timer period; halved while converging
 *   pconverging - out (optional): TRUE if still converging
 *   pnconn      - out (optional): current number of tracked connections
 *
 * Returns TRUE iff any host (including this one) is still in the host map. */
BOOLEAN Load_timeout(
    PLOAD_CTXT      lp,
    PULONG          new_timeout,
    PBOOLEAN       pconverging,
    PULONG          pnconn)
/*
  Note: we only update ping message in this function since we know that upper level code
  sends out ping messages after calling this routine. We cannot be sure that Load_msg_rcv
  is sequentialized with sending a message, (1.03)

  Upper level code locks this routine wrt Load_msg_rcv, Load_packet_check, and
  Load_conn_advise. (1.03)
*/
{
    ULONG       missed_pings;   /* bitmask: hosts in host_map that we did NOT hear from */
    ULONG       my_host;
    ULONG       i;
    PPING_MSG   sendp;          /* ptr. to my send message */
    IRQLEVEL    irql;
    ULONG       map;            /* returned host map from query */
    PMAIN_CTXT  ctxtp = CONTAINING_RECORD (lp, MAIN_CTXT, load);

    UNIV_ASSERT(lp->code == CVY_LOADCODE);  /* (bbain 8/19/99) */

    LOCK_ENTER(&(lp->lock), &irql);

    /* check for cleanup timeout (v1.32B): if dirty-connection cleanup was
       scheduled, run it once enough timer ticks have accumulated */
    if (lp->cln_waiting)
    {
        lp->cur_time += lp->cur_timeout;
        if (lp->cur_time >= lp->cln_timeout)
        {
            Load_conn_cleanup(lp);
            lp->cln_waiting = FALSE;
        }
    }

    /* return if not active; still report defaults through the out params */
    if (!(lp->active))
    {
        if (new_timeout != NULL)
            * new_timeout = lp->cur_timeout = lp->def_timeout;

        if (pnconn != NULL)      /* v2.1 */
            * pnconn = lp->nconn;

        if (pconverging != NULL)
            * pconverging = FALSE;

        LOCK_EXIT(&(lp->lock), irql);
        return FALSE;
    }

    my_host = lp->my_host_id;
    sendp   = &(lp->send_msg);

    /* compute which hosts missed pings and reset ping map for the next period */
    missed_pings = lp->host_map & (~lp->ping_map);

#ifdef NO_CLEANUP
    lp->ping_map = 1 << my_host;
#else
    lp->ping_map = 0;
#endif

    /* check whether any host is dead, including ourselves; bit 0 of
       missed_pings corresponds to host i as the mask is shifted each pass */
    for (i=0; i<CVY_MAX_HOSTS; i++)
    {
        /* if we have a missed ping for this host, increment count */
        if ((missed_pings & 0x1) == 1)
        {
            lp->nmissed_pings[i]++;

            /* if we missed too many pings, declare host dead and force convergence */
            if (lp->nmissed_pings[i] == lp->min_missed_pings)
            {
                ULONG       j;
                BOOLEAN     ret;
                WCHAR me[20];
                WCHAR them[20];

                if (i == my_host)
                {
                    UNIV_PRINT(("Host %d: missed too many pings; this host declared offline", i));

                    /* reset our packet count since we are likely not to be receiving
                       packets from others now; this will make us less favored to
                       handle duplicate bins later (v1.32B) */
                    lp->pkt_count = 0;
                }

                lp->host_map &= ~(1<<i);

                /* reset per-port-group state for the dead host in every rule */
                for (j=0; j<sendp->nrules; j++)
                {
                    PBIN_STATE      bp;

                    bp = &(lp->pg_state[j]);
                    UNIV_ASSERT(bp->code == CVY_BINCODE);   /* (bbain 8/19/99) */

                    if (i == my_host)
                    {
                        ULONG   k;

                        /* cleanup connections and restore maps to clean state */
                        Load_conn_kill(lp, bp);

                        bp->targ_map     = 0;
                        bp->all_idle_map = BIN_ALL_ONES;
                        bp->cmap         = 0;       /* v2.1 */
                        bp->compatible   = TRUE;    /* v1.03 */

                        for (k=0; k<CVY_MAX_HOSTS; k++)
                        {
                            bp->new_map[k]  = 0;
                            bp->cur_map[k]  = 0;
                            bp->chk_map[k]  = 0;
                            bp->idle_map[k] = BIN_ALL_ONES;

                            /* note: k != i preserves our own load_amt entry */
                            if (k != i)
                                bp->load_amt[k] = 0;
                        }

                        bp->snd_bins =
                        bp->rcv_bins =
                        bp->rdy_bins = 0;
                        bp->idle_bins = BIN_ALL_ONES;

                        /* compute initial new map for convergence as only host in cluster
                           (v 1.3.2B) */
                        ret = Bin_converge(lp, bp, lp->my_host_id);
                        if (!ret)
                        {
                            UNIV_PRINT(("Load_timeout: initial convergence inconsistent"));
                            LOG_MSG(MSG_ERROR_INTERNAL, MSG_NONE);
                        }
                    }
                    else
                    {
                        /* tell bin state the remote host is gone: empty maps,
                           all bins idle, zero load */
                        ret = Bin_host_update(lp, bp, my_host, TRUE, TRUE,
                                              i, 0, 0, BIN_ALL_ONES, 0, 0, 0);
                    }
                }

                lp->nmissed_pings[i] = 0;

                /* If a host has dropped out of the cluster, then log an event. However, we don't
                   log an event when we drop out because the only way for us to drop out of our own
                   cluster is if we are stopping anyway, or if we have lost network connectivity.
                   Logging such events may be misleading, so we won't bother. */
                if (i != my_host) {
                    Univ_ulong_to_str (my_host+1, me, 10);
                    Univ_ulong_to_str (i+1, them, 10);

                    /* Tracking convergence - Starting convergence because a member has fallen out of the cluster. */
                    LOG_MSGS(MSG_INFO_CONVERGING_MEMBER_LOST, me, them);
                    TRACE_CONVERGENCE("Initiating convergence on host %d. Reason: Host %d is leaving the cluster.", my_host+1, i+1);
                }

                /* Tracking convergence - Starting convergence. */
                Load_convergence_start(lp);
            }
        }
        /* otherwise reset missed ping count */
        else
            lp->nmissed_pings[i] = 0;

        missed_pings >>= 1;
    }

    /* handle convergence */
    if (sendp->state != HST_NORMAL)
    {
        /* check whether we have been consistent and have received our own pings
           for a sufficient period to move to a stable state and announce it to
           other hosts */
        if (sendp->state == HST_CVG)
        {
            if (lp->consistent && ((lp->host_map & (1 << my_host)) != 0))
            {
                lp->my_stable_ct++;
                if (lp->my_stable_ct >= lp->min_stable_ct)
                {
                    sendp->state = HST_STABLE;
                    lp->stable_map |= (1 << my_host);
                }
            }
            else
                lp->my_stable_ct = lp->all_stable_ct = 0;   /* wlb B3RC1 */
        }
        /* otherwise, see if we are the master and everybody's been stable for
           a sufficient period for us to terminate convergence */
        else if (sendp->state == HST_STABLE &&
                 my_host == sendp->master_id &&
                 lp->stable_map == lp->host_map)
        {
            lp->all_stable_ct++;
            if (lp->all_stable_ct >= lp->min_stable_ct)
            {
                sendp->state = HST_NORMAL;

                /* Notify our BDA team that this cluster is consistently configured.
                   If we are not part of BDA team, this call is essentially a no-op. */
                Load_teaming_consistency_notify(&ctxtp->bda_teaming, TRUE);

                /* Reset the bad teaming configuration detected flag if we are converged. */
                lp->bad_team_config = FALSE;

                /* clear all per-convergence diagnostic flags */
                lp->dup_hosts = FALSE;
                lp->dup_sspri = FALSE;
                lp->bad_map = FALSE;
                lp->overlap_maps = FALSE;
                lp->err_rcving_bins = FALSE;
                lp->err_orphans = FALSE;
                lp->bad_num_rules = FALSE;

                lp->pkt_count = 0;  /* v1.32B */

                for (i=0; i<sendp->nrules; i++)
                {
                    PBIN_STATE      bp;
                    BOOLEAN         ret;

                    bp = &(lp->pg_state[i]);
                    bp->compatible = TRUE;  /* 1.03 */

                    /* explicitly converge to new map in case we're the only host (v2.06) */
                    ret = Bin_converge(lp, bp, lp->my_host_id);
                    if (!ret)
                    {
                        UNIV_PRINT(("Load_timeout: final convergence inconsistent"));
                        LOG_MSG(MSG_ERROR_INTERNAL, MSG_NONE);
                    }

                    Bin_converge_commit(lp, bp, my_host);

                    UNIV_PRINT(("Host %d pg %d: new cur map %x idle %x all %x",
                                my_host, i, bp->cur_map[my_host], bp->idle_bins,
                                bp->all_idle_map));
                }

                UNIV_PRINT(("+++ Host %d: converged as master +++", my_host));

                /* log convergence completion if host map changed (bbain RTM RC1 6/23/99) */
                Load_hosts_query (lp, TRUE, & map);
                lp->last_hmap = lp->host_map;
            }
        }
    }

    /* 1.03: update ping message with the current state of every port group */
    for (i=0; i<sendp->nrules; i++)
    {
        PBIN_STATE      bp;

        bp = &(lp->pg_state[i]);

        /* export current port group state to ping message */
        sendp->cur_map[i]  = bp->cmap;              /* v2.1 */
        sendp->new_map[i]  = bp->new_map[my_host];
        sendp->idle_map[i] = bp->idle_bins;
        sendp->rdy_bins[i] = bp->rdy_bins;
        sendp->load_amt[i] = bp->load_amt[my_host];

        /* ###### for keynote - ramkrish */
        sendp->pg_rsvd1[i] = (ULONG)bp->all_idle_map;
    }

    sendp->pkt_count = lp->pkt_count;   /* 1.32B */

    /* Add configuration information for teaming at each timeout. */
    Load_teaming_code_create(&lp->send_msg.teaming, &ctxtp->bda_teaming);

    /* request fast timeout if converging */
    if (new_timeout != NULL)    /* 1.03 */
    {
        if (sendp->state != HST_NORMAL)
            * new_timeout = lp->cur_timeout = lp->def_timeout / 2;
        else
            * new_timeout = lp->cur_timeout = lp->def_timeout;
    }

    if (pnconn != NULL)     /* v2.1 */
        * pnconn = lp->nconn;

    if (pconverging != NULL)
        * pconverging = (sendp->state != HST_NORMAL);

    LOCK_EXIT(&(lp->lock), irql);

    return ((lp->host_map) != 0);
} /* end Load_timeout */
  1863. PBIN_STATE Load_pg_lookup(
  1864. PLOAD_CTXT lp,
  1865. ULONG svr_ipaddr,
  1866. ULONG svr_port,
  1867. BOOLEAN is_tcp)
  1868. {
  1869. PCVY_RULE rp; /* ptr. to rules array */
  1870. PBIN_STATE bp; /* ptr. to bin state */
  1871. ULONG i;
  1872. ULONG nurules; /* # user defined rules */
  1873. PMAIN_CTXT ctxtp = CONTAINING_RECORD (lp, MAIN_CTXT, load);
  1874. UNIV_ASSERT(lp->code == CVY_LOADCODE); /* (bbain 8/19/99) */
  1875. rp = (* (lp->params)).port_rules;
  1876. nurules = (* (lp->params)).num_rules;
  1877. /* check for invalid port value (bbain RC1 6/14/99) */
  1878. UNIV_ASSERT(svr_port <= CVY_MAX_PORT);
  1879. /* find server port rule */
  1880. for (i=0; i<nurules; i++)
  1881. {
  1882. /* For virtual clusters: If the server IP address matches the VIP for the port rule,
  1883. or if the VIP for the port rule is "ALL VIPs", and if the port lies in the range
  1884. for this rule, and if the protocol matches, this is the rule. Notice that this
  1885. give priority to rules for specific VIPs over those for "ALL VIPs", which means
  1886. that this code RELIES on the port rules being sorted by VIP/port where the "ALL
  1887. VIP" ports rules are at the end of the port rule list. */
  1888. if ((svr_ipaddr == rp->virtual_ip_addr || CVY_ALL_VIP_NUMERIC_VALUE == rp->virtual_ip_addr) &&
  1889. (svr_port >= rp->start_port && svr_port <= rp->end_port) &&
  1890. ((is_tcp && rp->protocol != CVY_UDP) || (!is_tcp && rp->protocol != CVY_TCP)))
  1891. break;
  1892. else
  1893. rp++;
  1894. }
  1895. /* use default rule if port not found or rule is invalid */
  1896. bp = &(lp->pg_state[i]);
  1897. UNIV_ASSERT(bp->code == CVY_BINCODE); /* (bbain 8/19/99) */
  1898. return bp;
  1899. } /* end Load_pg_lookup */
/* Decide whether this host should accept an incoming data packet.
 *
 * Hashes the packet's client endpoint into a bin; accepts if the bin is
 * owned by this host and (for session-ful traffic) globally idle, or if
 * the packet matches a connection this host is already tracking.
 *
 * Parameters:
 *   lp            - load module context
 *   svr_ipaddr    - destination (cluster/virtual) IP address
 *   svr_port      - destination port
 *   client_ipaddr - source IP address
 *   client_port   - source port
 *   protocol      - TCPIP_PROTOCOL_* value
 *   limit_map_fn  - TRUE => hash with the fixed MAP_FN_PARAMETER instead of
 *                   port/VIP-derived values (restricted map function)
 *
 * Returns TRUE to accept the packet, FALSE to drop it. */
BOOLEAN Load_packet_check(
    PLOAD_CTXT      lp,
    ULONG           svr_ipaddr,
    ULONG           svr_port,
    ULONG           client_ipaddr,
    ULONG           client_port,
    USHORT          protocol,
    BOOLEAN         limit_map_fn)
{
    PBIN_STATE      bp;         /* ptr. to bin state */
    ULONG           id;         /* hash index for the connection */
    ULONG           bin;        /* bin index */
    QUEUE *         qp;         /* ptr. to connection queue */
    IRQLEVEL        irql;
    PMAIN_CTXT      ctxtp = CONTAINING_RECORD (lp, MAIN_CTXT, load);
    BOOLEAN         is_tcp_pkt = (protocol == TCPIP_PROTOCOL_TCP);
    BOOLEAN         is_session_pkt;    /* packet belongs to session-tracked traffic */

    is_session_pkt = is_tcp_pkt;

    /* IPSec traffic is treated as session-ful when IPSec session support is on */
    if (NLB_IPSEC_SESSION_SUPPORT_ENABLED() && (protocol == TCPIP_PROTOCOL_IPSEC1))
    {
        is_session_pkt = TRUE;
    }

    UNIV_ASSERT(lp->code == CVY_LOADCODE);  /* (bbain 8/19/99) */

    if (! lp -> active)
        return FALSE;

    lp->pkt_count++;    /* increment count of pkts handled (v1.32B) */

    bp = Load_pg_lookup(lp, svr_ipaddr, svr_port, is_tcp_pkt);

    /* V2.2 make sure that Load_pg_lookup properly handled protocol specific rules */
    UNIV_ASSERT ((is_tcp_pkt && bp->prot != CVY_UDP) || (!is_tcp_pkt && bp->prot != CVY_TCP));

    /* handle CVY_NEVER mode immediately: rule says drop on every host */
    if (bp->mode == CVY_NEVER)
        return FALSE;

    /* lookup connection entry in hash table; the second Map() argument depends
       on the rule's affinity mode (none / single / class-C network) */
    if (limit_map_fn) {
        if (bp->affinity == CVY_AFFINITY_NONE)
            id = Map(client_ipaddr, MAP_FN_PARAMETER);
        else if (bp->affinity == CVY_AFFINITY_SINGLE)
            id = Map(client_ipaddr, MAP_FN_PARAMETER);
        else
            id = Map(client_ipaddr & TCPIP_CLASSC_MASK, MAP_FN_PARAMETER);
    } else {
        if (bp->affinity == CVY_AFFINITY_NONE)
            id = Map(client_ipaddr, ((svr_port << 16) + client_port));
        else if (bp->affinity == CVY_AFFINITY_SINGLE)
            id = Map(client_ipaddr, svr_ipaddr);
        else
            id = Map(client_ipaddr & TCPIP_CLASSC_MASK, svr_ipaddr);
    }

    /* now hash client address to bin id */
    bin = id % CVY_MAXBINS;

    LOCK_ENTER(&(lp->lock), &irql);

    /* check bin for residency and all other hosts now idle on their bins; in this
       case and if we do not have dirty connections, we must be able to handle the packet */
    if (((bp->cmap & (((MAP_T) 1) << bin)) != 0) &&     /* v2.1 */
        (!is_session_pkt || (((bp->all_idle_map & (((MAP_T) 1) << bin)) != 0) && (!(lp->cln_waiting)))))    /* v1.32B */
    {
        /* note that we may have missed a connection, but it could also be a stale
           packet so we can't start tracking the connection now */
#ifdef TRACE_LOAD
        DbgPrint("Host %d: check 1 accepts pkt; rule %d bin %d nconn %d %s port %d\n",
                 lp->my_host_id, bp->index, bin, bp->nconn[bin], is_tcp_pkt ? "TCP" : "UDP", svr_port);
#endif
        LOCK_EXIT(&(lp->lock), irql);
        return TRUE;
    }

    /* otherwise, if we have an active connection for this bin or if we have dirty
       connections for this bin and the bin is resident, check for a match */
    else if (bp->nconn[bin] > 0 || (lp->cln_waiting && lp->dirty_bin[bin] && ((bp->cmap & (((MAP_T) 1) << bin)) != 0)))
    {
        PCONN_ENTRY     ep;     /* ptr. to connection entry */
        PCONN_DESCR     dp;     /* ptr. to connection descriptor */

        /* now hash client address to conn. hash table index */
        id = id % CVY_MAX_CHASH;

        ep = &(lp->hashed_conn[id]);
        qp = &(lp->connq[id]);

        /* look for a connection match: first the direct hash slot... */
        if (CVY_CONN_MATCH(ep, svr_ipaddr, svr_port, client_ipaddr, client_port, protocol))
        {
            /* if connection was dirty, just block the packet since TCP/IP may have stale
               connection state for a previous connection from another host (v1.32B) */
            if (ep->dirty)
            {
                LOCK_EXIT(&(lp->lock), irql);
#ifdef TRACE_DIRTY
                DbgPrint ("blocking dirty connection from %d to %d\n", client_port, svr_port);
#endif
                return FALSE;
            }
#ifdef TRACE_LOAD
            DbgPrint("Host %d: check 2 accepts pkt; rule %d bin %d nconn %d %s port %d\n",
                     lp->my_host_id, bp->index, bin, bp->nconn[bin], is_tcp_pkt ? "TCP" : "UDP", svr_port);
#endif
            LOCK_EXIT(&(lp->lock), irql);
            return TRUE;
        }
        else
        {
            /* ...then the overflow queue chained off this hash slot */
            for (dp = (PCONN_DESCR)Queue_front(qp); dp != NULL;
                 dp = (PCONN_DESCR)Queue_next(qp, &(dp->link)))
            {
                if (CVY_CONN_MATCH(&(dp->entry), svr_ipaddr, svr_port, client_ipaddr, client_port, protocol))
                {
                    /* if connection was dirty, just block the packet since TCP/IP may have
                       stale connection state for a previous connection from another host
                       (v1.32B) */
                    if (dp->entry.dirty)
                    {
                        LOCK_EXIT(&(lp->lock), irql);
#ifdef TRACE_DIRTY
                        DbgPrint ("blocking dirty connection from %d to %d\n", client_port, svr_port);
#endif
                        return FALSE;
                    }
#ifdef TRACE_LOAD
                    DbgPrint("Host %d: check 3 accepts pkt; rule %d bin %d nconn %d %s port %d\n",
                             lp->my_host_id, bp->index, bin, bp->nconn[bin], is_tcp_pkt ? "TCP" : "UDP", svr_port);
#endif
                    LOCK_EXIT(&(lp->lock), irql);
                    return TRUE;
                }
            }
        }
    }

    /* no ownership and no tracked connection: not our packet */
    LOCK_EXIT(&(lp->lock), irql);
    return FALSE;
} /* end Load_packet_check */
  2026. BOOLEAN Load_conn_advise(
  2027. PLOAD_CTXT lp,
  2028. ULONG svr_ipaddr,
  2029. ULONG svr_port,
  2030. ULONG client_ipaddr,
  2031. ULONG client_port,
  2032. USHORT protocol,
  2033. ULONG conn_status,
  2034. BOOLEAN limit_map_fn)
  2035. {
  2036. BOOLEAN match, /* TRUE => we have a record of this connection */
  2037. hit; /* TRUE => we have a hash entry hit */
  2038. ULONG id; /* hash index for the connection */
  2039. ULONG bin; /* bin index */
  2040. PBIN_STATE bp; /* ptr. to bin state */
  2041. PCONN_ENTRY ep; /* ptr. to connection entry */
  2042. PCONN_DESCR dp; /* ptr. to connection descriptor */
  2043. QUEUE * qp; /* ptr. to connection queue */
  2044. IRQLEVEL irql;
  2045. PMAIN_CTXT ctxtp = CONTAINING_RECORD (lp, MAIN_CTXT, load);
  2046. BOOLEAN is_tcp_pkt = (protocol == TCPIP_PROTOCOL_TCP);
  2047. UNIV_ASSERT(lp->code == CVY_LOADCODE); /* (bbain 8/19/99) */
  2048. if (!lp -> active)
  2049. return FALSE;
  2050. lp->pkt_count++; /* increment count of pkts handled (v1.32B) */
  2051. /* increment bin count */
  2052. bp = Load_pg_lookup(lp, svr_ipaddr, svr_port, is_tcp_pkt);
  2053. /* handle CVY_NEVER immediately */
  2054. if (bp->mode == CVY_NEVER)
  2055. return FALSE;
  2056. /* This function is no longer for TCP only. */
  2057. if (!NLB_SESSION_SUPPORT_ENABLED())
  2058. {
  2059. /* This should never happen with session support disabled anyway - Load_pg_lookup() will
  2060. NEVER return a UDP only rule when the is_tcp_pkt is TRUE, so this isn't necessary. */
  2061. if (bp->prot == CVY_UDP)
  2062. return TRUE;
  2063. }
  2064. /* lookup connection entry in hash table */
  2065. if (limit_map_fn) {
  2066. if (bp->affinity == CVY_AFFINITY_NONE)
  2067. id = Map(client_ipaddr, MAP_FN_PARAMETER);
  2068. else if (bp->affinity == CVY_AFFINITY_SINGLE)
  2069. id = Map(client_ipaddr, MAP_FN_PARAMETER);
  2070. else
  2071. id = Map(client_ipaddr & TCPIP_CLASSC_MASK, MAP_FN_PARAMETER);
  2072. } else {
  2073. if (bp->affinity == CVY_AFFINITY_NONE)
  2074. id = Map(client_ipaddr, ((svr_port << 16) + client_port));
  2075. else if (bp->affinity == CVY_AFFINITY_SINGLE)
  2076. id = Map(client_ipaddr, svr_ipaddr);
  2077. else
  2078. id = Map(client_ipaddr & TCPIP_CLASSC_MASK, svr_ipaddr);
  2079. }
  2080. /* now hash client address to bin id and conn. hash table index */
  2081. bin = id % CVY_MAXBINS;
  2082. id = id % CVY_MAX_CHASH;
  2083. /* if this connection is not in our current map and it is not a connection
  2084. down notification for a non-idle bin, just filter it out */
  2085. if ((bp->cmap & (((MAP_T) 1) << bin)) == 0 && /* v2.1 */
  2086. (!((conn_status == CVY_CONN_DOWN || conn_status == CVY_CONN_RESET) && bp->nconn[bin] > 0)))
  2087. return FALSE;
  2088. ep = &(lp->hashed_conn[id]);
  2089. UNIV_ASSERT (ep->code == CVY_ENTRCODE); /* (bbain 8/21/99) */
  2090. qp = &(lp->connq[id]);
  2091. match = hit = FALSE;
  2092. LOCK_ENTER(&(lp->lock), &irql);
  2093. if (CVY_CONN_MATCH(ep, svr_ipaddr, svr_port, client_ipaddr, client_port, protocol))
  2094. {
  2095. hit =
  2096. match = TRUE;
  2097. }
  2098. else
  2099. {
  2100. for (dp = (PCONN_DESCR)Queue_front(qp); dp != NULL;
  2101. dp = (PCONN_DESCR)Queue_next(qp, &(dp->link)))
  2102. {
  2103. if (CVY_CONN_MATCH(&(dp->entry), svr_ipaddr, svr_port, client_ipaddr, client_port, protocol))
  2104. {
  2105. match = TRUE;
  2106. UNIV_ASSERT (dp->code == CVY_DESCCODE); /* (bbain 8/19/99) */
  2107. ep = &(dp->entry); /* v 2.06 */
  2108. UNIV_ASSERT (ep->code == CVY_ENTRCODE); /* (bbain 8/21/99) */
  2109. /* release connection descriptor if taking down connection */
  2110. if (conn_status == CVY_CONN_DOWN || conn_status == CVY_CONN_RESET)
  2111. {
  2112. /* if connection was dirty, just block the packet since TCP/IP may have
  2113. stale connection state for a previous connection from another host
  2114. (v1.32B) */
  2115. if (ep->dirty)
  2116. {
  2117. LOCK_EXIT(&(lp->lock), irql);
  2118. #ifdef TRACE_DIRTY
  2119. DbgPrint ("blocking dirty FIN from %d to %d\n", client_port, svr_port);
  2120. #endif
  2121. return FALSE;
  2122. }
  2123. /* ###### fin count added for keynote - ramkrish. */
  2124. /* if first fin, then only increment the count and return TRUE */
  2125. if (conn_status == CVY_CONN_DOWN && ep->fin_count == 0 && is_tcp_pkt)
  2126. {
  2127. ep->fin_count++;
  2128. LOCK_EXIT(&(lp->lock), irql);
  2129. return TRUE;
  2130. }
  2131. Link_unlink(&(dp->entry.blink));
  2132. Link_unlink(&(dp->entry.rlink)); /* V2.1.5 */
  2133. Link_unlink(&(dp->link));
  2134. Queue_enq(&(lp->conn_freeq), &(dp->link));
  2135. }
  2136. break;
  2137. }
  2138. }
  2139. }
  2140. /* if we see a new connection, handle it */
  2141. if (conn_status == CVY_CONN_UP)
  2142. {
  2143. /* if we don't have a connection match, setup a new connection entry */
  2144. if (!match)
  2145. {
  2146. /* if hash entry table is not available, setup and enqueue a new entry */
  2147. if (CVY_CONN_IN_USE(ep))
  2148. {
  2149. dp = (PCONN_DESCR)Queue_deq(&(lp->conn_freeq));
  2150. if (dp == NULL)
  2151. {
  2152. /* allocate new queue descriptors if allowed */
  2153. if (lp->nqalloc < lp->max_dscr_allocs)
  2154. {
  2155. UNIV_PRINT(("Load_conn_advise: %d/%d allocating %d descriptors", lp->nqalloc, lp->max_dscr_allocs, lp->dscr_per_alloc));
  2156. lp->qalloc_list[lp->nqalloc] = /* (bbain 2/25/99) */
  2157. dp = (PCONN_DESCR)malloc((lp->dscr_per_alloc) * sizeof(CONN_DESCR));
  2158. if (dp != NULL)
  2159. {
  2160. ULONG i;
  2161. PCONN_DESCR tp;
  2162. QUEUE * fqp;
  2163. lp->nqalloc++;
  2164. /* initialize and link up descriptors; save first descriptor
  2165. for our use */
  2166. dp->code = CVY_DESCCODE; /* (bbain 8/19/99) */
  2167. Link_init(&(dp->link));
  2168. ep = &(dp->entry); /* (bbain 8/21/99) */
  2169. ep->code = CVY_ENTRCODE; /* (bbain 8/19/99) */
  2170. ep->alloc = TRUE;
  2171. ep->dirty = FALSE; /* v1.32B */
  2172. CVY_CONN_CLEAR(&(dp->entry));
  2173. Link_init(&(dp->entry.blink));
  2174. Link_init(&(dp->entry.rlink)); /* V2.1.5 */
  2175. tp = dp + 1;
  2176. fqp = &(lp->conn_freeq);
  2177. for (i=1; i<lp->dscr_per_alloc; i++)
  2178. {
  2179. tp->code = CVY_DESCCODE; /* (bbain 8/19/99) */
  2180. Link_init(&(tp->link));
  2181. tp->entry.code = CVY_ENTRCODE; /* (bbain 8/19/99) */
  2182. tp->entry.alloc = TRUE;
  2183. tp->entry.dirty = FALSE; /* v1.32B */
  2184. CVY_CONN_CLEAR(&(tp->entry));
  2185. Link_init(&(tp->entry.blink));
  2186. Link_init(&(tp->entry.rlink)); /* V2.1.5 */
  2187. Queue_enq(fqp, &(tp->link));
  2188. tp++;
  2189. }
  2190. }
  2191. else
  2192. {
  2193. if (!(lp->alloc_failed))
  2194. {
  2195. UNIV_PRINT(("Load_conn_advise: error allocating conn descrs"));
  2196. LOG_MSG(MSG_ERROR_MEMORY, MSG_NONE);
  2197. lp->alloc_failed = TRUE;
  2198. }
  2199. LOCK_EXIT(&(lp->lock), irql);
  2200. return TRUE;
  2201. }
  2202. }
  2203. else
  2204. {
  2205. /* V2.1.5 - if reached allocation limit - start taking
  2206. connection descriptors from the recover queue since
  2207. they are likely to be stale and very old */
  2208. PBIN_STATE rbp;
  2209. LINK * rlp;
  2210. #ifdef TRACE_RCVRY
  2211. DbgPrint ("Host %d: taking connection from recovery queue\n", lp->my_host_id);
  2212. #endif
  2213. rlp = (LINK *)Queue_deq(&(lp->conn_rcvryq));
  2214. UNIV_ASSERT (rlp != NULL);
  2215. /* this should not happen at all but protect anyway */
  2216. if (rlp == NULL)
  2217. {
  2218. if (!(lp->alloc_inhibited))
  2219. {
  2220. UNIV_PRINT(("Host %d: cannot allocate conn descriptors.", lp->my_host_id));
  2221. LOG_MSG(MSG_WARN_DESCRIPTORS, CVY_NAME_MAX_DSCR_ALLOCS);
  2222. lp->alloc_inhibited = TRUE;
  2223. }
  2224. LOCK_EXIT(&(lp->lock), irql);
  2225. return TRUE;
  2226. }
  2227. ep = STRUCT_PTR(rlp, CONN_ENTRY, rlink);
  2228. UNIV_ASSERT (ep->code == CVY_ENTRCODE); /* (bbain 8/19/99) */
  2229. /* fixed for nt4/sp5 */
  2230. if (ep->alloc)
  2231. {
  2232. /* unlink allocated descriptors from the hash table
  2233. queue if necessary and set dp so that code below
  2234. will put it back in the right hash queue */
  2235. dp = STRUCT_PTR(ep, CONN_DESCR, entry);
  2236. UNIV_ASSERT (dp->code == CVY_DESCCODE); /* (bbain 8/19/99) */
  2237. Link_unlink(&(dp->link));
  2238. }
  2239. else
  2240. {
  2241. dp = NULL; /* (bbain 8/21/99) */
  2242. }
  2243. /* dirty connections are not counted */
  2244. if (! ep->dirty)
  2245. {
  2246. /* find out which port group we are on so we can clean
  2247. up its counters */
  2248. rbp = Load_pg_lookup(lp, ep->svr_ipaddr, ep->svr_port, is_tcp_pkt);
  2249. /* correct bad (negative) bin count */
  2250. if (lp->nconn <= 0)
  2251. lp->nconn = 0;
  2252. else
  2253. lp->nconn--;
  2254. if (rbp->nconn[ep->bin] <= 0)
  2255. rbp->nconn[ep->bin] = 0;
  2256. else
  2257. {
  2258. rbp->nconn[ep->bin]--;
  2259. }
  2260. if (rbp->tconn <= 0)
  2261. rbp->tconn = 0;
  2262. else
  2263. rbp->tconn--;
  2264. if (rbp->nconn[ep->bin] == 0)
  2265. {
  2266. rbp->idle_bins |= (((MAP_T) 1) << ep->bin);
  2267. }
  2268. }
  2269. Link_unlink(&(ep->blink));
  2270. CVY_CONN_CLEAR(ep);
  2271. ep->dirty = FALSE;
  2272. }
  2273. }
  2274. /* else dp is not NULL, so setup entry pointer */
  2275. else
  2276. {
  2277. ep = &(dp->entry);
  2278. UNIV_ASSERT (ep->code == CVY_ENTRCODE); /* (bbain 8/21/99) */
  2279. }
  2280. /* enqueue descriptor in hash table unless it's already a hash table entry
  2281. (V2.1.5 recovered connection might be in hash table, so make
  2282. sure we do not end up queueing it) */
  2283. if (dp != NULL)
  2284. {
  2285. UNIV_ASSERT (dp->code == CVY_DESCCODE); /* (bbain 8/19/99) */
  2286. /* enqueue new queue descriptor and setup entry pointer */
  2287. Queue_enq(qp, &(dp->link));
  2288. }
  2289. }
  2290. /* setup new entry */
  2291. UNIV_ASSERT (ep->code == CVY_ENTRCODE); /* (bbain 8/21/99) */
  2292. CVY_CONN_SET(ep, svr_ipaddr, svr_port, client_ipaddr, client_port, protocol);
  2293. ep->bin = (UCHAR)bin;
  2294. /* ###### fin count added for keynote - ramkrish */
  2295. /* initialize the fin count to 0 for a new connection */
  2296. ep->fin_count = 0;
  2297. /* enqueue entry into port group queue */
  2298. Queue_enq(&(bp->connq), &(ep->blink));
  2299. /* V2.1.5 add entry to the tail of connection recovery queue */
  2300. Queue_enq(&(lp->conn_rcvryq), &(ep->rlink));
  2301. /* increment # connections and mark bin not idle if necessary */
  2302. lp->nconn++; /* v2.1 */
  2303. bp->tconn++;
  2304. bp->nconn[bin]++;
  2305. if (bp->nconn[bin] == 1)
  2306. bp->idle_bins &= ~(((MAP_T) 1) << bin);
  2307. #ifdef TRACE_LOAD
  2308. DbgPrint("Host %d: advise starts conn; rule %d bin %d nconn %d\n",
  2309. lp->my_host_id, bp->index, bin, bp->nconn[bin]);
  2310. #endif
  2311. }
  2312. /* otherwise, we have a match; clean up conn entry if dirty since we have a
  2313. new connection, although TCP/IP will likely reject it if it has stale state
  2314. from another connection (v1.32B) */
  2315. else
  2316. {
  2317. if (ep->dirty)
  2318. {
  2319. #ifdef TRACE_DIRTY
  2320. DbgPrint ("converting dirty SYN from %d to %d\n", client_port, svr_port);
  2321. #endif
  2322. UNIV_ASSERT (ep->code == CVY_ENTRCODE); /* (bbain 8/21/99) */
  2323. ep->dirty = FALSE;
  2324. /* ###### initialize fin count for this new connection added for keynote - ramkrish */
  2325. /* ###### since we are reusing a dirty connection desc for a new conn., it needs to be reset */
  2326. ep->fin_count = 0;
  2327. UNIV_ASSERT (ep->bin == (USHORT)bin);
  2328. /* unlink and enqueue entry into port group queue */
  2329. Link_unlink(&(ep->blink));
  2330. Queue_enq(&(bp->connq), &(ep->blink));
  2331. /* increment # connections and mark bin not idle if necessary */
  2332. lp->nconn++; /* v2.1 */
  2333. bp->tconn++;
  2334. bp->nconn[bin]++;
  2335. if (bp->nconn[bin] == 1)
  2336. bp->idle_bins &= ~(((MAP_T) 1) << bin);
  2337. }
  2338. }
  2339. }
  2340. /* otherwise, if a known connection is going down, remove our connection entry */
  2341. /* ###### check for reset addded for keynote - ramkrish */
  2342. else if ((conn_status == CVY_CONN_DOWN || conn_status == CVY_CONN_RESET) && match)
  2343. {
  2344. /* if connection was dirty, just block the packet since TCP/IP may have stale
  2345. connection state for a previous connection from another host (v1.32B) */
  2346. if (ep->dirty)
  2347. {
  2348. LOCK_EXIT(&(lp->lock), irql);
  2349. #ifdef TRACE_DIRTY
  2350. DbgPrint ("blocking dirty FIN from %d to %d\n", client_port, svr_port);
  2351. #endif
  2352. return FALSE;
  2353. }
  2354. /* ###### fin count added for keynote - ramkrish */
  2355. /* if this is the first fin, then simply increment the fincount and return */
  2356. if (conn_status == CVY_CONN_DOWN && ep->fin_count == 0 && is_tcp_pkt)
  2357. {
  2358. ep->fin_count++;
  2359. LOCK_EXIT(&(lp->lock), irql);
  2360. return TRUE;
  2361. }
  2362. /* clear hash table entry if we had a hit; enqueued entry was already freed */
  2363. if (hit)
  2364. {
  2365. CVY_CONN_CLEAR(ep);
  2366. /* ###### clear fin count for keynote - ramkrish */
  2367. ep->fin_count = 0;
  2368. Link_unlink(&(ep->rlink)); /* V2.1.5 */
  2369. Link_unlink(&(ep->blink));
  2370. }
  2371. /* decrement # connections and mark bin idle if necessary */
  2372. #if 0
  2373. if (bp->nconn[bin] <= 0)
  2374. DbgPrint("WLBS: Load_conn_advise: count was zero %d %d\n", bin, bp->nconn[bin]);
  2375. #endif
  2376. UNIV_ASSERT(bp->nconn[bin] > 0 && bp->tconn > 0 && lp->nconn > 0);
  2377. if (lp->nconn <= 0) /* v2.1 */
  2378. lp->nconn = 0;
  2379. else
  2380. lp->nconn--;
  2381. if (bp->nconn[bin] <= 0) /* correct bad (negative) bin count */
  2382. bp->nconn[bin] = 0;
  2383. else
  2384. bp->nconn[bin]--;
  2385. if (bp->tconn <= 0)
  2386. bp->tconn = 0;
  2387. else
  2388. bp->tconn--;
  2389. if (bp->nconn[bin] == 0)
  2390. {
  2391. bp->idle_bins |= (((MAP_T) 1) << bin);
  2392. }
  2393. #ifdef TRACE_LOAD
  2394. DbgPrint("Host %d: advise removes conn; rule %d bin %d nconn %d\n",
  2395. lp->my_host_id, bp->index, bin, bp->nconn[bin]);
  2396. #endif
  2397. }
  2398. else
  2399. {
  2400. LOCK_EXIT(&(lp->lock), irql);
  2401. return FALSE;
  2402. }
  2403. LOCK_EXIT(&(lp->lock), irql);
  2404. return TRUE;
  2405. } /* end Load_conn_advise */
/*
 * Function: Load_create_dscr
 * Description: Ensures a connection (session) descriptor exists for the given
 *     client/server tuple. If the tuple is not yet tracked, a descriptor is
 *     obtained - from the static hash-table entry if free, else from the free
 *     queue, else by allocating a new batch (up to max_dscr_allocs batches),
 *     else by recycling the oldest entry on the recovery queue - and the
 *     per-rule/per-bin connection counters are updated. If the tuple is
 *     already tracked by a dirty descriptor, the descriptor is reset for
 *     reuse by the new connection.
 * Parameters: lp - load module context (validated via the CVY_LOADCODE magic).
 *     svr_ipaddr - server-side (virtual) IP address.
 *     svr_port - server-side port.
 *     client_ipaddr - client IP address.
 *     client_port - client port.
 *     protocol - TCPIP protocol number; only TCP gets a TCP port-rule lookup,
 *                every other protocol is looked up as UDP.
 *     limit_map_fn - when TRUE, hash without server-specific inputs so the
 *                    mapping is server-agnostic (used for BDA teaming).
 * Returns: TRUE if a descriptor now tracks the connection; FALSE if the load
 *     module is inactive or no descriptor could be obtained.
 * Author: shouse, 5.18.01
 * Notes: NOTE(review): unlike the Load_conn_advise path, lp->lock is not
 *     taken here - presumably the caller serializes access; confirm.
 */
BOOLEAN Load_create_dscr(
    PLOAD_CTXT lp,
    ULONG svr_ipaddr,
    ULONG svr_port,
    ULONG client_ipaddr,
    ULONG client_port,
    USHORT protocol,
    BOOLEAN limit_map_fn)
{
    BOOLEAN match = FALSE;       /* TRUE => we have a record of this connection. */
    ULONG id;                    /* Hash index for the connection. */
    ULONG bin;                   /* Bin index. */
    PBIN_STATE bp;               /* Pointer to bin state. */
    PCONN_ENTRY ep;              /* Pointer to connection entry. */
    PCONN_DESCR dp;              /* Pointer to connection descriptor. */
    QUEUE * qp;                  /* Pointer to connection queue. */
    PMAIN_CTXT ctxtp = CONTAINING_RECORD(lp, MAIN_CTXT, load);
    BOOLEAN is_tcp_pkt = (protocol == TCPIP_PROTOCOL_TCP);

    UNIV_ASSERT(lp->code == CVY_LOADCODE);

    /* If the load module has been "turned off", do not track connections. */
    if (!lp->active)
        return FALSE;

    /* Increment count of packets handled. */
    lp->pkt_count++;

    /* Find the port rule for this connection. */
    bp = Load_pg_lookup(lp, svr_ipaddr, svr_port, is_tcp_pkt);

    /* Hash. The second Map() input depends on the rule's affinity setting; when
       the map function is limited (BDA teaming), a fixed parameter is used so the
       result does not depend on the server identity. */
    if (limit_map_fn) {
        if (bp->affinity == CVY_AFFINITY_NONE)
            id = Map(client_ipaddr, MAP_FN_PARAMETER);
        else if (bp->affinity == CVY_AFFINITY_SINGLE)
            id = Map(client_ipaddr, MAP_FN_PARAMETER);
        else
            id = Map(client_ipaddr & TCPIP_CLASSC_MASK, MAP_FN_PARAMETER);
    } else {
        if (bp->affinity == CVY_AFFINITY_NONE)
            id = Map(client_ipaddr, ((svr_port << 16) + client_port));
        else if (bp->affinity == CVY_AFFINITY_SINGLE)
            id = Map(client_ipaddr, svr_ipaddr);
        else
            id = Map(client_ipaddr & TCPIP_CLASSC_MASK, svr_ipaddr);
    }

    /* Hash client address to bin id and connection hash table index. */
    bin = id % CVY_MAXBINS;
    id = id % CVY_MAX_CHASH;

    /* Get a pointer to the connection entry for this hash ID. */
    ep = &(lp->hashed_conn[id]);

    UNIV_ASSERT (ep->code == CVY_ENTRCODE);

    /* Get a pointer to the connection queue (second-level hashing). */
    qp = &(lp->connq[id]);

    if (CVY_CONN_MATCH(ep, svr_ipaddr, svr_port, client_ipaddr, client_port, protocol))
    {
        /* Note that we found a match for this tuple. */
        match = TRUE;
    } else {
        /* Not in the static table; walk the chained descriptor queue. */
        for (dp = (PCONN_DESCR)Queue_front(qp); dp != NULL; dp = (PCONN_DESCR)Queue_next(qp, &(dp->link))) {
            if (CVY_CONN_MATCH(&(dp->entry), svr_ipaddr, svr_port, client_ipaddr, client_port, protocol))
            {
                /* Note that we found a match for this tuple. */
                match = TRUE;

                UNIV_ASSERT (dp->code == CVY_DESCCODE);

                /* Get a pointer to the connection entry. */
                ep = &(dp->entry);

                UNIV_ASSERT (ep->code == CVY_ENTRCODE);

                break;
            }
        }
    }

    /* If we don't have a connection match, setup a new connection entry. */
    if (!match) {
        /* If hash entry table is not available, setup and enqueue a new entry. */
        if (CVY_CONN_IN_USE(ep)) {
            /* Get a pointer to a free descriptor. */
            dp = (PCONN_DESCR)Queue_deq(&(lp->conn_freeq));

            if (dp == NULL) {
                /* Allocate new queue descriptors if allowed. */
                if (lp->nqalloc < lp->max_dscr_allocs) {
                    UNIV_PRINT(("Load_create_dscr: %d/%d allocating %d descriptors", lp->nqalloc, lp->max_dscr_allocs, lp->dscr_per_alloc));

                    dp = lp->qalloc_list[lp->nqalloc] = (PCONN_DESCR)malloc((lp->dscr_per_alloc) * sizeof(CONN_DESCR));

                    if (dp != NULL) {
                        ULONG i;
                        PCONN_DESCR tp;
                        QUEUE * fqp;

                        /* Increment the counter for number of allocations. */
                        lp->nqalloc++;

                        /* Initialize and link up descriptors; save first descriptor for our use. */
                        dp->code = CVY_DESCCODE;
                        Link_init(&(dp->link));

                        /* Initialize the connection entry. */
                        ep = &(dp->entry);
                        ep->code = CVY_ENTRCODE;
                        ep->alloc = TRUE;
                        ep->dirty = FALSE;

                        /* Mark this entry unused. */
                        CVY_CONN_CLEAR(&(dp->entry));
                        Link_init(&(dp->entry.blink));
                        Link_init(&(dp->entry.rlink));

                        tp = dp + 1;
                        fqp = &(lp->conn_freeq);

                        /* Initialize all descriptors and tack them on the free queue. */
                        for (i = 1; i < lp->dscr_per_alloc; i++, tp++) {
                            /* Initialize the descriptor. */
                            tp->code = CVY_DESCCODE;
                            Link_init(&(tp->link));

                            /* Initialize the connection entry. */
                            tp->entry.code = CVY_ENTRCODE;
                            tp->entry.alloc = TRUE;
                            tp->entry.dirty = FALSE;

                            /* Mark this entry unused. */
                            CVY_CONN_CLEAR(&(tp->entry));
                            Link_init(&(tp->entry.blink));
                            Link_init(&(tp->entry.rlink));

                            /* Queue the descriptor onto the free queue. */
                            Queue_enq(fqp, &(tp->link));
                        }
                    } else {
                        /* Allocation failed, log a message (once) and bail out. */
                        if (!(lp->alloc_failed)) {
                            /* NOTE(review): message text says "Load_conn_advise" but we are in Load_create_dscr. */
                            UNIV_PRINT(("Load_conn_advise: error allocating conn descrs"));
                            LOG_MSG(MSG_ERROR_MEMORY, MSG_NONE);
                            lp->alloc_failed = TRUE;
                        }

                        return FALSE;
                    }
                } else {
                    /* If we have reached the allocation limit, start taking connection descriptors
                       from the recover queue since they are likely to be stale and very old. */
                    PBIN_STATE rbp;
                    LINK * rlp;

#ifdef TRACE_RCVRY
                    DbgPrint ("Host %d: taking connection from recovery queue\n", lp->my_host_id);
#endif

                    /* Dequeue a descriptor from the recovery queue. */
                    rlp = (LINK *)Queue_deq(&(lp->conn_rcvryq));

                    UNIV_ASSERT (rlp != NULL);

                    /* This should not happen at all but protect anyway. */
                    if (rlp == NULL) {
                        /* Unable to get a descriptor, log a message (once) and bail out. */
                        if (!(lp->alloc_inhibited)) {
                            UNIV_PRINT(("Host %d: cannot allocate conn descriptors.", lp->my_host_id));
                            LOG_MSG(MSG_WARN_DESCRIPTORS, CVY_NAME_MAX_DSCR_ALLOCS);
                            lp->alloc_inhibited = TRUE;
                        }

                        return FALSE;
                    }

                    /* Grab a pointer to the connection entry. */
                    ep = STRUCT_PTR(rlp, CONN_ENTRY, rlink);

                    UNIV_ASSERT (ep->code == CVY_ENTRCODE);

                    if (ep->alloc) {
                        /* Unlink allocated descriptors from the hash table queue if necessary
                           and set dp so that code below will put it back in the right hash queue. */
                        dp = STRUCT_PTR(ep, CONN_DESCR, entry);

                        UNIV_ASSERT (dp->code == CVY_DESCCODE);

                        Link_unlink(&(dp->link));
                    } else {
                        /* Static hash-table entry: it stays where it is. */
                        dp = NULL;
                    }

                    /* Dirty connections are not counted, so we don't need to update these counters. */
                    if (! ep->dirty) {
                        /* Find out which port group we are on so we can clean up its counters. */
                        rbp = Load_pg_lookup(lp, ep->svr_ipaddr, ep->svr_port, is_tcp_pkt);

                        /* Decrement the load-wide and per-rule counters, clamping at zero. */
                        if (lp->nconn <= 0)
                            lp->nconn = 0;
                        else
                            lp->nconn--;

                        if (rbp->nconn[ep->bin] <= 0)
                            rbp->nconn[ep->bin] = 0;
                        else
                            rbp->nconn[ep->bin]--;

                        if (rbp->tconn <= 0)
                            rbp->tconn = 0;
                        else
                            rbp->tconn--;

                        /* If the recycled connection was the last on its bin, mark the bin idle. */
                        if (rbp->nconn[ep->bin] == 0)
                            rbp->idle_bins |= (((MAP_T) 1) << ep->bin);
                    }

                    Link_unlink(&(ep->blink));

                    /* Mark the descriptor as unused. */
                    CVY_CONN_CLEAR(ep);

                    /* Mark the descriptor as clean. */
                    ep->dirty = FALSE;
                }
            } else {
                /* There was a free descriptor, so setup the connection entry pointer. */
                ep = &(dp->entry);

                UNIV_ASSERT (ep->code == CVY_ENTRCODE);
            }

            /* Enqueue descriptor in hash table unless it's already a hash table entry (a recovered
               connection might be in hash table, so make sure we do not end up queueing it) */
            if (dp != NULL) {
                UNIV_ASSERT (dp->code == CVY_DESCCODE);

                Queue_enq(qp, &(dp->link));
            }
        }

        UNIV_ASSERT (ep->code == CVY_ENTRCODE);

        /* Setup a new entry. */
        CVY_CONN_SET(ep, svr_ipaddr, svr_port, client_ipaddr, client_port, protocol);

        /* NOTE(review): bin is stored as UCHAR here, but the dirty-reuse path below
           asserts against (USHORT)bin - confirm the field width is consistent. */
        ep->bin = (UCHAR)bin;

        /* Initialize the fin count to 0 for a new connection. */
        ep->fin_count = 0;

        /* Enqueue entry into port group queue. */
        Queue_enq(&(bp->connq), &(ep->blink));

        /* Add entry to the tail of connection recovery queue. */
        Queue_enq(&(lp->conn_rcvryq), &(ep->rlink));

        /* Increment number of connections and mark bin not idle if necessary. */
        lp->nconn++;
        bp->tconn++;
        bp->nconn[bin]++;

        if (bp->nconn[bin] == 1) bp->idle_bins &= ~(((MAP_T) 1) << bin);

#ifdef TRACE_LOAD
        DbgPrint("Host %d: advise starts conn; rule %d bin %d nconn %d\n",
                 lp->my_host_id, bp->index, bin, bp->nconn[bin]);
#endif
    } else {
        /* We have a match. Clean up connection entry if it's dirty since we have a new connection,
           although TCP/IP will likely reject it if it has stale state from another connection. */
        if (ep->dirty) {
#ifdef TRACE_DIRTY
            DbgPrint ("converting dirty SYN from %d to %d\n", client_port, svr_port);
#endif

            UNIV_ASSERT (ep->code == CVY_ENTRCODE);

            ep->dirty = FALSE;

            /* Reusing a dirty descriptor for a new connection - reset the FIN count. */
            ep->fin_count = 0;

            UNIV_ASSERT (ep->bin == (USHORT)bin);

            /* Unlink and enqueue entry into port group queue. */
            Link_unlink(&(ep->blink));
            Queue_enq(&(bp->connq), &(ep->blink));

            /* Increment # connections and mark bin not idle if necessary. */
            lp->nconn++;
            bp->tconn++;
            bp->nconn[bin]++;

            if (bp->nconn[bin] == 1) bp->idle_bins &= ~(((MAP_T) 1) << bin);
        }
    }

    return TRUE;
}
/*
 * Function: Load_port_change
 * Description: Applies a port-rule control operation to every rule matching the
 *     given VIP/port pair: enable (restore the original load weight), disable
 *     (weight 0, immediately stop handling traffic and kill existing
 *     connections), drain (weight 0 but keep serving existing connections), or
 *     set an explicit load weight. If any rule's weight actually changed and
 *     the cluster is not already converging, convergence is initiated.
 * Parameters: lp - load module context.
 *     ipaddr - virtual IP address to match, or IOCTL_ALL_VIPS (0x00000000).
 *     port - port to match, or IOCTL_ALL_PORTS.
 *     cmd - IOCTL_CVY_PORT_ON/_OFF/_DRAIN/_SET, or the cluster-wide
 *           IOCTL_CVY_CLUSTER_PLUG/_DRAIN (which also cover the DEFAULT rule).
 *     value - new load weight; used only with IOCTL_CVY_PORT_SET.
 * Returns: IOCTL_CVY_OK if a rule changed, IOCTL_CVY_ALREADY if every matching
 *     rule was already in the requested state, IOCTL_CVY_NOT_FOUND if no rule
 *     matched or the load module is inactive.
 */
ULONG Load_port_change(
    PLOAD_CTXT lp,
    ULONG ipaddr,
    ULONG port,
    ULONG cmd,
    ULONG value)
{
    PCVY_RULE rp;                /* Pointer to configured port rules. */
    PBIN_STATE bp;               /* Pointer to load module port rule state. */
    ULONG nrules;                /* Number of rules. */
    ULONG i;
    ULONG ret = IOCTL_CVY_NOT_FOUND;
    PMAIN_CTXT ctxtp = CONTAINING_RECORD(lp, MAIN_CTXT, load);

    UNIV_ASSERT(lp->code == CVY_LOADCODE);

    /* Port controls only apply while the load module is active. */
    if (! lp->active)
        return IOCTL_CVY_NOT_FOUND;

    rp = (* (lp->params)).port_rules;

    /* If we are draining whole cluster, include DEFAULT rule; Otherwise, just
       include the user-defined rules (the DEFAULT rule is the last rule). */
    if (cmd == IOCTL_CVY_CLUSTER_DRAIN || cmd == IOCTL_CVY_CLUSTER_PLUG)
        nrules = (* (lp->params)).num_rules + 1;
    else
        nrules = (* (lp->params)).num_rules;

    for (i=0; i<nrules; i++, rp++)
    {
        /* If the virtual IP address is IOCTL_ALL_VIPS (0x00000000), then we are applying this
           change to all port rules for port X, regardless of VIP. If the virtual IP address is
           to be applied to a particular VIP, then we apply only to port rules whose VIP matches.
           Similarly, if the change is to apply to an "ALL VIP" rule, then we also apply when the
           VIP matches because the caller uses CVY_ALL_VIP_NUMERIC_VALUE (0xffffffff) as the
           virtual IP address, which is the same value stored in the port rule state. */
        if ((ipaddr == IOCTL_ALL_VIPS || ipaddr == rp->virtual_ip_addr) &&
            (port == IOCTL_ALL_PORTS || (port >= rp->start_port && port <= rp->end_port)))
        {
            bp = &(lp->pg_state[i]);

            UNIV_ASSERT(bp->code == CVY_BINCODE); /* (bbain 8/19/99) */

            /* If enabling a port rule, set the load amount to original value;
               If disabling a port rule, set the load amount to zero;
               Otherwise, set the load amount it to the specified amount. */
            if (cmd == IOCTL_CVY_PORT_ON || cmd == IOCTL_CVY_CLUSTER_PLUG)
            {
                if (bp->load_amt[lp->my_host_id] == bp->orig_load_amt)
                {
                    /* If we are the first port rule to match, then set the
                       return value to "Already"; Otherwise, we don't want to
                       overwrite some other port rule's return value of "OK"
                       in the case of ALL_VIPS or ALL_PORTS. */
                    if (ret == IOCTL_CVY_NOT_FOUND) ret = IOCTL_CVY_ALREADY;

                    continue;
                }

                /* Restore the original load amount. */
                bp->load_amt[lp->my_host_id] = bp->orig_load_amt;

                ret = IOCTL_CVY_OK;
            }
            else if (cmd == IOCTL_CVY_PORT_OFF)
            {
                if (bp->load_amt[lp->my_host_id] == 0)
                {
                    /* If we are the first port rule to match, then set the
                       return value to "Already"; Otherwise, we don't want to
                       overwrite some other port rule's return value of "OK"
                       in the case of ALL_VIPS or ALL_PORTS. */
                    if (ret == IOCTL_CVY_NOT_FOUND) ret = IOCTL_CVY_ALREADY;

                    continue;
                }

                bp->load_amt[lp->my_host_id] = 0;

                /* Immediately stop handling all traffic on the port group. */
                bp->cmap = 0;
                bp->cur_map[lp->my_host_id] = 0;

                Load_conn_kill(lp, bp);

                ret = IOCTL_CVY_OK;
            }
            else if (cmd == IOCTL_CVY_PORT_DRAIN || cmd == IOCTL_CVY_CLUSTER_DRAIN)
            {
                if (bp->load_amt[lp->my_host_id] == 0)
                {
                    /* If we are the first port rule to match, then set the
                       return value to "Already"; Otherwise, we don't want to
                       overwrite some other port rule's return value of "OK"
                       in the case of ALL_VIPS or ALL_PORTS. */
                    if (ret == IOCTL_CVY_NOT_FOUND) ret = IOCTL_CVY_ALREADY;

                    continue;
                }

                /* Set load weight to zero, but continue to handle existing connections. */
                bp->load_amt[lp->my_host_id] = 0;

                ret = IOCTL_CVY_OK;
            }
            else
            {
                UNIV_ASSERT(cmd == IOCTL_CVY_PORT_SET);

                if (bp->load_amt[lp->my_host_id] == value)
                {
                    /* If we are the first port rule to match, then set the
                       return value to "Already"; Otherwise, we don't want to
                       overwrite some other port rule's return value of "OK"
                       in the case of ALL_VIPS or ALL_PORTS. */
                    if (ret == IOCTL_CVY_NOT_FOUND) ret = IOCTL_CVY_ALREADY;

                    continue;
                }

                /* Set the load weight for this port rule. */
                bp->orig_load_amt = value;
                bp->load_amt[lp->my_host_id] = value;

                ret = IOCTL_CVY_OK;
            }

            /* A specific VIP/port pair matches at most one rule - stop searching. */
            if (port != IOCTL_ALL_PORTS && ipaddr != IOCTL_ALL_VIPS) break;
        }
    }

    /* If the cluster isn't already converging, then initiate convergence if the load weight of a port rule has been modified. */
    if (lp->send_msg.state != HST_CVG && ret == IOCTL_CVY_OK) {
        WCHAR me[20];

        Univ_ulong_to_str (lp->my_host_id+1, me, 10);

        /* Tracking convergence - Starting convergence because our port rule configuration has changed. */
        LOG_MSGS(MSG_INFO_CONVERGING_NEW_RULES, me, me);
        TRACE_CONVERGENCE("Initiating convergence on host %d. Reason: Host %d has changed its port rule configuration.", lp->my_host_id+1, lp->my_host_id+1);

        /* Tracking convergence. */
        Load_convergence_start(lp);
    }

    return ret;
} /* end Load_port_change */
  2768. ULONG Load_hosts_query(
  2769. PLOAD_CTXT lp,
  2770. BOOLEAN internal,
  2771. PULONG host_map)
  2772. {
  2773. WCHAR buf1 [256];
  2774. WCHAR buf2 [256];
  2775. PWCHAR ptr1 = buf1;
  2776. PWCHAR ptr2 = buf2;
  2777. WCHAR num [20]; /* v2.1 */
  2778. WCHAR msk [33];
  2779. ULONG i, j, k;
  2780. PMAIN_CTXT ctxtp = CONTAINING_RECORD (lp, MAIN_CTXT, load);
  2781. UNIV_ASSERT(lp->code == CVY_LOADCODE); /* (bbain 8/19/99) */
  2782. buf1 [0] = 0;
  2783. buf2 [0] = 0;
  2784. msk [0] = 0;
  2785. num [0] = 0;
  2786. for (i = 0, j = 0; i < 16; i++)
  2787. {
  2788. if (lp -> host_map & (1 << i))
  2789. {
  2790. ptr1 = Univ_ulong_to_str (i + 1, ptr1, 10);
  2791. * ptr1 = L',';
  2792. ptr1 ++;
  2793. j ++;
  2794. msk [i] = L'1';
  2795. }
  2796. else
  2797. msk [i] = L'0';
  2798. }
  2799. for (i = 16, k = 0; i < 32; i++)
  2800. {
  2801. if (lp -> host_map & (1 << i))
  2802. {
  2803. ptr2 = Univ_ulong_to_str (i + 1, ptr2, 10);
  2804. * ptr2 = L',';
  2805. ptr2 ++;
  2806. k ++;
  2807. msk [i] = L'1';
  2808. }
  2809. else
  2810. msk [i] = L'0';
  2811. }
  2812. if (k)
  2813. {
  2814. ptr2 --;
  2815. // * ptr2 = L'.';
  2816. // ptr2 ++;
  2817. }
  2818. else if (j)
  2819. {
  2820. ptr1 --;
  2821. // * ptr1 = L'.';
  2822. // ptr1 ++;
  2823. }
  2824. * ptr1 = 0;
  2825. * ptr2 = 0;
  2826. * host_map = lp->host_map;
  2827. Univ_ulong_to_str ((* (lp->params)) . host_priority, num, 10); /* v2.1 */
  2828. if (lp->send_msg.state != HST_NORMAL)
  2829. {
  2830. UNIV_PRINT (("current host map is %08x and converging", lp->host_map));
  2831. if (internal) /* 1.03 */
  2832. {
  2833. LOG_MSGS3 (MSG_INFO_CONVERGING, num, buf1, buf2);
  2834. }
  2835. return IOCTL_CVY_CONVERGING;
  2836. }
  2837. /* if this host has the bins for the deafult rule, it is the default host (v2.1) */
  2838. else if (lp->pg_state[(* (lp->params)).num_rules].cmap != 0)
  2839. {
  2840. UNIV_PRINT (("current host map is %08x and converged as DEFAULT", lp->host_map));
  2841. if (internal) /* 1.03 */
  2842. {
  2843. LOG_MSGS3(MSG_INFO_MASTER, num, buf1, buf2);
  2844. }
  2845. return IOCTL_CVY_MASTER;
  2846. }
  2847. else
  2848. {
  2849. UNIV_PRINT (("current host map is %08x and converged (NON-DEFAULT)", lp->host_map));
  2850. if (internal) /* 1.03 */
  2851. {
  2852. LOG_MSGS3(MSG_INFO_SLAVE, num, buf1, buf2);
  2853. }
  2854. return IOCTL_CVY_SLAVE;
  2855. }
  2856. } /* end Load_hosts_query */
/*
 * Function: Load_query_packet_filter
 * Description: Read-only query that reports whether the load module WOULD
 *     accept a packet with the given tuple, and why, without modifying any
 *     load-module state. The verdict, hashing information and (when a
 *     descriptor lookup was required) descriptor information are written
 *     into pQuery->Results.
 * Parameters: pQuery - out: query results (accept/reject code plus details).
 *     lp - load module context.
 *     svr_ipaddr - server-side (virtual) IP address.
 *     svr_port - server-side port.
 *     client_ipaddr - client IP address.
 *     client_port - client port.
 *     protocol - TCPIP protocol number.
 *     limit_map_fn - when TRUE, hash without server-specific inputs (BDA teaming).
 * Returns: Nothing; the verdict is returned through pQuery.
 * Author: shouse, 5.18.01
 * Notes: Mirrors the accept/reject logic of the live packet path; keep the
 *     two in sync.
 */
VOID Load_query_packet_filter (
    PIOCTL_QUERY_STATE_PACKET_FILTER pQuery,
    PLOAD_CTXT lp,
    ULONG svr_ipaddr,
    ULONG svr_port,
    ULONG client_ipaddr,
    ULONG client_port,
    USHORT protocol,
    BOOLEAN limit_map_fn)
{
    PBIN_STATE bp;
    ULONG id;
    ULONG bin;
    QUEUE * qp;

    /* This variable is used for port rule lookup and since the port rules only cover
       UDP and TCP, we categorize as TCP and non-TCP, meaning that any protocol that's
       not TCP will be treated like UDP for the sake of port rule lookup. */
    BOOLEAN is_tcp_pkt = (protocol == TCPIP_PROTOCOL_TCP);

    /* Further, some protocols are treated with "session" semantics, while others are
       not. For TCP, this "session" is currently a single TCP connection, which is
       tracked from SYN to FIN using a connection descriptor. IPSec "sessions" are
       also tracked using descriptors, so even though its treated like UDP for port
       rule lookup, its treated with the session semantics resembling TCP. Therefore,
       by default the determination of a session packet is initially the same as the
       determination of a TCP packet. */
    BOOLEAN is_session_pkt = is_tcp_pkt;

    /* If we have enabled IPSec session tracking, then if the protocol is IPSec, this
       packet should also be treated as part of an existing session. */
    if (NLB_IPSEC_SESSION_SUPPORT_ENABLED() && (protocol == TCPIP_PROTOCOL_IPSEC1)) is_session_pkt = TRUE;

    UNIV_ASSERT(lp->code == CVY_LOADCODE);

    /* If the load module has been "turned off", then we drop the packet. */
    if (!lp->active) {
        pQuery->Results.Accept = NLB_REJECT_LOAD_MODULE_INACTIVE;
        return;
    }

    /* Find the port rule for this server IP address / port pair. */
    bp = Load_pg_lookup(lp, svr_ipaddr, svr_port, is_tcp_pkt);

    UNIV_ASSERT ((is_tcp_pkt && bp->prot != CVY_UDP) || (!is_tcp_pkt && bp->prot != CVY_TCP));

    /* If the matching port rule is configured as "disabled", which means to drop any
       packets that match the rule, then we drop the packet. */
    if (bp->mode == CVY_NEVER) {
        pQuery->Results.Accept = NLB_REJECT_PORT_RULE_DISABLED;
        return;
    }

    /* Apply the NLB hashing algorithm on the client identification. If for reasons
       such as BDA teaming, we have chosen to limit the map function, we hard code the
       second parameter, rather than use some of the server identification in an
       effort to make the processing of this packet agnostic to the server identity.
       The hashing parameters also, of course, depend on the configured affinity
       settings for the retrieved port rule. */
    if (limit_map_fn) {
        if (bp->affinity == CVY_AFFINITY_NONE)
            id = Map(client_ipaddr, MAP_FN_PARAMETER);
        else if (bp->affinity == CVY_AFFINITY_SINGLE)
            id = Map(client_ipaddr, MAP_FN_PARAMETER);
        else
            id = Map(client_ipaddr & TCPIP_CLASSC_MASK, MAP_FN_PARAMETER);
    } else {
        if (bp->affinity == CVY_AFFINITY_NONE)
            id = Map(client_ipaddr, ((svr_port << 16) + client_port));
        else if (bp->affinity == CVY_AFFINITY_SINGLE)
            id = Map(client_ipaddr, svr_ipaddr);
        else
            id = Map(client_ipaddr & TCPIP_CLASSC_MASK, svr_ipaddr);
    }

    /* Find the applicable "bucket" by a modulo operation on the number of bins, 60. */
    bin = id % CVY_MAXBINS;

    /* At this point, we can begin providing the requester some actual information about
       the state of the load module to better inform them as to why the decision we return
       them was actually made. Here will provide some appropriate information about the
       port rule we are operating on, including the "bucket" ID, the current "bucket"
       ownership map and the number of connections active on this "bucket". */
    pQuery->Results.HashInfo.Valid = TRUE;
    pQuery->Results.HashInfo.Bin = bin;
    pQuery->Results.HashInfo.CurrentMap = bp->cmap;
    pQuery->Results.HashInfo.AllIdleMap = bp->all_idle_map;
    pQuery->Results.HashInfo.ActiveConnections = bp->nconn[bin];

    /* Check bin for residency and all other hosts now idle on their bins; in this
       case and if we do not have dirty connections, we must be able to handle the packet. */
    /* If we currently own the "bucket" to which this connection maps and either NLB provides
       no session support for this protocol, or all other hosts have no existing connections
       on this "bucket" and we have no dirty connections, then we can safely take the packet
       with no regard to the connection (session) descriptors. */
    if (((bp->cmap & (((MAP_T) 1) << bin)) != 0) && (!is_session_pkt || (((bp->all_idle_map & (((MAP_T) 1) << bin)) != 0) && (!(lp->cln_waiting))))) {
        pQuery->Results.Accept = NLB_ACCEPT_UNCONDITIONAL_OWNERSHIP;
        return;

    /* Otherwise, if there are active connections on this "bucket" or if we own the
       "bucket" and there are dirty connections on it, then we'll walk our descriptor
       lists to determine whether or not we should take the packet or not. */
    } else if (bp->nconn[bin] > 0 || (lp->cln_waiting && lp->dirty_bin[bin] && ((bp->cmap & (((MAP_T) 1) << bin)) != 0))) {
        PCONN_ENTRY ep;
        PCONN_DESCR dp;

        /* Calculate our index into the descriptor hash table by a modulo operation on the
           length of the static descriptor array, 4096. */
        id = id % CVY_MAX_CHASH;

        /* Grab a pointer to the descriptor in our spot in the hash table. */
        ep = &(lp->hashed_conn[id]);

        /* Grab a pointer to our assigned queue of descriptors - our second level hashing. */
        qp = &(lp->connq[id]);

        /* First look for a match in the first-level hashing array. */
        if (CVY_CONN_MATCH(ep, svr_ipaddr, svr_port, client_ipaddr, client_port, protocol)) {
            /* If we find a match in the static hash table, fill in some descriptor
               information for the user, including whether or not the descriptor was
               allocated or static (static is this case) and the observed FIN count. */
            pQuery->Results.DescriptorInfo.Valid = TRUE;
            pQuery->Results.DescriptorInfo.Alloc = ep->alloc;
            pQuery->Results.DescriptorInfo.Dirty = ep->dirty;
            pQuery->Results.DescriptorInfo.FinCount = ep->fin_count;

            /* If the connection is dirty, we do not take the packet because TCP may
               have stale information for this descriptor. */
            if (ep->dirty) {
                pQuery->Results.Accept = NLB_REJECT_CONNECTION_DIRTY;
                return;
            }

            /* If the connection is not dirty, we'll take the packet, as it belongs
               to an existing connection that we are servicing on this host. */
            pQuery->Results.Accept = NLB_ACCEPT_FOUND_MATCHING_DESCRIPTOR;
            return;

        /* Otherwise, we have to walk the second-level hashing linked list of connection
           (session) descriptors looking for a match. */
        } else {
            /* Walk the queue until we reach the end or find what we're looking for. */
            for (dp = (PCONN_DESCR)Queue_front(qp); dp != NULL; dp = (PCONN_DESCR)Queue_next(qp, &(dp->link))) {
                if (CVY_CONN_MATCH(&(dp->entry), svr_ipaddr, svr_port, client_ipaddr, client_port, protocol)) {
                    /* If we find a match in the static hash table, fill in some descriptor
                       information for the user, including whether or not the descriptor was
                       allocated or static (allocated is this case) and the observed FIN count. */
                    pQuery->Results.DescriptorInfo.Valid = TRUE;
                    pQuery->Results.DescriptorInfo.Alloc = dp->entry.alloc;
                    pQuery->Results.DescriptorInfo.Dirty = dp->entry.dirty;
                    pQuery->Results.DescriptorInfo.FinCount = dp->entry.fin_count;

                    /* If the connection is dirty, we do not take the packet because TCP may
                       have stale information for this descriptor. */
                    if (dp->entry.dirty) {
                        pQuery->Results.Accept = NLB_REJECT_CONNECTION_DIRTY;
                        return;
                    }

                    /* If the connection is not dirty, we'll take the packet, as it belongs
                       to an existing connection that we are servicing on this host. */
                    pQuery->Results.Accept = NLB_ACCEPT_FOUND_MATCHING_DESCRIPTOR;
                    return;
                }
            }
        }
    }

    /* If we get all the way down here, then we aren't going to accept the packet
       because we do not own the "bucket" to which the packet maps and we have no
       existing connection (session) state to allow us to service the packet. */
    pQuery->Results.Accept = NLB_REJECT_OWNED_ELSEWHERE;
    return;
}
#if defined (SBH)
/*
 * Function: Load_packet_filter
 * Description: Skeleton for a unified packet-filtering entry point. The body
 *     consists entirely of placeholder macros (BIN_LOOKUP, HASH, CREATE_DSCR,
 *     REMOVE_DSCR, CHECK_HASH, SEARCH_QUEUE) that are not defined anywhere in
 *     this module - presumably pseudo-code sketching the intended flow; the
 *     macros must exist before SBH can be defined. TODO confirm.
 * Parameters: mirrors Load_query_packet_filter, plus conn_status - the
 *     CVY_CONN_* event being processed for this packet.
 * Returns: Declared BOOLEAN, but NOTE(review): no return statement is present
 *     and the switch has no default case - fix before enabling SBH.
 * Author: shouse, 5.18.01
 * Notes: Compiled only when SBH is defined; currently dead code.
 */
BOOLEAN Load_packet_filter (
    PLOAD_CTXT lp,
    ULONG svr_ipaddr,
    ULONG svr_port,
    ULONG client_ipaddr,
    ULONG client_port,
    USHORT protocol,
    ULONG conn_status,
    BOOLEAN limit_map_fn)
{
    BIN_LOOKUP();

    HASH();

    switch (conn_status) {
    case CVY_CONN_CREATE:
        CREATE_DSCR();
        break;
    case CVY_CONN_UP:
        CREATE_DSCR();
        break;
    case CVY_CONN_DOWN:
    case CVY_CONN_RESET:
        REMOVE_DSCR();
        break;
    case CVY_CONN_DATA:
        // protocol dependent.
        CHECK_HASH();
        SEARCH_QUEUE();
        break;
    }
}
#endif