Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

793 lines
28 KiB

  1. /*
  2. * rhizome.c
  3. *
  4. * author: John R. Douceur
  5. * date: 28 April 1997
  6. *
  7. * This source file provides functions that implement insertion, removal, and
  8. * search operations on the rhizome database. The code is object-oriented C,
  9. * transliterated from a C++ implementation.
  10. *
  11. * The rhizome is a database that stores patterns containing wildcards.
  12. * Each pattern defines a set of keys that it matches; if a pattern contains
  13. * N wildcards, then it matches 2^N keys. Since each pattern can match
  14. * multiple keys, it is possible for a given key to match multiple patterns
  15. * in the database. The rhizome requires that all patterns stored therein
  16. * have a strict hierarchical interrelationship. Two patterns may match no
  17. * common keys (in which case the patterns are said to be independent), or
  18. * one pattern may match all the keys matched by a second pattern as well as
  19. * additional keys (in which case the second pattern is said to be more general
  20. * than the first, and the first more specific than the second). The database
  21. * will not accept two patterns which match some keys in common but each of
  22. * which also matches additional keys that the other does not.
  23. *
  24. * The database can be searched for patterns that match a given search key.
  25. * When the database is searched for a given key, the most specifically
  26. * matching pattern is found. If no patterns in the database match the key,
  27. * an appropriate indication is returned.
  28. *
  29. * None of the code or comments in this file needs to be understood by writers
  30. * of client code; all explanatory information for clients is found in the
  31. * associated header file, rhizome.h.
  32. *
  33. */
  34. #include "gpcpre.h"
  // Accessors for the fields of a RhizomeNode.
  //
  // CHILDREN, REFERENCE and GODPARENT are member-path shorthands that are
  // written after "->", e.g. node->CHILDREN[0] or leaf->GODPARENT.
  //
  // VALUE, MASK and IMASK rely on an agreed usage of the node's cdata array:
  // the first keybytes locations store the value field, the second keybytes
  // locations store the mask field, and the third keybytes locations store
  // the imask field. MASK and IMASK read rhizome->keybytes, so a variable
  // named "rhizome" must be in scope wherever they are used.
  //
  // The macro argument is parenthesized so that any pointer-valued expression
  // (for example "p + 1"), not only a plain identifier, expands correctly.
  //
  #define CHILDREN udata.branch.children
  #define REFERENCE udata.leaf.reference
  #define GODPARENT udata.leaf.godparent
  #define VALUE(pointer) ((pointer)->cdata)
  #define MASK(pointer) ((pointer)->cdata + rhizome->keybytes)
  #define IMASK(pointer) ((pointer)->cdata + 2 * rhizome->keybytes)
  // This macro allocates a new rhizome node structure and stores its address
  // through _pa (a pointer to a RhizomeNode pointer). The size of the
  // structure is a function of the value of keybytes, since three bytes of
  // information need to be stored in the structure for each byte of pattern
  // length. The cdata array, which is the last field in the structure, is
  // declared as having a single element, but this array will actually extend
  // beyond the defined end of the structure into the additional space that is
  // requested here (hence the "- 1" in the size computation).
  //
  // The macro reads rhizome->keybytes, so a variable named "rhizome" must be
  // in scope at the point of use. The allocation and the trace are wrapped in
  // do { } while (0) so that the macro behaves as a single statement, e.g.
  // as the body of an unbraced if/else.
  //
  #define NEW_RhizomeNode(_pa) \
      do { \
          GpcAllocMem((_pa), \
                      sizeof(RhizomeNode) + 3 * rhizome->keybytes - 1, \
                      RhizomeTag); \
          TRACE(RHIZOME, *(_pa), sizeof(RhizomeNode) + 3 * rhizome->keybytes - 1, "NEW_RhizomeNode"); \
      } while (0)
  // This macro gets the indexed bit of the value (an array of bytes), where
  // the most-significant bit of byte 0 is defined as bit 0. The result is
  // always 0 or 1.
  //
  // The selected byte is converted to unsigned char before shifting so that
  // the result does not depend on how the implementation right-shifts a
  // negative (signed char) value.
  //
  #define BIT_OF(value, index) \
      ((((unsigned char)(value)[(index) >> 3]) >> (7 - ((index) & 0x7))) & 0x1)
  66. // Following are prototypes for static functions that are used internally by
  67. // the implementation of the rhizome routines.
  68. static int
  69. node_insert(
  70. Rhizome *rhizome,
  71. RhizomeNode *new_leaf,
  72. RhizomeNode **ppoint,
  73. int prev_bit);
  74. static void
  75. node_remove(
  76. Rhizome *rhizome,
  77. RhizomeNode *leaf,
  78. RhizomeNode **ppoint);
  79. static RhizomeNode *
  80. replicate(
  81. Rhizome *rhizome,
  82. RhizomeNode *source,
  83. int pivot_bit);
  84. static void
  85. eliminate(
  86. Rhizome *rhizome,
  87. RhizomeNode *point);
  88. static void
  89. coalesce(
  90. Rhizome *rhizome,
  91. RhizomeNode **leaf_list,
  92. RhizomeNode *point);
  93. // Since this is not C++, the Rhizome structure is not self-constructing;
  94. // therefore, the following constructor code must be called on the Rhizome
  95. // structure after it is allocated. The argument keybits specifies the size
  96. // (in bits) of each pattern that will be stored in the database.
  97. //
  98. void
  99. constructRhizome(
  100. Rhizome *rhizome,
  101. int keybits)
  102. {
  103. rhizome->keybits = keybits;
  104. rhizome->keybytes = (keybits - 1) / 8 + 1;
  105. rhizome->root = 0;
  106. }
  107. // Since this is not C++, the Rhizome structure is not self-destructing;
  108. // therefore, the following destructor code must be called on the Rhizome
  109. // structure before it is deallocated.
  110. //
  111. // If the structure is non-empty, call coalesce() to eliminate
  112. // all branch nodes and to string leaf nodes into a list; then delete list.
  113. //
  114. void
  115. destructRhizome(
  116. Rhizome *rhizome)
  117. {
  118. RhizomeNode *leaf_list, *next;
  119. if (rhizome->root != 0)
  120. {
  121. leaf_list = 0;
  122. coalesce(rhizome, &leaf_list, rhizome->root);
  123. while (leaf_list != 0)
  124. {
  125. next = leaf_list->GODPARENT;
  126. GpcFreeMem(leaf_list, RhizomeTag);
  127. leaf_list = next;
  128. }
  129. }
  130. }
  131. // This function searches the database for the pattern that most specifically
  132. // matches the given key. The key is passed as an array of bytes. When the
  133. // most specific match is found, the PatternHandle of that matching pattern is
  134. // returned. From the PatternHandle can be gotten the reference value via the
  135. // macro GetReferenceFromPatternHandle. If no pattern in the database is found
  136. // to match the key, then a value of 0 is returned as the PatternHandle.
  137. //
  138. PatternHandle
  139. searchRhizome(
  140. Rhizome *rhizome,
  141. char *key)
  142. {
  143. int index;
  144. RhizomeNode *point;
  145. // If tree is empty, search fails.
  146. if (rhizome->root == 0)
  147. {
  148. return 0;
  149. }
  150. // Otherwise, start at rhizome->root and navigate tree until reaching a leaf.
  151. point = rhizome->root;
  152. while (point->pivot_bit < rhizome->keybits)
  153. {
  154. point = point->CHILDREN[BIT_OF(key, point->pivot_bit)];
  155. }
  156. // Check value for match, one byte at a time. If any byte fails to match,
  157. // continue checking godparent with same byte; since previous bytes matched
  158. // godchild, they are guaranteed to match godparent also.
  159. index = 0;
  160. while (index < rhizome->keybytes)
  161. {
  162. if ((((key)[index]) & MASK(point)[index]) != VALUE(point)[index])
  163. {
  164. if (point->GODPARENT != 0)
  165. {
  166. point = point->GODPARENT;
  167. }
  168. else
  169. {
  170. return 0;
  171. }
  172. }
  173. else
  174. {
  175. index++;
  176. }
  177. }
  178. return point;
  179. }
  180. // This function inserts a new pattern into the database. The pattern is
  181. // specified by a value and a mask. Each bit of the mask determines whether
  182. // the bit position is specified or is a wildcard: A 1 in a mask bit indicates
  183. // that the value of that bit is specified by the pattern; a 0 indicates that
  184. // the value of that bit is a wildcard. If a mask bit is 1, then the
  185. // corresponding bit in the value field indicates the specified value of that
  186. // bit. Value and mask fields are passed as arrays of bytes.
  187. //
  188. // The client specifies a void pointer reference value to associate with the
  189. // pattern. When the pattern is installed, the insertRhizome function returns
  190. // a pointer to a PatternHandle.
  191. //
  192. // If the new pattern conflicts with a pattern already installed in the
  193. // database, meaning that the two patterns match some keys in common but each
  194. // also matches additional keys that the other does not, then the new pattern
  195. // is not inserted, and a value of 0 is returned as the PatternHandle.
  196. //
  197. PatternHandle
  198. insertRhizome(
  199. Rhizome *rhizome,
  200. char *value,
  201. char *mask,
  202. void *reference,
  203. ulong *status)
  204. {
  205. RhizomeNode *new_leaf;
  206. int index0, insert_status;
  207. *status = GPC_STATUS_SUCCESS;
  208. // Create new leaf and copy data into it; restrict set bits of value to
  209. // those set in mask, since later code assumes this is the case. Add new
  210. // leaf to reference table.
  211. NEW_RhizomeNode(&new_leaf);
  212. if (new_leaf == 0)
  213. {
  214. // Memory could not be allocated for this new node. Therefore, we
  215. // return an indication of failure to the client.
  216. *status = GPC_STATUS_RESOURCES;
  217. return 0;
  218. }
  219. for (index0 = 0; index0 < rhizome->keybytes; index0++)
  220. {
  221. VALUE(new_leaf)[index0] = value[index0] & mask[index0];
  222. MASK(new_leaf)[index0] = mask[index0];
  223. IMASK(new_leaf)[index0] = mask[index0];
  224. }
  225. new_leaf->REFERENCE = reference;
  226. new_leaf->pivot_bit = rhizome->keybits;
  227. new_leaf->GODPARENT = 0;
  228. // If tree is empty, leaf becomes first node; otherwise, attempt to insert
  229. // using recursive node_insert() routine. If new leaf conflicts with
  230. // existing leaf, node_insert() throws exception; then remove new leaf and
  231. // return failure code.
  232. if (rhizome->root == 0)
  233. {
  234. rhizome->root = new_leaf;
  235. }
  236. else
  237. {
  238. insert_status = node_insert(rhizome, new_leaf, &rhizome->root, -1);
  239. if (insert_status != GPC_STATUS_SUCCESS)
  240. {
  241. removeRhizome(rhizome, new_leaf);
  242. *status = GPC_STATUS_CONFLICT;
  243. return 0; // return null pointer
  244. };
  245. }
  246. return new_leaf;
  247. }
  248. // This function removes a pattern from the rhizome. The pattern is specified
  249. // by the PatternHandle that was returned by the insertRhizome function. No
  250. // checks are performed to insure that this is a valid handle.
  251. //
  252. void
  253. removeRhizome(
  254. Rhizome *rhizome,
  255. PatternHandle phandle)
  256. {
  257. // Call recursive node_remove() routine to remove all references to leaf;
  258. // then delete leaf.
  259. node_remove(rhizome, phandle, &rhizome->root);
  260. TRACE(RHIZOME, rhizome, phandle, "removeRhizome")
  261. GpcFreeMem(phandle, RhizomeTag);
  262. }
  263. // Insert new_leaf into subtree pointed to by *ppoint. Update *ppoint to point
  264. // to newly created nodes if necessary. Index of most recently examined bit
  265. // is given by prev_bit. The return value is a status code: Normally, it
  266. // returns GPC_STATUS_SUCCESS; if there is a conflict, then it returns NDIS_STATUS_CONFLICT;
  267. // if there is insufficient memory available to perform the insertion, then it
  268. // returns GPC_STATUS_RESOURCES.
  269. //
  270. static int
  271. node_insert(
  272. Rhizome *rhizome,
  273. RhizomeNode *new_leaf,
  274. RhizomeNode **ppoint,
  275. int prev_bit)
  276. {
  277. int index, index0, bit_value, insert_status;
  278. char sub, super;
  279. RhizomeNode *point, *child, *new_branch;
  280. // This routine has a recursive structure, but unnecessary recursions have
  281. // been replaced by iteration, in order to improve performance. This
  282. // recursion removal has introduced a forever loop which encloses the
  283. // entirety of the routine; looping back to the beginning of this loop is
  284. // thus the equivalent of recursing.
  285. while (1)
  286. {
  287. point = *ppoint;
  288. // Examine each bit index beginnig with that following last bit index
  289. // examined previously. Continue examining bits until pivot bit of
  290. // current node is reached (unless loop is terminated prematurely).
  291. for (index = prev_bit + 1; index < point->pivot_bit; index++)
  292. {
  293. // If some leaf in the current subtree cares about the value of the
  294. // current bit, and if the new leaf cares about the value of the
  295. // current bit, and these two leaves disagree about the value of
  296. // this bit, then a new branch node should be inserted here.
  297. if (BIT_OF(MASK(new_leaf), index) == 1 &&
  298. BIT_OF(MASK(point), index) == 1 &&
  299. BIT_OF(VALUE(new_leaf), index) != BIT_OF(VALUE(point), index))
  300. {
  301. // Create new branch node; insert into tree; and set fields.
  302. bit_value = BIT_OF(VALUE(new_leaf), index);
  303. NEW_RhizomeNode(&new_branch);
  304. if (new_branch == 0)
  305. {
  306. // Memory could not be allocated for this new node.
  307. // Therefore, we pass an indication of failure up the stack.
  308. return GPC_STATUS_RESOURCES;
  309. }
  310. *ppoint = new_branch;
  311. for (index0 = 0; index0 < rhizome->keybytes; index0++)
  312. {
  313. VALUE(new_branch)[index0] =
  314. VALUE(point)[index0] | VALUE(new_leaf)[index0];
  315. MASK(new_branch)[index0] =
  316. MASK(point)[index0] | MASK(new_leaf)[index0];
  317. IMASK(new_branch)[index0] =
  318. IMASK(point)[index0] & IMASK(new_leaf)[index0];
  319. }
  320. // Pivot bit of new branch node is the bit that inspired the
  321. // creation of this branch.
  322. new_branch->pivot_bit = index;
  323. // The earlier subtree becomes the child whose bit disagreed
  324. // with that of the new leaf.
  325. new_branch->CHILDREN[1 - bit_value] = point;
  326. // If every leaf in the subtree cares about the value of this
  327. // bit, then we can insert the new leaf as the other child of
  328. // this branch.
  329. if (BIT_OF(IMASK(point), index) == 1)
  330. {
  331. // Insert new leaf here and return.
  332. new_branch->CHILDREN[bit_value] = new_leaf;
  333. return GPC_STATUS_SUCCESS;
  334. }
  335. // Otherwise, at least one leaf in the earlier subtree does not
  336. // care about the value of this bit. Copy all such leaves
  337. // (and necessary branches) to the other child of the new
  338. // branch node.
  339. child = replicate(rhizome, point, index);
  340. if (child == 0)
  341. {
  342. // Memory could not be allocated for the replica.
  343. // Therefore, we remove the new node from the structure,
  344. // delete the new node, and pass an indication of failure
  345. // up the stack.
  346. *ppoint = point;
  347. GpcFreeMem(new_branch, RhizomeTag);
  348. return GPC_STATUS_RESOURCES;
  349. }
  350. new_branch->CHILDREN[bit_value] = child;
  351. // Continue search on newly copied subtree.
  352. ppoint = &new_branch->CHILDREN[bit_value];
  353. point = *ppoint;
  354. }
  355. }
  356. // All bits have been examined up to the pivot bit of the current node.
  357. // If this node is a leaf, then we have found a leaf with which the new
  358. // leaf has no disagreements over bit values.
  359. if (point->pivot_bit >= rhizome->keybits)
  360. {
  361. // Loop up the chain of godparents until one of the four cases
  362. // below causes an exit from the subroutine.
  363. while (1)
  364. {
  365. // Case 1: We have reached the end of the godparent chain.
  366. if (point == 0)
  367. {
  368. // Insert new leaf at this point and return.
  369. *ppoint = new_leaf;
  370. return GPC_STATUS_SUCCESS;
  371. }
  372. // Case 2: We discover that we have already inserted this leaf
  373. // at the appropriate location. This can happen because two
  374. // leaves in separate parts of the tree may have a common god-
  375. // ancestor, and a leaf which is a further god-ancestor of that
  376. // leaf will be reached more than once. Since the first
  377. // occasion inserted the leaf, the second one can return without
  378. // performing any action.
  379. if (point == new_leaf)
  380. {
  381. return GPC_STATUS_SUCCESS;
  382. }
  383. // Compare mask bits of the new leaf to the current leaf.
  384. sub = 0;
  385. super = 0;
  386. for (index = 0; index < rhizome->keybytes; index++)
  387. {
  388. sub |= MASK(new_leaf)[index] & ~MASK(point)[index];
  389. super |= ~MASK(new_leaf)[index] & MASK(point)[index];
  390. }
  391. // Case 3: The new leaf cares about at least one bit that the
  392. // current leaf does not; and the current leaf does not care
  393. // about any bits that the new leaf does not; thus, the new leaf
  394. // should be a godchild of the current leaf.
  395. if (sub != 0 && super == 0)
  396. {
  397. // Update imask field of new leaf; insert into chain;
  398. // and return.
  399. for (index0 = 0; index0 < rhizome->keybytes; index0++)
  400. {
  401. IMASK(new_leaf)[index0] &= IMASK(point)[index0];
  402. }
  403. new_leaf->GODPARENT = point;
  404. *ppoint = new_leaf;
  405. return GPC_STATUS_SUCCESS;
  406. }
  407. // Case 4: Either the new leaf has the same value and mask as
  408. // the current leaf, or there is a hierarchy conflict between
  409. // the two leaves. In either case, terminate the insertion
  410. // process and clean up (in insert() routine) anything done
  411. // already.
  412. if (sub != 0 || super == 0)
  413. {
  414. return GPC_STATUS_CONFLICT;
  415. }
  416. // None of the above cases occurred; thus, the new leaf should
  417. // be a god-ancestor of the current leaf. Update the imask
  418. // field of the current leaf, and continue with godparent of
  419. // current leaf.
  420. for (index0 = 0; index0 < rhizome->keybytes; index0++)
  421. {
  422. IMASK(point)[index0] &= IMASK(new_leaf)[index0];
  423. }
  424. ppoint = &point->GODPARENT;
  425. point = *ppoint;
  426. }
  427. }
  428. // The current node is not a leaf node. Thus, we recurse on one or both
  429. // of the child nodes of the current node. First, update the fields of
  430. // the current node to reflect the insertion of the new leaf into the
  431. // subtree.
  432. for (index0 = 0; index0 < rhizome->keybytes; index0++)
  433. {
  434. VALUE(point)[index0] |= VALUE(new_leaf)[index0];
  435. MASK(point)[index0] |= MASK(new_leaf)[index0];
  436. IMASK(point)[index0] &= IMASK(new_leaf)[index0];
  437. }
  438. // If the new leaf doesn't care about the value of the pivot bit of the
  439. // current leaf, then we must recurse on both children. We can only
  440. // replace a single recursive call with iteration, so we perform a true
  441. // recursion in this case, and we recurse on child 1.
  442. if (BIT_OF(MASK(new_leaf), point->pivot_bit) == 0)
  443. {
  444. insert_status =
  445. node_insert(rhizome, new_leaf, &point->CHILDREN[1],
  446. point->pivot_bit);
  447. if (insert_status != GPC_STATUS_SUCCESS)
  448. {
  449. return insert_status;
  450. }
  451. }
  452. // Update the values of prev_bit and ppoint to reflect the same
  453. // conditions that would hold in a recursive call. The pseudo-recursion
  454. // is performed on the bit indicated by the value of the pivot bit of
  455. // the new leaf. If the new leaf does not care about this bit, then
  456. // this value will be a 0, and we recursed on child 1 above. If the new
  457. // leaf does care about the value of this bit, then we continue down the
  458. // appropriate path.
  459. prev_bit = point->pivot_bit;
  460. ppoint = &point->CHILDREN[BIT_OF(VALUE(new_leaf), point->pivot_bit)];
  461. }
  462. }
  463. // Remove references to leaf from subtree pointed to by *ppoint. Update *ppoint
  464. // if necessary due to removal of branch nodes.
  465. //
  466. static void
  467. node_remove(
  468. Rhizome *rhizome,
  469. RhizomeNode *leaf,
  470. RhizomeNode **ppoint)
  471. {
  472. int pivot_bit, bit_value, index0;
  473. RhizomeNode *point, *child, *child0, *child1;
  474. point = *ppoint;
  475. pivot_bit = point->pivot_bit;
  476. if (pivot_bit < rhizome->keybits)
  477. {
  478. // The current node is a branch node.
  479. if (BIT_OF(MASK(leaf), pivot_bit) == 1)
  480. {
  481. // The leaf to be removed cares about this node's pivot bit;
  482. // therefore, we need only recurse on one of the current node's
  483. // children.
  484. bit_value = BIT_OF(VALUE(leaf), pivot_bit);
  485. node_remove(rhizome, leaf, &point->CHILDREN[bit_value]);
  486. child = point->CHILDREN[bit_value];
  487. if (child != 0 && BIT_OF(MASK(child), pivot_bit) == 1)
  488. {
  489. // Some leaf in the same subtree as the removed leaf cares about
  490. // the value of this node's pivot bit; therefore, this node
  491. // still has reason to exist. Update its fields to reflect the
  492. // change in one of its subtrees.
  493. child0 = point->CHILDREN[0];
  494. child1 = point->CHILDREN[1];
  495. for (index0 = 0; index0 < rhizome->keybytes; index0++)
  496. {
  497. VALUE(point)[index0] =
  498. VALUE(child0)[index0] | VALUE(child1)[index0];
  499. MASK(point)[index0] =
  500. MASK(child0)[index0] | MASK(child1)[index0];
  501. IMASK(point)[index0] =
  502. IMASK(child0)[index0] & IMASK(child1)[index0];
  503. }
  504. }
  505. else
  506. {
  507. // No leaf in the same subtree as the removed leaf cares about
  508. // the value of this node's pivot bit; therefore, there is no
  509. // longer any reason for this node to exist. Have the other
  510. // subtree take the current node's place in the tree; call
  511. // remove() to remove the unneeded subtree; and delete the
  512. // current node.
  513. *ppoint = point->CHILDREN[1 - bit_value];
  514. if (child != 0)
  515. {
  516. eliminate(rhizome, child);
  517. }
  518. GpcFreeMem(point, RhizomeTag);
  519. }
  520. }
  521. else
  522. {
  523. // The leaf to be removed does not care about this node's pivot bit;
  524. // therefore, we must recurse on both of the current node's
  525. // children. This node must still be necessary, since we have not
  526. // removed any leaf which cares about this node's value. So we
  527. // update its fields to reflect the change in its two subtrees.
  528. node_remove(rhizome, leaf, &point->CHILDREN[0]);
  529. node_remove(rhizome, leaf, &point->CHILDREN[1]);
  530. child0 = point->CHILDREN[0];
  531. child1 = point->CHILDREN[1];
  532. for (index0 = 0; index0 < rhizome->keybytes; index0++)
  533. {
  534. VALUE(point)[index0] =
  535. VALUE(child0)[index0] | VALUE(child1)[index0];
  536. MASK(point)[index0] =
  537. MASK(child0)[index0] | MASK(child1)[index0];
  538. IMASK(point)[index0] =
  539. IMASK(child0)[index0] & IMASK(child1)[index0];
  540. }
  541. }
  542. }
  543. else
  544. {
  545. // The current node is a leaf node.
  546. if (point == leaf)
  547. {
  548. // The current node is the leaf to be removed; therefore, remove it
  549. // from chain of godparents.
  550. *ppoint = leaf->GODPARENT;
  551. }
  552. else
  553. {
  554. // The current node is not leaf to be removed. Therefore, if this
  555. // node has a godparent, then recurse on that godparent. If this
  556. // node does not have a godparent, then the to-be-removed leaf
  557. // either already was removed by a different path, or it was never
  558. // inserted to begin with. The latter might be the case if remove()
  559. // was called from the catch clause of insert().
  560. if (point->GODPARENT != 0)
  561. {
  562. node_remove(rhizome, leaf, &point->GODPARENT);
  563. }
  564. // We are now popping back up the recursion stack. If this node
  565. // does not have a godparent, or if it did but it does not anymore,
  566. // then initialize imask to mask; otherwise, copy the godparent's
  567. // value of imask. Since the godparent chain follows a strict
  568. // hierarchy, and since imask is formed by successive conjunction,
  569. // all leaves in any given godparent chain will have the same value
  570. // of imask, namely the mask value of the highest god-ancestor.
  571. if (point->GODPARENT == 0)
  572. {
  573. for (index0 = 0; index0 < rhizome->keybytes; index0++)
  574. {
  575. IMASK(point)[index0] = MASK(point)[index0];
  576. }
  577. }
  578. else
  579. {
  580. for (index0 = 0; index0 < rhizome->keybytes; index0++)
  581. {
  582. IMASK(point)[index0] = IMASK(point->GODPARENT)[index0];
  583. }
  584. }
  585. }
  586. }
  587. }
  588. // Replicate all nodes in a subtree which do not care about the value of
  589. // pivot_bit.
  590. //
  591. static RhizomeNode *
  592. replicate(
  593. Rhizome *rhizome,
  594. RhizomeNode *source,
  595. int pivot_bit)
  596. {
  597. int index0, current_bit;
  598. RhizomeNode *new_node, *child0, *child1;
  599. // If this routine were fully recursive, the following while statement
  600. // would be an if statement. However, recursion has been replaced by
  601. // iteration where possible, so the following code loops until bottoming
  602. // out when a leaf node is reached.
  603. while (source->pivot_bit < rhizome->keybits)
  604. {
  605. if (BIT_OF(IMASK(source->CHILDREN[0]), pivot_bit) == 0)
  606. {
  607. if (BIT_OF(IMASK(source->CHILDREN[1]), pivot_bit) == 0)
  608. {
  609. // Both subtrees contain leaves which do not care about the
  610. // pivot bit; therefore, we may need to make a copy of the
  611. // current node. It is not guaranteed that we need to make
  612. // a copy, since it may be a common leaf in both subtrees
  613. // that does not care about the pivot bit. This may happen
  614. // for a leaf which is a godparent of two leaves, one in each
  615. // subtree. Recurse on each child and examine results.
  616. child0 = replicate(rhizome, source->CHILDREN[0], pivot_bit);
  617. if (child0 == 0)
  618. {
  619. // Memory could not be allocated for the child replica.
  620. // Therefore, we abort the replication process and pass an
  621. // indication of failure op the stack.
  622. return 0;
  623. }
  624. child1 = replicate(rhizome, source->CHILDREN[1], pivot_bit);
  625. if (child1 == 0)
  626. {
  627. // Memory could not be allocated for the child replica.
  628. // Therefore, we abort the replication process, eliminate
  629. // the other child replica, and pass an indication of
  630. // failure op the stack.
  631. eliminate(rhizome, child0);
  632. return 0; // return null pointer
  633. }
  634. current_bit = source->pivot_bit;
  635. if (BIT_OF(MASK(child0), current_bit) == 1)
  636. {
  637. if (BIT_OF(MASK(child1), current_bit) == 1)
  638. {
  639. // Both replicated child subtrees contain leaves which
  640. // care about the current node's bit. Since any node
  641. // which is a godparent of nodes in both subtrees could
  642. // not possibly care about the current node's bit, we
  643. // know that we need to make a copy of the current node.
  644. NEW_RhizomeNode(&new_node);
  645. if (new_node == 0)
  646. {
  647. // Memory could not be allocated for this new node.
  648. // Therefore, we have to eliminate both children
  649. // and pass an indication of failure up the stack.
  650. eliminate(rhizome, child0);
  651. eliminate(rhizome, child1);
  652. return 0; // return null pointer
  653. }
  654. for (index0 = 0; index0 < rhizome->keybytes; index0++)
  655. {
  656. VALUE(new_node)[index0] =
  657. VALUE(child0)[index0] | VALUE(child1)[index0];
  658. MASK(new_node)[index0] =
  659. MASK(child0)[index0] | MASK(child1)[index0];
  660. IMASK(new_node)[index0] =
  661. IMASK(child0)[index0] & IMASK(child1)[index0];
  662. }
  663. new_node->pivot_bit = current_bit;
  664. new_node->CHILDREN[0] = child0;
  665. new_node->CHILDREN[1] = child1;
  666. return new_node;
  667. }
  668. // Child 0's subtree contains a leaf that cares about the
  669. // current bit; however, child 1's subtree does not. Thus,
  670. // all leaves which are in child 1's subtree are also in
  671. // child 0's subtree, so we only need to keep the latter.
  672. // We therefore eliminate child 1's subtree, and we return
  673. // child 0 as the new subtree at this location, since we
  674. // do not need to create a new branch node here.
  675. eliminate(rhizome, child1);
  676. return child0;
  677. }
  678. // Child 0's subtree does not contain a leaf that cares about
  679. // the current node's bit. Thus, all leaves which are in child
  680. // 0's subtree are also in child 1's subtree, so we only need to
  681. // keep the latter. We therefore eliminate child 0's subtree,
  682. // and we return child 1 as the new subtree at this location,
  683. // since we do not need to create a new branch node here.
  684. eliminate(rhizome, child0);
  685. return child1;
  686. }
  687. // Child 0's subtree contains a leaf which does not care about the
  688. // pivot bit; however, child 1's subtree does not. Therefore, we
  689. // recurse on child 0. Rather than truly recursing, we update the
  690. // value of source and iterate once through the while loop.
  691. source = source->CHILDREN[0];
  692. }
  693. else
  694. {
  695. // Child 0's subtree does not contain a leaf which does not care
  696. // about the pivot bit. Child 1's subtree must contain such a leaf,
  697. // since the current node's subtree contains such a leaf. Thus, we
  698. // recurse on child 1. Rather than truly recursing, we update the
  699. // value of source and iterate once through the while loop.
  700. source = source->CHILDREN[1];
  701. }
  702. }
  703. // A leaf node has been reached. We now iterate through the godparents of
  704. // the leaf until we find one which does not care about the pivot bit.
  705. // Once we find it, we know that all godparents of that leaf also do not
  706. // care about the pivot bit, since the godparents are arranged in a strict
  707. // hierarchy. We thus return the first leaf found which does not care about
  708. // the value of the pivot bit.
  709. while (BIT_OF(MASK(source), pivot_bit) == 1)
  710. {
  711. source = source->GODPARENT;
  712. }
  713. return source;
  714. }
  715. // Eliminate an entire subtree.
  716. //
  717. static void
  718. eliminate(
  719. Rhizome *rhizome,
  720. RhizomeNode *point)
  721. {
  722. RhizomeNode *child;
  723. // Partial recursion removal. The while loop takes the place of one of the
  724. // recursive calls to eliminate(). We eliminate each node and recursively
  725. // eleminate each subtree under the node. We do not eliminate leaves, since
  726. // there is only one copy of each leaf stored in the entire structure.
  727. while (point->pivot_bit < rhizome->keybits)
  728. {
  729. eliminate(rhizome, point->CHILDREN[0]);
  730. child = point->CHILDREN[1];
  731. GpcFreeMem(point, RhizomeTag);
  732. point = child;
  733. }
  734. }
  735. // Coalesce leaves of subtree into a linked list and eliminate subtree. This
  736. // routine is called by the destructor so that it can deallocate the leaf nodes
  737. // after the branch nodes are eliminated.
  738. //
  739. static void
  740. coalesce(
  741. Rhizome *rhizome,
  742. RhizomeNode **leaf_list,
  743. RhizomeNode *point)
  744. {
  745. RhizomeNode *child, *godparent;
  746. // Partial recursion removal. This while loop takes the place of one of
  747. // the recursive calls to coalesce(). This performs an inorder traversal.
  748. // We delete each branch node after we have visited it, just as in the
  749. // eliminate() routine.
  750. while (point->pivot_bit < rhizome->keybits && point->pivot_bit >= 0)
  751. {
  752. coalesce(rhizome, leaf_list, point->CHILDREN[0]);
  753. child = point->CHILDREN[1];
  754. GpcFreeMem(point, RhizomeTag);
  755. point = child;
  756. }
  757. // Once we have found a leaf, we search through the chain of godparents,
  758. // adding to the list each leaf node that is not already in the list.
  759. // A pivot_bit of -1 indicates that the leaf is already in the list.
  760. // If a leaf is in the list, then so are all of its godparents.
  761. while (point != 0 && point->pivot_bit >= 0)
  762. {
  763. godparent = point->GODPARENT;
  764. point->pivot_bit = -1;
  765. point->GODPARENT = *leaf_list;
  766. *leaf_list = point;
  767. point = godparent;
  768. }
  769. }