Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

962 lines
22 KiB

  1. /*++
  2. Copyright (c) 1998 Microsoft Corporation
  3. Module Name :
  4. trie.h
  5. Abstract:
  6. Declares a trie
  7. Author:
  8. George V. Reilly (GeorgeRe) 21-Jan-1998
  9. Environment:
  10. Win32 - User Mode
  11. Project:
  12. Internet Information Server RunTime Library
  13. Revision History:
  14. --*/
  15. // A trie is a multiway search tree (aka a radix tree). See a good
  16. // algorithms text, like Knuth or Sedgewick, for a complete description.
  17. //
  18. // Briefly, given a list of strings such as
  19. // cab, car, carts, cats, dog, doge, doggy, dogs
  20. // you get a trie that looks like this:
  21. //
  22. //                  /-[b]
  23. //                 /
  24. //        <c>--<a>--[r]--<t>--[s]
  25. //       /         \
  26. //      /           \-<t>--[s]
  27. //     *
  28. //      \               /-[e]
  29. //       \             /
  30. //        <d>--<o>--[g]--<g>--[y]
  31. //                      \
  32. //                       \-[s]
  33. //
  34. // where `[r]' denotes the end of a word and `<a>', the middle.
  35. //
  36. // A trie has several useful properties:
  37. // * fast
  38. // * easily handles longest substring matches
  39. // * fairly compact, especially when there are many overlapping strings
  40. //
  41. // The multiway tree is implemented as a binary tree with child and sibling
  42. // pointers.
  43. //
  44. // The CTrie template takes three parameters:
  45. // class _TOKEN: up to you
  46. // bool fIgnoreCase: case-sensitivity for searches
  47. // bool fDeleteTokens: delete _TOKEN* when Flush() called?
  48. // and it exposes three methods:
  49. // bool AddToken(ptszToken, const _TOKEN*)
  50. // const _TOKEN* Search(ptszSearch, pctchMatched = NULL, nMaxLen = 0)
  51. // void Flush()
  52. //
  53. // Use them like this:
  54. // CTrie<CToken, true, true> trie;
  55. // CToken* ptokHello = new CToken(...);
  56. //
  57. // IRTLVERIFY(trie.AddToken(_T("Hello"), ptokHello));
  58. //
  59. // const CToken* ptok = trie.Search(_T("Goodbye"));
  60. // if (ptok != NULL) {...}
  61. //
  62. // if (fIniFileChanged)
  63. // {
  64. // trie.Flush(); // will delete all tokens
  65. // AddTokensFromIniFile(trie);
  66. // }
  67. //
  68. // Note: If you use DUMP(&trie) or ASSERT_VALID(&trie), your _TOKEN class must
  69. // have Dump() or AssertValid() methods, respectively, in its _DEBUG version.
  70. //
  71. //
  72. // TODO:
  73. // * template really ought to be parameterized on ANSI/Unicode too
  74. // * STLify it: add iterators, turn it into a container, etc
  75. // * remove Win32 dependencies (TCHAR)
  76. // * add operator= and copy ctor
  77. //
  78. //
  79. // George V. Reilly <[email protected]> Oct 1995 Initial implementation
  80. // George V. Reilly <[email protected]> Sep 1996 Add CharPresent for ANSI
  81. // George V. Reilly <[email protected]> Mar 1997 Templatized; removed MFC
  82. #ifndef __TRIE_H__
  83. #define __TRIE_H__
  84. #include <tchar.h>
  85. #include <limits.h>
  86. #include <malloc.h>
  87. #include <irtldbg.h>
  // Workaround for bool being a "reserved extension" in Visual C++ 4.x.
  //
  // The defined(_MSC_VER) test is required: an undefined identifier evaluates
  // to 0 inside #if, so a bare `_MSC_VER < 1100' is true on every compiler
  // that does not define _MSC_VER and would redefine its built-in
  // bool/true/false keywords.
  #if defined(_MSC_VER) && _MSC_VER < 1100
  # ifndef bool
  #  define bool BOOL
  # endif
  # ifndef true
  #  define true TRUE
  # endif
  # ifndef false
  #  define false FALSE
  # endif
  #endif
  100. // forward declaration
  101. template <class _TOKEN, bool fIgnoreCase, bool fDeleteTokens> class CTrie;
  102. //+---------------------------------------------------------------------
  103. // Class: CTrieNode (tn)
  104. // one node for each letter
  105. template <class _TOKEN, bool fIgnoreCase, bool fDeleteTokens>
  106. class CTrieNode
  107. {
  108. friend class CTrie<_TOKEN, fIgnoreCase, fDeleteTokens>;
  109. typedef CTrieNode<_TOKEN, fIgnoreCase, fDeleteTokens> _Node;
  110. public:
  111. CTrieNode();
  112. CTrieNode(
  113. _Node* pParent,
  114. const _TOKEN* ptok,
  115. const TCHAR tch,
  116. LPCTSTR ptszToken);
  117. bool
  118. SetData(
  119. const _TOKEN* ptok,
  120. LPCTSTR ptszToken);
  121. ~CTrieNode();
  122. protected:
  123. const _Node* m_pParent;
  124. _Node* m_pSibling;
  125. _Node* m_pChild;
  126. const _TOKEN* m_ptok;
  127. #ifdef _DEBUG
  128. LPTSTR m_ptszToken;
  129. #endif
  130. const TCHAR m_tch;
  131. TCHAR m_tchMaxChild; // Maximum m_tch of child nodes (1 level)
  132. // Diagnostics
  133. public:
  134. #ifdef _DEBUG
  135. void
  136. AssertValid() const;
  137. virtual void
  138. Dump() const;
  139. protected:
  140. bool
  141. CheckNodeToken() const;
  142. #endif
  143. private:
  144. // private, unimplemented copy ctor and op= to prevent
  145. // compiler synthesizing them
  146. CTrieNode(const CTrieNode&);
  147. CTrieNode& operator=(const CTrieNode&);
  148. };
  149. //+---------------------------------------------------------------------
  150. // Class: CTrie (trie)
  151. template <class _TOKEN, bool fIgnoreCase, bool fDeleteTokens>
  152. class CTrie
  153. {
  154. typedef CTrieNode<_TOKEN, fIgnoreCase, fDeleteTokens> _Node;
  155. public:
  156. CTrie();
  157. virtual
  158. ~CTrie();
  159. virtual bool
  160. AddToken(
  161. LPCTSTR ptszToken,
  162. const _TOKEN* const ptok);
  163. virtual const _TOKEN*
  164. Search(
  165. LPCTSTR ptszSearch,
  166. int* pctchMatched = NULL,
  167. const int nMaxLen = 0) const;
  168. virtual void
  169. Flush();
  170. protected:
  171. _Node m_tnRoot;
  172. TCHAR m_tchMinChild;
  173. TCHAR m_tchMaxChild;
  174. void
  175. _DeleteTrie(
  176. _Node* ptn);
  177. #ifndef _UNICODE
  178. // bit array for first letter of all tokens
  179. BYTE m_afCharPresent[(CHAR_MAX - CHAR_MIN + 1 + 7) / 8];
  180. bool
  181. _CharPresent(
  182. CHAR ch) const;
  183. void
  184. _SetCharPresent(
  185. CHAR ch,
  186. bool f);
  187. #endif // !UNICODE
  188. // Diagnostics
  189. public:
  190. #ifdef _DEBUG
  191. virtual void
  192. AssertValid() const;
  193. virtual void
  194. Dump() const;
  195. protected:
  196. int m_ctchMaxTokenLen; // length of longest token string
  197. void
  198. _AssertWalk(
  199. _Node* ptn,
  200. LPTSTR ptszName,
  201. int iLevel) const;
  202. void
  203. _DumpWalk(
  204. _Node* ptn,
  205. LPTSTR ptszName,
  206. int iLevel,
  207. int& rcNodes,
  208. int& rcTokens) const;
  209. #endif
  210. private:
  211. // private, unimplemented copy ctor and op= to prevent
  212. // compiler synthesizing them
  213. CTrie(const CTrie&);
  214. CTrie& operator=(const CTrie&);
  215. };
  // TCHAR_MIN: the value used for "no letter" (root node, empty child list).
  #if defined(_UNICODE)
  # define TCHAR_MIN L'\0'
  #else  // ANSI build
  # define TCHAR_MIN CHAR_MIN
  #endif
  221. //-----------------------------------------------------------------------------
  222. // CTrieNode implementation
  223. // CTrieNode::CTrieNode
  224. // default ctor (needed for CTrie::m_tnRoot)
  225. template <class _TOKEN, bool fIgnoreCase, bool fDeleteTokens>
  226. CTrieNode<_TOKEN, fIgnoreCase, fDeleteTokens>::CTrieNode()
  227. : m_pParent(NULL),
  228. m_pSibling(NULL),
  229. m_pChild(NULL),
  230. m_ptok(NULL),
  231. #ifdef _DEBUG
  232. m_ptszToken(NULL),
  233. #endif
  234. m_tch(TCHAR_MIN),
  235. m_tchMaxChild(TCHAR_MIN)
  236. {
  237. }
  238. // CTrieNode::CTrieNode
  239. // ctor
  240. template <class _TOKEN, bool fIgnoreCase, bool fDeleteTokens>
  241. CTrieNode<_TOKEN, fIgnoreCase, fDeleteTokens>::CTrieNode(
  242. _Node* pParent,
  243. const _TOKEN* ptok,
  244. const TCHAR tch,
  245. LPCTSTR ptszToken)
  246. : m_pParent(pParent),
  247. m_pSibling(NULL),
  248. m_pChild(NULL),
  249. m_ptok(ptok),
  250. #ifdef _DEBUG
  251. m_ptszToken(NULL),
  252. #endif
  253. m_tch(tch),
  254. m_tchMaxChild(TCHAR_MIN)
  255. {
  256. IRTLASSERT(m_pParent != NULL);
  257. IRTLASSERT(m_tch > TCHAR_MIN);
  258. _Node* ptnPrev = NULL;
  259. _Node* ptn = m_pParent->m_pChild;
  260. // find where in the list of pParent's children to insert `this'
  261. while (ptn != NULL && ptn->m_tch < m_tch)
  262. {
  263. ptnPrev = ptn;
  264. ptn = ptn->m_pSibling;
  265. }
  266. IRTLASSERT(ptn == NULL || ptn->m_tch != m_tch);
  267. if (ptnPrev == NULL)
  268. {
  269. IRTLASSERT(pParent->m_pChild == ptn);
  270. pParent->m_pChild = this;
  271. }
  272. else
  273. ptnPrev->m_pSibling = this;
  274. this->m_pSibling = ptn;
  275. if (pParent->m_tchMaxChild < m_tch)
  276. pParent->m_tchMaxChild = m_tch;
  277. #ifdef _DEBUG
  278. if (ptszToken != NULL)
  279. {
  280. IRTLASSERT(m_ptok != NULL);
  281. m_ptszToken = new TCHAR [_tcslen(ptszToken) + 1];
  282. _tcscpy(m_ptszToken, ptszToken);
  283. }
  284. #endif
  285. }
  286. // CTrieNode::SetData
  287. // sets the data if it's NULL. Needed if you do
  288. // AddToken("foobar", &tokFoobar) and then AddToken("foo", &tokFoo)
  289. // to set the data for tokFoo.
  290. template <class _TOKEN, bool fIgnoreCase, bool fDeleteTokens>
  291. bool
  292. CTrieNode<_TOKEN, fIgnoreCase, fDeleteTokens>::SetData(
  293. const _TOKEN* ptok,
  294. LPCTSTR ptszToken)
  295. {
  296. // Don't set data if ptok is NULL
  297. if (ptok == NULL)
  298. return false;
  299. // overwrite m_ptok only if it is NULL
  300. if (m_ptok == NULL)
  301. {
  302. m_ptok = ptok;
  303. #ifdef _DEBUG
  304. IRTLASSERT(m_ptszToken == NULL);
  305. IRTLASSERT(ptszToken != NULL);
  306. m_ptszToken = new TCHAR [_tcslen(ptszToken) + 1];
  307. _tcscpy(m_ptszToken, ptszToken);
  308. #endif
  309. }
  310. return true;
  311. }
  312. // CTrieNode::~CTrieNode
  313. // dtor
  314. template <class _TOKEN, bool fIgnoreCase, bool fDeleteTokens>
  315. CTrieNode<_TOKEN, fIgnoreCase, fDeleteTokens>::~CTrieNode()
  316. {
  317. #ifdef _DEBUG
  318. delete [] m_ptszToken;
  319. #endif
  320. // Is this an auto-delete trie, i.e., do we take care of deleting
  321. // the _TOKENs?
  322. if (fDeleteTokens)
  323. {
  324. // cast away constness so that delete will work
  325. delete const_cast<_TOKEN*> (m_ptok);
  326. }
  327. IRTLASSERT(m_pChild == NULL);
  328. }
  329. //-----------------------------------------------------------------------------
  330. // CTrieNode diagnostics
  331. #ifdef _DEBUG
  332. // CTrieNode::CheckNodeToken
  333. // Do the real work of validating a CTrieNode object
  334. template <class _TOKEN, bool fIgnoreCase, bool fDeleteTokens>
  335. bool
  336. CTrieNode<_TOKEN, fIgnoreCase, fDeleteTokens>::CheckNodeToken() const
  337. {
  338. // If there's no m_ptok, it's automatically valid
  339. if (m_ptok == NULL)
  340. return true;
  341. IRTLASSERT(m_ptszToken != NULL);
  342. const int cLen = _tcslen(m_ptszToken);
  343. const _Node* ptn = this;
  344. IRTLASSERT((m_pChild == NULL && m_tchMaxChild == TCHAR_MIN)
  345. || (m_pChild != NULL && m_tchMaxChild > TCHAR_MIN));
  346. // Walk back up towards CTrie::m_tnRoot
  347. for (int i = cLen; --i >= 0; )
  348. {
  349. IRTLASSERT(ptn != NULL);
  350. IRTLASSERT(ptn->m_tch != TCHAR_MIN);
  351. const TCHAR tch = (fIgnoreCase
  352. ? (TCHAR) _totlower(this->m_ptszToken[i])
  353. : this->m_ptszToken[i]);
  354. if (ptn->m_tch != tch)
  355. IRTLASSERT(false);
  356. IRTLASSERT(ptn->m_pParent != NULL && ptn->m_pParent->m_pChild != NULL);
  357. const _Node* ptn2;
  358. // check to see if ptn really is a child of its parent
  359. for (ptn2 = ptn->m_pParent->m_pChild;
  360. ptn2 != ptn && ptn2 != NULL;
  361. ptn2 = ptn2->m_pSibling)
  362. {}
  363. IRTLASSERT(ptn2 == ptn);
  364. // check that ptn->m_pParent->m_tchMaxChild is correct
  365. for (ptn2 = ptn->m_pParent->m_pChild;
  366. ptn2->m_pSibling != NULL;
  367. ptn2 = ptn2->m_pSibling)
  368. {
  369. IRTLASSERT(ptn2->m_tch > TCHAR_MIN
  370. && ptn2->m_tch < ptn2->m_pSibling->m_tch);
  371. }
  372. IRTLASSERT(ptn->m_pParent->m_tchMaxChild == ptn2->m_tch);
  373. ptn = ptn->m_pParent;
  374. IRTLASSERT(ptn->m_ptok != this->m_ptok);
  375. }
  376. // check to see if ptn == CTrie::m_tnRoot
  377. IRTLASSERT(ptn->m_pParent == NULL && ptn->m_pSibling == NULL
  378. && ptn->m_tch == TCHAR_MIN && ptn->m_ptok == NULL);
  379. return true;
  380. }
  381. // CTrieNode::AssertValid
  382. // Validate a CTrieNode object
  383. template <class _TOKEN, bool fIgnoreCase, bool fDeleteTokens>
  384. void
  385. CTrieNode<_TOKEN, fIgnoreCase, fDeleteTokens>::AssertValid() const
  386. {
  387. IRTLASSERT(CheckNodeToken());
  388. }
  389. // CTrieNode::Dump
  390. // Dump a CTrieNode object
  391. template <class _TOKEN, bool fIgnoreCase, bool fDeleteTokens>
  392. void
  393. CTrieNode<_TOKEN, fIgnoreCase, fDeleteTokens>::Dump() const
  394. {
  395. // TODO: flesh out
  396. }
  397. #endif // _DEBUG
  398. //-----------------------------------------------------------------------------
  399. // CTrie implementation
  400. // CTrie::CTrie
  401. // ctor
  402. template <class _TOKEN, bool fIgnoreCase, bool fDeleteTokens>
  403. CTrie<_TOKEN, fIgnoreCase, fDeleteTokens>::CTrie()
  404. {
  405. Flush();
  406. }
  407. // CTrie::~CTrie
  408. // dtor
  409. template <class _TOKEN, bool fIgnoreCase, bool fDeleteTokens>
  410. CTrie<_TOKEN, fIgnoreCase, fDeleteTokens>::~CTrie()
  411. {
  412. Flush();
  413. }
  414. #ifndef _UNICODE
  415. // CTrie::_CharPresent
  416. template <class _TOKEN, bool fIgnoreCase, bool fDeleteTokens>
  417. inline bool
  418. CTrie<_TOKEN, fIgnoreCase, fDeleteTokens>::_CharPresent(
  419. CHAR ch) const
  420. {
  421. IRTLASSERT(CHAR_MIN <= ch && ch <= CHAR_MAX);
  422. const UINT i = ch - CHAR_MIN; // CHAR_MIN is -128 for `signed char'
  423. return m_afCharPresent[i >> 3] & (1 << (i & 7)) ? true : false;
  424. }
  425. // CTrie::_SetCharPresent
  426. template <class _TOKEN, bool fIgnoreCase, bool fDeleteTokens>
  427. inline void
  428. CTrie<_TOKEN, fIgnoreCase, fDeleteTokens>::_SetCharPresent(
  429. CHAR ch,
  430. bool f)
  431. {
  432. IRTLASSERT(CHAR_MIN <= ch && ch <= CHAR_MAX);
  433. const UINT i = ch - CHAR_MIN;
  434. if (f)
  435. m_afCharPresent[i >> 3] |= (1 << (i & 7));
  436. else
  437. m_afCharPresent[i >> 3] &= ~(1 << (i & 7));
  438. }
  439. #endif // !UNICODE
  440. // CTrie::AddToken
  441. // Add search string `ptszToken' to trie, which will return `ptok'
  442. // if searched for in Search().
  443. template <class _TOKEN, bool fIgnoreCase, bool fDeleteTokens>
  444. bool
  445. CTrie<_TOKEN, fIgnoreCase, fDeleteTokens>::AddToken(
  446. LPCTSTR ptszToken,
  447. const _TOKEN* const ptok)
  448. {
  449. if (ptok == NULL || ptszToken == NULL || *ptszToken == _T('\0'))
  450. {
  451. IRTLASSERT(false);
  452. return false;
  453. }
  454. const int cLen = _tcslen(ptszToken);
  455. _Node* ptnParent = &m_tnRoot;
  456. for (int i = 0; i < cLen; ++i)
  457. {
  458. IRTLASSERT(ptnParent != NULL);
  459. _Node* ptn = ptnParent->m_pChild;
  460. const TCHAR tch = (fIgnoreCase
  461. ? (TCHAR) _totlower(ptszToken[i])
  462. : ptszToken[i]);
  463. const _TOKEN* ptok2 = (i == cLen - 1) ? ptok : NULL;
  464. LPCTSTR ptsz2 = (i == cLen - 1) ? ptszToken : NULL;
  465. while (ptn != NULL && ptn->m_tch < tch)
  466. ptn = ptn->m_pSibling;
  467. if (ptn == NULL || ptn->m_tch > tch)
  468. {
  469. ptnParent = new _Node(ptnParent, ptok2, tch, ptsz2);
  470. }
  471. else
  472. {
  473. IRTLASSERT(ptn->m_tch == tch);
  474. ptn->SetData(ptok2, ptsz2);
  475. ptnParent = ptn;
  476. }
  477. IRTLASSERT(ptnParent->CheckNodeToken());
  478. }
  479. m_tchMinChild = m_tnRoot.m_pChild->m_tch;
  480. m_tchMaxChild = m_tnRoot.m_tchMaxChild;
  481. #ifdef _DEBUG
  482. m_ctchMaxTokenLen = max(m_ctchMaxTokenLen, cLen);
  483. #endif
  484. IRTLASSERT(TCHAR_MIN < m_tchMinChild && m_tchMinChild <= m_tchMaxChild);
  485. #ifndef _UNICODE
  486. // Keep a map of the initial letter of each token, to speed up searches
  487. if (fIgnoreCase)
  488. {
  489. _SetCharPresent(tolower(ptszToken[0]), true);
  490. _SetCharPresent(toupper(ptszToken[0]), true);
  491. }
  492. else
  493. _SetCharPresent(ptszToken[0], true);
  494. #endif // !UNICODE
  495. #ifdef _DEBUG
  496. int nTemp;
  497. const _TOKEN* ptok2 = Search(ptszToken, &nTemp);
  498. IRTLASSERT(ptok2 == ptok && nTemp == cLen);
  499. #endif // _DEBUG
  500. return true;
  501. }
  502. // CTrie::Search
  503. // Search trie for `ptszSearch', returning count of characters
  504. // matched in `pctchMatched' (if non-NULL), matching at most `nMaxLen'
  505. // characters, if nMaxLen != 0, or _tcslen(ptszSearch) otherwise.
  506. template <class _TOKEN, bool fIgnoreCase, bool fDeleteTokens>
  507. const _TOKEN*
  508. CTrie<_TOKEN, fIgnoreCase, fDeleteTokens>::Search(
  509. LPCTSTR ptszSearch,
  510. int* pctchMatched /* = NULL */,
  511. const int nMaxLen /* = 0 */) const
  512. {
  513. // Set count of matched characters
  514. if (pctchMatched != NULL)
  515. *pctchMatched = 0;
  516. #ifndef _UNICODE
  517. if (! _CharPresent(ptszSearch[0]))
  518. return NULL;
  519. TCHAR tch;
  520. #else // UNICODE
  521. TCHAR tch = fIgnoreCase ? (TCHAR) _totlower(ptszSearch[0]) : ptszSearch[0];
  522. if (tch < m_tchMinChild || m_tchMaxChild < tch)
  523. return NULL;
  524. #endif // UNICODE
  525. // For some uses (e.g., ptszSearch is not '\0'-terminated), nMaxLen is
  526. // specified. If it's not specified, use the length of the string.
  527. const int cLen = (nMaxLen != 0) ? nMaxLen : _tcslen(ptszSearch);
  528. IRTLASSERT(0 < cLen);
  529. bool fOvershot = true;
  530. const _Node* ptnParent = &m_tnRoot;
  531. const _Node* ptn = NULL;
  532. int i;
  533. // Find the longest approximate match. For example, if we have "foo"
  534. // and "foobar" in the trie and we're asked to match "fool", we'll work
  535. // our way down to "foob", then backtrack up to "foo".
  536. for (i = 0; i < cLen; ++i)
  537. {
  538. IRTLASSERT(ptnParent != NULL);
  539. ptn = ptnParent->m_pChild;
  540. IRTLASSERT(ptn != NULL && ptn->m_pParent == ptnParent);
  541. tch = fIgnoreCase ? (TCHAR) _totlower(ptszSearch[i]) : ptszSearch[i];
  542. IRTLASSERT(tch >= TCHAR_MIN);
  543. if (ptnParent->m_tchMaxChild < tch)
  544. {
  545. IRTLASSERT(i > 0);
  546. break;
  547. }
  548. while (ptn != NULL && ptn->m_tch < tch)
  549. ptn = ptn->m_pSibling;
  550. // failed to match?
  551. if (ptn == NULL || ptn->m_tch > tch)
  552. {
  553. IRTLASSERT(ptn == NULL || ptn->m_tch <= ptnParent->m_tchMaxChild);
  554. if (i == 0)
  555. return NULL;
  556. break;
  557. }
  558. else
  559. {
  560. IRTLASSERT(ptn->m_tch == tch);
  561. IRTLASSERT(ptn->m_pParent->m_tchMaxChild >= tch);
  562. if (ptn->m_pChild == NULL)
  563. {
  564. IRTLASSERT(ptn->m_ptok != NULL);
  565. fOvershot = false;
  566. break;
  567. }
  568. ptnParent = ptn;
  569. }
  570. }
  571. if (fOvershot)
  572. {
  573. --i; ptn = ptnParent; // back up one character
  574. }
  575. else
  576. IRTLASSERT(ptn->m_pChild == NULL);
  577. IRTLASSERT(0 <= i && i < cLen);
  578. IRTLASSERT(ptn != NULL && ptn != &m_tnRoot);
  579. // we've found an approximate match; backtrack until we find an exact match
  580. do
  581. {
  582. IRTLASSERT(ptn != NULL);
  583. IRTLASSERT(ptn->m_tch == (fIgnoreCase
  584. ? (TCHAR) _totlower(ptszSearch[i])
  585. : ptszSearch[i]));
  586. IRTLASSERT(ptn->CheckNodeToken());
  587. const _TOKEN* const ptok = ptn->m_ptok;
  588. if (ptok != NULL)
  589. {
  590. IRTLASSERT(i == (int) _tcslen(ptn->m_ptszToken) - 1);
  591. if (pctchMatched != NULL)
  592. *pctchMatched = i+1;
  593. return ptok;
  594. }
  595. ptn = ptn->m_pParent;
  596. } while (--i >= 0);
  597. return NULL;
  598. }
  599. // CTrie::Flush
  600. // flush all nodes leaving an empty trie
  601. template <class _TOKEN, bool fIgnoreCase, bool fDeleteTokens>
  602. void
  603. CTrie<_TOKEN, fIgnoreCase, fDeleteTokens>::Flush()
  604. {
  605. if (m_tnRoot.m_pChild != NULL)
  606. _DeleteTrie(m_tnRoot.m_pChild);
  607. m_tnRoot.m_pChild = NULL; // or ~CTrieNode will ASSERT
  608. m_tnRoot.m_tchMaxChild = TCHAR_MIN;
  609. m_tchMinChild = m_tchMaxChild = TCHAR_MIN;
  610. #ifdef _DEBUG
  611. m_ctchMaxTokenLen = 0;
  612. #endif
  613. #ifndef _UNICODE
  614. memset(m_afCharPresent, 0, sizeof(m_afCharPresent));
  615. #endif
  616. }
  617. // CTrie::_DeleteTrie
  618. // recursively delete a subtrie
  619. template <class _TOKEN, bool fIgnoreCase, bool fDeleteTokens>
  620. void
  621. CTrie<_TOKEN, fIgnoreCase, fDeleteTokens>::_DeleteTrie(
  622. _Node* ptn)
  623. {
  624. if (ptn == NULL)
  625. {
  626. IRTLASSERT(false);
  627. return;
  628. }
  629. do
  630. {
  631. if (ptn->m_pChild != NULL)
  632. {
  633. _DeleteTrie(ptn->m_pChild);
  634. ptn->m_pChild = NULL; // or ~CTrieNode will ASSERT
  635. }
  636. _Node* ptnSibling = ptn->m_pSibling;
  637. delete ptn;
  638. ptn = ptnSibling; // tail recursion
  639. } while (ptn != NULL);
  640. }
  641. //-----------------------------------------------------------------------------
  642. // CTrie diagnostics
  643. #ifdef _DEBUG
  644. // CTrie::AssertValid
  645. template <class _TOKEN, bool fIgnoreCase, bool fDeleteTokens>
  646. void
  647. CTrie<_TOKEN, fIgnoreCase, fDeleteTokens>::AssertValid() const
  648. {
  649. TCHAR* ptszName = static_cast<TCHAR*>
  650. (_alloca(sizeof(TCHAR) * (m_ctchMaxTokenLen+1)));
  651. *ptszName = _T('\0');
  652. ASSERT_VALID(&m_tnRoot);
  653. IRTLASSERT(m_tnRoot.m_tchMaxChild == m_tchMaxChild);
  654. if (m_tnRoot.m_pChild != NULL)
  655. {
  656. IRTLASSERT(m_tchMinChild == m_tnRoot.m_pChild->m_tch);
  657. IRTLASSERT(m_ctchMaxTokenLen > 0);
  658. _AssertWalk(m_tnRoot.m_pChild, ptszName, 0);
  659. }
  660. else
  661. {
  662. IRTLASSERT(m_tchMinChild == TCHAR_MIN
  663. && m_tchMinChild == m_tchMaxChild);
  664. IRTLASSERT(m_ctchMaxTokenLen == 0);
  665. }
  666. }
  667. // CTrie::_AssertWalk
  668. // recursively validate a subtrie
  669. template <class _TOKEN, bool fIgnoreCase, bool fDeleteTokens>
  670. void
  671. CTrie<_TOKEN, fIgnoreCase, fDeleteTokens>::_AssertWalk(
  672. _Node* ptn,
  673. LPTSTR ptszName,
  674. int iLevel) const
  675. {
  676. IRTLASSERT(iLevel < m_ctchMaxTokenLen);
  677. do
  678. {
  679. ASSERT_VALID(ptn);
  680. ptszName[iLevel] = ptn->m_tch;
  681. ptszName[iLevel+1] = _T('\0');
  682. if (ptn->m_ptok != NULL)
  683. {
  684. IRTLASSERT(ptn->m_ptszToken != NULL);
  685. if (fIgnoreCase)
  686. IRTLASSERT(_tcsicmp(ptszName, ptn->m_ptszToken) == 0);
  687. else
  688. IRTLASSERT(_tcscmp(ptszName, ptn->m_ptszToken) == 0);
  689. ASSERT_VALID(ptn->m_ptok);
  690. }
  691. if (ptn->m_pChild != NULL)
  692. _AssertWalk(ptn->m_pChild, ptszName, iLevel+1);
  693. ptn = ptn->m_pSibling; // tail recursion
  694. } while (ptn != NULL);
  695. }
  696. // CTrie::Dump
  697. template <class _TOKEN, bool fIgnoreCase, bool fDeleteTokens>
  698. void
  699. CTrie<_TOKEN, fIgnoreCase, fDeleteTokens>::Dump() const
  700. {
  701. int cNodes = 0, cTokens = 0;
  702. TCHAR* ptszName = static_cast<TCHAR*>
  703. (_alloca(sizeof(TCHAR) * (m_ctchMaxTokenLen+1)));
  704. *ptszName = _T('\0');
  705. TRACE0("Dumping trie...\n");
  706. if (m_tnRoot.m_pChild != NULL)
  707. _DumpWalk(m_tnRoot.m_pChild, ptszName, 0, cNodes, cTokens);
  708. TRACE2("%d nodes, %d tokens\n", cNodes, cTokens);
  709. }
  710. // CTrie::_DumpWalk
  711. // recursively dump a subtrie
  712. template <class _TOKEN, bool fIgnoreCase, bool fDeleteTokens>
  713. void
  714. CTrie<_TOKEN, fIgnoreCase, fDeleteTokens>::_DumpWalk(
  715. _Node* ptn,
  716. LPTSTR ptszName,
  717. int iLevel,
  718. int& rcNodes,
  719. int& rcTokens) const
  720. {
  721. IRTLASSERT(iLevel < m_ctchMaxTokenLen);
  722. do
  723. {
  724. ASSERT_VALID(ptn);
  725. ++rcNodes;
  726. ptszName[iLevel] = ptn->m_tch;
  727. ptszName[iLevel+1] = _T('\0');
  728. if (ptn->m_ptok != NULL)
  729. {
  730. ++rcTokens;
  731. IRTLASSERT(ptn->m_ptszToken != NULL);
  732. TRACE2("\t%s (%s): ", ptszName, ptn->m_ptszToken);
  733. DUMP(ptn->m_ptok);
  734. TRACE0("\n");
  735. }
  736. if (ptn->m_pChild != NULL)
  737. _DumpWalk(ptn->m_pChild, ptszName, iLevel+1, rcNodes, rcTokens);
  738. ptn = ptn->m_pSibling; // tail recursion
  739. } while (ptn != NULL);
  740. }
  741. #endif // _DEBUG
  742. #endif // __TRIE_H__