Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

906 lines
28 KiB

  1. #if !defined(LANGUAGE_IDENTIFICATION)
  2. # include "precomp.h"
  3. #endif
  4. #include "trie.h"
  5. #ifdef LANGUAGE_IDENTIFICATION
  6. # include "mymalloc.h"
  7. #endif
  8. #ifndef WINCE
  9. # include "assert.h"
  10. #else
  11. # define assert(x)
  12. #endif
  13. #include "thwbplat.h"
  14. /******************************Public*Routine******************************\
  15. * TrieInit
  16. *
  17. * Given a pointer to a resource or mapped file of a mapped file this
  18. * function allocates and initializes the trie structure.
  19. *
  20. * Returns NULL for failure, trie control structure pointer for success.
  21. *
  22. * History:
  23. * 16-Jun-1997 -by- Patrick Haluptzok patrickh
  24. * Wrote it.
  25. \**************************************************************************/
  26. TRIECTRL * WINAPI TrieInit(LPBYTE lpByte)
  27. {
  28. LPWORD lpwTables;
  29. TRIECTRL *lpTrieCtrl;
  30. LPTRIESTATS lpTrieStats;
  31. lpTrieStats = (LPTRIESTATS) lpByte;
  32. //MessageBoxW(0,L"Step#1",L"Trie.C",MB_OK);
  33. if (lpTrieStats == NULL)
  34. return(NULL);
  35. // Check the version number. This code currently only supports version 1 tries
  36. //MessageBoxW(0,L"Step#2",L"Trie.C",MB_OK);
  37. if (lpTrieStats->version > 1)
  38. return NULL;
  39. //
  40. // Allocate space for the control structure and the table of SR offsets
  41. //
  42. //MessageBoxW(0,L"Step#3",L"Trie.C",MB_OK);
  43. if (!fNLGNewMemory(&lpTrieCtrl, sizeof(TRIECTRL)))
  44. return NULL;
  45. //
  46. // Allocate space for the complete header, copy the fixed part and read in the rest
  47. //
  48. //MessageBoxW(0,L"Step#4",L"Trie.C",MB_OK);
  49. lpByte += lpTrieStats->cbHeader;
  50. lpTrieCtrl->lpTrieStats = lpTrieStats;
  51. //
  52. // Set up the table pointers (all these tables are inside the TRIECTRL allocation)
  53. //
  54. lpwTables = (LPWORD)(lpTrieStats+1);
  55. lpTrieCtrl->lpwCharFlagsCodes = lpwTables;
  56. lpwTables += lpTrieStats->cCharFlagsCodesMax;
  57. if ((DWORD_PTR) lpwTables & 0x02) // Deal with possible data mis-alignment
  58. lpwTables++;
  59. lpTrieCtrl->lpwTagsCodes = lpwTables;
  60. lpwTables += lpTrieStats->cTagsCodesMax;
  61. if ((DWORD_PTR) lpwTables & 0x02) // Deal with possible data mis-alignment
  62. lpwTables++;
  63. lpTrieCtrl->lpwMRPointersCodes = lpwTables;
  64. lpwTables += lpTrieStats->cMRPointersCodesMax;
  65. if ((DWORD_PTR) lpwTables & 0x02) // Deal with possible data mis-alignment
  66. lpwTables++;
  67. lpTrieCtrl->lpwSROffsetsCodes = lpwTables;
  68. lpwTables += lpTrieStats->cSROffsetsCodesMax;
  69. if ((DWORD_PTR) lpwTables & 0x02) // Deal with possible data mis-alignment
  70. lpwTables++;
  71. lpTrieCtrl->lpCharFlags = (LPCHARFLAGS)lpwTables;
  72. lpwTables = (LPWORD)(lpTrieCtrl->lpCharFlags + lpTrieStats->cUniqueCharFlags);
  73. lpTrieCtrl->lpwTags = (DWORD *)lpwTables;
  74. lpwTables += (2 * lpTrieStats->cUniqueTags);
  75. lpTrieCtrl->lpwMRPointers = (DWORD *) lpwTables;
  76. lpwTables += (2 * lpTrieStats->cUniqueMRPointers);
  77. lpTrieCtrl->lpwSROffsets = (DWORD *) lpwTables;
  78. lpwTables += (2 * lpTrieStats->cUniqueSROffsets);
  79. //
  80. // These tables should exactly fill the allocation
  81. //
  82. assert((LPBYTE)lpwTables == (LPBYTE)lpTrieStats + lpTrieStats->cbHeader);
  83. //
  84. // Init trie pointers
  85. //
  86. lpTrieCtrl->lpbTrie = (LPBYTE)lpByte;
  87. return (void *)lpTrieCtrl;
  88. }
  89. /******************************Public*Routine******************************\
  90. * TrieFree
  91. *
  92. * Free the resources allocated for the control structure.
  93. *
  94. * History:
  95. * 16-Jun-1997 -by- Patrick Haluptzok patrickh
  96. * Wrote it.
  97. \**************************************************************************/
  98. void WINAPI TrieFree(LPTRIECTRL lpTrieCtrl)
  99. {
  100. //
  101. // Finally free the control structure and all the tables. STILL MUST FREE THIS FOR ROM
  102. //
  103. NLGFreeMemory(lpTrieCtrl);
  104. }
  105. /* Deompress a single symbol using base-256 huffman from a compressed data structure. piSymbol
  106. points to a space to hold the decompressed value, which is an index to a frequency-ordered
  107. table of symbols (0 is most frequent). pcCodes is a table of code lengths returned from
  108. HuffmanComputeTable. pbData is a pointer to memory that contains the encoded data. The
  109. return value is the number of bytes decoded. */
  110. int DecompressSymbol(WORD *piSymbol, WORD *pcCodes, unsigned char *pbData)
  111. {
  112. int cBytes = 0;
  113. WORD wCode = 0, wiSymbol = 0;
  114. /* At each stage in this loop, we're trying to see if we've got a length-n code.
  115. dwCode is which length-n code it would have to be. If there aren't that many length-n codes,
  116. we have to try n+1. To do that, we subtract the number of length-n codes and shift in
  117. the next byte. dwiSymbol is the symbol number of the first length-n code. */
  118. while (1)
  119. {
  120. wCode += *pbData++;
  121. ++cBytes;
  122. if (wCode < *pcCodes)
  123. {
  124. break;
  125. }
  126. wiSymbol += *pcCodes;
  127. wCode -= *pcCodes++;
  128. wCode <<= 8;
  129. }
  130. /* Now that dwCode is a valid number of a length-cBytes code, we can just add it to
  131. dwiSymbol, because we've already added the counts of the shorter codes to it. */
  132. wiSymbol += wCode;
  133. *piSymbol = wiSymbol;
  134. return cBytes;
  135. }
  136. DWORD Get3ByteAddress(BYTE *pb)
  137. {
  138. return ((((pb[0] << 8) | pb[1]) << 8) | pb[2]) & 0x00ffffff;
  139. }
  140. void WINAPI TrieDecompressNode(LPTRIECTRL lpTrieCtrl, LPTRIESCAN lpTrieScan)
  141. {
  142. TRIESTATS *lpTrieStats;
  143. DWORD wOffset;
  144. DWORD wOffset2;
  145. WORD wCode;
  146. DWORD dwCode;
  147. BYTE wMask;
  148. BYTE bMask;
  149. int iTag;
  150. lpTrieStats = lpTrieCtrl->lpTrieStats;
  151. /* If this is an initial call, use the first byte in the first SR segment */
  152. if (lpTrieScan->wFlags == 0)
  153. {
  154. lpTrieScan->lpbSRDown = 0;
  155. lpTrieScan->lpbNode = lpTrieCtrl->lpbTrie;
  156. }
  157. /* Decompress the char/flags */
  158. lpTrieScan->lpbNode += DecompressSymbol(&wCode, lpTrieCtrl->lpwCharFlagsCodes, lpTrieScan->lpbNode);
  159. lpTrieScan->wch = lpTrieCtrl->lpCharFlags[wCode].wch;
  160. lpTrieScan->wFlags = lpTrieCtrl->lpCharFlags[wCode].wFlags;
  161. // Decompress skip enumeration
  162. if (lpTrieScan->wFlags & TRIE_NODE_SKIP_COUNT)
  163. {
  164. // Values greater than 127 are really 15 or 21 bit values.
  165. dwCode = (DWORD) *lpTrieScan->lpbNode++;
  166. if (dwCode >= 0x00c0)
  167. {
  168. dwCode = ((dwCode & 0x003f) << 15);
  169. dwCode |= ((((DWORD) *lpTrieScan->lpbNode++) & 0x007f) << 8);
  170. dwCode |= (((DWORD) *lpTrieScan->lpbNode++) & 0x00ff);
  171. }
  172. else if (dwCode >= 0x0080)
  173. dwCode = ((dwCode & 0x007f) << 8) | (((DWORD) *lpTrieScan->lpbNode++) & 0x00ff);
  174. lpTrieScan->cSkipWords = dwCode;
  175. }
  176. /* Code to decompress enumeration goes here */
  177. if (lpTrieScan->wFlags & TRIE_NODE_COUNT)
  178. {
  179. // Values greater than 127 are really 15 or 21 bit values.
  180. dwCode = (DWORD) *lpTrieScan->lpbNode++;
  181. if (dwCode >= 0x00c0)
  182. {
  183. dwCode = ((dwCode & 0x003f) << 15);
  184. dwCode |= ((((DWORD) *lpTrieScan->lpbNode++) & 0x007f) << 8);
  185. dwCode |= (((DWORD) *lpTrieScan->lpbNode++) & 0x00ff);
  186. }
  187. else if (dwCode >= 0x0080)
  188. dwCode = ((dwCode & 0x007f) << 8) | (((DWORD) *lpTrieScan->lpbNode++) & 0x00ff);
  189. lpTrieScan->cWords = dwCode;
  190. // Decompress the tagged enumeration counts
  191. wMask = 1;
  192. for (iTag = 0; iTag < MAXTAGS; iTag++)
  193. {
  194. if (lpTrieCtrl->lpTrieStats->wEnumMask & wMask)
  195. {
  196. // Values greater than 127 are really 15 or 21 bit values.
  197. dwCode = (DWORD) *lpTrieScan->lpbNode++;
  198. if (dwCode >= 0x00c0)
  199. {
  200. dwCode = ((dwCode & 0x003f) << 15);
  201. dwCode |= ((((DWORD) *lpTrieScan->lpbNode++) & 0x007f) << 8);
  202. dwCode |= (((DWORD) *lpTrieScan->lpbNode++) & 0x00ff);
  203. }
  204. else if (dwCode >= 0x0080)
  205. dwCode = ((dwCode & 0x007f) << 8) | (((DWORD) *lpTrieScan->lpbNode++) & 0x00ff);
  206. lpTrieScan->aTags[iTag].cTag = dwCode;
  207. }
  208. else
  209. lpTrieScan->aTags[iTag].cTag = 0;
  210. wMask <<= 1;
  211. }
  212. }
  213. else
  214. lpTrieScan->cWords = 0;
  215. // Any tagged data for this node follows the counts
  216. lpTrieScan->wMask = 0;
  217. if (lpTrieScan->wFlags & TRIE_NODE_TAGGED)
  218. {
  219. // If there is only one tagged field, the mask byte won't be stored
  220. if (lpTrieCtrl->lpTrieStats->cTagFields == 1)
  221. bMask = lpTrieCtrl->lpTrieStats->wDataMask;
  222. else
  223. bMask = *lpTrieScan->lpbNode++;
  224. // Now that we know which elements are stored here, pull them in their proper place
  225. wMask = 1;
  226. for (iTag = 0; bMask && (iTag < MAXTAGS); iTag++)
  227. {
  228. if (lpTrieCtrl->lpTrieStats->wDataMask & bMask & wMask)
  229. {
  230. lpTrieScan->lpbNode += DecompressSymbol(&wCode, lpTrieCtrl->lpwTagsCodes, lpTrieScan->lpbNode);
  231. lpTrieScan->aTags[iTag].dwData = lpTrieCtrl->lpwTags[wCode];
  232. lpTrieScan->wMask |= wMask;
  233. }
  234. bMask &= ~wMask;
  235. wMask <<= 1;
  236. }
  237. }
  238. // There are two flavors of right pointers: Multiref and Skip.
  239. if (lpTrieScan->wFlags & TRIE_NODE_RIGHT)
  240. {
  241. if (lpTrieScan->wFlags & TRIE_NODE_SKIP)
  242. {
  243. lpTrieScan->lpbNode += DecompressSymbol(&wCode,lpTrieCtrl->lpwSROffsetsCodes,lpTrieScan->lpbNode);
  244. wOffset2 = lpTrieCtrl->lpwSROffsets[wCode]; // Only add this after entire node is decompressed
  245. }
  246. else
  247. {
  248. /* Multiref: The down pointer is encoded directly */
  249. lpTrieScan->lpbNode += DecompressSymbol(&wCode, lpTrieCtrl->lpwMRPointersCodes, lpTrieScan->lpbNode);
  250. lpTrieScan->lpbRight = lpTrieCtrl->lpbTrie + lpTrieCtrl->lpwMRPointers[wCode];
  251. }
  252. }
  253. else
  254. lpTrieScan->lpbRight = NULL;
  255. // There are 4 kinds of down pointer: Absolute, Inline, Multiref, and Singleref Offset.
  256. // Each requires different decompression
  257. if (lpTrieScan->wFlags & TRIE_DOWN_ABS)
  258. {
  259. // Immediate. The next 3 bytes are the absolute offset from the base of the trie.
  260. lpTrieScan->lpbDown = lpTrieCtrl->lpbTrie + Get3ByteAddress(lpTrieScan->lpbNode);
  261. lpTrieScan->lpbNode += 3;
  262. }
  263. else if (lpTrieScan->wFlags & TRIE_DOWN_INLINE)
  264. {
  265. /* Inline: The down pointer points to the next sequential byte (so it isn't stored) */
  266. assert(lpTrieScan->wFlags&TRIE_NODE_END);
  267. lpTrieScan->lpbSRDown = lpTrieScan->lpbDown = lpTrieScan->lpbNode;
  268. }
  269. else if (lpTrieScan->wFlags & TRIE_DOWN_MULTI)
  270. {
  271. /* Multiref: The down pointer is encoded directly */
  272. lpTrieScan->lpbNode += DecompressSymbol(&wCode,lpTrieCtrl->lpwMRPointersCodes,
  273. lpTrieScan->lpbNode);
  274. lpTrieScan->lpbDown = lpTrieCtrl->lpbTrie + lpTrieCtrl->lpwMRPointers[wCode];
  275. }
  276. else if (lpTrieScan->wFlags & TRIE_NODE_DOWN)
  277. {
  278. /* SR Offset. The down pointer is encoded as an offset from the LAST downpointer
  279. into this singleref segment. So we have to keep the old one around so we can add to it */
  280. lpTrieScan->lpbNode += DecompressSymbol(&wCode,lpTrieCtrl->lpwSROffsetsCodes,
  281. lpTrieScan->lpbNode);
  282. if (lpTrieScan->lpbSRDown == 0)
  283. {
  284. lpTrieScan->lpbSRDown = lpTrieScan->lpbNode; // We offset from the end of the first node when going into a new state.
  285. }
  286. wOffset = lpTrieCtrl->lpwSROffsets[wCode];
  287. lpTrieScan->lpbSRDown += wOffset;
  288. lpTrieScan->lpbDown = lpTrieScan->lpbSRDown;
  289. }
  290. else
  291. lpTrieScan->lpbDown = NULL;
  292. // We couldn't deal with this until now, since skip pointers are always delta encoded from the end of node
  293. if ((lpTrieScan->wFlags & (TRIE_NODE_RIGHT | TRIE_NODE_SKIP)) == (TRIE_NODE_RIGHT | TRIE_NODE_SKIP))
  294. lpTrieScan->lpbRight = lpTrieScan->lpbNode + wOffset2;
  295. } // TrieDecompressNode
  296. /* Given a compressed trie and a pointer to a decompresed node from it, find and decompress
  297. the next node in the same state. lpTrieScan is a user-allocated structure that holds the
  298. decompressed node and into which the new node is copied.
  299. This is equivalent to traversing a right pointer or finding the next alternative
  300. letter at the same position. If there is no next node (i.e.this is the end of the state)
  301. then TrieGetNextNode returns FALSE. To scan from the beginning of the trie, set the lpTrieScan
  302. structure to zero */
  303. BOOL WINAPI TrieGetNextNode(LPTRIECTRL lpTrieCtrl, LPTRIESCAN lpTrieScan)
  304. {
  305. // Are we at EOS?
  306. if (lpTrieScan->wFlags & TRIE_NODE_END)
  307. {
  308. // Is this is a hard EOS?
  309. if (!(lpTrieScan->wFlags & TRIE_NODE_SKIP))
  310. {
  311. // If we can follow a right pointer, do so, else fail
  312. if (lpTrieScan->wFlags & TRIE_NODE_RIGHT)
  313. lpTrieScan->lpbNode = lpTrieScan->lpbRight;
  314. else
  315. return FALSE;
  316. }
  317. // Either we're at a soft EOS or we've followed a right pointer.
  318. // Both these require us to reset the SRDown for proper decompression
  319. lpTrieScan->lpbSRDown = 0;
  320. }
  321. // Decompress the node at return success
  322. TrieDecompressNode(lpTrieCtrl, lpTrieScan);
  323. return TRUE;
  324. }
  325. BOOL WINAPI TrieSkipNextNode(LPTRIECTRL lpTrieCtrl, LPTRIESCAN lpTrieScan, WCHAR wch)
  326. {
  327. // If this is the last node in the normal or skip state, quit here
  328. if (lpTrieScan->wFlags & TRIE_NODE_END)
  329. return FALSE;
  330. // If there isn't a right pointer or if the target letter is alphabetically less then
  331. // the current letter scan right normally. Otherwise, follow the skip pointer.
  332. if (!(lpTrieScan->wFlags & TRIE_NODE_RIGHT) || (wch < lpTrieScan->wch))
  333. return TrieGetNextNode(lpTrieCtrl, lpTrieScan);
  334. lpTrieScan->lpbSRDown = 0;
  335. lpTrieScan->lpbNode = lpTrieScan->lpbRight;
  336. TrieDecompressNode(lpTrieCtrl, lpTrieScan);
  337. return TRUE;
  338. }
  339. /* Follow the down pointer to the next state. This is equivalent to accepting the character
  340. in this node and advancing to the next character position. Returns FALSE if there is no
  341. down pointer. This also decompresses the first node in the state, so all the values in
  342. lpTrieScan will be good. */
  343. BOOL WINAPI TrieGetNextState(LPTRIECTRL lpTrieCtrl, LPTRIESCAN lpTrieScan)
  344. {
  345. /* Flags can't normally be zero; that always means "top node" */
  346. if (lpTrieScan->wFlags == 0)
  347. {
  348. TrieDecompressNode(lpTrieCtrl, lpTrieScan);
  349. return TRUE;
  350. }
  351. if (!(lpTrieScan->wFlags & TRIE_NODE_DOWN))
  352. return FALSE;
  353. lpTrieScan->lpbSRDown = 0;
  354. lpTrieScan->lpbNode = lpTrieScan->lpbDown;
  355. TrieDecompressNode(lpTrieCtrl, lpTrieScan);
  356. return TRUE;
  357. } // TrieGetNextState
  358. /* Check the validity of a word or prefix. Starts from the root of pTrie looking for
  359. pwszWord. If it finds it, it returns TRUE and the user-provided lpTrieScan structure
  360. contains the final node in the word. If there is no path, TrieCheckWord returns FALSE
  361. To distinguish a valid word from a valid prefix, caller must test
  362. wFlags for TRIE_NODE_VALID. */
  363. BOOL WINAPI TrieCheckWord(LPTRIECTRL lpTrieCtrl, LPTRIESCAN lpTrieScan, wchar_t far* lpwszWord)
  364. {
  365. /* Start at the root of the trie and loop through all the letters in the word */
  366. memset(lpTrieScan,0,sizeof(*lpTrieScan));
  367. while (*lpwszWord)
  368. {
  369. /* Each new letter means we need to go to a new state. If there is none,
  370. the word is not in this trie */
  371. if (!TrieGetNextState(lpTrieCtrl, lpTrieScan))
  372. return FALSE;
  373. /* Now we walk across the state looking for this character. If we don't find
  374. it, this word is not in this trie */
  375. while (lpTrieScan->wch != *lpwszWord)
  376. {
  377. if (!TrieSkipNextNode(lpTrieCtrl, lpTrieScan, *lpwszWord))
  378. return FALSE;
  379. }
  380. ++lpwszWord;
  381. }
  382. return TRUE;
  383. } // TrieCheckWord
  384. // Find the index to the word in the trie.
  385. DWORD CountWords(TRIECTRL *ptc, TRIESCAN *pts)
  386. {
  387. TRIESCAN ts = *pts;
  388. DWORD cWords = 0;
  389. if (!TrieGetNextState(ptc, &ts))
  390. return cWords;
  391. do
  392. {
  393. if (ts.wFlags & TRIE_NODE_VALID)
  394. cWords++;
  395. cWords += CountWords(ptc, &ts);
  396. } while (TrieGetNextNode(ptc, &ts));
  397. return cWords;
  398. }
  399. int WINAPI TrieWordToIndex(TRIECTRL *ptc, wchar_t *pwszWord)
  400. {
  401. TRIESCAN ts;
  402. int ich = 0;
  403. int index = 0;
  404. BOOL bValid;
  405. memset(&ts, 0, sizeof(TRIESCAN));
  406. if (!TrieGetNextState(ptc, &ts))
  407. return FALSE;
  408. do
  409. {
  410. bValid = ts.wFlags & TRIE_NODE_VALID;
  411. // Scan to the right until we find a matching character. !!!WARNING!!! The state may not be alphabetized.
  412. // If the character doesn't match, add the subtree count to the enumeration total and slide to the right.
  413. if (ts.wch == pwszWord[ich])
  414. {
  415. ich++;
  416. // If we reached the end of word at a valid state, return the index
  417. if ((pwszWord[ich] == L'\0') && ts.wFlags & TRIE_NODE_VALID)
  418. return index;
  419. // Try going down a level
  420. if (!TrieGetNextState(ptc, &ts))
  421. return -1;
  422. }
  423. else
  424. {
  425. // Now, follow the skip pointer if exist and the alphabetic character is greater then
  426. // the pivot point. Otherwise, goto the next node. Add the sub tree count. If it's cached
  427. // use it, otherwise compute it recursively.
  428. if ((ts.wFlags & TRIE_NODE_SKIP_COUNT) && (pwszWord[ich] > ts.wch))
  429. {
  430. index += ts.cSkipWords;
  431. // This can't fail if TRIE_NODE_SKIP_COUNT is set
  432. TrieSkipNextNode(ptc, &ts, pwszWord[ich]);
  433. }
  434. else
  435. {
  436. index += (ts.wFlags & TRIE_NODE_COUNT) ? ts.cWords : CountWords(ptc, &ts);
  437. if (!TrieGetNextNode(ptc, &ts))
  438. return -1;
  439. }
  440. }
  441. // If the node we just visited was valid, increment the index
  442. if (bValid)
  443. index++;
  444. } while (TRUE);
  445. }
  446. // Given an index into the trie, return the word.
  447. BOOL WINAPI TrieIndexToWord(TRIECTRL *ptc, DWORD nIndex, wchar_t *pwszWord, int cwc)
  448. {
  449. TRIESCAN ts;
  450. int ich = 0;
  451. DWORD cWords;
  452. DWORD cSkips;
  453. memset(&ts, 0, sizeof(TRIESCAN));
  454. if (!TrieGetNextState(ptc, &ts))
  455. return FALSE;
  456. do
  457. {
  458. // If we're at the end of the buffer, fail
  459. if (ich + 1 >= cwc)
  460. return FALSE;
  461. // Remember this node's character
  462. pwszWord[ich] = ts.wch;
  463. // If we're on a valid word AND we've reached the index we're looking for, exit the loop
  464. if (ts.wFlags & TRIE_NODE_VALID)
  465. {
  466. if (!nIndex)
  467. break;
  468. nIndex--;
  469. }
  470. // Get the count of words in this subtree.
  471. cWords = (ts.wFlags & TRIE_NODE_COUNT) ? ts.cWords : CountWords(ptc, &ts);
  472. cSkips = (ts.wFlags & TRIE_NODE_SKIP_COUNT) ? ts.cSkipWords : 0x7fffffff;
  473. // Scan to the right until the word count of the subtree would be greater than or equal to the index
  474. // we're looking for. Descend that trie and repeat. !!!WARNING!!! The state may not be alphabetized.
  475. // If we can use a skip count, do so.
  476. if (nIndex < cWords)
  477. {
  478. if (!TrieGetNextState(ptc, &ts))
  479. return FALSE;
  480. ich++; // Advance the character position
  481. }
  482. else
  483. {
  484. if (nIndex >= cSkips)
  485. {
  486. nIndex -= cSkips;
  487. ts.lpbSRDown = 0;
  488. ts.lpbNode = ts.lpbRight;
  489. TrieDecompressNode(ptc, &ts);
  490. }
  491. else
  492. {
  493. nIndex -= cWords;
  494. if (!TrieGetNextNode(ptc, &ts))
  495. return FALSE;
  496. }
  497. }
  498. } while (TRUE);
  499. pwszWord[++ich] = L'\0'; // Null terminate the string
  500. return ts.wFlags & TRIE_NODE_VALID; // Return validity
  501. }
  502. int WINAPI TriePrefixToRange(TRIECTRL *ptc, wchar_t *pwszWord, int *piStart)
  503. {
  504. TRIESCAN ts;
  505. int ich = 0;
  506. int cnt;
  507. BOOL bValid;
  508. memset(&ts, 0, sizeof(TRIESCAN));
  509. *piStart = 0;
  510. if (!TrieGetNextState(ptc, &ts))
  511. return 0;
  512. // Deal with special case of empty string
  513. if (pwszWord && !*pwszWord)
  514. return ptc->lpTrieStats->cWords;
  515. do
  516. {
  517. // Get the count of words below this prefix
  518. cnt = (ts.wFlags & TRIE_NODE_COUNT) ? ts.cWords : CountWords(ptc, &ts);
  519. // If the node we just arrived at is valid, increment the count
  520. bValid = ts.wFlags & TRIE_NODE_VALID;
  521. // Scan to the right until we find a matching character. !!!WARNING!!! The state may not be alphabetized.
  522. // If the character doesn't match, add the subtree count to the enumeration total and slide to the right.
  523. if (ts.wch == pwszWord[ich])
  524. {
  525. ich++;
  526. // If we reached the end of prefix, return the count remaining below
  527. if (pwszWord[ich] == L'\0')
  528. {
  529. if (bValid)
  530. cnt++;
  531. return cnt;
  532. }
  533. // Try going down a level
  534. if (!TrieGetNextState(ptc, &ts))
  535. return 0;
  536. }
  537. else
  538. {
  539. // Add the sub tree count.
  540. *piStart += cnt;
  541. // Try the next letter in this state
  542. if (!TrieGetNextNode(ptc, &ts))
  543. return 0;
  544. }
  545. if (bValid)
  546. (*piStart)++;
  547. } while (TRUE);
  548. }
  549. // TAGS
  550. // Find the index to the word in the trie.
  551. DWORD CountTags(TRIECTRL *ptc, TRIESCAN *pts, DWORD wMask, int iTag)
  552. {
  553. TRIESCAN ts = *pts;
  554. DWORD cTags = 0;
  555. if (!TrieGetNextState(ptc, &ts))
  556. return cTags;
  557. do
  558. {
  559. if (ts.wFlags & wMask)
  560. cTags++;
  561. cTags += CountTags(ptc, &ts, wMask, iTag);
  562. } while (TrieGetNextNode(ptc, &ts));
  563. return cTags;
  564. }
  565. int WINAPI TrieWordToTagIndex(TRIECTRL *ptc, wchar_t *pwszWord, int iTag)
  566. {
  567. TRIESCAN ts;
  568. int ich = 0;
  569. int index = 0;
  570. BOOL bValid;
  571. DWORD wMask = 1 << iTag;
  572. memset(&ts, 0, sizeof(TRIESCAN));
  573. if (!TrieGetNextState(ptc, &ts))
  574. return FALSE;
  575. do
  576. {
  577. bValid = ts.wFlags & wMask;
  578. // Scan to the right until we find a matching character. !!!WARNING!!! The state may not be alphabetized.
  579. // If the character doesn't match, add the subtree count to the enumeration total and slide to the right.
  580. if (ts.wch == pwszWord[ich])
  581. {
  582. ich++;
  583. // If we reached the end of word at a valid state, return the index
  584. if ((pwszWord[ich] == L'\0') && ts.wFlags & wMask)
  585. return index;
  586. // Try going down a level
  587. if (!TrieGetNextState(ptc, &ts))
  588. return -1;
  589. }
  590. else
  591. {
  592. // Add the sub tree count. If it's cached use it, otherwise compute it recursively.
  593. index += (ts.wFlags & TRIE_NODE_COUNT) ? ts.aTags[iTag].cTag : CountTags(ptc, &ts, wMask, iTag);
  594. if (!TrieGetNextNode(ptc, &ts))
  595. return -1;
  596. }
  597. // If the node we just visited was valid, increment the index
  598. if (bValid)
  599. index++;
  600. } while (TRUE);
  601. }
  602. // Given an index into the trie, return the word.
  603. BOOL WINAPI TrieTagIndexToWord(TRIECTRL *ptc, DWORD nIndex, wchar_t *pwszWord, int cwc, int iTag)
  604. {
  605. TRIESCAN ts;
  606. int ich = 0;
  607. DWORD cTags;
  608. DWORD wMask = 1 << iTag;
  609. memset(&ts, 0, sizeof(TRIESCAN));
  610. if (!TrieGetNextState(ptc, &ts))
  611. return FALSE;
  612. do
  613. {
  614. // If we're at the end of the buffer, fail
  615. if (ich + 1 >= cwc)
  616. return FALSE;
  617. // Remember this node's character
  618. pwszWord[ich] = ts.wch;
  619. // If we're on a valid word AND we've reached the index we're looking for, exit the loop
  620. if (ts.wFlags & wMask)
  621. {
  622. if (!nIndex)
  623. break;
  624. nIndex--;
  625. }
  626. // Get the count of words in this subtree.
  627. cTags = (ts.wFlags & TRIE_NODE_COUNT) ? ts.aTags[iTag].cTag : CountTags(ptc, &ts, wMask, iTag);
  628. // Scan to the right until the word count of the subtree would be greater than or equal to the index
  629. // we're looking for. Descend that trie and repeat. !!!WARNING!!! The state may not be alphabetized.
  630. if (nIndex < cTags)
  631. {
  632. if (!TrieGetNextState(ptc, &ts))
  633. return FALSE;
  634. ich++; // Advance the character position
  635. }
  636. else
  637. {
  638. nIndex -= cTags;
  639. if (!TrieGetNextNode(ptc, &ts))
  640. return FALSE;
  641. }
  642. } while (TRUE);
  643. pwszWord[++ich] = L'\0'; // Null terminate the string
  644. return ts.wFlags & wMask; // Return validity
  645. }
  646. BOOL WINAPI
  647. TrieGetTagsFromWord(
  648. TRIECTRL *ptc, // Trie in which to find word
  649. wchar_t *pwszWord, // Word for which we're looking
  650. DWORD *pdw, // Returned values
  651. BYTE *pbValid // Mask for valid return values
  652. )
  653. {
  654. TRIESCAN ts;
  655. int iTag;
  656. WORD wMask;
  657. BYTE bMask = ptc->lpTrieStats->wTagsMask;
  658. if (!TrieCheckWord(ptc, &ts, pwszWord))
  659. return FALSE;
  660. if (ts.wFlags & TRIE_NODE_TAGGED)
  661. {
  662. wMask = 1;
  663. for (iTag = 0; bMask && (iTag < MAXTAGS); iTag++)
  664. {
  665. if (ts.wMask & wMask)
  666. {
  667. pdw[iTag] = ts.aTags[iTag].dwData;
  668. bMask |= wMask;
  669. }
  670. wMask <<= 1;
  671. }
  672. }
  673. *pbValid = (BYTE) wMask;
  674. return TRUE;
  675. }