Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

737 lines
20 KiB

  1. //+---------------------------------------------------------------------------
  2. //
  3. //
  4. // CThaiWordBreak
  5. //
  6. // History:
  7. // created 7/99 aarayas
  8. //
  9. // (c) 1999 Microsoft Corporation
  10. //----------------------------------------------------------------------------
  11. #include "cthwb.hpp"
  12. //+---------------------------------------------------------------------------
  13. //
  14. // Function: ExtractALT
  15. //
  16. // Synopsis: This function takes a tag and returns the Alternate Tag field extracted from it.
  17. //
  18. // Arguments:
  19. //
  20. // Modifies:
  21. //
  22. // History: created 3/00 aarayas
  23. //
  24. // Notes:
  25. //
  26. //----------------------------------------------------------------------------
  27. inline BYTE ExtractALT(DWORD dwTag)
  28. {
  29. return (BYTE) ( (dwTag & iAltMask) >> iAltShift);
  30. }
  31. //+---------------------------------------------------------------------------
  32. //
  33. // Class: CThaiWordBreak
  34. //
  35. // Synopsis: Initialize ThaiWordBreak.
  36. //
  37. // Arguments:
  38. //
  39. // Modifies:
  40. //
  41. // History: created 7/99 aarayas
  42. //
  43. // Notes:
  44. //
  45. //----------------------------------------------------------------------------
  46. #if defined (NGRAM_ENABLE)
  47. PTEC CThaiWordBreak::Init(WCHAR* wzFileName, WCHAR* wzFileNameSentStruct, WCHAR* wzFileNameTrigram)
  48. #else
  49. PTEC CThaiWordBreak::Init(WCHAR* wzFileName, WCHAR* wzFileNameTrigram)
  50. #endif
  51. {
  52. // Declare and Initialize local variables.
  53. PTEC retValue = m_trie.Init(wzFileName);
  54. #if defined (NGRAM_ENABLE)
  55. if (retValue == ptecNoErrors)
  56. {
  57. // Initialize m_thaiTrieIter.
  58. m_thaiTrieIter.Init(&trie);
  59. retValue = m_trie_sentence_struct.Init(wzFileNameSentStruct);
  60. if (retValue == ptecNoErrors)
  61. {
  62. retValue = m_trie_trigram.Init(wzFileNameTrigram);
  63. /* fix re-entrant bug
  64. if (retValue == ptecNoErrors)
  65. breakTree.Init(&trie, &trie_sentence_struct, &trie_trigram);
  66. */
  67. }
  68. }
  69. #else
  70. if (retValue == ptecNoErrors)
  71. {
  72. retValue = m_trie_trigram.Init(wzFileNameTrigram);
  73. /* fix re-entrant bug
  74. if (retValue == ptecNoErrors)
  75. breakTree.Init(&trie, &trie_trigram);
  76. */
  77. }
  78. #endif
  79. return retValue;
  80. }
  81. //+---------------------------------------------------------------------------
  82. //
  83. // Class: CThaiWordBreak
  84. //
  85. // Synopsis: Initialize ThaiWordBreak.
  86. //
  87. // Arguments:
  88. //
  89. // Modifies:
  90. //
  91. // History: created 7/99 aarayas
  92. //
  93. // Notes:
  94. //
  95. //----------------------------------------------------------------------------
  96. PTEC CThaiWordBreak::InitRc(LPBYTE pThaiDic, LPBYTE pThaiTrigram)
  97. {
  98. // Declare and Initialize local variables.
  99. PTEC retValue = m_trie.InitRc(pThaiDic);
  100. if (retValue == ptecNoErrors)
  101. retValue = m_trie_trigram.InitRc(pThaiTrigram);
  102. return retValue;
  103. }
  104. //+---------------------------------------------------------------------------
  105. //
  106. // Class: CThaiWordBreak
  107. //
  108. // Synopsis: UnInitialize ThaiWordBreak.
  109. //
  110. // Arguments:
  111. //
  112. // Modifies:
  113. //
  114. // History: created 7/99 aarayas
  115. //
  116. // Notes:
  117. //
  118. //----------------------------------------------------------------------------
  119. void CThaiWordBreak::UnInit()
  120. {
  121. m_trie.UnInit();
  122. #if defined (NGRAM_ENABLE)
  123. m_trie_sentence_struct.UnInit();
  124. #endif
  125. m_trie_trigram.UnInit();
  126. }
  127. //+---------------------------------------------------------------------------
  128. //
  129. // Class: CThaiWordBreak
  130. //
  131. // Synopsis:
  132. //
  133. // Arguments:
  134. //
  135. // Modifies:
  136. //
  137. // History: created 7/99 aarayas
  138. //
  139. // Notes:
  140. //
  141. //----------------------------------------------------------------------------
  142. enum merge_direction {
  143. NO_MERGE,
  144. MERGE_RIGHT,
  145. MERGE_LEFT,
  146. MERGE_BOTH_DIRECTIONS,
  147. NOT_SURE_WHICH_DIRECTION
  148. };
  149. merge_direction DetermineMergeDirection(WCHAR wc)
  150. {
  151. if (wc == 0x0020) // space
  152. return NO_MERGE;
  153. else if ( wc == 0x0022 || // quotation mark
  154. wc == 0x0027 ) // apostrophe
  155. return NOT_SURE_WHICH_DIRECTION;
  156. else if ( wc == 0x0028 || // left parenthesis
  157. wc == 0x003C || // less than sign
  158. wc == 0x005B || // left square bracket
  159. wc == 0x007B || // left curly bracket
  160. wc == 0x201C || // left double quotation mark
  161. wc == 0x201F ) // left double quotation mark reverse
  162. return MERGE_RIGHT;
  163. // TODO: need to add MERGE_BOTH_DIRECTIONS for character joiner characters.
  164. // all other character merge left.
  165. return MERGE_LEFT;
  166. }
  167. //+---------------------------------------------------------------------------
  168. //
  169. // Class: CThaiWordBreak
  170. //
  171. // Synopsis:
  172. //
  173. // Arguments:
  174. //
  175. // Modifies:
  176. //
  177. // History: created 7/99 aarayas
  178. //
  179. // Notes:
  180. //
  181. //----------------------------------------------------------------------------
  182. DWORD_PTR CThaiWordBreak::CreateWordBreaker()
  183. {
  184. CThaiBreakTree* breakTree = NULL;
  185. breakTree = new CThaiBreakTree();
  186. #if defined (NGRAM_ENABLE)
  187. breakTree->Init(&m_trie, &m_trie_sentence_struct, &m_trie_trigram);
  188. #else
  189. breakTree->Init(&m_trie, &m_trie_trigram);
  190. #endif
  191. return (DWORD_PTR)breakTree;
  192. }
  193. //+---------------------------------------------------------------------------
  194. //
  195. // Class: CThaiWordBreak
  196. //
  197. // Synopsis:
  198. //
  199. // Arguments:
  200. //
  201. // Modifies:
  202. //
  203. // History: created 7/99 aarayas
  204. //
  205. // Notes:
  206. //
  207. //----------------------------------------------------------------------------
  208. bool CThaiWordBreak::DeleteWordBreaker(DWORD_PTR dwBreaker)
  209. {
  210. CThaiBreakTree* breakTree = (CThaiBreakTree*) dwBreaker;
  211. if (breakTree)
  212. {
  213. delete breakTree;
  214. return true;
  215. }
  216. return false;
  217. }
  218. //+---------------------------------------------------------------------------
  219. //
  220. // Class: CThaiWordBreak
  221. //
  222. // Synopsis: This function segments Thai words for use in indexing.
  223. //
  224. // Arguments:
  225. // wzString - input string. (in)
  226. // iStringLen - input string length. (in)
  227. // pBreakPos - array of break position. (out)
  228. // pThwb_Struct - array structure of THWB. (out)
  229. // iBreakMax - length of pBreakPos and
  230. // pThwb_Struct. (in)
  231. //
  232. // Modifies:
  233. //
  234. // History: created 3/00 aarayas
  235. //
  236. // Notes:
  237. //
  238. //----------------------------------------------------------------------------
  239. int CThaiWordBreak::IndexWordBreak(WCHAR* wzString,unsigned int iStringLen, BYTE* pBreakPos,THWB_STRUCT* pThwb_Struct,unsigned int iBreakMax)
  240. {
  241. unsigned int iBreakIndex = 0; // Contain number of Breaks.
  242. CThaiBreakTree* breakTree = NULL;
  243. breakTree = new CThaiBreakTree();
  244. if (breakTree)
  245. {
  246. breakTree->Init(&m_trie, &m_trie_trigram);
  247. iBreakIndex = FindWordBreak((DWORD_PTR)breakTree,wzString,iStringLen,pBreakPos,iBreakMax,WB_INDEX,true,pThwb_Struct);
  248. delete breakTree;
  249. }
  250. return iBreakIndex;
  251. }
  252. //+---------------------------------------------------------------------------
  253. //
  254. // Class: CThaiWordBreak
  255. //
  256. // Synopsis:
  257. //
  258. // Arguments:
  259. //
  260. // wzWord - input string. (in)
  261. // iWordLen - input string length. (in)
  262. // Alt - find close alternate word (in)
  263. // pBreakPos - array of break position, always 5 bytes. (out)
  264. //
  265. // Modifies:
  266. //
  267. // History: created 3/00 aarayas
  268. //
  269. // Notes:
  270. //
  271. //----------------------------------------------------------------------------
  272. int CThaiWordBreak::FindAltWord(WCHAR* wzWord,unsigned int iWordLen, BYTE Alt, BYTE* pBreakPos)
  273. {
  274. unsigned int iBreakIndex = 0; // Contain number of Breaks.
  275. CThaiBreakTree* breakTree = NULL;
  276. breakTree = new CThaiBreakTree();
  277. if (breakTree)
  278. {
  279. breakTree->Init(&m_trie, &m_trie_trigram);
  280. iBreakIndex = breakTree->FindAltWord(wzWord,iWordLen,Alt,pBreakPos);
  281. delete breakTree;
  282. }
  283. return iBreakIndex;
  284. }
  285. //+---------------------------------------------------------------------------
  286. //
  287. // Class: CThaiWordBreak
  288. //
  289. // Synopsis: This function segments Thai text according to the specified mode.
  290. //
  291. // WB_LINEBREAK - is used when the application needs to break for line wrapping,
  292. // this mode takes into the consideration of punctuations.
  293. //
  294. // WB_NORMAL - is used when application wants determine word for searching,
  295. // autocorrect, etc.
  296. //
  297. // WB_SPELLER - not yet implemented, but same as normal with additional soundex
  298. // rules.
  299. //
  300. // Arguments:
  301. //
  302. // wzString - input string. (in)
  303. // iStringLen - input string length. (in)
  304. // pBreakPos - array of break position. (out)
  305. // iBreakMax - length of pBreakPos (in)
  306. // mode - either WB_LINEBREAK, etc. (in)
  307. // fFastWordBreak - true for fast algorithm (in)
  308. //
  309. // Modifies:
  310. //
  311. // History: created 7/99 aarayas
  312. //
  313. // Notes:
  314. //
  315. //----------------------------------------------------------------------------
  316. int CThaiWordBreak::FindWordBreak(WCHAR* wzString,unsigned int iStringLen, BYTE* pBreakPos,unsigned int iBreakMax, BYTE mode, bool fFastWordBreak)
  317. {
  318. unsigned int iBreakIndex = 0; // Contain number of Breaks.
  319. // fix re-entrant bug
  320. CThaiBreakTree* breakTree = NULL;
  321. breakTree = new CThaiBreakTree();
  322. if (breakTree)
  323. {
  324. #if defined (NGRAM_ENABLE)
  325. breakTree->Init(&m_trie, &trie_sentence_struct, &m_trie_trigram);
  326. #else
  327. breakTree->Init(&m_trie, &m_trie_trigram);
  328. #endif
  329. assert(mode != WB_INDEX); // If this assert come up, use function IndexWordBreak
  330. iBreakIndex = FindWordBreak((DWORD_PTR)breakTree,wzString,iStringLen,pBreakPos,iBreakMax,mode,fFastWordBreak);
  331. delete breakTree;
  332. }
  333. return iBreakIndex;
  334. }
  335. //+---------------------------------------------------------------------------
  336. //
  337. // Class: CThaiWordBreak
  338. //
  339. // Synopsis: This function segments Thai text according to the specified mode.
  340. //
  341. // WB_LINEBREAK - is used when the application needs to break for line wrapping,
  342. // this mode takes into the consideration of punctuations.
  343. //
  344. // WB_NORMAL - is used when application wants determine word for searching,
  345. // autocorrect, etc.
  346. //
  347. // WB_SPELLER - not yet implemented, but same as normal with additional soundex
  348. // rules.
  349. //
  350. // WB_INDEX - is used when application wanted to do Thai indexing.
  351. //
  352. //
  353. // Arguments:
  354. //
  355. // wzString - input string. (in)
  356. // iStringLen - input string length. (in)
  357. // pBreakPos - array of break position. (out)
  358. // iBreakMax - length of pBreakPos (in)
  359. // must be greater than 1.
  360. // mode - either WB_LINEBREAK, etc. (in)
  361. // fFastWordBreak - true for fast algorithm (in)
  362. // pThwb_Struct - array structure of THWB. (out)
  363. //
  364. // Modifies:
  365. //
  366. // History: created 11/99 aarayas
  367. //
  368. // Notes:
  369. //
  370. //----------------------------------------------------------------------------
  371. int CThaiWordBreak::FindWordBreak(DWORD_PTR dwBreaker, WCHAR* wzString,unsigned int iStringLen, BYTE* pBreakPos,unsigned int iBreakMax, BYTE mode, bool fFastWordBreak, THWB_STRUCT* pThwb_Struct)
  372. {
  373. // Declare and Initialize all local variables.
  374. WCHAR* pwszRunStart = wzString;
  375. WCHAR* pwszMax = wzString + iStringLen;
  376. WCHAR* pwch = wzString;
  377. bool fThaiRun = true;
  378. bool fSpaceMergeRight = false;
  379. int iRunCount = 0;
  380. unsigned int i = 0;
  381. unsigned int iBreakIndex = 0; // Contain number of Breaks.
  382. merge_direction dirPrevious = NO_MERGE;
  383. merge_direction dirCurrent = NO_MERGE;
  384. CThaiBreakTree* breakTree = (CThaiBreakTree*) dwBreaker;
  385. // check for possible invalid arguments.
  386. assert(wzString != NULL);
  387. assert(iBreakMax > 0);
  388. assert(pBreakPos != NULL);
  389. if ((wzString == NULL) || (iBreakMax == 0) || (pBreakPos == NULL))
  390. return 0;
  391. switch (mode)
  392. {
  393. case WB_LINEBREAK:
  394. case 2: // to be compatible with old api.
  395. do
  396. {
  397. while ((TWB_IsCharPunctW(*pwch) || TWB_IsCharWordDelimW(*pwch)) && iBreakIndex < iBreakMax && pwch < pwszMax)
  398. {
  399. dirCurrent = DetermineMergeDirection(*pwch);
  400. switch (dirCurrent)
  401. {
  402. case NO_MERGE:
  403. if ( pwch + 1 < pwszMax && *(pwch + 1) == THAI_Vowel_MaiYaMok && iBreakIndex > 0)
  404. {
  405. // Mai Ya Mok case only.
  406. pBreakPos[iBreakIndex - 1] += 2;
  407. dirCurrent = MERGE_LEFT;
  408. pwch++;
  409. }
  410. else
  411. pBreakPos[iBreakIndex++] = 1;
  412. break;
  413. case MERGE_RIGHT:
  414. if (dirPrevious == MERGE_RIGHT)
  415. pBreakPos[iBreakIndex - 1]++;
  416. else if (!TWB_IsCharPunctW(*(pwch + 1)))
  417. pBreakPos[iBreakIndex++] = 1;
  418. else
  419. pBreakPos[iBreakIndex++] = 1;
  420. break;
  421. case NOT_SURE_WHICH_DIRECTION:
  422. if (pwch == wzString || // if pwch is first character.
  423. TWB_IsCharWordDelimW(*(pwch - 1)) ) // if previous character is delimiter.
  424. {
  425. pBreakPos[iBreakIndex++] = 1;
  426. dirCurrent = MERGE_RIGHT;
  427. }
  428. else
  429. {
  430. pBreakPos[iBreakIndex - 1]++;
  431. dirCurrent = MERGE_LEFT;
  432. }
  433. break;
  434. case MERGE_LEFT:
  435. default:
  436. if (iBreakIndex == 0)
  437. if (pwch == wzString)
  438. pBreakPos[iBreakIndex++] = 1;
  439. else
  440. pBreakPos[iBreakIndex]++;
  441. else
  442. pBreakPos[iBreakIndex - 1]++;
  443. break;
  444. }
  445. dirPrevious = dirCurrent;
  446. pwch++;
  447. pwszRunStart = pwch;
  448. }
  449. assert(pwszRunStart == pwch);
  450. if( iBreakIndex >= iBreakMax || pwch >= pwszMax)
  451. break;
  452. // Detect if this is a Thai Run.
  453. fThaiRun = IsThaiChar(*pwch);
  454. do
  455. {
  456. pwch++;
  457. iRunCount++;
  458. } while ((IsThaiChar(*pwch)==fThaiRun &&
  459. iRunCount < (MAXBREAK - 2) &&
  460. *pwch &&
  461. !TWB_IsCharWordDelimW(*pwch) &&
  462. (pwch < pwszMax) ) ||
  463. ( ( *pwch == 0x2c || *pwch == 0x2e) && (iRunCount < (MAXBREAK - 2)) && (pwch < pwszMax) ));
  464. if (fThaiRun)
  465. {
  466. unsigned int iBreak = breakTree->TrigramBreak(pwszRunStart,pwch);
  467. for (i=0; i < iBreak && iBreakIndex <iBreakMax; i++)
  468. {
  469. // First Thai character of the run.
  470. if (dirPrevious == MERGE_RIGHT)
  471. {
  472. assert(iBreakIndex != 0);
  473. pBreakPos[iBreakIndex - 1] += breakTree->breakArray[i];
  474. }
  475. else
  476. pBreakPos[iBreakIndex++] = breakTree->breakArray[i];
  477. dirPrevious = NO_MERGE;
  478. }
  479. }
  480. else
  481. {
  482. // Not a Thai Run simply put the whole thing in the break array.
  483. assert(pwch > pwszRunStart); // pwch must be greater than pwszRunStart, since we just walk.
  484. if (dirPrevious == MERGE_RIGHT)
  485. {
  486. assert(iBreakIndex != 0);
  487. pBreakPos[iBreakIndex - 1] += (BYTE) (pwch - pwszRunStart);
  488. }
  489. else
  490. pBreakPos[iBreakIndex++] = (BYTE) (pwch - pwszRunStart);
  491. }
  492. iRunCount = 0;
  493. pwszRunStart = pwch;
  494. // Make sure we haven't pass iBreakMax define by user else return whatever we got.
  495. } while(iBreakIndex < iBreakMax && pwch < pwszMax);
  496. break;
  497. case WB_INDEX:
  498. // Make sure argument is the same.
  499. assert(pThwb_Struct != NULL);
  500. if (pThwb_Struct == NULL)
  501. return 0;
  502. do
  503. {
  504. while (TWB_IsCharWordDelimW(*pwch) && pwszMax > pwch)
  505. pwch++;
  506. if( pwszRunStart < pwch)
  507. {
  508. pBreakPos[iBreakIndex++] = (BYTE)(pwch - pwszRunStart);
  509. pwszRunStart = pwch;
  510. }
  511. if( iBreakIndex >= iBreakMax || pwch >= pwszMax)
  512. break;
  513. // Detect if this is a Thai Run.
  514. fThaiRun = IsThaiChar(*pwch); //TODO: Add comma and period to Thai range.
  515. do
  516. {
  517. pwch++;
  518. iRunCount++;
  519. } while ((IsThaiChar(*pwch)==fThaiRun &&
  520. iRunCount < (MAXBREAK - 2) &&
  521. *pwch &&
  522. !TWB_IsCharWordDelimW(*pwch) &&
  523. (pwch < pwszMax) ) ||
  524. ( ( *pwch == 0x2c || *pwch == 0x2e) && (iRunCount < (MAXBREAK - 2)) && (pwch < pwszMax) ));
  525. if (fThaiRun)
  526. {
  527. unsigned int iBreak = breakTree->TrigramBreak(pwszRunStart,pwch);
  528. for (i=0; i < iBreak && iBreakIndex <iBreakMax; i++)
  529. {
  530. pThwb_Struct[iBreakIndex].fThai = true;
  531. pThwb_Struct[iBreakIndex].alt = ExtractALT(breakTree->tagArray[i]);
  532. pBreakPos[iBreakIndex++] = breakTree->breakArray[i];
  533. }
  534. }
  535. else
  536. {
  537. // Not a Thai Run simply put the whole thing in the break array.
  538. assert(pwch > pwszRunStart); // pwch must be greater than pwszRunStart, since we just walk.
  539. pThwb_Struct[iBreakIndex].fThai = false;
  540. pThwb_Struct[iBreakIndex].alt = 0;
  541. pBreakPos[iBreakIndex++] = (BYTE)(pwch - pwszRunStart);
  542. }
  543. iRunCount = 0;
  544. pwszRunStart = pwch;
  545. // Make sure we haven't pass iBreakMax define by user else return whatever we got.
  546. } while(iBreakIndex < iBreakMax && pwch < pwszMax);
  547. break;
  548. case WB_CARETBREAK:
  549. fSpaceMergeRight = true;
  550. case WB_NORMAL:
  551. default:
  552. do
  553. {
  554. while (TWB_IsCharWordDelimW(*pwch) && pwszMax > pwch)
  555. pwch++;
  556. if( pwszRunStart < pwch)
  557. {
  558. if (fSpaceMergeRight && *pwszRunStart == L' ' && iBreakIndex > 0)
  559. // This is a caret movement features, should merge space to
  560. // the right words.
  561. pBreakPos[iBreakIndex - 1] += (BYTE)(pwch - pwszRunStart);
  562. else
  563. pBreakPos[iBreakIndex++] = (BYTE)(pwch - pwszRunStart);
  564. pwszRunStart = pwch;
  565. }
  566. if( iBreakIndex >= iBreakMax || pwch >= pwszMax)
  567. break;
  568. // Detect if this is a Thai Run.
  569. fThaiRun = IsThaiChar(*pwch); //TODO: Add comma and period to Thai range.
  570. do
  571. {
  572. pwch++;
  573. iRunCount++;
  574. } while ((IsThaiChar(*pwch)==fThaiRun &&
  575. iRunCount < (MAXBREAK - 2) &&
  576. *pwch &&
  577. !TWB_IsCharWordDelimW(*pwch) &&
  578. (pwch < pwszMax) ) ||
  579. ( ( *pwch == 0x2c || *pwch == 0x2e) && (iRunCount < (MAXBREAK - 2)) && (pwch < pwszMax) ));
  580. if (fThaiRun)
  581. {
  582. #if defined (NGRAM_ENABLE)
  583. if (!fFastWordBreak)
  584. {
  585. if (WordBreak(pwszRunStart,pwch))
  586. for (i=0; i < breakTree.maxToken && iBreakIndex <iBreakMax; i++)
  587. pBreakPos[iBreakIndex++] = breakTree->maximalMatchingBreakArray[i];
  588. }
  589. else
  590. {
  591. unsigned int iBreak = breakTree->TrigramBreak(pwszRunStart,pwch);
  592. for (i=0; i < iBreak && iBreakIndex <iBreakMax; i++)
  593. pBreakPos[iBreakIndex++] = breakTree->breakArray[i];
  594. }
  595. #else
  596. unsigned int iBreak = breakTree->TrigramBreak(pwszRunStart,pwch);
  597. for (i=0; i < iBreak && iBreakIndex <iBreakMax; i++)
  598. pBreakPos[iBreakIndex++] = breakTree->breakArray[i];
  599. #endif
  600. }
  601. else
  602. {
  603. // Not a Thai Run simply put the whole thing in the break array.
  604. assert(pwch > pwszRunStart); // pwch must be greater than pwszRunStart, since we just walk.
  605. pBreakPos[iBreakIndex++] = (BYTE)(pwch - pwszRunStart);
  606. }
  607. iRunCount = 0;
  608. pwszRunStart = pwch;
  609. // Make sure we haven't pass iBreakMax define by user else return whatever we got.
  610. } while(iBreakIndex < iBreakMax && pwch < pwszMax);
  611. break;
  612. }
  613. #if defined (_DEBUG)
  614. unsigned int iTotalChar = 0;
  615. for (i = 0; i < iBreakIndex; i++)
  616. {
  617. iTotalChar += pBreakPos[i];
  618. }
  619. if (iBreakIndex < iBreakMax)
  620. assert(iStringLen == iTotalChar);
  621. #endif
  622. return iBreakIndex;
  623. }
  624. //+---------------------------------------------------------------------------
  625. //
  626. // Class: CThaiWordBreak
  627. //
  628. // Synopsis:
  629. //
  630. // Arguments:
  631. //
  632. // Modifies:
  633. //
  634. // History: created 7/99 aarayas
  635. //
  636. // Notes:
  637. //
  638. //----------------------------------------------------------------------------
  639. #if defined (NGRAM_ENABLE)
  640. BOOL CThaiWordBreak::WordBreak(WCHAR* pszBegin, WCHAR* pszEnd)
  641. {
  642. // Declare and Initialize all local variables.
  643. bool fWordEnd = false;
  644. bool fCorrectPath = false;
  645. WCHAR* pszIndex = pszBegin;
  646. int iNumCluster = 1;
  647. assert(pszBegin < pszEnd); // Make sure pszEnd is at least greater pszBegin.
  648. breakTree.GenerateTree(pszBegin, pszEnd);
  649. breakTree.MaximalMatching();
  650. return (breakTree.maxToken > 0);
  651. }
  652. #endif
  653. //+---------------------------------------------------------------------------
  654. //
  655. // Class: CThaiWordBreak
  656. //
  657. // Synopsis:
  658. //
  659. // Arguments:
  660. //
  661. // Modifies:
  662. //
  663. // History: created 7/99 aarayas
  664. //
  665. // Notes:
  666. //
  667. //----------------------------------------------------------------------------
  668. BOOL CThaiWordBreak::Find(WCHAR* wzString, DWORD* pdwPOS)
  669. {
  670. return m_trie.Find(wzString, pdwPOS);
  671. }