Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

519 lines
11 KiB

  1. //+---------------------------------------------------------------------------
  2. //
  3. //
  4. // CTrie - class CTrie encapsulation for Trie data structure.
  5. //
  6. // History:
  7. // created 6/99 aarayas
  8. //
  9. // ©1999 Microsoft Corporation
  10. //----------------------------------------------------------------------------
  11. #include "ctrie.hpp"
  12. #define VERSIONMAJOR 1
  13. #define VERSIONMINOR 0
  14. //+---------------------------------------------------------------------------
  15. //
  16. // Class: CTrieIter
  17. //
  18. // Synopsis: constructor
  19. //
  20. // Arguments:
  21. //
  22. // Modifies:
  23. //
  24. // History: created 6/99 aarayas
  25. //
  26. // Notes:
  27. //
  28. //----------------------------------------------------------------------------
  29. CTrieIter::CTrieIter()
  30. {
  31. // Initialize local variables.
  32. Reset();
  33. wc = 0;
  34. fWordEnd = FALSE;
  35. fRestricted = FALSE;
  36. frq = 0;
  37. dwTag = 0;
  38. }
  39. //+---------------------------------------------------------------------------
  40. //
  41. // Class: CTrieIter
  42. //
  43. // Synopsis: copy constructor
  44. //
  45. // Arguments:
  46. //
  47. // Modifies:
  48. //
  49. // History: created 6/99 aarayas
  50. //
  51. // Notes:
  52. //
  53. //----------------------------------------------------------------------------
  54. CTrieIter::CTrieIter(const CTrieIter& trieIter)
  55. {
  56. // Copy all variables from Initial trie.
  57. memcpy(&trieScan, &trieIter.trieScan, sizeof(TRIESCAN));
  58. pTrieCtrl = trieIter.pTrieCtrl;
  59. wc = trieIter.wc;
  60. fWordEnd = trieIter.fWordEnd;
  61. fRestricted = trieIter.fRestricted;
  62. frq = trieIter.frq;
  63. dwTag = trieIter.dwTag;
  64. }
  65. //+---------------------------------------------------------------------------
  66. //
  67. // Class: CTrieIter
  68. //
  69. // Synopsis: Initialize variables.
  70. //
  71. // Arguments:
  72. //
  73. // Modifies:
  74. //
  75. // History: created 6/99 aarayas
  76. //
  77. // Notes:
  78. //
  79. //----------------------------------------------------------------------------
  80. void CTrieIter::Init(CTrie* ctrie)
  81. {
  82. // Initialize TrieCtrl
  83. pTrieCtrl = ctrie->pTrieCtrl;
  84. }
  85. //+---------------------------------------------------------------------------
  86. //
  87. // Class: CTrieIter
  88. //
  89. // Synopsis: Initialize variables.
  90. //
  91. // Arguments:
  92. //
  93. // Modifies:
  94. //
  95. // History: created 3/00 aarayas
  96. //
  97. // Notes:
  98. //
  99. //----------------------------------------------------------------------------
  100. void CTrieIter::Init(TRIECTRL* pTrieCtrl1)
  101. {
  102. // Initialize TrieCtrl
  103. pTrieCtrl = pTrieCtrl1;
  104. }
  105. //+---------------------------------------------------------------------------
  106. //
  107. // Class: CTrieIter
  108. //
  109. // Synopsis: Bring iteration index to the first node.
  110. //
  111. // Arguments:
  112. //
  113. // Modifies:
  114. //
  115. // History: created 6/99 aarayas
  116. //
  117. // Notes:
  118. //
  119. //----------------------------------------------------------------------------
  120. void CTrieIter::Reset()
  121. {
  122. // Reset Trie.
  123. memset(&trieScan, 0, sizeof(TRIESCAN));
  124. }
  125. //+---------------------------------------------------------------------------
  126. //
  127. // Class: CTrieIter
  128. //
  129. // Synopsis: Move Iteration index down one node.
  130. //
  131. // Arguments:
  132. //
  133. // Modifies:
  134. //
  135. // History: created 6/99 aarayas
  136. //
  137. // Notes:
  138. //
  139. //----------------------------------------------------------------------------
  140. BOOL CTrieIter::Down()
  141. {
  142. // Move the Trie down one node.
  143. return TrieGetNextState(pTrieCtrl, &trieScan);
  144. }
  145. //+---------------------------------------------------------------------------
  146. //
  147. // Class: CTrieIter
  148. //
  149. // Synopsis: Move Iteration index right one node.
  150. //
  151. // Arguments:
  152. //
  153. // Modifies:
  154. //
  155. // History: created 6/99 aarayas
  156. //
  157. // Notes:
  158. //
  159. //----------------------------------------------------------------------------
  160. BOOL CTrieIter::Right()
  161. {
  162. // Move the Trie right one node.
  163. return TrieGetNextNode(pTrieCtrl, &trieScan);
  164. }
  165. //+---------------------------------------------------------------------------
  166. //
  167. // Class: CTrieIter
  168. //
  169. // Synopsis: Decode the current node into wc, fWordEnd and the tag fields.
  170. //
  171. // Arguments:
  172. //
  173. // Modifies:
  174. //
  175. // History: created 6/99 aarayas
  176. //
  177. // Notes:
  178. //
  179. //----------------------------------------------------------------------------
  180. void CTrieIter::GetNode()
  181. {
  182. wc = trieScan.wch;
  183. fWordEnd = (trieScan.wFlags & TRIE_NODE_VALID) &&
  184. (!(trieScan.wFlags & TRIE_NODE_TAGGED) ||
  185. (trieScan.aTags[0].dwData & iDialectMask));
  186. if (fWordEnd)
  187. {
  188. fRestricted = (trieScan.wFlags & TRIE_NODE_TAGGED) &&
  189. (trieScan.aTags[0].dwData & iRestrictedMask);
  190. frq = (BYTE) (trieScan.wFlags & TRIE_NODE_TAGGED ?
  191. (trieScan.aTags[0].dwData & 0x300) >> iFrqShift :
  192. frqpenNormal);
  193. posTag = (DWORD) (trieScan.wFlags & TRIE_NODE_TAGGED ?
  194. (trieScan.aTags[0].dwData & iPosMask) >> iPosShift :
  195. 0);
  196. dwTag = (DWORD) (trieScan.wFlags & TRIE_NODE_TAGGED ?
  197. trieScan.aTags[0].dwData :
  198. 0);
  199. }
  200. }
  201. //+---------------------------------------------------------------------------
  202. //
  203. // Class: CTrie
  204. //
  205. // Synopsis: Constructor
  206. //
  207. // Arguments:
  208. //
  209. // Modifies:
  210. //
  211. // History: created 6/99 aarayas
  212. //
  213. // Notes:
  214. //
  215. //----------------------------------------------------------------------------
  216. CTrie::CTrie()
  217. {
  218. pMapFile = NULL;
  219. pTrieCtrl = NULL;
  220. pTrieScan = NULL;
  221. }
  222. //+---------------------------------------------------------------------------
  223. //
  224. // Class: CTrie
  225. //
  226. // Synopsis: Destructor
  227. //
  228. // Arguments:
  229. //
  230. // Modifies:
  231. //
  232. // History: created 6/99 aarayas
  233. //
  234. // Notes:
  235. //
  236. //----------------------------------------------------------------------------
  237. CTrie::~CTrie()
  238. {
  239. UnInit();
  240. }
  241. //+---------------------------------------------------------------------------
  242. //
  243. // Class: CTrie
  244. //
  245. // Synopsis: Initialize Trie.
  246. //
  247. // Arguments:
  248. //
  249. // Modifies:
  250. //
  251. // History: created 6/99 aarayas
  252. //
  253. // Notes:
  254. //
  255. //----------------------------------------------------------------------------
  256. PTEC CTrie::Init(WCHAR* szFileName)
  257. {
  258. // Declare and Initialize all local variables.
  259. PTEC ptec = ptecModuleError;
  260. // The function assume that pMapFile and pTrieCtrl is NULL, else there are possible memory leak.
  261. // possibility of this could be call Initilization without Terminating.
  262. assert(pMapFile == NULL);
  263. assert(pTrieCtrl == NULL);
  264. // Initialize pMapFile and pTrieCtrl to NULL.
  265. pMapFile = NULL;
  266. pTrieCtrl = NULL;
  267. pMapFile = OpenMapFileW(szFileName);
  268. if (pMapFile == NULL)
  269. {
  270. // Unable to load map files, return invalid read error.
  271. ptec = retcode(ptecIOErrorMainLex, ptecFileRead);
  272. }
  273. else if (pMapFile->pvMap == NULL)
  274. {
  275. // Return Invalid format and close the files.
  276. ptec = retcode(ptecIOErrorMainLex, ptecInvalidFormat);
  277. CloseMapFile(pMapFile);
  278. }
  279. else
  280. {
  281. BYTE *pmap = (BYTE *) pMapFile->pvMap;
  282. // find the header
  283. LEXHEADER *plxhead = (LEXHEADER *) pmap;
  284. pmap += sizeof(LEXHEADER);
  285. // verify that it's a valid lex file
  286. if (!(plxhead->lxid == lxidSpeller && plxhead->vendorid == vendoridMicrosoft &&
  287. PROOFMAJORVERSION(plxhead->version) == VERSIONMAJOR ))
  288. {
  289. // If we reached here than the lexicon is no in a valid Thai wordbreak format.
  290. ptec = retcode(ptecIOErrorMainLex, ptecInvalidFormat);
  291. }
  292. else
  293. {
  294. // Make sure the language matches check the first dialect of the lexicon.
  295. // CTrie also support both Thai and Vietnamese language.
  296. if ( (plxhead->lidArray[0] != lidThai) && (plxhead->lidArray[0] != lidViet) )
  297. {
  298. // If we reached here than we are not using Thai lexicon.
  299. ptec = retcode(ptecIOErrorMainLex, ptecInvalidLanguage);
  300. }
  301. else
  302. {
  303. // The size of the copyright notice
  304. int cCopyright = 0;
  305. WCHAR* pwzCopyright = NULL;
  306. int cLexSup = 0;
  307. cCopyright = * (int *) pmap;
  308. pmap += sizeof(int);
  309. // The copyright notice itself
  310. pwzCopyright = (WCHAR *) pmap;
  311. pmap += cCopyright * sizeof(WCHAR);
  312. // Skip Supplemental data for Thai word break.
  313. cLexSup = * (int *) pmap;
  314. pmap += sizeof(int);
  315. pmap += cLexSup;
  316. pTrieCtrl = TrieInit(pmap);
  317. if (pTrieCtrl)
  318. {
  319. // We were able to load and point to the Trie okay.
  320. //MessageBoxW(0,L"Was able to initialize Trie",pwsz,MB_OK);
  321. pTrieScan = new CTrieIter();
  322. pTrieScan->Init(this);
  323. ptec = ptecNoErrors;
  324. }
  325. else
  326. {
  327. // We were not able to initailize main lexicon.
  328. ptec = retcode(ptecIOErrorMainLex, ptecInvalidMainLex);
  329. }
  330. }
  331. }
  332. }
  333. return ptec;
  334. }
  335. //+---------------------------------------------------------------------------
  336. //
  337. // Class: CTrie
  338. //
  339. // Synopsis: Initialize Trie.
  340. //
  341. // Arguments:
  342. //
  343. // Modifies:
  344. //
  345. // History: created 2/2000 aarayas
  346. //
  347. // Notes:
  348. //
  349. //----------------------------------------------------------------------------
  350. PTEC CTrie::InitRc(LPBYTE pmap)
  351. {
  352. // Declare and Initialize all local variables.
  353. PTEC ptec = ptecModuleError;
  354. // The function assume that pMapFile and pTrieCtrl is NULL, else there are possible memory leak.
  355. // possibility of this could be call Initilization without Terminating.
  356. assert(pMapFile == NULL);
  357. assert(pTrieCtrl == NULL);
  358. assert(pTrieScan == NULL);
  359. // Initialize pMapFile and pTrieCtrl to NULL.
  360. pMapFile = NULL;
  361. pTrieCtrl = NULL;
  362. pTrieScan = NULL;
  363. LEXHEADER *plxhead = (LEXHEADER *) pmap;
  364. pmap += sizeof(LEXHEADER);
  365. // The size of the copyright notice
  366. int cCopyright = 0;
  367. WCHAR* pwzCopyright = NULL;
  368. int cLexSup = 0;
  369. cCopyright = * (int *) pmap;
  370. pmap += sizeof(int);
  371. // The copyright notice itself
  372. pwzCopyright = (WCHAR *) pmap;
  373. pmap += cCopyright * sizeof(WCHAR);
  374. // Skip Supplemental data for Thai word break.
  375. cLexSup = * (int *) pmap;
  376. pmap += sizeof(int);
  377. pmap += cLexSup;
  378. pTrieCtrl = TrieInit(pmap);
  379. if (pTrieCtrl)
  380. {
  381. // We were able to load and point to the Trie okay.
  382. //MessageBoxW(0,L"Was able to initialize Trie",L"ThWB",MB_OK);
  383. pTrieScan = new CTrieIter();
  384. pTrieScan->Init(this);
  385. ptec = ptecNoErrors;
  386. }
  387. else
  388. {
  389. // We were not able to initailize main lexicon.
  390. ptec = retcode(ptecIOErrorMainLex, ptecInvalidMainLex);
  391. }
  392. return ptec;
  393. }
  394. //+---------------------------------------------------------------------------
  395. //
  396. // Class: CTrie
  397. //
  398. // Synopsis: UnInitialize Trie.
  399. //
  400. // Arguments:
  401. //
  402. // Modifies:
  403. //
  404. // History: created 6/99 aarayas
  405. //
  406. // Notes:
  407. //
  408. //----------------------------------------------------------------------------
  409. void CTrie::UnInit()
  410. {
  411. // Free up memory allocated by Trie.
  412. if (pTrieCtrl)
  413. {
  414. TrieFree(pTrieCtrl);
  415. pTrieCtrl = NULL;
  416. }
  417. // Close the map files.
  418. if (pMapFile)
  419. {
  420. CloseMapFile(pMapFile);
  421. pMapFile = NULL;
  422. }
  423. if (pTrieScan)
  424. {
  425. delete pTrieScan;
  426. pTrieScan = NULL;
  427. }
  428. }
  429. //+---------------------------------------------------------------------------
  430. //
  431. // Class: CTrie
  432. //
  433. // Synopsis: searches for the given string in the trie
  434. //
  435. // Arguments:
  436. //
  437. // Modifies:
  438. //
  439. // History: created 6/99 aarayas
  440. //
  441. // Notes:
  442. //
  443. //----------------------------------------------------------------------------
  444. BOOL CTrie::Find(WCHAR* szWord, DWORD* pdwPOS)
  445. {
  446. // Declare and initialize all local variables.
  447. int i = 0;
  448. if (pTrieScan == NULL)
  449. return FALSE;
  450. pTrieScan->Reset();
  451. if (!pTrieScan->Down())
  452. return FALSE;
  453. while (TRUE)
  454. {
  455. pTrieScan->GetNode();
  456. if (pTrieScan->wc == szWord[i])
  457. {
  458. i++;
  459. if (pTrieScan->fWordEnd && szWord[i] == '\0')
  460. {
  461. *pdwPOS = pTrieScan->posTag;
  462. return TRUE;
  463. }
  464. else if (szWord[i] == '\0') break;
  465. // Move down the Trie Branch.
  466. else if (!pTrieScan->Down()) break;
  467. }
  468. // Move right of the Trie Branch
  469. else if (!pTrieScan->Right()) break;
  470. }
  471. *pdwPOS = POS_UNKNOWN;
  472. return FALSE;
  473. }