Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1032 lines
20 KiB

  1. // IndexRec.cpp
  2. //
  3. // final index record & lists
  4. //
  5. // Copyright 2001 Microsoft Corp.
  6. //
  7. // Modification History:
  8. // 19 MAR 2001 bhshin created
  9. #include "StdAfx.h"
  10. #include "KorWbrk.h"
  11. #include "IndexRec.h"
  12. #include "Morpho.h"
  13. // the number of records (in prgIndexRec) that we should allocate in a clump.
  14. // this is used whenever we need to re-alloc the array
  15. #define RECORD_CLUMP_SIZE 100
  16. //======================================================
  17. // CRecList
  18. //======================================================
  19. // CRecList::CRecList
  20. //
  21. // constructor
  22. //
  23. // Parameters:
  24. // (NONE)
  25. //
  26. // Result:
  27. // (NONE)
  28. //
  29. // 20MAR01 bhshin began
  30. CRecList::CRecList()
  31. {
  32. m_prgnRecID = NULL;
  33. m_nMaxRec = 0;
  34. m_nCurrRec = MIN_RECORD;
  35. }
  36. // CRecList::~CRecList
  37. //
  38. // destructor
  39. //
  40. // Parameters:
  41. // (NONE)
  42. //
  43. // Result:
  44. // (NONE)
  45. //
  46. // 20MAR01 bhshin began
  47. CRecList::~CRecList()
  48. {
  49. Uninitialize();
  50. }
  51. // CRecList::Initialize
  52. //
  53. // initialize CRecList
  54. //
  55. // Parameters:
  56. // (NONE)
  57. //
  58. // Result:
  59. // (BOOL) TRUE if succeed, otherwise return FALSE
  60. //
  61. // 20MAR01 bhshin began
  62. BOOL CRecList::Initialize(void)
  63. {
  64. m_nCurrRec = MIN_RECORD;
  65. // allocate new IndexRec
  66. if (m_prgnRecID == NULL)
  67. {
  68. m_nMaxRec = RECORD_CLUMP_SIZE;
  69. m_prgnRecID = (int*)malloc(m_nMaxRec * sizeof(int));
  70. if (m_prgnRecID == NULL)
  71. {
  72. m_nMaxRec = 0;
  73. return FALSE;
  74. }
  75. }
  76. return TRUE;
  77. }
  78. // CRecList::Uninitialize
  79. //
  80. // unitialize CRecList
  81. //
  82. // Parameters:
  83. // (NONE)
  84. //
  85. // Result:
  86. // (BOOL) TRUE if succeed, otherwise return FALSE
  87. //
  88. // 20MAR01 bhshin began
  89. void CRecList::Uninitialize(void)
  90. {
  91. // free IndexRec
  92. if (m_prgnRecID != NULL)
  93. {
  94. free(m_prgnRecID);
  95. m_prgnRecID = NULL;
  96. }
  97. m_nMaxRec = 0;
  98. m_nCurrRec = MIN_RECORD;
  99. }
  100. // CRecList::AddRec
  101. //
  102. // add record id
  103. //
  104. // Parameters:
  105. // nRecID -> (int) record id
  106. //
  107. // Result:
  108. // (BOOL) TRUE if succeed, otherwise return FALSE
  109. //
  110. // 20MAR01 bhshin began
  111. BOOL CRecList::AddRec(int nRecID)
  112. {
  113. int *prgnRecID;
  114. int nMaxRec;
  115. if (m_prgnRecID == NULL)
  116. return FALSE;
  117. // make sure if there is enough room for new record (maximum 3 records can be added)
  118. if (m_nMaxRec <= m_nCurrRec)
  119. {
  120. nMaxRec = m_nMaxRec + RECORD_CLUMP_SIZE;
  121. prgnRecID = (int*)realloc(m_prgnRecID, nMaxRec * sizeof(int));
  122. if (prgnRecID == NULL)
  123. return FALSE;
  124. m_nMaxRec = nMaxRec;
  125. m_prgnRecID = prgnRecID;
  126. }
  127. m_prgnRecID[m_nCurrRec] = nRecID;
  128. m_nCurrRec++;
  129. return TRUE;
  130. }
  131. // CRecList::operator =
  132. //
  133. // assign operator
  134. //
  135. // Parameters:
  136. // objRecList -> (CRecList&)
  137. //
  138. // Result:
  139. // (CRecList&)
  140. //
  141. // 20MAR01 bhshin began
  142. CRecList& CRecList::operator = (CRecList& objRecList)
  143. {
  144. int nRecord, nRecID;
  145. // re-initialize this
  146. Uninitialize();
  147. if (!Initialize())
  148. throw 0;
  149. for (nRecord = MIN_RECORD; nRecord < objRecList.m_nCurrRec; nRecord++)
  150. {
  151. nRecID = objRecList.m_prgnRecID[nRecord];
  152. if (!AddRec(nRecID))
  153. throw 0;
  154. }
  155. return *this;
  156. }
  157. // CRecList::operator +=
  158. //
  159. // unary sum operator
  160. //
  161. // Parameters:
  162. // objRecList -> (CRecList&)
  163. //
  164. // Result:
  165. // (CRecList&)
  166. //
  167. // 20MAR01 bhshin began
  168. CRecList& CRecList::operator += (CRecList& objRecList)
  169. {
  170. int nRecord, nRecID;
  171. for (nRecord = MIN_RECORD; nRecord < objRecList.m_nCurrRec; nRecord++)
  172. {
  173. nRecID = objRecList.m_prgnRecID[nRecord];
  174. if (!AddRec(nRecID))
  175. throw 0;
  176. }
  177. return *this;
  178. }
  179. //======================================================
  180. // CIndexInfo
  181. //======================================================
  182. // CIndexInfo::CIndexInfo
  183. //
  184. // default constructor of CIndexRec
  185. //
  186. // Parameters:
  187. // (NONE)
  188. //
  189. // Result:
  190. // (NONE)
  191. //
  192. // 19MAR01 bhshin began
  193. CIndexInfo::CIndexInfo()
  194. {
  195. m_prgIndexRec = NULL;
  196. m_nMaxRec = 0;
  197. m_nCurrRec = MIN_RECORD;
  198. m_cchTextProcessed = 0;
  199. m_cwcSrcPos = 0;
  200. m_pWordSink = NULL;
  201. m_pPhraseSink = NULL;
  202. m_wzRomaji[0] = L'\0';
  203. m_cchRomaji = 0;
  204. m_cchPrefix = 0;
  205. m_fAddRomaji = FALSE;
  206. m_nFinalHead = 0;
  207. }
  208. // CIndexInfo::~CIndexInfo
  209. //
  210. // destructor of CIndexRec
  211. //
  212. // Parameters:
  213. // (NONE)
  214. //
  215. // Result:
  216. // (NONE)
  217. //
  218. // 19MAR01 bhshin began
  219. CIndexInfo::~CIndexInfo()
  220. {
  221. Uninitialize();
  222. }
  223. // CIndexInfo::IsExistIndex
  224. //
  225. // check if index term already exist.
  226. //
  227. // Parameters:
  228. // pwzIndex -> (const WCHAR*) index string
  229. //
  230. // Result:
  231. // (BOOL)
  232. //
  233. // 19MAR01 bhshin began
  234. BOOL CIndexInfo::IsExistIndex(const WCHAR *pwzIndex)
  235. {
  236. for (int i = MIN_RECORD; i < m_nCurrRec; i++)
  237. {
  238. // found duplicate index term
  239. if (wcscmp(m_prgIndexRec[i].wzIndex, pwzIndex) == 0)
  240. return TRUE;
  241. }
  242. return FALSE;
  243. }
  244. // CIndexInfo::SetRomajiInfo
  245. //
  246. // make final index list to put word
  247. //
  248. // Parameters:
  249. // pwzRomaji -> (WCHAR*) leading romaji string
  250. // cchRomaji -> (int) length of romaji string
  251. // cchPrefix -> (int) length of prefix (ex, http://)
  252. //
  253. // Result:
  254. // (BOOL) TRUE if succeed, otherwise return FALSE
  255. //
  256. // 19MAR01 bhshin began
  257. BOOL CIndexInfo::SetRomajiInfo(WCHAR *pwzRomaji, int cchRomaji, int cchPrefix)
  258. {
  259. if (pwzRomaji == NULL || cchRomaji > MAX_INDEX_STRING)
  260. {
  261. m_wzRomaji[0] = L'\0';
  262. m_cchRomaji = 0;
  263. m_cchPrefix = 0;
  264. return FALSE;
  265. }
  266. wcsncpy(m_wzRomaji, pwzRomaji, cchRomaji);
  267. m_wzRomaji[cchRomaji] = L'\0';
  268. m_cchRomaji = cchRomaji;
  269. m_cchPrefix = cchPrefix;
  270. return TRUE;
  271. }
  272. // CIndexInfo::Initialize
  273. //
  274. // initialize all the members of CIndexRec
  275. //
  276. // Parameters:
  277. // cchTextProcessed -> (int) length of text processed
  278. // cwcSrcPos -> (int) position value of source string
  279. // pWordSink -> (IWordSink) IWordSink for PutWord/PutAltWord
  280. // pPhraseSink -> (IPhraseSink) IPhraseSink for PutWord/PutAltWord
  281. //
  282. // Result:
  283. // (BOOL) TRUE if it succeeds to initialize
  284. //
  285. // 19MAR01 bhshin began
  286. BOOL CIndexInfo::Initialize(int cchTextProcessed, int cwcSrcPos, IWordSink *pWordSink, IPhraseSink *pPhraseSink)
  287. {
  288. // parameter validations
  289. if (cchTextProcessed <= 0 || cwcSrcPos < 0)
  290. return FALSE;
  291. if (pWordSink == NULL)
  292. return FALSE;
  293. // allocate new IndexRec
  294. if (m_prgIndexRec == NULL)
  295. {
  296. m_nMaxRec = RECORD_CLUMP_SIZE;
  297. m_prgIndexRec = (INDEX_REC*)malloc(m_nMaxRec * sizeof(INDEX_REC));
  298. if (m_prgIndexRec == NULL)
  299. {
  300. m_nMaxRec = 0;
  301. return FALSE;
  302. }
  303. }
  304. m_cchTextProcessed = cchTextProcessed;
  305. m_cwcSrcPos = cwcSrcPos;
  306. m_pWordSink = pWordSink;
  307. m_pPhraseSink = pPhraseSink;
  308. return TRUE;
  309. }
  310. // CIndexInfo::Uninitialize
  311. //
  312. // initialize all the members of CIndexRec
  313. //
  314. // Parameters:
  315. // (NONE)
  316. //
  317. // Result:
  318. // (NONE)
  319. //
  320. // 19MAR01 bhshin began
  321. void CIndexInfo::Uninitialize()
  322. {
  323. // free IndexRec
  324. if (m_prgIndexRec != NULL)
  325. {
  326. free(m_prgIndexRec);
  327. m_prgIndexRec = NULL;
  328. }
  329. m_nMaxRec = 0;
  330. m_nCurrRec = 0;
  331. m_cchTextProcessed = 0;
  332. m_cwcSrcPos = 0;
  333. m_pWordSink = NULL;
  334. m_pPhraseSink = NULL;
  335. m_wzRomaji[0] = L'\0';
  336. m_cchRomaji = 0;
  337. m_cchPrefix = 0;
  338. m_fAddRomaji = FALSE;
  339. m_nFinalHead = 0;
  340. }
  341. // CIndexInfo::AddIndex
  342. //
  343. // add index term information
  344. //
  345. // Parameters:
  346. // pwzIndex -> (const WCHAR*) index string
  347. // cchIndex -> (int) length of index string
  348. // nFT -> (int) first position of original input
  349. // nLT -> (int) last position of original input
  350. // fWeight -> (float) weight value of index record
  351. //
  352. // Result:
  353. // (BOOL)
  354. //
  355. // 19MAR01 bhshin began
  356. BOOL CIndexInfo::AddIndex(const WCHAR *pwzIndex, int cchIndex, float fWeight, int nFT, int nLT)
  357. {
  358. WCHAR wzIndex[MAX_INDEX_STRING+1];
  359. int nMaxRec, nNewRec;
  360. INDEX_REC *prgIndexRec;
  361. int nLTAdd;
  362. // parameter validation
  363. if (pwzIndex == 0 || cchIndex <= 0)
  364. return FALSE;
  365. if (nFT < 0 || nLT < 0 || fWeight < 0)
  366. return FALSE;
  367. if ((m_cchRomaji + cchIndex) > MAX_INDEX_STRING)
  368. return FALSE;
  369. // make sure if there is enough room for new record (maximum 3 records can be added)
  370. if (m_nMaxRec <= m_nCurrRec + 3)
  371. {
  372. nMaxRec = m_nMaxRec + RECORD_CLUMP_SIZE;
  373. prgIndexRec = (INDEX_REC*)realloc(m_prgIndexRec, nMaxRec * sizeof(INDEX_REC));
  374. if (prgIndexRec == NULL)
  375. return FALSE;
  376. m_nMaxRec = nMaxRec;
  377. m_prgIndexRec = prgIndexRec;
  378. }
  379. // set up index string and correct LT value
  380. wcsncpy(wzIndex, pwzIndex, cchIndex);
  381. wzIndex[cchIndex] = L'\0';
  382. nLTAdd = nLT;
  383. if (nLT >= 0 && m_cchRomaji > 0)
  384. nLTAdd += m_cchRomaji;
  385. // if added record is leading one and there is just length one romaji,
  386. // then conjoin leading romaji & leading index string, and add merged term
  387. if (nFT == 0 && m_cchRomaji == 1)
  388. {
  389. WCHAR wzMerge[MAX_INDEX_STRING+1];
  390. wcscpy(wzMerge, m_wzRomaji);
  391. wcscat(wzMerge, wzIndex);
  392. if (!IsExistIndex(wzMerge))
  393. {
  394. // add index term
  395. nNewRec = m_nCurrRec;
  396. m_nCurrRec++;
  397. wcscpy(m_prgIndexRec[nNewRec].wzIndex, wzMerge);
  398. m_prgIndexRec[nNewRec].cchIndex = cchIndex + m_cchRomaji;
  399. m_prgIndexRec[nNewRec].nFT = nFT;
  400. m_prgIndexRec[nNewRec].nLT = nLTAdd;
  401. m_prgIndexRec[nNewRec].fWeight = fWeight;
  402. m_prgIndexRec[nNewRec].nNext = 0;
  403. WB_LOG_ADD_INDEX(wzMerge, cchIndex + m_cchRomaji, INDEX_SYMBOL);
  404. ATLASSERT(m_prgIndexRec[nNewRec].nFT <= m_prgIndexRec[nNewRec].nLT);
  405. }
  406. // add index term removing prefix
  407. if (m_cchPrefix > 0)
  408. {
  409. // add index term
  410. if (!IsExistIndex(wzMerge + m_cchPrefix))
  411. {
  412. nNewRec = m_nCurrRec;
  413. m_nCurrRec++;
  414. wcscpy(m_prgIndexRec[nNewRec].wzIndex, wzMerge + m_cchPrefix);
  415. m_prgIndexRec[nNewRec].cchIndex = cchIndex + m_cchRomaji - m_cchPrefix;
  416. m_prgIndexRec[nNewRec].nFT = nFT + m_cchPrefix;
  417. m_prgIndexRec[nNewRec].nLT = nLTAdd;
  418. m_prgIndexRec[nNewRec].fWeight = fWeight;
  419. m_prgIndexRec[nNewRec].nNext = 0;
  420. WB_LOG_ADD_INDEX(wzMerge + m_cchPrefix, cchIndex + m_cchRomaji - m_cchPrefix, INDEX_SYMBOL);
  421. ATLASSERT(m_prgIndexRec[nNewRec].nFT <= m_prgIndexRec[nNewRec].nLT);
  422. }
  423. }
  424. }
  425. else
  426. {
  427. if (!IsExistIndex(wzIndex))
  428. {
  429. // add index term
  430. nNewRec = m_nCurrRec;
  431. m_nCurrRec++;
  432. wcscpy(m_prgIndexRec[nNewRec].wzIndex, wzIndex);
  433. m_prgIndexRec[nNewRec].cchIndex = cchIndex;
  434. m_prgIndexRec[nNewRec].nFT = nFT + m_cchRomaji;
  435. m_prgIndexRec[nNewRec].nLT = nLTAdd;
  436. m_prgIndexRec[nNewRec].fWeight = fWeight;
  437. m_prgIndexRec[nNewRec].nNext = 0;
  438. ATLASSERT(m_prgIndexRec[nNewRec].nFT <= m_prgIndexRec[nNewRec].nLT);
  439. }
  440. // if there is a romaji and it has not added yet, then add it just one time
  441. if (m_cchRomaji > 1 && m_fAddRomaji == FALSE)
  442. {
  443. if (!IsExistIndex(m_wzRomaji))
  444. {
  445. // add index term
  446. nNewRec = m_nCurrRec;
  447. m_nCurrRec++;
  448. wcscpy(m_prgIndexRec[nNewRec].wzIndex, m_wzRomaji);
  449. m_prgIndexRec[nNewRec].cchIndex = m_cchRomaji;
  450. m_prgIndexRec[nNewRec].nFT = 0;
  451. m_prgIndexRec[nNewRec].nLT = m_cchRomaji - 1;
  452. m_prgIndexRec[nNewRec].fWeight = WEIGHT_HARD_MATCH;
  453. m_prgIndexRec[nNewRec].nNext = 0;
  454. WB_LOG_ADD_INDEX(m_wzRomaji, m_cchRomaji, INDEX_SYMBOL);
  455. ATLASSERT(m_prgIndexRec[nNewRec].nFT <= m_prgIndexRec[nNewRec].nLT);
  456. }
  457. // if there is a prefix, then add index term removing the prefix
  458. if (m_cchPrefix > 0)
  459. {
  460. if (!IsExistIndex(m_wzRomaji + m_cchPrefix))
  461. {
  462. // add index term
  463. nNewRec = m_nCurrRec;
  464. m_nCurrRec++;
  465. wcscpy(m_prgIndexRec[nNewRec].wzIndex, m_wzRomaji + m_cchPrefix);
  466. m_prgIndexRec[nNewRec].cchIndex = m_cchRomaji - m_cchPrefix;
  467. m_prgIndexRec[nNewRec].nFT = m_cchPrefix;
  468. m_prgIndexRec[nNewRec].nLT = m_cchRomaji-m_cchPrefix-1;
  469. m_prgIndexRec[nNewRec].fWeight = WEIGHT_HARD_MATCH;
  470. m_prgIndexRec[nNewRec].nNext = 0;
  471. WB_LOG_ADD_INDEX(m_wzRomaji + m_cchPrefix, m_cchRomaji - m_cchPrefix, INDEX_SYMBOL);
  472. ATLASSERT(m_prgIndexRec[nNewRec].nFT <= m_prgIndexRec[nNewRec].nLT);
  473. }
  474. }
  475. m_fAddRomaji = TRUE;
  476. }
  477. }
  478. return TRUE;
  479. }
  480. // CIndexInfo::FindAndMergeIndexTerm
  481. //
  482. // find index term matching FT, LT
  483. //
  484. // Parameters:
  485. // pIndexSrc -> (INDEX_REC *) index term to merge
  486. // nFT -> (int) FT position, -1 means don't care
  487. // nLT -> (int) LT position, -1 means don't care
  488. //
  489. // Result:
  490. // (BOOL) TRUE if succeed, otherwise return FALSE
  491. //
  492. // 19MAR01 bhshin began
  493. BOOL CIndexInfo::FindAndMergeIndexTerm(INDEX_REC *pIndexSrc, int nFT, int nLT)
  494. {
  495. INDEX_REC *pIndexRec;
  496. WCHAR wchIndex;
  497. int cchIndex;
  498. int nFTAdd, nLTAdd;
  499. int nNewRec;
  500. WCHAR wzIndex[MAX_INDEX_STRING+1];
  501. BOOL fFound = FALSE;
  502. if (pIndexSrc == NULL)
  503. return FALSE;
  504. if (nFT < 0 && nLT < 0)
  505. return FALSE;
  506. for (int i = MIN_RECORD; i < m_nCurrRec; i++)
  507. {
  508. pIndexRec = &m_prgIndexRec[i];
  509. if (pIndexRec->cchIndex == 0)
  510. continue;
  511. if (nFT != -1 && pIndexRec->nFT != nFT)
  512. continue;
  513. if (nLT != -1 && pIndexRec->nLT != nLT)
  514. continue;
  515. // found it
  516. // check [��,��] suffix case, then don't merge and just add itself
  517. if (pIndexRec->nFT > 0 && pIndexRec->cchIndex == 1)
  518. {
  519. wchIndex = pIndexRec->wzIndex[0];
  520. if (wchIndex == 0xB4E4 || wchIndex == 0xBFD0)
  521. continue;
  522. }
  523. // check buffer size
  524. cchIndex = wcslen(pIndexRec->wzIndex);
  525. if (cchIndex == 0 || cchIndex + 1 >= MAX_INDEX_STRING)
  526. continue;
  527. if (pIndexSrc->nFT == 0)
  528. {
  529. wcscpy(wzIndex, pIndexSrc->wzIndex);
  530. wcscat(wzIndex, pIndexRec->wzIndex);
  531. nFTAdd = pIndexSrc->nFT;
  532. nLTAdd = pIndexRec->nLT;
  533. }
  534. else
  535. {
  536. wcscpy(wzIndex, pIndexRec->wzIndex);
  537. wcscat(wzIndex, pIndexSrc->wzIndex);
  538. nFTAdd = pIndexRec->nFT;
  539. nLTAdd = pIndexSrc->nLT;
  540. }
  541. fFound = TRUE;
  542. // check it dupliate index exist
  543. if (!IsExistIndex(wzIndex))
  544. {
  545. WB_LOG_ADD_INDEX(wzIndex, cchIndex+1, INDEX_PARSE);
  546. // add merged one
  547. nNewRec = m_nCurrRec;
  548. m_nCurrRec++;
  549. wcscpy(m_prgIndexRec[nNewRec].wzIndex, wzIndex);
  550. m_prgIndexRec[nNewRec].cchIndex = cchIndex+1;
  551. m_prgIndexRec[nNewRec].nFT = nFTAdd;
  552. m_prgIndexRec[nNewRec].nLT = nLTAdd;
  553. m_prgIndexRec[nNewRec].fWeight = pIndexSrc->fWeight;
  554. m_prgIndexRec[nNewRec].nNext = 0;
  555. }
  556. }
  557. return fFound;
  558. }
  559. // CIndexInfo::MakeSingleLengthMergedIndex
  560. //
  561. // make single length merged index term (MSN search)
  562. //
  563. // Parameters:
  564. //
  565. // Result:
  566. // (BOOL) TRUE if succeed, otherwise return FALSE
  567. //
  568. // 19MAR01 bhshin began
  569. BOOL CIndexInfo::MakeSingleLengthMergedIndex()
  570. {
  571. INDEX_REC *pIndexRec;
  572. int nFT;
  573. WCHAR wchIndex;
  574. BOOL fFound;
  575. if (m_pWordSink == NULL)
  576. return FALSE;
  577. WB_LOG_ROOT_INDEX(L"", TRUE); // set root empty
  578. for (int i = MIN_RECORD; i < m_nCurrRec; i++)
  579. {
  580. pIndexRec = &m_prgIndexRec[i];
  581. if (pIndexRec->cchIndex == 1)
  582. {
  583. WB_LOG_REMOVE_INDEX(pIndexRec->wzIndex);
  584. nFT = pIndexRec->nFT;
  585. wchIndex = pIndexRec->wzIndex[0];
  586. // check [��,��] suffix case, then just remove it
  587. if ((wchIndex == 0xB4E4 || wchIndex == 0xBFD0) && nFT > 0)
  588. {
  589. // make it empty
  590. pIndexRec->cchIndex = 0;
  591. pIndexRec->wzIndex[0] = L'\0';
  592. pIndexRec->nFT = 0;
  593. pIndexRec->nLT = 0;
  594. pIndexRec->nNext = 0;
  595. continue;
  596. }
  597. // find conjoined term and make merged term and put it
  598. fFound = FALSE;
  599. if (nFT == 0 && pIndexRec->nLT != -1)
  600. fFound = FindAndMergeIndexTerm(pIndexRec, pIndexRec->nLT + 1, -1);
  601. else
  602. fFound = FindAndMergeIndexTerm(pIndexRec, -1, nFT-1);
  603. if (fFound)
  604. {
  605. // make it empty
  606. pIndexRec->cchIndex = 0;
  607. pIndexRec->wzIndex[0] = L'\0';
  608. pIndexRec->nFT = 0;
  609. pIndexRec->nLT = 0;
  610. pIndexRec->nNext = 0;
  611. continue;
  612. }
  613. }
  614. }
  615. return TRUE;
  616. }
  617. // CIndexInfo::InsertFinalIndex
  618. //
  619. // search index term starting with given FT and insert it to final list
  620. //
  621. // Parameters:
  622. // nFT -> (int) first pos of index term
  623. //
  624. // Result:
  625. // (BOOL) TRUE if succeed, otherwise return FALSE
  626. //
  627. // 19MAR01 bhshin began
  628. BOOL CIndexInfo::InsertFinalIndex(int nFT)
  629. {
  630. INDEX_REC *pIndexRec;
  631. int cchIndex, nCurr, nPrev;
  632. BOOL fInsert;
  633. for (int nRecord = MIN_RECORD; nRecord < m_nCurrRec; nRecord++)
  634. {
  635. pIndexRec = &m_prgIndexRec[nRecord];
  636. cchIndex = pIndexRec->cchIndex;
  637. if (cchIndex == 0)
  638. continue; // skip removed entry
  639. if (pIndexRec->nFT != nFT)
  640. continue; // FT match index found
  641. // search inserting position. final list ordered by increamental length.
  642. nCurr = m_nFinalHead;
  643. nPrev = -1;
  644. fInsert = FALSE;
  645. while (!fInsert)
  646. {
  647. if (nCurr != 0)
  648. {
  649. if (m_prgIndexRec[nCurr].nFT != nFT || cchIndex > m_prgIndexRec[nCurr].cchIndex)
  650. {
  651. nPrev = nCurr;
  652. nCurr = m_prgIndexRec[nCurr].nNext;
  653. continue;
  654. }
  655. }
  656. // insert it
  657. if (nPrev == -1)
  658. {
  659. pIndexRec->nNext = m_nFinalHead;
  660. m_nFinalHead = nRecord;
  661. }
  662. else
  663. {
  664. pIndexRec->nNext = m_prgIndexRec[nPrev].nNext;
  665. m_prgIndexRec[nPrev].nNext = nRecord;
  666. }
  667. fInsert = TRUE;
  668. }
  669. }
  670. return TRUE;
  671. }
  672. // CIndexInfo::PutFinalIndexList
  673. //
  674. // put word final index list (index time)
  675. //
  676. // Parameters:
  677. // lpcwzSrc -> (LPCWSTR) source string to get source pos
  678. //
  679. // Result:
  680. // (BOOL) TRUE if succeed, otherwise return FALSE
  681. //
  682. // 19MAR01 bhshin began
  683. BOOL CIndexInfo::PutFinalIndexList(LPCWSTR lpcwzSrc)
  684. {
  685. int nCurr, nNext;
  686. int nNextFT;
  687. WCHAR *pwzFind;
  688. int cchProcessed, cwcSrcPos;
  689. INDEX_REC *pIndexRec;
  690. if (m_pWordSink == NULL)
  691. return FALSE;
  692. // fill final index list
  693. for (int i = 0; i < m_cchTextProcessed; i++)
  694. {
  695. InsertFinalIndex(i);
  696. }
  697. // put final index list
  698. nCurr = m_nFinalHead;
  699. while (nCurr != 0)
  700. {
  701. ATLASSERT(nCurr < m_nCurrRec);
  702. pIndexRec = &m_prgIndexRec[nCurr];
  703. // skip removed record
  704. if (pIndexRec->cchIndex == 0)
  705. continue;
  706. // check if index term has substring or not
  707. pwzFind = wcsstr(lpcwzSrc, pIndexRec->wzIndex);
  708. if (pwzFind == NULL)
  709. continue;
  710. cwcSrcPos = m_cwcSrcPos + (int)(pwzFind - lpcwzSrc);
  711. cchProcessed = m_cchTextProcessed - (int)(pwzFind - lpcwzSrc);
  712. // get next FT
  713. nNext = pIndexRec->nNext;
  714. if (nNext == 0)
  715. nNextFT = -1;
  716. else
  717. nNextFT = m_prgIndexRec[nNext].nFT;
  718. if (pIndexRec->nFT != nNextFT)
  719. {
  720. m_pWordSink->PutWord(pIndexRec->cchIndex, pIndexRec->wzIndex,
  721. pIndexRec->cchIndex, cwcSrcPos);
  722. }
  723. else
  724. {
  725. m_pWordSink->PutAltWord(pIndexRec->cchIndex, pIndexRec->wzIndex,
  726. pIndexRec->cchIndex, cwcSrcPos);
  727. }
  728. nCurr = pIndexRec->nNext;
  729. }
  730. return TRUE;
  731. }
  732. // CIndexInfo::MakeSeqIndexList
  733. //
  734. // make final sequence index list
  735. //
  736. // Parameters:
  737. // nFT -> (int) matching FT pos
  738. // plistFinal -> (CRecList*) previous sequence list
  739. //
  740. // Result:
  741. // (BOOL) TRUE if succeed, otherwise return FALSE
  742. //
  743. // 20MAR01 bhshin began
  744. BOOL CIndexInfo::MakeSeqIndexList(int nFT/*=0*/, CRecList *plistFinal/*=NULL*/)
  745. {
  746. int nRecord;
  747. INDEX_REC *pIndexRec;
  748. BOOL fFound = FALSE;
  749. for (nRecord = MIN_RECORD; nRecord < m_nCurrRec; nRecord++)
  750. {
  751. CRecList listTemp;
  752. pIndexRec = &m_prgIndexRec[nRecord];
  753. // skip removed entry & skip not FT matching entry
  754. if (pIndexRec->cchIndex != 0 && pIndexRec->nFT == nFT)
  755. {
  756. fFound = TRUE;
  757. try
  758. {
  759. if (pIndexRec->nLT >= m_cchTextProcessed-1)
  760. {
  761. if (plistFinal == NULL)
  762. {
  763. m_FinalRecList.AddRec(nRecord);
  764. }
  765. else
  766. {
  767. listTemp = *plistFinal;
  768. if (!listTemp.AddRec(nRecord))
  769. return FALSE;
  770. m_FinalRecList += listTemp;
  771. }
  772. }
  773. else
  774. {
  775. if (plistFinal == NULL)
  776. {
  777. if (!listTemp.Initialize())
  778. return FALSE;
  779. }
  780. else
  781. {
  782. listTemp = *plistFinal;
  783. }
  784. if (!listTemp.AddRec(nRecord))
  785. return FALSE;
  786. if (!MakeSeqIndexList(pIndexRec->nLT + 1, &listTemp))
  787. return FALSE;
  788. }
  789. }
  790. catch (...)
  791. {
  792. return FALSE;
  793. }
  794. }
  795. }
  796. if (!fFound && plistFinal != NULL)
  797. {
  798. try
  799. {
  800. m_FinalRecList += *plistFinal;
  801. }
  802. catch(...)
  803. {
  804. return FALSE;
  805. }
  806. }
  807. return TRUE;
  808. }
  809. // CIndexInfo::PutQueryIndexList
  810. //
  811. // call IWordSink::PutWord with collected index terms for Query time
  812. //
  813. // Parameters:
  814. //
  815. // Result:
  816. // (BOOL) TRUE if succeed, otherwise return FALSE
  817. //
  818. // 20MAR01 bhshin began
  819. BOOL CIndexInfo::PutQueryIndexList()
  820. {
  821. int nRecordID;
  822. INDEX_REC *pIndexRec;
  823. WCHAR *pwzIndex;
  824. int cchIndex;
  825. WCHAR wzIndex[MAX_INDEX_STRING+1];
  826. if (m_pWordSink == NULL)
  827. return FALSE;
  828. if (!m_FinalRecList.Initialize())
  829. return FALSE;
  830. if (!MakeSeqIndexList())
  831. return FALSE;
  832. // put query index terms
  833. for (int i = MIN_RECORD; i < m_FinalRecList.m_nCurrRec; i++)
  834. {
  835. nRecordID = m_FinalRecList.m_prgnRecID[i];
  836. if (nRecordID < MIN_RECORD || nRecordID >= m_nCurrRec)
  837. return FALSE; // invalid record id
  838. pIndexRec = &m_prgIndexRec[nRecordID];
  839. if (pIndexRec->nFT == 0 && m_nCurrRec > MIN_RECORD+1)
  840. m_pWordSink->StartAltPhrase();
  841. cchIndex = 0;
  842. pwzIndex = pIndexRec->wzIndex;
  843. while (*pwzIndex != L'\0')
  844. {
  845. if (*pwzIndex == L'\t')
  846. {
  847. if (cchIndex > 0)
  848. {
  849. wzIndex[cchIndex] = L'\0';
  850. m_pWordSink->PutWord(cchIndex, wzIndex,
  851. m_cchTextProcessed, m_cwcSrcPos);
  852. cchIndex = 0;
  853. }
  854. }
  855. else
  856. {
  857. wzIndex[cchIndex++] = *pwzIndex;
  858. }
  859. pwzIndex++;
  860. }
  861. if (cchIndex > 0)
  862. {
  863. wzIndex[cchIndex] = L'\0';
  864. m_pWordSink->PutWord(cchIndex, wzIndex,
  865. m_cchTextProcessed, m_cwcSrcPos);
  866. }
  867. }
  868. if (m_nCurrRec > MIN_RECORD+1)
  869. m_pWordSink->EndAltPhrase();
  870. return TRUE;
  871. }