Windows NT 4.0 source code leak
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

2338 lines
66 KiB

4 years ago
  1. // Tokens.cpp -- Created 2/9/93 by Ron Murray
  2. // Implementation for the CTokenList class
  3. #include "stdafx.h"
  4. #include "Tokens.h"
  5. #include "MemEx.h"
  6. #include "TxDBase.h"
  7. #include <malloc.h>
  8. #include "ftslex.h"
  9. #include "AbrtSrch.h"
  10. #define INC_LAST_CHAR_SIZE 0x10000L
  11. #define WORKBUF_SIZE 0x200
  12. #define BASE_WEIGHT 0x02 // basic character weight (no case or no diacritic)
  13. UINT SortKeyText(PWCHAR pwText, UINT cwText, PWCHAR pwOut, UINT cwOut);
  14. BOOL AllLowerCase(PWCHAR pwText, UINT cwText);
  15. /////////////////////////////////////////////////////////////////////////////
  16. // Worker functions
  17. BOOL HasAPrefix(PWCHAR pwL, UINT cwL, PWCHAR pwR, UINT cwR)
  18. {
  19. if (cwL > cwR) return FALSE;
  20. for (cwL >>= 1; cwL--; pwL++, pwR++)
  21. {
  22. if (*pwL++ != *pwR++)
  23. return FALSE;
  24. if (HIBYTE(*pwL) > BASE_WEIGHT && HIBYTE(*pwL) != HIBYTE(*pwR))
  25. return FALSE;
  26. if (LOBYTE(*pwL) > BASE_WEIGHT && LOBYTE(*pwL) != LOBYTE(*pwR))
  27. return FALSE;
  28. }
  29. return TRUE;
  30. }
  31. BOOL HasASuffix(PWCHAR pwL, UINT cwL, PWCHAR pwR, UINT cwR)
  32. {
  33. if (cwL > cwR) return FALSE;
  34. pwR += cwR - cwL;
  35. for (cwL >>= 1; cwL--; pwL++, pwR++)
  36. {
  37. if (*pwL++ != *pwR++)
  38. return FALSE;
  39. if (HIBYTE(*pwL) > BASE_WEIGHT && HIBYTE(*pwL) != HIBYTE(*pwR))
  40. return FALSE;
  41. if (LOBYTE(*pwL) > BASE_WEIGHT && LOBYTE(*pwL) != LOBYTE(*pwR))
  42. return FALSE;
  43. }
  44. return TRUE;
  45. }
  46. BOOL HasASubstring(PWCHAR pwL, UINT cwL, PWCHAR pwR, UINT cwR)
  47. {
  48. if (cwL > cwR)
  49. return FALSE;
  50. UINT cwDelta = 1 + cwR - cwL;
  51. while (cwDelta--)
  52. if (HasAPrefix(pwL, cwL, pwR++, cwR--))
  53. return TRUE;
  54. return FALSE;
  55. }
  56. // End of Worker functions
  57. /////////////////////////////////////////////////////////////////////////////
  58. #ifdef _DEBUG
  59. CTokenList::CTokenList(BOOL fFromFile, PSZ pszTypeName) : CTextMatrix WithType(pszTypeName)
  60. #else // _DEBUG
  61. CTokenList::CTokenList(BOOL fFromFile) : CTextMatrix()
  62. #endif // _DEBUG
  63. {
  64. m_fFromFileImage = fFromFile;
  65. m_How_Constructed = From_Nothing;
  66. m_cbMaxLength = 0;
  67. m_pbImages = NULL;
  68. m_pwDispImages = NULL;
  69. m_cbImages = 0;
  70. m_cwDispImages = 0;
  71. m_pd = NULL;
  72. m_cd = 0;
  73. m_lcidSorting = LCID(-1);
  74. m_ppdSorted = NULL;
  75. m_ppdTailSorted = NULL;
  76. m_ptdb = NULL;
  77. m_ptklSource = NULL;
  78. m_pafClassifications = NULL;
  79. m_fLeadingElipsis = FALSE;
  80. m_fTrailingElipsis = FALSE;
  81. m_pLRRanking = NULL;
  82. m_pRLRanking = NULL;
  83. }
  84. CTokenList *CTokenList::NewTokenList(PWCHAR pwcDisplay, UINT cwcDisplay,
  85. PDESCRIPTOR pd, int cd, LCID lcid,
  86. PWCHAR pwSortKeys, UINT cwSortKeys,
  87. PDESCRIPTOR *papdLRSorting,
  88. PDESCRIPTOR *papdRLSorting
  89. )
  90. {
  91. CTokenList *ptl= NULL;
  92. __try
  93. {
  94. ptl= New CTokenList(FALSE);
  95. ptl->InitialTokenList(pwcDisplay, cwcDisplay, pd, cd, lcid,
  96. pwSortKeys, cwSortKeys,
  97. papdLRSorting, papdRLSorting
  98. );
  99. }
  100. __finally
  101. {
  102. if (_abnormal_termination() && ptl)
  103. {
  104. delete ptl; ptl= NULL;
  105. }
  106. }
  107. return ptl;
  108. }
  109. void CTokenList::InitialTokenList(PWCHAR pwcDisplay, UINT cwcDisplay,
  110. PDESCRIPTOR pd, int cd, LCID lcid,
  111. PWCHAR pwSortKeys, UINT cwSortKeys,
  112. PDESCRIPTOR *papdLRSorting,
  113. PDESCRIPTOR *papdRLSorting
  114. )
  115. {
  116. // We don't need __try/__finally brackets here because all allocations
  117. // are bound to our token list structure. We assume our caller has
  118. // a __try/__finally bracket which will delete this object in the event
  119. // of an unhandled exception.
  120. m_How_Constructed = From_Images;
  121. m_fLeadingElipsis = FALSE;
  122. m_fTrailingElipsis = FALSE;
  123. m_pLRRanking = NULL;
  124. m_pRLRanking = NULL;
  125. m_ptdb = NULL;
  126. m_cd = cd;
  127. m_cwDispImages = cwcDisplay;
  128. m_cbImages = cwSortKeys;
  129. m_lcidSorting = lcid;
  130. m_pd = pd; pd = NULL;
  131. m_pwDispImages = pwcDisplay; pwcDisplay = NULL;
  132. m_pbImages = pwSortKeys; pwSortKeys = NULL;
  133. m_ppdSorted = papdLRSorting; papdLRSorting = NULL;
  134. m_ppdTailSorted = papdRLSorting; papdRLSorting = NULL;
  135. CompleteTokenList();
  136. }
  137. void CTokenList::CompleteTokenList(BOOL fIgnoreSortKeys)
  138. {
  139. LCID lcidUser = GetUserDefaultLCID();
  140. if ((lcidUser & 0x0FF) != (m_lcidSorting & 0x0FF))
  141. {
  142. fIgnoreSortKeys= FALSE;
  143. if (m_pbImages)
  144. {
  145. VFree(m_pbImages); m_pbImages = NULL; m_cbImages = 0;
  146. }
  147. if (m_ppdSorted)
  148. {
  149. VFree(m_ppdSorted); m_ppdSorted = NULL;
  150. }
  151. if (m_ppdTailSorted)
  152. {
  153. VFree(m_ppdTailSorted); m_ppdTailSorted = NULL;
  154. }
  155. m_lcidSorting= lcidUser;
  156. }
  157. if (!m_pbImages && !fIgnoreSortKeys) ConstructSortKeys(m_lcidSorting);
  158. m_clsf.Initial();
  159. m_clsf.ScanAndRankData(m_pbImages, m_cbImages);
  160. ASSERT(m_ppdSorted || m_pbImages);
  161. if (!m_ppdSorted)
  162. {
  163. m_ppdSorted = (PDESCRIPTOR *) VAlloc(FALSE, m_cd * sizeof(PDESCRIPTOR *));
  164. PDESCRIPTOR *ppd = m_ppdSorted;
  165. PDESCRIPTOR pd = m_pd;
  166. UINT cd = m_cd;
  167. for (; cd--; ) *ppd++ = pd++;
  168. qsort(m_ppdSorted, m_cd, sizeof(PDESCRIPTOR *), CompareImagesLR);
  169. }
  170. m_pafClassifications= (PUINT) VAlloc(FALSE, m_cd * sizeof(UINT));
  171. PUINT pf= m_pafClassifications;
  172. PDESCRIPTOR pdNext, *ppdNext;
  173. UINT c;
  174. m_cbMaxLength= 0; // This will be computed on demand...
  175. for (c= m_cd, ppdNext= m_ppdSorted; c--; )
  176. {
  177. pdNext= *ppdNext++;
  178. INT cwDisplayImage= CwDisplay(pdNext);
  179. ASSERT(cwDisplayImage >= 0);
  180. if (cwDisplayImage > m_cwDispMaxLength)
  181. m_cwDispMaxLength= cwDisplayImage;
  182. *pf++ = m_clsf.ClassifyData(pdNext->pbImage, CbImage(pdNext));
  183. }
  184. }
  185. CTokenList *CTokenList::NewTokenList(CTextDatabase *ptdb)
  186. {
  187. CTokenList *ptl= NULL;
  188. __try
  189. {
  190. ptl= New CTokenList(FALSE);
  191. ptl->InitialTokenList(ptdb);
  192. }
  193. __finally
  194. {
  195. if (_abnormal_termination() && ptl)
  196. {
  197. delete ptl; ptl= NULL;
  198. }
  199. }
  200. return ptl;
  201. }
  202. void CTokenList::InitialTokenList(CTextDatabase *ptdb)
  203. {
  204. m_fFromFileImage = FALSE;
  205. m_How_Constructed = TDB_FULL_REF;
  206. ASSERT(ptdb);
  207. m_ptdb= NULL; AttachRef(m_ptdb, ptdb);
  208. m_pbImages = ptdb->ImageBase();
  209. m_pd = ptdb->DescriptorBase();
  210. m_cd = ptdb->m_pdNextGlobal - m_pd;
  211. m_lcidSorting = GetUserDefaultLCID();
  212. m_ppdSorted = ptdb->m_ppdSorted;
  213. m_ppdTailSorted = ptdb->m_ppdTailSorted;
  214. m_pafClassifications = ptdb->m_pafClassifications;
  215. m_fLeadingElipsis = FALSE;
  216. m_fTrailingElipsis = FALSE;
  217. m_pLRRanking = NULL;
  218. m_pRLRanking = NULL;
  219. if (m_ppdSorted && m_cd == ptdb->m_cdSorted)
  220. m_cbMaxLength= ptdb->MaxTokenWidth();
  221. SynchronizeDatabase();
  222. #if 0
  223. long cRefs= 0, iLimit= (m_cd < 59984)? m_cd : 59984, i;
  224. // Reference Statistics:
  225. for (i= 0; i < iLimit; ++i) cRefs += m_pd[i].cReferences;
  226. for (cRefs= 0; i < m_cd; ++i) cRefs += m_pd[i].cReferences;
  227. #endif
  228. }
  229. void CTokenList::ConstructSortKeys(LCID lcid)
  230. {
  231. ASSERT(!m_pbImages);
  232. MY_VIRTUAL_BUFFER mvb;
  233. mvb.Base= NULL;
  234. CreateVirtualBuffer(&mvb, m_cwDispImages, MaxSortKeyBytes(m_cwDispImages));
  235. __try
  236. {
  237. PWCHAR pbImageBase = PWCHAR(mvb.Base);
  238. PWCHAR pb = pbImageBase;
  239. PWCHAR pbLimit = pb + MaxSortKeyBytes(m_cwDispImages);
  240. UINT c;
  241. PDESCRIPTOR pd;
  242. __try
  243. {
  244. for (c = m_cd, pd = m_pd; c--; pd++)
  245. {
  246. pd->pbImage = pb;
  247. pb += LCSortKeyW(lcid, 0, pd->pwDisplay, CwDisplay(pd), pb, pbLimit - pb);
  248. }
  249. }
  250. __except(VirtualBufferExceptionFilter(GetExceptionCode(), GetExceptionInformation(), &mvb))
  251. {
  252. RaiseException(STATUS_NO_MEMORY, EXCEPTION_NONCONTINUABLE, 0, NULL);
  253. }
  254. pd->pbImage = pb;
  255. UINT cwcImages= pb - pbImageBase;
  256. m_cbImages= cwcImages;
  257. ASSERT(!m_pbImages);
  258. m_pbImages = PWCHAR(VAlloc(FALSE, sizeof(WCHAR) * cwcImages));
  259. CopyMemory(m_pbImages, pbImageBase, sizeof(WCHAR) * cwcImages);
  260. INT cwDelta;
  261. for (cwDelta= m_pbImages - pbImageBase, c= m_cd+1, pd= m_pd;
  262. c--;
  263. pd++
  264. )
  265. pd->pbImage += cwDelta;
  266. }
  267. __finally
  268. {
  269. if (mvb.Base) FreeVirtualBuffer(&mvb);
  270. }
  271. }
  272. CTokenList *CTokenList::TokenSubset(PUINT paiSubset, UINT cTokensInSubset)
  273. {
  274. CTokenList *ptl= NULL;
  275. __try
  276. {
  277. ptl= New CTokenList;
  278. ptl->InitialTokenList(this, paiSubset, cTokensInSubset);
  279. }
  280. __finally
  281. {
  282. if (_abnormal_termination() && ptl)
  283. {
  284. delete ptl; ptl= NULL;
  285. }
  286. }
  287. return ptl;
  288. }
  289. void CTokenList::InitialTokenList(CTokenList *ptklSource, PUINT paiSubset, UINT cTokensInSubset)
  290. {
  291. AttachRef(m_ptklSource, ptklSource);
  292. m_How_Constructed = TKL_SUBSET;
  293. m_cbMaxLength = ptklSource->m_cbMaxLength;
  294. m_pbImages = ptklSource->m_pbImages;
  295. m_pwDispImages = ptklSource->m_pwDispImages;
  296. m_cbImages = ptklSource->m_cbImages;
  297. m_cwDispImages = ptklSource->m_cwDispImages;
  298. m_pd = ptklSource->m_pd;
  299. m_cd = cTokensInSubset;
  300. m_ppdTailSorted = NULL;
  301. ASSERT(sizeof(UINT) == sizeof(PDESCRIPTOR));
  302. PDESCRIPTOR *ppdDestination = (PDESCRIPTOR *) paiSubset;
  303. PDESCRIPTOR *papdSource = ptklSource->m_ppdSorted;
  304. m_ppdSorted = ppdDestination;
  305. for (; cTokensInSubset--; )
  306. *ppdDestination++ = papdSource[*paiSubset++];
  307. }
  308. typedef struct _TokenListHeader
  309. {
  310. UINT cbMaxLength;
  311. UINT cbImages;
  312. // UINT offImages;
  313. UINT cwDispMaxLength;
  314. UINT cwDispImages;
  315. UINT offwDispImages;
  316. UINT cDescriptors;
  317. UINT offDescriptors;
  318. UINT offppdSorted;
  319. UINT offppdTailSorted;
  320. UINT offpafClassifications;
  321. UINT offClassifier;
  322. UINT cnTokenSortKeys;
  323. UINT cnDispSortKeys;
  324. UINT lcid;
  325. PDESCRIPTOR pdOld;
  326. } TokenListHeader;
  327. typedef struct _TokenListHeader2
  328. {
  329. UINT cwDispMaxLength;
  330. UINT cwDispImages;
  331. UINT offwDispImages;
  332. UINT cnDispSortKeys;
  333. UINT cwSortKeyImages;
  334. UINT offwSortKeyImages;
  335. UINT cnSortKeyImages;
  336. UINT cDescriptors;
  337. UINT offReferenceCounts;
  338. UINT offDescriptorFlags;
  339. UINT offppdSorted;
  340. UINT offppdTailSorted;
  341. UINT lcidSorting;
  342. } TokenListHeader2;
  343. // Save/Load Interface --
  344. void CTokenList::StoreImage2(CPersist *pDiskImage, BOOL fIgnoreSortKeys)
  345. {
  346. ASSERT(m_How_Constructed == From_Images);
  347. TokenListHeader2 *ptlh= (TokenListHeader2 *) pDiskImage->ReserveTableSpace(sizeof(TokenListHeader2));
  348. ptlh->cwDispMaxLength = MaxWidthToken();
  349. ptlh->cwDispImages = m_cwDispImages;
  350. ptlh->cDescriptors = m_cd;
  351. ptlh->lcidSorting = GetUserDefaultLCID();
  352. ptlh->offwDispImages = pDiskImage->NextOffset(); ptlh->cnDispSortKeys = pDiskImage->Encode(PBYTE(m_pwDispImages), m_cwDispImages * sizeof(WCHAR));
  353. ptlh->cwSortKeyImages = m_cbImages;
  354. if (fIgnoreSortKeys) ptlh->offwSortKeyImages = 0;
  355. else ptlh->offwSortKeyImages = pDiskImage->NextOffset(); ptlh->cnSortKeyImages = pDiskImage->Encode(PBYTE(m_pbImages ), m_cbImages * sizeof(WCHAR));
  356. PUINT pcRefs = NULL;
  357. CCompressedSet* pcsOffsets = NULL;
  358. __try
  359. {
  360. pcRefs= PUINT(VAlloc(FALSE, sizeof(UINT) * m_cd));
  361. PUINT pui;
  362. PDESCRIPTOR pd;
  363. PWCHAR pwcBase= m_pwDispImages;
  364. PBYTE pb;
  365. UINT c;
  366. for (pd= m_pd, c= m_cd, pui= pcRefs; c--; ) *pui++ = (pd++)->cReferences;
  367. ptlh->offReferenceCounts = pDiskImage->NextOffset(); pDiskImage->WriteDWords(pcRefs, m_cd);
  368. for (pd= m_pd, c= m_cd, pb= PBYTE(pcRefs); c--; pd++)
  369. {
  370. *pb++ = pd->bCharset;
  371. *pb++ = pd->fImageFlags;
  372. }
  373. ptlh->offDescriptorFlags = pDiskImage->NextOffset(); pDiskImage->WriteBytes(PBYTE(pcRefs), m_cd * 2);
  374. for (pwcBase= m_pwDispImages, pd= m_pd, c= m_cd, pui= pcRefs; c--; )
  375. *pui++ = (pd++)->pwDisplay - pwcBase;
  376. pcsOffsets= CCompressedSet::NewCompressedSet(pcRefs, m_cd, m_cwDispImages);
  377. pcsOffsets->StoreImage(pDiskImage);
  378. delete pcsOffsets; pcsOffsets= NULL;
  379. if (!fIgnoreSortKeys)
  380. {
  381. for (pwcBase= m_pbImages, pd= m_pd, c= m_cd, pui= pcRefs; c--; )
  382. *pui++ = (pd++)->pbImage - pwcBase;
  383. pcsOffsets= CCompressedSet::NewCompressedSet(pcRefs, m_cd, m_cbImages);
  384. pcsOffsets->StoreImage(pDiskImage);
  385. }
  386. }
  387. __finally
  388. {
  389. if (pcRefs ) { VFree(pcRefs); pcRefs = NULL; }
  390. if (pcsOffsets) { delete pcsOffsets; pcsOffsets = NULL; }
  391. }
  392. ptlh->offppdSorted = StoreSortOrder(pDiskImage, m_ppdSorted);
  393. ptlh->offppdTailSorted = StoreSortOrder(pDiskImage, PPDTailSorting());
  394. }
  395. UINT CTokenList::StoreSortOrder(CPersist *pDiskImage, PDESCRIPTOR *ppdSortOrder)
  396. {
  397. PUINT puiGrade = NULL;
  398. PBYTE pbSlice = NULL;
  399. UINT offset= pDiskImage->NextOffset();
  400. __try
  401. {
  402. PUINT pui= puiGrade = PUINT(VAlloc(FALSE, m_cd * sizeof(UINT)));
  403. PDESCRIPTOR *ppd = ppdSortOrder;
  404. UINT c = m_cd;
  405. for (; c--; ) *pui++ = *ppd++ - m_pd;
  406. UINT cb= ((m_cd + 3) >> 2) << 2;
  407. pbSlice = PBYTE(VAlloc(FALSE, cb * sizeof(BYTE)));
  408. PBYTE pb;
  409. if (m_cd > 0x1000000)
  410. {
  411. for (c= m_cd, pui= puiGrade, pb= pbSlice; c--; ) *pb++ = (*pui++) >> 24;
  412. pDiskImage->WriteBytes(pbSlice, cb);
  413. }
  414. if (m_cd > 0x10000)
  415. {
  416. for (c= m_cd, pui= puiGrade, pb= pbSlice; c--; ) *pb++ = (*pui++) >> 16;
  417. pDiskImage->WriteBytes(pbSlice, cb);
  418. }
  419. if (m_cd > 0x0100)
  420. {
  421. for (c= m_cd, pui= puiGrade, pb= pbSlice; c--; ) *pb++ = (*pui++) >> 8;
  422. pDiskImage->WriteBytes(pbSlice, cb);
  423. }
  424. for (c= m_cd, pui= puiGrade, pb= pbSlice; c--; ) *pb++ = *pui++;
  425. pDiskImage->WriteBytes(pbSlice, cb);
  426. }
  427. __finally
  428. {
  429. if (puiGrade) { VFree(puiGrade); puiGrade = NULL; }
  430. if (pbSlice ) { VFree(pbSlice ); pbSlice = NULL; }
  431. }
  432. return offset;
  433. }
  434. PDESCRIPTOR *CTokenList::LoadSortOrder(CPersist *pDiskImage, UINT offset)
  435. {
  436. PUINT puiGrade = NULL;
  437. __try
  438. {
  439. puiGrade= PUINT(VAlloc(TRUE, m_cd * sizeof(UINT)));
  440. UINT cb= ((m_cd + 3) >> 2) << 2;
  441. PBYTE pbBase = PBYTE(pDiskImage->LocationOf(offset));
  442. UINT c;
  443. PBYTE pb;
  444. PUINT pui;
  445. if (m_cd > 0x1000000)
  446. {
  447. for (c= m_cd, pb= pbBase, pui= puiGrade; c--; )
  448. *pui++ |= UINT(*pb++) << 24;
  449. pbBase += cb;
  450. }
  451. if (m_cd > 0x10000)
  452. {
  453. for (c= m_cd, pb= pbBase, pui= puiGrade; c--; )
  454. *pui++ |= UINT(*pb++) << 16;
  455. pbBase += cb;
  456. }
  457. if (m_cd > 0x100)
  458. {
  459. for (c= m_cd, pb= pbBase, pui= puiGrade; c--; )
  460. *pui++ |= UINT(*pb++) << 8;
  461. pbBase += cb;
  462. }
  463. for (c= m_cd, pb= pbBase, pui= puiGrade; c--; )
  464. *pui++ |= UINT(*pb++);
  465. PDESCRIPTOR *ppd = (PDESCRIPTOR *) puiGrade;
  466. for (pui= puiGrade, c= m_cd; c--; ) *ppd++ = m_pd + *pui++;
  467. }
  468. __finally
  469. {
  470. if (_abnormal_termination() && puiGrade)
  471. {
  472. VFree(puiGrade); puiGrade= NULL;
  473. }
  474. }
  475. return (PDESCRIPTOR *) puiGrade;
  476. }
  477. void CTokenList::SkipImage2(CPersist *pDiskImage)
  478. {
  479. TokenListHeader2 *ptlh= (TokenListHeader2 *) pDiskImage->ReserveTableSpace(sizeof(TokenListHeader2));
  480. CCompressedSet::SkipImage(pDiskImage);
  481. CCompressedSet::SkipImage(pDiskImage);
  482. }
  483. BOOL CTokenList::ConnectImage2(CPersist *pDiskImage, BOOL fIgnoreSortKeys)
  484. {
  485. TokenListHeader2 *ptlh= (TokenListHeader2 *) pDiskImage->ReserveTableSpace(sizeof(TokenListHeader2));
  486. m_cd = ptlh->cDescriptors;
  487. m_How_Constructed = From_Images;
  488. m_cwDispMaxLength = ptlh->cwDispMaxLength;
  489. m_cwDispImages = ptlh->cwDispImages;
  490. m_lcidSorting = ptlh->lcidSorting;
  491. if ( (pDiskImage->IsFTSFile() && pDiskImage->VersionIndex() == FTSVERSION_MIN)
  492. || (pDiskImage->IsFTGFile() && pDiskImage->VersionIndex() == FTGVERSION_MIN)
  493. ) m_lcidSorting = ~GetUserDefaultLCID(); // To force resorting
  494. m_pwDispImages = (PWCHAR)VAlloc(FALSE, ptlh->cwDispImages * sizeof(WCHAR));
  495. Decode((PUINT)pDiskImage->LocationOf(ptlh->offwDispImages), ptlh->cnDispSortKeys, (PBYTE)m_pwDispImages);
  496. BOOL fLcidUnchanged= (m_lcidSorting & 0x0FF) == (GetUserDefaultLCID() & 0x0FF);
  497. if (fLcidUnchanged && !fIgnoreSortKeys)
  498. {
  499. m_cbImages= ptlh->cwSortKeyImages;
  500. m_pbImages= (PWCHAR)VAlloc(FALSE, m_cbImages * sizeof(WCHAR));
  501. Decode((PUINT)pDiskImage->LocationOf(ptlh->offwSortKeyImages), ptlh->cnSortKeyImages, (PBYTE)m_pbImages);
  502. }
  503. m_pd= (PDESCRIPTOR) VAlloc(FALSE, sizeof(DESCRIPTOR) * (m_cd + 1));
  504. m_pd[m_cd].pwDisplay = m_pwDispImages + m_cwDispImages;
  505. if (fLcidUnchanged) m_pd[m_cd].pbImage = m_pbImages + m_cbImages;
  506. PUINT pcRefs = PUINT(pDiskImage->LocationOf(ptlh->offReferenceCounts));
  507. PUINT pui;
  508. PDESCRIPTOR pd;
  509. UINT c;
  510. for (pd= m_pd, pui= pcRefs, c= m_cd; c--; ) (pd++)->cReferences = *pui++;
  511. PBYTE pbFlags= PBYTE(pDiskImage->LocationOf(ptlh->offDescriptorFlags));
  512. for (pd= m_pd, c= m_cd; c--; pd++)
  513. {
  514. pd->bCharset = *pbFlags++;
  515. pd->fImageFlags = *pbFlags++;
  516. }
  517. CCompressedSet* pcsOffsets = NULL;
  518. CCmpEnumerator* pEnumerator = NULL;
  519. __try
  520. {
  521. AttachRef(pcsOffsets, CCompressedSet::CreateImage(pDiskImage));
  522. pEnumerator= CCmpEnumerator::NewEnumerator(pcsOffsets);
  523. for (pd= m_pd, c= m_cd; c; )
  524. {
  525. UINT cChunk= c;
  526. const UINT *pui= pEnumerator->NextDWordsIn(&cChunk);
  527. c -= cChunk;
  528. for (; cChunk--; pd++)
  529. pd->pwDisplay = m_pwDispImages + *pui++;
  530. }
  531. delete pEnumerator; pEnumerator = NULL;
  532. DetachRef(pcsOffsets);
  533. if (fIgnoreSortKeys) __leave;
  534. if (!fLcidUnchanged)
  535. {
  536. CCompressedSet::SkipImage(pDiskImage);
  537. __leave;
  538. }
  539. AttachRef(pcsOffsets, CCompressedSet::CreateImage(pDiskImage));
  540. pEnumerator= CCmpEnumerator::NewEnumerator(pcsOffsets);
  541. for (pd= m_pd, c= m_cd; c; )
  542. {
  543. UINT cChunk= c;
  544. const UINT *pui= pEnumerator->NextDWordsIn(&cChunk);
  545. c -= cChunk;
  546. for (; cChunk--; pd++)
  547. pd->pbImage = m_pbImages + *pui++;
  548. }
  549. }
  550. __finally
  551. {
  552. if (pEnumerator) { delete pEnumerator; pEnumerator = NULL; }
  553. if (pcsOffsets ) DetachRef(pcsOffsets);
  554. }
  555. for (pd= m_pd, c= m_cd; c--; pd++)
  556. pd->cwDisplay = (pd+1)->pwDisplay - pd->pwDisplay;
  557. m_ppdSorted = LoadSortOrder(pDiskImage, ptlh->offppdSorted );
  558. m_ppdTailSorted = LoadSortOrder(pDiskImage, ptlh->offppdTailSorted);
  559. CompleteTokenList(fIgnoreSortKeys);
  560. return !fLcidUnchanged;
  561. }
  562. void CTokenList::StoreImage(CPersist *pDiskImage)
  563. {
  564. ASSERT(m_How_Constructed == From_Images);
  565. TokenListHeader *ptlh= (TokenListHeader *) pDiskImage->ReserveTableSpace(sizeof(TokenListHeader));
  566. ptlh->cbMaxLength = m_cbMaxLength;
  567. ptlh->cbImages = m_cbImages;
  568. ptlh->cwDispMaxLength = m_cwDispMaxLength;
  569. ptlh->cwDispImages = m_cwDispImages;
  570. ptlh->cDescriptors = m_cd;
  571. ptlh->pdOld = m_pd;
  572. ptlh->lcid = GetUserDefaultLCID();
  573. // ptlh->offImages = pDiskImage->NextOffset(); ptlh->cnTokenSortKeys = pDiskImage->Encode(PBYTE(m_pbImages), m_cbImages * sizeof(WCHAR));
  574. ptlh->offwDispImages = pDiskImage->NextOffset(); ptlh->cnDispSortKeys = pDiskImage->Encode(PBYTE(m_pwDispImages), m_cwDispImages * sizeof(WCHAR));
  575. ptlh->offDescriptors = pDiskImage->NextOffset(); pDiskImage->SaveData(PBYTE(m_pd), sizeof(DESCRIPTOR) * (m_cd + 1));
  576. ptlh->offppdSorted = pDiskImage->NextOffset(); pDiskImage->WriteDWords(PUINT(m_ppdSorted), m_cd); ASSERT(sizeof(PDESCRIPTOR) == sizeof(UINT));
  577. ptlh->offppdTailSorted = pDiskImage->NextOffset(); pDiskImage->WriteDWords(PUINT(PPDTailSorting()), m_cd);
  578. ptlh->offpafClassifications = pDiskImage->NextOffset(); pDiskImage->WriteDWords(m_pafClassifications, m_cd);
  579. ptlh->offClassifier = pDiskImage->NextOffset(); pDiskImage->SaveData(PBYTE(&m_clsf), sizeof(m_clsf));
  580. }
  581. CTokenList *CTokenList::CreateImage(CPersist *pDiskImage)
  582. {
  583. CTokenList *ptl= NULL;
  584. __try
  585. {
  586. ptl= New CTokenList(TRUE);
  587. ptl->ConnectImage(pDiskImage);
  588. }
  589. __finally
  590. {
  591. if (_abnormal_termination() && ptl)
  592. {
  593. delete ptl; ptl= NULL;
  594. }
  595. }
  596. return ptl;
  597. }
  598. void CTokenList::SkipImage(CPersist *pDiskImage)
  599. {
  600. TokenListHeader *ptlh= (TokenListHeader *) pDiskImage->ReserveTableSpace(sizeof(TokenListHeader));
  601. }
  602. void CTokenList::ConnectImage(CPersist *pDiskImage)
  603. {
  604. UINT c;
  605. int cbDelta;
  606. LCID lcid = GetUserDefaultLCID();
  607. TokenListHeader *ptlh= (TokenListHeader *) pDiskImage->ReserveTableSpace(sizeof(TokenListHeader));
  608. m_cd = ptlh->cDescriptors;
  609. m_How_Constructed = From_Images;
  610. m_cbMaxLength = ptlh->cbMaxLength;
  611. m_cbImages = ptlh->cbImages;
  612. m_cwDispMaxLength = ptlh->cwDispMaxLength;
  613. m_cwDispImages = ptlh->cwDispImages;
  614. PDESCRIPTOR pd= PDESCRIPTOR(pDiskImage->LocationOf(ptlh->offDescriptors));
  615. m_pd= (PDESCRIPTOR) VAlloc(FALSE, sizeof(DESCRIPTOR) * (m_cd + 1));
  616. CopyMemory(m_pd, pd, sizeof(DESCRIPTOR) * (m_cd + 1));
  617. ValidateHeap();
  618. m_pwDispImages = (PWCHAR)VAlloc(FALSE, ptlh->cwDispImages * sizeof(WCHAR));
  619. int cwcDelta= m_pwDispImages - m_pd->pwDisplay;
  620. ValidateHeap();
  621. for (pd= m_pd, c= m_cd + 1; c--; ++pd) pd->pwDisplay += cwcDelta;
  622. ValidateHeap();
  623. Decode((PUINT)pDiskImage->LocationOf(ptlh->offwDispImages), ptlh->cnDispSortKeys, (PBYTE)m_pwDispImages);
  624. ValidateHeap();
  625. ConstructSortKeys(lcid);
  626. BOOL fValidSortOrder = TRUE;
  627. if (pDiskImage->IsFTSFile())
  628. {
  629. if (pDiskImage->VersionIndex() == FTSVERSION_MIN)
  630. fValidSortOrder = FALSE;
  631. }
  632. else
  633. if (pDiskImage->IsFTGFile())
  634. {
  635. if (pDiskImage->VersionIndex() == FTGVERSION_MIN)
  636. fValidSortOrder = FALSE;
  637. }
  638. if (fValidSortOrder && (ptlh->lcid & 0x0FF) == (lcid & 0x0FF))
  639. {
  640. m_ppdSorted = (PDESCRIPTOR *) VAlloc(FALSE, m_cd * sizeof(PDESCRIPTOR));
  641. PDESCRIPTOR *ppdSrc = (PDESCRIPTOR *) (pDiskImage->LocationOf(ptlh->offppdSorted));
  642. PDESCRIPTOR *ppdDest = m_ppdSorted;
  643. cbDelta= PBYTE(m_pd) - PBYTE(ptlh->pdOld);
  644. for (c= m_cd; c--; ) *ppdDest++ = (PDESCRIPTOR) (PBYTE(*ppdSrc++) + cbDelta);
  645. m_ppdTailSorted= (PDESCRIPTOR *) VAlloc(FALSE, m_cd * sizeof(PDESCRIPTOR));
  646. ppdSrc = (PDESCRIPTOR *) (pDiskImage->LocationOf(ptlh->offppdTailSorted));
  647. ppdDest = m_ppdTailSorted;
  648. for (c= m_cd; c--; ) *ppdDest++ = (PDESCRIPTOR) (PBYTE(*ppdSrc++) + cbDelta);
  649. }
  650. else
  651. {
  652. // major languages do not match between stored sort keys and user LCID.
  653. // The sort ordering is probably different.
  654. UINT cdSorted= 0;
  655. SortTokenImages(m_pd, &m_ppdSorted, &m_ppdTailSorted, &cdSorted, m_cd);
  656. }
  657. // BugBug! The classification code below is broken when the sort keys
  658. // have changed!
  659. m_pafClassifications= PUINT(pDiskImage->LocationOf(ptlh->offpafClassifications));
  660. CopyMemory(&m_clsf, PBYTE(pDiskImage->LocationOf(ptlh->offClassifier)), sizeof(m_clsf));
  661. }
  662. void CTokenList::SynchronizeDatabase()
  663. {
  664. ASSERT(m_How_Constructed == TDB_FULL_REF);
  665. if (m_ptdb->m_pulstate->pld) m_ptdb->SyncForQueries();
  666. m_cbImages = m_ptdb->m_pbNextGalactic - m_pbImages;
  667. m_ppdSorted = m_ptdb->m_ppdSorted;
  668. m_cwDispImages = m_ptdb->m_pwDispNextGalactic - m_pwDispImages;
  669. UINT cd= m_ptdb->m_pdNextGalactic - m_pd;
  670. if (m_cd != cd)
  671. {
  672. m_cd = cd; // NB: MaxWidthToken depends on m_ppdSorted
  673. // and m_cd being set correctly!
  674. m_cbMaxLength= m_ptdb->MaxTokenWidth();
  675. }
  676. m_ppdTailSorted = m_ptdb->m_ppdTailSorted;
  677. m_pafClassifications = m_ptdb->m_pafClassifications;
  678. #if 0
  679. // Some measurement code...
  680. int cTokens= ptdb->TokenCount();
  681. int cRefThreshhold= (cTokens+31)/32, cRefs;
  682. int cIndexSets= 0, cIndices= 0, cBitSets= 0, cActiveBits= 0, cSingletons= 0,
  683. cMaxRefs= 0;
  684. int acLogClassEntries[33] = { 0 },
  685. acLogClassSums [33] = { 0 };
  686. int iClass;
  687. PDESCRIPTOR pd = ptdb->DescriptorBase(),
  688. pdLimit = ptdb->m_pdNextGalactic;
  689. for (; pd < pdLimit; ++pd)
  690. {
  691. cRefs= pd->cReferences;
  692. iClass= CBitsToRepresent(cRefs);
  693. ++acLogClassEntries[iClass];
  694. acLogClassSums [iClass] += cRefs;
  695. if (cRefs > cMaxRefs) cMaxRefs= cRefs;
  696. if (cRefs > cRefThreshhold)
  697. {
  698. ++cBitSets; cActiveBits+= cRefs;
  699. }
  700. else
  701. {
  702. ++cIndexSets; cIndices+= cRefs;
  703. if (cRefs == 1) ++cSingletons;
  704. }
  705. }
  706. #endif
  707. }
  708. CTokenList::~CTokenList()
  709. {
  710. if (m_pLRRanking) VFree(m_pLRRanking);
  711. if (m_pRLRanking) VFree(m_pRLRanking);
  712. switch (m_How_Constructed)
  713. {
  714. case TDB_FULL_REF:
  715. ASSERT(m_ptdb);
  716. DetachRef(m_ptdb);
  717. case From_Nothing:
  718. break;
  719. case TDB_PARTIAL_REF:
  720. if (m_ppdSorted ) VFree(m_ppdSorted);
  721. if (m_ppdTailSorted) VFree(m_ppdTailSorted);
  722. if (m_ptdb) DetachRef(m_ptdb);
  723. if (!m_fFromFileImage && m_pafClassifications) VFree(m_pafClassifications);
  724. break;
  725. case From_Images:
  726. if (m_pbImages ) VFree(m_pbImages );
  727. if (m_pwDispImages ) VFree(m_pwDispImages );
  728. if (m_pd ) VFree(m_pd );
  729. if (m_ppdSorted ) VFree(m_ppdSorted );
  730. if (m_ppdTailSorted) VFree(m_ppdTailSorted);
  731. if (!m_fFromFileImage && m_pafClassifications) VFree(m_pafClassifications);
  732. break;
  733. case TKL_SUBSET:
  734. if (m_ppdSorted) VFree(m_ppdSorted);
  735. if (m_ptklSource) DetachRef(m_ptklSource);
  736. break;
  737. }
  738. }
  739. int CTokenList::MaxWidthToken()
  740. {
  741. if (m_cwDispMaxLength) return m_cwDispMaxLength;
  742. PDESCRIPTOR *ppd= m_ppdSorted;
  743. int c= m_cd;
  744. int cwDispMaxLength= 0;
  745. for (; c--; )
  746. {
  747. int cbWidth;
  748. PDESCRIPTOR pd= *ppd++;
  749. cbWidth= CwDisplay(pd);
  750. if (cwDispMaxLength < cbWidth) cwDispMaxLength= cbWidth;
  751. }
  752. m_cwDispMaxLength= cwDispMaxLength;
  753. return m_cwDispMaxLength;
  754. }
  755. void CTokenList::AddTokens(CTokenList *ptl)
  756. {
  757. PDESCRIPTOR pdResult = NULL;
  758. PDESCRIPTOR *ppdResult = NULL;
  759. PWCHAR pbImages = NULL;
  760. PUINT pafClassifications = NULL;
  761. // Combines the tokens in this list with those in *ptl.
  762. __try
  763. {
  764. if (m_How_Constructed == TDB_FULL_REF) SynchronizeDatabase();
  765. if (ptl->m_How_Constructed == From_Nothing) return;
  766. int cdResult= m_cd + ptl->m_cd;
  767. pdResult = (PDESCRIPTOR ) ExAlloc(LPTR, sizeof( DESCRIPTOR) *(cdResult+1));
  768. ppdResult = (PDESCRIPTOR *) ExAlloc(LPTR, sizeof(PDESCRIPTOR) * cdResult);
  769. MergeImageRefSets((PVOID *) ppdResult, cdResult,
  770. (PVOID *) m_ppdSorted, m_cd,
  771. (PVOID *) ptl->m_ppdSorted, ptl->m_cd,
  772. CompareImagesLR
  773. );
  774. PDESCRIPTOR pdDest, *ppdDest;
  775. int c, cbImages= 0;
  776. for (ppdDest= ppdResult, c= cdResult; c-- ;)
  777. cbImages += CbImage(*ppdDest++);
  778. pbImages= (PWCHAR) ExAlloc(LPTR, cbImages * sizeof(WCHAR));
  779. PWCHAR pbDest= pbImages;
  780. for (c= cdResult, pdDest= pdResult, ppdDest= ppdResult; c--; ++pdDest)
  781. {
  782. *pdDest= **ppdDest;
  783. UINT cb= CbImage(*ppdDest);
  784. *ppdDest++ = pdDest;
  785. wcsncpy(pbDest, pdDest->pbImage, cb);
  786. pdDest->pbImage= pbDest;
  787. pbDest += cb;
  788. }
  789. pdDest->pbImage= pbDest;
  790. pdDest->pwDisplay = pbDest;
  791. m_clsf.Initial();
  792. m_clsf.ScanAndRankData(pbImages, cbImages);
  793. pafClassifications= (PUINT ) VAlloc(FALSE, cdResult * sizeof(BOOL *));
  794. PUINT pf;
  795. for (pf= pafClassifications, c=cdResult, ppdDest= ppdResult; c--; )
  796. {
  797. pdDest= *ppdDest++;
  798. *pf++ = m_clsf.ClassifyData(pdDest->pbImage, CbImage(pdDest));
  799. }
  800. switch (m_How_Constructed)
  801. {
  802. case TDB_FULL_REF:
  803. case From_Nothing:
  804. break;
  805. case TDB_PARTIAL_REF:
  806. VFree(m_ppdSorted);
  807. if (m_ppdTailSorted) VFree(m_ppdTailSorted);
  808. break;
  809. case From_Images:
  810. VFree(m_pbImages);
  811. VFree(m_pwDispImages);
  812. VFree(m_pd);
  813. VFree(m_ppdSorted);
  814. if (m_ppdTailSorted) VFree(m_ppdTailSorted);
  815. if (m_pafClassifications) VFree(m_pafClassifications);
  816. break;
  817. }
  818. m_How_Constructed = From_Images;
  819. m_pbImages = pbImages; pbImages= NULL;
  820. m_pwDispImages = pbImages;
  821. m_cbImages = cbImages;
  822. m_cwDispImages = cbImages;
  823. m_pd = pdResult; pdResult= NULL;
  824. m_cd = cdResult;
  825. m_ppdSorted = ppdResult; ppdResult= NULL;
  826. m_ppdTailSorted = NULL; // This will be computed on demand.
  827. m_ptdb = NULL;
  828. m_cbMaxLength = 0; // This will be computed on demand.
  829. m_pafClassifications = pafClassifications; pafClassifications= NULL;
  830. }
  831. __finally
  832. {
  833. if (_abnormal_termination())
  834. {
  835. if (pdResult ) { VFree(pdResult ); pdResult = NULL; }
  836. if (ppdResult ) { VFree(ppdResult ); ppdResult = NULL; }
  837. if (pbImages ) { VFree(pbImages ); pbImages = NULL; }
  838. if (pafClassifications) { VFree(pafClassifications); pafClassifications = NULL; }
  839. }
  840. }
  841. }
  842. // Sets up incTail according to the algorithm given by Wojciech Rytter in
  843. // his paper, CORRECT PREPROCESSING ALGORITHM FOR BOYER-MOORE STRING SEARCHING,
  844. // Society for Industrial and Applied Mathematics, Vol. 9, No. 3, Aug 1980.
  845. // The labels given below correspond roughly to those given in Rytter's paper.
  846. /* Let incTail[n] be an array of BYTEs, where n=patlen. Then we have four
  847. * possible values for each incTail[j], 1 <= j <= n. We have for 1 <= j <= n
  848. *
  849. * incTail[j] = 2*n-j for the case of *pat[j]...*pat[n] not
  850. * appearing elsewhere in the pattern
  851. * incTail[j] < n where incTail[j]=n-l, l=max{i | *pat[i+1]...
  852. * *pat[n] appears elsewhere in the pattern and
  853. * *pat[l] != *pat[j]}
  854. * n <= incTail[j] < 2*n-j j <= SHIFT(*pat), where SHIFT(*pat) is the
  855. * smallest shift of the pattern on itself s.t.
  856. * the two sections of the pattern match. In
  857. * other words, if SHIFT(*pat)=i, then *pat[1]...
  858. * *pat[n-i]=*pat[i+1]...*pat[n]. For example,
  859. * consider abbaaab. The smallest shift on itself
  860. * is 5, giving us
  861. *
  862. * abbaaab
  863. * --->>abbaaab
  864. *
  865. * Note: if the pattern cannot be shifted on
  866. * itself and still have a prefix match a suffix,
  867. * then SHIFT(*pat)=patlen; for example, consider
  868. * string:
  869. *
  870. * string
  871. * ---->>string
  872. * n < incTail[j] < 2*n-j j > SHIFT(*pat).
  873. */
  874. #define MIN(a,b) (((a) <= (b))? (a) : (b))
  875. void SetUpTables(WORD n, PWCHAR pat, WORD *incLastChar, WORD *incVar, WORD *incTail)
  876. {
  877. WORD k, j, j1, t, t1, q, q1; /* n = patlen */
  878. WORD i, *pi;
  879. WORD *pit;
  880. PWCHAR pb;
  881. for (t= WORD(INC_LAST_CHAR_SIZE-1), pi= incLastChar; t; t--) *pi++ = n;
  882. for (t= n, pb= pat; t--; ) incLastChar[*pb++]= t;
  883. /* Case 1: incTail[j] = 2*n-j */
  884. // A1:
  885. for (i= 2*n, j= n, pit= incTail; j--;) *pit++ = --i;
  886. // for (k=n; k>0; k--) incTail[k-1] = 2*n-k;
  887. /* Case 2: incTail[j] < n, *pat[l] != *pat[j], where l=n-incTail[j] */
  888. // A2:
  889. for (j= n, t= n; j--; --t)
  890. for (incVar[j]= t; t < n && pat[j] != pat[t]; t= incVar[t])
  891. incTail[t]= MIN(incTail[t],n-j-1);
  892. // for (j= n, t= n+1; j>0; --t, --j)
  893. // for (incVar[j-1]= t; t <= n && pat[j-1] != pat[t-1]; t= incVar[t-1])
  894. // incTail[t-1]= MIN(incTail[t-1],n-j);
  895. q = t;
  896. t = n-q;
  897. // B1:
  898. for (j1= 0, t1= (WORD)-1; j1 < t; ++t1, ++j1)
  899. for (incVar[j1]= t1; t1 != (WORD)-1 && pat[j1] != pat[t1]; )
  900. t1= incVar[t1];
  901. /* Case 3: n <= incTail[j] < 2*n-j, j <= SHIFT(*pat) = incVar[0] = t,
  902. AND
  903. Case 4: n < incTail[j] < 2*n-j, j > SHIFT(*pat),
  904. where SHIFT = minimum non-zero shift of pattern itself
  905. */
  906. // B2:
  907. for (q1= 0; q < n-1; q1++, q=q+t-incVar[t-1]-1, t= incVar[t-1]+1)
  908. for (k=q1; k <= q; k++) incTail[k] = MIN(incTail[k],n+q-k);
  909. }
  910. /* Strategic Goal: How to make the TokensContaining function fast for very
  911. Large Token Sets
  912. The time required for an invocation of TokensContaining is a function of
  913. the total string lengths for the token set and the length of the target
  914. string. For a constant target the search operation will be a linear
  915. function of the total string length and the number of partial and
  916. complete string matches. For a given token set the time for a search is
  917. inversely proportional to the length of the target string.
  918. This leads to two very good cases:
  919. * a short or medium list of token strings to search
  920. * a long target string
  921. and one very poor case:
  922. * a long list of token strings to search and a short target string
  923. [Searching for a single character is the worst situation.]
  924. >> First Idea: Tag each token with letter set flags.
  925. This will be implemented as a vector of DWords corresponding to each
  926. token. We'll group the set of 256 characters (65,536 glyphs when we
  927. move to Unicode) into 32 sets based on the count of their use within
  928. the token set. Letters such as "E" and "T" with very high frequencies
  929. will be treated as separate classes while less frequently used characters
  930. (0x255, for example) will be aggregated into classes.
  931. When we search for a target string, we will first construct a 32-bit mask
  932. which defines the classes of characters contained in the target. Then
  933. we'll strobe the masks for each token looking for a class set match:
  934. if ( (afClassMasks[iToken] & fTarget) == fTarget) ...
  935. When we find a class match, then we'll invoke the string search code to
  936. determine whether we have an actual hit or just a class collision. For a
  937. single character target, no string search is necessary.
  938. >> Second Idea: Maintain "Not-Used" Flags for each Character Value
  939. Then when a target string contains a character known to never occur
  940. in the token string set, we can immediately abandon the search.
  941. ** Aside: How to Partition the Set of Characters in Use?
  942. The partitioning algorithm must be bounded linearly in the number of
  943. unique characters actually used in the token set, and it must meet these
  944. goals:
  945. -- The partition sets must be partially ordered. That is, if acRefs is
  946. a vector of reference counts for the code points in each partition,
  947. then acRefs[i] >= acRefs[i+1].
  948. -- The count of members in each partition is also partially ordered. That is,
  949. acMembers[i] <= acMembers[i+1].
  950. -- The number of partitions will be maximized.
  951. -- The count of members in each partition will be minimized given
  952. that the above conditions are satisfied.
  953. An Algorithm:
  954. Assume -- aiSortByCRefs is a permutation vector for the set of unique
  955. character values such that acRefChar[aiSortByCRefs] is
  956. partially ordered.
  957. aiPartitionBase is a 33 element array which will contain
  958. index values defining the characters contained in each
  959. partition. In particular partition j will consist of the
  960. characters
  961. aiSortByCRefs[aiPartitionBase[j]] through
  962. aiSortByCRefs[aiPartitionBase[j+1]-1]
  963. cPartitions= (cCharClasses <= 32)? cCharClasses : 32;
  964. for (j=0; j < cPartitions; ++j) aiPartitionBase[j]=j;
  965. aiPartitionBase[j]= cCharClasses;
  966. if (cCharClasses > 32)
  967. {
  968. for (i= 0; i < 32; ++i)
  969. {
  970. cRefs= 0
  971. for (j= aiPartitionBase[i], limit= aiPartitionBase[i+1];
  972. j < limit;
  973. ++j
  974. ) cRefs+= acRefChar[aiSortByCRefs[j]];
  975. acRefsPartition[i]= cRefs;
  976. }
  977. do
  978. for (fChanges= FALSE, i= 32; --i;)
  979. while ( acRefsPartition[i ] > acRefsPartition[i-1]
  980. && (aiPartitionBase[i+1] - aiPartitionBase [i ]) > 1
  981. )
  982. {
  983. cRefs= acRefChar[aiSortByCRefs[aiPartitionBase[i]++]];
  984. acRefsPartition[i ] -= cRefs;
  985. acRefsPartition[i-1] += cRefs;
  986. fChanges= TRUE;
  987. }
  988. while (fChanges);
  989. }
  990. >> Third Idea: Maintain Search Histories
  991. The browser uses incremental searches most of the time. That is, the next
  992. search request is usually the same as the previous search with either a
  993. character added or a character deleted.
  994. Consider the case where each successive target adds a letter to the right
  995. end of the string. Each search result will always be a subset of the
  996. preceding searches. The basic idea here is to keep an array of WORD or
  997. DWORD flags corresponding to each token, along with the most recently
  998. used target string. Each character in the target will correspond to one
  999. bit in the WORD or DWORD tag. When we add a new trailing character, we'll
  1000. examine only the tokens corresponding to the most recent bit flag, and
  1001. we'll add a new bit for the new search subset, shifting the previous
  1002. bits left by one position.
  1003. Of course this procedure can only continue for 16 or 32 iterations.
  1004. However a target longer than 16 characters will always have very few
  1005. instances among the token set [except in contrived cases].
  1006. Usually we'll have a complete set of flags for the predecessor sets.
  1007. However for those odd cases where we don't, we can keep the string
  1008. corresponding to each bit flag. In general this will allow for rapid
  1009. incremental searches and will also make backspace operations very swift.
  1010. >> Fourth Idea: Maintain Multiple Sort Mappings
  1011. For the "Begin With" case the matching tokens will always be contiguously
  1012. located within the sorted list of tokens. Thus we can use a simple binary
  1013. search to locate the end points of the matching token subset. In a
  1014. similar fashion we can make "End With" matches very fast by constructing
  1015. a sorting map based on the reverse byte ordering of each token.
  1016. */
  1017. CIndicatorSet *CTokenList::TokensContaining(PWCHAR pszSubstring, BOOL fStarting,
  1018. BOOL fEnding, CIndicatorSet *pisFilter)
  1019. {
  1020. // Returns an indicator set for the tokens which contain the string denoted by
  1021. // pszSubstring. If fStarting is TRUE, the string must occur at the beginning
  1022. // of the token. If fEnding is TRUE, the string must occur at the end of token.
  1023. ASSERT(!pisFilter || m_cd == pisFilter->ItemCount());
  1024. if (m_How_Constructed == TDB_FULL_REF) SynchronizeDatabase();
  1025. CIndicatorSet *pisResult= NULL;
  1026. if (fStarting || fEnding)
  1027. {
  1028. if (fStarting)
  1029. AttachRef(pisResult, TokensStartingWith(pszSubstring, fEnding));
  1030. else AttachRef(pisResult, TokensEndingWith (pszSubstring ));
  1031. CAbortSearch::CheckContinueState();
  1032. if (pisFilter) pisResult->ANDWith(pisFilter);
  1033. ForgetRef(pisResult);
  1034. return pisResult;
  1035. }
  1036. CIndicatorSet *pisCandidates = NULL;
  1037. int *paiCandidates = NULL;
  1038. PWORD pIncLastChar = NULL;
  1039. __try
  1040. {
  1041. PWCHAR workL = PWCHAR(_alloca(WORKBUF_SIZE * sizeof(WCHAR)));
  1042. if (!workL) RaiseException(STATUS_NO_MEMORY, EXCEPTION_NONCONTINUABLE, 0, NULL);
  1043. PWCHAR workR = PWCHAR(_alloca(WORKBUF_SIZE * sizeof(WCHAR)));
  1044. if (!workR) RaiseException(STATUS_NO_MEMORY, EXCEPTION_NONCONTINUABLE, 0, NULL);
  1045. pszSubstring++; // skip over alpha-num-punc prefix
  1046. UINT cwL = SortKeyText(pszSubstring, wcslen(pszSubstring), workL, WORKBUF_SIZE);
  1047. if (cwL > 2*MAX_PATTERN_LENGTH) cwL = 2*MAX_PATTERN_LENGTH;
  1048. UINT cwPattern = cwL / 2;
  1049. CClassifier *pclsf = (m_How_Constructed == TDB_FULL_REF) ? &(m_ptdb->m_clsfTokens) : &m_clsf;
  1050. UINT fClass= pclsf->ClassifyData(pszSubstring, cwPattern);
  1051. if (fClass & CClassifier::UNUSED_GLYPH)
  1052. {
  1053. AttachRef(pisResult, CIndicatorSet::NewIndicatorSet(m_cd));
  1054. __leave;
  1055. }
  1056. CAbortSearch::CheckContinueState();
  1057. AttachRef(pisCandidates, CIndicatorSet::NewIndicatorSet(m_cd, m_pafClassifications, fClass, fClass));
  1058. if (pisFilter) pisCandidates->ANDWith(pisFilter);
  1059. UINT cCandidates= pisCandidates->SelectionCount();
  1060. UINT cProcessed;
  1061. UINT c, cwR, cCandidatesChunk;
  1062. int *pi, iRank;
  1063. CAbortSearch::CheckContinueState();
  1064. paiCandidates = (int *) VAlloc(FALSE, CDW_CANDIDATE_BUFFER * sizeof(int));
  1065. ASSERT(paiCandidates);
  1066. if (cwL == 2)
  1067. {
  1068. AttachRef(pisResult, CIndicatorSet::NewIndicatorSet(m_cd));
  1069. for (cProcessed = 0; cProcessed < cCandidates; cProcessed += cCandidatesChunk)
  1070. {
  1071. CAbortSearch::CheckContinueState();
  1072. cCandidatesChunk = pisCandidates->MarkedItems(cProcessed, paiCandidates, CDW_CANDIDATE_BUFFER);
  1073. for (c = cCandidatesChunk, pi = paiCandidates; c--; )
  1074. {
  1075. iRank= *pi++;
  1076. PDESCRIPTOR pdCandidate = m_ppdSorted[iRank];
  1077. cwR = SortKeyText(pdCandidate->pbImage+1, CbImage(pdCandidate)-1, workR, WORKBUF_SIZE);
  1078. if (HasASubstring(workL, cwL, workR, cwR))
  1079. pisResult->RawSetBit(iRank);
  1080. }
  1081. }
  1082. VFree(paiCandidates);
  1083. DetachRef(pisCandidates);
  1084. pisResult->InvalidateCache();
  1085. __leave;
  1086. }
  1087. AttachRef(pisResult, CIndicatorSet::NewIndicatorSet(m_cd));
  1088. pIncLastChar = New WORD[INC_LAST_CHAR_SIZE];
  1089. WORD incVar[MAX_PATTERN_LENGTH], incTail[MAX_PATTERN_LENGTH];
  1090. SetUpTables(cwPattern, pszSubstring, pIncLastChar, incVar, incTail);
  1091. BOOL fAllLowerCase= AllLowerCase(pszSubstring, wcslen(pszSubstring));
  1092. for (cProcessed = 0; cProcessed < cCandidates; cProcessed += cCandidatesChunk)
  1093. {
  1094. CAbortSearch::CheckContinueState();
  1095. cCandidatesChunk = pisCandidates->MarkedItems(cProcessed, paiCandidates, CDW_CANDIDATE_BUFFER);
  1096. for (c = cCandidatesChunk, pi = paiCandidates; c--; )
  1097. {
  1098. iRank= *pi++;
  1099. PDESCRIPTOR pdCandidate = m_ppdSorted[iRank];
  1100. UINT inc;
  1101. PWCHAR pwBase = pdCandidate->pbImage+1;
  1102. PWCHAR pwStart = pwBase;
  1103. PWCHAR pwLimit = pwBase;
  1104. while (HIBYTE(*pwLimit) != SORT_KEY_SEPARATOR)
  1105. pwLimit++;
  1106. for (pwBase += cwPattern-1; pwBase < pwLimit; pwBase += inc)
  1107. {
  1108. inc = *(pIncLastChar + *pwBase);
  1109. if (inc)
  1110. continue;
  1111. int cwUnmatched = cwPattern-1;
  1112. PWCHAR pwTarget = pszSubstring + cwUnmatched;
  1113. for ( ; cwUnmatched && *--pwTarget == *--pwBase; --cwUnmatched) {};
  1114. if (cwUnmatched)
  1115. {
  1116. inc = incTail[cwUnmatched-1];
  1117. continue;
  1118. }
  1119. if (fAllLowerCase)
  1120. {
  1121. pisResult->RawSetBit(iRank);
  1122. break;
  1123. }
  1124. inc = pwBase - pwStart;
  1125. cwR = SortKeyText(pwStart /*+ inc*/, CbImage(pdCandidate)-1 /*- inc*/, workR, WORKBUF_SIZE);
  1126. if (HasASubstring(workL, cwL, workR, cwR))
  1127. pisResult->RawSetBit(iRank);
  1128. break;
  1129. }
  1130. }
  1131. }
  1132. delete pIncLastChar; pIncLastChar = NULL;
  1133. VFree(paiCandidates); paiCandidates = NULL;
  1134. DetachRef(pisCandidates);
  1135. pisResult->InvalidateCache();
  1136. __leave;
  1137. }
  1138. __finally
  1139. {
  1140. if (_abnormal_termination())
  1141. {
  1142. if (pIncLastChar ) { delete pIncLastChar; pIncLastChar = NULL; }
  1143. if (paiCandidates) { VFree(paiCandidates); paiCandidates = NULL; }
  1144. if (pisResult ) DetachRef(pisResult );
  1145. if (pisCandidates) DetachRef(pisCandidates);
  1146. }
  1147. }
  1148. ForgetRef(pisResult);
  1149. return pisResult;
  1150. }
  1151. BOOL CTokenList::TokenSpan(PDESCRIPTOR *ppdSorted, PWCHAR pszSubstring, PCompareImages pCompareImages,
  1152. PUINT piSpanBase, PUINT piSpanLimit)
  1153. {
  1154. // Returns an pair of numbers, *piSpanBase and *piSpanLimit which define a span within
  1155. // the supplied toking sorting vector, pptiSorted. The tokens in that span match the supplied
  1156. // pszSubstring in the sense that either the leading or trailing characters match the substring.
  1157. // The comparison function, pCompareImages, determines whether leading or trailing characters
  1158. // are significant.
  1159. //
  1160. // The explicit result will be FALSE if the span is empty, and TRUE otherwise.
  1161. //
  1162. // Side Effect: The string will be overwritten.
  1163. int cbPattern = wcslen(pszSubstring);
  1164. if (cbPattern > MAX_PATTERN_LENGTH) cbPattern= MAX_PATTERN_LENGTH;
  1165. int cb = cbPattern;
  1166. PWCHAR pb = pszSubstring;
  1167. BOOL fLeftToRight = pCompareImages == &CompareImagesLR;
  1168. WCHAR wSaved;
  1169. DESCRIPTOR tki[2];
  1170. DESCRIPTOR *ptki= &tki[0];
  1171. tki[0].pbImage = pszSubstring;
  1172. tki[1].pbImage = pszSubstring + cbPattern;
  1173. UINT iMatchBase, iMatchLimit, iMatchMiddle, cDiff, iBracketBase, iBracketLimit;
  1174. if (fLeftToRight)
  1175. for (int i = 0; i < cb; i++) // skipping characters by two (alpha sort weights)
  1176. if (HIBYTE(pszSubstring[i]) == SORT_KEY_SEPARATOR) // search for first weight separator
  1177. {
  1178. wSaved = pszSubstring[i];
  1179. pszSubstring[i] = 0;
  1180. cb = i; // return character length
  1181. break;
  1182. }
  1183. // Now we're going to use a binary search algorithm to find
  1184. // the lowest index iMBracketBase where tki <= ppdSorted[iMBracketBase]
  1185. if (0 >= pCompareImages(&ptki, &ppdSorted[0]) ) iBracketBase= 0;
  1186. else
  1187. {
  1188. // Here the loop invariants are:
  1189. //
  1190. // for i in [0 .. iMatchBase], ppdSorted[i] < tki
  1191. //
  1192. // for j in [iMatchLimit .. m_cd-1], ppdSorted[j] >= tki
  1193. for (iMatchBase = 0, iMatchLimit = m_cd; 1 < (cDiff = iMatchLimit - iMatchBase); )
  1194. {
  1195. iMatchMiddle = iMatchBase + cDiff/2;
  1196. CAbortSearch::CheckContinueState();
  1197. if (0 < pCompareImages(&ptki, &ppdSorted[iMatchMiddle]) )
  1198. iMatchBase = iMatchMiddle;
  1199. else iMatchLimit = iMatchMiddle;
  1200. }
  1201. iBracketBase= iMatchLimit;
  1202. }
  1203. if (iBracketBase == m_cd)
  1204. {
  1205. *piSpanBase = 0;
  1206. *piSpanLimit = 0;
  1207. if (fLeftToRight) pszSubstring[cb] = wSaved;
  1208. return FALSE;
  1209. }
  1210. iBracketLimit= m_cd;
  1211. BOOL fReturn = TRUE;
  1212. if (fLeftToRight)
  1213. pb = pszSubstring + cb;
  1214. else
  1215. pb = pszSubstring;
  1216. if (fLeftToRight) tki[1].pbImage = pb--;
  1217. else tki[0].pbImage = pb++;
  1218. (*pb)++;
  1219. if (0 >= pCompareImages(&ptki, &ppdSorted[iBracketBase]))
  1220. {
  1221. *piSpanBase = 0;
  1222. *piSpanLimit = 0;
  1223. fReturn = FALSE;
  1224. }
  1225. else
  1226. {
  1227. for (iMatchBase= iBracketBase, iMatchLimit= m_cd; 1 < (cDiff= iMatchLimit - iMatchBase); )
  1228. {
  1229. iMatchMiddle= iMatchBase + cDiff/2;
  1230. CAbortSearch::CheckContinueState();
  1231. if (0 < pCompareImages(&ptki, &ppdSorted[iMatchMiddle]))
  1232. iMatchBase = iMatchMiddle;
  1233. else iMatchLimit = iMatchMiddle;
  1234. }
  1235. iBracketLimit= iMatchLimit;
  1236. *piSpanBase = iBracketBase;
  1237. *piSpanLimit = iBracketLimit;
  1238. }
  1239. (*pb)--;
  1240. if (fLeftToRight) pszSubstring[cb] = wSaved;
  1241. return fReturn;
  1242. }
  1243. CIndicatorSet *CTokenList::TokensStartingWith(PWCHAR pszSubstring, BOOL fMatching)
  1244. {
  1245. // Returns an indicator set for the tokens which begin with the string denoted by
  1246. // pszSubstring. If fMatching is TRUE, the string must exactly match the token.
  1247. CIndicatorSet *pisResult = NULL;
  1248. __try
  1249. {
  1250. if (m_How_Constructed == TDB_FULL_REF) SynchronizeDatabase();
  1251. UINT iBracketBase, iBracketLimit;
  1252. if (!TokenSpan(m_ppdSorted, pszSubstring, CompareImagesLR, &iBracketBase, &iBracketLimit))
  1253. {
  1254. CAbortSearch::CheckContinueState();
  1255. AttachRef(pisResult, CIndicatorSet::NewIndicatorSet(m_cd));
  1256. __leave;
  1257. }
  1258. if (!fMatching && AllLowerCase(pszSubstring + 1, wcslen(pszSubstring)))
  1259. {
  1260. AttachRef(pisResult, CIndicatorSet::NewIndicatorSet(m_cd, iBracketBase, iBracketLimit - iBracketBase));
  1261. __leave;
  1262. }
  1263. AttachRef(pisResult, CIndicatorSet::NewIndicatorSet(m_cd));
  1264. PWCHAR workL = PWCHAR(_alloca(WORKBUF_SIZE * sizeof(WCHAR)));
  1265. PWCHAR workR = PWCHAR(_alloca(WORKBUF_SIZE * sizeof(WCHAR)));
  1266. UINT cwL = SortKeyText(pszSubstring+1, wcslen(pszSubstring)-1, workL, WORKBUF_SIZE);
  1267. UINT cwR;
  1268. PDESCRIPTOR pdNew;
  1269. for ( ; iBracketBase < iBracketLimit; iBracketBase++)
  1270. {
  1271. CAbortSearch::CheckContinueState();
  1272. pdNew = m_ppdSorted[iBracketBase];
  1273. cwR = SortKeyText(pdNew->pbImage+1, CbImage(pdNew)-1, workR, WORKBUF_SIZE);
  1274. if (!fMatching || cwL == cwR)
  1275. if (HasAPrefix(workL, cwL, workR, cwR))
  1276. pisResult->RawSetBit(iBracketBase);
  1277. }
  1278. pisResult->InvalidateCache();
  1279. __leave;
  1280. }
  1281. __finally
  1282. {
  1283. if (_abnormal_termination())
  1284. {
  1285. if (pisResult) DetachRef(pisResult);
  1286. }
  1287. }
  1288. ForgetRef(pisResult);
  1289. return pisResult;
  1290. }
  1291. const UINT *CTokenList::LRRanking()
  1292. {
  1293. // This routine doesn't need __try/__finally brackets because:
  1294. //
  1295. // 1. It has only one allocation.
  1296. // 2. It doesn't call any other routines that might allocate memory.
  1297. if (m_pLRRanking) return m_pLRRanking;
  1298. m_pLRRanking= (PUINT) VAlloc(FALSE, m_cd * sizeof(UINT));
  1299. UINT c= m_cd;
  1300. PDESCRIPTOR *ppd = m_ppdSorted + c;
  1301. for (; c--; ) m_pLRRanking[*--ppd - m_pd] = c;
  1302. return m_pLRRanking;
  1303. }
  1304. const UINT *CTokenList::RLRanking()
  1305. {
  1306. if (m_pRLRanking) return m_pRLRanking;
  1307. __try
  1308. {
  1309. m_pRLRanking= (PUINT) VAlloc(FALSE, m_cd * sizeof(UINT));
  1310. UINT c= m_cd;
  1311. PDESCRIPTOR *ppd = PPDTailSorting() + c;
  1312. const UINT *puiLR = LRRanking();
  1313. for (; c--; ) m_pRLRanking[c]= puiLR[*--ppd - m_pd];
  1314. }
  1315. __finally
  1316. {
  1317. if (_abnormal_termination() && m_pRLRanking)
  1318. {
  1319. VFree(m_pRLRanking); m_pRLRanking= NULL;
  1320. }
  1321. }
  1322. return m_pRLRanking;
  1323. }
  1324. CIndicatorSet *CTokenList::TokensEndingWith(PWCHAR pszSubstring)
  1325. {
  1326. // Returns an indicator set for the tokens which end with the string denoted by pszSubstring.
  1327. CIndicatorSet *pisResult = NULL;
  1328. __try
  1329. {
  1330. if (m_How_Constructed == TDB_FULL_REF) SynchronizeDatabase();
  1331. UINT iBracketBase, iBracketLimit;
  1332. PDESCRIPTOR *ppdSortOrder = PPDTailSorting();
  1333. if (!TokenSpan(ppdSortOrder, pszSubstring, CompareImagesRL, &iBracketBase, &iBracketLimit))
  1334. {
  1335. CAbortSearch::CheckContinueState();
  1336. AttachRef(pisResult, CIndicatorSet::NewIndicatorSet(m_cd));
  1337. __leave;
  1338. }
  1339. const UINT *pRLRanking= RLRanking();
  1340. AttachRef(pisResult, CIndicatorSet::NewIndicatorSet(m_cd));
  1341. if (AllLowerCase(pszSubstring + 1, wcslen(pszSubstring)))
  1342. {
  1343. for ( ; iBracketBase < iBracketLimit; iBracketBase++)
  1344. {
  1345. UINT iRank= pRLRanking[iBracketBase];
  1346. pisResult->RawSetBit(iRank);
  1347. }
  1348. pisResult->InvalidateCache();
  1349. __leave;
  1350. }
  1351. PWCHAR workL = PWCHAR(_alloca(WORKBUF_SIZE * sizeof(WCHAR)));
  1352. PWCHAR workR = PWCHAR(_alloca(WORKBUF_SIZE * sizeof(WCHAR)));
  1353. UINT cwL = SortKeyText(pszSubstring+1, wcslen(pszSubstring)-1, workL, WORKBUF_SIZE);
  1354. UINT cwR;
  1355. for ( ; iBracketBase < iBracketLimit; iBracketBase++)
  1356. {
  1357. CAbortSearch::CheckContinueState();
  1358. UINT iRank = pRLRanking [iBracketBase];
  1359. PDESCRIPTOR pdNew = ppdSortOrder[iBracketBase];
  1360. cwR = SortKeyText(pdNew->pbImage+1, CbImage(pdNew)-1, workR, WORKBUF_SIZE);
  1361. if (HasASuffix(workL, cwL, workR, cwR))
  1362. pisResult->RawSetBit(iRank);
  1363. }
  1364. pisResult->InvalidateCache();
  1365. __leave;
  1366. }
  1367. __finally
  1368. {
  1369. if (_abnormal_termination())
  1370. {
  1371. if (pisResult) DetachRef(pisResult);
  1372. }
  1373. }
  1374. ForgetRef(pisResult);
  1375. return pisResult;
  1376. }
  1377. PDESCRIPTOR *CTokenList::PPDTailSorting()
  1378. {
  1379. // This routine doesn't need __try/__finally brackets because:
  1380. //
  1381. // 1. It has only one allocation.
  1382. // 2. It doesn't call any other routines that might allocate memory.
  1383. if (m_ppdTailSorted) return m_ppdTailSorted;
  1384. PDESCRIPTOR *ppdTailSorted= (PDESCRIPTOR *) VAlloc(FALSE, m_cd * sizeof(PDESCRIPTOR *));
  1385. memcpy(ppdTailSorted, m_ppdSorted, m_cd * sizeof(PDESCRIPTOR *));
  1386. qsort(ppdTailSorted, m_cd, sizeof(PDESCRIPTOR *), CompareImagesRL);
  1387. m_ppdTailSorted= ppdTailSorted;
  1388. return ppdTailSorted;
  1389. }
  1390. CTokenList *CTokenList::IndicatedTokens(CIndicatorSet *pis, BOOL fFullCopy)
  1391. {
  1392. // Returns a list of the tokens denoted by the indicator set *pis. A NULL
  1393. // pis is considered to be equivalent to an all-1's selection. That is,
  1394. // it selects the entirety of the token list.
  1395. //
  1396. // Setting fFullCopy TRUE usually forces a complete result structure to be
  1397. // constructed. The exception to that rule is when pis is all 1's and this
  1398. // token list is simply an indirect reference to the token set for a text
  1399. // database.
  1400. //
  1401. // When fFullCopy is false we construct only the m_ppdSorted vector and
  1402. // leave references to the rest of the data structures which are presumed
  1403. // to reside within a text database.
  1404. //
  1405. // For token lists that aren't connected to a text database, we always
  1406. // create a complete set of data arrays.
  1407. CTokenList *ptlResult = NULL;
  1408. int *piResult = NULL;
  1409. PUINT pfClassifications = NULL;
  1410. __try
  1411. {
  1412. if (m_How_Constructed == TDB_FULL_REF) SynchronizeDatabase();
  1413. if (m_How_Constructed == From_Images) fFullCopy= TRUE;
  1414. ASSERT(!pis || pis->ItemCount() == m_cd);
  1415. UINT cMarks= pis? pis->SelectionCount() : m_cd;
  1416. AttachRef(ptlResult, New CTokenList);
  1417. if (!cMarks) __leave;
  1418. if (m_How_Constructed == TDB_FULL_REF && cMarks == m_cd)
  1419. {
  1420. ptlResult->m_How_Constructed = m_How_Constructed;
  1421. ptlResult->m_cbMaxLength = m_cbMaxLength;
  1422. ptlResult->m_pbImages = m_pbImages;
  1423. ptlResult->m_pwDispImages = m_pwDispImages;
  1424. ptlResult->m_cbImages = m_cbImages;
  1425. ptlResult->m_cwDispImages = m_cwDispImages;
  1426. ptlResult->m_pd = m_pd;
  1427. ptlResult->m_cd = m_cd;
  1428. ptlResult->m_ppdSorted = m_ppdSorted;
  1429. ptlResult->m_ppdTailSorted = m_ppdTailSorted;
  1430. ptlResult->m_ptdb = NULL;
  1431. ClsAttachRef(ptlResult, m_ptdb, m_ptdb);
  1432. __leave;
  1433. }
  1434. piResult= (int *) ExAlloc(LPTR, cMarks * sizeof(int));
  1435. pfClassifications= (PUINT ) VAlloc(FALSE, cMarks * sizeof(UINT));
  1436. PUINT pfDest;
  1437. int c, *pi;
  1438. if (pis)
  1439. pis->MarkedItems(0, piResult, cMarks);
  1440. else
  1441. for (c= cMarks; c-- ; ) piResult[c]= c;
  1442. for (pfDest= pfClassifications+cMarks, c= cMarks, pi= piResult+cMarks; c--; )
  1443. *--pfDest = m_pafClassifications[*--pi];
  1444. ptlResult->m_pafClassifications= pfClassifications; pfClassifications= NULL;
  1445. memcpy(&ptlResult->m_clsf, &m_clsf, sizeof(m_clsf));
  1446. PDESCRIPTOR *ppdResult= (PDESCRIPTOR *)piResult;
  1447. PDESCRIPTOR *ppd;
  1448. for (c= cMarks, ppd= ppdResult+cMarks, piResult+= cMarks; c--; )
  1449. *--ppd = m_ppdSorted[*--piResult];
  1450. ptlResult->m_ppdSorted = ppdResult; piResult= NULL;
  1451. ptlResult->m_cd = cMarks;
  1452. ptlResult->m_cbMaxLength = 0; // This will be computed on demand.
  1453. if (!fFullCopy)
  1454. {
  1455. ptlResult->m_How_Constructed = TDB_PARTIAL_REF;
  1456. ptlResult->m_pbImages = m_pbImages;
  1457. ptlResult->m_pwDispImages = m_pwDispImages;
  1458. ptlResult->m_cbImages = m_cbImages;
  1459. ptlResult->m_cwDispImages = m_cwDispImages;
  1460. ptlResult->m_pd = m_pd;
  1461. ptlResult->m_ptdb = NULL;
  1462. ptlResult->m_cbMaxLength = m_cbMaxLength;
  1463. ClsAttachRef(ptlResult, m_ptdb, m_ptdb);
  1464. ptlResult->m_ppdTailSorted= NULL; // Will be computed on demand
  1465. __leave;
  1466. }
  1467. ptlResult->m_How_Constructed = From_Images;
  1468. PDESCRIPTOR pd= ptlResult->m_pd= (PDESCRIPTOR) ExAlloc(LPTR, sizeof(DESCRIPTOR) * (cMarks+1));
  1469. int cbImages= 0;
  1470. int cwDispImages = 0;
  1471. for (c= cMarks, ppd= ptlResult->m_ppdSorted; c--; )
  1472. {
  1473. cwDispImages += CwDisplay(*ppd);
  1474. cbImages += CbImage(*ppd++);
  1475. }
  1476. ptlResult->m_cbImages= cbImages;
  1477. ptlResult->m_cwDispImages = cwDispImages;
  1478. PWCHAR pb= ptlResult->m_pbImages= (PWCHAR) ExAlloc(LPTR, cbImages * sizeof(WCHAR));
  1479. PWCHAR pwDisp = ptlResult->m_pwDispImages= (PWCHAR) ExAlloc(LPTR, cwDispImages * sizeof(WCHAR));
  1480. for (c= cMarks, pd= ptlResult->m_pd, ppd= ptlResult->m_ppdSorted; c--; )
  1481. {
  1482. int cbImage= CbImage(*ppd);
  1483. *pd = **ppd;
  1484. wcsncpy(pb, pd->pbImage, cbImage);
  1485. pd->pbImage= pb;
  1486. pb+= cbImage;
  1487. int cwDispImage = CwDisplay(*ppd);
  1488. wcsncpy(pwDisp, pd->pwDisplay, cwDispImage);
  1489. pd->pwDisplay = pwDisp;
  1490. pwDisp += cwDispImage;
  1491. *ppd++ = pd++;
  1492. }
  1493. pd->pbImage= pb;
  1494. pd->pwDisplay = pwDisp;
  1495. ptlResult->m_ppdTailSorted= NULL; // Will be computed on demand.
  1496. __leave;
  1497. }
  1498. __finally
  1499. {
  1500. if (_abnormal_termination())
  1501. {
  1502. if (pfClassifications) { VFree(pfClassifications); pfClassifications = NULL; }
  1503. if (piResult ) { VFree(piResult ); piResult = NULL; }
  1504. if (ptlResult ) DetachRef(ptlResult);
  1505. }
  1506. }
  1507. ForgetRef(ptlResult);
  1508. return(ptlResult);
  1509. }
  1510. int CTokenList::Data_cRows()
  1511. {
  1512. if (m_How_Constructed == TDB_FULL_REF) SynchronizeDatabase();
  1513. return(m_cd);
  1514. }
  1515. int CTokenList::Data_cCols()
  1516. {
  1517. if (m_How_Constructed == TDB_FULL_REF) SynchronizeDatabase();
  1518. int cb= MaxWidthToken();
  1519. if ( m_fLeadingElipsis) cb+= cbElipsis + 1;
  1520. if (m_fTrailingElipsis) cb+= cbElipsis + 1;
  1521. return cb;
  1522. }
  1523. void CTokenList::Data_GetTextMatrix(int rowTop, int colLeft,
  1524. int rows, int cols, PWCHAR lpb, PUINT charsets)
  1525. {
  1526. int i;
  1527. if (m_How_Constructed == TDB_FULL_REF) SynchronizeDatabase();
  1528. for (i = 0; i < rows*cols; i++)
  1529. lpb[i] = UNICODE_SPACE_CHAR;
  1530. if (rowTop >= (int) m_cd) return;
  1531. if (rowTop+rows > (int) m_cd) rows= m_cd-rowTop;
  1532. int cbLimit= colLeft + cols;
  1533. for (i = 0; rows--; ++rowTop, i++, lpb+= cols)
  1534. {
  1535. int cbOffset= 0;
  1536. if (m_fLeadingElipsis)
  1537. {
  1538. cbOffset= cbElipsis + 1;
  1539. if (colLeft < cbElipsis)
  1540. CopyMemory(lpb+colLeft, pszElipsis + colLeft, (cbElipsis - colLeft) * sizeof(WCHAR));
  1541. }
  1542. charsets[i] = m_ppdSorted[rowTop]->bCharset;
  1543. cbOffset= FormatAToken(m_ppdSorted[rowTop], cbOffset, colLeft, cbLimit, lpb);
  1544. if (m_fTrailingElipsis && ++cbOffset < cbLimit)
  1545. {
  1546. UINT cb= cbLimit - cbOffset;
  1547. if (cb > cbElipsis) cb= cbElipsis;
  1548. CopyMemory(lpb + cbOffset, pszElipsis, cb * sizeof(WCHAR));
  1549. }
  1550. }
  1551. }
  1552. int CTokenList::GetTokenI(int iToken, PWCHAR pb, UINT cbMax, BOOL fSortedOrder)
  1553. {
  1554. ASSERT(iToken >= 0 && iToken < (int) m_cd);
  1555. ASSERT(fSortedOrder || m_How_Constructed == From_Images); // Otherwise the token set isn't dense.
  1556. PDESCRIPTOR pd= fSortedOrder? m_ppdSorted[iToken] : m_pd + iToken;
  1557. if (!cbMax || !pb) return CbImage(pd);
  1558. if (--cbMax > CbImage(pd)) cbMax= CbImage(pd);
  1559. wcsncpy(pb, pd->pbImage, cbMax);
  1560. *(pb+cbMax)= 0;
  1561. return(CbImage(pd));
  1562. }
  1563. BYTE CTokenList::GetCharSetI(int iToken, BOOL fSortedOrder)
  1564. {
  1565. ASSERT(iToken >= 0 && iToken < (int) m_cd);
  1566. ASSERT(fSortedOrder || m_How_Constructed == From_Images); // Otherwise the token set isn't dense.
  1567. PDESCRIPTOR pd= fSortedOrder? m_ppdSorted[iToken] : m_pd + iToken;
  1568. return(pd->bCharset);
  1569. }
  1570. int CTokenList::GetWTokenI(int iToken, PWCHAR pb, UINT cbMax, BOOL fSortedOrder)
  1571. {
  1572. ASSERT(iToken >= 0 && iToken < (int) m_cd);
  1573. ASSERT(fSortedOrder || m_How_Constructed == From_Images); // Otherwise the token set isn't dense.
  1574. PDESCRIPTOR pd= fSortedOrder? m_ppdSorted[iToken] : m_pd + iToken;
  1575. if (!cbMax || !pb) return CwDisplay(pd);
  1576. if (--cbMax > CwDisplay(pd)) cbMax= CwDisplay(pd);
  1577. wcsncpy(pb, pd->pwDisplay, cbMax);
  1578. *(pb+cbMax)= 0;
  1579. return(CwDisplay(pd));
  1580. }
  1581. CMaskedTokenList::CMaskedTokenList() : CTextMatrix WithType("MaskedTokenList")
  1582. {
  1583. m_ptl = NULL;
  1584. m_psel = NULL;
  1585. }
  1586. CMaskedTokenList *CMaskedTokenList::NewMaskedTokenList(CTokenList *ptl, CIndicatorSet *pis)
  1587. {
  1588. CMaskedTokenList *pmtl= NULL;
  1589. __try
  1590. {
  1591. pmtl= New CMaskedTokenList;
  1592. pmtl->InitialMaskedTokenList(ptl, pis);
  1593. __leave;
  1594. }
  1595. __finally
  1596. {
  1597. if (_abnormal_termination() && pmtl)
  1598. {
  1599. delete pmtl; pmtl= NULL;
  1600. }
  1601. }
  1602. return pmtl;
  1603. }
  1604. void CMaskedTokenList::InitialMaskedTokenList(CTokenList *ptl, CIndicatorSet *pis)
  1605. {
  1606. NullFilterShowsAll(TRUE);
  1607. m_ptl= NULL;
  1608. AttachRef(m_ptl, ptl);
  1609. m_psel= CTMMultipleSelect::NewTMMultipleSelect(this);
  1610. SetSelector(m_psel);
  1611. SetSubstringFilter(pis);
  1612. }
  1613. CMaskedTokenList::~CMaskedTokenList()
  1614. {
  1615. if (m_ptl) DetachRef(m_ptl);
  1616. delete m_psel;
  1617. }
  1618. void CMaskedTokenList::SetTokenList(CTokenList *ptl)
  1619. {
  1620. if (ptl)
  1621. {
  1622. ChangeRef(m_ptl, ptl);
  1623. SetSubstringFilter(CIndicatorSet::NewIndicatorSet(Data_cRows()));
  1624. }
  1625. else
  1626. {
  1627. if (m_ptl) DetachRef(m_ptl);
  1628. SetSubstringFilter(NULL);
  1629. }
  1630. }
  1631. long CMaskedTokenList::SelectionCount()
  1632. {
  1633. return (m_psel->GetSelection())->SelectionCount();
  1634. }
  1635. CTokenList *CMaskedTokenList::SelectedTokens()
  1636. {
  1637. return m_ptl? m_ptl->IndicatedTokens(m_psel->GetSelection()) : NULL;
  1638. }
  1639. CIndicatorSet *CMaskedTokenList::GetIndicators()
  1640. {
  1641. return m_ptl ? m_psel->GetSelection() : NULL;
  1642. }
  1643. void CMaskedTokenList::SetSelection(CIndicatorSet *pis)
  1644. {
  1645. ASSERT(m_psel);
  1646. m_psel->SetSelection(pis);
  1647. }
  1648. void CMaskedTokenList::ClearSelection()
  1649. {
  1650. ASSERT(m_ptl);
  1651. ASSERT(m_psel);
  1652. m_psel->ClearSelection();
  1653. }
  1654. ////////////////////////////////// Hiliter support //////////////////////////
  1655. void AddSerial2(UINT iValue, PVOID pvTag, PVOID pvEnvironment)
  1656. { // adds the index to each token to the hash table
  1657. *PUINT(pvTag)= iValue;
  1658. }
  1659. CSegHashTable *CTokenList::GetFilledHashTable()
  1660. { // creates a segmented hash table and fills it with the tokenlist
  1661. CSegHashTable *pHash = NULL;
  1662. CAValRef *pavr;
  1663. __try
  1664. { // we need to create a new hash table
  1665. pHash = CSegHashTable::NewSegHashTable(sizeof(UINT), sizeof(UINT));
  1666. // get the sorted list of tokens
  1667. pavr = m_ptdb->DescriptorList(m_pd, m_cd);
  1668. // .. as a pointer to a set of value references
  1669. // BugBug! m_ptdb is non-functional. Probably should
  1670. // ..move Descriptor list to a better home
  1671. pHash->Assimilate(pavr, NULL, NULL, AddSerial2);
  1672. // add to the table along with their index
  1673. }
  1674. __except(FilterFTExceptions(_exception_code()))
  1675. {
  1676. if (pHash) { delete pHash; pHash = NULL; }
  1677. }
  1678. if (pavr) delete pavr;
  1679. return pHash;
  1680. }