Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

615 lines
18 KiB

  1. //+---------------------------------------------------------------------------
  2. //
  3. // Microsoft Windows
  4. // Copyright (C) Microsoft Corporation, 1991 - 1992.
  5. //
  6. // File: WordList.Cxx
  7. //
  8. // Contents: Implementation of the CWordList class
  9. //
  10. // Classes: CWordList
  11. //
  12. // History: 06-Mar-91 KyleP Created.
  13. // 04-Apr-91 BartoszM Removed init
  14. // 10-May-91 BartoszM Load CWLCursor cache correctly
  15. // 13-May-91 KyleP Removed extraneous TRY ... CATCH
  16. // 22-May-91 Brianb Changed to use own sorter
  17. // 04-Jun-91 BartoszM Rewrote it
  18. // 19-Jun-91 reviewed
  19. // 18-Mar-93 AmyA Moved all entry buffer code to
  20. // ebufhdlr.cxx
  21. //
  22. //----------------------------------------------------------------------------
  23. #include <pch.cxx>
  24. #pragma hdrstop
  25. #include <doclist.hxx>
  26. #include "wordlist.hxx"
  27. #include "invcur.hxx"
  28. //+---------------------------------------------------------------------------
  29. //
  30. // Member: CWordList::Size, public
  31. //
  32. // Synopsis: Returns rough size estimate in 4k pages
  33. //
  34. // History: 22-May-92 BartoszM Created.
  35. //
  36. //----------------------------------------------------------------------------
  37. unsigned CWordList::Size() const
  38. {
  39. unsigned size = 0;
  40. CSortChunk * p = _chunks;
  41. while ( 0 != p )
  42. {
  43. size += p->BlockCount() * ( cbInitialBlock / 4096 );
  44. p = p->next;
  45. }
  46. //
  47. // If we have 'unfiltered' documents, then add a one size unit for
  48. // them.
  49. //
  50. if ( _fUnfiltered )
  51. size += 1;
  52. return size;
  53. } //Size
  54. //+---------------------------------------------------------------------------
  55. //
  56. // Member: CWordList::CWordList, public
  57. //
  58. // Synopsis: Constructor for CWordList
  59. //
  60. // Effects: Initializes sort data structures
  61. //
  62. // Arguments: [id] -- Index ID of the wordlist.
  63. // [widMax] -- maximum work id
  64. // [cbMemory] -- suggested size of buffer
  65. //
  66. // History: 07-Mar-91 KyleP Created.
  67. // 03-Apr-91 KyleP Combined with initialization
  68. // 22-May-91 Brianb Converted to use new sort algorithm
  69. //
  70. //----------------------------------------------------------------------------
  71. CWordList::CWordList( INDEXID iid, WORKID widMax )
  72. : CIndex(iid),
  73. _sigWordList(eSigWordList),
  74. _chunks(0),
  75. _count(0),
  76. _fUnfiltered(FALSE)
  77. {
  78. // check sizes of data items in index
  79. ciAssert(sizeof(PROPID) == 4);
  80. ciAssert(sizeof(OCCURRENCE) == 4);
  81. // Make sure the sentinel is not too big
  82. ciAssert(MAXKEYSIZE < 256);
  83. SetMaxWorkId ( widMax );
  84. }
  85. //+---------------------------------------------------------------------------
  86. //
  87. // Member: CWordList::~CWordList, public
  88. //
  89. // Synopsis: Destructor
  90. //
  91. // Effects: Release all memory used by
  92. //
  93. // History: 06-Mar-91 KyleP Created.
  94. //
  95. //----------------------------------------------------------------------------
  96. CWordList::~CWordList()
  97. {
  98. while( _chunks != NULL)
  99. {
  100. CSortChunk *pChunk = _chunks;
  101. _chunks = _chunks->next;
  102. delete pChunk;
  103. }
  104. }
  105. //+---------------------------------------------------------------------------
  106. //
  107. // Member: CWordList::MakeChunk, public
  108. //
  109. // Synopsis: Creates new sorted chunk from data in entry buffer
  110. //
  111. // Arguments: [pEntryBuf] -- pointer to buffer to create sorted chunk from
  112. // [cb] -- count of bytes in buffer
  113. //
  114. // Expects: Sentinel entry added to pEntryBuf and that the buffer is in the
  115. // correct format.
  116. //
  117. // Returns: FALSE if there was a memory exception. TRUE otherwise.
  118. //
  119. // History: 04-Jun-89 BartoszM Created.
  120. // 18-Mar-93 AmyA Added entry buffer passing
  121. //
  122. //----------------------------------------------------------------------------
  123. BOOL CWordList::MakeChunk ( const BYTE * pEntryBuf, ULONG cb )
  124. {
  125. XPtr<CSortChunk> xChunk( new CSortChunk( _maxOccTable ) );
  126. xChunk->Init( pEntryBuf, cb, MaxWorkId() );
  127. CSortChunk* pChunk = xChunk.Acquire();
  128. pChunk->next = _chunks;
  129. _chunks = pChunk;
  130. _count++;
  131. return TRUE;
  132. }
  133. //+---------------------------------------------------------------------------
  134. //
  135. // Member: CWordList::QueryCursor, public
  136. //
  137. // Synopsis: Create a cursor for the WordList
  138. //
  139. // Effects: Creates a cursor
  140. //
  141. // Returns: A pointer to a CKeyCursor.
  142. //
  143. // History: 15-Apr-91 KyleP Created.
  144. // 22-May-92 BrianB Modified to use chunk merges
  145. // 07-Jun-91 BartoszM Rewrote
  146. // 24-Jan-92 AmyA Modified to use CKeyCurArray to remove
  147. // TRY...CATCH.
  148. //
  149. //----------------------------------------------------------------------------
  150. CKeyCursor * CWordList::QueryCursor()
  151. {
  152. if ( 0 == _count && !_fUnfiltered )
  153. return 0;
  154. CKeyCursor *pCur = 0;
  155. if ( 0 == _count && _fUnfiltered )
  156. {
  157. pCur = new CUnfilteredCursor( GetId(), MaxWorkId(), _widTable );
  158. }
  159. else if ( _count == 1 && !_fUnfiltered )
  160. {
  161. // single chunk return chunk cursor
  162. pCur = _chunks->QueryCursor( GetId(), _widTable, MaxWorkId() );
  163. }
  164. else
  165. {
  166. // multiple chunks create merge cursor
  167. CKeyCurStack stkCursor;
  168. for ( CSortChunk* pChunk = _chunks;
  169. pChunk != 0;
  170. pChunk = pChunk->next )
  171. {
  172. XPtr<CKeyCursor> xCur( pChunk->QueryCursor( GetId(),
  173. _widTable,
  174. MaxWorkId() ) );
  175. if ( !xCur.IsNull() )
  176. {
  177. stkCursor.Push( xCur.GetPointer() );
  178. xCur.Acquire();
  179. }
  180. }
  181. if ( _fUnfiltered )
  182. {
  183. XPtr<CUnfilteredCursor> xCur( new CUnfilteredCursor( GetId(),
  184. MaxWorkId(),
  185. _widTable ) );
  186. stkCursor.Push( xCur.GetPointer() );
  187. xCur.Acquire();
  188. }
  189. pCur = stkCursor.QueryWlCursor( MaxWorkId() );
  190. }
  191. return pCur;
  192. } //QueryCursor
  193. //+---------------------------------------------------------------------------
  194. //
  195. // Member: CWordList::QueryKeyCursor, public
  196. //
  197. // Synopsis: Create a cursor for the WordList
  198. //
  199. // Returns: A pointer to a CKeyCursor.
  200. //
  201. // History: 06-Oct-98 dlee Added header, author unknown
  202. //
  203. //----------------------------------------------------------------------------
  204. CKeyCursor * CWordList::QueryKeyCursor( CKey const * pkeyTarget )
  205. {
  206. CKeyCursor * pcur = QueryCursor();
  207. for ( CKeyBuf const * pkey = pcur->GetKey();
  208. pkey != 0 && pkey->Compare( *pkeyTarget ) < 0;
  209. pkey = pcur->GetNextKey() )
  210. continue;
  211. if ( 0 != pkey )
  212. {
  213. if ( pkey->Compare( *pkeyTarget ) == 0 )
  214. return pcur;
  215. }
  216. delete pcur;
  217. return 0;
  218. } //QueryKeyCursor
  219. //+---------------------------------------------------------------------------
  220. //
  221. // Member: CWordList::QueryCursor, public
  222. //
  223. // Synopsis: Create a cursor for the WordList
  224. //
  225. // Effects: Creates a cursor
  226. //
  227. // Arguments: [pkey] -- Key to initially position the cursor on.
  228. // [isRange] -- TRUE for range query
  229. // [cMaxNodes] -- Max number of nodes to create. Decremented
  230. // on return.
  231. //
  232. // Returns: A pointer to a CKeyCursor.
  233. //
  234. // History: 15-Apr-91 KyleP Created.
  235. // 22-May-92 BrianB Modified to use chunk merges
  236. // 07-Jun-91 BartoszM Rewrote
  237. // 24-Jan-92 AmyA Modified to use CKeyCurArray to remove
  238. // TRY...CATCH.
  239. //
  240. //----------------------------------------------------------------------------
  241. COccCursor * CWordList::QueryCursor( const CKey * pkey,
  242. BOOL isRange,
  243. ULONG & cMaxNodes )
  244. {
  245. Win4Assert( cMaxNodes > 0 );
  246. if ( _count == 0 && !_fUnfiltered )
  247. return 0;
  248. if (isRange)
  249. {
  250. CKey keyEnd;
  251. keyEnd.FillMax (*pkey);
  252. return QueryRangeCursor ( pkey, &keyEnd, cMaxNodes );
  253. }
  254. if (pkey->Pid() == pidAll)
  255. {
  256. return QueryRangeCursor ( pkey, pkey, cMaxNodes );
  257. }
  258. cMaxNodes--;
  259. if ( 0 == cMaxNodes )
  260. {
  261. ciDebugOut(( DEB_WARN, "Exceeded node limit in: CWordList::QueryCursor\n" ));
  262. THROW( CException( STATUS_TOO_MANY_NODES ) );
  263. }
  264. CKeyCursor *pCur = 0;
  265. if ( CUnfilteredCursor::CompareAgainstUnfilteredKey( *pkey ) == 0 )
  266. {
  267. pCur = new CUnfilteredCursor( GetId(), MaxWorkId(), _widTable );
  268. }
  269. else
  270. {
  271. if(_count == 1)
  272. {
  273. // single chunk return chunk cursor
  274. pCur = _chunks->QueryCursor ( GetId(), _widTable, pkey, MaxWorkId() );
  275. }
  276. else
  277. {
  278. // multiple chunks create merge cursor
  279. CKeyCurStack stkCursor;
  280. for ( CSortChunk* pChunk = _chunks;
  281. pChunk != 0;
  282. pChunk = pChunk->next)
  283. {
  284. XPtr<CKeyCursor> xCur( pChunk->QueryCursor( GetId(),
  285. _widTable,
  286. pkey,
  287. MaxWorkId() ) );
  288. if ( !xCur.IsNull() )
  289. {
  290. stkCursor.Push( xCur.GetPointer() );
  291. xCur.Acquire();
  292. }
  293. }
  294. pCur = stkCursor.QueryWlCursor( MaxWorkId() );
  295. }
  296. }
  297. return pCur;
  298. } //QueryCursor
  299. //+---------------------------------------------------------------------------
  300. //
  301. // Member: CWordList::QueryRangeCursor, public
  302. //
  303. // Synopsis: Create a range cursor for the WordList
  304. //
  305. // Effects: Creates a cursor
  306. //
  307. // Arguments: [pkey] -- Beginning of query range.
  308. // [pkeyEnd] -- End of query range.
  309. // [cMaxNodes] -- Max number of nodes to create. Decremented
  310. // on return.
  311. //
  312. // Returns: A pointer to a CKeyCursor.
  313. //
  314. // History: 27-Jan-92 AmyA Created.
  315. // 07-Feb-92 AmyA Moved some code to CreateRange().
  316. //
  317. //----------------------------------------------------------------------------
  318. COccCursor * CWordList::QueryRangeCursor( const CKey * pkey,
  319. const CKey * pkeyEnd,
  320. ULONG & cMaxNodes )
  321. {
  322. Win4Assert( cMaxNodes > 0 );
  323. Win4Assert( pkey->Pid() == pkeyEnd->Pid() );
  324. // Win4Assert( pkey->Pid() != pidAll ||
  325. // pkey->CompareStr( *pkeyEnd ) == 0 );
  326. //
  327. // Decide if the invalid key is in the range.
  328. //
  329. BOOL fInvalidInRange;
  330. if ( pkey->Pid() == pidUnfiltered )
  331. {
  332. cMaxNodes--;
  333. if ( 0 == cMaxNodes )
  334. {
  335. ciDebugOut(( DEB_WARN, "Exceeded node limit in: CWordList::QueryRangeCursor\n" ));
  336. THROW( CException( STATUS_TOO_MANY_NODES ) );
  337. }
  338. if ( _fUnfiltered &&
  339. CUnfilteredCursor::CompareAgainstUnfilteredKey( *pkey ) >= 0 &&
  340. CUnfilteredCursor::CompareAgainstUnfilteredKey( *pkeyEnd ) <= 0 )
  341. {
  342. return new CUnfilteredCursor( GetId(), MaxWorkId(), _widTable );
  343. }
  344. else
  345. {
  346. return( 0 );
  347. }
  348. }
  349. ciDebugOut(( DEB_ITRACE, "Chunk count is %d\n", _count ));
  350. if ( _count == 0 )
  351. return 0;
  352. //
  353. // Cheat a little here. Build the whole range before subtracting nodes. Also, consider
  354. // a 'node' to be one cursor in every chunk. So only subtract off the maximum contribution
  355. // of any single chunk.
  356. //
  357. COccCurStack curStk;
  358. ULONG cMaxPerChunk = 0;
  359. ULONG cCursor = 0;
  360. for (CSortChunk* pChunk = _chunks;
  361. pChunk != 0;
  362. pChunk = pChunk->next)
  363. {
  364. pChunk->CreateRange(curStk, pkey, pkeyEnd, GetId(), _widTable, MaxWorkId());
  365. Win4Assert( curStk.Count() >= cCursor );
  366. ULONG cInChunk = curStk.Count() - cCursor;
  367. if ( cInChunk > cMaxPerChunk )
  368. {
  369. cMaxPerChunk = cInChunk;
  370. if ( cMaxPerChunk >= cMaxNodes )
  371. {
  372. ciDebugOut(( DEB_WARN, "Exceeded node limit in: CWordList::QueryRangeCursor\n" ));
  373. cMaxNodes = 0;
  374. THROW( CException( STATUS_TOO_MANY_NODES ) );
  375. }
  376. }
  377. cCursor = curStk.Count();
  378. }
  379. cMaxNodes -= cMaxPerChunk;
  380. Win4Assert( cMaxNodes > 0 );
  381. return curStk.QuerySynCursor(MaxWorkId());
  382. }
  383. //+---------------------------------------------------------------------------
  384. //
  385. // Member: CWordList::QuerySynCursor, public
  386. //
  387. // Synopsis: Create a synonym cursor for the WordList
  388. //
  389. // Effects: Creates a cursor
  390. //
  391. // Arguments: [keyStk] -- Keys to query on.
  392. // [isRange] -- Whether the query will be a range query.
  393. // [cMaxNodes] -- Max nodes (keys) to add
  394. //
  395. // Returns: A pointer to a CKeyCursor.
  396. //
  397. // History: 31-Jan-92 AmyA Created.
  398. //
  399. //----------------------------------------------------------------------------
  400. COccCursor * CWordList::QuerySynCursor( CKeyArray & keyArr,
  401. BOOL isRange,
  402. ULONG & cMaxNodes )
  403. {
  404. Win4Assert( cMaxNodes > 0 );
  405. if (_count == 0)
  406. return(0);
  407. //
  408. // Cheat a little here. Build the whole range before subtracting nodes. Also, consider
  409. // a 'node' to be one cursor in every chunk. So only subtract off the maximum contribution
  410. // of any single chunk.
  411. //
  412. COccCurStack curStk;
  413. ULONG cMaxPerChunk = 0;
  414. ULONG cCursor = 0;
  415. int keyCount = keyArr.Count();
  416. ciDebugOut((DEB_ITRACE, "KeyCount is %d\n", keyCount));
  417. for (CSortChunk* pChunk = _chunks;
  418. pChunk != 0;
  419. pChunk = pChunk->next)
  420. {
  421. for (int i = 0; i < keyCount; i++)
  422. {
  423. CKey& key = keyArr.Get(i);
  424. ciDebugOut((DEB_ITRACE, "Key is %.*ws\n", key.StrLen(), key.GetStr()));
  425. if (isRange)
  426. {
  427. CKey keyEnd;
  428. keyEnd.FillMax(key);
  429. pChunk->CreateRange(
  430. curStk, &key, &keyEnd, GetId(), _widTable, MaxWorkId());
  431. }
  432. else if ( key.Pid() == pidAll )
  433. {
  434. pChunk->CreateRange(
  435. curStk, &key, &key, GetId(), _widTable, MaxWorkId());
  436. }
  437. else
  438. {
  439. XPtr<CChunkCursor> xNewCur( pChunk->QueryCursor(
  440. GetId(), _widTable, &key, MaxWorkId() ) );
  441. if ( !xNewCur.IsNull() )
  442. {
  443. curStk.Push( xNewCur.GetPointer() );
  444. xNewCur.Acquire();
  445. }
  446. }
  447. }
  448. Win4Assert( curStk.Count() >= cCursor );
  449. ULONG cInChunk = curStk.Count() - cCursor;
  450. if ( cInChunk > cMaxPerChunk )
  451. {
  452. cMaxPerChunk = cInChunk;
  453. if ( cMaxPerChunk >= cMaxNodes )
  454. {
  455. ciDebugOut(( DEB_WARN, "Exceeded node limit in: CWordList::QuerySynCursor\n" ));
  456. cMaxNodes = 0;
  457. THROW( CException( STATUS_TOO_MANY_NODES ) );
  458. }
  459. }
  460. cCursor = curStk.Count();
  461. }
  462. cMaxNodes -= cMaxPerChunk;
  463. Win4Assert( cMaxNodes > 0 );
  464. return curStk.QuerySynCursor(MaxWorkId());
  465. }
  466. void CWordList::GetDocuments( CDocList & doclist )
  467. {
  468. unsigned cWid = 0;
  469. for ( unsigned i = 0; i < _widTable.Count(); i++ )
  470. {
  471. if ( _widTable.FakeWidToWid(iDocToFakeWid(i)) != widInvalid )
  472. {
  473. doclist.Set( cWid,
  474. _widTable.FakeWidToWid( iDocToFakeWid(i) ),
  475. 0, // Use usn of 0 for refiled wids
  476. _widTable.VolumeId( iDocToFakeWid(i) ) );
  477. cWid++;
  478. }
  479. }
  480. doclist.LokSetCount( cWid ); // okay not to have resman lock here
  481. }
  482. //+---------------------------------------------------------------------------
  483. //
  484. // Member: CWordList::Done, public
  485. //
  486. // Synopsis: Called when a wordlist if fully constructed and available
  487. // for query.
  488. //
  489. // Effects: Sets _fUnfiltered to TRUE if there are wids in the wid table
  490. // that have been invalidated.
  491. //
  492. // History: 09-Nov-94 KyleP Created.
  493. //
  494. //----------------------------------------------------------------------------
  495. void CWordList::Done()
  496. {
  497. Win4Assert( !_fUnfiltered );
  498. //
  499. // How many invalid wids are there?
  500. //
  501. unsigned cUnfiltered = 0;
  502. for ( unsigned i = 1; i <= _widTable.Count(); i++ )
  503. {
  504. if ( _widTable.IsValid(i) && !_widTable.IsFiltered(i) )
  505. cUnfiltered++;
  506. }
  507. //
  508. // Create chunk of invalid property.
  509. //
  510. if ( cUnfiltered > 0 )
  511. {
  512. ciDebugOut(( DEB_ITRACE, "%d unfiltered wids in wordlist %x\n",
  513. cUnfiltered, GetId() ));
  514. _fUnfiltered = TRUE;
  515. }
  516. }