//+--------------------------------------------------------------------------- // // Microsoft Windows // Copyright (C) Microsoft Corporation, 1991 - 1992. // // File: WordList.Cxx // // Contents: Implementation of the CWordList class // // Classes: CWordList // // History: 06-Mar-91 KyleP Created. // 04-Apr-91 BartoszM Removed init // 10-May-91 BartoszM Load CWLCursor cache correctly // 13-May-91 KyleP Removed extraneous TRY ... CATCH // 22-May-91 Brianb Changed to use own sorter // 04-Jun-91 BartoszM Rewrote it // 19-Jun-91 reviewed // 18-Mar-93 AmyA Moved all entry buffer code to // ebufhdlr.cxx // //---------------------------------------------------------------------------- #include #pragma hdrstop #include #include "wordlist.hxx" #include "invcur.hxx" //+--------------------------------------------------------------------------- // // Member: CWordList::Size, public // // Synopsis: Returns rough size estimate in 4k pages // // History: 22-May-92 BartoszM Created. // //---------------------------------------------------------------------------- unsigned CWordList::Size() const { unsigned size = 0; CSortChunk * p = _chunks; while ( 0 != p ) { size += p->BlockCount() * ( cbInitialBlock / 4096 ); p = p->next; } // // If we have 'unfiltered' documents, then add a one size unit for // them. // if ( _fUnfiltered ) size += 1; return size; } //Size //+--------------------------------------------------------------------------- // // Member: CWordList::CWordList, public // // Synopsis: Constructor for CWordList // // Effects: Initializes sort data structures // // Arguments: [id] -- Index ID of the wordlist. // [widMax] -- maximum work id // [cbMemory] -- suggested size of buffer // // History: 07-Mar-91 KyleP Created. // 03-Apr-91 KyleP Combined with initialization // 22-May-91 Brianb Converted to use new sort algorithm // //---------------------------------------------------------------------------- CWordList::CWordList( INDEXID iid, WORKID widMax ) : CIndex(iid), _sigWordList(eSigWordList), _chunks(0), _count(0), _fUnfiltered(FALSE) { // check sizes of data items in index ciAssert(sizeof(PROPID) == 4); ciAssert(sizeof(OCCURRENCE) == 4); // Make sure the sentinel is not too big ciAssert(MAXKEYSIZE < 256); SetMaxWorkId ( widMax ); } //+--------------------------------------------------------------------------- // // Member: CWordList::~CWordList, public // // Synopsis: Destructor // // Effects: Release all memory used by // // History: 06-Mar-91 KyleP Created. // //---------------------------------------------------------------------------- CWordList::~CWordList() { while( _chunks != NULL) { CSortChunk *pChunk = _chunks; _chunks = _chunks->next; delete pChunk; } } //+--------------------------------------------------------------------------- // // Member: CWordList::MakeChunk, public // // Synopsis: Creates new sorted chunk from data in entry buffer // // Arguments: [pEntryBuf] -- pointer to buffer to create sorted chunk from // [cb] -- count of bytes in buffer // // Expects: Sentinel entry added to pEntryBuf and that the buffer is in the // correct format. // // Returns: FALSE if there was a memory exception. TRUE otherwise. // // History: 04-Jun-89 BartoszM Created. // 18-Mar-93 AmyA Added entry buffer passing // //---------------------------------------------------------------------------- BOOL CWordList::MakeChunk ( const BYTE * pEntryBuf, ULONG cb ) { XPtr xChunk( new CSortChunk( _maxOccTable ) ); xChunk->Init( pEntryBuf, cb, MaxWorkId() ); CSortChunk* pChunk = xChunk.Acquire(); pChunk->next = _chunks; _chunks = pChunk; _count++; return TRUE; } //+--------------------------------------------------------------------------- // // Member: CWordList::QueryCursor, public // // Synopsis: Create a cursor for the WordList // // Effects: Creates a cursor // // Returns: A pointer to a CKeyCursor. // // History: 15-Apr-91 KyleP Created. // 22-May-92 BrianB Modified to use chunk merges // 07-Jun-91 BartoszM Rewrote // 24-Jan-92 AmyA Modified to use CKeyCurArray to remove // TRY...CATCH. // //---------------------------------------------------------------------------- CKeyCursor * CWordList::QueryCursor() { if ( 0 == _count && !_fUnfiltered ) return 0; CKeyCursor *pCur = 0; if ( 0 == _count && _fUnfiltered ) { pCur = new CUnfilteredCursor( GetId(), MaxWorkId(), _widTable ); } else if ( _count == 1 && !_fUnfiltered ) { // single chunk return chunk cursor pCur = _chunks->QueryCursor( GetId(), _widTable, MaxWorkId() ); } else { // multiple chunks create merge cursor CKeyCurStack stkCursor; for ( CSortChunk* pChunk = _chunks; pChunk != 0; pChunk = pChunk->next ) { XPtr xCur( pChunk->QueryCursor( GetId(), _widTable, MaxWorkId() ) ); if ( !xCur.IsNull() ) { stkCursor.Push( xCur.GetPointer() ); xCur.Acquire(); } } if ( _fUnfiltered ) { XPtr xCur( new CUnfilteredCursor( GetId(), MaxWorkId(), _widTable ) ); stkCursor.Push( xCur.GetPointer() ); xCur.Acquire(); } pCur = stkCursor.QueryWlCursor( MaxWorkId() ); } return pCur; } //QueryCursor //+--------------------------------------------------------------------------- // // Member: CWordList::QueryKeyCursor, public // // Synopsis: Create a cursor for the WordList // // Returns: A pointer to a CKeyCursor. // // History: 06-Oct-98 dlee Added header, author unknown // //---------------------------------------------------------------------------- CKeyCursor * CWordList::QueryKeyCursor( CKey const * pkeyTarget ) { CKeyCursor * pcur = QueryCursor(); for ( CKeyBuf const * pkey = pcur->GetKey(); pkey != 0 && pkey->Compare( *pkeyTarget ) < 0; pkey = pcur->GetNextKey() ) continue; if ( 0 != pkey ) { if ( pkey->Compare( *pkeyTarget ) == 0 ) return pcur; } delete pcur; return 0; } //QueryKeyCursor //+--------------------------------------------------------------------------- // // Member: CWordList::QueryCursor, public // // Synopsis: Create a cursor for the WordList // // Effects: Creates a cursor // // Arguments: [pkey] -- Key to initially position the cursor on. // [isRange] -- TRUE for range query // [cMaxNodes] -- Max number of nodes to create. Decremented // on return. // // Returns: A pointer to a CKeyCursor. // // History: 15-Apr-91 KyleP Created. // 22-May-92 BrianB Modified to use chunk merges // 07-Jun-91 BartoszM Rewrote // 24-Jan-92 AmyA Modified to use CKeyCurArray to remove // TRY...CATCH. // //---------------------------------------------------------------------------- COccCursor * CWordList::QueryCursor( const CKey * pkey, BOOL isRange, ULONG & cMaxNodes ) { Win4Assert( cMaxNodes > 0 ); if ( _count == 0 && !_fUnfiltered ) return 0; if (isRange) { CKey keyEnd; keyEnd.FillMax (*pkey); return QueryRangeCursor ( pkey, &keyEnd, cMaxNodes ); } if (pkey->Pid() == pidAll) { return QueryRangeCursor ( pkey, pkey, cMaxNodes ); } cMaxNodes--; if ( 0 == cMaxNodes ) { ciDebugOut(( DEB_WARN, "Exceeded node limit in: CWordList::QueryCursor\n" )); THROW( CException( STATUS_TOO_MANY_NODES ) ); } CKeyCursor *pCur = 0; if ( CUnfilteredCursor::CompareAgainstUnfilteredKey( *pkey ) == 0 ) { pCur = new CUnfilteredCursor( GetId(), MaxWorkId(), _widTable ); } else { if(_count == 1) { // single chunk return chunk cursor pCur = _chunks->QueryCursor ( GetId(), _widTable, pkey, MaxWorkId() ); } else { // multiple chunks create merge cursor CKeyCurStack stkCursor; for ( CSortChunk* pChunk = _chunks; pChunk != 0; pChunk = pChunk->next) { XPtr xCur( pChunk->QueryCursor( GetId(), _widTable, pkey, MaxWorkId() ) ); if ( !xCur.IsNull() ) { stkCursor.Push( xCur.GetPointer() ); xCur.Acquire(); } } pCur = stkCursor.QueryWlCursor( MaxWorkId() ); } } return pCur; } //QueryCursor //+--------------------------------------------------------------------------- // // Member: CWordList::QueryRangeCursor, public // // Synopsis: Create a range cursor for the WordList // // Effects: Creates a cursor // // Arguments: [pkey] -- Beginning of query range. // [pkeyEnd] -- End of query range. // [cMaxNodes] -- Max number of nodes to create. Decremented // on return. // // Returns: A pointer to a CKeyCursor. // // History: 27-Jan-92 AmyA Created. // 07-Feb-92 AmyA Moved some code to CreateRange(). // //---------------------------------------------------------------------------- COccCursor * CWordList::QueryRangeCursor( const CKey * pkey, const CKey * pkeyEnd, ULONG & cMaxNodes ) { Win4Assert( cMaxNodes > 0 ); Win4Assert( pkey->Pid() == pkeyEnd->Pid() ); // Win4Assert( pkey->Pid() != pidAll || // pkey->CompareStr( *pkeyEnd ) == 0 ); // // Decide if the invalid key is in the range. // BOOL fInvalidInRange; if ( pkey->Pid() == pidUnfiltered ) { cMaxNodes--; if ( 0 == cMaxNodes ) { ciDebugOut(( DEB_WARN, "Exceeded node limit in: CWordList::QueryRangeCursor\n" )); THROW( CException( STATUS_TOO_MANY_NODES ) ); } if ( _fUnfiltered && CUnfilteredCursor::CompareAgainstUnfilteredKey( *pkey ) >= 0 && CUnfilteredCursor::CompareAgainstUnfilteredKey( *pkeyEnd ) <= 0 ) { return new CUnfilteredCursor( GetId(), MaxWorkId(), _widTable ); } else { return( 0 ); } } ciDebugOut(( DEB_ITRACE, "Chunk count is %d\n", _count )); if ( _count == 0 ) return 0; // // Cheat a little here. Build the whole range before subtracting nodes. Also, consider // a 'node' to be one cursor in every chunk. So only subtract off the maximum contribution // of any single chunk. // COccCurStack curStk; ULONG cMaxPerChunk = 0; ULONG cCursor = 0; for (CSortChunk* pChunk = _chunks; pChunk != 0; pChunk = pChunk->next) { pChunk->CreateRange(curStk, pkey, pkeyEnd, GetId(), _widTable, MaxWorkId()); Win4Assert( curStk.Count() >= cCursor ); ULONG cInChunk = curStk.Count() - cCursor; if ( cInChunk > cMaxPerChunk ) { cMaxPerChunk = cInChunk; if ( cMaxPerChunk >= cMaxNodes ) { ciDebugOut(( DEB_WARN, "Exceeded node limit in: CWordList::QueryRangeCursor\n" )); cMaxNodes = 0; THROW( CException( STATUS_TOO_MANY_NODES ) ); } } cCursor = curStk.Count(); } cMaxNodes -= cMaxPerChunk; Win4Assert( cMaxNodes > 0 ); return curStk.QuerySynCursor(MaxWorkId()); } //+--------------------------------------------------------------------------- // // Member: CWordList::QuerySynCursor, public // // Synopsis: Create a synonym cursor for the WordList // // Effects: Creates a cursor // // Arguments: [keyStk] -- Keys to query on. // [isRange] -- Whether the query will be a range query. // [cMaxNodes] -- Max nodes (keys) to add // // Returns: A pointer to a CKeyCursor. // // History: 31-Jan-92 AmyA Created. // //---------------------------------------------------------------------------- COccCursor * CWordList::QuerySynCursor( CKeyArray & keyArr, BOOL isRange, ULONG & cMaxNodes ) { Win4Assert( cMaxNodes > 0 ); if (_count == 0) return(0); // // Cheat a little here. Build the whole range before subtracting nodes. Also, consider // a 'node' to be one cursor in every chunk. So only subtract off the maximum contribution // of any single chunk. // COccCurStack curStk; ULONG cMaxPerChunk = 0; ULONG cCursor = 0; int keyCount = keyArr.Count(); ciDebugOut((DEB_ITRACE, "KeyCount is %d\n", keyCount)); for (CSortChunk* pChunk = _chunks; pChunk != 0; pChunk = pChunk->next) { for (int i = 0; i < keyCount; i++) { CKey& key = keyArr.Get(i); ciDebugOut((DEB_ITRACE, "Key is %.*ws\n", key.StrLen(), key.GetStr())); if (isRange) { CKey keyEnd; keyEnd.FillMax(key); pChunk->CreateRange( curStk, &key, &keyEnd, GetId(), _widTable, MaxWorkId()); } else if ( key.Pid() == pidAll ) { pChunk->CreateRange( curStk, &key, &key, GetId(), _widTable, MaxWorkId()); } else { XPtr xNewCur( pChunk->QueryCursor( GetId(), _widTable, &key, MaxWorkId() ) ); if ( !xNewCur.IsNull() ) { curStk.Push( xNewCur.GetPointer() ); xNewCur.Acquire(); } } } Win4Assert( curStk.Count() >= cCursor ); ULONG cInChunk = curStk.Count() - cCursor; if ( cInChunk > cMaxPerChunk ) { cMaxPerChunk = cInChunk; if ( cMaxPerChunk >= cMaxNodes ) { ciDebugOut(( DEB_WARN, "Exceeded node limit in: CWordList::QuerySynCursor\n" )); cMaxNodes = 0; THROW( CException( STATUS_TOO_MANY_NODES ) ); } } cCursor = curStk.Count(); } cMaxNodes -= cMaxPerChunk; Win4Assert( cMaxNodes > 0 ); return curStk.QuerySynCursor(MaxWorkId()); } void CWordList::GetDocuments( CDocList & doclist ) { unsigned cWid = 0; for ( unsigned i = 0; i < _widTable.Count(); i++ ) { if ( _widTable.FakeWidToWid(iDocToFakeWid(i)) != widInvalid ) { doclist.Set( cWid, _widTable.FakeWidToWid( iDocToFakeWid(i) ), 0, // Use usn of 0 for refiled wids _widTable.VolumeId( iDocToFakeWid(i) ) ); cWid++; } } doclist.LokSetCount( cWid ); // okay not to have resman lock here } //+--------------------------------------------------------------------------- // // Member: CWordList::Done, public // // Synopsis: Called when a wordlist if fully constructed and available // for query. // // Effects: Sets _fUnfiltered to TRUE if there are wids in the wid table // that have been invalidated. // // History: 09-Nov-94 KyleP Created. // //---------------------------------------------------------------------------- void CWordList::Done() { Win4Assert( !_fUnfiltered ); // // How many invalid wids are there? // unsigned cUnfiltered = 0; for ( unsigned i = 1; i <= _widTable.Count(); i++ ) { if ( _widTable.IsValid(i) && !_widTable.IsFiltered(i) ) cUnfiltered++; } // // Create chunk of invalid property. // if ( cUnfiltered > 0 ) { ciDebugOut(( DEB_ITRACE, "%d unfiltered wids in wordlist %x\n", cUnfiltered, GetId() )); _fUnfiltered = TRUE; } }