You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
615 lines
18 KiB
615 lines
18 KiB
//+---------------------------------------------------------------------------
|
|
//
|
|
// Microsoft Windows
|
|
// Copyright (C) Microsoft Corporation, 1991 - 1992.
|
|
//
|
|
// File: WordList.Cxx
|
|
//
|
|
// Contents: Implementation of the CWordList class
|
|
//
|
|
// Classes: CWordList
|
|
//
|
|
// History: 06-Mar-91 KyleP Created.
|
|
// 04-Apr-91 BartoszM Removed init
|
|
// 10-May-91 BartoszM Load CWLCursor cache correctly
|
|
// 13-May-91 KyleP Removed extraneous TRY ... CATCH
|
|
// 22-May-91 Brianb Changed to use own sorter
|
|
// 04-Jun-91 BartoszM Rewrote it
|
|
// 19-Jun-91 reviewed
|
|
// 18-Mar-93 AmyA Moved all entry buffer code to
|
|
// ebufhdlr.cxx
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
|
|
#include <pch.cxx>
|
|
#pragma hdrstop
|
|
|
|
#include <doclist.hxx>
|
|
|
|
#include "wordlist.hxx"
|
|
#include "invcur.hxx"
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Member: CWordList::Size, public
|
|
//
|
|
// Synopsis: Returns rough size estimate in 4k pages
|
|
//
|
|
// History: 22-May-92 BartoszM Created.
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
|
|
unsigned CWordList::Size() const
|
|
{
|
|
unsigned size = 0;
|
|
|
|
CSortChunk * p = _chunks;
|
|
|
|
while ( 0 != p )
|
|
{
|
|
size += p->BlockCount() * ( cbInitialBlock / 4096 );
|
|
|
|
p = p->next;
|
|
}
|
|
|
|
//
|
|
// If we have 'unfiltered' documents, then add a one size unit for
|
|
// them.
|
|
//
|
|
|
|
if ( _fUnfiltered )
|
|
size += 1;
|
|
|
|
return size;
|
|
} //Size
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Member: CWordList::CWordList, public
|
|
//
|
|
// Synopsis: Constructor for CWordList
|
|
//
|
|
// Effects: Initializes sort data structures
|
|
//
|
|
// Arguments: [id] -- Index ID of the wordlist.
|
|
// [widMax] -- maximum work id
|
|
// [cbMemory] -- suggested size of buffer
|
|
//
|
|
// History: 07-Mar-91 KyleP Created.
|
|
// 03-Apr-91 KyleP Combined with initialization
|
|
// 22-May-91 Brianb Converted to use new sort algorithm
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
CWordList::CWordList( INDEXID iid, WORKID widMax )
|
|
: CIndex(iid),
|
|
_sigWordList(eSigWordList),
|
|
_chunks(0),
|
|
_count(0),
|
|
_fUnfiltered(FALSE)
|
|
{
|
|
// check sizes of data items in index
|
|
ciAssert(sizeof(PROPID) == 4);
|
|
ciAssert(sizeof(OCCURRENCE) == 4);
|
|
// Make sure the sentinel is not too big
|
|
ciAssert(MAXKEYSIZE < 256);
|
|
|
|
SetMaxWorkId ( widMax );
|
|
}
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Member: CWordList::~CWordList, public
|
|
//
|
|
// Synopsis: Destructor
|
|
//
|
|
// Effects: Release all memory used by
|
|
//
|
|
// History: 06-Mar-91 KyleP Created.
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
|
|
CWordList::~CWordList()
|
|
{
|
|
while( _chunks != NULL)
|
|
{
|
|
CSortChunk *pChunk = _chunks;
|
|
_chunks = _chunks->next;
|
|
delete pChunk;
|
|
}
|
|
}
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Member: CWordList::MakeChunk, public
|
|
//
|
|
// Synopsis: Creates new sorted chunk from data in entry buffer
|
|
//
|
|
// Arguments: [pEntryBuf] -- pointer to buffer to create sorted chunk from
|
|
// [cb] -- count of bytes in buffer
|
|
//
|
|
// Expects: Sentinel entry added to pEntryBuf and that the buffer is in the
|
|
// correct format.
|
|
//
|
|
// Returns: FALSE if there was a memory exception. TRUE otherwise.
|
|
//
|
|
// History: 04-Jun-89 BartoszM Created.
|
|
// 18-Mar-93 AmyA Added entry buffer passing
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
|
|
|
|
BOOL CWordList::MakeChunk ( const BYTE * pEntryBuf, ULONG cb )
|
|
{
|
|
XPtr<CSortChunk> xChunk( new CSortChunk( _maxOccTable ) );
|
|
xChunk->Init( pEntryBuf, cb, MaxWorkId() );
|
|
CSortChunk* pChunk = xChunk.Acquire();
|
|
pChunk->next = _chunks;
|
|
_chunks = pChunk;
|
|
_count++;
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Member: CWordList::QueryCursor, public
|
|
//
|
|
// Synopsis: Create a cursor for the WordList
|
|
//
|
|
// Effects: Creates a cursor
|
|
//
|
|
// Returns: A pointer to a CKeyCursor.
|
|
//
|
|
// History: 15-Apr-91 KyleP Created.
|
|
// 22-May-92 BrianB Modified to use chunk merges
|
|
// 07-Jun-91 BartoszM Rewrote
|
|
// 24-Jan-92 AmyA Modified to use CKeyCurArray to remove
|
|
// TRY...CATCH.
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
|
|
CKeyCursor * CWordList::QueryCursor()
|
|
{
|
|
if ( 0 == _count && !_fUnfiltered )
|
|
return 0;
|
|
|
|
CKeyCursor *pCur = 0;
|
|
|
|
if ( 0 == _count && _fUnfiltered )
|
|
{
|
|
pCur = new CUnfilteredCursor( GetId(), MaxWorkId(), _widTable );
|
|
}
|
|
else if ( _count == 1 && !_fUnfiltered )
|
|
{
|
|
// single chunk return chunk cursor
|
|
|
|
pCur = _chunks->QueryCursor( GetId(), _widTable, MaxWorkId() );
|
|
}
|
|
else
|
|
{
|
|
// multiple chunks create merge cursor
|
|
|
|
CKeyCurStack stkCursor;
|
|
|
|
for ( CSortChunk* pChunk = _chunks;
|
|
pChunk != 0;
|
|
pChunk = pChunk->next )
|
|
{
|
|
XPtr<CKeyCursor> xCur( pChunk->QueryCursor( GetId(),
|
|
_widTable,
|
|
MaxWorkId() ) );
|
|
|
|
if ( !xCur.IsNull() )
|
|
{
|
|
stkCursor.Push( xCur.GetPointer() );
|
|
xCur.Acquire();
|
|
}
|
|
}
|
|
|
|
if ( _fUnfiltered )
|
|
{
|
|
XPtr<CUnfilteredCursor> xCur( new CUnfilteredCursor( GetId(),
|
|
MaxWorkId(),
|
|
_widTable ) );
|
|
stkCursor.Push( xCur.GetPointer() );
|
|
xCur.Acquire();
|
|
}
|
|
|
|
pCur = stkCursor.QueryWlCursor( MaxWorkId() );
|
|
}
|
|
|
|
return pCur;
|
|
} //QueryCursor
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Member: CWordList::QueryKeyCursor, public
|
|
//
|
|
// Synopsis: Create a cursor for the WordList
|
|
//
|
|
// Returns: A pointer to a CKeyCursor.
|
|
//
|
|
// History: 06-Oct-98 dlee Added header, author unknown
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
|
|
CKeyCursor * CWordList::QueryKeyCursor( CKey const * pkeyTarget )
|
|
{
|
|
CKeyCursor * pcur = QueryCursor();
|
|
|
|
for ( CKeyBuf const * pkey = pcur->GetKey();
|
|
pkey != 0 && pkey->Compare( *pkeyTarget ) < 0;
|
|
pkey = pcur->GetNextKey() )
|
|
continue;
|
|
|
|
if ( 0 != pkey )
|
|
{
|
|
if ( pkey->Compare( *pkeyTarget ) == 0 )
|
|
return pcur;
|
|
}
|
|
|
|
delete pcur;
|
|
return 0;
|
|
} //QueryKeyCursor
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Member: CWordList::QueryCursor, public
|
|
//
|
|
// Synopsis: Create a cursor for the WordList
|
|
//
|
|
// Effects: Creates a cursor
|
|
//
|
|
// Arguments: [pkey] -- Key to initially position the cursor on.
|
|
// [isRange] -- TRUE for range query
|
|
// [cMaxNodes] -- Max number of nodes to create. Decremented
|
|
// on return.
|
|
//
|
|
// Returns: A pointer to a CKeyCursor.
|
|
//
|
|
// History: 15-Apr-91 KyleP Created.
|
|
// 22-May-92 BrianB Modified to use chunk merges
|
|
// 07-Jun-91 BartoszM Rewrote
|
|
// 24-Jan-92 AmyA Modified to use CKeyCurArray to remove
|
|
// TRY...CATCH.
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
COccCursor * CWordList::QueryCursor( const CKey * pkey,
|
|
BOOL isRange,
|
|
ULONG & cMaxNodes )
|
|
{
|
|
Win4Assert( cMaxNodes > 0 );
|
|
|
|
if ( _count == 0 && !_fUnfiltered )
|
|
return 0;
|
|
|
|
if (isRange)
|
|
{
|
|
CKey keyEnd;
|
|
keyEnd.FillMax (*pkey);
|
|
return QueryRangeCursor ( pkey, &keyEnd, cMaxNodes );
|
|
}
|
|
|
|
if (pkey->Pid() == pidAll)
|
|
{
|
|
return QueryRangeCursor ( pkey, pkey, cMaxNodes );
|
|
}
|
|
|
|
cMaxNodes--;
|
|
|
|
if ( 0 == cMaxNodes )
|
|
{
|
|
ciDebugOut(( DEB_WARN, "Exceeded node limit in: CWordList::QueryCursor\n" ));
|
|
THROW( CException( STATUS_TOO_MANY_NODES ) );
|
|
}
|
|
|
|
CKeyCursor *pCur = 0;
|
|
|
|
if ( CUnfilteredCursor::CompareAgainstUnfilteredKey( *pkey ) == 0 )
|
|
{
|
|
pCur = new CUnfilteredCursor( GetId(), MaxWorkId(), _widTable );
|
|
}
|
|
else
|
|
{
|
|
if(_count == 1)
|
|
{
|
|
// single chunk return chunk cursor
|
|
|
|
pCur = _chunks->QueryCursor ( GetId(), _widTable, pkey, MaxWorkId() );
|
|
}
|
|
else
|
|
{
|
|
// multiple chunks create merge cursor
|
|
|
|
CKeyCurStack stkCursor;
|
|
|
|
for ( CSortChunk* pChunk = _chunks;
|
|
pChunk != 0;
|
|
pChunk = pChunk->next)
|
|
{
|
|
XPtr<CKeyCursor> xCur( pChunk->QueryCursor( GetId(),
|
|
_widTable,
|
|
pkey,
|
|
MaxWorkId() ) );
|
|
|
|
if ( !xCur.IsNull() )
|
|
{
|
|
stkCursor.Push( xCur.GetPointer() );
|
|
xCur.Acquire();
|
|
}
|
|
}
|
|
|
|
pCur = stkCursor.QueryWlCursor( MaxWorkId() );
|
|
}
|
|
}
|
|
|
|
return pCur;
|
|
} //QueryCursor
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Member: CWordList::QueryRangeCursor, public
|
|
//
|
|
// Synopsis: Create a range cursor for the WordList
|
|
//
|
|
// Effects: Creates a cursor
|
|
//
|
|
// Arguments: [pkey] -- Beginning of query range.
|
|
// [pkeyEnd] -- End of query range.
|
|
// [cMaxNodes] -- Max number of nodes to create. Decremented
|
|
// on return.
|
|
//
|
|
// Returns: A pointer to a CKeyCursor.
|
|
//
|
|
// History: 27-Jan-92 AmyA Created.
|
|
// 07-Feb-92 AmyA Moved some code to CreateRange().
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
COccCursor * CWordList::QueryRangeCursor( const CKey * pkey,
|
|
const CKey * pkeyEnd,
|
|
ULONG & cMaxNodes )
|
|
{
|
|
Win4Assert( cMaxNodes > 0 );
|
|
|
|
Win4Assert( pkey->Pid() == pkeyEnd->Pid() );
|
|
// Win4Assert( pkey->Pid() != pidAll ||
|
|
// pkey->CompareStr( *pkeyEnd ) == 0 );
|
|
|
|
//
|
|
// Decide if the invalid key is in the range.
|
|
//
|
|
|
|
BOOL fInvalidInRange;
|
|
|
|
if ( pkey->Pid() == pidUnfiltered )
|
|
{
|
|
cMaxNodes--;
|
|
|
|
if ( 0 == cMaxNodes )
|
|
{
|
|
ciDebugOut(( DEB_WARN, "Exceeded node limit in: CWordList::QueryRangeCursor\n" ));
|
|
THROW( CException( STATUS_TOO_MANY_NODES ) );
|
|
}
|
|
|
|
if ( _fUnfiltered &&
|
|
CUnfilteredCursor::CompareAgainstUnfilteredKey( *pkey ) >= 0 &&
|
|
CUnfilteredCursor::CompareAgainstUnfilteredKey( *pkeyEnd ) <= 0 )
|
|
{
|
|
return new CUnfilteredCursor( GetId(), MaxWorkId(), _widTable );
|
|
}
|
|
else
|
|
{
|
|
return( 0 );
|
|
}
|
|
}
|
|
|
|
ciDebugOut(( DEB_ITRACE, "Chunk count is %d\n", _count ));
|
|
|
|
if ( _count == 0 )
|
|
return 0;
|
|
|
|
//
|
|
// Cheat a little here. Build the whole range before subtracting nodes. Also, consider
|
|
// a 'node' to be one cursor in every chunk. So only subtract off the maximum contribution
|
|
// of any single chunk.
|
|
//
|
|
|
|
COccCurStack curStk;
|
|
|
|
ULONG cMaxPerChunk = 0;
|
|
ULONG cCursor = 0;
|
|
|
|
for (CSortChunk* pChunk = _chunks;
|
|
pChunk != 0;
|
|
pChunk = pChunk->next)
|
|
{
|
|
pChunk->CreateRange(curStk, pkey, pkeyEnd, GetId(), _widTable, MaxWorkId());
|
|
|
|
Win4Assert( curStk.Count() >= cCursor );
|
|
|
|
ULONG cInChunk = curStk.Count() - cCursor;
|
|
|
|
if ( cInChunk > cMaxPerChunk )
|
|
{
|
|
cMaxPerChunk = cInChunk;
|
|
|
|
if ( cMaxPerChunk >= cMaxNodes )
|
|
{
|
|
ciDebugOut(( DEB_WARN, "Exceeded node limit in: CWordList::QueryRangeCursor\n" ));
|
|
cMaxNodes = 0;
|
|
|
|
THROW( CException( STATUS_TOO_MANY_NODES ) );
|
|
}
|
|
}
|
|
|
|
cCursor = curStk.Count();
|
|
}
|
|
|
|
cMaxNodes -= cMaxPerChunk;
|
|
Win4Assert( cMaxNodes > 0 );
|
|
|
|
return curStk.QuerySynCursor(MaxWorkId());
|
|
}
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Member: CWordList::QuerySynCursor, public
|
|
//
|
|
// Synopsis: Create a synonym cursor for the WordList
|
|
//
|
|
// Effects: Creates a cursor
|
|
//
|
|
// Arguments: [keyStk] -- Keys to query on.
|
|
// [isRange] -- Whether the query will be a range query.
|
|
// [cMaxNodes] -- Max nodes (keys) to add
|
|
//
|
|
// Returns: A pointer to a CKeyCursor.
|
|
//
|
|
// History: 31-Jan-92 AmyA Created.
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
COccCursor * CWordList::QuerySynCursor( CKeyArray & keyArr,
|
|
BOOL isRange,
|
|
ULONG & cMaxNodes )
|
|
{
|
|
Win4Assert( cMaxNodes > 0 );
|
|
|
|
if (_count == 0)
|
|
return(0);
|
|
|
|
//
|
|
// Cheat a little here. Build the whole range before subtracting nodes. Also, consider
|
|
// a 'node' to be one cursor in every chunk. So only subtract off the maximum contribution
|
|
// of any single chunk.
|
|
//
|
|
|
|
COccCurStack curStk;
|
|
|
|
ULONG cMaxPerChunk = 0;
|
|
ULONG cCursor = 0;
|
|
|
|
int keyCount = keyArr.Count();
|
|
|
|
ciDebugOut((DEB_ITRACE, "KeyCount is %d\n", keyCount));
|
|
|
|
for (CSortChunk* pChunk = _chunks;
|
|
pChunk != 0;
|
|
pChunk = pChunk->next)
|
|
{
|
|
for (int i = 0; i < keyCount; i++)
|
|
{
|
|
CKey& key = keyArr.Get(i);
|
|
|
|
ciDebugOut((DEB_ITRACE, "Key is %.*ws\n", key.StrLen(), key.GetStr()));
|
|
if (isRange)
|
|
{
|
|
CKey keyEnd;
|
|
keyEnd.FillMax(key);
|
|
|
|
pChunk->CreateRange(
|
|
curStk, &key, &keyEnd, GetId(), _widTable, MaxWorkId());
|
|
}
|
|
else if ( key.Pid() == pidAll )
|
|
{
|
|
pChunk->CreateRange(
|
|
curStk, &key, &key, GetId(), _widTable, MaxWorkId());
|
|
}
|
|
else
|
|
{
|
|
XPtr<CChunkCursor> xNewCur( pChunk->QueryCursor(
|
|
GetId(), _widTable, &key, MaxWorkId() ) );
|
|
if ( !xNewCur.IsNull() )
|
|
{
|
|
curStk.Push( xNewCur.GetPointer() );
|
|
xNewCur.Acquire();
|
|
}
|
|
}
|
|
}
|
|
|
|
Win4Assert( curStk.Count() >= cCursor );
|
|
|
|
ULONG cInChunk = curStk.Count() - cCursor;
|
|
|
|
if ( cInChunk > cMaxPerChunk )
|
|
{
|
|
cMaxPerChunk = cInChunk;
|
|
|
|
if ( cMaxPerChunk >= cMaxNodes )
|
|
{
|
|
ciDebugOut(( DEB_WARN, "Exceeded node limit in: CWordList::QuerySynCursor\n" ));
|
|
cMaxNodes = 0;
|
|
|
|
THROW( CException( STATUS_TOO_MANY_NODES ) );
|
|
}
|
|
}
|
|
|
|
cCursor = curStk.Count();
|
|
}
|
|
|
|
cMaxNodes -= cMaxPerChunk;
|
|
Win4Assert( cMaxNodes > 0 );
|
|
|
|
return curStk.QuerySynCursor(MaxWorkId());
|
|
}
|
|
|
|
void CWordList::GetDocuments( CDocList & doclist )
|
|
{
|
|
unsigned cWid = 0;
|
|
|
|
for ( unsigned i = 0; i < _widTable.Count(); i++ )
|
|
{
|
|
if ( _widTable.FakeWidToWid(iDocToFakeWid(i)) != widInvalid )
|
|
{
|
|
doclist.Set( cWid,
|
|
_widTable.FakeWidToWid( iDocToFakeWid(i) ),
|
|
0, // Use usn of 0 for refiled wids
|
|
_widTable.VolumeId( iDocToFakeWid(i) ) );
|
|
cWid++;
|
|
}
|
|
}
|
|
|
|
doclist.LokSetCount( cWid ); // okay not to have resman lock here
|
|
}
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Member: CWordList::Done, public
|
|
//
|
|
// Synopsis: Called when a wordlist if fully constructed and available
|
|
// for query.
|
|
//
|
|
// Effects: Sets _fUnfiltered to TRUE if there are wids in the wid table
|
|
// that have been invalidated.
|
|
//
|
|
// History: 09-Nov-94 KyleP Created.
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
|
|
void CWordList::Done()
|
|
{
|
|
Win4Assert( !_fUnfiltered );
|
|
|
|
//
|
|
// How many invalid wids are there?
|
|
//
|
|
|
|
unsigned cUnfiltered = 0;
|
|
|
|
for ( unsigned i = 1; i <= _widTable.Count(); i++ )
|
|
{
|
|
if ( _widTable.IsValid(i) && !_widTable.IsFiltered(i) )
|
|
cUnfiltered++;
|
|
}
|
|
|
|
//
|
|
// Create chunk of invalid property.
|
|
//
|
|
|
|
if ( cUnfiltered > 0 )
|
|
{
|
|
ciDebugOut(( DEB_ITRACE, "%d unfiltered wids in wordlist %x\n",
|
|
cUnfiltered, GetId() ));
|
|
|
|
_fUnfiltered = TRUE;
|
|
}
|
|
}
|