windows-server-2003/inetsrv/query/cindex/wordlist.cxx


								//+---------------------------------------------------------------------------

								//

								//  Microsoft Windows

								//  Copyright (C) Microsoft Corporation, 1991 - 1992.

								//

								//  File:       WordList.Cxx

								//

								//  Contents:   Implementation of the CWordList class

								//

								//  Classes:    CWordList

								//

								//  History:    06-Mar-91       KyleP       Created.

								//              04-Apr-91       BartoszM    Removed init

								//              10-May-91       BartoszM    Load CWLCursor cache correctly

								//              13-May-91       KyleP       Removed extraneous TRY ... CATCH

								//              22-May-91       Brianb      Changed to use own sorter

								//              04-Jun-91       BartoszM    Rewrote it

								//              19-Jun-91       reviewed

								//              18-Mar-93       AmyA        Moved all entry buffer code to

								//                                          ebufhdlr.cxx

								//

								//----------------------------------------------------------------------------


								#include <pch.cxx>

								#pragma hdrstop


								#include <doclist.hxx>


								#include "wordlist.hxx"

								#include "invcur.hxx"


								//+---------------------------------------------------------------------------

								//

								// Member:     CWordList::Size, public

								//

								// Synopsis:   Returns rough size estimate in 4k pages

								//

								// History:    22-May-92    BartoszM       Created.

								//

								//----------------------------------------------------------------------------


								unsigned CWordList::Size() const

								{

								    unsigned size = 0;


								    CSortChunk * p = _chunks;


								    while ( 0 != p )

								    {

								        size += p->BlockCount() * ( cbInitialBlock / 4096 );


								        p = p->next;

								    }


								    //

								    // If we have 'unfiltered' documents, then add a one size unit for

								    // them.

								    //


								    if ( _fUnfiltered )

								        size += 1;


								    return size;

								} //Size


								//+---------------------------------------------------------------------------

								//

								// Member:     CWordList::CWordList, public

								//

								// Synopsis:   Constructor for CWordList

								//

								// Effects:    Initializes sort data structures

								//

								// Arguments:   [id] -- Index ID of the wordlist.

								//              [widMax] -- maximum work id

								//              [cbMemory] -- suggested size of buffer

								//

								// History:     07-Mar-91   KyleP       Created.

								//              03-Apr-91   KyleP       Combined with initialization

								//              22-May-91   Brianb      Converted to use new sort algorithm

								//

								//----------------------------------------------------------------------------

								CWordList::CWordList( INDEXID iid, WORKID widMax )
								    : CIndex( iid ),
								      _sigWordList( eSigWordList ),
								      _chunks( 0 ),
								      _count( 0 ),
								      _fUnfiltered( FALSE )
								{
								    //
								    // Sanity checks: the index data items must be four bytes wide, and
								    // the sentinel key must not be too big.
								    //

								    ciAssert( 4 == sizeof(PROPID) );
								    ciAssert( 4 == sizeof(OCCURRENCE) );
								    ciAssert( 256 > MAXKEYSIZE );

								    // The wordlist starts out empty; only the work id limit is recorded.
								    SetMaxWorkId( widMax );
								}


								//+---------------------------------------------------------------------------

								//

								// Member:     CWordList::~CWordList, public

								//

								// Synopsis:   Destructor

								//

								// Effects:    Releases all memory used by the chunks in the chunk list

								//

								// History:    06-Mar-91   KyleP       Created.

								//

								//----------------------------------------------------------------------------


								CWordList::~CWordList()
								{
								    //
								    // Walk the singly linked chunk list.  Each chunk is unlinked from the
								    // head of the list before it is deleted, so _chunks never points at
								    // a deleted chunk.
								    //

								    for ( CSortChunk * pDoomed = _chunks; 0 != pDoomed; pDoomed = _chunks )
								    {
								        _chunks = pDoomed->next;
								        delete pDoomed;
								    }
								}


								//+---------------------------------------------------------------------------

								//

								// Member:     CWordList::MakeChunk, public

								//

								// Synopsis:   Creates new sorted chunk from data in entry buffer

								//

								// Arguments:  [pEntryBuf] -- pointer to buffer to create sorted chunk from

								//             [cb] -- count of bytes in buffer

								//

								// Expects:    Sentinel entry added to pEntryBuf and that the buffer is in the

								//             correct format.

								//

								// Returns:    TRUE.  The code never returns FALSE and catches no exceptions.

								//

								// History:    04-Jun-89    BartoszM    Created.

								//             18-Mar-93    AmyA        Added entry buffer passing

								//

								//----------------------------------------------------------------------------


								BOOL CWordList::MakeChunk ( const BYTE * pEntryBuf, ULONG cb )
								{
								    //
								    // The new chunk stays in the XPtr while Init runs; it is only taken
								    // out (Acquire) once initialization has completed.
								    //

								    XPtr<CSortChunk> xNew( new CSortChunk( _maxOccTable ) );
								    xNew->Init( pEntryBuf, cb, MaxWorkId() );

								    //
								    // Link the initialized chunk in at the head of the chunk list and
								    // bump the chunk count.
								    //

								    CSortChunk * pHead = xNew.Acquire();
								    pHead->next = _chunks;
								    _chunks = pHead;
								    ++_count;

								    return TRUE;
								}


								//+---------------------------------------------------------------------------

								//

								// Member:      CWordList::QueryCursor, public

								//

								// Synopsis:    Create a cursor for the WordList

								//

								// Effects:     Creates a cursor

								//

								// Returns:     A pointer to a CKeyCursor.

								//

								// History:     15-Apr-91   KyleP       Created.

								//              22-May-92   BrianB      Modified to use chunk merges

								//              07-Jun-91   BartoszM    Rewrote

								//              24-Jan-92   AmyA        Modified to use CKeyCurArray to remove

								//                                      TRY...CATCH.

								//

								//----------------------------------------------------------------------------


								CKeyCursor * CWordList::QueryCursor()
								{
								    //
								    // No chunks at all: the result is either nothing or, when there are
								    // unfiltered documents, a lone unfiltered cursor.
								    //

								    if ( 0 == _count )
								    {
								        if ( !_fUnfiltered )
								            return 0;

								        return new CUnfilteredCursor( GetId(), MaxWorkId(), _widTable );
								    }

								    //
								    // Exactly one chunk and no unfiltered documents: the chunk's own
								    // cursor is sufficient.
								    //

								    if ( 1 == _count && !_fUnfiltered )
								        return _chunks->QueryCursor( GetId(), _widTable, MaxWorkId() );

								    //
								    // Otherwise collect one cursor per chunk (plus the unfiltered cursor
								    // if needed) and let the stack build a single cursor over them.
								    //

								    CKeyCurStack stkCursor;

								    CSortChunk * pChunk = _chunks;

								    while ( 0 != pChunk )
								    {
								        XPtr<CKeyCursor> xChunkCur( pChunk->QueryCursor( GetId(),
								                                                         _widTable,
								                                                         MaxWorkId() ) );

								        // The XPtr gives the cursor up only after the push has succeeded.
								        if ( !xChunkCur.IsNull() )
								        {
								            stkCursor.Push( xChunkCur.GetPointer() );
								            xChunkCur.Acquire();
								        }

								        pChunk = pChunk->next;
								    }

								    if ( _fUnfiltered )
								    {
								        XPtr<CUnfilteredCursor> xUnfiltered(
								            new CUnfilteredCursor( GetId(), MaxWorkId(), _widTable ) );

								        stkCursor.Push( xUnfiltered.GetPointer() );
								        xUnfiltered.Acquire();
								    }

								    return stkCursor.QueryWlCursor( MaxWorkId() );
								} //QueryCursor


								//+---------------------------------------------------------------------------

								//

								// Member:      CWordList::QueryKeyCursor, public

								//

								// Synopsis:    Create a cursor for the WordList

								//

								// Returns:     A pointer to a CKeyCursor.

								//

								// History:     06-Oct-98   dlee        Added header, author unknown

								//

								//----------------------------------------------------------------------------


								CKeyCursor * CWordList::QueryKeyCursor( CKey const * pkeyTarget )
								{
								    //
								    // Returns a cursor positioned on the key equal to *pkeyTarget, or 0
								    // if the wordlist has no such key.
								    //
								    // QueryCursor() returns 0 when the wordlist has neither chunks nor
								    // unfiltered documents, so the result must be checked before it is
								    // dereferenced.  The cursor is held in an XPtr (used the same way
								    // elsewhere in this file) so that it is released on every path that
								    // does not hand it to the caller.
								    //

								    XPtr<CKeyCursor> xCur( QueryCursor() );

								    if ( xCur.IsNull() )
								        return 0;

								    //
								    // Skip every key that compares less than the target.  pkey is
								    // declared outside the loop because it is examined again after the
								    // loop has finished.
								    //

								    CKeyBuf const * pkey = xCur->GetKey();

								    while ( 0 != pkey && pkey->Compare( *pkeyTarget ) < 0 )
								        pkey = xCur->GetNextKey();

								    //
								    // Only an exact match transfers the cursor to the caller.
								    //

								    if ( 0 != pkey && pkey->Compare( *pkeyTarget ) == 0 )
								        return xCur.Acquire();

								    // No match: xCur still holds the cursor and disposes of it.
								    return 0;
								} //QueryKeyCursor


								//+---------------------------------------------------------------------------

								//

								// Member:      CWordList::QueryCursor, public

								//

								// Synopsis:    Create a cursor for the WordList

								//

								// Effects:     Creates a cursor

								//

								// Arguments:   [pkey]      -- Key to initially position the cursor on.

								//              [isRange]   -- TRUE for range query

								//              [cMaxNodes] -- Max number of nodes to create. Decremented

								//                             on return.

								//

								// Returns:     A pointer to a COccCursor, or 0.

								//

								// History:     15-Apr-91   KyleP       Created.

								//              22-May-92   BrianB      Modified to use chunk merges

								//              07-Jun-91   BartoszM    Rewrote

								//              24-Jan-92   AmyA        Modified to use CKeyCurArray to remove

								//                                      TRY...CATCH.

								//

								//----------------------------------------------------------------------------

								COccCursor * CWordList::QueryCursor( const CKey * pkey,
								                                     BOOL isRange,
								                                     ULONG & cMaxNodes )
								{
								    Win4Assert( cMaxNodes > 0 );

								    // Neither chunks nor unfiltered documents: nothing to search.
								    if ( 0 == _count && !_fUnfiltered )
								        return 0;

								    //
								    // Range queries and pidAll lookups are handed to QueryRangeCursor,
								    // which does its own node accounting.
								    //

								    if ( isRange )
								    {
								        CKey keyEnd;
								        keyEnd.FillMax( *pkey );
								        return QueryRangeCursor( pkey, &keyEnd, cMaxNodes );
								    }

								    if ( pidAll == pkey->Pid() )
								        return QueryRangeCursor( pkey, pkey, cMaxNodes );

								    //
								    // A single-key lookup costs one node; reaching zero means the
								    // caller's node budget is exhausted.
								    //

								    --cMaxNodes;

								    if ( 0 == cMaxNodes )
								    {
								        ciDebugOut(( DEB_WARN, "Exceeded node limit in: CWordList::QueryCursor\n" ));
								        THROW( CException( STATUS_TOO_MANY_NODES ) );
								    }

								    CKeyCursor * pResult = 0;

								    if ( 0 == CUnfilteredCursor::CompareAgainstUnfilteredKey( *pkey ) )
								    {
								        // The key is the unfiltered key itself.
								        pResult = new CUnfilteredCursor( GetId(), MaxWorkId(), _widTable );
								    }
								    else if ( 1 == _count )
								    {
								        // One chunk only: its cursor is the answer.
								        pResult = _chunks->QueryCursor( GetId(), _widTable, pkey, MaxWorkId() );
								    }
								    else
								    {
								        //
								        // Gather the matching cursor from every chunk, then let the
								        // stack build a single cursor over them.
								        //

								        CKeyCurStack stkCursor;

								        CSortChunk * pChunk = _chunks;

								        while ( 0 != pChunk )
								        {
								            XPtr<CKeyCursor> xChunkCur( pChunk->QueryCursor( GetId(),
								                                                             _widTable,
								                                                             pkey,
								                                                             MaxWorkId() ) );

								            // The XPtr gives the cursor up only after the push succeeded.
								            if ( !xChunkCur.IsNull() )
								            {
								                stkCursor.Push( xChunkCur.GetPointer() );
								                xChunkCur.Acquire();
								            }

								            pChunk = pChunk->next;
								        }

								        pResult = stkCursor.QueryWlCursor( MaxWorkId() );
								    }

								    return pResult;
								} //QueryCursor


								//+---------------------------------------------------------------------------

								//

								// Member:      CWordList::QueryRangeCursor, public

								//

								// Synopsis:    Create a range cursor for the WordList

								//

								// Effects:     Creates a cursor

								//

								// Arguments:   [pkey]      -- Beginning of query range.

								//              [pkeyEnd]   -- End of query range.

								//              [cMaxNodes] -- Max number of nodes to create. Decremented

								//                             on return.

								//

								// Returns:     A pointer to a COccCursor, or 0.

								//

								// History:     27-Jan-92   AmyA        Created.

								//              07-Feb-92   AmyA        Moved some code to CreateRange().

								//

								//----------------------------------------------------------------------------

								COccCursor * CWordList::QueryRangeCursor( const CKey * pkey,
								                                          const CKey * pkeyEnd,
								                                          ULONG & cMaxNodes )
								{
								    Win4Assert( cMaxNodes > 0 );

								    // Both ends of the range must be in the same property.
								    Win4Assert( pkey->Pid() == pkeyEnd->Pid() );
								//    Win4Assert( pkey->Pid() != pidAll ||
								//                pkey->CompareStr( *pkeyEnd ) == 0 );

								    //
								    // Ranges in the unfiltered property are not looked up in the chunks.
								    // The range costs one node, and yields an unfiltered cursor only if
								    // this wordlist has unfiltered documents and the unfiltered key lies
								    // within [pkey, pkeyEnd].
								    //

								    if ( pkey->Pid() == pidUnfiltered )
								    {
								        cMaxNodes--;

								        if ( 0 == cMaxNodes )
								        {
								            ciDebugOut(( DEB_WARN, "Exceeded node limit in: CWordList::QueryRangeCursor\n" ));
								            THROW( CException( STATUS_TOO_MANY_NODES ) );
								        }

								        if ( _fUnfiltered &&
								             CUnfilteredCursor::CompareAgainstUnfilteredKey( *pkey ) >= 0 &&
								             CUnfilteredCursor::CompareAgainstUnfilteredKey( *pkeyEnd ) <= 0 )
								        {
								            return new CUnfilteredCursor( GetId(), MaxWorkId(), _widTable );
								        }
								        else
								        {
								            return( 0 );
								        }
								    }

								    ciDebugOut(( DEB_ITRACE, "Chunk count is %d\n", _count ));

								    if ( _count == 0 )
								        return 0;

								    //
								    // Cheat a little here. Build the whole range before subtracting nodes.  Also, consider
								    // a 'node' to be one cursor in every chunk.  So only subtract off the maximum contribution
								    // of any single chunk.
								    //

								    COccCurStack curStk;

								    ULONG cMaxPerChunk = 0;     // largest number of cursors added by one chunk
								    ULONG cCursor = 0;          // stack size before the current chunk was processed

								    for (CSortChunk* pChunk = _chunks;
								         pChunk != 0;
								         pChunk = pChunk->next)
								    {
								        pChunk->CreateRange(curStk, pkey, pkeyEnd, GetId(), _widTable, MaxWorkId());

								        Win4Assert( curStk.Count() >= cCursor );

								        // Number of cursors this chunk contributed to the stack.
								        ULONG cInChunk = curStk.Count() - cCursor;

								        if ( cInChunk > cMaxPerChunk )
								        {
								            cMaxPerChunk = cInChunk;

								            // Give up as soon as one chunk alone uses up the budget.
								            if ( cMaxPerChunk >= cMaxNodes )
								            {
								                ciDebugOut(( DEB_WARN, "Exceeded node limit in: CWordList::QueryRangeCursor\n" ));
								                cMaxNodes = 0;

								                THROW( CException( STATUS_TOO_MANY_NODES ) );
								            }
								        }

								        cCursor = curStk.Count();
								    }

								    // The check inside the loop guarantees cMaxPerChunk < cMaxNodes here.
								    cMaxNodes -= cMaxPerChunk;
								    Win4Assert( cMaxNodes > 0 );

								    return curStk.QuerySynCursor(MaxWorkId());
								}


								//+---------------------------------------------------------------------------

								//

								// Member:      CWordList::QuerySynCursor, public

								//

								// Synopsis:    Create a synonym cursor for the WordList

								//

								// Effects:     Creates a cursor

								//

								// Arguments:   [keyArr]    -- Keys to query on.

								//              [isRange]   -- Whether the query will be a range query.

								//              [cMaxNodes] -- Max nodes (keys) to add

								//

								// Returns:     A pointer to a COccCursor, or 0.

								//

								// History:     31-Jan-92   AmyA        Created.

								//

								//----------------------------------------------------------------------------

								COccCursor * CWordList::QuerySynCursor( CKeyArray & keyArr,
								                                        BOOL isRange,
								                                        ULONG & cMaxNodes )
								{
								    Win4Assert( cMaxNodes > 0 );

								    // Without chunks there is nothing to build a cursor from.
								    if ( 0 == _count )
								        return(0);

								    //
								    // Node accounting is approximate: the complete set of cursors is
								    // built first, and a 'node' is taken to be one cursor in every
								    // chunk.  Only the largest contribution of any single chunk is
								    // therefore charged against cMaxNodes.
								    //

								    COccCurStack stkOcc;

								    ULONG cLargestChunk = 0;    // most cursors added by any one chunk
								    ULONG cBefore = 0;          // stack size before the current chunk

								    int const cKeys = keyArr.Count();

								    ciDebugOut((DEB_ITRACE, "KeyCount is %d\n", cKeys));

								    CSortChunk * pChunk = _chunks;

								    while ( 0 != pChunk )
								    {
								        for ( int iKey = 0; iKey < cKeys; ++iKey )
								        {
								            CKey & key = keyArr.Get( iKey );

								            ciDebugOut((DEB_ITRACE, "Key is %.*ws\n", key.StrLen(), key.GetStr()));

								            if ( isRange )
								            {
								                // Range query: from the key up to its FillMax key.
								                CKey keyEnd;
								                keyEnd.FillMax( key );

								                pChunk->CreateRange(
								                    stkOcc, &key, &keyEnd, GetId(), _widTable, MaxWorkId() );
								                continue;
								            }

								            if ( pidAll == key.Pid() )
								            {
								                // pidAll keys go through CreateRange with the key as both ends.
								                pChunk->CreateRange(
								                    stkOcc, &key, &key, GetId(), _widTable, MaxWorkId() );
								                continue;
								            }

								            // Plain key: at most one cursor from this chunk.
								            XPtr<CChunkCursor> xChunkCur( pChunk->QueryCursor(
								                GetId(), _widTable, &key, MaxWorkId() ) );

								            if ( !xChunkCur.IsNull() )
								            {
								                stkOcc.Push( xChunkCur.GetPointer() );
								                xChunkCur.Acquire();
								            }
								        }

								        Win4Assert( stkOcc.Count() >= cBefore );

								        // Number of cursors this chunk contributed to the stack.
								        ULONG cFromChunk = stkOcc.Count() - cBefore;

								        if ( cFromChunk > cLargestChunk )
								        {
								            cLargestChunk = cFromChunk;

								            // Give up as soon as one chunk alone uses up the budget.
								            if ( cLargestChunk >= cMaxNodes )
								            {
								                ciDebugOut(( DEB_WARN, "Exceeded node limit in: CWordList::QuerySynCursor\n" ));
								                cMaxNodes = 0;

								                THROW( CException( STATUS_TOO_MANY_NODES ) );
								            }
								        }

								        cBefore = stkOcc.Count();

								        pChunk = pChunk->next;
								    }

								    cMaxNodes -= cLargestChunk;
								    Win4Assert( cMaxNodes > 0 );

								    return stkOcc.QuerySynCursor( MaxWorkId() );
								}


								//
								// Fills [doclist] with one entry for every slot of the wid table whose
								// fake wid maps to a work id other than widInvalid, then sets the list's
								// count to the number of entries written.
								//
								void CWordList::GetDocuments( CDocList & doclist )
								{
								    unsigned cFound = 0;

								    for ( unsigned iDoc = 0; iDoc < _widTable.Count(); ++iDoc )
								    {
								        // Slots that map to widInvalid are skipped.
								        if ( _widTable.FakeWidToWid( iDocToFakeWid(iDoc) ) == widInvalid )
								            continue;

								        doclist.Set( cFound,
								                     _widTable.FakeWidToWid( iDocToFakeWid(iDoc) ),
								                     0,      // Use usn of 0 for refiled wids
								                     _widTable.VolumeId( iDocToFakeWid(iDoc) ) );
								        ++cFound;
								    }

								    doclist.LokSetCount( cFound );    //  okay not to have resman lock here
								}


								//+---------------------------------------------------------------------------

								//

								// Member:      CWordList::Done, public

								//

								// Synopsis:    Called when a wordlist is fully constructed and available

								//              for query.

								//

								// Effects:     Sets _fUnfiltered to TRUE if there are wids in the wid table

								//              that have been invalidated.

								//

								// History:     09-Nov-94    KyleP      Created.

								//

								//----------------------------------------------------------------------------


								void CWordList::Done()
								{
								    Win4Assert( !_fUnfiltered );

								    //
								    // Tally the wid table entries (indexes 1 through Count()) that are
								    // valid but have not been filtered.
								    //

								    unsigned cPending = 0;
								    unsigned iWid = 1;

								    while ( iWid <= _widTable.Count() )
								    {
								        if ( _widTable.IsValid(iWid) && !_widTable.IsFiltered(iWid) )
								            ++cPending;

								        ++iWid;
								    }

								    // Nothing unfiltered: the flag stays FALSE.
								    if ( 0 == cPending )
								        return;

								    //
								    // At least one unfiltered wid: remember that for later queries and
								    // size estimates.
								    //

								    ciDebugOut(( DEB_ITRACE, "%d unfiltered wids in wordlist %x\n",
								                 cPending, GetId() ));

								    _fUnfiltered = TRUE;
								}