|
|
//+---------------------------------------------------------------------------
//
// Microsoft Windows
// Copyright (C) Microsoft Corporation, 1991 - 2000.
//
// File: PHRCUR.CXX
//
// Contents: Phrase Cursor. Computes intersection of multiple cursors
// with constraints on occurrances.
//
// Classes: CPhraseCursor
//
// History: 24-May-91 BartoszM Created.
// 19-Feb-92 AmyA Modified to be a COccCursor instead of
// a CCursor.
//
//----------------------------------------------------------------------------
#include <pch.cxx>
#pragma hdrstop
#include <misc.hxx>
#include <curstk.hxx>
#include <cudebug.hxx>
#include "phrcur.hxx"
#pragma optimize( "t", on )
//+---------------------------------------------------------------------------
//
// Member: CPhraseCursor::CPhraseCursor, public
//
// Synopsis: Create a cursor that merges a number of cursors.
//
// Arguments:
// [curStack] -- cursors to be merged
// [aOcc] -- a safe array of OCCURRENCEs for the cursors
//
// Notes: All cursors must come from the same index
// all keys have the same property id
//
// History: 24-May-91 BartoszM Created
//
//----------------------------------------------------------------------------
CPhraseCursor::CPhraseCursor( COccCurStack& curStack, XArray<OCCURRENCE>& aOcc ) : COccCursor(curStack.Get(0)->MaxWorkId()), _cCur(aOcc.Count()), _aCur(curStack.AcqStack()), _cOcc(0), _maxOcc(OCC_INVALID) { _aOcc = aOcc.Acquire(); _iid = _aCur[0]->IndexId(); _pid = _aCur[0]->Pid(); _wid = _aCur[0]->WorkId(); _logWidMax = Log2(_widMax); if (FindPhrase()) _cOcc++; }
//+---------------------------------------------------------------------------
//
// Member: CPhraseCursor::~CPhraseCursor, public
//
// Synopsis: Deletes children
//
// History: 24-May-91 BartoszM Created
//
//----------------------------------------------------------------------------
CPhraseCursor::~CPhraseCursor() { for ( unsigned i=0; i < _cCur; i++) { delete _aCur[i]; } delete _aCur; delete _aOcc; }
//+---------------------------------------------------------------------------
//
// Member: CPhraseCursor::WorkId, public
//
// Synopsis: Get current work id.
//
// History: 24-May-91 BartoszM Created
//
//----------------------------------------------------------------------------
WORKID CPhraseCursor::WorkId() { return _wid; }
//+---------------------------------------------------------------------------
//
// Member: CPhraseCursor::NextWorkID, public
//
// Synopsis: Move to next work id
//
// Returns: Target work id or widInvalid if no more wid's for current key
//
// History: 24-May-91 BartoszM Created
//
// Notes: Resets _cOcc
//
//----------------------------------------------------------------------------
WORKID CPhraseCursor::NextWorkId() { _cOcc = 0;
// NTRAID#DB-NTBUG9-84004-2000/07/31-dlee Indexing Service internal cursors aren't optimized to use shortest cursors first
_wid = _aCur[0]->NextWorkId(); _pid = _aCur[0]->Pid(); if (FindPhrase()) _cOcc++; return _wid; }
void CPhraseCursor::RatioFinished (ULONG& denom, ULONG& num) { denom = 1; num = 0;
for (unsigned i = 0; i < _cCur; i++) { ULONG d, n; _aCur[i]->RatioFinished(d, n);
if (d == n) { // done if any cursor is done.
denom = d; num = n; Win4Assert( d > 0 ); break; } else if (d > denom) { // the one with largest denom
// is the most meaningful
denom = d; num = n; } else if (d == denom && n < num ) { num = n; // be pessimistic
} } }
//+---------------------------------------------------------------------------
//
// Member: CPhraseCursor::FindPhrase, private
//
// Synopsis: Find nearest phrase. First try to align wid's,
// Then align occurrences. Loop until success
// or no more wid alignments.
//
// Requires: _wid set to any of the current wid's
//
// History: 24-May-91 BartoszM Created
//
// Notes: If cursors point to phrase, no change results
//
//----------------------------------------------------------------------------
BOOL CPhraseCursor::FindPhrase () { if ( _wid == widInvalid ) { _occ = OCC_INVALID; return FALSE; }
while ( FindWidConjunction() && !FindOccConjunction() ) { _wid = _aCur[0]->NextWorkId(); _pid = _aCur[0]->Pid(); }
if ( _occ != OCC_INVALID ) return TRUE; else return FALSE; }
//+---------------------------------------------------------------------------
//
// Member: CPhraseCursor::NextOccurrence, public
//
// Synopsis: Find phrase for next conjunction of work id's and return _occ
//
// Requires: _occ set to any of the cursors' occurrences
//
// History: 03-Mar-92 AmyA Created
//
// Notes: Increments _cOcc unless another occurrence is not found.
//
//----------------------------------------------------------------------------
OCCURRENCE CPhraseCursor::NextOccurrence() { _occ = _aCur[0]->NextOccurrence();
if (FindOccConjunction()) _cOcc++; return _occ; }
//+---------------------------------------------------------------------------
//
// Member: CPhraseCursor::OccurrenceCount, public
//
// Synopsis: Returns correct _cOcc by looping through NextOccurrence until
// it returns OCC_INVALID.
//
// Requires: _occ set to any of the cursors' occurrences
//
// History: 28-Feb-92 AmyA Created
//
// Notes: _occ may get changed.
//
//----------------------------------------------------------------------------
ULONG CPhraseCursor::OccurrenceCount() { while (NextOccurrence() != OCC_INVALID) { // do nothing.
} return _cOcc; }
//+---------------------------------------------------------------------------
//
// Member: CPhraseCursor::MaxOccurrence
//
// Synopsis: Returns max occurrence count of current wid
//
// History: 26-Jun-96 SitaramR Created
//
//----------------------------------------------------------------------------
OCCURRENCE CPhraseCursor::MaxOccurrence() { Win4Assert( _wid != widInvalid );
if ( _wid == widInvalid ) return OCC_INVALID; else return _maxOcc; }
//+---------------------------------------------------------------------------
//
// Member: CPhraseCursor::HitCount, public
//
// Synopsis: Returns correct _cOcc by looping through NextOccurrence until
// it returns OCC_INVALID.
//
// Requires: _occ set to any of the cursors' occurrences
//
// History: 28-Feb-92 AmyA Created
//
// Notes: _occ may get changed.
//
//----------------------------------------------------------------------------
ULONG CPhraseCursor::HitCount() { return OccurrenceCount(); }
//+---------------------------------------------------------------------------
//
// Member: CPhraseCursor::FindOccConjunction, private
//
// Synopsis: Find phrase for current conjunction of work id's
//
// Requires: _occ set to any of the cursors' occurrences
//
// History: 24-May-91 BartoszM Created
//
//----------------------------------------------------------------------------
BOOL CPhraseCursor::FindOccConjunction () { if ( _occ == OCC_INVALID ) return FALSE;
unsigned i;
do { // NTRAID#DB-NTBUG9-84004-2000/07/31-dlee Indexing Service internal cursors aren't optimized to use shortest cursors first
for ( i = 0; i < _cCur; i++ ) { // Iterate until we have a matching occurrence
OCCURRENCE occTarget = _occ + _aOcc[i];
cuDebugOut(( DEB_ITRACE, "cursor %d, _occ %d, target %d, _pid %d\n", i, _occ, occTarget, _pid ));
OCCURRENCE occTmp = _aCur[i]->Occurrence();
while ( occTmp < occTarget ) { occTmp = _aCur[i]->NextOccurrence();
if ( OCC_INVALID == occTmp ) { _occ = OCC_INVALID; return FALSE; } }
// Keep looping until the pid matches
while ( occTmp == occTarget && _aCur[i]->Pid() < _pid ) { cuDebugOut(( DEB_ITRACE, "looking for matching pid\n" ));
occTmp = _aCur[i]->NextOccurrence();
if ( OCC_INVALID == occTmp ) { _occ = OCC_INVALID; return FALSE; } }
// if overshot, try again with new _occ
if ( occTmp > occTarget ) { cuDebugOut(( DEB_ITRACE, "overshot occ\n" ));
_occ = _aCur[i]->Occurrence() - _aOcc[i]; break; }
Win4Assert( _aCur[i]->Occurrence() == occTarget );
if ( _aCur[i]->Pid() > _pid ) { cuDebugOut(( DEB_ITRACE, "overshot pid, cur %d, _pid %d\n", _aCur[i]->Pid(), _pid ));
//
// This pid just won't do. Move cursor 0 to the next
// occurrence, use that pid, and start all over.
//
if ( _aCur[0]->NextOccurrence() == OCC_INVALID ) { _occ = OCC_INVALID; return FALSE; }
_occ = _aCur[0]->Occurrence(); _pid = _aCur[0]->Pid(); break; } } } while ( i < _cCur );
return TRUE; } //FindOccConjunction
//+---------------------------------------------------------------------------
//
// Member: CPhraseCursor::FindWidConjunction, private
//
// Synopsis: Find nearest conjunction of all the same work id's
//
// Requires: _wid set to any of the current wid's
//
// History: 24-May-91 BartoszM Created
//
// Notes: If cursors are in conjunction, no change results
//
//----------------------------------------------------------------------------
BOOL CPhraseCursor::FindWidConjunction () { if ( _wid == widInvalid ) return FALSE;
BOOL change;
do { change = FALSE;
// NTRAID#DB-NTBUG9-84004-2000/07/31-dlee Indexing Service internal cursors aren't optimized to use shortest cursors first
for ( unsigned i = 0; i < _cCur; i++ ) { // Seek _wid
WORKID widTmp = _aCur[i]->WorkId();
while ( widTmp < _wid ) { widTmp = _aCur[i]->NextWorkId();
if ( widInvalid == widTmp ) { _wid = widInvalid; _pid = pidInvalid; _occ = OCC_INVALID; return FALSE; } }
if ( widTmp > _wid ) // overshot!
{ _wid = widTmp; _pid = _aCur[i]->Pid();
change = TRUE; break; } } } while ( change );
_occ = _aCur[0]->Occurrence(); _maxOcc = _aCur[0]->MaxOccurrence();
return TRUE; } //FindWidConjunction
//+---------------------------------------------------------------------------
//
// Member: CPhraseCursor::Hit(), public
//
// Synopsis:
//
// Arguments:
//
// History: 17-Sep-92 MikeHew Created
//
// Notes:
//
//----------------------------------------------------------------------------
LONG CPhraseCursor::Hit() { if ( _occ == OCC_INVALID ) { return rankInvalid; }
for (unsigned i=0; i<_cCur; i++) { _aCur[i]->Hit(); }
return MAX_QUERY_RANK; }
//+---------------------------------------------------------------------------
//
// Member: CPhraseCursor::NextHit(), public
//
// Synopsis:
//
// Arguments:
//
// History: 17-Sep-92 MikeHew Created
//
// Notes:
//
//----------------------------------------------------------------------------
LONG CPhraseCursor::NextHit() { NextOccurrence();
return Hit(); }
//+---------------------------------------------------------------------------
//
// Member: CPhraseCursor::Rank(), public
//
// Synopsis: Returns phrase rank
//
// History: 23-Jun-94 SitaramR created
//
// Notes: rank = HitCount*Log(_widMax/widCount). We make the
// assumption that the phrase appears in this and this
// document only, ie widcount = 1
//
//----------------------------------------------------------------------------
LONG CPhraseCursor::Rank() { Win4Assert( MaxOccurrence() != 0 );
LONG rank = RANK_MULTIPLIER * HitCount() * _logWidMax / MaxOccurrence(); if (rank > MAX_QUERY_RANK) rank = MAX_QUERY_RANK;
return rank; }
|