You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
622 lines
16 KiB
622 lines
16 KiB
//+---------------------------------------------------------------------------
|
|
//
|
|
// Microsoft Windows
|
|
// Copyright (C) Microsoft Corporation, 1991 - 1999.
|
|
//
|
|
// File: VECCURS.CXX
|
|
//
|
|
// Contents: Vector-or Cursor. Computes union of multiple cursors with
|
|
// weighted rank computation.
|
|
//
|
|
// Classes: CVectorCursor
|
|
//
|
|
// History: 23-Jul-92 KyleP Created
|
|
// 01-Mar-93 KyleP Use 64-bit arithmetic
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
|
|
#include <pch.cxx>
|
|
#pragma hdrstop
|
|
|
|
#include <curstk.hxx>
|
|
|
|
#include "veccurs.hxx"
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Member: CVectorCursor::CVectorCursor, public
|
|
//
|
|
// Synopsis: Creates a vector cursor.
|
|
//
|
|
// Arguments: [cCursor] -- count of cursors
|
|
// [curStack] -- cursors to be merged
|
|
// [RankMethod] -- Indicates formula used to compute rank.
|
|
//
|
|
// History: 23-Jul-92 KyleP Created
|
|
//
|
|
// Notes: The cursors and the array will be deleted by destructor.
|
|
// The cursors have to come from one index
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
|
|
CVectorCursor::CVectorCursor( int cCursor,
|
|
CCurStack& curStack,
|
|
ULONG RankMethod )
|
|
: _cChild( cCursor ),
|
|
_RankMethod( RankMethod ),
|
|
_lMaxWeight( 0 ),
|
|
_lSumWeight( 0 ),
|
|
_ulSumSquaredWeight( 0 ),
|
|
_widRank( widInvalid ),
|
|
_iCur( -1 ),
|
|
_aChildCursor( cCursor ),
|
|
_aChildRank( cCursor ),
|
|
_aChildWeight( cCursor )
|
|
{
|
|
// Two step construction of the heap.
|
|
// We have to make sure that all cursors have a valid key
|
|
|
|
int count = 0;
|
|
|
|
//
|
|
// aCursor is a compacted version of the cursor array which
|
|
// only contains valid cursors. It is passed to the wid heap.
|
|
//
|
|
|
|
CCursor ** aCursor = curStack.AcqStack();
|
|
RtlCopyMemory( _aChildCursor.GetPointer(),
|
|
aCursor,
|
|
cCursor * sizeof( CCursor * ) );
|
|
|
|
//
|
|
// remove empty cursors
|
|
//
|
|
|
|
for ( int i = 0; i < cCursor; i++ )
|
|
{
|
|
if ( aCursor[i] == 0 || aCursor[i]->WorkId() == widInvalid )
|
|
{
|
|
//
|
|
// Invalid cursor
|
|
//
|
|
|
|
delete aCursor[i];
|
|
_aChildCursor[i] = 0;
|
|
_aChildRank[i] = 0;
|
|
_aChildWeight[i] = 0;
|
|
}
|
|
else
|
|
{
|
|
//
|
|
// Valid cursor
|
|
//
|
|
|
|
_aChildWeight[i] = _aChildCursor[i]->GetWeight();
|
|
_lMaxWeight = max( _lMaxWeight, _aChildWeight[i] );
|
|
_lSumWeight += _aChildWeight[i];
|
|
_ulSumSquaredWeight += _aChildWeight[i] * _aChildWeight[i];
|
|
|
|
if ( count != i )
|
|
aCursor[count++] = aCursor[i];
|
|
else
|
|
count++;
|
|
}
|
|
}
|
|
|
|
//
|
|
// Avoid divide-by-zero in rank computation
|
|
//
|
|
|
|
if ( _lMaxWeight == 0 )
|
|
_lMaxWeight = 1;
|
|
|
|
if ( _lSumWeight == 0 )
|
|
_lSumWeight = 1;
|
|
|
|
if ( _ulSumSquaredWeight == 0 )
|
|
_ulSumSquaredWeight = 1;
|
|
|
|
_widHeap.MakeHeap ( count, aCursor );
|
|
if ( !_widHeap.IsEmpty() )
|
|
{
|
|
_iid = _widHeap.Top()->IndexId();
|
|
_pid = _widHeap.Top()->Pid();
|
|
_RefreshRanks();
|
|
}
|
|
} //CVectorCursor
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Member: CVectorCursor::WorkId, public
|
|
//
|
|
// Synopsis: Get current work id.
|
|
//
|
|
// History: 23-Jul-92 KyleP Lifted from COrCursor
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
|
|
WORKID CVectorCursor::WorkId()
|
|
{
|
|
if ( _widHeap.IsEmpty() )
|
|
return widInvalid;
|
|
|
|
return _widHeap.Top()->WorkId();
|
|
}
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Member: CVectorCursor::NextWorkId, public
|
|
//
|
|
// Synopsis: Move to next work id
|
|
//
|
|
// Returns: Target work id or widInvalid if no more wid's for current key
|
|
//
|
|
// History: 23-Jul-92 KyleP Created from COrCursor.
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
|
|
WORKID CVectorCursor::NextWorkId()
|
|
{
|
|
WORKID widOld = WorkId();
|
|
WORKID widNew;
|
|
|
|
if ( widOld == widInvalid )
|
|
return widInvalid;
|
|
|
|
do
|
|
{
|
|
_widHeap.Top()->NextWorkId();
|
|
_widHeap.Reheap();
|
|
widNew = _widHeap.Top()->WorkId();
|
|
}
|
|
while ( widNew == widOld );
|
|
|
|
return widNew;
|
|
}
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Member: CVectorCursor::RatioFinished, public
|
|
//
|
|
// Synopsis: return approximate ratio of documents processed to total
|
|
// documents.
|
|
//
|
|
// Notes: The ratio, while approximate, should not return 1/1 until
|
|
// all cursors are exhausted.
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
|
|
void CVectorCursor::RatioFinished (ULONG& denom, ULONG& num)
|
|
{
|
|
WORKID widTop = WorkId();
|
|
if (widTop == widInvalid)
|
|
{
|
|
denom = num = 1;
|
|
return;
|
|
}
|
|
|
|
denom = 0;
|
|
num = 0;
|
|
|
|
unsigned cValid = 1;
|
|
|
|
for (unsigned i = 0; i < _cChild; i++)
|
|
{
|
|
ULONG d, n;
|
|
if (_aChildCursor[i])
|
|
{
|
|
_aChildCursor[i]->RatioFinished(d, n);
|
|
Win4Assert( n <= d && d > 0 );
|
|
|
|
denom += d;
|
|
num += n;
|
|
Win4Assert( d <= denom ); // overflow?
|
|
|
|
if (n == d)
|
|
{
|
|
WORKID widCurrent = _aChildCursor[i]->WorkId();
|
|
if (widCurrent != widInvalid && widCurrent != widTop)
|
|
cValid++;
|
|
}
|
|
}
|
|
}
|
|
Win4Assert ( denom > 0 );
|
|
if (num == denom && cValid > 1)
|
|
denom++;
|
|
}
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Member: CVectorCursor::WorkIdCount, public
|
|
//
|
|
// Synopsis: return wid count
|
|
//
|
|
// History: 23-Jul-92 KyleP Lifted from COrCursor
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
|
|
ULONG CVectorCursor::WorkIdCount()
|
|
{
|
|
Win4Assert (( FALSE && "CVectorCursor::WorkIdCount called" ));
|
|
return(0);
|
|
}
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Member: CVectorCursor::HitCount, public
|
|
//
|
|
// Synopsis: Return occurrence count
|
|
//
|
|
// History: 23-Jul-92 KyleP Lifted from COrCursor
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
|
|
ULONG CVectorCursor::HitCount()
|
|
{
|
|
WORKID wid = _widHeap.Top()->WorkId();
|
|
|
|
if (wid == widInvalid)
|
|
return 0;
|
|
|
|
ULONG hitCnt = 0;
|
|
|
|
for (UINT i=0; i < _cChild; i++)
|
|
{
|
|
if ( _aChildCursor[i] && _aChildCursor[i]->WorkId() == wid )
|
|
hitCnt += _aChildCursor[i]->HitCount();
|
|
}
|
|
|
|
return hitCnt;
|
|
}
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Member: CVectorCursor::Rank, public
|
|
//
|
|
// Returns: Rank.
|
|
//
|
|
// History: 23-Jul-92 KyleP Created
|
|
// 29-Jan-93 KyleP Fixed rounding error in Jaccard
|
|
//
|
|
// Notes: Uses algorithm specified by user from a small, precomputed
|
|
// set.
|
|
//
|
|
// See "Automatic Text Processing", G. Salton, 10.1.1 and
|
|
// 10.4.2 for a discussion of the weight formulas.
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
|
|
static int const cMaxChildrenInner = ( 0xFFFFFFFF /
|
|
( MAX_QUERY_RANK * MAX_QUERY_RANK ) );
|
|
|
|
static int const cMaxChildrenDice = ( 0xFFFFFFFF /
|
|
( MAX_QUERY_RANK * MAX_QUERY_RANK * 2 ) );
|
|
|
|
static int const cMaxChildrenJaccard = ( 0xFFFFFFFF /
|
|
( MAX_QUERY_RANK * MAX_QUERY_RANK ) );
|
|
|
|
LONG CVectorCursor::Rank()
|
|
{
|
|
LONG lRank;
|
|
WORKID wid = _widHeap.Top()->WorkId();
|
|
|
|
//
|
|
// An empty heap is a minimum rank.
|
|
//
|
|
|
|
if (wid == widInvalid)
|
|
{
|
|
Win4Assert( FALSE && "Rank called on empty heap!" );
|
|
return 0;
|
|
}
|
|
|
|
//
|
|
// Get ranks for this wid.
|
|
//
|
|
|
|
_RefreshRanks();
|
|
|
|
//
|
|
// Otherwise, compute rank based on selected method.
|
|
//
|
|
|
|
switch ( _RankMethod )
|
|
{
|
|
case VECTOR_RANK_MIN:
|
|
{
|
|
// MAX[ wi * ( MaxRank - ri ) ]
|
|
// VECTOR_RANK_MIN MaxRank - ---------------------------------
|
|
// MAX[wi]
|
|
|
|
lRank = (MAX_QUERY_RANK - _aChildRank[0]) * _aChildWeight[0];
|
|
|
|
for ( UINT i = 1; i < _cChild; i++ )
|
|
{
|
|
LONG lNew = (MAX_QUERY_RANK - _aChildRank[i]) * _aChildWeight[i];
|
|
lRank = max( lRank, lNew );
|
|
}
|
|
|
|
lRank = MAX_QUERY_RANK - (lRank / _lMaxWeight);
|
|
|
|
break;
|
|
}
|
|
|
|
case VECTOR_RANK_MAX:
|
|
{
|
|
// MAX[ wi * ri ]
|
|
// VECTOR_RANK_MAX -----------------
|
|
// MAX[wi]
|
|
|
|
lRank = _aChildRank[0] * _aChildWeight[0];
|
|
|
|
for ( UINT i = 1; i < _cChild; i++ )
|
|
{
|
|
LONG lNew = _aChildRank[i] * _aChildWeight[i];
|
|
lRank = max( lRank, lNew );
|
|
}
|
|
|
|
lRank = lRank / _lMaxWeight;
|
|
|
|
break;
|
|
}
|
|
|
|
case VECTOR_RANK_INNER:
|
|
{
|
|
// n
|
|
// SUM ri * wi
|
|
// i=1
|
|
// VECTOR_RANK_INNER -------------
|
|
// n
|
|
// SUM wi
|
|
// i=1
|
|
|
|
if ( _cChild > cMaxChildrenInner )
|
|
{
|
|
THROW( CException( STATUS_INVALID_PARAMETER ) );
|
|
}
|
|
|
|
lRank = 0;
|
|
|
|
for ( UINT i = 0; i < _cChild; i++ )
|
|
{
|
|
lRank += _aChildRank[i] * _aChildWeight[i];
|
|
}
|
|
|
|
lRank /= _lSumWeight;
|
|
|
|
break;
|
|
}
|
|
|
|
case VECTOR_RANK_DICE:
|
|
{
|
|
// n
|
|
// 2 * SUM ri * wi
|
|
// i=1
|
|
// VECTOR_RANK_DICE --------------------
|
|
// n 2 n 2
|
|
// SUM ri + SUM wi
|
|
// i=1 i=1
|
|
|
|
if ( _cChild > cMaxChildrenDice )
|
|
{
|
|
THROW( CException( STATUS_INVALID_PARAMETER ) );
|
|
}
|
|
|
|
ULONG ulWeightSum = 0;
|
|
|
|
lRank = 0;
|
|
|
|
for ( UINT i = 0; i < _cChild; i++ )
|
|
{
|
|
lRank += _aChildRank[i] * _aChildWeight[i];
|
|
ulWeightSum += _aChildRank[i] * _aChildRank[i];
|
|
}
|
|
|
|
ulWeightSum += _ulSumSquaredWeight;
|
|
|
|
//
|
|
// Avoid nasty rounding errors
|
|
//
|
|
|
|
LONGLONG liTop;
|
|
|
|
liTop = UInt32x32To64( lRank, 2 * MAX_QUERY_RANK );
|
|
|
|
liTop /= ulWeightSum;
|
|
|
|
lRank = lltoul(liTop);
|
|
|
|
break;
|
|
}
|
|
|
|
case VECTOR_RANK_JACCARD:
|
|
{
|
|
// n
|
|
// SUM ri * wi
|
|
// i=1
|
|
// VECTOR_RANK_JACCARD ---------------------------------
|
|
// n 2 n 2 n
|
|
// SUM ri + SUM wi - SUM ri * wi
|
|
// i=1 i=1 i=1
|
|
|
|
if ( _cChild > cMaxChildrenJaccard )
|
|
{
|
|
THROW( CException( STATUS_INVALID_PARAMETER ) );
|
|
}
|
|
|
|
ULONG ulWeightSum = 0;
|
|
|
|
lRank = 0;
|
|
|
|
for ( UINT i = 0; i < _cChild; i++ )
|
|
{
|
|
lRank += _aChildRank[i] * _aChildWeight[i];
|
|
ulWeightSum += _aChildRank[i] * _aChildRank[i];
|
|
}
|
|
|
|
ulWeightSum += _ulSumSquaredWeight;
|
|
ulWeightSum -= lRank;
|
|
|
|
//
|
|
// Avoid nasty rounding errors
|
|
//
|
|
|
|
LONGLONG liTop;
|
|
|
|
liTop = UInt32x32To64( lRank, MAX_QUERY_RANK );
|
|
|
|
liTop /= ulWeightSum;
|
|
|
|
lRank = lltoul(liTop);
|
|
break;
|
|
}
|
|
|
|
default:
|
|
Win4Assert( FALSE && "Invalid rank calculation method." );
|
|
lRank = 0;
|
|
}
|
|
|
|
Win4Assert( lRank <= MAX_QUERY_RANK );
|
|
|
|
return ( lRank );
|
|
}
|
|
|
|
//+-------------------------------------------------------------------------
|
|
//
|
|
// Member: CVectorCursor::GetRankVector, public
|
|
//
|
|
// Synopsis: Fetches the rank vector for the cursor.
|
|
//
|
|
// Arguments: [pulVector] -- The vector is copied here.
|
|
//
|
|
// Requires: There is enough space in [pulVector] for all the
|
|
// elements of the vector. No overflow checking is done.
|
|
//
|
|
// Returns: The count of elements copied.
|
|
//
|
|
// History: 24-Jul-92 KyleP Created
|
|
//
|
|
//--------------------------------------------------------------------------
|
|
|
|
ULONG CVectorCursor::GetRankVector( LONG * plVector, ULONG cElements )
|
|
{
|
|
//
|
|
// Get ranks for this wid.
|
|
//
|
|
|
|
_RefreshRanks();
|
|
|
|
if ( cElements >= _cChild )
|
|
RtlCopyMemory( plVector,
|
|
_aChildRank.GetPointer(),
|
|
_cChild * sizeof LONG );
|
|
|
|
return _cChild;
|
|
}
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Member: CVectorCursor::Hit, public
|
|
//
|
|
// Returns: Current hit.
|
|
//
|
|
// History: 07-Sep-92 MikeHew Created
|
|
// 12-Dec-92 KyleP Modified for Vector Cursor
|
|
//
|
|
// Notes: A hit for the vector cursor is identical to a hit
|
|
// for an or cursor -- 1 hilite at a time.
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
|
|
LONG CVectorCursor::Hit()
|
|
{
|
|
//
|
|
// The first time Hit() is called, we need to position on the first hit.
|
|
//
|
|
|
|
CCursor ** aCur = _widHeap.GetVector();
|
|
|
|
if ( _iCur == -1 )
|
|
{
|
|
NextHit();
|
|
}
|
|
|
|
if ( -1 == _iCur )
|
|
return rankInvalid;
|
|
|
|
return ( aCur[_iCur]->Hit() );
|
|
}
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Member: CVectorCursor::NextHit, public
|
|
//
|
|
// Returns: Next hit.
|
|
//
|
|
// History: 07-Sep-92 MikeHew Created
|
|
// 12-Dec-92 KyleP Modified for Vector Cursor
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
|
|
LONG CVectorCursor::NextHit()
|
|
{
|
|
CCursor ** aCur = _widHeap.GetVector();
|
|
|
|
LONG rank;
|
|
|
|
if ( _iCur == -1 )
|
|
rank = rankInvalid;
|
|
else
|
|
rank = aCur[_iCur]->NextHit();
|
|
|
|
//
|
|
// If this cursor is empty (rank == rankInvalid) and
|
|
// there are more cursors available, find one that's non-empty.
|
|
//
|
|
|
|
while ( rank == rankInvalid && _iCur < _widHeap.Count() - 1 )
|
|
{
|
|
++_iCur;
|
|
rank = aCur[_iCur]->Hit();
|
|
}
|
|
|
|
return rank;
|
|
}
|
|
|
|
//+-------------------------------------------------------------------------
|
|
//
|
|
// Member: CVectorCursor::_RefreshRanks, private
|
|
//
|
|
// Synopsis: Fetches ranks from children with matching workids.
|
|
//
|
|
// History: 24-Jul-92 KyleP Created
|
|
//
|
|
//--------------------------------------------------------------------------
|
|
|
|
void CVectorCursor::_RefreshRanks()
|
|
{
|
|
WORKID wid = _widHeap.Top()->WorkId();
|
|
|
|
//
|
|
// If the cache is up-to-date, do nothing.
|
|
|
|
if ( _widRank == wid )
|
|
return;
|
|
|
|
for ( UINT i = 0; i < _cChild; i++ )
|
|
{
|
|
WORKID widCurrent = ( _aChildCursor[i] ) ?
|
|
_aChildCursor[i]->WorkId() : widInvalid;
|
|
|
|
if ( widCurrent == widInvalid || widCurrent != wid )
|
|
{
|
|
_aChildRank[i] = 0;
|
|
}
|
|
else
|
|
{
|
|
_aChildRank[i] = _aChildCursor[i]->Rank();
|
|
}
|
|
}
|
|
|
|
_widRank = wid;
|
|
}
|