Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

622 lines
16 KiB

//+---------------------------------------------------------------------------
//
// Microsoft Windows
// Copyright (C) Microsoft Corporation, 1991 - 1999.
//
// File: VECCURS.CXX
//
// Contents: Vector-or Cursor. Computes union of multiple cursors with
// weighted rank computation.
//
// Classes: CVectorCursor
//
// History: 23-Jul-92 KyleP Created
// 01-Mar-93 KyleP Use 64-bit arithmetic
//
//----------------------------------------------------------------------------
#include <pch.cxx>
#pragma hdrstop
#include <curstk.hxx>
#include "veccurs.hxx"
//+---------------------------------------------------------------------------
//
// Member: CVectorCursor::CVectorCursor, public
//
// Synopsis: Creates a vector cursor.
//
// Arguments: [cCursor] -- count of cursors
// [curStack] -- cursors to be merged
// [RankMethod] -- Indicates formula used to compute rank.
//
// History: 23-Jul-92 KyleP Created
//
// Notes: The cursors and the array will be deleted by destructor.
// The cursors have to come from one index
//
//----------------------------------------------------------------------------
CVectorCursor::CVectorCursor( int cCursor,
CCurStack& curStack,
ULONG RankMethod )
: _cChild( cCursor ),
_RankMethod( RankMethod ),
_lMaxWeight( 0 ),
_lSumWeight( 0 ),
_ulSumSquaredWeight( 0 ),
_widRank( widInvalid ),
_iCur( -1 ),
_aChildCursor( cCursor ),
_aChildRank( cCursor ),
_aChildWeight( cCursor )
{
// Two step construction of the heap.
// We have to make sure that all cursors have a valid key
int count = 0;
//
// aCursor is a compacted version of the cursor array which
// only contains valid cursors. It is passed to the wid heap.
//
CCursor ** aCursor = curStack.AcqStack();
RtlCopyMemory( _aChildCursor.GetPointer(),
aCursor,
cCursor * sizeof( CCursor * ) );
//
// remove empty cursors
//
for ( int i = 0; i < cCursor; i++ )
{
if ( aCursor[i] == 0 || aCursor[i]->WorkId() == widInvalid )
{
//
// Invalid cursor
//
delete aCursor[i];
_aChildCursor[i] = 0;
_aChildRank[i] = 0;
_aChildWeight[i] = 0;
}
else
{
//
// Valid cursor
//
_aChildWeight[i] = _aChildCursor[i]->GetWeight();
_lMaxWeight = max( _lMaxWeight, _aChildWeight[i] );
_lSumWeight += _aChildWeight[i];
_ulSumSquaredWeight += _aChildWeight[i] * _aChildWeight[i];
if ( count != i )
aCursor[count++] = aCursor[i];
else
count++;
}
}
//
// Avoid divide-by-zero in rank computation
//
if ( _lMaxWeight == 0 )
_lMaxWeight = 1;
if ( _lSumWeight == 0 )
_lSumWeight = 1;
if ( _ulSumSquaredWeight == 0 )
_ulSumSquaredWeight = 1;
_widHeap.MakeHeap ( count, aCursor );
if ( !_widHeap.IsEmpty() )
{
_iid = _widHeap.Top()->IndexId();
_pid = _widHeap.Top()->Pid();
_RefreshRanks();
}
} //CVectorCursor
//+---------------------------------------------------------------------------
//
// Member: CVectorCursor::WorkId, public
//
// Synopsis: Get current work id.
//
// History: 23-Jul-92 KyleP Lifted from COrCursor
//
//----------------------------------------------------------------------------
WORKID CVectorCursor::WorkId()
{
if ( _widHeap.IsEmpty() )
return widInvalid;
return _widHeap.Top()->WorkId();
}
//+---------------------------------------------------------------------------
//
// Member: CVectorCursor::NextWorkId, public
//
// Synopsis: Move to next work id
//
// Returns: Target work id or widInvalid if no more wid's for current key
//
// History: 23-Jul-92 KyleP Created from COrCursor.
//
//----------------------------------------------------------------------------
WORKID CVectorCursor::NextWorkId()
{
WORKID widOld = WorkId();
WORKID widNew;
if ( widOld == widInvalid )
return widInvalid;
do
{
_widHeap.Top()->NextWorkId();
_widHeap.Reheap();
widNew = _widHeap.Top()->WorkId();
}
while ( widNew == widOld );
return widNew;
}
//+---------------------------------------------------------------------------
//
// Member: CVectorCursor::RatioFinished, public
//
// Synopsis: return approximate ratio of documents processed to total
// documents.
//
// Notes: The ratio, while approximate, should not return 1/1 until
// all cursors are exhausted.
//
//----------------------------------------------------------------------------
void CVectorCursor::RatioFinished (ULONG& denom, ULONG& num)
{
WORKID widTop = WorkId();
if (widTop == widInvalid)
{
denom = num = 1;
return;
}
denom = 0;
num = 0;
unsigned cValid = 1;
for (unsigned i = 0; i < _cChild; i++)
{
ULONG d, n;
if (_aChildCursor[i])
{
_aChildCursor[i]->RatioFinished(d, n);
Win4Assert( n <= d && d > 0 );
denom += d;
num += n;
Win4Assert( d <= denom ); // overflow?
if (n == d)
{
WORKID widCurrent = _aChildCursor[i]->WorkId();
if (widCurrent != widInvalid && widCurrent != widTop)
cValid++;
}
}
}
Win4Assert ( denom > 0 );
if (num == denom && cValid > 1)
denom++;
}
//+---------------------------------------------------------------------------
//
// Member: CVectorCursor::WorkIdCount, public
//
// Synopsis: return wid count
//
// History: 23-Jul-92 KyleP Lifted from COrCursor
//
//----------------------------------------------------------------------------
ULONG CVectorCursor::WorkIdCount()
{
Win4Assert (( FALSE && "CVectorCursor::WorkIdCount called" ));
return(0);
}
//+---------------------------------------------------------------------------
//
// Member: CVectorCursor::HitCount, public
//
// Synopsis: Return occurrence count
//
// History: 23-Jul-92 KyleP Lifted from COrCursor
//
//----------------------------------------------------------------------------
ULONG CVectorCursor::HitCount()
{
WORKID wid = _widHeap.Top()->WorkId();
if (wid == widInvalid)
return 0;
ULONG hitCnt = 0;
for (UINT i=0; i < _cChild; i++)
{
if ( _aChildCursor[i] && _aChildCursor[i]->WorkId() == wid )
hitCnt += _aChildCursor[i]->HitCount();
}
return hitCnt;
}
//+---------------------------------------------------------------------------
//
// Member: CVectorCursor::Rank, public
//
// Returns: Rank.
//
// History: 23-Jul-92 KyleP Created
// 29-Jan-93 KyleP Fixed rounding error in Jaccard
//
// Notes: Uses algorithm specified by user from a small, precomputed
// set.
//
// See "Automatic Text Processing", G. Salton, 10.1.1 and
// 10.4.2 for a discussion of the weight formulas.
//
//----------------------------------------------------------------------------
static int const cMaxChildrenInner = ( 0xFFFFFFFF /
( MAX_QUERY_RANK * MAX_QUERY_RANK ) );
static int const cMaxChildrenDice = ( 0xFFFFFFFF /
( MAX_QUERY_RANK * MAX_QUERY_RANK * 2 ) );
static int const cMaxChildrenJaccard = ( 0xFFFFFFFF /
( MAX_QUERY_RANK * MAX_QUERY_RANK ) );
LONG CVectorCursor::Rank()
{
LONG lRank;
WORKID wid = _widHeap.Top()->WorkId();
//
// An empty heap is a minimum rank.
//
if (wid == widInvalid)
{
Win4Assert( FALSE && "Rank called on empty heap!" );
return 0;
}
//
// Get ranks for this wid.
//
_RefreshRanks();
//
// Otherwise, compute rank based on selected method.
//
switch ( _RankMethod )
{
case VECTOR_RANK_MIN:
{
// MAX[ wi * ( MaxRank - ri ) ]
// VECTOR_RANK_MIN MaxRank - ---------------------------------
// MAX[wi]
lRank = (MAX_QUERY_RANK - _aChildRank[0]) * _aChildWeight[0];
for ( UINT i = 1; i < _cChild; i++ )
{
LONG lNew = (MAX_QUERY_RANK - _aChildRank[i]) * _aChildWeight[i];
lRank = max( lRank, lNew );
}
lRank = MAX_QUERY_RANK - (lRank / _lMaxWeight);
break;
}
case VECTOR_RANK_MAX:
{
// MAX[ wi * ri ]
// VECTOR_RANK_MAX -----------------
// MAX[wi]
lRank = _aChildRank[0] * _aChildWeight[0];
for ( UINT i = 1; i < _cChild; i++ )
{
LONG lNew = _aChildRank[i] * _aChildWeight[i];
lRank = max( lRank, lNew );
}
lRank = lRank / _lMaxWeight;
break;
}
case VECTOR_RANK_INNER:
{
// n
// SUM ri * wi
// i=1
// VECTOR_RANK_INNER -------------
// n
// SUM wi
// i=1
if ( _cChild > cMaxChildrenInner )
{
THROW( CException( STATUS_INVALID_PARAMETER ) );
}
lRank = 0;
for ( UINT i = 0; i < _cChild; i++ )
{
lRank += _aChildRank[i] * _aChildWeight[i];
}
lRank /= _lSumWeight;
break;
}
case VECTOR_RANK_DICE:
{
// n
// 2 * SUM ri * wi
// i=1
// VECTOR_RANK_DICE --------------------
// n 2 n 2
// SUM ri + SUM wi
// i=1 i=1
if ( _cChild > cMaxChildrenDice )
{
THROW( CException( STATUS_INVALID_PARAMETER ) );
}
ULONG ulWeightSum = 0;
lRank = 0;
for ( UINT i = 0; i < _cChild; i++ )
{
lRank += _aChildRank[i] * _aChildWeight[i];
ulWeightSum += _aChildRank[i] * _aChildRank[i];
}
ulWeightSum += _ulSumSquaredWeight;
//
// Avoid nasty rounding errors
//
LONGLONG liTop;
liTop = UInt32x32To64( lRank, 2 * MAX_QUERY_RANK );
liTop /= ulWeightSum;
lRank = lltoul(liTop);
break;
}
case VECTOR_RANK_JACCARD:
{
// n
// SUM ri * wi
// i=1
// VECTOR_RANK_JACCARD ---------------------------------
// n 2 n 2 n
// SUM ri + SUM wi - SUM ri * wi
// i=1 i=1 i=1
if ( _cChild > cMaxChildrenJaccard )
{
THROW( CException( STATUS_INVALID_PARAMETER ) );
}
ULONG ulWeightSum = 0;
lRank = 0;
for ( UINT i = 0; i < _cChild; i++ )
{
lRank += _aChildRank[i] * _aChildWeight[i];
ulWeightSum += _aChildRank[i] * _aChildRank[i];
}
ulWeightSum += _ulSumSquaredWeight;
ulWeightSum -= lRank;
//
// Avoid nasty rounding errors
//
LONGLONG liTop;
liTop = UInt32x32To64( lRank, MAX_QUERY_RANK );
liTop /= ulWeightSum;
lRank = lltoul(liTop);
break;
}
default:
Win4Assert( FALSE && "Invalid rank calculation method." );
lRank = 0;
}
Win4Assert( lRank <= MAX_QUERY_RANK );
return ( lRank );
}
//+-------------------------------------------------------------------------
//
// Member: CVectorCursor::GetRankVector, public
//
// Synopsis: Fetches the rank vector for the cursor.
//
// Arguments: [pulVector] -- The vector is copied here.
//
// Requires: There is enough space in [pulVector] for all the
// elements of the vector. No overflow checking is done.
//
// Returns: The count of elements copied.
//
// History: 24-Jul-92 KyleP Created
//
//--------------------------------------------------------------------------
ULONG CVectorCursor::GetRankVector( LONG * plVector, ULONG cElements )
{
//
// Get ranks for this wid.
//
_RefreshRanks();
if ( cElements >= _cChild )
RtlCopyMemory( plVector,
_aChildRank.GetPointer(),
_cChild * sizeof LONG );
return _cChild;
}
//+---------------------------------------------------------------------------
//
// Member: CVectorCursor::Hit, public
//
// Returns: Current hit.
//
// History: 07-Sep-92 MikeHew Created
// 12-Dec-92 KyleP Modified for Vector Cursor
//
// Notes: A hit for the vector cursor is identical to a hit
// for an or cursor -- 1 hilite at a time.
//
//----------------------------------------------------------------------------
LONG CVectorCursor::Hit()
{
//
// The first time Hit() is called, we need to position on the first hit.
//
CCursor ** aCur = _widHeap.GetVector();
if ( _iCur == -1 )
{
NextHit();
}
if ( -1 == _iCur )
return rankInvalid;
return ( aCur[_iCur]->Hit() );
}
//+---------------------------------------------------------------------------
//
// Member: CVectorCursor::NextHit, public
//
// Returns: Next hit.
//
// History: 07-Sep-92 MikeHew Created
// 12-Dec-92 KyleP Modified for Vector Cursor
//
//----------------------------------------------------------------------------
LONG CVectorCursor::NextHit()
{
CCursor ** aCur = _widHeap.GetVector();
LONG rank;
if ( _iCur == -1 )
rank = rankInvalid;
else
rank = aCur[_iCur]->NextHit();
//
// If this cursor is empty (rank == rankInvalid) and
// there are more cursors available, find one that's non-empty.
//
while ( rank == rankInvalid && _iCur < _widHeap.Count() - 1 )
{
++_iCur;
rank = aCur[_iCur]->Hit();
}
return rank;
}
//+-------------------------------------------------------------------------
//
// Member: CVectorCursor::_RefreshRanks, private
//
// Synopsis: Fetches ranks from children with matching workids.
//
// History: 24-Jul-92 KyleP Created
//
//--------------------------------------------------------------------------
void CVectorCursor::_RefreshRanks()
{
WORKID wid = _widHeap.Top()->WorkId();
//
// If the cache is up-to-date, do nothing.
if ( _widRank == wid )
return;
for ( UINT i = 0; i < _cChild; i++ )
{
WORKID widCurrent = ( _aChildCursor[i] ) ?
_aChildCursor[i]->WorkId() : widInvalid;
if ( widCurrent == widInvalid || widCurrent != wid )
{
_aChildRank[i] = 0;
}
else
{
_aChildRank[i] = _aChildCursor[i]->Rank();
}
}
_widRank = wid;
}