Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

622 lines
15 KiB

  1. //+---------------------------------------------------------------------------
  2. //
  3. // Microsoft Windows
  4. // Copyright (C) Microsoft Corporation, 1991 - 1999.
  5. //
  6. // File: VECCURS.CXX
  7. //
  8. // Contents: Vector-or Cursor. Computes union of multiple cursors with
  9. // weighted rank computation.
  10. //
  11. // Classes: CVectorCursor
  12. //
  13. // History: 23-Jul-92 KyleP Created
  14. // 01-Mar-93 KyleP Use 64-bit arithmetic
  15. //
  16. //----------------------------------------------------------------------------
  17. #include <pch.cxx>
  18. #pragma hdrstop
  19. #include <curstk.hxx>
  20. #include "veccurs.hxx"
  21. //+---------------------------------------------------------------------------
  22. //
  23. // Member: CVectorCursor::CVectorCursor, public
  24. //
  25. // Synopsis: Creates a vector cursor.
  26. //
  27. // Arguments: [cCursor] -- count of cursors
  28. // [curStack] -- cursors to be merged
  29. // [RankMethod] -- Indicates formula used to compute rank.
  30. //
  31. // History: 23-Jul-92 KyleP Created
  32. //
  33. // Notes: The cursors and the array will be deleted by destructor.
  34. // The cursors have to come from one index
  35. //
  36. //----------------------------------------------------------------------------
  37. CVectorCursor::CVectorCursor( int cCursor,
  38. CCurStack& curStack,
  39. ULONG RankMethod )
  40. : _cChild( cCursor ),
  41. _RankMethod( RankMethod ),
  42. _lMaxWeight( 0 ),
  43. _lSumWeight( 0 ),
  44. _ulSumSquaredWeight( 0 ),
  45. _widRank( widInvalid ),
  46. _iCur( -1 ),
  47. _aChildCursor( cCursor ),
  48. _aChildRank( cCursor ),
  49. _aChildWeight( cCursor )
  50. {
  51. // Two step construction of the heap.
  52. // We have to make sure that all cursors have a valid key
  53. int count = 0;
  54. //
  55. // aCursor is a compacted version of the cursor array which
  56. // only contains valid cursors. It is passed to the wid heap.
  57. //
  58. CCursor ** aCursor = curStack.AcqStack();
  59. RtlCopyMemory( _aChildCursor.GetPointer(),
  60. aCursor,
  61. cCursor * sizeof( CCursor * ) );
  62. //
  63. // remove empty cursors
  64. //
  65. for ( int i = 0; i < cCursor; i++ )
  66. {
  67. if ( aCursor[i] == 0 || aCursor[i]->WorkId() == widInvalid )
  68. {
  69. //
  70. // Invalid cursor
  71. //
  72. delete aCursor[i];
  73. _aChildCursor[i] = 0;
  74. _aChildRank[i] = 0;
  75. _aChildWeight[i] = 0;
  76. }
  77. else
  78. {
  79. //
  80. // Valid cursor
  81. //
  82. _aChildWeight[i] = _aChildCursor[i]->GetWeight();
  83. _lMaxWeight = max( _lMaxWeight, _aChildWeight[i] );
  84. _lSumWeight += _aChildWeight[i];
  85. _ulSumSquaredWeight += _aChildWeight[i] * _aChildWeight[i];
  86. if ( count != i )
  87. aCursor[count++] = aCursor[i];
  88. else
  89. count++;
  90. }
  91. }
  92. //
  93. // Avoid divide-by-zero in rank computation
  94. //
  95. if ( _lMaxWeight == 0 )
  96. _lMaxWeight = 1;
  97. if ( _lSumWeight == 0 )
  98. _lSumWeight = 1;
  99. if ( _ulSumSquaredWeight == 0 )
  100. _ulSumSquaredWeight = 1;
  101. _widHeap.MakeHeap ( count, aCursor );
  102. if ( !_widHeap.IsEmpty() )
  103. {
  104. _iid = _widHeap.Top()->IndexId();
  105. _pid = _widHeap.Top()->Pid();
  106. _RefreshRanks();
  107. }
  108. } //CVectorCursor
  109. //+---------------------------------------------------------------------------
  110. //
  111. // Member: CVectorCursor::WorkId, public
  112. //
  113. // Synopsis: Get current work id.
  114. //
  115. // History: 23-Jul-92 KyleP Lifted from COrCursor
  116. //
  117. //----------------------------------------------------------------------------
  118. WORKID CVectorCursor::WorkId()
  119. {
  120. if ( _widHeap.IsEmpty() )
  121. return widInvalid;
  122. return _widHeap.Top()->WorkId();
  123. }
  124. //+---------------------------------------------------------------------------
  125. //
  126. // Member: CVectorCursor::NextWorkId, public
  127. //
  128. // Synopsis: Move to next work id
  129. //
  130. // Returns: Target work id or widInvalid if no more wid's for current key
  131. //
  132. // History: 23-Jul-92 KyleP Created from COrCursor.
  133. //
  134. //----------------------------------------------------------------------------
  135. WORKID CVectorCursor::NextWorkId()
  136. {
  137. WORKID widOld = WorkId();
  138. WORKID widNew;
  139. if ( widOld == widInvalid )
  140. return widInvalid;
  141. do
  142. {
  143. _widHeap.Top()->NextWorkId();
  144. _widHeap.Reheap();
  145. widNew = _widHeap.Top()->WorkId();
  146. }
  147. while ( widNew == widOld );
  148. return widNew;
  149. }
  150. //+---------------------------------------------------------------------------
  151. //
  152. // Member: CVectorCursor::RatioFinished, public
  153. //
  154. // Synopsis: return approximate ratio of documents processed to total
  155. // documents.
  156. //
  157. // Notes: The ratio, while approximate, should not return 1/1 until
  158. // all cursors are exhausted.
  159. //
  160. //----------------------------------------------------------------------------
  161. void CVectorCursor::RatioFinished (ULONG& denom, ULONG& num)
  162. {
  163. WORKID widTop = WorkId();
  164. if (widTop == widInvalid)
  165. {
  166. denom = num = 1;
  167. return;
  168. }
  169. denom = 0;
  170. num = 0;
  171. unsigned cValid = 1;
  172. for (unsigned i = 0; i < _cChild; i++)
  173. {
  174. ULONG d, n;
  175. if (_aChildCursor[i])
  176. {
  177. _aChildCursor[i]->RatioFinished(d, n);
  178. Win4Assert( n <= d && d > 0 );
  179. denom += d;
  180. num += n;
  181. Win4Assert( d <= denom ); // overflow?
  182. if (n == d)
  183. {
  184. WORKID widCurrent = _aChildCursor[i]->WorkId();
  185. if (widCurrent != widInvalid && widCurrent != widTop)
  186. cValid++;
  187. }
  188. }
  189. }
  190. Win4Assert ( denom > 0 );
  191. if (num == denom && cValid > 1)
  192. denom++;
  193. }
  194. //+---------------------------------------------------------------------------
  195. //
  196. // Member: CVectorCursor::WorkIdCount, public
  197. //
  198. // Synopsis: return wid count
  199. //
  200. // History: 23-Jul-92 KyleP Lifted from COrCursor
  201. //
  202. //----------------------------------------------------------------------------
  203. ULONG CVectorCursor::WorkIdCount()
  204. {
  205. Win4Assert (( FALSE && "CVectorCursor::WorkIdCount called" ));
  206. return(0);
  207. }
  208. //+---------------------------------------------------------------------------
  209. //
  210. // Member: CVectorCursor::HitCount, public
  211. //
  212. // Synopsis: Return occurrence count
  213. //
  214. // History: 23-Jul-92 KyleP Lifted from COrCursor
  215. //
  216. //----------------------------------------------------------------------------
  217. ULONG CVectorCursor::HitCount()
  218. {
  219. WORKID wid = _widHeap.Top()->WorkId();
  220. if (wid == widInvalid)
  221. return 0;
  222. ULONG hitCnt = 0;
  223. for (UINT i=0; i < _cChild; i++)
  224. {
  225. if ( _aChildCursor[i] && _aChildCursor[i]->WorkId() == wid )
  226. hitCnt += _aChildCursor[i]->HitCount();
  227. }
  228. return hitCnt;
  229. }
  230. //+---------------------------------------------------------------------------
  231. //
  232. // Member: CVectorCursor::Rank, public
  233. //
  234. // Returns: Rank.
  235. //
  236. // History: 23-Jul-92 KyleP Created
  237. // 29-Jan-93 KyleP Fixed rounding error in Jaccard
  238. //
  239. // Notes: Uses algorithm specified by user from a small, precomputed
  240. // set.
  241. //
  242. // See "Automatic Text Processing", G. Salton, 10.1.1 and
  243. // 10.4.2 for a discussion of the weight formulas.
  244. //
  245. //----------------------------------------------------------------------------
  246. static int const cMaxChildrenInner = ( 0xFFFFFFFF /
  247. ( MAX_QUERY_RANK * MAX_QUERY_RANK ) );
  248. static int const cMaxChildrenDice = ( 0xFFFFFFFF /
  249. ( MAX_QUERY_RANK * MAX_QUERY_RANK * 2 ) );
  250. static int const cMaxChildrenJaccard = ( 0xFFFFFFFF /
  251. ( MAX_QUERY_RANK * MAX_QUERY_RANK ) );
  252. LONG CVectorCursor::Rank()
  253. {
  254. LONG lRank;
  255. WORKID wid = _widHeap.Top()->WorkId();
  256. //
  257. // An empty heap is a minimum rank.
  258. //
  259. if (wid == widInvalid)
  260. {
  261. Win4Assert( FALSE && "Rank called on empty heap!" );
  262. return 0;
  263. }
  264. //
  265. // Get ranks for this wid.
  266. //
  267. _RefreshRanks();
  268. //
  269. // Otherwise, compute rank based on selected method.
  270. //
  271. switch ( _RankMethod )
  272. {
  273. case VECTOR_RANK_MIN:
  274. {
  275. // MAX[ wi * ( MaxRank - ri ) ]
  276. // VECTOR_RANK_MIN MaxRank - ---------------------------------
  277. // MAX[wi]
  278. lRank = (MAX_QUERY_RANK - _aChildRank[0]) * _aChildWeight[0];
  279. for ( UINT i = 1; i < _cChild; i++ )
  280. {
  281. LONG lNew = (MAX_QUERY_RANK - _aChildRank[i]) * _aChildWeight[i];
  282. lRank = max( lRank, lNew );
  283. }
  284. lRank = MAX_QUERY_RANK - (lRank / _lMaxWeight);
  285. break;
  286. }
  287. case VECTOR_RANK_MAX:
  288. {
  289. // MAX[ wi * ri ]
  290. // VECTOR_RANK_MAX -----------------
  291. // MAX[wi]
  292. lRank = _aChildRank[0] * _aChildWeight[0];
  293. for ( UINT i = 1; i < _cChild; i++ )
  294. {
  295. LONG lNew = _aChildRank[i] * _aChildWeight[i];
  296. lRank = max( lRank, lNew );
  297. }
  298. lRank = lRank / _lMaxWeight;
  299. break;
  300. }
  301. case VECTOR_RANK_INNER:
  302. {
  303. // n
  304. // SUM ri * wi
  305. // i=1
  306. // VECTOR_RANK_INNER -------------
  307. // n
  308. // SUM wi
  309. // i=1
  310. if ( _cChild > cMaxChildrenInner )
  311. {
  312. THROW( CException( STATUS_INVALID_PARAMETER ) );
  313. }
  314. lRank = 0;
  315. for ( UINT i = 0; i < _cChild; i++ )
  316. {
  317. lRank += _aChildRank[i] * _aChildWeight[i];
  318. }
  319. lRank /= _lSumWeight;
  320. break;
  321. }
  322. case VECTOR_RANK_DICE:
  323. {
  324. // n
  325. // 2 * SUM ri * wi
  326. // i=1
  327. // VECTOR_RANK_DICE --------------------
  328. // n 2 n 2
  329. // SUM ri + SUM wi
  330. // i=1 i=1
  331. if ( _cChild > cMaxChildrenDice )
  332. {
  333. THROW( CException( STATUS_INVALID_PARAMETER ) );
  334. }
  335. ULONG ulWeightSum = 0;
  336. lRank = 0;
  337. for ( UINT i = 0; i < _cChild; i++ )
  338. {
  339. lRank += _aChildRank[i] * _aChildWeight[i];
  340. ulWeightSum += _aChildRank[i] * _aChildRank[i];
  341. }
  342. ulWeightSum += _ulSumSquaredWeight;
  343. //
  344. // Avoid nasty rounding errors
  345. //
  346. LONGLONG liTop;
  347. liTop = UInt32x32To64( lRank, 2 * MAX_QUERY_RANK );
  348. liTop /= ulWeightSum;
  349. lRank = lltoul(liTop);
  350. break;
  351. }
  352. case VECTOR_RANK_JACCARD:
  353. {
  354. // n
  355. // SUM ri * wi
  356. // i=1
  357. // VECTOR_RANK_JACCARD ---------------------------------
  358. // n 2 n 2 n
  359. // SUM ri + SUM wi - SUM ri * wi
  360. // i=1 i=1 i=1
  361. if ( _cChild > cMaxChildrenJaccard )
  362. {
  363. THROW( CException( STATUS_INVALID_PARAMETER ) );
  364. }
  365. ULONG ulWeightSum = 0;
  366. lRank = 0;
  367. for ( UINT i = 0; i < _cChild; i++ )
  368. {
  369. lRank += _aChildRank[i] * _aChildWeight[i];
  370. ulWeightSum += _aChildRank[i] * _aChildRank[i];
  371. }
  372. ulWeightSum += _ulSumSquaredWeight;
  373. ulWeightSum -= lRank;
  374. //
  375. // Avoid nasty rounding errors
  376. //
  377. LONGLONG liTop;
  378. liTop = UInt32x32To64( lRank, MAX_QUERY_RANK );
  379. liTop /= ulWeightSum;
  380. lRank = lltoul(liTop);
  381. break;
  382. }
  383. default:
  384. Win4Assert( FALSE && "Invalid rank calculation method." );
  385. lRank = 0;
  386. }
  387. Win4Assert( lRank <= MAX_QUERY_RANK );
  388. return ( lRank );
  389. }
  390. //+-------------------------------------------------------------------------
  391. //
  392. // Member: CVectorCursor::GetRankVector, public
  393. //
  394. // Synopsis: Fetches the rank vector for the cursor.
  395. //
  // Arguments: [plVector] -- The vector is copied here.
  //            [cElements] -- Number of elements available in [plVector].
  //
  // Notes: The vector is copied only when [cElements] is at least the
  //        number of child cursors; otherwise [plVector] is left untouched.
  //
  // Returns: The number of child cursors (the length of the rank vector),
  //          whether or not the vector was copied.
  402. //
  403. // History: 24-Jul-92 KyleP Created
  404. //
  405. //--------------------------------------------------------------------------
  406. ULONG CVectorCursor::GetRankVector( LONG * plVector, ULONG cElements )
  407. {
  408. //
  409. // Get ranks for this wid.
  410. //
  411. _RefreshRanks();
  412. if ( cElements >= _cChild )
  413. RtlCopyMemory( plVector,
  414. _aChildRank.GetPointer(),
  415. _cChild * sizeof LONG );
  416. return _cChild;
  417. }
  418. //+---------------------------------------------------------------------------
  419. //
  420. // Member: CVectorCursor::Hit, public
  421. //
  422. // Returns: Current hit.
  423. //
  424. // History: 07-Sep-92 MikeHew Created
  425. // 12-Dec-92 KyleP Modified for Vector Cursor
  426. //
  427. // Notes: A hit for the vector cursor is identical to a hit
  428. // for an or cursor -- 1 hilite at a time.
  429. //
  430. //----------------------------------------------------------------------------
  431. LONG CVectorCursor::Hit()
  432. {
  433. //
  434. // The first time Hit() is called, we need to position on the first hit.
  435. //
  436. CCursor ** aCur = _widHeap.GetVector();
  437. if ( _iCur == -1 )
  438. {
  439. NextHit();
  440. }
  441. if ( -1 == _iCur )
  442. return rankInvalid;
  443. return ( aCur[_iCur]->Hit() );
  444. }
  445. //+---------------------------------------------------------------------------
  446. //
  447. // Member: CVectorCursor::NextHit, public
  448. //
  449. // Returns: Next hit.
  450. //
  451. // History: 07-Sep-92 MikeHew Created
  452. // 12-Dec-92 KyleP Modified for Vector Cursor
  453. //
  454. //----------------------------------------------------------------------------
  455. LONG CVectorCursor::NextHit()
  456. {
  457. CCursor ** aCur = _widHeap.GetVector();
  458. LONG rank;
  459. if ( _iCur == -1 )
  460. rank = rankInvalid;
  461. else
  462. rank = aCur[_iCur]->NextHit();
  463. //
  464. // If this cursor is empty (rank == rankInvalid) and
  465. // there are more cursors available, find one that's non-empty.
  466. //
  467. while ( rank == rankInvalid && _iCur < _widHeap.Count() - 1 )
  468. {
  469. ++_iCur;
  470. rank = aCur[_iCur]->Hit();
  471. }
  472. return rank;
  473. }
  474. //+-------------------------------------------------------------------------
  475. //
  476. // Member: CVectorCursor::_RefreshRanks, private
  477. //
  478. // Synopsis: Fetches ranks from children with matching workids.
  479. //
  480. // History: 24-Jul-92 KyleP Created
  481. //
  482. //--------------------------------------------------------------------------
  483. void CVectorCursor::_RefreshRanks()
  484. {
  485. WORKID wid = _widHeap.Top()->WorkId();
  486. //
  487. // If the cache is up-to-date, do nothing.
  488. if ( _widRank == wid )
  489. return;
  490. for ( UINT i = 0; i < _cChild; i++ )
  491. {
  492. WORKID widCurrent = ( _aChildCursor[i] ) ?
  493. _aChildCursor[i]->WorkId() : widInvalid;
  494. if ( widCurrent == widInvalid || widCurrent != wid )
  495. {
  496. _aChildRank[i] = 0;
  497. }
  498. else
  499. {
  500. _aChildRank[i] = _aChildCursor[i]->Rank();
  501. }
  502. }
  503. _widRank = wid;
  504. }