Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

513 lines
13 KiB

  1. //+---------------------------------------------------------------------------
  2. //
  3. // Microsoft Windows
  4. // Copyright (C) Microsoft Corporation, 1991 - 2000.
  5. //
  6. // File: PHRCUR.CXX
  7. //
  8. // Contents: Phrase Cursor. Computes intersection of multiple cursors
  9. // with constraints on occurrences.
  10. //
  11. // Classes: CPhraseCursor
  12. //
  13. // History: 24-May-91 BartoszM Created.
  14. // 19-Feb-92 AmyA Modified to be a COccCursor instead of
  15. // a CCursor.
  16. //
  17. //----------------------------------------------------------------------------
  18. #include <pch.cxx>
  19. #pragma hdrstop
  20. #include <misc.hxx>
  21. #include <curstk.hxx>
  22. #include <cudebug.hxx>
  23. #include "phrcur.hxx"
  24. #pragma optimize( "t", on )
  25. //+---------------------------------------------------------------------------
  26. //
  27. // Member: CPhraseCursor::CPhraseCursor, public
  28. //
  29. // Synopsis: Create a cursor that merges a number of cursors.
  30. //
  31. // Arguments:
  32. // [curStack] -- cursors to be merged
  33. // [aOcc] -- a safe array of OCCURRENCEs for the cursors
  34. //
  35. // Notes: All cursors must come from the same index
  36. // all keys have the same property id
  37. //
  38. // History: 24-May-91 BartoszM Created
  39. //
  40. //----------------------------------------------------------------------------
  41. CPhraseCursor::CPhraseCursor( COccCurStack& curStack, XArray<OCCURRENCE>& aOcc )
  42. : COccCursor(curStack.Get(0)->MaxWorkId()),
  43. _cCur(aOcc.Count()),
  44. _aCur(curStack.AcqStack()),
  45. _cOcc(0),
  46. _maxOcc(OCC_INVALID)
  47. {
  48. _aOcc = aOcc.Acquire();
  49. _iid = _aCur[0]->IndexId();
  50. _pid = _aCur[0]->Pid();
  51. _wid = _aCur[0]->WorkId();
  52. _logWidMax = Log2(_widMax);
  53. if (FindPhrase())
  54. _cOcc++;
  55. }
  56. //+---------------------------------------------------------------------------
  57. //
  58. // Member: CPhraseCursor::~CPhraseCursor, public
  59. //
  60. // Synopsis: Deletes children
  61. //
  62. // History: 24-May-91 BartoszM Created
  63. //
  64. //----------------------------------------------------------------------------
  65. CPhraseCursor::~CPhraseCursor()
  66. {
  67. for ( unsigned i=0; i < _cCur; i++)
  68. {
  69. delete _aCur[i];
  70. }
  71. delete _aCur;
  72. delete _aOcc;
  73. }
  74. //+---------------------------------------------------------------------------
  75. //
  76. // Member: CPhraseCursor::WorkId, public
  77. //
  78. // Synopsis: Get current work id.
  79. //
  80. // History: 24-May-91 BartoszM Created
  81. //
  82. //----------------------------------------------------------------------------
  83. WORKID CPhraseCursor::WorkId()
  84. {
  85. return _wid;
  86. }
  87. //+---------------------------------------------------------------------------
  88. //
  89. // Member: CPhraseCursor::NextWorkID, public
  90. //
  91. // Synopsis: Move to next work id
  92. //
  93. // Returns: Target work id or widInvalid if no more wid's for current key
  94. //
  95. // History: 24-May-91 BartoszM Created
  96. //
  97. // Notes: Resets _cOcc
  98. //
  99. //----------------------------------------------------------------------------
  100. WORKID CPhraseCursor::NextWorkId()
  101. {
  102. _cOcc = 0;
  103. // NTRAID#DB-NTBUG9-84004-2000/07/31-dlee Indexing Service internal cursors aren't optimized to use shortest cursors first
  104. _wid = _aCur[0]->NextWorkId();
  105. _pid = _aCur[0]->Pid();
  106. if (FindPhrase())
  107. _cOcc++;
  108. return _wid;
  109. }
  110. void CPhraseCursor::RatioFinished (ULONG& denom, ULONG& num)
  111. {
  112. denom = 1;
  113. num = 0;
  114. for (unsigned i = 0; i < _cCur; i++)
  115. {
  116. ULONG d, n;
  117. _aCur[i]->RatioFinished(d, n);
  118. if (d == n)
  119. {
  120. // done if any cursor is done.
  121. denom = d;
  122. num = n;
  123. Win4Assert( d > 0 );
  124. break;
  125. }
  126. else if (d > denom)
  127. {
  128. // the one with largest denom
  129. // is the most meaningful
  130. denom = d;
  131. num = n;
  132. }
  133. else if (d == denom && n < num )
  134. {
  135. num = n; // be pessimistic
  136. }
  137. }
  138. }
  139. //+---------------------------------------------------------------------------
  140. //
  141. // Member: CPhraseCursor::FindPhrase, private
  142. //
  143. // Synopsis: Find nearest phrase. First try to align wid's,
  144. // Then align occurrences. Loop until success
  145. // or no more wid alignments.
  146. //
  147. // Requires: _wid set to any of the current wid's
  148. //
  149. // History: 24-May-91 BartoszM Created
  150. //
  151. // Notes: If cursors point to phrase, no change results
  152. //
  153. //----------------------------------------------------------------------------
  154. BOOL CPhraseCursor::FindPhrase ()
  155. {
  156. if ( _wid == widInvalid )
  157. {
  158. _occ = OCC_INVALID;
  159. return FALSE;
  160. }
  161. while ( FindWidConjunction() && !FindOccConjunction() )
  162. {
  163. _wid = _aCur[0]->NextWorkId();
  164. _pid = _aCur[0]->Pid();
  165. }
  166. if ( _occ != OCC_INVALID )
  167. return TRUE;
  168. else
  169. return FALSE;
  170. }
  171. //+---------------------------------------------------------------------------
  172. //
  173. // Member: CPhraseCursor::NextOccurrence, public
  174. //
  175. // Synopsis: Find phrase for next conjunction of work id's and return _occ
  176. //
  177. // Requires: _occ set to any of the cursors' occurrences
  178. //
  179. // History: 03-Mar-92 AmyA Created
  180. //
  181. // Notes: Increments _cOcc unless another occurrence is not found.
  182. //
  183. //----------------------------------------------------------------------------
  184. OCCURRENCE CPhraseCursor::NextOccurrence()
  185. {
  186. _occ = _aCur[0]->NextOccurrence();
  187. if (FindOccConjunction())
  188. _cOcc++;
  189. return _occ;
  190. }
  191. //+---------------------------------------------------------------------------
  192. //
  193. // Member: CPhraseCursor::OccurrenceCount, public
  194. //
  195. // Synopsis: Returns correct _cOcc by looping through NextOccurrence until
  196. // it returns OCC_INVALID.
  197. //
  198. // Requires: _occ set to any of the cursors' occurrences
  199. //
  200. // History: 28-Feb-92 AmyA Created
  201. //
  202. // Notes: _occ may get changed.
  203. //
  204. //----------------------------------------------------------------------------
  205. ULONG CPhraseCursor::OccurrenceCount()
  206. {
  207. while (NextOccurrence() != OCC_INVALID)
  208. {
  209. // do nothing.
  210. }
  211. return _cOcc;
  212. }
  213. //+---------------------------------------------------------------------------
  214. //
  215. // Member: CPhraseCursor::MaxOccurrence
  216. //
  217. // Synopsis: Returns max occurrence count of current wid
  218. //
  219. // History: 26-Jun-96 SitaramR Created
  220. //
  221. //----------------------------------------------------------------------------
  222. OCCURRENCE CPhraseCursor::MaxOccurrence()
  223. {
  224. Win4Assert( _wid != widInvalid );
  225. if ( _wid == widInvalid )
  226. return OCC_INVALID;
  227. else return _maxOcc;
  228. }
  229. //+---------------------------------------------------------------------------
  230. //
  231. // Member: CPhraseCursor::HitCount, public
  232. //
  233. // Synopsis: Returns correct _cOcc by looping through NextOccurrence until
  234. // it returns OCC_INVALID.
  235. //
  236. // Requires: _occ set to any of the cursors' occurrences
  237. //
  238. // History: 28-Feb-92 AmyA Created
  239. //
  240. // Notes: _occ may get changed.
  241. //
  242. //----------------------------------------------------------------------------
  243. ULONG CPhraseCursor::HitCount()
  244. {
  245. return OccurrenceCount();
  246. }
  247. //+---------------------------------------------------------------------------
  248. //
  249. // Member: CPhraseCursor::FindOccConjunction, private
  250. //
  251. // Synopsis: Find phrase for current conjunction of work id's
  252. //
  253. // Requires: _occ set to any of the cursors' occurrences
  254. //
  255. // History: 24-May-91 BartoszM Created
  256. //
  257. //----------------------------------------------------------------------------
  258. BOOL CPhraseCursor::FindOccConjunction ()
  259. {
  260. if ( _occ == OCC_INVALID )
  261. return FALSE;
  262. unsigned i;
  263. do
  264. {
  265. // NTRAID#DB-NTBUG9-84004-2000/07/31-dlee Indexing Service internal cursors aren't optimized to use shortest cursors first
  266. for ( i = 0; i < _cCur; i++ )
  267. {
  268. // Iterate until we have a matching occurrence
  269. OCCURRENCE occTarget = _occ + _aOcc[i];
  270. cuDebugOut(( DEB_ITRACE, "cursor %d, _occ %d, target %d, _pid %d\n", i, _occ, occTarget, _pid ));
  271. OCCURRENCE occTmp = _aCur[i]->Occurrence();
  272. while ( occTmp < occTarget )
  273. {
  274. occTmp = _aCur[i]->NextOccurrence();
  275. if ( OCC_INVALID == occTmp )
  276. {
  277. _occ = OCC_INVALID;
  278. return FALSE;
  279. }
  280. }
  281. // Keep looping until the pid matches
  282. while ( occTmp == occTarget &&
  283. _aCur[i]->Pid() < _pid )
  284. {
  285. cuDebugOut(( DEB_ITRACE, "looking for matching pid\n" ));
  286. occTmp = _aCur[i]->NextOccurrence();
  287. if ( OCC_INVALID == occTmp )
  288. {
  289. _occ = OCC_INVALID;
  290. return FALSE;
  291. }
  292. }
  293. // if overshot, try again with new _occ
  294. if ( occTmp > occTarget )
  295. {
  296. cuDebugOut(( DEB_ITRACE, "overshot occ\n" ));
  297. _occ = _aCur[i]->Occurrence() - _aOcc[i];
  298. break;
  299. }
  300. Win4Assert( _aCur[i]->Occurrence() == occTarget );
  301. if ( _aCur[i]->Pid() > _pid )
  302. {
  303. cuDebugOut(( DEB_ITRACE, "overshot pid, cur %d, _pid %d\n", _aCur[i]->Pid(), _pid ));
  304. //
  305. // This pid just won't do. Move cursor 0 to the next
  306. // occurrence, use that pid, and start all over.
  307. //
  308. if ( _aCur[0]->NextOccurrence() == OCC_INVALID )
  309. {
  310. _occ = OCC_INVALID;
  311. return FALSE;
  312. }
  313. _occ = _aCur[0]->Occurrence();
  314. _pid = _aCur[0]->Pid();
  315. break;
  316. }
  317. }
  318. } while ( i < _cCur );
  319. return TRUE;
  320. } //FindOccConjunction
  321. //+---------------------------------------------------------------------------
  322. //
  323. // Member: CPhraseCursor::FindWidConjunction, private
  324. //
  325. // Synopsis: Find nearest conjunction of all the same work id's
  326. //
  327. // Requires: _wid set to any of the current wid's
  328. //
  329. // History: 24-May-91 BartoszM Created
  330. //
  331. // Notes: If cursors are in conjunction, no change results
  332. //
  333. //----------------------------------------------------------------------------
  334. BOOL CPhraseCursor::FindWidConjunction ()
  335. {
  336. if ( _wid == widInvalid )
  337. return FALSE;
  338. BOOL change;
  339. do
  340. {
  341. change = FALSE;
  342. // NTRAID#DB-NTBUG9-84004-2000/07/31-dlee Indexing Service internal cursors aren't optimized to use shortest cursors first
  343. for ( unsigned i = 0; i < _cCur; i++ )
  344. {
  345. // Seek _wid
  346. WORKID widTmp = _aCur[i]->WorkId();
  347. while ( widTmp < _wid )
  348. {
  349. widTmp = _aCur[i]->NextWorkId();
  350. if ( widInvalid == widTmp )
  351. {
  352. _wid = widInvalid;
  353. _pid = pidInvalid;
  354. _occ = OCC_INVALID;
  355. return FALSE;
  356. }
  357. }
  358. if ( widTmp > _wid ) // overshot!
  359. {
  360. _wid = widTmp;
  361. _pid = _aCur[i]->Pid();
  362. change = TRUE;
  363. break;
  364. }
  365. }
  366. } while ( change );
  367. _occ = _aCur[0]->Occurrence();
  368. _maxOcc = _aCur[0]->MaxOccurrence();
  369. return TRUE;
  370. } //FindWidConjunction
  371. //+---------------------------------------------------------------------------
  372. //
  373. // Member: CPhraseCursor::Hit(), public
  374. //
  375. // Synopsis:
  376. //
  377. // Arguments:
  378. //
  379. // History: 17-Sep-92 MikeHew Created
  380. //
  381. // Notes:
  382. //
  383. //----------------------------------------------------------------------------
  384. LONG CPhraseCursor::Hit()
  385. {
  386. if ( _occ == OCC_INVALID )
  387. {
  388. return rankInvalid;
  389. }
  390. for (unsigned i=0; i<_cCur; i++)
  391. {
  392. _aCur[i]->Hit();
  393. }
  394. return MAX_QUERY_RANK;
  395. }
  396. //+---------------------------------------------------------------------------
  397. //
  398. // Member: CPhraseCursor::NextHit(), public
  399. //
  400. // Synopsis:
  401. //
  402. // Arguments:
  403. //
  404. // History: 17-Sep-92 MikeHew Created
  405. //
  406. // Notes:
  407. //
  408. //----------------------------------------------------------------------------
  409. LONG CPhraseCursor::NextHit()
  410. {
  411. NextOccurrence();
  412. return Hit();
  413. }
  414. //+---------------------------------------------------------------------------
  415. //
  416. // Member: CPhraseCursor::Rank(), public
  417. //
  418. // Synopsis: Returns phrase rank
  419. //
  420. // History: 23-Jun-94 SitaramR created
  421. //
  422. // Notes: rank = HitCount*Log(_widMax/widCount). We make the
  423. // assumption that the phrase appears in this and this
  424. // document only, ie widcount = 1
  425. //
  426. //----------------------------------------------------------------------------
  427. LONG CPhraseCursor::Rank()
  428. {
  429. Win4Assert( MaxOccurrence() != 0 );
  430. LONG rank = RANK_MULTIPLIER * HitCount() * _logWidMax / MaxOccurrence();
  431. if (rank > MAX_QUERY_RANK)
  432. rank = MAX_QUERY_RANK;
  433. return rank;
  434. }