Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

428 lines
13 KiB

  1. //+---------------------------------------------------------------------------
  2. //
  3. // Microsoft Windows
  4. // Copyright (C) Microsoft Corporation, 1991 - 2000.
  5. //
  6. // File: PROXCUR.CXX
  7. //
  8. // Contents: Proximity Cursor. Computes intersection of multiple
  9. // cursors with rank computed based on word occurrance
  10. // proximity.
  11. //
  12. // Classes: CProxCursor
  13. //
  14. // History: 14-Apr-92 AmyA Created.
  15. //
  16. //----------------------------------------------------------------------------
  17. #include <pch.cxx>
  18. #pragma hdrstop
  19. #include <misc.hxx>
  20. #include <curstk.hxx>
  21. #include "proxcur.hxx"
  22. //+---------------------------------------------------------------------------
  23. //
  24. // Member: CProxCursor::CProxCursor, public
  25. //
  26. // Synopsis: Create a cursor that merges a number of cursors.
  27. //
  28. // Arguments: [cCursor] -- count of cursors
  29. // [curArray] -- pointers to cursors (aquired to an array)
  30. // [maxDist] -- the maximum distance between occurrences
  31. //
  32. // Notes: All cursors must come from the same index
  33. // and the same property
  34. //
  35. // History: 15-Apr-92 AmyA Created
  36. //
  37. //----------------------------------------------------------------------------
  38. CProxCursor::CProxCursor( unsigned cCursor,
  39. COccCurStack& curStack,
  40. LONG maxDist )
  41. : _cCur ( cCursor ),
  42. _maxDist ( maxDist ),
  43. _rank ( rankInvalid )
  44. {
  45. COccCursor *pCur = curStack.Get(0);
  46. _occHeap.MakeHeap ( _cCur, curStack.AcqStack() );
  47. Win4Assert ( pCur != 0 );
  48. _iid = pCur->IndexId();
  49. _pid = pCur->Pid();
  50. // NTRAID#DB-NTBUG9-84004-2000/07/31-dlee Indexing Service internal cursors aren't optimized to use shortest cursors first
  51. _wid = pCur->WorkId();
  52. _logWidMax = Log2(pCur->MaxWorkId());
  53. FindConjunction();
  54. }
  55. //+---------------------------------------------------------------------------
  56. //
  57. // Member: CProxCursor::WorkId, public
  58. //
  59. // Synopsis: Get current work id.
  60. //
  61. // History: 17-Apr-92 AmyA Created
  62. //
  63. //----------------------------------------------------------------------------
  64. WORKID CProxCursor::WorkId()
  65. {
  66. return _wid;
  67. }
  68. //+---------------------------------------------------------------------------
  69. //
  70. // Member: CProxCursor::NextWorkID, public
  71. //
  72. // Synopsis: Move to next work id
  73. //
  74. // Returns: Target work id or widInvalid if no more wid's for current key
  75. //
  76. // History: 17-Apr-92 AmyA Created
  77. //
  78. //----------------------------------------------------------------------------
  79. WORKID CProxCursor::NextWorkId()
  80. {
  81. _rank = rankInvalid;
  82. // NTRAID#DB-NTBUG9-84004-2000/07/31-dlee Indexing Service internal cursors aren't optimized to use shortest cursors first
  83. _wid = _occHeap.Top()->NextWorkId();
  84. FindConjunction();
  85. return _wid;
  86. }
  87. //+---------------------------------------------------------------------------
  88. //
  89. // Member: CProxCursor::HitCount, public
  90. //
  91. // Synopsis: Returns smallest HitCount of all keys in current wid.
  92. //
  93. // Requires: _wid set to any of the current wid's
  94. //
  95. // Returns: smallest occurrence count of all keys in wid.
  96. //
  97. // History: 17-Apr-92 AmyA Created
  98. //
  99. // Notes: If there is no conjunction in current wid, returns 0.
  100. //
  101. //----------------------------------------------------------------------------
  102. ULONG CProxCursor::HitCount()
  103. {
  104. if ( _rank == rankInvalid )
  105. _rank = CalculateRank(); // This needs to be called before HitCount
  106. // so taht the occurrence information in
  107. // the children cursors will be valid when
  108. // its called.
  109. COccCursor **aCur = _occHeap.GetVector();
  110. ULONG count = aCur[0]->HitCount();
  111. for ( unsigned i = 1; i < _cCur; i++ )
  112. {
  113. ULONG newcount = aCur[i]->HitCount();
  114. if ( newcount < count )
  115. count = newcount;
  116. }
  117. return count;
  118. }
  119. void CProxCursor::RatioFinished (ULONG& denom, ULONG& num)
  120. {
  121. COccCursor **vector = _occHeap.GetVector();
  122. denom = 1;
  123. num = 0;
  124. for (unsigned i=0; i < _cCur; i++)
  125. {
  126. ULONG d, n;
  127. vector[i]->RatioFinished(d, n);
  128. if (d == n)
  129. {
  130. // done if any cursor is done
  131. denom = d;
  132. num = n;
  133. Win4Assert( denom > 0 );
  134. break;
  135. }
  136. else if (d > denom)
  137. {
  138. // the one with largest denom
  139. // is the most meaningful
  140. denom = d;
  141. num = n;
  142. }
  143. else if (d == denom && n < num )
  144. {
  145. num = n; // be pessimistic
  146. }
  147. }
  148. }
  149. //+---------------------------------------------------------------------------
  150. //
  151. // Member: CProxCursor::Rank, public
  152. //
  153. // Synopsis: Checks to see if CalculateRank has been called. If not, calls
  154. // it.
  155. //
  156. // Requires: _wid set to any of the current wid's
  157. //
  158. // Returns: _rank
  159. //
  160. // History: 20-Apr-92 AmyA Created
  161. //
  162. //----------------------------------------------------------------------------
  163. LONG CProxCursor::Rank()
  164. {
  165. if ( _rank == rankInvalid )
  166. _rank = CalculateRank();
  167. return _rank;
  168. }
  169. //+---------------------------------------------------------------------------
  170. //
  171. // Member: CProxCursor::FindConjunction, private
  172. //
  173. // Synopsis: Find nearest conjunction of all the same work id's
  174. //
  175. // Requires: _wid set to any of the current wid's
  176. //
  177. // Modifies: [_wid] to point to conjunction or to widInvalid
  178. //
  179. // History: 15-Apr-92 AmyA Copied from CAndCursor.
  180. //
  181. // Notes: If cursors are in conjunction, no change results
  182. //
  183. //----------------------------------------------------------------------------
  184. void CProxCursor::FindConjunction ()
  185. {
  186. BOOL change;
  187. COccCursor **aCur = _occHeap.GetVector();
  188. do {
  189. change = FALSE;
  190. // NTRAID#DB-NTBUG9-84004-2000/07/31-dlee Indexing Service internal cursors aren't optimized to use shortest cursors first
  191. // for all cursors in turn try to align them on _wid
  192. for ( unsigned i = 0; i < _cCur; i++ )
  193. {
  194. // increment cursor to or past current _wid
  195. // or exit when exhausted
  196. while ( aCur[i]->WorkId() < _wid )
  197. {
  198. if ( aCur[i]->NextWorkId() == widInvalid )
  199. {
  200. _wid = widInvalid;
  201. return;
  202. }
  203. }
  204. // if overshot, try again with new _wid
  205. if ( aCur[i]->WorkId() > _wid )
  206. {
  207. _wid = aCur[i]->WorkId();
  208. change = TRUE;
  209. break;
  210. }
  211. }
  212. } while ( change );
  213. }
  214. //+---------------------------------------------------------------------------
  215. //
  216. // Member: CProxCursor::CalculateRank, private
  217. //
  218. // Synopsis: Assigns a rank based on the shortest distance between an
  219. // occurrence of each child.
  220. //
  221. // Requires: _wid set to any of the current wid's, at least two child
  222. // cursors.
  223. //
  224. // Returns: calculated rank
  225. //
  226. // History: 17-Apr-92 AmyA Created
  227. //
  228. // Notes: If there is no conjunction in current wid, returns 0.
  229. //
  230. // New Rank computation:
  231. // rank = cOcc*Log2(_widMax)*normalizedProximity(distMin)
  232. // where,
  233. // cOcc = hits_with_dist(distMin)
  234. // where normalizedProximity(i) = ProxDefault[i]/MAX_QUERY_RANK
  235. //
  236. //----------------------------------------------------------------------------
  237. // The idea is that we are looking for the combination of occurrences (one
  238. // for each child cursor) that is closest together for the current wid. To
  239. // do this, we only need to look at two of the child cursors from a set: the
  240. // one with the smallest occurrence and the one furthest from it. We look
  241. // at these sets in a loop, getting the next occurrence on the cursor with
  242. // the smallest occurrence, then reheaping to find the new smallest
  243. // occurrence, and then finding the occurrence furthest from it. By getting
  244. // the next occurrence on the cursor with the smallest occurrence, we are
  245. // guaranteeing that we will not skip over a set of cursors that are closer
  246. // together. If you need proof of this, draw a picture with the cursors
  247. // represented as parallel lines and the occurrences as hash marks on those
  248. // lines and step through the algorithm. Remember that we start this
  249. // function while all the child cursors are at thier smallest occurrence
  250. // within the current wid, since this function needs to be called before any
  251. // work with occurrences is done within a wid.
  252. LONG CProxCursor::CalculateRank()
  253. {
  254. Win4Assert ( _cCur >= 2 );
  255. ULONG distMin = _maxDist + 1;
  256. unsigned cOcc = 0; // #hits at distMin
  257. // loop through occurrence combinations to find the set of occurrences
  258. // for different cursors that are the closest together
  259. do
  260. {
  261. // Get smallest occurrence
  262. _occHeap.Reheap();
  263. OCCURRENCE smallOcc = _occHeap.Top()->Occurrence();
  264. COccCursor **aCur = _occHeap.GetVector();
  265. OCCURRENCE largeOcc = aCur[1]->Occurrence();
  266. // loop through all occurrences (except the first, which is the
  267. // smallest and the second) to find the occurrence furthest from the
  268. // smallest.
  269. for ( unsigned count = 2; count < _cCur; count++ )
  270. {
  271. OCCURRENCE newOcc = aCur[count]->Occurrence();
  272. if ( newOcc > largeOcc )
  273. largeOcc = newOcc;
  274. }
  275. if (largeOcc - smallOcc < PROX_MAX)
  276. {
  277. if (largeOcc - smallOcc < distMin)
  278. {
  279. distMin = largeOcc - smallOcc;
  280. cOcc = 1; // reset # hits
  281. } else if (largeOcc - smallOcc == distMin) {
  282. cOcc++;
  283. }
  284. } // else children are too far apart to affect rank
  285. // get the next occurrence on the cursor with the smallest occurrence
  286. } while ( _occHeap.Top()->NextOccurrence() != OCC_INVALID );
  287. if (distMin >= PROX_MAX) {
  288. return(0);
  289. }
  290. LONG rank = cOcc * _logWidMax * ProxDefault[distMin] / MAX_QUERY_RANK;
  291. if (rank > MAX_QUERY_RANK) {
  292. rank = MAX_QUERY_RANK;
  293. }
  294. return rank;
  295. }
  296. //+---------------------------------------------------------------------------
  297. //
  298. // Member: CProxCursor::Hit, public
  299. //
  300. // Synopsis: Hits current child (indexed by _iCur)
  301. //
  302. // History: 07-Sep-92 MikeHew Created
  303. //
  304. // Notes: Hit() should not be called more than once, except by
  305. // NextHit()
  306. //
  307. // The occurrence heap is assumed valid upon entry, and remains
  308. // valid on exit.
  309. //
  310. //----------------------------------------------------------------------------
  311. LONG CProxCursor::Hit()
  312. {
  313. Win4Assert ( _cCur >= 2 );
  314. COccCursor **aCur = _occHeap.GetVector();
  315. // Make sure none of the cursors are empty
  316. for ( unsigned i=0; i<_cCur; ++i )
  317. {
  318. if ( aCur[i]->IsEmpty() )
  319. return rankInvalid;
  320. }
  321. // Starting with smallest occurrence, loop through all cursors,
  322. // Hitting() each one and searching for the largest occurrence.
  323. OCCURRENCE largeOcc = _occHeap.Top()->Occurrence();
  324. OCCURRENCE smallOcc = largeOcc;
  325. for ( i=0; i<_cCur; ++i )
  326. {
  327. aCur[i]->Hit();
  328. OCCURRENCE thisOcc = aCur[i]->Occurrence();
  329. if ( thisOcc > largeOcc )
  330. {
  331. largeOcc = thisOcc;
  332. }
  333. // get the next occurrence on the cursor with the smallest occurrence
  334. }
  335. unsigned dist = largeOcc - smallOcc;
  336. if (dist >= PROX_MAX)
  337. return(0);
  338. return ProxDefault[dist];
  339. }
  340. //+---------------------------------------------------------------------------
  341. //
  342. // Member: CProxCursor::NextHit, public
  343. //
  344. // Synopsis: calls NextOccurrence() on smallest child, then
  345. // returns Hit() if NextOccurrence() is valid
  346. //
  347. // History: 07-Sep-92 MikeHew Created
  348. //
  349. // Notes: NextHit() should not be called after returning rankInvalid
  350. //
  351. //----------------------------------------------------------------------------
  352. LONG CProxCursor::NextHit()
  353. {
  354. if ( _occHeap.Top()->NextOccurrence() == OCC_INVALID )
  355. {
  356. return rankInvalid;
  357. }
  358. _occHeap.Reheap();
  359. return Hit();
  360. }