Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

432 lines
11 KiB

  1. //+---------------------------------------------------------------------------
  2. //
  3. // Microsoft Windows
  4. // Copyright (C) Microsoft Corporation, 1991 - 1992.
  5. //
  6. // File: RWEX.CXX
  7. //
  8. // Contents: Relevant word extraction
  9. //
  10. // Classes: CRelevantWord, CRWStore, CRWHeap
  11. //
  12. // History: 25-Apr-94 dlee Created
  13. //
  14. //----------------------------------------------------------------------------
  15. #include <pch.cxx>
  16. #pragma hdrstop
  17. #include <rwex.hxx>
  18. //+---------------------------------------------------------------------------
  19. //
  20. // Member: CRWStore::CRWStore, public
  21. //
  22. // Synopsis: Constructor for relevant word store
  23. //
  24. // Arguments: [pwList] -- array of work ids to operate over, must be in
  25. // increasing order
  26. // [cWids] -- # of items in pwList
  27. // [cIds] -- # of relevant word key ids per wid to store
  28. //
  29. // History: 25-Apr-94 dlee Created.
  30. //
  31. //----------------------------------------------------------------------------
  32. CRWStore::CRWStore(WORKID *pwList,ULONG cWids,ULONG cIds)
  33. : _cWids(cWids), _cIds(cIds), _ulSearchLeftOff(0)
  34. {
  35. _cbRow = _RowSize(cIds);
  36. for (ULONG x = 0; x < _cWids; x++)
  37. {
  38. SRWHeader *p = GetRow(x);
  39. p->wid = pwList[x];
  40. p->cItems = 0;
  41. }
  42. } //CRWStore
  43. //+---------------------------------------------------------------------------
  44. //
  45. // Member: CRWStore::new, public
  46. //
  47. // Synopsis: Private new operator
  48. //
  49. // Arguments: [st] -- predefined param
  50. // [cWids] -- # of rows for the array
  51. // [cIds] -- # of relevant word key ids per wid to store
  52. //
  53. // History: 25-Apr-94 dlee Created.
  54. //
  55. //----------------------------------------------------------------------------
  56. void * CRWStore::operator new(size_t st,ULONG cWids,ULONG cIds)
  57. {
  58. return (void *) ::new char[_ObjectSize(cWids,cIds)];
  59. } //new
  //+---------------------------------------------------------------------------
  //
  //  Member:     CRWStore::delete, public
  //
  //  Synopsis:   Placement form of delete that pairs with
  //              operator new(size_t,ULONG,ULONG).  The language invokes it
  //              when a constructor throws during that form of new.
  //
  //  Arguments:  [p]     -- storage returned by the matching operator new
  //              [cWids] -- unused, present only to match the new signature
  //              [cIds]  -- unused, present only to match the new signature
  //
  //  Notes:      The matching operator new obtains the storage with
  //              ::new char[], so it has to be released with the array form
  //              of delete through a char pointer.  Scalar delete on a
  //              void * is undefined behavior.
  //
  //----------------------------------------------------------------------------
  #if _MSC_VER >= 1200
  void CRWStore::operator delete(void * p, ULONG cWids, ULONG cIds)
  {
      ::delete [] (char *) p;
  }
  #endif
  66. //+---------------------------------------------------------------------------
  67. //
  68. // Member: CRWStore::Insert, public
  69. //
  70. // Synopsis: Inserts a keyid in a wid's heap if the rank is sufficient.
  71. //
  72. // Arguments: [wid] -- wid on whose heap is operated
  73. // [keyid] -- keyid to add
  74. // [lRank] -- rank of the keyid in the wid
  75. //
  76. // History: 17-Jun-94 dlee Created.
  77. //
  78. //----------------------------------------------------------------------------
  79. void CRWStore::Insert(WORKID wid,KEYID keyid, LONG lRank)
  80. {
  81. SRWHeader *ph = _Find(wid,GetRow(_ulSearchLeftOff),
  82. _cWids - _ulSearchLeftOff);
  83. Win4Assert(ph != 0);
  84. //
  85. // This heap object is merely an accessor to a heap whose storage
  86. // has already been allocated. The heap object operates on the heap
  87. // but does not own the heap's memory.
  88. //
  89. CRWHeap heap(ph,_cIds);
  90. heap.Insert(keyid,lRank);
  91. //
  92. // Start the next binary search one after where the last search
  93. // left off, since we know the wids will come through in sorted order.
  94. //
  95. _ulSearchLeftOff = _HeaderToRow(ph) + 1;
  96. } //Insert
  97. //+---------------------------------------------------------------------------
  98. //
  99. // Member: CRWStore::_Find, private
  100. //
  101. // Synopsis: Finds a wid's heap in the array of heaps given a wid
  102. //
  103. // Arguments: [wid] -- wid of heap to find
  104. // [base] -- pointer to first element in heap array
  105. // [cRows] -- # of rows in the array
  106. //
  107. // Returns: pointer to the head of the heap for the wid or 0 if not found.
  108. //
  109. // History: 25-Apr-94 dlee Created.
  110. //
  111. //----------------------------------------------------------------------------
  112. SRWHeader * CRWStore::_Find(WORKID wid,SRWHeader *pBase,ULONG cRows)
  113. {
  114. Win4Assert(cRows != 0);
  115. SRWHeader *lo = pBase;
  116. SRWHeader *hi = lo->Forward(cRows - 1,_cbRow);
  117. SRWHeader *mid;
  118. ULONG cHalf;
  119. while (lo <= hi)
  120. {
  121. if (cHalf = cRows / 2)
  122. {
  123. mid = lo->Forward((cRows & 1) ? cHalf : (cHalf - 1),_cbRow);
  124. if (wid == mid->wid)
  125. {
  126. return mid;
  127. }
  128. else if (wid < mid->wid)
  129. {
  130. hi = mid->Backward(1,_cbRow);
  131. cRows = (cRows & 1) ? cHalf : (cHalf - 1);
  132. }
  133. else
  134. {
  135. lo = mid->Forward(1,_cbRow);
  136. cRows = cHalf;
  137. }
  138. }
  139. else if (cRows != 0)
  140. {
  141. if (wid == lo->wid)
  142. return lo;
  143. else
  144. return 0;
  145. }
  146. else
  147. {
  148. break;
  149. }
  150. }
  151. return 0;
  152. } //_Find
  153. //+---------------------------------------------------------------------------
  154. //
  155. // Member: CRWHeap::DeQueue, public
  156. //
  157. // Synopsis: Removes the lowest-ranking keyid in the heap for a wid
  158. //
  159. // Returns: keyid of the lowest-ranking member of the heap
  160. //
  161. // History: 25-Apr-94 dlee Created.
  162. //
  163. //----------------------------------------------------------------------------
  164. KEYID CRWHeap::DeQueue()
  165. {
  166. ULONG ulL,ulR,ulMax;
  167. KEYID kRet = _ph->aItems[0].kKeyId;
  168. _ph->cItems--;
  169. //
  170. // Take out the bottom-right most leaf and bubble it down from
  171. // the top of the tree until it is less than its parent.
  172. //
  173. SRWItem iFix = _ph->aItems[_ph->cItems];
  174. ULONG ulPos = 0;
  175. while (!_IsLeaf(ulPos))
  176. {
  177. ulL = _Left(ulPos);
  178. ulR = _Right(ulPos);
  179. if (!_IsValid(ulR))
  180. ulMax = ulL;
  181. else
  182. {
  183. if (_ph->aItems[ulL].lRank < _ph->aItems[ulR].lRank)
  184. ulMax = ulL;
  185. else
  186. ulMax = ulR;
  187. }
  188. if (_ph->aItems[ulMax].lRank < iFix.lRank)
  189. {
  190. _ph->aItems[ulPos] = _ph->aItems[ulMax];
  191. ulPos = ulMax;
  192. }
  193. else
  194. {
  195. break;
  196. }
  197. }
  198. _ph->aItems[ulPos] = iFix;
  199. return kRet;
  200. } //DeQueue
  201. //+---------------------------------------------------------------------------
  202. //
  203. // Member: CRWStore::Insert, public
  204. //
  205. // Synopsis: Inserts an keyid in the rw heap for a wid if the keyid's rank
  206. // is greater than the lowest ranking keyid in the heap or if
  207. // the heap is not yet full.
  208. //
  209. // Arguments: [keyid] -- item to insert
  210. // [lRank] -- rank of the keyid
  211. //
  212. // History: 25-Apr-94 dlee Created.
  213. //
  214. //----------------------------------------------------------------------------
  215. void CRWHeap::Insert(KEYID keyid, LONG lRank)
  216. {
  217. if ((_ph->cItems < _ulMaxIds) ||
  218. (_ph->aItems[0].lRank < lRank))
  219. {
  220. //
  221. // Pop off the top element if the list is full
  222. //
  223. if (_ph->cItems == _ulMaxIds)
  224. DeQueue();
  225. //
  226. // Insert element as the rightmost bottom level leaf in the heap
  227. //
  228. ULONG ulPos = _ph->cItems++;
  229. _ph->aItems[ulPos].kKeyId = keyid;
  230. _ph->aItems[ulPos].lRank = lRank;
  231. //
  232. // bubble the element up until it fits correctly in the tree
  233. //
  234. while (ulPos)
  235. {
  236. ULONG ulParent = _Parent(ulPos);
  237. if (_ph->aItems[ulPos].lRank < _ph->aItems[ulParent].lRank)
  238. {
  239. //
  240. // swap the elements
  241. //
  242. SRWItem t = _ph->aItems[ulPos];
  243. _ph->aItems[ulPos] = _ph->aItems[ulParent];
  244. _ph->aItems[ulParent] = t;
  245. ulPos = ulParent;
  246. }
  247. else
  248. {
  249. break;
  250. }
  251. }
  252. }
  253. } //Insert
  254. //+---------------------------------------------------------------------------
  255. //
  256. // Member: CRelevantWord::CRelevantWord, public
  257. //
  258. // Synopsis: Constructor for the relevant word object
  259. //
  260. // Arguments: [pwid] -- array of wids in sorted order to track
  261. // [cWidsUsed] -- # of wids in the array
  262. // [cRW] -- # of relevant words per wid to track
  263. //
  264. // History: 25-Apr-94 dlee Created.
  265. //
  266. //----------------------------------------------------------------------------
  267. CRelevantWord::CRelevantWord(WORKID *pwid,ULONG cWidsUsed,ULONG cRW)
  268. : _pWidItem(0), _pstore(0), _cWidsAdded(0)
  269. {
  270. TRY
  271. {
  272. _pstore = new(cWidsUsed,cRW) CRWStore(pwid,cWidsUsed,cRW);
  273. _pWidItem = new SRWWidItem[cWidsUsed];
  274. }
  275. CATCH ( CException, e )
  276. {
  277. delete _pWidItem;
  278. delete _pstore;
  279. RETHROW();
  280. }
  281. END_CATCH
  282. } //CRelevantWord
  283. //+---------------------------------------------------------------------------
  284. //
  285. // Member: CRelevantWord::~CRelevantWord
  286. //
  287. // Synopsis: Destructor for the relevant word object
  288. //
  289. // History: 25-Apr-94 dlee Created.
  290. //
  291. //----------------------------------------------------------------------------
  292. CRelevantWord::~CRelevantWord()
  293. {
  294. delete _pWidItem;
  295. delete _pstore; // may be 0
  296. } //~CRelevantWord
  297. //+---------------------------------------------------------------------------
  298. //
  299. // Member: CRelevantWord::DoneWithKey, public
  300. //
  301. // Synopsis: Computes rank for each wid occurance of a keyid and adjusts
  302. // the heaps appropriately
  303. //
  304. // Arguments: [keyid] -- keyid on which to calculate
  305. // [maxWid] -- # docs on disk
  306. // [cWids] -- # docs with key on disk
  307. //
  308. // History: 25-Apr-94 dlee Created.
  309. //
  310. //----------------------------------------------------------------------------
  311. void CRelevantWord::DoneWithKey(KEYID keyid,ULONG maxWid,ULONG cWids)
  312. {
  313. if (0 != cWids)
  314. {
  315. _SetWidInfo(maxWid,cWids);
  316. for (ULONG x = 0; x < _cWidsAdded; x++)
  317. {
  318. Win4Assert(_pWidItem[x].wid != 0);
  319. _pstore->Insert(_pWidItem[x].wid,keyid,_Rank(_pWidItem[x].cOcc));
  320. }
  321. _pstore->DoneWithKey();
  322. _cWidsAdded = 0;
  323. }
  324. else
  325. {
  326. Win4Assert(0 == _cWidsAdded);
  327. }
  328. } //DoneWithKey
  329. //+-------------------------------------------------------------------------
  330. //
  331. // Function: _SortULongArray, private
  332. //
  333. // Synopsis: Sorts an array of unsigned longs from low to high
  334. //
  335. // Arguments: [pulItems] -- list of IDs to be sorted
  336. // [cItems] -- # of root words to be sorted
  337. //
  338. // Returns: void
  339. //
  340. // Algorithm: Heapsort, not quick sort. Give up 20% speed to save kernel
  341. // stack and prevent against n*n performance on sorted lists.
  342. //
  343. // History: 14-Mar-94 Dlee Created
  344. //
  345. //--------------------------------------------------------------------------
  346. #define _CompUL(x,y) ((*(x)) > (*(y)) ? 1 : (*(x)) == (*(y)) ? 0 : -1)
  347. #define _SwapUL(x,y) { ULONG _t = *(x); *(x) = *(y); *(y) = _t; }
  348. inline static void _AddRootUL(ULONG x,ULONG n,ULONG *p)
  349. {
  350. ULONG _x = x;
  351. ULONG _j = (2 * (_x + 1)) - 1;
  352. while (_j < n)
  353. {
  354. if (((_j + 1) < n) &&
  355. (_CompUL(p + _j,p + _j + 1) < 0))
  356. _j++;
  357. if (_CompUL(p + _x,p + _j) < 0)
  358. {
  359. _SwapUL(p + _x,p + _j);
  360. _x = _j;
  361. _j = (2 * (_j + 1)) - 1;
  362. }
  363. else break;
  364. }
  365. } //_AddRootUL
  366. void _SortULongArray(ULONG *pulItems,ULONG cItems)
  367. {
  368. if (cItems == 0)
  369. return;
  370. long z;
  371. for (z = (((long) cItems + 1) / 2) - 1; z >= 0; z--)
  372. {
  373. _AddRootUL(z,cItems,pulItems);
  374. }
  375. for (z = cItems - 1; z != 0; z--)
  376. {
  377. _SwapUL(pulItems,pulItems + z);
  378. _AddRootUL(0,(ULONG) z,pulItems);
  379. }
  380. } //_SortULongArray