Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1618 lines
46 KiB

  1. //#define _DUMPALL
  2. #include <mvopsys.h>
  3. #include <mem.h>
  4. #include <memory.h>
  5. #include <orkin.h>
  6. #ifdef DOS_ONLY
  7. #include <assert.h>
  8. #endif // DOS_ONLY
  9. #include <mvsearch.h>
  10. #include "common.h"
  11. #include "search.h"
  12. #ifdef _DEBUG
  13. static BYTE NEAR s_aszModule[] = __FILE__; /* Used by error return functions.*/
  14. #endif
  15. typedef int (PASCAL NEAR * FCMP)(LPV, LPV);
  /* Maximum entries for heap sort.
   * The replacement list is fully parenthesized so that the macro behaves as
   * a single operand when it appears inside a larger expression
   * (e.g. `x / MAX_HEAP_ENTRIES` or `2 * MAX_HEAP_ENTRIES`). */
  #define MAX_HEAP_ENTRIES (0xffff / sizeof(LPV))
  #define MIN_HEAP_ENTRIES 100 // Minimum entries for heap sort
  18. /*************************************************************************
  19. *
  20. * INTERNAL GLOBAL FUNCTIONS
  21. *************************************************************************/
  22. PUBLIC HRESULT PASCAL NEAR OrHandler(LPQT, _LPQTNODE, LPITOPIC, LPV, int);
  23. PUBLIC HRESULT PASCAL NEAR AndHandler(LPQT, _LPQTNODE, LPITOPIC, LPV, int);
  24. PUBLIC HRESULT PASCAL NEAR NotHandler(LPQT, _LPQTNODE, LPITOPIC, LPV, int);
  25. PUBLIC HRESULT PASCAL NEAR NearHandler(LPQT, _LPQTNODE, LPITOPIC, LPV, int);
  26. PUBLIC HRESULT PASCAL NEAR PhraseHandler(LPQT, _LPQTNODE, LPITOPIC, LPV, int);
  27. PUBLIC VOID PASCAL NEAR RemoveUnmarkedTopicList (LPQT, _LPQTNODE, BOOL);
  28. PUBLIC VOID PASCAL NEAR RemoveUnmarkedNearTopicList (_LPQT, _LPQTNODE);
  29. PUBLIC VOID PASCAL NEAR MergeOccurence(LPQT, LPITOPIC , LPITOPIC);
  30. PUBLIC VOID PASCAL NEAR SortResult (LPQT, _LPQTNODE, WORD);
  31. PUBLIC VOID PASCAL NEAR NearHandlerCleanUp (LPQT, _LPQTNODE);
  32. PUBLIC HRESULT PASCAL NEAR TopicListSort (_LPQTNODE, BOOL);
  33. /*************************************************************************
  34. * GLOBAL VARIABLES
  35. *************************************************************************/
  36. extern FNHANDLER HandlerFuncTable[];
  37. /*************************************************************************
  38. *
  39. * INTERNAL PRIVATE FUNCTIONS
  40. * All of them should be declared near
  41. *************************************************************************/
  42. PRIVATE VOID PASCAL NEAR RemoveQuery(LPQT, _LPQTNODE);
  43. PUBLIC HRESULT PASCAL NEAR ProximityCheck(LPITOPIC, LPIOCC, WORD);
  44. PRIVATE HRESULT PASCAL NEAR HandleNullNode(LPQT, _LPQTNODE , _LPQTNODE, int);
  45. PRIVATE VOID PASCAL NEAR RemoveUnmarkedOccList (LPQT, LPITOPIC, LPIOCC, int);
  46. PRIVATE VOID PASCAL NEAR OccurenceSort (LPQT, LPITOPIC);
  47. PRIVATE int PASCAL NEAR FRange(DWORD, DWORD, WORD);
  48. PRIVATE HRESULT PASCAL NEAR InsertMarker (LPQT, LPITOPIC);
  49. PRIVATE LPIOCC PASCAL NEAR FindMarker (LPIOCC);
  50. PRIVATE HRESULT PASCAL NEAR NearListMatch (LPIOCC, LPIOCC, WORD);
  51. PRIVATE VOID PASCAL NEAR HeapUp (LPITOPIC far*, WORD, FCMP);
  52. PRIVATE VOID PASCAL NEAR HeapDown (LPITOPIC far*, int, FCMP);
  53. PRIVATE int PASCAL NEAR TopicWeightCompare (LPV, LPV);
  54. PRIVATE int PASCAL NEAR HitCountCompare (LPV, LPV);
  55. /*************************************************************************
  56. * @doc INTERNAL
  57. *
  58. * @func HRESULT PASCAL NEAR | OrHandler |
  59. * Handle ORing the strings
  60. *
  61. * @parm int | fOperationType |
  62. * Tell what kinds of operations we are dealing with
  63. *
  64. * @parm _LPQTNODE | lpResQtNode |
  65. * The query node structure that we add the result to
  66. *
  67. * @parm LPV | lpStruct |
  68. * Vanilla pointers to different types of structures we are dealing with.
  69. * The contents of those pointers are determined by the value of
  70. * fOperationType, for EXPRESSION_TERM, this is a LPIOCC, for
  71. * EXPRESSION_EXPRESSION, this is a_LPQTNODE
  72. *
  73. * @rdesc S_OK : if the operation has been carried
  74. * E_FAIL : if some errors happened (out-of-memory)
  75. *
  76. * @comm The implementation is straightforward:
  77. *************************************************************************/
  78. PUBLIC HRESULT PASCAL NEAR OrHandler(_LPQT lpQueryTree, _LPQTNODE lpResQtNode,
  79. LPITOPIC lpResTopicList, LPV lpStruct, int fOperationType)
  80. {
  81. _LPQTNODE lpCurQtNode;
  82. LPITOPIC lpCurTopicList;
  83. LPITOPIC lpNextTopicList;
  84. switch (fOperationType) {
  85. case EXPRESSION_TERM:
  86. /* We are adding a new occurence into a TopicID list. This can
  87. only happens when we are loading the infos for a query's
  88. TERM_NODE node
  89. */
  90. RET_ASSERT(lpResTopicList);
  91. /* Adding the new occurence to the TopicID list */
  92. return OccNodeInsert(lpQueryTree, lpResTopicList, (LPIOCC)lpStruct);
  93. break;
  94. case EXPRESSION_EXPRESSION:
  95. lpCurQtNode = (_LPQTNODE)lpStruct;
  96. /* Handle different variations of:
  97. (EXPRESSION_NODE | NULL_NODE) or (NULL_NODE | EXPRESSION_NODE)
  98. */
  99. if (HandleNullNode(lpQueryTree, lpResQtNode, lpCurQtNode, OR_OP))
  100. return S_OK;
  101. /* Make sure that we are pointing to the right place to search
  102. */
  103. lpQueryTree->lpTopicStartSearch = lpResQtNode->lpTopicList;
  104. /* Thread the TopicID List and add them to lpResQtNode */
  105. for (lpCurTopicList = QTN_TOPICLIST(lpCurQtNode); lpCurTopicList;
  106. lpCurTopicList = lpNextTopicList) {
  107. lpNextTopicList = lpCurTopicList->pNext;
  108. /* Find the location of the TopicID List in the query. */
  109. if ((lpResTopicList = TopicNodeSearch(lpQueryTree, lpResQtNode,
  110. lpCurTopicList->dwTopicId)) == NULL){
  111. /* The list doesn't exist yet, so we just transfer the
  112. new TopicID list to lpResQtNode
  113. */
  114. RemoveNode(lpQueryTree, (LPV) lpCurQtNode, NULL,
  115. (LPSLINK) lpCurTopicList, (TOPICLIST_NODE | DONT_FREE));
  116. TopicNodeInsert (lpQueryTree, lpResQtNode, lpCurTopicList);
  117. }
  118. else {
  119. /* Merging two TopicList together by adding the new
  120. occurence list to the old result doc list
  121. */
  122. MergeOccurence(lpQueryTree, lpResTopicList, lpCurTopicList);
  123. /* Remove the now empty TopicList */
  124. RemoveNode(lpQueryTree, (LPV) lpCurQtNode, NULL,
  125. (LPSLINK) lpCurTopicList, TOPICLIST_NODE);
  126. }
  127. }
  128. /* Assure that all nodes are transferred */
  129. RET_ASSERT (QTN_TOPICLIST(lpCurQtNode) == NULL) ;
  130. break;
  131. }
  132. return S_OK;
  133. }
  134. /*************************************************************************
  135. * @doc INTERNAL
  136. *
  137. * @func PUBLIC HRESULT PASCAL NEAR | AndHandler |
  138. * Handle Anding the strings
  139. *
  140. * @parm _LPQTNODE | lpResQtNode |
  141. * The query structure that we add the result to
  142. *
  143. * @parm LPITOPIC | lpResTopicList |
  144. * The TopicList structure that we add the result to
  145. *
  146. * @parm LPV | lpStruct |
  147. * Vanilla pointers to different types of structures we are dealing with.
  148. * The contents of those pointers are determined by the value of
  149. * fOperationType, for EXPRESSION_TERM, this is a LPIOCC, for
  150. * EXPRESSION_EXPRESSION, this is a_LPQTNODE
  151. *
  152. * @parm int | fOperationType |
  153. * Tell what kinds of nodes we are handling, query-occurence or
  154. * query-query
  155. *
  156. * @rdesc S_OK : if the operation has been carried
  157. * errors : if some errors happened (out-of-memory)
  158. *************************************************************************/
  159. PUBLIC HRESULT PASCAL NEAR AndHandler(_LPQT lpQueryTree, _LPQTNODE lpResQtNode,
  160. LPITOPIC lpResTopicList, LPV lpStruct, int fOperationType)
  161. {
  162. _LPQTNODE lpCurQtNode;
  163. LPITOPIC lpTopicNode1;
  164. LPITOPIC lpTopicNode2;
  165. LPITOPIC lpNextTopic1;
  166. LPITOPIC lpNextTopic2;
  167. LPITOPIC lpPrev;
  168. long fResult;
  169. switch (fOperationType) {
  170. case EXPRESSION_TERM:
  171. RET_ASSERT (lpResTopicList);
  172. ((LPIOCC)lpStruct)->fFlag |= TO_BE_KEPT;
  173. lpResTopicList->fFlag |= TO_BE_KEPT;
  174. /* Adding the new occurence to the TopicID list */
  175. return OccNodeInsert(lpQueryTree, lpResTopicList, (LPIOCC)lpStruct);
  176. case EXPRESSION_EXPRESSION:
  177. /* Doing an AND combination is equivalent to merging the
  178. * two lists together for same doc ID
  179. */
  180. lpCurQtNode = (_LPQTNODE)lpStruct;
  181. if (HandleNullNode(lpQueryTree, lpResQtNode, lpCurQtNode, AND_OP))
  182. return S_OK;
  183. /* Initialize variables */
  184. lpTopicNode1 = QTN_TOPICLIST(lpResQtNode);
  185. lpTopicNode2 = QTN_TOPICLIST(lpCurQtNode);
  186. lpPrev = NULL;
  187. while (lpTopicNode1 && lpTopicNode2)
  188. {
  189. /* Get the next nodes */
  190. lpNextTopic1 = lpTopicNode1->pNext;
  191. lpNextTopic2 = lpTopicNode2->pNext;
  192. if ((fResult = lpTopicNode1->dwTopicId -
  193. lpTopicNode2->dwTopicId) == 0)
  194. {
  195. /* The TopicIds match */
  196. /* Merge the occurrences together */
  197. MergeOccurence (lpQueryTree, lpTopicNode1, lpTopicNode2);
  198. lpPrev = lpTopicNode1;
  199. lpTopicNode1 = lpNextTopic1;
  200. lpTopicNode2 = lpNextTopic2;
  201. }
  202. else if (fResult < 0)
  203. {
  204. /* List 1 < List 2 */
  205. /* Remove Topic node 1*/
  206. TopicNodeFree(lpQueryTree, lpResQtNode, lpPrev, lpTopicNode1);
  207. lpTopicNode1 = lpNextTopic1;
  208. }
  209. else
  210. {
  211. /* List 1 > List 2 */
  212. lpTopicNode2 = lpNextTopic2;
  213. }
  214. }
  215. /* Free remaining doc list */
  216. while (lpTopicNode1)
  217. {
  218. /* Get the next nodes */
  219. lpNextTopic1 = lpTopicNode1->pNext;
  220. /* Remove Topic node 1*/
  221. TopicNodeFree(lpQueryTree, lpResQtNode, lpPrev, lpTopicNode1);
  222. lpTopicNode1 = lpNextTopic1;
  223. }
  224. /* Free doc 2 list */
  225. RemoveQuery(lpQueryTree, lpCurQtNode);
  226. if (QTN_TOPICLIST(lpResQtNode) == NULL)
  227. QTN_NODETYPE(lpResQtNode) = NULL_NODE;
  228. return S_OK;
  229. default: /* Weird parameters */
  230. RET_ASSERT(UNREACHED);
  231. }
  232. }
  233. /*************************************************************************
  234. * @doc INTERNAL
  235. *
  236. * @func PUBLIC HRESULT PASCAL NEAR | NotHandler |
  237. * Handle NOT the strings
  238. *
  239. * @parm _LPQTNODE | lpResQtNode |
  240. * The query structure that we add the result to
  241. *
  242. * @parm LPITOPIC | lpResTopicList |
  243. * The TopicList structure that we add the result to
  244. *
  245. * @parm LPV | lpStruct |
  246. * Vanilla pointers to different types of structures we are dealing with.
  247. * The contents of those pointers are determined by the value of
  248. * fOperationType, for EXPRESSION_TERM, this is a LPIOCC, for
  249. * EXPRESSION_EXPRESSION, this is a_LPQTNODE
  250. *
  251. * @parm int | fOperationType |
  252. * Tell what kinds of nodes we are handling, query-occurence or
  253. * query-query
  254. *
  255. * @rdesc S_OK : if the operation has been carried
  256. * errors : if some errors happened (out-of-memory)
  257. *************************************************************************/
  258. PUBLIC HRESULT PASCAL NEAR NotHandler(_LPQT lpQueryTree, _LPQTNODE lpResQtNode,
  259. LPITOPIC lpResTopicList, LPV lpStruct, int fOperationType)
  260. {
  261. _LPQTNODE lpCurQtNode;
  262. LPITOPIC lpTopicNode1;
  263. LPITOPIC lpTopicNode2;
  264. LPITOPIC lpNextTopic1;
  265. LPITOPIC lpNextTopic2;
  266. LPITOPIC lpPrev;
  267. long fResult;
  268. switch (fOperationType) {
  269. case EXPRESSION_TERM:
  270. RET_ASSERT(UNREACHED);
  271. break;
  272. case EXPRESSION_EXPRESSION:
  273. lpCurQtNode = (_LPQTNODE)lpStruct;
  274. if (HandleNullNode(lpQueryTree, lpResQtNode,
  275. lpCurQtNode, NOT_OP))
  276. return S_OK;
  277. /* Initialize variables */
  278. lpTopicNode1 = QTN_TOPICLIST(lpResQtNode);
  279. lpTopicNode2 = QTN_TOPICLIST(lpCurQtNode);
  280. lpPrev = NULL;
  281. while (lpTopicNode1 && lpTopicNode2) {
  282. /* Get the next nodes */
  283. lpNextTopic1 = lpTopicNode1->pNext;
  284. lpNextTopic2 = lpTopicNode2->pNext;
  285. if ((fResult = lpTopicNode1->dwTopicId -
  286. lpTopicNode2->dwTopicId) == 0) {
  287. /* The TopicIds match */
  288. TopicNodeFree(lpQueryTree, lpResQtNode, lpPrev, lpTopicNode1);
  289. lpTopicNode1 = lpNextTopic1;
  290. lpTopicNode2 = lpNextTopic2;
  291. }
  292. else if (fResult < 0) {
  293. /* List 1 < List 2 */
  294. lpPrev = lpTopicNode1;
  295. lpTopicNode1 = lpNextTopic1;
  296. }
  297. else {
  298. /* List 1 > List 2 */
  299. lpTopicNode2 = lpNextTopic2;
  300. }
  301. }
  302. /* Free doc 2 list */
  303. RemoveQuery(lpQueryTree, lpCurQtNode);
  304. if (QTN_TOPICLIST(lpResQtNode) == NULL)
  305. QTN_NODETYPE(lpResQtNode) = NULL_NODE;
  306. return S_OK;
  307. default: /* Weird parameters */
  308. RET_ASSERT(UNREACHED);
  309. }
  310. return S_OK;
  311. }
  312. /*************************************************************************
  313. * NEARHANDLER Description
  314. *
  315. * Semantics:
  316. * The currently chosen semantics are:
  317. * A near B near C --> (A near B) and (B near C)
  318. * Other possible semantics of NEAR for (A near B near C) are:
  319. * - A and B and C must be near each other
  320. * - Any two of A or B or C can be near each other
  321. *
  322. * Observation:
  323. * With the above semantics, we notice that only the last word (ie. B)
  324. * has meaning in the comparison with C.
  325. *
  326. * Implementation:
  327. * A special node will be used to differentiate between occurrences
  328. * coming from A and from B. Only the ones from B will be used in the
  329. * combination with C. Consider the following example (ProxDist = 5):
  330. * A B C
  331. * 10 15 5 (the numbers are word counts)
  332. * 14 18 16
  333. * After combining A and B we will end up with:
  334. * 15
  335. * 18
  336. * M <- marker separates occurrences from A and B
  337. * 10
  338. * 14
  339. * After that we only combine B's terms with C's terms. The result will
  340. * look like:
  341. * 16
  342. * M <- marker separates occurrences from B and C
  343. * 15
  344. * 18
  345. * M <- marker separates occurrences from A and B
  346. * 10
  347. * 14
  348. * Note that C's 5 is dropped since there is no match with B, even
  349. * that A's 10 matches it.
  350. * After sorting and getting rid of the marker nodes, the final result
  351. * will look as follows:
  352. * 10 14 15 16 18
  353. *************************************************************************/
  354. PRIVATE HRESULT PASCAL NEAR NearHandlerInsert (_LPQT lpQueryTree,
  355. LPITOPIC lpResTopicList, LPIOCC lpStartOcc, LPIOCC lpCurOcc)
  356. {
  357. HRESULT fRet = FALSE;
  358. if (!(lpCurOcc->fFlag & IS_MARKER_NODE) &&
  359. (fRet = NearListMatch(lpCurOcc, lpStartOcc, lpQueryTree->wProxDist))) {
  360. /* Insert the occurrence node */
  361. lpCurOcc->pNext = lpResTopicList->lpOccur;
  362. lpResTopicList->lpOccur = lpCurOcc;
  363. lpResTopicList->lcOccur ++;
  364. }
  365. else {
  366. /* Remove the occurrence node */
  367. RemoveNode(lpQueryTree, (LPV) NULL, (LPSLINK)NULL,
  368. (LPSLINK) lpCurOcc, OCCURENCE_NODE);
  369. }
  370. return (fRet);
  371. }
  372. /*************************************************************************
  373. * @doc INTERNAL
  374. *
  375. * @func LPIOCC PASCAL NEAR | FindMarker |
  376. * Given a starting occurrence node, traverse it and find the first
  377. * marker node
  378. *
  379. * @parm LPIOCC | lpStartOcc |
  380. * Starting node
  381. *
  382. * @rdesc The marker node
  383. *************************************************************************/
  384. PRIVATE LPIOCC PASCAL NEAR FindMarker (LPIOCC lpStartOcc)
  385. {
  386. LPIOCC lpCurOcc;
  387. for (lpCurOcc = lpStartOcc; lpCurOcc; lpCurOcc = lpCurOcc->pNext) {
  388. if (lpCurOcc->fFlag & IS_MARKER_NODE)
  389. break;
  390. }
  391. return lpCurOcc;
  392. }
  393. /*************************************************************************
  394. * @doc INTERNAL
  395. *
  396. * @func HRESULT PASCAL NEAR | InsertMarker |
  397. * This function will insert a marker node at the beginning of
  398. * lpResTopicList->lpOccur
  399. *
  400. * @parm LPQT | lpQueryTree |
  401. * Pointer to query tree where all globals are
  402. *
  403. * @parm LPITOPIC | lpResTopicList |
  404. * Pointer to TopicId node
  405. *
  406. * @rdesc S_OK
  407. *************************************************************************/
  408. PRIVATE HRESULT PASCAL NEAR InsertMarker (LPQT lpQueryTree, LPITOPIC lpResTopicList)
  409. {
  410. LPMARKER lpMark;
  411. /* Do some preparations by allocating marker nodes */
  412. if ((lpResTopicList->fFlag & HAS_MARKER) == FALSE) {
  413. if (!(lpMark = (LPMARKER)OccNodeAllocate(lpQueryTree)))
  414. return E_TOOMANYTOPICS;
  415. lpMark->fFlag |= (IS_MARKER_NODE | TO_BE_KEPT);
  416. /* Link the markers together with the nodes */
  417. lpMark->pNext = lpResTopicList->lpOccur;
  418. lpResTopicList->lpOccur = (LPIOCC)lpMark;
  419. /* Link with the next marker */
  420. lpMark->pNextMark = (LPMARKER)FindMarker (lpMark->pNext);
  421. /* Mark that we already has a marker */
  422. lpResTopicList->fFlag |= HAS_MARKER;
  423. /* Increment lcOccur, since this node will be removed
  424. * like a regular occurrence node */
  425. lpResTopicList->lcOccur ++;
  426. }
  427. return S_OK;
  428. }
  429. /*************************************************************************
  430. * @doc INTERNAL
  431. *
  432. * @func HRESULT PASCAL NEAR | NearListMatch |
  433. * Traverse a list and match all nodes against lpCurOcc
  434. *
  435. * @parm LPIOCC | lpCurOcc |
  436. * Occurrence node to be compared with
  437. *
  438. * @parm LPIOCC | lpStartOcc |
  439. * Start of the occurrence list
  440. *
  441. * @parm WORD | wProxDist |
  442. * Proximity distance
  443. *
  444. * @rdesc
  445. * return TRUE if the the node matches
  446. *************************************************************************/
  447. PRIVATE HRESULT PASCAL NEAR NearListMatch (LPIOCC lpCurOcc,
  448. LPIOCC lpStartOcc, WORD wProxDist)
  449. {
  450. LPIOCC lpResOcc;
  451. BOOL fMatch = FALSE;
  452. for (lpResOcc = lpStartOcc; lpResOcc; lpResOcc = lpResOcc->pNext) {
  453. if (lpResOcc->fFlag & IS_MARKER_NODE)
  454. break;
  455. if (!FRange(lpResOcc->dwCount, lpCurOcc->dwCount,
  456. wProxDist)) {
  457. lpResOcc->fFlag |= TO_BE_KEPT;
  458. fMatch = TRUE;
  459. }
  460. }
  461. if (fMatch)
  462. lpCurOcc->fFlag |= TO_BE_KEPT;
  463. return fMatch;
  464. }
  465. /*************************************************************************
  466. * @doc INTERNAL
  467. *
  468. * @func VOID PASCAL FAR | NearHandlerTopicCleanUp |
  469. * Clean up a TopicList by going thru each sequence of occurrence
  470. * delimited by marker, do the check again and remove all extra
  471. * occurrences
  472. *
  473. * @parm _LPQT | lpQueryTree |
  474. * Pointer to query tree
  475. *
  476. * @parm _LPQTNODE | lpResQtNode |
  477. * Pointer to result query node
  478. *
  479. * @parm LPITOPIC | lpCurTopic |
  480. * Current doc id
  481. *************************************************************************/
  482. PUBLIC HRESULT PASCAL FAR NearHandlerTopicCleanUp (_LPQT lpQueryTree,
  483. _LPQTNODE lpResQtNode, LPITOPIC lpCurTopic)
  484. {
  485. LPMARKER lpMarkStart;
  486. LPMARKER lpCurMark;
  487. LPIOCC lpCurOcc;
  488. LPIOCC lpStartOcc;
  489. BOOL fDone;
  490. /* Find the first marker */
  491. lpMarkStart = (LPMARKER)FindMarker(lpCurTopic->lpOccur);
  492. if (lpMarkStart == NULL) {
  493. /* This branch has been cleaned up of marker nodes */
  494. return E_FAIL;
  495. }
  496. /* The first occurrences from the start to lpMarkStart must be
  497. * TO_BE_KEPT, so we don't have to check them since they just
  498. * freshly came from a near handler. All we have to do
  499. * is set the flag */
  500. for (lpCurOcc = lpCurTopic->lpOccur; lpCurOcc && !(lpCurOcc->fFlag &
  501. IS_MARKER_NODE); lpCurOcc = lpCurOcc->pNext)
  502. lpCurOcc->fFlag |= TO_BE_KEPT;
  503. if (lpMarkStart->pNextMark == NULL) {
  504. /* Simple A NEAR B, just set the flag */
  505. for (lpCurOcc = lpMarkStart->pNext; lpCurOcc && !(lpCurOcc->fFlag &
  506. IS_MARKER_NODE); lpCurOcc = lpCurOcc->pNext)
  507. lpCurOcc->fFlag |= TO_BE_KEPT;
  508. }
  509. else {
  510. /* Complex NEAR terms, such as: A NEAR B NEAR C */
  511. lpStartOcc = lpMarkStart->pNext;
  512. lpCurMark = lpMarkStart->pNextMark;
  513. fDone = FALSE;
  514. for (;lpCurMark; lpCurMark = lpCurMark->pNextMark)
  515. {
  516. for (lpCurOcc = lpCurMark->pNext;
  517. lpCurOcc && !(lpCurOcc->fFlag & IS_MARKER_NODE);
  518. lpCurOcc = lpCurOcc->pNext) {
  519. if (NearListMatch (lpCurOcc, lpStartOcc,
  520. lpQueryTree->wProxDist) == FALSE)
  521. {
  522. fDone = TRUE;
  523. }
  524. }
  525. lpStartOcc = lpCurMark->pNext;
  526. if (fDone)
  527. break;
  528. }
  529. }
  530. /* Clear up all the marker node flag to ensure that they will be
  531. * removed */
  532. while (lpMarkStart) {
  533. lpMarkStart->fFlag &= ~(TO_BE_KEPT | IS_MARKER_NODE);
  534. lpMarkStart = lpMarkStart->pNextMark;
  535. }
  536. return S_OK;
  537. }
  538. PUBLIC VOID PASCAL NEAR NearHandlerCleanUp (_LPQT lpQueryTree,
  539. _LPQTNODE lpResQtNode)
  540. {
  541. LPITOPIC lpCurTopic;
  542. for (lpCurTopic = QTN_TOPICLIST(lpResQtNode); lpCurTopic;
  543. lpCurTopic = lpCurTopic->pNext) {
  544. if (NearHandlerTopicCleanUp (lpQueryTree, lpResQtNode,
  545. lpCurTopic) == S_OK) {
  546. RemoveUnmarkedOccList(lpQueryTree, lpCurTopic,
  547. lpCurTopic->lpOccur, TRUE);
  548. }
  549. }
  550. }
  551. /*************************************************************************
  552. * @doc INTERNAL
  553. *
  554. * @func PUBLIC HRESULT PASCAL NEAR | NearHandler |
  555. * Handle NEAR operation
  556. *
  557. * @parm _LPQTNODE | lpResQtNode |
  558. * The query structure that we add the result to
  559. *
  560. * @parm LPITOPIC | lpResTopicList |
  561. * The TopicList structure that we add the result to
  562. *
  563. * @parm LPV | lpStruct |
  564. * Vanilla pointers to different types of structures we are dealing with.
  565. * The contents of those pointers are determined by the value of
  566. * fOperationType, for EXPRESSION_TERM, this is a LPIOCC, for
  567. * EXPRESSION_EXPRESSION, this is a_LPQTNODE
  568. * @parm int | fOperationType |
  569. * Tell what kinds of nodes we are handling, query-occurence or
  570. * query-query
  571. *
  572. * @rdesc S_OK : if the operation has been carried
  573. * errors : if some errors happened (out-of-memory)
  574. *************************************************************************/
  575. PUBLIC HRESULT PASCAL NEAR NearHandler(_LPQT lpQueryTree, _LPQTNODE lpResQtNode,
  576. LPITOPIC lpResTopicList, LPV lpStruct, int fOperationType)
  577. {
  578. _LPQTNODE lpCurQtNode; /* Current query tree node */
  579. LPITOPIC lpCurTopicList; /* Current TopicId node */
  580. LPITOPIC lpNextTopicList;
  581. LPIOCC lpCurOcc;
  582. LPIOCC lpStartOcc;
  583. LPITOPIC lpPrevRes;
  584. LPSLINK lpTmp; //erinfox
  585. switch (fOperationType) {
  586. case EXPRESSION_TERM:
  587. /* Insert a marker node if necessary */
  588. if (InsertMarker (lpQueryTree, lpResTopicList) != S_OK)
  589. return E_TOOMANYTOPICS;
  590. /* Look for the starting point */
  591. lpStartOcc = FindMarker(lpResTopicList->lpOccur);
  592. RET_ASSERT(lpStartOcc);
  593. /* Handle the near operation */
  594. if (NearHandlerInsert (lpQueryTree, lpResTopicList, lpStartOcc->pNext,
  595. (LPIOCC)lpStruct))
  596. lpResTopicList->fFlag |= TO_BE_KEPT;
  597. break;
  598. case EXPRESSION_EXPRESSION:
  599. lpCurQtNode = (_LPQTNODE)lpStruct;
  600. if (HandleNullNode(lpQueryTree, lpResQtNode,
  601. lpCurQtNode, NEAR_OP))
  602. return S_OK;
  603. /* Now doing the real jobs */
  604. /* Make sure that we are pointing to the right place to search
  605. */
  606. lpQueryTree->lpTopicStartSearch = lpResQtNode->lpTopicList;
  607. /* First check the coming data from lpCurTopicList.
  608. * If there isn't an equivalent TopicId in QTN_TOPICLIST(lpResQtNode),
  609. * then remove it
  610. */
  611. for (lpCurTopicList = QTN_TOPICLIST(lpCurQtNode); lpCurTopicList;
  612. lpCurTopicList = lpNextTopicList) {
  613. lpNextTopicList = lpCurTopicList->pNext;
  614. /* Find the location of the TopicID List in the query. */
  615. if ((lpResTopicList = TopicNodeSearch(lpQueryTree,
  616. lpResQtNode, lpCurTopicList->dwTopicId)) == NULL) {
  617. /* Can't find equivalent TopicId in the result list, just
  618. remove lpCurTopicList
  619. */
  620. TopicNodeFree(lpQueryTree, lpCurQtNode, NULL, lpCurTopicList);
  621. continue;
  622. }
  623. /* An equivalent TopicId is found */
  624. /* Insert a marker node if necessary */
  625. if (InsertMarker (lpQueryTree, lpResTopicList) != S_OK)
  626. return E_TOOMANYTOPICS;
  627. /* Look for the starting point */
  628. RET_ASSERT(lpResTopicList->lpOccur)
  629. lpStartOcc = FindMarker(lpResTopicList->lpOccur);
  630. lpStartOcc = lpStartOcc->pNext; // Skip marker node
  631. for (lpCurOcc = lpCurTopicList->lpOccur; lpCurOcc;
  632. lpCurOcc = lpCurTopicList->lpOccur) {
  633. /* "Unlink" lpCurOcc */
  634. lpCurTopicList->lpOccur = lpCurOcc->pNext;
  635. /* Handle the near operation */
  636. if (NearHandlerInsert (lpQueryTree, lpResTopicList,
  637. lpStartOcc, lpCurOcc)) {
  638. lpResTopicList->fFlag |= TO_BE_KEPT;
  639. }
  640. }
  641. RET_ASSERT(lpCurTopicList->lpOccur == NULL);
  642. RemoveNode(lpQueryTree, (LPV) lpCurQtNode, NULL,
  643. (LPSLINK) lpCurTopicList, TOPICLIST_NODE);
  644. if (lpResTopicList->lpOccur->fFlag & IS_MARKER_NODE) {
  645. /* We didn't find any match, remove this TopicList */
  646. // erinfox - I don't know lpPrevRes for lpResTopicList, so I get it here
  647. for (lpPrevRes = NULL, lpTmp = (LPSLINK)lpResQtNode->lpTopicList; lpTmp;
  648. lpTmp = lpTmp->pNext)
  649. {
  650. if (lpTmp == (LPSLINK)lpResTopicList)
  651. break;
  652. lpPrevRes = (LPITOPIC) lpTmp;
  653. }
  654. TopicNodeFree(lpQueryTree, lpResQtNode, lpPrevRes, lpResTopicList);
  655. }
  656. else {
  657. /* Remove all unmarked occurrences, but don't
  658. * reset the TO_BE_KEPT flag */
  659. RemoveUnmarkedOccList(lpQueryTree, lpResTopicList,
  660. lpResTopicList->lpOccur, FALSE);
  661. }
  662. }
  663. RemoveQuery(lpQueryTree, lpCurQtNode);
  664. if (QTN_TOPICLIST(lpResQtNode) == NULL)
  665. QTN_NODETYPE(lpResQtNode) = NULL_NODE;
  666. return S_OK;
  667. break;
  668. }
  669. return S_OK;
  670. }
  671. /*************************************************************************
  672. * @doc INTERNAL
  673. *
  674. * @func PUBLIC HRESULT PASCAL NEAR | PhraseHandler |
  675. * Handle PHRASE operation
  676. *
  677. * @parm _LPQTNODE | lpResQtNode |
  678. * The query structure that we add the result to
  679. *
  680. * @parm LPITOPIC | lpResTopicList |
  681. * The TopicList structure that we add the result to
  682. *
  683. * @parm LPV | lpStruct |
  684. * Vanilla pointers to different types of structures we are dealing with.
  685. * The contents of those pointers are determined by the value of
  686. * fOperationType, for EXPRESSION_TERM, this is a LPIOCC, for
  687. * EXPRESSION_EXPRESSION, this is a_LPQTNODE
  688. *
  689. * @parm int | fOperationType |
  690. * Tell what kinds of nodes we are handling, query-occurence or
  691. * query-query
  692. *
  693. * @rdesc S_OK : if the operation has been carried
  694. * errors : if some errors happened (out-of-memory)
  695. *************************************************************************/
  696. PUBLIC HRESULT PASCAL NEAR PhraseHandler(_LPQT lpQueryTree, _LPQTNODE lpResQtNode,
  697. LPITOPIC lpResTopicList, LPIOCC lpCurOcc, int fOperationType)
  698. {
  699. LPIOCC lpResOcc;
  700. BOOL fResult = 0;
  701. LPIOCC lpStartOcc = NULL;
  702. RET_ASSERT(fOperationType == EXPRESSION_TERM);
  703. /* Start at the beginning if necessary */
  704. if ((lpResOcc = lpQueryTree->lpOccStartSearch) == NULL)
  705. lpResOcc = lpResTopicList->lpOccur;
  706. for (; lpResOcc; lpResOcc = lpResOcc->pNext) {
  707. if ((lpResOcc->fFlag & TO_BE_SKIPPED) == 0) {
  708. if ((fResult = (int)(lpCurOcc->dwCount - lpResOcc->dwCount))
  709. == 1) {
  710. /* The nodes are consecutive, mark them TO_BE_KEPT */
  711. lpResOcc->fFlag |= TO_BE_KEPT | TO_BE_SKIPPED;
  712. lpResOcc->fFlag &= ~TO_BE_COMPARED;
  713. lpCurOcc->fFlag |= TO_BE_KEPT | TO_BE_SKIPPED |
  714. TO_BE_COMPARED;
  715. lpResTopicList->fFlag |= TO_BE_KEPT;
  716. break;
  717. }
  718. /* Reset lpStartOcc */
  719. lpStartOcc = NULL;
  720. if (fResult <= 0) {
  721. /* CurOcc is less than what is in the result list */
  722. break;
  723. }
  724. }
  725. else {
  726. /* Get a skipped node. Mark the assumed starting node */
  727. if (lpStartOcc == NULL) {
  728. lpStartOcc = lpResOcc;
  729. }
  730. }
  731. }
  732. if (fResult == 1) {
  733. /* Add this node, and mark the starting point for next search */
  734. lpQueryTree->lpOccStartSearch = lpCurOcc->pNext = lpResOcc->pNext;
  735. lpResOcc->pNext = lpCurOcc;
  736. lpResTopicList->lcOccur ++;
  737. /* Mark all previous nodes TO_BE_KEPT */
  738. if (lpStartOcc) {
  739. for (; lpStartOcc != lpCurOcc; lpStartOcc = lpStartOcc->pNext)
  740. lpStartOcc->fFlag |= TO_BE_KEPT;
  741. }
  742. }
  743. else {
  744. RemoveNode(lpQueryTree, (LPV) NULL, (LPSLINK)NULL,
  745. (LPSLINK) lpCurOcc, OCCURENCE_NODE);
  746. }
  747. return S_OK;
  748. }
  749. /*************************************************************************
  750. * @doc INTERNAL
  751. *
  752. * @func VOID PASCAL NEAR | MergeOccurence |
  753. * Merge two occurences lists together
  754. *
  755. * @parm _LPQT | lpQueryTree |
  756. * Pointer to query tree (where global variables are)
  757. *
  758. * @parm LPITOPIC | lpResTopicList |
  759. * Resulting TopicList that has the merged occurence list
  760. *
  761. * @parm LPITOPIC | lpCurTopicList |
  762. * TopicList that has the occurrence list to be merged to the
  763. * resulting list
  764. *************************************************************************/
  765. PUBLIC VOID PASCAL NEAR MergeOccurence(_LPQT lpQueryTree,
  766. LPITOPIC lpResTopicList, LPITOPIC lpCurTopicList)
  767. {
  768. register LPIOCC lpTmpOcc;
  769. register LPIOCC lpNextOcc;
  770. /* Reset lpOccStartSearch */
  771. lpQueryTree->lpOccStartSearch = NULL;
  772. for (lpTmpOcc = lpCurTopicList->lpOccur; lpTmpOcc; lpTmpOcc = lpNextOcc){
  773. lpNextOcc = lpTmpOcc->pNext;
  774. OccNodeInsert(lpQueryTree, lpResTopicList, lpTmpOcc);
  775. }
  776. lpCurTopicList->lpOccur = NULL;
  777. lpCurTopicList->lcOccur = 0;
  778. }
  779. /*************************************************************************
  780. * @doc INTERNAL
  781. *
  782. * @func VOID PASCAL NEAR | RemoveUnmarkedTopicList |
  783. * Remove all the TopicLists that are not marked TO_BE_KEPT
  784. *
  785. * @parm _LPQT | lpQueryTree |
  786. * Pointer to query tree (for globasl variables)
  787. *
  788. * @parm _LPQTNODE | lpQtNode |
  789. * Query tree node to be checked
  790. *
  791. * @parm HRESULT | fKeepOccurence |
  792. * If 0, then check and remove all occurrences nodes that are not
  793. * marked TO_BE_KEPT
  794. *************************************************************************/
  795. PUBLIC VOID PASCAL NEAR RemoveUnmarkedTopicList (_LPQT lpQueryTree,
  796. _LPQTNODE lpQtNode, BOOL fKeepOccurence)
  797. {
  798. register LPITOPIC lpTopicList;
  799. register LPITOPIC lpNextTopicList;
  800. // erinfox: add to keep track of previous node
  801. register LPITOPIC lpPrev;
  802. /* Traverse the doclist */
  803. for (lpPrev = NULL, lpTopicList = QTN_TOPICLIST(lpQtNode); lpTopicList;
  804. lpTopicList = lpNextTopicList) {
  805. lpNextTopicList = lpTopicList->pNext;
  806. if ((lpTopicList->fFlag & TO_BE_KEPT) == 0) {
  807. /* Free the doc node and its occurences list */
  808. TopicNodeFree(lpQueryTree, lpQtNode, lpPrev, lpTopicList);
  809. }
  810. else {
  811. lpTopicList->fFlag &= ~(TO_BE_KEPT | HAS_MARKER); // Reset the flag
  812. if (!fKeepOccurence) {
  813. /* Check the occurences list, and free all nodes that
  814. * are not marked TO_BE_KEPT
  815. */
  816. RemoveUnmarkedOccList(lpQueryTree, lpTopicList,
  817. lpTopicList->lpOccur, TRUE);
  818. if (lpTopicList->lpOccur == NULL) {
  819. RemoveNode(lpQueryTree, (LPV) lpQtNode, NULL,
  820. (LPSLINK) lpTopicList, TOPICLIST_NODE);
  821. }
  822. }
  823. lpPrev = lpTopicList;
  824. }
  825. }
  826. }
  827. /*************************************************************************
  828. * @doc INTERNAL
  829. *
  830. * @func VOID PASCAL NEAR | RemoveUnmarkedNearTopicList |
  831. * Remove all the TopicLists that are not marked TO_BE_KEPT
  832. *
  833. * @parm _LPQT | lpQueryTree |
  834. * Pointer to query tree (for globasl variables)
  835. *
  836. * @parm _LPQTNODE | lpQtNode |
  837. * Query tree node to be checked
  838. *
  839. * @parm BOOL | fKeepOccurence |
  840. * If 0, then check and remove all occurrences nodes that are not
  841. * marked TO_BE_KEPT
  842. *************************************************************************/
  843. PUBLIC VOID PASCAL NEAR RemoveUnmarkedNearTopicList (_LPQT lpQueryTree,
  844. _LPQTNODE lpQtNode)
  845. {
  846. register LPITOPIC lpTopicList;
  847. register LPITOPIC lpNextTopicList;
  848. LPIOCC lpMark;
  849. // erinfox - add to keep track of previous node
  850. register LPITOPIC lpPrev;
  851. /* Traverse the doclist */
  852. for (lpPrev = NULL, lpTopicList = QTN_TOPICLIST(lpQtNode); lpTopicList;
  853. lpTopicList = lpNextTopicList) {
  854. lpNextTopicList = lpTopicList->pNext;
  855. if ((lpTopicList->fFlag & TO_BE_KEPT) == 0) {
  856. /* Free the doc node and its occurences list */
  857. TopicNodeFree(lpQueryTree, lpQtNode, lpPrev, lpTopicList);
  858. }
  859. else {
  860. // Reset the flag
  861. lpTopicList->fFlag &= ~(TO_BE_KEPT | HAS_MARKER);
  862. /* Find the marker */
  863. lpMark = FindMarker(lpTopicList->lpOccur);
  864. RemoveUnmarkedOccList(lpQueryTree, lpTopicList,
  865. lpTopicList->lpOccur, TRUE);
  866. /* Remove all unmarked occurrences between this marker and
  867. * the next one */
  868. if (lpMark)
  869. RemoveUnmarkedOccList(lpQueryTree, lpTopicList,
  870. lpMark->pNext, TRUE);
  871. lpPrev = lpTopicList;
  872. }
  873. }
  874. }
  875. /*************************************************************************
  876. * @doc INTERNAL
  877. *
  878. * @func PASCAL NEAR | MarkTopicList |
  879. * Mark all TopicId nodes in a doc list TO_BE_KEPT
  880. *
  881. * @parm _LPQTNODE | lpQtNode |
  882. * Pointer to the query tree node that contains the doc list
  883. *************************************************************************/
  884. PUBLIC VOID PASCAL NEAR MarkTopicList (_LPQTNODE lpQtNode)
  885. {
  886. register LPITOPIC lpTopicList;
  887. for (lpTopicList = QTN_TOPICLIST(lpQtNode); lpTopicList;
  888. lpTopicList = lpTopicList->pNext) {
  889. lpTopicList->fFlag |= TO_BE_KEPT;
  890. }
  891. }
  892. /*************************************************************************
  893. * @doc INTERNAL
  894. *
  895. * @func VOID PASCAL NEAR | RemoveUnmarkedOccList |
  896. * Remove all the occurrence nodes that are not marked TO_BE_KEPT
  897. *
  898. * @parm LPQT | lpQueryTree |
  899. * Pointer to query tree (for global variables)
  900. *
  901. * @parm int | fResetFlag |
  902. * Do we reset the TO_BE_KEPT flag ?
  903. *
  904. * @parm LPIOCC | lpTopicList |
  905. * Pointer to Topic list to be checked
  906. *
  907. * @parm LPIOCC | lpOccList |
  908. * Pointer to occurrence list to be checked
  909. *************************************************************************/
  910. PRIVATE VOID PASCAL NEAR RemoveUnmarkedOccList (LPQT lpQueryTree,
  911. LPITOPIC lpTopicList, LPIOCC lpOccList, int fResetFlag)
  912. {
  913. register LPIOCC lpNextOccList;
  914. register LPIOCC lpPrevOccList;
  915. lpPrevOccList = NULL;
  916. for (;lpOccList && !(lpOccList->fFlag & IS_MARKER_NODE);
  917. lpOccList = lpNextOccList) {
  918. lpNextOccList = lpOccList->pNext;
  919. if ((lpOccList->fFlag & TO_BE_KEPT) == 0) {
  920. RemoveNode(lpQueryTree, (LPV) lpTopicList, (LPSLINK)lpPrevOccList,
  921. (LPSLINK) lpOccList, OCCURENCE_NODE);
  922. }
  923. else if (fResetFlag) {
  924. lpOccList->fFlag &= ~TO_BE_KEPT; // Reset the flag
  925. if (lpOccList->fFlag & TO_BE_COMPARED)
  926. lpOccList->fFlag &= ~TO_BE_SKIPPED;
  927. lpPrevOccList = lpOccList;
  928. }
  929. }
  930. /* Reset the flag of the marker node */
  931. if (lpOccList)
  932. lpOccList->fFlag &= ~TO_BE_KEPT;
  933. }
  934. /*************************************************************************
  935. * @doc INTERNAL
  936. *
  937. * @func VOID PASCAL NEAR | CleanMarkedOccList |
  938. * Clean all the occurrence nodes from TO_BE_KEPT
  939. *
  940. * @parm LPIOCC | lpTopicList |
  941. * Pointer to Topic list to be checked
  942. *************************************************************************/
  943. VOID PASCAL FAR CleanMarkedOccList (LPITOPIC lpTopicList)
  944. {
  945. register LPIOCC lpCurOcc;
  946. for (;lpTopicList; lpTopicList = lpTopicList->pNext)
  947. {
  948. for (lpCurOcc = lpTopicList->lpOccur; lpCurOcc;
  949. lpCurOcc = lpCurOcc->pNext)
  950. {
  951. lpCurOcc->fFlag &= ~TO_BE_KEPT;
  952. }
  953. }
  954. }
  955. /*************************************************************************
  956. * @doc INTERNAL
  957. *
  958. * @func HRESULT PASCAL NEAR | HandleNullNode |
  959. * Handle NULL query node. This is an optimization which will
  960. * save processing time.
  961. *
  962. * @parm LPQT | lpQueryTree |
  963. * Pointer to query tree (for global variables)
  964. *
  965. * @parm _LPQTNODE | lpResQtNode |
  966. * Pointer to result query tree node
  967. *
  968. * @parm _LPQTNODE | lpCurQtNode |
  969. * Pointer to query tree node
  970. *
  971. * @parm int | Operator |
  972. * What operator are we dealing with
  973. *
  974. * @rdesc FALSE, if no optimization can be done, TRUE otherwise
  975. *************************************************************************/
  976. PRIVATE HRESULT PASCAL NEAR HandleNullNode(LPQT lpQueryTree,
  977. _LPQTNODE lpResQtNode, _LPQTNODE lpCurQtNode, int Operator)
  978. {
  979. _LPQTNODE lpChild;
  980. if (QTN_NODETYPE(lpResQtNode) != NULL_NODE &&
  981. QTN_NODETYPE(lpCurQtNode) != NULL_NODE)
  982. return FALSE;
  983. if (Operator == NOT_OP) {
  984. if (QTN_NODETYPE(lpResQtNode) == NULL_NODE) {
  985. /* NULL ! a = NULL */
  986. RemoveQuery(lpQueryTree, lpCurQtNode);
  987. QTN_NODETYPE(lpCurQtNode) = NULL_NODE;
  988. return TRUE;
  989. }
  990. else if (QTN_NODETYPE(lpCurQtNode) == NULL_NODE) {
  991. /* a ! NULL = a */
  992. return TRUE;
  993. }
  994. return FALSE;
  995. }
  996. lpChild = QTN_NODETYPE(lpResQtNode) == NULL_NODE ?
  997. lpCurQtNode : lpResQtNode;
  998. switch (Operator) {
  999. case AND_OP: // a & NULL = NULL
  1000. case NEAR_OP: // a # NULL = NULL
  1001. case PHRASE_OP: // a + NULL = NULL ??
  1002. RemoveQuery(lpQueryTree, lpChild);
  1003. QTN_NODETYPE(lpChild) = NULL_NODE;
  1004. return TRUE;
  1005. case OR_OP: // a | NULL = a
  1006. if (QTN_NODETYPE(lpResQtNode) == NULL_NODE) {
  1007. *lpResQtNode = *lpChild;
  1008. QTN_NODETYPE(lpChild) = NULL_NODE;
  1009. QTN_LEFT(lpChild) = QTN_RIGHT(lpChild) = NULL;
  1010. QTN_TOPICLIST(lpChild) = NULL;
  1011. }
  1012. return TRUE;
  1013. }
  1014. return FALSE;
  1015. }
  1016. PRIVATE int PASCAL NEAR FRange(DWORD dwCount1, DWORD dwCount2, WORD cProxDist)
  1017. {
  1018. long fResult;
  1019. int fRet = 1;
  1020. fResult = dwCount1 - dwCount2;
  1021. if (fResult < 0) {
  1022. fRet = -1;
  1023. fResult = -fResult;
  1024. }
  1025. if (fResult != 0 && fResult <= (long)cProxDist)
  1026. return 0;
  1027. else
  1028. return fRet;
  1029. }
  1030. /*************************************************************************
  1031. * @doc INTERNAL
  1032. *
  1033. * @func VOID PASCAL NEAR | RemoveQuery |
  1034. * Remove all doc nodes for a query node
  1035. *
  1036. * @parm LPQT | lpQueryTree |
  1037. * Pointer to query tree (for global variables)
  1038. *
  1039. * @parm _LPQTNODE | lpCurQtNode |
  1040. * Pointer to query tree node to be cleared
  1041. *************************************************************************/
  1042. PRIVATE VOID PASCAL NEAR RemoveQuery(LPQT lpQueryTree, _LPQTNODE lpCurQtNode)
  1043. {
  1044. register LPITOPIC lpCurTopicList;
  1045. register LPITOPIC lpNextTopicList;
  1046. /* Remove all occurences of all doclist */
  1047. if ((lpCurTopicList = QTN_TOPICLIST(lpCurQtNode)) == NULL)
  1048. return;
  1049. for (; lpCurTopicList; lpCurTopicList = lpNextTopicList)
  1050. {
  1051. lpNextTopicList = lpCurTopicList->pNext;
  1052. TopicNodeFree(lpQueryTree, lpCurQtNode, NULL, lpCurTopicList);
  1053. }
  1054. QTN_TOPICLIST(lpCurQtNode) = NULL;
  1055. }
  1056. /*************************************************************************
  1057. * @doc INTERNAL
  1058. *
  1059. * @func VOID PASCAL NEAR | SortResult |
  1060. * Sort the results according to flag
  1061. *
  1062. * @parm _LPQT | lpQueryTree |
  1063. * Pointer to query tree (containing globals)
  1064. *
  1065. * @parm _LPQTNODE | lpQtNode |
  1066. * Pointer to query node
  1067. *
  1068. * @parm WORD | fFlag |
  1069. * Tell how to sort the result:
  1070. * @flag ORDERED_BASED |
  1071. * Everything is ordered TopicId, hit offsets
  1072. * @flag HIT_COUNT_BASED |
  1073. * The doc id with most hit will be returned first
  1074. * @flag WEIGHT_BASED |
  1075. * The topicId with most weight will be returned first
  1076. *************************************************************************/
  1077. PUBLIC VOID PASCAL NEAR SortResult (_LPQT lpQueryTree, _LPQTNODE lpQtNode,
  1078. WORD fFlag)
  1079. {
  1080. register LPITOPIC lpTopic;
  1081. switch (fFlag) {
  1082. case ORDERED_BASED:
  1083. for (lpTopic = lpQtNode->lpTopicList; lpTopic; lpTopic = lpTopic->pNext)
  1084. OccurenceSort (lpQueryTree, lpTopic);
  1085. break;
  1086. case HIT_COUNT_BASED:
  1087. case WEIGHT_BASED:
  1088. TopicListSort (lpQtNode, fFlag);
  1089. break;
  1090. }
  1091. #if defined(_DEBUG) && defined(SIMILARITY) && defined(_DUMPALL)
  1092. {
  1093. int i;
  1094. _DPF1("Sort total: %lu\n", lpQtNode->cTopic);
  1095. for (i = 0, lpTopic = lpQtNode->lpTopicList; lpTopic && i < 10; lpTopic = lpTopic->pNext, i++)
  1096. {
  1097. _DPF2("Topic %lu (%u)\n", lpTopic->dwTopicId, lpTopic->wWeight);
  1098. }
  1099. }
  1100. #endif
  1101. }
  1102. PRIVATE HRESULT PASCAL NEAR TopicListInsertionSort (_LPQTNODE lpQtNode, BOOL fFlag)
  1103. {
  1104. LPITOPIC lpPrevTopic;
  1105. LPITOPIC lpCurTopic;
  1106. LPITOPIC lpNextTopic;
  1107. LPITOPIC lpTmpTopic;
  1108. FCMP fCompare;
  1109. if (fFlag == HIT_COUNT_BASED)
  1110. fCompare = HitCountCompare;
  1111. else
  1112. fCompare = TopicWeightCompare;
  1113. for (lpCurTopic = lpQtNode->lpTopicList; lpCurTopic; lpCurTopic = lpNextTopic) {
  1114. if (lpNextTopic = lpCurTopic->pNext) {
  1115. if ((*fCompare) (lpCurTopic, lpNextTopic) < 0) {
  1116. /* Out of order sequence */
  1117. /* Unlink the out of order node */
  1118. lpCurTopic->pNext = lpNextTopic->pNext;
  1119. /* Do an insertion sort */
  1120. for (lpPrevTopic = NULL, lpTmpTopic = lpQtNode->lpTopicList;;
  1121. lpTmpTopic = lpTmpTopic->pNext) {
  1122. if ((*fCompare) (lpTmpTopic, lpNextTopic) < 0) {
  1123. /* We just pass the insertion point */
  1124. if (lpPrevTopic == NULL) {
  1125. lpNextTopic->pNext = lpQtNode->lpTopicList;
  1126. lpQtNode->lpTopicList = lpNextTopic;
  1127. }
  1128. else {
  1129. lpNextTopic->pNext = lpPrevTopic->pNext;
  1130. lpPrevTopic->pNext = lpNextTopic;
  1131. }
  1132. break;
  1133. }
  1134. lpPrevTopic = lpTmpTopic;
  1135. }
  1136. /* Reset lpNextTopic */
  1137. lpNextTopic = lpCurTopic;
  1138. }
  1139. }
  1140. }
  1141. return S_OK;
  1142. }
  1143. /*************************************************************************
  1144. * @doc INTERNAL
  1145. *
  1146. * @func VOID PASCAL NEAR | TopicListSort |
  1147. * Sort the results according to flag
  1148. *
  1149. * @parm _LPQT | lpQueryTree |
  1150. * Pointer to query tree (containing globals)
  1151. *
  1152. * @parm _LPQTNODE | lpQtNode |
  1153. * Pointer to query node
  1154. *
  1155. * @parm WORD | fFlag |
  1156. * Tell how to sort the result:
  1157. * @flag HIT_COUNT_BASED |
  1158. * The doc id with most hit will be returned first
  1159. * @flag WEIGHT_BASED |
  1160. * The topicId with most weight will be returned first
  1161. *************************************************************************/
  1162. HRESULT PASCAL NEAR TopicListSort (_LPQTNODE lpQtNode, BOOL fFlag)
  1163. {
  1164. HANDLE hHeap; /* Handle to heap block */
  1165. LPITOPIC far *lrgHeap; /* Pointer to heap block */
  1166. TOPIC_LIST Dummy; /* Dummy node to speed up search, compare */
  1167. LPITOPIC lpCurTopic; /* Current Topic node */
  1168. LPITOPIC lpNextTopic; /* Next Topic node */
  1169. LPITOPIC lpInsertPt; /* Current insertion point */
  1170. WORD cLastItem;
  1171. WORD MaxItem;
  1172. LPITOPIC far * lpPQNode;
  1173. LPITOPIC lpTopNode;
  1174. LPITOPIC lpNextNode;
  1175. WORD wCurWeight;
  1176. FCMP fCompare;
  1177. /* Allocate the heap */
  1178. if (lpQtNode->cTopic > MAX_HEAP_ENTRIES)
  1179. MaxItem = MAX_HEAP_ENTRIES;
  1180. else
  1181. MaxItem = (WORD)lpQtNode->cTopic + 1;
  1182. /* If the list is short, we can use insertion sort since it is faster
  1183. * then preparing and use heap sort
  1184. */
  1185. if (MaxItem <= 20)
  1186. return TopicListInsertionSort (lpQtNode, fFlag);
  1187. if ((hHeap = _GLOBALALLOC(DLLGMEM, MaxItem * sizeof(LPV))) == NULL) {
  1188. /* We run out of memory for the heap. Try a smaller size */
  1189. if ((hHeap = _GLOBALALLOC(DLLGMEM,
  1190. (MaxItem = MIN_HEAP_ENTRIES)* sizeof(LPV))) == NULL) {
  1191. /* We really run out of memory, so just do a regular
  1192. * insertion sort. It is slow but at least something
  1193. * works
  1194. */
  1195. return TopicListInsertionSort (lpQtNode, fFlag);
  1196. }
  1197. }
  1198. MaxItem --; /* Since node 0 is used for sentinel */
  1199. lrgHeap = (LPITOPIC far *)_GLOBALLOCK (hHeap);
  1200. /* Initialize of Dummy */
  1201. Dummy.wWeight = 0xffff; // Maximum weigth for sentinel
  1202. Dummy.pNext = NULL;
  1203. /* Set the sentinel */
  1204. lrgHeap[0] = &Dummy;
  1205. /* Initialize the variables */
  1206. lpInsertPt = &Dummy;
  1207. lpCurTopic = lpQtNode->lpTopicList;
  1208. if (fFlag == HIT_COUNT_BASED)
  1209. fCompare = HitCountCompare;
  1210. else
  1211. fCompare = TopicWeightCompare;
  1212. while (lpCurTopic) {
  1213. lpPQNode = &lrgHeap[1];
  1214. for (cLastItem = 1; lpCurTopic && cLastItem <= MaxItem;
  1215. cLastItem++, lpPQNode++) {
  1216. lpNextTopic = lpCurTopic->pNext;
  1217. *lpPQNode = lpCurTopic;
  1218. lpCurTopic->pNext = NULL;
  1219. lpCurTopic = lpNextTopic;
  1220. HeapUp (lrgHeap, cLastItem, fCompare);
  1221. }
  1222. cLastItem--;
  1223. /* Set up the last pointer */
  1224. for (; cLastItem > 0;) {
  1225. lpTopNode = lrgHeap[1];
  1226. /* Get the new node's weight */
  1227. wCurWeight = lpTopNode->wWeight;
  1228. /* Insert into the resulting list in decreasing order */
  1229. if (wCurWeight > lpInsertPt->wWeight) {
  1230. /* Start from the beginning of the list */
  1231. lpInsertPt = &Dummy;
  1232. }
  1233. while (lpNextNode = lpInsertPt->pNext) {
  1234. if (lpNextNode->wWeight < wCurWeight)
  1235. break;
  1236. lpInsertPt = lpNextNode;
  1237. }
  1238. lpTopNode->pNext = lpInsertPt->pNext;
  1239. lpInsertPt->pNext = lpTopNode;
  1240. lpInsertPt = lpTopNode;
  1241. lrgHeap[1] = lrgHeap[cLastItem--];
  1242. HeapDown (lrgHeap, cLastItem, fCompare);
  1243. }
  1244. }
  1245. /* Update the pointer to the sorted list */
  1246. lpQtNode->lpTopicList = Dummy.pNext;
  1247. /* Release the memory */
  1248. _GLOBALUNLOCK(hHeap);
  1249. _GLOBALFREE(hHeap);
  1250. return S_OK;
  1251. }
  1252. /*************************************************************************
  1253. * @doc INTERNAL
  1254. *
  1255. * @func VOID PASCAL NEAR | OccurenceSort |
  1256. * Sort all the occurrences depending on their offsets. If two
  1257. * occurrences have the same offset, ie. they must be identical
  1258. * then one will be removed. Simple insertion sort is used since
  1259. * it it expected that most of the time we will have less than
  1260. * 15 occurences per TopicId
  1261. *
  1262. * @func _LPQT | lpQueryTree |
  1263. * Pointer to query tree structure where all globals are
  1264. *
  1265. * @func LPITOPIC | lpTopic |
  1266. * Pointer to doclist with the occurrence list to be sorted
  1267. *************************************************************************/
  1268. PRIVATE VOID PASCAL NEAR OccurenceSort (_LPQT lpQueryTree, LPITOPIC lpTopic)
  1269. {
  1270. LPIOCC lpPrevOcc;
  1271. LPIOCC lpCurOcc;
  1272. LPIOCC lpNextOcc;
  1273. LPIOCC lpTmpOcc;
  1274. int fResult;
  1275. for (lpCurOcc = lpTopic->lpOccur; lpCurOcc; lpCurOcc = lpNextOcc) {
  1276. if (lpNextOcc = lpCurOcc->pNext) {
  1277. if ((fResult = OccCompare(lpCurOcc, lpNextOcc)) <= 0) {
  1278. /* Out of order sequence */
  1279. /* Unlink the out of order node */
  1280. lpCurOcc->pNext = lpNextOcc->pNext;
  1281. if (fResult == 0) {
  1282. /* Duplicate data, just free the node */
  1283. lpNextOcc->pNext = (LPIOCC)lpQueryTree->lpOccFreeList;
  1284. lpQueryTree->lpOccFreeList = (LPSLINK)lpNextOcc;
  1285. lpTopic->lcOccur--;
  1286. /* Reset lpNextOcc */
  1287. lpNextOcc = lpCurOcc;
  1288. continue;
  1289. }
  1290. /* Do an insertion sort */
  1291. for (lpPrevOcc = NULL, lpTmpOcc = lpTopic->lpOccur;;
  1292. lpTmpOcc = lpTmpOcc->pNext) {
  1293. if (lpTmpOcc != NULL &&
  1294. (fResult = OccCompare(lpNextOcc, lpTmpOcc)) == 0) {
  1295. /* Duplicate data, just free the node */
  1296. lpNextOcc->pNext = (LPIOCC)lpQueryTree->lpOccFreeList;
  1297. lpQueryTree->lpOccFreeList = (LPSLINK)lpNextOcc;
  1298. lpTopic->lcOccur--;
  1299. break;
  1300. }
  1301. if (lpTmpOcc == NULL || fResult > 0) {
  1302. /* We just pass the insertion point */
  1303. if (lpPrevOcc == NULL) {
  1304. lpNextOcc->pNext = lpTopic->lpOccur;
  1305. lpTopic->lpOccur = lpNextOcc;
  1306. }
  1307. else {
  1308. lpNextOcc->pNext = lpPrevOcc->pNext;
  1309. lpPrevOcc->pNext = lpNextOcc;
  1310. }
  1311. break;
  1312. }
  1313. lpPrevOcc = lpTmpOcc;
  1314. }
  1315. /* Reset lpNextOcc */
  1316. lpNextOcc = lpCurOcc;
  1317. }
  1318. }
  1319. }
  1320. }
  1321. PRIVATE int PASCAL NEAR TopicWeightCompare (LPITOPIC lpTopic1, LPITOPIC lpTopic2)
  1322. {
  1323. if (lpTopic1->wWeight > lpTopic2->wWeight)
  1324. {
  1325. return 1;
  1326. }
  1327. else if (lpTopic1->wWeight < lpTopic2->wWeight)
  1328. {
  1329. return -1;
  1330. }
  1331. else // must be equal
  1332. {
  1333. if (lpTopic1->lcOccur >= lpTopic2->lcOccur)
  1334. return 1;
  1335. return -1;
  1336. }
  1337. }
  1338. PRIVATE int PASCAL NEAR HitCountCompare (LPITOPIC lpTopic1, LPITOPIC lpTopic2)
  1339. {
  1340. if (lpTopic1->lcOccur >= lpTopic2->lcOccur)
  1341. return 1;
  1342. return -1;
  1343. }
  1344. PRIVATE VOID PASCAL NEAR HeapUp (LPITOPIC far * lrgHeap, WORD ChildIndex,
  1345. FCMP fCompare)
  1346. {
  1347. WORD ParentIndex;
  1348. LPITOPIC far * lplpvParent;
  1349. LPITOPIC far * lplpvChild;
  1350. LPITOPIC lpSaved;
  1351. LPITOPIC lpvParent;
  1352. lplpvChild = &lrgHeap [ChildIndex];
  1353. ParentIndex = ChildIndex/2;
  1354. lpSaved = *lplpvChild;
  1355. while (ParentIndex) {
  1356. lplpvParent = &lrgHeap[ParentIndex];
  1357. lpvParent = *lplpvParent;
  1358. if ((*fCompare)((LPV)lpvParent, (LPV)lpSaved) > 0)
  1359. break;
  1360. *lplpvChild = lpvParent;
  1361. lplpvChild = lplpvParent;
  1362. ParentIndex /= 2;
  1363. };
  1364. *lplpvChild = lpSaved;
  1365. }
  1366. PRIVATE VOID PASCAL NEAR HeapDown (LPITOPIC far * lrgHeap, int MaxChildIndex,
  1367. FCMP fCompare)
  1368. {
  1369. int ChildIndex;
  1370. LPITOPIC far * lplpvParent;
  1371. LPITOPIC far * lplpvChild;
  1372. LPITOPIC lpTopicChild;
  1373. LPITOPIC lpTopicChild2;
  1374. LPITOPIC lpSaved;
  1375. lpSaved = *(lplpvParent = &lrgHeap[1]);
  1376. ChildIndex = 2;
  1377. for (; ChildIndex <= MaxChildIndex; ) {
  1378. lplpvChild = &lrgHeap[ChildIndex];
  1379. lpTopicChild = *lplpvChild;
  1380. /* Find the minimum of the two children */
  1381. if (ChildIndex < MaxChildIndex &&
  1382. (lpTopicChild2 = *(lplpvChild + 1))) {
  1383. if ((*fCompare)((LPV)lpTopicChild, (LPV)lpTopicChild2) < 0) {
  1384. lplpvChild++;
  1385. ChildIndex ++;
  1386. }
  1387. }
  1388. if ((*fCompare)((LPV)lpSaved, (LPV)*lplpvChild) > 0)
  1389. break;
  1390. /* Replace the node */
  1391. *lplpvParent = *lplpvChild;
  1392. lplpvParent = lplpvChild;
  1393. ChildIndex *= 2;
  1394. }
  1395. *lplpvParent = lpSaved;
  1396. }