Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

844 lines
23 KiB

  1. /******************************************************************************
  2. Copyright (c) 2000 Microsoft Corporation
  3. Module Name:
  4. SearchResults.cpp
  5. Abstract:
  6. This file contains the implementation of the keyword search.
  7. Revision History:
  8. Davide Massarenti (Dmassare) 05/28/2000
  9. created
  10. ******************************************************************************/
  11. #include "stdafx.h"
  12. ////////////////////////////////////////////////////////////////////////////////
  13. Taxonomy::KeywordSearch::Token::Token()
  14. {
  15. m_type = TOKEN_INVALID; // TOKEN m_type;
  16. // MPC::wstring m_strToken;
  17. // WeightedMatchSet m_results;
  18. //
  19. m_left = NULL; // Token* m_left;
  20. m_right = NULL; // Token* m_right;
  21. }
  22. Taxonomy::KeywordSearch::Token::~Token()
  23. {
  24. if(m_left ) delete m_left;
  25. if(m_right) delete m_right;
  26. }
  27. //////////////////////////////////////////////////
  28. bool Taxonomy::KeywordSearch::Token::HasNOT()
  29. {
  30. if(m_type == TOKEN_NOT) return true;
  31. if(m_left && m_left ->HasNOT()) return true;
  32. if(m_right && m_right->HasNOT()) return true;
  33. return false;
  34. }
  35. bool Taxonomy::KeywordSearch::Token::HasExplicitOperators()
  36. {
  37. switch(m_type)
  38. {
  39. case TOKEN_NOT:
  40. case TOKEN_AND:
  41. case TOKEN_OR : return true;
  42. }
  43. if(m_left && m_left ->HasExplicitOperators()) return true;
  44. if(m_right && m_right->HasExplicitOperators()) return true;
  45. return false;
  46. }
  47. void Taxonomy::KeywordSearch::Token::AddHit( /*[in]*/ long ID, /*[in]*/ long priority )
  48. {
  49. std::pair<WeightedMatchIter,bool> ins = m_results.insert( WeightedMatchSet::value_type( ID, 0 ) );
  50. ins.first->second += priority;
  51. }
  52. HRESULT Taxonomy::KeywordSearch::Token::ExecuteText( /*[in]*/ LPCWSTR szKeyword ,
  53. /*[in]*/ RS_Keywords* rsKeywords ,
  54. /*[in]*/ RS_Matches* rsMatches )
  55. {
  56. __HCP_FUNC_ENTRY( "Taxonomy::KeywordSearch::Token::Execute" );
  57. HRESULT hr;
  58. bool fFound;
  59. __MPC_EXIT_IF_METHOD_FAILS(hr, rsKeywords->Seek_ByName( szKeyword, &fFound ));
  60. if(fFound)
  61. {
  62. __MPC_EXIT_IF_METHOD_FAILS(hr, rsMatches->Seek_ByKeyword( rsKeywords->m_ID_keyword, &fFound ));
  63. while(fFound)
  64. {
  65. AddHit( rsMatches->m_ID_topic, rsMatches->m_lPriority );
  66. __MPC_EXIT_IF_METHOD_FAILS(hr, rsMatches->Move( 0, JET_MoveNext, &fFound ));
  67. }
  68. }
  69. hr = S_OK;
  70. __HCP_FUNC_CLEANUP;
  71. __HCP_FUNC_EXIT(hr);
  72. }
  73. HRESULT Taxonomy::KeywordSearch::Token::Execute( /*[in]*/ MatchSet& setAllTheTopics ,
  74. /*[in]*/ Updater& updater ,
  75. /*[in]*/ RS_Keywords* rsKeywords ,
  76. /*[in]*/ RS_Matches* rsMatches )
  77. {
  78. __HCP_FUNC_ENTRY( "Taxonomy::KeywordSearch::Token::Execute" );
  79. HRESULT hr;
  80. if(m_type == TOKEN_TEXT)
  81. {
  82. MPC::WStringList lst;
  83. MPC::WStringIter it;
  84. __MPC_EXIT_IF_METHOD_FAILS(hr, ExecuteText( m_strToken.c_str(), rsKeywords, rsMatches ));
  85. __MPC_EXIT_IF_METHOD_FAILS(hr, updater.LocateSynonyms( m_strToken.c_str(), lst, /*fMatchOwner*/false ));
  86. for(it=lst.begin(); it!=lst.end(); it++)
  87. {
  88. __MPC_EXIT_IF_METHOD_FAILS(hr, ExecuteText( it->c_str(), rsKeywords, rsMatches ));
  89. }
  90. }
  91. if(m_type == TOKEN_AND_IMPLICIT ||
  92. m_type == TOKEN_AND )
  93. {
  94. WeightedMatchSet* master;
  95. WeightedMatchSet* slave;
  96. WeightedMatchIter it;
  97. if(m_left == NULL ||
  98. m_right == NULL )
  99. {
  100. __MPC_SET_ERROR_AND_EXIT(hr, S_OK);
  101. }
  102. __MPC_EXIT_IF_METHOD_FAILS(hr, m_left->Execute( setAllTheTopics, updater, rsKeywords, rsMatches ));
  103. if(m_left->m_results.size() == 0)
  104. {
  105. __MPC_SET_ERROR_AND_EXIT(hr, S_OK);
  106. }
  107. __MPC_EXIT_IF_METHOD_FAILS(hr, m_right->Execute( setAllTheTopics, updater, rsKeywords, rsMatches ));
  108. if(m_right->m_results.size() == 0)
  109. {
  110. __MPC_SET_ERROR_AND_EXIT(hr, S_OK);
  111. }
  112. //
  113. // Select the shorter for the outer loop (that is linear).
  114. //
  115. if(m_left->m_results.size() < m_right->m_results.size())
  116. {
  117. master = &m_left ->m_results;
  118. slave = &m_right->m_results;
  119. }
  120. else
  121. {
  122. master = &m_right->m_results;
  123. slave = &m_left ->m_results;
  124. }
  125. for(it=master->begin(); it!=master->end(); it++)
  126. {
  127. if(slave->find( it->first ) != slave->end())
  128. {
  129. AddHit( it->first, it->second );
  130. }
  131. }
  132. }
  133. if(m_type == TOKEN_OR)
  134. {
  135. WeightedMatchIter it;
  136. if(m_left)
  137. {
  138. __MPC_EXIT_IF_METHOD_FAILS(hr, m_left->Execute( setAllTheTopics, updater, rsKeywords, rsMatches ));
  139. for(it=m_left->m_results.begin(); it!=m_left->m_results.end(); it++)
  140. {
  141. AddHit( it->first, it->second );
  142. }
  143. }
  144. if(m_right)
  145. {
  146. __MPC_EXIT_IF_METHOD_FAILS(hr, m_right->Execute( setAllTheTopics, updater, rsKeywords, rsMatches ));
  147. for(it=m_right->m_results.begin(); it!=m_right->m_results.end(); it++)
  148. {
  149. AddHit( it->first, it->second );
  150. }
  151. }
  152. }
  153. if(m_type == TOKEN_NOT)
  154. {
  155. MatchIter it;
  156. if(m_left)
  157. {
  158. __MPC_EXIT_IF_METHOD_FAILS(hr, m_left->Execute( setAllTheTopics, updater, rsKeywords, rsMatches ));
  159. }
  160. for(it=setAllTheTopics.begin(); it!=setAllTheTopics.end(); it++)
  161. {
  162. if(m_left == NULL || m_left->m_results.find( *it ) == m_left->m_results.end())
  163. {
  164. AddHit( *it, 0 );
  165. }
  166. }
  167. }
  168. hr = S_OK;
  169. __HCP_FUNC_CLEANUP;
  170. __HCP_FUNC_EXIT(hr);
  171. }
  172. void Taxonomy::KeywordSearch::Token::CollectKeywords( /*[in/out]*/ MPC::WStringList& lst ) const
  173. {
  174. if(m_type == TOKEN_TEXT)lst.push_back( m_strToken );
  175. if(m_left ) m_left ->CollectKeywords( lst );
  176. if(m_right) m_right->CollectKeywords( lst );
  177. }
  178. HRESULT Taxonomy::KeywordSearch::Token::Stringify( /*[in]*/ MPC::wstring& strNewQuery )
  179. {
  180. __HCP_FUNC_ENTRY( "Taxonomy::KeywordSearch::Token::Stringify" );
  181. HRESULT hr;
  182. if(m_type == TOKEN_TEXT)
  183. {
  184. strNewQuery = m_strToken;
  185. }
  186. else
  187. {
  188. if(m_left)
  189. {
  190. __MPC_EXIT_IF_METHOD_FAILS(hr, m_left->Stringify( strNewQuery ));
  191. if(m_right)
  192. {
  193. MPC::wstring strTmp;
  194. __MPC_EXIT_IF_METHOD_FAILS(hr, m_right->Stringify( strTmp ));
  195. if(strTmp.size())
  196. {
  197. strNewQuery += L" ";
  198. strNewQuery += strTmp;
  199. }
  200. }
  201. }
  202. else
  203. {
  204. __MPC_EXIT_IF_METHOD_FAILS(hr, m_right->Stringify( strNewQuery ));
  205. }
  206. }
  207. hr = S_OK;
  208. __HCP_FUNC_CLEANUP;
  209. __HCP_FUNC_EXIT(hr);
  210. }
  211. ////////////////////////////////////////////////////////////////////////////////
  212. LPCWSTR Taxonomy::KeywordSearch::SkipWhite( /*[in]*/ LPCWSTR szStr )
  213. {
  214. while(iswspace( *szStr )) szStr++;
  215. return szStr;
  216. }
  217. bool Taxonomy::KeywordSearch::IsNotString( /*[in]*/ LPCWSTR szSrc ,
  218. /*[in]*/ WCHAR cQuote )
  219. {
  220. WCHAR c;
  221. while((c = *++szSrc) && !iswspace( c ) && c != cQuote);
  222. return (c != cQuote);
  223. }
  224. bool Taxonomy::KeywordSearch::IsQueryChar( WCHAR c )
  225. {
  226. if(iswspace( c ) ||
  227. iswcntrl( c ) ||
  228. c == '"' ||
  229. c == '(' ||
  230. c == ')' )
  231. {
  232. return false;
  233. }
  234. return true;
  235. }
  236. ////////////////////////////////////////
  237. void Taxonomy::KeywordSearch::RemoveStopSignsAtEnd( /*[in]*/ LPWSTR szText )
  238. {
  239. WCHAR c;
  240. MPC::wstring strCmp;
  241. Taxonomy::WordIter itEnd = m_setStopSignsAtEnd->end();
  242. LPWSTR szEnd = szText + wcslen( szText );
  243. while(szEnd > szText)
  244. {
  245. strCmp = *--szEnd;
  246. if(m_setStopSignsAtEnd->find( strCmp ) != itEnd)
  247. {
  248. szEnd[0] = ' ';
  249. }
  250. else
  251. {
  252. break;
  253. }
  254. }
  255. }
  256. void Taxonomy::KeywordSearch::RemoveStopSignsWithoutContext( /*[in]*/ LPWSTR szText )
  257. {
  258. WCHAR c;
  259. MPC::wstring strCmp;
  260. Taxonomy::WordIter itEnd = m_setStopSignsWithoutContext->end();
  261. while((c = *szText++))
  262. {
  263. strCmp = c;
  264. if(m_setStopSignsWithoutContext->find( strCmp ) != itEnd)
  265. {
  266. szText[-1] = ' ';
  267. }
  268. }
  269. }
  270. void Taxonomy::KeywordSearch::CopyAndEliminateExtraWhiteSpace( /*[in]*/ LPCWSTR szSrc, /*[out]*/ LPWSTR szDst )
  271. {
  272. bool fWhitespace = false;
  273. WCHAR c;
  274. szSrc = SkipWhite( szSrc );
  275. while((c = *szSrc++))
  276. {
  277. if(iswspace(c))
  278. {
  279. if(fWhitespace == false)
  280. {
  281. *szDst++ = ' ';
  282. fWhitespace = true;
  283. }
  284. }
  285. else
  286. {
  287. *szDst++ = c;
  288. fWhitespace = false;
  289. }
  290. }
  291. if(fWhitespace) szDst[-1] = 0;
  292. else szDst[ 0] = 0;
  293. }
  294. Taxonomy::KeywordSearch::TOKEN Taxonomy::KeywordSearch::NextToken( /*[in/out]*/ LPCWSTR& szSrc ,
  295. /*[out] */ LPWSTR szToken )
  296. {
  297. __HCP_FUNC_ENTRY( "Taxonomy::KeywordSearch::NextToken" );
  298. TOKEN token = TOKEN_INVALID;
  299. LPCWSTR szPtr = SkipWhite( szSrc );
  300. LPWSTR szDst = szToken;
  301. WCHAR c;
  302. //
  303. // End of query?
  304. //
  305. c = *szPtr;
  306. if(c == 0)
  307. {
  308. token = TOKEN_EMPTY; __MPC_FUNC_LEAVE;
  309. }
  310. //
  311. // Now deal with Quoted String, which may come in the form of "Quoted String" or 'Quoted String'
  312. //
  313. if(c == '"')
  314. {
  315. WCHAR cQuote = c;
  316. while((c = *++szPtr) && c != cQuote)
  317. {
  318. *szDst++ = c;
  319. }
  320. if(c) szPtr++; // Skip past the closing quote.
  321. token = TOKEN_TEXT; __MPC_FUNC_LEAVE;
  322. }
  323. //
  324. // This is a special case operator which is '||' synonim for OR.
  325. //
  326. if(c == '|')
  327. {
  328. if(szPtr[1] != '|') { token = TOKEN_INVALID; __MPC_FUNC_LEAVE; }
  329. szPtr += 2;
  330. token = TOKEN_OR; __MPC_FUNC_LEAVE;
  331. }
  332. //
  333. // Single Character Tokens we admit are '+', '&', '(' and ')', return as is, and adjust szPtr.
  334. //
  335. if(c == '(') { szPtr++; token = TOKEN_PAREN_OPEN ; __MPC_FUNC_LEAVE; }
  336. if(c == ')') { szPtr++; token = TOKEN_PAREN_CLOSE; __MPC_FUNC_LEAVE; }
  337. // if(c == '+') { szPtr++; token = TOKEN_OR ; __MPC_FUNC_LEAVE; }
  338. // if(c == '&') { szPtr++; token = TOKEN_AND ; __MPC_FUNC_LEAVE; }
  339. // if(c == '!') { szPtr++; token = TOKEN_NOT ; __MPC_FUNC_LEAVE; }
  340. //
  341. // Deal with Alphanumerics:
  342. //
  343. // KW-A, 0-A, Abcdedd, ABC2_WE all are taken as a single Query Term
  344. //
  345. if(IsQueryChar( c ))
  346. {
  347. while(c)
  348. {
  349. szPtr++; *szDst++ = c;
  350. if(IsQueryChar( c = *szPtr )) continue;
  351. //
  352. // We are not done yet, if stop character was a quote character we need to find out whether a string comes after.
  353. //
  354. if(c == '"' && IsNotString( szPtr, c )) continue;
  355. break;
  356. }
  357. *szDst = 0;
  358. {
  359. MPC::wstring strCmp( szToken );
  360. if(m_setOpNOT->find( strCmp ) != m_setOpNOT->end()) { token = TOKEN_NOT; __MPC_FUNC_LEAVE; }
  361. if(m_setOpAND->find( strCmp ) != m_setOpAND->end()) { token = TOKEN_AND; __MPC_FUNC_LEAVE; }
  362. if(m_setOpOR ->find( strCmp ) != m_setOpOR ->end()) { token = TOKEN_OR ; __MPC_FUNC_LEAVE; }
  363. }
  364. token = TOKEN_TEXT; __MPC_FUNC_LEAVE;
  365. }
  366. __HCP_FUNC_CLEANUP;
  367. szSrc = szPtr; *szDst = 0;
  368. __HCP_FUNC_EXIT(token);
  369. }
  370. ////////////////////////////////////////////////////////////////////////////////
  371. HRESULT Taxonomy::KeywordSearch::AllocateQuery( /*[in]*/ const MPC::wstring& strQuery ,
  372. /*[out]*/ LPWSTR& szInput ,
  373. /*[out]*/ LPWSTR& szOutput )
  374. {
  375. __HCP_FUNC_ENTRY( "Taxonomy::KeywordSearch::AllocateQuery" );
  376. HRESULT hr;
  377. szInput = new WCHAR[strQuery.size()+2];
  378. szOutput = new WCHAR[strQuery.size()+2];
  379. if(szInput == NULL || szOutput == NULL)
  380. {
  381. __MPC_SET_ERROR_AND_EXIT(hr, E_OUTOFMEMORY);
  382. }
  383. wcscpy( szInput, strQuery.c_str() );
  384. hr = S_OK;
  385. __HCP_FUNC_CLEANUP;
  386. __HCP_FUNC_EXIT(hr);
  387. }
  388. HRESULT Taxonomy::KeywordSearch::PreprocessQuery( /*[in/out]*/ MPC::wstring& strQuery )
  389. {
  390. __HCP_FUNC_ENTRY( "Taxonomy::KeywordSearch::PreprocessQuery" );
  391. HRESULT hr;
  392. LPWSTR szInput = NULL;
  393. LPWSTR szOutput = NULL;
  394. __MPC_EXIT_IF_METHOD_FAILS(hr, AllocateQuery( strQuery, szInput, szOutput ));
  395. RemoveStopSignsAtEnd ( szInput );
  396. RemoveStopSignsWithoutContext ( szInput );
  397. CopyAndEliminateExtraWhiteSpace( szInput, szOutput );
  398. strQuery = szOutput;
  399. hr = S_OK;
  400. __HCP_FUNC_CLEANUP;
  401. if(szInput ) delete [] szInput;
  402. if(szOutput) delete [] szOutput;
  403. __HCP_FUNC_EXIT(hr);
  404. }
  405. ////////////////////////////////////////////////////////////////////////////////
  406. HRESULT Taxonomy::KeywordSearch::Parse( /*[in/out]*/ LPCWSTR& szInput, /*[in]*/ LPWSTR szTmpBuf, /*[in]*/ bool fSubExpr, /*[out]*/ Token*& res )
  407. {
  408. __HCP_FUNC_ENTRY( "Taxonomy::KeywordSearch::Parse" );
  409. HRESULT hr;
  410. Token* obj = NULL;
  411. Token* objOp = NULL;
  412. Token* objDangling = NULL;
  413. while(1)
  414. {
  415. TOKEN token = NextToken( szInput, szTmpBuf );
  416. if(token == TOKEN_EMPTY) break;
  417. if(token == TOKEN_INVALID)
  418. {
  419. __MPC_SET_ERROR_AND_EXIT(hr, E_INVALIDARG);
  420. }
  421. //
  422. // Skip stop words.
  423. //
  424. if(token == TOKEN_TEXT && m_setStopWords->find( szTmpBuf ) != m_setStopWords->end()) continue;
  425. if(token == TOKEN_PAREN_CLOSE)
  426. {
  427. if(fSubExpr) break;
  428. __MPC_SET_ERROR_AND_EXIT(hr, E_INVALIDARG);
  429. }
  430. if(token == TOKEN_PAREN_OPEN)
  431. {
  432. __MPC_EXIT_IF_METHOD_FAILS(hr, Parse( szInput, szTmpBuf, true, obj ));
  433. //
  434. // Empty subexpression? Not allowed...
  435. //
  436. if(obj == NULL) __MPC_SET_ERROR_AND_EXIT(hr, E_INVALIDARG);
  437. //
  438. // Let's treat a subexpression as a value.
  439. //
  440. token = TOKEN_TEXT;
  441. }
  442. else
  443. {
  444. __MPC_EXIT_IF_ALLOC_FAILS(hr, obj, new Token());
  445. obj->m_type = token;
  446. obj->m_strToken = szTmpBuf;
  447. }
  448. if(token == TOKEN_TEXT ||
  449. token == TOKEN_NOT )
  450. {
  451. if(res == NULL) // First token...
  452. {
  453. res = obj;
  454. }
  455. else if(objDangling) // Last token of a operator...
  456. {
  457. if(objDangling->m_type == TOKEN_NOT) objDangling->m_left = obj;
  458. else objDangling->m_right = obj;
  459. }
  460. else // Implicit AND...
  461. {
  462. __MPC_EXIT_IF_ALLOC_FAILS(hr, objOp, new Token());
  463. objOp->m_type = TOKEN_AND_IMPLICIT;
  464. objOp->m_left = res;
  465. objOp->m_right = obj;
  466. res = objOp;
  467. objOp = NULL;
  468. }
  469. objDangling = (obj->m_type == TOKEN_NOT) ? obj : NULL;
  470. obj = NULL;
  471. }
  472. else
  473. {
  474. //
  475. // What's left are binary operators.
  476. //
  477. if(res == NULL || objDangling)
  478. {
  479. //
  480. // We need a left part...
  481. //
  482. __MPC_SET_ERROR_AND_EXIT(hr, E_INVALIDARG);
  483. }
  484. //
  485. // Rotate result.
  486. //
  487. obj->m_left = res;
  488. res = obj;
  489. objDangling = obj;
  490. obj = NULL;
  491. }
  492. }
  493. //
  494. // Let's make sure operators have the associated data. '
  495. //
  496. if(objDangling)
  497. {
  498. __MPC_SET_ERROR_AND_EXIT(hr, E_INVALIDARG);
  499. }
  500. hr = S_OK;
  501. __HCP_FUNC_CLEANUP;
  502. if(obj ) delete obj;
  503. if(objOp) delete objOp;
  504. __HCP_FUNC_EXIT(hr);
  505. }
  506. HRESULT Taxonomy::KeywordSearch::GenerateResults( /*[in]*/ Token* obj ,
  507. /*[in]*/ CPCHQueryResultCollection* pColl ,
  508. /*[in]*/ MPC::WStringUCSet& setURLs ,
  509. /*[in]*/ Taxonomy::MatchSet* psetNodes )
  510. {
  511. __HCP_FUNC_ENTRY( "Taxonomy::KeywordSearch::GenerateResults" );
  512. HRESULT hr;
  513. WeightedMatchIter it;
  514. bool fFound;
  515. for(it=obj->m_results.begin(); it!=obj->m_results.end(); it++)
  516. {
  517. __MPC_EXIT_IF_METHOD_FAILS(hr, m_rsTopics->Seek_SingleTopic( it->first, &fFound ));
  518. if(fFound)
  519. {
  520. MPC::wstringUC strTopicURL = m_rsTopics->m_strURI;
  521. if(setURLs.find( strTopicURL ) == setURLs.end())
  522. {
  523. CComPtr<CPCHQueryResult> item;
  524. CPCHQueryResult::Payload data;
  525. //
  526. // Not under a node? Skip it.
  527. //
  528. if(psetNodes && psetNodes->find( m_rsTopics->m_ID_node ) == psetNodes->end()) continue;
  529. __MPC_EXIT_IF_METHOD_FAILS(hr, pColl->CreateItem( &item ));
  530. __MPC_EXIT_IF_METHOD_FAILS(hr, m_updater.ExpandURL( m_rsTopics->m_strURI ));
  531. data.m_bstrTitle = m_rsTopics->m_strTitle .c_str();
  532. data.m_bstrTopicURL = m_rsTopics->m_strURI .c_str();
  533. data.m_bstrDescription = m_rsTopics->m_strDescription.c_str();
  534. data.m_lType = m_rsTopics->m_lType ;
  535. data.m_lPriority = it->second;
  536. item->Initialize( data );
  537. setURLs.insert( strTopicURL );
  538. }
  539. }
  540. }
  541. hr = S_OK;
  542. __HCP_FUNC_CLEANUP;
  543. __HCP_FUNC_EXIT(hr);
  544. }
  545. ////////////////////////////////////////////////////////////////////////////////
  546. Taxonomy::KeywordSearch::KeywordSearch( /*[in]*/ Updater& updater ) : m_updater( updater )
  547. {
  548. // Updater& m_updater;
  549. //
  550. m_setStopSignsAtEnd = NULL; // WordSet* m_setStopSignsAtEnd;
  551. m_setStopSignsWithoutContext = NULL; // WordSet* m_setStopSignsWithoutContext;
  552. m_setStopWords = NULL; // WordSet* m_setStopWords;
  553. m_setOpNOT = NULL; // WordSet* m_setOpNOT;
  554. m_setOpAND = NULL; // WordSet* m_setOpAND;
  555. m_setOpOR = NULL; // WordSet* m_setOpOR;
  556. }
  557. Taxonomy::KeywordSearch::~KeywordSearch()
  558. {
  559. }
  560. HRESULT Taxonomy::KeywordSearch::Execute( /*[in]*/ LPCWSTR szQuery ,
  561. /*[in]*/ LPCWSTR szSubsite ,
  562. /*[in]*/ CPCHQueryResultCollection* pColl ,
  563. /*[in]*/ MPC::WStringList* lst )
  564. {
  565. __HCP_FUNC_ENTRY( "Taxonomy::KeywordSearch::Execute" );
  566. HRESULT hr;
  567. MPC::wstring strCleanedQuery;
  568. MPC::WStringUCSet setURLs;
  569. Taxonomy::MatchSet setNodes;
  570. Taxonomy::MatchSet* psetNodes = NULL;
  571. Token* mainQuery = NULL;
  572. Token* stringifyQuery = NULL;
  573. LPWSTR szInput = NULL;
  574. LPWSTR szOutput = NULL;
  575. LPCWSTR szToken;
  576. //
  577. // Initialize the database stuff.
  578. //
  579. __MPC_EXIT_IF_METHOD_FAILS(hr, m_updater.GetWordSet( UPDATER_SET_STOPSIGNS , &m_setStopSignsWithoutContext ));
  580. __MPC_EXIT_IF_METHOD_FAILS(hr, m_updater.GetWordSet( UPDATER_SET_STOPSIGNS_ATENDOFWORD, &m_setStopSignsAtEnd ));
  581. __MPC_EXIT_IF_METHOD_FAILS(hr, m_updater.GetWordSet( UPDATER_SET_STOPWORDS , &m_setStopWords ));
  582. __MPC_EXIT_IF_METHOD_FAILS(hr, m_updater.GetWordSet( UPDATER_SET_OPERATOR_NOT , &m_setOpNOT ));
  583. __MPC_EXIT_IF_METHOD_FAILS(hr, m_updater.GetWordSet( UPDATER_SET_OPERATOR_AND , &m_setOpAND ));
  584. __MPC_EXIT_IF_METHOD_FAILS(hr, m_updater.GetWordSet( UPDATER_SET_OPERATOR_OR , &m_setOpOR ));
  585. __MPC_EXIT_IF_METHOD_FAILS(hr, m_updater.GetTopics ( &m_rsTopics ));
  586. __MPC_EXIT_IF_METHOD_FAILS(hr, m_updater.GetKeywords( &m_rsKeywords ));
  587. __MPC_EXIT_IF_METHOD_FAILS(hr, m_updater.GetMatches ( &m_rsMatches ));
  588. //
  589. // Parse the query.
  590. //
  591. __MPC_EXIT_IF_METHOD_FAILS(hr, PreprocessQuery( strCleanedQuery = szQuery ));
  592. __MPC_EXIT_IF_METHOD_FAILS(hr, AllocateQuery ( strCleanedQuery, szInput, szOutput ));
  593. __MPC_EXIT_IF_METHOD_FAILS(hr, Parse( szToken = szInput, szOutput, false, mainQuery ));
  594. if(mainQuery)
  595. {
  596. MatchSet setAllTheTopics;
  597. MatchIter it;
  598. bool fFound;
  599. if(STRINGISPRESENT(szSubsite))
  600. {
  601. long ID_node;
  602. __MPC_EXIT_IF_METHOD_FAILS(hr, m_updater.LocateTaxonomyNode( ID_node, szSubsite, /*fLookForFather*/false ));
  603. __MPC_EXIT_IF_METHOD_FAILS(hr, m_updater.LocateSubNodes ( ID_node , /*fRecurse */true , /*fOnlyVisible*/false, setNodes ));
  604. setNodes.insert( ID_node ); // Add the node itself.
  605. psetNodes = &setNodes;
  606. }
  607. if(mainQuery->HasNOT())
  608. {
  609. //
  610. // Unfortunately, with the NOT operator we need to load all the topics...
  611. //
  612. __MPC_EXIT_IF_METHOD_FAILS(hr, m_rsTopics->Move( 0, JET_MoveFirst, &fFound ));
  613. while(fFound)
  614. {
  615. setAllTheTopics.insert( m_rsTopics->m_ID_topic );
  616. __MPC_EXIT_IF_METHOD_FAILS(hr, m_rsTopics->Move( 0, JET_MoveNext, &fFound ));
  617. }
  618. }
  619. else if(mainQuery->HasExplicitOperators() == false && mainQuery->m_type != TOKEN_TEXT)
  620. {
  621. //
  622. // No explicit operators and more than one term, let's try to "stringify" the query...
  623. //
  624. MPC::wstring strNewQuery;
  625. __MPC_EXIT_IF_METHOD_FAILS(hr, mainQuery->Stringify( strNewQuery ));
  626. __MPC_EXIT_IF_ALLOC_FAILS(hr, stringifyQuery, new Token());
  627. stringifyQuery->m_type = TOKEN_TEXT;
  628. stringifyQuery->m_strToken = strNewQuery;
  629. __MPC_EXIT_IF_METHOD_FAILS(hr, stringifyQuery->Execute( setAllTheTopics, m_updater, m_rsKeywords, m_rsMatches ));
  630. if(lst) stringifyQuery->CollectKeywords( *lst );
  631. __MPC_EXIT_IF_METHOD_FAILS(hr, GenerateResults( stringifyQuery, pColl, setURLs, psetNodes ));
  632. }
  633. __MPC_EXIT_IF_METHOD_FAILS(hr, mainQuery->Execute( setAllTheTopics, m_updater, m_rsKeywords, m_rsMatches ));
  634. if(lst) mainQuery->CollectKeywords( *lst );
  635. __MPC_EXIT_IF_METHOD_FAILS(hr, GenerateResults( mainQuery, pColl, setURLs, psetNodes ));
  636. }
  637. hr = S_OK;
  638. __HCP_FUNC_CLEANUP;
  639. if(mainQuery ) delete mainQuery;
  640. if(stringifyQuery) delete stringifyQuery;
  641. if(szInput ) delete [] szInput;
  642. if(szOutput) delete [] szOutput;
  643. __HCP_FUNC_EXIT(hr);
  644. }