Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

778 lines
21 KiB

  1. /*************************************************************************
  2. * @doc SHROOM INTERNAL API *
  3. * *
  4. * IDXOBR.CPP *
  5. * *
  6. * Copyright (C) Microsoft Corporation 1997 *
  7. * All Rights reserved. *
  8. * *
  9. * This file contains the implementation of CITIndexObjBridge, *
  10. * which is a class used by CITIndexLocal to allow the old .c *
  11. * search internals to call the new COM-based breaker and stemmer *
  12. * objects.
  13. * *
  14. * *
  15. **************************************************************************
  16. * *
  17. * Written By : Bill Aloof *
  18. * Current Owner: billa *
  19. * *
  20. **************************************************************************/
  21. #include <mvopsys.h>
  22. #ifdef _DEBUG
  23. static char s_aszModule[] = __FILE__; /* For error report */
  24. #endif
  25. #include <atlinc.h>
  26. // MediaView (InfoTech) includes
  27. #include <orkin.h>
  28. #include <groups.h>
  29. #include <itquery.h>
  30. #include <itcat.h>
  31. #include <itwbrk.h>
  32. #include <itwbrkid.h>
  33. #include "indeximp.h"
  34. #include "queryimp.h"
  35. #include "mvsearch.h"
  36. #include "idxobr.h"
  37. #include "common.h"
  38. //---------------------------------------------------------------------------
  39. // Constructor and Destructor
  40. //---------------------------------------------------------------------------
  41. CITIndexObjBridge::CITIndexObjBridge()
  42. {
  43. m_cRef = 0;
  44. m_piwbrk = NULL;
  45. m_piwbrkc = NULL;
  46. m_pistem = NULL;
  47. m_piitstwdl = NULL;
  48. m_pexbrkpm = NULL;
  49. m_fNormWord = FALSE;
  50. m_dwCodePageID = 0;
  51. m_hmemSrc = m_hmemDestNorm = m_hmemDestRaw = NULL;
  52. m_cbBufSrcCur = m_cbBufDestNormCur = m_cbBufDestRawCur = 0;
  53. m_lpsipbTermHit = NULL;
  54. }
  55. CITIndexObjBridge::~CITIndexObjBridge()
  56. {
  57. if (m_cRef > 0)
  58. {
  59. ITASSERT(FALSE);
  60. }
  61. if (m_hmemSrc != NULL)
  62. {
  63. _GLOBALFREE(m_hmemSrc);
  64. m_hmemSrc = NULL;
  65. m_cbBufSrcCur = 0;
  66. }
  67. if (m_hmemDestNorm != NULL)
  68. {
  69. _GLOBALFREE(m_hmemDestNorm);
  70. m_hmemDestNorm = NULL;
  71. m_cbBufDestNormCur = 0;
  72. }
  73. if (m_hmemDestRaw != NULL)
  74. {
  75. _GLOBALFREE(m_hmemDestRaw);
  76. m_hmemDestRaw = NULL;
  77. m_cbBufDestRawCur = 0;
  78. }
  79. if (m_piwbrk != NULL)
  80. {
  81. m_piwbrk->Release();
  82. m_piwbrk = NULL;
  83. }
  84. if (m_piwbrkc != NULL)
  85. {
  86. m_piwbrkc->Release();
  87. m_piwbrkc = NULL;
  88. }
  89. if (m_pistem != NULL)
  90. {
  91. m_pistem->Release();
  92. m_pistem = NULL;
  93. }
  94. if (m_piitstwdl != NULL)
  95. {
  96. m_piitstwdl->Release();
  97. m_piitstwdl = NULL;
  98. }
  99. MVStopListDispose(m_lpsipbTermHit);
  100. }
  101. //---------------------------------------------------------------------------
  102. // IUnknown Method Implementations
  103. //---------------------------------------------------------------------------
  104. // NOTE: This implementation of IUnknown assumes that this object is used
  105. // only in a local context, meaning that no piece of code will hold onto
  106. // an IUnknown pointer obtained via QueryInterface beyond the scope that
  107. // an instance of this object was created in. For example, this object
  108. // will very likely be created/destroyed in the same method. That's why
  109. // there's no controlling IUnknown for us to forward AddRef's and Release's
  110. // to. It is also the reason that IUnknown::Release doesn't call the
  111. // class's destructor when the ref count goes to 0.
  112. STDMETHODIMP
  113. CITIndexObjBridge::QueryInterface(REFIID riid, LPVOID *ppvObj)
  114. {
  115. HRESULT hr = S_OK;
  116. void *pvObj = NULL;
  117. if (ppvObj == NULL)
  118. return (SetErrReturn(E_POINTER));
  119. if (riid == IID_IWordSink)
  120. pvObj = (void *)((PIWRDSNK) this);
  121. else if (riid == IID_IStemSink)
  122. pvObj = (void *)((PISTEMSNK) this);
  123. else if (riid == IID_IUnknown)
  124. pvObj = (void *)((IUnknown *) ((PIWRDSNK) this));
  125. if (pvObj != NULL)
  126. *ppvObj = pvObj;
  127. else
  128. hr = E_NOINTERFACE;
  129. return (hr);
  130. }
  131. STDMETHODIMP_(ULONG)
  132. CITIndexObjBridge::AddRef(void)
  133. {
  134. return (++m_cRef);
  135. }
  136. STDMETHODIMP_(ULONG)
  137. CITIndexObjBridge::Release(void)
  138. {
  139. if (m_cRef > 0)
  140. --m_cRef;
  141. else
  142. {
  143. ITASSERT(FALSE);
  144. }
  145. return (m_cRef);
  146. }
  147. //---------------------------------------------------------------------------
  148. // IWordSink Method Implementations
  149. //---------------------------------------------------------------------------
  150. /*****************************************************************
  151. * @method STDMETHODIMP | IWordSink | PutWord |
  152. * This method notifies IWordSink of a new word.
  153. *
  154. * @parm WCHAR const | *pwcInBuf | Pointer to the word to add.
  155. * @parm ULONG |cwc | Count of characters in the word.
  156. * @parm ULONG |cwcSrcLen | count of characters in pTextSource buffer
  157. * (see <om IWordBreaker.BreakText>) that corresponds to the output word
  158. * @parm ULONG |cwcSrcPos | the position of the word in pTextSource
  159. * buffer that corresponds to the output word
  160. *
  161. * @rvalue S_OK | The operation completed successfully.
  162. * @rvalue E_POINTER | Input buffer is NULL.
  163. *
  164. * @comm The values of <p cwcSrcLen> and <p cwcSrcPos> are used by the
  165. * ISearch interface, which given a query and a text source, will highlight
  166. * all hits within the text source that match the query. The location
  167. * of the text to be highlighted is computed from <p cwcSrcLen> and
  168. * <p cwcSrcPos>. Since <p pwcInBuf> is constant and should not be
  169. * modified by PutWord, it can point directly into <p pTextSource>.
  170. * Values of cwc larger than the ulMaxTokenSize specified in
  171. * <om IWordBreaker.Init> will result in LANGUAGE_S_LARGE_WORD.
  172. * @comm Text sent to PutWord should match the source text as closely
  173. * as possible, including capitalization and accents.
  174. *
  175. * @comm You need to call this method for every word retrieved from
  176. * <p pTextSource> except those for which the <om .PutAltWord> call
  177. * has been made. The word sink automatically adds an end of word break
  178. * (EOW) after this token.
  179. *
  180. ****************************************************************/
  181. STDMETHODIMP
  182. CITIndexObjBridge::PutWord(WCHAR const *pwcInBuf, ULONG cwc,
  183. ULONG cwcSrcLen, ULONG cwcSrcPos)
  184. {
  185. HRESULT hr;
  186. DWORD cbAnsi;
  187. if (pwcInBuf == NULL)
  188. return (E_POINTER);
  189. cbAnsi = (sizeof(WCHAR) * cwc) + sizeof(WORD);
  190. if (SUCCEEDED(hr =
  191. ReallocBuffer(&m_hmemDestRaw, &m_cbBufDestRawCur, cbAnsi)))
  192. {
  193. char *lpchBufRaw;
  194. lpchBufRaw = (char *) _GLOBALLOCK(m_hmemDestRaw);
  195. if ((*((WORD *)lpchBufRaw) = (WORD)
  196. WideCharToMultiByte(m_dwCodePageID, NULL, pwcInBuf, cwc,
  197. lpchBufRaw + sizeof(WORD), cbAnsi - sizeof(WORD),
  198. NULL, NULL)) > 0)
  199. {
  200. char *lpchBufNorm;
  201. lpchBufNorm = (char *) _GLOBALLOCK(m_hmemDestNorm);
  202. if (!m_fNormWord)
  203. MEMCPY(lpchBufNorm, lpchBufRaw,
  204. *((WORD *)lpchBufRaw) + sizeof(WORD));
  205. ITASSERT(m_pexbrkpm != NULL);
  206. if (m_pexbrkpm->lpfnOutWord != NULL)
  207. {
  208. DWORD ibAnsiOffset;
  209. WCHAR *lpwchSrc;
  210. lpwchSrc = (WCHAR *) _GLOBALLOCK(m_hmemSrc);
  211. // Compute the ANSI offset of the beginning of the raw word.
  212. // The ANSI buffer we pass won't get written to - we just
  213. // pass a pointer just in case the routine requires a non-NULL
  214. // for that param (documentation doesn't say).
  215. ibAnsiOffset = WideCharToMultiByte(m_dwCodePageID, NULL,
  216. lpwchSrc, cwcSrcPos,
  217. lpchBufRaw, 0, NULL, NULL);
  218. // Call the supplied word callback function.
  219. hr = m_pexbrkpm->lpfnOutWord((LPBYTE)lpchBufRaw,
  220. (LPBYTE)lpchBufNorm,
  221. ibAnsiOffset, m_pexbrkpm->lpvUser);
  222. _GLOBALUNLOCK(m_hmemSrc);
  223. }
  224. _GLOBALUNLOCK(m_hmemDestNorm);
  225. }
  226. else
  227. hr = E_UNEXPECTED;
  228. _GLOBALUNLOCK(m_hmemDestRaw);
  229. }
  230. return (hr);
  231. }
  232. /****************************************************************
  233. * @method STDMETHODIMP | IWordSink | PutAltWord |
  234. * Allows the word breaker to put more than one word in the same place.
  235. * @parm WCHAR const | *pwcInBuf | Pointer to the word to add.
  236. * @parm ULONG |cwc | Count of characters in the word.
  237. * @parm ULONG |cwcSrcLen | count of characters in pTextSource buffer
  238. * (see <om IWordBreaker.BreakText>) that corresponds to the output word.
  239. * @parm ULONG |cwcSrcPos | the position of the word in pTextSource
  240. * buffer that corresponds to the output word
  241. *
  242. * @rvalue S_OK | The operation completed successfully.
  243. * @rvalue E_POINTER | Input buffer is NULL.
  244. *
  245. * @comm
  246. * When you need to add more than one word in the same place, use
  247. * PutAltWord for all alternative words except the last one. Use
  248. * PutWord for the final alternative, indicating movement to the next position.
  249. * @ex The phrase "Where is Kyle's document" would be stored as: |
  250. * pWSink->PutWord( L"Where", 5, 5, 0 );
  251. * pWSink->PutWord( L"is", 2, 2, 6 );
  252. * pWSink->PutAltWord( L"Kyle", 4, 6, 9 );
  253. * pWSink->PutWord( L"Kyle's", 6, 6, 9 );
  254. * pWSink->PutWord( L"document", 8, 8, 16 );
  255. *
  256. *
  257. ***************************************************************/
  258. STDMETHODIMP
  259. CITIndexObjBridge::PutAltWord(WCHAR const *pwcInBuf, ULONG cwc,
  260. ULONG cwcSrcLen, ULONG cwcSrcPos)
  261. {
  262. HRESULT hr;
  263. DWORD cbAnsi;
  264. if (pwcInBuf == NULL)
  265. return (E_POINTER);
  266. cbAnsi = (sizeof(WCHAR) * cwc) + sizeof(WORD);
  267. if (SUCCEEDED(hr =
  268. ReallocBuffer(&m_hmemDestNorm, &m_cbBufDestNormCur, cbAnsi)))
  269. {
  270. char *lpchBuf;
  271. lpchBuf = (char *) _GLOBALLOCK(m_hmemDestNorm);
  272. if ((*((WORD *)lpchBuf) = (WORD)
  273. WideCharToMultiByte(m_dwCodePageID, NULL, pwcInBuf, cwc,
  274. lpchBuf + sizeof(WORD), cbAnsi - sizeof(WORD),
  275. NULL, NULL)) > 0)
  276. {
  277. m_fNormWord = TRUE;
  278. }
  279. else
  280. hr = E_UNEXPECTED;
  281. _GLOBALUNLOCK(m_hmemDestNorm);
  282. }
  283. return (hr);
  284. }
  285. /****************************************************************
  286. * @method STDMETHODIMP | IWordSink | StartAltPhrase |
  287. * This method is not implemented.
  288. ***************************************************************/
  289. STDMETHODIMP
  290. CITIndexObjBridge::StartAltPhrase(void)
  291. {
  292. return (E_NOTIMPL);
  293. }
  294. /****************************************************************
  295. * @method STDMETHODIMP | IWordSink | EndAltPhrase|
  296. * This method is not implemented.
  297. ***************************************************************/
  298. STDMETHODIMP
  299. CITIndexObjBridge::EndAltPhrase(void)
  300. {
  301. return (E_NOTIMPL);
  302. }
  303. /****************************************************************
  304. * @method STDMETHODIMP | IWordSink | PutBreak |
  305. * This method is not implemented.
  306. *
  307. * @parm WORDREP_BREAK_TYPE | breakType | Specifies break type
  308. *
  309. *
  310. ***************************************************************/
  311. STDMETHODIMP
  312. CITIndexObjBridge::PutBreak(WORDREP_BREAK_TYPE breakType)
  313. {
  314. return (E_NOTIMPL);
  315. }
  316. //---------------------------------------------------------------------------
  317. // IStemSink Method Implementations
  318. //---------------------------------------------------------------------------
  319. /****************************************************************
  320. * @method STDMETHODIMP | IStemSink | PutWord |
  321. * Notifies IStemSink of a word that is similar to the input word
  322. * of <om IStemmer.StemWord> method.
  323. *
  324. * @parm WCHAR const | *pwcInBuf | Pointer to the word
  325. * @parm ULONG | cwc | Number of characters in the word
  326. *
  327. * @rvalue E_POINTER | The input buffer is NULL.
  328. *
  329. ***************************************************************/
  330. STDMETHODIMP
  331. CITIndexObjBridge::PutWord(WCHAR const *pwcInBuf, ULONG cwc)
  332. {
  333. HRESULT hr;
  334. DWORD cbAnsi;
  335. if (pwcInBuf == NULL)
  336. return (E_POINTER);
  337. cbAnsi = (sizeof(WCHAR) * cwc) + sizeof(WORD);
  338. if (SUCCEEDED(hr =
  339. ReallocBuffer(&m_hmemDestNorm, &m_cbBufDestNormCur, cbAnsi)))
  340. {
  341. char *lpchBuf;
  342. lpchBuf = (char *) _GLOBALLOCK(m_hmemDestNorm);
  343. if ((*((WORD *)lpchBuf) = (WORD)
  344. WideCharToMultiByte(m_dwCodePageID, NULL, pwcInBuf, cwc,
  345. lpchBuf + sizeof(WORD), cbAnsi - sizeof(WORD),
  346. NULL, NULL)) == 0)
  347. hr = E_UNEXPECTED;
  348. _GLOBALUNLOCK(m_hmemDestNorm);
  349. }
  350. return (hr);
  351. }
  352. /****************************************************************
  353. * @method STDMETHODIMP | IStemSink | PutAltWord |
  354. * Notifies IStemSink of a word that is similar to the input word
  355. * of <om IStemmer.StemWord> method.
  356. * @parm WCHAR const | *pwcInBuf | Pointer to the word
  357. * @parm ULONG | cwc | Number of characters in the word
  358. *
  359. * @rvalue S_OK | This method always returns success.
  360. *
  361. * @comm
  362. * InfoTech Search only supports getting back one stemmed version
  363. * of the raw word. Any others are ignored.
  364. * @xref <om .PutWord>
  365. ***************************************************************/
  366. STDMETHODIMP
  367. CITIndexObjBridge::PutAltWord(WCHAR const *pwcInBuf, ULONG cwc)
  368. {
  369. // We only support getting back one stemmed version of the raw word,
  370. // so we ignore all the others.
  371. return (S_OK);
  372. }
  373. //---------------------------------------------------------------------------
  374. // Other Public Method Implementations
  375. //---------------------------------------------------------------------------
  376. // By the time this method is called, we assume the breaker has been fully
  377. // initialized via IWordBreakerConfig (if present) and via IWordBreaker::Init.
  378. STDMETHODIMP
  379. CITIndexObjBridge::SetWordBreaker(PIWBRK piwbrk)
  380. {
  381. LCID lcid;
  382. if (piwbrk == NULL)
  383. return (SetErrReturn(E_POINTER));
  384. if (m_piwbrk != NULL)
  385. return (SetErrReturn(E_ALREADYINIT));
  386. // Pick up IWordBreakerConfig if its there, otherwise we'll go without it.
  387. // Do the same for IStemmer if we got IWordBreakerConfig.
  388. if (SUCCEEDED(piwbrk->QueryInterface(IID_IWordBreakerConfig,
  389. (LPVOID *) &m_piwbrkc)))
  390. m_piwbrkc->GetWordStemmer(&m_pistem);
  391. // Pick up IITStopWordList if its there, otherwise we'll go without it.
  392. piwbrk->QueryInterface(IID_IITStopWordList, (LPVOID *) &m_piitstwdl);
  393. if (m_piwbrkc == NULL ||
  394. FAILED(m_piwbrkc->GetLocaleInfo(&m_dwCodePageID, &lcid)))
  395. m_dwCodePageID = GetACP();
  396. (m_piwbrk = piwbrk)->AddRef();
  397. return (S_OK);
  398. }
  399. // NOTE: If CITIndexObjBridge::BreakText was going to provide more than
  400. // one buffer's worth of text to the COM breaker, then the very first members of
  401. // CITIndexObjBridge would be made to match those of TEXT_SOURCE so that
  402. // FillTextSource callback could call back into us (by casting the TEXT_SOURCE
  403. // param passed to it). Otherwise, we would have no way of providing
  404. // object-oriented breaking - we would have to resort to using globals.
  405. SCODE __stdcall FillTextSource(TEXT_SOURCE *pTextSource)
  406. {
  407. // We always return failure to signify no more text.
  408. return E_FAIL;
  409. }
  410. STDMETHODIMP
  411. CITIndexObjBridge::BreakText(PEXBRKPM pexbrkpm)
  412. {
  413. HRESULT hr = S_OK;
  414. if (m_piwbrk == NULL)
  415. return (E_UNEXPECTED);
  416. if (pexbrkpm == NULL)
  417. return (SetErrReturn(E_POINTER));
  418. if (pexbrkpm->lpbBuf == NULL)
  419. return (SetErrReturn(E_INVALIDARG));
  420. // Configure word breaker if we got IWordBreakerConfig; otherwise,
  421. // check values in *pexbrkpm to see if they are compatible with defaults.
  422. if (m_piwbrkc != NULL)
  423. {
  424. DWORD grfBreakFlags;
  425. if (SUCCEEDED(hr =
  426. m_piwbrkc->SetBreakWordType(pexbrkpm->dwBreakWordType)) &&
  427. SUCCEEDED(hr =
  428. m_piwbrkc->GetControlInfo(&grfBreakFlags, NULL)))
  429. {
  430. SetGrfFlag(&grfBreakFlags, IITWBC_BREAK_ACCEPT_WILDCARDS,
  431. (pexbrkpm->fFlags & ACCEPT_WILDCARD));
  432. hr = m_piwbrkc->SetControlInfo(grfBreakFlags, NULL);
  433. }
  434. }
  435. else
  436. {
  437. if (pexbrkpm->dwBreakWordType != IITWBC_BREAKTYPE_TEXT)
  438. hr = E_NOTSUPPORTED;
  439. }
  440. if (SUCCEEDED(hr))
  441. {
  442. DWORD cwch;
  443. m_fNormWord = FALSE;
  444. m_pexbrkpm = pexbrkpm;
  445. cwch = pexbrkpm->cbBufCount;
  446. if (SUCCEEDED(hr = ReallocBuffer(&m_hmemSrc, &m_cbBufSrcCur,
  447. sizeof(WCHAR) * cwch)))
  448. {
  449. WCHAR *lpwchBuf;
  450. lpwchBuf = (WCHAR *) _GLOBALLOCK(m_hmemSrc);
  451. // Convert the text source buffer to Unicode.
  452. if ((cwch = MultiByteToWideChar(m_dwCodePageID, NULL,
  453. (LPCSTR) pexbrkpm->lpbBuf, pexbrkpm->cbBufCount,
  454. lpwchBuf, cwch)) > 0)
  455. {
  456. TEXT_SOURCE txtsrc;
  457. txtsrc.pfnFillTextBuffer = FillTextSource;
  458. txtsrc.awcBuffer = lpwchBuf;
  459. txtsrc.iCur = 0;
  460. txtsrc.iEnd = cwch;
  461. // Send the Unicode text buffer to the breaker.
  462. hr = m_piwbrk->BreakText(&txtsrc, (PIWRDSNK) this, NULL);
  463. }
  464. else
  465. hr = E_UNEXPECTED;
  466. _GLOBALUNLOCK(m_hmemSrc);
  467. }
  468. m_pexbrkpm = NULL;
  469. }
  470. return (hr);
  471. }
  472. // The stop word is in WORD length prefix format.
  473. STDMETHODIMP
  474. CITIndexObjBridge::LookupStopWord(LPBYTE lpbStopWord)
  475. {
  476. HRESULT hr;
  477. DWORD cwch;
  478. DWORD cbAnsi;
  479. if (lpbStopWord == NULL)
  480. return (SetErrReturn(E_POINTER));
  481. if (m_piitstwdl == NULL)
  482. return (SetErrReturn(E_NOTIMPL));
  483. cwch = cbAnsi = (DWORD)(*((WORD *)lpbStopWord));
  484. if (SUCCEEDED(hr = ReallocBuffer(&m_hmemSrc, &m_cbBufSrcCur,
  485. sizeof(WCHAR) * cwch)))
  486. {
  487. WCHAR *lpwchBuf;
  488. lpwchBuf = (WCHAR *) _GLOBALLOCK(m_hmemSrc);
  489. // Convert the stop word to Unicode.
  490. if ((cwch = MultiByteToWideChar(m_dwCodePageID, NULL,
  491. (LPCSTR)lpbStopWord + sizeof(WORD), cbAnsi,
  492. lpwchBuf, cwch)) > 0)
  493. {
  494. // Lookup the stop word.
  495. hr = m_piitstwdl->LookupWord(lpwchBuf, cwch);
  496. }
  497. else
  498. hr = E_UNEXPECTED;
  499. _GLOBALUNLOCK(m_hmemSrc);
  500. }
  501. return (hr);
  502. }
  503. // Stem the raw word and return result in lpbStemWord.
  504. // Both word buffers are in WORD length prefix format.
  505. STDMETHODIMP
  506. CITIndexObjBridge::StemWord(LPBYTE lpbStemWord, LPBYTE lpbRawWord)
  507. {
  508. HRESULT hr;
  509. DWORD cwch;
  510. DWORD cbAnsi;
  511. if (lpbStemWord == NULL || lpbRawWord == NULL)
  512. return (SetErrReturn(E_POINTER));
  513. if (m_pistem == NULL)
  514. return (SetErrReturn(E_NOSTEMMER));
  515. cwch = cbAnsi = (DWORD)(*((WORD *)lpbRawWord));
  516. if (SUCCEEDED(hr = ReallocBuffer(&m_hmemSrc, &m_cbBufSrcCur,
  517. sizeof(WCHAR) * cwch)))
  518. {
  519. WCHAR *lpwchBuf;
  520. lpwchBuf = (WCHAR *) _GLOBALLOCK(m_hmemSrc);
  521. // Convert the word to be stemmed to Unicode.
  522. if ((cwch = MultiByteToWideChar(m_dwCodePageID, NULL,
  523. (LPCSTR)lpbRawWord + sizeof(WORD), cbAnsi,
  524. lpwchBuf, cwch)) > 0)
  525. {
  526. // Stem the raw word.
  527. if (SUCCEEDED(hr =
  528. m_pistem->StemWord(lpwchBuf, cwch, (PISTEMSNK) this)))
  529. {
  530. char *lpchStemBuf;
  531. WORD cbStemWord;
  532. lpchStemBuf = (char *) _GLOBALLOCK(m_hmemDestNorm);
  533. // Copy stem word from the normalized word destination buffer
  534. // (where our implementation of IStemSink::PutWord put it) to
  535. // lpbStemWord as long as it is not longer than the raw word.
  536. if ((cbStemWord = *((WORD *)lpchStemBuf)) <= cbAnsi)
  537. MEMCPY(lpbStemWord, lpchStemBuf, cbStemWord + sizeof(WORD));
  538. else
  539. hr = E_WORDTOOLONG;
  540. _GLOBALUNLOCK(m_hmemDestNorm);
  541. }
  542. }
  543. else
  544. hr = E_UNEXPECTED;
  545. _GLOBALUNLOCK(m_hmemSrc);
  546. }
  547. return (hr);
  548. }
  549. // On entry, lpbTermHit is a WORD-prefixed MBCS string.
  550. // On exit, *ppvTermHit is a WORD-prefixed Unicode string.
  551. STDMETHODIMP
  552. CITIndexObjBridge::AddQueryResultTerm(LPBYTE lpbTermHit, LPVOID *ppvTermHit)
  553. {
  554. DWORD cwch;
  555. DWORD cbAnsi;
  556. HRESULT hr = S_OK;
  557. if (lpbTermHit == NULL || ppvTermHit == NULL)
  558. return (SetErrReturn(E_POINTER));
  559. if (m_dwCodePageID == 0)
  560. return (SetErrReturn(E_NOTINIT));
  561. cwch = cbAnsi = (DWORD)(*((WORD *)lpbTermHit));
  562. // When allocating the buffer, add 1 char to leave room for the
  563. // Unicode string's WORD prefix.
  564. if ((m_lpsipbTermHit != NULL ||
  565. (m_lpsipbTermHit = MVStopListInitiate(IDXOBR_TERMHASH_SIZE,
  566. &hr)) != NULL) &&
  567. SUCCEEDED(hr = ReallocBuffer(&m_hmemSrc, &m_cbBufSrcCur,
  568. sizeof(WCHAR) * (cwch + 1))))
  569. {
  570. WCHAR *lpwchBuf;
  571. lpwchBuf = (WCHAR *) _GLOBALLOCK(m_hmemSrc);
  572. // Convert lpbTermHit to Unicode before searching or storing it;
  573. // leave space in the Unicode buffer for the WORD length prefix.
  574. if ((cwch = MultiByteToWideChar(m_dwCodePageID, NULL,
  575. (LPCSTR)lpbTermHit + sizeof(WORD), cbAnsi,
  576. lpwchBuf + 1, cwch)) > 0)
  577. {
  578. // Store the Unicode string length, but restate it in bytes
  579. // since the stopword list lookup code assumes MBCS.
  580. *lpwchBuf = (WORD)cwch * sizeof(WCHAR);
  581. // Add the word to the list and then get a pointer to it.
  582. if (SUCCEEDED(hr = MVStopListAddWord(m_lpsipbTermHit,
  583. (LPBYTE) lpwchBuf)))
  584. {
  585. hr = MVStopListFindWordPtr(m_lpsipbTermHit,
  586. (LST)lpwchBuf, (LST *)ppvTermHit);
  587. }
  588. }
  589. else
  590. hr = E_UNEXPECTED;
  591. _GLOBALUNLOCK(m_hmemSrc);
  592. }
  593. return (hr);
  594. }
  595. // This method should only be called after a query term hit list has been
  596. // completely built. It will iterate over all the terms and reduce the
  597. // length prefixes from byte-based to WCHAR-based - i.e. the lengths
  598. // will be divided by two. Once this method has been called, it will
  599. // no longer be possible to search for terms in the term list.
  600. // We do this so that the direct pointer refs to terms that end up in the
  601. // query result list point to correct WCHAR-based length prefixes.
  602. STDMETHODIMP
  603. CITIndexObjBridge::AdjustQueryResultTerms(void)
  604. {
  605. if (m_lpsipbTermHit != NULL)
  606. {
  607. LST lstWord;
  608. LONG lWordInfo = -1L;
  609. LPVOID pvWordInfo = NULL;
  610. while (SUCCEEDED(MVStopListEnumWords(m_lpsipbTermHit, &lstWord,
  611. &lWordInfo, &pvWordInfo)))
  612. {
  613. ITASSERT(*((WORD *)lstWord) % sizeof(WCHAR) == 0);
  614. *((WORD *)lstWord) /= sizeof(WCHAR);
  615. }
  616. }
  617. return (S_OK);
  618. }
  619. //---------------------------------------------------------------------------
  620. // Private Method Implementations
  621. //---------------------------------------------------------------------------
  622. HRESULT
  623. CITIndexObjBridge::ReallocBuffer(HGLOBAL *phmemBuf, DWORD *pcbBufCur,
  624. DWORD cbBufNew)
  625. {
  626. return (ReallocBufferHmem(phmemBuf, pcbBufCur,
  627. max(cbBufNew, cbConvBufInit)));
  628. }