Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

576 lines
17 KiB

  1. /****************************************************************
  2. * @doc SHROOM EXTERNAL API
  3. *
  4. * A Legsdin added autodoc headers for IITBuildCollect Interface
  5. *
  6. ****************************************************************/
  7. // ftuMain.CPP: Implementation of CITIndexBuild
  8. #include <mvopsys.h>
  9. #ifdef _DEBUG
  10. static char s_aszModule[] = __FILE__; /* For error report */
  11. #endif
  12. #include <windows.h>
  13. #ifdef IA64
  14. #include <itdfguid.h>
  15. #endif
  16. #include <iterror.h>
  17. #include <itpropl.h>
  18. #include <ccfiles.h>
  19. #include <atlinc.h>
  20. #include <itwbrk.h>
  21. #include <itwbrkid.h>
  22. #include <mvsearch.h>
  23. #include <_mvutil.h>
  24. #include <msitstg.h>
  25. #include <orkin.h>
  26. #include "..\svWrdSnk.h"
  27. #include "ftuMain.h"
  28. #define ULMAXTOKENSIZE 1024
  29. #define OCCF_DEFAULT OCCF_TOPICID | OCCF_FIELDID | OCCF_COUNT
  30. HRESULT __stdcall FillText(TEXT_SOURCE * pTextSource)
  31. {
  32. return E_FAIL;//WBREAK_E_END_OF_TEXT;
  33. }
  34. CITIndexBuild::CITIndexBuild()
  35. {
  36. m_fInitialized = FALSE;
  37. m_fIsDirty = FALSE;
  38. m_piWordSink = NULL;
  39. m_piwb = NULL;
  40. m_piwbConfig = NULL;
  41. m_lpipb = NULL;
  42. m_dwUID = m_dwVFLD = m_dwDType = m_dwWordCount = m_dwCodePage = 0;
  43. m_lpbfText = NULL;
  44. m_dwOccFlags = OCCF_DEFAULT;
  45. }
  46. CITIndexBuild::~CITIndexBuild()
  47. {
  48. (void)Close();
  49. }
  50. /************************************************************************
  51. * @method STDMETHODIMP | IITBuildCollect | GetTypeString |
  52. * Returns a prefix to use when the storage or stream object is created.
  53. *
  54. * @parm LPWSTR | pPrefix | Pointer to a buffer in which to copy the prefix
  55. * @parm DWORD | *pLen | Length of the buffer
  56. *
  57. * @rvalue S_OK | The operation completed successfully
  58. *
  59. *
  60. * @comm If you are creating a new build object, you need to decide on a
  61. * unique prefix to identify that object. Word wheels use $WW, for example.
  62. *
  63. ************************************************************************/
  64. STDMETHODIMP CITIndexBuild::GetTypeString(LPWSTR pPrefix, DWORD *pLen)
  65. {
  66. DWORD dwLen = (DWORD) WSTRLEN (SZ_GP_STORAGE) + 1;
  67. if (NULL == pPrefix)
  68. {
  69. *pLen = dwLen;
  70. return S_OK;
  71. }
  72. if (pLen && *pLen < dwLen)
  73. {
  74. *pLen = dwLen;
  75. return S_OK;
  76. }
  77. if (pLen)
  78. *pLen = dwLen;
  79. WSTRCPY (pPrefix, SZ_FI_STREAM);
  80. return S_OK;
  81. } /* GetTypeString */
  82. /****************************************************************
  83. * @method STDMETHODIMP | IITBuildCollect | SetConfigInfo |
  84. * Passes initialization parameters to a build object.
  85. *
  86. * @parm IITDatabase | *piitdb | Pointer to database
  87. * @parm VARARG | vaParams | Configuration parameters
  88. *
  89. * @rvalue S_OK | The operation completed successfully.
  90. * @comm Call this method before calling InitHelperInstance.
  91. *
  92. ****************************************************************/
  93. // This must be called before InitHelperInstance!
  94. STDMETHODIMP CITIndexBuild::SetConfigInfo
  95. (IITDatabase *piitdb, VARARG vaParams)
  96. {
  97. if(vaParams.dwArgc)
  98. {
  99. m_dwOccFlags = 0;
  100. // Work through params backwards
  101. // If we add more params we may need to scan forward
  102. for (int loop = vaParams.dwArgc; loop; --loop)
  103. {
  104. LPWSTR pwstr = (LPWSTR)vaParams.Argv[loop - 1];
  105. if(!WSTRICMP(pwstr, L"OCC_VFLD"))
  106. m_dwOccFlags |= OCCF_FIELDID;
  107. else if(!WSTRICMP(pwstr, L"OCC_UID"))
  108. m_dwOccFlags |= OCCF_TOPICID;
  109. else if(!WSTRICMP(pwstr, L"OCC_COUNT"))
  110. m_dwOccFlags |= OCCF_COUNT;
  111. else if(!WSTRICMP(pwstr, L"OCC_LENGTH"))
  112. m_dwOccFlags |= OCCF_LENGTH;
  113. else if(!WSTRICMP(pwstr, L"OCC_OFFSET"))
  114. m_dwOccFlags |= OCCF_OFFSET;
  115. else if(!WSTRICMP(pwstr, L"OCC_NONE"))
  116. {
  117. m_dwOccFlags = 0;
  118. break;
  119. }
  120. }
  121. }
  122. return S_OK;
  123. } /* SetConfigInfo */
  124. /********************************************************************
  125. * @method HRESULT WINAPI | IITBuildCollect | InitHelperInstance |
  126. * Allows you to configure a helper object used by a
  127. * build object (such as sort objects for a word wheel, or breaker
  128. * objects for a full-text index).
  129. *
  130. * @parm DWORD | dwHelperObjInstance | Helper object instance ID.
  131. * @parm IITDatabase | *pITDatabase | Pointer to database.
  132. * @parm DWORD | dwCodePage | Code page identifier.
  133. * @parm LCID | lcid | Locale identifier.
  134. * @parm VARARG | vaDword | Flags you want to use to configure the object.
  135. * @parm VARARG | vaString | String parameters you want to use to
  136. * configure the object.
  137. *
  138. * @rvalue E_FAIL | The object is already initialized or file create failed
  139. *
  140. ********************************************************************/
  141. STDMETHODIMP CITIndexBuild::InitHelperInstance(
  142. DWORD dwHelperObjInstance,
  143. IITDatabase *pITDatabase, DWORD dwCodePage,
  144. LCID lcid, VARARG vaDword, VARARG vaString
  145. )
  146. {
  147. if (TRUE == m_fInitialized)
  148. return SetErrReturn(E_ALREADYINIT);
  149. HRESULT hr = S_OK;
  150. BOOL fLicense;
  151. IPersistStreamInit *piipstm;
  152. m_dwCodePage = dwCodePage;
  153. // Open nested indexer
  154. INDEXINFO IndexInfo;
  155. IndexInfo.dwMemSize = 0x100000;
  156. IndexInfo.Occf = m_dwOccFlags;
  157. IndexInfo.Idxf = 0;
  158. IndexInfo.dwBlockSize = 0; // Use default
  159. IndexInfo.dwBreakerInstID = dwHelperObjInstance;
  160. IndexInfo.dwCodePageID = dwCodePage;
  161. IndexInfo.lcid = lcid;
  162. if (NULL == (m_lpipb = MVIndexInitiate(&IndexInfo, &hr)))
  163. SetErrCode(&hr, E_FAIL);
  164. // Set up the helper (breaker)
  165. if (SUCCEEDED(hr))
  166. {
  167. // Get the Breaker
  168. hr = pITDatabase->GetObject
  169. (dwHelperObjInstance, IID_IWordBreaker, (void **)&m_piwb);
  170. }
  171. // Config the breaker if it is supported
  172. if (SUCCEEDED(hr) &&
  173. SUCCEEDED(hr = m_piwb->Init(FALSE, ULMAXTOKENSIZE, &fLicense)))
  174. {
  175. if (SUCCEEDED(pITDatabase->GetObject (dwHelperObjInstance,
  176. IID_IWordBreakerConfig, (void **)&m_piwbConfig)))
  177. {
  178. // We don't really care if these fail
  179. hr = m_piwbConfig->SetLocaleInfo(dwCodePage, lcid);
  180. hr = m_piwbConfig->SetBreakWordType(IITWBC_BREAKTYPE_TEXT);
  181. if (vaDword.dwArgc >= 1)
  182. {
  183. hr = m_piwbConfig->SetControlInfo(*(LPDWORD)vaDword.Argv, 0);
  184. }
  185. IFSStorage *pifsstg = NULL;
  186. IStream *piistm;
  187. if (vaString.dwArgc)
  188. { // Create ITSS stuff
  189. hr = CoCreateInstance(CLSID_IFSStorage, NULL,
  190. CLSCTX_INPROC_SERVER, IID_IFSStorage, (VOID **)&pifsstg);
  191. ITASSERT(SUCCEEDED(hr));
  192. }
  193. if(vaString.dwArgc >= 1 && *(LPWSTR)vaString.Argv[0])
  194. {
  195. if(SUCCEEDED(pifsstg->FSOpenStream((LPWSTR)vaString.Argv[0],
  196. STGM_SHARE_DENY_WRITE | STGM_READWRITE, &piistm)))
  197. {
  198. hr = m_piwbConfig->LoadExternalBreakerData
  199. (piistm, IITWBC_EXTDATA_CHARTABLE);
  200. piistm->Release();
  201. }
  202. }
  203. if (vaString.dwArgc >= 2 && *(LPWSTR)vaString.Argv[1])
  204. {
  205. if (SUCCEEDED(pifsstg->FSOpenStream((LPWSTR)vaString.Argv[1],
  206. STGM_SHARE_DENY_WRITE | STGM_READWRITE, &piistm)))
  207. {
  208. hr = m_piwbConfig->LoadExternalBreakerData
  209. (piistm, IITWBC_EXTDATA_STOPWORDLIST);
  210. piistm->Release();
  211. }
  212. }
  213. if (vaString.dwArgc >= 3 && *(LPWSTR)vaString.Argv[2])
  214. {
  215. // Get the CLSID and instantiate the stemmer
  216. CLSID clsid;
  217. IStemmer *pStemmer;
  218. hr = CLSIDFromProgID((LPWSTR)vaString.Argv[2], &clsid);
  219. if(SUCCEEDED(hr))
  220. hr = CoCreateInstance(clsid, NULL, CLSCTX_INPROC_SERVER,
  221. IID_IStemmer, (VOID **)&pStemmer);
  222. if (SUCCEEDED(hr))
  223. {
  224. if(SUCCEEDED(hr = pStemmer->QueryInterface
  225. (IID_IPersistStreamInit, (void **)&piipstm)))
  226. {
  227. piipstm->InitNew();
  228. piipstm->Release();
  229. }
  230. (void)pStemmer->Init(ULMAXTOKENSIZE, &fLicense);
  231. // Check for IStemmerConfig interface
  232. IStemmerConfig *pistemConfig;
  233. hr = pStemmer->QueryInterface
  234. (IID_IStemmerConfig, (void **)&pistemConfig);
  235. if (SUCCEEDED(hr))
  236. {
  237. hr = pistemConfig->SetLocaleInfo(dwCodePage, lcid);
  238. pistemConfig->Release();
  239. }
  240. hr = m_piwbConfig->SetWordStemmer(clsid, pStemmer);
  241. pStemmer->Release();
  242. }
  243. }
  244. if (pifsstg)
  245. pifsstg->Release();
  246. hr = S_OK;
  247. }
  248. }
  249. if(SUCCEEDED(hr) &&
  250. SUCCEEDED(hr = CoCreateInstance(CLSID_IITWordSink, NULL,
  251. CLSCTX_INPROC_SERVER, IID_IWordSink, (LPVOID *)&m_piWordSink)) &&
  252. SUCCEEDED(hr =
  253. ((CDefWordSink *)m_piWordSink)->SetLocaleInfo(dwCodePage, lcid))
  254. && SUCCEEDED(hr = ((CDefWordSink *)m_piWordSink)->SetIPB(m_lpipb)))
  255. {
  256. m_fInitialized = TRUE;
  257. }
  258. return hr;
  259. } /* InitHelperInstance */
  260. /****************************************************************
  261. * @method STDMETHODIMP | IITBuildCollect | SetEntry |
  262. * Sets properties for a build object.
  263. *
  264. *
  265. * @parm LPCWSTR | szDest | Property destination
  266. * @parm IITPropList | *pPropList | Pointer to property list
  267. *
  268. * @comm Like CSvDoc::AddObjectEntry, this method is called
  269. * several times for all the properties that you need to set.
  270. ****************************************************************/
  271. STDMETHODIMP CITIndexBuild::SetEntry(LPCWSTR szDest, IITPropList *pPropList)
  272. {
  273. if (FALSE == m_fInitialized)
  274. return SetErrReturn(E_NOTINIT);
  275. m_fIsDirty = TRUE;
  276. CProperty cProp;
  277. HRESULT hr;
  278. LPWSTR pwstrIndexText;
  279. BOOL fTerm = FALSE;
  280. if(SUCCEEDED(hr = pPropList->Get(STDPROP_INDEX_BREAK, cProp)))
  281. {
  282. SendTextToBreaker();
  283. return S_OK;
  284. }
  285. // Check for REQUIRED text (can be either INDEX_TEXT or INDEX_TERM)
  286. if(FAILED(hr = pPropList->Get(STDPROP_INDEX_TEXT, cProp)))
  287. {
  288. if(SUCCEEDED(hr = pPropList->Get(STDPROP_INDEX_TERM, cProp)))
  289. fTerm = TRUE;
  290. }
  291. if(SUCCEEDED(hr))
  292. pwstrIndexText = (LPWSTR)cProp.lpszwData;
  293. // Check for REQUIRED UID
  294. if (SUCCEEDED(hr) &&
  295. SUCCEEDED(hr = pPropList->Get(STDPROP_UID, cProp)) &&
  296. m_dwUID != cProp.dwValue)
  297. {
  298. SendTextToBreaker();
  299. m_dwUID = cProp.dwValue;
  300. m_dwWordCount = 0;
  301. }
  302. // Check for OPTIONAL VFLD
  303. if (SUCCEEDED(hr) &&
  304. SUCCEEDED(pPropList->Get(STDPROP_INDEX_VFLD, cProp)) &&
  305. m_dwVFLD != cProp.dwValue)
  306. {
  307. SendTextToBreaker();
  308. m_dwVFLD = cProp.dwValue;
  309. }
  310. // Check for OPTIONAL DTYPE
  311. if (SUCCEEDED(hr) && m_piwbConfig &&
  312. SUCCEEDED(pPropList->Get(STDPROP_INDEX_DTYPE, cProp))
  313. && m_dwDType != cProp.dwValue)
  314. {
  315. SendTextToBreaker();
  316. hr = m_piwbConfig->SetBreakWordType(cProp.dwValue);
  317. }
  318. DWORD cchText;
  319. if (SUCCEEDED(pPropList->Get(STDPROP_INDEX_LENGTH, cProp)))
  320. cchText = (WORD)cProp.dwValue;
  321. else
  322. cchText = (DWORD) WSTRLEN(pwstrIndexText);
  323. if (SUCCEEDED(hr))
  324. {
  325. if (fTerm)
  326. {
  327. // Get actual index term length
  328. // Fill-ou occurrence info
  329. OCC occ;
  330. occ.dwFieldId = m_dwVFLD;
  331. occ.dwTopicID = m_dwUID;
  332. occ.dwCount = m_dwWordCount++;
  333. // Is there a diffrerent highlite length?
  334. if (SUCCEEDED(pPropList->Get(STDPROP_INDEX_TERM_RAW_LENGTH, cProp)))
  335. occ.wWordLen = (WORD)cProp.dwValue;
  336. else
  337. occ.wWordLen = (WORD)cchText;
  338. if (cchText > 255)
  339. return SetErrReturn(E_UNEXPECTED);
  340. char strTerm[256 + sizeof(WORD)];
  341. if(!WideCharToMultiByte(m_dwCodePage, 0, pwstrIndexText, cchText,
  342. strTerm + sizeof(WORD), 255, NULL, NULL))
  343. {
  344. // The conversion failed! -- very bad
  345. return SetErrReturn(E_UNEXPECTED);
  346. }
  347. *(LPWORD)strTerm = (SHORT)cchText;
  348. hr = MVIndexAddWord(m_lpipb, (LPB)strTerm, &occ);
  349. }
  350. else
  351. {
  352. // Accumulate text until we need to send it along
  353. if (!DynBufferAppend (m_lpbfText,
  354. (LPBYTE)pwstrIndexText, cchText * sizeof (WCHAR)))
  355. SetErr(&hr, E_OUTOFMEMORY);
  356. }
  357. }
  358. return hr;
  359. } /* SetEntry */
  360. STDMETHODIMP CITIndexBuild::SendTextToBreaker(void)
  361. {
  362. HRESULT hr;
  363. // TODO: Call these only for our own word sink
  364. hr = ((CDefWordSink *)m_piWordSink)->SetDocID(m_dwUID);
  365. hr = ((CDefWordSink *)m_piWordSink)->SetVFLD(m_dwVFLD);
  366. // TODO: We can set TYPE here, so we can use the same breaker instance for
  367. // multiple FTI and they will not interfere with each other. This would be
  368. // different than current behavior, however, so I have left it out for now.
  369. TEXT_SOURCE tsText;
  370. tsText.pfnFillTextBuffer = FillText;
  371. tsText.awcBuffer = (LPWSTR)DynBufferPtr(m_lpbfText);
  372. tsText.iEnd = DynBufferLen(m_lpbfText) / sizeof (WCHAR);
  373. tsText.iCur = 0;
  374. hr = m_piwb->BreakText(&tsText, m_piWordSink, NULL);
  375. DynBufferReset(m_lpbfText);
  376. return hr;
  377. } /* SendTextToBreaker */
  378. /*****************************************************************
  379. * @method STDMETHODIMP | IITBuildCollect | Close |
  380. * Closes the build object and frees memory.
  381. *
  382. * @Rvalue E_NOTINIT | Object has not been initialized.
  383. * @comm Calling this method is optional, but the build object must
  384. * implement it. Any object that implements IITBuildCollect interface
  385. * must support the Close method.
  386. *
  387. ****************************************************************/
  388. STDMETHODIMP CITIndexBuild::Close(void)
  389. {
  390. if (FALSE == m_fInitialized)
  391. return SetErrReturn(E_NOTINIT);
  392. if(m_piwb)
  393. m_piwb->Release();
  394. if(m_piWordSink)
  395. m_piWordSink->Release();
  396. if(m_piwbConfig)
  397. m_piwbConfig->Release();
  398. if(m_piwbConfig)
  399. m_piwbConfig = NULL;
  400. if (m_lpipb)
  401. MVIndexDispose(m_lpipb);
  402. m_fInitialized = FALSE;
  403. m_fIsDirty = FALSE;
  404. m_piWordSink = NULL;
  405. m_piwb = NULL;
  406. m_piwbConfig = NULL;
  407. m_lpipb = NULL;
  408. m_dwUID = m_dwVFLD = m_dwDType = m_dwWordCount = m_dwCodePage = 0;
  409. if (m_lpbfText)
  410. {
  411. DynBufferFree (m_lpbfText);
  412. m_lpbfText = NULL;
  413. }
  414. // Reset the occurrence flags to the default
  415. m_dwOccFlags = OCCF_DEFAULT;
  416. return S_OK;
  417. } /* Close */
  418. STDMETHODIMP CITIndexBuild::InitNew(void)
  419. {
  420. if(NULL == (m_lpbfText = DynBufferAlloc (0x4000)))
  421. return SetErrReturn(E_OUTOFMEMORY);
  422. return S_OK;
  423. } /* IPersistStreamInit::InitNew */
  424. STDMETHODIMP CITIndexBuild::GetClassID(CLSID *pClsID)
  425. {
  426. if (NULL == pClsID
  427. || IsBadWritePtr(pClsID, sizeof(CLSID)))
  428. return SetErrReturn(E_INVALIDARG);
  429. *pClsID = CLSID_IITIndexBuild;
  430. return S_OK;
  431. } /* GetClassID */
  432. inline STDMETHODIMP CITIndexBuild::IsDirty(void)
  433. {
  434. return m_fIsDirty ? S_OK : S_FALSE;
  435. } /* IsDirty */
  436. STDMETHODIMP CITIndexBuild::Load(IStream *piistm)
  437. {
  438. return SetErrReturn(E_NOTIMPL);
  439. } /* IPersistStreamInit::Load */
  440. STDMETHODIMP CITIndexBuild::Save(IStream *piistm, BOOL fClearDirty)
  441. {
  442. if (FALSE == m_fInitialized)
  443. return SetErrReturn(E_NOTINIT);
  444. SendTextToBreaker();
  445. HRESULT hr;
  446. HFPB hfpbSave = FpbFromHf(piistm, &hr);
  447. if (SUCCEEDED(hr))
  448. {
  449. hr = MVIndexBuild (0, m_lpipb, hfpbSave, NULL);
  450. MVIndexDispose (m_lpipb);
  451. m_lpipb = NULL;
  452. if (fClearDirty)
  453. m_fIsDirty = FALSE;
  454. FreeHfpb(hfpbSave);
  455. }
  456. return hr;
  457. } /* IPersistStreamInit::Save */
  458. STDMETHODIMP CITIndexBuild::GetSizeMax(ULARGE_INTEGER *pcbSize)
  459. {
  460. return SetErrReturn(E_NOTIMPL);
  461. } /* GetSizeMax */
  462. // ********************* IPersisFile Methods *********************
  463. STDMETHODIMP CITIndexBuild::Load(LPCWSTR pszFileName, DWORD dwMode)
  464. {
  465. return SetErrReturn(E_NOTIMPL);
  466. } /* IPersistFile::Load */
  467. STDMETHODIMP CITIndexBuild::Save(LPCWSTR pszFileName, BOOL fRemember)
  468. {
  469. return SetErrReturn(E_NOTIMPL);
  470. } /* IPersistFile::Save */
  471. STDMETHODIMP CITIndexBuild::SaveCompleted(LPCWSTR pszFileName)
  472. {
  473. return SetErrReturn(E_NOTIMPL);
  474. } /* IPersistFile::SaveCompleted */
  475. STDMETHODIMP CITIndexBuild::GetCurFile(LPWSTR *ppszFileName)
  476. {
  477. return SetErrReturn(E_NOTIMPL);
  478. } /* IPersistFile::GetCurFile */