|
|
/****************************************************************
* @doc SHROOM EXTERNAL API * * A Legsdin added autodoc headers for IITBuildCollect Interface * ****************************************************************/ // ftuMain.CPP: Implementation of CITIndexBuild
#include <mvopsys.h>
#ifdef _DEBUG
static char s_aszModule[] = __FILE__; /* For error report */ #endif
#include <windows.h>
#ifdef IA64
#include <itdfguid.h>
#endif
#include <iterror.h>
#include <itpropl.h>
#include <ccfiles.h>
#include <atlinc.h>
#include <itwbrk.h>
#include <itwbrkid.h>
#include <mvsearch.h>
#include <_mvutil.h>
#include <msitstg.h>
#include <orkin.h>
#include "..\svWrdSnk.h"
#include "ftuMain.h"
// Maximum token size passed to the word breaker and stemmer Init calls.
#define ULMAXTOKENSIZE 1024
// Occurrence fields recorded when SetConfigInfo supplies no parameters.
// Parenthesized so the OR-expression stays a single operand wherever the
// macro is expanded (e.g. next to & or ==).
#define OCCF_DEFAULT (OCCF_TOPICID | OCCF_FIELDID | OCCF_COUNT)
// Refill callback stored in TEXT_SOURCE::pfnFillTextBuffer by
// SendTextToBreaker. It never supplies additional text: every call fails.
HRESULT __stdcall FillText(TEXT_SOURCE * pTextSource)
{
    // Alternative code noted by the original author: WBREAK_E_END_OF_TEXT
    return E_FAIL;
}
// Constructs an uninitialized builder: no helper objects, no text buffer,
// all counters zero and the default occurrence flags.
CITIndexBuild::CITIndexBuild()
{
    // State flags
    m_fIsDirty     = FALSE;
    m_fInitialized = FALSE;

    // Helper objects and the nested indexer (acquired in InitHelperInstance)
    m_lpipb      = NULL;
    m_piwbConfig = NULL;
    m_piwb       = NULL;
    m_piWordSink = NULL;

    // Current topic / field / data type, word counter and code page
    m_dwCodePage  = 0;
    m_dwWordCount = 0;
    m_dwDType     = 0;
    m_dwVFLD      = 0;
    m_dwUID       = 0;

    // Accumulated text buffer (allocated in InitNew)
    m_lpbfText = NULL;

    m_dwOccFlags = OCCF_DEFAULT;
}
// Destructor. Close() returns E_NOTINIT without releasing anything when
// m_fInitialized is FALSE, so anything still held afterwards (helper objects
// assigned by an InitHelperInstance call that did not complete, or the text
// buffer allocated by InitNew) is released here instead of being leaked.
CITIndexBuild::~CITIndexBuild()
{
    (void)Close();

    if (m_piwb)
    {
        m_piwb->Release();
        m_piwb = NULL;
    }
    if (m_piWordSink)
    {
        m_piWordSink->Release();
        m_piWordSink = NULL;
    }
    if (m_piwbConfig)
    {
        m_piwbConfig->Release();
        m_piwbConfig = NULL;
    }
    if (m_lpipb)
    {
        MVIndexDispose(m_lpipb);
        m_lpipb = NULL;
    }
    if (m_lpbfText)
    {
        DynBufferFree (m_lpbfText);
        m_lpbfText = NULL;
    }
}

/************************************************************************
 * @method STDMETHODIMP | IITBuildCollect | GetTypeString |
 * Returns a prefix to use when the storage or stream object is created.
 *
 * @parm LPWSTR | pPrefix | Pointer to a buffer in which to copy the prefix
 * @parm DWORD | *pLen | Length of the buffer
 *
 * @rvalue S_OK | The operation completed successfully
 *
 *
 * @comm If you are creating a new build object, you need to decide on a
 * unique prefix to identify that object. Word wheels use $WW, for example.
 *
 ************************************************************************/
// Copies the SZ_FI_STREAM prefix into pPrefix and reports the required
// length (characters, including the terminator) through pLen.
//   pPrefix == NULL        : size query, only *pLen is written.
//   *pLen smaller than need: *pLen is updated, nothing is copied, S_OK.
//   pLen == NULL           : the prefix is copied without a size check.
// Returns E_INVALIDARG when both pointers are NULL.
STDMETHODIMP CITIndexBuild::GetTypeString(LPWSTR pPrefix, DWORD *pLen)
{
    // Measure the string that is actually copied below, so the reported
    // size can never be smaller than what WSTRCPY writes.
    const DWORD dwLen = (DWORD) WSTRLEN (SZ_FI_STREAM) + 1;

    if (NULL == pPrefix)
    {
        // A size query can only answer through pLen.
        if (NULL == pLen)
            return SetErrReturn(E_INVALIDARG);

        *pLen = dwLen;
        return S_OK;
    }

    if (pLen)
    {
        const BOOL fTooSmall = (*pLen < dwLen);

        *pLen = dwLen;
        if (fTooSmall)
            return S_OK;    // caller's buffer is too small; nothing copied
    }

    WSTRCPY (pPrefix, SZ_FI_STREAM);
    return S_OK;
} /* GetTypeString */
/****************************************************************
 * @method STDMETHODIMP | IITBuildCollect | SetConfigInfo |
 * Passes initialization parameters to a build object.
 *
 * @parm IITDatabase | *piitdb | Pointer to database
 * @parm VARARG | vaParams | Configuration parameters
 *
 * @rvalue S_OK | The operation completed successfully.
 * @comm Call this method before calling InitHelperInstance.
 *
 ****************************************************************/
// This must be called before InitHelperInstance!
//
// Each parameter is a string naming one occurrence field to record
// (OCC_VFLD, OCC_UID, OCC_COUNT, OCC_LENGTH, OCC_OFFSET). OCC_NONE clears
// the flags and stops the scan. Unrecognized strings are ignored. With no
// parameters at all the current flags are left untouched.
STDMETHODIMP CITIndexBuild::SetConfigInfo (IITDatabase *piitdb, VARARG vaParams)
{
    // Parameter name -> occurrence flag it switches on
    struct OCCPARAM
    {
        LPCWSTR pwszName;
        DWORD   dwFlag;
    };
    static const OCCPARAM s_rgOccParams[] =
    {
        { L"OCC_VFLD",   OCCF_FIELDID },
        { L"OCC_UID",    OCCF_TOPICID },
        { L"OCC_COUNT",  OCCF_COUNT   },
        { L"OCC_LENGTH", OCCF_LENGTH  },
        { L"OCC_OFFSET", OCCF_OFFSET  },
    };
    const int cOccParams = sizeof(s_rgOccParams) / sizeof(s_rgOccParams[0]);

    if (!vaParams.dwArgc)
        return S_OK;

    m_dwOccFlags = 0;

    // Work through params backwards
    // If we add more params we may need to scan forward
    int cRemaining = vaParams.dwArgc;
    while (cRemaining)
    {
        LPWSTR pwszParam = (LPWSTR)vaParams.Argv[cRemaining - 1];
        --cRemaining;

        int iMatch = 0;
        while (iMatch < cOccParams
            && 0 != WSTRICMP(pwszParam, s_rgOccParams[iMatch].pwszName))
        {
            ++iMatch;
        }

        if (iMatch < cOccParams)
        {
            m_dwOccFlags |= s_rgOccParams[iMatch].dwFlag;
            continue;
        }

        if (0 == WSTRICMP(pwszParam, L"OCC_NONE"))
        {
            // No occurrence data at all: drop whatever was collected so far
            m_dwOccFlags = 0;
            break;
        }
    }

    return S_OK;
} /* SetConfigInfo */
/********************************************************************
 * @method HRESULT WINAPI | IITBuildCollect | InitHelperInstance |
 * Allows you to configure a helper object used by a
 * build object (such as sort objects for a word wheel, or breaker
 * objects for a full-text index).
 *
 * @parm DWORD | dwHelperObjInstance | Helper object instance ID.
 * @parm IITDatabase | *pITDatabase | Pointer to database.
 * @parm DWORD | dwCodePage | Code page identifier.
 * @parm LCID | lcid | Locale identifier.
 * @parm VARARG | vaDword | Flags you want to use to configure the object.
 * @parm VARARG | vaString | String parameters you want to use to
 * configure the object.
 *
 * @rvalue E_ALREADYINIT | The object is already initialized
 * @rvalue E_INVALIDARG | pITDatabase is NULL
 * @rvalue E_FAIL | The nested indexer could not be created
 *
 * @comm vaString[0] names a character-table stream, vaString[1] a
 * stop-word-list stream and vaString[2] the ProgID of a stemmer; empty
 * strings are skipped. Failures while configuring the breaker are
 * ignored. If the call fails, everything it acquired is released again.
 *
 ********************************************************************/
STDMETHODIMP CITIndexBuild::InitHelperInstance(
    DWORD dwHelperObjInstance,
    IITDatabase *pITDatabase,
    DWORD dwCodePage,
    LCID lcid,
    VARARG vaDword,
    VARARG vaString
    )
{
    if (TRUE == m_fInitialized)
        return SetErrReturn(E_ALREADYINIT);

    // The breaker is fetched from the database below; fail cleanly
    // instead of dereferencing a NULL pointer.
    if (NULL == pITDatabase)
        return SetErrReturn(E_INVALIDARG);

    HRESULT hr = S_OK;
    BOOL fLicense;
    IPersistStreamInit *piipstm;

    m_dwCodePage = dwCodePage;

    // Open nested indexer
    INDEXINFO IndexInfo;
    IndexInfo.dwMemSize = 0x100000;
    IndexInfo.Occf = m_dwOccFlags;
    IndexInfo.Idxf = 0;
    IndexInfo.dwBlockSize = 0;      // Use default
    IndexInfo.dwBreakerInstID = dwHelperObjInstance;
    IndexInfo.dwCodePageID = dwCodePage;
    IndexInfo.lcid = lcid;
    if (NULL == (m_lpipb = MVIndexInitiate(&IndexInfo, &hr)))
        SetErrCode(&hr, E_FAIL);

    // Set up the helper (breaker)
    if (SUCCEEDED(hr))
    {
        // Get the Breaker
        hr = pITDatabase->GetObject
            (dwHelperObjInstance, IID_IWordBreaker, (void **)&m_piwb);
    }

    // Config the breaker if it is supported
    if (SUCCEEDED(hr)
        && SUCCEEDED(hr = m_piwb->Init(FALSE, ULMAXTOKENSIZE, &fLicense)))
    {
        if (SUCCEEDED(pITDatabase->GetObject (dwHelperObjInstance,
            IID_IWordBreakerConfig, (void **)&m_piwbConfig)))
        {
            // We don't really care if these fail
            hr = m_piwbConfig->SetLocaleInfo(dwCodePage, lcid);
            hr = m_piwbConfig->SetBreakWordType(IITWBC_BREAKTYPE_TEXT);
            if (vaDword.dwArgc >= 1)
            {
                hr = m_piwbConfig->SetControlInfo(*(LPDWORD)vaDword.Argv, 0);
            }

            IFSStorage *pifsstg = NULL;
            IStream *piistm;
            if (vaString.dwArgc)
            {
                // Create ITSS stuff
                hr = CoCreateInstance(CLSID_IFSStorage, NULL,
                    CLSCTX_INPROC_SERVER, IID_IFSStorage, (VOID **)&pifsstg);
                ITASSERT(SUCCEEDED(hr));
            }

            // pifsstg stays NULL when the storage object could not be
            // created; the external data files are then skipped rather
            // than calling through a NULL pointer.

            // Character table
            if (pifsstg && vaString.dwArgc >= 1 && *(LPWSTR)vaString.Argv[0])
            {
                if (SUCCEEDED(pifsstg->FSOpenStream((LPWSTR)vaString.Argv[0],
                    STGM_SHARE_DENY_WRITE | STGM_READWRITE, &piistm)))
                {
                    hr = m_piwbConfig->LoadExternalBreakerData
                        (piistm, IITWBC_EXTDATA_CHARTABLE);
                    piistm->Release();
                }
            }

            // Stop word list
            if (pifsstg && vaString.dwArgc >= 2 && *(LPWSTR)vaString.Argv[1])
            {
                if (SUCCEEDED(pifsstg->FSOpenStream((LPWSTR)vaString.Argv[1],
                    STGM_SHARE_DENY_WRITE | STGM_READWRITE, &piistm)))
                {
                    hr = m_piwbConfig->LoadExternalBreakerData
                        (piistm, IITWBC_EXTDATA_STOPWORDLIST);
                    piistm->Release();
                }
            }

            // Stemmer (does not need the storage object)
            if (vaString.dwArgc >= 3 && *(LPWSTR)vaString.Argv[2])
            {
                // Get the CLSID and instantiate the stemmer
                CLSID clsid;
                IStemmer *pStemmer = NULL;
                hr = CLSIDFromProgID((LPWSTR)vaString.Argv[2], &clsid);
                if (SUCCEEDED(hr))
                    hr = CoCreateInstance(clsid, NULL, CLSCTX_INPROC_SERVER,
                        IID_IStemmer, (VOID **)&pStemmer);
                if (SUCCEEDED(hr))
                {
                    if (SUCCEEDED(hr = pStemmer->QueryInterface
                        (IID_IPersistStreamInit, (void **)&piipstm)))
                    {
                        piipstm->InitNew();
                        piipstm->Release();
                    }
                    (void)pStemmer->Init(ULMAXTOKENSIZE, &fLicense);

                    // Check for IStemmerConfig interface
                    IStemmerConfig *pistemConfig;
                    hr = pStemmer->QueryInterface
                        (IID_IStemmerConfig, (void **)&pistemConfig);
                    if (SUCCEEDED(hr))
                    {
                        hr = pistemConfig->SetLocaleInfo(dwCodePage, lcid);
                        pistemConfig->Release();
                    }
                    hr = m_piwbConfig->SetWordStemmer(clsid, pStemmer);
                    pStemmer->Release();
                }
            }

            if (pifsstg)
                pifsstg->Release();

            // Breaker configuration is best effort
            hr = S_OK;
        }
    }

    // Create the word sink and attach it to the nested indexer
    if (SUCCEEDED(hr)
        && SUCCEEDED(hr = CoCreateInstance(CLSID_IITWordSink, NULL,
            CLSCTX_INPROC_SERVER, IID_IWordSink, (LPVOID *)&m_piWordSink))
        && SUCCEEDED(hr = ((CDefWordSink *)m_piWordSink)->SetLocaleInfo(dwCodePage, lcid))
        && SUCCEEDED(hr = ((CDefWordSink *)m_piWordSink)->SetIPB(m_lpipb)))
    {
        m_fInitialized = TRUE;
    }

    if (FAILED(hr))
    {
        // m_fInitialized is still FALSE, so Close() will refuse to run.
        // Release whatever was acquired above so nothing is leaked and a
        // later retry starts from a clean state.
        if (m_piwb)
        {
            m_piwb->Release();
            m_piwb = NULL;
        }
        if (m_piWordSink)
        {
            m_piWordSink->Release();
            m_piWordSink = NULL;
        }
        if (m_piwbConfig)
        {
            m_piwbConfig->Release();
            m_piwbConfig = NULL;
        }
        if (m_lpipb)
        {
            MVIndexDispose(m_lpipb);
            m_lpipb = NULL;
        }
    }

    return hr;
} /* InitHelperInstance */
/****************************************************************
 * @method STDMETHODIMP | IITBuildCollect | SetEntry |
 * Sets properties for a build object.
 *
 *
 * @parm LPCWSTR | szDest | Property destination
 * @parm IITPropList | *pPropList | Pointer to property list
 *
 * @rvalue E_NOTINIT | InitHelperInstance has not completed
 * @rvalue E_UNEXPECTED | An index term is longer than 255 characters
 * or could not be converted to the index code page
 *
 * @comm Like CSvDoc::AddObjectEntry, this method is called
 * several times for all the properties that you need to set.
 * A property list carrying STDPROP_INDEX_BREAK only flushes the
 * accumulated text. Otherwise STDPROP_INDEX_TEXT or STDPROP_INDEX_TERM
 * and STDPROP_UID are required; text is accumulated until the UID,
 * VFLD or DTYPE changes, terms are added to the index directly.
 ****************************************************************/
STDMETHODIMP CITIndexBuild::SetEntry(LPCWSTR szDest, IITPropList *pPropList)
{
    if (FALSE == m_fInitialized)
        return SetErrReturn(E_NOTINIT);
    m_fIsDirty = TRUE;

    CProperty cProp;
    HRESULT hr;
    LPWSTR pwstrIndexText = NULL;
    BOOL fTerm = FALSE;

    // An explicit break request just flushes what has been accumulated
    if (SUCCEEDED(hr = pPropList->Get(STDPROP_INDEX_BREAK, cProp)))
    {
        SendTextToBreaker();
        return S_OK;
    }

    // Check for REQUIRED text (can be either INDEX_TEXT or INDEX_TERM)
    if (FAILED(hr = pPropList->Get(STDPROP_INDEX_TEXT, cProp)))
    {
        if (SUCCEEDED(hr = pPropList->Get(STDPROP_INDEX_TERM, cProp)))
            fTerm = TRUE;
    }
    if (SUCCEEDED(hr))
        pwstrIndexText = (LPWSTR)cProp.lpszwData;

    // Check for REQUIRED UID
    if (SUCCEEDED(hr) && SUCCEEDED(hr = pPropList->Get(STDPROP_UID, cProp))
        && m_dwUID != cProp.dwValue)
    {
        SendTextToBreaker();
        m_dwUID = cProp.dwValue;
        m_dwWordCount = 0;
    }

    // Check for OPTIONAL VFLD
    if (SUCCEEDED(hr) && SUCCEEDED(pPropList->Get(STDPROP_INDEX_VFLD, cProp))
        && m_dwVFLD != cProp.dwValue)
    {
        SendTextToBreaker();
        m_dwVFLD = cProp.dwValue;
    }

    // Check for OPTIONAL DTYPE
    if (SUCCEEDED(hr) && m_piwbConfig
        && SUCCEEDED(pPropList->Get(STDPROP_INDEX_DTYPE, cProp))
        && m_dwDType != cProp.dwValue)
    {
        SendTextToBreaker();
        hr = m_piwbConfig->SetBreakWordType(cProp.dwValue);

        // Remember the type now in effect. Without this the comparison
        // above is always made against 0, so a switch back to type 0 is
        // never applied to the breaker.
        if (SUCCEEDED(hr))
            m_dwDType = cProp.dwValue;
    }

    // A required property is missing (or the breaker rejected the type).
    // pwstrIndexText may not be valid here, so stop before measuring it.
    if (FAILED(hr))
        return hr;

    DWORD cchText;
    if (SUCCEEDED(pPropList->Get(STDPROP_INDEX_LENGTH, cProp)))
        cchText = (WORD)cProp.dwValue;  // NOTE(review): explicit length is truncated to 16 bits
    else
        cchText = (DWORD) WSTRLEN(pwstrIndexText);

    if (fTerm)
    {
        // Fill-out occurrence info
        OCC occ;
        occ.dwFieldId = m_dwVFLD;
        occ.dwTopicID = m_dwUID;
        occ.dwCount = m_dwWordCount++;

        // Is there a different highlight length?
        if (SUCCEEDED(pPropList->Get(STDPROP_INDEX_TERM_RAW_LENGTH, cProp)))
            occ.wWordLen = (WORD)cProp.dwValue;
        else
            occ.wWordLen = (WORD)cchText;

        if (cchText > 255)
            return SetErrReturn(E_UNEXPECTED);

        // The term is handed to the indexer as a WORD length prefix
        // followed by the multibyte characters.
        char strTerm[256 + sizeof(WORD)];
        const int cbTerm = WideCharToMultiByte(m_dwCodePage, 0,
            pwstrIndexText, cchText, strTerm + sizeof(WORD), 255, NULL, NULL);
        if (!cbTerm)
        {
            // The conversion failed! -- very bad
            return SetErrReturn(E_UNEXPECTED);
        }

        // Prefix with the number of bytes actually written; on multibyte
        // code pages this differs from the wide-character count.
        *(LPWORD)strTerm = (WORD)cbTerm;
        hr = MVIndexAddWord(m_lpipb, (LPB)strTerm, &occ);
    }
    else
    {
        // Accumulate text until we need to send it along
        if (!DynBufferAppend (m_lpbfText, (LPBYTE)pwstrIndexText,
            cchText * sizeof (WCHAR)))
        {
            SetErr(&hr, E_OUTOFMEMORY);
        }
    }

    return hr;
} /* SetEntry */
// Hands the text accumulated in m_lpbfText to the word breaker, tagged with
// the current UID and VFLD, then empties the buffer. Returns the result of
// IWordBreaker::BreakText; the results of the two sink setters are ignored.
STDMETHODIMP CITIndexBuild::SendTextToBreaker(void)
{
    // TODO: Call these only for our own word sink
    CDefWordSink *pDefSink = (CDefWordSink *)m_piWordSink;
    (void)pDefSink->SetDocID(m_dwUID);
    (void)pDefSink->SetVFLD(m_dwVFLD);

    // TODO: We can set TYPE here, so we can use the same breaker instance for
    // multiple FTI and they will not interfere with each other. This would be
    // different than current behavior, however, so I have left it out for now.

    // Describe the whole accumulated buffer as a single text source
    TEXT_SOURCE tsAccumulated;
    tsAccumulated.iCur = 0;
    tsAccumulated.iEnd = DynBufferLen(m_lpbfText) / sizeof (WCHAR);
    tsAccumulated.awcBuffer = (LPWSTR)DynBufferPtr(m_lpbfText);
    tsAccumulated.pfnFillTextBuffer = FillText;

    const HRESULT hrBreak = m_piwb->BreakText(&tsAccumulated, m_piWordSink, NULL);

    // The text has been consumed whether or not breaking succeeded
    DynBufferReset(m_lpbfText);
    return hrBreak;
} /* SendTextToBreaker */
/*****************************************************************
 * @method STDMETHODIMP | IITBuildCollect | Close |
 * Closes the build object and frees memory.
 *
 * @Rvalue E_NOTINIT | Object has not been initialized.
 * @comm Calling this method is optional, but the build object must
 * implement it. Any object that implements IITBuildCollect interface
 * must support the Close method.
 *
 ****************************************************************/
STDMETHODIMP CITIndexBuild::Close(void)
{
    if (FALSE == m_fInitialized)
        return SetErrReturn(E_NOTINIT);

    // Release the helper objects, then the nested indexer
    if (m_piwb)
    {
        m_piwb->Release();
        m_piwb = NULL;
    }
    if (m_piWordSink)
    {
        m_piWordSink->Release();
        m_piWordSink = NULL;
    }
    if (m_piwbConfig)
    {
        m_piwbConfig->Release();
        m_piwbConfig = NULL;
    }
    if (m_lpipb)
    {
        MVIndexDispose(m_lpipb);
        m_lpipb = NULL;
    }

    // Drop any text that was still waiting for the breaker
    if (m_lpbfText)
    {
        DynBufferFree (m_lpbfText);
        m_lpbfText = NULL;
    }

    // Back to the freshly constructed state
    m_fIsDirty     = FALSE;
    m_fInitialized = FALSE;

    m_dwCodePage  = 0;
    m_dwWordCount = 0;
    m_dwDType     = 0;
    m_dwVFLD      = 0;
    m_dwUID       = 0;

    // Reset the occurrence flags to the default
    m_dwOccFlags = OCCF_DEFAULT;

    return S_OK;
} /* Close */
// IPersistStreamInit::InitNew: prepares the buffer that SetEntry accumulates
// text into. Returns E_OUTOFMEMORY when the buffer cannot be allocated.
STDMETHODIMP CITIndexBuild::InitNew(void)
{
    if (m_lpbfText)
    {
        // A buffer already exists (InitNew was called before). Empty it
        // rather than allocating a second one and leaking the first.
        DynBufferReset(m_lpbfText);
        return S_OK;
    }

    if (NULL == (m_lpbfText = DynBufferAlloc (0x4000)))
        return SetErrReturn(E_OUTOFMEMORY);

    return S_OK;
} /* IPersistStreamInit::InitNew */
// IPersist::GetClassID: stores CLSID_IITIndexBuild in *pClsID.
// Returns E_INVALIDARG when pClsID is NULL or not writable.
STDMETHODIMP CITIndexBuild::GetClassID(CLSID *pClsID)
{
    if (NULL == pClsID)
        return SetErrReturn(E_INVALIDARG);

    if (IsBadWritePtr(pClsID, sizeof(CLSID)))
        return SetErrReturn(E_INVALIDARG);

    *pClsID = CLSID_IITIndexBuild;
    return S_OK;
} /* GetClassID */
// IPersistStreamInit::IsDirty: S_OK when m_fIsDirty is set, S_FALSE
// otherwise.
inline STDMETHODIMP CITIndexBuild::IsDirty(void)
{
    if (m_fIsDirty)
        return S_OK;

    return S_FALSE;
} /* IsDirty */
// IPersistStreamInit::Load: not implemented, always returns E_NOTIMPL.
STDMETHODIMP CITIndexBuild::Load(IStream *piistm)
{
    return SetErrReturn(E_NOTIMPL);
} /* IPersistStreamInit::Load */
// IPersistStreamInit::Save: flushes any pending text through the breaker,
// builds the index into the given stream and disposes of the nested indexer.
//   piistm      - destination stream
//   fClearDirty - clear the dirty flag once the stream has been wrapped
// Returns E_NOTINIT before InitHelperInstance has succeeded, E_UNEXPECTED
// when the index has already been built and disposed by an earlier Save,
// otherwise the result of wrapping the stream / building the index.
STDMETHODIMP CITIndexBuild::Save(IStream *piistm, BOOL fClearDirty)
{
    if (FALSE == m_fInitialized)
        return SetErrReturn(E_NOTINIT);

    // A successful pass through this method disposes of m_lpipb, which the
    // word sink was also given. Refuse to run the breaker/indexer against
    // the disposed index.
    if (NULL == m_lpipb)
        return SetErrReturn(E_UNEXPECTED);

    SendTextToBreaker();

    // Start from a defined value so the SUCCEEDED test below never reads
    // an uninitialized HRESULT.
    HRESULT hr = S_OK;
    HFPB hfpbSave = FpbFromHf(piistm, &hr);
    if (SUCCEEDED(hr) && NULL == hfpbSave)
        SetErrCode(&hr, E_FAIL);

    if (SUCCEEDED(hr))
    {
        hr = MVIndexBuild (0, m_lpipb, hfpbSave, NULL);
        MVIndexDispose (m_lpipb);
        m_lpipb = NULL;

        if (fClearDirty)
            m_fIsDirty = FALSE;

        FreeHfpb(hfpbSave);
    }

    return hr;
} /* IPersistStreamInit::Save */
// IPersistStreamInit::GetSizeMax: not implemented, always returns E_NOTIMPL.
STDMETHODIMP CITIndexBuild::GetSizeMax(ULARGE_INTEGER *pcbSize)
{
    return SetErrReturn(E_NOTIMPL);
} /* GetSizeMax */
// ********************* IPersistFile Methods *********************
// IPersistFile::Load: not implemented, always returns E_NOTIMPL.
STDMETHODIMP CITIndexBuild::Load(LPCWSTR pszFileName, DWORD dwMode)
{
    return SetErrReturn(E_NOTIMPL);
} /* IPersistFile::Load */
// IPersistFile::Save: not implemented, always returns E_NOTIMPL.
STDMETHODIMP CITIndexBuild::Save(LPCWSTR pszFileName, BOOL fRemember)
{
    return SetErrReturn(E_NOTIMPL);
} /* IPersistFile::Save */
// IPersistFile::SaveCompleted: not implemented, always returns E_NOTIMPL.
STDMETHODIMP CITIndexBuild::SaveCompleted(LPCWSTR pszFileName)
{
    return SetErrReturn(E_NOTIMPL);
} /* IPersistFile::SaveCompleted */
// IPersistFile::GetCurFile: not implemented, always returns E_NOTIMPL.
STDMETHODIMP CITIndexBuild::GetCurFile(LPWSTR *ppszFileName)
{
    return SetErrReturn(E_NOTIMPL);
} /* IPersistFile::GetCurFile */
|