// 10/12/99 scotthan created #include "shellprv.h" #include "filtgrep.h" #include #include class CGrepTokens // maintains an index of unicode and ansi grep tokens. { public: STDMETHODIMP Initialize(UINT nCodepage, LPCWSTR pwszMatch, LPCWSTR pwszExclude, BOOL bCaseSensitive); STDMETHODIMP_(void) Reset(); STDMETHODIMP_(BOOL) GrepW(LPCWSTR pwszText); STDMETHODIMP_(BOOL) GrepA(LPCSTR pwszText); STDMETHODIMP GetMatchTokens(OUT LPWSTR pszTokens, UINT cchTokens) const; STDMETHODIMP GetExcludeTokens(OUT LPWSTR pszTokens, UINT cchTokens) const; private: UINT _nCodepage; LPWSTR _pszMatchW, _pszExcludeW; // raw strings, unicode LPSTR _pszMatchA, _pszExcludeA; // raw strings, ansi LPCWSTR *_rgpszMatchW, *_rgpszExcludeW; // token index, unicode LPCSTR *_rgpszMatchA, *_rgpszExcludeA; // token index, ansi LONG _cMatch, _cExclude; // token counts LPWSTR (__stdcall * _pfnStrStrW)(LPCWSTR, LPCWSTR); LPSTR (__stdcall * _pfnStrStrA)(LPCSTR, LPCSTR); public: // Ctor, Dtor CGrepTokens() : _nCodepage(0), _cMatch(0), _cExclude(0), _pfnStrStrW(StrStrIW), _pfnStrStrA(StrStrIA), _pszMatchW(NULL), _pszExcludeW(NULL), _rgpszMatchW(NULL), _rgpszExcludeW(NULL), _pszMatchA(NULL), _pszExcludeA(NULL), _rgpszMatchA(NULL), _rgpszExcludeA(NULL) {} ~CGrepTokens() { Reset(); } }; class CGrepBuffer // auxilliary class: per-thread grep buffer { public: CGrepBuffer(ULONG dwThreadID) : _dwThreadID(dwThreadID), _pszBuf(NULL), _cchBuf(0) {} virtual ~CGrepBuffer() {delete [] _pszBuf;} STDMETHODIMP Alloc(ULONG cch); STDMETHODIMP_(BOOL) IsThread(ULONG dwThread) const {return dwThread == _dwThreadID;} STDMETHODIMP_(LPWSTR) Buffer() { return _pszBuf; } #define DEFAULT_GREPBUFFERSIZE 0x00FF // +1 = 1 page. private: LPWSTR _pszBuf; ULONG _cchBuf; ULONG _dwThreadID; }; // Makes a heap copy of a widechar string LPWSTR _AllocAndCopyString(LPCWSTR pszSrc, UINT cch = -1) { if (pszSrc) { if ((int)cch < 0) // must cast to "int" since cch is a UINT cch = lstrlenW(pszSrc); LPWSTR pszRet = new WCHAR[cch + 1]; if (pszRet) { // no StrCpyN, this is a double-NULL list CopyMemory(pszRet, pszSrc, sizeof(*pszSrc) * cch); pszRet[cch] = 0; return pszRet; } } return NULL; } // Makes an ansi copy of a widechar string LPSTR _AllocAndCopyAnsiString(UINT nCodepage, LPCWSTR pszSrc, UINT cch = -1) { if (pszSrc) { if ((int)cch < 0) // must cast to "int" since cch is a UINT cch = lstrlenW(pszSrc); int cchBuf = WideCharToMultiByte(nCodepage, 0, pszSrc, cch, NULL, 0, NULL, NULL); LPSTR pszRet = new CHAR[cchBuf+1]; if (pszRet) { int cchRet = WideCharToMultiByte(nCodepage, 0, pszSrc, cch, pszRet, cchBuf, NULL, NULL); pszRet[cchRet] = 0; return pszRet; } } return NULL; } // CGrepBuffer impl STDMETHODIMP CGrepBuffer::Alloc(ULONG cch) { LPWSTR pszBuf = NULL; if (cch) { if (_pszBuf && _cchBuf >= cch) return S_OK; pszBuf = new WCHAR[cch+1]; if (NULL == pszBuf) return E_OUTOFMEMORY; *pszBuf = 0; } delete [] _pszBuf; _pszBuf = pszBuf; _cchBuf = cch; return _pszBuf != NULL ? S_OK : S_FALSE ; } // CGrepTokens impl // Counts the number of characters in a string containing NULL-delimited tokens ("foo\0bloke\0TheEnd\0\0") LONG _GetTokenListLength(LPCWSTR pszList, LONG* pcTokens = NULL) { LONG cchRet = 0; if (pcTokens) *pcTokens = 0; if (pszList && *pszList) { LPCWSTR pszToken, pszPrev; int i = 0; for (pszToken = pszPrev = pszList; pszToken && *pszToken;) { if (pcTokens) (*pcTokens)++; pszToken += lstrlenW(pszToken) + 1, cchRet += (DWORD)(pszToken - pszPrev) ; pszPrev = pszToken; } } return cchRet; } // wide version: Counts and/or indexes NULL-delimited string tokens ("foo\0bloke\0TheEnd\0\0") LONG _IndexTokensW(LPCWSTR pszList, LPCWSTR* prgszTokens = NULL) { LONG cRet = 0; if (pszList && *pszList) { LPCWSTR psz = pszList; for (int i = 0; psz && *psz; psz += (lstrlenW(psz) + 1), i++) { if (prgszTokens) prgszTokens[i] = psz; cRet++; } } return cRet; } // ansi version: Counts and/or indexes NULL-delimited string tokens ("foo\0bloke\0TheEnd\0\0") LONG _IndexTokensA(LPCSTR pszList, LPCSTR* prgszTokens = NULL) { LONG cRet = 0; if (pszList && *pszList) { LPCSTR psz = pszList; for (int i = 0; psz && *psz; psz += (lstrlenA(psz) + 1), i++) { if (prgszTokens) prgszTokens[i] = psz; cRet++; } } return cRet; } // wide version: Allocates a string token index and indexes a string of NULL-delimited tokens. STDMETHODIMP _AllocAndIndexTokensW(LONG cTokens, LPCWSTR pszList, LPCWSTR** pprgszTokens) { if (cTokens) { if (NULL == (*pprgszTokens = new LPCWSTR[cTokens])) return E_OUTOFMEMORY; if (cTokens != _IndexTokensW(pszList, *pprgszTokens)) { delete [] (*pprgszTokens); *pprgszTokens = NULL; return E_FAIL; } } return S_OK; } // ansi version: Allocates a string token index and indexes a string of NULL-delimited tokens. STDMETHODIMP _AllocAndIndexTokensA(LONG cTokens, LPCSTR pszList, LPCSTR** pprgszTokens) { if (cTokens) { if (NULL == (*pprgszTokens = new LPCSTR[cTokens])) return E_OUTOFMEMORY; if (cTokens != _IndexTokensA(pszList, *pprgszTokens)) { delete [] (*pprgszTokens); *pprgszTokens = NULL; return E_FAIL; } } return S_OK; } // Frees unicode and ansi token lists and corresponding indices. void _FreeUniAnsiTokenList( OUT LPWSTR* ppszListW, OUT LPSTR* ppszListA, OUT LPCWSTR** pprgTokensW, OUT LPCSTR** pprgTokensA) { delete [] *ppszListW; *ppszListW = NULL; delete [] *ppszListA; *ppszListA = NULL; delete [] *pprgTokensW; *pprgTokensW = NULL; delete [] *pprgTokensA; *pprgTokensA = NULL; } // Allocates unicode and ansi token lists and corresponding indices. STDMETHODIMP _AllocUniAnsiTokenList( UINT nCodepage, LPCWSTR pszList, OUT LPWSTR* ppszListW, OUT LPSTR* ppszListA, OUT LONG* pcTokens, OUT LPCWSTR** pprgTokensW, OUT LPCSTR** pprgTokensA) { HRESULT hr = S_FALSE; LONG cTokens = 0; UINT cch = _GetTokenListLength(pszList, &cTokens); *ppszListW = NULL; *ppszListA = NULL; *pprgTokensW = NULL; *pprgTokensA = NULL; *pcTokens = 0; if (cTokens) { hr = E_OUTOFMEMORY; if (NULL == (*ppszListW = _AllocAndCopyString(pszList, cch))) goto failure_exit; if (NULL == (*ppszListA = _AllocAndCopyAnsiString(nCodepage, pszList, cch))) goto failure_exit; if (FAILED((hr = _AllocAndIndexTokensW(cTokens, *ppszListW, pprgTokensW)))) goto failure_exit; if (FAILED((hr = _AllocAndIndexTokensA(cTokens, *ppszListA, pprgTokensA)))) goto failure_exit; *pcTokens = cTokens; hr = S_OK; } return hr; failure_exit: _FreeUniAnsiTokenList(ppszListW, ppszListA, pprgTokensW, pprgTokensA); return hr; } STDMETHODIMP CGrepTokens::Initialize(UINT nCodepage, LPCWSTR pszMatch, LPCWSTR pszExclude, BOOL bCaseSensitive) { HRESULT hr = E_INVALIDARG; Reset(); BOOL bMatchString = (pszMatch && *pszMatch); BOOL bExcludeString = (pszExclude && *pszExclude); if (!(bMatchString || bExcludeString)) return E_INVALIDARG; _nCodepage = nCodepage; if (bCaseSensitive) { _pfnStrStrW = StrStrW; _pfnStrStrA = StrStrA; } else { _pfnStrStrW = StrStrIW; _pfnStrStrA = StrStrIA; } if (bMatchString) { if (FAILED((hr = _AllocUniAnsiTokenList(nCodepage, pszMatch, &_pszMatchW, &_pszMatchA, &_cMatch, &_rgpszMatchW, &_rgpszMatchA)))) { return hr; } } if (bExcludeString) { if (FAILED((hr = _AllocUniAnsiTokenList(nCodepage, pszExclude, &_pszExcludeW, &_pszExcludeA, &_cExclude, &_rgpszExcludeW, &_rgpszExcludeA)))) { return hr; } } return hr; } // S_OK we have some match tokens, S_FALSE otherwise STDMETHODIMP CGrepTokens::GetMatchTokens(OUT LPWSTR pszMatch, UINT cchMatch) const { HRESULT hr = (_pszMatchW && *_pszMatchW) ? S_OK : S_FALSE; if (pszMatch) { hr = StringCchCopy(pszMatch, cchMatch, _pszMatchW ? _pszMatchW : L""); } return hr; } // S_OK we have some exclude tokens, S_FALSE otherwise STDMETHODIMP CGrepTokens::GetExcludeTokens(OUT LPWSTR pszExclude, UINT cchExclude) const { HRESULT hr = (_pszExcludeW && *_pszExcludeW) ? S_OK : S_FALSE; if (pszExclude) { hr = StringCchCopy(pszExclude, cchExclude, _pszExcludeW ? _pszExcludeW : L""); } return hr; } void CGrepTokens::Reset() { _FreeUniAnsiTokenList(&_pszMatchW, &_pszMatchA, &_rgpszMatchW, &_rgpszMatchA); _FreeUniAnsiTokenList(&_pszExcludeW, &_pszExcludeA, &_rgpszExcludeW, &_rgpszExcludeA); _cMatch = _cExclude = 0; _nCodepage = 0; } STDMETHODIMP_(BOOL) CGrepTokens::GrepW(LPCWSTR pszText) { BOOL bMatch = FALSE; if (pszText) { BOOL bExclude = FALSE; for (int i = 0; i < _cMatch; i++) { if (_pfnStrStrW(pszText, _rgpszMatchW[i])) { bMatch = TRUE; break; } } for (i = 0; i < _cExclude; i++) { if (_pfnStrStrW(pszText, _rgpszExcludeW[i])) { bExclude = TRUE; break; } } if (_cMatch && _cExclude) return bMatch || !_cExclude; if (_cExclude) return !bExclude; } return bMatch; } STDMETHODIMP_(BOOL) CGrepTokens::GrepA(LPCSTR pszText) { BOOL bMatch = FALSE; if (pszText) { BOOL bExclude = FALSE; for (int i = 0; i < _cMatch; i++) { if (_pfnStrStrA(pszText, _rgpszMatchA[i])) { bMatch = TRUE; break; } } for (i = 0; i < _cExclude; i++) { if (_pfnStrStrA(pszText, _rgpszExcludeA[i])) { bExclude = TRUE; break; } } if (_cMatch && _cExclude) return bMatch || !_cExclude; if (_cExclude) return !bExclude; } return bMatch; } inline STDMETHODIMP_(BOOL) _IsEqualAttribute(const FULLPROPSPEC& fps, REFFMTID fmtid, PROPID propid) { return IsEqualGUID(fmtid, fps.guidPropSet) && PRSPEC_PROPID == fps.psProperty.ulKind && propid == fps.psProperty.propid; } STDMETHODIMP_(BOOL) _PropVariantGrep(PROPVARIANT* pvar, CGrepTokens* pTokens) { BOOL bRet = FALSE; switch(pvar->vt) { case VT_LPWSTR: bRet = pTokens->GrepW(pvar->pwszVal); break; case VT_BSTR: bRet = pTokens->GrepW(pvar->bstrVal); break; case VT_LPSTR: bRet = pTokens->GrepA(pvar->pszVal); break; case VT_VECTOR|VT_LPWSTR: { for (UINT i = 0; !bRet && i < pvar->calpwstr.cElems; i++) bRet = pTokens->GrepW(pvar->calpwstr.pElems[i]); break; } case VT_VECTOR|VT_BSTR: { for (UINT i = 0; !bRet && i < pvar->cabstr.cElems; i++) bRet = pTokens->GrepW(pvar->cabstr.pElems[i]); break; } case VT_VECTOR|VT_LPSTR: { for (UINT i = 0; !bRet && i < pvar->calpstr.cElems; i++) bRet = pTokens->GrepA(pvar->calpstr.pElems[i]); break; } case VT_VECTOR|VT_VARIANT: { for (UINT i = 0; !bRet && i < pvar->capropvar.cElems; i++) bRet = _PropVariantGrep(pvar->capropvar.pElems + i, pTokens); break; } case VT_BSTR|VT_ARRAY: { // Only grep 1-dimensional arrays. UINT cDims = SafeArrayGetDim(pvar->parray); if (cDims == 1) { LONG lBound, uBound; if (SUCCEEDED(SafeArrayGetLBound(pvar->parray, 1, &lBound)) && SUCCEEDED(SafeArrayGetUBound(pvar->parray, 1, &uBound)) && uBound > lBound) { BSTR *rgpbstr; if (SUCCEEDED(SafeArrayAccessData(pvar->parray, (void **)&rgpbstr))) { for (int i = 0; !bRet && i <= (uBound - lBound); i++) { bRet = pTokens->GrepW(rgpbstr[i]); } SafeArrayUnaccessData(pvar->parray); } } } else if (cDims > 1) { ASSERT(FALSE); // we didn't expect > 1 dimension on bstr arrays! } break; } } return bRet; } CFilterGrep::CFilterGrep() : _hdpaGrepBuffers(NULL), _pTokens(NULL), _dwFlags(0), _pwszContentRestricted(NULL), _pwszPropertiesRestricted(NULL) { } CFilterGrep::~CFilterGrep() { _ClearGrepBuffers(); delete [] _pwszContentRestricted; delete [] _pwszPropertiesRestricted; delete _pTokens; if (_fcritsec) { DeleteCriticalSection(&_critsec); } } STDMETHODIMP CFilterGrep::InitSelf(void) { if (_fcritsec == FALSE) { if (!InitializeCriticalSectionAndSpinCount(&_critsec, 0)) { return E_FAIL; } _fcritsec = TRUE; } return S_OK; } STDMETHODIMP CFilterGrep::Initialize(UINT nCodepage, LPCWSTR pszMatch, LPCWSTR pszExclude, DWORD dwFlags) { Reset(); if ((0 == (dwFlags & (FGIF_BLANKETGREP|FGIF_GREPFILENAME))) || !((pszMatch && *pszMatch) || (pszExclude && *pszExclude))) return E_INVALIDARG; if (!(_pTokens || (_pTokens = new CGrepTokens) != NULL)) return E_OUTOFMEMORY; _dwFlags = dwFlags; return _pTokens->Initialize(nCodepage, pszMatch, pszExclude, BOOLIFY(dwFlags & FGIF_CASESENSITIVE)); } STDMETHODIMP CFilterGrep::Reset() { if (_pTokens) _pTokens->Reset(); _dwFlags = 0; return S_OK; } // converts non critical errors into S_FALSE, other return as FAILED(hr) HRESULT _MapFilterCriticalError(HRESULT hr) { switch (hr) { case FILTER_E_END_OF_CHUNKS: case FILTER_E_NO_MORE_TEXT: case FILTER_E_NO_MORE_VALUES: case FILTER_W_MONIKER_CLIPPED: case FILTER_E_NO_TEXT: case FILTER_E_NO_VALUES: case FILTER_E_EMBEDDING_UNAVAILABLE: case FILTER_E_LINK_UNAVAILABLE: hr = S_FALSE; break; } return hr; } // returns: // S_OK match // S_FALSE did not match STDMETHODIMP CFilterGrep::Grep(IShellFolder *psf, LPCITEMIDLIST pidl, LPCTSTR pszName) { HRESULT hr = S_FALSE; BOOL bHit = FALSE; ULONG ulFlags = IFILTER_FLAGS_OLE_PROPERTIES; // default to try to use pss ULONG dwThread = GetCurrentThreadId(); if (NULL == _pTokens) return HRESULT_FROM_WIN32(ERROR_INVALID_DATA); if (_IsRestrictedFileType(pszName)) return S_FALSE; // Grep the filename. if ((_dwFlags & FGIF_GREPFILENAME) && _pTokens->GrepW(pszName)) { return S_OK; } IFilter *pFilter; if (SUCCEEDED(psf->BindToStorage(pidl, NULL, IID_PPV_ARG(IFilter, &pFilter)))) { __try { hr = pFilter->Init(IFILTER_INIT_CANON_PARAGRAPHS | IFILTER_INIT_CANON_HYPHENS | IFILTER_INIT_CANON_SPACES | IFILTER_INIT_APPLY_INDEX_ATTRIBUTES | IFILTER_INIT_INDEXING_ONLY, 0, 0, &ulFlags); } __except(EXCEPTION_EXECUTE_HANDLER) { hr = E_ABORT; } while (!bHit && (S_OK == hr)) { STAT_CHUNK stat; __try { hr = pFilter->GetChunk(&stat); while ((S_OK == hr) && (0 == (stat.flags & (CHUNK_TEXT | CHUNK_VALUE)))) { TraceMsg(TF_WARNING, "CFilterGrep::Grep encountered bad/unknown type for chunk; skipping."); hr = pFilter->GetChunk(&stat); } } __except(EXCEPTION_EXECUTE_HANDLER) { hr = E_ABORT; } hr = _MapFilterCriticalError(hr); // convert filter errors into S_FALSE if (S_OK == hr) { ULONG grfDescriminate = (_dwFlags & FGIF_BLANKETGREP); if (FGIF_BLANKETGREP == grfDescriminate || (_IsEqualAttribute(stat.attribute, FMTID_Storage, PID_STG_CONTENTS) ? FGIF_GREPPROPERTIES == grfDescriminate : FGIF_GREPCONTENT == grfDescriminate)) { if (((stat.flags & CHUNK_VALUE) && S_OK == _GrepValue(pFilter, &stat)) || ((stat.flags & CHUNK_TEXT) && S_OK == _GrepText(pFilter, &stat, dwThread))) { bHit = TRUE; } } } } pFilter->Release(); } // Grep OLE/NFF properties if appropriate if (SUCCEEDED(hr)) { if (!bHit && (ulFlags & IFILTER_FLAGS_OLE_PROPERTIES) && (_dwFlags & FGIF_BLANKETGREP)) { IPropertySetStorage *pps; if (SUCCEEDED(psf->BindToStorage(pidl, NULL, IID_PPV_ARG(IPropertySetStorage, &pps)))) { hr = _GrepProperties(pps); bHit = (S_OK == hr); pps->Release(); } } } if (SUCCEEDED(hr)) hr = bHit ? S_OK : S_FALSE; return hr; } STDMETHODIMP CFilterGrep::_GrepValue(IFilter* pFilter, STAT_CHUNK* pstat) { PROPVARIANT* pvar = NULL; HRESULT hr; __try { hr = pFilter->GetValue(&pvar); } __except(EXCEPTION_EXECUTE_HANDLER) { hr = E_ABORT; } if (SUCCEEDED(hr)) { hr = _PropVariantGrep(pvar, _pTokens) ? S_OK : S_FALSE; PropVariantClear(pvar); CoTaskMemFree(pvar); } return hr; } // Greps OLE/NFF properties. STDMETHODIMP CFilterGrep::_GrepProperties(IPropertySetStorage *pss) { BOOL bHit = FALSE; IEnumSTATPROPSETSTG* pEnumSet; if (SUCCEEDED(pss->Enum(&pEnumSet))) { STATPROPSETSTG statSet[8]; DWORD cSets = 0; while (!bHit && SUCCEEDED(pEnumSet->Next(ARRAYSIZE(statSet), statSet, &cSets)) && cSets) { for (UINT i = 0; !bHit && i < cSets; i++) { IPropertyStorage *pstg; if (SUCCEEDED(pss->Open(statSet[i].fmtid, STGM_READ | STGM_DIRECT | STGM_SHARE_EXCLUSIVE, &pstg))) { bHit = (S_OK == _GrepEnumPropStg(pstg)); pstg->Release(); } } } pEnumSet->Release(); } return bHit ? S_OK : S_FALSE; } #define PROPGREPBUFSIZE 16 // Reads and greps a block of properties described by a // caller-supplied array of PROPSPECs. STDMETHODIMP CFilterGrep::_GrepPropStg(IPropertyStorage *pstg, ULONG cspec, PROPSPEC rgspec[]) { PROPVARIANT rgvar[PROPGREPBUFSIZE] = {0}, // stack buffer *prgvar = rgvar; BOOL bHit = FALSE; if (cspec > ARRAYSIZE(rgvar)) // stack buffer large enough? { if (NULL == (prgvar = new PROPVARIANT[cspec])) return E_OUTOFMEMORY; } // Read properties: HRESULT hr = pstg->ReadMultiple(cspec, rgspec, prgvar); if (SUCCEEDED(hr)) { for (UINT i = 0; i < cspec; i++) { if (!bHit) bHit = _PropVariantGrep(prgvar + i, _pTokens); PropVariantClear(rgvar + i); } } if (prgvar != rgvar) delete [] prgvar; if (SUCCEEDED(hr)) return bHit ? S_OK : S_FALSE; return hr; } // Enumerates and greps all properties in a property set STDMETHODIMP CFilterGrep::_GrepEnumPropStg(IPropertyStorage* pstg) { BOOL bHit = FALSE; IEnumSTATPROPSTG* pEnumStg; if (SUCCEEDED(pstg->Enum(&pEnumStg))) { STATPROPSTG statProp[PROPGREPBUFSIZE]; DWORD cProps; while (!bHit && SUCCEEDED(pEnumStg->Next(ARRAYSIZE(statProp), statProp, &cProps)) && cProps) { PROPSPEC rgspec[PROPGREPBUFSIZE] = {0}; for (UINT i = 0; (i < cProps) && (i < ARRAYSIZE(rgspec)); i++) { rgspec[i].ulKind = PRSPEC_PROPID; rgspec[i].propid = statProp[i].propid; CoTaskMemFree(statProp[i].lpwstrName); } bHit = (S_OK == _GrepPropStg(pstg, cProps, rgspec)); } pEnumStg->Release(); } return bHit ? S_OK : S_FALSE; } // Reports whether the indicated unicode character is a // word-breaking character. inline BOOL _IsWordBreakCharW(IN LPWSTR pszBuf, IN ULONG ich) { WORD wChar; return GetStringTypeW(CT_CTYPE1, pszBuf + ich, 1, &wChar) && (wChar & (C1_SPACE|C1_PUNCT|C1_CNTRL|C1_BLANK)); } // Finds the last word-breaking character. LPWSTR _FindLastWordBreakW(IN LPWSTR pszBuf, IN ULONG cch) { while(--cch) { if (_IsWordBreakCharW(pszBuf, cch)) return pszBuf + cch; } return NULL; } // {c1243ca0-bf96-11cd-b579-08002b30bfeb} const CLSID CLSID_PlainTextFilter = {0xc1243ca0, 0xbf96, 0x11cd, {0xb5, 0x79, 0x08, 0x00, 0x2b, 0x30, 0xbf, 0xeb}}; void _ReplaceNulsWithSpaces(LPWSTR pszBuf, UINT cch) { LPWSTR pszEnd = pszBuf + cch; while (pszBuf < pszEnd) { if (*pszBuf == 0) { *pszBuf = TEXT(' '); } pszBuf++; } } STDMETHODIMP CFilterGrep::_GrepText(IFilter* pFilter, STAT_CHUNK* pstat, DWORD dwThreadID) { ASSERT(pstat); LPWSTR pszBuf = NULL; ULONG cchBuf = pstat->cwcLenSource ? pstat->cwcLenSource : DEFAULT_GREPBUFFERSIZE; HRESULT hr = _GetThreadGrepBuffer(dwThreadID, cchBuf, &pszBuf); if (SUCCEEDED(hr)) { LPWSTR pszFetch = pszBuf, pszTail = NULL; ULONG cchFetch = cchBuf, cchTail = 0; // Fetch first block of text __try { hr = pFilter->GetText(&cchFetch, pszFetch); } __except(EXCEPTION_EXECUTE_HANDLER) { hr = E_ABORT; } CLSID clsid = {0}; IUnknown_GetClassID(pFilter, &clsid); // to workaround a bug in the text filter while (SUCCEEDED(hr) && cchFetch) { ASSERT((cchFetch + cchTail) <= cchBuf); _ReplaceNulsWithSpaces(pszBuf, cchFetch + cchTail); // Let us work over binary files too pszBuf[cchFetch + cchTail] = 0; // don't trust filter to zero-terminate buffer. // When you get the FILTER_S_LAST_TEXT, that's it, you'll get no more text, so treat the tail part as part of the text if (hr == FILTER_S_LAST_TEXT) { pszTail = NULL; cchTail = 0; } else if (CLSID_PlainTextFilter == clsid) { // CLSID_PlainText filter always returns S_OK, instead of FILTER_S_LAST_TEXT, this forces us to scan // the entire chunk now, AND (see below) to pass it off as a tail for scanning next chunk too. // pszTail and cchTail are set below. } else { pszTail = _FindLastWordBreakW(pszBuf, cchFetch + cchTail); if (pszTail) { // Break on word boundary and leave remainder (tail) for next iteration *pszTail = TEXT('\0'); pszTail++; cchTail = lstrlenW(pszTail); } else { // Wow, big block, with no word break, search its entirety. // REVIEW: cross chunk items won't be found pszTail = NULL; cchTail = 0; } } // do the string scan if (_pTokens->GrepW(pszBuf)) { *pszBuf = 0; return S_OK; } else if (FILTER_S_LAST_TEXT == hr) { *pszBuf = 0; return S_FALSE; } // prepare for next fetch... // If it is the plaintext filter, grab the tail anyway, even though we've tested it already // WinSE 25867 if (CLSID_PlainTextFilter == clsid) { pszTail = _FindLastWordBreakW(pszBuf, cchFetch + cchTail); if (pszTail) { *pszTail = TEXT('\0'); pszTail++; cchTail = lstrlenW(pszTail); } else { pszTail = NULL; cchTail = 0; } } // prepare for next fetch... *pszBuf = 0; pszFetch = pszBuf; cchFetch = cchBuf; // If there is a tail to deal with, move it to the front of // the buffer and prepare to have the next block of incoming text // appended to the tail.. if (pszTail && cchTail) { MoveMemory(pszBuf, pszTail, cchTail * sizeof(*pszTail)); pszBuf[cchTail] = 0; pszFetch += cchTail; cchFetch -= cchTail; } // Fetch next block of text. __try { hr = pFilter->GetText(&cchFetch, pszFetch); } __except(EXCEPTION_EXECUTE_HANDLER) { hr = E_ABORT; } } } if (SUCCEEDED(hr) || FILTER_E_NO_MORE_TEXT == hr || FILTER_E_NO_TEXT == hr) return S_FALSE; return hr; } // Returns a grep buffer of the requested size for the specified thread. STDMETHODIMP CFilterGrep::_GetThreadGrepBuffer( DWORD dwThreadID, ULONG cchNeed, LPWSTR* ppszBuf) { ASSERT(dwThreadID); ASSERT(cchNeed > 0); ASSERT(ppszBuf); HRESULT hr = E_FAIL; *ppszBuf = NULL; _EnterCritical(); if (_hdpaGrepBuffers || (_hdpaGrepBuffers = DPA_Create(4)) != NULL) { CGrepBuffer *pgb, *pgbCached = NULL; for (int i = 0, cnt = DPA_GetPtrCount(_hdpaGrepBuffers); i < cnt; i++) { pgb = (CGrepBuffer*)DPA_FastGetPtr(_hdpaGrepBuffers, i); if (pgb->IsThread(dwThreadID)) { pgbCached = pgb; hr = pgbCached->Alloc(cchNeed); if (S_OK == hr) *ppszBuf = pgbCached->Buffer(); break; } } if (NULL == pgbCached) // not cached? { if ((pgb = new CGrepBuffer(dwThreadID)) != NULL) { hr = pgb->Alloc(cchNeed); if (S_OK == hr) { *ppszBuf = pgb->Buffer(); DPA_AppendPtr(_hdpaGrepBuffers, pgb); } else delete pgb; } else hr = E_OUTOFMEMORY; } } else hr = E_OUTOFMEMORY; _LeaveCritical(); return hr; } // Clears grep buffer for all threads STDMETHODIMP_(void) CFilterGrep::_ClearGrepBuffers() { _EnterCritical(); if (_hdpaGrepBuffers) { while(DPA_GetPtrCount(_hdpaGrepBuffers)) { CGrepBuffer* pgb = (CGrepBuffer*)DPA_DeletePtr(_hdpaGrepBuffers, 0); delete pgb; } DPA_Destroy(_hdpaGrepBuffers); _hdpaGrepBuffers = NULL; } _LeaveCritical(); } // Reports whether the file type is restricted from full-text grep. STDMETHODIMP_(BOOL) CFilterGrep::_IsRestrictedFileType(LPCWSTR pwszFile) { return FALSE; } STDMETHODIMP CFilterGrep::GetMatchTokens(OUT LPWSTR pszTokens, UINT cchTokens) const { HRESULT hr = _pTokens ? _pTokens->GetMatchTokens(pszTokens, cchTokens) : S_FALSE; if (S_OK != hr && pszTokens) *pszTokens = 0; return hr; } STDMETHODIMP CFilterGrep::GetExcludeTokens(OUT LPWSTR pszTokens, UINT cchTokens) const { HRESULT hr = _pTokens ? _pTokens->GetExcludeTokens(pszTokens, cchTokens) : S_FALSE; if (S_OK != hr && pszTokens) *pszTokens = 0; return hr; }