Leaked source code of Windows Server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1100 lines
30 KiB

  1. // 10/12/99 scotthan created
  2. #include "shellprv.h"
  3. #include "filtgrep.h"
  4. #include <ntquery.h>
  5. #include <filterr.h>
  6. class CGrepTokens // maintains an index of unicode and ansi grep tokens.
  7. {
  8. public:
  9. STDMETHODIMP Initialize(UINT nCodepage, LPCWSTR pwszMatch, LPCWSTR pwszExclude, BOOL bCaseSensitive);
  10. STDMETHODIMP_(void) Reset();
  11. STDMETHODIMP_(BOOL) GrepW(LPCWSTR pwszText);
  12. STDMETHODIMP_(BOOL) GrepA(LPCSTR pwszText);
  13. STDMETHODIMP GetMatchTokens(OUT LPWSTR pszTokens, UINT cchTokens) const;
  14. STDMETHODIMP GetExcludeTokens(OUT LPWSTR pszTokens, UINT cchTokens) const;
  15. private:
  16. UINT _nCodepage;
  17. LPWSTR _pszMatchW, _pszExcludeW; // raw strings, unicode
  18. LPSTR _pszMatchA, _pszExcludeA; // raw strings, ansi
  19. LPCWSTR *_rgpszMatchW, *_rgpszExcludeW; // token index, unicode
  20. LPCSTR *_rgpszMatchA, *_rgpszExcludeA; // token index, ansi
  21. LONG _cMatch, _cExclude; // token counts
  22. LPWSTR (__stdcall * _pfnStrStrW)(LPCWSTR, LPCWSTR);
  23. LPSTR (__stdcall * _pfnStrStrA)(LPCSTR, LPCSTR);
  24. public:
  25. // Ctor, Dtor
  26. CGrepTokens()
  27. : _nCodepage(0), _cMatch(0), _cExclude(0), _pfnStrStrW(StrStrIW), _pfnStrStrA(StrStrIA),
  28. _pszMatchW(NULL), _pszExcludeW(NULL), _rgpszMatchW(NULL), _rgpszExcludeW(NULL),
  29. _pszMatchA(NULL), _pszExcludeA(NULL), _rgpszMatchA(NULL), _rgpszExcludeA(NULL) {}
  30. ~CGrepTokens() { Reset(); }
  31. };
  32. class CGrepBuffer // auxilliary class: per-thread grep buffer
  33. {
  34. public:
  35. CGrepBuffer(ULONG dwThreadID) : _dwThreadID(dwThreadID), _pszBuf(NULL), _cchBuf(0) {}
  36. virtual ~CGrepBuffer() {delete [] _pszBuf;}
  37. STDMETHODIMP Alloc(ULONG cch);
  38. STDMETHODIMP_(BOOL) IsThread(ULONG dwThread) const {return dwThread == _dwThreadID;}
  39. STDMETHODIMP_(LPWSTR) Buffer() { return _pszBuf; }
  40. #define DEFAULT_GREPBUFFERSIZE 0x00FF // +1 = 1 page.
  41. private:
  42. LPWSTR _pszBuf;
  43. ULONG _cchBuf;
  44. ULONG _dwThreadID;
  45. };
  46. // Makes a heap copy of a widechar string
  47. LPWSTR _AllocAndCopyString(LPCWSTR pszSrc, UINT cch = -1)
  48. {
  49. if (pszSrc)
  50. {
  51. if ((int)cch < 0) // must cast to "int" since cch is a UINT
  52. cch = lstrlenW(pszSrc);
  53. LPWSTR pszRet = new WCHAR[cch + 1];
  54. if (pszRet)
  55. {
  56. // no StrCpyN, this is a double-NULL list
  57. CopyMemory(pszRet, pszSrc, sizeof(*pszSrc) * cch);
  58. pszRet[cch] = 0;
  59. return pszRet;
  60. }
  61. }
  62. return NULL;
  63. }
  64. // Makes an ansi copy of a widechar string
  65. LPSTR _AllocAndCopyAnsiString(UINT nCodepage, LPCWSTR pszSrc, UINT cch = -1)
  66. {
  67. if (pszSrc)
  68. {
  69. if ((int)cch < 0) // must cast to "int" since cch is a UINT
  70. cch = lstrlenW(pszSrc);
  71. int cchBuf = WideCharToMultiByte(nCodepage, 0, pszSrc, cch, NULL, 0, NULL, NULL);
  72. LPSTR pszRet = new CHAR[cchBuf+1];
  73. if (pszRet)
  74. {
  75. int cchRet = WideCharToMultiByte(nCodepage, 0, pszSrc, cch, pszRet, cchBuf, NULL, NULL);
  76. pszRet[cchRet] = 0;
  77. return pszRet;
  78. }
  79. }
  80. return NULL;
  81. }
  82. // CGrepBuffer impl
  83. STDMETHODIMP CGrepBuffer::Alloc(ULONG cch)
  84. {
  85. LPWSTR pszBuf = NULL;
  86. if (cch)
  87. {
  88. if (_pszBuf && _cchBuf >= cch)
  89. return S_OK;
  90. pszBuf = new WCHAR[cch+1];
  91. if (NULL == pszBuf)
  92. return E_OUTOFMEMORY;
  93. *pszBuf = 0;
  94. }
  95. delete [] _pszBuf;
  96. _pszBuf = pszBuf;
  97. _cchBuf = cch;
  98. return _pszBuf != NULL ? S_OK : S_FALSE ;
  99. }
  100. // CGrepTokens impl
  101. // Counts the number of characters in a string containing NULL-delimited tokens ("foo\0bloke\0TheEnd\0\0")
  102. LONG _GetTokenListLength(LPCWSTR pszList, LONG* pcTokens = NULL)
  103. {
  104. LONG cchRet = 0;
  105. if (pcTokens) *pcTokens = 0;
  106. if (pszList && *pszList)
  107. {
  108. LPCWSTR pszToken, pszPrev;
  109. int i = 0;
  110. for (pszToken = pszPrev = pszList;
  111. pszToken && *pszToken;)
  112. {
  113. if (pcTokens)
  114. (*pcTokens)++;
  115. pszToken += lstrlenW(pszToken) + 1,
  116. cchRet += (DWORD)(pszToken - pszPrev) ;
  117. pszPrev = pszToken;
  118. }
  119. }
  120. return cchRet;
  121. }
  122. // wide version: Counts and/or indexes NULL-delimited string tokens ("foo\0bloke\0TheEnd\0\0")
  123. LONG _IndexTokensW(LPCWSTR pszList, LPCWSTR* prgszTokens = NULL)
  124. {
  125. LONG cRet = 0;
  126. if (pszList && *pszList)
  127. {
  128. LPCWSTR psz = pszList;
  129. for (int i = 0; psz && *psz; psz += (lstrlenW(psz) + 1), i++)
  130. {
  131. if (prgszTokens)
  132. prgszTokens[i] = psz;
  133. cRet++;
  134. }
  135. }
  136. return cRet;
  137. }
  138. // ansi version: Counts and/or indexes NULL-delimited string tokens ("foo\0bloke\0TheEnd\0\0")
  139. LONG _IndexTokensA(LPCSTR pszList, LPCSTR* prgszTokens = NULL)
  140. {
  141. LONG cRet = 0;
  142. if (pszList && *pszList)
  143. {
  144. LPCSTR psz = pszList;
  145. for (int i = 0; psz && *psz; psz += (lstrlenA(psz) + 1), i++)
  146. {
  147. if (prgszTokens)
  148. prgszTokens[i] = psz;
  149. cRet++;
  150. }
  151. }
  152. return cRet;
  153. }
  154. // wide version: Allocates a string token index and indexes a string of NULL-delimited tokens.
  155. STDMETHODIMP _AllocAndIndexTokensW(LONG cTokens, LPCWSTR pszList, LPCWSTR** pprgszTokens)
  156. {
  157. if (cTokens)
  158. {
  159. if (NULL == (*pprgszTokens = new LPCWSTR[cTokens]))
  160. return E_OUTOFMEMORY;
  161. if (cTokens != _IndexTokensW(pszList, *pprgszTokens))
  162. {
  163. delete [] (*pprgszTokens);
  164. *pprgszTokens = NULL;
  165. return E_FAIL;
  166. }
  167. }
  168. return S_OK;
  169. }
  170. // ansi version: Allocates a string token index and indexes a string of NULL-delimited tokens.
  171. STDMETHODIMP _AllocAndIndexTokensA(LONG cTokens, LPCSTR pszList, LPCSTR** pprgszTokens)
  172. {
  173. if (cTokens)
  174. {
  175. if (NULL == (*pprgszTokens = new LPCSTR[cTokens]))
  176. return E_OUTOFMEMORY;
  177. if (cTokens != _IndexTokensA(pszList, *pprgszTokens))
  178. {
  179. delete [] (*pprgszTokens);
  180. *pprgszTokens = NULL;
  181. return E_FAIL;
  182. }
  183. }
  184. return S_OK;
  185. }
  186. // Frees unicode and ansi token lists and corresponding indices.
  187. void _FreeUniAnsiTokenList(
  188. OUT LPWSTR* ppszListW,
  189. OUT LPSTR* ppszListA,
  190. OUT LPCWSTR** pprgTokensW,
  191. OUT LPCSTR** pprgTokensA)
  192. {
  193. delete [] *ppszListW; *ppszListW = NULL;
  194. delete [] *ppszListA; *ppszListA = NULL;
  195. delete [] *pprgTokensW; *pprgTokensW = NULL;
  196. delete [] *pprgTokensA; *pprgTokensA = NULL;
  197. }
  198. // Allocates unicode and ansi token lists and corresponding indices.
  199. STDMETHODIMP _AllocUniAnsiTokenList(
  200. UINT nCodepage,
  201. LPCWSTR pszList,
  202. OUT LPWSTR* ppszListW,
  203. OUT LPSTR* ppszListA,
  204. OUT LONG* pcTokens,
  205. OUT LPCWSTR** pprgTokensW,
  206. OUT LPCSTR** pprgTokensA)
  207. {
  208. HRESULT hr = S_FALSE;
  209. LONG cTokens = 0;
  210. UINT cch = _GetTokenListLength(pszList, &cTokens);
  211. *ppszListW = NULL;
  212. *ppszListA = NULL;
  213. *pprgTokensW = NULL;
  214. *pprgTokensA = NULL;
  215. *pcTokens = 0;
  216. if (cTokens)
  217. {
  218. hr = E_OUTOFMEMORY;
  219. if (NULL == (*ppszListW = _AllocAndCopyString(pszList, cch)))
  220. goto failure_exit;
  221. if (NULL == (*ppszListA = _AllocAndCopyAnsiString(nCodepage, pszList, cch)))
  222. goto failure_exit;
  223. if (FAILED((hr = _AllocAndIndexTokensW(cTokens, *ppszListW, pprgTokensW))))
  224. goto failure_exit;
  225. if (FAILED((hr = _AllocAndIndexTokensA(cTokens, *ppszListA, pprgTokensA))))
  226. goto failure_exit;
  227. *pcTokens = cTokens;
  228. hr = S_OK;
  229. }
  230. return hr;
  231. failure_exit:
  232. _FreeUniAnsiTokenList(ppszListW, ppszListA, pprgTokensW, pprgTokensA);
  233. return hr;
  234. }
  235. STDMETHODIMP CGrepTokens::Initialize(UINT nCodepage, LPCWSTR pszMatch, LPCWSTR pszExclude, BOOL bCaseSensitive)
  236. {
  237. HRESULT hr = E_INVALIDARG;
  238. Reset();
  239. BOOL bMatchString = (pszMatch && *pszMatch);
  240. BOOL bExcludeString = (pszExclude && *pszExclude);
  241. if (!(bMatchString || bExcludeString))
  242. return E_INVALIDARG;
  243. _nCodepage = nCodepage;
  244. if (bCaseSensitive)
  245. {
  246. _pfnStrStrW = StrStrW;
  247. _pfnStrStrA = StrStrA;
  248. }
  249. else
  250. {
  251. _pfnStrStrW = StrStrIW;
  252. _pfnStrStrA = StrStrIA;
  253. }
  254. if (bMatchString)
  255. {
  256. if (FAILED((hr = _AllocUniAnsiTokenList(nCodepage, pszMatch,
  257. &_pszMatchW, &_pszMatchA, &_cMatch, &_rgpszMatchW, &_rgpszMatchA))))
  258. {
  259. return hr;
  260. }
  261. }
  262. if (bExcludeString)
  263. {
  264. if (FAILED((hr = _AllocUniAnsiTokenList(nCodepage, pszExclude,
  265. &_pszExcludeW, &_pszExcludeA, &_cExclude, &_rgpszExcludeW, &_rgpszExcludeA))))
  266. {
  267. return hr;
  268. }
  269. }
  270. return hr;
  271. }
  272. // S_OK we have some match tokens, S_FALSE otherwise
  273. STDMETHODIMP CGrepTokens::GetMatchTokens(OUT LPWSTR pszMatch, UINT cchMatch) const
  274. {
  275. HRESULT hr = (_pszMatchW && *_pszMatchW) ? S_OK : S_FALSE;
  276. if (pszMatch)
  277. {
  278. hr = StringCchCopy(pszMatch, cchMatch, _pszMatchW ? _pszMatchW : L"");
  279. }
  280. return hr;
  281. }
  282. // S_OK we have some exclude tokens, S_FALSE otherwise
  283. STDMETHODIMP CGrepTokens::GetExcludeTokens(OUT LPWSTR pszExclude, UINT cchExclude) const
  284. {
  285. HRESULT hr = (_pszExcludeW && *_pszExcludeW) ? S_OK : S_FALSE;
  286. if (pszExclude)
  287. {
  288. hr = StringCchCopy(pszExclude, cchExclude, _pszExcludeW ? _pszExcludeW : L"");
  289. }
  290. return hr;
  291. }
  292. void CGrepTokens::Reset()
  293. {
  294. _FreeUniAnsiTokenList(&_pszMatchW, &_pszMatchA, &_rgpszMatchW, &_rgpszMatchA);
  295. _FreeUniAnsiTokenList(&_pszExcludeW, &_pszExcludeA, &_rgpszExcludeW, &_rgpszExcludeA);
  296. _cMatch = _cExclude = 0;
  297. _nCodepage = 0;
  298. }
  299. STDMETHODIMP_(BOOL) CGrepTokens::GrepW(LPCWSTR pszText)
  300. {
  301. BOOL bMatch = FALSE;
  302. if (pszText)
  303. {
  304. BOOL bExclude = FALSE;
  305. for (int i = 0; i < _cMatch; i++)
  306. {
  307. if (_pfnStrStrW(pszText, _rgpszMatchW[i]))
  308. {
  309. bMatch = TRUE;
  310. break;
  311. }
  312. }
  313. for (i = 0; i < _cExclude; i++)
  314. {
  315. if (_pfnStrStrW(pszText, _rgpszExcludeW[i]))
  316. {
  317. bExclude = TRUE;
  318. break;
  319. }
  320. }
  321. if (_cMatch && _cExclude)
  322. return bMatch || !_cExclude;
  323. if (_cExclude)
  324. return !bExclude;
  325. }
  326. return bMatch;
  327. }
  328. STDMETHODIMP_(BOOL) CGrepTokens::GrepA(LPCSTR pszText)
  329. {
  330. BOOL bMatch = FALSE;
  331. if (pszText)
  332. {
  333. BOOL bExclude = FALSE;
  334. for (int i = 0; i < _cMatch; i++)
  335. {
  336. if (_pfnStrStrA(pszText, _rgpszMatchA[i]))
  337. {
  338. bMatch = TRUE;
  339. break;
  340. }
  341. }
  342. for (i = 0; i < _cExclude; i++)
  343. {
  344. if (_pfnStrStrA(pszText, _rgpszExcludeA[i]))
  345. {
  346. bExclude = TRUE;
  347. break;
  348. }
  349. }
  350. if (_cMatch && _cExclude)
  351. return bMatch || !_cExclude;
  352. if (_cExclude)
  353. return !bExclude;
  354. }
  355. return bMatch;
  356. }
  357. inline STDMETHODIMP_(BOOL) _IsEqualAttribute(const FULLPROPSPEC& fps, REFFMTID fmtid, PROPID propid)
  358. {
  359. return IsEqualGUID(fmtid, fps.guidPropSet) &&
  360. PRSPEC_PROPID == fps.psProperty.ulKind &&
  361. propid == fps.psProperty.propid;
  362. }
  363. STDMETHODIMP_(BOOL) _PropVariantGrep(PROPVARIANT* pvar, CGrepTokens* pTokens)
  364. {
  365. BOOL bRet = FALSE;
  366. switch(pvar->vt)
  367. {
  368. case VT_LPWSTR:
  369. bRet = pTokens->GrepW(pvar->pwszVal);
  370. break;
  371. case VT_BSTR:
  372. bRet = pTokens->GrepW(pvar->bstrVal);
  373. break;
  374. case VT_LPSTR:
  375. bRet = pTokens->GrepA(pvar->pszVal);
  376. break;
  377. case VT_VECTOR|VT_LPWSTR:
  378. {
  379. for (UINT i = 0; !bRet && i < pvar->calpwstr.cElems; i++)
  380. bRet = pTokens->GrepW(pvar->calpwstr.pElems[i]);
  381. break;
  382. }
  383. case VT_VECTOR|VT_BSTR:
  384. {
  385. for (UINT i = 0; !bRet && i < pvar->cabstr.cElems; i++)
  386. bRet = pTokens->GrepW(pvar->cabstr.pElems[i]);
  387. break;
  388. }
  389. case VT_VECTOR|VT_LPSTR:
  390. {
  391. for (UINT i = 0; !bRet && i < pvar->calpstr.cElems; i++)
  392. bRet = pTokens->GrepA(pvar->calpstr.pElems[i]);
  393. break;
  394. }
  395. case VT_VECTOR|VT_VARIANT:
  396. {
  397. for (UINT i = 0; !bRet && i < pvar->capropvar.cElems; i++)
  398. bRet = _PropVariantGrep(pvar->capropvar.pElems + i, pTokens);
  399. break;
  400. }
  401. case VT_BSTR|VT_ARRAY:
  402. {
  403. // Only grep 1-dimensional arrays.
  404. UINT cDims = SafeArrayGetDim(pvar->parray);
  405. if (cDims == 1)
  406. {
  407. LONG lBound, uBound;
  408. if (SUCCEEDED(SafeArrayGetLBound(pvar->parray, 1, &lBound)) &&
  409. SUCCEEDED(SafeArrayGetUBound(pvar->parray, 1, &uBound)) &&
  410. uBound > lBound)
  411. {
  412. BSTR *rgpbstr;
  413. if (SUCCEEDED(SafeArrayAccessData(pvar->parray, (void **)&rgpbstr)))
  414. {
  415. for (int i = 0; !bRet && i <= (uBound - lBound); i++)
  416. {
  417. bRet = pTokens->GrepW(rgpbstr[i]);
  418. }
  419. SafeArrayUnaccessData(pvar->parray);
  420. }
  421. }
  422. }
  423. else if (cDims > 1)
  424. {
  425. ASSERT(FALSE); // we didn't expect > 1 dimension on bstr arrays!
  426. }
  427. break;
  428. }
  429. }
  430. return bRet;
  431. }
  432. CFilterGrep::CFilterGrep()
  433. : _hdpaGrepBuffers(NULL),
  434. _pTokens(NULL),
  435. _dwFlags(0),
  436. _pwszContentRestricted(NULL),
  437. _pwszPropertiesRestricted(NULL)
  438. {
  439. }
  440. CFilterGrep::~CFilterGrep()
  441. {
  442. _ClearGrepBuffers();
  443. delete [] _pwszContentRestricted;
  444. delete [] _pwszPropertiesRestricted;
  445. delete _pTokens;
  446. if (_fcritsec)
  447. {
  448. DeleteCriticalSection(&_critsec);
  449. }
  450. }
  451. STDMETHODIMP CFilterGrep::InitSelf(void)
  452. {
  453. if (_fcritsec == FALSE)
  454. {
  455. if (!InitializeCriticalSectionAndSpinCount(&_critsec, 0))
  456. {
  457. return E_FAIL;
  458. }
  459. _fcritsec = TRUE;
  460. }
  461. return S_OK;
  462. }
  463. STDMETHODIMP CFilterGrep::Initialize(UINT nCodepage, LPCWSTR pszMatch, LPCWSTR pszExclude, DWORD dwFlags)
  464. {
  465. Reset();
  466. if ((0 == (dwFlags & (FGIF_BLANKETGREP|FGIF_GREPFILENAME))) ||
  467. !((pszMatch && *pszMatch) || (pszExclude && *pszExclude)))
  468. return E_INVALIDARG;
  469. if (!(_pTokens || (_pTokens = new CGrepTokens) != NULL))
  470. return E_OUTOFMEMORY;
  471. _dwFlags = dwFlags;
  472. return _pTokens->Initialize(nCodepage, pszMatch, pszExclude, BOOLIFY(dwFlags & FGIF_CASESENSITIVE));
  473. }
  474. STDMETHODIMP CFilterGrep::Reset()
  475. {
  476. if (_pTokens)
  477. _pTokens->Reset();
  478. _dwFlags = 0;
  479. return S_OK;
  480. }
  481. // converts non critical errors into S_FALSE, other return as FAILED(hr)
  482. HRESULT _MapFilterCriticalError(HRESULT hr)
  483. {
  484. switch (hr)
  485. {
  486. case FILTER_E_END_OF_CHUNKS:
  487. case FILTER_E_NO_MORE_TEXT:
  488. case FILTER_E_NO_MORE_VALUES:
  489. case FILTER_W_MONIKER_CLIPPED:
  490. case FILTER_E_NO_TEXT:
  491. case FILTER_E_NO_VALUES:
  492. case FILTER_E_EMBEDDING_UNAVAILABLE:
  493. case FILTER_E_LINK_UNAVAILABLE:
  494. hr = S_FALSE;
  495. break;
  496. }
  497. return hr;
  498. }
  499. // returns:
  500. // S_OK match
  501. // S_FALSE did not match
  502. STDMETHODIMP CFilterGrep::Grep(IShellFolder *psf, LPCITEMIDLIST pidl, LPCTSTR pszName)
  503. {
  504. HRESULT hr = S_FALSE;
  505. BOOL bHit = FALSE;
  506. ULONG ulFlags = IFILTER_FLAGS_OLE_PROPERTIES; // default to try to use pss
  507. ULONG dwThread = GetCurrentThreadId();
  508. if (NULL == _pTokens)
  509. return HRESULT_FROM_WIN32(ERROR_INVALID_DATA);
  510. if (_IsRestrictedFileType(pszName))
  511. return S_FALSE;
  512. // Grep the filename.
  513. if ((_dwFlags & FGIF_GREPFILENAME) && _pTokens->GrepW(pszName))
  514. {
  515. return S_OK;
  516. }
  517. IFilter *pFilter;
  518. if (SUCCEEDED(psf->BindToStorage(pidl, NULL, IID_PPV_ARG(IFilter, &pFilter))))
  519. {
  520. __try
  521. {
  522. hr = pFilter->Init(IFILTER_INIT_CANON_PARAGRAPHS |
  523. IFILTER_INIT_CANON_HYPHENS |
  524. IFILTER_INIT_CANON_SPACES |
  525. IFILTER_INIT_APPLY_INDEX_ATTRIBUTES |
  526. IFILTER_INIT_INDEXING_ONLY,
  527. 0, 0, &ulFlags);
  528. }
  529. __except(EXCEPTION_EXECUTE_HANDLER)
  530. {
  531. hr = E_ABORT;
  532. }
  533. while (!bHit && (S_OK == hr))
  534. {
  535. STAT_CHUNK stat;
  536. __try
  537. {
  538. hr = pFilter->GetChunk(&stat);
  539. while ((S_OK == hr) && (0 == (stat.flags & (CHUNK_TEXT | CHUNK_VALUE))))
  540. {
  541. TraceMsg(TF_WARNING, "CFilterGrep::Grep encountered bad/unknown type for chunk; skipping.");
  542. hr = pFilter->GetChunk(&stat);
  543. }
  544. }
  545. __except(EXCEPTION_EXECUTE_HANDLER)
  546. {
  547. hr = E_ABORT;
  548. }
  549. hr = _MapFilterCriticalError(hr); // convert filter errors into S_FALSE
  550. if (S_OK == hr)
  551. {
  552. ULONG grfDescriminate = (_dwFlags & FGIF_BLANKETGREP);
  553. if (FGIF_BLANKETGREP == grfDescriminate ||
  554. (_IsEqualAttribute(stat.attribute, FMTID_Storage, PID_STG_CONTENTS) ?
  555. FGIF_GREPPROPERTIES == grfDescriminate : FGIF_GREPCONTENT == grfDescriminate))
  556. {
  557. if (((stat.flags & CHUNK_VALUE) && S_OK == _GrepValue(pFilter, &stat)) ||
  558. ((stat.flags & CHUNK_TEXT) && S_OK == _GrepText(pFilter, &stat, dwThread)))
  559. {
  560. bHit = TRUE;
  561. }
  562. }
  563. }
  564. }
  565. pFilter->Release();
  566. }
  567. // Grep OLE/NFF properties if appropriate
  568. if (SUCCEEDED(hr))
  569. {
  570. if (!bHit && (ulFlags & IFILTER_FLAGS_OLE_PROPERTIES) && (_dwFlags & FGIF_BLANKETGREP))
  571. {
  572. IPropertySetStorage *pps;
  573. if (SUCCEEDED(psf->BindToStorage(pidl, NULL, IID_PPV_ARG(IPropertySetStorage, &pps))))
  574. {
  575. hr = _GrepProperties(pps);
  576. bHit = (S_OK == hr);
  577. pps->Release();
  578. }
  579. }
  580. }
  581. if (SUCCEEDED(hr))
  582. hr = bHit ? S_OK : S_FALSE;
  583. return hr;
  584. }
  585. STDMETHODIMP CFilterGrep::_GrepValue(IFilter* pFilter, STAT_CHUNK* pstat)
  586. {
  587. PROPVARIANT* pvar = NULL;
  588. HRESULT hr;
  589. __try
  590. {
  591. hr = pFilter->GetValue(&pvar);
  592. }
  593. __except(EXCEPTION_EXECUTE_HANDLER)
  594. {
  595. hr = E_ABORT;
  596. }
  597. if (SUCCEEDED(hr))
  598. {
  599. hr = _PropVariantGrep(pvar, _pTokens) ? S_OK : S_FALSE;
  600. PropVariantClear(pvar);
  601. CoTaskMemFree(pvar);
  602. }
  603. return hr;
  604. }
  605. // Greps OLE/NFF properties.
  606. STDMETHODIMP CFilterGrep::_GrepProperties(IPropertySetStorage *pss)
  607. {
  608. BOOL bHit = FALSE;
  609. IEnumSTATPROPSETSTG* pEnumSet;
  610. if (SUCCEEDED(pss->Enum(&pEnumSet)))
  611. {
  612. STATPROPSETSTG statSet[8];
  613. DWORD cSets = 0;
  614. while (!bHit &&
  615. SUCCEEDED(pEnumSet->Next(ARRAYSIZE(statSet), statSet, &cSets)) && cSets)
  616. {
  617. for (UINT i = 0; !bHit && i < cSets; i++)
  618. {
  619. IPropertyStorage *pstg;
  620. if (SUCCEEDED(pss->Open(statSet[i].fmtid, STGM_READ | STGM_DIRECT | STGM_SHARE_EXCLUSIVE, &pstg)))
  621. {
  622. bHit = (S_OK == _GrepEnumPropStg(pstg));
  623. pstg->Release();
  624. }
  625. }
  626. }
  627. pEnumSet->Release();
  628. }
  629. return bHit ? S_OK : S_FALSE;
  630. }
  631. #define PROPGREPBUFSIZE 16
  632. // Reads and greps a block of properties described by a
  633. // caller-supplied array of PROPSPECs.
  634. STDMETHODIMP CFilterGrep::_GrepPropStg(IPropertyStorage *pstg, ULONG cspec, PROPSPEC rgspec[])
  635. {
  636. PROPVARIANT rgvar[PROPGREPBUFSIZE] = {0}, // stack buffer
  637. *prgvar = rgvar;
  638. BOOL bHit = FALSE;
  639. if (cspec > ARRAYSIZE(rgvar)) // stack buffer large enough?
  640. {
  641. if (NULL == (prgvar = new PROPVARIANT[cspec]))
  642. return E_OUTOFMEMORY;
  643. }
  644. // Read properties:
  645. HRESULT hr = pstg->ReadMultiple(cspec, rgspec, prgvar);
  646. if (SUCCEEDED(hr))
  647. {
  648. for (UINT i = 0; i < cspec; i++)
  649. {
  650. if (!bHit)
  651. bHit = _PropVariantGrep(prgvar + i, _pTokens);
  652. PropVariantClear(rgvar + i);
  653. }
  654. }
  655. if (prgvar != rgvar)
  656. delete [] prgvar;
  657. if (SUCCEEDED(hr))
  658. return bHit ? S_OK : S_FALSE;
  659. return hr;
  660. }
  661. // Enumerates and greps all properties in a property set
  662. STDMETHODIMP CFilterGrep::_GrepEnumPropStg(IPropertyStorage* pstg)
  663. {
  664. BOOL bHit = FALSE;
  665. IEnumSTATPROPSTG* pEnumStg;
  666. if (SUCCEEDED(pstg->Enum(&pEnumStg)))
  667. {
  668. STATPROPSTG statProp[PROPGREPBUFSIZE];
  669. DWORD cProps;
  670. while (!bHit &&
  671. SUCCEEDED(pEnumStg->Next(ARRAYSIZE(statProp), statProp, &cProps)) && cProps)
  672. {
  673. PROPSPEC rgspec[PROPGREPBUFSIZE] = {0};
  674. for (UINT i = 0; (i < cProps) && (i < ARRAYSIZE(rgspec)); i++)
  675. {
  676. rgspec[i].ulKind = PRSPEC_PROPID;
  677. rgspec[i].propid = statProp[i].propid;
  678. CoTaskMemFree(statProp[i].lpwstrName);
  679. }
  680. bHit = (S_OK == _GrepPropStg(pstg, cProps, rgspec));
  681. }
  682. pEnumStg->Release();
  683. }
  684. return bHit ? S_OK : S_FALSE;
  685. }
  686. // Reports whether the indicated unicode character is a
  687. // word-breaking character.
  688. inline BOOL _IsWordBreakCharW(IN LPWSTR pszBuf, IN ULONG ich)
  689. {
  690. WORD wChar;
  691. return GetStringTypeW(CT_CTYPE1, pszBuf + ich, 1, &wChar)
  692. && (wChar & (C1_SPACE|C1_PUNCT|C1_CNTRL|C1_BLANK));
  693. }
  694. // Finds the last word-breaking character.
  695. LPWSTR _FindLastWordBreakW(IN LPWSTR pszBuf, IN ULONG cch)
  696. {
  697. while(--cch)
  698. {
  699. if (_IsWordBreakCharW(pszBuf, cch))
  700. return pszBuf + cch;
  701. }
  702. return NULL;
  703. }
  704. // {c1243ca0-bf96-11cd-b579-08002b30bfeb}
  705. const CLSID CLSID_PlainTextFilter = {0xc1243ca0, 0xbf96, 0x11cd, {0xb5, 0x79, 0x08, 0x00, 0x2b, 0x30, 0xbf, 0xeb}};
  706. void _ReplaceNulsWithSpaces(LPWSTR pszBuf, UINT cch)
  707. {
  708. LPWSTR pszEnd = pszBuf + cch;
  709. while (pszBuf < pszEnd)
  710. {
  711. if (*pszBuf == 0)
  712. {
  713. *pszBuf = TEXT(' ');
  714. }
  715. pszBuf++;
  716. }
  717. }
  718. STDMETHODIMP CFilterGrep::_GrepText(IFilter* pFilter, STAT_CHUNK* pstat, DWORD dwThreadID)
  719. {
  720. ASSERT(pstat);
  721. LPWSTR pszBuf = NULL;
  722. ULONG cchBuf = pstat->cwcLenSource ?
  723. pstat->cwcLenSource : DEFAULT_GREPBUFFERSIZE;
  724. HRESULT hr = _GetThreadGrepBuffer(dwThreadID, cchBuf, &pszBuf);
  725. if (SUCCEEDED(hr))
  726. {
  727. LPWSTR pszFetch = pszBuf,
  728. pszTail = NULL;
  729. ULONG cchFetch = cchBuf,
  730. cchTail = 0;
  731. // Fetch first block of text
  732. __try
  733. {
  734. hr = pFilter->GetText(&cchFetch, pszFetch);
  735. }
  736. __except(EXCEPTION_EXECUTE_HANDLER)
  737. {
  738. hr = E_ABORT;
  739. }
  740. CLSID clsid = {0};
  741. IUnknown_GetClassID(pFilter, &clsid); // to workaround a bug in the text filter
  742. while (SUCCEEDED(hr) && cchFetch)
  743. {
  744. ASSERT((cchFetch + cchTail) <= cchBuf);
  745. _ReplaceNulsWithSpaces(pszBuf, cchFetch + cchTail); // Let us work over binary files too
  746. pszBuf[cchFetch + cchTail] = 0; // don't trust filter to zero-terminate buffer.
  747. // When you get the FILTER_S_LAST_TEXT, that's it, you'll get no more text, so treat the tail part as part of the text
  748. if (hr == FILTER_S_LAST_TEXT)
  749. {
  750. pszTail = NULL;
  751. cchTail = 0;
  752. }
  753. else if (CLSID_PlainTextFilter == clsid)
  754. {
  755. // CLSID_PlainText filter always returns S_OK, instead of FILTER_S_LAST_TEXT, this forces us to scan
  756. // the entire chunk now, AND (see below) to pass it off as a tail for scanning next chunk too.
  757. // pszTail and cchTail are set below.
  758. }
  759. else
  760. {
  761. pszTail = _FindLastWordBreakW(pszBuf, cchFetch + cchTail);
  762. if (pszTail)
  763. {
  764. // Break on word boundary and leave remainder (tail) for next iteration
  765. *pszTail = TEXT('\0');
  766. pszTail++;
  767. cchTail = lstrlenW(pszTail);
  768. }
  769. else
  770. {
  771. // Wow, big block, with no word break, search its entirety.
  772. // REVIEW: cross chunk items won't be found
  773. pszTail = NULL;
  774. cchTail = 0;
  775. }
  776. }
  777. // do the string scan
  778. if (_pTokens->GrepW(pszBuf))
  779. {
  780. *pszBuf = 0;
  781. return S_OK;
  782. }
  783. else if (FILTER_S_LAST_TEXT == hr)
  784. {
  785. *pszBuf = 0;
  786. return S_FALSE;
  787. }
  788. // prepare for next fetch...
  789. // If it is the plaintext filter, grab the tail anyway, even though we've tested it already
  790. // WinSE 25867
  791. if (CLSID_PlainTextFilter == clsid)
  792. {
  793. pszTail = _FindLastWordBreakW(pszBuf, cchFetch + cchTail);
  794. if (pszTail)
  795. {
  796. *pszTail = TEXT('\0');
  797. pszTail++;
  798. cchTail = lstrlenW(pszTail);
  799. }
  800. else
  801. {
  802. pszTail = NULL;
  803. cchTail = 0;
  804. }
  805. }
  806. // prepare for next fetch...
  807. *pszBuf = 0;
  808. pszFetch = pszBuf;
  809. cchFetch = cchBuf;
  810. // If there is a tail to deal with, move it to the front of
  811. // the buffer and prepare to have the next block of incoming text
  812. // appended to the tail..
  813. if (pszTail && cchTail)
  814. {
  815. MoveMemory(pszBuf, pszTail, cchTail * sizeof(*pszTail));
  816. pszBuf[cchTail] = 0;
  817. pszFetch += cchTail;
  818. cchFetch -= cchTail;
  819. }
  820. // Fetch next block of text.
  821. __try
  822. {
  823. hr = pFilter->GetText(&cchFetch, pszFetch);
  824. }
  825. __except(EXCEPTION_EXECUTE_HANDLER)
  826. {
  827. hr = E_ABORT;
  828. }
  829. }
  830. }
  831. if (SUCCEEDED(hr) || FILTER_E_NO_MORE_TEXT == hr || FILTER_E_NO_TEXT == hr)
  832. return S_FALSE;
  833. return hr;
  834. }
  835. // Returns a grep buffer of the requested size for the specified thread.
  836. STDMETHODIMP CFilterGrep::_GetThreadGrepBuffer(
  837. DWORD dwThreadID,
  838. ULONG cchNeed,
  839. LPWSTR* ppszBuf)
  840. {
  841. ASSERT(dwThreadID);
  842. ASSERT(cchNeed > 0);
  843. ASSERT(ppszBuf);
  844. HRESULT hr = E_FAIL;
  845. *ppszBuf = NULL;
  846. _EnterCritical();
  847. if (_hdpaGrepBuffers || (_hdpaGrepBuffers = DPA_Create(4)) != NULL)
  848. {
  849. CGrepBuffer *pgb, *pgbCached = NULL;
  850. for (int i = 0, cnt = DPA_GetPtrCount(_hdpaGrepBuffers); i < cnt; i++)
  851. {
  852. pgb = (CGrepBuffer*)DPA_FastGetPtr(_hdpaGrepBuffers, i);
  853. if (pgb->IsThread(dwThreadID))
  854. {
  855. pgbCached = pgb;
  856. hr = pgbCached->Alloc(cchNeed);
  857. if (S_OK == hr)
  858. *ppszBuf = pgbCached->Buffer();
  859. break;
  860. }
  861. }
  862. if (NULL == pgbCached) // not cached?
  863. {
  864. if ((pgb = new CGrepBuffer(dwThreadID)) != NULL)
  865. {
  866. hr = pgb->Alloc(cchNeed);
  867. if (S_OK == hr)
  868. {
  869. *ppszBuf = pgb->Buffer();
  870. DPA_AppendPtr(_hdpaGrepBuffers, pgb);
  871. }
  872. else
  873. delete pgb;
  874. }
  875. else
  876. hr = E_OUTOFMEMORY;
  877. }
  878. }
  879. else
  880. hr = E_OUTOFMEMORY;
  881. _LeaveCritical();
  882. return hr;
  883. }
  884. // Clears grep buffer for all threads
  885. STDMETHODIMP_(void) CFilterGrep::_ClearGrepBuffers()
  886. {
  887. _EnterCritical();
  888. if (_hdpaGrepBuffers)
  889. {
  890. while(DPA_GetPtrCount(_hdpaGrepBuffers))
  891. {
  892. CGrepBuffer* pgb = (CGrepBuffer*)DPA_DeletePtr(_hdpaGrepBuffers, 0);
  893. delete pgb;
  894. }
  895. DPA_Destroy(_hdpaGrepBuffers);
  896. _hdpaGrepBuffers = NULL;
  897. }
  898. _LeaveCritical();
  899. }
  900. // Reports whether the file type is restricted from full-text grep.
  901. STDMETHODIMP_(BOOL) CFilterGrep::_IsRestrictedFileType(LPCWSTR pwszFile)
  902. {
  903. return FALSE;
  904. }
  905. STDMETHODIMP CFilterGrep::GetMatchTokens(OUT LPWSTR pszTokens, UINT cchTokens) const
  906. {
  907. HRESULT hr = _pTokens ? _pTokens->GetMatchTokens(pszTokens, cchTokens) : S_FALSE;
  908. if (S_OK != hr && pszTokens)
  909. *pszTokens = 0;
  910. return hr;
  911. }
  912. STDMETHODIMP CFilterGrep::GetExcludeTokens(OUT LPWSTR pszTokens, UINT cchTokens) const
  913. {
  914. HRESULT hr = _pTokens ? _pTokens->GetExcludeTokens(pszTokens, cchTokens) : S_FALSE;
  915. if (S_OK != hr && pszTokens)
  916. *pszTokens = 0;
  917. return hr;
  918. }