Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1194 lines
32 KiB

  1. // 10/12/99 scotthan created
  2. #include "shellprv.h"
  3. #include "filtgrep.h"
  4. #include <ntquery.h>
  5. #include <filterr.h>
  6. class CGrepTokens // maintains an index of unicode and ansi grep tokens.
  7. {
  8. public:
  9. STDMETHODIMP Initialize(UINT nCodepage, LPCWSTR pwszMatch, LPCWSTR pwszExclude, BOOL bCaseSensitive);
  10. STDMETHODIMP_(void) Reset();
  11. STDMETHODIMP_(BOOL) GrepW(LPCWSTR pwszText);
  12. STDMETHODIMP_(BOOL) GrepA(LPCSTR pwszText);
  13. STDMETHODIMP GetCodePage(UINT* pnCodepage) const;
  14. STDMETHODIMP GetMatchTokens(OUT LPWSTR pszTokens, UINT cchTokens) const;
  15. STDMETHODIMP GetExcludeTokens(OUT LPWSTR pszTokens, UINT cchTokens) const;
  16. private:
  17. UINT _nCodepage;
  18. LPWSTR _pszMatchW, _pszExcludeW; // raw strings, unicode
  19. LPSTR _pszMatchA, _pszExcludeA; // raw strings, ansi
  20. LPCWSTR *_rgpszMatchW, *_rgpszExcludeW; // token index, unicode
  21. LPCSTR *_rgpszMatchA, *_rgpszExcludeA; // token index, ansi
  22. LONG _cMatch, _cExclude; // token counts
  23. LPWSTR (__stdcall * _pfnStrStrW)(LPCWSTR, LPCWSTR);
  24. LPSTR (__stdcall * _pfnStrStrA)(LPCSTR, LPCSTR);
  25. public:
  26. // Ctor, Dtor
  27. CGrepTokens()
  28. : _nCodepage(0), _cMatch(0), _cExclude(0), _pfnStrStrW(StrStrIW), _pfnStrStrA(StrStrIA),
  29. _pszMatchW(NULL), _pszExcludeW(NULL), _rgpszMatchW(NULL), _rgpszExcludeW(NULL),
  30. _pszMatchA(NULL), _pszExcludeA(NULL), _rgpszMatchA(NULL), _rgpszExcludeA(NULL) {}
  31. ~CGrepTokens() { Reset(); }
  32. };
  33. class CGrepBuffer // auxilliary class: per-thread grep buffer
  34. {
  35. public:
  36. CGrepBuffer(ULONG dwThreadID) : _dwThreadID(dwThreadID), _pszBuf(NULL), _cchBuf(0) {}
  37. virtual ~CGrepBuffer() {delete [] _pszBuf;}
  38. STDMETHODIMP Alloc(ULONG cch);
  39. STDMETHODIMP_(BOOL) IsThread(ULONG dwThread) const {return dwThread == _dwThreadID;}
  40. STDMETHODIMP_(LPWSTR) Buffer() { return _pszBuf; }
  41. #define DEFAULT_GREPBUFFERSIZE 0x00FF // +1 = 1 page.
  42. private:
  43. LPWSTR _pszBuf;
  44. ULONG _cchBuf;
  45. ULONG _dwThreadID;
  46. };
  47. // Makes a heap copy of a widechar string
  48. LPWSTR _AllocAndCopyString(LPCWSTR pszSrc, UINT cch = -1)
  49. {
  50. if (pszSrc)
  51. {
  52. if ((int)cch < 0) // must cast to "int" since cch is a UINT
  53. cch = lstrlenW(pszSrc);
  54. LPWSTR pszRet = new WCHAR[cch + 1];
  55. if (pszRet)
  56. {
  57. CopyMemory(pszRet, pszSrc, sizeof(*pszSrc) * cch);
  58. pszRet[cch] = 0;
  59. return pszRet;
  60. }
  61. }
  62. return NULL;
  63. }
  64. // Makes an ansi copy of a widechar string
  65. LPSTR _AllocAndCopyAnsiString(UINT nCodepage, LPCWSTR pszSrc, UINT cch = -1)
  66. {
  67. if (pszSrc)
  68. {
  69. if ((int)cch < 0) // must cast to "int" since cch is a UINT
  70. cch = lstrlenW(pszSrc);
  71. int cchBuf = WideCharToMultiByte(nCodepage, 0, pszSrc, cch, NULL, 0, NULL, NULL);
  72. LPSTR pszRet = new CHAR[cchBuf+1];
  73. if (pszRet)
  74. {
  75. int cchRet = WideCharToMultiByte(nCodepage, 0, pszSrc, cch, pszRet, cchBuf, NULL, NULL);
  76. pszRet[cchRet] = 0;
  77. return pszRet;
  78. }
  79. }
  80. return NULL;
  81. }
  82. // CGrepBuffer impl
  83. STDMETHODIMP CGrepBuffer::Alloc(ULONG cch)
  84. {
  85. LPWSTR pszBuf = NULL;
  86. if (cch)
  87. {
  88. if (_pszBuf && _cchBuf >= cch)
  89. return S_OK;
  90. pszBuf = new WCHAR[cch+1];
  91. if (NULL == pszBuf)
  92. return E_OUTOFMEMORY;
  93. *pszBuf = 0;
  94. }
  95. delete [] _pszBuf;
  96. _pszBuf = pszBuf;
  97. _cchBuf = cch;
  98. return _pszBuf != NULL ? S_OK : S_FALSE ;
  99. }
  100. // CGrepTokens impl
  101. // Counts the number of characters in a string containing NULL-delimited tokens ("foo\0bloke\0TheEnd\0\0")
  102. LONG _GetTokenListLength(LPCWSTR pszList, LONG* pcTokens = NULL)
  103. {
  104. LONG cchRet = 0;
  105. if (pcTokens) *pcTokens = 0;
  106. if (pszList && *pszList)
  107. {
  108. LPCWSTR pszToken, pszPrev;
  109. int i = 0;
  110. for (pszToken = pszPrev = pszList;
  111. pszToken && *pszToken;)
  112. {
  113. if (pcTokens)
  114. (*pcTokens)++;
  115. pszToken += lstrlenW(pszToken) + 1,
  116. cchRet += (DWORD)(pszToken - pszPrev) ;
  117. pszPrev = pszToken;
  118. }
  119. }
  120. return cchRet;
  121. }
  122. // wide version: Counts and/or indexes NULL-delimited string tokens ("foo\0bloke\0TheEnd\0\0")
  123. LONG _IndexTokensW(LPCWSTR pszList, LPCWSTR* prgszTokens = NULL)
  124. {
  125. LONG cRet = 0;
  126. if (pszList && *pszList)
  127. {
  128. LPCWSTR psz = pszList;
  129. int i = 0;
  130. for (; psz && *psz; psz += (lstrlenW(psz) + 1), i++)
  131. {
  132. if (prgszTokens)
  133. prgszTokens[i] = psz;
  134. cRet++;
  135. }
  136. }
  137. return cRet;
  138. }
  139. // ansi version: Counts and/or indexes NULL-delimited string tokens ("foo\0bloke\0TheEnd\0\0")
  140. LONG _IndexTokensA(LPCSTR pszList, LPCSTR* prgszTokens = NULL)
  141. {
  142. LONG cRet = 0;
  143. if (pszList && *pszList)
  144. {
  145. LPCSTR psz = pszList;
  146. int i = 0;
  147. for (; psz && *psz; psz += (lstrlenA(psz) + 1), i++)
  148. {
  149. if (prgszTokens)
  150. prgszTokens[i] = psz;
  151. cRet++;
  152. }
  153. }
  154. return cRet;
  155. }
  156. // wide version: Allocates a string token index and indexes a string of NULL-delimited tokens.
  157. STDMETHODIMP _AllocAndIndexTokensW(LONG cTokens, LPCWSTR pszList, LPCWSTR** pprgszTokens)
  158. {
  159. if (cTokens)
  160. {
  161. if (NULL == (*pprgszTokens = new LPCWSTR[cTokens]))
  162. return E_OUTOFMEMORY;
  163. if (cTokens != _IndexTokensW(pszList, *pprgszTokens))
  164. {
  165. delete [] (*pprgszTokens);
  166. *pprgszTokens = NULL;
  167. return E_FAIL;
  168. }
  169. }
  170. return S_OK;
  171. }
  172. // ansi version: Allocates a string token index and indexes a string of NULL-delimited tokens.
  173. STDMETHODIMP _AllocAndIndexTokensA(LONG cTokens, LPCSTR pszList, LPCSTR** pprgszTokens)
  174. {
  175. if (cTokens)
  176. {
  177. if (NULL == (*pprgszTokens = new LPCSTR[cTokens]))
  178. return E_OUTOFMEMORY;
  179. if (cTokens != _IndexTokensA(pszList, *pprgszTokens))
  180. {
  181. delete [] (*pprgszTokens);
  182. *pprgszTokens = NULL;
  183. return E_FAIL;
  184. }
  185. }
  186. return S_OK;
  187. }
  188. // Frees unicode and ansi token lists and corresponding indices.
  189. void _FreeUniAnsiTokenList(
  190. OUT LPWSTR* ppszListW,
  191. OUT LPSTR* ppszListA,
  192. OUT LPCWSTR** pprgTokensW,
  193. OUT LPCSTR** pprgTokensA)
  194. {
  195. delete [] *ppszListW; *ppszListW = NULL;
  196. delete [] *ppszListA; *ppszListA = NULL;
  197. delete [] *pprgTokensW; *pprgTokensW = NULL;
  198. delete [] *pprgTokensA; *pprgTokensA = NULL;
  199. }
  200. // Allocates unicode and ansi token lists and corresponding indices.
  201. STDMETHODIMP _AllocUniAnsiTokenList(
  202. UINT nCodepage,
  203. LPCWSTR pszList,
  204. OUT LPWSTR* ppszListW,
  205. OUT LPSTR* ppszListA,
  206. OUT LONG* pcTokens,
  207. OUT LPCWSTR** pprgTokensW,
  208. OUT LPCSTR** pprgTokensA)
  209. {
  210. HRESULT hr = S_FALSE;
  211. LONG cTokens = 0;
  212. UINT cch = _GetTokenListLength(pszList, &cTokens);
  213. *ppszListW = NULL;
  214. *ppszListA = NULL;
  215. *pprgTokensW = NULL;
  216. *pprgTokensA = NULL;
  217. *pcTokens = 0;
  218. if (cTokens)
  219. {
  220. hr = E_OUTOFMEMORY;
  221. if (NULL == (*ppszListW = _AllocAndCopyString(pszList, cch)))
  222. goto failure_exit;
  223. if (NULL == (*ppszListA = _AllocAndCopyAnsiString(nCodepage, pszList, cch)))
  224. goto failure_exit;
  225. if (FAILED((hr = _AllocAndIndexTokensW(cTokens, *ppszListW, pprgTokensW))))
  226. goto failure_exit;
  227. if (FAILED((hr = _AllocAndIndexTokensA(cTokens, *ppszListA, pprgTokensA))))
  228. goto failure_exit;
  229. *pcTokens = cTokens;
  230. hr = S_OK;
  231. }
  232. return hr;
  233. failure_exit:
  234. _FreeUniAnsiTokenList(ppszListW, ppszListA, pprgTokensW, pprgTokensA);
  235. return hr;
  236. }
  237. STDMETHODIMP CGrepTokens::Initialize(UINT nCodepage, LPCWSTR pszMatch, LPCWSTR pszExclude, BOOL bCaseSensitive)
  238. {
  239. HRESULT hr = E_INVALIDARG;
  240. Reset();
  241. BOOL bMatchString = (pszMatch && *pszMatch);
  242. BOOL bExcludeString = (pszExclude && *pszExclude);
  243. if (!(bMatchString || bExcludeString))
  244. return E_INVALIDARG;
  245. _nCodepage = nCodepage;
  246. if (bCaseSensitive)
  247. {
  248. _pfnStrStrW = StrStrW;
  249. _pfnStrStrA = StrStrA;
  250. }
  251. else
  252. {
  253. _pfnStrStrW = StrStrIW;
  254. _pfnStrStrA = StrStrIA;
  255. }
  256. if (bMatchString)
  257. {
  258. if (FAILED((hr = _AllocUniAnsiTokenList(nCodepage, pszMatch,
  259. &_pszMatchW, &_pszMatchA, &_cMatch, &_rgpszMatchW, &_rgpszMatchA))))
  260. {
  261. return hr;
  262. }
  263. }
  264. if (bExcludeString)
  265. {
  266. if (FAILED((hr = _AllocUniAnsiTokenList(nCodepage, pszExclude,
  267. &_pszExcludeW, &_pszExcludeA, &_cExclude, &_rgpszExcludeW, &_rgpszExcludeA))))
  268. {
  269. return hr;
  270. }
  271. }
  272. return hr;
  273. }
  274. STDMETHODIMP CGrepTokens::GetCodePage(UINT* pnCodepage) const
  275. {
  276. HRESULT hr = _nCodepage ? S_OK : S_FALSE;
  277. if (pnCodepage)
  278. *pnCodepage = _nCodepage;
  279. return hr;
  280. }
  281. // S_OK we have some match tokens, S_FALSE otherwise
  282. STDMETHODIMP CGrepTokens::GetMatchTokens(OUT LPWSTR pszMatch, UINT cchMatch) const
  283. {
  284. HRESULT hr = (_pszMatchW && *_pszMatchW) ? S_OK : S_FALSE;
  285. if (pszMatch)
  286. lstrcpynW(pszMatch, _pszMatchW ? _pszMatchW : L"", cchMatch);
  287. return hr;
  288. }
  289. // S_OK we have some exclude tokens, S_FALSE otherwise
  290. STDMETHODIMP CGrepTokens::GetExcludeTokens(OUT LPWSTR pszExclude, UINT cchExclude) const
  291. {
  292. HRESULT hr = (_pszExcludeW && *_pszExcludeW) ? S_OK : S_FALSE;
  293. if (pszExclude)
  294. lstrcpynW(pszExclude, _pszExcludeW ? _pszExcludeW : L"", cchExclude);
  295. return hr;
  296. }
  297. void CGrepTokens::Reset()
  298. {
  299. _FreeUniAnsiTokenList(&_pszMatchW, &_pszMatchA, &_rgpszMatchW, &_rgpszMatchA);
  300. _FreeUniAnsiTokenList(&_pszExcludeW, &_pszExcludeA, &_rgpszExcludeW, &_rgpszExcludeA);
  301. _cMatch = _cExclude = 0;
  302. _nCodepage = 0;
  303. }
  304. STDMETHODIMP_(BOOL) CGrepTokens::GrepW(LPCWSTR pszText)
  305. {
  306. BOOL bMatch = FALSE;
  307. if (pszText)
  308. {
  309. BOOL bExclude = FALSE;
  310. for (int i = 0; i < _cMatch; i++)
  311. {
  312. if (_pfnStrStrW(pszText, _rgpszMatchW[i]))
  313. {
  314. bMatch = TRUE;
  315. break;
  316. }
  317. }
  318. for (i = 0; i < _cExclude; i++)
  319. {
  320. if (_pfnStrStrW(pszText, _rgpszExcludeW[i]))
  321. {
  322. bExclude = TRUE;
  323. break;
  324. }
  325. }
  326. if (_cMatch && _cExclude)
  327. return bMatch || !_cExclude;
  328. if (_cExclude)
  329. return !bExclude;
  330. }
  331. return bMatch;
  332. }
  333. STDMETHODIMP_(BOOL) CGrepTokens::GrepA(LPCSTR pszText)
  334. {
  335. BOOL bMatch = FALSE;
  336. if (pszText)
  337. {
  338. BOOL bExclude = FALSE;
  339. for (int i = 0; i < _cMatch; i++)
  340. {
  341. if (_pfnStrStrA(pszText, _rgpszMatchA[i]))
  342. {
  343. bMatch = TRUE;
  344. break;
  345. }
  346. }
  347. for (i = 0; i < _cExclude; i++)
  348. {
  349. if (_pfnStrStrA(pszText, _rgpszExcludeA[i]))
  350. {
  351. bExclude = TRUE;
  352. break;
  353. }
  354. }
  355. if (_cMatch && _cExclude)
  356. return bMatch || !_cExclude;
  357. if (_cExclude)
  358. return !bExclude;
  359. }
  360. return bMatch;
  361. }
  362. inline STDMETHODIMP_(BOOL) _IsEqualAttribute(const FULLPROPSPEC& fps, REFFMTID fmtid, PROPID propid)
  363. {
  364. return IsEqualGUID(fmtid, fps.guidPropSet) &&
  365. PRSPEC_PROPID == fps.psProperty.ulKind &&
  366. propid == fps.psProperty.propid;
  367. }
  368. STDMETHODIMP_(BOOL) _PropVariantGrep(PROPVARIANT* pvar, CGrepTokens* pTokens)
  369. {
  370. BOOL bRet = FALSE;
  371. switch(pvar->vt)
  372. {
  373. case VT_LPWSTR:
  374. bRet = pTokens->GrepW(pvar->pwszVal);
  375. break;
  376. case VT_BSTR:
  377. bRet = pTokens->GrepW(pvar->bstrVal);
  378. break;
  379. case VT_LPSTR:
  380. bRet = pTokens->GrepA(pvar->pszVal);
  381. break;
  382. case VT_VECTOR|VT_LPWSTR:
  383. {
  384. for (UINT i = 0; !bRet && i < pvar->calpwstr.cElems; i++)
  385. bRet = pTokens->GrepW(pvar->calpwstr.pElems[i]);
  386. break;
  387. }
  388. case VT_VECTOR|VT_BSTR:
  389. {
  390. for (UINT i = 0; !bRet && i < pvar->cabstr.cElems; i++)
  391. bRet = pTokens->GrepW(pvar->cabstr.pElems[i]);
  392. break;
  393. }
  394. case VT_VECTOR|VT_LPSTR:
  395. {
  396. for (UINT i = 0; !bRet && i < pvar->calpstr.cElems; i++)
  397. bRet = pTokens->GrepA(pvar->calpstr.pElems[i]);
  398. break;
  399. }
  400. case VT_VECTOR|VT_VARIANT:
  401. {
  402. for (UINT i = 0; !bRet && i < pvar->capropvar.cElems; i++)
  403. bRet = _PropVariantGrep(pvar->capropvar.pElems + i, pTokens);
  404. break;
  405. }
  406. case VT_BSTR|VT_ARRAY:
  407. {
  408. // Only grep 1-dimensional arrays.
  409. UINT cDims = SafeArrayGetDim(pvar->parray);
  410. if (cDims == 1)
  411. {
  412. LONG lBound, uBound;
  413. if (SUCCEEDED(SafeArrayGetLBound(pvar->parray, 1, &lBound)) &&
  414. SUCCEEDED(SafeArrayGetUBound(pvar->parray, 1, &uBound)) &&
  415. uBound > lBound)
  416. {
  417. BSTR *rgpbstr;
  418. if (SUCCEEDED(SafeArrayAccessData(pvar->parray, (void **)&rgpbstr)))
  419. {
  420. for (int i = 0; !bRet && i <= (uBound - lBound); i++)
  421. {
  422. bRet = pTokens->GrepW(rgpbstr[i]);
  423. }
  424. SafeArrayUnaccessData(pvar->parray);
  425. }
  426. }
  427. }
  428. else if (cDims > 1)
  429. {
  430. ASSERT(FALSE); // we didn't expect > 1 dimension on bstr arrays!
  431. }
  432. break;
  433. }
  434. }
  435. return bRet;
  436. }
  437. // Retrieves grep restriction settings
  438. STDMETHODIMP_(BOOL) _FetchRestrictionSettings(LPCWSTR pwszVal, LPWSTR* ppwszSettings, BOOL bRefresh)
  439. {
  440. ASSERT(ppwszSettings);
  441. if (bRefresh)
  442. {
  443. delete [] *ppwszSettings;
  444. *ppwszSettings = NULL;
  445. }
  446. if (NULL == *ppwszSettings)
  447. {
  448. HKEY hkeyPolicy = NULL;
  449. if (RegOpenKeyExW(HKEY_CURRENT_USER, L"Software\\Microsoft\\Windows\\CurrentVersion\\Policies\\Search\\ExcludedFileTypes", 0,
  450. KEY_READ, &hkeyPolicy) == ERROR_SUCCESS)
  451. {
  452. DWORD dwType, cbData = 0;
  453. if (RegQueryValueExW(hkeyPolicy, pwszVal, NULL, &dwType, NULL, &cbData) == ERROR_SUCCESS &&
  454. REG_MULTI_SZ == dwType)
  455. {
  456. if ((*ppwszSettings = new WCHAR[(cbData/sizeof(WCHAR))+ 1]) != NULL)
  457. {
  458. if (RegQueryValueExW(hkeyPolicy, pwszVal, NULL,
  459. &dwType, (LPBYTE)*ppwszSettings, &cbData) != ERROR_SUCCESS)
  460. {
  461. *ppwszSettings = 0;
  462. }
  463. }
  464. }
  465. RegCloseKey(hkeyPolicy);
  466. }
  467. if (NULL == *ppwszSettings) // we found no restriction key or value.
  468. {
  469. if ((*ppwszSettings = new WCHAR[1]))
  470. **ppwszSettings = 0;
  471. }
  472. }
  473. return *ppwszSettings != NULL;
  474. }
  475. // Scans restriction entries for a match against the specified filename extension
  476. STDMETHODIMP_(BOOL) _ScanRestrictionSettings(LPCWSTR pwszSettings, LPCWSTR pwszExt)
  477. {
  478. ASSERT(pwszSettings);
  479. ASSERT(pwszExt);
  480. for (LPCWSTR psz = pwszSettings; *psz; psz += (lstrlenW(psz) + 1))
  481. {
  482. if (0 == StrCmpIW(psz, pwszExt))
  483. return TRUE;
  484. }
  485. return FALSE;
  486. }
  487. CFilterGrep::CFilterGrep()
  488. : _hdpaGrepBuffers(NULL),
  489. _pTokens(NULL),
  490. _dwFlags(0),
  491. _pwszContentRestricted(NULL),
  492. _pwszPropertiesRestricted(NULL)
  493. {
  494. InitializeCriticalSection(&_critsec);
  495. }
  496. CFilterGrep::~CFilterGrep()
  497. {
  498. _ClearGrepBuffers();
  499. delete [] _pwszContentRestricted;
  500. delete [] _pwszPropertiesRestricted;
  501. delete _pTokens;
  502. DeleteCriticalSection(&_critsec);
  503. }
  504. STDMETHODIMP CFilterGrep::Initialize(UINT nCodepage, LPCWSTR pszMatch, LPCWSTR pszExclude, DWORD dwFlags)
  505. {
  506. Reset();
  507. if ((0 == (dwFlags & (FGIF_BLANKETGREP|FGIF_GREPFILENAME))) ||
  508. !((pszMatch && *pszMatch) || (pszExclude && *pszExclude)))
  509. return E_INVALIDARG;
  510. if (!(_pTokens || (_pTokens = new CGrepTokens) != NULL))
  511. return E_OUTOFMEMORY;
  512. _dwFlags = dwFlags;
  513. return _pTokens->Initialize(nCodepage, pszMatch, pszExclude, BOOLIFY(dwFlags & FGIF_CASESENSITIVE));
  514. }
  515. STDMETHODIMP CFilterGrep::Reset()
  516. {
  517. if (_pTokens)
  518. _pTokens->Reset();
  519. _dwFlags = 0;
  520. return S_OK;
  521. }
  522. // converts non critical errors into S_FALSE, other return as FAILED(hr)
  523. HRESULT _MapFilterCriticalError(HRESULT hr)
  524. {
  525. switch (hr)
  526. {
  527. case FILTER_E_END_OF_CHUNKS:
  528. case FILTER_E_NO_MORE_TEXT:
  529. case FILTER_E_NO_MORE_VALUES:
  530. case FILTER_W_MONIKER_CLIPPED:
  531. case FILTER_E_NO_TEXT:
  532. case FILTER_E_NO_VALUES:
  533. case FILTER_E_EMBEDDING_UNAVAILABLE:
  534. case FILTER_E_LINK_UNAVAILABLE:
  535. hr = S_FALSE;
  536. break;
  537. }
  538. return hr;
  539. }
  540. // returns:
  541. // S_OK match
  542. // S_FALSE did not match
  543. STDMETHODIMP CFilterGrep::Grep(IShellFolder *psf, LPCITEMIDLIST pidl, LPCTSTR pszName)
  544. {
  545. HRESULT hr = S_FALSE;
  546. BOOL bHit = FALSE;
  547. ULONG ulFlags = IFILTER_FLAGS_OLE_PROPERTIES; // default to try to use pss
  548. ULONG dwThread = GetCurrentThreadId();
  549. if (NULL == _pTokens)
  550. return HRESULT_FROM_WIN32(ERROR_INVALID_DATA);
  551. if (_IsRestrictedFileType(pszName))
  552. return S_FALSE;
  553. // Grep the filename.
  554. if ((_dwFlags & FGIF_GREPFILENAME) && _pTokens->GrepW(pszName))
  555. {
  556. return S_OK;
  557. }
  558. IFilter *pFilter;
  559. if (SUCCEEDED(psf->BindToStorage(pidl, NULL, IID_PPV_ARG(IFilter, &pFilter))))
  560. {
  561. __try
  562. {
  563. hr = pFilter->Init(IFILTER_INIT_CANON_PARAGRAPHS |
  564. IFILTER_INIT_CANON_HYPHENS |
  565. IFILTER_INIT_CANON_SPACES |
  566. IFILTER_INIT_APPLY_INDEX_ATTRIBUTES |
  567. IFILTER_INIT_INDEXING_ONLY,
  568. 0, 0, &ulFlags);
  569. }
  570. __except(EXCEPTION_EXECUTE_HANDLER)
  571. {
  572. hr = E_ABORT;
  573. }
  574. while (!bHit && (S_OK == hr))
  575. {
  576. STAT_CHUNK stat;
  577. __try
  578. {
  579. hr = pFilter->GetChunk(&stat);
  580. while ((S_OK == hr) && (0 == (stat.flags & (CHUNK_TEXT | CHUNK_VALUE))))
  581. {
  582. TraceMsg(TF_WARNING, "CFilterGrep::Grep encountered bad/unknown type for chunk; skipping.");
  583. hr = pFilter->GetChunk(&stat);
  584. }
  585. }
  586. __except(EXCEPTION_EXECUTE_HANDLER)
  587. {
  588. hr = E_ABORT;
  589. }
  590. hr = _MapFilterCriticalError(hr); // convert filter errors into S_FALSE
  591. if (S_OK == hr)
  592. {
  593. ULONG grfDescriminate = (_dwFlags & FGIF_BLANKETGREP);
  594. if (FGIF_BLANKETGREP == grfDescriminate ||
  595. (_IsEqualAttribute(stat.attribute, FMTID_Storage, PID_STG_CONTENTS) ?
  596. FGIF_GREPPROPERTIES == grfDescriminate : FGIF_GREPCONTENT == grfDescriminate))
  597. {
  598. if (((stat.flags & CHUNK_VALUE) && S_OK == _GrepValue(pFilter, &stat)) ||
  599. ((stat.flags & CHUNK_TEXT) && S_OK == _GrepText(pFilter, &stat, dwThread)))
  600. {
  601. bHit = TRUE;
  602. }
  603. }
  604. }
  605. }
  606. pFilter->Release();
  607. }
  608. // Grep OLE/NFF properties if appropriate
  609. if (SUCCEEDED(hr))
  610. {
  611. if (!bHit && (ulFlags & IFILTER_FLAGS_OLE_PROPERTIES) && (_dwFlags & FGIF_BLANKETGREP))
  612. {
  613. IPropertySetStorage *pps;
  614. if (SUCCEEDED(psf->BindToStorage(pidl, NULL, IID_PPV_ARG(IPropertySetStorage, &pps))))
  615. {
  616. hr = _GrepProperties(pps);
  617. bHit = (S_OK == hr);
  618. pps->Release();
  619. }
  620. }
  621. }
  622. if (SUCCEEDED(hr))
  623. hr = bHit ? S_OK : S_FALSE;
  624. return hr;
  625. }
  626. STDMETHODIMP CFilterGrep::_GrepValue(IFilter* pFilter, STAT_CHUNK* pstat)
  627. {
  628. PROPVARIANT* pvar = NULL;
  629. HRESULT hr;
  630. __try
  631. {
  632. hr = pFilter->GetValue(&pvar);
  633. }
  634. __except(EXCEPTION_EXECUTE_HANDLER)
  635. {
  636. hr = E_ABORT;
  637. }
  638. if (SUCCEEDED(hr))
  639. {
  640. hr = _PropVariantGrep(pvar, _pTokens) ? S_OK : S_FALSE;
  641. PropVariantClear(pvar);
  642. CoTaskMemFree(pvar);
  643. }
  644. return hr;
  645. }
  646. // Greps OLE/NFF properties.
  647. STDMETHODIMP CFilterGrep::_GrepProperties(IPropertySetStorage *pss)
  648. {
  649. BOOL bHit = FALSE;
  650. IEnumSTATPROPSETSTG* pEnumSet;
  651. if (SUCCEEDED(pss->Enum(&pEnumSet)))
  652. {
  653. STATPROPSETSTG statSet[8];
  654. DWORD cSets = 0;
  655. while (!bHit &&
  656. SUCCEEDED(pEnumSet->Next(ARRAYSIZE(statSet), statSet, &cSets)) && cSets)
  657. {
  658. for (UINT i = 0; !bHit && i < cSets; i++)
  659. {
  660. IPropertyStorage *pstg;
  661. if (SUCCEEDED(pss->Open(statSet[i].fmtid, STGM_READ | STGM_DIRECT | STGM_SHARE_EXCLUSIVE, &pstg)))
  662. {
  663. bHit = (S_OK == _GrepEnumPropStg(pstg));
  664. pstg->Release();
  665. }
  666. }
  667. }
  668. pEnumSet->Release();
  669. }
  670. return bHit ? S_OK : S_FALSE;
  671. }
  672. #define PROPGREPBUFSIZE 16
  673. // Reads and greps a block of properties described by a
  674. // caller-supplied array of PROPSPECs.
  675. STDMETHODIMP CFilterGrep::_GrepPropStg(IPropertyStorage *pstg, ULONG cspec, PROPSPEC rgspec[])
  676. {
  677. PROPVARIANT rgvar[PROPGREPBUFSIZE] = {0}, // stack buffer
  678. *prgvar = rgvar;
  679. BOOL bHit = FALSE;
  680. if (cspec > ARRAYSIZE(rgvar)) // stack buffer large enough?
  681. {
  682. if (NULL == (prgvar = new PROPVARIANT[cspec]))
  683. return E_OUTOFMEMORY;
  684. ZeroMemory(prgvar, sizeof(PROPVARIANT) * cspec);
  685. }
  686. // Read properties:
  687. HRESULT hr = pstg->ReadMultiple(cspec, rgspec, prgvar);
  688. if (SUCCEEDED(hr))
  689. {
  690. for (UINT i = 0; i < cspec; i++)
  691. {
  692. if (!bHit)
  693. bHit = _PropVariantGrep(prgvar + i, _pTokens);
  694. PropVariantClear(rgvar + i);
  695. }
  696. }
  697. if (prgvar != rgvar)
  698. delete [] prgvar;
  699. if (SUCCEEDED(hr))
  700. return bHit ? S_OK : S_FALSE;
  701. return hr;
  702. }
  703. // Enumerates and greps all properties in a property set
  704. STDMETHODIMP CFilterGrep::_GrepEnumPropStg(IPropertyStorage* pstg)
  705. {
  706. BOOL bHit = FALSE;
  707. IEnumSTATPROPSTG* pEnumStg;
  708. if (SUCCEEDED(pstg->Enum(&pEnumStg)))
  709. {
  710. STATPROPSTG statProp[PROPGREPBUFSIZE];
  711. DWORD cProps;
  712. while (!bHit &&
  713. SUCCEEDED(pEnumStg->Next(ARRAYSIZE(statProp), statProp, &cProps)) && cProps)
  714. {
  715. PROPSPEC rgspec[PROPGREPBUFSIZE] = {0};
  716. for (UINT i = 0; i < cProps; i++)
  717. {
  718. rgspec[i].ulKind = PRSPEC_PROPID;
  719. rgspec[i].propid = statProp[i].propid;
  720. CoTaskMemFree(statProp[i].lpwstrName);
  721. }
  722. bHit = (S_OK == _GrepPropStg(pstg, cProps, rgspec));
  723. }
  724. pEnumStg->Release();
  725. }
  726. return bHit ? S_OK : S_FALSE;
  727. }
  728. // Reports whether the indicated unicode character is a
  729. // word-breaking character.
  730. inline BOOL _IsWordBreakCharW(IN LPWSTR pszBuf, IN ULONG ich)
  731. {
  732. WORD wChar;
  733. return GetStringTypeW(CT_CTYPE1, pszBuf + ich, 1, &wChar)
  734. && (wChar & (C1_SPACE|C1_PUNCT|C1_CNTRL|C1_BLANK));
  735. }
  736. // Finds the last word-breaking character.
  737. LPWSTR _FindLastWordBreakW(IN LPWSTR pszBuf, IN ULONG cch)
  738. {
  739. while(--cch)
  740. {
  741. if (_IsWordBreakCharW(pszBuf, cch))
  742. return pszBuf + cch;
  743. }
  744. return NULL;
  745. }
  746. // {c1243ca0-bf96-11cd-b579-08002b30bfeb}
  747. const CLSID CLSID_PlainTextFilter = {0xc1243ca0, 0xbf96, 0x11cd, {0xb5, 0x79, 0x08, 0x00, 0x2b, 0x30, 0xbf, 0xeb}};
  748. STDMETHODIMP CFilterGrep::_GrepText(IFilter* pFilter, STAT_CHUNK* pstat, DWORD dwThreadID)
  749. {
  750. ASSERT(pstat);
  751. LPWSTR pszBuf = NULL;
  752. ULONG cchBuf = pstat->cwcLenSource ?
  753. pstat->cwcLenSource : DEFAULT_GREPBUFFERSIZE;
  754. HRESULT hr = _GetThreadGrepBuffer(dwThreadID, cchBuf, &pszBuf);
  755. if (SUCCEEDED(hr))
  756. {
  757. LPWSTR pszFetch = pszBuf,
  758. pszTail = NULL;
  759. ULONG cchFetch = cchBuf,
  760. cchTail = 0;
  761. // Fetch first block of text
  762. __try
  763. {
  764. hr = pFilter->GetText(&cchFetch, pszFetch);
  765. }
  766. __except(EXCEPTION_EXECUTE_HANDLER)
  767. {
  768. hr = E_ABORT;
  769. }
  770. CLSID clsid = {0};
  771. IUnknown_GetClassID(pFilter, &clsid); // to workaround a bug in the text filter
  772. while (SUCCEEDED(hr) && cchFetch)
  773. {
  774. ASSERT((cchFetch + cchTail) <= cchBuf);
  775. pszBuf[cchFetch + cchTail] = 0; // don't trust filter to zero-terminate buffer.
  776. // When you get the FILTER_S_LAST_TEXT, that's it, you'll get no more text, so treat the tail part as part of the text
  777. if (hr == FILTER_S_LAST_TEXT)
  778. {
  779. pszTail = NULL;
  780. cchTail = 0;
  781. }
  782. else if (CLSID_PlainTextFilter == clsid)
  783. {
  784. // CLSID_PlainText filter always returns S_OK, instead of FILTER_S_LAST_TEXT, this forces us to scan
  785. // the entire chunk now, AND (see below) to pass it off as a tail for scanning next chunk too.
  786. // pszTail and cchTail are set below.
  787. }
  788. else
  789. {
  790. pszTail = _FindLastWordBreakW(pszBuf, cchFetch + cchTail);
  791. if (pszTail)
  792. {
  793. // Break on word boundary and leave remainder (tail) for next iteration
  794. *pszTail = TEXT('\0');
  795. pszTail++;
  796. cchTail = lstrlenW(pszTail);
  797. }
  798. else
  799. {
  800. // Wow, big block, with no word break, search its entirety.
  801. // REVIEW: cross chunk items won't be found
  802. pszTail = NULL;
  803. cchTail = 0;
  804. }
  805. }
  806. // do the string scan
  807. if (_pTokens->GrepW(pszBuf))
  808. {
  809. *pszBuf = 0;
  810. return S_OK;
  811. }
  812. else if (FILTER_S_LAST_TEXT == hr)
  813. {
  814. *pszBuf = 0;
  815. return S_FALSE;
  816. }
  817. // prepare for next fetch...
  818. // If it is the plaintext filter, grab the tail anyway, even though we've tested it already
  819. // WinSE 25867
  820. if (CLSID_PlainTextFilter == clsid)
  821. {
  822. pszTail = _FindLastWordBreakW(pszBuf, cchFetch + cchTail);
  823. if (pszTail)
  824. {
  825. *pszTail = TEXT('\0');
  826. pszTail++;
  827. cchTail = lstrlenW(pszTail);
  828. }
  829. else
  830. {
  831. pszTail = NULL;
  832. cchTail = 0;
  833. }
  834. }
  835. *pszBuf = 0;
  836. pszFetch = pszBuf;
  837. cchFetch = cchBuf;
  838. // If there is a tail to deal with, move it to the front of
  839. // the buffer and prepare to have the next block of incoming text
  840. // appended to the tail..
  841. if (pszTail && cchTail)
  842. {
  843. MoveMemory(pszBuf, pszTail, cchTail * sizeof(*pszTail));
  844. pszBuf[cchTail] = 0;
  845. pszFetch += cchTail;
  846. cchFetch -= cchTail;
  847. }
  848. // Fetch next block of text.
  849. __try
  850. {
  851. hr = pFilter->GetText(&cchFetch, pszFetch);
  852. }
  853. __except(EXCEPTION_EXECUTE_HANDLER)
  854. {
  855. hr = E_ABORT;
  856. }
  857. }
  858. }
  859. if (SUCCEEDED(hr) || FILTER_E_NO_MORE_TEXT == hr || FILTER_E_NO_TEXT == hr)
  860. return S_FALSE;
  861. return hr;
  862. }
  863. // Returns a grep buffer of the requested size for the specified thread.
  864. STDMETHODIMP CFilterGrep::_GetThreadGrepBuffer(
  865. DWORD dwThreadID,
  866. ULONG cchNeed,
  867. LPWSTR* ppszBuf)
  868. {
  869. ASSERT(dwThreadID);
  870. ASSERT(cchNeed > 0);
  871. ASSERT(ppszBuf);
  872. HRESULT hr = E_FAIL;
  873. *ppszBuf = NULL;
  874. _EnterCritical();
  875. if (_hdpaGrepBuffers || (_hdpaGrepBuffers = DPA_Create(4)) != NULL)
  876. {
  877. CGrepBuffer *pgb, *pgbCached = NULL;
  878. for (int i = 0, cnt = DPA_GetPtrCount(_hdpaGrepBuffers); i < cnt; i++)
  879. {
  880. pgb = (CGrepBuffer*)DPA_FastGetPtr(_hdpaGrepBuffers, i);
  881. if (pgb->IsThread(dwThreadID))
  882. {
  883. pgbCached = pgb;
  884. hr = pgbCached->Alloc(cchNeed);
  885. if (S_OK == hr)
  886. *ppszBuf = pgbCached->Buffer();
  887. break;
  888. }
  889. }
  890. if (NULL == pgbCached) // not cached?
  891. {
  892. if ((pgb = new CGrepBuffer(dwThreadID)) != NULL)
  893. {
  894. hr = pgb->Alloc(cchNeed);
  895. if (S_OK == hr)
  896. {
  897. *ppszBuf = pgb->Buffer();
  898. DPA_AppendPtr(_hdpaGrepBuffers, pgb);
  899. }
  900. else
  901. delete pgb;
  902. }
  903. else
  904. hr = E_OUTOFMEMORY;
  905. }
  906. }
  907. else
  908. hr = E_OUTOFMEMORY;
  909. _LeaveCritical();
  910. return hr;
  911. }
  912. // Frees grep buffer for the specified thread
  913. STDMETHODIMP CFilterGrep::_FreeThreadGrepBuffer(DWORD dwThreadID)
  914. {
  915. HRESULT hr = S_FALSE;
  916. _EnterCritical();
  917. for (int i = 0, cnt = DPA_GetPtrCount(_hdpaGrepBuffers); i < cnt; i++)
  918. {
  919. CGrepBuffer* pgb = (CGrepBuffer*) DPA_FastGetPtr(_hdpaGrepBuffers, i);
  920. if (pgb->IsThread(dwThreadID))
  921. {
  922. DPA_DeletePtr(_hdpaGrepBuffers, i);
  923. hr = S_OK;
  924. break;
  925. }
  926. }
  927. _LeaveCritical();
  928. return hr;
  929. }
  930. // Clears grep buffer for all threads
  931. STDMETHODIMP_(void) CFilterGrep::_ClearGrepBuffers()
  932. {
  933. _EnterCritical();
  934. if (_hdpaGrepBuffers)
  935. {
  936. while(DPA_GetPtrCount(_hdpaGrepBuffers))
  937. {
  938. CGrepBuffer* pgb = (CGrepBuffer*)DPA_DeletePtr(_hdpaGrepBuffers, 0);
  939. delete pgb;
  940. }
  941. DPA_Destroy(_hdpaGrepBuffers);
  942. _hdpaGrepBuffers = NULL;
  943. }
  944. _LeaveCritical();
  945. }
  946. //#define _USE_GREP_RESTRICTIONS_ // Check for registered list of excluded files types
  947. // Reports whether the file type is restricted from full-text grep.
  948. STDMETHODIMP_(BOOL) CFilterGrep::_IsRestrictedFileType(LPCWSTR pwszFile)
  949. {
  950. #ifdef _USE_GREP_RESTRICTIONS_
  951. LPCWSTR pwszExt = PathFindExtensionW(pwszFile);
  952. if (pwszExt && *pwszExt)
  953. {
  954. if (_dwFlags & FGIF_GREPCONTENT &&
  955. _FetchRestrictionSettings(L"Content", &_pwszContentRestricted, FALSE))
  956. {
  957. if (_ScanRestrictionSettings(_pwszContentRestricted, pwszExt))
  958. return TRUE;
  959. }
  960. if (_dwFlags & FGIF_GREPCONTENT &&
  961. _FetchRestrictionSettings(L"Properties", &_pwszPropertiesRestricted, FALSE))
  962. {
  963. if (_ScanRestrictionSettings(_pwszPropertiesRestricted, pwszExt))
  964. return TRUE;
  965. }
  966. }
  967. #endif
  968. return FALSE;
  969. }
  970. STDMETHODIMP CFilterGrep::GetMatchTokens(OUT LPWSTR pszTokens, UINT cchTokens) const
  971. {
  972. HRESULT hr = _pTokens ? _pTokens->GetMatchTokens(pszTokens, cchTokens) : S_FALSE;
  973. if (S_OK != hr && pszTokens)
  974. *pszTokens = 0;
  975. return hr;
  976. }
  977. STDMETHODIMP CFilterGrep::GetExcludeTokens(OUT LPWSTR pszTokens, UINT cchTokens) const
  978. {
  979. HRESULT hr = _pTokens ? _pTokens->GetExcludeTokens(pszTokens, cchTokens) : S_FALSE;
  980. if (S_OK != hr && pszTokens)
  981. *pszTokens = 0;
  982. return hr;
  983. }
  984. STDMETHODIMP CFilterGrep::GetCodePage(UINT* pnCodepage) const
  985. {
  986. HRESULT hr = _pTokens ? _pTokens->GetCodePage(pnCodepage) : S_FALSE;
  987. if (S_OK != hr && pnCodepage)
  988. *pnCodepage = 0;
  989. return hr;
  990. }
  991. STDMETHODIMP CFilterGrep::GetFlags(DWORD* pdwFlags) const
  992. {
  993. if (*pdwFlags)
  994. *pdwFlags = _dwFlags;
  995. return S_OK;
  996. }