Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1001 lines
31 KiB

  1. //+---------------------------------------------------------------------------
  2. //
  3. // Microsoft Windows
  4. // Copyright (C) Microsoft Corporation, 1991 - 2000.
  5. //
  6. // File: FDRIVER.CXX
  7. //
  8. // Contents: Filter driver
  9. //
  10. //----------------------------------------------------------------------------
  11. #include <pch.cxx>
  12. #pragma hdrstop
  13. #include <ciole.hxx>
  14. #include <drep.hxx>
  15. #include <tfilt.hxx>
  16. #include <tsource.hxx>
  17. #include <fwevent.hxx>
  18. #include <cievtmsg.h>
  19. #include <propspec.hxx>
  20. #include <imprsnat.hxx>
  21. #include <oleprop.hxx>
  22. #include <fdaemon.hxx>
  23. #include <ntopen.hxx>
  24. #include <ciguid.hxx>
  25. #include "fdriver.hxx"
  26. #include "propfilt.hxx"
  27. #include "docsum.hxx"
  28. static GUID guidNull = { 0x00000000,
  29. 0x0000,
  30. 0x0000,
  31. { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } };
  32. //
  33. // Local procedures
  34. //
  35. BOOL IsNonIndexableProp( CFullPropSpec const & fps, PROPVARIANT const & var );
  36. static CFullPropSpec psRevName( guidQuery, DISPID_QUERY_REVNAME );
  37. static CFullPropSpec psName( guidStorage, PID_STG_NAME );
  38. static CFullPropSpec psPath( guidStorage, PID_STG_PATH);
  39. static CFullPropSpec psDirectory( guidStorage, PID_STG_DIRECTORY );
  40. static CFullPropSpec psCharacterization( guidCharacterization,
  41. propidCharacterization );
  42. static CFullPropSpec psTitle( guidDocSummary, propidTitle );
  43. static GUID guidHtmlInformation = defGuidHtmlInformation;
  44. static CFullPropSpec psAttrib( guidStorage, PID_STG_ATTRIBUTES );
  45. //
  46. // Helper functions
  47. //
  48. inline BOOL IsSpecialPid( FULLPROPSPEC const & fps )
  49. {
  50. return ( fps.psProperty.ulKind == PRSPEC_PROPID &&
  51. fps.psProperty.propid <= PID_CODEPAGE );
  52. }
  53. //+---------------------------------------------------------------------------
  54. //
  55. // Member: CFilterDriver::CFilterDriver, public
  56. //
  57. // Arguments:
  58. // [drep] -- pointer to the data repository for filtered
  59. // information
  60. // [perfobj] -- performance object to update
  61. // [cFilteredBlocks] -- Number of blocks filtered for the current
  62. // document
  63. // [cat] -- reference to a catalog proxy
  64. //
  65. //----------------------------------------------------------------------------
  66. CFilterDriver::CFilterDriver ( CDataRepository * drep,
  67. ICiCAdviseStatus * pAdviseStatus,
  68. ICiCFilterClient * pFilterClient,
  69. CCiFrameworkParams & params,
  70. CI_CLIENT_FILTER_CONFIG_INFO const & configInfo,
  71. ULONG & cFilteredBlocks,
  72. CNonStoredProps & NonStoredProps,
  73. ULONG cbBuf )
  74. : _drep( drep ),
  75. _llFileSize( 0 ),
  76. _cFilteredBlocks( cFilteredBlocks ),
  77. _params( params ),
  78. _pAdviseStatus( pAdviseStatus ),
  79. _pFilterClient( pFilterClient ),
  80. _configInfo( configInfo ),
  81. _NonStoredProps( NonStoredProps ),
  82. _cbBuf( cbBuf ),
  83. _attrib(0),
  84. _lcidSystemDefault( GetSystemDefaultLCID() )
  85. {
  86. }
  87. //+---------------------------------------------------------------------------
  88. //
  89. // Member: CFilterDriver::FillEntryBuffer, public
  90. //
  91. // Synopsis: Filters the document that IFilter loaded.
  92. //
  93. // Arguments: [pbDocName] -- Document in filter
  94. // [cbDocName] -- Size of [pbDocName]
  95. //
  96. // Notes: Calls to SwitchToThread() give up processor.
  97. //
  98. //----------------------------------------------------------------------------
  99. STATUS CFilterDriver::FillEntryBuffer( BYTE const * pbDocName, ULONG cbDocName )
  100. {
  101. _status = CANNOT_OPEN_STREAM;
  102. BOOL fFilterContents = FALSE; // Assume we should NOT filter contents
  103. //
  104. // Get opendoc for access to stored state and safely save it
  105. //
  106. ICiCOpenedDoc *pDocument;
  107. SCODE sc = _pFilterClient->GetOpenedDoc( &pDocument ); SwitchToThread();
  108. if ( !SUCCEEDED( sc ) )
  109. {
  110. ciDebugOut(( DEB_ERROR, "Unable to get OpenedDoc - %x\n", sc ));
  111. return _status;
  112. }
  113. XInterface<ICiCOpenedDoc> Document( pDocument );
  114. //
  115. // Attempt to open the document
  116. //
  117. sc = Document->Open( pbDocName, cbDocName );
  118. SwitchToThread();
  119. if (!SUCCEEDED( sc ))
  120. {
  121. if ( ::IsSharingViolation( sc ) )
  122. {
  123. _status = CI_SHARING_VIOLATION;
  124. }
  125. else
  126. {
  127. ciDebugOut(( DEB_IWARN, "Unable to open docname at 0x%X - 0x%X\n",
  128. pbDocName, sc ));
  129. if ( FILTER_E_UNREACHABLE == sc )
  130. _status = CI_NOT_REACHABLE;
  131. return _status;
  132. }
  133. }
  134. // Initialize LCIDs counter.
  135. _cLCIDs = 0;
  136. //
  137. // Attempt to filter properties
  138. //
  139. CDocCharacterization docChar( _params.GenerateCharacterization() ?
  140. _params.GetMaxCharacterization() : 0 );
  141. //
  142. // Get the stat property enumerator and filter based on it.
  143. //
  144. CDocStatPropertyEnum CPEProp( Document.GetPointer() ); SwitchToThread();
  145. fFilterContents = CPEProp.GetFilterContents( _params.FilterDirectories() );
  146. _llFileSize = CPEProp.GetFileSize( );
  147. FilterObject( CPEProp,
  148. *_drep,
  149. docChar ); SwitchToThread();
  150. //
  151. // filter security on the file.
  152. //
  153. if ( _configInfo.fSupportsSecurity )
  154. {
  155. FilterSecurity( Document.GetPointer( ), *_drep ); SwitchToThread();
  156. }
  157. if ( CI_SHARING_VIOLATION == _status )
  158. return _status;
  159. _status = SUCCESS;
  160. BOOL fFilterOleProperties = fFilterContents;
  161. BOOL fKnownFilter = TRUE;
  162. BOOL fIndexable = TRUE;
  163. if ( fFilterContents && ( 0 == ( FILE_ATTRIBUTE_ENCRYPTED & _attrib )) )
  164. {
  165. //
  166. // Filter time in Mb / hr
  167. //
  168. CFwPerfTime filterCounter( _pAdviseStatus,
  169. CI_PERF_FILTER_TIME,
  170. 1024*1024, 1000*60*60 );
  171. filterCounter.TStart();
  172. CFwPerfTime bindCounter( _pAdviseStatus,
  173. CI_PERF_BIND_TIME );
  174. bindCounter.TStart();
  175. IFilter *pTmpIFilter;
  176. sc = Document->GetIFilter( &pTmpIFilter ); SwitchToThread();
  177. if ( !SUCCEEDED( sc ) )
  178. pTmpIFilter = 0;
  179. _pIFilter.Set( pTmpIFilter );
  180. bindCounter.TStop( );
  181. if ( _pIFilter.IsNull( ))
  182. {
  183. //
  184. // We could not obtain an IFilter but we have filtered properties.
  185. // We should just return whatever status we have.
  186. //
  187. ciDebugOut(( DEB_IWARN,
  188. "Did not get a filter for document 0x%X\n",
  189. pbDocName ));
  190. if ( ::IsSharingViolation( sc ))
  191. _status = CI_SHARING_VIOLATION;
  192. else if ( FILTER_E_UNREACHABLE == sc )
  193. _status = CI_NOT_REACHABLE;
  194. if ( fFilterOleProperties )
  195. {
  196. //
  197. // No filter, but it might have properties. Get them.
  198. //
  199. COLEPropertyEnum oleProp( Document.GetPointer( ) ); SwitchToThread();
  200. BOOL fIsStorage = oleProp.IsStorage();
  201. if (fIsStorage)
  202. FilterObject( oleProp,
  203. *_drep,
  204. docChar ); SwitchToThread();
  205. }
  206. return _status;
  207. }
  208. ULONG ulFlags;
  209. STAT_CHUNK statChunk;
  210. sc = _pIFilter->Init( IFILTER_INIT_CANON_PARAGRAPHS |
  211. IFILTER_INIT_CANON_HYPHENS |
  212. IFILTER_INIT_CANON_SPACES |
  213. IFILTER_INIT_APPLY_INDEX_ATTRIBUTES |
  214. IFILTER_INIT_INDEXING_ONLY,
  215. 0,
  216. 0,
  217. &ulFlags ); SwitchToThread();
  218. if ( FAILED(sc) )
  219. {
  220. ciDebugOut(( DEB_WARN, "IFilter->Init() failed.\n" ));
  221. THROW( CException( sc ) );
  222. }
  223. fFilterOleProperties = (( ulFlags & IFILTER_FLAGS_OLE_PROPERTIES ) != 0);
  224. //
  225. // Determine the maximum number of filtered blocks allowed for this
  226. // file.
  227. //
  228. unsigned __int64 ullMultiplier = _params.GetMaxFilesizeMultiplier();
  229. unsigned __int64 ullcbBuf = _cbBuf;
  230. unsigned __int64 ullcbFile = _llFileSize;
  231. unsigned __int64 ullcBlocks = 1 + ( ullcbFile / ullcbBuf );
  232. unsigned __int64 ullmaxBlocks = ullcBlocks * ullMultiplier;
  233. if ( ullmaxBlocks > ULONG_MAX )
  234. ullmaxBlocks = ULONG_MAX;
  235. ULONG ulMaxFilteredBlocks = (ULONG) ullmaxBlocks;
  236. ciDebugOut(( DEB_ITRACE,
  237. "cbfile %I64d, cBlocks %I64d, maxcBlocks %I64d\n",
  238. ullcbFile, ullcBlocks, ullmaxBlocks ));
  239. //
  240. // Get the first chunk
  241. //
  242. do
  243. {
  244. sc = _pIFilter->GetChunk( &statChunk ); SwitchToThread();
  245. if (SUCCEEDED(sc))
  246. RegisterLocale(statChunk.locale);
  247. }
  248. while ( SUCCEEDED(sc) && IsSpecialPid( statChunk.attribute ) );
  249. _drep->InitFilteredBlockCount( ulMaxFilteredBlocks );
  250. _cFilteredBlocks = 0;
  251. NTSTATUS Status = STATUS_SUCCESS;
  252. TRY
  253. {
  254. BOOL fBadEmbeddingReport = FALSE;
  255. while ( SUCCEEDED(sc) ||
  256. FILTER_E_LINK_UNAVAILABLE == sc ||
  257. FILTER_E_EMBEDDING_UNAVAILABLE == sc )
  258. {
  259. BOOL fInUse;
  260. Document->IsInUseByAnotherProcess( &fInUse ); SwitchToThread();
  261. if ( fInUse )
  262. {
  263. _status = FILTER_EXCEPTION; // Force retry in driver
  264. break; // Stop filtering this doc
  265. }
  266. _cFilteredBlocks = _drep->GetFilteredBlockCount();
  267. if ( SUCCEEDED(sc) )
  268. {
  269. if ( IsSpecialPid( statChunk.attribute ) )
  270. {
  271. sc = _pIFilter->GetChunk( &statChunk ); SwitchToThread();
  272. if (SUCCEEDED(sc))
  273. RegisterLocale(statChunk.locale);
  274. continue;
  275. }
  276. //
  277. // Skip over unknown chunks.
  278. //
  279. if ( 0 == (statChunk.flags & (CHUNK_TEXT | CHUNK_VALUE)) )
  280. {
  281. ciDebugOut(( DEB_WARN,
  282. "Filtering of docname at 0x%X produced bogus chunk (not text or value)\n",
  283. pbDocName ));
  284. sc = _pIFilter->GetChunk( &statChunk ); SwitchToThread();
  285. if (SUCCEEDED(sc))
  286. RegisterLocale(statChunk.locale);
  287. continue;
  288. }
  289. if ( statChunk.flags & CHUNK_VALUE )
  290. {
  291. PROPVARIANT * pvar = 0;
  292. sc = _pIFilter->GetValue( &pvar );
  293. if ( SUCCEEDED(sc) )
  294. {
  295. XPtr<CStorageVariant> xvar( (CStorageVariant *)(ULONG_PTR)pvar );
  296. CFullPropSpec * pps = (CFullPropSpec *)(ULONG_PTR)(&statChunk.attribute);
  297. //
  298. // HACK #275: If we see a ROBOTS=NOINDEX tag, then bail out.
  299. //
  300. if ( IsNonIndexableProp( *pps, *pvar ) )
  301. {
  302. ciDebugOut(( DEB_WARN,
  303. "Document %x is not indexable (robots Meta-tag)\n",
  304. pbDocName ));
  305. sc = S_OK;
  306. fFilterOleProperties = FALSE;
  307. fIndexable = FALSE;
  308. break;
  309. }
  310. // Index this property twice -- once with default locale and with
  311. // the chunk locale.
  312. FilterProperty( *pvar, *pps, *_drep, docChar, statChunk.locale ); SwitchToThread();
  313. if (_lcidSystemDefault != statChunk.locale)
  314. {
  315. FilterProperty( *pvar, *pps, *_drep, docChar, _lcidSystemDefault ); SwitchToThread();
  316. }
  317. //
  318. // Only fetch next if we're done with this chunk.
  319. //
  320. if ( 0 == (statChunk.flags & CHUNK_TEXT) || !SUCCEEDED(sc) )
  321. {
  322. sc = _pIFilter->GetChunk( &statChunk ); SwitchToThread();
  323. if (SUCCEEDED(sc))
  324. RegisterLocale(statChunk.locale);
  325. continue;
  326. }
  327. }
  328. }
  329. if ( (statChunk.flags & CHUNK_TEXT) && SUCCEEDED(sc) )
  330. {
  331. if ( _drep->PutLanguage( statChunk.locale ) &&
  332. _drep->PutPropName( *((CFullPropSpec *)&statChunk.attribute) ) )
  333. {
  334. CTextSource tsource( _pIFilter.GetPointer( ), statChunk );
  335. docChar.Add( tsource.awcBuffer + tsource.iCur,
  336. tsource.iEnd - tsource.iCur,
  337. statChunk.attribute ); SwitchToThread();
  338. _drep->PutStream( &tsource ); SwitchToThread();
  339. sc = tsource.GetStatus();
  340. }
  341. else
  342. {
  343. sc = _pIFilter->GetChunk( &statChunk ); SwitchToThread();
  344. if (SUCCEEDED(sc))
  345. RegisterLocale(statChunk.locale);
  346. }
  347. if ( sc == FILTER_E_NO_TEXT && (statChunk.flags & CHUNK_VALUE) )
  348. sc = S_OK;
  349. }
  350. }
  351. if ( FILTER_E_EMBEDDING_UNAVAILABLE == sc )
  352. {
  353. if ( !fBadEmbeddingReport &&
  354. (_params.GetEventLogFlags()&CI_EVTLOG_FLAGS_FAILED_EMBEDDING) )
  355. {
  356. ReportFilterEmbeddingFailure( pbDocName, cbDocName );
  357. fBadEmbeddingReport = TRUE;
  358. }
  359. sc = _pIFilter->GetChunk( &statChunk ); SwitchToThread();
  360. if (SUCCEEDED(sc))
  361. RegisterLocale(statChunk.locale);
  362. }
  363. else if ( FILTER_E_LINK_UNAVAILABLE == sc )
  364. {
  365. sc = _pIFilter->GetChunk( &statChunk ); SwitchToThread();
  366. if (SUCCEEDED(sc))
  367. RegisterLocale(statChunk.locale);
  368. }
  369. }
  370. }
  371. CATCH ( CException, e )
  372. {
  373. Status = e.GetErrorCode();
  374. ciDebugOut(( DEB_IERROR,
  375. "Exception 0x%x thrown from filter DLL while filtering docName at 0x%X\n",
  376. Status,
  377. pbDocName ));
  378. }
  379. END_CATCH
  380. if ( !NT_SUCCESS(Status) && Status != FDAEMON_E_TOOMANYFILTEREDBLOCKS )
  381. {
  382. THROW( CException(FDAEMON_E_FATALERROR) );
  383. }
  384. if ( Status == FDAEMON_E_TOOMANYFILTEREDBLOCKS )
  385. {
  386. Win4Assert( _drep->GetFilteredBlockCount() > ulMaxFilteredBlocks );
  387. LogOverflow( pbDocName, cbDocName );
  388. //
  389. // Force exit from the loop
  390. //
  391. sc = FILTER_E_END_OF_CHUNKS;
  392. }
  393. _pIFilter.Free( );
  394. filterCounter.TStop( (ULONG)_llFileSize );
  395. }
  396. if ( FILTER_E_END_OF_CHUNKS != sc &&
  397. FILTER_E_PARTIALLY_FILTERED != sc &&
  398. FAILED( sc ) )
  399. {
  400. ciDebugOut(( DEB_IWARN, "Filter document at 0x(%X) returned SCODE 0x%x\n",
  401. pbDocName, sc ));
  402. QUIETTHROW( CException( sc ) );
  403. }
  404. BOOL fIsStorage = FALSE;
  405. if ( fFilterOleProperties )
  406. {
  407. //
  408. // filter ole properties only if it is a docfile
  409. //
  410. COLEPropertyEnum oleProp( Document.GetPointer( ) ); SwitchToThread();
  411. fIsStorage = oleProp.IsStorage();
  412. if (fIsStorage)
  413. FilterObject( oleProp,
  414. *_drep,
  415. docChar ); SwitchToThread();
  416. }
  417. //
  418. // Store the document characterization in the property cache.
  419. // Don't bother if characterization is turned off.
  420. //
  421. if ( _params.GenerateCharacterization() )
  422. {
  423. PROPVARIANT var;
  424. WCHAR awcSummary[ CI_MAX_CHARACTERIZATION_MAX + 1 ];
  425. if ( fIndexable && docChar.HasCharacterization() )
  426. {
  427. unsigned cwcSummary = sizeof awcSummary / sizeof WCHAR;
  428. // Use the raw text in the abstract unless we defaulted
  429. // to the text filter and the file has ole properties.
  430. BOOL fUseRawText = fKnownFilter || !fIsStorage;
  431. docChar.Get( awcSummary, cwcSummary, fUseRawText ); SwitchToThread();
  432. if ( 0 == cwcSummary )
  433. {
  434. var.vt = VT_EMPTY;
  435. }
  436. else
  437. {
  438. var.vt = VT_LPWSTR;
  439. var.pwszVal = awcSummary;
  440. }
  441. }
  442. else
  443. {
  444. var.vt = VT_EMPTY;
  445. }
  446. _drep->StoreValue( psCharacterization, var ); SwitchToThread();
  447. }
  448. return _status;
  449. }
  450. //+---------------------------------------------------------------------------
  451. //
  452. // Member: CFilterDriver::LogOverflow
  453. //
  454. // Synopsis: Notifies the client that there were too many blocks in the
  455. // given document
  456. //
  457. // Arguments: [pbDocName] - Document Name
  458. // [cbDocName] - Number of bytes in the document name.
  459. //
  460. // History: 1-22-97 srikants Created
  461. //
  462. //----------------------------------------------------------------------------
  463. void CFilterDriver::LogOverflow( BYTE const * pbDocName, ULONG cbDocName )
  464. {
  465. PROPVARIANT var[2];
  466. var[0].vt = VT_VECTOR | VT_UI1;
  467. var[0].caub.cElems = cbDocName;
  468. var[0].caub.pElems = (BYTE *) pbDocName;
  469. var[1].vt = VT_UI4;
  470. var[1].ulVal = _params.GetMaxFilesizeMultiplier();
  471. SCODE sc = _pAdviseStatus->NotifyStatus( CI_NOTIFY_FILTER_TOO_MANY_BLOCKS,
  472. 2,
  473. var );
  474. if ( !SUCCEEDED(sc) )
  475. {
  476. ciDebugOut(( DEB_WARN,
  477. "Failed to report filter to many blocks event. Error 0x%X\n",
  478. sc ));
  479. }
  480. }
  481. //+---------------------------------------------------------------------------
  482. //
  483. // Member: CFilterDriver::ReportFilterEmbeddingFailure
  484. //
  485. // Synopsis: Notifies the client that there was a failure filtering an
  486. // embedding.
  487. //
  488. // Arguments: [pbDocName] - Document name
  489. // [cbDocName] - Number of bytes in the serialized document name
  490. //
  491. // History: 1-22-97 srikants Created
  492. //
  493. //----------------------------------------------------------------------------
  494. void CFilterDriver::ReportFilterEmbeddingFailure( BYTE const * pbDocName, ULONG cbDocName )
  495. {
  496. PROPVARIANT var;
  497. var.vt = VT_VECTOR | VT_UI1;
  498. var.caub.cElems = cbDocName;
  499. var.caub.pElems = (BYTE *) pbDocName;
  500. SCODE sc = _pAdviseStatus->NotifyStatus( CI_NOTIFY_FILTER_EMBEDDING_FAILURE,
  501. 1,
  502. &var );
  503. if ( !SUCCEEDED(sc) )
  504. {
  505. ciDebugOut(( DEB_WARN,
  506. "Failed to report filter embedding failure event. Error 0x%X\n",
  507. sc ));
  508. }
  509. }
  510. //+---------------------------------------------------------------------------
  511. //
  512. // Method: CFilterDriver::FilterProperty
  513. //
  514. // Arguments: [var] -- Property value
  515. // [ps] -- Property ID
  516. // [drep] -- Data repository for filtered information
  517. // [docChar] -- Characterization
  518. //
  519. // Notes: Calls to SwitchToThread() give up processor.
  520. //
  521. //----------------------------------------------------------------------------
  522. inline void CFilterDriver::FilterProperty( CStorageVariant const & var,
  523. CFullPropSpec & ps,
  524. CDataRepository & drep,
  525. CDocCharacterization & docChar,
  526. LCID locale )
  527. {
  528. //
  529. // Filter one very special property: Backwards name
  530. //
  531. if (ps == psName && var.Type( ) == VT_LPWSTR)
  532. {
  533. const WCHAR *pwszPath = var.GetLPWSTR( );
  534. int j = wcslen( pwszPath );
  535. XGrowable<WCHAR> xwcsRevName( j + 1 );
  536. int i;
  537. for ( i = 0; i < j; i++ )
  538. {
  539. xwcsRevName[i] = pwszPath[j - 1 - i];
  540. }
  541. xwcsRevName[i] = L'\0';
  542. PROPVARIANT Variant;
  543. Variant.vt = VT_LPWSTR;
  544. Variant.pwszVal = xwcsRevName.Get();
  545. //
  546. // Cast to avoid turning the PROPVARIANT into a CStorageVariant for no good
  547. // reason. Convert involves alloc/free.
  548. //
  549. CStorageVariant const * pvar = (CStorageVariant const *)(ULONG_PTR)(&Variant);
  550. FilterProperty( *pvar, psRevName, drep, docChar, 0 ); SwitchToThread();
  551. }
  552. //
  553. // Don't filter paths
  554. //
  555. if ( ps != psPath )
  556. {
  557. Win4Assert( psDirectory != ps );
  558. vqDebugOut(( DEB_FILTER, "Filter property 0x%x\n", ps.GetPropertyPropid() ));
  559. //
  560. // Save some property values for use in document characterization
  561. //
  562. docChar.Add( var, ps ); SwitchToThread();
  563. //
  564. // output the property to the data repository
  565. //
  566. drep.PutLanguage( locale );
  567. drep.PutPropName( ps );
  568. drep.PutValue( var ); SwitchToThread();
  569. // Store the value in the property cache if it should be stored there
  570. if ( !_NonStoredProps.IsNonStored( ps ) )
  571. {
  572. BOOL fStoredInCache;
  573. if ( IsNullPointerVariant( (PROPVARIANT *) & var ) )
  574. {
  575. PROPVARIANT propVar;
  576. propVar.vt = VT_EMPTY;
  577. fStoredInCache = drep.StoreValue( ps, propVar ); SwitchToThread();
  578. }
  579. else
  580. {
  581. fStoredInCache = drep.StoreValue( ps, var ); SwitchToThread();
  582. }
  583. // should we ignore this property in the future?
  584. if ( !fStoredInCache )
  585. _NonStoredProps.Add( ps );
  586. }
  587. }
  588. if ( ps == psAttrib )
  589. _attrib = var.GetUI4();
  590. } //FilterProperty
  591. //+---------------------------------------------------------------------------
  592. //
  593. // Method: CFilterDriver::FilterObject
  594. //
  595. // Arguments: [propEnum] -- iterator for properties in a file
  596. // [drep] -- pointer to the data repository for filtered
  597. // information
  598. // [docChar] -- some property values are written here so that
  599. // document characterization can happen
  600. //
  601. // Notes: Calls to SwitchToThread() give up processor.
  602. //
  603. //----------------------------------------------------------------------------
  604. void CFilterDriver::FilterObject(
  605. CPropertyEnum & propEnum,
  606. CDataRepository & drep,
  607. CDocCharacterization & docChar )
  608. {
  609. #if CIDBG == 1
  610. ULONG ulStartTime = GetTickCount();
  611. #endif
  612. CFullPropSpec ps;
  613. // Get the locale for the property set. Use that if available, else use all the
  614. // known locales to maximize the chances of retrieving a property.
  615. LCID locale;
  616. BOOL fUseKnownLocale = SUCCEEDED( propEnum.GetPropertySetLocale(locale));
  617. for ( CStorageVariant const * pvar = propEnum.Next( ps );
  618. pvar != 0;
  619. pvar = propEnum.Next( ps ) )
  620. {
  621. //
  622. // Filter each of the properties and property sets until we run
  623. // out of them. Register each property for each of the registered locales.
  624. //
  625. FilterProperty( *pvar, ps, drep, docChar, _lcidSystemDefault ); SwitchToThread();
  626. if (fUseKnownLocale)
  627. {
  628. ciDebugOut((DEB_FILTER, "Propset locale is 0x%x\n", locale));
  629. if (locale != _lcidSystemDefault)
  630. {
  631. FilterProperty( *pvar, ps, drep, docChar, locale ); SwitchToThread();
  632. }
  633. }
  634. else
  635. {
  636. // We want to index this property with all the known locales only if it
  637. // is a "string" type. For non-string types, locale doesn't matter
  638. VARTYPE vt = pvar->Type() | VT_VECTOR; // enables check with or without vt_vector bit
  639. if (vt == (VT_VECTOR | VT_LPWSTR) ||
  640. vt == (VT_VECTOR | VT_BSTR) ||
  641. vt == (VT_VECTOR | VT_LPSTR)
  642. )
  643. {
  644. int iMin = min(_cLCIDs, cLCIDMax);
  645. for (int i = 0; i < iMin; i++)
  646. {
  647. ciDebugOut(( DEB_ITRACE, "Filtering property 0x%x with locale 0x%x\n",
  648. pvar, _alcidSeen[i] ));
  649. if (_alcidSeen[i] != _lcidSystemDefault)
  650. {
  651. FilterProperty( *pvar, ps, drep, docChar, _alcidSeen[i] ); SwitchToThread();
  652. }
  653. }
  654. }
  655. }
  656. }
  657. #if CIDBG == 1
  658. ULONG ulEndTime = GetTickCount();
  659. ciDebugOut (( DEB_USER1,
  660. "Filtering properties took %d ms\n",
  661. ulEndTime-ulStartTime ));
  662. #endif
  663. }
  664. //+-------------------------------------------------------------------------
  665. //
  666. // Member: CFilterDriver::FilterSecurity, private
  667. //
  668. // Synopsis: Store the security descriptor and map to an SDID
  669. //
  670. // Arguments: [wcsFileName] - file name (used only for error reporting)
  671. // [oplock] - oplock held on the file
  672. // [drep] - data repository
  673. //
  674. // Notes: using ACCESS_SYSTEM_SECURITY AccessMode will cause an
  675. // oplock break, so we should call FilterSecurity before
  676. // taking the oplock.
  677. //
  678. // Notes: Calls to SwitchToThread() give up processor.
  679. //
  680. //--------------------------------------------------------------------------
  681. void CFilterDriver::FilterSecurity(
  682. ICiCOpenedDoc *Document,
  683. CDataRepository & drep )
  684. {
  685. BOOL fCouldStore = FALSE;
  686. SCODE sc;
  687. //
  688. // Initial guess about security descriptor size
  689. //
  690. const cInitSD = 512;
  691. BYTE abBuffer[cInitSD];
  692. ULONG cbSD = cInitSD;
  693. BYTE * pbBuffer = abBuffer;
  694. XPtr<SECURITY_DESCRIPTOR> xSD;
  695. while (TRUE)
  696. {
  697. //
  698. // Attempt to get the security descriptor into the buffer
  699. //
  700. sc = Document->GetSecurity( pbBuffer, &cbSD ); SwitchToThread();
  701. //
  702. // If we don't need to resize, then exit while
  703. //
  704. if (SUCCEEDED( sc ) || CI_E_BUFFERTOOSMALL != sc)
  705. {
  706. break;
  707. }
  708. //
  709. // Allocate a bigger buffer and retrieve the security information into
  710. // it.
  711. //
  712. xSD.Free();
  713. xSD.Set( (SECURITY_DESCRIPTOR *) new BYTE [cbSD] );
  714. pbBuffer = (BYTE *) xSD.GetPointer();
  715. }
  716. if ( !SUCCEEDED( sc ) || 0 == cbSD )
  717. {
  718. //
  719. // Store NULL security descriptor for the file
  720. //
  721. fCouldStore = drep.StoreSecurity( 0, 0 ); SwitchToThread();
  722. }
  723. else
  724. {
  725. // Now store away the security descriptor and map to an SDID
  726. fCouldStore =
  727. drep.StoreSecurity( pbBuffer, cbSD ); SwitchToThread();
  728. }
  729. if (! fCouldStore)
  730. {
  731. ciDebugOut(( DEB_ERROR, "Failed to store security info\n" ));
  732. }
  733. }
  734. //+---------------------------------------------------------------------------
  735. //
  736. // Function: CFilterDriver::RegisterLocale, private
  737. //
  738. // Synopsis: Registers a locale
  739. //
  740. // Arguments: [locale] - the locale
  741. //
  742. // Returns: none
  743. //
  744. // History: 27-Jan-99 KrishnaN Created
  745. //
  746. //----------------------------------------------------------------------------
  747. void CFilterDriver::RegisterLocale(LCID locale)
  748. {
  749. // Ensure that the locale wasn't already registered
  750. int iMin = min(_cLCIDs, cLCIDMax);
  751. for (int i = 0; i < iMin; i++)
  752. {
  753. if (locale == _alcidSeen[i])
  754. return;
  755. }
  756. _alcidSeen[_cLCIDs % cLCIDMax] = locale;
  757. ciDebugOut(( DEB_ITRACE, "Registered %d locale 0x%x\n", _cLCIDs+1, locale));
  758. _cLCIDs++;
  759. }
  760. //+---------------------------------------------------------------------------
  761. //
  762. // Function: IsNonIndexableProp, private
  763. //
  764. // Synopsis: Looks for ROBOTS=NOINDEX tag
  765. //
  766. // Arguments: [fps] -- Property
  767. // [var] -- Value
  768. //
  769. // Returns: TRUE if property [fps] == ROBOTS and value [var] == NOINDEX
  770. //
  771. // History: 7-Oct-97 KyleP Stole from Site Server
  772. //
  773. // Notes: I based my changes to this code in information found at:
  774. // http://info.webcrawler.com/mak/projects/robots/meta-user.html
  775. //
  776. //----------------------------------------------------------------------------
  777. BOOL IsNonIndexableProp( CFullPropSpec const & fps, PROPVARIANT const & var )
  778. {
  779. static GUID guidHTMLMeta = HTMLMetaGuid;
  780. BOOL fIsNonIndexable = FALSE;
  781. if ( fps.IsPropertyName() &&
  782. fps.GetPropSet() == guidHTMLMeta &&
  783. _wcsicmp( fps.GetPropertyName(), L"ROBOTS" ) == 0 &&
  784. (var.vt == VT_LPWSTR || var.vt == VT_BSTR) &&
  785. 0 != var.pwszVal )
  786. {
  787. //
  788. // Convert to lowercase to do wcsstr search.
  789. //
  790. unsigned cc = wcslen( var.pwszVal ) + 1;
  791. XGrowable<WCHAR> xwcsTemp( cc );
  792. RtlCopyMemory( xwcsTemp.Get(), var.pwszVal, cc * sizeof(WCHAR) );
  793. _wcslwr( xwcsTemp.Get() );
  794. //
  795. // Check "noindex"
  796. //
  797. fIsNonIndexable = wcsstr( xwcsTemp.Get(), L"noindex") != 0;
  798. //
  799. // Check "all"
  800. //
  801. if ( !fIsNonIndexable )
  802. fIsNonIndexable = wcsstr( xwcsTemp.Get(), L"none") != 0;
  803. }
  804. return fIsNonIndexable;
  805. }