Leaked source code of Windows Server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1017 lines
32 KiB

  1. //+---------------------------------------------------------------------------
  2. //
  3. // Microsoft Windows
  4. // Copyright (C) Microsoft Corporation, 1991 - 2000.
  5. //
  6. // File: FDRIVER.CXX
  7. //
  8. // Contents: Filter driver
  9. //
  10. //----------------------------------------------------------------------------
  11. #include <pch.cxx>
  12. #pragma hdrstop
  13. #include <ciole.hxx>
  14. #include <drep.hxx>
  15. #include <tfilt.hxx>
  16. #include <tsource.hxx>
  17. #include <fwevent.hxx>
  18. #include <cievtmsg.h>
  19. #include <propspec.hxx>
  20. #include <imprsnat.hxx>
  21. #include <oleprop.hxx>
  22. #include <fdaemon.hxx>
  23. #include <ntopen.hxx>
  24. #include <ciguid.hxx>
  25. #include "fdriver.hxx"
  26. #include "propfilt.hxx"
  27. #include "docsum.hxx"
  28. static GUID guidNull = { 0x00000000,
  29. 0x0000,
  30. 0x0000,
  31. { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } };
  32. //
  33. // Local procedures
  34. //
  35. BOOL IsNonIndexableProp( CFullPropSpec const & fps, PROPVARIANT const & var );
  36. static CFullPropSpec psRevName( guidQuery, DISPID_QUERY_REVNAME );
  37. static CFullPropSpec psName( guidStorage, PID_STG_NAME );
  38. static CFullPropSpec psPath( guidStorage, PID_STG_PATH);
  39. static CFullPropSpec psDirectory( guidStorage, PID_STG_DIRECTORY );
  40. static CFullPropSpec psCharacterization( guidCharacterization,
  41. propidCharacterization );
  42. static CFullPropSpec psTitle( guidDocSummary, propidTitle );
  43. static GUID guidHtmlInformation = defGuidHtmlInformation;
  44. static CFullPropSpec psAttrib( guidStorage, PID_STG_ATTRIBUTES );
  45. //
  46. // Helper functions
  47. //
  48. inline BOOL IsSpecialPid( FULLPROPSPEC const & fps )
  49. {
  50. return ( fps.psProperty.ulKind == PRSPEC_PROPID &&
  51. fps.psProperty.propid <= PID_CODEPAGE );
  52. }
  53. //+---------------------------------------------------------------------------
  54. //
  55. // Member: CFilterDriver::CFilterDriver, public
  56. //
  57. // Arguments:
  58. // [drep] -- pointer to the data repository for filtered
  59. // information
  60. // [perfobj] -- performance object to update
  61. // [cFilteredBlocks] -- Number of blocks filtered for the current
  62. // document
  63. // [cat] -- reference to a catalog proxy
  64. //
  65. //----------------------------------------------------------------------------
  66. CFilterDriver::CFilterDriver ( CDataRepository * drep,
  67. ICiCAdviseStatus * pAdviseStatus,
  68. ICiCFilterClient * pFilterClient,
  69. CCiFrameworkParams & params,
  70. CI_CLIENT_FILTER_CONFIG_INFO const & configInfo,
  71. ULONG & cFilteredBlocks,
  72. CNonStoredProps & NonStoredProps,
  73. ULONG cbBuf )
  74. : _drep( drep ),
  75. _llFileSize( 0 ),
  76. _cFilteredBlocks( cFilteredBlocks ),
  77. _params( params ),
  78. _pAdviseStatus( pAdviseStatus ),
  79. _pFilterClient( pFilterClient ),
  80. _configInfo( configInfo ),
  81. _NonStoredProps( NonStoredProps ),
  82. _cbBuf( cbBuf ),
  83. _attrib(0),
  84. _lcidSystemDefault( GetSystemDefaultLCID() )
  85. {
  86. }
  87. //+---------------------------------------------------------------------------
  88. //
  89. // Member: CFilterDriver::FillEntryBuffer, public
  90. //
  91. // Synopsis: Filters the document that IFilter loaded.
  92. //
  93. // Arguments: [pbDocName] -- Document in filter
  94. // [cbDocName] -- Size of [pbDocName]
  95. //
  96. // Notes: Calls to SwitchToThread() give up processor.
  97. //
  98. //----------------------------------------------------------------------------
  99. STATUS CFilterDriver::FillEntryBuffer( BYTE const * pbDocName, ULONG cbDocName )
  100. {
  101. _status = CANNOT_OPEN_STREAM;
  102. BOOL fFilterContents = FALSE; // Assume we should NOT filter contents
  103. //
  104. // Get opendoc for access to stored state and safely save it
  105. //
  106. ICiCOpenedDoc *pDocument;
  107. SCODE sc = _pFilterClient->GetOpenedDoc( &pDocument ); SwitchToThread();
  108. if ( !SUCCEEDED( sc ) )
  109. {
  110. ciDebugOut(( DEB_ERROR, "Unable to get OpenedDoc - %x\n", sc ));
  111. return _status;
  112. }
  113. XInterface<ICiCOpenedDoc> Document( pDocument );
  114. //
  115. // Attempt to open the document
  116. //
  117. sc = Document->Open( pbDocName, cbDocName );
  118. SwitchToThread();
  119. if (!SUCCEEDED( sc ))
  120. {
  121. if ( ::IsSharingViolation( sc ) )
  122. {
  123. _status = CI_SHARING_VIOLATION;
  124. }
  125. else
  126. {
  127. ciDebugOut(( DEB_IWARN, "Unable to open docname at 0x%X - 0x%X\n",
  128. pbDocName, sc ));
  129. if ( FILTER_E_UNREACHABLE == sc )
  130. _status = CI_NOT_REACHABLE;
  131. return _status;
  132. }
  133. }
  134. // Initialize LCIDs counter.
  135. _cLCIDs = 0;
  136. //
  137. // Attempt to filter properties
  138. //
  139. CDocCharacterization docChar( _params.GenerateCharacterization() ?
  140. _params.GetMaxCharacterization() : 0 );
  141. //
  142. // Get the stat property enumerator and filter based on it.
  143. //
  144. CDocStatPropertyEnum CPEProp( Document.GetPointer() ); SwitchToThread();
  145. fFilterContents = CPEProp.GetFilterContents( _params.FilterDirectories() );
  146. _llFileSize = CPEProp.GetFileSize( );
  147. FilterObject( CPEProp,
  148. *_drep,
  149. docChar ); SwitchToThread();
  150. //
  151. // filter security on the file.
  152. //
  153. if ( _configInfo.fSupportsSecurity )
  154. {
  155. FilterSecurity( Document.GetPointer( ), *_drep ); SwitchToThread();
  156. }
  157. if ( CI_SHARING_VIOLATION == _status )
  158. return _status;
  159. _status = SUCCESS;
  160. BOOL fFilterOleProperties = fFilterContents;
  161. BOOL fKnownFilter = TRUE;
  162. BOOL fIndexable = TRUE;
  163. WCHAR const * pwcPath = (WCHAR const *) pbDocName;
  164. #if 0
  165. // c:\foo
  166. // \\?\c:\foo
  167. BOOL fRemote = ( ( L':' != pwcPath[1] ) && ( L':' != pwcPath[5] ) );
  168. if ( fFilterContents &&
  169. ( fRemote || ( 0 == ( FILE_ATTRIBUTE_ENCRYPTED & _attrib )) ) )
  170. #else
  171. if ( fFilterContents && ( 0 == ( FILE_ATTRIBUTE_ENCRYPTED & _attrib )) )
  172. #endif
  173. {
  174. //
  175. // Filter time in Mb / hr
  176. //
  177. CFwPerfTime filterCounter( _pAdviseStatus,
  178. CI_PERF_FILTER_TIME,
  179. 1024*1024, 1000*60*60 );
  180. filterCounter.TStart();
  181. CFwPerfTime bindCounter( _pAdviseStatus,
  182. CI_PERF_BIND_TIME );
  183. bindCounter.TStart();
  184. IFilter *pTmpIFilter;
  185. sc = Document->GetIFilter( &pTmpIFilter ); SwitchToThread();
  186. if ( !SUCCEEDED( sc ) )
  187. pTmpIFilter = 0;
  188. _pIFilter.Set( pTmpIFilter );
  189. bindCounter.TStop( );
  190. if ( _pIFilter.IsNull( ))
  191. {
  192. //
  193. // We could not obtain an IFilter but we have filtered properties.
  194. // We should just return whatever status we have.
  195. //
  196. ciDebugOut(( DEB_IWARN,
  197. "Did not get a filter for document 0x%X\n",
  198. pbDocName ));
  199. if ( ::IsSharingViolation( sc ))
  200. _status = CI_SHARING_VIOLATION;
  201. else if ( FILTER_E_UNREACHABLE == sc )
  202. _status = CI_NOT_REACHABLE;
  203. if ( fFilterOleProperties )
  204. {
  205. //
  206. // No filter, but it might have properties. Get them.
  207. //
  208. COLEPropertyEnum oleProp( Document.GetPointer( ) ); SwitchToThread();
  209. BOOL fIsStorage = oleProp.IsStorage();
  210. if (fIsStorage)
  211. FilterObject( oleProp,
  212. *_drep,
  213. docChar ); SwitchToThread();
  214. }
  215. return _status;
  216. }
  217. ULONG ulFlags;
  218. STAT_CHUNK statChunk;
  219. sc = _pIFilter->Init( IFILTER_INIT_CANON_PARAGRAPHS |
  220. IFILTER_INIT_CANON_HYPHENS |
  221. IFILTER_INIT_CANON_SPACES |
  222. IFILTER_INIT_APPLY_INDEX_ATTRIBUTES |
  223. IFILTER_INIT_INDEXING_ONLY,
  224. 0,
  225. 0,
  226. &ulFlags ); SwitchToThread();
  227. if ( FAILED(sc) )
  228. {
  229. ciDebugOut(( DEB_WARN, "IFilter->Init() failed.\n" ));
  230. THROW( CException( sc ) );
  231. }
  232. fFilterOleProperties = (( ulFlags & IFILTER_FLAGS_OLE_PROPERTIES ) != 0);
  233. //
  234. // Determine the maximum number of filtered blocks allowed for this
  235. // file.
  236. //
  237. unsigned __int64 ullMultiplier = _params.GetMaxFilesizeMultiplier();
  238. unsigned __int64 ullcbBuf = _cbBuf;
  239. unsigned __int64 ullcbFile = _llFileSize;
  240. unsigned __int64 ullcBlocks = 1 + ( ullcbFile / ullcbBuf );
  241. unsigned __int64 ullmaxBlocks = ullcBlocks * ullMultiplier;
  242. if ( ullmaxBlocks > ULONG_MAX )
  243. ullmaxBlocks = ULONG_MAX;
  244. ULONG ulMaxFilteredBlocks = (ULONG) ullmaxBlocks;
  245. ciDebugOut(( DEB_ITRACE,
  246. "cbfile %I64d, cBlocks %I64d, maxcBlocks %I64d\n",
  247. ullcbFile, ullcBlocks, ullmaxBlocks ));
  248. //
  249. // Get the first chunk
  250. //
  251. do
  252. {
  253. sc = _pIFilter->GetChunk( &statChunk ); SwitchToThread();
  254. if (SUCCEEDED(sc))
  255. RegisterLocale(statChunk.locale);
  256. }
  257. while ( SUCCEEDED(sc) && IsSpecialPid( statChunk.attribute ) );
  258. _drep->InitFilteredBlockCount( ulMaxFilteredBlocks );
  259. _cFilteredBlocks = 0;
  260. NTSTATUS Status = STATUS_SUCCESS;
  261. TRY
  262. {
  263. BOOL fBadEmbeddingReport = FALSE;
  264. while ( SUCCEEDED(sc) ||
  265. FILTER_E_LINK_UNAVAILABLE == sc ||
  266. FILTER_E_EMBEDDING_UNAVAILABLE == sc )
  267. {
  268. BOOL fInUse;
  269. Document->IsInUseByAnotherProcess( &fInUse ); SwitchToThread();
  270. if ( fInUse )
  271. {
  272. _status = FILTER_EXCEPTION; // Force retry in driver
  273. break; // Stop filtering this doc
  274. }
  275. _cFilteredBlocks = _drep->GetFilteredBlockCount();
  276. if ( SUCCEEDED(sc) )
  277. {
  278. if ( IsSpecialPid( statChunk.attribute ) )
  279. {
  280. sc = _pIFilter->GetChunk( &statChunk ); SwitchToThread();
  281. if (SUCCEEDED(sc))
  282. RegisterLocale(statChunk.locale);
  283. continue;
  284. }
  285. //
  286. // Skip over unknown chunks.
  287. //
  288. if ( 0 == (statChunk.flags & (CHUNK_TEXT | CHUNK_VALUE)) )
  289. {
  290. ciDebugOut(( DEB_WARN,
  291. "Filtering of docname at 0x%X produced bogus chunk (not text or value)\n",
  292. pbDocName ));
  293. sc = _pIFilter->GetChunk( &statChunk ); SwitchToThread();
  294. if (SUCCEEDED(sc))
  295. RegisterLocale(statChunk.locale);
  296. continue;
  297. }
  298. if ( statChunk.flags & CHUNK_VALUE )
  299. {
  300. PROPVARIANT * pvar = 0;
  301. sc = _pIFilter->GetValue( &pvar );
  302. if ( SUCCEEDED(sc) )
  303. {
  304. XPtr<CStorageVariant> xvar( (CStorageVariant *)(ULONG_PTR)pvar );
  305. CFullPropSpec * pps = (CFullPropSpec *)(ULONG_PTR)(&statChunk.attribute);
  306. //
  307. // HACK #275: If we see a ROBOTS=NOINDEX tag, then bail out.
  308. //
  309. if ( IsNonIndexableProp( *pps, *pvar ) )
  310. {
  311. ciDebugOut(( DEB_WARN,
  312. "Document %x is not indexable (robots Meta-tag)\n",
  313. pbDocName ));
  314. sc = S_OK;
  315. fFilterOleProperties = FALSE;
  316. fIndexable = FALSE;
  317. break;
  318. }
  319. // Index this property twice -- once with default locale and with
  320. // the chunk locale.
  321. FilterProperty( *pvar, *pps, *_drep, docChar, statChunk.locale ); SwitchToThread();
  322. if (_lcidSystemDefault != statChunk.locale)
  323. {
  324. FilterProperty( *pvar, *pps, *_drep, docChar, _lcidSystemDefault ); SwitchToThread();
  325. }
  326. //
  327. // Only fetch next if we're done with this chunk.
  328. //
  329. if ( 0 == (statChunk.flags & CHUNK_TEXT) || !SUCCEEDED(sc) )
  330. {
  331. sc = _pIFilter->GetChunk( &statChunk ); SwitchToThread();
  332. if (SUCCEEDED(sc))
  333. RegisterLocale(statChunk.locale);
  334. continue;
  335. }
  336. }
  337. }
  338. if ( (statChunk.flags & CHUNK_TEXT) && SUCCEEDED(sc) )
  339. {
  340. if ( _drep->PutLanguage( statChunk.locale ) &&
  341. _drep->PutPropName( *((CFullPropSpec *)&statChunk.attribute) ) )
  342. {
  343. CTextSource tsource( _pIFilter.GetPointer( ), statChunk );
  344. docChar.Add( tsource.awcBuffer + tsource.iCur,
  345. tsource.iEnd - tsource.iCur,
  346. statChunk.attribute ); SwitchToThread();
  347. _drep->PutStream( &tsource ); SwitchToThread();
  348. sc = tsource.GetStatus();
  349. }
  350. else
  351. {
  352. sc = _pIFilter->GetChunk( &statChunk ); SwitchToThread();
  353. if (SUCCEEDED(sc))
  354. RegisterLocale(statChunk.locale);
  355. }
  356. if ( sc == FILTER_E_NO_TEXT && (statChunk.flags & CHUNK_VALUE) )
  357. sc = S_OK;
  358. }
  359. }
  360. if ( FILTER_E_EMBEDDING_UNAVAILABLE == sc )
  361. {
  362. if ( !fBadEmbeddingReport &&
  363. (_params.GetEventLogFlags()&CI_EVTLOG_FLAGS_FAILED_EMBEDDING) )
  364. {
  365. ReportFilterEmbeddingFailure( pbDocName, cbDocName );
  366. fBadEmbeddingReport = TRUE;
  367. }
  368. sc = _pIFilter->GetChunk( &statChunk ); SwitchToThread();
  369. if (SUCCEEDED(sc))
  370. RegisterLocale(statChunk.locale);
  371. }
  372. else if ( FILTER_E_LINK_UNAVAILABLE == sc )
  373. {
  374. sc = _pIFilter->GetChunk( &statChunk ); SwitchToThread();
  375. if (SUCCEEDED(sc))
  376. RegisterLocale(statChunk.locale);
  377. }
  378. }
  379. }
  380. CATCH ( CException, e )
  381. {
  382. Status = e.GetErrorCode();
  383. ciDebugOut(( DEB_IERROR,
  384. "Exception 0x%x thrown from filter DLL while filtering docName at 0x%X\n",
  385. Status,
  386. pbDocName ));
  387. }
  388. END_CATCH
  389. if ( !NT_SUCCESS(Status) && Status != FDAEMON_E_TOOMANYFILTEREDBLOCKS )
  390. {
  391. ciDebugOut(( DEB_FORCE, "error %#x, converting to FDAEMON_E_FATALERROR and exiting process\n", Status ));
  392. THROW( CException(FDAEMON_E_FATALERROR) );
  393. }
  394. if ( Status == FDAEMON_E_TOOMANYFILTEREDBLOCKS )
  395. {
  396. Win4Assert( _drep->GetFilteredBlockCount() > ulMaxFilteredBlocks );
  397. LogOverflow( pbDocName, cbDocName );
  398. //
  399. // Force exit from the loop
  400. //
  401. sc = FILTER_E_END_OF_CHUNKS;
  402. }
  403. _pIFilter.Free( );
  404. filterCounter.TStop( (ULONG)_llFileSize );
  405. }
  406. if ( FILTER_E_END_OF_CHUNKS != sc &&
  407. FILTER_E_PARTIALLY_FILTERED != sc &&
  408. FAILED( sc ) )
  409. {
  410. ciDebugOut(( DEB_IWARN, "Filter document at 0x(%X) returned SCODE 0x%x\n",
  411. pbDocName, sc ));
  412. QUIETTHROW( CException( sc ) );
  413. }
  414. BOOL fIsStorage = FALSE;
  415. if ( fFilterOleProperties )
  416. {
  417. //
  418. // filter ole properties only if it is a docfile
  419. //
  420. COLEPropertyEnum oleProp( Document.GetPointer( ) ); SwitchToThread();
  421. fIsStorage = oleProp.IsStorage();
  422. if (fIsStorage)
  423. FilterObject( oleProp,
  424. *_drep,
  425. docChar ); SwitchToThread();
  426. }
  427. //
  428. // Store the document characterization in the property cache.
  429. // Don't bother if characterization is turned off.
  430. //
  431. if ( _params.GenerateCharacterization() )
  432. {
  433. PROPVARIANT var;
  434. WCHAR awcSummary[ CI_MAX_CHARACTERIZATION_MAX + 1 ];
  435. if ( fIndexable && docChar.HasCharacterization() )
  436. {
  437. unsigned cwcSummary = sizeof awcSummary / sizeof WCHAR;
  438. // Use the raw text in the abstract unless we defaulted
  439. // to the text filter and the file has ole properties.
  440. BOOL fUseRawText = fKnownFilter || !fIsStorage;
  441. docChar.Get( awcSummary, cwcSummary, fUseRawText ); SwitchToThread();
  442. if ( 0 == cwcSummary )
  443. {
  444. var.vt = VT_EMPTY;
  445. }
  446. else
  447. {
  448. var.vt = VT_LPWSTR;
  449. var.pwszVal = awcSummary;
  450. }
  451. }
  452. else
  453. {
  454. var.vt = VT_EMPTY;
  455. }
  456. _drep->StoreValue( psCharacterization, var ); SwitchToThread();
  457. }
  458. return _status;
  459. }
  460. //+---------------------------------------------------------------------------
  461. //
  462. // Member: CFilterDriver::LogOverflow
  463. //
  464. // Synopsis: Notifies the client that there were too many blocks in the
  465. // given document
  466. //
  467. // Arguments: [pbDocName] - Document Name
  468. // [cbDocName] - Number of bytes in the document name.
  469. //
  470. // History: 1-22-97 srikants Created
  471. //
  472. //----------------------------------------------------------------------------
  473. void CFilterDriver::LogOverflow( BYTE const * pbDocName, ULONG cbDocName )
  474. {
  475. PROPVARIANT var[2];
  476. var[0].vt = VT_VECTOR | VT_UI1;
  477. var[0].caub.cElems = cbDocName;
  478. var[0].caub.pElems = (BYTE *) pbDocName;
  479. var[1].vt = VT_UI4;
  480. var[1].ulVal = _params.GetMaxFilesizeMultiplier();
  481. SCODE sc = _pAdviseStatus->NotifyStatus( CI_NOTIFY_FILTER_TOO_MANY_BLOCKS,
  482. 2,
  483. var );
  484. if ( !SUCCEEDED(sc) )
  485. {
  486. ciDebugOut(( DEB_WARN,
  487. "Failed to report filter to many blocks event. Error 0x%X\n",
  488. sc ));
  489. }
  490. }
  491. //+---------------------------------------------------------------------------
  492. //
  493. // Member: CFilterDriver::ReportFilterEmbeddingFailure
  494. //
  495. // Synopsis: Notifies the client that there was a failure filtering an
  496. // embedding.
  497. //
  498. // Arguments: [pbDocName] - Document name
  499. // [cbDocName] - Number of bytes in the serialized document name
  500. //
  501. // History: 1-22-97 srikants Created
  502. //
  503. //----------------------------------------------------------------------------
  504. void CFilterDriver::ReportFilterEmbeddingFailure( BYTE const * pbDocName, ULONG cbDocName )
  505. {
  506. PROPVARIANT var;
  507. var.vt = VT_VECTOR | VT_UI1;
  508. var.caub.cElems = cbDocName;
  509. var.caub.pElems = (BYTE *) pbDocName;
  510. SCODE sc = _pAdviseStatus->NotifyStatus( CI_NOTIFY_FILTER_EMBEDDING_FAILURE,
  511. 1,
  512. &var );
  513. if ( !SUCCEEDED(sc) )
  514. {
  515. ciDebugOut(( DEB_WARN,
  516. "Failed to report filter embedding failure event. Error 0x%X\n",
  517. sc ));
  518. }
  519. }
  520. //+---------------------------------------------------------------------------
  521. //
  522. // Method: CFilterDriver::FilterProperty
  523. //
  524. // Arguments: [var] -- Property value
  525. // [ps] -- Property ID
  526. // [drep] -- Data repository for filtered information
  527. // [docChar] -- Characterization
  528. //
  529. // Notes: Calls to SwitchToThread() give up processor.
  530. //
  531. //----------------------------------------------------------------------------
  532. inline void CFilterDriver::FilterProperty( CStorageVariant const & var,
  533. CFullPropSpec & ps,
  534. CDataRepository & drep,
  535. CDocCharacterization & docChar,
  536. LCID locale )
  537. {
  538. //
  539. // Filter one very special property: Backwards name
  540. //
  541. if (ps == psName && var.Type( ) == VT_LPWSTR)
  542. {
  543. const WCHAR *pwszPath = var.GetLPWSTR( );
  544. int j = wcslen( pwszPath );
  545. XGrowable<WCHAR> xwcsRevName( j + 1 );
  546. int i;
  547. for ( i = 0; i < j; i++ )
  548. {
  549. xwcsRevName[i] = pwszPath[j - 1 - i];
  550. }
  551. xwcsRevName[i] = L'\0';
  552. PROPVARIANT Variant;
  553. Variant.vt = VT_LPWSTR;
  554. Variant.pwszVal = xwcsRevName.Get();
  555. //
  556. // Cast to avoid turning the PROPVARIANT into a CStorageVariant for no good
  557. // reason. Convert involves alloc/free.
  558. //
  559. CStorageVariant const * pvar = (CStorageVariant const *)(ULONG_PTR)(&Variant);
  560. FilterProperty( *pvar, psRevName, drep, docChar, 0 ); SwitchToThread();
  561. }
  562. //
  563. // Don't filter paths
  564. //
  565. if ( ps != psPath )
  566. {
  567. Win4Assert( psDirectory != ps );
  568. vqDebugOut(( DEB_FILTER, "Filter property 0x%x\n", ps.GetPropertyPropid() ));
  569. //
  570. // Save some property values for use in document characterization
  571. //
  572. docChar.Add( var, ps ); SwitchToThread();
  573. //
  574. // output the property to the data repository
  575. //
  576. drep.PutLanguage( locale );
  577. drep.PutPropName( ps );
  578. drep.PutValue( var ); SwitchToThread();
  579. // Store the value in the property cache if it should be stored there
  580. if ( !_NonStoredProps.IsNonStored( ps ) )
  581. {
  582. BOOL fStoredInCache;
  583. if ( IsNullPointerVariant( (PROPVARIANT *) & var ) )
  584. {
  585. PROPVARIANT propVar;
  586. propVar.vt = VT_EMPTY;
  587. fStoredInCache = drep.StoreValue( ps, propVar ); SwitchToThread();
  588. }
  589. else
  590. {
  591. fStoredInCache = drep.StoreValue( ps, var ); SwitchToThread();
  592. }
  593. // should we ignore this property in the future?
  594. if ( !fStoredInCache )
  595. _NonStoredProps.Add( ps );
  596. }
  597. }
  598. if ( ps == psAttrib )
  599. _attrib = var.GetUI4();
  600. } //FilterProperty
  601. //+---------------------------------------------------------------------------
  602. //
  603. // Method: CFilterDriver::FilterObject
  604. //
  605. // Arguments: [propEnum] -- iterator for properties in a file
  606. // [drep] -- pointer to the data repository for filtered
  607. // information
  608. // [docChar] -- some property values are written here so that
  609. // document characterization can happen
  610. //
  611. // Notes: Calls to SwitchToThread() give up processor.
  612. //
  613. //----------------------------------------------------------------------------
  614. void CFilterDriver::FilterObject(
  615. CPropertyEnum & propEnum,
  616. CDataRepository & drep,
  617. CDocCharacterization & docChar )
  618. {
  619. #if CIDBG == 1
  620. ULONG ulStartTime = GetTickCount();
  621. #endif
  622. CFullPropSpec ps;
  623. // Get the locale for the property set. Use that if available, else use all the
  624. // known locales to maximize the chances of retrieving a property.
  625. LCID locale;
  626. BOOL fUseKnownLocale = SUCCEEDED( propEnum.GetPropertySetLocale(locale));
  627. for ( CStorageVariant const * pvar = propEnum.Next( ps );
  628. pvar != 0;
  629. pvar = propEnum.Next( ps ) )
  630. {
  631. //
  632. // Filter each of the properties and property sets until we run
  633. // out of them. Register each property for each of the registered locales.
  634. //
  635. FilterProperty( *pvar, ps, drep, docChar, _lcidSystemDefault ); SwitchToThread();
  636. if (fUseKnownLocale)
  637. {
  638. ciDebugOut((DEB_FILTER, "Propset locale is 0x%x\n", locale));
  639. if (locale != _lcidSystemDefault)
  640. {
  641. FilterProperty( *pvar, ps, drep, docChar, locale ); SwitchToThread();
  642. }
  643. }
  644. else
  645. {
  646. // We want to index this property with all the known locales only if it
  647. // is a "string" type. For non-string types, locale doesn't matter
  648. VARTYPE vt = pvar->Type() | VT_VECTOR; // enables check with or without vt_vector bit
  649. if (vt == (VT_VECTOR | VT_LPWSTR) ||
  650. vt == (VT_VECTOR | VT_BSTR) ||
  651. vt == (VT_VECTOR | VT_LPSTR)
  652. )
  653. {
  654. int iMin = min(_cLCIDs, cLCIDMax);
  655. for (int i = 0; i < iMin; i++)
  656. {
  657. ciDebugOut(( DEB_ITRACE, "Filtering property 0x%x with locale 0x%x\n",
  658. pvar, _alcidSeen[i] ));
  659. if (_alcidSeen[i] != _lcidSystemDefault)
  660. {
  661. FilterProperty( *pvar, ps, drep, docChar, _alcidSeen[i] ); SwitchToThread();
  662. }
  663. }
  664. }
  665. }
  666. }
  667. #if CIDBG == 1
  668. ULONG ulEndTime = GetTickCount();
  669. ciDebugOut (( DEB_USER1,
  670. "Filtering properties took %d ms\n",
  671. ulEndTime-ulStartTime ));
  672. #endif
  673. }
  674. //+-------------------------------------------------------------------------
  675. //
  676. // Member: CFilterDriver::FilterSecurity, private
  677. //
  678. // Synopsis: Store the security descriptor and map to an SDID
  679. //
  680. // Arguments: [wcsFileName] - file name (used only for error reporting)
  681. // [oplock] - oplock held on the file
  682. // [drep] - data repository
  683. //
  684. // Notes: using ACCESS_SYSTEM_SECURITY AccessMode will cause an
  685. // oplock break, so we should call FilterSecurity before
  686. // taking the oplock.
  687. //
  688. // Notes: Calls to SwitchToThread() give up processor.
  689. //
  690. //--------------------------------------------------------------------------
  691. void CFilterDriver::FilterSecurity(
  692. ICiCOpenedDoc *Document,
  693. CDataRepository & drep )
  694. {
  695. BOOL fCouldStore = FALSE;
  696. SCODE sc;
  697. //
  698. // Initial guess about security descriptor size
  699. //
  700. const cInitSD = 512;
  701. BYTE abBuffer[cInitSD];
  702. ULONG cbSD = cInitSD;
  703. BYTE * pbBuffer = abBuffer;
  704. XPtr<SECURITY_DESCRIPTOR> xSD;
  705. while (TRUE)
  706. {
  707. //
  708. // Attempt to get the security descriptor into the buffer
  709. //
  710. sc = Document->GetSecurity( pbBuffer, &cbSD ); SwitchToThread();
  711. //
  712. // If we don't need to resize, then exit while
  713. //
  714. if (SUCCEEDED( sc ) || CI_E_BUFFERTOOSMALL != sc)
  715. {
  716. break;
  717. }
  718. //
  719. // Allocate a bigger buffer and retrieve the security information into
  720. // it.
  721. //
  722. xSD.Free();
  723. xSD.Set( (SECURITY_DESCRIPTOR *) new BYTE [cbSD] );
  724. pbBuffer = (BYTE *) xSD.GetPointer();
  725. }
  726. if ( !SUCCEEDED( sc ) || 0 == cbSD )
  727. {
  728. //
  729. // Store NULL security descriptor for the file
  730. //
  731. fCouldStore = drep.StoreSecurity( 0, 0 ); SwitchToThread();
  732. }
  733. else
  734. {
  735. // Now store away the security descriptor and map to an SDID
  736. fCouldStore =
  737. drep.StoreSecurity( pbBuffer, cbSD ); SwitchToThread();
  738. }
  739. if (! fCouldStore)
  740. {
  741. ciDebugOut(( DEB_ERROR, "Failed to store security info\n" ));
  742. }
  743. }
  744. //+---------------------------------------------------------------------------
  745. //
  746. // Function: CFilterDriver::RegisterLocale, private
  747. //
  748. // Synopsis: Registers a locale
  749. //
  750. // Arguments: [locale] - the locale
  751. //
  752. // Returns: none
  753. //
  754. // History: 27-Jan-99 KrishnaN Created
  755. //
  756. //----------------------------------------------------------------------------
  757. void CFilterDriver::RegisterLocale(LCID locale)
  758. {
  759. // Ensure that the locale wasn't already registered
  760. int iMin = min(_cLCIDs, cLCIDMax);
  761. for (int i = 0; i < iMin; i++)
  762. {
  763. if (locale == _alcidSeen[i])
  764. return;
  765. }
  766. _alcidSeen[_cLCIDs % cLCIDMax] = locale;
  767. ciDebugOut(( DEB_ITRACE, "Registered %d locale 0x%x\n", _cLCIDs+1, locale));
  768. _cLCIDs++;
  769. }
  770. //+---------------------------------------------------------------------------
  771. //
  772. // Function: IsNonIndexableProp, private
  773. //
  774. // Synopsis: Looks for ROBOTS=NOINDEX tag
  775. //
  776. // Arguments: [fps] -- Property
  777. // [var] -- Value
  778. //
  779. // Returns: TRUE if property [fps] == ROBOTS and value [var] == NOINDEX
  780. //
  781. // History: 7-Oct-97 KyleP Stole from Site Server
  782. //
  783. // Notes: I based my changes to this code in information found at:
  784. // http://info.webcrawler.com/mak/projects/robots/meta-user.html
  785. //
  786. //----------------------------------------------------------------------------
  787. BOOL IsNonIndexableProp( CFullPropSpec const & fps, PROPVARIANT const & var )
  788. {
  789. static GUID guidHTMLMeta = HTMLMetaGuid;
  790. BOOL fIsNonIndexable = FALSE;
  791. if ( fps.IsPropertyName() &&
  792. fps.GetPropSet() == guidHTMLMeta &&
  793. _wcsicmp( fps.GetPropertyName(), L"ROBOTS" ) == 0 &&
  794. (var.vt == VT_LPWSTR || var.vt == VT_BSTR) &&
  795. 0 != var.pwszVal )
  796. {
  797. //
  798. // Convert to lowercase to do wcsstr search.
  799. //
  800. unsigned cc = wcslen( var.pwszVal ) + 1;
  801. XGrowable<WCHAR> xwcsTemp( cc );
  802. RtlCopyMemory( xwcsTemp.Get(), var.pwszVal, cc * sizeof(WCHAR) );
  803. _wcslwr( xwcsTemp.Get() );
  804. //
  805. // Check "noindex"
  806. //
  807. fIsNonIndexable = wcsstr( xwcsTemp.Get(), L"noindex") != 0;
  808. //
  809. // Check "all"
  810. //
  811. if ( !fIsNonIndexable )
  812. fIsNonIndexable = wcsstr( xwcsTemp.Get(), L"none") != 0;
  813. }
  814. return fIsNonIndexable;
  815. }