Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

839 lines
26 KiB

  1. //+---------------------------------------------------------------------------
  2. //
  3. // Copyright (C) Microsoft Corporation, 1996 - 2001.
  4. //
  5. // File: docsum.cxx
  6. //
  7. // Contents: document summary helper class
  8. //
  9. // Classes: CDocCharacterization
  10. //
  11. // History: 12-Jan-96 dlee Created
  12. //
  13. // Todo: try to end summary on sentence or word boundary.
  14. //
  15. //----------------------------------------------------------------------------
  16. #include <pch.cxx>
  17. #pragma hdrstop
  18. #include <propspec.hxx>
  19. #include <ciguid.hxx>
  20. #include "docsum.hxx"
  21. const WCHAR wcParagraph = 0x2029;
  22. const WCHAR *pwcDescription = L"DESCRIPTION";
  23. static CFullPropSpec psRevName( guidQuery, DISPID_QUERY_REVNAME );
  24. static CFullPropSpec psName( guidStorage, PID_STG_NAME );
  25. const GUID guidCharacterization = PSGUID_CHARACTERIZATION;
  26. const GUID guidHTMLUrl = HTMLUrl;
  27. const GUID guidHTMLComment = HTMLComment;
  28. const GUID guidHTMLScript = HTMLScriptGuid;
  29. static CFullPropSpec psCharacterization( guidCharacterization,
  30. propidCharacterization );
  31. const GUID guidDocSummary = defGuidDocSummary;
  32. static CFullPropSpec psTitle( guidDocSummary, propidTitle );
  33. static const GUID guidHtmlInformation = defGuidHtmlInformation;
  34. static GUID const guidMeta = { 0xd1b5d3f0,
  35. 0xc0b3, 0x11cf,
  36. 0x9a, 0x92, 0x00, 0xa0,
  37. 0xc9, 0x08, 0xdb, 0xf1 };
  38. inline unsigned DocSumScore( PROPID propid )
  39. {
  40. switch ( propid )
  41. {
  42. case propidTitle :
  43. return scoreTitle;
  44. case propidSubject :
  45. return scoreSubject;
  46. case propidKeywords :
  47. return scoreKeywords;
  48. case propidComments :
  49. return scoreComments;
  50. case propidTemplate :
  51. case propidLastAuthor :
  52. case propidRevNumber :
  53. case propidAppName :
  54. case propidAuthor :
  55. return scoreIgnore;
  56. }
  57. return scoreIfNothingElse;
  58. } //DocSumScore
  59. inline unsigned HtmlPropScore( PROPID propid )
  60. {
  61. switch ( propid )
  62. {
  63. case PID_HEADING_1 :
  64. return scoreHeader1;
  65. case PID_HEADING_2 :
  66. return scoreHeader2;
  67. case PID_HEADING_3 :
  68. return scoreHeader3;
  69. case PID_HEADING_4 :
  70. return scoreHeader4;
  71. case PID_HEADING_5 :
  72. return scoreHeader5;
  73. case PID_HEADING_6 :
  74. return scoreHeader6;
  75. }
  76. return scoreIgnore;
  77. } //HtmlPropScore
  78. //+-------------------------------------------------------------------------
  79. //
  80. // Function: StringToClsid
  81. //
  82. // Synopsis: Convert string containing CLSID to CLSID.
  83. // The string must be of the form:
  84. // {d1b5d3f0-c0b3-11cf-9a92-00a0c908dbf1}
  85. //
  86. // Arguments: [wszClass] -- string containg CLSID
  87. // [guidClass] -- output guid
  88. //
  89. //--------------------------------------------------------------------------
  90. void StringToClsid( WCHAR *wszClass, GUID& guidClass )
  91. {
  92. wszClass[9] = 0;
  93. guidClass.Data1 = wcstoul( &wszClass[1], 0, 16 );
  94. wszClass[14] = 0;
  95. guidClass.Data2 = (USHORT)wcstoul( &wszClass[10], 0, 16 );
  96. wszClass[19] = 0;
  97. guidClass.Data3 = (USHORT)wcstoul( &wszClass[15], 0, 16 );
  98. WCHAR wc = wszClass[22];
  99. wszClass[22] = 0;
  100. guidClass.Data4[0] = (unsigned char)wcstoul( &wszClass[20], 0, 16 );
  101. wszClass[22] = wc;
  102. wszClass[24] = 0;
  103. guidClass.Data4[1] = (unsigned char)wcstoul( &wszClass[22], 0, 16 );
  104. for ( int i = 0; i < 6; i++ )
  105. {
  106. wc = wszClass[27+i*2];
  107. wszClass[27+i*2] = 0;
  108. guidClass.Data4[2+i] = (unsigned char)wcstoul( &wszClass[25+i*2], 0, 16 );
  109. wszClass[27+i*2] = wc;
  110. }
  111. }
  112. //+---------------------------------------------------------------------------
  113. //
  114. // Method: CDocCharacterization::CDocCharacterization, public
  115. //
  116. // Synopsis: constructor
  117. //
  118. // Arguments: [cwcAtMost] -- Max size of characterization. 0 --> Don't
  119. // generate one.
  120. //
  121. // History: 12-Jan-96 dlee Created
  122. // 20-Jun-97 KyleP Make 0 --> no characterization
  123. //
  124. //----------------------------------------------------------------------------
  125. CDocCharacterization::CDocCharacterization( unsigned cwcAtMost )
  126. : _queue( FALSE, cwcAtMost ),
  127. _scoreRawText( scoreRawText ),
  128. _cwcIgnoreBuf( 0 ),
  129. _fMetaDescriptionAdded( FALSE )
  130. {
  131. _fIsGenerating = (0 != cwcAtMost);
  132. }
  133. //+---------------------------------------------------------------------------
  134. //
  135. // Method: CDocCharacterization::~CDocCharacterization, public
  136. //
  137. // Synopsis: destructor
  138. //
  139. // History: 12-Jan-96 dlee Created
  140. //
  141. //----------------------------------------------------------------------------
  142. CDocCharacterization::~CDocCharacterization()
  143. {
  144. // clean up anything left in the queue -- it should be empty, except for
  145. // the exception case.
  146. CSummaryText text;
  147. while ( _queue.DeQueue( text ) )
  148. delete [] text.GetText();
  149. } //~CDocCharacterization
  150. //+---------------------------------------------------------------------------
  151. //
  152. // Method: CDocCharacterization::AddCleanedString, private
  153. //
  154. // Synopsis: Adds a noise-free string to the queue if it belongs
  155. //
  156. // Arguments: [pwcSummary] -- string to add to the summary
  157. // [cwcSummary] -- # of characters in the string
  158. // [utility] -- score for the string
  159. // [fDeliniate] -- if TRUE, a termination is added to the string
  160. //
  161. // History: 12-Jan-96 dlee Created
  162. //
  163. //----------------------------------------------------------------------------
  164. BOOL CDocCharacterization::AddCleanedString(
  165. const WCHAR * pwcSummary,
  166. unsigned cwcSummary,
  167. unsigned utility,
  168. BOOL fDeliniate )
  169. {
  170. Win4Assert( _fIsGenerating );
  171. CSummaryText text( (WCHAR *) pwcSummary,
  172. cwcSummary + ( fDeliniate ? cwcSummarySpace : 0 ),
  173. utility );
  174. unsigned cDeQueue = 0;
  175. // Check if the item will make it on the queue
  176. if ( _queue.ShouldEnQueue( text, cDeQueue ) )
  177. {
  178. //
  179. // Don't add duplicates. If the duplicate has a worse score than
  180. // the new item, remove it and add the new item.
  181. //
  182. CSummaryText testText;
  183. for ( unsigned x = 0; fDeliniate && x < _queue.Count(); x++ )
  184. {
  185. CSummaryText & testText = _queue.Peek( x );
  186. if ( testText.isSame( pwcSummary,
  187. __min( cwcSummary, testText.GetSize() ) ) )
  188. {
  189. if ( testText.GetUtility() < utility )
  190. {
  191. delete [] testText.GetText();
  192. _queue.Remove( x );
  193. // don't have to dequeue anymore if the old duplicate
  194. // is large enough
  195. BOOL f = _queue.ShouldEnQueue( text, cDeQueue );
  196. Win4Assert( f );
  197. break;
  198. }
  199. else
  200. {
  201. return TRUE;
  202. }
  203. }
  204. }
  205. // need to remove the worst item to make room for this one?
  206. for ( ; cDeQueue > 0; cDeQueue-- )
  207. {
  208. Win4Assert( 0 != _queue.Count() );
  209. CSummaryText temp;
  210. _queue.DeQueue( temp );
  211. delete [] temp.GetText();
  212. }
  213. Win4Assert( _queue.CurrentSize() <= _queue.MaxTotalSize() );
  214. // make a copy of the summary string and put in in the queue
  215. unsigned cwc = cwcSummary + ( fDeliniate ? cwcSummarySpace : 0 );
  216. XArray<WCHAR> xCopy( cwc );
  217. RtlCopyMemory( xCopy.GetPointer(),
  218. pwcSummary,
  219. cwcSummary * sizeof WCHAR );
  220. if ( fDeliniate )
  221. RtlCopyMemory( xCopy.GetPointer() + cwcSummary,
  222. awcSummarySpace,
  223. cwcSummarySpace * sizeof WCHAR );
  224. text.SetText( xCopy.GetPointer() );
  225. _queue.EnQueue( text );
  226. Win4Assert( _queue.CurrentSize() <= _queue.MaxTotalSize() );
  227. // if the EnQueue doesn't throw, the queue owns the memory
  228. xCopy.Acquire();
  229. return TRUE;
  230. }
  231. return FALSE;
  232. } //_AddCleanedString
  233. //+---------------------------------------------------------------------------
  234. //
  235. // Method: CDocCharacterization::YankNoise, private
  236. //
  237. // Synopsis: Creates a new string that has "noise" stripped out.
  238. //
  239. // Arguments: [pwcIn] -- string to add to the summary
  240. // [pwcOut] -- resulting cleaned string
  241. // [cwc] -- in/out number of characters
  242. //
  243. // History: 12-Jan-96 dlee Created
  244. //
  245. //----------------------------------------------------------------------------
  246. const WORD C1_OK = ( C1_DIGIT | C1_SPACE | C1_ALPHA );
  247. const WORD C1_CP = ( C1_CNTRL | C1_PUNCT );
  248. const WORD C1_CSP = ( C1_CNTRL | C1_SPACE | C1_PUNCT );
  249. inline BOOL isCP( WORD wC1 ) { return 0 != (C1_CP & wC1); }
  250. inline BOOL isCSP( WORD wC1 ) { return 0 != (C1_CSP & wC1); }
  251. inline BOOL isOK( WORD wC1 ) { return 0 != (C1_OK & wC1); }
  252. inline BOOL isDefined( WORD wC1 ) { return 0 != (0x200 & wC1); }
  253. inline BOOL isSpace( WORD wC1 ) { return 0 != (C1_SPACE & wC1); }
  254. inline BOOL isCntrl( WORD wC1 ) { return 0 != (C1_CNTRL & wC1); }
  255. inline BOOL isPunct( WORD wC1 ) { return 0 != (C1_PUNCT & wC1); }
  256. // For example: a Japanese vowel elongating symbol
  257. inline BOOL isDiacritic( WORD wC3 ) { return 0 != (C3_DIACRITIC & wC3 ); }
  258. void CDocCharacterization::YankNoise(
  259. const WCHAR * pwcIn,
  260. WCHAR * pwcOut,
  261. unsigned & cwc )
  262. {
  263. Win4Assert( _fIsGenerating );
  264. WORD awType[ cwcMaxRawUsed ];
  265. Win4Assert( cwc <= cwcMaxRawUsed );
  266. if ( GetStringTypeW( CT_CTYPE1, pwcIn, cwc, awType ) )
  267. {
  268. // eat any leading white space or punctuation
  269. unsigned iIn = 0;
  270. while ( ( iIn < cwc ) &&
  271. ( isCSP( awType[ iIn ] ) ) )
  272. iIn++;
  273. // make it look like the previous line ended with a CR/LF
  274. WORD wPrev = C1_CNTRL;
  275. unsigned iOut = 0;
  276. // filter the text, stripping redundant punctuation and white space
  277. while ( ( iIn < cwc ) &&
  278. ( iOut < cwcMaxRawUsed ) )
  279. {
  280. if ( ! ( isSpace( wPrev ) && isSpace( awType[ iIn ] ) ) )
  281. {
  282. // convert control characters and wcParagraph to ' '
  283. if ( ( isCntrl( awType[ iIn ] ) ) ||
  284. ( wcParagraph == pwcIn[ iIn ] ) )
  285. pwcOut[ iOut++ ] = L' ';
  286. else if ( isOK( awType[ iIn ] ) )
  287. pwcOut[ iOut++ ] = pwcIn[ iIn ];
  288. else if ( ( isPunct( awType[ iIn ] ) ) &&
  289. ( !isCP( wPrev ) ) )
  290. pwcOut[ iOut++ ] = pwcIn[ iIn ];
  291. else
  292. {
  293. if ( isDefined( awType[ iIn ] ) )
  294. {
  295. WCHAR pwszSingleChar[2];
  296. pwszSingleChar[0] = pwcIn[iIn];
  297. pwszSingleChar[1] = L'0';
  298. WORD wType;
  299. GetStringTypeW( CT_CTYPE3, pwszSingleChar, 1, &wType );
  300. if ( isDiacritic( wType ) )
  301. pwcOut[ iOut++ ] = pwcIn[ iIn ];
  302. }
  303. }
  304. }
  305. wPrev = awType[ iIn++ ];
  306. }
  307. // eat any trailing spaces
  308. while ( iOut > 0 && L' ' == pwcOut[iOut-1] )
  309. iOut--;
  310. cwc = iOut;
  311. }
  312. else
  313. {
  314. cwc = 0;
  315. }
  316. } //_YankNoise
  317. //+---------------------------------------------------------------------------
  318. //
  319. // Method: CDocCharacterization::Add, private
  320. //
  321. // Synopsis: Preps and adds a string to the queue.
  322. //
  323. // Arguments: [pwcSummary] -- string to add to the summary
  324. // [cwcSummary] -- # characters in the string
  325. // [utility] -- score for the string, higher is better
  326. // [fYankNoise] -- if TRUE, noise is removed from the string
  327. //
  328. // Returns: FALSE if the item was rejected from a full queue because
  329. // it was worse than anything in the queue, TRUE otherwise.
  330. //
  331. // History: 12-Jan-96 dlee Created
  332. //
  333. //----------------------------------------------------------------------------
  334. const unsigned cwcTextAtATime = 25;
  335. BOOL CDocCharacterization::Add(
  336. const WCHAR * pwcSummary,
  337. unsigned cwcSummary,
  338. unsigned utility,
  339. BOOL fYankNoise )
  340. {
  341. Win4Assert( _fIsGenerating );
  342. if ( scoreIgnore == utility )
  343. return FALSE;
  344. if ( 0 != cwcSummary )
  345. {
  346. unsigned cwcBuf = __min( cwcSummary, cwcMaxRawUsed );
  347. WCHAR awcBuf[ cwcMaxRawUsed ];
  348. if ( fYankNoise )
  349. {
  350. YankNoise( pwcSummary, awcBuf, cwcBuf );
  351. // no text left after removal of noise?
  352. if ( 0 == cwcBuf )
  353. return TRUE;
  354. // something we should ignore (the raw text version of the title)?
  355. if ( ( _cwcIgnoreBuf == cwcSummarySpace ) &&
  356. ( !wcsncmp( awcBuf, _awcIgnoreBuf, _cwcIgnoreBuf ) ) )
  357. return TRUE;
  358. }
  359. else
  360. {
  361. RtlCopyMemory( awcBuf, pwcSummary, cwcBuf * sizeof WCHAR );
  362. }
  363. // if it looks like it's one sentence, send it all at once
  364. if ( ( utility > scoreRawText ) ||
  365. ( cwcBuf <= cwcMaxIgnoreBuf ) )
  366. {
  367. return AddCleanedString( awcBuf, cwcBuf, utility, fYankNoise );
  368. }
  369. else
  370. {
  371. // large block of text, so send a little at a time to the queue.
  372. for ( unsigned owc = 0; owc < cwcBuf; )
  373. {
  374. unsigned cwcNow = __min( cwcBuf - owc, cwcTextAtATime );
  375. if ( !AddCleanedString( awcBuf + owc,
  376. cwcNow,
  377. utility--,
  378. FALSE ) )
  379. {
  380. return FALSE;
  381. }
  382. owc += cwcNow;
  383. }
  384. }
  385. }
  386. return TRUE;
  387. } //_Add
  388. //+---------------------------------------------------------------------------
  389. //
  390. // Method: CDocCharacterization::AddRawText, private
  391. //
  392. // Synopsis: Adds some text to the queue with a utility of raw text.
  393. //
  394. // Arguments: [pwcRawText] -- string to add to the summary
  395. // [cwcText] -- # characters in the string
  396. //
  397. // History: 12-Jan-96 dlee Created
  398. //
  399. //----------------------------------------------------------------------------
  400. void CDocCharacterization::AddRawText(
  401. const WCHAR * pwcRawText,
  402. unsigned cwcText )
  403. {
  404. Win4Assert( _fIsGenerating );
  405. Win4Assert( _queue.CurrentSize() <= _queue.MaxTotalSize() );
  406. if ( 0 != _scoreRawText )
  407. {
  408. if ( Add( pwcRawText, cwcText, _scoreRawText ) )
  409. _scoreRawText -= cwcText / cwcTextAtATime;
  410. else
  411. _scoreRawText = 0;
  412. }
  413. } //_AddRawText
  414. //+---------------------------------------------------------------------------
  415. //
  416. // Method: CDocCharacterization::RemoveLowScoringItems, private
  417. //
  418. // Synopsis: Removes low-scoring items from the queue
  419. //
  420. // Arguments: [iLimit] -- items scoring <= iLimit are removed
  421. //
  422. // History: 29-Aug-96 dlee Created
  423. //
  424. //----------------------------------------------------------------------------
  425. void CDocCharacterization::RemoveLowScoringItems(
  426. unsigned iLimit )
  427. {
  428. Win4Assert( _fIsGenerating );
  429. while ( 0 != _queue.Count() )
  430. {
  431. CSummaryText &top = _queue.PeekTop();
  432. if ( top.GetUtility() <= iLimit )
  433. {
  434. CSummaryText text;
  435. _queue.DeQueue( text );
  436. delete [] text.GetText();
  437. }
  438. else
  439. {
  440. break;
  441. }
  442. }
  443. } //_RemoveLowScoringItems
  444. //+---------------------------------------------------------------------------
  445. //
  446. // Method: CDocCharacterization::Get, public
  447. //
  448. // Synopsis: Returns the summary in one string.
  449. //
  450. // Arguments: [awcSummary] -- output string
  451. // [cwcSummary] -- in/out the length of the string
  452. // [fUseRawText] -- TRUE if raw text should be included
  453. //
  454. // History: 12-Jan-96 dlee Created
  455. //
  456. //----------------------------------------------------------------------------
  457. void CDocCharacterization::Get(
  458. WCHAR * awcSummary,
  459. unsigned & cwcSummary,
  460. BOOL fUseRawText )
  461. {
  462. Win4Assert( _fIsGenerating );
  463. // Caller should give us a buffer large enough to hold the
  464. // characterization they requested and a null termination.
  465. Win4Assert( _queue.CurrentSize() <= _queue.MaxTotalSize() );
  466. Win4Assert( cwcSummary > _queue.MaxTotalSize() );
  467. Win4Assert( cwcSummary > _queue.CurrentSize() );
  468. // If we shouldn't include raw text, pop low-scoring items off the
  469. // top of the queue queue.
  470. if ( !fUseRawText )
  471. RemoveLowScoringItems( scoreRawText );
  472. // If a meta description was added, there's no point in tacking on
  473. // additional text in the abstract.
  474. if ( _fMetaDescriptionAdded )
  475. {
  476. Win4Assert( cwcSummary > _awcMetaDescription.Count() );
  477. RtlCopyMemory( awcSummary,
  478. _awcMetaDescription.GetPointer(),
  479. _awcMetaDescription.SizeOf() );
  480. cwcSummary = _awcMetaDescription.Count();
  481. awcSummary[ cwcSummary ] = 0;
  482. }
  483. else
  484. {
  485. cwcSummary = _queue.CurrentSize();
  486. // The item on the top of the queue is the least useful item, so
  487. // we have to invert the order.
  488. WCHAR *pwcSummary = awcSummary + cwcSummary;
  489. *pwcSummary = 0;
  490. CSummaryText text;
  491. while ( _queue.DeQueue( text ) )
  492. {
  493. pwcSummary -= text.GetSize();
  494. RtlCopyMemory( pwcSummary,
  495. text.GetText(),
  496. text.GetSize() * sizeof WCHAR );
  497. delete [] text.GetText();
  498. }
  499. Win4Assert( pwcSummary == awcSummary );
  500. }
  501. } //GetSummary
  502. //+---------------------------------------------------------------------------
  503. //
  504. // Method: CDocCharacterization::Ignore, private
  505. //
  506. // Synopsis: Tells the class to ignore this string in the generation
  507. // of a summary. This is probably the "title" of an html
  508. // document, which is stored in a separate property, and it
  509. // would be redundant to store it twice.
  510. //
  511. // Arguments: [pwcIgnore] -- string to ignore
  512. // [cwcText] -- # characters in the string
  513. //
  514. // History: 12-Jan-96 dlee Created
  515. //
  516. //----------------------------------------------------------------------------
  517. void CDocCharacterization::Ignore(
  518. const WCHAR * pwcIgnore,
  519. unsigned cwcText )
  520. {
  521. Win4Assert( _fIsGenerating );
  522. // clean and save the string to ignore
  523. _cwcIgnoreBuf = __min( cwcText, cwcMaxIgnoreBuf );
  524. YankNoise( pwcIgnore, _awcIgnoreBuf, _cwcIgnoreBuf );
  525. // remove any instance of the string in the queue
  526. unsigned cwcTest = _cwcIgnoreBuf + cwcSummarySpace;
  527. for ( unsigned x = 0; x < _queue.Count(); x++ )
  528. {
  529. CSummaryText &testText = _queue.Peek( x );
  530. if ( ( cwcTest == testText.GetSize() ) &&
  531. ( testText.isSame( _awcIgnoreBuf, _cwcIgnoreBuf ) ) )
  532. {
  533. delete [] testText.GetText();
  534. _queue.Remove( x );
  535. break;
  536. }
  537. }
  538. } //_Ignore
  539. //+---------------------------------------------------------------------------
  540. //
  541. // Method: CDocCharacterization::Add, public
  542. //
  543. // Synopsis: Adds a string value to the queue if appropriate, based on the
  544. // propspec and the nature of the string.
  545. //
  546. // Arguments: [pwcSummary] -- string to ignore
  547. // [cwcSummary] -- # characters in the string
  548. //
  549. // History: 12-Jan-96 dlee Created
  550. //
  551. //----------------------------------------------------------------------------
  552. void CDocCharacterization::Add( CStorageVariant const & var,
  553. CFullPropSpec & ps )
  554. {
  555. // if the meta description has been added already, we're done.
  556. if ( _fMetaDescriptionAdded || !_fIsGenerating )
  557. return;
  558. #if CIDBG == 1
  559. ciDebugOut(( DEB_DOCSUM, "docchar::Add variant type %#x\n", var.vt ));
  560. if ( VT_LPWSTR == var.vt )
  561. ciDebugOut(( DEB_DOCSUM, " wstr: '%ws'\n", var.pwszVal ));
  562. else if ( VT_LPSTR == var.vt )
  563. ciDebugOut(( DEB_DOCSUM, " str: '%s'\n", var.pszVal ));
  564. ciDebugOut(( DEB_DOCSUM,
  565. " guid {%08lx-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x}\n",
  566. ps.GetPropSet().Data1,
  567. ps.GetPropSet().Data2,
  568. ps.GetPropSet().Data3,
  569. ps.GetPropSet().Data4[0], ps.GetPropSet().Data4[1],
  570. ps.GetPropSet().Data4[2], ps.GetPropSet().Data4[3],
  571. ps.GetPropSet().Data4[4], ps.GetPropSet().Data4[5],
  572. ps.GetPropSet().Data4[6], ps.GetPropSet().Data4[7] ));
  573. if ( ps.IsPropertyName() )
  574. ciDebugOut(( DEB_DOCSUM, " string: '%ws'\n", ps.GetPropertyName() ));
  575. else
  576. ciDebugOut(( DEB_DOCSUM, " id: '%d'\n", ps.GetPropertyPropid() ));
  577. #endif // CIDBG
  578. // title is added as plain text and _Ignore() is called then.
  579. if ( ps != psTitle )
  580. {
  581. if ( VT_LPWSTR == var.Type() )
  582. {
  583. // Don't put file names or meta properties in abstracts.
  584. if ( ( psRevName != ps ) &&
  585. ( psName != ps ) )
  586. {
  587. if ( guidMeta == ps.GetPropSet() )
  588. {
  589. // This is the ideal string, based on html spec.
  590. // Toss all other meta property values.
  591. if ( ( ps.IsPropertyName() ) &&
  592. ( 0 == _wcsicmp( ps.GetPropertyName(), pwcDescription ) ) )
  593. {
  594. _fMetaDescriptionAdded = TRUE;
  595. // make a copy of the meta description
  596. if ( 0 == var.GetLPWSTR() )
  597. {
  598. _awcMetaDescription.Init( 0 );
  599. }
  600. else
  601. {
  602. unsigned cwc = __min( wcslen( var.GetLPWSTR() ),
  603. _queue.MaxTotalSize() );
  604. _awcMetaDescription.Init( cwc );
  605. RtlCopyMemory( _awcMetaDescription.GetPointer(),
  606. var.GetLPWSTR(),
  607. _awcMetaDescription.SizeOf() );
  608. }
  609. // toss everything in the queue
  610. CSummaryText text;
  611. while ( _queue.DeQueue( text ) )
  612. delete [] text.GetText();
  613. }
  614. }
  615. else if ( 0 != var.GetLPWSTR() &&
  616. ( guidDocSummary == ps.GetPropSet() ) )
  617. {
  618. Win4Assert( ps.IsPropertyPropid() );
  619. Add( var.GetLPWSTR(),
  620. wcslen( var.GetLPWSTR() ),
  621. DocSumScore( ps.GetPropertyPropid() ) );
  622. }
  623. else
  624. {
  625. if ( 0 != var.GetLPWSTR() )
  626. Add( var.GetLPWSTR(),
  627. wcslen( var.GetLPWSTR() ),
  628. scoreOtherProperty );
  629. }
  630. }
  631. } // if VT_LPWSTR
  632. } // ps != psTitle
  633. } //Add
  634. //+---------------------------------------------------------------------------
  635. //
  636. // Method: CDocCharacterization::Add, public
  637. //
  638. // Synopsis: Adds a string to the queue if appropriate, based on the
  639. // propspec and the nature of the string.
  640. //
  641. // Arguments: [pwcSummary] -- string to ignore
  642. // [cwcSummary] -- # characters in the string
  643. // [ps] -- Property being added
  644. //
  645. // History: 12-Jan-96 dlee Created
  646. //
  647. //----------------------------------------------------------------------------
  648. void CDocCharacterization::Add(
  649. const WCHAR * pwcSummary,
  650. unsigned cwcSummary,
  651. FULLPROPSPEC & ps )
  652. {
  653. Win4Assert( _queue.CurrentSize() <= _queue.MaxTotalSize() );
  654. // if the meta description has been added already, we're done.
  655. if ( _fMetaDescriptionAdded || !_fIsGenerating )
  656. return;
  657. #if CIDBG == 1
  658. ciDebugOut(( DEB_DOCSUM, "docchar::Add: '%.*ws'\n", cwcSummary, pwcSummary ));
  659. ciDebugOut(( DEB_DOCSUM,
  660. " guid {%08lx-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x}\n",
  661. ps.guidPropSet.Data1,
  662. ps.guidPropSet.Data2,
  663. ps.guidPropSet.Data3,
  664. ps.guidPropSet.Data4[0], ps.guidPropSet.Data4[1],
  665. ps.guidPropSet.Data4[2], ps.guidPropSet.Data4[3],
  666. ps.guidPropSet.Data4[4], ps.guidPropSet.Data4[5],
  667. ps.guidPropSet.Data4[6], ps.guidPropSet.Data4[7] ));
  668. if ( PRSPEC_LPWSTR == ps.psProperty.ulKind )
  669. ciDebugOut(( DEB_DOCSUM, " string: '%ws'\n", ps.psProperty.lpwstr ));
  670. else
  671. ciDebugOut(( DEB_DOCSUM, " id: '%d'\n", ps.psProperty.propid ));
  672. #endif // CIDBG
  673. // add raw text unless it's the title
  674. if ( guidHtmlInformation == ps.guidPropSet )
  675. {
  676. Add( pwcSummary,
  677. cwcSummary,
  678. HtmlPropScore( ps.psProperty.propid ) );
  679. }
  680. else if ( guidHTMLUrl == ps.guidPropSet ||
  681. guidHTMLComment == ps.guidPropSet )
  682. {
  683. // just ignore it
  684. }
  685. else if ( guidHTMLScript == ps.guidPropSet )
  686. {
  687. // note: the current html filter doesn't emit scripts, but just
  688. // in case that changes this case is checked.
  689. ciDebugOut(( DEB_DOCSUM, "ignoring script\n" ));
  690. }
  691. else if ( psTitle == * ( (CFullPropSpec *)&ps ) )
  692. {
  693. Ignore( pwcSummary, cwcSummary );
  694. }
  695. else
  696. {
  697. AddRawText( pwcSummary, cwcSummary );
  698. }
  699. Win4Assert( _queue.CurrentSize() <= _queue.MaxTotalSize() );
  700. } //Add