Leaked source code of Windows Server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

203 lines
5.9 KiB

  1. //+---------------------------------------------------------------------------
  2. //
  3. // Copyright (C) 1996, Microsoft Corporation
  4. //
  5. // File: docsum.hxx
  6. //
  7. // Contents: document summary helper classes
  8. //
  9. // Classes: CDocCharacterization, CSummaryText
  10. //
  11. // History: 12-Jan-96 dlee Created
  12. //
  13. //----------------------------------------------------------------------------
  14. #pragma once
  15. #include <tpriq.hxx>
  16. extern const GUID guidDocSummary;
  17. extern const GUID guidCharacterization;
  18. // this is the ole 2 / ms office summary guid its property ids
  19. #define defGuidDocSummary { 0xf29f85e0, \
  20. 0x4ff9, 0x1068, \
  21. 0xab, 0x91, 0x08, 0x00, \
  22. 0x2b, 0x27, 0xb3, 0xd9 }
  23. const PROPID propidTitle = 2;
  24. const PROPID propidSubject = 3;
  25. const PROPID propidAuthor = 4;
  26. const PROPID propidKeywords = 5;
  27. const PROPID propidComments = 6;
  28. const PROPID propidTemplate = 7;
  29. const PROPID propidLastAuthor = 8;
  30. const PROPID propidRevNumber = 9;
  31. const PROPID propidAppName = 0x12;
  32. // guid and property ids used by the html filter
  33. #define defGuidHtmlInformation { 0x70eb7a10, \
  34. 0x55d9, 0x11cf, \
  35. 0xb7, 0x5b, 0x00, 0xaa, \
  36. 0x00, 0x51, 0xfe, 0x20 }
  37. const PROPID PID_HEADING_1 = 3;
  38. const PROPID PID_HEADING_2 = 4;
  39. const PROPID PID_HEADING_3 = 5;
  40. const PROPID PID_HEADING_4 = 6;
  41. const PROPID PID_HEADING_5 = 7;
  42. const PROPID PID_HEADING_6 = 8;
  43. const unsigned propidCharacterization = 2;
  44. // constant used to separate parts of a characterization
  45. #define awcSummarySpace L". "
  46. const unsigned cwcSummarySpace = 2;
  47. // maximum amount of raw text used at once
  48. const ULONG cwcMaxRawUsed = 600;
  49. // These scores are just guidelines; any value can be used for a
  50. // summary utility.
  51. const unsigned scoreInfinity = 30000;
  52. const unsigned scoreHtmlDescription = 17000;
  53. const unsigned scoreTitle = 16000;
  54. const unsigned scoreAbstract = 15000;
  55. const unsigned scoreSubject = 14000;
  56. const unsigned scoreKeywords = 13000;
  57. const unsigned scoreComments = 12000;
  58. const unsigned scoreHeader1 = 10000;
  59. const unsigned scoreHeader2 = 9000;
  60. const unsigned scoreHeader3 = 8000;
  61. const unsigned scoreHeader4 = 7000;
  62. const unsigned scoreHeader5 = 6000;
  63. const unsigned scoreHeader6 = 5000;
  64. const unsigned scoreRawText = 4000;
  65. const unsigned scoreOtherProperty = 3000;
  66. const unsigned scoreIfNothingElse = 10;
  67. const unsigned scoreIgnore = 0;
  68. //+-------------------------------------------------------------------------
  69. //
  70. // Class: CSummaryText
  71. //
  72. // Purpose: Characterizations are built up with these objects
  73. //
  74. // History: 12-Jan-96 dlee Created
  75. //
  76. //--------------------------------------------------------------------------
  77. class CSummaryText
  78. {
  79. public:
  80. CSummaryText( WCHAR * pwcText,
  81. unsigned cwc,
  82. unsigned utility ) :
  83. _pwcText( pwcText ),
  84. _cwcText( cwc ),
  85. _utility( utility ) {}
  86. CSummaryText() {}
  87. BOOL isSame( const WCHAR * pwc, unsigned cwc )
  88. { return !wcsncmp( pwc, _pwcText, cwc ); }
  89. WCHAR * GetText()
  90. { return _pwcText; }
  91. void SetText( WCHAR * pwcText )
  92. { _pwcText = pwcText; }
  93. unsigned GetUtility() { return _utility; }
  94. // methods needed by priority-queue template
  95. unsigned GetSize() { return _cwcText; }
  96. // keep the worst items at the top of the queue
  97. BOOL IsGreaterThan( CSummaryText & rOther )
  98. { return _utility < rOther._utility; }
  99. private:
  100. WCHAR * _pwcText;
  101. unsigned _cwcText;
  102. unsigned _utility;
  103. };
  104. //+-------------------------------------------------------------------------
  105. //
  106. // Class: CDocCharacterization
  107. //
  108. // Purpose: Builds characterizations
  109. //
  110. // History: 12-Jan-96 dlee Created
  111. //
  112. //--------------------------------------------------------------------------
  113. class CDocCharacterization
  114. {
  115. public:
  116. CDocCharacterization( unsigned cwcAtMost );
  117. ~CDocCharacterization();
  118. void Add( CStorageVariant const & var,
  119. CFullPropSpec & ps );
  120. void Add( const WCHAR * pwcSummary,
  121. unsigned cwcSummary,
  122. FULLPROPSPEC & ps );
  123. void Get( WCHAR * awcSummary,
  124. unsigned & cwcSummary,
  125. BOOL fUseRawText );
  126. BOOL HasCharacterization() { return _fIsGenerating; }
  127. private:
  128. void Ignore( const WCHAR * pwcIgnore,
  129. unsigned cwcText );
  130. BOOL Add( const WCHAR * pwcSummary,
  131. unsigned cwcSummary,
  132. unsigned utility,
  133. BOOL fYankNoise = TRUE );
  134. void AddRawText( const WCHAR * pwcRawText,
  135. unsigned cwcText );
  136. BOOL AddCleanedString( const WCHAR * pwcSummary,
  137. unsigned cwcSummary,
  138. unsigned utility,
  139. BOOL fDeliniate );
  140. void YankNoise( const WCHAR * pwcIn,
  141. WCHAR * pwcOut,
  142. unsigned & cwc );
  143. void RemoveLowScoringItems( unsigned iLimit );
  144. BOOL _fIsGenerating;
  145. unsigned _scoreRawText;
  146. TPriorityQueue<CSummaryText> _queue;
  147. enum { cwcMaxIgnoreBuf = 100 };
  148. WCHAR _awcIgnoreBuf[ cwcMaxIgnoreBuf ];
  149. unsigned _cwcIgnoreBuf;
  150. XArray<WCHAR> _awcMetaDescription;
  151. BOOL _fMetaDescriptionAdded;
  152. };