Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

319 lines
8.5 KiB

  1. //+---------------------------------------------------------------------------
  2. //
  3. // Microsoft Windows
  4. // Copyright (C) Microsoft Corporation, 1991 - 1992.
  5. //
  6. // File: NOISE.HXX
  7. //
  8. // Contents: Noise word list
  9. //
  10. // Classes: CNoiseList, CNoiseListInit, CNoiseListEmpty
  11. // CLString, CStringList, CStringTable
  12. //
  13. // History: 11-Jul-91 BartoszM Created
  14. //
  15. //----------------------------------------------------------------------------
  16. #pragma once
  17. #include <plang.hxx>
  18. const NOISE_WORD_LENGTH = cbKeyPrefix + sizeof( WCHAR ); // word length for detecting
  19. // and filtering noise words
  20. //+---------------------------------------------------------------------------
  21. //
  22. // Class: CLString
  23. //
  24. // Purpose: Linkable String
  25. //
  26. // History: 16-Jul-91 BartoszM Created
  27. //
  28. //----------------------------------------------------------------------------
  29. class CLString
  30. {
  31. public:
  32. CLString ( UINT cb, const BYTE* buf, CLString* next );
  33. void* operator new ( size_t n, UINT cb );
  34. #if _MSC_VER >= 1200
  35. void operator delete( void * p, UINT cb )
  36. {
  37. ::delete(p);
  38. }
  39. void operator delete( void * p )
  40. {
  41. ::delete(p);
  42. }
  43. #endif
  44. inline BOOL Equal ( UINT cb, const BYTE* str ) const;
  45. CLString * Next() { return _next; }
  46. #if CIDBG == 1
  47. void Dump() const { ciDebugOut (( DEB_ITRACE, "%s ", _buf )); }
  48. #endif
  49. private:
  50. CLString * _next;
  51. UINT _cb;
  52. #pragma warning(disable : 4200) // 0 sized array in struct is non-ansi
  53. BYTE _buf[];
  54. #pragma warning(default : 4200)
  55. };
  56. //+---------------------------------------------------------------------------
  57. //
  58. // Member: CLString::Equal, public
  59. //
  60. // Synopsis: String comparison
  61. //
  62. // Arguments: [cb] -- length
  63. // [str] -- string
  64. //
  65. // History: 16-Jul-91 BartoszM Created.
  66. //
  67. //----------------------------------------------------------------------------
  68. inline BOOL CLString::Equal ( UINT cb, const BYTE* str ) const
  69. {
  70. return ( (cb == _cb) && (memcmp ( str, _buf, _cb ) == 0) );
  71. }
  72. //+---------------------------------------------------------------------------
  73. //
  74. // Class: CStringList
  75. //
  76. // Purpose: List of Linkable Strings
  77. //
  78. // History: 16-Jul-91 BartoszM Created
  79. //
  80. //----------------------------------------------------------------------------
  81. class CStringList
  82. {
  83. public:
  84. CStringList(): _head(0) {}
  85. ~CStringList();
  86. void Add ( UINT cb, const BYTE * str );
  87. BOOL Find ( UINT cb, const BYTE* str ) const;
  88. BOOL IsEmpty () const { return _head == 0; }
  89. #if CIDBG == 1
  90. void Dump() const;
  91. #endif
  92. private:
  93. CLString * _head;
  94. };
  95. //+---------------------------------------------------------------------------
  96. //
  97. // Class: CStringTable
  98. //
  99. // Purpose: Hash Table of strings
  100. //
  101. // History: 16-Jul-91 BartoszM Created
  102. //
  103. //----------------------------------------------------------------------------
  104. class CStringTable
  105. {
  106. public:
  107. CStringTable( UINT size );
  108. ~CStringTable();
  109. void Add ( UINT cb, const BYTE* str, UINT hash );
  110. inline BOOL Find ( UINT cb, const BYTE* str, UINT hash ) const;
  111. #if CIDBG == 1
  112. void Dump() const;
  113. #endif
  114. private:
  115. UINT _index ( UINT hash ) const { return hash % _size; }
  116. UINT _size;
  117. CStringList* _bucket;
  118. };
  119. //+---------------------------------------------------------------------------
  120. //
  121. // Member: CStringTable::Find, public
  122. //
  123. // Synopsis: String comparison
  124. //
  125. // Arguments: [cb] -- length
  126. // [str] -- string
  127. //
  128. // History: 16-Jul-91 BartoszM Created.
  129. //
  130. //----------------------------------------------------------------------------
  131. inline BOOL CStringTable::Find ( UINT cb, const BYTE* str, UINT hash ) const
  132. {
  133. return _bucket [ _index(hash) ].Find ( cb, str );
  134. }
  135. class PKeyRepository;
  136. //+---------------------------------------------------------------------------
  137. //
  138. // Class: CNoiseList
  139. //
  140. // Purpose: Discard meaningless words from the input stream
  141. //
  142. // History: 02-May-91 BartoszM Created stub.
  143. // 30-May-91 t-WadeR Created first draft.
  144. //
  145. //----------------------------------------------------------------------------
  146. class CNoiseList: public PNoiseList
  147. {
  148. public:
  149. CNoiseList( const CStringTable& table, PKeyRepository& krep );
  150. ~CNoiseList() {};
  151. void GetBuffers( UINT** ppcbInBuf, BYTE** ppbInBuf );
  152. void GetFlags ( BOOL** ppRange, CI_RANK** ppRank );
  153. void PutAltWord( UINT hash );
  154. void PutWord( UINT hash );
  155. void StartAltPhrase();
  156. void EndAltPhrase();
  157. void SkipNoiseWords( ULONG cWords )
  158. {
  159. _cNoiseWordsSkipped += cWords;
  160. *_pocc += cWords;
  161. }
  162. void SetOccurrence( OCCURRENCE occ ) { *_pocc = occ; }
  163. BOOL FoundNoise() { return _fFoundNoise; }
  164. OCCURRENCE GetOccurrence() { return *_pocc; }
  165. private:
  166. const CStringTable& _table;
  167. UINT _cbMaxOutBuf;
  168. UINT* _pcbOutBuf;
  169. BYTE* _pbOutBuf;
  170. PKeyRepository& _krep;
  171. OCCURRENCE* _pocc;
  172. BOOL _fFoundNoise; // One way trigger to TRUE when noise word found.
  173. ULONG _cNoiseWordsSkipped; // count of noise words that haven't
  174. // been passed onto the key repository.
  175. // Care must be taken to ensure that
  176. // noise words at the same occurrence
  177. // (ie alternate words) are not counted
  178. // multiple times.
  179. ULONG _cNonNoiseAltWords; // count of non-noise words at current
  180. // occurrence
  181. };
  182. //+---------------------------------------------------------------------------
  183. //
  184. // Class: CNoiseListInit
  185. //
  186. // Purpose: Initializer for the noise list
  187. //
  188. // History: 16-Jul-91 BartoszM Created
  189. //
  190. //----------------------------------------------------------------------------
  191. class CNoiseListInit: INHERIT_VIRTUAL_UNWIND, public PNoiseList
  192. {
  193. INLINE_UNWIND( CNoiseListInit )
  194. public:
  195. CNoiseListInit( UINT size );
  196. ~CNoiseListInit() { delete _table; };
  197. void GetBuffers( UINT** ppcbInBuf, BYTE** ppbInBuf );
  198. void PutAltWord( UINT hash );
  199. void PutWord( UINT hash );
  200. CStringTable * AcqStringTable()
  201. {
  202. CStringTable* tmp = _table;
  203. _table = 0;
  204. return tmp;
  205. }
  206. private:
  207. CKeyBuf _key;
  208. CStringTable * _table;
  209. };
  210. //+---------------------------------------------------------------------------
  211. //
  212. // Class: CNoiseListEmpty
  213. //
  214. // Purpose: Empty Noise List (used as default)
  215. //
  216. // History: 16-Jul-91 BartoszM Created
  217. //
  218. //----------------------------------------------------------------------------
  219. class CNoiseListEmpty: public PNoiseList
  220. {
  221. public:
  222. CNoiseListEmpty( PKeyRepository& krep, ULONG ulFuzzy );
  223. void GetBuffers( UINT** ppcbInBuf, BYTE** ppbInBuf );
  224. void GetFlags ( BOOL** ppRange, CI_RANK** ppRank );
  225. void PutAltWord( UINT hash );
  226. void PutWord( UINT hash );
  227. void StartAltPhrase();
  228. void EndAltPhrase();
  229. void SkipNoiseWords( ULONG cWords )
  230. {
  231. _cNoiseWordsSkipped += cWords;
  232. *_pocc += cWords;
  233. }
  234. void SetOccurrence( OCCURRENCE occ ) { *_pocc = occ; }
  235. BOOL FoundNoise() { return _fFoundNoise; }
  236. OCCURRENCE GetOccurrence() { return *_pocc; }
  237. private:
  238. UINT _cbMaxOutBuf;
  239. UINT* _pcbOutBuf;
  240. BYTE* _pbOutBuf;
  241. PKeyRepository& _krep;
  242. OCCURRENCE* _pocc;
  243. ULONG _ulGenerateMethod; // Fuzzines of query
  244. BOOL _fFoundNoise; // One way trigger to TRUE when noise word found.
  245. ULONG _cNoiseWordsSkipped; // count of noise words that haven't
  246. // been passed onto the key repository.
  247. // Care must be taken to ensure that
  248. // noise words at the same occurrence
  249. // (ie alternate words) are not counted
  250. // multiple times.
  251. ULONG _cNonNoiseAltWords; // count of non-noise words at current
  252. // occurrence
  253. };