Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

273 lines
8.5 KiB

  1. //+---------------------------------------------------------------------------
  2. //
  3. // Microsoft Windows
  4. // Copyright (C) Microsoft Corporation, 1991 - 2000.
  5. //
  6. // File: NORM.HXX
  7. //
  8. // Contents: Language support
  9. //
  10. // Classes: CNormalizer, CValueNormalizer, CValueReNormalizer
  11. //
  12. // History: 02-May-91 BartoszM Created
  13. // 07-Oct-93 DwightKr Added new methods to CValueNormalizer
  14. //
  15. // Notes: The filtering pipeline is hidden in the Data Repository
  16. // object which serves as a sink for the filter.
  17. // The sink for the Data Repository is the Key Repository.
  18. // The language dependent part of the pipeline
  19. // is obtained from the Language List object and is called
  20. // Key Maker. It consists of:
  21. //
  22. // Word Breaker
  23. // Stemmer (optional)
  24. // Normalizer
  25. // Noise List
  26. //
  27. // Each object serves as a sink for its predecessor,
  28. // Key Repository is the final sink.
  29. //
  30. //----------------------------------------------------------------------------
  31. #pragma once
  32. #include <plang.hxx>
  33. #include <entry.hxx>
  34. class PNoiseList;
  35. class CStorageVariant;
  36. //+---------------------------------------------------------------------------
  37. //
  38. // Class: CNormalizer
  39. //
  40. // Purpose: put words in standard form for indexing and querying
  41. //
  42. // History: 02-May-91 BartoszM Created stub.
  43. // 30-May-91 t-WadeR Created 1st draft.
  44. // 13-Oct-92 AmyA Added unicode support
  45. //
  46. // Notes: Normalizer owns its input buffer. Normalized
  47. // keys are copied to the output buffer obtained
  48. // from NoiseList. Input buffer is in WCHARs,
  49. // but output buffer is in BYTEs.
  50. //
  51. //----------------------------------------------------------------------------
  52. class CNormalizer: public PWordRepository
  53. {
  54. public:
  55. CNormalizer( PNoiseList& nl );
  56. ~CNormalizer() {};
  57. unsigned GetMaxBufferLen() { return cwcMaxKey; }
  58. void GetFlags ( BOOL** ppRange, CI_RANK** ppRank );
  59. void ProcessAltWord( WCHAR const *pwcInBuf, ULONG cwc );
  60. void ProcessWord( WCHAR const *pwcInBuf, ULONG cwc );
  61. void StartAltPhrase() { _noiseList.StartAltPhrase(); }
  62. void EndAltPhrase() { _noiseList.EndAltPhrase(); }
  63. void SkipNoiseWords( ULONG cWords ) { _noiseList.SkipNoiseWords( cWords ); }
  64. void SetOccurrence ( OCCURRENCE occ ) { _noiseList.SetOccurrence( occ ); }
  65. OCCURRENCE GetOccurrence () { return _noiseList.GetOccurrence(); }
  66. void NormalizeWStr( WCHAR const *pwcInBuf, ULONG cwcInBuf,
  67. BYTE *pbOutBuf, unsigned *pcbOutBuf );
  68. private:
  69. unsigned NormalizeWord( WCHAR const *pwcInBuf, ULONG cwc );
  70. unsigned NormalizeWord( WCHAR const *pwcInBuf,
  71. ULONG cwc,
  72. BYTE *pbOutBuf,
  73. unsigned *pcbOutBuf );
  74. inline void SetWordBuffer();
  75. inline void SetNextAltBuffer();
  76. inline void SetAltHash( unsigned hash );
  77. BOOL UsingAltBuffers() { return (_cAltKey > 0); }
  78. void ProcessAllWords();
  79. unsigned* _pcbOutBuf;
  80. BYTE* _pbOutBuf;
  81. PNoiseList& _noiseList;
  82. unsigned _cAltKey;
  83. XGrowable<CKeyBuf, 2> _aAltKey;
  84. };
  85. //+---------------------------------------------------------------------------
  86. //
  87. // Member: CNormalizer::SetWordBuffer, private
  88. //
  89. // Synopsis: Sets the default buffer back to the word buffer (standard
  90. // pipeline buffer).
  91. //
  92. // History: 17-Sep-99 KyleP Created.
  93. //
  94. //----------------------------------------------------------------------------
  95. inline void CNormalizer::SetWordBuffer()
  96. {
  97. _cAltKey = 0;
  98. _noiseList.GetBuffers( &_pcbOutBuf, &_pbOutBuf );
  99. }
  100. //+---------------------------------------------------------------------------
  101. //
  102. // Member: CNormalizer::SetNextAltBuffer, private
  103. //
  104. // Synopsis: Sets the default buffer to an alternate word buffer (holds
  105. // alternate words until a ::PutWord call).
  106. //
  107. // History: 17-Sep-99 KyleP Created.
  108. //
  109. //----------------------------------------------------------------------------
  110. inline void CNormalizer::SetNextAltBuffer()
  111. {
  112. _aAltKey.SetSize( _cAltKey + 1 );
  113. _pbOutBuf = _aAltKey[_cAltKey].GetWritableBuf();
  114. _pcbOutBuf = _aAltKey[_cAltKey].GetWritableCount();
  115. _cAltKey++;
  116. }
  117. //+---------------------------------------------------------------------------
  118. //
  119. // Member: CNormalizer::SetAltHash, private
  120. //
  121. // Synopsis: Stores a hash key with the current alternate buffer.
  122. //
  123. // History: 17-Sep-99 KyleP Created.
  124. //
  125. // Notes: Re-using the PID field in CKeyBuf to save an extra structure.
  126. //
  127. //----------------------------------------------------------------------------
  128. inline void CNormalizer::SetAltHash( unsigned hash )
  129. {
  130. Win4Assert( _cAltKey > 0 );
  131. Win4Assert( sizeof(PROPID) >= sizeof(hash) );
  132. _aAltKey[_cAltKey-1].SetPid(hash);
  133. }
  134. //+---------------------------------------------------------------------------
  135. //
  136. // Class: CValueNormalizer
  137. //
  138. // Purpose: Language independent key maker object
  139. //
  140. // History: 04-June-91 t-WadeR Created.
  141. // 24-Sep-92 BartoszM Implemented
  142. // 07-Oct-93 DwightKr Added new methods
  143. //
  144. //----------------------------------------------------------------------------
  145. class CValueNormalizer
  146. {
  147. public:
  148. CValueNormalizer( PKeyRepository& krep );
  149. void PutValue(PROPID pid, OCCURRENCE & occ, CStorageVariant const & var );
  150. void PutMinValue( PROPID pid, OCCURRENCE & occ, VARENUM Type );
  151. void PutMaxValue( PROPID pid, OCCURRENCE & occ, VARENUM Type );
  152. private:
  153. void PutValue(PROPID pid, BYTE byte);
  154. void PutValue(PROPID pid, CHAR ch);
  155. void PutValue(PROPID pid, USHORT usValue);
  156. void PutValue(PROPID pid, SHORT sValue);
  157. void PutValue(PROPID pid, ULONG ulValue);
  158. void PutValue(PROPID pid, LONG lValue);
  159. void PutValue(PROPID pid, float rValue);
  160. void PutValue(PROPID pid, double dValue);
  161. void PutValue(PROPID pid, LARGE_INTEGER liValue);
  162. void PutValue(PROPID pid, ULARGE_INTEGER liValue);
  163. void PutValue(PROPID pid, GUID const & Guid);
  164. void PutValue(PROPID pid, CURRENCY const & cyValue);
  165. void PutDate (PROPID pid, DATE const & date);
  166. void PutValue(PROPID pid, FILETIME const & ftValue);
  167. void PutValue(PROPID pid, unsigned uValue, BYTE bValueKey);
  168. unsigned _cbMaxOutBuf;
  169. unsigned* _pcbOutBuf;
  170. BYTE* _pbOutBuf;
  171. OCCURRENCE* _pOcc;
  172. PKeyRepository& _krep;
  173. };
  174. //+---------------------------------------------------------------------------
  175. //
  176. // Class: CValueReNormalizer
  177. //
  178. // Purpose: Converts a fake Pid to a real pid in a value key
  179. //
  180. // History: 27-Feb-95 DwightKr Created.
  181. //
  182. //----------------------------------------------------------------------------
  183. class CValueReNormalizer
  184. {
  185. public:
  186. inline CValueReNormalizer( CEntry * pEntry );
  187. inline void PutPid( PROPID pid );
  188. private:
  189. CEntry *_pEntry;
  190. };
  191. //+---------------------------------------------------------------------------
  192. //
  193. // Member: CValueReNormalizer::CValueReNormalizer
  194. //
  195. // Synopsis: constructor for a value renormalizer
  196. //
  197. // History: 27-Feb-95 DwightKr Created.
  198. //
  199. // Notes:
  200. //
  201. //----------------------------------------------------------------------------
  202. inline CValueReNormalizer::CValueReNormalizer( CEntry * pEntry )
  203. : _pEntry(pEntry)
  204. {
  205. Win4Assert( _pEntry != 0 );
  206. Win4Assert( _pEntry->Count() >= sizeof(USHORT) + sizeof(PROPID) + 1 );
  207. Win4Assert( _pEntry->Wid() != widInvalid );
  208. Win4Assert( _pEntry->IsValue() );
  209. }
  210. //+---------------------------------------------------------------------------
  211. //
  212. // Member: CValueReNormalizer::PutPid
  213. //
  214. // Synopsis: writes a new pid into the entry key
  215. //
  216. // History: 27-Feb-95 DwightKr Created.
  217. //
  218. // Notes:
  219. //
  220. //----------------------------------------------------------------------------
  221. inline void CValueReNormalizer::PutPid( PROPID pid )
  222. {
  223. BYTE * pbBuffer = _pEntry->GetKeyBuf();
  224. Win4Assert( pbBuffer != 0 );
  225. Win4Assert( *pbBuffer != STRING_KEY );
  226. pbBuffer++; // Skip key type
  227. // store property id
  228. *pbBuffer++ = (BYTE)(pid >> 24);
  229. *pbBuffer++ = (BYTE)(pid >> 16);
  230. *pbBuffer++ = (BYTE)(pid >> 8);
  231. *pbBuffer++ = (BYTE) pid;
  232. }