Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

222 lines
6.1 KiB

  1. //+---------------------------------------------------------------------------
  2. //
  3. // Microsoft Windows
  4. // Copyright (C) Microsoft Corporation, 1991 - 2000
  5. //
  6. // File: stemsink.cxx
  7. //
  8. // Contents: IStemSink implementation
  9. //
  10. // History: 03-May-95 SitaramR Created
  11. //
  12. //----------------------------------------------------------------------------
  13. #include <pch.cxx>
  14. #pragma hdrstop
  15. #include <norm.hxx>
  16. #include <stemsink.hxx>
  17. //+---------------------------------------------------------------------------
  18. //
  19. // Member: CStemmerSink::CStemmerSink
  20. //
  21. // Synopsis: Constructor
  22. //
  23. // Arguments: [pStemmer] -- stemmer
  24. // [wordRep] -- normalizer, which is the next stage in filtering
  25. // pipeline
  26. //
  27. // History: 03-May-95 SitaramR Created
  28. //
  29. //----------------------------------------------------------------------------
  30. CStemmerSink::CStemmerSink( IStemmer *pStemmer, PWordRepository& wordRep )
  31. : _pStemmer(pStemmer),
  32. _wordRep(wordRep),
  33. _fWBreakAltWord(FALSE)
  34. {
  35. _cwcMaxNormBuf = wordRep.GetMaxBufferLen();
  36. }
  37. //+---------------------------------------------------------------------------
  38. //
  39. // Member: CStemmerSink::GetFlags
  40. //
  41. // Synopsis: Returns address of ranking and range flags
  42. //
  43. // Arguments: [ppRange] -- range flag
  44. // [ppRank] -- rank flag
  45. //
  46. // History: 03-May-95 SitaramR Created.
  47. //
  48. //----------------------------------------------------------------------------
  49. void CStemmerSink::GetFlags ( BOOL** ppRange, CI_RANK** ppRank )
  50. {
  51. _wordRep.GetFlags ( ppRange, ppRank );
  52. }
  53. //+---------------------------------------------------------------------------
  54. //
  55. // Member: CStemmerSink::ProcessWord
  56. //
  57. // Synopsis: Stems word
  58. //
  59. // Arguments: [pwcInBuf] -- input buffer
  60. // [cwc] -- count of characters in pwcInBuf
  61. //
  62. // History: 03-May-95 SitaramR Created
  63. //
  64. //----------------------------------------------------------------------------
  65. void CStemmerSink::ProcessWord( WCHAR const *pwcInBuf, ULONG cwc )
  66. {
  67. _fWBreakAltWord = FALSE;
  68. _pStemmer->StemWord( pwcInBuf, cwc, this );
  69. }
  70. //+---------------------------------------------------------------------------
  71. //
  72. // Member: CStemmerSink::ProcessAltWord
  73. //
  74. // Synopsis: Stems alternate word
  75. //
  76. // Arguments: [pwcInBuf] -- input buffer
  77. // [cwc] -- count of characters in pwcInBuf
  78. //
  79. // History: 03-May-95 SitaramR Created
  80. //
  81. //----------------------------------------------------------------------------
  82. void CStemmerSink::ProcessAltWord( WCHAR const *pwcInBuf, ULONG cwc )
  83. {
  84. _fWBreakAltWord = TRUE;
  85. _pStemmer->StemWord( pwcInBuf, cwc, this );
  86. }
  87. //+-------------------------------------------------------------------------
  88. //
  89. // Method: CStemmerSink::PutWord
  90. //
  91. // Synopsis: pass stemmed word to normalizer
  92. //
  93. // Arguments: [pwcInBuf] -- Word
  94. // [cwc] -- Count of characters in [pwcInBuf]
  95. //
  96. // History: 03-May-1995 SitaramR Created
  97. //
  98. //--------------------------------------------------------------------------
  99. SCODE STDMETHODCALLTYPE CStemmerSink::PutWord( WCHAR const *pwcInBuf, ULONG cwc )
  100. {
  101. // IWordBreaker::PutAltWord overrides IStemmer::PutWord
  102. return ( PutStemmedWord( pwcInBuf, cwc, _fWBreakAltWord ) );
  103. }
  104. //+-------------------------------------------------------------------------
  105. //
  106. // Method: CStemmerSink::PutAltWord
  107. //
  108. // Synopsis: pass stemmed word to normalizer
  109. //
  110. // Arguments: [pwcInBuf] -- Word
  111. // [cwc] -- Count of characters in [pwcInBuf]
  112. //
  113. // History: 03-May-1995 SitaramR Created
  114. //
  115. //--------------------------------------------------------------------------
  116. SCODE STDMETHODCALLTYPE CStemmerSink::PutAltWord( WCHAR const *pwcInBuf, ULONG cwc )
  117. {
  118. return ( PutStemmedWord( pwcInBuf, cwc, TRUE ) );
  119. }
  120. //+-------------------------------------------------------------------------
  121. //
  122. // Method: CStemmerSink::PutStemmedWord
  123. //
  124. // Synopsis: actual implementation of stemmer sink methods; it puts a word
  125. // into the word repository
  126. //
  127. // Arguments: [pwcInBuf] -- Word
  128. // [cwc] -- Count of characters in [pwcInBuf]
  129. // [fAltWord] -- Is this an alternate word ? Determining whether
  130. // this word is an alternate word or not is complicated
  131. // by the fact that IWBreaker::PutAltWord overrides the
  132. // IStemmer::PutWord.
  133. //
  134. // History: 03-May-1995 SitaramR Created
  135. //
  136. //--------------------------------------------------------------------------
  137. SCODE CStemmerSink::PutStemmedWord( WCHAR const *pwcInBuf, ULONG cwc, BOOL fAltWord )
  138. {
  139. SCODE sc = S_OK;
  140. CTranslateSystemExceptions translate;
  141. TRY
  142. {
  143. if ( cwc > _cwcMaxNormBuf )
  144. {
  145. sc = LANGUAGE_S_LARGE_WORD;
  146. cwc = _cwcMaxNormBuf;
  147. }
  148. if ( cwc > 0 )
  149. {
  150. #if CIDBG == 1
  151. if ( fAltWord )
  152. ciDebugOut(( DEB_WORDS,
  153. "PutAltWord(IStemSink): \"%.*ws\" Occ = %d\n",
  154. cwc, pwcInBuf, _wordRep.GetOccurrence() ));
  155. else
  156. ciDebugOut(( DEB_WORDS,
  157. "PutWord(IStemSink): \"%.*ws\" Occ = %d\n",
  158. cwc, pwcInBuf, _wordRep.GetOccurrence() ));
  159. #endif
  160. if ( fAltWord )
  161. _wordRep.ProcessAltWord( pwcInBuf, cwc );
  162. else
  163. _wordRep.ProcessWord( pwcInBuf, cwc );
  164. }
  165. }
  166. CATCH( CException, e )
  167. {
  168. sc = e.GetErrorCode();
  169. }
  170. END_CATCH;
  171. return sc;
  172. } //PutStemmedWord
  173. //
  174. // The following are needed to make midl happy. There are no other interfaces
  175. // to bind to. Inheritance from IUnknown is unnecessary.
  176. //
  177. SCODE STDMETHODCALLTYPE CStemmerSink::QueryInterface(REFIID riid, void * * ppvObject)
  178. {
  179. *ppvObject = 0;
  180. return( E_NOTIMPL );
  181. }
  182. ULONG STDMETHODCALLTYPE CStemmerSink::AddRef()
  183. {
  184. return( 1 );
  185. }
  186. ULONG STDMETHODCALLTYPE CStemmerSink::Release()
  187. {
  188. return( 1 );
  189. }