//+--------------------------------------------------------------------------- // // Microsoft Windows // Copyright (C) Microsoft Corporation, 1991 - 2000 // // File: stemsink.cxx // // Contents: IWordformSink implementation // // History: 03-May-95 SitaramR Created // //---------------------------------------------------------------------------- #include #pragma hdrstop #include #include //+--------------------------------------------------------------------------- // // Member: CStemmerSink::CStemmerSink // // Synopsis: Constructor // // Arguments: [pStemmer] -- stemmer // [wordRep] -- normalizer, which is the next stage in filtering // pipeline // // History: 03-May-95 SitaramR Created // //---------------------------------------------------------------------------- CStemmerSink::CStemmerSink( IStemmer *pStemmer, PWordRepository& wordRep ) : _pStemmer(pStemmer), _wordRep(wordRep), _fWBreakAltWord(FALSE) { _cwcMaxNormBuf = wordRep.GetMaxBufferLen(); } //+--------------------------------------------------------------------------- // // Member: CStemmerSink::GetFlags // // Synopsis: Returns address of ranking and range flags // // Arguments: [ppRange] -- range flag // [ppRank] -- rank flag // // History: 03-May-95 SitaramR Created. // //---------------------------------------------------------------------------- void CStemmerSink::GetFlags ( BOOL** ppRange, CI_RANK** ppRank ) { _wordRep.GetFlags ( ppRange, ppRank ); } //+--------------------------------------------------------------------------- // // Member: CStemmerSink::ProcessWord // // Synopsis: Stems word // // Arguments: [pwcInBuf] -- input buffer // [cwc] -- count of words in pwcInBuf // // History: 03-May-95 SitaramR Created // //---------------------------------------------------------------------------- void CStemmerSink::ProcessWord( WCHAR const *pwcInBuf, ULONG cwc ) { _fWBreakAltWord = FALSE; _pStemmer->GenerateWordForms( pwcInBuf, cwc, this ); } //+--------------------------------------------------------------------------- // // Member: CStemmerSink::ProcessAltWord // // Synopsis: Stems alternate word // // Arguments: [pwcInBuf] -- input buffer // [cwc] -- count of words in pwcInBuf // // History: 03-May-95 SitaramR Created // //---------------------------------------------------------------------------- void CStemmerSink::ProcessAltWord( WCHAR const *pwcInBuf, ULONG cwc ) { _fWBreakAltWord = TRUE; _pStemmer->GenerateWordForms( pwcInBuf, cwc, this ); } //+------------------------------------------------------------------------- // // Method: CStemmerSink::PutWord // // Synopsis: pass stemmed word to normalizer // // Arguments: [pwcInBuf] -- Word // [cwc] -- Count of characters in [pwcInBuf] // // History: 03-May-1995 SitaramR Created // //-------------------------------------------------------------------------- SCODE STDMETHODCALLTYPE CStemmerSink::PutWord( WCHAR const *pwcInBuf, ULONG cwc ) { // IWordBreaker::PutAltWord overrides IStemmer::PutWord return ( PutStemmedWord( pwcInBuf, cwc, _fWBreakAltWord ) ); } //+------------------------------------------------------------------------- // // Method: CStemmerSink::PutAltWord // // Synopsis: pass stemmed word to normalizer // // Arguments: [pwcInBuf] -- Word // [cwc] -- Count of characters in [pwcInBuf] // // History: 03-May-1995 SitaramR Created // //-------------------------------------------------------------------------- SCODE STDMETHODCALLTYPE CStemmerSink::PutAltWord( WCHAR const *pwcInBuf, ULONG cwc ) { return ( PutStemmedWord( pwcInBuf, cwc, TRUE ) ); } //+------------------------------------------------------------------------- // // Method: CStemmerSink::PutStemmedWord // // Synopsis: actual implementation of stemmer sink methods; it puts a word // into the word repository // // Arguments: [pwcInBuf] -- Word // [cwc] -- Count of characters in [pwcInBuf] // [fAltWord] -- Is this an alternate word ? Determining whether // this word is an alternate word or not is complicated // by the fact that IWBreaker::PutAltWord overrides the // IStemmer::PutWord. // // History: 03-May-1995 SitaramR Created // //-------------------------------------------------------------------------- SCODE CStemmerSink::PutStemmedWord( WCHAR const *pwcInBuf, ULONG cwc, BOOL fAltWord ) { SCODE sc = S_OK; CTranslateSystemExceptions translate; TRY { if ( cwc > _cwcMaxNormBuf ) { sc = LANGUAGE_S_LARGE_WORD; cwc = _cwcMaxNormBuf; } if ( cwc > 0 ) { #if CIDBG == 1 if ( fAltWord ) ciDebugOut(( DEB_WORDS, "PutAltWord(IWordFormSink): \"%.*ws\" Occ = %d\n", cwc, pwcInBuf, _wordRep.GetOccurrence() )); else ciDebugOut(( DEB_WORDS, "PutWord(IWordFormSink): \"%.*ws\" Occ = %d\n", cwc, pwcInBuf, _wordRep.GetOccurrence() )); #endif if ( fAltWord ) _wordRep.ProcessAltWord( pwcInBuf, cwc ); else _wordRep.ProcessWord( pwcInBuf, cwc ); } } CATCH( CException, e ) { sc = e.GetErrorCode(); } END_CATCH; return sc; } //PutStemmedWord // // The following are needed to make midl happy. There are no other interfaces // to bind to. Inheritance from IUnknown is unnecessary. // SCODE STDMETHODCALLTYPE CStemmerSink::QueryInterface(REFIID riid, void * * ppvObject) { *ppvObject = 0; return( E_NOTIMPL ); } ULONG STDMETHODCALLTYPE CStemmerSink::AddRef() { return( 1 ); } ULONG STDMETHODCALLTYPE CStemmerSink::Release() { return( 1 ); }