Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

264 lines
6.2 KiB

  1. //+---------------------------------------------------------------------------
  2. //
  3. // Microsoft Windows
  4. // Copyright (C) Microsoft Corporation, 1991 - 1995.
  5. //
  6. // File: stemmer.cxx
  7. //
  8. // Contents: NLG's FarEast Stemmer
  9. //
  10. // History: 01-July-1996 PatHal Created.
  11. //
  12. //----------------------------------------------------------------------------
  13. #include "pch.cxx"
  14. #pragma hdrstop
  15. #include "stemmer.hxx"
  16. extern long gulcInstances;
  17. //+---------------------------------------------------------------------------
  18. //
  19. // Member: CStemmer::CStemmer
  20. //
  21. // Synopsis: Constructor for the CStemmer class.
  22. //
  23. // Arguments: [lcid] -- locale id
  24. //
  25. //----------------------------------------------------------------------------
  26. CStemmer::CStemmer( LCID lcid )
  27. : _cRefs(1)
  28. {
  29. InterlockedIncrement( &gulcInstances );
  30. }
  31. //+---------------------------------------------------------------------------
  32. //
  33. // Member: CStemmer::~CStemmer
  34. //
  35. // Synopsis: Destructor for the CStemmer class.
  36. //
  37. // Notes: All termination/deallocation is done by embedded smart pointers
  38. //
  39. //----------------------------------------------------------------------------
  40. CStemmer::~CStemmer()
  41. {
  42. InterlockedDecrement( &gulcInstances );
  43. }
  44. //+-------------------------------------------------------------------------
  45. //
  46. // Method: CStemmer::QueryInterface
  47. //
  48. // Synopsis: Rebind to other interface
  49. //
  50. // Arguments: [riid] -- IID of new interface
  51. // [ppvObject] -- New interface * returned here
  52. //
  53. // Returns: S_OK if bind succeeded, E_NOINTERFACE if bind failed
  54. //
  55. //--------------------------------------------------------------------------
  56. SCODE STDMETHODCALLTYPE
  57. CStemmer::QueryInterface( REFIID riid, void ** ppvObject)
  58. {
  59. IUnknown *pUnkTemp;
  60. SCODE sc = S_OK;
  61. switch( riid.Data1 & 0x000000FF )
  62. {
  63. case 0x00:
  64. if ( riid == IID_IUnknown )
  65. pUnkTemp = (IUnknown *)this;
  66. else
  67. sc = E_NOINTERFACE;
  68. break;
  69. case 0x40:
  70. if ( riid == IID_IStemmer )
  71. pUnkTemp = (IUnknown *)(IStemmer *)this;
  72. else
  73. sc = E_NOINTERFACE;
  74. break;
  75. default:
  76. pUnkTemp = 0;
  77. sc = E_NOINTERFACE;
  78. break;
  79. }
  80. if( 0 != pUnkTemp )
  81. {
  82. *ppvObject = (void * )pUnkTemp;
  83. pUnkTemp->AddRef();
  84. }
  85. else
  86. *ppvObject = 0;
  87. return(sc);
  88. }
  89. //+-------------------------------------------------------------------------
  90. //
  91. // Method: CStemmer::AddRef
  92. //
  93. // Synopsis: Increments refcount
  94. //
  95. //--------------------------------------------------------------------------
  96. ULONG STDMETHODCALLTYPE
  97. CStemmer::AddRef()
  98. {
  99. return InterlockedIncrement( &_cRefs );
  100. }
  101. //+-------------------------------------------------------------------------
  102. //
  103. // Method: CStemmer::Release
  104. //
  105. // Synopsis: Decrement refcount. Delete if necessary.
  106. //
  107. //--------------------------------------------------------------------------
  108. ULONG STDMETHODCALLTYPE
  109. CStemmer::Release()
  110. {
  111. unsigned long uTmp = InterlockedDecrement( &_cRefs );
  112. if ( 0 == uTmp )
  113. delete this;
  114. return(uTmp);
  115. }
  116. //+-------------------------------------------------------------------------
  117. //
  118. // Method: CStemmer::Init
  119. //
  120. // Synopsis: Initialize stemmer
  121. //
  122. // Arguments: [ulMaxTokenSize] -- Maximum size token stored by caller
  123. // [pfLicense] -- Set to true if use restricted
  124. //
  125. // Returns: Status code
  126. //
  127. //--------------------------------------------------------------------------
  128. SCODE STDMETHODCALLTYPE
  129. CStemmer::Init(
  130. ULONG ulMaxTokenSize,
  131. BOOL *pfLicense )
  132. {
  133. if (IsBadWritePtr(pfLicense, sizeof(DWORD))) {
  134. return E_FAIL;
  135. }
  136. *pfLicense = TRUE;
  137. _ulMaxTokenSize = ulMaxTokenSize;
  138. return S_OK;
  139. }
  140. //+---------------------------------------------------------------------------
  141. //
  142. // Member: CStemmer::GetLicenseToUse
  143. //
  144. // Synopsis: Returns a pointer to vendors license information
  145. //
  146. // Arguments: [ppwcsLicense] -- ptr to ptr to which license info is returned
  147. //
  148. //----------------------------------------------------------------------------
  149. SCODE STDMETHODCALLTYPE
  150. CStemmer::GetLicenseToUse( const WCHAR **ppwcsLicense )
  151. {
  152. static WCHAR const * wcsCopyright = L"Copyright Microsoft, 1991-1995";
  153. if (IsBadWritePtr(ppwcsLicense, sizeof(DWORD))) {
  154. return ( E_FAIL );
  155. }
  156. *ppwcsLicense = wcsCopyright;
  157. return( S_OK );
  158. }
  159. //+---------------------------------------------------------------------------
  160. //
  161. // Member: CStemmer::StemWord
  162. //
  163. // Synopsis: Stem a word into its inflected forms, eg swim to swims and swimming
  164. //
  165. // Arguments: [pwcInBuf] -- input Unicode word
  166. // [cwc] -- count of characters in word
  167. // [pStemSink] -- sink to collect inflected forms
  168. //
  169. //----------------------------------------------------------------------------
  170. SCODE STDMETHODCALLTYPE
  171. CStemmer::StemWord(
  172. WCHAR const *pwc,
  173. ULONG cwc,
  174. IStemSink *pStemSink )
  175. {
  176. SCODE sc = S_OK;
  177. if ( 0 == pStemSink || 0 == pwc ) {
  178. return E_FAIL;
  179. }
  180. if ( 0 == cwc) {
  181. return S_OK;
  182. }
  183. CONST WCHAR *pwcStem;
  184. DWORD i;
  185. BYTE ct;
  186. BOOL fRomanWord = FALSE;
  187. __try {
  188. for ( i=1; i< ( cwc - 1 ); i++, pwc++) {
  189. ct = GetCharType(*pwc);
  190. if (ct == CH) {
  191. if (!fRomanWord) {
  192. pwcStem = pwc;
  193. fRomanWord = TRUE;
  194. }
  195. }
  196. else {
  197. if (fRomanWord) {
  198. (pStemSink->PutWord)( pwcStem, pwc - pwcStem );
  199. fRomanWord = FALSE;
  200. }
  201. else {
  202. switch (ct) {
  203. case PS:
  204. case WS:
  205. break;
  206. default:
  207. (pStemSink->PutWord)( pwc, 2 );
  208. break;
  209. }
  210. }
  211. }
  212. }
  213. // put the last English word
  214. if (fRomanWord) {
  215. (pStemSink->PutWord)( pwcStem, pwc - pwcStem );
  216. fRomanWord = FALSE;
  217. }
  218. // output inflected words to stemmer sink in EnumInflections callback
  219. } __except (1) {
  220. sc = E_UNEXPECTED;
  221. }
  222. return sc;
  223. }