Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

272 lines
6.4 KiB

  1. //+---------------------------------------------------------------------------
  2. //
  3. // Microsoft Windows
  4. // Copyright (C) Microsoft Corporation, 1991 - 1995.
  5. //
  6. // File: stemmer.cxx
  7. //
  8. // Contents: NLG's FarEast Stemmer
  9. //
  10. // History: 01-July-1996 PatHal Created.
  11. // weibz Merged and Modified to NT5
  12. //
  13. //----------------------------------------------------------------------------
  14. #include "pch.cxx"
  15. #pragma hdrstop
  16. #include "stemmer.hxx"
  17. extern long gulcInstances;
  18. //+---------------------------------------------------------------------------
  19. //
  20. // Member: CStemmer::CStemmer
  21. //
  22. // Synopsis: Constructor for the CStemmer class.
  23. //
  24. // Arguments: [lcid] -- locale id
  25. //
  26. //----------------------------------------------------------------------------
  27. CStemmer::CStemmer( LCID lcid )
  28. : _cRefs(1)
  29. {
  30. InterlockedIncrement( &gulcInstances );
  31. }
  32. //+---------------------------------------------------------------------------
  33. //
  34. // Member: CStemmer::~CStemmer
  35. //
  36. // Synopsis: Destructor for the CStemmer class.
  37. //
  38. // Notes: All termination/deallocation is done by embedded smart pointers
  39. //
  40. //----------------------------------------------------------------------------
  41. CStemmer::~CStemmer()
  42. {
  43. InterlockedDecrement( &gulcInstances );
  44. }
  45. //+-------------------------------------------------------------------------
  46. //
  47. // Method: CStemmer::QueryInterface
  48. //
  49. // Synopsis: Rebind to other interface
  50. //
  51. // Arguments: [riid] -- IID of new interface
  52. // [ppvObject] -- New interface * returned here
  53. //
  54. // Returns: S_OK if bind succeeded, E_NOINTERFACE if bind failed
  55. //
  56. //--------------------------------------------------------------------------
  57. SCODE STDMETHODCALLTYPE
  58. CStemmer::QueryInterface( REFIID riid, void ** ppvObject)
  59. {
  60. IUnknown *pUnkTemp;
  61. SCODE sc = S_OK;
  62. switch( riid.Data1 & 0x000000FF )
  63. {
  64. case 0x00:
  65. if ( riid == IID_IUnknown )
  66. pUnkTemp = (IUnknown *)this;
  67. else
  68. sc = E_NOINTERFACE;
  69. break;
  70. case 0x40:
  71. if ( riid == IID_IStemmer )
  72. pUnkTemp = (IUnknown *)(IStemmer *)this;
  73. else
  74. sc = E_NOINTERFACE;
  75. break;
  76. default:
  77. pUnkTemp = 0;
  78. sc = E_NOINTERFACE;
  79. break;
  80. }
  81. if( 0 != pUnkTemp )
  82. {
  83. *ppvObject = (void * )pUnkTemp;
  84. pUnkTemp->AddRef();
  85. }
  86. else
  87. *ppvObject = 0;
  88. return(sc);
  89. }
  90. //+-------------------------------------------------------------------------
  91. //
  92. // Method: CStemmer::AddRef
  93. //
  94. // Synopsis: Increments refcount
  95. //
  96. //--------------------------------------------------------------------------
  97. ULONG STDMETHODCALLTYPE
  98. CStemmer::AddRef()
  99. {
  100. return InterlockedIncrement( &_cRefs );
  101. }
  102. //+-------------------------------------------------------------------------
  103. //
  104. // Method: CStemmer::Release
  105. //
  106. // Synopsis: Decrement refcount. Delete if necessary.
  107. //
  108. //--------------------------------------------------------------------------
  109. ULONG STDMETHODCALLTYPE
  110. CStemmer::Release()
  111. {
  112. unsigned long uTmp = InterlockedDecrement( &_cRefs );
  113. if ( 0 == uTmp )
  114. delete this;
  115. return(uTmp);
  116. }
  117. //+-------------------------------------------------------------------------
  118. //
  119. // Method: CStemmer::Init
  120. //
  121. // Synopsis: Initialize stemmer
  122. //
  123. // Arguments: [ulMaxTokenSize] -- Maximum size token stored by caller
  124. // [pfLicense] -- Set to true if use restricted
  125. //
  126. // Returns: Status code
  127. //
  128. //--------------------------------------------------------------------------
  129. SCODE STDMETHODCALLTYPE
  130. CStemmer::Init(
  131. ULONG ulMaxTokenSize,
  132. BOOL *pfLicense )
  133. {
  134. if ( NULL == pfLicense )
  135. return E_INVALIDARG;
  136. if (IsBadWritePtr(pfLicense, sizeof(DWORD))) {
  137. return E_INVALIDARG;
  138. }
  139. *pfLicense = TRUE;
  140. _ulMaxTokenSize = ulMaxTokenSize;
  141. return S_OK;
  142. }
  143. //+---------------------------------------------------------------------------
  144. //
  145. // Member: CStemmer::GetLicenseToUse
  146. //
  147. // Synopsis: Returns a pointer to vendors license information
  148. //
  149. // Arguments: [ppwcsLicense] -- ptr to ptr to which license info is returned
  150. //
  151. //----------------------------------------------------------------------------
  152. SCODE STDMETHODCALLTYPE
  153. CStemmer::GetLicenseToUse( const WCHAR **ppwcsLicense )
  154. {
  155. static WCHAR const * wcsCopyright = L"Copyright Microsoft, 1991-1998";
  156. if ( NULL == ppwcsLicense )
  157. return E_INVALIDARG;
  158. if (IsBadWritePtr(ppwcsLicense, sizeof(DWORD))) {
  159. return E_INVALIDARG;
  160. }
  161. *ppwcsLicense = wcsCopyright;
  162. return( S_OK );
  163. }
  164. //+---------------------------------------------------------------------------
  165. //
  166. // Member: CStemmer::StemWord
  167. //
  168. // Synopsis: Stem a word into its inflected forms, eg swim to swims and swimming
  169. //
  170. // Arguments: [pwcInBuf] -- input Unicode word
  171. // [cwc] -- count of characters in word
  172. // [pStemSink] -- sink to collect inflected forms
  173. //
  174. //----------------------------------------------------------------------------
  175. SCODE STDMETHODCALLTYPE
  176. CStemmer::StemWord(
  177. WCHAR const *pwc,
  178. ULONG cwc,
  179. IStemSink *pStemSink )
  180. {
  181. SCODE sc = S_OK;
  182. if ( NULL == pStemSink || NULL == pwc ) {
  183. return E_FAIL;
  184. }
  185. if ( 0 == cwc) {
  186. return S_OK;
  187. }
  188. CONST WCHAR *pwcStem;
  189. DWORD i;
  190. BYTE ct;
  191. BOOL fRomanWord = FALSE;
  192. __try {
  193. for ( i=0; i< cwc; i++, pwc++) {
  194. ct = GetCharType(*pwc);
  195. if (ct == CH) {
  196. if (!fRomanWord) {
  197. pwcStem = pwc;
  198. fRomanWord = TRUE;
  199. }
  200. }
  201. else {
  202. if (fRomanWord) {
  203. (pStemSink->PutWord)( pwcStem, (DWORD)(pwc - pwcStem) );
  204. fRomanWord = FALSE;
  205. }
  206. // else {
  207. switch (ct) {
  208. case PS:
  209. case WS:
  210. break;
  211. default:
  212. (pStemSink->PutWord)( pwc, 1 );
  213. break;
  214. }
  215. // }
  216. }
  217. }
  218. // put the last English word
  219. if (fRomanWord) {
  220. (pStemSink->PutWord)( pwcStem, (DWORD)(pwc - pwcStem) );
  221. fRomanWord = FALSE;
  222. }
  223. // output inflected words to stemmer sink in EnumInflections callback
  224. } __except (1) {
  225. sc = E_UNEXPECTED;
  226. }
  227. return sc;
  228. }