Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

271 lines
6.4 KiB

  1. //+---------------------------------------------------------------------------
  2. //
  3. // Microsoft Windows
  4. // Copyright (C) Microsoft Corporation, 1991 - 1995.
  5. //
  6. // File: stemmer.cxx
  7. //
  8. // Contents: NLG's FarEast Stemmer
  9. //
  10. // History: 01-July-1996 PatHal Created.
  11. //
  12. //----------------------------------------------------------------------------
  13. #include "pch.cxx"
  14. #pragma hdrstop
  15. #include "stemmer.hxx"
  16. extern long gulcInstances;
  17. //+---------------------------------------------------------------------------
  18. //
  19. // Member: CStemmer::CStemmer
  20. //
  21. // Synopsis: Constructor for the CStemmer class.
  22. //
  23. // Arguments: [lcid] -- locale id
  24. //
  25. //----------------------------------------------------------------------------
  26. CStemmer::CStemmer( LCID lcid )
  27. : _cRefs(1)
  28. {
  29. InterlockedIncrement( &gulcInstances );
  30. }
  31. //+---------------------------------------------------------------------------
  32. //
  33. // Member: CStemmer::~CStemmer
  34. //
  35. // Synopsis: Destructor for the CStemmer class.
  36. //
  37. // Notes: All termination/deallocation is done by embedded smart pointers
  38. //
  39. //----------------------------------------------------------------------------
  40. CStemmer::~CStemmer()
  41. {
  42. InterlockedDecrement( &gulcInstances );
  43. }
  44. //+-------------------------------------------------------------------------
  45. //
  46. // Method: CStemmer::QueryInterface
  47. //
  48. // Synopsis: Rebind to other interface
  49. //
  50. // Arguments: [riid] -- IID of new interface
  51. // [ppvObject] -- New interface * returned here
  52. //
  53. // Returns: S_OK if bind succeeded, E_NOINTERFACE if bind failed
  54. //
  55. //--------------------------------------------------------------------------
  56. SCODE STDMETHODCALLTYPE
  57. CStemmer::QueryInterface( REFIID riid, void ** ppvObject)
  58. {
  59. IUnknown *pUnkTemp;
  60. SCODE sc = S_OK;
  61. switch( riid.Data1 & 0x000000FF )
  62. {
  63. case 0x00:
  64. if ( riid == IID_IUnknown )
  65. pUnkTemp = (IUnknown *)this;
  66. else
  67. sc = E_NOINTERFACE;
  68. break;
  69. case 0x40:
  70. if ( riid == IID_IStemmer )
  71. pUnkTemp = (IUnknown *)(IStemmer *)this;
  72. else
  73. sc = E_NOINTERFACE;
  74. break;
  75. default:
  76. pUnkTemp = 0;
  77. sc = E_NOINTERFACE;
  78. break;
  79. }
  80. if( 0 != pUnkTemp )
  81. {
  82. *ppvObject = (void * )pUnkTemp;
  83. pUnkTemp->AddRef();
  84. }
  85. else
  86. *ppvObject = 0;
  87. return(sc);
  88. }
  89. //+-------------------------------------------------------------------------
  90. //
  91. // Method: CStemmer::AddRef
  92. //
  93. // Synopsis: Increments refcount
  94. //
  95. //--------------------------------------------------------------------------
  96. ULONG STDMETHODCALLTYPE
  97. CStemmer::AddRef()
  98. {
  99. return InterlockedIncrement( &_cRefs );
  100. }
  101. //+-------------------------------------------------------------------------
  102. //
  103. // Method: CStemmer::Release
  104. //
  105. // Synopsis: Decrement refcount. Delete if necessary.
  106. //
  107. //--------------------------------------------------------------------------
  108. ULONG STDMETHODCALLTYPE
  109. CStemmer::Release()
  110. {
  111. unsigned long uTmp = InterlockedDecrement( &_cRefs );
  112. if ( 0 == uTmp )
  113. delete this;
  114. return(uTmp);
  115. }
  116. //+-------------------------------------------------------------------------
  117. //
  118. // Method: CStemmer::Init
  119. //
  120. // Synopsis: Initialize stemmer
  121. //
  122. // Arguments: [ulMaxTokenSize] -- Maximum size token stored by caller
  123. // [pfLicense] -- Set to true if use restricted
  124. //
  125. // Returns: Status code
  126. //
  127. //--------------------------------------------------------------------------
  128. SCODE STDMETHODCALLTYPE
  129. CStemmer::Init(
  130. ULONG ulMaxTokenSize,
  131. BOOL *pfLicense )
  132. {
  133. if ( NULL == pfLicense )
  134. return E_INVALIDARG;
  135. if (IsBadWritePtr(pfLicense, sizeof(DWORD))) {
  136. return E_INVALIDARG;
  137. }
  138. *pfLicense = TRUE;
  139. _ulMaxTokenSize = ulMaxTokenSize;
  140. return S_OK;
  141. }
  142. //+---------------------------------------------------------------------------
  143. //
  144. // Member: CStemmer::GetLicenseToUse
  145. //
  146. // Synopsis: Returns a pointer to vendors license information
  147. //
  148. // Arguments: [ppwcsLicense] -- ptr to ptr to which license info is returned
  149. //
  150. //----------------------------------------------------------------------------
  151. SCODE STDMETHODCALLTYPE
  152. CStemmer::GetLicenseToUse( const WCHAR **ppwcsLicense )
  153. {
  154. static WCHAR const * wcsCopyright = L"Copyright Microsoft, 1991-1998";
  155. if (NULL == ppwcsLicense )
  156. return E_INVALIDARG;
  157. if (IsBadWritePtr(ppwcsLicense, sizeof(DWORD))) {
  158. return E_INVALIDARG;
  159. }
  160. *ppwcsLicense = wcsCopyright;
  161. return( S_OK );
  162. }
  163. //+---------------------------------------------------------------------------
  164. //
  165. // Member: CStemmer::StemWord
  166. //
  167. // Synopsis: Stem a word into its inflected forms, eg swim to swims and swimming
  168. //
  169. // Arguments: [pwcInBuf] -- input Unicode word
  170. // [cwc] -- count of characters in word
  171. // [pStemSink] -- sink to collect inflected forms
  172. //
  173. //----------------------------------------------------------------------------
  174. SCODE STDMETHODCALLTYPE
  175. CStemmer::StemWord(
  176. WCHAR const *pwc,
  177. ULONG cwc,
  178. IStemSink *pStemSink )
  179. {
  180. SCODE sc = S_OK;
  181. if ( NULL == pStemSink || NULL == pwc ) {
  182. return E_FAIL;
  183. }
  184. if ( 0 == cwc) {
  185. return S_OK;
  186. }
  187. CONST WCHAR *pwcStem;
  188. DWORD i;
  189. BYTE ct;
  190. BOOL fRomanWord = FALSE;
  191. __try {
  192. for ( i=0; i< cwc; i++, pwc++) {
  193. ct = GetCharType(*pwc);
  194. if (ct == CH) {
  195. if (!fRomanWord) {
  196. pwcStem = pwc;
  197. fRomanWord = TRUE;
  198. }
  199. }
  200. else {
  201. if (fRomanWord) {
  202. (pStemSink->PutWord)( pwcStem, (DWORD)(pwc - pwcStem) );
  203. fRomanWord = FALSE;
  204. }
  205. // else {
  206. switch (ct) {
  207. case PS:
  208. case WS:
  209. break;
  210. default:
  211. (pStemSink->PutWord)( pwc, 1 );
  212. break;
  213. }
  214. // }
  215. }
  216. }
  217. // put the last English word
  218. if (fRomanWord) {
  219. (pStemSink->PutWord)( pwcStem, (DWORD)(pwc - pwcStem) );
  220. fRomanWord = FALSE;
  221. }
  222. // output inflected words to stemmer sink in EnumInflections callback
  223. } __except (1) {
  224. sc = E_UNEXPECTED;
  225. }
  226. return sc;
  227. }