Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

241 lines
5.6 KiB

  1. //+---------------------------------------------------------------------------
  2. //
  3. // Microsoft Windows
  4. // Copyright (C) Microsoft Corporation, 1997 - 1999.
  5. //
  6. // File: stemmer.cxx
  7. //
  8. // Contents: Korean Stemmer
  9. //
  10. // History: weibz, 10-Sep-1997 created
  11. //
  12. //----------------------------------------------------------------------------
  13. #include <pch.cxx>
  14. #include "stemmer.hxx"
  15. extern long gulcInstances;
  16. //+---------------------------------------------------------------------------
  17. //
  18. // Member: CStemmer::CStemmer
  19. //
  20. // Synopsis: Constructor for the CStemmer class.
  21. //
  22. // Arguments: [lcid] -- locale id
  23. //
  24. //----------------------------------------------------------------------------
  25. CStemmer::CStemmer( LCID lcid )
  26. : _cRefs(1)
  27. {
  28. InterlockedIncrement( &gulcInstances );
  29. }
  30. //+---------------------------------------------------------------------------
  31. //
  32. // Member: CStemmer::~CStemmer
  33. //
  34. // Synopsis: Destructor for the CStemmer class.
  35. //
  36. // Notes: All termination/deallocation is done by embedded smart pointers
  37. //
  38. //----------------------------------------------------------------------------
  39. CStemmer::~CStemmer()
  40. {
  41. InterlockedDecrement( &gulcInstances );
  42. }
  43. //+-------------------------------------------------------------------------
  44. //
  45. // Method: CStemmer::QueryInterface
  46. //
  47. // Synopsis: Rebind to other interface
  48. //
  49. // Arguments: [riid] -- IID of new interface
  50. // [ppvObject] -- New interface * returned here
  51. //
  52. // Returns: S_OK if bind succeeded, E_NOINTERFACE if bind failed
  53. //
  54. //--------------------------------------------------------------------------
  55. SCODE STDMETHODCALLTYPE
  56. CStemmer::QueryInterface( REFIID riid, void ** ppvObject)
  57. {
  58. IUnknown *pUnkTemp;
  59. SCODE sc = S_OK;
  60. switch( riid.Data1 & 0x000000FF )
  61. {
  62. case 0x00:
  63. if ( riid == IID_IUnknown )
  64. pUnkTemp = (IUnknown *)this;
  65. else
  66. sc = E_NOINTERFACE;
  67. break;
  68. case 0x40:
  69. if ( riid == IID_IStemmer )
  70. pUnkTemp = (IUnknown *)(IStemmer *)this;
  71. else
  72. sc = E_NOINTERFACE;
  73. break;
  74. default:
  75. pUnkTemp = 0;
  76. sc = E_NOINTERFACE;
  77. break;
  78. }
  79. if( 0 != pUnkTemp )
  80. {
  81. *ppvObject = (void * )pUnkTemp;
  82. pUnkTemp->AddRef();
  83. }
  84. else
  85. *ppvObject = 0;
  86. return(sc);
  87. }
  88. //+-------------------------------------------------------------------------
  89. //
  90. // Method: CStemmer::AddRef
  91. //
  92. // Synopsis: Increments refcount
  93. //
  94. //--------------------------------------------------------------------------
  95. ULONG STDMETHODCALLTYPE
  96. CStemmer::AddRef()
  97. {
  98. return InterlockedIncrement( &_cRefs );
  99. }
  100. //+-------------------------------------------------------------------------
  101. //
  102. // Method: CStemmer::Release
  103. //
  104. // Synopsis: Decrement refcount. Delete if necessary.
  105. //
  106. //--------------------------------------------------------------------------
  107. ULONG STDMETHODCALLTYPE
  108. CStemmer::Release()
  109. {
  110. unsigned long uTmp = InterlockedDecrement( &_cRefs );
  111. if ( 0 == uTmp )
  112. delete this;
  113. return(uTmp);
  114. }
  115. //+-------------------------------------------------------------------------
  116. //
  117. // Method: CStemmer::Init
  118. //
  119. // Synopsis: Initialize stemmer
  120. //
  121. // Arguments: [ulMaxTokenSize] -- Maximum size token stored by caller
  122. // [pfLicense] -- Set to true if use restricted
  123. //
  124. // Returns: Status code
  125. //
  126. //--------------------------------------------------------------------------
  127. SCODE STDMETHODCALLTYPE
  128. CStemmer::Init(
  129. ULONG ulMaxTokenSize,
  130. BOOL *pfLicense )
  131. {
  132. if ( NULL == pfLicense )
  133. return E_INVALIDARG;
  134. if (IsBadWritePtr(pfLicense, sizeof(DWORD)))
  135. return E_INVALIDARG;
  136. if ( !StemInit() )
  137. return LANGUAGE_E_DATABASE_NOT_FOUND;
  138. *pfLicense = TRUE;
  139. _ulMaxTokenSize = ulMaxTokenSize;
  140. return S_OK;
  141. }
  142. //+---------------------------------------------------------------------------
  143. //
  144. // Member: CStemmer::GetLicenseToUse
  145. //
  146. // Synopsis: Returns a pointer to vendors license information
  147. //
  148. // Arguments: [ppwcsLicense] -- ptr to ptr to which license info is returned
  149. //
  150. //----------------------------------------------------------------------------
  151. SCODE STDMETHODCALLTYPE
  152. CStemmer::GetLicenseToUse( const WCHAR **ppwcsLicense )
  153. {
  154. static WCHAR const * wcsCopyright = L"Copyright Microsoft, 1991-1998";
  155. if ( NULL == ppwcsLicense ) {
  156. return E_INVALIDARG;
  157. }
  158. if (IsBadWritePtr(ppwcsLicense, sizeof(DWORD))) {
  159. return E_INVALIDARG;
  160. }
  161. *ppwcsLicense = wcsCopyright;
  162. return( S_OK );
  163. }
  164. //+---------------------------------------------------------------------------
  165. //
  166. // Member: CStemmer::StemWord
  167. //
  168. // Synopsis: Stem a word into its inflected forms, eg swim to swims and swimming
  169. //
  170. // Arguments: [pwcInBuf] -- input Unicode word
  171. // [cwc] -- count of characters in word
  172. // [pStemSink] -- sink to collect inflected forms
  173. //
  174. //----------------------------------------------------------------------------
  175. SCODE STDMETHODCALLTYPE
  176. CStemmer::StemWord(
  177. WCHAR const *pwcInBuf,
  178. ULONG cwc,
  179. IStemSink *pStemSink )
  180. {
  181. INT nReturn;
  182. SCODE sc = S_OK;
  183. #ifdef KORDBG
  184. ULONG i;
  185. OutputDebugString("\n Stemword\n");
  186. for (i=0; i<cwc; i++)
  187. {
  188. WORD wtmp;
  189. char ctmp[80];
  190. wtmp = pwcInBuf[i];
  191. sprintf(ctmp, "%4x ", wtmp);
  192. OutputDebugString(ctmp);
  193. }
  194. #endif
  195. if ( NULL == pStemSink || NULL == pwcInBuf) {
  196. return E_FAIL;
  197. }
  198. // Currently, Korean stemmer doesn't make inflection form for tripolli.
  199. pStemSink->PutWord (pwcInBuf, cwc);
  200. return sc;
  201. }