Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

222 lines
5.2 KiB

  1. //+---------------------------------------------------------------------------
  2. //
  3. // Microsoft Windows
  4. // Copyright (C) Microsoft Corporation, 1997.
  5. //
  6. // File: stemmer.cxx
  7. //
  8. // Contents: Thai Stemmer
  9. //
  10. // History: weibz, 10-Nov-1997 created
  11. //
  12. //----------------------------------------------------------------------------
  13. #include <pch.cxx>
  14. #include "stemmer.hxx"
  15. extern long gulcInstances;
  16. //+---------------------------------------------------------------------------
  17. //
  18. // Member: CStemmer::CStemmer
  19. //
  20. // Synopsis: Constructor for the CStemmer class.
  21. //
  22. // Arguments: [lcid] -- locale id
  23. //
  24. //----------------------------------------------------------------------------
  25. CStemmer::CStemmer( LCID lcid )
  26. : _cRefs(1)
  27. {
  28. InterlockedIncrement( &gulcInstances );
  29. }
  30. //+---------------------------------------------------------------------------
  31. //
  32. // Member: CStemmer::~CStemmer
  33. //
  34. // Synopsis: Destructor for the CStemmer class.
  35. //
  36. // Notes: All termination/deallocation is done by embedded smart pointers
  37. //
  38. //----------------------------------------------------------------------------
  39. CStemmer::~CStemmer()
  40. {
  41. InterlockedDecrement( &gulcInstances );
  42. }
  43. //+-------------------------------------------------------------------------
  44. //
  45. // Method: CStemmer::QueryInterface
  46. //
  47. // Synopsis: Rebind to other interface
  48. //
  49. // Arguments: [riid] -- IID of new interface
  50. // [ppvObject] -- New interface * returned here
  51. //
  52. // Returns: S_OK if bind succeeded, E_NOINTERFACE if bind failed
  53. //
  54. //--------------------------------------------------------------------------
  55. SCODE STDMETHODCALLTYPE
  56. CStemmer::QueryInterface( REFIID riid, void ** ppvObject)
  57. {
  58. if ( 0 == ppvObject )
  59. return E_INVALIDARG;
  60. *ppvObject = 0;
  61. if ( IID_IStemmer == riid )
  62. *ppvObject = (IUnknown *)(IStemmer *)this;
  63. else if ( IID_IUnknown == riid )
  64. *ppvObject = (IUnknown *)this;
  65. else
  66. return E_NOINTERFACE;
  67. AddRef();
  68. return S_OK;
  69. }
  70. //+-------------------------------------------------------------------------
  71. //
  72. // Method: CStemmer::AddRef
  73. //
  74. // Synopsis: Increments refcount
  75. //
  76. //--------------------------------------------------------------------------
  77. ULONG STDMETHODCALLTYPE
  78. CStemmer::AddRef()
  79. {
  80. return InterlockedIncrement( &_cRefs );
  81. }
  82. //+-------------------------------------------------------------------------
  83. //
  84. // Method: CStemmer::Release
  85. //
  86. // Synopsis: Decrement refcount. Delete if necessary.
  87. //
  88. //--------------------------------------------------------------------------
  89. ULONG STDMETHODCALLTYPE
  90. CStemmer::Release()
  91. {
  92. unsigned long uTmp = InterlockedDecrement( &_cRefs );
  93. if ( 0 == uTmp )
  94. delete this;
  95. return(uTmp);
  96. }
  97. //+-------------------------------------------------------------------------
  98. //
  99. // Method: CStemmer::Init
  100. //
  101. // Synopsis: Initialize stemmer
  102. //
  103. // Arguments: [ulMaxTokenSize] -- Maximum size token stored by caller
  104. // [pfLicense] -- Set to true if use restricted
  105. //
  106. // Returns: Status code
  107. //
  108. //--------------------------------------------------------------------------
  109. SCODE STDMETHODCALLTYPE
  110. CStemmer::Init(
  111. ULONG ulMaxTokenSize,
  112. BOOL *pfLicense )
  113. {
  114. if ( NULL == pfLicense ) {
  115. return E_INVALIDARG;
  116. }
  117. if (IsBadWritePtr(pfLicense, sizeof(DWORD))) {
  118. return E_INVALIDARG;
  119. }
  120. *pfLicense = TRUE;
  121. _ulMaxTokenSize = ulMaxTokenSize;
  122. return S_OK;
  123. }
  124. //+---------------------------------------------------------------------------
  125. //
  126. // Member: CStemmer::GetLicenseToUse
  127. //
  128. // Synopsis: Returns a pointer to vendors license information
  129. //
  130. // Arguments: [ppwcsLicense] -- ptr to ptr to which license info is returned
  131. //
  132. //----------------------------------------------------------------------------
  133. SCODE STDMETHODCALLTYPE
  134. CStemmer::GetLicenseToUse( const WCHAR **ppwcsLicense )
  135. {
  136. static WCHAR const * wcsCopyright = L"Copyright Microsoft, 1991-1998";
  137. if ( NULL == ppwcsLicense ) {
  138. return E_INVALIDARG;
  139. }
  140. if (IsBadWritePtr(ppwcsLicense, sizeof(DWORD))) {
  141. return ( E_INVALIDARG );
  142. }
  143. *ppwcsLicense = wcsCopyright;
  144. return( S_OK );
  145. }
  146. //+---------------------------------------------------------------------------
  147. //
  148. // Member: CStemmer::StemWord
  149. //
  150. // Synopsis: Stem a word into its inflected forms, eg swim to swims and swimming
  151. //
  152. // Arguments: [pwcInBuf] -- input Unicode word
  153. // [cwc] -- count of characters in word
  154. // [pStemSink] -- sink to collect inflected forms
  155. //
  156. //----------------------------------------------------------------------------
  157. SCODE STDMETHODCALLTYPE
  158. CStemmer::StemWord(
  159. WCHAR const *pwcInBuf,
  160. ULONG cwc,
  161. IStemSink *pStemSink )
  162. {
  163. INT nReturn;
  164. SCODE sc = S_OK;
  165. #ifdef THAIDBG
  166. ULONG i;
  167. OutputDebugString("\n Stemword\n");
  168. for (i=0; i<cwc; i++)
  169. {
  170. WORD wtmp;
  171. char ctmp[80];
  172. wtmp = pwcInBuf[i];
  173. sprintf(ctmp, "%4x ", wtmp);
  174. OutputDebugString(ctmp);
  175. }
  176. #endif
  177. if ( NULL == pStemSink || NULL == pwcInBuf) {
  178. return E_FAIL;
  179. }
  180. // Currently, Thai stemmer doesn't make inflection form for tripolli.
  181. pStemSink->PutWord (pwcInBuf, cwc);
  182. return sc;
  183. }