Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

897 lines
26 KiB

  1. //+---------------------------------------------------------------------------
  2. //
  3. // Microsoft Windows
  4. // Copyright (C) Microsoft Corporation, 1997 - 1999
  5. //
  6. // File: IWBreak.cxx
  7. //
  8. // Contents: Korean Word Breaker glue code
  9. //
  10. // History: weibz, 10-Sep-1997 created
  11. //
  12. //----------------------------------------------------------------------------
  13. #include <pch.cxx>
  14. #include "iwbreak.hxx"
  15. #define MAXFORMS 20
  16. extern long gulcInstances;
  17. extern HSTM g_hStm;
  18. extern BOOL g_fLoad;
  19. //extern CRITICAL_SECTION ThCritSect;
  20. //+---------------------------------------------------------------------------
  21. //
  22. // Member: CWordBreaker::CWordBreaker
  23. //
  24. // Synopsis: Constructor for the CWordBreaker class.
  25. //
  26. // Arguments: [lcid] -- locale id
  27. //
  28. //----------------------------------------------------------------------------
  29. CWordBreaker::CWordBreaker( LCID lcid )
  30. : _cRefs(1),
  31. _lcid(lcid)
  32. {
  33. InterlockedIncrement( &gulcInstances );
  34. }
  35. //+---------------------------------------------------------------------------
  36. //
  37. // Member: CWordBreaker::~CWordBreaker
  38. //
  39. // Synopsis: Destructor for the CWordBreaker class.
  40. //
  41. // Notes: All termination/deallocation is done by embedded smart pointers
  42. //
  43. //----------------------------------------------------------------------------
  44. CWordBreaker::~CWordBreaker()
  45. {
  46. InterlockedDecrement( &gulcInstances );
  47. }
  48. //+-------------------------------------------------------------------------
  49. //
  50. // Method: CWordBreaker::QueryInterface
  51. //
  52. // Synopsis: Rebind to other interface
  53. //
  54. // Arguments: [riid] -- IID of new interface
  55. // [ppvObject] -- New interface * returned here
  56. //
  57. // Returns: S_OK if bind succeeded, E_NOINTERFACE if bind failed
  58. //
  59. //--------------------------------------------------------------------------
  60. SCODE STDMETHODCALLTYPE
  61. CWordBreaker::QueryInterface( REFIID riid, void ** ppvObject)
  62. {
  63. //
  64. // Optimize QueryInterface by only checking minimal number of bytes.
  65. //
  66. // IID_IUnknown = 00000000-0000-0000-C000-000000000046
  67. // IID_IWordBreaker = D53552C8-77E3-101A-B552-08002B33B0E6
  68. // --------
  69. // |
  70. // +--- Unique!
  71. //
  72. Assert( (IID_IUnknown.Data1 & 0x000000FF) == 0x00 );
  73. Assert( (IID_IWordBreaker.Data1 & 0x000000FF) == 0xC8 );
  74. IUnknown *pUnkTemp;
  75. SCODE sc = S_OK;
  76. switch( riid.Data1 )
  77. {
  78. case 0x00000000:
  79. if ( memcmp( &IID_IUnknown, &riid, sizeof(riid) ) == 0 )
  80. pUnkTemp = (IUnknown *)this;
  81. else
  82. sc = E_NOINTERFACE;
  83. break;
  84. case 0xD53552C8:
  85. if ( memcmp( &IID_IWordBreaker, &riid, sizeof(riid) ) == 0 )
  86. pUnkTemp = (IUnknown *)(IWordBreaker *)this;
  87. else
  88. sc = E_NOINTERFACE;
  89. break;
  90. default:
  91. pUnkTemp = 0;
  92. sc = E_NOINTERFACE;
  93. break;
  94. }
  95. if( 0 != pUnkTemp )
  96. {
  97. *ppvObject = (void * )pUnkTemp;
  98. pUnkTemp->AddRef();
  99. }
  100. else
  101. *ppvObject = 0;
  102. return(sc);
  103. }
  104. //+-------------------------------------------------------------------------
  105. //
  106. // Method: CWordBreaker::AddRef
  107. //
  108. // Synopsis: Increments refcount
  109. //
  110. //--------------------------------------------------------------------------
  111. ULONG STDMETHODCALLTYPE
  112. CWordBreaker::AddRef()
  113. {
  114. return InterlockedIncrement( &_cRefs );
  115. }
  116. //+-------------------------------------------------------------------------
  117. //
  118. // Method: CWordBreaker::Release
  119. //
  120. // Synopsis: Decrement refcount. Delete if necessary.
  121. //
  122. //--------------------------------------------------------------------------
  123. ULONG STDMETHODCALLTYPE
  124. CWordBreaker::Release()
  125. {
  126. unsigned long uTmp = InterlockedDecrement( &_cRefs );
  127. if ( 0 == uTmp )
  128. delete this;
  129. return(uTmp);
  130. }
  131. //+-------------------------------------------------------------------------
  132. //
  133. // Method: CWordBreaker::Init
  134. //
  135. // Synopsis: Initialize word-breaker
  136. //
  137. // Arguments: [fQuery] -- TRUE if query-time
  138. // [ulMaxTokenSize] -- Maximum size token stored by caller
  139. // [pfLicense] -- Set to true if use restricted
  140. //
  141. // Returns: Status code
  142. //
  143. //--------------------------------------------------------------------------
  144. SCODE STDMETHODCALLTYPE
  145. CWordBreaker::Init(
  146. BOOL fQuery,
  147. ULONG ulMaxTokenSize,
  148. BOOL *pfLicense )
  149. {
  150. if ( NULL == pfLicense )
  151. return E_INVALIDARG;
  152. if (IsBadWritePtr(pfLicense, sizeof(DWORD)))
  153. return E_INVALIDARG;
  154. if ( !StemInit() )
  155. return LANGUAGE_E_DATABASE_NOT_FOUND;
  156. *pfLicense = TRUE;
  157. _fQuery = fQuery;
  158. _ulMaxTokenSize = ulMaxTokenSize;
  159. return S_OK;
  160. }
  161. //+---------------------------------------------------------------------------
  162. //
  163. // Member: CWordBreaker::ComposePhrase
  164. //
  165. // Synopsis: Convert a noun and a modifier into a phrase.
  166. //
  167. // Arguments: [pwcNoun] -- pointer to noun.
  168. // [cwcNoun] -- count of chars in pwcNoun
  169. // [pwcModifier] -- pointer to word modifying pwcNoun
  170. // [cwcModifier] -- count of chars in pwcModifier
  171. // [ulAttachmentType] -- relationship between pwcNoun &pwcModifier
  172. //
  173. //----------------------------------------------------------------------------
  174. SCODE STDMETHODCALLTYPE
  175. CWordBreaker::ComposePhrase(
  176. WCHAR const *pwcNoun,
  177. ULONG cwcNoun,
  178. WCHAR const *pwcModifier,
  179. ULONG cwcModifier,
  180. ULONG ulAttachmentType,
  181. WCHAR *pwcPhrase,
  182. ULONG *pcwcPhrase )
  183. {
  184. //
  185. // Need to code in later
  186. //
  187. if ( _fQuery )
  188. return( E_NOTIMPL );
  189. else
  190. return ( WBREAK_E_QUERY_ONLY );
  191. }
  192. //+---------------------------------------------------------------------------
  193. //
  194. // Member: CWordBreaker::GetLicenseToUse
  195. //
  196. // Synopsis: Returns a pointer to vendors license information
  197. //
  198. // Arguments: [ppwcsLicense] -- ptr to ptr to which license info is returned
  199. //
  200. //----------------------------------------------------------------------------
  201. SCODE STDMETHODCALLTYPE
  202. CWordBreaker::GetLicenseToUse(
  203. const WCHAR **ppwcsLicense )
  204. {
  205. static WCHAR const * wcsCopyright = L"Copyright Microsoft, 1991-1998";
  206. if ( NULL == ppwcsLicense ) {
  207. return E_INVALIDARG;
  208. }
  209. if (IsBadWritePtr(ppwcsLicense, sizeof(DWORD))) {
  210. return E_INVALIDARG;
  211. }
  212. *ppwcsLicense = wcsCopyright;
  213. return( S_OK );
  214. }
  215. //+---------------------------------------------------------------------------
  216. //
  217. // Member: CWordBreaker::BreakText
  218. //
  219. // Synopsis: Break input stream into words.
  220. //
  221. // Arguments: [pTextSource] -- source of Unicode text
  222. // [pWordSink] -- sink for collecting words
  223. // [pPhraseSink] -- sink for collecting phrases
  224. //
  225. // History: 10-Sep-1997, WeibZ, Created.
  226. //
  227. // Notes: Since the input buffer may be greater than MAX_II_BUFFER_LEN
  228. // we process the buffer in chunks of length MAX_II_BUFFER_LEN.
  229. //
  230. //----------------------------------------------------------------------------
  231. SCODE STDMETHODCALLTYPE CWordBreaker::BreakText( TEXT_SOURCE *pTextSource,
  232. IWordSink *pWordSink,
  233. IPhraseSink *pPhraseSink )
  234. {
  235. SCODE sc = S_OK;
  236. ULONG cwc;
  237. WT Type;
  238. BOOL Ret_ProcToken;
  239. if ( NULL == pTextSource ) {
  240. // OutputDebugString("\nPTextSources is Null\n");
  241. return E_INVALIDARG;
  242. }
  243. if ( NULL == pWordSink )
  244. {
  245. // BUGBUG, propagate the null word sink error code
  246. return sc;
  247. }
  248. // BUGBUG, need to normalize nums within T-Hammer, pass as flag?
  249. // turn on noun phrase analysis if there is a phrase sink
  250. if ( 0 != pPhraseSink )
  251. {
  252. // BUGBUG, do we need to pass a separate flag to T-Hammer for this?
  253. // ignore the phrase sink for now
  254. // return sc;
  255. }
  256. if (pTextSource->iEnd == pTextSource->iCur) {
  257. return S_OK;
  258. }
  259. Assert( pTextSource->iCur < pTextSource->iEnd );
  260. __try
  261. {
  262. do
  263. {
  264. while ( pTextSource->iCur < pTextSource->iEnd )
  265. {
  266. cwc = pTextSource->iEnd - pTextSource->iCur;
  267. Tokenize( cwc, TRUE, pTextSource, &Type);
  268. if ( Type != WT_REACHEND )
  269. {
  270. Ret_ProcToken = ProcessTokens( pTextSource, Type,
  271. pWordSink, pPhraseSink );
  272. if ( !Ret_ProcToken ) {
  273. // Process_Tokens return FALSE, so return here
  274. return E_UNEXPECTED;
  275. }
  276. pTextSource->iCur += _cchTextProcessed;
  277. }
  278. else
  279. break;
  280. }
  281. } while ( SUCCEEDED(pTextSource->pfnFillTextBuffer(pTextSource)) );
  282. while ( pTextSource->iCur < pTextSource->iEnd )
  283. {
  284. cwc = pTextSource->iEnd - pTextSource->iCur;
  285. Tokenize( cwc, FALSE, pTextSource, &Type);
  286. Ret_ProcToken = ProcessTokens( pTextSource, Type,
  287. pWordSink, pPhraseSink );
  288. if ( !Ret_ProcToken ) {
  289. // Process_Tokens return FALSE, so return here
  290. return E_UNEXPECTED;
  291. }
  292. pTextSource->iCur += _cchTextProcessed;
  293. }
  294. } __except(1) {
  295. sc = E_UNEXPECTED;
  296. }
  297. return sc;
  298. }
  299. void CWordBreaker::Tokenize( unsigned cwc,
  300. BOOL bMoreText,
  301. TEXT_SOURCE *pTextSource,
  302. WT *Type)
  303. {
  304. ULONG i;
  305. BYTE ct;
  306. BOOL fRomanWord = FALSE;
  307. BOOL fHanguelWord = FALSE;
  308. CONST WCHAR *pwcInput, *pwcStem;
  309. _cchTextProcessed = 0;
  310. *Type = WT_START;
  311. pwcStem = pwcInput = pTextSource->awcBuffer + pTextSource->iCur;
  312. for (i=0; i< cwc; i++, pwcInput++) {
  313. ct = GetCharType(*pwcInput);
  314. if ( (ct != WS) && (ct != PS) && (ct != HG) )
  315. ct = CH;
  316. switch (ct) {
  317. case CH :
  318. // check to see if there is a Hanguel word before this char
  319. if (fHanguelWord) {
  320. _cchTextProcessed = (DWORD)(pwcInput - pwcStem);
  321. return;
  322. }
  323. if (!fRomanWord) {
  324. pwcStem = pwcInput;
  325. fRomanWord = TRUE;
  326. *Type = WT_ROMAJI;
  327. }
  328. break;
  329. case HG :
  330. // check to see if there is an English word before this char
  331. if ( fRomanWord ) {
  332. _cchTextProcessed = (DWORD)(pwcInput - pwcStem);
  333. return;
  334. }
  335. if (!fHanguelWord) {
  336. pwcStem = pwcInput;
  337. fHanguelWord = TRUE;
  338. *Type = WT_HANGUEL;
  339. }
  340. break;
  341. case WS :
  342. if (fRomanWord || fHanguelWord) {
  343. _cchTextProcessed = (DWORD)(pwcInput - pwcStem);
  344. return;
  345. }
  346. *Type = WT_WORD_SEP;
  347. _cchTextProcessed = 1;
  348. return;
  349. case PS :
  350. if (fRomanWord || fHanguelWord) {
  351. _cchTextProcessed = (DWORD)(pwcInput - pwcStem);
  352. return;
  353. }
  354. *Type = WT_PHRASE_SEP;
  355. _cchTextProcessed = 1;
  356. return;
  357. }
  358. }
  359. if ( bMoreText ) {
  360. _cchTextProcessed = 0;
  361. *Type = WT_REACHEND;
  362. }
  363. else
  364. _cchTextProcessed = cwc;
  365. }
  366. BOOL CWordBreaker::ProcessTokens( TEXT_SOURCE *pTextSource,
  367. WT Type,
  368. IWordSink *pWordSink,
  369. IPhraseSink *pPhraseSink )
  370. {
  371. CONST WCHAR *pwcStem;
  372. if ( Type == WT_PHRASE_SEP)
  373. {
  374. pWordSink->PutBreak (WORDREP_BREAK_EOS);
  375. return TRUE;
  376. }
  377. if ( Type == WT_ROMAJI)
  378. {
  379. ULONG i;
  380. pwcStem = pTextSource->awcBuffer + pTextSource->iCur;
  381. #ifdef KORDBG
  382. OutputDebugString("\n");
  383. for (i=0; i< _cchTextProcessed; i++)
  384. {
  385. char ctmp[2];
  386. ctmp[0] = pwcStem[i] & 0xff;
  387. ctmp[1] = '\0';
  388. OutputDebugString(ctmp);
  389. }
  390. OutputDebugString(" ");
  391. #endif
  392. (pWordSink->PutWord)(_cchTextProcessed,
  393. pwcStem,
  394. _cchTextProcessed,
  395. pTextSource->iCur);
  396. return TRUE;
  397. }
  398. if ( Type == WT_HANGUEL )
  399. {
  400. WCHAR TokenWord[80];
  401. ULONG i;
  402. WDOB sob;
  403. // EnterCriticalSection(&ThCritSect);
  404. sob.wordlist = (LPWSTR)LocalAlloc(LPTR, 200);
  405. sob.sch = 200;
  406. if (sob.wordlist == NULL )
  407. return FALSE;
  408. pwcStem = pTextSource->awcBuffer + pTextSource->iCur;
  409. for (i=0; i<_cchTextProcessed; i++)
  410. {
  411. #ifdef KORDBG
  412. WORD wtmp;
  413. char ctmp[80];
  414. wtmp = pwcStem[i];
  415. sprintf(ctmp, "%4x ", wtmp);
  416. OutputDebugString(ctmp);
  417. #endif
  418. TokenWord[i] = pwcStem[i];
  419. }
  420. TokenWord[_cchTextProcessed] = L'\0';
  421. #ifdef KORDBG
  422. OutputDebugString("\nBefore StemmerDecomposeW\n");
  423. #endif
  424. if (StemmerDecomposeW(g_hStm, TokenWord, &sob) == NULL)
  425. {
  426. ULONG wInLexLen;
  427. WORD winfo;
  428. ULONG num, len, j, k;
  429. WCHAR *pWordList, *pVerb;
  430. ULONG NumEf;
  431. BOOL fExist;
  432. WCHAR *pwszStart[MAXFORMS];
  433. NumEf = 0;
  434. do
  435. {
  436. num = sob.num;
  437. pWordList = sob.wordlist;
  438. for (j=0; j<num; j++)
  439. {
  440. len = wcslen(pWordList);
  441. memcpy(&winfo,pWordList+len+1,2);
  442. switch (winfo & 0x0f00)
  443. {
  444. case POS_NOUN :
  445. case POS_ADJECTIVE :
  446. case POS_PRONOUN :
  447. case POS_ADVERB :
  448. case POS_NUMBER :
  449. fExist = FALSE;
  450. for (k=0; k<NumEf; k++)
  451. {
  452. if ( wcscmp(pWordList,pwszStart[k]) == 0 )
  453. fExist = TRUE;
  454. }
  455. if ( !fExist ) // this stem does not exist so far.
  456. {
  457. // it will contain: WordList 00 Winfo, so the length
  458. // should be len + 2 = len + 1 + 1.
  459. pwszStart[NumEf]=(LPWSTR)LocalAlloc(LPTR,(len+2)*sizeof(WCHAR));
  460. if ( pwszStart[NumEf] == NULL )
  461. {
  462. // alloc error, so return here
  463. if (sob.wordlist)
  464. LocalFree(sob.wordlist);
  465. for (k=0; k<NumEf; k++)
  466. {
  467. if ( pwszStart[k] != NULL)
  468. LocalFree(pwszStart[k]);
  469. }
  470. return FALSE;
  471. }
  472. wcscpy(pwszStart[NumEf], pWordList);
  473. pwszStart[NumEf][len] = L'\0';
  474. pwszStart[NumEf][len+1] = winfo & 0x0f00 ;
  475. NumEf++;
  476. }
  477. break;
  478. case POS_VERB : // for Verb, we will handle it specially.
  479. // Append a flag char Da (U+B2E4) to
  480. // the root form of a verb word.
  481. fExist = FALSE;
  482. // it will contain: WordList <Da> 00.
  483. // so the length should be len + 2 = len + 1 + 1.
  484. pVerb = (LPWSTR)LocalAlloc(LPTR, (len+2)*sizeof(WCHAR));
  485. if (pVerb == NULL )
  486. {
  487. if (sob.wordlist)
  488. LocalFree(sob.wordlist);
  489. for (k=0; k<NumEf; k++)
  490. {
  491. if ( pwszStart[k] != NULL )
  492. LocalFree(pwszStart[k]);
  493. }
  494. return FALSE;
  495. }
  496. wcscpy(pVerb, pWordList);
  497. pVerb[len] = VERBCHAR;
  498. pVerb[len+1] = L'\0';
  499. for (k=0; k<NumEf; k++)
  500. {
  501. if ( wcscmp(pVerb,pwszStart[k]) == 0 )
  502. fExist = TRUE;
  503. }
  504. if ( !fExist ) // this stem does not exist so far.
  505. {
  506. // it will contain: Wordlist <Da> 00 Winfo.
  507. // so the length should be len+3 = len + 1 + 1 + 1.
  508. pwszStart[NumEf]=(LPWSTR)LocalAlloc(LPTR,(len+3)*sizeof(WCHAR));
  509. if ( pwszStart[NumEf] == NULL )
  510. {
  511. // alloc error, so return here
  512. if (sob.wordlist)
  513. LocalFree(sob.wordlist);
  514. if ( pVerb != NULL )
  515. LocalFree(pVerb);
  516. for (k=0; k<NumEf; k++)
  517. {
  518. if ( pwszStart[k] != NULL)
  519. LocalFree(pwszStart[k]);
  520. }
  521. return FALSE;
  522. }
  523. wcscpy(pwszStart[NumEf], pVerb);
  524. pwszStart[NumEf][len+1] = L'\0';
  525. pwszStart[NumEf][len+2] = winfo & 0x0f00;
  526. NumEf++;
  527. }
  528. if ( pVerb != NULL )
  529. LocalFree(pVerb);
  530. break;
  531. case POS_AUXVERB :
  532. case POS_AUXADJ :
  533. case POS_SPECIFIER :
  534. case POS_PREFIX :
  535. break;
  536. }
  537. pWordList += len + 3;
  538. }
  539. } while (StemmerDecomposeMoreW(g_hStm, TokenWord, &sob) == NULL) ;
  540. //
  541. // BUGBUG: Can this legitimately happen? We're seeing it indexing microsoft.com.
  542. //
  543. if ( 0 == NumEf )
  544. {
  545. #if DBG == 1
  546. OutputDebugString( "BOGUS WORD: " );
  547. for ( WCHAR * pwc = &TokenWord[0]; 0 != *pwc; pwc++ )
  548. {
  549. char ctmp[6];
  550. sprintf(ctmp, "%4x ", *pwc);
  551. OutputDebugString(ctmp);
  552. }
  553. OutputDebugString( "\n" );
  554. #endif
  555. return TRUE;
  556. }
  557. if ( !_fQuery )
  558. {
  559. for (i=0; i< (NumEf-1); i++ )
  560. {
  561. wInLexLen = wcslen(pwszStart[i]);
  562. pWordSink->PutAltWord(wInLexLen,
  563. pwszStart[i],
  564. _cchTextProcessed,
  565. pTextSource->iCur);
  566. }
  567. // handle the last one.
  568. wInLexLen = wcslen(pwszStart[NumEf-1]);
  569. pWordSink->PutWord(wInLexLen,
  570. pwszStart[NumEf-1],
  571. _cchTextProcessed,
  572. pTextSource->iCur);
  573. }
  574. else
  575. {
  576. if ( NumEf == 1 )
  577. {
  578. // handle this only one.
  579. wInLexLen = wcslen(pwszStart[NumEf-1]);
  580. pWordSink->PutWord(wInLexLen,
  581. pwszStart[NumEf-1],
  582. _cchTextProcessed,
  583. pTextSource->iCur);
  584. }
  585. else
  586. {
  587. ULONG uNum_Noun;
  588. ULONG uIndex[MAXFORMS];
  589. uNum_Noun = 0;
  590. for (i=0; i<NumEf; i++)
  591. {
  592. wInLexLen = wcslen(pwszStart[i]);
  593. if ( (pwszStart[i][wInLexLen+1] == POS_NOUN) ||
  594. (pwszStart[i][wInLexLen+1] == POS_PRONOUN) ||
  595. (pwszStart[i][wInLexLen+1] == POS_NUMBER) )
  596. {
  597. uIndex[uNum_Noun] = i;
  598. uNum_Noun ++;
  599. }
  600. }
  601. if ( uNum_Noun == 0 )
  602. {
  603. // there is no Noun form
  604. for (i=0; i< (NumEf-1); i++ )
  605. {
  606. wInLexLen = wcslen(pwszStart[i]);
  607. pWordSink->PutAltWord(wInLexLen,
  608. pwszStart[i],
  609. _cchTextProcessed,
  610. pTextSource->iCur);
  611. }
  612. // handle the last one.
  613. wInLexLen = wcslen(pwszStart[NumEf-1]);
  614. pWordSink->PutWord(wInLexLen,
  615. pwszStart[NumEf-1],
  616. _cchTextProcessed,
  617. pTextSource->iCur);
  618. }
  619. if (uNum_Noun == 1)
  620. {
  621. // there is only One Noun, and we just use this one to query.
  622. ULONG index;
  623. index = uIndex[0];
  624. wInLexLen = wcslen(pwszStart[index]);
  625. pWordSink->PutWord(wInLexLen,
  626. pwszStart[index],
  627. _cchTextProcessed,
  628. pTextSource->iCur);
  629. }
  630. if ( uNum_Noun > 1 )
  631. {
  632. // there are more than one Noun, Use all those Noun to query.
  633. ULONG index;
  634. for (i=0; i<uNum_Noun-1; i++)
  635. {
  636. index = uIndex[i];
  637. wInLexLen = wcslen(pwszStart[index]);
  638. pWordSink->PutAltWord(wInLexLen,
  639. pwszStart[index],
  640. _cchTextProcessed,
  641. pTextSource->iCur);
  642. }
  643. // handle the last Noun.
  644. index = uIndex[uNum_Noun-1];
  645. wInLexLen = wcslen(pwszStart[index]);
  646. pWordSink->PutWord(wInLexLen,
  647. pwszStart[index],
  648. _cchTextProcessed,
  649. pTextSource->iCur);
  650. }
  651. }
  652. }
  653. #ifdef KORDBG
  654. {
  655. char ctmp[80];
  656. OutputDebugString("\nStemmerDecomposeW Correct\n");
  657. OutputDebugString(" the Num of Stemm is ");
  658. sprintf(ctmp, "%4x ", NumEf);
  659. OutputDebugString(ctmp);
  660. OutputDebugString("\n");
  661. }
  662. for (i=0; i< NumEf; i++)
  663. {
  664. WORD wtmp;
  665. char ctmp[80];
  666. wInLexLen = wcslen(pwszStart[i]);
  667. for (j=0; j<wInLexLen; j++)
  668. {
  669. wtmp = pwszStart[i][j];
  670. sprintf(ctmp, "%4x ", wtmp);
  671. OutputDebugString(ctmp);
  672. }
  673. OutputDebugString(" Type: ");
  674. wtmp = pwszStart[i][wInLexLen+1];
  675. wtmp = wtmp & 0x0f00;
  676. sprintf(ctmp, "%4x \n", wtmp);
  677. OutputDebugString(ctmp);
  678. }
  679. OutputDebugString("\n");
  680. #endif
  681. // Free the memory.
  682. for (i=0; i<NumEf; i++)
  683. {
  684. if ( pwszStart[i] != NULL )
  685. LocalFree(pwszStart[i]);
  686. }
  687. }
  688. else
  689. {
  690. #ifdef KORDBG
  691. OutputDebugString("\nStemmerDecomposeW NOT right\n");
  692. for (i=0; i<_cchTextProcessed; i++)
  693. {
  694. WORD wtmp;
  695. char ctmp[80];
  696. wtmp = TokenWord[i];
  697. sprintf(ctmp, "%4x ", wtmp);
  698. OutputDebugString(ctmp);
  699. }
  700. #endif
  701. pWordSink->PutWord(_cchTextProcessed,
  702. TokenWord,
  703. _cchTextProcessed,
  704. pTextSource->iCur);
  705. }
  706. LocalFree(sob.wordlist);
  707. // LeaveCriticalSection (&ThCritSect);
  708. }
  709. return TRUE;
  710. }