Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

706 lines
25 KiB

  1. #include <windows.h>
  2. #include <objbase.h>
  3. #include <initguid.h>
  4. #include "IWBrKr.h"
  5. #include "DefBrKr.h"
  6. #include "rulelex.h"
  7. #include "lexicon.h"
  8. #include "LexMgr.h"
  9. #include "chtbrkr.h"
  10. #include "cierror.h"
  11. static LONG g_lServerLockCnt = 0;
  12. static LONG g_lComponentCnt = 0;
  13. TCHAR g_tszModuleFileName[MAX_PATH];
  14. HINSTANCE g_hInstance;
  15. #define WBREAK_E_END_OF_TEXT ((HRESULT)0x80041780L)
  16. static WCHAR g_wszLicense[] = {L"Use of the IWordBreaker interface that is in violation of the license agreement, without the consent of the vendor(s) specified in the notice, may result in server civil and criminal penalties"};
  17. TCHAR tszEnglishLangSpecificKey[] = TEXT("System\\CurrentControlSet\\Control\\ContentIndex\\Language\\English_US");
  18. extern "C" BOOL CALLBACK DllMain(HINSTANCE, DWORD, LPVOID);
  19. BOOL CALLBACK DllMain(
  20. HINSTANCE hInstance, // instance handle of this library
  21. DWORD fdwReason, // reason called
  22. LPVOID lpvReserve) // reserve pointer
  23. {
  24. switch (fdwReason) {
  25. case DLL_PROCESS_ATTACH:
  26. g_hInstance = hInstance;
  27. GetModuleFileName(g_hInstance, g_tszModuleFileName,
  28. sizeof(g_tszModuleFileName) / sizeof(TCHAR));
  29. break;
  30. case DLL_PROCESS_DETACH:
  31. break;
  32. }
  33. return TRUE;
  34. }
  35. // Export API
  36. STDAPI DllCanUnloadNow(void)
  37. {
  38. if (g_lServerLockCnt) {
  39. } else if (g_lComponentCnt) {
  40. } else {
  41. return S_OK;
  42. }
  43. return S_FALSE;
  44. }
  45. STDAPI DllGetClassObject(
  46. const CLSID& clsid,
  47. const IID& iid,
  48. void **ppv)
  49. {
  50. IChtBrKrClassFactory *pIChtBrKrClassFactory;
  51. HRESULT hr;
  52. InterlockedIncrement(&g_lServerLockCnt);
  53. if (clsid != CLSID_CHTBRKR && clsid != CLSID_WHISTLER_CHTBRKR) {
  54. hr = CLASS_E_CLASSNOTAVAILABLE ;
  55. goto _exit;
  56. }
  57. pIChtBrKrClassFactory = new IChtBrKrClassFactory;
  58. if (!pIChtBrKrClassFactory) {
  59. hr = E_OUTOFMEMORY;
  60. goto _exit;
  61. }
  62. hr = pIChtBrKrClassFactory->QueryInterface(iid, ppv);
  63. _exit:
  64. InterlockedDecrement(&g_lServerLockCnt);
  65. return hr;
  66. }
  67. // Factory
  68. IChtBrKrClassFactory::IChtBrKrClassFactory(void)
  69. :m_lRefCnt(1)
  70. {
  71. InterlockedIncrement(&g_lComponentCnt);
  72. }
  73. IChtBrKrClassFactory::~IChtBrKrClassFactory(void)
  74. {
  75. InterlockedDecrement(&g_lComponentCnt);
  76. }
  77. STDMETHODIMP_(ULONG) IChtBrKrClassFactory::AddRef(void)
  78. {
  79. return InterlockedIncrement(&m_lRefCnt);
  80. }
  81. STDMETHODIMP_(ULONG) IChtBrKrClassFactory::Release(void)
  82. {
  83. if (InterlockedDecrement(&m_lRefCnt) == 0) {
  84. delete this;
  85. return 0;
  86. }
  87. return m_lRefCnt;
  88. }
  89. STDMETHODIMP IChtBrKrClassFactory::QueryInterface(
  90. const IID& iid,
  91. void **ppv)
  92. {
  93. if ((iid == IID_IUnknown) || (iid == IID_IClassFactory)) {
  94. *ppv = static_cast<IChtBrKrClassFactory *>(this);
  95. reinterpret_cast<IUnknown *>(*ppv)->AddRef();
  96. return S_OK;
  97. } else {
  98. *ppv = NULL;
  99. return E_NOINTERFACE;
  100. }
  101. }
  102. STDMETHODIMP IChtBrKrClassFactory::LockServer(
  103. BOOL fLock)
  104. {
  105. if (fLock) {
  106. InterlockedIncrement(&g_lServerLockCnt);
  107. } else {
  108. InterlockedDecrement(&g_lServerLockCnt);
  109. }
  110. return S_OK;
  111. }
  112. STDMETHODIMP IChtBrKrClassFactory::CreateInstance(
  113. IUnknown *pUnknownOuter,
  114. const IID& iid,
  115. void **ppv)
  116. {
  117. IWordBreaker *pIWordBreaker;
  118. HRESULT hr;
  119. *ppv = NULL;
  120. if (pUnknownOuter) { return CLASS_E_NOAGGREGATION; }
  121. pIWordBreaker = new IWordBreaker;
  122. if (!pIWordBreaker) { return E_OUTOFMEMORY; }
  123. hr = pIWordBreaker->QueryInterface(iid, ppv);
  124. pIWordBreaker->Release();
  125. return hr;
  126. }
  127. SCODE _stdcall MyFillTestBuffer(
  128. TEXT_SOURCE __RPC_FAR *pTextSource)
  129. {
  130. return WBREAK_E_END_OF_TEXT;
  131. }
  132. // IWordBreaker
  133. IWordBreaker::IWordBreaker(void)
  134. :m_lRefCnt(1),
  135. m_pcWordBreaker(NULL),
  136. m_pNonChineseTextSource(NULL),
  137. m_pNonChineseWordBreaker(NULL),
  138. m_pcDefWordBreaker(NULL),
  139. m_fIsQueryTime(FALSE)
  140. {
  141. InterlockedIncrement(&g_lComponentCnt);
  142. // CHT word breaker
  143. m_pcWordBreaker = new CCHTWordBreaker;
  144. if (!m_pcWordBreaker) {
  145. } else if (m_pcWordBreaker->InitData(g_hInstance)) {
  146. } else {
  147. delete m_pcWordBreaker;
  148. m_pcWordBreaker = NULL;
  149. }
  150. // Default wordbreaker
  151. m_pcDefWordBreaker = new CDefWordBreaker;
  152. // Non CHT Word breaker
  153. m_pNonChineseTextSource = new TEXT_SOURCE;
  154. if (m_pNonChineseTextSource) {
  155. HKEY hKey;
  156. TCHAR tszCLSID[MAX_PATH];
  157. DWORD dwBufSize = MAX_PATH;
  158. CLSID szCLSID;
  159. HRESULT hr;
  160. m_pNonChineseTextSource->pfnFillTextBuffer = MyFillTestBuffer;
  161. #if 0
  162. if (RegCreateKey(HKEY_LOCAL_MACHINE, tszEnglishLangSpecificKey, &hKey) == ERROR_SUCCESS) {
  163. if (RegQueryValueEx(hKey, TEXT("WBreakerClass"), NULL, NULL, (LPBYTE)tszCLSID, &dwBufSize) == ERROR_SUCCESS) {
  164. CoInitialize(NULL);
  165. CLSIDFromString(tszCLSID, &szCLSID);
  166. hr = CoCreateInstance(szCLSID, NULL, CLSCTX_SERVER, IID_IWordBreaker,
  167. (LPVOID*)&m_pNonChineseWordBreaker);
  168. if (!SUCCEEDED(hr)) {
  169. m_pNonChineseWordBreaker = NULL;
  170. }
  171. }
  172. }
  173. #endif
  174. }
  175. }
  176. IWordBreaker::~IWordBreaker(void)
  177. {
  178. InterlockedDecrement(&g_lComponentCnt);
  179. if (m_pcWordBreaker) {
  180. delete m_pcWordBreaker;
  181. }
  182. if (m_pNonChineseTextSource) {
  183. delete m_pNonChineseTextSource;
  184. }
  185. if (m_pNonChineseWordBreaker) {
  186. if (m_pNonChineseWordBreaker->Release() == 0) {
  187. m_pNonChineseWordBreaker = NULL;
  188. }
  189. }
  190. if (m_pcDefWordBreaker) {
  191. delete m_pcDefWordBreaker;
  192. }
  193. }
  194. STDMETHODIMP IWordBreaker::QueryInterface(
  195. const IID& iid,
  196. void **ppv)
  197. {
  198. *ppv = NULL;
  199. if ((iid == IID_IUnknown) || (iid == IID_IWordBreaker)) {
  200. *ppv = static_cast<IWordBreaker *>(this);
  201. reinterpret_cast<IUnknown *>(*ppv)->AddRef();
  202. return S_OK;
  203. } else {
  204. *ppv = NULL;
  205. return E_NOINTERFACE;
  206. }
  207. }
  208. STDMETHODIMP_(ULONG) IWordBreaker::AddRef(void)
  209. {
  210. return InterlockedIncrement(&m_lRefCnt);
  211. }
  212. STDMETHODIMP_(ULONG) IWordBreaker::Release(void)
  213. {
  214. if (m_lRefCnt == 0 || InterlockedDecrement(&m_lRefCnt) == 0) {
  215. delete this;
  216. return 0;
  217. }
  218. return m_lRefCnt;
  219. }
  220. STDMETHODIMP IWordBreaker::Init(
  221. BOOL fQuery,
  222. ULONG ulMaxTokenSize,
  223. BOOL *pfLicense)
  224. {
  225. m_uMaxCharNumberPerWord = ulMaxTokenSize;
  226. *pfLicense = FALSE;
  227. m_fIsQueryTime = fQuery;
  228. if (NULL != m_pNonChineseWordBreaker) {
  229. m_pNonChineseWordBreaker->Init(fQuery, ulMaxTokenSize, pfLicense);
  230. }
  231. return S_OK;
  232. }
  233. BOOL IsChineseChar(
  234. WCHAR wUnicode)
  235. {
  236. BOOL fRet;
  237. if (wUnicode >= 0x4E00 && wUnicode <= 0x9FA5) {
  238. fRet = TRUE;
  239. } else if (wUnicode >= 0xd800 && wUnicode <= 0xdbff) {
  240. fRet = TRUE;
  241. } else if (wUnicode >= 0xdc00 && wUnicode <= 0xdfff) {
  242. fRet = TRUE;
  243. } else if (wUnicode >= 0x1100 && wUnicode <= 0x11ff) {
  244. fRet = TRUE;
  245. } else if (wUnicode >= 0x2e80 && wUnicode <= 0xffff) {
  246. } else {
  247. fRet = FALSE;
  248. }
  249. return fRet;
  250. }
  251. BOOL IsSpecialFullShapeChar(
  252. WCHAR wChar)
  253. {
  254. BOOL fRet;
  255. if (wChar >=0xff21 && wChar <=0xff3a) {
  256. fRet = TRUE;
  257. } else if (wChar >=0xff41 && wChar <=0xff5a) {
  258. fRet = TRUE;
  259. } else if (wChar >=0xff10 && wChar <=0xff19) {
  260. fRet = TRUE;
  261. } else {
  262. fRet = FALSE;
  263. }
  264. return fRet;
  265. }
  266. DWORD GetNormaizeWord(
  267. LPWSTR lpwWordStr,
  268. UINT uWordLen,
  269. LPWSTR* lppwNormalizedWordStr)
  270. {
  271. DWORD dwNormalizedWordLen = 0;
  272. UINT i;
  273. WCHAR wChar;
  274. for (i = 0; i < uWordLen; ++i) {
  275. if (!IsSpecialFullShapeChar(lpwWordStr[i])) {
  276. goto _exit;
  277. }
  278. }
  279. *lppwNormalizedWordStr = new WCHAR[uWordLen];
  280. if (*lppwNormalizedWordStr) {
  281. for (dwNormalizedWordLen = 0; dwNormalizedWordLen < uWordLen; ++dwNormalizedWordLen) {
  282. wChar = lpwWordStr[dwNormalizedWordLen];
  283. if (wChar >=0xff21 && wChar <=0xff3a) { // A - Z
  284. (*lppwNormalizedWordStr)[dwNormalizedWordLen] = wChar - (0xff21 - 0x0041);
  285. } else if (wChar >=0xff41 && wChar <=0xff5a) { // a - z
  286. (*lppwNormalizedWordStr)[dwNormalizedWordLen] = wChar - (0xff41 - 0x0061);
  287. } else if (wChar >=0xff10 && wChar <=0xff19) { // 0 - 9
  288. (*lppwNormalizedWordStr)[dwNormalizedWordLen] = wChar - (0xff10 - 0x0030);
  289. } else {
  290. (*lppwNormalizedWordStr[dwNormalizedWordLen]) = wChar;
  291. }
  292. }
  293. }
  294. _exit:
  295. return dwNormalizedWordLen;
  296. }
  297. BOOL MyPutWordOrPhrase(
  298. CCHTWordBreaker* m_pcWordBreaker,
  299. TEXT_SOURCE *pTextSource,
  300. IWordSink *pWordSink,
  301. IPhraseSink *pPhraseSink,
  302. DWORD dwStartPosInTextStore,
  303. PDWORD pdwPrivBufToTextSourceMapping,
  304. LPWSTR lptszStencece, // Pure sentence, no enter in sentence
  305. DWORD dwSentenceLen,
  306. DWORD dwWordNum,
  307. PUINT puBreakResult,
  308. BOOL fPutWord,
  309. BOOL* fIsAPhrase)
  310. {
  311. DWORD i, dwSentenceIndex = 0, dwWordLen;
  312. BOOL fRet = TRUE;
  313. LPWSTR lpwNormalizedWordStr;
  314. DWORD dwNormalizedWordLen;
  315. for (i = 0; i < dwWordNum; ++i) {
  316. dwWordLen = pdwPrivBufToTextSourceMapping[dwSentenceIndex + puBreakResult[i] - 1] -
  317. pdwPrivBufToTextSourceMapping[dwSentenceIndex] + 1;
  318. if (fPutWord && pWordSink) {
  319. if (dwNormalizedWordLen = GetNormaizeWord(&(lptszStencece[dwSentenceIndex]), puBreakResult[i], &lpwNormalizedWordStr)) {
  320. pWordSink->PutAltWord(dwNormalizedWordLen, lpwNormalizedWordStr, dwWordLen, dwStartPosInTextStore + pdwPrivBufToTextSourceMapping[dwSentenceIndex]);
  321. delete [] lpwNormalizedWordStr;
  322. }
  323. DWORD dwAltWordNumber;
  324. LPWSTR lpwAltWord;
  325. if (dwAltWordNumber = m_pcWordBreaker->GetAltWord(&lptszStencece[dwSentenceIndex], puBreakResult[i], &lpwAltWord)) {
  326. pWordSink->PutAltWord(puBreakResult[i], lpwAltWord, dwWordLen, dwStartPosInTextStore + pdwPrivBufToTextSourceMapping[dwSentenceIndex]);
  327. delete [] lpwAltWord;
  328. }
  329. pWordSink->PutWord(puBreakResult[i], &lptszStencece[dwSentenceIndex],
  330. dwWordLen, dwStartPosInTextStore + pdwPrivBufToTextSourceMapping[dwSentenceIndex]);
  331. } else if (!fPutWord && fIsAPhrase) {
  332. if (fIsAPhrase[i] && pPhraseSink) {
  333. pPhraseSink->PutPhrase(
  334. &(pTextSource->awcBuffer[dwStartPosInTextStore + pdwPrivBufToTextSourceMapping[dwSentenceIndex]]),
  335. dwWordLen);
  336. }
  337. } else {
  338. }
  339. dwSentenceIndex += puBreakResult[i];
  340. }
  341. return fRet;
  342. }
  343. DWORD FullShapeCharProcess(
  344. LPWSTR lptszStencece,
  345. DWORD dwSentenceLen,
  346. DWORD dwWordNum,
  347. PUINT puBreakResult)
  348. {
  349. DWORD dwSentenceIndex = 0;
  350. DWORD dwMergeWordCount = 0;
  351. for (DWORD i = 0; i < dwWordNum; ++i) {
  352. dwMergeWordCount = 0;
  353. while (i + dwMergeWordCount < dwWordNum && puBreakResult[i + dwMergeWordCount] == 1 &&
  354. IsSpecialFullShapeChar(lptszStencece[dwSentenceIndex + dwMergeWordCount])) {
  355. ++dwMergeWordCount;
  356. }
  357. if (dwMergeWordCount > 1) {
  358. puBreakResult[i] += (dwMergeWordCount - 1);
  359. /*
  360. if (pfIsAPhrase) {
  361. CopyMemory(&(pfIsAPhrase[i + 1]), &(pfIsAPhrase[i + dwMergeWordCount]),
  362. sizeof(BOOL) * (dwWordNum - (i + dwMergeWordCount)));
  363. }
  364. */
  365. if (puBreakResult) {
  366. CopyMemory(&(puBreakResult[i + 1]), &(puBreakResult[i + dwMergeWordCount]),
  367. sizeof(UINT) * (dwWordNum - (i + dwMergeWordCount)));
  368. }
  369. dwWordNum -= (dwMergeWordCount - 1);
  370. dwSentenceIndex += dwMergeWordCount;
  371. } else {
  372. dwSentenceIndex += puBreakResult[i];
  373. }
  374. }
  375. return dwWordNum;
  376. }
  377. BOOL IsEnter(
  378. LPCWSTR lpwStr)
  379. {
  380. BOOL fRet;
  381. if (lpwStr[0] == 0x000D && lpwStr[1] == 0x000A) {
  382. fRet = TRUE;
  383. } else {
  384. fRet = FALSE;
  385. }
  386. return fRet;
  387. }
  388. STDMETHODIMP IWordBreaker::BreakText(
  389. TEXT_SOURCE *pTextSource,
  390. IWordSink *pWordSink,
  391. IPhraseSink *pPhraseSink)
  392. {
  393. LPTSTR lptszStencece = NULL;
  394. PDWORD pdwIndex = NULL;
  395. PUINT puResult, puResultAttrib;
  396. DWORD dwBufferSize = 0, dwBufferUsed = 0;
  397. DWORD dwIndex = 0;
  398. DWORD dwWordNum;
  399. HRESULT hr = S_OK;
  400. BOOL fIsPreChineseLanguage, fIsCurChineseLanguage;
  401. BOOL fIsEnter;
  402. DWORD dwEnterCount = 0;
  403. PUINT puOrigionalResult = NULL;
  404. PUINT puNewResult = NULL; // after decompose compose word
  405. BOOL* pfIsAPhrase = NULL;
  406. PUINT puOrigionalResultAttrib = NULL;
  407. DWORD dwNewResultNum = 0;
  408. DWORD dwSentenceIndex, dwOrgWordIndex, dwSubWordNum;
  409. if (!pTextSource) {
  410. hr = E_INVALIDARG;
  411. goto _exit;
  412. }
  413. dwBufferSize = BUFFER_GROW_UINT;
  414. lptszStencece = (LPTSTR)HeapAlloc(GetProcessHeap(), 0, dwBufferSize * sizeof(WCHAR));
  415. pdwIndex = (PDWORD)HeapAlloc(GetProcessHeap(), 0, dwBufferSize * sizeof(DWORD));
  416. if (!lptszStencece || !pdwIndex) {
  417. hr = E_OUTOFMEMORY;
  418. goto _exit;
  419. }
  420. while (TRUE) {
  421. while (pTextSource->iCur < pTextSource->iEnd) {
  422. fIsEnter = FALSE;
  423. fIsCurChineseLanguage = IsChineseChar(pTextSource->awcBuffer[pTextSource->iCur]); // Enter is not a Chinese char
  424. // Process "Enter"
  425. // Rule: One Enter only -> connect
  426. // More Than one Enter-> split
  427. // Enter after a full width char -> split
  428. if (!fIsCurChineseLanguage && dwBufferUsed != 0 && fIsPreChineseLanguage) {
  429. if (pTextSource->iCur < pTextSource->iEnd - 1) {
  430. if (IsEnter(&(pTextSource->awcBuffer[pTextSource->iCur]))) {
  431. if ((pTextSource->iCur + 3 < pTextSource->iEnd) && IsEnter(&(pTextSource->awcBuffer[pTextSource->iCur + 2]))) {
  432. } else if (IsSpecialFullShapeChar(lptszStencece[dwBufferUsed - 1])) {
  433. } else {
  434. fIsCurChineseLanguage = TRUE; // we treat "Enter" as Chinese char
  435. fIsEnter = TRUE;
  436. ++dwEnterCount;
  437. }
  438. }
  439. }
  440. }
  441. if (dwBufferUsed == 0) { // first char
  442. fIsPreChineseLanguage = fIsCurChineseLanguage;
  443. } else if (fIsPreChineseLanguage && fIsCurChineseLanguage) {
  444. } else if (!fIsPreChineseLanguage && !fIsCurChineseLanguage) {
  445. } else { // language change, process it
  446. _Break_Text:
  447. if (fIsPreChineseLanguage) {
  448. /*
  449. PWCHAR pwOutputDebugString;
  450. pwOutputDebugString = new WCHAR[dwBufferUsed + 1];
  451. CopyMemory(pwOutputDebugString, lptszStencece, sizeof(WCHAR) * dwBufferUsed);
  452. pwOutputDebugString[dwBufferUsed] = NULL;
  453. OutputDebugString(pwOutputDebugString);
  454. */
  455. dwWordNum = m_pcWordBreaker->BreakText(lptszStencece, dwBufferUsed);
  456. dwWordNum = m_pcWordBreaker->GetBreakResultWithAttribute(&puResult, &puResultAttrib);
  457. puOrigionalResult = new UINT[dwWordNum];
  458. puNewResult = new UINT[dwBufferUsed]; // alloc max size
  459. pfIsAPhrase = new BOOL[dwWordNum];
  460. puOrigionalResultAttrib = new UINT [dwWordNum];
  461. if (puOrigionalResult && puNewResult && pfIsAPhrase && puOrigionalResultAttrib) {
  462. dwNewResultNum = 0;
  463. CopyMemory(puOrigionalResult, puResult, sizeof(UINT) * dwWordNum);
  464. CopyMemory(puOrigionalResultAttrib, puResultAttrib, sizeof(UINT) * dwWordNum);
  465. for (dwSentenceIndex = 0, dwOrgWordIndex = 0; dwOrgWordIndex < dwWordNum; ++dwOrgWordIndex) {
  466. pfIsAPhrase[dwOrgWordIndex] = FALSE;
  467. if (puOrigionalResult[dwOrgWordIndex] > 2) {
  468. dwSubWordNum = m_pcWordBreaker->BreakText(&(lptszStencece[dwSentenceIndex]),
  469. puOrigionalResult[dwOrgWordIndex], NULL, puOrigionalResult[dwOrgWordIndex] - 1, FALSE);
  470. dwSubWordNum = m_pcWordBreaker->GetBreakResult(&puResult);
  471. if (puOrigionalResultAttrib[dwOrgWordIndex] == ATTR_RULE_WORD) {
  472. pfIsAPhrase[dwOrgWordIndex] = TRUE;
  473. } else {
  474. for (DWORD dwSubWordIndex = 0; dwSubWordIndex < dwSubWordNum; ++dwSubWordIndex) {
  475. if (puResult[dwSubWordIndex] > 1) {
  476. pfIsAPhrase[dwOrgWordIndex] = TRUE;
  477. break;
  478. }
  479. }
  480. }
  481. }
  482. if (pfIsAPhrase[dwOrgWordIndex]) {
  483. CopyMemory(&(puNewResult[dwNewResultNum]), puResult, sizeof(UINT) * dwSubWordNum);
  484. dwNewResultNum += dwSubWordNum;
  485. } else {
  486. puNewResult[dwNewResultNum++] = puOrigionalResult[dwOrgWordIndex];
  487. }
  488. dwSentenceIndex += puOrigionalResult[dwOrgWordIndex];
  489. }
  490. if (m_fIsQueryTime) { // Put Phrase at query time
  491. MyPutWordOrPhrase(m_pcWordBreaker, pTextSource, pWordSink, pPhraseSink, pTextSource->iCur - dwBufferUsed - dwEnterCount * 2,
  492. pdwIndex, lptszStencece, dwBufferUsed, dwWordNum, puOrigionalResult, FALSE, pfIsAPhrase);
  493. }
  494. // special process for full width A-Z, a-z, 0-9
  495. dwNewResultNum = FullShapeCharProcess(lptszStencece, dwBufferUsed, dwNewResultNum, puNewResult);
  496. // Put Word
  497. MyPutWordOrPhrase(m_pcWordBreaker, pTextSource, pWordSink, pPhraseSink, pTextSource->iCur - dwBufferUsed - dwEnterCount * 2,
  498. pdwIndex, lptszStencece, dwBufferUsed, dwNewResultNum, puNewResult, TRUE, NULL);
  499. } else { // can not do special processing
  500. MyPutWordOrPhrase(m_pcWordBreaker, pTextSource, pWordSink, pPhraseSink, pTextSource->iCur - dwBufferUsed - dwEnterCount * 2,
  501. pdwIndex, lptszStencece, dwBufferUsed, dwWordNum, puResult, TRUE, NULL);
  502. }
  503. if (puOrigionalResult) { delete [] puOrigionalResult; }
  504. if (puNewResult) { delete [] puNewResult; }
  505. if (pfIsAPhrase) { delete [] pfIsAPhrase; }
  506. if (puOrigionalResultAttrib) { delete [] puOrigionalResultAttrib; }
  507. puResult = NULL;
  508. dwEnterCount = 0;
  509. } else { // not TC language sentence
  510. /*
  511. m_pNonChineseTextSource->iCur = 0;
  512. m_pNonChineseTextSource->iEnd = dwBufferUsed;
  513. m_pNonChineseTextSource->awcBuffer = &(pTextSource->awcBuffer[pTextSource->iCur - dwBufferUsed]);//lptszStencece;
  514. */
  515. m_pNonChineseTextSource->iCur = pTextSource->iCur - dwBufferUsed;
  516. m_pNonChineseTextSource->iEnd = pTextSource->iCur;
  517. m_pNonChineseTextSource->awcBuffer = pTextSource->awcBuffer;
  518. // if (m_pNonChineseWordBreaker) {
  519. // m_pNonChineseWordBreaker->BreakText(m_pNonChineseTextSource, pWordSink, pPhraseSink);
  520. // } else
  521. if (m_pcDefWordBreaker) {
  522. //m_pcDefWordBreaker->BreakText(m_pNonChineseTextSource, pWordSink, pPhraseSink, pTextSource->iCur - dwBufferUsed);
  523. m_pcDefWordBreaker->BreakText(m_pNonChineseTextSource, pWordSink, pPhraseSink, 0);
  524. } /*else if (m_pNonChineseWordBreaker) {
  525. //m_pNonChineseWordBreaker->BreakText(m_pNonChineseTextSource, pWordSink, pPhraseSink);
  526. } */ else {
  527. }
  528. }
  529. fIsPreChineseLanguage = fIsCurChineseLanguage;
  530. dwBufferUsed = 0;
  531. dwIndex = 0;
  532. dwEnterCount = 0;
  533. }
  534. if (dwBufferUsed >= dwBufferSize) { // buffer full
  535. LPVOID lpMem1, lpMem2;
  536. lpMem1 = HeapReAlloc(GetProcessHeap(), 0, lptszStencece,
  537. (dwBufferSize + BUFFER_GROW_UINT) * sizeof(WCHAR));
  538. lpMem2 = HeapReAlloc(GetProcessHeap(), 0, pdwIndex,
  539. (dwBufferSize + BUFFER_GROW_UINT) * sizeof(DWORD));
  540. if (!lpMem1 || !lpMem2) { goto _Break_Text;
  541. } else {
  542. lptszStencece = (LPTSTR)lpMem1;
  543. pdwIndex = (PDWORD)lpMem2;
  544. dwBufferSize += BUFFER_GROW_UINT;
  545. }
  546. }
  547. if (pTextSource->iCur < pTextSource->iEnd) {
  548. if (fIsEnter) {
  549. pTextSource->iCur += 2;
  550. dwIndex += 2;
  551. } else {
  552. lptszStencece[dwBufferUsed] = pTextSource->awcBuffer[pTextSource->iCur++];
  553. pdwIndex[dwBufferUsed] = dwIndex;
  554. ++dwBufferUsed;
  555. ++dwIndex;
  556. }
  557. }
  558. }
  559. if (dwBufferUsed) { goto _Break_Text; }
  560. if (FAILED(pTextSource->pfnFillTextBuffer(pTextSource))) {
  561. break;
  562. }
  563. }
  564. _exit:
  565. if (lptszStencece) {
  566. HeapFree(GetProcessHeap(), 0, lptszStencece);
  567. }
  568. if (pdwIndex) {
  569. HeapFree(GetProcessHeap(), 0, pdwIndex);
  570. }
  571. return hr;
  572. }
  573. STDMETHODIMP IWordBreaker::ComposePhrase(
  574. const WCHAR *pwcNoun,
  575. ULONG cwcNoun,
  576. const WCHAR *pwcModifier,
  577. ULONG cwcModifier,
  578. ULONG ulAttachmentType,
  579. WCHAR *pwcPhrase,
  580. ULONG *pcwcPhrase)
  581. {
  582. return E_NOTIMPL;
  583. }
  584. STDMETHODIMP IWordBreaker::GetLicenseToUse(
  585. const WCHAR **ppwcsLicense)
  586. {
  587. *ppwcsLicense = g_wszLicense;
  588. return S_OK;
  589. }
  590. /*
  591. while (TRUE) {
  592. while (pTextSource->iCur != pTextSource->iEnd) {
  593. lptszStencece[dwBufferUsed] = pTextSource->awcBuffer[pTextSource->iCur++];
  594. if (lptszStencece[dwBufferUsed] >= 0x4E00 && lptszStencece[dwBufferUsed] <= 0x9FA5) {
  595. dwBufferUsed++;
  596. if (dwBufferUsed >= dwBufferSize) {
  597. LPVOID lpMem;
  598. lpMem = HeapReAlloc(GetProcessHeap(), 0, lptszStencece, dwBufferSize + BUFFER_GROW_UINT);
  599. if (!lpMem) {
  600. goto _heap_realloc_fail;
  601. } else {
  602. lptszStencece = (LPTSTR)lpMem ;
  603. dwBufferSize += BUFFER_GROW_UINT;
  604. }
  605. }
  606. } else {
  607. if (dwBufferUsed == 0) {
  608. ++dwBufferUsed;
  609. } else {
  610. --pTextSource->iCur;
  611. }
  612. _heap_realloc_fail:
  613. dwWordNum = pcWordBreaker->BreakText(lptszStencece, dwBufferUsed);
  614. dwWordNum = pcWordBreaker->GetBreakResult(&puResult);
  615. // To do .....
  616. DWORD dwSrcPos;
  617. dwSrcPos = pTextSource->iCur - dwBufferUsed;
  618. for (i = 0; i < dwWordNum; ++i) {
  619. pWordSink->PutWord(puResult[i], &pTextSource->awcBuffer[dwSrcPos], puResult[i], dwSrcPos);
  620. dwSrcPos += puResult[i];
  621. }
  622. puResult = NULL;
  623. dwBufferUsed = 0;
  624. }
  625. }
  626. if (FAILED(pTextSource->pfnFillTextBuffer(pTextSource))) {
  627. break;
  628. }
  629. }
  630. */