Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1537 lines
43 KiB

  1. // =========================================================================
  2. // Copyright (C) 1997 - 1998, Microsoft Corporation. All Rights Reserved.
  3. //
  4. // File Name : BASEAPI.CPP
  5. // Function : NLP BASE ENGINE API Definition
  6. // =========================================================================
  7. #include <string.h>
  8. #include <malloc.h>
  9. #include <sys\stat.h>
  10. #include "basecore.hpp"
  11. #include "basecode.hpp"
  12. #include "basedef.hpp"
  13. #include "basegbl.hpp"
  14. #include "MainDict.h"
  15. extern int Compose_RIEUL_Irregular (char *, char *);
  16. extern int Compose_HIEUH_Irregular (char *, char *);
  17. extern int Compose_PIEUP_Irregular (char *, char *);
  18. extern int Compose_TIEUT_Irregular (char *, char *);
  19. extern int Compose_SIOS_Irregular (char *, char *);
  20. extern BOOL Compose_YEO_Irregular (char *, char *);
  21. extern BOOL Compose_REO_REU_Irregular (char *, char *);
  22. extern BOOL Compose_GEORA_Irregular (char *, char *);
  23. extern BOOL Compose_Regular (char *, char *);
  24. extern void SetSilHeosa (int, WORD *);
  25. #include "stemkor.h"
  26. // by dhyu -- 1996. 1
  27. typedef struct
  28. {
  29. LPCSTR contract;
  30. LPCSTR noconstract;
  31. } contract_tossi;
  32. contract_tossi ContractTossi [] =
  33. {
  34. { "\xa4\xa4", "\xB4\xC2"},
  35. { "\xA4\xA9", "\xB8\xA6"},
  36. { "\xA4\xA4\xC4\xBF\xB3\xE7", "\xB4\xC2\xC4\xBF\xB3\xE7"},
  37. { NULL, NULL}
  38. };
  39. /*
  40. char ChangableFirstStem [][2] =
  41. {
  42. {__K_D_D, __V_m}, // ssangtikeut, eu
  43. {
  44. }
  45. */
  46. inline
  47. BOOL isHANGEUL(char cCh1,char cCh2)
  48. {
  49. unsigned char ch1,ch2 ;
  50. ch1=(unsigned char)cCh1;
  51. ch2 =(unsigned char)cCh2;
  52. if ( ((ch1 >= 0xb0) && (ch1 <= 0xc8)) && (ch2>=0xa1) )
  53. return TRUE;
  54. else if ( ((ch1 >= 0x81) && (ch1 <= 0xc5)) && ( ((ch2 >= 0x41) && (ch2 <= 0x5a)) || ((ch2 >= 0x61) && (ch2 <= 0x7a)) || ((ch2 >= 0x81) && (ch2 <= 0xa0)) ) )
  55. return TRUE;
  56. else if ( ((ch1 >= 0x81) && (ch1 <= 0xa0)) && (ch2 >= 0xa1) )
  57. return TRUE;
  58. //else if ( ((ch1 >= 0xca) && (ch1 <= 0xfe)) && (ch2 >= 0xa1) )
  59. // return TRUE;
  60. else if ((ch1 == 0xa4) && (ch2 >= 0xa1))
  61. return TRUE;
  62. return FALSE;
  63. }
  64. WINSRC StemmerInit(HSTM *hStm) // Stemmer Engine session Handle
  65. {
  66. STMI *pstmi;
  67. HGLOBAL hgbl;
  68. hgbl = GlobalAlloc(GHND, sizeof(STMI));
  69. if (hgbl == NULL) return FAIL;
  70. else
  71. *hStm = (HSTM) hgbl;
  72. pstmi = (STMI*)GlobalLock(hgbl);
  73. if (pstmi == NULL) return FAIL;
  74. pstmi->Option = 0x00000000;
  75. GlobalUnlock(hgbl);
  76. return NULL; // normal operation
  77. }
  78. WINSRC StemmerSetOption (HSTM hStm, UINT Option)
  79. {
  80. STMI *pstmi;
  81. HGLOBAL hgbl = (HGLOBAL) hStm;
  82. pstmi = (STMI *)GlobalLock(hgbl);
  83. if (pstmi == NULL)
  84. {
  85. MessageBox (NULL, "StemmerSetOption", "Fail", MB_OK);
  86. GlobalUnlock(hgbl);
  87. return srcModuleError | srcInvalidID;
  88. }
  89. pstmi->Option = Option;
  90. GlobalUnlock (hgbl);
  91. return NULL;
  92. }
  93. WINSRC StemmerGetOption (HSTM hStm, UINT *Option)
  94. {
  95. STMI *pstmi;
  96. HGLOBAL hgbl = (HGLOBAL) hStm;
  97. pstmi = (STMI *)GlobalLock(hgbl);
  98. if (pstmi == NULL)
  99. {
  100. GlobalUnlock(hgbl);
  101. return srcModuleError | srcInvalidID;
  102. }
  103. *Option = pstmi->Option;
  104. GlobalUnlock (hgbl);
  105. return NULL;
  106. }
  107. WINSRC StemmerOpenMdr(HSTM sid, char *lpspathMain) // Dictionary File path
  108. {
  109. STMI *pstmi;
  110. HGLOBAL hgbl;
  111. hgbl = (HGLOBAL) sid;
  112. pstmi = (STMI *)GlobalLock(hgbl);
  113. if (pstmi == NULL)
  114. {
  115. GlobalUnlock(hgbl);
  116. return srcModuleError | srcInvalidID;
  117. }
  118. if (lstrlen(lpspathMain) == 0)
  119. {
  120. GlobalUnlock(hgbl);
  121. return srcIOErrorMdr | srcInvalidMdr;
  122. }
  123. if (!OpenMainDict (lpspathMain))
  124. {
  125. GlobalUnlock(hgbl);
  126. return srcIOErrorMdr | srcInvalidMdr;
  127. }
  128. GlobalUnlock(hgbl);
  129. return NULL; // normal operation
  130. }
  131. WINSRC StemmerCloseMdr(HSTM sid)
  132. {
  133. STMI *pstmi;
  134. HGLOBAL hgbl;
  135. hgbl = (HGLOBAL) sid;
  136. pstmi = (STMI *)GlobalLock(hgbl);
  137. if (pstmi == NULL) return FAIL;
  138. if (pstmi->bMdr)
  139. CloseMainDict ();
  140. GlobalUnlock(hgbl);
  141. return NULL; // normal operation
  142. }
  143. WINSRC StemmerDecomposeW (HSTM hStm,
  144. LPCWSTR iword,
  145. LPWDOB lpSob)
  146. {
  147. LPSTR MultiByteIword;
  148. DOB sob;
  149. int index = 0;
  150. int len = WideCharToMultiByte (UWANSUNG_CODE_PAGE, 0, (LPCWSTR) iword, -1, NULL, 0, NULL, NULL);
  151. MultiByteIword = (LPSTR) LocalAlloc (LPTR, sizeof (char) * len);
  152. // add a check for this point
  153. if ( MultiByteIword == NULL ) {
  154. return srcModuleError;
  155. }
  156. len = WideCharToMultiByte (UWANSUNG_CODE_PAGE, 0, (LPCWSTR) iword, -1, MultiByteIword, len, NULL, NULL);
  157. sob.wordlist = (LPSTR) LocalAlloc (LPTR, sizeof (char) * lpSob->sch);
  158. // add a check for this point
  159. if ( sob.wordlist == NULL ) {
  160. LocalFree(MultiByteIword);
  161. return srcModuleError;
  162. }
  163. sob.sch = lpSob->sch;
  164. SRC src = StemmerDecompose(hStm, MultiByteIword, &sob);
  165. lpSob->num = sob.num;
  166. if (src == NULL)
  167. {
  168. char *tmpstr;
  169. for (int j = 0, index2 = 0; j < sob.num; j++)
  170. {
  171. tmpstr = sob.wordlist+index2;
  172. len = MultiByteToWideChar(UWANSUNG_CODE_PAGE, 0, tmpstr, -1, NULL, 0);
  173. LPWSTR tmpwstr = (LPWSTR) LocalAlloc (LPTR, sizeof (WCHAR) * len);
  174. // add a check for this point
  175. if ( tmpwstr == NULL ) {
  176. LocalFree (MultiByteIword);
  177. LocalFree (sob.wordlist);
  178. return srcModuleError;
  179. }
  180. MultiByteToWideChar(UWANSUNG_CODE_PAGE, 0, tmpstr, -1, (LPWSTR) tmpwstr, len);
  181. memcpy (lpSob->wordlist+index, tmpwstr, len*sizeof(WCHAR));
  182. memcpy (lpSob->wordlist+index+len, tmpstr+lstrlen (tmpstr)+1, 2);
  183. memcpy (lpSob->wordlist+index+len + 1, tmpwstr+len-1, sizeof(WCHAR));
  184. index += (len+2);
  185. index2 += (lstrlen(tmpstr)+4);
  186. LocalFree (tmpwstr);
  187. }
  188. }
  189. lpSob->len = (WORD)index;
  190. LocalFree (MultiByteIword);
  191. LocalFree (sob.wordlist);
  192. return src;
  193. }
  194. SRC GetOneResult (RLIST *rList, LPDOB lpSob)
  195. {
  196. WORD value;
  197. int count;
  198. if (rList->num >= rList->max)
  199. return srcNoMoreResult;
  200. lpSob->len = 0;
  201. lpSob->num = 0;
  202. for (UINT i = rList->num, index = 0; i < rList->max; i++)
  203. {
  204. count = 0;
  205. while (rList->next [index+count] != '+' && rList->next [index+count] != '\t')
  206. count++;
  207. if (lpSob->len + count < lpSob->sch)
  208. {
  209. memcpy (lpSob->wordlist+lpSob->len, rList->next+index, count);
  210. lpSob->num++;
  211. }
  212. else
  213. return srcOOM | srcExcessBuffer;
  214. lpSob->len += (WORD)count;
  215. lpSob->wordlist [lpSob->len++] = '\0';
  216. SetSilHeosa(rList->vbuf [i], &value);
  217. memcpy (lpSob->wordlist + lpSob->len, &value, 2);
  218. lpSob->wordlist [lpSob->len+2] = '\0';
  219. lpSob->len += 3;
  220. if (rList->next[index+count] == '\t')
  221. break;
  222. index += (count + 1);
  223. }
  224. rList->next += (index+count+1);
  225. rList->num = i+1;
  226. return NULL;
  227. }
  228. WINSRC StemmerDecompose(HSTM hstm,
  229. LPCSTR iword, // input word
  230. LPDOB psob) // the number of candidates
  231. {
  232. int len = lstrlen ((char *) iword);
  233. if (len >= 45)
  234. {
  235. psob->num = 1;
  236. lstrcpy ((LPSTR) psob->wordlist, (LPSTR) iword);
  237. psob->len = (WORD)len;
  238. return srcInvalid;
  239. }
  240. for (int i = 0; i < len; i += 2)
  241. if (!isHANGEUL (iword [i], iword [i+1]))
  242. {
  243. psob->num = 1;
  244. lstrcpy ((LPSTR) psob->wordlist, (LPSTR) iword);
  245. psob->len = (WORD)len;
  246. return srcInvalid;
  247. }
  248. STMI *pstmi;
  249. HGLOBAL hgbl = (HGLOBAL) hstm;
  250. pstmi = (STMI *)GlobalLock(hgbl);
  251. if (pstmi == NULL)
  252. {
  253. GlobalUnlock(hgbl);
  254. return srcModuleError | srcInvalidID;
  255. }
  256. BaseEngine BaseCheck;
  257. char lrgsz [400];
  258. memset (pstmi->rList.lrgsz, NULLCHAR, 400);
  259. lstrcpy (pstmi->rList.iword, iword);
  260. pstmi->rList.max = 0;
  261. BOOL affixFlag = TRUE;
  262. if (pstmi->Option & SO_ALONE)
  263. {
  264. int num = BaseCheck.NLP_BASE_ALONE (iword, lrgsz);
  265. if (num > 0)
  266. {
  267. affixFlag = FALSE;
  268. lstrcat (pstmi->rList.lrgsz, lrgsz);
  269. for (int i = 0; i < num; i++)
  270. pstmi->rList.vbuf [pstmi->rList.max + i] = BaseCheck.vbuf [i];
  271. pstmi->rList.max += num;
  272. }
  273. }
  274. if (pstmi->Option & SO_NOUNPHRASE)
  275. {
  276. int num = BaseCheck.NLP_BASE_NOUN (iword, lrgsz);
  277. if (num > 0)
  278. {
  279. affixFlag = FALSE;
  280. lstrcat (pstmi->rList.lrgsz, lrgsz);
  281. for (int i = 0; i < num; i++)
  282. pstmi->rList.vbuf [pstmi->rList.max + i] = BaseCheck.vbuf [i];
  283. pstmi->rList.max += num;
  284. }
  285. }
  286. if (pstmi->Option & SO_PREDICATE)
  287. {
  288. int num = BaseCheck.NLP_BASE_VERB (iword, lrgsz);
  289. if (num > 0)
  290. {
  291. lstrcat (pstmi->rList.lrgsz, lrgsz);
  292. for (int i = 0; i < num; i++)
  293. pstmi->rList.vbuf [pstmi->rList.max + i] = BaseCheck.vbuf [i];
  294. pstmi->rList.max += num;
  295. }
  296. }
  297. if (pstmi->Option & SO_COMPOUND)
  298. {
  299. if (pstmi->rList.max == 0)
  300. {
  301. int num = BaseCheck.NLP_BASE_COMPOUND (iword, lrgsz);
  302. if (num > 0)
  303. {
  304. lstrcpy (pstmi->rList.lrgsz, lrgsz);
  305. for (int i = 0; i < num; i++)
  306. pstmi->rList.vbuf [i] = BaseCheck.vbuf [i];
  307. pstmi->rList.max = num;
  308. }
  309. }
  310. }
  311. if (affixFlag && pstmi->Option & SO_SUFFIX)
  312. {
  313. int num = BaseCheck.NLP_BASE_AFFIX (iword, lrgsz);
  314. if (num > 0)
  315. {
  316. lstrcat (pstmi->rList.lrgsz, lrgsz);
  317. for (int i = 0; i < num; i++)
  318. pstmi->rList.vbuf [pstmi->rList.max + i] = BaseCheck.vbuf [i];
  319. pstmi->rList.max += num;
  320. }
  321. }
  322. pstmi->rList.num = 0;
  323. pstmi->rList.next = pstmi->rList.lrgsz;
  324. SRC src = GetOneResult (&(pstmi->rList), psob);
  325. if (src == srcNoMoreResult)
  326. {
  327. src = srcInvalid;
  328. lstrcpy (psob->wordlist, iword);
  329. }
  330. GlobalUnlock(hgbl);
  331. return src;
  332. }
  333. WINSRC StemmerDecomposeMoreW (HSTM hStm, LPCWSTR lpWord, LPWDOB lpSob)
  334. {
  335. LPSTR MultiByteIword;
  336. DOB sob;
  337. int len = WideCharToMultiByte (UWANSUNG_CODE_PAGE, 0, lpWord, -1, NULL, 0, NULL, NULL);
  338. MultiByteIword = (LPSTR) LocalAlloc (LPTR, sizeof (char) * len);
  339. // add a check for this point
  340. if ( MultiByteIword == NULL ) {
  341. return srcModuleError;
  342. }
  343. len = WideCharToMultiByte (UWANSUNG_CODE_PAGE, 0, lpWord, -1, MultiByteIword, len, NULL, NULL);
  344. sob.wordlist = (LPSTR) LocalAlloc (LPTR, sizeof (char) * lpSob->sch);
  345. // add a check for this point
  346. if ( sob.wordlist == NULL ) {
  347. LocalFree(MultiByteIword);
  348. return srcModuleError;
  349. }
  350. sob.sch = lpSob->sch;
  351. SRC src = StemmerDecomposeMore(hStm, MultiByteIword, &sob);
  352. lpSob->num = sob.num;
  353. int index = 0;
  354. if (src == NULL)
  355. {
  356. char *tmpstr;
  357. for (int j = 0, index2 = 0; j < sob.num; j++)
  358. {
  359. tmpstr = sob.wordlist+index2;
  360. len = MultiByteToWideChar(UWANSUNG_CODE_PAGE, 0, tmpstr, -1, NULL, 0);
  361. LPWSTR tmpwstr = (LPWSTR) LocalAlloc (LPTR, sizeof (WCHAR) * len);
  362. // add a check for this point
  363. if ( tmpwstr == NULL ) {
  364. LocalFree(MultiByteIword);
  365. LocalFree(sob.wordlist);
  366. return srcModuleError;
  367. }
  368. MultiByteToWideChar(UWANSUNG_CODE_PAGE, 0, tmpstr, -1, (LPWSTR) tmpwstr, len);
  369. memcpy (lpSob->wordlist+index, tmpwstr, len*sizeof(WCHAR));
  370. memcpy (lpSob->wordlist+index+len, tmpstr+lstrlen (tmpstr)+1, 2);
  371. memcpy (lpSob->wordlist+index+len + 1, tmpwstr+len-1, sizeof(WCHAR));
  372. index += (len+2);
  373. index2 += (lstrlen(tmpstr)+4);
  374. LocalFree (tmpwstr);
  375. }
  376. }
  377. lpSob->len = (WORD)index;
  378. LocalFree (MultiByteIword);
  379. LocalFree (sob.wordlist);
  380. return src;
  381. }
  382. WINSRC StemmerDecomposeMore (HSTM hStm, LPCSTR lpWord, LPDOB lpSob)
  383. {
  384. STMI *pstmi;
  385. HGLOBAL hgbl = (HGLOBAL) hStm;
  386. pstmi = (STMI *)GlobalLock(hgbl);
  387. if (pstmi == NULL)
  388. {
  389. GlobalUnlock(hgbl);
  390. return srcModuleError | srcInvalidID;
  391. }
  392. if (lstrcmp (pstmi->rList.iword, lpWord))
  393. {
  394. return srcModuleError;
  395. }
  396. SRC src = GetOneResult (&(pstmi->rList), lpSob);
  397. GlobalUnlock(hgbl);
  398. return src;
  399. }
  400. WINSRC StemmerEnumDecomposeW (HSTM hStm, LPCWSTR lpWord, LPWDOB lpSob, LPFNDECOMPOSEW lpfnCallBack)
  401. {
  402. LPSTR MultiByteIword;
  403. DOB sob;
  404. int len = lstrlen ((char *) lpWord);
  405. if (len >= 45)
  406. {
  407. lpSob->num = 1;
  408. wcscpy (lpSob->wordlist, lpWord);
  409. lpSob->len = (WORD)len;
  410. return srcInvalid;
  411. }
  412. for (int i = 0; i < len; i++)
  413. if (0xabff < lpWord [i] && lpWord [i] < 0xd7a4)
  414. {
  415. lpSob->num = 1;
  416. lstrcpy ((LPSTR) lpSob->wordlist, (LPSTR) lpWord);
  417. lpSob->len = (WORD)len;
  418. return srcInvalid;
  419. }
  420. STMI *pstmi;
  421. HGLOBAL hgbl = (HGLOBAL) hStm;
  422. pstmi = (STMI *)GlobalLock(hgbl);
  423. if (pstmi == NULL)
  424. {
  425. GlobalUnlock(hgbl);
  426. return srcModuleError | srcInvalidID;
  427. }
  428. BaseEngine BaseCheck;
  429. len = WideCharToMultiByte (UWANSUNG_CODE_PAGE, 0, lpWord, -1, NULL, 0, NULL, NULL);
  430. MultiByteIword = (LPSTR) LocalAlloc (LPTR, sizeof (char) * len);
  431. // add a check for this point
  432. if ( MultiByteIword == NULL ) {
  433. GlobalUnlock(hgbl);
  434. return srcModuleError;
  435. }
  436. len = WideCharToMultiByte (UWANSUNG_CODE_PAGE, 0, lpWord, -1, MultiByteIword, len, NULL, NULL);
  437. sob.wordlist = (LPSTR) LocalAlloc (LPTR, sizeof (char) * lpSob->sch);
  438. // add a check for this point
  439. if ( sob.wordlist == NULL ) {
  440. GlobalUnlock(hgbl);
  441. LocalFree(MultiByteIword);
  442. return srcModuleError;
  443. }
  444. sob.sch = lpSob->sch;
  445. char lrgsz [400];
  446. memset (pstmi->rList.lrgsz, NULLCHAR, 400);
  447. lstrcpy (pstmi->rList.iword, MultiByteIword);
  448. pstmi->rList.max = 0;
  449. int num = BaseCheck.NLP_BASE_NOUN (MultiByteIword, lrgsz);
  450. if (num > 0)
  451. {
  452. lstrcpy (pstmi->rList.lrgsz, lrgsz);
  453. for (int i = 0; i < num; i++)
  454. pstmi->rList.vbuf [i] = BaseCheck.vbuf [i];
  455. pstmi->rList.max = num;
  456. }
  457. num = BaseCheck.NLP_BASE_ALONE (MultiByteIword, lrgsz);
  458. if (num > 0)
  459. {
  460. lstrcat (pstmi->rList.lrgsz, lrgsz);
  461. for (int i = 0; i < num; i++)
  462. pstmi->rList.vbuf [pstmi->rList.max + i] = BaseCheck.vbuf [i];
  463. pstmi->rList.max += num;
  464. }
  465. num = BaseCheck.NLP_BASE_VERB (MultiByteIword, lrgsz);
  466. if (num > 0)
  467. {
  468. lstrcat (pstmi->rList.lrgsz, lrgsz);
  469. for (int i = 0; i < num; i++)
  470. pstmi->rList.vbuf [pstmi->rList.max + i] = BaseCheck.vbuf [i];
  471. pstmi->rList.max += num;
  472. }
  473. if (num == 0)
  474. {
  475. num = BaseCheck.NLP_BASE_COMPOUND (MultiByteIword, lrgsz);
  476. if (num > 0)
  477. {
  478. lstrcpy (pstmi->rList.lrgsz, lrgsz);
  479. for (int i = 0; i < num; i++)
  480. pstmi->rList.vbuf [i] = BaseCheck.vbuf [i];
  481. pstmi->rList.max = num;
  482. }
  483. }
  484. pstmi->rList.num = 0;
  485. pstmi->rList.next = pstmi->rList.lrgsz;
  486. while (GetOneResult (&(pstmi->rList), &sob) == NULL)
  487. {
  488. char *tmpstr;
  489. for (int j = 0, index2 = 0, index = 0; j < sob.num; j++)
  490. {
  491. tmpstr = sob.wordlist+index2;
  492. len = MultiByteToWideChar(UWANSUNG_CODE_PAGE, 0, tmpstr, -1, NULL, 0);
  493. LPWSTR tmpwstr = (LPWSTR) LocalAlloc (LPTR, sizeof (WCHAR) * len);
  494. // add a check for this point
  495. if ( tmpwstr == NULL ) {
  496. GlobalUnlock(hgbl);
  497. LocalFree (MultiByteIword);
  498. LocalFree (sob.wordlist);
  499. return srcModuleError;
  500. }
  501. MultiByteToWideChar(UWANSUNG_CODE_PAGE, 0, tmpstr, -1, (LPWSTR) tmpwstr, len);
  502. memcpy (lpSob->wordlist+index, tmpwstr, len*sizeof(WCHAR));
  503. memcpy (lpSob->wordlist+index+len, tmpstr+lstrlen (tmpstr)+1, 2);
  504. memcpy (lpSob->wordlist+index+len + 1, tmpwstr+len-1, sizeof(WCHAR));
  505. index += (len+2);
  506. index2 += (lstrlen(tmpstr)+4);
  507. LocalFree (tmpwstr);
  508. }
  509. lpSob->len = (WORD)index;
  510. lpSob->num = sob.num;
  511. lpfnCallBack (lpSob);
  512. }
  513. GlobalUnlock(hgbl);
  514. LocalFree (MultiByteIword);
  515. LocalFree (sob.wordlist);
  516. return NULL;
  517. }
  518. WINSRC StemmerEnumDecompose (HSTM hStm, LPCSTR lpWord, LPDOB lpSob, LPFNDECOMPOSE lpfnCallBack)
  519. {
  520. int len = lstrlen ((char *) lpWord);
  521. if (len >= 45)
  522. {
  523. lpSob->num = 1;
  524. lstrcpy ((LPSTR) lpSob->wordlist, lpWord);
  525. lpSob->len = (WORD)len;
  526. return srcInvalid;
  527. }
  528. for (int i = 0; i < len; i += 2)
  529. if (!isHANGEUL (lpWord [i], lpWord [i+1]))
  530. {
  531. lpSob->num = 1;
  532. lstrcpy ((LPSTR) lpSob->wordlist, (LPSTR) lpWord);
  533. lpSob->len = (WORD)len;
  534. return srcInvalid;
  535. }
  536. STMI *pstmi;
  537. HGLOBAL hgbl = (HGLOBAL) hStm;
  538. pstmi = (STMI *)GlobalLock(hgbl);
  539. if (pstmi == NULL)
  540. {
  541. GlobalUnlock(hgbl);
  542. return srcModuleError | srcInvalidID;
  543. }
  544. BaseEngine BaseCheck;
  545. char lrgsz [400];
  546. memset (pstmi->rList.lrgsz, NULLCHAR, 400);
  547. lstrcpy (pstmi->rList.iword, lpWord);
  548. int num = BaseCheck.NLP_BASE_NOUN (lpWord, lrgsz);
  549. pstmi->rList.max = 0;
  550. if (num > 0)
  551. {
  552. lstrcpy (pstmi->rList.lrgsz, lrgsz);
  553. for (int i = 0; i < num; i++)
  554. pstmi->rList.vbuf [i] = BaseCheck.vbuf [i];
  555. pstmi->rList.max = num;
  556. }
  557. num = BaseCheck.NLP_BASE_ALONE (lpWord, lrgsz);
  558. if (num > 0)
  559. {
  560. lstrcat (pstmi->rList.lrgsz, lrgsz);
  561. for (int i = 0; i < num; i++)
  562. pstmi->rList.vbuf [pstmi->rList.max + i] = BaseCheck.vbuf [i];
  563. pstmi->rList.max += num;
  564. }
  565. num = BaseCheck.NLP_BASE_VERB (lpWord, lrgsz);
  566. if (num > 0)
  567. {
  568. lstrcat (pstmi->rList.lrgsz, lrgsz);
  569. for (int i = 0; i < num; i++)
  570. pstmi->rList.vbuf [pstmi->rList.max + i] = BaseCheck.vbuf [i];
  571. pstmi->rList.max += num;
  572. }
  573. if (num == 0)
  574. {
  575. num = BaseCheck.NLP_BASE_COMPOUND (lpWord, lrgsz);
  576. if (num > 0)
  577. {
  578. lstrcpy (pstmi->rList.lrgsz, lrgsz);
  579. for (int i = 0; i < num; i++)
  580. pstmi->rList.vbuf [i] = BaseCheck.vbuf [i];
  581. pstmi->rList.max = num;
  582. }
  583. }
  584. pstmi->rList.num = 0;
  585. pstmi->rList.next = pstmi->rList.lrgsz;
  586. while (GetOneResult (&(pstmi->rList), lpSob) == NULL)
  587. lpfnCallBack (lpSob);
  588. GlobalUnlock(hgbl);
  589. return NULL;
  590. }
  591. WINSRC StemmerComposeW (HSTM hstm, WCIB sib, LPWSTR rword)
  592. {
  593. CIB tmpsib;
  594. LPSTR MultiByteRword;
  595. int len = (wcslen (sib.silsa) + 1) * 2;
  596. tmpsib.silsa = (LPSTR) LocalAlloc (LPTR, sizeof (char) * len);
  597. // add a check for this point.
  598. if ( tmpsib.silsa == NULL ) {
  599. return srcModuleError;
  600. }
  601. len = WideCharToMultiByte (CP_ACP, 0, (LPCWSTR) sib.silsa, -1, tmpsib.silsa, len, NULL, NULL);
  602. int len2 = (wcslen (sib.heosa) + 1) * 2;
  603. tmpsib.heosa = (LPSTR) LocalAlloc (LPTR, sizeof (char) * len2);
  604. // add a check for this point.
  605. if ( tmpsib.heosa == NULL ) {
  606. LocalFree(tmpsib.silsa);
  607. return srcModuleError;
  608. }
  609. len2 = WideCharToMultiByte (UWANSUNG_CODE_PAGE, 0, (LPCWSTR) sib.heosa, -1, tmpsib.heosa, len2, NULL, NULL);
  610. MultiByteRword = (LPSTR) LocalAlloc (LPTR, sizeof (char) * (len + len2));
  611. // add a check for this point.
  612. if ( MultiByteRword == NULL ) {
  613. LocalFree(tmpsib.silsa);
  614. LocalFree(tmpsib.heosa);
  615. return srcModuleError;
  616. }
  617. tmpsib.pos = sib.pos;
  618. SRC src = StemmerCompose (hstm, tmpsib, MultiByteRword);
  619. len = MultiByteToWideChar(UWANSUNG_CODE_PAGE, 0, MultiByteRword, -1, NULL, 0);
  620. MultiByteToWideChar(UWANSUNG_CODE_PAGE, 0, MultiByteRword, -1, (LPWSTR) rword, len);
  621. LocalFree (tmpsib.silsa);
  622. LocalFree (tmpsib.heosa);
  623. LocalFree (MultiByteRword);
  624. return src;
  625. }
  626. int CVCheckNP(char *stem, char *ending, BYTE action)
  627. // Check vowel harmony for NOUN + Tossi. If the last letter of stem is RIEUR, that should seriously be considered.
  628. {
  629. int len = strlen (ending) + 1;
  630. if ((action & 0x80) && (action & 0x40)) // CV = 11
  631. return VALID;
  632. if (!(action & 0x80) && (action & 0x40)) { // CV = 01
  633. if (stem[0] >= __V_k)
  634. return VALID;
  635. if (stem[0] == __K_R && ending[0] == __K_R && ending[1] == __V_h)
  636. // Tossi is "RO"(CV=01) and the last letter of stem is RIEUR.
  637. return VALID;
  638. if (ending[0] == __K_S && ending[1] == __V_j) {
  639. // "SEO" --> "E SEO"
  640. memmove (ending+2, ending, len);
  641. ending [0] = __K_I;
  642. ending [1] = __V_p;
  643. return MORECHECK;
  644. }
  645. if (ending[0] == __K_N && ending[1] == __V_m && ending[2] == __K_N) {
  646. // "NEUN" --> "EUN"
  647. ending [0] = __K_I;
  648. }
  649. if (ending[0] == __K_G && ending[1] == __V_k) {
  650. // "GA" --> "I"
  651. ending[0] = __K_I;
  652. ending[1] = __V_l;
  653. return MORECHECK;
  654. }
  655. if (ending[0] == __K_I && ending[1] == __V_hk) {
  656. // "WA" --> "GWA"
  657. ending [0] = __K_G;
  658. return MORECHECK;
  659. }
  660. if (ending [0] == __K_R) {
  661. if (ending[1] == __V_m && ending[2] == __K_R) {
  662. // "REUL" --> "EUL"
  663. ending [0] = __K_I;
  664. return INVALID;
  665. }
  666. if (ending[1] == __V_h) {
  667. // "RO" --> "EU RO"
  668. memmove (ending+2, ending, len);
  669. ending [0] = __K_I;
  670. ending [1] = __V_m;
  671. return MORECHECK;
  672. }
  673. // add "I" to the first part of ending
  674. memmove (ending+2, ending, len);
  675. ending [0] = __K_I;
  676. ending [1] = __V_l;
  677. return MORECHECK;
  678. }
  679. if ((ending [0] == __K_N) ||
  680. (ending [0] == __K_S && ending [1] == __V_l) || // "SI"
  681. (ending [0] == __K_I && ending [1] == __V_u) || // "YEO"
  682. (ending[0] == __K_I && ending[1] == __V_i && ending[2] == __K_M // "YA MAL RO" --> "I YA MAL RO"
  683. && ending[3] == __V_k && ending[4] == __K_R && ending[5] == __K_R && ending[6] == __V_h))
  684. {
  685. // Add "I" to the first part of ending
  686. memmove (ending+2, ending, len);
  687. ending [0] = __K_I;
  688. ending [1] = __V_l;
  689. return MORECHECK;
  690. }
  691. return MORECHECK;
  692. }
  693. // CV==10
  694. if (stem[0] >= __V_k) {
  695. if (ending [0] == __K_G) {
  696. // "GWA" --> "WA"
  697. ending [0] = __K_I;
  698. return MORECHECK;
  699. }
  700. if (ending[1] == __V_l) {
  701. if (len == 3) {
  702. // "I" --> "GA"
  703. ending [0] = __K_G;
  704. ending [1] = __V_k;
  705. return MORECHECK;
  706. }
  707. else {
  708. // remove "I"
  709. memmove (ending, ending+2, len-2);
  710. return INVALID;
  711. }
  712. }
  713. if (ending[1] == __V_k)
  714. {
  715. ending [1] = __V_i;
  716. return MORECHECK;
  717. }
  718. if (ending[2] == __K_N) {
  719. // "EUN" --> "NEUN"
  720. ending [0] = __K_N;
  721. return MORECHECK;
  722. }
  723. if (len == 4) {
  724. // "EUL" --> "REUL"
  725. ending [0] = __K_R;
  726. return MORECHECK;
  727. }
  728. else {
  729. // Remove "EU"
  730. memmove (ending, ending+2, len-2);
  731. return INVALID;
  732. }
  733. }
  734. if (stem[0] == __K_R && ending[0] == __K_I && ending[1] == __V_m
  735. && ending[2] == __K_R && ending[3] == __V_h) {
  736. // Remove "EU"
  737. memmove (ending, ending+2, len-2);
  738. return INVALID;
  739. }
  740. return VALID;
  741. }
  742. WINSRC StemmerCompose (HSTM hstm, CIB sib, LPSTR rword)
  743. {
  744. STMI *pstmi;
  745. HGLOBAL hgbl = (HGLOBAL) hstm;
  746. int ret, i;
  747. BYTE action;
  748. pstmi = (STMI *)GlobalLock(hgbl);
  749. if (pstmi == NULL)
  750. {
  751. GlobalUnlock(hgbl);
  752. return srcModuleError | srcInvalidID;
  753. }
  754. lstrcpy (rword, (char *)sib.silsa);
  755. for (i = 0; sib.silsa [i] != 0; i += 2)
  756. if (!isHANGEUL (sib.silsa [i], sib.silsa [i+1]))
  757. {
  758. lstrcat (rword, sib.heosa);
  759. return NULL;
  760. }
  761. for (i = 0; sib.heosa [i] != 0; i +=2)
  762. if (!isHANGEUL (sib.heosa [i], sib.heosa [i+1]))
  763. {
  764. lstrcat (rword, sib.heosa);
  765. return NULL;
  766. }
  767. CODECONVERT conv;
  768. char *incode = (char *) LocalAlloc (LPTR, sizeof (char) * (lstrlen (sib.silsa)*3+1 + lstrlen (sib.heosa)*3+7));
  769. // add a check for this point.
  770. if ( incode == NULL ) {
  771. GlobalUnlock(hgbl);
  772. return srcModuleError;
  773. }
  774. char *inheosa = (char *) LocalAlloc (LPTR, sizeof (char) * (lstrlen (sib.heosa)*3+7));
  775. // add a check for this point.
  776. if ( inheosa == NULL ) {
  777. GlobalUnlock(hgbl);
  778. LocalFree(incode);
  779. return srcModuleError;
  780. }
  781. conv.HAN2INS (sib.silsa, incode, codeWanSeong);
  782. conv.HAN2INR (sib.heosa, inheosa, codeWanSeong);
  783. LPSTR tmptossi = (LPSTR) LocalAlloc (LPTR, sizeof (char) * lstrlen (sib.heosa)*2 );
  784. // add a check for this point
  785. if (tmptossi == NULL ) {
  786. GlobalUnlock(hgbl);
  787. LocalFree(incode);
  788. LocalFree(inheosa);
  789. return srcModuleError;
  790. }
  791. char *inending = (char *) LocalAlloc (LPTR, sizeof (char) * (lstrlen(sib.heosa)*3+7));
  792. // add a check for this point
  793. if ( inending== NULL ) {
  794. GlobalUnlock(hgbl);
  795. LocalFree(incode);
  796. LocalFree(inheosa);
  797. LocalFree(tmptossi);
  798. return srcModuleError;
  799. }
  800. char *inrword = (char *) LocalAlloc (LPTR, sizeof (char) * (lstrlen(sib.silsa)*3+lstrlen(sib.heosa)*3+6));
  801. // add a check for this point
  802. if (inrword == NULL ) {
  803. GlobalUnlock(hgbl);
  804. LocalFree(incode);
  805. LocalFree(inheosa);
  806. LocalFree(tmptossi);
  807. LocalFree(inending);
  808. return srcModuleError;
  809. }
  810. switch (sib.pos & 0x0f00)
  811. {
  812. case POS_NOUN :
  813. case POS_PRONOUN :
  814. case POS_NUMBER :
  815. lstrcpy (tmptossi, sib.heosa);
  816. if (FindHeosaWord (inheosa, _TOSSI, &action) & FINAL)
  817. {
  818. conv.ReverseIN (inheosa, inending);
  819. conv.ReverseIN (incode, inrword);
  820. CVCheckNP (inrword, inending, action);
  821. conv.INS2HAN (inending, tmptossi, codeWanSeong);
  822. // we should check contraction tossi, for example, Nieun, Rieul
  823. for (i = 0; ContractTossi [i].contract != NULL; i++)
  824. if (lstrcmp (ContractTossi [i].contract, tmptossi)==0)
  825. conv.HAN2INS ((char *)tmptossi, inending, codeWanSeong);
  826. lstrcat (incode, inending);
  827. conv.INS2HAN(incode, (char *)rword, codeWanSeong);
  828. //LocalFree (incode);
  829. LocalFree (inheosa);
  830. LocalFree (tmptossi);
  831. LocalFree (inending);
  832. LocalFree (inrword);
  833. GlobalUnlock (hgbl);
  834. return NULL;
  835. }
  836. lstrcat (rword, tmptossi);
  837. LocalFree (incode);
  838. LocalFree (inheosa);
  839. LocalFree (tmptossi);
  840. LocalFree (inending);
  841. LocalFree (inrword);
  842. GlobalUnlock (hgbl);
  843. return srcComposeError;
  844. break;
  845. case POS_VERB :
  846. case POS_ADJECTIVE :
  847. case POS_AUXVERB :
  848. case POS_AUXADJ :
  849. conv.HAN2INS ((char *)sib.heosa, inending, codeWanSeong);
  850. conv.HAN2INR ((char *)sib.silsa, incode, codeWanSeong);
  851. if ((ret = Compose_RIEUL_Irregular (incode, inending)) != NOT_COMPOSED)
  852. goto ErrorCheck;
  853. if ((ret = Compose_HIEUH_Irregular (incode, inending)) != NOT_COMPOSED)
  854. goto ErrorCheck;
  855. if ((ret = Compose_PIEUP_Irregular (incode, inending)) != NOT_COMPOSED)
  856. goto ErrorCheck;
  857. if ((ret = Compose_TIEUT_Irregular (incode, inending)) != NOT_COMPOSED)
  858. goto ErrorCheck;
  859. if ((ret = Compose_SIOS_Irregular (incode, inending)) != NOT_COMPOSED)
  860. goto ErrorCheck;
  861. if (Compose_YEO_Irregular (incode, inending))
  862. goto Quit;
  863. if (Compose_REO_REU_Irregular (incode, inending))
  864. goto Quit;
  865. if (Compose_GEORA_Irregular (incode, inending))
  866. goto Quit;
  867. Compose_Regular (incode, inending);
  868. ErrorCheck : if (ret == COMPOSE_ERROR)
  869. {
  870. lstrcat (rword, sib.heosa);
  871. LocalFree (incode);
  872. LocalFree (inheosa);
  873. LocalFree (tmptossi);
  874. LocalFree (inending);
  875. LocalFree (inrword);
  876. GlobalUnlock (hgbl);
  877. return srcComposeError;
  878. }
  879. Quit: conv.ReverseIN (incode, inrword);
  880. lstrcat (inrword, inending);
  881. conv.INS2HAN (inrword, (char *)rword, codeWanSeong);
  882. break;
  883. default :
  884. lstrcat (rword, sib.heosa);
  885. LocalFree (incode);
  886. LocalFree (inheosa);
  887. LocalFree (tmptossi);
  888. LocalFree (inending);
  889. LocalFree (inrword);
  890. GlobalUnlock (hgbl);
  891. return srcComposeError;
  892. }
  893. LocalFree (incode);
  894. LocalFree (inheosa);
  895. LocalFree (tmptossi);
  896. LocalFree (inending);
  897. LocalFree (inrword);
  898. GlobalUnlock (hgbl);
  899. return NULL;
  900. }
  901. WINSRC StemmerTerminate(HSTM hstm)
  902. {
  903. STMI *pstmi;
  904. HGLOBAL hgbl = (HGLOBAL) hstm;
  905. pstmi = (STMI *)GlobalLock(hgbl);
  906. if (pstmi == NULL)
  907. {
  908. GlobalUnlock(hgbl);
  909. return srcModuleError | srcInvalidID;
  910. }
  911. GlobalUnlock (hgbl);
  912. GlobalFree (hgbl);
  913. return NULL; //normal operation
  914. }
  915. WINSRC StemmerOpenUdr (HSTM stmi, LPCSTR lpPathUdr)
  916. {
  917. return NULL;
  918. }
  919. WINSRC StemmerCloseUdr (HSTM stmi)
  920. {
  921. return NULL;
  922. }
  923. WINSRC StemmerCompareW (HSTM hstm, LPCWSTR lpStr1, LPCWSTR lpStr2, LPWSTR lpStem, LPWSTR lpEnding1, LPWSTR lpEnding2, WORD *pos)
  924. {
  925. LPSTR MultiByteStr1, MultiByteStr2, MultiByteStem, MultiByteEnding1, MultiByteEnding2;
  926. int len1 = WideCharToMultiByte (UWANSUNG_CODE_PAGE, 0, lpStr1, -1, NULL, 0, NULL, NULL);
  927. MultiByteStr1 = (LPSTR) LocalAlloc (LPTR, sizeof (char) * len1);
  928. // add a check for this point.
  929. if (MultiByteStr1 == NULL ) {
  930. return srcModuleError;
  931. }
  932. len1 = WideCharToMultiByte (UWANSUNG_CODE_PAGE, 0, lpStr1, -1, MultiByteStr1, len1, NULL, NULL);
  933. int len2 = WideCharToMultiByte (UWANSUNG_CODE_PAGE, 0, lpStr2, -1, NULL, 0, NULL, NULL);
  934. MultiByteStr2 = (LPSTR) LocalAlloc (LPTR, sizeof (char) * len2);
  935. // add a check for this point.
  936. if (MultiByteStr2 == NULL ) {
  937. LocalFree(MultiByteStr1);
  938. return srcModuleError;
  939. }
  940. len2 = WideCharToMultiByte (UWANSUNG_CODE_PAGE, 0, lpStr2, -1, MultiByteStr2, len2, NULL, NULL);
  941. int len = len1 > len2 ? len1 : len2;
  942. MultiByteStem = (LPSTR) LocalAlloc (LPTR, sizeof (char) * len);
  943. // add a check for this point.
  944. if (MultiByteStem == NULL ) {
  945. LocalFree(MultiByteStr1);
  946. LocalFree(MultiByteStr2);
  947. return srcModuleError;
  948. }
  949. MultiByteEnding1 = (LPSTR) LocalAlloc (LPTR, sizeof (char) * len);
  950. // add a check for this point.
  951. if (MultiByteEnding1 == NULL ) {
  952. LocalFree(MultiByteStr1);
  953. LocalFree(MultiByteStr2);
  954. LocalFree(MultiByteStem);
  955. return srcModuleError;
  956. }
  957. MultiByteEnding2 = (LPSTR) LocalAlloc (LPTR, sizeof (char) * len);
  958. // add a check for this point.
  959. if (MultiByteEnding2 == NULL ) {
  960. LocalFree(MultiByteStr1);
  961. LocalFree(MultiByteStr2);
  962. LocalFree(MultiByteStem);
  963. LocalFree(MultiByteEnding1);
  964. return srcModuleError;
  965. }
  966. SRC src = StemmerCompare(hstm, MultiByteStr1, MultiByteStr2, MultiByteStem, MultiByteEnding1, MultiByteEnding2, pos);
  967. MultiByteToWideChar(UWANSUNG_CODE_PAGE, 0, MultiByteStem, -1, lpStem, sizeof (lpStem));
  968. MultiByteToWideChar(UWANSUNG_CODE_PAGE, 0, MultiByteEnding1, -1, lpEnding1, sizeof (lpEnding1));
  969. MultiByteToWideChar(UWANSUNG_CODE_PAGE, 0, MultiByteEnding2, -1, lpEnding2, sizeof (lpEnding2));
  970. LocalFree (MultiByteStr1);
  971. LocalFree (MultiByteStr2);
  972. LocalFree (MultiByteStem);
  973. LocalFree (MultiByteEnding1);
  974. LocalFree (MultiByteEnding2);
  975. return src;
  976. }
  977. WINSRC StemmerCompare (HSTM hstm, LPCSTR lpStr1, LPCSTR lpStr2, LPSTR lpStem, LPSTR lpEnding1, LPSTR lpEnding2, WORD *pos)
  978. {
  979. // First, check the chosung of two strings
  980. // if they are different, we may not use stemming.
  981. CODECONVERT conv;
  982. char inheosa1 [80], inheosa2 [80];
  983. BYTE action;
  984. char *incodeStr1 = new char [lstrlen (lpStr1) * 4 + 1];
  985. char *incodeStr2 = new char [lstrlen (lpStr2) * 4 + 1];
  986. conv.HAN2INS ((char *)lpStr1, incodeStr1, codeWanSeong);
  987. conv.HAN2INS ((char *)lpStr2, incodeStr2, codeWanSeong);
  988. if (incodeStr1 [0] != incodeStr2 [0])
  989. return srcInvalid;
  990. if (incodeStr1 [1] != incodeStr2 [1])
  991. {
  992. return srcInvalid;
  993. }
  994. delete incodeStr1;
  995. delete incodeStr2;
  996. STMI *pstmi;
  997. HGLOBAL hgbl = (HGLOBAL) hstm;
  998. pstmi = (STMI *)GlobalLock(hgbl);
  999. if (pstmi == NULL)
  1000. {
  1001. GlobalUnlock(hgbl);
  1002. return srcModuleError | srcInvalidID;
  1003. }
  1004. BaseEngine BaseCheck;
  1005. char stem1[10][100], stem2[10][100], ending1[10][100], ending2[10][100], lrgsz [400];
  1006. int num1, num2, count;
  1007. WORD winfo [10];
  1008. if ((pstmi->Option & SO_NOUNPHRASE) && (pstmi->Option & (SO_NP_NOUN | SO_NP_PRONOUN | SO_NP_NUMBER | SO_NP_DEPENDENT)))
  1009. {
  1010. int num = BaseCheck.NLP_BASE_NOUN (lpStr1, lrgsz);
  1011. BOOL first = TRUE;
  1012. for (int i = 0, index = 0, l = 0, index2 = 0; i < num; i++)
  1013. {
  1014. count = 0;
  1015. while (lrgsz [index+count] != '+' && lrgsz[index+count] != '\t')
  1016. count++;
  1017. if (first)
  1018. {
  1019. memcpy (stem1 [l], lrgsz+index, count);
  1020. stem1 [l][count] = '\0';
  1021. winfo [l] = BaseCheck.vbuf [i];
  1022. first = FALSE;
  1023. }
  1024. else
  1025. {
  1026. memcpy (ending1 [l]+index2, lrgsz+index, count);
  1027. index2 += count;
  1028. }
  1029. if (lrgsz[index+count] == '\t')
  1030. {
  1031. ending1 [l][index2] = '\0';
  1032. l++;
  1033. first = TRUE;
  1034. index2 = 0;
  1035. }
  1036. index += (count + 1);
  1037. }
  1038. num1 = l;
  1039. num = BaseCheck.NLP_BASE_NOUN (lpStr2, lrgsz);
  1040. for (i = 0, index = 0, l = 0, index2 = 0; i < num; i++)
  1041. {
  1042. count = 0;
  1043. while (lrgsz [index+count] != '+' && lrgsz [index+count] != '\t')
  1044. count++;
  1045. if (first)
  1046. {
  1047. memcpy (stem2 [l], lrgsz+index, count);
  1048. stem2 [l][count] = '\0';
  1049. first = FALSE;
  1050. }
  1051. else
  1052. {
  1053. memcpy (ending2 [l]+index2, lrgsz+index, count);
  1054. index2 += count;
  1055. }
  1056. if (lrgsz[index+count] == '\t')
  1057. {
  1058. ending2 [l][index2] = '\0';
  1059. l++;
  1060. first = TRUE;
  1061. index2 = 0;
  1062. }
  1063. index += (count + 1);
  1064. }
  1065. num2 = l;
  1066. int j;
  1067. for (i = 0; i < num1; i++)
  1068. {
  1069. for (j = 0; j < num2; j++)
  1070. if (lstrcmp (stem1[i], stem2 [j]) == 0)
  1071. break;
  1072. if (j != num2)
  1073. break;
  1074. }
  1075. if (i != num1)
  1076. {
  1077. lstrcpy (lpStem, stem1 [i]);
  1078. lstrcpy (lpEnding1, ending1 [i]);
  1079. lstrcpy (lpEnding2, ending2 [j]);
  1080. *pos = winfo [i];
  1081. GlobalUnlock (hgbl);
  1082. return NULL;
  1083. }
  1084. }
  1085. if (pstmi->Option & (SO_PREDICATE | SO_AUXILIARY))
  1086. {
  1087. int num = BaseCheck.NLP_BASE_VERB (lpStr1, lrgsz);
  1088. BOOL first = TRUE;
  1089. for (int i = 0, index = 0, l = 0, index2 = 0; i < num; i++)
  1090. {
  1091. count = 0;
  1092. while (lrgsz [index+count] != '+' && lrgsz[index+count] != '\t')
  1093. count++;
  1094. if (first)
  1095. {
  1096. memcpy (stem1 [l], lrgsz+index, count);
  1097. stem1 [l][count] = '\0';
  1098. winfo [l] = BaseCheck.vbuf [i];
  1099. first = FALSE;
  1100. }
  1101. else
  1102. {
  1103. memcpy (ending1 [l]+index2, lrgsz+index, count);
  1104. index2 += count;
  1105. }
  1106. if (lrgsz[index+count] == '\t')
  1107. {
  1108. ending1 [l][index2] = '\0';
  1109. l++;
  1110. first = TRUE;
  1111. index2 = 0;
  1112. }
  1113. index += (count + 1);
  1114. }
  1115. num1 = l;
  1116. num = BaseCheck.NLP_BASE_VERB (lpStr2, lrgsz);
  1117. for (i = 0, index = 0, l = 0, index2 = 0; i < num; i++)
  1118. {
  1119. count = 0;
  1120. while (lrgsz [index+count] != '+' && lrgsz [index+count] != '\t')
  1121. count++;
  1122. if (first)
  1123. {
  1124. memcpy (stem2 [l], lrgsz+index, count);
  1125. stem2 [l][count] = '\0';
  1126. first = FALSE;
  1127. }
  1128. else
  1129. {
  1130. memcpy (ending2 [l]+index2, lrgsz+index, count);
  1131. index2 += count;
  1132. }
  1133. if (lrgsz[index+count] == '\t')
  1134. {
  1135. ending2 [l][index2] = '\0';
  1136. l++;
  1137. first = TRUE;
  1138. index2 = 0;
  1139. }
  1140. index += (count + 1);
  1141. }
  1142. num2 = l;
  1143. int j;
  1144. for (i = 0; i < num1; i++)
  1145. {
  1146. for (j = 0; j < num2; j++)
  1147. if (lstrcmp (stem1[i], stem2 [j]) == 0)
  1148. break;
  1149. if (j != num2)
  1150. break;
  1151. }
  1152. if (i != num1)
  1153. {
  1154. lstrcpy (lpStem, stem1 [i]);
  1155. lstrcpy (lpEnding1, ending1 [i]);
  1156. lstrcpy (lpEnding2, ending2 [j]);
  1157. *pos = winfo [i];
  1158. GlobalUnlock (hgbl);
  1159. return NULL;
  1160. }
  1161. }
  1162. // for proper noun, for example, name
  1163. if (pstmi->Option & SO_NP_PROPER)
  1164. {
  1165. int len1 = lstrlen(lpStr1);
  1166. int len2 = lstrlen(lpStr2);
  1167. int shortlen = len1 > len2 ? len2 : len1;
  1168. if (strncmp (lpStr1, lpStr2, shortlen) == 0)
  1169. {
  1170. lstrcpy (lpStem, lpStr1);
  1171. lpStem [shortlen] = '\0';
  1172. char index [1];
  1173. index[0] = 'm';
  1174. CODECONVERT Conv;
  1175. BOOL res1 = TRUE, res2= TRUE;
  1176. lstrcpy (lpEnding1, lpStr1 + shortlen);
  1177. lstrcpy (lpEnding2, lpStr2 + shortlen);
  1178. if (lstrlen (lpEnding1))
  1179. {
  1180. Conv.HAN2INS ((char *)lpEnding1, inheosa1, codeWanSeong);
  1181. if (!(FindHeosaWord(inheosa1, _TOSSI, &action) & FINAL))
  1182. res1 = FALSE;
  1183. }
  1184. if (lstrlen (lpEnding2))
  1185. {
  1186. Conv.HAN2INS ((char *)lpEnding2, inheosa2, codeWanSeong);
  1187. if (!(FindHeosaWord(inheosa2, _TOSSI, &action) & FINAL))
  1188. res2 = FALSE;
  1189. }
  1190. if (res1 && res2)
  1191. {
  1192. *pos = POS_NOUN | PROPER_NOUN;
  1193. GlobalUnlock (hgbl);
  1194. return NULL;
  1195. }
  1196. }
  1197. }
  1198. GlobalUnlock (hgbl);
  1199. return srcInvalid;
  1200. }
  1201. WINSRC StemmerIsEndingW (HSTM hstm, LPCWSTR lpStr, UINT flag, BOOL *found)
  1202. {
  1203. LPSTR MultiByteStr;
  1204. int len = WideCharToMultiByte (UWANSUNG_CODE_PAGE, 0, lpStr, -1, NULL, 0, NULL, NULL);
  1205. MultiByteStr = (LPSTR) LocalAlloc (LPTR, len);
  1206. // add a check for this point
  1207. if (MultiByteStr == NULL ) {
  1208. return srcModuleError;
  1209. }
  1210. len = WideCharToMultiByte (UWANSUNG_CODE_PAGE, 0, lpStr, -1, MultiByteStr, len, NULL, NULL);
  1211. SRC src = StemmerIsEnding(hstm, MultiByteStr, flag, found);
  1212. LocalFree (MultiByteStr);
  1213. return src;
  1214. }
  1215. WINSRC StemmerIsEnding (HSTM hstm, LPCSTR lpStr, UINT flag, BOOL *found)
  1216. {
  1217. BOOL tossiCheck, endingCheck;
  1218. switch (flag)
  1219. {
  1220. case IS_TOSSI : tossiCheck = TRUE; endingCheck = FALSE; break;
  1221. case IS_ENDING : endingCheck = TRUE; tossiCheck = FALSE; break;
  1222. case IS_TOSSI | IS_ENDING : tossiCheck = endingCheck = TRUE; break;
  1223. default : return srcModuleError;
  1224. }
  1225. STMI *pstmi;
  1226. HGLOBAL hgbl = (HGLOBAL) hstm;
  1227. pstmi = (STMI *)GlobalLock(hgbl);
  1228. if (pstmi == NULL)
  1229. {
  1230. GlobalUnlock(hgbl);
  1231. return srcModuleError | srcInvalidID;
  1232. }
  1233. BYTE action;
  1234. char *inheosa = (char *)LocalAlloc (LPTR, lstrlen(lpStr) * 4 + 1);
  1235. // add a check for this point
  1236. if (inheosa == NULL ) {
  1237. GlobalUnlock(hgbl);
  1238. return srcModuleError;
  1239. }
  1240. CODECONVERT Conv;
  1241. Conv.HAN2INR ((char *)lpStr, inheosa, codeWanSeong);
  1242. *found = FALSE;
  1243. if (tossiCheck)
  1244. {
  1245. int res = FindHeosaWord(inheosa, _TOSSI, &action);
  1246. if (res & FINAL)
  1247. {
  1248. *found = TRUE;
  1249. endingCheck = FALSE;
  1250. }
  1251. }
  1252. if (endingCheck)
  1253. {
  1254. int res = FindHeosaWord(inheosa, _ENDING, &action);
  1255. if (res == FINAL)
  1256. *found = TRUE;
  1257. }
  1258. LocalFree (inheosa);
  1259. GlobalUnlock (hgbl);
  1260. return NULL;
  1261. }
  1262. /*
  1263. BOOL WINAPI DllMain (HINSTANCE hDLL, DWORD dwReason, LPVOID lpReserved){
  1264. extern char TempJumpNum [], TempSujaNum [], TempBaseNum [], TempNumNoun [], TempSuffixOut [];
  1265. extern char bTemp [], TempETC [], TempDap [];
  1266. extern LenDict JumpNum;
  1267. extern LenDict SujaNum;
  1268. extern LenDict BaseNum;
  1269. extern LenDict NumNoun;
  1270. extern LenDict Suffix;
  1271. extern LenDict B_Dict;
  1272. extern LenDict T_Dict;
  1273. extern LenDict Dap;
  1274. switch(dwReason) {
  1275. case DLL_PROCESS_ATTACH :
  1276. JumpNum.InitLenDict(TempJumpNum, 5, 5);
  1277. SujaNum.InitLenDict(TempSujaNum, 8, 27);
  1278. BaseNum.InitLenDict(TempBaseNum, 5, 3);
  1279. NumNoun.InitLenDict(TempNumNoun, 8, 32);
  1280. Suffix.InitLenDict(TempSuffixOut, 8, 8);
  1281. B_Dict.InitLenDict(bTemp, 5, 1);
  1282. T_Dict.InitLenDict(TempETC, 10, 7);
  1283. Dap.InitLenDict(TempDap, 5, 1);
  1284. break ;
  1285. case DLL_THREAD_ATTACH:
  1286. break;
  1287. case DLL_THREAD_DETACH:
  1288. break;
  1289. case DLL_PROCESS_DETACH :
  1290. break ;
  1291. } //switch
  1292. return TRUE ;
  1293. }
  1294. */