Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1103 lines
38 KiB

  1. // =========================================================================
  2. // Copyright (C) 1997 - 1998, Microsoft Corporation. All Rights Reserved.
  3. //
  4. // File Name : BASEMAIN.CPP
  5. // Function : BASE ENGINE Handler
  6. // : NLP Base Engine
  7. // =========================================================================
  8. #include "basemain.hpp"
  9. #include "convert.hpp"
  10. #include "MainDict.h"
  11. /*---------------------------------------------------------------------------
  12. %%Function : GetStemEnding
  13. %%Contact : dhyu
  14. ---------------------------------------------------------------------------*/
  15. void BaseEngine::GetStemEnding (char *incode, char *stem, char *ending, int position)
  16. {
  17. int codelen = lstrlen (incode) - 1;
  18. LMEPOS = position;
  19. lstrcpy (stem, incode);
  20. if (LMEPOS == -1)
  21. {
  22. ULSPOS = codelen;
  23. ending [0] = NULLCHAR;
  24. }
  25. else
  26. {
  27. if (LMEPOS == codelen)
  28. {
  29. ULSPOS = -1;
  30. stem [0] = NULLCHAR;
  31. }
  32. else
  33. {
  34. ULSPOS = lstrlen(incode) - LMEPOS - 2;
  35. stem[ULSPOS+1] = NULLCHAR;
  36. }
  37. // ending have a reverse order.
  38. for (int k = 0, j = lstrlen(incode) - 1; k <= LMEPOS; j--, k++)
  39. ending [k] = incode [j];
  40. ending [k] = NULLCHAR;
  41. }
  42. }
  43. int BaseEngine::NLP_BASE_NOUN (LPCSTR d, char *rstrings)
  44. {
  45. char Act[10], ostem[80],
  46. oending[40], incode [100], stem [100], ending [40];
  47. int bt, sp[10];
  48. CODECONVERT Conv;
  49. wcount = 0;
  50. memset(incode, NULLCHAR, 100);
  51. memset(Act, NULLCHAR, 10);
  52. memset(lrgsz, NULLCHAR, 400);
  53. for (int i = 0; i < 10; i++) sp[i] = 0x0000;
  54. if(Conv.HAN2INS((char *)d, incode, codeWanSeong) != SUCCESS)
  55. { // KS -> Incode
  56. return 99;
  57. }
  58. bt = NLP_Get_Ending(incode, Act, sp, TOSSI);
  59. // for (i = bt - 1; i >= 0; i--)
  60. for (i = 0; i < bt; i++)
  61. {
  62. GetStemEnding (incode, stem, ending, sp [i]);
  63. ACT_C = GetBit(Act [i], 7); // consonant
  64. ACT_V = GetBit(Act [i], 6); // vowel
  65. ACT_N_V = GetBit(Act [i], 5);
  66. ACT_P_A = GetBit(Act [i], 4);
  67. ACT_N_E = GetBit(Act [i], 3);
  68. memset(ostem, NULLCHAR, 80);
  69. memset(oending, NULLCHAR, 40);
  70. Conv.INR2HAN(ending, oending, codeWanSeong);
  71. Conv.INS2HAN(stem, ostem, codeWanSeong); // incode -> ks
  72. if(__IsDefEnd(LMEPOS, 1) == 1 &&
  73. ending[LMEPOS] == __K_G && ending[LMEPOS-1] == __V_p)
  74. {
  75. if(NLP_Ge_Proc(stem) != BT)
  76. {
  77. lstrcat(lrgsz, ostem);
  78. lstrcat(lrgsz, "+");
  79. lstrcat(lrgsz, oending);
  80. lstrcat(lrgsz, "\t");
  81. vbuf[wcount++] = POS_PRONOUN;
  82. vbuf[wcount++] = POS_TOSSI;
  83. }
  84. continue;
  85. }
  86. if (NLP_NCV_Proc(stem, ending) != NCV_VALID)
  87. continue;
  88. if (FindSilsaWord (ostem) & _NOUN)
  89. { // searching the noun dictionary
  90. lstrcat(lrgsz, ostem);
  91. vbuf[wcount++] = POS_NOUN;
  92. if (lstrlen (oending) > 0)
  93. {
  94. lstrcat(lrgsz, "+");
  95. lstrcat(lrgsz, oending);
  96. vbuf[wcount++] = POS_TOSSI;
  97. }
  98. lstrcat(lrgsz, "\t");
  99. }
  100. if(i == 0 || ACT_P_A == 1)
  101. {
  102. if (NLP_Find_Pronoun (stem, ending) == VALID)
  103. {
  104. if (lstrlen (oending) > 0)
  105. {
  106. lstrcat(lrgsz, "+");
  107. lstrcat(lrgsz, oending);
  108. vbuf[wcount++] = POS_TOSSI;
  109. }
  110. lstrcat(lrgsz, "\t");
  111. }
  112. }
  113. if(i == 0 || ACT_N_E == 1)
  114. {
  115. if(NLP_Num_Proc(stem) != BT)
  116. {
  117. lstrcat(lrgsz, ostem);
  118. vbuf[wcount++] = POS_NUMBER;
  119. if (lstrlen (oending) > 0)
  120. {
  121. lstrcat(lrgsz, "+");
  122. lstrcat(lrgsz, oending);
  123. vbuf[wcount++] = POS_TOSSI;
  124. }
  125. lstrcat(lrgsz, "\t");
  126. }
  127. continue; // backtracking
  128. }
  129. }
  130. lstrcpy (rstrings, lrgsz);
  131. return wcount;
  132. }
  133. int BaseEngine::NLP_BASE_AFFIX (LPCSTR d, char *rstrings)
  134. {
  135. char Act[10], oending [40], incode [100], stem [100], ending [40];
  136. int bt,
  137. ret,
  138. sp[10];
  139. CODECONVERT Conv;
  140. wcount = 0;
  141. memset(incode, NULLCHAR, 100);
  142. memset(Act, NULLCHAR, 10);
  143. memset(lrgsz, NULLCHAR, 400);
  144. for (int i = 0; i < 10; i++) sp[i] = 0x0000;
  145. if(Conv.HAN2INS((char *)d, incode, codeWanSeong) != SUCCESS)
  146. { // KS -> Incode
  147. return 99;
  148. }
  149. bt = NLP_Get_Ending(incode, Act, sp, TOSSI);
  150. // for (i = bt - 1; i >= 0; i--)
  151. for (i = 0; i < bt; i++)
  152. {
  153. GetStemEnding (incode, stem, ending, sp [i]);
  154. ACT_C = GetBit(Act [i], 7); // consonant
  155. ACT_V = GetBit(Act [i], 6); // vowel
  156. ACT_N_V = GetBit(Act [i], 5);
  157. ACT_P_A = GetBit(Act [i], 4);
  158. ACT_N_E = GetBit(Act [i], 3);
  159. if (NLP_NCV_Proc(stem, ending) != NCV_VALID)
  160. continue;
  161. memset(oending, NULLCHAR, 40);
  162. Conv.INR2HAN(ending, oending, codeWanSeong);
  163. ret = NLP_Fix_Proc(stem, ending);
  164. switch (ret)
  165. {
  166. case Deol_VALID :
  167. case Pref_VALID :
  168. case Suf_VALID :
  169. case PreSuf_VALID :
  170. if (lstrlen(oending) > 0)
  171. {
  172. lstrcat (lrgsz, "+");
  173. lstrcat (lrgsz, oending);
  174. vbuf [wcount++] = POS_TOSSI;
  175. }
  176. lstrcat(lrgsz, "\t");
  177. case BT : continue; // backtracking
  178. }
  179. }
  180. lstrcpy (rstrings, lrgsz);
  181. return wcount;
  182. }
  183. int BaseEngine::NLP_BASE_ALONE(LPCSTR d, char *rstrings)
  184. {
  185. char incode [100];
  186. CODECONVERT Conv;
  187. memset(incode, NULLCHAR, 100);
  188. memset(lrgsz, NULLCHAR, 400);
  189. wcount = 0;
  190. if(Conv.HAN2INS((char *)d, incode, codeWanSeong) != SUCCESS)
  191. { // KS -> Incode
  192. return 99;
  193. }
  194. // check whether input word is ADVERB, or not
  195. if (FindSilsaWord (d) & _ALONE)
  196. {
  197. lstrcat(lrgsz, d);
  198. lstrcat(lrgsz, "\t");
  199. vbuf[wcount++] = POS_ADVERB;
  200. }
  201. lstrcpy (rstrings, lrgsz);
  202. return wcount;
  203. }
  204. int BaseEngine::NLP_BASE_VERB (LPCSTR d, char *rstrings)
  205. {
  206. char index[1],
  207. AUX_Flag, tmp[80],
  208. Act[10], ostem[80],
  209. oending[40], incode [100], stem [100], ending [40], rending [40];
  210. int bt,
  211. ret,
  212. rt,
  213. sp[10], temp, luls;
  214. CODECONVERT Conv;
  215. wcount = 0;
  216. memset(Act, NULLCHAR, 10);
  217. memset(incode, NULLCHAR, 100);
  218. memset(lrgsz, NULLCHAR, 400);
  219. for (int i = 0; i < 10; i++) sp[i] = 0x0000;
  220. if(Conv.HAN2INS((char *)d, incode, codeWanSeong) != SUCCESS)
  221. { // KS -> Incode
  222. return 99;
  223. }
  224. bt = NLP_Get_Ending(incode, Act, sp, END);
  225. int codelen = lstrlen(incode) - 1;
  226. for (i = bt-1; i >= 0; i--)
  227. {
  228. memset(ostem, NULLCHAR, 80);
  229. memset(oending, NULLCHAR, 40);
  230. GetStemEnding (incode, stem, ending, sp [i]);
  231. if (lstrlen (stem) == 0)
  232. continue;
  233. ACT_C = GetBit(Act[i], 7);
  234. ACT_V = GetBit(Act[i], 6);
  235. ACT_N_V = GetBit(Act[i], 5);
  236. ACT_P_A = GetBit(Act[i], 4);
  237. ACT_N_E = GetBit(Act[i], 3);
  238. ACT_SS = GetBit(Act[i], 2);
  239. ACT_KE = GetBit(Act[i], 1);
  240. RestoreEnding (ending, rending);
  241. Conv.INR2HAN(rending, oending, codeWanSeong);
  242. Conv.INS2HAN(stem, ostem, codeWanSeong); // incode -> ks
  243. lstrcpy(tmp, stem);
  244. luls = ULSPOS;
  245. if(ACT_SS == 1)
  246. {
  247. if((ret = NLP_SS_Proc(stem, ending)) < INVALID) // VALID
  248. {
  249. if (lstrlen (oending) > 0)
  250. {
  251. lstrcat(lrgsz, "+");
  252. lstrcat(lrgsz, oending);
  253. vbuf [wcount++] = POS_ENDING;
  254. }
  255. lstrcat(lrgsz, "\t");
  256. }
  257. continue;
  258. }
  259. if(i == 0)
  260. {
  261. break;
  262. }
  263. if(ACT_KE == 1)
  264. {
  265. if((ret = NLP_KTC_Proc(stem, ending)) == BT) // backtracking
  266. {
  267. continue;
  268. }
  269. }
  270. ret = NLP_VCV_Check (stem, ending);
  271. if(ret < INVALID)
  272. {
  273. AUX_Flag = 0;
  274. if(ACT_N_V == 1)
  275. {
  276. if (FindSilsaWord (ostem) & _VERB)
  277. {
  278. lstrcat(lrgsz, ostem);
  279. lstrcat(lrgsz, "+");
  280. lstrcat(lrgsz, oending);
  281. lstrcat(lrgsz, "\t");
  282. vbuf[wcount++] = POS_VERB;
  283. vbuf[wcount++] = POS_ENDING;
  284. AUX_Flag = 1;
  285. }
  286. }
  287. if(ACT_P_A == 1)
  288. {
  289. if (FindSilsaWord (ostem) & _ADJECTIVE)
  290. {
  291. lstrcat(lrgsz, ostem);
  292. lstrcat(lrgsz, "+");
  293. lstrcat(lrgsz, oending);
  294. lstrcat(lrgsz, "\t");
  295. vbuf[wcount++] = POS_ADJECTIVE;
  296. vbuf[wcount++] = POS_ENDING;
  297. AUX_Flag = 1;
  298. }
  299. if(NLP_Dap_Proc(stem) == Dap_VALID)
  300. {
  301. if (lstrlen (oending) > 0)
  302. {
  303. lstrcat(lrgsz, "+");
  304. lstrcat(lrgsz, oending);
  305. vbuf[wcount++] = POS_ENDING;
  306. }
  307. lstrcat(lrgsz, "\t");
  308. AUX_Flag = 1;
  309. }
  310. if(NLP_Gop_Proc(stem) == Gop_VALID)
  311. {
  312. if (lstrlen (oending) > 0)
  313. {
  314. lstrcat(lrgsz, "+");
  315. lstrcat(lrgsz, oending);
  316. vbuf[wcount++] = POS_ENDING;
  317. }
  318. lstrcat(lrgsz, "\t");
  319. AUX_Flag = 1;
  320. }
  321. if((rt = NLP_Manha_Proc(stem)) < INVALID)
  322. {
  323. if (lstrlen (oending) > 0)
  324. {
  325. lstrcat(lrgsz, "+");
  326. lstrcat(lrgsz, oending);
  327. vbuf[wcount++] = POS_ENDING;
  328. }
  329. lstrcat(lrgsz, "\t");
  330. AUX_Flag = 1;
  331. }
  332. if((rt = NLP_Manhaeci_Proc(stem)) == Manhaeci_VALID)
  333. {
  334. if (lstrlen (oending) > 0)
  335. {
  336. lstrcat(lrgsz, "+");
  337. lstrcat(lrgsz, oending);
  338. vbuf[wcount++] = POS_ENDING;
  339. }
  340. lstrcat(lrgsz, "\t");
  341. AUX_Flag = 1;
  342. }
  343. if((rt = NLP_Cikha_Proc(stem)) < INVALID)
  344. {
  345. if (lstrlen (oending) > 0)
  346. {
  347. lstrcat(lrgsz, "+");
  348. lstrcat(lrgsz, oending);
  349. vbuf[wcount++] = POS_ENDING;
  350. }
  351. lstrcat(lrgsz, "\t");
  352. }
  353. }
  354. // AUX_FLOW
  355. if(AUX_Flag == 0)
  356. {
  357. if(ACT_N_V == 1)
  358. {
  359. if((rt = NLP_AUX_Find(stem, 0)) < INVALID)
  360. {
  361. if (lstrlen (oending) > 0)
  362. {
  363. lstrcat(lrgsz, "+");
  364. lstrcat(lrgsz, oending);
  365. vbuf[wcount++] = POS_ENDING;
  366. }
  367. lstrcat(lrgsz, "\t");
  368. }
  369. }
  370. if(ACT_P_A == 1)
  371. {
  372. if((rt = NLP_AUX_Find(stem, 1)) < INVALID)
  373. {
  374. if (lstrlen (oending) > 0)
  375. {
  376. lstrcat(lrgsz, "+");
  377. lstrcat(lrgsz, oending);
  378. vbuf[wcount++] = POS_ENDING;
  379. }
  380. lstrcat(lrgsz, "\t");
  381. }
  382. }
  383. }
  384. }
  385. else if (ret != MORECHECK)
  386. continue; // against consonant-vowel harmony
  387. if(ACT_N_E == 1)
  388. {
  389. if(strcmp(stem, TempIkNl) == 0)
  390. {
  391. lstrcat(lrgsz, ostem);
  392. lstrcat(lrgsz, "+");
  393. lstrcat(lrgsz, oending);
  394. lstrcat(lrgsz, "\t");
  395. vbuf[wcount++] = POS_VERB; //Jap_VALID;
  396. vbuf[wcount++] = POS_ENDING;
  397. }
  398. if(__IsDefStem(ULSPOS, 1) == 1 &&
  399. stem[ULSPOS-1] == __K_I && stem[ULSPOS] == __V_l)
  400. {
  401. if(__IsDefStem(ULSPOS, 2) == 1 && stem[ULSPOS-2] == __K_M)
  402. {
  403. sp[i] = LMEPOS+3;
  404. Act[i] = 0x70; // action:01-110-00-0
  405. i++;
  406. if(__IsDefStem(ULSPOS, 4) == 1 &&
  407. stem[ULSPOS-4] == __K_I && stem[ULSPOS-3] == __V_m)
  408. {
  409. sp[i] = LMEPOS+5;
  410. Act[i] = (unsigned char)0xB0; // action:10 110 00 0
  411. i++;
  412. }
  413. }
  414. temp = ULSPOS;
  415. __DelStem2(stem, &temp);
  416. ULSPOS = temp;
  417. if((ret = NLP_Machine_T(stem, ending)) < INVALID)
  418. {
  419. if (lstrlen (oending) > 0)
  420. {
  421. lstrcat(lrgsz, "+");
  422. lstrcat(lrgsz, oending);
  423. vbuf[wcount++] = POS_ENDING;
  424. }
  425. lstrcat(lrgsz, "\t");
  426. }
  427. temp = ULSPOS;
  428. __AddStem2(stem, &temp, __K_I, __V_l);
  429. ULSPOS = temp;
  430. if(__IsDefEnd(LMEPOS, 1) == 1 &&
  431. ending[LMEPOS] == __K_I && ending[LMEPOS-1] == __V_j)
  432. {
  433. for (int i = 0; i < 3; i++)
  434. {
  435. if(strcmp(stem, TempJap[i]) == 0)
  436. {
  437. lstrcat(lrgsz, ostem);
  438. lstrcat(lrgsz, "+");
  439. lstrcat(lrgsz, oending);
  440. lstrcat(lrgsz, "\t");
  441. vbuf[wcount++] = POS_VERB; //VERB_VALID;
  442. vbuf[wcount++] = POS_ENDING;
  443. }
  444. }
  445. continue;
  446. }
  447. index[0] = 'm';
  448. if (FindSilsaWord (ostem) & _NOUN)
  449. {
  450. lstrcat(lrgsz, ostem);
  451. lstrcat(lrgsz, "+");
  452. lstrcat(lrgsz, oending);
  453. lstrcat(lrgsz, "\t");
  454. vbuf[wcount++] = POS_NOUN; //Jap_NOUN_VALID;
  455. vbuf[wcount++] = POS_ENDING;
  456. }
  457. if((ret = NLP_Fix_Proc(stem, ending)) < INVALID)
  458. {
  459. if (lstrlen (oending) > 0)
  460. {
  461. lstrcat(lrgsz, "+");
  462. lstrcat(lrgsz, oending);
  463. vbuf[wcount++] = POS_ENDING;
  464. }
  465. lstrcat(lrgsz, "\t");
  466. }
  467. if (NLP_Find_Pronoun (stem, ending) == VALID)
  468. {
  469. if (lstrlen (oending) > 0)
  470. {
  471. lstrcat(lrgsz, "+");
  472. lstrcat(lrgsz, oending);
  473. vbuf[wcount++] = POS_TOSSI;
  474. }
  475. lstrcat(lrgsz, "\t");
  476. }
  477. if((ret = NLP_Num_Proc(stem)) < INVALID)
  478. {
  479. lstrcat(lrgsz, ostem);
  480. lstrcat(lrgsz, "+");
  481. lstrcat(lrgsz, oending);
  482. lstrcat(lrgsz, "\t");
  483. vbuf[wcount++] = POS_NUMBER; //Jap_NUM_VALID;
  484. vbuf[wcount++] = POS_ENDING;
  485. }
  486. continue; // backtracking
  487. }
  488. else if( /*ACT_Z != 1 && */ // ACT_Z != 1
  489. ULS >= __V_k &&
  490. !(__IsDefEnd(LMEPOS, 1) == 1 &&
  491. ending[LMEPOS] == __K_I && ending[LMEPOS-1] == __V_j))
  492. {
  493. if((ret = NLP_Machine_T(stem, ending)) < INVALID)
  494. {
  495. if (lstrlen (oending) > 0)
  496. {
  497. lstrcat(lrgsz, "+");
  498. lstrcat(lrgsz, oending);
  499. vbuf[wcount++] = POS_ENDING;
  500. }
  501. lstrcat(lrgsz, "\t");
  502. }
  503. }
  504. }
  505. lstrcpy(tmp, stem);
  506. luls = ULSPOS;
  507. if(ACT_C == 0 && ACT_V == 1)
  508. {
  509. // by hjw : 95/3/6
  510. if((ret = NLP_Irr_01(stem, ending)) < INVALID)
  511. {
  512. if (lstrlen (oending) > 0)
  513. {
  514. lstrcat(lrgsz, "+");
  515. lstrcat(lrgsz, oending);
  516. vbuf[wcount++] = POS_ENDING;
  517. }
  518. lstrcat(lrgsz, "\t");
  519. continue;
  520. }
  521. }
  522. lstrcpy (stem, tmp);
  523. ret = BT;
  524. switch(LME)
  525. {
  526. case __K_N :
  527. if((ret = NLP_Irr_KN(stem, ending)) == Irr_KN_Vl)
  528. {
  529. ret = NLP_Irr_KN_Vl(stem);
  530. }
  531. if(ret == Irr_OPS)
  532. {
  533. if((ret = NLP_Irr_OPS(stem, ending)) == SS)
  534. {
  535. if((ret = NLP_SS_Proc(stem, ending)) == BT)
  536. {
  537. continue;
  538. }
  539. if(ret < INVALID)
  540. {
  541. ret += Irr_SS;
  542. }
  543. }
  544. }
  545. break;
  546. case __K_B :
  547. ret = NLP_Machine_A(stem, ending);
  548. break;
  549. // hjw : 95/3/17
  550. case __K_S :
  551. if(ACT_C == 1) // ATC_C == 1
  552. {
  553. ret = NLP_Irr_KS(stem, ending);
  554. }
  555. else if(ULS >= __V_k)
  556. {
  557. ret = NLP_Machine_A(stem, ending);
  558. }
  559. break;
  560. case __K_M :
  561. ret = NLP_Irr_KM(stem);
  562. break;
  563. case __K_R :
  564. if(__IsDefEnd(LMEPOS, 1) == 0 || ending[LMEPOS-1] < __V_k)
  565. {
  566. ret = NLP_Machine_A(stem,ending);
  567. if (ret != BT)
  568. {
  569. if (lstrlen (oending) > 0)
  570. {
  571. lstrcat(lrgsz, "+");
  572. lstrcat(lrgsz, oending);
  573. vbuf[wcount++] = POS_ENDING;
  574. }
  575. lstrcat(lrgsz, "\t");
  576. }
  577. }
  578. if(ACT_P_A == 1)
  579. {
  580. ret = NLP_Irr_KRadj(stem, ending);
  581. if (ret != BT)
  582. {
  583. if (lstrlen (oending) > 0)
  584. {
  585. lstrcat(lrgsz, "+");
  586. lstrcat(lrgsz, oending);
  587. vbuf[wcount++] = POS_ENDING;
  588. }
  589. lstrcat(lrgsz, "\t");
  590. }
  591. }
  592. if(ACT_N_V == 1)
  593. {
  594. if((ret = NLP_Irr_KRvb(stem, ending)) == SS)
  595. {
  596. if((ret = NLP_SS_Proc(stem, ending)) == BT)
  597. {
  598. continue;
  599. }
  600. if(ret < INVALID)
  601. {
  602. ret += Irr_SS;
  603. }
  604. }
  605. }
  606. break;
  607. case __K_I :
  608. if(__IsDefEnd(LMEPOS, 1) == 1 && (ending[LMEPOS-1] == __V_h ||
  609. ending[LMEPOS-1] == __V_hl || ending[LMEPOS-1] == __V_l ))
  610. {
  611. if(ULS >= __V_k)
  612. {
  613. ret = NLP_Irr_KI(stem,rending);
  614. }
  615. else
  616. {
  617. continue;
  618. }
  619. }
  620. if(ULS == __K_R)
  621. {
  622. ret = NLP_Irr_KI_KR(stem, ending);
  623. }
  624. if(ULS >= __V_k)
  625. {
  626. ret = NLP_Irr_KI_V(stem, ending);
  627. }
  628. break;
  629. default :
  630. continue;
  631. }
  632. if(ret >= INVALID)
  633. {
  634. continue;
  635. }
  636. if(ret >= VALID)
  637. {
  638. if (lstrlen (oending) > 0)
  639. {
  640. lstrcat(lrgsz, "+");
  641. lstrcat(lrgsz, oending);
  642. vbuf[wcount++] = POS_ENDING;
  643. }
  644. lstrcat(lrgsz, "\t");
  645. }
  646. }
  647. lstrcpy (rstrings, lrgsz);
  648. return wcount;
  649. }
  650. // made by dhyu 1996. 2
  651. // look into mrfgen01.txt to know details
  652. void BaseEngine::RestoreEnding (char *ending, char *rending)
  653. {
  654. int len = lstrlen (ending); // ending has reverse order.
  655. lstrcpy (rending, ending);
  656. if (lstrlen (ending) == 0)
  657. return;
  658. if (ACT_SS) // insert "IEUNG, EO" to the first of ending
  659. {
  660. rending [len] = __V_j;
  661. rending [len+1] = __K_I;
  662. rending [len+2] = '\0';
  663. return;
  664. }
  665. if (ACT_C && ACT_V) //CV == 11
  666. {
  667. if (ending [len - 1] == __K_I && ending [len - 2] == __V_k)
  668. rending [len - 2] = __V_j;
  669. return;
  670. }
  671. if (!ACT_C && ACT_V == TRUE) // CV == 01
  672. {
  673. switch (ending [len - 1])
  674. {
  675. case __K_B :
  676. rending [len] = __V_m; // insert "SIOS, EU" to the first of ending
  677. rending [len+1] = __K_S;
  678. rending [len+2] = '\0';
  679. return ;
  680. case __K_R :
  681. if (len == 2 && ending [0] == __V_k) // if ending is "ra" , insert "IEUNG EO" to the first of ending
  682. {
  683. rending [len] = __V_j;
  684. rending [len+1] = __K_I;
  685. rending [len+2] = '\0';
  686. return;
  687. }
  688. break;
  689. case __K_I :
  690. if (ending [len - 2] == __V_y) // if ending is "yo",
  691. {
  692. rending [len] = __V_j;
  693. rending [len+1] = __K_I;
  694. rending [len+2] = '\0';
  695. return;
  696. }
  697. break;
  698. case __K_N :
  699. if (!ACT_C && ACT_V && ACT_N_V && !ACT_P_A && !ACT_P_A && !ACT_N_E && !ACT_SS && !ACT_KE)
  700. {
  701. rending [len] = __V_m;
  702. rending [len+1] = __K_N;
  703. rending [len+2] = '\0';
  704. return;
  705. }
  706. }
  707. rending [len] = __V_m; // insert "IEUNG, EU" to the first of ending
  708. rending [len+1] = __K_I;
  709. rending [len+2] = '\0';
  710. return;
  711. }
  712. // "KE-TO-CHI" ending and copula is processed with stem together.
  713. return;
  714. }
  715. // To process compound noun, we use the window which size is 4 characters.
  716. // We decrease the size until we found noun.
  717. // However we don't decrease it less than 2 characters.
  718. // made by dhyu --- 1996. 3
  719. int BaseEngine::NLP_BASE_COMPOUND (LPCSTR d, char *rstrings)
  720. {
  721. char Act[10], ostem[80], oending[40],
  722. incode [100], stem [100], ending [40];
  723. int bt,
  724. sp[10];
  725. BOOL found;
  726. CODECONVERT Conv;
  727. memset(incode, NULLCHAR, 100);
  728. memset(Act, NULLCHAR, 10);
  729. for (int i = 0; i < 10; i++) sp[i] = 0x0000;
  730. if(Conv.HAN2INS((char *)d, incode, codeWanSeong) != SUCCESS)
  731. return 0;
  732. bt = NLP_Get_Ending(incode, Act, sp, TOSSI);
  733. for (i = bt-1; i >= bt-3 && i >= 0; i--)
  734. //for (i = 0; i < bt; i++)
  735. {
  736. GetStemEnding (incode, stem, ending, sp [i]);
  737. ACT_C = GetBit(Act [i], 7); // consonant
  738. ACT_V = GetBit(Act [i], 6); // vowel
  739. ACT_N_V = GetBit(Act [i], 5);
  740. ACT_P_A = GetBit(Act [i], 4);
  741. ACT_N_E = GetBit(Act [i], 3);
  742. if (NLP_NCV_Proc(stem, ending) == NCV_VALID)
  743. {
  744. memset(ostem, NULLCHAR, 80);
  745. memset(oending, NULLCHAR, 40);
  746. Conv.INR2HAN(ending, oending, codeWanSeong);
  747. Conv.INS2HAN(stem, ostem, codeWanSeong); // incode -> ks
  748. wcount = 0;
  749. memset (lrgsz, NULLCHAR, 400);
  750. // Window size is 4 charaters (8 byte)
  751. char window [9], inwindow [25];
  752. memset (window, '\0', 9);
  753. char *next = ostem;
  754. found = TRUE;
  755. while (lstrlen (next) > 8)
  756. {
  757. found = FALSE;
  758. memcpy (window, next, 8);
  759. for (int j = 7; j >= 3; j -= 2)
  760. {
  761. if (FindSilsaWord (window) & _NOUN)
  762. { // searching the noun dictionary
  763. lstrcat(lrgsz, window);
  764. vbuf[wcount++] = POS_NOUN;
  765. lstrcat(lrgsz, "+");
  766. found = TRUE;
  767. break;
  768. }
  769. window [j] = '\0';
  770. window [j-1] = '\0';
  771. }
  772. if (!found)
  773. {
  774. // if "GYEOM" is the first character in window
  775. Conv.HAN2INS (next, inwindow, codeWanSeong);
  776. if ((inwindow [0] == __K_G && inwindow [1] == __V_u && inwindow [2] == __K_M) ||
  777. (inwindow [0] == __K_M && inwindow [1] == __V_l && inwindow [2] == __K_C))
  778. {
  779. memcpy (window, next, 2);
  780. window [2] = '\0';
  781. lstrcat(lrgsz, window);
  782. vbuf [wcount++] = POS_ADVERB;
  783. lstrcat(lrgsz, "+");
  784. found = TRUE;
  785. next += 2;
  786. }
  787. else
  788. break;
  789. }
  790. else
  791. next += (j+1);
  792. }
  793. if (!found)
  794. continue;
  795. else
  796. {
  797. if (FindSilsaWord (next) & _NOUN)
  798. {
  799. lstrcat (lrgsz, next);
  800. vbuf[wcount++] = POS_NOUN;
  801. }
  802. else
  803. {
  804. switch (lstrlen(next))
  805. {
  806. case 8 :
  807. // if the size of last winow is 4, we divide it into same size two.
  808. memcpy (window, next, 4);
  809. window [4] = '\0';
  810. Conv.HAN2INS (window, inwindow, codeWanSeong);
  811. found = FALSE;
  812. if (FindSilsaWord (window) & _NOUN)
  813. { // searching the noun dictionary
  814. Conv.HAN2INS(next+4, inwindow, codeWanSeong);
  815. if (FindSilsaWord (next+4) & _NOUN)
  816. {
  817. lstrcat(lrgsz, window);
  818. vbuf[wcount++] = POS_NOUN;
  819. lstrcat(lrgsz, "+");
  820. lstrcat(lrgsz, next+4);
  821. vbuf[wcount++] = POS_NOUN;
  822. found = TRUE;
  823. }
  824. }
  825. if (!found)
  826. {
  827. // if "GYEOM" is the first character in window
  828. if ((inwindow [0] == __K_G && inwindow [1] == __V_u && inwindow [2] == __K_M) ||
  829. (inwindow [0] == __K_M && inwindow [1] == __V_l && inwindow [2] == __K_C))
  830. {
  831. memcpy (window, next, 8);
  832. window [9] = '\0';
  833. if (FindSilsaWord (window) & _NOUN)
  834. {
  835. memcpy (window, next, 2);
  836. window [2] = '\0';
  837. lstrcat(lrgsz, window);
  838. vbuf [wcount++] = POS_ADVERB;
  839. lstrcat(lrgsz, "+");
  840. lstrcat(lrgsz, next+2);
  841. vbuf [wcount++] = POS_NOUN;
  842. }
  843. }
  844. else
  845. {
  846. // if "DEUNG" is the last character
  847. Conv.HAN2INS (next+6, inwindow, codeWanSeong);
  848. if ((inwindow [0] == __K_D && inwindow [1] == __V_m && inwindow [2] == __K_I) ||
  849. (inwindow [0] == __K_G && inwindow [1] == __V_k && inwindow [2] == __K_M) ||
  850. (inwindow [0] == __K_G && inwindow [1] == __V_k && inwindow [2] == __K_B && inwindow [3] == __K_S) ||
  851. (inwindow [0] == __K_G && inwindow [1] == __V_P) ||
  852. (inwindow [0] == __K_C && inwindow [1] == __V_o && inwindow [2] == __K_G))
  853. {
  854. memcpy (window, next, 6);
  855. window [6] = '\0';
  856. if (FindSilsaWord (window) & _NOUN)
  857. {
  858. lstrcat (lrgsz, window);
  859. vbuf [wcount++] = POS_NOUN;
  860. lstrcat (lrgsz, "+");
  861. lstrcat (lrgsz, next+6);
  862. vbuf [wcount++] = POS_NOUN;
  863. }
  864. else
  865. {
  866. // if "DEUNG,DEUNG" is the part
  867. Conv.HAN2INS (next+4, inwindow, codeWanSeong);
  868. if (inwindow [0] == __K_D && inwindow [1] == __V_m && inwindow [2] == __K_I)
  869. {
  870. memcpy (window, next, 4);
  871. window [4] = '\0';
  872. if (FindSilsaWord (window) & _NOUN)
  873. {
  874. lstrcat (lrgsz, window);
  875. vbuf [wcount++] = POS_NOUN;
  876. lstrcat (lrgsz, "+");
  877. lstrcat (lrgsz, next+4);
  878. vbuf [wcount++] = POS_NOUN;
  879. }
  880. else
  881. continue;
  882. }
  883. else
  884. continue;
  885. }
  886. }
  887. else
  888. continue;
  889. }
  890. }
  891. break;
  892. case 6 :
  893. Conv.HAN2INS (next, inwindow, codeWanSeong);
  894. /*
  895. if (FindSilsaWord (next) & _NOUN)
  896. {
  897. lstrcat (lrgsz, next);
  898. vbuf[wcount++] = POS_NOUN;
  899. }
  900. else
  901. {
  902. */
  903. // if "GYEOM" is the first character in window
  904. if ((inwindow [0] == __K_G && inwindow [1] == __V_u && inwindow [2] == __K_M) ||
  905. (inwindow [0] == __K_M && inwindow [1] == __V_l && inwindow [2] == __K_C))
  906. {
  907. if (FindSilsaWord (next+2) & _NOUN)
  908. {
  909. memcpy (window, next, 2);
  910. window [2] = '\0';
  911. lstrcat(lrgsz, window);
  912. vbuf [wcount++] = POS_ADVERB;
  913. lstrcat(lrgsz, "+");
  914. lstrcat(lrgsz, next+2);
  915. vbuf [wcount++] = POS_NOUN;
  916. }
  917. else
  918. continue;
  919. }
  920. else
  921. {
  922. // if "DEUNG" is the last character
  923. Conv.HAN2INS (next+4, inwindow, codeWanSeong);
  924. if (inwindow [0] == __K_D && inwindow [1] == __V_m && inwindow [2] == __K_I)
  925. {
  926. memcpy (window, next, 4);
  927. window [4] = '\0';
  928. if (FindSilsaWord (window) & _NOUN)
  929. {
  930. lstrcat (lrgsz, window);
  931. vbuf [wcount++] = POS_NOUN;
  932. lstrcat (lrgsz, "+");
  933. lstrcat (lrgsz, next+4);
  934. vbuf [wcount++] = POS_NOUN;
  935. }
  936. else
  937. continue;
  938. }
  939. else
  940. continue;
  941. }
  942. //}
  943. break;
  944. /*
  945. case 4 :
  946. if (FindSilsaWord (next) & _NOUN)
  947. {
  948. lstrcat (lrgsz, next);
  949. vbuf[wcount++] = POS_NOUN;
  950. if (lstrlen (oending) > 0)
  951. {
  952. lstrcat(lrgsz, "+");
  953. lstrcat(lrgsz, oending);
  954. vbuf[wcount++] = POS_TOSSI;
  955. }
  956. }
  957. else
  958. continue;
  959. break;
  960. */
  961. default :
  962. continue;
  963. }
  964. }
  965. if (lstrlen (oending) > 0)
  966. {
  967. lstrcat(lrgsz, "+");
  968. lstrcat(lrgsz, oending);
  969. vbuf[wcount++] = POS_TOSSI;
  970. }
  971. lstrcat(lrgsz, "\t");
  972. lstrcpy (rstrings, lrgsz);
  973. return wcount;
  974. }
  975. }
  976. }
  977. lstrcpy (rstrings, "\0");
  978. return 0;
  979. }