Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

492 lines
13 KiB

  1. // =========================================================================
  2. // Copyright (C) 1997 - 1998, Microsoft Corporation. All Rights Reserved.
  3. //
  4. // FILE NAME : BASESUB.CPP
  5. // Function : BASE ENGINE FUNCTION COLLECTION
  6. // : NLP Base Engine Function
  7. // =========================================================================
  8. #include "basesub.hpp"
  9. #include "basegbl.hpp"
  10. #include "stemkor.h"
  11. #include "MainDict.h"
  12. // ------------------------------------------------------------------------
  13. //
  14. //
  15. // ------------------------------------------------------------------------
  16. int NLP_Ge_Proc( char *stem )
  17. {
  18. for (int i = 0; i < 3; i++)
  19. if(strcmp(stem, TempNoun[i]) == 0) return PRON_VALID;
  20. return BT;
  21. }
  22. // ------------------------------------------------------------------------
  23. //
  24. //
  25. // ------------------------------------------------------------------------
  26. int BaseEngine::NLP_Get_Ending( char *incode,
  27. char *Act,
  28. int *sp,
  29. int Endflag)
  30. {
  31. char ending[40];
  32. BYTE action;
  33. int res,
  34. j = 1,
  35. codelen = lstrlen(incode) - 1;
  36. memset(ending, NULL, 40);
  37. sp[0] = -1;
  38. if(Endflag == 1)
  39. Act[0] = (unsigned char)0xf8; // if there is no tossi : action code 1111-1000
  40. else
  41. Act[0] = 0x74; // if there is no endin : action code 0111-0100
  42. for (int i = 0; i <= codelen; i++)
  43. {
  44. ending[i] = incode[codelen-i];
  45. ending[i+1] = NULLCHAR;
  46. if(Endflag == 1)
  47. res = FindHeosaWord(ending, _TOSSI, &action);
  48. else
  49. res = FindHeosaWord(ending, _ENDING, &action);
  50. switch (res)
  51. {
  52. case FINAL :
  53. case FINAL_MORE :
  54. Act[j] = action;
  55. sp[j++] = i; // LMEPOS
  56. continue;
  57. case FALSE_MORE :
  58. continue;
  59. case NOT_FOUND :
  60. break;
  61. }
  62. break;
  63. }
  64. if (Endflag == 1 && sp [0] == 1)
  65. {
  66. sp [0] = 1;
  67. sp [1] = -1;
  68. Act [0] = Act [1];
  69. Act [1] = (unsigned char)0xf8;
  70. }
  71. Act[j] = NULL;
  72. sp[j] = NULL;
  73. return j;
  74. }
  75. // ------------------------------------------------------------------------
  76. //
  77. //
  78. // ------------------------------------------------------------------------
  79. int BaseEngine::NLP_Num_Proc( char *stem)
  80. {
  81. char t_stem[80];
  82. int t_ulspos;
  83. if(ULSPOS == -1) return BT;
  84. memset(t_stem, NULL, 80);
  85. lstrcpy(t_stem, stem);
  86. t_ulspos = lstrlen(t_stem)-1;
  87. int n = NumNoun.FindWord(t_stem, t_ulspos);
  88. if(n != -1)
  89. {
  90. if(NLP_CheckSuja(t_stem, t_ulspos) == VALID) return NUM_VALID;
  91. else return BT;
  92. }
  93. if(FindIrrWord(t_stem, _ZZNUM) & FINAL
  94. ) return NUM_VALID;
  95. if(NLP_CheckSuja(t_stem, t_ulspos) == VALID) return NUM_VALID;
  96. return BT;
  97. }
  98. // ----------------------------------------------------------------------
  99. //
  100. //
  101. // ----------------------------------------------------------------------
  102. int BaseEngine::NLP_CheckSuja( char *stem,
  103. int ulspos)
  104. {
  105. enum STATE {_BASE, _NUM} currentstate;
  106. currentstate = _BASE;
  107. enum OPERATION {_START, _NOSTART} currentphase;
  108. currentphase = _START;
  109. char currentbase = -1;
  110. char tempbase = -1;
  111. JumpNum.FindWord(stem, ulspos);
  112. for ( ; ulspos >= 0; )
  113. {
  114. switch (currentstate)
  115. {
  116. case _BASE :
  117. tempbase = (char)BaseNum.FindWord(stem, ulspos,currentbase+1);
  118. if(tempbase != -1)
  119. {
  120. currentstate = _BASE;
  121. currentbase = tempbase;
  122. if(currentphase == _START)
  123. {
  124. for (int i = 0; i < 8; i++)
  125. if(strcmp(stem,DoubleNum[i]) == 0) return VALID;
  126. currentphase = _NOSTART;
  127. }
  128. break;
  129. }
  130. if(currentphase == _START)
  131. {
  132. for (int i = 0; i < 8; i++)
  133. if(strcmp(stem, DoubleNum[i]) == 0) return VALID;
  134. currentphase = _NOSTART;
  135. break;
  136. }
  137. if(SujaNum.FindWord(stem, ulspos) != -1)
  138. {
  139. currentstate = _NUM;
  140. break;
  141. }
  142. return INVALID;
  143. case _NUM :
  144. tempbase = (char)BaseNum.FindWord(stem, ulspos, currentbase+1);
  145. if(tempbase != -1)
  146. {
  147. currentstate = _BASE;
  148. currentbase = tempbase;
  149. break;
  150. }
  151. return INVALID;
  152. }
  153. }
  154. return VALID;
  155. }
  156. // ----------------------------------------------------------------------
  157. //
  158. //
  159. // ----------------------------------------------------------------------
  160. int BaseEngine::NLP_NCV_Proc( char *stem,
  161. char *ending)
  162. {
  163. int lULS;
  164. lULS = lstrlen(stem) - 1;
  165. if(ACT_C == 1 && ACT_V == 1) return NCV_VALID;
  166. if(ACT_C == 0 && ACT_V == 1)
  167. {
  168. if(stem[lULS] >= __V_k) return NCV_VALID;
  169. if(LME == __K_R && ending[LMEPOS-1] == __V_h &&
  170. __IsDefEnd(LMEPOS, 1) == 1)
  171. if(stem[lULS] == __K_R) return NCV_VALID;
  172. return BT;
  173. }
  174. if(stem[lULS] >= __V_k) return BT;
  175. if(stem[lULS] == __K_R && __IsDefEnd(LMEPOS, 3) == 1 &&
  176. ending[LMEPOS] == __K_I && ending[LMEPOS-1] == __V_m &&
  177. ending[LMEPOS-2] == __K_R && ending[LMEPOS-3] == __V_h) return BT;
  178. return NCV_VALID;
  179. }
  180. // ----------------------------------------------------------------------
  181. //
  182. // To process affix
  183. //
  184. // ----------------------------------------------------------------------
  // Tries to analyse `stem` as a noun or pronoun combined with a prefix
  // and/or a suffix.
  //
  // On success the converted pieces are appended to lrgsz (separated by "+"),
  // one POS_* tag per piece is stored in vbuf at wcount, and one of
  // Deol_VALID, Pref_VALID, Suf_VALID or PreSuf_VALID is returned.
  // MORECHECK is returned when no decomposition applies.
  //
  //   stem   : in/out; several successful branches overwrite or truncate it.
  //   ending : only tested for being empty (lstrlen(ending) == 0).
  185. int BaseEngine::NLP_Fix_Proc(char *stem, char *ending)
  186. {
  187. char prestem[80],
  188. bufstem[80],
  189. suffix [80],
  190. prefix [80],
  // index[0] is assigned 'm' in several branches but never read in this function.
  191. index[1];
  192. int ulspos, temp;
  193. prefix [0] = '\0';
  194. suffix [0] = '\0';
  // NOTE(review): unbounded copy into an 80-byte buffer; stems are presumably
  // always shorter -- confirm.
  195. lstrcpy(prestem, stem);
  196. ulspos = ULSPOS;
  // Case 1: the stem ends with the three codes __K_D, __V_m, __K_R.
  197. if(__IsDefStem(ULSPOS, 2) == 1 &&
  198. prestem[ULSPOS-2] == __K_D && prestem[ULSPOS-1] == __V_m && prestem[ULSPOS] == __K_R)
  199. {
  200. if(lstrlen(ending) == 0 || ACT_P_A == 1) // sp == 0 || ACT_P_A == 1
  201. {
  // 1a: the whole stem is found by FindIrrWord(..., _ZPN): emit pronoun + suffix.
  202. if(FindIrrWord(stem, _ZPN) & FINAL)
  203. {
  204. int len = lstrlen (stem);
  // Copies the last three codes plus the terminating NUL into suffix.
  205. memcpy (suffix, stem+len-3, 4);
  // Cuts those three codes off the caller's stem.
  206. stem [len-3] = '\0';
  207. char tstem [80];
  208. Conv.INS2HAN(stem, tstem, codeWanSeong);
  209. lstrcat (lrgsz, tstem);
  210. vbuf [wcount++] = POS_PRONOUN;
  211. lstrcat (lrgsz, "+");
  212. Conv.INS2HAN(suffix, tstem, codeWanSeong);
  213. lstrcat(lrgsz, tstem);
  214. vbuf [wcount++] = POS_SUFFIX;
  215. return Deol_VALID;
  216. }
  217. }
  // 1b: presumably drops the trailing three codes from prestem and updates the
  // position through temp -- confirm against __DelStemN.
  218. temp = ulspos;
  219. __DelStemN(prestem, &temp, 3);
  220. ulspos = temp;
  221. index[0] = 'm';
  222. char tstem [80];
  223. Conv.INS2HAN (prestem, tstem, codeWanSeong);
  // The shortened stem must be flagged _NOUN by FindSilsaWord: emit noun + suffix.
  224. if (FindSilsaWord (tstem) & _NOUN)
  225. {
  226. int len = lstrlen (stem);
  // Last three codes of the original stem plus the terminating NUL.
  227. memcpy (suffix, stem+len-3, 4);
  // The caller's stem is replaced by the shortened form.
  228. lstrcpy (stem, prestem);
  229. lstrcat (lrgsz, tstem);
  230. vbuf [wcount++] = POS_NOUN;
  231. lstrcat (lrgsz, "+");
  232. Conv.INS2HAN(suffix, tstem, codeWanSeong);
  233. lstrcat(lrgsz, tstem);
  234. vbuf [wcount++] = POS_SUFFIX;
  235. return Deol_VALID;
  236. }
  // Case 1 never falls through to the prefix/suffix checks below.
  237. return MORECHECK;
  238. }
  // Case 2: prefix + noun. PrefixCheck stores the remainder after the prefix in bufstem.
  239. if(PrefixCheck(prestem, bufstem) != -1)
  240. {
  241. index[0] = 'm';
  242. char tstem [80];
  243. Conv.INS2HAN (bufstem, tstem, codeWanSeong);
  244. if (FindSilsaWord (tstem) & _NOUN)
  245. {
  // Prefix length = full stem length minus the remainder length.
  246. int len = lstrlen(stem) - lstrlen(bufstem);
  247. memcpy (prefix, stem, len);
  248. prefix [len] = '\0';
  // The caller's stem is replaced by the part after the prefix.
  249. lstrcpy (stem, bufstem);
  250. Conv.INS2HAN(prefix, tstem, codeWanSeong);
  251. lstrcat (lrgsz, tstem);
  252. vbuf [wcount++] = POS_PREFIX;
  253. lstrcat (lrgsz, "+");
  254. Conv.INS2HAN(stem, tstem, codeWanSeong);
  255. lstrcat(lrgsz, tstem);
  256. vbuf [wcount++] = POS_NOUN;
  257. return Pref_VALID;
  258. }
  259. }
  // Case 3: noun + suffix.
  // NOTE(review): stem+lstrlen(prestem) is used as the suffix text below, which
  // presumes Suffix.FindWord shortened prestem to the part before the suffix --
  // confirm.
  260. if(Suffix.FindWord(prestem, ulspos) != -1)
  261. {
  262. index[0] = 'm';
  263. char tstem [80];
  264. Conv.INS2HAN (prestem, tstem, codeWanSeong);
  265. if (FindSilsaWord (tstem) & _NOUN)
  266. {
  267. lstrcat (lrgsz, tstem);
  268. vbuf [wcount++] = POS_NOUN;
  269. lstrcat (lrgsz, "+");
  270. Conv.INS2HAN(stem+lstrlen(prestem), tstem, codeWanSeong);
  271. lstrcat(lrgsz, tstem);
  272. vbuf [wcount++] = POS_SUFFIX;
  273. return Suf_VALID;
  274. }
  275. }
  // Case 4: prefix + noun + suffix. Start again from a fresh copy of the stem.
  276. lstrcpy(prestem, stem);
  277. ulspos = ULSPOS;
  278. if(Suffix.FindWord(prestem, ulspos) != -1 &&
  279. PrefixCheck(prestem, bufstem) != -1)
  280. {
  281. index[0] = 'm';
  282. char tstem [80];
  283. Conv.INS2HAN (bufstem, tstem, codeWanSeong);
  284. if (FindSilsaWord (tstem) & _NOUN)
  285. {
  // Truncates prestem so that only the prefix remains.
  286. prestem [lstrlen(prestem) - lstrlen(bufstem)] = 0;
  287. Conv.INS2HAN(prestem, tstem, codeWanSeong);
  288. lstrcat (lrgsz, tstem);
  289. vbuf [wcount++] = POS_PREFIX;
  290. lstrcat (lrgsz, "+");
  291. Conv.INS2HAN(bufstem, tstem, codeWanSeong);
  292. lstrcat(lrgsz, tstem);
  293. vbuf [wcount++] = POS_NOUN;
  294. lstrcat (lrgsz, "+");
  // Whatever follows prefix + noun in the original stem is emitted as the suffix.
  295. Conv.INS2HAN(stem + lstrlen (prestem) + lstrlen (bufstem), tstem, codeWanSeong);
  296. lstrcat(lrgsz, tstem);
  297. vbuf [wcount++] = POS_SUFFIX;
  298. return PreSuf_VALID;
  299. }
  300. }
  301. return MORECHECK;
  302. }
  303. int BaseEngine::NLP_Find_Pronoun(char *stem, char *ending)
  304. {
  305. if(FindIrrWord(stem, _ZPN) & FINAL)
  306. {
  307. if ((ending [0] == __V_k && ending [1] == __K_G) ||
  308. (ending [0] == __V_p && ending [1] == __K_G))
  309. {
  310. if ((stem [0] == __K_N && stem [1] == __V_j) ||
  311. (stem [0] == __K_N && stem [1] == __V_k) ||
  312. (stem [0] == __K_J && stem [1] == __V_j))
  313. {
  314. return MORECHECK;
  315. }
  316. else if (stem [0] == __K_N && stem [1] == __V_o)
  317. {
  318. stem [1] = __V_k;
  319. }
  320. else if (stem [0] == __K_N && stem [1] == __V_p)
  321. {
  322. stem [1] = __V_j;
  323. }
  324. else if (stem [0] == __K_J && stem [1] == __V_p)
  325. {
  326. stem [1] = __V_j;
  327. }
  328. }
  329. else
  330. {
  331. int len = lstrlen (stem) - 1;
  332. if (len > 4 && stem [len] == __K_D && stem [len - 1] == __V_m && stem [len - 2] == __K_R)
  333. stem [len-2] = '\0';
  334. }
  335. char tstem [80];
  336. Conv.INS2HAN (stem, tstem, codeWanSeong);
  337. lstrcat (lrgsz, tstem);
  338. vbuf [wcount++] = POS_PRONOUN;
  339. return VALID;
  340. }
  341. return MORECHECK;
  342. }
  343. // ------------------------------------------------------------------
  344. //
  345. //
  346. // ------------------------------------------------------------------
  347. int PrefixCheck(char *stem,
  348. char *prestem)
  349. {
  350. int i,
  351. j,
  352. l,
  353. PreLen,
  354. WordLen;
  355. char buf1[5],
  356. buf2[5];
  357. i = 0;
  358. PreLen = 9;
  359. WordLen = 5;
  360. while (i < PreLen)
  361. {
  362. j = TempPrefix[(i*WordLen)+4];
  363. memset(buf1, NULL, 5);
  364. for (l = 0; l <= j; l++) buf1[l] = TempPrefix[(i*WordLen)+l];
  365. memset(buf2, NULL, 5);
  366. for (l = 0; l <= j; l++) buf2[l] = stem[l];
  367. if(strcmp(buf1, buf2) == 0)
  368. { //found
  369. j = 0;
  370. memset(prestem, NULL, 80);
  371. while (stem[l] != 0x00) prestem[j++] = stem[l++];
  372. return 1;
  373. }
  374. i++;
  375. }
  376. return -1;
  377. }
  378. void SetSilHeosa (int ivalue, WORD *rvalue)
  379. {
  380. switch (ivalue&0x0f00)
  381. {
  382. case POS_NOUN : ivalue |= wtSilsa; break;
  383. case POS_VERB : ivalue |= wtSilsa; break;
  384. case POS_SUFFIX :
  385. if ((ivalue&0x00ff) == DEOL_SUFFIX)
  386. ivalue |= wtHeosa;
  387. else
  388. ivalue |= wtSilsa;
  389. break;
  390. case POS_PREFIX : ivalue |= wtSilsa; break;
  391. case POS_ADJECTIVE : ivalue |= wtSilsa; break;
  392. case POS_PRONOUN : ivalue |= wtSilsa; break;
  393. case POS_NUMBER : ivalue |= wtSilsa; break;
  394. case POS_AUXADJ : ivalue |= wtHeosa; break;
  395. case POS_AUXVERB : ivalue |= wtHeosa; break;
  396. case POS_OTHERS : ivalue |= wtHeosa; break;
  397. case POS_TOSSI : ivalue |= wtHeosa; break;
  398. case POS_ENDING : ivalue |= wtHeosa; break;
  399. case POS_SPECIFIER : ivalue |= wtHeosa; break;
  400. }
  401. *rvalue = (WORD)ivalue;
  402. }