|
|
// =========================================================================
// Copyright (C) 1997 - 1998, Microsoft Corporation. All Rights Reserved.
//
// FILE NAME : BASESUB.CPP
// Function : BASE ENGINE FUNCTION COLLECTION
// : NLP Base Engine Function
// =========================================================================
#include "basesub.hpp"
#include "basegbl.hpp"
#include "stemkor.h"
#include "MainDict.h"
// ------------------------------------------------------------------------
//
//
// ------------------------------------------------------------------------
// Compares stem against the first three entries of TempNoun.
// Returns PRON_VALID on an exact (strcmp) match, BT otherwise.
int NLP_Ge_Proc( char *stem )
{
    int idx = 0;
    while (idx < 3)
    {
        if (strcmp(stem, TempNoun[idx]) == 0)
            return PRON_VALID;
        ++idx;
    }
    return BT;
}
// ------------------------------------------------------------------------
//
//
// ------------------------------------------------------------------------
// Scans incode from its last character backwards and looks up every
// successively longer tail (stored reversed in a local buffer) with
// FindHeosaWord, as _TOSSI when Endflag == 1 and as _ENDING otherwise.
//
//   incode  - NUL-terminated code string to scan.
//   Act     - out: Act[0] gets the default action code (0xf8 when
//             Endflag == 1, else 0x74); Act[1..j-1] get the action byte of
//             each FINAL / FINAL_MORE hit; Act[j] is set to 0.
//   sp      - out: sp[0] is set to -1; sp[1..j-1] get the loop index i of
//             each hit (the hit covers the last i+1 characters of incode);
//             sp[j] is set to 0.
//   Endflag - 1 selects the _TOSSI lookup, any other value the _ENDING one.
//
// Returns j, i.e. one more than the number of recorded hits.
int BaseEngine::NLP_Get_Ending( char *incode, char *Act, int *sp, int Endflag)
{
    char ending[40];
    BYTE action;
    int  res;
    int  j = 1;
    int  codelen = lstrlen(incode) - 1;

    memset(ending, 0, sizeof(ending));
    sp[0] = -1;

    if (Endflag == 1)
        Act[0] = (unsigned char)0xf8;   // if there is no tossi : action code 1111-1000
    else
        Act[0] = 0x74;                  // if there is no endin : action code 0111-0100

    // Every pass writes ending[i] and ending[i+1], so i has to stay below
    // sizeof(ending) - 1. Without this cap an incode of 40 or more
    // characters would write past the end of the local buffer.
    for (int i = 0; i <= codelen && i < (int)sizeof(ending) - 1; i++)
    {
        ending[i]   = incode[codelen - i];
        ending[i+1] = NULLCHAR;

        if (Endflag == 1)
            res = FindHeosaWord(ending, _TOSSI, &action);
        else
            res = FindHeosaWord(ending, _ENDING, &action);

        if (res == FINAL || res == FINAL_MORE)
        {
            Act[j]  = action;
            sp[j++] = i;                // LMEPOS
            continue;
        }
        if (res == FALSE_MORE)
            continue;                   // not a word yet, but a longer tail may be

        break;                          // NOT_FOUND (or any other result) ends the scan
    }

    // NOTE(review): sp[0] was set to -1 above and the loop only writes
    // sp[1] and up, so this condition is not expected to hold as written.
    // Left unchanged; confirm which index was intended.
    if (Endflag == 1 && sp [0] == 1)
    {
        sp [0]  = 1;
        sp [1]  = -1;
        Act [0] = Act [1];
        Act [1] = (unsigned char)0xf8;
    }

    Act[j] = 0;
    sp[j]  = 0;

    return j;
}
// ------------------------------------------------------------------------
//
//
// ------------------------------------------------------------------------
// Decides whether stem is accepted as a numeral.
//
// Returns BT at once when ULSPOS is -1. Otherwise a local copy of stem is
// examined in this order:
//   1. NumNoun.FindWord hit  -> the verdict of NLP_CheckSuja decides alone;
//   2. FindIrrWord(_ZZNUM) has the FINAL bit set -> NUM_VALID;
//   3. NLP_CheckSuja == VALID -> NUM_VALID;
//   4. anything else -> BT.
// The same buffer and position variable are handed to every call, in the
// order above.
int BaseEngine::NLP_Num_Proc( char *stem)
{
    if (ULSPOS == -1)
        return BT;

    char work[80];
    memset(work, 0, sizeof(work));
    lstrcpy(work, stem);
    int lastPos = lstrlen(work) - 1;

    if (NumNoun.FindWord(work, lastPos) != -1)
    {
        if (NLP_CheckSuja(work, lastPos) == VALID)
            return NUM_VALID;
        return BT;
    }

    if (FindIrrWord(work, _ZZNUM) & FINAL)
        return NUM_VALID;

    if (NLP_CheckSuja(work, lastPos) != VALID)
        return BT;

    return NUM_VALID;
}
// ----------------------------------------------------------------------
//
//
// ----------------------------------------------------------------------
// Checks stem with a two-state scan (_BASE / _NUM) driven by the BaseNum,
// SujaNum and DoubleNum tables. Returns VALID or INVALID.
//
// Each pass first asks BaseNum.FindWord for a match, passing currentbase + 1
// as its third argument.
//   - In state _NUM a miss is INVALID; a hit returns to state _BASE.
//   - In state _BASE a hit updates currentbase. On the very first pass
//     (phase _START), hit or miss, stem is compared against the eight
//     DoubleNum entries and an exact match is VALID at once. After the first
//     pass a miss is given to SujaNum.FindWord: a hit there switches to
//     state _NUM, a miss is INVALID.
// The loop runs while ulspos >= 0; this function never changes ulspos
// itself, so the FindWord calls are presumably what advance it (TODO
// confirm against the FindWord declaration).
int BaseEngine::NLP_CheckSuja( char *stem, int ulspos)
{
    enum STATE {_BASE, _NUM};
    enum OPERATION {_START, _NOSTART};

    STATE     state = _BASE;
    OPERATION phase = _START;
    char      currentbase = -1;

    JumpNum.FindWord(stem, ulspos);

    while (ulspos >= 0)
    {
        const char hit = (char)BaseNum.FindWord(stem, ulspos, currentbase + 1);
        const int  gotBase = (hit != -1);

        if (state == _NUM)
        {
            if (!gotBase)
                return INVALID;
            state = _BASE;
            currentbase = hit;
            continue;
        }

        // state == _BASE from here on
        if (gotBase)
            currentbase = hit;

        if (phase == _START)
        {
            for (int k = 0; k < 8; k++)
            {
                if (strcmp(stem, DoubleNum[k]) == 0)
                    return VALID;
            }
            phase = _NOSTART;
            continue;
        }

        if (gotBase)
            continue;

        if (SujaNum.FindWord(stem, ulspos) != -1)
        {
            state = _NUM;
            continue;
        }

        return INVALID;
    }

    return VALID;
}
// ----------------------------------------------------------------------
//
//
// ----------------------------------------------------------------------
// Validates the junction between the last character of stem and the ending
// according to the ACT_C and ACT_V flags. Returns NCV_VALID or BT.
//
//   ACT_C == 1 && ACT_V == 1 : always NCV_VALID.
//   ACT_C == 0 && ACT_V == 1 : NCV_VALID when the last stem character is
//                              >= __V_k, or when both the last stem
//                              character and LME are __K_R and
//                              ending[LMEPOS-1] is __V_h; otherwise BT.
//   any other combination    : BT when the last stem character is >= __V_k,
//                              or when it is __K_R and
//                              ending[LMEPOS-3..LMEPOS] is __V_h, __K_R,
//                              __V_m, __K_I; otherwise NCV_VALID.
int BaseEngine::NLP_NCV_Proc( char *stem, char *ending)
{
    int lULS;

    lULS = lstrlen(stem) - 1;           // index of the last stem character

    if (ACT_C == 1 && ACT_V == 1)
        return NCV_VALID;

    if (ACT_C == 0 && ACT_V == 1)
    {
        if (stem[lULS] >= __V_k)
            return NCV_VALID;

        // __IsDefEnd is now tested before ending[LMEPOS-1] is read, matching
        // the guard-then-access order used in the last check of this
        // function. Presumably __IsDefEnd(LMEPOS, n) confirms that n
        // characters exist before LMEPOS (TODO confirm); the original order
        // read ending[LMEPOS-1] before that was known.
        if (__IsDefEnd(LMEPOS, 1) == 1 &&
            LME == __K_R &&
            ending[LMEPOS-1] == __V_h)
        {
            if (stem[lULS] == __K_R)
                return NCV_VALID;
        }
        return BT;
    }

    if (stem[lULS] >= __V_k)
        return BT;

    if (stem[lULS] == __K_R &&
        __IsDefEnd(LMEPOS, 3) == 1 &&
        ending[LMEPOS]   == __K_I &&
        ending[LMEPOS-1] == __V_m &&
        ending[LMEPOS-2] == __K_R &&
        ending[LMEPOS-3] == __V_h)
    {
        return BT;
    }

    return NCV_VALID;
}
// ----------------------------------------------------------------------
//
// To process affix
//
// ----------------------------------------------------------------------
// Tries to split stem into a noun or pronoun plus an affix.
//
// On success the converted pieces (Conv.INS2HAN with codeWanSeong) are
// appended to lrgsz separated by "+", one POS_* tag per piece is stored at
// vbuf[wcount++], and one of Deol_VALID, Pref_VALID, Suf_VALID or
// PreSuf_VALID is returned. MORECHECK is returned when no split applies.
//
//   stem   - NUL-terminated stem. It is modified in place by the Deol_VALID
//            paths (truncated, or overwritten with prestem) and by the
//            Pref_VALID path (overwritten with bufstem).
//   ending - in the visible code only lstrlen(ending) is examined.
//
// index[0] is assigned 'm' in several places but is never read here.
int BaseEngine::NLP_Fix_Proc(char *stem, char *ending) { char prestem[80], bufstem[80], suffix [80], prefix [80], index[1]; int ulspos, temp;
// Start with empty prefix/suffix buffers and a working copy of stem.
prefix [0] = '\0'; suffix [0] = '\0'; lstrcpy(prestem, stem); ulspos = ULSPOS;
// Case 1: the working copy ends with the three codes __K_D, __V_m, __K_R
// (positions ULSPOS-2 .. ULSPOS). __IsDefStem(ULSPOS, 2) is tested first,
// presumably to confirm that two characters precede ULSPOS (TODO confirm).
// The inner test selects the pronoun path when ending is empty or
// ACT_P_A == 1.
if(__IsDefStem(ULSPOS, 2) == 1 && prestem[ULSPOS-2] == __K_D && prestem[ULSPOS-1] == __V_m && prestem[ULSPOS] == __K_R) { if(lstrlen(ending) == 0 || ACT_P_A == 1) // sp == 0 || ACT_P_A == 1
{
// Pronoun path: when FindIrrWord(stem, _ZPN) has the FINAL bit set, the last
// three characters plus the terminating NUL (4 bytes) are copied to suffix,
// stem is cut three characters short, and "pronoun+suffix" is emitted
// (POS_PRONOUN, POS_SUFFIX) -> Deol_VALID.
// Noun path (reached when the pronoun path did not return): __DelStemN is
// applied to prestem with count 3, the result is converted and looked up
// with FindSilsaWord; if the _NOUN bit is set, the last three characters of
// stem go to suffix, stem is overwritten with prestem, and "noun+suffix" is
// emitted (POS_NOUN, POS_SUFFIX) -> Deol_VALID.
// If neither path succeeds the function returns MORECHECK without trying
// the remaining cases below.
if(FindIrrWord(stem, _ZPN) & FINAL) { int len = lstrlen (stem); memcpy (suffix, stem+len-3, 4); stem [len-3] = '\0'; char tstem [80]; Conv.INS2HAN(stem, tstem, codeWanSeong); lstrcat (lrgsz, tstem); vbuf [wcount++] = POS_PRONOUN; lstrcat (lrgsz, "+"); Conv.INS2HAN(suffix, tstem, codeWanSeong); lstrcat(lrgsz, tstem); vbuf [wcount++] = POS_SUFFIX; return Deol_VALID; } } temp = ulspos; __DelStemN(prestem, &temp, 3); ulspos = temp; index[0] = 'm'; char tstem [80]; Conv.INS2HAN (prestem, tstem, codeWanSeong); if (FindSilsaWord (tstem) & _NOUN) { int len = lstrlen (stem); memcpy (suffix, stem+len-3, 4); lstrcpy (stem, prestem); lstrcat (lrgsz, tstem); vbuf [wcount++] = POS_NOUN; lstrcat (lrgsz, "+"); Conv.INS2HAN(suffix, tstem, codeWanSeong); lstrcat(lrgsz, tstem); vbuf [wcount++] = POS_SUFFIX; return Deol_VALID; } return MORECHECK; }
// Case 2: prefix + noun. PrefixCheck leaves the part of prestem that follows
// a matched prefix in bufstem. When that remainder has the _NOUN bit in
// FindSilsaWord, the leading lstrlen(stem) - lstrlen(bufstem) characters of
// stem are copied to prefix, stem is overwritten with bufstem, and
// "prefix+noun" is emitted (POS_PREFIX, POS_NOUN) -> Pref_VALID.
if(PrefixCheck(prestem, bufstem) != -1) { index[0] = 'm'; char tstem [80]; Conv.INS2HAN (bufstem, tstem, codeWanSeong); if (FindSilsaWord (tstem) & _NOUN) { int len = lstrlen(stem) - lstrlen(bufstem); memcpy (prefix, stem, len); prefix [len] = '\0'; lstrcpy (stem, bufstem); Conv.INS2HAN(prefix, tstem, codeWanSeong); lstrcat (lrgsz, tstem); vbuf [wcount++] = POS_PREFIX; lstrcat (lrgsz, "+"); Conv.INS2HAN(stem, tstem, codeWanSeong); lstrcat(lrgsz, tstem); vbuf [wcount++] = POS_NOUN; return Pref_VALID; } }
// Case 3: noun + suffix. After Suffix.FindWord reports a hit, prestem is
// looked up as the noun and stem + lstrlen(prestem) is emitted as the suffix
// text (POS_NOUN, POS_SUFFIX) -> Suf_VALID.
// NOTE(review): this presumes Suffix.FindWord shortens prestem by the
// matched suffix - confirm against its definition.
if(Suffix.FindWord(prestem, ulspos) != -1) { index[0] = 'm'; char tstem [80]; Conv.INS2HAN (prestem, tstem, codeWanSeong); if (FindSilsaWord (tstem) & _NOUN) { lstrcat (lrgsz, tstem); vbuf [wcount++] = POS_NOUN; lstrcat (lrgsz, "+"); Conv.INS2HAN(stem+lstrlen(prestem), tstem, codeWanSeong); lstrcat(lrgsz, tstem); vbuf [wcount++] = POS_SUFFIX; return Suf_VALID; } }
// Case 4: prefix + noun + suffix. prestem and ulspos are reset from stem and
// ULSPOS, then Suffix.FindWord and PrefixCheck must both succeed. When
// bufstem has the _NOUN bit, prestem is cut down to its leading
// lstrlen(prestem) - lstrlen(bufstem) characters (the prefix), and
// "prefix+noun+suffix" is emitted, the suffix text being taken from
// stem + lstrlen(prestem) + lstrlen(bufstem)
// (POS_PREFIX, POS_NOUN, POS_SUFFIX) -> PreSuf_VALID.
// Falls through to MORECHECK when no case matched.
lstrcpy(prestem, stem); ulspos = ULSPOS; if(Suffix.FindWord(prestem, ulspos) != -1 && PrefixCheck(prestem, bufstem) != -1) { index[0] = 'm'; char tstem [80]; Conv.INS2HAN (bufstem, tstem, codeWanSeong); if (FindSilsaWord (tstem) & _NOUN) { prestem [lstrlen(prestem) - lstrlen(bufstem)] = 0; Conv.INS2HAN(prestem, tstem, codeWanSeong); lstrcat (lrgsz, tstem); vbuf [wcount++] = POS_PREFIX; lstrcat (lrgsz, "+"); Conv.INS2HAN(bufstem, tstem, codeWanSeong); lstrcat(lrgsz, tstem); vbuf [wcount++] = POS_NOUN; lstrcat (lrgsz, "+"); Conv.INS2HAN(stem + lstrlen (prestem) + lstrlen (bufstem), tstem, codeWanSeong); lstrcat(lrgsz, tstem); vbuf [wcount++] = POS_SUFFIX; return PreSuf_VALID; } } return MORECHECK; }
int BaseEngine::NLP_Find_Pronoun(char *stem, char *ending) { if(FindIrrWord(stem, _ZPN) & FINAL) { if ((ending [0] == __V_k && ending [1] == __K_G) || (ending [0] == __V_p && ending [1] == __K_G)) { if ((stem [0] == __K_N && stem [1] == __V_j) || (stem [0] == __K_N && stem [1] == __V_k) || (stem [0] == __K_J && stem [1] == __V_j)) { return MORECHECK; } else if (stem [0] == __K_N && stem [1] == __V_o) { stem [1] = __V_k; } else if (stem [0] == __K_N && stem [1] == __V_p) { stem [1] = __V_j; } else if (stem [0] == __K_J && stem [1] == __V_p) { stem [1] = __V_j; } } else { int len = lstrlen (stem) - 1;
if (len > 4 && stem [len] == __K_D && stem [len - 1] == __V_m && stem [len - 2] == __K_R) stem [len-2] = '\0'; } char tstem [80]; Conv.INS2HAN (stem, tstem, codeWanSeong); lstrcat (lrgsz, tstem); vbuf [wcount++] = POS_PRONOUN; return VALID; }
return MORECHECK; }
// ------------------------------------------------------------------
//
//
// ------------------------------------------------------------------
// Tests whether stem starts with one of the prefixes stored in TempPrefix
// and, if so, copies the part of stem that follows the prefix into prestem.
//
// TempPrefix is read as 9 consecutive records of 5 bytes each. Byte 4 of a
// record holds the index of the last prefix character, and bytes 0 up to
// that index hold the prefix itself.
//
//   stem    - NUL-terminated input.
//   prestem - out: buffer of at least 80 bytes. On a match it is zero-filled
//             and receives the remainder of stem, at most 79 characters.
//
// Returns 1 when a prefix matched, -1 otherwise.
//
// Compared with the earlier version, the comparison is done directly on
// stem and stops at its terminator, so a stem shorter than the prefix is no
// longer read past its end. No fixed 5-byte scratch buffers depend on the
// table's length byte, and the copy into prestem is bounded by the 80 bytes
// that the zero-fill already assumes.
int PrefixCheck(char *stem, char *prestem)
{
    const int PreLen  = 9;      // number of records in TempPrefix
    const int WordLen = 5;      // bytes per record
    const int OutSize = 80;     // bytes available in prestem

    for (int i = 0; i < PreLen; i++)
    {
        const int base = i * WordLen;
        const int last = TempPrefix[base + 4];
        int l;
        int matched = 1;

        for (l = 0; l <= last; l++)
        {
            // A stem that ends before the prefix does cannot match.
            if (stem[l] == 0x00 || stem[l] != (char)TempPrefix[base + l])
            {
                matched = 0;
                break;
            }
        }
        if (!matched)
            continue;

        // found: l now indexes the first character after the prefix
        int j = 0;
        memset(prestem, 0, OutSize);
        while (stem[l] != 0x00 && j < OutSize - 1)
            prestem[j++] = stem[l++];
        return 1;
    }

    return -1;
}
void SetSilHeosa (int ivalue, WORD *rvalue) { switch (ivalue&0x0f00) { case POS_NOUN : ivalue |= wtSilsa; break; case POS_VERB : ivalue |= wtSilsa; break; case POS_SUFFIX : if ((ivalue&0x00ff) == DEOL_SUFFIX) ivalue |= wtHeosa; else ivalue |= wtSilsa; break; case POS_PREFIX : ivalue |= wtSilsa; break; case POS_ADJECTIVE : ivalue |= wtSilsa; break; case POS_PRONOUN : ivalue |= wtSilsa; break; case POS_NUMBER : ivalue |= wtSilsa; break; case POS_AUXADJ : ivalue |= wtHeosa; break; case POS_AUXVERB : ivalue |= wtHeosa; break; case POS_OTHERS : ivalue |= wtHeosa; break; case POS_TOSSI : ivalue |= wtHeosa; break; case POS_ENDING : ivalue |= wtHeosa; break; case POS_SPECIFIER : ivalue |= wtHeosa; break; }
*rvalue = (WORD)ivalue; }
|