mirror of https://github.com/tongzx/nt5src
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1103 lines
38 KiB
1103 lines
38 KiB
// =========================================================================
|
|
// Copyright (C) 1997 - 1998, Microsoft Corporation. All Rights Reserved.
|
|
//
|
|
// File Name : BASEMAIN.CPP
|
|
// Function : BASE ENGINE Handler
|
|
// : NLP Base Engine
|
|
// =========================================================================
|
|
#include "basemain.hpp"
|
|
#include "convert.hpp"
|
|
#include "MainDict.h"
|
|
|
|
|
|
|
|
|
|
/*---------------------------------------------------------------------------
|
|
%%Function : GetStemEnding
|
|
%%Contact : dhyu
|
|
|
|
---------------------------------------------------------------------------*/
|
|
void BaseEngine::GetStemEnding (char *incode, char *stem, char *ending, int position)
|
|
{
|
|
int codelen = lstrlen (incode) - 1;
|
|
|
|
LMEPOS = position;
|
|
lstrcpy (stem, incode);
|
|
|
|
if (LMEPOS == -1)
|
|
{
|
|
ULSPOS = codelen;
|
|
ending [0] = NULLCHAR;
|
|
}
|
|
else
|
|
{
|
|
if (LMEPOS == codelen)
|
|
{
|
|
ULSPOS = -1;
|
|
stem [0] = NULLCHAR;
|
|
}
|
|
else
|
|
{
|
|
ULSPOS = lstrlen(incode) - LMEPOS - 2;
|
|
stem[ULSPOS+1] = NULLCHAR;
|
|
}
|
|
// ending have a reverse order.
|
|
for (int k = 0, j = lstrlen(incode) - 1; k <= LMEPOS; j--, k++)
|
|
ending [k] = incode [j];
|
|
ending [k] = NULLCHAR;
|
|
}
|
|
|
|
}
|
|
|
|
int BaseEngine::NLP_BASE_NOUN (LPCSTR d, char *rstrings)
|
|
{
|
|
char Act[10], ostem[80],
|
|
oending[40], incode [100], stem [100], ending [40];
|
|
|
|
int bt, sp[10];
|
|
|
|
CODECONVERT Conv;
|
|
wcount = 0;
|
|
|
|
memset(incode, NULLCHAR, 100);
|
|
memset(Act, NULLCHAR, 10);
|
|
memset(lrgsz, NULLCHAR, 400);
|
|
|
|
for (int i = 0; i < 10; i++) sp[i] = 0x0000;
|
|
|
|
if(Conv.HAN2INS((char *)d, incode, codeWanSeong) != SUCCESS)
|
|
{ // KS -> Incode
|
|
return 99;
|
|
}
|
|
|
|
bt = NLP_Get_Ending(incode, Act, sp, TOSSI);
|
|
|
|
// for (i = bt - 1; i >= 0; i--)
|
|
for (i = 0; i < bt; i++)
|
|
{
|
|
GetStemEnding (incode, stem, ending, sp [i]);
|
|
|
|
ACT_C = GetBit(Act [i], 7); // consonant
|
|
ACT_V = GetBit(Act [i], 6); // vowel
|
|
ACT_N_V = GetBit(Act [i], 5);
|
|
ACT_P_A = GetBit(Act [i], 4);
|
|
ACT_N_E = GetBit(Act [i], 3);
|
|
|
|
memset(ostem, NULLCHAR, 80);
|
|
memset(oending, NULLCHAR, 40);
|
|
|
|
Conv.INR2HAN(ending, oending, codeWanSeong);
|
|
Conv.INS2HAN(stem, ostem, codeWanSeong); // incode -> ks
|
|
|
|
if(__IsDefEnd(LMEPOS, 1) == 1 &&
|
|
ending[LMEPOS] == __K_G && ending[LMEPOS-1] == __V_p)
|
|
{
|
|
if(NLP_Ge_Proc(stem) != BT)
|
|
{
|
|
lstrcat(lrgsz, ostem);
|
|
lstrcat(lrgsz, "+");
|
|
lstrcat(lrgsz, oending);
|
|
lstrcat(lrgsz, "\t");
|
|
vbuf[wcount++] = POS_PRONOUN;
|
|
vbuf[wcount++] = POS_TOSSI;
|
|
}
|
|
continue;
|
|
}
|
|
|
|
if (NLP_NCV_Proc(stem, ending) != NCV_VALID)
|
|
continue;
|
|
|
|
|
|
|
|
if (FindSilsaWord (ostem) & _NOUN)
|
|
{ // searching the noun dictionary
|
|
lstrcat(lrgsz, ostem);
|
|
vbuf[wcount++] = POS_NOUN;
|
|
if (lstrlen (oending) > 0)
|
|
{
|
|
lstrcat(lrgsz, "+");
|
|
lstrcat(lrgsz, oending);
|
|
vbuf[wcount++] = POS_TOSSI;
|
|
}
|
|
lstrcat(lrgsz, "\t");
|
|
}
|
|
|
|
if(i == 0 || ACT_P_A == 1)
|
|
{
|
|
if (NLP_Find_Pronoun (stem, ending) == VALID)
|
|
{
|
|
if (lstrlen (oending) > 0)
|
|
{
|
|
lstrcat(lrgsz, "+");
|
|
lstrcat(lrgsz, oending);
|
|
vbuf[wcount++] = POS_TOSSI;
|
|
}
|
|
lstrcat(lrgsz, "\t");
|
|
}
|
|
}
|
|
if(i == 0 || ACT_N_E == 1)
|
|
{
|
|
if(NLP_Num_Proc(stem) != BT)
|
|
{
|
|
lstrcat(lrgsz, ostem);
|
|
vbuf[wcount++] = POS_NUMBER;
|
|
if (lstrlen (oending) > 0)
|
|
{
|
|
lstrcat(lrgsz, "+");
|
|
lstrcat(lrgsz, oending);
|
|
vbuf[wcount++] = POS_TOSSI;
|
|
}
|
|
lstrcat(lrgsz, "\t");
|
|
}
|
|
continue; // backtracking
|
|
}
|
|
}
|
|
|
|
|
|
lstrcpy (rstrings, lrgsz);
|
|
|
|
return wcount;
|
|
}
|
|
|
|
int BaseEngine::NLP_BASE_AFFIX (LPCSTR d, char *rstrings)
|
|
{
|
|
char Act[10], oending [40], incode [100], stem [100], ending [40];
|
|
|
|
int bt,
|
|
ret,
|
|
sp[10];
|
|
|
|
CODECONVERT Conv;
|
|
wcount = 0;
|
|
|
|
memset(incode, NULLCHAR, 100);
|
|
memset(Act, NULLCHAR, 10);
|
|
memset(lrgsz, NULLCHAR, 400);
|
|
|
|
for (int i = 0; i < 10; i++) sp[i] = 0x0000;
|
|
|
|
if(Conv.HAN2INS((char *)d, incode, codeWanSeong) != SUCCESS)
|
|
{ // KS -> Incode
|
|
return 99;
|
|
}
|
|
|
|
bt = NLP_Get_Ending(incode, Act, sp, TOSSI);
|
|
|
|
// for (i = bt - 1; i >= 0; i--)
|
|
for (i = 0; i < bt; i++)
|
|
{
|
|
GetStemEnding (incode, stem, ending, sp [i]);
|
|
|
|
ACT_C = GetBit(Act [i], 7); // consonant
|
|
ACT_V = GetBit(Act [i], 6); // vowel
|
|
ACT_N_V = GetBit(Act [i], 5);
|
|
ACT_P_A = GetBit(Act [i], 4);
|
|
ACT_N_E = GetBit(Act [i], 3);
|
|
|
|
|
|
if (NLP_NCV_Proc(stem, ending) != NCV_VALID)
|
|
continue;
|
|
|
|
memset(oending, NULLCHAR, 40);
|
|
Conv.INR2HAN(ending, oending, codeWanSeong);
|
|
|
|
ret = NLP_Fix_Proc(stem, ending);
|
|
switch (ret)
|
|
{
|
|
case Deol_VALID :
|
|
case Pref_VALID :
|
|
case Suf_VALID :
|
|
case PreSuf_VALID :
|
|
if (lstrlen(oending) > 0)
|
|
{
|
|
lstrcat (lrgsz, "+");
|
|
lstrcat (lrgsz, oending);
|
|
vbuf [wcount++] = POS_TOSSI;
|
|
}
|
|
lstrcat(lrgsz, "\t");
|
|
case BT : continue; // backtracking
|
|
}
|
|
}
|
|
|
|
|
|
lstrcpy (rstrings, lrgsz);
|
|
|
|
return wcount;
|
|
}
|
|
|
|
int BaseEngine::NLP_BASE_ALONE(LPCSTR d, char *rstrings)
|
|
{
|
|
char incode [100];
|
|
CODECONVERT Conv;
|
|
|
|
memset(incode, NULLCHAR, 100);
|
|
memset(lrgsz, NULLCHAR, 400);
|
|
wcount = 0;
|
|
|
|
if(Conv.HAN2INS((char *)d, incode, codeWanSeong) != SUCCESS)
|
|
{ // KS -> Incode
|
|
return 99;
|
|
}
|
|
|
|
// check whether input word is ADVERB, or not
|
|
if (FindSilsaWord (d) & _ALONE)
|
|
{
|
|
lstrcat(lrgsz, d);
|
|
lstrcat(lrgsz, "\t");
|
|
vbuf[wcount++] = POS_ADVERB;
|
|
}
|
|
|
|
lstrcpy (rstrings, lrgsz);
|
|
|
|
return wcount;
|
|
}
|
|
|
|
int BaseEngine::NLP_BASE_VERB (LPCSTR d, char *rstrings)
|
|
{
|
|
char index[1],
|
|
AUX_Flag, tmp[80],
|
|
Act[10], ostem[80],
|
|
oending[40], incode [100], stem [100], ending [40], rending [40];
|
|
|
|
int bt,
|
|
ret,
|
|
rt,
|
|
sp[10], temp, luls;
|
|
|
|
CODECONVERT Conv;
|
|
wcount = 0;
|
|
|
|
memset(Act, NULLCHAR, 10);
|
|
memset(incode, NULLCHAR, 100);
|
|
memset(lrgsz, NULLCHAR, 400);
|
|
for (int i = 0; i < 10; i++) sp[i] = 0x0000;
|
|
|
|
if(Conv.HAN2INS((char *)d, incode, codeWanSeong) != SUCCESS)
|
|
{ // KS -> Incode
|
|
return 99;
|
|
}
|
|
|
|
bt = NLP_Get_Ending(incode, Act, sp, END);
|
|
int codelen = lstrlen(incode) - 1;
|
|
for (i = bt-1; i >= 0; i--)
|
|
{
|
|
memset(ostem, NULLCHAR, 80);
|
|
memset(oending, NULLCHAR, 40);
|
|
|
|
GetStemEnding (incode, stem, ending, sp [i]);
|
|
|
|
if (lstrlen (stem) == 0)
|
|
continue;
|
|
|
|
ACT_C = GetBit(Act[i], 7);
|
|
ACT_V = GetBit(Act[i], 6);
|
|
ACT_N_V = GetBit(Act[i], 5);
|
|
ACT_P_A = GetBit(Act[i], 4);
|
|
ACT_N_E = GetBit(Act[i], 3);
|
|
ACT_SS = GetBit(Act[i], 2);
|
|
ACT_KE = GetBit(Act[i], 1);
|
|
|
|
RestoreEnding (ending, rending);
|
|
Conv.INR2HAN(rending, oending, codeWanSeong);
|
|
Conv.INS2HAN(stem, ostem, codeWanSeong); // incode -> ks
|
|
|
|
lstrcpy(tmp, stem);
|
|
luls = ULSPOS;
|
|
|
|
|
|
if(ACT_SS == 1)
|
|
{
|
|
if((ret = NLP_SS_Proc(stem, ending)) < INVALID) // VALID
|
|
{
|
|
if (lstrlen (oending) > 0)
|
|
{
|
|
lstrcat(lrgsz, "+");
|
|
lstrcat(lrgsz, oending);
|
|
vbuf [wcount++] = POS_ENDING;
|
|
}
|
|
lstrcat(lrgsz, "\t");
|
|
}
|
|
continue;
|
|
}
|
|
|
|
|
|
if(i == 0)
|
|
{
|
|
break;
|
|
}
|
|
|
|
|
|
if(ACT_KE == 1)
|
|
{
|
|
if((ret = NLP_KTC_Proc(stem, ending)) == BT) // backtracking
|
|
{
|
|
continue;
|
|
}
|
|
}
|
|
|
|
|
|
ret = NLP_VCV_Check (stem, ending);
|
|
if(ret < INVALID)
|
|
{
|
|
AUX_Flag = 0;
|
|
|
|
if(ACT_N_V == 1)
|
|
{
|
|
|
|
if (FindSilsaWord (ostem) & _VERB)
|
|
{
|
|
lstrcat(lrgsz, ostem);
|
|
lstrcat(lrgsz, "+");
|
|
lstrcat(lrgsz, oending);
|
|
lstrcat(lrgsz, "\t");
|
|
vbuf[wcount++] = POS_VERB;
|
|
vbuf[wcount++] = POS_ENDING;
|
|
AUX_Flag = 1;
|
|
}
|
|
}
|
|
|
|
if(ACT_P_A == 1)
|
|
{
|
|
|
|
if (FindSilsaWord (ostem) & _ADJECTIVE)
|
|
{
|
|
lstrcat(lrgsz, ostem);
|
|
lstrcat(lrgsz, "+");
|
|
lstrcat(lrgsz, oending);
|
|
lstrcat(lrgsz, "\t");
|
|
vbuf[wcount++] = POS_ADJECTIVE;
|
|
vbuf[wcount++] = POS_ENDING;
|
|
AUX_Flag = 1;
|
|
}
|
|
if(NLP_Dap_Proc(stem) == Dap_VALID)
|
|
{
|
|
if (lstrlen (oending) > 0)
|
|
{
|
|
lstrcat(lrgsz, "+");
|
|
lstrcat(lrgsz, oending);
|
|
vbuf[wcount++] = POS_ENDING;
|
|
}
|
|
lstrcat(lrgsz, "\t");
|
|
AUX_Flag = 1;
|
|
}
|
|
if(NLP_Gop_Proc(stem) == Gop_VALID)
|
|
{
|
|
if (lstrlen (oending) > 0)
|
|
{
|
|
lstrcat(lrgsz, "+");
|
|
lstrcat(lrgsz, oending);
|
|
vbuf[wcount++] = POS_ENDING;
|
|
}
|
|
lstrcat(lrgsz, "\t");
|
|
AUX_Flag = 1;
|
|
}
|
|
if((rt = NLP_Manha_Proc(stem)) < INVALID)
|
|
{
|
|
if (lstrlen (oending) > 0)
|
|
{
|
|
lstrcat(lrgsz, "+");
|
|
lstrcat(lrgsz, oending);
|
|
vbuf[wcount++] = POS_ENDING;
|
|
}
|
|
lstrcat(lrgsz, "\t");
|
|
AUX_Flag = 1;
|
|
}
|
|
if((rt = NLP_Manhaeci_Proc(stem)) == Manhaeci_VALID)
|
|
{
|
|
if (lstrlen (oending) > 0)
|
|
{
|
|
lstrcat(lrgsz, "+");
|
|
lstrcat(lrgsz, oending);
|
|
vbuf[wcount++] = POS_ENDING;
|
|
}
|
|
lstrcat(lrgsz, "\t");
|
|
AUX_Flag = 1;
|
|
}
|
|
if((rt = NLP_Cikha_Proc(stem)) < INVALID)
|
|
{
|
|
if (lstrlen (oending) > 0)
|
|
{
|
|
lstrcat(lrgsz, "+");
|
|
lstrcat(lrgsz, oending);
|
|
vbuf[wcount++] = POS_ENDING;
|
|
}
|
|
lstrcat(lrgsz, "\t");
|
|
}
|
|
}
|
|
// AUX_FLOW
|
|
if(AUX_Flag == 0)
|
|
{
|
|
if(ACT_N_V == 1)
|
|
{
|
|
if((rt = NLP_AUX_Find(stem, 0)) < INVALID)
|
|
{
|
|
if (lstrlen (oending) > 0)
|
|
{
|
|
lstrcat(lrgsz, "+");
|
|
lstrcat(lrgsz, oending);
|
|
vbuf[wcount++] = POS_ENDING;
|
|
}
|
|
lstrcat(lrgsz, "\t");
|
|
}
|
|
}
|
|
if(ACT_P_A == 1)
|
|
{
|
|
if((rt = NLP_AUX_Find(stem, 1)) < INVALID)
|
|
{
|
|
if (lstrlen (oending) > 0)
|
|
{
|
|
lstrcat(lrgsz, "+");
|
|
lstrcat(lrgsz, oending);
|
|
vbuf[wcount++] = POS_ENDING;
|
|
}
|
|
lstrcat(lrgsz, "\t");
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else if (ret != MORECHECK)
|
|
continue; // against consonant-vowel harmony
|
|
|
|
|
|
if(ACT_N_E == 1)
|
|
{
|
|
if(strcmp(stem, TempIkNl) == 0)
|
|
{
|
|
lstrcat(lrgsz, ostem);
|
|
lstrcat(lrgsz, "+");
|
|
lstrcat(lrgsz, oending);
|
|
lstrcat(lrgsz, "\t");
|
|
vbuf[wcount++] = POS_VERB; //Jap_VALID;
|
|
vbuf[wcount++] = POS_ENDING;
|
|
}
|
|
if(__IsDefStem(ULSPOS, 1) == 1 &&
|
|
stem[ULSPOS-1] == __K_I && stem[ULSPOS] == __V_l)
|
|
{
|
|
|
|
if(__IsDefStem(ULSPOS, 2) == 1 && stem[ULSPOS-2] == __K_M)
|
|
{
|
|
sp[i] = LMEPOS+3;
|
|
Act[i] = 0x70; // action:01-110-00-0
|
|
i++;
|
|
if(__IsDefStem(ULSPOS, 4) == 1 &&
|
|
stem[ULSPOS-4] == __K_I && stem[ULSPOS-3] == __V_m)
|
|
{
|
|
sp[i] = LMEPOS+5;
|
|
Act[i] = (unsigned char)0xB0; // action:10 110 00 0
|
|
i++;
|
|
}
|
|
}
|
|
temp = ULSPOS;
|
|
__DelStem2(stem, &temp);
|
|
ULSPOS = temp;
|
|
|
|
|
|
|
|
|
|
if((ret = NLP_Machine_T(stem, ending)) < INVALID)
|
|
{
|
|
if (lstrlen (oending) > 0)
|
|
{
|
|
lstrcat(lrgsz, "+");
|
|
lstrcat(lrgsz, oending);
|
|
vbuf[wcount++] = POS_ENDING;
|
|
}
|
|
lstrcat(lrgsz, "\t");
|
|
}
|
|
|
|
temp = ULSPOS;
|
|
__AddStem2(stem, &temp, __K_I, __V_l);
|
|
ULSPOS = temp;
|
|
if(__IsDefEnd(LMEPOS, 1) == 1 &&
|
|
ending[LMEPOS] == __K_I && ending[LMEPOS-1] == __V_j)
|
|
{
|
|
|
|
|
|
for (int i = 0; i < 3; i++)
|
|
{
|
|
if(strcmp(stem, TempJap[i]) == 0)
|
|
{
|
|
lstrcat(lrgsz, ostem);
|
|
lstrcat(lrgsz, "+");
|
|
lstrcat(lrgsz, oending);
|
|
lstrcat(lrgsz, "\t");
|
|
vbuf[wcount++] = POS_VERB; //VERB_VALID;
|
|
vbuf[wcount++] = POS_ENDING;
|
|
}
|
|
}
|
|
continue;
|
|
}
|
|
|
|
index[0] = 'm';
|
|
if (FindSilsaWord (ostem) & _NOUN)
|
|
{
|
|
lstrcat(lrgsz, ostem);
|
|
lstrcat(lrgsz, "+");
|
|
lstrcat(lrgsz, oending);
|
|
lstrcat(lrgsz, "\t");
|
|
vbuf[wcount++] = POS_NOUN; //Jap_NOUN_VALID;
|
|
vbuf[wcount++] = POS_ENDING;
|
|
}
|
|
if((ret = NLP_Fix_Proc(stem, ending)) < INVALID)
|
|
{
|
|
if (lstrlen (oending) > 0)
|
|
{
|
|
lstrcat(lrgsz, "+");
|
|
lstrcat(lrgsz, oending);
|
|
vbuf[wcount++] = POS_ENDING;
|
|
}
|
|
lstrcat(lrgsz, "\t");
|
|
}
|
|
if (NLP_Find_Pronoun (stem, ending) == VALID)
|
|
{
|
|
if (lstrlen (oending) > 0)
|
|
{
|
|
lstrcat(lrgsz, "+");
|
|
lstrcat(lrgsz, oending);
|
|
vbuf[wcount++] = POS_TOSSI;
|
|
}
|
|
lstrcat(lrgsz, "\t");
|
|
}
|
|
if((ret = NLP_Num_Proc(stem)) < INVALID)
|
|
{
|
|
lstrcat(lrgsz, ostem);
|
|
lstrcat(lrgsz, "+");
|
|
lstrcat(lrgsz, oending);
|
|
lstrcat(lrgsz, "\t");
|
|
vbuf[wcount++] = POS_NUMBER; //Jap_NUM_VALID;
|
|
vbuf[wcount++] = POS_ENDING;
|
|
}
|
|
continue; // backtracking
|
|
}
|
|
else if( /*ACT_Z != 1 && */ // ACT_Z != 1
|
|
ULS >= __V_k &&
|
|
!(__IsDefEnd(LMEPOS, 1) == 1 &&
|
|
ending[LMEPOS] == __K_I && ending[LMEPOS-1] == __V_j))
|
|
|
|
|
|
{
|
|
if((ret = NLP_Machine_T(stem, ending)) < INVALID)
|
|
{
|
|
if (lstrlen (oending) > 0)
|
|
{
|
|
lstrcat(lrgsz, "+");
|
|
lstrcat(lrgsz, oending);
|
|
vbuf[wcount++] = POS_ENDING;
|
|
}
|
|
lstrcat(lrgsz, "\t");
|
|
}
|
|
}
|
|
}
|
|
lstrcpy(tmp, stem);
|
|
luls = ULSPOS;
|
|
|
|
|
|
if(ACT_C == 0 && ACT_V == 1)
|
|
{
|
|
// by hjw : 95/3/6
|
|
if((ret = NLP_Irr_01(stem, ending)) < INVALID)
|
|
{
|
|
if (lstrlen (oending) > 0)
|
|
{
|
|
lstrcat(lrgsz, "+");
|
|
lstrcat(lrgsz, oending);
|
|
vbuf[wcount++] = POS_ENDING;
|
|
}
|
|
lstrcat(lrgsz, "\t");
|
|
continue;
|
|
}
|
|
}
|
|
|
|
lstrcpy (stem, tmp);
|
|
ret = BT;
|
|
switch(LME)
|
|
{
|
|
case __K_N :
|
|
if((ret = NLP_Irr_KN(stem, ending)) == Irr_KN_Vl)
|
|
{
|
|
ret = NLP_Irr_KN_Vl(stem);
|
|
}
|
|
if(ret == Irr_OPS)
|
|
{
|
|
if((ret = NLP_Irr_OPS(stem, ending)) == SS)
|
|
{
|
|
if((ret = NLP_SS_Proc(stem, ending)) == BT)
|
|
{
|
|
continue;
|
|
}
|
|
if(ret < INVALID)
|
|
{
|
|
ret += Irr_SS;
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
case __K_B :
|
|
ret = NLP_Machine_A(stem, ending);
|
|
break;
|
|
// hjw : 95/3/17
|
|
case __K_S :
|
|
if(ACT_C == 1) // ATC_C == 1
|
|
{
|
|
ret = NLP_Irr_KS(stem, ending);
|
|
}
|
|
else if(ULS >= __V_k)
|
|
{
|
|
ret = NLP_Machine_A(stem, ending);
|
|
}
|
|
break;
|
|
case __K_M :
|
|
ret = NLP_Irr_KM(stem);
|
|
break;
|
|
case __K_R :
|
|
if(__IsDefEnd(LMEPOS, 1) == 0 || ending[LMEPOS-1] < __V_k)
|
|
{
|
|
ret = NLP_Machine_A(stem,ending);
|
|
|
|
if (ret != BT)
|
|
{
|
|
if (lstrlen (oending) > 0)
|
|
{
|
|
lstrcat(lrgsz, "+");
|
|
lstrcat(lrgsz, oending);
|
|
vbuf[wcount++] = POS_ENDING;
|
|
}
|
|
lstrcat(lrgsz, "\t");
|
|
}
|
|
|
|
}
|
|
if(ACT_P_A == 1)
|
|
{
|
|
ret = NLP_Irr_KRadj(stem, ending);
|
|
if (ret != BT)
|
|
{
|
|
if (lstrlen (oending) > 0)
|
|
{
|
|
lstrcat(lrgsz, "+");
|
|
lstrcat(lrgsz, oending);
|
|
vbuf[wcount++] = POS_ENDING;
|
|
}
|
|
lstrcat(lrgsz, "\t");
|
|
}
|
|
}
|
|
if(ACT_N_V == 1)
|
|
{
|
|
if((ret = NLP_Irr_KRvb(stem, ending)) == SS)
|
|
{
|
|
if((ret = NLP_SS_Proc(stem, ending)) == BT)
|
|
{
|
|
continue;
|
|
}
|
|
if(ret < INVALID)
|
|
{
|
|
ret += Irr_SS;
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
case __K_I :
|
|
if(__IsDefEnd(LMEPOS, 1) == 1 && (ending[LMEPOS-1] == __V_h ||
|
|
ending[LMEPOS-1] == __V_hl || ending[LMEPOS-1] == __V_l ))
|
|
{
|
|
if(ULS >= __V_k)
|
|
{
|
|
ret = NLP_Irr_KI(stem,rending);
|
|
}
|
|
else
|
|
{
|
|
continue;
|
|
}
|
|
}
|
|
if(ULS == __K_R)
|
|
{
|
|
ret = NLP_Irr_KI_KR(stem, ending);
|
|
}
|
|
if(ULS >= __V_k)
|
|
{
|
|
ret = NLP_Irr_KI_V(stem, ending);
|
|
}
|
|
break;
|
|
default :
|
|
continue;
|
|
}
|
|
if(ret >= INVALID)
|
|
{
|
|
continue;
|
|
}
|
|
if(ret >= VALID)
|
|
{
|
|
if (lstrlen (oending) > 0)
|
|
{
|
|
lstrcat(lrgsz, "+");
|
|
lstrcat(lrgsz, oending);
|
|
vbuf[wcount++] = POS_ENDING;
|
|
}
|
|
lstrcat(lrgsz, "\t");
|
|
}
|
|
}
|
|
|
|
lstrcpy (rstrings, lrgsz);
|
|
|
|
return wcount;
|
|
}
|
|
|
|
// made by dhyu 1996. 2
|
|
// look into mrfgen01.txt to know details
|
|
void BaseEngine::RestoreEnding (char *ending, char *rending)
|
|
{
|
|
int len = lstrlen (ending); // ending has reverse order.
|
|
lstrcpy (rending, ending);
|
|
|
|
if (lstrlen (ending) == 0)
|
|
return;
|
|
|
|
if (ACT_SS) // insert "IEUNG, EO" to the first of ending
|
|
{
|
|
rending [len] = __V_j;
|
|
rending [len+1] = __K_I;
|
|
rending [len+2] = '\0';
|
|
|
|
return;
|
|
}
|
|
|
|
if (ACT_C && ACT_V) //CV == 11
|
|
{
|
|
if (ending [len - 1] == __K_I && ending [len - 2] == __V_k)
|
|
rending [len - 2] = __V_j;
|
|
|
|
return;
|
|
}
|
|
|
|
if (!ACT_C && ACT_V == TRUE) // CV == 01
|
|
{
|
|
switch (ending [len - 1])
|
|
{
|
|
case __K_B :
|
|
rending [len] = __V_m; // insert "SIOS, EU" to the first of ending
|
|
rending [len+1] = __K_S;
|
|
rending [len+2] = '\0';
|
|
return ;
|
|
case __K_R :
|
|
if (len == 2 && ending [0] == __V_k) // if ending is "ra" , insert "IEUNG EO" to the first of ending
|
|
{
|
|
rending [len] = __V_j;
|
|
rending [len+1] = __K_I;
|
|
rending [len+2] = '\0';
|
|
return;
|
|
}
|
|
break;
|
|
case __K_I :
|
|
if (ending [len - 2] == __V_y) // if ending is "yo",
|
|
{
|
|
rending [len] = __V_j;
|
|
rending [len+1] = __K_I;
|
|
rending [len+2] = '\0';
|
|
return;
|
|
}
|
|
break;
|
|
case __K_N :
|
|
if (!ACT_C && ACT_V && ACT_N_V && !ACT_P_A && !ACT_P_A && !ACT_N_E && !ACT_SS && !ACT_KE)
|
|
{
|
|
rending [len] = __V_m;
|
|
rending [len+1] = __K_N;
|
|
rending [len+2] = '\0';
|
|
return;
|
|
}
|
|
}
|
|
|
|
rending [len] = __V_m; // insert "IEUNG, EU" to the first of ending
|
|
rending [len+1] = __K_I;
|
|
rending [len+2] = '\0';
|
|
|
|
return;
|
|
}
|
|
|
|
// "KE-TO-CHI" ending and copula is processed with stem together.
|
|
|
|
return;
|
|
}
|
|
|
|
// To process compound noun, we use the window which size is 4 characters.
|
|
// We decrease the size until we found noun.
|
|
// However we don't decrease it less than 2 characters.
|
|
// made by dhyu --- 1996. 3
|
|
int BaseEngine::NLP_BASE_COMPOUND (LPCSTR d, char *rstrings)
|
|
{
|
|
char Act[10], ostem[80], oending[40],
|
|
incode [100], stem [100], ending [40];
|
|
|
|
int bt,
|
|
sp[10];
|
|
BOOL found;
|
|
|
|
CODECONVERT Conv;
|
|
|
|
memset(incode, NULLCHAR, 100);
|
|
memset(Act, NULLCHAR, 10);
|
|
|
|
for (int i = 0; i < 10; i++) sp[i] = 0x0000;
|
|
|
|
if(Conv.HAN2INS((char *)d, incode, codeWanSeong) != SUCCESS)
|
|
return 0;
|
|
|
|
|
|
bt = NLP_Get_Ending(incode, Act, sp, TOSSI);
|
|
|
|
for (i = bt-1; i >= bt-3 && i >= 0; i--)
|
|
//for (i = 0; i < bt; i++)
|
|
{
|
|
GetStemEnding (incode, stem, ending, sp [i]);
|
|
|
|
ACT_C = GetBit(Act [i], 7); // consonant
|
|
ACT_V = GetBit(Act [i], 6); // vowel
|
|
ACT_N_V = GetBit(Act [i], 5);
|
|
ACT_P_A = GetBit(Act [i], 4);
|
|
ACT_N_E = GetBit(Act [i], 3);
|
|
|
|
if (NLP_NCV_Proc(stem, ending) == NCV_VALID)
|
|
{
|
|
memset(ostem, NULLCHAR, 80);
|
|
memset(oending, NULLCHAR, 40);
|
|
Conv.INR2HAN(ending, oending, codeWanSeong);
|
|
Conv.INS2HAN(stem, ostem, codeWanSeong); // incode -> ks
|
|
|
|
wcount = 0;
|
|
memset (lrgsz, NULLCHAR, 400);
|
|
// Window size is 4 charaters (8 byte)
|
|
char window [9], inwindow [25];
|
|
memset (window, '\0', 9);
|
|
char *next = ostem;
|
|
found = TRUE;
|
|
while (lstrlen (next) > 8)
|
|
{
|
|
found = FALSE;
|
|
memcpy (window, next, 8);
|
|
for (int j = 7; j >= 3; j -= 2)
|
|
{
|
|
if (FindSilsaWord (window) & _NOUN)
|
|
{ // searching the noun dictionary
|
|
lstrcat(lrgsz, window);
|
|
vbuf[wcount++] = POS_NOUN;
|
|
lstrcat(lrgsz, "+");
|
|
found = TRUE;
|
|
break;
|
|
}
|
|
window [j] = '\0';
|
|
window [j-1] = '\0';
|
|
}
|
|
if (!found)
|
|
{
|
|
// if "GYEOM" is the first character in window
|
|
Conv.HAN2INS (next, inwindow, codeWanSeong);
|
|
if ((inwindow [0] == __K_G && inwindow [1] == __V_u && inwindow [2] == __K_M) ||
|
|
(inwindow [0] == __K_M && inwindow [1] == __V_l && inwindow [2] == __K_C))
|
|
{
|
|
memcpy (window, next, 2);
|
|
window [2] = '\0';
|
|
lstrcat(lrgsz, window);
|
|
vbuf [wcount++] = POS_ADVERB;
|
|
lstrcat(lrgsz, "+");
|
|
found = TRUE;
|
|
next += 2;
|
|
}
|
|
else
|
|
break;
|
|
}
|
|
else
|
|
next += (j+1);
|
|
}
|
|
|
|
if (!found)
|
|
continue;
|
|
else
|
|
{
|
|
if (FindSilsaWord (next) & _NOUN)
|
|
{
|
|
lstrcat (lrgsz, next);
|
|
vbuf[wcount++] = POS_NOUN;
|
|
}
|
|
else
|
|
{
|
|
switch (lstrlen(next))
|
|
{
|
|
case 8 :
|
|
// if the size of last winow is 4, we divide it into same size two.
|
|
memcpy (window, next, 4);
|
|
window [4] = '\0';
|
|
Conv.HAN2INS (window, inwindow, codeWanSeong);
|
|
|
|
found = FALSE;
|
|
if (FindSilsaWord (window) & _NOUN)
|
|
{ // searching the noun dictionary
|
|
Conv.HAN2INS(next+4, inwindow, codeWanSeong);
|
|
if (FindSilsaWord (next+4) & _NOUN)
|
|
{
|
|
lstrcat(lrgsz, window);
|
|
vbuf[wcount++] = POS_NOUN;
|
|
lstrcat(lrgsz, "+");
|
|
lstrcat(lrgsz, next+4);
|
|
vbuf[wcount++] = POS_NOUN;
|
|
found = TRUE;
|
|
}
|
|
}
|
|
if (!found)
|
|
{
|
|
// if "GYEOM" is the first character in window
|
|
if ((inwindow [0] == __K_G && inwindow [1] == __V_u && inwindow [2] == __K_M) ||
|
|
(inwindow [0] == __K_M && inwindow [1] == __V_l && inwindow [2] == __K_C))
|
|
{
|
|
memcpy (window, next, 8);
|
|
window [9] = '\0';
|
|
if (FindSilsaWord (window) & _NOUN)
|
|
{
|
|
memcpy (window, next, 2);
|
|
window [2] = '\0';
|
|
lstrcat(lrgsz, window);
|
|
vbuf [wcount++] = POS_ADVERB;
|
|
lstrcat(lrgsz, "+");
|
|
lstrcat(lrgsz, next+2);
|
|
vbuf [wcount++] = POS_NOUN;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// if "DEUNG" is the last character
|
|
Conv.HAN2INS (next+6, inwindow, codeWanSeong);
|
|
if ((inwindow [0] == __K_D && inwindow [1] == __V_m && inwindow [2] == __K_I) ||
|
|
(inwindow [0] == __K_G && inwindow [1] == __V_k && inwindow [2] == __K_M) ||
|
|
(inwindow [0] == __K_G && inwindow [1] == __V_k && inwindow [2] == __K_B && inwindow [3] == __K_S) ||
|
|
(inwindow [0] == __K_G && inwindow [1] == __V_P) ||
|
|
(inwindow [0] == __K_C && inwindow [1] == __V_o && inwindow [2] == __K_G))
|
|
|
|
{
|
|
memcpy (window, next, 6);
|
|
window [6] = '\0';
|
|
if (FindSilsaWord (window) & _NOUN)
|
|
{
|
|
lstrcat (lrgsz, window);
|
|
vbuf [wcount++] = POS_NOUN;
|
|
lstrcat (lrgsz, "+");
|
|
lstrcat (lrgsz, next+6);
|
|
vbuf [wcount++] = POS_NOUN;
|
|
}
|
|
else
|
|
{
|
|
// if "DEUNG,DEUNG" is the part
|
|
Conv.HAN2INS (next+4, inwindow, codeWanSeong);
|
|
if (inwindow [0] == __K_D && inwindow [1] == __V_m && inwindow [2] == __K_I)
|
|
{
|
|
memcpy (window, next, 4);
|
|
window [4] = '\0';
|
|
if (FindSilsaWord (window) & _NOUN)
|
|
{
|
|
lstrcat (lrgsz, window);
|
|
vbuf [wcount++] = POS_NOUN;
|
|
lstrcat (lrgsz, "+");
|
|
lstrcat (lrgsz, next+4);
|
|
vbuf [wcount++] = POS_NOUN;
|
|
}
|
|
else
|
|
continue;
|
|
}
|
|
else
|
|
continue;
|
|
}
|
|
}
|
|
else
|
|
continue;
|
|
}
|
|
}
|
|
break;
|
|
case 6 :
|
|
Conv.HAN2INS (next, inwindow, codeWanSeong);
|
|
/*
|
|
if (FindSilsaWord (next) & _NOUN)
|
|
{
|
|
lstrcat (lrgsz, next);
|
|
vbuf[wcount++] = POS_NOUN;
|
|
}
|
|
else
|
|
{
|
|
*/
|
|
// if "GYEOM" is the first character in window
|
|
if ((inwindow [0] == __K_G && inwindow [1] == __V_u && inwindow [2] == __K_M) ||
|
|
(inwindow [0] == __K_M && inwindow [1] == __V_l && inwindow [2] == __K_C))
|
|
{
|
|
if (FindSilsaWord (next+2) & _NOUN)
|
|
{
|
|
memcpy (window, next, 2);
|
|
window [2] = '\0';
|
|
lstrcat(lrgsz, window);
|
|
vbuf [wcount++] = POS_ADVERB;
|
|
lstrcat(lrgsz, "+");
|
|
lstrcat(lrgsz, next+2);
|
|
vbuf [wcount++] = POS_NOUN;
|
|
}
|
|
else
|
|
continue;
|
|
}
|
|
else
|
|
{
|
|
// if "DEUNG" is the last character
|
|
Conv.HAN2INS (next+4, inwindow, codeWanSeong);
|
|
if (inwindow [0] == __K_D && inwindow [1] == __V_m && inwindow [2] == __K_I)
|
|
{
|
|
memcpy (window, next, 4);
|
|
window [4] = '\0';
|
|
if (FindSilsaWord (window) & _NOUN)
|
|
{
|
|
lstrcat (lrgsz, window);
|
|
vbuf [wcount++] = POS_NOUN;
|
|
lstrcat (lrgsz, "+");
|
|
lstrcat (lrgsz, next+4);
|
|
vbuf [wcount++] = POS_NOUN;
|
|
}
|
|
else
|
|
continue;
|
|
}
|
|
else
|
|
continue;
|
|
}
|
|
//}
|
|
break;
|
|
/*
|
|
case 4 :
|
|
|
|
if (FindSilsaWord (next) & _NOUN)
|
|
{
|
|
lstrcat (lrgsz, next);
|
|
vbuf[wcount++] = POS_NOUN;
|
|
if (lstrlen (oending) > 0)
|
|
{
|
|
lstrcat(lrgsz, "+");
|
|
lstrcat(lrgsz, oending);
|
|
vbuf[wcount++] = POS_TOSSI;
|
|
}
|
|
}
|
|
else
|
|
continue;
|
|
break;
|
|
*/
|
|
default :
|
|
continue;
|
|
}
|
|
}
|
|
if (lstrlen (oending) > 0)
|
|
{
|
|
lstrcat(lrgsz, "+");
|
|
lstrcat(lrgsz, oending);
|
|
vbuf[wcount++] = POS_TOSSI;
|
|
}
|
|
lstrcat(lrgsz, "\t");
|
|
|
|
lstrcpy (rstrings, lrgsz);
|
|
|
|
return wcount;
|
|
}
|
|
}
|
|
}
|
|
|
|
lstrcpy (rstrings, "\0");
|
|
|
|
return 0;
|
|
}
|