|
|
// =========================================================================
// Copyright (C) 1997 - 1998, Microsoft Corporation. All Rights Reserved.
//
// File Name : BASEMAIN.CPP
// Function : BASE ENGINE Handler
// : NLP Base Engine
// =========================================================================
#include "basemain.hpp"
#include "convert.hpp"
#include "MainDict.h"
/*---------------------------------------------------------------------------
%%Function : GetStemEnding %%Contact : dhyu
---------------------------------------------------------------------------*/ void BaseEngine::GetStemEnding (char *incode, char *stem, char *ending, int position) { int codelen = lstrlen (incode) - 1;
LMEPOS = position; lstrcpy (stem, incode);
if (LMEPOS == -1) { ULSPOS = codelen; ending [0] = NULLCHAR; } else { if (LMEPOS == codelen) { ULSPOS = -1; stem [0] = NULLCHAR; } else { ULSPOS = lstrlen(incode) - LMEPOS - 2; stem[ULSPOS+1] = NULLCHAR; } // ending have a reverse order.
for (int k = 0, j = lstrlen(incode) - 1; k <= LMEPOS; j--, k++) ending [k] = incode [j]; ending [k] = NULLCHAR; }
}
int BaseEngine::NLP_BASE_NOUN (LPCSTR d, char *rstrings) { char Act[10], ostem[80], oending[40], incode [100], stem [100], ending [40]; int bt, sp[10]; CODECONVERT Conv; wcount = 0;
memset(incode, NULLCHAR, 100); memset(Act, NULLCHAR, 10); memset(lrgsz, NULLCHAR, 400);
for (int i = 0; i < 10; i++) sp[i] = 0x0000; if(Conv.HAN2INS((char *)d, incode, codeWanSeong) != SUCCESS) { // KS -> Incode
return 99; } bt = NLP_Get_Ending(incode, Act, sp, TOSSI);
// for (i = bt - 1; i >= 0; i--)
for (i = 0; i < bt; i++) { GetStemEnding (incode, stem, ending, sp [i]);
ACT_C = GetBit(Act [i], 7); // consonant
ACT_V = GetBit(Act [i], 6); // vowel
ACT_N_V = GetBit(Act [i], 5); ACT_P_A = GetBit(Act [i], 4); ACT_N_E = GetBit(Act [i], 3);
memset(ostem, NULLCHAR, 80); memset(oending, NULLCHAR, 40);
Conv.INR2HAN(ending, oending, codeWanSeong); Conv.INS2HAN(stem, ostem, codeWanSeong); // incode -> ks
if(__IsDefEnd(LMEPOS, 1) == 1 && ending[LMEPOS] == __K_G && ending[LMEPOS-1] == __V_p) { if(NLP_Ge_Proc(stem) != BT) { lstrcat(lrgsz, ostem); lstrcat(lrgsz, "+"); lstrcat(lrgsz, oending); lstrcat(lrgsz, "\t"); vbuf[wcount++] = POS_PRONOUN; vbuf[wcount++] = POS_TOSSI; } continue; }
if (NLP_NCV_Proc(stem, ending) != NCV_VALID) continue;
if (FindSilsaWord (ostem) & _NOUN) { // searching the noun dictionary
lstrcat(lrgsz, ostem); vbuf[wcount++] = POS_NOUN; if (lstrlen (oending) > 0) { lstrcat(lrgsz, "+"); lstrcat(lrgsz, oending); vbuf[wcount++] = POS_TOSSI; } lstrcat(lrgsz, "\t"); } if(i == 0 || ACT_P_A == 1) { if (NLP_Find_Pronoun (stem, ending) == VALID) { if (lstrlen (oending) > 0) { lstrcat(lrgsz, "+"); lstrcat(lrgsz, oending); vbuf[wcount++] = POS_TOSSI; } lstrcat(lrgsz, "\t"); } } if(i == 0 || ACT_N_E == 1) { if(NLP_Num_Proc(stem) != BT) { lstrcat(lrgsz, ostem); vbuf[wcount++] = POS_NUMBER; if (lstrlen (oending) > 0) { lstrcat(lrgsz, "+"); lstrcat(lrgsz, oending); vbuf[wcount++] = POS_TOSSI; } lstrcat(lrgsz, "\t"); } continue; // backtracking
} } lstrcpy (rstrings, lrgsz);
return wcount; }
int BaseEngine::NLP_BASE_AFFIX (LPCSTR d, char *rstrings) { char Act[10], oending [40], incode [100], stem [100], ending [40]; int bt, ret, sp[10]; CODECONVERT Conv; wcount = 0;
memset(incode, NULLCHAR, 100); memset(Act, NULLCHAR, 10); memset(lrgsz, NULLCHAR, 400);
for (int i = 0; i < 10; i++) sp[i] = 0x0000; if(Conv.HAN2INS((char *)d, incode, codeWanSeong) != SUCCESS) { // KS -> Incode
return 99; } bt = NLP_Get_Ending(incode, Act, sp, TOSSI);
// for (i = bt - 1; i >= 0; i--)
for (i = 0; i < bt; i++) { GetStemEnding (incode, stem, ending, sp [i]);
ACT_C = GetBit(Act [i], 7); // consonant
ACT_V = GetBit(Act [i], 6); // vowel
ACT_N_V = GetBit(Act [i], 5); ACT_P_A = GetBit(Act [i], 4); ACT_N_E = GetBit(Act [i], 3);
if (NLP_NCV_Proc(stem, ending) != NCV_VALID) continue;
memset(oending, NULLCHAR, 40); Conv.INR2HAN(ending, oending, codeWanSeong);
ret = NLP_Fix_Proc(stem, ending); switch (ret) { case Deol_VALID : case Pref_VALID : case Suf_VALID : case PreSuf_VALID : if (lstrlen(oending) > 0) { lstrcat (lrgsz, "+"); lstrcat (lrgsz, oending); vbuf [wcount++] = POS_TOSSI; } lstrcat(lrgsz, "\t"); case BT : continue; // backtracking
} } lstrcpy (rstrings, lrgsz);
return wcount; }
int BaseEngine::NLP_BASE_ALONE(LPCSTR d, char *rstrings) { char incode [100]; CODECONVERT Conv;
memset(incode, NULLCHAR, 100); memset(lrgsz, NULLCHAR, 400); wcount = 0;
if(Conv.HAN2INS((char *)d, incode, codeWanSeong) != SUCCESS) { // KS -> Incode
return 99; } // check whether input word is ADVERB, or not
if (FindSilsaWord (d) & _ALONE) { lstrcat(lrgsz, d); lstrcat(lrgsz, "\t"); vbuf[wcount++] = POS_ADVERB; }
lstrcpy (rstrings, lrgsz);
return wcount; }
int BaseEngine::NLP_BASE_VERB (LPCSTR d, char *rstrings) { char index[1], AUX_Flag, tmp[80], Act[10], ostem[80], oending[40], incode [100], stem [100], ending [40], rending [40]; int bt, ret, rt, sp[10], temp, luls; CODECONVERT Conv; wcount = 0;
memset(Act, NULLCHAR, 10); memset(incode, NULLCHAR, 100); memset(lrgsz, NULLCHAR, 400); for (int i = 0; i < 10; i++) sp[i] = 0x0000; if(Conv.HAN2INS((char *)d, incode, codeWanSeong) != SUCCESS) { // KS -> Incode
return 99; }
bt = NLP_Get_Ending(incode, Act, sp, END); int codelen = lstrlen(incode) - 1; for (i = bt-1; i >= 0; i--) { memset(ostem, NULLCHAR, 80); memset(oending, NULLCHAR, 40); GetStemEnding (incode, stem, ending, sp [i]); if (lstrlen (stem) == 0) continue; ACT_C = GetBit(Act[i], 7); ACT_V = GetBit(Act[i], 6); ACT_N_V = GetBit(Act[i], 5); ACT_P_A = GetBit(Act[i], 4); ACT_N_E = GetBit(Act[i], 3); ACT_SS = GetBit(Act[i], 2); ACT_KE = GetBit(Act[i], 1);
RestoreEnding (ending, rending); Conv.INR2HAN(rending, oending, codeWanSeong); Conv.INS2HAN(stem, ostem, codeWanSeong); // incode -> ks
lstrcpy(tmp, stem); luls = ULSPOS; if(ACT_SS == 1) { if((ret = NLP_SS_Proc(stem, ending)) < INVALID) // VALID
{ if (lstrlen (oending) > 0) { lstrcat(lrgsz, "+"); lstrcat(lrgsz, oending); vbuf [wcount++] = POS_ENDING; } lstrcat(lrgsz, "\t"); } continue; } if(i == 0) { break; } if(ACT_KE == 1) { if((ret = NLP_KTC_Proc(stem, ending)) == BT) // backtracking
{ continue; } } ret = NLP_VCV_Check (stem, ending); if(ret < INVALID) { AUX_Flag = 0; if(ACT_N_V == 1) { if (FindSilsaWord (ostem) & _VERB) { lstrcat(lrgsz, ostem); lstrcat(lrgsz, "+"); lstrcat(lrgsz, oending); lstrcat(lrgsz, "\t"); vbuf[wcount++] = POS_VERB; vbuf[wcount++] = POS_ENDING; AUX_Flag = 1; } } if(ACT_P_A == 1) { if (FindSilsaWord (ostem) & _ADJECTIVE) { lstrcat(lrgsz, ostem); lstrcat(lrgsz, "+"); lstrcat(lrgsz, oending); lstrcat(lrgsz, "\t"); vbuf[wcount++] = POS_ADJECTIVE; vbuf[wcount++] = POS_ENDING; AUX_Flag = 1; } if(NLP_Dap_Proc(stem) == Dap_VALID) { if (lstrlen (oending) > 0) { lstrcat(lrgsz, "+"); lstrcat(lrgsz, oending); vbuf[wcount++] = POS_ENDING; } lstrcat(lrgsz, "\t"); AUX_Flag = 1; } if(NLP_Gop_Proc(stem) == Gop_VALID) { if (lstrlen (oending) > 0) { lstrcat(lrgsz, "+"); lstrcat(lrgsz, oending); vbuf[wcount++] = POS_ENDING; } lstrcat(lrgsz, "\t"); AUX_Flag = 1; } if((rt = NLP_Manha_Proc(stem)) < INVALID) { if (lstrlen (oending) > 0) { lstrcat(lrgsz, "+"); lstrcat(lrgsz, oending); vbuf[wcount++] = POS_ENDING; } lstrcat(lrgsz, "\t"); AUX_Flag = 1; } if((rt = NLP_Manhaeci_Proc(stem)) == Manhaeci_VALID) { if (lstrlen (oending) > 0) { lstrcat(lrgsz, "+"); lstrcat(lrgsz, oending); vbuf[wcount++] = POS_ENDING; } lstrcat(lrgsz, "\t"); AUX_Flag = 1; } if((rt = NLP_Cikha_Proc(stem)) < INVALID) { if (lstrlen (oending) > 0) { lstrcat(lrgsz, "+"); lstrcat(lrgsz, oending); vbuf[wcount++] = POS_ENDING; } lstrcat(lrgsz, "\t"); } } // AUX_FLOW
if(AUX_Flag == 0) { if(ACT_N_V == 1) { if((rt = NLP_AUX_Find(stem, 0)) < INVALID) { if (lstrlen (oending) > 0) { lstrcat(lrgsz, "+"); lstrcat(lrgsz, oending); vbuf[wcount++] = POS_ENDING; } lstrcat(lrgsz, "\t"); } } if(ACT_P_A == 1) { if((rt = NLP_AUX_Find(stem, 1)) < INVALID) { if (lstrlen (oending) > 0) { lstrcat(lrgsz, "+"); lstrcat(lrgsz, oending); vbuf[wcount++] = POS_ENDING; } lstrcat(lrgsz, "\t"); } } } } else if (ret != MORECHECK) continue; // against consonant-vowel harmony
if(ACT_N_E == 1) { if(strcmp(stem, TempIkNl) == 0) { lstrcat(lrgsz, ostem); lstrcat(lrgsz, "+"); lstrcat(lrgsz, oending); lstrcat(lrgsz, "\t"); vbuf[wcount++] = POS_VERB; //Jap_VALID;
vbuf[wcount++] = POS_ENDING; } if(__IsDefStem(ULSPOS, 1) == 1 && stem[ULSPOS-1] == __K_I && stem[ULSPOS] == __V_l) { if(__IsDefStem(ULSPOS, 2) == 1 && stem[ULSPOS-2] == __K_M) { sp[i] = LMEPOS+3; Act[i] = 0x70; // action:01-110-00-0
i++; if(__IsDefStem(ULSPOS, 4) == 1 && stem[ULSPOS-4] == __K_I && stem[ULSPOS-3] == __V_m) { sp[i] = LMEPOS+5; Act[i] = (unsigned char)0xB0; // action:10 110 00 0
i++; } } temp = ULSPOS; __DelStem2(stem, &temp); ULSPOS = temp;
if((ret = NLP_Machine_T(stem, ending)) < INVALID) { if (lstrlen (oending) > 0) { lstrcat(lrgsz, "+"); lstrcat(lrgsz, oending); vbuf[wcount++] = POS_ENDING; } lstrcat(lrgsz, "\t"); } temp = ULSPOS; __AddStem2(stem, &temp, __K_I, __V_l); ULSPOS = temp; if(__IsDefEnd(LMEPOS, 1) == 1 && ending[LMEPOS] == __K_I && ending[LMEPOS-1] == __V_j) { for (int i = 0; i < 3; i++) { if(strcmp(stem, TempJap[i]) == 0) { lstrcat(lrgsz, ostem); lstrcat(lrgsz, "+"); lstrcat(lrgsz, oending); lstrcat(lrgsz, "\t"); vbuf[wcount++] = POS_VERB; //VERB_VALID;
vbuf[wcount++] = POS_ENDING; } } continue; } index[0] = 'm'; if (FindSilsaWord (ostem) & _NOUN) { lstrcat(lrgsz, ostem); lstrcat(lrgsz, "+"); lstrcat(lrgsz, oending); lstrcat(lrgsz, "\t"); vbuf[wcount++] = POS_NOUN; //Jap_NOUN_VALID;
vbuf[wcount++] = POS_ENDING; } if((ret = NLP_Fix_Proc(stem, ending)) < INVALID) { if (lstrlen (oending) > 0) { lstrcat(lrgsz, "+"); lstrcat(lrgsz, oending); vbuf[wcount++] = POS_ENDING; } lstrcat(lrgsz, "\t"); } if (NLP_Find_Pronoun (stem, ending) == VALID) { if (lstrlen (oending) > 0) { lstrcat(lrgsz, "+"); lstrcat(lrgsz, oending); vbuf[wcount++] = POS_TOSSI; } lstrcat(lrgsz, "\t"); } if((ret = NLP_Num_Proc(stem)) < INVALID) { lstrcat(lrgsz, ostem); lstrcat(lrgsz, "+"); lstrcat(lrgsz, oending); lstrcat(lrgsz, "\t"); vbuf[wcount++] = POS_NUMBER; //Jap_NUM_VALID;
vbuf[wcount++] = POS_ENDING; } continue; // backtracking
} else if( /*ACT_Z != 1 && */ // ACT_Z != 1
ULS >= __V_k && !(__IsDefEnd(LMEPOS, 1) == 1 && ending[LMEPOS] == __K_I && ending[LMEPOS-1] == __V_j)) { if((ret = NLP_Machine_T(stem, ending)) < INVALID) { if (lstrlen (oending) > 0) { lstrcat(lrgsz, "+"); lstrcat(lrgsz, oending); vbuf[wcount++] = POS_ENDING; } lstrcat(lrgsz, "\t"); } } } lstrcpy(tmp, stem); luls = ULSPOS; if(ACT_C == 0 && ACT_V == 1) { // by hjw : 95/3/6
if((ret = NLP_Irr_01(stem, ending)) < INVALID) { if (lstrlen (oending) > 0) { lstrcat(lrgsz, "+"); lstrcat(lrgsz, oending); vbuf[wcount++] = POS_ENDING; } lstrcat(lrgsz, "\t"); continue; } }
lstrcpy (stem, tmp); ret = BT; switch(LME) { case __K_N : if((ret = NLP_Irr_KN(stem, ending)) == Irr_KN_Vl) { ret = NLP_Irr_KN_Vl(stem); } if(ret == Irr_OPS) { if((ret = NLP_Irr_OPS(stem, ending)) == SS) { if((ret = NLP_SS_Proc(stem, ending)) == BT) { continue; } if(ret < INVALID) { ret += Irr_SS; } } } break; case __K_B : ret = NLP_Machine_A(stem, ending); break; // hjw : 95/3/17
case __K_S : if(ACT_C == 1) // ATC_C == 1
{ ret = NLP_Irr_KS(stem, ending); } else if(ULS >= __V_k) { ret = NLP_Machine_A(stem, ending); } break; case __K_M : ret = NLP_Irr_KM(stem); break; case __K_R : if(__IsDefEnd(LMEPOS, 1) == 0 || ending[LMEPOS-1] < __V_k) { ret = NLP_Machine_A(stem,ending); if (ret != BT) { if (lstrlen (oending) > 0) { lstrcat(lrgsz, "+"); lstrcat(lrgsz, oending); vbuf[wcount++] = POS_ENDING; } lstrcat(lrgsz, "\t"); } } if(ACT_P_A == 1) { ret = NLP_Irr_KRadj(stem, ending); if (ret != BT) { if (lstrlen (oending) > 0) { lstrcat(lrgsz, "+"); lstrcat(lrgsz, oending); vbuf[wcount++] = POS_ENDING; } lstrcat(lrgsz, "\t"); } } if(ACT_N_V == 1) { if((ret = NLP_Irr_KRvb(stem, ending)) == SS) { if((ret = NLP_SS_Proc(stem, ending)) == BT) { continue; } if(ret < INVALID) { ret += Irr_SS; } } } break; case __K_I : if(__IsDefEnd(LMEPOS, 1) == 1 && (ending[LMEPOS-1] == __V_h || ending[LMEPOS-1] == __V_hl || ending[LMEPOS-1] == __V_l )) { if(ULS >= __V_k) { ret = NLP_Irr_KI(stem,rending); } else { continue; } } if(ULS == __K_R) { ret = NLP_Irr_KI_KR(stem, ending); } if(ULS >= __V_k) { ret = NLP_Irr_KI_V(stem, ending); } break; default : continue; } if(ret >= INVALID) { continue; } if(ret >= VALID) { if (lstrlen (oending) > 0) { lstrcat(lrgsz, "+"); lstrcat(lrgsz, oending); vbuf[wcount++] = POS_ENDING; } lstrcat(lrgsz, "\t"); } }
lstrcpy (rstrings, lrgsz);
return wcount; }
// made by dhyu 1996. 2
// look into mrfgen01.txt to know details
void BaseEngine::RestoreEnding (char *ending, char *rending) { int len = lstrlen (ending); // ending has reverse order.
lstrcpy (rending, ending);
if (lstrlen (ending) == 0) return;
if (ACT_SS) // insert "IEUNG, EO" to the first of ending
{ rending [len] = __V_j; rending [len+1] = __K_I; rending [len+2] = '\0';
return; }
if (ACT_C && ACT_V) //CV == 11
{ if (ending [len - 1] == __K_I && ending [len - 2] == __V_k) rending [len - 2] = __V_j;
return; }
if (!ACT_C && ACT_V == TRUE) // CV == 01
{ switch (ending [len - 1]) { case __K_B : rending [len] = __V_m; // insert "SIOS, EU" to the first of ending
rending [len+1] = __K_S; rending [len+2] = '\0'; return ; case __K_R : if (len == 2 && ending [0] == __V_k) // if ending is "ra" , insert "IEUNG EO" to the first of ending
{ rending [len] = __V_j; rending [len+1] = __K_I; rending [len+2] = '\0'; return; } break; case __K_I : if (ending [len - 2] == __V_y) // if ending is "yo",
{ rending [len] = __V_j; rending [len+1] = __K_I; rending [len+2] = '\0'; return; } break; case __K_N : if (!ACT_C && ACT_V && ACT_N_V && !ACT_P_A && !ACT_P_A && !ACT_N_E && !ACT_SS && !ACT_KE) { rending [len] = __V_m; rending [len+1] = __K_N; rending [len+2] = '\0'; return; } } rending [len] = __V_m; // insert "IEUNG, EU" to the first of ending
rending [len+1] = __K_I; rending [len+2] = '\0';
return; } // "KE-TO-CHI" ending and copula is processed with stem together.
return; }
// To process compound noun, we use the window which size is 4 characters.
// We decrease the size until we found noun.
// However we don't decrease it less than 2 characters.
// made by dhyu --- 1996. 3
int BaseEngine::NLP_BASE_COMPOUND (LPCSTR d, char *rstrings) { char Act[10], ostem[80], oending[40], incode [100], stem [100], ending [40]; int bt, sp[10]; BOOL found;
CODECONVERT Conv;
memset(incode, NULLCHAR, 100); memset(Act, NULLCHAR, 10);
for (int i = 0; i < 10; i++) sp[i] = 0x0000; if(Conv.HAN2INS((char *)d, incode, codeWanSeong) != SUCCESS) return 0; bt = NLP_Get_Ending(incode, Act, sp, TOSSI);
for (i = bt-1; i >= bt-3 && i >= 0; i--) //for (i = 0; i < bt; i++)
{ GetStemEnding (incode, stem, ending, sp [i]);
ACT_C = GetBit(Act [i], 7); // consonant
ACT_V = GetBit(Act [i], 6); // vowel
ACT_N_V = GetBit(Act [i], 5); ACT_P_A = GetBit(Act [i], 4); ACT_N_E = GetBit(Act [i], 3);
if (NLP_NCV_Proc(stem, ending) == NCV_VALID) { memset(ostem, NULLCHAR, 80); memset(oending, NULLCHAR, 40); Conv.INR2HAN(ending, oending, codeWanSeong); Conv.INS2HAN(stem, ostem, codeWanSeong); // incode -> ks
wcount = 0; memset (lrgsz, NULLCHAR, 400); // Window size is 4 charaters (8 byte)
char window [9], inwindow [25]; memset (window, '\0', 9); char *next = ostem; found = TRUE; while (lstrlen (next) > 8) { found = FALSE; memcpy (window, next, 8); for (int j = 7; j >= 3; j -= 2) { if (FindSilsaWord (window) & _NOUN) { // searching the noun dictionary
lstrcat(lrgsz, window); vbuf[wcount++] = POS_NOUN; lstrcat(lrgsz, "+"); found = TRUE; break; } window [j] = '\0'; window [j-1] = '\0'; } if (!found) { // if "GYEOM" is the first character in window
Conv.HAN2INS (next, inwindow, codeWanSeong); if ((inwindow [0] == __K_G && inwindow [1] == __V_u && inwindow [2] == __K_M) || (inwindow [0] == __K_M && inwindow [1] == __V_l && inwindow [2] == __K_C)) { memcpy (window, next, 2); window [2] = '\0'; lstrcat(lrgsz, window); vbuf [wcount++] = POS_ADVERB; lstrcat(lrgsz, "+"); found = TRUE; next += 2; } else break; } else next += (j+1); }
if (!found) continue; else { if (FindSilsaWord (next) & _NOUN) { lstrcat (lrgsz, next); vbuf[wcount++] = POS_NOUN; } else { switch (lstrlen(next)) { case 8 : // if the size of last winow is 4, we divide it into same size two.
memcpy (window, next, 4); window [4] = '\0'; Conv.HAN2INS (window, inwindow, codeWanSeong); found = FALSE; if (FindSilsaWord (window) & _NOUN) { // searching the noun dictionary
Conv.HAN2INS(next+4, inwindow, codeWanSeong); if (FindSilsaWord (next+4) & _NOUN) { lstrcat(lrgsz, window); vbuf[wcount++] = POS_NOUN; lstrcat(lrgsz, "+"); lstrcat(lrgsz, next+4); vbuf[wcount++] = POS_NOUN; found = TRUE; } } if (!found) { // if "GYEOM" is the first character in window
if ((inwindow [0] == __K_G && inwindow [1] == __V_u && inwindow [2] == __K_M) || (inwindow [0] == __K_M && inwindow [1] == __V_l && inwindow [2] == __K_C)) { memcpy (window, next, 8); window [9] = '\0'; if (FindSilsaWord (window) & _NOUN) { memcpy (window, next, 2); window [2] = '\0'; lstrcat(lrgsz, window); vbuf [wcount++] = POS_ADVERB; lstrcat(lrgsz, "+"); lstrcat(lrgsz, next+2); vbuf [wcount++] = POS_NOUN; } } else { // if "DEUNG" is the last character
Conv.HAN2INS (next+6, inwindow, codeWanSeong); if ((inwindow [0] == __K_D && inwindow [1] == __V_m && inwindow [2] == __K_I) || (inwindow [0] == __K_G && inwindow [1] == __V_k && inwindow [2] == __K_M) || (inwindow [0] == __K_G && inwindow [1] == __V_k && inwindow [2] == __K_B && inwindow [3] == __K_S) || (inwindow [0] == __K_G && inwindow [1] == __V_P) || (inwindow [0] == __K_C && inwindow [1] == __V_o && inwindow [2] == __K_G))
{ memcpy (window, next, 6); window [6] = '\0'; if (FindSilsaWord (window) & _NOUN) { lstrcat (lrgsz, window); vbuf [wcount++] = POS_NOUN; lstrcat (lrgsz, "+"); lstrcat (lrgsz, next+6); vbuf [wcount++] = POS_NOUN; } else { // if "DEUNG,DEUNG" is the part
Conv.HAN2INS (next+4, inwindow, codeWanSeong); if (inwindow [0] == __K_D && inwindow [1] == __V_m && inwindow [2] == __K_I) { memcpy (window, next, 4); window [4] = '\0'; if (FindSilsaWord (window) & _NOUN) { lstrcat (lrgsz, window); vbuf [wcount++] = POS_NOUN; lstrcat (lrgsz, "+"); lstrcat (lrgsz, next+4); vbuf [wcount++] = POS_NOUN; } else continue; } else continue; } } else continue; } } break; case 6 : Conv.HAN2INS (next, inwindow, codeWanSeong); /*
if (FindSilsaWord (next) & _NOUN) { lstrcat (lrgsz, next); vbuf[wcount++] = POS_NOUN; } else { */ // if "GYEOM" is the first character in window
if ((inwindow [0] == __K_G && inwindow [1] == __V_u && inwindow [2] == __K_M) || (inwindow [0] == __K_M && inwindow [1] == __V_l && inwindow [2] == __K_C)) { if (FindSilsaWord (next+2) & _NOUN) { memcpy (window, next, 2); window [2] = '\0'; lstrcat(lrgsz, window); vbuf [wcount++] = POS_ADVERB; lstrcat(lrgsz, "+"); lstrcat(lrgsz, next+2); vbuf [wcount++] = POS_NOUN; } else continue; } else { // if "DEUNG" is the last character
Conv.HAN2INS (next+4, inwindow, codeWanSeong); if (inwindow [0] == __K_D && inwindow [1] == __V_m && inwindow [2] == __K_I) { memcpy (window, next, 4); window [4] = '\0'; if (FindSilsaWord (window) & _NOUN) { lstrcat (lrgsz, window); vbuf [wcount++] = POS_NOUN; lstrcat (lrgsz, "+"); lstrcat (lrgsz, next+4); vbuf [wcount++] = POS_NOUN; } else continue; } else continue; } //}
break; /*
case 4 :
if (FindSilsaWord (next) & _NOUN) { lstrcat (lrgsz, next); vbuf[wcount++] = POS_NOUN; if (lstrlen (oending) > 0) { lstrcat(lrgsz, "+"); lstrcat(lrgsz, oending); vbuf[wcount++] = POS_TOSSI; } } else continue; break; */ default : continue; } } if (lstrlen (oending) > 0) { lstrcat(lrgsz, "+"); lstrcat(lrgsz, oending); vbuf[wcount++] = POS_TOSSI; } lstrcat(lrgsz, "\t"); lstrcpy (rstrings, lrgsz);
return wcount; } } }
lstrcpy (rstrings, "\0");
return 0; }
|