|
|
#include <windows.h>
#include <assert.h>
#include "PropNoun.H"
// qsort/bsearch comparison callback for CharProb records.
//
// item1, item2 - pointers to the two CharProb records to compare.
//
// Returns -1, 0 or 1 when the dwUnicode value of item1 is respectively
// smaller than, equal to or greater than the dwUnicode value of item2.
int __cdecl CharCompare(
    const void *item1,
    const void *item2)
{
    PCharProb pLeft = (PCharProb) item1;
    PCharProb pRight = (PCharProb) item2;

    // Compare explicitly instead of subtracting so that the result can
    // never wrap, whatever the width or signedness of dwUnicode.
    if (pLeft->dwUnicode == pRight->dwUnicode) {
        return 0;
    }

    return (pLeft->dwUnicode > pRight->dwUnicode) ? 1 : -1;
}
// qsort/bsearch comparison callback for NUL-terminated wide strings.
//
// item1, item2 - pointers to the first WCHAR of each string; both must be
//                non-NULL and NUL-terminated.
//
// Returns the memcmp() result (<0, 0, >0) of a byte-wise comparison of the
// two strings.  The ordering is therefore by raw bytes, not by code-point
// value; it only has to be the same ordering for the qsort() that sorts a
// table and the bsearch() that probes it.
int __cdecl UnicodeCompare(
    const void *item1,
    const void *item2)
{
    const int nLen1 = lstrlenW((LPCWSTR) item1);
    const int nLen2 = lstrlenW((LPCWSTR) item2);
    const int nShorter = (nLen1 < nLen2) ? nLen1 : nLen2;

    // Compare the common prefix plus ONE more character.  For strings of
    // different length that extra character is the shorter string's
    // terminator against a non-NUL character, which decides the result; for
    // strings of equal length it is terminator against terminator.  This
    // yields the same answer as comparing max(len1, len2) characters, but
    // never asks memcmp() to look past the terminator of the shorter string.
    return memcmp(item1, item2, (nShorter + 1) * sizeof(WCHAR));
}
// qsort/bsearch comparison callback for EngName records.
//
// item1, item2 - pointers to the two EngName records to compare.
//
// Records are ordered by wPrevUnicode first; wNextUnicode breaks ties.
// Returns -1, 0 or 1 for "less than", "equal" and "greater than".
int __cdecl EngNameCompare(
    const void *item1,
    const void *item2)
{
    PEngName pLeft = (PEngName) item1;
    PEngName pRight = (PEngName) item2;

    // Primary key
    if (pLeft->wPrevUnicode != pRight->wPrevUnicode) {
        return (pLeft->wPrevUnicode > pRight->wPrevUnicode) ? 1 : -1;
    }

    // Secondary key
    if (pLeft->wNextUnicode != pRight->wNextUnicode) {
        return (pLeft->wNextUnicode > pRight->wNextUnicode) ? 1 : -1;
    }

    return 0;
}
// Constructs a recogniser with no data loaded yet.
//
// hInstance - module handle that InitData() later passes to the resource
//             APIs to locate the probability table.
//
// The threshold takes its compile-time default; every data pointer starts
// out NULL and every count starts out zero.
CProperNoun::CProperNoun(
    HINSTANCE hInstance)
    : m_dProperNameThreshold(FL_PROPER_NAME_THRESHOLD),
      m_pCharProb(NULL),
      m_dwTotalCharProbNum(0),
      m_pEngNameData(NULL),
      m_hProcessHeap(0),
      m_hInstance(hInstance)
{
}
// Destructor.  Intentionally empty: no cleanup is performed here.
CProperNoun::~CProperNoun()
{
}
// Loads the data the recogniser works from.
//
// Records the process heap handle, maps the "CNAME"/"BIN" resource of
// m_hInstance as the character-probability table and sorts the static
// surname table so that it can be probed with bsearch().
//
// Returns TRUE on success.  Returns FALSE as soon as the resource cannot be
// found, loaded or locked; in that case the surname table is NOT sorted.
//
// Note that this function never assigns m_pEngNameData: the code that
// loaded the "ENAME" resource is disabled (kept below for reference).
BOOL CProperNoun::InitData()
{
    m_hProcessHeap = GetProcessHeap();

    // Find resource
    HRSRC hCharResource = FindResource(m_hInstance, TEXT("CNAME"), TEXT("BIN"));
    if (!hCharResource) {
        return FALSE;
    }

    // Load resource
    HGLOBAL hCharData = LoadResource(m_hInstance, hCharResource);
    if (!hCharData) {
        return FALSE;
    }

    m_pCharProb = (PCharProb) LockResource(hCharData);
    if (!m_pCharProb) {
        return FALSE;
    }

    // The resource is a plain array of CharProb records.
    m_dwTotalCharProbNum = SizeofResource(m_hInstance, hCharResource) / sizeof(CharProb);

    /* Disabled: loading of the English name data.
    // Find resource
    hResource = FindResource(m_hInstance, TEXT("ENAME"), TEXT("BIN")); if (!hResource) { goto _exit; }
    // Load resource
    hGlobal = LoadResource(m_hInstance, hResource); if (!hGlobal) { goto _exit; }
    m_pEngNameData = (PEngNameData) LockResource(hGlobal); m_pEngNameData->pwUnicode = (PWORD) ((PBYTE) m_pEngNameData + sizeof(m_pEngNameData->dwTotalEngUnicodeNum) + sizeof(m_pEngNameData->dwTotalEngNamePairNum)); m_pEngNameData->pEngNamePair = (PEngName) ((PBYTE) m_pEngNameData + sizeof(m_pEngNameData->dwTotalEngUnicodeNum) + sizeof(m_pEngNameData->dwTotalEngNamePairNum) + sizeof(m_pEngNameData->pwUnicode[0]) * m_pEngNameData->dwTotalEngUnicodeNum);
    // m_pEngName = (PEngName) LockResource(hGlobal);
    // m_dwTotalEngNameNum = SizeofResource(m_hInstance, hResource) / sizeof(EngName);
    */

    // Put the surname table into UnicodeCompare order; IsAChineseName()
    // searches it with bsearch() using the same comparator.
    qsort(m_pwszSurname, m_dwTotalSurnameNum, sizeof(m_pwszSurname[0]), UnicodeCompare);

    return TRUE;
}
// Tells whether a run of characters is recognised as a proper noun.
//
// lpwszChar - the characters to test.
// uCount    - number of characters in lpwszChar.
//
// Returns TRUE when the run is accepted as a Chinese name or, failing
// that, as an English name; FALSE otherwise.  The English test is only
// run when the Chinese test rejects the input.
BOOL CProperNoun::IsAProperNoun(
    LPWSTR lpwszChar,
    UINT uCount)
{
    if (IsAChineseName(lpwszChar, uCount)) {
        return TRUE;
    }

    return IsAEnglishName(lpwszChar, uCount) ? TRUE : FALSE;
}
// Tells whether a run of characters looks like a Chinese personal name.
//
// lpcwszChar - the characters to test; the first one is taken as the
//              surname.
// uCount     - number of characters in lpcwszChar.
//
// The first character must appear in the surname table.  The score starts
// at 1 and is multiplied, for every following character, by that
// character's probability from the m_pCharProb table (or by
// FL_DEFAULT_CHAR_PROBABILITY when the character is not listed).
//
// Returns TRUE when the surname is known and the resulting score reaches
// m_dProperNameThreshold; FALSE otherwise, including for a NULL or empty
// input.
BOOL CProperNoun::IsAChineseName(
    LPCWSTR lpcwszChar,
    UINT uCount)
{
    // Nothing to inspect: do not read lpcwszChar[0] from an empty run.
    if (!lpcwszChar || uCount == 0) {
        return FALSE;
    }

    // Search key shaped like one row of m_pwszSurname.  It is a per-call
    // local (not a function-level static) so concurrent or re-entrant calls
    // cannot overwrite each other's key.
    const WCHAR wszSurnameKey[3] = { lpcwszChar[0], L'\0', L'\0' };

    // Find surname
    if (!bsearch(wszSurnameKey, m_pwszSurname, m_dwTotalSurnameNum, sizeof(m_pwszSurname[0]), UnicodeCompare)) {
        return FALSE;
    }

    // Calculate probability to be a proper noun
    FLOAT flProbability = 1;
    CharProb ProbKey;

    for (UINT i = 1; i < uCount; ++i) {
        ProbKey.dwUnicode = lpcwszChar[i];

        // With no table loaded every character simply gets the default.
        PCharProb pFound = m_pCharProb
            ? (PCharProb) bsearch(&ProbKey, m_pCharProb, m_dwTotalCharProbNum, sizeof(m_pCharProb[0]), CharCompare)
            : NULL;

        if (pFound) {
            flProbability *= pFound->flProbability;
        } else {
            flProbability *= (FLOAT) FL_DEFAULT_CHAR_PROBABILITY;
        }
    }

    return (flProbability >= m_dProperNameThreshold) ? TRUE : FALSE;
}
// Tells whether a run of characters is recognised as an English name.
//
// lpwszChar - the characters to test.
// uCount    - number of characters in lpwszChar.
//
// The (first character, last character) pair of the run is looked up in
// the pair table of m_pEngNameData.
//
// Returns TRUE when the pair is listed.  Returns FALSE when it is not,
// when the input is NULL or empty, or when no English name data is loaded.
BOOL CProperNoun::IsAEnglishName(
    LPCWSTR lpwszChar,
    UINT uCount)
{
    // The constructor leaves m_pEngNameData NULL and InitData() does not
    // assign it, so it must not be dereferenced unconditionally.
    if (!m_pEngNameData || !m_pEngNameData->pEngNamePair) {
        return FALSE;
    }

    // uCount - 1 below would wrap around for an empty run.
    if (!lpwszChar || uCount == 0) {
        return FALSE;
    }

    // Per-call key (not a function-level static) so that concurrent or
    // re-entrant calls cannot overwrite each other's key.
    EngName Name = {};
    Name.wPrevUnicode = lpwszChar[0];
    Name.wNextUnicode = lpwszChar[uCount - 1];

    // NOTE(review): the element count used here is dwTotalEngNamePairNum.
    // The previous code passed dwTotalEngUnicodeNum, which the disabled
    // loader in InitData() uses as the length of pwUnicode, not of
    // pEngNamePair - confirm against the resource layout when the loader is
    // re-enabled.
    if (bsearch(&Name, m_pEngNameData->pEngNamePair, m_pEngNameData->dwTotalEngNamePairNum, sizeof(EngName), EngNameCompare)) {
        return TRUE;
    }

    return FALSE;
}
WCHAR CProperNoun::m_pwszSurname[][3] = { L"�B", L"�R", L"�_", L"��", L"��", L"��", L"��", L"��", L"��", L"��", L"��", L"��", L"��", L"��", L"�C", L"�K", L"�T", L"�]", L"�q", L"�v", L"��", L"��", L"��", L"��", L"��", L"��", L"�V", L"�w", L"��", L"��", L"��", L"��", L"��", L"��", L"�E", L"�d", L"�f", L"��", L"��", L"��", L"��", L"��", L"��", L"��", L"�H", L"�L", L"�f", L"��", L"��", L"��", L"��", L"��", L"�P", L"�s", L"�u", L"�x", L"�}", L"��", L"��", L"��", L"�L", L"�Z", L"�k", L"��", L"��", L"��", L"��", L"��", L"��", L"��", L"�J", L"�\\", L"��", L"��", L"��", L"�I", L"�R", L"�_", L"�d", L"�h", L"�q", L"�x", L"��", L"�J", L"�S", L"�]", L"�p", L"��", L"��", L"��", L"��", L"��", L"��", L"��", L"��", L"�L", L"�V", L"�]", L"�c", L"�u", L"�}", L"��", L"��", L"��", L"��", L"�Z", L"��", L"��", L"��", L"��", L"��", L"�K", L"�q", L"�|", L"�}", L"��", L"��", L"�O", L"�Z", L"�d", L"�h", L"�i", L"��", L"��", L"��", L"��", L"��", L"��", L"��", L"��", L"��", L"��", L"��", L"��", L"�\\", L"�s", L"��", L"��", L"��", L"��", L"��", L"��", L"��", L"��", L"��", L"��", L"��", L"��", L"�^", L"��", L"��", L"��", L"��", L"��", L"��", L"�J", L"�q", L"�{", L"��", L"��", L"��", L"�O", L"�P", L"�R", L"�d", L"�k", L"�s", L"��", L"��", L"��", L"��", L"��", L"��", L"�q", L"��", L"��", L"��", L"��", L"��", L"��", L"��", L"��", L"��", L"��", L"��", L"�Q", L"�l", L"�p", L"��", L"��", L"�a", L"��", L"��", L"��", L"�p", L"�u", L"��", L"��", L"��", L"�B", L"��", L"��", L"��", L"��", L"��", L"��", L"��", L"��", L"�G", L"�H", L"�|", L"��", L"��", L"��", L"�P", L"�c", L"�p", L"��", L"��", L"��", L"��", L"��", L"�F", L"�N", L"�R", L"�d", L"�j", L"�s", L"��", L"��", L"��", L"�t", L"��", L"��", L"��", L"��", L"��", L"��", L"��", L"£", L"²", L"¿", L"��", L"��", L"��", L"��", L"�C", L"�Q", L"�e", L"ù", L"��", L"��", L"��", L"��", L"�Y", L"�u", L"ĩ", L"Ī", L"Ĭ", L"��", L"��", L"�U", L"��", L"��", L"�e", L"�s", L"м", L"�\\", L"�k" };
// Number of rows in m_pwszSurname, derived from the array itself so that it
// follows the table whenever entries are added or removed.
DWORD CProperNoun::m_dwTotalSurnameNum =
    sizeof(m_pwszSurname) / sizeof(*m_pwszSurname);
|