Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

447 lines
7.5 KiB

#include <windows.h>
#include <assert.h>
#include "PropNoun.H"
// Comparator for qsort()/bsearch() over CharProb records.
// Orders two records by their dwUnicode field, ascending.
// Returns 1 if item1's dwUnicode is greater, -1 if it is smaller, and 0 if
// both are equal.
int __cdecl CharCompare(
    const void *item1,
    const void *item2)
{
    PCharProb pLeft = (PCharProb) item1;
    PCharProb pRight = (PCharProb) item2;

    if (pLeft->dwUnicode == pRight->dwUnicode) {
        return 0;
    }
    return (pLeft->dwUnicode > pRight->dwUnicode) ? 1 : -1;
}
// Comparator for qsort()/bsearch() over NUL-terminated wide strings.
//
// The ordering is the byte-wise order memcmp() produces, not a linguistic
// collation; it only has to be consistent between the qsort() call that
// sorts the table and the bsearch() calls that probe it.
//
// The comparison runs through the terminating NUL of the SHORTER string and
// no further. Either a difference is found earlier, or the shorter string's
// terminator differs from the longer string's next character, or (equal
// lengths) both terminators match and the result is 0. Comparing over the
// longer string's length instead would let memcmp() read beyond the end of
// the shorter string.
//
// Returns <0, 0 or >0 as item1 sorts before, equal to, or after item2.
int __cdecl UnicodeCompare(
    const void *item1,
    const void *item2)
{
    int nLen1 = lstrlenW((LPWSTR) item1);
    int nLen2 = lstrlenW((LPWSTR) item2);
    int nShorter = (nLen1 < nLen2) ? nLen1 : nLen2;

    // +1 so the shorter string's terminator takes part in the comparison.
    return memcmp(item1, item2, (nShorter + 1) * sizeof(WCHAR));
}
// Comparator for qsort()/bsearch() over EngName records.
// Records are ordered by wPrevUnicode first; ties are broken by
// wNextUnicode. Returns 1, -1 or 0.
int __cdecl EngNameCompare(
    const void *item1,
    const void *item2)
{
    PEngName pLhs = (PEngName) item1;
    PEngName pRhs = (PEngName) item2;

    // Primary key: wPrevUnicode.
    if (pLhs->wPrevUnicode != pRhs->wPrevUnicode) {
        return (pLhs->wPrevUnicode > pRhs->wPrevUnicode) ? 1 : -1;
    }

    // Secondary key: wNextUnicode.
    if (pLhs->wNextUnicode != pRhs->wNextUnicode) {
        return (pLhs->wNextUnicode > pRhs->wNextUnicode) ? 1 : -1;
    }

    return 0;
}
// Constructs an object with no data loaded yet: the threshold is set to
// FL_PROPER_NAME_THRESHOLD, every data pointer/count is cleared, and the
// module handle is remembered for the resource lookups done by InitData().
CProperNoun::CProperNoun(HINSTANCE hInstance)
    : m_dProperNameThreshold(FL_PROPER_NAME_THRESHOLD),
      m_pCharProb(NULL),
      m_dwTotalCharProbNum(0),
      m_pEngNameData(NULL),
      m_hProcessHeap(NULL),
      m_hInstance(hInstance)
{
    // All work is done in the initializer list.
}
// Nothing is released here: the only data pointer this file assigns
// (m_pCharProb) points into a resource obtained through LockResource(),
// not into a heap allocation made by this class.
CProperNoun::~CProperNoun()
{
    // Intentionally empty.
}
// Loads the character-probability table and sorts the surname table.
//
// Steps:
//   1. Cache the process heap handle in m_hProcessHeap.
//   2. Find, load and lock the "CNAME" resource of type "BIN" in
//      m_hInstance; m_pCharProb points at it and m_dwTotalCharProbNum is
//      the resource size divided by sizeof(CharProb).
//   3. Sort m_pwszSurname with UnicodeCompare so it can be binary-searched.
//
// Returns TRUE on success, FALSE as soon as any resource call fails (in
// which case the surname table has not been sorted).
BOOL CProperNoun::InitData()
{
    m_hProcessHeap = GetProcessHeap();

    // Locate the character-probability resource.
    HRSRC hCharProbRes = FindResource(m_hInstance, TEXT("CNAME"), TEXT("BIN"));
    if (hCharProbRes == NULL) {
        return FALSE;
    }

    // Bring it into memory.
    HGLOBAL hCharProbData = LoadResource(m_hInstance, hCharProbRes);
    if (hCharProbData == NULL) {
        return FALSE;
    }

    m_pCharProb = (PCharProb) LockResource(hCharProbData);
    if (m_pCharProb == NULL) {
        return FALSE;
    }

    m_dwTotalCharProbNum =
        SizeofResource(m_hInstance, hCharProbRes) / sizeof(CharProb);

    // Loading of the "ENAME" resource is disabled; m_pEngNameData therefore
    // keeps the value the constructor gave it. The disabled code is kept
    // below for reference.
    /*
    HRSRC hEngNameRes = FindResource(m_hInstance, TEXT("ENAME"), TEXT("BIN"));
    if (!hEngNameRes) { return FALSE; }
    HGLOBAL hEngNameData = LoadResource(m_hInstance, hEngNameRes);
    if (!hEngNameData) { return FALSE; }
    m_pEngNameData = (PEngNameData) LockResource(hEngNameData);
    m_pEngNameData->pwUnicode = (PWORD) ((PBYTE) m_pEngNameData +
        sizeof(m_pEngNameData->dwTotalEngUnicodeNum) +
        sizeof(m_pEngNameData->dwTotalEngNamePairNum));
    m_pEngNameData->pEngNamePair = (PEngName) ((PBYTE) m_pEngNameData +
        sizeof(m_pEngNameData->dwTotalEngUnicodeNum) +
        sizeof(m_pEngNameData->dwTotalEngNamePairNum) +
        sizeof(m_pEngNameData->pwUnicode[0]) * m_pEngNameData->dwTotalEngUnicodeNum);
    // m_pEngName = (PEngName) LockResource(hEngNameData);
    // m_dwTotalEngNameNum = SizeofResource(m_hInstance, hEngNameRes) / sizeof(EngName);
    */

    // IsAChineseName() uses bsearch() with UnicodeCompare, so the table has
    // to be ordered by that same comparator.
    qsort(m_pwszSurname, m_dwTotalSurnameNum, sizeof(m_pwszSurname[0]),
          UnicodeCompare);

    return TRUE;
}
// Reports whether the uCount characters at lpwszChar are accepted by either
// name test. The Chinese-name test runs first; the English-name test is only
// consulted when the first one rejects the input.
BOOL CProperNoun::IsAProperNoun(
    LPWSTR lpwszChar,
    UINT uCount)
{
    if (IsAChineseName(lpwszChar, uCount)) {
        return TRUE;
    }
    if (IsAEnglishName(lpwszChar, uCount)) {
        return TRUE;
    }
    return FALSE;
}
// Decides whether the uCount characters at lpcwszChar look like a Chinese
// personal name.
//
// The first character must be found in the (sorted) surname table. The
// probabilities of the remaining characters are then multiplied together:
// a character present in m_pCharProb contributes its flProbability, any
// other character contributes FL_DEFAULT_CHAR_PROBABILITY. The input is
// accepted when the product is at least m_dProperNameThreshold.
//
// Parameters:
//   lpcwszChar - characters to test (need not be NUL-terminated; only
//                uCount characters are read).
//   uCount     - number of characters at lpcwszChar.
//
// Returns TRUE when the input is accepted, FALSE otherwise. A NULL pointer
// or an empty input is rejected without touching the buffer.
BOOL CProperNoun::IsAChineseName(
    LPCWSTR lpcwszChar,
    UINT uCount)
{
    // Nothing to examine: do not read lpcwszChar[0].
    if (lpcwszChar == NULL || uCount == 0) {
        return FALSE;
    }

    // Search key shaped like a row of m_pwszSurname (WCHAR[3]): the first
    // character followed by NULs. A local buffer is used so that calls do
    // not share mutable state.
    WCHAR wszSurname[3] = { lpcwszChar[0], L'\0', L'\0' };

    // Find surname
    if (bsearch(wszSurname, m_pwszSurname, m_dwTotalSurnameNum,
                sizeof(m_pwszSurname[0]), UnicodeCompare) == NULL) {
        return FALSE;
    }

    // Calculate probability to be a proper noun
    FLOAT flProbability = 1;
    CharProb Key;   // only dwUnicode is read by CharCompare
    for (UINT i = 1; i < uCount; ++i) {
        Key.dwUnicode = lpcwszChar[i];
        PCharProb pCharProb = (PCharProb) bsearch(&Key, m_pCharProb,
            m_dwTotalCharProbNum, sizeof(m_pCharProb[0]), CharCompare);
        if (pCharProb != NULL) {
            flProbability *= pCharProb->flProbability;
        } else {
            flProbability *= (FLOAT) FL_DEFAULT_CHAR_PROBABILITY;
        }
    }

    if (flProbability >= m_dProperNameThreshold) {
        return TRUE;
    }
    return FALSE;
}
// Decides whether the uCount characters at lpwszChar match a known
// English-name pair: the (first character, last character) pair is looked
// up in m_pEngNameData->pEngNamePair with EngNameCompare.
//
// Parameters:
//   lpwszChar - characters to test; only the first and last are read.
//   uCount    - number of characters at lpwszChar.
//
// Returns TRUE when the pair is found. Returns FALSE when it is not, when
// the input is NULL/empty, or when no English-name data is available (the
// constructor leaves m_pEngNameData NULL and InitData() in this file does
// not load it).
BOOL CProperNoun::IsAEnglishName(
    LPCWSTR lpwszChar,
    UINT uCount)
{
    // No table to search: report "not a name" instead of dereferencing NULL.
    if (m_pEngNameData == NULL || m_pEngNameData->pEngNamePair == NULL) {
        return FALSE;
    }

    // With uCount == 0 the index uCount - 1 would wrap around.
    if (lpwszChar == NULL || uCount == 0) {
        return FALSE;
    }

    EngName Name;
    Name.wPrevUnicode = lpwszChar[0];
    Name.wNextUnicode = lpwszChar[uCount - 1];

    // The element count of pEngNamePair is dwTotalEngNamePairNum;
    // dwTotalEngUnicodeNum sizes the separate pwUnicode array.
    if (bsearch(&Name, m_pEngNameData->pEngNamePair,
                m_pEngNameData->dwTotalEngNamePairNum,
                sizeof(EngName), EngNameCompare) != NULL) {
        return TRUE;
    }
    return FALSE;
}
WCHAR CProperNoun::m_pwszSurname[][3] = {
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"\",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"\",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"",
L"\",
L""
};
// Number of rows in m_pwszSurname, derived from the array's size so it
// stays in step with the table above.
DWORD CProperNoun::m_dwTotalSurnameNum =
    sizeof(m_pwszSurname) / sizeof(*m_pwszSurname);