Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

446 lines
7.5 KiB

  1. #include <windows.h>
  2. #include <assert.h>
  3. #include "PropNoun.H"
  4. int __cdecl CharCompare(
  5. const void *item1,
  6. const void *item2)
  7. {
  8. PCharProb pChar1 = (PCharProb) item1;
  9. PCharProb pChar2 = (PCharProb) item2;
  10. if (pChar1->dwUnicode > pChar2->dwUnicode) {
  11. return 1;
  12. } else if (pChar1->dwUnicode < pChar2->dwUnicode) {
  13. return -1;
  14. } else {
  15. return 0;
  16. }
  17. }
  18. int __cdecl UnicodeCompare(
  19. const void *item1,
  20. const void *item2)
  21. {
  22. int nSize1 = lstrlenW((LPWSTR) item1) * sizeof(WCHAR),
  23. nSize2 = lstrlenW((LPWSTR) item2) * sizeof(WCHAR);
  24. return memcmp(item1, item2, nSize1 > nSize2 ? nSize1 : nSize2);
  25. }
  26. int __cdecl EngNameCompare(
  27. const void *item1,
  28. const void *item2)
  29. {
  30. PEngName p1 = (PEngName) item1;
  31. PEngName p2 = (PEngName) item2;
  32. if (p1->wPrevUnicode > p2->wPrevUnicode) {
  33. return 1;
  34. } else if (p1->wPrevUnicode < p2->wPrevUnicode) {
  35. return -1;
  36. } else {
  37. if (p1->wNextUnicode > p2->wNextUnicode) {
  38. return 1;
  39. } else if (p1->wNextUnicode < p2->wNextUnicode) {
  40. return -1;
  41. } else {
  42. return 0;
  43. }
  44. }
  45. }
  46. CProperNoun::CProperNoun(
  47. HINSTANCE hInstance) :
  48. m_dProperNameThreshold(FL_PROPER_NAME_THRESHOLD),
  49. m_pCharProb(NULL),
  50. m_dwTotalCharProbNum(0),
  51. m_pEngNameData(NULL),
  52. m_hProcessHeap(0),
  53. m_hInstance(hInstance)
  54. {
  55. }
  56. CProperNoun::~CProperNoun()
  57. {
  58. }
  59. BOOL CProperNoun::InitData()
  60. {
  61. BOOL fRet = FALSE;
  62. HRSRC hResource;
  63. HGLOBAL hGlobal;
  64. m_hProcessHeap = GetProcessHeap();
  65. // Find resource
  66. hResource = FindResource(m_hInstance, TEXT("CNAME"), TEXT("BIN"));
  67. if (!hResource) { goto _exit; }
  68. // Load resource
  69. hGlobal = LoadResource(m_hInstance, hResource);
  70. if (!hGlobal) { goto _exit; }
  71. m_pCharProb = (PCharProb) LockResource(hGlobal);
  72. if (!m_pCharProb) { goto _exit; }
  73. m_dwTotalCharProbNum = SizeofResource(m_hInstance, hResource) / sizeof(CharProb);
  74. /*
  75. // Find resource
  76. hResource = FindResource(m_hInstance, TEXT("ENAME"),
  77. TEXT("BIN"));
  78. if (!hResource) { goto _exit; }
  79. // Load resource
  80. hGlobal = LoadResource(m_hInstance, hResource);
  81. if (!hGlobal) { goto _exit; }
  82. m_pEngNameData = (PEngNameData) LockResource(hGlobal);
  83. m_pEngNameData->pwUnicode = (PWORD) ((PBYTE) m_pEngNameData +
  84. sizeof(m_pEngNameData->dwTotalEngUnicodeNum) +
  85. sizeof(m_pEngNameData->dwTotalEngNamePairNum));
  86. m_pEngNameData->pEngNamePair = (PEngName) ((PBYTE) m_pEngNameData +
  87. sizeof(m_pEngNameData->dwTotalEngUnicodeNum) +
  88. sizeof(m_pEngNameData->dwTotalEngNamePairNum) +
  89. sizeof(m_pEngNameData->pwUnicode[0]) * m_pEngNameData->dwTotalEngUnicodeNum);
  90. // m_pEngName = (PEngName) LockResource(hGlobal);
  91. // m_dwTotalEngNameNum = SizeofResource(m_hInstance, hResource) / sizeof(EngName);
  92. */
  93. qsort(m_pwszSurname, m_dwTotalSurnameNum, sizeof(m_pwszSurname[0]), UnicodeCompare);
  94. fRet = TRUE;
  95. _exit:
  96. return fRet;
  97. }
  98. BOOL CProperNoun::IsAProperNoun(
  99. LPWSTR lpwszChar,
  100. UINT uCount)
  101. {
  102. return (IsAChineseName(lpwszChar, uCount) || IsAEnglishName(lpwszChar, uCount));
  103. }
  104. BOOL CProperNoun::IsAChineseName(
  105. LPCWSTR lpcwszChar,
  106. UINT uCount)
  107. {
  108. static WCHAR wszChar[3] = { NULL };
  109. PWCHAR pwsResult;
  110. wszChar[0] = lpcwszChar[0];
  111. // Find surname
  112. if (pwsResult = (PWCHAR) bsearch(wszChar, m_pwszSurname, m_dwTotalSurnameNum, sizeof(m_pwszSurname[0]),
  113. UnicodeCompare)) {
  114. FLOAT flProbability = 1;
  115. PCharProb pCharProb;
  116. CharProb CProb;
  117. // Calculate probability to be a proper noun
  118. for (UINT i = 1; i < uCount; ++i) {
  119. CProb.dwUnicode = lpcwszChar[i];
  120. if (pCharProb = (PCharProb) bsearch(&CProb, m_pCharProb,
  121. m_dwTotalCharProbNum, sizeof(m_pCharProb[0]), CharCompare)) {
  122. flProbability *= pCharProb->flProbability;
  123. } else {
  124. flProbability *= (FLOAT) FL_DEFAULT_CHAR_PROBABILITY;
  125. }
  126. }
  127. if (flProbability >= m_dProperNameThreshold) {
  128. return TRUE;
  129. }
  130. }
  131. return FALSE;
  132. }
  133. BOOL CProperNoun::IsAEnglishName(
  134. LPCWSTR lpwszChar,
  135. UINT uCount)
  136. {
  137. static EngName Name;
  138. Name.wPrevUnicode = lpwszChar[0];
  139. Name.wNextUnicode = lpwszChar[uCount - 1];
  140. if (bsearch(&Name, m_pEngNameData->pEngNamePair, m_pEngNameData->dwTotalEngUnicodeNum, sizeof(EngName), EngNameCompare)) {
  141. return TRUE;
  142. }
  143. return FALSE;
  144. }
  // Surname table searched by IsAChineseName(). Each row is a WCHAR[3], i.e.
  // room for at most two characters plus the terminator. InitData() sorts the
  // rows in place with UnicodeCompare before any lookup, so the order in which
  // the rows are written here does not have to be the search order.
  // NOTE(review): most literals below contain U+FFFD replacement characters,
  // stray Latin/Cyrillic letters or a lone escaped backslash. This looks like
  // text from a legacy double-byte encoding (presumably Big5 - confirm) that
  // was decoded with the wrong code page. The literals are left exactly as
  // found; restore them from a correctly encoded copy of this file.
  WCHAR CProperNoun::m_pwszSurname[][3] = {
  L"�B",
  L"�R",
  L"�_",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"�C",
  L"�K",
  L"�T",
  L"�]",
  L"�q",
  L"�v",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"�V",
  L"�w",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"�E",
  L"�d",
  L"�f",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"�H",
  L"�L",
  L"�f",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"�P",
  L"�s",
  L"�u",
  L"�x",
  L"�}",
  L"��",
  L"��",
  L"��",
  L"�L",
  L"�Z",
  L"�k",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"�J",
  L"\\",
  L"��",
  L"��",
  L"��",
  L"�I",
  L"�R",
  L"�_",
  L"�d",
  L"�h",
  L"�q",
  L"�x",
  L"��",
  L"�J",
  L"�S",
  L"�]",
  L"�p",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"�L",
  L"�V",
  L"�]",
  L"�c",
  L"�u",
  L"�}",
  L"��",
  L"��",
  L"��",
  L"��",
  L"�Z",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"�K",
  L"�q",
  L"�|",
  L"�}",
  L"��",
  L"��",
  L"�O",
  L"�Z",
  L"�d",
  L"�h",
  L"�i",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"\\",
  L"�s",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"�^",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"�J",
  L"�q",
  L"�{",
  L"��",
  L"��",
  L"��",
  L"�O",
  L"�P",
  L"�R",
  L"�d",
  L"�k",
  L"�s",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"�q",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"�Q",
  L"�l",
  L"�p",
  L"��",
  L"��",
  L"�a",
  L"��",
  L"��",
  L"��",
  L"�p",
  L"�u",
  L"��",
  L"��",
  L"��",
  L"�B",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"�G",
  L"�H",
  L"�|",
  L"��",
  L"��",
  L"��",
  L"�P",
  L"�c",
  L"�p",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"�F",
  L"�N",
  L"�R",
  L"�d",
  L"�j",
  L"�s",
  L"��",
  L"��",
  L"��",
  L"�t",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"��",
  L"£",
  L"²",
  L"¿",
  L"��",
  L"��",
  L"��",
  L"��",
  L"�C",
  L"�Q",
  L"�e",
  L"ù",
  L"��",
  L"��",
  L"��",
  L"��",
  L"�Y",
  L"�u",
  L"ĩ",
  L"Ī",
  L"Ĭ",
  L"��",
  L"��",
  L"�U",
  L"��",
  L"��",
  L"�e",
  L"�s",
  L"м",
  L"\\",
  L"�k"
  };
  416. DWORD CProperNoun::m_dwTotalSurnameNum = sizeof(m_pwszSurname) / sizeof(m_pwszSurname[0]);