Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

621 lines
22 KiB

  1. #include <windows.h>
  2. #include "lexicon.h"
  3. #include "lexmgr.h"
  4. #include "PropNoun.h"
  5. #include <stdio.h>
  6. #include <imm.h>
  7. #include <stdio.h>
  8. CCHTLexicon::CCHTLexicon()
  9. {
  10. m_psLexiconHeader = NULL;
  11. m_pbLexiconBase = NULL;
  12. #ifdef CHINESE_PROP_NAME
  13. m_pcPropName = NULL;
  14. #endif
  15. m_sAddInLexicon.dwMaxWordNumber = 0;
  16. m_sAddInLexicon.dwWordNumber = 0;
  17. m_sAddInLexicon.psWordData = NULL;
  18. FillMemory(m_sAddInLexicon.wWordBeginIndex,
  19. sizeof(m_sAddInLexicon.wWordBeginIndex), 0);
  20. }
  21. CCHTLexicon::~CCHTLexicon()
  22. {
  23. DWORD i;
  24. // Do not build one char word signature
  25. if (m_psLexiconHeader) {
  26. for (i = 0 ; i < m_psLexiconHeader->dwMaxCharPerWord; ++i) {
  27. if (m_sWordInfo[i].pbFirstCharSignature) {
  28. delete m_sWordInfo[i].pbFirstCharSignature;
  29. }
  30. if (m_sWordInfo[i].pbLastCharSignature) {
  31. delete m_sWordInfo[i].pbLastCharSignature;
  32. }
  33. }
  34. }
  35. #ifdef CHINESE_PROP_NAME
  36. if (m_pcPropName) {
  37. delete m_pcPropName;
  38. }
  39. #endif
  40. // Free add in lexicon
  41. if (m_sAddInLexicon.psWordData) {
  42. for (i = 0; i < m_sAddInLexicon.dwWordNumber; ++i) {
  43. delete m_sAddInLexicon.psWordData[i].lpwszWordStr;
  44. m_sAddInLexicon.psWordData[i].lpwszWordStr = NULL;
  45. }
  46. delete m_sAddInLexicon.psWordData;
  47. m_sAddInLexicon.psWordData = NULL;
  48. }
  49. m_sAddInLexicon.dwMaxWordNumber = 0;
  50. m_sAddInLexicon.dwWordNumber = 0;
  51. }
  52. BOOL CCHTLexicon::InitData(
  53. HINSTANCE hInstance)
  54. {
  55. HRSRC hResInfo;
  56. HGLOBAL hResData;
  57. BOOL fRet = FALSE;
  58. TCHAR tszLexiconResName[MAX_PATH];
  59. DWORD i;
  60. // Init main lexicon
  61. lstrcpy(tszLexiconResName, TEXT("LEXICON"));
  62. if (!(hResInfo = FindResource(hInstance, tszLexiconResName, TEXT("BIN")))) {
  63. } else if (!(hResData = LoadResource(hInstance, hResInfo))) {
  64. } else if (!(m_pbLexiconBase = (LPBYTE)LockResource(hResData))) {
  65. } else {
  66. m_psLexiconHeader = (PSLexFileHeader)m_pbLexiconBase;
  67. for (i = 0 ; i < m_psLexiconHeader->dwMaxCharPerWord; ++i) {
  68. m_sWordInfo[i].lpwWordString = (LPWSTR)(m_pbLexiconBase +
  69. m_psLexiconHeader->sLexInfo[i].dwWordStringOffset);
  70. m_sWordInfo[i].pbAttribute = (LPBYTE)(m_pbLexiconBase +
  71. m_psLexiconHeader->sLexInfo[i].dwWordAttribOffset);
  72. m_sWordInfo[i].pwUnicount = (PWORD)(m_pbLexiconBase +
  73. m_psLexiconHeader->sLexInfo[i].dwWordCountOffset);
  74. m_sWordInfo[i].pbTerminalCode = (LPBYTE)(m_pbLexiconBase +
  75. m_psLexiconHeader->sLexInfo[i].dwTerminalCodeOffset);
  76. }
  77. BuildSignatureData();
  78. fRet = TRUE;
  79. }
  80. // Init alt lexicon
  81. lstrcpy(tszLexiconResName, TEXT("ALTWORD"));
  82. if (!(hResInfo = FindResource(hInstance, tszLexiconResName, TEXT("BIN")))) {
  83. } else if (!(hResData = LoadResource(hInstance, hResInfo))) {
  84. } else if (!(m_pbAltWordBase = (LPBYTE)LockResource(hResData))) {
  85. } else {
  86. m_psAltWordHeader = (PSAltLexFileHeader)m_pbAltWordBase;
  87. for (i = 0 ; i < m_psAltWordHeader->dwMaxCharPerWord; ++i) {
  88. m_sAltWordInfo[i].lpwWordString = (LPWSTR)(m_pbAltWordBase +
  89. m_psAltWordHeader->sAltWordInfo[i].dwWordStringOffset);
  90. m_sAltWordInfo[i].pdwGroupID = (PDWORD)(m_pbAltWordBase +
  91. m_psAltWordHeader->sAltWordInfo[i].dwWordGroupOffset);
  92. }
  93. fRet = TRUE;
  94. }
  95. #ifdef CHINESE_PROP_NAME
  96. m_pcPropName = new CProperNoun(hInstance);
  97. if (m_pcPropName) {
  98. m_pcPropName->InitData();
  99. }
  100. #endif
  101. #ifdef _DEBUG
  102. FILE *fp;
  103. DWORD j;
  104. WCHAR wUnicodeString[MAX_CHAR_PER_WORD + 1];
  105. CHAR cANSIString[MAX_CHAR_PER_WORD * 2 + 1];
  106. WORD wCount;
  107. fp = fopen("DM.dmp", "wt");
  108. for (i = 0 ; i < m_psLexiconHeader->dwMaxCharPerWord; ++i) {
  109. for (j = 0; j < m_psLexiconHeader->sLexInfo[i].dwWordNumber; ++j) {
  110. if (m_sWordInfo[i].pbAttribute[j] & ATTR_DM) {
  111. wCount = m_sWordInfo[i].pwUnicount[j];
  112. if (i == 0) {
  113. wUnicodeString[0] = (WORD)(CHT_UNICODE_BEGIN + j);
  114. } else {
  115. CopyMemory(wUnicodeString, &(m_sWordInfo[i].lpwWordString[j * (i + 1)]),
  116. sizeof(WCHAR) * (i + 1));
  117. }
  118. wUnicodeString[i + 1] = '\0';
  119. WideCharToMultiByte(950, WC_COMPOSITECHECK, wUnicodeString, i + 1 + 1,
  120. cANSIString, sizeof(cANSIString), NULL, NULL);
  121. fprintf(fp, "%s %d\n", cANSIString, wCount);
  122. }
  123. }
  124. }
  125. fclose(fp);
  126. fp = fopen("COMPOUND.dmp", "wt");
  127. for (i = 0 ; i < m_psLexiconHeader->dwMaxCharPerWord; ++i) {
  128. for (j = 0; j < m_psLexiconHeader->sLexInfo[i].dwWordNumber; ++j) {
  129. if (m_sWordInfo[i].pbAttribute[j] & ATTR_COMPOUND) {
  130. wCount = m_sWordInfo[i].pwUnicount[j];
  131. if (i == 0) {
  132. wUnicodeString[0] = (WORD)(CHT_UNICODE_BEGIN + j);
  133. } else {
  134. CopyMemory(wUnicodeString, &(m_sWordInfo[i].lpwWordString[j * (i + 1)]),
  135. sizeof(WCHAR) * (i + 1));
  136. }
  137. wUnicodeString[i + 1] = '\0';
  138. WideCharToMultiByte(950, WC_COMPOSITECHECK, wUnicodeString, i + 1 + 1,
  139. cANSIString, sizeof(cANSIString), NULL, NULL);
  140. fprintf(fp, "%s %d\n", cANSIString, wCount);
  141. }
  142. }
  143. }
  144. fclose(fp);
  145. #endif
  146. // Init EUDP to special word
  147. LoadEUDP();
  148. return fRet;
  149. }
  150. void CCHTLexicon::BuildSignatureData(void)
  151. {
  152. DWORD i, j, dwWordNumber;
  153. WORD wFirstChar, wLastChar;
  154. for (i = 0; i < MAX_CHAR_PER_WORD; ++i) {
  155. m_sWordInfo[i].pbFirstCharSignature = NULL;
  156. m_sWordInfo[i].pbLastCharSignature = NULL;
  157. }
  158. // Do not build one char word signature
  159. for (i = 0 ; i < m_psLexiconHeader->dwMaxCharPerWord; ++i) {
  160. dwWordNumber = m_psLexiconHeader->sLexInfo[i].dwWordNumber;
  161. if (i != 0 && dwWordNumber > WORD_NUM_TO_BUILD_SIGNATURE) {
  162. m_sWordInfo[i].pbFirstCharSignature = new BYTE[(CHT_UNICODE_END - CHT_UNICODE_BEGIN + 1) / 8 + 1];
  163. if (NULL == m_sWordInfo[i].pbFirstCharSignature) { continue; }
  164. FillMemory(m_sWordInfo[i].pbFirstCharSignature, (CHT_UNICODE_END - CHT_UNICODE_BEGIN + 1) / 8, 0);
  165. for (j = 0; j < dwWordNumber; ++j) {
  166. wFirstChar = m_sWordInfo[i].lpwWordString[(i + 1) * j];
  167. if (wFirstChar >= CHT_UNICODE_BEGIN) {
  168. m_sWordInfo[i].pbFirstCharSignature[(wFirstChar - CHT_UNICODE_BEGIN) / 8] |=
  169. (0x00000001 << ((wFirstChar - CHT_UNICODE_BEGIN) % 8));
  170. }
  171. }
  172. m_sWordInfo[i].pbLastCharSignature = new BYTE[(CHT_UNICODE_END - CHT_UNICODE_BEGIN + 1) / 8 + 1];
  173. if (NULL == m_sWordInfo[i].pbLastCharSignature) { continue; }
  174. FillMemory(m_sWordInfo[i].pbLastCharSignature, (CHT_UNICODE_END - CHT_UNICODE_BEGIN + 1) / 8, 0);
  175. for (j = 0; j < dwWordNumber; ++j) {
  176. wLastChar = m_sWordInfo[i].lpwWordString[(i + 1) * (j + 1) - 1];
  177. if (wLastChar >= CHT_UNICODE_BEGIN) {
  178. m_sWordInfo[i].pbLastCharSignature[(wLastChar - CHT_UNICODE_BEGIN) / 8] |=
  179. (0x00000001 << ((wLastChar - CHT_UNICODE_BEGIN) % 8));
  180. }
  181. }
  182. } else {
  183. m_sWordInfo[i].pbFirstCharSignature = NULL;
  184. m_sWordInfo[i].pbLastCharSignature = NULL;
  185. }
  186. }
  187. }
  188. BOOL CCHTLexicon::GetWordInfo(
  189. LPCWSTR lpcwString,
  190. DWORD dwLength,
  191. PWORD pwUnicount,
  192. PWORD pwAttrib,
  193. PBYTE pbTerminalCode)
  194. {
  195. BOOL fRet;
  196. BYTE bMainLexAttrib;
  197. fRet = GetMainLexiconWordInfo(lpcwString, dwLength, pwUnicount,
  198. &bMainLexAttrib, pbTerminalCode);
  199. *pwAttrib = bMainLexAttrib;
  200. if (fRet) { goto _exit; }
  201. #ifdef CHINESE_PROP_NAME
  202. if (dwLength == 3) {
  203. if (m_pcPropName->IsAChineseName(lpcwString, dwLength)) {
  204. *pbTerminalCode = ' ';
  205. *pwAttrib = ATTR_RULE_WORD;
  206. *pwUnicount = 100;
  207. fRet = TRUE;
  208. goto _exit;
  209. }
  210. }
  211. #endif
  212. fRet = GetAddInWordInfo(lpcwString, dwLength, pwUnicount,
  213. pwAttrib, pbTerminalCode);
  214. _exit:
  215. return fRet;
  216. }
  217. BOOL CCHTLexicon::GetMainLexiconWordInfo(
  218. LPCWSTR lpcwString,
  219. DWORD dwLength,
  220. PWORD pwUnicount,
  221. PBYTE pbAttrib,
  222. PBYTE pbTerminalCode)
  223. {
  224. INT nBegin, nEnd, nMid;
  225. INT nCmp;
  226. BOOL fRet = FALSE;
  227. LPWSTR lpwLexString;
  228. DWORD dwFirstCharIndex, dwLastCharIndex;
  229. if (dwLength > m_psLexiconHeader->dwMaxCharPerWord) { goto _exit; }
  230. if (lpcwString[0] < CHT_UNICODE_BEGIN || lpcwString[0] > CHT_UNICODE_END) {
  231. goto _exit;
  232. }
  233. dwFirstCharIndex = lpcwString[0] - CHT_UNICODE_BEGIN;
  234. if (dwLength == 1) {
  235. *pwUnicount = m_sWordInfo[dwLength - 1].pwUnicount[dwFirstCharIndex];
  236. *pbAttrib = m_sWordInfo[dwLength - 1].pbAttribute[dwFirstCharIndex];
  237. *pbTerminalCode = m_sWordInfo[dwLength - 1].pbTerminalCode[dwFirstCharIndex];
  238. fRet = TRUE;
  239. } else {
  240. // Check signature first
  241. if (m_sWordInfo[dwLength - 1].pbFirstCharSignature) {
  242. if (!(m_sWordInfo[dwLength - 1].pbFirstCharSignature[dwFirstCharIndex / 8] &
  243. (0x00000001 << (dwFirstCharIndex % 8)))) { goto _exit; }
  244. }
  245. if (lpcwString[dwLength - 1] >= CHT_UNICODE_BEGIN && lpcwString[dwLength - 1] <= CHT_UNICODE_END) {
  246. if (m_sWordInfo[dwLength - 1].pbLastCharSignature) {
  247. dwLastCharIndex = lpcwString[dwLength - 1] - CHT_UNICODE_BEGIN;
  248. if (!(m_sWordInfo[dwLength - 1].pbLastCharSignature[dwLastCharIndex / 8] &
  249. (0x00000001 << (dwLastCharIndex % 8)))) { goto _exit; }
  250. }
  251. }
  252. nBegin = 0;
  253. nEnd = m_psLexiconHeader->sLexInfo[dwLength - 1].dwWordNumber - 1;
  254. lpwLexString = m_sWordInfo[dwLength - 1].lpwWordString;
  255. DWORD dwCompByteNum = sizeof(WCHAR) * dwLength;
  256. while (nBegin <= nEnd) {
  257. nMid = (nBegin + nEnd) / 2;
  258. nCmp = memcmp(&(lpwLexString[nMid * dwLength]), lpcwString, dwCompByteNum);
  259. if (nCmp > 0) {
  260. nEnd = nMid - 1;
  261. } else if (nCmp < 0) {
  262. nBegin = nMid + 1;
  263. } else {
  264. *pwUnicount = m_sWordInfo[dwLength - 1].pwUnicount[nMid];
  265. *pbAttrib = m_sWordInfo[dwLength - 1].pbAttribute[nMid];
  266. *pbTerminalCode = m_sWordInfo[dwLength - 1].pbTerminalCode[nMid];
  267. fRet = TRUE;
  268. break;
  269. }
  270. }
  271. }
  272. _exit:
  273. if (!fRet) {
  274. *pwUnicount = 0;
  275. *pbAttrib = 0;
  276. *pbTerminalCode = ' ';
  277. }
  278. return fRet;
  279. }
  280. // Load EUDP
  281. int CALLBACK EUDPCountA(
  282. LPCSTR lpcszReading,
  283. DWORD dwStyle,
  284. LPCSTR lpcszString,
  285. LPVOID lpvData)
  286. {
  287. PSAddInLexicon psAddInLexicon;
  288. if (lstrlenA(lpcszString) / sizeof(WCHAR) <= MAX_CHAR_PER_WORD) {
  289. psAddInLexicon = (PSAddInLexicon)lpvData;
  290. ++psAddInLexicon->dwWordNumber;
  291. }
  292. return 1;
  293. }
  294. int CALLBACK EUDPCountW(
  295. LPCWSTR lpcwszReading,
  296. DWORD dwStyle,
  297. LPCWSTR lpcwszString,
  298. LPVOID lpvData)
  299. {
  300. PSAddInLexicon psAddInLexicon;
  301. if (lstrlenW(lpcwszString) <= MAX_CHAR_PER_WORD) {
  302. psAddInLexicon = (PSAddInLexicon)lpvData;
  303. ++psAddInLexicon->dwWordNumber;
  304. }
  305. return 1;
  306. }
  307. int CALLBACK EUDPLoadA(
  308. LPCSTR lpcszReading,
  309. DWORD dwStyle,
  310. LPCSTR lpcszString,
  311. LPVOID lpvData)
  312. {
  313. PSAddInLexicon psAddInLexicon;
  314. WORD wStrLen;
  315. wStrLen = (WORD)lstrlenA(lpcszString);
  316. if (wStrLen / sizeof(WCHAR) <= MAX_CHAR_PER_WORD) {
  317. psAddInLexicon = (PSAddInLexicon)lpvData;
  318. psAddInLexicon->psWordData[psAddInLexicon->dwWordNumber].lpwszWordStr =
  319. new WORD[wStrLen / sizeof(WCHAR) + 1]; // zero end
  320. MultiByteToWideChar(950, MB_PRECOMPOSED, lpcszString, wStrLen + 1,
  321. psAddInLexicon->psWordData[psAddInLexicon->dwWordNumber].lpwszWordStr,
  322. wStrLen / sizeof(WCHAR) + 1);
  323. psAddInLexicon->psWordData[psAddInLexicon->dwWordNumber].wAttrib = ATTR_EUDP_WORD;
  324. psAddInLexicon->psWordData[psAddInLexicon->dwWordNumber].wLen = wStrLen / sizeof(WCHAR);
  325. ++psAddInLexicon->dwWordNumber;
  326. }
  327. return 1;
  328. }
  329. int CALLBACK EUDPLoadW(
  330. LPCWSTR lpcwszReading,
  331. DWORD dwStyle,
  332. LPCWSTR lpcwszString,
  333. LPVOID lpvData)
  334. {
  335. PSAddInLexicon psAddInLexicon;
  336. WORD wStrLen;
  337. wStrLen = (WORD)lstrlenW(lpcwszString);
  338. if (wStrLen <= MAX_CHAR_PER_WORD) {
  339. psAddInLexicon = (PSAddInLexicon)lpvData;
  340. psAddInLexicon->psWordData[psAddInLexicon->dwWordNumber].lpwszWordStr =
  341. new WORD[wStrLen + 1];
  342. CopyMemory(psAddInLexicon->psWordData[psAddInLexicon->dwWordNumber].lpwszWordStr,
  343. lpcwszString, (wStrLen + 1) * sizeof(WCHAR));
  344. psAddInLexicon->psWordData[psAddInLexicon->dwWordNumber].wAttrib = ATTR_EUDP_WORD;
  345. psAddInLexicon->psWordData[psAddInLexicon->dwWordNumber].wLen = wStrLen;
  346. ++psAddInLexicon->dwWordNumber;
  347. }
  348. return 1;
  349. }
  350. int __cdecl CompSWordData(
  351. const void *arg1,
  352. const void *arg2)
  353. {
  354. PSWordData psWordData1, psWordData2;
  355. psWordData1 = (PSWordData)arg1;
  356. psWordData2 = (PSWordData)arg2;
  357. if (psWordData1->wLen < psWordData2->wLen) {
  358. return -1;
  359. } else if (psWordData1->wLen > psWordData2->wLen) {
  360. return 1;
  361. } else {
  362. return memcmp(psWordData1->lpwszWordStr,
  363. psWordData2->lpwszWordStr, psWordData1->wLen * sizeof(WCHAR));
  364. }
  365. }
  366. void CCHTLexicon::LoadEUDP(void)
  367. {
  368. DWORD i;
  369. m_sAddInLexicon.dwWordNumber = 0;
  370. m_sAddInLexicon.dwMaxWordNumber = 0;
  371. m_sAddInLexicon.psWordData = NULL;
  372. OSVERSIONINFOA OSVerInfo;
  373. OSVerInfo.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
  374. GetVersionExA(&OSVerInfo);
  375. if (OSVerInfo.dwPlatformId == VER_PLATFORM_WIN32_NT) {
  376. ImmEnumRegisterWordW(HKL((ULONG_PTR) 0xE0080404), EUDPCountW, NULL,
  377. IME_REGWORD_STYLE_USER_PHRASE, NULL, &m_sAddInLexicon);
  378. } else {
  379. ImmEnumRegisterWordA(HKL((ULONG_PTR) 0xE0080404), EUDPCountA, NULL,
  380. IME_REGWORD_STYLE_USER_PHRASE, NULL, &m_sAddInLexicon);
  381. }
  382. if (m_sAddInLexicon.dwWordNumber) {
  383. m_sAddInLexicon.dwMaxWordNumber = m_sAddInLexicon.dwWordNumber + EUDP_GROW_NUMBER;
  384. m_sAddInLexicon.psWordData = new SWordData[m_sAddInLexicon.dwMaxWordNumber];
  385. m_sAddInLexicon.dwWordNumber = 0;
  386. if (OSVerInfo.dwPlatformId == VER_PLATFORM_WIN32_NT) {
  387. ImmEnumRegisterWordW(HKL((ULONG_PTR) 0xE0080404), EUDPLoadW, NULL,
  388. IME_REGWORD_STYLE_USER_PHRASE, NULL, &m_sAddInLexicon);
  389. } else {
  390. ImmEnumRegisterWordA(HKL((ULONG_PTR) 0xE0080404), EUDPLoadA, NULL,
  391. IME_REGWORD_STYLE_USER_PHRASE, NULL, &m_sAddInLexicon);
  392. }
  393. qsort(m_sAddInLexicon.psWordData, m_sAddInLexicon.dwWordNumber,
  394. sizeof(SWordData), CompSWordData);
  395. for (i = 0; i < m_sAddInLexicon.dwWordNumber; ++i) {
  396. ++m_sAddInLexicon.wWordBeginIndex[m_sAddInLexicon.psWordData[i].wLen - 1];
  397. }
  398. WORD wCount = m_sAddInLexicon.wWordBeginIndex[0];
  399. for (i = 1; i <= MAX_CHAR_PER_WORD; ++i) {
  400. WORD wTemp = m_sAddInLexicon.wWordBeginIndex[i];
  401. m_sAddInLexicon.wWordBeginIndex[i] = wCount;
  402. wCount += wTemp;
  403. }
  404. } else {
  405. m_sAddInLexicon.dwMaxWordNumber = 0;
  406. }
  407. }
  408. // Add AP word
  409. BOOL CCHTLexicon::AddInLexiconInsert(
  410. LPCWSTR lpcwszWordStr,
  411. WORD wAttrib)
  412. {
  413. BOOL fRet = FALSE;
  414. WORD wStrLen, i, j;
  415. INT nIndex;
  416. wStrLen = (WORD)lstrlenW(lpcwszWordStr);
  417. if (wStrLen > MAX_CHAR_PER_WORD) { goto _exit; }
  418. // if exit, just change it's attrib;
  419. // Be carefully, EUDP > Error word
  420. if ((nIndex = GetAddInWordInfoIndex(lpcwszWordStr, wStrLen)) != -1) {
  421. if (m_sAddInLexicon.psWordData[nIndex].wAttrib == ATTR_EUDP_WORD) {
  422. } else {
  423. m_sAddInLexicon.psWordData[nIndex].wAttrib = wAttrib;
  424. }
  425. goto _exit;
  426. }
  427. // Enlarge space
  428. if (m_sAddInLexicon.dwMaxWordNumber == m_sAddInLexicon.dwWordNumber) {
  429. PSWordData psTempWordData;
  430. psTempWordData = new SWordData [m_sAddInLexicon.dwMaxWordNumber + EUDP_GROW_NUMBER];
  431. if (!psTempWordData) { goto _exit; }
  432. CopyMemory(psTempWordData, m_sAddInLexicon.psWordData,
  433. m_sAddInLexicon.dwWordNumber * sizeof(SWordData));
  434. delete [] m_sAddInLexicon.psWordData;
  435. m_sAddInLexicon.psWordData = psTempWordData;
  436. m_sAddInLexicon.dwMaxWordNumber += EUDP_GROW_NUMBER;
  437. }
  438. // Insert word
  439. for (i = m_sAddInLexicon.wWordBeginIndex[wStrLen - 1]; i < m_sAddInLexicon.wWordBeginIndex[wStrLen]; ++i) {
  440. if (memcmp(lpcwszWordStr, m_sAddInLexicon.psWordData[i].lpwszWordStr,
  441. wStrLen * sizeof(WCHAR)) < 0) {
  442. break;
  443. }
  444. }
  445. for (j = (WORD)m_sAddInLexicon.dwWordNumber; j > i; --j) {
  446. m_sAddInLexicon.psWordData[j] = m_sAddInLexicon.psWordData[j - 1];
  447. }
  448. m_sAddInLexicon.psWordData[i].lpwszWordStr = new WORD[wStrLen + 1];
  449. CopyMemory(m_sAddInLexicon.psWordData[i].lpwszWordStr, lpcwszWordStr,
  450. (wStrLen + 1) * sizeof(WORD));
  451. m_sAddInLexicon.psWordData[i].wAttrib = wAttrib;
  452. m_sAddInLexicon.psWordData[i].wLen = wStrLen;
  453. ++m_sAddInLexicon.dwWordNumber;
  454. for (i = wStrLen; i <= MAX_CHAR_PER_WORD; ++i) {
  455. ++m_sAddInLexicon.wWordBeginIndex[i];
  456. }
  457. fRet = TRUE;
  458. _exit:
  459. #ifdef _DEBUG
  460. for (i = 1; i <= MAX_CHAR_PER_WORD; ++i) {
  461. for (j = m_sAddInLexicon.wWordBeginIndex[i - 1]; j < m_sAddInLexicon.wWordBeginIndex[i]; ++j) {
  462. if (m_sAddInLexicon.psWordData[j].wLen != i) {
  463. MessageBox(0, TEXT("Error string length"), TEXT("Error"), MB_OK);
  464. }
  465. if (j == m_sAddInLexicon.wWordBeginIndex[i] - 1) {
  466. } else if (memcmp(m_sAddInLexicon.psWordData[j].lpwszWordStr,
  467. m_sAddInLexicon.psWordData[j + 1].lpwszWordStr,
  468. m_sAddInLexicon.psWordData[j].wLen * sizeof(WORD)) >= 0) {
  469. MessageBox(0, TEXT("Error string order"), TEXT("Error"), MB_OK);
  470. } else {
  471. }
  472. }
  473. }
  474. #endif
  475. return fRet;
  476. }
  477. BOOL CCHTLexicon::GetAddInWordInfo(
  478. LPCWSTR lpcwString,
  479. DWORD dwLength,
  480. PWORD pwUnicount,
  481. PWORD pwAttrib,
  482. PBYTE pbTerminalCode)
  483. {
  484. BOOL fRet = FALSE;
  485. INT nIndex;
  486. if (dwLength > MAX_CHAR_PER_WORD) { goto _exit; }
  487. nIndex = GetAddInWordInfoIndex(lpcwString, dwLength);
  488. if (nIndex == -1) { goto _exit; }
  489. if (pwUnicount) {
  490. *pwUnicount = 10000;
  491. }
  492. if (pwAttrib) {
  493. *pwAttrib = m_sAddInLexicon.psWordData[nIndex].wAttrib;
  494. }
  495. if (pbTerminalCode) {
  496. *pbTerminalCode = ' ';
  497. }
  498. fRet = TRUE;
  499. _exit:
  500. return fRet;
  501. }
  502. // return -1 if not find
  503. INT CCHTLexicon::GetAddInWordInfoIndex(
  504. LPCWSTR lpcwString,
  505. DWORD dwLength)
  506. {
  507. INT nRet = -1;
  508. INT nBegin, nEnd, nMid;
  509. INT nCmp;
  510. if (dwLength > MAX_CHAR_PER_WORD) { goto _exit; }
  511. if (m_sAddInLexicon.wWordBeginIndex[dwLength - 1] == m_sAddInLexicon.wWordBeginIndex[dwLength]) {
  512. goto _exit;
  513. }
  514. nBegin = m_sAddInLexicon.wWordBeginIndex[dwLength - 1];
  515. nEnd = m_sAddInLexicon.wWordBeginIndex[dwLength] - 1;
  516. while (nBegin <= nEnd) {
  517. nMid = (nBegin + nEnd) / 2;
  518. nCmp = memcmp(m_sAddInLexicon.psWordData[nMid].lpwszWordStr,
  519. lpcwString, dwLength * sizeof(WCHAR));
  520. if (nCmp > 0) {
  521. nEnd = nMid - 1;
  522. } else if (nCmp < 0) {
  523. nBegin = nMid + 1;
  524. } else {
  525. nRet = nMid;
  526. break;
  527. }
  528. }
  529. _exit:
  530. return nRet;
  531. }
  532. DWORD CCHTLexicon::GetAltWord(
  533. LPCWSTR lpcwString,
  534. DWORD dwLength,
  535. LPWSTR* lppwAltWordBuf)
  536. {
  537. INT nBegin, nEnd, nMid;
  538. INT nCmp;
  539. DWORD dwRet = 0;
  540. LPWSTR lpwAltWordString;
  541. DWORD dwGroupID;
  542. if (dwLength > m_psAltWordHeader->dwMaxCharPerWord) { goto _exit; }
  543. nBegin = 0;
  544. nEnd = m_psAltWordHeader->sAltWordInfo[dwLength - 1].dwWordNumber - 1;
  545. lpwAltWordString = m_sAltWordInfo[dwLength - 1].lpwWordString;
  546. DWORD dwCompByteNum;
  547. dwCompByteNum = sizeof(WCHAR) * dwLength;
  548. while (nBegin <= nEnd) {
  549. nMid = (nBegin + nEnd) / 2;
  550. nCmp = memcmp(&(lpwAltWordString[nMid * dwLength]), lpcwString, dwCompByteNum);
  551. if (nCmp > 0) {
  552. nEnd = nMid - 1;
  553. } else if (nCmp < 0) {
  554. nBegin = nMid + 1;
  555. } else {
  556. dwGroupID = m_sAltWordInfo[dwLength - 1].pdwGroupID[nMid];
  557. // Fill AltWord
  558. *lppwAltWordBuf = new WCHAR[dwLength + 1];
  559. if (*lppwAltWordBuf) {
  560. for (DWORD i = 0; i < m_psAltWordHeader->sAltWordInfo[dwLength - 1].dwWordNumber; ++i) {
  561. if (i != (DWORD)nMid && m_sAltWordInfo[dwLength - 1].pdwGroupID[i] == dwGroupID) {
  562. CopyMemory((LPVOID)*lppwAltWordBuf,
  563. (LPVOID)&(m_sAltWordInfo[dwLength - 1].lpwWordString[i * dwLength]),
  564. sizeof(WCHAR) * dwLength);
  565. (*lppwAltWordBuf)[dwLength] = NULL;
  566. ++dwRet;
  567. goto _exit;
  568. }
  569. }
  570. }
  571. break;
  572. }
  573. }
  574. _exit:
  575. return dwRet;
  576. }