Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

625 lines
22 KiB

  1. #include <windows.h>
  2. #include "lexicon.h"
  3. #include "lexmgr.h"
  4. #include "PropNoun.h"
  5. #include <stdio.h>
  6. #include <imm.h>
  7. #include <stdio.h>
  8. CCHTLexicon::CCHTLexicon()
  9. {
  10. m_psLexiconHeader = NULL;
  11. m_pbLexiconBase = NULL;
  12. #ifdef CHINESE_PROP_NAME
  13. m_pcPropName = NULL;
  14. #endif
  15. m_sAddInLexicon.dwMaxWordNumber = 0;
  16. m_sAddInLexicon.dwWordNumber = 0;
  17. m_sAddInLexicon.psWordData = NULL;
  18. FillMemory(m_sAddInLexicon.wWordBeginIndex,
  19. sizeof(m_sAddInLexicon.wWordBeginIndex), 0);
  20. }
  21. CCHTLexicon::~CCHTLexicon()
  22. {
  23. DWORD i;
  24. // Do not build one char word signature
  25. if (m_psLexiconHeader) {
  26. for (i = 0 ; i < m_psLexiconHeader->dwMaxCharPerWord; ++i) {
  27. if (m_sWordInfo[i].pbFirstCharSignature) {
  28. delete m_sWordInfo[i].pbFirstCharSignature;
  29. }
  30. if (m_sWordInfo[i].pbLastCharSignature) {
  31. delete m_sWordInfo[i].pbLastCharSignature;
  32. }
  33. }
  34. }
  35. #ifdef CHINESE_PROP_NAME
  36. if (m_pcPropName) {
  37. delete m_pcPropName;
  38. }
  39. #endif
  40. // Free add in lexicon
  41. if (m_sAddInLexicon.psWordData) {
  42. for (i = 0; i < m_sAddInLexicon.dwWordNumber; ++i) {
  43. delete m_sAddInLexicon.psWordData[i].lpwszWordStr;
  44. m_sAddInLexicon.psWordData[i].lpwszWordStr = NULL;
  45. }
  46. delete m_sAddInLexicon.psWordData;
  47. m_sAddInLexicon.psWordData = NULL;
  48. }
  49. m_sAddInLexicon.dwMaxWordNumber = 0;
  50. m_sAddInLexicon.dwWordNumber = 0;
  51. }
  52. BOOL CCHTLexicon::InitData(
  53. HINSTANCE hInstance)
  54. {
  55. HRSRC hResInfo;
  56. HGLOBAL hResData;
  57. BOOL fRet = FALSE;
  58. TCHAR tszLexiconResName[MAX_PATH];
  59. DWORD i;
  60. // Init main lexicon
  61. lstrcpy(tszLexiconResName, TEXT("LEXICON"));
  62. if (!(hResInfo = FindResource(hInstance, tszLexiconResName, TEXT("BIN")))) {
  63. } else if (!(hResData = LoadResource(hInstance, hResInfo))) {
  64. } else if (!(m_pbLexiconBase = (LPBYTE)LockResource(hResData))) {
  65. } else {
  66. m_psLexiconHeader = (PSLexFileHeader)m_pbLexiconBase;
  67. for (i = 0 ; i < m_psLexiconHeader->dwMaxCharPerWord; ++i) {
  68. m_sWordInfo[i].lpwWordString = (LPWSTR)(m_pbLexiconBase +
  69. m_psLexiconHeader->sLexInfo[i].dwWordStringOffset);
  70. m_sWordInfo[i].pbAttribute = (LPBYTE)(m_pbLexiconBase +
  71. m_psLexiconHeader->sLexInfo[i].dwWordAttribOffset);
  72. m_sWordInfo[i].pwUnicount = (PWORD)(m_pbLexiconBase +
  73. m_psLexiconHeader->sLexInfo[i].dwWordCountOffset);
  74. m_sWordInfo[i].pbTerminalCode = (LPBYTE)(m_pbLexiconBase +
  75. m_psLexiconHeader->sLexInfo[i].dwTerminalCodeOffset);
  76. }
  77. BuildSignatureData();
  78. fRet = TRUE;
  79. }
  80. // Init alt lexicon
  81. lstrcpy(tszLexiconResName, TEXT("ALTWORD"));
  82. if (!(hResInfo = FindResource(hInstance, tszLexiconResName, TEXT("BIN")))) {
  83. } else if (!(hResData = LoadResource(hInstance, hResInfo))) {
  84. } else if (!(m_pbAltWordBase = (LPBYTE)LockResource(hResData))) {
  85. } else {
  86. m_psAltWordHeader = (PSAltLexFileHeader)m_pbAltWordBase;
  87. for (i = 0 ; i < m_psAltWordHeader->dwMaxCharPerWord; ++i) {
  88. m_sAltWordInfo[i].lpwWordString = (LPWSTR)(m_pbAltWordBase +
  89. m_psAltWordHeader->sAltWordInfo[i].dwWordStringOffset);
  90. m_sAltWordInfo[i].pdwGroupID = (PDWORD)(m_pbAltWordBase +
  91. m_psAltWordHeader->sAltWordInfo[i].dwWordGroupOffset);
  92. }
  93. fRet = TRUE;
  94. }
  95. #ifdef CHINESE_PROP_NAME
  96. m_pcPropName = new CProperNoun(hInstance);
  97. if (m_pcPropName) {
  98. m_pcPropName->InitData();
  99. }
  100. #endif
  101. #ifdef _DEBUG
  102. FILE *fp;
  103. DWORD j;
  104. WCHAR wUnicodeString[MAX_CHAR_PER_WORD + 1];
  105. CHAR cANSIString[MAX_CHAR_PER_WORD * 2 + 1];
  106. WORD wCount;
  107. fp = fopen("DM.dmp", "wt");
  108. for (i = 0 ; i < m_psLexiconHeader->dwMaxCharPerWord; ++i) {
  109. for (j = 0; j < m_psLexiconHeader->sLexInfo[i].dwWordNumber; ++j) {
  110. if (m_sWordInfo[i].pbAttribute[j] & ATTR_DM) {
  111. wCount = m_sWordInfo[i].pwUnicount[j];
  112. if (i == 0) {
  113. wUnicodeString[0] = (WORD)(CHT_UNICODE_BEGIN + j);
  114. } else {
  115. CopyMemory(wUnicodeString, &(m_sWordInfo[i].lpwWordString[j * (i + 1)]),
  116. sizeof(WCHAR) * (i + 1));
  117. }
  118. wUnicodeString[i + 1] = '\0';
  119. WideCharToMultiByte(950, WC_COMPOSITECHECK, wUnicodeString, i + 1 + 1,
  120. cANSIString, sizeof(cANSIString), NULL, NULL);
  121. fprintf(fp, "%s %d\n", cANSIString, wCount);
  122. }
  123. }
  124. }
  125. if (fp) {
  126. fclose(fp);
  127. }
  128. fp = fopen("COMPOUND.dmp", "wt");
  129. for (i = 0 ; i < m_psLexiconHeader->dwMaxCharPerWord; ++i) {
  130. for (j = 0; j < m_psLexiconHeader->sLexInfo[i].dwWordNumber; ++j) {
  131. if (m_sWordInfo[i].pbAttribute[j] & ATTR_COMPOUND) {
  132. wCount = m_sWordInfo[i].pwUnicount[j];
  133. if (i == 0) {
  134. wUnicodeString[0] = (WORD)(CHT_UNICODE_BEGIN + j);
  135. } else {
  136. CopyMemory(wUnicodeString, &(m_sWordInfo[i].lpwWordString[j * (i + 1)]),
  137. sizeof(WCHAR) * (i + 1));
  138. }
  139. wUnicodeString[i + 1] = '\0';
  140. WideCharToMultiByte(950, WC_COMPOSITECHECK, wUnicodeString, i + 1 + 1,
  141. cANSIString, sizeof(cANSIString), NULL, NULL);
  142. fprintf(fp, "%s %d\n", cANSIString, wCount);
  143. }
  144. }
  145. }
  146. if (fp) {
  147. fclose(fp);
  148. }
  149. #endif
  150. // Init EUDP to special word
  151. LoadEUDP();
  152. return fRet;
  153. }
  154. void CCHTLexicon::BuildSignatureData(void)
  155. {
  156. DWORD i, j, dwWordNumber;
  157. WORD wFirstChar, wLastChar;
  158. for (i = 0; i < MAX_CHAR_PER_WORD; ++i) {
  159. m_sWordInfo[i].pbFirstCharSignature = NULL;
  160. m_sWordInfo[i].pbLastCharSignature = NULL;
  161. }
  162. // Do not build one char word signature
  163. for (i = 0 ; i < m_psLexiconHeader->dwMaxCharPerWord; ++i) {
  164. dwWordNumber = m_psLexiconHeader->sLexInfo[i].dwWordNumber;
  165. if (i != 0 && dwWordNumber > WORD_NUM_TO_BUILD_SIGNATURE) {
  166. m_sWordInfo[i].pbFirstCharSignature = new BYTE[(CHT_UNICODE_END - CHT_UNICODE_BEGIN + 1) / 8 + 1];
  167. if (NULL == m_sWordInfo[i].pbFirstCharSignature) { continue; }
  168. FillMemory(m_sWordInfo[i].pbFirstCharSignature, (CHT_UNICODE_END - CHT_UNICODE_BEGIN + 1) / 8, 0);
  169. for (j = 0; j < dwWordNumber; ++j) {
  170. wFirstChar = m_sWordInfo[i].lpwWordString[(i + 1) * j];
  171. if (wFirstChar >= CHT_UNICODE_BEGIN) {
  172. m_sWordInfo[i].pbFirstCharSignature[(wFirstChar - CHT_UNICODE_BEGIN) / 8] |=
  173. (0x00000001 << ((wFirstChar - CHT_UNICODE_BEGIN) % 8));
  174. }
  175. }
  176. m_sWordInfo[i].pbLastCharSignature = new BYTE[(CHT_UNICODE_END - CHT_UNICODE_BEGIN + 1) / 8 + 1];
  177. if (NULL == m_sWordInfo[i].pbLastCharSignature) { continue; }
  178. FillMemory(m_sWordInfo[i].pbLastCharSignature, (CHT_UNICODE_END - CHT_UNICODE_BEGIN + 1) / 8, 0);
  179. for (j = 0; j < dwWordNumber; ++j) {
  180. wLastChar = m_sWordInfo[i].lpwWordString[(i + 1) * (j + 1) - 1];
  181. if (wLastChar >= CHT_UNICODE_BEGIN) {
  182. m_sWordInfo[i].pbLastCharSignature[(wLastChar - CHT_UNICODE_BEGIN) / 8] |=
  183. (0x00000001 << ((wLastChar - CHT_UNICODE_BEGIN) % 8));
  184. }
  185. }
  186. } else {
  187. m_sWordInfo[i].pbFirstCharSignature = NULL;
  188. m_sWordInfo[i].pbLastCharSignature = NULL;
  189. }
  190. }
  191. }
  192. BOOL CCHTLexicon::GetWordInfo(
  193. LPCWSTR lpcwString,
  194. DWORD dwLength,
  195. PWORD pwUnicount,
  196. PWORD pwAttrib,
  197. PBYTE pbTerminalCode)
  198. {
  199. BOOL fRet;
  200. BYTE bMainLexAttrib;
  201. fRet = GetMainLexiconWordInfo(lpcwString, dwLength, pwUnicount,
  202. &bMainLexAttrib, pbTerminalCode);
  203. *pwAttrib = bMainLexAttrib;
  204. if (fRet) { goto _exit; }
  205. #ifdef CHINESE_PROP_NAME
  206. if (dwLength == 3) {
  207. if (m_pcPropName->IsAChineseName(lpcwString, dwLength)) {
  208. *pbTerminalCode = ' ';
  209. *pwAttrib = ATTR_RULE_WORD;
  210. *pwUnicount = 100;
  211. fRet = TRUE;
  212. goto _exit;
  213. }
  214. }
  215. #endif
  216. fRet = GetAddInWordInfo(lpcwString, dwLength, pwUnicount,
  217. pwAttrib, pbTerminalCode);
  218. _exit:
  219. return fRet;
  220. }
  221. BOOL CCHTLexicon::GetMainLexiconWordInfo(
  222. LPCWSTR lpcwString,
  223. DWORD dwLength,
  224. PWORD pwUnicount,
  225. PBYTE pbAttrib,
  226. PBYTE pbTerminalCode)
  227. {
  228. INT nBegin, nEnd, nMid;
  229. INT nCmp;
  230. BOOL fRet = FALSE;
  231. LPWSTR lpwLexString;
  232. DWORD dwFirstCharIndex, dwLastCharIndex;
  233. if (dwLength > m_psLexiconHeader->dwMaxCharPerWord) { goto _exit; }
  234. if (lpcwString[0] < CHT_UNICODE_BEGIN || lpcwString[0] > CHT_UNICODE_END) {
  235. goto _exit;
  236. }
  237. dwFirstCharIndex = lpcwString[0] - CHT_UNICODE_BEGIN;
  238. if (dwLength == 1) {
  239. *pwUnicount = m_sWordInfo[dwLength - 1].pwUnicount[dwFirstCharIndex];
  240. *pbAttrib = m_sWordInfo[dwLength - 1].pbAttribute[dwFirstCharIndex];
  241. *pbTerminalCode = m_sWordInfo[dwLength - 1].pbTerminalCode[dwFirstCharIndex];
  242. fRet = TRUE;
  243. } else {
  244. // Check signature first
  245. if (m_sWordInfo[dwLength - 1].pbFirstCharSignature) {
  246. if (!(m_sWordInfo[dwLength - 1].pbFirstCharSignature[dwFirstCharIndex / 8] &
  247. (0x00000001 << (dwFirstCharIndex % 8)))) { goto _exit; }
  248. }
  249. if (lpcwString[dwLength - 1] >= CHT_UNICODE_BEGIN && lpcwString[dwLength - 1] <= CHT_UNICODE_END) {
  250. if (m_sWordInfo[dwLength - 1].pbLastCharSignature) {
  251. dwLastCharIndex = lpcwString[dwLength - 1] - CHT_UNICODE_BEGIN;
  252. if (!(m_sWordInfo[dwLength - 1].pbLastCharSignature[dwLastCharIndex / 8] &
  253. (0x00000001 << (dwLastCharIndex % 8)))) { goto _exit; }
  254. }
  255. }
  256. nBegin = 0;
  257. nEnd = m_psLexiconHeader->sLexInfo[dwLength - 1].dwWordNumber - 1;
  258. lpwLexString = m_sWordInfo[dwLength - 1].lpwWordString;
  259. DWORD dwCompByteNum = sizeof(WCHAR) * dwLength;
  260. while (nBegin <= nEnd) {
  261. nMid = (nBegin + nEnd) / 2;
  262. nCmp = memcmp(&(lpwLexString[nMid * dwLength]), lpcwString, dwCompByteNum);
  263. if (nCmp > 0) {
  264. nEnd = nMid - 1;
  265. } else if (nCmp < 0) {
  266. nBegin = nMid + 1;
  267. } else {
  268. *pwUnicount = m_sWordInfo[dwLength - 1].pwUnicount[nMid];
  269. *pbAttrib = m_sWordInfo[dwLength - 1].pbAttribute[nMid];
  270. *pbTerminalCode = m_sWordInfo[dwLength - 1].pbTerminalCode[nMid];
  271. fRet = TRUE;
  272. break;
  273. }
  274. }
  275. }
  276. _exit:
  277. if (!fRet) {
  278. *pwUnicount = 0;
  279. *pbAttrib = 0;
  280. *pbTerminalCode = ' ';
  281. }
  282. return fRet;
  283. }
  284. // Load EUDP
  285. int CALLBACK EUDPCountA(
  286. LPCSTR lpcszReading,
  287. DWORD dwStyle,
  288. LPCSTR lpcszString,
  289. LPVOID lpvData)
  290. {
  291. PSAddInLexicon psAddInLexicon;
  292. if (lstrlenA(lpcszString) / sizeof(WCHAR) <= MAX_CHAR_PER_WORD) {
  293. psAddInLexicon = (PSAddInLexicon)lpvData;
  294. ++psAddInLexicon->dwWordNumber;
  295. }
  296. return 1;
  297. }
  298. int CALLBACK EUDPCountW(
  299. LPCWSTR lpcwszReading,
  300. DWORD dwStyle,
  301. LPCWSTR lpcwszString,
  302. LPVOID lpvData)
  303. {
  304. PSAddInLexicon psAddInLexicon;
  305. if (lstrlenW(lpcwszString) <= MAX_CHAR_PER_WORD) {
  306. psAddInLexicon = (PSAddInLexicon)lpvData;
  307. ++psAddInLexicon->dwWordNumber;
  308. }
  309. return 1;
  310. }
  311. int CALLBACK EUDPLoadA(
  312. LPCSTR lpcszReading,
  313. DWORD dwStyle,
  314. LPCSTR lpcszString,
  315. LPVOID lpvData)
  316. {
  317. PSAddInLexicon psAddInLexicon;
  318. WORD wStrLen;
  319. wStrLen = (WORD)lstrlenA(lpcszString);
  320. if (wStrLen / sizeof(WCHAR) <= MAX_CHAR_PER_WORD) {
  321. psAddInLexicon = (PSAddInLexicon)lpvData;
  322. psAddInLexicon->psWordData[psAddInLexicon->dwWordNumber].lpwszWordStr = new WORD[wStrLen / sizeof(WCHAR) + 1]; // zero end
  323. if (psAddInLexicon->psWordData[psAddInLexicon->dwWordNumber].lpwszWordStr) {
  324. MultiByteToWideChar(950, MB_PRECOMPOSED, lpcszString, wStrLen + 1, psAddInLexicon->psWordData[psAddInLexicon->dwWordNumber].lpwszWordStr, wStrLen / sizeof(WCHAR) + 1);
  325. psAddInLexicon->psWordData[psAddInLexicon->dwWordNumber].wAttrib = ATTR_EUDP_WORD;
  326. psAddInLexicon->psWordData[psAddInLexicon->dwWordNumber].wLen = wStrLen / sizeof(WCHAR);
  327. ++psAddInLexicon->dwWordNumber;
  328. }
  329. }
  330. return 1;
  331. }
  332. int CALLBACK EUDPLoadW(
  333. LPCWSTR lpcwszReading,
  334. DWORD dwStyle,
  335. LPCWSTR lpcwszString,
  336. LPVOID lpvData)
  337. {
  338. PSAddInLexicon psAddInLexicon;
  339. WORD wStrLen;
  340. wStrLen = (WORD)lstrlenW(lpcwszString);
  341. if (wStrLen <= MAX_CHAR_PER_WORD) {
  342. psAddInLexicon = (PSAddInLexicon)lpvData;
  343. psAddInLexicon->psWordData[psAddInLexicon->dwWordNumber].lpwszWordStr = new WORD[wStrLen + 1];
  344. if (psAddInLexicon->psWordData[psAddInLexicon->dwWordNumber].lpwszWordStr) {
  345. CopyMemory(psAddInLexicon->psWordData[psAddInLexicon->dwWordNumber].lpwszWordStr, lpcwszString, (wStrLen + 1) * sizeof(WCHAR));
  346. psAddInLexicon->psWordData[psAddInLexicon->dwWordNumber].wAttrib = ATTR_EUDP_WORD;
  347. psAddInLexicon->psWordData[psAddInLexicon->dwWordNumber].wLen = wStrLen;
  348. ++psAddInLexicon->dwWordNumber;
  349. }
  350. }
  351. return 1;
  352. }
  353. int __cdecl CompSWordData(
  354. const void *arg1,
  355. const void *arg2)
  356. {
  357. PSWordData psWordData1, psWordData2;
  358. psWordData1 = (PSWordData)arg1;
  359. psWordData2 = (PSWordData)arg2;
  360. if (psWordData1->wLen < psWordData2->wLen) {
  361. return -1;
  362. } else if (psWordData1->wLen > psWordData2->wLen) {
  363. return 1;
  364. } else {
  365. return memcmp(psWordData1->lpwszWordStr,
  366. psWordData2->lpwszWordStr, psWordData1->wLen * sizeof(WCHAR));
  367. }
  368. }
  369. void CCHTLexicon::LoadEUDP(void)
  370. {
  371. DWORD i;
  372. m_sAddInLexicon.dwWordNumber = 0;
  373. m_sAddInLexicon.dwMaxWordNumber = 0;
  374. m_sAddInLexicon.psWordData = NULL;
  375. OSVERSIONINFOA OSVerInfo;
  376. OSVerInfo.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
  377. GetVersionExA(&OSVerInfo);
  378. if (OSVerInfo.dwPlatformId == VER_PLATFORM_WIN32_NT) {
  379. ImmEnumRegisterWordW(HKL((ULONG_PTR) 0xE0080404), EUDPCountW, NULL,
  380. IME_REGWORD_STYLE_USER_PHRASE, NULL, &m_sAddInLexicon);
  381. } else {
  382. ImmEnumRegisterWordA(HKL((ULONG_PTR) 0xE0080404), EUDPCountA, NULL,
  383. IME_REGWORD_STYLE_USER_PHRASE, NULL, &m_sAddInLexicon);
  384. }
  385. if (m_sAddInLexicon.dwWordNumber) {
  386. m_sAddInLexicon.dwMaxWordNumber = m_sAddInLexicon.dwWordNumber + EUDP_GROW_NUMBER;
  387. m_sAddInLexicon.psWordData = new SWordData[m_sAddInLexicon.dwMaxWordNumber];
  388. m_sAddInLexicon.dwWordNumber = 0;
  389. if (OSVerInfo.dwPlatformId == VER_PLATFORM_WIN32_NT) {
  390. ImmEnumRegisterWordW(HKL((ULONG_PTR) 0xE0080404), EUDPLoadW, NULL,
  391. IME_REGWORD_STYLE_USER_PHRASE, NULL, &m_sAddInLexicon);
  392. } else {
  393. ImmEnumRegisterWordA(HKL((ULONG_PTR) 0xE0080404), EUDPLoadA, NULL,
  394. IME_REGWORD_STYLE_USER_PHRASE, NULL, &m_sAddInLexicon);
  395. }
  396. qsort(m_sAddInLexicon.psWordData, m_sAddInLexicon.dwWordNumber,
  397. sizeof(SWordData), CompSWordData);
  398. for (i = 0; i < m_sAddInLexicon.dwWordNumber; ++i) {
  399. ++m_sAddInLexicon.wWordBeginIndex[m_sAddInLexicon.psWordData[i].wLen - 1];
  400. }
  401. WORD wCount = m_sAddInLexicon.wWordBeginIndex[0];
  402. for (i = 1; i <= MAX_CHAR_PER_WORD; ++i) {
  403. WORD wTemp = m_sAddInLexicon.wWordBeginIndex[i];
  404. m_sAddInLexicon.wWordBeginIndex[i] = wCount;
  405. wCount += wTemp;
  406. }
  407. } else {
  408. m_sAddInLexicon.dwMaxWordNumber = 0;
  409. }
  410. }
  411. // Add AP word
  412. BOOL CCHTLexicon::AddInLexiconInsert(
  413. LPCWSTR lpcwszWordStr,
  414. WORD wAttrib)
  415. {
  416. BOOL fRet = FALSE;
  417. WORD wStrLen, i, j;
  418. INT nIndex;
  419. wStrLen = (WORD)lstrlenW(lpcwszWordStr);
  420. if (wStrLen > MAX_CHAR_PER_WORD) { goto _exit; }
  421. // if exit, just change it's attrib;
  422. // Be carefully, EUDP > Error word
  423. if ((nIndex = GetAddInWordInfoIndex(lpcwszWordStr, wStrLen)) != -1) {
  424. if (m_sAddInLexicon.psWordData[nIndex].wAttrib == ATTR_EUDP_WORD) {
  425. } else {
  426. m_sAddInLexicon.psWordData[nIndex].wAttrib = wAttrib;
  427. }
  428. goto _exit;
  429. }
  430. // Enlarge space
  431. if (m_sAddInLexicon.dwMaxWordNumber == m_sAddInLexicon.dwWordNumber) {
  432. PSWordData psTempWordData;
  433. psTempWordData = new SWordData [m_sAddInLexicon.dwMaxWordNumber + EUDP_GROW_NUMBER];
  434. if (!psTempWordData) { goto _exit; }
  435. CopyMemory(psTempWordData, m_sAddInLexicon.psWordData,
  436. m_sAddInLexicon.dwWordNumber * sizeof(SWordData));
  437. delete [] m_sAddInLexicon.psWordData;
  438. m_sAddInLexicon.psWordData = psTempWordData;
  439. m_sAddInLexicon.dwMaxWordNumber += EUDP_GROW_NUMBER;
  440. }
  441. // Insert word
  442. for (i = m_sAddInLexicon.wWordBeginIndex[wStrLen - 1]; i < m_sAddInLexicon.wWordBeginIndex[wStrLen]; ++i) {
  443. if (memcmp(lpcwszWordStr, m_sAddInLexicon.psWordData[i].lpwszWordStr,
  444. wStrLen * sizeof(WCHAR)) < 0) {
  445. break;
  446. }
  447. }
  448. for (j = (WORD)m_sAddInLexicon.dwWordNumber; j > i; --j) {
  449. m_sAddInLexicon.psWordData[j] = m_sAddInLexicon.psWordData[j - 1];
  450. }
  451. m_sAddInLexicon.psWordData[i].lpwszWordStr = new WORD[wStrLen + 1];
  452. CopyMemory(m_sAddInLexicon.psWordData[i].lpwszWordStr, lpcwszWordStr,
  453. (wStrLen + 1) * sizeof(WORD));
  454. m_sAddInLexicon.psWordData[i].wAttrib = wAttrib;
  455. m_sAddInLexicon.psWordData[i].wLen = wStrLen;
  456. ++m_sAddInLexicon.dwWordNumber;
  457. for (i = wStrLen; i <= MAX_CHAR_PER_WORD; ++i) {
  458. ++m_sAddInLexicon.wWordBeginIndex[i];
  459. }
  460. fRet = TRUE;
  461. _exit:
  462. #ifdef _DEBUG
  463. for (i = 1; i <= MAX_CHAR_PER_WORD; ++i) {
  464. for (j = m_sAddInLexicon.wWordBeginIndex[i - 1]; j < m_sAddInLexicon.wWordBeginIndex[i]; ++j) {
  465. if (m_sAddInLexicon.psWordData[j].wLen != i) {
  466. MessageBox(0, TEXT("Error string length"), TEXT("Error"), MB_OK);
  467. }
  468. if (j == m_sAddInLexicon.wWordBeginIndex[i] - 1) {
  469. } else if (memcmp(m_sAddInLexicon.psWordData[j].lpwszWordStr,
  470. m_sAddInLexicon.psWordData[j + 1].lpwszWordStr,
  471. m_sAddInLexicon.psWordData[j].wLen * sizeof(WORD)) >= 0) {
  472. MessageBox(0, TEXT("Error string order"), TEXT("Error"), MB_OK);
  473. } else {
  474. }
  475. }
  476. }
  477. #endif
  478. return fRet;
  479. }
  480. BOOL CCHTLexicon::GetAddInWordInfo(
  481. LPCWSTR lpcwString,
  482. DWORD dwLength,
  483. PWORD pwUnicount,
  484. PWORD pwAttrib,
  485. PBYTE pbTerminalCode)
  486. {
  487. BOOL fRet = FALSE;
  488. INT nIndex;
  489. if (dwLength > MAX_CHAR_PER_WORD) { goto _exit; }
  490. nIndex = GetAddInWordInfoIndex(lpcwString, dwLength);
  491. if (nIndex == -1) { goto _exit; }
  492. if (pwUnicount) {
  493. *pwUnicount = 10000;
  494. }
  495. if (pwAttrib) {
  496. *pwAttrib = m_sAddInLexicon.psWordData[nIndex].wAttrib;
  497. }
  498. if (pbTerminalCode) {
  499. *pbTerminalCode = ' ';
  500. }
  501. fRet = TRUE;
  502. _exit:
  503. return fRet;
  504. }
  505. // return -1 if not find
  506. INT CCHTLexicon::GetAddInWordInfoIndex(
  507. LPCWSTR lpcwString,
  508. DWORD dwLength)
  509. {
  510. INT nRet = -1;
  511. INT nBegin, nEnd, nMid;
  512. INT nCmp;
  513. if (dwLength > MAX_CHAR_PER_WORD) { goto _exit; }
  514. if (m_sAddInLexicon.wWordBeginIndex[dwLength - 1] == m_sAddInLexicon.wWordBeginIndex[dwLength]) {
  515. goto _exit;
  516. }
  517. nBegin = m_sAddInLexicon.wWordBeginIndex[dwLength - 1];
  518. nEnd = m_sAddInLexicon.wWordBeginIndex[dwLength] - 1;
  519. while (nBegin <= nEnd) {
  520. nMid = (nBegin + nEnd) / 2;
  521. nCmp = memcmp(m_sAddInLexicon.psWordData[nMid].lpwszWordStr,
  522. lpcwString, dwLength * sizeof(WCHAR));
  523. if (nCmp > 0) {
  524. nEnd = nMid - 1;
  525. } else if (nCmp < 0) {
  526. nBegin = nMid + 1;
  527. } else {
  528. nRet = nMid;
  529. break;
  530. }
  531. }
  532. _exit:
  533. return nRet;
  534. }
  535. DWORD CCHTLexicon::GetAltWord(
  536. LPCWSTR lpcwString,
  537. DWORD dwLength,
  538. LPWSTR* lppwAltWordBuf)
  539. {
  540. INT nBegin, nEnd, nMid;
  541. INT nCmp;
  542. DWORD dwRet = 0;
  543. LPWSTR lpwAltWordString;
  544. DWORD dwGroupID;
  545. if (dwLength > m_psAltWordHeader->dwMaxCharPerWord) { goto _exit; }
  546. nBegin = 0;
  547. nEnd = m_psAltWordHeader->sAltWordInfo[dwLength - 1].dwWordNumber - 1;
  548. lpwAltWordString = m_sAltWordInfo[dwLength - 1].lpwWordString;
  549. DWORD dwCompByteNum;
  550. dwCompByteNum = sizeof(WCHAR) * dwLength;
  551. while (nBegin <= nEnd) {
  552. nMid = (nBegin + nEnd) / 2;
  553. nCmp = memcmp(&(lpwAltWordString[nMid * dwLength]), lpcwString, dwCompByteNum);
  554. if (nCmp > 0) {
  555. nEnd = nMid - 1;
  556. } else if (nCmp < 0) {
  557. nBegin = nMid + 1;
  558. } else {
  559. dwGroupID = m_sAltWordInfo[dwLength - 1].pdwGroupID[nMid];
  560. // Fill AltWord
  561. *lppwAltWordBuf = new WCHAR[dwLength + 1];
  562. if (*lppwAltWordBuf) {
  563. for (DWORD i = 0; i < m_psAltWordHeader->sAltWordInfo[dwLength - 1].dwWordNumber; ++i) {
  564. if (i != (DWORD)nMid && m_sAltWordInfo[dwLength - 1].pdwGroupID[i] == dwGroupID) {
  565. CopyMemory((LPVOID)*lppwAltWordBuf,
  566. (LPVOID)&(m_sAltWordInfo[dwLength - 1].lpwWordString[i * dwLength]),
  567. sizeof(WCHAR) * dwLength);
  568. (*lppwAltWordBuf)[dwLength] = NULL;
  569. ++dwRet;
  570. goto _exit;
  571. }
  572. }
  573. }
  574. break;
  575. }
  576. }
  577. _exit:
  578. return dwRet;
  579. }