Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

803 lines
26 KiB

  1. //$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
  2. //
  3. // Copyright (c) 2001 Microsoft Corporation. All rights reserved.
  4. //
  5. // Module:
  6. // volcano/dll/lattice-lm.cpp
  7. //
  8. // Description:
  9. // Interface between the recognizer and IFELang3. Unlike the rest of
  10. // the recognizer this is written in C++ because IFELang3 uses a COM
  11. // interface.
  12. //
  13. // Author:
  14. // hrowley
  15. //
  16. //$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
  17. // Conditional define for use of IFELang3, so it can be turned off for WinCE
  18. #ifdef USE_IFELANG3
  19. #include <stdlib.h>
  20. #include <limits.h>
  21. // Make sure the GUIDs defined in imlang.h actually get instantiated
  22. #define INITGUID
  23. // Uncomment to dump out the lattice fed to the language model
  24. //#define DUMP_LM_LATTICE
  25. // Uncomment to append all the lattices into one file rather than overwriting.
  26. //#define DUMP_LM_LATTICE_APPEND
  27. #define LM_LATTICE_FILENAME L"c:/temp/cht-lm-log.txt"
  28. // Uncomment this to get the dumped lattice in the form expected by the CHS test program
  29. //#define LM_LATTICE_UNICODE
  30. //#define LM_LATTICE_CHS
  31. #define LM_LATTICE_CHT
  32. #if defined(HWX_TUNE) || defined(DUMP_LM_LATTICE)
  33. #include <stdio.h>
  34. #endif
  35. #include "volcanop.h"
  36. // Japanese and IFELang3 generic stuff
  37. #include "imlang.h"
  38. // Simplified and traditional Chinese
  39. #include "fel3user.h"
  40. #ifdef USE_IFELANG3_BIGRAMS
  41. #include "lattice-bigram.h"
  42. #endif
  43. // A global pointer to the IIMLanguage interface object
  44. static IIMLanguage *g_pIFELang3=NULL;
  45. // Each language has its own set of GUIDs used as command codes, so that the IIMLanguage interface can direct requests to
  46. // the appropriate language model.
  47. static int SetLanguageModelCommands(IMLANGUAGE_LM aLMITEM[3], IMLANGUAGE_LM_PARAM *sTuneParam, SImeLMParam *sHWTip1Param)
  48. {
  49. int nLMITEM = 0;
  50. // Japanese
  51. if (wcsicmp(g_szRecognizerLanguage,L"JPN")==0) {
  52. /* aLMITEM[nLMITEM].guid = CLSID_MakeLatticeForChar;
  53. aLMITEM[nLMITEM].pParam = NULL;
  54. nLMITEM++;
  55. aLMITEM[nLMITEM].guid = CLSID_AttachNGram;
  56. aLMITEM[nLMITEM].pParam = NULL;
  57. nLMITEM++;
  58. aLMITEM[nLMITEM].guid = CLSID_SearchBestPath;
  59. aLMITEM[nLMITEM].pParam = NULL;
  60. nLMITEM++; */
  61. aLMITEM[nLMITEM].guid = CLSID_JPN_IMELM_BBO_OS;
  62. aLMITEM[nLMITEM].pParam = NULL;
  63. nLMITEM++;
  64. }
  65. // For now, disable the use of the Chinese and Korean language models, since
  66. // at least the Chinese language models currently are hurting accuracy.
  67. // Simplified Chinese
  68. if (wcsicmp(g_szRecognizerLanguage,L"CHS")==0) {
  69. sHWTip1Param->dwLicenseId = CHINESE_IMELM_LICENSEID;
  70. sHWTip1Param->flWeight = (float)(1.0);
  71. sTuneParam->guid = GUID_CHINESE_IMELM_PARAM;
  72. sTuneParam->dwSize = sizeof(SImeLMParam);
  73. sTuneParam->pbData = (BYTE*)sHWTip1Param;
  74. aLMITEM[nLMITEM].guid = CLSID_CHS_IMELM_BBO_OS;
  75. aLMITEM[nLMITEM].pParam = sTuneParam;
  76. nLMITEM++;
  77. }
  78. #if 0
  79. // Traditional Chinese
  80. if (wcsicmp(g_szRecognizerLanguage,L"CHT")==0) {
  81. sHWTip1Param->dwLicenseId = CHINESE_IMELM_LICENSEID;
  82. sHWTip1Param->flWeight = (float)(1.0);
  83. sTuneParam->guid = GUID_CHINESE_IMELM_PARAM;
  84. sTuneParam->dwSize = sizeof(SImeLMParam);
  85. sTuneParam->pbData = (BYTE*)sHWTip1Param;
  86. aLMITEM[nLMITEM].guid = CLSID_CHT_IMELM_BBO_OS;
  87. aLMITEM[nLMITEM].pParam = sTuneParam;
  88. nLMITEM++;
  89. }
  90. // Korean
  91. if (wcsicmp(g_szRecognizerLanguage,L"KOR")==0) {
  92. aLMITEM[nLMITEM].guid = CLSID_KOR_IMELM_BBO_OS;
  93. aLMITEM[nLMITEM].pParam = NULL;
  94. nLMITEM++;
  95. }
  96. #endif
  97. // Return the number of command codes, or 0 if there was no match for the current language
  98. return nLMITEM;
  99. }
  100. // This function tests the IIMLanguage interface to see if the appropriate language model is
  101. // installed.
  102. BOOL TryDummyLattice()
  103. {
  104. IMLANGUAGE_LM_PARAM sTuneParam;
  105. SImeLMParam sHWTip1Param;
  106. IMLANGUAGE_LM aLMITEM[3];
  107. int nLMITEM = 0;
  108. HRESULT rc;
  109. // Set return code to look like an error
  110. rc = E_FAIL;
  111. // Set up the language specific commands
  112. nLMITEM = SetLanguageModelCommands(aLMITEM, &sTuneParam, &sHWTip1Param);
  113. if (nLMITEM>0) {
  114. // If there are any commands for that language, then apply them to the lattice.
  115. rc = g_pIFELang3->GetLatticeMorphResult(nLMITEM, aLMITEM, 0, NULL, 0, NULL);
  116. }
  117. return (SUCCEEDED(rc));
  118. }
  119. // Load up IIMLanguage and check if it has a language model for this recognizer's language.
  120. // If successful, returns TRUE, otherwise returns FALSE.
  121. BOOL LatticeConfigIFELang3()
  122. {
  123. if (g_pIFELang3 != NULL)
  124. {
  125. return FALSE;
  126. }
  127. HRESULT res = CoCreateInstance(CLSID_IMLanguage_OS, NULL, CLSCTX_INPROC_SERVER, IID_IMLanguage, (void**)&g_pIFELang3);
  128. // TPDBG_DMSG2("%08X: CoCreate(CLSID_IMLanguage_OS) -> %08X\n", GetCurrentThreadId(), res);
  129. if (SUCCEEDED(res)) {
  130. if (TryDummyLattice()) {
  131. // TPDBG_DMSG1("%08X: TryDummyLattice() -> TRUE\n", GetCurrentThreadId());
  132. return TRUE;
  133. }
  134. }
  135. // Clean up if things didn't work.
  136. // TPDBG_DMSG1("%08X: TryDummyLattice() -> FALSE\n", GetCurrentThreadId());
  137. LatticeUnconfigIFELang3();
  138. return FALSE;
  139. }
  140. // Check whether IIMLanguage is loaded.
  141. BOOL LatticeIFELang3Available()
  142. {
  143. return (g_pIFELang3!=NULL);
  144. }
  145. // Unload IIMLanguage if it is loaded.
  146. BOOL LatticeUnconfigIFELang3()
  147. {
  148. if (g_pIFELang3!=NULL) {
  149. // TPDBG_DMSG1("%08X: g_pIFELang3->Release()\n", GetCurrentThreadId());
  150. g_pIFELang3->Release();
  151. g_pIFELang3=NULL;
  152. }
  153. return TRUE;
  154. }
  155. // qsort: compare the start time of element
  156. int __cdecl CompareElemTime(const void *ps1, const void *ps2)
  157. {
  158. return ((IMLANGUAGE_ELEMENT*)ps1)->dwFrameStart -
  159. ((IMLANGUAGE_ELEMENT*)ps2)->dwFrameStart;
  160. }
  161. BOOL ProbIsBad(LATTICE *lat, float flProb)
  162. {
  163. if (lat->fUseGuide)
  164. {
  165. return (flProb < g_vtuneInfo.pTune->flStringHwxThreshold);
  166. }
  167. else
  168. {
  169. return (flProb < g_vtuneInfo.pTune->flFreeHwxThreshold);
  170. }
  171. }
  172. // Figure out which strokes are valid end of character in paths that span the
  173. // whole lattice.
  174. static BOOL CheckIfValid(LATTICE *lat, int iStroke, BYTE *pValidEnd)
  175. {
  176. LATTICE_ALT_LIST *pAlts;
  177. int ii;
  178. BOOL fValidPath;
  179. // Have we already checked this position?
  180. if (pValidEnd[iStroke] == LCF_INVALID) {
  181. return FALSE;
  182. } else if (pValidEnd[iStroke] == LCF_VALID) {
  183. return TRUE;
  184. }
  185. // Assume no valid path, until proved otherwise.
  186. fValidPath = FALSE;
  187. // Process each element ending at this position in lattice.
  188. pAlts = lat->pAltList + iStroke;
  189. for (ii = 0; ii < pAlts->nUsed; ++ii) {
  190. LATTICE_ELEMENT *pElem;
  191. int prevEnd;
  192. pElem = pAlts->alts + ii;
  193. prevEnd = iStroke - pElem->nStrokes;
  194. ASSERT(prevEnd >= -1);
  195. if (ProbIsBad(lat, pElem->logProb)) {
  196. // Pretend that the really bad scores don't exist!
  197. // This test MUST match code building up lattice to use!
  198. } else if (prevEnd < 0) {
  199. // Reached start.
  200. fValidPath = TRUE;
  201. } else if (CheckIfValid(lat, prevEnd, pValidEnd)) {
  202. // Have path to start.
  203. fValidPath = TRUE;
  204. }
  205. }
  206. // Did one (or more) paths reach the start?
  207. if (fValidPath) {
  208. pValidEnd[iStroke] = LCF_VALID;
  209. return TRUE;
  210. } else {
  211. pValidEnd[iStroke] = LCF_INVALID;
  212. return FALSE;
  213. }
  214. }
  215. // Apply language model to produce a better current path
  216. void ApplyLanguageModel(LATTICE *lat, wchar_t *wszCorrectAnswer)
  217. {
  218. DWORD dwNeutralSize;
  219. IMLANGUAGE_ELEMENT *pLattice;
  220. IMLANGUAGE_INFO *pDataList;
  221. #ifdef USE_IFELANG3_BIGRAMS
  222. DWORD dwBigramSize;
  223. LATTICE_BIGRAM_INFO_LIST *pBigramInfo;
  224. #endif
  225. int cElem, cData;
  226. int cLattice;
  227. DWORD cElemOut;
  228. IMLANGUAGE_ELEMENT *pElemOut;
  229. IMLANGUAGE_LM_PARAM sTuneParam;
  230. SImeLMParam sHWTip1Param;
  231. IMLANGUAGE_LM aLMITEM[3];
  232. IMLANGUAGE_INFO_NEUTRAL1 *pNeutral;
  233. BYTE *pValidEnd;
  234. int size;
  235. int nLMITEM;
  236. DWORD ii, jj;
  237. HRESULT rc;
  238. #ifdef DUMP_LM_LATTICE
  239. FILE *f;
  240. #endif
  241. int iStroke, iAlt;
  242. int cPreContext = 0, cPostContext = 0;
  243. // DebugBreak();
  244. // return;
  245. // fprintf(stderr,"ApplyLanguageModel() fUseIFELang3=%d\n", lat->fUseIFELang3);
  246. ASSERT(lat!=NULL);
  247. if (!LatticeIFELang3Available()) return;
  248. if (!lat->fUseLM || !lat->fUseIFELang3) return;
  249. if (lat->nStrokes==0) return;
  250. // Check if there are any alphabetic characters in the best path. If so, skip IFELang3
  251. for (iStroke = 0; iStroke < lat->nStrokes; iStroke++)
  252. {
  253. for (iAlt = 0; iAlt < lat->pAltList[iStroke].nUsed; iAlt++)
  254. {
  255. if (lat->pAltList[iStroke].alts[iAlt].fCurrentPath)
  256. {
  257. wchar_t dch = lat->pAltList[iStroke].alts[iAlt].wChar;
  258. if (dch != SYM_UNKNOWN)
  259. {
  260. wchar_t wch = LocRunDense2Unicode(&g_locRunInfo, dch);
  261. if ((wch >= L'a' && wch <= L'z') || (wch >= L'A' && wch <= L'Z'))
  262. {
  263. return;
  264. }
  265. }
  266. }
  267. }
  268. }
  269. // Check if we have a character of context that needs to be included in the lattice
  270. if (lat->wszBefore != NULL)
  271. cPreContext = wcslen(lat->wszBefore);
  272. if (lat->wszAfter != NULL)
  273. cPostContext = wcslen(lat->wszAfter);
  274. // How big does each lattice element need to be? The structure includes space for
  275. // one character, and it needs to be followed by 2 NUL characters.
  276. dwNeutralSize = sizeof(IMLANGUAGE_INFO_NEUTRAL1) + sizeof(wchar_t)*2;
  277. // Figure out valid character transition points by stroke. We don't wan't to include
  278. // pieces of lattice that don't actually connect up for the full run. This uses
  279. // a recursive algorithm to find valid paths. Since it marks the paths as it
  280. // finds them, it can quickly test and not redo work.
  281. size = sizeof(BYTE) * lat->nStrokes;
  282. pValidEnd = (BYTE *)ExternAlloc(size);
  283. // If we failed to allocate memory, just return
  284. if (pValidEnd==NULL) return;
  285. memset(pValidEnd, 0, size);
  286. if (!CheckIfValid(lat, lat->nStrokes - 1, pValidEnd)) {
  287. // No valid paths found?!?!
  288. // This can happen... so clean up and return.
  289. // We'll just get the best path selected by the HWX engine.
  290. goto allocated_valid;
  291. }
  292. // First allocate space to build the lattice up in.
  293. cElem = lat->nStrokes * MaxAltsPerStroke + (cPreContext + cPostContext);
  294. cData = cElem;
  295. #ifdef USE_IFELANG3_BIGRAMS
  296. cData += lat->nStrokes*MaxAltsPerStroke;
  297. #endif
  298. pDataList = (IMLANGUAGE_INFO*)ExternAlloc(sizeof(IMLANGUAGE_INFO)*cData);
  299. if (pDataList==NULL) goto allocated_valid;
  300. pLattice = (IMLANGUAGE_ELEMENT*)ExternAlloc(sizeof(IMLANGUAGE_ELEMENT)*cElem);
  301. if (pLattice==NULL) goto allocated_data;
  302. #ifdef DUMP_LM_LATTICE
  303. #ifdef DUMP_LM_LATTICE_APPEND
  304. f=_wfopen(LM_LATTICE_FILENAME,L"ab");
  305. #else
  306. f=_wfopen(LM_LATTICE_FILENAME,L"wb");
  307. #endif
  308. fwprintf(f, L"<S>\r\n\r\n");
  309. #endif
  310. #ifdef HWX_TUNE
  311. if (g_pTuneFile != NULL)
  312. {
  313. for (int i = 0; i < (int) wcslen(wszCorrectAnswer); i++)
  314. {
  315. if (i > 0)
  316. {
  317. fprintf(g_pTuneFile, " ");
  318. }
  319. fprintf(g_pTuneFile, "%04X", wszCorrectAnswer[i]);
  320. }
  321. fprintf(g_pTuneFile, "\n");
  322. }
  323. #endif
  324. cLattice = 0;
  325. cData = 0;
  326. if (lat->wszBefore != NULL)
  327. {
  328. int iSrc;
  329. int iFrame = 1;
  330. for (iSrc = wcslen(lat->wszBefore) - 1; iSrc >= 0; iSrc--)
  331. {
  332. // If we have a context character, then create a lattice element for it.
  333. pNeutral = (IMLANGUAGE_INFO_NEUTRAL1*)ExternAlloc(dwNeutralSize);
  334. if (pNeutral == NULL) goto allocated_elements;
  335. pNeutral->dwUnigram = 0;
  336. pNeutral->fHypothesis = FALSE;
  337. pNeutral->wsz[0] = LocRunDense2Unicode(&g_locRunInfo, lat->wszBefore[iSrc]);
  338. pNeutral->wsz[1] = 0;
  339. pNeutral->wsz[2] = 0;
  340. pDataList[cData].guid = GUID_IMLANGUAGE_INFO_NEUTRAL1;
  341. pDataList[cData].dwSize = dwNeutralSize;
  342. pDataList[cData].pbData = (BYTE*)pNeutral;
  343. pLattice[cLattice].dwFrameStart = iFrame++;
  344. pLattice[cLattice].dwFrameLen = 1;
  345. pLattice[cLattice].dwTotalInfo = 1;
  346. pLattice[cLattice].pInfo = pDataList + cLattice;
  347. #ifdef HWX_TUNE
  348. if (g_pTuneFile != NULL)
  349. {
  350. fprintf(g_pTuneFile, "%04X %d %d %f\n",
  351. pNeutral->wsz[0],
  352. pLattice[cLattice].dwFrameStart,
  353. pLattice[cLattice].dwFrameLen,
  354. 0.0);
  355. }
  356. #endif
  357. #ifdef DUMP_LM_LATTICE
  358. if (f!=NULL) {
  359. char sz[256];
  360. ZeroMemory( sz, 256 );
  361. #if defined(LM_LATTICE_CHS)
  362. WideCharToMultiByte( 936, 0, pNeutral->wsz, -1, sz, 256, 0, 0 );
  363. #elif defined(LM_LATTICE_CHT)
  364. WideCharToMultiByte( 950, 0, pNeutral->wsz, -1, sz, 256, 0, 0 );
  365. #elif defined(LM_LATTICE_JPN)
  366. WideCharToMultiByte( 932, 0, pNeutral->wsz, -1, sz, 256, 0, 0 );
  367. #endif
  368. #if defined(LM_LATTICE_CHS) || defined(LM_LATTICE_CHT)
  369. fwprintf(f, L"%s, %d, %d, %d\r\n",
  370. pNeutral->wsz, pLattice[cLattice].dwFrameStart, pLattice[cLattice].dwFrameLen, pNeutral->dwUnigram);
  371. #elif defined(LM_LATTICE_UNICODE)
  372. fwprintf(f, L"%3d %2d %10d %s U+%04X\r\n",
  373. pLattice[cLattice].dwFrameStart, pLattice[cLattice].dwFrameLen, pNeutral->dwUnigram,
  374. pNeutral->wsz, pNeutral->wsz[0] );
  375. #endif
  376. }
  377. #endif
  378. cData++;
  379. cLattice++;
  380. }
  381. }
  382. // Now process each position.
  383. for (iStroke=0; iStroke<lat->nStrokes; iStroke++) {
  384. // Do we actually want any paths that end at this location?
  385. if (pValidEnd[iStroke] != LCF_VALID) {
  386. continue;
  387. }
  388. // int max=5;
  389. // if (iStroke==0) max=2;
  390. for (iAlt=0; iAlt<lat->pAltList[iStroke].nUsed; iAlt++) if (1 /*iAlt<max || lat->pAltList[iStroke].alts[iAlt].fCurrentPath*/) {
  391. wchar_t unicode;
  392. float flScore;
  393. // Prune really bad scores. This MUST match test in path
  394. // checking code!
  395. if (ProbIsBad(lat, lat->pAltList[iStroke].alts[iAlt].logProb)) {
  396. continue;
  397. }
  398. // Fill in lattice element
  399. pNeutral = (IMLANGUAGE_INFO_NEUTRAL1*)ExternAlloc(dwNeutralSize);
  400. if (pNeutral==NULL) goto allocated_elements;
  401. flScore = -lat->pAltList[iStroke].alts[iAlt].logProb;
  402. if (lat->fUseGuide)
  403. {
  404. flScore *= g_vtuneInfo.pTune->flStringHwxWeight;
  405. }
  406. else
  407. {
  408. flScore *= g_vtuneInfo.pTune->flFreeHwxWeight * lat->pAltList[iStroke].alts[iAlt].nStrokes;
  409. }
  410. if (flScore < 0) flScore = (float)0;
  411. if (flScore > INT_MAX) flScore = (float)INT_MAX;
  412. pNeutral->dwUnigram = (DWORD)(flScore);
  413. if (lat->pAltList[iStroke].alts[iAlt].wChar==SYM_UNKNOWN)
  414. {
  415. unicode=L' ';
  416. }
  417. else
  418. {
  419. unicode = LocRunDense2Unicode(
  420. &g_locRunInfo, lat->pAltList[iStroke].alts[iAlt].wChar
  421. );
  422. }
  423. pNeutral->fHypothesis = FALSE;
  424. pNeutral->wsz[0] = unicode;
  425. pNeutral->wsz[1] = 0;
  426. pNeutral->wsz[2] = 0;
  427. // Check for EUDC codes, which we shouldn't be generating (for debugging)
  428. // if (pNeutral->wsz[0]>=0xE000 && pNeutral->wsz[0]<0xF900) {
  429. // fprintf(stderr,"EUDC code in lattice (stroke %d alt %d): U+%04X\n",iStroke,iAlt,pNeutral->wsz[0]);
  430. // }
  431. pLattice[cLattice].dwFrameStart = (iStroke - lat->pAltList[iStroke].alts[iAlt].nStrokes) + 2 + cPreContext;
  432. pLattice[cLattice].dwFrameLen=lat->pAltList[iStroke].alts[iAlt].nStrokes;
  433. pLattice[cLattice].dwTotalInfo = 1;
  434. pLattice[cLattice].pInfo = pDataList + cData;
  435. pDataList[cData].guid = GUID_IMLANGUAGE_INFO_NEUTRAL1;
  436. pDataList[cData].dwSize = dwNeutralSize;
  437. pDataList[cData].pbData = (BYTE*)pNeutral;
  438. cData++;
  439. #ifdef HWX_TUNE
  440. if (g_pTuneFile != NULL)
  441. {
  442. fprintf(g_pTuneFile, "%04X %d %d %f\n",
  443. pNeutral->wsz[0],
  444. pLattice[cLattice].dwFrameStart,
  445. pLattice[cLattice].dwFrameLen,
  446. lat->pAltList[iStroke].alts[iAlt].logProb);
  447. }
  448. #endif
  449. #ifdef USE_IFELANG3_BIGRAMS
  450. lat->pAltList[iStroke].alts[iAlt].indexIFELang3=cLattice;
  451. if (lat->pAltList[iStroke].alts[iAlt].nBigrams>0) {
  452. dwBigramSize=sizeof(LATTICE_BIGRAM_INFO_LIST)+
  453. sizeof(LATTICE_BIGRAM_INFO)*(lat->pAltList[iStroke].alts[iAlt].nBigrams-1);
  454. pBigramInfo = (LATTICE_BIGRAM_INFO_LIST*)ExternAlloc(dwBigramSize);
  455. if (pBigramInfo==NULL) goto allocated_elements;
  456. int iPrevStart=iStroke-lat->pAltList[iStroke].alts[iAlt].nStrokes;
  457. for (int i=0; i<lat->pAltList[iStroke].alts[iAlt].nBigrams; i++) {
  458. pBigramInfo->bigrams[i].dwBigram=(DWORD)(lat->pAltList[iStroke].alts[iAlt].bigramLogProbs[i]);
  459. pBigramInfo->bigrams[i].dwPrevElement=
  460. lat->pAltList[iPrevStart].alts[lat->pAltList[iStroke].alts[iAlt].bigramAlts[i]].indexIFELang3;
  461. }
  462. pDataList[cData].guid = GUID_LATTICE_BIGRAM_INFO_LIST;
  463. pDataList[cData].dwSize = dwBigramSize;
  464. pDataList[cData].pbData = (BYTE*)pBigramInfo;
  465. cData++;
  466. pLattice[cLattice].dwTotalInfo++;
  467. }
  468. #endif
  469. #ifdef DUMP_LM_LATTICE
  470. if (f!=NULL) {
  471. char sz[256];
  472. ZeroMemory( sz, 256 );
  473. #if defined(LM_LATTICE_CHS)
  474. WideCharToMultiByte( 936, 0, pNeutral->wsz, -1, sz, 256, 0, 0 );
  475. #elif defined(LM_LATTICE_CHT)
  476. WideCharToMultiByte( 950, 0, pNeutral->wsz, -1, sz, 256, 0, 0 );
  477. #elif defined(LM_LATTICE_JPN)
  478. WideCharToMultiByte( 932, 0, pNeutral->wsz, -1, sz, 256, 0, 0 );
  479. #endif
  480. // fprintf(f,"%3d %2d %10d %s\n",pLattice[cLattice].dwFrameStart,pLattice[cLattice].dwFrameLen,pNeutral->dwUnigram,sz);
  481. // fprintf(f,"%3d %2d %10d %s U+%04X\n",pLattice[cLattice].dwFrameStart,pLattice[cLattice].dwFrameLen,pNeutral->dwUnigram,sz,pNeutral->wsz[0]);
  482. #if defined(LM_LATTICE_CHS) || defined(LM_LATTICE_CHT)
  483. fwprintf(f, L"%s, %d, %d, %d\r\n",
  484. pNeutral->wsz, pLattice[cLattice].dwFrameStart, pLattice[cLattice].dwFrameLen, pNeutral->dwUnigram);
  485. #elif defined(LM_LATTICE_UNICODE)
  486. fwprintf(f, L"%3d %2d %10d %s U+%04X\r\n",
  487. pLattice[cLattice].dwFrameStart, pLattice[cLattice].dwFrameLen, pNeutral->dwUnigram,
  488. pNeutral->wsz, pNeutral->wsz[0] );
  489. #endif
  490. #ifdef USE_IFELANG3_BIGRAMS
  491. fprintf(f,"%d",lat->pAltList[iStroke].alts[iAlt].nBigrams);
  492. int iPrevStart=iStroke-lat->pAltList[iStroke].alts[iAlt].nStrokes;
  493. for (int i=0; i<lat->pAltList[iStroke].alts[iAlt].nBigrams; i++) {
  494. fprintf(f," %d %d\n",
  495. lat->pAltList[iPrevStart].alts[lat->pAltList[iStroke].alts[iAlt].bigramAlts[i]].indexIFELang3,
  496. lat->pAltList[iStroke].alts[iAlt].bigramLogProbs);
  497. }
  498. fprintf(f,"\n");
  499. #endif
  500. }
  501. #endif
  502. cLattice++;
  503. }
  504. }
  505. if (lat->wszAfter != NULL)
  506. {
  507. int iFrame = pLattice[cLattice].dwFrameStart = (lat->nStrokes - 1) + 2 + cPreContext;
  508. for (int iSrc = 0; iSrc < (int) wcslen(lat->wszAfter); iSrc++)
  509. {
  510. // If we have a context character, then create a lattice element for it.
  511. pNeutral = (IMLANGUAGE_INFO_NEUTRAL1*)ExternAlloc(dwNeutralSize);
  512. if (pNeutral == NULL) goto allocated_elements;
  513. pNeutral->dwUnigram = 0;
  514. pNeutral->fHypothesis = FALSE;
  515. pNeutral->wsz[0] = LocRunDense2Unicode(&g_locRunInfo, lat->wszAfter[iSrc]);
  516. pNeutral->wsz[1] = 0;
  517. pNeutral->wsz[2] = 0;
  518. pDataList[cData].guid = GUID_IMLANGUAGE_INFO_NEUTRAL1;
  519. pDataList[cData].dwSize = dwNeutralSize;
  520. pDataList[cData].pbData = (BYTE*)pNeutral;
  521. pLattice[cLattice].dwFrameStart = iFrame++;
  522. pLattice[cLattice].dwFrameLen = 1;
  523. pLattice[cLattice].dwTotalInfo = 1;
  524. pLattice[cLattice].pInfo = pDataList + cLattice;
  525. #ifdef HWX_TUNE
  526. if (g_pTuneFile != NULL)
  527. {
  528. fprintf(g_pTuneFile, "%04X %d %d %f\n",
  529. pNeutral->wsz[0],
  530. pLattice[cLattice].dwFrameStart,
  531. pLattice[cLattice].dwFrameLen,
  532. 0.0);
  533. }
  534. #endif
  535. #ifdef DUMP_LM_LATTICE
  536. if (f!=NULL) {
  537. char sz[256];
  538. ZeroMemory( sz, 256 );
  539. #if defined(LM_LATTICE_CHS)
  540. WideCharToMultiByte( 936, 0, pNeutral->wsz, -1, sz, 256, 0, 0 );
  541. #elif defined(LM_LATTICE_CHT)
  542. WideCharToMultiByte( 950, 0, pNeutral->wsz, -1, sz, 256, 0, 0 );
  543. #elif defined(LM_LATTICE_JPN)
  544. WideCharToMultiByte( 932, 0, pNeutral->wsz, -1, sz, 256, 0, 0 );
  545. #endif
  546. #if defined(LM_LATTICE_CHS) || defined(LM_LATTICE_CHT)
  547. fwprintf(f, L"%s, %d, %d, %d\r\n",
  548. pNeutral->wsz, pLattice[cLattice].dwFrameStart, pLattice[cLattice].dwFrameLen, pNeutral->dwUnigram);
  549. #elif defined(LM_LATTICE_UNICODE)
  550. fwprintf(f, L"%3d %2d %10d %s U+%04X\r\n",
  551. pLattice[cLattice].dwFrameStart, pLattice[cLattice].dwFrameLen, pNeutral->dwUnigram,
  552. pNeutral->wsz, pNeutral->wsz[0] );
  553. #endif
  554. }
  555. #endif
  556. cData++;
  557. cLattice++;
  558. }
  559. }
  560. #ifdef DUMP_LM_LATTICE
  561. if (f!=NULL) {
  562. fwprintf(f, L"</S>\r\n");
  563. fclose(f);
  564. }
  565. #endif
  566. #ifdef HWX_TUNE
  567. if (g_pTuneFile != NULL)
  568. {
  569. fprintf(g_pTuneFile, "\n");
  570. fflush(g_pTuneFile);
  571. }
  572. #endif
  573. // define "batch" for IIMLanguage
  574. nLMITEM = SetLanguageModelCommands(aLMITEM, &sTuneParam, &sHWTip1Param);
  575. // Apply IIMLanguage
  576. rc = g_pIFELang3->GetLatticeMorphResult(
  577. nLMITEM, aLMITEM,
  578. cLattice, pLattice, &cElemOut, &pElemOut
  579. );
  580. // TPDBG_DMSG2("%08X: GetLatticeMorphResult -> %08X\n", GetCurrentThreadId(), rc);
  581. if (SUCCEEDED(rc)) {
  582. // Let's validate what came back from IFELang3. The main array of
  583. // results must be writable, so we can sort it later.
  584. if (cElemOut <= 0 || IsBadWritePtr(pElemOut, cElemOut * sizeof(*pElemOut)))
  585. {
  586. ASSERT(("IFELang3 pElemOut not readable and writable", FALSE));
  587. goto free_lattice;
  588. }
  589. // Then validate each of the pointers in the above array
  590. for (ii = 0; ii < cElemOut; ii++)
  591. {
  592. IMLANGUAGE_ELEMENT *pElem = pElemOut + ii;
  593. // If the element doesn't have any info items in it, then skip it.
  594. if (pElem->dwTotalInfo <= 0)
  595. continue;
  596. // Otherwise validate the array of info items
  597. if (IsBadReadPtr(pElem->pInfo, pElem->dwTotalInfo * sizeof(*(pElem->pInfo))))
  598. {
  599. ASSERT(("IFELang3 pElemOut->pInfo not readable", FALSE));
  600. goto free_lattice;
  601. }
  602. for (jj = 0; jj < pElem->dwTotalInfo; jj++)
  603. {
  604. // Make sure the info is readable and the size it is supposed to be
  605. if (IsBadReadPtr(pElem->pInfo[jj].pbData, pElem->pInfo[jj].dwSize))
  606. {
  607. ASSERT(("IFELang3 pElemOut->pInfo->pbData not readable", FALSE));
  608. goto free_lattice;
  609. }
  610. // We only care about info of this type
  611. if (pElem->pInfo[jj].guid == GUID_IMLANGUAGE_INFO_NEUTRAL1)
  612. {
  613. // Get the pointer to the actual info.
  614. pNeutral = (IMLANGUAGE_INFO_NEUTRAL1 *) pElem->pInfo[jj].pbData;
  615. // cLattice + 1 is a very loose upper bound on the number of characters
  616. // which should be returned; then use the string length to check that
  617. // the specified size is correct.
  618. if (IsBadStringPtrW(pNeutral->wsz, cLattice + 1) ||
  619. sizeof(IMLANGUAGE_INFO_NEUTRAL1) +
  620. sizeof(WCHAR) * (wcslen(pNeutral->wsz) + 1) > pElem->pInfo[jj].dwSize)
  621. {
  622. ASSERT(("IFELang3 pElemOut->pInfo->pbData->wsz not readable", FALSE));
  623. goto free_lattice;
  624. }
  625. }
  626. }
  627. }
  628. // If the language model worked,
  629. // trace the path that came back from the model.
  630. int cChars = 0;
  631. wchar_t *wszBestPath = NULL;
  632. // Sort the output lattice elements into time order
  633. qsort(pElemOut, cElemOut, sizeof(IMLANGUAGE_ELEMENT), CompareElemTime);
  634. // Count the number of characters in the path
  635. for (ii=0; ii<cElemOut; ii++) {
  636. IMLANGUAGE_ELEMENT *pElem = pElemOut + ii;
  637. for (jj = 0; jj < pElem->dwTotalInfo; jj++ ) {
  638. pNeutral = (IMLANGUAGE_INFO_NEUTRAL1 *) pElem->pInfo[jj].pbData;
  639. if (pElem->pInfo[jj].guid==GUID_IMLANGUAGE_INFO_NEUTRAL1) {
  640. /* FILE *f=fopen("/log.txt","a+");
  641. fprintf(f,"element %d has %d chars hypothesis %d\n",cChars,wcslen(pNeutral->wsz),pNeutral->fHypothesis);
  642. fclose(f); */
  643. cChars += wcslen(pNeutral->wsz);
  644. }
  645. }
  646. }
  647. // Allocate space for the path
  648. wszBestPath = (wchar_t*)ExternAlloc(sizeof(wchar_t)*(cChars+1));
  649. // If we failed, then just don't update the path
  650. if (wszBestPath != NULL) {
  651. // Copy the path out of the lattice
  652. wszBestPath[0] = 0;
  653. for (ii=0; ii<cElemOut; ii++) {
  654. IMLANGUAGE_ELEMENT *pElem = pElemOut + ii;
  655. for (jj = 0; jj < pElem->dwTotalInfo; jj++ ) {
  656. pNeutral = (IMLANGUAGE_INFO_NEUTRAL1 *) pElem->pInfo[jj].pbData;
  657. if (pElem->pInfo[jj].guid==GUID_IMLANGUAGE_INFO_NEUTRAL1)
  658. wcscat(wszBestPath, pNeutral->wsz);
  659. }
  660. }
  661. // Wipe out the post-context character(s)
  662. wszBestPath[wcslen(wszBestPath) - cPostContext] = 0;
  663. // Skip the pre-context character(s)
  664. wszBestPath += cPreContext;
  665. // Trace the string through the original lattice, using the
  666. // same code that is used for the separator.
  667. int nSubs=SearchForTargetResultInternal(lat, wszBestPath);
  668. // fprintf(stderr, "Searching for target, nSubs=%d\n", nSubs);
  669. // Put the context character(s) back
  670. wszBestPath -= cPreContext;
  671. if (nSubs!=0) {
  672. // FILE *f = fopen("c:/log.txt", "a");
  673. // fprintf(f, "Lost path with %d/%d/%d chars\n",
  674. // cPreContext, wcslen(wszBestPath) - cPreContext, cPostContext);
  675. // fclose(f);
  676. } else {
  677. // FILE *f = fopen("c:/log.txt", "a");
  678. // fprintf(f, "Found path with %d/%d/%d chars\n",
  679. // cPreContext, wcslen(wszBestPath) - cPreContext, cPostContext);
  680. // fclose(f);
  681. }
  682. ExternFree(wszBestPath);
  683. }
  684. free_lattice:
  685. // And free the memory allocated for us by IIMLanguage.
  686. if (cElemOut > 0 && !IsBadReadPtr(pElemOut, cElemOut * sizeof(*pElemOut)))
  687. {
  688. for (ii=0; ii < cElemOut; ii++) {
  689. IMLANGUAGE_ELEMENT *pElem = pElemOut + ii;
  690. if (!IsBadReadPtr(pElem->pInfo, sizeof(*(pElem->pInfo)) * pElem->dwTotalInfo))
  691. {
  692. // Free each data item in the element.
  693. for (jj = 0; jj < pElem->dwTotalInfo; jj++)
  694. {
  695. if (!IsBadReadPtr(pElem->pInfo[jj].pbData, pElem->pInfo[jj].dwSize))
  696. {
  697. CoTaskMemFree(pElem->pInfo[jj].pbData);
  698. }
  699. }
  700. // Free the element array itself.
  701. CoTaskMemFree(pElem->pInfo);
  702. }
  703. }
  704. CoTaskMemFree(pElemOut);
  705. }
  706. }
  707. // Free all the memory we allocated.
  708. allocated_elements:
  709. for (ii = 0; ii < (DWORD)cData; ++ii) {
  710. if (pDataList[ii].pbData!=NULL)
  711. ExternFree(pDataList[ii].pbData);
  712. }
  713. ExternFree(pLattice);
  714. allocated_data:
  715. ExternFree(pDataList);
  716. allocated_valid:
  717. ExternFree(pValidEnd);
  718. }
  719. #endif
  720. // USE_IFELANG3