Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1795 lines
50 KiB

  1. //$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
  2. //
  3. // Copyright (c) 2001 Microsoft Corporation. All rights reserved.
  4. //
  5. // Module:
  6. // volcano/dll/CharRec.c
  7. //
  8. // Description:
  9. // Main sequencing code to recognize one character ignoring
  10. // size and position.
  11. //
  12. // Author:
  13. // hrowley
  14. //
  15. //$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
  16. #include "volcanop.h"
  17. #include "frame.h"
  18. #include "glyph.h"
  19. #if defined(USE_HOUND) || defined(USE_ZILLAHOUND)
  20. # include "math16.h"
  21. # include "hound.h"
  22. # include "zillatool.h"
  23. #endif
  24. #ifndef USE_OLD_DATABASES
  25. # include "hawk.h"
  26. #endif
  27. #ifdef USE_RESOURCES
  28. # include "res.h"
  29. #endif
  30. //#define OPTIMAL_OTTER_ZILLA
  31. // Uncomment this to enable use of the old tsunami-style computation
  32. // (using OtterMatch & ZillaMatch instead of OtterMatch2 & ZillaMatch2,
  33. // and index the prob table by codepoint instead of prototype number).
  34. //#define USE_OLD_DATABASES
  35. /////////////////////////////////////////////////////////////////////////
  36. // Hack code for probabilities, this will go away once Hawk works.
  37. #include "probHack.h"
  38. PROB_HEADER *g_pProbHeader = 0;
  39. #define EntryPtr(i) \
  40. (PROB_ENTRY *)(((BYTE *)g_pProbHeader) + g_pProbHeader->aEntryOffset[i])
  41. #define AltPtr(i) \
  42. (PROB_ALT *)(((BYTE *)g_pProbHeader) + g_pProbHeader->aAltOffset[i])
  43. void ProbLoadPointer(void * pData)
  44. {
  45. BYTE *pScan = (BYTE *)pData;
  46. g_pProbHeader = (PROB_HEADER *)pScan;
  47. pScan += sizeof(PROB_HEADER);
  48. }
  49. #ifdef USE_RESOURCES
  50. BOOL ProbLoadRes(
  51. HINSTANCE hInst,
  52. int resNumber,
  53. int resType
  54. ) {
  55. BYTE *pByte;
  56. // Load the prob database
  57. pByte = DoLoadResource(NULL, hInst, resNumber, resType);
  58. if (!pByte) {
  59. return FALSE;
  60. }
  61. ProbLoadPointer(pByte);
  62. return TRUE;
  63. }
  64. #else
  65. BOOL ProbLoadFile(wchar_t *pPath, LOAD_INFO *pInfo)
  66. {
  67. HANDLE hFile, hMap;
  68. BYTE *pByte;
  69. wchar_t aFile[128];
  70. pInfo->hFile = INVALID_HANDLE_VALUE;
  71. pInfo->hMap = INVALID_HANDLE_VALUE;
  72. pInfo->pbMapping = INVALID_HANDLE_VALUE;
  73. // Generate path to file.
  74. FormatPath(aFile, pPath, (wchar_t *)0, (wchar_t *)0, (wchar_t *)0, L"prob.bin");
  75. // Map the file
  76. hFile = CreateMappingCall(
  77. aFile,
  78. GENERIC_READ,
  79. FILE_SHARE_READ,
  80. NULL,
  81. OPEN_EXISTING,
  82. FILE_ATTRIBUTE_NORMAL,
  83. NULL
  84. );
  85. if (hFile == INVALID_HANDLE_VALUE)
  86. {
  87. ASSERT(("Error in CreateMappingCall - prob", FALSE));
  88. goto error1;
  89. }
  90. // Create a mapping handle
  91. hMap = CreateFileMapping(hFile, NULL, PAGE_READONLY, 0, 0, NULL);
  92. if (hMap == NULL)
  93. {
  94. ASSERT(("Error in CreateFileMapping - prob", FALSE));
  95. goto error2;
  96. }
  97. // Map the entire file starting at the first byte
  98. pByte = (LPBYTE) MapViewOfFile(hMap, FILE_MAP_READ, 0, 0, 0);
  99. if (pByte == NULL) {
  100. ASSERT(("Error in MapViewOfFile - prob", FALSE));
  101. goto error3;
  102. }
  103. // Extract info from mapped data.
  104. ProbLoadPointer((void *)pByte);
  105. // Save away the pointers so we can close up cleanly later
  106. pInfo->hFile = hFile;
  107. pInfo->hMap = hMap;
  108. pInfo->pbMapping = pByte;
  109. return TRUE;
  110. // Error handling
  111. error3:
  112. CloseHandle(hMap);
  113. hMap = INVALID_HANDLE_VALUE;
  114. error2:
  115. CloseHandle(hFile);
  116. hFile = INVALID_HANDLE_VALUE;
  117. error1:
  118. return FALSE;
  119. }
  120. BOOL ProbUnLoadFile(LOAD_INFO *pInfo)
  121. {
  122. if (pInfo->hFile == INVALID_HANDLE_VALUE ||
  123. pInfo->hMap == INVALID_HANDLE_VALUE ||
  124. pInfo->pbMapping == INVALID_HANDLE_VALUE) {
  125. return FALSE;
  126. }
  127. UnmapViewOfFile(pInfo->pbMapping);
  128. CloseHandle(pInfo->hMap);
  129. CloseHandle(pInfo->hFile);
  130. pInfo->pbMapping = INVALID_HANDLE_VALUE;
  131. pInfo->hMap = INVALID_HANDLE_VALUE;
  132. pInfo->hFile = INVALID_HANDLE_VALUE;
  133. return TRUE;
  134. }
  135. #endif
  136. // Given an alt list with dense and possibly folded codes in it, run through it
  137. // and expand the folded lists. The unfolded alt list is returned in place.
  138. // This function assumes that the list begins with better alternates, as those
  139. // later in the list will get dropped if we run out of space.
  140. void UnfoldCodes(ALT_LIST *pAltList, CHARSET *cs)
  141. {
  142. int i, cOut=0;
  143. ALT_LIST newAltList; // This will be where the new alt list is constructed.
  144. // For each alternate in the input list and while we have space in the output list
  145. for (i=0; i<(int)pAltList->cAlt && (int)cOut<MAX_ALT_LIST; i++) {
  146. // Check if the alternate is a folded coded
  147. if (LocRunIsFoldedCode(&g_locRunInfo,pAltList->awchList[i])) {
  148. int kndex;
  149. // If it is a folded code, look up the folding set
  150. wchar_t *pFoldingSet = LocRunFolded2FoldingSet(&g_locRunInfo, pAltList->awchList[i]);
  151. // Run through the folding set, adding non-NUL items to the output list
  152. // (until the output list is full)
  153. for (kndex = 0;
  154. kndex < LOCRUN_FOLD_MAX_ALTERNATES && pFoldingSet[kndex] != 0 && (int)cOut<MAX_ALT_LIST;
  155. kndex++) {
  156. if (IsAllowedChar(&g_locRunInfo, cs, pFoldingSet[kndex]))
  157. {
  158. newAltList.awchList[cOut]=pFoldingSet[kndex];
  159. newAltList.aeScore[cOut]=pAltList->aeScore[i];
  160. cOut++;
  161. #ifdef DISABLE_UNFOLDING
  162. // If unfolding is disabled, then stop after producing one unfolded code.
  163. // This way we don't push results later in the alt list out of the alt
  164. // list, while still allowing the recognizer to return unicodes for each
  165. // alternate.
  166. break;
  167. #endif
  168. }
  169. }
  170. } else {
  171. // Dense codes that are not folded get added directly
  172. newAltList.awchList[cOut]=pAltList->awchList[i];
  173. newAltList.aeScore[cOut]=pAltList->aeScore[i];
  174. cOut++;
  175. }
  176. }
  177. // Store the length of the output list
  178. newAltList.cAlt=cOut;
  179. // Copy the output list over the input.
  180. *pAltList=newAltList;
  181. }
  182. #ifdef USE_OLD_DATABASES
  183. // Used for WinCE
  184. // Given a feature space (cFrame), an alt list, and a requested number of alts, this
  185. // function returns a new alt list with probabilities for each alternate. It uses a
  186. // fixed prob distribution.
  187. int GetProbsTsunamiFixedTable(
  188. int cFrame,
  189. ALT_LIST *pAltList,
  190. int maxAlts,
  191. RECOG_ALT *pRAlts,
  192. CHARSET *pCS
  193. ) {
  194. int rank = 0;
  195. FLOAT rankScore = pAltList->aeScore[0];
  196. int cAlt;
  197. int iDest = 0;
  198. for (cAlt = 0; cAlt < (int) pAltList->cAlt && iDest < maxAlts; ++cAlt)
  199. {
  200. if (pAltList->aeScore[cAlt] != rankScore)
  201. {
  202. rank ++;
  203. rankScore = pAltList->aeScore[cAlt];
  204. }
  205. if (IsAllowedChar(&g_locRunInfo, pCS, pAltList->awchList[cAlt]))
  206. {
  207. int count;
  208. switch (rank) {
  209. case 0:
  210. count = 141125;
  211. break;
  212. case 1:
  213. count = 6090;
  214. break;
  215. case 2:
  216. count = 957;
  217. break;
  218. case 3:
  219. count = 362;
  220. break;
  221. case 4:
  222. count = 161;
  223. break;
  224. case 5:
  225. count = 82;
  226. break;
  227. case 6:
  228. count = 66;
  229. break;
  230. case 7:
  231. count = 49;
  232. break;
  233. case 8:
  234. count = 36;
  235. break;
  236. case 9:
  237. count = 34;
  238. break;
  239. default:
  240. count = 10;
  241. break;
  242. }
  243. pRAlts[iDest].wch = pAltList->awchList[cAlt];
  244. pRAlts[iDest].prob = 65535*(float)count/(float)149903;
  245. iDest++;
  246. }
  247. }
  248. return iDest;
  249. }
  250. // Desktop
  251. // Given a feature space (cFrame), an alt list, and a requested number of alts, this
  252. // function returns a new alt list with probabilities for each alternate. The version
  253. // called GetProbs in this file does the lookup by prototype number, whereas this version
  254. // does lookups by code point (like the code in Tsunami). Note that the alt list passed
  255. // in will get modified.
  256. int GetProbsTsunami(
  257. int cFrame,
  258. ALT_LIST *pAltList,
  259. int maxAlts,
  260. RECOG_ALT *pRAlts,
  261. CHARSET *pCS
  262. ) {
  263. unsigned int cAlt;
  264. int ii;
  265. int iDest = 0;
  266. PROB_ENTRY *pEntries, *pEntriesStart, *pEntriesEnd;
  267. PROB_ALT *pAlts, *pAltsStart, *pAltsEnd;
  268. // If we didn't get any alternates, return an empty list.
  269. if (pAltList->cAlt == 0) {
  270. return 0;
  271. }
  272. // If the probability table was not loaded, just return the top one candidate.
  273. // This is useful for training the prob table.
  274. if (g_pProbHeader==NULL) {
  275. pRAlts[0].wch=pAltList->awchList[0];
  276. pRAlts[0].prob=MAX_PROB;
  277. return 1;
  278. }
  279. // ASSERT(1 <= cFrame && cFrame < 30);
  280. ASSERT(1 <= cFrame);
  281. if (cFrame >= 30) {
  282. // Can't handle this many strokes.
  283. goto fakeIt;
  284. }
  285. // Hack for U+307A/U+30DA, which probably haven't had their probs set up right
  286. /* if (LocRunDense2Unicode(&g_locRunInfo,pAltList->awchList[0])==0x307A ||
  287. LocRunDense2Unicode(&g_locRunInfo,pAltList->awchList[0])==0x30DA) {
  288. pRAlts[0].wch = LocRunUnicode2Dense(&g_locRunInfo,0x30DA);
  289. pRAlts[0].prob = MAX_PROB;
  290. pRAlts[1].wch = LocRunUnicode2Dense(&g_locRunInfo,0x307A);
  291. pRAlts[1].prob = MAX_PROB;
  292. return 2;
  293. } */
  294. pEntriesStart = EntryPtr(cFrame - 1);
  295. pEntriesEnd = EntryPtr(cFrame);
  296. pAltsStart = AltPtr(cFrame - 1);
  297. pAltsEnd = AltPtr(cFrame);
  298. // Scan until we find an alt that has a prob list.
  299. // Normally we stop on the first one, but sometimes
  300. // We had no train data to cause a prototype to come
  301. // up top one.
  302. for (cAlt = 0; cAlt < pAltList->cAlt; ++cAlt) {
  303. // Get char to look up.
  304. // wchar_t wch = LocRunDense2Unicode(&g_locRunInfo,pAltList->awchList[cAlt]);
  305. wchar_t wch = pAltList->awchList[cAlt];
  306. pAlts = pAltsStart;
  307. for (pEntries = pEntriesStart; pEntries < pEntriesEnd; ++pEntries) {
  308. if (pEntries->wch == wch) {
  309. // copy results out.
  310. for (ii = 0; ii < pEntries->cAlts && iDest < maxAlts; ++ii) {
  311. if (IsAllowedChar(&g_locRunInfo, pCS, pAlts->wchAlt))
  312. {
  313. pRAlts[iDest].wch = pAlts->wchAlt;
  314. pRAlts[iDest].prob = pAlts->prob;
  315. iDest++;
  316. }
  317. ++pAlts;
  318. }
  319. return iDest;
  320. }
  321. pAlts += pEntries->cAlts;
  322. }
  323. }
  324. fakeIt:
  325. // Fake something up.
  326. pRAlts[0].wch = pAltList->awchList[0];
  327. pRAlts[0].prob = MAX_PROB;
  328. // fprintf(stderr,"Returning no alts\n");
  329. // exit(1);
  330. return 1;
  331. }
  332. #endif
  333. // USE_OLD_DATABASES
  334. // End of hacked Prob code.
  335. ////////////////////////////////////////////////////////////////////////
  336. BOOL g_fUseJaws;
  337. JAWS_LOAD_INFO g_JawsLoadInfo;
  338. FUGU_LOAD_INFO g_FuguLoadInfo;
  339. SOLE_LOAD_INFO g_SoleLoadInfo;
  340. BOOL g_fUseZillaHound;
  341. #ifdef USE_RESOURCES
  342. #include "res.h"
  343. // Code to load and initialize the databases used.
  344. // They are loaded in this order: otter, zilla, crane/prob or hawk,
  345. BOOL LoadCharRec(HINSTANCE hInstanceDll)
  346. {
  347. BOOL fError = FALSE;
  348. if (JawsLoadRes(&g_JawsLoadInfo, hInstanceDll, RESID_JAWS, VOLCANO_RES))
  349. {
  350. // Now we need to load the databases that will be combined by this combiner
  351. // Load the Fugu database
  352. if (!fError && !FuguLoadRes(&g_FuguLoadInfo, hInstanceDll, RESID_FUGU, VOLCANO_RES, &g_locRunInfo))
  353. {
  354. fError = TRUE;
  355. ASSERT(("Error in FuguLoadRes", FALSE));
  356. }
  357. // Load the Sole database
  358. if (!fError && !SoleLoadRes(&g_SoleLoadInfo, hInstanceDll, RESID_SOLE, VOLCANO_RES, &g_locRunInfo))
  359. {
  360. fError = TRUE;
  361. ASSERT(("Error loading sole", FALSE));
  362. }
  363. g_fUseJaws = TRUE;
  364. }
  365. else
  366. {
  367. // Load the Otter database
  368. if (!fError && !OtterLoadRes(hInstanceDll, RESID_OTTER, VOLCANO_RES, &g_locRunInfo))
  369. {
  370. fError = TRUE;
  371. ASSERT(("Error in OtterLoadRes", FALSE));
  372. }
  373. g_fUseJaws = FALSE;
  374. }
  375. #if defined(USE_ZILLA) || defined(USE_ZILLAHOUND)
  376. // Load the Zilla database
  377. if (!fError && !ZillaLoadResource(
  378. hInstanceDll, RESID_ZILLA, VOLCANO_RES, RESID_COSTCALC,
  379. VOLCANO_RES, RESID_GEOSTAT, VOLCANO_RES, &g_locRunInfo
  380. )) {
  381. fError = TRUE;
  382. ASSERT(("Error in ZillaLoadResource", FALSE));
  383. }
  384. #endif
  385. #if defined(USE_HOUND)
  386. // Load the Hound database (Hound only, require it to load)
  387. if (!fError && !HoundLoadRes(hInstanceDll, RESID_HOUND, VOLCANO_RES, &g_locRunInfo)) {
  388. fError = TRUE;
  389. ASSERT(("Error in HoundLoadRes", FALSE));
  390. }
  391. #endif
  392. g_fUseZillaHound = FALSE;
  393. #if defined(USE_ZILLAHOUND)
  394. if (!fError) {
  395. // Load the Hound & Hound-Zilla databases (This is optional).
  396. if (HoundLoadRes(hInstanceDll, RESID_HOUND, VOLCANO_RES, &g_locRunInfo)) {
  397. if (ZillaHoundLoadRes(hInstanceDll, RESID_ZILLA_HOUND, VOLCANO_RES)) {
  398. g_fUseZillaHound = TRUE;
  399. }
  400. }
  401. }
  402. #endif
  403. // Load the Hawk database.
  404. #ifndef USE_OLD_DATABASES
  405. if (!fError && !HawkLoadRes(
  406. hInstanceDll, RESID_HAWK, VOLCANO_RES, &g_locRunInfo
  407. )) {
  408. fError = TRUE;
  409. ASSERT(("Error in HawkLoadRes", FALSE));
  410. }
  411. #else
  412. if (!fError && !CraneLoadRes(hInstanceDll,RESID_CRANE,VOLCANO_RES,&g_locRunInfo)) {
  413. fError=TRUE;
  414. ASSERT(("Error in CraneLoadRes", FALSE));
  415. }
  416. // Load hack probability code until we switch over to hawk.
  417. // Use hawks resID so we don't have to create an extra one.
  418. #if !defined(WINCE) && !defined(FAKE_WINCE)
  419. if (!fError && !ProbLoadRes(
  420. hInstanceDll, RESID_HAWK, VOLCANO_RES
  421. )) {
  422. // Failing to load this is no longer an error,
  423. // just fall back on the WinCE method.
  424. // fError = TRUE;
  425. // ASSERT(("Error in ProbLoadRes", FALSE));
  426. }
  427. #endif
  428. #endif
  429. // Did everything load correctly?
  430. if (fError) {
  431. // JBENN: If the databases can ever be unloaded, this is
  432. // a place the need to.
  433. // JBENN: FIXME: Set correct error code base on what really went wrong.
  434. SetLastError(ERROR_RESOURCE_NAME_NOT_FOUND);
  435. //SetLastError(ERROR_RESOURCE_DATA_NOT_FOUND);
  436. //SetLastError(ERROR_RESOURCE_TYPE_NOT_FOUND);
  437. //SetLastError(ERROR_OUTOFMEMORY);
  438. return FALSE;
  439. }
  440. return TRUE;
  441. }
  442. // Code to unload the databases used.
  443. BOOL
  444. UnloadCharRec()
  445. {
  446. BOOL retVal;
  447. retVal = TRUE;
  448. // Free hound up.
  449. # if defined(USE_HOUND)
  450. if (!HoundUnLoadRes())
  451. {
  452. retVal = FALSE;
  453. }
  454. # endif
  455. # if defined(USE_ZILLAHOUND)
  456. if (g_fUseZillaHound && !HoundUnLoadRes())
  457. {
  458. retVal = FALSE;
  459. }
  460. # endif
  461. if (!ZillaUnloadResource())
  462. {
  463. retVal = FALSE;
  464. }
  465. return retVal;
  466. }
  467. # else
  468. // Global load information specific to loading from files.
  469. #if defined(USE_OTTER) || defined(USE_OTTERFUGU)
  470. OTTER_LOAD_INFO g_OtterLoadInfo;
  471. #endif
  472. #if defined(USE_HOUND) || defined(USE_ZILLAHOUND)
  473. LOAD_INFO g_HoundLoadInfo;
  474. #endif
  475. #ifdef USE_OLD_DATABASES
  476. LOAD_INFO g_ProbLoadInfo;
  477. CRANE_LOAD_INFO g_CraneLoadInfo;
  478. #else
  479. LOAD_INFO g_HawkLoadInfo;
  480. #endif
  481. // Code to load and initialize the databases used.
  482. BOOL LoadCharRec(wchar_t *pPath)
  483. {
  484. BOOL fError = FALSE;
  485. if (JawsLoadFile(&g_JawsLoadInfo, pPath))
  486. {
  487. // Load the Fugu database
  488. if (!fError && !FuguLoadFile(&g_FuguLoadInfo, pPath, &g_locRunInfo)) {
  489. fError = TRUE;
  490. ASSERT(("Error in FuguLoadFile", FALSE));
  491. }
  492. // Load the Sole database
  493. if (!fError && !SoleLoadFile(&g_SoleLoadInfo, pPath, &g_locRunInfo)) {
  494. fError = TRUE;
  495. ASSERT(("Error in FuguLoadFile", FALSE));
  496. }
  497. g_fUseJaws = TRUE;
  498. }
  499. else
  500. {
  501. // Load the Otter database
  502. if (!fError && !OtterLoadFile(&g_locRunInfo, &g_OtterLoadInfo, pPath)) {
  503. fError = TRUE;
  504. ASSERT(("Error in OtterLoadFile", FALSE));
  505. }
  506. g_fUseJaws = FALSE;
  507. }
  508. #if defined(USE_ZILLA) || defined(USE_ZILLAHOUND)
  509. // Load the Zilla database
  510. if (!fError && !ZillaLoadFile(&g_locRunInfo, pPath, TRUE)) {
  511. fError = TRUE;
  512. ASSERT(("Error in ZillaLoadFile", FALSE));
  513. }
  514. #endif
  515. #if defined(USE_HOUND)
  516. // Load the Hound database (Hound only, require it to load)
  517. if (!fError && !HoundLoadFile(&g_locRunInfo, &g_HoundLoadInfo, pPath)) {
  518. fError = TRUE;
  519. ASSERT(("Error in HoundLoadFile", FALSE));
  520. }
  521. #endif
  522. g_fUseZillaHound = FALSE;
  523. #if defined(USE_ZILLAHOUND)
  524. if (!fError) {
  525. // Load the Hound & Hound-Zilla databases (This is optional).
  526. if (HoundLoadFile(&g_locRunInfo, &g_HoundLoadInfo, pPath)) {
  527. if (ZillaHoundLoadFile(pPath)) {
  528. g_fUseZillaHound = TRUE;
  529. }
  530. else
  531. {
  532. # ifndef TRAIN_ZILLA_HOUND_COMBINER
  533. HoundUnLoadFile(&g_HoundLoadInfo);
  534. # endif
  535. }
  536. }
  537. }
  538. #endif
  539. #ifndef USE_OLD_DATABASES
  540. // Load the Hawk database.
  541. if (!fError && !HawkLoadFile(&g_locRunInfo, &g_HawkLoadInfo, pPath)) {
  542. fError = TRUE;
  543. ASSERT(("Error in HawkLoadFile", FALSE));
  544. }
  545. #else
  546. #if !defined(WINCE) && !defined(FAKE_WINCE)
  547. // Load hack probability code until we switch over to hawk.
  548. if (!fError && !ProbLoadFile(pPath, &g_ProbLoadInfo)) {
  549. // Failing to load this is no longer an error,
  550. // just fall back on the WinCE method.
  551. // fError = TRUE;
  552. // ASSERT(("Error in ProbLoadFile", FALSE));
  553. }
  554. #endif
  555. if (!fError && !CraneLoadFile(&g_locRunInfo,&g_CraneLoadInfo, pPath)) {
  556. fError = TRUE;
  557. ASSERT(("Error in CraneLoadFile", FALSE));
  558. }
  559. #endif
  560. // Did everything load correctly?
  561. if (fError) {
  562. // JBENN: If the databases can ever be unloaded, this is
  563. // a place the need to.
  564. // JBENN: FIXME: Set correct error code base on what really went wrong.
  565. SetLastError(ERROR_RESOURCE_NAME_NOT_FOUND);
  566. //SetLastError(ERROR_RESOURCE_DATA_NOT_FOUND);
  567. //SetLastError(ERROR_RESOURCE_TYPE_NOT_FOUND);
  568. //SetLastError(ERROR_OUTOFMEMORY);
  569. return FALSE;
  570. }
  571. return TRUE;
  572. }
  573. // Code to unload the databases used.
  574. BOOL
  575. UnloadCharRec()
  576. {
  577. BOOL ok = TRUE;
  578. if (g_fUseJaws)
  579. {
  580. if (!SoleUnloadFile(&g_SoleLoadInfo)) ok = FALSE;
  581. if (!FuguUnLoadFile(&g_FuguLoadInfo)) ok = FALSE;
  582. if (!JawsUnloadFile(&g_JawsLoadInfo)) ok = FALSE;
  583. }
  584. else
  585. {
  586. if (!OtterUnLoadFile(&g_OtterLoadInfo)) ok = FALSE;
  587. }
  588. # if defined(USE_HOUND)
  589. if (!HoundUnLoadFile(&g_HoundLoadInfo))
  590. {
  591. ok = FALSE;
  592. }
  593. # endif
  594. # if defined(USE_ZILLAHOUND)
  595. if (g_fUseZillaHound)
  596. {
  597. if (!ZillaHoundUnloadFile())
  598. {
  599. ok = FALSE;
  600. }
  601. if (!HoundUnLoadFile(&g_HoundLoadInfo))
  602. {
  603. ok = FALSE;
  604. }
  605. }
  606. # endif
  607. if (!ZillaUnLoadFile()) ok = FALSE;
  608. # ifdef USE_OLD_DATABASES
  609. if (!CraneUnLoadFile(&g_CraneLoadInfo)) ok = FALSE;
  610. # if !defined(WINCE) && !defined(FAKE_WINCE)
  611. if (g_pProbHeader != NULL && !ProbUnLoadFile(&g_ProbLoadInfo)) ok = FALSE;
  612. # endif
  613. # else // USE_OLD_DATABASES
  614. if (!HawkUnLoadFile(&g_HawkLoadInfo)) ok = FALSE;
  615. # endif // USE_OLD_DATABASES
  616. return ok;
  617. }
  618. #endif
  619. // Limit on strokes that can be processed by a recognizer. Since
  620. // Zilla ignores anything beyond 29 strokes, it is safe to ignore
  621. // any extra.
  622. #define MAX_STOKES_PROCESS 30
  623. POINT *DupPoints(POINT *pOldPoints, int nPoints);
  624. GLYPH *GlyphFromStrokes(UINT cStrokes, STROKE *pStrokes);
  625. #ifndef USE_RESOURCES
  626. // Build a copy of the glyph structure.
  627. GLYPH *CopyGlyph(GLYPH *pOldGlyph)
  628. {
  629. GLYPH *pGlyph = NULL, *pLastGlyph = NULL;
  630. // Convert strokes to GLYPHs and FRAMEs so that we can call the
  631. // old code.
  632. while (pOldGlyph != NULL) {
  633. GLYPH *pGlyphCur;
  634. // Alloc glyph.
  635. pGlyphCur = NewGLYPH();
  636. if (!pGlyphCur) {
  637. goto error;
  638. }
  639. // Add to list, and alloc frame
  640. if (pLastGlyph != NULL) {
  641. pLastGlyph->next = pGlyphCur;
  642. pLastGlyph = pGlyphCur;
  643. } else {
  644. pGlyph = pGlyphCur;
  645. pLastGlyph = pGlyphCur;
  646. }
  647. pGlyphCur->next = NULL;
  648. pGlyphCur->frame = NewFRAME();
  649. if (!pGlyphCur->frame) {
  650. goto error;
  651. }
  652. // Fill in frame. We just fill in what we need, and ignore
  653. // fields not used by Otter and Zilla, or are set by them.
  654. pGlyphCur->frame->info.cPnt = pOldGlyph->frame->info.cPnt;
  655. pGlyphCur->frame->info.wPdk = pOldGlyph->frame->info.wPdk;
  656. pGlyphCur->frame->rgrawxy = DupPoints(pOldGlyph->frame->rgrawxy, pOldGlyph->frame->info.cPnt);
  657. pGlyphCur->frame->rect = pOldGlyph->frame->rect;
  658. pGlyphCur->frame->iframe = pOldGlyph->frame->iframe;
  659. if (pGlyphCur->frame->rgrawxy == NULL) {
  660. goto error;
  661. }
  662. pOldGlyph = pOldGlyph->next;
  663. }
  664. return pGlyph;
  665. error:
  666. // Cleanup glyphs on error.
  667. if (pGlyph != NULL) {
  668. DestroyFramesGLYPH(pGlyph);
  669. DestroyGLYPH(pGlyph);
  670. }
  671. return NULL;
  672. }
  673. #endif // !USE_RESOURCES
  674. #ifdef USE_OLD_DATABASES
  675. /******************************Public*Routine******************************\
  676. * AdHocRuleCost
  677. *
  678. * Because of character folding and the inability of the shape matchers
  679. * to distinguish between a cluster a 1000 samples map to versus 1 point
  680. * mapping to it we have a few hard rule we throw in to fix obvious
  681. * problems.
  682. *
  683. * History:
  684. * 11-Jul-1995 -by- Patrick Haluptzok patrickh
  685. * Wrote it.
  686. \**************************************************************************/
  687. float AdHocRuleCost(int cStrokes, wchar_t dch, VOLCANO_WEIGHTS *pScores)
  688. {
  689. #ifdef DISABLE_HEURISTICS
  690. return 0;
  691. #else
  692. wchar_t wch;
  693. int cFrame;
  694. // Get character and number of strokes. Note we need character in Unicode
  695. // so that we can compare with constant character codes.
  696. // ASSUMPTION: SYM_UNKNOWN should be the only sym if its present.
  697. // So there aren't any alternatives that could get a "better" cost
  698. // so it probably doesn't really matter what cost we return here
  699. if (dch == SYM_UNKNOWN)
  700. {
  701. return 0;
  702. }
  703. wch = LocRunDense2Unicode(&g_locRunInfo, dch);
  704. cFrame = cStrokes;
  705. // Check for 0 (2 strokes), penalize all circle shapes
  706. // except 0 when 2 strokes occur.
  707. if (cFrame >= 2)
  708. {
  709. // 0x824f is the 0 that we don't want to penalize.
  710. // All other circle shapes are penalized.
  711. if ((wch == 0x006F) ||
  712. (wch == 0x004F) ||
  713. (wch == 0x00B0) ||
  714. (wch == 0x3002) ||
  715. (wch == 0x3007)
  716. )
  717. {
  718. pScores->afl[VTUNE_ADHOC_CIRCLE] = -1;
  719. return -g_vtuneInfo.pTune->weights.afl[VTUNE_ADHOC_CIRCLE];
  720. }
  721. }
  722. // Check for 1 stroke lower-case i and j. No dot is a extra penalty.
  723. if (cFrame == 1)
  724. {
  725. if ((wch == 0x0069) || (wch == 0x006A))
  726. {
  727. pScores->afl[VTUNE_ADHOC_IJ] = -1;
  728. return -g_vtuneInfo.pTune->weights.afl[VTUNE_ADHOC_IJ];
  729. }
  730. }
  731. return 0;
  732. #endif
  733. }
  734. BOOL Afterburn(ALT_LIST *pAltList, GLYPH *pGlyph, CHARSET *cs, RECT *rGuide, RECT rc)
  735. {
  736. DRECTS drcs;
  737. if (pGlyph==NULL || rGuide==NULL)
  738. return FALSE;
  739. // Scale and translate the guide box to compute the 'delta rectangle'
  740. drcs.x = rGuide->left;
  741. drcs.y = rGuide->top;
  742. drcs.w = rGuide->right - rGuide->left;
  743. drcs.h = rGuide->bottom - rGuide->top;
  744. // Translate, convert to delta form
  745. rc.left -= drcs.x;
  746. rc.top -= drcs.y;
  747. rc.right -= (drcs.x + rc.left);
  748. rc.bottom -= (drcs.y + rc.top);
  749. // Scale. We do isotropic scaling and center the shorter dimension.
  750. if (drcs.w > drcs.h) {
  751. drcs.x = ((1000 * rc.left) / drcs.w);
  752. drcs.y = ((1000 * rc.top) / drcs.w) + ((drcs.w - drcs.h) / 2);
  753. drcs.h = ((1000 * rc.bottom) / drcs.w);
  754. drcs.w = ((1000 * rc.right) / drcs.w);
  755. } else {
  756. drcs.x = ((1000 * rc.left) / drcs.h) + ((drcs.h - drcs.w) / 2);
  757. drcs.y = ((1000 * rc.top) / drcs.h);
  758. drcs.w = ((1000 * rc.right) / drcs.h);
  759. drcs.h = ((1000 * rc.bottom) / drcs.h);
  760. }
  761. #ifndef DISABLE_HEURISTICS
  762. return CraneMatch(pAltList, MAX_ALT_LIST, pGlyph, cs, &drcs, 0, &g_locRunInfo);
  763. #else
  764. return FALSE;
  765. #endif
  766. }
  767. // Hack to get around lack of data for training Crane
  768. BOOL IsFaultyKana(wchar_t wch)
  769. {
  770. switch (wch) {
  771. // case 0x3041:
  772. case 0x3042:
  773. // case 0x3043:
  774. case 0x3044:
  775. // case 0x3045:
  776. case 0x3046:
  777. // case 0x3047:
  778. case 0x3048:
  779. // case 0x3049:
  780. case 0x304A:
  781. // case 0x30E9:
  782. return TRUE;
  783. }
  784. return FALSE;
  785. }
  786. #endif // USE_OLD_DATABASES
  787. // Sort the alternate list.
  788. // We do a bubble sort. The list is small and we can't use qsort because the data is stored in
  789. // three parallel arrays.
  790. void SortAltListAndTune(ALT_LIST *pAltList, VOLCANO_WEIGHTS *pTuneScore)
  791. {
  792. int pos1, pos2;
  793. int limit1, limit2;
  794. FLOAT * const peScore = pAltList->aeScore;
  795. wchar_t * const pwchList = pAltList->awchList;
  796. limit2 = pAltList->cAlt;
  797. limit1 = limit2 - 1;
  798. for (pos1 = 0; pos1 < limit1; ++pos1) {
  799. for (pos2 = pos1 + 1; pos2 < limit2; ++pos2) {
  800. // Are elements pos1 and pos2 out of order?
  801. if (peScore[pos1] < peScore[pos2]) {
  802. FLOAT eTemp;
  803. wchar_t wchTemp;
  804. VOLCANO_WEIGHTS weights;
  805. // Swap scores and swap characters.
  806. eTemp = peScore[pos1];
  807. peScore[pos1] = peScore[pos2];
  808. peScore[pos2] = eTemp;
  809. wchTemp = pwchList[pos1];
  810. pwchList[pos1] = pwchList[pos2];
  811. pwchList[pos2] = wchTemp;
  812. weights = pTuneScore[pos1];
  813. pTuneScore[pos1]= pTuneScore[pos2];
  814. pTuneScore[pos2]= weights;
  815. }
  816. }
  817. }
  818. }
  819. // Call the core recognizer for the given character. Returned the
  820. // number of alternates produced, or -1 if an error occurs.
  821. int CoreRecognizeChar(
  822. ALT_LIST *pAltList, // Alt list to be returned
  823. int cAlt, // Max number of alternates
  824. GLYPH **ppGlyph, // Character to recognize (which may be modified)
  825. int nRealStrokes, // Real stroke count for abort processing
  826. RECT *pGuideBox, // Guide box (for partial mode)
  827. RECOG_SETTINGS *pRecogSettings, // Partial mode, other settings
  828. CHARSET *pCS, // ALCs
  829. int *piRecognizer, // Returns the VOLCANO_CONFIG_* constant for the recognizer used
  830. int *piSpace) // The space number in that recognizer
  831. {
  832. int iRet = -1;
  833. int iRecognizer = VOLCANO_CONFIG_NONE;
  834. int nStrokes = CframeGLYPH(*ppGlyph);
  835. if (nStrokes > VOLCANO_CONFIG_MAX_STROKE_COUNT) nStrokes = VOLCANO_CONFIG_MAX_STROKE_COUNT;
  836. if (pRecogSettings->partialMode) nStrokes = 0;
  837. iRecognizer = g_latticeConfigInfo.iRecognizers[nStrokes];
  838. *piRecognizer = iRecognizer;
  839. *piSpace = -1;
  840. pAltList->cAlt = 0;
  841. // Call the selected recognizer
  842. switch (iRecognizer)
  843. {
  844. case VOLCANO_CONFIG_OTTER:
  845. if (g_fUseJaws)
  846. {
  847. iRet = JawsMatch(&g_JawsLoadInfo, &g_FuguLoadInfo, &g_SoleLoadInfo,
  848. pAltList, cAlt, *ppGlyph, pGuideBox, pCS, &g_locRunInfo);
  849. *piSpace = nStrokes;
  850. }
  851. else
  852. {
  853. iRet = OtterMatch2(pAltList, cAlt, *ppGlyph, pCS, &g_locRunInfo, piSpace);
  854. // Other experiments
  855. // iRet = FuguMatch(&g_FuguLoadInfo.fugu, pAltList, cAlt, *ppGlyph, NULL /*pGuideBox*/, pCS, &g_locRunInfo);
  856. // iRet = SoleMatch(pAltList, cAlt, *ppGlyph, pGuideBox, pCS, &g_locRunInfo);
  857. // *piSpace = nStrokes;
  858. }
  859. break;
  860. case VOLCANO_CONFIG_ZILLA:
  861. iRet = ZillaMatch(pAltList, cAlt, ppGlyph, pCS, g_vtuneInfo.pTune->flZillaGeo,
  862. (pRecogSettings->partialMode ? pRecogSettings->pAbort : NULL),
  863. nRealStrokes, pRecogSettings->partialMode, pGuideBox);
  864. // For Zilla, the space number is the feature count. To make them disjoint from the
  865. // Otter spaces, add on the maximum number of Otter spaces.
  866. *piSpace = CframeGLYPH(*ppGlyph) + OTTER_NUM_SPACES;
  867. // Here you can change the iRecognizer that is returned to indicate that the Hound/Zilla
  868. // combiner ran, instead of just Zilla alone. That way tuning will know to use a different
  869. // weighting parameter.
  870. break;
  871. default:
  872. // No recognizer available for this stroke count
  873. iRet = -1;
  874. break;
  875. }
  876. return iRet;
  877. }
  878. // Allocate a cache for the recognizer results.
  879. void *AllocateRecognizerCache()
  880. {
  881. CACHE *pCache = (CACHE *) ExternAlloc(sizeof(CACHE));
  882. if (pCache == NULL)
  883. {
  884. return NULL;
  885. }
  886. pCache->nStrokes = 0;
  887. pCache->pStrokes = NULL;
  888. return pCache;
  889. }
  890. // Free up a cache for the recognizer results.
  891. void FreeRecognizerCache(void *pvCache)
  892. {
  893. CACHE *pCache = (CACHE *) pvCache;
  894. CACHE_ENTRY *pEntry;
  895. int iStroke;
  896. if (pvCache == NULL)
  897. {
  898. return;
  899. }
  900. for (iStroke = 0; iStroke < pCache->nStrokes; iStroke++)
  901. {
  902. pEntry = pCache->pStrokes[iStroke];
  903. while (pEntry != NULL)
  904. {
  905. CACHE_ENTRY *pNext = pEntry->pNext;
  906. ExternFree(pEntry);
  907. pEntry = pNext;
  908. }
  909. }
  910. ExternFree(pCache->pStrokes);
  911. ExternFree(pCache);
  912. }
  913. // Look for results for a given range of strokes, return the recognizer and its
  914. // alternate list.
  915. ALT_LIST *LookupRecognizerCache(void *pvCache, int iStroke, int nStrokes, int *piRecognizer)
  916. {
  917. CACHE *pCache = (CACHE *) pvCache;
  918. CACHE_ENTRY *pEntry;
  919. if (pCache == NULL || iStroke >= pCache->nStrokes)
  920. {
  921. return NULL;
  922. }
  923. // For the given ending stroke, look for a result for the right number of strokes
  924. pEntry = pCache->pStrokes[iStroke];
  925. while (pEntry != NULL && pEntry->nStrokes != nStrokes)
  926. {
  927. pEntry = pEntry->pNext;
  928. }
  929. // If not found, return nothing.
  930. if (pEntry == NULL)
  931. {
  932. return NULL;
  933. }
  934. // Otherwise return the cached results.
  935. *piRecognizer = pEntry->iRecognizer;
  936. return &(pEntry->alts);
  937. }
  938. // Add the alternate list to the cache.
  939. void AddRecognizerCache(void *pvCache, int iStroke, int nStrokes, int iRecognizer, ALT_LIST *pAlts)
  940. {
  941. CACHE *pCache = (CACHE *) pvCache;
  942. CACHE_ENTRY *pEntry;
  943. // If no cache, then exit
  944. if (pCache == NULL)
  945. {
  946. return;
  947. }
  948. // If the cache is currently too small, then allocate more space for it.
  949. if (iStroke >= pCache->nStrokes)
  950. {
  951. int i;
  952. int nStrokesNew = max(10, (iStroke + 1) * 2);
  953. CACHE_ENTRY **pStrokesNew = (CACHE_ENTRY **) ExternRealloc(pCache->pStrokes, sizeof(CACHE_ENTRY *) * nStrokesNew);
  954. if (pStrokesNew == NULL)
  955. {
  956. // If the allocation failed, just continue with the current cache size
  957. return;
  958. }
  959. // Initialize the memory
  960. for (i = pCache->nStrokes; i < nStrokesNew; i++)
  961. {
  962. pStrokesNew[i] = NULL;
  963. }
  964. pCache->pStrokes = pStrokesNew;
  965. pCache->nStrokes = nStrokesNew;
  966. }
  967. // If we got here, then add the entry to the cache
  968. pEntry = (CACHE_ENTRY *) ExternAlloc(sizeof(CACHE_ENTRY));
  969. if (pEntry == NULL)
  970. {
  971. return;
  972. }
  973. pEntry->nStrokes = nStrokes;
  974. pEntry->iRecognizer = iRecognizer;
  975. pEntry->alts = *pAlts;
  976. pEntry->pNext = pCache->pStrokes[iStroke];
  977. pCache->pStrokes[iStroke] = pEntry;
  978. }
  979. #ifdef USE_OLD_DATABASES
  980. // This call is roughly the equivalent of the RecognizeChar call below, but instead of
  981. // returning probabilities, it returns an alternate list with scores. It uses the old Tsunami
  982. // recognition procedure, with otter and zilla returning code points, followed by adhoc rules,
  983. // language model, baseline/height scores, and crane. The result of this is used by RecognizeChar
  984. // to look up the old probability table.
  985. INT RecognizeCharInsurance(
  986. RECOG_SETTINGS *pRecogSettings,// In: Setting for recognizers.
  987. UINT cStrokes, // In: Number of strokes to process.
  988. UINT cRealStrokes, // In: Number of strokes before merging
  989. STROKE *pStrokes, // In: Array of strokes to process.
  990. FLOAT *pProbIsChar, // Out: probability of being valid char.
  991. UINT maxAlts, // In: Size of alts array supplied.
  992. RECOG_ALT *pProbAlts, // Out: alternate list matched with probabilities.
  993. int *pnProbAlts,
  994. RECOG_ALT *pScoreAlts, // Out: alternate list matched with scores
  995. int *pnScoreAlts,
  996. RECT *pGuideBox, // In: Guide box for this ink.
  997. wchar_t dchContext, // In: Context
  998. int *pSpace, // Out: Space number used for matching
  999. VOLCANO_WEIGHTS *pTuneScore, // Out: score components
  1000. BOOL fStringMode, // In: Whether or not the recognizer is in string mode
  1001. BOOL fProbMode, // In: Whether the recognizer is in probability mode
  1002. void *pvCache, // In/Out: Pointer to cache, or NULL if not being used
  1003. int iStroke // In: Index of last stroke of character
  1004. ) {
  1005. ALT_LIST *pCacheResult = NULL;
  1006. BOXINFO box;
  1007. RECT bbox;
  1008. int iAlt;
  1009. GLYPH *pGlyph;
  1010. ALT_LIST altList;
  1011. CHARSET charSet; // Mask used for core recognizers
  1012. CHARSET charSetMask; // Mask used for probability table lookup
  1013. BOOL fCraneBonus = FALSE;
  1014. int iRecognizer;
  1015. // Convert strokes to GLYPHs and FRAMEs so that we can call the
  1016. // old code.
  1017. pGlyph = GlyphFromStrokes(cStrokes, pStrokes);
  1018. if (!pGlyph)
  1019. {
  1020. return -1;
  1021. }
  1022. // Run otter or zilla as needed.
  1023. altList.cAlt = 0;
  1024. charSetMask.recmask = pRecogSettings->alcValid;
  1025. charSetMask.recmaskPriority = pRecogSettings->alcPriority;
  1026. charSetMask.pbAllowedChars = pRecogSettings->pbAllowedChars;
  1027. charSetMask.pbPriorityChars = pRecogSettings->pbPriorityChars;
  1028. if (fProbMode)
  1029. {
  1030. // In probability mode, don't mask off the core recognizers
  1031. charSet.recmask = 0xFFFFFFFF;
  1032. charSet.recmaskPriority = 0;
  1033. charSet.pbAllowedChars = NULL;
  1034. charSet.pbPriorityChars = NULL;
  1035. }
  1036. else
  1037. {
  1038. // In score mode, mask off the core recognizers
  1039. charSet = charSetMask;
  1040. }
  1041. // Get the bounding box for the character
  1042. GetRectGLYPH(pGlyph,&bbox);
  1043. // Try going to the cache
  1044. pCacheResult = LookupRecognizerCache(pvCache, iStroke, cStrokes, &iRecognizer);
  1045. if (pCacheResult != NULL)
  1046. {
  1047. // If it was the Zilla recognizer before, we need to run featurization because
  1048. // of its side-effect of fragmenting the strokes, which crane needs.
  1049. if (iRecognizer == VOLCANO_CONFIG_ZILLA)
  1050. {
  1051. BIGPRIM rgprim[CPRIMMAX];
  1052. BYTE aSampleVector[29 * 4];
  1053. ZillaFeaturize(&pGlyph, rgprim, aSampleVector);
  1054. }
  1055. altList = *pCacheResult;
  1056. }
  1057. else
  1058. {
  1059. // Invoke Otter or Zilla or any other recognizer that has been specified in the configuration
  1060. CoreRecognizeChar(&altList, MAX_ALT_LIST, &pGlyph, cRealStrokes, pGuideBox, pRecogSettings, &charSet, &iRecognizer, pSpace);
  1061. // Add it to the cache, since it isn't there already.
  1062. AddRecognizerCache(pvCache, iStroke, cStrokes, iRecognizer, &altList);
  1063. }
  1064. // If we're doing an experiment to simulate an optimal otter or zilla,
  1065. // replace the real alt list with a fake one.
  1066. #ifdef OPTIMAL_OTTER_ZILLA
  1067. {
  1068. wchar_t dch;
  1069. altList.cAlt = 1;
  1070. altList.aeScore[0] = 0;
  1071. {
  1072. FILE *f = fopen("c:/answer.txt", "r");
  1073. fscanf(f, "%hx", &(altList.awchList[0]));
  1074. fclose(f);
  1075. }
  1076. dch = LocRunUnicode2Dense(&g_locRunInfo, altList.awchList[0]);
  1077. if (dch != LOC_TRAIN_NO_DENSE_CODE) {
  1078. wchar_t fdch = LocRunDense2Folded(&g_locRunInfo, dch);
  1079. if (fdch != 0) dch = fdch;
  1080. altList.awchList[0] = dch;
  1081. } else {
  1082. altList.cAlt = 0;
  1083. }
  1084. }
  1085. #endif
  1086. // Get our rough approximation of the probability that this is
  1087. // actually a character. If zero alternates are returned, then
  1088. // set the space number to -1 as an error flag.
  1089. if (altList.cAlt == 0) {
  1090. *pSpace = -1;
  1091. *pProbIsChar = 0;
  1092. *pnProbAlts = 0;
  1093. *pnScoreAlts = 0;
  1094. goto cleanup;
  1095. }
  1096. // Unfold anything in the alt list which needs it.
  1097. UnfoldCodes(&altList, &charSet);
  1098. // If we couldn't load the probability table, then use the
  1099. // WinCE method to get probabilities.
  1100. if (g_pProbHeader == NULL)
  1101. {
  1102. *pnProbAlts = GetProbsTsunamiFixedTable(cStrokes, &altList, maxAlts, pProbAlts, &charSetMask);
  1103. }
  1104. // Apply crane, if we have a guide for it to use and we are not in partial mode
  1105. if (pRecogSettings->partialMode == HWX_PARTIAL_ALL && pGuideBox != NULL && altList.cAlt > 0) {
  1106. fCraneBonus = Afterburn(&altList, pGlyph, &charSet, pGuideBox, bbox);
  1107. // Hack to bypass crane if otter a troublesome kana character
  1108. if (IsFaultyKana(LocRunDense2Unicode(&g_locRunInfo,altList.awchList[0]))) {
  1109. fCraneBonus = FALSE;
  1110. }
  1111. }
  1112. // Save away the scores for the alternates, then apply the weight for the particular
  1113. // recognizer used. Then add in the crane bonus/penalty and the adhoc rules.
  1114. for (iAlt=0; iAlt<(int)altList.cAlt; iAlt++)
  1115. {
  1116. int iParam = (fStringMode ? VTUNE_STRING_CORE : VTUNE_CHAR_CORE) + iRecognizer;
  1117. pTuneScore[iAlt].afl[iParam] = altList.aeScore[iAlt];
  1118. altList.aeScore[iAlt] *= g_vtuneInfo.pTune->weights.afl[iParam];
  1119. // Crane is now implemented as a penalty rather than a bonus. This means
  1120. // all alternates after the first one get a penalty, and even the first one
  1121. // gets a penalty if no crane bonus is applied.
  1122. if (iAlt > 0 || !fCraneBonus)
  1123. {
  1124. iParam = fStringMode ? VTUNE_STRING_CRANE : VTUNE_CHAR_CRANE;
  1125. pTuneScore[iAlt].afl[iParam] = -1;
  1126. altList.aeScore[iAlt] -= g_vtuneInfo.pTune->weights.afl[iParam];
  1127. }
  1128. // Add adhoc penalties for the one stroke i and j and two stroke circle shapes
  1129. if (pRecogSettings->partialMode == HWX_PARTIAL_ALL)
  1130. {
  1131. altList.aeScore[iAlt] += AdHocRuleCost(cStrokes, altList.awchList[iAlt], pTuneScore + iAlt);
  1132. }
  1133. }
  1134. // Sort the alternates out.
  1135. SortAltListAndTune(&altList, pTuneScore);
  1136. // Copy the score-based alts to the output
  1137. for (iAlt = 0; iAlt < (int)altList.cAlt && iAlt < (int)maxAlts && iAlt < (int)MAX_ALT_LIST; ++iAlt)
  1138. {
  1139. pScoreAlts[iAlt].wch = altList.awchList[iAlt];
  1140. pScoreAlts[iAlt].prob = altList.aeScore[iAlt];
  1141. }
  1142. *pnScoreAlts = altList.cAlt;
  1143. // Re-score the alternates using the old weightings in the
  1144. // TTune structure, so that prob table lookup will be weighting
  1145. // independent.
  1146. for (iAlt = 0; iAlt < (int)altList.cAlt; ++iAlt)
  1147. {
  1148. altList.aeScore[iAlt] =
  1149. g_vtuneInfo.pTune->weights.afl[VTUNE_ADHOC_IJ] * pTuneScore[iAlt].afl[VTUNE_ADHOC_IJ] +
  1150. g_vtuneInfo.pTune->weights.afl[VTUNE_ADHOC_CIRCLE] * pTuneScore[iAlt].afl[VTUNE_ADHOC_CIRCLE] +
  1151. (cStrokes > 2 ? g_ttuneInfo.pTTuneCosts->ZillaChar.CARTAddWeight :
  1152. g_ttuneInfo.pTTuneCosts->OtterChar.CARTAddWeight)
  1153. * pTuneScore[iAlt].afl[fStringMode ? VTUNE_STRING_CRANE : VTUNE_CHAR_CRANE] +
  1154. pTuneScore[iAlt].afl[(fStringMode ? VTUNE_STRING_CORE : VTUNE_CHAR_CORE) + iRecognizer];
  1155. }
  1156. // Build up a BOXINFO structure from the guide, for use in the baseline/height scoring
  1157. if (pGuideBox!=NULL) {
  1158. box.size = pGuideBox->bottom - pGuideBox->top;
  1159. box.baseline = pGuideBox->bottom;
  1160. box.xheight = box.size / 2;
  1161. box.midline = box.baseline - box.xheight;
  1162. }
  1163. // For each alternate
  1164. for (iAlt=0; iAlt<(int)altList.cAlt; iAlt++) {
  1165. float cost;
  1166. // Apply baseline/height and language model unigram scores
  1167. if (cStrokes<3) {
  1168. if (pGuideBox!=NULL) {
  1169. cost = BaselineTransitionCost(0,bbox,&box,altList.awchList[iAlt],bbox,&box)
  1170. * g_ttuneInfo.pTTuneCosts->OtterChar.BaseWeight;
  1171. altList.aeScore[iAlt] += cost;
  1172. cost = BaselineBoxCost(altList.awchList[iAlt],bbox,&box)
  1173. * g_ttuneInfo.pTTuneCosts->OtterChar.BoxBaselineWeight;
  1174. altList.aeScore[iAlt] += cost;
  1175. cost = HeightTransitionCost(0,bbox,&box,altList.awchList[iAlt],bbox,&box)
  1176. * g_ttuneInfo.pTTuneCosts->OtterChar.HeightWeight;
  1177. altList.aeScore[iAlt] += cost;
  1178. cost = HeightBoxCost(altList.awchList[iAlt],bbox,&box)
  1179. * g_ttuneInfo.pTTuneCosts->OtterChar.BoxHeightWeight;
  1180. altList.aeScore[iAlt] += cost;
  1181. }
  1182. cost = UnigramCost(&g_unigramInfo,altList.awchList[iAlt])
  1183. * g_ttuneInfo.pTTuneCosts->OtterChar.UniWeight;
  1184. altList.aeScore[iAlt] += cost;
  1185. } else {
  1186. if (pGuideBox!=NULL) {
  1187. cost = BaselineTransitionCost(0,bbox,&box,altList.awchList[iAlt],bbox,&box)
  1188. * g_ttuneInfo.pTTuneCosts->ZillaChar.BaseWeight;
  1189. altList.aeScore[iAlt] += cost;
  1190. cost = BaselineBoxCost(altList.awchList[iAlt],bbox,&box)
  1191. * g_ttuneInfo.pTTuneCosts->ZillaChar.BoxBaselineWeight;
  1192. altList.aeScore[iAlt] += cost;
  1193. cost = HeightTransitionCost(0,bbox,&box,altList.awchList[iAlt],bbox,&box)
  1194. * g_ttuneInfo.pTTuneCosts->ZillaChar.HeightWeight;
  1195. altList.aeScore[iAlt] += cost;
  1196. cost = HeightBoxCost(altList.awchList[iAlt],bbox,&box)
  1197. * g_ttuneInfo.pTTuneCosts->ZillaChar.BoxHeightWeight;
  1198. altList.aeScore[iAlt] += cost;
  1199. }
  1200. cost = UnigramCost(&g_unigramInfo,altList.awchList[iAlt])
  1201. * g_ttuneInfo.pTTuneCosts->ZillaChar.UniWeight;
  1202. altList.aeScore[iAlt] += cost;
  1203. // Zilla scores get fudged
  1204. altList.aeScore[iAlt] *= g_ttuneInfo.pTTuneCosts->ZillaStrFudge;
  1205. }
  1206. // If context was available for this character, then use the bigram/class bigram scores
  1207. if (dchContext != SYM_UNKNOWN && dchContext != 0) {
  1208. #if !defined(WINCE) && !defined(FAKE_WINCE)
  1209. cost = BigramTransitionCost(&g_locRunInfo,&g_bigramInfo,dchContext,altList.awchList[iAlt])
  1210. * g_ttuneInfo.pTTuneCosts->BiWeight;
  1211. altList.aeScore[iAlt] += cost;
  1212. #endif
  1213. cost = ClassBigramTransitionCost(&g_locRunInfo,&g_classBigramInfo,dchContext,altList.awchList[iAlt])
  1214. * g_ttuneInfo.pTTuneCosts->BiClassWeight;
  1215. altList.aeScore[iAlt] += cost;
  1216. }
  1217. }
  1218. // Sort the resulting alternates
  1219. SortAltList(&altList);
  1220. // This is a temporary call to get probs directly, until we have Hawk.
  1221. if (g_pProbHeader != NULL)
  1222. {
  1223. *pnProbAlts = GetProbsTsunami(cStrokes, &altList, maxAlts, pProbAlts, &charSetMask);
  1224. }
  1225. #if 0
  1226. {
  1227. FILE *f=fopen("c:/temp/prob.log","a+");
  1228. fprintf(f,"%04X %g -> %04X %g\n", altList.awchList[0], altList.aeScore[0],
  1229. pProbAlts[0].wch, pProbAlts[0].prob);
  1230. fclose(f);
  1231. }
  1232. #endif
  1233. //#define TEST_FOR_PATRICKH
  1234. #ifdef TEST_FOR_PATRICKH
  1235. {
  1236. int i;
  1237. for (i=0; i<*pnProbAlts && i<(int)altList.cAlt; i++)
  1238. pProbAlts[i].wch = altList.awchList[i];
  1239. *pnProbAlts = i;
  1240. }
  1241. #endif
  1242. cleanup:
  1243. // Free the glyph structure.
  1244. DestroyFramesGLYPH(pGlyph);
  1245. DestroyGLYPH(pGlyph);
  1246. return *pnProbAlts;
  1247. }
  1248. #else
  1249. // Version of Afterburn to call Hawk.
  1250. int Afterburn(
  1251. ALT_LIST *pAltList, // Input used to select correct CART tree
  1252. GLYPH *pGlyph,
  1253. CHARSET *cs,
  1254. RECT *rGuide,
  1255. int otterSpace,
  1256. UINT maxAlts, // Size of alts array supplied.
  1257. RECOG_ALT *pAlts // Out: alternate list matched.
  1258. ) {
  1259. UINT ii;
  1260. UINT iDest;
  1261. // UINT jj, kk;
  1262. BASICINFO basicInfo;
  1263. FEATINFO featInfo;
  1264. HANDLE hCartTree;
  1265. QALT aQAlt[MAX_RECOG_ALTS];
  1266. UINT cQAlt;
  1267. #if 0
  1268. double aWeights[MAX_ALT_LIST];
  1269. double fSum;
  1270. double offset;
  1271. FILE *pFile;
  1272. #endif
  1273. RECT bbox;
  1274. DRECTS drcs;
  1275. if (pGlyph == NULL) {
  1276. return -1;
  1277. }
  1278. // Get the bounding box for the character
  1279. GetRectGLYPH(pGlyph, &bbox);
  1280. // Scale and translate the guide box to compute the 'delta rectangle'
  1281. if (rGuide == NULL) {
  1282. // No guide given, This is the current assumption.
  1283. drcs.x = 0;
  1284. drcs.y = 0;
  1285. drcs.w = 1000;
  1286. drcs.h = 1000;
  1287. } else {
  1288. // Actually got a guide, pass it on. Current code ignores the
  1289. // guide, but may add it back so don't lose code path.
  1290. drcs.x = rGuide->left;
  1291. drcs.y = rGuide->top;
  1292. drcs.w = rGuide->right - rGuide->left;
  1293. drcs.h = rGuide->bottom - rGuide->top;
  1294. }
  1295. // Translate, convert to delta form
  1296. bbox.left -= drcs.x;
  1297. bbox.top -= drcs.y;
  1298. bbox.right -= (drcs.x + bbox.left);
  1299. bbox.bottom -= (drcs.y + bbox.top);
  1300. // Scale. We do isotropic scaling and center the shorter dimension.
  1301. if (drcs.w > drcs.h) {
  1302. drcs.x = ((1000 * bbox.left) / drcs.w);
  1303. drcs.y = ((1000 * bbox.top) / drcs.w) + ((drcs.w - drcs.h) / 2);
  1304. drcs.h = ((1000 * bbox.bottom) / drcs.w);
  1305. drcs.w = ((1000 * bbox.right) / drcs.w);
  1306. } else {
  1307. drcs.x = ((1000 * bbox.left) / drcs.h) + ((drcs.h - drcs.w) / 2);
  1308. drcs.y = ((1000 * bbox.top) / drcs.h);
  1309. drcs.w = ((1000 * bbox.right) / drcs.h);
  1310. drcs.h = ((1000 * bbox.bottom) / drcs.h);
  1311. }
  1312. // Fill in basic info.
  1313. // basicInfo.cStrk -- Filed in by MakeFeatures.
  1314. basicInfo.cSpace = (short)otterSpace;
  1315. basicInfo.drcs = drcs;
  1316. // Fill in feature info.
  1317. if (!MakeFeatures(&basicInfo, &featInfo, pGlyph)) {
  1318. return -1;
  1319. }
  1320. #if 1
  1321. // Find cart tree
  1322. hCartTree = (HANDLE)0;
  1323. for (ii = 0; !hCartTree && ii < pAltList->cAlt; ++ii) {
  1324. hCartTree = HawkFindTree(basicInfo.cStrk, basicInfo.cSpace, pAltList->awchList[ii]);
  1325. }
  1326. if (!hCartTree) {
  1327. // No cart tree for anything in the alt list!?!?!
  1328. return -1;
  1329. }
  1330. // Do the match.
  1331. //HawkMatch(pAltList, MAX_ALT_LIST, pGlyph, cs, &drcs, eCARTWeight, &g_locRunInfo);
  1332. cQAlt = HawkMatch(&basicInfo, &featInfo, hCartTree, aQAlt);
  1333. // Copy out the alt list, applying the ALC
  1334. iDest = 0;
  1335. for (ii = 0; ii < cQAlt && iDest < maxAlts; ++ii)
  1336. {
  1337. if (IsAllowedChar(&g_locRunInfo, cs, aQAlt[ii].dch))
  1338. {
  1339. pAlts[iDest].wch = aQAlt[ii].dch;
  1340. pAlts[iDest].prob = aQAlt[ii].prob;
  1341. iDest++;
  1342. }
  1343. }
  1344. cQAlt = iDest;
  1345. #elif 0
  1346. // Select stroke dependent offset used to compute weights below.
  1347. switch (basicInfo.cStrk) {
  1348. case 1 : offset = .01; break;
  1349. case 2 : offset = .05; break;
  1350. default : offset = .05; break;
  1351. }
  1352. // Compute wighting to apply to each trees results.
  1353. fSum = 0.0;
  1354. for (ii = 0; ii < pAltList->cAlt; ++ii) {
  1355. double ratio;
  1356. ratio = offset / (offset + pAltList->aeScore[0] - pAltList->aeScore[ii]);
  1357. aWeights[ii] = ratio * ratio * ratio;
  1358. fSum += aWeights[ii];
  1359. }
  1360. // Normalize to sum to one.
  1361. for (ii = 0; ii < pAltList->cAlt; ++ii) {
  1362. aWeights[ii] /= fSum;
  1363. }
  1364. pFile = fopen("AltList.dump", "a");
  1365. fprintf(pFile, "Start Dump:\n");
  1366. // Find each cart tree and add results to list.
  1367. hCartTree = (HANDLE)0;
  1368. cQAlt = 0;
  1369. for (ii = 0; ii < pAltList->cAlt && cQAlt < maxAlts; ++ii) {
  1370. hCartTree = HawkFindTree(basicInfo.cStrk, basicInfo.cSpace, pAltList->awchList[ii]);
  1371. if (hCartTree) {
  1372. UINT cQAltNew;
  1373. SCORE penalty;
  1374. int skipped;
  1375. // Do the match.
  1376. cQAltNew = HawkMatch(&basicInfo, &featInfo, hCartTree, aQAlt);
  1377. // How much can we add?
  1378. if (cQAltNew > maxAlts - cQAlt) {
  1379. cQAltNew = maxAlts - cQAlt;
  1380. }
  1381. // Convert our weight (Probability) to a log prob.
  1382. penalty = ProbToScore(aWeights[ii]);
  1383. // Zilla overgenerates prototypes, so look for different top one from
  1384. // additional trees.
  1385. if (ii > 0 && basicInfo.cStrk >= 3 && aQAlt[0].dch == pAlts[0].wch) {
  1386. continue;
  1387. }
  1388. // Add to list.
  1389. skipped = 0;
  1390. for (jj = 0; jj < cQAltNew; ++jj) {
  1391. SCORE newScore;
  1392. // Check for duplicates in the alternate list. Each individual list has not
  1393. // dups, so we don't have to check them.
  1394. newScore = aQAlt[jj].prob + penalty;
  1395. fprintf(pFile, " %04X:%d->%d", LocRunDense2Unicode(&g_locRunInfo,aQAlt[jj].dch),aQAlt[jj].prob,newScore);
  1396. for (kk = 0; kk < cQAlt; ++kk) {
  1397. if (aQAlt[jj].dch == pAlts[kk].wch) {
  1398. ASSERT(pAlts[kk].prob == (float)(int)pAlts[kk].prob);
  1399. pAlts[kk].prob = ScoreAddProbs((SCORE)pAlts[kk].prob, newScore);
  1400. ++skipped;
  1401. goto noAdd;
  1402. }
  1403. }
  1404. pAlts[jj - skipped + cQAlt].wch = aQAlt[jj].dch;
  1405. pAlts[jj - skipped + cQAlt].prob = (float)newScore;
  1406. noAdd: ;
  1407. }
  1408. fprintf(pFile, "\n");
  1409. cQAlt += cQAltNew - skipped;
  1410. }
  1411. }
  1412. for (kk = 0; kk < cQAlt; ++kk) {
  1413. fprintf(pFile, " %04X:%g", LocRunDense2Unicode(&g_locRunInfo,pAlts[kk].wch),pAlts[kk].prob);
  1414. }
  1415. fprintf(pFile, "\n");
  1416. fprintf(pFile, "End Dump\n");
  1417. fclose(pFile);
  1418. #else
  1419. // Select stroke dependent offset used to compute weights below.
  1420. switch (basicInfo.cStrk) {
  1421. case 1 : offset = 1.0; break;
  1422. case 2 : offset = 1.0; break;
  1423. default : offset = 1.0; break;
  1424. }
  1425. pFile = fopen("AltList.dump", "a");
  1426. fprintf(pFile, "Start Dump:\n");
  1427. // Find each cart tree and add results to list.
  1428. hCartTree = (HANDLE)0;
  1429. cQAlt = 0;
  1430. for (ii = 0; ii < pAltList->cAlt && cQAlt < maxAlts; ++ii) {
  1431. hCartTree = HawkFindTree(basicInfo.cStrk, basicInfo.cSpace, pAltList->awchList[ii]);
  1432. if (hCartTree) {
  1433. UINT cQAltNew;
  1434. SCORE penalty;
  1435. int skipped;
  1436. // Do the match.
  1437. cQAltNew = HawkMatch(&basicInfo, &featInfo, hCartTree, aQAlt);
  1438. // How much can we add?
  1439. if (cQAltNew > maxAlts - cQAlt) {
  1440. cQAltNew = maxAlts - cQAlt;
  1441. }
  1442. // Convert our weight (Probability) to a log prob.
  1443. penalty = (SCORE)((pAltList->aeScore[0] - pAltList->aeScore[ii]) * 2040);
  1444. // Zilla overgenerates prototypes, so look for different top one from
  1445. // additional trees.
  1446. if (ii > 0 && basicInfo.cStrk >= 3 && aQAlt[0].dch == pAlts[0].wch) {
  1447. continue;
  1448. }
  1449. // Add to list.
  1450. skipped = 0;
  1451. for (jj = 0; jj < cQAltNew; ++jj) {
  1452. SCORE newScore;
  1453. // Check for duplicates in the alternate list. Each individual list has not
  1454. // dups, so we don't have to check them.
  1455. newScore = aQAlt[jj].prob + penalty;
  1456. fprintf(pFile, " %04X:%d->%d", LocRunDense2Unicode(&g_locRunInfo,aQAlt[jj].dch),aQAlt[jj].prob,newScore);
  1457. for (kk = 0; kk < cQAlt; ++kk) {
  1458. if (aQAlt[jj].dch == pAlts[kk].wch) {
  1459. ASSERT(pAlts[kk].prob == (float)(int)pAlts[kk].prob);
  1460. pAlts[kk].prob = ScoreAddProbs((SCORE)pAlts[kk].prob, newScore);
  1461. ++skipped;
  1462. goto noAdd;
  1463. }
  1464. }
  1465. pAlts[jj - skipped + cQAlt].wch = aQAlt[jj].dch;
  1466. pAlts[jj - skipped + cQAlt].prob = (float)newScore;
  1467. noAdd: ;
  1468. }
  1469. fprintf(pFile, "\n");
  1470. cQAlt += cQAltNew - skipped;
  1471. }
  1472. }
  1473. for (kk = 0; kk < cQAlt; ++kk) {
  1474. fprintf(pFile, " %04X:%g", LocRunDense2Unicode(&g_locRunInfo,pAlts[kk].wch),pAlts[kk].prob);
  1475. }
  1476. fprintf(pFile, "\n");
  1477. fprintf(pFile, "End Dump\n");
  1478. fclose(pFile);
  1479. #endif
  1480. FreeFeatures(&featInfo);
  1481. return cQAlt;
  1482. }
  1483. #endif
  1484. #ifndef USE_OLD_DATABASES
  1485. // Do the recognition.
  1486. INT
  1487. RecognizeChar(
  1488. RECOG_SETTINGS *pRecogSettings,// Setting for recognizers.
  1489. UINT cStrokes, // Number of strokes to process.
  1490. UINT cRealStrokes, // Number of strokes before merging
  1491. STROKE *pStrokes, // Array of strokes to process.
  1492. FLOAT *pProbIsChar, // Out: probability of being valid char.
  1493. UINT maxAlts, // Size of alts array supplied.
  1494. RECOG_ALT *pAlts, // Out: alternate list matched.
  1495. RECT *pGuideBox, // Guide box for this ink.
  1496. int *pCount
  1497. ) {
  1498. INT cAlts;
  1499. GLYPH *pGlyph;
  1500. ALT_LIST altList;
  1501. CHARSET charSet;
  1502. int iRecognizer;
  1503. // Convert strokes to GLYPHs and FRAMEs so that we can call the
  1504. // old code.
  1505. pGlyph = GlyphFromStrokes(cStrokes, pStrokes);
  1506. if (!pGlyph) {
  1507. return -1;
  1508. }
  1509. // Run otter or zilla as needed.
  1510. // a possible optimization would be Switch to proto matching versions of match calls
  1511. altList.cAlt = 0;
  1512. charSet.recmask = 0xFFFFFFFF;
  1513. charSet.recmaskPriority = 0;
  1514. charSet.pbAllowedChars = NULL;
  1515. charSet.pbPriorityChars = NULL;
  1516. // Invoke Otter or Zilla or any other recognizer that has been specified in the configuration
  1517. CoreRecognizeChar(&altList, MAX_ALT_LIST, &pGlyph, cRealStrokes, pGuideBox, pRecogSettings, &charSet, &iRecognizer, pCount);
  1518. charSet.recmask = pRecogSettings->alcValid;
  1519. charSet.recmaskPriority = pRecogSettings->alcPriority;
  1520. charSet.pbAllowedChars = pRecogSettings->pbAllowedChars;
  1521. charSet.pbPriorityChars = pRecogSettings->pbPriorityChars;
  1522. if (pRecogSettings->partialMode != HWX_PARTIAL_ALL) {
  1523. unsigned int ii;
  1524. // Unfold anything in the alt list which needs it.
  1525. UnfoldCodes(&altList, &charSet);
  1526. // Copy over the alt list.
  1527. // Note that we don't have probabilities, and they don't
  1528. // really make sense anyway. However the code that
  1529. // follows will discard items with a prob of zero, so
  1530. // they should be set to something.
  1531. for (ii = 0; ii < maxAlts && ii < altList.cAlt; ++ii) {
  1532. pAlts[ii].wch = altList.awchList[ii];
  1533. pAlts[ii].prob = -altList.aeScore[ii];
  1534. }
  1535. // Free the glyph structure.
  1536. DestroyFramesGLYPH(pGlyph);
  1537. DestroyGLYPH(pGlyph);
  1538. return ii;
  1539. }
  1540. // Get our rough approximation of the probability that this is
  1541. // actually a character.
  1542. *pProbIsChar = altList.aeScore[0];
  1543. // Run Hawk.
  1544. #ifndef DISABLE_HEURISTICS
  1545. cAlts = Afterburn(&altList, pGlyph, &charSet, pGuideBox, *pCount, maxAlts, pAlts);
  1546. #else
  1547. {
  1548. unsigned int ii;
  1549. UnfoldCodes(&altList, &charSet);
  1550. for (ii = 0; ii < maxAlts && ii < altList.cAlt; ii++)
  1551. {
  1552. pAlts[ii].wch = altList.awchList[ii];
  1553. pAlts[ii].prob = -altList.aeScore[ii];
  1554. }
  1555. cAlts = ii;
  1556. }
  1557. #endif
  1558. // Free the glyph structure.
  1559. DestroyFramesGLYPH(pGlyph);
  1560. DestroyGLYPH(pGlyph);
  1561. return cAlts;
  1562. }
  1563. #endif