|
|
//$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
//
// Copyright (c) 2001 Microsoft Corporation. All rights reserved.
//
// Module:
// volcano/dll/CharRec.c
//
// Description:
// Main sequencing code to recognize one character ignoring
// size and position.
//
// Author:
// hrowley
//
//$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
#include "volcanop.h"
#include "frame.h"
#include "glyph.h"
#if defined(USE_HOUND) || defined(USE_ZILLAHOUND)
# include "math16.h"
# include "hound.h"
# include "zillatool.h"
#endif
#ifndef USE_OLD_DATABASES
# include "hawk.h"
#endif
#ifdef USE_RESOURCES
# include "res.h"
#endif
//#define OPTIMAL_OTTER_ZILLA
// Uncomment this to enable use of the old tsunami-style computation
// (using OtterMatch & ZillaMatch instead of OtterMatch2 & ZillaMatch2,
// and index the prob table by codepoint instead of prototype number).
//#define USE_OLD_DATABASES
/////////////////////////////////////////////////////////////////////////
// Hack code for probabilities, this will go away once Hawk works.
#include "probHack.h"
PROB_HEADER *g_pProbHeader = 0;
#define EntryPtr(i) \
(PROB_ENTRY *)(((BYTE *)g_pProbHeader) + g_pProbHeader->aEntryOffset[i]) #define AltPtr(i) \
(PROB_ALT *)(((BYTE *)g_pProbHeader) + g_pProbHeader->aAltOffset[i])
void ProbLoadPointer(void * pData) { BYTE *pScan = (BYTE *)pData;
g_pProbHeader = (PROB_HEADER *)pScan; pScan += sizeof(PROB_HEADER); }
#ifdef USE_RESOURCES
BOOL ProbLoadRes( HINSTANCE hInst, int resNumber, int resType ) { BYTE *pByte;
// Load the prob database
pByte = DoLoadResource(NULL, hInst, resNumber, resType); if (!pByte) { return FALSE; } ProbLoadPointer(pByte);
return TRUE; }
#else
BOOL ProbLoadFile(wchar_t *pPath, LOAD_INFO *pInfo) { HANDLE hFile, hMap; BYTE *pByte; wchar_t aFile[128];
pInfo->hFile = INVALID_HANDLE_VALUE; pInfo->hMap = INVALID_HANDLE_VALUE; pInfo->pbMapping = INVALID_HANDLE_VALUE;
// Generate path to file.
FormatPath(aFile, pPath, (wchar_t *)0, (wchar_t *)0, (wchar_t *)0, L"prob.bin");
// Map the file
hFile = CreateMappingCall( aFile, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL );
if (hFile == INVALID_HANDLE_VALUE) { ASSERT(("Error in CreateMappingCall - prob", FALSE)); goto error1; }
// Create a mapping handle
hMap = CreateFileMapping(hFile, NULL, PAGE_READONLY, 0, 0, NULL); if (hMap == NULL) { ASSERT(("Error in CreateFileMapping - prob", FALSE)); goto error2; }
// Map the entire file starting at the first byte
pByte = (LPBYTE) MapViewOfFile(hMap, FILE_MAP_READ, 0, 0, 0); if (pByte == NULL) { ASSERT(("Error in MapViewOfFile - prob", FALSE)); goto error3; }
// Extract info from mapped data.
ProbLoadPointer((void *)pByte);
// Save away the pointers so we can close up cleanly later
pInfo->hFile = hFile; pInfo->hMap = hMap; pInfo->pbMapping = pByte;
return TRUE;
// Error handling
error3: CloseHandle(hMap); hMap = INVALID_HANDLE_VALUE;
error2: CloseHandle(hFile); hFile = INVALID_HANDLE_VALUE;
error1:
return FALSE; }
BOOL ProbUnLoadFile(LOAD_INFO *pInfo) { if (pInfo->hFile == INVALID_HANDLE_VALUE || pInfo->hMap == INVALID_HANDLE_VALUE || pInfo->pbMapping == INVALID_HANDLE_VALUE) { return FALSE; }
UnmapViewOfFile(pInfo->pbMapping); CloseHandle(pInfo->hMap); CloseHandle(pInfo->hFile);
pInfo->pbMapping = INVALID_HANDLE_VALUE; pInfo->hMap = INVALID_HANDLE_VALUE; pInfo->hFile = INVALID_HANDLE_VALUE;
return TRUE; }
#endif
// Given an alt list with dense and possibly folded codes in it, run through it
// and expand the folded lists. The unfolded alt list is returned in place.
// This function assumes that the list begins with better alternates, as those
// later in the list will get dropped if we run out of space.
void UnfoldCodes(ALT_LIST *pAltList, CHARSET *cs) { int i, cOut=0; ALT_LIST newAltList; // This will be where the new alt list is constructed.
// For each alternate in the input list and while we have space in the output list
for (i=0; i<(int)pAltList->cAlt && (int)cOut<MAX_ALT_LIST; i++) {
// Check if the alternate is a folded coded
if (LocRunIsFoldedCode(&g_locRunInfo,pAltList->awchList[i])) { int kndex; // If it is a folded code, look up the folding set
wchar_t *pFoldingSet = LocRunFolded2FoldingSet(&g_locRunInfo, pAltList->awchList[i]);
// Run through the folding set, adding non-NUL items to the output list
// (until the output list is full)
for (kndex = 0; kndex < LOCRUN_FOLD_MAX_ALTERNATES && pFoldingSet[kndex] != 0 && (int)cOut<MAX_ALT_LIST; kndex++) { if (IsAllowedChar(&g_locRunInfo, cs, pFoldingSet[kndex])) { newAltList.awchList[cOut]=pFoldingSet[kndex]; newAltList.aeScore[cOut]=pAltList->aeScore[i]; cOut++; #ifdef DISABLE_UNFOLDING
// If unfolding is disabled, then stop after producing one unfolded code.
// This way we don't push results later in the alt list out of the alt
// list, while still allowing the recognizer to return unicodes for each
// alternate.
break; #endif
} } } else { // Dense codes that are not folded get added directly
newAltList.awchList[cOut]=pAltList->awchList[i]; newAltList.aeScore[cOut]=pAltList->aeScore[i]; cOut++; } } // Store the length of the output list
newAltList.cAlt=cOut;
// Copy the output list over the input.
*pAltList=newAltList; }
#ifdef USE_OLD_DATABASES
// Used for WinCE
// Given a feature space (cFrame), an alt list, and a requested number of alts, this
// function returns a new alt list with probabilities for each alternate. It uses a
// fixed prob distribution.
int GetProbsTsunamiFixedTable( int cFrame, ALT_LIST *pAltList, int maxAlts, RECOG_ALT *pRAlts, CHARSET *pCS ) { int rank = 0; FLOAT rankScore = pAltList->aeScore[0]; int cAlt; int iDest = 0;
for (cAlt = 0; cAlt < (int) pAltList->cAlt && iDest < maxAlts; ++cAlt) { if (pAltList->aeScore[cAlt] != rankScore) { rank ++; rankScore = pAltList->aeScore[cAlt]; }
if (IsAllowedChar(&g_locRunInfo, pCS, pAltList->awchList[cAlt])) { int count; switch (rank) { case 0: count = 141125; break; case 1: count = 6090; break; case 2: count = 957; break; case 3: count = 362; break; case 4: count = 161; break; case 5: count = 82; break; case 6: count = 66; break; case 7: count = 49; break; case 8: count = 36; break; case 9: count = 34; break; default: count = 10; break; } pRAlts[iDest].wch = pAltList->awchList[cAlt]; pRAlts[iDest].prob = 65535*(float)count/(float)149903; iDest++; } } return iDest; }
// Desktop
// Given a feature space (cFrame), an alt list, and a requested number of alts, this
// function returns a new alt list with probabilities for each alternate. The version
// called GetProbs in this file does the lookup by prototype number, whereas this version
// does lookups by code point (like the code in Tsunami). Note that the alt list passed
// in will get modified.
int GetProbsTsunami( int cFrame, ALT_LIST *pAltList, int maxAlts, RECOG_ALT *pRAlts, CHARSET *pCS ) { unsigned int cAlt; int ii; int iDest = 0; PROB_ENTRY *pEntries, *pEntriesStart, *pEntriesEnd; PROB_ALT *pAlts, *pAltsStart, *pAltsEnd;
// If we didn't get any alternates, return an empty list.
if (pAltList->cAlt == 0) { return 0; }
// If the probability table was not loaded, just return the top one candidate.
// This is useful for training the prob table.
if (g_pProbHeader==NULL) { pRAlts[0].wch=pAltList->awchList[0]; pRAlts[0].prob=MAX_PROB; return 1; }
// ASSERT(1 <= cFrame && cFrame < 30);
ASSERT(1 <= cFrame); if (cFrame >= 30) { // Can't handle this many strokes.
goto fakeIt; }
// Hack for U+307A/U+30DA, which probably haven't had their probs set up right
/* if (LocRunDense2Unicode(&g_locRunInfo,pAltList->awchList[0])==0x307A ||
LocRunDense2Unicode(&g_locRunInfo,pAltList->awchList[0])==0x30DA) { pRAlts[0].wch = LocRunUnicode2Dense(&g_locRunInfo,0x30DA); pRAlts[0].prob = MAX_PROB; pRAlts[1].wch = LocRunUnicode2Dense(&g_locRunInfo,0x307A); pRAlts[1].prob = MAX_PROB; return 2; } */
pEntriesStart = EntryPtr(cFrame - 1); pEntriesEnd = EntryPtr(cFrame); pAltsStart = AltPtr(cFrame - 1); pAltsEnd = AltPtr(cFrame);
// Scan until we find an alt that has a prob list.
// Normally we stop on the first one, but sometimes
// We had no train data to cause a prototype to come
// up top one.
for (cAlt = 0; cAlt < pAltList->cAlt; ++cAlt) { // Get char to look up.
// wchar_t wch = LocRunDense2Unicode(&g_locRunInfo,pAltList->awchList[cAlt]);
wchar_t wch = pAltList->awchList[cAlt];
pAlts = pAltsStart; for (pEntries = pEntriesStart; pEntries < pEntriesEnd; ++pEntries) { if (pEntries->wch == wch) { // copy results out.
for (ii = 0; ii < pEntries->cAlts && iDest < maxAlts; ++ii) { if (IsAllowedChar(&g_locRunInfo, pCS, pAlts->wchAlt)) { pRAlts[iDest].wch = pAlts->wchAlt; pRAlts[iDest].prob = pAlts->prob; iDest++; } ++pAlts; } return iDest; } pAlts += pEntries->cAlts; } } fakeIt: // Fake something up.
pRAlts[0].wch = pAltList->awchList[0]; pRAlts[0].prob = MAX_PROB; // fprintf(stderr,"Returning no alts\n");
// exit(1);
return 1; }
#endif
// USE_OLD_DATABASES
// End of hacked Prob code.
////////////////////////////////////////////////////////////////////////
BOOL g_fUseJaws; JAWS_LOAD_INFO g_JawsLoadInfo; FUGU_LOAD_INFO g_FuguLoadInfo; SOLE_LOAD_INFO g_SoleLoadInfo; BOOL g_fUseZillaHound;
#ifdef USE_RESOURCES
#include "res.h"
// Code to load and initialize the databases used.
// They are loaded in this order: otter, zilla, crane/prob or hawk,
BOOL LoadCharRec(HINSTANCE hInstanceDll) { BOOL fError = FALSE;
if (JawsLoadRes(&g_JawsLoadInfo, hInstanceDll, RESID_JAWS, VOLCANO_RES)) { // Now we need to load the databases that will be combined by this combiner
// Load the Fugu database
if (!fError && !FuguLoadRes(&g_FuguLoadInfo, hInstanceDll, RESID_FUGU, VOLCANO_RES, &g_locRunInfo)) { fError = TRUE; ASSERT(("Error in FuguLoadRes", FALSE)); }
// Load the Sole database
if (!fError && !SoleLoadRes(&g_SoleLoadInfo, hInstanceDll, RESID_SOLE, VOLCANO_RES, &g_locRunInfo)) { fError = TRUE; ASSERT(("Error loading sole", FALSE)); } g_fUseJaws = TRUE; } else { // Load the Otter database
if (!fError && !OtterLoadRes(hInstanceDll, RESID_OTTER, VOLCANO_RES, &g_locRunInfo)) { fError = TRUE; ASSERT(("Error in OtterLoadRes", FALSE)); } g_fUseJaws = FALSE; }
#if defined(USE_ZILLA) || defined(USE_ZILLAHOUND)
// Load the Zilla database
if (!fError && !ZillaLoadResource( hInstanceDll, RESID_ZILLA, VOLCANO_RES, RESID_COSTCALC, VOLCANO_RES, RESID_GEOSTAT, VOLCANO_RES, &g_locRunInfo )) { fError = TRUE; ASSERT(("Error in ZillaLoadResource", FALSE)); } #endif
#if defined(USE_HOUND)
// Load the Hound database (Hound only, require it to load)
if (!fError && !HoundLoadRes(hInstanceDll, RESID_HOUND, VOLCANO_RES, &g_locRunInfo)) { fError = TRUE; ASSERT(("Error in HoundLoadRes", FALSE)); } #endif
g_fUseZillaHound = FALSE; #if defined(USE_ZILLAHOUND)
if (!fError) { // Load the Hound & Hound-Zilla databases (This is optional).
if (HoundLoadRes(hInstanceDll, RESID_HOUND, VOLCANO_RES, &g_locRunInfo)) { if (ZillaHoundLoadRes(hInstanceDll, RESID_ZILLA_HOUND, VOLCANO_RES)) { g_fUseZillaHound = TRUE; } } } #endif
// Load the Hawk database.
#ifndef USE_OLD_DATABASES
if (!fError && !HawkLoadRes( hInstanceDll, RESID_HAWK, VOLCANO_RES, &g_locRunInfo )) { fError = TRUE; ASSERT(("Error in HawkLoadRes", FALSE)); } #else
if (!fError && !CraneLoadRes(hInstanceDll,RESID_CRANE,VOLCANO_RES,&g_locRunInfo)) { fError=TRUE; ASSERT(("Error in CraneLoadRes", FALSE)); }
// Load hack probability code until we switch over to hawk.
// Use hawks resID so we don't have to create an extra one.
#if !defined(WINCE) && !defined(FAKE_WINCE)
if (!fError && !ProbLoadRes( hInstanceDll, RESID_HAWK, VOLCANO_RES )) { // Failing to load this is no longer an error,
// just fall back on the WinCE method.
// fError = TRUE;
// ASSERT(("Error in ProbLoadRes", FALSE));
} #endif
#endif
// Did everything load correctly?
if (fError) { // JBENN: If the databases can ever be unloaded, this is
// a place the need to.
// JBENN: FIXME: Set correct error code base on what really went wrong.
SetLastError(ERROR_RESOURCE_NAME_NOT_FOUND); //SetLastError(ERROR_RESOURCE_DATA_NOT_FOUND);
//SetLastError(ERROR_RESOURCE_TYPE_NOT_FOUND);
//SetLastError(ERROR_OUTOFMEMORY);
return FALSE; }
return TRUE; }
// Code to unload the databases used.
BOOL UnloadCharRec() { BOOL retVal;
retVal = TRUE;
// Free hound up.
# if defined(USE_HOUND)
if (!HoundUnLoadRes()) { retVal = FALSE; } # endif
# if defined(USE_ZILLAHOUND)
if (g_fUseZillaHound && !HoundUnLoadRes()) { retVal = FALSE; } # endif
if (!ZillaUnloadResource()) { retVal = FALSE; }
return retVal; }
# else
// Global load information specific to loading from files.
#if defined(USE_OTTER) || defined(USE_OTTERFUGU)
OTTER_LOAD_INFO g_OtterLoadInfo; #endif
#if defined(USE_HOUND) || defined(USE_ZILLAHOUND)
LOAD_INFO g_HoundLoadInfo; #endif
#ifdef USE_OLD_DATABASES
LOAD_INFO g_ProbLoadInfo; CRANE_LOAD_INFO g_CraneLoadInfo; #else
LOAD_INFO g_HawkLoadInfo; #endif
// Code to load and initialize the databases used.
BOOL LoadCharRec(wchar_t *pPath) { BOOL fError = FALSE;
if (JawsLoadFile(&g_JawsLoadInfo, pPath)) { // Load the Fugu database
if (!fError && !FuguLoadFile(&g_FuguLoadInfo, pPath, &g_locRunInfo)) { fError = TRUE; ASSERT(("Error in FuguLoadFile", FALSE)); }
// Load the Sole database
if (!fError && !SoleLoadFile(&g_SoleLoadInfo, pPath, &g_locRunInfo)) { fError = TRUE; ASSERT(("Error in FuguLoadFile", FALSE)); } g_fUseJaws = TRUE; } else { // Load the Otter database
if (!fError && !OtterLoadFile(&g_locRunInfo, &g_OtterLoadInfo, pPath)) { fError = TRUE; ASSERT(("Error in OtterLoadFile", FALSE)); } g_fUseJaws = FALSE; }
#if defined(USE_ZILLA) || defined(USE_ZILLAHOUND)
// Load the Zilla database
if (!fError && !ZillaLoadFile(&g_locRunInfo, pPath, TRUE)) { fError = TRUE; ASSERT(("Error in ZillaLoadFile", FALSE)); } #endif
#if defined(USE_HOUND)
// Load the Hound database (Hound only, require it to load)
if (!fError && !HoundLoadFile(&g_locRunInfo, &g_HoundLoadInfo, pPath)) { fError = TRUE; ASSERT(("Error in HoundLoadFile", FALSE)); } #endif
g_fUseZillaHound = FALSE; #if defined(USE_ZILLAHOUND)
if (!fError) { // Load the Hound & Hound-Zilla databases (This is optional).
if (HoundLoadFile(&g_locRunInfo, &g_HoundLoadInfo, pPath)) { if (ZillaHoundLoadFile(pPath)) { g_fUseZillaHound = TRUE; } else { # ifndef TRAIN_ZILLA_HOUND_COMBINER
HoundUnLoadFile(&g_HoundLoadInfo); # endif
} } } #endif
#ifndef USE_OLD_DATABASES
// Load the Hawk database.
if (!fError && !HawkLoadFile(&g_locRunInfo, &g_HawkLoadInfo, pPath)) { fError = TRUE; ASSERT(("Error in HawkLoadFile", FALSE)); }
#else
#if !defined(WINCE) && !defined(FAKE_WINCE)
// Load hack probability code until we switch over to hawk.
if (!fError && !ProbLoadFile(pPath, &g_ProbLoadInfo)) { // Failing to load this is no longer an error,
// just fall back on the WinCE method.
// fError = TRUE;
// ASSERT(("Error in ProbLoadFile", FALSE));
} #endif
if (!fError && !CraneLoadFile(&g_locRunInfo,&g_CraneLoadInfo, pPath)) { fError = TRUE; ASSERT(("Error in CraneLoadFile", FALSE)); } #endif
// Did everything load correctly?
if (fError) { // JBENN: If the databases can ever be unloaded, this is
// a place the need to.
// JBENN: FIXME: Set correct error code base on what really went wrong.
SetLastError(ERROR_RESOURCE_NAME_NOT_FOUND); //SetLastError(ERROR_RESOURCE_DATA_NOT_FOUND);
//SetLastError(ERROR_RESOURCE_TYPE_NOT_FOUND);
//SetLastError(ERROR_OUTOFMEMORY);
return FALSE; }
return TRUE; }
// Code to unload the databases used.
BOOL UnloadCharRec() { BOOL ok = TRUE; if (g_fUseJaws) { if (!SoleUnloadFile(&g_SoleLoadInfo)) ok = FALSE; if (!FuguUnLoadFile(&g_FuguLoadInfo)) ok = FALSE; if (!JawsUnloadFile(&g_JawsLoadInfo)) ok = FALSE; } else { if (!OtterUnLoadFile(&g_OtterLoadInfo)) ok = FALSE; }
# if defined(USE_HOUND)
if (!HoundUnLoadFile(&g_HoundLoadInfo)) { ok = FALSE; } # endif
# if defined(USE_ZILLAHOUND)
if (g_fUseZillaHound) { if (!ZillaHoundUnloadFile()) { ok = FALSE; } if (!HoundUnLoadFile(&g_HoundLoadInfo)) { ok = FALSE; } } # endif
if (!ZillaUnLoadFile()) ok = FALSE; # ifdef USE_OLD_DATABASES
if (!CraneUnLoadFile(&g_CraneLoadInfo)) ok = FALSE; # if !defined(WINCE) && !defined(FAKE_WINCE)
if (g_pProbHeader != NULL && !ProbUnLoadFile(&g_ProbLoadInfo)) ok = FALSE; # endif
# else // USE_OLD_DATABASES
if (!HawkUnLoadFile(&g_HawkLoadInfo)) ok = FALSE; # endif // USE_OLD_DATABASES
return ok; }
#endif
// Limit on strokes that can be processed by a recognizer. Since
// Zilla ignores anything beyond 29 strokes, it is safe to ignore
// any extra.
#define MAX_STOKES_PROCESS 30
POINT *DupPoints(POINT *pOldPoints, int nPoints); GLYPH *GlyphFromStrokes(UINT cStrokes, STROKE *pStrokes);
#ifndef USE_RESOURCES
// Build a copy of the glyph structure.
GLYPH *CopyGlyph(GLYPH *pOldGlyph) { GLYPH *pGlyph = NULL, *pLastGlyph = NULL;
// Convert strokes to GLYPHs and FRAMEs so that we can call the
// old code.
while (pOldGlyph != NULL) { GLYPH *pGlyphCur;
// Alloc glyph.
pGlyphCur = NewGLYPH(); if (!pGlyphCur) { goto error; }
// Add to list, and alloc frame
if (pLastGlyph != NULL) { pLastGlyph->next = pGlyphCur; pLastGlyph = pGlyphCur; } else { pGlyph = pGlyphCur; pLastGlyph = pGlyphCur; } pGlyphCur->next = NULL; pGlyphCur->frame = NewFRAME(); if (!pGlyphCur->frame) { goto error; }
// Fill in frame. We just fill in what we need, and ignore
// fields not used by Otter and Zilla, or are set by them.
pGlyphCur->frame->info.cPnt = pOldGlyph->frame->info.cPnt; pGlyphCur->frame->info.wPdk = pOldGlyph->frame->info.wPdk; pGlyphCur->frame->rgrawxy = DupPoints(pOldGlyph->frame->rgrawxy, pOldGlyph->frame->info.cPnt); pGlyphCur->frame->rect = pOldGlyph->frame->rect; pGlyphCur->frame->iframe = pOldGlyph->frame->iframe;
if (pGlyphCur->frame->rgrawxy == NULL) { goto error; }
pOldGlyph = pOldGlyph->next; }
return pGlyph;
error: // Cleanup glyphs on error.
if (pGlyph != NULL) { DestroyFramesGLYPH(pGlyph); DestroyGLYPH(pGlyph); } return NULL; } #endif // !USE_RESOURCES
#ifdef USE_OLD_DATABASES
/******************************Public*Routine******************************\
* AdHocRuleCost * * Because of character folding and the inability of the shape matchers * to distinguish between a cluster a 1000 samples map to versus 1 point * mapping to it we have a few hard rule we throw in to fix obvious * problems. * * History: * 11-Jul-1995 -by- Patrick Haluptzok patrickh * Wrote it. \**************************************************************************/
float AdHocRuleCost(int cStrokes, wchar_t dch, VOLCANO_WEIGHTS *pScores) { #ifdef DISABLE_HEURISTICS
return 0; #else
wchar_t wch; int cFrame;
// Get character and number of strokes. Note we need character in Unicode
// so that we can compare with constant character codes.
// ASSUMPTION: SYM_UNKNOWN should be the only sym if its present.
// So there aren't any alternatives that could get a "better" cost
// so it probably doesn't really matter what cost we return here
if (dch == SYM_UNKNOWN) { return 0; }
wch = LocRunDense2Unicode(&g_locRunInfo, dch); cFrame = cStrokes;
// Check for 0 (2 strokes), penalize all circle shapes
// except 0 when 2 strokes occur.
if (cFrame >= 2) { // 0x824f is the 0 that we don't want to penalize.
// All other circle shapes are penalized.
if ((wch == 0x006F) || (wch == 0x004F) || (wch == 0x00B0) || (wch == 0x3002) || (wch == 0x3007) ) { pScores->afl[VTUNE_ADHOC_CIRCLE] = -1; return -g_vtuneInfo.pTune->weights.afl[VTUNE_ADHOC_CIRCLE]; } }
// Check for 1 stroke lower-case i and j. No dot is a extra penalty.
if (cFrame == 1) { if ((wch == 0x0069) || (wch == 0x006A)) { pScores->afl[VTUNE_ADHOC_IJ] = -1; return -g_vtuneInfo.pTune->weights.afl[VTUNE_ADHOC_IJ]; } }
return 0; #endif
}
BOOL Afterburn(ALT_LIST *pAltList, GLYPH *pGlyph, CHARSET *cs, RECT *rGuide, RECT rc) { DRECTS drcs;
if (pGlyph==NULL || rGuide==NULL) return FALSE;
// Scale and translate the guide box to compute the 'delta rectangle'
drcs.x = rGuide->left; drcs.y = rGuide->top; drcs.w = rGuide->right - rGuide->left; drcs.h = rGuide->bottom - rGuide->top;
// Translate, convert to delta form
rc.left -= drcs.x; rc.top -= drcs.y; rc.right -= (drcs.x + rc.left); rc.bottom -= (drcs.y + rc.top);
// Scale. We do isotropic scaling and center the shorter dimension.
if (drcs.w > drcs.h) { drcs.x = ((1000 * rc.left) / drcs.w); drcs.y = ((1000 * rc.top) / drcs.w) + ((drcs.w - drcs.h) / 2); drcs.h = ((1000 * rc.bottom) / drcs.w); drcs.w = ((1000 * rc.right) / drcs.w); } else { drcs.x = ((1000 * rc.left) / drcs.h) + ((drcs.h - drcs.w) / 2); drcs.y = ((1000 * rc.top) / drcs.h); drcs.w = ((1000 * rc.right) / drcs.h); drcs.h = ((1000 * rc.bottom) / drcs.h); }
#ifndef DISABLE_HEURISTICS
return CraneMatch(pAltList, MAX_ALT_LIST, pGlyph, cs, &drcs, 0, &g_locRunInfo); #else
return FALSE; #endif
}
// Hack to get around lack of data for training Crane
BOOL IsFaultyKana(wchar_t wch) { switch (wch) { // case 0x3041:
case 0x3042: // case 0x3043:
case 0x3044: // case 0x3045:
case 0x3046: // case 0x3047:
case 0x3048: // case 0x3049:
case 0x304A: // case 0x30E9:
return TRUE; } return FALSE; } #endif // USE_OLD_DATABASES
// Sort the alternate list.
// We do a bubble sort. The list is small and we can't use qsort because the data is stored in
// three parallel arrays.
void SortAltListAndTune(ALT_LIST *pAltList, VOLCANO_WEIGHTS *pTuneScore) { int pos1, pos2; int limit1, limit2; FLOAT * const peScore = pAltList->aeScore; wchar_t * const pwchList = pAltList->awchList;
limit2 = pAltList->cAlt; limit1 = limit2 - 1; for (pos1 = 0; pos1 < limit1; ++pos1) { for (pos2 = pos1 + 1; pos2 < limit2; ++pos2) { // Are elements pos1 and pos2 out of order?
if (peScore[pos1] < peScore[pos2]) { FLOAT eTemp; wchar_t wchTemp; VOLCANO_WEIGHTS weights;
// Swap scores and swap characters.
eTemp = peScore[pos1]; peScore[pos1] = peScore[pos2]; peScore[pos2] = eTemp;
wchTemp = pwchList[pos1]; pwchList[pos1] = pwchList[pos2]; pwchList[pos2] = wchTemp;
weights = pTuneScore[pos1]; pTuneScore[pos1]= pTuneScore[pos2]; pTuneScore[pos2]= weights; } } } }
// Call the core recognizer for the given character. Returned the
// number of alternates produced, or -1 if an error occurs.
int CoreRecognizeChar( ALT_LIST *pAltList, // Alt list to be returned
int cAlt, // Max number of alternates
GLYPH **ppGlyph, // Character to recognize (which may be modified)
int nRealStrokes, // Real stroke count for abort processing
RECT *pGuideBox, // Guide box (for partial mode)
RECOG_SETTINGS *pRecogSettings, // Partial mode, other settings
CHARSET *pCS, // ALCs
int *piRecognizer, // Returns the VOLCANO_CONFIG_* constant for the recognizer used
int *piSpace) // The space number in that recognizer
{ int iRet = -1; int iRecognizer = VOLCANO_CONFIG_NONE; int nStrokes = CframeGLYPH(*ppGlyph);
if (nStrokes > VOLCANO_CONFIG_MAX_STROKE_COUNT) nStrokes = VOLCANO_CONFIG_MAX_STROKE_COUNT; if (pRecogSettings->partialMode) nStrokes = 0; iRecognizer = g_latticeConfigInfo.iRecognizers[nStrokes];
*piRecognizer = iRecognizer; *piSpace = -1;
pAltList->cAlt = 0;
// Call the selected recognizer
switch (iRecognizer) { case VOLCANO_CONFIG_OTTER: if (g_fUseJaws) { iRet = JawsMatch(&g_JawsLoadInfo, &g_FuguLoadInfo, &g_SoleLoadInfo, pAltList, cAlt, *ppGlyph, pGuideBox, pCS, &g_locRunInfo); *piSpace = nStrokes; } else { iRet = OtterMatch2(pAltList, cAlt, *ppGlyph, pCS, &g_locRunInfo, piSpace);
// Other experiments
// iRet = FuguMatch(&g_FuguLoadInfo.fugu, pAltList, cAlt, *ppGlyph, NULL /*pGuideBox*/, pCS, &g_locRunInfo);
// iRet = SoleMatch(pAltList, cAlt, *ppGlyph, pGuideBox, pCS, &g_locRunInfo);
// *piSpace = nStrokes;
} break;
case VOLCANO_CONFIG_ZILLA: iRet = ZillaMatch(pAltList, cAlt, ppGlyph, pCS, g_vtuneInfo.pTune->flZillaGeo, (pRecogSettings->partialMode ? pRecogSettings->pAbort : NULL), nRealStrokes, pRecogSettings->partialMode, pGuideBox);
// For Zilla, the space number is the feature count. To make them disjoint from the
// Otter spaces, add on the maximum number of Otter spaces.
*piSpace = CframeGLYPH(*ppGlyph) + OTTER_NUM_SPACES;
// Here you can change the iRecognizer that is returned to indicate that the Hound/Zilla
// combiner ran, instead of just Zilla alone. That way tuning will know to use a different
// weighting parameter.
break;
default: // No recognizer available for this stroke count
iRet = -1; break; }
return iRet; }
// Allocate a cache for the recognizer results.
void *AllocateRecognizerCache() { CACHE *pCache = (CACHE *) ExternAlloc(sizeof(CACHE)); if (pCache == NULL) { return NULL; } pCache->nStrokes = 0; pCache->pStrokes = NULL; return pCache; }
// Free up a cache for the recognizer results.
void FreeRecognizerCache(void *pvCache) { CACHE *pCache = (CACHE *) pvCache; CACHE_ENTRY *pEntry; int iStroke; if (pvCache == NULL) { return; } for (iStroke = 0; iStroke < pCache->nStrokes; iStroke++) { pEntry = pCache->pStrokes[iStroke]; while (pEntry != NULL) { CACHE_ENTRY *pNext = pEntry->pNext; ExternFree(pEntry); pEntry = pNext; } } ExternFree(pCache->pStrokes); ExternFree(pCache); } // Look for results for a given range of strokes, return the recognizer and its
// alternate list.
ALT_LIST *LookupRecognizerCache(void *pvCache, int iStroke, int nStrokes, int *piRecognizer) { CACHE *pCache = (CACHE *) pvCache; CACHE_ENTRY *pEntry; if (pCache == NULL || iStroke >= pCache->nStrokes) { return NULL; } // For the given ending stroke, look for a result for the right number of strokes
pEntry = pCache->pStrokes[iStroke]; while (pEntry != NULL && pEntry->nStrokes != nStrokes) { pEntry = pEntry->pNext; } // If not found, return nothing.
if (pEntry == NULL) { return NULL; } // Otherwise return the cached results.
*piRecognizer = pEntry->iRecognizer; return &(pEntry->alts); }
// Add the alternate list to the cache.
void AddRecognizerCache(void *pvCache, int iStroke, int nStrokes, int iRecognizer, ALT_LIST *pAlts) { CACHE *pCache = (CACHE *) pvCache; CACHE_ENTRY *pEntry; // If no cache, then exit
if (pCache == NULL) { return; } // If the cache is currently too small, then allocate more space for it.
if (iStroke >= pCache->nStrokes) { int i; int nStrokesNew = max(10, (iStroke + 1) * 2); CACHE_ENTRY **pStrokesNew = (CACHE_ENTRY **) ExternRealloc(pCache->pStrokes, sizeof(CACHE_ENTRY *) * nStrokesNew); if (pStrokesNew == NULL) { // If the allocation failed, just continue with the current cache size
return; } // Initialize the memory
for (i = pCache->nStrokes; i < nStrokesNew; i++) { pStrokesNew[i] = NULL; } pCache->pStrokes = pStrokesNew; pCache->nStrokes = nStrokesNew; } // If we got here, then add the entry to the cache
pEntry = (CACHE_ENTRY *) ExternAlloc(sizeof(CACHE_ENTRY)); if (pEntry == NULL) { return; } pEntry->nStrokes = nStrokes; pEntry->iRecognizer = iRecognizer; pEntry->alts = *pAlts; pEntry->pNext = pCache->pStrokes[iStroke]; pCache->pStrokes[iStroke] = pEntry; }
#ifdef USE_OLD_DATABASES
// This call is roughly the equivalent of the RecognizeChar call below, but instead of
// returning probabilities, it returns an alternate list with scores. It uses the old Tsunami
// recognition procedure, with otter and zilla returning code points, followed by adhoc rules,
// language model, baseline/height scores, and crane. The result of this is used by RecognizeChar
// to look up the old probability table.
INT RecognizeCharInsurance( RECOG_SETTINGS *pRecogSettings,// In: Setting for recognizers.
UINT cStrokes, // In: Number of strokes to process.
UINT cRealStrokes, // In: Number of strokes before merging
STROKE *pStrokes, // In: Array of strokes to process.
FLOAT *pProbIsChar, // Out: probability of being valid char.
UINT maxAlts, // In: Size of alts array supplied.
RECOG_ALT *pProbAlts, // Out: alternate list matched with probabilities.
int *pnProbAlts, RECOG_ALT *pScoreAlts, // Out: alternate list matched with scores
int *pnScoreAlts, RECT *pGuideBox, // In: Guide box for this ink.
wchar_t dchContext, // In: Context
int *pSpace, // Out: Space number used for matching
VOLCANO_WEIGHTS *pTuneScore, // Out: score components
BOOL fStringMode, // In: Whether or not the recognizer is in string mode
BOOL fProbMode, // In: Whether the recognizer is in probability mode
void *pvCache, // In/Out: Pointer to cache, or NULL if not being used
int iStroke // In: Index of last stroke of character
) { ALT_LIST *pCacheResult = NULL; BOXINFO box; RECT bbox; int iAlt; GLYPH *pGlyph; ALT_LIST altList; CHARSET charSet; // Mask used for core recognizers
CHARSET charSetMask; // Mask used for probability table lookup
BOOL fCraneBonus = FALSE; int iRecognizer;
// Convert strokes to GLYPHs and FRAMEs so that we can call the
// old code.
pGlyph = GlyphFromStrokes(cStrokes, pStrokes); if (!pGlyph) { return -1; }
// Run otter or zilla as needed.
altList.cAlt = 0; charSetMask.recmask = pRecogSettings->alcValid; charSetMask.recmaskPriority = pRecogSettings->alcPriority; charSetMask.pbAllowedChars = pRecogSettings->pbAllowedChars; charSetMask.pbPriorityChars = pRecogSettings->pbPriorityChars; if (fProbMode) { // In probability mode, don't mask off the core recognizers
charSet.recmask = 0xFFFFFFFF; charSet.recmaskPriority = 0; charSet.pbAllowedChars = NULL; charSet.pbPriorityChars = NULL; } else { // In score mode, mask off the core recognizers
charSet = charSetMask; }
// Get the bounding box for the character
GetRectGLYPH(pGlyph,&bbox);
// Try going to the cache
pCacheResult = LookupRecognizerCache(pvCache, iStroke, cStrokes, &iRecognizer); if (pCacheResult != NULL) { // If it was the Zilla recognizer before, we need to run featurization because
// of its side-effect of fragmenting the strokes, which crane needs.
if (iRecognizer == VOLCANO_CONFIG_ZILLA) { BIGPRIM rgprim[CPRIMMAX]; BYTE aSampleVector[29 * 4]; ZillaFeaturize(&pGlyph, rgprim, aSampleVector); } altList = *pCacheResult; } else { // Invoke Otter or Zilla or any other recognizer that has been specified in the configuration
CoreRecognizeChar(&altList, MAX_ALT_LIST, &pGlyph, cRealStrokes, pGuideBox, pRecogSettings, &charSet, &iRecognizer, pSpace);
// Add it to the cache, since it isn't there already.
AddRecognizerCache(pvCache, iStroke, cStrokes, iRecognizer, &altList); }
// If we're doing an experiment to simulate an optimal otter or zilla,
// replace the real alt list with a fake one.
#ifdef OPTIMAL_OTTER_ZILLA
{ wchar_t dch; altList.cAlt = 1; altList.aeScore[0] = 0; { FILE *f = fopen("c:/answer.txt", "r"); fscanf(f, "%hx", &(altList.awchList[0])); fclose(f); } dch = LocRunUnicode2Dense(&g_locRunInfo, altList.awchList[0]); if (dch != LOC_TRAIN_NO_DENSE_CODE) { wchar_t fdch = LocRunDense2Folded(&g_locRunInfo, dch); if (fdch != 0) dch = fdch; altList.awchList[0] = dch; } else { altList.cAlt = 0; } } #endif
// Get our rough approximation of the probability that this is
// actually a character. If zero alternates are returned, then
// set the space number to -1 as an error flag.
if (altList.cAlt == 0) { *pSpace = -1; *pProbIsChar = 0;
*pnProbAlts = 0; *pnScoreAlts = 0; goto cleanup; }
// Unfold anything in the alt list which needs it.
UnfoldCodes(&altList, &charSet);
// If we couldn't load the probability table, then use the
// WinCE method to get probabilities.
if (g_pProbHeader == NULL) { *pnProbAlts = GetProbsTsunamiFixedTable(cStrokes, &altList, maxAlts, pProbAlts, &charSetMask); }
// Apply crane, if we have a guide for it to use and we are not in partial mode
if (pRecogSettings->partialMode == HWX_PARTIAL_ALL && pGuideBox != NULL && altList.cAlt > 0) { fCraneBonus = Afterburn(&altList, pGlyph, &charSet, pGuideBox, bbox); // Hack to bypass crane if otter a troublesome kana character
if (IsFaultyKana(LocRunDense2Unicode(&g_locRunInfo,altList.awchList[0]))) { fCraneBonus = FALSE; } }
// Save away the scores for the alternates, then apply the weight for the particular
// recognizer used. Then add in the crane bonus/penalty and the adhoc rules.
for (iAlt=0; iAlt<(int)altList.cAlt; iAlt++) { int iParam = (fStringMode ? VTUNE_STRING_CORE : VTUNE_CHAR_CORE) + iRecognizer; pTuneScore[iAlt].afl[iParam] = altList.aeScore[iAlt]; altList.aeScore[iAlt] *= g_vtuneInfo.pTune->weights.afl[iParam];
// Crane is now implemented as a penalty rather than a bonus. This means
// all alternates after the first one get a penalty, and even the first one
// gets a penalty if no crane bonus is applied.
if (iAlt > 0 || !fCraneBonus) { iParam = fStringMode ? VTUNE_STRING_CRANE : VTUNE_CHAR_CRANE; pTuneScore[iAlt].afl[iParam] = -1; altList.aeScore[iAlt] -= g_vtuneInfo.pTune->weights.afl[iParam]; }
// Add adhoc penalties for the one stroke i and j and two stroke circle shapes
if (pRecogSettings->partialMode == HWX_PARTIAL_ALL) { altList.aeScore[iAlt] += AdHocRuleCost(cStrokes, altList.awchList[iAlt], pTuneScore + iAlt); } }
// Sort the alternates out.
SortAltListAndTune(&altList, pTuneScore);
// Copy the score-based alts to the output
for (iAlt = 0; iAlt < (int)altList.cAlt && iAlt < (int)maxAlts && iAlt < (int)MAX_ALT_LIST; ++iAlt) { pScoreAlts[iAlt].wch = altList.awchList[iAlt]; pScoreAlts[iAlt].prob = altList.aeScore[iAlt]; } *pnScoreAlts = altList.cAlt;
// Re-score the alternates using the old weightings in the
// TTune structure, so that prob table lookup will be weighting
// independent.
for (iAlt = 0; iAlt < (int)altList.cAlt; ++iAlt) { altList.aeScore[iAlt] = g_vtuneInfo.pTune->weights.afl[VTUNE_ADHOC_IJ] * pTuneScore[iAlt].afl[VTUNE_ADHOC_IJ] + g_vtuneInfo.pTune->weights.afl[VTUNE_ADHOC_CIRCLE] * pTuneScore[iAlt].afl[VTUNE_ADHOC_CIRCLE] + (cStrokes > 2 ? g_ttuneInfo.pTTuneCosts->ZillaChar.CARTAddWeight : g_ttuneInfo.pTTuneCosts->OtterChar.CARTAddWeight) * pTuneScore[iAlt].afl[fStringMode ? VTUNE_STRING_CRANE : VTUNE_CHAR_CRANE] + pTuneScore[iAlt].afl[(fStringMode ? VTUNE_STRING_CORE : VTUNE_CHAR_CORE) + iRecognizer]; }
// Build up a BOXINFO structure from the guide, for use in the baseline/height scoring
if (pGuideBox!=NULL) { box.size = pGuideBox->bottom - pGuideBox->top; box.baseline = pGuideBox->bottom; box.xheight = box.size / 2; box.midline = box.baseline - box.xheight; }
// For each alternate
for (iAlt=0; iAlt<(int)altList.cAlt; iAlt++) { float cost; // Apply baseline/height and language model unigram scores
if (cStrokes<3) { if (pGuideBox!=NULL) { cost = BaselineTransitionCost(0,bbox,&box,altList.awchList[iAlt],bbox,&box) * g_ttuneInfo.pTTuneCosts->OtterChar.BaseWeight; altList.aeScore[iAlt] += cost;
cost = BaselineBoxCost(altList.awchList[iAlt],bbox,&box) * g_ttuneInfo.pTTuneCosts->OtterChar.BoxBaselineWeight; altList.aeScore[iAlt] += cost;
cost = HeightTransitionCost(0,bbox,&box,altList.awchList[iAlt],bbox,&box) * g_ttuneInfo.pTTuneCosts->OtterChar.HeightWeight; altList.aeScore[iAlt] += cost;
cost = HeightBoxCost(altList.awchList[iAlt],bbox,&box) * g_ttuneInfo.pTTuneCosts->OtterChar.BoxHeightWeight; altList.aeScore[iAlt] += cost; } cost = UnigramCost(&g_unigramInfo,altList.awchList[iAlt]) * g_ttuneInfo.pTTuneCosts->OtterChar.UniWeight; altList.aeScore[iAlt] += cost; } else { if (pGuideBox!=NULL) { cost = BaselineTransitionCost(0,bbox,&box,altList.awchList[iAlt],bbox,&box) * g_ttuneInfo.pTTuneCosts->ZillaChar.BaseWeight; altList.aeScore[iAlt] += cost;
cost = BaselineBoxCost(altList.awchList[iAlt],bbox,&box) * g_ttuneInfo.pTTuneCosts->ZillaChar.BoxBaselineWeight; altList.aeScore[iAlt] += cost;
cost = HeightTransitionCost(0,bbox,&box,altList.awchList[iAlt],bbox,&box) * g_ttuneInfo.pTTuneCosts->ZillaChar.HeightWeight; altList.aeScore[iAlt] += cost;
cost = HeightBoxCost(altList.awchList[iAlt],bbox,&box) * g_ttuneInfo.pTTuneCosts->ZillaChar.BoxHeightWeight; altList.aeScore[iAlt] += cost;
} cost = UnigramCost(&g_unigramInfo,altList.awchList[iAlt]) * g_ttuneInfo.pTTuneCosts->ZillaChar.UniWeight; altList.aeScore[iAlt] += cost;
// Zilla scores get fudged
altList.aeScore[iAlt] *= g_ttuneInfo.pTTuneCosts->ZillaStrFudge; }
// If context was available for this character, then use the bigram/class bigram scores
if (dchContext != SYM_UNKNOWN && dchContext != 0) { #if !defined(WINCE) && !defined(FAKE_WINCE)
cost = BigramTransitionCost(&g_locRunInfo,&g_bigramInfo,dchContext,altList.awchList[iAlt]) * g_ttuneInfo.pTTuneCosts->BiWeight; altList.aeScore[iAlt] += cost; #endif
cost = ClassBigramTransitionCost(&g_locRunInfo,&g_classBigramInfo,dchContext,altList.awchList[iAlt]) * g_ttuneInfo.pTTuneCosts->BiClassWeight; altList.aeScore[iAlt] += cost; } }
// Sort the resulting alternates
SortAltList(&altList);
// This is a temporary call to get probs directly, until we have Hawk.
if (g_pProbHeader != NULL) { *pnProbAlts = GetProbsTsunami(cStrokes, &altList, maxAlts, pProbAlts, &charSetMask); } #if 0
{ FILE *f=fopen("c:/temp/prob.log","a+"); fprintf(f,"%04X %g -> %04X %g\n", altList.awchList[0], altList.aeScore[0], pProbAlts[0].wch, pProbAlts[0].prob); fclose(f); } #endif
//#define TEST_FOR_PATRICKH
#ifdef TEST_FOR_PATRICKH
{ int i; for (i=0; i<*pnProbAlts && i<(int)altList.cAlt; i++) pProbAlts[i].wch = altList.awchList[i]; *pnProbAlts = i; } #endif
cleanup: // Free the glyph structure.
DestroyFramesGLYPH(pGlyph); DestroyGLYPH(pGlyph);
return *pnProbAlts; }
#else
// Version of Afterburn to call Hawk.
int Afterburn( ALT_LIST *pAltList, // Input used to select correct CART tree
GLYPH *pGlyph, CHARSET *cs, RECT *rGuide, int otterSpace, UINT maxAlts, // Size of alts array supplied.
RECOG_ALT *pAlts // Out: alternate list matched.
) { UINT ii; UINT iDest; // UINT jj, kk;
BASICINFO basicInfo; FEATINFO featInfo; HANDLE hCartTree; QALT aQAlt[MAX_RECOG_ALTS]; UINT cQAlt; #if 0
double aWeights[MAX_ALT_LIST]; double fSum; double offset; FILE *pFile; #endif
RECT bbox; DRECTS drcs;
if (pGlyph == NULL) { return -1; }
// Get the bounding box for the character
GetRectGLYPH(pGlyph, &bbox);
// Scale and translate the guide box to compute the 'delta rectangle'
if (rGuide == NULL) { // No guide given, This is the current assumption.
drcs.x = 0; drcs.y = 0; drcs.w = 1000; drcs.h = 1000; } else { // Actually got a guide, pass it on. Current code ignores the
// guide, but may add it back so don't lose code path.
drcs.x = rGuide->left; drcs.y = rGuide->top; drcs.w = rGuide->right - rGuide->left; drcs.h = rGuide->bottom - rGuide->top; }
// Translate, convert to delta form
bbox.left -= drcs.x; bbox.top -= drcs.y; bbox.right -= (drcs.x + bbox.left); bbox.bottom -= (drcs.y + bbox.top);
// Scale. We do isotropic scaling and center the shorter dimension.
if (drcs.w > drcs.h) { drcs.x = ((1000 * bbox.left) / drcs.w); drcs.y = ((1000 * bbox.top) / drcs.w) + ((drcs.w - drcs.h) / 2); drcs.h = ((1000 * bbox.bottom) / drcs.w); drcs.w = ((1000 * bbox.right) / drcs.w); } else { drcs.x = ((1000 * bbox.left) / drcs.h) + ((drcs.h - drcs.w) / 2); drcs.y = ((1000 * bbox.top) / drcs.h); drcs.w = ((1000 * bbox.right) / drcs.h); drcs.h = ((1000 * bbox.bottom) / drcs.h); }
// Fill in basic info.
// basicInfo.cStrk -- Filed in by MakeFeatures.
basicInfo.cSpace = (short)otterSpace; basicInfo.drcs = drcs;
// Fill in feature info.
if (!MakeFeatures(&basicInfo, &featInfo, pGlyph)) { return -1; }
#if 1
// Find cart tree
hCartTree = (HANDLE)0; for (ii = 0; !hCartTree && ii < pAltList->cAlt; ++ii) { hCartTree = HawkFindTree(basicInfo.cStrk, basicInfo.cSpace, pAltList->awchList[ii]); } if (!hCartTree) { // No cart tree for anything in the alt list!?!?!
return -1; }
// Do the match.
//HawkMatch(pAltList, MAX_ALT_LIST, pGlyph, cs, &drcs, eCARTWeight, &g_locRunInfo);
cQAlt = HawkMatch(&basicInfo, &featInfo, hCartTree, aQAlt);
// Copy out the alt list, applying the ALC
iDest = 0; for (ii = 0; ii < cQAlt && iDest < maxAlts; ++ii) { if (IsAllowedChar(&g_locRunInfo, cs, aQAlt[ii].dch)) { pAlts[iDest].wch = aQAlt[ii].dch; pAlts[iDest].prob = aQAlt[ii].prob; iDest++; } } cQAlt = iDest; #elif 0
// Select stroke dependent offset used to compute weights below.
switch (basicInfo.cStrk) { case 1 : offset = .01; break; case 2 : offset = .05; break; default : offset = .05; break; }
// Compute wighting to apply to each trees results.
fSum = 0.0; for (ii = 0; ii < pAltList->cAlt; ++ii) { double ratio; ratio = offset / (offset + pAltList->aeScore[0] - pAltList->aeScore[ii]); aWeights[ii] = ratio * ratio * ratio; fSum += aWeights[ii]; }
// Normalize to sum to one.
for (ii = 0; ii < pAltList->cAlt; ++ii) { aWeights[ii] /= fSum; }
pFile = fopen("AltList.dump", "a"); fprintf(pFile, "Start Dump:\n"); // Find each cart tree and add results to list.
hCartTree = (HANDLE)0; cQAlt = 0; for (ii = 0; ii < pAltList->cAlt && cQAlt < maxAlts; ++ii) { hCartTree = HawkFindTree(basicInfo.cStrk, basicInfo.cSpace, pAltList->awchList[ii]); if (hCartTree) { UINT cQAltNew; SCORE penalty; int skipped;
// Do the match.
cQAltNew = HawkMatch(&basicInfo, &featInfo, hCartTree, aQAlt);
// How much can we add?
if (cQAltNew > maxAlts - cQAlt) { cQAltNew = maxAlts - cQAlt; }
// Convert our weight (Probability) to a log prob.
penalty = ProbToScore(aWeights[ii]);
// Zilla overgenerates prototypes, so look for different top one from
// additional trees.
if (ii > 0 && basicInfo.cStrk >= 3 && aQAlt[0].dch == pAlts[0].wch) { continue; }
// Add to list.
skipped = 0; for (jj = 0; jj < cQAltNew; ++jj) { SCORE newScore;
// Check for duplicates in the alternate list. Each individual list has not
// dups, so we don't have to check them.
newScore = aQAlt[jj].prob + penalty; fprintf(pFile, " %04X:%d->%d", LocRunDense2Unicode(&g_locRunInfo,aQAlt[jj].dch),aQAlt[jj].prob,newScore); for (kk = 0; kk < cQAlt; ++kk) { if (aQAlt[jj].dch == pAlts[kk].wch) { ASSERT(pAlts[kk].prob == (float)(int)pAlts[kk].prob); pAlts[kk].prob = ScoreAddProbs((SCORE)pAlts[kk].prob, newScore); ++skipped; goto noAdd; } } pAlts[jj - skipped + cQAlt].wch = aQAlt[jj].dch; pAlts[jj - skipped + cQAlt].prob = (float)newScore; noAdd: ; } fprintf(pFile, "\n");
cQAlt += cQAltNew - skipped; } } for (kk = 0; kk < cQAlt; ++kk) { fprintf(pFile, " %04X:%g", LocRunDense2Unicode(&g_locRunInfo,pAlts[kk].wch),pAlts[kk].prob); } fprintf(pFile, "\n"); fprintf(pFile, "End Dump\n"); fclose(pFile);
#else
// Select stroke dependent offset used to compute weights below.
switch (basicInfo.cStrk) { case 1 : offset = 1.0; break; case 2 : offset = 1.0; break; default : offset = 1.0; break; }
pFile = fopen("AltList.dump", "a"); fprintf(pFile, "Start Dump:\n"); // Find each cart tree and add results to list.
hCartTree = (HANDLE)0; cQAlt = 0; for (ii = 0; ii < pAltList->cAlt && cQAlt < maxAlts; ++ii) { hCartTree = HawkFindTree(basicInfo.cStrk, basicInfo.cSpace, pAltList->awchList[ii]); if (hCartTree) { UINT cQAltNew; SCORE penalty; int skipped;
// Do the match.
cQAltNew = HawkMatch(&basicInfo, &featInfo, hCartTree, aQAlt);
// How much can we add?
if (cQAltNew > maxAlts - cQAlt) { cQAltNew = maxAlts - cQAlt; }
// Convert our weight (Probability) to a log prob.
penalty = (SCORE)((pAltList->aeScore[0] - pAltList->aeScore[ii]) * 2040);
// Zilla overgenerates prototypes, so look for different top one from
// additional trees.
if (ii > 0 && basicInfo.cStrk >= 3 && aQAlt[0].dch == pAlts[0].wch) { continue; }
// Add to list.
skipped = 0; for (jj = 0; jj < cQAltNew; ++jj) { SCORE newScore;
// Check for duplicates in the alternate list. Each individual list has not
// dups, so we don't have to check them.
newScore = aQAlt[jj].prob + penalty; fprintf(pFile, " %04X:%d->%d", LocRunDense2Unicode(&g_locRunInfo,aQAlt[jj].dch),aQAlt[jj].prob,newScore); for (kk = 0; kk < cQAlt; ++kk) { if (aQAlt[jj].dch == pAlts[kk].wch) { ASSERT(pAlts[kk].prob == (float)(int)pAlts[kk].prob); pAlts[kk].prob = ScoreAddProbs((SCORE)pAlts[kk].prob, newScore); ++skipped; goto noAdd; } } pAlts[jj - skipped + cQAlt].wch = aQAlt[jj].dch; pAlts[jj - skipped + cQAlt].prob = (float)newScore; noAdd: ; } fprintf(pFile, "\n");
cQAlt += cQAltNew - skipped; } } for (kk = 0; kk < cQAlt; ++kk) { fprintf(pFile, " %04X:%g", LocRunDense2Unicode(&g_locRunInfo,pAlts[kk].wch),pAlts[kk].prob); } fprintf(pFile, "\n"); fprintf(pFile, "End Dump\n"); fclose(pFile);
#endif
FreeFeatures(&featInfo);
return cQAlt; }
#endif
#ifndef USE_OLD_DATABASES
// Do the recognition.
INT RecognizeChar( RECOG_SETTINGS *pRecogSettings,// Setting for recognizers.
UINT cStrokes, // Number of strokes to process.
UINT cRealStrokes, // Number of strokes before merging
STROKE *pStrokes, // Array of strokes to process.
FLOAT *pProbIsChar, // Out: probability of being valid char.
UINT maxAlts, // Size of alts array supplied.
RECOG_ALT *pAlts, // Out: alternate list matched.
RECT *pGuideBox, // Guide box for this ink.
int *pCount ) { INT cAlts; GLYPH *pGlyph; ALT_LIST altList; CHARSET charSet; int iRecognizer;
// Convert strokes to GLYPHs and FRAMEs so that we can call the
// old code.
pGlyph = GlyphFromStrokes(cStrokes, pStrokes); if (!pGlyph) { return -1; }
// Run otter or zilla as needed.
// a possible optimization would be Switch to proto matching versions of match calls
altList.cAlt = 0;
charSet.recmask = 0xFFFFFFFF; charSet.recmaskPriority = 0; charSet.pbAllowedChars = NULL; charSet.pbPriorityChars = NULL;
// Invoke Otter or Zilla or any other recognizer that has been specified in the configuration
CoreRecognizeChar(&altList, MAX_ALT_LIST, &pGlyph, cRealStrokes, pGuideBox, pRecogSettings, &charSet, &iRecognizer, pCount);
charSet.recmask = pRecogSettings->alcValid; charSet.recmaskPriority = pRecogSettings->alcPriority; charSet.pbAllowedChars = pRecogSettings->pbAllowedChars; charSet.pbPriorityChars = pRecogSettings->pbPriorityChars;
if (pRecogSettings->partialMode != HWX_PARTIAL_ALL) { unsigned int ii;
// Unfold anything in the alt list which needs it.
UnfoldCodes(&altList, &charSet);
// Copy over the alt list.
// Note that we don't have probabilities, and they don't
// really make sense anyway. However the code that
// follows will discard items with a prob of zero, so
// they should be set to something.
for (ii = 0; ii < maxAlts && ii < altList.cAlt; ++ii) { pAlts[ii].wch = altList.awchList[ii]; pAlts[ii].prob = -altList.aeScore[ii]; }
// Free the glyph structure.
DestroyFramesGLYPH(pGlyph); DestroyGLYPH(pGlyph);
return ii; }
// Get our rough approximation of the probability that this is
// actually a character.
*pProbIsChar = altList.aeScore[0];
// Run Hawk.
#ifndef DISABLE_HEURISTICS
cAlts = Afterburn(&altList, pGlyph, &charSet, pGuideBox, *pCount, maxAlts, pAlts); #else
{ unsigned int ii; UnfoldCodes(&altList, &charSet); for (ii = 0; ii < maxAlts && ii < altList.cAlt; ii++) { pAlts[ii].wch = altList.awchList[ii]; pAlts[ii].prob = -altList.aeScore[ii]; } cAlts = ii; } #endif
// Free the glyph structure.
DestroyFramesGLYPH(pGlyph); DestroyGLYPH(pGlyph);
return cAlts; } #endif
|