Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

345 lines
14 KiB

  1. //$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
  2. //
  3. // Copyright (c) 2001 Microsoft Corporation. All rights reserved.
  4. //
  5. // Module:
  6. // volcano/inc/lattice.h
  7. //
  8. // Description:
  9. // Holds the internal structures related to the lattice for Volcano,
  10. // as well as the functions used to manipulate the lattice.
  11. //
  12. // Author:
  13. // hrowley
  14. //
  15. //$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
  16. #pragma once
  17. // Uncomment this (or put it in the project settings) to enable tuning code.
  18. // The tuning code writes out data to the file c:\tune.log when the SearchForTargetResult()
  19. // API function is called, saving the components of the score for the best path and all
  20. // alternate paths through the lattice. This allows a simple program to adjust the linear
  21. // weightings of these components to maximize the number of times the correct path is chosen.
  22. //#define HWX_TUNE
  23. //#define USE_IFELANG3_BIGRAMS
  #include <math.h>      // log(), used by the SCALE_FOR_IFELANG3 macro
  #include <windows.h>
  #include "vtune.h"
  #include "charrec.h"
  27. #ifdef __cplusplus
  28. extern "C" {
  29. #endif
  30. // References to the various databases that get loaded
  31. // Eventually this stuff, along with a few other globals, should
  32. // be placed in a structure so we can have multiple languages
  33. // active at once.
  34. extern BBOX_PROB_TABLE *g_pProbTable;
  35. extern UNIGRAM_INFO g_unigramInfo;
  36. extern BIGRAM_INFO g_bigramInfo;
  37. extern CLASS_BIGRAM_INFO g_classBigramInfo;
  38. extern TTUNE_INFO g_ttuneInfo;
  39. extern wchar_t g_pLocale[16];
  40. extern wchar_t g_pLocaleDir[1024];
  41. extern wchar_t g_pRecogDir[1024];
  42. extern HINSTANCE g_hDLL;
  43. extern CENTIPEDE_INFO g_centipedeInfo;
  44. extern VOLCANO_PARAMS_INFO g_vtuneInfo; // Tuning parameters
  45. extern VOLCANO_CONFIG g_latticeConfigInfo; // Configuration data
  46. extern JAWS_LOAD_INFO g_JawsLoadInfo;
  47. extern FUGU_LOAD_INFO g_FuguLoadInfo;
  48. extern SOLE_LOAD_INFO g_SoleLoadInfo;
  49. extern BOOL g_fUseJaws;
  50. extern BOOL g_fUseZillaHound;
  // Initialize the configuration info.
  // Takes no arguments and returns nothing.  The parameter list is spelled
  // (void) so that C compilers see a real prototype and reject calls that pass
  // arguments; an empty () in C leaves the arguments unchecked.
  void LatticeConfigInit(void);
  53. /////////////////////////////////////////////////////////////////////////////////////
  54. // References to legacy baseline/height and language model, currently in lattice.c //
  55. /////////////////////////////////////////////////////////////////////////////////////
  // Size and line-position measurements handed to the baseline/height cost
  // functions declared in this header.
  typedef struct tagBOXINFO
  {
      int size;       // Absolute size.
      int xheight;    // Absolute height to midline.
      int baseline;   // Baseline in tablet coordinates.
      int midline;    // Midline in tablet coordinates.
  } BOXINFO;
  63. FLOAT BaselineTransitionCost(SYM symPrev, RECT rPrev, BOXINFO *biPrev, SYM sym, RECT r, BOXINFO *bi);
  64. FLOAT HeightTransitionCost(SYM symPrev, RECT rPrev, BOXINFO *biPrev, SYM sym, RECT r, BOXINFO *bi);
  65. FLOAT HeightBoxCost(SYM sym, RECT r, BOXINFO *bi);
  66. FLOAT BaselineBoxCost(SYM sym, RECT r, BOXINFO *bi);
  67. ////////////////////////////
  68. // Public data structures //
  69. ////////////////////////////
  // One character on a path through the lattice.
  typedef struct tagLATTICE_PATH_ELEMENT {
      wchar_t wChar;      // Unicode character
      int     iStroke;    // With iAlt: information to look up the character in the lattice
      int     iAlt;
      int     nStrokes;   // NOTE(review): presumably the stroke count of this character -- confirm
      int     iBoxNum;    // Box number
      // Fields disabled in the original, kept for reference:
      // FLOAT score;     // Raw zilla/otter score
      // RECT bbox;       // Bounding box of the character
  } LATTICE_PATH_ELEMENT;
  77. // This structure is returned by GetCurrentPath.
  78. typedef struct tagLATTICE_PATH {
  79. int nChars; // Number of character in current path
  80. LATTICE_PATH_ELEMENT *pElem; // The characters themselves
  81. } LATTICE_PATH;
  82. //////////////////////////////////////////////////////////////////////////
  83. // Data structures, which are for the most part internal to the module. //
  84. //////////////////////////////////////////////////////////////////////////
  // Forward declaration; the struct body is defined further down in this header.
  typedef struct tagLATTICE LATTICE;

  // Alias for Log2Range, which is defined outside this header.
  #define MinLogProb Log2Range

  // Array bounds used by the lattice structures in this header.
  #define MaxStrokesPerCharacter 32
  #define MaxAltsPerStroke 20

  // Scale factor for IFELang3.  Earlier values tried by the original author
  // were -1024 and -10000.
  // The expansion calls log(), so <math.h> must be in scope wherever this
  // macro is used; without a declaration a C89 compiler would assume log()
  // returns int.
  #define SCALE_FOR_IFELANG3 (-1024.0*log(2.0))
  92. // Cache entry for recognition results
  93. typedef struct CACHE_ENTRY
  94. {
  95. int nStrokes; // Number of strokes for this character
  96. int iRecognizer; // Which recognizer gave an answer
  97. ALT_LIST alts; // The results
  98. struct CACHE_ENTRY *pNext; // Pointer to the next cache entry
  99. } CACHE_ENTRY;
  100. // Cache for recognition results
  101. typedef struct CACHE
  102. {
  103. int nStrokes; // How many strokes we have allocated space for
  104. CACHE_ENTRY **pStrokes; // Pointers to the cache entries for each stroke
  105. } CACHE;
  // Create a recognizer cache and return it as an opaque pointer.
  // Declared with (void) so C callers get a checked zero-argument prototype.
  // NOTE(review): presumably returns NULL on allocation failure -- confirm.
  void *AllocateRecognizerCache(void);

  // Release a recognizer cache.
  // NOTE(review): pvCache is presumably a pointer obtained from
  // AllocateRecognizerCache -- confirm.
  void FreeRecognizerCache(void *pvCache);
  108. ALT_LIST *LookupRecognizerCache(void *pvCache, int iStroke, int nStrokes, int *piRecognizer);
  109. void AddRecognizerCache(void *pvCache, int iStroke, int nStrokes, int iRecognizer, ALT_LIST *pAlts);
  110. typedef struct tagLATTICE_ELEMENT {
  111. BOOL fUsed; // Whether this alternate is in use
  112. float logProb; // Score for just this alternate, without language model
  113. float logProbPath; // Score for path to this point, including language model
  114. int iCharDetectorScore; // score coming from char detector
  115. int iPathLength; // How many characters are on the best path to this element
  116. int nStrokes; // Number of strokes in this character
  117. int iPrevAlt; // Index of previous character in the appropriate column
  118. int nPrevStrokes; // Number of strokes in the previous character
  119. wchar_t wChar; // Dense code of this character
  120. wchar_t wPrevChar; // Dense code of previous character
  121. RECT bbox; // Bounds of the ink
  122. RECT writingBox; // Estimated writing box
  123. int space;
  124. int area;
  125. BOOL fCurrentPath; // Whether this alternate is on the best path
  126. FLOAT score;
  127. int maxDist;
  128. int nHits; // How many times this element has occurred on paths
  129. int iPromptPtr; // How far along we are in the prompt string.
  130. // Eventually this field could represent the current state
  131. // of the DFA implementing a factoid.
  132. #ifdef USE_IFELANG3_BIGRAMS
  133. int indexIFELang3;
  134. int nBigrams;
  135. float bigramLogProbs[MaxAltsPerStroke];
  136. int bigramAlts[MaxAltsPerStroke];
  137. #endif
  138. #ifdef HWX_TUNE
  139. VOLCANO_WEIGHTS tuneScores; // Store the components of the score for this alternate
  140. #endif
  141. } LATTICE_ELEMENT;
  142. typedef struct tagLATTICE_ALT_LIST {
  143. int iBrkNetScore; // is this a hard break point
  144. BOOL fSpaceAfterStroke; // is there a hard space after this stroke
  145. int nUsed; // Number of alternates used in this column
  146. LATTICE_ELEMENT alts[MaxAltsPerStroke]; // Column of alternates
  147. } LATTICE_ALT_LIST;
  148. typedef struct tagSCORES_ALT_LIST {
  149. VOLCANO_WEIGHTS alts[MaxAltsPerStroke];
  150. } SCORES_ALT_LIST;
  151. typedef struct tagLATTICE {
  152. RECOG_SETTINGS recogSettings; // Other settings (ALCs, abort flag)
  153. BOOL fWordMode; // "Word" mode, in which free mode treats all the ink as one char
  154. BOOL fCoerceMode; // ALCPriority -> ALCValid
  155. BOOL fSingleSeg; // Return only a single segmentation
  156. BOOL fUseFactoid; // Whether to use factoid settings or not
  157. ALC alcFactoid; // ALC from factoid settings
  158. BYTE *pbFactoidChars; // Bitmask of chars from factoid
  159. BOOL fSepMode; // "Separate" mode, disables language model
  160. wchar_t *wszAnswer; // Used during tuning and training, to tell the recognizer in advance
  161. // what the correct answer is. Also used by separator
  162. BOOL fUseGuide; // Whether or not to use the guide
  163. HWXGUIDE guide; // The guide
  164. int nStrokes; // How many strokes (after merging) are in the pStroke array
  165. int nStrokesAllocated; // How much space is allocated in the pStroke array
  166. int nRealStrokes; // How many strokes have been added to the lattice (before merging)
  167. STROKE *pStroke; // The array of strokes
  168. LATTICE_ALT_LIST *pAltList; // Array of lattice columns
  169. wchar_t *wszBefore; // Pre-context (in reverse order, the first character is the one just before the ink)
  170. wchar_t *wszAfter; // Post-context (in normal order, the first character is the one just after the ink)
  171. BOOL fProbMode; // Whether the score associated with alt is a log prob or a score
  172. BOOL fUseIFELang3; // Whether to use IFELang3 (available, and enough characters to be useful)
  173. int nProcessed; // How many strokes have been processed so far
  174. BOOL fEndInput; // Whether we have reached the end of the input
  175. BOOL fIncremental; // Whether we're doing processing incrementally
  176. int nFixedResult; // The number of strokes for which results have already been returned, and
  177. // whose interpretation cannot change.
  178. void *pvCache; // Cache for recognition results, or NULL if unused
  179. BOOL fUseLM; // Whether to use the language model
  180. } LATTICE;
  // Globals that exist only in tuning builds (HWX_TUNE defined).
  // g_pTuneFile is a FILE *, so <stdio.h> must be in scope in such builds.
  #ifdef HWX_TUNE
  extern FILE *g_pTuneFile;
  extern int   g_iTuneMode;
  #endif
  185. ///////////////////
  186. // API Functions //
  187. ///////////////////
  188. // Get the number of strokes which have been added to the lattice
  189. int GetLatticeStrokeCount(LATTICE *lat);
  190. // Create an empty lattice data structure. Returns NULL if it fails to allocate memory.
  191. LATTICE *AllocateLattice();
  192. // Create a new lattice with the same settings as the given lattice.
  193. LATTICE *CreateCompatibleLattice(LATTICE *lat);
  194. // Set the ALC values for the underlying boxed recognizer
  195. void SetLatticeALCValid(LATTICE *lat, ALC alcValid);
  196. void SetLatticeALCPriority(LATTICE *lat, ALC alcPriority);
  197. // Set the guide for the lattice (which will switch things to boxed mode)
  198. void SetLatticeGuide(LATTICE *lat, HWXGUIDE *pGuide);
  199. // Destroy lattice data structure.
  200. void FreeLattice(LATTICE *lat);
  201. // Add a stroke to the lattice, returns TRUE if it succeeds.
  202. BOOL AddStrokeToLattice(LATTICE *lat, int nInk, POINT *pts, DWORD time);
  203. // Update the probabilities in the lattice, including setting current
  204. // path to the most likely path so far (not including language model).
  205. // Can be called repeatedly for incremental processing after each stroke.
  206. BOOL ProcessLattice(LATTICE *lat, BOOL fEndInput);
  207. BOOL ProcessLatticeRange(LATTICE *lat, int iStrtStroke, int iEndStroke);
  208. // Return the current path in a LATTICE_PATH structure, which contains
  209. // arrays of the bounding boxes for each character, the stroke counts,
  210. // and the characters themselves.
  211. // When called after ProcessLattice, returns the highest probability path.
  212. // The memory for the path should be freed by the caller.
  213. BOOL GetCurrentPath(LATTICE *lat, LATTICE_PATH **pPath);
  214. // Free a LATTICE_PATH structure returned by GetCurrentPath()
  215. void FreeLatticePath(LATTICE_PATH *path);
  216. // Given a lattice and a path through it, for characters iStartChar through iEndChar
  217. // inclusive, return the time stamps of the first and last strokes in those characters.
  218. // Returns FALSE if there are no strokes associated with the characters (eg, spaces)
  219. BOOL GetCharacterTimeRange(LATTICE *lat, LATTICE_PATH *path, int iStartChar, int iEndChar,
  220. DWORD *piStartTime, DWORD *piEndTime);
  221. // Given a character number in the current path (counting from zero),
  222. // and the number of alternates to return, it returns alternates for
  223. // the character which contain the same number of strokes. This means
  224. // alternate segmentations cannot be returned. The number of alternates
  225. // actually stored in the array are returned.
  226. int GetAlternatesForCharacterInCurrentPath(LATTICE *lat, LATTICE_PATH *path, int iChar, int nAlts, wchar_t *pwAlts);
  227. // Apply language model to produce a better current path. Currently only
  228. // runs IFELang3 if available, and asserts otherwise. This will be changed
  229. // to use the existing unigram/bigram stuff later.
  230. void ApplyLanguageModel(LATTICE *lat, wchar_t *wszCorrectAnswer);
  231. // Load some global tables (the locale table, unigrams, bigrams, and class bigrams)
  232. // The tables needed by the segmenter are not loaded until needed, this will
  233. // probably change later. The path is the same format as taken by the file loading
  234. // functions. This interface will probably change when I read the code in hwx.c
  235. // to see how it should be done. :)
  236. BOOL LatticeConfigFile(wchar_t *pRecogDir);
  237. // Unload any global tables loaded earlier by LatticeConfig
  238. BOOL LatticeUnconfigFile();
  239. // Load some global tables (the locale table, unigrams, bigrams, and class bigrams)
  240. // The tables needed by the segmenter are not loaded until needed, this will
  241. // probably change later. The path is the same format as taken by the file loading
  242. // functions. This interface will probably change when I read the code in hwx.c
  243. // to see how it should be done. :)
  244. BOOL LatticeConfig(HINSTANCE hInst);
  245. // Unload any global tables loaded earlier by LatticeConfig
  246. BOOL LatticeUnconfig();
  247. BOOL GetBoxOfAlternateInCurrentPath(LATTICE *pLattice, LATTICE_PATH *path, int iChar, RECT *pRect);
  248. ///////////////
  249. // Call flow //
  250. ///////////////
  251. // 0. LatticeConfig
  252. // 1. AllocateLattice
  253. // 2. For each stroke
  254. // a. AddStrokeToLattice
  255. // b. ProcessLattice (this is optional, for incremental processing)
  256. // c. GetCurrentPath (this is optional, for incremental results)
  257. // 3. ProcessLattice
  258. // 4. ApplyLanguageModel (optional, probably not useful at the moment)
  259. // 5. GetCurrentPath
  260. // 6. GetAlternatesForCharacterInCurrentPath
  261. // 7. FreeLatticePath
  262. // 8. FreeLattice
  263. // 9. LatticeUnconfig
  264. int SearchForTargetResultInternal(LATTICE *lat, wchar_t *wsz);
  265. RECT GetAlternateBbox(LATTICE *lat, int iStroke, int iAlt);
  266. FLOAT GetAlternateRecogScore(LATTICE *lat, int iStroke, int iAlt);
  267. FLOAT GetAlternateScore(LATTICE *lat, int iStroke, int iAlt);
  268. FLOAT GetAlternatePathScore(LATTICE *lat, int iStroke, int iAlt);
  269. int GetAlternateStrokes(LATTICE *lat, int iStroke, int iAlt);
  270. wchar_t GetAlternateChar(LATTICE *lat, int iStroke, int iAlt);
  271. /////////////////////
  272. // Internal stuff. //
  273. /////////////////////
  274. void BuildStrokeCountRecogAlts(LATTICE *lat, int iStroke, int cStrk);
  275. int FindFullPath(LATTICE *lat);
  276. void ClearAltList(LATTICE_ALT_LIST *list);
  277. void FixupBackPointers (LATTICE *pLat);
  278. BOOL LatticeConfigIFELang3();
  279. BOOL LatticeIFELang3Available();
  280. BOOL LatticeUnconfigIFELang3();
  // Flags used to check for valid paths in the lattice.
  // NOTE(review): presumably these are the values stored in the pValidEnd
  // array passed to CheckIfValid -- confirm.
  #define LCF_UNKNOWN 0
  #define LCF_INVALID 1
  #define LCF_VALID   2
  285. // Figure out which strokes are valid end of character in paths that span the
  286. // whole lattice.
  287. BOOL CheckIfValid(LATTICE *lat, int iStartStroke, int iStroke, BYTE *pValidEnd);
  288. #ifdef __cplusplus
  289. }
  290. #endif