Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

271 lines
6.2 KiB

  1. //$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
  2. //
  3. // Copyright (c) 2001 Microsoft Corporation. All rights reserved.
  4. //
  5. // Module:
  6. // volcano/dll/segm.c
  7. //
  8. // Description:
  9. // Functions to implement the functionality of managing segmentation structures.
  10. //
  11. // Author:
  12. // ahmadab 11/14/01
  13. //
  14. //$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
  15. #include "common.h"
  16. #include "volcanop.h"
  17. #include "lattice.h"
  18. #include "runnet.h"
  19. #include "brknet.h"
  20. #include "segm.h"
  21. #include "nnet.h"
  22. float FuguSegScore(int cStrokes, STROKE *pStrokes, LOCRUN_INFO *pLocRunInfo);
  23. // enumerate all the segmentations currently present in the lattice
  24. // for the specified ink segment
  25. BOOL EnumerateInkSegmentations (LATTICE *pLat, INK_SEGMENT *pInkSegment)
  26. {
  27. int cAlt, iAlt, iPrevAlt;
  28. LATTICE_ALT_LIST *pAlt;
  29. // find unique segmentations
  30. pAlt = pLat->pAltList + pInkSegment->StrokeRange.iEndStrk;
  31. cAlt = pAlt->nUsed;
  32. // for each alt find out if there is any prev alt that suggests the same stroke count
  33. for (iAlt = 0; iAlt < cAlt; iAlt++)
  34. {
  35. for (iPrevAlt = 0; iPrevAlt < iAlt; iPrevAlt++)
  36. {
  37. // found one, that is enough exit this prev loop
  38. if (pAlt->alts[iAlt].nStrokes == pAlt->alts[iPrevAlt].nStrokes)
  39. {
  40. break;
  41. }
  42. }
  43. // if we found no match, then this is the first existense of this count, we want it
  44. if (iPrevAlt == iAlt)
  45. {
  46. ELEMLIST *pSegm;
  47. int iStrk, iStrkAlt;
  48. LATTICE_ALT_LIST *pStrkAlt;
  49. // now we want to create a new segmentation
  50. pInkSegment->ppSegm = (ELEMLIST **) ExternRealloc (pInkSegment->ppSegm,
  51. (pInkSegment->cSegm + 1) * sizeof (*pInkSegment->ppSegm));
  52. if (!pInkSegment->ppSegm)
  53. {
  54. return FALSE;
  55. }
  56. pInkSegment->ppSegm[pInkSegment->cSegm] =
  57. (ELEMLIST *) ExternAlloc (sizeof (*pInkSegment->ppSegm[pInkSegment->cSegm]));
  58. if (!pInkSegment->ppSegm[pInkSegment->cSegm])
  59. {
  60. return FALSE;
  61. }
  62. pSegm = pInkSegment->ppSegm[pInkSegment->cSegm];
  63. pInkSegment->cSegm++;
  64. memset (pSegm, 0, sizeof (*pSegm));
  65. iStrk = pInkSegment->StrokeRange.iEndStrk;
  66. iStrkAlt = iAlt;
  67. while (iStrk >= pInkSegment->StrokeRange.iStartStrk)
  68. {
  69. pStrkAlt = pLat->pAltList + iStrk;
  70. if (!InsertListElement (pSegm, iStrk, iStrkAlt, pStrkAlt->alts + iStrkAlt))
  71. {
  72. return FALSE;
  73. }
  74. iStrk -= pStrkAlt->alts[iStrkAlt].nStrokes;
  75. iStrkAlt = pStrkAlt->alts[iStrkAlt].iPrevAlt;
  76. }
  77. ReverseElementList (pSegm);
  78. // if we had already exceed the maximum number of segmentations, return
  79. if (pInkSegment->cSegm > MAX_SEGMENTATIONS)
  80. {
  81. return TRUE;
  82. }
  83. }
  84. }
  85. return TRUE;
  86. }
  87. // computes the set of features for a segmentation
  88. int FeaturizeSegmentation (LATTICE *pLat, ELEMLIST *pSeg, int *pFeat)
  89. {
  90. int cFeat = 0;
  91. LATTICE_ELEMENT *pElem, *pPrevElem;
  92. LATTICE_PATH_ELEMENT *pPathElem;
  93. int iChar,
  94. iBrkNetOut;
  95. pPrevElem = NULL;
  96. iBrkNetOut = 0;
  97. pPathElem = pSeg->pElem;
  98. for (iChar = 0; iChar < MAX_SEG_CHAR; iChar++, pPathElem++)
  99. {
  100. // we going beyond the actual number of chars
  101. if (iChar >= pSeg->cElem)
  102. {
  103. // char features
  104. // FEATURE 0: is this a real char
  105. pFeat[cFeat++] = 0;
  106. // FEATURE 1: # of strokes
  107. pFeat[cFeat++] = 0;
  108. // FEATURE 2: -log prob
  109. pFeat[cFeat++] = 65535;
  110. // FEATURE 3: unigram of code point
  111. pFeat[cFeat++] = 65535;
  112. // FEATURE 3: fake fugu score
  113. pFeat[cFeat++] = 0;
  114. // char pair features
  115. if (iChar > 0)
  116. {
  117. // FEATURE 0: bigram pair
  118. pFeat[cFeat++] = 65535;
  119. // FEATURE 1: normalized delx
  120. pFeat[cFeat++] = 0;
  121. // FEATURE 2: output of brk net
  122. pFeat[cFeat++] = 0;
  123. }
  124. pPrevElem = NULL;
  125. }
  126. else
  127. {
  128. ASSERT (pPathElem->iStroke < pLat->nStrokes);
  129. ASSERT (pPathElem->iAlt < pLat->pAltList[pPathElem->iStroke].nUsed);
  130. pElem = pLat->pAltList[pPathElem->iStroke].alts + pPathElem->iAlt;
  131. // FEATURE 0: is this a real char
  132. pFeat[cFeat++] = 65535;
  133. // FEATURE 1: # of strokes
  134. pFeat[cFeat++] = min (65535, pElem->nStrokes * 1000);
  135. // FEATURE 2: -log prob
  136. pFeat[cFeat++] = min (65535, (int) (-1000.0 * pElem->logProb));
  137. // FEATURE 3: unigram of code point
  138. pFeat[cFeat++] =
  139. min (65535, (int) (-255.0 * UnigramCost (&g_unigramInfo, pElem->wChar)));
  140. // feature 4: char detector score
  141. if (pElem->iCharDetectorScore == -1)
  142. {
  143. pElem->iCharDetectorScore =
  144. min (65535, (int) (65535.0 * FuguSegScore (pElem->nStrokes,
  145. pLat->pStroke + pPathElem->iStroke - pElem->nStrokes + 1,
  146. &g_locRunInfo)));
  147. }
  148. pFeat[cFeat++] = max (min (65535, pElem->iCharDetectorScore), 0);
  149. // char pair features
  150. if (iChar > 0)
  151. {
  152. int xDist, yHgt;
  153. ASSERT (pPrevElem != NULL);
  154. // FEATURE 0: bigram pair
  155. pFeat[cFeat++] = min (65536,
  156. (int) (-1000.0 * BigramTransitionCost (&g_locRunInfo, &g_bigramInfo,
  157. pPrevElem->wChar, pElem->wChar)));
  158. // FEATURE 1: normalized delx
  159. xDist = pElem->bbox.left - pPrevElem->bbox.right;
  160. yHgt = 1 + ( (pElem->bbox.bottom - pElem->bbox.top) +
  161. (pPrevElem->bbox.bottom - pPrevElem->bbox.top)
  162. ) / 2;
  163. pFeat[cFeat++] = 32768 +
  164. max (-32767, min (32767, (int)(32768.0 * xDist / (abs(xDist) + yHgt))));
  165. // FEATURE 2: output of brk net after prev char
  166. pFeat[cFeat++] = iBrkNetOut;
  167. }
  168. iBrkNetOut = pLat->pAltList[pPathElem->iStroke].iBrkNetScore;
  169. pPrevElem = pElem;
  170. }
  171. }
  172. return cFeat;
  173. }
  174. // frees an ink segment
  175. void FreeInkSegment (INK_SEGMENT *pInkSegment)
  176. {
  177. int iSeg;
  178. for (iSeg = 0; iSeg < pInkSegment->cSegm; iSeg++)
  179. {
  180. if (pInkSegment->ppSegm[iSeg])
  181. {
  182. FreeElemList (pInkSegment->ppSegm[iSeg]);
  183. ExternFree (pInkSegment->ppSegm[iSeg]);
  184. }
  185. }
  186. if (pInkSegment->ppSegm)
  187. {
  188. ExternFree (pInkSegment->ppSegm);
  189. }
  190. }
  191. // featurize an ink segment
  192. int FeaturizeInkSegment (LATTICE *pLat, INK_SEGMENT *pInkSegment, int *pFeat)
  193. {
  194. int iSeg,
  195. cSegFeat,
  196. cFeat = 0;
  197. for (iSeg = 0; iSeg < pInkSegment->cSegm; iSeg++)
  198. {
  199. // featurize this segmentation
  200. cSegFeat = FeaturizeSegmentation (pLat,
  201. pInkSegment->ppSegm[iSeg], pFeat + cFeat);
  202. // did we fail
  203. if (cSegFeat <= 0)
  204. {
  205. return -1;
  206. }
  207. // increment the number of features
  208. cFeat += cSegFeat;
  209. }
  210. return cFeat;
  211. }