Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

380 lines
14 KiB

  1. //
  2. // Header file for CRANE.LIB
  3. //
  4. #include "common.h"
  5. #ifdef __cplusplus
  6. extern "C"
  7. {
  8. #endif
  // Capacity of the recognizer alternate list kept in each SAMPLE (awchAlts).
  #define MAX_RECOG_ALTS 10
  10. #pragma warning (disable : 4200)
  11. typedef struct tagFEATURES
  12. {
  13. DWORD cElements;
  14. BYTE data[0];
  15. } FEATURES;
  16. #pragma warning (default : 4200)
  // An 'end-point' pair is the X or Y component of a line segment:
  // (X0, X1) or (Y0, Y1).
  typedef struct tagEND_POINTS
  {
      short start;    // Component at the first end of the segment (X0 or Y0)
      short end;      // Component at the second end of the segment (X1 or Y1)
  } END_POINTS;
  // A RECTS is a rectangle stored as shorts: upper-left and lower-right corners.
  typedef struct tagRECTS
  {
      short x1;   // First corner, X (upper-left per the description above)
      short y1;   // First corner, Y
      short x2;   // Second corner, X (lower-right per the description above)
      short y2;   // Second corner, Y
  } RECTS;
  // A d-RECT is a delta rectangle: upper-left corner plus width and height.
  typedef struct tagDRECT
  {
      long x;     // Upper-left corner, X
      long y;     // Upper-left corner, Y
      long w;     // Width
      long h;     // Height
  } DRECT;
  39. /* We over flowed shorts when we past all the panels of
  40. a file togather.
  41. typedef struct tagDRECTS
  42. {
  43. short x;
  44. short y;
  45. short w;
  46. short h;
  47. } DRECTS;
  48. */
  49. typedef DRECT DRECTS;
  // Type tag of a feature; FEATURE_KIND pairs one of these with a FEATURE_FREQ.
  // Enumerators are numbered consecutively from zero, so typeCOUNT (which must
  // stay last) equals the number of real type tags.
  typedef enum tagFEATURE_TYPE
  {
      typeBOOL,
      typeBYTE,
      type8DOT8,
      typeSHORT,
      typeUSHORT,
      type16DOT16,
      typePOINTS,
      typeLONG,
      typeULONG,
      type32DOT32,
      typeRECTS,
      typeDRECTS,
      typePOINT,
      typeDRECT,
      typeCOUNT       // Number of type tags above; not a type itself
  } FEATURE_TYPE;
  // Frequency tag of a feature; FEATURE_KIND pairs one of these with a
  // FEATURE_TYPE.
  // NOTE(review): the names suggest "one value per stroke / feature / step /
  // point" -- confirm against the feature-building code.
  typedef enum tagFEATURE_FREQ
  {
      freqSTROKE,
      freqFEATURE,
      freqSTEP,
      freqPOINT
  } FEATURE_FREQ;
  75. typedef struct tagFEATURE_KIND
  76. {
  77. FEATURE_TYPE type;
  78. FEATURE_FREQ freq;
  79. } FEATURE_KIND;
  // Identifiers of the per-sample feature sets, numbered consecutively from
  // zero.  FEATURE_COUNT must stay last: it sizes SAMPLE::apfeat, whose slots
  // are presumably indexed by the identifiers above it.
  enum
  {
      FEATURE_ANGLE_NET,
      FEATURE_ANGLE_ABS,
      FEATURE_STEPS,
      FEATURE_FEATURES,
      FEATURE_XPOS,
      FEATURE_YPOS,
      FEATURE_STROKE_BBOX,
      FEATURE_LENGTH,
      FEATURE_COUNT           // Number of feature identifiers above
  };
  91. // Information about the whole sample. The sample is normalized to a 1000 by 1000 square.
  92. typedef struct tagSAMPLE {
  93. short cstrk; // Strokes in this sample
  94. wchar_t wchLabel; // The character that labels the sample
  95. wchar_t aSampleFile[22]; // Where sample came from.
  96. short ipanel; // Panel number of character
  97. short ichar; // Index in panel of character
  98. short fDakuten; // Does this character have a dakuten?
  99. DRECTS drcs; // Guide bounds
  100. wchar_t awchAlts[MAX_RECOG_ALTS]; // List of recognizer alternates.
  101. FEATURES *apfeat[FEATURE_COUNT];
  102. } SAMPLE;
  // Stroke-count bounds and ratio ceiling.
  // NOTE(review): this header does not show where these limits are applied --
  // confirm at the use sites.
  #define MIN_STROKE_CNT 3
  #define MAX_STROKE_CNT 32
  #define MAX_RATIO 0xFFFF
  // Character we want to print information about
  // Comment out this line to get full tree printed.
  // NOTE(review): the #define these two lines describe is not present in this
  // header; the comment appears to be left over -- confirm and remove.
  // Number of characters in the whole 16-bit character set
  #define cClasses 0x10000
  110. // Information about a training sample used while selecting questions.
  111. typedef struct tagSAMPLE_INFO {
  112. SAMPLE *pSample;
  113. int iAnswer; // Answer to question being checked at the moment.
  114. } SAMPLE_INFO;
  115. // Information about each alternate in the alternate list of terminal nodes.
  116. typedef struct tagALT_ENTRY {
  117. wchar_t wchLabel;
  118. WORD fDataSets; // Bit zero -> in train, bit one -> in test
  119. int cSamples;
  120. } ALT_ENTRY;
  121. // Information about each question asked.
  122. typedef struct tagCART_NODE
  123. {
  124. // Samples that make up this node.
  125. int cSamples;
  126. SAMPLE_INFO *pSamples;
  127. // Pointers making CART tree.
  128. struct tagCART_NODE *pLess;
  129. struct tagCART_NODE *pGreater;
  130. struct tagCART_NODE *pParent;
  131. // Pointer used to build up a list of selected nodes.
  132. struct tagCART_NODE *pNextSelected;
  133. // Question used to decide branching to less or greater sub trees.
  134. WORD questionType; // What type of question
  135. WORD questionPart1; // Which question for the type may be specified in one or two
  136. WORD questionPart2; // pieces. The delta X and Y questions use these to identify
  137. // the start and end points that the delta is done on.
  138. int questionValue; // Value that question splits on. Because of the integer
  139. // rounding of the value when we compute this, we need to do
  140. // a <= test.
  141. // These values are set when the tree is first built and are not changed
  142. // during pruning. These must be set before calling CARTPrune.
  143. wchar_t wchLabelMax; // max-weight char in this subtree
  144. double eLabelMaxWeight; // total weight of wchLabelMax in this subtree
  145. // These values are set and used by CARTPrune and are meaningless after it returns
  146. int cTerminalNodes; // Number of terminals in this subtree
  147. double eTerminalLabelWeights; // sum of eLabelWeight from terminal nodes
  148. double ePruneValue; // Alpha required to make pruning here a cost/complexity win
  149. int iHeap; // Used with heap routines to know which elements to sift/delete
  150. // This value is useful after CARTPrune returns
  151. int iTreePrunePoint; // Zero means never prune, otherwise indicates successive pruned
  152. // trees corresponding to different alphas (See Brieman, ch 3)
  153. // These values must be set for the honest estimate code
  154. double eHonestLabelWeight; // total weight of wchLabel in subtree according to test
  155. double eHonestNodeWeight; // total weight of all characters in subtree according to test
  156. // The alternate list. This is set for terminals when we clip the CART tree back to its
  157. // final size.
  158. int cAlternates;
  159. ALT_ENTRY *pAlternates;
  160. // Misc. statistics
  161. int cUniqData;
  162. } CART_NODE;
  // Valid types of questions.  Enumerators are numbered consecutively from
  // zero; questionCount must stay last so that it equals the number of types.
  typedef enum tagQUESTION_TYPE
  {
      questionNONE,
      questionX,              // X position
      questionY,              // Y position
      questionXDelta,         // Delta between two X positions
      questionYDelta,         // Delta between two Y positions
      questionXAngle,         // Angle relative to X axis
      questionYAngle,         // Angle relative to Y axis
      questionDelta,          // Squared distance between two points
      questionDakuTen,        // Chance this character has a dakuten
      questionNetAngle,       // Net angle of a stroke
      questionCnsAngle,       // Difference of net angle and absolute angle
      questionAbsAngle,       // Absolute angle of a stroke
      questionCSteps,         // Count of steps in a stroke
      questionCFeatures,      // Count of features in a stroke
      questionXPointsRight,   // # of points to the right of a given X value
      questionYPointsBelow,   // # of points below a given Y value
      questionPerpDist,       // Perpendicular distance from a line to a point
      questionSumXDelta,      // Sum of X deltas of a stroke
      questionSumYDelta,      // Sum of Y deltas of a stroke
      questionSumDelta,       // Sum of magnitudes of a stroke
      questionSumNetAngle,    // Sum of net angles of a stroke
      questionSumAbsAngle,    // Sum of absolute angles of a stroke
      questionCompareXDelta,  // Derivative of X deltas
      questionCompareYDelta,  // Derivative of Y deltas
      questionCompareDelta,   // Derivative of magnitudes
      questionCompareAngle,   // Derivative of angles
      questionPointsInBBox,   // Points in a particular box
      questionCharLeft,       // Leftmost position of a character
      questionCharTop,        // Topmost position of a character
      questionCharWidth,      // Width of a character
      questionCharHeight,     // Height of a character
      questionCharDiagonal,   // Length of character's diagonal
      questionCharTheta,      // Angle of character's diagonal
      questionStrokeLeft,     // Left most position of a bounding box of stroke range
      questionStrokeTop,      // Top most position of a bounding box of stroke range
      questionStrokeWidth,    // Width of a bounding box of stroke range
      questionStrokeHeight,   // Height of a bounding box of stroke range
      questionStrokeDiagonal, // Length of (bounding box of stroke range)'s diagonal
      questionStrokeTheta,    // Angle of (bounding box of stroke range)'s diagonal
      questionStrokeRight,    // Right most position of a bounding box of stroke range
      questionStrokeBottom,   // Bottom most position of a bounding box of stroke range
      questionStrokeLength,   // Total curvilinear length of stroke
      questionStrokeCurve,    // Delta between curvilinear length and straight-line length
      questionCharLength,     // Total curvilinear length of all strokes in character
      questionCharCurve,      // Delta between curvilinear length and straight-line length
      questionAltList,        // Position in recognizer alternate list.
      questionCount           // Number of question types above; not a question itself
  } QUESTION_TYPE;
  // Marker byte and flag bit for question nodes in the packed tree:
  // 0xd0 tags a question node, and OR-ing in QART_NOBRANCH (giving 0xd1)
  // marks a question with no branch.
  #define QART_QUESTION 0xd0
  #define QART_NOBRANCH 0x01
  216. typedef struct tagQART
  217. {
  218. BYTE question;
  219. BYTE flag;
  220. } QART;
  221. typedef union tagUNIQART
  222. {
  223. WORD unicode;
  224. QART qart;
  225. } UNIQART;
  226. // This is the packed binary format of the question tree. Each node will either be a question
  227. // with it parameters, value and branch offset or a UNICODE character. If the UNICODE character
  228. // would be in the range 0xd000 - 0xdfff then it's a question node. Bits 0-3 are then flags
  229. // about the question. Since branch offsets are limited to 64K and it's remotely possible to
  230. // have >64K on a branch, the code 0xffff will represent an ESCAPE code. The optional DWORD
  231. // in 'extra' will then be the long form of the branch. A sample file might look like this:
  232. //
  233. // offset field comment
  234. // +0000 d0 This is a question
  235. // +0001 02 Question #2
  236. // +0002 00 Parameter 1 is 0
  237. // +0003 01 Parameter 2 is 1
  238. // +0004 03e8 Value is 1000
  239. // +0006 000a Branch if greater to current position + 0x000a
  240. // +0008 d1 This is a question with no branch
  241. // +0009 07 Question #7
  242. // +000a 03 Parameter 1 is 3
  243. // +000b 02 Parameter 2 is 2
  244. // +000c ffef Value is -17
  245. // +000e 568a Return UNICODE value 0x568a if greater then -17
  246. // +0010 887b Return UNICODE value 0x887b
  247. // +0012 4e00 Return UNICODE value 0x4e00
  248. #pragma warning (disable : 4200)
  249. typedef struct tagQNODE
  250. {
  251. UNIQART uniqart;
  252. BYTE param1;
  253. BYTE param2;
  254. short value;
  255. WORD offset;
  256. DWORD extra[0];
  257. } QNODE;
  258. // For UNICODE support, modify awIndex to be 84 long and convert the HIGH_INDEX
  259. // macro to the following:
  260. //
  261. // #define HIGH_INDEX(x) ((((x) < 0x0100 ? (x) - 0x0100 : (x) < 0x4e00 ? (x) - 0x2f00 : (x) - 0x4c00) >> 8) & 0x00ff)
  262. //
  263. // This maps U+0000 to index 0, page U+3000 to index 1 and U+4e00 to index 2. All other
  264. // Kanji follow. This uses only 600 bytes more space then the XJIS encoding and is even
  265. // better then loading an 8K table for crushed UNICODE.
  266. #define HIGH_INDEX_LIMIT 64
  267. #define HIGH_INDEX(x) (((x) >> 8) & 0x00ff)
  268. typedef struct tagQHEAD
  269. {
  270. WORD awIndex[HIGH_INDEX_LIMIT];
  271. DWORD aqIndex[0];
  272. } QHEAD;
  273. #pragma warning (default : 4200)
  // Three opaque pointers describing a loaded Crane data file.  This struct is
  // passed to CraneLoadFile and CraneUnLoadFile.
  // NOTE(review): what each pointer refers to is not visible in this header --
  // see the loader implementation.
  typedef struct tagCRANE_LOAD_INFO
  {
      void *pLoadInfo1;
      void *pLoadInfo2;
      void *pLoadInfo3;
  } CRANE_LOAD_INFO;
  280. // Exported entry points
  281. #ifndef HWX_PRODUCT
  282. wchar_t *LastLineSample(void);
  283. SAMPLE *ReadSample(SAMPLE *, FILE *);
  284. void ResetReadSampleH();
  285. SAMPLE *ReadSampleH(SAMPLE *_this, HANDLE);
  286. BOOL WriteSample(SAMPLE *, FILE *);
  287. #endif
  288. BOOL CraneLoadRes(HINSTANCE, int, int, LOCRUN_INFO *pLocRunInfo);
  289. BOOL CraneLoadFile(LOCRUN_INFO *pLocRunInfo,CRANE_LOAD_INFO *,wchar_t *);
  290. BOOL CraneUnLoadFile(CRANE_LOAD_INFO *);
  291. BOOL CraneMatch(ALT_LIST *pAlt, int cAlt, GLYPH *pGlyph, CHARSET *pCS, DRECTS *pdrcs, FLOAT eCARTWeight,LOCRUN_INFO *pLocRunInfo);
  292. void InitFeatures(SAMPLE *);
  293. void FreeFeatures(SAMPLE *);
  294. BOOL MakeFeatures(SAMPLE *, void *);
  295. void AnswerQuestion(WORD, WORD, WORD, SAMPLE_INFO *, int);
  296. // Ask all the questions on the passed in data set, calling back after each.
  297. void
  298. AskAllQuestions(
  299. int cStrokes, // Number of strokes in each sample (same for all samples)
  300. int cSamples, // Number of samples of data
  301. SAMPLE_INFO *pSamples, // Pointer to samples of data
  302. void (*pfCallBack)( // Called after each question
  303. WORD questionType, // e.g. single-point, point/stroke, etc.
  304. WORD part1, // Question constant part 1
  305. WORD part2, // Question constant part 2
  306. SAMPLE_INFO *pSamples,
  307. int cSamples,
  308. void *pvCallBackControl
  309. ),
  310. void *pvCallBackControl // passed to pfCallBack each time
  311. );
  /* Array of short codes for question types */
  // Declaration only; the array is defined in another translation unit.
  // NOTE(review): presumably indexed by QUESTION_TYPE -- confirm at the definition.
  extern const char * const apQuestionTypeCode[];
  314. #ifdef __cplusplus
  315. };
  316. #endif