|
|
//
// Header file for CRANE.LIB
//
#include "common.h"
// When compiled as C++, give every declaration in this header C linkage.
// Each directive must start its own line: a '#endif' that trails other tokens
// is not a directive, and the #ifdef would never be closed.
#ifdef __cplusplus
extern "C" {
#endif
// Number of entries in the recognizer alternate list kept per sample
// (sizes awchAlts in SAMPLE).
#define MAX_RECOG_ALTS 10

// FEATURES ends in a zero-length array, which MSVC reports as warning C4200.
// Silence it for this declaration only, then put the warning back.
#pragma warning (disable : 4200)
typedef struct tagFEATURES {
    DWORD cElements;    // NOTE(review): presumably the element count of data[] -- confirm
    BYTE  data[0];      // Zero-length trailing array; storage is expected to follow the header
} FEATURES;
#pragma warning (default : 4200)
// An 'end-point' pair is one axis of a line segment: either its two X
// components (X0, X1) or its two Y components (Y0, Y1).
typedef struct tagEND_POINTS
{
    short start;    // X0 or Y0
    short end;      // X1 or Y1
} END_POINTS;
// A RECTS is a rectangle stored as two corners in shorts:
// upper-left (x1, y1) and lower-right (x2, y2).
typedef struct tagRECTS
{
    short x1;
    short y1;
    short x2;
    short y2;
} RECTS;
// A d-RECT is a "delta" rectangle: the upper-left corner (x, y) followed by
// the width and height (w, h).
typedef struct tagDRECT
{
    long x;
    long y;
    long w;
    long h;
} DRECT;
// DRECTS used to be its own structure built from shorts:
//
//     typedef struct tagDRECTS { short x; short y; short w; short h; } DRECTS;
//
// Shorts overflowed when all the panels of a file were pasted together, so
// DRECTS is now just another name for the long-based DRECT.
typedef DRECT DRECTS;
// Type tags for feature values. typeCOUNT is last on purpose, so its value is
// the number of tags that precede it.
typedef enum tagFEATURE_TYPE
{
    typeBOOL = 0,
    typeBYTE,
    type8DOT8,
    typeSHORT,
    typeUSHORT,
    type16DOT16,
    typePOINTS,
    typeLONG,
    typeULONG,
    type32DOT32,
    typeRECTS,
    typeDRECTS,
    typePOINT,
    typeDRECT,
    typeCOUNT           // Number of type tags; keep last
} FEATURE_TYPE;
// Frequency tags for features.
// NOTE(review): the names suggest "one value per stroke / feature / step /
// point" -- confirm against the feature-building code.
typedef enum tagFEATURE_FREQ
{
    freqSTROKE = 0,
    freqFEATURE,
    freqSTEP,
    freqPOINT
} FEATURE_FREQ;
// Pairs a feature's type tag with its frequency tag.
typedef struct tagFEATURE_KIND
{
    FEATURE_TYPE type;
    FEATURE_FREQ freq;
} FEATURE_KIND;
// Feature identifiers. FEATURE_COUNT is last, so it equals the number of
// identifiers; it sizes the apfeat array in SAMPLE.
enum
{
    FEATURE_ANGLE_NET = 0,
    FEATURE_ANGLE_ABS,
    FEATURE_STEPS,
    FEATURE_FEATURES,
    FEATURE_XPOS,
    FEATURE_YPOS,
    FEATURE_STROKE_BBOX,
    FEATURE_LENGTH,
    FEATURE_COUNT       // Number of feature identifiers; keep last
};
// Everything recorded about one whole sample. The sample is normalized to a
// 1000 by 1000 square.
typedef struct tagSAMPLE
{
    short     cstrk;                        // Number of strokes in this sample
    wchar_t   wchLabel;                     // Character that labels the sample
    wchar_t   aSampleFile[22];              // Where the sample came from
    short     ipanel;                       // Panel number of the character
    short     ichar;                        // Index of the character within its panel
    short     fDakuten;                     // Does this character have a dakuten?
    DRECTS    drcs;                         // Guide bounds
    wchar_t   awchAlts[MAX_RECOG_ALTS];     // List of recognizer alternates
    FEATURES *apfeat[FEATURE_COUNT];        // One FEATURES pointer per FEATURE_* identifier
} SAMPLE;
// Stroke-count limits.
// NOTE(review): presumably the smallest and largest stroke counts this library
// handles -- confirm against the code that uses these macros.
#define MIN_STROKE_CNT 3
#define MAX_STROKE_CNT 32
// NOTE(review): what "ratio" refers to is not visible in this header.
#define MAX_RATIO 0xFFFF
// Character we want to print information about
// Comment out this line to get full tree printed.
// NOTE(review): no #define follows the two comment lines above in this header,
// so the "line" they describe appears to be missing or already removed.
// Number of characters in the whole 16-bit character set
#define cClasses 0x10000
// Per-sample bookkeeping for a training sample while questions are being
// selected.
typedef struct tagSAMPLE_INFO
{
    SAMPLE *pSample;    // The training sample
    int     iAnswer;    // Answer to the question being checked at the moment
} SAMPLE_INFO;
// One entry in the alternate list attached to a terminal node.
typedef struct tagALT_ENTRY
{
    wchar_t wchLabel;   // NOTE(review): presumably the character this alternate stands for
    WORD    fDataSets;  // Bit zero -> in train, bit one -> in test
    int     cSamples;   // NOTE(review): presumably the number of samples with this label
} ALT_ENTRY;
// One node of the CART tree: the question asked there plus the bookkeeping
// used for building, pruning and honest estimation.
typedef struct tagCART_NODE
{
    // --- Samples that make up this node ---
    int                  cSamples;
    SAMPLE_INFO         *pSamples;

    // --- Links that form the CART tree ---
    struct tagCART_NODE *pLess;
    struct tagCART_NODE *pGreater;
    struct tagCART_NODE *pParent;

    // --- Link used to chain together a list of selected nodes ---
    struct tagCART_NODE *pNextSelected;

    // --- Question that decides between the less and greater subtrees ---
    WORD                 questionType;      // What type of question
    WORD                 questionPart1;     // Which question of that type; may be given in one or
    WORD                 questionPart2;     // two pieces. The delta X and Y questions use them to
                                            // name the start and end points the delta is taken on.
    int                  questionValue;     // Value the question splits on. The value is rounded
                                            // to an integer when computed, so the comparison has
                                            // to be a <= test.

    // --- Set when the tree is first built; unchanged by pruning. ---
    // --- Must be set before CARTPrune is called.                 ---
    wchar_t              wchLabelMax;       // Max-weight character in this subtree
    double               eLabelMaxWeight;   // Total weight of wchLabelMax in this subtree

    // --- Set and used by CARTPrune; meaningless once it returns ---
    int                  cTerminalNodes;        // Number of terminals in this subtree
    double               eTerminalLabelWeights; // Sum of the label weights of the terminal nodes
                                                // (original text says "eLabelWeight"; presumably
                                                // eLabelMaxWeight)
    double               ePruneValue;           // Alpha needed for pruning here to be a
                                                // cost/complexity win
    int                  iHeap;                 // Lets the heap routines know which element to
                                                // sift/delete

    // --- Useful after CARTPrune returns ---
    int                  iTreePrunePoint;   // Zero means never prune; otherwise identifies the
                                            // successive pruned trees for different alphas
                                            // (see Breiman, ch. 3)

    // --- Must be set for the honest-estimate code ---
    double               eHonestLabelWeight;    // Total weight of the label in the subtree
                                                // according to test (original text says
                                                // "wchLabel"; presumably wchLabelMax)
    double               eHonestNodeWeight;     // Total weight of all characters in the subtree
                                                // according to test

    // --- Alternate list; filled in for terminals when the CART tree is ---
    // --- clipped back to its final size.                               ---
    int                  cAlternates;
    ALT_ENTRY           *pAlternates;

    // --- Misc. statistics ---
    int                  cUniqData;
} CART_NODE;
// The valid question types. questionCount is last, so its value is the number
// of question types (including questionNONE).
typedef enum tagQUESTION_TYPE
{
    questionNONE = 0,
    questionX,                  // X position
    questionY,                  // Y position
    questionXDelta,             // Delta between two X positions
    questionYDelta,             // Delta between two Y positions
    questionXAngle,             // Angle relative to the X axis
    questionYAngle,             // Angle relative to the Y axis
    questionDelta,              // Squared distance between two points
    questionDakuTen,            // Chance this character has a dakuten
    questionNetAngle,           // Net angle of a stroke
    questionCnsAngle,           // Difference of net angle and absolute angle
    questionAbsAngle,           // Absolute angle of a stroke
    questionCSteps,             // Count of steps in a stroke
    questionCFeatures,          // Count of features in a stroke
    questionXPointsRight,       // # of points to the right of a given X value
    questionYPointsBelow,       // # of points below a given Y value
    questionPerpDist,           // Perpendicular distance from a line to a point
    questionSumXDelta,          // Sum of X deltas of a stroke
    questionSumYDelta,          // Sum of Y deltas of a stroke
    questionSumDelta,           // Sum of magnitudes of a stroke
    questionSumNetAngle,        // Sum of net angles of a stroke
    questionSumAbsAngle,        // Sum of absolute angles of a stroke
    questionCompareXDelta,      // Derivative of X deltas
    questionCompareYDelta,      // Derivative of Y deltas
    questionCompareDelta,       // Derivative of magnitudes
    questionCompareAngle,       // Derivative of angles
    questionPointsInBBox,       // Points in a particular box
    questionCharLeft,           // Leftmost position of a character
    questionCharTop,            // Topmost position of a character
    questionCharWidth,          // Width of a character
    questionCharHeight,         // Height of a character
    questionCharDiagonal,       // Length of the character's diagonal
    questionCharTheta,          // Angle of the character's diagonal
    questionStrokeLeft,         // Leftmost position of the bounding box of a stroke range
    questionStrokeTop,          // Topmost position of the bounding box of a stroke range
    questionStrokeWidth,        // Width of the bounding box of a stroke range
    questionStrokeHeight,       // Height of the bounding box of a stroke range
    questionStrokeDiagonal,     // Length of the diagonal of that bounding box
    questionStrokeTheta,        // Angle of the diagonal of that bounding box
    questionStrokeRight,        // Rightmost position of the bounding box of a stroke range
    questionStrokeBottom,       // Bottommost position of the bounding box of a stroke range
    questionStrokeLength,       // Total curvilinear length of a stroke
    questionStrokeCurve,        // Delta between curvilinear and straight-line length (stroke)
    questionCharLength,         // Total curvilinear length of all strokes in the character
    questionCharCurve,          // Delta between curvilinear and straight-line length (character)
    questionAltList,            // Position in the recognizer alternate list
    questionCount               // Number of question types; keep last
} QUESTION_TYPE;
// Byte values used by question nodes in the packed question tree.
#define QART_QUESTION 0xd0  // Marks a node as a question
#define QART_NOBRANCH 0x01  // Flag: the question has no branch

// The two bytes that describe a question node.
// NOTE(review): how QART_QUESTION / QART_NOBRANCH map onto these two fields is
// not visible in this header -- confirm against the tree reader.
typedef struct tagQART
{
    BYTE question;
    BYTE flag;
} QART;
// The leading word of a packed tree node, viewed either as a UNICODE character
// or as the QART bytes of a question.
typedef union tagUNIQART
{
    WORD unicode;
    QART qart;
} UNIQART;
// This is the packed binary format of the question tree. Each node will either be a question
// with its parameters, value and branch offset or a UNICODE character. If the UNICODE character
// would be in the range 0xd000 - 0xdfff then it's a question node. Bits 0-3 are then flags
// about the question. Since branch offsets are limited to 64K and it's remotely possible to
// have >64K on a branch, the code 0xffff will represent an ESCAPE code. The optional DWORD
// in 'extra' will then be the long form of the branch. A sample file might look like this:
//
// offset field comment
// +0000 d0 This is a question
// +0001 02 Question #2
// +0002 00 Parameter 1 is 0
// +0003 01 Parameter 2 is 1
// +0004 03e8 Value is 1000
// +0006 000a Branch if greater to current position + 0x000a
// +0008 d1 This is a question with no branch
// +0009 07 Question #7
// +000a 03 Parameter 1 is 3
// +000b 02 Parameter 2 is 2
// +000c ffef Value is -17
// +000e 568a Return UNICODE value 0x568a if greater than -17
// +0010 887b Return UNICODE value 0x887b
// +0012 4e00 Return UNICODE value 0x4e00
// QNODE and QHEAD end in zero-length arrays, which MSVC reports as warning
// C4200; the warning is switched off here and restored after QHEAD.
#pragma warning (disable : 4200)
// One node of the packed question tree.
typedef struct tagQNODE
{
    UNIQART uniqart;    // UNICODE character, or the question bytes
    BYTE    param1;     // Question parameter 1
    BYTE    param2;     // Question parameter 2
    short   value;      // Value the question tests against
    WORD    offset;     // Branch offset; 0xffff is the ESCAPE code
    DWORD   extra[0];   // Optional long form of the branch, used after an ESCAPE
} QNODE;
// For UNICODE support, modify awIndex to be 84 long and convert the HIGH_INDEX
// macro to the following:
//
// #define HIGH_INDEX(x) ((((x) < 0x0100 ? (x) - 0x0100 : (x) < 0x4e00 ? (x) - 0x2f00 : (x) - 0x4c00) >> 8) & 0x00ff)
//
// This maps U+0000 to index 0, page U+3000 to index 1 and U+4e00 to index 2. All other
// Kanji follow. This uses only 600 bytes more space than the XJIS encoding and is even
// better than loading an 8K table for crushed UNICODE.
// Number of entries in the awIndex table of QHEAD.
#define HIGH_INDEX_LIMIT 64
// Yields bits 8-15 of x (its high byte for a 16-bit value). x is evaluated once.
// NOTE(review): the result can be as large as 0xff, which is beyond
// HIGH_INDEX_LIMIT; presumably callers range-check before indexing awIndex -- confirm.
#define HIGH_INDEX(x) (((x) >> 8) & 0x00ff)
// Header placed in front of the packed question-tree data.
// NOTE(review): awIndex is presumably looked up with HIGH_INDEX() of a
// character -- confirm against the loader.
typedef struct tagQHEAD {
    WORD  awIndex[HIGH_INDEX_LIMIT];
    DWORD aqIndex[0];               // Zero-length trailing array (MSVC warning C4200)
} QHEAD;
// Restore C4200 to its default state. The directive must start its own line;
// trailing the typedef it is a stray '#' rather than a pragma.
#pragma warning (default : 4200)
// Three opaque pointers describing a loaded Crane database.
// NOTE(review): presumably filled in by CraneLoadFile and released by
// CraneUnLoadFile, both of which take a CRANE_LOAD_INFO * -- confirm.
typedef struct tagCRANE_LOAD_INFO
{
    void *pLoadInfo1;
    void *pLoadInfo2;
    void *pLoadInfo3;
} CRANE_LOAD_INFO;
// Exported entry points

// Sample reading/writing helpers. They are declared only when HWX_PRODUCT is
// not defined.
#ifndef HWX_PRODUCT
wchar_t *LastLineSample(void);
SAMPLE  *ReadSample(SAMPLE *, FILE *);
void     ResetReadSampleH(void);
SAMPLE  *ReadSampleH(SAMPLE *_this, HANDLE);
BOOL     WriteSample(SAMPLE *, FILE *);
#endif
BOOL CraneLoadRes(HINSTANCE, int, int, LOCRUN_INFO *pLocRunInfo); BOOL CraneLoadFile(LOCRUN_INFO *pLocRunInfo,CRANE_LOAD_INFO *,wchar_t *); BOOL CraneUnLoadFile(CRANE_LOAD_INFO *); BOOL CraneMatch(ALT_LIST *pAlt, int cAlt, GLYPH *pGlyph, CHARSET *pCS, DRECTS *pdrcs, FLOAT eCARTWeight,LOCRUN_INFO *pLocRunInfo); void InitFeatures(SAMPLE *); void FreeFeatures(SAMPLE *); BOOL MakeFeatures(SAMPLE *, void *); void AnswerQuestion(WORD, WORD, WORD, SAMPLE_INFO *, int);
// Asks every question on the data set passed in, calling pfCallBack back after
// each one.
void AskAllQuestions(
    int          cStrokes,              // Strokes per sample (the same for all samples)
    int          cSamples,              // Number of samples of data
    SAMPLE_INFO *pSamples,              // The samples of data
    void (*pfCallBack)(                 // Called after each question
        WORD         questionType,      //   e.g. single-point, point/stroke, etc.
        WORD         part1,             //   Question constant part 1
        WORD         part2,             //   Question constant part 2
        SAMPLE_INFO *pSamples,
        int          cSamples,
        void        *pvCallBackControl
    ),
    void        *pvCallBackControl      // Passed to pfCallBack each time
);
/* Array of short codes for question types.
   Declared here only; the definition lives in another translation unit.
   NOTE(review): presumably indexed by QUESTION_TYPE -- confirm. */
extern const char * const apQuestionTypeCode[];
// Close the extern "C" block opened near the top of this header. The '#endif'
// must start its own line to be recognised as a directive.
#ifdef __cplusplus
}
#endif
|