Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

380 lines
14 KiB

//
// Header file for CRANE.LIB
//
// Guard against multiple inclusion: the tagged struct typedefs in this header
// cannot be repeated within one translation unit.
#pragma once
#include "common.h"
#ifdef __cplusplus
extern "C"
{
#endif
#define MAX_RECOG_ALTS 10
// A block of feature data: a DWORD header (cElements) followed directly by the
// payload bytes.  NOTE(review): whether cElements counts bytes or typed
// elements is not visible in this header -- confirm against the feature code.
//
// The zero-length trailing array is a Microsoft extension (warning C4200), so
// the warning is silenced for this definition only.  push/pop restores whatever
// C4200 setting the including file had, rather than forcing it back to the
// compiler default.
#pragma warning (push)
#pragma warning (disable : 4200)
typedef struct tagFEATURES
{
    DWORD cElements;
    BYTE data[0];       // Variable-length payload that follows the header
} FEATURES;
#pragma warning (pop)
// An 'end-point' pair is one coordinate (X or Y) of a line segment taken at
// both of its ends: X0,X1 or Y0,Y1.
typedef struct tagEND_POINTS
{
    short start, end;   // Coordinate at the start and at the end of the segment
} END_POINTS;
// A RECTS is a rectangle held as two corners in short coordinates:
// upper-left and lower-right.
typedef struct tagRECTS
{
    short x1, y1;   // First corner (upper-left)
    short x2, y2;   // Second corner (lower-right)
} RECTS;
// A d-RECT is a delta rectangle: its upper-left corner plus a width and a
// height, all stored as longs.
typedef struct tagDRECT
{
    long x, y;      // Upper-left corner
    long w, h;      // Width and height
} DRECT;
/* DRECTS used to be its own structure with short fields, but the shorts
overflowed when all the panels of a file were pasted together, so it is now
just another name for the long-based DRECT.  The old definition is kept here
for reference:
typedef struct tagDRECTS
{
short x;
short y;
short w;
short h;
} DRECTS;
*/
typedef DRECT DRECTS;
// Type tags for feature data.  typeCOUNT is not a type of its own; because it
// comes last, its value is the number of tags defined before it.
// NOTE(review): the exact encoding behind each tag (e.g. the xDOTy names) is
// not visible in this header -- confirm against the feature code.
typedef enum tagFEATURE_TYPE
{
    typeBOOL = 0,
    typeBYTE,
    type8DOT8,
    typeSHORT,
    typeUSHORT,
    type16DOT16,
    typePOINTS,
    typeLONG,
    typeULONG,
    type32DOT32,
    typeRECTS,
    typeDRECTS,
    typePOINT,
    typeDRECT,
    typeCOUNT           // Number of type tags; keep last
} FEATURE_TYPE;
// Frequency tags for feature data.
// NOTE(review): presumably these say whether a feature has one value per
// stroke, per feature, per step or per point -- confirm against the feature code.
typedef enum tagFEATURE_FREQ
{
    freqSTROKE = 0,
    freqFEATURE,
    freqSTEP,
    freqPOINT
} FEATURE_FREQ;
// Describes one kind of feature by pairing a type tag with a frequency tag.
typedef struct tagFEATURE_KIND
{
FEATURE_TYPE type; // One of the FEATURE_TYPE tags
FEATURE_FREQ freq; // One of the FEATURE_FREQ tags
} FEATURE_KIND;
// Indices of the feature sets kept for each sample.  FEATURE_COUNT comes last,
// so its value is the number of indices; it sizes the SAMPLE.apfeat array.
enum
{
    FEATURE_ANGLE_NET = 0,
    FEATURE_ANGLE_ABS,
    FEATURE_STEPS,
    FEATURE_FEATURES,
    FEATURE_XPOS,
    FEATURE_YPOS,
    FEATURE_STROKE_BBOX,
    FEATURE_LENGTH,
    FEATURE_COUNT       // Number of feature sets; keep last
};
// Information about the whole sample. The sample is normalized to a 1000 by 1000 square.
// Besides the label and bookkeeping about where the sample came from, it holds
// one FEATURES pointer for each FEATURE_* index.
typedef struct tagSAMPLE {
short cstrk; // Strokes in this sample
wchar_t wchLabel; // The character that labels the sample
wchar_t aSampleFile[22]; // Where sample came from.
short ipanel; // Panel number of character
short ichar; // Index in panel of character
short fDakuten; // Does this character have a dakuten?
DRECTS drcs; // Guide bounds
wchar_t awchAlts[MAX_RECOG_ALTS]; // List of recognizer alternates.
FEATURES *apfeat[FEATURE_COUNT]; // Feature data, one entry per FEATURE_* index
} SAMPLE;
// Smallest and largest stroke counts.
// NOTE(review): how these limits are applied is not visible in this header.
#define MIN_STROKE_CNT 3
#define MAX_STROKE_CNT 32
// NOTE(review): presumably the largest ratio value (fits in 16 bits); usage is
// not visible in this header.
#define MAX_RATIO 0xFFFF
// Character we want to print information about
// Comment out this line to get full tree printed.
// NOTE(review): the #define that the two lines above describe is not present
// in this header.
// Number of characters in the whole 16-bit character set
#define cClasses 0x10000
// Information about a training sample used while selecting questions.
// Pairs the sample with the answer it gives to the question currently
// being evaluated.
typedef struct tagSAMPLE_INFO {
SAMPLE *pSample; // The training sample this entry refers to
int iAnswer; // Answer to question being checked at the moment.
} SAMPLE_INFO;
// Information about each alternate in the alternate list of terminal nodes.
typedef struct tagALT_ENTRY {
wchar_t wchLabel; // Character for this alternate
WORD fDataSets; // Bit zero -> in train, bit one -> in test
int cSamples; // NOTE(review): presumably the number of samples with this label -- confirm
} ALT_ENTRY;
// Information about each question asked.
// One node of the CART tree: the samples that reached it, its links to the
// neighbouring nodes, the question it asks, and the bookkeeping used by the
// pruning and honest-estimate passes.
typedef struct tagCART_NODE
{
// Samples that make up this node.
int cSamples;
SAMPLE_INFO *pSamples;
// Pointers making CART tree.
struct tagCART_NODE *pLess;
struct tagCART_NODE *pGreater;
struct tagCART_NODE *pParent;
// Pointer used to build up a list of selected nodes.
struct tagCART_NODE *pNextSelected;
// Question used to decide branching to less or greater sub trees.
WORD questionType; // What type of question
WORD questionPart1; // Which question for the type may be specified in one or two
WORD questionPart2; // pieces. The delta X and Y questions use these to identify
// the start and end points that the delta is done on.
int questionValue; // Value that question splits on. Because of the integer
// rounding of the value when we compute this, we need to do
// a <= test.
// These values are set when the tree is first built and are not changed
// during pruning. These must be set before calling CARTPrune.
wchar_t wchLabelMax; // max-weight char in this subtree
double eLabelMaxWeight; // total weight of wchLabelMax in this subtree
// These values are set and used by CARTPrune and are meaningless after it returns
int cTerminalNodes; // Number of terminals in this subtree
double eTerminalLabelWeights; // sum of eLabelWeight from terminal nodes
// NOTE(review): no field named eLabelWeight exists here; presumably
// eLabelMaxWeight is meant -- confirm.
double ePruneValue; // Alpha required to make pruning here a cost/complexity win
int iHeap; // Used with heap routines to know which elements to sift/delete
// This value is useful after CARTPrune returns
int iTreePrunePoint; // Zero means never prune, otherwise indicates successive pruned
// trees corresponding to different alphas (See Breiman, ch 3)
// These values must be set for the honest estimate code
double eHonestLabelWeight; // total weight of wchLabel in subtree according to test
// NOTE(review): no field named wchLabel exists here; presumably
// wchLabelMax is meant -- confirm.
double eHonestNodeWeight; // total weight of all characters in subtree according to test
// The alternate list. This is set for terminals when we clip the CART tree back to its
// final size.
int cAlternates; // Number of entries in pAlternates
ALT_ENTRY *pAlternates;
// Misc. statistics
int cUniqData;
} CART_NODE;
// Valid types of questions.  questionNONE is zero and questionCount, being
// last, equals the number of entries before it.
typedef enum tagQUESTION_TYPE
{
    questionNONE = 0,
    questionX,                  // X position
    questionY,                  // Y position
    questionXDelta,             // Delta between two X positions
    questionYDelta,             // Delta between two Y positions
    questionXAngle,             // Angle relative to X axis
    questionYAngle,             // Angle relative to Y axis
    questionDelta,              // Squared distance between two points
    questionDakuTen,            // Chance this character has a dakuten
    questionNetAngle,           // Net angle of a stroke
    questionCnsAngle,           // Difference of net angle and absolute angle
    questionAbsAngle,           // Absolute angle of a stroke
    questionCSteps,             // Count of steps in a stroke
    questionCFeatures,          // Count of features in a stroke
    questionXPointsRight,       // # of points to the right of a given X value
    questionYPointsBelow,       // # of points below a given Y value
    questionPerpDist,           // Perpendicular distance from a line to a point
    questionSumXDelta,          // Sum of X deltas of a stroke
    questionSumYDelta,          // Sum of Y deltas of a stroke
    questionSumDelta,           // Sum of magnitudes of a stroke
    questionSumNetAngle,        // Sum of net angles of a stroke
    questionSumAbsAngle,        // Sum of absolute angles of a stroke
    questionCompareXDelta,      // Derivative of X deltas
    questionCompareYDelta,      // Derivative of Y deltas
    questionCompareDelta,       // Derivative of magnitudes
    questionCompareAngle,       // Derivative of angles
    questionPointsInBBox,       // Points in a particular box
    questionCharLeft,           // Leftmost position of a character
    questionCharTop,            // Topmost position of a character
    questionCharWidth,          // Width of a character
    questionCharHeight,         // Height of a character
    questionCharDiagonal,       // Length of character's diagonal
    questionCharTheta,          // Angle of character's diagonal
    questionStrokeLeft,         // Left most position of a bounding box of stroke range
    questionStrokeTop,          // Top most position of a bounding box of stroke range
    questionStrokeWidth,        // Width of a bounding box of stroke range
    questionStrokeHeight,       // Height of a bounding box of stroke range
    questionStrokeDiagonal,     // Length of (bounding box of stroke range)'s diagonal
    questionStrokeTheta,        // Angle of (bounding box of stroke range)'s diagonal
    questionStrokeRight,        // Right most position of a bounding box of stroke range
    questionStrokeBottom,       // Bottom most position of a bounding box of stroke range
    questionStrokeLength,       // Total curvilinear length of stroke
    questionStrokeCurve,        // Delta between curvilinear length and straight-line length
    questionCharLength,         // Total curvilinear length of all strokes in character
    questionCharCurve,          // Delta between curvilinear length and straight-line length
    questionAltList,            // Position in recognizer alternate list.
    questionCount               // Number of question types; keep last
} QUESTION_TYPE;
// Constants for the packed question header.
// NOTE(review): presumably both are bit values for QART.flag (0xd0 marking a
// question node, 0x01 marking a question without a branch) -- confirm against
// the code that reads and writes the packed tree.
#define QART_QUESTION 0xd0
#define QART_NOBRANCH 0x01
// Two-byte question header: a question byte and a flag byte.
typedef struct tagQART
{
BYTE question;
BYTE flag;
} QART;
// Overlays a 16-bit value with a QART so the same two bytes can be read either
// as one WORD (unicode) or as the question/flag byte pair.
typedef union tagUNIQART
{
WORD unicode;
QART qart;
} UNIQART;
// This is the packed binary format of the question tree. Each node will either be a question
// with its parameters, value and branch offset or a UNICODE character. If the UNICODE character
// would be in the range 0xd000 - 0xdfff then it's a question node. Bits 0-3 are then flags
// about the question. Since branch offsets are limited to 64K and it's remotely possible to
// have >64K on a branch, the code 0xffff will represent an ESCAPE code. The optional DWORD
// in 'extra' will then be the long form of the branch. A sample file might look like this:
//
// offset field comment
// +0000 d0 This is a question
// +0001 02 Question #2
// +0002 00 Parameter 1 is 0
// +0003 01 Parameter 2 is 1
// +0004 03e8 Value is 1000
// +0006 000a Branch if greater to current position + 0x000a
// +0008 d1 This is a question with no branch
// +0009 07 Question #7
// +000a 03 Parameter 1 is 3
// +000b 02 Parameter 2 is 2
// +000c ffef Value is -17
// +000e 568a Return UNICODE value 0x568a if greater than -17
// +0010 887b Return UNICODE value 0x887b
// +0012 4e00 Return UNICODE value 0x4e00
// QNODE and QHEAD both end in a zero-length array, which is a Microsoft
// extension (warning C4200).  The warning is silenced for these two definitions
// only; push/pop restores whatever C4200 setting the including file had, rather
// than forcing it back to the compiler default.
#pragma warning (push)
#pragma warning (disable : 4200)
// One question node of the packed question tree.
typedef struct tagQNODE
{
    UNIQART uniqart;    // Question header, or a UNICODE character for non-question nodes
    BYTE param1;        // Question parameter 1
    BYTE param2;        // Question parameter 2
    short value;        // Value the question splits on
    WORD offset;        // Branch offset; 0xffff is the ESCAPE code for "use extra"
    DWORD extra[0];     // Optional long form of the branch offset
} QNODE;
// For UNICODE support, modify awIndex to be 84 long and convert the HIGH_INDEX
// macro to the following:
//
// #define HIGH_INDEX(x) ((((x) < 0x0100 ? (x) - 0x0100 : (x) < 0x4e00 ? (x) - 0x2f00 : (x) - 0x4c00) >> 8) & 0x00ff)
//
// This maps U+0000 to index 0, page U+3000 to index 1 and U+4e00 to index 2. All other
// Kanji follow. This uses only 600 bytes more space than the XJIS encoding and is even
// better than loading an 8K table for crushed UNICODE.
#define HIGH_INDEX_LIMIT 64
// Extracts bits 8-15 of x (the high byte of a 16-bit code).
// NOTE(review): the result can be as large as 0xff while awIndex has only
// HIGH_INDEX_LIMIT entries -- presumably callers range-check it; confirm.
#define HIGH_INDEX(x) (((x) >> 8) & 0x00ff)
// Header of the packed question data: a fixed table of HIGH_INDEX_LIMIT WORDs
// followed by a variable-length DWORD table.
// NOTE(review): presumably awIndex is indexed by HIGH_INDEX(code) -- confirm.
typedef struct tagQHEAD
{
    WORD awIndex[HIGH_INDEX_LIMIT];
    DWORD aqIndex[0];
} QHEAD;
#pragma warning (pop)
// Three untyped pointers describing a loaded Crane database.
// NOTE(review): presumably filled in by CraneLoadFile and released by
// CraneUnLoadFile; what each pointer refers to is not visible in this header.
typedef struct tagCRANE_LOAD_INFO
{
    void *pLoadInfo1;
    void *pLoadInfo2;
    void *pLoadInfo3;
} CRANE_LOAD_INFO;
// Exported entry points
// The sample-file routines in this group are declared only when HWX_PRODUCT
// is not defined.
#ifndef HWX_PRODUCT
// NOTE(review): presumably returns the text of the most recently read sample
// line -- confirm against the implementation.
wchar_t *LastLineSample(void);
// NOTE(review): presumably reads the next sample from the stream into the
// given SAMPLE and returns it (or NULL when there is none) -- confirm.
SAMPLE *ReadSample(SAMPLE *, FILE *);
// Takes no arguments.  Declared with (void) so that C compilers see a full
// prototype and reject calls that pass arguments; an empty () in C leaves the
// parameter list unspecified.
// NOTE(review): presumably resets the state kept by ReadSampleH -- confirm.
void ResetReadSampleH(void);
// NOTE(review): presumably the HANDLE-based counterpart of ReadSample -- confirm.
SAMPLE *ReadSampleH(SAMPLE *_this, HANDLE);
// NOTE(review): presumably writes one sample to the stream and returns TRUE on
// success -- confirm.
BOOL WriteSample(SAMPLE *, FILE *);
#endif // !HWX_PRODUCT
// Loading, unloading and matching entry points.  All return BOOL.
// NOTE(review): the meaning of the two int arguments of CraneLoadRes
// (presumably resource identifiers) is not visible in this header -- confirm.
BOOL CraneLoadRes(HINSTANCE, int, int, LOCRUN_INFO *pLocRunInfo);
// NOTE(review): presumably the wchar_t * is the path of the file to load and the
// CRANE_LOAD_INFO receives the load state later passed to CraneUnLoadFile -- confirm.
BOOL CraneLoadFile(LOCRUN_INFO *pLocRunInfo,CRANE_LOAD_INFO *,wchar_t *);
BOOL CraneUnLoadFile(CRANE_LOAD_INFO *);
// NOTE(review): presumably rescores the cAlt alternates in pAlt for the glyph,
// using pdrcs as guide bounds and eCARTWeight as the weight of the CART result -- confirm.
BOOL CraneMatch(ALT_LIST *pAlt, int cAlt, GLYPH *pGlyph, CHARSET *pCS, DRECTS *pdrcs, FLOAT eCARTWeight,LOCRUN_INFO *pLocRunInfo);
// Feature management for a SAMPLE.
// NOTE(review): presumably InitFeatures clears SAMPLE.apfeat, MakeFeatures fills
// it from the data passed as void * and FreeFeatures releases it -- confirm.
void InitFeatures(SAMPLE *);
void FreeFeatures(SAMPLE *);
BOOL MakeFeatures(SAMPLE *, void *);
// NOTE(review): presumably the three WORDs are the question type and its two
// parts, and the answer is stored in iAnswer of each of the int-many
// SAMPLE_INFO entries -- confirm.
void AnswerQuestion(WORD, WORD, WORD, SAMPLE_INFO *, int);
// Ask all the questions on the passed in data set, calling back after each.
// The caller supplies the callback and an opaque control pointer that is handed
// back on every call.
void
AskAllQuestions(
int cStrokes, // Number of strokes in each sample (same for all samples)
int cSamples, // Number of samples of data
SAMPLE_INFO *pSamples, // Pointer to samples of data
void (*pfCallBack)( // Called after each question
WORD questionType, // e.g. single-point, point/stroke, etc.
WORD part1, // Question constant part 1
WORD part2, // Question constant part 2
SAMPLE_INFO *pSamples, // NOTE(review): presumably the same sample array, with iAnswer set for this question -- confirm
int cSamples, // Number of entries in pSamples
void *pvCallBackControl // The control pointer given to AskAllQuestions
),
void *pvCallBackControl // passed to pfCallBack each time
);
/* Array of short codes for question types.  Defined elsewhere; both the
   pointers and the strings they point to are const.
   NOTE(review): presumably indexed by QUESTION_TYPE -- confirm. */
extern const char * const apQuestionTypeCode[];
// Closes the extern "C" block opened at the top of this header.  No semicolon
// follows the brace: a linkage specification is not terminated by one, and the
// extra ';' is an empty declaration that pedantic C++ compilers warn about.
#ifdef __cplusplus
}
#endif