Source code of Windows XP (NT5)
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
|
|
/*
 * Declarations common to compiler and detector.
 *
 * Copyright (C) 1996, 1997, Microsoft Corp.  All rights reserved.
 *
 * History:
 *   1-Feb-97  BobP  Created
 *   5-Aug-97  BobP  Added Unicode support, and persisting
 *                   Charmaps in the data file.
 */
#ifndef __INC_LCDCOMP_COMMON_H
#define __INC_LCDCOMP_COMMON_H
/****************************************************************/
// Name of the compiled detection data file.  It is located in the
// lcdetect.dll module directory.
#define DETECTION_DATA_FILENAME     "mlang.dat"

// Limits
#define MAX7BITLANG         30
#define MAX8BITLANG         30
#define MAXUNICODELANG      30
#define MAXSUBLANG          5       // upper bound on sublanguages or codepages per language
#define MAXCHARMAPS         10      // upper bound on Charmaps, overall
// Special-case language IDs used by the training script and the detector.
// The detector never returns any of these IDs.
#define LANGID_UNKNOWN          0x400
#define LANGID_LATIN_GROUP      0x401
#define LANGID_CJK_GROUP        0x402
// HIdx -- value type of a histogram array index.
//
// This is the output value of the SBCS/DBCS or WCHAR reduction mapping.
// It is used as the index into the n-gram arrays and for the Unicode
// language group IDs.
//
// Each declaration is on its own line: a preprocessor directive must be
// the first token on its line, so HIDX_MAX cannot follow the typedefs on
// the same line.
//
// NOTE(review): UCHAR_MAX is defined by <limits.h>; no #include is visible
// in this header, so includers presumably provide it -- confirm.
typedef unsigned char HIdx;
typedef HIdx *PHIdx;
#define HIDX_MAX UCHAR_MAX      // largest HIdx value; keep consistent w/ HIdx
// Index values that mapped characters are fixed to.
// HIDX_LETTER_Z is derived from HIDX_LETTER_A (A + 25), so the letter
// indices form one contiguous run of 26 values.
#define HIDX_IGNORE         0
#define HIDX_EXTD           1
#define HIDX_LETTER_A       2
#define HIDX_LETTER_Z       (HIDX_LETTER_A + 25)
// HElt -- value type of a histogram element.
//
// Each declaration is on its own line: a preprocessor directive must be
// the first token on its line, so HELT_MAX cannot follow the typedefs on
// the same line.
//
// NOTE(review): UCHAR_MAX is defined by <limits.h>; no #include is visible
// in this header, so includers presumably provide it -- confirm.
typedef unsigned char HElt;
typedef HElt *PHElt;
#define HELT_MAX UCHAR_MAX      // largest HElt value; keep consistent w/ HElt

#define LANG7_DIM 3             // 7-bit language uses trigrams
// Charmap IDs.  IDs 0..4 are fixed; CHARMAP_NINTERNAL is the first ID
// handed out to dynamic subdetection maps.
#define CHARMAP_UNICODE     0       // built from RANGE directives
#define CHARMAP_7BITLANG    1       // built from CHARMAP 1
#define CHARMAP_8BITLANG    2       // from CHARMAP 2
#define CHARMAP_CODEPAGE    3       // from CHARMAP 3
#define CHARMAP_U27BIT      4       // built internally, for Unicode to 7-bit lang
#define CHARMAP_NINTERNAL   5       // first ID for dynamic subdetection maps
// Default values.
// NOTE(review): the names suggest default histogram edge sizes for 7-bit
// and 8-bit detection and a default per-character score for Unicode;
// their users are not visible in this header -- confirm.
#define DEFAULT_7BIT_EDGESIZE           28
#define DEFAULT_8BIT_EDGESIZE           155
#define UNICODE_DEFAULT_CHAR_SCORE      50
/****************************************************************/
// Compiled file format.
//
// The declarations from here on directly define the raw file format.
// Take care when changing any of them, and bump the header version
// number whenever a change calls for it.

// The bytes 0x4C, 0x43, 0x44, 0x54 are ASCII 'L', 'C', 'D', 'T'; written
// least-significant byte first they read "LCDT".
#define APP_SIGNATURE   0x5444434C
#define APP_VERSION     2
// SectionTypes -- values stored in FileSection::m_dwType.
// The numeric values are explicit because they are part of the file format.
enum SectionTypes {
    SECTION_TYPE_LANGUAGE   = 1,    // a language definition
    SECTION_TYPE_HISTOGRAM  = 2,    // a histogram
    SECTION_TYPE_MAP        = 3     // a character mapping table
};
// DetectionType -- SBCS/DBCS detection types.
// The values are written out explicitly; they are the same consecutive
// values an enum assigns by default when counting up from 0.
enum DetectionType {
    DETECT_NOTDEFINED   = 0,
    DETECT_7BIT         = 1,
    DETECT_8BIT         = 2,
    DETECT_UNICODE      = 3,
    DETECT_NTYPES       = 4     // one past the last type listed above
};
// FileHeader -- one-time header at the start of the file.
//
// This struct is part of the raw file format: do not reorder, resize or
// remove members without changing the header version number.
typedef struct FileHeader {
    // NOTE(review): the previous comment here read 'DTCT', which does not
    // match APP_SIGNATURE (0x5444434C, i.e. "LCDT").  Presumably this field
    // holds APP_SIGNATURE -- confirm against the reader/writer code.
    DWORD m_dwAppSig;               // application signature
    DWORD m_dwVersion;
    DWORD m_dwHdrSizeBytes;         // byte offset of 1st real section

    // Counts (going by the member names: languages per detection type,
    // and Charmaps).
    DWORD m_dwN7BitLanguages;
    DWORD m_dwN8BitLanguages;
    DWORD m_dwNUnicodeLanguages;
    DWORD m_dwNCharmaps;

    // Scoring parameters.  Their exact meaning is not visible in this header.
    DWORD m_dwMin7BitScore;
    DWORD m_dwMin8BitScore;
    DWORD m_dwMinUnicodeScore;
    DWORD m_dwRelativeThreshhold;
    DWORD m_dwDocPctThreshhold;
    DWORD m_dwChunkSize;
} FileHeader;
typedef FileHeader *PFileHeader;
// FileSection -- the common header found at the start of every file section.
typedef struct FileSection {
    DWORD m_dwSizeBytes;    // size of the section, header included (= offset to the next one)
    DWORD m_dwType;         // what kind of entry this section is
} FileSection;
typedef FileSection *PFileSection;
// FileLanguageSection -- first entry of the sequence describing one
// SBCS/DBCS language.
//
// A struct FileSection precedes it, and one or more histogram sections
// follow it.
typedef struct FileLanguageSection {
    DWORD m_dwDetectionType;
    DWORD m_dwLangID;
    DWORD m_dwUnicodeRangeID;   // Unicode range mapping value for this language
    DWORD m_dwRecordCount;      // number of histograms that follow this record
} FileLanguageSection;
typedef FileLanguageSection *PFileLanguageSection;
// FileHistogramSection -- the entry for a single histogram (SBCS/DBCS or
// WCHAR).
//
// A struct FileSection precedes it.  The histogram elements come after the
// fixed members (shown below as the commented-out m_Elts).
typedef struct FileHistogramSection {
    // Both names refer to the same DWORD; which one applies depends on
    // the kind of histogram.
    union {
        DWORD m_dwCodePage;     // 7-bit or 8-bit: the Codepage this indicates
        DWORD m_dwRangeID;      // Unicode: the sublanguage group ID
    };
    DWORD m_dwDimensionality;
    DWORD m_dwEdgeSize;
    DWORD m_dwMappingID;        // ID of the Charmap to use
    // HElt m_Elts[]
} FileHistogramSection;
typedef FileHistogramSection *PFileHistogramSection;
// FileMapSection -- the entry for a single character map (SBCS/DBCS or
// WCHAR).
//
// A struct FileSection precedes it.  The map table itself comes after the
// fixed members (shown below as the commented-out m_map).
typedef struct FileMapSection {
    DWORD m_dwID;           // the ID hardwired code uses to locate the table
    DWORD m_dwSize;         // table size (256 or 65536)
    DWORD m_dwNUnique;      // number of unique output values
    // HIdx m_map[]
} FileMapSection;
typedef FileMapSection *PFileMapSection;
////////////////////////////////////////////////////////////////
// LangNames -- lookup table used to get from an English-localized name to
// a Win32 primary language ID.
struct LangNames {
    LPCSTR          pcszName;
    unsigned short  nLangID;
};

// The table itself; it is defined elsewhere, and its length is not given here.
extern const struct LangNames LangNames[];

// Lookup routines, defined elsewhere.
// NOTE(review): presumably these map a language ID to its name and a name
// to its language ID -- confirm against the implementation.
LPCSTR GetLangName (int id);
int GetLangID (LPCSTR pcszName);
////////////////////////////////////////////////////////////////
#endif
|