|
|
////////////////////////////////////////////////////////////////////////////////
//
// Filename : PropFlags.h
// Purpose : properties definitions
//
// Project : WordBreakers
// Component: English word breaker
//
// Author : yairh
//
// Log:
//
// Jan 06 2000 yairh creation
// May 07 2000 dovh - const array generation:
// split PropArray.h => PropArray.h + PropFlags.h
// May 11 2000 dovh - Simplify GET_PROP to do double indexing always.
// Nov 11 2000 dovh - Special underscore treatment
// (Only added PROP_ALPHA_NUMERIC flag here)
//
////////////////////////////////////////////////////////////////////////////////
// Include guard: everything down to the closing "#endif // _PROP_FLAGS_H_"
// is processed at most once per translation unit.
#ifndef _PROP_FLAGS_H_
#define _PROP_FLAGS_H_
// Build switch, always defined here. Nothing in this header tests it;
// presumably code that includes this header does -- TODO confirm.
#define USE_WS_SENTINEL
// #undef USE_WS_SENTINEL
// DECLARE_BYTE_ARRAY is forced off, so the "#ifdef DECLARE_BYTE_ARRAY" branch
// of the PROP_FLAGS macros below (lookups in g_BytePropFlagArray) is skipped.
// #define DECLARE_BYTE_ARRAY
#undef DECLARE_BYTE_ARRAY
// DECLARE_ULONGLONG_ARRAY is forced off, so GET_PROP / IS_WS / IS_EOS /
// IS_BREAKER below go through g_pPropArray->m_apCodePage instead of the flat
// g_UllPropFlagArray table.
// #define DECLARE_ULONGLONG_ARRAY
#undef DECLARE_ULONGLONG_ARRAY
//
// TRACE_CHAR: 43 single-character tags followed by a terminating L'\0'
// (44 elements in total).
//
// An array initializer needs no line continuations, so the rows are written
// as plain lines, ten entries per row.
//
// NOTE(review): presumably one tag per property flag for trace output, but the
// table has 43 tags while WB_PROP_COUNT is 45, and the order does not follow
// the PROP_* bit positions (index 4 is 'N', bit 4 is PROP_PERIOD). Confirm
// against the code that indexes this table before relying on it.
//
const WCHAR TRACE_CHAR[] =
{
    L'S',  L'E',  L'U',  L'L',  L'N',  L'~',  L'!',  L'@',  L'#',  L'$',
    L'%',  L'-',  L'&',  L'*',  L'(',  L')',  L'-',  L'_',  L'=',  L'+',
    L'\\', L'|',  L'{',  L'}',  L'[',  L']',  L'\"', L'\'', L';',  L':',
    L'?',  L'/',  L'<',  L'>',  L'.',  L',',  L'w',  L'C',  L'T',  L'B',
    L's',  L'X',  L'S',  L'\0'
};
//
// NOTE: DO NOT CHANGE THE ORDER.
// UPDATE GEN_PROP_STRING_VALUE MACRO BELOW WHENEVER FLAGS DEFINITIONS CHANGE!
//
// Single-bit ULONGLONG mask for the property stored at bit position n.
#define WB_PROP_BIT(n)              (((ULONGLONG)1) << (n))

// No property set.
#define PROP_DEFAULT                ((ULONGLONG)0)

// Bits 0-7
#define PROP_WS                     WB_PROP_BIT(0)
#define PROP_EOS                    WB_PROP_BIT(1)
#define PROP_UPPER_CASE             WB_PROP_BIT(2)
#define PROP_LOWER_CASE             WB_PROP_BIT(3)
#define PROP_PERIOD                 WB_PROP_BIT(4)
#define PROP_COMMA                  WB_PROP_BIT(5)
#define PROP_RESERVED_BREAKER       WB_PROP_BIT(6)
#define PROP_RESERVED               WB_PROP_BIT(7)

// Bits 8-15
#define PROP_NUMBER                 WB_PROP_BIT(8)
#define PROP_TILDE                  WB_PROP_BIT(9)
#define PROP_EXCLAMATION_MARK       WB_PROP_BIT(10)
#define PROP_AT                     WB_PROP_BIT(11)
#define PROP_POUND                  WB_PROP_BIT(12)
#define PROP_DOLLAR                 WB_PROP_BIT(13)
#define PROP_PERCENTAGE             WB_PROP_BIT(14)
#define PROP_MINUS                  WB_PROP_BIT(15)

// Bits 16-23
#define PROP_AND                    WB_PROP_BIT(16)
#define PROP_ASTERISK               WB_PROP_BIT(17)
#define PROP_LEFT_PAREN             WB_PROP_BIT(18)
#define PROP_RIGHT_PAREN            WB_PROP_BIT(19)
#define PROP_DASH                   WB_PROP_BIT(20)
#define PROP_UNDERSCORE             WB_PROP_BIT(21)
#define PROP_EQUAL                  WB_PROP_BIT(22)
#define PROP_PLUS                   WB_PROP_BIT(23)

// Bits 24-31
#define PROP_BACKSLASH              WB_PROP_BIT(24)
#define PROP_OR                     WB_PROP_BIT(25)
#define PROP_LEFT_CURLY_BRACKET     WB_PROP_BIT(26)
#define PROP_RIGHT_CURLY_BRACKET    WB_PROP_BIT(27)
#define PROP_LEFT_BRAKCET           WB_PROP_BIT(28)
#define PROP_RIGHT_BRAKCET          WB_PROP_BIT(29)
#define PROP_DOUBLE_QUOTE           WB_PROP_BIT(30)
#define PROP_APOSTROPHE             WB_PROP_BIT(31)

// Bits 32-39 (from here on the masks no longer fit in 32 bits)
#define PROP_SEMI_COLON             WB_PROP_BIT(32)
#define PROP_COLON                  WB_PROP_BIT(33)
#define PROP_QUESTION_MARK          WB_PROP_BIT(34)
#define PROP_SLASH                  WB_PROP_BIT(35)
#define PROP_LT                     WB_PROP_BIT(36)
#define PROP_GT                     WB_PROP_BIT(37)
#define PROP_W                      WB_PROP_BIT(38)
#define PROP_CURRENCY               WB_PROP_BIT(39)

// Bits 40-44
#define PROP_BREAKER                WB_PROP_BIT(40)
#define PROP_TRANSPERENT            WB_PROP_BIT(41)
#define PROP_NBS                    WB_PROP_BIT(42)
#define PROP_ALPHA_XDIGIT           WB_PROP_BIT(43)
#define PROP_COMMERSIAL_SIGN        WB_PROP_BIT(44)

// Number of single-bit flags defined above (bits 0 through 44).
#define WB_PROP_COUNT               45
//
// The following is the contents of the GEN_PROP_STRING array
// used by the array generator.
//
// NOTE: DO NOT CHANGE THE ORDER.
// UPDATE MACRO WHENEVER FLAGS DEFINITIONS CHANGE TO REFLECT CHANGES!
//
// extern const WCHAR* GEN_PROP_STRING[ WB_PROP_COUNT ];
//
// Brace-enclosed initializer holding the wide-string name of every PROP_* flag,
// in bit order (entry i names the flag at bit i), 45 entries in total.
// Every entry except the last must be followed by a comma: adjacent string
// literals are concatenated by the compiler, which would silently merge names
// and shorten the list.
#define GEN_PROP_STRING_VALUE \
{ \
    /* bits 0-7 */ \
    L"PROP_WS", \
    L"PROP_EOS", \
    L"PROP_UPPER_CASE", \
    L"PROP_LOWER_CASE", \
    L"PROP_PERIOD", \
    L"PROP_COMMA", \
    L"PROP_RESERVED_BREAKER", \
    L"PROP_RESERVED", \
    /* bits 8-15 */ \
    L"PROP_NUMBER", \
    L"PROP_TILDE", \
    L"PROP_EXCLAMATION_MARK", \
    L"PROP_AT", \
    L"PROP_POUND", \
    L"PROP_DOLLAR", \
    L"PROP_PERCENTAGE", \
    L"PROP_MINUS", \
    /* bits 16-23 */ \
    L"PROP_AND", \
    L"PROP_ASTERISK", \
    L"PROP_LEFT_PAREN", \
    L"PROP_RIGHT_PAREN", \
    L"PROP_DASH", \
    L"PROP_UNDERSCORE", \
    L"PROP_EQUAL", \
    L"PROP_PLUS", \
    /* bits 24-31 */ \
    L"PROP_BACKSLASH", \
    L"PROP_OR", \
    L"PROP_LEFT_CURLY_BRACKET", \
    L"PROP_RIGHT_CURLY_BRACKET", \
    L"PROP_LEFT_BRAKCET", \
    L"PROP_RIGHT_BRAKCET", \
    L"PROP_DOUBLE_QUOTE", \
    L"PROP_APOSTROPHE", \
    /* bits 32-39 */ \
    L"PROP_SEMI_COLON", \
    L"PROP_COLON", \
    L"PROP_QUESTION_MARK", \
    L"PROP_SLASH", \
    L"PROP_LT", \
    L"PROP_GT", \
    L"PROP_W", \
    L"PROP_CURRENCY", \
    /* bits 40-44 */ \
    L"PROP_BREAKER", \
    L"PROP_TRANSPERENT", \
    L"PROP_NBS", \
    L"PROP_ALPHA_XDIGIT", \
    L"PROP_COMMERSIAL_SIGN" \
}
//
// Composite masks built from the single-bit PROP_* flags.
//

// Any letter, either case.
#define PROP_ALPHA          (PROP_LOWER_CASE | PROP_UPPER_CASE)
// Any letter or digit.
#define PROP_ALPHA_NUMERIC  (PROP_LOWER_CASE | PROP_UPPER_CASE | PROP_NUMBER)
// Characters accepted between date fields: dash, slash, period.
#define PROP_DATE_SEPERATOR (PROP_DASH | PROP_SLASH | PROP_PERIOD)
// Digit, or a character flagged PROP_ALPHA_XDIGIT.
#define PROP_XDIGIT         (PROP_NUMBER | PROP_ALPHA_XDIGIT)

// First-level breaker set. Each macro is its own directive with real line
// continuations, so PROP_SECOND_LEVEL_BREAKER is defined rather than being
// swallowed into this macro's replacement text.
#define PROP_FIRST_LEVEL_BREAKER \
    (PROP_BREAKER | PROP_EXCLAMATION_MARK | PROP_ASTERISK | \
     PROP_LEFT_PAREN | PROP_RIGHT_PAREN | PROP_BACKSLASH | PROP_EQUAL | PROP_OR | \
     PROP_LEFT_CURLY_BRACKET | PROP_RIGHT_CURLY_BRACKET | PROP_LEFT_BRAKCET | PROP_RIGHT_BRAKCET | \
     PROP_DOUBLE_QUOTE | PROP_SEMI_COLON | PROP_QUESTION_MARK | PROP_SLASH | \
     PROP_COMMA | PROP_GT | PROP_LT | PROP_WS)

// Second-level breaker set.
#define PROP_SECOND_LEVEL_BREAKER \
    (PROP_TILDE | PROP_AT | PROP_DOLLAR | PROP_PERCENTAGE | PROP_AND | \
     PROP_DASH | PROP_PLUS | PROP_COLON | PROP_PERIOD | PROP_POUND)

// Union of both breaker levels.
#define PROP_DEFAULT_BREAKER (PROP_FIRST_LEVEL_BREAKER | PROP_SECOND_LEVEL_BREAKER)
//
// Hyphenation
//
// Punctuation masks for hyphenated tokens.
// NOTE(review): HEAD/TAIL presumably mean punctuation tolerated before/after
// the token -- confirm against the tokenizer that consumes these masks.
#define HYPHENATION_PUNCT_HEAD \
    (PROP_SEMI_COLON | PROP_COMMA | PROP_COLON | PROP_LEFT_PAREN | PROP_NBS)
#define HYPHENATION_PUNCT_TAIL \
    (PROP_SEMI_COLON | PROP_COLON | PROP_COMMA | PROP_EXCLAMATION_MARK | PROP_QUESTION_MARK | \
     PROP_RIGHT_PAREN | PROP_PERIOD | PROP_NBS)
//
// Abbreviation, acronym
//
// Punctuation masks for acronyms. The TAIL set does not contain PROP_PERIOD.
#define ACRONYM_PUNCT_HEAD \
    (PROP_SEMI_COLON | PROP_COMMA | PROP_COLON | PROP_LEFT_PAREN | PROP_NBS)
#define ACRONYM_PUNCT_TAIL \
    (PROP_SEMI_COLON | PROP_COLON | PROP_COMMA | PROP_EXCLAMATION_MARK | PROP_QUESTION_MARK | \
     PROP_RIGHT_PAREN | PROP_NBS)

// Punctuation masks for abbreviations: the acronym sets plus PROP_APOSTROPHE.
#define ABBREVIATION_PUNCT_HEAD \
    (PROP_SEMI_COLON | PROP_COMMA | PROP_COLON | PROP_LEFT_PAREN | \
     PROP_NBS | PROP_APOSTROPHE)
#define ABBREVIATION_PUNCT_TAIL \
    (PROP_SEMI_COLON | PROP_COLON | PROP_COMMA | PROP_EXCLAMATION_MARK | PROP_QUESTION_MARK | \
     PROP_RIGHT_PAREN | PROP_NBS | PROP_APOSTROPHE)

// NOTE(review): presumably the characters that end a sentence when they follow
// an abbreviation -- confirm against the caller.
#define ABBREVIATION_EOS \
    (PROP_SEMI_COLON | PROP_COLON | PROP_EXCLAMATION_MARK | PROP_QUESTION_MARK | PROP_NBS)

// Punctuation masks for special abbreviations. Unlike ABBREVIATION_PUNCT_TAIL,
// this TAIL set contains PROP_PERIOD and does not contain PROP_APOSTROPHE.
#define SPECIAL_ABBREVIATION_PUNCT_HEAD \
    (PROP_SEMI_COLON | PROP_COMMA | PROP_COLON | PROP_LEFT_PAREN | PROP_NBS)
#define SPECIAL_ABBREVIATION_PUNCT_TAIL \
    (PROP_SEMI_COLON | PROP_COLON | PROP_COMMA | PROP_EXCLAMATION_MARK | PROP_QUESTION_MARK | \
     PROP_RIGHT_PAREN | PROP_PERIOD | PROP_NBS)
//
// Parenthesis
//
// Punctuation masks for parenthesized text.
// The TAIL set is the HEAD set plus PROP_PERIOD.
#define PAREN_PUNCT_TAIL \
    (PROP_PERIOD | PROP_COMMA | PROP_SEMI_COLON | PROP_COLON | \
     PROP_EXCLAMATION_MARK | PROP_QUESTION_MARK | \
     PROP_APOSTROPHE | PROP_NBS)
#define PAREN_PUNCT_HEAD \
    (PROP_COMMA | PROP_SEMI_COLON | PROP_COLON | \
     PROP_EXCLAMATION_MARK | PROP_QUESTION_MARK | \
     PROP_APOSTROPHE | PROP_NBS)
//
// Clitics
//
// Punctuation masks for clitics.
// The tail macro is spelled CLITICS_PUNC_TAIL (no second 'T'), unlike the
// other *_PUNCT_TAIL macros; the name is kept because callers depend on it.
#define CLITICS_PUNCT_HEAD \
    (PROP_SEMI_COLON | PROP_COMMA | PROP_COLON | PROP_LEFT_PAREN | PROP_NBS)
#define CLITICS_PUNC_TAIL \
    (PROP_SEMI_COLON | PROP_COLON | PROP_COMMA | PROP_EXCLAMATION_MARK | PROP_QUESTION_MARK | \
     PROP_RIGHT_PAREN | PROP_PERIOD | PROP_NBS)
//
// Numbers date time
//
// Punctuation masks shared by numbers, dates and times. HEAD and TAIL are
// separate directives, so NUM_DATE_TIME_PUNCT_TAIL is actually defined.
#define NUM_DATE_TIME_PUNCT_HEAD \
    (PROP_SEMI_COLON | PROP_COMMA | PROP_COLON | PROP_LEFT_PAREN | \
     PROP_LEFT_BRAKCET | PROP_LEFT_CURLY_BRACKET | PROP_NBS)
#define NUM_DATE_TIME_PUNCT_TAIL \
    (PROP_SEMI_COLON | PROP_COLON | PROP_COMMA | PROP_EXCLAMATION_MARK | PROP_QUESTION_MARK | \
     PROP_RIGHT_PAREN | PROP_PERIOD | PROP_RIGHT_BRAKCET | PROP_RIGHT_CURLY_BRACKET | PROP_NBS | \
     PROP_PERCENTAGE)

// Extra punctuation for times and dates: the apostrophe only.
// NOTE(review): presumably OR-ed with the NUM_DATE_TIME sets by the caller --
// confirm.
#define TIME_ADDITIONAL_PUNCT_HEAD (PROP_APOSTROPHE)
#define TIME_ADDITIONAL_PUNCT_TAIL (PROP_APOSTROPHE)
#define DATE_ADDITIONAL_PUNCT_HEAD (PROP_APOSTROPHE)
#define DATE_ADDITIONAL_PUNCT_TAIL (PROP_APOSTROPHE)
//
// Currency
//
// Punctuation masks for currency amounts: opening brackets on the HEAD side,
// closing brackets and the period on the TAIL side, apostrophe on both.
#define CURRENCY_PUNCT_HEAD \
    (PROP_SEMI_COLON | PROP_COMMA | PROP_COLON | PROP_LEFT_PAREN | \
     PROP_LEFT_BRAKCET | PROP_LEFT_CURLY_BRACKET | PROP_APOSTROPHE | \
     PROP_NBS)
#define CURRENCY_PUNCT_TAIL \
    (PROP_SEMI_COLON | PROP_COLON | PROP_COMMA | PROP_EXCLAMATION_MARK | PROP_QUESTION_MARK | \
     PROP_RIGHT_PAREN | PROP_PERIOD | PROP_RIGHT_BRAKCET | PROP_RIGHT_CURLY_BRACKET | \
     PROP_APOSTROPHE | PROP_NBS)
//
// Misc
//
// Punctuation masks for miscellaneous tokens.
#define MISC_PUNCT_HEAD \
    (PROP_SEMI_COLON | PROP_COMMA | PROP_COLON | PROP_LEFT_PAREN | PROP_NBS)
#define MISC_PUNCT_TAIL \
    (PROP_SEMI_COLON | PROP_COLON | PROP_COMMA | PROP_EXCLAMATION_MARK | PROP_QUESTION_MARK | \
     PROP_RIGHT_PAREN | PROP_PERIOD | PROP_NBS)
//
// Commercial sign (the macro names keep the historical COMMERSIAL spelling)
//
// Punctuation masks for tokens carrying a commercial sign. The COMMERSIAL
// spelling is kept because it is part of the macro names callers use.
#define COMMERSIAL_SIGN_PUNCT_HEAD \
    (PROP_SEMI_COLON | PROP_COMMA | PROP_COLON | PROP_LEFT_PAREN | PROP_NBS)
#define COMMERSIAL_SIGN_PUNCT_TAIL \
    (PROP_SEMI_COLON | PROP_COLON | PROP_COMMA | PROP_EXCLAMATION_MARK | PROP_QUESTION_MARK | \
     PROP_RIGHT_PAREN | PROP_PERIOD | PROP_NBS)
//
// EOS
//
// Mask of whitespace, closing brackets, apostrophe and PROP_NBS.
// NOTE(review): presumably the characters allowed to trail an end-of-sentence
// mark -- confirm against the caller.
#define EOS_SUFFIX \
    (PROP_WS | PROP_RIGHT_BRAKCET | PROP_RIGHT_PAREN | PROP_RIGHT_CURLY_BRACKET | \
     PROP_APOSTROPHE | PROP_NBS)
//
// default
//
// Default punctuation masks: every default breaker plus underscore,
// apostrophe and PROP_NBS. HEAD and TAIL are the same set.
#define SIMPLE_PUNCT_HEAD \
    (PROP_DEFAULT_BREAKER | PROP_UNDERSCORE | PROP_APOSTROPHE | PROP_NBS)
#define SIMPLE_PUNCT_TAIL \
    (PROP_DEFAULT_BREAKER | PROP_UNDERSCORE | PROP_APOSTROPHE | PROP_NBS)

// NOTE(review): presumably the capacity of one ULONGLONG flag word, i.e. the
// most single-bit properties that can ever be defined -- confirm.
#define MAX_NUM_PROP 64
//
// PROP_FLAGS MACROS:
//
// Character-property lookup macros. Which table they read is chosen by the
// DECLARE_ULONGLONG_ARRAY / DECLARE_BYTE_ARRAY switches.
//
// The macro argument is parenthesized wherever an operator is applied to it,
// so expression arguments such as (hi | lo) index the intended cell.
// Every macro evaluates its argument more than once or may do so in future:
// do not pass expressions with side effects.
//
// IS_WS / IS_EOS / IS_BREAKER yield the masked flag bits, not 0 or 1.
#ifndef DECLARE_ULONGLONG_ARRAY

// Two-level lookup: (wch >> 8) selects the row of m_apCodePage and the low
// byte of wch selects the entry within that row.
#define GET_PROP(wch) \
    ( g_pPropArray->m_apCodePage[(wch) >> 8][(UCHAR)(wch)] )

#ifdef DECLARE_BYTE_ARRAY

extern const BYTE g_BytePropFlagArray[ ];

// Flat BYTE table. IS_BREAKER tests PROP_RESERVED_BREAKER here, whereas the
// other variants test PROP_DEFAULT_BREAKER.
// NOTE(review): presumably because a BYTE entry only has room for bits 0-7 --
// confirm.
#define IS_WS(wch)      (g_BytePropFlagArray[(wch)] & PROP_WS)
#define IS_EOS(wch)     (g_BytePropFlagArray[(wch)] & PROP_EOS)
#define IS_BREAKER(wch) (g_BytePropFlagArray[(wch)] & PROP_RESERVED_BREAKER)

#else

// Code-page lookup through GET_PROP; the flag word is the m_ulFlag member.
#define IS_WS(wch)      (GET_PROP(wch).m_ulFlag & PROP_WS)
#define IS_EOS(wch)     (GET_PROP(wch).m_ulFlag & PROP_EOS)
#define IS_BREAKER(wch) (GET_PROP(wch).m_ulFlag & PROP_DEFAULT_BREAKER)

#endif // DECLARE_BYTE_ARRAY

#else

class CPropFlag;
extern const ULONGLONG g_UllPropFlagArray[ ];

// NOTE(review): g_PropFlagArray is not declared in this header; it must be
// declared before GET_PROP is used in this configuration -- confirm where.
#define GET_PROP(wch)   (g_PropFlagArray[(wch)])

// Flat ULONGLONG table indexed directly by the character code.
#define IS_WS(wch)      (g_UllPropFlagArray[(wch)] & PROP_WS)
#define IS_EOS(wch)     (g_UllPropFlagArray[(wch)] & PROP_EOS)
#define IS_BREAKER(wch) (g_UllPropFlagArray[(wch)] & PROP_DEFAULT_BREAKER)

#endif // DECLARE_ULONGLONG_ARRAY
// Flag tests on a property object that exposes a ULONGLONG m_ulFlag member.
//
// The argument is parenthesized before the member access, so expressions such
// as *pProp or array[i] can be passed safely.
//
// Each HAS_PROP_* / TEST_PROP* macro yields the masked bits (a ULONGLONG), not
// 0 or 1. Use the result only in a boolean context or keep it in a 64-bit
// variable: several flags sit at bit 32 and above and would be lost if the
// result were stored in a narrower integer.
#define HAS_PROP_ALPHA(prop)          ((prop).m_ulFlag & PROP_ALPHA)
#define HAS_PROP_EXTENDED_ALPHA(prop) ((prop).m_ulFlag & (PROP_ALPHA | PROP_TRANSPERENT))
#define HAS_PROP_UPPER_CASE(prop)     ((prop).m_ulFlag & PROP_UPPER_CASE)
#define HAS_PROP_LOWER_CASE(prop)     ((prop).m_ulFlag & PROP_LOWER_CASE)
#define HAS_PROP_NUMBER(prop)         ((prop).m_ulFlag & PROP_NUMBER)
#define HAS_PROP_CURRENCY(prop)       ((prop).m_ulFlag & PROP_CURRENCY)
#define HAS_PROP_LEFT_PAREN(prop)     ((prop).m_ulFlag & PROP_LEFT_PAREN)
#define HAS_PROP_RIGHT_PAREN(prop)    ((prop).m_ulFlag & PROP_RIGHT_PAREN)
#define HAS_PROP_APOSTROPHE(prop)     ((prop).m_ulFlag & PROP_APOSTROPHE)
#define HAS_PROP_BACKSLASH(prop)      ((prop).m_ulFlag & PROP_BACKSLASH)
#define HAS_PROP_SLASH(prop)          ((prop).m_ulFlag & PROP_SLASH)
#define HAS_PROP_PERIOD(prop)         ((prop).m_ulFlag & PROP_PERIOD)
#define HAS_PROP_COMMA(prop)          ((prop).m_ulFlag & PROP_COMMA)
#define HAS_PROP_COLON(prop)          ((prop).m_ulFlag & PROP_COLON)
#define HAS_PROP_DASH(prop)           ((prop).m_ulFlag & PROP_DASH)
#define HAS_PROP_W(prop)              ((prop).m_ulFlag & PROP_W)

// True when no flag is set, or when at least one of ALPHA / TRANSPERENT / W /
// ALPHA_XDIGIT is set and no flag outside that group is set.
// Evaluates prop up to three times.
#define IS_PROP_SIMPLE(prop) \
    (!(prop).m_ulFlag || \
     (((prop).m_ulFlag & (PROP_ALPHA | PROP_TRANSPERENT | PROP_W | PROP_ALPHA_XDIGIT)) && \
      !((prop).m_ulFlag & ~(PROP_ALPHA | PROP_TRANSPERENT | PROP_W | PROP_ALPHA_XDIGIT))))

// Bits of prop selected by the mask i.
#define TEST_PROP(prop, i)            ((prop).m_ulFlag & (i))
// Bits that two property objects have in common.
#define TEST_PROP1(prop1, prop2)      ((prop1).m_ulFlag & (prop2).m_ulFlag)
#endif // _PROP_FLAGS_H_
|