Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

360 lines
14 KiB

  1. ////////////////////////////////////////////////////////////////////////////////
  2. //
  3. // Filename : PropFlags.h
  4. // Purpose : properties definitions
  5. //
  6. // Project : WordBreakers
  7. // Component: English word breaker
  8. //
  9. // Author : yairh
  10. //
  11. // Log:
  12. //
  13. // Jan 06 2000 yairh creation
  14. // May 07 2000 dovh - const array generation:
  15. // split PropArray.h => PropArray.h + PropFlags.h
  16. // May 11 2000 dovh - Simplify GET_PROP to do double indexing always.
  17. // Nov 11 2000 dovh - Special underscore treatment
  18. // (Only added PROP_ALPHA_NUMERIC flag here)
  19. //
  20. ////////////////////////////////////////////////////////////////////////////////
  21. #ifndef _PROP_FLAGS_H_
  22. #define _PROP_FLAGS_H_
  // Build-time switches for the property tables.
  //   USE_WS_SENTINEL         - switched on (its consumers are outside this header).
  //   DECLARE_BYTE_ARRAY      - switched off.
  //   DECLARE_ULONGLONG_ARRAY - switched off; this selects the double-indexed
  //                             GET_PROP definition later in this header.
  #define USE_WS_SENTINEL

  #undef DECLARE_BYTE_ARRAY
  #undef DECLARE_ULONGLONG_ARRAY
  29. const WCHAR TRACE_CHAR[] = \
  30. {L'S', L'E', L'U', L'L', L'N', L'~', L'!', L'@', L'#', L'$', \
  31. L'%', L'-', L'&', L'*', L'(', L')', L'-', L'_', L'=', L'+', \
  32. L'\\', L'|', L'{', L'}', L'[', L']', L'\"', L'\'', L';', L':', \
  33. L'?', L'/', L'<', L'>', L'.', L',', L'w', L'C', L'T', L'B', \
  34. L's', L'X', L'S', L'\0'};
  //
  // Single-bit property flags. Each flag occupies one bit of a ULONGLONG.
  //
  // NOTE: DO NOT CHANGE THE ORDER.
  // UPDATE GEN_PROP_STRING_VALUE MACRO BELOW WHENEVER FLAGS DEFINITIONS CHANGE!
  //

  // PROP_BIT(n): ULONGLONG value with only bit n set.
  #define PROP_BIT(n)                 (((ULONGLONG)1) << (n))

  #define PROP_DEFAULT                ((ULONGLONG)0)

  // bits 0-7
  #define PROP_WS                     PROP_BIT(0)
  #define PROP_EOS                    PROP_BIT(1)
  #define PROP_UPPER_CASE             PROP_BIT(2)
  #define PROP_LOWER_CASE             PROP_BIT(3)
  #define PROP_PERIOD                 PROP_BIT(4)
  #define PROP_COMMA                  PROP_BIT(5)
  #define PROP_RESERVED_BREAKER       PROP_BIT(6)
  #define PROP_RESERVED               PROP_BIT(7)

  // bits 8-15
  #define PROP_NUMBER                 PROP_BIT(8)
  #define PROP_TILDE                  PROP_BIT(9)
  #define PROP_EXCLAMATION_MARK       PROP_BIT(10)
  #define PROP_AT                     PROP_BIT(11)
  #define PROP_POUND                  PROP_BIT(12)
  #define PROP_DOLLAR                 PROP_BIT(13)
  #define PROP_PERCENTAGE             PROP_BIT(14)
  #define PROP_MINUS                  PROP_BIT(15)

  // bits 16-23
  #define PROP_AND                    PROP_BIT(16)
  #define PROP_ASTERISK               PROP_BIT(17)
  #define PROP_LEFT_PAREN             PROP_BIT(18)
  #define PROP_RIGHT_PAREN            PROP_BIT(19)
  #define PROP_DASH                   PROP_BIT(20)
  #define PROP_UNDERSCORE             PROP_BIT(21)
  #define PROP_EQUAL                  PROP_BIT(22)
  #define PROP_PLUS                   PROP_BIT(23)

  // bits 24-31
  #define PROP_BACKSLASH              PROP_BIT(24)
  #define PROP_OR                     PROP_BIT(25)
  #define PROP_LEFT_CURLY_BRACKET     PROP_BIT(26)
  #define PROP_RIGHT_CURLY_BRACKET    PROP_BIT(27)
  #define PROP_LEFT_BRAKCET           PROP_BIT(28)
  #define PROP_RIGHT_BRAKCET          PROP_BIT(29)
  #define PROP_DOUBLE_QUOTE           PROP_BIT(30)
  #define PROP_APOSTROPHE             PROP_BIT(31)

  // bits 32-39
  #define PROP_SEMI_COLON             PROP_BIT(32)
  #define PROP_COLON                  PROP_BIT(33)
  #define PROP_QUESTION_MARK          PROP_BIT(34)
  #define PROP_SLASH                  PROP_BIT(35)
  #define PROP_LT                     PROP_BIT(36)
  #define PROP_GT                     PROP_BIT(37)
  #define PROP_W                      PROP_BIT(38)
  #define PROP_CURRENCY               PROP_BIT(39)

  // bits 40-44
  #define PROP_BREAKER                PROP_BIT(40)
  #define PROP_TRANSPERENT            PROP_BIT(41)
  #define PROP_NBS                    PROP_BIT(42)
  #define PROP_ALPHA_XDIGIT           PROP_BIT(43)
  #define PROP_COMMERSIAL_SIGN        PROP_BIT(44)

  // Number of single-bit flags defined above (bits 0 through 44).
  #define WB_PROP_COUNT               45
  //
  // The following is the contents of the GEN_PROP_STRING array
  // used by the array generator: one wide-string name per property flag,
  // listed in bit order (entry i names the flag defined as 1 << i).
  //
  // NOTE: DO NOT CHANGE THE ORDER.
  // UPDATE MACRO WHENEVER FLAGS DEFINITIONS CHANGE TO REFLECT CHANGES!
  //
  // Entries must be separated by commas. Adjacent string literals with no
  // comma between them are concatenated by the compiler into one string,
  // which silently yields fewer than WB_PROP_COUNT entries.
  //
  // extern const WCHAR* GEN_PROP_STRING[ WB_PROP_COUNT ];
  //
  #define GEN_PROP_STRING_VALUE \
  { \
      /* bits 0-7 */ \
      L"PROP_WS", \
      L"PROP_EOS", \
      L"PROP_UPPER_CASE", \
      L"PROP_LOWER_CASE", \
      L"PROP_PERIOD", \
      L"PROP_COMMA", \
      L"PROP_RESERVED_BREAKER", \
      L"PROP_RESERVED", \
      /* bits 8-15 */ \
      L"PROP_NUMBER", \
      L"PROP_TILDE", \
      L"PROP_EXCLAMATION_MARK", \
      L"PROP_AT", \
      L"PROP_POUND", \
      L"PROP_DOLLAR", \
      L"PROP_PERCENTAGE", \
      L"PROP_MINUS", \
      /* bits 16-23 */ \
      L"PROP_AND", \
      L"PROP_ASTERISK", \
      L"PROP_LEFT_PAREN", \
      L"PROP_RIGHT_PAREN", \
      L"PROP_DASH", \
      L"PROP_UNDERSCORE", \
      L"PROP_EQUAL", \
      L"PROP_PLUS", \
      /* bits 24-31 */ \
      L"PROP_BACKSLASH", \
      L"PROP_OR", \
      L"PROP_LEFT_CURLY_BRACKET", \
      L"PROP_RIGHT_CURLY_BRACKET", \
      L"PROP_LEFT_BRAKCET", \
      L"PROP_RIGHT_BRAKCET", \
      L"PROP_DOUBLE_QUOTE", \
      L"PROP_APOSTROPHE", \
      /* bits 32-39 */ \
      L"PROP_SEMI_COLON", \
      L"PROP_COLON", \
      L"PROP_QUESTION_MARK", \
      L"PROP_SLASH", \
      L"PROP_LT", \
      L"PROP_GT", \
      L"PROP_W", \
      L"PROP_CURRENCY", \
      /* bits 40-44 */ \
      L"PROP_BREAKER", \
      L"PROP_TRANSPERENT", \
      L"PROP_NBS", \
      L"PROP_ALPHA_XDIGIT", \
      L"PROP_COMMERSIAL_SIGN" \
  }
  // Derived masks: unions of the single-bit PROP_* flags.

  // Lower-case or upper-case.
  #define PROP_ALPHA \
      (PROP_LOWER_CASE | PROP_UPPER_CASE)

  // Lower-case, upper-case or number.
  #define PROP_ALPHA_NUMERIC \
      (PROP_LOWER_CASE | PROP_UPPER_CASE | PROP_NUMBER)

  // Dash, slash or period.
  #define PROP_DATE_SEPERATOR \
      (PROP_DASH | PROP_SLASH | PROP_PERIOD)

  // Number or alphabetic hex digit.
  #define PROP_XDIGIT \
      (PROP_NUMBER | PROP_ALPHA_XDIGIT)

  #define PROP_FIRST_LEVEL_BREAKER    \
      ( PROP_BREAKER                  \
      | PROP_EXCLAMATION_MARK         \
      | PROP_ASTERISK                 \
      | PROP_LEFT_PAREN               \
      | PROP_RIGHT_PAREN              \
      | PROP_BACKSLASH                \
      | PROP_EQUAL                    \
      | PROP_OR                       \
      | PROP_LEFT_CURLY_BRACKET       \
      | PROP_RIGHT_CURLY_BRACKET      \
      | PROP_LEFT_BRAKCET             \
      | PROP_RIGHT_BRAKCET            \
      | PROP_DOUBLE_QUOTE             \
      | PROP_SEMI_COLON               \
      | PROP_QUESTION_MARK            \
      | PROP_SLASH                    \
      | PROP_COMMA                    \
      | PROP_GT                       \
      | PROP_LT                       \
      | PROP_WS )

  #define PROP_SECOND_LEVEL_BREAKER   \
      ( PROP_TILDE                    \
      | PROP_AT                       \
      | PROP_DOLLAR                   \
      | PROP_PERCENTAGE               \
      | PROP_AND                      \
      | PROP_DASH                     \
      | PROP_PLUS                     \
      | PROP_COLON                    \
      | PROP_PERIOD                   \
      | PROP_POUND )

  // Union of both breaker levels.
  #define PROP_DEFAULT_BREAKER \
      (PROP_FIRST_LEVEL_BREAKER | PROP_SECOND_LEVEL_BREAKER)
  //
  // Hyphenation
  //
  // NOTE(review): HEAD/TAIL presumably list the punctuation properties
  // accepted before/after a hyphenated token - confirm against the callers.
  //
  #define HYPHENATION_PUNCT_HEAD  \
      ( PROP_SEMI_COLON           \
      | PROP_COMMA                \
      | PROP_COLON                \
      | PROP_LEFT_PAREN           \
      | PROP_NBS )

  #define HYPHENATION_PUNCT_TAIL  \
      ( PROP_SEMI_COLON           \
      | PROP_COLON                \
      | PROP_COMMA                \
      | PROP_EXCLAMATION_MARK     \
      | PROP_QUESTION_MARK        \
      | PROP_RIGHT_PAREN          \
      | PROP_PERIOD               \
      | PROP_NBS )
  //
  // Abbreviation, acronym
  //
  // Note: the acronym and abbreviation TAIL sets do not contain PROP_PERIOD,
  // while the special-abbreviation TAIL set does.
  //
  #define ACRONYM_PUNCT_HEAD      \
      ( PROP_SEMI_COLON           \
      | PROP_COMMA                \
      | PROP_COLON                \
      | PROP_LEFT_PAREN           \
      | PROP_NBS )

  #define ACRONYM_PUNCT_TAIL      \
      ( PROP_SEMI_COLON           \
      | PROP_COLON                \
      | PROP_COMMA                \
      | PROP_EXCLAMATION_MARK     \
      | PROP_QUESTION_MARK        \
      | PROP_RIGHT_PAREN          \
      | PROP_NBS )

  #define ABBREVIATION_PUNCT_HEAD \
      ( PROP_SEMI_COLON           \
      | PROP_COMMA                \
      | PROP_COLON                \
      | PROP_LEFT_PAREN           \
      | PROP_NBS                  \
      | PROP_APOSTROPHE )

  #define ABBREVIATION_PUNCT_TAIL \
      ( PROP_SEMI_COLON           \
      | PROP_COLON                \
      | PROP_COMMA                \
      | PROP_EXCLAMATION_MARK     \
      | PROP_QUESTION_MARK        \
      | PROP_RIGHT_PAREN          \
      | PROP_NBS                  \
      | PROP_APOSTROPHE )

  #define ABBREVIATION_EOS        \
      ( PROP_SEMI_COLON           \
      | PROP_COLON                \
      | PROP_EXCLAMATION_MARK     \
      | PROP_QUESTION_MARK        \
      | PROP_NBS )

  #define SPECIAL_ABBREVIATION_PUNCT_HEAD \
      ( PROP_SEMI_COLON           \
      | PROP_COMMA                \
      | PROP_COLON                \
      | PROP_LEFT_PAREN           \
      | PROP_NBS )

  #define SPECIAL_ABBREVIATION_PUNCT_TAIL \
      ( PROP_SEMI_COLON           \
      | PROP_COLON                \
      | PROP_COMMA                \
      | PROP_EXCLAMATION_MARK     \
      | PROP_QUESTION_MARK        \
      | PROP_RIGHT_PAREN          \
      | PROP_PERIOD               \
      | PROP_NBS )
  //
  // Parenthesis
  //
  // The TAIL set is the HEAD set plus PROP_PERIOD.
  //
  #define PAREN_PUNCT_TAIL        \
      ( PROP_SEMI_COLON           \
      | PROP_COLON                \
      | PROP_COMMA                \
      | PROP_PERIOD               \
      | PROP_EXCLAMATION_MARK     \
      | PROP_QUESTION_MARK        \
      | PROP_NBS                  \
      | PROP_APOSTROPHE )

  #define PAREN_PUNCT_HEAD        \
      ( PROP_SEMI_COLON           \
      | PROP_COLON                \
      | PROP_COMMA                \
      | PROP_EXCLAMATION_MARK     \
      | PROP_QUESTION_MARK        \
      | PROP_NBS                  \
      | PROP_APOSTROPHE )
  //
  // Clitics
  //
  // Note the tail macro is spelled CLITICS_PUNC_TAIL (no second 'T').
  //
  #define CLITICS_PUNCT_HEAD      \
      ( PROP_SEMI_COLON           \
      | PROP_COMMA                \
      | PROP_COLON                \
      | PROP_LEFT_PAREN           \
      | PROP_NBS )

  #define CLITICS_PUNC_TAIL       \
      ( PROP_SEMI_COLON           \
      | PROP_COLON                \
      | PROP_COMMA                \
      | PROP_EXCLAMATION_MARK     \
      | PROP_QUESTION_MARK        \
      | PROP_RIGHT_PAREN          \
      | PROP_PERIOD               \
      | PROP_NBS )
  //
  // Numbers date time
  //
  #define NUM_DATE_TIME_PUNCT_HEAD \
      ( PROP_SEMI_COLON            \
      | PROP_COMMA                 \
      | PROP_COLON                 \
      | PROP_LEFT_PAREN            \
      | PROP_LEFT_BRAKCET          \
      | PROP_LEFT_CURLY_BRACKET    \
      | PROP_NBS )

  #define NUM_DATE_TIME_PUNCT_TAIL \
      ( PROP_SEMI_COLON            \
      | PROP_COLON                 \
      | PROP_COMMA                 \
      | PROP_EXCLAMATION_MARK      \
      | PROP_QUESTION_MARK         \
      | PROP_RIGHT_PAREN           \
      | PROP_PERIOD                \
      | PROP_RIGHT_BRAKCET         \
      | PROP_RIGHT_CURLY_BRACKET   \
      | PROP_NBS                   \
      | PROP_PERCENTAGE )

  // Additional sets for times and dates: the apostrophe only.
  #define TIME_ADDITIONAL_PUNCT_HEAD  (PROP_APOSTROPHE)
  #define TIME_ADDITIONAL_PUNCT_TAIL  (PROP_APOSTROPHE)
  #define DATE_ADDITIONAL_PUNCT_HEAD  (PROP_APOSTROPHE)
  #define DATE_ADDITIONAL_PUNCT_TAIL  (PROP_APOSTROPHE)
  //
  // Currency
  //
  #define CURRENCY_PUNCT_HEAD     \
      ( PROP_SEMI_COLON           \
      | PROP_COMMA                \
      | PROP_COLON                \
      | PROP_LEFT_PAREN           \
      | PROP_LEFT_BRAKCET         \
      | PROP_LEFT_CURLY_BRACKET   \
      | PROP_APOSTROPHE           \
      | PROP_NBS )

  #define CURRENCY_PUNCT_TAIL     \
      ( PROP_SEMI_COLON           \
      | PROP_COLON                \
      | PROP_COMMA                \
      | PROP_EXCLAMATION_MARK     \
      | PROP_QUESTION_MARK        \
      | PROP_RIGHT_PAREN          \
      | PROP_PERIOD               \
      | PROP_RIGHT_BRAKCET        \
      | PROP_RIGHT_CURLY_BRACKET  \
      | PROP_APOSTROPHE           \
      | PROP_NBS )
  //
  // Misc
  //
  #define MISC_PUNCT_HEAD         \
      ( PROP_SEMI_COLON           \
      | PROP_COMMA                \
      | PROP_COLON                \
      | PROP_LEFT_PAREN           \
      | PROP_NBS )

  #define MISC_PUNCT_TAIL         \
      ( PROP_SEMI_COLON           \
      | PROP_COLON                \
      | PROP_COMMA                \
      | PROP_EXCLAMATION_MARK     \
      | PROP_QUESTION_MARK        \
      | PROP_RIGHT_PAREN          \
      | PROP_PERIOD               \
      | PROP_NBS )
  //
  // Commercial sign
  // (the macro names keep the historical "COMMERSIAL" spelling)
  //
  #define COMMERSIAL_SIGN_PUNCT_HEAD \
      ( PROP_SEMI_COLON              \
      | PROP_COMMA                   \
      | PROP_COLON                   \
      | PROP_LEFT_PAREN              \
      | PROP_NBS )

  #define COMMERSIAL_SIGN_PUNCT_TAIL \
      ( PROP_SEMI_COLON              \
      | PROP_COLON                   \
      | PROP_COMMA                   \
      | PROP_EXCLAMATION_MARK        \
      | PROP_QUESTION_MARK           \
      | PROP_RIGHT_PAREN             \
      | PROP_PERIOD                  \
      | PROP_NBS )
  //
  // EOS
  //
  // NOTE(review): presumably the properties allowed to follow an
  // end-of-sentence character - confirm against the callers.
  //
  #define EOS_SUFFIX              \
      ( PROP_WS                   \
      | PROP_RIGHT_BRAKCET        \
      | PROP_RIGHT_PAREN          \
      | PROP_RIGHT_CURLY_BRACKET  \
      | PROP_APOSTROPHE           \
      | PROP_NBS )
  //
  // default
  //
  // HEAD and TAIL are the same set: every default breaker plus
  // non-breaking space, underscore and apostrophe.
  //
  #define SIMPLE_PUNCT_HEAD       \
      ( PROP_NBS                  \
      | PROP_UNDERSCORE           \
      | PROP_DEFAULT_BREAKER      \
      | PROP_APOSTROPHE )

  #define SIMPLE_PUNCT_TAIL       \
      ( PROP_NBS                  \
      | PROP_UNDERSCORE           \
      | PROP_DEFAULT_BREAKER      \
      | PROP_APOSTROPHE )

  // NOTE(review): presumably the capacity of the ULONGLONG flag word in bits.
  #define MAX_NUM_PROP            64
  //
  // PROP_FLAGS MACROS:
  //
  //   GET_PROP(wch)   - property entry for character wch.
  //   IS_WS(wch)      - non-zero if wch has PROP_WS.
  //   IS_EOS(wch)     - non-zero if wch has PROP_EOS.
  //   IS_BREAKER(wch) - non-zero if wch has a breaker property.
  //
  // The macro parameter is parenthesized wherever operator precedence could
  // otherwise re-associate an expression argument: GET_PROP(a | b) must shift
  // and cast the whole of (a | b), not just one operand.
  // The double-indexed GET_PROP expands wch twice; do not pass arguments
  // with side effects (e.g. *p++).
  //
  #ifndef DECLARE_ULONGLONG_ARRAY

  // Two-level lookup: (wch >> 8) selects the page, the low byte of wch
  // selects the entry within that page.
  #define GET_PROP(wch) \
      ( g_pPropArray->m_apCodePage[(wch) >> 8][(UCHAR)(wch)] )

  #ifdef DECLARE_BYTE_ARRAY

  extern const BYTE g_BytePropFlagArray[ ];

  // Byte-table variant. Note IS_BREAKER tests PROP_RESERVED_BREAKER here,
  // not PROP_DEFAULT_BREAKER as in the other variants.
  #define IS_WS(wch)      (g_BytePropFlagArray[wch] & PROP_WS)
  #define IS_EOS(wch)     (g_BytePropFlagArray[wch] & PROP_EOS)
  #define IS_BREAKER(wch) (g_BytePropFlagArray[wch] & PROP_RESERVED_BREAKER)

  #else

  #define IS_WS(wch)      (GET_PROP(wch).m_ulFlag & PROP_WS)
  #define IS_EOS(wch)     (GET_PROP(wch).m_ulFlag & PROP_EOS)
  #define IS_BREAKER(wch) (GET_PROP(wch).m_ulFlag & PROP_DEFAULT_BREAKER)

  #endif // DECLARE_BYTE_ARRAY

  #else

  class CPropFlag;
  extern const ULONGLONG g_UllPropFlagArray[ ];

  // NOTE(review): g_PropFlagArray is not declared in this header (only
  // g_UllPropFlagArray is); it is presumably declared elsewhere - confirm
  // before enabling DECLARE_ULONGLONG_ARRAY.
  #define GET_PROP(wch)   (g_PropFlagArray[wch])
  #define IS_WS(wch)      (g_UllPropFlagArray[wch] & PROP_WS)
  #define IS_EOS(wch)     (g_UllPropFlagArray[wch] & PROP_EOS)
  #define IS_BREAKER(wch) (g_UllPropFlagArray[wch] & PROP_DEFAULT_BREAKER)

  #endif // DECLARE_ULONGLONG_ARRAY
  //
  // Property tests on an object that exposes an m_ulFlag member.
  // Each macro yields a non-zero value when the tested bit(s) are present.
  // The parameter is parenthesized so expression arguments such as *pProp
  // expand correctly, and every macro expands its argument exactly once.
  //
  #define HAS_PROP_ALPHA(prop)          ((prop).m_ulFlag & PROP_ALPHA)
  #define HAS_PROP_EXTENDED_ALPHA(prop) ((prop).m_ulFlag & (PROP_ALPHA | PROP_TRANSPERENT))
  #define HAS_PROP_UPPER_CASE(prop)     ((prop).m_ulFlag & PROP_UPPER_CASE)
  #define HAS_PROP_LOWER_CASE(prop)     ((prop).m_ulFlag & PROP_LOWER_CASE)
  #define HAS_PROP_NUMBER(prop)         ((prop).m_ulFlag & PROP_NUMBER)
  #define HAS_PROP_CURRENCY(prop)       ((prop).m_ulFlag & PROP_CURRENCY)
  #define HAS_PROP_LEFT_PAREN(prop)     ((prop).m_ulFlag & PROP_LEFT_PAREN)
  #define HAS_PROP_RIGHT_PAREN(prop)    ((prop).m_ulFlag & PROP_RIGHT_PAREN)
  #define HAS_PROP_APOSTROPHE(prop)     ((prop).m_ulFlag & PROP_APOSTROPHE)
  #define HAS_PROP_BACKSLASH(prop)      ((prop).m_ulFlag & PROP_BACKSLASH)
  #define HAS_PROP_SLASH(prop)          ((prop).m_ulFlag & PROP_SLASH)
  #define HAS_PROP_PERIOD(prop)         ((prop).m_ulFlag & PROP_PERIOD)
  #define HAS_PROP_COMMA(prop)          ((prop).m_ulFlag & PROP_COMMA)
  #define HAS_PROP_COLON(prop)          ((prop).m_ulFlag & PROP_COLON)
  #define HAS_PROP_DASH(prop)           ((prop).m_ulFlag & PROP_DASH)
  #define HAS_PROP_W(prop)              ((prop).m_ulFlag & PROP_W)

  // A property is "simple" when it has no flags at all, or only flags drawn
  // from PROP_ALPHA | PROP_TRANSPERENT | PROP_W | PROP_ALPHA_XDIGIT.
  // Both cases reduce to "no bit outside that mask is set", which yields the
  // same 0/1 result as testing them separately.
  #define IS_PROP_SIMPLE(prop) \
      (!((prop).m_ulFlag & ~(PROP_ALPHA | PROP_TRANSPERENT | PROP_W | PROP_ALPHA_XDIGIT)))

  // Test prop against an arbitrary flag mask i.
  #define TEST_PROP(prop, i)            ((prop).m_ulFlag & (i))

  // Non-zero when the two properties share at least one flag.
  #define TEST_PROP1(prop1, prop2)      ((prop1).m_ulFlag & (prop2).m_ulFlag)
  296. #endif // _PROP_FLAGS_H_