Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

814 lines
30 KiB

  1. /******************************************************************************
  2. * NormData.cpp *
  3. *--------------*
  4. * This file stores the const data used in normalization
  5. *------------------------------------------------------------------------------
  6. * Copyright (C) 1999 Microsoft Corporation Date: 05/02/2000
  7. * All Rights Reserved
  8. *
  9. ****************************************************************** AARONHAL ***/
  10. #include "stdafx.h"
  11. #include "stdsentenum.h"
  12. //--- Constants used to map incoming ANSI characters to Ascii ones...
  // Flag character value (NUL), defined alongside the ANSI-to-ASCII mapping data.
  // NOTE(review): how this flag is consumed is not visible in this file.
  const char g_pFlagCharacter = '\0';
  // g_AnsiToAscii
  //
  // Translation table with one entry per 8-bit character code (256 entries).
  // Entry i holds the replacement for incoming ANSI code i:
  //   - control codes become a space (0x20),
  //   - printable ASCII (0x20-0x7E) maps to itself,
  //   - typographic punctuation is folded to its plain ASCII look-alike,
  //   - accented letters are folded to their unaccented base letter,
  //   - a few symbols (euro, per-mille, TM, cents, pounds, yen, copyright,
  //     registered, degree, plus-minus, superscripts, prime, vulgar fractions,
  //     beta, division) keep their own code.
  // NOTE(review): the upper half appears to follow Windows code page 1252;
  // confirm against the caller's input encoding.
  //
  // Fixes relative to the previous revision:
  //   - 0xF9-0xFC (accented lowercase u) held 0x76 ('v'); they now hold 0x75 ('u'),
  //     matching the "map to u" intent and the uppercase entries (0x55 'U').
  //   - 0xF0 (eth) held 0x75 ('u'); it now holds 0x74 ('t') as documented.
  const unsigned char g_AnsiToAscii[] =
  {
      /*** 0x00-0x1F: control characters - map to whitespace ***/
      0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
      0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
      0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
      0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
      /*** 0x20-0x7E: ASCII displayables - map to themselves ***/
      0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
      0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
      0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
      0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
      0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
      0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
      0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
      0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
      0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
      0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
      0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
      0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E,
      /*** 0x7F: control character ***/
      0x20,
      /*** 0x80: Euro symbol - kept ***/
      0x80,
      /*** 0x81: control character ***/
      0x20,
      /*** 0x82-0xBF: extended punctuation and symbols ***/
      0x27, // 0x82 low single quote - map to single quote
      0x20, // 0x83 f-like character - map to space
      0x22, // 0x84 low double quote - map to double quote
      0x2C, // 0x85 ellipsis - map to comma
      0x20, // 0x86 cross - map to space
      0x20, // 0x87 double cross - map to space
      0x5E, // 0x88 caret-like accent - map to caret
      0x89, // 0x89 percent-like (per thousand) sign - kept
      0x53, // 0x8A S-hat - map to S
      0x27, // 0x8B left angle bracket like thing - map to single quote
      0x20, // 0x8C OE character - map to space
      0x20, // 0x8D control character - map to space
      0x20, // 0x8E control character - map to space
      0x20, // 0x8F control character - map to space
      0x20, // 0x90 control character - map to space
      0x27, // 0x91 left single quote - map to single quote
      0x27, // 0x92 right single quote - map to single quote
      0x22, // 0x93 left double quote - map to double quote
      0x22, // 0x94 right double quote - map to double quote
      0x20, // 0x95 bullet - map to space
      0x2D, // 0x96 long hyphen - map to hyphen
      0x2D, // 0x97 even longer hyphen - map to hyphen
      0x7E, // 0x98 tilde-like thing - map to tilde
      0x99, // 0x99 TM - kept
      0x73, // 0x9A s-hat - map to s
      0x27, // 0x9B right angle bracket like thing - map to single quote
      0x20, // 0x9C oe-like character - map to space
      0x20, // 0x9D control character - map to space
      0x20, // 0x9E control character - map to space
      0x59, // 0x9F Y with umlaut-like accent - map to Y
      0x20, // 0xA0 space? - map to space
      0x20, // 0xA1 upside-down exclamation point - map to space
      0xA2, // 0xA2 cents symbol - kept
      0xA3, // 0xA3 pounds symbol - kept
      0x20, // 0xA4 generic currency symbol - map to space
      0xA5, // 0xA5 yen symbol - kept
      0x7C, // 0xA6 broken bar - map to bar
      0x20, // 0xA7 strange symbol - map to space
      0x20, // 0xA8 umlaut - map to space
      0xA9, // 0xA9 copyright symbol - kept
      0x20, // 0xAA strange a character - map to space
      0x22, // 0xAB <<-like character - map to double quote
      0x20, // 0xAC line-like character - map to space
      0x2D, // 0xAD hyphen-like character - map to hyphen
      0xAE, // 0xAE registered symbol - kept
      0x20, // 0xAF high line - map to space
      0xB0, // 0xB0 degree sign - kept
      0xB1, // 0xB1 plus-minus sign - kept
      0xB2, // 0xB2 superscript 2 - kept
      0xB3, // 0xB3 superscript 3 - kept
      0xB4, // 0xB4 single prime - kept
      0x20, // 0xB5 greek character - map to space
      0x20, // 0xB6 paragraph symbol - map to space
      0x20, // 0xB7 mid-height dot - map to space
      0x20, // 0xB8 cedilla - map to space
      0xB9, // 0xB9 superscript one - kept
      0x20, // 0xBA circle with line - map to space
      0x22, // 0xBB >>-like character - map to double quote
      0xBC, // 0xBC vulgar 1/4 - kept
      0xBD, // 0xBD vulgar 1/2 - kept
      0xBE, // 0xBE vulgar 3/4 - kept
      0x20, // 0xBF upside-down question mark - map to space
      /*** 0xC0-0xDF: accented uppercase letters ***/
      0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, // 0xC0-0xC6 accented uppercase As - map to A
      0x43,                                     // 0xC7 C with cedilla - map to C
      0x45, 0x45, 0x45, 0x45,                   // 0xC8-0xCB accented uppercase Es - map to E
      0x49, 0x49, 0x49, 0x49,                   // 0xCC-0xCF accented uppercase Is - map to I
      0x20,                                     // 0xD0 strange character - map to space
      0x4E,                                     // 0xD1 accented uppercase N - map to N
      0x4F, 0x4F, 0x4F, 0x4F, 0x4F,             // 0xD2-0xD6 accented uppercase Os - map to O
      0x20,                                     // 0xD7 strange character - map to space
      0x4F,                                     // 0xD8 another O - map to O
      0x55, 0x55, 0x55, 0x55,                   // 0xD9-0xDC accented uppercase Us - map to U
      0x59,                                     // 0xDD accented uppercase Y - map to Y
      0x20,                                     // 0xDE strange character - map to space
      0xDF,                                     // 0xDF beta - kept
      /*** 0xE0-0xFF: accented lowercase letters ***/
      0x61, 0x61, 0x61, 0x61, 0x61, 0x61, 0x61, // 0xE0-0xE6 accented lowercase as - map to a
      0x63,                                     // 0xE7 c with cedilla - map to c
      0x65, 0x65, 0x65, 0x65,                   // 0xE8-0xEB accented lowercase es - map to e
      0x69, 0x69, 0x69, 0x69,                   // 0xEC-0xEF accented lowercase is - map to i
      0x74,                                     // 0xF0 eth - map to t
      0x6E,                                     // 0xF1 accented lowercase n - map to n
      0x6F, 0x6F, 0x6F, 0x6F, 0x6F,             // 0xF2-0xF6 accented lowercase os - map to o
      0xF7,                                     // 0xF7 division symbol - kept
      0x6F,                                     // 0xF8 another o - map to o
      0x75, 0x75, 0x75, 0x75,                   // 0xF9-0xFC accented lowercase us - map to u
      0x79,                                     // 0xFD accented lowercase y - map to y
      0x20,                                     // 0xFE strange character - map to space
      0x79,                                     // 0xFF accented lowercase y - map to y
  };
  161. //--- Constants used by number normalization
  162. const SPLSTR g_O = DEF_SPLSTR( "o" );
  163. const SPLSTR g_negative = DEF_SPLSTR( "negative" );
  164. const SPLSTR g_decimalpoint = DEF_SPLSTR( "point" );
  165. const SPLSTR g_a = DEF_SPLSTR( "a" );
  166. const SPLSTR g_of = DEF_SPLSTR( "of" );
  167. const SPLSTR g_percent = DEF_SPLSTR( "percent" );
  168. const SPLSTR g_degree = DEF_SPLSTR( "degree" );
  169. const SPLSTR g_degrees = DEF_SPLSTR( "degrees" );
  170. const SPLSTR g_squared = DEF_SPLSTR( "squared" );
  171. const SPLSTR g_cubed = DEF_SPLSTR( "cubed" );
  172. const SPLSTR g_to = DEF_SPLSTR( "to" );
  173. const SPLSTR g_dash = DEF_SPLSTR( "dash" );
  174. const SPLSTR g_ones[] =
  175. {
  176. DEF_SPLSTR( "zero" ),
  177. DEF_SPLSTR( "one" ),
  178. DEF_SPLSTR( "two" ),
  179. DEF_SPLSTR( "three" ),
  180. DEF_SPLSTR( "four" ),
  181. DEF_SPLSTR( "five" ),
  182. DEF_SPLSTR( "six" ),
  183. DEF_SPLSTR( "seven" ),
  184. DEF_SPLSTR( "eight" ),
  185. DEF_SPLSTR( "nine" )
  186. };
  187. const SPLSTR g_tens[] =
  188. {
  189. DEF_SPLSTR( "zero" ),
  190. DEF_SPLSTR( "ten" ),
  191. DEF_SPLSTR( "twenty" ),
  192. DEF_SPLSTR( "thirty" ),
  193. DEF_SPLSTR( "forty" ),
  194. DEF_SPLSTR( "fifty" ),
  195. DEF_SPLSTR( "sixty" ),
  196. DEF_SPLSTR( "seventy" ),
  197. DEF_SPLSTR( "eighty" ),
  198. DEF_SPLSTR( "ninety" )
  199. };
  200. const SPLSTR g_teens[] =
  201. {
  202. DEF_SPLSTR( "ten" ),
  203. DEF_SPLSTR( "eleven" ),
  204. DEF_SPLSTR( "twelve" ),
  205. DEF_SPLSTR( "thirteen" ),
  206. DEF_SPLSTR( "fourteen" ),
  207. DEF_SPLSTR( "fifteen" ),
  208. DEF_SPLSTR( "sixteen" ),
  209. DEF_SPLSTR( "seventeen" ),
  210. DEF_SPLSTR( "eighteen" ),
  211. DEF_SPLSTR( "nineteen" )
  212. };
  213. const SPLSTR g_onesOrdinal[] =
  214. {
  215. DEF_SPLSTR( "zeroth" ),
  216. DEF_SPLSTR( "first" ),
  217. DEF_SPLSTR( "second" ),
  218. DEF_SPLSTR( "third" ),
  219. DEF_SPLSTR( "fourth" ),
  220. DEF_SPLSTR( "fifth" ),
  221. DEF_SPLSTR( "sixth" ),
  222. DEF_SPLSTR( "seventh" ),
  223. DEF_SPLSTR( "eighth" ),
  224. DEF_SPLSTR( "ninth" )
  225. };
  226. const SPLSTR g_tensOrdinal[] =
  227. {
  228. DEF_SPLSTR( "" ),
  229. DEF_SPLSTR( "tenth" ),
  230. DEF_SPLSTR( "twentieth" ),
  231. DEF_SPLSTR( "thirtieth" ),
  232. DEF_SPLSTR( "fortieth" ),
  233. DEF_SPLSTR( "fiftieth" ),
  234. DEF_SPLSTR( "sixtieth" ),
  235. DEF_SPLSTR( "seventieth" ),
  236. DEF_SPLSTR( "eightieth" ),
  237. DEF_SPLSTR( "ninetieth" )
  238. };
  239. const SPLSTR g_teensOrdinal[] =
  240. {
  241. DEF_SPLSTR( "tenth" ),
  242. DEF_SPLSTR( "eleventh" ),
  243. DEF_SPLSTR( "twelfth" ),
  244. DEF_SPLSTR( "thirteenth" ),
  245. DEF_SPLSTR( "fourteenth" ),
  246. DEF_SPLSTR( "fifteenth" ),
  247. DEF_SPLSTR( "sixteenth" ),
  248. DEF_SPLSTR( "seventeenth" ),
  249. DEF_SPLSTR( "eighteenth" ),
  250. DEF_SPLSTR( "nineteenth" )
  251. };
  252. const SPLSTR g_quantifiers[] =
  253. {
  254. DEF_SPLSTR( "hundred" ),
  255. DEF_SPLSTR( "thousand" ),
  256. DEF_SPLSTR( "million" ),
  257. DEF_SPLSTR( "billion" ),
  258. DEF_SPLSTR( "trillion" ),
  259. DEF_SPLSTR( "quadrillion" )
  260. };
  261. const SPLSTR g_quantifiersOrdinal[] =
  262. {
  263. DEF_SPLSTR( "hundredth" ),
  264. DEF_SPLSTR( "thousandth" ),
  265. DEF_SPLSTR( "millionth" ),
  266. DEF_SPLSTR( "billionth" ),
  267. DEF_SPLSTR( "trillionth" ),
  268. DEF_SPLSTR( "quadrillionth" )
  269. };
  270. //--- Constants used by currency normalization
  271. WCHAR g_Euro[2] = { 0x0080, 0x0000 };
  272. const CurrencySign g_CurrencySigns[] =
  273. {
  274. { DEF_SPLSTR( "$" ), DEF_SPLSTR( "dollars" ), DEF_SPLSTR( "cents" ) },
  275. { DEF_SPLSTR( "" ), DEF_SPLSTR( "pounds" ), DEF_SPLSTR( "pence" ) },
  276. { DEF_SPLSTR( "" ), DEF_SPLSTR( "yen" ), DEF_SPLSTR( "sen" ) },
  277. { DEF_SPLSTR( "EUR" ), DEF_SPLSTR( "euros" ), DEF_SPLSTR( "cents" ) },
  278. { DEF_SPLSTR( "US$" ), DEF_SPLSTR( "dollars" ), DEF_SPLSTR( "cents" ) },
  279. { { &g_Euro[0], 1 }, DEF_SPLSTR( "euros" ), DEF_SPLSTR( "cents" ) },
  280. { DEF_SPLSTR( "" ), DEF_SPLSTR( "euros" ), DEF_SPLSTR( "cents" ) },
  281. { DEF_SPLSTR( "DM" ), DEF_SPLSTR( "deutschemarks" ), DEF_SPLSTR( "pfennigs" ) },
  282. { DEF_SPLSTR( "" ), DEF_SPLSTR( "cents" ), DEF_SPLSTR( "" ) },
  283. { DEF_SPLSTR( "USD" ), DEF_SPLSTR( "dollars" ), DEF_SPLSTR( "cents" ) },
  284. { DEF_SPLSTR( "dol." ), DEF_SPLSTR( "dollars" ), DEF_SPLSTR( "cents" ) },
  285. { DEF_SPLSTR( "schil." ), DEF_SPLSTR( "schillings" ), DEF_SPLSTR( "" ) },
  286. { DEF_SPLSTR( "dol" ), DEF_SPLSTR( "dollars" ), DEF_SPLSTR( "cents" ) },
  287. { DEF_SPLSTR( "schil" ), DEF_SPLSTR( "schillings" ), DEF_SPLSTR( "" ) }
  288. };
  289. const SPLSTR g_SingularPrimaryCurrencySigns[] =
  290. {
  291. DEF_SPLSTR( "dollar" ),
  292. DEF_SPLSTR( "pound" ),
  293. DEF_SPLSTR( "yen" ),
  294. DEF_SPLSTR( "euro" ),
  295. DEF_SPLSTR( "dollar" ),
  296. DEF_SPLSTR( "euro" ),
  297. DEF_SPLSTR( "euro" ),
  298. DEF_SPLSTR( "deutschemark" ),
  299. DEF_SPLSTR( "cent" ),
  300. DEF_SPLSTR( "dollar" ),
  301. DEF_SPLSTR( "dollar" ),
  302. DEF_SPLSTR( "schilling" ),
  303. DEF_SPLSTR( "dollar" ),
  304. DEF_SPLSTR( "schilling" )
  305. };
  306. const SPLSTR g_SingularSecondaryCurrencySigns[] =
  307. {
  308. DEF_SPLSTR( "cent" ),
  309. DEF_SPLSTR( "penny" ),
  310. DEF_SPLSTR( "sen" ),
  311. DEF_SPLSTR( "cent" ),
  312. DEF_SPLSTR( "cent" ),
  313. DEF_SPLSTR( "cent" ),
  314. DEF_SPLSTR( "cent" ),
  315. DEF_SPLSTR( "pfennig" ),
  316. DEF_SPLSTR( "" ),
  317. DEF_SPLSTR( "cent" ),
  318. DEF_SPLSTR( "cent" ),
  319. DEF_SPLSTR( "" ),
  320. DEF_SPLSTR( "cent" ),
  321. DEF_SPLSTR( "" ),
  322. };
  323. //--- Constants used by date normalization
  324. const WCHAR g_DateDelimiters[] = { '/', '-', '.' };
  325. const SPLSTR g_months[] =
  326. {
  327. DEF_SPLSTR( "January" ),
  328. DEF_SPLSTR( "February" ),
  329. DEF_SPLSTR( "March" ),
  330. DEF_SPLSTR( "April" ),
  331. DEF_SPLSTR( "May" ),
  332. DEF_SPLSTR( "June" ),
  333. DEF_SPLSTR( "July" ),
  334. DEF_SPLSTR( "August" ),
  335. DEF_SPLSTR( "September" ),
  336. DEF_SPLSTR( "October" ),
  337. DEF_SPLSTR( "November" ),
  338. DEF_SPLSTR( "December" )
  339. };
  340. const SPLSTR g_monthAbbreviations[] =
  341. {
  342. DEF_SPLSTR( "jan" ),
  343. DEF_SPLSTR( "feb" ),
  344. DEF_SPLSTR( "mar" ),
  345. DEF_SPLSTR( "apr" ),
  346. DEF_SPLSTR( "may" ),
  347. DEF_SPLSTR( "jun" ),
  348. DEF_SPLSTR( "jul" ),
  349. DEF_SPLSTR( "aug" ),
  350. DEF_SPLSTR( "sept" ),
  351. DEF_SPLSTR( "sep" ),
  352. DEF_SPLSTR( "oct" ),
  353. DEF_SPLSTR( "nov" ),
  354. DEF_SPLSTR( "dec" )
  355. };
  356. const SPLSTR g_days[] =
  357. {
  358. DEF_SPLSTR( "Monday" ),
  359. DEF_SPLSTR( "Tuesday" ),
  360. DEF_SPLSTR( "Wednesday" ),
  361. DEF_SPLSTR( "Thursday" ),
  362. DEF_SPLSTR( "Friday" ),
  363. DEF_SPLSTR( "Saturday" ),
  364. DEF_SPLSTR( "Sunday" )
  365. };
  366. const SPLSTR g_dayAbbreviations[] =
  367. {
  368. DEF_SPLSTR( "Mon" ),
  369. DEF_SPLSTR( "Tues" ),
  370. DEF_SPLSTR( "Tue" ),
  371. DEF_SPLSTR( "Wed" ),
  372. DEF_SPLSTR( "Thurs" ),
  373. DEF_SPLSTR( "Thur" ),
  374. DEF_SPLSTR( "Thu" ),
  375. DEF_SPLSTR( "Fri" ),
  376. DEF_SPLSTR( "Sat" ),
  377. DEF_SPLSTR( "Sun" ),
  378. };
  379. //--- Constants used by phone number normalization
  380. const SPLSTR g_Area = DEF_SPLSTR( "area" );
  381. const SPLSTR g_Country = DEF_SPLSTR( "country" );
  382. const SPLSTR g_Code = DEF_SPLSTR( "code" );
  383. //--- Constants used by fraction normalization
  384. const SPLSTR g_Half = DEF_SPLSTR( "half" );
  385. const SPLSTR g_Tenths = DEF_SPLSTR( "tenths" );
  386. const SPLSTR g_Hundredths = DEF_SPLSTR( "hundredths" );
  387. const SPLSTR g_Sixteenths = DEF_SPLSTR( "sixteenths" );
  388. const SPLSTR g_Over = DEF_SPLSTR( "over" );
  389. const SPLSTR g_PluralDenominators[] =
  390. {
  391. DEF_SPLSTR( "" ),
  392. DEF_SPLSTR( "" ),
  393. DEF_SPLSTR( "halves" ),
  394. DEF_SPLSTR( "thirds" ),
  395. DEF_SPLSTR( "fourths" ),
  396. DEF_SPLSTR( "fifths" ),
  397. DEF_SPLSTR( "sixths" ),
  398. DEF_SPLSTR( "sevenths" ),
  399. DEF_SPLSTR( "eighths" ),
  400. DEF_SPLSTR( "ninths" )
  401. };
  402. //--- Constants used by time normalization
  403. const SPLSTR g_A = DEF_SPLSTR( "a" );
  404. const SPLSTR g_M = DEF_SPLSTR( "m" );
  405. const SPLSTR g_P = DEF_SPLSTR( "p" );
  406. const SPLSTR g_OClock = DEF_SPLSTR( "o'clock" );
  407. const SPLSTR g_hundred = DEF_SPLSTR( "hundred" );
  408. const SPLSTR g_hours = DEF_SPLSTR( "hours" );
  409. const SPLSTR g_hour = DEF_SPLSTR( "hour" );
  410. const SPLSTR g_minutes = DEF_SPLSTR( "minutes" );
  411. const SPLSTR g_minute = DEF_SPLSTR( "minute" );
  412. const SPLSTR g_seconds = DEF_SPLSTR( "seconds" );
  413. const SPLSTR g_second = DEF_SPLSTR( "second" );
  414. //--- Default normalization table
  415. const SPLSTR g_ANSICharacterProns[] =
  416. {
  417. DEF_SPLSTR( "" ), // NULL
  418. DEF_SPLSTR( "" ), // Start of heading
  419. DEF_SPLSTR( "" ), // Start of text
  420. DEF_SPLSTR( "" ), // Break/End of text
  421. DEF_SPLSTR( "" ), // End of transmission
  422. DEF_SPLSTR( "" ), // Enquiry
  423. DEF_SPLSTR( "" ), // Positive acknowledgement
  424. DEF_SPLSTR( "" ), // Bell
  425. DEF_SPLSTR( "" ), // Backspace
  426. DEF_SPLSTR( "" ), // Horizontal tab
  427. DEF_SPLSTR( "" ), // Line feed
  428. DEF_SPLSTR( "" ), // Vertical tab
  429. DEF_SPLSTR( "" ), // Form feed
  430. DEF_SPLSTR( "" ), // Carriage return
  431. DEF_SPLSTR( "" ), // Shift out
  432. DEF_SPLSTR( "" ), // Shift in/XON (resume output)
  433. DEF_SPLSTR( "" ), // Data link escape
  434. DEF_SPLSTR( "" ), // Device control character 1
  435. DEF_SPLSTR( "" ), // Device control character 2
  436. DEF_SPLSTR( "" ), // Device control character 3
  437. DEF_SPLSTR( "" ), // Device control character 4
  438. DEF_SPLSTR( "" ), // Negative acknowledgement
  439. DEF_SPLSTR( "" ), // Synchronous idle
  440. DEF_SPLSTR( "" ), // End of transmission block
  441. DEF_SPLSTR( "" ), // Cancel
  442. DEF_SPLSTR( "" ), // End of medium
  443. DEF_SPLSTR( "" ), // substitute/end of file
  444. DEF_SPLSTR( "" ), // Escape
  445. DEF_SPLSTR( "" ), // File separator
  446. DEF_SPLSTR( "" ), // Group separator
  447. DEF_SPLSTR( "" ), // Record separator
  448. DEF_SPLSTR( "" ), // Unit separator
  449. DEF_SPLSTR( "" ), // Space
  450. DEF_SPLSTR( "exclamation point" ),
  451. DEF_SPLSTR( "double quote" ),
  452. DEF_SPLSTR( "number sign" ),
  453. DEF_SPLSTR( "dollars" ),
  454. DEF_SPLSTR( "percent" ),
  455. DEF_SPLSTR( "and" ),
  456. DEF_SPLSTR( "single quote" ),
  457. DEF_SPLSTR( "left parenthesis" ),
  458. DEF_SPLSTR( "right parenthesis" ),
  459. DEF_SPLSTR( "asterisk" ),
  460. DEF_SPLSTR( "plus" ),
  461. DEF_SPLSTR( "comma" ),
  462. DEF_SPLSTR( "hyphen" ),
  463. DEF_SPLSTR( "dot" ),
  464. DEF_SPLSTR( "slash" ),
  465. DEF_SPLSTR( "zero" ),
  466. DEF_SPLSTR( "one" ),
  467. DEF_SPLSTR( "two" ),
  468. DEF_SPLSTR( "three" ),
  469. DEF_SPLSTR( "four" ),
  470. DEF_SPLSTR( "five" ),
  471. DEF_SPLSTR( "six" ),
  472. DEF_SPLSTR( "seven" ),
  473. DEF_SPLSTR( "eight" ),
  474. DEF_SPLSTR( "nine" ),
  475. DEF_SPLSTR( "colon" ),
  476. DEF_SPLSTR( "semicolon" ),
  477. DEF_SPLSTR( "less than" ),
  478. DEF_SPLSTR( "equals" ),
  479. DEF_SPLSTR( "greater than" ),
  480. DEF_SPLSTR( "question mark" ),
  481. DEF_SPLSTR( "at" ),
  482. DEF_SPLSTR( "a" ),
  483. DEF_SPLSTR( "b" ),
  484. DEF_SPLSTR( "c" ),
  485. DEF_SPLSTR( "d" ),
  486. DEF_SPLSTR( "e" ),
  487. DEF_SPLSTR( "f" ),
  488. DEF_SPLSTR( "g" ),
  489. DEF_SPLSTR( "h" ),
  490. DEF_SPLSTR( "i" ),
  491. DEF_SPLSTR( "j" ),
  492. DEF_SPLSTR( "k" ),
  493. DEF_SPLSTR( "l" ),
  494. DEF_SPLSTR( "m" ),
  495. DEF_SPLSTR( "n" ),
  496. DEF_SPLSTR( "o" ),
  497. DEF_SPLSTR( "p" ),
  498. DEF_SPLSTR( "q" ),
  499. DEF_SPLSTR( "r" ),
  500. DEF_SPLSTR( "s" ),
  501. DEF_SPLSTR( "t" ),
  502. DEF_SPLSTR( "u" ),
  503. DEF_SPLSTR( "v" ),
  504. DEF_SPLSTR( "w" ),
  505. DEF_SPLSTR( "x" ),
  506. DEF_SPLSTR( "y" ),
  507. DEF_SPLSTR( "z" ),
  508. DEF_SPLSTR( "left square bracket" ),
  509. DEF_SPLSTR( "backslash" ),
  510. DEF_SPLSTR( "right square bracket" ),
  511. DEF_SPLSTR( "circumflex accent" ),
  512. DEF_SPLSTR( "underscore" ),
  513. DEF_SPLSTR( "grave accent" ),
  514. DEF_SPLSTR( "a" ),
  515. DEF_SPLSTR( "b" ),
  516. DEF_SPLSTR( "c" ),
  517. DEF_SPLSTR( "d" ),
  518. DEF_SPLSTR( "e" ),
  519. DEF_SPLSTR( "f" ),
  520. DEF_SPLSTR( "g" ),
  521. DEF_SPLSTR( "h" ),
  522. DEF_SPLSTR( "i" ),
  523. DEF_SPLSTR( "j" ),
  524. DEF_SPLSTR( "k" ),
  525. DEF_SPLSTR( "l" ),
  526. DEF_SPLSTR( "m" ),
  527. DEF_SPLSTR( "n" ),
  528. DEF_SPLSTR( "o" ),
  529. DEF_SPLSTR( "p" ),
  530. DEF_SPLSTR( "q" ),
  531. DEF_SPLSTR( "r" ),
  532. DEF_SPLSTR( "s" ),
  533. DEF_SPLSTR( "t" ),
  534. DEF_SPLSTR( "u" ),
  535. DEF_SPLSTR( "v" ),
  536. DEF_SPLSTR( "w" ),
  537. DEF_SPLSTR( "x" ),
  538. DEF_SPLSTR( "y" ),
  539. DEF_SPLSTR( "z" ),
  540. DEF_SPLSTR( "left curly bracket" ),
  541. DEF_SPLSTR( "vertical line" ),
  542. DEF_SPLSTR( "right curly bracket" ),
  543. DEF_SPLSTR( "tilde" ),
  544. DEF_SPLSTR( "" ), // DELETE
  545. DEF_SPLSTR( "euros" ),
  546. DEF_SPLSTR( "" ), // maps to space
  547. DEF_SPLSTR( "" ), // maps to single quote
  548. DEF_SPLSTR( "" ), // maps to space
  549. DEF_SPLSTR( "" ), // maps to double quote
  550. DEF_SPLSTR( "" ), // maps to comma
  551. DEF_SPLSTR( "" ), // maps to space
  552. DEF_SPLSTR( "" ), // maps to space
  553. DEF_SPLSTR( "" ), // maps to caret
  554. DEF_SPLSTR( "per thousand" ),
  555. DEF_SPLSTR( "" ), // maps to S
  556. DEF_SPLSTR( "" ), // maps to single quote
  557. DEF_SPLSTR( "" ), // maps to space
  558. DEF_SPLSTR( "" ), // Control characters - map to space
  559. DEF_SPLSTR( "" ),
  560. DEF_SPLSTR( "" ),
  561. DEF_SPLSTR( "" ),
  562. DEF_SPLSTR( "" ), // maps to single quote
  563. DEF_SPLSTR( "" ), // maps to single quote
  564. DEF_SPLSTR( "" ), // maps to double quote
  565. DEF_SPLSTR( "" ), // maps to double quote
  566. DEF_SPLSTR( "" ), // maps to space
  567. DEF_SPLSTR( "" ), // maps to hyphen
  568. DEF_SPLSTR( "" ), // maps to hyphen
  569. DEF_SPLSTR( "" ), // maps to tilde
  570. DEF_SPLSTR( "trademark" ),
  571. DEF_SPLSTR( "" ), // maps to s
  572. DEF_SPLSTR( "" ), // maps to single quote
  573. DEF_SPLSTR( "" ), // maps to space
  574. DEF_SPLSTR( "" ), // maps to space
  575. DEF_SPLSTR( "" ), // maps to space
  576. DEF_SPLSTR( "" ), // maps to Y
  577. DEF_SPLSTR( "" ), // maps to space
  578. DEF_SPLSTR( "" ), // maps to space
  579. DEF_SPLSTR( "cents" ),
  580. DEF_SPLSTR( "pounds" ),
  581. DEF_SPLSTR( "" ), // maps to space
  582. DEF_SPLSTR( "yen" ),
  583. DEF_SPLSTR( "" ), // maps to |
  584. DEF_SPLSTR( "" ), // maps to space
  585. DEF_SPLSTR( "" ), // maps to space
  586. DEF_SPLSTR( "copyright" ),
  587. DEF_SPLSTR( "" ), // maps to space
  588. DEF_SPLSTR( "" ), // maps to double quote
  589. DEF_SPLSTR( "" ), // maps to space
  590. DEF_SPLSTR( "" ), // maps to hyphen
  591. DEF_SPLSTR( "registered trademark" ),
  592. DEF_SPLSTR( "" ), // maps to space
  593. DEF_SPLSTR( "degrees" ),
  594. DEF_SPLSTR( "plus minus" ),
  595. DEF_SPLSTR( "superscript two" ),
  596. DEF_SPLSTR( "superscript three" ),
  597. DEF_SPLSTR( "prime" ),
  598. DEF_SPLSTR( "" ), // maps to space
  599. DEF_SPLSTR( "" ), // maps to space
  600. DEF_SPLSTR( "times" ), // maps to space
  601. DEF_SPLSTR( "" ), // maps to space
  602. DEF_SPLSTR( "superscript one" ),
  603. DEF_SPLSTR( "" ), // maps to space
  604. DEF_SPLSTR( "" ), // maps to double quote
  605. DEF_SPLSTR( "one fourth" ),
  606. DEF_SPLSTR( "one half" ),
  607. DEF_SPLSTR( "three fourths" ),
  608. DEF_SPLSTR( "" ), // maps to space
  609. DEF_SPLSTR( "" ), // maps to A
  610. DEF_SPLSTR( "" ), // maps to A
  611. DEF_SPLSTR( "" ), // maps to A
  612. DEF_SPLSTR( "" ), // maps to A
  613. DEF_SPLSTR( "" ), // maps to A
  614. DEF_SPLSTR( "" ), // maps to A
  615. DEF_SPLSTR( "" ), // maps to A
  616. DEF_SPLSTR( "" ), // maps to C
  617. DEF_SPLSTR( "" ), // maps to E
  618. DEF_SPLSTR( "" ), // maps to E
  619. DEF_SPLSTR( "" ), // maps to E
  620. DEF_SPLSTR( "" ), // maps to E
  621. DEF_SPLSTR( "" ), // maps to I
  622. DEF_SPLSTR( "" ), // maps to I
  623. DEF_SPLSTR( "" ), // maps to I
  624. DEF_SPLSTR( "" ), // maps to I
  625. DEF_SPLSTR( "" ), // maps to space
  626. DEF_SPLSTR( "" ), // maps to N
  627. DEF_SPLSTR( "" ), // maps to O
  628. DEF_SPLSTR( "" ), // maps to O
  629. DEF_SPLSTR( "" ), // maps to O
  630. DEF_SPLSTR( "" ), // maps to O
  631. DEF_SPLSTR( "" ), // maps to O
  632. DEF_SPLSTR( "" ), // maps to space
  633. DEF_SPLSTR( "" ), // maps to O
  634. DEF_SPLSTR( "" ), // maps to U
  635. DEF_SPLSTR( "" ), // maps to U
  636. DEF_SPLSTR( "" ), // maps to U
  637. DEF_SPLSTR( "" ), // maps to U
  638. DEF_SPLSTR( "" ), // maps to Y
  639. DEF_SPLSTR( "" ), // maps to space
  640. DEF_SPLSTR( "beta" ),
  641. DEF_SPLSTR( "" ), // maps to a
  642. DEF_SPLSTR( "" ), // maps to a
  643. DEF_SPLSTR( "" ), // maps to a
  644. DEF_SPLSTR( "" ), // maps to a
  645. DEF_SPLSTR( "" ), // maps to a
  646. DEF_SPLSTR( "" ), // maps to a
  647. DEF_SPLSTR( "" ), // maps to a
  648. DEF_SPLSTR( "" ), // maps to c
  649. DEF_SPLSTR( "" ), // maps to e
  650. DEF_SPLSTR( "" ), // maps to e
  651. DEF_SPLSTR( "" ), // maps to e
  652. DEF_SPLSTR( "" ), // maps to e
  653. DEF_SPLSTR( "" ), // maps to i
  654. DEF_SPLSTR( "" ), // maps to i
  655. DEF_SPLSTR( "" ), // maps to i
  656. DEF_SPLSTR( "" ), // maps to i
  657. DEF_SPLSTR( "" ), // maps to t
  658. DEF_SPLSTR( "" ), // maps to n
  659. DEF_SPLSTR( "" ), // maps to o
  660. DEF_SPLSTR( "" ), // maps to o
  661. DEF_SPLSTR( "" ), // maps to o
  662. DEF_SPLSTR( "" ), // maps to o
  663. DEF_SPLSTR( "" ), // maps to o
  664. DEF_SPLSTR( "divided by" ),
  665. DEF_SPLSTR( "" ), // maps to o
  666. DEF_SPLSTR( "" ), // maps to u
  667. DEF_SPLSTR( "" ), // maps to u
  668. DEF_SPLSTR( "" ), // maps to u
  669. DEF_SPLSTR( "" ), // maps to u
  670. DEF_SPLSTR( "" ), // maps to y
  671. DEF_SPLSTR( "" ), // maps to space
  672. DEF_SPLSTR( "" ), // maps to y
  673. };
  674. //--- Constants used in decade normalization
  675. const SPLSTR g_Decades[] =
  676. {
  677. DEF_SPLSTR( "thousands" ), // this will be handled as a special case - "two thousands"
  678. DEF_SPLSTR( "tens" ),
  679. DEF_SPLSTR( "twenties" ),
  680. DEF_SPLSTR( "thirties" ),
  681. DEF_SPLSTR( "forties" ),
  682. DEF_SPLSTR( "fifties" ),
  683. DEF_SPLSTR( "sixties" ),
  684. DEF_SPLSTR( "seventies" ),
  685. DEF_SPLSTR( "eighties" ),
  686. DEF_SPLSTR( "nineties" ),
  687. };
  688. const SPLSTR g_Zeroes = DEF_SPLSTR( "zeroes" );
  689. const SPLSTR g_Hundreds = DEF_SPLSTR( "hundreds" );
  690. //--- Miscellaneous constants
  691. const StateStruct g_StateAbbreviations[] =
  692. {
  693. { DEF_SPLSTR( "AA" ), DEF_SPLSTR( "Armed Forces" ) },
  694. { DEF_SPLSTR( "AE" ), DEF_SPLSTR( "Armed Forces" ) },
  695. { DEF_SPLSTR( "AK" ), DEF_SPLSTR( "Alaska" ) },
  696. { DEF_SPLSTR( "AL" ), DEF_SPLSTR( "Alabama" ) },
  697. { DEF_SPLSTR( "AP" ), DEF_SPLSTR( "Armed Forces" ) },
  698. { DEF_SPLSTR( "AR" ), DEF_SPLSTR( "Arkansas" ) },
  699. { DEF_SPLSTR( "AS" ), DEF_SPLSTR( "American Samoa" ) },
  700. { DEF_SPLSTR( "AZ" ), DEF_SPLSTR( "Arizona" ) },
  701. { DEF_SPLSTR( "CA" ), DEF_SPLSTR( "California" ) },
  702. { DEF_SPLSTR( "CO" ), DEF_SPLSTR( "Colorado" ) },
  703. { DEF_SPLSTR( "CT" ), DEF_SPLSTR( "Connecticut" ) },
  704. { DEF_SPLSTR( "DC" ), DEF_SPLSTR( "D C" ) },
  705. { DEF_SPLSTR( "DE" ), DEF_SPLSTR( "Deleware" ) },
  706. { DEF_SPLSTR( "FL" ), DEF_SPLSTR( "Florida" ) },
  707. { DEF_SPLSTR( "FM" ), DEF_SPLSTR( "Federated States Of Micronesia" ) },
  708. { DEF_SPLSTR( "GA" ), DEF_SPLSTR( "Georgia" ) },
  709. { DEF_SPLSTR( "GU" ), DEF_SPLSTR( "Guam" ) },
  710. { DEF_SPLSTR( "HI" ), DEF_SPLSTR( "Hawaii" ) },
  711. { DEF_SPLSTR( "IA" ), DEF_SPLSTR( "Iowa" ) },
  712. { DEF_SPLSTR( "ID" ), DEF_SPLSTR( "Idaho" ) },
  713. { DEF_SPLSTR( "IL" ), DEF_SPLSTR( "Illinois" ) },
  714. { DEF_SPLSTR( "IN" ), DEF_SPLSTR( "Indiana" ) },
  715. { DEF_SPLSTR( "KS" ), DEF_SPLSTR( "Kansas" ) },
  716. { DEF_SPLSTR( "KY" ), DEF_SPLSTR( "Kentucky" ) },
  717. { DEF_SPLSTR( "LA" ), DEF_SPLSTR( "Louisiana" ) },
  718. { DEF_SPLSTR( "MA" ), DEF_SPLSTR( "Massachusetts" ) },
  719. { DEF_SPLSTR( "MD" ), DEF_SPLSTR( "Maryland" ) },
  720. { DEF_SPLSTR( "ME" ), DEF_SPLSTR( "Maine" ) },
  721. { DEF_SPLSTR( "MH" ), DEF_SPLSTR( "Marshall Islands" ) },
  722. { DEF_SPLSTR( "MI" ), DEF_SPLSTR( "Michigan" ) },
  723. { DEF_SPLSTR( "MN" ), DEF_SPLSTR( "Minnesota" ) },
  724. { DEF_SPLSTR( "MO" ), DEF_SPLSTR( "Missouri" ) },
  725. { DEF_SPLSTR( "MP" ), DEF_SPLSTR( "Northern Mariana Islands" ) },
  726. { DEF_SPLSTR( "MS" ), DEF_SPLSTR( "Mississippi" ) },
  727. { DEF_SPLSTR( "MT" ), DEF_SPLSTR( "Montana" ) },
  728. { DEF_SPLSTR( "NC" ), DEF_SPLSTR( "North Carolina" ) },
  729. { DEF_SPLSTR( "ND" ), DEF_SPLSTR( "North Dakota" ) },
  730. { DEF_SPLSTR( "NE" ), DEF_SPLSTR( "Nebraska" ) },
  731. { DEF_SPLSTR( "NH" ), DEF_SPLSTR( "New Hampshire" ) },
  732. { DEF_SPLSTR( "NJ" ), DEF_SPLSTR( "New Jersey" ) },
  733. { DEF_SPLSTR( "NM" ), DEF_SPLSTR( "New Mexico" ) },
  734. { DEF_SPLSTR( "NV" ), DEF_SPLSTR( "Nevada" ) },
  735. { DEF_SPLSTR( "NY" ), DEF_SPLSTR( "New York" ) },
  736. { DEF_SPLSTR( "OH" ), DEF_SPLSTR( "Ohio" ) },
  737. { DEF_SPLSTR( "OK" ), DEF_SPLSTR( "Oklahoma" ) },
  738. { DEF_SPLSTR( "OR" ), DEF_SPLSTR( "Oregon" ) },
  739. { DEF_SPLSTR( "PA" ), DEF_SPLSTR( "Pennsylvania" ) },
  740. { DEF_SPLSTR( "PR" ), DEF_SPLSTR( "Puerto Rico" ) },
  741. { DEF_SPLSTR( "PW" ), DEF_SPLSTR( "Palau" ) },
  742. { DEF_SPLSTR( "RI" ), DEF_SPLSTR( "Rhode Island" ) },
  743. { DEF_SPLSTR( "SC" ), DEF_SPLSTR( "South Carolina" ) },
  744. { DEF_SPLSTR( "SD" ), DEF_SPLSTR( "South Dakota" ) },
  745. { DEF_SPLSTR( "TN" ), DEF_SPLSTR( "Tennessee" ) },
  746. { DEF_SPLSTR( "TX" ), DEF_SPLSTR( "Texas" ) },
  747. { DEF_SPLSTR( "UT" ), DEF_SPLSTR( "Utah" ) },
  748. { DEF_SPLSTR( "VA" ), DEF_SPLSTR( "Virginia" ) },
  749. { DEF_SPLSTR( "VI" ), DEF_SPLSTR( "Virgin Islands" ) },
  750. { DEF_SPLSTR( "VT" ), DEF_SPLSTR( "Vermont" ) },
  751. { DEF_SPLSTR( "WA" ), DEF_SPLSTR( "Washington" ) },
  752. { DEF_SPLSTR( "WI" ), DEF_SPLSTR( "Wisconsin" ) },
  753. { DEF_SPLSTR( "WV" ), DEF_SPLSTR( "West Virginia" ) },
  754. { DEF_SPLSTR( "WY" ), DEF_SPLSTR( "Wyoming" ) },
  755. };
  756. const SPVSTATE g_DefaultXMLState =
  757. {
  758. SPVA_Speak, // SPVACTIONS
  759. 0, // LangID
  760. 0, // wReserved
  761. 0, // EmphAdj
  762. 0, // RateAdj
  763. 100, // Volume
  764. { 0, 0 }, // PitchAdj
  765. 0, // SilenceMSecs
  766. 0, // pPhoneIds
  767. SPPS_Unknown, // POS
  768. { 0, 0, 0 } // Context
  769. };
  770. const SPLSTR g_And = DEF_SPLSTR( "and" );
  771. extern const SPLSTR g_comma = DEF_SPLSTR( "," );
  772. extern const SPLSTR g_period = DEF_SPLSTR( "." );
  773. extern const SPLSTR g_periodString = DEF_SPLSTR( "period" );
  774. extern const SPLSTR g_slash = DEF_SPLSTR( "or" );