Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

859 lines
51 KiB

  1. /******************************************************************************
  2. * MiscData.cpp *
  3. *--------------*
  4. * This file stores the const data used in various non-normalization front-end
  5. * code
  6. *------------------------------------------------------------------------------
  7. * Copyright (C) 1999 Microsoft Corporation Date: 05/02/2000
  8. * All Rights Reserved
  9. *
  10. ****************************************************************** AARONHAL ***/
  11. #include "stdafx.h"
  12. #include"stdsentenum.h"
  13. //--- Variable used to make sure initialization of pronunciations only happens once! CleanupAbbrevTables() in this file also reads it and frees the tables' pronunciation buffers only when it is set.
  14. BOOL g_fAbbrevTablesInitialized = false;
  15. //--- Helper function used to delete dynamically allocated memory (for the abbreviation
  16. // table) at DLL exit time...
  17. void CleanupAbbrevTables( void )
  18. {
  19. if ( g_fAbbrevTablesInitialized )
  20. {
  21. for ( ULONG i = 0; i < sp_countof( g_AbbreviationTable ); i++ )
  22. {
  23. if ( g_AbbreviationTable[i].pPron1 )
  24. {
  25. delete [] g_AbbreviationTable[i].pPron1;
  26. }
  27. if ( g_AbbreviationTable[i].pPron2 )
  28. {
  29. delete [] g_AbbreviationTable[i].pPron2;
  30. }
  31. if ( g_AbbreviationTable[i].pPron3 )
  32. {
  33. delete [] g_AbbreviationTable[i].pPron3;
  34. }
  35. }
  36. for ( i = 0; i < sp_countof( g_AmbiguousWordTable ); i++ )
  37. {
  38. if ( g_AmbiguousWordTable[i].pPron1 )
  39. {
  40. delete [] g_AmbiguousWordTable[i].pPron1;
  41. }
  42. if ( g_AmbiguousWordTable[i].pPron2 )
  43. {
  44. delete [] g_AmbiguousWordTable[i].pPron2;
  45. }
  46. if ( g_AmbiguousWordTable[i].pPron3 )
  47. {
  48. delete [] g_AmbiguousWordTable[i].pPron3;
  49. }
  50. }
  51. for ( i = 0; i < sp_countof( g_PostLexLookupWordTable ); i++ )
  52. {
  53. if ( g_PostLexLookupWordTable[i].pPron1 )
  54. {
  55. delete [] g_PostLexLookupWordTable[i].pPron1;
  56. }
  57. if ( g_PostLexLookupWordTable[i].pPron2 )
  58. {
  59. delete [] g_PostLexLookupWordTable[i].pPron2;
  60. }
  61. if ( g_PostLexLookupWordTable[i].pPron3 )
  62. {
  63. delete [] g_PostLexLookupWordTable[i].pPron3;
  64. }
  65. }
  66. if ( g_pOfA )
  67. {
  68. delete [] g_pOfA;
  69. }
  70. if ( g_pOfAn )
  71. {
  72. delete [] g_pOfAn;
  73. }
  74. }
  75. }
  76. const BrillPatch g_POSTaggerPatches [] =
  77. {
  78. { MS_Adj, MS_Noun, CAP, MS_Unknown, MS_Unknown, NULL, NULL },
  79. { MS_Verb, MS_Noun, PREV1T, MS_Adj, MS_Unknown, NULL, NULL },
  80. { MS_Verb, MS_Noun, CAP, MS_Unknown, MS_Unknown, NULL, NULL },
  81. { MS_Noun, MS_Verb, PREV1T, MS_VAux, MS_Unknown, NULL, NULL },
  82. { MS_Conj, MS_Adv, NEXT2T, MS_Conj, MS_Unknown, NULL, NULL },
  83. { MS_Adj, MS_Adv, NEXT1T, MS_Verb, MS_Unknown, NULL, NULL },
  84. { MS_Adj, MS_Adv, PREV1TNEXT1T, MS_Noun, MS_Unknown, NULL, NULL },
  85. { MS_Verb, MS_Noun, PREV2T, MS_Prep, MS_Unknown, NULL, NULL },
  86. { MS_Noun, MS_Verb, PREV1T, MS_SubjPron, MS_Unknown, NULL, NULL },
  87. { MS_Noun, MS_Verb, PREV1T, MS_Pron, MS_Unknown, NULL, NULL },
  88. { MS_Noun, MS_Verb, PREV1T, MS_Adv, MS_Unknown, NULL, NULL },
  89. { MS_Verb, MS_Noun, NEXT1T, MS_VAux, MS_Unknown, NULL, NULL },
  90. { MS_Adj, MS_Adv, PREV1TNEXT1T, MS_Verb, MS_Adj, NULL, NULL },
  91. { MS_Verb, MS_Noun, PREV1TNEXT1T, MS_Noun, MS_Verb, NULL, NULL },
  92. { MS_Noun, MS_Adj, NEXT1T, MS_Adj, MS_Unknown, NULL, NULL },
  93. { MS_Verb, MS_Noun, NEXT1T, MS_CConj, MS_Unknown, NULL, NULL },
  94. { MS_Adj, MS_Verb, PREV1T, MS_VAux, MS_Unknown, NULL, NULL },
  95. { MS_Noun, MS_Verb, PREV1TNEXT1T, MS_Prep, MS_Adv, NULL, NULL },
  96. { MS_Noun, MS_Verb, PREV1TNEXT1T, MS_Prep, MS_Adj, NULL, NULL },
  97. { MS_Adv, MS_Prep, PREV1TNEXT1T, MS_Noun, MS_Prep, NULL, NULL },
  98. { MS_Adv, MS_Prep, PREV1TNEXT2T, MS_Verb, MS_Noun, NULL, NULL },
  99. { MS_Adj, MS_Adv, NEXT1T, MS_Adv, MS_Unknown, NULL, NULL },
  100. { MS_Noun, MS_Verb, NEXT1T, MS_ObjPron, MS_Unknown, NULL, NULL },
  101. { MS_Adv, MS_Adj, PREV1TNEXT1T, MS_Prep, MS_Noun, NULL, NULL },
  102. { MS_Noun, MS_Verb, PREV1TNEXT1T, MS_Interr, MS_Prep, NULL, NULL },
  103. { MS_Adj, MS_Adv, PREV1TNEXT1T, MS_Adv, MS_Adj, NULL, NULL },
  104. { MS_Prep, MS_Verb, PREV1T, MS_VAux, MS_Unknown, NULL, NULL },
  105. { MS_Adv, MS_Conj, NEXT1T, MS_SubjPron, MS_Unknown, NULL, NULL },
  106. { MS_Adj, MS_Noun, PREV1TNEXT1T, MS_Adj, MS_Prep, NULL, NULL },
  107. { MS_Noun, MS_Verb, PREV1TNEXT2T, MS_Interr, MS_Noun, NULL, NULL },
  108. { MS_Noun, MS_Verb, PREV1T, MS_VAux, MS_Unknown, NULL, NULL },
  109. { MS_Verb, MS_Noun, PREV1TNEXT1T, MS_Unknown, MS_Verb, NULL, NULL },
  110. { MS_Adj, MS_Verb, PREV1TNEXT1T, MS_Verb, MS_Adj, NULL, NULL },
  111. { MS_Conj, MS_Adv, PREV1TNEXT1T, MS_Unknown, MS_Unknown, NULL, NULL },
  112. { MS_Adv, MS_Prep, PREV1TNEXT2T, MS_Prep, MS_Unknown, NULL, NULL },
  113. { MS_Adj, MS_Adv, PREV2TNEXT1T, MS_Verb, MS_Adj, NULL, NULL },
  114. { MS_Adv, MS_Prep, NEXT1T, MS_Noun, MS_Unknown, NULL, NULL },
  115. { MS_Verb, MS_Noun, PREV1TNEXT1T, MS_Conj, MS_Verb, NULL, NULL },
  116. { MS_Adj, MS_Noun, PREV1TNEXT1T, MS_Prep, MS_Prep, NULL, NULL },
  117. { MS_Noun, MS_Verb, PREV1TNEXT1T, MS_Prep, MS_Pron, NULL, NULL },
  118. { MS_Adj, MS_Noun, PREV1TNEXT1T, MS_Noun, MS_Verb, NULL, NULL },
  119. { MS_Adj, MS_Adv, PREV1T, MS_VAux, MS_Unknown, NULL, NULL },
  120. { MS_Adj, MS_Noun, NEXT1T, MS_VAux, MS_Unknown, NULL, NULL },
  121. { MS_Adj, MS_Adv, NEXT1T, MS_VAux, MS_Unknown, NULL, NULL },
  122. { MS_Adj, MS_Noun, PREV1TNEXT1T, MS_Unknown, MS_Prep, NULL, NULL },
  123. { MS_Verb, MS_Noun, PREV2TNEXT1T, MS_Unknown, MS_Verb, NULL, NULL },
  124. { MS_Noun, MS_Verb, PREV1T, MS_SubjPron, MS_Unknown, NULL, NULL },
  125. { MS_Adj, MS_Verb, PREV1TNEXT1T, MS_Prep, MS_Adv, NULL, NULL },
  126. { MS_Adv, MS_Adj, PREV1TNEXT1T, MS_Conj, MS_Noun, NULL, NULL },
  127. { MS_Conj, MS_Adv, PREV1TNEXT1T, MS_Prep, MS_Adj, NULL, NULL },
  128. { MS_Adv, MS_Prep, NEXT1T, MS_Verb, MS_Unknown, NULL, NULL },
  129. { MS_Noun, MS_Adj, NEXT1T, MS_Adj, MS_Unknown, NULL, NULL },
  130. { MS_Adv, MS_Conj, NEXT1T, MS_Pron, MS_Unknown, NULL, NULL },
  131. { MS_Conj, MS_Adv, PREV1T, MS_VAux, MS_Unknown, NULL, NULL },
  132. { MS_Noun, MS_Verb, PREV1T, MS_Adv, MS_Unknown, NULL, NULL },
  133. { MS_Verb, MS_Noun, PREV2TNEXT1T, MS_CConj, MS_Unknown, NULL, NULL },
  134. { MS_Noun, MS_Verb, PREV1T, MS_VAux, MS_Unknown, NULL, NULL },
  135. { MS_Noun, MS_Verb, PREV1T, MS_Pron, MS_Unknown, NULL, NULL },
  136. { MS_Adv, MS_Adj, PREV2TNEXT1T, MS_Prep, MS_Noun, NULL, NULL },
  137. { MS_Noun, MS_Adj, PREV1TNEXT1T, MS_Adv, MS_Noun, NULL, NULL },
  138. { MS_Conj, MS_Adv, PREV1TNEXT1T, MS_Adv, MS_Adj, NULL, NULL },
  139. //--- Inserted by hand - convert "A" from Det to Noun after Det...
  140. { MS_Det, MS_Noun, CURRWPREV1T, MS_Det, MS_Unknown, L"a", NULL },
  141. };
  142. //--- IMPORTANT!!! This list must remain alphabetized for the binary search algorithm to work properly!!!
  143. AbbrevRecord g_AbbreviationTable[] =
  144. {
  145. { L"a", L"EY 1", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  146. { L"amp", L"AE 1 M P", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  147. { L"app", L"AE 1 P", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  148. { L"apr", L"EY 1 - P R AX L", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  149. { L"apt", L"ax - p aa 1 r t - m ax n t", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  150. { L"assoc", L"ax - s ow 2 - s iy - ey 1 - sh ax n", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  151. { L"asst", L"ax - s ih 1 s - t ax n t", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  152. { L"aug", L"ao 1 - g ax s t", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  153. { L"ave", L"ae 1 v - ax - n uw 2", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  154. //--- Initial - never EOS
  155. { L"b", L"B IY 1", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
  156. { L"bldg", L"b ih 1 l - d ih ng", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  157. { L"blvd", L"b uh 1 l - ax - v aa 2 r d", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  158. //--- SingleOrPluralAbbreviation
  159. { L"bu", L"b uh 1 sh - ax l", MS_Noun, L"b uh 1 sh - ax l Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  160. //--- Initial - never EOS
  161. { L"c", L"s eh 1 l - s iy - ax s", MS_Noun, L"s iy 1", MS_Noun, NULL, MS_Unknown, 0, 3 },
  162. //--- SingleOrPluralAbbreviation
  163. { L"cal", L"k ae 1 l - ax - r iy", MS_Noun, L"k ae 1 l - ax - r iy Z", MS_Noun, L"K AE 1 L", MS_Noun, 0, 0 },
  164. { L"cals", L"k ae 1 l - ax - r iy Z", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  165. { L"ch", L"ch ae 1 p - t er", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  166. //--- SingleOrPluralAbbreviation
  167. { L"cl", L"s eh 1 n - t ax - l iy 2 - t er", MS_Noun, L"s eh 1 n - t ax - l iy 2 - t er Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  168. //--- SingleOrPluralAbbreviation
  169. { L"cm", L"s eh 1 n - t ax - m iy 2 - t er", MS_Noun, L"s eh 1 n - t ax - m iy 2 - t er Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  170. { L"cms", L"s eh 1 n - t ax - m iy 2 - t er Z", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  171. { L"cntr", L"s eh 1 n - t er", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  172. //--- IfEOSNotAbbreviation
  173. { L"co", L"k ah 1 m - p ax - n iy", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 1, -1 },
  174. { L"cont", L"k ax n - t ih 1 n - y uw D", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  175. { L"corp", L"k ao 2 r - p ax - r ey 1 - sh ax n", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  176. //--- IfEOSNotAbbreviation
  177. { L"ct", L"k ao 1 r t", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 1, -1 },
  178. { L"ctr", L"s eh 1 n - t er", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  179. //--- IfEOSNotAbbreviation
  180. { L"cu", L"k y uw 1 - b ih k", MS_Noun, L"k y uw 1 - b ih k", MS_Noun, NULL, MS_Unknown, 1, 4 },
  181. //--- Initial - never EOS
  182. { L"d", L"D IY 1", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
  183. //--- SingleOrPluralAbbreviation
  184. { L"db", L"d eh 1 s - ax - b ax l", MS_Noun, L"d eh 1 s - ax - b ax l Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  185. { L"dec", L"d ih - s eh 1 m - b er", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  186. //--- SingleOrPluralAbbreviation
  187. { L"deg", L"d ih - g r iy 1", MS_Noun, L"d ih - g r iy 1 z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  188. { L"dept", L"d ih - p aa 1 r t - m ax n t", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  189. { L"dist", L"d ih 1 s - t r ax k t", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  190. { L"doc", L"D AA 1 K", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  191. { L"doz", L"d ah 1 z - ax n", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  192. //--- DoctorDriveAbbreviation
  193. { L"dr", L"d aa 1 k - t er", MS_Noun, L"D R AY 1 V", MS_Noun, NULL, MS_Unknown, 0, 1 },
  194. //--- Initial - never EOS
  195. { L"e", L"IY 1", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
  196. //--- IfEOSNotAbbreviation
  197. { L"ed", L"eh 2 jh - ax - k ey 1 - sh ax n", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 1, -1 },
  198. { L"esq", L"eh 1 s - k w ay 2 r", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  199. { L"est", L"ax - s t ae 1 b - l ax sh T", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  200. { L"etc", L"EH T & s eh 1 t - er - ax", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  201. //--- IfEOSNotAbbreviation
  202. { L"ex", L"ih g - z ae 1 m - p ax l", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 1, -1 },
  203. { L"ext", L"ih k - s t eh 1 n - sh ax n", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  204. //--- Initial - never EOS
  205. { L"f", L"f ae 1 r - ax n - h ay 2 t", MS_Noun, L"eh 1 f", MS_Noun, NULL, MS_Unknown, 0, 3 },
  206. { L"feb", L"f eh 1 b - r uw - eh 2 r - iy", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  207. //--- IfEOSNotAbbreviation
  208. { L"fig", L"f ih 1 g - y er", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 1, -1 },
  209. //--- IfEOSNotAbbreviation
  210. { L"figs", L"f ih 1 g - y er Z", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 1, -1 },
  211. //--- IfEOSNotAbbreviation
  212. { L"fl", L"f l uw 1 - ih d", MS_Noun, L"f l uw 1 - ih d", MS_Noun, NULL, MS_Unknown, 1, 4 },
  213. { L"fn", L"f uh 1 t - n ow 2 t", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  214. { L"freq", L"f r iy 1 - k w ax n - s iy", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  215. { L"fri", L"f r ay 1 - d ey", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  216. //--- SingleOrPluralAbbreviation
  217. { L"ft", L"F UH 1 T", MS_Noun, L"F IY 1 T", MS_Noun, L"F AO 1 R T", MS_Noun, 0, 0 },
  218. { L"fwd", L"f ao 1 r - w er d", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  219. //--- SingleOrPluralAbbreviation, IfEOSNotAbbreviation
  220. { L"g", L"G R AE 1 M", MS_Noun, L"G R AE 1 M Z", MS_Noun, L"JH IY 1", MS_Noun, 1, 0 },
  221. //--- SingleOrPluralAbbreviation
  222. { L"gal", L"g ae 1 l - ax n", MS_Noun, L"g ae 1 l - ax n Z", MS_Noun, L"G AE 1 L", MS_Noun, 0, 0 },
  223. //--- TITLE - never EOS
  224. { L"gen", L"jh eh 1 n - er - ax l", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
  225. //--- IfEOSNotAbbreviation
  226. { L"gov", L"g ah 1 v - er - n ER", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 1, -1 },
  227. //--- Initial - never EOS
  228. { L"h", L"EY 1 CH", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
  229. //--- IfEOSNotAbbreviation
  230. { L"hr", L"AW 1 ER", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 1, -1 },
  231. { L"hrs", L"AW 1 ER Z", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  232. { L"ht", L"H AY 1 T", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  233. { L"hwy", L"h ay 1 w ey", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  234. { L"hz", L"H ER 1 T S", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  235. //--- Initial - never EOS
  236. { L"i", L"AY 1", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
  237. //--- SingleOrPluralAbbreviation, IfEOSNotAbbreviation
  238. { L"in", L"IH 1 N CH", MS_Noun, L"IH 1 N CH AX Z", MS_Noun, L"IH 1 N", MS_Prep, 1, 0 },
  239. { L"inc", L"ih n - k ao 1 r - p ax - r ey 2 - t AX D", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  240. { L"incl", L"ih n - k l uw 1 - d AX D", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  241. { L"intl", L"ih 2 n - t er - n ae 1 sh - ax - n ax l", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  242. //--- IfEOSAndLowercaseNotAbbreviation
  243. { L"is", L"ay 1 - l ax n d", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 2, -1 },
  244. //--- SingleOrPluralAbbreviation, IfEOSNotAbbreviation
  245. { L"j", L"JH UW 1 L", MS_Noun, L"JH UW 1 L Z", MS_Noun, L"JH EY 1", MS_Noun, 1, 0 },
  246. //--- IfEOSNotAbbreviation
  247. { L"jan", L"jh ae 1 n - y uw - eh 2 r - iy", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 1, -1 },
  248. { L"jr", L"jh uw 1 n - y er", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  249. { L"jul", L"jh uh - l ay 1", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  250. { L"jun", L"JH UW 1 N", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  251. //--- Initial - never EOS
  252. { L"k", L"k eh 1 l - v ax n", MS_Noun, L"k ey 1", MS_Noun, NULL, MS_Unknown, 0, 3 },
  253. //--- SingleOrPluralAbbreviation
  254. { L"kb", L"k ih 1 l - ax - b ay 2 t", MS_Noun, L"k ih 1 l - ax - b ay 2 t S", MS_Noun, NULL, MS_Unknown, 0, 0 },
  255. //--- SingleOrPluralAbbreviation
  256. { L"kcal", L"k ih 1 l - ax - k ae 2 l - ax - r iy", MS_Noun, L"k ih 1 l - ax - k ae 2 l - ax - r iy Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  257. //--- SingleOrPluralAbbreviation
  258. { L"kg", L"k ih 1 l - ax - g r ae 2 m", MS_Noun, L"k ih 1 l - ax - g r ae 2 m Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  259. { L"kgs", L"k ih 1 l - ax - g r ae 2 m Z", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  260. { L"khz", L"k ih 1 l - ax - h er 2 t s", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  261. //--- SingleOrPluralAbbreviation
  262. { L"kj", L"k ih 1 l - ax - jh uw 2 l", MS_Noun, L"k ih 1 l - ax - jh uw 2 l z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  263. //--- SingleOrPluralAbbreviation
  264. { L"km", L"k ih - l aa 1 m - ih - t er", MS_Noun, L"k ih - l aa 1 m - ih - t er Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  265. //--- SingleOrPluralAbbreviation
  266. { L"kw", L"k ih 1 l - ax - w aa 2 t", MS_Noun, L"k ih 1 l - ax - w aa 2 t S", MS_Noun, NULL, MS_Unknown, 0, 0 },
  267. //--- SingleOrPluralAbbreviation, IfEOSNotAbbreviation
  268. { L"l", L"l iy 1 - t er", MS_Noun, L"l iy 1 - t er Z", MS_Noun, L"EH 1 L", MS_Noun, 1, 0 },
  269. //--- SingleOrPluralAbbreviation
  270. { L"lb", L"P AW 1 N D", MS_Noun, L"P AW 1 N D Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  271. { L"lbs", L"P AW 1 N D Z", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  272. { L"lg", L"L AA 1 R JH", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  273. { L"ln", L"l ey 1 n", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  274. //--- Title - never EOS
  275. { L"lt", L"l uw - t eh 1 n - ax n t", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
  276. { L"ltd", L"l ih 1 m - ih - t AX D", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  277. //--- SingleOrPluralAbbreviation, IfEOSNotAbbreviation
  278. { L"m", L"M IY 1 T ER", MS_Noun, L"M IY 1 T ER Z", MS_Noun, L"EH 1 M", MS_Noun, 1, 0 },
  279. //--- IfEOSNotAbbreviation???
  280. { L"mar", L"M AA 1 R CH", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  281. //--- SingleOrPluralAbbreviation
  282. { L"mb", L"m eh 1 g - ax - b ay 2 t", MS_Noun, L"m eh 1 g - ax - b ay 2 t S", MS_Noun, NULL, MS_Unknown, 0, 0 },
  283. { L"mfg", L"m ae 2 n - y ax - f ae 1 k - ch er - IH NG", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  284. //--- SingleOrPluralAbbreviation
  285. { L"mg", L"m ih 1 l - ax - g r ae 2 m", MS_Noun, L"m ih 1 l - ax - g r ae 2 m Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  286. { L"mgr", L"m ae 1 n - ih - jh ER", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  287. { L"mgs", L"m ih 1 l - ax - g r ae 2 m Z", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  288. { L"mhz", L"m eh 1 g - ax - h er 2 t s", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  289. //--- SingleOrPluralAbbreviation
  290. { L"mi", L"M AY 1 L", MS_Noun, L"M AY 1 L Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  291. { L"mic", L"M AY 1 K", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  292. //--- SingleOrPluralAbbreviation
  293. { L"min", L"m ih 1 n - ax t", MS_Noun, L"m ih 1 n - ax t S", MS_Noun, L"m ih 1 n - ax - m ax m", MS_Noun, 0, 0 },
  294. { L"misc", L"m ih 2 s - ax - l ey 1 - n iy - ax s", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  295. //--- SingleOrPluralAbbreviation
  296. { L"ml", L"m ih 1 l - ax - l iy 2 - t er", MS_Noun, L"m ih 1 l - ax - l iy 2 - t er Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  297. //--- SingleOrPluralAbbreviation
  298. { L"mm", L"m ih 1 l - ax - m iy 2 - t er", MS_Noun, L"m ih 1 l - ax - m iy 2 - t er Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  299. { L"mon", L"m ah 1 n - d ey", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  300. //--- TITLE - never EOS
  301. { L"mr", L"M IH 1 S - T ER", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
  302. //--- TITLE - never EOS
  303. { L"mrs", L"M IH 1 S - AX Z", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
  304. //--- IfEOSNotAbbreviation
  305. { L"ms", L"M IH 1 Z", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 1, -1 },
  306. //--- SingleOrPluralAbbreviation
  307. { L"msec", L"m ih 2 l - ax - s eh 1 k - ax n d", MS_Noun, L"m ih 2 l - ax - s eh 1 k - ax n d Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  308. { L"msecs", L"m ih 2 l - ax - s eh 1 k - ax n d Z", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  309. { L"mt", L"M AW 1 N T", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  310. { L"mtn", L"m aw 1 n - t ax n", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  311. //--- Initial - never EOS
  312. { L"n", L"EH 1 N", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
  313. //--- IfEOSNotAbbreviation
  314. { L"no", L"N UH 1 M - B ER", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 1, -1 },
  315. { L"nov", L"n ow - v eh 1 m - b er", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  316. //--- Initial - never EOS
  317. { L"o", L"OW 1", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
  318. { L"oct", L"aa k - t ow 1 - b er", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  319. //--- IfEOSNotAbbreviation
  320. { L"op", L"OW 1 - P AX S", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 1, -1 },
  321. //--- SingleOrPluralAbbreviation
  322. { L"oz", L"AW 1 N S", MS_Noun, L"AW 1 N - S AX Z", MS_Noun, L"AA 1 Z", MS_Noun, 0, 0 },
  323. { L"ozs", L"AW 1 N - S AX Z", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  324. //--- AbbreviationFollowedByDigit
  325. { L"p", L"P EY 1 JH", MS_Noun, L"P IY 1", MS_Noun, NULL, MS_Unknown, 0, 2 },
  326. //--- IfEOSNotAbbreviation
  327. { L"pg", L"P EY 1 JH", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 1, -1 },
  328. { L"pgs", L"P EY 1 - JH AX Z", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  329. { L"pkg", L"p ae 1 k - ih jh", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  330. { L"pkwy", L"p aa 1 r k - w ey 2", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  331. { L"pl", L"P L EY 1 S", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  332. { L"pp", L"P EY 1 - JH AX Z", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  333. //--- TITLE - never EOS
  334. { L"pres", L"p r eh 1 z - ax - d ax n t", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
  335. //--- TITLE - never EOS
  336. { L"prof", L"p r ax - f eh 1 - s ER", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
  337. { L"pt", L"P OY 1 N T", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  338. //--- Initial - never EOS
  339. { L"q", L"K Y UW 1", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
  340. //--- SingleOrPluralAbbreviation
  341. { L"qt", L"k w ao 1 r t", MS_Noun, L"k w ao 1 r t Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  342. //--- Initial - never EOS
  343. { L"r", L"AA 1 R", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
  344. { L"rd", L"r ow 1 d", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  345. { L"rec", L"R EH 1 K", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  346. { L"rep", L"r eh 2 p - r ih - z eh 1 n - t ax - t ih v", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  347. { L"rt", L"R UW 1 T", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  348. { L"rte", L"R UW 1 T", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  349. //--- Initial - never EOS
  350. { L"s", L"EH 1 S", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
  351. //--- IfEOSAndLowercaseNotAbbreviation
  352. { L"sat", L"s ae 1 t - er - d ey", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 2, -1 },
  353. //--- SingleOrPluralAbbreviation, IfEOSNotAbbreviation
  354. { L"sec", L"s eh 1 k - ax n d", MS_Noun, L"s eh 1 k - ax n d z", MS_Noun, NULL, MS_Unknown, 1, 0 },
  355. //--- TITLE - never EOS
  356. { L"sen", L"s eh 1 n - ax - t ER", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
  357. { L"sep", L"s eh p - t eh 1 m - b er", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  358. { L"sept", L"s eh p - t eh 1 m - b er", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  359. { L"sm", L"S M AO 1 L", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  360. { L"sq", L"S K W EH 1 R", MS_Noun, L"S K W EH 1 R", MS_Noun, NULL, MS_Unknown, 0, 4 },
  361. //--- IfEOSNotAbbreviation
  362. { L"sr", L"s iy 1 n - y er", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 1, -1 },
  363. //--- DoctorDriveAbbreviation
  364. { L"st", L"S EY 1 N T", MS_Noun, L"S T R IY 1 T", MS_Noun, NULL, MS_Unknown, 0, 1 },
  365. { L"ste", L"s w iy 1 t", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  366. //--- IfEOSNotAbbreviation
  367. { L"sun", L"s ah 1 n - d ey", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 1, -1 },
  368. //--- Initial - never EOS
  369. { L"t", L"T IY 1", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
  370. //--- SingleOrPluralAbbreviation
  371. { L"tbsp", L"t ey 1 - b ax l - s p uw 2 n", MS_Noun, L"t ey 1 - b ax l - s p uw 2 n Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  372. { L"tech", L"T EH 1 K", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  373. { L"thu", L"th er 1 z - d ey", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  374. { L"thur", L"th er 1 z - d ey", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  375. { L"thurs", L"th er 1 z - d ey", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  376. //--- SingleOrPluralAbbreviation
  377. { L"tsp", L"t iy 1 - s p uw 2 n", MS_Noun, L"t iy 1 - s p uw 2 n Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  378. { L"tue", L"t uw 1 z - d ey", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  379. { L"tues", L"t uw 1 z - d ey", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  380. //--- Initial - never EOS
  381. { L"u", L"Y UW 1", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
  382. { L"univ", L"y uw 2 - n ax - v er 1 - s ih - t iy", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  383. //--- Initial - never EOS
  384. { L"v", L"V IY 1", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
  385. { L"ver", L"v er 1 - zh ax n", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  386. { L"vers", L"v er 1 - zh ax n", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  387. { L"vol", L"v aa 1 l - y uw m", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  388. { L"vs", L"v er 1 - s ax s", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  389. //--- Initial - never EOS
  390. { L"w", L"d ah 1 b - ax l - y uw", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
  391. //--- IfEOSAndLowercaseNotAbbreviation
  392. { L"wed", L"w eh 1 n z - d ey", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 2, -1 },
  393. { L"wk", L"W IY 1 K", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  394. { L"wt", L"W EY 1 T", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  395. { L"wy", L"W EY 1", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  396. //--- Initial - never EOS
  397. { L"x", L"EH 1 K S", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
  398. //--- Initial - never EOS
  399. { L"y", L"W AY 1", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
  400. //--- SingleOrPluralAbbreviation
  401. { L"yd", L"Y AA 1 R D", MS_Noun, L"Y AA 1 R D Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  402. { L"yds", L"Y AA 1 R D Z", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  403. //--- SingleOrPluralAbbreviation
  404. { L"yr", L"Y IY 1 R", MS_Noun, L"Y IY 1 R Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  405. //--- Initial - never EOS
  406. { L"z", L"Z IY 1", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
  407. };
  408. //--- IMPORTANT!!! This list must remain alphabetized for the binary search algorithm to work properly!!!
  409. AbbrevRecord g_AmbiguousWordTable[] =
  410. {
  411. //--- ADisambig
  412. { L"a", L"EY 1", MS_Noun, L"AX", MS_Det, NULL, MS_Unknown, 0, 8 },
  413. //--- AllCapsAbbreviation
  414. { L"al", L"EY 1 & EH 1 L", MS_Noun, L"AE 1 L", MS_Noun, NULL, MS_Unknown, 0, 3 },
  415. //--- AllCapsAbbreviation
  416. { L"apr", L"ey 1 & p iy 1 & aa 1 r", MS_Noun, L"ey 1 - p r ax l", MS_Noun, NULL, MS_Unknown, 0, 3 },
  417. //--- AllCapsAbbreviation
  418. { L"as", L"EY 1 & EH 1 S", MS_Noun, L"AE 1 Z", MS_Conj, NULL, MS_Unknown, 0, 3 },
  419. //--- SingleOrPluralAbbreviation
  420. { L"bu", L"b uh 1 sh - ax l", MS_Noun, L"b uh 1 sh - ax l Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  421. //--- DegreeAbbreviation
  422. { L"c", L"s eh 1 l - s iy - ax s", MS_Noun, L"s iy 1", MS_Noun, NULL, MS_Unknown, 0, 6 },
  423. //--- SingleOrPluralAbbreviation
  424. { L"cal", L"k ae 1 l - ax - r iy", MS_Noun, L"k ae 1 l - ax - r iy Z", MS_Noun, L"K AE 1 L", MS_Noun, 0, 0 },
  425. //--- SingleOrPluralAbbreviation
  426. { L"cl", L"s eh 1 n - t ax - l iy 2 - t er", MS_Noun, L"s eh 1 n - t ax - l iy 2 - t er Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  427. //--- SingleOrPluralAbbreviation
  428. { L"cm", L"s eh 1 n - t ax - m iy 2 - t er", MS_Noun, L"s eh 1 n - t ax - m iy 2 - t er Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  429. //--- AllCapsAbbreviation
  430. { L"co", L"S IY 1 & OW 1", MS_Noun, L"k ah 1 m - p ax - n iy", MS_Noun, NULL, MS_Unknown, 0, 3 },
  431. //--- AllCapsAbbreviation
  432. { L"ct", L"s iy 1 & t iy 1", MS_Noun, L"k ao 1 r t", MS_Noun, NULL, MS_Unknown, 0, 3 },
  433. //--- MeasurementModifier
  434. { L"cu", L"S IY 1 & Y UW 1", MS_Noun, L"k y uw 1 - b ih k", MS_Noun, NULL, MS_Unknown, 0, 7 },
  435. //--- SingleOrPluralAbbreviation
  436. { L"db", L"d eh 1 s - ax - b ax l", MS_Noun, L"d eh 1 s - ax - b ax l Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  437. //--- SingleOrPluralAbbreviation
  438. { L"deg", L"d ih - g r iy 1", MS_Noun, L"d ih - g r iy 1 z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  439. //--- DoctorDriveAbbreviation
  440. { L"dr", L"d aa 1 k - t er", MS_Noun, L"D R AY 1 V", MS_Noun, NULL, MS_Unknown, 0, 1 },
  441. //--- DegreeAbbreviation
  442. { L"f", L"f ae 1 r - ax n - h ay 2 t", MS_Noun, L"eh 1 f", MS_Noun, NULL, MS_Unknown, 0, 6 },
  443. //--- AbbreviationFollowedByDigit
  444. { L"fig", L"f ih 1 g - y er", MS_Noun, L"F IH 1 G", MS_Noun, NULL, MS_Unknown, 0, 2 },
  445. //--- AbbreviationFollowedByDigit
  446. { L"figs", L"f ih 1 g - y er Z", MS_Noun, L"F IH 1 G Z", MS_Noun, NULL, MS_Unknown, 0, 2 },
  447. //--- MeasurementModifier
  448. { L"fl", L"eh 1 f & eh 1 l", MS_Noun, L"f l uw 1 - ih d", MS_Noun, NULL, MS_Unknown, 0, 7 },
  449. //--- SingleOrPluralAbbreviation
  450. { L"ft", L"F UH 1 T", MS_Noun, L"F IY 1 T", MS_Noun, L"F AO 1 R T", MS_Noun, 0, 0 },
  451. //--- G, Gram, Grams
  452. { L"g", L"G R AE 1 M", MS_Noun, L"G R AE 1 M Z", MS_Noun, L"JH IY 1", MS_Noun, 0, 5 },
  453. //--- DoctorDriveAbbreviation
  454. { L"gov", L"g ah 1 v - er - n ER", MS_Noun, L"G AH 1 V", MS_Noun, NULL, MS_Unknown, 0, 1 },
  455. //--- AllCapsAbbreviation
  456. { L"hi", L"EY 1 CH & AY 1", MS_Noun, L"H AY 1", MS_Interjection, NULL, MS_Unknown, 0, 3 },
  457. //--- HR hour hours
  458. { L"hr", L"AW 1 ER", MS_Noun, L"AW 1 ER Z", MS_Noun, L"EY 1 CH AA 1 R", MS_Noun, 0, 5 },
  459. //--- J, Joule, Joules
  460. { L"j", L"JH UW 1 L", MS_Noun, L"JH UW 1 L Z", MS_Noun, L"JH EY 1", MS_Noun, 0, 5 },
  461. //--- DegreeAbbreviation
  462. { L"k", L"k eh 1 l - v ax n", MS_Noun, L"k ey 1", MS_Noun, NULL, MS_Unknown, 0, 6 },
  463. //--- SingleOrPluralAbbreviation
  464. { L"kb", L"k ih 1 l - ax - b ay 2 t", MS_Noun, L"k ih 1 l - ax - b ay 2 t S", MS_Noun, NULL, MS_Unknown, 0, 0 },
  465. //--- SingleOrPluralAbbreviation
  466. { L"kcal", L"k ih 1 l - ax - k ae 2 l - ax - r iy", MS_Noun, L"k ih 1 l - ax - k ae 2 l - ax - r iy Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  467. //--- SingleOrPluralAbbreviation
  468. { L"kg", L"k ih 1 l - ax - g r ae 2 m", MS_Noun, L"k ih 1 l - ax - g r ae 2 m Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  469. //--- SingleOrPluralAbbreviation
  470. { L"kj", L"k ih 1 l - ax - jh uw 2 l", MS_Noun, L"k ih 1 l - ax - jh uw 2 l z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  471. //--- SingleOrPluralAbbreviation
  472. { L"km", L"k ih - l aa 1 m - ih - t er", MS_Noun, L"k ih - l aa 1 m - ih - t er Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  473. //--- SingleOrPluralAbbreviation
  474. { L"kw", L"k ih 1 l - ax - w aa 2 t", MS_Noun, L"k ih 1 l - ax - w aa 2 t S", MS_Noun, NULL, MS_Unknown, 0, 0 },
  475. //--- L, Liter, Liters
  476. { L"l", L"l iy 1 - t er", MS_Noun, L"l iy 1 - t er Z", MS_Noun, L"EH 1 L", MS_Noun, 0, 5 },
  477. //--- AllCapsAbbreviation
  478. { L"la", L"EH 1 L & EY 1", MS_Noun, L"L AH 1", MS_Noun, NULL, MS_Unknown, 0, 3 },
  479. //--- SingleOrPluralAbbreviation
  480. { L"lb", L"P AW 1 N D", MS_Noun, L"P AW 1 N D Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  481. //--- AllCapsAbbreviation
  482. { L"lts", L"eh 1 l & t iy 1 & eh 1 s", MS_Noun, L"l uw t eh 1 n ax n t s", MS_Noun, NULL, MS_Unknown, 0, 3 },
  483. //--- M, Meter, Meters
  484. { L"m", L"M IY 1 - T ER", MS_Noun, L"M IY 1 - T ER Z", MS_Noun, L"EH 1 M", MS_Noun, 0, 5 },
  485. //--- AllCapsAbbreviation
  486. { L"ma", L"EH 1 M & AA 1", MS_Noun, L"M AA 1", MS_Noun, NULL, MS_Unknown, 0, 3 },
  487. //--- March Mar
  488. { L"mar", L"M AA 1 R CH", MS_Noun, L"M AA 1 R", MS_Verb, NULL, MS_Unknown, 0, 4 },
  489. //--- SingleOrPluralAbbreviation
  490. { L"mb", L"m eh 1 g - ax - b ay 2 t", MS_Noun, L"m eh 1 g - ax - b ay 2 t S", MS_Noun, NULL, MS_Unknown, 0, 0 },
  491. //--- AllCapsAbbreviation
  492. { L"me", L"EH 1 M & IY 1", MS_Noun, L"M IY 1", MS_ObjPron, NULL, MS_Unknown, 0, 3 },
  493. //--- SingleOrPluralAbbreviation
  494. { L"mg", L"m ih 1 l - ax - g r ae 2 m", MS_Noun, L"m ih 1 l - ax - g r ae 2 m Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  495. //--- SingleOrPluralAbbreviation
  496. { L"mi", L"M AY 1 L", MS_Noun, L"M AY 1 L Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  497. //--- SingleOrPluralAbbreviation
  498. { L"min", L"m ih 1 n - ax t", MS_Noun, L"m ih 1 n - ax t S", MS_Noun, L"m ih 1 n - ax - m ax m", MS_Noun, 0, 0 },
  499. //--- SingleOrPluralAbbreviation
  500. { L"ml", L"m ih 1 l - ax - l iy 2 - t er", MS_Noun, L"m ih 1 l - ax - l iy 2 - t er Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  501. //--- SingleOrPluralAbbreviation
  502. { L"mm", L"m ih 1 l - ax - m iy 2 - t er", MS_Noun, L"m ih 1 l - ax - m iy 2 - t er Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  503. //--- SingleOrPluralAbbreviation
  504. { L"mpg", L"m ay 1 l & p er 1 & g ae 1 l ax n", MS_Noun, L"m ay 1 l z & p er 1 & g ae 1 l ax n", MS_Noun, L"eh 1 m & p iy 1 & jh iy 1", MS_Noun, 0, 0 },
  505. //--- SingleOrPluralAbbreviation
  506. { L"mph", L"m ay 1 l & p er 1 & aw 1 er", MS_Noun, L"m ay 1 l z & p er 1 & aw 1 er", MS_Noun, L"eh 1 m & p iy 1 & ey 1 ch", MS_Noun, 0, 0 },
  507. //--- MS millisecond milliseconds
  508. { L"ms", L"m ih 2 l - ax - s eh 1 k - ax n d", MS_Noun, L"m ih 2 l - ax - s eh 1 k - ax n d Z", MS_Noun, L"eh 1 m & eh 1 s", MS_Noun, 0, 5 },
  509. //--- SingleOrPluralAbbreviation
  510. { L"msec", L"m ih 2 l - ax - s eh 1 k - ax n d", MS_Noun, L"m ih 2 l - ax - s eh 1 k - ax n d Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  511. //--- AllCapsAbbreviation
  512. { L"mt", L"EH 1 M & T IY 1", MS_Noun, L"M AW 1 N T", MS_Noun, NULL, MS_Unknown, 0, 3 },
  513. //--- AllCapsAbbreviation
  514. { L"oh", L"OW 1 & EY 1 CH", MS_Noun, L"OW 1", MS_Interjection, NULL, MS_Unknown, 0, 3 },
  515. //--- AllCapsAbbreviation
  516. { L"or", L"OW 1 & AA 1 R", MS_Noun, L"AO 1 R", MS_CConj, NULL, MS_Unknown, 0, 3 },
  517. //--- SingleOrPluralAbbreviation
  518. { L"oz", L"AW 1 N S", MS_Noun, L"AW 1 N S AX Z", MS_Noun, L"AA 1 Z", MS_Noun, 0, 0 },
  519. //--- AbbreviationFollowedByDigit
  520. { L"p", L"P EY 1 JH", MS_Noun, L"P IY 1", MS_Noun, NULL, MS_Unknown, 0, 2 },
  521. //--- AllCapsAbbreviation
  522. { L"pa", L"P IY 1 & EY 1", MS_Noun, L"P AA 1", MS_Noun, NULL, MS_Unknown, 0, 3 },
  523. //--- AllCapsAbbreviation
  524. { L"pg", L"P IY 1 & JH IY 1", MS_Noun, L"P EY 1 JH", MS_Noun, NULL, MS_Unknown, 0, 3 },
  525. //--- AllCapsAbbreviation
  526. { L"po", L"p iy 1 & ow 1", MS_Noun, L"p ow 1", MS_Noun, NULL, MS_Unknown, 0, 3 },
  527. //--- PolishDisambig
  528. { L"polish", L"p ow 1 l - ax sh", MS_Adj, L"p aa 1 l - ih sh", MS_Verb, L"p aa 1 l - ih sh", MS_Noun, 0, 9 },
  529. //--- SingleOrPluralAbbreviation
  530. { L"qt", L"K W AO 1 R T", MS_Noun, L"K W AO 1 R T Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  531. //--- Saturday Sat
  532. { L"sat", L"s ae 1 t - er - d ey", MS_Noun, L"S AE 1 T", MS_Verb, NULL, MS_Unknown, 0, 4 },
  533. //--- SEC Second Seconds
  534. { L"sec", L"s eh 1 k - ax n d", MS_Noun, L"s eh 1 k - ax n d Z", MS_Noun, L"EH 1 S & IY 1 & S IY 1", MS_Noun, 0, 5 },
  535. //--- MeasurementModifier
  536. { L"sq", L"S K W EH 1 R", MS_Noun, L"S K W EH 1 R", MS_Noun, NULL, MS_Unknown, 0, 7 },
  537. //--- AllCapsAbbreviation
  538. { L"sr", L"EH 1 S & AA 1 R", MS_Noun, L"s iy 1 n - y er", MS_Noun, NULL, MS_Unknown, 0, 3 },
  539. //--- DoctorDriveAbbreviation
  540. { L"st", L"S EY 1 N T", MS_Noun, L"S T R IY 1 T", MS_Noun, NULL, MS_Unknown, 0, 1 },
  541. //--- SingleOrPluralAbbreviation
  542. { L"tbsp", L"t ey 1 - b ax l - s p uw 2 n", MS_Noun, L"t ey 1 - b ax l - s p uw 2 n Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  543. //--- SingleOrPluralAbbreviation
  544. { L"tsp", L"t iy 1 - s p uw 2 n", MS_Noun, L"t iy 1 - s p uw 2 n Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  545. //--- AllCapsAbbreviation
  546. { L"us", L"Y UW 1 & EH 1 S", MS_Noun, L"AH 1 S", MS_ObjPron, NULL, MS_Unknown, 0, 3 },
  547. //--- Wednesday Wed
  548. { L"wed", L"w eh 1 n z - d ey", MS_Noun, L"W EH 1 D", MS_Verb, NULL, MS_Unknown, 0, 4 },
  549. //--- AllCapsAbbreviation
  550. { L"wy", L"d ah 1 b - ax l - y uw & W AY 1", MS_Noun, L"W EY 1", MS_Noun, NULL, MS_Unknown, 0, 3 },
  551. //--- SingleOrPluralAbbreviation
  552. { L"yd", L"Y AA 1 R D", MS_Noun, L"Y AA 1 R D Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  553. //--- SingleOrPluralAbbreviation
  554. { L"yr", L"Y IY 1 R", MS_Noun, L"Y IY 1 R Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  555. };
  556. //--- IMPORTANT!!! This list must remain alphabetized for the binary search algorithm to work properly!!!
  //--- Words whose pronunciation is chosen after lexicon lookup.
  //    Record layout as written below: lookup key, then three (pronunciation string,
  //    part-of-speech) pairs, then two integers.  An unused third pair is written as
  //    NULL, MS_Unknown.  The pronunciation slots are the pPron1/pPron2/pPron3 members
  //    that CleanupAbbrevTables (top of this file) walks for this table.
  //    In every record below the final integer equals the position, within
  //    g_PostLexLookupDisambigTable, of the function named in the comment above the
  //    record (0 = MeasurementDisambig, 1 = TheDisambig, 2 = ReadDisambig).
  //    NOTE(review): the AbbrevRecord declaration is not in this file, so the meaning
  //    of the second-to-last integer (always 0 here) should be confirmed against the header.
  557. AbbrevRecord g_PostLexLookupWordTable[] =
  558. {
  559. //--- MeasurementDisambig
  560. { L"bu", L"b uh 1 sh - ax l", MS_Noun, L"b uh 1 sh - ax l Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  561. //--- MeasurementDisambig
  562. { L"cal", L"k ae 1 l - ax - r iy", MS_Noun, L"k ae 1 l - ax - r iy Z", MS_Noun, L"K AE 1 L", MS_Noun, 0, 0 },
  563. //--- MeasurementDisambig
  564. { L"cl", L"s eh 1 n - t ax - l iy 2 - t er", MS_Noun, L"s eh 1 n - t ax - l iy 2 - t er Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  565. //--- MeasurementDisambig
  566. { L"cm", L"s eh 1 n - t ax - m iy 2 - t er", MS_Noun, L"s eh 1 n - t ax - m iy 2 - t er Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  567. //--- MeasurementDisambig
  568. { L"db", L"d eh 1 s - ax - b ax l", MS_Noun, L"d eh 1 s - ax - b ax l Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  569. //--- MeasurementDisambig
  570. { L"deg", L"d ih - g r iy 1", MS_Noun, L"d ih - g r iy 1 z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  571. //--- MeasurementDisambig
  572. { L"ft", L"F UH 1 T", MS_Noun, L"F IY 1 T", MS_Noun, L"F AO 1 R T", MS_Noun, 0, 0 },
  573. //--- MeasurementDisambig
  574. { L"g", L"G R AE 1 M", MS_Noun, L"G R AE 1 M Z", MS_Noun, L"JH IY 1", MS_Noun, 0, 0 },
  575. //--- MeasurementDisambig
  576. { L"gal", L"g ae 1 l - ax n", MS_Noun, L"g ae 1 l - ax n Z", MS_Noun, L"G AE 1 L", MS_Noun, 0, 0 },
  577. //--- MeasurementDisambig
  578. { L"hr", L"AW 1 ER", MS_Noun, L"AW 1 ER Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  579. //--- MeasurementDisambig
  580. { L"in", L"IH 1 N CH", MS_Noun, L"IH 1 N CH AX Z", MS_Noun, L"IH 1 N", MS_Prep, 0, 0 },
  581. //--- MeasurementDisambig
  582. { L"j", L"JH UW 1 L", MS_Noun, L"JH UW 1 L Z", MS_Noun, L"JH EY 1", MS_Noun, 0, 0 },
  583. //--- MeasurementDisambig
  584. { L"kb", L"k ih 1 l - ax - b ay 2 t", MS_Noun, L"k ih 1 l - ax - b ay 2 t S", MS_Noun, NULL, MS_Unknown, 0, 0 },
  585. //--- MeasurementDisambig
  586. { L"kcal", L"k ih 1 l - ax - k ae 2 l - ax - r iy", MS_Noun, L"k ih 1 l - ax - k ae 2 l - ax - r iy Z", MS_Noun, L"K AE 1 L", MS_Noun, 0, 0 },
  587. //--- MeasurementDisambig
  588. { L"kg", L"k ih 1 l - ax - g r ae 2 m", MS_Noun, L"k ih 1 l - ax - g r ae 2 m Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  589. //--- MeasurementDisambig
  590. { L"kj", L"k ih 1 l - ax - jh uw 2 l", MS_Noun, L"k ih 1 l - ax - jh uw 2 l z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  591. //--- MeasurementDisambig
  592. { L"km", L"k ih - l aa 1 m - ih - t er", MS_Noun, L"k ih - l aa 1 m - ih - t er Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  593. //--- MeasurementDisambig
  594. { L"kw", L"k ih 1 l - ax - w aa 2 t", MS_Noun, L"k ih 1 l - ax - w aa 2 t S", MS_Noun, NULL, MS_Unknown, 0, 0 },
  595. //--- MeasurementDisambig
  596. { L"l", L"l iy 1 - t er", MS_Noun, L"l iy 1 - t er Z", MS_Noun, L"EH 1 L", MS_Noun, 0, 0 },
  597. //--- MeasurementDisambig
  598. { L"lb", L"P AW 1 N D", MS_Noun, L"P AW 1 N D Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  599. //--- MeasurementDisambig
  600. { L"m", L"M IY 1 - T ER", MS_Noun, L"M IY 1 - T ER Z", MS_Noun, L"EH 1 M", MS_Noun, 0, 0 },
  601. //--- MeasurementDisambig
  602. { L"mb", L"m eh 1 g - ax - b ay 2 t", MS_Noun, L"m eh 1 g - ax - b ay 2 t S", MS_Noun, NULL, MS_Unknown, 0, 0 },
  603. //--- MeasurementDisambig
  604. { L"mg", L"m ih 1 l - ax - g r ae 2 m", MS_Noun, L"m ih 1 l - ax - g r ae 2 m Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  605. //--- MeasurementDisambig
  606. { L"mi", L"M AY 1 L", MS_Noun, L"M AY 1 L Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  607. //--- MeasurementDisambig
  608. { L"min", L"m ih 1 n - ax t", MS_Noun, L"m ih 1 n - ax t S", MS_Noun, L"m ih 1 n - ax - m ax m", MS_Noun, 0, 0 },
  609. //--- ReadDisambig
  610. { L"misread", L"m ih s - r iy 1 d", MS_Verb, L"m ih s - r eh 1 d", MS_Verb, NULL, MS_Unknown, 0, 2 },
  611. //--- MeasurementDisambig
  612. { L"ml", L"m ih 1 l - ax - l iy 2 - t er", MS_Noun, L"m ih 1 l - ax - l iy 2 - t er Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  613. //--- MeasurementDisambig
  614. { L"mm", L"m ih 1 l - ax - m iy 2 - t er", MS_Noun, L"m ih 1 l - ax - m iy 2 - t er Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  615. //--- MeasurementDisambig
  616. { L"mpg", L"m ay 1 l & p er 1 & g ae 1 l ax n", MS_Noun, L"m ay 1 l z & p er 1 & g ae 1 l ax n", MS_Noun, L"eh 1 m & p iy 1 & jh iy 1", MS_Noun, 0, 0 },
  617. //--- MeasurementDisambig
  618. { L"mph", L"m ay 1 l & p er 1 & aw 1 er", MS_Noun, L"m ay 1 l z & p er 1 & aw 1 er", MS_Noun, L"eh 1 m & p iy 1 & ey 1 ch", MS_Noun, 0, 0 },
  619. //--- MeasurementDisambig
  620. { L"msec", L"m ih 2 l - ax - s eh 1 k - ax n d", MS_Noun, L"m ih 2 l - ax - s eh 1 k - ax n d Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  621. //--- MeasurementDisambig
  622. { L"oz", L"AW 1 N S", MS_Noun, L"AW 1 N S AX Z", MS_Noun, L"AA 1 Z", MS_Noun, 0, 0 },
  623. //--- ReadDisambig
  624. { L"proofread", L"p r uw 1 f - r iy 2 d", MS_Verb, L"p r uw 1 f - r eh 2 d", MS_Verb, NULL, MS_Unknown, 0, 2 },
  625. //--- MeasurementDisambig
  626. { L"qt", L"K W AO 1 R T", MS_Noun, L"K W AO 1 R T Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  627. //--- ReadDisambig
  628. { L"read", L"R IY 1 D", MS_Verb, L"R EH 1 D", MS_Verb, NULL, MS_Unknown, 0, 2 },
  629. //--- MeasurementDisambig
  630. { L"sec", L"S EH 1 k - ax n d", MS_Noun, L"s eh 1 k - ax n d z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  631. //--- MeasurementDisambig
  632. { L"tbsp", L"t ey 1 - b ax l - s p uw 2 n", MS_Noun, L"t ey 1 - b ax l - s p uw 2 n Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  633. //--- TheDisambig
  634. { L"the", L"DH IY 2", MS_Det, L"DH AX 2", MS_Det, NULL, MS_Unknown, 0, 1 },
  635. //--- MeasurementDisambig
  636. { L"tsp", L"t iy 1 - s p uw 2 n", MS_Noun, L"t iy 1 - s p uw 2 n Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  637. //--- MeasurementDisambig
  638. { L"yd", L"Y AA 1 R D", MS_Noun, L"Y AA 1 R D Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  639. //--- MeasurementDisambig
  640. { L"yr", L"Y IY 1 R", MS_Noun, L"Y IY 1 R Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  641. };
  //--- Two fixed wide strings written in the same space-separated symbol notation as the
  //    pronunciation strings in the tables above ('&' also appears there between parts).
  //    NOTE(review): judging by the names these are presumably the pronunciations of
  //    "of a" and "of an"; the code that consumes them is not in this file - confirm.
  642. WCHAR *g_pOfA = L"ah 2 v & ax 2 &";
  643. WCHAR *g_pOfAn = L"ah 2 v & ax 2 n &";
  //--- Table of CStdSentEnum sentence-break disambiguation functions.
  //    NOTE(review): presumably selected by an integer index stored in abbreviation
  //    records, as the other dispatch tables in this file appear to be - the consumer
  //    is not visible here, so keep the order stable unless that is confirmed otherwise.
  644. const SentBreakDisambigFunc g_SentBreakDisambigTable[] =
  645. {
  646. CStdSentEnum::IsAbbreviationEOS, // slot 0
  647. CStdSentEnum::IfEOSNotAbbreviation, // slot 1
  648. CStdSentEnum::IfEOSAndLowercaseNotAbbreviation, // slot 2
  649. };
  //--- Table of CStdSentEnum pronunciation disambiguation functions.
  //    NOTE(review): presumably indexed by the trailing integer of the records in
  //    g_AbbreviationTable (that table is outside this part of the file - confirm).
  //    Do not reorder entries without updating whatever indexes into this table.
  650. const PronDisambigFunc g_PronDisambigTable[] =
  651. {
  652. CStdSentEnum::SingleOrPluralAbbreviation, // slot 0
  653. CStdSentEnum::DoctorDriveAbbreviation, // slot 1
  654. CStdSentEnum::AbbreviationFollowedByDigit, // slot 2
  655. CStdSentEnum::DegreeAbbreviation, // slot 3
  656. CStdSentEnum::AbbreviationModifier, // slot 4
  657. };
  //--- Table of CStdSentEnum disambiguation functions for ambiguous words.
  //    The trailing integer of the g_AmbiguousWordTable records lines up with these
  //    slots (e.g. the "polish" record ends in 9 and is commented PolishDisambig; the
  //    records commented AllCapsAbbreviation end in 3).  Some record comments use a
  //    different label than the function in the slot (e.g. "MeasurementModifier" on
  //    records ending in 7).
  //    NOTE(review): the lookup code itself is not in this file - confirm before reordering.
  658. const PronDisambigFunc g_AmbiguousWordDisambigTable[] =
  659. {
  660. CStdSentEnum::SingleOrPluralAbbreviation, // slot 0
  661. CStdSentEnum::DoctorDriveAbbreviation, // slot 1
  662. CStdSentEnum::AbbreviationFollowedByDigit, // slot 2
  663. CStdSentEnum::AllCapsAbbreviation, // slot 3
  664. CStdSentEnum::CapitalizedAbbreviation, // slot 4
  665. CStdSentEnum::SECAbbreviation, // slot 5
  666. CStdSentEnum::DegreeAbbreviation, // slot 6
  667. CStdSentEnum::AbbreviationModifier, // slot 7
  668. CStdSentEnum::ADisambig, // slot 8
  669. CStdSentEnum::PolishDisambig, // slot 9
  670. };
  //--- Table of CStdSentEnum post-lexicon-lookup disambiguation functions.
  //    The trailing integer of each g_PostLexLookupWordTable record matches a slot here:
  //    records commented MeasurementDisambig end in 0, "the" ends in 1, and the
  //    "read" / "misread" / "proofread" records end in 2.
  //    NOTE(review): the lookup code itself is not in this file - confirm before reordering.
  671. const PostLexLookupDisambigFunc g_PostLexLookupDisambigTable[] =
  672. {
  673. CStdSentEnum::MeasurementDisambig, // slot 0
  674. CStdSentEnum::TheDisambig, // slot 1
  675. CStdSentEnum::ReadDisambig, // slot 2
  676. };
  677. //--- IMPORTANT!!! This list must remain alphabetized for the binary search algorithm to work properly!!!
  678. // This is an alphabetized list of all non-proper-noun words which
  679. // appear within the list of the 200 most frequent first words
  680. // in sentences in both the Brown and WSJ corpora.
  // Every entry is written capitalized and wrapped in DEF_SPLSTR.  When adding a word,
  // insert it at its sorted position (note "Don't" currently sits between "Does" and
  // "During") rather than appending it at the end.
  // NOTE(review): the comparison used by the search is not visible in this file, so
  // whether it is case-sensitive should be confirmed before adding mixed-case entries.
  681. const SPLSTR g_FirstWords[] =
  682. {
  683. DEF_SPLSTR( "A" ),
  684. DEF_SPLSTR( "About" ),
  685. DEF_SPLSTR( "According" ),
  686. DEF_SPLSTR( "After" ),
  687. DEF_SPLSTR( "Again" ), // PaulCa added 4/14/99 (Bug 107)
  688. DEF_SPLSTR( "All" ),
  689. DEF_SPLSTR( "Also" ),
  690. DEF_SPLSTR( "Although" ),
  691. DEF_SPLSTR( "Among" ),
  692. DEF_SPLSTR( "An" ),
  693. DEF_SPLSTR( "And" ),
  694. DEF_SPLSTR( "Another" ),
  695. DEF_SPLSTR( "Any" ), // PaulCa added 4/14/99 (Bug 107)
  696. DEF_SPLSTR( "Anyway" ), // PaulCa added 4/14/99 (Bug 107)
  697. DEF_SPLSTR( "Are" ), // PaulCa added 4/14/99 (Bug 107)
  698. DEF_SPLSTR( "As" ),
  699. DEF_SPLSTR( "At" ),
  700. DEF_SPLSTR( "Back" ), // PaulCa added 4/14/99 (Bug 107)
  701. DEF_SPLSTR( "Because" ),
  702. DEF_SPLSTR( "Before" ),
  703. DEF_SPLSTR( "Besides" ), // PaulCa added 4/14/99 (Bug 107)
  704. DEF_SPLSTR( "Both" ),
  705. DEF_SPLSTR( "But" ),
  706. DEF_SPLSTR( "By" ),
  707. DEF_SPLSTR( "Can" ), // PaulCa added 4/14/99 (Bug 107)
  708. DEF_SPLSTR( "Consequently" ), // PaulCa added 4/14/99 (Bug 107)
  709. DEF_SPLSTR( "Dear" ), // PaulCa added 4/14/99 (Bug 107)
  710. DEF_SPLSTR( "Despite" ),
  711. DEF_SPLSTR( "Did" ), // PaulCa added 4/14/99 (Bug 107)
  712. DEF_SPLSTR( "Do" ), // PaulCa added 4/14/99 (Bug 107)
  713. DEF_SPLSTR( "Does" ), // PaulCa added 4/14/99 (Bug 107)
  714. DEF_SPLSTR( "Don't" ), // PaulCa added 4/14/99 (Bug 107)
  715. DEF_SPLSTR( "During" ),
  716. DEF_SPLSTR( "Each" ),
  717. DEF_SPLSTR( "Early" ), // PaulCa added 4/14/99 (Bug 107)
  718. DEF_SPLSTR( "Even" ),
  719. DEF_SPLSTR( "Every" ), // PaulCa added 4/14/99 (Bug 107)
  720. DEF_SPLSTR( "Finally" ), // PaulCa added 4/14/99 (Bug 107)
  721. DEF_SPLSTR( "First" ),
  722. DEF_SPLSTR( "Following" ), // PaulCa added 4/14/99 (Bug 107)
  723. DEF_SPLSTR( "For" ),
  724. DEF_SPLSTR( "Four" ),
  725. DEF_SPLSTR( "From" ),
  726. DEF_SPLSTR( "Further" ), // PaulCa added 4/14/99 (Bug 107)
  727. DEF_SPLSTR( "Furthermore" ), // PaulCa added 4/14/99 (Bug 107)
  728. DEF_SPLSTR( "Generally" ), // PaulCa added 4/14/99 (Bug 107)
  729. DEF_SPLSTR( "Given" ), // PaulCa added 4/14/99 (Bug 107)
  730. DEF_SPLSTR( "Go" ), // PaulCa added 4/14/99 (Bug 107)
  731. DEF_SPLSTR( "Great" ), // PaulCa added 4/14/99 (Bug 107)
  732. DEF_SPLSTR( "Had" ), // PaulCa added 4/14/99 (Bug 107)
  733. DEF_SPLSTR( "Have" ), // PaulCa added 4/14/99 (Bug 107)
  734. DEF_SPLSTR( "Having" ), // PaulCa added 4/14/99 (Bug 107)
  735. DEF_SPLSTR( "He" ),
  736. DEF_SPLSTR( "Her" ),
  737. DEF_SPLSTR( "Here" ),
  738. DEF_SPLSTR( "His" ),
  739. DEF_SPLSTR( "How" ), // PaulCa added 4/14/99 (Bug 107)
  740. DEF_SPLSTR( "However" ),
  741. DEF_SPLSTR( "I" ),
  742. DEF_SPLSTR( "If" ),
  743. DEF_SPLSTR( "In" ),
  744. DEF_SPLSTR( "Indeed" ),
  745. DEF_SPLSTR( "Initially" ), // PaulCa added 4/14/99 (Bug 107)
  746. DEF_SPLSTR( "Instead" ),
  747. DEF_SPLSTR( "Is" ), // PaulCa added 4/14/99 (Bug 107)
  748. DEF_SPLSTR( "It" ),
  749. DEF_SPLSTR( "Its" ),
  750. DEF_SPLSTR( "Just" ), // PaulCa added 4/14/99 (Bug 107)
  751. DEF_SPLSTR( "Last" ),
  752. DEF_SPLSTR( "Later" ),
  753. DEF_SPLSTR( "Let" ), // PaulCa added 4/14/99 (Bug 107)
  754. DEF_SPLSTR( "Like" ),
  755. DEF_SPLSTR( "Many" ),
  756. DEF_SPLSTR( "Maybe" ), // PaulCa added 4/14/99 (Bug 107)
  757. DEF_SPLSTR( "Meanwhile" ),
  758. DEF_SPLSTR( "More" ),
  759. DEF_SPLSTR( "Moreover" ),
  760. DEF_SPLSTR( "Most" ),
  761. DEF_SPLSTR( "Much" ),
  762. DEF_SPLSTR( "My" ), // Added to fix bug #385
  763. DEF_SPLSTR( "Neither" ),
  764. DEF_SPLSTR( "Never" ), // PaulCa added 4/14/99 (Bug 107)
  765. DEF_SPLSTR( "Nevertheless" ),
  766. DEF_SPLSTR( "New" ),
  767. DEF_SPLSTR( "Next" ), // PaulCa added 4/14/99 (Bug 107)
  768. DEF_SPLSTR( "No" ),
  769. DEF_SPLSTR( "None" ), // PaulCa added 4/14/99 (Bug 107)
  770. DEF_SPLSTR( "Nonetheless" ), // PaulCa added 4/14/99 (Bug 107)
  771. DEF_SPLSTR( "Nor" ),
  772. DEF_SPLSTR( "Not" ),
  773. DEF_SPLSTR( "Nothing" ), // PaulCa added 4/14/99 (Bug 107)
  774. DEF_SPLSTR( "Now" ),
  775. DEF_SPLSTR( "Of" ),
  776. DEF_SPLSTR( "On" ),
  777. DEF_SPLSTR( "Once" ),
  778. DEF_SPLSTR( "One" ),
  779. DEF_SPLSTR( "Only" ),
  780. DEF_SPLSTR( "Or" ),
  781. DEF_SPLSTR( "Other" ),
  782. DEF_SPLSTR( "Others" ),
  783. DEF_SPLSTR( "Our" ), // PaulCa added 4/14/99 (Bug 107)
  784. DEF_SPLSTR( "Over" ),
  785. DEF_SPLSTR( "People" ),
  786. DEF_SPLSTR( "Perhaps" ), // PaulCa added 4/14/99 (Bug 107)
  787. DEF_SPLSTR( "Please" ), // PaulCa added 4/14/99 (Bug 107)
  788. DEF_SPLSTR( "Previous" ), // PaulCa added 4/14/99 (Bug 107)
  789. DEF_SPLSTR( "Recent" ), // PaulCa added 4/14/99 (Bug 107)
  790. DEF_SPLSTR( "Right" ), // PaulCa added 4/14/99 (Bug 107)
  791. DEF_SPLSTR( "Second" ), // PaulCa added 4/14/99 (Bug 107)
  792. DEF_SPLSTR( "See" ), // PaulCa added 4/14/99 (Bug 107)
  793. DEF_SPLSTR( "Several" ),
  794. DEF_SPLSTR( "She" ),
  795. DEF_SPLSTR( "Shortly" ), // PaulCa added 4/14/99 (Bug 107)
  796. DEF_SPLSTR( "Similarly" ), // PaulCa added 4/14/99 (Bug 107)
  797. DEF_SPLSTR( "Since" ),
  798. DEF_SPLSTR( "So" ),
  799. DEF_SPLSTR( "Some" ),
  800. DEF_SPLSTR( "Sometimes" ), // PaulCa added 4/14/99 (Bug 107)
  801. DEF_SPLSTR( "Soon" ), // PaulCa added 4/14/99 (Bug 107)
  802. DEF_SPLSTR( "Still" ),
  803. DEF_SPLSTR( "Subsequently" ), // PaulCa added 4/14/99 (Bug 107)
  804. DEF_SPLSTR( "Such" ),
  805. DEF_SPLSTR( "Take" ), // PaulCa added 4/14/99 (Bug 107)
  806. DEF_SPLSTR( "That" ),
  807. DEF_SPLSTR( "The" ),
  808. DEF_SPLSTR( "Their" ),
  809. DEF_SPLSTR( "Then" ),
  810. DEF_SPLSTR( "There" ),
  811. DEF_SPLSTR( "Thereafter" ), // PaulCa added 4/14/99 (Bug 107)
  812. DEF_SPLSTR( "Therefore" ), // PaulCa added 4/14/99 (Bug 107)
  813. DEF_SPLSTR( "These" ),
  814. DEF_SPLSTR( "They" ),
  815. DEF_SPLSTR( "This" ),
  816. DEF_SPLSTR( "Those" ),
  817. DEF_SPLSTR( "Though" ),
  818. DEF_SPLSTR( "Three" ), // PaulCa added 4/14/99 (Bug 107)
  819. DEF_SPLSTR( "Through" ), // PaulCa added 4/14/99 (Bug 107)
  820. DEF_SPLSTR( "Thus" ),
  821. DEF_SPLSTR( "To" ),
  822. DEF_SPLSTR( "Today" ),
  823. DEF_SPLSTR( "Two" ),
  824. DEF_SPLSTR( "Under" ),
  825. DEF_SPLSTR( "Unlike" ), // PaulCa added 4/14/99 (Bug 107)
  826. DEF_SPLSTR( "Until" ),
  827. DEF_SPLSTR( "Upon" ), // PaulCa added 4/14/99 (Bug 107)
  828. DEF_SPLSTR( "We" ),
  829. DEF_SPLSTR( "Well" ), // PaulCa added 4/14/99 (Bug 107)
  830. DEF_SPLSTR( "What" ),
  831. DEF_SPLSTR( "When" ),
  832. DEF_SPLSTR( "Where" ), // PaulCa added 4/14/99 (Bug 107)
  833. DEF_SPLSTR( "Whether" ), // PaulCa added 4/14/99 (Bug 107)
  834. DEF_SPLSTR( "Which" ), // PaulCa added 4/14/99 (Bug 107)
  835. DEF_SPLSTR( "While" ),
  836. DEF_SPLSTR( "Who" ), // PaulCa added 4/14/99 (Bug 107)
  837. DEF_SPLSTR( "Why" ), // PaulCa added 4/14/99 (Bug 107)
  838. DEF_SPLSTR( "Will" ), // PaulCa added 4/14/99 (Bug 107)
  839. DEF_SPLSTR( "With" ),
  840. DEF_SPLSTR( "Within" ),
  841. DEF_SPLSTR( "Without" ), // PaulCa added 4/14/99 (Bug 107)
  842. DEF_SPLSTR( "Yes" ), // PaulCa added 4/14/99 (Bug 107)
  843. DEF_SPLSTR( "Yet" ),
  844. DEF_SPLSTR( "You" ),
  845. DEF_SPLSTR( "Your" ),
  846. };