Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

860 lines
51 KiB

  1. /******************************************************************************
  2. * MiscData.cpp *
  3. *--------------*
  4. * This file stores the const data used in various non-normalization front-end
  5. * code
  6. *------------------------------------------------------------------------------
  7. * Copyright (C) 1999 Microsoft Corporation Date: 05/02/2000
  8. * All Rights Reserved
  9. *
  10. ****************************************************************** AARONHAL ***/
  11. #include "stdafx.h"
  12. #include"stdsentenum.h"
  13. //--- Variable used to make sure initialization of pronunciations only happens once!
  14. BOOL g_fAbbrevTablesInitialized = false;
  15. //--- Helper function used to delete dynamically allocated memory (for the abbreviation
  16. // table) at DLL exit time...
  17. void CleanupAbbrevTables( void )
  18. {
  19. if ( g_fAbbrevTablesInitialized )
  20. {
  21. for ( ULONG i = 0; i < sp_countof( g_AbbreviationTable ); i++ )
  22. {
  23. if ( g_AbbreviationTable[i].pPron1 )
  24. {
  25. delete [] g_AbbreviationTable[i].pPron1;
  26. }
  27. if ( g_AbbreviationTable[i].pPron2 )
  28. {
  29. delete [] g_AbbreviationTable[i].pPron2;
  30. }
  31. if ( g_AbbreviationTable[i].pPron3 )
  32. {
  33. delete [] g_AbbreviationTable[i].pPron3;
  34. }
  35. }
  36. for ( i = 0; i < sp_countof( g_AmbiguousWordTable ); i++ )
  37. {
  38. if ( g_AmbiguousWordTable[i].pPron1 )
  39. {
  40. delete [] g_AmbiguousWordTable[i].pPron1;
  41. }
  42. if ( g_AmbiguousWordTable[i].pPron2 )
  43. {
  44. delete [] g_AmbiguousWordTable[i].pPron2;
  45. }
  46. if ( g_AmbiguousWordTable[i].pPron3 )
  47. {
  48. delete [] g_AmbiguousWordTable[i].pPron3;
  49. }
  50. }
  51. for ( i = 0; i < sp_countof( g_PostLexLookupWordTable ); i++ )
  52. {
  53. if ( g_PostLexLookupWordTable[i].pPron1 )
  54. {
  55. delete [] g_PostLexLookupWordTable[i].pPron1;
  56. }
  57. if ( g_PostLexLookupWordTable[i].pPron2 )
  58. {
  59. delete [] g_PostLexLookupWordTable[i].pPron2;
  60. }
  61. if ( g_PostLexLookupWordTable[i].pPron3 )
  62. {
  63. delete [] g_PostLexLookupWordTable[i].pPron3;
  64. }
  65. }
  66. if ( g_pOfA )
  67. {
  68. delete [] g_pOfA;
  69. }
  70. if ( g_pOfAn )
  71. {
  72. delete [] g_pOfAn;
  73. }
  74. }
  75. }
  //--- Table of BrillPatch records for the part-of-speech tagger (per the array name).
  //    NOTE(review): BrillPatch is declared elsewhere. Judging from the hand-inserted "a"
  //    rule near the end of the table ("convert A from Det to Noun after Det"), each row
  //    looks like
  //      { POS to change from, POS to change to, context template,
  //        first template tag, second template tag, word string, word string }
  //    with MS_Unknown / NULL filling the slots a template does not use -- confirm against
  //    the struct declaration before editing.
  //    NOTE(review): several rows repeat verbatim; presumably that is intentional and the
  //    row order matters -- confirm before de-duplicating or re-sorting.
  76. const BrillPatch g_POSTaggerPatches [] =
  77. {
  78. { MS_Adj, MS_Noun, CAP, MS_Unknown, MS_Unknown, NULL, NULL },
  79. { MS_Verb, MS_Noun, PREV1T, MS_Adj, MS_Unknown, NULL, NULL },
  80. { MS_Verb, MS_Noun, CAP, MS_Unknown, MS_Unknown, NULL, NULL },
  81. { MS_Noun, MS_Verb, PREV1T, MS_VAux, MS_Unknown, NULL, NULL },
  82. { MS_Conj, MS_Adv, NEXT2T, MS_Conj, MS_Unknown, NULL, NULL },
  83. { MS_Adj, MS_Adv, NEXT1T, MS_Verb, MS_Unknown, NULL, NULL },
  84. { MS_Adj, MS_Adv, PREV1TNEXT1T, MS_Noun, MS_Unknown, NULL, NULL },
  85. { MS_Verb, MS_Noun, PREV2T, MS_Prep, MS_Unknown, NULL, NULL },
  86. { MS_Noun, MS_Verb, PREV1T, MS_SubjPron, MS_Unknown, NULL, NULL },
  87. { MS_Noun, MS_Verb, PREV1T, MS_Pron, MS_Unknown, NULL, NULL },
  88. { MS_Noun, MS_Verb, PREV1T, MS_Adv, MS_Unknown, NULL, NULL },
  89. { MS_Verb, MS_Noun, NEXT1T, MS_VAux, MS_Unknown, NULL, NULL },
  90. { MS_Adj, MS_Adv, PREV1TNEXT1T, MS_Verb, MS_Adj, NULL, NULL },
  91. { MS_Verb, MS_Noun, PREV1TNEXT1T, MS_Noun, MS_Verb, NULL, NULL },
  92. { MS_Noun, MS_Adj, NEXT1T, MS_Adj, MS_Unknown, NULL, NULL },
  93. { MS_Verb, MS_Noun, NEXT1T, MS_CConj, MS_Unknown, NULL, NULL },
  94. { MS_Adj, MS_Verb, PREV1T, MS_VAux, MS_Unknown, NULL, NULL },
  95. { MS_Noun, MS_Verb, PREV1TNEXT1T, MS_Prep, MS_Adv, NULL, NULL },
  96. { MS_Noun, MS_Verb, PREV1TNEXT1T, MS_Prep, MS_Adj, NULL, NULL },
  97. { MS_Adv, MS_Prep, PREV1TNEXT1T, MS_Noun, MS_Prep, NULL, NULL },
  98. { MS_Adv, MS_Prep, PREV1TNEXT2T, MS_Verb, MS_Noun, NULL, NULL },
  99. { MS_Adj, MS_Adv, NEXT1T, MS_Adv, MS_Unknown, NULL, NULL },
  100. { MS_Noun, MS_Verb, NEXT1T, MS_ObjPron, MS_Unknown, NULL, NULL },
  101. { MS_Adv, MS_Adj, PREV1TNEXT1T, MS_Prep, MS_Noun, NULL, NULL },
  102. { MS_Noun, MS_Verb, PREV1TNEXT1T, MS_Interr, MS_Prep, NULL, NULL },
  103. { MS_Adj, MS_Adv, PREV1TNEXT1T, MS_Adv, MS_Adj, NULL, NULL },
  104. { MS_Prep, MS_Verb, PREV1T, MS_VAux, MS_Unknown, NULL, NULL },
  105. { MS_Adv, MS_Conj, NEXT1T, MS_SubjPron, MS_Unknown, NULL, NULL },
  106. { MS_Adj, MS_Noun, PREV1TNEXT1T, MS_Adj, MS_Prep, NULL, NULL },
  107. { MS_Noun, MS_Verb, PREV1TNEXT2T, MS_Interr, MS_Noun, NULL, NULL },
  108. { MS_Noun, MS_Verb, PREV1T, MS_VAux, MS_Unknown, NULL, NULL },
  109. { MS_Verb, MS_Noun, PREV1TNEXT1T, MS_Unknown, MS_Verb, NULL, NULL },
  110. { MS_Adj, MS_Verb, PREV1TNEXT1T, MS_Verb, MS_Adj, NULL, NULL },
  111. { MS_Conj, MS_Adv, PREV1TNEXT1T, MS_Unknown, MS_Unknown, NULL, NULL },
  112. { MS_Adv, MS_Prep, PREV1TNEXT2T, MS_Prep, MS_Unknown, NULL, NULL },
  113. { MS_Adj, MS_Adv, PREV2TNEXT1T, MS_Verb, MS_Adj, NULL, NULL },
  114. { MS_Adv, MS_Prep, NEXT1T, MS_Noun, MS_Unknown, NULL, NULL },
  115. { MS_Verb, MS_Noun, PREV1TNEXT1T, MS_Conj, MS_Verb, NULL, NULL },
  116. { MS_Adj, MS_Noun, PREV1TNEXT1T, MS_Prep, MS_Prep, NULL, NULL },
  117. { MS_Noun, MS_Verb, PREV1TNEXT1T, MS_Prep, MS_Pron, NULL, NULL },
  118. { MS_Adj, MS_Noun, PREV1TNEXT1T, MS_Noun, MS_Verb, NULL, NULL },
  119. { MS_Adj, MS_Adv, PREV1T, MS_VAux, MS_Unknown, NULL, NULL },
  120. { MS_Adj, MS_Noun, NEXT1T, MS_VAux, MS_Unknown, NULL, NULL },
  121. { MS_Adj, MS_Adv, NEXT1T, MS_VAux, MS_Unknown, NULL, NULL },
  122. { MS_Adj, MS_Noun, PREV1TNEXT1T, MS_Unknown, MS_Prep, NULL, NULL },
  123. { MS_Verb, MS_Noun, PREV2TNEXT1T, MS_Unknown, MS_Verb, NULL, NULL },
  124. { MS_Noun, MS_Verb, PREV1T, MS_SubjPron, MS_Unknown, NULL, NULL },
  125. { MS_Adj, MS_Verb, PREV1TNEXT1T, MS_Prep, MS_Adv, NULL, NULL },
  126. { MS_Adv, MS_Adj, PREV1TNEXT1T, MS_Conj, MS_Noun, NULL, NULL },
  127. { MS_Conj, MS_Adv, PREV1TNEXT1T, MS_Prep, MS_Adj, NULL, NULL },
  128. { MS_Adv, MS_Prep, NEXT1T, MS_Verb, MS_Unknown, NULL, NULL },
  129. { MS_Noun, MS_Adj, NEXT1T, MS_Adj, MS_Unknown, NULL, NULL },
  130. { MS_Adv, MS_Conj, NEXT1T, MS_Pron, MS_Unknown, NULL, NULL },
  131. { MS_Conj, MS_Adv, PREV1T, MS_VAux, MS_Unknown, NULL, NULL },
  132. { MS_Noun, MS_Verb, PREV1T, MS_Adv, MS_Unknown, NULL, NULL },
  133. { MS_Verb, MS_Noun, PREV2TNEXT1T, MS_CConj, MS_Unknown, NULL, NULL },
  134. { MS_Noun, MS_Verb, PREV1T, MS_VAux, MS_Unknown, NULL, NULL },
  135. { MS_Noun, MS_Verb, PREV1T, MS_Pron, MS_Unknown, NULL, NULL },
  136. { MS_Adv, MS_Adj, PREV2TNEXT1T, MS_Prep, MS_Noun, NULL, NULL },
  137. { MS_Noun, MS_Adj, PREV1TNEXT1T, MS_Adv, MS_Noun, NULL, NULL },
  138. { MS_Conj, MS_Adv, PREV1TNEXT1T, MS_Adv, MS_Adj, NULL, NULL },
  139. //--- Inserted by hand - convert "A" from Det to Noun after Det...
  140. { MS_Det, MS_Noun, CURRWPREV1T, MS_Det, MS_Unknown, L"a", NULL },
  141. { MS_Verb, MS_Adj, PREV1TNEXT1T, MS_Det, MS_Noun, NULL, NULL },
  142. };
  //--- Abbreviation records, keyed by the lower-case abbreviation in the first field.
  //    NOTE(review): AbbrevRecord is declared elsewhere. The layout below is inferred from
  //    these initializers and from CleanupAbbrevTables(), which frees the pPron1, pPron2
  //    and pPron3 members -- confirm against the struct declaration:
  //      { abbreviation, pron 1, POS 1, pron 2, POS 2, pron 3, POS 3, <int>, <int> }
  //    Unused pronunciation slots are NULL / MS_Unknown.
  //    NOTE(review): going by the per-entry comments, the first integer appears to select
  //    the end-of-sentence handling (-1 on most "never EOS" entries, 1 with
  //    "IfEOSNotAbbreviation", 2 with "IfEOSAndLowercaseNotAbbreviation", 0 otherwise) and
  //    the second the pronunciation choice (0 with "SingleOrPluralAbbreviation", 1 with
  //    "DoctorDriveAbbreviation", 2 with "AbbreviationFollowedByDigit", -1 whenever only
  //    one pronunciation is given) -- confirm against the code that consumes the table.
  //    Not const: CleanupAbbrevTables() calls delete [] on the pronunciation pointers, so
  //    they are presumably replaced with heap buffers during initialization -- confirm.
  143. //--- IMPORTANT!!! This list must remain alphabetized for the binary search algorithm to work properly!!!
  144. AbbrevRecord g_AbbreviationTable[] =
  145. {
  146. { L"a", L"EY 1", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  147. { L"amp", L"AE 1 M P", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  148. { L"app", L"AE 1 P", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  149. { L"apr", L"EY 1 - P R AX L", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  150. { L"apt", L"ax - p aa 1 r t - m ax n t", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  151. { L"assoc", L"ax - s ow 2 - s iy - ey 1 - sh ax n", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  152. { L"asst", L"ax - s ih 1 s - t ax n t", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  153. { L"aug", L"ao 1 - g ax s t", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  154. { L"ave", L"ae 1 v - ax - n uw 2", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  155. //--- Initial - never EOS
  156. { L"b", L"B IY 1", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
  157. { L"bldg", L"b ih 1 l - d ih ng", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  158. { L"blvd", L"b uh 1 l - ax - v aa 2 r d", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  159. //--- SingleOrPluralAbbreviation
  160. { L"bu", L"b uh 1 sh - ax l", MS_Noun, L"b uh 1 sh - ax l Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  //--- NOTE(review): unlike the other "never EOS" initials, this entry uses 0, 3 rather
  //    than -1, -1 -- confirm the comment below still describes it.
  161. //--- Initial - never EOS
  162. { L"c", L"s eh 1 l - s iy - ax s", MS_Noun, L"s iy 1", MS_Noun, NULL, MS_Unknown, 0, 3 },
  163. //--- SingleOrPluralAbbreviation
  164. { L"cal", L"k ae 1 l - ax - r iy", MS_Noun, L"k ae 1 l - ax - r iy Z", MS_Noun, L"K AE 1 L", MS_Noun, 0, 0 },
  165. { L"cals", L"k ae 1 l - ax - r iy Z", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  166. { L"ch", L"ch ae 1 p - t er", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  167. //--- SingleOrPluralAbbreviation
  168. { L"cl", L"s eh 1 n - t ax - l iy 2 - t er", MS_Noun, L"s eh 1 n - t ax - l iy 2 - t er Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  169. //--- SingleOrPluralAbbreviation
  170. { L"cm", L"s eh 1 n - t ax - m iy 2 - t er", MS_Noun, L"s eh 1 n - t ax - m iy 2 - t er Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  171. { L"cms", L"s eh 1 n - t ax - m iy 2 - t er Z", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  172. { L"cntr", L"s eh 1 n - t er", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  173. //--- IfEOSNotAbbreviation
  174. { L"co", L"k ah 1 m - p ax - n iy", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 1, -1 },
  175. { L"cont", L"k ax n - t ih 1 n - y uw D", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  176. { L"corp", L"k ao 2 r - p ax - r ey 1 - sh ax n", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  177. //--- IfEOSNotAbbreviation
  178. { L"ct", L"k ao 1 r t", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 1, -1 },
  179. { L"ctr", L"s eh 1 n - t er", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  180. //--- IfEOSNotAbbreviation
  181. { L"cu", L"k y uw 1 - b ih k", MS_Noun, L"k y uw 1 - b ih k", MS_Noun, NULL, MS_Unknown, 1, 4 },
  182. //--- Initial - never EOS
  183. { L"d", L"D IY 1", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
  184. //--- SingleOrPluralAbbreviation
  185. { L"db", L"d eh 1 s - ax - b ax l", MS_Noun, L"d eh 1 s - ax - b ax l Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  186. { L"dec", L"d ih - s eh 1 m - b er", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  187. //--- SingleOrPluralAbbreviation
  188. { L"deg", L"d ih - g r iy 1", MS_Noun, L"d ih - g r iy 1 z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  189. { L"dept", L"d ih - p aa 1 r t - m ax n t", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  190. { L"dist", L"d ih 1 s - t r ax k t", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  191. { L"doc", L"D AA 1 K", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  192. { L"doz", L"d ah 1 z - ax n", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  193. //--- DoctorDriveAbbreviation
  194. { L"dr", L"d aa 1 k - t er", MS_Noun, L"D R AY 1 V", MS_Noun, NULL, MS_Unknown, 0, 1 },
  195. //--- Initial - never EOS
  196. { L"e", L"IY 1", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
  197. //--- IfEOSNotAbbreviation
  198. { L"ed", L"eh 2 jh - ax - k ey 1 - sh ax n", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 1, -1 },
  199. { L"esq", L"eh 1 s - k w ay 2 r", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  200. { L"est", L"ax - s t ae 1 b - l ax sh T", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  201. { L"etc", L"EH T & s eh 1 t - er - ax", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  202. //--- IfEOSNotAbbreviation
  203. { L"ex", L"ih g - z ae 1 m - p ax l", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 1, -1 },
  204. { L"ext", L"ih k - s t eh 1 n - sh ax n", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  //--- NOTE(review): unlike the other "never EOS" initials, this entry uses 0, 3 rather
  //    than -1, -1 -- confirm the comment below still describes it.
  205. //--- Initial - never EOS
  206. { L"f", L"f ae 1 r - ax n - h ay 2 t", MS_Noun, L"eh 1 f", MS_Noun, NULL, MS_Unknown, 0, 3 },
  207. { L"feb", L"f eh 1 b - r uw - eh 2 r - iy", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  208. //--- IfEOSNotAbbreviation
  209. { L"fig", L"f ih 1 g - y er", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 1, -1 },
  210. //--- IfEOSNotAbbreviation
  211. { L"figs", L"f ih 1 g - y er Z", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 1, -1 },
  212. //--- IfEOSNotAbbreviation
  213. { L"fl", L"f l uw 1 - ih d", MS_Noun, L"f l uw 1 - ih d", MS_Noun, NULL, MS_Unknown, 1, 4 },
  214. { L"fn", L"f uh 1 t - n ow 2 t", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  215. { L"freq", L"f r iy 1 - k w ax n - s iy", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  216. { L"fri", L"f r ay 1 - d ey", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  217. //--- SingleOrPluralAbbreviation
  218. { L"ft", L"F UH 1 T", MS_Noun, L"F IY 1 T", MS_Noun, L"F AO 1 R T", MS_Noun, 0, 0 },
  219. { L"fwd", L"f ao 1 r - w er d", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  220. //--- SingleOrPluralAbbreviation, IfEOSNotAbbreviation
  221. { L"g", L"G R AE 1 M", MS_Noun, L"G R AE 1 M Z", MS_Noun, L"JH IY 1", MS_Noun, 1, 0 },
  222. //--- SingleOrPluralAbbreviation
  223. { L"gal", L"g ae 1 l - ax n", MS_Noun, L"g ae 1 l - ax n Z", MS_Noun, L"G AE 1 L", MS_Noun, 0, 0 },
  224. //--- TITLE - never EOS
  225. { L"gen", L"jh eh 1 n - er - ax l", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
  226. //--- IfEOSNotAbbreviation
  227. { L"gov", L"g ah 1 v - er - n ER", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 1, -1 },
  228. //--- Initial - never EOS
  229. { L"h", L"EY 1 CH", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
  230. //--- IfEOSNotAbbreviation
  231. { L"hr", L"AW 1 ER", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 1, -1 },
  232. { L"hrs", L"AW 1 ER Z", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  233. { L"ht", L"H AY 1 T", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  234. { L"hwy", L"h ay 1 w ey", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  235. { L"hz", L"H ER 1 T S", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  236. //--- Initial - never EOS
  237. { L"i", L"AY 1", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
  238. //--- SingleOrPluralAbbreviation, IfEOSNotAbbreviation
  239. { L"in", L"IH 1 N CH", MS_Noun, L"IH 1 N CH AX Z", MS_Noun, L"IH 1 N", MS_Prep, 1, 0 },
  240. { L"inc", L"ih n - k ao 1 r - p ax - r ey 2 - t AX D", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  241. { L"incl", L"ih n - k l uw 1 - d AX D", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  242. { L"intl", L"ih 2 n - t er - n ae 1 sh - ax - n ax l", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  243. //--- IfEOSAndLowercaseNotAbbreviation
  244. { L"is", L"ay 1 - l ax n d", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 2, -1 },
  245. //--- SingleOrPluralAbbreviation, IfEOSNotAbbreviation
  246. { L"j", L"JH UW 1 L", MS_Noun, L"JH UW 1 L Z", MS_Noun, L"JH EY 1", MS_Noun, 1, 0 },
  247. //--- IfEOSNotAbbreviation
  248. { L"jan", L"jh ae 1 n - y uw - eh 2 r - iy", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 1, -1 },
  249. { L"jr", L"jh uw 1 n - y er", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  250. { L"jul", L"jh uh - l ay 1", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  251. { L"jun", L"JH UW 1 N", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  //--- NOTE(review): unlike the other "never EOS" initials, this entry uses 0, 3 rather
  //    than -1, -1 -- confirm the comment below still describes it.
  252. //--- Initial - never EOS
  253. { L"k", L"k eh 1 l - v ax n", MS_Noun, L"k ey 1", MS_Noun, NULL, MS_Unknown, 0, 3 },
  254. //--- SingleOrPluralAbbreviation
  255. { L"kb", L"k ih 1 l - ax - b ay 2 t", MS_Noun, L"k ih 1 l - ax - b ay 2 t S", MS_Noun, NULL, MS_Unknown, 0, 0 },
  256. //--- SingleOrPluralAbbreviation
  257. { L"kcal", L"k ih 1 l - ax - k ae 2 l - ax - r iy", MS_Noun, L"k ih 1 l - ax - k ae 2 l - ax - r iy Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  258. //--- SingleOrPluralAbbreviation
  259. { L"kg", L"k ih 1 l - ax - g r ae 2 m", MS_Noun, L"k ih 1 l - ax - g r ae 2 m Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  260. { L"kgs", L"k ih 1 l - ax - g r ae 2 m Z", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  261. { L"khz", L"k ih 1 l - ax - h er 2 t s", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  262. //--- SingleOrPluralAbbreviation
  263. { L"kj", L"k ih 1 l - ax - jh uw 2 l", MS_Noun, L"k ih 1 l - ax - jh uw 2 l z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  264. //--- SingleOrPluralAbbreviation
  265. { L"km", L"k ih - l aa 1 m - ih - t er", MS_Noun, L"k ih - l aa 1 m - ih - t er Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  266. //--- SingleOrPluralAbbreviation
  267. { L"kw", L"k ih 1 l - ax - w aa 2 t", MS_Noun, L"k ih 1 l - ax - w aa 2 t S", MS_Noun, NULL, MS_Unknown, 0, 0 },
  268. //--- SingleOrPluralAbbreviation, IfEOSNotAbbreviation
  269. { L"l", L"l iy 1 - t er", MS_Noun, L"l iy 1 - t er Z", MS_Noun, L"EH 1 L", MS_Noun, 1, 0 },
  270. //--- SingleOrPluralAbbreviation
  271. { L"lb", L"P AW 1 N D", MS_Noun, L"P AW 1 N D Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  272. { L"lbs", L"P AW 1 N D Z", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  273. { L"lg", L"L AA 1 R JH", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  274. { L"ln", L"l ey 1 n", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  275. //--- Title - never EOS
  276. { L"lt", L"l uw - t eh 1 n - ax n t", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
  277. { L"ltd", L"l ih 1 m - ih - t AX D", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  278. //--- SingleOrPluralAbbreviation, IfEOSNotAbbreviation
  279. { L"m", L"M IY 1 T ER", MS_Noun, L"M IY 1 T ER Z", MS_Noun, L"EH 1 M", MS_Noun, 1, 0 },
  280. //--- IfEOSNotAbbreviation???
  281. { L"mar", L"M AA 1 R CH", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  282. //--- SingleOrPluralAbbreviation
  283. { L"mb", L"m eh 1 g - ax - b ay 2 t", MS_Noun, L"m eh 1 g - ax - b ay 2 t S", MS_Noun, NULL, MS_Unknown, 0, 0 },
  284. { L"mfg", L"m ae 2 n - y ax - f ae 1 k - ch er - IH NG", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  285. //--- SingleOrPluralAbbreviation
  286. { L"mg", L"m ih 1 l - ax - g r ae 2 m", MS_Noun, L"m ih 1 l - ax - g r ae 2 m Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  287. { L"mgr", L"m ae 1 n - ih - jh ER", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  288. { L"mgs", L"m ih 1 l - ax - g r ae 2 m Z", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  289. { L"mhz", L"m eh 1 g - ax - h er 2 t s", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  290. //--- SingleOrPluralAbbreviation
  291. { L"mi", L"M AY 1 L", MS_Noun, L"M AY 1 L Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  292. { L"mic", L"M AY 1 K", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  293. //--- SingleOrPluralAbbreviation
  294. { L"min", L"m ih 1 n - ax t", MS_Noun, L"m ih 1 n - ax t S", MS_Noun, L"m ih 1 n - ax - m ax m", MS_Noun, 0, 0 },
  295. { L"misc", L"m ih 2 s - ax - l ey 1 - n iy - ax s", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  296. //--- SingleOrPluralAbbreviation
  297. { L"ml", L"m ih 1 l - ax - l iy 2 - t er", MS_Noun, L"m ih 1 l - ax - l iy 2 - t er Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  298. //--- SingleOrPluralAbbreviation
  299. { L"mm", L"m ih 1 l - ax - m iy 2 - t er", MS_Noun, L"m ih 1 l - ax - m iy 2 - t er Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  300. { L"mon", L"m ah 1 n - d ey", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  301. //--- TITLE - never EOS
  302. { L"mr", L"M IH 1 S - T ER", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
  303. //--- TITLE - never EOS
  304. { L"mrs", L"M IH 1 S - AX Z", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
  305. //--- IfEOSNotAbbreviation
  306. { L"ms", L"M IH 1 Z", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 1, -1 },
  307. //--- SingleOrPluralAbbreviation
  308. { L"msec", L"m ih 2 l - ax - s eh 1 k - ax n d", MS_Noun, L"m ih 2 l - ax - s eh 1 k - ax n d Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  309. { L"msecs", L"m ih 2 l - ax - s eh 1 k - ax n d Z", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  310. { L"mt", L"M AW 1 N T", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  311. { L"mtn", L"m aw 1 n - t ax n", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  312. //--- Initial - never EOS
  313. { L"n", L"EH 1 N", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
  314. //--- IfEOSNotAbbreviation
  315. { L"no", L"N UH 1 M - B ER", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 1, -1 },
  316. { L"nov", L"n ow - v eh 1 m - b er", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  317. //--- Initial - never EOS
  318. { L"o", L"OW 1", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
  319. { L"oct", L"aa k - t ow 1 - b er", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  320. //--- IfEOSNotAbbreviation
  321. { L"op", L"OW 1 - P AX S", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 1, -1 },
  322. //--- SingleOrPluralAbbreviation
  323. { L"oz", L"AW 1 N S", MS_Noun, L"AW 1 N - S AX Z", MS_Noun, L"AA 1 Z", MS_Noun, 0, 0 },
  324. { L"ozs", L"AW 1 N - S AX Z", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  325. //--- AbbreviationFollowedByDigit
  326. { L"p", L"P EY 1 JH", MS_Noun, L"P IY 1", MS_Noun, NULL, MS_Unknown, 0, 2 },
  327. //--- IfEOSNotAbbreviation
  328. { L"pg", L"P EY 1 JH", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 1, -1 },
  329. { L"pgs", L"P EY 1 - JH AX Z", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  330. { L"pkg", L"p ae 1 k - ih jh", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  331. { L"pkwy", L"p aa 1 r k - w ey 2", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  332. { L"pl", L"P L EY 1 S", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  333. { L"pp", L"P EY 1 - JH AX Z", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  334. //--- TITLE - never EOS
  335. { L"pres", L"p r eh 1 z - ax - d ax n t", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
  336. //--- TITLE - never EOS
  337. { L"prof", L"p r ax - f eh 1 - s ER", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
  338. { L"pt", L"P OY 1 N T", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  339. //--- Initial - never EOS
  340. { L"q", L"K Y UW 1", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
  341. //--- SingleOrPluralAbbreviation
  342. { L"qt", L"k w ao 1 r t", MS_Noun, L"k w ao 1 r t Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  343. //--- Initial - never EOS
  344. { L"r", L"AA 1 R", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
  345. { L"rd", L"r ow 1 d", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  346. { L"rec", L"R EH 1 K", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  347. { L"rep", L"r eh 2 p - r ih - z eh 1 n - t ax - t ih v", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  348. { L"rt", L"R UW 1 T", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  349. { L"rte", L"R UW 1 T", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  350. //--- Initial - never EOS
  351. { L"s", L"EH 1 S", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
  352. //--- IfEOSAndLowercaseNotAbbreviation
  353. { L"sat", L"s ae 1 t - er - d ey", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 2, -1 },
  354. //--- SingleOrPluralAbbreviation, IfEOSNotAbbreviation
  355. { L"sec", L"s eh 1 k - ax n d", MS_Noun, L"s eh 1 k - ax n d z", MS_Noun, NULL, MS_Unknown, 1, 0 },
  356. //--- TITLE - never EOS
  357. { L"sen", L"s eh 1 n - ax - t ER", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
  358. { L"sep", L"s eh p - t eh 1 m - b er", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  359. { L"sept", L"s eh p - t eh 1 m - b er", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  360. { L"sm", L"S M AO 1 L", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  361. { L"sq", L"S K W EH 1 R", MS_Noun, L"S K W EH 1 R", MS_Noun, NULL, MS_Unknown, 0, 4 },
  362. //--- IfEOSNotAbbreviation
  363. { L"sr", L"s iy 1 n - y er", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 1, -1 },
  364. //--- DoctorDriveAbbreviation
  365. { L"st", L"S EY 1 N T", MS_Noun, L"S T R IY 1 T", MS_Noun, NULL, MS_Unknown, 0, 1 },
  366. { L"ste", L"s w iy 1 t", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  367. //--- IfEOSNotAbbreviation
  368. { L"sun", L"s ah 1 n - d ey", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 1, -1 },
  369. //--- Initial - never EOS
  370. { L"t", L"T IY 1", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
  371. //--- SingleOrPluralAbbreviation
  372. { L"tbsp", L"t ey 1 - b ax l - s p uw 2 n", MS_Noun, L"t ey 1 - b ax l - s p uw 2 n Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  373. { L"tech", L"T EH 1 K", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  374. { L"thu", L"th er 1 z - d ey", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  375. { L"thur", L"th er 1 z - d ey", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  376. { L"thurs", L"th er 1 z - d ey", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  377. //--- SingleOrPluralAbbreviation
  378. { L"tsp", L"t iy 1 - s p uw 2 n", MS_Noun, L"t iy 1 - s p uw 2 n Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  379. { L"tue", L"t uw 1 z - d ey", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  380. { L"tues", L"t uw 1 z - d ey", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  381. //--- Initial - never EOS
  382. { L"u", L"Y UW 1", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
  383. { L"univ", L"y uw 2 - n ax - v er 1 - s ih - t iy", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  384. //--- Initial - never EOS
  385. { L"v", L"V IY 1", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
  386. { L"ver", L"v er 1 - zh ax n", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  387. { L"vers", L"v er 1 - zh ax n", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  388. { L"vol", L"v aa 1 l - y uw m", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  389. { L"vs", L"v er 1 - s ax s", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  390. //--- Initial - never EOS
  391. { L"w", L"d ah 1 b - ax l - y uw", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
  392. //--- IfEOSAndLowercaseNotAbbreviation
  393. { L"wed", L"w eh 1 n z - d ey", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 2, -1 },
  394. { L"wk", L"W IY 1 K", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  395. { L"wt", L"W EY 1 T", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  396. { L"wy", L"W EY 1", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  397. //--- Initial - never EOS
  398. { L"x", L"EH 1 K S", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
  399. //--- Initial - never EOS
  400. { L"y", L"W AY 1", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
  401. //--- SingleOrPluralAbbreviation
  402. { L"yd", L"Y AA 1 R D", MS_Noun, L"Y AA 1 R D Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  403. { L"yds", L"Y AA 1 R D Z", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, 0, -1 },
  404. //--- SingleOrPluralAbbreviation
  405. { L"yr", L"Y IY 1 R", MS_Noun, L"Y IY 1 R Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  406. //--- Initial - never EOS
  407. { L"z", L"Z IY 1", MS_Noun, NULL, MS_Unknown, NULL, MS_Unknown, -1, -1 },
  408. };
  409. //--- IMPORTANT!!! This list must remain alphabetized for the binary search algorithm to work properly!!!
  410. AbbrevRecord g_AmbiguousWordTable[] =
  411. {
  412. //--- ADisambig
  413. { L"a", L"EY 1", MS_Noun, L"AX", MS_Det, NULL, MS_Unknown, 0, 8 },
  414. //--- AllCapsAbbreviation
  415. { L"al", L"EY 1 & EH 1 L", MS_Noun, L"AE 1 L", MS_Noun, NULL, MS_Unknown, 0, 3 },
  416. //--- AllCapsAbbreviation
  417. { L"apr", L"ey 1 & p iy 1 & aa 1 r", MS_Noun, L"ey 1 - p r ax l", MS_Noun, NULL, MS_Unknown, 0, 3 },
  418. //--- AllCapsAbbreviation
  419. { L"as", L"EY 1 & EH 1 S", MS_Noun, L"AE 1 Z", MS_Conj, NULL, MS_Unknown, 0, 3 },
  420. //--- SingleOrPluralAbbreviation
  421. { L"bu", L"b uh 1 sh - ax l", MS_Noun, L"b uh 1 sh - ax l Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  422. //--- DegreeAbbreviation
  423. { L"c", L"s eh 1 l - s iy - ax s", MS_Noun, L"s iy 1", MS_Noun, NULL, MS_Unknown, 0, 6 },
  424. //--- SingleOrPluralAbbreviation
  425. { L"cal", L"k ae 1 l - ax - r iy", MS_Noun, L"k ae 1 l - ax - r iy Z", MS_Noun, L"K AE 1 L", MS_Noun, 0, 0 },
  426. //--- SingleOrPluralAbbreviation
  427. { L"cl", L"s eh 1 n - t ax - l iy 2 - t er", MS_Noun, L"s eh 1 n - t ax - l iy 2 - t er Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  428. //--- SingleOrPluralAbbreviation
  429. { L"cm", L"s eh 1 n - t ax - m iy 2 - t er", MS_Noun, L"s eh 1 n - t ax - m iy 2 - t er Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  430. //--- AllCapsAbbreviation
  431. { L"co", L"S IY 1 & OW 1", MS_Noun, L"k ah 1 m - p ax - n iy", MS_Noun, NULL, MS_Unknown, 0, 3 },
  432. //--- AllCapsAbbreviation
  433. { L"ct", L"s iy 1 & t iy 1", MS_Noun, L"k ao 1 r t", MS_Noun, NULL, MS_Unknown, 0, 3 },
  434. //--- MeasurementModifier
  435. { L"cu", L"S IY 1 & Y UW 1", MS_Noun, L"k y uw 1 - b ih k", MS_Noun, NULL, MS_Unknown, 0, 7 },
  436. //--- SingleOrPluralAbbreviation
  437. { L"db", L"d eh 1 s - ax - b ax l", MS_Noun, L"d eh 1 s - ax - b ax l Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  438. //--- SingleOrPluralAbbreviation
  439. { L"deg", L"d ih - g r iy 1", MS_Noun, L"d ih - g r iy 1 z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  440. //--- DoctorDriveAbbreviation
  441. { L"dr", L"d aa 1 k - t er", MS_Noun, L"D R AY 1 V", MS_Noun, NULL, MS_Unknown, 0, 1 },
  442. //--- DegreeAbbreviation
  443. { L"f", L"f ae 1 r - ax n - h ay 2 t", MS_Noun, L"eh 1 f", MS_Noun, NULL, MS_Unknown, 0, 6 },
  444. //--- AbbreviationFollowedByDigit
  445. { L"fig", L"f ih 1 g - y er", MS_Noun, L"F IH 1 G", MS_Noun, NULL, MS_Unknown, 0, 2 },
  446. //--- AbbreviationFollowedByDigit
  447. { L"figs", L"f ih 1 g - y er Z", MS_Noun, L"F IH 1 G Z", MS_Noun, NULL, MS_Unknown, 0, 2 },
  448. //--- MeasurementModifier
  449. { L"fl", L"eh 1 f & eh 1 l", MS_Noun, L"f l uw 1 - ih d", MS_Noun, NULL, MS_Unknown, 0, 7 },
  450. //--- SingleOrPluralAbbreviation
  451. { L"ft", L"F UH 1 T", MS_Noun, L"F IY 1 T", MS_Noun, L"F AO 1 R T", MS_Noun, 0, 0 },
  452. //--- G, Gram, Grams
  453. { L"g", L"G R AE 1 M", MS_Noun, L"G R AE 1 M Z", MS_Noun, L"JH IY 1", MS_Noun, 0, 5 },
  454. //--- DoctorDriveAbbreviation
  455. { L"gov", L"g ah 1 v - er - n ER", MS_Noun, L"G AH 1 V", MS_Noun, NULL, MS_Unknown, 0, 1 },
  456. //--- AllCapsAbbreviation
  457. { L"hi", L"EY 1 CH & AY 1", MS_Noun, L"H AY 1", MS_Interjection, NULL, MS_Unknown, 0, 3 },
  458. //--- HR hour hours
  459. { L"hr", L"AW 1 ER", MS_Noun, L"AW 1 ER Z", MS_Noun, L"EY 1 CH AA 1 R", MS_Noun, 0, 5 },
  460. //--- J, Joule, Joules
  461. { L"j", L"JH UW 1 L", MS_Noun, L"JH UW 1 L Z", MS_Noun, L"JH EY 1", MS_Noun, 0, 5 },
  462. //--- DegreeAbbreviation
  463. { L"k", L"k eh 1 l - v ax n", MS_Noun, L"k ey 1", MS_Noun, NULL, MS_Unknown, 0, 6 },
  464. //--- SingleOrPluralAbbreviation
  465. { L"kb", L"k ih 1 l - ax - b ay 2 t", MS_Noun, L"k ih 1 l - ax - b ay 2 t S", MS_Noun, NULL, MS_Unknown, 0, 0 },
  466. //--- SingleOrPluralAbbreviation
  467. { L"kcal", L"k ih 1 l - ax - k ae 2 l - ax - r iy", MS_Noun, L"k ih 1 l - ax - k ae 2 l - ax - r iy Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  468. //--- SingleOrPluralAbbreviation
  469. { L"kg", L"k ih 1 l - ax - g r ae 2 m", MS_Noun, L"k ih 1 l - ax - g r ae 2 m Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  470. //--- SingleOrPluralAbbreviation
  471. { L"kj", L"k ih 1 l - ax - jh uw 2 l", MS_Noun, L"k ih 1 l - ax - jh uw 2 l z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  472. //--- SingleOrPluralAbbreviation
  473. { L"km", L"k ih - l aa 1 m - ih - t er", MS_Noun, L"k ih - l aa 1 m - ih - t er Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  474. //--- SingleOrPluralAbbreviation
  475. { L"kw", L"k ih 1 l - ax - w aa 2 t", MS_Noun, L"k ih 1 l - ax - w aa 2 t S", MS_Noun, NULL, MS_Unknown, 0, 0 },
  476. //--- L, Liter, Liters
  477. { L"l", L"l iy 1 - t er", MS_Noun, L"l iy 1 - t er Z", MS_Noun, L"EH 1 L", MS_Noun, 0, 5 },
  478. //--- AllCapsAbbreviation
  479. { L"la", L"EH 1 L & EY 1", MS_Noun, L"L AH 1", MS_Noun, NULL, MS_Unknown, 0, 3 },
  480. //--- SingleOrPluralAbbreviation
  481. { L"lb", L"P AW 1 N D", MS_Noun, L"P AW 1 N D Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  482. //--- AllCapsAbbreviation
  483. { L"lts", L"eh 1 l & t iy 1 & eh 1 s", MS_Noun, L"l uw t eh 1 n ax n t s", MS_Noun, NULL, MS_Unknown, 0, 3 },
  484. //--- M, Meter, Meters
  485. { L"m", L"M IY 1 - T ER", MS_Noun, L"M IY 1 - T ER Z", MS_Noun, L"EH 1 M", MS_Noun, 0, 5 },
  486. //--- AllCapsAbbreviation
  487. { L"ma", L"EH 1 M & AA 1", MS_Noun, L"M AA 1", MS_Noun, NULL, MS_Unknown, 0, 3 },
  488. //--- March Mar
  489. { L"mar", L"M AA 1 R CH", MS_Noun, L"M AA 1 R", MS_Verb, NULL, MS_Unknown, 0, 4 },
  490. //--- SingleOrPluralAbbreviation
  491. { L"mb", L"m eh 1 g - ax - b ay 2 t", MS_Noun, L"m eh 1 g - ax - b ay 2 t S", MS_Noun, NULL, MS_Unknown, 0, 0 },
  492. //--- AllCapsAbbreviation
  493. { L"me", L"EH 1 M & IY 1", MS_Noun, L"M IY 1", MS_ObjPron, NULL, MS_Unknown, 0, 3 },
  494. //--- SingleOrPluralAbbreviation
  495. { L"mg", L"m ih 1 l - ax - g r ae 2 m", MS_Noun, L"m ih 1 l - ax - g r ae 2 m Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  496. //--- SingleOrPluralAbbreviation
  497. { L"mi", L"M AY 1 L", MS_Noun, L"M AY 1 L Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  498. //--- SingleOrPluralAbbreviation
  499. { L"min", L"m ih 1 n - ax t", MS_Noun, L"m ih 1 n - ax t S", MS_Noun, L"m ih 1 n - ax - m ax m", MS_Noun, 0, 0 },
  500. //--- SingleOrPluralAbbreviation
  501. { L"ml", L"m ih 1 l - ax - l iy 2 - t er", MS_Noun, L"m ih 1 l - ax - l iy 2 - t er Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  502. //--- SingleOrPluralAbbreviation
  503. { L"mm", L"m ih 1 l - ax - m iy 2 - t er", MS_Noun, L"m ih 1 l - ax - m iy 2 - t er Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  504. //--- SingleOrPluralAbbreviation
  505. { L"mpg", L"m ay 1 l & p er 1 & g ae 1 l ax n", MS_Noun, L"m ay 1 l z & p er 1 & g ae 1 l ax n", MS_Noun, L"eh 1 m & p iy 1 & jh iy 1", MS_Noun, 0, 0 },
  506. //--- SingleOrPluralAbbreviation
  507. { L"mph", L"m ay 1 l & p er 1 & aw 1 er", MS_Noun, L"m ay 1 l z & p er 1 & aw 1 er", MS_Noun, L"eh 1 m & p iy 1 & ey 1 ch", MS_Noun, 0, 0 },
  508. //--- MS millisecond milliseconds
  509. { L"ms", L"m ih 2 l - ax - s eh 1 k - ax n d", MS_Noun, L"m ih 2 l - ax - s eh 1 k - ax n d Z", MS_Noun, L"eh 1 m & eh 1 s", MS_Noun, 0, 5 },
  510. //--- SingleOrPluralAbbreviation
  511. { L"msec", L"m ih 2 l - ax - s eh 1 k - ax n d", MS_Noun, L"m ih 2 l - ax - s eh 1 k - ax n d Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  512. //--- AllCapsAbbreviation
  513. { L"mt", L"EH 1 M & T IY 1", MS_Noun, L"M AW 1 N T", MS_Noun, NULL, MS_Unknown, 0, 3 },
  514. //--- AllCapsAbbreviation
  515. { L"oh", L"OW 1 & EY 1 CH", MS_Noun, L"OW 1", MS_Interjection, NULL, MS_Unknown, 0, 3 },
  516. //--- AllCapsAbbreviation
  517. { L"or", L"OW 1 & AA 1 R", MS_Noun, L"AO 1 R", MS_CConj, NULL, MS_Unknown, 0, 3 },
  518. //--- SingleOrPluralAbbreviation
  519. { L"oz", L"AW 1 N S", MS_Noun, L"AW 1 N S AX Z", MS_Noun, L"AA 1 Z", MS_Noun, 0, 0 },
  520. //--- AbbreviationFollowedByDigit
  521. { L"p", L"P EY 1 JH", MS_Noun, L"P IY 1", MS_Noun, NULL, MS_Unknown, 0, 2 },
  522. //--- AllCapsAbbreviation
  523. { L"pa", L"P IY 1 & EY 1", MS_Noun, L"P AA 1", MS_Noun, NULL, MS_Unknown, 0, 3 },
  524. //--- AllCapsAbbreviation
  525. { L"pg", L"P IY 1 & JH IY 1", MS_Noun, L"P EY 1 JH", MS_Noun, NULL, MS_Unknown, 0, 3 },
  526. //--- AllCapsAbbreviation
  527. { L"po", L"p iy 1 & ow 1", MS_Noun, L"p ow 1", MS_Noun, NULL, MS_Unknown, 0, 3 },
  528. //--- PolishDisambig
  529. { L"polish", L"p ow 1 l - ax sh", MS_Adj, L"p aa 1 l - ih sh", MS_Verb, L"p aa 1 l - ih sh", MS_Noun, 0, 9 },
  530. //--- SingleOrPluralAbbreviation
  531. { L"qt", L"K W AO 1 R T", MS_Noun, L"K W AO 1 R T Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  532. //--- Saturday Sat
  533. { L"sat", L"s ae 1 t - er - d ey", MS_Noun, L"S AE 1 T", MS_Verb, NULL, MS_Unknown, 0, 4 },
  534. //--- SEC Second Seconds
  535. { L"sec", L"s eh 1 k - ax n d", MS_Noun, L"s eh 1 k - ax n d Z", MS_Noun, L"EH 1 S & IY 1 & S IY 1", MS_Noun, 0, 5 },
  536. //--- MeasurementModifier
  537. { L"sq", L"S K W EH 1 R", MS_Noun, L"S K W EH 1 R", MS_Noun, NULL, MS_Unknown, 0, 7 },
  538. //--- AllCapsAbbreviation
  539. { L"sr", L"EH 1 S & AA 1 R", MS_Noun, L"s iy 1 n - y er", MS_Noun, NULL, MS_Unknown, 0, 3 },
  540. //--- DoctorDriveAbbreviation
  541. { L"st", L"S EY 1 N T", MS_Noun, L"S T R IY 1 T", MS_Noun, NULL, MS_Unknown, 0, 1 },
  542. //--- SingleOrPluralAbbreviation
  543. { L"tbsp", L"t ey 1 - b ax l - s p uw 2 n", MS_Noun, L"t ey 1 - b ax l - s p uw 2 n Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  544. //--- SingleOrPluralAbbreviation
  545. { L"tsp", L"t iy 1 - s p uw 2 n", MS_Noun, L"t iy 1 - s p uw 2 n Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  546. //--- AllCapsAbbreviation
  547. { L"us", L"Y UW 1 & EH 1 S", MS_Noun, L"AH 1 S", MS_ObjPron, NULL, MS_Unknown, 0, 3 },
  548. //--- Wednesday Wed
  549. { L"wed", L"w eh 1 n z - d ey", MS_Noun, L"W EH 1 D", MS_Verb, NULL, MS_Unknown, 0, 4 },
  550. //--- AllCapsAbbreviation
  551. { L"wy", L"d ah 1 b - ax l - y uw & W AY 1", MS_Noun, L"W EY 1", MS_Noun, NULL, MS_Unknown, 0, 3 },
  552. //--- SingleOrPluralAbbreviation
  553. { L"yd", L"Y AA 1 R D", MS_Noun, L"Y AA 1 R D Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  554. //--- SingleOrPluralAbbreviation
  555. { L"yr", L"Y IY 1 R", MS_Noun, L"Y IY 1 R Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  556. };
  557. //--- IMPORTANT!!! This list must remain alphabetized for the binary search algorithm to work properly!!! Insert new records at their sorted position; never append.
  558. AbbrevRecord g_PostLexLookupWordTable[] = // Per-word pronunciation records, keyed by lower-case word; the comment above each record names its disambiguation function.
  559. { // Record layout as written here: word, three (pronunciation, part-of-speech) pairs with NULL / MS_Unknown for an unused slot, an integer that is 0 in every record of this table (meaning not visible here), then the position of the record's function in g_PostLexLookupDisambigTable (0 = MeasurementDisambig, 1 = TheDisambig, 2 = ReadDisambig).
  560. //--- MeasurementDisambig
  561. { L"bu", L"b uh 1 sh - ax l", MS_Noun, L"b uh 1 sh - ax l Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  562. //--- MeasurementDisambig
  563. { L"cal", L"k ae 1 l - ax - r iy", MS_Noun, L"k ae 1 l - ax - r iy Z", MS_Noun, L"K AE 1 L", MS_Noun, 0, 0 },
  564. //--- MeasurementDisambig
  565. { L"cl", L"s eh 1 n - t ax - l iy 2 - t er", MS_Noun, L"s eh 1 n - t ax - l iy 2 - t er Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  566. //--- MeasurementDisambig
  567. { L"cm", L"s eh 1 n - t ax - m iy 2 - t er", MS_Noun, L"s eh 1 n - t ax - m iy 2 - t er Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  568. //--- MeasurementDisambig
  569. { L"db", L"d eh 1 s - ax - b ax l", MS_Noun, L"d eh 1 s - ax - b ax l Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  570. //--- MeasurementDisambig
  571. { L"deg", L"d ih - g r iy 1", MS_Noun, L"d ih - g r iy 1 z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  572. //--- MeasurementDisambig
  573. { L"ft", L"F UH 1 T", MS_Noun, L"F IY 1 T", MS_Noun, L"F AO 1 R T", MS_Noun, 0, 0 },
  574. //--- MeasurementDisambig
  575. { L"g", L"G R AE 1 M", MS_Noun, L"G R AE 1 M Z", MS_Noun, L"JH IY 1", MS_Noun, 0, 0 },
  576. //--- MeasurementDisambig
  577. { L"gal", L"g ae 1 l - ax n", MS_Noun, L"g ae 1 l - ax n Z", MS_Noun, L"G AE 1 L", MS_Noun, 0, 0 },
  578. //--- MeasurementDisambig
  579. { L"hr", L"AW 1 ER", MS_Noun, L"AW 1 ER Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  580. //--- MeasurementDisambig
  581. { L"in", L"IH 1 N CH", MS_Noun, L"IH 1 N CH AX Z", MS_Noun, L"IH 1 N", MS_Prep, 0, 0 },
  582. //--- MeasurementDisambig
  583. { L"j", L"JH UW 1 L", MS_Noun, L"JH UW 1 L Z", MS_Noun, L"JH EY 1", MS_Noun, 0, 0 },
  584. //--- MeasurementDisambig
  585. { L"kb", L"k ih 1 l - ax - b ay 2 t", MS_Noun, L"k ih 1 l - ax - b ay 2 t S", MS_Noun, NULL, MS_Unknown, 0, 0 },
  586. //--- MeasurementDisambig -- NOTE(review): the third pronunciation below is identical to the one on the "cal" record; confirm it is intended for "kcal" and not a copy/paste leftover.
  587. { L"kcal", L"k ih 1 l - ax - k ae 2 l - ax - r iy", MS_Noun, L"k ih 1 l - ax - k ae 2 l - ax - r iy Z", MS_Noun, L"K AE 1 L", MS_Noun, 0, 0 },
  588. //--- MeasurementDisambig
  589. { L"kg", L"k ih 1 l - ax - g r ae 2 m", MS_Noun, L"k ih 1 l - ax - g r ae 2 m Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  590. //--- MeasurementDisambig
  591. { L"kj", L"k ih 1 l - ax - jh uw 2 l", MS_Noun, L"k ih 1 l - ax - jh uw 2 l z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  592. //--- MeasurementDisambig
  593. { L"km", L"k ih - l aa 1 m - ih - t er", MS_Noun, L"k ih - l aa 1 m - ih - t er Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  594. //--- MeasurementDisambig
  595. { L"kw", L"k ih 1 l - ax - w aa 2 t", MS_Noun, L"k ih 1 l - ax - w aa 2 t S", MS_Noun, NULL, MS_Unknown, 0, 0 },
  596. //--- MeasurementDisambig
  597. { L"l", L"l iy 1 - t er", MS_Noun, L"l iy 1 - t er Z", MS_Noun, L"EH 1 L", MS_Noun, 0, 0 },
  598. //--- MeasurementDisambig
  599. { L"lb", L"P AW 1 N D", MS_Noun, L"P AW 1 N D Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  600. //--- MeasurementDisambig
  601. { L"m", L"M IY 1 - T ER", MS_Noun, L"M IY 1 - T ER Z", MS_Noun, L"EH 1 M", MS_Noun, 0, 0 },
  602. //--- MeasurementDisambig
  603. { L"mb", L"m eh 1 g - ax - b ay 2 t", MS_Noun, L"m eh 1 g - ax - b ay 2 t S", MS_Noun, NULL, MS_Unknown, 0, 0 },
  604. //--- MeasurementDisambig
  605. { L"mg", L"m ih 1 l - ax - g r ae 2 m", MS_Noun, L"m ih 1 l - ax - g r ae 2 m Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  606. //--- MeasurementDisambig
  607. { L"mi", L"M AY 1 L", MS_Noun, L"M AY 1 L Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  608. //--- MeasurementDisambig
  609. { L"min", L"m ih 1 n - ax t", MS_Noun, L"m ih 1 n - ax t S", MS_Noun, L"m ih 1 n - ax - m ax m", MS_Noun, 0, 0 },
  610. //--- ReadDisambig
  611. { L"misread", L"m ih s - r iy 1 d", MS_Verb, L"m ih s - r eh 1 d", MS_Verb, NULL, MS_Unknown, 0, 2 },
  612. //--- MeasurementDisambig
  613. { L"ml", L"m ih 1 l - ax - l iy 2 - t er", MS_Noun, L"m ih 1 l - ax - l iy 2 - t er Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  614. //--- MeasurementDisambig
  615. { L"mm", L"m ih 1 l - ax - m iy 2 - t er", MS_Noun, L"m ih 1 l - ax - m iy 2 - t er Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  616. //--- MeasurementDisambig
  617. { L"mpg", L"m ay 1 l & p er 1 & g ae 1 l ax n", MS_Noun, L"m ay 1 l z & p er 1 & g ae 1 l ax n", MS_Noun, L"eh 1 m & p iy 1 & jh iy 1", MS_Noun, 0, 0 },
  618. //--- MeasurementDisambig
  619. { L"mph", L"m ay 1 l & p er 1 & aw 1 er", MS_Noun, L"m ay 1 l z & p er 1 & aw 1 er", MS_Noun, L"eh 1 m & p iy 1 & ey 1 ch", MS_Noun, 0, 0 },
  620. //--- MeasurementDisambig
  621. { L"msec", L"m ih 2 l - ax - s eh 1 k - ax n d", MS_Noun, L"m ih 2 l - ax - s eh 1 k - ax n d Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  622. //--- MeasurementDisambig
  623. { L"oz", L"AW 1 N S", MS_Noun, L"AW 1 N S AX Z", MS_Noun, L"AA 1 Z", MS_Noun, 0, 0 },
  624. //--- ReadDisambig
  625. { L"proofread", L"p r uw 1 f - r iy 2 d", MS_Verb, L"p r uw 1 f - r eh 2 d", MS_Verb, NULL, MS_Unknown, 0, 2 },
  626. //--- MeasurementDisambig
  627. { L"qt", L"K W AO 1 R T", MS_Noun, L"K W AO 1 R T Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  628. //--- ReadDisambig
  629. { L"read", L"R IY 1 D", MS_Verb, L"R EH 1 D", MS_Verb, NULL, MS_Unknown, 0, 2 },
  630. //--- MeasurementDisambig
  631. { L"sec", L"S EH 1 k - ax n d", MS_Noun, L"s eh 1 k - ax n d z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  632. //--- MeasurementDisambig
  633. { L"tbsp", L"t ey 1 - b ax l - s p uw 2 n", MS_Noun, L"t ey 1 - b ax l - s p uw 2 n Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  634. //--- TheDisambig
  635. { L"the", L"DH IY 2", MS_Det, L"DH AX 2", MS_Det, NULL, MS_Unknown, 0, 1 },
  636. //--- MeasurementDisambig
  637. { L"tsp", L"t iy 1 - s p uw 2 n", MS_Noun, L"t iy 1 - s p uw 2 n Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  638. //--- MeasurementDisambig
  639. { L"yd", L"Y AA 1 R D", MS_Noun, L"Y AA 1 R D Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  640. //--- MeasurementDisambig
  641. { L"yr", L"Y IY 1 R", MS_Noun, L"Y IY 1 R Z", MS_Noun, NULL, MS_Unknown, 0, 0 },
  642. };
  643. WCHAR *g_pOfA = L"ah 2 v & ax 2 &"; // Phoneme string, presumably for "of a" (per the name) -- TODO confirm with the caller. Points at a string literal, so the characters must never be written through this pointer.
  644. WCHAR *g_pOfAn = L"ah 2 v & ax 2 n &"; // Same as g_pOfA with an extra "n" phoneme, presumably for "of an" -- TODO confirm. Also a string literal: read-only.
  645. const SentBreakDisambigFunc g_SentBreakDisambigTable[] = // CStdSentEnum sentence-break disambiguation routines; presumably selected by array position from records defined elsewhere in this file -- TODO confirm. Do not reorder without checking those users.
  646. {
  647. CStdSentEnum::IsAbbreviationEOS, // position 0
  648. CStdSentEnum::IfEOSNotAbbreviation, // position 1
  649. CStdSentEnum::IfEOSAndLowercaseNotAbbreviation, // position 2
  650. };
  651. const PronDisambigFunc g_PronDisambigTable[] = // CStdSentEnum pronunciation disambiguation routines; presumably selected by array position from abbreviation records defined elsewhere in this file -- TODO confirm. Do not reorder without checking those users.
  652. {
  653. CStdSentEnum::SingleOrPluralAbbreviation, // position 0
  654. CStdSentEnum::DoctorDriveAbbreviation, // position 1
  655. CStdSentEnum::AbbreviationFollowedByDigit, // position 2
  656. CStdSentEnum::DegreeAbbreviation, // position 3
  657. CStdSentEnum::AbbreviationModifier, // position 4
  658. };
  659. const PronDisambigFunc g_AmbiguousWordDisambigTable[] = // Order is significant: the last integer of each g_AmbiguousWordTable record matches a position in this array (e.g. the "polish" record uses 9, the "k" record uses 6).
  660. {
  661. CStdSentEnum::SingleOrPluralAbbreviation, // position 0
  662. CStdSentEnum::DoctorDriveAbbreviation, // position 1
  663. CStdSentEnum::AbbreviationFollowedByDigit, // position 2
  664. CStdSentEnum::AllCapsAbbreviation, // position 3
  665. CStdSentEnum::CapitalizedAbbreviation, // position 4 (records commented like "March Mar" use it)
  666. CStdSentEnum::SECAbbreviation, // position 5 (records commented like "SEC Second Seconds" or "G, Gram, Grams" use it)
  667. CStdSentEnum::DegreeAbbreviation, // position 6
  668. CStdSentEnum::AbbreviationModifier, // position 7 (records commented "MeasurementModifier" use it)
  669. CStdSentEnum::ADisambig, // position 8
  670. CStdSentEnum::PolishDisambig, // position 9
  671. };
  672. const PostLexLookupDisambigFunc g_PostLexLookupDisambigTable[] = // Order is significant: the last integer of each g_PostLexLookupWordTable record matches a position in this array (e.g. "the" uses 1, "read" uses 2).
  673. {
  674. CStdSentEnum::MeasurementDisambig, // position 0
  675. CStdSentEnum::TheDisambig, // position 1
  676. CStdSentEnum::ReadDisambig, // position 2
  677. };
  678. //--- IMPORTANT!!! This list must remain alphabetized for the binary search algorithm to work properly!!! Insert new words at their sorted position; never append.
  679. // This is an alphabetized list of all non-proper-noun words which
  680. // appear within the list of the 200 most frequent first words
  681. // in sentences in both the Brown and WSJ corpora. Words added later carry a trailing comment naming the change.
  682. const SPLSTR g_FirstWords[] = // Every entry is written capitalized (sentence-initial form).
  683. { // DEF_SPLSTR is defined elsewhere; presumably it wraps the literal into an SPLSTR -- TODO confirm.
  684. DEF_SPLSTR( "A" ),
  685. DEF_SPLSTR( "About" ),
  686. DEF_SPLSTR( "According" ),
  687. DEF_SPLSTR( "After" ),
  688. DEF_SPLSTR( "Again" ), // PaulCa added 4/14/99 (Bug 107)
  689. DEF_SPLSTR( "All" ),
  690. DEF_SPLSTR( "Also" ),
  691. DEF_SPLSTR( "Although" ),
  692. DEF_SPLSTR( "Among" ),
  693. DEF_SPLSTR( "An" ),
  694. DEF_SPLSTR( "And" ),
  695. DEF_SPLSTR( "Another" ),
  696. DEF_SPLSTR( "Any" ), // PaulCa added 4/14/99 (Bug 107)
  697. DEF_SPLSTR( "Anyway" ), // PaulCa added 4/14/99 (Bug 107)
  698. DEF_SPLSTR( "Are" ), // PaulCa added 4/14/99 (Bug 107)
  699. DEF_SPLSTR( "As" ),
  700. DEF_SPLSTR( "At" ),
  701. DEF_SPLSTR( "Back" ), // PaulCa added 4/14/99 (Bug 107)
  702. DEF_SPLSTR( "Because" ),
  703. DEF_SPLSTR( "Before" ),
  704. DEF_SPLSTR( "Besides" ), // PaulCa added 4/14/99 (Bug 107)
  705. DEF_SPLSTR( "Both" ),
  706. DEF_SPLSTR( "But" ),
  707. DEF_SPLSTR( "By" ),
  708. DEF_SPLSTR( "Can" ), // PaulCa added 4/14/99 (Bug 107)
  709. DEF_SPLSTR( "Consequently" ), // PaulCa added 4/14/99 (Bug 107)
  710. DEF_SPLSTR( "Dear" ), // PaulCa added 4/14/99 (Bug 107)
  711. DEF_SPLSTR( "Despite" ),
  712. DEF_SPLSTR( "Did" ), // PaulCa added 4/14/99 (Bug 107)
  713. DEF_SPLSTR( "Do" ), // PaulCa added 4/14/99 (Bug 107)
  714. DEF_SPLSTR( "Does" ), // PaulCa added 4/14/99 (Bug 107)
  715. DEF_SPLSTR( "Don't" ), // PaulCa added 4/14/99 (Bug 107)
  716. DEF_SPLSTR( "During" ),
  717. DEF_SPLSTR( "Each" ),
  718. DEF_SPLSTR( "Early" ), // PaulCa added 4/14/99 (Bug 107)
  719. DEF_SPLSTR( "Even" ),
  720. DEF_SPLSTR( "Every" ), // PaulCa added 4/14/99 (Bug 107)
  721. DEF_SPLSTR( "Finally" ), // PaulCa added 4/14/99 (Bug 107)
  722. DEF_SPLSTR( "First" ),
  723. DEF_SPLSTR( "Following" ), // PaulCa added 4/14/99 (Bug 107)
  724. DEF_SPLSTR( "For" ),
  725. DEF_SPLSTR( "Four" ),
  726. DEF_SPLSTR( "From" ),
  727. DEF_SPLSTR( "Further" ), // PaulCa added 4/14/99 (Bug 107)
  728. DEF_SPLSTR( "Furthermore" ), // PaulCa added 4/14/99 (Bug 107)
  729. DEF_SPLSTR( "Generally" ), // PaulCa added 4/14/99 (Bug 107)
  730. DEF_SPLSTR( "Given" ), // PaulCa added 4/14/99 (Bug 107)
  731. DEF_SPLSTR( "Go" ), // PaulCa added 4/14/99 (Bug 107)
  732. DEF_SPLSTR( "Great" ), // PaulCa added 4/14/99 (Bug 107)
  733. DEF_SPLSTR( "Had" ), // PaulCa added 4/14/99 (Bug 107)
  734. DEF_SPLSTR( "Have" ), // PaulCa added 4/14/99 (Bug 107)
  735. DEF_SPLSTR( "Having" ), // PaulCa added 4/14/99 (Bug 107)
  736. DEF_SPLSTR( "He" ),
  737. DEF_SPLSTR( "Her" ),
  738. DEF_SPLSTR( "Here" ),
  739. DEF_SPLSTR( "His" ),
  740. DEF_SPLSTR( "How" ), // PaulCa added 4/14/99 (Bug 107)
  741. DEF_SPLSTR( "However" ),
  742. DEF_SPLSTR( "I" ),
  743. DEF_SPLSTR( "If" ),
  744. DEF_SPLSTR( "In" ),
  745. DEF_SPLSTR( "Indeed" ),
  746. DEF_SPLSTR( "Initially" ), // PaulCa added 4/14/99 (Bug 107)
  747. DEF_SPLSTR( "Instead" ),
  748. DEF_SPLSTR( "Is" ), // PaulCa added 4/14/99 (Bug 107)
  749. DEF_SPLSTR( "It" ),
  750. DEF_SPLSTR( "Its" ),
  751. DEF_SPLSTR( "Just" ), // PaulCa added 4/14/99 (Bug 107)
  752. DEF_SPLSTR( "Last" ),
  753. DEF_SPLSTR( "Later" ),
  754. DEF_SPLSTR( "Let" ), // PaulCa added 4/14/99 (Bug 107)
  755. DEF_SPLSTR( "Like" ),
  756. DEF_SPLSTR( "Many" ),
  757. DEF_SPLSTR( "Maybe" ), // PaulCa added 4/14/99 (Bug 107)
  758. DEF_SPLSTR( "Meanwhile" ),
  759. DEF_SPLSTR( "More" ),
  760. DEF_SPLSTR( "Moreover" ),
  761. DEF_SPLSTR( "Most" ),
  762. DEF_SPLSTR( "Much" ),
  763. DEF_SPLSTR( "My" ), // Added to fix bug #385
  764. DEF_SPLSTR( "Neither" ),
  765. DEF_SPLSTR( "Never" ), // PaulCa added 4/14/99 (Bug 107)
  766. DEF_SPLSTR( "Nevertheless" ),
  767. DEF_SPLSTR( "New" ),
  768. DEF_SPLSTR( "Next" ), // PaulCa added 4/14/99 (Bug 107)
  769. DEF_SPLSTR( "No" ),
  770. DEF_SPLSTR( "None" ), // PaulCa added 4/14/99 (Bug 107)
  771. DEF_SPLSTR( "Nonetheless" ), // PaulCa added 4/14/99 (Bug 107)
  772. DEF_SPLSTR( "Nor" ),
  773. DEF_SPLSTR( "Not" ),
  774. DEF_SPLSTR( "Nothing" ), // PaulCa added 4/14/99 (Bug 107)
  775. DEF_SPLSTR( "Now" ),
  776. DEF_SPLSTR( "Of" ),
  777. DEF_SPLSTR( "On" ),
  778. DEF_SPLSTR( "Once" ),
  779. DEF_SPLSTR( "One" ),
  780. DEF_SPLSTR( "Only" ),
  781. DEF_SPLSTR( "Or" ),
  782. DEF_SPLSTR( "Other" ),
  783. DEF_SPLSTR( "Others" ),
  784. DEF_SPLSTR( "Our" ), // PaulCa added 4/14/99 (Bug 107)
  785. DEF_SPLSTR( "Over" ),
  786. DEF_SPLSTR( "People" ),
  787. DEF_SPLSTR( "Perhaps" ), // PaulCa added 4/14/99 (Bug 107)
  788. DEF_SPLSTR( "Please" ), // PaulCa added 4/14/99 (Bug 107)
  789. DEF_SPLSTR( "Previous" ), // PaulCa added 4/14/99 (Bug 107)
  790. DEF_SPLSTR( "Recent" ), // PaulCa added 4/14/99 (Bug 107)
  791. DEF_SPLSTR( "Right" ), // PaulCa added 4/14/99 (Bug 107)
  792. DEF_SPLSTR( "Second" ), // PaulCa added 4/14/99 (Bug 107)
  793. DEF_SPLSTR( "See" ), // PaulCa added 4/14/99 (Bug 107)
  794. DEF_SPLSTR( "Several" ),
  795. DEF_SPLSTR( "She" ),
  796. DEF_SPLSTR( "Shortly" ), // PaulCa added 4/14/99 (Bug 107)
  797. DEF_SPLSTR( "Similarly" ), // PaulCa added 4/14/99 (Bug 107)
  798. DEF_SPLSTR( "Since" ),
  799. DEF_SPLSTR( "So" ),
  800. DEF_SPLSTR( "Some" ),
  801. DEF_SPLSTR( "Sometimes" ), // PaulCa added 4/14/99 (Bug 107)
  802. DEF_SPLSTR( "Soon" ), // PaulCa added 4/14/99 (Bug 107)
  803. DEF_SPLSTR( "Still" ),
  804. DEF_SPLSTR( "Subsequently" ), // PaulCa added 4/14/99 (Bug 107)
  805. DEF_SPLSTR( "Such" ),
  806. DEF_SPLSTR( "Take" ), // PaulCa added 4/14/99 (Bug 107)
  807. DEF_SPLSTR( "That" ),
  808. DEF_SPLSTR( "The" ),
  809. DEF_SPLSTR( "Their" ),
  810. DEF_SPLSTR( "Then" ),
  811. DEF_SPLSTR( "There" ),
  812. DEF_SPLSTR( "Thereafter" ), // PaulCa added 4/14/99 (Bug 107)
  813. DEF_SPLSTR( "Therefore" ), // PaulCa added 4/14/99 (Bug 107)
  814. DEF_SPLSTR( "These" ),
  815. DEF_SPLSTR( "They" ),
  816. DEF_SPLSTR( "This" ),
  817. DEF_SPLSTR( "Those" ),
  818. DEF_SPLSTR( "Though" ),
  819. DEF_SPLSTR( "Three" ), // PaulCa added 4/14/99 (Bug 107)
  820. DEF_SPLSTR( "Through" ), // PaulCa added 4/14/99 (Bug 107)
  821. DEF_SPLSTR( "Thus" ),
  822. DEF_SPLSTR( "To" ),
  823. DEF_SPLSTR( "Today" ),
  824. DEF_SPLSTR( "Two" ),
  825. DEF_SPLSTR( "Under" ),
  826. DEF_SPLSTR( "Unlike" ), // PaulCa added 4/14/99 (Bug 107)
  827. DEF_SPLSTR( "Until" ),
  828. DEF_SPLSTR( "Upon" ), // PaulCa added 4/14/99 (Bug 107)
  829. DEF_SPLSTR( "We" ),
  830. DEF_SPLSTR( "Well" ), // PaulCa added 4/14/99 (Bug 107)
  831. DEF_SPLSTR( "What" ),
  832. DEF_SPLSTR( "When" ),
  833. DEF_SPLSTR( "Where" ), // PaulCa added 4/14/99 (Bug 107)
  834. DEF_SPLSTR( "Whether" ), // PaulCa added 4/14/99 (Bug 107)
  835. DEF_SPLSTR( "Which" ), // PaulCa added 4/14/99 (Bug 107)
  836. DEF_SPLSTR( "While" ),
  837. DEF_SPLSTR( "Who" ), // PaulCa added 4/14/99 (Bug 107)
  838. DEF_SPLSTR( "Why" ), // PaulCa added 4/14/99 (Bug 107)
  839. DEF_SPLSTR( "Will" ), // PaulCa added 4/14/99 (Bug 107)
  840. DEF_SPLSTR( "With" ),
  841. DEF_SPLSTR( "Within" ),
  842. DEF_SPLSTR( "Without" ), // PaulCa added 4/14/99 (Bug 107)
  843. DEF_SPLSTR( "Yes" ), // PaulCa added 4/14/99 (Bug 107)
  844. DEF_SPLSTR( "Yet" ),
  845. DEF_SPLSTR( "You" ),
  846. DEF_SPLSTR( "Your" ),
  847. };