Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1462 lines
62 KiB

  1. /***********************************************************************************************
  2. * AlphaNorm.cpp *
  3. *---------------*
  4. * Description:
  5. * These functions normalize mostly-alpha strings.
  6. *-----------------------------------------------------------------------------------------------
  7. * Created by AARONHAL August 3, 1999
  8. * Copyright (C) 1999 Microsoft Corporation
  9. * All Rights Reserved
  10. *
  11. ***********************************************************************************************/
  12. #include "stdafx.h"
  13. #include "stdsentenum.h"
  14. /***********************************************************************************************
  15. * IsAbbreviationEOS *
  16. *-------------------*
  17. * Description:
  18. * Abbreviations which get here are ALWAYS abbreviations. This function tries to determine
  19. * whether or not the period at the end of the abbreviation is the end of the sentence.
  20. *
  21. * If match made:
  22. * Sets the Item in the ItemList at ItemPos to the abbreviation.
  23. *
  24. ********************************************************************* AH **********************/
  25. HRESULT CStdSentEnum::IsAbbreviationEOS( const AbbrevRecord* pAbbreviation, CItemList &ItemList, SPLISTPOS ItemPos,
  26. CSentItemMemory &MemoryManager, BOOL* pfIsEOS )
  27. {
  28. SPDBG_FUNC( "CStdSentEnum::IsAbbreviationEOS" );
  29. HRESULT hr = S_OK;
  30. BOOL fMatchedEOS = false;
  31. //--- Need to determine whether the abbreviation's period is also the end of the sentence.
  32. if ( !(*pfIsEOS) )
  33. {
  34. //--- Advance to the beginning of the next token
  35. const WCHAR *pTempNextChar = (WCHAR*) m_pEndOfCurrToken, *pTempEndChar = (WCHAR*) m_pEndChar;
  36. const SPVTEXTFRAG *pTempCurrFrag = m_pCurrFrag;
  37. hr = SkipWhiteSpaceAndTags( pTempNextChar, pTempEndChar, pTempCurrFrag, MemoryManager );
  38. if ( SUCCEEDED( hr ) )
  39. {
  40. //--- If we have reached the end of the buffer, consider the abbreviation's period as
  41. //--- the end of the sentence.
  42. if ( !pTempNextChar )
  43. {
  44. *pfIsEOS = true;
  45. fMatchedEOS = true;
  46. }
  47. //--- Otherwise, only consider the abbreviation's period as the end of the sentence if
  48. //--- the next token is a common first word (which must be capitalized).
  49. else if ( IsCapital( *pTempNextChar ) )
  50. {
  51. WCHAR *pTempEndOfItem = (WCHAR*) FindTokenEnd( pTempNextChar, pTempEndChar );
  52. //--- Try to match a first word.
  53. WCHAR temp = (WCHAR) *pTempEndOfItem;
  54. *pTempEndOfItem = 0;
  55. if ( bsearch( (void*) pTempNextChar, (void*) g_FirstWords, sp_countof( g_FirstWords ),
  56. sizeof( SPLSTR ), CompareStringAndSPLSTR ) )
  57. {
  58. *pfIsEOS = true;
  59. fMatchedEOS = true;
  60. }
  61. *pTempEndOfItem = temp;
  62. }
  63. }
  64. }
  65. //--- Insert abbreviation into the ItemList
  66. if ( SUCCEEDED( hr ) )
  67. {
  68. CSentItem Item;
  69. Item.pItemSrcText = m_pNextChar;
  70. Item.ulItemSrcLen = (long) (m_pEndOfCurrItem - m_pNextChar);
  71. Item.ulItemSrcOffset = m_pCurrFrag->ulTextSrcOffset +
  72. (long)( m_pNextChar - m_pCurrFrag->pTextStart );
  73. Item.ulNumWords = 1;
  74. Item.Words = (TTSWord*) MemoryManager.GetMemory( sizeof(TTSWord), &hr );
  75. if ( SUCCEEDED( hr ) )
  76. {
  77. ZeroMemory( Item.Words, sizeof(TTSWord) );
  78. Item.Words[0].pXmlState = &m_pCurrFrag->State;
  79. Item.Words[0].pWordText = Item.pItemSrcText;
  80. Item.Words[0].ulWordLen = Item.ulItemSrcLen;
  81. Item.Words[0].pLemma = Item.pItemSrcText;
  82. Item.Words[0].ulLemmaLen = Item.ulItemSrcLen;
  83. Item.pItemInfo = (TTSItemInfo*) MemoryManager.GetMemory( sizeof(TTSAbbreviationInfo), &hr );
  84. if ( SUCCEEDED( hr ) )
  85. {
  86. if ( NeedsToBeNormalized( pAbbreviation ) )
  87. {
  88. Item.pItemInfo->Type = eABBREVIATION_NORMALIZE;
  89. }
  90. else
  91. {
  92. Item.pItemInfo->Type = eABBREVIATION;
  93. }
  94. ( (TTSAbbreviationInfo*) Item.pItemInfo )->pAbbreviation = pAbbreviation;
  95. ItemList.SetAt( ItemPos, Item );
  96. }
  97. }
  98. }
  99. return hr;
  100. } /* IsAbbreviationEOS */
  101. /***********************************************************************************************
  102. * IfEOSNotAbbreviation *
  103. *----------------------*
  104. * Description:
  105. * Abbreviations which get here may or may not be abbreviations. If the period is EOS,
  106. * this is not an abbreviation (and return will be E_INVALIDARG), otherwise, it is an
  107. * abbreviation.
  108. *
  109. * If match made:
  110. * Sets the Item in the ItemList at ItemPos to the abbreviation.
  111. *
  112. ********************************************************************* AH **********************/
  113. HRESULT CStdSentEnum::IfEOSNotAbbreviation( const AbbrevRecord* pAbbreviation, CItemList &ItemList, SPLISTPOS ItemPos,
  114. CSentItemMemory &MemoryManager, BOOL* pfIsEOS )
  115. {
  116. SPDBG_FUNC( "CStdSentEnum::IfEOSNotAbbreviation" );
  117. HRESULT hr = S_OK;
  118. //--- Need to determine whether the abbreviation's period is also the end of the sentence.
  119. if ( !(*pfIsEOS) )
  120. {
  121. //--- Advance to the beginning of the next token
  122. const WCHAR *pTempNextChar = m_pEndOfCurrToken, *pTempEndChar = m_pEndChar;
  123. const SPVTEXTFRAG *pTempCurrFrag = m_pCurrFrag;
  124. hr = SkipWhiteSpaceAndTags( pTempNextChar, pTempEndChar, pTempCurrFrag, MemoryManager );
  125. if ( !pTempNextChar )
  126. {
  127. hr = E_INVALIDARG;
  128. }
  129. if ( SUCCEEDED( hr ) )
  130. {
  131. //--- If we have reached the end of the buffer, consider the abbreviation's period as
  132. //--- the end of the sentence.
  133. if ( !pTempNextChar )
  134. {
  135. *pfIsEOS = true;
  136. }
  137. //--- Otherwise, only consider the abbreviation's period as the end of the sentence if
  138. //--- the next token is a common first word (which must be capitalized).
  139. else if ( IsCapital( *pTempNextChar ) )
  140. {
  141. WCHAR *pTempEndOfItem = (WCHAR*) FindTokenEnd( pTempNextChar, pTempEndChar );
  142. //--- Try to match a first word.
  143. WCHAR temp = (WCHAR) *pTempEndOfItem;
  144. *pTempEndOfItem = 0;
  145. if ( bsearch( (void*) pTempNextChar, (void*) g_FirstWords, sp_countof( g_FirstWords ),
  146. sizeof( SPLSTR ), CompareStringAndSPLSTR ) )
  147. {
  148. *pfIsEOS = true;
  149. }
  150. *pTempEndOfItem = temp;
  151. }
  152. }
  153. }
  154. if ( *pfIsEOS )
  155. {
  156. //--- EOS - not an abbreviation
  157. hr = E_INVALIDARG;
  158. }
  159. else
  160. {
  161. //--- Insert abbreviation into the ItemList
  162. CSentItem Item;
  163. Item.pItemSrcText = m_pNextChar;
  164. Item.ulItemSrcLen = (long)(m_pEndOfCurrItem - m_pNextChar);
  165. Item.ulItemSrcOffset = m_pCurrFrag->ulTextSrcOffset +
  166. (long)( m_pNextChar - m_pCurrFrag->pTextStart );
  167. Item.ulNumWords = 1;
  168. Item.Words = (TTSWord*) MemoryManager.GetMemory( sizeof(TTSWord), &hr );
  169. if ( SUCCEEDED( hr ) )
  170. {
  171. ZeroMemory( Item.Words, sizeof(TTSWord) );
  172. Item.Words[0].pXmlState = &m_pCurrFrag->State;
  173. Item.Words[0].pWordText = Item.pItemSrcText;
  174. Item.Words[0].ulWordLen = Item.ulItemSrcLen;
  175. Item.Words[0].pLemma = Item.pItemSrcText;
  176. Item.Words[0].ulLemmaLen = Item.ulItemSrcLen;
  177. Item.pItemInfo = (TTSItemInfo*) MemoryManager.GetMemory( sizeof(TTSAbbreviationInfo), &hr );
  178. if ( SUCCEEDED( hr ) )
  179. {
  180. if ( NeedsToBeNormalized( pAbbreviation ) )
  181. {
  182. Item.pItemInfo->Type = eABBREVIATION_NORMALIZE;
  183. }
  184. else
  185. {
  186. Item.pItemInfo->Type = eABBREVIATION;
  187. }
  188. ( (TTSAbbreviationInfo*) Item.pItemInfo )->pAbbreviation = pAbbreviation;
  189. ItemList.SetAt( ItemPos, Item );
  190. }
  191. }
  192. }
  193. return hr;
  194. } /* IfEOSNotAbbreviation */
  195. /***********************************************************************************************
  196. * IfEOSAndLowercaseNotAbbreviation *
  197. *----------------------------------*
  198. * Description:
  199. * Abbreviations which get here may or may not be abbreviations. If the period is EOS,
  200. * and the next item is lowercase this is not an abbreviation (and return will be E_INVALIDARG),
  201. * otherwise, it is an abbreviation.
  202. *
  203. * If match made:
  204. * Sets the Item in the ItemList at ItemPos to the abbreviation.
  205. *
  206. ********************************************************************* AH **********************/
  207. HRESULT CStdSentEnum::IfEOSAndLowercaseNotAbbreviation( const AbbrevRecord* pAbbreviation, CItemList &ItemList,
  208. SPLISTPOS ItemPos, CSentItemMemory &MemoryManager,
  209. BOOL* pfIsEOS )
  210. {
  211. SPDBG_FUNC( "CStdSentEnum::IfEOSAndLowercaseNotAbbreviation" );
  212. HRESULT hr = S_OK;
  213. //--- Need to determine whether the abbreviation's period is also the end of the sentence.
  214. if ( !(*pfIsEOS) )
  215. {
  216. //--- Advance to the beginning of the next token
  217. const WCHAR *pTempNextChar = m_pEndOfCurrToken, *pTempEndChar = m_pEndChar;
  218. const SPVTEXTFRAG *pTempCurrFrag = m_pCurrFrag;
  219. hr = SkipWhiteSpaceAndTags( pTempNextChar, pTempEndChar, pTempCurrFrag, MemoryManager );
  220. if ( SUCCEEDED( hr ) )
  221. {
  222. //--- If we have reached the end of the buffer, consider the abbreviation's period as
  223. //--- the end of the sentence.
  224. if ( !pTempNextChar )
  225. {
  226. *pfIsEOS = true;
  227. }
  228. //--- Otherwise, only consider the abbreviation's period as the end of the sentence if
  229. //--- the next token is a common first word (which must be capitalized).
  230. else if ( IsCapital( *pTempNextChar ) )
  231. {
  232. WCHAR *pTempEndOfItem = (WCHAR*) FindTokenEnd( pTempNextChar, pTempEndChar );
  233. //--- Try to match a first word.
  234. WCHAR temp = (WCHAR) *pTempEndOfItem;
  235. *pTempEndOfItem = 0;
  236. if ( bsearch( (void*) pTempNextChar, (void*) g_FirstWords, sp_countof( g_FirstWords ),
  237. sizeof( SPLSTR ), CompareStringAndSPLSTR ) )
  238. {
  239. *pfIsEOS = true;
  240. }
  241. *pTempEndOfItem = temp;
  242. }
  243. }
  244. }
  245. if ( *pfIsEOS &&
  246. !iswupper( *m_pNextChar ) )
  247. {
  248. //--- EOS - not an abbreviation
  249. hr = E_INVALIDARG;
  250. }
  251. else
  252. {
  253. //--- Insert abbreviation into the ItemList
  254. CSentItem Item;
  255. Item.pItemSrcText = m_pNextChar;
  256. Item.ulItemSrcLen = (long)(m_pEndOfCurrItem - m_pNextChar);
  257. Item.ulItemSrcOffset = m_pCurrFrag->ulTextSrcOffset +
  258. (long)( m_pNextChar - m_pCurrFrag->pTextStart );
  259. Item.ulNumWords = 1;
  260. Item.Words = (TTSWord*) MemoryManager.GetMemory( sizeof(TTSWord), &hr );
  261. if ( SUCCEEDED( hr ) )
  262. {
  263. ZeroMemory( Item.Words, sizeof(TTSWord) );
  264. Item.Words[0].pXmlState = &m_pCurrFrag->State;
  265. Item.Words[0].pWordText = Item.pItemSrcText;
  266. Item.Words[0].ulWordLen = Item.ulItemSrcLen;
  267. Item.Words[0].pLemma = Item.pItemSrcText;
  268. Item.Words[0].ulLemmaLen = Item.ulItemSrcLen;
  269. Item.pItemInfo = (TTSItemInfo*) MemoryManager.GetMemory( sizeof(TTSAbbreviationInfo), &hr );
  270. if ( SUCCEEDED( hr ) )
  271. {
  272. if ( NeedsToBeNormalized( pAbbreviation ) )
  273. {
  274. Item.pItemInfo->Type = eABBREVIATION_NORMALIZE;
  275. }
  276. else
  277. {
  278. Item.pItemInfo->Type = eABBREVIATION;
  279. }
  280. ( (TTSAbbreviationInfo*) Item.pItemInfo )->pAbbreviation = pAbbreviation;
  281. ItemList.SetAt( ItemPos, Item );
  282. }
  283. }
  284. }
  285. return hr;
  286. } /* IfEOSNotAbbreviation */
  287. /***********************************************************************************************
  288. * SingleOrPluralAbbreviation *
  289. *----------------------------*
  290. * Description:
  291. * At this point, we are already sure that the item is an abbreviation, and just need to
  292. * determine whether it should take its singular form, plural form, or some alternate.
  293. *
  294. ********************************************************************* AH **********************/
  295. HRESULT CStdSentEnum::SingleOrPluralAbbreviation( const AbbrevRecord* pAbbrevInfo, PRONRECORD* pPron,
  296. CItemList& ItemList, SPLISTPOS ListPos )
  297. {
  298. SPDBG_FUNC( "CStdSentEnum::SingleOrPluralAbbreviation" );
  299. HRESULT hr = S_OK;
  300. //--- Get Item which comes before the abbreviation
  301. SPLISTPOS TempPos = ListPos;
  302. TTSSentItem TempItem = ItemList.GetPrev( TempPos );
  303. if ( TempPos )
  304. {
  305. TempItem = ItemList.GetPrev( TempPos );
  306. }
  307. else
  308. {
  309. hr = E_INVALIDARG;
  310. }
  311. if ( TempPos )
  312. {
  313. TempItem = ItemList.GetPrev( TempPos );
  314. }
  315. else
  316. {
  317. hr = E_INVALIDARG;
  318. }
  319. if ( SUCCEEDED( hr ) )
  320. {
  321. pPron->pronArray[PRON_A].POScount = 1;
  322. pPron->pronArray[PRON_B].POScount = 0;
  323. pPron->pronArray[PRON_B].phon_Len = 0;
  324. pPron->hasAlt = false;
  325. pPron->altChoice = PRON_A;
  326. //--- Abbreviation table pronunciations are basically just vendor lex prons...
  327. pPron->pronType = eLEXTYPE_PRIVATE1;
  328. //--- If a cardinal number, need to do singular vs. plural logic
  329. if ( TempItem.pItemInfo->Type == eNUM_CARDINAL ||
  330. TempItem.pItemInfo->Type == eDATE_YEAR )
  331. {
  332. if ( ( TempItem.ulItemSrcLen == 1 &&
  333. wcsncmp( TempItem.pItemSrcText, L"1", 1 ) == 0 ) ||
  334. ( TempItem.ulItemSrcLen == 2 &&
  335. wcsncmp( TempItem.pItemSrcText, L"-1", 2 ) == 0 ) )
  336. {
  337. //--- Use singular form - first entry
  338. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron1 );
  339. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  340. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS1;
  341. pPron->POSchoice = pAbbrevInfo->POS1;
  342. }
  343. else
  344. {
  345. //--- Use plural form - second entry
  346. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron2 );
  347. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  348. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS2;
  349. pPron->POSchoice = pAbbrevInfo->POS2;
  350. }
  351. }
  352. //--- If a decimal number, pick plural
  353. else if ( TempItem.pItemInfo->Type == eNUM_DECIMAL )
  354. {
  355. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron2 );
  356. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  357. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS2;
  358. pPron->POSchoice = pAbbrevInfo->POS2;
  359. }
  360. //--- If an ordinal number or fraction, pick singular
  361. else if ( TempItem.pItemInfo->Type == eNUM_ORDINAL )
  362. {
  363. //--- Use singular form - first entry
  364. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron1 );
  365. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  366. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS1;
  367. pPron->POSchoice = pAbbrevInfo->POS1;
  368. }
  369. //--- Fractions and mixed fractions require some more work...
  370. else if ( TempItem.pItemInfo->Type == eNUM_FRACTION )
  371. {
  372. if ( ( (TTSNumberItemInfo*) TempItem.pItemInfo )->pFractionalPart->fIsStandard )
  373. {
  374. //--- Standard fractions (e.g. 11/20) get the plural form
  375. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron2 );
  376. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  377. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS2;
  378. pPron->POSchoice = pAbbrevInfo->POS2;
  379. }
  380. else
  381. {
  382. //--- Singular form with [of a] or [of an] inserted beforehand
  383. if ( bsearch( (void*) pAbbrevInfo->pPron1, (void*) g_Vowels, sp_countof( g_Vowels ),
  384. sizeof( WCHAR ), CompareWCHARAndWCHAR ) )
  385. {
  386. wcscpy( pPron->pronArray[PRON_A].phon_Str, g_pOfAn );
  387. pPron->pronArray[PRON_A].phon_Len = wcslen( g_pOfAn );
  388. }
  389. else
  390. {
  391. wcscpy( pPron->pronArray[PRON_A].phon_Str, g_pOfA );
  392. pPron->pronArray[PRON_A].phon_Len = wcslen( g_pOfA );
  393. }
  394. wcscat( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron1 );
  395. pPron->pronArray[PRON_A].phon_Len += wcslen( pPron->pronArray[PRON_A].phon_Str );
  396. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS1;
  397. pPron->POSchoice = pAbbrevInfo->POS1;
  398. }
  399. }
  400. else if ( TempItem.pItemInfo->Type == eNUM_MIXEDFRACTION )
  401. {
  402. //--- Plural form
  403. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron2 );
  404. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  405. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS2;
  406. pPron->POSchoice = pAbbrevInfo->POS2;
  407. }
  408. //--- Special case - preceded by "one"
  409. else if ( TempItem.ulItemSrcLen == 3 &&
  410. wcsnicmp( TempItem.pItemSrcText, L"one", 3 ) == 0 )
  411. {
  412. //--- Use singular form - first entry
  413. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron1 );
  414. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  415. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS1;
  416. pPron->POSchoice = pAbbrevInfo->POS1;
  417. }
  418. //--- Special case - Number cu. MeasurementAbbrev (e.g. 10 cu. cm, 1 cu cm)
  419. //--- Special case - Number fl. MeasurementAbbrev (e.g. 10 fl. oz., 10 fl oz)
  420. else if ( ( TempItem.ulItemSrcLen == 2 &&
  421. ( _wcsnicmp( TempItem.pItemSrcText, L"cu", 2 ) == 0 ||
  422. _wcsnicmp( TempItem.pItemSrcText, L"sq", 2 ) == 0 ||
  423. _wcsnicmp( TempItem.pItemSrcText, L"fl", 2 ) == 0 ) ) ||
  424. ( TempItem.ulItemSrcLen == 3 &&
  425. ( _wcsnicmp( TempItem.pItemSrcText, L"cu.", 3 ) == 0 ||
  426. _wcsnicmp( TempItem.pItemSrcText, L"sq.", 3 ) == 0 ||
  427. _wcsnicmp( TempItem.pItemSrcText, L"fl.", 3 ) == 0 ) ) )
  428. {
  429. if ( TempPos )
  430. {
  431. TempItem = ItemList.GetPrev( TempPos );
  432. //--- If a cardinal number, need to do singular vs. plural logic
  433. if ( TempItem.pItemInfo->Type == eNUM_CARDINAL )
  434. {
  435. if ( ( TempItem.ulItemSrcLen == 1 &&
  436. wcsncmp( TempItem.pItemSrcText, L"1", 1 ) == 0 ) ||
  437. ( TempItem.ulItemSrcLen == 2 &&
  438. wcsncmp( TempItem.pItemSrcText, L"-1", 2 ) == 0 ) )
  439. {
  440. //--- Use singular form - first entry
  441. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron1 );
  442. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  443. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS1;
  444. pPron->POSchoice = pAbbrevInfo->POS1;
  445. }
  446. else
  447. {
  448. //--- Use plural form - second entry
  449. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron2 );
  450. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  451. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS2;
  452. pPron->POSchoice = pAbbrevInfo->POS2;
  453. }
  454. }
  455. //--- If a decimal number, pick plural
  456. else if ( TempItem.pItemInfo->Type == eNUM_DECIMAL )
  457. {
  458. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron2 );
  459. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  460. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS2;
  461. pPron->POSchoice = pAbbrevInfo->POS2;
  462. }
  463. //--- If an ordinal number or fraction, pick singular
  464. else if ( TempItem.pItemInfo->Type == eNUM_ORDINAL )
  465. {
  466. //--- Use singular form - first entry
  467. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron1 );
  468. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  469. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS1;
  470. pPron->POSchoice = pAbbrevInfo->POS1;
  471. }
  472. //--- Fractions and mixed fractions require some more work...
  473. else if ( TempItem.pItemInfo->Type == eNUM_FRACTION )
  474. {
  475. if (( (TTSNumberItemInfo*) TempItem.pItemInfo )->pFractionalPart->fIsStandard )
  476. {
  477. //--- Standard fractions (e.g. 11/20) get the plural form
  478. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron2 );
  479. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  480. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS2;
  481. pPron->POSchoice = pAbbrevInfo->POS2;
  482. }
  483. else
  484. {
  485. //--- Singular form with [of a] or [of an] inserted beforehand
  486. //--- (this was handled when processing 'cu' or 'sq')
  487. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron1 );
  488. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  489. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS1;
  490. pPron->POSchoice = pAbbrevInfo->POS1;
  491. }
  492. }
  493. else if ( TempItem.pItemInfo->Type == eNUM_MIXEDFRACTION )
  494. {
  495. //--- Plural form
  496. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron2 );
  497. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  498. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS2;
  499. pPron->POSchoice = pAbbrevInfo->POS2;
  500. }
  501. //--- Special case - preceded by "one"
  502. else if ( TempItem.ulItemSrcLen == 3 &&
  503. wcsnicmp( TempItem.pItemSrcText, L"one", 3 ) == 0 )
  504. {
  505. //--- Use singular form - first entry
  506. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron1 );
  507. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  508. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS1;
  509. pPron->POSchoice = pAbbrevInfo->POS1;
  510. }
  511. //--- Default behavior
  512. else
  513. {
  514. //--- Use plural form - second entry
  515. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron2 );
  516. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  517. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS2;
  518. pPron->POSchoice = pAbbrevInfo->POS2;
  519. }
  520. }
  521. }
  522. //--- Check for number words - just cover through 99...
  523. else if ( ( TempItem.ulItemSrcLen == 3 &&
  524. ( wcsncmp( TempItem.pItemSrcText, L"two", 3 ) == 0 ||
  525. wcsncmp( TempItem.pItemSrcText, L"six", 3 ) == 0 ||
  526. wcsncmp( TempItem.pItemSrcText, L"ten", 3 ) == 0 ) ) ||
  527. ( TempItem.ulItemSrcLen == 4 &&
  528. ( wcsncmp( TempItem.pItemSrcText, L"four", 4 ) == 0 ||
  529. wcsncmp( TempItem.pItemSrcText, L"five", 4 ) == 0 ||
  530. wcsncmp( TempItem.pItemSrcText, L"nine", 4 ) == 0 ) ) ||
  531. ( TempItem.ulItemSrcLen == 5 &&
  532. ( wcsncmp( TempItem.pItemSrcText, L"three", 5 ) == 0 ||
  533. wcsncmp( TempItem.pItemSrcText, L"seven", 5 ) == 0 ||
  534. wcsncmp( TempItem.pItemSrcText, L"eight", 5 ) == 0 ||
  535. wcsncmp( TempItem.pItemSrcText, L"forty", 5 ) == 0 ||
  536. wcsncmp( TempItem.pItemSrcText, L"fifty", 5 ) == 0 ||
  537. wcsncmp( TempItem.pItemSrcText, L"sixty", 5 ) == 0 ) ) ||
  538. ( TempItem.ulItemSrcLen == 6 &&
  539. ( wcsncmp( TempItem.pItemSrcText, L"twenty", 6 ) == 0 ||
  540. wcsncmp( TempItem.pItemSrcText, L"thirty", 6 ) == 0 ||
  541. wcsncmp( TempItem.pItemSrcText, L"eighty", 6 ) == 0 ||
  542. wcsncmp( TempItem.pItemSrcText, L"ninety", 6 ) == 0 ||
  543. wcsncmp( TempItem.pItemSrcText, L"eleven", 6 ) == 0 ||
  544. wcsncmp( TempItem.pItemSrcText, L"twelve", 6 ) == 0 ) ) ||
  545. ( TempItem.ulItemSrcLen == 7 &&
  546. ( wcsncmp( TempItem.pItemSrcText, L"seventy", 7 ) == 0 ||
  547. wcsncmp( TempItem.pItemSrcText, L"fifteen", 7 ) == 0 ||
  548. wcsncmp( TempItem.pItemSrcText, L"sixteen", 7 ) == 0 ) ) ||
  549. ( TempItem.ulItemSrcLen == 8 &&
  550. ( wcsncmp( TempItem.pItemSrcText, L"thirteen", 8 ) == 0 ||
  551. wcsncmp( TempItem.pItemSrcText, L"fourteen", 8 ) == 0 ||
  552. wcsncmp( TempItem.pItemSrcText, L"eighteen", 8 ) == 0 ||
  553. wcsncmp( TempItem.pItemSrcText, L"nineteen", 8 ) == 0 ) ) )
  554. {
  555. //--- Use plural form - second entry
  556. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron2 );
  557. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  558. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS2;
  559. pPron->POSchoice = pAbbrevInfo->POS2;
  560. }
  561. //--- Default behavior
  562. else
  563. {
  564. //--- Has alternate when non-number precedes - special case
  565. if ( pAbbrevInfo->pPron3 )
  566. {
  567. //--- Use initial form - third entry
  568. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron3 );
  569. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  570. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS3;
  571. pPron->POSchoice = pAbbrevInfo->POS3;
  572. }
  573. else
  574. {
  575. //--- Use plural form - second entry
  576. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron2 );
  577. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  578. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS2;
  579. pPron->POSchoice = pAbbrevInfo->POS2;
  580. }
  581. }
  582. }
  583. //--- Default behavior
  584. else if ( hr == E_INVALIDARG )
  585. {
  586. hr = S_OK;
  587. //--- Has alternate when non-number precedes - special case
  588. if ( pAbbrevInfo->pPron3 )
  589. {
  590. //--- Use initial form - third entry
  591. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron3 );
  592. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  593. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS3;
  594. pPron->POSchoice = pAbbrevInfo->POS3;
  595. }
  596. else
  597. {
  598. //--- Use plural form - second entry
  599. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron2 );
  600. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  601. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS2;
  602. pPron->POSchoice = pAbbrevInfo->POS2;
  603. }
  604. }
  605. return hr;
  606. } /* SingleOrPluralAbbreviation */
  607. /***********************************************************************************************
  608. * DoctorDriveAbbreviation *
  609. *-------------------------*
  610. * Description:
  611. * At this point, we are already sure that the item is an abbreviation, and just need to
  612. * determine whether it should be Doctor (Saint) or Drive (Street).
  613. *
  614. ********************************************************************* AH **********************/
  615. HRESULT CStdSentEnum::DoctorDriveAbbreviation( const AbbrevRecord* pAbbrevInfo, PRONRECORD* pPron,
  616. CItemList& ItemList, SPLISTPOS ListPos )
  617. {
  618. SPDBG_FUNC( "CStdSentEnum::SingleOrPluralAbbreviation" );
  619. HRESULT hr = S_OK;
  620. BOOL fMatch = false;
  621. BOOL fDoctor = false;
  622. pPron->pronArray[PRON_A].POScount = 1;
  623. pPron->pronArray[PRON_B].POScount = 0;
  624. pPron->pronArray[PRON_B].phon_Len = 0;
  625. pPron->hasAlt = false;
  626. pPron->altChoice = PRON_A;
  627. //--- Abbreviation table pronunciations are basically just vendor lex prons...
  628. pPron->pronType = eLEXTYPE_PRIVATE1;
  629. //--- Get Item which comes after the Abbreviation
  630. SPLISTPOS TempPos = ListPos;
  631. if ( !ListPos )
  632. {
  633. //--- Go with Drive - end of buffer cannot be followed by a name...
  634. fDoctor = false;
  635. fMatch = true;
  636. }
  637. else
  638. {
  639. TTSSentItem TempItem = ItemList.GetNext( TempPos );
  640. if ( TempItem.eItemPartOfSpeech == MS_EOSItem )
  641. {
  642. //--- Go with Drive - end of buffer cannot be followed by a name...
  643. fDoctor = false;
  644. fMatch = true;
  645. }
  646. else
  647. {
  648. ULONG index = 0;
  649. //--- Try to match a Name (an uppercase letter followed by lowercase letters)
  650. if ( TempItem.ulItemSrcLen > 0 &&
  651. iswupper( TempItem.pItemSrcText[index] ) )
  652. {
  653. index++;
  654. while ( index < TempItem.ulItemSrcLen &&
  655. iswlower( TempItem.pItemSrcText[index] ) )
  656. {
  657. index++;
  658. }
  659. //--- Check for possessives - RAID 5823
  660. if ( index == TempItem.ulItemSrcLen - 2 &&
  661. TempItem.pItemSrcText[index+1] == L'\'' &&
  662. TempItem.pItemSrcText[index+2] == L's' )
  663. {
  664. index += 2;
  665. }
  666. //--- Check for directions - North, South, West, East, Ne, Nw, Se, Sw, N, S, E, W
  667. if ( index == TempItem.ulItemSrcLen &&
  668. wcsncmp( TempItem.pItemSrcText, L"North", 5 ) != 0 &&
  669. wcsncmp( TempItem.pItemSrcText, L"South", 5 ) != 0 &&
  670. wcsncmp( TempItem.pItemSrcText, L"West", 4 ) != 0 &&
  671. wcsncmp( TempItem.pItemSrcText, L"East", 4 ) != 0 &&
  672. !( TempItem.ulItemSrcLen == 2 &&
  673. ( wcsncmp( TempItem.pItemSrcText, L"Ne", 2 ) == 0 ||
  674. wcsncmp( TempItem.pItemSrcText, L"Nw", 2 ) == 0 ||
  675. wcsncmp( TempItem.pItemSrcText, L"Se", 2 ) == 0 ||
  676. wcsncmp( TempItem.pItemSrcText, L"Sw", 2 ) == 0 ) ) &&
  677. !( TempItem.ulItemSrcLen == 1 &&
  678. ( wcsncmp( TempItem.pItemSrcText, L"N", 1 ) == 0 ||
  679. wcsncmp( TempItem.pItemSrcText, L"S", 1 ) == 0 ||
  680. wcsncmp( TempItem.pItemSrcText, L"E", 1 ) == 0 ||
  681. wcsncmp( TempItem.pItemSrcText, L"W", 1 ) == 0 ) ) )
  682. {
  683. //--- Check for name previous item
  684. TempPos = ListPos;
  685. ItemList.GetPrev( TempPos );
  686. if ( TempPos )
  687. {
  688. ItemList.GetPrev( TempPos );
  689. if ( TempPos )
  690. {
  691. TTSSentItem PrevItem = ItemList.GetPrev( TempPos );
  692. index = 0;
  693. if ( PrevItem.ulItemSrcLen > 0 &&
  694. iswupper( PrevItem.pItemSrcText[index++] ) )
  695. {
  696. while ( index < PrevItem.ulItemSrcLen &&
  697. islower( PrevItem.pItemSrcText[index] ) )
  698. {
  699. index++;
  700. }
  701. if ( index == PrevItem.ulItemSrcLen )
  702. {
  703. //--- Go with Drive - names before and after, e.g. Main St. Washington, D.C.
  704. fDoctor = false;
  705. fMatch = true;
  706. }
  707. }
  708. }
  709. }
  710. if ( !fMatch )
  711. {
  712. //--- Go with Doctor - matched a Name after and not a name before
  713. fDoctor = true;
  714. fMatch = true;
  715. }
  716. }
  717. else if ( index == 1 &&
  718. TempItem.ulItemSrcLen == 2 &&
  719. TempItem.pItemSrcText[index] == L'.' )
  720. {
  721. //--- Go with Doctor - matched an initial
  722. fDoctor = true;
  723. fMatch = true;
  724. }
  725. }
  726. }
  727. }
  728. if ( !fMatch )
  729. {
  730. //--- Try to get previous item...
  731. BOOL fSentenceInitial = false;
  732. TempPos = ListPos;
  733. if ( TempPos )
  734. {
  735. ItemList.GetPrev( TempPos );
  736. if ( TempPos )
  737. {
  738. ItemList.GetPrev( TempPos );
  739. if ( !TempPos )
  740. {
  741. fSentenceInitial = true;
  742. }
  743. else
  744. {
  745. TTSSentItem PrevItem = ItemList.GetPrev( TempPos );
  746. if ( PrevItem.pItemInfo->Type == eOPEN_PARENTHESIS ||
  747. PrevItem.pItemInfo->Type == eOPEN_BRACKET ||
  748. PrevItem.pItemInfo->Type == eOPEN_BRACE ||
  749. PrevItem.pItemInfo->Type == eSINGLE_QUOTE ||
  750. PrevItem.pItemInfo->Type == eDOUBLE_QUOTE )
  751. {
  752. fSentenceInitial = true;
  753. }
  754. }
  755. }
  756. }
  757. //--- Sentence initial - go with Doctor
  758. if ( fSentenceInitial )
  759. {
  760. fDoctor = true;
  761. fMatch = true;
  762. }
  763. //--- Default - go with Drive
  764. else
  765. {
  766. fDoctor = false;
  767. fMatch = true;
  768. }
  769. }
  770. if ( fDoctor )
  771. {
  772. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron1 );
  773. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  774. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS1;
  775. pPron->POSchoice = pAbbrevInfo->POS1;
  776. }
  777. else
  778. {
  779. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron2 );
  780. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  781. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS2;
  782. pPron->POSchoice = pAbbrevInfo->POS2;
  783. }
  784. return hr;
  785. } /* DoctorDriveAbbreviation */
  786. /***********************************************************************************************
  787. * AbbreviationFollowedByDigit *
  788. *-----------------------------*
  789. * Description:
  790. * At this point, we are already sure that the item is an abbreviation, and just need to
  791. * determine which pronunciation to go with.
  792. *
  793. ********************************************************************* AH **********************/
  794. HRESULT CStdSentEnum::AbbreviationFollowedByDigit( const AbbrevRecord* pAbbrevInfo, PRONRECORD* pPron,
  795. CItemList& ItemList, SPLISTPOS ListPos )
  796. {
  797. SPDBG_FUNC( "CStdSentEnum::AbbreviationFollowedByDigit" );
  798. HRESULT hr = S_OK;
  799. pPron->pronArray[PRON_A].POScount = 1;
  800. pPron->pronArray[PRON_B].POScount = 0;
  801. pPron->pronArray[PRON_B].phon_Len = 0;
  802. pPron->hasAlt = false;
  803. pPron->altChoice = PRON_A;
  804. //--- Abbreviation table pronunciations are basically just vendor lex prons...
  805. pPron->pronType = eLEXTYPE_PRIVATE1;
  806. //--- Get Item which comes after the Abbreviation
  807. SPLISTPOS TempPos = ListPos;
  808. if ( !ListPos )
  809. {
  810. //--- Go with pron 2
  811. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron2 );
  812. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  813. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS2;
  814. pPron->POSchoice = pAbbrevInfo->POS2;
  815. }
  816. else
  817. {
  818. TTSSentItem TempItem = ItemList.GetNext( TempPos );
  819. if ( TempItem.ulItemSrcLen > 0 &&
  820. iswdigit( TempItem.pItemSrcText[0] ) )
  821. {
  822. //--- Go with pron 1
  823. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron1 );
  824. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  825. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS1;
  826. pPron->POSchoice = pAbbrevInfo->POS1;
  827. }
  828. else
  829. {
  830. //--- Go with pron 2
  831. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron2 );
  832. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  833. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS2;
  834. pPron->POSchoice = pAbbrevInfo->POS2;
  835. }
  836. }
  837. return hr;
  838. } /* AbbreviationFollowedByDigit */
  839. /***********************************************************************************************
  840. * AllCapsAbbreviation *
  841. *---------------------*
  842. * Description:
  843. * This functions disambiguates abbreviations without periods which are pronounced
  844. * differently if they are all capital letters.
  845. *
  846. ********************************************************************* AH **********************/
  847. HRESULT CStdSentEnum::AllCapsAbbreviation( const AbbrevRecord* pAbbrevInfo, PRONRECORD* pPron,
  848. CItemList& ItemList, SPLISTPOS ListPos )
  849. {
  850. SPDBG_FUNC( "CStdSentEnum::AllCapsAbbreviation" );
  851. HRESULT hr = S_OK;
  852. pPron->pronArray[PRON_A].POScount = 1;
  853. pPron->pronArray[PRON_B].POScount = 0;
  854. pPron->pronArray[PRON_B].phon_Len = 0;
  855. pPron->hasAlt = false;
  856. pPron->altChoice = PRON_A;
  857. //--- Abbreviation table pronunciations are basically just vendor lex prons...
  858. pPron->pronType = eLEXTYPE_PRIVATE1;
  859. //--- Get this item
  860. SPLISTPOS TempPos = ListPos;
  861. TTSSentItem TempItem = ItemList.GetPrev( TempPos );
  862. if ( TempPos )
  863. {
  864. TempItem = ItemList.GetPrev( TempPos );
  865. }
  866. else
  867. {
  868. hr = E_INVALIDARG;
  869. }
  870. if ( SUCCEEDED( hr ) )
  871. {
  872. for ( ULONG i = 0; i < TempItem.ulItemSrcLen; i++ )
  873. {
  874. if ( !iswupper( TempItem.pItemSrcText[i] ) )
  875. {
  876. break;
  877. }
  878. }
  879. //--- All Caps - go with first pronunciation
  880. if ( i == TempItem.ulItemSrcLen )
  881. {
  882. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron1 );
  883. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  884. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS1;
  885. pPron->POSchoice = pAbbrevInfo->POS1;
  886. }
  887. //--- Not All Caps - go with second pronunciation
  888. else
  889. {
  890. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron2 );
  891. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  892. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS2;
  893. pPron->POSchoice = pAbbrevInfo->POS2;
  894. }
  895. }
  896. return hr;
  897. } /* AllCapsAbbreviation */
  898. /***********************************************************************************************
  899. * CapitalizedAbbreviation *
  900. *-------------------------*
  901. * Description:
  902. * This functions disambiguates abbreviations without periods which are pronounced
  903. * differently if they begin with a capital letter.
  904. *
  905. ********************************************************************* AH **********************/
  906. HRESULT CStdSentEnum::CapitalizedAbbreviation( const AbbrevRecord* pAbbrevInfo, PRONRECORD* pPron,
  907. CItemList& ItemList, SPLISTPOS ListPos )
  908. {
  909. SPDBG_FUNC( "CStdSentEnum::CapitalizedAbbreviation" );
  910. HRESULT hr = S_OK;
  911. pPron->pronArray[PRON_A].POScount = 1;
  912. pPron->pronArray[PRON_B].POScount = 0;
  913. pPron->pronArray[PRON_B].phon_Len = 0;
  914. pPron->hasAlt = false;
  915. pPron->altChoice = PRON_A;
  916. //--- Abbreviation table pronunciations are basically just vendor lex prons...
  917. pPron->pronType = eLEXTYPE_PRIVATE1;
  918. //--- Get this item
  919. SPLISTPOS TempPos = ListPos;
  920. TTSSentItem TempItem = ItemList.GetPrev( TempPos );
  921. if ( TempPos )
  922. {
  923. TempItem = ItemList.GetPrev( TempPos );
  924. }
  925. else
  926. {
  927. hr = E_INVALIDARG;
  928. }
  929. if ( SUCCEEDED( hr ) )
  930. {
  931. //--- Capitalized - go with first pronunciation
  932. if ( iswupper( TempItem.pItemSrcText[0] ) )
  933. {
  934. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron1 );
  935. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  936. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS1;
  937. pPron->POSchoice = pAbbrevInfo->POS1;
  938. }
  939. //--- Not Capitalized - go with second pronunciation
  940. else
  941. {
  942. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron2 );
  943. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  944. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS2;
  945. pPron->POSchoice = pAbbrevInfo->POS2;
  946. }
  947. }
  948. return hr;
  949. } /* CapitalizedAbbreviation */
  950. /***********************************************************************************************
  951. * SECAbbreviation *
  952. *-----------------*
  953. * Description:
  954. * This functions disambiguates SEC, Sec, and sec and so forth...
  955. *
  956. ********************************************************************* AH **********************/
  957. HRESULT CStdSentEnum::SECAbbreviation( const AbbrevRecord* pAbbrevInfo, PRONRECORD* pPron,
  958. CItemList& ItemList, SPLISTPOS ListPos )
  959. {
  960. SPDBG_FUNC( "CStdSentEnum::SECAbbreviation" );
  961. HRESULT hr = S_OK;
  962. pPron->pronArray[PRON_A].POScount = 1;
  963. pPron->pronArray[PRON_B].POScount = 0;
  964. pPron->pronArray[PRON_B].phon_Len = 0;
  965. pPron->hasAlt = false;
  966. pPron->altChoice = PRON_A;
  967. //--- Abbreviation table pronunciations are basically just vendor lex prons...
  968. pPron->pronType = eLEXTYPE_PRIVATE1;
  969. //--- Get this item
  970. SPLISTPOS TempPos = ListPos;
  971. TTSSentItem TempItem = ItemList.GetPrev( TempPos );
  972. if ( TempPos )
  973. {
  974. TempItem = ItemList.GetPrev( TempPos );
  975. }
  976. else
  977. {
  978. hr = E_INVALIDARG;
  979. }
  980. if ( SUCCEEDED( hr ) )
  981. {
  982. for ( ULONG i = 0; i < TempItem.ulItemSrcLen; i++ )
  983. {
  984. if ( !iswupper( TempItem.pItemSrcText[i] ) )
  985. {
  986. break;
  987. }
  988. }
  989. //--- All Caps - go with SEC
  990. if ( i == TempItem.ulItemSrcLen )
  991. {
  992. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron3 );
  993. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  994. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS3;
  995. pPron->POSchoice = pAbbrevInfo->POS3;
  996. }
  997. //--- Not All Caps - do SingleOrPlural disambiguation
  998. else
  999. {
  1000. SingleOrPluralAbbreviation( pAbbrevInfo, pPron, ItemList, ListPos );
  1001. }
  1002. }
  1003. return hr;
  1004. } /* SECAbbreviation */
  1005. /***********************************************************************************************
  1006. * DegreeAbbreviation *
  1007. *--------------------*
  1008. * Description:
  1009. * This functions disambiguates C, F, and K (Celsius, Fahrenheit, Kelvin)
  1010. *
  1011. ********************************************************************* AH **********************/
  1012. HRESULT CStdSentEnum::DegreeAbbreviation( const AbbrevRecord* pAbbrevInfo, PRONRECORD* pPron,
  1013. CItemList& ItemList, SPLISTPOS ListPos )
  1014. {
  1015. SPDBG_FUNC( "CStdSentEnum::DegreeAbbreviation" );
  1016. HRESULT hr = S_OK;
  1017. pPron->pronArray[PRON_A].POScount = 1;
  1018. pPron->pronArray[PRON_B].POScount = 0;
  1019. pPron->pronArray[PRON_B].phon_Len = 0;
  1020. pPron->hasAlt = false;
  1021. pPron->altChoice = PRON_A;
  1022. //--- Abbreviation table pronunciations are basically just vendor lex prons...
  1023. pPron->pronType = eLEXTYPE_PRIVATE1;
  1024. //--- Get this item and previous item
  1025. SPLISTPOS TempPos = ListPos;
  1026. TTSSentItem TempItem, PrevItem;
  1027. BOOL fLetter = false;
  1028. if ( TempPos )
  1029. {
  1030. ItemList.GetPrev( TempPos );
  1031. if ( TempPos )
  1032. {
  1033. TempItem = ItemList.GetPrev( TempPos );
  1034. if ( TempPos )
  1035. {
  1036. PrevItem = ItemList.GetPrev( TempPos );
  1037. if ( PrevItem.pItemInfo->Type != eNUM_DEGREES )
  1038. {
  1039. fLetter = true;
  1040. }
  1041. }
  1042. else
  1043. {
  1044. fLetter = true;
  1045. }
  1046. }
  1047. else
  1048. {
  1049. hr = E_INVALIDARG;
  1050. }
  1051. }
  1052. else
  1053. {
  1054. hr = E_INVALIDARG;
  1055. }
  1056. if ( SUCCEEDED( hr ) )
  1057. {
  1058. if ( fLetter )
  1059. {
  1060. //--- This word is just the letter C, F, or K - second pron
  1061. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron2 );
  1062. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  1063. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS2;
  1064. pPron->POSchoice = pAbbrevInfo->POS2;
  1065. }
  1066. //--- This word is the degree expansion - Celsius, Fahrenheit, or Kelvin
  1067. else
  1068. {
  1069. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron1 );
  1070. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  1071. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS1;
  1072. pPron->POSchoice = pAbbrevInfo->POS1;
  1073. }
  1074. }
  1075. return hr;
  1076. } /* DegreeAbbreviation */
  1077. /***********************************************************************************************
  1078. * IsInitialIsm *
  1079. *--------------*
  1080. * Description:
  1081. * Checks the next token in the text stream to determine if it is an initialism. Also
  1082. * tries to determine whether or not the period at the end of the initialism is the end of
  1083. * the sentence.
  1084. *
  1085. * If match made:
  1086. * Advances m_pNextChar to the appropriate position (either the period at the end of the
  1087. * abbreviation, or just past that period). Sets the Item in the ItemList at ItemPos to the
  1088. * abbreviation.
  1089. *
  1090. ********************************************************************* AH **********************/
  1091. HRESULT CStdSentEnum::IsInitialism( CItemList &ItemList, SPLISTPOS ItemPos, CSentItemMemory &MemoryManager,
  1092. BOOL* pfIsEOS )
  1093. {
  1094. SPDBG_FUNC( "CStdSentEnum::IsInitialism" );
  1095. HRESULT hr = S_OK;
  1096. BOOL fMatchedEOS = false;
  1097. //--- Initialism must be at least two characters.
  1098. if ( (long)(m_pEndOfCurrItem - m_pNextChar) < 4 )
  1099. {
  1100. hr = E_INVALIDARG;
  1101. }
  1102. else
  1103. {
  1104. const WCHAR *pIterator = NULL;
  1105. ULONG ulCount = 0;
  1106. pIterator = m_pNextChar;
  1107. //--- Iterate through the token, each time checking for an alpha character followed by a period.
  1108. while ( SUCCEEDED(hr) &&
  1109. pIterator <= m_pEndOfCurrItem - 2)
  1110. {
  1111. if ( !iswalpha(*pIterator) ||
  1112. *(pIterator + 1) != L'.' )
  1113. {
  1114. hr = E_INVALIDARG;
  1115. }
  1116. else
  1117. {
  1118. pIterator += 2;
  1119. ulCount++;
  1120. }
  1121. }
  1122. //--- Need to determine whether the initialism's period is also the end of the sentence.
  1123. if ( SUCCEEDED( hr ) &&
  1124. !(*pfIsEOS) )
  1125. {
  1126. //--- Advance to the beginning of the next token
  1127. const WCHAR *pTempNextChar = m_pEndOfCurrToken, *pTempEndChar = m_pEndChar;
  1128. const SPVTEXTFRAG *pTempCurrFrag = m_pCurrFrag;
  1129. hr = SkipWhiteSpaceAndTags( pTempNextChar, pTempEndChar, pTempCurrFrag, MemoryManager );
  1130. if ( SUCCEEDED( hr ) )
  1131. {
  1132. //--- If we have reached the end of the buffer, consider the abbreviation's period as
  1133. //--- the end of the sentence.
  1134. if ( !pTempNextChar )
  1135. {
  1136. *pfIsEOS = true;
  1137. fMatchedEOS = true;
  1138. }
  1139. //--- Otherwise, only consider the abbreviation's period as the end of the sentence if
  1140. //--- the next token is a common first word (which must be capitalized).
  1141. else if ( IsCapital( *pTempNextChar ) )
  1142. {
  1143. WCHAR *pTempEndOfItem = (WCHAR*) FindTokenEnd( pTempNextChar, pTempEndChar );
  1144. //--- Try to match a first word.
  1145. WCHAR temp = (WCHAR) *pTempEndOfItem;
  1146. *pTempEndOfItem = 0;
  1147. if ( bsearch( (void*) pTempNextChar, (void*) g_FirstWords, sp_countof( g_FirstWords ),
  1148. sizeof( SPLSTR ), CompareStringAndSPLSTR ) )
  1149. {
  1150. *pfIsEOS = true;
  1151. fMatchedEOS = true;
  1152. }
  1153. *pTempEndOfItem = temp;
  1154. }
  1155. }
  1156. }
  1157. //--- Now insert the Initialism in the ItemList.
  1158. if ( SUCCEEDED(hr) )
  1159. {
  1160. CSentItem Item;
  1161. Item.pItemSrcText = m_pNextChar;
  1162. Item.ulItemSrcLen = (long)(m_pEndOfCurrItem - m_pNextChar);
  1163. Item.ulItemSrcOffset = m_pCurrFrag->ulTextSrcOffset +
  1164. (long)( m_pNextChar - m_pCurrFrag->pTextStart );
  1165. Item.ulNumWords = ulCount;
  1166. Item.Words = (TTSWord*) MemoryManager.GetMemory( ulCount * sizeof(TTSWord), &hr );
  1167. if ( SUCCEEDED( hr ) )
  1168. {
  1169. SPVSTATE* pNewState = (SPVSTATE*) MemoryManager.GetMemory( sizeof( SPVSTATE ), &hr );
  1170. if ( SUCCEEDED( hr ) )
  1171. {
  1172. //--- Ensure letters are pronounced as nouns...
  1173. memcpy( pNewState, &m_pCurrFrag->State, sizeof( SPVSTATE ) );
  1174. pNewState->ePartOfSpeech = SPPS_Noun;
  1175. ZeroMemory( Item.Words, ulCount * sizeof(TTSWord) );
  1176. for ( ULONG i = 0; i < ulCount; i++ )
  1177. {
  1178. Item.Words[i].pXmlState = pNewState;
  1179. Item.Words[i].pWordText = &Item.pItemSrcText[ 2 * i ];
  1180. Item.Words[i].ulWordLen = 1;
  1181. Item.Words[i].pLemma = Item.Words[i].pWordText;
  1182. Item.Words[i].ulLemmaLen = Item.Words[i].ulWordLen;
  1183. }
  1184. Item.pItemInfo = (TTSItemInfo*) MemoryManager.GetMemory( sizeof(TTSItemInfo), &hr );
  1185. if ( SUCCEEDED( hr ) )
  1186. {
  1187. Item.pItemInfo->Type = eINITIALISM;
  1188. ItemList.SetAt( ItemPos, Item );
  1189. }
  1190. }
  1191. }
  1192. }
  1193. }
  1194. return hr;
  1195. } /* IsInitialism */
  1196. /***********************************************************************************************
  1197. * IsAlphaWord *
  1198. *-------------*
  1199. * Description:
  1200. * Checks the next token in the text stream to determine if it is an Alpha Word (all alpha
  1201. * characters, except possibly a single apostrophe).
  1202. *
  1203. ********************************************************************* AH **********************/
  1204. HRESULT CStdSentEnum::IsAlphaWord( const WCHAR* pStartChar, const WCHAR* pEndChar, TTSItemInfo*& pItemNormInfo,
  1205. CSentItemMemory& MemoryManager )
  1206. {
  1207. SPDBG_FUNC( "CStdSentEnum::IsAlphaWord" );
  1208. SPDBG_ASSERT( pStartChar < pEndChar );
  1209. HRESULT hr = S_OK;
  1210. bool fApostropheSeen = false;
  1211. WCHAR *pCurrChar = (WCHAR*) pStartChar;
  1212. while ( SUCCEEDED( hr ) &&
  1213. pCurrChar &&
  1214. pCurrChar < pEndChar )
  1215. {
  1216. if ( iswalpha( *pCurrChar ) )
  1217. {
  1218. pCurrChar++;
  1219. }
  1220. else if ( *pCurrChar == L'\''&&
  1221. !fApostropheSeen )
  1222. {
  1223. fApostropheSeen = true;
  1224. pCurrChar++;
  1225. }
  1226. else
  1227. {
  1228. hr = E_INVALIDARG;
  1229. }
  1230. }
  1231. if ( SUCCEEDED( hr ) )
  1232. {
  1233. //--- Matched Alpha Word
  1234. pItemNormInfo = (TTSItemInfo*) MemoryManager.GetMemory( sizeof(TTSItemInfo), &hr );
  1235. if ( SUCCEEDED( hr ) )
  1236. {
  1237. pItemNormInfo->Type = eALPHA_WORD;
  1238. }
  1239. }
  1240. return hr;
  1241. } /* IsAlphaWord */
  1242. /***********************************************************************************************
  1243. * AbbreviationModifier *
  1244. *----------------------*
  1245. * Description:
  1246. * Fixes pronunciation issues for special case where 'sq' or 'cu' modifies
  1247. * a measurement.
  1248. *
  1249. *************************************************************** MERESHAW **********************/
  1250. HRESULT CStdSentEnum::AbbreviationModifier( const AbbrevRecord* pAbbrevInfo, PRONRECORD* pPron,
  1251. CItemList& ItemList, SPLISTPOS ListPos )
  1252. {
  1253. SPDBG_FUNC( "CStdSentEnum::AbbreviationModifier" );
  1254. HRESULT hr = S_OK;
  1255. //--- Get Item which comes before the abbreviation modifier
  1256. SPLISTPOS TempPos = ListPos;
  1257. TTSSentItem TempItem = ItemList.GetPrev( TempPos );
  1258. if ( TempPos )
  1259. {
  1260. //--- Current Item - if All Caps, go with first pronunciation (need to do this before next
  1261. //--- stage of processing, since CU and FL's all caps prons take precedence over numeric...)
  1262. TempItem = ItemList.GetPrev( TempPos );
  1263. for ( ULONG i = 0; i < TempItem.ulItemSrcLen; i++ )
  1264. {
  1265. if ( !iswupper( TempItem.pItemSrcText[i] ) )
  1266. {
  1267. break;
  1268. }
  1269. }
  1270. if ( i == TempItem.ulItemSrcLen )
  1271. {
  1272. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron1 );
  1273. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  1274. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS1;
  1275. pPron->POSchoice = pAbbrevInfo->POS1;
  1276. return hr;
  1277. }
  1278. }
  1279. else
  1280. {
  1281. hr = E_INVALIDARG;
  1282. }
  1283. if ( TempPos )
  1284. {
  1285. TempItem = ItemList.GetPrev( TempPos );
  1286. }
  1287. else
  1288. {
  1289. hr = E_INVALIDARG;
  1290. }
  1291. if ( SUCCEEDED( hr ) )
  1292. {
  1293. pPron->pronArray[PRON_A].POScount = 1;
  1294. pPron->pronArray[PRON_B].POScount = 0;
  1295. pPron->pronArray[PRON_B].phon_Len = 0;
  1296. pPron->hasAlt = false;
  1297. pPron->altChoice = PRON_A;
  1298. //--- Abbreviation table pronunciations are basically just vendor lex prons...
  1299. pPron->pronType = eLEXTYPE_PRIVATE1;
  1300. //--- If a cardinal, decimal, or ordinal number, use regular form
  1301. if (( TempItem.pItemInfo->Type == eNUM_CARDINAL ) ||
  1302. ( TempItem.pItemInfo->Type == eNUM_DECIMAL ) ||
  1303. ( TempItem.pItemInfo->Type == eNUM_ORDINAL ) ||
  1304. ( TempItem.pItemInfo->Type == eNUM_MIXEDFRACTION ) ||
  1305. ( TempItem.pItemInfo->Type == eDATE_YEAR ) ||
  1306. ( TempItem.ulItemSrcLen == 3 &&
  1307. wcsnicmp( TempItem.pItemSrcText, L"one", 3 ) == 0 ))
  1308. {
  1309. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron2 );
  1310. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  1311. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS2;
  1312. pPron->POSchoice = pAbbrevInfo->POS2;
  1313. }
  1314. //--- Fractions and mixed fractions require some more work...
  1315. else if ( TempItem.pItemInfo->Type == eNUM_FRACTION )
  1316. {
  1317. if (( (TTSNumberItemInfo*) TempItem.pItemInfo )->pFractionalPart->fIsStandard )
  1318. {
  1319. //--- Standard fractions (e.g. 11/20) get the plural form
  1320. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron2 );
  1321. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  1322. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS2;
  1323. pPron->POSchoice = pAbbrevInfo->POS2;
  1324. }
  1325. else
  1326. {
  1327. //--- Singular form with [of a] inserted beforehand ([of an] case need not be
  1328. //--- checked because we're only dealing with 'sq' or 'cu'.
  1329. wcscpy( pPron->pronArray[PRON_A].phon_Str, g_pOfA );
  1330. pPron->pronArray[PRON_A].phon_Len = wcslen( g_pOfA );
  1331. wcscat( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron2 );
  1332. pPron->pronArray[PRON_A].phon_Len += wcslen( pPron->pronArray[PRON_A].phon_Str );
  1333. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS2;
  1334. pPron->POSchoice = pAbbrevInfo->POS2;
  1335. }
  1336. }
  1337. //--- Default behavior
  1338. else
  1339. {
  1340. //--- Use default form ('sq')
  1341. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron2 );
  1342. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  1343. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS2;
  1344. pPron->POSchoice = pAbbrevInfo->POS2;
  1345. }
  1346. }
  1347. //--- Default behavior - use first pron
  1348. else if ( hr == E_INVALIDARG )
  1349. {
  1350. hr = S_OK;
  1351. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron1 );
  1352. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  1353. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS1;
  1354. pPron->POSchoice = pAbbrevInfo->POS1;
  1355. }
  1356. return hr;
  1357. } /* AbbreviationModifier */