Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1081 lines
43 KiB

  1. /*******************************************************************************
  2. * StdSentEnum.cpp *
  3. *-----------------*
  4. * Description:
  5. * This module is the main implementation file for the CStdSentEnum class.
  6. *-------------------------------------------------------------------------------
  7. * Created By: EDC Date: 03/19/99
  8. * Copyright (C) 1999 Microsoft Corporation
  9. * All Rights Reserved
  10. *
  11. *******************************************************************************/
  12. //--- Additional includes
  13. #include "stdafx.h"
  14. #ifndef StdSentEnum_h
  15. #include "stdsentenum.h"
  16. #endif
  17. #include "spttsengdebug.h"
  18. #include "SpAutoObjectLock.h"
  19. //--- Locals: critical section held by FinalConstruct while it tests/sets g_fAbbrevTablesInitialized and converts the global pronunciation tables
  20. CComAutoCriticalSection CStdSentEnum::m_AbbrevTableCritSec;
  21. //=== CStdSentEnum ============================================================
  22. //
  23. /*****************************************************************************
  24. * CStdSentEnum::InitPron *
  25. *------------------------*
  26. * Description:
  27. * Inits pron tables
  28. ********************************************************************* AH ***/
  29. HRESULT CStdSentEnum::InitPron( WCHAR** OriginalPron )
  30. {
  31. HRESULT hr = S_OK;
  32. WCHAR *NewPron = NULL;
  33. NewPron = new WCHAR[ wcslen( *OriginalPron ) ];
  34. hr = m_cpPhonemeConverter->PhoneToId( *OriginalPron, NewPron );
  35. if ( SUCCEEDED( hr ) )
  36. {
  37. *OriginalPron = NewPron;
  38. }
  39. return hr;
  40. } /* InitPron */
  41. /*****************************************************************************
  42. * CStdSentEnum::FinalConstruct *
  43. *------------------------------*
  44. * Description:
  45. * Constructor
  46. ********************************************************************* EDC ***/
  47. HRESULT CStdSentEnum::FinalConstruct()
  48. {
  49. SPDBG_FUNC( "CStdSentEnum::FinalConstruct" );
  50. HRESULT hr = S_OK;
  51. m_dwSpeakFlags = 0;
  52. m_pTextFragList = NULL;
  53. m_pMorphLexicon = NULL;
  54. m_fHaveNamesLTS = false;
  55. m_eSeparatorAndDecimal = COMMA_PERIOD;
  56. m_eShortDateOrder = MONTH_DAY_YEAR;
  57. /*** Create phone converter ***/
  58. if ( SUCCEEDED( hr ) )
  59. {
  60. hr = SpCreatePhoneConverter( 1033, NULL, NULL, &m_cpPhonemeConverter );
  61. m_AbbrevTableCritSec.Lock();
  62. if ( !g_fAbbrevTablesInitialized )
  63. {
  64. for ( ULONG i = 0; SUCCEEDED( hr ) && i < sp_countof( g_AbbreviationTable ); i++ )
  65. {
  66. if ( g_AbbreviationTable[i].pPron1 )
  67. {
  68. hr = InitPron( &g_AbbreviationTable[i].pPron1 );
  69. }
  70. if ( SUCCEEDED( hr ) &&
  71. g_AbbreviationTable[i].pPron2 )
  72. {
  73. hr = InitPron( &g_AbbreviationTable[i].pPron2 );
  74. }
  75. if ( SUCCEEDED( hr ) &&
  76. g_AbbreviationTable[i].pPron3 )
  77. {
  78. hr = InitPron( &g_AbbreviationTable[i].pPron3 );
  79. }
  80. }
  81. for ( i = 0; SUCCEEDED( hr ) && i < sp_countof( g_AmbiguousWordTable ); i++ )
  82. {
  83. if ( g_AmbiguousWordTable[i].pPron1 )
  84. {
  85. hr = InitPron( &g_AmbiguousWordTable[i].pPron1 );
  86. }
  87. if ( SUCCEEDED( hr ) &&
  88. g_AmbiguousWordTable[i].pPron2 )
  89. {
  90. hr = InitPron( &g_AmbiguousWordTable[i].pPron2 );
  91. }
  92. if ( SUCCEEDED( hr ) &&
  93. g_AmbiguousWordTable[i].pPron3 )
  94. {
  95. hr = InitPron( &g_AmbiguousWordTable[i].pPron3 );
  96. }
  97. }
  98. for ( i = 0; SUCCEEDED( hr ) && i < sp_countof( g_PostLexLookupWordTable ); i++ )
  99. {
  100. if ( g_PostLexLookupWordTable[i].pPron1 )
  101. {
  102. hr = InitPron( &g_PostLexLookupWordTable[i].pPron1 );
  103. }
  104. if ( SUCCEEDED( hr ) &&
  105. g_PostLexLookupWordTable[i].pPron2 )
  106. {
  107. hr = InitPron( &g_PostLexLookupWordTable[i].pPron2 );
  108. }
  109. if ( SUCCEEDED( hr ) &&
  110. g_PostLexLookupWordTable[i].pPron3 )
  111. {
  112. hr = InitPron( &g_PostLexLookupWordTable[i].pPron3 );
  113. }
  114. }
  115. if ( SUCCEEDED( hr ) )
  116. {
  117. hr = InitPron( &g_pOfA );
  118. if ( SUCCEEDED( hr ) )
  119. {
  120. hr = InitPron( &g_pOfAn );
  121. }
  122. }
  123. }
  124. if ( SUCCEEDED( hr ) )
  125. {
  126. g_fAbbrevTablesInitialized = true;
  127. }
  128. m_AbbrevTableCritSec.Unlock();
  129. }
  130. return hr;
  131. } /* CStdSentEnum::FinalConstruct */
  132. /*****************************************************************************
  133. * CStdSentEnum::FinalRelease *
  134. *----------------------------*
  135. * Description:
  136. * Destructor
  137. ********************************************************************* EDC ***/
  138. void CStdSentEnum::FinalRelease()
  139. {
  140. SPDBG_FUNC( "CStdSentEnum::FinalRelease" );
  141. if ( m_pMorphLexicon )
  142. {
  143. delete m_pMorphLexicon;
  144. }
  145. } /* CStdSentEnum::FinalRelease */
  146. /*****************************************************************************
  147. * CStdSentEnum::SetFragList *
  148. *---------------------------*
  149. * The text fragment list passed in is guaranteed to be valid for the lifetime
  150. * of this object. Each time this method is called, the sentence enumerator
  151. * should reset its state.
  152. ********************************************************************* EDC ***/
  153. STDMETHODIMP CStdSentEnum::
  154. SetFragList( const SPVTEXTFRAG* pTextFragList, DWORD dwSpeakFlags )
  155. {
  156. SPAUTO_OBJ_LOCK;
  157. SPDBG_FUNC( "CStdSentEnum::SetFragList" );
  158. HRESULT hr = S_OK;
  159. //--- Check args
  160. if( SP_IS_BAD_READ_PTR( pTextFragList ) ||
  161. ( dwSpeakFlags & SPF_UNUSED_FLAGS ) )
  162. {
  163. hr = E_INVALIDARG;
  164. }
  165. else
  166. {
  167. m_dwSpeakFlags = dwSpeakFlags;
  168. m_pTextFragList = pTextFragList;
  169. //--- grab normalization preferences from the registry
  170. if ( SUCCEEDED( hr ) )
  171. {
  172. CComPtr<ISpObjectToken> cpToken;
  173. CSpDynamicString dstrTokenKeyName;
  174. hr = StringFromCLSID( CLSID_MSE_TTSEngine, &dstrTokenKeyName );
  175. if ( SUCCEEDED( hr ) )
  176. {
  177. hr = SpCreateNewToken( L"HKEY_CURRENT_USER\\Software\\Microsoft\\Speech\\Voices", dstrTokenKeyName,
  178. &cpToken );
  179. }
  180. if ( SUCCEEDED( hr ) )
  181. {
  182. DWORD dwTemp;
  183. if ( SUCCEEDED( cpToken->GetDWORD( L"SeparatorAndDecimal", &dwTemp ) ) )
  184. {
  185. m_eSeparatorAndDecimal = (SEPARATOR_AND_DECIMAL) dwTemp;
  186. }
  187. if ( SUCCEEDED( cpToken->GetDWORD( L"ShortDateOrder", &dwTemp ) ) )
  188. {
  189. m_eShortDateOrder = (SHORT_DATE_ORDER) dwTemp;
  190. }
  191. }
  192. }
  193. //--- Reset state
  194. Reset();
  195. }
  196. return hr;
  197. } /* CStdSentEnum::SetFragList */
  198. /*****************************************************************************
  199. * CStdSentEnum::Next *
  200. *--------------------*
  201. *
  202. ********************************************************************* EDC ***/
  203. STDMETHODIMP CStdSentEnum::Next( IEnumSENTITEM **ppSentItemEnum )
  204. {
  205. SPAUTO_OBJ_LOCK;
  206. SPDBG_FUNC( "CStdSentEnum::Next" );
  207. HRESULT hr = S_OK;
  208. //--- Check args
  209. if( SPIsBadWritePtr( ppSentItemEnum, sizeof( IEnumSENTITEM* ) ) )
  210. {
  211. hr = E_INVALIDARG;
  212. }
  213. else
  214. {
  215. //--- If this is NULL then the enum needs to be reset
  216. if( m_pCurrFrag )
  217. {
  218. SentencePointer NewSentencePointer;
  219. NewSentencePointer.pSentenceFrag = m_pCurrFrag;
  220. NewSentencePointer.pSentenceStart = m_pNextChar;
  221. hr = GetNextSentence( ppSentItemEnum );
  222. if( hr == S_OK )
  223. {
  224. //--- Update Sentence Pointer List
  225. hr = m_SentenceStack.Push( NewSentencePointer );
  226. }
  227. }
  228. else
  229. {
  230. hr = S_FALSE;
  231. }
  232. }
  233. return hr;
  234. } /* CStdSentEnum::Next */
  235. /*****************************************************************************
  236. * CStdSentEnum::Previous *
  237. *--------------------*
  238. *
  239. ********************************************************************* AH ****/
  240. STDMETHODIMP CStdSentEnum::Previous( IEnumSENTITEM **ppSentItemEnum )
  241. {
  242. SPAUTO_OBJ_LOCK;
  243. SPDBG_FUNC( "CStdSentEnum::Previous" );
  244. HRESULT hr = S_OK;
  245. //--- Check args
  246. if( SPIsBadWritePtr( ppSentItemEnum, sizeof( IEnumSENTITEM* ) ) )
  247. {
  248. hr = E_INVALIDARG;
  249. }
  250. else
  251. {
  252. //--- Don't care if m_pCurrFrag is NULL, as long as we have enough on the SentenceStack
  253. //--- to skip backwards...
  254. if( m_SentenceStack.GetCount() >= 2 )
  255. {
  256. //--- Get the previous Sentence from the Sentence List, and then remove the Current Sentence
  257. SentencePointer &PreviousSentence = m_SentenceStack.Pop();
  258. PreviousSentence = m_SentenceStack.Pop();
  259. //--- Reset the current frag and the current text pointer position
  260. m_pCurrFrag = PreviousSentence.pSentenceFrag;
  261. m_pNextChar = PreviousSentence.pSentenceStart;
  262. m_pEndChar = m_pCurrFrag->pTextStart + m_pCurrFrag->ulTextLen;
  263. hr = GetNextSentence( ppSentItemEnum );
  264. if( hr == S_OK )
  265. {
  266. //--- Update Sentence Pointer List
  267. hr = m_SentenceStack.Push( PreviousSentence );
  268. }
  269. }
  270. else
  271. {
  272. hr = S_FALSE;
  273. }
  274. }
  275. return hr;
  276. } /* CStdSentEnum::Previous */
  277. /*****************************************************************************
  278. * SkipWhiteSpaceAndTags *
  279. *-----------------------*
  280. * Skips m_pNextChar ahead to the next non-whitespace character (skipping
  281. * ahead in the frag list, if necessary) or sets it to NULL if it hits the
  282. * end of the frag list text...
  283. ********************************************************************* AH ****/
  284. HRESULT CStdSentEnum::SkipWhiteSpaceAndTags( const WCHAR*& pStartChar, const WCHAR*& pEndChar,
  285. const SPVTEXTFRAG*& pCurrFrag, CSentItemMemory& MemoryManager,
  286. BOOL fAddToItemList, CItemList* pItemList )
  287. {
  288. SPDBG_ASSERT( pStartChar <= pEndChar );
  289. HRESULT hr = S_OK;
  290. while ( pStartChar &&
  291. ( IsSpace( *pStartChar ) ||
  292. pStartChar == pEndChar ) )
  293. {
  294. //--- Skip whitespace
  295. while ( pStartChar < pEndChar &&
  296. IsSpace( *pStartChar ) )
  297. {
  298. ++pStartChar;
  299. }
  300. //--- Skip to next spoken frag, if necessary
  301. if ( pStartChar == pEndChar )
  302. {
  303. pCurrFrag = pCurrFrag->pNext;
  304. while ( pCurrFrag &&
  305. pCurrFrag->State.eAction != SPVA_Speak &&
  306. pCurrFrag->State.eAction != SPVA_SpellOut )
  307. {
  308. pStartChar = (WCHAR*) pCurrFrag->pTextStart;
  309. pEndChar = (WCHAR*) pStartChar + pCurrFrag->ulTextLen;
  310. //--- Add non-spoken fragments, if fAddToItemList is true.
  311. if ( fAddToItemList )
  312. {
  313. //-- Check for names lexicon XML tag...
  314. if( !m_fNameItem &&
  315. m_pCurrFrag->ulTextLen == 6 &&
  316. !_wcsnicmp( L"<NAME>", m_pCurrFrag->pTextStart, m_pCurrFrag->ulTextLen ) )
  317. {
  318. m_fNameItem = true;
  319. }
  320. else if( m_fNameItem &&
  321. m_pCurrFrag->ulTextLen == 7 &&
  322. !_wcsnicmp( L"</NAME>", m_pCurrFrag->pTextStart, m_pCurrFrag->ulTextLen ) )
  323. {
  324. m_fNameItem = false;
  325. }
  326. CSentItem Item;
  327. Item.pItemSrcText = pCurrFrag->pTextStart;
  328. Item.ulItemSrcLen = pCurrFrag->ulTextLen;
  329. Item.ulItemSrcOffset = pCurrFrag->ulTextSrcOffset;
  330. Item.ulNumWords = 1;
  331. Item.Words = (TTSWord*) MemoryManager.GetMemory( sizeof(TTSWord), &hr );
  332. if ( SUCCEEDED( hr ) )
  333. {
  334. ZeroMemory( Item.Words, sizeof(TTSWord) );
  335. Item.Words[0].pXmlState = &pCurrFrag->State;
  336. Item.Words[0].eWordPartOfSpeech = MS_Unknown;
  337. Item.eItemPartOfSpeech = MS_Unknown;
  338. Item.pItemInfo = (TTSItemInfo*) MemoryManager.GetMemory( sizeof(TTSItemInfo), &hr );
  339. if ( SUCCEEDED( hr ) )
  340. {
  341. Item.pItemInfo->Type = eWORDLIST_IS_VALID;
  342. pItemList->AddTail( Item );
  343. }
  344. }
  345. }
  346. pCurrFrag = pCurrFrag->pNext;
  347. }
  348. if ( !pCurrFrag )
  349. {
  350. pStartChar = NULL;
  351. pEndChar = NULL;
  352. }
  353. else
  354. {
  355. pStartChar = (WCHAR*) pCurrFrag->pTextStart;
  356. pEndChar = (WCHAR*) pStartChar + pCurrFrag->ulTextLen;
  357. }
  358. }
  359. }
  360. return hr;
  361. } /* SkipWhiteSpaceAndTags */
  362. /*****************************************************************************
  363. * FindTokenEnd *
  364. *--------------*
  365. * Returns the position of the first whitespace character after pStartChar,
  366. * or pEndChar, or the character after SP_MAX_WORD_LENGTH, whichever comes first.
  367. ********************************************************************* AH ****/
  368. const WCHAR* CStdSentEnum::FindTokenEnd( const WCHAR* pStartChar, const WCHAR* pEndChar )
  369. {
  370. SPDBG_ASSERT( pStartChar < pEndChar );
  371. ULONG ulNumChars = 1;
  372. const WCHAR *pPos = pStartChar;
  373. while ( pPos &&
  374. pPos < pEndChar &&
  375. !IsSpace( *pPos ) &&
  376. ulNumChars < SP_MAX_WORD_LENGTH )
  377. {
  378. pPos++;
  379. ulNumChars++;
  380. }
  381. return pPos;
  382. } /* FindTokenEnd */
  383. /*****************************************************************************
  384. * CStdSentEnum::AddNextSentItem *
  385. *-------------------------------*
  386. * Locates the next sentence item in the stream and adds it to the list.
  387. * Returns true if the last item added is the end of the sentence.
  388. ********************************************************************* AH ****/
  389. HRESULT CStdSentEnum::AddNextSentItem( CItemList& ItemList, CSentItemMemory& MemoryManager, BOOL* pfIsEOS )
  390. {
  391. SPDBG_ASSERT( m_pNextChar && pfIsEOS );
  392. HRESULT hr = S_OK;
  393. BOOL fHitPauseItem = false;
  394. CSentItem Item;
  395. ULONG ulTrailItems = 0;
  396. TTSItemType ItemType = eUNMATCHED;
  397. *pfIsEOS = false;
  398. //--- Skip initial whitespace characters and XML markup (by skipping ahead in the frag list).
  399. hr = SkipWhiteSpaceAndTags( m_pNextChar, m_pEndChar, m_pCurrFrag, MemoryManager, true, &ItemList );
  400. //--- This will happen when we hit the end of the frag list
  401. if ( !m_pNextChar )
  402. {
  403. return S_OK;
  404. }
  405. //--- Find end of the next token (next whitespace character, hyphen, or m_pEndChar).
  406. m_pEndOfCurrToken = FindTokenEnd( m_pNextChar, m_pEndChar );
  407. //--- Get Primary Insert Position
  408. SPLISTPOS ItemPos = ItemList.AddTail( Item );
  409. //--- Try looking up this token in the User Lexicon...
  410. WCHAR Temp = *( (WCHAR*) m_pEndOfCurrToken );
  411. *( (WCHAR*) m_pEndOfCurrToken ) = 0;
  412. SPWORDPRONUNCIATIONLIST SPList;
  413. ZeroMemory( &SPList, sizeof( SPWORDPRONUNCIATIONLIST ) );
  414. hr = m_cpAggregateLexicon->GetPronunciations( m_pNextChar, 1033, eLEXTYPE_USER, &SPList );
  415. if( SPList.pvBuffer )
  416. {
  417. ::CoTaskMemFree( SPList.pvBuffer );
  418. }
  419. *( (WCHAR*) m_pEndOfCurrToken ) = Temp;
  420. if ( SUCCEEDED( hr ) )
  421. {
  422. Item.eItemPartOfSpeech = MS_Unknown;
  423. Item.pItemSrcText = m_pNextChar;
  424. Item.ulItemSrcLen = (ULONG) ( m_pEndOfCurrToken - m_pNextChar );
  425. Item.ulItemSrcOffset = m_pCurrFrag->ulTextSrcOffset +
  426. (ULONG)( m_pNextChar - m_pCurrFrag->pTextStart );
  427. Item.ulNumWords = 1;
  428. Item.Words = (TTSWord*) MemoryManager.GetMemory( sizeof(TTSWord), &hr );
  429. if ( SUCCEEDED( hr ) )
  430. {
  431. ZeroMemory( Item.Words, sizeof(TTSWord) );
  432. Item.Words[0].pXmlState = &m_pCurrFrag->State;
  433. Item.Words[0].pWordText = m_pNextChar;
  434. Item.Words[0].ulWordLen = Item.ulItemSrcLen;
  435. Item.Words[0].pLemma = Item.Words[0].pWordText;
  436. Item.Words[0].ulLemmaLen = Item.Words[0].ulWordLen;
  437. Item.Words[0].eWordPartOfSpeech = MS_Unknown;
  438. Item.eItemPartOfSpeech = MS_Unknown;
  439. Item.pItemInfo = (TTSItemInfo*) MemoryManager.GetMemory( sizeof(TTSItemInfo*), &hr );
  440. if ( SUCCEEDED( hr ) )
  441. {
  442. Item.pItemInfo->Type = eALPHA_WORD;
  443. ItemList.SetAt( ItemPos, Item );
  444. }
  445. }
  446. m_pNextChar = m_pEndOfCurrToken;
  447. }
  448. //--- Not in the user lex - itemize, normalize, etc.
  449. else if ( hr == SPERR_NOT_IN_LEX )
  450. {
  451. hr = S_OK;
  452. //--- convert text from Unicode to Ascii
  453. hr = DoUnicodeToAsciiMap( m_pNextChar, (ULONG)( m_pEndOfCurrToken - m_pNextChar ), (WCHAR*)m_pNextChar );
  454. if ( SUCCEEDED( hr ) )
  455. {
  456. //--- Find end of the next token (next whitespace character, hyphen, or m_pEndChar)
  457. //--- AGAIN, since the mapping may have introduced new whitespace characters...
  458. m_pEndOfCurrToken = FindTokenEnd( m_pNextChar, m_pEndChar );
  459. //--- Insert lead items (group beginnings, quotation marks)
  460. while ( m_pNextChar < m_pEndOfCurrToken &&
  461. ( ( ItemType = IsGroupBeginning( *m_pNextChar ) ) != eUNMATCHED ||
  462. ( ItemType = IsQuotationMark( *m_pNextChar ) ) != eUNMATCHED ) )
  463. {
  464. CSentItem LeadItem;
  465. LeadItem.pItemSrcText = m_pNextChar;
  466. LeadItem.ulItemSrcLen = 1;
  467. LeadItem.ulItemSrcOffset = m_pCurrFrag->ulTextSrcOffset +
  468. (ULONG)(( m_pNextChar - m_pCurrFrag->pTextStart ));
  469. LeadItem.ulNumWords = 1;
  470. LeadItem.Words = (TTSWord*) MemoryManager.GetMemory( sizeof(TTSWord), &hr );
  471. if ( SUCCEEDED( hr ) )
  472. {
  473. ZeroMemory( LeadItem.Words, sizeof(TTSWord) );
  474. LeadItem.Words[0].pXmlState = &m_pCurrFrag->State;
  475. LeadItem.Words[0].eWordPartOfSpeech = ConvertItemTypeToPartOfSp( ItemType );
  476. LeadItem.eItemPartOfSpeech = ConvertItemTypeToPartOfSp( ItemType );
  477. LeadItem.pItemInfo = (TTSItemInfo*) MemoryManager.GetMemory( sizeof(TTSItemInfo), &hr );
  478. if ( SUCCEEDED( hr ) )
  479. {
  480. LeadItem.pItemInfo->Type = ItemType;
  481. if ( m_dwSpeakFlags & SPF_NLP_SPEAK_PUNC ||
  482. m_pCurrFrag->State.eAction == SPVA_SpellOut )
  483. {
  484. CWordList TempWordList;
  485. ExpandPunctuation( TempWordList, *m_pNextChar );
  486. hr = SetWordList( LeadItem, TempWordList, MemoryManager );
  487. LeadItem.pItemInfo->Type = eUNMATCHED;
  488. }
  489. ItemList.InsertBefore( ItemPos, LeadItem );
  490. m_pNextChar++;
  491. }
  492. }
  493. ItemType = eUNMATCHED;
  494. }
  495. //--- Insert trail items (group endings, quotation marks, misc. punctuation, EOS Items)
  496. m_pEndOfCurrItem = m_pEndOfCurrToken;
  497. BOOL fAddTrailItem = true;
  498. BOOL fAbbreviation = false;
  499. while ( (m_pEndOfCurrItem - 1) >= m_pNextChar &&
  500. fAddTrailItem )
  501. {
  502. fAddTrailItem = false;
  503. fAbbreviation = false;
  504. //--- Check group endings, quotation marks, misc. punctuation.
  505. if ( ( ItemType = IsGroupEnding( *(m_pEndOfCurrItem - 1) ) ) != eUNMATCHED ||
  506. ( ItemType = IsQuotationMark( *(m_pEndOfCurrItem - 1) ) ) != eUNMATCHED ||
  507. ( ItemType = IsMiscPunctuation( *(m_pEndOfCurrItem - 1) ) ) != eUNMATCHED )
  508. {
  509. fAddTrailItem = true;
  510. if ( ItemType == eCOMMA ||
  511. ItemType == eCOLON ||
  512. ItemType == eSEMICOLON )
  513. {
  514. fHitPauseItem = true;
  515. }
  516. }
  517. //--- Check EOS Items, except periods preceded by alpha characters
  518. else if ( ( ItemType = IsEOSItem( *(m_pEndOfCurrItem - 1) ) ) != eUNMATCHED &&
  519. ! ( ItemType == ePERIOD &&
  520. ( m_pEndOfCurrItem - 2 >= m_pNextChar ) &&
  521. ( iswalpha( *(m_pEndOfCurrItem - 2) ) ) ) )
  522. {
  523. //--- Check for ellipses
  524. if ( ItemType == ePERIOD )
  525. {
  526. if ( m_pEndOfCurrItem == m_pEndOfCurrToken &&
  527. ( m_pEndOfCurrItem - 2 >= m_pNextChar ) &&
  528. ( ( ItemType = IsEOSItem( *(m_pEndOfCurrItem - 2) ) ) == ePERIOD ) &&
  529. ( m_pEndOfCurrItem - 3 == m_pNextChar ) &&
  530. ( ( ItemType = IsEOSItem( *(m_pEndOfCurrItem - 3) ) ) == ePERIOD ) )
  531. {
  532. fAddTrailItem = true;
  533. ItemType = eELLIPSIS;
  534. }
  535. else
  536. {
  537. ItemType = ePERIOD;
  538. fAddTrailItem = true;
  539. *pfIsEOS = true;
  540. }
  541. }
  542. else
  543. {
  544. fAddTrailItem = true;
  545. *pfIsEOS = true;
  546. }
  547. }
  548. //--- Period preceded by alpha character - determine whether it is EOS.
  549. else if ( ItemType == ePERIOD )
  550. {
  551. //--- Is it an Initialism ( e.g. "e.g." )? If so, only EOS if the next
  552. //--- word is in the common first words list...
  553. hr = IsInitialism( ItemList, ItemPos, MemoryManager, pfIsEOS );
  554. if ( SUCCEEDED( hr ) )
  555. {
  556. if ( *pfIsEOS )
  557. {
  558. //--- Did we see a pause item earlier? In that case, we should NOT listen to this
  559. //--- IsEOS decision from IsInitialism...
  560. if ( fHitPauseItem )
  561. {
  562. *pfIsEOS = false;
  563. }
  564. else
  565. {
  566. fAddTrailItem = true;
  567. fAbbreviation = true;
  568. }
  569. }
  570. }
  571. else if ( hr == E_INVALIDARG )
  572. {
  573. const WCHAR temp = (WCHAR) *( m_pEndOfCurrItem - 1 );
  574. *( (WCHAR*) ( m_pEndOfCurrItem - 1 ) ) = 0;
  575. const AbbrevRecord* pAbbrevRecord =
  576. (AbbrevRecord*) bsearch( (void*) m_pNextChar, (void*) g_AbbreviationTable,
  577. sp_countof( g_AbbreviationTable ), sizeof( AbbrevRecord ),
  578. CompareStringAndAbbrevRecord );
  579. *( (WCHAR*) ( m_pEndOfCurrItem - 1 ) ) = temp;
  580. if ( pAbbrevRecord )
  581. {
  582. //--- Matched an abbreviation
  583. if ( pAbbrevRecord->iSentBreakDisambig < 0 )
  584. {
  585. //--- Abbreviation will never end a sentence - just insert into ItemList
  586. *pfIsEOS = false;
  587. hr = S_OK;
  588. Item.pItemSrcText = m_pNextChar;
  589. Item.ulItemSrcLen = (ULONG)(m_pEndOfCurrItem - m_pNextChar);
  590. Item.ulItemSrcOffset = m_pCurrFrag->ulTextSrcOffset +
  591. (ULONG)( m_pNextChar - m_pCurrFrag->pTextStart );
  592. Item.ulNumWords = 1;
  593. Item.Words = (TTSWord*) MemoryManager.GetMemory( sizeof( TTSWord ), &hr );
  594. if ( SUCCEEDED( hr ) )
  595. {
  596. ZeroMemory( Item.Words, sizeof( TTSWord ) );
  597. Item.Words[0].pXmlState = &m_pCurrFrag->State;
  598. Item.Words[0].pWordText = Item.pItemSrcText;
  599. Item.Words[0].ulWordLen = Item.ulItemSrcLen;
  600. Item.Words[0].pLemma = Item.pItemSrcText;
  601. Item.Words[0].ulLemmaLen = Item.ulItemSrcLen;
  602. Item.pItemInfo = (TTSItemInfo*) MemoryManager.GetMemory( sizeof(TTSAbbreviationInfo), &hr );
  603. if ( SUCCEEDED( hr ) )
  604. {
  605. if ( NeedsToBeNormalized( pAbbrevRecord ) )
  606. {
  607. Item.pItemInfo->Type = eABBREVIATION_NORMALIZE;
  608. }
  609. else
  610. {
  611. Item.pItemInfo->Type = eABBREVIATION;
  612. }
  613. ( (TTSAbbreviationInfo*) Item.pItemInfo )->pAbbreviation = pAbbrevRecord;
  614. ItemList.SetAt( ItemPos, Item );
  615. }
  616. }
  617. }
  618. else
  619. {
  620. //--- Need to do some disambiguation to determine whether,
  621. //--- a) this is indeed an abbreviation (e.g. "Ed.")
  622. //--- b) the period doubles as EOS
  623. hr = ( this->*g_SentBreakDisambigTable[pAbbrevRecord->iSentBreakDisambig] )
  624. ( pAbbrevRecord, ItemList, ItemPos, MemoryManager, pfIsEOS );
  625. if ( SUCCEEDED( hr ) )
  626. {
  627. if ( *pfIsEOS )
  628. {
  629. if ( fHitPauseItem )
  630. {
  631. *pfIsEOS = false;
  632. }
  633. else
  634. {
  635. fAddTrailItem = true;
  636. fAbbreviation = true;
  637. }
  638. }
  639. }
  640. }
  641. }
  642. if ( hr == E_INVALIDARG )
  643. {
  644. //--- Just check for periods internal to the item - this catches stuff like
  645. //--- 10:30p.m.
  646. for ( const WCHAR* pIterator = m_pNextChar; pIterator < m_pEndOfCurrItem - 1; pIterator++ )
  647. {
  648. if ( *pIterator == L'.' )
  649. {
  650. *pfIsEOS = false;
  651. break;
  652. }
  653. }
  654. //--- If all previous checks have failed, it is EOS.
  655. if ( pIterator == ( m_pEndOfCurrItem - 1 ) &&
  656. !fHitPauseItem )
  657. {
  658. hr = S_OK;
  659. fAddTrailItem = true;
  660. *pfIsEOS = true;
  661. }
  662. else if ( hr == E_INVALIDARG )
  663. {
  664. hr = S_OK;
  665. }
  666. }
  667. }
  668. }
  669. //--- Add trail item.
  670. if ( fAddTrailItem )
  671. {
  672. ulTrailItems++;
  673. CSentItem TrailItem;
  674. if ( ItemType == eELLIPSIS )
  675. {
  676. TrailItem.pItemSrcText = m_pEndOfCurrItem - 3;
  677. TrailItem.ulItemSrcLen = 3;
  678. TrailItem.ulItemSrcOffset = m_pCurrFrag->ulTextSrcOffset +
  679. (ULONG)( m_pEndOfCurrItem - m_pCurrFrag->pTextStart - 3 );
  680. }
  681. else
  682. {
  683. TrailItem.pItemSrcText = m_pEndOfCurrItem - 1;
  684. TrailItem.ulItemSrcLen = 1;
  685. TrailItem.ulItemSrcOffset = m_pCurrFrag->ulTextSrcOffset +
  686. (ULONG)( m_pEndOfCurrItem - m_pCurrFrag->pTextStart - 1 );
  687. }
  688. TrailItem.ulNumWords = 1;
  689. TrailItem.Words = (TTSWord*) MemoryManager.GetMemory( sizeof(TTSWord), &hr );
  690. if ( SUCCEEDED( hr ) )
  691. {
  692. ZeroMemory( TrailItem.Words, sizeof(TTSWord) );
  693. TrailItem.Words[0].pXmlState = &m_pCurrFrag->State;
  694. TrailItem.Words[0].eWordPartOfSpeech = ConvertItemTypeToPartOfSp( ItemType );
  695. TrailItem.eItemPartOfSpeech = ConvertItemTypeToPartOfSp( ItemType );
  696. TrailItem.pItemInfo = (TTSItemInfo*) MemoryManager.GetMemory( sizeof(TTSItemInfo), &hr );
  697. if ( SUCCEEDED( hr ) )
  698. {
  699. TrailItem.pItemInfo->Type = ItemType;
  700. if ( m_dwSpeakFlags & SPF_NLP_SPEAK_PUNC ||
  701. ( m_pCurrFrag->State.eAction == SPVA_SpellOut &&
  702. !fAbbreviation ) )
  703. {
  704. CWordList TempWordList;
  705. ExpandPunctuation( TempWordList, *(m_pEndOfCurrItem - 1) );
  706. hr = SetWordList( TrailItem, TempWordList, MemoryManager );
  707. TrailItem.pItemInfo->Type = eUNMATCHED;
  708. }
  709. ItemList.InsertAfter( ItemPos, TrailItem );
  710. if ( !fAbbreviation )
  711. {
  712. if ( ItemType == eELLIPSIS )
  713. {
  714. m_pEndOfCurrItem -= 3;
  715. ulTrailItems = 3;
  716. }
  717. else
  718. {
  719. m_pEndOfCurrItem--;
  720. }
  721. }
  722. }
  723. }
  724. ItemType = eUNMATCHED;
  725. if ( fAbbreviation )
  726. {
  727. break;
  728. }
  729. }
  730. }
  731. //--- Do Main Item Insertion
  732. if ( SUCCEEDED( hr ) &&
  733. m_pNextChar == m_pEndOfCurrItem )
  734. {
  735. ItemList.RemoveAt( ItemPos );
  736. }
  737. else if ( SUCCEEDED( hr ) )
  738. {
  739. hr = Normalize( ItemList, ItemPos, MemoryManager );
  740. }
  741. if( m_fNameItem )
  742. {
  743. wcscpy( ItemList.GetAt( ItemPos ).CustomLtsToken, L"Names" );
  744. }
  745. //--- Advance m_pNextChar to m_pEndOfCurrItem + once for each trail item matched.
  746. if ( SUCCEEDED( hr ) )
  747. {
  748. if ( !fAbbreviation &&
  749. m_pEndOfCurrItem + ulTrailItems != m_pEndOfCurrToken )
  750. {
  751. //--- Multi-token item matched in Normalize()... Remove all previously matched trail items,
  752. //--- as they were matched as part of the larger item...
  753. m_pNextChar = m_pEndOfCurrItem;
  754. Item = ItemList.GetNext( ItemPos );
  755. while ( ItemPos )
  756. {
  757. SPLISTPOS RemovePos = ItemPos;
  758. Item = ItemList.GetNext( ItemPos );
  759. ItemList.RemoveAt( RemovePos );
  760. }
  761. }
  762. else
  763. {
  764. m_pNextChar = m_pEndOfCurrToken;
  765. }
  766. }
  767. }
  768. }
  769. return hr;
  770. } /* CStdSentEnum::AddNextSentItem */
  771. /*****************************************************************************
  772. * CStdSentEnum::GetNextSentence *
  773. *-------------------------------*
  774. * This method is used to create a sentence item enumerator and populate it
  775. * with items. If the SPF_NLP_PASSTHROUGH flag is set, each item is the block
  776. * of text between XML states. If the SPF_NLP_PASSTHROUGH flag is not set, each
  777. * item is an individual word that is looked up in the current lexicon(s).
  778. ********************************************************************* EDC ***/
  779. HRESULT CStdSentEnum::GetNextSentence( IEnumSENTITEM** ppItemEnum )
  780. {
  781. HRESULT hr = S_OK;
  782. ULONG ulNumItems = 0;
  783. const SPVTEXTFRAG* pPrevFrag = m_pCurrFrag;
  784. //--- Is there any work to do
  785. if( m_pCurrFrag == NULL ) return S_FALSE;
  786. //--- Create sentence enum
  787. CComObject<CSentItemEnum> *pItemEnum;
  788. hr = CComObject<CSentItemEnum>::CreateInstance( &pItemEnum );
  789. if( SUCCEEDED( hr ) )
  790. {
  791. pItemEnum->AddRef();
  792. pItemEnum->_SetOwner( GetControllingUnknown() );
  793. *ppItemEnum = pItemEnum;
  794. }
  795. if( SUCCEEDED( hr ) )
  796. {
  797. BOOL fSentDone = false;
  798. BOOL fGoToNextFrag = false;
  799. CItemList& ItemList = pItemEnum->_GetList();
  800. CSentItemMemory& MemoryManager = pItemEnum->_GetMemoryManager();
  801. while( SUCCEEDED(hr) && m_pCurrFrag && !fSentDone && ulNumItems < 50 )
  802. {
  803. ulNumItems++;
  804. if( m_pCurrFrag->State.eAction == SPVA_Speak ||
  805. m_pCurrFrag->State.eAction == SPVA_SpellOut )
  806. {
  807. hr = AddNextSentItem( ItemList, MemoryManager, &fSentDone );
  808. //--- Advance fragment?
  809. if( SUCCEEDED( hr ) &&
  810. m_pNextChar &&
  811. m_pEndChar &&
  812. m_pNextChar >= m_pEndChar )
  813. {
  814. fGoToNextFrag = true;
  815. }
  816. }
  817. else
  818. {
  819. //-- Check for lexicon
  820. if( !m_fNameItem &&
  821. m_pCurrFrag->ulTextLen == 6 &&
  822. !_wcsnicmp( L"<NAME>", m_pCurrFrag->pTextStart, m_pCurrFrag->ulTextLen ) )
  823. {
  824. m_fNameItem = true;
  825. }
  826. else if( m_fNameItem &&
  827. m_pCurrFrag->ulTextLen == 7 &&
  828. !_wcsnicmp( L"</NAME>", m_pCurrFrag->pTextStart, m_pCurrFrag->ulTextLen ) )
  829. {
  830. m_fNameItem = false;
  831. }
  832. //--- Add non spoken fragments
  833. CSentItem Item;
  834. Item.pItemSrcText = m_pCurrFrag->pTextStart;
  835. Item.ulItemSrcLen = m_pCurrFrag->ulTextLen;
  836. Item.ulItemSrcOffset = m_pCurrFrag->ulTextSrcOffset;
  837. Item.ulNumWords = 1;
  838. Item.Words = (TTSWord*) MemoryManager.GetMemory( sizeof(TTSWord), &hr );
  839. if ( SUCCEEDED( hr ) )
  840. {
  841. ZeroMemory( Item.Words, sizeof(TTSWord) );
  842. Item.Words[0].pXmlState = &m_pCurrFrag->State;
  843. Item.Words[0].eWordPartOfSpeech = MS_Unknown;
  844. Item.eItemPartOfSpeech = MS_Unknown;
  845. Item.pItemInfo = (TTSItemInfo*) MemoryManager.GetMemory( sizeof(TTSItemInfo), &hr );
  846. if ( SUCCEEDED( hr ) )
  847. {
  848. Item.pItemInfo->Type = eWORDLIST_IS_VALID;
  849. ItemList.AddTail( Item );
  850. }
  851. }
  852. fGoToNextFrag = true;
  853. }
  854. if( SUCCEEDED( hr ) &&
  855. fGoToNextFrag )
  856. {
  857. fGoToNextFrag = false;
  858. pPrevFrag = m_pCurrFrag;
  859. m_pCurrFrag = m_pCurrFrag->pNext;
  860. if( m_pCurrFrag )
  861. {
  862. m_pNextChar = m_pCurrFrag->pTextStart;
  863. m_pEndChar = m_pNextChar + m_pCurrFrag->ulTextLen;
  864. }
  865. else
  866. {
  867. m_pNextChar = NULL;
  868. m_pEndChar = NULL;
  869. }
  870. }
  871. } // end while
  872. //--- If no period has been added, add one now - this will happen if the text
  873. //--- is ONLY XML markup...
  874. if ( SUCCEEDED(hr) && !fSentDone )
  875. {
  876. CSentItem EOSItem;
  877. EOSItem.pItemSrcText = g_period.pStr;
  878. EOSItem.ulItemSrcLen = g_period.Len;
  879. EOSItem.ulItemSrcOffset = pPrevFrag->ulTextSrcOffset + pPrevFrag->ulTextLen;
  880. EOSItem.ulNumWords = 1;
  881. EOSItem.Words = (TTSWord*) MemoryManager.GetMemory( sizeof(TTSWord), &hr );
  882. if ( SUCCEEDED( hr ) )
  883. {
  884. ZeroMemory( EOSItem.Words, sizeof(TTSWord) );
  885. EOSItem.Words[0].pXmlState = &g_DefaultXMLState;
  886. EOSItem.Words[0].eWordPartOfSpeech = MS_EOSItem;
  887. EOSItem.eItemPartOfSpeech = MS_EOSItem;
  888. EOSItem.pItemInfo = (TTSItemInfo*) MemoryManager.GetMemory( sizeof(TTSItemInfo), &hr );
  889. if ( SUCCEEDED( hr ) )
  890. {
  891. EOSItem.pItemInfo->Type = ePERIOD;
  892. ItemList.AddTail( EOSItem );
  893. }
  894. }
  895. }
  896. //--- Output debugging information, if sentence breaks are desired
  897. TTSDBG_LOGITEMLIST( pItemEnum->_GetList(), STREAM_SENTENCEBREAKS );
  898. if( SUCCEEDED( hr ) )
  899. {
  900. hr = DetermineProns( pItemEnum->_GetList(), pItemEnum->_GetMemoryManager() );
  901. }
  902. pItemEnum->Reset();
  903. //--- Output debugging information, if POS or Pronunciations are desired
  904. TTSDBG_LOGITEMLIST( pItemEnum->_GetList(), STREAM_LEXLOOKUP );
  905. }
  906. return hr;
  907. } /* CStdSentEnum::GetNextSentence */
  908. /*****************************************************************************
  909. * CStdSentEnum::Reset *
  910. *---------------------*
  911. *
  912. ********************************************************************* EDC ***/
  913. STDMETHODIMP CStdSentEnum::Reset( void )
  914. {
  915. SPAUTO_OBJ_LOCK;
  916. SPDBG_FUNC( "CStdSentEnum::Reset" );
  917. HRESULT hr = S_OK;
  918. m_pCurrFrag = m_pTextFragList;
  919. m_pNextChar = m_pCurrFrag->pTextStart;
  920. m_pEndChar = m_pNextChar + m_pCurrFrag->ulTextLen;
  921. m_SentenceStack.Reset();
  922. m_fNameItem = false;
  923. return hr;
  924. } /* CStdSentEnum::Reset */
  925. /*****************************************************************************
  926. * CStdSentEnum::InitAggregateLexicon *
  927. *------------------------------------*
  928. *
  929. ********************************************************************* AH ****/
  930. HRESULT CStdSentEnum::InitAggregateLexicon( void )
  931. {
  932. return m_cpAggregateLexicon.CoCreateInstance(CLSID_SpLexicon);
  933. }
  934. /*****************************************************************************
  935. * CStdSentEnum::AddLexiconToAggregate *
  936. *-------------------------------------*
  937. *
  938. ********************************************************************* AH ****/
  939. HRESULT CStdSentEnum::AddLexiconToAggregate( ISpLexicon *pAddLexicon, DWORD dwFlags )
  940. {
  941. return m_cpAggregateLexicon->AddLexicon( pAddLexicon, dwFlags );
  942. }
  943. /*****************************************************************************
  944. * CStdSentEnum::InitMorphLexicon *
  945. *--------------------------------*
  946. *
  947. ********************************************************************* AH ****/
  948. HRESULT CStdSentEnum::InitMorphLexicon( void )
  949. {
  950. HRESULT hr = S_OK;
  951. m_pMorphLexicon = new CSMorph( m_cpAggregateLexicon, &hr );
  952. return hr;
  953. }
  954. void CStdSentEnum::fNamesLTS( bool fHaveNamesLTS )
  955. {
  956. m_fHaveNamesLTS = fHaveNamesLTS;
  957. }
  958. //
  959. //=== CSentItemEnum =========================================================
  960. //
  961. /*****************************************************************************
  962. * CSentItemEnum::Next *
  963. *---------------------*
  964. *
  965. ********************************************************************* EDC ***/
  966. STDMETHODIMP CSentItemEnum::
  967. Next( TTSSentItem *pItemEnum )
  968. {
  969. SPDBG_FUNC( "CSentItemEnum::Next" );
  970. HRESULT hr = S_OK;
  971. //--- Check args
  972. if( SPIsBadWritePtr( pItemEnum, sizeof( TTSSentItem ) ) )
  973. {
  974. hr = E_INVALIDARG;
  975. }
  976. else
  977. {
  978. if ( m_ListPos )
  979. {
  980. *pItemEnum = m_ItemList.GetNext( m_ListPos );
  981. }
  982. else
  983. {
  984. hr = S_FALSE;
  985. }
  986. }
  987. return hr;
  988. } /* CSentItemEnum::Next */
  989. /*****************************************************************************
  990. * CSentItemEnum::Reset *
  991. *----------------------*
  992. *
  993. ********************************************************************* EDC ***/
  994. STDMETHODIMP CSentItemEnum::Reset( void )
  995. {
  996. SPDBG_FUNC( "CSentItemEnum::Reset" );
  997. HRESULT hr = S_OK;
  998. m_ListPos = m_ItemList.GetHeadPosition();
  999. return hr;
  1000. } /* CSentItemEnum::Reset */