Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1016 lines
41 KiB

  1. /*******************************************************************************
  2. * StdSentEnum.cpp *
  3. *-----------------*
  4. * Description:
  5. * This module is the main implementation file for the CStdSentEnum class.
  6. *-------------------------------------------------------------------------------
  7. * Created By: EDC Date: 03/19/99
  8. * Copyright (C) 1999 Microsoft Corporation
  9. * All Rights Reserved
  10. *
  11. *******************************************************************************/
  12. //--- Additional includes
  13. #include "stdafx.h"
  14. #ifndef StdSentEnum_h
  15. #include "stdsentenum.h"
  16. #endif
  17. #include "spttsengdebug.h"
  18. #include "SpAutoObjectLock.h"
  19. //--- Locals
  20. CComAutoCriticalSection CStdSentEnum::m_AbbrevTableCritSec;
  21. //=== CStdSentEnum ============================================================
  22. //
  23. /*****************************************************************************
  24. * CStdSentEnum::InitPron *
  25. *------------------------*
  26. * Description:
  27. * Inits pron tables
  28. ********************************************************************* AH ***/
  29. HRESULT CStdSentEnum::InitPron( WCHAR** OriginalPron )
  30. {
  31. HRESULT hr = S_OK;
  32. WCHAR *NewPron = NULL;
  33. NewPron = new WCHAR[ wcslen( *OriginalPron ) ];
  34. hr = m_cpPhonemeConverter->PhoneToId( *OriginalPron, NewPron );
  35. if ( SUCCEEDED( hr ) )
  36. {
  37. *OriginalPron = NewPron;
  38. }
  39. return hr;
  40. } /* InitPron */
  41. /*****************************************************************************
  42. * CStdSentEnum::FinalConstruct *
  43. *------------------------------*
  44. * Description:
  45. * Constructor
  46. ********************************************************************* EDC ***/
  47. HRESULT CStdSentEnum::FinalConstruct()
  48. {
  49. SPDBG_FUNC( "CStdSentEnum::FinalConstruct" );
  50. HRESULT hr = S_OK;
  51. m_dwSpeakFlags = 0;
  52. m_pTextFragList = NULL;
  53. m_pMorphLexicon = NULL;
  54. m_eSeparatorAndDecimal = COMMA_PERIOD;
  55. m_eShortDateOrder = MONTH_DAY_YEAR;
  56. /*** Create phone converter ***/
  57. if ( SUCCEEDED( hr ) )
  58. {
  59. hr = SpCreatePhoneConverter( 1033, NULL, NULL, &m_cpPhonemeConverter );
  60. m_AbbrevTableCritSec.Lock();
  61. if ( !g_fAbbrevTablesInitialized )
  62. {
  63. for ( ULONG i = 0; SUCCEEDED( hr ) && i < sp_countof( g_AbbreviationTable ); i++ )
  64. {
  65. if ( g_AbbreviationTable[i].pPron1 )
  66. {
  67. hr = InitPron( &g_AbbreviationTable[i].pPron1 );
  68. }
  69. if ( SUCCEEDED( hr ) &&
  70. g_AbbreviationTable[i].pPron2 )
  71. {
  72. hr = InitPron( &g_AbbreviationTable[i].pPron2 );
  73. }
  74. if ( SUCCEEDED( hr ) &&
  75. g_AbbreviationTable[i].pPron3 )
  76. {
  77. hr = InitPron( &g_AbbreviationTable[i].pPron3 );
  78. }
  79. }
  80. for ( i = 0; SUCCEEDED( hr ) && i < sp_countof( g_AmbiguousWordTable ); i++ )
  81. {
  82. if ( g_AmbiguousWordTable[i].pPron1 )
  83. {
  84. hr = InitPron( &g_AmbiguousWordTable[i].pPron1 );
  85. }
  86. if ( SUCCEEDED( hr ) &&
  87. g_AmbiguousWordTable[i].pPron2 )
  88. {
  89. hr = InitPron( &g_AmbiguousWordTable[i].pPron2 );
  90. }
  91. if ( SUCCEEDED( hr ) &&
  92. g_AmbiguousWordTable[i].pPron3 )
  93. {
  94. hr = InitPron( &g_AmbiguousWordTable[i].pPron3 );
  95. }
  96. }
  97. for ( i = 0; SUCCEEDED( hr ) && i < sp_countof( g_PostLexLookupWordTable ); i++ )
  98. {
  99. if ( g_PostLexLookupWordTable[i].pPron1 )
  100. {
  101. hr = InitPron( &g_PostLexLookupWordTable[i].pPron1 );
  102. }
  103. if ( SUCCEEDED( hr ) &&
  104. g_PostLexLookupWordTable[i].pPron2 )
  105. {
  106. hr = InitPron( &g_PostLexLookupWordTable[i].pPron2 );
  107. }
  108. if ( SUCCEEDED( hr ) &&
  109. g_PostLexLookupWordTable[i].pPron3 )
  110. {
  111. hr = InitPron( &g_PostLexLookupWordTable[i].pPron3 );
  112. }
  113. }
  114. if ( SUCCEEDED( hr ) )
  115. {
  116. hr = InitPron( &g_pOfA );
  117. if ( SUCCEEDED( hr ) )
  118. {
  119. hr = InitPron( &g_pOfAn );
  120. }
  121. }
  122. }
  123. if ( SUCCEEDED( hr ) )
  124. {
  125. g_fAbbrevTablesInitialized = true;
  126. }
  127. m_AbbrevTableCritSec.Unlock();
  128. }
  129. return hr;
  130. } /* CStdSentEnum::FinalConstruct */
  131. /*****************************************************************************
  132. * CStdSentEnum::FinalRelease *
  133. *----------------------------*
  134. * Description:
  135. * Destructor
  136. ********************************************************************* EDC ***/
  137. void CStdSentEnum::FinalRelease()
  138. {
  139. SPDBG_FUNC( "CStdSentEnum::FinalRelease" );
  140. if ( m_pMorphLexicon )
  141. {
  142. delete m_pMorphLexicon;
  143. }
  144. } /* CStdSentEnum::FinalRelease */
  145. /*****************************************************************************
  146. * CStdSentEnum::SetFragList *
  147. *---------------------------*
  148. * The text fragment list passed in is guaranteed to be valid for the lifetime
  149. * of this object. Each time this method is called, the sentence enumerator
  150. * should reset its state.
  151. ********************************************************************* EDC ***/
  152. STDMETHODIMP CStdSentEnum::
  153. SetFragList( const SPVTEXTFRAG* pTextFragList, DWORD dwSpeakFlags )
  154. {
  155. SPAUTO_OBJ_LOCK;
  156. SPDBG_FUNC( "CStdSentEnum::SetFragList" );
  157. HRESULT hr = S_OK;
  158. //--- Check args
  159. if( SP_IS_BAD_READ_PTR( pTextFragList ) ||
  160. ( dwSpeakFlags & SPF_UNUSED_FLAGS ) )
  161. {
  162. hr = E_INVALIDARG;
  163. }
  164. else
  165. {
  166. m_dwSpeakFlags = dwSpeakFlags;
  167. m_pTextFragList = pTextFragList;
  168. //--- Reset state
  169. Reset();
  170. }
  171. return hr;
  172. } /* CStdSentEnum::SetFragList */
  173. /*****************************************************************************
  174. * CStdSentEnum::Next *
  175. *--------------------*
  176. *
  177. ********************************************************************* EDC ***/
  178. STDMETHODIMP CStdSentEnum::Next( IEnumSENTITEM **ppSentItemEnum )
  179. {
  180. SPAUTO_OBJ_LOCK;
  181. SPDBG_FUNC( "CStdSentEnum::Next" );
  182. HRESULT hr = S_OK;
  183. //--- Check args
  184. if( SPIsBadWritePtr( ppSentItemEnum, sizeof( IEnumSENTITEM* ) ) )
  185. {
  186. hr = E_INVALIDARG;
  187. }
  188. else
  189. {
  190. //--- If this is NULL then the enum needs to be reset
  191. if( m_pCurrFrag )
  192. {
  193. SentencePointer NewSentencePointer;
  194. NewSentencePointer.pSentenceFrag = m_pCurrFrag;
  195. NewSentencePointer.pSentenceStart = m_pNextChar;
  196. hr = GetNextSentence( ppSentItemEnum );
  197. if( hr == S_OK )
  198. {
  199. //--- Update Sentence Pointer List
  200. hr = m_SentenceStack.Push( NewSentencePointer );
  201. }
  202. }
  203. else
  204. {
  205. hr = S_FALSE;
  206. }
  207. }
  208. return hr;
  209. } /* CStdSentEnum::Next */
  210. /*****************************************************************************
  211. * CStdSentEnum::Previous *
  212. *--------------------*
  213. *
  214. ********************************************************************* AH ****/
  215. STDMETHODIMP CStdSentEnum::Previous( IEnumSENTITEM **ppSentItemEnum )
  216. {
  217. SPAUTO_OBJ_LOCK;
  218. SPDBG_FUNC( "CStdSentEnum::Previous" );
  219. HRESULT hr = S_OK;
  220. //--- Check args
  221. if( SPIsBadWritePtr( ppSentItemEnum, sizeof( IEnumSENTITEM* ) ) )
  222. {
  223. hr = E_INVALIDARG;
  224. }
  225. else
  226. {
  227. //--- Don't care if m_pCurrFrag is NULL, as long as we have enough on the SentenceStack
  228. //--- to skip backwards...
  229. if( m_SentenceStack.GetCount() >= 2 )
  230. {
  231. //--- Get the previous Sentence from the Sentence List, and then remove the Current Sentence
  232. SentencePointer &PreviousSentence = m_SentenceStack.Pop();
  233. PreviousSentence = m_SentenceStack.Pop();
  234. //--- Reset the current frag and the current text pointer position
  235. m_pCurrFrag = PreviousSentence.pSentenceFrag;
  236. m_pNextChar = PreviousSentence.pSentenceStart;
  237. m_pEndChar = m_pCurrFrag->pTextStart + m_pCurrFrag->ulTextLen;
  238. hr = GetNextSentence( ppSentItemEnum );
  239. if( hr == S_OK )
  240. {
  241. //--- Update Sentence Pointer List
  242. hr = m_SentenceStack.Push( PreviousSentence );
  243. }
  244. }
  245. else
  246. {
  247. hr = S_FALSE;
  248. }
  249. }
  250. return hr;
  251. } /* CStdSentEnum::Previous */
  252. /*****************************************************************************
  253. * SkipWhiteSpaceAndTags *
  254. *-----------------------*
  255. * Skips m_pNextChar ahead to the next non-whitespace character (skipping
  256. * ahead in the frag list, if necessary) or sets it to NULL if it hits the
  257. * end of the frag list text...
  258. ********************************************************************* AH ****/
  259. HRESULT CStdSentEnum::SkipWhiteSpaceAndTags( const WCHAR*& pStartChar, const WCHAR*& pEndChar,
  260. const SPVTEXTFRAG*& pCurrFrag, CSentItemMemory& MemoryManager,
  261. BOOL fAddToItemList, CItemList* pItemList )
  262. {
  263. SPDBG_ASSERT( pStartChar <= pEndChar );
  264. HRESULT hr = S_OK;
  265. while ( pStartChar &&
  266. ( IsSpace( *pStartChar ) ||
  267. pStartChar == pEndChar ) )
  268. {
  269. //--- Skip whitespace
  270. while ( pStartChar < pEndChar &&
  271. IsSpace( *pStartChar ) )
  272. {
  273. ++pStartChar;
  274. }
  275. //--- Skip to next spoken frag, if necessary
  276. if ( pStartChar == pEndChar )
  277. {
  278. pCurrFrag = pCurrFrag->pNext;
  279. while ( pCurrFrag &&
  280. pCurrFrag->State.eAction != SPVA_Speak &&
  281. pCurrFrag->State.eAction != SPVA_SpellOut )
  282. {
  283. pStartChar = (WCHAR*) pCurrFrag->pTextStart;
  284. pEndChar = (WCHAR*) pStartChar + pCurrFrag->ulTextLen;
  285. //--- Add non-spoken fragments, if fAddToItemList is true.
  286. if ( fAddToItemList )
  287. {
  288. CSentItem Item;
  289. Item.pItemSrcText = pCurrFrag->pTextStart;
  290. Item.ulItemSrcLen = pCurrFrag->ulTextLen;
  291. Item.ulItemSrcOffset = pCurrFrag->ulTextSrcOffset;
  292. Item.ulNumWords = 1;
  293. Item.Words = (TTSWord*) MemoryManager.GetMemory( sizeof(TTSWord), &hr );
  294. if ( SUCCEEDED( hr ) )
  295. {
  296. ZeroMemory( Item.Words, sizeof(TTSWord) );
  297. Item.Words[0].pXmlState = &pCurrFrag->State;
  298. Item.Words[0].eWordPartOfSpeech = MS_Unknown;
  299. Item.eItemPartOfSpeech = MS_Unknown;
  300. Item.pItemInfo = (TTSItemInfo*) MemoryManager.GetMemory( sizeof(TTSItemInfo), &hr );
  301. if ( SUCCEEDED( hr ) )
  302. {
  303. Item.pItemInfo->Type = eWORDLIST_IS_VALID;
  304. pItemList->AddTail( Item );
  305. }
  306. }
  307. }
  308. pCurrFrag = pCurrFrag->pNext;
  309. }
  310. if ( !pCurrFrag )
  311. {
  312. pStartChar = NULL;
  313. pEndChar = NULL;
  314. }
  315. else
  316. {
  317. pStartChar = (WCHAR*) pCurrFrag->pTextStart;
  318. pEndChar = (WCHAR*) pStartChar + pCurrFrag->ulTextLen;
  319. }
  320. }
  321. }
  322. return hr;
  323. } /* SkipWhiteSpaceAndTags */
  324. /*****************************************************************************
  325. * FindTokenEnd *
  326. *--------------*
  327. * Returns the position of the first whitespace character after pStartChar,
  328. * or pEndChar, or the character after SP_MAX_WORD_LENGTH, whichever comes first.
  329. ********************************************************************* AH ****/
  330. const WCHAR* CStdSentEnum::FindTokenEnd( const WCHAR* pStartChar, const WCHAR* pEndChar )
  331. {
  332. SPDBG_ASSERT( pStartChar < pEndChar );
  333. ULONG ulNumChars = 1;
  334. const WCHAR *pPos = pStartChar;
  335. while ( pPos &&
  336. pPos < pEndChar &&
  337. !IsSpace( *pPos ) &&
  338. ulNumChars < SP_MAX_WORD_LENGTH )
  339. {
  340. pPos++;
  341. ulNumChars++;
  342. }
  343. return pPos;
  344. } /* FindTokenEnd */
  345. /*****************************************************************************
  346. * CStdSentEnum::AddNextSentItem *
  347. *-------------------------------*
  348. * Locates the next sentence item in the stream and adds it to the list.
  349. * Returns true if the last item added is the end of the sentence.
  350. ********************************************************************* AH ****/
  351. HRESULT CStdSentEnum::AddNextSentItem( CItemList& ItemList, CSentItemMemory& MemoryManager, BOOL* pfIsEOS )
  352. {
  353. SPDBG_ASSERT( m_pNextChar && pfIsEOS );
  354. HRESULT hr = S_OK;
  355. BOOL fHitPauseItem = false;
  356. CSentItem Item;
  357. ULONG ulTrailItems = 0;
  358. TTSItemType ItemType = eUNMATCHED;
  359. *pfIsEOS = false;
  360. //--- Skip initial whitespace characters and XML markup (by skipping ahead in the frag list).
  361. hr = SkipWhiteSpaceAndTags( m_pNextChar, m_pEndChar, m_pCurrFrag, MemoryManager, true, &ItemList );
  362. //--- This will happen when we hit the end of the frag list
  363. if ( !m_pNextChar )
  364. {
  365. return S_OK;
  366. }
  367. //--- Find end of the next token (next whitespace character, hyphen, or m_pEndChar).
  368. m_pEndOfCurrToken = FindTokenEnd( m_pNextChar, m_pEndChar );
  369. //--- Get Primary Insert Position
  370. SPLISTPOS ItemPos = ItemList.AddTail( Item );
  371. //--- Try looking up this token in the User Lexicon...
  372. WCHAR Temp = *( (WCHAR*) m_pEndOfCurrToken );
  373. *( (WCHAR*) m_pEndOfCurrToken ) = 0;
  374. SPWORDPRONUNCIATIONLIST SPList;
  375. ZeroMemory( &SPList, sizeof( SPWORDPRONUNCIATIONLIST ) );
  376. hr = m_cpAggregateLexicon->GetPronunciations( m_pNextChar, 1033, eLEXTYPE_USER, &SPList );
  377. if( SPList.pvBuffer )
  378. {
  379. ::CoTaskMemFree( SPList.pvBuffer );
  380. }
  381. *( (WCHAR*) m_pEndOfCurrToken ) = Temp;
  382. if ( SUCCEEDED( hr ) )
  383. {
  384. Item.eItemPartOfSpeech = MS_Unknown;
  385. Item.pItemSrcText = m_pNextChar;
  386. Item.ulItemSrcLen = (ULONG) ( m_pEndOfCurrToken - m_pNextChar );
  387. Item.ulItemSrcOffset = m_pCurrFrag->ulTextSrcOffset +
  388. (ULONG)( m_pNextChar - m_pCurrFrag->pTextStart );
  389. Item.ulNumWords = 1;
  390. Item.Words = (TTSWord*) MemoryManager.GetMemory( sizeof(TTSWord), &hr );
  391. if ( SUCCEEDED( hr ) )
  392. {
  393. ZeroMemory( Item.Words, sizeof(TTSWord) );
  394. Item.Words[0].pXmlState = &m_pCurrFrag->State;
  395. Item.Words[0].pWordText = m_pNextChar;
  396. Item.Words[0].ulWordLen = Item.ulItemSrcLen;
  397. Item.Words[0].pLemma = Item.Words[0].pWordText;
  398. Item.Words[0].ulLemmaLen = Item.Words[0].ulWordLen;
  399. Item.Words[0].eWordPartOfSpeech = MS_Unknown;
  400. Item.eItemPartOfSpeech = MS_Unknown;
  401. Item.pItemInfo = (TTSItemInfo*) MemoryManager.GetMemory( sizeof(TTSItemInfo*), &hr );
  402. if ( SUCCEEDED( hr ) )
  403. {
  404. Item.pItemInfo->Type = eALPHA_WORD;
  405. ItemList.SetAt( ItemPos, Item );
  406. }
  407. }
  408. m_pNextChar = m_pEndOfCurrToken;
  409. }
  410. //--- Not in the user lex - itemize, normalize, etc.
  411. else if ( hr == SPERR_NOT_IN_LEX )
  412. {
  413. hr = S_OK;
  414. //--- convert text from Unicode to Ascii
  415. hr = DoUnicodeToAsciiMap( m_pNextChar, (ULONG)( m_pEndOfCurrToken - m_pNextChar ), (WCHAR*)m_pNextChar );
  416. if ( SUCCEEDED( hr ) )
  417. {
  418. //--- Find end of the next token (next whitespace character, hyphen, or m_pEndChar)
  419. //--- AGAIN, since the mapping may have introduced new whitespace characters...
  420. m_pEndOfCurrToken = FindTokenEnd( m_pNextChar, m_pEndChar );
  421. //--- Insert lead items (group beginnings, quotation marks)
  422. while ( m_pNextChar < m_pEndOfCurrToken &&
  423. ( ( ItemType = IsGroupBeginning( *m_pNextChar ) ) != eUNMATCHED ||
  424. ( ItemType = IsQuotationMark( *m_pNextChar ) ) != eUNMATCHED ) )
  425. {
  426. CSentItem LeadItem;
  427. LeadItem.pItemSrcText = m_pNextChar;
  428. LeadItem.ulItemSrcLen = 1;
  429. LeadItem.ulItemSrcOffset = m_pCurrFrag->ulTextSrcOffset +
  430. (ULONG)(( m_pNextChar - m_pCurrFrag->pTextStart ));
  431. LeadItem.ulNumWords = 1;
  432. LeadItem.Words = (TTSWord*) MemoryManager.GetMemory( sizeof(TTSWord), &hr );
  433. if ( SUCCEEDED( hr ) )
  434. {
  435. ZeroMemory( LeadItem.Words, sizeof(TTSWord) );
  436. LeadItem.Words[0].pXmlState = &m_pCurrFrag->State;
  437. LeadItem.Words[0].eWordPartOfSpeech = ConvertItemTypeToPartOfSp( ItemType );
  438. LeadItem.eItemPartOfSpeech = ConvertItemTypeToPartOfSp( ItemType );
  439. LeadItem.pItemInfo = (TTSItemInfo*) MemoryManager.GetMemory( sizeof(TTSItemInfo), &hr );
  440. if ( SUCCEEDED( hr ) )
  441. {
  442. LeadItem.pItemInfo->Type = ItemType;
  443. if ( m_dwSpeakFlags & SPF_NLP_SPEAK_PUNC ||
  444. m_pCurrFrag->State.eAction == SPVA_SpellOut )
  445. {
  446. CWordList TempWordList;
  447. ExpandPunctuation( TempWordList, *m_pNextChar );
  448. hr = SetWordList( LeadItem, TempWordList, MemoryManager );
  449. LeadItem.pItemInfo->Type = eUNMATCHED;
  450. }
  451. ItemList.InsertBefore( ItemPos, LeadItem );
  452. m_pNextChar++;
  453. }
  454. }
  455. ItemType = eUNMATCHED;
  456. }
  457. //--- Insert trail items (group endings, quotation marks, misc. punctuation, EOS Items)
  458. m_pEndOfCurrItem = m_pEndOfCurrToken;
  459. BOOL fAddTrailItem = true;
  460. BOOL fAbbreviation = false;
  461. while ( (m_pEndOfCurrItem - 1) >= m_pNextChar &&
  462. fAddTrailItem )
  463. {
  464. fAddTrailItem = false;
  465. fAbbreviation = false;
  466. //--- Check group endings, quotation marks, misc. punctuation.
  467. if ( ( ItemType = IsGroupEnding( *(m_pEndOfCurrItem - 1) ) ) != eUNMATCHED ||
  468. ( ItemType = IsQuotationMark( *(m_pEndOfCurrItem - 1) ) ) != eUNMATCHED ||
  469. ( ItemType = IsMiscPunctuation( *(m_pEndOfCurrItem - 1) ) ) != eUNMATCHED )
  470. {
  471. fAddTrailItem = true;
  472. if ( ItemType == eCOMMA ||
  473. ItemType == eCOLON ||
  474. ItemType == eSEMICOLON )
  475. {
  476. fHitPauseItem = true;
  477. }
  478. }
  479. //--- Check EOS Items, except periods preceded by alpha characters
  480. else if ( ( ItemType = IsEOSItem( *(m_pEndOfCurrItem - 1) ) ) != eUNMATCHED &&
  481. ! ( ItemType == ePERIOD &&
  482. ( m_pEndOfCurrItem - 2 >= m_pNextChar ) &&
  483. ( iswalpha( *(m_pEndOfCurrItem - 2) ) ) ) )
  484. {
  485. //--- Check for ellipses
  486. if ( ItemType == ePERIOD )
  487. {
  488. if ( m_pEndOfCurrItem == m_pEndOfCurrToken &&
  489. ( m_pEndOfCurrItem - 2 >= m_pNextChar ) &&
  490. ( ( ItemType = IsEOSItem( *(m_pEndOfCurrItem - 2) ) ) == ePERIOD ) &&
  491. ( m_pEndOfCurrItem - 3 == m_pNextChar ) &&
  492. ( ( ItemType = IsEOSItem( *(m_pEndOfCurrItem - 3) ) ) == ePERIOD ) )
  493. {
  494. fAddTrailItem = true;
  495. ItemType = eELLIPSIS;
  496. }
  497. else
  498. {
  499. ItemType = ePERIOD;
  500. fAddTrailItem = true;
  501. *pfIsEOS = true;
  502. }
  503. }
  504. else
  505. {
  506. fAddTrailItem = true;
  507. *pfIsEOS = true;
  508. }
  509. }
  510. //--- Period preceded by alpha character - determine whether it is EOS.
  511. else if ( ItemType == ePERIOD )
  512. {
  513. //--- Is it an Initialism ( e.g. "e.g." )? If so, only EOS if the next
  514. //--- word is in the common first words list...
  515. hr = IsInitialism( ItemList, ItemPos, MemoryManager, pfIsEOS );
  516. if ( SUCCEEDED( hr ) )
  517. {
  518. if ( *pfIsEOS )
  519. {
  520. //--- Did we see a pause item earlier? In that case, we should NOT listen to this
  521. //--- IsEOS decision from IsInitialism...
  522. if ( fHitPauseItem )
  523. {
  524. *pfIsEOS = false;
  525. }
  526. else
  527. {
  528. fAddTrailItem = true;
  529. fAbbreviation = true;
  530. }
  531. }
  532. }
  533. else if ( hr == E_INVALIDARG )
  534. {
  535. const WCHAR temp = (WCHAR) *( m_pEndOfCurrItem - 1 );
  536. *( (WCHAR*) ( m_pEndOfCurrItem - 1 ) ) = 0;
  537. const AbbrevRecord* pAbbrevRecord =
  538. (AbbrevRecord*) bsearch( (void*) m_pNextChar, (void*) g_AbbreviationTable,
  539. sp_countof( g_AbbreviationTable ), sizeof( AbbrevRecord ),
  540. CompareStringAndAbbrevRecord );
  541. *( (WCHAR*) ( m_pEndOfCurrItem - 1 ) ) = temp;
  542. if ( pAbbrevRecord )
  543. {
  544. //--- Matched an abbreviation
  545. if ( pAbbrevRecord->iSentBreakDisambig < 0 )
  546. {
  547. //--- Abbreviation will never end a sentence - just insert into ItemList
  548. *pfIsEOS = false;
  549. hr = S_OK;
  550. Item.pItemSrcText = m_pNextChar;
  551. Item.ulItemSrcLen = (ULONG)(m_pEndOfCurrItem - m_pNextChar);
  552. Item.ulItemSrcOffset = m_pCurrFrag->ulTextSrcOffset +
  553. (ULONG)( m_pNextChar - m_pCurrFrag->pTextStart );
  554. Item.ulNumWords = 1;
  555. Item.Words = (TTSWord*) MemoryManager.GetMemory( sizeof( TTSWord ), &hr );
  556. if ( SUCCEEDED( hr ) )
  557. {
  558. ZeroMemory( Item.Words, sizeof( TTSWord ) );
  559. Item.Words[0].pXmlState = &m_pCurrFrag->State;
  560. Item.Words[0].pWordText = Item.pItemSrcText;
  561. Item.Words[0].ulWordLen = Item.ulItemSrcLen;
  562. Item.Words[0].pLemma = Item.pItemSrcText;
  563. Item.Words[0].ulLemmaLen = Item.ulItemSrcLen;
  564. Item.pItemInfo = (TTSItemInfo*) MemoryManager.GetMemory( sizeof(TTSAbbreviationInfo), &hr );
  565. if ( SUCCEEDED( hr ) )
  566. {
  567. if ( NeedsToBeNormalized( pAbbrevRecord ) )
  568. {
  569. Item.pItemInfo->Type = eABBREVIATION_NORMALIZE;
  570. }
  571. else
  572. {
  573. Item.pItemInfo->Type = eABBREVIATION;
  574. }
  575. ( (TTSAbbreviationInfo*) Item.pItemInfo )->pAbbreviation = pAbbrevRecord;
  576. ItemList.SetAt( ItemPos, Item );
  577. }
  578. }
  579. }
  580. else
  581. {
  582. //--- Need to do some disambiguation to determine whether,
  583. //--- a) this is indeed an abbreviation (e.g. "Ed.")
  584. //--- b) the period doubles as EOS
  585. hr = ( this->*g_SentBreakDisambigTable[pAbbrevRecord->iSentBreakDisambig] )
  586. ( pAbbrevRecord, ItemList, ItemPos, MemoryManager, pfIsEOS );
  587. if ( SUCCEEDED( hr ) )
  588. {
  589. if ( *pfIsEOS )
  590. {
  591. if ( fHitPauseItem )
  592. {
  593. *pfIsEOS = false;
  594. }
  595. else
  596. {
  597. fAddTrailItem = true;
  598. fAbbreviation = true;
  599. }
  600. }
  601. }
  602. }
  603. }
  604. if ( hr == E_INVALIDARG )
  605. {
  606. //--- Just check for periods internal to the item - this catches stuff like
  607. //--- 10:30p.m.
  608. for ( const WCHAR* pIterator = m_pNextChar; pIterator < m_pEndOfCurrItem - 1; pIterator++ )
  609. {
  610. if ( *pIterator == L'.' )
  611. {
  612. *pfIsEOS = false;
  613. break;
  614. }
  615. }
  616. //--- If all previous checks have failed, it is EOS.
  617. if ( pIterator == ( m_pEndOfCurrItem - 1 ) &&
  618. !fHitPauseItem )
  619. {
  620. hr = S_OK;
  621. fAddTrailItem = true;
  622. *pfIsEOS = true;
  623. }
  624. else if ( hr == E_INVALIDARG )
  625. {
  626. hr = S_OK;
  627. }
  628. }
  629. }
  630. }
  631. //--- Add trail item.
  632. if ( fAddTrailItem )
  633. {
  634. ulTrailItems++;
  635. CSentItem TrailItem;
  636. if ( ItemType == eELLIPSIS )
  637. {
  638. TrailItem.pItemSrcText = m_pEndOfCurrItem - 3;
  639. TrailItem.ulItemSrcLen = 3;
  640. TrailItem.ulItemSrcOffset = m_pCurrFrag->ulTextSrcOffset +
  641. (ULONG)( m_pEndOfCurrItem - m_pCurrFrag->pTextStart - 3 );
  642. }
  643. else
  644. {
  645. TrailItem.pItemSrcText = m_pEndOfCurrItem - 1;
  646. TrailItem.ulItemSrcLen = 1;
  647. TrailItem.ulItemSrcOffset = m_pCurrFrag->ulTextSrcOffset +
  648. (ULONG)( m_pEndOfCurrItem - m_pCurrFrag->pTextStart - 1 );
  649. }
  650. TrailItem.ulNumWords = 1;
  651. TrailItem.Words = (TTSWord*) MemoryManager.GetMemory( sizeof(TTSWord), &hr );
  652. if ( SUCCEEDED( hr ) )
  653. {
  654. ZeroMemory( TrailItem.Words, sizeof(TTSWord) );
  655. TrailItem.Words[0].pXmlState = &m_pCurrFrag->State;
  656. TrailItem.Words[0].eWordPartOfSpeech = ConvertItemTypeToPartOfSp( ItemType );
  657. TrailItem.eItemPartOfSpeech = ConvertItemTypeToPartOfSp( ItemType );
  658. TrailItem.pItemInfo = (TTSItemInfo*) MemoryManager.GetMemory( sizeof(TTSItemInfo), &hr );
  659. if ( SUCCEEDED( hr ) )
  660. {
  661. TrailItem.pItemInfo->Type = ItemType;
  662. if ( m_dwSpeakFlags & SPF_NLP_SPEAK_PUNC ||
  663. ( m_pCurrFrag->State.eAction == SPVA_SpellOut &&
  664. !fAbbreviation ) )
  665. {
  666. CWordList TempWordList;
  667. ExpandPunctuation( TempWordList, *(m_pEndOfCurrItem - 1) );
  668. hr = SetWordList( TrailItem, TempWordList, MemoryManager );
  669. TrailItem.pItemInfo->Type = eUNMATCHED;
  670. }
  671. ItemList.InsertAfter( ItemPos, TrailItem );
  672. if ( !fAbbreviation )
  673. {
  674. if ( ItemType == eELLIPSIS )
  675. {
  676. m_pEndOfCurrItem -= 3;
  677. ulTrailItems = 3;
  678. }
  679. else
  680. {
  681. m_pEndOfCurrItem--;
  682. }
  683. }
  684. }
  685. }
  686. ItemType = eUNMATCHED;
  687. if ( fAbbreviation )
  688. {
  689. break;
  690. }
  691. }
  692. }
  693. //--- Do Main Item Insertion
  694. if ( SUCCEEDED( hr ) &&
  695. m_pNextChar == m_pEndOfCurrItem )
  696. {
  697. ItemList.RemoveAt( ItemPos );
  698. }
  699. else if ( SUCCEEDED( hr ) )
  700. {
  701. hr = Normalize( ItemList, ItemPos, MemoryManager );
  702. }
  703. //--- Advance m_pNextChar to m_pEndOfCurrItem + once for each trail item matched.
  704. if ( SUCCEEDED( hr ) )
  705. {
  706. if ( !fAbbreviation &&
  707. m_pEndOfCurrItem + ulTrailItems != m_pEndOfCurrToken )
  708. {
  709. //--- Multi-token item matched in Normalize()... Remove all previously matched trail items,
  710. //--- as they were matched as part of the larger item...
  711. m_pNextChar = m_pEndOfCurrItem;
  712. Item = ItemList.GetNext( ItemPos );
  713. while ( ItemPos )
  714. {
  715. SPLISTPOS RemovePos = ItemPos;
  716. Item = ItemList.GetNext( ItemPos );
  717. ItemList.RemoveAt( RemovePos );
  718. }
  719. }
  720. else
  721. {
  722. m_pNextChar = m_pEndOfCurrToken;
  723. }
  724. }
  725. }
  726. }
  727. return hr;
  728. } /* CStdSentEnum::AddNextSentItem */
  729. /*****************************************************************************
  730. * CStdSentEnum::GetNextSentence *
  731. *-------------------------------*
  732. * This method is used to create a sentence item enumerator and populate it
  733. * with items. If the SPF_NLP_PASSTHROUGH flag is set, each item is the block
  734. * of text between XML states. If the SPF_NLP_PASSTHROUGH flag is not set, each
  735. * item is an individual word that is looked up in the current lexicon(s).
  736. ********************************************************************* EDC ***/
  737. HRESULT CStdSentEnum::GetNextSentence( IEnumSENTITEM** ppItemEnum )
  738. {
  739. HRESULT hr = S_OK;
  740. ULONG ulNumItems = 0;
  741. const SPVTEXTFRAG* pPrevFrag = m_pCurrFrag;
  742. //--- Is there any work to do
  743. if( m_pCurrFrag == NULL ) return S_FALSE;
  744. //--- Create sentence enum
  745. CComObject<CSentItemEnum> *pItemEnum;
  746. hr = CComObject<CSentItemEnum>::CreateInstance( &pItemEnum );
  747. if( SUCCEEDED( hr ) )
  748. {
  749. pItemEnum->AddRef();
  750. pItemEnum->_SetOwner( GetControllingUnknown() );
  751. *ppItemEnum = pItemEnum;
  752. }
  753. if( SUCCEEDED( hr ) )
  754. {
  755. BOOL fSentDone = false;
  756. BOOL fGoToNextFrag = false;
  757. CItemList& ItemList = pItemEnum->_GetList();
  758. CSentItemMemory& MemoryManager = pItemEnum->_GetMemoryManager();
  759. while( SUCCEEDED(hr) && m_pCurrFrag && !fSentDone && ulNumItems < 50 )
  760. {
  761. ulNumItems++;
  762. if( m_pCurrFrag->State.eAction == SPVA_Speak ||
  763. m_pCurrFrag->State.eAction == SPVA_SpellOut )
  764. {
  765. hr = AddNextSentItem( ItemList, MemoryManager, &fSentDone );
  766. //--- Advance fragment?
  767. if( SUCCEEDED( hr ) &&
  768. m_pNextChar &&
  769. m_pEndChar &&
  770. m_pNextChar >= m_pEndChar )
  771. {
  772. fGoToNextFrag = true;
  773. }
  774. }
  775. else
  776. {
  777. //--- Add non spoken fragments
  778. CSentItem Item;
  779. Item.pItemSrcText = m_pCurrFrag->pTextStart;
  780. Item.ulItemSrcLen = m_pCurrFrag->ulTextLen;
  781. Item.ulItemSrcOffset = m_pCurrFrag->ulTextSrcOffset;
  782. Item.ulNumWords = 1;
  783. Item.Words = (TTSWord*) MemoryManager.GetMemory( sizeof(TTSWord), &hr );
  784. if ( SUCCEEDED( hr ) )
  785. {
  786. ZeroMemory( Item.Words, sizeof(TTSWord) );
  787. Item.Words[0].pXmlState = &m_pCurrFrag->State;
  788. Item.Words[0].eWordPartOfSpeech = MS_Unknown;
  789. Item.eItemPartOfSpeech = MS_Unknown;
  790. Item.pItemInfo = (TTSItemInfo*) MemoryManager.GetMemory( sizeof(TTSItemInfo), &hr );
  791. if ( SUCCEEDED( hr ) )
  792. {
  793. Item.pItemInfo->Type = eWORDLIST_IS_VALID;
  794. ItemList.AddTail( Item );
  795. }
  796. }
  797. fGoToNextFrag = true;
  798. }
  799. if( SUCCEEDED( hr ) &&
  800. fGoToNextFrag )
  801. {
  802. fGoToNextFrag = false;
  803. pPrevFrag = m_pCurrFrag;
  804. m_pCurrFrag = m_pCurrFrag->pNext;
  805. if( m_pCurrFrag )
  806. {
  807. m_pNextChar = m_pCurrFrag->pTextStart;
  808. m_pEndChar = m_pNextChar + m_pCurrFrag->ulTextLen;
  809. }
  810. else
  811. {
  812. m_pNextChar = NULL;
  813. m_pEndChar = NULL;
  814. }
  815. }
  816. } // end while
  817. //--- If no period has been added, add one now - this will happen if the text
  818. //--- is ONLY XML markup...
  819. if ( SUCCEEDED(hr) && !fSentDone )
  820. {
  821. CSentItem EOSItem;
  822. EOSItem.pItemSrcText = g_period.pStr;
  823. EOSItem.ulItemSrcLen = g_period.Len;
  824. EOSItem.ulItemSrcOffset = pPrevFrag->ulTextSrcOffset + pPrevFrag->ulTextLen;
  825. EOSItem.ulNumWords = 1;
  826. EOSItem.Words = (TTSWord*) MemoryManager.GetMemory( sizeof(TTSWord), &hr );
  827. if ( SUCCEEDED( hr ) )
  828. {
  829. ZeroMemory( EOSItem.Words, sizeof(TTSWord) );
  830. EOSItem.Words[0].pXmlState = &g_DefaultXMLState;
  831. EOSItem.Words[0].eWordPartOfSpeech = MS_EOSItem;
  832. EOSItem.eItemPartOfSpeech = MS_EOSItem;
  833. EOSItem.pItemInfo = (TTSItemInfo*) MemoryManager.GetMemory( sizeof(TTSItemInfo), &hr );
  834. if ( SUCCEEDED( hr ) )
  835. {
  836. EOSItem.pItemInfo->Type = ePERIOD;
  837. ItemList.AddTail( EOSItem );
  838. }
  839. }
  840. }
  841. //--- Output debugging information, if sentence breaks are desired
  842. TTSDBG_LOGITEMLIST( pItemEnum->_GetList(), STREAM_SENTENCEBREAKS );
  843. if( SUCCEEDED( hr ) )
  844. {
  845. hr = DetermineProns( pItemEnum->_GetList(), pItemEnum->_GetMemoryManager() );
  846. }
  847. pItemEnum->Reset();
  848. //--- Output debugging information, if POS or Pronunciations are desired
  849. TTSDBG_LOGITEMLIST( pItemEnum->_GetList(), STREAM_LEXLOOKUP );
  850. }
  851. return hr;
  852. } /* CStdSentEnum::GetNextSentence */
  853. /*****************************************************************************
  854. * CStdSentEnum::Reset *
  855. *---------------------*
  856. *
  857. ********************************************************************* EDC ***/
  858. STDMETHODIMP CStdSentEnum::Reset( void )
  859. {
  860. SPAUTO_OBJ_LOCK;
  861. SPDBG_FUNC( "CStdSentEnum::Reset" );
  862. HRESULT hr = S_OK;
  863. m_pCurrFrag = m_pTextFragList;
  864. m_pNextChar = m_pCurrFrag->pTextStart;
  865. m_pEndChar = m_pNextChar + m_pCurrFrag->ulTextLen;
  866. m_SentenceStack.Reset();
  867. return hr;
  868. } /* CStdSentEnum::Reset */
  869. /*****************************************************************************
  870. * CStdSentEnum::InitAggregateLexicon *
  871. *------------------------------------*
  872. *
  873. ********************************************************************* AH ****/
  874. HRESULT CStdSentEnum::InitAggregateLexicon( void )
  875. {
  876. return m_cpAggregateLexicon.CoCreateInstance(CLSID_SpLexicon);
  877. }
  878. /*****************************************************************************
  879. * CStdSentEnum::AddLexiconToAggregate *
  880. *-------------------------------------*
  881. *
  882. ********************************************************************* AH ****/
  883. HRESULT CStdSentEnum::AddLexiconToAggregate( ISpLexicon *pAddLexicon, DWORD dwFlags )
  884. {
  885. return m_cpAggregateLexicon->AddLexicon( pAddLexicon, dwFlags );
  886. }
  887. /*****************************************************************************
  888. * CStdSentEnum::InitMorphLexicon *
  889. *--------------------------------*
  890. *
  891. ********************************************************************* AH ****/
  892. HRESULT CStdSentEnum::InitMorphLexicon( void )
  893. {
  894. HRESULT hr = S_OK;
  895. m_pMorphLexicon = new CSMorph( m_cpAggregateLexicon, &hr );
  896. return hr;
  897. }
  898. //
  899. //=== CSentItemEnum =========================================================
  900. //
  901. /*****************************************************************************
  902. * CSentItemEnum::Next *
  903. *---------------------*
  904. *
  905. ********************************************************************* EDC ***/
  906. STDMETHODIMP CSentItemEnum::
  907. Next( TTSSentItem *pItemEnum )
  908. {
  909. SPDBG_FUNC( "CSentItemEnum::Next" );
  910. HRESULT hr = S_OK;
  911. //--- Check args
  912. if( SPIsBadWritePtr( pItemEnum, sizeof( TTSSentItem ) ) )
  913. {
  914. hr = E_INVALIDARG;
  915. }
  916. else
  917. {
  918. if ( m_ListPos )
  919. {
  920. *pItemEnum = m_ItemList.GetNext( m_ListPos );
  921. }
  922. else
  923. {
  924. hr = S_FALSE;
  925. }
  926. }
  927. return hr;
  928. } /* CSentItemEnum::Next */
  929. /*****************************************************************************
  930. * CSentItemEnum::Reset *
  931. *----------------------*
  932. *
  933. ********************************************************************* EDC ***/
  934. STDMETHODIMP CSentItemEnum::Reset( void )
  935. {
  936. SPDBG_FUNC( "CSentItemEnum::Reset" );
  937. HRESULT hr = S_OK;
  938. m_ListPos = m_ItemList.GetHeadPosition();
  939. return hr;
  940. } /* CSentItemEnum::Reset */