Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

481 lines
15 KiB

  1. /*******************************************************************************
  2. * MSE_TTSEngine.cpp *
  3. *---------------*
  4. * Description:
  5. * This module is the main implementation file for the CMSE_TTSEngine class.
  6. *-------------------------------------------------------------------------------
  7. * Created By: EDC Date: 03/12/99
  8. * Copyright (C) 1999 Microsoft Corporation
  9. * All Rights Reserved
  10. *
  11. *******************************************************************************/
  12. //--- Additional includes
  13. #include "stdafx.h"
  14. #include <stdio.h>
  15. #include "TTSEngine.h"
  16. #include "stdsentenum.h"
  17. #ifdef USE_VOICEDATAOBJ
  18. #include "VoiceDataObj.h"
  19. #endif
  20. #include "commonlx.h"
  21. #include "perf\\ttsperf.h"
  22. #if USE_PERF_COUNTERS
  23. CPerfCounterManager g_pcm; // File-scope perf counter manager; FinalConstruct() initializes it and passes its address to m_pco.
  24. #endif
  25. /*****************************************************************************
  26. * MSE_TTSEngine::FinalConstruct *
  27. *----------------------------*
  28. * Description:
  29. * Constructor
  30. ********************************************************************* EDC ***/
  31. HRESULT MSE_TTSEngine::FinalConstruct()
  32. {
  33. SPDBG_FUNC( "MSE_TTSEngine::FinalConstruct" );
  34. HRESULT hr = S_OK;
  35. m_pBEnd = NULL;
  36. #if USE_PERF_COUNTERS
  37. if (g_pcm.Init("TTSPerf", perfcMax / 2 - 1, 100) == ERROR_SUCCESS)
  38. {
  39. m_pco.Init(&g_pcm);
  40. }
  41. #endif
  42. return hr;
  43. } /* MSE_TTSEngine::FinalConstruct */
  44. /*****************************************************************************
  45. * MSE_TTSEngine::FinalRelease *
  46. *--------------------------*
  47. * Description:
  48. * destructor
  49. ********************************************************************* EDC ***/
  50. void MSE_TTSEngine::FinalRelease()
  51. {
  52. SPDBG_FUNC( "MSE_TTSEngine::FinalRelease" );
  53. if ( m_pBEnd )
  54. {
  55. delete m_pBEnd;
  56. }
  57. } /* MSE_TTSEngine::FinalRelease */
  58. /*****************************************************************************
  59. * MSE_TTSEngine::SetObjectToken *
  60. *-------------------------------*
  61. * Description:
  62. * This method is called during construction to give the TTS driver object
  63. * access to the voice's object token for initialization purposes...
  64. ******************************************************************* AARONHAL ***/
  65. HRESULT MSE_TTSEngine::SetObjectToken( ISpObjectToken *pToken )
  66. {
  67. SPDBG_FUNC( "MSE_TTSEngine::SetObjectToken" );
  68. HRESULT hr = S_OK;
  69. //--- Call old SetObjectToken, in VoiceData
  70. m_cpToken = pToken;
  71. #ifdef USE_VOICEDATAOBJ
  72. hr = m_VoiceDataObj.SetObjectToken( pToken );
  73. #endif
  74. //--- Do old VoiceInit( ) stuff...
  75. if ( SUCCEEDED( hr ) )
  76. {
  77. //--- Create sentence enumerator and initialize
  78. CComObject<CStdSentEnum> *pSentEnum;
  79. hr = CComObject<CStdSentEnum>::CreateInstance( &pSentEnum );
  80. //--- Create aggregate lexicon
  81. if ( SUCCEEDED( hr ) )
  82. {
  83. hr = pSentEnum->InitAggregateLexicon();
  84. }
  85. //--- Create vendor lexicon and add to aggregate
  86. if ( SUCCEEDED( hr ) )
  87. {
  88. CComPtr<ISpObjectToken> cpToken;
  89. hr = SpGetSubTokenFromToken(pToken, L"Lex", &cpToken);
  90. CComPtr<ISpLexicon> cpCompressedLexicon;
  91. if ( SUCCEEDED( hr ) )
  92. {
  93. hr = SpCreateObjectFromToken(cpToken, &cpCompressedLexicon);
  94. }
  95. if (SUCCEEDED(hr))
  96. {
  97. hr = pSentEnum->AddLexiconToAggregate(cpCompressedLexicon, eLEXTYPE_PRIVATE1);
  98. }
  99. }
  100. //--- Create LTS lexicon and add to aggregate
  101. if ( SUCCEEDED( hr ) )
  102. {
  103. CComPtr<ISpObjectToken> cpToken;
  104. hr = SpGetSubTokenFromToken(pToken, L"Lts", &cpToken);
  105. CComPtr<ISpLexicon> cpLTSLexicon;
  106. if ( SUCCEEDED( hr ) )
  107. {
  108. hr = SpCreateObjectFromToken(cpToken, &cpLTSLexicon);
  109. }
  110. if ( SUCCEEDED( hr ) )
  111. {
  112. hr = pSentEnum->AddLexiconToAggregate(cpLTSLexicon, eLEXTYPE_PRIVATE2);
  113. }
  114. }
  115. //--- Create Names LTS lexicon and add to aggregate
  116. if ( SUCCEEDED( hr ) )
  117. {
  118. CComPtr<ISpObjectToken> cpToken;
  119. hr = SpGetSubTokenFromToken(pToken, L"Names", &cpToken);
  120. CComPtr<ISpLexicon> cpLTSLexicon;
  121. if ( SUCCEEDED( hr ) )
  122. {
  123. hr = SpCreateObjectFromToken(cpToken, &cpLTSLexicon);
  124. if ( SUCCEEDED( hr ) )
  125. {
  126. hr = pSentEnum->AddLexiconToAggregate( cpLTSLexicon, eLEXTYPE_PRIVATE3 );
  127. if ( SUCCEEDED( hr ) )
  128. {
  129. pSentEnum->fNamesLTS( true );
  130. }
  131. }
  132. }
  133. else
  134. {
  135. //--- No "Names" subtoken in the registry - just behave as we did
  136. // before the Names LTS code was added...
  137. pSentEnum->fNamesLTS( false );
  138. hr = S_OK;
  139. }
  140. }
  141. //--- Create morphology lexicon
  142. if ( SUCCEEDED( hr ) )
  143. {
  144. hr = pSentEnum->InitMorphLexicon();
  145. }
  146. //--- Set member sentence enumerator
  147. if ( SUCCEEDED( hr ) )
  148. {
  149. m_cpSentEnum = pSentEnum;
  150. }
  151. }
  152. //--- Do old InitDriver stuff
  153. if ( SUCCEEDED( hr ) )
  154. {
  155. //--------------------------
  156. // Get voice information
  157. //--------------------------
  158. #ifdef USE_VOICEDATAOBJ
  159. hr = m_VoiceDataObj.GetVoiceInfo( &m_VoiceInfo );
  160. if( SUCCEEDED(hr) )
  161. {
  162. m_SampleRate = m_VoiceInfo.SampleRate;
  163. //-----------------------------
  164. // Reverb is always stereo
  165. //-----------------------------
  166. if (m_VoiceInfo.eReverbType != REVERB_TYPE_OFF )
  167. {
  168. //------------------
  169. // Stereo
  170. //------------------
  171. m_IsStereo = true;
  172. m_BytesPerSample = 4;
  173. }
  174. else
  175. {
  176. //------------------
  177. // MONO
  178. //------------------
  179. m_IsStereo = false;
  180. m_BytesPerSample = 2;
  181. }
  182. #else
  183. {
  184. #endif
  185. //--------------------------
  186. // Initialize BACKEND
  187. //--------------------------
  188. m_pBEnd = CBackEnd::ClassFactory();
  189. if ( m_pBEnd )
  190. {
  191. CSpDynamicString dstrSFontPath;
  192. hr = pToken->GetStringValue( L"Sfont", &dstrSFontPath );
  193. if ( SUCCEEDED( hr ) )
  194. {
  195. char *pszSFontPath = NULL;
  196. pszSFontPath = dstrSFontPath.CopyToChar();
  197. if ( !pszSFontPath )
  198. {
  199. hr = E_OUTOFMEMORY;
  200. }
  201. else if ( !m_pBEnd->LoadTable( pszSFontPath ) )
  202. {
  203. hr = E_FAIL;
  204. }
  205. else
  206. {
  207. m_pBEnd->SetFrontEndFlag ();
  208. m_pBEnd->SetGain( 2.0 );
  209. ::CoTaskMemFree( pszSFontPath );
  210. }
  211. }
  212. }
  213. else
  214. {
  215. hr = E_OUTOFMEMORY;
  216. }
  217. //--------------------------
  218. // Initialize FRONTEND obj
  219. //--------------------------
  220. if( SUCCEEDED( hr ))
  221. {
  222. EntropicPitchInfo PitchInfo;
  223. int BaseLine, RefLine, TopLine;
  224. m_pBEnd->GetSpeakerInfo( &BaseLine, &RefLine, &TopLine );
  225. PitchInfo.BasePitch = ( TopLine + BaseLine ) / 2;
  226. PitchInfo.Range = TopLine - BaseLine;
  227. #ifdef USE_VOICEDATAOBJ
  228. hr = m_FEObj.Init( &m_VoiceDataObj, NULL, &m_VoiceInfo, PitchInfo );
  229. #else
  230. hr = m_FEObj.Init( NULL /*&m_VoiceDataObj*/, NULL, NULL /*&m_VoiceInfo*/, PitchInfo, m_pBEnd->GetPhoneSetFlag() );
  231. #endif
  232. }
  233. }
  234. }
  235. return hr;
  236. } /* MSE_TTSEngine::SetObjectToken */
  237. /*****************************************************************************
  238. * MSE_TTSEngine::Speak *
  239. *-------------------*
  240. * Description:
  241. * This method is supposed to speak the text observing the associated
  242. * XML state.
  243. ********************************************************************* EDC ***/
  244. STDMETHODIMP MSE_TTSEngine::
  245. Speak( DWORD dwSpeakFlags, REFGUID rguidFormatId,
  246. const WAVEFORMATEX * /* pWaveFormatEx ignored */,
  247. const SPVTEXTFRAG* pTextFragList,
  248. ISpTTSEngineSite* pOutputSite )
  249. {
  250. SPDBG_FUNC( "MSE_TTSEngine::Speak" );
  251. HRESULT hr = S_OK;
  252. #if USE_PERF_COUNTERS
  253. m_pco.IncrementCounter (perfcSpeakCalls);
  254. #endif
  255. //--- Early exit?
  256. if( ( rguidFormatId != SPDFID_WaveFormatEx && rguidFormatId != SPDFID_Text ) || SP_IS_BAD_INTERFACE_PTR( pOutputSite ) )
  257. {
  258. hr = E_INVALIDARG;
  259. }
  260. else
  261. {
  262. //--- Debug Macro - open file for debugging output
  263. TTSDBG_OPENFILE;
  264. //--- Initialize sentence enumerator
  265. hr = m_cpSentEnum->SetFragList( pTextFragList, dwSpeakFlags );
  266. if( SUCCEEDED( hr ) )
  267. {
  268. // The following code is here just for testing.
  269. // It should be removed once all the tools accept the
  270. // new way of outputing debug info.
  271. if( rguidFormatId == SPDFID_Text )
  272. {
  273. //--- Enumerate and write out all sentence items.
  274. IEnumSENTITEM *pItemEnum;
  275. TTSSentItem Item;
  276. //--- Write unicode signature
  277. static const WCHAR Signature = 0xFEFF;
  278. hr = pOutputSite->Write( &Signature, sizeof(Signature), NULL );
  279. while( (hr = m_cpSentEnum->Next( &pItemEnum) ) == S_OK )
  280. {
  281. while( (hr = pItemEnum->Next( &Item )) == S_OK )
  282. {
  283. // Is there a valid normalized-word-list?
  284. if ( Item.pItemInfo->Type & eWORDLIST_IS_VALID )
  285. {
  286. for ( ULONG i = 0; i < Item.ulNumWords; i++ )
  287. {
  288. if ( Item.Words[i].pXmlState->eAction == SPVA_Speak ||
  289. Item.Words[i].pXmlState->eAction == SPVA_SpellOut )
  290. {
  291. ULONG cb = Item.Words[i].ulWordLen * sizeof( WCHAR );
  292. hr = pOutputSite->Write( Item.Words[i].pWordText, cb, NULL );
  293. if( hr == S_OK )
  294. {
  295. //--- Insert space between items
  296. hr = pOutputSite->Write( L" ", sizeof( WCHAR ), NULL );
  297. }
  298. }
  299. }
  300. }
  301. else // no word list - just write the original text.
  302. {
  303. ULONG cb = Item.ulItemSrcLen * sizeof( WCHAR );
  304. hr = pOutputSite->Write( Item.pItemSrcText, cb, NULL );
  305. if ( SUCCEEDED(hr) )
  306. {
  307. //--- Insert space between items
  308. hr = pOutputSite->Write( L" ", sizeof( WCHAR ), NULL );
  309. }
  310. }
  311. }
  312. pItemEnum->Release();
  313. //--- Insert mark between sentences
  314. if( SUCCEEDED( hr ) )
  315. {
  316. static const WCHAR CRLF[2] = { 0x000D, 0x000A };
  317. hr = pOutputSite->Write( CRLF, 2*sizeof(WCHAR), NULL );
  318. }
  319. }
  320. static const WCHAR ENDL = 0x0000;
  321. hr = pOutputSite->Write( &ENDL, sizeof(WCHAR), NULL );
  322. }
  323. else
  324. {
  325. //--- Render the text
  326. m_FEObj.PrepareSpeech( m_cpSentEnum, pOutputSite );
  327. SPEECH_STATE SpeechState = SPEECH_CONTINUE;
  328. SentenceData *pSentence = NULL;
  329. short *pSamples = NULL;
  330. int nSamples = 0;
  331. while ( SpeechState == SPEECH_CONTINUE )
  332. {
  333. hr = m_FEObj.NextData( (void**)&pSentence, &SpeechState );
  334. if ( SUCCEEDED( hr ) &&
  335. SpeechState == SPEECH_CONTINUE )
  336. {
  337. if ( !m_pBEnd->NewPhoneString( pSentence->pPhones, pSentence->ulNumPhones,
  338. pSentence->pf0, pSentence->ulNumf0 ) )
  339. {
  340. hr = E_FAIL;
  341. }
  342. else
  343. {
  344. while ( SUCCEEDED( hr ) &&
  345. m_pBEnd->OutputPending() )
  346. {
  347. if ( !m_pBEnd->GenerateOutput( &pSamples, &nSamples ) )
  348. {
  349. hr = E_FAIL;
  350. }
  351. else if ( nSamples )
  352. {
  353. hr = pOutputSite->Write( (void*)pSamples, nSamples * sizeof( short ), NULL );
  354. }
  355. }
  356. }
  357. }
  358. if ( pSentence )
  359. {
  360. if ( pSentence->pPhones )
  361. {
  362. delete pSentence->pPhones;
  363. pSentence->pPhones = NULL;
  364. }
  365. if ( pSentence->pf0 )
  366. {
  367. delete pSentence->pf0;
  368. pSentence->pf0 = NULL;
  369. }
  370. delete pSentence;
  371. pSentence = NULL;
  372. }
  373. }
  374. }
  375. }
  376. //--- Debug Macro - close debugging file
  377. TTSDBG_CLOSEFILE;
  378. }
  379. return hr;
  380. } /* MSE_TTSEngine::Speak */
  381. //--- This is the only format the Entropic backend supports...
  382. static const WAVEFORMATEX EntropicFormat =
  383. {
  384. 1, // wFormatTag (1 is WAVE_FORMAT_PCM in the Win32 headers)
  385. 1, // nChannels: mono
  386. 8000, // nSamplesPerSec
  387. 16000, // nAvgBytesPerSec = nSamplesPerSec * nBlockAlign
  388. 2, // nBlockAlign = nChannels * wBitsPerSample / 8
  389. 16, // wBitsPerSample
  390. 0 // cbSize: no extra format bytes follow
  391. };
  392. /****************************************************************************
  393. * MSE_TTSEngine::GetOutputFormat *
  394. *-----------------------------*
  395. * Description:
  396. *
  397. * Returns:
  398. *
  399. ******************************************************************* PACOG ***/
  400. STDMETHODIMP MSE_TTSEngine::GetOutputFormat(const GUID * pTargetFormatId, const WAVEFORMATEX * /* pTargetWaveFormatEx */,
  401. GUID * pDesiredFormatId, WAVEFORMATEX ** ppCoMemDesiredWaveFormatEx)
  402. {
  403. SPDBG_FUNC("MSE_TTSEngine::GetOutputFormat");
  404. HRESULT hr = S_OK;
  405. if( ( SP_IS_BAD_WRITE_PTR(pDesiredFormatId) ) ||
  406. ( SP_IS_BAD_WRITE_PTR(ppCoMemDesiredWaveFormatEx) ) )
  407. {
  408. hr = E_INVALIDARG;
  409. }
  410. else if (pTargetFormatId == NULL || *pTargetFormatId != SPDFID_Text)
  411. {
  412. *pDesiredFormatId = SPDFID_WaveFormatEx;
  413. *ppCoMemDesiredWaveFormatEx = (WAVEFORMATEX *)::CoTaskMemAlloc(sizeof(WAVEFORMATEX));
  414. if (*ppCoMemDesiredWaveFormatEx)
  415. {
  416. **ppCoMemDesiredWaveFormatEx = EntropicFormat;
  417. }
  418. else
  419. {
  420. hr = E_OUTOFMEMORY;
  421. }
  422. }
  423. else
  424. {
  425. *pDesiredFormatId = SPDFID_Text;
  426. *ppCoMemDesiredWaveFormatEx = NULL;
  427. }
  428. SPDBG_REPORT_ON_FAIL( hr );
  429. return hr;
  430. } /* MSE_TTSEngine::GetOutputFormat */