/*******************************************************************************
* MSE_TTSEngine.cpp *
*-------------------*
*   Description:
*       This module is the main implementation file for the MSE_TTSEngine class.
*-------------------------------------------------------------------------------
*  Created By: EDC                                        Date: 03/12/99
*  Copyright (C) 1999 Microsoft Corporation
*  All Rights Reserved
*
*******************************************************************************/
//--- Additional includes
#include "stdafx.h"
#include <stdio.h>
#include "TTSEngine.h"
#include "stdsentenum.h"
#include "VoiceDataObj.h"
#include "commonlx.h"
#include "perf\\ttsperf.h"
//--- File-scope perf counter manager. MSE_TTSEngine::FinalConstruct calls
//    g_pcm.Init( "TTSPerf", ... ) and hands its address to m_pco.Init().
// NOTE(review): the preprocessor line that opens the conditional section
// closed by the #endif below is not present in the file as received; it has
// to be restored (with the project's real guard macro) for this to compile.
CPerfCounterManager g_pcm;
#endif
* MSE_TTSEngine::FinalConstruct * *----------------------------* * Description: * Constructor ********************************************************************* EDC ***/ HRESULT MSE_TTSEngine::FinalConstruct() { SPDBG_FUNC( "MSE_TTSEngine::FinalConstruct" ); HRESULT hr = S_OK;
m_pBEnd = NULL;
if (g_pcm.Init("TTSPerf", perfcMax / 2 - 1, 100) == ERROR_SUCCESS) { m_pco.Init(&g_pcm); } #endif
return hr; } /* MSE_TTSEngine::FinalConstruct */
* MSE_TTSEngine::FinalRelease * *--------------------------* * Description: * destructor ********************************************************************* EDC ***/ void MSE_TTSEngine::FinalRelease() { SPDBG_FUNC( "MSE_TTSEngine::FinalRelease" ); if ( m_pBEnd ) { delete m_pBEnd; } } /* MSE_TTSEngine::FinalRelease */
* MSE_TTSEngine::SetObjectToken * *-------------------------------* * Description: * This method is called during construction to give the TTS driver object * access to the voice's object token for initialization purposes... ******************************************************************* AARONHAL ***/ HRESULT MSE_TTSEngine::SetObjectToken( ISpObjectToken *pToken ) { SPDBG_FUNC( "MSE_TTSEngine::SetObjectToken" ); HRESULT hr = S_OK;
//--- Call old SetObjectToken, in VoiceData
m_cpToken = pToken;
hr = m_VoiceDataObj.SetObjectToken( pToken ); #endif
//--- Do old VoiceInit( ) stuff...
if ( SUCCEEDED( hr ) ) { //--- Create sentence enumerator and initialize
CComObject<CStdSentEnum> *pSentEnum; hr = CComObject<CStdSentEnum>::CreateInstance( &pSentEnum );
//--- Create aggregate lexicon
if ( SUCCEEDED( hr ) ) { hr = pSentEnum->InitAggregateLexicon(); }
//--- Create vendor lexicon and add to aggregate
if ( SUCCEEDED( hr ) ) { CComPtr<ISpObjectToken> cpToken; hr = SpGetSubTokenFromToken(pToken, L"Lex", &cpToken);
CComPtr<ISpLexicon> cpCompressedLexicon; if ( SUCCEEDED( hr ) ) { hr = SpCreateObjectFromToken(cpToken, &cpCompressedLexicon); }
if (SUCCEEDED(hr)) { hr = pSentEnum->AddLexiconToAggregate(cpCompressedLexicon, eLEXTYPE_PRIVATE1); } }
//--- Create LTS lexicon and add to aggregate
if ( SUCCEEDED( hr ) ) { CComPtr<ISpObjectToken> cpToken; hr = SpGetSubTokenFromToken(pToken, L"Lts", &cpToken);
CComPtr<ISpLexicon> cpLTSLexicon; if ( SUCCEEDED( hr ) ) { hr = SpCreateObjectFromToken(cpToken, &cpLTSLexicon); }
if ( SUCCEEDED( hr ) ) { hr = pSentEnum->AddLexiconToAggregate(cpLTSLexicon, eLEXTYPE_PRIVATE2); } } //--- Create Names LTS lexicon and add to aggregate
if ( SUCCEEDED( hr ) ) { CComPtr<ISpObjectToken> cpToken; hr = SpGetSubTokenFromToken(pToken, L"Names", &cpToken);
CComPtr<ISpLexicon> cpLTSLexicon; if ( SUCCEEDED( hr ) ) { hr = SpCreateObjectFromToken(cpToken, &cpLTSLexicon); if ( SUCCEEDED( hr ) ) { hr = pSentEnum->AddLexiconToAggregate( cpLTSLexicon, eLEXTYPE_PRIVATE3 ); if ( SUCCEEDED( hr ) ) { pSentEnum->fNamesLTS( true ); } } } else { //--- No "Names" subtoken in the registry - just behave as we did
// before the Names LTS code was added...
pSentEnum->fNamesLTS( false ); hr = S_OK; } }
//--- Create morphology lexicon
if ( SUCCEEDED( hr ) ) { hr = pSentEnum->InitMorphLexicon(); }
//--- Set member sentence enumerator
if ( SUCCEEDED( hr ) ) { m_cpSentEnum = pSentEnum; } }
//--- Do old InitDriver stuff
if ( SUCCEEDED( hr ) ) { //--------------------------
// Get voice information
hr = m_VoiceDataObj.GetVoiceInfo( &m_VoiceInfo );
if( SUCCEEDED(hr) ) { m_SampleRate = m_VoiceInfo.SampleRate;
// Reverb is always stereo
if (m_VoiceInfo.eReverbType != REVERB_TYPE_OFF ) { //------------------
// Stereo
m_IsStereo = true; m_BytesPerSample = 4; } else { //------------------
m_IsStereo = false; m_BytesPerSample = 2; } #else
{ #endif
// Initialize BACKEND
m_pBEnd = CBackEnd::ClassFactory();
if ( m_pBEnd ) { CSpDynamicString dstrSFontPath;
hr = pToken->GetStringValue( L"Sfont", &dstrSFontPath );
if ( SUCCEEDED( hr ) ) { char *pszSFontPath = NULL; pszSFontPath = dstrSFontPath.CopyToChar();
if ( !pszSFontPath ) { hr = E_OUTOFMEMORY; } else if ( !m_pBEnd->LoadTable( pszSFontPath ) ) { hr = E_FAIL; } else { m_pBEnd->SetFrontEndFlag (); m_pBEnd->SetGain( 2.0 ); ::CoTaskMemFree( pszSFontPath ); } } } else { hr = E_OUTOFMEMORY; }
// Initialize FRONTEND obj
if( SUCCEEDED( hr )) { EntropicPitchInfo PitchInfo; int BaseLine, RefLine, TopLine; m_pBEnd->GetSpeakerInfo( &BaseLine, &RefLine, &TopLine ); PitchInfo.BasePitch = ( TopLine + BaseLine ) / 2; PitchInfo.Range = TopLine - BaseLine; #ifdef USE_VOICEDATAOBJ
hr = m_FEObj.Init( &m_VoiceDataObj, NULL, &m_VoiceInfo, PitchInfo ); #else
hr = m_FEObj.Init( NULL /*&m_VoiceDataObj*/, NULL, NULL /*&m_VoiceInfo*/, PitchInfo, m_pBEnd->GetPhoneSetFlag() ); #endif
} } }
return hr; } /* MSE_TTSEngine::SetObjectToken */
* MSE_TTSEngine::Speak * *-------------------* * Description: * This method is supposed to speak the text observing the associated * XML state. ********************************************************************* EDC ***/ STDMETHODIMP MSE_TTSEngine:: Speak( DWORD dwSpeakFlags, REFGUID rguidFormatId, const WAVEFORMATEX * /* pWaveFormatEx ignored */, const SPVTEXTFRAG* pTextFragList, ISpTTSEngineSite* pOutputSite ) { SPDBG_FUNC( "MSE_TTSEngine::Speak" ); HRESULT hr = S_OK;
m_pco.IncrementCounter (perfcSpeakCalls); #endif
//--- Early exit?
if( ( rguidFormatId != SPDFID_WaveFormatEx && rguidFormatId != SPDFID_Text ) || SP_IS_BAD_INTERFACE_PTR( pOutputSite ) ) { hr = E_INVALIDARG; } else { //--- Debug Macro - open file for debugging output
//--- Initialize sentence enumerator
hr = m_cpSentEnum->SetFragList( pTextFragList, dwSpeakFlags );
if( SUCCEEDED( hr ) ) {
// The following code is here just for testing.
// It should be removed once all the tools accept the
// new way of outputing debug info.
if( rguidFormatId == SPDFID_Text ) { //--- Enumerate and write out all sentence items.
IEnumSENTITEM *pItemEnum; TTSSentItem Item;
//--- Write unicode signature
static const WCHAR Signature = 0xFEFF; hr = pOutputSite->Write( &Signature, sizeof(Signature), NULL );
while( (hr = m_cpSentEnum->Next( &pItemEnum) ) == S_OK ) { while( (hr = pItemEnum->Next( &Item )) == S_OK ) { // Is there a valid normalized-word-list?
if ( Item.pItemInfo->Type & eWORDLIST_IS_VALID ) { for ( ULONG i = 0; i < Item.ulNumWords; i++ ) { if ( Item.Words[i].pXmlState->eAction == SPVA_Speak || Item.Words[i].pXmlState->eAction == SPVA_SpellOut ) { ULONG cb = Item.Words[i].ulWordLen * sizeof( WCHAR ); hr = pOutputSite->Write( Item.Words[i].pWordText, cb, NULL ); if( hr == S_OK ) { //--- Insert space between items
hr = pOutputSite->Write( L" ", sizeof( WCHAR ), NULL ); } } } } else // no word list - just write the original text.
{ ULONG cb = Item.ulItemSrcLen * sizeof( WCHAR ); hr = pOutputSite->Write( Item.pItemSrcText, cb, NULL ); if ( SUCCEEDED(hr) ) { //--- Insert space between items
hr = pOutputSite->Write( L" ", sizeof( WCHAR ), NULL ); } } } pItemEnum->Release();
//--- Insert mark between sentences
if( SUCCEEDED( hr ) ) { static const WCHAR CRLF[2] = { 0x000D, 0x000A }; hr = pOutputSite->Write( CRLF, 2*sizeof(WCHAR), NULL ); } } static const WCHAR ENDL = 0x0000; hr = pOutputSite->Write( &ENDL, sizeof(WCHAR), NULL );
} else { //--- Render the text
m_FEObj.PrepareSpeech( m_cpSentEnum, pOutputSite );
SPEECH_STATE SpeechState = SPEECH_CONTINUE; SentenceData *pSentence = NULL; short *pSamples = NULL; int nSamples = 0;
while ( SpeechState == SPEECH_CONTINUE ) { hr = m_FEObj.NextData( (void**)&pSentence, &SpeechState );
if ( SUCCEEDED( hr ) && SpeechState == SPEECH_CONTINUE ) { if ( !m_pBEnd->NewPhoneString( pSentence->pPhones, pSentence->ulNumPhones, pSentence->pf0, pSentence->ulNumf0 ) ) { hr = E_FAIL; } else { while ( SUCCEEDED( hr ) && m_pBEnd->OutputPending() ) { if ( !m_pBEnd->GenerateOutput( &pSamples, &nSamples ) ) { hr = E_FAIL; } else if ( nSamples ) { hr = pOutputSite->Write( (void*)pSamples, nSamples * sizeof( short ), NULL ); } } } }
if ( pSentence ) { if ( pSentence->pPhones ) { delete pSentence->pPhones; pSentence->pPhones = NULL; } if ( pSentence->pf0 ) { delete pSentence->pf0; pSentence->pf0 = NULL; } delete pSentence; pSentence = NULL; } } } }
//--- Debug Macro - close debugging file
return hr; } /* MSE_TTSEngine::Speak */
//--- This is the only format the Entropic backend supports...
//    GetOutputFormat copies this structure into the buffer it returns for
//    wave output. The initializer follows the WAVEFORMATEX member order.
static const WAVEFORMATEX EntropicFormat =
{
    1,      // wFormatTag      - 1 is the PCM format tag
    1,      // nChannels       - mono
    8000,   // nSamplesPerSec
    16000,  // nAvgBytesPerSec - nSamplesPerSec * nBlockAlign
    2,      // nBlockAlign     - bytes per sample frame
    16,     // wBitsPerSample
    0       // cbSize          - no extra format bytes
};
* MSE_TTSEngine::GetOutputFormat * *-----------------------------* * Description: * * Returns: * ******************************************************************* PACOG ***/
STDMETHODIMP MSE_TTSEngine::GetOutputFormat(const GUID * pTargetFormatId, const WAVEFORMATEX * /* pTargetWaveFormatEx */, GUID * pDesiredFormatId, WAVEFORMATEX ** ppCoMemDesiredWaveFormatEx) { SPDBG_FUNC("MSE_TTSEngine::GetOutputFormat"); HRESULT hr = S_OK;
if( ( SP_IS_BAD_WRITE_PTR(pDesiredFormatId) ) || ( SP_IS_BAD_WRITE_PTR(ppCoMemDesiredWaveFormatEx) ) ) { hr = E_INVALIDARG; } else if (pTargetFormatId == NULL || *pTargetFormatId != SPDFID_Text) { *pDesiredFormatId = SPDFID_WaveFormatEx; *ppCoMemDesiredWaveFormatEx = (WAVEFORMATEX *)::CoTaskMemAlloc(sizeof(WAVEFORMATEX)); if (*ppCoMemDesiredWaveFormatEx) { **ppCoMemDesiredWaveFormatEx = EntropicFormat; } else { hr = E_OUTOFMEMORY; } } else { *pDesiredFormatId = SPDFID_Text; *ppCoMemDesiredWaveFormatEx = NULL; }
SPDBG_REPORT_ON_FAIL( hr ); return hr; } /* MSE_TTSEngine::GetOutputFormat */