You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1333 lines
44 KiB
1333 lines
44 KiB
/*******************************************************************************
|
|
* Backend.cpp *
|
|
*-------------*
|
|
* Description:
|
|
* This module is the implementation file for the CBackend class.
|
|
*-------------------------------------------------------------------------------
|
|
* Created By: mc Date: 03/12/99
|
|
* Copyright (C) 1999 Microsoft Corporation
|
|
* All Rights Reserved
|
|
*
|
|
*******************************************************************************/
|
|
|
|
#include "stdafx.h"
|
|
#ifndef __spttseng_h__
|
|
#include "spttseng.h"
|
|
#endif
|
|
#ifndef Backend_H
|
|
#include "Backend.h"
|
|
#endif
|
|
#ifndef FeedChain_H
|
|
#include "FeedChain.h"
|
|
#endif
|
|
#ifndef SPDebug_h
|
|
#include <spdebug.h>
|
|
#endif
|
|
|
|
|
|
//-----------------------------
|
|
// Data.cpp
|
|
//-----------------------------
|
|
extern const short g_IPAToAllo[];
|
|
extern const short g_AlloToViseme[];
|
|
|
|
|
|
//--------------------------------------
|
|
// DEBUG: Save utterance WAV file
|
|
//--------------------------------------
|
|
//#define SAVE_WAVE_FILE 1
|
|
|
|
|
|
|
|
|
|
//--------------------------------------------------------------------------
// Vibrato lookup table: 256 unsigned 8-bit entries holding one cycle of a
// sine-shaped wave (values span 0x09..0xF5).
// CBackend::ProsodyMod() indexes it with the upper 8 bits of its 24-bit
// vibrato phase accumulator and subtracts 128 from the entry it reads.
//--------------------------------------------------------------------------
const unsigned char g_SineWaveTbl[] =
{
    0x7b, 0x7e, 0x81, 0x84, 0x87, 0x89, 0x8c, 0x8f, 0x92, 0x95, 0x98, 0x9b, 0x9d, 0xa0, 0xa3, 0xa6,
    0xa8, 0xab, 0xae, 0xb0, 0xb3, 0xb5, 0xb8, 0xbb, 0xbd, 0xbf, 0xc2, 0xc4, 0xc7, 0xc9, 0xcb, 0xcd,
    0xcf, 0xd1, 0xd3, 0xd5, 0xd7, 0xd9, 0xdb, 0xdd, 0xdf, 0xe0, 0xe2, 0xe3, 0xe5, 0xe6, 0xe8, 0xe9,
    0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf2, 0xf3, 0xf3, 0xf4, 0xf4, 0xf4, 0xf4,
    0xf5, 0xf5, 0xf5, 0xf5, 0xf4, 0xf4, 0xf4, 0xf4, 0xf3, 0xf3, 0xf2, 0xf1, 0xf1, 0xf0, 0xef, 0xee,
    0xed, 0xec, 0xeb, 0xea, 0xe9, 0xe7, 0xe6, 0xe5, 0xe3, 0xe1, 0xe0, 0xde, 0xdc, 0xdb, 0xd9, 0xd7,
    0xd5, 0xd3, 0xd1, 0xcf, 0xcd, 0xcb, 0xc8, 0xc6, 0xc4, 0xc1, 0xbf, 0xbc, 0xba, 0xb7, 0xb5, 0xb2,
    0xb0, 0xad, 0xaa, 0xa8, 0xa5, 0xa2, 0x9f, 0x9d, 0x9a, 0x97, 0x94, 0x91, 0x8f, 0x8c, 0x89, 0x86,
    0x83, 0x80, 0x7d, 0x7a, 0x77, 0x75, 0x72, 0x6f, 0x6c, 0x69, 0x66, 0x64, 0x61, 0x5e, 0x5b, 0x58,
    0x56, 0x53, 0x50, 0x4e, 0x4b, 0x49, 0x46, 0x44, 0x41, 0x3f, 0x3c, 0x3a, 0x38, 0x35, 0x33, 0x31,
    0x2f, 0x2d, 0x2b, 0x29, 0x27, 0x25, 0x23, 0x21, 0x1f, 0x1e, 0x1c, 0x1b, 0x19, 0x18, 0x16, 0x15,
    0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0c, 0x0b, 0x0b, 0x0a, 0x0a, 0x0a, 0x0a,
    0x09, 0x09, 0x09, 0x09, 0x0a, 0x0a, 0x0a, 0x0a, 0x0b, 0x0b, 0x0c, 0x0d, 0x0d, 0x0e, 0x0f, 0x10,
    0x11, 0x12, 0x13, 0x14, 0x15, 0x17, 0x18, 0x1a, 0x1b, 0x1d, 0x1e, 0x20, 0x22, 0x23, 0x25, 0x27,
    0x29, 0x2b, 0x2d, 0x2f, 0x31, 0x34, 0x36, 0x38, 0x3a, 0x3d, 0x3f, 0x42, 0x44, 0x47, 0x49, 0x4c,
    0x4e, 0x51, 0x54, 0x56, 0x59, 0x5c, 0x5f, 0x61, 0x64, 0x67, 0x6a, 0x6d, 0x6f, 0x72, 0x75, 0x78
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*void PredictEpochDist( float duration,
|
|
long nKnots,
|
|
float SampleRate,
|
|
float *pTime,
|
|
float *pF0)
|
|
{
|
|
long curSamplesOut, endSample, j;
|
|
float epochFreq;
|
|
long epochLen, epochCount;
|
|
|
|
|
|
curSamplesOut = 0;
|
|
endSample = (long) (SampleRate * duration );
|
|
epochCount = 0;
|
|
|
|
while( curSamplesOut < endSample )
|
|
{
|
|
j = 1;
|
|
//---------------------------------------------------
|
|
// Align to appropriate knot bassed on
|
|
// current output sample
|
|
//---------------------------------------------------
|
|
while( (j < nKnots - 1) && (curSamplesOut > pTime[j]) )
|
|
j++;
|
|
//---------------------------------------------------
|
|
// Calculate exact pitch thru linear interpolation
|
|
//---------------------------------------------------
|
|
epochFreq = LinInterp( pTime[j - 1], curSamplesOut, pTime[j], pF0[j - 1], pF0[j] );
|
|
//---------------------------------------------------
|
|
// Calc sample count for curent epoch
|
|
//---------------------------------------------------
|
|
epochLen = (long) (SampleRate / epochFreq);
|
|
epochCount++;
|
|
|
|
curSamplesOut += epochLen;
|
|
}
|
|
|
|
|
|
}
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*****************************************************************************
|
|
* CBackend::CBackend *
|
|
*--------------------*
|
|
* Description: Constructor
|
|
*
|
|
********************************************************************** MC ***/
|
|
CBackend::CBackend( )
|
|
{
|
|
SPDBG_FUNC( "CBackend::CBackend" );
|
|
m_pHistory = NULL;
|
|
m_pHistory2 = NULL;
|
|
m_pFilter = NULL;
|
|
m_pReverb = NULL;
|
|
m_pOutEpoch = NULL;
|
|
m_pMap = NULL;
|
|
m_pRevFlag = NULL;
|
|
m_pSpeechBuf = NULL;
|
|
m_VibratoDepth = 0;
|
|
m_UnitVolume = 1.0f;
|
|
m_MasterVolume = SPMAX_VOLUME;
|
|
memset( &m_Synth, 0, sizeof(MSUNITDATA) );
|
|
} /* CBackend::CBackend */
|
|
|
|
|
|
/*****************************************************************************
|
|
* CBackend::~CBackend *
|
|
*---------------------*
|
|
* Description: Destructor
|
|
*
|
|
********************************************************************** MC ***/
|
|
CBackend::~CBackend( )
|
|
{
|
|
SPDBG_FUNC( "CBackend::~CBackend" );
|
|
|
|
Release();
|
|
} /* CBackend::~CBackend */
|
|
|
|
|
|
|
|
|
|
/*****************************************************************************
|
|
* CBackend::Release *
|
|
*---------------------*
|
|
* Description:
|
|
* Free memory allocaterd by Backend
|
|
*
|
|
********************************************************************** MC ***/
|
|
void CBackend::Release( )
|
|
{
|
|
SPDBG_FUNC( "CBackend::Release" );
|
|
CleanUpSynth( );
|
|
|
|
if( m_pSpeechBuf)
|
|
{
|
|
delete m_pSpeechBuf;
|
|
m_pSpeechBuf = NULL;
|
|
}
|
|
if( m_pHistory )
|
|
{
|
|
delete m_pHistory;
|
|
m_pHistory = NULL;
|
|
}
|
|
if( m_pHistory2 )
|
|
{
|
|
delete m_pHistory2;
|
|
m_pHistory2 = NULL;
|
|
}
|
|
if( m_pReverb )
|
|
{
|
|
delete m_pReverb;
|
|
m_pReverb = NULL;
|
|
}
|
|
} /* CBackend::Release */
|
|
|
|
|
|
|
|
/*****************************************************************************
|
|
* CBackend::Init *
|
|
*----------------*
|
|
* Description:
|
|
* Opens a backend instance, keeping a pointer of the acoustic
|
|
* inventory.
|
|
*
|
|
********************************************************************** MC ***/
|
|
HRESULT CBackend::Init( IMSVoiceData* pVoiceDataObj, CFeedChain *pSrcObj, MSVOICEINFO* pVoiceInfo )
|
|
{
|
|
SPDBG_FUNC( "CBackend::Init" );
|
|
long LPCsize = 0;
|
|
HRESULT hr = S_OK;
|
|
|
|
m_pVoiceDataObj = pVoiceDataObj;
|
|
m_SampleRate = (float)pVoiceInfo->SampleRate;
|
|
m_pSrcObj = pSrcObj;
|
|
m_cOrder = pVoiceInfo->LPCOrder;
|
|
m_pWindow = pVoiceInfo->pWindow;
|
|
m_FFTSize = pVoiceInfo->FFTSize;
|
|
m_VibratoDepth = ((float)pVoiceInfo->VibratoDepth) / 100.0f;
|
|
m_VibratoDepth = 0; // NOTE: disable vibrato
|
|
m_VibratoFreq = pVoiceInfo->VibratoFreq;
|
|
if( pVoiceInfo->eReverbType > REVERB_TYPE_OFF )
|
|
{
|
|
m_StereoOut = true;
|
|
m_BytesPerSample = 4;
|
|
}
|
|
else
|
|
{
|
|
m_StereoOut = false;
|
|
m_BytesPerSample = 2;
|
|
}
|
|
//---------------------------------------
|
|
// Allocate AUDIO buffer
|
|
//---------------------------------------
|
|
m_pSpeechBuf = new float[SPEECH_FRAME_SIZE + SPEECH_FRAME_OVER];
|
|
if( m_pSpeechBuf == NULL )
|
|
{
|
|
//--------------------------------------
|
|
// Out of memory!
|
|
//--------------------------------------
|
|
hr = E_OUTOFMEMORY;
|
|
}
|
|
if( SUCCEEDED(hr) )
|
|
{
|
|
//---------------------------------------
|
|
// Allocate HISTORY buffer
|
|
//---------------------------------------
|
|
|
|
LPCsize = m_cOrder + 1;
|
|
m_pHistory = new float[LPCsize];
|
|
if( m_pHistory == NULL )
|
|
{
|
|
//--------------------------------------
|
|
// Out of memory!
|
|
//--------------------------------------
|
|
hr = E_OUTOFMEMORY;
|
|
}
|
|
}
|
|
if( SUCCEEDED(hr) )
|
|
{
|
|
memset( m_pHistory, 0, LPCsize * sizeof(float) );
|
|
m_pOutEpoch = NULL;
|
|
m_pMap = NULL;
|
|
m_pRevFlag = NULL;
|
|
m_fModifiers = 0;
|
|
m_vibrato_Phase1 = 0;
|
|
|
|
|
|
//--------------------------------
|
|
// Reverb Effect
|
|
//--------------------------------
|
|
//pVoiceInfo->eReverbType = REVERB_TYPE_HALL;
|
|
if( pVoiceInfo->eReverbType > REVERB_TYPE_OFF )
|
|
{
|
|
//--------------------------------
|
|
// Create ReverbFX object
|
|
//--------------------------------
|
|
if( m_pReverb == NULL )
|
|
{
|
|
m_pReverb = new CReverbFX;
|
|
if( m_pReverb )
|
|
{
|
|
short result;
|
|
result = m_pReverb->Reverb_Init( pVoiceInfo->eReverbType, (long)m_SampleRate, m_StereoOut );
|
|
if( result != KREVERB_NOERROR )
|
|
{
|
|
//--------------------------------------------
|
|
// Not enough memory to do reverb
|
|
// Recover gracefully
|
|
//--------------------------------------------
|
|
delete m_pReverb;
|
|
m_pReverb = NULL;
|
|
}
|
|
/*else
|
|
{
|
|
//--------------------------------------------------------
|
|
// Init was successful, ready to do reverb now
|
|
//--------------------------------------------------------
|
|
}*/
|
|
}
|
|
}
|
|
}
|
|
|
|
//----------------------------
|
|
// Linear taper region scale
|
|
//----------------------------
|
|
m_linearScale = (float) pow( 10.0, (double)((1.0f - LINEAR_BKPT) * LOG_RANGE) / 20.0 );
|
|
|
|
|
|
#ifdef SAVE_WAVE_FILE
|
|
m_SaveFile = (PCSaveWAV) new CSaveWAV; // No check needed, if this fails, we simply don't save file.
|
|
if( m_SaveFile )
|
|
{
|
|
m_SaveFile->OpenWavFile( (long)m_SampleRate );
|
|
}
|
|
#endif
|
|
|
|
}
|
|
else
|
|
{
|
|
if( m_pSpeechBuf )
|
|
{
|
|
delete m_pSpeechBuf;
|
|
m_pSpeechBuf = NULL;
|
|
}
|
|
if( m_pHistory )
|
|
{
|
|
delete m_pHistory;
|
|
m_pHistory = NULL;
|
|
}
|
|
}
|
|
|
|
return hr;
|
|
} /* CBackend::Init */
|
|
|
|
|
|
/*****************************************************************************
|
|
* CBackend::FreeSynth *
|
|
*---------------------*
|
|
* Description:
|
|
* Return TRUE if consoants can be clustered.
|
|
*
|
|
********************************************************************** MC ***/
|
|
void CBackend::FreeSynth( MSUNITDATA* pSynth )
|
|
{
|
|
SPDBG_FUNC( "CBackend::FreeSynth" );
|
|
if( pSynth->pEpoch )
|
|
{
|
|
delete pSynth->pEpoch;
|
|
pSynth->pEpoch = NULL;
|
|
}
|
|
if( pSynth->pRes )
|
|
{
|
|
delete pSynth->pRes;
|
|
pSynth->pRes = NULL;
|
|
}
|
|
if( pSynth->pLPC )
|
|
{
|
|
delete pSynth->pLPC;
|
|
pSynth->pLPC = NULL;
|
|
}
|
|
} /* CBackend::FreeSynth */
|
|
|
|
|
|
/*****************************************************************************
|
|
* ExpConverter *
|
|
*--------------*
|
|
* Description:
|
|
* Convert linear to exponential taper
|
|
* 'ref' is a linear value between 0.0 to 1.0
|
|
*
|
|
********************************************************************** MC ***/
|
|
static float ExpConverter( float ref, float linearScale )
|
|
{
|
|
SPDBG_FUNC( "ExpConverter" );
|
|
float audioGain;
|
|
|
|
if( ref < LINEAR_BKPT)
|
|
{
|
|
//----------------------------------------
|
|
// Linear taper below LINEAR_BKPT
|
|
//----------------------------------------
|
|
audioGain = linearScale * (ref / LINEAR_BKPT);
|
|
}
|
|
else
|
|
{
|
|
//----------------------------------------
|
|
// Log taper above LINEAR_BKPT
|
|
//----------------------------------------
|
|
audioGain = (float) pow( 10.0, (double)((1.0f - ref) * LOG_RANGE) / 20.0 );
|
|
}
|
|
|
|
return audioGain;
|
|
} /* ExpConverter */
|
|
|
|
|
|
|
|
/*****************************************************************************
|
|
* CBackend::CvtToShort *
|
|
*----------------------*
|
|
* Description:
|
|
* Convert (in place) FLOAT audio to SHORT.
|
|
*
|
|
********************************************************************** MC ***/
|
|
void CBackend::CvtToShort( float *pSrc, long blocksize, long stereoOut, float audioGain )
|
|
{
|
|
SPDBG_FUNC( "CBackend::CvtToShort" );
|
|
long i;
|
|
short *pDest;
|
|
float fSamp;
|
|
|
|
pDest = (short*)pSrc;
|
|
for( i = 0; i < blocksize; ++i )
|
|
{
|
|
//------------------------
|
|
// Read float sample...
|
|
//------------------------
|
|
fSamp = (*pSrc++) * audioGain;
|
|
//------------------------
|
|
// ...clip to 16-bits...
|
|
//------------------------
|
|
if( fSamp > 32767 )
|
|
{
|
|
fSamp = 32767;
|
|
}
|
|
else if( fSamp < (-32768) )
|
|
{
|
|
fSamp = (-32768);
|
|
}
|
|
//------------------------
|
|
// ...save as SHORT
|
|
//------------------------
|
|
*pDest++ = (short)fSamp;
|
|
if( stereoOut )
|
|
{
|
|
*pDest++ = (short)(0 - (int)fSamp);
|
|
}
|
|
}
|
|
} /* CBackend::CvtToShort */
|
|
|
|
|
|
|
|
/*****************************************************************************
|
|
* CBackend::PSOLA_Stretch *
|
|
*-------------------------*
|
|
* Description:
|
|
* Does PSOLA epoch stretching or compressing
|
|
*
|
|
********************************************************************** MC ***/
|
|
void CBackend::PSOLA_Stretch( float *pInRes, long InSize,
|
|
float *pOutRes, long OutSize,
|
|
float *pWindow,
|
|
long cWindowSize )
|
|
{
|
|
SPDBG_FUNC( "CBackend::PSOLA_Stretch" );
|
|
long i, lim;
|
|
float window, delta, kf;
|
|
|
|
memset( pOutRes, 0, sizeof(float) * OutSize );
|
|
lim = MIN(InSize, OutSize );
|
|
delta = (float)cWindowSize / (float)lim;
|
|
kf = 0.5f;
|
|
pOutRes[0] = pInRes[0];
|
|
for( i = 1; i < lim; ++i )
|
|
{
|
|
kf += delta;
|
|
window = pWindow[(long) kf];
|
|
pOutRes[i] += pInRes[i] * window;
|
|
pOutRes[OutSize - i] += pInRes[InSize - i] * window;
|
|
}
|
|
} /* CBackend::PSOLA_Stretch */
|
|
|
|
|
|
|
|
|
|
|
|
/*****************************************************************************
|
|
* CBackend::PrepareSpeech *
|
|
*-------------------------*
|
|
* Description:
|
|
*
|
|
********************************************************************** MC ***/
|
|
void CBackend::PrepareSpeech( ISpTTSEngineSite* outputSite )
|
|
{
|
|
SPDBG_FUNC( "CBackend::PrepareSpeech" );
|
|
|
|
//m_pUnits = pUnits;
|
|
//m_unitCount = unitCount;
|
|
//m_CurUnitIndex = 0;
|
|
m_pOutputSite = outputSite;
|
|
m_silMode = true;
|
|
m_durationTarget = 0;
|
|
m_cOutSamples_Phon = 1;
|
|
m_cOutEpochs = 0; // Pull model big-bang
|
|
m_SpeechState = SPEECH_CONTINUE;
|
|
m_cOutSamples_Total = 0;
|
|
m_HasSpeech = false;
|
|
} /* CBackend::PrepareSpeech */
|
|
|
|
|
|
/*****************************************************************************
|
|
* CBackend::ProsodyMod *
|
|
*----------------------*
|
|
* Description:
|
|
* Calculate the epoch sequence for the synthesized speech
|
|
*
|
|
* INPUT:
|
|
*
|
|
* OUTPUT:
|
|
* FIlls 'pOutEpoch', 'pMap', and 'pRevFlag'
|
|
* Returns new epoch count
|
|
*
|
|
********************************************************************** MC ***/
|
|
long CBackend::ProsodyMod( UNITINFO *pCurUnit,
|
|
long cInEpochs,
|
|
float durationMpy,
|
|
long cMaxOutEpochs )
|
|
{
|
|
SPDBG_FUNC( "CBackend::ProsodyMod" );
|
|
long iframe, framesize, framesizeOut, j;
|
|
long cntOut, csamplesOut, cOutEpochs;
|
|
BOOL fUnvoiced;
|
|
short fReverse;
|
|
float totalDuration;
|
|
float durationIn; // Active accum of IN duration
|
|
float durationOut; // Active accum of OUT duration aligned to IN domain
|
|
float freqMpy;
|
|
BOOL fAdvanceInput;
|
|
float vibrato;
|
|
unsigned char *SineWavePtr;
|
|
float epochFreq;
|
|
float *pTime;
|
|
float *pF0;
|
|
|
|
iframe = 0;
|
|
durationIn = 0.0f;
|
|
durationOut = 0.0f;
|
|
csamplesOut = 0;
|
|
cntOut = 0;
|
|
cOutEpochs = 0;
|
|
fReverse = false;
|
|
pTime = pCurUnit->pTime;
|
|
pF0 = pCurUnit->pF0;
|
|
|
|
//------------------------------------
|
|
// Find total input duration
|
|
//------------------------------------
|
|
totalDuration = 0;
|
|
for( j = 0; j < cInEpochs; ++j )
|
|
{
|
|
totalDuration += ABS(m_pInEpoch[j]);
|
|
}
|
|
|
|
/*PredictEpochDist( pCurUnit->duration,
|
|
pCurUnit->nKnots,
|
|
m_SampleRate,
|
|
pTime,
|
|
pF0 );*/
|
|
|
|
while( iframe < cInEpochs && cOutEpochs < cMaxOutEpochs)
|
|
{
|
|
//-----------------------------------------
|
|
// Compute output frame length
|
|
//-----------------------------------------
|
|
if( m_pInEpoch[iframe] < 0 )
|
|
{
|
|
//-------------------------------------------------
|
|
// Since we can't change unvoiced pitch,
|
|
// do not change frame size for unvoiced frames
|
|
//-------------------------------------------------
|
|
framesize = (long)((-m_pInEpoch[iframe]) + 0.5f);
|
|
framesizeOut = framesize;
|
|
fUnvoiced = true;
|
|
}
|
|
else
|
|
{
|
|
//---------------------------------------------------
|
|
// Modify frame size for voiced epoch
|
|
// based on epoch frequency
|
|
//---------------------------------------------------
|
|
j = 1;
|
|
//---------------------------------------------------
|
|
// Align to appropriate knot bassed on
|
|
// current output sample
|
|
//---------------------------------------------------
|
|
while( (j < (long)pCurUnit->nKnots - 1) && (csamplesOut > pTime[j]) )
|
|
j++;
|
|
//---------------------------------------------------
|
|
// Calculate exact pitch thru linear interpolation
|
|
//---------------------------------------------------
|
|
|
|
epochFreq = LinInterp( pTime[j - 1], (float)csamplesOut, pTime[j], pF0[j - 1], pF0[j] );
|
|
|
|
|
|
SineWavePtr = (unsigned char*)&g_SineWaveTbl[0];
|
|
vibrato = (float)(((unsigned char)(*(SineWavePtr + (m_vibrato_Phase1 >> 16)))) - 128);
|
|
vibrato *= m_VibratoDepth;
|
|
|
|
//---------------------------------------------------
|
|
// Scale frame size using in/out ratio
|
|
//---------------------------------------------------
|
|
epochFreq = epochFreq + vibrato;
|
|
if( epochFreq < MIN_VOICE_PITCH )
|
|
{
|
|
epochFreq = MIN_VOICE_PITCH;
|
|
}
|
|
framesize = (long)(m_pInEpoch[iframe] + 0.5f);
|
|
framesizeOut = (long)(m_SampleRate / epochFreq);
|
|
|
|
|
|
vibrato = ((float)256 / ((float)22050 / m_VibratoFreq)) * (float)framesizeOut; // 3 Hz
|
|
//vibrato = ((float)256 / (float)7350) * (float)framesizeOut; // 3 Hz
|
|
m_vibrato_Phase1 += (long)(vibrato * (float)65536);
|
|
m_vibrato_Phase1 &= 0xFFFFFF;
|
|
//---------------------------------------------------
|
|
// @@@@ REMOVED 2x LIMIT
|
|
//---------------------------------------------------
|
|
/*if( framesizeOut > 2*framesize )
|
|
{
|
|
framesizeOut = 2*framesize;
|
|
}
|
|
if( framesize > 2*framesizeOut )
|
|
{
|
|
framesizeOut = framesize/2;
|
|
}*/
|
|
freqMpy = (float) framesize / framesizeOut;
|
|
fUnvoiced = false;
|
|
}
|
|
|
|
|
|
//-------------------------------------------
|
|
// Generate next output frame
|
|
//-------------------------------------------
|
|
fAdvanceInput = false;
|
|
if( durationOut + (0.5f * framesizeOut/durationMpy) <= durationIn + framesize )
|
|
{
|
|
//-----------------------------------------
|
|
// If UNvoiced and odd frame,
|
|
// reverse residual
|
|
//-----------------------------------------
|
|
if( fUnvoiced && (cntOut & 1) )
|
|
{
|
|
m_pRevFlag[cOutEpochs] = true;
|
|
fReverse = true;
|
|
}
|
|
else
|
|
{
|
|
m_pRevFlag[cOutEpochs] = false;
|
|
fReverse = false;
|
|
}
|
|
++cntOut;
|
|
|
|
durationOut += framesizeOut/durationMpy;
|
|
csamplesOut += framesizeOut;
|
|
m_pOutEpoch[cOutEpochs] = (float)framesizeOut;
|
|
m_pMap[cOutEpochs] = iframe;
|
|
cOutEpochs++;
|
|
}
|
|
else
|
|
{
|
|
fAdvanceInput = true;
|
|
}
|
|
|
|
//-------------------------------------------
|
|
// Advance to next input frame
|
|
//-------------------------------------------
|
|
if( ((durationOut + (0.5f * framesizeOut/durationMpy)) > (durationIn + framesize)) ||
|
|
//(cntOut >= 3) || @@@@ REMOVED 2x LIMIT
|
|
//(fReverse == true) ||
|
|
fAdvanceInput )
|
|
{
|
|
durationIn += framesize;
|
|
++iframe;
|
|
cntOut = 0;
|
|
}
|
|
}
|
|
|
|
return cOutEpochs;
|
|
} /* CBackend::ProsodyMod */
|
|
|
|
|
|
|
|
/*****************************************************************************
|
|
* CBackend::LPCFilter *
|
|
*---------------------*
|
|
* Description:
|
|
* LPC filter of order cOrder. It filters the residual signal
|
|
* pRes, producing output pOutWave. This routine requires that
|
|
* pOutWave has the true waveform history from [-cOrder,0] and
|
|
* of course it has to be defined.
|
|
*
|
|
********************************************************************** MC ***/
|
|
void CBackend::LPCFilter( float *pCurLPC, float *pCurRes, long len, float gain )
|
|
{
|
|
SPDBG_FUNC( "CBackend::LPCFilter" );
|
|
INT t, j;
|
|
|
|
for( t = 0; t < len; t++ )
|
|
{
|
|
m_pHistory[0] = pCurLPC[0] * pCurRes[t];
|
|
for( j = m_cOrder; j > 0; j-- )
|
|
{
|
|
m_pHistory[0] -= pCurLPC[j] * m_pHistory[j];
|
|
m_pHistory[j] = m_pHistory[j - 1];
|
|
}
|
|
pCurRes[t] = m_pHistory[0] * gain;
|
|
}
|
|
} /* CBackend::LPCFilter */
|
|
|
|
|
|
/*void CBackend::LPCFilter( float *pCurLPC, float *pCurRes, long len )
|
|
{
|
|
long t;
|
|
|
|
for( t = 0; t < len; t++ )
|
|
{
|
|
pCurRes[t] = pCurRes[t] * 10;
|
|
}
|
|
}
|
|
*/
|
|
|
|
|
|
|
|
/*****************************************************************************
|
|
* CBackend::ResRecons *
|
|
*---------------------*
|
|
* Description:
|
|
* Obtains output prosody modified residual
|
|
*
|
|
********************************************************************** MC ***/
|
|
void CBackend::ResRecons( float *pInRes,
|
|
long InSize,
|
|
float *pOutRes,
|
|
long OutSize,
|
|
float scale )
|
|
{
|
|
SPDBG_FUNC( "CBackend::ResRecons" );
|
|
long i, j;
|
|
|
|
if( m_pRevFlag[m_EpochIndex] )
|
|
{
|
|
//----------------------------------------------------
|
|
// Process repeated and reversed UNvoiced residual
|
|
//----------------------------------------------------
|
|
for( i = 0, j = OutSize-1; i < OutSize; ++i, --j )
|
|
{
|
|
pOutRes[i] = pInRes[j];
|
|
}
|
|
}
|
|
else if( InSize == OutSize )
|
|
{
|
|
//----------------------------------------------------
|
|
// Unvoiced residual or voiced residual
|
|
// with no pitch change
|
|
//----------------------------------------------------
|
|
memcpy( pOutRes, pInRes, sizeof(float) *OutSize );
|
|
}
|
|
else
|
|
{
|
|
//----------------------------------------------------
|
|
// Process voiced residual
|
|
//----------------------------------------------------
|
|
PSOLA_Stretch( pInRes, InSize, pOutRes, OutSize, m_pWindow, m_FFTSize );
|
|
}
|
|
|
|
//----------------------------------
|
|
// Amplify frame
|
|
//----------------------------------
|
|
if( scale != 1.0f )
|
|
{
|
|
for( i = 0 ; i < OutSize; ++i )
|
|
{
|
|
pOutRes[i] *= scale;
|
|
}
|
|
}
|
|
} /* CBackend::ResRecons */
|
|
|
|
|
|
|
|
|
|
/*****************************************************************************
|
|
* CBackend::StartNewUnit *
|
|
*------------------------*
|
|
* Description:
|
|
* Synthesize audio samples for a target unit
|
|
*
|
|
* INPUT:
|
|
* pCurUnit - unit ID, F0, duration, etc.
|
|
*
|
|
* OUTPUT:
|
|
* Sets 'pCurUnit->csamplesOut' with audio length
|
|
*
|
|
********************************************************************** MC ***/
|
|
HRESULT CBackend::StartNewUnit( )
|
|
{
|
|
SPDBG_FUNC( "CBackend::StartNewUnit" );
|
|
long cframeMax = 0, cInEpochs = 0, i;
|
|
float totalDuration, durationOut, durationMpy = 0;
|
|
UNITINFO *pCurUnit;
|
|
HRESULT hr = S_OK;
|
|
SPEVENT event;
|
|
ULONGLONG clientInterest;
|
|
USHORT volumeVal;
|
|
|
|
// Check for VOLUME change
|
|
if( m_pOutputSite->GetActions() & SPVES_VOLUME )
|
|
{
|
|
hr = m_pOutputSite->GetVolume( &volumeVal );
|
|
if ( SUCCEEDED( hr ) )
|
|
{
|
|
if( volumeVal > SPMAX_VOLUME )
|
|
{
|
|
//--- Clip rate to engine maximum
|
|
volumeVal = SPMAX_VOLUME;
|
|
}
|
|
else if ( volumeVal < SPMIN_VOLUME )
|
|
{
|
|
//--- Clip rate to engine minimum
|
|
volumeVal = SPMIN_VOLUME;
|
|
}
|
|
m_MasterVolume = volumeVal;
|
|
}
|
|
}
|
|
|
|
//---------------------------------------
|
|
// Delete previous unit
|
|
//---------------------------------------
|
|
CleanUpSynth( );
|
|
|
|
//---------------------------------------
|
|
// Get next phon
|
|
//---------------------------------------
|
|
hr = m_pSrcObj->NextData( (void**)&pCurUnit, &m_SpeechState );
|
|
if( m_SpeechState == SPEECH_CONTINUE )
|
|
{
|
|
m_HasSpeech = pCurUnit->hasSpeech;
|
|
m_pOutputSite->GetEventInterest( &clientInterest );
|
|
|
|
//------------------------------------------------
|
|
// Post SENTENCE event
|
|
//------------------------------------------------
|
|
if( (pCurUnit->flags & SENT_START_FLAG) && (clientInterest & SPFEI(SPEI_SENTENCE_BOUNDARY)) )
|
|
{
|
|
event.elParamType = SPET_LPARAM_IS_UNDEFINED;
|
|
event.eEventId = SPEI_SENTENCE_BOUNDARY;
|
|
event.ullAudioStreamOffset = m_cOutSamples_Total * m_BytesPerSample;
|
|
event.lParam = pCurUnit->sentencePosition; // Input word position
|
|
event.wParam = pCurUnit->sentenceLen; // Input word length
|
|
m_pOutputSite->AddEvents( &event, 1 );
|
|
}
|
|
//------------------------------------------------
|
|
// Post PHONEME event
|
|
//------------------------------------------------
|
|
if( clientInterest & SPFEI(SPEI_PHONEME) )
|
|
{
|
|
event.elParamType = SPET_LPARAM_IS_UNDEFINED;
|
|
event.eEventId = SPEI_PHONEME;
|
|
event.ullAudioStreamOffset = m_cOutSamples_Total * m_BytesPerSample;
|
|
event.lParam = ((ULONG)pCurUnit->AlloFeatures << 16) + g_IPAToAllo[pCurUnit->AlloID];
|
|
event.wParam = ((ULONG)(pCurUnit->duration * 1000.0f) << 16) + g_IPAToAllo[pCurUnit->NextAlloID];
|
|
m_pOutputSite->AddEvents( &event, 1 );
|
|
}
|
|
|
|
//------------------------------------------------
|
|
// Post VISEME event
|
|
//------------------------------------------------
|
|
if( clientInterest & SPFEI(SPEI_VISEME) )
|
|
{
|
|
event.elParamType = SPET_LPARAM_IS_UNDEFINED;
|
|
event.eEventId = SPEI_VISEME;
|
|
event.ullAudioStreamOffset = m_cOutSamples_Total * m_BytesPerSample;
|
|
event.lParam = ((ULONG)pCurUnit->AlloFeatures << 16) + g_AlloToViseme[pCurUnit->AlloID];
|
|
event.wParam = ((ULONG)(pCurUnit->duration * 1000.0f) << 16) + g_AlloToViseme[pCurUnit->NextAlloID];
|
|
m_pOutputSite->AddEvents( &event, 1 );
|
|
}
|
|
|
|
//------------------------------------------------
|
|
// Post any bookmark events
|
|
//------------------------------------------------
|
|
if( pCurUnit->pBMObj != NULL )
|
|
{
|
|
CBookmarkList *pBMObj;
|
|
BOOKMARK_ITEM* pMarker;
|
|
|
|
//-------------------------------------------------
|
|
// Retrieve marker strings from Bookmark list and
|
|
// enter into Event list
|
|
//-------------------------------------------------
|
|
pBMObj = (CBookmarkList*)pCurUnit->pBMObj;
|
|
//cMarkerCount = pBMObj->m_BMList.GetCount();
|
|
if( clientInterest & SPFEI(SPEI_TTS_BOOKMARK) )
|
|
{
|
|
//---------------------------------------
|
|
// Send event for every bookmark in list
|
|
//---------------------------------------
|
|
SPLISTPOS listPos;
|
|
|
|
listPos = pBMObj->m_BMList.GetHeadPosition();
|
|
while( listPos )
|
|
{
|
|
pMarker = (BOOKMARK_ITEM*)pBMObj->m_BMList.GetNext( listPos );
|
|
event.eEventId = SPEI_TTS_BOOKMARK;
|
|
event.elParamType = SPET_LPARAM_IS_STRING;
|
|
event.ullAudioStreamOffset = m_cOutSamples_Total * m_BytesPerSample;
|
|
//--- Copy in bookmark string - has been NULL terminated in source already...
|
|
event.lParam = pMarker->pBMItem;
|
|
// Engine must convert string to long for wParam.
|
|
event.wParam = _wtol((WCHAR *)pMarker->pBMItem);
|
|
m_pOutputSite->AddEvents( &event, 1 );
|
|
}
|
|
}
|
|
//---------------------------------------------
|
|
// We don't need this Bookmark list any more
|
|
//---------------------------------------------
|
|
delete pBMObj;
|
|
pCurUnit->pBMObj = NULL;
|
|
}
|
|
|
|
|
|
|
|
pCurUnit->csamplesOut = 0;
|
|
//******************************************************
|
|
// For SIL, fill buffer with zeros...
|
|
//******************************************************
|
|
if( pCurUnit->UnitID == UNIT_SIL )
|
|
{
|
|
//---------------------------------------------
|
|
// Calc SIL length
|
|
//---------------------------------------------
|
|
m_durationTarget = (long)(m_SampleRate * pCurUnit->duration);
|
|
m_cOutSamples_Phon = 0;
|
|
m_silMode = true;
|
|
|
|
//---------------------------------------------
|
|
// Clear LPC filter storage
|
|
//---------------------------------------------
|
|
memset( m_pHistory, 0, sizeof(float)*(m_cOrder+1) );
|
|
|
|
//--------------------------------
|
|
// Success!
|
|
//--------------------------------
|
|
|
|
// Debug macro - output unit data...
|
|
TTSDBG_LOGUNITS;
|
|
}
|
|
//******************************************************
|
|
// ...otherwise fill buffer with inventory data
|
|
//******************************************************
|
|
else
|
|
{
|
|
m_silMode = false;
|
|
// Get unit data from voice
|
|
hr = m_pVoiceDataObj->GetUnitData( pCurUnit->UnitID, &m_Synth );
|
|
if( SUCCEEDED(hr) )
|
|
{
|
|
durationOut = 0.0f;
|
|
cInEpochs = m_Synth.cNumEpochs;
|
|
m_pInEpoch = m_Synth.pEpoch;
|
|
//cframeMax = PeakValue( m_pInEpoch, cInEpochs );
|
|
totalDuration = (float)m_Synth.cNumSamples;
|
|
|
|
//-----------------------------------------------
|
|
// For debugging: Force duration to unit length
|
|
//-----------------------------------------------
|
|
/*float unitDur;
|
|
|
|
unitDur = totalDuration / 22050.0f;
|
|
if( pCurUnit->duration < unitDur )
|
|
{
|
|
if( pCurUnit->speechRate < 1 )
|
|
{
|
|
pCurUnit->duration = unitDur * pCurUnit->speechRate;
|
|
}
|
|
else
|
|
{
|
|
pCurUnit->duration = unitDur;
|
|
}
|
|
}*/
|
|
|
|
durationMpy = pCurUnit->duration;
|
|
|
|
cframeMax = (long)pCurUnit->pF0[0];
|
|
for( i = 1; i < (long)pCurUnit->nKnots; i++ )
|
|
{
|
|
//-----------------------------------------
|
|
// Find the longest epoch
|
|
//-----------------------------------------
|
|
cframeMax = (long)(MAX(cframeMax,pCurUnit->pF0[i]));
|
|
}
|
|
cframeMax *= (long)(durationMpy * MAX_TARGETS_PER_UNIT);
|
|
|
|
|
|
durationMpy = (m_SampleRate * durationMpy) / totalDuration;
|
|
cframeMax += (long)(durationMpy * cInEpochs * MAX_TARGETS_PER_UNIT);
|
|
//
|
|
// mplumpe 11/18/97 : added to eliminate chance of crash.
|
|
//
|
|
cframeMax *= 2;
|
|
//---------------------------------------------------
|
|
// New epochs adjusted for duration and pitch
|
|
//---------------------------------------------------
|
|
m_pOutEpoch = new float[cframeMax];
|
|
if( !m_pOutEpoch )
|
|
{
|
|
//--------------------------------------
|
|
// Out of memory!
|
|
//--------------------------------------
|
|
hr = E_OUTOFMEMORY;
|
|
pCurUnit->csamplesOut = 0;
|
|
CleanUpSynth( );
|
|
}
|
|
}
|
|
if( SUCCEEDED(hr) )
|
|
{
|
|
//---------------------------------------------------
|
|
// Index back to orig epoch
|
|
//---------------------------------------------------
|
|
m_pMap = new long[cframeMax];
|
|
if( !m_pMap )
|
|
{
|
|
//--------------------------------------
|
|
// Out of memory!
|
|
//--------------------------------------
|
|
hr = E_OUTOFMEMORY;
|
|
pCurUnit->csamplesOut = 0;
|
|
CleanUpSynth( );
|
|
}
|
|
}
|
|
if( SUCCEEDED(hr) )
|
|
{
|
|
//---------------------------------------------------
|
|
// TRUE = reverse residual
|
|
//---------------------------------------------------
|
|
m_pRevFlag = new short[cframeMax];
|
|
if( !m_pRevFlag )
|
|
{
|
|
//--------------------------------------
|
|
// Out of memory!
|
|
//--------------------------------------
|
|
hr = E_OUTOFMEMORY;
|
|
pCurUnit->csamplesOut = 0;
|
|
CleanUpSynth( );
|
|
}
|
|
}
|
|
if( SUCCEEDED(hr) )
|
|
{
|
|
//---------------------------------------------------------------------
|
|
// Compute synthesis epochs and corresponding mapping to analysis
|
|
// fills in: m_pOutEpoch, m_pMap, m_pRevFlag
|
|
//---------------------------------------------------------------------
|
|
m_cOutEpochs = ProsodyMod( pCurUnit, cInEpochs, durationMpy, cframeMax );
|
|
|
|
//------------------------------------------------
|
|
// Now that actual epoch sizes are known,
|
|
// calculate total audio sample count
|
|
// @@@@ NO LONGER NEEDED
|
|
//------------------------------------------------
|
|
pCurUnit->csamplesOut = 0;
|
|
for( i = 0; i < m_cOutEpochs; i++ )
|
|
{
|
|
pCurUnit->csamplesOut += (long)(ABS(m_pOutEpoch[i]));
|
|
}
|
|
|
|
|
|
m_cOutSamples_Phon = 0;
|
|
m_EpochIndex = 0;
|
|
m_durationTarget = (long)(pCurUnit->duration * m_SampleRate);
|
|
m_pInRes = m_Synth.pRes;
|
|
m_pLPC = m_Synth.pLPC;
|
|
m_pSynthTime = pCurUnit->pTime;
|
|
m_pSynthAmp = pCurUnit->pAmp;
|
|
m_nKnots = pCurUnit->nKnots;
|
|
// NOTE: Maybe make log volume?
|
|
m_UnitVolume = (float)pCurUnit->user_Volume / 100.0f;
|
|
|
|
//------------------------------------------------
|
|
// Post WORD event
|
|
//------------------------------------------------
|
|
if( (pCurUnit->flags & WORD_START_FLAG) && (clientInterest & SPFEI(SPEI_WORD_BOUNDARY)) )
|
|
{
|
|
event.elParamType = SPET_LPARAM_IS_UNDEFINED;
|
|
event.eEventId = SPEI_WORD_BOUNDARY;
|
|
event.ullAudioStreamOffset = m_cOutSamples_Total * m_BytesPerSample;
|
|
event.lParam = pCurUnit->srcPosition; // Input word position
|
|
event.wParam = pCurUnit->srcLen; // Input word length
|
|
m_pOutputSite->AddEvents( &event, 1 );
|
|
}
|
|
|
|
|
|
//--- Debug macro - output unit data
|
|
TTSDBG_LOGUNITS;
|
|
}
|
|
}
|
|
}
|
|
|
|
return hr;
|
|
} /* CBackend::StartNewUnit */
|
|
|
|
|
|
|
|
|
|
|
|
/*****************************************************************************
|
|
* CBackend::CleanUpSynth *
|
|
*------------------------*
|
|
* Description:
|
|
*
|
|
********************************************************************** MC ***/
|
|
void CBackend::CleanUpSynth( )
|
|
{
|
|
SPDBG_FUNC( "CBackend::CleanUpSynth" );
|
|
|
|
if( m_pOutEpoch )
|
|
{
|
|
delete m_pOutEpoch;
|
|
m_pOutEpoch = NULL;
|
|
}
|
|
if( m_pMap )
|
|
{
|
|
delete m_pMap;
|
|
m_pMap = NULL;
|
|
}
|
|
if( m_pRevFlag )
|
|
{
|
|
delete m_pRevFlag;
|
|
m_pRevFlag = NULL;
|
|
}
|
|
// NOTE: make object?
|
|
FreeSynth( &m_Synth );
|
|
|
|
} /* CBackend::CleanUpSynth */
|
|
|
|
|
|
|
|
/*****************************************************************************
|
|
* CBackend::RenderFrame *
|
|
*-----------------------*
|
|
* Description:
|
|
* This is the central synthesis loop. Keep filling output audio
|
|
* buffer until buffer frame is full or speech is done. To render
|
|
* continuous speech, get each unit one at a time from upstream buffer.
|
|
*
* Returns:
* S_OK on success; otherwise the failing HRESULT from StartNewUnit()
* or from m_pOutputSite->Write(). In either failure case m_SpeechState
* is set to SPEECH_DONE.
*
* NOTE(review): the TTSDBG_* debug macros used below are defined
* elsewhere; their expansions are not visible here and may refer to
* this function's locals by name - confirm before renaming anything.
*
|
|
********************************************************************** MC ***/
|
|
HRESULT CBackend::RenderFrame( )
|
|
{
|
|
SPDBG_FUNC( "CBackend::RenderFrame" );
|
|
long InSize, OutSize;
|
|
long iframe;
|
|
float *pCurInRes, *pCurOutRes;
|
|
long i, j;
|
|
float ampMpy;
|
|
HRESULT hr = S_OK;
|
|
|
|
// Each call starts writing at the beginning of m_pSpeechBuf
m_cOutSamples_Frame = 0;
|
|
do
|
|
{
|
|
// Samples produced by this iteration; stays 0 when the iteration
// only fetches a new unit
OutSize = 0;
|
|
if( m_silMode )
|
|
{
|
|
//-------------------------------
|
|
// Silence mode
|
|
//-------------------------------
|
|
if( m_cOutSamples_Phon >= m_durationTarget )
|
|
{
|
|
//---------------------------
|
|
// Get next unit
|
|
//---------------------------
|
|
hr = StartNewUnit( );
|
|
if (FAILED(hr))
|
|
{
|
|
//-----------------------------------
|
|
// Try to end it gracefully...
|
|
//-----------------------------------
|
|
m_SpeechState = SPEECH_DONE;
|
|
}
|
|
|
|
TTSDBG_LOGSILEPOCH;
|
|
}
|
|
else
|
|
{
|
|
//---------------------------
|
|
// Continue with current SIL
|
|
//---------------------------
|
|
// Silence is emitted one zero-valued sample per iteration
m_pSpeechBuf[m_cOutSamples_Frame] = 0;
|
|
OutSize = 1;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if( m_EpochIndex < m_cOutEpochs )
|
|
{
|
|
//-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
|
//
|
|
// Continue with current phon
|
|
//
|
|
//-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
|
//------------------------------------
|
|
// Find current input residual
|
|
//------------------------------------
|
|
// m_pMap gives the analysis (input) epoch for this output epoch
iframe = m_pMap[m_EpochIndex];
|
|
pCurInRes = m_pInRes;
|
|
// Skip over the residual samples of every input epoch before iframe.
// ABS is applied because epoch entries can be negative;
// NOTE(review): the meaning of the sign is not visible here.
for( i = 0; i < iframe; i++)
|
|
{
|
|
pCurInRes += (long) ABS(m_pInEpoch[i]);
|
|
}
|
|
|
|
// Output goes at the current fill position of the frame buffer
pCurOutRes = m_pSpeechBuf + m_cOutSamples_Frame;
|
|
InSize = (long)(ABS(m_pInEpoch[iframe]));
|
|
OutSize = (long)(ABS(m_pOutEpoch[m_EpochIndex]));
|
|
// If this epoch would run past the frame buffer plus its overflow
// allowance, overwrite the stored epoch length with
// SPEECH_FRAME_OVER-1 and render that many samples instead
if (m_cOutSamples_Frame + OutSize > SPEECH_FRAME_SIZE + SPEECH_FRAME_OVER)
|
|
{
|
|
m_pOutEpoch[m_EpochIndex] = SPEECH_FRAME_OVER-1; // still huge
|
|
OutSize = (long)(ABS(m_pOutEpoch[m_EpochIndex]));
|
|
}
|
|
// Find the knot pair (j-1, j) around the current position within the
// unit; j never goes past m_nKnots-1.
// NOTE(review): m_pSynthTime[1] is read even when m_nKnots < 2 -
// confirm callers always supply at least two knots.
j = 1;
|
|
while( (j < m_nKnots - 1) && (m_cOutSamples_Phon > m_pSynthTime[j]) )
|
|
{
|
|
j++;
|
|
}
|
|
// Amplitude multiplier for this epoch, from the two surrounding knots
// (presumably linear interpolation - LinInterp is defined elsewhere)
ampMpy = LinInterp( m_pSynthTime[j - 1], (float)m_cOutSamples_Phon, m_pSynthTime[j], m_pSynthAmp[j - 1], m_pSynthAmp[j] );
|
|
//ampMpy = 1;
|
|
|
|
//--------------------------------------------
|
|
// Do stretching of residuals
|
|
//--------------------------------------------
|
|
ResRecons( pCurInRes, InSize, pCurOutRes, OutSize, ampMpy );
|
|
|
|
//--------------------------------------------
|
|
// Do LPC reconstruction
|
|
//--------------------------------------------
|
|
float *pCurLPC;
|
|
float totalGain;
|
|
|
|
// Overall gain is the product of the converted master volume
// (normalized by SPMAX_VOLUME) and the converted per-unit volume
totalGain = ExpConverter( ((float)m_MasterVolume / (float)SPMAX_VOLUME), m_linearScale )
|
|
* ExpConverter( m_UnitVolume, m_linearScale );
|
|
|
|
// LPC data is laid out as (1 + m_cOrder) floats per analysis epoch;
// select the record for the mapped epoch and force its first entry to 1.0
pCurLPC = m_pLPC + m_pMap[m_EpochIndex] * (1 + m_cOrder);
|
|
pCurLPC[0] = 1.0f;
|
|
// Filter operates on the samples just written by ResRecons
LPCFilter( pCurLPC, &m_pSpeechBuf[m_cOutSamples_Frame], OutSize, totalGain );
|
|
m_EpochIndex++;
|
|
}
|
|
else
|
|
{
|
|
//-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
|
//
|
|
// Get next phon
|
|
//
|
|
//-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
|
hr = StartNewUnit( );
|
|
if (FAILED(hr))
|
|
{
|
|
//-----------------------------------
|
|
// Try to end it gracefully...
|
|
//-----------------------------------
|
|
m_SpeechState = SPEECH_DONE;
|
|
}
|
|
TTSDBG_LOGSILEPOCH;
|
|
}
|
|
}
|
|
// Advance the per-frame, per-unit and whole-stream sample counters
m_cOutSamples_Frame += OutSize;
|
|
m_cOutSamples_Phon += OutSize;
|
|
m_cOutSamples_Total += OutSize;
|
|
|
|
TTSDBG_LOGEPOCHS;
|
|
}
|
|
// Stop once the frame has reached SPEECH_FRAME_SIZE samples or the
// speech state is no longer SPEECH_CONTINUE
while( (m_cOutSamples_Frame < SPEECH_FRAME_SIZE) && (m_SpeechState == SPEECH_CONTINUE) );
|
|
|
|
if( SUCCEEDED(hr) )
|
|
{
|
|
//----------------------------------------------
|
|
// Convert buffer from FLOAT to SHORT
|
|
//----------------------------------------------
|
|
// NOTE(review): CvtToShort is only called when no reverb object is
// attached; presumably Reverb_Process performs its own conversion -
// confirm against the reverb implementation.
if( m_pReverb )
|
|
{
|
|
//---------------------------------
|
|
// Add REVERB
|
|
//---------------------------------
|
|
m_pReverb->Reverb_Process( m_pSpeechBuf, m_cOutSamples_Frame, 1.0f );
|
|
}
|
|
else
|
|
{
|
|
CvtToShort( m_pSpeechBuf, m_cOutSamples_Frame, m_StereoOut, 1.0f );
|
|
}
|
|
|
|
//--- Debug Macro - output wave data to stream
|
|
TTSDBG_LOGWAVE;
|
|
}
|
|
|
|
if( SUCCEEDED( hr ) )
|
|
{
|
|
//------------------------------------
|
|
// Send this buffer to SAPI site
|
|
//------------------------------------
|
|
DWORD cbWritten;
|
|
|
|
//------------------------------------------------------------------------------------
|
|
// This was my lame hack to avoid sending buffers when nothing was spoken.
|
|
// It was causing problems (among others) since StartNewUnit() was still sending
|
|
// events - with no corresponding audio buffer!
|
|
//
|
|
// This was too simple of a scheme. Disable this feature for now...
|
|
// ...until I come up with something more robust. (MC)
|
|
//------------------------------------------------------------------------------------
|
|
|
|
//if( m_HasSpeech )
|
|
{
|
|
// Byte count is the frame's sample count times m_BytesPerSample;
// cbWritten is received from Write but not examined afterwards
hr = m_pOutputSite->Write( (void*)m_pSpeechBuf,
|
|
m_cOutSamples_Frame * m_BytesPerSample,
|
|
&cbWritten );
|
|
if( FAILED( hr ) )
|
|
{
|
|
//----------------------------------------
|
|
// Abort! Unable to write audio data
|
|
//----------------------------------------
|
|
m_SpeechState = SPEECH_DONE;
|
|
}
|
|
}
|
|
}
|
|
|
|
//------------------------------------
|
|
// Return render state
|
|
//------------------------------------
|
|
return hr;
|
|
} /* CBackend::RenderFrame */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|