windows-server-2003/enduser/speech/tts/msttsdrv/engine/backend.cpp


								/*******************************************************************************

								* Backend.cpp *

								*-------------*

								*   Description:

								*       This module is the implementation file for the CBackend class.

								*-------------------------------------------------------------------------------

								*  Created By: mc                                        Date: 03/12/99

								*  Copyright (C) 1999 Microsoft Corporation

								*  All Rights Reserved

								*

								*******************************************************************************/


								#include "stdafx.h"

								#ifndef __spttseng_h__

								#include "spttseng.h"

								#endif

								#ifndef Backend_H

								#include "Backend.h"

								#endif

								#ifndef FeedChain_H

								#include "FeedChain.h"

								#endif

								#ifndef SPDebug_h

								#include <spdebug.h>

								#endif


								//-----------------------------

								// Data.cpp

								//-----------------------------

								extern const short   g_IPAToAllo[];

								extern const short   g_AlloToViseme[];


								//--------------------------------------

								// DEBUG: Save utterance WAV file

								//--------------------------------------

								//#define   SAVE_WAVE_FILE  1


								//--------------------------------------------------------------

								// 256-entry table holding one full sine cycle as unsigned

								// 8-bit values (0x09..0xf5). ProsodyMod() indexes it with

								// the top 8 bits of the 24-bit vibrato phase accumulator

								// and subtracts 128 to obtain a signed pitch offset.

								//--------------------------------------------------------------

								const unsigned char g_SineWaveTbl[] =

								{

								    0x7b,0x7e,0x81,0x84,0x87,0x89,0x8c,0x8f,0x92,0x95,0x98,0x9b,0x9d,0xa0,0xa3,0xa6,

								    0xa8,0xab,0xae,0xb0,0xb3,0xb5,0xb8,0xbb,0xbd,0xbf,0xc2,0xc4,0xc7,0xc9,0xcb,0xcd,

								    0xcf,0xd1,0xd3,0xd5,0xd7,0xd9,0xdb,0xdd,0xdf,0xe0,0xe2,0xe3,0xe5,0xe6,0xe8,0xe9,

								    0xea,0xeb,0xec,0xed,0xee,0xef,0xf0,0xf1,0xf2,0xf2,0xf3,0xf3,0xf4,0xf4,0xf4,0xf4,

								    0xf5,0xf5,0xf5,0xf5,0xf4,0xf4,0xf4,0xf4,0xf3,0xf3,0xf2,0xf1,0xf1,0xf0,0xef,0xee,

								    0xed,0xec,0xeb,0xea,0xe9,0xe7,0xe6,0xe5,0xe3,0xe1,0xe0,0xde,0xdc,0xdb,0xd9,0xd7,

								    0xd5,0xd3,0xd1,0xcf,0xcd,0xcb,0xc8,0xc6,0xc4,0xc1,0xbf,0xbc,0xba,0xb7,0xb5,0xb2,

								    0xb0,0xad,0xaa,0xa8,0xa5,0xa2,0x9f,0x9d,0x9a,0x97,0x94,0x91,0x8f,0x8c,0x89,0x86,

								    0x83,0x80,0x7d,0x7a,0x77,0x75,0x72,0x6f,0x6c,0x69,0x66,0x64,0x61,0x5e,0x5b,0x58,

								    0x56,0x53,0x50,0x4e,0x4b,0x49,0x46,0x44,0x41,0x3f,0x3c,0x3a,0x38,0x35,0x33,0x31,

								    0x2f,0x2d,0x2b,0x29,0x27,0x25,0x23,0x21,0x1f,0x1e,0x1c,0x1b,0x19,0x18,0x16,0x15,

								    0x14,0x13,0x12,0x11,0x10,0x0f,0x0e,0x0d,0x0c,0x0c,0x0b,0x0b,0x0a,0x0a,0x0a,0x0a,

								    0x09,0x09,0x09,0x09,0x0a,0x0a,0x0a,0x0a,0x0b,0x0b,0x0c,0x0d,0x0d,0x0e,0x0f,0x10,

								    0x11,0x12,0x13,0x14,0x15,0x17,0x18,0x1a,0x1b,0x1d,0x1e,0x20,0x22,0x23,0x25,0x27,

								    0x29,0x2b,0x2d,0x2f,0x31,0x34,0x36,0x38,0x3a,0x3d,0x3f,0x42,0x44,0x47,0x49,0x4c,

								    0x4e,0x51,0x54,0x56,0x59,0x5c,0x5f,0x61,0x64,0x67,0x6a,0x6d,0x6f,0x72,0x75,0x78

								};


								/*void  PredictEpochDist(   float   duration,

								long    nKnots,

								float   SampleRate,

								float   *pTime,

								float   *pF0)

								{

								long            curSamplesOut, endSample, j;

								float           epochFreq;

								long            epochLen, epochCount;


								    curSamplesOut   = 0;

								    endSample       = (long) (SampleRate * duration );

								    epochCount      = 0;


								      while( curSamplesOut < endSample )

								      {

								      j = 1;

								      //---------------------------------------------------

								      // Align to appropriate knot bassed on

								      // current output sample

								      //---------------------------------------------------

								      while( (j < nKnots - 1) && (curSamplesOut > pTime[j]) )

								      j++;

								      //---------------------------------------------------

								      // Calculate exact pitch thru linear interpolation

								      //---------------------------------------------------

								      epochFreq = LinInterp( pTime[j - 1], curSamplesOut, pTime[j], pF0[j - 1], pF0[j] );

								      //---------------------------------------------------

								      // Calc sample count for curent epoch

								      //---------------------------------------------------

								      epochLen  = (long) (SampleRate / epochFreq);

								      epochCount++;


								        curSamplesOut += epochLen;

								        }


								            }

								*/


								/*****************************************************************************
								* CBackend::CBackend *
								*--------------------*
								*   Description:
								*   Constructor. Puts the object into an "owns nothing" state: the unit
								*   synthesis record is zero-filled, every buffer/helper pointer is NULL
								*   and the gain settings get their defaults. Buffers are allocated later
								*   by Init().
								*
								********************************************************************** MC ***/
								CBackend::CBackend( )
								{
								    SPDBG_FUNC( "CBackend::CBackend" );

								    //--- Current unit record starts out empty
								    memset( &m_Synth, 0, sizeof(MSUNITDATA) );

								    //--- Gain and modulation defaults
								    m_MasterVolume  = SPMAX_VOLUME;
								    m_UnitVolume    = 1.0f;
								    m_VibratoDepth  = 0;

								    //--- Audio and filter buffers
								    m_pSpeechBuf    = NULL;
								    m_pHistory      = NULL;
								    m_pHistory2     = NULL;
								    m_pFilter       = NULL;

								    //--- Per-unit epoch tables
								    m_pOutEpoch     = NULL;
								    m_pMap          = NULL;
								    m_pRevFlag      = NULL;

								    //--- Optional reverb effect
								    m_pReverb       = NULL;
								} /* CBackend::CBackend */


								/*****************************************************************************

								* CBackend::~CBackend *

								*---------------------*

								*   Description:  Destructor.

								*   Frees everything the backend owns by delegating to Release().

								*

								********************************************************************** MC ***/

								CBackend::~CBackend( )

								{

								    SPDBG_FUNC( "CBackend::~CBackend" );


								    Release();

								} /* CBackend::~CBackend */


								/*****************************************************************************
								* CBackend::Release *
								*---------------------*
								*   Description:
								*   Free memory allocated by Backend: the current unit (via
								*   CleanUpSynth), the audio frame buffer, the LPC filter history
								*   buffers and the reverb object. Every pointer is reset to NULL, so
								*   calling Release() more than once is harmless.
								*
								********************************************************************** MC ***/
								void CBackend::Release( )
								{
								    SPDBG_FUNC( "CBackend::Release" );

								    CleanUpSynth( );

								    //----------------------------------------------------------
								    // Init() allocates these two with new float[...], so they
								    // have to be released with the array form of delete.
								    // (delete on a NULL pointer is a no-op.)
								    //----------------------------------------------------------
								    delete [] m_pSpeechBuf;
								    m_pSpeechBuf = NULL;

								    delete [] m_pHistory;
								    m_pHistory = NULL;

								    //----------------------------------------------------------
								    // NOTE(review): the allocation of m_pHistory2 is not in
								    // this file section; scalar delete is kept as it was.
								    // Switch to delete [] if it turns out to be a new[] array.
								    //----------------------------------------------------------
								    if( m_pHistory2 )
								    {
								        delete m_pHistory2;
								        m_pHistory2 = NULL;
								    }

								    //--- Single object created with 'new CReverbFX' in Init()
								    delete m_pReverb;
								    m_pReverb = NULL;
								} /* CBackend::Release */


								/*****************************************************************************
								* CBackend::Init *
								*----------------*
								*   Description:
								*   Opens a backend instance, keeping a pointer of the acoustic
								*   inventory. Caches the voice parameters from 'pVoiceInfo', allocates
								*   the audio frame buffer and the LPC filter history and, when the
								*   voice requests reverb, creates the reverb effect.
								*
								*   INPUT:
								*       pVoiceDataObj - acoustic inventory; the pointer is stored as-is
								*       pSrcObj       - upstream object that units are pulled from
								*       pVoiceInfo    - sample rate, LPC order, window, FFT size, vibrato
								*                       and reverb settings; dereferenced unconditionally,
								*                       so it must not be NULL
								*
								*   OUTPUT:
								*       S_OK, or E_OUTOFMEMORY when a buffer could not be allocated (both
								*       buffers are then freed and left NULL). Failing to set up reverb
								*       is NOT an error: the backend simply runs without it.
								*
								********************************************************************** MC ***/
								HRESULT CBackend::Init( IMSVoiceData* pVoiceDataObj, CFeedChain *pSrcObj, MSVOICEINFO* pVoiceInfo )
								{
								    SPDBG_FUNC( "CBackend::Init" );
								    long    LPCsize = 0;
								    HRESULT hr = S_OK;

								    //---------------------------------------
								    // Cache voice parameters
								    //---------------------------------------
								    m_pVoiceDataObj = pVoiceDataObj;
								    m_pSrcObj       = pSrcObj;
								    m_SampleRate    = (float)pVoiceInfo->SampleRate;
								    m_cOrder        = pVoiceInfo->LPCOrder;
								    m_pWindow       = pVoiceInfo->pWindow;
								    m_FFTSize       = pVoiceInfo->FFTSize;
								    m_VibratoDepth  = ((float)pVoiceInfo->VibratoDepth) / 100.0f;
								    m_VibratoDepth  = 0;				// NOTE: disable vibrato
								    m_VibratoFreq   = pVoiceInfo->VibratoFreq;

								    //---------------------------------------
								    // Reverb implies stereo output:
								    // two 16-bit channels per sample
								    //---------------------------------------
								    if( pVoiceInfo->eReverbType > REVERB_TYPE_OFF )
								    {
								        m_StereoOut = true;
								        m_BytesPerSample = 4;
								    }
								    else
								    {
								        m_StereoOut = false;
								        m_BytesPerSample = 2;
								    }

								    //---------------------------------------
								    // Drop buffers left over from an earlier
								    // Init() so a repeated call cannot leak
								    //---------------------------------------
								    delete [] m_pSpeechBuf;
								    m_pSpeechBuf = NULL;
								    delete [] m_pHistory;
								    m_pHistory = NULL;

								    //---------------------------------------
								    // Allocate AUDIO buffer
								    //---------------------------------------
								    // NOTE(review): the NULL checks below only fire when operator new
								    // reports failure by returning NULL - confirm for this build.
								    m_pSpeechBuf = new float[SPEECH_FRAME_SIZE + SPEECH_FRAME_OVER];
								    if( m_pSpeechBuf == NULL )
								    {
								        //--------------------------------------
								        // Out of memory!
								        //--------------------------------------
								        hr = E_OUTOFMEMORY;
								    }

								    if( SUCCEEDED(hr) )
								    {
								        //---------------------------------------
								        // Allocate HISTORY buffer
								        // (one slot per LPC tap plus slot 0)
								        //---------------------------------------
								        LPCsize = m_cOrder + 1;
								        m_pHistory = new float[LPCsize];
								        if( m_pHistory == NULL )
								        {
								            //--------------------------------------
								            // Out of memory!
								            //--------------------------------------
								            hr = E_OUTOFMEMORY;
								        }
								    }

								    if( SUCCEEDED(hr) )
								    {
								        memset( m_pHistory, 0, LPCsize * sizeof(float) );
								        m_pOutEpoch         = NULL;
								        m_pMap              = NULL;
								        m_pRevFlag          = NULL;
								        m_fModifiers        = 0;
								        m_vibrato_Phase1    = 0;

								        //--------------------------------
								        // Reverb Effect
								        //--------------------------------
								        //pVoiceInfo->eReverbType = REVERB_TYPE_HALL;
								        if( (pVoiceInfo->eReverbType > REVERB_TYPE_OFF) && (m_pReverb == NULL) )
								        {
								            //--------------------------------
								            // Create ReverbFX object
								            //--------------------------------
								            m_pReverb = new CReverbFX;
								            if( m_pReverb )
								            {
								                short       result;
								                result = m_pReverb->Reverb_Init( pVoiceInfo->eReverbType, (long)m_SampleRate, m_StereoOut );
								                if( result != KREVERB_NOERROR )
								                {
								                    //--------------------------------------------
								                    // Not enough memory to do reverb
								                    // Recover gracefully
								                    //--------------------------------------------
								                    delete m_pReverb;
								                    m_pReverb = NULL;
								                }
								            }
								        }

								        //----------------------------
								        // Linear taper region scale
								        // (gain of the log taper at LINEAR_BKPT, so that
								        // the two taper regions of ExpConverter() meet)
								        //----------------------------
								        m_linearScale = (float) pow( 10.0, (double)((1.0f - LINEAR_BKPT) * LOG_RANGE) / 20.0 );

								    #ifdef SAVE_WAVE_FILE
								        m_SaveFile = (PCSaveWAV) new CSaveWAV;     // No check needed, if this fails, we simply don't save file.
								        if( m_SaveFile )
								        {
								            m_SaveFile->OpenWavFile( (long)m_SampleRate );
								        }
								    #endif

								    }
								    else
								    {
								        //--------------------------------------
								        // Allocation failed: give back whatever
								        // was obtained. Both are new[] arrays.
								        //--------------------------------------
								        delete [] m_pSpeechBuf;
								        m_pSpeechBuf = NULL;
								        delete [] m_pHistory;
								        m_pHistory = NULL;
								    }

								    return hr;
								} /* CBackend::Init */


								/*****************************************************************************
								* CBackend::FreeSynth *
								*---------------------*
								*   Description:
								*   Releases the three buffers hanging off a unit record - 'pEpoch',
								*   'pRes' and 'pLPC' - and resets each pointer to NULL. The record
								*   itself is not freed. 'pSynth' must not be NULL.
								*
								*   NOTE(review): the allocation site of these buffers is not visible
								*   from here. If they are created with new[], these need to become
								*   delete [] - confirm.
								*
								********************************************************************** MC ***/
								void CBackend::FreeSynth( MSUNITDATA* pSynth )
								{
								    SPDBG_FUNC( "CBackend::FreeSynth" );

								    //--- delete on a NULL pointer is a no-op, so no guards are needed
								    delete pSynth->pEpoch;
								    pSynth->pEpoch = NULL;

								    delete pSynth->pRes;
								    pSynth->pRes = NULL;

								    delete pSynth->pLPC;
								    pSynth->pLPC = NULL;
								} /* CBackend::FreeSynth */


								/*****************************************************************************
								* ExpConverter *
								*--------------*
								*   Description:
								*   Convert linear to exponential taper.
								*
								*   INPUT:
								*       ref         - linear control value, expected between 0.0 and 1.0
								*       linearScale - gain to reach at ref == LINEAR_BKPT
								*
								*   OUTPUT:
								*       Audio gain. Below LINEAR_BKPT the gain ramps linearly from 0 up
								*       to 'linearScale'; from LINEAR_BKPT upward it follows
								*       10 ^ ((1 - ref) * LOG_RANGE / 20), which is 1.0 at ref == 1.0.
								*
								********************************************************************** MC ***/
								static float   ExpConverter( float ref, float linearScale )
								{
								    SPDBG_FUNC( "ExpConverter" );

								    //----------------------------------------
								    // Linear taper below LINEAR_BKPT
								    //----------------------------------------
								    if( ref < LINEAR_BKPT)
								    {
								        return linearScale * (ref / LINEAR_BKPT);
								    }

								    //----------------------------------------
								    // Log taper above LINEAR_BKPT
								    //----------------------------------------
								    return (float) pow( 10.0, (double)((1.0f - ref) * LOG_RANGE) / 20.0 );
								} /* ExpConverter */


								/*****************************************************************************
								* CBackend::CvtToShort *
								*----------------------*
								*   Description:
								*   Convert (in place) FLOAT audio to SHORT, applying 'audioGain' and
								*   clipping to the 16-bit range [-32768, 32767].
								*
								*   INPUT:
								*       pSrc      - 'blocksize' float samples; overwritten with the output
								*       blocksize - number of float samples to convert
								*       stereoOut - non-zero: write two shorts per input sample, the
								*                   second one being the phase-inverted first
								*       audioGain - linear gain applied before clipping
								*
								*   The write pointer starts where the read pointer starts and moves by
								*   at most two shorts per sample, so it never passes a float that has
								*   not been read yet.
								*   NOTE(review): this relies on 2 * sizeof(short) <= sizeof(float).
								*
								********************************************************************** MC ***/
								void CBackend::CvtToShort( float *pSrc, long blocksize, long stereoOut, float audioGain )
								{
								    SPDBG_FUNC( "CBackend::CvtToShort" );
								    long        i;
								    short       *pDest;
								    float       fSamp;
								    int         inverted;

								    pDest = (short*)pSrc;
								    for( i = 0; i < blocksize; ++i )
								    {
								        //------------------------
								        // Read float sample...
								        //------------------------
								        fSamp = (*pSrc++) * audioGain;
								        //------------------------
								        // ...clip to 16-bits...
								        //------------------------
								        if( fSamp > 32767 )
								        {
								            fSamp = 32767;
								        }
								        else if( fSamp < (-32768) )
								        {
								            fSamp = (-32768);
								        }
								        //------------------------
								        // ...save as SHORT
								        //------------------------
								        *pDest++ = (short)fSamp;
								        if( stereoOut )
								        {
								            //------------------------------------------------
								            // Second channel is the inverted sample.
								            // Negating -32768 gives +32768, which does not
								            // fit in a short, so clip it to +32767.
								            //------------------------------------------------
								            inverted = 0 - (int)fSamp;
								            if( inverted > 32767 )
								            {
								                inverted = 32767;
								            }
								            *pDest++ = (short)inverted;
								        }
								    }
								} /* CBackend::CvtToShort */


								/*****************************************************************************
								* CBackend::PSOLA_Stretch *
								*-------------------------*
								*   Description:
								*   Does PSOLA epoch stretching or compressing.
								*   The output epoch is cleared, then the head of the input epoch is
								*   windowed into the head of the output and the tail of the input into
								*   the tail of the output. Where the two halves overlap they are
								*   summed. Sample 0 is copied unwindowed.
								*
								*   INPUT:
								*       pInRes,  InSize   - input residual epoch and its length
								*       pOutRes, OutSize  - output buffer and the wanted epoch length
								*       pWindow           - window table
								*       cWindowSize       - span of 'pWindow' that is walked across
								*                           MIN(InSize, OutSize) samples
								*
								*   Nothing is written when OutSize <= 0. The output is left all-zero
								*   when InSize <= 0 or cWindowSize <= 0.
								*
								********************************************************************** MC ***/
								void CBackend::PSOLA_Stretch(     float *pInRes, long InSize,
								                    float *pOutRes, long OutSize,
								                    float *pWindow,
								                    long  cWindowSize )
								{
								    SPDBG_FUNC( "CBackend::PSOLA_Stretch" );
								    long    i, lim, iWin;
								    float   window, delta, kf;

								    //--- Nothing to produce (also keeps the memset length sane)
								    if( OutSize <= 0 )
								    {
								        return;
								    }
								    memset( pOutRes, 0, sizeof(float) * OutSize  );

								    //--- No input or no window: leave silence instead of
								    //--- dividing by zero / reading pInRes[0] out of range
								    lim = MIN(InSize, OutSize );
								    if( (lim <= 0) || (cWindowSize <= 0) )
								    {
								        return;
								    }

								    delta = (float)cWindowSize / (float)lim;
								    kf = 0.5f;
								    pOutRes[0] = pInRes[0];
								    for( i = 1; i < lim; ++i )
								    {
								        kf += delta;
								        //-----------------------------------------------------
								        // For very long epochs (or through float round-off)
								        // kf can reach cWindowSize; keep the index inside
								        // the table.
								        // NOTE(review): assumes 'pWindow' holds exactly
								        // cWindowSize entries - confirm.
								        //-----------------------------------------------------
								        iWin = (long) kf;
								        if( iWin >= cWindowSize )
								        {
								            iWin = cWindowSize - 1;
								        }
								        window = pWindow[iWin];
								        pOutRes[i] += pInRes[i] * window;
								        pOutRes[OutSize - i] += pInRes[InSize - i] * window;
								    }
								} /* CBackend::PSOLA_Stretch */


								/*****************************************************************************
								* CBackend::PrepareSpeech *
								*-------------------------*
								*   Description:
								*   Resets the synthesis state before a new run and remembers the
								*   engine site that this backend talks to.
								*
								*   INPUT:
								*       outputSite - engine site; the pointer is stored, not copied
								*
								********************************************************************** MC ***/
								void    CBackend::PrepareSpeech( ISpTTSEngineSite* outputSite )
								{
								    SPDBG_FUNC( "CBackend::PrepareSpeech" );

								    //--- Where the output goes
								    m_pOutputSite       = outputSite;

								    //--- Overall stream state
								    m_SpeechState       = SPEECH_CONTINUE;
								    m_HasSpeech         = false;
								    m_cOutSamples_Total = 0;

								    //--- Start in silence with no epochs pending
								    m_silMode           = true;
								    m_durationTarget    = 0;
								    m_cOutSamples_Phon  = 1;
								    m_cOutEpochs        = 0;            // Pull model big-bang
								} /* CBackend::PrepareSpeech */


								/*****************************************************************************
								* CBackend::ProsodyMod *
								*----------------------*
								*   Description:
								*   Calculate the epoch sequence for the synthesized speech.
								*   Walks the input epochs in 'm_pInEpoch' and decides, frame by frame,
								*   whether to emit (possibly repeat) the current input epoch or to move
								*   on to the next one, so that the output tracks the target pitch
								*   contour and the requested duration scaling.
								*
								*   INPUT:
								*       pCurUnit      - supplies the pitch contour: 'nKnots' knots with
								*                       times 'pTime' (compared against the running output
								*                       sample count) and frequencies 'pF0'
								*       cInEpochs     - number of entries in 'm_pInEpoch'; a negative
								*                       entry marks an unvoiced epoch, its magnitude is
								*                       the epoch length in samples
								*       durationMpy   - duration multiplier; used as a divisor, so it
								*                       must be non-zero
								*       cMaxOutEpochs - capacity of the output tables
								*
								*   OUTPUT:
								*       FIlls 'pOutEpoch', 'pMap', and 'pRevFlag'
								*       Returns new epoch count
								*
								*   NOTE(review): the knot lookup reads pTime[1] / pF0[1] even when
								*   nKnots < 2 - callers presumably always pass at least two knots.
								*
								********************************************************************** MC ***/
								long CBackend::ProsodyMod(     UNITINFO    *pCurUnit,
								                               long         cInEpochs,
								                               float        durationMpy,
								                               long         cMaxOutEpochs )
								{
								    SPDBG_FUNC( "CBackend::ProsodyMod" );
								    long    iframe, framesize, framesizeOut, j;
								    long    cntOut, csamplesOut, cOutEpochs;
								    BOOL    fUnvoiced;
								    float   durationIn;         // Active accum of IN duration
								    float   durationOut;        // Active accum of OUT duration aligned to IN domain
								    BOOL    fAdvanceInput;
								    float   vibrato;
								    float   phaseStep;
								    float   epochFreq;
								    float   *pTime;
								    float   *pF0;

								    iframe          = 0;
								    durationIn      = 0.0f;
								    durationOut     = 0.0f;
								    csamplesOut     = 0;
								    cntOut          = 0;
								    cOutEpochs      = 0;
								    pTime           = pCurUnit->pTime;
								    pF0             = pCurUnit->pF0;

								    while( iframe < cInEpochs && cOutEpochs < cMaxOutEpochs)
								    {
								        //-----------------------------------------
								        //  Compute output frame length
								        //-----------------------------------------
								        if( m_pInEpoch[iframe] < 0 )
								        {
								            //-------------------------------------------------
								            // Since we can't change unvoiced pitch,
								            // do not change frame size for unvoiced frames
								            //-------------------------------------------------
								            framesize       = (long)((-m_pInEpoch[iframe]) + 0.5f);
								            framesizeOut    = framesize;
								            fUnvoiced       = true;
								        }
								        else
								        {
								            //---------------------------------------------------
								            // Modify frame size for voiced epoch
								            // based on epoch frequency
								            //---------------------------------------------------
								            j = 1;
								            //---------------------------------------------------
								            // Align to appropriate knot based on
								            // current output sample
								            //---------------------------------------------------
								            while( (j < (long)pCurUnit->nKnots - 1) && (csamplesOut > pTime[j]) )
								                j++;
								            //---------------------------------------------------
								            // Calculate exact pitch thru linear interpolation
								            //---------------------------------------------------
								            epochFreq = LinInterp( pTime[j - 1], (float)csamplesOut, pTime[j], pF0[j - 1], pF0[j] );

								            //---------------------------------------------------
								            // Vibrato: signed sine value taken from the top
								            // 8 bits of the 24-bit phase, scaled by depth
								            //---------------------------------------------------
								            vibrato = (float)(g_SineWaveTbl[m_vibrato_Phase1 >> 16] - 128);
								            vibrato *= m_VibratoDepth;

								            //---------------------------------------------------
								            // Scale frame size using in/out ratio
								            //---------------------------------------------------
								            epochFreq       = epochFreq + vibrato;
								            if( epochFreq < MIN_VOICE_PITCH )
								            {
								                epochFreq = MIN_VOICE_PITCH;
								            }
								            framesize       = (long)(m_pInEpoch[iframe] + 0.5f);
								            framesizeOut    = (long)(m_SampleRate / epochFreq);

								            //---------------------------------------------------
								            // Advance the vibrato phase by the length of this
								            // output epoch and wrap it to 24 bits.
								            // NOTE(review): the step uses a fixed 22050 rather
								            // than m_SampleRate - confirm this is intended.
								            //---------------------------------------------------
								            phaseStep       = ((float)256 / ((float)22050 / m_VibratoFreq)) * (float)framesizeOut;    // 3 Hz
								            m_vibrato_Phase1 += (long)(phaseStep * (float)65536);
								            m_vibrato_Phase1 &= 0xFFFFFF;

								            //---------------------------------------------------
								            // @@@@ REMOVED 2x LIMIT
								            // (output epoch length is no longer restricted to
								            // between half and twice the input epoch length)
								            //---------------------------------------------------
								            fUnvoiced = false;
								        }

								        //-------------------------------------------
								        //  Generate next output frame
								        //  (only while the middle of the candidate
								        //  output frame still falls inside the
								        //  current input frame)
								        //-------------------------------------------
								        fAdvanceInput = false;
								        if( durationOut + (0.5f * framesizeOut/durationMpy) <= durationIn + framesize )
								        {
								            //-----------------------------------------
								            // If UNvoiced and odd frame,
								            // reverse residual
								            //-----------------------------------------
								            if( fUnvoiced && (cntOut & 1) )
								            {
								                m_pRevFlag[cOutEpochs] = true;
								            }
								            else
								            {
								                m_pRevFlag[cOutEpochs] = false;
								            }
								            ++cntOut;

								            durationOut += framesizeOut/durationMpy;
								            csamplesOut += framesizeOut;
								            m_pOutEpoch[cOutEpochs] = (float)framesizeOut;
								            m_pMap[cOutEpochs] = iframe;
								            cOutEpochs++;
								        }
								        else
								        {
								            fAdvanceInput = true;
								        }

								        //-------------------------------------------
								        // Advance to next input frame
								        // (re-tested with the updated durationOut)
								        //-------------------------------------------
								        if(     ((durationOut + (0.5f * framesizeOut/durationMpy)) > (durationIn + framesize)) ||
								            fAdvanceInput )
								        {
								            durationIn += framesize;
								            ++iframe;
								            cntOut = 0;
								        }
								    }

								    return cOutEpochs;
								} /* CBackend::ProsodyMod */


								/*****************************************************************************

								* CBackend::LPCFilter *

								*---------------------*

								*   Description:

								*   LPC filter of order cOrder. It filters the residual signal

								*   pRes, producing output pOutWave. This routine requires that

								*   pOutWave has the true waveform history from [-cOrder,0] and

								*   of course it has to be defined.

								*

								********************************************************************** MC ***/

								void CBackend::LPCFilter( float *pCurLPC, float *pCurRes, long len, float gain )

								{

								    SPDBG_FUNC( "CBackend::LPCFilter" );

								    INT t, j;


								    for( t = 0; t < len; t++ )

								    {

								        m_pHistory[0] = pCurLPC[0] * pCurRes[t];

								        for( j = m_cOrder; j > 0; j-- )

								        {

								            m_pHistory[0] -= pCurLPC[j] * m_pHistory[j];

								            m_pHistory[j] = m_pHistory[j - 1];

								        }

								        pCurRes[t] = m_pHistory[0] * gain;

								    }

								} /* CBackend::LPCFilter */


								/*void CBackend::LPCFilter( float *pCurLPC, float *pCurRes, long len )

								{

								long        t;


								  for( t = 0; t < len; t++ )

								        {

								        pCurRes[t] = pCurRes[t] * 10;

								        }

								        }

								*/


								/*****************************************************************************
								* CBackend::ResRecons *
								*---------------------*
								*   Description:
								*   Obtains output prosody modified residual for the epoch selected by
								*   'm_EpochIndex', then amplifies it.
								*
								*   INPUT:
								*       pInRes,  InSize   - input residual epoch and its length
								*       pOutRes, OutSize  - output buffer and the wanted epoch length
								*       scale             - gain applied to every output sample
								*
								*   The epoch is produced in one of three ways:
								*       - reverse flag set : first 'OutSize' input samples, reversed
								*       - same length      : straight copy
								*       - otherwise        : PSOLA stretch / compress
								*
								********************************************************************** MC ***/
								void CBackend::ResRecons( float *pInRes,
								                          long  InSize,
								                          float *pOutRes,
								                          long  OutSize,
								                          float scale )
								{
								    SPDBG_FUNC( "CBackend::ResRecons" );
								    long        k;

								    if( m_pRevFlag[m_EpochIndex] )
								    {
								        //----------------------------------------------------
								        // Repeated UNvoiced epoch: play it backwards
								        //----------------------------------------------------
								        for( k = 0; k < OutSize; ++k )
								        {
								            pOutRes[k] = pInRes[(OutSize - 1) - k];
								        }
								    }
								    else if( InSize != OutSize )
								    {
								        //----------------------------------------------------
								        // Voiced epoch whose length has to change
								        //----------------------------------------------------
								        PSOLA_Stretch( pInRes, InSize, pOutRes, OutSize, m_pWindow, m_FFTSize );
								    }
								    else
								    {
								        //----------------------------------------------------
								        // Unvoiced epoch, or voiced with no pitch change
								        //----------------------------------------------------
								        memcpy( pOutRes, pInRes, sizeof(float) *OutSize );
								    }

								    //----------------------------------
								    // Amplify frame (skip at unity)
								    //----------------------------------
								    if( scale == 1.0f )
								    {
								        return;
								    }
								    for( k = 0; k < OutSize; ++k )
								    {
								        pOutRes[k] *= scale;
								    }
								} /* CBackend::ResRecons */


								/*****************************************************************************

								* CBackend::StartNewUnit *

								*------------------------*

								*   Description:

								*   Synthesize audio samples for a target unit

								*

								*   INPUT:

								*       pCurUnit - unit ID, F0, duration, etc.

								*

								*   OUTPUT:

								*       Sets 'pCurUnit->csamplesOut' with audio length

								*

								********************************************************************** MC ***/

								HRESULT CBackend::StartNewUnit( )
								{
								    //----------------------------------------------------------------------
								    // Overview:
								    //   1. Pick up a pending volume change from the output site.
								    //   2. Release the previous unit's synthesis buffers.
								    //   3. Pull the next unit from the upstream source object.
								    //   4. Post SENTENCE / PHONEME / VISEME / BOOKMARK events for it.
								    //   5. Either enter silence mode (UNIT_SIL) or fetch the unit's
								    //      inventory data, allocate the epoch work arrays, run
								    //      ProsodyMod() and post the WORD event.
								    //
								    // Returns:
								    //   The result of NextData() for silence units or when speech does not
								    //   continue; the result of GetUnitData() for voiced units;
								    //   E_OUTOFMEMORY if a work array cannot be allocated; E_UNEXPECTED
								    //   if upstream reports SPEECH_CONTINUE without supplying a unit.
								    //----------------------------------------------------------------------
								    SPDBG_FUNC( "CBackend::StartNewUnit" );
								    long        cframeMax = 0, cInEpochs = 0, i;
								    float       totalDuration, durationOut, durationMpy = 0;
								    UNITINFO    *pCurUnit = NULL;       // stays NULL if NextData() supplies nothing
								    HRESULT     hr = S_OK;
								    SPEVENT     event;
								    ULONGLONG   clientInterest = 0;     // no interest unless the site says otherwise
								    USHORT      volumeVal;

								    //--- Clear the event once so that any field this function never
								    //    assigns is handed to the site as zero instead of stack garbage.
								    memset( &event, 0, sizeof(event) );

								    // Check for VOLUME change
								    // (a failure here is not fatal: 'hr' is overwritten by NextData below)
								    if( m_pOutputSite->GetActions() & SPVES_VOLUME )
								    {
								        hr = m_pOutputSite->GetVolume( &volumeVal );
								        if ( SUCCEEDED( hr ) )
								        {
								            if( volumeVal > SPMAX_VOLUME )
								            {
								                //--- Clip volume to engine maximum
								                volumeVal = SPMAX_VOLUME;
								            }
								            else if ( volumeVal < SPMIN_VOLUME )
								            {
								                //--- Clip volume to engine minimum
								                volumeVal = SPMIN_VOLUME;
								            }
								            m_MasterVolume = volumeVal;
								        }
								    }

								    //---------------------------------------
								    // Delete previous unit
								    //---------------------------------------
								    CleanUpSynth( );

								    //---------------------------------------
								    // Get next phon
								    //---------------------------------------
								    hr = m_pSrcObj->NextData( (void**)&pCurUnit, &m_SpeechState );
								    if( (m_SpeechState == SPEECH_CONTINUE) && (pCurUnit == NULL) )
								    {
								        //-------------------------------------------------------
								        // Upstream says "keep going" but handed back no unit.
								        // Report a failure rather than dereferencing NULL; keep
								        // NextData's own error code if it already failed.
								        //-------------------------------------------------------
								        if( SUCCEEDED(hr) )
								        {
								            hr = E_UNEXPECTED;
								        }
								    }
								    else if( m_SpeechState == SPEECH_CONTINUE )
								    {
								        m_HasSpeech = pCurUnit->hasSpeech;
								        m_pOutputSite->GetEventInterest( &clientInterest );

								        //------------------------------------------------
								        // Post SENTENCE event
								        //------------------------------------------------
								        if( (pCurUnit->flags & SENT_START_FLAG) && (clientInterest & SPFEI(SPEI_SENTENCE_BOUNDARY)) )
								        {
								            event.elParamType = SPET_LPARAM_IS_UNDEFINED;
								            event.eEventId = SPEI_SENTENCE_BOUNDARY;
								            event.ullAudioStreamOffset = m_cOutSamples_Total * m_BytesPerSample;
								            event.lParam = pCurUnit->sentencePosition;          // Input sentence position
								            event.wParam = pCurUnit->sentenceLen;               // Input sentence length
								            m_pOutputSite->AddEvents( &event, 1 );
								        }
								        //------------------------------------------------
								        // Post PHONEME event
								        //   lParam: features in high word, current phoneme in low word
								        //   wParam: duration (sec * 1000) in high word, next phoneme in low word
								        //------------------------------------------------
								        if( clientInterest & SPFEI(SPEI_PHONEME) )
								        {
								            event.elParamType = SPET_LPARAM_IS_UNDEFINED;
								            event.eEventId = SPEI_PHONEME;
								            event.ullAudioStreamOffset = m_cOutSamples_Total * m_BytesPerSample;
								            event.lParam = ((ULONG)pCurUnit->AlloFeatures << 16) + g_IPAToAllo[pCurUnit->AlloID];
								            event.wParam = ((ULONG)(pCurUnit->duration * 1000.0f) << 16) + g_IPAToAllo[pCurUnit->NextAlloID];
								            m_pOutputSite->AddEvents( &event, 1 );
								        }

								        //------------------------------------------------
								        // Post VISEME event (same packing as PHONEME, but
								        // the IDs are mapped through g_AlloToViseme)
								        //------------------------------------------------
								        if( clientInterest & SPFEI(SPEI_VISEME) )
								        {
								            event.elParamType = SPET_LPARAM_IS_UNDEFINED;
								            event.eEventId = SPEI_VISEME;
								            event.ullAudioStreamOffset = m_cOutSamples_Total * m_BytesPerSample;
								            event.lParam = ((ULONG)pCurUnit->AlloFeatures << 16) + g_AlloToViseme[pCurUnit->AlloID];
								            event.wParam = ((ULONG)(pCurUnit->duration * 1000.0f) << 16) + g_AlloToViseme[pCurUnit->NextAlloID];
								            m_pOutputSite->AddEvents( &event, 1 );
								        }

								        //------------------------------------------------
								        // Post any bookmark events
								        //------------------------------------------------
								        if( pCurUnit->pBMObj != NULL )
								        {
								            CBookmarkList   *pBMObj;
								            BOOKMARK_ITEM*  pMarker;

								            //-------------------------------------------------
								            // Retrieve marker strings from Bookmark list and
								            // enter into Event list
								            //-------------------------------------------------
								            pBMObj = (CBookmarkList*)pCurUnit->pBMObj;
								            //cMarkerCount = pBMObj->m_BMList.GetCount();
								            if( clientInterest & SPFEI(SPEI_TTS_BOOKMARK) )
								            {
								                //---------------------------------------
								                // Send event for every bookmark in list
								                //---------------------------------------
								                SPLISTPOS   listPos;

								                listPos = pBMObj->m_BMList.GetHeadPosition();
								                while( listPos )
								                {
								                    pMarker                    = (BOOKMARK_ITEM*)pBMObj->m_BMList.GetNext( listPos );
								                    event.eEventId             = SPEI_TTS_BOOKMARK;
								                    event.elParamType          = SPET_LPARAM_IS_STRING;
								                    event.ullAudioStreamOffset = m_cOutSamples_Total * m_BytesPerSample;
								                    //--- Copy in bookmark string - has been NULL terminated in source already...
								                    event.lParam               = pMarker->pBMItem;
								                    // Engine must convert string to long for wParam.
								                    event.wParam               = _wtol((WCHAR *)pMarker->pBMItem);
								                    m_pOutputSite->AddEvents( &event, 1 );
								                }
								            }
								            //---------------------------------------------
								            // We don't need this Bookmark list any more
								            // (it is deleted whether or not events were sent)
								            //---------------------------------------------
								            delete pBMObj;
								            pCurUnit->pBMObj = NULL;
								        }

								        pCurUnit->csamplesOut = 0;
								        //******************************************************
								        // For SIL, fill buffer with zeros...
								        //******************************************************
								        if( pCurUnit->UnitID == UNIT_SIL )
								        {
								            //---------------------------------------------
								            // Calc SIL length
								            //---------------------------------------------
								            m_durationTarget    = (long)(m_SampleRate * pCurUnit->duration);
								            m_cOutSamples_Phon  = 0;
								            m_silMode           = true;

								            //---------------------------------------------
								            // Clear LPC filter storage
								            //---------------------------------------------
								            memset( m_pHistory, 0, sizeof(float)*(m_cOrder+1) );

								            //--------------------------------
								            // Success!
								            //--------------------------------

								            // Debug macro - output unit data...
								            TTSDBG_LOGUNITS;
								        }
								        //******************************************************
								        // ...otherwise fill buffer with inventory data
								        //******************************************************
								        else
								        {
								            m_silMode = false;
								            // Get unit data from voice
								            hr = m_pVoiceDataObj->GetUnitData( pCurUnit->UnitID, &m_Synth );
								            if( SUCCEEDED(hr) )
								            {
								                durationOut     = 0.0f;
								                cInEpochs       = m_Synth.cNumEpochs;
								                m_pInEpoch      = m_Synth.pEpoch;
								                //cframeMax     = PeakValue( m_pInEpoch, cInEpochs );
								                totalDuration   = (float)m_Synth.cNumSamples;

								                //-----------------------------------------------
								                // For debugging: Force duration to unit length
								                //-----------------------------------------------
								                /*float       unitDur;

								                unitDur = totalDuration / 22050.0f;
								                if( pCurUnit->duration < unitDur )
								                {
								                    if( pCurUnit->speechRate < 1 )
								                    {
								                        pCurUnit->duration = unitDur * pCurUnit->speechRate;
								                    }
								                    else
								                    {
								                        pCurUnit->duration = unitDur;
								                    }
								                }*/

								                durationMpy     = pCurUnit->duration;

								                //-----------------------------------------------
								                // Estimate an upper bound on the number of
								                // output epochs, used to size the work arrays.
								                // Starts from the largest pF0 knot value.
								                //-----------------------------------------------
								                cframeMax = (long)pCurUnit->pF0[0];
								                for( i = 1; i < (long)pCurUnit->nKnots; i++ )
								                {
								                    //-----------------------------------------
								                    // Find the longest epoch
								                    //-----------------------------------------
								                    cframeMax = (long)(MAX(cframeMax,pCurUnit->pF0[i]));
								                }
								                cframeMax *= (long)(durationMpy * MAX_TARGETS_PER_UNIT);

								                //--- From here on 'durationMpy' is the stretch factor:
								                //    requested samples / samples in the stored unit
								                durationMpy = (m_SampleRate * durationMpy) / totalDuration;
								                cframeMax += (long)(durationMpy * cInEpochs * MAX_TARGETS_PER_UNIT);
								                //
								                // mplumpe 11/18/97 : added to eliminate chance of crash.
								                //
								                cframeMax *= 2;
								                //---------------------------------------------------
								                // New epochs adjusted for duration and pitch
								                //---------------------------------------------------
								                m_pOutEpoch = new float[cframeMax];
								                if( !m_pOutEpoch )
								                {
								                    //--------------------------------------
								                    // Out of memory!
								                    //--------------------------------------
								                    hr = E_OUTOFMEMORY;
								                    pCurUnit->csamplesOut = 0;
								                    CleanUpSynth( );
								                }
								            }
								            if( SUCCEEDED(hr) )
								            {
								                //---------------------------------------------------
								                // Index back to orig epoch
								                //---------------------------------------------------
								                m_pMap = new long[cframeMax];
								                if( !m_pMap )
								                {
								                    //--------------------------------------
								                    // Out of memory!
								                    //--------------------------------------
								                    hr = E_OUTOFMEMORY;
								                    pCurUnit->csamplesOut = 0;
								                    CleanUpSynth( );
								                }
								            }
								            if( SUCCEEDED(hr) )
								            {
								                //---------------------------------------------------
								                // TRUE = reverse residual
								                //---------------------------------------------------
								                m_pRevFlag = new short[cframeMax];
								                if( !m_pRevFlag )
								                {
								                    //--------------------------------------
								                    // Out of memory!
								                    //--------------------------------------
								                    hr = E_OUTOFMEMORY;
								                    pCurUnit->csamplesOut = 0;
								                    CleanUpSynth( );
								                }
								            }
								            if( SUCCEEDED(hr) )
								            {
								                //---------------------------------------------------------------------
								                // Compute synthesis epochs and corresponding mapping to analysis
								                // fills in:    m_pOutEpoch, m_pMap, m_pRevFlag
								                //---------------------------------------------------------------------
								                m_cOutEpochs = ProsodyMod( pCurUnit, cInEpochs, durationMpy, cframeMax );

								                //------------------------------------------------
								                // Now that actual epoch sizes are known,
								                // calculate total audio sample count
								                // @@@@ NO LONGER NEEDED
								                //------------------------------------------------
								                pCurUnit->csamplesOut = 0;
								                for( i = 0; i < m_cOutEpochs; i++ )
								                {
								                    pCurUnit->csamplesOut += (long)(ABS(m_pOutEpoch[i]));
								                }

								                //--- Reset per-unit render state consumed by RenderFrame()
								                m_cOutSamples_Phon  = 0;
								                m_EpochIndex        = 0;
								                m_durationTarget    = (long)(pCurUnit->duration * m_SampleRate);
								                m_pInRes            = m_Synth.pRes;
								                m_pLPC              = m_Synth.pLPC;
								                m_pSynthTime        = pCurUnit->pTime;
								                m_pSynthAmp         = pCurUnit->pAmp;
								                m_nKnots            = pCurUnit->nKnots;
								                // NOTE: Maybe make log volume?
								                m_UnitVolume        = (float)pCurUnit->user_Volume / 100.0f;

								                //------------------------------------------------
								                // Post WORD event
								                //------------------------------------------------
								                if( (pCurUnit->flags & WORD_START_FLAG) && (clientInterest & SPFEI(SPEI_WORD_BOUNDARY)) )
								                {
								                    event.elParamType = SPET_LPARAM_IS_UNDEFINED;
								                    event.eEventId = SPEI_WORD_BOUNDARY;
								                    event.ullAudioStreamOffset = m_cOutSamples_Total * m_BytesPerSample;
								                    event.lParam = pCurUnit->srcPosition;           // Input word position
								                    event.wParam = pCurUnit->srcLen;                // Input word length
								                    m_pOutputSite->AddEvents( &event, 1 );
								                }

								                //--- Debug macro - output unit data
								                TTSDBG_LOGUNITS;
								            }
								        }
								    }

								    return hr;
								} /* CBackend::StartNewUnit */


								/*****************************************************************************
								* CBackend::CleanUpSynth *
								*------------------------*
								*   Description:
								*   Releases the per-unit synthesis state: the three work arrays that
								*   StartNewUnit() allocates with new[] (m_pOutEpoch, m_pMap, m_pRevFlag)
								*   and the unit data held in m_Synth (via FreeSynth). Each array pointer
								*   is reset to NULL afterwards, so releasing the arrays a second time
								*   without an intervening allocation does nothing.
								*
								********************************************************************** MC ***/
								void    CBackend::CleanUpSynth( )
								{
								    SPDBG_FUNC( "CBackend::CleanUpSynth" );

								    //--- These buffers are created with new[], so they must be released
								    //    with delete[] (scalar delete on an array is undefined behavior).
								    //    delete[] on a NULL pointer is a no-op, so no guard is required.
								    delete [] m_pOutEpoch;
								    m_pOutEpoch = NULL;

								    delete [] m_pMap;
								    m_pMap = NULL;

								    delete [] m_pRevFlag;
								    m_pRevFlag = NULL;

								    // NOTE: make object?
								    FreeSynth( &m_Synth );

								} /* CBackend::CleanUpSynth */


								/*****************************************************************************
								* CBackend::RenderFrame *
								*-----------------------*
								*   Description:
								*   This is the central synthesis loop. Keep filling the output audio
								*   buffer until the buffer frame is full or speech is done. To render
								*   continuous speech, get each unit one at a time from the upstream buffer.
								*
								*   On success the finished frame is post-processed and written to the
								*   output site. Returns the HRESULT of the last StartNewUnit() call, or
								*   of the Write() to the output site when the frame was rendered.
								*
								********************************************************************** MC ***/
								HRESULT CBackend::RenderFrame( )
								{
								    SPDBG_FUNC( "CBackend::RenderFrame" );
								    long        InSize, OutSize;
								    long        iframe;
								    float       *pCurInRes, *pCurOutRes;
								    long        i, j;
								    float       ampMpy;
								    HRESULT     hr = S_OK;

								    //--- Start a new frame; the per-phon and total counters carry over
								    m_cOutSamples_Frame = 0;
								    do
								    {
								        //--- Samples produced by this pass; stays 0 when the pass only
								        //    fetches the next unit
								        OutSize = 0;
								        if( m_silMode )
								        {
								            //-------------------------------
								            // Silence mode
								            //-------------------------------
								            if( m_cOutSamples_Phon >= m_durationTarget )
								            {
								                //---------------------------
								                // Get next unit
								                //---------------------------
								                hr = StartNewUnit( );
								                if (FAILED(hr))
								                {
								                    //-----------------------------------
								                    // Try to end it gracefully...
								                    //-----------------------------------
								                    m_SpeechState = SPEECH_DONE;
								                }

												TTSDBG_LOGSILEPOCH;
								            }
								            else
								            {
								                //---------------------------
								                // Continue with current SIL
								                // (one zero sample per pass)
								                //---------------------------
								                m_pSpeechBuf[m_cOutSamples_Frame] = 0;
								                OutSize = 1;
								            }
								        }
								        else
								        {
								            if( m_EpochIndex < m_cOutEpochs )
								            {
								                //-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
								                //
								                // Continue with current phon
								                //
								                //-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
								                //------------------------------------
								                // Find current input residual:
								                // skip over the lengths of all input
								                // epochs that precede the mapped one
								                //------------------------------------
								                iframe = m_pMap[m_EpochIndex];
								                pCurInRes = m_pInRes;
								                for( i = 0; i < iframe; i++)
								                {
								                    pCurInRes += (long) ABS(m_pInEpoch[i]);
								                }

								                pCurOutRes  = m_pSpeechBuf + m_cOutSamples_Frame;
								                InSize      = (long)(ABS(m_pInEpoch[iframe]));
								                OutSize     = (long)(ABS(m_pOutEpoch[m_EpochIndex]));
								                //--- Guard against running past the frame buffer: if this
								                //    epoch would exceed SPEECH_FRAME_SIZE + SPEECH_FRAME_OVER,
								                //    overwrite its length with SPEECH_FRAME_OVER-1
								                if (m_cOutSamples_Frame + OutSize > SPEECH_FRAME_SIZE + SPEECH_FRAME_OVER)
								                {
								                    m_pOutEpoch[m_EpochIndex] = SPEECH_FRAME_OVER-1;  // still huge
								                    OutSize = (long)(ABS(m_pOutEpoch[m_EpochIndex]));
								                }
								                //--- Locate the knot pair (j-1, j) around the current
								                //    position in the phon; j is capped at the last knot
								                j = 1;
								                while( (j < m_nKnots - 1) && (m_cOutSamples_Phon > m_pSynthTime[j]) )
								                {
								                    j++;
								                }
								                //--- Amplitude for this epoch, interpolated between the two knots
								                ampMpy = LinInterp( m_pSynthTime[j - 1], (float)m_cOutSamples_Phon, m_pSynthTime[j], m_pSynthAmp[j - 1], m_pSynthAmp[j] );
								                //ampMpy = 1;

								                //--------------------------------------------
								                // Do stretching of residuals
								                //--------------------------------------------
								                ResRecons( pCurInRes, InSize, pCurOutRes, OutSize, ampMpy );

								                //--------------------------------------------
								                // Do LPC reconstruction
								                //--------------------------------------------
								                float       *pCurLPC;
												float       totalGain;

												//--- Overall gain = converted master volume (as a fraction of
												//    SPMAX_VOLUME) times converted per-unit volume
												totalGain = ExpConverter( ((float)m_MasterVolume / (float)SPMAX_VOLUME), m_linearScale )
																* ExpConverter( m_UnitVolume, m_linearScale );

								                //--- Coefficient set of the mapped input epoch; each set
								                //    occupies (1 + m_cOrder) floats and its first entry is
								                //    forced to 1.0
								                pCurLPC = m_pLPC + m_pMap[m_EpochIndex] * (1 + m_cOrder);
								                pCurLPC[0] = 1.0f;
								                //--- NOTE(review): presumably filters the output samples in place - confirm
								                LPCFilter( pCurLPC, &m_pSpeechBuf[m_cOutSamples_Frame], OutSize, totalGain );
								                m_EpochIndex++;
								            }
								            else
								            {
								                //-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
								                //
								                // Get next phon
								                //
								                //-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
								                hr = StartNewUnit( );
								                if (FAILED(hr))
								                {
								                    //-----------------------------------
								                    // Try to end it gracefully...
								                    //-----------------------------------
								                    m_SpeechState = SPEECH_DONE;
								                }
												TTSDBG_LOGSILEPOCH;
								            }
								        }
								        //--- Advance the frame, phon and stream sample counters
								        m_cOutSamples_Frame += OutSize;
								        m_cOutSamples_Phon += OutSize;
								        m_cOutSamples_Total += OutSize;

										TTSDBG_LOGEPOCHS;
								    }
								    //--- Stop once the frame holds at least SPEECH_FRAME_SIZE samples
								    //    or the speech state is no longer SPEECH_CONTINUE
								    while( (m_cOutSamples_Frame < SPEECH_FRAME_SIZE) && (m_SpeechState == SPEECH_CONTINUE) );

									if( SUCCEEDED(hr) )
									{
										//----------------------------------------------
										// Convert buffer from FLOAT to SHORT
										// NOTE(review): the reverb path never calls
										// CvtToShort, so Reverb_Process presumably
										// performs the conversion itself - confirm
										//----------------------------------------------
										if( m_pReverb )
										{
											//---------------------------------
											// Add REVERB
											//---------------------------------
											m_pReverb->Reverb_Process( m_pSpeechBuf, m_cOutSamples_Frame, 1.0f );
										}
										else
										{
											CvtToShort( m_pSpeechBuf, m_cOutSamples_Frame, m_StereoOut, 1.0f );
										}

								        //--- Debug Macro - output wave data to stream
								        TTSDBG_LOGWAVE;
									}

								    if( SUCCEEDED( hr ) )
								    {
								        //------------------------------------
								        // Send this buffer to SAPI site
								        //------------------------------------
								        DWORD   cbWritten;

										//------------------------------------------------------------------------------------
										// This was my lame hack to avoid sending buffers when nothing was spoken.
										// It was causing problems (among others) since StartNewUnit() was still sending
										// events - with no corresponding audio buffer!
										//
										// This was too simple of a scheme. Disable this feature for now...
										// ...until I come up with something more robust. (MC)
										//------------------------------------------------------------------------------------

										//if( m_HasSpeech )
										{
											//--- Byte count = samples in this frame * m_BytesPerSample;
											//    cbWritten is not examined afterwards
											hr = m_pOutputSite->Write( (void*)m_pSpeechBuf,
																	  m_cOutSamples_Frame * m_BytesPerSample,
																	  &cbWritten );
											if( FAILED( hr ) )
											{
												//----------------------------------------
												// Abort! Unable to write audio data
												//----------------------------------------
												m_SpeechState = SPEECH_DONE;
											}
										}
								    }

								    //------------------------------------
								    // Return render state
								    //------------------------------------
								    return hr;
								} /* CBackend::RenderFrame */