windows-server-2003/enduser/speech/tts/msttsdrv/engine/backend.cpp

/*******************************************************************************
* Backend.cpp *
*-------------*
*   Description:
*       This module is the implementation file for the CBackend class.
*-------------------------------------------------------------------------------
*  Created By: mc                                        Date: 03/12/99
*  Copyright (C) 1999 Microsoft Corporation
*  All Rights Reserved
*
*******************************************************************************/

#include "stdafx.h"
#ifndef __spttseng_h__
#include "spttseng.h"
#endif
#ifndef Backend_H
#include "Backend.h"
#endif
#ifndef FeedChain_H
#include "FeedChain.h"
#endif
#ifndef SPDebug_h
#include <spdebug.h>
#endif


//-----------------------------
// Data.cpp
//-----------------------------
extern const short   g_IPAToAllo[];
extern const short   g_AlloToViseme[];


//--------------------------------------
// DEBUG: Save utterance WAV file
//--------------------------------------
//#define   SAVE_WAVE_FILE  1


//-----------------------------------------------------------------------
// Vibrato lookup table: one full sine cycle sampled at 256 points and
// stored as unsigned 8-bit values (range 0x09..0xf5).
// CBackend::ProsodyMod indexes it with (m_vibrato_Phase1 >> 16), where
// the phase is kept masked to 24 bits, and subtracts 128 from the entry
// to obtain a signed offset before scaling by m_VibratoDepth.
//-----------------------------------------------------------------------
const unsigned char g_SineWaveTbl[] =
{
    0x7b,0x7e,0x81,0x84,0x87,0x89,0x8c,0x8f,0x92,0x95,0x98,0x9b,0x9d,0xa0,0xa3,0xa6,
    0xa8,0xab,0xae,0xb0,0xb3,0xb5,0xb8,0xbb,0xbd,0xbf,0xc2,0xc4,0xc7,0xc9,0xcb,0xcd,
    0xcf,0xd1,0xd3,0xd5,0xd7,0xd9,0xdb,0xdd,0xdf,0xe0,0xe2,0xe3,0xe5,0xe6,0xe8,0xe9,
    0xea,0xeb,0xec,0xed,0xee,0xef,0xf0,0xf1,0xf2,0xf2,0xf3,0xf3,0xf4,0xf4,0xf4,0xf4,
    0xf5,0xf5,0xf5,0xf5,0xf4,0xf4,0xf4,0xf4,0xf3,0xf3,0xf2,0xf1,0xf1,0xf0,0xef,0xee,
    0xed,0xec,0xeb,0xea,0xe9,0xe7,0xe6,0xe5,0xe3,0xe1,0xe0,0xde,0xdc,0xdb,0xd9,0xd7,
    0xd5,0xd3,0xd1,0xcf,0xcd,0xcb,0xc8,0xc6,0xc4,0xc1,0xbf,0xbc,0xba,0xb7,0xb5,0xb2,
    0xb0,0xad,0xaa,0xa8,0xa5,0xa2,0x9f,0x9d,0x9a,0x97,0x94,0x91,0x8f,0x8c,0x89,0x86,
    0x83,0x80,0x7d,0x7a,0x77,0x75,0x72,0x6f,0x6c,0x69,0x66,0x64,0x61,0x5e,0x5b,0x58,
    0x56,0x53,0x50,0x4e,0x4b,0x49,0x46,0x44,0x41,0x3f,0x3c,0x3a,0x38,0x35,0x33,0x31,
    0x2f,0x2d,0x2b,0x29,0x27,0x25,0x23,0x21,0x1f,0x1e,0x1c,0x1b,0x19,0x18,0x16,0x15,
    0x14,0x13,0x12,0x11,0x10,0x0f,0x0e,0x0d,0x0c,0x0c,0x0b,0x0b,0x0a,0x0a,0x0a,0x0a,
    0x09,0x09,0x09,0x09,0x0a,0x0a,0x0a,0x0a,0x0b,0x0b,0x0c,0x0d,0x0d,0x0e,0x0f,0x10,
    0x11,0x12,0x13,0x14,0x15,0x17,0x18,0x1a,0x1b,0x1d,0x1e,0x20,0x22,0x23,0x25,0x27,
    0x29,0x2b,0x2d,0x2f,0x31,0x34,0x36,0x38,0x3a,0x3d,0x3f,0x42,0x44,0x47,0x49,0x4c,
    0x4e,0x51,0x54,0x56,0x59,0x5c,0x5f,0x61,0x64,0x67,0x6a,0x6d,0x6f,0x72,0x75,0x78
};


/*void  PredictEpochDist(   float   duration,
long    nKnots,
float   SampleRate,
float   *pTime, 
float   *pF0)
{
long            curSamplesOut, endSample, j;
float           epochFreq;
long            epochLen, epochCount;

  
    curSamplesOut   = 0;
    endSample       = (long) (SampleRate * duration );
    epochCount      = 0;
    
      while( curSamplesOut < endSample )
      {
      j = 1;
      //---------------------------------------------------
      // Align to appropriate knot based on
      // current output sample
      //---------------------------------------------------
      while( (j < nKnots - 1) && (curSamplesOut > pTime[j]) ) 
      j++;
      //---------------------------------------------------
      // Calculate exact pitch thru linear interpolation
      //---------------------------------------------------
      epochFreq = LinInterp( pTime[j - 1], curSamplesOut, pTime[j], pF0[j - 1], pF0[j] );
      //---------------------------------------------------
      // Calc sample count for current epoch
      //---------------------------------------------------
      epochLen  = (long) (SampleRate / epochFreq);
      epochCount++;
      
        curSamplesOut += epochLen;
        }
        
          
            }
*/


/*****************************************************************************
* CBackend::CBackend *
*--------------------*
*   Description:
*   Constructor. Leaves the object in an empty state: every buffer
*   pointer is NULL, the unit data record is zeroed, vibrato depth is
*   zero, unit gain is 1.0 and master volume is SPMAX_VOLUME.
*   
********************************************************************** MC ***/
CBackend::CBackend( )
{
    SPDBG_FUNC( "CBackend::CBackend" );

    //--- Unit data record starts out all zeros
    memset( &m_Synth, 0, sizeof(MSUNITDATA) );

    //--- Gain and modulation defaults
    m_MasterVolume  = SPMAX_VOLUME;
    m_UnitVolume    = 1.0f;
    m_VibratoDepth  = 0;

    //--- Audio / filter state: nothing allocated yet
    m_pSpeechBuf    = NULL;
    m_pHistory      = NULL;
    m_pHistory2     = NULL;
    m_pFilter       = NULL;
    m_pReverb       = NULL;

    //--- Per-unit epoch tables: nothing allocated yet
    m_pOutEpoch     = NULL;
    m_pMap          = NULL;
    m_pRevFlag      = NULL;
} /* CBackend::CBackend */


/*****************************************************************************
* CBackend::~CBackend *
*---------------------*
*   Description:
*   Destructor. All cleanup is delegated to Release(), which frees the
*   buffers and the reverb object held by this instance.
*       
********************************************************************** MC ***/
CBackend::~CBackend( )
{
    SPDBG_FUNC( "CBackend::~CBackend" );

    // Release() resets each pointer it frees to NULL.
    Release();
} /* CBackend::~CBackend */


/*****************************************************************************
* CBackend::Release *
*-------------------*
*   Description:
*   Free memory allocated by the Backend: the current unit's synthesis
*   data (through CleanUpSynth), the audio buffer, the LPC history
*   buffers and the reverb object. Each pointer freed here is reset to
*   NULL afterwards.
*       
********************************************************************** MC ***/
void CBackend::Release( )
{
    SPDBG_FUNC( "CBackend::Release" );
    CleanUpSynth( );

    //-------------------------------------------------------------
    // m_pSpeechBuf and m_pHistory are allocated with new[] in
    // Init(), so they must be released with the array form of
    // delete (scalar delete on a new[] pointer is undefined).
    //-------------------------------------------------------------
    if( m_pSpeechBuf )
    {
        delete [] m_pSpeechBuf;
        m_pSpeechBuf = NULL;
    }
    if( m_pHistory )
    {
        delete [] m_pHistory;
        m_pHistory = NULL;
    }
    //-------------------------------------------------------------
    // NOTE(review): no allocation of m_pHistory2 is visible in this
    // part of the file, so the original scalar delete is kept.
    // If it is ever allocated with new[], switch this to delete [].
    //-------------------------------------------------------------
    if( m_pHistory2 )
    {
        delete m_pHistory2;
        m_pHistory2 = NULL;
    }
    //--- Single object created with 'new CReverbFX' in Init()
    if( m_pReverb )
    {
        delete m_pReverb;
        m_pReverb = NULL;
    }
} /* CBackend::Release */


/*****************************************************************************
* CBackend::Init *
*----------------*
*   Description:
*   Opens a backend instance, keeping a pointer of the acoustic
*   inventory.
*
*   INPUT:
*       pVoiceDataObj - voice data interface, stored for later unit lookups
*       pSrcObj       - upstream object that supplies units, stored as-is
*       pVoiceInfo    - voice parameters (sample rate, LPC order, window,
*                       FFT size, vibrato frequency, reverb type); must
*                       not be NULL, it is dereferenced unconditionally
*
*   OUTPUT:
*       S_OK on success, E_OUTOFMEMORY if the audio or history buffer
*       could not be allocated (both are released again in that case).
*       Failure to set up reverb is not an error: the reverb object is
*       simply dropped.
*       
********************************************************************** MC ***/
HRESULT CBackend::Init( IMSVoiceData* pVoiceDataObj, CFeedChain *pSrcObj, MSVOICEINFO* pVoiceInfo )
{
    SPDBG_FUNC( "CBackend::Init" );
    long    LPCsize = 0;
    HRESULT hr = S_OK;
    
    m_pVoiceDataObj = pVoiceDataObj;
    m_SampleRate = (float)pVoiceInfo->SampleRate;
    m_pSrcObj   = pSrcObj;
    m_cOrder = pVoiceInfo->LPCOrder;
    m_pWindow = pVoiceInfo->pWindow;
    m_FFTSize = pVoiceInfo->FFTSize;
    //---------------------------------------
    // NOTE: vibrato is disabled. The voice's
    // own setting (VibratoDepth / 100) is
    // deliberately not used.
    //---------------------------------------
    m_VibratoDepth = 0;
    m_VibratoFreq = pVoiceInfo->VibratoFreq;
    //---------------------------------------
    // Any reverb type selects stereo output
    // (two 16-bit channels per sample frame)
    //---------------------------------------
    if( pVoiceInfo->eReverbType > REVERB_TYPE_OFF )
    {
        m_StereoOut = true;
        m_BytesPerSample = 4;
    }
    else
    {
        m_StereoOut = false;
        m_BytesPerSample = 2;
    }
    //---------------------------------------
    // Allocate AUDIO buffer
    //---------------------------------------
    m_pSpeechBuf = new float[SPEECH_FRAME_SIZE + SPEECH_FRAME_OVER];
    if( m_pSpeechBuf == NULL )
    {
        //--------------------------------------
        // Out of memory!
        //--------------------------------------
        hr = E_OUTOFMEMORY;
    }
    if( SUCCEEDED(hr) )
    {
        //---------------------------------------
        // Allocate HISTORY buffer
        // (one slot per LPC tap plus slot 0)
        //---------------------------------------

        LPCsize = m_cOrder + 1;
        m_pHistory = new float[LPCsize];
        if( m_pHistory == NULL )
        {
            //--------------------------------------
            // Out of memory!
            //--------------------------------------
            hr = E_OUTOFMEMORY;
        }
    }
    if( SUCCEEDED(hr) )
    {
        memset( m_pHistory, 0, LPCsize * sizeof(float) );
        m_pOutEpoch         = NULL;
        m_pMap              = NULL;
        m_pRevFlag          = NULL;
        m_fModifiers        = 0;
        m_vibrato_Phase1    = 0;


        //--------------------------------
        // Reverb Effect
        //--------------------------------
        //pVoiceInfo->eReverbType = REVERB_TYPE_HALL;
        if( pVoiceInfo->eReverbType > REVERB_TYPE_OFF )
        {
            //--------------------------------
            // Create ReverbFX object
            //--------------------------------
            if( m_pReverb == NULL )
            {
                m_pReverb = new CReverbFX;
                if( m_pReverb )
                {
                    short       result;
                    result = m_pReverb->Reverb_Init( pVoiceInfo->eReverbType, (long)m_SampleRate, m_StereoOut );
                    if( result != KREVERB_NOERROR )
                    {
                        //--------------------------------------------
                        // Not enough memory to do reverb
                        // Recover gracefully
                        //--------------------------------------------
                        delete m_pReverb;
                        m_pReverb = NULL;
                    }
                }
            }
        }

        //----------------------------
        // Linear taper region scale:
        // the log taper evaluated at
        // the LINEAR_BKPT breakpoint
        //----------------------------
        m_linearScale = (float) pow( 10.0, (double)((1.0f - LINEAR_BKPT) * LOG_RANGE) / 20.0 );


    #ifdef SAVE_WAVE_FILE
        m_SaveFile = (PCSaveWAV) new CSaveWAV;     // No check needed, if this fails, we simply don't save file.
        if( m_SaveFile )
        {
            m_SaveFile->OpenWavFile( (long)m_SampleRate );
        }
    #endif

    }
    else
    {
        //--------------------------------------------------
        // Allocation failed: undo whatever was allocated.
        // Both buffers come from new[], so the array form
        // of delete is required here.
        //--------------------------------------------------
        if( m_pSpeechBuf )
        {
            delete [] m_pSpeechBuf;
            m_pSpeechBuf = NULL;
        }
        if( m_pHistory )
        {
            delete [] m_pHistory;
            m_pHistory = NULL;
        }
    }

    return hr;    
} /* CBackend::Init */


/*****************************************************************************
* CBackend::FreeSynth *
*---------------------*
*   Description:
*   Free the three buffers held by a unit data record (pEpoch, pRes and
*   pLPC) and reset each pointer to NULL. A NULL 'pSynth' is ignored.
*       
********************************************************************** MC ***/
void CBackend::FreeSynth( MSUNITDATA* pSynth )
{
    SPDBG_FUNC( "CBackend::FreeSynth" );
    if( pSynth == NULL )
    {
        return;
    }
    //-----------------------------------------------------------------
    // pEpoch is read as an array elsewhere in this file (m_pInEpoch[]),
    // so it is released with the array form of delete.
    // NOTE(review): the allocation site (GetUnitData) is outside this
    // file; pRes and pLPC are presumed to be new[] arrays as well -
    // confirm against the voice data object.
    //-----------------------------------------------------------------
    if( pSynth->pEpoch )
    {
        delete [] pSynth->pEpoch;
        pSynth->pEpoch = NULL;
    }
    if( pSynth->pRes )
    {
        delete [] pSynth->pRes;
        pSynth->pRes = NULL;
    }
    if( pSynth->pLPC )
    {
        delete [] pSynth->pLPC;
        pSynth->pLPC = NULL;
    }
} /* CBackend::FreeSynth */


/*****************************************************************************
* ExpConverter *
*--------------*
*   Description:
*   Map a linear control value onto an audio gain using a two-piece
*   taper. 'ref' is a linear value between 0.0 to 1.0.
*
*       ref <  LINEAR_BKPT : gain = linearScale * (ref / LINEAR_BKPT)
*       otherwise          : gain = 10 ^ ((1 - ref) * LOG_RANGE / 20)
*
*   'linearScale' is the gain the linear piece reaches at the breakpoint.
*       
********************************************************************** MC ***/
static float   ExpConverter( float ref, float linearScale )
{
    SPDBG_FUNC( "ExpConverter" );

    //----------------------------------------
    // At or above the breakpoint: log taper
    //----------------------------------------
    if( !(ref < LINEAR_BKPT) )
    {
        float   logGain = (float) pow( 10.0, (double)((1.0f - ref) * LOG_RANGE) / 20.0 );
        return logGain;
    }

    //----------------------------------------
    // Below the breakpoint: straight line
    // from zero up to 'linearScale'
    //----------------------------------------
    float   linGain = linearScale * (ref / LINEAR_BKPT);
    return linGain;
} /* ExpConverter */


/*****************************************************************************
* CBackend::CvtToShort *
*----------------------*
*   Description:
*   Convert (in place) FLOAT audio to SHORT.
*
*   INPUT:
*       pSrc      - 'blocksize' float samples; on return the start of the
*                   same buffer holds the 16-bit output
*       blocksize - number of input samples
*       stereoOut - non-zero: write two shorts per input sample, the
*                   second being the negated sample
*       audioGain - multiplier applied before clipping
*
*   The write pointer never passes the read pointer: each iteration reads
*   one float first and then writes one short (mono) or two shorts
*   (stereo). This relies on a float being at least as wide as two shorts.
*       
********************************************************************** MC ***/
void CBackend::CvtToShort( float *pSrc, long blocksize, long stereoOut, float audioGain )
{
    SPDBG_FUNC( "CBackend::CvtToShort" );
    long        i;
    short       *pDest;
    float       fSamp;
    int         inverted;
    
    pDest = (short*)pSrc;
    for( i = 0; i < blocksize; ++i )
    {
        //------------------------
        // Read float sample...
        //------------------------
        fSamp = (*pSrc++) * audioGain;
        //------------------------
        // ...clip to 16-bits...
        //------------------------
        if( fSamp > 32767 )
        {
            fSamp = 32767;
        }
        else if( fSamp < (-32768) )
        {
            fSamp = (-32768);
        }
        //------------------------
        // ...save as SHORT
        //------------------------
        *pDest++ = (short)fSamp;
        if( stereoOut )
        {
            //----------------------------------------------
            // Second channel is the negated sample.
            // Negating -32768 gives +32768, which does not
            // fit in a short and would wrap back to -32768,
            // so clip it to the positive limit.
            //----------------------------------------------
            inverted = 0 - (int)fSamp;
            if( inverted > 32767 )
            {
                inverted = 32767;
            }
            *pDest++ = (short)inverted;
        }
    }
} /* CBackend::CvtToShort */


/*****************************************************************************
* CBackend::PSOLA_Stretch *
*-------------------------*
*   Description:
*   Does PSOLA epoch stretching or compressing
*
*   The output is cleared, sample 0 is copied unweighted, and then for
*   i = 1 .. min(InSize,OutSize)-1 the i-th sample from the start of the
*   input is added to the i-th sample from the start of the output, and
*   the i-th sample from the end of the input is added to the i-th sample
*   from the end of the output. Both use the same window weight, read
*   from 'pWindow' at a position that advances by cWindowSize/min(...)
*   per step. Where the two halves overlap in the output they are summed.
*
*   INPUT:
*       pInRes, InSize    - source epoch
*       pOutRes, OutSize  - destination epoch (fully overwritten)
*       pWindow           - window weights
*       cWindowSize       - span of the window walked across the epoch
*
*   Empty frames are handled safely: nothing is written when OutSize is
*   not positive, and the output is left cleared when InSize is not
*   positive.
*       
********************************************************************** MC ***/
void CBackend::PSOLA_Stretch(     float *pInRes, long InSize, 
                    float *pOutRes, long OutSize,
                    float *pWindow, 
                    long  cWindowSize )
{
    SPDBG_FUNC( "CBackend::PSOLA_Stretch" );
    long    i, lim;
    float   window, delta, kf;
    
    //------------------------------------------------
    // No room in the output: even pOutRes[0] would
    // be out of bounds, so do nothing.
    //------------------------------------------------
    if( OutSize <= 0 )
    {
        return;
    }
    memset( pOutRes, 0, sizeof(float) * OutSize  );
    //------------------------------------------------
    // No input samples: avoid reading pInRes[0] and
    // dividing by zero below; output stays cleared.
    //------------------------------------------------
    if( InSize <= 0 )
    {
        return;
    }
    lim = MIN(InSize, OutSize );
    delta = (float)cWindowSize / (float)lim;
    kf = 0.5f;                      // start half a step in so truncation rounds
    pOutRes[0] = pInRes[0];
    //------------------------------------------------
    // NOTE(review): the window index can reach
    // cWindowSize when lim >= 2 * cWindowSize;
    // confirm 'pWindow' is large enough for that.
    //------------------------------------------------
    for( i = 1; i < lim; ++i )
    {
        kf += delta;
        window = pWindow[(long) kf];
        pOutRes[i] += pInRes[i] * window;
        pOutRes[OutSize - i] += pInRes[InSize - i] * window;
    }
} /* CBackend::PSOLA_Stretch */


/*****************************************************************************
* CBackend::PrepareSpeech *
*-------------------------*
*   Description:
*   Reset the per-utterance rendering state and remember the engine
*   site that events and actions are exchanged with.
*       
********************************************************************** MC ***/
void    CBackend::PrepareSpeech( ISpTTSEngineSite* outputSite )
{
    SPDBG_FUNC( "CBackend::PrepareSpeech" );

    //--- Where events are posted and actions are polled
    m_pOutputSite       = outputSite;

    //--- Overall stream state
    m_SpeechState       = SPEECH_CONTINUE;
    m_HasSpeech         = false;
    m_cOutSamples_Total = 0;

    //--- Current-phon state: start in silence with a zero-length target.
    //--- (Presumably this makes the first render pass fetch a new unit
    //--- right away - confirm against the render loop.)
    m_silMode           = true;
    m_durationTarget    = 0;
    m_cOutSamples_Phon  = 1;

    //--- No output epochs pending (pull model big-bang)
    m_cOutEpochs        = 0;
} /* CBackend::PrepareSpeech */


/*****************************************************************************
* CBackend::ProsodyMod *
*----------------------*
*   Description:
*   Calculate the epoch sequence for the synthesized speech
* 
*   INPUT:
*       pCurUnit      - supplies the pitch contour: 'nKnots' knots with
*                       positions 'pTime' (compared against the running
*                       output sample count) and frequencies 'pF0'
*       cInEpochs     - number of entries in m_pInEpoch
*       durationMpy   - time-stretch factor; output time is divided by it
*                       to align with the input time line
*       cMaxOutEpochs - capacity of the output tables
*
*       m_pInEpoch holds the input epoch lengths; a negative length
*       marks an unvoiced epoch.
* 
*   OUTPUT:
*       Fills 'm_pOutEpoch', 'm_pMap', and 'm_pRevFlag'
*       Returns new epoch count
*       
********************************************************************** MC ***/
long CBackend::ProsodyMod(     UNITINFO    *pCurUnit, 
                               long         cInEpochs, 
                               float        durationMpy,
                               long         cMaxOutEpochs )
{   
    SPDBG_FUNC( "CBackend::ProsodyMod" );
    long    iframe, framesize, framesizeOut, j;
    long    cntOut, csamplesOut, cOutEpochs;
    BOOL    fUnvoiced;
    float   durationIn;         // Active accum of IN duration
    float   durationOut;        // Active accum of OUT duration aligned to IN domain
    BOOL    fAdvanceInput;
    float   vibrato;
    float   phaseStep;
    float   epochFreq;
    float   *pTime;
    float   *pF0;
    
    iframe          = 0;
    durationIn      = 0.0f;
    durationOut     = 0.0f;
    csamplesOut     = 0;
    cntOut          = 0;        // outputs emitted from the current input epoch
    cOutEpochs      = 0;
    pTime           = pCurUnit->pTime;
    pF0             = pCurUnit->pF0;
    
    while( iframe < cInEpochs && cOutEpochs < cMaxOutEpochs)
    {
        //-----------------------------------------
        //  Compute output frame length
        //-----------------------------------------
        if( m_pInEpoch[iframe] < 0 )
        {
            //-------------------------------------------------
            // Since we can't change unvoiced pitch,
            // do not change frame size for unvoiced frames
            //-------------------------------------------------
            framesize       = (long)((-m_pInEpoch[iframe]) + 0.5f);
            framesizeOut    = framesize;
            fUnvoiced       = true;
        }
        else
        {
            //---------------------------------------------------
            // Modify frame size for voiced epoch
            // based on epoch frequency
            //---------------------------------------------------
            j = 1;
            //---------------------------------------------------
            // Align to appropriate knot based on
            // current output sample
            //---------------------------------------------------
            while( (j < (long)pCurUnit->nKnots - 1) && (csamplesOut > pTime[j]) ) 
                j++;
            //---------------------------------------------------
            // Calculate exact pitch thru linear interpolation
            //---------------------------------------------------
            epochFreq = LinInterp( pTime[j - 1], (float)csamplesOut, pTime[j], pF0[j - 1], pF0[j] );
            
            //---------------------------------------------------
            // Vibrato: signed table entry scaled by depth.
            // The phase is kept to 24 bits below, so the
            // index (phase >> 16) stays within 0..255.
            //---------------------------------------------------
            vibrato = (float)(g_SineWaveTbl[m_vibrato_Phase1 >> 16] - 128);
            vibrato *= m_VibratoDepth;
            
            //---------------------------------------------------
            // Output epoch length from the target frequency,
            // never below MIN_VOICE_PITCH
            //---------------------------------------------------
            epochFreq       = epochFreq + vibrato;
            if( epochFreq < MIN_VOICE_PITCH )
            {
                epochFreq = MIN_VOICE_PITCH;
            }
            framesize       = (long)(m_pInEpoch[iframe] + 0.5f);
            framesizeOut    = (long)(m_SampleRate / epochFreq);
            
            //---------------------------------------------------
            // Advance the vibrato phase by this epoch's length.
            // NOTE(review): the 22050 is hard-coded rather than
            // m_SampleRate - confirm this is intended for voices
            // at other sample rates.
            //---------------------------------------------------
            phaseStep       = ((float)256 / ((float)22050 / m_VibratoFreq)) * (float)framesizeOut;
            m_vibrato_Phase1 += (long)(phaseStep * (float)65536);
            m_vibrato_Phase1 &= 0xFFFFFF;
            fUnvoiced = false;
        }
        
        
        //-------------------------------------------
        //  Generate next output frame
        //  (only while the centre of the candidate
        //  output epoch, mapped to input time, still
        //  falls inside the current input epoch)
        //-------------------------------------------
        fAdvanceInput = false;
        if( durationOut + (0.5f * framesizeOut/durationMpy) <= durationIn + framesize )
        {
            //-----------------------------------------
            // If UNvoiced and odd frame,
            // reverse residual
            //-----------------------------------------
            if( fUnvoiced && (cntOut & 1) )
            {
                m_pRevFlag[cOutEpochs] = true;
            }
            else
            {
                m_pRevFlag[cOutEpochs] = false;
            }
            ++cntOut;
            
            durationOut += framesizeOut/durationMpy;
            csamplesOut += framesizeOut;
            m_pOutEpoch[cOutEpochs] = (float)framesizeOut;
            m_pMap[cOutEpochs] = iframe;
            cOutEpochs++;
        }
        else 
        {
            fAdvanceInput = true;
        }
        
        //-------------------------------------------
        // Advance to next input frame.
        // The test is repeated on purpose: after an
        // emission 'durationOut' has moved on, so
        // the next output may already belong to the
        // following input epoch.
        //-------------------------------------------
        if(     ((durationOut + (0.5f * framesizeOut/durationMpy)) > (durationIn + framesize)) || 
            fAdvanceInput )
        {
            durationIn += framesize;
            ++iframe;
            cntOut = 0;
        }
    }
        
    return cOutEpochs;
} /* CBackend::ProsodyMod */


/*****************************************************************************
* CBackend::LPCFilter *
*---------------------*
*   Description:
*   LPC filter of order cOrder. It filters the residual signal
*   pRes, producing output pOutWave. This routine requires that
*   pOutWave has the true waveform history from [-cOrder,0] and
*   of course it has to be defined.
*       
********************************************************************** MC ***/
void CBackend::LPCFilter( float *pCurLPC, float *pCurRes, long len, float gain )
{
    SPDBG_FUNC( "CBackend::LPCFilter" );
    INT t, j;
    
    for( t = 0; t < len; t++ )
    {
        m_pHistory[0] = pCurLPC[0] * pCurRes[t];
        for( j = m_cOrder; j > 0; j-- )
        {
            m_pHistory[0] -= pCurLPC[j] * m_pHistory[j];
            m_pHistory[j] = m_pHistory[j - 1];
        }
        pCurRes[t] = m_pHistory[0] * gain;
    }
} /* CBackend::LPCFilter */


/*void CBackend::LPCFilter( float *pCurLPC, float *pCurRes, long len )
{
long        t;

  for( t = 0; t < len; t++ )
        {
        pCurRes[t] = pCurRes[t] * 10;
        }
        }
*/


/*****************************************************************************
* CBackend::ResRecons *
*---------------------*
*   Description:
*   Builds one output residual epoch in 'pOutRes' from the input epoch
*   'pInRes', then applies 'scale'. Three cases:
*       - epoch flagged in m_pRevFlag : first OutSize input samples,
*                                       copied in reverse order
*       - InSize equals OutSize       : straight copy
*       - anything else               : PSOLA_Stretch with the voice
*                                       window (m_pWindow, m_FFTSize)
*       
********************************************************************** MC ***/
void CBackend::ResRecons( float *pInRes, 
                          long  InSize, 
                          float *pOutRes, 
                          long  OutSize, 
                          float scale )
{
    SPDBG_FUNC( "CBackend::ResRecons" );
    long        n;
    
    if( m_pRevFlag[m_EpochIndex] )
    {
        //----------------------------------------------------
        // Repeated UNvoiced residual: play it backwards
        //----------------------------------------------------
        for( n = 0; n < OutSize; ++n )
        {
            pOutRes[n] = pInRes[OutSize - 1 - n];
        }
    }
    else if( InSize != OutSize )
    {
        //----------------------------------------------------
        // Voiced residual whose length changed
        //----------------------------------------------------
        PSOLA_Stretch( pInRes, InSize, pOutRes, OutSize, m_pWindow, m_FFTSize );
    }
    else
    {
        //----------------------------------------------------
        // Same length in and out: plain copy
        //----------------------------------------------------
        memcpy( pOutRes, pInRes, sizeof(float) *OutSize );
    }
    
    //----------------------------------
    // Unity gain needs no further work
    //----------------------------------
    if( scale == 1.0f )
    {
        return;
    }

    //----------------------------------
    // Amplify frame
    //----------------------------------
    n = 0;
    while( n < OutSize )
    {
        pOutRes[n] *= scale;
        ++n;
    }
} /* CBackend::ResRecons */


/*****************************************************************************
* CBackend::StartNewUnit *
*------------------------*
*   Description:
*   Synthesize audio samples for a target unit
* 
*   INPUT:
*       pCurUnit - unit ID, F0, duration, etc.
* 
*   OUTPUT:
*       Sets 'pCurUnit->csamplesOut' with audio length
*       
********************************************************************** MC ***/
HRESULT CBackend::StartNewUnit( )
{   
    SPDBG_FUNC( "CBackend::StartNewUnit" );
    long        cframeMax = 0, cInEpochs = 0, i;
    float       totalDuration, durationOut, durationMpy = 0;
    UNITINFO    *pCurUnit;
    HRESULT     hr = S_OK;
    SPEVENT     event;
	ULONGLONG	clientInterest;
 	USHORT		volumeVal;
   
	// Check for VOLUME change
	if( m_pOutputSite->GetActions() & SPVES_VOLUME )
	{
		hr = m_pOutputSite->GetVolume( &volumeVal );
		if ( SUCCEEDED( hr ) )
		{
			if( volumeVal > SPMAX_VOLUME )
			{
				//--- Clip rate to engine maximum
				volumeVal = SPMAX_VOLUME;
			}
			else if ( volumeVal < SPMIN_VOLUME )
			{
				//--- Clip rate to engine minimum
				volumeVal = SPMIN_VOLUME;
			}
			m_MasterVolume = volumeVal;
		}
	}

    //---------------------------------------
    // Delete previous unit
    //---------------------------------------
    CleanUpSynth( );
    
    //---------------------------------------
    // Get next phon
    //---------------------------------------
    hr = m_pSrcObj->NextData( (void**)&pCurUnit, &m_SpeechState );
    if( m_SpeechState == SPEECH_CONTINUE )
    {
		m_HasSpeech = pCurUnit->hasSpeech;
		m_pOutputSite->GetEventInterest( &clientInterest );

		//------------------------------------------------
        // Post SENTENCE event
        //------------------------------------------------
        if( (pCurUnit->flags & SENT_START_FLAG) && (clientInterest & SPFEI(SPEI_SENTENCE_BOUNDARY)) )
        {
			event.elParamType = SPET_LPARAM_IS_UNDEFINED;
            event.eEventId = SPEI_SENTENCE_BOUNDARY;
            event.ullAudioStreamOffset = m_cOutSamples_Total * m_BytesPerSample;
	        event.lParam = pCurUnit->sentencePosition;	        // Input word position
	        event.wParam = pCurUnit->sentenceLen;	            // Input word length
            m_pOutputSite->AddEvents( &event, 1 );
        }
        //------------------------------------------------
        // Post PHONEME event
        //------------------------------------------------
        if( clientInterest & SPFEI(SPEI_PHONEME) )
		{
			event.elParamType = SPET_LPARAM_IS_UNDEFINED;
			event.eEventId = SPEI_PHONEME;
			event.ullAudioStreamOffset = m_cOutSamples_Total * m_BytesPerSample;
			event.lParam = ((ULONG)pCurUnit->AlloFeatures << 16) + g_IPAToAllo[pCurUnit->AlloID];
			event.wParam = ((ULONG)(pCurUnit->duration * 1000.0f) << 16) + g_IPAToAllo[pCurUnit->NextAlloID];
			m_pOutputSite->AddEvents( &event, 1 );
		}

        //------------------------------------------------
        // Post VISEME event
        //------------------------------------------------
        if( clientInterest & SPFEI(SPEI_VISEME) )
		{
			event.elParamType = SPET_LPARAM_IS_UNDEFINED;
			event.eEventId = SPEI_VISEME;
			event.ullAudioStreamOffset = m_cOutSamples_Total * m_BytesPerSample;
			event.lParam = ((ULONG)pCurUnit->AlloFeatures << 16) + g_AlloToViseme[pCurUnit->AlloID];
			event.wParam = ((ULONG)(pCurUnit->duration * 1000.0f) << 16) + g_AlloToViseme[pCurUnit->NextAlloID];
			m_pOutputSite->AddEvents( &event, 1 );
		}

        //------------------------------------------------
        // Post any bookmark events
        //------------------------------------------------
        if( pCurUnit->pBMObj != NULL )
        {
            CBookmarkList   *pBMObj;
            BOOKMARK_ITEM*  pMarker;

            //-------------------------------------------------
            // Retrieve marker strings from Bookmark list and
            // enter into Event list
            //-------------------------------------------------
            pBMObj = (CBookmarkList*)pCurUnit->pBMObj;
            //cMarkerCount = pBMObj->m_BMList.GetCount();
			if( clientInterest & SPFEI(SPEI_TTS_BOOKMARK) )
			{
				//---------------------------------------
				// Send event for every bookmark in list
				//---------------------------------------
				SPLISTPOS	listPos;

				listPos = pBMObj->m_BMList.GetHeadPosition();
				while( listPos )
				{
					pMarker                    = (BOOKMARK_ITEM*)pBMObj->m_BMList.GetNext( listPos );
					event.eEventId             = SPEI_TTS_BOOKMARK;
					event.elParamType          = SPET_LPARAM_IS_STRING;
					event.ullAudioStreamOffset = m_cOutSamples_Total * m_BytesPerSample;
                    //--- Copy in bookmark string - has been NULL terminated in source already...
					event.lParam               = pMarker->pBMItem;
                    // Engine must convert string to long for wParam.
                    event.wParam               = _wtol((WCHAR *)pMarker->pBMItem);
					m_pOutputSite->AddEvents( &event, 1 );
				}
			}
            //---------------------------------------------
            // We don't need this Bookmark list any more
            //---------------------------------------------
            delete pBMObj;
            pCurUnit->pBMObj = NULL;
        }
		

        pCurUnit->csamplesOut = 0;
        //******************************************************
        // For SIL, fill buffer with zeros...
        //******************************************************
        if( pCurUnit->UnitID == UNIT_SIL )
        {   
            //---------------------------------------------
            // Calc SIL length
            //---------------------------------------------
            m_durationTarget    = (long)(m_SampleRate * pCurUnit->duration);
            m_cOutSamples_Phon  = 0;
            m_silMode           = true;
        
            //---------------------------------------------
            // Clear LPC filter storage
            //---------------------------------------------
            memset( m_pHistory, 0, sizeof(float)*(m_cOrder+1) );
        
            //--------------------------------
            // Success!
            //--------------------------------

            // Debug macro - output unit data...
            TTSDBG_LOGUNITS;
        }   
        //******************************************************
        // ...otherwise fill buffer with inventory data
        //******************************************************
        else
        {
            m_silMode = false;
            // Get unit data from voice
            hr = m_pVoiceDataObj->GetUnitData( pCurUnit->UnitID, &m_Synth );
            if( SUCCEEDED(hr) )
            {
                durationOut     = 0.0f;
                cInEpochs       = m_Synth.cNumEpochs;
                m_pInEpoch      = m_Synth.pEpoch;
                //cframeMax     = PeakValue( m_pInEpoch, cInEpochs );
                totalDuration   = (float)m_Synth.cNumSamples;

                //-----------------------------------------------
                // For debugging: Force duration to unit length
                //-----------------------------------------------
                /*float       unitDur;

                unitDur = totalDuration / 22050.0f;     
                if( pCurUnit->duration < unitDur )
                {
                    if( pCurUnit->speechRate < 1 )
                    {
                        pCurUnit->duration = unitDur * pCurUnit->speechRate;
                    }
                    else
                    {
                        pCurUnit->duration = unitDur;
                    }
                }*/

                durationMpy     = pCurUnit->duration;
        
                cframeMax = (long)pCurUnit->pF0[0];
                for( i = 1; i < (long)pCurUnit->nKnots; i++ )
                {
                    //-----------------------------------------
                    // Find the longest epoch
                    //-----------------------------------------
                    cframeMax = (long)(MAX(cframeMax,pCurUnit->pF0[i]));
                }
                cframeMax *= (long)(durationMpy * MAX_TARGETS_PER_UNIT);
        
        
                durationMpy = (m_SampleRate * durationMpy) / totalDuration;
                cframeMax += (long)(durationMpy * cInEpochs * MAX_TARGETS_PER_UNIT);
                //
                // mplumpe 11/18/97 : added to eliminate chance of crash.
                //
                cframeMax *= 2;
                //---------------------------------------------------
                // New epochs adjusted for duration and pitch
                //---------------------------------------------------
                m_pOutEpoch = new float[cframeMax];
                if( !m_pOutEpoch )
                {
                    //--------------------------------------
                    // Out of memory!
                    //--------------------------------------
                    hr = E_OUTOFMEMORY;
                    pCurUnit->csamplesOut = 0;
                    CleanUpSynth( );
                }
            }
            if( SUCCEEDED(hr) )
            {
                //---------------------------------------------------
                // Index back to orig epoch
                //---------------------------------------------------
                m_pMap = new long[cframeMax];
                if( !m_pMap )
                {
                    //--------------------------------------
                    // Out of memory!
                    //--------------------------------------
                    hr = E_OUTOFMEMORY;
                    pCurUnit->csamplesOut = 0;
                    CleanUpSynth( );
                }
            }
            if( SUCCEEDED(hr) )
            {
                //---------------------------------------------------
                // TRUE = reverse residual
                //---------------------------------------------------
                m_pRevFlag = new short[cframeMax];
                if( !m_pRevFlag )
                {
                    //--------------------------------------
                    // Out of memory!
                    //--------------------------------------
                    hr = E_OUTOFMEMORY;
                    pCurUnit->csamplesOut = 0;
                    CleanUpSynth( );
                }
            }
            if( SUCCEEDED(hr) )
            {
                //---------------------------------------------------------------------
                // Compute synthesis epochs and corresponding mapping to analysis
                // fills in:    m_pOutEpoch, m_pMap, m_pRevFlag
                //---------------------------------------------------------------------
                m_cOutEpochs = ProsodyMod( pCurUnit, cInEpochs, durationMpy, cframeMax );
        
                //------------------------------------------------
                // Now that actual epoch sizes are known,
                // calculate total audio sample count
                // @@@@ NO LONGER NEEDED
                //------------------------------------------------
                pCurUnit->csamplesOut = 0;
                for( i = 0; i < m_cOutEpochs; i++ )
                {
                    pCurUnit->csamplesOut += (long)(ABS(m_pOutEpoch[i]));
                }
        
        
                m_cOutSamples_Phon  = 0;
                m_EpochIndex        = 0;
                m_durationTarget    = (long)(pCurUnit->duration * m_SampleRate);
                m_pInRes            = m_Synth.pRes;
                m_pLPC              = m_Synth.pLPC;
                m_pSynthTime        = pCurUnit->pTime;
                m_pSynthAmp         = pCurUnit->pAmp;
                m_nKnots            = pCurUnit->nKnots;
                // NOTE: Maybe make log volume?
                m_UnitVolume        = (float)pCurUnit->user_Volume / 100.0f;

                //------------------------------------------------
                // Post WORD event
                //------------------------------------------------
               if( (pCurUnit->flags & WORD_START_FLAG) && (clientInterest & SPFEI(SPEI_WORD_BOUNDARY)) )
                {
					event.elParamType = SPET_LPARAM_IS_UNDEFINED;
                    event.eEventId = SPEI_WORD_BOUNDARY;
                    event.ullAudioStreamOffset = m_cOutSamples_Total * m_BytesPerSample;
	                event.lParam = pCurUnit->srcPosition;	        // Input word position
	                event.wParam = pCurUnit->srcLen;	            // Input word length
                    m_pOutputSite->AddEvents( &event, 1 );
                }
        

                //--- Debug macro - output unit data
                TTSDBG_LOGUNITS;
            }
        }
    }

    return hr;
} /* CBackend::StartNewUnit */


/*****************************************************************************
* CBackend::CleanUpSynth *
*------------------------*
*   Description:
*   Release the per-unit synthesis buffers (m_pOutEpoch, m_pMap and
*   m_pRevFlag) and hand the unit data in m_Synth to FreeSynth(). Each
*   buffer pointer is reset to NULL after it is freed, so a repeated call
*   does not free the same array twice.
*       
********************************************************************** MC ***/
void    CBackend::CleanUpSynth( )
{
    SPDBG_FUNC( "CBackend::CleanUpSynth" );

    //---------------------------------------------------------------
    // These three buffers are allocated with new[] in StartNewUnit,
    // so they must be released with the array form of delete.
    // Using scalar delete on an array is undefined behavior.
    //---------------------------------------------------------------
    if( m_pOutEpoch )
    {
        delete [] m_pOutEpoch;
        m_pOutEpoch = NULL;
    }
    if( m_pMap )
    {
        delete [] m_pMap;
        m_pMap = NULL;
    }
    if( m_pRevFlag )
    {
        delete [] m_pRevFlag;
        m_pRevFlag = NULL;
    }
    // NOTE: make object?
    FreeSynth( &m_Synth );

} /* CBackend::CleanUpSynth */


/*****************************************************************************
* CBackend::RenderFrame *
*-----------------------*
*   Description:
*   This is the central synthesis loop. Keep filling the output audio
*   buffer until the buffer frame is full or speech is done. To render 
*   continuous speech, get each unit one at a time from the upstream buffer.
*
*   Each pass of the loop does exactly one of:
*       - emit one zero sample (silence unit still in progress)
*       - synthesize one output epoch of the current unit
*       - call StartNewUnit() to advance to the next unit (emits nothing)
*
*   When the loop ends without error, the frame is post-processed
*   (reverb or CvtToShort) and written to m_pOutputSite.
*
*   Returns:
*       The failing HRESULT from StartNewUnit() or from the output site's
*       Write(), otherwise the HRESULT of the Write() call.
*       On any failure m_SpeechState is set to SPEECH_DONE.
*       
********************************************************************** MC ***/
HRESULT CBackend::RenderFrame( )
{
    SPDBG_FUNC( "CBackend::RenderFrame" );
    long        InSize, OutSize;
    long        iframe;
    float       *pCurInRes, *pCurOutRes;
    long        i, j;
    float       ampMpy;
    HRESULT     hr = S_OK;
    
    // Number of samples placed in m_pSpeechBuf during this call
    m_cOutSamples_Frame = 0;
    do
    {
        // Samples produced by this pass; stays 0 when a new unit is started
        OutSize = 0;
        if( m_silMode )
        {
            //-------------------------------
            // Silence mode
            //-------------------------------
            if( m_cOutSamples_Phon >= m_durationTarget )
            {
                //---------------------------
                // Current silence has reached
                // its target length:
                // Get next unit
                //---------------------------
                hr = StartNewUnit( );
                if (FAILED(hr))
                {
                    //-----------------------------------
                    // Try to end it gracefully...
                    //-----------------------------------
                    m_SpeechState = SPEECH_DONE;
                }

				TTSDBG_LOGSILEPOCH;
            }
            else
            {
                //---------------------------
                // Continue with current SIL
                // (one zero sample per pass)
                //---------------------------
                m_pSpeechBuf[m_cOutSamples_Frame] = 0;
                OutSize = 1;
            }
        }
        else
        {
            if( m_EpochIndex < m_cOutEpochs )
            {
                //-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
                //
                // Continue with current phon
                //
                //-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
                //------------------------------------
                // Find current input residual:
                // m_pMap gives the input epoch used
                // for this output epoch; skip over
                // the samples of all earlier input
                // epochs to reach its residual.
                // Epoch lengths are read through
                // ABS(), so stored values may be
                // negative.
                // NOTE(review): confirm what the
                // sign encodes.
                //------------------------------------
                iframe = m_pMap[m_EpochIndex];
                pCurInRes = m_pInRes;
                for( i = 0; i < iframe; i++)
                {
                    pCurInRes += (long) ABS(m_pInEpoch[i]);
                }
                
                pCurOutRes  = m_pSpeechBuf + m_cOutSamples_Frame;
                InSize      = (long)(ABS(m_pInEpoch[iframe]));
                OutSize     = (long)(ABS(m_pOutEpoch[m_EpochIndex]));
                //------------------------------------
                // Guard against an epoch that would
                // run past SPEECH_FRAME_SIZE +
                // SPEECH_FRAME_OVER samples: clamp
                // its length. Note that the clamped
                // value is stored back into
                // m_pOutEpoch.
                //------------------------------------
                if (m_cOutSamples_Frame + OutSize > SPEECH_FRAME_SIZE + SPEECH_FRAME_OVER)
                {
                    m_pOutEpoch[m_EpochIndex] = SPEECH_FRAME_OVER-1;  // still huge
                    OutSize = (long)(ABS(m_pOutEpoch[m_EpochIndex]));
                }
                //------------------------------------
                // Find the knot pair [j-1, j] around
                // the current position in the unit
                // (j is capped at m_nKnots - 1) and
                // derive the amplitude multiplier
                // from the knot amplitudes.
                // NOTE(review): assumes m_nKnots >= 2
                // and that LinInterp interpolates
                // linearly - confirm.
                //------------------------------------
                j = 1;
                while( (j < m_nKnots - 1) && (m_cOutSamples_Phon > m_pSynthTime[j]) )
                {
                    j++;
                }
                ampMpy = LinInterp( m_pSynthTime[j - 1], (float)m_cOutSamples_Phon, m_pSynthTime[j], m_pSynthAmp[j - 1], m_pSynthAmp[j] );
                //ampMpy = 1;
                
                //--------------------------------------------
                // Do stretching of residuals
                // (InSize input samples -> OutSize output
                // samples, written at pCurOutRes)
                //--------------------------------------------
                ResRecons( pCurInRes, InSize, pCurOutRes, OutSize, ampMpy );
                
                //--------------------------------------------
                // Do LPC reconstruction
                //--------------------------------------------
                float       *pCurLPC;
				float       totalGain;

				// Overall gain = converted master volume * converted unit volume
				totalGain = ExpConverter( ((float)m_MasterVolume / (float)SPMAX_VOLUME), m_linearScale ) 
								* ExpConverter( m_UnitVolume, m_linearScale );
                
                // Each input epoch owns (1 + m_cOrder) entries in m_pLPC.
                // The first entry is forced to 1.0 - this writes into the
                // m_pLPC table itself.
                pCurLPC = m_pLPC + m_pMap[m_EpochIndex] * (1 + m_cOrder);
                pCurLPC[0] = 1.0f;
                // Filter the stretched residual in place in the speech buffer
                LPCFilter( pCurLPC, &m_pSpeechBuf[m_cOutSamples_Frame], OutSize, totalGain );
                m_EpochIndex++;
            }
            else
            {
                //-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
                //
                // Get next phon
                //
                //-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
                hr = StartNewUnit( );
                if (FAILED(hr))
                {
                    //-----------------------------------
                    // Try to end it gracefully...
                    //-----------------------------------
                    m_SpeechState = SPEECH_DONE;
                }
				TTSDBG_LOGSILEPOCH;
            }
        }
        // Advance the per-frame, per-unit and per-utterance sample counters
        m_cOutSamples_Frame += OutSize;
        m_cOutSamples_Phon += OutSize;
        m_cOutSamples_Total += OutSize;

		TTSDBG_LOGEPOCHS;
    }
    // Stop once at least SPEECH_FRAME_SIZE samples are buffered, or when
    // the speech state is no longer SPEECH_CONTINUE
    while( (m_cOutSamples_Frame < SPEECH_FRAME_SIZE) && (m_SpeechState == SPEECH_CONTINUE) );
    
	if( SUCCEEDED(hr) )
	{
		//----------------------------------------------
		// Convert buffer from FLOAT to SHORT
		// NOTE(review): on the reverb path CvtToShort is
		// not called; presumably Reverb_Process performs
		// the conversion itself - confirm.
		//----------------------------------------------
		if( m_pReverb )
		{
			//---------------------------------
			// Add REVERB
			//---------------------------------
			m_pReverb->Reverb_Process( m_pSpeechBuf, m_cOutSamples_Frame, 1.0f );
		}
		else
		{
			CvtToShort( m_pSpeechBuf, m_cOutSamples_Frame, m_StereoOut, 1.0f );
		}

        //--- Debug Macro - output wave data to stream
        TTSDBG_LOGWAVE;
	}
    
    if( SUCCEEDED( hr ) )
    {
        //------------------------------------
        // Send this buffer to SAPI site
        //------------------------------------
        DWORD   cbWritten;

		//------------------------------------------------------------------------------------
		// This was my lame hack to avoid sending buffers when nothing was spoken.
		// It was causing problems (among others) since StartNewUnit() was still sending
		// events - with no corresponding audio buffer! 
		//
		// This was too simple of a scheme. Disable this feature for now... 
		// ...until I come up with something more robust. (MC)
		//------------------------------------------------------------------------------------

		//if( m_HasSpeech )
		{
			// Byte count = samples in this frame * m_BytesPerSample.
			// cbWritten receives the site's result but is not checked here.
			hr = m_pOutputSite->Write( (void*)m_pSpeechBuf, 
									  m_cOutSamples_Frame * m_BytesPerSample, 
									  &cbWritten );
			if( FAILED( hr ) )
			{
				//----------------------------------------
				// Abort! Unable to write audio data
				//----------------------------------------
				m_SpeechState = SPEECH_DONE;
			}
		}
    }

    //------------------------------------
    // Return render state
    //------------------------------------
    return hr;
} /* CBackend::RenderFrame */