tf2/tf2_src/sfmobjects/sfmphonemeextractor.cpp


								//========= Copyright Valve Corporation, All rights reserved. ============//

								//

								// Purpose:

								//

								//=============================================================================


								#include "sfmobjects/SFMPhonemeExtractor.h"

								#include "tier2/riff.h"

								#include "PhonemeConverter.h"

								#include "filesystem.h"

								#include "tier1/utlbuffer.h"

								#include "sentence.h"

								#include "movieobjects/dmesound.h"

								#include "movieobjects/dmeanimationset.h"

								#include "movieobjects/dmebookmark.h"

								#include "movieobjects/dmeclip.h"

								#include "movieobjects/dmechannel.h"

								#include "soundchars.h"

								#include "tier2/p4helpers.h"

								#include "tier2/soundutils.h"

								#include "tier1/utldict.h"


								#include <windows.h>  // WAVEFORMATEX, WAVEFORMAT and ADPCM WAVEFORMAT!!!

								#include <mmreg.h>


								// memdbgon must be the last include file in a .cpp file!!!

								#include "tier0/memdbgon.h"


								// Attribute names read (as floats) from a preset's control-value element,
								// indexed by flex channel index; see StampControlValueLogs.
								static const char *s_pAttributeValueNames[LOG_PREVIEW_FLEX_CHANNEL_COUNT] =
								{
									"value",
									"balance",
									"multilevel"
								};

								// Attribute names read (as floats) from a LogPreview_t's control element to
								// obtain the default for each flex channel index; same ordering as
								// s_pAttributeValueNames.
								static const char *s_pDefaultAttributeValueNames[LOG_PREVIEW_FLEX_CHANNEL_COUNT] =
								{
									"defaultValue",
									"defaultBalance",
									"defaultMultilevel"
								};


								// One loaded phoneme extractor module, as registered by CSFMPhonemeExtractor::Init.
								struct Extractor
								{
									// Value of extractor->GetAPIType(), cached at load time; used by FindExtractor
									PE_APITYPE			apitype;
									// Module handle returned by LoadModule; released in Shutdown
									CSysModule			*module;
									// Interface obtained from the module's factory (VPHONEME_EXTRACTOR_INTERFACE)
									IPhonemeExtractor	*extractor;
								};


								//-----------------------------------------------------------------------------

								// Implementations of the phoneme extractor

								//-----------------------------------------------------------------------------

								// Concrete ISFMPhonemeExtractor: owns the list of loaded extractor modules and
								// turns extracted phoneme data into facial control log keys.
								class CSFMPhonemeExtractor : public ISFMPhonemeExtractor
								{
								public:
									CSFMPhonemeExtractor();

									// Inherited from ISFMPhonemeExtractor

									// Loads every extractor module found under "phonemeextractors/"
									virtual bool Init();
									// Unloads all modules loaded by Init and empties the extractor list
									virtual void Shutdown();
									// Number of extractors currently loaded
									virtual int GetAPICount();
									// Reports the display name and API type of the extractor at 'index'
									virtual void GetAPIInfo( int index, CUtlString* pPrintName, PE_APITYPE *pAPIType );
									// Runs extraction (or reuses embedded .wav sentence data) for every work item in 'info'
									virtual void Extract( const PE_APITYPE& apiType, ExtractDesc_t& info, bool bWritePhonemesToWavFiles );
									virtual void ReApply( ExtractDesc_t& info );
									// Reads the sentence embedded in the sound's .wav file; returns true only if one was found
									virtual bool GetSentence( CDmeGameSound *gameSound, CSentence& sentence );

								private:
									// Returns the index into m_Extractors of the given API type, or -1
									int FindExtractor( PE_APITYPE type );
									// Walks a RIFF WAVE file, collecting the format chunk, data chunk size and optional sentence chunk
									bool GetWaveFormat( const char *filename, CUtlBuffer* pFormat, int *pDataSize, CSentence& sentence, bool &bGotSentence );
									void LogPhonemes( int nItemIndex, ExtractDesc_t& info );
									void ClearInterstitialSpaces( CDmeChannelsClip *pChannelsClip, CUtlDict< LogPreview_t *, int >& controlLookup, ExtractDesc_t& info );

									// Accumulates a preset's control values, scaled by flIntensity, into the topmost log layers at tHeadPosition
									void StampControlValueLogs( CDmePreset *preset, DmeTime_t tHeadPosition, float flIntensity, CUtlDict< LogPreview_t *, int > &controlLookup );
									// Inserts a key holding each layer's current value at tHeadPosition
									void WriteCurrentValuesIntoLogLayers( DmeTime_t tHeadPosition, const CUtlDict< LogPreview_t *, int > &controlLookup );
									// Inserts a key holding each control's default value at tHeadPosition
									void WriteDefaultValuesIntoLogLayers( DmeTime_t tHeadPosition, const CUtlDict< LogPreview_t *, int > &controlLookup );
									// Appends the log of every non-null channel in 'list' to 'logs'
									void BuildPhonemeLogList( CUtlVector< LogPreview_t > &list, CUtlVector< CDmeLog * > &logs );
									// Finds the channels clip referencing the controls in 'list' (NULL, with a warning, if none)
									CDmeChannelsClip* FindFacialChannelsClip( const CUtlVector< LogPreview_t > &list );
									// Maps each distinct phoneme in 'stream' to the preset that represents it
									void BuildPhonemeToPresetMapping( const CUtlVector< CBasePhonemeTag * > &stream, CDmeAnimationSet *pSet, CDmePresetGroup * pPresetGroup, CUtlDict< CDmePreset *, unsigned short > &phonemeToPresetDict );

									// Extractors registered by Init, in discovery order
									CUtlVector< Extractor >	m_Extractors;
									// Initialized to -1 by the constructor
									int m_nCurrentExtractor;
								};


								//-----------------------------------------------------------------------------

								// Singleton

								//-----------------------------------------------------------------------------

								// The single file-local instance...
								static CSFMPhonemeExtractor g_ExtractorSingleton;
								// ...exposed to other translation units only through the interface pointer
								ISFMPhonemeExtractor *sfm_phonemeextractor = &g_ExtractorSingleton;


								//-----------------------------------------------------------------------------

								// Constructor

								//-----------------------------------------------------------------------------

								// Starts with an empty extractor list and no current extractor (-1).
								CSFMPhonemeExtractor::CSFMPhonemeExtractor()
								{
									m_nCurrentExtractor = -1;
								}


								//-----------------------------------------------------------------------------

								// Init, shutdown

								//-----------------------------------------------------------------------------

								bool CSFMPhonemeExtractor::Init()

								{

									// Enumerate modules under bin folder of exe

									FileFindHandle_t findHandle;

									const char *pFilename = g_pFullFileSystem->FindFirstEx( "phonemeextractors/*.dll", "EXECUTABLE_PATH", &findHandle );

									while( pFilename )

									{

										char fullpath[ 512 ];

										Q_snprintf( fullpath, sizeof( fullpath ), "phonemeextractors/%s", pFilename );


										// Msg( "Loading extractor from %s\n", fullpath );


										Extractor e;

										e.module = g_pFullFileSystem->LoadModule( fullpath );

										if ( !e.module )

										{

											pFilename = g_pFullFileSystem->FindNext( findHandle );

											continue;

										}


										CreateInterfaceFn factory = Sys_GetFactory( e.module );

										if ( !factory )

										{

											pFilename = g_pFullFileSystem->FindNext( findHandle );

											continue;

										}


										e.extractor = ( IPhonemeExtractor * )factory( VPHONEME_EXTRACTOR_INTERFACE, NULL );

										if ( !e.extractor )

										{

											Warning( "Unable to get IPhonemeExtractor interface version %s from %s\n", VPHONEME_EXTRACTOR_INTERFACE, fullpath );

											pFilename = g_pFullFileSystem->FindNext( findHandle );

											continue;

										}


										e.apitype = e.extractor->GetAPIType();


										m_Extractors.AddToTail( e );

										pFilename = g_pFullFileSystem->FindNext( findHandle );

									}


									g_pFullFileSystem->FindClose( findHandle );

									return true;

								}


								void CSFMPhonemeExtractor::Shutdown()

								{

									int c = m_Extractors.Count();

									for ( int i = c - 1; i >= 0; i-- )

									{

										Extractor *e = &m_Extractors[ i ];

										g_pFullFileSystem->UnloadModule( e->module );

									}


									m_Extractors.RemoveAll();

								}


								//-----------------------------------------------------------------------------

								// Finds an extractor of a particular type

								//-----------------------------------------------------------------------------

								// Linear search of m_Extractors; returns the index of the first entry whose
								// apitype equals 'type', or -1 when there is none.
								int CSFMPhonemeExtractor::FindExtractor( PE_APITYPE type )
								{
									int iExtractor = 0;
									while ( iExtractor < m_Extractors.Count() )
									{
										if ( m_Extractors[ iExtractor ].apitype == type )
										{
											return iExtractor;
										}
										++iExtractor;
									}
									return -1;
								}


								//-----------------------------------------------------------------------------

								// Iterates over extractors

								//-----------------------------------------------------------------------------

								int CSFMPhonemeExtractor::GetAPICount()

								{

									return m_Extractors.Count();

								}


								void CSFMPhonemeExtractor::GetAPIInfo( int index, CUtlString* pPrintName, PE_APITYPE *pAPIType )

								{

									Assert( pPrintName );

									Assert( pAPIType );

									pPrintName->Set( m_Extractors[ index ].extractor->GetName() );

									*pAPIType = m_Extractors[ index ].apitype;

								}


								// Reads the current RIFF chunk into a text buffer and initializes 'sentence'
								// from those bytes.
								static void ParseSentence( CSentence& sentence, IterateRIFF &walk )
								{
									const int nChunkBytes = walk.ChunkSize();

									CUtlBuffer chunkText( 0, 0, CUtlBuffer::TEXT_BUFFER );
									chunkText.EnsureCapacity( nChunkBytes );

									// Fill the buffer's storage directly, then move the put position to cover what was read
									walk.ChunkRead( chunkText.Base() );
									chunkText.SeekPut( CUtlBuffer::SEEK_HEAD, nChunkBytes );

									sentence.InitFromDataChunk( chunkText.Base(), chunkText.TellPut() );
								}


								bool CSFMPhonemeExtractor::GetWaveFormat( const char *filename, CUtlBuffer *pBuf, int *pDataSize, CSentence& sentence, bool &bGotSentence )

								{

									InFileRIFF riff( filename, *g_pFSIOReadBinary );

									Assert( riff.RIFFName() == RIFF_WAVE );


									// set up the iterator for the whole file (root RIFF is a chunk)

									IterateRIFF walk( riff, riff.RIFFSize() );


									bool gotFmt = false;

									bool gotData = false;

									bGotSentence = false;


									// Walk input chunks and copy to output

									while ( walk.ChunkAvailable() )

									{

										switch ( walk.ChunkName() )

										{

										case WAVE_FMT:

											{

												pBuf->SeekPut( CUtlBuffer::SEEK_HEAD, walk.ChunkSize() );

												walk.ChunkRead( pBuf->Base() );

												gotFmt = true;

											}

											break;

										case WAVE_DATA:

											{

												*pDataSize = walk.ChunkSize();

												gotData = true;

											}

											break;

										case WAVE_VALVEDATA:

											{

												bGotSentence = true;

												ParseSentence( sentence, walk );

											}

											break;

										default:

											break;

										}


										// Done

										if ( gotFmt && gotData && bGotSentence )

											return true;


										walk.ChunkNext();

									}

									return ( gotFmt && gotData );

								}


								// Loads the sentence data embedded in the .wav file of 'gameSound'.
								//
								// The file is looked up as "sound/<name>", with any engine decorator characters
								// removed from the front of the name.
								//
								// Returns true only when the file has format and data chunks AND carries a
								// sentence chunk. Returns false for a null sound or an empty sound name; these
								// were previously only asserted, so release builds went on to dereference the
								// pointer or open "sound/".
								bool CSFMPhonemeExtractor::GetSentence( CDmeGameSound *gameSound, CSentence& sentence )
								{
									Assert( gameSound );
									if ( !gameSound )
										return false;

									const char *filename = gameSound->m_SoundName.Get();
									Assert( filename && filename [ 0 ] );
									if ( !filename || !filename[ 0 ] )
										return false;

									char soundname[ 512 ];
									// Note, calling PSkipSoundChars to remove any decorator characters used by the engine!!!
									Q_snprintf( soundname, sizeof( soundname ), "sound/%s", PSkipSoundChars( filename ) );
									Q_FixSlashes( soundname );

									// The format buffer and data size are required by GetWaveFormat but unused here
									CUtlBuffer buf;
									int nDataSize = 0;
									bool bValidSentence = false;
									if ( !GetWaveFormat( soundname, &buf, &nDataSize, sentence, bValidSentence ) )
										return false;

									return bValidSentence;
								}


								// Appends a heap-allocated CBasePhonemeTag copy of every phoneme in 'in' to 'list'.
								// Phonemes come from the word list when the sentence has words; otherwise from
								// the runtime phoneme list. The copies are allocated with new and handed to 'list'.
								static void BuildPhonemeStream( CSentence& in, CUtlVector< CBasePhonemeTag * >& list )
								{
									const int nWords = in.m_Words.Count();
									if ( nWords == 0 )
									{
										// No word data: fall back to the runtime phonemes, if any
										const int nRuntime = in.m_RunTimePhonemes.Count();
										for ( int iPhoneme = 0; iPhoneme < nRuntime; ++iPhoneme )
										{
											list.AddToTail( new CBasePhonemeTag( *in.m_RunTimePhonemes[ iPhoneme ] ) );
										}
										return;
									}

									for ( int iWord = 0; iWord < nWords; ++iWord )
									{
										CWordTag *pWord = in.m_Words[ iWord ];
										if ( pWord == NULL )
											continue;

										const int nPhonemes = pWord->m_Phonemes.Count();
										for ( int iPhoneme = 0; iPhoneme < nPhonemes; ++iPhoneme )
										{
											CPhonemeTag *pPhoneme = pWord->m_Phonemes[ iPhoneme ];
											if ( pPhoneme != NULL )
											{
												list.AddToTail( new CBasePhonemeTag( *pPhoneme ) );
											}
										}
									}
								}


								//-----------------------------------------------------------------------------

								// Purpose: Save the phoneme data into the sound files

								//-----------------------------------------------------------------------------

								// Serializes 'sentence' into a text buffer and writes those bytes as the data of
								// the chunk currently open on 'store'.
								static void StoreValveDataChunk( CSentence& sentence, IterateOutputRIFF& store )
								{
									CUtlBuffer serialized( 0, 0, CUtlBuffer::TEXT_BUFFER );
									sentence.SaveToBuffer( serialized );

									const int nBytes = serialized.TellPut();
									store.ChunkWriteData( serialized.Base(), nBytes );
								}


								// Rewrites pWavFile so that its WAVE_VALVEDATA chunk holds 'sentence'.
								//
								// The existing file is renamed to a sibling ".tmp" file, every chunk is copied
								// from that temp file into a newly written pWavFile (the sentence chunk is
								// replaced, or appended if the file had none), and the temp file is removed.
								//
								// Returns false, leaving the wav untouched, when the file is not writable or
								// cannot be renamed to the temp name. The rename result used to be ignored, in
								// which case the rewrite would go ahead without a source to copy from.
								static bool SaveSentenceToWavFile( const char *pWavFile, CSentence& sentence )
								{
									char pTempFile[ 512 ];

									Q_StripExtension( pWavFile, pTempFile, sizeof( pTempFile ) );
									Q_DefaultExtension( pTempFile, ".tmp", sizeof( pTempFile ) );

									// Get rid of any stale temp file from an earlier run
									if ( g_pFullFileSystem->FileExists( pTempFile, "GAME" ) )
									{
										g_pFullFileSystem->RemoveFile( pTempFile, "GAME" );
									}

									CP4AutoEditAddFile p4Checkout( pWavFile );
									if ( !g_pFullFileSystem->IsFileWritable( pWavFile ) )
									{
										Warning( "%s is not writable, can't save sentence data to file\n", pWavFile );
										return false;
									}

									// Rename original pWavFile to temp; without the temp copy there is nothing
									// to rebuild the file from, so bail out before pWavFile is opened for writing.
									if ( !g_pFullFileSystem->RenameFile( pWavFile, pTempFile, "GAME" ) )
									{
										Warning( "Unable to rename %s to %s, can't save sentence data to file\n", pWavFile, pTempFile );
										return false;
									}

									// NOTE:  Put this in its own scope so that the destructor for riffout actually closes the file!!!!
									{
										// Read from Temp
										InFileRIFF riff( pTempFile, *g_pFSIOReadBinary );
										Assert( riff.RIFFName() == RIFF_WAVE );

										// set up the iterator for the whole file (root RIFF is a chunk)
										IterateRIFF walk( riff, riff.RIFFSize() );

										// And put data back into original pWavFile by name
										OutFileRIFF riffout( pWavFile, *g_pFSIOWriteBinary );

										IterateOutputRIFF store( riffout );

										bool bWordTrackWritten = false;

										// Walk input chunks and copy to output
										while ( walk.ChunkAvailable() )
										{
											store.ChunkStart( walk.ChunkName() );

											switch ( walk.ChunkName() )
											{
											case WAVE_VALVEDATA:
												{
													// Overwrite data
													StoreValveDataChunk( sentence, store );
													bWordTrackWritten = true;
												}
												break;
											default:
												store.CopyChunkData( walk );
												break;
											}

											store.ChunkFinish();

											walk.ChunkNext();
										}

										// If we didn't write it above, write it now
										if ( !bWordTrackWritten )
										{
											store.ChunkStart( WAVE_VALVEDATA );
											StoreValveDataChunk( sentence, store );
											store.ChunkFinish();
										}
									}

									// Remove temp file
									g_pFullFileSystem->RemoveFile( pTempFile, NULL );

									return true;
								}


								//-----------------------------------------------------------------------------

								// Main entry point for phoneme extraction

								//-----------------------------------------------------------------------------

								// Produces phoneme tags for every work item in 'info' and then logs them.
								//
								//   apiType                  - selects which loaded extractor to use
								//   info                     - work list, animation set and options; each work item's
								//                              m_flDuration, m_Sentence, m_bSentenceValid and
								//                              m_ApplyTags are updated
								//   bWritePhonemesToWavFiles - when true, freshly extracted sentences are saved back
								//                              into their .wav files
								//
								// Does nothing if info.m_pSet is null or no extractor of 'apiType' is loaded.
								// Work items whose .wav cannot be read, or which fail the bits-per-sample check,
								// are skipped (their m_flDuration stays 0 and their tags are left as they were).
								void CSFMPhonemeExtractor::Extract( const PE_APITYPE& apiType, ExtractDesc_t& info, bool bWritePhonemesToWavFiles )
								{
									if ( !info.m_pSet )
										return;

									int iExtractor = FindExtractor( apiType );
									if ( iExtractor == -1 )
										return;

									Extractor& extractor = m_Extractors[ iExtractor ];

									// Pass 1: build the phoneme tag stream for each work item
									int nWorkItem;
									for ( nWorkItem = 0; nWorkItem < info.m_WorkList.Count(); ++nWorkItem )
									{
										CExtractInfo& workItem = info.m_WorkList[ nWorkItem ];

										workItem.m_flDuration = 0.0f;

										// 'in' carries the hint text into the extractor, 'out' receives the result
										CSentence in;
										CSentence out;
										in.SetText( workItem.m_sHintText.String() );
										out.SetText( workItem.m_sHintText.String() );

										const char *pFileName = workItem.m_pSound->m_SoundName.Get();
										Assert( pFileName && pFileName [ 0 ] );

										char pSoundName[ 512 ];
										// Note, calling PSkipSoundChars to remove any decorator characters used by the engine!!!
										Q_snprintf( pSoundName, sizeof( pSoundName ), "sound/%s", PSkipSoundChars( pFileName ) );
										Q_FixSlashes( pSoundName );

										// The extractor and SaveSentenceToWavFile are given the full path; GetWaveFormat the relative one
										char pFullPath[ 512 ];
										g_pFullFileSystem->RelativePathToFullPath( pSoundName, "GAME", pFullPath, sizeof( pFullPath ) );

										// Get sound file metrics of interest
										CUtlBuffer buf;
										WAVEFORMATEX *format;
										int nDataSize;
										if ( !GetWaveFormat( pSoundName, &buf, &nDataSize, workItem.m_Sentence, workItem.m_bSentenceValid ) )
											continue;

										// buf holds the raw WAVE_FMT chunk bytes
										format = ( WAVEFORMATEX * )buf.Base();

										// NOTE(review): this skips any file with 8 or fewer bits per sample, although only
										// values below 8 make ( nBitsPerSample >> 3 ) zero - confirm the threshold is intended.
										if ( !( format->wBitsPerSample > ( 1 << 3 ) ) )
										{
											// Have to warn and early-out here to avoid crashing with "integer divide by zero" below
											Warning( "Cannot extract phonemes from '%s', %u bits per sample.\n", pSoundName, format->wBitsPerSample );
											continue;
										}

										int nBitsPerSample = format->wBitsPerSample;
										float flSampleRate = (float)format->nSamplesPerSec;
										int nChannels = format->nChannels;
										// Data size divided by bytes per single-channel sample (channel count is not factored in here)
										int nSampleCount = nDataSize / ( nBitsPerSample >> 3 );

										// Bytes per sample frame across all channels
										float flTrueSampleSize = ( nBitsPerSample * nChannels ) >> 3;
										if ( format->wFormatTag == WAVE_FORMAT_ADPCM )
										{
											nBitsPerSample = 16;
											flTrueSampleSize = 0.5f;

											// Derive the byte size of one ADPCM block from the samples-per-block header field
											ADPCMWAVEFORMAT *pFormat = (ADPCMWAVEFORMAT *)buf.Base();
											int blockSize = ((pFormat->wSamplesPerBlock - 2) * pFormat->wfx.nChannels ) / 2;
											blockSize += 7 * pFormat->wfx.nChannels;

											int blockCount = nDataSize / blockSize;
											int blockRem = nDataSize % blockSize;

											// total samples in complete blocks
											nSampleCount = blockCount * pFormat->wSamplesPerBlock;

											// add remaining in a short block
											if ( blockRem )
											{
												nSampleCount += pFormat->wSamplesPerBlock - (((blockSize - blockRem) * 2) / nChannels);
											}
										}

										// Duration stays at 0 when the header reports a zero sample rate
										if ( flSampleRate > 0.0f )
										{
											workItem.m_flDuration = (float)nSampleCount / flSampleRate;
										}
										in.CreateEventWordDistribution( workItem.m_sHintText.String(), workItem.m_flDuration );
										// Run the extractor unless the caller asked for, and the file has, embedded sentence data
										if ( !workItem.m_bUseSentence || !workItem.m_bSentenceValid )
										{
											extractor.extractor->Extract( pFullPath,
												(int)( workItem.m_flDuration * flSampleRate * flTrueSampleSize ),
												Msg, in, out );

											// Tracker 57389:
											// Total hack to fix a bug where the Lipsinc extractor is messing up the # channels on 16 bit stereo waves
											if ( apiType == SPEECH_API_LIPSINC && nChannels == 2 && nBitsPerSample == 16 )
											{
												flTrueSampleSize *= 2.0f;
											}

											float bytespersecond = flSampleRate * flTrueSampleSize;

											int i;
											// Now convert byte offsets to times
											for ( i = 0; i < out.m_Words.Size(); i++ )
											{
												CWordTag *tag = out.m_Words[ i ];
												Assert( tag );
												if ( !tag )
													continue;

												tag->m_flStartTime = ( float )(tag->m_uiStartByte ) / bytespersecond;
												tag->m_flEndTime = ( float )(tag->m_uiEndByte ) / bytespersecond;

												for ( int j = 0; j < tag->m_Phonemes.Size(); j++ )
												{
													CPhonemeTag *ptag = tag->m_Phonemes[ j ];
													Assert( ptag );
													if ( !ptag )
														continue;

													ptag->SetStartTime( ( float )(ptag->m_uiStartByte ) / bytespersecond );
													ptag->SetEndTime( ( float )(ptag->m_uiEndByte ) / bytespersecond );
												}
											}

											// The result of the save is not checked
											if ( bWritePhonemesToWavFiles )
											{
												SaveSentenceToWavFile( pFullPath, out );
											}
										}
										else
										{
											Msg( "Using .wav file phonemes for (%s)\n", pSoundName );
											out = workItem.m_Sentence;
										}

										// Now create channel data
										workItem.ClearTags();
										BuildPhonemeStream( out, workItem.m_ApplyTags );
									}

									// Existing bookmarks on the animation set are discarded before logging
									if ( info.m_bCreateBookmarks )
									{
										info.m_pSet->GetBookmarks().RemoveAll();
									}

									// Pass 2: log the phonemes of every work item (including any skipped above)
									for ( nWorkItem = 0; nWorkItem < info.m_WorkList.Count(); ++nWorkItem )
									{
										LogPhonemes( nWorkItem, info );
									}
								}


								//-----------------------------------------------------------------------------

								//

								//-----------------------------------------------------------------------------

								// Ordering predicate for the unique-phoneme tree: tags compare by phoneme code
								// only, so two tags with the same code count as equivalent.
								static bool UniquePhonemeLessFunc( CBasePhonemeTag * const & lhs, CBasePhonemeTag * const & rhs )
								{
									return rhs->GetPhonemeCode() > lhs->GetPhonemeCode();
								}


								void CSFMPhonemeExtractor::BuildPhonemeToPresetMapping( const CUtlVector< CBasePhonemeTag * > &stream,

																					   CDmeAnimationSet *pSet, CDmePresetGroup *pPresetGroup, CUtlDict< CDmePreset *, unsigned short > &phonemeToPresetDict )

								{

									int i;

									CUtlRBTree< CBasePhonemeTag * > uniquePhonemes( 0, 0, UniquePhonemeLessFunc );

									for ( i = 0; i < stream.Count(); ++i )

									{

										CBasePhonemeTag *tag = stream[ i ];

										if ( uniquePhonemes.Find( tag ) == uniquePhonemes.InvalidIndex() )

										{

											uniquePhonemes.Insert( tag );

										}

									}


									for ( i = uniquePhonemes.FirstInorder(); i != uniquePhonemes.InvalidIndex(); i = uniquePhonemes.NextInorder( i ) )

									{

										CBasePhonemeTag *tag = uniquePhonemes[ i ];

										// Convert phoneme code to text

										char ph[ 32 ];

										Q_strncpy( ph, ConvertPhoneme( tag->GetPhonemeCode() ), sizeof( ph ) );


										char remappedph[ 32 ];

										// By default we search for a preset name p_xxx where xxx is the phoneme string

										Q_snprintf( remappedph, sizeof( remappedph ), "p_%s", ph );

										// Now find the preset in the animation set converter

										CDmePhonemeMapping *mapping = pSet->FindMapping( ph );

										if ( mapping )

										{

											Q_strncpy( remappedph, mapping->GetValueString( "preset" ), sizeof( remappedph ) );

										}


										// Now look up the preset, if it exists

										CDmePreset *preset = pPresetGroup->FindPreset( remappedph );

										if ( !preset )

										{

											Warning( "Animation set '%s' missing phoneme preset for '%s' -> '%s'\n",

												pSet->GetName(), ph, remappedph );

											continue;

										}


										// Add to dictionary if it's not already there

										if ( phonemeToPresetDict.Find( ph ) == phonemeToPresetDict.InvalidIndex() )

										{

											phonemeToPresetDict.Insert( ph, preset );

										}

									}

								}


								//-----------------------------------------------------------------------------

								// Finds the channels clip which refers to facial control values

								//-----------------------------------------------------------------------------

								// Walks 'list' from last to first and returns the first channels clip found
								// referencing a control's channel 0. Warns whenever a later control resolves to
								// a different clip (or to none), and when no clip is found at all, in which
								// case NULL is returned.
								CDmeChannelsClip* CSFMPhonemeExtractor::FindFacialChannelsClip( const CUtlVector< LogPreview_t > &list )
								{
									CDmeChannelsClip *pFound = NULL;

									for ( int iControl = list.Count(); --iControl >= 0; )
									{
										const LogPreview_t &preview = list[ iControl ];
										CDmeChannelsClip *pCandidate = FindAncestorReferencingElement< CDmeChannelsClip >( (CDmElement *)preview.m_hChannels[ 0 ].Get() );

										// First clip encountered becomes the answer
										if ( pFound == NULL && pCandidate != NULL )
										{
											pFound = pCandidate;
											continue;
										}

										if ( pCandidate != pFound )
										{
											Warning( "Selected controls overlap multiple channels clips!!!\n" );
										}
									}

									if ( pFound == NULL )
									{
										Warning( "Unable to determine destination channels clip!!!\n" );
									}

									return pFound;
								}


								//-----------------------------------------------------------------------------

								// Builds the list of logs which target facial control values

								//-----------------------------------------------------------------------------

								// Appends to 'logs' the log of every non-null channel of every control in
								// 'list'. Channels without a log are skipped.
								void CSFMPhonemeExtractor::BuildPhonemeLogList( CUtlVector< LogPreview_t > &list, CUtlVector< CDmeLog * > &logs )
								{
									const int nControls = list.Count();
									for ( int iControl = 0; iControl < nControls; ++iControl )
									{
										LogPreview_t &preview = list[ iControl ];

										for ( int iChannel = 0; iChannel < LOG_PREVIEW_FLEX_CHANNEL_COUNT; ++iChannel )
										{
											CDmeChannel *pChannel = preview.m_hChannels[ iChannel ];
											if ( pChannel )
											{
												CDmeLog *pLog = pChannel->GetLog();
												if ( pLog )
												{
													logs.AddToTail( pLog );
												}
											}
										}
									}
								}


								//-----------------------------------------------------------------------------

								// Writes default values into all log layers targeting facial control values

								//-----------------------------------------------------------------------------

								// For every control in controlLookup, inserts a key at tHeadPosition into the
								// topmost layer of each of its float logs, holding that channel's default value
								// (read from the control's defaultValue / defaultBalance / defaultMultilevel
								// attribute).
								//
								// Entries whose control handle no longer resolves are skipped, matching the
								// guard in StampControlValueLogs; previously the null control was dereferenced
								// as soon as one of its channels had a usable log layer.
								void CSFMPhonemeExtractor::WriteDefaultValuesIntoLogLayers( DmeTime_t tHeadPosition, const CUtlDict< LogPreview_t *, int > &controlLookup )
								{
									for ( int j = controlLookup.First(); j != controlLookup.InvalidIndex(); j = controlLookup.Next( j ) )
									{
										LogPreview_t* lp = controlLookup[ j ];

										// The default values live on the control element; nothing to write without it
										CDmElement *pControl = lp->m_hControl;
										if ( !pControl )
											continue;

										for ( int chIndex = 0; chIndex < LOG_PREVIEW_FLEX_CHANNEL_COUNT; ++chIndex )
										{
											CDmeChannel *pChannel = lp->m_hChannels[ chIndex ];
											if ( !pChannel )
												continue;

											// Now get the log for the channel
											CDmeFloatLog *pFloatLog = CastElement< CDmeFloatLog >( pChannel->GetLog() );
											if ( !pFloatLog )
												continue;

											CDmeFloatLogLayer *pLayer = pFloatLog->GetLayer( pFloatLog->GetTopmostLayer() );
											if ( !pLayer )
												continue;

											float flDefaultValue = pControl->GetValue< float >( s_pDefaultAttributeValueNames[chIndex] );
											pLayer->InsertKey( tHeadPosition, flDefaultValue );
										}
									}
								}


								//-----------------------------------------------------------------------------

								// Creates a new log key based on the interpolated value at that time

								//-----------------------------------------------------------------------------

								// For every control in controlLookup, samples the topmost layer of each of its
								// float logs at tHeadPosition and inserts a key there holding that sampled value.
								void CSFMPhonemeExtractor::WriteCurrentValuesIntoLogLayers( DmeTime_t tHeadPosition, const CUtlDict< LogPreview_t *, int > &controlLookup )
								{
									for ( int iControl = controlLookup.First(); iControl != controlLookup.InvalidIndex(); iControl = controlLookup.Next( iControl ) )
									{
										LogPreview_t *pPreview = controlLookup[ iControl ];

										for ( int iChannel = 0; iChannel < LOG_PREVIEW_FLEX_CHANNEL_COUNT; ++iChannel )
										{
											CDmeChannel *pChannel = pPreview->m_hChannels[ iChannel ];
											if ( !pChannel )
												continue;

											// Only float logs are handled
											CDmeFloatLog *pFloatLog = CastElement< CDmeFloatLog >( pChannel->GetLog() );
											if ( !pFloatLog )
												continue;

											CDmeFloatLogLayer *pTopLayer = pFloatLog->GetLayer( pFloatLog->GetTopmostLayer() );
											if ( pTopLayer )
											{
												float flSampled = pTopLayer->GetValue( tHeadPosition );
												pTopLayer->InsertKey( tHeadPosition, flSampled );
											}
										}
									}
								}


								//-----------------------------------------------------------------------------

								// Samples extracted phoneme data and stamps that values into control value logs

								//-----------------------------------------------------------------------------

								// Adds the contribution of 'preset' at tHeadPosition to the control value logs.
								//
								// For each control value in the preset that has an entry in controlLookup (matched
								// by element name), and for each of that entry's channels backed by a float log,
								// the key written to the topmost layer is:
								//     ( current - default ) + flIntensity * ( presetValue - default ) + default
								// so the preset's offset from the default is accumulated onto what is already there.
								void CSFMPhonemeExtractor::StampControlValueLogs( CDmePreset *preset, DmeTime_t tHeadPosition, float flIntensity, CUtlDict< LogPreview_t *, int > &controlLookup )
								{
									const CDmrElementArray< CDmElement > &presetValues = preset->GetControlValues( );
									for ( int iValue = 0; iValue < presetValues.Count(); ++iValue )
									{
										// Element carrying the preset's value for one control
										CDmElement *pPresetValue = presetValues[ iValue ];
										if ( !pPresetValue )
											continue;

										int nLookup = controlLookup.Find( pPresetValue->GetName() );
										if ( nLookup == controlLookup.InvalidIndex() )
											continue;

										LogPreview_t *pPreview = controlLookup[ nLookup ];

										for ( int iChannel = 0; iChannel < LOG_PREVIEW_FLEX_CHANNEL_COUNT; ++iChannel )
										{
											CDmeChannel *pChannel = pPreview->m_hChannels[ iChannel ];
											if ( !pChannel )
												continue;

											// The slider's own control supplies the default; the preset element does not have it
											CDmElement *pSliderControl = pPreview->m_hControl.Get();
											if ( !pSliderControl )
												continue;

											CDmeLog *pLog = pChannel->GetLog();
											if ( !pLog )
											{
												Assert( 0 );
												continue;
											}

											CDmeFloatLog *pFloatLog = CastElement< CDmeFloatLog >( pLog );
											if ( !pFloatLog )
												continue;

											CDmeFloatLogLayer *pTopLayer = pFloatLog->GetLayer( pFloatLog->GetTopmostLayer() );
											if ( !pTopLayer )
												continue;

											float flDefault = pSliderControl->GetValue< float >( s_pDefaultAttributeValueNames[iChannel] );
											float flPresetValue = pPresetValue->GetValue< float >( s_pAttributeValueNames[ iChannel ] );
											float flDelta = flIntensity * ( flPresetValue - flDefault );
											float flExisting = pTopLayer->GetValue( tHeadPosition ) - flDefault;

											// Accumulate into the topmost layer
											pTopLayer->InsertKey( tHeadPosition, flExisting + flDelta + flDefault );
										}
									}
								}


								void CSFMPhonemeExtractor::ClearInterstitialSpaces( CDmeChannelsClip *pChannelsClip, CUtlDict< LogPreview_t *, int >& controlLookup, ExtractDesc_t& info )

								{

									Assert( info.m_pShot );

									Assert( pChannelsClip );


									if ( info.m_WorkList.Count() == 0 )

										return;


									// This is handled by the main layering code...

									if ( info.m_nExtractType == EXTRACT_WIPE_SOUNDS )

										return;


									// Now walk through all relevant logs

									CUtlVector< CDmeLog * > logs;

									BuildPhonemeLogList( info.m_ControlList, logs );


									DmeTime_t tMinTime( DMETIME_MAXTIME );

									DmeTime_t tMaxTime( DMETIME_MINTIME );


									int i;

									// Walk work items and figure out time bounds

									for ( i = 0; i < info.m_WorkList.Count(); ++i )

									{

										CExtractInfo &item = info.m_WorkList[ i ];


										CUtlVector< CDmeHandle< CDmeClip > > srcStack;

										CUtlVector< CDmeHandle< CDmeClip > > dstStack;


										// Convert original .wav start to animation set channels clip relative time

										item.m_pClip->BuildClipStack( &srcStack, info.m_pMovie, info.m_pShot );


										// NOTE: Time bounds measured in sound media time goes from 0 -> flWaveDuration

										DmeTime_t tSoundMediaStartTime = CDmeClip::FromChildMediaTime( srcStack, DMETIME_ZERO, false );

										DmeTime_t tSoundMediaEndTime   = CDmeClip::FromChildMediaTime( srcStack, DmeTime_t( item.m_flDuration ), false );


										// NOTE: Start and end time are measured in sound media time

										DmeTime_t tStartTime = item.m_pClip->GetStartInChildMediaTime();

										DmeTime_t tEndTime   = item.m_pClip->GetEndInChildMediaTime();


										// And convert back down into channels clip relative time

										pChannelsClip->BuildClipStack( &dstStack, info.m_pMovie, info.m_pShot );


										// Now convert back down to channels clip relative time

										DmeTime_t tChannelMediaStartTime = CDmeClip::ToChildMediaTime( dstStack, tSoundMediaStartTime, false );

										DmeTime_t tChannelMediaEndTime = CDmeClip::ToChildMediaTime( dstStack, tSoundMediaEndTime, false );


										// Find a scale + offset which transforms data in media space of the sound [namely, the phonemes]

										// into the media space of the channels [the logs that drive the facial animation]

										DmeTime_t tEndDuration = tChannelMediaEndTime - tChannelMediaStartTime;

										double flScale = ( item.m_flDuration != 0.0f ) ? tEndDuration.GetSeconds() / item.m_flDuration : 0.0f;

										DmeTime_t tOffset = tChannelMediaStartTime;


										DmeTime_t tChannelRelativeStartTime( tStartTime * flScale );

										tChannelRelativeStartTime += tOffset;

										DmeTime_t tChannelRelativeEndTime( tEndTime * flScale );

										tChannelRelativeEndTime += tOffset;


										if ( tChannelRelativeStartTime < tMinTime )

										{

											tMinTime = tChannelRelativeStartTime;

										}

										if ( tChannelRelativeEndTime > tMaxTime )

										{

											tMaxTime = tChannelRelativeEndTime;

										}

									}


									// Bloat by one quantum

									tMinTime -= DMETIME_MINDELTA;

									tMaxTime += DMETIME_MINDELTA;


									for ( i = 0; i < logs.Count(); ++i )

									{

										CDmeLog *log = logs[ i ];


										Assert( log->GetNumLayers() == 1 );

										CDmeLogLayer *layer = log->GetLayer( log->GetTopmostLayer() );


										if ( info.m_nExtractType == EXTRACT_WIPE_RANGE )

										{

											// Write default value keys into log

											// Write a default value at that time

											WriteDefaultValuesIntoLogLayers( tMinTime, controlLookup );


											// Write a default value at that time

											WriteDefaultValuesIntoLogLayers( tMaxTime, controlLookup );


											// Now discard all keys > tMinTime and < tMaxTime

											for ( int j = layer->GetKeyCount() - 1; j >= 0; --j )

											{

												DmeTime_t &t = layer->GetKeyTime( j );

												if ( t <= tMinTime )

													continue;

												if ( t >= tMaxTime )

													continue;


												layer->RemoveKey( j );

											}

										}

										else

										{

											Assert( info.m_nExtractType == EXTRACT_WIPE_CLIP );

											layer->ClearKeys();

										}

									}

								}


								void AddAnimSetBookmarkAtSoundMediaTime( const char *pName, DmeTime_t tStart, DmeTime_t tEnd, const CUtlVector< CDmeHandle< CDmeClip > > &srcStack, ExtractDesc_t& info )

								{

									tStart = CDmeClip::FromChildMediaTime( srcStack, tStart, false );

									tEnd   = CDmeClip::FromChildMediaTime( srcStack, tEnd, false );


									tStart = info.m_pShot->ToChildMediaTime( tStart, false );

									tEnd   = info.m_pShot->ToChildMediaTime( tEnd, false );


									CDmeBookmark *pBookmark = CreateElement< CDmeBookmark >( pName );

									pBookmark->SetNote( pName );

									pBookmark->SetTime( tStart );

									pBookmark->SetDuration( tEnd - tStart );

									info.m_pSet->GetBookmarks().AddToTail( pBookmark );

								}


								//-----------------------------------------------------------------------------

								// Main entry point for generating phoneme logs

								//

								// Processes one work item, info.m_WorkList[ nItemIndex ]:

								//   1. Builds a phoneme -> preset lookup from the animation set's "phoneme" preset group

								//   2. Computes a scale + offset taking times in the sound's media space into the

								//      media space of the facial channels clip

								//   3. Adds a new layer to each phoneme log, stamps keys into it according to

								//      info.m_nFilterType (HOLD / LINEAR / FIXED_WIDTH), and flattens the layers again

								//   4. Optionally (info.m_bCreateBookmarks) adds bookmarks to the animation set

								//

								// nItemIndex - index of the work item in info.m_WorkList

								// info       - extraction description; its work list, control list, animation set,

								//              movie and shot are all read here

								//

								// Returns early, without adding layers or keys, when the animation set, clip or sound is

								// missing, when the "phoneme" preset group does not exist, when no facial channels

								// clip is found, or when no clip stack can be built for the sound clip.

								//-----------------------------------------------------------------------------

								void CSFMPhonemeExtractor::LogPhonemes( int nItemIndex,	ExtractDesc_t& info )

								{

									CExtractInfo &item = info.m_WorkList[ nItemIndex ];


									// Validate input parameters (assert in debug, bail out quietly otherwise)

									Assert( info.m_pSet && item.m_pClip && item.m_pSound );

									if ( !info.m_pSet || !item.m_pClip || !item.m_pSound )

										return;


									CDmePresetGroup *pPresetGroup = info.m_pSet->FindPresetGroup( "phoneme" );

									if ( !pPresetGroup )

									{

										Warning( "Animation set '%s' missing preset group 'phoneme'\n", info.m_pSet->GetName() );

										return;

									}


									// An empty phoneme map is replaced by the default one before building the lookup

									if ( !info.m_pSet->GetPhonemeMap().Count() )

									{

										info.m_pSet->RestoreDefaultPhonemeMap();

									}


									// Walk through phoneme stack and build list of unique presets

									CUtlDict< CDmePreset *, unsigned short > phonemeToPresetDict;

									BuildPhonemeToPresetMapping( item.m_ApplyTags, info.m_pSet, pPresetGroup, phonemeToPresetDict );


									CDmeChannelsClip *pChannelsClip = FindFacialChannelsClip( info.m_ControlList );

									if ( !pChannelsClip )

										return;


									// Build a fast lookup of the visible sliders, keyed by control name

									int i;

									CUtlDict< LogPreview_t *, int > controlLookup;

									for ( i = 0; i < info.m_ControlList.Count(); ++i )

									{

										controlLookup.Insert( info.m_ControlList[ i ].m_hControl->GetName(), &info.m_ControlList[ i ] );

									}


									// Only needed once (on the first item), and only when multiple .wavs are selected

									if ( nItemIndex == 0 && info.m_WorkList.Count() > 1 )

									{

										ClearInterstitialSpaces( pChannelsClip, controlLookup, info );

									}


									// Set up time selection, put channels into record and stamp out keyframes


									// Convert original .wav start to animation set channels clip relative time

									CUtlVector< CDmeHandle< CDmeClip > > srcStack;

									item.m_pClip->BuildClipStack( &srcStack, info.m_pMovie, info.m_pShot );

									if ( srcStack.Count() == 0 )

									{

										// No stack through the current shot; retry without specifying a shot

										item.m_pClip->BuildClipStack( &srcStack, info.m_pMovie, NULL );

										if ( srcStack.Count() == 0 )

										{

											Msg( "Couldn't build stack sound clip to current shot\n" );

											return;

										}

									}


									// NOTE: Time bounds measured in sound media time goes from 0 -> flWaveDuration

									DmeTime_t tSoundMediaStartTime = CDmeClip::FromChildMediaTime( srcStack, DMETIME_ZERO, false );

									DmeTime_t tSoundMediaEndTime   = CDmeClip::FromChildMediaTime( srcStack, DmeTime_t( item.m_flDuration ), false );


									// NOTE: Start and end time are measured in sound media time

									DmeTime_t tStartTime = item.m_pClip->GetStartInChildMediaTime();

									DmeTime_t tEndTime   = item.m_pClip->GetEndInChildMediaTime();


									// And convert back down into channels clip relative time

									// NOTE(review): unlike srcStack above, dstStack is not checked for being empty - confirm that is safe

									CUtlVector< CDmeHandle< CDmeClip > > dstStack;

									pChannelsClip->BuildClipStack( &dstStack, info.m_pMovie, info.m_pShot );


									// Now convert back down to channels clip relative time

									DmeTime_t tChannelMediaStartTime = CDmeClip::ToChildMediaTime( dstStack, tSoundMediaStartTime, false );

									DmeTime_t tChannelMediaEndTime   = CDmeClip::ToChildMediaTime( dstStack, tSoundMediaEndTime, false );


									// Find a scale + offset which transforms data in media space of the sound [namely, the phonemes]

									// into the media space of the channels [the logs that drive the facial animation]

									// (the scale falls back to 0 for a zero-length sound to avoid dividing by zero)

									DmeTime_t tEndDuration = tChannelMediaEndTime - tChannelMediaStartTime;

									double flScale = ( item.m_flDuration != 0.0f ) ? tEndDuration.GetSeconds() / item.m_flDuration : 0.0f;

									DmeTime_t tOffset = tChannelMediaStartTime;


									CUtlVector< CDmeLog * > logs;

									BuildPhonemeLogList( info.m_ControlList, logs );


									// Add new write layer to each recording log

									// (there are no early returns past this point, so every added layer reaches the flatten at the bottom)

									for ( i = 0; i < logs.Count(); ++i )

									{

										logs[ i ]->AddNewLayer();

									}


									// Iterate over the entire range of the sound, clamped to [0, item.m_flDuration]

									double flStartSoundTime = max( 0, tStartTime.GetSeconds() );

									double flEndSoundTime = min( item.m_flDuration, tEndTime.GetSeconds() );


									// Stamp keys right before and after the sound so as to

									// not generate new values outside the import time range

									// (one DMETIME_MINDELTA before the scaled start, one after the scaled end)

									DmeTime_t tPrePhonemeTime( flStartSoundTime * flScale );

									tPrePhonemeTime += tOffset - DMETIME_MINDELTA;

									WriteCurrentValuesIntoLogLayers( tPrePhonemeTime, controlLookup );


									DmeTime_t tPostPhonemeTime( flEndSoundTime * flScale );

									tPostPhonemeTime += tOffset + DMETIME_MINDELTA;

									WriteCurrentValuesIntoLogLayers( tPostPhonemeTime, controlLookup );


									// add bookmarks: one for the start, one per phoneme tag, one for the end

									if ( info.m_bCreateBookmarks )

									{

										// NOTE(review): tPrePhonemeTime / tPostPhonemeTime were computed above with flScale + tOffset

										// (channels clip media space), but AddAnimSetBookmarkAtSoundMediaTime converts its inputs through

										// srcStack like the unscaled phoneme tag times below - confirm the "start"/"end" times are intended

										AddAnimSetBookmarkAtSoundMediaTime( "start", tPrePhonemeTime, tPrePhonemeTime, srcStack, info );


										for ( i = 0; i < item.m_ApplyTags.Count() ; ++i )

										{

											CBasePhonemeTag *p = item.m_ApplyTags[ i ];

											const char *pPhonemeName = ConvertPhoneme( p->GetPhonemeCode() );

											DmeTime_t tStart = DmeTime_t( p->GetStartTime() );

											DmeTime_t tEnd   = DmeTime_t( p->GetEndTime() );

											AddAnimSetBookmarkAtSoundMediaTime( pPhonemeName, tStart, tEnd, srcStack, info );

										}


										AddAnimSetBookmarkAtSoundMediaTime( "end", tPostPhonemeTime, tPostPhonemeTime, srcStack, info );

									}


									// HOLD and LINEAR both stamp one full-strength key per phoneme at the phoneme's (scaled) start;

									// HOLD additionally stamps keys just before each phoneme and at the end of the last one

									if ( info.m_nFilterType == EXTRACT_FILTER_HOLD || info.m_nFilterType == EXTRACT_FILTER_LINEAR )

									{

										// Preset of the previous phoneme that resolved to a preset (only maintained for HOLD)

										CDmePreset *pLastPreset = NULL;


										for ( i = 0; i < item.m_ApplyTags.Count() ; ++i )

										{

											CBasePhonemeTag *p = item.m_ApplyTags[ i ];


											DmeTime_t tStart = DmeTime_t( p->GetStartTime() );

											DmeTime_t tEnd   = DmeTime_t( p->GetEndTime() );


											// Skip phonemes which have no preset in the lookup

											int idx = phonemeToPresetDict.Find( ConvertPhoneme( p->GetPhonemeCode() ) );

											if ( idx == phonemeToPresetDict.InvalidIndex() )

												continue;


											CDmePreset *preset = phonemeToPresetDict[ idx ];

											if ( !preset )

												continue;


											// Phoneme start, transformed into channels clip media space

											DmeTime_t tKeyTime = tStart * flScale + tOffset;


											if ( info.m_nFilterType == EXTRACT_FILTER_HOLD )

											{

												// stamp value at end of phoneme (or default prior to first phoneme)

												// NOTE - this ignores phoneme length, but since all phonemes directly abut one another, this doesn't matter

												DmeTime_t tLastEnd = tKeyTime - DMETIME_MINDELTA;

												if ( tLastEnd > tPrePhonemeTime )

												{

													WriteDefaultValuesIntoLogLayers( tKeyTime - DMETIME_MINDELTA, controlLookup );

													if ( pLastPreset )

													{

														StampControlValueLogs( pLastPreset, tKeyTime - DMETIME_MINDELTA, 1.0f, controlLookup );

													}

												}

												pLastPreset = preset;

											}


											// Key for this phoneme: defaults first, then the preset at full strength

											WriteDefaultValuesIntoLogLayers( tKeyTime, controlLookup );

											StampControlValueLogs( preset, tKeyTime, 1.0f, controlLookup );


											// NOTE(review): this is only reached when the last tag resolved to a preset; if the

											// last tag hit one of the 'continue's above, no end-of-sound keys are stamped - confirm intended

											if ( info.m_nFilterType == EXTRACT_FILTER_HOLD && i == item.m_ApplyTags.Count() - 1 )

											{

												// stamp value at end of last phoneme (never later than tPostPhonemeTime)

												tKeyTime = tEnd * flScale + tOffset;

												tKeyTime = min( tKeyTime, tPostPhonemeTime );

												WriteDefaultValuesIntoLogLayers( tKeyTime - DMETIME_MINDELTA, controlLookup );

												StampControlValueLogs( preset, tKeyTime - DMETIME_MINDELTA, 1.0f, controlLookup );


												// stamp default just after end of last phoneme to hold silence until tPostPhonemeTime

												WriteDefaultValuesIntoLogLayers( tKeyTime, controlLookup );

											}

										}

									}

									else

									{

										Assert( info.m_nFilterType == EXTRACT_FILTER_FIXED_WIDTH );


										// Sampling interval; the sample rate is clamped to [1, 1000] before inverting

										double tStep = 1.0 / (double)clamp( info.m_flSampleRateHz, 1.0f, 1000.0f );


										// Filter window width (floored at 0.001) and its reciprocal

										float flFilter = max( info.m_flSampleFilterSize, 0.001f );

										float flOOFilter = 1.0f / flFilter;


										// t is measured in sound media time

										for ( double t = flStartSoundTime; t < flEndSoundTime; t += tStep )

										{

											DmeTime_t tPhonemeTime( t );


											// Determine the location of the sample in the channels clip

											DmeTime_t tKeyTime( t * flScale );

											tKeyTime += tOffset;


											// Write a default value at that time

											WriteDefaultValuesIntoLogLayers( tKeyTime, controlLookup );


											// Walk phonemes...

											// Each phoneme overlapping the window [tPhonemeTime, tPhonemeTime + flFilter) is stamped

											// with an intensity equal to the fraction of the window it covers

											for ( i = 0; i < item.m_ApplyTags.Count() ; ++i )

											{

												CBasePhonemeTag *p = item.m_ApplyTags[ i ];


												DmeTime_t tStart = DmeTime_t( p->GetStartTime() );

												DmeTime_t tEnd = DmeTime_t( p->GetEndTime() );


												// bContinue and flI are only consumed by the Asserts below; they recompute the

												// overlap test and intensity in DmeTime_t terms as a cross-check of the float math.

												// Note that tStart / tEnd are clamped to the window here and stay clamped afterwards.

												bool bContinue = false;

												float flI = 0.0f;

												{

													DmeTime_t tFilter( flFilter );

													if ( tStart >= tPhonemeTime + tFilter || tEnd <= tPhonemeTime )

														bContinue = true;


													tStart = max( tStart, tPhonemeTime );

													tEnd = min( tEnd, tPhonemeTime + tFilter );


													flI = ( tEnd - tStart ).GetSeconds() * flOOFilter;

												}


												// Clamped phoneme bounds relative to the sample time...

												DmeTime_t dStart = tStart - tPhonemeTime;

												DmeTime_t dEnd = tEnd - tPhonemeTime;


												// ...expressed as fractions of the filter window

												float t1 = dStart.GetSeconds() * flOOFilter;

												float t2 = dEnd.GetSeconds() * flOOFilter;


												// Skip phonemes which don't overlap the window

												Assert( bContinue == !( t1 < 1.0f && t2 > 0.0f ) );

												if ( !( t1 < 1.0f && t2 > 0.0f ) )

													continue;


												if ( t2 > 1 )

												{

													t2 = 1;

												}

												if ( t1 < 0 )

												{

													t1 = 0;

												}


												float flIntensity = ( t2 - t1 );

												Assert( fabs( flI - flIntensity ) < 0.000001f );


												// Skip phonemes which have no preset in the lookup

												int idx = phonemeToPresetDict.Find( ConvertPhoneme( p->GetPhonemeCode() ) );

												if ( idx == phonemeToPresetDict.InvalidIndex() )

													continue;


												CDmePreset *preset = phonemeToPresetDict[ idx ];

												if ( !preset )

													continue;


												StampControlValueLogs( preset, tKeyTime, flIntensity, controlLookup );

											}

										}

									}


									// Flatten write layers

									for ( i = 0; i < logs.Count(); ++i )

									{

										logs[ i ]->FlattenLayers( DMELOG_DEFAULT_THRESHHOLD, CDmeLog::FLATTEN_NODISCONTINUITY_FIXUP );

									}

								}


								//-----------------------------------------------------------------------------
								// Re-runs phoneme logging for every item in info.m_WorkList.
								//
								// When info.m_bCreateBookmarks is set, the animation set's existing bookmarks are
								// removed first; LogPhonemes then adds bookmarks again for each work item.
								//-----------------------------------------------------------------------------
								void CSFMPhonemeExtractor::ReApply( ExtractDesc_t& info )
								{
									// LogPhonemes treats a NULL animation set as invalid input (asserts and returns),
									// so it must not be dereferenced unconditionally here either.
									if ( info.m_bCreateBookmarks && info.m_pSet )
									{
										info.m_pSet->GetBookmarks().RemoveAll();
									}

									for ( int nWorkItem = 0; nWorkItem < info.m_WorkList.Count(); ++nWorkItem )
									{
										LogPhonemes( nWorkItem, info );
									}
								}