|
|
//========= Copyright © 1996-2005, Valve Corporation, All rights reserved. ============//
//
// Purpose:
//
// $NoKeywords: $
//=============================================================================//
#include <stdio.h>
#include <stdarg.h>
#include <memory.h>
#include <windows.h>
#include <mmsystem.h>
#include <mmreg.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "phonemeextractor/PhonemeExtractor.h"
#include "ims_helper/ims_helper.h"
#include "tier0/dbg.h"
#include "sentence.h"
#include "PhonemeConverter.h"
#include "tier1/strtools.h"
// Word name used in place of real text: ConstructInputSentence hands TalkBack an
// empty sentence when the input text starts with it, and ProcessWordsTextless
// gives this name to the single word it generates.
#define TEXTLESS_WORDNAME "[Textless]"
// TalkBack helper interface shared by every function in this file. It is
// assigned in InitLipSinc from the "GetImsHelper" factory of ims_helper.dll.
static IImsHelper *talkback = NULL;
//-----------------------------------------------------------------------------
// Purpose: IPhonemeExtractor implementation that drives the LipSinc TalkBack
//  library through the IImsHelper interface loaded from ims_helper.dll
//-----------------------------------------------------------------------------
class CPhonemeExtractorLipSinc : public IPhonemeExtractor
{
public:
	// Reports which speech API this extractor wraps
	virtual PE_APITYPE GetAPIType() const
	{
		return SPEECH_API_LIPSINC;
	}

	// Used for menus, etc
	virtual char const *GetName() const
	{
		return "IMS (LipSinc)";
	}

	// Analyzes wavfile against inwords and writes the timed words/phonemes to outwords
	SR_RESULT Extract(
		const char *wavfile,
		int numsamples,
		void (*pfnPrint)( const char *fmt, ... ),
		CSentence& inwords,
		CSentence& outwords );

	CPhonemeExtractorLipSinc( void );
	~CPhonemeExtractorLipSinc( void );

	enum
	{
		MAX_WORD_LENGTH = 128,
	};

private:
	// One word as reported by the TalkBack analysis
	struct CAnalyzedWord
	{
		char	buffer[ MAX_WORD_LENGTH ];
		double	starttime;
		double	endtime;
	};

	// One phoneme as reported by the TalkBack analysis
	struct CAnalyzedPhoneme
	{
		char	phoneme[ 32 ];
		double	starttime;
		double	endtime;
	};

	// Library startup / shutdown
	bool InitLipSinc( void );
	void ShutdownLipSinc( void );

	// Output through the callback supplied to Extract
	void DescribeError( TALKBACK_ERR err );
	void Printf( char const *fmt, ... );

	bool CheckSoundFile( char const *filename );
	bool GetInitialized( void );
	void SetInitialized( bool init );

	// Print callback captured by Extract
	void (*m_pfnPrint)( const char *fmt, ... );

	char const *ConstructInputSentence( CSentence& inwords );
	bool AttemptAnalysis( TALKBACK_ANALYSIS **ppAnalysis, char const *wavfile, CSentence& inwords );

	char const *ApplyTBWordRules( char const *word );

	void ProcessWords( TALKBACK_ANALYSIS *analysis, CSentence& inwords, CSentence& outwords );
	void ProcessWordsTextless( TALKBACK_ANALYSIS *analysis, CSentence& outwords );

	int GetPhonemeIndexAtWord( TALKBACK_ANALYSIS *analysis, double time, bool checkstart );

	int GetPhonemeIndexAtWordStart( TALKBACK_ANALYSIS *analysis, double starttime );
	int GetPhonemeIndexAtWordEnd( TALKBACK_ANALYSIS *analysis, double endtime );

	CAnalyzedWord *GetAnalyzedWord( TALKBACK_ANALYSIS *analysis, int index );
	CAnalyzedPhoneme *GetAnalyzedPhoneme( TALKBACK_ANALYSIS *analysis, int index );

	int ComputeByteFromTime( float time );

	// True once InitLipSinc has completed successfully
	bool m_bInitialized;

	// Seeded from Extract's numsamples, then rescaled by CheckSoundFile
	float m_flSampleCount;
	// Sound duration reported by the TalkBack sound file metrics
	float m_flDuration;

	float m_flSamplesPerSecond;

	int m_nBytesPerSample;

	// Module handle of ims_helper.dll
	HMODULE m_hHelper;
};
//-----------------------------------------------------------------------------
// Purpose: Starts with no helper library loaded, no print callback and all
//  sound metrics zeroed
//-----------------------------------------------------------------------------
CPhonemeExtractorLipSinc::CPhonemeExtractorLipSinc( void ) :
	m_pfnPrint( NULL ),
	m_bInitialized( false ),
	m_flSampleCount( 0.0f ),
	m_flDuration( 0.0f ),
	m_flSamplesPerSecond( 0.0f ),
	m_nBytesPerSample( 0 ),
	m_hHelper( (HMODULE)0 )
{
}
//-----------------------------------------------------------------------------
// Purpose: Shuts the TalkBack library down, but only if it was ever started
//-----------------------------------------------------------------------------
CPhonemeExtractorLipSinc::~CPhonemeExtractorLipSinc( void )
{
	if ( !GetInitialized() )
		return;

	ShutdownLipSinc();
}
//-----------------------------------------------------------------------------
// Purpose: Accessor for the initialized flag
// Output : Returns the current value of m_bInitialized
//-----------------------------------------------------------------------------
bool CPhonemeExtractorLipSinc::GetInitialized( void )
{
	return m_bInitialized;
}
//-----------------------------------------------------------------------------
// Purpose: Mutator for the initialized flag
// Input  : init - new value stored in m_bInitialized
//-----------------------------------------------------------------------------
void CPhonemeExtractorLipSinc::SetInitialized( bool init )
{
	m_bInitialized = init;
}
//-----------------------------------------------------------------------------
// Purpose: Converts a time into a byte offset by scaling the time's fraction
//  of the sound duration by the sample count and the bytes per sample
// Input  : time - same units as m_flDuration
// Output : int - byte offset, or 0 when no duration is known
//-----------------------------------------------------------------------------
int CPhonemeExtractorLipSinc::ComputeByteFromTime( float time )
{
	// Without a duration there is nothing to scale against
	if ( m_flDuration == 0.0f )
	{
		return 0;
	}

	const float flFraction = time / m_flDuration;
	const float flSample = flFraction * m_flSampleCount;

	return (int)( flSample * m_nBytesPerSample );
}
//-----------------------------------------------------------------------------
// Purpose: Prints the TalkBack description of an error code through the
//  current print callback
// Input  : err - TalkBack result code; TALKBACK_NOERR prints an empty
//			description
//-----------------------------------------------------------------------------
void CPhonemeExtractorLipSinc::DescribeError( TALKBACK_ERR err )
{
	Assert( m_pfnPrint );

	// Stays empty unless TalkBack has something to say about the code
	char szDescription[256] = "";
	if ( TALKBACK_NOERR != err )
	{
		talkback->TalkBackGetErrorString( err, sizeof(szDescription), szDescription );
	}

	// Report or log the error...
	m_pfnPrint( "LIPSINC ERROR: %s\n", szDescription );
}
//-----------------------------------------------------------------------------
// Purpose: Formats a message and forwards it to the print callback that was
//  handed to Extract
// Input  : *fmt - printf style format string
//			... - arguments consumed by fmt
//-----------------------------------------------------------------------------
void CPhonemeExtractorLipSinc::Printf( char const *fmt, ... )
{
	Assert( m_pfnPrint );

	char string[ 4096 ];
	string[ 0 ] = 0;

	// Bounded formatting: output longer than the buffer is truncated instead of
	//  overrunning the stack
	va_list argptr;
	va_start( argptr, fmt );
	vsnprintf( string, sizeof( string ), fmt, argptr );
	va_end( argptr );

	// Some CRT versions leave the buffer unterminated when they truncate
	string[ sizeof( string ) - 1 ] = 0;

	(*m_pfnPrint)( "%s", string );
}
//-----------------------------------------------------------------------------
// Purpose: Queries TalkBack for the metrics of a sound file and caches the
//  duration, sample rate and sample size used by ComputeByteFromTime
// Input  : *filename - sound file to inspect
// Output : Returns true if TalkBack reports that the file can be analyzed,
//			false if it cannot or if the metrics query failed
//-----------------------------------------------------------------------------
bool CPhonemeExtractorLipSinc::CheckSoundFile( char const *filename )
{
	TALKBACK_SOUND_FILE_METRICS fm;
	memset( &fm, 0, sizeof( fm ) );
	fm.m_size = sizeof( fm );

	TALKBACK_ERR err = talkback->TalkBackGetSoundFileMetrics( filename, &fm );
	if ( err != TALKBACK_NOERR )
	{
		DescribeError( err );
		return false;
	}

	if ( fm.m_canBeAnalyzed )
	{
		Printf( "%s: %.2f s, rate %i, bits %i, channels %i\n",
			filename,
			fm.m_duration,
			fm.m_sampleRate,
			fm.m_bitsPerSample,
			fm.m_channelCount );
	}

	m_flDuration = fm.m_duration;
	if ( m_flDuration > 0 )
	{
		// NOTE(review): this uses the sample count as passed to Extract, before
		//  the rescale below -- confirm that is the intended rate
		m_flSamplesPerSecond = m_flSampleCount / m_flDuration;
	}
	else
	{
		m_flSamplesPerSecond = 0.0f;
	}

	m_nBytesPerSample = ( fm.m_bitsPerSample >> 3 );

	// The metrics are reported even for files that cannot be analyzed, so a
	//  zero sample size or channel count must not be used as a divisor
	if ( m_nBytesPerSample > 0 )
	{
		m_flSampleCount /= m_nBytesPerSample;
	}

	if ( fm.m_channelCount > 0 )
	{
		m_nBytesPerSample /= fm.m_channelCount;
	}

	return fm.m_canBeAnalyzed ? true : false;
}
typedef IImsHelper *(*pfnImsHelper)(void);
//-----------------------------------------------------------------------------
// Purpose:
// Output : Returns true on success, false on failure.
//-----------------------------------------------------------------------------
bool CPhonemeExtractorLipSinc::InitLipSinc( void ) { if ( GetInitialized() ) { return true; }
m_hHelper = LoadLibrary( "ims_helper.dll" ); if ( !m_hHelper ) { return false; } pfnImsHelper factory = (pfnImsHelper)::GetProcAddress( m_hHelper, "GetImsHelper" ); if ( !factory ) { FreeLibrary( m_hHelper ); return false; }
talkback = reinterpret_cast< IImsHelper * >( (*factory)() ); if ( !talkback ) { FreeLibrary( m_hHelper ); return false; }
char szExeName[ MAX_PATH ]; szExeName[0] = 0; GetModuleFileName( (HMODULE)0, szExeName, sizeof( szExeName ) );
char szBaseDir[ MAX_PATH ]; Q_strncpy( szBaseDir, szExeName, sizeof( szBaseDir ) );
Q_StripLastDir( szBaseDir, sizeof( szBaseDir ) ); Q_StripTrailingSlash( szBaseDir ); Q_strlower( szBaseDir );
char coreDataDir[ 512 ]; Q_snprintf( coreDataDir, sizeof( coreDataDir ), "%s\\lipsinc_data\\", szBaseDir ); Q_FixSlashes( coreDataDir );
char szCheck[ 512 ]; Q_snprintf( szCheck, sizeof( szCheck ), "%sDtC6dal.dat", coreDataDir ); struct __stat64 buf;
if ( _stat64( szCheck, &buf ) != 0 ) { Q_snprintf( coreDataDir, sizeof( coreDataDir ), "%s\\bin\\lipsinc_data\\", szBaseDir ); Q_FixSlashes( coreDataDir ); Q_snprintf( szCheck, sizeof( szCheck ), "%sDtC6dal.dat", coreDataDir );
if ( _stat64( szCheck, &buf ) != 0 ) { Error( "Unable to find talkback data files in %s.", coreDataDir ); } }
TALKBACK_ERR err; err = talkback->TalkBackStartupLibrary( coreDataDir ); if ( err != TALKBACK_NOERR ) { DescribeError( err ); FreeLibrary( m_hHelper ); return false; }
long verMajor = 0; long verMinor = 0; long verRevision = 0; err = talkback->TalkBackGetVersion( &verMajor, &verMinor, &verRevision); if ( err != TALKBACK_NOERR ) { DescribeError( err ); FreeLibrary( m_hHelper ); return false; }
Printf( "Lipsinc TalkBack Version %i.%i.%i\n", verMajor, verMinor, verRevision );
m_bInitialized = true;
return true; }
//-----------------------------------------------------------------------------
// Purpose:
//-----------------------------------------------------------------------------
void CPhonemeExtractorLipSinc::ShutdownLipSinc( void ) { // HACK HACK: This seems to crash on exit sometimes
__try { talkback->TalkBackShutdownLibrary();
FreeLibrary( m_hHelper ); } __except(EXCEPTION_EXECUTE_HANDLER ) { OutputDebugString( "----> Crash shutting down TALKBACK sdk, exception caught and ignored\n" ); } }
//-----------------------------------------------------------------------------
// Purpose: Joins the words of a sentence with single spaces to form the text
//  handed to TalkBack. A lone word whose sentence text starts with
//  TEXTLESS_WORDNAME yields an empty string instead.
// Input  : inwords - sentence whose m_Words are concatenated
// Output : char const - pointer to a static buffer that is overwritten by the
//			next call; text that does not fit is truncated
//-----------------------------------------------------------------------------
char const *CPhonemeExtractorLipSinc::ConstructInputSentence( CSentence& inwords )
{
	static char sentence[ 16384 ];

	sentence[ 0 ] = 0;

	// Last slot is reserved for the terminator
	const size_t capacity = sizeof( sentence ) - 1;
	size_t used = 0;

	int last = inwords.m_Words.Count() - 1;

	for ( int i = 0 ; i <= last && used < capacity; i++ )
	{
		CWordTag *w = inwords.m_Words[ i ];

		char const *word = w->GetWord();
		if ( word )
		{
			// strncat appends at most the remaining room plus a terminator
			strncat( sentence + used, word, capacity - used );
			used += strlen( sentence + used );
		}

		if ( i != last && used < capacity )
		{
			sentence[ used++ ] = ' ';
			sentence[ used ] = 0;
		}
	}

	if ( inwords.m_Words.Count() == 1 && !Q_strnicmp( inwords.GetText(), TEXTLESS_WORDNAME, Q_strlen( TEXTLESS_WORDNAME ) ) )
	{
		sentence[ 0 ] = 0;
	}

	return sentence;
}
//-----------------------------------------------------------------------------
// Purpose: Asks TalkBack to analyze a sound file against the sentence text
// Input  : **ppAnalysis - receives the analysis; set to NULL before the call
//			*wavfile - sound file passed to TalkBackGetAnalysis
//			inwords - sentence whose words form the text passed to TalkBack
// Output : Returns true on success, false on failure.
//-----------------------------------------------------------------------------
bool CPhonemeExtractorLipSinc::AttemptAnalysis( TALKBACK_ANALYSIS **ppAnalysis, char const *wavfile, CSentence& inwords )
{
	*ppAnalysis = NULL;

	TALKBACK_ANALYSIS_SETTINGS analysisSettings;
	memset( &analysisSettings, 0, sizeof( analysisSettings ) );

	// Set this field to sizeof(TALKBACK_ANALYSIS_SETTINGS) before using the
	// structure.
	analysisSettings.fSize = sizeof( TALKBACK_ANALYSIS_SETTINGS );

	// Default value: 30 (frames per second).
	analysisSettings.fFrameRate = 100;

	// Set this to 1 to optimize for flipbook output, 0 to do analysis normally.
	//
	// Default value: 0 (normal analysis).
	analysisSettings.fOptimizeForFlipbook = 0;

	// Set this to -1 to seed the random number generator with the current time.
	// Any other number will be used directly for the random number seed, which
	// is useful if you want repeatable speech gestures. This value does not
	// influence lip-synching at all.
	//
	// Default value: -1 (use current time).
	analysisSettings.fRandomSeed = -1;

	// Path to the configuration (.INI) file with phoneme-to-speech-target
	// mapping. Set this to NULL to use the default mapping.
	//
	// Default value: NULL (use default mapping).
	analysisSettings.fConfigFile = NULL;

	char const *pszSentence = ConstructInputSentence( inwords );

	// An empty sentence is shown as the textless placeholder in the log
	char const *pszShown = pszSentence[ 0 ] ? pszSentence : TEXTLESS_WORDNAME;
	Printf( "Analyzing: \"%s\"\n", pszShown );

	TALKBACK_ERR err = talkback->TalkBackGetAnalysis( ppAnalysis, wavfile, pszSentence, &analysisSettings );
	if ( err == TALKBACK_NOERR )
	{
		Printf( "Analysis successful...\n" );
		return true;
	}

	DescribeError( err );
	return false;
}
typedef struct { TALKBACK_PHONEME phoneme; char const *string; } TBPHONEMES_t;
static TBPHONEMES_t g_TBPhonemeList[]= { { TALKBACK_PHONEME_IY, "iy" }, { TALKBACK_PHONEME_IH, "ih" }, { TALKBACK_PHONEME_EH, "eh" }, { TALKBACK_PHONEME_EY, "ey" }, { TALKBACK_PHONEME_AE, "ae" }, { TALKBACK_PHONEME_AA, "aa" }, { TALKBACK_PHONEME_AW, "aw" }, { TALKBACK_PHONEME_AY, "ay" }, { TALKBACK_PHONEME_AH, "ah" }, { TALKBACK_PHONEME_AO, "ao" }, { TALKBACK_PHONEME_OY, "oy" }, { TALKBACK_PHONEME_OW, "ow" }, { TALKBACK_PHONEME_UH, "uh" }, { TALKBACK_PHONEME_UW, "uw" }, { TALKBACK_PHONEME_ER, "er" }, { TALKBACK_PHONEME_AX, "ax" }, { TALKBACK_PHONEME_S, "s" }, { TALKBACK_PHONEME_SH, "sh" }, { TALKBACK_PHONEME_Z, "z" }, { TALKBACK_PHONEME_ZH, "zh" }, { TALKBACK_PHONEME_F, "f" }, { TALKBACK_PHONEME_TH, "th" }, { TALKBACK_PHONEME_V, "v" }, { TALKBACK_PHONEME_DH, "dh" }, { TALKBACK_PHONEME_M, "m" }, { TALKBACK_PHONEME_N, "n" }, { TALKBACK_PHONEME_NG, "ng" }, { TALKBACK_PHONEME_L, "l" }, { TALKBACK_PHONEME_R, "r" }, { TALKBACK_PHONEME_W, "w" }, { TALKBACK_PHONEME_Y, "y" }, { TALKBACK_PHONEME_HH, "hh" }, { TALKBACK_PHONEME_B, "b" }, { TALKBACK_PHONEME_D, "d" }, { TALKBACK_PHONEME_JH, "jh" }, { TALKBACK_PHONEME_G, "g" }, { TALKBACK_PHONEME_P, "p" }, { TALKBACK_PHONEME_T, "t" }, { TALKBACK_PHONEME_K, "k" }, { TALKBACK_PHONEME_CH, "ch" }, { TALKBACK_PHONEME_SIL, "<sil>" }, { -1, NULL } };
//-----------------------------------------------------------------------------
// Purpose: Looks up the text tag of a TalkBack phoneme code
// Input  : phoneme - TalkBack phoneme code
// Output : char const - tag from g_TBPhonemeList, or "Bogus" when the code is
//			out of range or has no entry in the list
//-----------------------------------------------------------------------------
char const *TBPhonemeToString( TALKBACK_PHONEME phoneme )
{
	if ( phoneme < TALKBACK_PHONEME_FIRST || phoneme > TALKBACK_PHONEME_LAST )
	{
		return "Bogus";
	}

	// Match on the stored code rather than using the code as an array index:
	//  indexing silently depends on the enum being zero based and in list
	//  order, and can land on the NULL terminator or beyond it
	for ( TBPHONEMES_t const *item = g_TBPhonemeList; item->string != NULL; ++item )
	{
		if ( item->phoneme == phoneme )
		{
			return item->string;
		}
	}

	return "Bogus";
}
//-----------------------------------------------------------------------------
// Purpose: Finds the first phoneme of the analysis whose start (or end) time
//  is exactly equal to the given time
// Input  : *analysis - TalkBack analysis to search
//			time - time to look for
//			start - true compares phoneme start times, false compares end times
// Output : int - index of the matching phoneme, -1 if none was found or the
//			phoneme count could not be read or is not plausible
//-----------------------------------------------------------------------------
int CPhonemeExtractorLipSinc::GetPhonemeIndexAtWord( TALKBACK_ANALYSIS *analysis, double time, bool start )
{
	long count;

	TALKBACK_ERR err = talkback->TalkBackGetNumPhonemes( analysis, &count );
	if ( err != TALKBACK_NOERR )
	{
		DescribeError( err );
		return -1;
	}

	// Nothing to search, or a bogus count
	if ( count <= 0L || count >= 100000L )
	{
		return -1;
	}

	const int nPhonemes = (int)count;
	for ( int i = 0; i < nPhonemes; i++ )
	{
		double t;

		TALKBACK_PHONEME tbPhoneme = TALKBACK_PHONEME_INVALID;
		err = talkback->TalkBackGetPhonemeEnum( analysis, i, &tbPhoneme );
		if ( err == TALKBACK_NOERR )
		{
			err = start ?
				talkback->TalkBackGetPhonemeStartTime( analysis, i, &t ) :
				talkback->TalkBackGetPhonemeEndTime( analysis, i, &t );
		}

		// A phoneme that cannot be queried is reported and skipped
		if ( err != TALKBACK_NOERR )
		{
			DescribeError( err );
			continue;
		}

		if ( t == time )
		{
			return i;
		}
	}

	return -1;
}
//-----------------------------------------------------------------------------
// Purpose: Finds the phoneme whose start time equals a word's start time
// Input  : *analysis - TalkBack analysis to search
//			starttime - start time of the word
// Output : int - phoneme index, or -1 if there is no such phoneme
//-----------------------------------------------------------------------------
int CPhonemeExtractorLipSinc::GetPhonemeIndexAtWordStart( TALKBACK_ANALYSIS *analysis, double starttime )
{
	const bool bCompareStartTimes = true;
	return GetPhonemeIndexAtWord( analysis, starttime, bCompareStartTimes );
}
//-----------------------------------------------------------------------------
// Purpose: Finds the phoneme whose end time equals a word's end time
// Input  : *analysis - TalkBack analysis to search
//			endtime - end time of the word
// Output : int - phoneme index, or -1 if there is no such phoneme
//-----------------------------------------------------------------------------
int CPhonemeExtractorLipSinc::GetPhonemeIndexAtWordEnd( TALKBACK_ANALYSIS *analysis, double endtime )
{
	const bool bCompareStartTimes = false;
	return GetPhonemeIndexAtWord( analysis, endtime, bCompareStartTimes );
}
//-----------------------------------------------------------------------------
// Purpose: Reads the tag and the start/end times of one analyzed phoneme
// Input  : *analysis - TalkBack analysis to read from
//			index - phoneme index within the analysis
// Output : CAnalyzedPhoneme - pointer to a static record that is overwritten
//			by the next call, or NULL if any TalkBack query failed
//-----------------------------------------------------------------------------
CPhonemeExtractorLipSinc::CAnalyzedPhoneme *CPhonemeExtractorLipSinc::GetAnalyzedPhoneme( TALKBACK_ANALYSIS *analysis, int index )
{
	static CAnalyzedPhoneme s_Phoneme;

	memset( &s_Phoneme, 0, sizeof( s_Phoneme ) );

	TALKBACK_PHONEME tb;

	// Each query only runs if everything before it succeeded
	TALKBACK_ERR err = talkback->TalkBackGetPhonemeEnum( analysis, index, &tb );
	if ( err == TALKBACK_NOERR )
	{
		strcpy( s_Phoneme.phoneme, TBPhonemeToString( tb ) );

		err = talkback->TalkBackGetPhonemeStartTime( analysis, index, &s_Phoneme.starttime );
	}
	if ( err == TALKBACK_NOERR )
	{
		err = talkback->TalkBackGetPhonemeEndTime( analysis, index, &s_Phoneme.endtime );
	}

	if ( err != TALKBACK_NOERR )
	{
		DescribeError( err );
		return NULL;
	}

	return &s_Phoneme;
}
//-----------------------------------------------------------------------------
// Purpose: Reads the text and the start/end times of one analyzed word
// Input  : *analysis - TalkBack analysis to read from
//			index - word index within the analysis
// Output : CAnalyzedWord - pointer to a static record that is overwritten by
//			the next call, or NULL if any TalkBack query failed
//-----------------------------------------------------------------------------
CPhonemeExtractorLipSinc::CAnalyzedWord *CPhonemeExtractorLipSinc::GetAnalyzedWord( TALKBACK_ANALYSIS *analysis, int index )
{
	static CAnalyzedWord s_Word;

	memset( &s_Word, 0, sizeof( s_Word ) );

	long chars = sizeof( s_Word.buffer );

	// Each query only runs if everything before it succeeded
	TALKBACK_ERR err = talkback->TalkBackGetWord( analysis, index, chars, s_Word.buffer );
	if ( err == TALKBACK_NOERR )
	{
		err = talkback->TalkBackGetWordStartTime( analysis, index, &s_Word.starttime );
	}
	if ( err == TALKBACK_NOERR )
	{
		err = talkback->TalkBackGetWordEndTime( analysis, index, &s_Word.endtime );
	}

	if ( err != TALKBACK_NOERR )
	{
		DescribeError( err );
		return NULL;
	}

	return &s_Word;
}
//-----------------------------------------------------------------------------
// Purpose:
// Input : *w1 -
// *w2 -
// Output : Returns true on success, false on failure.
//-----------------------------------------------------------------------------
bool FuzzyWordMatch( char const *w1, char const *w2 ) { int len1 = strlen( w1 ); int len2 = strlen( w2 );
int minlen = min( len1, len2 );
// Found a match
if ( !strnicmp( w1, w2, minlen ) ) return true;
int letterdiff = abs( len1 - len2 ); // More than three letters different, don't bother
if ( letterdiff > 5 ) return false;
// Compute a "delta"
char *p1 = (char *)w1; char *p2 = (char *)w2;
CUtlVector <char> word1; CUtlVector <char> word2;
while ( *p1 ) { if ( V_isalpha( *p1 ) ) { word1.AddToTail( *p1 ); } p1++; }
while ( *p2 ) { if ( V_isalpha( *p2 ) ) { word2.AddToTail( *p2 ); } p2++; }
int i; for ( i = 0; i < word1.Count(); i++ ) { char c = word1[ i ];
// See if c is in word 2, if so subtract it out
int idx = word2.Find( c );
if ( idx != word2.InvalidIndex() ) { word2.Remove( idx ); } }
if ( word2.Count() <= letterdiff ) return true;
word2.RemoveAll();
while ( *p2 ) { if ( V_isalpha( *p2 ) ) { word2.AddToTail( *p2 ); } p2++; }
for ( i = 0; i < word2.Count(); i++ ) { char c = word2[ i ];
// See if c is in word 2, if so subtract it out
int idx = word1.Find( c );
if ( idx != word1.InvalidIndex() ) { word1.Remove( idx ); } }
if ( word1.Count() <= letterdiff ) return true;
return false; }
//-----------------------------------------------------------------------------
// Purpose: For foreign language stuff, if inwords is empty, process anyway...
//  Every analyzed phoneme is attached to a single word named
//  TEXTLESS_WORDNAME that spans from the earliest phoneme start to the latest
//  phoneme end.
// Input  : *analysis - TalkBack analysis to read the phonemes from
//			outwords - reset and then filled with the single generated word
//-----------------------------------------------------------------------------
void CPhonemeExtractorLipSinc::ProcessWordsTextless( TALKBACK_ANALYSIS *analysis, CSentence& outwords )
{
	long count;

	TALKBACK_ERR err = talkback->TalkBackGetNumPhonemes( analysis, &count );
	if ( err != TALKBACK_NOERR )
	{
		DescribeError( err );
		return;
	}

	CWordTag *newWord = new CWordTag;

	newWord->SetWord( TEXTLESS_WORDNAME );

	// Range used when not a single phoneme could be read
	float starttime = 0.0f;
	float endtime = 1.0f;

	// The range is seeded by the first phoneme that is actually read, which is
	//  not necessarily phoneme 0
	bool bHaveRange = false;

	for ( int i = 0; i < count; ++i )
	{
		// Get phoneme and timing info
		CAnalyzedPhoneme *ph = GetAnalyzedPhoneme( analysis, i );
		if ( !ph )
			continue;

		CPhonemeTag *ptag = new CPhonemeTag;

		if ( !bHaveRange || ( ph->starttime < starttime ) )
		{
			starttime = ph->starttime;
		}

		if ( !bHaveRange || ( ph->endtime > endtime ) )
		{
			endtime = ph->endtime;
		}

		bHaveRange = true;

		ptag->SetStartTime( ph->starttime );
		ptag->SetEndTime( ph->endtime );

		ptag->m_uiStartByte = ComputeByteFromTime( ph->starttime );
		ptag->m_uiEndByte = ComputeByteFromTime( ph->endtime );

		ptag->SetTag( ph->phoneme );
		ptag->SetPhonemeCode( TextToPhoneme( ptag->GetTag() ) );

		newWord->m_Phonemes.AddToTail( ptag );
	}

	newWord->m_flStartTime = starttime;
	newWord->m_flEndTime = endtime;

	newWord->m_uiStartByte = ComputeByteFromTime( starttime );
	newWord->m_uiEndByte = ComputeByteFromTime( endtime );

	outwords.Reset();
	outwords.AddWordTag( newWord );
	outwords.SetTextFromWords();
}
//-----------------------------------------------------------------------------
// Purpose: Walks the input words and the analyzed words side by side and, for
//  every pair that fuzzy-matches, emits a word carrying the analyzed timing
//  and the phonemes found between the word's start and end times
// Input  : *analysis - TalkBack analysis to read words and phonemes from
//			inwords - words the caller expects in the sound
//			outwords - reset and then filled with the matched words
//-----------------------------------------------------------------------------
void CPhonemeExtractorLipSinc::ProcessWords( TALKBACK_ANALYSIS *analysis, CSentence& inwords, CSentence& outwords )
{
	long count;

	TALKBACK_ERR err = talkback->TalkBackGetNumWords( analysis, &count );
	if ( err != TALKBACK_NOERR )
	{
		DescribeError( err );
		return;
	}

	if ( count <= 0L )
	{
		// No analyzed words: only the textless case has anything to process
		const bool bNoInputWords = ( inwords.m_Words.Count() == 0 );
		if ( bNoInputWords ||
			!Q_strnicmp( inwords.GetText(), TEXTLESS_WORDNAME, Q_strlen( TEXTLESS_WORDNAME ) ) )
		{
			ProcessWordsTextless( analysis, outwords );
		}
		return;
	}

	// Bogus
	if ( count >= 100000L )
		return;

	outwords.Reset();

	// Normalized text of the most recent input word that was matched
	char szLastMatched[ 256 ];
	szLastMatched[ 0 ] = 0;

	int iInput = 0;
	int iAnalyzed = 0;

	while ( iInput < inwords.m_Words.Count() )
	{
		CWordTag *pInput = inwords.m_Words[ iInput ];

		if ( iAnalyzed >= count )
		{
			// Just copy the rest over without phonemes
			outwords.AddWordTag( new CWordTag( *pInput ) );
			iInput++;
			continue;
		}

		// Should never fail
		CAnalyzedWord *pAnalyzed = GetAnalyzedWord( analysis, iAnalyzed );
		if ( !pAnalyzed )
		{
			return;
		}

		// Silence is not a word
		if ( !stricmp( pAnalyzed->buffer, "<SIL>" ) )
		{
			iAnalyzed++;
			continue;
		}

		char const *pszNormalized = ApplyTBWordRules( pInput->GetWord() );
		if ( !FuzzyWordMatch( pszNormalized, pAnalyzed->buffer ) )
		{
			// The input position is held only when the analyzed word still
			//  matches the previously matched input word
			const bool bMatchesPrevious =
				szLastMatched[ 0 ] && FuzzyWordMatch( szLastMatched, pAnalyzed->buffer );

			if ( !bMatchesPrevious )
			{
				iInput++;
			}
			iAnalyzed++;
			continue;
		}

		strcpy( szLastMatched, pszNormalized );

		CWordTag *pMatched = new CWordTag;

		pMatched->SetWord( pInput->GetWord() );

		pMatched->m_flStartTime = pAnalyzed->starttime;
		pMatched->m_flEndTime = pAnalyzed->endtime;

		pMatched->m_uiStartByte = ComputeByteFromTime( pAnalyzed->starttime );
		pMatched->m_uiEndByte = ComputeByteFromTime( pAnalyzed->endtime );

		const int iFirstPhoneme = GetPhonemeIndexAtWordStart( analysis, pAnalyzed->starttime );
		const int iLastPhoneme = GetPhonemeIndexAtWordEnd( analysis, pAnalyzed->endtime );

		if ( iFirstPhoneme >= 0 && iLastPhoneme >= 0 )
		{
			for ( int iPhoneme = iFirstPhoneme; iPhoneme <= iLastPhoneme; iPhoneme++ )
			{
				// Get phoneme and timing info
				CAnalyzedPhoneme *pPhoneme = GetAnalyzedPhoneme( analysis, iPhoneme );
				if ( !pPhoneme )
					continue;

				CPhonemeTag *pTag = new CPhonemeTag;
				pTag->SetStartTime( pPhoneme->starttime );
				pTag->SetEndTime( pPhoneme->endtime );

				pTag->m_uiStartByte = ComputeByteFromTime( pPhoneme->starttime );
				pTag->m_uiEndByte = ComputeByteFromTime( pPhoneme->endtime );

				pTag->SetTag( pPhoneme->phoneme );
				pTag->SetPhonemeCode( TextToPhoneme( pTag->GetTag() ) );

				pMatched->m_Phonemes.AddToTail( pTag );
			}
		}

		outwords.AddWordTag( pMatched );
		iInput++;
		iAnalyzed++;
	}
}
//-----------------------------------------------------------------------------
// Purpose: Normalizes a word for comparison against TalkBack's words: letters
//  are upper cased, apostrophes are kept, whitespace and the listed
//  punctuation become spaces, and everything else is dropped
// Input  : *word - word to normalize
// Output : char const - pointer to a static buffer that is overwritten by the
//			next call; at most 255 characters are produced
//-----------------------------------------------------------------------------
char const *CPhonemeExtractorLipSinc::ApplyTBWordRules( char const *word )
{
	static char outword[ 256 ];

	char const *in = word;
	char *out = outword;

	// The last slot is kept free so the terminator always lands inside the buffer
	char * const outlimit = outword + sizeof( outword ) - 1;

	while ( *in && out < outlimit )
	{
		const char c = *in++;

		switch ( c )
		{
		case '\t':
		case ' ':
		case '\n':
		case '-':
		case '.':
		case ',':
		case ';':
		case '?':
		case '"':
		case ':':
		case '(':
		case ')':
			*out++ = ' ';
			continue;
		default:
			break;
		}

		// Convert all letters to upper case
		if ( c >= 'a' && c <= 'z' )
		{
			*out++ = c - 'a' + 'A';
			continue;
		}

		if ( ( c >= 'A' && c <= 'Z' ) || c == '\'' )
		{
			*out++ = c;
			continue;
		}

		// Anything else (numbers, non printable and non ASCII characters,
		//  remaining punctuation) is skipped
	}

	*out = 0;

	return outword;
}
//-----------------------------------------------------------------------------
// Purpose: Given a wavfile and a list of inwords, determines the word/phoneme
//  timing and byte offsets for the sentence
// Input  : *wavfile - sound file to analyze
//			numsamples - sample count of the sound; stored in m_flSampleCount
//				and rescaled by CheckSoundFile
//			*pfnPrint - callback that receives all status and error output
//			inwords - words expected in the sound; its text is rebuilt from the
//				words when it is empty
//			outwords - receives a copy of inwords, which ProcessWords then
//				replaces with the analyzed result
// Output : SR_RESULT - SR_RESULT_ERROR if the library or the sound file is
//			unusable, SR_RESULT_FAILED if the analysis failed, otherwise
//			SR_RESULT_SUCCESS
//-----------------------------------------------------------------------------
SR_RESULT CPhonemeExtractorLipSinc::Extract(
	const char *wavfile,
	int numsamples,
	void (*pfnPrint)( const char *fmt, ... ),
	CSentence& inwords,
	CSentence& outwords )
{
	// g_enableTalkBackDebuggingOutput = 1;

	m_pfnPrint = pfnPrint;

	if ( !InitLipSinc() )
	{
		return SR_RESULT_ERROR;
	}

	m_flSampleCount = numsamples;

	// A file that cannot be used is not a reason to unload the helper DLL: the
	//  extractor stays initialized, and unloading here would leave the global
	//  interface pointing into an unloaded module for the next call and for
	//  the shutdown in the destructor
	if ( !CheckSoundFile( wavfile ) )
	{
		return SR_RESULT_ERROR;
	}

	TALKBACK_ANALYSIS *analysis = NULL;

	if ( !AttemptAnalysis( &analysis, wavfile, inwords ) )
	{
		return SR_RESULT_FAILED;
	}

	if ( strlen( inwords.GetText() ) <= 0 )
	{
		inwords.SetTextFromWords();
	}

	outwords = inwords;

	// Examine data
	ProcessWords( analysis, inwords, outwords );

	if ( analysis )
	{
		talkback->TalkBackFreeAnalysis( &analysis );
	}

	return SR_RESULT_SUCCESS;
}
// Registers CPhonemeExtractorLipSinc as an IPhonemeExtractor under the
// VPHONEME_EXTRACTOR_INTERFACE name (the macro is defined outside this file).
EXPOSE_SINGLE_INTERFACE( CPhonemeExtractorLipSinc, IPhonemeExtractor, VPHONEME_EXTRACTOR_INTERFACE );
|