//========= Copyright © 1996-2005, Valve Corporation, All rights reserved. ============// // // Purpose: // // $NoKeywords: $ //=============================================================================// #include #include #include #include #include #include #include #include #include "phonemeextractor/PhonemeExtractor.h" #include "ims_helper/ims_helper.h" #include "tier0/dbg.h" #include "sentence.h" #include "PhonemeConverter.h" #include "tier1/strtools.h" #define TEXTLESS_WORDNAME "[Textless]" static IImsHelper *talkback = NULL; //----------------------------------------------------------------------------- // Purpose: Expose the interface //----------------------------------------------------------------------------- class CPhonemeExtractorLipSinc : public IPhonemeExtractor { public: virtual PE_APITYPE GetAPIType() const { return SPEECH_API_LIPSINC; } // Used for menus, etc virtual char const *GetName() const { return "IMS (LipSinc)"; } SR_RESULT Extract( const char *wavfile, int numsamples, void (*pfnPrint)( const char *fmt, ... ), CSentence& inwords, CSentence& outwords ); CPhonemeExtractorLipSinc( void ); ~CPhonemeExtractorLipSinc( void ); enum { MAX_WORD_LENGTH = 128, }; private: class CAnalyzedWord { public: char buffer[ MAX_WORD_LENGTH ]; double starttime; double endtime; }; class CAnalyzedPhoneme { public: char phoneme[ 32 ]; double starttime; double endtime; }; bool InitLipSinc( void ); void ShutdownLipSinc( void ); void DescribeError( TALKBACK_ERR err ); void Printf( char const *fmt, ... ); bool CheckSoundFile( char const *filename ); bool GetInitialized( void ); void SetInitialized( bool init ); void (*m_pfnPrint)( const char *fmt, ... ); char const *ConstructInputSentence( CSentence& inwords ); bool AttemptAnalysis( TALKBACK_ANALYSIS **ppAnalysis, char const *wavfile, CSentence& inwords ); char const *ApplyTBWordRules( char const *word ); void ProcessWords( TALKBACK_ANALYSIS *analysis, CSentence& inwords, CSentence& outwords ); void ProcessWordsTextless( TALKBACK_ANALYSIS *analysis, CSentence& outwords ); int GetPhonemeIndexAtWord( TALKBACK_ANALYSIS *analysis, double time, bool checkstart ); int GetPhonemeIndexAtWordStart( TALKBACK_ANALYSIS *analysis, double starttime ); int GetPhonemeIndexAtWordEnd( TALKBACK_ANALYSIS *analysis, double endtime ); CAnalyzedWord *GetAnalyzedWord( TALKBACK_ANALYSIS *analysis, int index ); CAnalyzedPhoneme *GetAnalyzedPhoneme( TALKBACK_ANALYSIS *analysis, int index ); int ComputeByteFromTime( float time ); bool m_bInitialized; float m_flSampleCount; float m_flDuration; float m_flSamplesPerSecond; int m_nBytesPerSample; HMODULE m_hHelper; }; CPhonemeExtractorLipSinc::CPhonemeExtractorLipSinc( void ) { m_hHelper = (HMODULE)0; m_pfnPrint = NULL; m_bInitialized = false; m_flSampleCount = 0.0f; m_flDuration = 0.0f; m_flSamplesPerSecond = 0.0f; m_nBytesPerSample = 0; } CPhonemeExtractorLipSinc::~CPhonemeExtractorLipSinc( void ) { if ( GetInitialized() ) { ShutdownLipSinc(); } } bool CPhonemeExtractorLipSinc::GetInitialized( void ) { return m_bInitialized; } void CPhonemeExtractorLipSinc::SetInitialized( bool init ) { m_bInitialized = init; } int CPhonemeExtractorLipSinc::ComputeByteFromTime( float time ) { if ( !m_flDuration ) return 0; float frac = time / m_flDuration; float sampleNumber = frac * m_flSampleCount; int bytenumber = sampleNumber * m_nBytesPerSample; return bytenumber; } void CPhonemeExtractorLipSinc::DescribeError( TALKBACK_ERR err ) { Assert( m_pfnPrint ); // Get the error description. char errorDesc[256] = ""; if ( err != TALKBACK_NOERR ) { talkback->TalkBackGetErrorString( err, sizeof(errorDesc), errorDesc ); } // Report or log the error... (*m_pfnPrint)( "LIPSINC ERROR: %s\n", errorDesc ); } //----------------------------------------------------------------------------- // Purpose: // Input : *fmt - // .. - //----------------------------------------------------------------------------- void CPhonemeExtractorLipSinc::Printf( char const *fmt, ... ) { Assert( m_pfnPrint ); char string[ 4096 ]; va_list argptr; va_start( argptr, fmt ); vsprintf( string, fmt, argptr ); va_end( argptr ); (*m_pfnPrint)( "%s", string ); } bool CPhonemeExtractorLipSinc::CheckSoundFile( char const *filename ) { TALKBACK_SOUND_FILE_METRICS fm; memset( &fm, 0, sizeof( fm ) ); fm.m_size = sizeof( fm ); TALKBACK_ERR err = talkback->TalkBackGetSoundFileMetrics( filename, &fm ); if ( err != TALKBACK_NOERR ) { DescribeError( err ); return false; } if ( fm.m_canBeAnalyzed ) { Printf( "%s: %.2f s, rate %i, bits %i, channels %i\n", filename, fm.m_duration, fm.m_sampleRate, fm.m_bitsPerSample, fm.m_channelCount ); } m_flDuration = fm.m_duration; if ( m_flDuration > 0 ) { m_flSamplesPerSecond = m_flSampleCount / m_flDuration; } else { m_flSamplesPerSecond = 0.0f; } m_nBytesPerSample = ( fm.m_bitsPerSample >> 3 ); m_flSampleCount /= m_nBytesPerSample; m_nBytesPerSample /= fm.m_channelCount; return fm.m_canBeAnalyzed ? true : false; } typedef IImsHelper *(*pfnImsHelper)(void); //----------------------------------------------------------------------------- // Purpose: // Output : Returns true on success, false on failure. //----------------------------------------------------------------------------- bool CPhonemeExtractorLipSinc::InitLipSinc( void ) { if ( GetInitialized() ) { return true; } m_hHelper = LoadLibrary( "ims_helper.dll" ); if ( !m_hHelper ) { return false; } pfnImsHelper factory = (pfnImsHelper)::GetProcAddress( m_hHelper, "GetImsHelper" ); if ( !factory ) { FreeLibrary( m_hHelper ); return false; } talkback = reinterpret_cast< IImsHelper * >( (*factory)() ); if ( !talkback ) { FreeLibrary( m_hHelper ); return false; } char szExeName[ MAX_PATH ]; szExeName[0] = 0; GetModuleFileName( (HMODULE)0, szExeName, sizeof( szExeName ) ); char szBaseDir[ MAX_PATH ]; Q_strncpy( szBaseDir, szExeName, sizeof( szBaseDir ) ); Q_StripLastDir( szBaseDir, sizeof( szBaseDir ) ); Q_StripTrailingSlash( szBaseDir ); Q_strlower( szBaseDir ); char coreDataDir[ 512 ]; Q_snprintf( coreDataDir, sizeof( coreDataDir ), "%s\\lipsinc_data\\", szBaseDir ); Q_FixSlashes( coreDataDir ); char szCheck[ 512 ]; Q_snprintf( szCheck, sizeof( szCheck ), "%sDtC6dal.dat", coreDataDir ); struct __stat64 buf; if ( _stat64( szCheck, &buf ) != 0 ) { Q_snprintf( coreDataDir, sizeof( coreDataDir ), "%s\\bin\\lipsinc_data\\", szBaseDir ); Q_FixSlashes( coreDataDir ); Q_snprintf( szCheck, sizeof( szCheck ), "%sDtC6dal.dat", coreDataDir ); if ( _stat64( szCheck, &buf ) != 0 ) { Error( "Unable to find talkback data files in %s.", coreDataDir ); } } TALKBACK_ERR err; err = talkback->TalkBackStartupLibrary( coreDataDir ); if ( err != TALKBACK_NOERR ) { DescribeError( err ); FreeLibrary( m_hHelper ); return false; } long verMajor = 0; long verMinor = 0; long verRevision = 0; err = talkback->TalkBackGetVersion( &verMajor, &verMinor, &verRevision); if ( err != TALKBACK_NOERR ) { DescribeError( err ); FreeLibrary( m_hHelper ); return false; } Printf( "Lipsinc TalkBack Version %i.%i.%i\n", verMajor, verMinor, verRevision ); m_bInitialized = true; return true; } //----------------------------------------------------------------------------- // Purpose: //----------------------------------------------------------------------------- void CPhonemeExtractorLipSinc::ShutdownLipSinc( void ) { // HACK HACK: This seems to crash on exit sometimes __try { talkback->TalkBackShutdownLibrary(); FreeLibrary( m_hHelper ); } __except(EXCEPTION_EXECUTE_HANDLER ) { OutputDebugString( "----> Crash shutting down TALKBACK sdk, exception caught and ignored\n" ); } } //----------------------------------------------------------------------------- // Purpose: // Input : inwords - // Output : char const //----------------------------------------------------------------------------- char const *CPhonemeExtractorLipSinc::ConstructInputSentence( CSentence& inwords ) { static char sentence[ 16384 ]; sentence[ 0 ] = 0; int last = inwords.m_Words.Count() - 1; for ( int i = 0 ; i <= last; i++ ) { CWordTag *w = inwords.m_Words[ i ]; strcat( sentence, w->GetWord() ); if ( i != last ) { strcat( sentence, " " ); } } if ( inwords.m_Words.Count() == 1 && !Q_strnicmp( inwords.GetText(), TEXTLESS_WORDNAME, Q_strlen( TEXTLESS_WORDNAME ) ) ) { sentence[ 0 ] = 0; } return sentence; } bool CPhonemeExtractorLipSinc::AttemptAnalysis( TALKBACK_ANALYSIS **ppAnalysis, char const *wavfile, CSentence& inwords ) { *ppAnalysis = NULL; TALKBACK_ANALYSIS_SETTINGS settings; memset( &settings, 0, sizeof( settings ) ); // Set this field to sizeof(TALKBACK_ANALYSIS_SETTINGS) before using the // structure. settings.fSize = sizeof( TALKBACK_ANALYSIS_SETTINGS ); // Default value: 30 (frames per second). settings.fFrameRate = 100; // Set this to 1 to optimize for flipbook output, 0 to do analysis normally. // // Default value: 0 (normal analysis). settings.fOptimizeForFlipbook = 0; // Set this to -1 to seed the random number generator with the current time. // Any other number will be used directly for the random number seed, which // is useful if you want repeatable speech gestures. This value does not // influence lip-synching at all. // // Default value: -1 (use current time). settings.fRandomSeed = -1; // Path to the configuration (.INI) file with phoneme-to-speech-target // mapping. Set this to NULL to use the default mapping. // // Default value: NULL (use default mapping). settings.fConfigFile = NULL; char const *text = ConstructInputSentence( inwords ); Printf( "Analyzing: \"%s\"\n", text[ 0 ] ? text : TEXTLESS_WORDNAME ); TALKBACK_ERR err = talkback->TalkBackGetAnalysis( ppAnalysis, wavfile, text, &settings ); if ( err != TALKBACK_NOERR ) { DescribeError( err ); return false; } Printf( "Analysis successful...\n" ); return true; } typedef struct { TALKBACK_PHONEME phoneme; char const *string; } TBPHONEMES_t; static TBPHONEMES_t g_TBPhonemeList[]= { { TALKBACK_PHONEME_IY, "iy" }, { TALKBACK_PHONEME_IH, "ih" }, { TALKBACK_PHONEME_EH, "eh" }, { TALKBACK_PHONEME_EY, "ey" }, { TALKBACK_PHONEME_AE, "ae" }, { TALKBACK_PHONEME_AA, "aa" }, { TALKBACK_PHONEME_AW, "aw" }, { TALKBACK_PHONEME_AY, "ay" }, { TALKBACK_PHONEME_AH, "ah" }, { TALKBACK_PHONEME_AO, "ao" }, { TALKBACK_PHONEME_OY, "oy" }, { TALKBACK_PHONEME_OW, "ow" }, { TALKBACK_PHONEME_UH, "uh" }, { TALKBACK_PHONEME_UW, "uw" }, { TALKBACK_PHONEME_ER, "er" }, { TALKBACK_PHONEME_AX, "ax" }, { TALKBACK_PHONEME_S, "s" }, { TALKBACK_PHONEME_SH, "sh" }, { TALKBACK_PHONEME_Z, "z" }, { TALKBACK_PHONEME_ZH, "zh" }, { TALKBACK_PHONEME_F, "f" }, { TALKBACK_PHONEME_TH, "th" }, { TALKBACK_PHONEME_V, "v" }, { TALKBACK_PHONEME_DH, "dh" }, { TALKBACK_PHONEME_M, "m" }, { TALKBACK_PHONEME_N, "n" }, { TALKBACK_PHONEME_NG, "ng" }, { TALKBACK_PHONEME_L, "l" }, { TALKBACK_PHONEME_R, "r" }, { TALKBACK_PHONEME_W, "w" }, { TALKBACK_PHONEME_Y, "y" }, { TALKBACK_PHONEME_HH, "hh" }, { TALKBACK_PHONEME_B, "b" }, { TALKBACK_PHONEME_D, "d" }, { TALKBACK_PHONEME_JH, "jh" }, { TALKBACK_PHONEME_G, "g" }, { TALKBACK_PHONEME_P, "p" }, { TALKBACK_PHONEME_T, "t" }, { TALKBACK_PHONEME_K, "k" }, { TALKBACK_PHONEME_CH, "ch" }, { TALKBACK_PHONEME_SIL, "" }, { -1, NULL } }; char const *TBPhonemeToString( TALKBACK_PHONEME phoneme ) { if ( phoneme < TALKBACK_PHONEME_FIRST || phoneme > TALKBACK_PHONEME_LAST ) { return "Bogus"; } TBPHONEMES_t *item = &g_TBPhonemeList[ phoneme ]; return item->string; } //----------------------------------------------------------------------------- // Purpose: // Input : *analysis - // time - // start - // Output : int //----------------------------------------------------------------------------- int CPhonemeExtractorLipSinc::GetPhonemeIndexAtWord( TALKBACK_ANALYSIS *analysis, double time, bool start ) { long count; TALKBACK_ERR err = talkback->TalkBackGetNumPhonemes( analysis, &count ); if ( err != TALKBACK_NOERR ) { DescribeError( err ); return -1; } if ( count <= 0L ) return -1; // Bogus if ( count >= 100000L ) return -1; for ( int i = 0; i < (int)count; i++ ) { TALKBACK_PHONEME tbPhoneme = TALKBACK_PHONEME_INVALID; err = talkback->TalkBackGetPhonemeEnum( analysis, i, &tbPhoneme ); if ( err != TALKBACK_NOERR ) { DescribeError( err ); continue; } double t; if ( start ) { err = talkback->TalkBackGetPhonemeStartTime( analysis, i, &t ); } else { err = talkback->TalkBackGetPhonemeEndTime( analysis, i, &t ); } if ( err != TALKBACK_NOERR ) { DescribeError( err ); continue; } if ( t == time ) { return i; } } return -1; } //----------------------------------------------------------------------------- // Purpose: // Input : *analysis - // starttime - // Output : int //----------------------------------------------------------------------------- int CPhonemeExtractorLipSinc::GetPhonemeIndexAtWordStart( TALKBACK_ANALYSIS *analysis, double starttime ) { return GetPhonemeIndexAtWord( analysis, starttime, true ); } //----------------------------------------------------------------------------- // Purpose: // Input : *analysis - // endtime - // Output : int //----------------------------------------------------------------------------- int CPhonemeExtractorLipSinc::GetPhonemeIndexAtWordEnd( TALKBACK_ANALYSIS *analysis, double endtime ) { return GetPhonemeIndexAtWord( analysis, endtime, false ); } CPhonemeExtractorLipSinc::CAnalyzedPhoneme *CPhonemeExtractorLipSinc::GetAnalyzedPhoneme( TALKBACK_ANALYSIS *analysis, int index ) { static CAnalyzedPhoneme p; memset( &p, 0, sizeof( p ) ); TALKBACK_PHONEME tb; TALKBACK_ERR err = talkback->TalkBackGetPhonemeEnum( analysis, index, &tb ); if ( err != TALKBACK_NOERR ) { DescribeError( err ); return NULL; } strcpy( p.phoneme, TBPhonemeToString( tb ) ); err = talkback->TalkBackGetPhonemeStartTime( analysis, index, &p.starttime ); if ( err != TALKBACK_NOERR ) { DescribeError( err ); return NULL; } err = talkback->TalkBackGetPhonemeEndTime( analysis, index, &p.endtime ); if ( err != TALKBACK_NOERR ) { DescribeError( err ); return NULL; } return &p; } CPhonemeExtractorLipSinc::CAnalyzedWord *CPhonemeExtractorLipSinc::GetAnalyzedWord( TALKBACK_ANALYSIS *analysis, int index ) { static CAnalyzedWord w; memset( &w, 0, sizeof( w ) ); long chars = sizeof( w.buffer ); TALKBACK_ERR err = talkback->TalkBackGetWord( analysis, index, chars, w.buffer ); if ( err != TALKBACK_NOERR ) { DescribeError( err ); return NULL; } err = talkback->TalkBackGetWordStartTime( analysis, index, &w.starttime ); if ( err != TALKBACK_NOERR ) { DescribeError( err ); return NULL; } err = talkback->TalkBackGetWordEndTime( analysis, index, &w.endtime ); if ( err != TALKBACK_NOERR ) { DescribeError( err ); return NULL; } return &w; } //----------------------------------------------------------------------------- // Purpose: // Input : *w1 - // *w2 - // Output : Returns true on success, false on failure. //----------------------------------------------------------------------------- bool FuzzyWordMatch( char const *w1, char const *w2 ) { int len1 = strlen( w1 ); int len2 = strlen( w2 ); int minlen = min( len1, len2 ); // Found a match if ( !strnicmp( w1, w2, minlen ) ) return true; int letterdiff = abs( len1 - len2 ); // More than three letters different, don't bother if ( letterdiff > 5 ) return false; // Compute a "delta" char *p1 = (char *)w1; char *p2 = (char *)w2; CUtlVector word1; CUtlVector word2; while ( *p1 ) { if ( V_isalpha( *p1 ) ) { word1.AddToTail( *p1 ); } p1++; } while ( *p2 ) { if ( V_isalpha( *p2 ) ) { word2.AddToTail( *p2 ); } p2++; } int i; for ( i = 0; i < word1.Count(); i++ ) { char c = word1[ i ]; // See if c is in word 2, if so subtract it out int idx = word2.Find( c ); if ( idx != word2.InvalidIndex() ) { word2.Remove( idx ); } } if ( word2.Count() <= letterdiff ) return true; word2.RemoveAll(); while ( *p2 ) { if ( V_isalpha( *p2 ) ) { word2.AddToTail( *p2 ); } p2++; } for ( i = 0; i < word2.Count(); i++ ) { char c = word2[ i ]; // See if c is in word 2, if so subtract it out int idx = word1.Find( c ); if ( idx != word1.InvalidIndex() ) { word1.Remove( idx ); } } if ( word1.Count() <= letterdiff ) return true; return false; } //----------------------------------------------------------------------------- // Purpose: For foreign language stuff, if inwords is empty, process anyway... // Input : *analysis - // outwords - //----------------------------------------------------------------------------- void CPhonemeExtractorLipSinc::ProcessWordsTextless( TALKBACK_ANALYSIS *analysis, CSentence& outwords ) { long count; TALKBACK_ERR err = talkback->TalkBackGetNumPhonemes( analysis, &count ); if ( err != TALKBACK_NOERR ) { DescribeError( err ); return; } CWordTag *newWord = new CWordTag; newWord->SetWord( TEXTLESS_WORDNAME ); float starttime = 0.0f; float endtime = 1.0f; for ( int i = 0; i < count; ++i ) { // Get phoneme and timing info CAnalyzedPhoneme *ph = GetAnalyzedPhoneme( analysis, i ); if ( !ph ) continue; CPhonemeTag *ptag = new CPhonemeTag; if ( i == 0 || ( ph->starttime < starttime ) ) { starttime = ph->starttime; } if ( i == 0 || ( ph->endtime > endtime ) ) { endtime = ph->endtime; } ptag->SetStartTime( ph->starttime ); ptag->SetEndTime( ph->endtime ); ptag->m_uiStartByte = ComputeByteFromTime( ph->starttime ); ptag->m_uiEndByte = ComputeByteFromTime( ph->endtime ); ptag->SetTag( ph->phoneme ); ptag->SetPhonemeCode( TextToPhoneme( ptag->GetTag() ) ); newWord->m_Phonemes.AddToTail( ptag ); } newWord->m_flStartTime = starttime; newWord->m_flEndTime = endtime; newWord->m_uiStartByte = ComputeByteFromTime( starttime ); newWord->m_uiEndByte = ComputeByteFromTime( endtime ); outwords.Reset(); outwords.AddWordTag( newWord ); outwords.SetTextFromWords(); } //----------------------------------------------------------------------------- // Purpose: // Input : *analysis - // inwords - // outwords - //----------------------------------------------------------------------------- void CPhonemeExtractorLipSinc::ProcessWords( TALKBACK_ANALYSIS *analysis, CSentence& inwords, CSentence& outwords ) { long count; TALKBACK_ERR err = talkback->TalkBackGetNumWords( analysis, &count ); if ( err != TALKBACK_NOERR ) { DescribeError( err ); return; } if ( count <= 0L ) { if ( inwords.m_Words.Count() == 0 || !Q_strnicmp( inwords.GetText(), TEXTLESS_WORDNAME, Q_strlen( TEXTLESS_WORDNAME ) ) ) { ProcessWordsTextless( analysis, outwords ); } return; } // Bogus if ( count >= 100000L ) return; int inwordpos = 0; int awordpos = 0; outwords.Reset(); char previous[ 256 ]; previous[ 0 ] = 0; while ( inwordpos < inwords.m_Words.Count() ) { CWordTag *in = inwords.m_Words[ inwordpos ]; if ( awordpos >= count ) { // Just copy the rest over without phonemes CWordTag *copy = new CWordTag( *in ); outwords.AddWordTag( copy ); inwordpos++; continue; } // Should never fail CAnalyzedWord *w = GetAnalyzedWord( analysis, awordpos ); if ( !w ) { return; } if ( !stricmp( w->buffer, "" ) ) { awordpos++; continue; } char const *check = ApplyTBWordRules( in->GetWord() ); if ( !FuzzyWordMatch( check, w->buffer ) ) { bool advance_input = true; if ( previous[ 0 ] ) { if ( FuzzyWordMatch( previous, w->buffer ) ) { advance_input = false; } } if ( advance_input ) { inwordpos++; } awordpos++; continue; } strcpy( previous, check ); CWordTag *newWord = new CWordTag; newWord->SetWord( in->GetWord() ); newWord->m_flStartTime = w->starttime; newWord->m_flEndTime = w->endtime; newWord->m_uiStartByte = ComputeByteFromTime( w->starttime ); newWord->m_uiEndByte = ComputeByteFromTime( w->endtime ); int phonemestart, phonemeend; phonemestart = GetPhonemeIndexAtWordStart( analysis, w->starttime ); phonemeend = GetPhonemeIndexAtWordEnd( analysis, w->endtime ); if ( phonemestart >= 0 && phonemeend >= 0 ) { for ( ; phonemestart <= phonemeend; phonemestart++ ) { // Get phoneme and timing info CAnalyzedPhoneme *ph = GetAnalyzedPhoneme( analysis, phonemestart ); if ( !ph ) continue; CPhonemeTag *ptag = new CPhonemeTag; ptag->SetStartTime( ph->starttime ); ptag->SetEndTime( ph->endtime ); ptag->m_uiStartByte = ComputeByteFromTime( ph->starttime ); ptag->m_uiEndByte = ComputeByteFromTime( ph->endtime ); ptag->SetTag( ph->phoneme ); ptag->SetPhonemeCode( TextToPhoneme( ptag->GetTag() ) ); newWord->m_Phonemes.AddToTail( ptag ); } } outwords.AddWordTag( newWord ); inwordpos++; awordpos++; } } char const *CPhonemeExtractorLipSinc::ApplyTBWordRules( char const *word ) { static char outword[ 256 ]; char const *in = word; char *out = outword; while ( *in && ( ( out - outword ) <= 255 ) ) { if ( *in == '\t' || *in == ' ' || *in == '\n' || *in == '-' || *in == '.' || *in == ',' || *in == ';' || *in == '?' || *in == '"' || *in == ':' || *in == '(' || *in == ')' ) { in++; *out++ = ' '; continue; } if ( !V_isprint( *in ) ) { in++; continue; } if ( *in >= 128 ) { in++; continue; } // Skip numbers if ( *in >= '0' && *in <= '9' ) { in++; continue; } // Convert all letters to upper case if ( *in >= 'a' && *in <= 'z' ) { *out++ = ( *in++ ) - 'a' + 'A'; continue; } if ( *in >= 'A' && *in <= 'Z' ) { *out++ = *in++; continue; } if ( *in == '\'' ) { *out++ = *in++; continue; } in++; } *out = 0; return outword; } //----------------------------------------------------------------------------- // Purpose: Given a wavfile and a list of inwords, determines the word/phonene // sample counts for the sentce // Output : SR_RESULT //----------------------------------------------------------------------------- SR_RESULT CPhonemeExtractorLipSinc::Extract( const char *wavfile, int numsamples, void (*pfnPrint)( const char *fmt, ... ), CSentence& inwords, CSentence& outwords ) { // g_enableTalkBackDebuggingOutput = 1; m_pfnPrint = pfnPrint; if ( !InitLipSinc() ) { return SR_RESULT_ERROR; } m_flSampleCount = numsamples; if ( !CheckSoundFile( wavfile ) ) { FreeLibrary( m_hHelper ); return SR_RESULT_ERROR; } TALKBACK_ANALYSIS *analysis = NULL; if ( !AttemptAnalysis( &analysis, wavfile, inwords ) ) { FreeLibrary( m_hHelper ); return SR_RESULT_FAILED; } if ( strlen( inwords.GetText() ) <= 0 ) { inwords.SetTextFromWords(); } outwords = inwords; // Examine data ProcessWords( analysis, inwords, outwords ); if ( analysis ) { talkback->TalkBackFreeAnalysis( &analysis ); } return SR_RESULT_SUCCESS; } EXPOSE_SINGLE_INTERFACE( CPhonemeExtractorLipSinc, IPhonemeExtractor, VPHONEME_EXTRACTOR_INTERFACE );