Counter Strike : Global Offensive Source Code
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1074 lines
23 KiB

  1. //========= Copyright © 1996-2005, Valve Corporation, All rights reserved. ============//
  2. //
  3. // Purpose:
  4. //
  5. // $NoKeywords: $
  6. //=============================================================================//
  7. #include <stdio.h>
  8. #include <stdarg.h>
  9. #include <memory.h>
  10. #include <windows.h>
  11. #include <mmsystem.h>
  12. #include <mmreg.h>
  13. #include <sys/types.h>
  14. #include <sys/stat.h>
  15. #include "phonemeextractor/PhonemeExtractor.h"
  16. #include "ims_helper/ims_helper.h"
  17. #include "tier0/dbg.h"
  18. #include "sentence.h"
  19. #include "PhonemeConverter.h"
  20. #include "tier1/strtools.h"
  // Word text that stands for a sentence without any text: it is compared
  // against the input sentence text and used as the word name when phonemes
  // are extracted without words.
  #define TEXTLESS_WORDNAME "[Textless]"
  // Interface to the IMS helper. NULL until InitLipSinc() obtains it from the
  // factory exported by ims_helper.dll.
  static IImsHelper *talkback = NULL;
  23. //-----------------------------------------------------------------------------
  24. // Purpose: Expose the interface
  25. //-----------------------------------------------------------------------------
  26. class CPhonemeExtractorLipSinc : public IPhonemeExtractor
  27. {
  28. public:
  29. virtual PE_APITYPE GetAPIType() const
  30. {
  31. return SPEECH_API_LIPSINC;
  32. }
  33. // Used for menus, etc
  34. virtual char const *GetName() const
  35. {
  36. return "IMS (LipSinc)";
  37. }
  38. SR_RESULT Extract(
  39. const char *wavfile,
  40. int numsamples,
  41. void (*pfnPrint)( const char *fmt, ... ),
  42. CSentence& inwords,
  43. CSentence& outwords );
  44. CPhonemeExtractorLipSinc( void );
  45. ~CPhonemeExtractorLipSinc( void );
  46. enum
  47. {
  48. MAX_WORD_LENGTH = 128,
  49. };
  50. private:
  51. class CAnalyzedWord
  52. {
  53. public:
  54. char buffer[ MAX_WORD_LENGTH ];
  55. double starttime;
  56. double endtime;
  57. };
  58. class CAnalyzedPhoneme
  59. {
  60. public:
  61. char phoneme[ 32 ];
  62. double starttime;
  63. double endtime;
  64. };
  65. bool InitLipSinc( void );
  66. void ShutdownLipSinc( void );
  67. void DescribeError( TALKBACK_ERR err );
  68. void Printf( char const *fmt, ... );
  69. bool CheckSoundFile( char const *filename );
  70. bool GetInitialized( void );
  71. void SetInitialized( bool init );
  72. void (*m_pfnPrint)( const char *fmt, ... );
  73. char const *ConstructInputSentence( CSentence& inwords );
  74. bool AttemptAnalysis( TALKBACK_ANALYSIS **ppAnalysis, char const *wavfile, CSentence& inwords );
  75. char const *ApplyTBWordRules( char const *word );
  76. void ProcessWords( TALKBACK_ANALYSIS *analysis, CSentence& inwords, CSentence& outwords );
  77. void ProcessWordsTextless( TALKBACK_ANALYSIS *analysis, CSentence& outwords );
  78. int GetPhonemeIndexAtWord( TALKBACK_ANALYSIS *analysis, double time, bool checkstart );
  79. int GetPhonemeIndexAtWordStart( TALKBACK_ANALYSIS *analysis, double starttime );
  80. int GetPhonemeIndexAtWordEnd( TALKBACK_ANALYSIS *analysis, double endtime );
  81. CAnalyzedWord *GetAnalyzedWord( TALKBACK_ANALYSIS *analysis, int index );
  82. CAnalyzedPhoneme *GetAnalyzedPhoneme( TALKBACK_ANALYSIS *analysis, int index );
  83. int ComputeByteFromTime( float time );
  84. bool m_bInitialized;
  85. float m_flSampleCount;
  86. float m_flDuration;
  87. float m_flSamplesPerSecond;
  88. int m_nBytesPerSample;
  89. HMODULE m_hHelper;
  90. };
  91. CPhonemeExtractorLipSinc::CPhonemeExtractorLipSinc( void )
  92. {
  93. m_hHelper = (HMODULE)0;
  94. m_pfnPrint = NULL;
  95. m_bInitialized = false;
  96. m_flSampleCount = 0.0f;
  97. m_flDuration = 0.0f;
  98. m_flSamplesPerSecond = 0.0f;
  99. m_nBytesPerSample = 0;
  100. }
  101. CPhonemeExtractorLipSinc::~CPhonemeExtractorLipSinc( void )
  102. {
  103. if ( GetInitialized() )
  104. {
  105. ShutdownLipSinc();
  106. }
  107. }
  108. bool CPhonemeExtractorLipSinc::GetInitialized( void )
  109. {
  110. return m_bInitialized;
  111. }
  112. void CPhonemeExtractorLipSinc::SetInitialized( bool init )
  113. {
  114. m_bInitialized = init;
  115. }
  116. int CPhonemeExtractorLipSinc::ComputeByteFromTime( float time )
  117. {
  118. if ( !m_flDuration )
  119. return 0;
  120. float frac = time / m_flDuration;
  121. float sampleNumber = frac * m_flSampleCount;
  122. int bytenumber = sampleNumber * m_nBytesPerSample;
  123. return bytenumber;
  124. }
  125. void CPhonemeExtractorLipSinc::DescribeError( TALKBACK_ERR err )
  126. {
  127. Assert( m_pfnPrint );
  128. // Get the error description.
  129. char errorDesc[256] = "";
  130. if ( err != TALKBACK_NOERR )
  131. {
  132. talkback->TalkBackGetErrorString( err, sizeof(errorDesc), errorDesc );
  133. }
  134. // Report or log the error...
  135. (*m_pfnPrint)( "LIPSINC ERROR: %s\n", errorDesc );
  136. }
  137. //-----------------------------------------------------------------------------
  138. // Purpose:
  139. // Input : *fmt -
  140. // .. -
  141. //-----------------------------------------------------------------------------
  142. void CPhonemeExtractorLipSinc::Printf( char const *fmt, ... )
  143. {
  144. Assert( m_pfnPrint );
  145. char string[ 4096 ];
  146. va_list argptr;
  147. va_start( argptr, fmt );
  148. vsprintf( string, fmt, argptr );
  149. va_end( argptr );
  150. (*m_pfnPrint)( "%s", string );
  151. }
  152. bool CPhonemeExtractorLipSinc::CheckSoundFile( char const *filename )
  153. {
  154. TALKBACK_SOUND_FILE_METRICS fm;
  155. memset( &fm, 0, sizeof( fm ) );
  156. fm.m_size = sizeof( fm );
  157. TALKBACK_ERR err = talkback->TalkBackGetSoundFileMetrics( filename, &fm );
  158. if ( err != TALKBACK_NOERR )
  159. {
  160. DescribeError( err );
  161. return false;
  162. }
  163. if ( fm.m_canBeAnalyzed )
  164. {
  165. Printf( "%s: %.2f s, rate %i, bits %i, channels %i\n",
  166. filename,
  167. fm.m_duration,
  168. fm.m_sampleRate,
  169. fm.m_bitsPerSample,
  170. fm.m_channelCount );
  171. }
  172. m_flDuration = fm.m_duration;
  173. if ( m_flDuration > 0 )
  174. {
  175. m_flSamplesPerSecond = m_flSampleCount / m_flDuration;
  176. }
  177. else
  178. {
  179. m_flSamplesPerSecond = 0.0f;
  180. }
  181. m_nBytesPerSample = ( fm.m_bitsPerSample >> 3 );
  182. m_flSampleCount /= m_nBytesPerSample;
  183. m_nBytesPerSample /= fm.m_channelCount;
  184. return fm.m_canBeAnalyzed ? true : false;
  185. }
  186. typedef IImsHelper *(*pfnImsHelper)(void);
  187. //-----------------------------------------------------------------------------
  188. // Purpose:
  189. // Output : Returns true on success, false on failure.
  190. //-----------------------------------------------------------------------------
  191. bool CPhonemeExtractorLipSinc::InitLipSinc( void )
  192. {
  193. if ( GetInitialized() )
  194. {
  195. return true;
  196. }
  197. m_hHelper = LoadLibrary( "ims_helper.dll" );
  198. if ( !m_hHelper )
  199. {
  200. return false;
  201. }
  202. pfnImsHelper factory = (pfnImsHelper)::GetProcAddress( m_hHelper, "GetImsHelper" );
  203. if ( !factory )
  204. {
  205. FreeLibrary( m_hHelper );
  206. return false;
  207. }
  208. talkback = reinterpret_cast< IImsHelper * >( (*factory)() );
  209. if ( !talkback )
  210. {
  211. FreeLibrary( m_hHelper );
  212. return false;
  213. }
  214. char szExeName[ MAX_PATH ];
  215. szExeName[0] = 0;
  216. GetModuleFileName( (HMODULE)0, szExeName, sizeof( szExeName ) );
  217. char szBaseDir[ MAX_PATH ];
  218. Q_strncpy( szBaseDir, szExeName, sizeof( szBaseDir ) );
  219. Q_StripLastDir( szBaseDir, sizeof( szBaseDir ) );
  220. Q_StripTrailingSlash( szBaseDir );
  221. Q_strlower( szBaseDir );
  222. char coreDataDir[ 512 ];
  223. Q_snprintf( coreDataDir, sizeof( coreDataDir ), "%s\\lipsinc_data\\",
  224. szBaseDir );
  225. Q_FixSlashes( coreDataDir );
  226. char szCheck[ 512 ];
  227. Q_snprintf( szCheck, sizeof( szCheck ), "%sDtC6dal.dat", coreDataDir );
  228. struct __stat64 buf;
  229. if ( _stat64( szCheck, &buf ) != 0 )
  230. {
  231. Q_snprintf( coreDataDir, sizeof( coreDataDir ), "%s\\bin\\lipsinc_data\\",
  232. szBaseDir );
  233. Q_FixSlashes( coreDataDir );
  234. Q_snprintf( szCheck, sizeof( szCheck ), "%sDtC6dal.dat", coreDataDir );
  235. if ( _stat64( szCheck, &buf ) != 0 )
  236. {
  237. Error( "Unable to find talkback data files in %s.", coreDataDir );
  238. }
  239. }
  240. TALKBACK_ERR err;
  241. err = talkback->TalkBackStartupLibrary( coreDataDir );
  242. if ( err != TALKBACK_NOERR )
  243. {
  244. DescribeError( err );
  245. FreeLibrary( m_hHelper );
  246. return false;
  247. }
  248. long verMajor = 0;
  249. long verMinor = 0;
  250. long verRevision = 0;
  251. err = talkback->TalkBackGetVersion(
  252. &verMajor,
  253. &verMinor,
  254. &verRevision);
  255. if ( err != TALKBACK_NOERR )
  256. {
  257. DescribeError( err );
  258. FreeLibrary( m_hHelper );
  259. return false;
  260. }
  261. Printf( "Lipsinc TalkBack Version %i.%i.%i\n", verMajor, verMinor, verRevision );
  262. m_bInitialized = true;
  263. return true;
  264. }
  265. //-----------------------------------------------------------------------------
  266. // Purpose:
  267. //-----------------------------------------------------------------------------
  268. void CPhonemeExtractorLipSinc::ShutdownLipSinc( void )
  269. {
  270. // HACK HACK: This seems to crash on exit sometimes
  271. __try
  272. {
  273. talkback->TalkBackShutdownLibrary();
  274. FreeLibrary( m_hHelper );
  275. }
  276. __except(EXCEPTION_EXECUTE_HANDLER )
  277. {
  278. OutputDebugString( "----> Crash shutting down TALKBACK sdk, exception caught and ignored\n" );
  279. }
  280. }
  281. //-----------------------------------------------------------------------------
  282. // Purpose:
  283. // Input : inwords -
  284. // Output : char const
  285. //-----------------------------------------------------------------------------
  286. char const *CPhonemeExtractorLipSinc::ConstructInputSentence( CSentence& inwords )
  287. {
  288. static char sentence[ 16384 ];
  289. sentence[ 0 ] = 0;
  290. int last = inwords.m_Words.Count() - 1;
  291. for ( int i = 0 ; i <= last; i++ )
  292. {
  293. CWordTag *w = inwords.m_Words[ i ];
  294. strcat( sentence, w->GetWord() );
  295. if ( i != last )
  296. {
  297. strcat( sentence, " " );
  298. }
  299. }
  300. if ( inwords.m_Words.Count() == 1 &&
  301. !Q_strnicmp( inwords.GetText(), TEXTLESS_WORDNAME, Q_strlen( TEXTLESS_WORDNAME ) ) )
  302. {
  303. sentence[ 0 ] = 0;
  304. }
  305. return sentence;
  306. }
  307. bool CPhonemeExtractorLipSinc::AttemptAnalysis( TALKBACK_ANALYSIS **ppAnalysis, char const *wavfile, CSentence& inwords )
  308. {
  309. *ppAnalysis = NULL;
  310. TALKBACK_ANALYSIS_SETTINGS settings;
  311. memset( &settings, 0, sizeof( settings ) );
  312. // Set this field to sizeof(TALKBACK_ANALYSIS_SETTINGS) before using the
  313. // structure.
  314. settings.fSize = sizeof( TALKBACK_ANALYSIS_SETTINGS );
  315. // Default value: 30 (frames per second).
  316. settings.fFrameRate = 100;
  317. // Set this to 1 to optimize for flipbook output, 0 to do analysis normally.
  318. //
  319. // Default value: 0 (normal analysis).
  320. settings.fOptimizeForFlipbook = 0;
  321. // Set this to -1 to seed the random number generator with the current time.
  322. // Any other number will be used directly for the random number seed, which
  323. // is useful if you want repeatable speech gestures. This value does not
  324. // influence lip-synching at all.
  325. //
  326. // Default value: -1 (use current time).
  327. settings.fRandomSeed = -1;
  328. // Path to the configuration (.INI) file with phoneme-to-speech-target
  329. // mapping. Set this to NULL to use the default mapping.
  330. //
  331. // Default value: NULL (use default mapping).
  332. settings.fConfigFile = NULL;
  333. char const *text = ConstructInputSentence( inwords );
  334. Printf( "Analyzing: \"%s\"\n", text[ 0 ] ? text : TEXTLESS_WORDNAME );
  335. TALKBACK_ERR err = talkback->TalkBackGetAnalysis(
  336. ppAnalysis,
  337. wavfile,
  338. text,
  339. &settings );
  340. if ( err != TALKBACK_NOERR )
  341. {
  342. DescribeError( err );
  343. return false;
  344. }
  345. Printf( "Analysis successful...\n" );
  346. return true;
  347. }
  348. typedef struct
  349. {
  350. TALKBACK_PHONEME phoneme;
  351. char const *string;
  352. } TBPHONEMES_t;
  353. static TBPHONEMES_t g_TBPhonemeList[]=
  354. {
  355. { TALKBACK_PHONEME_IY, "iy" },
  356. { TALKBACK_PHONEME_IH, "ih" },
  357. { TALKBACK_PHONEME_EH, "eh" },
  358. { TALKBACK_PHONEME_EY, "ey" },
  359. { TALKBACK_PHONEME_AE, "ae" },
  360. { TALKBACK_PHONEME_AA, "aa" },
  361. { TALKBACK_PHONEME_AW, "aw" },
  362. { TALKBACK_PHONEME_AY, "ay" },
  363. { TALKBACK_PHONEME_AH, "ah" },
  364. { TALKBACK_PHONEME_AO, "ao" },
  365. { TALKBACK_PHONEME_OY, "oy" },
  366. { TALKBACK_PHONEME_OW, "ow" },
  367. { TALKBACK_PHONEME_UH, "uh" },
  368. { TALKBACK_PHONEME_UW, "uw" },
  369. { TALKBACK_PHONEME_ER, "er" },
  370. { TALKBACK_PHONEME_AX, "ax" },
  371. { TALKBACK_PHONEME_S, "s" },
  372. { TALKBACK_PHONEME_SH, "sh" },
  373. { TALKBACK_PHONEME_Z, "z" },
  374. { TALKBACK_PHONEME_ZH, "zh" },
  375. { TALKBACK_PHONEME_F, "f" },
  376. { TALKBACK_PHONEME_TH, "th" },
  377. { TALKBACK_PHONEME_V, "v" },
  378. { TALKBACK_PHONEME_DH, "dh" },
  379. { TALKBACK_PHONEME_M, "m" },
  380. { TALKBACK_PHONEME_N, "n" },
  381. { TALKBACK_PHONEME_NG, "ng" },
  382. { TALKBACK_PHONEME_L, "l" },
  383. { TALKBACK_PHONEME_R, "r" },
  384. { TALKBACK_PHONEME_W, "w" },
  385. { TALKBACK_PHONEME_Y, "y" },
  386. { TALKBACK_PHONEME_HH, "hh" },
  387. { TALKBACK_PHONEME_B, "b" },
  388. { TALKBACK_PHONEME_D, "d" },
  389. { TALKBACK_PHONEME_JH, "jh" },
  390. { TALKBACK_PHONEME_G, "g" },
  391. { TALKBACK_PHONEME_P, "p" },
  392. { TALKBACK_PHONEME_T, "t" },
  393. { TALKBACK_PHONEME_K, "k" },
  394. { TALKBACK_PHONEME_CH, "ch" },
  395. { TALKBACK_PHONEME_SIL, "<sil>" },
  396. { -1, NULL }
  397. };
  398. char const *TBPhonemeToString( TALKBACK_PHONEME phoneme )
  399. {
  400. if ( phoneme < TALKBACK_PHONEME_FIRST || phoneme > TALKBACK_PHONEME_LAST )
  401. {
  402. return "Bogus";
  403. }
  404. TBPHONEMES_t *item = &g_TBPhonemeList[ phoneme ];
  405. return item->string;
  406. }
  407. //-----------------------------------------------------------------------------
  408. // Purpose:
  409. // Input : *analysis -
  410. // time -
  411. // start -
  412. // Output : int
  413. //-----------------------------------------------------------------------------
  414. int CPhonemeExtractorLipSinc::GetPhonemeIndexAtWord( TALKBACK_ANALYSIS *analysis, double time, bool start )
  415. {
  416. long count;
  417. TALKBACK_ERR err = talkback->TalkBackGetNumPhonemes( analysis, &count );
  418. if ( err != TALKBACK_NOERR )
  419. {
  420. DescribeError( err );
  421. return -1;
  422. }
  423. if ( count <= 0L )
  424. return -1;
  425. // Bogus
  426. if ( count >= 100000L )
  427. return -1;
  428. for ( int i = 0; i < (int)count; i++ )
  429. {
  430. TALKBACK_PHONEME tbPhoneme = TALKBACK_PHONEME_INVALID;
  431. err = talkback->TalkBackGetPhonemeEnum( analysis, i, &tbPhoneme );
  432. if ( err != TALKBACK_NOERR )
  433. {
  434. DescribeError( err );
  435. continue;
  436. }
  437. double t;
  438. if ( start )
  439. {
  440. err = talkback->TalkBackGetPhonemeStartTime( analysis, i, &t );
  441. }
  442. else
  443. {
  444. err = talkback->TalkBackGetPhonemeEndTime( analysis, i, &t );
  445. }
  446. if ( err != TALKBACK_NOERR )
  447. {
  448. DescribeError( err );
  449. continue;
  450. }
  451. if ( t == time )
  452. {
  453. return i;
  454. }
  455. }
  456. return -1;
  457. }
  458. //-----------------------------------------------------------------------------
  459. // Purpose:
  460. // Input : *analysis -
  461. // starttime -
  462. // Output : int
  463. //-----------------------------------------------------------------------------
  464. int CPhonemeExtractorLipSinc::GetPhonemeIndexAtWordStart( TALKBACK_ANALYSIS *analysis, double starttime )
  465. {
  466. return GetPhonemeIndexAtWord( analysis, starttime, true );
  467. }
  468. //-----------------------------------------------------------------------------
  469. // Purpose:
  470. // Input : *analysis -
  471. // endtime -
  472. // Output : int
  473. //-----------------------------------------------------------------------------
  474. int CPhonemeExtractorLipSinc::GetPhonemeIndexAtWordEnd( TALKBACK_ANALYSIS *analysis, double endtime )
  475. {
  476. return GetPhonemeIndexAtWord( analysis, endtime, false );
  477. }
  478. CPhonemeExtractorLipSinc::CAnalyzedPhoneme *CPhonemeExtractorLipSinc::GetAnalyzedPhoneme( TALKBACK_ANALYSIS *analysis, int index )
  479. {
  480. static CAnalyzedPhoneme p;
  481. memset( &p, 0, sizeof( p ) );
  482. TALKBACK_PHONEME tb;
  483. TALKBACK_ERR err = talkback->TalkBackGetPhonemeEnum( analysis, index, &tb );
  484. if ( err != TALKBACK_NOERR )
  485. {
  486. DescribeError( err );
  487. return NULL;
  488. }
  489. strcpy( p.phoneme, TBPhonemeToString( tb ) );
  490. err = talkback->TalkBackGetPhonemeStartTime( analysis, index, &p.starttime );
  491. if ( err != TALKBACK_NOERR )
  492. {
  493. DescribeError( err );
  494. return NULL;
  495. }
  496. err = talkback->TalkBackGetPhonemeEndTime( analysis, index, &p.endtime );
  497. if ( err != TALKBACK_NOERR )
  498. {
  499. DescribeError( err );
  500. return NULL;
  501. }
  502. return &p;
  503. }
  504. CPhonemeExtractorLipSinc::CAnalyzedWord *CPhonemeExtractorLipSinc::GetAnalyzedWord( TALKBACK_ANALYSIS *analysis, int index )
  505. {
  506. static CAnalyzedWord w;
  507. memset( &w, 0, sizeof( w ) );
  508. long chars = sizeof( w.buffer );
  509. TALKBACK_ERR err = talkback->TalkBackGetWord( analysis, index, chars, w.buffer );
  510. if ( err != TALKBACK_NOERR )
  511. {
  512. DescribeError( err );
  513. return NULL;
  514. }
  515. err = talkback->TalkBackGetWordStartTime( analysis, index, &w.starttime );
  516. if ( err != TALKBACK_NOERR )
  517. {
  518. DescribeError( err );
  519. return NULL;
  520. }
  521. err = talkback->TalkBackGetWordEndTime( analysis, index, &w.endtime );
  522. if ( err != TALKBACK_NOERR )
  523. {
  524. DescribeError( err );
  525. return NULL;
  526. }
  527. return &w;
  528. }
  529. //-----------------------------------------------------------------------------
  530. // Purpose:
  531. // Input : *w1 -
  532. // *w2 -
  533. // Output : Returns true on success, false on failure.
  534. //-----------------------------------------------------------------------------
  535. bool FuzzyWordMatch( char const *w1, char const *w2 )
  536. {
  537. int len1 = strlen( w1 );
  538. int len2 = strlen( w2 );
  539. int minlen = min( len1, len2 );
  540. // Found a match
  541. if ( !strnicmp( w1, w2, minlen ) )
  542. return true;
  543. int letterdiff = abs( len1 - len2 );
  544. // More than three letters different, don't bother
  545. if ( letterdiff > 5 )
  546. return false;
  547. // Compute a "delta"
  548. char *p1 = (char *)w1;
  549. char *p2 = (char *)w2;
  550. CUtlVector <char> word1;
  551. CUtlVector <char> word2;
  552. while ( *p1 )
  553. {
  554. if ( V_isalpha( *p1 ) )
  555. {
  556. word1.AddToTail( *p1 );
  557. }
  558. p1++;
  559. }
  560. while ( *p2 )
  561. {
  562. if ( V_isalpha( *p2 ) )
  563. {
  564. word2.AddToTail( *p2 );
  565. }
  566. p2++;
  567. }
  568. int i;
  569. for ( i = 0; i < word1.Count(); i++ )
  570. {
  571. char c = word1[ i ];
  572. // See if c is in word 2, if so subtract it out
  573. int idx = word2.Find( c );
  574. if ( idx != word2.InvalidIndex() )
  575. {
  576. word2.Remove( idx );
  577. }
  578. }
  579. if ( word2.Count() <= letterdiff )
  580. return true;
  581. word2.RemoveAll();
  582. while ( *p2 )
  583. {
  584. if ( V_isalpha( *p2 ) )
  585. {
  586. word2.AddToTail( *p2 );
  587. }
  588. p2++;
  589. }
  590. for ( i = 0; i < word2.Count(); i++ )
  591. {
  592. char c = word2[ i ];
  593. // See if c is in word 2, if so subtract it out
  594. int idx = word1.Find( c );
  595. if ( idx != word1.InvalidIndex() )
  596. {
  597. word1.Remove( idx );
  598. }
  599. }
  600. if ( word1.Count() <= letterdiff )
  601. return true;
  602. return false;
  603. }
  604. //-----------------------------------------------------------------------------
  605. // Purpose: For foreign language stuff, if inwords is empty, process anyway...
  606. // Input : *analysis -
  607. // outwords -
  608. //-----------------------------------------------------------------------------
  609. void CPhonemeExtractorLipSinc::ProcessWordsTextless( TALKBACK_ANALYSIS *analysis, CSentence& outwords )
  610. {
  611. long count;
  612. TALKBACK_ERR err = talkback->TalkBackGetNumPhonemes( analysis, &count );
  613. if ( err != TALKBACK_NOERR )
  614. {
  615. DescribeError( err );
  616. return;
  617. }
  618. CWordTag *newWord = new CWordTag;
  619. newWord->SetWord( TEXTLESS_WORDNAME );
  620. float starttime = 0.0f;
  621. float endtime = 1.0f;
  622. for ( int i = 0; i < count; ++i )
  623. {
  624. // Get phoneme and timing info
  625. CAnalyzedPhoneme *ph = GetAnalyzedPhoneme( analysis, i );
  626. if ( !ph )
  627. continue;
  628. CPhonemeTag *ptag = new CPhonemeTag;
  629. if ( i == 0 || ( ph->starttime < starttime ) )
  630. {
  631. starttime = ph->starttime;
  632. }
  633. if ( i == 0 || ( ph->endtime > endtime ) )
  634. {
  635. endtime = ph->endtime;
  636. }
  637. ptag->SetStartTime( ph->starttime );
  638. ptag->SetEndTime( ph->endtime );
  639. ptag->m_uiStartByte = ComputeByteFromTime( ph->starttime );
  640. ptag->m_uiEndByte = ComputeByteFromTime( ph->endtime );
  641. ptag->SetTag( ph->phoneme );
  642. ptag->SetPhonemeCode( TextToPhoneme( ptag->GetTag() ) );
  643. newWord->m_Phonemes.AddToTail( ptag );
  644. }
  645. newWord->m_flStartTime = starttime;
  646. newWord->m_flEndTime = endtime;
  647. newWord->m_uiStartByte = ComputeByteFromTime( starttime );
  648. newWord->m_uiEndByte = ComputeByteFromTime( endtime );
  649. outwords.Reset();
  650. outwords.AddWordTag( newWord );
  651. outwords.SetTextFromWords();
  652. }
  653. //-----------------------------------------------------------------------------
  654. // Purpose:
  655. // Input : *analysis -
  656. // inwords -
  657. // outwords -
  658. //-----------------------------------------------------------------------------
  659. void CPhonemeExtractorLipSinc::ProcessWords( TALKBACK_ANALYSIS *analysis, CSentence& inwords, CSentence& outwords )
  660. {
  661. long count;
  662. TALKBACK_ERR err = talkback->TalkBackGetNumWords( analysis, &count );
  663. if ( err != TALKBACK_NOERR )
  664. {
  665. DescribeError( err );
  666. return;
  667. }
  668. if ( count <= 0L )
  669. {
  670. if ( inwords.m_Words.Count() == 0 ||
  671. !Q_strnicmp( inwords.GetText(), TEXTLESS_WORDNAME, Q_strlen( TEXTLESS_WORDNAME ) ) )
  672. {
  673. ProcessWordsTextless( analysis, outwords );
  674. }
  675. return;
  676. }
  677. // Bogus
  678. if ( count >= 100000L )
  679. return;
  680. int inwordpos = 0;
  681. int awordpos = 0;
  682. outwords.Reset();
  683. char previous[ 256 ];
  684. previous[ 0 ] = 0;
  685. while ( inwordpos < inwords.m_Words.Count() )
  686. {
  687. CWordTag *in = inwords.m_Words[ inwordpos ];
  688. if ( awordpos >= count )
  689. {
  690. // Just copy the rest over without phonemes
  691. CWordTag *copy = new CWordTag( *in );
  692. outwords.AddWordTag( copy );
  693. inwordpos++;
  694. continue;
  695. }
  696. // Should never fail
  697. CAnalyzedWord *w = GetAnalyzedWord( analysis, awordpos );
  698. if ( !w )
  699. {
  700. return;
  701. }
  702. if ( !stricmp( w->buffer, "<SIL>" ) )
  703. {
  704. awordpos++;
  705. continue;
  706. }
  707. char const *check = ApplyTBWordRules( in->GetWord() );
  708. if ( !FuzzyWordMatch( check, w->buffer ) )
  709. {
  710. bool advance_input = true;
  711. if ( previous[ 0 ] )
  712. {
  713. if ( FuzzyWordMatch( previous, w->buffer ) )
  714. {
  715. advance_input = false;
  716. }
  717. }
  718. if ( advance_input )
  719. {
  720. inwordpos++;
  721. }
  722. awordpos++;
  723. continue;
  724. }
  725. strcpy( previous, check );
  726. CWordTag *newWord = new CWordTag;
  727. newWord->SetWord( in->GetWord() );
  728. newWord->m_flStartTime = w->starttime;
  729. newWord->m_flEndTime = w->endtime;
  730. newWord->m_uiStartByte = ComputeByteFromTime( w->starttime );
  731. newWord->m_uiEndByte = ComputeByteFromTime( w->endtime );
  732. int phonemestart, phonemeend;
  733. phonemestart = GetPhonemeIndexAtWordStart( analysis, w->starttime );
  734. phonemeend = GetPhonemeIndexAtWordEnd( analysis, w->endtime );
  735. if ( phonemestart >= 0 && phonemeend >= 0 )
  736. {
  737. for ( ; phonemestart <= phonemeend; phonemestart++ )
  738. {
  739. // Get phoneme and timing info
  740. CAnalyzedPhoneme *ph = GetAnalyzedPhoneme( analysis, phonemestart );
  741. if ( !ph )
  742. continue;
  743. CPhonemeTag *ptag = new CPhonemeTag;
  744. ptag->SetStartTime( ph->starttime );
  745. ptag->SetEndTime( ph->endtime );
  746. ptag->m_uiStartByte = ComputeByteFromTime( ph->starttime );
  747. ptag->m_uiEndByte = ComputeByteFromTime( ph->endtime );
  748. ptag->SetTag( ph->phoneme );
  749. ptag->SetPhonemeCode( TextToPhoneme( ptag->GetTag() ) );
  750. newWord->m_Phonemes.AddToTail( ptag );
  751. }
  752. }
  753. outwords.AddWordTag( newWord );
  754. inwordpos++;
  755. awordpos++;
  756. }
  757. }
  758. char const *CPhonemeExtractorLipSinc::ApplyTBWordRules( char const *word )
  759. {
  760. static char outword[ 256 ];
  761. char const *in = word;
  762. char *out = outword;
  763. while ( *in && ( ( out - outword ) <= 255 ) )
  764. {
  765. if ( *in == '\t' ||
  766. *in == ' ' ||
  767. *in == '\n' ||
  768. *in == '-' ||
  769. *in == '.' ||
  770. *in == ',' ||
  771. *in == ';' ||
  772. *in == '?' ||
  773. *in == '"' ||
  774. *in == ':' ||
  775. *in == '(' ||
  776. *in == ')' )
  777. {
  778. in++;
  779. *out++ = ' ';
  780. continue;
  781. }
  782. if ( !V_isprint( *in ) )
  783. {
  784. in++;
  785. continue;
  786. }
  787. if ( *in >= 128 )
  788. {
  789. in++;
  790. continue;
  791. }
  792. // Skip numbers
  793. if ( *in >= '0' && *in <= '9' )
  794. {
  795. in++;
  796. continue;
  797. }
  798. // Convert all letters to upper case
  799. if ( *in >= 'a' && *in <= 'z' )
  800. {
  801. *out++ = ( *in++ ) - 'a' + 'A';
  802. continue;
  803. }
  804. if ( *in >= 'A' && *in <= 'Z' )
  805. {
  806. *out++ = *in++;
  807. continue;
  808. }
  809. if ( *in == '\'' )
  810. {
  811. *out++ = *in++;
  812. continue;
  813. }
  814. in++;
  815. }
  816. *out = 0;
  817. return outword;
  818. }
  819. //-----------------------------------------------------------------------------
  820. // Purpose: Given a wavfile and a list of inwords, determines the word/phonene
  821. // sample counts for the sentce
  822. // Output : SR_RESULT
  823. //-----------------------------------------------------------------------------
  824. SR_RESULT CPhonemeExtractorLipSinc::Extract(
  825. const char *wavfile,
  826. int numsamples,
  827. void (*pfnPrint)( const char *fmt, ... ),
  828. CSentence& inwords,
  829. CSentence& outwords )
  830. {
  831. // g_enableTalkBackDebuggingOutput = 1;
  832. m_pfnPrint = pfnPrint;
  833. if ( !InitLipSinc() )
  834. {
  835. return SR_RESULT_ERROR;
  836. }
  837. m_flSampleCount = numsamples;
  838. if ( !CheckSoundFile( wavfile ) )
  839. {
  840. FreeLibrary( m_hHelper );
  841. return SR_RESULT_ERROR;
  842. }
  843. TALKBACK_ANALYSIS *analysis = NULL;
  844. if ( !AttemptAnalysis( &analysis, wavfile, inwords ) )
  845. {
  846. FreeLibrary( m_hHelper );
  847. return SR_RESULT_FAILED;
  848. }
  849. if ( strlen( inwords.GetText() ) <= 0 )
  850. {
  851. inwords.SetTextFromWords();
  852. }
  853. outwords = inwords;
  854. // Examine data
  855. ProcessWords( analysis, inwords, outwords );
  856. if ( analysis )
  857. {
  858. talkback->TalkBackFreeAnalysis( &analysis );
  859. }
  860. return SR_RESULT_SUCCESS;
  861. }
  // Publishes CPhonemeExtractorLipSinc as the IPhonemeExtractor implementation
  // under the VPHONEME_EXTRACTOR_INTERFACE name.
  // NOTE(review): the exact registration semantics come from the
  // EXPOSE_SINGLE_INTERFACE macro, which is defined outside this file - confirm.
  EXPOSE_SINGLE_INTERFACE( CPhonemeExtractorLipSinc, IPhonemeExtractor, VPHONEME_EXTRACTOR_INTERFACE );