Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

618 lines
26 KiB

  1. /*******************************************************************************
  2. * SpTtsEngDebug.h *
  3. *-----------------*
  4. * Description:
  5. * This header file contains debug output services for the TTS Engine
  6. *-------------------------------------------------------------------------------
  7. * Copyright (C) 1998-2000 Microsoft Corporation
  8. * All Rights Reserved
  9. *
  10. *-------------------------------------------------------------------------------
  11. * Revisions:
  12. *
  13. ********************************************************************* AKH ******/
  14. #ifndef spttsengdebug_h
  15. #define spttsengdebug_h
  16. #include "stdsentenum.h"
  17. #include "feedchain.h"
  //--- Identifies each debug stream.  The values are used as indices into the array of IStreams that
  //--- the debug file is written through, so they must stay dense and start at zero.
  //--- STREAM_LASTTYPE is the number of real streams, not a stream itself.
  typedef enum
  {
      STREAM_WAVE             = 0,
      STREAM_EPOCH            = 1,
      STREAM_UNIT             = 2,
      STREAM_WAVEINFO         = 3,
      STREAM_TOBI             = 4,
      STREAM_SENTENCEBREAKS   = 5,
      STREAM_NORMALIZEDTEXT   = 6,
      STREAM_LEXLOOKUP        = 7,
      STREAM_POSPOSSIBILITIES = 8,
      STREAM_MORPHOLOGY       = 9,
      STREAM_LASTTYPE         = 10
  } STREAM_TYPE;
  33. #ifdef _DEBUG
  34. //--- This struct is used to log units...
  35. #pragma pack (1)
  36. typedef struct
  37. {
  38. char name[8];
  39. long phonID;
  40. long unitID;
  41. long cSamples;
  42. float time;
  43. long cEpochs;
  44. long knots;
  45. long flags;
  46. long ctrlFlags;
  47. float pTime[KNOTS_PER_PHON];
  48. float pF0[KNOTS_PER_PHON];
  49. float pAmp[KNOTS_PER_PHON];
  50. enum SILENCE_SOURCE silenceSource;
  51. } UNIT_STREAM;
  52. #pragma pack ()
  53. //--- This struct is just used as a helper to initialize the PRONRECORD to all zeroes
  54. struct DebugPronRecord : PRONRECORD
  55. {
  56. public:
  57. DebugPronRecord() { ZeroMemory( (void*) this, sizeof( DebugPronRecord ) ); }
  58. operator =( PRONRECORD InRecord )
  59. {
  60. memcpy( this, &InRecord, sizeof( PRONRECORD ) );
  61. }
  62. };
  63. //--- This struct is used to replace the SPVCONTEXT struct for outputting to the debug streams -
  64. //--- cannot have any pointers in a struct which we will output as binary data...
  65. struct DebugContext
  66. {
  67. WCHAR Category[32];
  68. WCHAR Before[32];
  69. WCHAR After[32];
  70. public:
  71. DebugContext() { ZeroMemory( (void*) this, sizeof( DebugContext ) ); }
  72. operator =( SPVCONTEXT InContext )
  73. {
  74. if ( InContext.pCategory )
  75. {
  76. wcsncpy( Category, InContext.pCategory,
  77. wcslen(InContext.pCategory) > 31 ? 31 : wcslen(InContext.pCategory) );
  78. }
  79. if ( InContext.pBefore )
  80. {
  81. wcsncpy( Before, InContext.pBefore,
  82. wcslen(InContext.pBefore) > 31 ? 31 : wcslen(InContext.pBefore) );
  83. }
  84. if ( InContext.pAfter )
  85. {
  86. wcsncpy( After, InContext.pAfter,
  87. wcslen(InContext.pAfter) > 31 ? 31 : wcslen(InContext.pAfter) );
  88. }
  89. }
  90. };
  91. //--- This struct is used to replace the SPVSTATE struct for outputting to the debug streams -
  92. //--- cannot have any pointers in a struct which we will output as binary data...
  93. struct DebugState
  94. {
  95. SPVACTIONS eAction;
  96. LANGID LangID;
  97. WORD wReserved;
  98. long EmphAdj;
  99. long RateAdj;
  100. ULONG Volume;
  101. SPVPITCH PitchAdj;
  102. ULONG SilenceMSecs;
  103. SPPHONEID PhoneIds[64];
  104. ENGPARTOFSPEECH ePartOfSpeech;
  105. DebugContext Context;
  106. public:
  107. DebugState() { ZeroMemory( (void*) this, sizeof( DebugState ) ); }
  108. operator =( SPVSTATE InState )
  109. {
  110. eAction = InState.eAction;
  111. LangID = InState.LangID;
  112. wReserved = InState.wReserved;
  113. EmphAdj = InState.EmphAdj;
  114. RateAdj = InState.RateAdj;
  115. Volume = InState.Volume;
  116. PitchAdj = InState.PitchAdj;
  117. SilenceMSecs = InState.SilenceMSecs;
  118. ePartOfSpeech = (ENGPARTOFSPEECH)InState.ePartOfSpeech;
  119. Context = InState.Context;
  120. if ( InState.pPhoneIds )
  121. {
  122. wcsncpy( PhoneIds, InState.pPhoneIds,
  123. wcslen(InState.pPhoneIds) > 63 ? 63 : wcslen(InState.pPhoneIds) );
  124. }
  125. }
  126. };
  127. //--- This struct is used to replace the TTSWord struct for outputting to the debug streams -
  128. //--- cannot have any pointers in a struct which we will output as binary data...
  129. struct DebugWord
  130. {
  131. DebugState XmlState;
  132. WCHAR WordText[32];
  133. ULONG ulWordLen;
  134. WCHAR LemmaText[32];
  135. ULONG ulLemmaLen;
  136. SPPHONEID WordPron[64];
  137. ENGPARTOFSPEECH eWordPartOfSpeech;
  138. public:
  139. DebugWord() { ZeroMemory( (void*) this, sizeof( DebugWord ) ); }
  140. operator =( TTSWord InWord )
  141. {
  142. XmlState = *(InWord.pXmlState);
  143. if ( InWord.pWordText )
  144. {
  145. wcsncpy( WordText, InWord.pWordText, InWord.ulWordLen > 31 ? 31 : InWord.ulWordLen );
  146. }
  147. ulWordLen = InWord.ulWordLen;
  148. if ( InWord.pLemma )
  149. {
  150. wcsncpy( LemmaText, InWord.pLemma, InWord.ulLemmaLen > 31 ? 31 : InWord.ulLemmaLen );
  151. }
  152. ulLemmaLen = InWord.ulLemmaLen;
  153. if ( InWord.pWordPron )
  154. {
  155. wcsncpy( WordPron, InWord.pWordPron,
  156. wcslen( InWord.pWordPron ) > 63 ? 63 : wcslen( InWord.pWordPron ) );
  157. }
  158. eWordPartOfSpeech = InWord.eWordPartOfSpeech;
  159. }
  160. };
  161. struct DebugItemInfo
  162. {
  163. TTSItemType Type;
  164. public:
  165. DebugItemInfo() { ZeroMemory( (void*) this, sizeof( DebugItemInfo ) ); }
  166. operator =( TTSItemInfo InItemInfo )
  167. {
  168. Type = InItemInfo.Type;
  169. }
  170. };
  171. //--- This struct is used to replace the TTSSentItem struct for outputting to the debug streams -
  172. //--- cannot have any pointers in a struct which we will output as binary data...
  173. struct DebugSentItem
  174. {
  175. WCHAR ItemSrcText[32];
  176. ULONG ulItemSrcLen;
  177. ULONG ulItemSrcOffset;
  178. DebugWord Words[32];
  179. ULONG ulNumWords;
  180. ENGPARTOFSPEECH eItemPartOfSpeech;
  181. DebugItemInfo ItemInfo;
  182. public:
  183. DebugSentItem() { ZeroMemory( (void*) this, sizeof( DebugSentItem ) ); }
  184. operator =( TTSSentItem InItem )
  185. {
  186. if ( InItem.pItemSrcText )
  187. {
  188. wcsncpy( ItemSrcText, InItem.pItemSrcText, InItem.ulItemSrcLen > 31 ? 31 : InItem.ulItemSrcLen );
  189. }
  190. ulItemSrcLen = InItem.ulItemSrcLen;
  191. ulItemSrcOffset = InItem.ulItemSrcOffset;
  192. for ( ULONG i = 0; i < InItem.ulNumWords && i < 32; i++ )
  193. {
  194. Words[i] = InItem.Words[i];
  195. }
  196. ulNumWords = InItem.ulNumWords;
  197. eItemPartOfSpeech = InItem.eItemPartOfSpeech;
  198. if ( InItem.pItemInfo )
  199. {
  200. ItemInfo = *(InItem.pItemInfo);
  201. }
  202. }
  203. };
  204. //--- This enumeration should correspond to the previous one, and is used to name the array of IStreams
  205. //--- used to write stuff to the debug file
  206. static const SPLSTR StreamTypeStrings[] =
  207. {
  208. DEF_SPLSTR( "Wave" ),
  209. DEF_SPLSTR( "Epoch" ),
  210. DEF_SPLSTR( "Unit" ),
  211. DEF_SPLSTR( "WaveInfo" ),
  212. DEF_SPLSTR( "ToBI" ),
  213. DEF_SPLSTR( "SentenceBreaks" ),
  214. DEF_SPLSTR( "NormalizedText" ),
  215. DEF_SPLSTR( "LexLookup" ),
  216. DEF_SPLSTR( "PosPossibilities" ),
  217. DEF_SPLSTR( "Morphology" ),
  218. };
  219. //--- This const is just the storage mode with which the debug file and its associated streams are opened
  220. static const DWORD STORAGE_MODE = ( STGM_CREATE | STGM_READWRITE | STGM_SHARE_EXCLUSIVE );
  221. #define TEXT_LEN_MAX 20
  222. //--- This struct is used to keep track of pitch information for outputting to the debug streams
  223. struct PITCH_TARGET
  224. {
  225. float time;
  226. float level;
  227. enum TOBI_ACCENT accent;
  228. //--- Diagnostic
  229. enum ACCENT_SOURCE accentSource;
  230. enum BOUNDARY_SOURCE boundarySource;
  231. char textStr[TEXT_LEN_MAX];
  232. };
  233. //--- This class implements most of the functionality required for TTS Debugging Support
  234. class CTTSDebug
  235. {
  236. public:
  237. //=== Interface Functions ===//
  238. //--- Constructor - just sets all member variables to NULL
  239. CTTSDebug()
  240. {
  241. m_pDebugFile = NULL;
  242. for ( int i = 0; i < STREAM_LASTTYPE; i++ )
  243. {
  244. m_pDebugStreams[i] = NULL;
  245. }
  246. m_fInitialized = false;
  247. }
  248. //--- Destructor - just closes the file
  249. ~CTTSDebug()
  250. {
  251. CloseDebugFile();
  252. }
  253. //--- OpenDebugFile - opens a file (path is obtained from the Key DebugFile in the voices registry
  254. //--- entry) and associated streams...
  255. void OpenDebugFile( WCHAR *pFileName )
  256. {
  257. HRESULT hr = S_OK;
  258. hr = StgCreateDocfile( pFileName, STORAGE_MODE, 0, &m_pDebugFile );
  259. if ( SUCCEEDED( hr ) )
  260. {
  261. for ( int i = 0; SUCCEEDED( hr ) && i < STREAM_LASTTYPE; i++ )
  262. {
  263. hr = m_pDebugFile->CreateStream( StreamTypeStrings[i].pStr, STORAGE_MODE, 0, 0, &m_pDebugStreams[i] );
  264. }
  265. }
  266. if ( FAILED( hr ) )
  267. {
  268. CloseDebugFile();
  269. }
  270. else
  271. {
  272. m_fInitialized = true;
  273. }
  274. }
  275. //--- CloseDebugFile - just closes the file and streams opened by OpenDebugFile
  276. void CloseDebugFile( void )
  277. {
  278. if ( m_pDebugFile )
  279. {
  280. for ( int i = 0; i < STREAM_LASTTYPE; i++ )
  281. {
  282. if ( m_pDebugStreams[i] )
  283. {
  284. m_pDebugStreams[i]->Release();
  285. m_pDebugStreams[i] = NULL;
  286. }
  287. }
  288. m_pDebugFile->Release();
  289. m_pDebugFile = NULL;
  290. m_fInitialized = false;
  291. }
  292. }
  293. //--- AppendToStream - writes data to the Stream specified by Type
  294. void AppendToStream( STREAM_TYPE Type, void *pData, ULONG cBytes )
  295. {
  296. HRESULT hr = S_OK;
  297. hr = m_pDebugStreams[Type]->Write( pData, cBytes, NULL );
  298. }
  299. //--- AddPitchToList - keeps track of pitch targets which will later be output to a debug stream
  300. void AddPitchToList( float time,
  301. float level,
  302. TOBI_ACCENT accent,
  303. ACCENT_SOURCE accentSource,
  304. BOUNDARY_SOURCE boundarySource,
  305. char *pTextStr)
  306. {
  307. PITCH_TARGET *pNewPitch, *pNextPitch;
  308. SPLISTPOS curPosition, nextPosition;
  309. pNewPitch = new PITCH_TARGET;
  310. if( pNewPitch )
  311. {
  312. pNewPitch->time = time;
  313. pNewPitch->level = level;
  314. pNewPitch->accent = accent;
  315. if( pTextStr )
  316. {
  317. strcpy( pNewPitch->textStr, pTextStr );
  318. }
  319. else
  320. {
  321. // No string
  322. pNewPitch->textStr[0] = 0;
  323. }
  324. pNewPitch->accentSource = accentSource;
  325. pNewPitch->boundarySource = boundarySource;
  326. if( PitchTargetList.IsEmpty() )
  327. {
  328. PitchTargetList.AddHead( pNewPitch );
  329. }
  330. else
  331. {
  332. nextPosition = PitchTargetList.GetHeadPosition();
  333. while( nextPosition )
  334. {
  335. curPosition = nextPosition;
  336. pNextPitch = (PITCH_TARGET*)PitchTargetList.GetNext( nextPosition );
  337. if( time < pNextPitch->time )
  338. {
  339. PitchTargetList.InsertBefore( curPosition, pNewPitch );
  340. break;
  341. }
  342. if( nextPosition == NULL )
  343. {
  344. PitchTargetList.AddTail( pNewPitch );
  345. break;
  346. }
  347. }
  348. }
  349. }
  350. }
  351. //--- DeletePitchList - Cleans up pitch target list after it has been output to a debug stream
  352. void DeletePitchList()
  353. {
  354. PITCH_TARGET *pTarget;
  355. while ( !PitchTargetList.IsEmpty() )
  356. {
  357. pTarget = (PITCH_TARGET*)PitchTargetList.RemoveHead();
  358. delete pTarget;
  359. }
  360. }
  361. //--- IsInitialized - Just returns true or false based on whether OpenDebugFile has been called
  362. //--- and has succeeded...
  363. bool IsInitialized() { return m_fInitialized; }
  364. //=== Member Variables ===//
  365. private:
  366. IStorage *m_pDebugFile;
  367. IStream *m_pDebugStreams[STREAM_LASTTYPE];
  368. bool m_fInitialized;
  369. public:
  370. CSPList<PITCH_TARGET*,PITCH_TARGET*> PitchTargetList;
  371. };
  372. inline CTTSDebug *pTTSDebug()
  373. {
  374. static CTTSDebug debug;
  375. return &debug;
  376. }
  //--- TTSDBG_OPENFILE - reads the "DebugFile" string value from m_cpToken and, if that succeeds,
  //--- opens the debug file at that path.  m_cpToken must be in scope where the macro is used.
  #define TTSDBG_OPENFILE \
      do \
      { \
          CSpDynamicString dstrDebugPath; \
          const HRESULT hrDebugPath = m_cpToken->GetStringValue( L"DebugFile", &dstrDebugPath ); \
          if ( SUCCEEDED( hrDebugPath ) ) \
          { \
              pTTSDebug()->OpenDebugFile( dstrDebugPath ); \
          } \
      } \
      while (0)
  //--- TTSDBG_CLOSEFILE - closes the debug file and all of its streams
  #define TTSDBG_CLOSEFILE ( pTTSDebug()->CloseDebugFile() )
  //--- TTSDBG_LOGITEMLIST - writes ItemList to Stream as a sequence of DebugSentItem records: first
  //--- one all-zero record, then one record per list item.  Afterwards it calls pItemEnum->Reset();
  //--- pItemEnum is NOT a macro argument and must be in scope where the macro is used.
  //--- The arguments are parenthesized so that expression arguments expand correctly, and the
  //--- macro's own locals carry a dbg prefix so that they are unlikely to capture an argument name.
  #define TTSDBG_LOGITEMLIST( ItemList, Stream ) \
      do \
      { \
          if ( pTTSDebug()->IsInitialized() ) \
          { \
              SPLISTPOS dbgListPos = ( ItemList ).GetHeadPosition(); \
              DebugSentItem dbgItem; \
              pTTSDebug()->AppendToStream( ( Stream ), (void*) &dbgItem, sizeof( dbgItem ) ); \
              while ( dbgListPos ) \
              { \
                  ZeroMemory( &dbgItem, sizeof( dbgItem ) ); \
                  dbgItem = ( ItemList ).GetNext( dbgListPos ); \
                  pTTSDebug()->AppendToStream( ( Stream ), (void*) &dbgItem, sizeof( dbgItem ) ); \
              } \
              pItemEnum->Reset(); \
          } \
      } \
      while (0)
  //--- TTSDBG_LOGPOSPOSSIBILITIES - writes the first ulNumWords entries of the pProns array to Stream
  //--- as DebugPronRecord records, preceded by one all-zero record.
  //--- The arguments are parenthesized so that expression arguments (e.g. a pointer sum for pProns)
  //--- expand correctly, and the macro's own locals carry a dbg prefix so that they are unlikely to
  //--- capture an argument name.
  #define TTSDBG_LOGPOSPOSSIBILITIES( pProns, ulNumWords, Stream ) \
      do \
      { \
          if ( pTTSDebug()->IsInitialized() ) \
          { \
              ULONG dbgIndex; \
              DebugPronRecord dbgRecord; \
              pTTSDebug()->AppendToStream( ( Stream ), (void*) &dbgRecord, \
                                           sizeof( DebugPronRecord ) ); \
              for ( dbgIndex = 0; dbgIndex < ( ulNumWords ); dbgIndex++ ) \
              { \
                  dbgRecord = ( pProns )[dbgIndex]; \
                  pTTSDebug()->AppendToStream( ( Stream ), (void*) &dbgRecord, \
                                               sizeof( DebugPronRecord ) ); \
              } \
          } \
      } \
      while (0)
  //--- TTSDBG_LOGMORPHOLOGY - writes one morphology entry to Stream: the root, then the SuffixString
  //--- of every element of SuffixList, then an all-zero delimiter.  Each of these is written as a
  //--- fixed block of SP_MAX_WORD_LENGTH WCHARs.
  //--- NOTE(review): this assumes pwRoot and every SuffixString point at buffers of at least
  //--- SP_MAX_WORD_LENGTH WCHARs - confirm against the callers.
  //--- The arguments are parenthesized so that expression arguments expand correctly (the cast in
  //--- "(void*) pwRoot" would otherwise bind to only part of an expression), and the macro's own
  //--- locals carry a dbg prefix so that they are unlikely to capture an argument name.
  #define TTSDBG_LOGMORPHOLOGY( pwRoot, SuffixList, Stream ) \
      do \
      { \
          if ( pTTSDebug()->IsInitialized() ) \
          { \
              pTTSDebug()->AppendToStream( ( Stream ), (void*) ( pwRoot ), \
                                           SP_MAX_WORD_LENGTH * sizeof( WCHAR ) ); \
              SPLISTPOS dbgListPos = ( SuffixList ).GetHeadPosition(); \
              SUFFIXPRON_INFO* dbgSuffixPron; \
              while ( dbgListPos ) \
              { \
                  dbgSuffixPron = ( SuffixList ).GetNext( dbgListPos ); \
                  pTTSDebug()->AppendToStream( ( Stream ), (void*) dbgSuffixPron->SuffixString, \
                                               SP_MAX_WORD_LENGTH * sizeof( WCHAR ) ); \
              } \
              WCHAR dbgDelimiter[SP_MAX_WORD_LENGTH]; \
              ZeroMemory( dbgDelimiter, SP_MAX_WORD_LENGTH * sizeof( WCHAR ) ); \
              pTTSDebug()->AppendToStream( ( Stream ), (void*) dbgDelimiter, \
                                           SP_MAX_WORD_LENGTH * sizeof( WCHAR ) ); \
          } \
      } \
      while (0)
  //--- TTSDBG_LOGWAVE - while m_SpeechState is SPEECH_CONTINUE, appends the current frame
  //--- (m_cOutSamples_Frame * m_BytesPerSample bytes at m_pSpeechBuf) to the STREAM_WAVE stream.
  //--- All of the m_ names must be in scope where the macro is used.
  #define TTSDBG_LOGWAVE \
      do \
      { \
          if ( pTTSDebug()->IsInitialized() && m_SpeechState == SPEECH_CONTINUE ) \
          { \
              pTTSDebug()->AppendToStream( STREAM_WAVE, \
                                           (void*)m_pSpeechBuf, \
                                           m_cOutSamples_Frame * m_BytesPerSample ); \
          } \
      } \
      while (0)
  //--- TTSDBG_ADDPITCHTARGET - queues a pitch target for later output by TTSDBG_LOGTOBI.  The accent
  //--- source, boundary source and text come from m_CurAccentSource, m_CurBoundarySource and
  //--- m_pCurTextStr, which must be in scope where the macro is used.
  #define TTSDBG_ADDPITCHTARGET( time, level, accent) \
      do \
      { \
          if ( pTTSDebug()->IsInitialized() ) \
          { \
              pTTSDebug()->AddPitchToList( time, \
                                           level, \
                                           accent, \
                                           m_CurAccentSource, \
                                           m_CurBoundarySource, \
                                           m_pCurTextStr ); \
          } \
      } \
      while (0)
  //--- TTSDBG_LOGTOBI - writes every queued pitch target, in list order, to the STREAM_TOBI stream as
  //--- a raw PITCH_TARGET record, and then empties the queue
  #define TTSDBG_LOGTOBI \
      do \
      { \
          if ( pTTSDebug()->IsInitialized() ) \
          { \
              SPLISTPOS tobiPos = pTTSDebug()->PitchTargetList.GetHeadPosition(); \
              while( tobiPos ) \
              { \
                  PITCH_TARGET *pTobiTarget = \
                      (PITCH_TARGET*)pTTSDebug()->PitchTargetList.GetNext(tobiPos); \
                  pTTSDebug()->AppendToStream( STREAM_TOBI, (void*)pTobiTarget, \
                                               sizeof(PITCH_TARGET) ); \
              } \
              pTTSDebug()->DeletePitchList(); \
          } \
      } \
      while (0)
  //--- TTSDBG_LOGSILEPOCH - when m_silMode is set, appends m_durationTarget (converted to float) to
  //--- the STREAM_EPOCH stream.  Both m_ names must be in scope where the macro is used.
  #define TTSDBG_LOGSILEPOCH \
      do \
      { \
          if( pTTSDebug()->IsInitialized() && m_silMode ) \
          { \
              float fSilEpoch = (float)m_durationTarget; \
              pTTSDebug()->AppendToStream( STREAM_EPOCH, (void*)&fSilEpoch, sizeof(float) ); \
          } \
      } \
      while (0)
  //--- TTSDBG_LOGEPOCHS - when OutSize is greater than 1, appends OutSize (converted to float) to the
  //--- STREAM_EPOCH stream.  OutSize must be in scope where the macro is used.
  #define TTSDBG_LOGEPOCHS \
      do \
      { \
          if( pTTSDebug()->IsInitialized() && OutSize > 1 ) \
          { \
              float fOutEpoch = (float)OutSize; \
              pTTSDebug()->AppendToStream( STREAM_EPOCH, (void*)&fOutEpoch, sizeof(float) ); \
          } \
      } \
      while (0)
  //--- TTSDBG_LOGUNITS - fills a UNIT_STREAM record from pCurUnit, m_durationTarget,
  //--- m_cOutSamples_Total and m_SampleRate (all of which must be in scope where the macro is used)
  //--- and appends it to the STREAM_UNIT stream.
  //--- The record is zeroed first because it is dumped as raw bytes.  The unit name is copied with a
  //--- bound: names that do not fit in us.name are truncated instead of overrunning the field, and
  //--- the last char of the field is never written, so it always holds the terminator.
  //--- The knot loop uses its own counter, so the macro no longer needs (or modifies) a variable
  //--- called i at the point of use.
  #define TTSDBG_LOGUNITS \
      do \
      { \
          if ( pTTSDebug()->IsInitialized() ) \
          { \
              UNIT_STREAM us; \
              int iDbgKnot; \
              \
              ZeroMemory( &us, sizeof( us ) ); \
              us.phonID = pCurUnit->PhonID; \
              us.unitID = pCurUnit->UnitID; \
              us.flags = pCurUnit->flags; \
              us.ctrlFlags = pCurUnit->ctrlFlags; \
              us.cEpochs = 1; \
              us.cSamples = m_durationTarget; \
              us.time = (float)m_cOutSamples_Total / m_SampleRate; \
              us.knots = KNOTS_PER_PHON; \
              for( iDbgKnot = 0; iDbgKnot < KNOTS_PER_PHON; iDbgKnot++ ) \
              { \
                  us.pTime[iDbgKnot] = pCurUnit->pTime[iDbgKnot]; \
                  us.pF0[iDbgKnot] = pCurUnit->pF0[iDbgKnot]; \
                  us.pAmp[iDbgKnot] = pCurUnit->pAmp[iDbgKnot]; \
              } \
              strncpy( us.name, pCurUnit->szUnitName, sizeof( us.name ) - 1 ); \
              us.silenceSource = pCurUnit->silenceSource; \
              pTTSDebug()->AppendToStream( STREAM_UNIT, (void*)&us, sizeof(UNIT_STREAM) ); \
          } \
      } \
      while (0)
  545. #else // _DEBUG
  //--- Without _DEBUG every logging macro expands to nothing, so uses of them compile away
  #define TTSDBG_OPENFILE
  #define TTSDBG_CLOSEFILE
  #define TTSDBG_LOGITEMLIST( ItemList, Stream )
  #define TTSDBG_LOGPOSPOSSIBILITIES( pProns, ulNumWords, Stream )
  #define TTSDBG_LOGMORPHOLOGY( pwRoot, SuffixList, Stream )
  #define TTSDBG_LOGWAVE
  #define TTSDBG_ADDPITCHTARGET( time, level, accent )
  #define TTSDBG_LOGTOBI
  #define TTSDBG_LOGSILEPOCH
  #define TTSDBG_LOGEPOCHS
  #define TTSDBG_LOGUNITS
  557. #endif // _DEBUG
  558. #endif // spttsengdebug_h