Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

626 lines
26 KiB

  1. /*******************************************************************************
  2. * SpTtsEngDebug.h *
  3. *-----------------*
  4. * Description:
  5. * This header file contains debug output services for the TTS Engine
  6. *-------------------------------------------------------------------------------
  7. * Copyright (C) 1998-2000 Microsoft Corporation
  8. * All Rights Reserved
  9. *
  10. *-------------------------------------------------------------------------------
  11. * Revisions:
  12. *
  13. ********************************************************************* AKH ******/
  14. #ifndef spttsengdebug_h
  15. #define spttsengdebug_h
  16. #include "stdsentenum.h"
  17. #include "feedchain.h"
  //--- STREAM_TYPE indexes the array of IStreams that debug data is written to.
  //--- STREAM_LASTTYPE is not a stream itself: it is the number of streams, and is
  //--- used to size and iterate that array.  The quoted names are the matching
  //--- entries of StreamTypeStrings.
  typedef enum
  {
      STREAM_WAVE = 0,            // "Wave"
      STREAM_EPOCH,               // "Epoch"
      STREAM_UNIT,                // "Unit"
      STREAM_WAVEINFO,            // "WaveInfo"
      STREAM_TOBI,                // "ToBI"
      STREAM_SENTENCEBREAKS,      // "SentenceBreaks"
      STREAM_NORMALIZEDTEXT,      // "NormalizedText"
      STREAM_LEXLOOKUP,           // "LexLookup"
      STREAM_POSPOSSIBILITIES,    // "PosPossibilities"
      STREAM_MORPHOLOGY,          // "Morphology"
      STREAM_LASTTYPE             // count of the stream types above
  } STREAM_TYPE;
  33. #ifdef _DEBUG
  34. //--- This struct is used to log units...
  35. #pragma pack (1)
  36. typedef struct
  37. {
  38. char name[8];
  39. long phonID;
  40. long unitID;
  41. long cSamples;
  42. float time;
  43. long cEpochs;
  44. long knots;
  45. long flags;
  46. long ctrlFlags;
  47. float pTime[KNOTS_PER_PHON];
  48. float pF0[KNOTS_PER_PHON];
  49. float pAmp[KNOTS_PER_PHON];
  50. enum SILENCE_SOURCE silenceSource;
  51. } UNIT_STREAM;
  52. #pragma pack ()
  53. //--- This struct is just used as a helper to initialize the PRONRECORD to all zeroes
  54. struct DebugPronRecord : PRONRECORD
  55. {
  56. public:
  57. DebugPronRecord() { ZeroMemory( (void*) this, sizeof( DebugPronRecord ) ); }
  58. DebugPronRecord& operator =( PRONRECORD InRecord )
  59. {
  60. memcpy( this, &InRecord, sizeof( PRONRECORD ) );
  61. return (*this);
  62. }
  63. };
  64. //--- This struct is used to replace the SPVCONTEXT struct for outputting to the debug streams -
  65. //--- cannot have any pointers in a struct which we will output as binary data...
  66. struct DebugContext
  67. {
  68. WCHAR Category[32];
  69. WCHAR Before[32];
  70. WCHAR After[32];
  71. public:
  72. DebugContext() { ZeroMemory( (void*) this, sizeof( DebugContext ) ); }
  73. DebugContext& operator =( SPVCONTEXT InContext )
  74. {
  75. if ( InContext.pCategory )
  76. {
  77. wcsncpy( Category, InContext.pCategory,
  78. wcslen(InContext.pCategory) > 31 ? 31 : wcslen(InContext.pCategory) );
  79. }
  80. if ( InContext.pBefore )
  81. {
  82. wcsncpy( Before, InContext.pBefore,
  83. wcslen(InContext.pBefore) > 31 ? 31 : wcslen(InContext.pBefore) );
  84. }
  85. if ( InContext.pAfter )
  86. {
  87. wcsncpy( After, InContext.pAfter,
  88. wcslen(InContext.pAfter) > 31 ? 31 : wcslen(InContext.pAfter) );
  89. }
  90. return (*this);
  91. }
  92. };
  93. //--- This struct is used to replace the SPVSTATE struct for outputting to the debug streams -
  94. //--- cannot have any pointers in a struct which we will output as binary data...
  95. struct DebugState
  96. {
  97. SPVACTIONS eAction;
  98. LANGID LangID;
  99. WORD wReserved;
  100. long EmphAdj;
  101. long RateAdj;
  102. ULONG Volume;
  103. SPVPITCH PitchAdj;
  104. ULONG SilenceMSecs;
  105. SPPHONEID PhoneIds[64];
  106. ENGPARTOFSPEECH ePartOfSpeech;
  107. DebugContext Context;
  108. public:
  109. DebugState() { ZeroMemory( (void*) this, sizeof( DebugState ) ); }
  110. DebugState& operator =( SPVSTATE InState )
  111. {
  112. eAction = InState.eAction;
  113. LangID = InState.LangID;
  114. wReserved = InState.wReserved;
  115. EmphAdj = InState.EmphAdj;
  116. RateAdj = InState.RateAdj;
  117. Volume = InState.Volume;
  118. PitchAdj = InState.PitchAdj;
  119. SilenceMSecs = InState.SilenceMSecs;
  120. ePartOfSpeech = (ENGPARTOFSPEECH)InState.ePartOfSpeech;
  121. Context = InState.Context;
  122. if ( InState.pPhoneIds )
  123. {
  124. wcsncpy( PhoneIds, InState.pPhoneIds,
  125. wcslen(InState.pPhoneIds) > 63 ? 63 : wcslen(InState.pPhoneIds) );
  126. }
  127. return (*this);
  128. }
  129. };
  130. //--- This struct is used to replace the TTSWord struct for outputting to the debug streams -
  131. //--- cannot have any pointers in a struct which we will output as binary data...
  132. struct DebugWord
  133. {
  134. DebugState XmlState;
  135. WCHAR WordText[32];
  136. ULONG ulWordLen;
  137. WCHAR LemmaText[32];
  138. ULONG ulLemmaLen;
  139. SPPHONEID WordPron[64];
  140. ENGPARTOFSPEECH eWordPartOfSpeech;
  141. public:
  142. DebugWord() { ZeroMemory( (void*) this, sizeof( DebugWord ) ); }
  143. DebugWord& operator =( TTSWord InWord )
  144. {
  145. XmlState = *(InWord.pXmlState);
  146. if ( InWord.pWordText )
  147. {
  148. wcsncpy( WordText, InWord.pWordText, InWord.ulWordLen > 31 ? 31 : InWord.ulWordLen );
  149. }
  150. ulWordLen = InWord.ulWordLen;
  151. if ( InWord.pLemma )
  152. {
  153. wcsncpy( LemmaText, InWord.pLemma, InWord.ulLemmaLen > 31 ? 31 : InWord.ulLemmaLen );
  154. }
  155. ulLemmaLen = InWord.ulLemmaLen;
  156. if ( InWord.pWordPron )
  157. {
  158. wcsncpy( WordPron, InWord.pWordPron,
  159. wcslen( InWord.pWordPron ) > 63 ? 63 : wcslen( InWord.pWordPron ) );
  160. }
  161. eWordPartOfSpeech = InWord.eWordPartOfSpeech;
  162. return (*this);
  163. }
  164. };
  165. struct DebugItemInfo
  166. {
  167. TTSItemType Type;
  168. public:
  169. DebugItemInfo() { ZeroMemory( (void*) this, sizeof( DebugItemInfo ) ); }
  170. DebugItemInfo& operator =( TTSItemInfo InItemInfo )
  171. {
  172. Type = InItemInfo.Type;
  173. return (*this);
  174. }
  175. };
  176. //--- This struct is used to replace the TTSSentItem struct for outputting to the debug streams -
  177. //--- cannot have any pointers in a struct which we will output as binary data...
  178. struct DebugSentItem
  179. {
  180. WCHAR ItemSrcText[32];
  181. ULONG ulItemSrcLen;
  182. ULONG ulItemSrcOffset;
  183. DebugWord Words[32];
  184. ULONG ulNumWords;
  185. ENGPARTOFSPEECH eItemPartOfSpeech;
  186. DebugItemInfo ItemInfo;
  187. public:
  188. DebugSentItem() { ZeroMemory( (void*) this, sizeof( DebugSentItem ) ); }
  189. DebugSentItem& operator =( TTSSentItem InItem )
  190. {
  191. if ( InItem.pItemSrcText )
  192. {
  193. wcsncpy( ItemSrcText, InItem.pItemSrcText, InItem.ulItemSrcLen > 31 ? 31 : InItem.ulItemSrcLen );
  194. }
  195. ulItemSrcLen = InItem.ulItemSrcLen;
  196. ulItemSrcOffset = InItem.ulItemSrcOffset;
  197. for ( ULONG i = 0; i < InItem.ulNumWords && i < 32; i++ )
  198. {
  199. Words[i] = InItem.Words[i];
  200. }
  201. ulNumWords = InItem.ulNumWords;
  202. eItemPartOfSpeech = InItem.eItemPartOfSpeech;
  203. if ( InItem.pItemInfo )
  204. {
  205. ItemInfo = *(InItem.pItemInfo);
  206. }
  207. return (*this);
  208. }
  209. };
  210. //--- This enumeration should correspond to the previous one, and is used to name the array of IStreams
  211. //--- used to write stuff to the debug file
  212. static const SPLSTR StreamTypeStrings[] =
  213. {
  214. DEF_SPLSTR( "Wave" ),
  215. DEF_SPLSTR( "Epoch" ),
  216. DEF_SPLSTR( "Unit" ),
  217. DEF_SPLSTR( "WaveInfo" ),
  218. DEF_SPLSTR( "ToBI" ),
  219. DEF_SPLSTR( "SentenceBreaks" ),
  220. DEF_SPLSTR( "NormalizedText" ),
  221. DEF_SPLSTR( "LexLookup" ),
  222. DEF_SPLSTR( "PosPossibilities" ),
  223. DEF_SPLSTR( "Morphology" ),
  224. };
  225. //--- This const is just the storage mode with which the debug file and its associated streams are opened
  226. static const DWORD STORAGE_MODE = ( STGM_CREATE | STGM_READWRITE | STGM_SHARE_EXCLUSIVE );
  227. #define TEXT_LEN_MAX 20
  228. //--- This struct is used to keep track of pitch information for outputting to the debug streams
  229. struct PITCH_TARGET
  230. {
  231. float time;
  232. float level;
  233. enum TOBI_ACCENT accent;
  234. //--- Diagnostic
  235. enum ACCENT_SOURCE accentSource;
  236. enum BOUNDARY_SOURCE boundarySource;
  237. char textStr[TEXT_LEN_MAX];
  238. };
  239. //--- This class implements most of the functionality required for TTS Debugging Support
  240. class CTTSDebug
  241. {
  242. public:
  243. //=== Interface Functions ===//
  244. //--- Constructor - just sets all member variables to NULL
  245. CTTSDebug()
  246. {
  247. m_pDebugFile = NULL;
  248. for ( int i = 0; i < STREAM_LASTTYPE; i++ )
  249. {
  250. m_pDebugStreams[i] = NULL;
  251. }
  252. m_fInitialized = false;
  253. }
  254. //--- Destructor - just closes the file
  255. ~CTTSDebug()
  256. {
  257. CloseDebugFile();
  258. }
  259. //--- OpenDebugFile - opens a file (path is obtained from the Key DebugFile in the voices registry
  260. //--- entry) and associated streams...
  261. void OpenDebugFile( WCHAR *pFileName )
  262. {
  263. HRESULT hr = S_OK;
  264. hr = StgCreateDocfile( pFileName, STORAGE_MODE, 0, &m_pDebugFile );
  265. if ( SUCCEEDED( hr ) )
  266. {
  267. for ( int i = 0; SUCCEEDED( hr ) && i < STREAM_LASTTYPE; i++ )
  268. {
  269. hr = m_pDebugFile->CreateStream( StreamTypeStrings[i].pStr, STORAGE_MODE, 0, 0, &m_pDebugStreams[i] );
  270. }
  271. }
  272. if ( FAILED( hr ) )
  273. {
  274. CloseDebugFile();
  275. }
  276. else
  277. {
  278. m_fInitialized = true;
  279. }
  280. }
  281. //--- CloseDebugFile - just closes the file and streams opened by OpenDebugFile
  282. void CloseDebugFile( void )
  283. {
  284. if ( m_pDebugFile )
  285. {
  286. for ( int i = 0; i < STREAM_LASTTYPE; i++ )
  287. {
  288. if ( m_pDebugStreams[i] )
  289. {
  290. m_pDebugStreams[i]->Release();
  291. m_pDebugStreams[i] = NULL;
  292. }
  293. }
  294. m_pDebugFile->Release();
  295. m_pDebugFile = NULL;
  296. m_fInitialized = false;
  297. }
  298. }
  299. //--- AppendToStream - writes data to the Stream specified by Type
  300. void AppendToStream( STREAM_TYPE Type, void *pData, ULONG cBytes )
  301. {
  302. HRESULT hr = S_OK;
  303. hr = m_pDebugStreams[Type]->Write( pData, cBytes, NULL );
  304. }
  305. //--- AddPitchToList - keeps track of pitch targets which will later be output to a debug stream
  306. void AddPitchToList( float time,
  307. float level,
  308. TOBI_ACCENT accent,
  309. ACCENT_SOURCE accentSource,
  310. BOUNDARY_SOURCE boundarySource,
  311. char *pTextStr)
  312. {
  313. PITCH_TARGET *pNewPitch, *pNextPitch;
  314. SPLISTPOS curPosition, nextPosition;
  315. pNewPitch = new PITCH_TARGET;
  316. if( pNewPitch )
  317. {
  318. pNewPitch->time = time;
  319. pNewPitch->level = level;
  320. pNewPitch->accent = accent;
  321. if( pTextStr )
  322. {
  323. strcpy( pNewPitch->textStr, pTextStr );
  324. }
  325. else
  326. {
  327. // No string
  328. pNewPitch->textStr[0] = 0;
  329. }
  330. pNewPitch->accentSource = accentSource;
  331. pNewPitch->boundarySource = boundarySource;
  332. if( PitchTargetList.IsEmpty() )
  333. {
  334. PitchTargetList.AddHead( pNewPitch );
  335. }
  336. else
  337. {
  338. nextPosition = PitchTargetList.GetHeadPosition();
  339. while( nextPosition )
  340. {
  341. curPosition = nextPosition;
  342. pNextPitch = (PITCH_TARGET*)PitchTargetList.GetNext( nextPosition );
  343. if( time < pNextPitch->time )
  344. {
  345. PitchTargetList.InsertBefore( curPosition, pNewPitch );
  346. break;
  347. }
  348. if( nextPosition == NULL )
  349. {
  350. PitchTargetList.AddTail( pNewPitch );
  351. break;
  352. }
  353. }
  354. }
  355. }
  356. }
  357. //--- DeletePitchList - Cleans up pitch target list after it has been output to a debug stream
  358. void DeletePitchList()
  359. {
  360. PITCH_TARGET *pTarget;
  361. while ( !PitchTargetList.IsEmpty() )
  362. {
  363. pTarget = (PITCH_TARGET*)PitchTargetList.RemoveHead();
  364. delete pTarget;
  365. }
  366. }
  367. //--- IsInitialized - Just returns true or false based on whether OpenDebugFile has been called
  368. //--- and has succeeded...
  369. bool IsInitialized() { return m_fInitialized; }
  370. //=== Member Variables ===//
  371. private:
  372. IStorage *m_pDebugFile;
  373. IStream *m_pDebugStreams[STREAM_LASTTYPE];
  374. bool m_fInitialized;
  375. public:
  376. CSPList<PITCH_TARGET*,PITCH_TARGET*> PitchTargetList;
  377. };
  378. inline CTTSDebug *pTTSDebug()
  379. {
  380. static CTTSDebug debug;
  381. return &debug;
  382. }
  //--- TTSDBG_OPENFILE - reads the "DebugFile" string value from the voice token of
  //--- m_pVoiceDataObj (a member expected at the expansion site) and, if that succeeds,
  //--- opens the debug file at that path.
  #define TTSDBG_OPENFILE \
      do \
      { \
          CSpDynamicString dstrDebugPath; \
          CComPtr<ISpObjectToken> cpToken; \
          cpToken = ((CVoiceDataObj*)m_pVoiceDataObj)->GetVoiceToken(); \
          if ( SUCCEEDED( cpToken->GetStringValue( L"DebugFile", &dstrDebugPath ) ) ) \
          { \
              pTTSDebug()->OpenDebugFile( dstrDebugPath ); \
          } \
      } \
      while (0)

  //--- TTSDBG_CLOSEFILE - closes the debug file and its streams
  #define TTSDBG_CLOSEFILE \
      pTTSDebug()->CloseDebugFile()
  //--- TTSDBG_LOGITEMLIST - writes ItemList to Stream as fixed-size DebugSentItem records:
  //--- first one all-zero record, then one record per list element.
  //--- ItemList is parenthesized in the expansion so that an expression argument
  //--- (for example *pList) expands correctly.
  //--- NOTE: besides its parameters, this macro calls Reset() on a variable named
  //--- pItemEnum, which must be in scope at the expansion site.
  #define TTSDBG_LOGITEMLIST( ItemList, Stream ) \
      do \
      { \
          if ( pTTSDebug()->IsInitialized() ) \
          { \
              SPLISTPOS dbgItemPos = (ItemList).GetHeadPosition(); \
              DebugSentItem dbgItem; \
              pTTSDebug()->AppendToStream( (Stream), (void*) &dbgItem, sizeof( dbgItem ) ); \
              while ( dbgItemPos ) \
              { \
                  ZeroMemory( &dbgItem, sizeof( dbgItem ) ); \
                  dbgItem = (ItemList).GetNext( dbgItemPos ); \
                  pTTSDebug()->AppendToStream( (Stream), (void*) &dbgItem, sizeof( dbgItem ) ); \
              } \
              pItemEnum->Reset(); \
          } \
      } \
      while (0)
  //--- TTSDBG_LOGPOSPOSSIBILITIES - writes the first ulNumWords elements of pProns to
  //--- Stream as DebugPronRecord records, preceded by one all-zero record.
  //--- The arguments are parenthesized in the expansion so that expression arguments
  //--- (for example pBase + n, or a - b) expand correctly.  ulNumWords is evaluated once
  //--- per loop iteration.
  #define TTSDBG_LOGPOSPOSSIBILITIES( pProns, ulNumWords, Stream ) \
      do \
      { \
          if ( pTTSDebug()->IsInitialized() ) \
          { \
              ULONG ulDbgPronIndex = 0; \
              DebugPronRecord dbgRecord; \
              pTTSDebug()->AppendToStream( (Stream), (void*) &dbgRecord, \
                                           sizeof( DebugPronRecord ) ); \
              while ( ulDbgPronIndex < (ulNumWords) ) \
              { \
                  dbgRecord = (pProns)[ulDbgPronIndex]; \
                  pTTSDebug()->AppendToStream( (Stream), (void*) &dbgRecord, \
                                               sizeof( DebugPronRecord ) ); \
                  ulDbgPronIndex++; \
              } \
          } \
      } \
      while (0)
  //--- TTSDBG_LOGMORPHOLOGY - writes a root word, then the SuffixString of every element
  //--- of SuffixList, then one all-zero delimiter record to Stream.  Every record is
  //--- exactly SP_MAX_WORD_LENGTH WCHARs long, so pwRoot and each SuffixString must refer
  //--- to buffers at least that large.
  //--- The arguments are parenthesized in the expansion so that expression arguments
  //--- expand correctly.
  #define TTSDBG_LOGMORPHOLOGY( pwRoot, SuffixList, Stream ) \
      do \
      { \
          if ( pTTSDebug()->IsInitialized() ) \
          { \
              pTTSDebug()->AppendToStream( (Stream), (void*) (pwRoot), \
                                           SP_MAX_WORD_LENGTH * sizeof( WCHAR ) ); \
              SPLISTPOS dbgSuffixPos = (SuffixList).GetHeadPosition(); \
              SUFFIXPRON_INFO* pDbgSuffixPron; \
              while ( dbgSuffixPos ) \
              { \
                  pDbgSuffixPron = (SuffixList).GetNext( dbgSuffixPos ); \
                  pTTSDebug()->AppendToStream( (Stream), (void*) pDbgSuffixPron->SuffixString, \
                                               SP_MAX_WORD_LENGTH * sizeof( WCHAR ) ); \
              } \
              WCHAR dbgDelimiter[SP_MAX_WORD_LENGTH]; \
              ZeroMemory( dbgDelimiter, SP_MAX_WORD_LENGTH * sizeof( WCHAR ) ); \
              pTTSDebug()->AppendToStream( (Stream), (void*) dbgDelimiter, \
                                           SP_MAX_WORD_LENGTH * sizeof( WCHAR ) ); \
          } \
      } \
      while (0)
  //--- TTSDBG_LOGWAVE - while m_SpeechState is SPEECH_CONTINUE, appends
  //--- m_cOutSamples_Frame * m_BytesPerSample bytes from m_pSpeechBuf to the STREAM_WAVE
  //--- stream.  All of those members are taken from the expansion site.
  #define TTSDBG_LOGWAVE \
      do \
      { \
          if ( pTTSDebug()->IsInitialized() && m_SpeechState == SPEECH_CONTINUE ) \
          { \
              pTTSDebug()->AppendToStream( STREAM_WAVE, \
                                           (void*)m_pSpeechBuf, \
                                           m_cOutSamples_Frame * m_BytesPerSample ); \
          } \
      } \
      while (0)
  //--- TTSDBG_ADDPITCHTARGET - queues one pitch target for later output.  The accent
  //--- source, boundary source and text come from the members m_CurAccentSource,
  //--- m_CurBoundarySource and m_pCurTextStr at the expansion site.
  #define TTSDBG_ADDPITCHTARGET( time, level, accent ) \
      do \
      { \
          if ( pTTSDebug()->IsInitialized() ) \
          { \
              pTTSDebug()->AddPitchToList( (time), \
                                           (level), \
                                           (accent), \
                                           m_CurAccentSource, \
                                           m_CurBoundarySource, \
                                           m_pCurTextStr ); \
          } \
      } \
      while (0)
  //--- TTSDBG_LOGTOBI - writes every queued pitch target, in list order, to the
  //--- STREAM_TOBI stream as a raw PITCH_TARGET, then empties the list
  #define TTSDBG_LOGTOBI \
      do \
      { \
          if ( pTTSDebug()->IsInitialized() ) \
          { \
              SPLISTPOS dbgTargetPos = pTTSDebug()->PitchTargetList.GetHeadPosition(); \
              while( dbgTargetPos ) \
              { \
                  PITCH_TARGET *pDbgTarget = \
                      (PITCH_TARGET*)pTTSDebug()->PitchTargetList.GetNext( dbgTargetPos ); \
                  pTTSDebug()->AppendToStream( STREAM_TOBI, (void*)pDbgTarget, sizeof(PITCH_TARGET) ); \
              } \
              pTTSDebug()->DeletePitchList(); \
          } \
      } \
      while (0)
  //--- TTSDBG_LOGSILEPOCH - when m_silMode is set, appends m_durationTarget (converted to
  //--- float) to the STREAM_EPOCH stream.  Both members are taken from the expansion site.
  #define TTSDBG_LOGSILEPOCH \
      do \
      { \
          if( pTTSDebug()->IsInitialized() && m_silMode ) \
          { \
              float fSilEpoch = (float)m_durationTarget; \
              pTTSDebug()->AppendToStream( STREAM_EPOCH, (void*)&fSilEpoch, sizeof(float) ); \
          } \
      } \
      while (0)
  //--- TTSDBG_LOGEPOCHS - when OutSize (a variable at the expansion site) is greater
  //--- than 1, appends it (converted to float) to the STREAM_EPOCH stream
  #define TTSDBG_LOGEPOCHS \
      do \
      { \
          if( pTTSDebug()->IsInitialized() && OutSize > 1 ) \
          { \
              float fOutEpoch = (float)OutSize; \
              pTTSDebug()->AppendToStream( STREAM_EPOCH, (void*)&fOutEpoch, sizeof(float) ); \
          } \
      } \
      while (0)
  //--- TTSDBG_LOGUNITS - fills a UNIT_STREAM record from pCurUnit and the members
  //--- m_durationTarget, m_cOutSamples_Total and m_SampleRate, and appends it to the
  //--- STREAM_UNIT stream.
  //--- The record is zeroed before it is filled, so no uninitialized bytes are written
  //--- to the file, and the unit name is copied with a bound so that a name longer than
  //--- 7 characters is truncated instead of overrunning name[8].
  //--- NOTE: pCurUnit and the loop index i are not declared here; both must be in scope
  //--- at the expansion site, and i is left equal to KNOTS_PER_PHON afterwards.
  #define TTSDBG_LOGUNITS \
      do \
      { \
          if ( pTTSDebug()->IsInitialized() ) \
          { \
              UNIT_STREAM us; \
              \
              ZeroMemory( &us, sizeof( us ) ); \
              us.phonID = pCurUnit->PhonID; \
              us.unitID = pCurUnit->UnitID; \
              us.flags = pCurUnit->flags; \
              us.ctrlFlags = pCurUnit->ctrlFlags; \
              us.cEpochs = 1; \
              us.cSamples = m_durationTarget; \
              us.time = (float)m_cOutSamples_Total / m_SampleRate; \
              us.knots = KNOTS_PER_PHON; \
              for( i = 0; i < KNOTS_PER_PHON; i++ ) \
              { \
                  us.pTime[i] = pCurUnit->pTime[i]; \
                  us.pF0[i] = pCurUnit->pF0[i]; \
                  us.pAmp[i] = pCurUnit->pAmp[i]; \
              } \
              strncpy( us.name, pCurUnit->szUnitName, sizeof( us.name ) - 1 ); \
              us.silenceSource = pCurUnit->silenceSource; \
              pTTSDebug()->AppendToStream( STREAM_UNIT, (void*)&us, sizeof(UNIT_STREAM) ); \
          } \
      } \
      while (0)
  553. #else // _DEBUG
  //--- Without _DEBUG every TTSDBG_* macro expands to nothing, so the logging calls
  //--- disappear from the build.
  #define TTSDBG_OPENFILE
  #define TTSDBG_CLOSEFILE
  #define TTSDBG_LOGITEMLIST( ItemList, Stream )
  #define TTSDBG_LOGPOSPOSSIBILITIES( pProns, ulNumWords, Stream )
  #define TTSDBG_LOGMORPHOLOGY( pwRoot, SuffixList, Stream )
  #define TTSDBG_LOGWAVE
  #define TTSDBG_ADDPITCHTARGET( time, level, accent )
  #define TTSDBG_LOGTOBI
  #define TTSDBG_LOGSILEPOCH
  #define TTSDBG_LOGEPOCHS
  #define TTSDBG_LOGUNITS
  565. #endif // _DEBUG
  566. #endif // spttsengdebug_h