Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

201 lines
6.2 KiB

  1. /******************************************************************************
  2. * Frontend.h *
  3. *------------*
  4. * This is the header file for the CFrontend implementation.
  5. *------------------------------------------------------------------------------
  6. * Copyright (C) 1999 Microsoft Corporation Date: 03/01/99
  7. * All Rights Reserved
  8. *
  9. *********************************************************************** MC ****/
  10. #ifndef Frontend_H
  11. #define Frontend_H
  12. #include "FeedChain.h"
  13. #include "AlloOps.h"
  14. #include "ms_entropicengine.h"
  15. #include "sapi.h"
  16. #include "..\truetalk\backend\backend.h"
  17. #ifdef USE_VOICEDATAOBJ
  18. #include "voicedataobj.h"
  19. #endif
  20. struct SentenceData
  21. {
  22. Phone *pPhones;
  23. ULONG ulNumPhones;
  24. float *pf0;
  25. ULONG ulNumf0;
  26. };
  //------------------------------------------------------
  // EntropicPitchInfo
  //
  // Pitch settings handed to CFrontend::Init() by value.
  // NOTE(review): units are not stated here; presumably Hz,
  // matching CFrontend's m_BasePitch / m_PitchRange -- confirm.
  //------------------------------------------------------
  struct EntropicPitchInfo
  {
      int     BasePitch;      // Baseline pitch
      int     Range;          // Pitch range
  };
  //------------------------------------------------------
  // Breakpoint constant. An earlier setting of 0.6667f was
  // disabled in favour of the value below.
  // NOTE(review): presumably a rate-ratio threshold (compare
  // m_RateRatio_BKPT in CFrontend) -- confirm against the .cpp.
  //------------------------------------------------------
  static const float  DISCRETE_BKPT = 0.3333f;
  //-----------------------------------------
  // Parse Next Sentence or Previous Sentence
  //
  // Selector passed to CFrontend::ParseSentence()
  // and CFrontend::GetSentenceTokens().
  //-----------------------------------------
  enum DIRECTION
  {
      eNEXT       = 0,
      ePREVIOUS   = 1
  };
  //------------------------------------------------------
  // Tag Values
  //------------------------------------------------------

  // Volume limits; the default is the maximum.
  enum USER_VOLUME_VALUE
  {
      MIN_USER_VOL        = 0,
      MAX_USER_VOL        = 100,
      DEFAULT_USER_VOL    = MAX_USER_VOL
  };
  // Pitch limits, symmetric about zero; the default applies no change.
  enum USER_PITCH_VALUE
  {
      MIN_USER_PITCH      = -24,
      MAX_USER_PITCH      = 24,
      DEFAULT_USER_PITCH  = 0     // None
  };
  // Emphasis limits, symmetric about zero; the default applies none.
  // NOTE(review): SAPI_USER_EMPH is presumably the level used for a
  // SAPI emphasis tag -- confirm against the .cpp.
  enum USER_EMPH_VALUE
  {
      MIN_USER_EMPH       = -10,
      MAX_USER_EMPH       = 10,
      SAPI_USER_EMPH      = 5,
      DEFAULT_USER_EMPH   = 0     // None
  };
  64. //------------------------
  65. // ToBI phrasing
  66. //------------------------
  67. typedef struct
  68. {
  69. PROSODY_POS posClass;
  70. long start;
  71. long end;
  72. } TOBI_PHRASE;
  73. class CFrontend: public CFeedChain
  74. {
  75. public:
  76. //--------------------------------
  77. // Methods
  78. //--------------------------------
  79. CFrontend( );
  80. ~CFrontend( );
  81. void PrepareSpeech( IEnumSpSentence* pEnumSent, ISpTTSEngineSite* pOutputSite );
  82. #ifdef USE_VOICEDATAOBJ
  83. HRESULT Init( CVoiceData* pVoiceDataObj, CFeedChain *pSrcObj, MSVOICEINFO* pVoiceInfo,
  84. EntropicPitchInfo PitchInfo, bool fNewPhoneSet );
  85. #else
  86. HRESULT Init( void* pVoiceDataObj, CFeedChain *pSrcObj, void* pVoiceInfo,
  87. EntropicPitchInfo PitchInfo, bool fNewPhoneSet );
  88. #endif
  89. //--------------------------------
  90. // CFeedChain methods
  91. //--------------------------------
  92. virtual HRESULT NextData( void**pData, SPEECH_STATE *pSpeechState ) ;
  93. private:
  94. #ifdef USE_VOICEDATAOBJ
  95. HRESULT AlloToUnit( CAlloList *pAllos, UNITINFO *pu );
  96. #endif
  97. HRESULT ParseSentence( DIRECTION eDirection );
  98. HRESULT TokensToAllo( CFETokenList *pTokList, CAlloList *pAllo );
  99. HRESULT GetSentenceTokens( DIRECTION eDirection );
  100. void GetItemControls( const SPVSTATE* pXmlState, CFEToken* pToken );
  101. #ifdef USE_VOICEDATAOBJ
  102. void DisposeUnits( );
  103. void RecalcProsody();
  104. #endif
  105. HRESULT ToBISymbols();
  106. void DoPhrasing();
  107. void DeleteTokenList();
  108. #ifdef USE_VOICEDATAOBJ
  109. HRESULT UnitLookahead ();
  110. void AlloToUnitPitch( CAlloList *pAllos, UNITINFO *pu );
  111. #endif
  112. void UnitToAlloDur( CAlloList *pAllos, UNITINFO *pu );
  113. float CntrlToRatio( long rateControl );
  114. PROSODY_POS GetPOSClass( ENGPARTOFSPEECH sapiPOS );
  115. bool StateQuoteProsody( CFEToken *pWordTok, TTSSentItem *pSentItem, bool fInsertSil );
  116. bool StartParenProsody( CFEToken *pWordTok, TTSSentItem *pSentItem, bool fInsertSil );
  117. bool EndParenProsody( CFEToken *pWordTok, TTSSentItem *pSentItem, bool fInsertSil );
  118. bool EmphSetup( CFEToken *pWordTok, TTSSentItem *pSentItem, bool fInsertSil );
  119. SPLISTPOS InsertSilenceAtTail( CFEToken *pWordTok, TTSSentItem *pSentItem, long msec );
  120. SPLISTPOS InsertSilenceAfterPos( CFEToken *pWordTok, SPLISTPOS position );
  121. SPLISTPOS InsertSilenceBeforePos( CFEToken *pWordTok, SPLISTPOS position );
  122. void DoWordAccent();
  123. void ExclamEmph();
  124. void ProsodyTemplates( SPLISTPOS clusterPos, TTSSentItem *pSentItem );
  125. long DoIntegerTemplate( SPLISTPOS *pClusterPos, TTSNumberItemInfo *pNInfo, long cWordCount );
  126. void DoNumByNumTemplate( SPLISTPOS *pClusterPos, long cWordCount );
  127. void DoCurrencyTemplate( SPLISTPOS clusterPos, TTSSentItem *pSentItem );
  128. void DoPhoneNumberTemplate( SPLISTPOS clusterPos, TTSSentItem *pSentItem );
  129. void DoTODTemplate( SPLISTPOS clusterPos, TTSSentItem *pSentItem );
  130. long DoFractionTemplate( SPLISTPOS *pClusterPos, TTSNumberItemInfo *pNInfo, long cWordCount );
  131. CFEToken *InsertPhoneSilenceAtSpace( SPLISTPOS *pClusterPos,
  132. BOUNDARY_SOURCE bndSrc,
  133. SILENCE_SOURCE silSrc );
  134. void InsertPhoneSilenceAtEnd( BOUNDARY_SOURCE bndSrc,
  135. SILENCE_SOURCE silSrc );
  136. void CalcSentenceLength();
  137. //--------------------------------
  138. // Members
  139. //--------------------------------
  140. #ifdef USE_VOICEDATAOBJ
  141. UNITINFO* m_pUnits;
  142. #endif
  143. ULONG m_unitCount;
  144. ULONG m_CurUnitIndex;
  145. SPEECH_STATE m_SpeechState;
  146. CFeedChain *m_pSrcObj;
  147. long m_VoiceWPM; // Voice defined speaking rate (wpm)
  148. float m_RateRatio_API; // API modulated speaking rate (ratio)
  149. float m_CurDurScale; // control tag (ratio)
  150. float m_RateRatio_BKPT; // Below this, add pauses (ratio)
  151. float m_RateRatio_PROSODY; // API modulated speaking rate (ratio)
  152. int m_BasePitch; // FROM VOICE: Baseline pitch in hz
  153. int m_PitchRange; // FROM VOICE: Pitch range in hz
  154. bool m_HasSpeech;
  155. CFETokenList m_TokList;
  156. long m_cNumOfWords;
  157. CPitchProsody m_PitchObj;
  158. IEnumSpSentence *m_pEnumSent;
  159. CDuration m_DurObj;
  160. CSyllableTagger m_SyllObj;
  161. #ifdef USE_VOICEDATAOBJ
  162. CVoiceData* m_pVoiceDataObj;
  163. float m_ProsodyGain;
  164. float m_SampleRate;
  165. #endif
  166. CAlloList *m_pAllos;
  167. bool m_fInQuoteProsody; // Special prosody mode
  168. bool m_fInParenProsody; // Special prosody mode
  169. float m_CurPitchOffs; // Pitch offset in octaves
  170. float m_CurPitchRange; // Pitch range scale (0 - 2.0)
  171. bool m_fNewPhoneSet;
  172. ISpTTSEngineSite *m_pOutputSite;
  173. };
  174. #endif //--- This must be the last line in the file