Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

401 lines
13 KiB

  1. /******************************************************************************
  2. * VoiceDataObj.h *
  3. *----------------*
  4. * This is the header file for the CVoiceDataObj implementation. This object
  5. * is used to provide shared access to a specific voice data file.
  6. *------------------------------------------------------------------------------
  7. * Copyright (C) 1999 Microsoft Corporation Date: 05/06/99
  8. * All Rights Reserved
  9. *
  10. *********************************************************************** EDC ***/
  11. #ifndef VoiceDataObj_h
  12. #define VoiceDataObj_h
  13. //--- Additional includes
  14. #ifndef __spttseng_h__
  15. #include "spttseng.h"
  16. #endif
  17. #ifndef SPDDKHLP_h
  18. #include <spddkhlp.h>
  19. #endif
  20. #ifndef SPHelper_h
  21. #include <sphelper.h>
  22. #endif
  23. #include <MMREG.H>
  24. #include "resource.h"
  25. #include "SpTtsEngDebug.h"
  26. //=== Constants ====================================================
  27. static const long VOICE_VERSION = 0x10001;
  28. static const long HEADER_VERSION = 0x10000;
  29. static const long MS_VOICE_TYPE = MAKEFOURCC('V','o','i','s');
  30. static const long MS_DATA_TYPE = MAKEFOURCC('D','a','t','a');
  31. static const float SIL_DURATION = 0.01f;
  32. //=== Class, Enum, Struct and Union Declarations ===================
  /*
   * VOICEDATATYPE
   * Selector for 'GetData()': picks which voice data block is accessed.
   * (The declaration in this header that takes this type is
   * CVoiceDataObj::GetDataBlock.)
   */
  enum VOICEDATATYPE
  {
      MSVD_PHONE     = 0,
      MSVD_SENONE    = 1,
      MSVD_TREEIMAGE = 2,
      MSVD_INVENTORY = 3,
      MSVD_ALLOID    = 4
  };
  45. //---------------------------
  46. // VOICEINFO data types
  47. //---------------------------
  // Speaker gender; this is the type of VOICEINFO::Gender.
  enum GENDER
  {
      GENDER_NEUTRAL = 0,
      GENDER_FEMALE  = 1,
      GENDER_MALE    = 2
  };
  // Compression selector; this is the type of VOICEINFO::CompressionType.
  enum COMPRESS_TYPE
  {
      COMPRESS_NONE = 0,
      COMPRESS_LPC  = 1
  };
  59. // THis is the data
  60. #pragma pack (1)
  61. struct VOICEINFO
  62. {
  63. long Type; // Always 'MS_VOICE_TYPE'
  64. ULONG Version; // Always 'VOICE_VERSION'
  65. WCHAR Copyright[256]; // INFO:
  66. WCHAR VoiceName[64]; // INFO:
  67. WCHAR Example[64]; // INFO:
  68. LCID LangID;
  69. GENDER Gender; // INFO: Male, female or neuter
  70. ULONG Age; // INFO: Speaker age in years
  71. ULONG Rate; // INFO & FE: Words-per-minute
  72. ULONG Pitch; // INFO & FE: Average pitch in Hz
  73. COMPRESS_TYPE CompressionType; // BE: Always 'COMPRESS_LPC'
  74. REVERBTYPE ReverbType; // BE: Reverb param
  75. ULONG NumOfTaps; // BE: Whisper param
  76. float TapCoefficients[8]; // BE: Whisper param
  77. ULONG ProsodyGain; // FE: 0 = monotone
  78. float VibratoFreq; // Hertz
  79. ULONG VibratoDepth; // 0 - 100%
  80. ULONG SampleRate; // 22050 typical
  81. GUID formatID; // SAPI audio format ID
  82. long Unused[4];
  83. };
  84. #pragma pack ()
  85. typedef VOICEINFO *PVOICEINFO;
  86. //---------------------------------------------------
  87. // Header definition for voice data block
  88. //---------------------------------------------------
  89. #pragma pack (1)
  90. struct VOICEBLOCKOFFSETS
  91. {
  92. long Type; // Always 'MS_DATA_TYPE'
  93. long Version; // Always 'HEADER_VERSION'
  94. GUID DataID; // File ID
  95. long PhonOffset; // Offset to PHON block (from beginning of file)
  96. long PhonLen; // Length of PHON block
  97. long SenoneOffset; // Offset to SENONE block (from beginning of file)
  98. long SenoneLen; // Length of SENONE block
  99. long TreeOffset; // Offset to TREE block (from beginning of file)
  100. long TreeLen; // Length of TREE block
  101. long InvOffset; // Offset to INV block (from beginning of file)
  102. long InvLen; // Length of INV block
  103. long AlloIDOffset; // Offset to AlloId block (from beginning of file)
  104. long AlloIDLen; // Length of AlloID block
  105. };
  106. #pragma pack ()
  // Descriptor for a single VQ codebook. Byte-packed (pack 1).
  #pragma pack (1)
  typedef struct Book
  {
      long  cCodeSize;    // Number of codewords
      long  cCodeDim;     // Dimension of codeword
      long  pData;        // Offset to data (INVENTORY rel)
  } BOOK;
  #pragma pack ()

  typedef BOOK *PBOOK;
  116. static const long BOOKSHELF = 32;
  117. #pragma pack (1)
  118. typedef struct Inventory
  119. {
  120. long SampleRate; // Sample rate in Hz
  121. long cNumLPCBooks; // Number of LPC Codebooks
  122. long cNumResBooks; // Number of Residual Codebooks
  123. long cNumDresBooks; // Number of Delta Residual Codebooks
  124. BOOK LPCBook[BOOKSHELF]; // LPC Codebook array
  125. BOOK ResBook[BOOKSHELF]; // Residual Codebook array
  126. BOOK DresBook[BOOKSHELF]; // Delta residual Codebook array
  127. long cNumUnits; // Total number of units
  128. long UnitsOffset; // Offset to offset array to unit data (INVENTORY rel)
  129. long cOrder; // LPC analysis order
  130. long FFTSize; // Size of FFT
  131. long FFTOrder; // Order of FFT
  132. long TrigOffset; // Offset to sine table (INVENTORY rel)
  133. long WindowOffset; // Offset to Hanning Window (INVENTORY rel)
  134. long pGaussOffset; // Offset to Gaussian Random noise (INVENTORY rel)
  135. long GaussID; // Gaussian sample index
  136. } INVENTORY, *PINVENTORY;
  137. #pragma pack ()
  // Numeric constants.
  static const long  MAXNO  = 40;                        // LPC order * 2
  static const float KONEPI = 3.1415926535897931032f;    // pi
  static const float KTWOPI = KONEPI * 2;                // 2 * pi
  static const float K2     = 0.70710678118655f;         // approximately 1 / sqrt(2)
  // One entry of the phon hash table (see HASH_TABLE::entryArrayOffs).
  // Byte-packed (pack 1).
  #pragma pack (1)
  typedef struct
  {
      long  val;    // Phon ID
      long  obj;    // Offset to phon string
  } HASH_ENTRY;
  #pragma pack ()
  // Hash table header. Only 'size' and 'entryArrayOffs' carry data; the
  // UNUSEDn members are placeholders that keep the packed layout intact.
  #pragma pack (1)
  typedef struct
  {
      long  size;              // Number entries in the table (127 typ.)
      long  UNUSED1;
      long  entryArrayOffs;    // Offset to HASH_ENTRY array
      long  UNUSED2;
      long  UNUSED3;
      long  UNUSED4;
      long  UNUSED5;
  } HASH_TABLE;
  #pragma pack ()
  164. #pragma pack (1)
  165. typedef struct
  166. {
  167. HASH_TABLE phonHash;
  168. long phones_list; // Offset to offsets to phon strings
  169. long numPhones;
  170. long numCiPhones; // Number of context ind. phones
  171. } PHON_DICT;
  172. #pragma pack ()
  // Feature/question description embedded at the start of TRIPHONE_TREE.
  // Byte-packed (pack 1). The original source carries no per-member
  // comments; none are invented here.
  #pragma pack (1)
  typedef struct
  {
      long  nfeat;
      long  nint32perq;
      long  b_ques;
      long  e_ques;
      long  s_ques;
      long  eors_ques;
      long  wwt_ques;
      long  nstateq;
  } FEATURE;
  #pragma pack ()
  // One node of a tree. Several members change meaning depending on whether
  // the node is a leaf. Byte-packed (pack 1).
  #pragma pack (1)
  typedef struct
  {
      // Leaf:     the counts.
      // Non-leaf: offset into TRIPHONE_TREE.prodspace.
      long   prod;

      // Negative means there is no child, i.e. this node is a leaf.
      short  yes;

      // For leaves, this is the lcdsid.
      short  no;

      // Negative means this is NOT a shallow leaf.
      short  shallow_lcdsid;
  } C_NODE;
  #pragma pack ()
  // Per-phone tree descriptor stored in TRIPHONE_TREE::tree[].
  // Byte-packed (pack 1).
  #pragma pack (1)
  typedef struct
  {
      short  nnodes;
      short  nleaves;
      long   nodes;      // Offset
  } TREE_ELEM;
  // Restore default packing. The original omitted this reset, unlike every
  // other packed struct in this header, so pack(1) stayed in effect past the
  // end of the definition.
  #pragma pack ()
  204. #define NUM_PHONS_MAX 64
  205. #pragma pack (1)
  206. typedef struct
  207. {
  208. FEATURE feat;
  209. long UNUSED; // PHON_DICT *pd usually
  210. long nsenones;
  211. long silPhoneId;
  212. long nonSilCxt;
  213. long nclass;
  214. long gsOffset[NUM_PHONS_MAX]; // nclass+1 entries
  215. TREE_ELEM tree[NUM_PHONS_MAX];
  216. long nuniq_prod; // not used for detailed tree
  217. long uniq_prod_Offset; // Offset to table
  218. long nint32perProd;
  219. } TRIPHONE_TREE;
  220. #pragma pack ()
  // Phon ID value meaning "no phon".
  static const long NO_PHON = (-1);

  // Small arithmetic helpers. These are plain function-like macros: an
  // argument may be evaluated more than once, so do not pass expressions
  // with side effects (e.g. MAX(i++, j)).
  #define ABS(x)      ((x) >= 0 ? (x) : -(x))
  #define MAX(x,y)    (((x) >= (y)) ? (x) : (y))
  #define MIN(x,y)    (((x) <= (y)) ? (x) : (y))
  // Per-unit statistics record. Byte-packed (pack 1).
  // NOTE(review): member meanings are inferred from the names only
  // (duration, duration SD, amplitude, amplitude ratio) -- confirm.
  #pragma pack (1)
  typedef struct
  {
      float  dur;
      float  durSD;
      float  amp;
      float  ampRatio;
  } UNIT_STATS;
  #pragma pack ()
  234. //=== Enumerated Set Definitions ===================================
  235. //=== Function Type Definitions ====================================
  236. //=== Class, Struct and Union Definitions ==========================
  237. /*** CVoiceDataObj COM object ********************************
  238. */
  239. class ATL_NO_VTABLE CVoiceDataObj :
  240. public CComObjectRootEx<CComMultiThreadModel>,
  241. public CComCoClass<CVoiceDataObj, &CLSID_MSVoiceData>,
  242. public IMSVoiceData,
  243. public ISpObjectWithToken
  244. {
  245. /*=== ATL Setup ===*/
  246. public:
  247. DECLARE_REGISTRY_RESOURCEID(IDR_MSVOICEDATA)
  248. DECLARE_PROTECT_FINAL_CONSTRUCT()
  249. DECLARE_GET_CONTROLLING_UNKNOWN()
  250. BEGIN_COM_MAP(CVoiceDataObj)
  251. COM_INTERFACE_ENTRY(ISpObjectWithToken)
  252. COM_INTERFACE_ENTRY(IMSVoiceData)
  253. COM_INTERFACE_ENTRY_AGGREGATE_BLIND( m_cpunkDrvVoice.p )
  254. END_COM_MAP()
  255. /*=== Methods =======*/
  256. public:
  257. /*--- Constructors/Destructors ---*/
  258. HRESULT FinalConstruct();
  259. void FinalRelease();
  260. ISpObjectToken* GetVoiceToken() {return m_cpToken;}
  261. private:
  262. /*--- Non interface methods ---*/
  263. HRESULT MapFile(const WCHAR * pszTokenValName, HANDLE * phMapping, void ** ppvData);
  264. HRESULT GetDataBlock( VOICEDATATYPE type, char **ppvOut, ULONG *pdwSize );
  265. HRESULT InitVoiceData();
  266. HRESULT DecompressUnit( ULONG UnitID, MSUNITDATA* pSynth );
  267. long DecompressEpoch( signed char *rgbyte, long cNumEpochs, float *pEpoch );
  268. long OrderLSP( PFLOAT pLSPFrame, INT cOrder );
  269. void LSPtoPC( float *pLSP, float *pLPC, long cOrder, long frame );
  270. void PutSpectralBand( float *pFFT, float *pBand, long StartBin,
  271. long cNumBins, long FFTSize );
  272. void AddSpectralBand( float *pFFT, float *pBand, long StartBin,
  273. long cNumBins, long FFTSize );
  274. void InverseFFT( float *pDest, long fftSize, long fftOrder, float *sinePtr );
  275. void SetEpochLen( float *pOutRes, long OutSize, float *pInRes,
  276. long InSize );
  277. void GainDeNormalize( float *pRes, long FFTSize, float Gain );
  278. long PhonToID( PHON_DICT *pd, char *phone_str );
  279. char *PhonFromID( PHON_DICT *pd, long phone_id );
  280. HRESULT GetTriphoneID( TRIPHONE_TREE *forest,
  281. long phon, // target phon
  282. long leftPhon, // left context
  283. long rightPhon, // right context
  284. long pos, // word position ("b", "e" or "s"
  285. PHON_DICT *pd,
  286. ULONG *pResult );
  287. long PhonHashLookup( PHON_DICT *pPD, // the hash table
  288. char *sym, // The symbol to look up
  289. long *val ); // Phon ID
  290. void FIR_Filter( float *pVector, long cNumSamples, float *pFilter,
  291. float *pHistory, long cNumTaps );
  292. void IIR_Filter( float *pVector, long cNumSamples, float *pFilter,
  293. float *pHistory, long cNumTaps );
  294. HRESULT GetUnitDur( ULONG UnitID, float* pDur );
  295. /*=== Interfaces ====*/
  296. public:
  297. //--- ISpObjectWithToken ----------------------------------
  298. STDMETHODIMP SetObjectToken(ISpObjectToken * pToken);
  299. STDMETHODIMP GetObjectToken(ISpObjectToken ** ppToken)
  300. { return SpGenericGetObjectToken( ppToken, m_cpToken ); }
  301. //--- IMSVoiceData ----------------------------------------
  302. STDMETHOD(GetVoiceInfo)( MSVOICEINFO* pVoiceInfo );
  303. //STDMETHOD(GetUnitInfo)( UNIT_INFO* pUnitInfo );
  304. STDMETHOD(GetUnitIDs)( UNIT_CVT* pUnits, ULONG cUnits );
  305. STDMETHOD(GetUnitData)( ULONG unitID, MSUNITDATA* pUnitData );
  306. STDMETHOD(AlloToUnit)( short allo, long attributes, long* pUnitID );
  307. private:
  308. /*=== Member Data ===*/
  309. CComPtr<IUnknown> m_cpunkDrvVoice;
  310. CComPtr<ISpObjectToken> m_cpToken;
  311. HANDLE m_hVoiceDef;
  312. HANDLE m_hVoiceData;
  313. VOICEINFO* m_pVoiceDef;
  314. VOICEBLOCKOFFSETS* m_pVoiceData;
  315. PHON_DICT* m_pd;
  316. TRIPHONE_TREE* m_pForest;
  317. UNALIGNED long* m_SenoneBlock;
  318. ULONG m_First_Context_Phone;
  319. ULONG m_Sil_Index;
  320. // Unit Inventory
  321. INVENTORY* m_pInv;
  322. float m_SampleRate;
  323. long m_cOrder;
  324. long *m_pUnit; // Pointer to offsets to unit data
  325. float *m_pTrig; // Sine table
  326. float *m_pWindow; // Hanning Window
  327. float *m_pGauss; // Gaussian Random noise
  328. COMPRESS_TYPE m_CompressionType;
  329. ULONG m_FFTSize;
  330. long m_GaussID;
  331. short *m_AlloToUnitTbl;
  332. long m_NumOfAllos;
  333. ULONG m_NumOfUnits; // Inventory size
  334. };
  335. #endif //--- This must be the last line in the file