Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

365 lines
12 KiB

  1. /******************************************************************************
  2. * VoiceDataObj.h *
  3. *----------------*
  4. * This is the header file for the CVoiceDataObj implementation. This object
  5. * is used to provide shared access to a specific voice data file.
  6. *------------------------------------------------------------------------------
  7. * Copyright (C) 1999 Microsoft Corporation Date: 05/06/99
  8. * All Rights Reserved
  9. *
  10. *********************************************************************** EDC ***/
  11. #ifndef VoiceDataObj_h
  12. #define VoiceDataObj_h
  13. //--- Additional includes
  14. #include "ms_entropicengine.h"
  15. #include <spddkhlp.h>
  16. #include <sphelper.h>
  17. #include <MMREG.H>
  18. #include "resource.h"
  19. #include "SpTtsEngDebug.h"
  20. //=== Constants ====================================================
  21. static const long VOICE_VERSION = 0x10001;
  22. static const long HEADER_VERSION = 0x10000;
  23. static const long MS_VOICE_TYPE = MAKEFOURCC('V','o','i','s');
  24. static const long MS_DATA_TYPE = MAKEFOURCC('D','a','t','a');
  25. static const float SIL_DURATION = 0.01f;
  26. //=== Class, Enum, Struct and Union Declarations ===================
  27. //------------------------------------
  28. // Selector for 'GetData()'
  29. // For accessing voice data blocks
  30. //------------------------------------
  31. enum VOICEDATATYPE
  32. {
  33. MSVD_PHONE,
  34. MSVD_SENONE,
  35. MSVD_TREEIMAGE,
  36. MSVD_INVENTORY,
  37. MSVD_ALLOID
  38. };
  39. //---------------------------
  40. // VOICEINFO data types
  41. //---------------------------
  42. enum GENDER
  43. {
  44. GENDER_NEUTRAL = 0,
  45. GENDER_FEMALE,
  46. GENDER_MALE
  47. };
  48. enum COMPRESS_TYPE
  49. {
  50. COMPRESS_NONE = 0,
  51. COMPRESS_LPC
  52. };
  53. // THis is the data
  54. #pragma pack (1)
  55. struct VOICEINFO
  56. {
  57. long Type; // Always 'MS_VOICE_TYPE'
  58. ULONG Version; // Always 'VOICE_VERSION'
  59. WCHAR Copyright[256]; // INFO:
  60. WCHAR VoiceName[64]; // INFO:
  61. WCHAR Example[64]; // INFO:
  62. LCID LangID;
  63. GENDER Gender; // INFO: Male, female or neuter
  64. ULONG Age; // INFO: Speaker age in years
  65. ULONG Rate; // INFO & FE: Words-per-minute
  66. ULONG Pitch; // INFO & FE: Average pitch in Hz
  67. COMPRESS_TYPE CompressionType; // BE: Always 'COMPRESS_LPC'
  68. REVERBTYPE ReverbType; // BE: Reverb param
  69. ULONG NumOfTaps; // BE: Whisper param
  70. float TapCoefficients[8]; // BE: Whisper param
  71. ULONG ProsodyGain; // FE: 0 = monotone
  72. float VibratoFreq; // Hertz
  73. ULONG VibratoDepth; // 0 - 100%
  74. ULONG SampleRate; // 22050 typical
  75. GUID formatID; // SAPI audio format ID
  76. long Unused[4];
  77. };
  78. #pragma pack ()
  79. typedef VOICEINFO *PVOICEINFO;
  80. //---------------------------------------------------
  81. // Header definition for voice data block
  82. //---------------------------------------------------
  83. #pragma pack (1)
  84. struct VOICEBLOCKOFFSETS
  85. {
  86. long Type; // Always 'MS_DATA_TYPE'
  87. long Version; // Always 'HEADER_VERSION'
  88. GUID DataID; // File ID
  89. long PhonOffset; // Offset to PHON block (from beginning of file)
  90. long PhonLen; // Length of PHON block
  91. long SenoneOffset; // Offset to SENONE block (from beginning of file)
  92. long SenoneLen; // Length of SENONE block
  93. long TreeOffset; // Offset to TREE block (from beginning of file)
  94. long TreeLen; // Length of TREE block
  95. long InvOffset; // Offset to INV block (from beginning of file)
  96. long InvLen; // Length of INV block
  97. long AlloIDOffset; // Offset to AlloId block (from beginning of file)
  98. long AlloIDLen; // Length of AlloID block
  99. };
  100. #pragma pack ()
  101. // Single VQ Codebook
  102. #pragma pack (1)
  103. typedef struct Book
  104. {
  105. long cCodeSize; // Number of codewords
  106. long cCodeDim; // Dimension of codeword
  107. long pData; // Offset to data (INVENTORY rel)
  108. } BOOK, *PBOOK;
  109. #pragma pack ()
  110. static const long BOOKSHELF = 32;
  111. #pragma pack (1)
  112. typedef struct Inventory
  113. {
  114. long SampleRate; // Sample rate in Hz
  115. long cNumLPCBooks; // Number of LPC Codebooks
  116. long cNumResBooks; // Number of Residual Codebooks
  117. long cNumDresBooks; // Number of Delta Residual Codebooks
  118. BOOK LPCBook[BOOKSHELF]; // LPC Codebook array
  119. BOOK ResBook[BOOKSHELF]; // Residual Codebook array
  120. BOOK DresBook[BOOKSHELF]; // Delta residual Codebook array
  121. long cNumUnits; // Total number of units
  122. long UnitsOffset; // Offset to offset array to unit data (INVENTORY rel)
  123. long cOrder; // LPC analysis order
  124. long FFTSize; // Size of FFT
  125. long FFTOrder; // Order of FFT
  126. long TrigOffset; // Offset to sine table (INVENTORY rel)
  127. long WindowOffset; // Offset to Hanning Window (INVENTORY rel)
  128. long pGaussOffset; // Offset to Gaussian Random noise (INVENTORY rel)
  129. long GaussID; // Gaussian sample index
  130. } INVENTORY, *PINVENTORY;
  131. #pragma pack ()
  132. //------------------------
  133. // LPC order * 2
  134. //------------------------
  135. static const long MAXNO = 40;
  136. static const float KONEPI = 3.1415926535897931032f;
  137. static const float KTWOPI = (KONEPI * 2);
  138. static const float K2 = 0.70710678118655f;
  139. #pragma pack (1)
  140. typedef struct
  141. {
  142. long val; // Phon ID
  143. long obj; // Offset to phon string
  144. } HASH_ENTRY;
  145. #pragma pack ()
  146. #pragma pack (1)
  147. typedef struct
  148. {
  149. long size; // Number entries in the table (127 typ.)
  150. long UNUSED1;
  151. long entryArrayOffs; // Offset to HASH_ENTRY array
  152. long UNUSED2;
  153. long UNUSED3;
  154. long UNUSED4;
  155. long UNUSED5;
  156. } HASH_TABLE;
  157. #pragma pack ()
  158. #pragma pack (1)
  159. typedef struct
  160. {
  161. HASH_TABLE phonHash;
  162. long phones_list; // Offset to offsets to phon strings
  163. long numPhones;
  164. long numCiPhones; // Number of context ind. phones
  165. } PHON_DICT;
  166. #pragma pack ()
  167. #pragma pack (1)
  168. typedef struct
  169. {
  170. long nfeat;
  171. long nint32perq;
  172. long b_ques;
  173. long e_ques;
  174. long s_ques;
  175. long eors_ques;
  176. long wwt_ques;
  177. long nstateq;
  178. } FEATURE;
  179. #pragma pack ()
  180. #pragma pack (1)
  181. typedef struct
  182. {
  183. long prod; // For leaves, it means the counts.
  184. // For non-leaves, it is the offset
  185. // into TRIPHONE_TREE.prodspace.
  186. short yes; // Negative means there is no child. so this is a leaf
  187. short no; // for leaves, it is lcdsid
  188. short shallow_lcdsid; // negative means this is NOT a shallow leaf
  189. } C_NODE;
  190. #pragma pack ()
  191. #pragma pack (1)
  192. typedef struct
  193. {
  194. short nnodes;
  195. short nleaves;
  196. long nodes; // Offset
  197. }TREE_ELEM;
  198. #define NUM_PHONS_MAX 64
  199. #pragma pack (1)
  200. typedef struct
  201. {
  202. FEATURE feat;
  203. long UNUSED; // PHON_DICT *pd usually
  204. long nsenones;
  205. long silPhoneId;
  206. long nonSilCxt;
  207. long nclass;
  208. long gsOffset[NUM_PHONS_MAX]; // nclass+1 entries
  209. TREE_ELEM tree[NUM_PHONS_MAX];
  210. long nuniq_prod; // not used for detailed tree
  211. long uniq_prod_Offset; // Offset to table
  212. long nint32perProd;
  213. } TRIPHONE_TREE;
  214. #pragma pack ()
  215. static const long NO_PHON = (-1);
  216. #define ABS(x) ((x) >= 0 ? (x) : -(x))
  217. #define MAX(x,y) (((x) >= (y)) ? (x) : (y))
  218. #define MIN(x,y) (((x) <= (y)) ? (x) : (y))
  219. #pragma pack (1)
  220. typedef struct
  221. {
  222. float dur;
  223. float durSD;
  224. float amp;
  225. float ampRatio;
  226. } UNIT_STATS;
  227. #pragma pack ()
  228. //=== Enumerated Set Definitions ===================================
  229. //=== Function Type Definitions ====================================
  230. //=== Class, Struct and Union Definitions ==========================
  231. /*** CVoiceDataObj COM object ********************************
  232. */
  233. class CVoiceData
  234. {
  235. /*=== Methods =======*/
  236. public:
  237. CVoiceData();
  238. ~CVoiceData();
  239. private:
  240. /*--- Non interface methods ---*/
  241. HRESULT MapFile(const WCHAR * pszTokenValName, HANDLE * phMapping, void ** ppvData);
  242. HRESULT GetDataBlock( VOICEDATATYPE type, char **ppvOut, ULONG *pdwSize );
  243. HRESULT InitVoiceData();
  244. HRESULT DecompressUnit( ULONG UnitID, MSUNITDATA* pSynth );
  245. long DecompressEpoch( signed char *rgbyte, long cNumEpochs, float *pEpoch );
  246. long OrderLSP( PFLOAT pLSPFrame, INT cOrder );
  247. void LSPtoPC( float *pLSP, float *pLPC, long cOrder, long frame );
  248. void PutSpectralBand( float *pFFT, float *pBand, long StartBin,
  249. long cNumBins, long FFTSize );
  250. void AddSpectralBand( float *pFFT, float *pBand, long StartBin,
  251. long cNumBins, long FFTSize );
  252. void InverseFFT( float *pDest, long fftSize, long fftOrder, float *sinePtr );
  253. void SetEpochLen( float *pOutRes, long OutSize, float *pInRes,
  254. long InSize );
  255. void GainDeNormalize( float *pRes, long FFTSize, float Gain );
  256. long PhonToID( PHON_DICT *pd, char *phone_str );
  257. char *PhonFromID( PHON_DICT *pd, long phone_id );
  258. HRESULT GetTriphoneID( TRIPHONE_TREE *forest,
  259. long phon, // target phon
  260. long leftPhon, // left context
  261. long rightPhon, // right context
  262. long pos, // word position ("b", "e" or "s"
  263. PHON_DICT *pd,
  264. ULONG *pResult );
  265. long PhonHashLookup( PHON_DICT *pPD, // the hash table
  266. char *sym, // The symbol to look up
  267. long *val ); // Phon ID
  268. void FIR_Filter( float *pVector, long cNumSamples, float *pFilter,
  269. float *pHistory, long cNumTaps );
  270. void IIR_Filter( float *pVector, long cNumSamples, float *pFilter,
  271. float *pHistory, long cNumTaps );
  272. HRESULT GetUnitDur( ULONG UnitID, float* pDur );
  273. /*=== Interfaces ====*/
  274. public:
  275. STDMETHOD(GetVoiceInfo)( MSVOICEINFO* pVoiceInfo );
  276. STDMETHOD(GetUnitIDs)( UNIT_CVT* pUnits, ULONG cUnits );
  277. STDMETHOD(GetUnitData)( ULONG unitID, MSUNITDATA* pUnitData );
  278. STDMETHOD(AlloToUnit)( short allo, long attributes, long* pUnitID );
  279. STDMETHOD(SetObjectToken)( ISpObjectToken *pToken );
  280. private:
  281. /*=== Member Data ===*/
  282. CComPtr<ISpObjectToken> m_cpToken;
  283. HANDLE m_hVoiceDef;
  284. HANDLE m_hVoiceData;
  285. VOICEINFO* m_pVoiceDef;
  286. VOICEBLOCKOFFSETS* m_pVoiceData;
  287. PHON_DICT* m_pd;
  288. TRIPHONE_TREE* m_pForest;
  289. long* m_SenoneBlock;
  290. ULONG m_First_Context_Phone;
  291. ULONG m_Sil_Index;
  292. // Unit Inventory
  293. INVENTORY* m_pInv;
  294. float m_SampleRate;
  295. long m_cOrder;
  296. long *m_pUnit; // Pointer to offsets to unit data
  297. float *m_pTrig; // Sine table
  298. float *m_pWindow; // Hanning Window
  299. float *m_pGauss; // Gaussian Random noise
  300. COMPRESS_TYPE m_CompressionType;
  301. ULONG m_FFTSize;
  302. long m_GaussID;
  303. short *m_AlloToUnitTbl;
  304. long m_NumOfAllos;
  305. ULONG m_NumOfUnits; // Inventory size
  306. };
  307. #endif //--- This must be the last line in the file