Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

386 lines
12 KiB

/*****************************************************************************
* spttseng.idl *
*--------------*
* Description:
* This is the idl file for the Microsoft Text To Speech Driver.
*-----------------------------------------------------------------------------
* Creation: 03/01/99
* Copyright (C) Microsoft Corporation 1999
* All rights reserved.
*
****************************************************************** EDC ******/
//--- Import base idl
import "oaidl.idl";
import "ocidl.idl";
import "sapiddk.idl";
//=== Forward References ======================================================
// Both interfaces receive their full definitions further down in this file.
interface IMSVoiceData;
interface IMSTTSEngineInit;
// Engine part-of-speech codes.
// The first group aliases SPPS_* values one for one; every other value is a
// fixed offset from an SPPS_* base or from MS_Punctuation. The SPPS_*
// constants are declared outside this file (presumably by an imported IDL -
// confirm). The offsets are not contiguous; keep them exactly as listed.
typedef enum ENGPARTOFSPEECH
{
    // Direct aliases of the SPPS_* values
    MS_NotOverriden = SPPS_NotOverriden,
    MS_Unknown      = SPPS_Unknown,         // Probably from user lexicon
    MS_Noun         = SPPS_Noun,
    MS_Verb         = SPPS_Verb,
    MS_Modifier     = SPPS_Modifier,
    MS_Function     = SPPS_Function,
    MS_Interjection = SPPS_Interjection,

    // MS Nouns (offsets from SPPS_Noun; note the jump from +4 to +9)
    MS_Pron         = SPPS_Noun + 1,
    MS_SubjPron     = SPPS_Noun + 2,
    MS_ObjPron      = SPPS_Noun + 3,
    MS_RelPron      = SPPS_Noun + 4,
    MS_PosNoun      = SPPS_Noun + 9,

    // MS Modifiers (offsets from SPPS_Modifier)
    MS_Adj          = SPPS_Modifier + 1,
    MS_Adv          = SPPS_Modifier + 2,

    // MS Function Words (offsets from SPPS_Function; +2, +8 and +10 are unused)
    MS_VAux         = SPPS_Function + 1,
    MS_Conj         = SPPS_Function + 3,
    MS_CConj        = SPPS_Function + 4,
    MS_Interr       = SPPS_Function + 5,
    MS_Det          = SPPS_Function + 6,
    MS_Contr        = SPPS_Function + 7,
    MS_Prep         = SPPS_Function + 9,

    // MS Punctuation (MS_Punctuation is the base for the values below it)
    MS_Punctuation  = SPPS_Function + 11,
    MS_GroupBegin   = MS_Punctuation + 1,
    MS_GroupEnd     = MS_Punctuation + 2,
    MS_EOSItem      = MS_Punctuation + 3,
    MS_MiscPunc     = MS_Punctuation + 4,
    MS_Quotation    = MS_Punctuation + 5
} ENGPARTOFSPEECH;
// Classification of a sentence item.
// Values are split into two ranges by base constant: items built on
// eWORDLIST_IS_VALID (0x1000) and punctuation-style items built on
// eWORDLIST_NOT_VALID (0x0000). Judging by the base names, the range
// presumably tells whether the item's word list is usable - confirm against
// the code that consumes this enum.
typedef enum TTSItemType
{
    //--- Range bases
    eWORDLIST_NOT_VALID         = 0x0000,
    eWORDLIST_IS_VALID          = 0x1000,

    //--- Items in the eWORDLIST_IS_VALID range
    eUNMATCHED                  = eWORDLIST_IS_VALID + 1,
    eALPHA_WORD                 = eWORDLIST_IS_VALID + 2,
    eABBREVIATION               = eWORDLIST_IS_VALID + 3,
    eABBREVIATION_NORMALIZE     = eWORDLIST_IS_VALID + 4,
    eINITIALISM                 = eWORDLIST_IS_VALID + 5,
    eNUM_CARDINAL               = eWORDLIST_IS_VALID + 6,
    eNUM_ORDINAL                = eWORDLIST_IS_VALID + 7,
    eNUM_DECIMAL                = eWORDLIST_IS_VALID + 8,
    eNUM_PERCENT                = eWORDLIST_IS_VALID + 9,
    eNUM_DEGREES                = eWORDLIST_IS_VALID + 10,
    eNUM_SQUARED                = eWORDLIST_IS_VALID + 11,
    eNUM_CUBED                  = eWORDLIST_IS_VALID + 12,
    eNUM_CURRENCY               = eWORDLIST_IS_VALID + 13,
    eNUM_FRACTION               = eWORDLIST_IS_VALID + 14,
    eNUM_MIXEDFRACTION          = eWORDLIST_IS_VALID + 15,
    eNUM_ROMAN_NUMERAL          = eWORDLIST_IS_VALID + 16,
    eNUM_ROMAN_NUMERAL_ORDINAL  = eWORDLIST_IS_VALID + 17,
    eNUM_PHONENUMBER            = eWORDLIST_IS_VALID + 18,
    eNUM_ZIPCODE                = eWORDLIST_IS_VALID + 19,
    eDATE_YEAR                  = eWORDLIST_IS_VALID + 20,
    eDATE                       = eWORDLIST_IS_VALID + 21,
    eDATE_LONGFORM              = eWORDLIST_IS_VALID + 22,
    eDECADE                     = eWORDLIST_IS_VALID + 23,
    eTIMEOFDAY                  = eWORDLIST_IS_VALID + 24,
    eTIME                       = eWORDLIST_IS_VALID + 25,
    eSPELLOUT                   = eWORDLIST_IS_VALID + 26,
    eHYPHENATED_STRING          = eWORDLIST_IS_VALID + 27,
    eSTATE_AND_ZIPCODE          = eWORDLIST_IS_VALID + 28,
    eTIME_RANGE                 = eWORDLIST_IS_VALID + 29,
    eNUM_RANGE                  = eWORDLIST_IS_VALID + 30,
    eTEMP_NUMBER                = eWORDLIST_IS_VALID + 31,
    eTEMP_PERCENT               = eWORDLIST_IS_VALID + 32,
    eTEMP_DEGREES               = eWORDLIST_IS_VALID + 33,
    eTEMP_NUM_FRACTION          = eWORDLIST_IS_VALID + 34,
    eTEMP_NUM_MIXEDFRACTION     = eWORDLIST_IS_VALID + 35,
    eTEMP_NUM_DECIMAL           = eWORDLIST_IS_VALID + 36,
    eTEMP_NUM_ORDINAL           = eWORDLIST_IS_VALID + 37,
    eTEMP_NUM_CURRENCY          = eWORDLIST_IS_VALID + 38,
    eNEWNUM_PHONENUMBER         = eWORDLIST_IS_VALID + 39,
    eNUM_CURRENCYRANGE          = eWORDLIST_IS_VALID + 40,
    eSUFFIX                     = eWORDLIST_IS_VALID + 41,

    //--- Items in the eWORDLIST_NOT_VALID range (grouping and punctuation marks)
    eOPEN_PARENTHESIS           = eWORDLIST_NOT_VALID + 1,
    eOPEN_BRACKET               = eWORDLIST_NOT_VALID + 2,
    eOPEN_BRACE                 = eWORDLIST_NOT_VALID + 3,
    eCLOSE_PARENTHESIS          = eWORDLIST_NOT_VALID + 4,
    eCLOSE_BRACKET              = eWORDLIST_NOT_VALID + 5,
    eCLOSE_BRACE                = eWORDLIST_NOT_VALID + 6,
    eSINGLE_QUOTE               = eWORDLIST_NOT_VALID + 7,
    eDOUBLE_QUOTE               = eWORDLIST_NOT_VALID + 8,
    ePERIOD                     = eWORDLIST_NOT_VALID + 9,
    eEXCLAMATION                = eWORDLIST_NOT_VALID + 10,
    eQUESTION                   = eWORDLIST_NOT_VALID + 11,
    eCOMMA                      = eWORDLIST_NOT_VALID + 12,
    eSEMICOLON                  = eWORDLIST_NOT_VALID + 13,
    eCOLON                      = eWORDLIST_NOT_VALID + 14,
    eHYPHEN                     = eWORDLIST_NOT_VALID + 15,
    eELLIPSIS                   = eWORDLIST_NOT_VALID + 16,
} TTSItemType;
// One word belonging to a sentence item: its XML state, its text and lemma
// (each a pointer plus a length in WCHARs), its pronunciation and its part
// of speech.
typedef struct TTSWord
{
    const SPVSTATE*     pXmlState;          // XML state of the word
    LPCWSTR             pWordText;          // Orthographic form of the word
    ULONG               ulWordLen;          // Word length, counted in WCHARs
    LPCWSTR             pLemma;             // Orthographic form of the root word
    ULONG               ulLemmaLen;         // Lemma length, counted in WCHARs
    SPPHONEID*          pWordPron;          // NULL terminated pronunciation of the word
    ENGPARTOFSPEECH     eWordPartOfSpeech;  // Part of speech of the word (open question: is this needed?)
} TTSWord;
// Per-item information record. It currently carries only the item's
// TTSItemType classification.
typedef struct TTSItemInfo
{
    TTSItemType     Type;   // Classification of the item
} TTSItemInfo;
// One item of a sentence: where its original text sits in the source, the
// words it produced after normalization, and its overall classification.
typedef struct TTSSentItem
{
    LPCWSTR             pItemSrcText;       // Original text of the item
    ULONG               ulItemSrcLen;       // Length of the original text
    ULONG               ulItemSrcOffset;    // Offset of the original text
    TTSWord*            Words;              // Words of the item, post normalization
    ULONG               ulNumWords;         // Number of words of the item, post normalization
    ENGPARTOFSPEECH     eItemPartOfSpeech;  // Part of speech of the entire item
    TTSItemInfo*        pItemInfo;          // Item info record (holds the TTSItemType)
} TTSSentItem;
//=== Constants ===============================================================
// Named numeric limits exposed as an enum.
// NOTE(review): the names suggest upper bounds for MSVOICEINFO::LPCOrder and
// MSVOICEINFO::FFTSize - nothing in this file enforces that, so confirm.
typedef enum INVCONST
{
    MAX_LPCORDER    = 30,
    MAX_FFTSIZE     = 512
} INVCONST;
//=== Interface definitions ===================================================
///// NOTE: This section to be moved to SAPI.IDL in SAPI6
//--- IEnumSENTITEM -----------------------------------------------------------
// Local-only enumerator whose Next() writes one TTSSentItem through its out
// parameter and whose Reset() takes no arguments.
// NOTE(review): end-of-enumeration behaviour and return codes are not
// specified here - check the implementation.
[
    object,
    local,
    uuid(E0F4088D-CD08-11d2-B503-00C04F797396),
    helpstring("IEnumSENTITEM Interface"),
    pointer_default(unique)
]
interface IEnumSENTITEM : IUnknown
{
    HRESULT Next(
        [out] TTSSentItem *pItemEnum );

    HRESULT Reset( void );
};
//--- IEnumSpSentence ---------------------------------------------------------
// Local-only enumerator driven by a list of SPVTEXTFRAG text fragments.
// SetFragList() receives the fragment list and the speak flags; Next() and
// Previous() each hand back an IEnumSENTITEM through their out parameter;
// Reset() takes no arguments.
// NOTE(review): presumably each IEnumSENTITEM covers the items of a single
// sentence - confirm against the implementation.
[
    object,
    local,
    uuid(299A9157-CD08-11d2-B503-00C04F797396),
    helpstring("IEnumSpSentence Interface"),
    pointer_default(unique)
]
interface IEnumSpSentence : IUnknown
{
    HRESULT SetFragList(
        [in] const SPVTEXTFRAG* pTextFragList,
        [in] DWORD dwSpeakFlags );

    HRESULT Next(
        [out] IEnumSENTITEM **ppSentItemEnum );

    HRESULT Previous(
        [out] IEnumSENTITEM **ppSentItemEnum );

    HRESULT Reset( void );
};
///// End SAPI6 section
// Upper limit on the number of parts of speech attached to one pronunciation
enum
{
    POS_MAX = 4
};
// Where a pronunciation came from.
// NOTE(review): LEX / LTS presumably stand for lexicon and letter-to-sound -
// confirm.
typedef enum PRONSRC
{
    PRON_LEX = 0,
    PRON_LTS = 1,
} PRONSRC;
//------------------------------------------------
// Reduced part-of-speech set used for prosody
//------------------------------------------------
enum PROSODY_POS
{
    POS_UNK     = 0,    // unknown
    POS_FUNC    = 1,    // any function word
    POS_CONTENT = 2,    // any content word
    POS_AUX     = 3,    // NOTE(review): presumably auxiliary - confirm
};
// Reverb delay presets (stored in MSVOICEINFO::eReverbType)
typedef enum REVERBTYPE
{
    REVERB_TYPE_OFF     = 0,
    REVERB_TYPE_BATHTUB = 1,
    REVERB_TYPE_ROOM    = 2,
    REVERB_TYPE_HALL    = 3,
    REVERB_TYPE_CHURCH  = 4,
    REVERB_TYPE_STADIUM = 5,
    REVERB_TYPE_ECHO    = 6,
    REVERB_TYPE_ROBOSEQ = 7,    // Robot with 'sequencer'
} REVERBTYPE;
// Position bit flags for a unit; each flag occupies its own bit.
typedef enum UNITFLAGS
{
    WORD_START_FLAG = 1L << 0,  // A word begins on this unit
    SENT_START_FLAG = 1L << 1,  // A sentence begins on this unit
} UNITFLAGS;
// Tap count limit; MAXTAPS is the array length of
// MSVOICEINFO::TapCoefficients.
typedef enum TAPS
{
    MAXTAPS = 8
} TAPS;
// User rate settings: the lowest and highest values plus the default.
enum USER_RATE_VALUE
{
    MIN_USER_RATE       = -18,
    MAX_USER_RATE       = 18,
    DEFAULT_USER_RATE   = 0     // None
};
// Sentinel one past MAX_USER_RATE: a rate change is applied only when the
// value is NOT this.
enum
{
    NO_RATE_CHANGE = MAX_USER_RATE + 1
};
/*** UNIT_CVT
* Unit conversion record, used as the [in,out] element type of
* IMSVoiceData::GetUnitIDs(). The first two members are inputs; the
* remaining members are outputs.
*/
typedef struct UNIT_CVT
{
    ULONG   PhonID;             // [in]  Phoneme ID
    ULONG   flags;              // [in]  Position flags
    ULONG   UnitID;             // [out] Inventory table ID
    ULONG   SenoneID;           // [out] Context offset
    float   Dur;                // [out] Duration in seconds
    float   Amp;                // [out] Amplitude
    float   AmpRatio;           // [out] Amplitude gain
    CHAR    szUnitName[15];     // [out] Name string
} UNIT_CVT;
/*** MSVOICEINFO
* Description of a voice data object, returned through the out parameter of
* IMSVoiceData::GetVoiceInfo().
*/
typedef struct MSVOICEINFO
{
    WAVEFORMATEX    WaveFormatEx;               // Format of the voice data
    LCID            LangID;                     // Language ID of the voice data
    ULONG           Rate;                       // Words-per-minute
    ULONG           Pitch;                      // Average pitch, in Hz
    REVERBTYPE      eReverbType;                // Reverb param
    ULONG           ProsodyGain;                // 0 = monotone
    ULONG           NumOfTaps;                  // BE: Whisper param
    float           TapCoefficients[MAXTAPS];   // BE: Whisper param
    float           VibratoFreq;                // Hertz
    ULONG           VibratoDepth;               // 0 - 100%
    ULONG           SampleRate;                 // 22050 typical
    ULONG           LPCOrder;                   // Number of LPC coefficients
    ULONG           FFTSize;                    // FFT window length
    float*          pWindow;                    // Hanning Window
} MSVOICEINFO;
/*** MSUNITDATA
* Result of a unit fetch, returned through the out parameter of
* IMSVoiceData::GetUnitData(): three counts followed by four float buffers.
* NOTE(review): the individual fields are not documented in this file; the
* names suggest epoch, LPC, residual and gain data - confirm buffer lengths
* and ownership against the implementation.
*/
typedef struct MSUNITDATA
{
    ULONG   cNumEpochs;
    ULONG   cNumSamples;
    ULONG   cOrder;
    float*  pEpoch;
    float*  pLPC;
    float*  pRes;
    float*  pGain;
} MSUNITDATA;
// Attribute bits for AlloToUnit()
enum
{
    ALLO_IS_STRESSED = 1 << 0
};
/*** IMSVoiceData
* Private, local-only interface on TTS voice data objects. A voice data
* object wraps the voice data together with the lookup logic needed to use
* it.
*
*   GetVoiceInfo - fills in an MSVOICEINFO
*   GetUnitIDs   - takes an array of cUnits UNIT_CVT records, in/out
*   GetUnitData  - fills in an MSUNITDATA for the given unit ID
*   AlloToUnit   - takes an allo plus attribute bits (ALLO_IS_STRESSED) and
*                  returns a unit ID
*/
[
    object,
    local,
    uuid(6265B7E1-0340-11d3-B50C-00C04F797396),
    helpstring("IMSVoiceData Interface"),
    pointer_default(unique)
]
interface IMSVoiceData : IUnknown
{
    HRESULT GetVoiceInfo(
        [out] MSVOICEINFO* pVoiceInfo );

    HRESULT GetUnitIDs(
        [in,out] UNIT_CVT* pUnits,
        [in] ULONG cUnits );

    HRESULT GetUnitData(
        [in] ULONG unitID,
        [out] MSUNITDATA* pUnitData );

    HRESULT AlloToUnit(
        [in] short allo,
        [in] long attributes,
        [out] long* pUnitID );
};
/*** IMSTTSEngineInit
* Private, local-only initialization interface of the engine. Its single
* method, VoiceInit(), receives the IMSVoiceData object, which is how the
* voice object gets connected to the synthesizer.
*/
[
    object,
    local,
    uuid(8A7C38EB-D8B0-11d2-B504-00C04F797396),
    helpstring("IMSTTSEngineInit Interface"),
    pointer_default(unique)
]
interface IMSTTSEngineInit : IUnknown
{
    HRESULT VoiceInit(
        [in] IMSVoiceData* pVoiceData );
};
//=== CoClass definitions =====================================================
// Type library for the engine. It publishes two coclasses: the voice data
// loader and the synthesizer.
[
    uuid(3F7C4D29-D007-11D2-B503-00C04F797396),
    version(1.0),
    helpstring("MS TTS Engine 1.0 Type Library")
]
library MSTTSENGINELib
{
    importlib("stdole32.tlb");
    importlib("stdole2.tlb");

    //--- Voice data object: loads the voice data files and exposes them
    //    to the driver.
    [
        uuid(65DBDDEF-0725-11d3-B50C-00C04F797396),
        helpstring("MSVoiceData Class")
    ]
    coclass MSVoiceData
    {
        [default] interface IMSVoiceData;
    };

    //--- Synthesizer object. ISpTTSEngine is not declared in this file
    //    (presumably it comes from an imported IDL - confirm).
    [
        uuid(B93AE09F-D033-11D2-B503-00C04F797396),
        helpstring("MSTTSEngine Class")
    ]
    coclass MSTTSEngine
    {
        [default] interface ISpTTSEngine;
        interface IMSTTSEngineInit;
    };
};