windows-server-2003/enduser/speech/tts/msttsdrv/engine/morph.h


								/*******************************************************************************

								* morph.h *

								*---------*

								*   Description:

								*       This is the header file for the CSMorph implementation.  This class

								*   attempts to find pronunciations for morphological variants (which do not

								*   occur in the lexicon) of root words (which do occur in the lexicon).

								*-------------------------------------------------------------------------------

								*  Created By: AH                            Date: 08/16/99

								*  Copyright (C) 1999 Microsoft Corporation

								*  All Rights Reserved

								*******************************************************************************/

								#ifndef Morph_h

								#define Morph_h


								#ifndef __spttseng_h__

								#include "spttseng.h"

								#endif


								// Additional includes...

								#include "stdafx.h"

								#include "commonlx.h"


								//== CONSTANTS ================================================================


								// Capacity of SUFFIXPRON_INFO::Conversions - the most From/To part-of-speech
								// pairs a single suffix entry can hold (the ER_SUFFIX row uses all four).
								#define MAX_POSCONVERSIONS 4

								// NOTE(review): not referenced anywhere in this header; presumably the number
								// of part-of-speech categories the morphology code distinguishes - confirm.
								#define NUM_POS 5


								/*** SUFFIX_TYPE **************************************************************
								* Identifies each suffix which can be matched and accounted for by the CSMorph
								* class.
								*
								* The suffix values count up from zero in the same order in which the rows of
								* g_SuffixInfoTable are labelled (presumably so that a SUFFIX_TYPE can index
								* that table - keep the two in step when adding a suffix).  NO_MATCH is the
								* only negative value.
								*
								* This is a bare type declaration: it declares no object, so it carries no
								* storage-class specifier or cv-qualifier (standard C++ rejects them here and
								* MSVC merely ignores them with warning C4091).  The last enumerator has no
								* trailing comma so that C++03 compilers accept the list as well.
								*/
								enum SUFFIX_TYPE
								{
								    S_SUFFIX = 0,
								    ED_SUFFIX,
								    ING_SUFFIX,
								    APOSTROPHES_SUFFIX,
								    APOSTROPHE_SUFFIX,
								    ER_SUFFIX,
								    EST_SUFFIX,
								    OR_SUFFIX,
								    MENT_SUFFIX,
								    AGE_SUFFIX,
								    LESS_SUFFIX,
								    Y_SUFFIX,
								    EDLY_SUFFIX,
								    LY_SUFFIX,
								    ABLE_SUFFIX,
								    NESS_SUFFIX,
								    ISM_SUFFIX,
								    IZE_SUFFIX,
								    IZ_SUFFIX,
								    HOOD_SUFFIX,
								    FUL_SUFFIX,
								    LIKE_SUFFIX,
								    WISE_SUFFIX,
								    ISH_SUFFIX,
								    ABLY_SUFFIX,
								    SHIP_SUFFIX,
								    ICALLY_SUFFIX,
								    SOME_SUFFIX,
								    ILY_SUFFIX,
								    ICISM_SUFFIX,
								    ICIZE_SUFFIX,
								    NO_MATCH = -1
								};


								/* SUFFIX_INFO, g_SuffixTable[] ***********************************************
								* g_SuffixTable is used to map the orthographic forms of suffixes to their
								* suffix types; SUFFIX_INFO is the layout of one row of that table.  Each
								* suffix is stored in reverse order for easier comparison with the ends of
								* strings...
								*/

								struct SUFFIX_INFO

								{

								    // Reversed spelling of the suffix, NUL-terminated.  The buffer holds at
								    // most 9 characters plus the terminator; every current table entry is
								    // upper case and the longest ("YLLACI") is 6 characters.
								    WCHAR       Orth[10];

								    // The suffix type this spelling stands for.
								    SUFFIX_TYPE Type;

								};


								// Reversed suffix spellings and the suffix type each one denotes.
								// Where one reversed spelling begins with another (e.g. "YLI" / "YL" / "Y",
								// "EZICI" / "EZI", or "S'" / "S"), the longer one is listed first - presumably
								// because the matcher takes the first row that fits; confirm before
								// reordering.  Both "-able" and "-ible" map to ABLE_SUFFIX, and both "-ably"
								// and "-ibly" map to ABLY_SUFFIX.
								static const SUFFIX_INFO g_SuffixTable[] =

								{

								    { L"RE",        ER_SUFFIX },            // -er

								    { L"TSE",       EST_SUFFIX },           // -est

								    { L"GNI",       ING_SUFFIX },           // -ing

								    { L"ELBA",      ABLE_SUFFIX },          // -able

								    { L"ELBI",      ABLE_SUFFIX },          // -ible

								    { L"YLDE",      EDLY_SUFFIX },          // -edly

								    { L"YLBA",      ABLY_SUFFIX },          // -ably

								    { L"YLBI",      ABLY_SUFFIX },          // -ibly

								    { L"YLLACI",    ICALLY_SUFFIX },        // -ically

								    { L"YLI",       ILY_SUFFIX },           // -ily

								    { L"YL",        LY_SUFFIX },            // -ly

								    { L"Y",         Y_SUFFIX },             // -y

								    { L"TNEM",      MENT_SUFFIX },          // -ment

								    { L"RO",        OR_SUFFIX },            // -or

								    { L"SSEN",      NESS_SUFFIX },          // -ness

								    { L"SSEL",      LESS_SUFFIX },          // -less

								    { L"EZICI",     ICIZE_SUFFIX },         // -icize

								    { L"EZI",       IZE_SUFFIX },           // -ize

								    { L"ZI",        IZ_SUFFIX },            // -iz

								    { L"MSICI",     ICISM_SUFFIX },         // -icism

								    { L"MSI",       ISM_SUFFIX },           // -ism

								    { L"DE",        ED_SUFFIX },            // -ed

								    { L"S'",        APOSTROPHES_SUFFIX },   // -'s

								    { L"S",         S_SUFFIX },             // -s

								    { L"'",         APOSTROPHE_SUFFIX },    // -'

								    { L"EGA",       AGE_SUFFIX },           // -age

								    { L"DOOH",      HOOD_SUFFIX },          // -hood

								    { L"LUF",       FUL_SUFFIX },           // -ful

								    { L"EKIL",      LIKE_SUFFIX },          // -like

								    { L"ESIW",      WISE_SUFFIX },          // -wise

								    { L"HSI",       ISH_SUFFIX },           // -ish

								    { L"PIHS",      SHIP_SUFFIX },          // -ship

								    { L"EMOS",      SOME_SUFFIX },          // -some

								};


								/*** PHONTYPE *****************************************************************
								* Flags which can be used to determine the relevant features of each phone.
								* Each flag occupies its own bit, so a phone's type is any combination of
								* them (see g_PhonTable); a phone without eCONSONANTF is treated as a vowel.
								*
								* This is a bare type declaration: it declares no object, so it carries no
								* storage-class specifier or cv-qualifier (standard C++ rejects them here and
								* MSVC merely ignores them with warning C4091).  The last enumerator has no
								* trailing comma so that C++03 compilers accept the list as well.
								*/
								enum PHONTYPE
								{
								    eCONSONANTF = (1<<0),   // phone is a consonant
								    eVOICEDF    = (1<<1),   // phone is voiced
								    ePALATALF   = (1<<2)    // phone is palatal
								};


								/*** g_PhonTable[], g_PhonS, g_PhonZ *******************************************

								* This table is used to map the internal values of phones to their types, which

								* are just clusters of features relevant to the necessary phonological rules.

								* g_PhonS, g_PhonZ, g_PhonD, g_PhonT are just used to make the code a bit more

								* readable.

								*/

								static const long g_PhonTable[] =

								{

								    eCONSONANTF,                        // Default value - 0 is not a valid phone

								    eCONSONANTF,                        // 1 is a syllable boundary - shouldn't ever occur at the end of a word

								    eCONSONANTF,                        // 2 is an exclamation point - shouldn't ever occur at the end of a word

								    eCONSONANTF,                        // 3 is a word boundary - treated as a consonant

								    eCONSONANTF,                        // 4 is a comma - shouldn't ever occur at the end of a word

								    eCONSONANTF,                        // 5 is a period - shouldn't ever occur at the end of a word

								    eCONSONANTF,                        // 6 is a question mark - shouldn't ever occur at the end of a word

								    eCONSONANTF,                        // 7 is a silence - shouldn't ever occur at the end of a word

								    eVOICEDF,                           // 8 is primary stress - treat as a vowel since it should always be attached to a vowel nucleus

								    eVOICEDF,                           // 9 is secondatry stress - see primary stress

								    eVOICEDF,                           // 10 -> AA

								    eVOICEDF,                           // 11 -> AE

								    eVOICEDF,                           // 12 -> AH

								    eVOICEDF,                           // 13 -> AO

								    eVOICEDF,                           // 14 -> AW

								    eVOICEDF,                           // 15 -> AX

								    eVOICEDF,                           // 16 -> AY

								    eCONSONANTF + eVOICEDF,             // 17 -> b

								    eCONSONANTF + ePALATALF,            // 18 -> CH

								    eCONSONANTF + eVOICEDF,             // 19 -> d

								    eCONSONANTF + eVOICEDF,             // 20 -> DH

								    eVOICEDF,                           // 21 -> EH

								    eVOICEDF,                           // 22 -> ER

								    eVOICEDF,                           // 23 -> EY

								    eCONSONANTF,                        // 24 -> f

								    eCONSONANTF + eVOICEDF,             // 25 -> g

								    eCONSONANTF,                        // 26 -> h

								    eVOICEDF,                           // 27 -> IH

								    eVOICEDF,                           // 28 -> IY

								    eCONSONANTF + eVOICEDF + ePALATALF, // 29 -> JH

								    eCONSONANTF,                        // 30 -> k

								    eCONSONANTF + eVOICEDF,             // 31 -> l

								    eCONSONANTF + eVOICEDF,             // 32 -> m

								    eCONSONANTF + eVOICEDF,             // 33 -> n

								    eCONSONANTF + eVOICEDF,             // 34 -> NG

								    eVOICEDF,                           // 35 -> OW

								    eVOICEDF,                           // 36 -> OY

								    eCONSONANTF,                        // 37 -> p

								    eCONSONANTF + eVOICEDF,             // 38 -> r

								    eCONSONANTF,                        // 39 -> s

								    eCONSONANTF + ePALATALF,            // 40 -> SH

								    eCONSONANTF,                        // 41 -> t

								    eCONSONANTF,                        // 42 -> TH

								    eVOICEDF,                           // 43 -> UH

								    eVOICEDF,                           // 44 -> UW

								    eCONSONANTF + eVOICEDF,             // 45 -> v

								    eCONSONANTF + eVOICEDF,             // 46 -> w

								    eCONSONANTF + eVOICEDF,             // 47 -> y

								    eCONSONANTF + eVOICEDF,             // 48 -> z

								    eCONSONANTF + eVOICEDF + ePALATALF, // 49 -> ZH

								};


								// Named pronunciation fragments, used to make the code a bit more readable.
								// Every fragment starts with a space followed by space-separated phone
								// symbols.
								// NOTE(review): these are mutable (non-const) arrays with internal linkage
								// defined in a header, so every translation unit that includes it gets its
								// own copy; presumably they are non-const because they are handed to code
								// taking WCHAR* - confirm before adding const.
								static WCHAR g_phonAXl[] = L" AX l";

								static WCHAR g_phonAXz[] = L" AX z";

								static WCHAR g_phonS[] = L" s";

								static WCHAR g_phonZ[] = L" z";

								static WCHAR g_phonD[] = L" d";

								static WCHAR g_phonAXd[] = L" AX d";

								static WCHAR g_phonT[] = L" t";

								static WCHAR g_phonIY[] = L" IY";

								static WCHAR g_phonL[] = L" l";


								/*** struct POS_CONVERT *******************************************************
								* This struct stores the From and To parts of speech for a suffix, i.e. one
								* "root of category FromPos becomes a word of category ToPos" rule.  A suffix
								* entry (SUFFIXPRON_INFO) holds up to MAX_POSCONVERSIONS of these.
								*/

								struct POS_CONVERT

								{

								    ENGPARTOFSPEECH FromPos;    // part of speech the suffix takes as input

								    ENGPARTOFSPEECH ToPos;      // part of speech the suffix produces

								};


								/*** MorphSpecialCaseFlags ****************************************************
								* Bit flags which allow DoSuffixMorph to be nearly completely table driven.
								* Each suffix has a MorphSpecialCaseFlags entry in the SuffixInfoTable which
								* tells DoSuffixMorph which special case functions (check for missing E,
								* etc.) need to be called if the initial lex lookup fails.  Every flag
								* occupies a bit of its own, so any combination can be stored in one value.
								*/
								typedef enum MorphSpecialCaseFlags
								{
								    eCheckForMissingE       = 0x01,     // bit 0
								    eCheckYtoIMutation      = 0x02,     // bit 1
								    eCheckDoubledMutation   = 0x04,     // bit 2
								    eCheckForMissingY       = 0x08,     // bit 3
								    eCheckForMissingL       = 0x10,     // bit 4
								} MorphSpecialCaseFlags;


								/*** struct SUFFIXPRON_INFO ***************************************************
								* This struct stores the pronunciation of a suffix, as well as the POS
								* categories it takes as input and output, and the special case checks that
								* apply to it.  It is the row type of g_SuffixInfoTable.
								*/

								struct SUFFIXPRON_INFO

								{

								    // Pronunciation of the suffix.  The table initializes this with a
								    // space-separated string of phone symbols.
								    WCHAR SuffixString[SP_MAX_PRON_LENGTH];

								    // From/To part-of-speech pairs; only the first NumConversions are listed
								    // explicitly in the table, the rest are zero-initialized.
								    POS_CONVERT Conversions[MAX_POSCONVERSIONS];

								    // Number of pairs in Conversions that are in use.
								    short NumConversions;

								    // Combination of MorphSpecialCaseFlags values (0 when none apply).
								    DWORD dwMorphSpecialCaseFlags;

								};


								/*** bool SuffixInfoTableInitialized *******************************************
								* This bool just lets threads know whether they are the first to use the
								* following table, and thus whether they need to initialize it or not.
								*
								* NOTE(review): being a static defined in a header, every translation unit
								* that includes this file gets its own copy of the flag (and of the table).
								* The flag itself provides no synchronization; presumably it is only read
								* and written while holding g_SuffixInfoTableCritSec - confirm in the
								* implementation.
								*/

								static bool SuffixInfoTableInitialized = false;


								/*** SUFFIXPRON_INFO g_SuffixInfoTable *****************************************
								* This table drives the DoSuffixMorph function, by storing the pronunciation,
								* conversions, number of conversions, and special case flags for each suffix.
								*
								* Each row is laid out as
								*     { pronunciation,
								*       { {FromPos, ToPos}, ... },
								*       NumConversions, special case flags },
								* and is preceded by the SUFFIX_TYPE it belongs to; rows appear in
								* SUFFIX_TYPE order (S_SUFFIX .. ICIZE_SUFFIX).  In every row NumConversions
								* equals the number of conversion pairs listed.
								*
								* NOTE(review): the ICALLY, SOME and ILY rows spell consonant phones in upper
								* case (" L IY", " S AX M", " AX L IY") whereas every other row uses lower
								* case for them (e.g. " l IY") - confirm this is intentional.
								*/
								static SUFFIXPRON_INFO g_SuffixInfoTable[] =
								{
								    // S_SUFFIX
								    { L" s",
								      { {MS_Verb, MS_Verb}, {MS_Noun, MS_Noun} },
								      2, 0 },
								    // ED_SUFFIX
								    { L" d",
								      { {MS_Verb, MS_Verb}, {MS_Verb, MS_Adj} },
								      2, eCheckForMissingE | eCheckYtoIMutation | eCheckDoubledMutation },
								    // ING_SUFFIX
								    { L" IH NG",
								      { {MS_Verb, MS_Verb}, {MS_Verb, MS_Adj}, {MS_Verb, MS_Noun} },
								      3, eCheckForMissingE | eCheckDoubledMutation },
								    // APOSTROPHES_SUFFIX
								    { L" s",
								      { {MS_Noun, MS_Noun} },
								      1, 0 },
								    // APOSTROPHE_SUFFIX
								    { L" s",
								      { {MS_Noun, MS_Noun} },
								      1, 0 },
								    // ER_SUFFIX
								    { L" ER",
								      { {MS_Verb, MS_Noun}, {MS_Adj, MS_Adj}, {MS_Adv, MS_Adv}, {MS_Adj, MS_Adv} },
								      4, eCheckForMissingE | eCheckYtoIMutation | eCheckDoubledMutation },
								    // EST_SUFFIX
								    { L" AX s t",
								      { {MS_Adj, MS_Adj}, {MS_Adv, MS_Adv}, {MS_Adj, MS_Adv} },
								      3, eCheckForMissingE | eCheckYtoIMutation | eCheckDoubledMutation },
								    // OR_SUFFIX
								    { L" ER",
								      { {MS_Verb, MS_Noun} },
								      1, eCheckForMissingE | eCheckDoubledMutation },
								    // MENT_SUFFIX
								    { L" m AX n t",
								      { {MS_Verb, MS_Noun} },
								      1, eCheckYtoIMutation },
								    // AGE_SUFFIX
								    { L" IH JH",
								      { {MS_Verb, MS_Noun} },
								      1, eCheckForMissingE | eCheckDoubledMutation },
								    // LESS_SUFFIX
								    { L" l IH s",
								      { {MS_Noun, MS_Adj} },
								      1, eCheckYtoIMutation },
								    // Y_SUFFIX
								    { L" IY",
								      { {MS_Noun, MS_Adj}, {MS_Adj, MS_Adv} },
								      2, eCheckForMissingE | eCheckDoubledMutation },
								    // EDLY_SUFFIX
								    { L" AX d l IY",
								      { {MS_Verb, MS_Adj}, {MS_Verb, MS_Adv} },
								      2, eCheckForMissingE | eCheckYtoIMutation | eCheckDoubledMutation },
								    // LY_SUFFIX
								    { L" l IY",
								      { {MS_Noun, MS_Adj}, {MS_Adj, MS_Adv} },
								      2, eCheckForMissingL },
								    // ABLE_SUFFIX
								    { L" AX - b AX l",
								      { {MS_Verb, MS_Adj}, {MS_Noun, MS_Adj} },
								      2, eCheckForMissingE | eCheckYtoIMutation | eCheckDoubledMutation },
								    // NESS_SUFFIX
								    { L" n IH s",
								      { {MS_Adj, MS_Noun} },
								      1, eCheckYtoIMutation },
								    // ISM_SUFFIX
								    { L" IH z AX m",
								      { {MS_Adj, MS_Noun}, {MS_Noun, MS_Noun} },
								      2, eCheckForMissingE },
								    // IZE_SUFFIX
								    { L" AY z",
								      { {MS_Noun, MS_Verb}, {MS_Adj, MS_Verb} },
								      2, eCheckForMissingE },
								    // IZ_SUFFIX
								    { L" AY z",
								      { {MS_Noun, MS_Verb}, {MS_Adj, MS_Verb} },
								      2, eCheckForMissingE },
								    // HOOD_SUFFIX
								    { L" h UH d",
								      { {MS_Noun, MS_Noun} },
								      1, 0 },
								    // FUL_SUFFIX
								    { L" f AX l",
								      { {MS_Noun, MS_Adj}, {MS_Verb, MS_Adj} },
								      2, 0 },
								    // LIKE_SUFFIX
								    { L" l AY k",
								      { {MS_Noun, MS_Adj} },
								      1, 0 },
								    // WISE_SUFFIX
								    { L" w AY z",
								      { {MS_Noun, MS_Adj} },
								      1, eCheckYtoIMutation },
								    // ISH_SUFFIX
								    { L" IH SH",
								      { {MS_Noun, MS_Adj} },
								      1, eCheckForMissingE | eCheckDoubledMutation },
								    // ABLY_SUFFIX
								    { L" AX - b l IY",
								      { {MS_Verb, MS_Adv}, {MS_Noun, MS_Adv} },
								      2, eCheckForMissingE | eCheckYtoIMutation | eCheckDoubledMutation },
								    // SHIP_SUFFIX
								    { L" SH IH 2 p",
								      { {MS_Noun, MS_Noun} },
								      1, 0 },
								    // ICALLY_SUFFIX
								    { L" L IY",
								      { {MS_Adj, MS_Adv} },
								      1, 0 },
								    // SOME_SUFFIX
								    { L" S AX M",
								      { {MS_Noun, MS_Adj} },
								      1, eCheckYtoIMutation },
								    // ILY_SUFFIX
								    { L" AX L IY",
								      { {MS_Noun, MS_Adv} },
								      1, eCheckDoubledMutation | eCheckForMissingY },
								    // ICISM_SUFFIX
								    { L" IH z AX m",
								      { {MS_Adj, MS_Noun}, {MS_Noun, MS_Noun} },
								      2, eCheckForMissingE },
								    // ICIZE_SUFFIX
								    { L" AY z",
								      { {MS_Noun, MS_Verb}, {MS_Adj, MS_Verb} },
								      2, eCheckForMissingE },
								};


								/*** CSuffixList **************************************************************
								* This typedef just makes the code a little easier to read.  A CSuffixList is
								* used to keep track of each of the suffixes which has been stripped from a
								* word, so that their pronunciations can be concatenated with that of the root.
								* The list stores SUFFIXPRON_INFO pointers, not copies (presumably pointers to
								* rows of g_SuffixInfoTable - confirm in the implementation).
								*/

								typedef CSPList<SUFFIXPRON_INFO*, SUFFIXPRON_INFO*> CSuffixList;


								/*** CComAutoCriticalSection g_SuffixInfoTableCritSec *************************
								* This critical section is used to make sure the SuffixInfoTable only gets
								* initialized once.
								*
								* NOTE(review): like the table and the SuffixInfoTableInitialized flag, this
								* is a static defined in a header, so each translation unit that includes
								* this file gets its own critical section guarding its own copy of the table.
								*/

								static CComAutoCriticalSection g_SuffixInfoTableCritSec;


								/*** CSMorph ******************************************************************
								* This is the definition of the CSMorph class.  Per the file header, the
								* class attempts to find pronunciations for morphological variants (which do
								* not occur in the lexicon) of root words (which do occur in the lexicon).
								*
								* Only declarations live here; the method bodies are defined elsewhere, so
								* the notes below describe the declared interface and mark anything that is
								* inferred from names as an assumption to be confirmed.
								*/

								class CSMorph

								{

								public:


								    /*=== PUBLIC METHODS =====*/

								    // Both arguments default to 0.  pMasterLex is presumably kept in
								    // m_pMasterLex and *hr presumably receives the construction result -
								    // confirm in the implementation.
								    CSMorph( ISpLexicon *pMasterLex=0, HRESULT *hr=0 );


								    /*=== INTERFACE METHOD =====*/

								    // The one public operation of the class.  pwWord is the (read-only) word
								    // to analyse; pwRoot is a writable buffer, presumably receiving the root
								    // that was found; pWordPronunciationList presumably receives the
								    // resulting pronunciations.  dwFlags is also taken by the private lookup
								    // and Check* helpers below.
								    HRESULT DoSuffixMorph( const WCHAR *pwWord, WCHAR *pwRoot, LANGID LangID, DWORD dwFlags,

								                           SPWORDPRONUNCIATIONLIST *pWordPronunciationList );


								private:


								    /*=== PRIVATE METHODS =====*/

								    // Presumably compares the end of TargWord against g_SuffixTable and
								    // reports the length of the remaining root through RootLen; NO_MATCH is
								    // the SUFFIX_TYPE value available to signal "no suffix" - confirm.
								    SUFFIX_TYPE MatchSuffix( WCHAR *TargWord, long *RootLen );

								    // Lookup helpers taking an orthography and its length.  Presumably
								    // LexLookup consults m_pMasterLex and LTSLookup falls back to
								    // letter-to-sound rules - confirm.
								    HRESULT LexLookup( const WCHAR *pOrth, long length, DWORD dwFlags,

								                       SPWORDPRONUNCIATIONLIST *pWordPronunciationList );

								    HRESULT LTSLookup( const WCHAR *pOrth, long length,

								                       SPWORDPRONUNCIATIONLIST *pWordPronunciationList);

								    // Take the list of stripped suffixes and a pronunciation list; per the
								    // CSuffixList comment, the suffix pronunciations are concatenated with
								    // that of the root.  How the three variants differ is not visible here.
								    HRESULT AccumulateSuffixes( CSuffixList *pSuffixList, SPWORDPRONUNCIATIONLIST *pWordPronunciationList );

								    HRESULT AccumulateSuffixes_LTS( CSuffixList *pSuffixList, SPWORDPRONUNCIATIONLIST *pWordPronunciationList );

								    HRESULT DefaultAccumulateSuffixes( CSuffixList *pSuffixList, SPWORDPRONUNCIATIONLIST *pWordPronunciationList );


								    // Special case functions.  The first five share their names with the
								    // MorphSpecialCaseFlags values that select them; all take a writable
								    // orthography buffer.  CheckYtoIEMutation and CheckAbleMutation have no
								    // corresponding flag in MorphSpecialCaseFlags.
								    HRESULT CheckForMissingE( WCHAR *pOrth, long length, DWORD dwFlags,

								                              SPWORDPRONUNCIATIONLIST *pWordPronunciationList);

								    HRESULT CheckForMissingY( WCHAR *pOrth, long length, DWORD dwFlags,

								                              SPWORDPRONUNCIATIONLIST *pWordPronunciationList );

								    HRESULT CheckForMissingL( WCHAR *pOrth, long length, DWORD dwFlags,

								                              SPWORDPRONUNCIATIONLIST *pWordPronunciationList );

								    HRESULT CheckYtoIMutation( WCHAR *pOrth, long length, DWORD dwFlags,

								                               SPWORDPRONUNCIATIONLIST *pWordPronunciationList);

								    HRESULT CheckDoubledMutation( WCHAR *pOrth, long length, DWORD dwFlags,

								                                  SPWORDPRONUNCIATIONLIST *pWordPronunciationList);

								    HRESULT CheckYtoIEMutation( WCHAR *pOrth, long length, DWORD dwFlags,

								                                SPWORDPRONUNCIATIONLIST *pWordPronunciationList);

								    HRESULT CheckAbleMutation( WCHAR *pOrth, long length, DWORD dwFlags,

								                               SPWORDPRONUNCIATIONLIST *pWordPronunciationList);

								    // Operate on a writable pronunciation buffer.  Presumably they pick the
								    // phonetic form of the -s and -ed suffixes (cf. g_phonS / g_phonZ and
								    // g_phonD / g_phonT / g_phonAXd) from the phone features in g_PhonTable -
								    // confirm.
								    HRESULT Phon_SorZ( WCHAR *pPronunciation, long length );

								    HRESULT Phon_DorED( WCHAR *pPronunciation, long length );


								    /*=== MEMBER DATA =====*/


								    // Pointer to the Master Lexicon...

								    ISpLexicon  *m_pMasterLex;

								};


								/*** SearchPosSet *************************************************************
								* Reports whether Pos occurs among the first Count entries of Set.  Yields
								* true as soon as a matching entry is seen, and false when no entry matches
								* (which includes Count == 0, in which case Set is never read).
								*/
								inline BOOL SearchPosSet( ENGPARTOFSPEECH Pos, const ENGPARTOFSPEECH *Set, ULONG Count )
								{
								    BOOL fFound = false;
								    ULONG ulIndex = 0;

								    //--- Linear scan which stops at the first matching entry
								    while( !fFound && ulIndex < Count )
								    {
								        fFound = ( Pos == Set[ulIndex] );
								        ++ulIndex;
								    }
								    return fFound;
								}


								#endif //--- End of File -------------------------------------------------------------