|
|
/*******************************************************************************
* Disambig.cpp *
*--------------*
*   Description:
*       This module contains the methods to disambiguate part of speech and
*   select the correct pronunciation from the lexicon.
*-------------------------------------------------------------------------------
*   Created By: EDC                                          Date: 07/15/99
*   Copyright (C) 1999 Microsoft Corporation
*   All Rights Reserved
*
*******************************************************************************/
//--- Additional includes
#include "stdafx.h"
#include "commonlx.h"
#ifndef StdSentEnum_h
#include "stdsentenum.h"
#endif
#include "spttsengdebug.h"
/*****************************************************************************
* TryPOSConversion *
*------------------*
*   Description:
*       Searches the pronunciation candidates of a PRONRECORD for the
*   requested part of speech.  The first pronunciation is always searched;
*   the second one is searched only when the record is flagged as having an
*   alternate.  On the first hit the record's altChoice is set to the index
*   of the matching pronunciation (0 or 1), POSchoice is set to the
*   requested part of speech, and true is returned.  When no candidate
*   offers the part of speech, false is returned and the record is left
*   untouched.
*
***************************************************************** AH *********/
bool TryPOSConversion( PRONRECORD& pPron, ENGPARTOFSPEECH PartOfSpeech )
{
    //--- The alternate pronunciation only takes part in the search when present
    const ULONG cCandidates = pPron.hasAlt ? 2 : 1;

    for ( ULONG iPron = 0; iPron < cCandidates; ++iPron )
    {
        //--- Advance to the first POS entry equal to the requested one (if any)
        ULONG iPOS = 0;
        while ( iPOS < pPron.pronArray[iPron].POScount &&
                pPron.pronArray[iPron].POScode[iPOS] != PartOfSpeech )
        {
            ++iPOS;
        }

        if ( iPOS < pPron.pronArray[iPron].POScount )
        {
            //--- Hit: record which pronunciation won and the POS selected
            if ( iPron == 0 )
            {
                pPron.altChoice = 0;
            }
            else
            {
                pPron.altChoice = 1;
            }
            pPron.POSchoice = PartOfSpeech;
            return true;
        }
    }

    //--- Neither candidate offers this part of speech
    return false;
} /* TryPOSConversion */
/*****************************************************************************
* DisambiguatePOS *
*-----------------*
*   Description:
*       Disambiguate parts of speech by applying patches in order...  This
*   work is an implementation of Eric Brill's rule-based part of speech
*   tagger - see, for example:
*
*   Brill, Eric. 1992. A simple rule-based part of speech tagger.
*       In Proceedings of the Third Conference on Applied Natural
*       Language Processing, ACL. Trento, Italy.
*
*   Parameters:
*       pProns      - array of pronunciation records, one per word of the
*                     sentence, updated in place.
*       cNumOfWords - number of entries in pProns.
*
*   Notes:
*       - Every patch in g_POSTaggerPatches is applied, in table order, to
*         the whole sentence from left to right.  A conversion made for one
*         word is therefore visible when later words (and later patches)
*         are examined.
*       - Words whose XMLPartOfSpeech is not MS_Unknown are never converted.
*       - In the template names, "T" rules test the POS of a neighbouring
*         word (eTemplatePOS1/2) and "W" rules test its spelling
*         (pTemplateWord1/2, compared case-insensitively).
*       - A conversion only happens if TryPOSConversion finds the target POS
*         among the word's pronunciations; its return value is not needed.
*
***************************************************************** AH *********/
void DisambiguatePOS( PRONRECORD *pProns, ULONG cNumOfWords )
{
    SPDBG_FUNC( "DisambiguatePOS" );

    //--- Iterate over the patches, applying each (where applicable) to the
    //--- entire sentence.  For each patch, iterate over each word in the
    //--- sentence to which the patch could apply (from left to right).
    for ( ULONG i = 0; i < sp_countof( g_POSTaggerPatches ); i++ )
    {
        switch ( g_POSTaggerPatches[i].eTemplateType )
        {
        case PREV1T:
            {
                //--- Current POS matches and the previous POS matches.
                if ( cNumOfWords > 1 )
                {
                    for ( ULONG j = 1; j < cNumOfWords; j++ )
                    {
                        if ( pProns[j].XMLPartOfSpeech == MS_Unknown &&
                             pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
                             pProns[j - 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 )
                        {
                            TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
                        }
                    }
                }
            }
            break;

        case NEXT1T:
            {
                //--- Current POS matches and the next POS matches.
                if ( cNumOfWords > 1 )
                {
                    for ( ULONG j = 0; j < cNumOfWords - 1; j++ )
                    {
                        if ( pProns[j].XMLPartOfSpeech == MS_Unknown &&
                             pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
                             pProns[j + 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 )
                        {
                            TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
                        }
                    }
                }
            }
            break;

        case PREV2T:
            {
                //--- Current POS matches and the POS two previous matches.
                if ( cNumOfWords > 2 )
                {
                    for ( ULONG j = 2; j < cNumOfWords; j++ )
                    {
                        if ( pProns[j].XMLPartOfSpeech == MS_Unknown &&
                             pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
                             pProns[j - 2].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 )
                        {
                            TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
                        }
                    }
                }
            }
            break;

        case NEXT2T:
            {
                //--- Current POS matches and the POS two after matches.
                if ( cNumOfWords > 2 )
                {
                    for ( ULONG j = 0; j < cNumOfWords - 2; j++ )
                    {
                        if ( pProns[j].XMLPartOfSpeech == MS_Unknown &&
                             pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
                             pProns[j + 2].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 )
                        {
                            TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
                        }
                    }
                }
            }
            break;

        case PREV1OR2T:
            {
                //--- Current POS matches and the previous POS OR the POS two
                //--- previous matches.  Only applied to sentences of more than
                //--- two words.
                if ( cNumOfWords > 2 )
                {
                    for ( ULONG j = 1; j < cNumOfWords; j++ )
                    {
                        if ( pProns[j].XMLPartOfSpeech == MS_Unknown &&
                             pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
                             ( pProns[j - 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 ||
                               ( j > 1 &&
                                 pProns[j - 2].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 ) ) )
                        {
                            TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
                        }
                    }
                }
            }
            break;

        case NEXT1OR2T:
            {
                //--- Current POS matches and the next POS OR the POS two after
                //--- matches.  Only applied to sentences of more than two words.
                if ( cNumOfWords > 2 )
                {
                    for ( ULONG j = 0; j < cNumOfWords - 1; j++ )
                    {
                        if ( pProns[j].XMLPartOfSpeech == MS_Unknown &&
                             pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
                             ( pProns[j + 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 ||
                               ( j + 2 < cNumOfWords &&
                                 pProns[j + 2].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 ) ) )
                        {
                            TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
                        }
                    }
                }
            }
            break;

        case PREV1OR2OR3T:
            {
                //--- Current POS matches and the previous POS OR the POS two
                //--- previous OR the POS three previous matches.  Only applied
                //--- to sentences of more than three words.
                if ( cNumOfWords > 3 )
                {
                    for ( ULONG j = 1; j < cNumOfWords; j++ )
                    {
                        if ( pProns[j].XMLPartOfSpeech == MS_Unknown &&
                             pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
                             ( pProns[j - 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 ||
                               ( j > 1 &&
                                 pProns[j - 2].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 ) ||
                               ( j > 2 &&
                                 pProns[j - 3].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 ) ) )
                        {
                            TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
                        }
                    }
                }
            }
            break;

        case NEXT1OR2OR3T:
            {
                //--- Current POS matches and the next POS OR the POS two after
                //--- OR the POS three after matches.  Only applied to sentences
                //--- of more than three words.
                if ( cNumOfWords > 3 )
                {
                    for ( ULONG j = 0; j < cNumOfWords - 1; j++ )
                    {
                        if ( pProns[j].XMLPartOfSpeech == MS_Unknown &&
                             pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
                             ( pProns[j + 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 ||
                               ( j + 2 < cNumOfWords &&
                                 pProns[j + 2].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 ) ||
                               ( j + 3 < cNumOfWords &&
                                 pProns[j + 3].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 ) ) )
                        {
                            TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
                        }
                    }
                }
            }
            break;

        case PREV1TNEXT1T:
            {
                //--- Current POS matches, the previous POS matches template
                //--- POS 1 and the next POS matches template POS 2.
                if ( cNumOfWords > 2 )
                {
                    for ( ULONG j = 1; j < cNumOfWords - 1; j++ )
                    {
                        if ( pProns[j].XMLPartOfSpeech == MS_Unknown &&
                             pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
                             pProns[j - 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 &&
                             pProns[j + 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS2 )
                        {
                            TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
                        }
                    }
                }
            }
            break;

        case PREV1TNEXT2T:
            {
                //--- Current POS matches, the previous POS matches template
                //--- POS 1 and the POS two after matches template POS 2.
                if ( cNumOfWords > 3 )
                {
                    for ( ULONG j = 1; j < cNumOfWords - 2; j++ )
                    {
                        if ( pProns[j].XMLPartOfSpeech == MS_Unknown &&
                             pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
                             pProns[j - 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 &&
                             pProns[j + 2].POSchoice == g_POSTaggerPatches[i].eTemplatePOS2 )
                        {
                            TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
                        }
                    }
                }
            }
            break;

        case PREV2TNEXT1T:
            {
                //--- Current POS matches, the POS two previous matches template
                //--- POS 1 and the next POS matches template POS 2.
                if ( cNumOfWords > 3 )
                {
                    for ( ULONG j = 2; j < cNumOfWords - 1; j++ )
                    {
                        if ( pProns[j].XMLPartOfSpeech == MS_Unknown &&
                             pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
                             pProns[j - 2].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 &&
                             pProns[j + 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS2 )
                        {
                            TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
                        }
                    }
                }
            }
            break;

        case CAP:
            {
                //--- Current POS matches and the word starts with an upper
                //--- case character.
                for ( ULONG j = 0; j < cNumOfWords; j++ )
                {
                    if ( pProns[j].XMLPartOfSpeech == MS_Unknown &&
                         pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
                         iswupper( pProns[j].orthStr[0] ) )
                    {
                        TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
                    }
                }
            }
            break;

        case NOTCAP:
            {
                //--- Current POS matches and the word does not start with an
                //--- upper case character.
                for ( ULONG j = 0; j < cNumOfWords; j++ )
                {
                    if ( pProns[j].XMLPartOfSpeech == MS_Unknown &&
                         pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
                         !iswupper( pProns[j].orthStr[0] ) )
                    {
                        TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
                    }
                }
            }
            break;

        case PREVCAP:
            {
                //--- Current POS matches and the previous word starts with an
                //--- upper case character.
                if ( cNumOfWords > 1 )
                {
                    for ( ULONG j = 1; j < cNumOfWords; j++ )
                    {
                        if ( pProns[j].XMLPartOfSpeech == MS_Unknown &&
                             pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
                             iswupper( pProns[j - 1].orthStr[0] ) )
                        {
                            TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
                        }
                    }
                }
            }
            break;

        case PREVNOTCAP:
            {
                //--- Current POS matches and the previous word does NOT start
                //--- with an upper case character.
                if ( cNumOfWords > 1 )
                {
                    for ( ULONG j = 1; j < cNumOfWords; j++ )
                    {
                        if ( pProns[j].XMLPartOfSpeech == MS_Unknown &&
                             pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
                             !iswupper( pProns[j - 1].orthStr[0] ) )
                        {
                            TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
                        }
                    }
                }
            }
            break;

        case PREV1W:
            {
                //--- Current POS matches and the previous word matches.
                if ( cNumOfWords > 1 )
                {
                    for ( ULONG j = 1; j < cNumOfWords; j++ )
                    {
                        if ( pProns[j].XMLPartOfSpeech == MS_Unknown &&
                             pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
                             _wcsicmp( pProns[j - 1].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 )
                        {
                            TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
                        }
                    }
                }
            }
            break;

        case NEXT1W:
            {
                //--- Current POS matches and the next word matches.
                if ( cNumOfWords > 1 )
                {
                    for ( ULONG j = 0; j < cNumOfWords - 1; j++ )
                    {
                        if ( pProns[j].XMLPartOfSpeech == MS_Unknown &&
                             pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
                             _wcsicmp( pProns[j + 1].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 )
                        {
                            TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
                        }
                    }
                }
            }
            break;

        case PREV2W:
            {
                //--- Current POS matches and the word two previous matches.
                if ( cNumOfWords > 2 )
                {
                    for ( ULONG j = 2; j < cNumOfWords; j++ )
                    {
                        if ( pProns[j].XMLPartOfSpeech == MS_Unknown &&
                             pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
                             _wcsicmp( pProns[j - 2].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 )
                        {
                            TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
                        }
                    }
                }
            }
            break;

        case NEXT2W:
            {
                //--- Current POS matches and the word two after matches.
                if ( cNumOfWords > 2 )
                {
                    for ( ULONG j = 0; j < cNumOfWords - 2; j++ )
                    {
                        if ( pProns[j].XMLPartOfSpeech == MS_Unknown &&
                             pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
                             _wcsicmp( pProns[j + 2].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 )
                        {
                            TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
                        }
                    }
                }
            }
            break;

        case PREV1OR2W:
            {
                //--- Current POS matches and the previous word OR the word two
                //--- previous matches.
                //--- The scan starts at the second word and runs through the
                //--- last one (same shape as PREV1OR2T); the word two back is
                //--- only examined when it exists, so nothing before the start
                //--- of the array is ever read.
                if ( cNumOfWords > 2 )
                {
                    for ( ULONG j = 1; j < cNumOfWords; j++ )
                    {
                        if ( pProns[j].XMLPartOfSpeech == MS_Unknown &&
                             pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
                             ( _wcsicmp( pProns[j - 1].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 ||
                               ( j > 1 &&
                                 _wcsicmp( pProns[j - 2].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 ) ) )
                        {
                            TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
                        }
                    }
                }
            }
            break;

        case NEXT1OR2W:
            {
                //--- Current POS matches and the next word OR the word two
                //--- after matches.
                //--- The word two after is only examined when it exists, so
                //--- nothing past the end of the array is ever read.
                if ( cNumOfWords > 1 )
                {
                    for ( ULONG j = 0; j < cNumOfWords - 1; j++ )
                    {
                        if ( pProns[j].XMLPartOfSpeech == MS_Unknown &&
                             pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
                             ( _wcsicmp( pProns[j + 1].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 ||
                               ( j + 2 < cNumOfWords &&
                                 _wcsicmp( pProns[j + 2].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 ) ) )
                        {
                            TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
                        }
                    }
                }
            }
            break;

        case CURRWPREV1W:
            {
                //--- Current POS matches, the current word matches template
                //--- word 1 and the previous word matches template word 2.
                if ( cNumOfWords > 1 )
                {
                    for ( ULONG j = 1; j < cNumOfWords; j++ )
                    {
                        if ( pProns[j].XMLPartOfSpeech == MS_Unknown &&
                             pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
                             _wcsicmp( pProns[j].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 &&
                             _wcsicmp( pProns[j - 1].orthStr, g_POSTaggerPatches[i].pTemplateWord2 ) == 0 )
                        {
                            TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
                        }
                    }
                }
            }
            break;

        case CURRWNEXT1W:
            {
                //--- Current POS matches, the current word matches template
                //--- word 1 and the next word matches template word 2.
                if ( cNumOfWords > 1 )
                {
                    for ( ULONG j = 0; j < cNumOfWords - 1; j++ )
                    {
                        if ( pProns[j].XMLPartOfSpeech == MS_Unknown &&
                             pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
                             _wcsicmp( pProns[j].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 &&
                             _wcsicmp( pProns[j + 1].orthStr, g_POSTaggerPatches[i].pTemplateWord2 ) == 0 )
                        {
                            TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
                        }
                    }
                }
            }
            break;

        case CURRWPREV1T:
            {
                //--- Current POS matches, the current word matches and the
                //--- previous POS matches.
                if ( cNumOfWords > 1 )
                {
                    for ( ULONG j = 1; j < cNumOfWords; j++ )
                    {
                        if ( pProns[j].XMLPartOfSpeech == MS_Unknown &&
                             pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
                             _wcsicmp( pProns[j].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 &&
                             pProns[j - 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 )
                        {
                            TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
                        }
                    }
                }
            }
            break;

        case CURRWNEXT1T:
            {
                //--- Current POS matches, the current word matches and the
                //--- next POS matches.
                if ( cNumOfWords > 1 )
                {
                    for ( ULONG j = 0; j < cNumOfWords - 1; j++ )
                    {
                        if ( pProns[j].XMLPartOfSpeech == MS_Unknown &&
                             pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
                             _wcsicmp( pProns[j].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 &&
                             pProns[j + 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 )
                        {
                            TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
                        }
                    }
                }
            }
            break;

        case CURRW:
            {
                //--- Current POS matches and the current word matches.
                for ( ULONG j = 0; j < cNumOfWords; j++ )
                {
                    if ( pProns[j].XMLPartOfSpeech == MS_Unknown &&
                         pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
                         _wcsicmp( pProns[j].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 )
                    {
                        TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
                    }
                }
            }
            break;

        case PREV1WT:
            {
                //--- Current POS matches and the previous word and its POS
                //--- both match.
                if ( cNumOfWords > 1 )
                {
                    for ( ULONG j = 1; j < cNumOfWords; j++ )
                    {
                        if ( pProns[j].XMLPartOfSpeech == MS_Unknown &&
                             pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
                             pProns[j - 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 &&
                             _wcsicmp( pProns[j - 1].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 )
                        {
                            TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
                        }
                    }
                }
            }
            break;

        case NEXT1WT:
            {
                //--- Current POS matches and the next word and its POS both
                //--- match.
                if ( cNumOfWords > 1 )
                {
                    for ( ULONG j = 0; j < cNumOfWords - 1; j++ )
                    {
                        if ( pProns[j].XMLPartOfSpeech == MS_Unknown &&
                             pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
                             pProns[j + 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 &&
                             _wcsicmp( pProns[j + 1].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 )
                        {
                            TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
                        }
                    }
                }
            }
            break;

        case CURRWPREV1WT:
            {
                //--- Current POS matches, the current word matches template
                //--- word 1, and the previous word matches template word 2
                //--- with POS matching template POS 1.
                if ( cNumOfWords > 1 )
                {
                    for ( ULONG j = 1; j < cNumOfWords; j++ )
                    {
                        if ( pProns[j].XMLPartOfSpeech == MS_Unknown &&
                             pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
                             _wcsicmp( pProns[j].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 &&
                             pProns[j - 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 &&
                             _wcsicmp( pProns[j - 1].orthStr, g_POSTaggerPatches[i].pTemplateWord2 ) == 0 )
                        {
                            TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
                        }
                    }
                }
            }
            break;

        case CURRWNEXT1WT:
            {
                //--- Current POS matches, the current word matches template
                //--- word 1, and the next word matches template word 2 with
                //--- POS matching template POS 1.
                if ( cNumOfWords > 1 )
                {
                    for ( ULONG j = 0; j < cNumOfWords - 1; j++ )
                    {
                        if ( pProns[j].XMLPartOfSpeech == MS_Unknown &&
                             pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
                             _wcsicmp( pProns[j].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 &&
                             pProns[j + 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 &&
                             _wcsicmp( pProns[j + 1].orthStr, g_POSTaggerPatches[i].pTemplateWord2 ) == 0 )
                        {
                            TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
                        }
                    }
                }
            }
            break;
        }
    }
} /* DisambiguatePOS */
/*****************************************************************************
* Pronounce * *-----------* * Description: * Get lexicon or letter-to-sound (LTS) pronunciations * ********************************************************************** MC ***/ HRESULT CStdSentEnum::Pronounce( PRONRECORD *pPron ) { SPDBG_FUNC( "Pronounce" ); SPWORDPRONUNCIATIONLIST SPList; HRESULT hr = SPERR_NOT_IN_LEX; ULONG cPhonLen; DWORD dwFlags = eLEXTYPE_USER | eLEXTYPE_APP | eLEXTYPE_PRIVATE1 | eLEXTYPE_PRIVATE2; BOOL fPOSExists = false; ZeroMemory( &SPList, sizeof(SPWORDPRONUNCIATIONLIST) );
//--- Special Case - XML Provided Part Of Speech. Search for exact match first...
if ( pPron->XMLPartOfSpeech != MS_Unknown ) { //--- Try User Lexicon
hr = m_cpAggregateLexicon->GetPronunciations( pPron->orthStr, 1033, eLEXTYPE_USER, &SPList ); if ( SUCCEEDED( hr ) && SPList.pFirstWordPronunciation ) { for ( SPWORDPRONUNCIATION *pPronunciation = SPList.pFirstWordPronunciation; pPronunciation; pPronunciation = pPronunciation->pNextWordPronunciation ) { if ( pPronunciation->ePartOfSpeech == pPron->XMLPartOfSpeech ) { fPOSExists = true; break; } } if ( !fPOSExists ) { if ( SPList.pvBuffer ) { ::CoTaskMemFree( SPList.pvBuffer ); ZeroMemory( &SPList, sizeof(SPWORDPRONUNCIATIONLIST) ); } } } //--- Handle empty pronunciation
else if ( !SPList.pFirstWordPronunciation ) { if ( SPList.pvBuffer ) { ::CoTaskMemFree( SPList.pvBuffer ); ZeroMemory( &SPList, sizeof(SPWORDPRONUNCIATIONLIST) ); } hr = SPERR_NOT_IN_LEX; } //--- Try App Lexicon
if ( !fPOSExists ) { hr = m_cpAggregateLexicon->GetPronunciations( pPron->orthStr, 1033, eLEXTYPE_APP, &SPList ); if ( SUCCEEDED( hr ) && SPList.pFirstWordPronunciation ) { for ( SPWORDPRONUNCIATION *pPronunciation = SPList.pFirstWordPronunciation; pPronunciation; pPronunciation = pPronunciation->pNextWordPronunciation ) { if ( pPronunciation->ePartOfSpeech == pPron->XMLPartOfSpeech ) { fPOSExists = true; break; } } if ( !fPOSExists ) { if ( SPList.pvBuffer ) { ::CoTaskMemFree( SPList.pvBuffer ); ZeroMemory( &SPList, sizeof(SPWORDPRONUNCIATIONLIST) ); } } } //--- Handle empty pronunciation
else if ( !SPList.pFirstWordPronunciation ) { if ( SPList.pvBuffer ) { ::CoTaskMemFree( SPList.pvBuffer ); ZeroMemory( &SPList, sizeof(SPWORDPRONUNCIATIONLIST) ); } hr = SPERR_NOT_IN_LEX; } } //--- Try Vendor Lexicon
if ( !fPOSExists ) { hr = m_cpAggregateLexicon->GetPronunciations( pPron->orthStr, 1033, eLEXTYPE_PRIVATE1, &SPList ); if ( SUCCEEDED( hr ) && SPList.pFirstWordPronunciation ) { for ( SPWORDPRONUNCIATION *pPronunciation = SPList.pFirstWordPronunciation; pPronunciation; pPronunciation = pPronunciation->pNextWordPronunciation ) { if ( pPronunciation->ePartOfSpeech == pPron->XMLPartOfSpeech ) { fPOSExists = true; break; } } if ( !fPOSExists ) { if ( SPList.pvBuffer ) { ::CoTaskMemFree( SPList.pvBuffer ); ZeroMemory( &SPList, sizeof(SPWORDPRONUNCIATIONLIST) ); } } } //--- Handle empty pronunciation
else if ( !SPList.pFirstWordPronunciation ) { if ( SPList.pvBuffer ) { ::CoTaskMemFree( SPList.pvBuffer ); ZeroMemory( &SPList, sizeof(SPWORDPRONUNCIATIONLIST) ); } hr = SPERR_NOT_IN_LEX; } } //--- Try Morph Lexicon
if ( !fPOSExists ) { hr = m_pMorphLexicon->DoSuffixMorph( pPron->orthStr, pPron->lemmaStr, 1033, dwFlags, &SPList ); if ( SUCCEEDED( hr ) && SPList.pFirstWordPronunciation ) { for ( SPWORDPRONUNCIATION *pPronunciation = SPList.pFirstWordPronunciation; pPronunciation; pPronunciation = pPronunciation->pNextWordPronunciation ) { if ( pPronunciation->ePartOfSpeech == pPron->XMLPartOfSpeech ) { fPOSExists = true; break; } } if ( !fPOSExists ) { //--- Need to do this the last time, to make sure we hit the default code below...
//--- RAID 5078
hr = SPERR_NOT_IN_LEX; if ( SPList.pvBuffer ) { ::CoTaskMemFree( SPList.pvBuffer ); ZeroMemory( &SPList, sizeof(SPWORDPRONUNCIATIONLIST) ); } } } //--- Handle empty pronunciation
else if ( !SPList.pFirstWordPronunciation ) { if ( SPList.pvBuffer ) { ::CoTaskMemFree( SPList.pvBuffer ); ZeroMemory( &SPList, sizeof(SPWORDPRONUNCIATIONLIST) ); } hr = SPERR_NOT_IN_LEX; } } } //--- Default case - just look up orthography and go with first match.
if ( hr == SPERR_NOT_IN_LEX ) { hr = m_cpAggregateLexicon->GetPronunciations( pPron->orthStr, 1033, eLEXTYPE_USER, &SPList );
//--- Handle empty pronunciation
if ( SUCCEEDED( hr ) && !SPList.pFirstWordPronunciation ) { if ( SPList.pvBuffer ) { ::CoTaskMemFree( SPList.pvBuffer ); ZeroMemory( &SPList, sizeof(SPWORDPRONUNCIATIONLIST) ); } hr = SPERR_NOT_IN_LEX; } } if ( hr == SPERR_NOT_IN_LEX ) { hr = m_cpAggregateLexicon->GetPronunciations( pPron->orthStr, 1033, eLEXTYPE_APP, &SPList );
//--- Handle empty pronunciation
if ( SUCCEEDED( hr ) && !SPList.pFirstWordPronunciation ) { if ( SPList.pvBuffer ) { ::CoTaskMemFree( SPList.pvBuffer ); ZeroMemory( &SPList, sizeof(SPWORDPRONUNCIATIONLIST) ); } hr = SPERR_NOT_IN_LEX; } } if ( hr == SPERR_NOT_IN_LEX ) { hr = m_cpAggregateLexicon->GetPronunciations( pPron->orthStr, 1033, eLEXTYPE_PRIVATE1, &SPList );
//--- Handle empty pronunciation
if ( SUCCEEDED( hr ) && !SPList.pFirstWordPronunciation ) { if ( SPList.pvBuffer ) { ::CoTaskMemFree( SPList.pvBuffer ); ZeroMemory( &SPList, sizeof(SPWORDPRONUNCIATIONLIST) ); } hr = SPERR_NOT_IN_LEX; } } if ( hr == SPERR_NOT_IN_LEX ) { hr = m_pMorphLexicon->DoSuffixMorph( pPron->orthStr, pPron->lemmaStr, 1033, dwFlags, &SPList );
//--- Handle empty pronunciation
if ( SUCCEEDED( hr ) && !SPList.pFirstWordPronunciation ) { if ( SPList.pvBuffer ) { ::CoTaskMemFree( SPList.pvBuffer ); ZeroMemory( &SPList, sizeof(SPWORDPRONUNCIATIONLIST) ); } hr = SPERR_NOT_IN_LEX; } } if ( hr == SPERR_NOT_IN_LEX ) { hr = m_cpAggregateLexicon->GetPronunciations( pPron->orthStr, 1033, eLEXTYPE_PRIVATE2, &SPList );
//--- Make all LTS words Nouns...
for ( SPWORDPRONUNCIATION *pPronunciation = SPList.pFirstWordPronunciation; pPronunciation; pPronunciation = pPronunciation->pNextWordPronunciation ) { pPronunciation->ePartOfSpeech = SPPS_Noun; } }
if (SUCCEEDED(hr)) { //--- WARNING - this assumes pronunciations will only come from one type of lexicon, an assumption
//--- which was true as of July, 2000
pPron->pronType = SPList.pFirstWordPronunciation->eLexiconType;
//------------------------------------------------------------
// SAPI unrolls pronunciations from their POS.
// So roll them back into the original collapsed array
// of one or two candidates with sorted POS (argh...)
//------------------------------------------------------------
SPWORDPRONUNCIATION *firstPron, *pCurPron, *pNextPron; //------------------------------------------
// Init pronunciation A
//------------------------------------------
pCurPron = firstPron = SPList.pFirstWordPronunciation; pPron->pronArray[PRON_A].POScount = 1; //----------------------------
// Get phoneme length
//----------------------------
cPhonLen = wcslen( firstPron->szPronunciation ) + 1; // include delimiter
//----------------------------
// Clip phoneme string to max
//----------------------------
if( cPhonLen > SP_MAX_PRON_LENGTH ) { cPhonLen = SP_MAX_PRON_LENGTH; } //----------------------------
// Copy unicode phoneme string
//----------------------------
memcpy( pPron->pronArray[PRON_A].phon_Str, firstPron->szPronunciation, cPhonLen * sizeof(WCHAR) ); // RAID 21464 - null-terminate SP_MAX_PRON_LENGTH length pronunciations...
pPron->pronArray[PRON_A].phon_Str[cPhonLen-1] = 0; pPron->pronArray[PRON_A].phon_Len = cPhonLen -1; // minus delimiter
pPron->pronArray[PRON_A].POScode[0] = (ENGPARTOFSPEECH)firstPron->ePartOfSpeech; //------------------------------------------
// Init pronunciation B
//------------------------------------------
pPron->pronArray[PRON_B].POScount = 0; pPron->pronArray[PRON_B].phon_Len = 0;
pNextPron = pCurPron->pNextWordPronunciation; while( pNextPron ) { int isDiff; isDiff = wcscmp( firstPron->szPronunciation, pNextPron->szPronunciation ); if( isDiff ) { //------------------------------------------------
// Next pronunciation is different from 1st
//------------------------------------------------
if( pPron->pronArray[PRON_B].POScount < POS_MAX ) { //---------------------------------------
// Gather POS B into array
//---------------------------------------
pPron->pronArray[PRON_B].POScode[pPron->pronArray[PRON_B].POScount] = (ENGPARTOFSPEECH)pNextPron->ePartOfSpeech; pPron->pronArray[PRON_B].POScount++; if( pPron->pronArray[PRON_B].phon_Len == 0 ) { //-----------------------------------------
// If there's no B pron yet, make one
//-----------------------------------------
cPhonLen = wcslen( pNextPron->szPronunciation ) + 1; // include delimiter
//----------------------------
// Clip phoneme string to max
//----------------------------
if( cPhonLen > SP_MAX_PRON_LENGTH ) { cPhonLen = SP_MAX_PRON_LENGTH; } //----------------------------
// Copy unicode phoneme string
//----------------------------
memcpy( pPron->pronArray[PRON_B].phon_Str, pNextPron->szPronunciation, cPhonLen * sizeof(WCHAR) ); // RAID 21464 - null-terminate SP_MAX_PRON_LENGTH length pronunciations...
pPron->pronArray[PRON_B].phon_Str[cPhonLen-1] = 0; pPron->pronArray[PRON_B].phon_Len = cPhonLen -1; // minus delimiter
pPron->hasAlt = true; } } } else { //------------------------------------------------
// Next pronunciation is same as 1st
//------------------------------------------------
if( pPron->pronArray[PRON_A].POScount < POS_MAX ) { //---------------------------------------
// Gather POS A into array
//---------------------------------------
pPron->pronArray[PRON_A].POScode[pPron->pronArray[PRON_A].POScount] = (ENGPARTOFSPEECH)pNextPron->ePartOfSpeech; pPron->pronArray[PRON_A].POScount++; } } pCurPron = pNextPron; pNextPron = pCurPron->pNextWordPronunciation; } }
//--- If XML POS provided, set selection now as it won't be touched by the POS Tagger
if ( pPron->XMLPartOfSpeech != MS_Unknown ) { BOOL fMadeMatch = false;
//--- Check first pronunciation
for ( ULONG i = 0; i < pPron->pronArray[0].POScount; i++ ) { if ( pPron->pronArray[0].POScode[i] == pPron->XMLPartOfSpeech ) { pPron->altChoice = 0; pPron->POSchoice = pPron->XMLPartOfSpeech; fMadeMatch = true; } }
//--- Check second pronunciation
if ( pPron->hasAlt ) { for ( ULONG i = 0; i < pPron->pronArray[1].POScount; i++ ) { if ( pPron->pronArray[1].POScode[i] == pPron->XMLPartOfSpeech ) { pPron->altChoice = 1; pPron->POSchoice = pPron->XMLPartOfSpeech; fMadeMatch = true; } } }
//--- If this POS didn't exist for the word, let POS Tagger do its thing
//--- to determine a pronunciation, and then reassign the POS later...
if ( !fMadeMatch ) { pPron->XMLPartOfSpeech = MS_Unknown; pPron->POSchoice = pPron->pronArray[PRON_A].POScode[0]; } } //--- Set default POS, for later refinement by POS Tagger
else { pPron->POSchoice = pPron->pronArray[PRON_A].POScode[0]; pPron->altChoice = PRON_A; }
if( SPList.pvBuffer ) { ::CoTaskMemFree( SPList.pvBuffer ); }
return hr; } /* Pronounce */
/*****************************************************************************
* CStdSentEnum::DetermineProns *
*------------------------------*
*   Description:
*       Determines the part of speech and the pronunciation of every spoken
*   word in ItemList.  The work is done in four steps:
*       1. Count the words whose XML action is Speak, SpellOut or Pronounce
*          and allocate one PRONRECORD per word.
*       2. Fill each PRONRECORD, using the abbreviation record, the ambiguous
*          word table, or Pronounce().
*       3. Run DisambiguatePOS() over the records and copy the selected
*          pronunciation, part of speech and lemma back into ItemList.
*       4. Run the post-lookup disambiguation table over alpha words and
*          abbreviations.
*
*   Parameters:
*       ItemList      - sentence items; their Words are updated in place.
*       MemoryManager - allocator for the strings attached to the words.
*
*   Returns:
*       S_OK on success, E_OUTOFMEMORY if the PRONRECORD array could not be
*   allocated, or the first failure reported by a lookup, a disambiguation
*   routine or MemoryManager.GetMemory().  Processing stops at the first
*   failure so that a later success cannot mask it.
********************************************************************* MC ****/
HRESULT CStdSentEnum::DetermineProns( CItemList& ItemList, CSentItemMemory& MemoryManager )
{
    SPDBG_FUNC( "CStdSentEnum::DetermineProns" );
    HRESULT hr = S_OK;
    ULONG cNumOfProns, cPronIndex;
    PRONRECORD* pProns = NULL;

    //--- Count the total number of pronunciations needed
    cNumOfProns = 0;
    SPLISTPOS ListPos = ItemList.GetHeadPosition();
    while ( ListPos )
    {
        TTSSentItem& Item = ItemList.GetNext( ListPos );
        for ( ULONG i = 0; i < Item.ulNumWords; i++ )
        {
            if ( Item.Words[i].pWordText &&
                 ( Item.Words[i].pXmlState->eAction == SPVA_Speak    ||
                   Item.Words[i].pXmlState->eAction == SPVA_SpellOut ||
                   Item.Words[i].pXmlState->eAction == SPVA_Pronounce ) )
            {
                ++cNumOfProns;
            }
        }
    }

    if ( cNumOfProns )
    {
        //--- NOTE(review): this NULL check is only effective when operator new
        //--- is non-throwing in this build - confirm.
        pProns = new PRONRECORD[cNumOfProns];
        if ( !pProns )
        {
            hr = E_OUTOFMEMORY;
        }
        else
        {
            //--- First, get item pronunciation(s)
            ZeroMemory( pProns, cNumOfProns * sizeof(PRONRECORD) );
            cPronIndex = 0;
            ListPos    = ItemList.GetHeadPosition();

            //--- Iterate through ItemList
            while ( ListPos && SUCCEEDED( hr ) )
            {
                TTSSentItem& Item = ItemList.GetNext( ListPos );

                //--- Iterate over Words.  Stop at the first failure, otherwise the next
                //--- word's lookup would overwrite the failed hr.
                for ( ULONG i = 0; i < Item.ulNumWords && SUCCEEDED( hr ); i++ )
                {
                    //--- Get pronunciations and parts of speech for spoken items only
                    if ( Item.Words[i].pWordText &&
                         ( Item.Words[i].pXmlState->eAction == SPVA_Speak    ||
                           Item.Words[i].pXmlState->eAction == SPVA_SpellOut ||
                           Item.Words[i].pXmlState->eAction == SPVA_Pronounce ) )
                    {
                        SPDBG_ASSERT( cPronIndex < cNumOfProns );
                        PRONRECORD& Pron = pProns[cPronIndex];

                        //--- Clip at max text length
                        ULONG cItemLen = Item.Words[i].ulWordLen;
                        if ( cItemLen > ( SP_MAX_WORD_LENGTH - 1 ) )
                        {
                            cItemLen = SP_MAX_WORD_LENGTH - 1;
                        }

                        //--- Copy item text
                        memcpy( Pron.orthStr, Item.Words[i].pWordText, cItemLen * sizeof(WCHAR) );
                        Pron.orthStr[cItemLen] = 0;

                        //--- Set Part of Speech, if given in XML
                        if ( Item.Words[i].pXmlState->ePartOfSpeech != MS_Unknown )
                        {
                            Pron.XMLPartOfSpeech = (ENGPARTOFSPEECH)Item.Words[i].pXmlState->ePartOfSpeech;
                        }

                        //--- Do Lex Lookup, if necessary
                        if ( Item.Words[i].pXmlState->pPhoneIds == NULL ||
                             Item.Words[i].pXmlState->ePartOfSpeech == MS_Unknown )
                        {
                            //--- Special Case - Disambiguate Abbreviations
                            if ( Item.pItemInfo->Type == eABBREVIATION ||
                                 Item.pItemInfo->Type == eABBREVIATION_NORMALIZE )
                            {
                                const AbbrevRecord *pAbbrevInfo =
                                    ( (TTSAbbreviationInfo*) Item.pItemInfo )->pAbbreviation;
                                if ( pAbbrevInfo->iPronDisambig < 0 )
                                {
                                    //--- Default case - just take the first (and only) pronunciation
                                    Pron.pronArray[PRON_A].POScount   = 1;
                                    wcscpy( Pron.pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron1 );
                                    Pron.pronArray[PRON_A].phon_Len   = wcslen( Pron.pronArray[PRON_A].phon_Str );
                                    Pron.pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS1;
                                    Pron.pronArray[PRON_B].POScount   = 0;
                                    Pron.pronArray[PRON_B].phon_Len   = 0;
                                    Pron.hasAlt                       = false;
                                    Pron.altChoice                    = PRON_A;
                                    Pron.POSchoice                    = pAbbrevInfo->POS1;
                                    //--- Abbreviation table prons are basically just vendor lex prons...
                                    Pron.pronType                     = eLEXTYPE_PRIVATE1;
                                }
                                else
                                {
                                    hr = ( this->*g_PronDisambigTable[pAbbrevInfo->iPronDisambig] )
                                            ( pAbbrevInfo, &Pron, ItemList, ListPos );
                                }
                                Pron.fUsePron = true;
                            }
                            //--- Default case
                            else
                            {
                                //--- Check disambiguation list
                                const AbbrevRecord* pAbbrevRecord =
                                    (AbbrevRecord*) bsearch( (void*) Pron.orthStr, (void*) g_AmbiguousWordTable,
                                                             sp_countof( g_AmbiguousWordTable ), sizeof( AbbrevRecord ),
                                                             CompareStringAndAbbrevRecord );
                                if ( pAbbrevRecord )
                                {
                                    hr = ( this->*g_AmbiguousWordDisambigTable[pAbbrevRecord->iPronDisambig] )
                                            ( pAbbrevRecord, &Pron, ItemList, ListPos );
                                    Pron.fUsePron = true;
                                }
                                //--- Do Lex Lookup, if necessary
                                else
                                {
                                    hr = Pronounce( &Pron );
                                }
                            }
                        }
                        cPronIndex++;
                    }
                }
            }

            if ( SUCCEEDED( hr ) )
            {
                //--- Next, disambiguate part-of-speech
                DisambiguatePOS( pProns, cNumOfProns );

                //--- Output debugging information
                TTSDBG_LOGPOSPOSSIBILITIES( pProns, cNumOfProns, STREAM_POSPOSSIBILITIES );

                //--- Finally, copy selected pronunciation to 'ItemList'
                PRONUNIT *selectedUnit;
                cPronIndex = 0;
                ListPos    = ItemList.GetHeadPosition();

                while ( ListPos && SUCCEEDED( hr ) )
                {
                    TTSSentItem& Item = ItemList.GetNext( ListPos );
                    for ( ULONG i = 0; i < Item.ulNumWords && SUCCEEDED( hr ); i++ )
                    {
                        //--- Set pronunciation and part-of-speech for spoken items only
                        if ( Item.Words[i].pWordText &&
                             ( Item.Words[i].pXmlState->eAction == SPVA_Speak    ||
                               Item.Words[i].pXmlState->eAction == SPVA_SpellOut ||
                               Item.Words[i].pXmlState->eAction == SPVA_Pronounce ) )
                        {
                            SPDBG_ASSERT( cPronIndex < cNumOfProns );
                            PRONRECORD& Pron = pProns[cPronIndex];

                            //--- Use XML specified pronunciation, if given.
                            if ( Item.Words[i].pXmlState->pPhoneIds )
                            {
                                Item.Words[i].pWordPron = Item.Words[i].pXmlState->pPhoneIds;
                            }
                            else
                            {
                                selectedUnit = &Pron.pronArray[Pron.altChoice];
                                Item.Words[i].pWordPron =
                                    (SPPHONEID*) MemoryManager.GetMemory( ( selectedUnit->phon_Len + 1 ) *
                                                                          sizeof(SPPHONEID), &hr );
                                if ( SUCCEEDED( hr ) )
                                {
                                    wcscpy( Item.Words[i].pWordPron, selectedUnit->phon_Str );
                                }
                            }

                            //--- Use XML specified part-of-speech, if given.  This will override the case
                            //--- where the POS didn't exist as an option and the POS Tagger did its thing
                            //--- to find a pronunciation.
                            if ( Item.Words[i].pXmlState->ePartOfSpeech != MS_Unknown )
                            {
                                Item.Words[i].eWordPartOfSpeech =
                                    (ENGPARTOFSPEECH)Item.Words[i].pXmlState->ePartOfSpeech;
                            }
                            else
                            {
                                Item.Words[i].eWordPartOfSpeech = Pron.POSchoice;
                            }

                            //--- Root word.  Skipped after a failure so that the allocation below
                            //--- cannot overwrite the failed hr.
                            if ( SUCCEEDED( hr ) && Pron.lemmaStr[0] )
                            {
                                Item.Words[i].ulLemmaLen = wcslen( Pron.lemmaStr );
                                Item.Words[i].pLemma =
                                    (WCHAR*) MemoryManager.GetMemory( Item.Words[i].ulLemmaLen * sizeof(WCHAR), &hr );
                                if ( SUCCEEDED( hr ) )
                                {
                                    //--- The lemma is stored by length, without a terminator
                                    wcsncpy( (WCHAR*) Item.Words[i].pLemma, Pron.lemmaStr, Item.Words[i].ulLemmaLen );
                                }
                            }

                            //--- Insert pron in text, if appropriate - RAID #4746
                            //--- Skipped after a failure: pWordPron may be NULL then.
                            if ( SUCCEEDED( hr ) && Pron.fUsePron )
                            {
                                ULONG ulNumChars = wcslen( Item.Words[i].pWordPron );
                                //--- Build "*<pron>*" in a local pointer; the word is only updated
                                //--- once the buffer is complete.
                                WCHAR* pNewText =
                                    (WCHAR*) MemoryManager.GetMemory( ( ulNumChars + 3 ) * sizeof( WCHAR ), &hr );
                                if ( SUCCEEDED( hr ) )
                                {
                                    ZeroMemory( pNewText, ( ulNumChars + 3 ) * sizeof( WCHAR ) );
                                    pNewText[0] = L'*';
                                    wcscpy( pNewText + 1, Item.Words[i].pWordPron );
                                    pNewText[ ulNumChars + 1 ] = L'*';
                                    Item.Words[i].pWordText = pNewText;
                                    Item.Words[i].ulWordLen = ulNumChars + 2;
                                }
                            }

                            cPronIndex++;
                        }
                    }
                }
            }

            if ( SUCCEEDED( hr ) )
            {
                //--- Check Post POS disambiguation list
                SPLISTPOS PostPos = ItemList.GetHeadPosition();
                while ( PostPos && SUCCEEDED( hr ) )
                {
                    TTSSentItem& Item = ItemList.GetNext( PostPos );
                    if ( Item.pItemInfo->Type == eALPHA_WORD    ||
                         Item.pItemInfo->Type == eABBREVIATION  ||
                         Item.pItemInfo->Type == eABBREVIATION_NORMALIZE )
                    {
                        //--- Temporarily terminate the source text (dropping a trailing period)
                        //--- so it can be used as the search key.  The length is tested before
                        //--- the last character is read.
                        WCHAR* pSrcText    = (WCHAR*) Item.pItemSrcText;
                        ULONG  ulTermIndex = Item.ulItemSrcLen;
                        if ( Item.ulItemSrcLen > 1 && pSrcText[Item.ulItemSrcLen - 1] == L'.' )
                        {
                            ulTermIndex = Item.ulItemSrcLen - 1;
                        }
                        const WCHAR temp = pSrcText[ulTermIndex];
                        pSrcText[ulTermIndex] = 0;

                        const AbbrevRecord* pAbbrevRecord =
                            (AbbrevRecord*) bsearch( (void*) pSrcText, (void*) g_PostLexLookupWordTable,
                                                     sp_countof( g_PostLexLookupWordTable ), sizeof( AbbrevRecord ),
                                                     CompareStringAndAbbrevRecord );
                        if ( pAbbrevRecord )
                        {
                            hr = ( this->*g_PostLexLookupDisambigTable[pAbbrevRecord->iPronDisambig] )
                                    ( pAbbrevRecord, ItemList, PostPos, MemoryManager );
                        }

                        //--- Restore the character that was overwritten
                        pSrcText[ulTermIndex] = temp;
                    }
                }
            }
        }
    }

    if ( pProns )
    {
        delete [] pProns;
    }

    return hr;
} /* CStdSentEnum::DetermineProns */
/***********************************************************************************************
* MeasurementDisambig *
*---------------------*
*   Description:
*       This overrides initial pronunciations of measurement abbreviations when they are used
*   as modifiers - e.g. "a 7 ft. pole" vs. "the pole was 7 ft. long".
*
*       ListPos is the position of the item FOLLOWING the measurement item.  The singular
*   pronunciation (pAbbrevInfo->pPron1) is applied when the item before the measurement is a
*   cardinal number and the measurement is followed by a noun, or by an adjective and then a
*   noun.  In every other case (including a NULL ListPos) nothing is changed.
*
*   Returns:
*       S_OK, or the failure reported by MemoryManager.GetMemory().
********************************************************************* AH **********************/
HRESULT CStdSentEnum::MeasurementDisambig( const AbbrevRecord* pAbbrevInfo, CItemList& ItemList,
                                           SPLISTPOS ListPos, CSentItemMemory& MemoryManager )
{
    SPDBG_FUNC( "CStdSentEnum::MeasurementDisambig" );
    HRESULT hr = S_OK;

    if ( !ListPos )
    {
        return hr;
    }

    //--- Step back over the following item to reach the measurement item
    SPLISTPOS TempPos = ListPos;
    ItemList.GetPrev( TempPos );
    if ( !TempPos )
    {
        return hr;
    }
    TTSSentItem& MeasurementItem = ItemList.GetPrev( TempPos );
    if ( !TempPos )
    {
        return hr;
    }

    //--- Previous must be a number
    const TTSSentItem& PrevItem = ItemList.GetPrev( TempPos );
    if ( PrevItem.pItemInfo->Type != eNUM_CARDINAL )
    {
        return hr;
    }

    //--- Next must be a noun, or an adjective followed by a noun
    TempPos = ListPos;
    const TTSSentItem* pFollowing = &ItemList.GetNext( TempPos );
    BOOL fModifier = false;
    if ( pFollowing->eItemPartOfSpeech == MS_Noun )
    {
        fModifier = true;
    }
    else if ( pFollowing->eItemPartOfSpeech == MS_Adj && TempPos )
    {
        pFollowing = &ItemList.GetNext( TempPos );
        if ( pFollowing->eItemPartOfSpeech == MS_Noun )
        {
            fModifier = true;
        }
    }

    if ( fModifier )
    {
        //--- Matched a 7 ft. pole type example - go with singular
        //--- Singular will always be shorter than plural, so this should never overwrite
        //--- anything...
        wcscpy( MeasurementItem.Words[0].pWordPron, pAbbrevInfo->pPron1 );

        //--- Insert pron into word text - RAID #4746
        //--- "*<pron>*" is built in a local pointer; the word is only updated once the
        //--- buffer is complete.
        ULONG ulNumChars = wcslen( MeasurementItem.Words[0].pWordPron );
        WCHAR* pNewText = (WCHAR*) MemoryManager.GetMemory( ( ulNumChars + 3 ) * sizeof( WCHAR ), &hr );
        if ( SUCCEEDED( hr ) )
        {
            ZeroMemory( pNewText, ( ulNumChars + 3 ) * sizeof( WCHAR ) );
            pNewText[0] = L'*';
            wcscpy( pNewText + 1, MeasurementItem.Words[0].pWordPron );
            pNewText[ ulNumChars + 1 ] = L'*';
            MeasurementItem.Words[0].pWordText = pNewText;
            MeasurementItem.Words[0].ulWordLen = ulNumChars + 2;
        }
    }

    return hr;
} /* MeasurementDisambig */
/***********************************************************************************************
* TheDisambig *
*-------------*
*   Description:
*       This function disambiguates the word the - before a vowel it becomes "thee", before a
*   consonant it is "thuh"...
*
*       ListPos is the position of the item FOLLOWING "the".  If the first phone of that item's
*   first word is found in g_Vowels, pAbbrevInfo->pPron1 / POS1 is used; otherwise pPron2 / POS2.
*   Nothing is changed when ListPos is NULL.
*
*   Returns:
*       S_OK, or the failure reported by MemoryManager.GetMemory().
********************************************************************* AH **********************/
HRESULT CStdSentEnum::TheDisambig( const AbbrevRecord* pAbbrevInfo, CItemList& ItemList,
                                   SPLISTPOS ListPos, CSentItemMemory& MemoryManager )
{
    SPDBG_FUNC( "CStdSentEnum::TheDisambig" );
    HRESULT hr = S_OK;

    if ( ListPos )
    {
        //--- Get next item
        SPLISTPOS TempPos = ListPos;
        const TTSSentItem& NextItem = ItemList.GetNext( TempPos );
        const BOOL fBeforeVowel =
            NextItem.Words[0].pWordPron &&
            bsearch( (void*) NextItem.Words[0].pWordPron, (void*) g_Vowels, sp_countof( g_Vowels ),
                     sizeof( WCHAR ), CompareWCHARAndWCHAR ) != NULL;

        //--- Step back over the next item to reach "the"
        TempPos = ListPos;
        ItemList.GetPrev( TempPos );
        TTSSentItem& TheItem = ItemList.GetPrev( TempPos );

        //--- The two pronunciations are exactly the same length, so this should never overwrite
        //--- anything
        if ( fBeforeVowel )
        {
            //--- Matched a vowel - go with / DH IY 1 /
            wcscpy( TheItem.Words[0].pWordPron, pAbbrevInfo->pPron1 );
            TheItem.Words[0].eWordPartOfSpeech = pAbbrevInfo->POS1;
        }
        else
        {
            //--- Didn't match a vowel - go with / DH AX 1 /
            wcscpy( TheItem.Words[0].pWordPron, pAbbrevInfo->pPron2 );
            TheItem.Words[0].eWordPartOfSpeech = pAbbrevInfo->POS2;
        }

        //--- Insert pron into word text - RAID #4746
        //--- "*<pron>*" is built in a local pointer; the word is only updated once the
        //--- buffer is complete.
        ULONG ulNumChars = wcslen( TheItem.Words[0].pWordPron );
        WCHAR* pNewText = (WCHAR*) MemoryManager.GetMemory( ( ulNumChars + 3 ) * sizeof( WCHAR ), &hr );
        if ( SUCCEEDED( hr ) )
        {
            ZeroMemory( pNewText, ( ulNumChars + 3 ) * sizeof( WCHAR ) );
            pNewText[0] = L'*';
            wcscpy( pNewText + 1, TheItem.Words[0].pWordPron );
            pNewText[ ulNumChars + 1 ] = L'*';
            TheItem.Words[0].pWordText = pNewText;
            TheItem.Words[0].ulWordLen = ulNumChars + 2;
        }
    }

    return hr;
} /* TheDisambig */
/***********************************************************************************************
* ADisambig *
*-----------*
*   Description:
*       This function disambiguates the word "a" - / EY 1 - Noun / vs. / AX - Det /
*
*       ListPos is the position of the item FOLLOWING "a".  The Noun pronunciation
*   (pAbbrevInfo->pPron1 / POS1) is chosen outright when "a" belongs to a multi-word item, is
*   not an alpha word, or is followed by an item whose type lacks eWORDLIST_IS_VALID.
*   Otherwise the Determiner pronunciation (pPron2 / POS2) becomes the primary choice and the
*   Noun pronunciation is stored as the alternate.
*
*   Returns:
*       Always S_OK.
********************************************************************* AH **********************/
HRESULT CStdSentEnum::ADisambig( const AbbrevRecord* pAbbrevInfo, PRONRECORD* pPron, CItemList& ItemList,
                                 SPLISTPOS ListPos )
{
    SPDBG_FUNC( "CStdSentEnum::ADisambig" );
    HRESULT hr = S_OK;
    BOOL fNoun = false;

    if ( ListPos )
    {
        //--- Look at the current item first...
        SPLISTPOS WalkPos = ListPos;
        ItemList.GetPrev( WalkPos );
        if ( WalkPos )
        {
            const TTSSentItem& ThisItem = ItemList.GetPrev( WalkPos );
            //--- Part of a multi-word item, or not an AlphaWord - Noun
            if ( ThisItem.ulNumWords > 1 || ThisItem.pItemInfo->Type != eALPHA_WORD )
            {
                fNoun = true;
            }
        }

        //--- ...then at the next item
        if ( !fNoun )
        {
            WalkPos = ListPos;
            const TTSSentItem& FollowingItem = ItemList.GetNext( WalkPos );
            //--- Followed by punctuation - Noun
            if ( !( FollowingItem.pItemInfo->Type & eWORDLIST_IS_VALID ) )
            {
                fNoun = true;
            }
        }
    }

    PRONUNIT& Primary = pPron->pronArray[PRON_A];
    if ( fNoun )
    {
        //--- Noun pronunciation only
        wcscpy( Primary.phon_Str, pAbbrevInfo->pPron1 );
        Primary.phon_Len   = wcslen( Primary.phon_Str );
        Primary.POScode[0] = pAbbrevInfo->POS1;
        pPron->POSchoice   = pAbbrevInfo->POS1;
    }
    else
    {
        //--- Default - Determiner first, with the Noun pronunciation kept as the alternate
        //--- so that POS tagger rules will work properly
        wcscpy( Primary.phon_Str, pAbbrevInfo->pPron2 );
        Primary.phon_Len   = wcslen( Primary.phon_Str );
        Primary.POScode[0] = pAbbrevInfo->POS2;
        Primary.POScount   = 1;
        pPron->POSchoice   = pAbbrevInfo->POS2;

        PRONUNIT& Alternate = pPron->pronArray[PRON_B];
        wcscpy( Alternate.phon_Str, pAbbrevInfo->pPron1 );
        Alternate.phon_Len   = wcslen( Alternate.phon_Str );
        Alternate.POScode[0] = pAbbrevInfo->POS1;
        Alternate.POScount   = 1;
        pPron->hasAlt        = true;
    }

    return hr;
} /* ADisambig */
/***********************************************************************************************
* PolishDisambig *
*----------------*
*   Description:
*       This function disambiguates the word "polish" - [p ow 1 l - ax sh - Noun] vs.
*   [p ow 1 l - ax sh - Adj] vs. [p aa 1 l - ih sh - Verb] vs. [p aa 1 l - ih sh - Noun]
*
*       ListPos is the position of the item FOLLOWING "polish".  Three outcomes are possible:
*         - Capitalized, and either the first item or directly preceded by an opening
*           parenthesis / bracket / brace or a single / double quote: pPron2 with POS2.
*         - Capitalized anywhere else: pPron1, tagged MS_Noun.
*         - Otherwise: pPron2 (POS2, POS3) as the primary choice with pPron1 (POS1) as the
*           alternate, leaving the decision to the POS tagger.
*
*   Returns:
*       Always S_OK.
********************************************************************* AH **********************/
HRESULT CStdSentEnum::PolishDisambig( const AbbrevRecord* pAbbrevInfo, PRONRECORD* pPron, CItemList& ItemList,
                                      SPLISTPOS ListPos )
{
    SPDBG_FUNC( "CStdSentEnum::PolishDisambig" );
    HRESULT hr = S_OK;
    BOOL fCapitalized     = false;
    BOOL fSentenceInitial = false;

    if ( ListPos )
    {
        //--- Step back over the following item to reach the current one
        SPLISTPOS WalkPos = ListPos;
        ItemList.GetPrev( WalkPos );
        if ( WalkPos )
        {
            const TTSSentItem& ThisItem = ItemList.GetPrev( WalkPos );
            if ( iswupper( ThisItem.pItemSrcText[0] ) )
            {
                fCapitalized = true;
                if ( WalkPos )
                {
                    //--- An opening delimiter right before the word counts as sentence-initial
                    const TTSSentItem& Preceding = ItemList.GetPrev( WalkPos );
                    if ( Preceding.pItemInfo->Type == eOPEN_PARENTHESIS ||
                         Preceding.pItemInfo->Type == eOPEN_BRACKET     ||
                         Preceding.pItemInfo->Type == eOPEN_BRACE       ||
                         Preceding.pItemInfo->Type == eSINGLE_QUOTE     ||
                         Preceding.pItemInfo->Type == eDOUBLE_QUOTE )
                    {
                        fSentenceInitial = true;
                    }
                }
                else
                {
                    //--- Nothing precedes the word
                    fSentenceInitial = true;
                }
            }
        }
    }

    PRONUNIT& Primary = pPron->pronArray[PRON_A];
    if ( !fCapitalized )
    {
        //--- Default - use the Verb pronunciation (but include the others as well,
        //--- so that POS tagger rules will work properly)...
        //--- Verb, Noun
        wcscpy( Primary.phon_Str, pAbbrevInfo->pPron2 );
        Primary.phon_Len   = wcslen( Primary.phon_Str );
        Primary.POScode[0] = pAbbrevInfo->POS2;
        Primary.POScode[1] = pAbbrevInfo->POS3;
        Primary.POScount   = 2;

        //--- Adj
        PRONUNIT& Alternate = pPron->pronArray[PRON_B];
        wcscpy( Alternate.phon_Str, pAbbrevInfo->pPron1 );
        Alternate.phon_Len   = wcslen( Alternate.phon_Str );
        Alternate.POScode[0] = pAbbrevInfo->POS1;
        Alternate.POScount   = 1;

        //--- Set initial choice to Verb...
        pPron->POSchoice = pAbbrevInfo->POS2;
        pPron->hasAlt    = true;
    }
    else if ( fSentenceInitial )
    {
        //--- Capitalization tells us nothing here - go with the second pronunciation
        wcscpy( Primary.phon_Str, pAbbrevInfo->pPron2 );
        Primary.phon_Len   = wcslen( Primary.phon_Str );
        Primary.POScode[0] = pAbbrevInfo->POS2;
        pPron->POSchoice   = pAbbrevInfo->POS2;
    }
    else
    {
        //--- Capitalized and not sentence-initial - use Noun
        wcscpy( Primary.phon_Str, pAbbrevInfo->pPron1 );
        Primary.phon_Len   = wcslen( Primary.phon_Str );
        Primary.POScode[0] = MS_Noun;
        pPron->POSchoice   = MS_Noun;
    }

    return hr;
} /* PolishDisambig */
/***********************************************************************************************
* ReadDisambig *
*--------------*
*   Description:
*       This function disambiguates the word Read - past tense vs. present...
*
*       ListPos is the position of the item FOLLOWING "read".  pAbbrevInfo->pPron1 / POS1 is
*   the "reed" form and pPron2 / POS2 the "red" form.  The choice is made as follows:
*         - Nothing precedes "read"                           : "reed"
*         - Closest preceding MS_VAux is have/has/had/am/ain't/
*           are/aren't/be/is/was/were (prefix match)          : "red"
*         - Closest preceding MS_VAux is anything else        : "reed"
*         - Closest preceding MS_Contr contains "'ll"         : "reed"
*         - Closest preceding MS_Contr is anything else       : "red"
*         - Immediately preceding word is "to"                : "reed"
*         - Otherwise                                         : "red"
*   When ListPos is NULL the "read" item cannot be reached from it, so nothing is changed
*   (the same convention MeasurementDisambig and TheDisambig follow).
*
*   Returns:
*       S_OK, or the failure reported by MemoryManager.GetMemory().
********************************************************************* AH **********************/
HRESULT CStdSentEnum::ReadDisambig( const AbbrevRecord* pAbbrevInfo, CItemList& ItemList,
                                    SPLISTPOS ListPos, CSentItemMemory& MemoryManager )
{
    SPDBG_FUNC( "CStdSentEnum::ReadDisambig" );
    HRESULT hr = S_OK;

    //--- Auxiliaries (matched as prefixes, ignoring case) that select the "red" form
    static const WCHAR* const s_ppPastAux[] =
    {
        L"have", L"has", L"had", L"am", L"ain't", L"are", L"aren't", L"be", L"is", L"was", L"were"
    };

    if ( !ListPos )
    {
        return hr;
    }

    //--- Step back over the following item to reach "read"
    SPLISTPOS TempPos = ListPos;
    ItemList.GetPrev( TempPos );
    if ( !TempPos )
    {
        return hr;
    }
    TTSSentItem& ReadItem = ItemList.GetPrev( TempPos );

    //--- Default - go with past tense...
    BOOL fPresent = false;

    if ( !TempPos )
    {
        //--- Sentence initial - go with "reed"
        fPresent = true;
    }
    else
    {
        const SPLISTPOS PrevPos = TempPos;
        const TTSSentItem* pPrevItem = &ItemList.GetPrev( TempPos );

        //--- Check for closest auxiliary
        while ( pPrevItem->Words[0].eWordPartOfSpeech != MS_VAux &&
                pPrevItem->Words[0].eWordPartOfSpeech != MS_Contr &&
                TempPos )
        {
            pPrevItem = &ItemList.GetPrev( TempPos );
        }

        if ( pPrevItem->Words[0].eWordPartOfSpeech == MS_VAux )
        {
            //--- One of the listed auxiliaries - go with "red"; any other - go with "reed"
            fPresent = true;
            for ( ULONG i = 0; i < sp_countof( s_ppPastAux ); i++ )
            {
                if ( wcsnicmp( pPrevItem->Words[0].pWordText, s_ppPastAux[i], wcslen( s_ppPastAux[i] ) ) == 0 )
                {
                    fPresent = false;
                    break;
                }
            }
        }
        //--- Check for pronoun aux contractions
        else if ( pPrevItem->Words[0].eWordPartOfSpeech == MS_Contr )
        {
            //--- An 'll form - go with "reed"; some other form - go with "red"
            const WCHAR *pApostrophe = wcsstr( pPrevItem->Words[0].pWordText, L"'" );
            if ( pApostrophe && wcsnicmp( pApostrophe, L"'ll", 3 ) == 0 )
            {
                fPresent = true;
            }
        }
        //--- Check for infinitival form
        else
        {
            //--- Only the item immediately before "read" is considered here
            TempPos   = PrevPos;
            pPrevItem = &ItemList.GetPrev( TempPos );
            if ( pPrevItem->Words[0].ulWordLen == 2 &&
                 wcsnicmp( pPrevItem->Words[0].pWordText, L"to", 2 ) == 0 )
            {
                fPresent = true;
            }
        }
    }

    //--- The two pronunciations are exactly the same length, so this should never overwrite
    //--- anything
    if ( fPresent )
    {
        wcscpy( ReadItem.Words[0].pWordPron, pAbbrevInfo->pPron1 );
        ReadItem.Words[0].eWordPartOfSpeech = pAbbrevInfo->POS1;
        ReadItem.eItemPartOfSpeech          = pAbbrevInfo->POS1;
    }
    else
    {
        wcscpy( ReadItem.Words[0].pWordPron, pAbbrevInfo->pPron2 );
        ReadItem.Words[0].eWordPartOfSpeech = pAbbrevInfo->POS2;
        ReadItem.eItemPartOfSpeech          = pAbbrevInfo->POS2;
    }

    //--- Insert pron into word text - RAID #4746
    //--- "*<pron>*" is built in a local pointer; the word is only updated once the
    //--- buffer is complete.
    ULONG ulNumChars = wcslen( ReadItem.Words[0].pWordPron );
    WCHAR* pNewText = (WCHAR*) MemoryManager.GetMemory( ( ulNumChars + 3 ) * sizeof( WCHAR ), &hr );
    if ( SUCCEEDED( hr ) )
    {
        ZeroMemory( pNewText, ( ulNumChars + 3 ) * sizeof( WCHAR ) );
        pNewText[0] = L'*';
        wcscpy( pNewText + 1, ReadItem.Words[0].pWordPron );
        pNewText[ ulNumChars + 1 ] = L'*';
        ReadItem.Words[0].pWordText = pNewText;
        ReadItem.Words[0].ulWordLen = ulNumChars + 2;
    }

    return hr;
} /* ReadDisambig */
|