You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1463 lines
62 KiB
1463 lines
62 KiB
/***********************************************************************************************
|
|
* AlphaNorm.cpp *
|
|
*---------------*
|
|
* Description:
|
|
* These functions normalize mostly-alpha strings.
|
|
*-----------------------------------------------------------------------------------------------
|
|
* Created by AARONHAL August 3, 1999
|
|
* Copyright (C) 1999 Microsoft Corporation
|
|
* All Rights Reserved
|
|
*
|
|
***********************************************************************************************/
|
|
|
|
#include "stdafx.h"
|
|
#include "stdsentenum.h"
|
|
|
|
/***********************************************************************************************
|
|
* IsAbbreviationEOS *
|
|
*-------------------*
|
|
* Description:
|
|
* Abbreviations which get here are ALWAYS abbreviations. This function tries to determine
|
|
* whether or not the period at the end of the abbreviation is the end of the sentence.
|
|
*
|
|
* If match made:
|
|
* Sets the Item in the ItemList at ItemPos to the abbreviation.
|
|
*
|
|
********************************************************************* AH **********************/
|
|
HRESULT CStdSentEnum::IsAbbreviationEOS( const AbbrevRecord* pAbbreviation, CItemList &ItemList, SPLISTPOS ItemPos,
|
|
CSentItemMemory &MemoryManager, BOOL* pfIsEOS )
|
|
{
|
|
SPDBG_FUNC( "CStdSentEnum::IsAbbreviationEOS" );
|
|
HRESULT hr = S_OK;
|
|
BOOL fMatchedEOS = false;
|
|
|
|
//--- Need to determine whether the abbreviation's period is also the end of the sentence.
|
|
if ( !(*pfIsEOS) )
|
|
{
|
|
//--- Advance to the beginning of the next token
|
|
const WCHAR *pTempNextChar = (WCHAR*) m_pEndOfCurrToken, *pTempEndChar = (WCHAR*) m_pEndChar;
|
|
const SPVTEXTFRAG *pTempCurrFrag = m_pCurrFrag;
|
|
hr = SkipWhiteSpaceAndTags( pTempNextChar, pTempEndChar, pTempCurrFrag, MemoryManager );
|
|
|
|
if ( SUCCEEDED( hr ) )
|
|
{
|
|
|
|
//--- If we have reached the end of the buffer, consider the abbreviation's period as
|
|
//--- the end of the sentence.
|
|
if ( !pTempNextChar )
|
|
{
|
|
*pfIsEOS = true;
|
|
fMatchedEOS = true;
|
|
}
|
|
//--- Otherwise, only consider the abbreviation's period as the end of the sentence if
|
|
//--- the next token is a common first word (which must be capitalized).
|
|
else if ( IsCapital( *pTempNextChar ) )
|
|
{
|
|
WCHAR *pTempEndOfItem = (WCHAR*) FindTokenEnd( pTempNextChar, pTempEndChar );
|
|
|
|
//--- Try to match a first word.
|
|
WCHAR temp = (WCHAR) *pTempEndOfItem;
|
|
*pTempEndOfItem = 0;
|
|
|
|
if ( bsearch( (void*) pTempNextChar, (void*) g_FirstWords, sp_countof( g_FirstWords ),
|
|
sizeof( SPLSTR ), CompareStringAndSPLSTR ) )
|
|
{
|
|
*pfIsEOS = true;
|
|
fMatchedEOS = true;
|
|
}
|
|
|
|
*pTempEndOfItem = temp;
|
|
}
|
|
}
|
|
}
|
|
|
|
//--- Insert abbreviation into the ItemList
|
|
if ( SUCCEEDED( hr ) )
|
|
{
|
|
CSentItem Item;
|
|
|
|
Item.pItemSrcText = m_pNextChar;
|
|
Item.ulItemSrcLen = (long) (m_pEndOfCurrItem - m_pNextChar);
|
|
Item.ulItemSrcOffset = m_pCurrFrag->ulTextSrcOffset +
|
|
(long)( m_pNextChar - m_pCurrFrag->pTextStart );
|
|
Item.ulNumWords = 1;
|
|
Item.Words = (TTSWord*) MemoryManager.GetMemory( sizeof(TTSWord), &hr );
|
|
if ( SUCCEEDED( hr ) )
|
|
{
|
|
ZeroMemory( Item.Words, sizeof(TTSWord) );
|
|
Item.Words[0].pXmlState = &m_pCurrFrag->State;
|
|
Item.Words[0].pWordText = Item.pItemSrcText;
|
|
Item.Words[0].ulWordLen = Item.ulItemSrcLen;
|
|
Item.Words[0].pLemma = Item.pItemSrcText;
|
|
Item.Words[0].ulLemmaLen = Item.ulItemSrcLen;
|
|
Item.pItemInfo = (TTSItemInfo*) MemoryManager.GetMemory( sizeof(TTSAbbreviationInfo), &hr );
|
|
if ( SUCCEEDED( hr ) )
|
|
{
|
|
if ( NeedsToBeNormalized( pAbbreviation ) )
|
|
{
|
|
Item.pItemInfo->Type = eABBREVIATION_NORMALIZE;
|
|
}
|
|
else
|
|
{
|
|
Item.pItemInfo->Type = eABBREVIATION;
|
|
}
|
|
( (TTSAbbreviationInfo*) Item.pItemInfo )->pAbbreviation = pAbbreviation;
|
|
ItemList.SetAt( ItemPos, Item );
|
|
}
|
|
}
|
|
}
|
|
|
|
return hr;
|
|
} /* IsAbbreviationEOS */
|
|
|
|
/***********************************************************************************************
|
|
* IfEOSNotAbbreviation *
|
|
*----------------------*
|
|
* Description:
|
|
* Abbreviations which get here may or may not be abbreviations. If the period is EOS,
|
|
* this is not an abbreviation (and return will be E_INVALIDARG), otherwise, it is an
|
|
* abbreviation.
|
|
*
|
|
* If match made:
|
|
* Sets the Item in the ItemList at ItemPos to the abbreviation.
|
|
*
|
|
********************************************************************* AH **********************/
|
|
HRESULT CStdSentEnum::IfEOSNotAbbreviation( const AbbrevRecord* pAbbreviation, CItemList &ItemList, SPLISTPOS ItemPos,
|
|
CSentItemMemory &MemoryManager, BOOL* pfIsEOS )
|
|
{
|
|
SPDBG_FUNC( "CStdSentEnum::IfEOSNotAbbreviation" );
|
|
HRESULT hr = S_OK;
|
|
|
|
//--- Need to determine whether the abbreviation's period is also the end of the sentence.
|
|
if ( !(*pfIsEOS) )
|
|
{
|
|
//--- Advance to the beginning of the next token
|
|
const WCHAR *pTempNextChar = m_pEndOfCurrToken, *pTempEndChar = m_pEndChar;
|
|
const SPVTEXTFRAG *pTempCurrFrag = m_pCurrFrag;
|
|
hr = SkipWhiteSpaceAndTags( pTempNextChar, pTempEndChar, pTempCurrFrag, MemoryManager );
|
|
|
|
if ( !pTempNextChar )
|
|
{
|
|
hr = E_INVALIDARG;
|
|
}
|
|
|
|
if ( SUCCEEDED( hr ) )
|
|
{
|
|
|
|
//--- If we have reached the end of the buffer, consider the abbreviation's period as
|
|
//--- the end of the sentence.
|
|
if ( !pTempNextChar )
|
|
{
|
|
*pfIsEOS = true;
|
|
}
|
|
//--- Otherwise, only consider the abbreviation's period as the end of the sentence if
|
|
//--- the next token is a common first word (which must be capitalized).
|
|
else if ( IsCapital( *pTempNextChar ) )
|
|
{
|
|
WCHAR *pTempEndOfItem = (WCHAR*) FindTokenEnd( pTempNextChar, pTempEndChar );
|
|
|
|
//--- Try to match a first word.
|
|
WCHAR temp = (WCHAR) *pTempEndOfItem;
|
|
*pTempEndOfItem = 0;
|
|
|
|
if ( bsearch( (void*) pTempNextChar, (void*) g_FirstWords, sp_countof( g_FirstWords ),
|
|
sizeof( SPLSTR ), CompareStringAndSPLSTR ) )
|
|
{
|
|
*pfIsEOS = true;
|
|
}
|
|
|
|
*pTempEndOfItem = temp;
|
|
}
|
|
}
|
|
}
|
|
|
|
if ( *pfIsEOS )
|
|
{
|
|
//--- EOS - not an abbreviation
|
|
hr = E_INVALIDARG;
|
|
}
|
|
else
|
|
{
|
|
//--- Insert abbreviation into the ItemList
|
|
CSentItem Item;
|
|
|
|
Item.pItemSrcText = m_pNextChar;
|
|
Item.ulItemSrcLen = (long)(m_pEndOfCurrItem - m_pNextChar);
|
|
Item.ulItemSrcOffset = m_pCurrFrag->ulTextSrcOffset +
|
|
(long)( m_pNextChar - m_pCurrFrag->pTextStart );
|
|
Item.ulNumWords = 1;
|
|
Item.Words = (TTSWord*) MemoryManager.GetMemory( sizeof(TTSWord), &hr );
|
|
if ( SUCCEEDED( hr ) )
|
|
{
|
|
ZeroMemory( Item.Words, sizeof(TTSWord) );
|
|
Item.Words[0].pXmlState = &m_pCurrFrag->State;
|
|
Item.Words[0].pWordText = Item.pItemSrcText;
|
|
Item.Words[0].ulWordLen = Item.ulItemSrcLen;
|
|
Item.Words[0].pLemma = Item.pItemSrcText;
|
|
Item.Words[0].ulLemmaLen = Item.ulItemSrcLen;
|
|
Item.pItemInfo = (TTSItemInfo*) MemoryManager.GetMemory( sizeof(TTSAbbreviationInfo), &hr );
|
|
if ( SUCCEEDED( hr ) )
|
|
{
|
|
if ( NeedsToBeNormalized( pAbbreviation ) )
|
|
{
|
|
Item.pItemInfo->Type = eABBREVIATION_NORMALIZE;
|
|
}
|
|
else
|
|
{
|
|
Item.pItemInfo->Type = eABBREVIATION;
|
|
}
|
|
( (TTSAbbreviationInfo*) Item.pItemInfo )->pAbbreviation = pAbbreviation;
|
|
ItemList.SetAt( ItemPos, Item );
|
|
}
|
|
}
|
|
}
|
|
|
|
return hr;
|
|
} /* IfEOSNotAbbreviation */
|
|
|
|
/***********************************************************************************************
|
|
* IfEOSAndLowercaseNotAbbreviation *
|
|
*----------------------------------*
|
|
* Description:
|
|
* Abbreviations which get here may or may not be abbreviations. If the period is EOS,
|
|
* and the next item is lowercase this is not an abbreviation (and return will be E_INVALIDARG),
|
|
* otherwise, it is an abbreviation.
|
|
*
|
|
* If match made:
|
|
* Sets the Item in the ItemList at ItemPos to the abbreviation.
|
|
*
|
|
********************************************************************* AH **********************/
|
|
HRESULT CStdSentEnum::IfEOSAndLowercaseNotAbbreviation( const AbbrevRecord* pAbbreviation, CItemList &ItemList,
|
|
SPLISTPOS ItemPos, CSentItemMemory &MemoryManager,
|
|
BOOL* pfIsEOS )
|
|
{
|
|
SPDBG_FUNC( "CStdSentEnum::IfEOSAndLowercaseNotAbbreviation" );
|
|
HRESULT hr = S_OK;
|
|
|
|
//--- Need to determine whether the abbreviation's period is also the end of the sentence.
|
|
if ( !(*pfIsEOS) )
|
|
{
|
|
//--- Advance to the beginning of the next token
|
|
const WCHAR *pTempNextChar = m_pEndOfCurrToken, *pTempEndChar = m_pEndChar;
|
|
const SPVTEXTFRAG *pTempCurrFrag = m_pCurrFrag;
|
|
hr = SkipWhiteSpaceAndTags( pTempNextChar, pTempEndChar, pTempCurrFrag, MemoryManager );
|
|
|
|
if ( SUCCEEDED( hr ) )
|
|
{
|
|
|
|
//--- If we have reached the end of the buffer, consider the abbreviation's period as
|
|
//--- the end of the sentence.
|
|
if ( !pTempNextChar )
|
|
{
|
|
*pfIsEOS = true;
|
|
}
|
|
//--- Otherwise, only consider the abbreviation's period as the end of the sentence if
|
|
//--- the next token is a common first word (which must be capitalized).
|
|
else if ( IsCapital( *pTempNextChar ) )
|
|
{
|
|
WCHAR *pTempEndOfItem = (WCHAR*) FindTokenEnd( pTempNextChar, pTempEndChar );
|
|
|
|
//--- Try to match a first word.
|
|
WCHAR temp = (WCHAR) *pTempEndOfItem;
|
|
*pTempEndOfItem = 0;
|
|
|
|
if ( bsearch( (void*) pTempNextChar, (void*) g_FirstWords, sp_countof( g_FirstWords ),
|
|
sizeof( SPLSTR ), CompareStringAndSPLSTR ) )
|
|
{
|
|
*pfIsEOS = true;
|
|
}
|
|
|
|
*pTempEndOfItem = temp;
|
|
}
|
|
}
|
|
}
|
|
|
|
if ( *pfIsEOS &&
|
|
!iswupper( *m_pNextChar ) )
|
|
{
|
|
//--- EOS - not an abbreviation
|
|
hr = E_INVALIDARG;
|
|
}
|
|
else
|
|
{
|
|
//--- Insert abbreviation into the ItemList
|
|
CSentItem Item;
|
|
|
|
Item.pItemSrcText = m_pNextChar;
|
|
Item.ulItemSrcLen = (long)(m_pEndOfCurrItem - m_pNextChar);
|
|
Item.ulItemSrcOffset = m_pCurrFrag->ulTextSrcOffset +
|
|
(long)( m_pNextChar - m_pCurrFrag->pTextStart );
|
|
Item.ulNumWords = 1;
|
|
Item.Words = (TTSWord*) MemoryManager.GetMemory( sizeof(TTSWord), &hr );
|
|
if ( SUCCEEDED( hr ) )
|
|
{
|
|
ZeroMemory( Item.Words, sizeof(TTSWord) );
|
|
Item.Words[0].pXmlState = &m_pCurrFrag->State;
|
|
Item.Words[0].pWordText = Item.pItemSrcText;
|
|
Item.Words[0].ulWordLen = Item.ulItemSrcLen;
|
|
Item.Words[0].pLemma = Item.pItemSrcText;
|
|
Item.Words[0].ulLemmaLen = Item.ulItemSrcLen;
|
|
Item.pItemInfo = (TTSItemInfo*) MemoryManager.GetMemory( sizeof(TTSAbbreviationInfo), &hr );
|
|
if ( SUCCEEDED( hr ) )
|
|
{
|
|
if ( NeedsToBeNormalized( pAbbreviation ) )
|
|
{
|
|
Item.pItemInfo->Type = eABBREVIATION_NORMALIZE;
|
|
}
|
|
else
|
|
{
|
|
Item.pItemInfo->Type = eABBREVIATION;
|
|
}
|
|
( (TTSAbbreviationInfo*) Item.pItemInfo )->pAbbreviation = pAbbreviation;
|
|
ItemList.SetAt( ItemPos, Item );
|
|
}
|
|
}
|
|
}
|
|
|
|
return hr;
|
|
} /* IfEOSNotAbbreviation */
|
|
|
|
/***********************************************************************************************
|
|
* SingleOrPluralAbbreviation *
|
|
*----------------------------*
|
|
* Description:
|
|
* At this point, we are already sure that the item is an abbreviation, and just need to
|
|
* determine whether it should take its singular form, plural form, or some alternate.
|
|
*
|
|
********************************************************************* AH **********************/
|
|
HRESULT CStdSentEnum::SingleOrPluralAbbreviation( const AbbrevRecord* pAbbrevInfo, PRONRECORD* pPron,
|
|
CItemList& ItemList, SPLISTPOS ListPos )
|
|
{
|
|
SPDBG_FUNC( "CStdSentEnum::SingleOrPluralAbbreviation" );
|
|
HRESULT hr = S_OK;
|
|
|
|
//--- Get Item which comes before the abbreviation
|
|
SPLISTPOS TempPos = ListPos;
|
|
TTSSentItem TempItem = ItemList.GetPrev( TempPos );
|
|
if ( TempPos )
|
|
{
|
|
TempItem = ItemList.GetPrev( TempPos );
|
|
}
|
|
else
|
|
{
|
|
hr = E_INVALIDARG;
|
|
}
|
|
if ( TempPos )
|
|
{
|
|
TempItem = ItemList.GetPrev( TempPos );
|
|
}
|
|
else
|
|
{
|
|
hr = E_INVALIDARG;
|
|
}
|
|
|
|
if ( SUCCEEDED( hr ) )
|
|
{
|
|
pPron->pronArray[PRON_A].POScount = 1;
|
|
pPron->pronArray[PRON_B].POScount = 0;
|
|
pPron->pronArray[PRON_B].phon_Len = 0;
|
|
pPron->hasAlt = false;
|
|
pPron->altChoice = PRON_A;
|
|
//--- Abbreviation table pronunciations are basically just vendor lex prons...
|
|
pPron->pronType = eLEXTYPE_PRIVATE1;
|
|
|
|
//--- If a cardinal number, need to do singular vs. plural logic
|
|
if ( TempItem.pItemInfo->Type == eNUM_CARDINAL ||
|
|
TempItem.pItemInfo->Type == eDATE_YEAR )
|
|
{
|
|
if ( ( TempItem.ulItemSrcLen == 1 &&
|
|
wcsncmp( TempItem.pItemSrcText, L"1", 1 ) == 0 ) ||
|
|
( TempItem.ulItemSrcLen == 2 &&
|
|
wcsncmp( TempItem.pItemSrcText, L"-1", 2 ) == 0 ) )
|
|
{
|
|
//--- Use singular form - first entry
|
|
wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron1 );
|
|
pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
|
|
pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS1;
|
|
pPron->POSchoice = pAbbrevInfo->POS1;
|
|
}
|
|
else
|
|
{
|
|
//--- Use plural form - second entry
|
|
wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron2 );
|
|
pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
|
|
pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS2;
|
|
pPron->POSchoice = pAbbrevInfo->POS2;
|
|
}
|
|
}
|
|
//--- If a decimal number, pick plural
|
|
else if ( TempItem.pItemInfo->Type == eNUM_DECIMAL )
|
|
{
|
|
wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron2 );
|
|
pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
|
|
pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS2;
|
|
pPron->POSchoice = pAbbrevInfo->POS2;
|
|
}
|
|
//--- If an ordinal number or fraction, pick singular
|
|
else if ( TempItem.pItemInfo->Type == eNUM_ORDINAL )
|
|
{
|
|
//--- Use singular form - first entry
|
|
wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron1 );
|
|
pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
|
|
pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS1;
|
|
pPron->POSchoice = pAbbrevInfo->POS1;
|
|
}
|
|
//--- Fractions and mixed fractions require some more work...
|
|
else if ( TempItem.pItemInfo->Type == eNUM_FRACTION )
|
|
{
|
|
if ( ( (TTSNumberItemInfo*) TempItem.pItemInfo )->pFractionalPart->fIsStandard )
|
|
{
|
|
//--- Standard fractions (e.g. 11/20) get the plural form
|
|
wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron2 );
|
|
pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
|
|
pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS2;
|
|
pPron->POSchoice = pAbbrevInfo->POS2;
|
|
|
|
}
|
|
else
|
|
{
|
|
//--- Singular form with [of a] or [of an] inserted beforehand
|
|
if ( bsearch( (void*) pAbbrevInfo->pPron1, (void*) g_Vowels, sp_countof( g_Vowels ),
|
|
sizeof( WCHAR ), CompareWCHARAndWCHAR ) )
|
|
{
|
|
wcscpy( pPron->pronArray[PRON_A].phon_Str, g_pOfAn );
|
|
pPron->pronArray[PRON_A].phon_Len = wcslen( g_pOfAn );
|
|
}
|
|
else
|
|
{
|
|
wcscpy( pPron->pronArray[PRON_A].phon_Str, g_pOfA );
|
|
pPron->pronArray[PRON_A].phon_Len = wcslen( g_pOfA );
|
|
}
|
|
wcscat( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron1 );
|
|
pPron->pronArray[PRON_A].phon_Len += wcslen( pPron->pronArray[PRON_A].phon_Str );
|
|
pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS1;
|
|
pPron->POSchoice = pAbbrevInfo->POS1;
|
|
}
|
|
}
|
|
else if ( TempItem.pItemInfo->Type == eNUM_MIXEDFRACTION )
|
|
{
|
|
//--- Plural form
|
|
wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron2 );
|
|
pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
|
|
pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS2;
|
|
pPron->POSchoice = pAbbrevInfo->POS2;
|
|
|
|
}
|
|
//--- Special case - preceded by "one"
|
|
else if ( TempItem.ulItemSrcLen == 3 &&
|
|
wcsnicmp( TempItem.pItemSrcText, L"one", 3 ) == 0 )
|
|
{
|
|
//--- Use singular form - first entry
|
|
wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron1 );
|
|
pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
|
|
pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS1;
|
|
pPron->POSchoice = pAbbrevInfo->POS1;
|
|
}
|
|
//--- Special case - Number cu. MeasurementAbbrev (e.g. 10 cu. cm, 1 cu cm)
|
|
//--- Special case - Number fl. MeasurementAbbrev (e.g. 10 fl. oz., 10 fl oz)
|
|
else if ( ( TempItem.ulItemSrcLen == 2 &&
|
|
( _wcsnicmp( TempItem.pItemSrcText, L"cu", 2 ) == 0 ||
|
|
_wcsnicmp( TempItem.pItemSrcText, L"sq", 2 ) == 0 ||
|
|
_wcsnicmp( TempItem.pItemSrcText, L"fl", 2 ) == 0 ) ) ||
|
|
( TempItem.ulItemSrcLen == 3 &&
|
|
( _wcsnicmp( TempItem.pItemSrcText, L"cu.", 3 ) == 0 ||
|
|
_wcsnicmp( TempItem.pItemSrcText, L"sq.", 3 ) == 0 ||
|
|
_wcsnicmp( TempItem.pItemSrcText, L"fl.", 3 ) == 0 ) ) )
|
|
{
|
|
if ( TempPos )
|
|
{
|
|
TempItem = ItemList.GetPrev( TempPos );
|
|
//--- If a cardinal number, need to do singular vs. plural logic
|
|
if ( TempItem.pItemInfo->Type == eNUM_CARDINAL )
|
|
{
|
|
if ( ( TempItem.ulItemSrcLen == 1 &&
|
|
wcsncmp( TempItem.pItemSrcText, L"1", 1 ) == 0 ) ||
|
|
( TempItem.ulItemSrcLen == 2 &&
|
|
wcsncmp( TempItem.pItemSrcText, L"-1", 2 ) == 0 ) )
|
|
{
|
|
//--- Use singular form - first entry
|
|
wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron1 );
|
|
pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
|
|
pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS1;
|
|
pPron->POSchoice = pAbbrevInfo->POS1;
|
|
}
|
|
else
|
|
{
|
|
//--- Use plural form - second entry
|
|
wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron2 );
|
|
pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
|
|
pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS2;
|
|
pPron->POSchoice = pAbbrevInfo->POS2;
|
|
}
|
|
}
|
|
//--- If a decimal number, pick plural
|
|
else if ( TempItem.pItemInfo->Type == eNUM_DECIMAL )
|
|
{
|
|
wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron2 );
|
|
pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
|
|
pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS2;
|
|
pPron->POSchoice = pAbbrevInfo->POS2;
|
|
}
|
|
//--- If an ordinal number or fraction, pick singular
|
|
else if ( TempItem.pItemInfo->Type == eNUM_ORDINAL )
|
|
{
|
|
//--- Use singular form - first entry
|
|
wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron1 );
|
|
pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
|
|
pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS1;
|
|
pPron->POSchoice = pAbbrevInfo->POS1;
|
|
}
|
|
//--- Fractions and mixed fractions require some more work...
|
|
else if ( TempItem.pItemInfo->Type == eNUM_FRACTION )
|
|
{
|
|
if (( (TTSNumberItemInfo*) TempItem.pItemInfo )->pFractionalPart->fIsStandard )
|
|
{
|
|
//--- Standard fractions (e.g. 11/20) get the plural form
|
|
wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron2 );
|
|
pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
|
|
pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS2;
|
|
pPron->POSchoice = pAbbrevInfo->POS2;
|
|
}
|
|
else
|
|
{
|
|
//--- Singular form with [of a] or [of an] inserted beforehand
|
|
//--- (this was handled when processing 'cu' or 'sq')
|
|
wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron1 );
|
|
pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
|
|
pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS1;
|
|
pPron->POSchoice = pAbbrevInfo->POS1;
|
|
}
|
|
}
|
|
else if ( TempItem.pItemInfo->Type == eNUM_MIXEDFRACTION )
|
|
{
|
|
//--- Plural form
|
|
wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron2 );
|
|
pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
|
|
pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS2;
|
|
pPron->POSchoice = pAbbrevInfo->POS2;
|
|
|
|
}
|
|
//--- Special case - preceded by "one"
|
|
else if ( TempItem.ulItemSrcLen == 3 &&
|
|
wcsnicmp( TempItem.pItemSrcText, L"one", 3 ) == 0 )
|
|
{
|
|
//--- Use singular form - first entry
|
|
wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron1 );
|
|
pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
|
|
pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS1;
|
|
pPron->POSchoice = pAbbrevInfo->POS1;
|
|
}
|
|
//--- Default behavior
|
|
else
|
|
{
|
|
//--- Use plural form - second entry
|
|
wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron2 );
|
|
pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
|
|
pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS2;
|
|
pPron->POSchoice = pAbbrevInfo->POS2;
|
|
}
|
|
}
|
|
}
|
|
//--- Check for number words - just cover through 99...
|
|
else if ( ( TempItem.ulItemSrcLen == 3 &&
|
|
( wcsncmp( TempItem.pItemSrcText, L"two", 3 ) == 0 ||
|
|
wcsncmp( TempItem.pItemSrcText, L"six", 3 ) == 0 ||
|
|
wcsncmp( TempItem.pItemSrcText, L"ten", 3 ) == 0 ) ) ||
|
|
( TempItem.ulItemSrcLen == 4 &&
|
|
( wcsncmp( TempItem.pItemSrcText, L"four", 4 ) == 0 ||
|
|
wcsncmp( TempItem.pItemSrcText, L"five", 4 ) == 0 ||
|
|
wcsncmp( TempItem.pItemSrcText, L"nine", 4 ) == 0 ) ) ||
|
|
( TempItem.ulItemSrcLen == 5 &&
|
|
( wcsncmp( TempItem.pItemSrcText, L"three", 5 ) == 0 ||
|
|
wcsncmp( TempItem.pItemSrcText, L"seven", 5 ) == 0 ||
|
|
wcsncmp( TempItem.pItemSrcText, L"eight", 5 ) == 0 ||
|
|
wcsncmp( TempItem.pItemSrcText, L"forty", 5 ) == 0 ||
|
|
wcsncmp( TempItem.pItemSrcText, L"fifty", 5 ) == 0 ||
|
|
wcsncmp( TempItem.pItemSrcText, L"sixty", 5 ) == 0 ) ) ||
|
|
( TempItem.ulItemSrcLen == 6 &&
|
|
( wcsncmp( TempItem.pItemSrcText, L"twenty", 6 ) == 0 ||
|
|
wcsncmp( TempItem.pItemSrcText, L"thirty", 6 ) == 0 ||
|
|
wcsncmp( TempItem.pItemSrcText, L"eighty", 6 ) == 0 ||
|
|
wcsncmp( TempItem.pItemSrcText, L"ninety", 6 ) == 0 ||
|
|
wcsncmp( TempItem.pItemSrcText, L"eleven", 6 ) == 0 ||
|
|
wcsncmp( TempItem.pItemSrcText, L"twelve", 6 ) == 0 ) ) ||
|
|
( TempItem.ulItemSrcLen == 7 &&
|
|
( wcsncmp( TempItem.pItemSrcText, L"seventy", 7 ) == 0 ||
|
|
wcsncmp( TempItem.pItemSrcText, L"fifteen", 7 ) == 0 ||
|
|
wcsncmp( TempItem.pItemSrcText, L"sixteen", 7 ) == 0 ) ) ||
|
|
( TempItem.ulItemSrcLen == 8 &&
|
|
( wcsncmp( TempItem.pItemSrcText, L"thirteen", 8 ) == 0 ||
|
|
wcsncmp( TempItem.pItemSrcText, L"fourteen", 8 ) == 0 ||
|
|
wcsncmp( TempItem.pItemSrcText, L"eighteen", 8 ) == 0 ||
|
|
wcsncmp( TempItem.pItemSrcText, L"nineteen", 8 ) == 0 ) ) )
|
|
{
|
|
//--- Use plural form - second entry
|
|
wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron2 );
|
|
pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
|
|
pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS2;
|
|
pPron->POSchoice = pAbbrevInfo->POS2;
|
|
}
|
|
//--- Default behavior
|
|
else
|
|
{
|
|
//--- Has alternate when non-number precedes - special case
|
|
if ( pAbbrevInfo->pPron3 )
|
|
{
|
|
//--- Use initial form - third entry
|
|
wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron3 );
|
|
pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
|
|
pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS3;
|
|
pPron->POSchoice = pAbbrevInfo->POS3;
|
|
}
|
|
else
|
|
{
|
|
//--- Use plural form - second entry
|
|
wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron2 );
|
|
pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
|
|
pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS2;
|
|
pPron->POSchoice = pAbbrevInfo->POS2;
|
|
}
|
|
}
|
|
}
|
|
//--- Default behavior
|
|
else if ( hr == E_INVALIDARG )
|
|
{
|
|
hr = S_OK;
|
|
|
|
//--- Has alternate when non-number precedes - special case
|
|
if ( pAbbrevInfo->pPron3 )
|
|
{
|
|
//--- Use initial form - third entry
|
|
wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron3 );
|
|
pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
|
|
pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS3;
|
|
pPron->POSchoice = pAbbrevInfo->POS3;
|
|
}
|
|
else
|
|
{
|
|
//--- Use plural form - second entry
|
|
wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron2 );
|
|
pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
|
|
pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS2;
|
|
pPron->POSchoice = pAbbrevInfo->POS2;
|
|
}
|
|
}
|
|
|
|
return hr;
|
|
} /* SingleOrPluralAbbreviation */
|
|
|
|
/***********************************************************************************************
|
|
* DoctorDriveAbbreviation *
|
|
*-------------------------*
|
|
* Description:
|
|
* At this point, we are already sure that the item is an abbreviation, and just need to
|
|
* determine whether it should be Doctor (Saint) or Drive (Street).
|
|
*
|
|
********************************************************************* AH **********************/
|
|
HRESULT CStdSentEnum::DoctorDriveAbbreviation( const AbbrevRecord* pAbbrevInfo, PRONRECORD* pPron,
|
|
CItemList& ItemList, SPLISTPOS ListPos )
|
|
{
|
|
SPDBG_FUNC( "CStdSentEnum::SingleOrPluralAbbreviation" );
|
|
HRESULT hr = S_OK;
|
|
BOOL fMatch = false;
|
|
BOOL fDoctor = false;
|
|
|
|
pPron->pronArray[PRON_A].POScount = 1;
|
|
pPron->pronArray[PRON_B].POScount = 0;
|
|
pPron->pronArray[PRON_B].phon_Len = 0;
|
|
pPron->hasAlt = false;
|
|
pPron->altChoice = PRON_A;
|
|
//--- Abbreviation table pronunciations are basically just vendor lex prons...
|
|
pPron->pronType = eLEXTYPE_PRIVATE1;
|
|
|
|
//--- Get Item which comes after the Abbreviation
|
|
SPLISTPOS TempPos = ListPos;
|
|
if ( !ListPos )
|
|
{
|
|
//--- Go with Drive - end of buffer cannot be followed by a name...
|
|
fDoctor = false;
|
|
fMatch = true;
|
|
}
|
|
else
|
|
{
|
|
TTSSentItem TempItem = ItemList.GetNext( TempPos );
|
|
if ( TempItem.eItemPartOfSpeech == MS_EOSItem )
|
|
{
|
|
//--- Go with Drive - end of buffer cannot be followed by a name...
|
|
fDoctor = false;
|
|
fMatch = true;
|
|
}
|
|
else
|
|
{
|
|
ULONG index = 0;
|
|
|
|
//--- Try to match a Name (an uppercase letter followed by lowercase letters)
|
|
if ( TempItem.ulItemSrcLen > 0 &&
|
|
iswupper( TempItem.pItemSrcText[index] ) )
|
|
{
|
|
index++;
|
|
while ( index < TempItem.ulItemSrcLen &&
|
|
iswlower( TempItem.pItemSrcText[index] ) )
|
|
{
|
|
index++;
|
|
}
|
|
//--- Check for possessives - RAID 5823
|
|
if ( index == TempItem.ulItemSrcLen - 2 &&
|
|
TempItem.pItemSrcText[index+1] == L'\'' &&
|
|
TempItem.pItemSrcText[index+2] == L's' )
|
|
{
|
|
index += 2;
|
|
}
|
|
|
|
//--- Check for directions - North, South, West, East, Ne, Nw, Se, Sw, N, S, E, W
|
|
if ( index == TempItem.ulItemSrcLen &&
|
|
wcsncmp( TempItem.pItemSrcText, L"North", 5 ) != 0 &&
|
|
wcsncmp( TempItem.pItemSrcText, L"South", 5 ) != 0 &&
|
|
wcsncmp( TempItem.pItemSrcText, L"West", 4 ) != 0 &&
|
|
wcsncmp( TempItem.pItemSrcText, L"East", 4 ) != 0 &&
|
|
!( TempItem.ulItemSrcLen == 2 &&
|
|
( wcsncmp( TempItem.pItemSrcText, L"Ne", 2 ) == 0 ||
|
|
wcsncmp( TempItem.pItemSrcText, L"Nw", 2 ) == 0 ||
|
|
wcsncmp( TempItem.pItemSrcText, L"Se", 2 ) == 0 ||
|
|
wcsncmp( TempItem.pItemSrcText, L"Sw", 2 ) == 0 ) ) &&
|
|
!( TempItem.ulItemSrcLen == 1 &&
|
|
( wcsncmp( TempItem.pItemSrcText, L"N", 1 ) == 0 ||
|
|
wcsncmp( TempItem.pItemSrcText, L"S", 1 ) == 0 ||
|
|
wcsncmp( TempItem.pItemSrcText, L"E", 1 ) == 0 ||
|
|
wcsncmp( TempItem.pItemSrcText, L"W", 1 ) == 0 ) ) )
|
|
{
|
|
//--- Check for name previous item
|
|
TempPos = ListPos;
|
|
|
|
ItemList.GetPrev( TempPos );
|
|
if ( TempPos )
|
|
{
|
|
ItemList.GetPrev( TempPos );
|
|
if ( TempPos )
|
|
{
|
|
TTSSentItem PrevItem = ItemList.GetPrev( TempPos );
|
|
index = 0;
|
|
|
|
if ( PrevItem.ulItemSrcLen > 0 &&
|
|
iswupper( PrevItem.pItemSrcText[index++] ) )
|
|
{
|
|
while ( index < PrevItem.ulItemSrcLen &&
|
|
islower( PrevItem.pItemSrcText[index] ) )
|
|
{
|
|
index++;
|
|
}
|
|
if ( index == PrevItem.ulItemSrcLen )
|
|
{
|
|
//--- Go with Drive - names before and after, e.g. Main St. Washington, D.C.
|
|
fDoctor = false;
|
|
fMatch = true;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if ( !fMatch )
|
|
{
|
|
//--- Go with Doctor - matched a Name after and not a name before
|
|
fDoctor = true;
|
|
fMatch = true;
|
|
}
|
|
}
|
|
else if ( index == 1 &&
|
|
TempItem.ulItemSrcLen == 2 &&
|
|
TempItem.pItemSrcText[index] == L'.' )
|
|
{
|
|
//--- Go with Doctor - matched an initial
|
|
fDoctor = true;
|
|
fMatch = true;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if ( !fMatch )
|
|
{
|
|
//--- Try to get previous item...
|
|
BOOL fSentenceInitial = false;
|
|
TempPos = ListPos;
|
|
if ( TempPos )
|
|
{
|
|
ItemList.GetPrev( TempPos );
|
|
if ( TempPos )
|
|
{
|
|
ItemList.GetPrev( TempPos );
|
|
if ( !TempPos )
|
|
{
|
|
fSentenceInitial = true;
|
|
}
|
|
else
|
|
{
|
|
TTSSentItem PrevItem = ItemList.GetPrev( TempPos );
|
|
if ( PrevItem.pItemInfo->Type == eOPEN_PARENTHESIS ||
|
|
PrevItem.pItemInfo->Type == eOPEN_BRACKET ||
|
|
PrevItem.pItemInfo->Type == eOPEN_BRACE ||
|
|
PrevItem.pItemInfo->Type == eSINGLE_QUOTE ||
|
|
PrevItem.pItemInfo->Type == eDOUBLE_QUOTE )
|
|
{
|
|
fSentenceInitial = true;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
//--- Sentence initial - go with Doctor
|
|
if ( fSentenceInitial )
|
|
{
|
|
fDoctor = true;
|
|
fMatch = true;
|
|
}
|
|
//--- Default - go with Drive
|
|
else
|
|
{
|
|
fDoctor = false;
|
|
fMatch = true;
|
|
}
|
|
}
|
|
|
|
if ( fDoctor )
|
|
{
|
|
wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron1 );
|
|
pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
|
|
pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS1;
|
|
pPron->POSchoice = pAbbrevInfo->POS1;
|
|
}
|
|
else
|
|
{
|
|
wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron2 );
|
|
pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
|
|
pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS2;
|
|
pPron->POSchoice = pAbbrevInfo->POS2;
|
|
}
|
|
|
|
|
|
return hr;
|
|
} /* DoctorDriveAbbreviation */
|
|
|
|
/***********************************************************************************************
|
|
* AbbreviationFollowedByDigit *
|
|
*-----------------------------*
|
|
* Description:
|
|
* At this point, we are already sure that the item is an abbreviation, and just need to
|
|
* determine which pronunciation to go with.
|
|
*
|
|
********************************************************************* AH **********************/
|
|
HRESULT CStdSentEnum::AbbreviationFollowedByDigit( const AbbrevRecord* pAbbrevInfo, PRONRECORD* pPron,
|
|
CItemList& ItemList, SPLISTPOS ListPos )
|
|
{
|
|
SPDBG_FUNC( "CStdSentEnum::AbbreviationFollowedByDigit" );
|
|
HRESULT hr = S_OK;
|
|
|
|
pPron->pronArray[PRON_A].POScount = 1;
|
|
pPron->pronArray[PRON_B].POScount = 0;
|
|
pPron->pronArray[PRON_B].phon_Len = 0;
|
|
pPron->hasAlt = false;
|
|
pPron->altChoice = PRON_A;
|
|
//--- Abbreviation table pronunciations are basically just vendor lex prons...
|
|
pPron->pronType = eLEXTYPE_PRIVATE1;
|
|
|
|
//--- Get Item which comes after the Abbreviation
|
|
SPLISTPOS TempPos = ListPos;
|
|
if ( !ListPos )
|
|
{
|
|
//--- Go with pron 2
|
|
wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron2 );
|
|
pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
|
|
pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS2;
|
|
pPron->POSchoice = pAbbrevInfo->POS2;
|
|
}
|
|
else
|
|
{
|
|
TTSSentItem TempItem = ItemList.GetNext( TempPos );
|
|
|
|
if ( TempItem.ulItemSrcLen > 0 &&
|
|
iswdigit( TempItem.pItemSrcText[0] ) )
|
|
{
|
|
//--- Go with pron 1
|
|
wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron1 );
|
|
pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
|
|
pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS1;
|
|
pPron->POSchoice = pAbbrevInfo->POS1;
|
|
}
|
|
else
|
|
{
|
|
//--- Go with pron 2
|
|
wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron2 );
|
|
pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
|
|
pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS2;
|
|
pPron->POSchoice = pAbbrevInfo->POS2;
|
|
}
|
|
}
|
|
|
|
return hr;
|
|
} /* AbbreviationFollowedByDigit */
|
|
|
|
/***********************************************************************************************
|
|
* AllCapsAbbreviation *
|
|
*---------------------*
|
|
* Description:
|
|
* This functions disambiguates abbreviations without periods which are pronounced
|
|
* differently if they are all capital letters.
|
|
*
|
|
********************************************************************* AH **********************/
|
|
HRESULT CStdSentEnum::AllCapsAbbreviation( const AbbrevRecord* pAbbrevInfo, PRONRECORD* pPron,
|
|
CItemList& ItemList, SPLISTPOS ListPos )
|
|
{
|
|
SPDBG_FUNC( "CStdSentEnum::AllCapsAbbreviation" );
|
|
HRESULT hr = S_OK;
|
|
|
|
pPron->pronArray[PRON_A].POScount = 1;
|
|
pPron->pronArray[PRON_B].POScount = 0;
|
|
pPron->pronArray[PRON_B].phon_Len = 0;
|
|
pPron->hasAlt = false;
|
|
pPron->altChoice = PRON_A;
|
|
//--- Abbreviation table pronunciations are basically just vendor lex prons...
|
|
pPron->pronType = eLEXTYPE_PRIVATE1;
|
|
|
|
//--- Get this item
|
|
SPLISTPOS TempPos = ListPos;
|
|
TTSSentItem TempItem = ItemList.GetPrev( TempPos );
|
|
if ( TempPos )
|
|
{
|
|
TempItem = ItemList.GetPrev( TempPos );
|
|
}
|
|
else
|
|
{
|
|
hr = E_INVALIDARG;
|
|
}
|
|
|
|
if ( SUCCEEDED( hr ) )
|
|
{
|
|
for ( ULONG i = 0; i < TempItem.ulItemSrcLen; i++ )
|
|
{
|
|
if ( !iswupper( TempItem.pItemSrcText[i] ) )
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
//--- All Caps - go with first pronunciation
|
|
if ( i == TempItem.ulItemSrcLen )
|
|
{
|
|
wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron1 );
|
|
pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
|
|
pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS1;
|
|
pPron->POSchoice = pAbbrevInfo->POS1;
|
|
}
|
|
//--- Not All Caps - go with second pronunciation
|
|
else
|
|
{
|
|
wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron2 );
|
|
pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
|
|
pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS2;
|
|
pPron->POSchoice = pAbbrevInfo->POS2;
|
|
}
|
|
}
|
|
|
|
return hr;
|
|
} /* AllCapsAbbreviation */
|
|
|
|
/***********************************************************************************************
|
|
* CapitalizedAbbreviation *
|
|
*-------------------------*
|
|
* Description:
|
|
* This functions disambiguates abbreviations without periods which are pronounced
|
|
* differently if they begin with a capital letter.
|
|
*
|
|
********************************************************************* AH **********************/
|
|
HRESULT CStdSentEnum::CapitalizedAbbreviation( const AbbrevRecord* pAbbrevInfo, PRONRECORD* pPron,
|
|
CItemList& ItemList, SPLISTPOS ListPos )
|
|
{
|
|
SPDBG_FUNC( "CStdSentEnum::CapitalizedAbbreviation" );
|
|
HRESULT hr = S_OK;
|
|
|
|
pPron->pronArray[PRON_A].POScount = 1;
|
|
pPron->pronArray[PRON_B].POScount = 0;
|
|
pPron->pronArray[PRON_B].phon_Len = 0;
|
|
pPron->hasAlt = false;
|
|
pPron->altChoice = PRON_A;
|
|
//--- Abbreviation table pronunciations are basically just vendor lex prons...
|
|
pPron->pronType = eLEXTYPE_PRIVATE1;
|
|
|
|
//--- Get this item
|
|
SPLISTPOS TempPos = ListPos;
|
|
TTSSentItem TempItem = ItemList.GetPrev( TempPos );
|
|
if ( TempPos )
|
|
{
|
|
TempItem = ItemList.GetPrev( TempPos );
|
|
}
|
|
else
|
|
{
|
|
hr = E_INVALIDARG;
|
|
}
|
|
|
|
if ( SUCCEEDED( hr ) )
|
|
{
|
|
//--- Capitalized - go with first pronunciation
|
|
if ( iswupper( TempItem.pItemSrcText[0] ) )
|
|
{
|
|
wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron1 );
|
|
pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
|
|
pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS1;
|
|
pPron->POSchoice = pAbbrevInfo->POS1;
|
|
}
|
|
//--- Not Capitalized - go with second pronunciation
|
|
else
|
|
{
|
|
wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron2 );
|
|
pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
|
|
pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS2;
|
|
pPron->POSchoice = pAbbrevInfo->POS2;
|
|
}
|
|
}
|
|
|
|
return hr;
|
|
} /* CapitalizedAbbreviation */
|
|
|
|
/***********************************************************************************************
|
|
* SECAbbreviation *
|
|
*-----------------*
|
|
* Description:
|
|
* This functions disambiguates SEC, Sec, and sec and so forth...
|
|
*
|
|
********************************************************************* AH **********************/
|
|
HRESULT CStdSentEnum::SECAbbreviation( const AbbrevRecord* pAbbrevInfo, PRONRECORD* pPron,
|
|
CItemList& ItemList, SPLISTPOS ListPos )
|
|
{
|
|
SPDBG_FUNC( "CStdSentEnum::SECAbbreviation" );
|
|
HRESULT hr = S_OK;
|
|
|
|
pPron->pronArray[PRON_A].POScount = 1;
|
|
pPron->pronArray[PRON_B].POScount = 0;
|
|
pPron->pronArray[PRON_B].phon_Len = 0;
|
|
pPron->hasAlt = false;
|
|
pPron->altChoice = PRON_A;
|
|
//--- Abbreviation table pronunciations are basically just vendor lex prons...
|
|
pPron->pronType = eLEXTYPE_PRIVATE1;
|
|
|
|
//--- Get this item
|
|
SPLISTPOS TempPos = ListPos;
|
|
TTSSentItem TempItem = ItemList.GetPrev( TempPos );
|
|
if ( TempPos )
|
|
{
|
|
TempItem = ItemList.GetPrev( TempPos );
|
|
}
|
|
else
|
|
{
|
|
hr = E_INVALIDARG;
|
|
}
|
|
|
|
if ( SUCCEEDED( hr ) )
|
|
{
|
|
for ( ULONG i = 0; i < TempItem.ulItemSrcLen; i++ )
|
|
{
|
|
if ( !iswupper( TempItem.pItemSrcText[i] ) )
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
//--- All Caps - go with SEC
|
|
if ( i == TempItem.ulItemSrcLen )
|
|
{
|
|
wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron3 );
|
|
pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
|
|
pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS3;
|
|
pPron->POSchoice = pAbbrevInfo->POS3;
|
|
}
|
|
//--- Not All Caps - do SingleOrPlural disambiguation
|
|
else
|
|
{
|
|
SingleOrPluralAbbreviation( pAbbrevInfo, pPron, ItemList, ListPos );
|
|
}
|
|
}
|
|
|
|
return hr;
|
|
} /* SECAbbreviation */
|
|
|
|
/***********************************************************************************************
|
|
* DegreeAbbreviation *
|
|
*--------------------*
|
|
* Description:
|
|
* This functions disambiguates C, F, and K (Celsius, Fahrenheit, Kelvin)
|
|
*
|
|
********************************************************************* AH **********************/
|
|
HRESULT CStdSentEnum::DegreeAbbreviation( const AbbrevRecord* pAbbrevInfo, PRONRECORD* pPron,
|
|
CItemList& ItemList, SPLISTPOS ListPos )
|
|
{
|
|
SPDBG_FUNC( "CStdSentEnum::DegreeAbbreviation" );
|
|
HRESULT hr = S_OK;
|
|
|
|
pPron->pronArray[PRON_A].POScount = 1;
|
|
pPron->pronArray[PRON_B].POScount = 0;
|
|
pPron->pronArray[PRON_B].phon_Len = 0;
|
|
pPron->hasAlt = false;
|
|
pPron->altChoice = PRON_A;
|
|
//--- Abbreviation table pronunciations are basically just vendor lex prons...
|
|
pPron->pronType = eLEXTYPE_PRIVATE1;
|
|
|
|
//--- Get this item and previous item
|
|
SPLISTPOS TempPos = ListPos;
|
|
TTSSentItem TempItem, PrevItem;
|
|
BOOL fLetter = false;
|
|
|
|
if ( TempPos )
|
|
{
|
|
ItemList.GetPrev( TempPos );
|
|
if ( TempPos )
|
|
{
|
|
TempItem = ItemList.GetPrev( TempPos );
|
|
if ( TempPos )
|
|
{
|
|
PrevItem = ItemList.GetPrev( TempPos );
|
|
if ( PrevItem.pItemInfo->Type != eNUM_DEGREES )
|
|
{
|
|
fLetter = true;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
fLetter = true;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
hr = E_INVALIDARG;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
hr = E_INVALIDARG;
|
|
}
|
|
|
|
if ( SUCCEEDED( hr ) )
|
|
{
|
|
if ( fLetter )
|
|
{
|
|
//--- This word is just the letter C, F, or K - second pron
|
|
wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron2 );
|
|
pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
|
|
pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS2;
|
|
pPron->POSchoice = pAbbrevInfo->POS2;
|
|
}
|
|
//--- This word is the degree expansion - Celsius, Fahrenheit, or Kelvin
|
|
else
|
|
{
|
|
wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron1 );
|
|
pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
|
|
pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS1;
|
|
pPron->POSchoice = pAbbrevInfo->POS1;
|
|
}
|
|
}
|
|
|
|
return hr;
|
|
} /* DegreeAbbreviation */
|
|
|
|
/***********************************************************************************************
|
|
* IsInitialIsm *
|
|
*--------------*
|
|
* Description:
|
|
* Checks the next token in the text stream to determine if it is an initialism. Also
|
|
* tries to determine whether or not the period at the end of the initialism is the end of
|
|
* the sentence.
|
|
*
|
|
* If match made:
|
|
* Advances m_pNextChar to the appropriate position (either the period at the end of the
|
|
* abbreviation, or just past that period). Sets the Item in the ItemList at ItemPos to the
|
|
* abbreviation.
|
|
*
|
|
********************************************************************* AH **********************/
|
|
HRESULT CStdSentEnum::IsInitialism( CItemList &ItemList, SPLISTPOS ItemPos, CSentItemMemory &MemoryManager,
|
|
BOOL* pfIsEOS )
|
|
{
|
|
SPDBG_FUNC( "CStdSentEnum::IsInitialism" );
|
|
|
|
HRESULT hr = S_OK;
|
|
BOOL fMatchedEOS = false;
|
|
|
|
//--- Initialism must be at least two characters.
|
|
if ( (long)(m_pEndOfCurrItem - m_pNextChar) < 4 )
|
|
{
|
|
hr = E_INVALIDARG;
|
|
}
|
|
else
|
|
{
|
|
const WCHAR *pIterator = NULL;
|
|
ULONG ulCount = 0;
|
|
|
|
pIterator = m_pNextChar;
|
|
|
|
//--- Iterate through the token, each time checking for an alpha character followed by a period.
|
|
while ( SUCCEEDED(hr) &&
|
|
pIterator <= m_pEndOfCurrItem - 2)
|
|
{
|
|
if ( !iswalpha(*pIterator) ||
|
|
*(pIterator + 1) != L'.' )
|
|
{
|
|
hr = E_INVALIDARG;
|
|
}
|
|
else
|
|
{
|
|
pIterator += 2;
|
|
ulCount++;
|
|
}
|
|
}
|
|
|
|
//--- Need to determine whether the initialism's period is also the end of the sentence.
|
|
if ( SUCCEEDED( hr ) &&
|
|
!(*pfIsEOS) )
|
|
{
|
|
//--- Advance to the beginning of the next token
|
|
const WCHAR *pTempNextChar = m_pEndOfCurrToken, *pTempEndChar = m_pEndChar;
|
|
const SPVTEXTFRAG *pTempCurrFrag = m_pCurrFrag;
|
|
hr = SkipWhiteSpaceAndTags( pTempNextChar, pTempEndChar, pTempCurrFrag, MemoryManager );
|
|
|
|
if ( SUCCEEDED( hr ) )
|
|
{
|
|
|
|
//--- If we have reached the end of the buffer, consider the abbreviation's period as
|
|
//--- the end of the sentence.
|
|
if ( !pTempNextChar )
|
|
{
|
|
*pfIsEOS = true;
|
|
fMatchedEOS = true;
|
|
}
|
|
//--- Otherwise, only consider the abbreviation's period as the end of the sentence if
|
|
//--- the next token is a common first word (which must be capitalized).
|
|
else if ( IsCapital( *pTempNextChar ) )
|
|
{
|
|
WCHAR *pTempEndOfItem = (WCHAR*) FindTokenEnd( pTempNextChar, pTempEndChar );
|
|
|
|
//--- Try to match a first word.
|
|
WCHAR temp = (WCHAR) *pTempEndOfItem;
|
|
*pTempEndOfItem = 0;
|
|
|
|
if ( bsearch( (void*) pTempNextChar, (void*) g_FirstWords, sp_countof( g_FirstWords ),
|
|
sizeof( SPLSTR ), CompareStringAndSPLSTR ) )
|
|
{
|
|
*pfIsEOS = true;
|
|
fMatchedEOS = true;
|
|
}
|
|
|
|
*pTempEndOfItem = temp;
|
|
}
|
|
}
|
|
}
|
|
|
|
//--- Now insert the Initialism in the ItemList.
|
|
if ( SUCCEEDED(hr) )
|
|
{
|
|
CSentItem Item;
|
|
Item.pItemSrcText = m_pNextChar;
|
|
Item.ulItemSrcLen = (long)(m_pEndOfCurrItem - m_pNextChar);
|
|
Item.ulItemSrcOffset = m_pCurrFrag->ulTextSrcOffset +
|
|
(long)( m_pNextChar - m_pCurrFrag->pTextStart );
|
|
Item.ulNumWords = ulCount;
|
|
Item.Words = (TTSWord*) MemoryManager.GetMemory( ulCount * sizeof(TTSWord), &hr );
|
|
if ( SUCCEEDED( hr ) )
|
|
{
|
|
SPVSTATE* pNewState = (SPVSTATE*) MemoryManager.GetMemory( sizeof( SPVSTATE ), &hr );
|
|
if ( SUCCEEDED( hr ) )
|
|
{
|
|
//--- Ensure letters are pronounced as nouns...
|
|
memcpy( pNewState, &m_pCurrFrag->State, sizeof( SPVSTATE ) );
|
|
pNewState->ePartOfSpeech = SPPS_Noun;
|
|
|
|
ZeroMemory( Item.Words, ulCount * sizeof(TTSWord) );
|
|
for ( ULONG i = 0; i < ulCount; i++ )
|
|
{
|
|
Item.Words[i].pXmlState = pNewState;
|
|
Item.Words[i].pWordText = &Item.pItemSrcText[ 2 * i ];
|
|
Item.Words[i].ulWordLen = 1;
|
|
Item.Words[i].pLemma = Item.Words[i].pWordText;
|
|
Item.Words[i].ulLemmaLen = Item.Words[i].ulWordLen;
|
|
}
|
|
Item.pItemInfo = (TTSItemInfo*) MemoryManager.GetMemory( sizeof(TTSItemInfo), &hr );
|
|
if ( SUCCEEDED( hr ) )
|
|
{
|
|
Item.pItemInfo->Type = eINITIALISM;
|
|
ItemList.SetAt( ItemPos, Item );
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return hr;
|
|
} /* IsInitialism */
|
|
|
|
/***********************************************************************************************
|
|
* IsAlphaWord *
|
|
*-------------*
|
|
* Description:
|
|
* Checks the next token in the text stream to determine if it is an Alpha Word (all alpha
|
|
* characters, except possibly a single apostrophe).
|
|
*
|
|
********************************************************************* AH **********************/
|
|
HRESULT CStdSentEnum::IsAlphaWord( const WCHAR* pStartChar, const WCHAR* pEndChar, TTSItemInfo*& pItemNormInfo,
|
|
CSentItemMemory& MemoryManager )
|
|
{
|
|
SPDBG_FUNC( "CStdSentEnum::IsAlphaWord" );
|
|
SPDBG_ASSERT( pStartChar < pEndChar );
|
|
HRESULT hr = S_OK;
|
|
|
|
bool fApostropheSeen = false;
|
|
WCHAR *pCurrChar = (WCHAR*) pStartChar;
|
|
|
|
while ( SUCCEEDED( hr ) &&
|
|
pCurrChar &&
|
|
pCurrChar < pEndChar )
|
|
{
|
|
if ( iswalpha( *pCurrChar ) )
|
|
{
|
|
pCurrChar++;
|
|
}
|
|
else if ( *pCurrChar == L'\''&&
|
|
!fApostropheSeen )
|
|
{
|
|
fApostropheSeen = true;
|
|
pCurrChar++;
|
|
}
|
|
else
|
|
{
|
|
hr = E_INVALIDARG;
|
|
}
|
|
}
|
|
|
|
if ( SUCCEEDED( hr ) )
|
|
{
|
|
//--- Matched Alpha Word
|
|
pItemNormInfo = (TTSItemInfo*) MemoryManager.GetMemory( sizeof(TTSItemInfo), &hr );
|
|
if ( SUCCEEDED( hr ) )
|
|
{
|
|
pItemNormInfo->Type = eALPHA_WORD;
|
|
}
|
|
}
|
|
|
|
return hr;
|
|
} /* IsAlphaWord */
|
|
|
|
/***********************************************************************************************
|
|
* AbbreviationModifier *
|
|
*----------------------*
|
|
* Description:
|
|
* Fixes pronunciation issues for special case where 'sq' or 'cu' modifies
|
|
* a measurement.
|
|
*
|
|
*************************************************************** MERESHAW **********************/
|
|
HRESULT CStdSentEnum::AbbreviationModifier( const AbbrevRecord* pAbbrevInfo, PRONRECORD* pPron,
|
|
CItemList& ItemList, SPLISTPOS ListPos )
|
|
{
|
|
SPDBG_FUNC( "CStdSentEnum::AbbreviationModifier" );
|
|
HRESULT hr = S_OK;
|
|
|
|
//--- Get Item which comes before the abbreviation modifier
|
|
SPLISTPOS TempPos = ListPos;
|
|
TTSSentItem TempItem = ItemList.GetPrev( TempPos );
|
|
if ( TempPos )
|
|
{
|
|
//--- Current Item - if All Caps, go with first pronunciation (need to do this before next
|
|
//--- stage of processing, since CU and FL's all caps prons take precedence over numeric...)
|
|
TempItem = ItemList.GetPrev( TempPos );
|
|
for ( ULONG i = 0; i < TempItem.ulItemSrcLen; i++ )
|
|
{
|
|
if ( !iswupper( TempItem.pItemSrcText[i] ) )
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
if ( i == TempItem.ulItemSrcLen )
|
|
{
|
|
wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron1 );
|
|
pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
|
|
pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS1;
|
|
pPron->POSchoice = pAbbrevInfo->POS1;
|
|
return hr;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
hr = E_INVALIDARG;
|
|
}
|
|
if ( TempPos )
|
|
{
|
|
TempItem = ItemList.GetPrev( TempPos );
|
|
}
|
|
else
|
|
{
|
|
hr = E_INVALIDARG;
|
|
}
|
|
|
|
if ( SUCCEEDED( hr ) )
|
|
{
|
|
pPron->pronArray[PRON_A].POScount = 1;
|
|
pPron->pronArray[PRON_B].POScount = 0;
|
|
pPron->pronArray[PRON_B].phon_Len = 0;
|
|
pPron->hasAlt = false;
|
|
pPron->altChoice = PRON_A;
|
|
//--- Abbreviation table pronunciations are basically just vendor lex prons...
|
|
pPron->pronType = eLEXTYPE_PRIVATE1;
|
|
|
|
//--- If a cardinal, decimal, or ordinal number, use regular form
|
|
if (( TempItem.pItemInfo->Type == eNUM_CARDINAL ) ||
|
|
( TempItem.pItemInfo->Type == eNUM_DECIMAL ) ||
|
|
( TempItem.pItemInfo->Type == eNUM_ORDINAL ) ||
|
|
( TempItem.pItemInfo->Type == eNUM_MIXEDFRACTION ) ||
|
|
( TempItem.pItemInfo->Type == eDATE_YEAR ) ||
|
|
( TempItem.ulItemSrcLen == 3 &&
|
|
wcsnicmp( TempItem.pItemSrcText, L"one", 3 ) == 0 ))
|
|
{
|
|
wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron2 );
|
|
pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
|
|
pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS2;
|
|
pPron->POSchoice = pAbbrevInfo->POS2;
|
|
}
|
|
|
|
//--- Fractions and mixed fractions require some more work...
|
|
else if ( TempItem.pItemInfo->Type == eNUM_FRACTION )
|
|
{
|
|
if (( (TTSNumberItemInfo*) TempItem.pItemInfo )->pFractionalPart->fIsStandard )
|
|
{
|
|
//--- Standard fractions (e.g. 11/20) get the plural form
|
|
wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron2 );
|
|
pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
|
|
pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS2;
|
|
pPron->POSchoice = pAbbrevInfo->POS2;
|
|
}
|
|
else
|
|
{
|
|
//--- Singular form with [of a] inserted beforehand ([of an] case need not be
|
|
//--- checked because we're only dealing with 'sq' or 'cu'.
|
|
|
|
wcscpy( pPron->pronArray[PRON_A].phon_Str, g_pOfA );
|
|
pPron->pronArray[PRON_A].phon_Len = wcslen( g_pOfA );
|
|
|
|
wcscat( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron2 );
|
|
pPron->pronArray[PRON_A].phon_Len += wcslen( pPron->pronArray[PRON_A].phon_Str );
|
|
pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS2;
|
|
pPron->POSchoice = pAbbrevInfo->POS2;
|
|
}
|
|
}
|
|
|
|
//--- Default behavior
|
|
else
|
|
{
|
|
//--- Use default form ('sq')
|
|
wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron2 );
|
|
pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
|
|
pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS2;
|
|
pPron->POSchoice = pAbbrevInfo->POS2;
|
|
}
|
|
}
|
|
//--- Default behavior - use first pron
|
|
else if ( hr == E_INVALIDARG )
|
|
{
|
|
hr = S_OK;
|
|
wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron1 );
|
|
pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
|
|
pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS1;
|
|
pPron->POSchoice = pAbbrevInfo->POS1;
|
|
}
|
|
|
|
return hr;
|
|
} /* AbbreviationModifier */
|