Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

3915 lines
149 KiB

/***********************************************************************************************
* NumNorm.cpp *
*-------------*
* Description:
* These functions normalize ordinary ordinal and cardinal numbers
*-----------------------------------------------------------------------------------------------
* Created by AH August 3, 1999
* Copyright (C) 1999 Microsoft Corporation
* All Rights Reserved
*
***********************************************************************************************/
#include "stdafx.h"
#ifndef StdSentEnum_h
#include "stdsentenum.h"
#endif
/***********************************************************************************************
* IsNumberCategory *
*------------------*
*   Description:
*       Checks the next token in the text stream to determine if it is a number category -
*   percents, degrees, squared and cubed numbers, and plain old numbers get matched here.
*
*   The token is first matched with IsNumber.  The result is then classified:
*       - a year (eDATE_YEAR) is returned as is;
*       - a number which covers the whole item is returned as is;
*       - a number which stops one character short of the end of the item is accepted only
*         if that last character is '%', '°', '²' or '³', and is retyped as eNUM_PERCENT,
*         eNUM_DEGREES, eNUM_SQUARED or eNUM_CUBED respectively;
*       - anything else is rejected.
*
*   Parameters:
*       pItemNormInfo - [out] receives the item info on success; not modified on failure
*       Context       - passed through to IsNumber
*       MemoryManager - passed through to IsNumber
*
*   Returns:
*       The failure code of IsNumber if it fails, E_INVALIDARG if the number is rejected
*   here, otherwise the success code returned by IsNumber.  Whenever the number is rejected
*   here, its word list is deleted and m_pNextChar, m_pEndChar, m_pEndOfCurrItem and
*   m_pCurrFrag are put back to the values they had on entry.
*
********************************************************************* AH **********************/
HRESULT CStdSentEnum::IsNumberCategory( TTSItemInfo*& pItemNormInfo, const WCHAR* Context,
                                        CSentItemMemory& MemoryManager )
{
    //--- Remember the scanner position - IsNumber may move it, and a rejected match must not
    const WCHAR *pTempNextChar = m_pNextChar, *pTempEndChar = m_pEndChar, *pTempEndOfItem = m_pEndOfCurrItem;
    const SPVTEXTFRAG *pTempFrag = m_pCurrFrag;

    TTSItemInfo *pNumberInfo = NULL;
    HRESULT hr = IsNumber( pNumberInfo, Context, MemoryManager );
    if ( FAILED( hr ) )
    {
        return hr;
    }

    //--- Years carry no TTSNumberItemInfo payload - hand them back untouched
    if ( pNumberInfo->Type == eDATE_YEAR )
    {
        pItemNormInfo = pNumberInfo;
        return hr;
    }

    TTSNumberItemInfo *pNumber = (TTSNumberItemInfo*) pNumberInfo;

    //--- Plain number covering the whole item
    if ( pNumber->pEndChar == m_pEndOfCurrItem )
    {
        pItemNormInfo = pNumberInfo;
        return hr;
    }

    //--- Number followed by exactly one more character - it must be a known unit symbol
    if ( pNumber->pEndChar == m_pEndOfCurrItem - 1 )
    {
        bool fKnownSymbol = true;
        switch ( *( pNumber->pEndChar ) )
        {
        case L'%':
            pNumberInfo->Type = eNUM_PERCENT;
            break;
        case L'°':
            pNumberInfo->Type = eNUM_DEGREES;
            break;
        case L'²':
            pNumberInfo->Type = eNUM_SQUARED;
            break;
        case L'³':
            pNumberInfo->Type = eNUM_CUBED;
            break;
        default:
            fKnownSymbol = false;
            break;
        }
        if ( fKnownSymbol )
        {
            pItemNormInfo = pNumberInfo;
            return hr;
        }
    }

    //--- Rejected - release the expansion built by IsNumber and rewind the scanner.  Both
    //--- rejection cases (unknown trailing symbol, number not ending at the item end) are
    //--- handled identically so the caller always sees the entry state again.
    hr = E_INVALIDARG;
    delete pNumber->pWordList;
    pNumber->pWordList = NULL;
    m_pNextChar      = pTempNextChar;
    m_pEndChar       = pTempEndChar;
    m_pEndOfCurrItem = pTempEndOfItem;
    m_pCurrFrag      = pTempFrag;
    return hr;
} /* IsNumberCategory */
/***********************************************************************************************
* IsNumber *
*----------*
*   Description:
*       Checks the next token in the text stream to determine if it is a number.
*
*   RegExp:
*       [-]? { d+ || d(1-3)[,ddd]+ } { { .d+ } || { "st" || "nd" || "rd" || "th" } }?
*   It is actually a bit more complicated than this - for instance, the ordinal
*   strings may only follow certain digits (1st, 2nd, 3rd, 4-0th)...
*
*   Parameters:
*       pItemNormInfo - [out] on success, a TTSYearItemInfo (Type == eDATE_YEAR) or a
*                       TTSNumberItemInfo whose pWordList holds the expanded words
*       Context       - optional context string; a context starting with "NUMBER" suppresses
*                       the four-digit-year interpretation, and the string is also passed on
*                       to ExpandInteger
*       MemoryManager - allocator used for all item info structures
*       fMultiItem    - when true and the current token has been consumed completely, the
*                       following token is examined as well for an unattached fraction
*                       (e.g. the "1/2" of "5 1/2")
*
*   Returns:
*       S_OK on a match, E_INVALIDARG when the token is not a number (the scanner members
*   are put back to their entry values when the token is not fully consumed), or a failure
*   code propagated from a helper.  On success m_pNextChar is reset to the start of the
*   number; the other scanner members stay advanced if a following fraction token was used.
*
********************************************************************* AH **********************/
HRESULT CStdSentEnum::IsNumber( TTSItemInfo*& pItemNormInfo, const WCHAR* Context,
                                CSentItemMemory& MemoryManager, BOOL fMultiItem )
{
    SPDBG_FUNC( "CStdSentEnum::IsNumber" );

    HRESULT hr = S_OK;
    bool fNegative = false;
    TTSIntegerItemInfo*  pIntegerInfo  = NULL;
    TTSDigitsItemInfo*   pDecimalInfo  = NULL;
    TTSFractionItemInfo* pFractionInfo = NULL;
    const SPVSTATE *pIntegerState = &m_pCurrFrag->State;
    CItemList PostIntegerList;
    ULONG ulOffset = 0, ulTokenLen = (ULONG)(m_pEndOfCurrItem - m_pNextChar);
    WCHAR wcDecimalPoint;

    //--- Scanner state on entry, restored whenever the match is abandoned
    const WCHAR *pTempNextChar = m_pNextChar, *pTempEndChar = m_pEndChar, *pTempEndOfItem = m_pEndOfCurrItem;
    const SPVTEXTFRAG *pTempFrag = m_pCurrFrag;

    if ( ulTokenLen )
    {
        //--- Set Separator and Decimal Point character preferences for this call
        if ( m_eSeparatorAndDecimal == COMMA_PERIOD )
        {
            wcDecimalPoint = L'.';
        }
        else
        {
            wcDecimalPoint = L',';
        }

        //--- Try to match the negative sign - [-]?
        if ( m_pNextChar[ulOffset] == L'-' )
        {
            fNegative = true;
            ulOffset++;
        }

        //--- Try to match the integral part
        hr = IsInteger( m_pNextChar + ulOffset, pIntegerInfo, MemoryManager );

        //--- Adjust ulOffset and hr...
        if ( SUCCEEDED( hr ) )
        {
            ulOffset += (ULONG)(pIntegerInfo->pEndChar - pIntegerInfo->pStartChar);
        }
        else if ( hr == E_INVALIDARG )
        {
            //--- No integer part is not an error yet - ".5" and "1/2" are still possible
            hr = S_OK;
            pIntegerInfo = NULL;
        }

        //--- Try to match a decimal part
        if ( ulOffset < ulTokenLen &&
             m_pNextChar[ulOffset] == wcDecimalPoint )
        {
            hr = IsDigitString( m_pNextChar + ulOffset + 1, pDecimalInfo, MemoryManager );
            if ( SUCCEEDED( hr ) )
            {
                ulOffset += pDecimalInfo->ulNumDigits + 1;

                //--- Check for special case - decimal number numerator...
                if ( ulOffset < ulTokenLen &&
                     m_pNextChar[ulOffset] == L'/' )
                {
                    //--- Start over (just past the sign) and let IsFraction parse the lot
                    pIntegerInfo = NULL;
                    pDecimalInfo = NULL;
                    ulOffset = fNegative ? 1 : 0;
                    hr = IsFraction( m_pNextChar + ulOffset, pFractionInfo, MemoryManager );
                    if ( SUCCEEDED( hr ) )
                    {
                        if ( pFractionInfo->pVulgar )
                        {
                            ulOffset++;
                        }
                        else
                        {
                            ulOffset += (ULONG)(pFractionInfo->pDenominator->pEndChar - pFractionInfo->pNumerator->pStartChar);
                        }
                    }
                    else if ( hr == E_INVALIDARG )
                    {
                        hr = S_OK;
                    }
                }
            }
            else if ( hr == E_INVALIDARG )
            {
                hr = S_OK;
                pDecimalInfo = NULL;
            }
        }
        //--- Try to match an ordinal string
        else if ( pIntegerInfo &&
                  ulOffset < ulTokenLen &&
                  isalpha( m_pNextChar[ulOffset] ) )
        {
            //--- pIntegerInfo is set, so at least one digit precedes ulOffset and
            //--- m_pNextChar[ulOffset-1] is always inside the token.  The suffix must be
            //--- exactly the last two characters of the token; that length test comes first
            //--- so the second suffix character is only read when it belongs to the token.
            switch ( toupper( m_pNextChar[ulOffset] ) )
            {
            case 'S':
                //--- Must be of the form "...1st" but not "...11st"
                if ( (ulOffset + 2) == ulTokenLen &&
                     toupper( m_pNextChar[ulOffset+1] ) == L'T' &&
                     m_pNextChar[ulOffset-1] == L'1' &&
                     ( ulOffset == 1 ||
                       m_pNextChar[ulOffset-2] != L'1' ) )
                {
                    ulOffset += 2;
                    pIntegerInfo->fOrdinal = true;
                }
                break;
            case 'N':
                //--- Must be of the form "...2nd" but not "...12nd"
                if ( (ulOffset + 2) == ulTokenLen &&
                     toupper(m_pNextChar[ulOffset+1]) == L'D' &&
                     m_pNextChar[ulOffset-1] == L'2' &&
                     ( ulOffset == 1 ||
                       m_pNextChar[ulOffset-2] != L'1' ) )
                {
                    ulOffset += 2;
                    pIntegerInfo->fOrdinal = true;
                }
                break;
            case 'R':
                //--- Must be of the form "...3rd" but not "...13rd"
                if ( (ulOffset + 2) == ulTokenLen &&
                     toupper(m_pNextChar[ulOffset+1]) == L'D' &&
                     m_pNextChar[ulOffset-1] == L'3' &&
                     ( ulOffset == 1 ||
                       m_pNextChar[ulOffset-2] != L'1' ) )
                {
                    ulOffset += 2;
                    pIntegerInfo->fOrdinal = true;
                }
                break;
            case 'T':
                //--- Must be of the form "...[4-9]th" or "...[11-19]th" or "...[0]th".
                //--- The "teens" alternative needs a real tens digit of '1', so a lone
                //--- "1th", "2th" or "3th" is NOT an ordinal.
                if ( (ulOffset + 2) == ulTokenLen &&
                     toupper(m_pNextChar[ulOffset+1]) == L'H' &&
                     ( ( m_pNextChar[ulOffset-1] <= L'9' && m_pNextChar[ulOffset-1] >= L'4') ||
                       ( m_pNextChar[ulOffset-1] == L'0') ||
                       ( ulOffset > 1 && m_pNextChar[ulOffset-2] == L'1') ) )
                {
                    ulOffset += 2;
                    pIntegerInfo->fOrdinal = true;
                }
                break;
            default:
                // Some invalid non-digit character found at the end of the string
                break;
            }
        }
        //--- Try to match a fraction
        else
        {
            //--- Try to match an attached fraction
            if ( ulOffset < ulTokenLen )
            {
                if ( m_pNextChar[ulOffset] == L'-' )
                {
                    ulOffset++;
                }
                hr = IsFraction( m_pNextChar + ulOffset, pFractionInfo, MemoryManager );
                if ( SUCCEEDED( hr ) )
                {
                    if ( pFractionInfo->pVulgar )
                    {
                        ulOffset++;
                    }
                    else
                    {
                        ulOffset += (ULONG)(pFractionInfo->pDenominator->pEndChar - pFractionInfo->pNumerator->pStartChar);
                    }
                }
                else if ( hr == E_INVALIDARG )
                {
                    hr = S_OK;
                }
            }
            //--- Try to match an unattached fraction
            else if ( fMultiItem )
            {
                pIntegerState = &m_pCurrFrag->State;

                //--- Advance in text
                m_pNextChar = m_pEndOfCurrItem;
                hr = SkipWhiteSpaceAndTags( m_pNextChar, m_pEndChar, m_pCurrFrag, MemoryManager,
                                            true, &PostIntegerList );
                if ( !m_pNextChar &&
                     SUCCEEDED( hr ) )
                {
                    //--- Nothing follows - rewind (m_pEndOfCurrItem has not been changed)
                    m_pNextChar = pTempNextChar;
                    m_pEndChar  = pTempEndChar;
                    m_pCurrFrag = pTempFrag;
                }
                else if ( m_pNextChar &&
                          SUCCEEDED( hr ) )
                {
                    m_pEndOfCurrItem = FindTokenEnd( m_pNextChar, m_pEndChar );

                    //--- Strip trailing punctuation, but never back up past the token start
                    while ( m_pEndOfCurrItem > m_pNextChar &&
                            ( IsMiscPunctuation( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED ||
                              IsGroupEnding( *(m_pEndOfCurrItem - 1) )     != eUNMATCHED ||
                              IsQuotationMark( *(m_pEndOfCurrItem - 1) )   != eUNMATCHED ||
                              IsEOSItem( *(m_pEndOfCurrItem - 1) )         != eUNMATCHED ) )
                    {
                        m_pEndOfCurrItem--;
                    }

                    hr = IsFraction( m_pNextChar, pFractionInfo, MemoryManager );
                    if ( FAILED( hr ) )
                    {
                        m_pNextChar      = pTempNextChar;
                        m_pEndChar       = pTempEndChar;
                        m_pEndOfCurrItem = pTempEndOfItem;
                        m_pCurrFrag      = pTempFrag;
                        if ( hr == E_INVALIDARG )
                        {
                            hr = S_OK;
                        }
                    }
                    else
                    {
                        //--- From here on offsets are relative to the fraction token
                        ulTokenLen = (ULONG)(m_pEndOfCurrItem - m_pNextChar);
                        if ( pFractionInfo->pVulgar )
                        {
                            ulOffset = 1;
                        }
                        else
                        {
                            ulOffset = (ULONG)(pFractionInfo->pDenominator->pEndChar -
                                               pFractionInfo->pNumerator->pStartChar);
                        }
                    }
                }
            }
        }
    }
    else
    {
        hr = E_INVALIDARG;
    }

    //--- If we haven't processed the whole item yet, and it isn't part of a larger item --
    //--- e.g. a percent, a degrees number, or a square or cube -- then fail to match it
    //--- as a number...
    if ( ulOffset != ulTokenLen &&
         !( ulTokenLen == ulOffset + 1 &&
            ( m_pNextChar[ulOffset] == L'%' ||
              m_pNextChar[ulOffset] == L'°' ||
              m_pNextChar[ulOffset] == L'²' ||
              m_pNextChar[ulOffset] == L'³' ) ) )
    {
        m_pNextChar      = pTempNextChar;
        m_pEndOfCurrItem = pTempEndOfItem;
        m_pEndChar       = pTempEndChar;
        m_pCurrFrag      = pTempFrag;
        hr = E_INVALIDARG;
    }

    //--- Fill out pItemNormInfo...
    if ( SUCCEEDED( hr ) &&
         ( pIntegerInfo ||
           pDecimalInfo ||
           pFractionInfo ) )
    {
        //--- Reset m_pNextChar to handle the Mixed Fraction case...
        m_pNextChar = pTempNextChar;

        //--- A bare, unsigned, four digit integer is treated as a year unless the context
        //--- starts with "NUMBER"
        if ( pIntegerInfo &&
             pIntegerInfo->pEndChar - pIntegerInfo->pStartChar == 4 &&
             !pIntegerInfo->fSeparators &&
             !pIntegerInfo->fOrdinal &&
             !pDecimalInfo &&
             !pFractionInfo &&
             !fNegative &&
             ulOffset == ulTokenLen &&
             ( !Context ||
               _wcsnicmp( Context, L"NUMBER", 6 ) != 0 ) )
        {
            pItemNormInfo = (TTSYearItemInfo*) MemoryManager.GetMemory( sizeof( TTSYearItemInfo ), &hr );
            if ( SUCCEEDED( hr ) )
            {
                pItemNormInfo->Type = eDATE_YEAR;
                ( (TTSYearItemInfo*) pItemNormInfo )->pYear = m_pNextChar;
                ( (TTSYearItemInfo*) pItemNormInfo )->ulNumDigits = 4;
            }
        }
        else
        {
            pItemNormInfo = (TTSNumberItemInfo*) MemoryManager.GetMemory( sizeof( TTSNumberItemInfo ), &hr );
            if ( SUCCEEDED( hr ) )
            {
                ZeroMemory( pItemNormInfo, sizeof( TTSNumberItemInfo ) );
                if ( pDecimalInfo )
                {
                    pItemNormInfo->Type = eNUM_DECIMAL;
                    if ( pIntegerInfo )
                    {
                        ( (TTSNumberItemInfo*) pItemNormInfo )->pEndChar = pIntegerInfo->pEndChar +
                                                                           pDecimalInfo->ulNumDigits + 1;
                    }
                    else
                    {
                        ( (TTSNumberItemInfo*) pItemNormInfo )->pEndChar = m_pNextChar + pDecimalInfo->ulNumDigits + 1;
                        if ( fNegative )
                        {
                            ( (TTSNumberItemInfo*) pItemNormInfo )->pEndChar++;
                        }
                    }
                }
                else if ( pFractionInfo )
                {
                    if ( pFractionInfo->pVulgar )
                    {
                        ( (TTSNumberItemInfo*) pItemNormInfo )->pEndChar = pFractionInfo->pVulgar + 1;
                    }
                    else
                    {
                        ( (TTSNumberItemInfo*) pItemNormInfo )->pEndChar =
                            pFractionInfo->pDenominator->pEndChar;
                    }
                    if ( pIntegerInfo )
                    {
                        pItemNormInfo->Type = eNUM_MIXEDFRACTION;
                    }
                    else
                    {
                        pItemNormInfo->Type = eNUM_FRACTION;
                    }
                }
                else if ( pIntegerInfo )
                {
                    if ( pIntegerInfo->fOrdinal )
                    {
                        //--- Skip the two letter ordinal suffix
                        ( (TTSNumberItemInfo*) pItemNormInfo )->pEndChar = pIntegerInfo->pEndChar + 2;
                        pItemNormInfo->Type = eNUM_ORDINAL;
                    }
                    else
                    {
                        ( (TTSNumberItemInfo*) pItemNormInfo )->pEndChar = pIntegerInfo->pEndChar;
                        pItemNormInfo->Type = eNUM_CARDINAL;
                    }
                }
            }
            if ( SUCCEEDED( hr ) )
            {
                ( (TTSNumberItemInfo*) pItemNormInfo )->fNegative       = fNegative;
                ( (TTSNumberItemInfo*) pItemNormInfo )->pIntegerPart    = pIntegerInfo;
                ( (TTSNumberItemInfo*) pItemNormInfo )->pDecimalPart    = pDecimalInfo;
                ( (TTSNumberItemInfo*) pItemNormInfo )->pFractionalPart = pFractionInfo;
                ( (TTSNumberItemInfo*) pItemNormInfo )->pStartChar      = m_pNextChar;
                ( (TTSNumberItemInfo*) pItemNormInfo )->pWordList       = new CWordList;
            }
        }
    }
    else
    {
        hr = E_INVALIDARG;
    }

    //--- Expand Number into WordList
    if ( SUCCEEDED( hr ) &&
         pItemNormInfo->Type != eDATE_YEAR )
    {
        TTSWord Word;
        ZeroMemory( &Word, sizeof( TTSWord ) );
        Word.pXmlState         = pIntegerState;
        Word.eWordPartOfSpeech = MS_Unknown;

        //--- Insert "negative"
        if ( fNegative )
        {
            Word.pWordText  = g_negative.pStr;
            Word.ulWordLen  = g_negative.Len;
            Word.pLemma     = Word.pWordText;
            Word.ulLemmaLen = Word.ulWordLen;
            ( (TTSNumberItemInfo*) pItemNormInfo )->pWordList->AddTail( Word );
        }

        //--- Expand Integral Part
        if ( pIntegerInfo )
        {
            ExpandInteger( pIntegerInfo, Context, *( (TTSNumberItemInfo*) pItemNormInfo )->pWordList );
        }

        //--- Expand Decimal Part
        if ( pDecimalInfo )
        {
            //--- Insert "point"
            Word.pWordText  = g_decimalpoint.pStr;
            Word.ulWordLen  = g_decimalpoint.Len;
            Word.pLemma     = Word.pWordText;
            Word.ulLemmaLen = Word.ulWordLen;
            ( (TTSNumberItemInfo*) pItemNormInfo )->pWordList->AddTail( Word );

            ExpandDigits( pDecimalInfo, *( (TTSNumberItemInfo*) pItemNormInfo )->pWordList );
        }

        //--- Expand Fractional Part
        if ( pFractionInfo )
        {
            //--- Insert Post-Integer Non-Spoken XML States, if any
            while ( !PostIntegerList.IsEmpty() )
            {
                ( (TTSNumberItemInfo*) pItemNormInfo )->pWordList->AddTail( ( PostIntegerList.RemoveHead() ).Words[0] );
            }

            //--- Insert "and", if also an integer part
            if ( pIntegerInfo )
            {
                Word.pXmlState  = &m_pCurrFrag->State;
                Word.pWordText  = g_And.pStr;
                Word.ulWordLen  = g_And.Len;
                Word.pLemma     = Word.pWordText;
                Word.ulLemmaLen = Word.ulWordLen;
                ( (TTSNumberItemInfo*) pItemNormInfo )->pWordList->AddTail( Word );
            }

            hr = ExpandFraction( pFractionInfo, *( (TTSNumberItemInfo*) pItemNormInfo )->pWordList );
        }
    }

    return hr;
} /* IsNumber */
/***********************************************************************************************
* ExpandNumber *
*--------------*
*   Description:
*       Expands Items previously determined to be of type NUM_CARDINAL, NUM_DECIMAL, or
*   NUM_ORDINAL by IsNumber.  The words were already produced when the item was matched, so
*   this appends the item's own word list to WordList and then deletes the item's list.
*
*   Returns:
*       Always S_OK.
*
*   NOTE: This function does not do parameter validation. Assumed to be done by caller.
********************************************************************* AH **********************/
HRESULT CStdSentEnum::ExpandNumber( TTSNumberItemInfo* pItemInfo, CWordList& WordList )
{
    SPDBG_FUNC( "NumNorm ExpandNumber" );

    //--- Hand the pre-built expansion over to the caller's list, then dispose of it
    CWordList* const pNumberWords = pItemInfo->pWordList;
    WordList.AddTail( pNumberWords );
    delete pNumberWords;

    return S_OK;
} /* ExpandNumber */
/***********************************************************************************************
* ExpandPercent *
*---------------*
*   Description:
*       Expands Items previously determined to be of type NUM_PERCENT by IsNumber.  The
*   item's pre-built number words are appended to WordList (and the item's list deleted),
*   followed by the word held in g_percent.
*
*   Returns:
*       Always S_OK.
*
*   NOTE: This function does not do parameter validation. Assumed to be done by caller.
********************************************************************* AH **********************/
HRESULT CStdSentEnum::ExpandPercent( TTSNumberItemInfo* pItemInfo, CWordList& WordList )
{
    SPDBG_FUNC( "CStdSentEnum::ExpandPercent" );

    //--- Number words first
    CWordList* const pNumberWords = pItemInfo->pWordList;
    WordList.AddTail( pNumberWords );
    delete pNumberWords;

    //--- Then the unit word, tagged with the XML state of the current fragment
    TTSWord PercentWord;
    ZeroMemory( &PercentWord, sizeof( PercentWord ) );
    PercentWord.eWordPartOfSpeech = MS_Unknown;
    PercentWord.pXmlState         = &m_pCurrFrag->State;
    PercentWord.ulWordLen         = g_percent.Len;
    PercentWord.pWordText         = g_percent.pStr;
    PercentWord.ulLemmaLen        = PercentWord.ulWordLen;
    PercentWord.pLemma            = PercentWord.pWordText;
    WordList.AddTail( PercentWord );

    return S_OK;
} /* ExpandPercent */
/***********************************************************************************************
* ExpandDegrees *
*---------------*
*   Description:
*       Expands Items previously determined to be of type NUM_DEGREES by IsNumber.  The
*   item's pre-built number words are appended to WordList (and the item's list deleted),
*   followed by the unit:
*       - g_degree                when the number is the single digit integer "1" with no
*                                 decimal or fractional part;
*       - g_of, g_a, g_degree     when there is no integer part and the fractional part has
*                                 fIsStandard == false;
*       - g_degrees               in every other case.
*
*   Returns:
*       Always S_OK.
*
*   NOTE: This function does not do parameter validation. Assumed to be done by caller.
********************************************************************* AH **********************/
HRESULT CStdSentEnum::ExpandDegrees( TTSNumberItemInfo* pItemInfo, CWordList& WordList )
{
    SPDBG_FUNC( "CStdSentEnum::ExpandDegrees" );

    //--- Number words first
    CWordList* const pNumberWords = pItemInfo->pWordList;
    WordList.AddTail( pNumberWords );
    delete pNumberWords;

    //--- Work out which form of the unit is needed
    const bool fExactlyOne =
        !pItemInfo->pDecimalPart &&
        !pItemInfo->pFractionalPart &&
        pItemInfo->pIntegerPart &&
        pItemInfo->pIntegerPart->pEndChar - pItemInfo->pIntegerPart->pStartChar == 1 &&
        pItemInfo->pIntegerPart->pStartChar[0] == L'1';

    const bool fBareNonStandardFraction =
        !pItemInfo->pIntegerPart &&
        pItemInfo->pFractionalPart &&
        !pItemInfo->pFractionalPart->fIsStandard;

    TTSWord UnitWord;
    ZeroMemory( &UnitWord, sizeof( UnitWord ) );
    UnitWord.eWordPartOfSpeech = MS_Unknown;
    UnitWord.pXmlState         = &m_pCurrFrag->State;

    if ( !fExactlyOne && !fBareNonStandardFraction )
    {
        //--- Plural unit
        UnitWord.pWordText  = g_degrees.pStr;
        UnitWord.ulWordLen  = g_degrees.Len;
        UnitWord.pLemma     = UnitWord.pWordText;
        UnitWord.ulLemmaLen = UnitWord.ulWordLen;
    }
    else
    {
        if ( !fExactlyOne )
        {
            //--- Bare non-standard fraction: the unit is preceded by "of" and "a"
            UnitWord.pWordText  = g_of.pStr;
            UnitWord.ulWordLen  = g_of.Len;
            UnitWord.pLemma     = UnitWord.pWordText;
            UnitWord.ulLemmaLen = UnitWord.ulWordLen;
            WordList.AddTail( UnitWord );

            UnitWord.pWordText  = g_a.pStr;
            UnitWord.ulWordLen  = g_a.Len;
            UnitWord.pLemma     = UnitWord.pWordText;
            UnitWord.ulLemmaLen = UnitWord.ulWordLen;
            WordList.AddTail( UnitWord );
        }

        //--- Singular unit
        UnitWord.pWordText  = g_degree.pStr;
        UnitWord.ulWordLen  = g_degree.Len;
        UnitWord.pLemma     = UnitWord.pWordText;
        UnitWord.ulLemmaLen = UnitWord.ulWordLen;
    }

    WordList.AddTail( UnitWord );

    return S_OK;
} /* ExpandDegrees */
/***********************************************************************************************
* ExpandSquare *
*---------------*
*   Description:
*       Expands Items previously determined to be of type NUM_SQUARED by IsNumber.  The
*   item's pre-built number words are appended to WordList (and the item's list deleted),
*   followed by the word held in g_squared.
*
*   Returns:
*       Always S_OK.
*
*   NOTE: This function does not do parameter validation. Assumed to be done by caller.
********************************************************************* AH **********************/
HRESULT CStdSentEnum::ExpandSquare( TTSNumberItemInfo* pItemInfo, CWordList& WordList )
{
    SPDBG_FUNC( "CStdSentEnum::ExpandSquare" );

    //--- Number words first
    CWordList* const pNumberWords = pItemInfo->pWordList;
    WordList.AddTail( pNumberWords );
    delete pNumberWords;

    //--- Then the suffix word, tagged with the XML state of the current fragment
    TTSWord SquaredWord;
    ZeroMemory( &SquaredWord, sizeof( SquaredWord ) );
    SquaredWord.eWordPartOfSpeech = MS_Unknown;
    SquaredWord.pXmlState         = &m_pCurrFrag->State;
    SquaredWord.ulWordLen         = g_squared.Len;
    SquaredWord.pWordText         = g_squared.pStr;
    SquaredWord.ulLemmaLen        = SquaredWord.ulWordLen;
    SquaredWord.pLemma            = SquaredWord.pWordText;
    WordList.AddTail( SquaredWord );

    return S_OK;
} /* ExpandSquare */
/***********************************************************************************************
* ExpandCube *
*---------------*
*   Description:
*       Expands Items previously determined to be of type NUM_CUBED by IsNumber.  The
*   item's pre-built number words are appended to WordList (and the item's list deleted),
*   followed by the word held in g_cubed.
*
*   Returns:
*       Always S_OK.
*
*   NOTE: This function does not do parameter validation. Assumed to be done by caller.
********************************************************************* AH **********************/
HRESULT CStdSentEnum::ExpandCube( TTSNumberItemInfo* pItemInfo, CWordList& WordList )
{
    SPDBG_FUNC( "CStdSentEnum::ExpandCube" );

    //--- Number words first
    CWordList* const pNumberWords = pItemInfo->pWordList;
    WordList.AddTail( pNumberWords );
    delete pNumberWords;

    //--- Then the suffix word, tagged with the XML state of the current fragment
    TTSWord CubedWord;
    ZeroMemory( &CubedWord, sizeof( CubedWord ) );
    CubedWord.eWordPartOfSpeech = MS_Unknown;
    CubedWord.pXmlState         = &m_pCurrFrag->State;
    CubedWord.ulWordLen         = g_cubed.Len;
    CubedWord.pWordText         = g_cubed.pStr;
    CubedWord.ulLemmaLen        = CubedWord.ulWordLen;
    CubedWord.pLemma            = CubedWord.pWordText;
    WordList.AddTail( CubedWord );

    return S_OK;
} /* ExpandCube */
/***********************************************************************************************
* IsInteger *
*-----------*
* Description:
* Helper for IsNumber which matches the integer part...
*
* RegExp:
* { d+ || d(1-3)[,ddd]+ }
*
********************************************************************* AH **********************/
HRESULT CStdSentEnum::IsInteger( const WCHAR* pStartChar, TTSIntegerItemInfo*& pIntegerInfo,
CSentItemMemory& MemoryManager )
{
HRESULT hr = S_OK;
ULONG ulOffset = 0, ulCount = 0, ulTokenLen = (ULONG)(m_pEndOfCurrItem - pStartChar);
BOOL fSeparators = false, fDone = false;
WCHAR wcSeparator, wcDecimalPoint;
if ( m_eSeparatorAndDecimal == COMMA_PERIOD )
{
wcSeparator = L',';
wcDecimalPoint = L'.';
}
else
{
wcSeparator = L'.';
wcDecimalPoint = L',';
}
//--- Check for first digit
if ( !isdigit(pStartChar[ulOffset]) )
{
hr = E_INVALIDARG;
}
else
{
ulCount++;
ulOffset++;
}
//--- Check for separators
ULONG i = ulOffset + 3;
while ( SUCCEEDED( hr ) &&
ulOffset < i &&
ulOffset < ulTokenLen )
{
if ( pStartChar[ulOffset] == wcSeparator )
{
//--- Found a separator
fSeparators = true;
break;
}
else if ( !isdigit( pStartChar[ulOffset] ) &&
( pStartChar[ulOffset] == wcDecimalPoint ||
pStartChar[ulOffset] == L'%' ||
pStartChar[ulOffset] == L'°' ||
pStartChar[ulOffset] == L'²' ||
pStartChar[ulOffset] == L'³' ||
pStartChar[ulOffset] == L'-' ||
pStartChar[ulOffset] == L'¼' ||
pStartChar[ulOffset] == L'½' ||
pStartChar[ulOffset] == L'¾' ||
toupper( pStartChar[ulOffset] ) == L'S' ||
toupper( pStartChar[ulOffset] ) == L'N' ||
toupper( pStartChar[ulOffset] ) == L'R' ||
toupper( pStartChar[ulOffset] ) == L'T' ) )
{
fDone = true;
break;
}
else if ( isdigit( pStartChar[ulOffset] ) )
{
//--- Just another digit
ulCount++;
ulOffset++;
}
else
{
hr = E_INVALIDARG;
break;
}
}
if ( SUCCEEDED( hr ) &&
!fDone &&
ulOffset < ulTokenLen )
{
if ( !fSeparators )
{
//--- No separators. Pattern must be {d+} if this is indeed a number, so just count digits.
while ( isdigit( pStartChar[ulOffset] ) &&
ulOffset < ulTokenLen )
{
ulCount++;
ulOffset++;
}
if ( ulOffset != ulTokenLen &&
!( pStartChar[ulOffset] == wcDecimalPoint ||
pStartChar[ulOffset] == L'%' ||
pStartChar[ulOffset] == L'°' ||
pStartChar[ulOffset] == L'²' ||
pStartChar[ulOffset] == L'³' ||
pStartChar[ulOffset] == L'%' ||
pStartChar[ulOffset] == L'°' ||
pStartChar[ulOffset] == L'²' ||
pStartChar[ulOffset] == L'³' ||
pStartChar[ulOffset] == L'-' ||
pStartChar[ulOffset] == L'¼' ||
pStartChar[ulOffset] == L'½' ||
pStartChar[ulOffset] == L'¾' ||
toupper( pStartChar[ulOffset] ) == L'S' ||
toupper( pStartChar[ulOffset] ) == L'N' ||
toupper( pStartChar[ulOffset] ) == L'R' ||
toupper( pStartChar[ulOffset] ) == L'T' ) )
{
hr = E_INVALIDARG;
}
}
else
{
//--- Separators. Pattern must be { d(1-3)[,ddd]+ }, so make sure the separators match up
while ( SUCCEEDED( hr ) &&
pStartChar[ulOffset] == wcSeparator &&
( ulOffset + 3 ) < ulTokenLen)
{
ulOffset++;
for ( i = ulOffset + 3; SUCCEEDED( hr ) && ulOffset < i; ulOffset++ )
{
if ( isdigit( pStartChar[ulOffset] ) )
{
ulCount++;
}
else // Some non-digit character found - abort!
{
hr = E_INVALIDARG;
}
}
}
if ( ulOffset != ulTokenLen &&
!( pStartChar[ulOffset] == wcDecimalPoint ||
pStartChar[ulOffset] == L'%' ||
pStartChar[ulOffset] == L'°' ||
pStartChar[ulOffset] == L'²' ||
pStartChar[ulOffset] == L'³' ||
pStartChar[ulOffset] == L'-' ||
pStartChar[ulOffset] == L'¼' ||
pStartChar[ulOffset] == L'½' ||
pStartChar[ulOffset] == L'¾' ||
toupper( pStartChar[ulOffset] ) == L'S' ||
toupper( pStartChar[ulOffset] ) == L'N' ||
toupper( pStartChar[ulOffset] ) == L'R' ||
toupper( pStartChar[ulOffset] ) == L'T' ) )
{
hr = E_INVALIDARG;
}
}
}
if ( SUCCEEDED( hr ) )
{
pIntegerInfo = (TTSIntegerItemInfo*) MemoryManager.GetMemory( sizeof( TTSIntegerItemInfo ), &hr );
if ( SUCCEEDED( hr ) )
{
ZeroMemory( pIntegerInfo, sizeof( TTSIntegerItemInfo ) );
pIntegerInfo->fSeparators = fSeparators;
pIntegerInfo->lLeftOver = ulCount % 3;
pIntegerInfo->lNumGroups = ( ulCount - 1 ) / 3;
pIntegerInfo->pStartChar = pStartChar;
pIntegerInfo->pEndChar = pStartChar + ulOffset;
}
}
return hr;
} /* IsInteger */
/***********************************************************************************************
* ExpandInteger *
*---------------*
* Description:
* Appends the words for the integer described by pItemInfo to WordList.  Three cases:
* - digit by digit, when the text starts with '0', when Context is "NUMBER_DIGIT"
* (compared case-insensitively), or when lNumGroups is not below the number of entries
* in g_quantifiers;
* - a single group (lNumGroups == 0), expanded as a cardinal or, if fOrdinal is set, as
* an ordinal;
* - several three digit groups, each followed by its quantifier word from g_quantifiers
* (or g_quantifiersOrdinal for the special ordinal cases handled below).
*
* pItemInfo->Groups[] and fDigitByDigit / ulNumDigits are filled in along the way.
* lNumGroups is used as a running index while expanding; on exit it is set to its entry
* value plus one (or to 0 if one of the "case -1" branches was taken).
*
* NOTE: This function does not do parameter validation. Assumed to be done by caller.
********************************************************************* AH **********************/
void CStdSentEnum::ExpandInteger( TTSIntegerItemInfo* pItemInfo, const WCHAR* Context, CWordList& WordList )
{
SPDBG_FUNC( "CStdSentEnum::ExpandInteger" );
//--- Local variable declarations and initialization
BOOL bFinished = false;
const WCHAR *pStartChar = pItemInfo->pStartChar, *pEndChar = pItemInfo->pEndChar;
//--- ulTemp remembers lNumGroups + 1; it is written back to lNumGroups at the very end
ULONG ulOffset = 0, ulTokenLen = (ULONG)(pEndChar - pStartChar), ulTemp = (ULONG)(pItemInfo->lNumGroups + 1);
TTSWord Word;
ZeroMemory( &Word, sizeof(TTSWord) );
Word.pXmlState = &m_pCurrFrag->State;
Word.eWordPartOfSpeech = MS_Unknown;
//--- Out of range integer, or integer beginning with one or more zeroes...
if ( pStartChar[0] == L'0' ||
( Context &&
_wcsicmp( Context, L"NUMBER_DIGIT" ) == 0 ) ||
pItemInfo->lNumGroups >= sp_countof(g_quantifiers) )
{
pItemInfo->fDigitByDigit = true;
pItemInfo->ulNumDigits = 0;
//--- Non-digit characters (e.g. separators) are skipped and not counted
for ( ULONG i = 0; i < ulTokenLen; i++ )
{
if ( isdigit( pStartChar[i] ) )
{
ExpandDigit( pStartChar[i], pItemInfo->Groups[0], WordList );
pItemInfo->ulNumDigits++;
}
}
}
//--- Expanding a number < 1000
else if ( pItemInfo->lNumGroups == 0 )
{
//--- lLeftOver selects the number of digits in the group: 1, 2, or 0 meaning three
//--- (IsInteger stores the digit count modulo 3 there)
// 0th through 999th...
if ( pItemInfo->fOrdinal )
{
switch ( pItemInfo->lLeftOver )
{
case 1:
// 0th through 9th...
ExpandDigitOrdinal( pStartChar[ulOffset], pItemInfo->Groups[0], WordList );
break;
case 2:
// 10th through 99th...
ExpandTwoOrdinal( pStartChar + ulOffset, pItemInfo->Groups[0], WordList );
break;
case 0:
// 100th through 999th...
ExpandThreeOrdinal( pStartChar + ulOffset, pItemInfo->Groups[0], WordList );
break;
case -1:
// NOTE(review): nothing visible in this file sets lLeftOver to -1 - presumably
// set by another caller; confirm.  Nothing is expanded and lNumGroups ends up 0.
ulTemp = 0;
pItemInfo->lLeftOver = 0;
break;
}
}
// 0 through 999...
else
{
switch ( pItemInfo->lLeftOver )
{
case 1:
// 0 through 9...
ExpandDigit( pStartChar[ulOffset], pItemInfo->Groups[0], WordList );
ulOffset += 1;
break;
case 2:
// 10 through 99...
ExpandTwoDigits( pStartChar + ulOffset, pItemInfo->Groups[0], WordList );
ulOffset += 2;
break;
case 0:
// 100 through 999...
ExpandThreeDigits( pStartChar + ulOffset, pItemInfo->Groups[0], WordList );
ulOffset += 3;
break;
case -1:
// NOTE(review): see the ordinal branch above - origin of -1 not visible here
ulTemp = 0;
pItemInfo->lLeftOver = 0;
break;
}
}
}
else
{
//--- 1000 through highest number covered, e.g. 1,234,567
//--- Expand first grouping, e.g. 1 million
//--- Expand digit group
//--- (the leading group has lLeftOver digits - 1, 2, or 0 meaning three)
switch ( pItemInfo->lLeftOver )
{
case 1:
ExpandDigit( pStartChar[ulOffset], pItemInfo->Groups[pItemInfo->lNumGroups], WordList );
ulOffset += 1;
break;
case 2:
ExpandTwoDigits( pStartChar + ulOffset, pItemInfo->Groups[pItemInfo->lNumGroups], WordList );
ulOffset += 2;
break;
case 0:
ExpandThreeDigits( pStartChar + ulOffset, pItemInfo->Groups[pItemInfo->lNumGroups], WordList );
ulOffset += 3;
break;
}
//--- Special Case: rare ordinal cases - e.g. 1,000,000th
//--- (Zeroes is defined elsewhere - presumably true when only zeroes remain; confirm)
if ( pItemInfo->fOrdinal &&
Zeroes(pStartChar + ulOffset) )
{
//--- Insert ordinal quantifier
pItemInfo->Groups[pItemInfo->lNumGroups].fQuantifier = true;
Word.pWordText = g_quantifiersOrdinal[pItemInfo->lNumGroups].pStr;
//--- Note the post-decrement: this also moves on to the next lower group
Word.ulWordLen = g_quantifiersOrdinal[pItemInfo->lNumGroups--].Len;
Word.pLemma = Word.pWordText;
Word.ulLemmaLen = Word.ulWordLen;
WordList.AddTail( Word );
//--- The ordinal quantifier ends the expansion; the remaining groups are not expanded
bFinished = true;
}
//--- Default Case
else
{
//--- Insert quantifier
pItemInfo->Groups[pItemInfo->lNumGroups].fQuantifier = true;
Word.pWordText = g_quantifiers[pItemInfo->lNumGroups].pStr;
//--- Note the post-decrement: this also moves on to the next lower group
Word.ulWordLen = g_quantifiers[pItemInfo->lNumGroups--].Len;
Word.pLemma = Word.pWordText;
Word.ulLemmaLen = Word.ulWordLen;
WordList.AddTail( Word );
}
//--- Expand rest of groupings which need to be followed by a quantifier
while ( pItemInfo->lNumGroups > 0 &&
!bFinished )
{
//--- Step over the separator character preceding this group, if the number has them
if ( pItemInfo->fSeparators )
{
ulOffset++;
}
//--- Expand digit group
ExpandThreeDigits( pStartChar + ulOffset, pItemInfo->Groups[pItemInfo->lNumGroups], WordList );
ulOffset += 3;
//--- Special case: rare ordinal cases, e.g. 1,234,000th
if ( pItemInfo->fOrdinal &&
Zeroes( pStartChar + ulOffset ) )
{
//--- Insert ordinal quantifier
pItemInfo->Groups[pItemInfo->lNumGroups].fQuantifier = true;
Word.pWordText = g_quantifiersOrdinal[pItemInfo->lNumGroups].pStr;
Word.ulWordLen = g_quantifiersOrdinal[pItemInfo->lNumGroups--].Len;
Word.pLemma = Word.pWordText;
Word.ulLemmaLen = Word.ulWordLen;
WordList.AddTail( Word );
bFinished = true;
}
//--- Default Case
//--- (ThreeZeroes is handed the group just expanded - presumably true for "000"; confirm)
else if ( !ThreeZeroes( pStartChar + ulOffset - 3 ) )
{
//--- Insert quantifier
pItemInfo->Groups[pItemInfo->lNumGroups].fQuantifier = true;
Word.pWordText = g_quantifiers[pItemInfo->lNumGroups].pStr;
Word.ulWordLen = g_quantifiers[pItemInfo->lNumGroups--].Len;
Word.pLemma = Word.pWordText;
Word.ulLemmaLen = Word.ulWordLen;
WordList.AddTail( Word );
}
//--- Special Case: this grouping is all zeroes, e.g. 1,000,567
else
{
//--- No quantifier word is added; just move on to the next lower group
pItemInfo->lNumGroups--;
}
}
//--- Expand final grouping, which requires no quantifier
if ( pItemInfo->fSeparators &&
!bFinished )
{
ulOffset++;
}
if ( pItemInfo->fOrdinal &&
!bFinished )
{
ExpandThreeOrdinal( pStartChar + ulOffset, pItemInfo->Groups[pItemInfo->lNumGroups], WordList );
ulOffset += 3;
}
else if ( !bFinished )
{
ExpandThreeDigits( pStartChar + ulOffset, pItemInfo->Groups[pItemInfo->lNumGroups], WordList );
ulOffset += 3;
}
}
//--- lNumGroups was consumed as an index above - set it from the value saved on entry
pItemInfo->lNumGroups = (long) ulTemp;
} /* ExpandInteger */
/***********************************************************************************************
* IsDigitString *
*---------------*
*   Description:
*       Helper for IsNumber, IsPhoneNumber, etc. which matches a digit string...
*
*   RegExp:
*       d+
*
*   Parameters:
*       pStartChar    - first character to examine; scanning stops at m_pEndOfCurrItem
*       pDigitsInfo   - [out] on success, a zero-initialized TTSDigitsItemInfo with
*                       pFirstDigit and ulNumDigits filled in
*       MemoryManager - allocator for pDigitsInfo
*
*   Returns:
*       S_OK if at least one digit was found, E_INVALIDARG if none was, or the failure
*   code reported by MemoryManager.GetMemory.
*
********************************************************************* AH **********************/
HRESULT CStdSentEnum::IsDigitString( const WCHAR* pStartChar, TTSDigitsItemInfo*& pDigitsInfo,
                                     CSentItemMemory& MemoryManager )
{
    HRESULT hr = S_OK;
    ULONG ulOffset = 0;

    //--- Count the leading digits, stopping at the end of the current item
    while ( pStartChar + ulOffset < m_pEndOfCurrItem &&
            isdigit( pStartChar[ulOffset] ) )
    {
        ulOffset++;
    }

    if ( ulOffset )
    {
        pDigitsInfo = (TTSDigitsItemInfo*) MemoryManager.GetMemory( sizeof( TTSDigitsItemInfo ), &hr );
        if ( SUCCEEDED( hr ) )
        {
            //--- Clear the whole structure.  (sizeof( pDigitsInfo ) would only be the size
            //--- of the pointer and would leave the rest of the structure uninitialized.)
            ZeroMemory( pDigitsInfo, sizeof( TTSDigitsItemInfo ) );
            pDigitsInfo->pFirstDigit = pStartChar;
            pDigitsInfo->ulNumDigits = ulOffset;
        }
    }
    else
    {
        hr = E_INVALIDARG;
    }

    return hr;
} /* IsDigitString */
/***********************************************************************************************
* ExpandDigits *
*--------------*
*   Description:
*       Expands a string of digits, digit by digit: ExpandDigit is called once for each of
*   the ulNumDigits characters starting at pFirstDigit, in order, appending to WordList.
*
*   Note: This function does not do parameter validation. Assumed to be done by caller.
********************************************************************* AH **********************/
void CStdSentEnum::ExpandDigits( TTSDigitsItemInfo* pItemInfo, CWordList& WordList )
{
    SPDBG_FUNC( "CStdSentEnum::ExpandDigits" );

    ULONG ulIndex = 0;
    while ( ulIndex < pItemInfo->ulNumDigits )
    {
        //--- ExpandDigit wants a NumberGroup to fill in; the result is not needed here
        NumberGroup Unused;
        ExpandDigit( pItemInfo->pFirstDigit[ulIndex], Unused, WordList );
        ++ulIndex;
    }
} /* ExpandDigits */
/***********************************************************************************************
* IsFraction *
*------------*
* Description:
* Helper for IsNumber which matches a fraction...
*
* RegExp:
* { NUM_CARDINAL || NUM_DECIMAL } / { NUM_CARDINAL || NUM_DECIMAL }
*
********************************************************************* AH **********************/
//--- Parameters:
//---   pStartChar    - first character of the candidate fraction; the candidate is taken to end
//---                   at m_pEndOfCurrItem.
//---   pFractionInfo - [out] receives a TTSFractionItemInfo allocated from MemoryManager.
//---   MemoryManager - allocator used for every item-info structure created here.
//--- Returns S_OK when a fraction was recognized, E_INVALIDARG when the text is not a fraction,
//--- otherwise whatever failure MemoryManager.GetMemory or IsNumber reported.
//--- m_pNextChar and m_pEndOfCurrItem are moved while parsing and are put back before returning.
HRESULT CStdSentEnum::IsFraction( const WCHAR* pStartChar, TTSFractionItemInfo*& pFractionInfo,
                                  CSentItemMemory& MemoryManager )
{
    SPDBG_FUNC( "CStdSentEnum::IsFraction" );
    HRESULT hr = S_OK;
    ULONG ulTokenLen = (ULONG)(m_pEndOfCurrItem - pStartChar);

    if ( !ulTokenLen )
    {
        //--- Nothing to look at
        hr = E_INVALIDARG;
    }
    //--- Check for Vulgar Fraction
    else if ( pStartChar[0] == L'¼' ||
              pStartChar[0] == L'½' ||
              pStartChar[0] == L'¾' )
    {
        //--- Build the whole structure one allocation at a time; each step only runs when
        //--- every allocation before it succeeded.
        pFractionInfo = (TTSFractionItemInfo*) MemoryManager.GetMemory( sizeof( TTSFractionItemInfo ), &hr );
        if ( SUCCEEDED( hr ) )
        {
            ZeroMemory( pFractionInfo, sizeof( TTSFractionItemInfo ) );
            pFractionInfo->pVulgar    = pStartChar;
            pFractionInfo->pNumerator =
                (TTSNumberItemInfo*) MemoryManager.GetMemory( sizeof( TTSNumberItemInfo ), &hr );
        }
        if ( SUCCEEDED( hr ) )
        {
            ZeroMemory( pFractionInfo->pNumerator, sizeof( TTSNumberItemInfo ) );
            pFractionInfo->pDenominator =
                (TTSNumberItemInfo*) MemoryManager.GetMemory( sizeof( TTSNumberItemInfo ), &hr );
        }
        if ( SUCCEEDED( hr ) )
        {
            ZeroMemory( pFractionInfo->pDenominator, sizeof( TTSNumberItemInfo ) );
            pFractionInfo->pNumerator->pIntegerPart =
                (TTSIntegerItemInfo*) MemoryManager.GetMemory( sizeof( TTSIntegerItemInfo ), &hr );
        }
        if ( SUCCEEDED( hr ) )
        {
            ZeroMemory( pFractionInfo->pNumerator->pIntegerPart, sizeof( TTSIntegerItemInfo ) );
            pFractionInfo->pDenominator->pIntegerPart =
                (TTSIntegerItemInfo*) MemoryManager.GetMemory( sizeof( TTSIntegerItemInfo ), &hr );
        }
        if ( SUCCEEDED( hr ) )
        {
            ZeroMemory( pFractionInfo->pDenominator->pIntegerPart, sizeof( TTSIntegerItemInfo ) );
            pFractionInfo->fIsStandard = false;
            //--- Numerator and denominator are both described as a single one-digit group
            pFractionInfo->pNumerator->pIntegerPart->lLeftOver         = 1;
            pFractionInfo->pNumerator->pIntegerPart->lNumGroups        = 1;
            pFractionInfo->pNumerator->pIntegerPart->Groups[0].fOnes   = true;
            pFractionInfo->pDenominator->pIntegerPart->lLeftOver       = 1;
            pFractionInfo->pDenominator->pIntegerPart->lNumGroups      = 1;
            pFractionInfo->pDenominator->pIntegerPart->Groups[0].fOnes = true;
        }
    }
    //--- Check for multi-character fraction
    else
    {
        TTSItemInfo *pNumeratorInfo = NULL, *pDenominatorInfo = NULL;
        const WCHAR *pTempNextChar = m_pNextChar, *pTempEndOfCurrItem = m_pEndOfCurrItem;

        //--- Narrow the current item to the text in front of the first '/'
        m_pNextChar      = pStartChar;
        m_pEndOfCurrItem = wcschr( pStartChar, L'/' );
        if ( !m_pEndOfCurrItem ||
             m_pEndOfCurrItem >= pTempEndOfCurrItem )
        {
            //--- No slash inside the token
            hr = E_INVALIDARG;
        }

        //--- Try to get numerator
        if ( SUCCEEDED( hr ) )
        {
            hr = IsNumber( pNumeratorInfo, L"NUMBER", MemoryManager, false );
        }
        if ( SUCCEEDED( hr ) &&
             pNumeratorInfo->Type != eNUM_MIXEDFRACTION &&
             pNumeratorInfo->Type != eNUM_FRACTION      &&
             pNumeratorInfo->Type != eNUM_ORDINAL )
        {
            //--- Step over the numerator's integer and decimal text
            if ( ( (TTSNumberItemInfo*) pNumeratorInfo )->pIntegerPart )
            {
                m_pNextChar += ( (TTSNumberItemInfo*) pNumeratorInfo )->pIntegerPart->pEndChar -
                               ( (TTSNumberItemInfo*) pNumeratorInfo )->pIntegerPart->pStartChar;
            }
            if ( ( (TTSNumberItemInfo*) pNumeratorInfo )->pDecimalPart )
            {
                //--- The extra one accounts for the decimal point character
                m_pNextChar += ( (TTSNumberItemInfo*) pNumeratorInfo )->pDecimalPart->ulNumDigits + 1;
            }
        }
        else if ( SUCCEEDED( hr ) )
        {
            //--- A number, but not of a kind accepted as a numerator
            delete ( (TTSNumberItemInfo*) pNumeratorInfo )->pWordList;
            hr = E_INVALIDARG;
        }
        m_pEndOfCurrItem = pTempEndOfCurrItem;

        //--- Try to get denominator
        if ( SUCCEEDED( hr ) &&
             m_pNextChar[0] == L'/' )
        {
            m_pNextChar++;
            hr = IsNumber( pDenominatorInfo, L"NUMBER", MemoryManager, false );
            if ( SUCCEEDED( hr ) &&
                 pDenominatorInfo->Type != eNUM_MIXEDFRACTION &&
                 pDenominatorInfo->Type != eNUM_FRACTION      &&
                 pDenominatorInfo->Type != eNUM_ORDINAL )
            {
                pFractionInfo =
                    ( TTSFractionItemInfo*) MemoryManager.GetMemory( sizeof( TTSFractionItemInfo ), &hr );
                if ( SUCCEEDED( hr ) )
                {
                    ZeroMemory( pFractionInfo, sizeof( TTSFractionItemInfo ) );
                    pFractionInfo->pNumerator   = (TTSNumberItemInfo*) pNumeratorInfo;
                    pFractionInfo->pDenominator = (TTSNumberItemInfo*) pDenominatorInfo;
                    pFractionInfo->pVulgar      = NULL;
                    pFractionInfo->fIsStandard  = false;
                }
                else
                {
                    //--- The fraction record could not be allocated, so nothing will take
                    //--- ownership of the two word lists - release them here like every
                    //--- other failure path does.
                    delete ( (TTSNumberItemInfo*) pNumeratorInfo )->pWordList;
                    delete ( (TTSNumberItemInfo*) pDenominatorInfo )->pWordList;
                }
            }
            else if ( SUCCEEDED( hr ) )
            {
                //--- A number, but not of a kind accepted as a denominator
                delete ( (TTSNumberItemInfo*) pNumeratorInfo )->pWordList;
                delete ( (TTSNumberItemInfo*) pDenominatorInfo )->pWordList;
                hr = E_INVALIDARG;
            }
            else
            {
                //--- No denominator at all - only the numerator needs cleaning up
                delete ( (TTSNumberItemInfo*) pNumeratorInfo )->pWordList;
            }
        }
        else if ( SUCCEEDED( hr ) )
        {
            //--- The numerator is not immediately followed by '/'
            hr = E_INVALIDARG;
            delete ( (TTSNumberItemInfo*) pNumeratorInfo )->pWordList;
        }
        m_pNextChar = pTempNextChar;
    }

    return hr;
} /* IsFraction */
/***********************************************************************************************
* ExpandFraction *
*----------------*
* Description:
* Expands Items previously determined to be of type NUM_FRACTION by IsFraction.
*
* NOTE: This function does not do parameter validation. Assumed to be done by caller.
********************************************************************* AH **********************/
HRESULT CStdSentEnum::ExpandFraction( TTSFractionItemInfo* pItemInfo, CWordList& WordList )
{
    SPDBG_FUNC( "CStdSentEnum::ExpandFraction" );

    //--- Template word; only the text/lemma fields change from word to word
    TTSWord Word;
    ZeroMemory( &Word, sizeof(TTSWord) );
    Word.pXmlState         = &m_pCurrFrag->State;
    Word.eWordPartOfSpeech = MS_Unknown;

    if ( pItemInfo->pVulgar )
    {
        //--- Special case - vulgar fractions ( ¼, ½, ¾ ): always exactly two words
        const WCHAR wcVulgar = pItemInfo->pVulgar[0];

        //--- First word - the numerator
        if ( wcVulgar == L'¼' || wcVulgar == L'½' )
        {
            Word.pWordText = g_ones[1].pStr;
            Word.ulWordLen = g_ones[1].Len;
        }
        else
        {
            Word.pWordText = g_ones[3].pStr;
            Word.ulWordLen = g_ones[3].Len;
        }
        Word.pLemma     = Word.pWordText;
        Word.ulLemmaLen = Word.ulWordLen;
        WordList.AddTail( Word );

        //--- Second word - the denominator
        if ( wcVulgar == L'¼' )
        {
            Word.pWordText = g_onesOrdinal[4].pStr;
            Word.ulWordLen = g_onesOrdinal[4].Len;
        }
        else if ( wcVulgar == L'½' )
        {
            Word.pWordText = g_Half.pStr;
            Word.ulWordLen = g_Half.Len;
        }
        else
        {
            Word.pWordText = g_PluralDenominators[4].pStr;
            Word.ulWordLen = g_PluralDenominators[4].Len;
        }
        Word.pLemma     = Word.pWordText;
        Word.ulLemmaLen = Word.ulWordLen;
        WordList.AddTail( Word );
    }
    else
    {
        TTSNumberItemInfo* const pTop    = pItemInfo->pNumerator;
        TTSNumberItemInfo* const pBottom = pItemInfo->pDenominator;

        //--- The numerator is always spoken first, whatever the denominator turns out to be
        WordList.AddTail( pTop->pWordList );
        delete pTop->pWordList;

        //--- Classify the denominator.  Only undecorated integers ( no decimal part on either
        //--- side, denominator not negative ) can be one of the specially worded denominators:
        //--- a single digit other than 1, "10", "16" or "100".
        bool fDigit = false, fTen = false, fSixteen = false, fHundred = false;
        if ( !pBottom->pDecimalPart &&
             !pTop->pDecimalPart    &&
             !pBottom->fNegative )
        {
            if ( ( pBottom->pEndChar - pBottom->pStartChar ) == 1 )
            {
                fDigit = ( pBottom->pStartChar[0] != L'1' );
            }
            else if ( ( pBottom->pEndChar - pBottom->pStartChar ) == 2 )
            {
                fTen     = ( wcsncmp( pBottom->pStartChar, L"10", 2 ) == 0 );
                fSixteen = ( wcsncmp( pBottom->pStartChar, L"16", 2 ) == 0 );
            }
            else if ( ( pBottom->pEndChar - pBottom->pStartChar ) == 3 )
            {
                fHundred = ( wcsncmp( pBottom->pStartChar, L"100", 3 ) == 0 );
            }
        }
        pItemInfo->fIsStandard = !( fDigit || fTen || fSixteen || fHundred );

        if ( pItemInfo->fIsStandard )
        {
            //--- Default case - Numerator "over" Denominator
            Word.pWordText  = g_Over.pStr;
            Word.ulWordLen  = g_Over.Len;
            Word.pLemma     = Word.pWordText;
            Word.ulLemmaLen = Word.ulWordLen;
            WordList.AddTail( Word );
            WordList.AddTail( pBottom->pWordList );
        }
        else if ( ( pTop->pEndChar - pTop->pStartChar ) == 1 &&
                  pTop->pStartChar[0] == L'1' )
        {
            //--- Numerator is exactly "1" - singular form of the denominator
            if ( fDigit )
            {
                if ( pBottom->pStartChar[0] == L'2' )
                {
                    Word.pWordText  = g_Half.pStr;
                    Word.ulWordLen  = g_Half.Len;
                    Word.pLemma     = Word.pWordText;
                    Word.ulLemmaLen = Word.ulWordLen;
                    WordList.AddTail( Word );
                }
                else
                {
                    ExpandDigitOrdinal( pBottom->pStartChar[0],
                                        pBottom->pIntegerPart->Groups[0], WordList );
                }
            }
            else if ( fHundred )
            {
                ExpandThreeOrdinal( pBottom->pStartChar,
                                    pBottom->pIntegerPart->Groups[0], WordList );
            }
            else
            {
                //--- "10" or "16"
                ExpandTwoOrdinal( pBottom->pStartChar,
                                  pBottom->pIntegerPart->Groups[0], WordList );
            }
        }
        else
        {
            //--- Any other numerator - plural form of the denominator
            if ( fDigit )
            {
                ULONG index = pBottom->pStartChar[0] - L'0';
                Word.pWordText = g_PluralDenominators[index].pStr;
                Word.ulWordLen = g_PluralDenominators[index].Len;
            }
            else if ( fTen )
            {
                Word.pWordText = g_Tenths.pStr;
                Word.ulWordLen = g_Tenths.Len;
            }
            else if ( fSixteen )
            {
                Word.pWordText = g_Sixteenths.pStr;
                Word.ulWordLen = g_Sixteenths.Len;
            }
            else
            {
                Word.pWordText = g_Hundredths.pStr;
                Word.ulWordLen = g_Hundredths.Len;
            }
            Word.pLemma     = Word.pWordText;
            Word.ulLemmaLen = Word.ulWordLen;
            WordList.AddTail( Word );
        }

        //--- The denominator's own word list is finished with in every case
        delete pBottom->pWordList;
    }

    return S_OK;
} /* ExpandFraction */
/***********************************************************************************************
* ExpandDigit *
*-------------*
* Description:
* Expands single digits into words, and inserts them into WordList
*
* Note: This function does not do parameter validation. Assumed to be done by caller.
********************************************************************* AH **********************/
//--- Parameters:
//---   Number        - the digit character to speak; must be in L'0'..L'9'.
//---   NormGroupInfo - group record; its fOnes flag is set.
//---   WordList      - receives the single word taken from g_ones.
void CStdSentEnum::ExpandDigit( const WCHAR Number, NumberGroup& NormGroupInfo, CWordList& WordList )
{
    SPDBG_FUNC( "CStdSentEnum::ExpandDigit" );
    //--- Range-check the wide character directly.  The narrow isdigit() is only defined for
    //--- values that fit in an unsigned char, so it must not be handed an arbitrary WCHAR.
    SPDBG_ASSERT( Number >= L'0' && Number <= L'9' );

    // 0-9
    ULONG Index = Number - L'0';

    TTSWord Word;
    ZeroMemory( &Word, sizeof(TTSWord) );
    Word.pXmlState          = &m_pCurrFrag->State;
    Word.pWordText          = g_ones[Index].pStr;
    Word.ulWordLen          = g_ones[Index].Len;
    Word.pLemma             = Word.pWordText;
    Word.ulLemmaLen         = Word.ulWordLen;
    Word.eWordPartOfSpeech  = MS_Unknown;
    WordList.AddTail( Word );

    NormGroupInfo.fOnes = true;
} /* ExpandDigit */
/***********************************************************************************************
* ExpandTwoDigits *
*-----------------*
* Description:
* Expands two digit strings into words, and inserts them into WordList.
*
* Note: This function does not do parameter validation. Assumed to be done by caller.
********************************************************************* AH **********************/
//--- Parameters:
//---   NumberString  - at least two digit characters; only the first two are read.
//---   NormGroupInfo - group record; fTens / fOnes are set according to the words produced.
//---   WordList      - receives the words.
void CStdSentEnum::ExpandTwoDigits( const WCHAR *NumberString, NumberGroup& NormGroupInfo, CWordList& WordList )
{
    SPDBG_FUNC( "CStdSentEnum::ExpandTwoDigits" );
    //--- Digits are range-checked directly: the narrow isdigit() is only defined for values
    //--- that fit in an unsigned char, so it must not be handed an arbitrary WCHAR.
    SPDBG_ASSERT( NumberString &&
                  wcslen(NumberString) >= 2 &&
                  NumberString[0] >= L'0' && NumberString[0] <= L'9' &&
                  NumberString[1] >= L'0' && NumberString[1] <= L'9' );

    // 10-99
    ULONG IndexOne = NumberString[0] - L'0';
    ULONG IndexTwo = NumberString[1] - L'0';

    TTSWord Word;
    ZeroMemory( &Word, sizeof(TTSWord) );
    Word.pXmlState         = &m_pCurrFrag->State;
    Word.eWordPartOfSpeech = MS_Unknown;

    if ( IndexOne == 1 )
    {
        // 10-19 - a single word from the teens table
        Word.pWordText  = g_teens[IndexTwo].pStr;
        Word.ulWordLen  = g_teens[IndexTwo].Len;
        Word.pLemma     = Word.pWordText;
        Word.ulLemmaLen = Word.ulWordLen;
        WordList.AddTail( Word );
        NormGroupInfo.fOnes = true;
    }
    else
    {
        // 20-99, or 00-09
        if ( IndexOne != 0 )
        {
            Word.pWordText  = g_tens[IndexOne].pStr;
            Word.ulWordLen  = g_tens[IndexOne].Len;
            Word.pLemma     = Word.pWordText;
            Word.ulLemmaLen = Word.ulWordLen;
            WordList.AddTail( Word );
            NormGroupInfo.fTens = true;
        }
        if ( IndexTwo != 0 )
        {
            //--- ExpandDigit records fOnes itself
            ExpandDigit( NumberString[1], NormGroupInfo, WordList );
        }
    }
} /* ExpandTwoDigits */
/***********************************************************************************************
* ExpandThreeDigits *
*-------------------*
* Description:
* Expands three digit strings into words, and inserts them into WordList.
*
* Note: This function does not do parameter validation. Assumed to be done by caller.
********************************************************************* AH **********************/
//--- Parameters:
//---   NumberString  - at least three digit characters; only the first three are read.
//---   NormGroupInfo - group record; fHundreds / fTens / fOnes are set according to the words
//---                   produced.
//---   WordList      - receives the words.
void CStdSentEnum::ExpandThreeDigits( const WCHAR *NumberString, NumberGroup& NormGroupInfo, CWordList& WordList )
{
    SPDBG_FUNC( "CStdSentEnum::ExpandThreeDigits" );
    //--- Digits are range-checked directly: the narrow isdigit() is only defined for values
    //--- that fit in an unsigned char, so it must not be handed an arbitrary WCHAR.
    SPDBG_ASSERT( NumberString &&
                  wcslen(NumberString) >= 3 &&
                  NumberString[0] >= L'0' && NumberString[0] <= L'9' &&
                  NumberString[1] >= L'0' && NumberString[1] <= L'9' &&
                  NumberString[2] >= L'0' && NumberString[2] <= L'9' );

    // 100-999
    ULONG IndexOne = NumberString[0] - L'0';

    if ( IndexOne != 0 )
    {
        // Take care of hundreds: "<digit>" followed by g_quantifiers[0]
        ExpandDigit( NumberString[0], NormGroupInfo, WordList );

        TTSWord Word;
        ZeroMemory( &Word, sizeof(TTSWord) );
        Word.pXmlState         = &m_pCurrFrag->State;
        Word.eWordPartOfSpeech = MS_Unknown;
        Word.pWordText         = g_quantifiers[0].pStr;
        Word.ulWordLen         = g_quantifiers[0].Len;
        Word.pLemma            = Word.pWordText;
        Word.ulLemmaLen        = Word.ulWordLen;
        WordList.AddTail( Word );

        NormGroupInfo.fHundreds = true;
        //--- ExpandDigit set fOnes for the hundreds digit; clear it so that only the real
        //--- ones digit (handled below) can set it.
        NormGroupInfo.fOnes = false;
    }

    // Take care of tens and ones...
    ExpandTwoDigits( NumberString + 1, NormGroupInfo, WordList );
} /* ExpandThreeDigits */
/***********************************************************************************************
* ExpandDigitOrdinal *
*--------------------*
* Description:
* Expands single digit ordinal strings into words, and inserts them into WordList.
*
* Note: This function does not do parameter validation. Assumed to be done by caller.
********************************************************************* AH **********************/
//--- Parameters:
//---   Number        - the digit character to speak as an ordinal; must be in L'0'..L'9'.
//---   NormGroupInfo - group record; its fOnes flag is set.
//---   WordList      - receives the single word taken from g_onesOrdinal.
void CStdSentEnum::ExpandDigitOrdinal( const WCHAR Number, NumberGroup& NormGroupInfo, CWordList& WordList )
{
    SPDBG_FUNC( "CStdSentEnum::ExpandDigitOrdinal" );
    //--- Range-check the wide character directly.  The narrow isdigit() is only defined for
    //--- values that fit in an unsigned char, so it must not be handed an arbitrary WCHAR.
    SPDBG_ASSERT( Number >= L'0' && Number <= L'9' );

    // 0-9
    ULONG Index = Number - L'0';

    TTSWord Word;
    ZeroMemory( &Word, sizeof(TTSWord) );
    Word.pXmlState          = &m_pCurrFrag->State;
    Word.pWordText          = g_onesOrdinal[Index].pStr;
    Word.ulWordLen          = g_onesOrdinal[Index].Len;
    Word.pLemma             = Word.pWordText;
    Word.ulLemmaLen         = Word.ulWordLen;
    Word.eWordPartOfSpeech  = MS_Unknown;
    WordList.AddTail( Word );

    NormGroupInfo.fOnes = true;
} /* ExpandDigitOrdinal */
/***********************************************************************************************
* ExpandTwoOrdinal *
*------------------*
* Description:
* Expands two digit ordinal strings into words, and inserts them into WordList.
*
* Note: This function does not do parameter validation. Assumed to be done by caller.
********************************************************************* AH **********************/
//--- Parameters:
//---   NumberString  - at least two digit characters; only the first two are read.
//---   NormGroupInfo - group record; fTens / fOnes are set according to the words produced.
//---   WordList      - receives the words; the last word produced is the ordinal form.
void CStdSentEnum::ExpandTwoOrdinal( const WCHAR *NumberString, NumberGroup& NormGroupInfo, CWordList& WordList )
{
    SPDBG_FUNC( "CStdSentEnum::ExpandTwoOrdinal" );
    //--- Digits are range-checked directly: the narrow isdigit() is only defined for values
    //--- that fit in an unsigned char, so it must not be handed an arbitrary WCHAR.
    SPDBG_ASSERT( NumberString &&
                  wcslen(NumberString) >= 2 &&
                  NumberString[0] >= L'0' && NumberString[0] <= L'9' &&
                  NumberString[1] >= L'0' && NumberString[1] <= L'9' );

    // 10-99
    ULONG IndexOne = NumberString[0] - L'0';
    ULONG IndexTwo = NumberString[1] - L'0';

    TTSWord Word;
    ZeroMemory( &Word, sizeof(TTSWord) );
    Word.pXmlState         = &m_pCurrFrag->State;
    Word.eWordPartOfSpeech = MS_Unknown;

    if ( IndexOne == 1 )
    {
        // 10-19 - a single word from the ordinal teens table
        Word.pWordText  = g_teensOrdinal[IndexTwo].pStr;
        Word.ulWordLen  = g_teensOrdinal[IndexTwo].Len;
        Word.pLemma     = Word.pWordText;
        Word.ulLemmaLen = Word.ulWordLen;
        WordList.AddTail( Word );
        NormGroupInfo.fOnes = true;
    }
    else if ( IndexOne == 0 )
    {
        // 00-09 - just the ordinal digit ( which records fOnes itself )
        ExpandDigitOrdinal( NumberString[1], NormGroupInfo, WordList );
    }
    else if ( IndexTwo != 0 )
    {
        // 21-99 with a non-zero ones digit: cardinal tens word + ordinal digit
        Word.pWordText  = g_tens[IndexOne].pStr;
        Word.ulWordLen  = g_tens[IndexOne].Len;
        Word.pLemma     = Word.pWordText;
        Word.ulLemmaLen = Word.ulWordLen;
        WordList.AddTail( Word );
        NormGroupInfo.fTens = true;
        ExpandDigitOrdinal( NumberString[1], NormGroupInfo, WordList );
    }
    else
    {
        // 20, 30, ... 90 - a single word from the ordinal tens table
        Word.pWordText  = g_tensOrdinal[IndexOne].pStr;
        Word.ulWordLen  = g_tensOrdinal[IndexOne].Len;
        Word.pLemma     = Word.pWordText;
        Word.ulLemmaLen = Word.ulWordLen;
        WordList.AddTail( Word );
        //--- A tens word was produced, so record it - the same bookkeeping ExpandTwoDigits
        //--- and the branch above perform.
        NormGroupInfo.fTens = true;
    }
} /* ExpandTwoOrdinal */
/***********************************************************************************************
* ExpandThreeOrdinal *
*--------------------*
* Description:
* Expands three digit ordinal strings into words, and inserts them into WordList.
*
* Note: This function does not do parameter validation. Assumed to be done by caller.
********************************************************************* AH **********************/
//--- Parameters:
//---   NumberString  - at least three digit characters.
//---   NormGroupInfo - group record; fHundreds / fTens / fOnes are set according to the words
//---                   produced.
//---   WordList      - receives the words; the last word produced is the ordinal form.
void CStdSentEnum::ExpandThreeOrdinal( const WCHAR *NumberString, NumberGroup& NormGroupInfo, CWordList& WordList )
{
    //--- Name this function, not its cardinal sibling
    SPDBG_FUNC( "CStdSentEnum::ExpandThreeOrdinal" );
    //--- Digits are range-checked directly: the narrow isdigit() is only defined for values
    //--- that fit in an unsigned char, so it must not be handed an arbitrary WCHAR.
    SPDBG_ASSERT( NumberString &&
                  wcslen(NumberString) >= 3 &&
                  NumberString[0] >= L'0' && NumberString[0] <= L'9' &&
                  NumberString[1] >= L'0' && NumberString[1] <= L'9' &&
                  NumberString[2] >= L'0' && NumberString[2] <= L'9' );

    // 100-999
    ULONG IndexOne = NumberString[0] - L'0';

    if ( IndexOne == 0 )
    {
        //--- Special case - no hundreds
        ExpandTwoOrdinal( NumberString + 1, NormGroupInfo, WordList );
        return;
    }

    TTSWord Word;
    ZeroMemory( &Word, sizeof(TTSWord) );
    Word.pXmlState         = &m_pCurrFrag->State;
    Word.eWordPartOfSpeech = MS_Unknown;

    //--- Hundreds digit.  ExpandDigit sets fOnes for it; clear that again so fOnes ends up
    //--- describing the real ones digit only, exactly as ExpandThreeDigits does.
    ExpandDigit( NumberString[0], NormGroupInfo, WordList );
    NormGroupInfo.fOnes     = false;
    NormGroupInfo.fHundreds = true;

    if ( Zeroes( NumberString + 1 ) )
    {
        //--- Special case - x hundredth
        Word.pWordText  = g_quantifiersOrdinal[0].pStr;
        Word.ulWordLen  = g_quantifiersOrdinal[0].Len;
        Word.pLemma     = Word.pWordText;
        Word.ulLemmaLen = Word.ulWordLen;
        WordList.AddTail( Word );
    }
    else
    {
        //--- Default case - x hundred yth
        Word.pWordText  = g_quantifiers[0].pStr;
        Word.ulWordLen  = g_quantifiers[0].Len;
        Word.pLemma     = Word.pWordText;
        Word.ulLemmaLen = Word.ulWordLen;
        WordList.AddTail( Word );
        ExpandTwoOrdinal( NumberString + 1, NormGroupInfo, WordList );
    }
} /* ExpandThreeOrdinal */
/***********************************************************************************************
* MatchQuantifier *
*-----------------*
* Description:
* Checks the incoming Item's text to determine whether or not it
* is a numerical quantifier.
********************************************************************* AH **********************/
int MatchQuantifier( const WCHAR*& pStartChar, const WCHAR*& pEndChar )
{
    //--- Walk the quantifier table in order and stop at the first entry that the text
    //--- [pStartChar, pEndChar) begins with ( compared without regard to case ).
    //--- Only that leading part is compared; characters after it are not examined here.
    int iCandidate = 0;
    while ( iCandidate < sp_countof(g_quantifiers) )
    {
        if ( pEndChar - pStartChar >= g_quantifiers[iCandidate].Len &&
             wcsnicmp( pStartChar, g_quantifiers[iCandidate].pStr, g_quantifiers[iCandidate].Len ) == 0 )
        {
            //--- Consume the quantifier and report which one it was
            pStartChar += g_quantifiers[iCandidate].Len;
            return iCandidate;
        }
        iCandidate++;
    }

    //--- Nothing matched; pStartChar is left where it was
    return -1;
} /* MatchQuantifier */
/***********************************************************************************************
* IsCurrency *
*------------*
* Description:
* Checks the incoming Item's text to determine whether or not it
* is a currency.
*
* RegExp:
* { [CurrencySign] { d+ || d(1-3)[,ddd]+ } { [.]d+ }? } { [whitespace] [quantifier] }? ||
* { { d+ || d(1-3)[,ddd]+ } { [.]d+ }? { [whitespace] [quantifier] }? [whitespace]? [CurrencySign] }
*
* Types assigned:
* NUM_CURRENCY
********************************************************************* AH **********************/
//--- Parameters:
//---   pItemNormInfo - [out] on success points at a TTSCurrencyItemInfo allocated from MemoryManager.
//---   MemoryManager - allocator used for every item-info structure created here.
//---   WordList      - receives the spoken-form words of the currency.
//--- Returns S_OK when a currency was matched and expanded, E_INVALIDARG when the text fits
//--- neither currency pattern, otherwise the failure code of the helper that failed.
//--- When no currency is matched, m_pNextChar, m_pEndChar, m_pEndOfCurrItem and m_pCurrFrag are
//--- put back to their values on entry.  On success only m_pNextChar is put back; the other
//--- three keep whatever the match advanced them to.
HRESULT CStdSentEnum::IsCurrency( TTSItemInfo*& pItemNormInfo, CSentItemMemory& MemoryManager,
                                  CWordList& WordList )
{
    SPDBG_FUNC( "NumNorm IsCurrency" );

    HRESULT hr = S_OK;

    //--- Scan position on entry
    const WCHAR *pTempNextChar = m_pNextChar, *pTempEndOfItem = m_pEndOfCurrItem, *pTempEndChar = m_pEndChar;
    const SPVTEXTFRAG* pTempFrag = m_pCurrFrag;

    const SPVSTATE *pNumberXMLState = NULL, *pSymbolXMLState = NULL, *pQuantifierXMLState = NULL;
    CItemList PostNumberList, PostSymbolList;
    int iSymbolIndex = -1, iQuantIndex = -1;
    TTSItemInfo* pNumberInfo = NULL;
    BOOL fDone = false, fNegative = false;
    WCHAR wcDecimalPoint = ( m_eSeparatorAndDecimal == COMMA_PERIOD ? L'.' : L',' );

    //--- Try to match [CurrencySign] [Number] [Quantifier]
    NORM_POSITION ePosition = UNATTACHED;

    //--- A minus sign in front of everything is remembered and spoken later
    if ( m_pNextChar[0] == L'-' )
    {
        fNegative = true;
        m_pNextChar++;
    }

    iSymbolIndex = MatchCurrencySign( m_pNextChar, m_pEndOfCurrItem, ePosition );
    if ( iSymbolIndex >= 0 &&
         ePosition == PRECEDING )
    {
        pSymbolXMLState = &m_pCurrFrag->State;

        //--- Skip any whitespace in between the currency sign and the number...
        hr = SkipWhiteSpaceAndTags( m_pNextChar, m_pEndChar, m_pCurrFrag, MemoryManager, true, &PostSymbolList );

        if ( !m_pNextChar )
        {
            hr = E_INVALIDARG;
        }

        if ( SUCCEEDED( hr ) )
        {
            m_pEndOfCurrItem = FindTokenEnd( m_pNextChar, m_pEndChar );
            //--- Trim trailing punctuation; if there was any, nothing more belongs to this item
            while ( IsMiscPunctuation( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED ||
                    IsGroupEnding( *(m_pEndOfCurrItem - 1) )     != eUNMATCHED ||
                    IsQuotationMark( *(m_pEndOfCurrItem - 1) )   != eUNMATCHED ||
                    IsEOSItem( *(m_pEndOfCurrItem - 1) )         != eUNMATCHED )
            {
                fDone = true;
                m_pEndOfCurrItem--;
            }
        }

        //--- Try to match a number string
        if ( SUCCEEDED( hr ) )
        {
            hr = IsNumberCategory( pNumberInfo, L"NUMBER", MemoryManager );
            if ( SUCCEEDED( hr ) )
            {
                if ( pNumberInfo->Type != eNUM_CARDINAL &&
                     pNumberInfo->Type != eNUM_DECIMAL  &&
                     pNumberInfo->Type != eNUM_FRACTION &&
                     pNumberInfo->Type != eNUM_MIXEDFRACTION )
                {
                    hr = E_INVALIDARG;
                }
                else
                {
                    pNumberXMLState = &m_pCurrFrag->State;
                }
            }

            //--- Skip any whitespace in between the number and the quantifier...
            if ( !fDone &&
                 SUCCEEDED( hr ) )
            {
                //--- Position to fall back to when no quantifier follows
                const WCHAR *pSavedNextChar = m_pNextChar, *pSavedEndChar = m_pEndChar;
                const WCHAR *pSavedEndOfItem = m_pEndOfCurrItem;
                const SPVTEXTFRAG *pSavedFrag = m_pCurrFrag;

                m_pNextChar = m_pEndOfCurrItem;
                hr = SkipWhiteSpaceAndTags( m_pNextChar, m_pEndChar, m_pCurrFrag, MemoryManager, true, &PostNumberList );

                if ( m_pNextChar &&
                     SUCCEEDED( hr ) )
                {
                    m_pEndOfCurrItem = FindTokenEnd( m_pNextChar, m_pEndChar );
                    while ( IsMiscPunctuation( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED ||
                            IsGroupEnding( *(m_pEndOfCurrItem - 1) )     != eUNMATCHED ||
                            IsQuotationMark( *(m_pEndOfCurrItem - 1) )   != eUNMATCHED ||
                            IsEOSItem( *(m_pEndOfCurrItem - 1) )         != eUNMATCHED )
                    {
                        m_pEndOfCurrItem--;
                    }

                    //--- Try to match a quantifier
                    iQuantIndex = MatchQuantifier( m_pNextChar, m_pEndOfCurrItem );
                    if ( iQuantIndex >= 0 )
                    {
                        pQuantifierXMLState = &m_pCurrFrag->State;
                    }
                    else
                    {
                        m_pNextChar      = pSavedNextChar;
                        m_pEndChar       = pSavedEndChar;
                        m_pEndOfCurrItem = pSavedEndOfItem;
                        m_pCurrFrag      = pSavedFrag;
                    }
                }
                else
                {
                    m_pNextChar      = pSavedNextChar;
                    m_pEndChar       = pSavedEndChar;
                    m_pEndOfCurrItem = pSavedEndOfItem;
                    m_pCurrFrag      = pSavedFrag;
                }
            }
        }
    }
    //--- Try to match [Number] [CurrencySign] [Quantifier]
    else
    {
        //--- Try to match a number string
        hr = IsNumberCategory( pNumberInfo, L"NUMBER", MemoryManager );
        if ( SUCCEEDED( hr ) )
        {
            if ( pNumberInfo->Type != eNUM_CARDINAL &&
                 pNumberInfo->Type != eNUM_DECIMAL  &&
                 pNumberInfo->Type != eNUM_FRACTION &&
                 pNumberInfo->Type != eNUM_MIXEDFRACTION )
            {
                hr = E_INVALIDARG;
            }
            else
            {
                pNumberXMLState = &m_pCurrFrag->State;
            }
        }

        //--- Skip any whitespace and XML markup between the number and the currency sign
        if ( SUCCEEDED( hr ) )
        {
            m_pNextChar = m_pEndOfCurrItem;
            hr = SkipWhiteSpaceAndTags( m_pNextChar, m_pEndChar, m_pCurrFrag, MemoryManager, true, &PostNumberList );

            if ( !m_pNextChar )
            {
                hr = E_INVALIDARG;
            }

            if ( SUCCEEDED( hr ) )
            {
                m_pEndOfCurrItem = FindTokenEnd( m_pNextChar, m_pEndChar );
                while ( IsMiscPunctuation( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED ||
                        IsGroupEnding( *(m_pEndOfCurrItem - 1) )     != eUNMATCHED ||
                        IsQuotationMark( *(m_pEndOfCurrItem - 1) )   != eUNMATCHED ||
                        IsEOSItem( *(m_pEndOfCurrItem - 1) )         != eUNMATCHED )
                {
                    m_pEndOfCurrItem--;
                    fDone = true;
                }
            }
        }

        //--- Try to match a Currency Sign
        if ( SUCCEEDED( hr ) )
        {
            iSymbolIndex = MatchCurrencySign( m_pNextChar, m_pEndOfCurrItem, ePosition );
            if ( iSymbolIndex >= 0 )
            {
                pSymbolXMLState = &m_pCurrFrag->State;
            }

            //--- Skip any whitespace in between the currency sign and the quantifier
            if ( !fDone &&
                 iSymbolIndex >= 0 )
            {
                //--- Position to fall back to when no quantifier follows
                const WCHAR *pSavedNextChar = m_pNextChar, *pSavedEndChar = m_pEndChar;
                const WCHAR *pSavedEndOfItem = m_pEndOfCurrItem;
                const SPVTEXTFRAG *pSavedFrag = m_pCurrFrag;

                hr = SkipWhiteSpaceAndTags( m_pNextChar, m_pEndChar, m_pCurrFrag, MemoryManager, true, &PostSymbolList );

                if ( !m_pNextChar )
                {
                    m_pNextChar      = pSavedNextChar;
                    m_pEndChar       = pSavedEndChar;
                    m_pEndOfCurrItem = pSavedEndOfItem;
                    m_pCurrFrag      = pSavedFrag;
                    fDone = true;
                }

                if ( !fDone &&
                     SUCCEEDED( hr ) )
                {
                    m_pEndOfCurrItem = FindTokenEnd( m_pNextChar, m_pEndChar );
                    while ( IsMiscPunctuation( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED ||
                            IsGroupEnding( *(m_pEndOfCurrItem - 1) )     != eUNMATCHED ||
                            IsQuotationMark( *(m_pEndOfCurrItem - 1) )   != eUNMATCHED ||
                            IsEOSItem( *(m_pEndOfCurrItem - 1) )         != eUNMATCHED )
                    {
                        fDone = true;
                        m_pEndOfCurrItem--;
                    }

                    //--- Try to match quantifier
                    iQuantIndex = MatchQuantifier( m_pNextChar, m_pEndOfCurrItem );
                    if ( iQuantIndex >= 0 )
                    {
                        pQuantifierXMLState = &m_pCurrFrag->State;
                    }
                    else
                    {
                        m_pNextChar      = pSavedNextChar;
                        m_pEndChar       = pSavedEndChar;
                        m_pEndOfCurrItem = pSavedEndOfItem;
                        m_pCurrFrag      = pSavedFrag;
                    }
                }
            }
            else if ( iSymbolIndex < 0 )
            {
                //--- A number with no currency sign after it is not a currency
                hr = E_INVALIDARG;
            }
        }
    }

    //--- Successfully matched a currency!  Now expand it and fill out pItemNormInfo.
    //--- NOTE(review): a failure from here on (allocation, IsNumber, ExpandNumber) returns
    //--- without restoring the scan position, unlike the no-match path at the bottom -
    //--- confirm callers tolerate this.
    if ( SUCCEEDED( hr ) )
    {
        TTSWord Word;
        ZeroMemory( &Word, sizeof(TTSWord) );
        Word.eWordPartOfSpeech = MS_Unknown;

        pItemNormInfo = (TTSCurrencyItemInfo*) MemoryManager.GetMemory( sizeof(TTSCurrencyItemInfo), &hr );
        if ( SUCCEEDED( hr ) )
        {
            TTSCurrencyItemInfo* pCurrencyInfo = (TTSCurrencyItemInfo*) pItemNormInfo;

            //--- Fill in known parts of pItemNormInfo
            ZeroMemory( pItemNormInfo, sizeof(TTSCurrencyItemInfo) );
            pItemNormInfo->Type                 = eNUM_CURRENCY;
            pCurrencyInfo->fQuantifier          = iQuantIndex >= 0 ? true : false;
            pCurrencyInfo->pPrimaryNumberPart   = (TTSNumberItemInfo*) pNumberInfo;
            pCurrencyInfo->lNumPostNumberStates = PostNumberList.GetCount();
            pCurrencyInfo->lNumPostSymbolStates = PostSymbolList.GetCount();

            //--- Need to determine whether this currency will have a primary and secondary part
            //--- (e.g. "ten dollars and fifty cents") or just a primary part (e.g. "ten point
            //--- five zero cents", "one hundred dollars").

            //--- First check whether the number is a cardinal, there is a quantifier present, or the
            //--- currency unit has no secondary (e.g. cents).  In any of these cases, we need do no
            //--- further checking.
            if ( pNumberInfo->Type == eNUM_DECIMAL &&
                 iQuantIndex == -1                 &&
                 g_CurrencySigns[iSymbolIndex].SecondaryUnit.Len > 0 )
            {
                const WCHAR *pDecimalPoint = wcschr( ( (TTSNumberItemInfo*) pNumberInfo )->pStartChar, wcDecimalPoint );
                SPDBG_ASSERT( pDecimalPoint );

                //--- Only exactly two characters after the decimal point are read as a secondary unit
                if ( pDecimalPoint &&
                     ( (TTSNumberItemInfo*) pNumberInfo )->pEndChar - pDecimalPoint == 3 )
                {
                    //--- We do have a secondary part!  Fix up PrimaryNumberPart appropriately,
                    //--- and fill in pSecondaryNumberPart.
                    const WCHAR *pSavedNextChar = m_pNextChar, *pSavedEndOfItem = m_pEndOfCurrItem;
                    const WCHAR *pNumberEnd = ( (TTSNumberItemInfo*) pNumberInfo )->pEndChar;

                    //--- Re-parse only the text in front of the decimal point; the word list
                    //--- made for the whole decimal is no longer wanted.
                    m_pNextChar      = ( (TTSNumberItemInfo*) pNumberInfo )->pStartChar;
                    m_pEndOfCurrItem = pDecimalPoint;
                    delete ( (TTSNumberItemInfo*) pNumberInfo )->pWordList;

                    //--- m_pNextChar == m_pEndOfCurrItem when integer part is empty and non-negative, e.g. $.50
                    //--- Other case is empty and negative, e.g. $-.50
                    if ( m_pNextChar != m_pEndOfCurrItem &&
                         !( *m_pNextChar == L'-' &&
                             m_pNextChar == m_pEndOfCurrItem - 1 ) )
                    {
                        hr = IsNumber( pNumberInfo, L"NUMBER", MemoryManager, false );
                    }
                    else
                    {
                        //--- Empty integer part: build a "zero" ( or "negative zero" ) by hand
                        pNumberInfo = (TTSNumberItemInfo*) MemoryManager.GetMemory( sizeof( TTSNumberItemInfo ), &hr );
                        if ( SUCCEEDED( hr ) )
                        {
                            TTSNumberItemInfo* pZeroInfo = (TTSNumberItemInfo*) pNumberInfo;

                            ZeroMemory( pZeroInfo, sizeof( TTSNumberItemInfo ) );
                            if ( *m_pNextChar == L'-' )
                            {
                                pZeroInfo->fNegative = true;
                            }
                            else
                            {
                                pZeroInfo->fNegative = false;
                            }
                            pZeroInfo->pStartChar   = NULL;
                            pZeroInfo->pEndChar     = NULL;
                            pZeroInfo->pIntegerPart =
                                (TTSIntegerItemInfo*) MemoryManager.GetMemory( sizeof( TTSIntegerItemInfo), &hr );
                            if ( SUCCEEDED( hr ) )
                            {
                                //--- Clear the new record first, as is done for every other
                                //--- allocation, so no field is left holding stale memory.
                                ZeroMemory( pZeroInfo->pIntegerPart, sizeof( TTSIntegerItemInfo ) );
                                pZeroInfo->pIntegerPart->fDigitByDigit = true;
                                pZeroInfo->pIntegerPart->ulNumDigits   = 1;
                                pZeroInfo->pWordList = new CWordList;

                                if ( pZeroInfo->fNegative )
                                {
                                    Word.pXmlState  = pNumberXMLState;
                                    Word.pWordText  = g_negative.pStr;
                                    Word.ulWordLen  = g_negative.Len;
                                    Word.pLemma     = Word.pWordText;
                                    Word.ulLemmaLen = Word.ulWordLen;
                                    pZeroInfo->pWordList->AddTail( Word );
                                }

                                Word.pWordText  = g_ones[0].pStr;
                                Word.ulWordLen  = g_ones[0].Len;
                                Word.pLemma     = Word.pWordText;
                                Word.ulLemmaLen = Word.ulWordLen;
                                pZeroInfo->pWordList->AddTail( Word );
                            }
                        }
                    }

                    if ( SUCCEEDED( hr ) )
                    {
                        pCurrencyInfo->pPrimaryNumberPart = (TTSNumberItemInfo*) pNumberInfo;

                        //--- Now the two characters after the decimal point
                        m_pNextChar      = m_pEndOfCurrItem + 1;
                        m_pEndOfCurrItem = pNumberEnd;

                        //--- If zeroes, don't pronounce them...
                        if ( m_pNextChar[0] != L'0' ||
                             m_pNextChar[1] != L'0' )
                        {
                            //--- A single leading zero is stepped over so only the last digit is parsed
                            if ( m_pNextChar[0] == L'0' )
                            {
                                m_pNextChar++;
                            }
                            hr = IsNumber( pNumberInfo, L"NUMBER", MemoryManager, false );
                            if ( SUCCEEDED( hr ) )
                            {
                                pCurrencyInfo->pSecondaryNumberPart = (TTSNumberItemInfo*) pNumberInfo;
                            }
                        }
                    }

                    m_pNextChar      = pSavedNextChar;
                    m_pEndOfCurrItem = pSavedEndOfItem;
                }
            }

            if ( SUCCEEDED( hr ) )
            {
                //--- Expand Primary number part
                if ( fNegative )
                {
                    pCurrencyInfo->pPrimaryNumberPart->fNegative = true;
                    Word.pXmlState          = pNumberXMLState;
                    Word.eWordPartOfSpeech  = MS_Unknown;
                    Word.pWordText          = g_negative.pStr;
                    Word.ulWordLen          = g_negative.Len;
                    Word.pLemma             = Word.pWordText;
                    Word.ulLemmaLen         = Word.ulWordLen;
                    WordList.AddTail( Word );
                }
                hr = ExpandNumber( pCurrencyInfo->pPrimaryNumberPart, WordList );
            }

            //--- Clean up Number XML States
            SPLISTPOS WordListPos;
            if ( SUCCEEDED( hr ) )
            {
                TTSNumberItemInfo* pPrimary = pCurrencyInfo->pPrimaryNumberPart;

                //--- Every word in the list so far is given the number's XML state
                WordListPos = WordList.GetHeadPosition();
                while ( WordListPos )
                {
                    TTSWord& TempWord  = WordList.GetNext( WordListPos );
                    TempWord.pXmlState = pNumberXMLState;
                }

                //--- Insert PostNumber XML States
                while ( !PostNumberList.IsEmpty() )
                {
                    WordList.AddTail( ( PostNumberList.RemoveHead() ).Words[0] );
                }

                //--- If a quantifier is present, expand it
                if ( iQuantIndex >= 0 )
                {
                    Word.pXmlState  = pQuantifierXMLState;
                    Word.pWordText  = g_quantifiers[iQuantIndex].pStr;
                    Word.ulWordLen  = g_quantifiers[iQuantIndex].Len;
                    Word.pLemma     = Word.pWordText;
                    Word.ulLemmaLen = Word.ulWordLen;
                    WordList.AddTail( Word );
                }

                BOOL fFraction = false;
                //--- If a fractional unit with no quantifier, insert "of a"
                if ( iQuantIndex < 0                        &&
                     !pCurrencyInfo->pSecondaryNumberPart   &&
                     !pPrimary->pIntegerPart                &&
                     pPrimary->pFractionalPart              &&
                     !pPrimary->pFractionalPart->fIsStandard )
                {
                    fFraction = true;
                    Word.pXmlState          = pNumberXMLState;
                    Word.eWordPartOfSpeech  = MS_Unknown;
                    Word.pWordText          = g_of.pStr;
                    Word.ulWordLen          = g_of.Len;
                    Word.pLemma             = Word.pWordText;
                    Word.ulLemmaLen         = Word.ulWordLen;
                    WordList.AddTail( Word );

                    Word.pWordText          = g_a.pStr;
                    Word.ulWordLen          = g_a.Len;
                    Word.pLemma             = Word.pWordText;
                    Word.ulLemmaLen         = Word.ulWordLen;
                    WordList.AddTail( Word );
                }

                //--- Insert Main Currency Unit
                //--- Plural if not a fraction and either a quantifier is present or the integral part is not one.
                if ( !fFraction &&
                     ( iQuantIndex >= 0 ||
                       ( ( ( pPrimary->pEndChar - pPrimary->pStartChar != 1 ) ||
                           pPrimary->pStartChar[0] != L'1' ) &&
                         ( ( pPrimary->pEndChar - pPrimary->pStartChar != 2 ) ||
                           pPrimary->pStartChar[0] != L'-' ||
                           pPrimary->pStartChar[1] != L'1' ) ) ) )
                {
                    Word.pXmlState  = pSymbolXMLState;
                    Word.pWordText  = g_CurrencySigns[iSymbolIndex].MainUnit.pStr;
                    Word.ulWordLen  = g_CurrencySigns[iSymbolIndex].MainUnit.Len;
                    Word.pLemma     = Word.pWordText;
                    Word.ulLemmaLen = Word.ulWordLen;
                    WordList.AddTail( Word );
                }
                //--- ONLY "one" or "negative one" should precede this...
                else
                {
                    Word.pXmlState  = pSymbolXMLState;
                    Word.pWordText  = g_SingularPrimaryCurrencySigns[iSymbolIndex].pStr;
                    Word.ulWordLen  = g_SingularPrimaryCurrencySigns[iSymbolIndex].Len;
                    Word.pLemma     = Word.pWordText;
                    Word.ulLemmaLen = Word.ulWordLen;
                    WordList.AddTail( Word );
                }

                //--- Insert Post Symbol XML States
                while ( !PostSymbolList.IsEmpty() )
                {
                    WordList.AddTail( ( PostSymbolList.RemoveHead() ).Words[0] );
                }

                //--- Insert Secondary number part
                if ( pCurrencyInfo->pSecondaryNumberPart )
                {
                    Word.pXmlState  = pNumberXMLState;
                    Word.pWordText  = g_And.pStr;
                    Word.ulWordLen  = g_And.Len;
                    Word.pLemma     = Word.pWordText;
                    Word.ulLemmaLen = Word.ulWordLen;
                    WordList.AddTail( Word );

                    //--- Remember where the secondary words start ( at the "and" just added )
                    WordListPos = WordList.GetTailPosition();

                    hr = ExpandNumber( pCurrencyInfo->pSecondaryNumberPart, WordList );

                    //--- Clean up number XML State
                    if ( SUCCEEDED( hr ) )
                    {
                        while ( WordListPos )
                        {
                            TTSWord& TempWord  = WordList.GetNext( WordListPos );
                            TempWord.pXmlState = pNumberXMLState;
                        }
                    }

                    //--- Insert secondary currency unit
                    if ( SUCCEEDED( hr ) )
                    {
                        TTSNumberItemInfo* pSecondary = pCurrencyInfo->pSecondaryNumberPart;

                        //--- Singular only when the secondary text is exactly "1"
                        if ( pSecondary->pEndChar - pSecondary->pStartChar == 1 &&
                             pSecondary->pStartChar[0] == L'1' )
                        {
                            Word.pXmlState  = pSymbolXMLState;
                            Word.pWordText  = g_SingularSecondaryCurrencySigns[iSymbolIndex].pStr;
                            Word.ulWordLen  = g_SingularSecondaryCurrencySigns[iSymbolIndex].Len;
                            Word.pLemma     = Word.pWordText;
                            Word.ulLemmaLen = Word.ulWordLen;
                            WordList.AddTail( Word );
                        }
                        else
                        {
                            Word.pXmlState  = pSymbolXMLState;
                            Word.pWordText  = g_CurrencySigns[iSymbolIndex].SecondaryUnit.pStr;
                            Word.ulWordLen  = g_CurrencySigns[iSymbolIndex].SecondaryUnit.Len;
                            Word.pLemma     = Word.pWordText;
                            Word.ulLemmaLen = Word.ulWordLen;
                            WordList.AddTail( Word );
                        }
                    }
                }

                //--- The finished item starts where scanning started on entry
                if ( SUCCEEDED( hr ) )
                {
                    m_pNextChar = pTempNextChar;
                }
            }
        }
    }
    else
    {
        //--- Not a currency: drop any number that was parsed and undo all position changes
        if ( pNumberInfo )
        {
            delete ( (TTSNumberItemInfo*) pNumberInfo )->pWordList;
        }
        m_pNextChar      = pTempNextChar;
        m_pEndChar       = pTempEndChar;
        m_pEndOfCurrItem = pTempEndOfItem;
        m_pCurrFrag      = pTempFrag;
    }

    return hr;
} /* IsCurrency */
/***********************************************************************************************
* IsRomanNumeral *
*----------------*
* Description:
* Checks the incoming Item's text to determine whether or not it
* is a roman numeral.
*
* RegExp:
* [M](0-3) { [CM] || [CD] || { [D]?[C](0-3) } } { [XC] || [XL] || { [L]?[X](0-3) } }
* { [IX] || [IV] || { [V]?[I](0-3) }}
*
* Types assigned:
* NUM_ROMAN_NUMERAL
********************************************************************* AH **********************/
HRESULT CStdSentEnum::IsRomanNumeral( TTSItemInfo*& pItemNormInfo, const WCHAR* Context,
CSentItemMemory& MemoryManager )
{
SPDBG_FUNC( "NumNorm IsRomanNumeral" );
HRESULT hr = S_OK;
ULONG ulValue = 0, ulIndex = 0, ulMaxOfThree = 0, ulTokenLen = (ULONG)(m_pEndOfCurrItem - m_pNextChar);
//--- Match Thousands - M(0-3)
while ( ulIndex < ulTokenLen &&
towupper( m_pNextChar[ulIndex] ) == L'M' &&
ulMaxOfThree < 3 )
{
ulValue += 1000;
ulMaxOfThree++;
ulIndex++;
}
if ( ulMaxOfThree > 3 )
{
hr = E_INVALIDARG;
}
//--- Match Hundreds - { [CM] || [CD] || { [D]?[C](0-3) } }
if ( SUCCEEDED( hr ) )
{
ulMaxOfThree = 0;
//--- Matched C first
if ( ulIndex < ulTokenLen &&
towupper( m_pNextChar[ulIndex] ) == L'C' )
{
ulValue += 100;
ulMaxOfThree++;
ulIndex++;
//--- Special Case - CM = 900
if ( ulIndex < ulTokenLen &&
towupper( m_pNextChar[ulIndex] ) == L'M' )
{
ulValue += 800;
ulIndex++;
}
//--- Special Case - CD = 400
else if ( ulIndex < ulTokenLen &&
towupper( m_pNextChar[ulIndex] ) == L'D' )
{
ulValue += 300;
ulIndex++;
}
//--- Default Case
else
{
while ( ulIndex < ulTokenLen &&
towupper( m_pNextChar[ulIndex] ) == L'C' &&
ulMaxOfThree < 3 )
{
ulValue += 100;
ulMaxOfThree++;
ulIndex++;
}
if ( ulMaxOfThree > 3 )
{
hr = E_INVALIDARG;
}
}
}
//--- Matched D First
else if ( ulIndex < ulTokenLen &&
towupper( m_pNextChar[ulIndex] ) == L'D' )
{
ulValue += 500;
ulIndex++;
ulMaxOfThree = 0;
//--- Match C's
while ( ulIndex < ulTokenLen &&
towupper( m_pNextChar[ulIndex] ) == L'C' &&
ulMaxOfThree < 3 )
{
ulValue += 100;
ulIndex++;
ulMaxOfThree++;
}
if ( ulMaxOfThree > 3 )
{
hr = E_INVALIDARG;
}
}
}
//--- Match Tens - { [XC] || [XL] || { [L]?[X](0-3) } }
if ( SUCCEEDED( hr ) )
{
ulMaxOfThree = 0;
//--- Matched X First
if ( ulIndex < ulTokenLen &&
towupper( m_pNextChar[ulIndex] ) == L'X' )
{
ulValue += 10;
ulMaxOfThree++;
ulIndex++;
//--- Special Case - XC = 90
if ( ulIndex < ulTokenLen &&
towupper( m_pNextChar[ulIndex] ) == L'C' )
{
ulValue += 80;
ulIndex++;
}
//--- Special Case - XL = 40
else if ( ulIndex < ulTokenLen &&
towupper( m_pNextChar[ulIndex] ) == 'L' )
{
ulValue += 30;
ulIndex++;
}
//--- Default Case
else
{
while ( ulIndex < ulTokenLen &&
towupper( m_pNextChar[ulIndex] ) == L'X' &&
ulMaxOfThree < 3 )
{
ulValue += 10;
ulMaxOfThree ++;
ulIndex++;
}
if ( ulMaxOfThree > 3 )
{
hr = E_INVALIDARG;
}
}
}
//--- Matched L First
else if ( ulIndex < ulTokenLen &&
towupper( m_pNextChar[ulIndex] ) == L'L' )
{
ulValue += 50;
ulIndex++;
//--- Match X's
while ( ulIndex < ulTokenLen &&
towupper( m_pNextChar[ulIndex] ) == L'X' &&
ulMaxOfThree < 3 )
{
ulValue += 10;
ulMaxOfThree++;
ulIndex++;
}
if ( ulMaxOfThree > 3 )
{
hr = E_INVALIDARG;
}
}
}
//--- Match Ones - { [IX] || [IV] || { [V]?[I](0-3) } }
if ( SUCCEEDED( hr ) )
{
ulMaxOfThree = 0;
//--- Matched I First
if ( ulIndex < ulTokenLen &&
towupper( m_pNextChar[ulIndex] ) == L'I' )
{
ulValue += 1;
ulMaxOfThree++;
ulIndex++;
//--- Special Case - IX = 9
if ( ulIndex < ulTokenLen &&
towupper( m_pNextChar[ulIndex] ) == L'X' )
{
ulValue += 8;
ulIndex++;
}
//--- Special Case - IV = 4
else if ( ulIndex < ulTokenLen &&
towupper( m_pNextChar[ulIndex] ) == L'V' )
{
ulValue += 3;
ulIndex++;
}
//--- Default Case
else
{
while ( ulIndex < ulTokenLen &&
towupper( m_pNextChar[ulIndex] ) == L'I' &&
ulMaxOfThree < 3 )
{
ulValue += 1;
ulMaxOfThree++;
ulIndex++;
}
if ( ulMaxOfThree > 3 )
{
hr = E_INVALIDARG;
}
}
}
//--- Matched V First
else if ( ulIndex < ulTokenLen &&
towupper( m_pNextChar[ulIndex] ) == L'V' )
{
ulValue += 5;
ulIndex++;
//--- Match I's
while ( ulIndex < ulTokenLen &&
towupper( m_pNextChar[ulIndex] ) == L'I' &&
ulMaxOfThree < 3 )
{
ulValue += 1;
ulMaxOfThree++;
ulIndex++;
}
if ( ulMaxOfThree > 3 )
{
hr = E_INVALIDARG;
}
}
}
if ( ulIndex != ulTokenLen )
{
hr = E_INVALIDARG;
}
else
{
//--- Successfully matched a roman numeral!
WCHAR *tempNumberString;
//--- Max value of ulValue is 3999, so the resultant string cannot be more than
//--- four characters long (plus one for the comma, just in case)
tempNumberString = (WCHAR*) MemoryManager.GetMemory( 6 * sizeof(WCHAR), &hr );
if ( SUCCEEDED( hr ) )
{
TTSItemInfo *pNumberInfo = NULL;
_ltow( (long) ulValue, tempNumberString, 10 );
const WCHAR *pTempNextChar = m_pNextChar, *pTempEndOfItem = m_pEndOfCurrItem;
m_pNextChar = tempNumberString;
m_pEndOfCurrItem = tempNumberString + wcslen( tempNumberString );
hr = IsNumber( pNumberInfo, Context, MemoryManager, false );
m_pNextChar = pTempNextChar;
m_pEndOfCurrItem = pTempEndOfItem;
if ( SUCCEEDED( hr ) )
{
pItemNormInfo =
(TTSRomanNumeralItemInfo*) MemoryManager.GetMemory( sizeof( TTSRomanNumeralItemInfo ), &hr );
if ( SUCCEEDED( hr ) )
{
( (TTSRomanNumeralItemInfo*) pItemNormInfo )->pNumberInfo = pNumberInfo;
}
pItemNormInfo->Type = eNUM_ROMAN_NUMERAL;
}
}
}
return hr;
} /* IsRomanNumeral */
/***********************************************************************************************
* IsPhoneNumber *
*---------------*
* Description:
* Checks the incoming Item's text to determine whether or not it
* is a phone number.
*
* RegExp:
* { ddd-dddd } || { ddd-ddd-dddd }
*
* Types assigned:
* NEWNUM_PHONENUMBER
********************************************************************* AH **********************/
HRESULT CStdSentEnum::IsPhoneNumber( TTSItemInfo*& pItemNormInfo, const WCHAR* Context, CSentItemMemory& MemoryManager,
CWordList& WordList )
{
//--- Overview:
//---   Tries to match a phone number starting at m_pNextChar, in this order:
//---     1. optional "+" followed by a 1-3 digit country code
//---     2. optional lone "1" (only when there is no country code)
//---     3. optional parenthesized area code of 2-3 digits
//---     4. up to four groups of 2-4 digits
//---   Parts may be separated by a delimiter accepted by MatchPhoneNumberDelimiter() - all
//---   delimiters must be the same character - or by whitespace, in which case the match
//---   continues into the following token(s) via SkipWhiteSpaceAndTags() / FindTokenEnd().
//---   The collected parts are then checked against a fixed list of layouts.
//---
//---   On success: m_pEndOfCurrItem, m_pEndChar and m_pCurrFrag are advanced to cover the
//---   whole number, pItemNormInfo receives a TTSPhoneNumberItemInfo (eNEWNUM_PHONENUMBER)
//---   and the spoken words are appended to WordList.
//---   On a failed match E_INVALIDARG is returned; all scanning is done on local copies
//---   (pStartChar, pEndChar, pEndOfItem, pFrag), so the members are untouched in that case.
//---   NOTE(review): the members are committed before the GetMemory() calls below, so an
//---   allocation failure returns a failed hr with the members already advanced.
//---
//---   Context is only consulted for the value L"phone_number" (see the format checks).
SPDBG_FUNC( "CStdSentEnum::IsPhoneNumber" );
HRESULT hr = S_OK;
const WCHAR *pCountryCode = NULL, *pAreaCode = NULL, *pGroups[4] = { NULL, NULL, NULL, NULL };
const WCHAR *pStartChar = m_pNextChar, *pEndChar = m_pEndChar, *pEndOfItem = m_pEndOfCurrItem;
const SPVTEXTFRAG *pFrag = m_pCurrFrag;
BOOL fMatchedLeftParen = false, fMatchedOne = false;
ULONG ulCountryCodeLen = 0, ulAreaCodeLen = 0, ulNumGroups = 0, ulGroupLen[4] = { 0, 0, 0, 0 };
//--- Items collected while skipping whitespace/tags after each part; their first word is
//--- re-inserted into WordList right after the corresponding part is expanded.
CItemList PostCountryCodeList, PostOneList, PostAreaCodeList, PostGroupLists[4];
//--- XML state in effect where each part was found
const SPVSTATE *pCountryCodeState = NULL, *pOneState = NULL, *pAreaCodeState = NULL;
const SPVSTATE *pGroupStates[4] = { NULL, NULL, NULL, NULL };
//--- First explicit delimiter seen (NULL while only whitespace has separated the parts)
const WCHAR *pDelimiter = NULL;
//--- Snapshot of pEndChar / pFrag taken before the most recent whitespace skip in the
//--- group loop, so the skip can be undone when the following token is not part of the number
const WCHAR *pTempEndChar = NULL;
const SPVTEXTFRAG *pTempFrag = NULL;
ULONG i = 0;
//--- Try to match Country Code
if ( pStartChar[0] == L'+' )
{
pStartChar++;
i = 0;
//--- Try to match d(1-3)
while ( pEndOfItem > pStartChar + i &&
iswdigit( pStartChar[i] ) &&
i < 3 )
{
i++;
}
pCountryCode = pStartChar;
pCountryCodeState = &pFrag->State;
ulCountryCodeLen = i;
//--- Try to match delimiter
if ( i >= 1 &&
pEndOfItem > pStartChar + i &&
MatchPhoneNumberDelimiter( pStartChar[i] ) )
{
pDelimiter = pStartChar + i;
pStartChar += i + 1;
}
//--- Try to advance in text - whitespace counts as a delimiter...
else if ( i >= 1 &&
pEndOfItem == pStartChar + i )
{
pStartChar += i;
pCountryCodeState = &pFrag->State;
hr = SkipWhiteSpaceAndTags( pStartChar, pEndChar, pFrag, MemoryManager, true,
&PostCountryCodeList );
//--- A NULL pStartChar after the skip is treated as "no more text" - no number can follow
if ( !pStartChar &&
SUCCEEDED( hr ) )
{
hr = E_INVALIDARG;
}
else if ( SUCCEEDED( hr ) )
{
pEndOfItem = FindTokenEnd( pStartChar, pEndChar );
}
}
else
{
hr = E_INVALIDARG;
}
}
//--- Try to match a "1"
//--- NOTE(review): pStartChar[1] is read without comparing against pEndOfItem - confirm the
//--- text buffer always has a readable character after a lone "1".
if ( SUCCEEDED( hr ) &&
!pCountryCode &&
pStartChar[0] == L'1' &&
!iswdigit( pStartChar[1] ) )
{
pOneState = &pFrag->State;
fMatchedOne = true;
pStartChar++;
if ( pEndOfItem > pStartChar &&
MatchPhoneNumberDelimiter( pStartChar[0] ) )
{
//--- If we've already hit a delimiter, make sure all others agree
if ( pDelimiter )
{
if ( *pDelimiter != pStartChar[0] )
{
hr = E_INVALIDARG;
}
}
else
{
pDelimiter = pStartChar;
}
pStartChar++;
}
//--- Try to advance in text - whitespace counts as a delimiter...
else if ( !pDelimiter &&
pEndOfItem == pStartChar )
{
pOneState = &pFrag->State;
hr = SkipWhiteSpaceAndTags( pStartChar, pEndChar, pFrag, MemoryManager, true,
&PostOneList );
if ( !pStartChar &&
SUCCEEDED( hr ) )
{
hr = E_INVALIDARG;
}
else if ( SUCCEEDED( hr ) )
{
pEndOfItem = FindTokenEnd( pStartChar, pEndChar );
}
}
else
{
hr = E_INVALIDARG;
}
}
//--- Try to match Area Code
if ( SUCCEEDED( hr ) &&
pStartChar < pEndOfItem )
{
i = 0;
//--- Try to match a left parenthesis
//--- After a country code or "1" the parenthesis is expected inside the current token...
if ( ( pCountryCode ||
fMatchedOne ) &&
pStartChar[0] == L'(' )
{
pStartChar++;
fMatchedLeftParen = true;
}
//--- ...otherwise it is looked for in the character just before the token start
//--- (presumably the caller has already stepped over a leading "(" - verify against caller).
else if ( !pCountryCode &&
!fMatchedOne &&
pStartChar > pFrag->pTextStart &&
*( pStartChar - 1 ) == L'(' )
{
fMatchedLeftParen = true;
}
if ( fMatchedLeftParen )
{
//--- Try to match ddd?
while ( pEndOfItem > pStartChar + i &&
iswdigit( pStartChar[i] ) &&
i < 3 )
{
i++;
}
pAreaCodeState = &pFrag->State;
pAreaCode = pStartChar;
ulAreaCodeLen = i;
if ( i < 2 )
{
//--- Failed to match at least two digits
hr = E_INVALIDARG;
}
else
{
//--- NOTE(review): pStartChar[i] is read here without a check against pEndOfItem.
if ( pStartChar[i] != L')' )
{
//--- Matched left parenthesis without corresponding right parenthesis
hr = E_INVALIDARG;
}
//--- More text follows the ")" inside the same token
else if ( ( !( pCountryCode || fMatchedOne ) &&
pEndOfItem > pStartChar + i ) ||
( ( pCountryCode || fMatchedOne ) &&
pEndOfItem > pStartChar + i + 1 ) )
{
i++;
//--- Delimiter is optional with parentheses
if ( MatchPhoneNumberDelimiter( pStartChar[i] ) )
{
//--- If we've already hit a delimiter, make sure all others agree
if ( pDelimiter )
{
if ( *pDelimiter != pStartChar[i] )
{
hr = E_INVALIDARG;
}
}
else
{
pDelimiter = pStartChar + i;
}
i++;
}
pStartChar += i;
}
//--- Try to advance in text - whitespace counts as a delimiter...
else if ( !pDelimiter )
{
pStartChar += i + 1;
pAreaCodeState = &pFrag->State;
hr = SkipWhiteSpaceAndTags( pStartChar, pEndChar, pFrag, MemoryManager, true,
&PostAreaCodeList );
if ( !pStartChar &&
SUCCEEDED( hr ) )
{
hr = E_INVALIDARG;
}
else if ( SUCCEEDED( hr ) )
{
pEndOfItem = FindTokenEnd( pStartChar, pEndChar );
}
}
else
{
hr = E_INVALIDARG;
}
}
}
}
//--- Try to match main number part
if ( SUCCEEDED( hr ) &&
pStartChar < pEndOfItem )
{
//--- Try to match some groups of digits
//--- NOTE(review): j is declared in the for statement but is also read after the loop
//--- (see "Didn't hit either break statement" below).  That only compiles with the
//--- pre-standard MSVC for-scope rules - confirm the build settings before touching this.
for ( int j = 0; SUCCEEDED( hr ) && j < 4; j++ )
{
i = 0;
//--- Try to match a digit string
while ( pEndOfItem > pStartChar + i &&
iswdigit( pStartChar[i] ) &&
i < 4 )
{
i++;
}
//--- Try to match a delimiter
if ( i >= 2 )
{
pGroupStates[j] = &pFrag->State;
ulGroupLen[j] = i;
pGroups[j] = pStartChar;
pStartChar += i;
//--- A delimiter only counts if at least one more character follows it in the token
if ( pEndOfItem > pStartChar + 1 &&
MatchPhoneNumberDelimiter( pStartChar[0] ) )
{
//--- If we've already hit a delimiter, make sure all others agree
if ( pDelimiter )
{
if ( *pDelimiter != pStartChar[0] )
{
hr = E_INVALIDARG;
}
}
//--- Only allow a new delimiter to be matched on the first main number group...
//--- e.g. "+45 35 32 90.89" should not all match...
else if ( j == 0 )
{
pDelimiter = pStartChar;
}
else
{
//--- Undo the last whitespace skip and stop: groups 0..j-1 are kept, the current one is not
pEndChar = pTempEndChar;
pFrag = pTempFrag;
ulNumGroups = j;
break;
}
pStartChar++;
}
//--- Try to advance in text - whitespace counts as a delimiter...
else if ( !pDelimiter &&
pEndOfItem == pStartChar )
{
pGroupStates[j] = &pFrag->State;
//--- Remember where we were so the skip can be rolled back later
pTempEndChar = pEndChar;
pTempFrag = pFrag;
hr = SkipWhiteSpaceAndTags( pStartChar, pEndChar, pFrag, MemoryManager, true,
&PostGroupLists[j] );
//--- Nothing follows: the number ends with this group
if ( !pStartChar &&
SUCCEEDED( hr ) )
{
pEndChar = pTempEndChar;
pFrag = pTempFrag;
ulNumGroups = j + 1;
break;
}
else if ( SUCCEEDED( hr ) )
{
pEndOfItem = FindTokenEnd( pStartChar, pEndChar );
}
}
//--- Exactly one character is left in the token: accept it only if it is trailing punctuation
else if ( pEndOfItem == pStartChar + 1 )
{
if ( IsGroupEnding( *pStartChar ) != eUNMATCHED ||
IsQuotationMark( *pStartChar ) != eUNMATCHED ||
IsMiscPunctuation( *pStartChar ) != eUNMATCHED ||
IsEOSItem( *pStartChar ) != eUNMATCHED )
{
pEndOfItem--;
ulNumGroups = j + 1;
break;
}
else
{
hr = E_INVALIDARG;
}
}
else
{
//--- Walk pEndOfItem back towards pStartChar while the character it points at is punctuation.
//--- NOTE(review): this tests *pEndOfItem (the character at the end pointer) rather than
//--- *(pEndOfItem - 1); it looks like it was meant to strip trailing punctuation inside
//--- the token - confirm the intended behavior before changing.
while ( pEndOfItem != pStartChar )
{
if ( IsGroupEnding( *pEndOfItem ) != eUNMATCHED ||
IsQuotationMark( *pEndOfItem ) != eUNMATCHED ||
IsMiscPunctuation( *pEndOfItem ) != eUNMATCHED ||
IsEOSItem( *pEndOfItem ) != eUNMATCHED )
{
pEndOfItem--;
}
else
{
break;
}
}
if ( pEndOfItem == pStartChar )
{
ulNumGroups = j + 1;
break;
}
else
{
hr = E_INVALIDARG;
break;
}
}
}
//--- Matched something like 206.709.8286.1 - definitely bad
else if ( pDelimiter )
{
hr = E_INVALIDARG;
}
//--- Matched something like 206 709 8286 1 - could be OK
else
{
//--- The short token is not part of the number: roll back the whitespace skip that reached it
if ( pTempEndChar )
{
pEndChar = pTempEndChar;
pFrag = pTempFrag;
}
ulNumGroups = j;
break;
}
}
//--- Didn't hit either break statement
if ( !ulNumGroups )
{
ulNumGroups = j;
}
}
//--- Check for appropriate formats
if ( SUCCEEDED( hr ) )
{
//--- Check for [1<sep>]?(ddd?)<sep>?ddd<sep>dddd? OR ddd<sep>dddd?
if ( !pCountryCode &&
ulNumGroups == 2 &&
ulGroupLen[0] == 3 &&
ulGroupLen[1] >= 3 &&
!( fMatchedOne && !pAreaCode ) )
{
//--- A bare ddd.dddd (period delimiter, no area code, no "1") is rejected unless the
//--- caller supplied the "phone_number" context.
if ( ( !Context ||
_wcsicmp( Context, L"phone_number" ) != 0 ) &&
!pCountryCode &&
!pAreaCode &&
!fMatchedOne &&
( pDelimiter ? (*pDelimiter == L'.') : 0 ) )
{
hr = E_INVALIDARG;
}
}
//--- Check for [1<sep>]?ddd?<sep>ddd<sep>dddd?
else if ( !pCountryCode &&
!pAreaCode &&
ulNumGroups == 3 &&
( ulGroupLen[0] == 2 ||
ulGroupLen[0] == 3 ) &&
ulGroupLen[1] == 3 &&
ulGroupLen[2] >= 3 )
{
//--- The first group is really the area code: promote it and shift the other groups down
pAreaCode = pGroups[0];
ulAreaCodeLen = ulGroupLen[0];
pAreaCodeState = pGroupStates[0];
PostAreaCodeList.AddTail( &PostGroupLists[0] );
pGroups[0] = pGroups[1];
ulGroupLen[0] = ulGroupLen[1];
pGroupStates[0] = pGroupStates[1];
PostGroupLists[0].RemoveAll();
PostGroupLists[0].AddTail( &PostGroupLists[1] );
pGroups[1] = pGroups[2];
ulGroupLen[1] = ulGroupLen[2];
pGroupStates[1] = pGroupStates[2];
PostGroupLists[1].RemoveAll();
PostGroupLists[2].RemoveAll();
ulNumGroups--;
}
//--- Check for (ddd?)<sep>?ddd?<sep>dd<sep>ddd?d?
else if ( !pCountryCode &&
!fMatchedOne &&
pAreaCode &&
ulNumGroups == 3 &&
( ulGroupLen[0] == 2 ||
ulGroupLen[0] == 3 ) &&
ulGroupLen[1] == 2 &&
ulGroupLen[2] >= 2 )
{
//--- Accepted as is (no-op statement)
NULL;
}
//--- Check for +dd?d?<sep>ddd?<sep>ddd?<sep>ddd?d?<sep>ddd?d?
else if ( pCountryCode &&
!fMatchedOne &&
!pAreaCode &&
ulNumGroups == 4 &&
( ulGroupLen[0] == 2 ||
ulGroupLen[0] == 3 ) &&
( ulGroupLen[1] == 2 ||
ulGroupLen[1] == 3 ) &&
ulGroupLen[2] >= 2 &&
ulGroupLen[3] >= 2 )
{
//--- First group becomes the area code; remaining three groups shift down by one
pAreaCode = pGroups[0];
ulAreaCodeLen = ulGroupLen[0];
pAreaCodeState = pGroupStates[0];
PostAreaCodeList.AddTail( &PostGroupLists[0] );
pGroups[0] = pGroups[1];
ulGroupLen[0] = ulGroupLen[1];
pGroupStates[0] = pGroupStates[1];
PostGroupLists[0].RemoveAll();
PostGroupLists[0].AddTail( &PostGroupLists[1] );
pGroups[1] = pGroups[2];
ulGroupLen[1] = ulGroupLen[2];
pGroupStates[1] = pGroupStates[2];
PostGroupLists[1].RemoveAll();
PostGroupLists[1].AddTail( &PostGroupLists[2] );
pGroups[2] = pGroups[3];
ulGroupLen[2] = ulGroupLen[3];
pGroupStates[2] = pGroupStates[3];
PostGroupLists[2].RemoveAll();
PostGroupLists[3].RemoveAll();
ulNumGroups--;
}
//--- Check for +dd?d?<sep>ddd?<sep>ddd?<sep>ddd?d?
else if ( pCountryCode &&
!fMatchedOne &&
!pAreaCode &&
ulNumGroups == 3 &&
( ulGroupLen[0] == 2 ||
ulGroupLen[0] == 3 ) &&
( ulGroupLen[1] == 2 ||
ulGroupLen[1] == 3 ) &&
ulGroupLen[2] >= 2 )
{
//--- First group becomes the area code; remaining two groups shift down by one
pAreaCode = pGroups[0];
ulAreaCodeLen = ulGroupLen[0];
pAreaCodeState = pGroupStates[0];
PostAreaCodeList.AddTail( &PostGroupLists[0] );
pGroups[0] = pGroups[1];
ulGroupLen[0] = ulGroupLen[1];
pGroupStates[0] = pGroupStates[1];
PostGroupLists[0].RemoveAll();
PostGroupLists[0].AddTail( &PostGroupLists[1] );
pGroups[1] = pGroups[2];
ulGroupLen[1] = ulGroupLen[2];
pGroupStates[1] = pGroupStates[2];
PostGroupLists[1].RemoveAll();
PostGroupLists[2].RemoveAll();
ulNumGroups--;
}
//--- Check for +dd?d?<sep>(ddd?)<sep>?ddd?<sep>ddd?d?<sep>ddd?d?
else if ( pCountryCode &&
!fMatchedOne &&
pAreaCode &&
ulNumGroups == 3 &&
( ulGroupLen[0] == 2 ||
ulGroupLen[0] == 3 ) &&
ulGroupLen[1] >= 2 &&
ulGroupLen[2] >= 2 )
{
//--- Accepted as is (no-op statement)
NULL;
}
//--- Check for +dd?d?<sep>(ddd?)<sep>?ddd?<sep>ddd?d?
else if ( pCountryCode &&
!fMatchedOne &&
pAreaCode &&
ulNumGroups == 2 &&
( ulGroupLen[0] == 2 ||
ulGroupLen[0] == 3 ) &&
ulGroupLen[1] >= 2 )
{
//--- Accepted as is (no-op statement)
NULL;
}
else
{
hr = E_INVALIDARG;
}
}
//--- Fill in pItemNormInfo
if ( SUCCEEDED(hr) )
{
//--- Every accepted layout above leaves ulNumGroups >= 2, so the index below is valid.
//--- The item now ends right after the last digit of the last group.
m_pEndOfCurrItem = pGroups[ulNumGroups-1] + ulGroupLen[ulNumGroups-1];
m_pEndChar = pEndChar;
m_pCurrFrag = pFrag;
pItemNormInfo = (TTSPhoneNumberItemInfo*) MemoryManager.GetMemory( sizeof(TTSPhoneNumberItemInfo),
&hr );
if ( SUCCEEDED( hr ) )
{
ZeroMemory( pItemNormInfo, sizeof(TTSPhoneNumberItemInfo) );
pItemNormInfo->Type = eNEWNUM_PHONENUMBER;
//--- Fill in fOne
if ( fMatchedOne )
{
( (TTSPhoneNumberItemInfo*) pItemNormInfo )->fOne = true;
}
//--- Fill in Country Code...
if ( pCountryCode )
{
TTSItemInfo* pCountryCodeInfo;
//--- Temporarily narrow the current token to the country code digits for IsNumber()
const WCHAR *pTempNextChar = m_pNextChar, *pTempEndOfItem = m_pEndOfCurrItem;
m_pNextChar = pCountryCode;
m_pEndOfCurrItem = pCountryCode + ulCountryCodeLen;
hr = IsNumber( pCountryCodeInfo, L"NUMBER", MemoryManager, false );
if ( SUCCEEDED( hr ) )
{
( (TTSPhoneNumberItemInfo*) pItemNormInfo )->pCountryCode = (TTSNumberItemInfo*) pCountryCodeInfo;
}
m_pNextChar = pTempNextChar;
m_pEndOfCurrItem = pTempEndOfItem;
}
//--- Fill in Area Code...
if ( SUCCEEDED( hr ) &&
pAreaCode )
{
( (TTSPhoneNumberItemInfo*) pItemNormInfo )->pAreaCode =
(TTSDigitsItemInfo*) MemoryManager.GetMemory( sizeof( TTSDigitsItemInfo ), &hr );
if ( SUCCEEDED( hr ) )
{
( (TTSPhoneNumberItemInfo*) pItemNormInfo )->pAreaCode->ulNumDigits = ulAreaCodeLen;
( (TTSPhoneNumberItemInfo*) pItemNormInfo )->pAreaCode->pFirstDigit = pAreaCode;
}
}
//--- Fill in Main Number...
if ( SUCCEEDED( hr ) )
{
( (TTSPhoneNumberItemInfo*) pItemNormInfo )->ulNumGroups = ulNumGroups;
( (TTSPhoneNumberItemInfo*) pItemNormInfo )->ppGroups =
(TTSDigitsItemInfo**) MemoryManager.GetMemory( ulNumGroups * sizeof(TTSDigitsItemInfo*), &hr );
for ( ULONG j = 0; SUCCEEDED( hr ) && j < ulNumGroups; j++ )
{
( (TTSPhoneNumberItemInfo*) pItemNormInfo )->ppGroups[j] =
(TTSDigitsItemInfo*) MemoryManager.GetMemory( sizeof( TTSDigitsItemInfo ), &hr );
if ( SUCCEEDED( hr ) )
{
( (TTSPhoneNumberItemInfo*) pItemNormInfo )->ppGroups[j]->ulNumDigits = ulGroupLen[j];
( (TTSPhoneNumberItemInfo*) pItemNormInfo )->ppGroups[j]->pFirstDigit = pGroups[j];
}
}
}
}
}
//--- Expand Phone Number
if ( SUCCEEDED( hr ) )
{
//--- Word is a scratch record reused for every literal word that gets inserted
TTSWord Word;
ZeroMemory( &Word, sizeof( TTSWord ) );
Word.eWordPartOfSpeech = MS_Unknown;
//--- ListPos marks the list tail before an expansion, so the words added by the expansion
//--- can be revisited afterwards to stamp them with the right XML state
SPLISTPOS ListPos;
if ( pCountryCode )
{
//--- Insert "country"
Word.pXmlState = pCountryCodeState;
Word.pWordText = g_Country.pStr;
Word.ulWordLen = g_Country.Len;
Word.pLemma = Word.pWordText;
Word.ulLemmaLen = Word.ulWordLen;
WordList.AddTail( Word );
//--- Insert "code"
Word.pWordText = g_Code.pStr;
Word.ulWordLen = g_Code.Len;
Word.pLemma = Word.pWordText;
Word.ulLemmaLen = Word.ulWordLen;
WordList.AddTail( Word );
ListPos = WordList.GetTailPosition();
//--- Expand Country Code
//--- NOTE(review): the result of ExpandNumber() is not checked here.
ExpandNumber( ( (TTSPhoneNumberItemInfo*) pItemNormInfo )->pCountryCode, WordList );
//--- Clean up digits XML states...
//--- (skip the old tail - "code" - then restamp everything the expansion appended)
WordList.GetNext( ListPos );
while ( ListPos )
{
TTSWord& TempWord = WordList.GetNext( ListPos );
TempWord.pXmlState = pCountryCodeState;
}
//--- Insert Post Symbol XML States
while ( !PostCountryCodeList.IsEmpty() )
{
WordList.AddTail( ( PostCountryCodeList.RemoveHead() ).Words[0] );
}
}
if ( fMatchedOne )
{
//--- Insert "one"
Word.pXmlState = pOneState;
Word.pWordText = g_ones[1].pStr;
Word.ulWordLen = g_ones[1].Len;
Word.pLemma = Word.pWordText;
Word.ulLemmaLen = Word.ulWordLen;
WordList.AddTail( Word );
//--- Insert PostOne XML States
while ( !PostOneList.IsEmpty() )
{
WordList.AddTail( ( PostOneList.RemoveHead() ).Words[0] );
}
}
if ( pAreaCode )
{
//--- Expand digits - 800 and 900 get expanded as one number, otherwise digit by digit
//--- NOTE(review): pAreaCode[2] is read even when ulAreaCodeLen is 2; in that case it is
//--- the character following the two digits.
if ( ( pAreaCode[0] == L'8' ||
pAreaCode[0] == L'9' ) &&
pAreaCode[1] == L'0' &&
pAreaCode[2] == L'0' )
{
( (TTSPhoneNumberItemInfo*) pItemNormInfo )->fIs800 = true;
NumberGroup Garbage;
ListPos = WordList.GetTailPosition();
ExpandThreeDigits( pAreaCode, Garbage, WordList );
//--- Clean up digits XML states...
//--- List was possibly empty prior to inserting "eight hundred" or "nine hundred"...
if ( !ListPos )
{
ListPos = WordList.GetHeadPosition();
}
//--- NOTE(review): when the list was empty, this GetNext() steps over the first newly
//--- inserted word, so that word is not restamped - confirm whether that is intended.
WordList.GetNext( ListPos );
while ( ListPos )
{
TTSWord& TempWord = WordList.GetNext( ListPos );
TempWord.pXmlState = pAreaCodeState;
}
}
else
{
//--- Insert "area"
Word.pXmlState = pAreaCodeState;
Word.pWordText = g_Area.pStr;
Word.ulWordLen = g_Area.Len;
Word.pLemma = Word.pWordText;
Word.ulLemmaLen = Word.ulWordLen;
WordList.AddTail( Word );
//--- Insert "code"
Word.pWordText = g_Code.pStr;
Word.ulWordLen = g_Code.Len;
Word.pLemma = Word.pWordText;
Word.ulLemmaLen = Word.ulWordLen;
WordList.AddTail( Word );
ListPos = WordList.GetTailPosition();
ExpandDigits( ( (TTSPhoneNumberItemInfo*) pItemNormInfo )->pAreaCode, WordList );
//--- Clean up digits XML states...
WordList.GetNext( ListPos );
while ( ListPos )
{
TTSWord& TempWord = WordList.GetNext( ListPos );
TempWord.pXmlState = pAreaCodeState;
}
}
//--- Insert PostAreaCode XML States
while ( !PostAreaCodeList.IsEmpty() )
{
WordList.AddTail( ( PostAreaCodeList.RemoveHead() ).Words[0] );
}
}
//--- Expand each main number group digit by digit
for ( ULONG j = 0; j < ulNumGroups; j++ )
{
ListPos = WordList.GetTailPosition();
ExpandDigits( ( (TTSPhoneNumberItemInfo*) pItemNormInfo )->ppGroups[j], WordList );
//--- Clean up digits XML states...
//--- List was possibly empty prior to expanding this group (no country code, "one" or area code)...
if ( !ListPos )
{
ListPos = WordList.GetHeadPosition();
}
WordList.GetNext( ListPos );
while ( ListPos )
{
TTSWord& TempWord = WordList.GetNext( ListPos );
TempWord.pXmlState = pGroupStates[j];
}
//--- Insert Post Group XML States
while ( !PostGroupLists[j].IsEmpty() )
{
WordList.AddTail( ( PostGroupLists[j].RemoveHead() ).Words[0] );
}
}
}
return hr;
} /* IsPhoneNumber */
/***********************************************************************************************
* IsZipCode *
*-----------*
* Description:
* Checks the incoming Item's text to determine whether or not it
* is a zipcode.
*
* RegExp:
* ddddd{-dddd}?
*
* Types assigned:
* NUM_ZIPCODE
********************************************************************* AH **********************/
//--- IsZipCode
//---   Matches the current token [m_pNextChar, m_pEndOfCurrItem) against ddddd or ddddd-dddd.
//---   On a match, pItemNormInfo receives a zeroed TTSZipCodeItemInfo of type eNUM_ZIPCODE
//---   whose pFirstFive (and, for the long form, pLastFour) point into the token text.
//---   Returns E_INVALIDARG when the token is not a zip code, or the failure reported by
//---   MemoryManager.GetMemory().  Context is not used.
HRESULT CStdSentEnum::IsZipCode( TTSItemInfo*& pItemNormInfo, const WCHAR* Context,
                                 CSentItemMemory& MemoryManager )
{
    SPDBG_FUNC( "CStdSentEnum::IsZipCode" );
    HRESULT hr = S_OK;
    const ULONG ulTokenLen = (ULONG)( m_pEndOfCurrItem - m_pNextChar );
    BOOL fLastFour = false;
    //--- Declared at function scope: standard C++ does not let a for-init variable outlive its loop
    ULONG i = 0;

    //--- length must be 5 or 10
    if ( ulTokenLen != 5 &&
         ulTokenLen != 10 )
    {
        hr = E_INVALIDARG;
    }
    else
    {
        //--- match 5 digits
        for ( i = 0; i < 5; i++ )
        {
            if ( !iswdigit( m_pNextChar[i] ) )
            {
                hr = E_INVALIDARG;
                break;
            }
        }

        //--- long form: ddddd-dddd
        if ( SUCCEEDED( hr ) &&
             ulTokenLen == 10 )
        {
            //--- match dash
            if ( m_pNextChar[5] != L'-' )
            {
                hr = E_INVALIDARG;
            }
            else
            {
                //--- match 4 digits - these are the characters AFTER the dash (offsets 6..9),
                //--- not the leading digits that were already verified above
                for ( i = 6; i < 10; i++ )
                {
                    if ( !iswdigit( m_pNextChar[i] ) )
                    {
                        hr = E_INVALIDARG;
                        break;
                    }
                }
                if ( SUCCEEDED( hr ) )
                {
                    fLastFour = true;
                }
            }
        }
    }

    if ( SUCCEEDED( hr ) )
    {
        pItemNormInfo = (TTSZipCodeItemInfo*) MemoryManager.GetMemory( sizeof(TTSZipCodeItemInfo), &hr );
        if ( SUCCEEDED( hr ) )
        {
            ZeroMemory( pItemNormInfo, sizeof(TTSZipCodeItemInfo) );
            pItemNormInfo->Type = eNUM_ZIPCODE;
            ( (TTSZipCodeItemInfo*) pItemNormInfo )->pFirstFive =
                (TTSDigitsItemInfo*) MemoryManager.GetMemory( sizeof(TTSDigitsItemInfo), &hr );
            if ( SUCCEEDED( hr ) )
            {
                ( (TTSZipCodeItemInfo*) pItemNormInfo )->pFirstFive->ulNumDigits = 5;
                ( (TTSZipCodeItemInfo*) pItemNormInfo )->pFirstFive->pFirstDigit = m_pNextChar;
                if ( fLastFour )
                {
                    ( (TTSZipCodeItemInfo*) pItemNormInfo )->pLastFour =
                        (TTSDigitsItemInfo*) MemoryManager.GetMemory( sizeof(TTSDigitsItemInfo), &hr );
                    if ( SUCCEEDED( hr ) )
                    {
                        //--- +4 digits start right after "ddddd-"
                        ( (TTSZipCodeItemInfo*) pItemNormInfo )->pLastFour->ulNumDigits = 4;
                        ( (TTSZipCodeItemInfo*) pItemNormInfo )->pLastFour->pFirstDigit = m_pNextChar + 6;
                    }
                }
            }
        }
    }

    return hr;
} /* IsZipCode */
/***********************************************************************************************
* ExpandZipCode *
*---------------*
* Description:
* Expands Items previously determined to be of type NUM_ZIPCODE by IsZipCode.
*
* NOTE: This function does not do parameter validation. Assumed to be done by caller.
********************************************************************* AH **********************/
//--- ExpandZipCode
//---   Appends the spoken form of a zip code to WordList: the first five digits read one at a
//---   time and, when a +4 part is present, the word "dash" followed by those four digits.
//---   Always returns S_OK.
HRESULT CStdSentEnum::ExpandZipCode( TTSZipCodeItemInfo* pItemInfo, CWordList& WordList )
{
    SPDBG_FUNC( "CStdSentEnum::ExpandZipCode" );

    //--- The five digit part is always there
    ExpandDigits( pItemInfo->pFirstFive, WordList );

    //--- Short form - nothing more to say
    if ( !pItemInfo->pLastFour )
    {
        return S_OK;
    }

    //--- Long form - say "dash", tagged with the current fragment's XML state...
    TTSWord DashWord;
    ZeroMemory( &DashWord, sizeof( DashWord ) );
    DashWord.eWordPartOfSpeech = MS_Unknown;
    DashWord.pXmlState         = &m_pCurrFrag->State;
    DashWord.pWordText         = g_dash.pStr;
    DashWord.ulWordLen         = g_dash.Len;
    DashWord.pLemma            = g_dash.pStr;
    DashWord.ulLemmaLen        = g_dash.Len;
    WordList.AddTail( DashWord );

    //--- ...and then the last four digits
    ExpandDigits( pItemInfo->pLastFour, WordList );

    return S_OK;
} /* ExpandZipCode */
/***********************************************************************************************
* IsNumberRange *
*---------------*
* Description:
* Checks the incoming Item's text to determine whether or not it
* is a number range.
*
* RegExp:
* [Number]-[Number]
*
* Types assigned:
* NUM_RANGE
********************************************************************* AH **********************/
//--- IsNumberRange
//---   Matches the current token [m_pNextChar, m_pEndOfCurrItem) against [Number]-[Number].
//---   The token is split at its first hyphen; the left part must satisfy IsNumber() and the
//---   right part IsNumberCategory().  On a match pItemNormInfo receives a
//---   TTSNumberRangeItemInfo of type eNUM_RANGE holding both parts.
//---   m_pNextChar and m_pEndOfCurrItem are restored to their entry values before returning.
//---   Returns E_INVALIDARG when there is no hyphen strictly inside the token, otherwise the
//---   result of the sub-matches / the memory allocation.
HRESULT CStdSentEnum::IsNumberRange( TTSItemInfo*& pItemNormInfo, CSentItemMemory& MemoryManager )
{
    SPDBG_FUNC( "CStdSentEnum::IsNumberRange" );
    HRESULT hr = S_OK;
    TTSItemInfo *pFirstNumberInfo = NULL, *pSecondNumberInfo = NULL;
    const WCHAR *pTempNextChar = m_pNextChar, *pTempEndOfItem = m_pEndOfCurrItem;
    const WCHAR *pHyphen = m_pNextChar;

    //--- Find the first hyphen, never looking at or beyond m_pEndOfCurrItem
    while ( pHyphen < m_pEndOfCurrItem &&
            *pHyphen != L'-' )
    {
        pHyphen++;
    }

    //--- The hyphen must have something on both sides.  The upper bound test also guarantees
    //--- the scan stopped on a hyphen (and not at the end of the token), so the character one
    //--- past the token is never read.
    if ( pHyphen > m_pNextChar &&
         pHyphen < m_pEndOfCurrItem - 1 )
    {
        //--- Left hand side: narrow the token to everything before the hyphen
        m_pEndOfCurrItem = pHyphen;
        hr = IsNumber( pFirstNumberInfo, NULL, MemoryManager );
        if ( SUCCEEDED( hr ) )
        {
            //--- Right hand side: everything after the hyphen
            m_pNextChar      = pHyphen + 1;
            m_pEndOfCurrItem = pTempEndOfItem;
            hr = IsNumberCategory( pSecondNumberInfo, NULL, MemoryManager );
            if ( SUCCEEDED( hr ) )
            {
                //--- Matched a number range!
                //--- NOTE(review): if this allocation fails, the word lists attached to the
                //--- two number infos are not released here - confirm who owns them.
                pItemNormInfo =
                    (TTSNumberRangeItemInfo*) MemoryManager.GetMemory( sizeof( TTSNumberRangeItemInfo ), &hr );
                if ( SUCCEEDED( hr ) )
                {
                    pItemNormInfo->Type = eNUM_RANGE;
                    ( (TTSNumberRangeItemInfo*) pItemNormInfo )->pFirstNumberInfo  = pFirstNumberInfo;
                    ( (TTSNumberRangeItemInfo*) pItemNormInfo )->pSecondNumberInfo = pSecondNumberInfo;
                }
            }
            else if ( pFirstNumberInfo->Type != eDATE_YEAR )
            {
                //--- Second half failed: the first half's word list is no longer needed
                delete ( (TTSNumberItemInfo*) pFirstNumberInfo )->pWordList;
            }
        }
        //--- Put the token boundaries back the way we found them
        m_pNextChar      = pTempNextChar;
        m_pEndOfCurrItem = pTempEndOfItem;
    }
    else
    {
        hr = E_INVALIDARG;
    }

    return hr;
} /* IsNumberRange */
/***********************************************************************************************
* ExpandNumberRange *
*-------------------*
* Description:
* Expands Items previously determined to be of type NUM_RANGE by IsNumberRange.
*
* NOTE: This function does not do parameter validation. Assumed to be done by caller.
********************************************************************* AH **********************/
//--- ExpandNumberRange
//---   Appends the spoken form of a number range to WordList: the first number, the word "to",
//---   then the second number.  Each side is expanded according to its item type (year,
//---   percent, degrees, squared, cubed, or plain number; the first side is only ever treated
//---   as a year or a plain number).  Stops at, and returns, the first failing HRESULT.
HRESULT CStdSentEnum::ExpandNumberRange( TTSNumberRangeItemInfo* pItemInfo, CWordList& WordList )
{
    SPDBG_FUNC( "CStdSentEnum::ExpandNumberRange" );
    HRESULT hr;

    //--- Left hand side - a year, or else an ordinary number
    if ( pItemInfo->pFirstNumberInfo->Type == eDATE_YEAR )
    {
        hr = ExpandYear( (TTSYearItemInfo*) pItemInfo->pFirstNumberInfo, WordList );
    }
    else
    {
        hr = ExpandNumber( (TTSNumberItemInfo*) pItemInfo->pFirstNumberInfo, WordList );
    }
    if ( !SUCCEEDED( hr ) )
    {
        return hr;
    }

    //--- The hyphen is read as "to", tagged with the current fragment's XML state
    TTSWord ToWord;
    ZeroMemory( &ToWord, sizeof( ToWord ) );
    ToWord.eWordPartOfSpeech = MS_Unknown;
    ToWord.pXmlState         = &m_pCurrFrag->State;
    ToWord.pWordText         = g_to.pStr;
    ToWord.ulWordLen         = g_to.Len;
    ToWord.pLemma            = g_to.pStr;
    ToWord.ulLemmaLen        = g_to.Len;
    WordList.AddTail( ToWord );

    //--- Right hand side - dispatch on the category that was matched
    if ( pItemInfo->pSecondNumberInfo->Type == eDATE_YEAR )
    {
        return ExpandYear( (TTSYearItemInfo*) pItemInfo->pSecondNumberInfo, WordList );
    }
    if ( pItemInfo->pSecondNumberInfo->Type == eNUM_PERCENT )
    {
        return ExpandPercent( (TTSNumberItemInfo*) pItemInfo->pSecondNumberInfo, WordList );
    }
    if ( pItemInfo->pSecondNumberInfo->Type == eNUM_DEGREES )
    {
        return ExpandDegrees( (TTSNumberItemInfo*) pItemInfo->pSecondNumberInfo, WordList );
    }
    if ( pItemInfo->pSecondNumberInfo->Type == eNUM_SQUARED )
    {
        return ExpandSquare( (TTSNumberItemInfo*) pItemInfo->pSecondNumberInfo, WordList );
    }
    if ( pItemInfo->pSecondNumberInfo->Type == eNUM_CUBED )
    {
        return ExpandCube( (TTSNumberItemInfo*) pItemInfo->pSecondNumberInfo, WordList );
    }
    return ExpandNumber( (TTSNumberItemInfo*) pItemInfo->pSecondNumberInfo, WordList );
} /* ExpandNumberRange */
/***********************************************************************************************
* IsCurrencyRange *
*-------------------*
* Description:
* Tries to match the current item, [m_pNextChar, m_pEndOfCurrItem), as a currency range: two
* values separated by a hyphen, with a currency sign from g_CurrencySigns at the start or the
* end of the item (e.g. "$10-12" -> "ten to twelve dollars").
*
* pItemInfo - assigned only after both halves have been matched; receives a
* TTSNumberRangeItemInfo obtained from MemoryManager, with Type set to
* eNUM_CURRENCYRANGE and the two per-value infos attached.
* MemoryManager - used for every allocation made here and handed on to IsNumber / IsCurrency.
* WordList - the words built for the range are appended to it only on complete success;
* until then they are collected in a local temporary list.
*
* Returns E_INVALIDARG when the item has no currency sign, has no hyphen strictly inside it,
* or the two halves carry different currency signs. Otherwise returns whatever IsNumber,
* IsCurrency or MemoryManager.GetMemory reported.
*
* Side effects: the hyphen in the text buffer is overwritten with a space while the halves are
* examined and is put back afterwards. m_pNextChar, m_pEndOfCurrItem and m_pEndChar are changed
* repeatedly during the match, but are restored to their entry values before returning.
*
* NOTE: This function does not do parameter validation. Assumed to be done by caller.
********************************************************************* AH **********************/
HRESULT CStdSentEnum::IsCurrencyRange( TTSItemInfo*& pItemInfo, CSentItemMemory& MemoryManager, CWordList& WordList )
{
SPDBG_FUNC( "CStdSentEnum::IsCurrencyRange" );
HRESULT hr = S_OK;
TTSItemInfo *pFirstNumberInfo = NULL, *pSecondNumberInfo = NULL;
//--- Entry values of the scanning members; they are restored from these before returning.
const WCHAR *pTempNextChar = m_pNextChar, *pTempEndOfItem = m_pEndOfCurrItem, *pTempEndChar = m_pEndChar;
const WCHAR *pHyphen = NULL;
//--- Words are collected here and only copied to WordList once the whole range has matched.
CWordList TempWordList;
NORM_POSITION ePosition = UNATTACHED; //for currency sign checking
//--- iSymbolIndex: sign found on the whole item. iTempSymbolIndex: sign found on one half.
int iSymbolIndex, iTempSymbolIndex = -1;
WCHAR *tempNumberString;
//--- MatchCurrencySign takes its pointers by reference: on a match it advances m_pNextChar past
//--- a leading sign, or pulls m_pEndOfCurrItem back before a trailing sign.
iSymbolIndex = MatchCurrencySign( m_pNextChar, m_pEndOfCurrItem, ePosition );
if(iSymbolIndex < 0)
{
hr = E_INVALIDARG;
}
else
{
//--- Find the first hyphen in what is left of the item after the sign was stripped.
for ( pHyphen = m_pNextChar; pHyphen < m_pEndOfCurrItem; pHyphen++ )
{
if ( *pHyphen == L'-' )
{
break;
}
}
//--- The hyphen must have at least one character on either side of it.
//--- NOTE(review): if the loop found no hyphen, pHyphen == m_pEndOfCurrItem here and that
//--- position is dereferenced - confirm it is always readable.
if ( !( *pHyphen == L'-' &&
pHyphen > m_pNextChar &&
pHyphen < m_pEndOfCurrItem - 1 ) )
{
hr = E_INVALIDARG;
}
else
{
*( (WCHAR*)pHyphen) = L' '; // Token must break at hyphen, or IsCurrency() will not work
//--- First half: from the original start of the item up to the hyphen.
m_pNextChar = pTempNextChar;
m_pEndOfCurrItem = pHyphen;
NORM_POSITION temp = UNATTACHED;
//--- Strips any sign from the first half (again by modifying the members in place).
iTempSymbolIndex = MatchCurrencySign( m_pNextChar, m_pEndOfCurrItem, temp );
if( iTempSymbolIndex >= 0 && iSymbolIndex != iTempSymbolIndex )
{
//--- The first half carries a sign different from the one found on the whole item.
hr = E_INVALIDARG;
}
else //--- Get both NumberInfos
{
hr = IsNumber( pFirstNumberInfo, L"NUMBER", MemoryManager, false );
if( SUCCEEDED ( hr ) )
{
//--- Second half: from just after the hyphen to the original end of the item.
m_pNextChar = pHyphen + 1;
m_pEndOfCurrItem = pTempEndOfItem;
//--- Overwrites iTempSymbolIndex: from here on it describes the SECOND half.
iTempSymbolIndex = MatchCurrencySign( m_pNextChar, m_pEndOfCurrItem, temp );
hr = IsNumber( pSecondNumberInfo, L"NUMBER", MemoryManager, false );
}
}
if( SUCCEEDED ( hr ) )
{
//--- If both currency values are cardinal numbers, then the first number can be
//--- expanded without saying its currency ("$10-12" -> "ten to twelve dollars")
if( pFirstNumberInfo->Type == eNUM_CARDINAL && pSecondNumberInfo->Type == eNUM_CARDINAL )
{
ExpandNumber( (TTSNumberItemInfo*) pFirstNumberInfo, TempWordList );
}
else // one or both values are non-cardinal numbers, so we must
{ // expand the first value as a full currency.
m_pNextChar = pTempNextChar;
m_pEndOfCurrItem = pHyphen;
if( ePosition == FOLLOWING )
{
//--- NOTE(review): iTempSymbolIndex was last assigned from the match on the second
//--- half, while the comment below talks about the first number item - confirm
//--- which half this test is meant to inspect.
if( iTempSymbolIndex < 0 ) // No symbol on first number item - need to fill a buffer
{ // with currency symbol and value to pass to IsCurrency().
//--- Room for the first half, the sign and a terminating null.
ULONG ulNumChars = (long)(m_pEndOfCurrItem - m_pNextChar + g_CurrencySigns[iSymbolIndex].Sign.Len + 1);
tempNumberString = (WCHAR*) MemoryManager.GetMemory( (ulNumChars) * sizeof(WCHAR), &hr );
if ( SUCCEEDED( hr ) )
{
//--- Zero fill first: wcsncpy does not terminate the copy itself, wcscat needs it.
ZeroMemory( tempNumberString, ( ulNumChars ) * sizeof( WCHAR ) );
wcsncpy( tempNumberString, m_pNextChar, m_pEndOfCurrItem - m_pNextChar );
wcscat( tempNumberString, g_CurrencySigns[iSymbolIndex].Sign.pStr );
//--- Point the scanning members at the temporary "<value><sign>" string.
m_pNextChar = tempNumberString;
m_pEndOfCurrItem = tempNumberString + wcslen( tempNumberString );
m_pEndChar = m_pEndOfCurrItem;
}
}
else if( iTempSymbolIndex != iSymbolIndex ) // mismatched symbols
{
hr = E_INVALIDARG;
}
}
if ( SUCCEEDED ( hr ) )
{
hr = IsCurrency( pFirstNumberInfo, MemoryManager, TempWordList );
//--- Undo the temporary-buffer end pointer set above (harmless if it was not changed).
m_pEndChar = pTempEndChar;
}
}
}
if ( SUCCEEDED ( hr ) )
{
//--- Insert the word held in g_to between the two values.
TTSWord Word;
ZeroMemory( &Word, sizeof( TTSWord ) );
Word.pXmlState = &m_pCurrFrag->State;
Word.eWordPartOfSpeech = MS_Unknown;
Word.pWordText = g_to.pStr;
Word.ulWordLen = g_to.Len;
Word.pLemma = Word.pWordText;
Word.ulLemmaLen = Word.ulWordLen;
TempWordList.AddTail( Word );
//--- Second half again, this time including any sign it carries.
m_pNextChar = pHyphen + 1;
m_pEndOfCurrItem = pTempEndOfItem;
if( ePosition == PRECEDING )
{
//--- This call may overwrite ePosition and, on a match, moves m_pNextChar or
//--- m_pEndOfCurrItem; the matched branch below resets both.
iTempSymbolIndex = MatchCurrencySign( m_pNextChar, m_pEndOfCurrItem, ePosition );
if( iTempSymbolIndex < 0 ) // No symbol on second number item
{ // create temporary string from first currency sign and second number item
//--- Room for the sign, the second half and a terminating null.
ULONG ulNumChars = (long)(m_pEndOfCurrItem - m_pNextChar + g_CurrencySigns[iSymbolIndex].Sign.Len + 1);
tempNumberString = (WCHAR*) MemoryManager.GetMemory( (ulNumChars) * sizeof(WCHAR), &hr );
if ( SUCCEEDED( hr ) )
{
//--- The zero fill supplies the terminator; both copies below are length limited.
ZeroMemory( tempNumberString, ( ulNumChars ) * sizeof( WCHAR ) );
wcsncpy( tempNumberString, g_CurrencySigns[iSymbolIndex].Sign.pStr, g_CurrencySigns[iSymbolIndex].Sign.Len );
wcsncpy( tempNumberString+g_CurrencySigns[iSymbolIndex].Sign.Len, m_pNextChar, m_pEndOfCurrItem - m_pNextChar );
//--- Point the scanning members at the temporary "<sign><value>" string.
m_pNextChar = tempNumberString;
m_pEndOfCurrItem = tempNumberString + wcslen( tempNumberString );
m_pEndChar = m_pEndOfCurrItem;
}
}
else if( iTempSymbolIndex == iSymbolIndex ) // matched leading symbol on second number item
{
//--- Put the sign back into the range that IsCurrency will look at.
m_pNextChar = pHyphen + 1;
m_pEndOfCurrItem = pTempEndOfItem;
}
else // mismatched symbol
{
hr = E_INVALIDARG;
}
}
if( SUCCEEDED(hr) )
{
hr = IsCurrency( pSecondNumberInfo, MemoryManager, TempWordList );
if ( SUCCEEDED( hr ) )
{
//--- Matched a currency range!
pItemInfo =
(TTSNumberRangeItemInfo*) MemoryManager.GetMemory( sizeof( TTSNumberRangeItemInfo ), &hr );
if ( SUCCEEDED( hr ) )
{
pItemInfo->Type = eNUM_CURRENCYRANGE;
( (TTSNumberRangeItemInfo*) pItemInfo )->pFirstNumberInfo = pFirstNumberInfo;
( (TTSNumberRangeItemInfo*) pItemInfo )->pSecondNumberInfo = pSecondNumberInfo;
//--- Copy temp word list to real word list if everything has succeeded...
WordList.AddTail( &TempWordList );
}
}
}
}
//--- Put the hyphen back, whether or not the match succeeded.
*( (WCHAR*)pHyphen) = L'-';
}
}
//Reset member variables regardless of failure or success
m_pNextChar = pTempNextChar;
m_pEndOfCurrItem = pTempEndOfItem;
m_pEndChar = pTempEndChar;
return hr;
} /* IsCurrencyRange */
/***********************************************************************************************
* MatchCurrencySign *
*-------------------*
* Description:
* Helper function which looks for one of the signs in g_CurrencySigns, first at the start of
* [pStartChar, pEndChar) and, failing that, at its end. The comparison ignores case.
*
* On a match the sign is removed from the range: pStartChar is moved past a leading sign
* (ePosition = PRECEDING), or pEndChar is moved back in front of a trailing sign
* (ePosition = FOLLOWING). Returns the index of the sign in g_CurrencySigns.
*
* When nothing matches, -1 is returned and none of the three arguments is modified.
********************************************************************* AH **********************/
int MatchCurrencySign( const WCHAR*& pStartChar, const WCHAR*& pEndChar, NORM_POSITION& ePosition )
{
    int iSign;

    //--- First pass: is there a sign leading the string?
    for ( iSign = 0; iSign < sp_countof(g_CurrencySigns); iSign++ )
    {
        if ( pEndChar - pStartChar < g_CurrencySigns[iSign].Sign.Len )
        {
            continue;   // range is too short to hold this sign
        }
        if ( wcsnicmp( pStartChar, g_CurrencySigns[iSign].Sign.pStr, g_CurrencySigns[iSign].Sign.Len ) != 0 )
        {
            continue;
        }

        pStartChar += g_CurrencySigns[iSign].Sign.Len;
        ePosition   = PRECEDING;
        return iSign;
    }

    //--- Second pass, only reached when nothing led the string: is there a trailing sign?
    for ( iSign = 0; iSign < sp_countof(g_CurrencySigns); iSign++ )
    {
        if ( pEndChar - pStartChar < g_CurrencySigns[iSign].Sign.Len )
        {
            continue;   // range is too short to hold this sign
        }
        if ( wcsnicmp( pEndChar - g_CurrencySigns[iSign].Sign.Len, g_CurrencySigns[iSign].Sign.pStr, g_CurrencySigns[iSign].Sign.Len ) != 0 )
        {
            continue;
        }

        pEndChar -= g_CurrencySigns[iSign].Sign.Len;
        ePosition = FOLLOWING;
        return iSign;
    }

    //--- No currency sign at either end
    return -1;
} /* MatchCurrencySign */
/***********************************************************************************************
* Zeroes *
*--------*
* Description:
* A helper function which simply determines if a number string contains only zeroes...
* Note: This function does not do parameter validation. Assumed to be done by caller.
********************************************************************* AH **********************/
bool CStdSentEnum::Zeroes(const WCHAR *NumberString)
{
bool bAllZeroes = true;
for (ULONG i = 0; i < wcslen(NumberString); i++)
{
if (NumberString[i] != '0' && isdigit(NumberString[i]) )
{
bAllZeroes = false;
break;
}
else if ( !isdigit( NumberString[i] ) && NumberString[i] != ',' )
{
break;
}
}
return bAllZeroes;
} /* Zeroes */
/***********************************************************************************************
* ThreeZeroes *
*-------------*
* Description:
* A helper function which simply determines if a number string contains three zeroes...
* Note: This function does not do parameter validation. Assumed to be done by caller.
********************************************************************* AH **********************/
bool CStdSentEnum::ThreeZeroes(const WCHAR *NumberString)
{
bool bThreeZeroes = true;
for (ULONG i = 0; i < 3; i++)
{
if (NumberString[i] != '0' && isdigit(NumberString[i]))
{
bThreeZeroes = false;
break;
}
}
return bThreeZeroes;
} /* ThreeZeroes */
//-----------End Of File-------------------------------------------------------------------