/***********************************************************************************************
* NumNorm.cpp *
*-------------*
*  Description:
*   These functions normalize ordinary ordinal and cardinal numbers
*-----------------------------------------------------------------------------------------------
*  Created by AH                                                                August 3, 1999
*  Copyright (C) 1999 Microsoft Corporation
*  All Rights Reserved
*
***********************************************************************************************/

#include "stdafx.h"

#ifndef StdSentEnum_h
#include "stdsentenum.h"
#endif

/***********************************************************************************************
* IsNumberCategory *
*------------------*
*   Description:
*       Checks the next token in the text stream to determine if it is a number category -
*   percents, degrees, squared and cubed numbers, and plain old numbers get matched here.
*
*   The token is first handed to IsNumber.  The result is accepted when it is a year, when the
*   number ends exactly at the end of the current item, or when exactly one character follows
*   the number and that character is '%', '°', '²' or '³' (the item type is then changed to
*   eNUM_PERCENT, eNUM_DEGREES, eNUM_SQUARED or eNUM_CUBED respectively).
*
*   Parameters:
*       pItemNormInfo - receives the item info produced by IsNumber on success; left untouched
*                       on failure.
*       Context       - passed through to IsNumber unchanged.
*       MemoryManager - passed through to IsNumber unchanged.
*
*   Returns:
*       Whatever IsNumber returned if it failed; E_INVALIDARG if IsNumber matched but the
*       match does not account for the whole item; otherwise the success code from IsNumber.
*       When this function itself rejects the match, the enumerator position is put back to
*       where it was on entry and the word list allocated by IsNumber is released.
********************************************************************* AH **********************/
HRESULT CStdSentEnum::IsNumberCategory( TTSItemInfo*& pItemNormInfo, const WCHAR* Context,
                                        CSentItemMemory& MemoryManager )
{
    HRESULT hr = S_OK;

    //--- Remember the enumerator position so that it can be put back if the match is rejected
    const WCHAR *pTempNextChar = m_pNextChar, *pTempEndChar = m_pEndChar, *pTempEndOfItem = m_pEndOfCurrItem;
    const SPVTEXTFRAG *pTempFrag = m_pCurrFrag;

    TTSItemInfo *pNumberInfo = NULL;
    hr = IsNumber( pNumberInfo, Context, MemoryManager );

    if ( SUCCEEDED( hr ) )
    {
        //--- IsNumber returns a TTSYearItemInfo for eDATE_YEAR and a TTSNumberItemInfo for
        //--- everything else, so only downcast (and only look at pEndChar / pWordList)
        //--- when the item is not a year.
        TTSNumberItemInfo *pNumber = NULL;
        if ( pNumberInfo->Type != eDATE_YEAR )
        {
            pNumber = (TTSNumberItemInfo*) pNumberInfo;
        }

        bool fMatched = true;

        if ( pNumber && pNumber->pEndChar == m_pEndOfCurrItem - 1 )
        {
            //--- Exactly one character follows the number - it must be a known suffix
            const WCHAR wcSuffix = *( pNumber->pEndChar );

            if ( wcSuffix == L'%' )
            {
                pNumberInfo->Type = eNUM_PERCENT;
            }
            else if ( wcSuffix == L'°' )
            {
                pNumberInfo->Type = eNUM_DEGREES;
            }
            else if ( wcSuffix == L'²' )
            {
                pNumberInfo->Type = eNUM_SQUARED;
            }
            else if ( wcSuffix == L'³' )
            {
                pNumberInfo->Type = eNUM_CUBED;
            }
            else
            {
                fMatched = false;
            }
        }
        else if ( pNumber && pNumber->pEndChar != m_pEndOfCurrItem )
        {
            //--- The number neither fills the item nor leaves a single suffix character
            fMatched = false;
        }

        if ( fMatched )
        {
            pItemNormInfo = pNumberInfo;
        }
        else
        {
            hr = E_INVALIDARG;

            //--- Year items carry no word list; number items own one allocated with new
            if ( pNumber )
            {
                delete pNumber->pWordList;
                pNumber->pWordList = NULL;
            }

            //--- A rejected match must leave the enumerator exactly as it was found,
            //--- regardless of which test rejected it.
            m_pNextChar      = pTempNextChar;
            m_pEndChar       = pTempEndChar;
            m_pEndOfCurrItem = pTempEndOfItem;
            m_pCurrFrag      = pTempFrag;
        }
    }

    return hr;
} /* IsNumberCategory */

/***********************************************************************************************
* IsNumber *
*----------*
*   Description:
*       Checks the next token in the text stream to determine if it is a number.
*
*   RegExp:
*       [-]? { d+ || d(1-3)[,ddd]+ } { { .d+ } || { "st" || "nd" || "rd" || "th" } }?
*   It is actually a bit more complicated than this - for instance, the ordinal
*   strings may only follow certain digits (1st, 2nd, 3rd, 4-0th)...
* ********************************************************************* AH **********************/ HRESULT CStdSentEnum::IsNumber( TTSItemInfo*& pItemNormInfo, const WCHAR* Context, CSentItemMemory& MemoryManager, BOOL fMultiItem ) { SPDBG_FUNC( "CStdSentEnum::IsNumber" ); HRESULT hr = S_OK; bool fNegative = false; TTSIntegerItemInfo* pIntegerInfo = NULL; TTSDigitsItemInfo* pDecimalInfo = NULL; TTSFractionItemInfo* pFractionInfo = NULL; const SPVSTATE *pIntegerState = &m_pCurrFrag->State; CItemList PostIntegerList; ULONG ulOffset = 0, ulTokenLen = (ULONG)(m_pEndOfCurrItem - m_pNextChar); WCHAR wcDecimalPoint; const WCHAR *pTempNextChar = m_pNextChar, *pTempEndChar = m_pEndChar, *pTempEndOfItem = m_pEndOfCurrItem; const SPVTEXTFRAG *pTempFrag = m_pCurrFrag; if ( ulTokenLen ) { //--- Set Separator and Decimal Point character preferences for this call if ( m_eSeparatorAndDecimal == COMMA_PERIOD ) { wcDecimalPoint = L'.'; } else { wcDecimalPoint = L','; } //--- Try to match the negative sign - [-]? if ( m_pNextChar[ulOffset] == L'-' ) { fNegative = true; ulOffset++; } //--- Try to match the integral part hr = IsInteger( m_pNextChar + ulOffset, pIntegerInfo, MemoryManager ); //--- Adjust ulOffset and hr... if ( SUCCEEDED( hr ) ) { ulOffset += (ULONG)(pIntegerInfo->pEndChar - pIntegerInfo->pStartChar); } else if ( hr == E_INVALIDARG ) { hr = S_OK; pIntegerInfo = NULL; } //--- Try to match a decimal part if ( ulOffset < ulTokenLen && m_pNextChar[ulOffset] == wcDecimalPoint ) { hr = IsDigitString( m_pNextChar + ulOffset + 1, pDecimalInfo, MemoryManager ); if ( SUCCEEDED( hr ) ) { ulOffset += pDecimalInfo->ulNumDigits + 1; //--- Check for special case - decimal number numerator... if ( ulOffset < ulTokenLen && m_pNextChar[ulOffset] == L'/' ) { pIntegerInfo = NULL; pDecimalInfo = NULL; fNegative ? 
ulOffset = 1 : ulOffset = 0; hr = IsFraction( m_pNextChar + ulOffset, pFractionInfo, MemoryManager ); if ( SUCCEEDED( hr ) ) { if ( pFractionInfo->pVulgar ) { ulOffset++; } else { ulOffset += (ULONG)(pFractionInfo->pDenominator->pEndChar - pFractionInfo->pNumerator->pStartChar); } } else if ( hr == E_INVALIDARG ) { hr = S_OK; } } } else if ( hr == E_INVALIDARG ) { hr = S_OK; pDecimalInfo = NULL; } } //--- Try to match an ordinal string else if ( pIntegerInfo && ulOffset < ulTokenLen && isalpha( m_pNextChar[ulOffset] ) ) { switch ( toupper( m_pNextChar[ulOffset] ) ) { case 'S': //--- Must be of the form "...1st" but not "...11st" if ( toupper( m_pNextChar[ulOffset+1] ) == L'T' && m_pNextChar[ulOffset-1] == L'1' && (ulOffset + 2) == ulTokenLen && ( ulOffset == 1 || m_pNextChar[ulOffset-2] != L'1' ) ) { ulOffset += 2; pIntegerInfo->fOrdinal = true; } break; case 'N': //--- Must be of the form "...2nd" but not "...12nd" if ( (ulOffset + 2) == ulTokenLen && toupper(m_pNextChar[ulOffset+1]) == L'D' && m_pNextChar[ulOffset-1] == L'2' && ( ulOffset == 1 || m_pNextChar[ulOffset-2] != L'1' ) ) { ulOffset += 2; pIntegerInfo->fOrdinal = true; } break; case 'R': //--- Must be of the form "...3rd" but not "...13rd" if ( (ulOffset + 2) == ulTokenLen && toupper(m_pNextChar[ulOffset+1]) == L'D' && m_pNextChar[ulOffset-1] == L'3' && ( ulOffset == 1 || m_pNextChar[ulOffset-2] != L'1' ) ) { ulOffset += 2; pIntegerInfo->fOrdinal = true; } break; case 'T': //--- Must be of the form "...[4-9]th" or "...[11-19]th" or "...[0]th" if ( (ulOffset + 2) == ulTokenLen && toupper(m_pNextChar[ulOffset+1]) == L'H' && ( ( m_pNextChar[ulOffset-1] <= L'9' && m_pNextChar[ulOffset-1] >= L'4') || ( m_pNextChar[ulOffset-1] == L'0') || ( ulOffset == 1 || m_pNextChar[ulOffset-2] == L'1') ) ) { ulOffset += 2; pIntegerInfo->fOrdinal = true; } break; default: // Some invalid non-digit character found at the end of the string break; } } //--- Try to match a fraction else { //--- Try to match an attached 
fraction if ( ulOffset < ulTokenLen ) { if ( m_pNextChar[ulOffset] == L'-' ) { ulOffset++; } hr = IsFraction( m_pNextChar + ulOffset, pFractionInfo, MemoryManager ); if ( SUCCEEDED( hr ) ) { if ( pFractionInfo->pVulgar ) { ulOffset++; } else { ulOffset += (ULONG)(pFractionInfo->pDenominator->pEndChar - pFractionInfo->pNumerator->pStartChar); } } else if ( hr == E_INVALIDARG ) { hr = S_OK; } } //--- Try to match an unattached fraction else if ( fMultiItem ) { pIntegerState = &m_pCurrFrag->State; //--- Advance in text m_pNextChar = m_pEndOfCurrItem; hr = SkipWhiteSpaceAndTags( m_pNextChar, m_pEndChar, m_pCurrFrag, MemoryManager, true, &PostIntegerList ); if ( !m_pNextChar && SUCCEEDED( hr ) ) { m_pNextChar = pTempNextChar; m_pEndChar = pTempEndChar; m_pCurrFrag = pTempFrag; } else if ( m_pNextChar && SUCCEEDED( hr ) ) { m_pEndOfCurrItem = FindTokenEnd( m_pNextChar, m_pEndChar ); while ( IsMiscPunctuation( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED || IsGroupEnding( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED || IsQuotationMark( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED || IsEOSItem( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED ) { m_pEndOfCurrItem--; } hr = IsFraction( m_pNextChar, pFractionInfo, MemoryManager ); if ( FAILED( hr ) ) { m_pNextChar = pTempNextChar; m_pEndChar = pTempEndChar; m_pEndOfCurrItem = pTempEndOfItem; m_pCurrFrag = pTempFrag; if ( hr == E_INVALIDARG ) { hr = S_OK; } } else { ulTokenLen = (ULONG)(m_pEndOfCurrItem - m_pNextChar); if ( pFractionInfo->pVulgar ) { ulOffset = 1; } else { ulOffset = (ULONG)(pFractionInfo->pDenominator->pEndChar - pFractionInfo->pNumerator->pStartChar); } } } } } } else { hr = E_INVALIDARG; } //--- If we haven't processed the whole item yet, and it isn't part of a larger item -- //--- e.g. a percent, a degrees number, or a square or cube -- then fail to match it //--- as a number... 
if ( ulOffset != ulTokenLen && !( ulTokenLen == ulOffset + 1 && ( m_pNextChar[ulOffset] == L'%' || m_pNextChar[ulOffset] == L'°' || m_pNextChar[ulOffset] == L'²' || m_pNextChar[ulOffset] == L'³' ) ) ) { m_pNextChar = pTempNextChar; m_pEndOfCurrItem = pTempEndOfItem; m_pEndChar = pTempEndChar; m_pCurrFrag = pTempFrag; hr = E_INVALIDARG; } //--- Fill out pItemNormInfo... if ( SUCCEEDED( hr ) && ( pIntegerInfo || pDecimalInfo || pFractionInfo ) ) { //--- Reset m_pNextChar to handle the Mixed Fraction case... m_pNextChar = pTempNextChar; if ( pIntegerInfo && pIntegerInfo->pEndChar - pIntegerInfo->pStartChar == 4 && !pIntegerInfo->fSeparators && !pIntegerInfo->fOrdinal && !pDecimalInfo && !pFractionInfo && !fNegative && ulOffset == ulTokenLen && ( !Context || _wcsnicmp( Context, L"NUMBER", 6 ) != 0 ) ) { pItemNormInfo = (TTSYearItemInfo*) MemoryManager.GetMemory( sizeof( TTSYearItemInfo ), &hr ); if ( SUCCEEDED( hr ) ) { pItemNormInfo->Type = eDATE_YEAR; ( (TTSYearItemInfo*) pItemNormInfo )->pYear = m_pNextChar; ( (TTSYearItemInfo*) pItemNormInfo )->ulNumDigits = 4; } } else { pItemNormInfo = (TTSNumberItemInfo*) MemoryManager.GetMemory( sizeof( TTSNumberItemInfo ), &hr ); if ( SUCCEEDED( hr ) ) { ZeroMemory( pItemNormInfo, sizeof( TTSNumberItemInfo ) ); if ( pDecimalInfo ) { pItemNormInfo->Type = eNUM_DECIMAL; if ( pIntegerInfo ) { ( (TTSNumberItemInfo*) pItemNormInfo )->pEndChar = pIntegerInfo->pEndChar + pDecimalInfo->ulNumDigits + 1; } else { ( (TTSNumberItemInfo*) pItemNormInfo )->pEndChar = m_pNextChar + pDecimalInfo->ulNumDigits + 1; if ( fNegative ) { ( (TTSNumberItemInfo*) pItemNormInfo )->pEndChar++; } } } else if ( pFractionInfo ) { if ( pFractionInfo->pVulgar ) { ( (TTSNumberItemInfo*) pItemNormInfo )->pEndChar = pFractionInfo->pVulgar + 1; } else { ( (TTSNumberItemInfo*) pItemNormInfo )->pEndChar = pFractionInfo->pDenominator->pEndChar; } if ( pIntegerInfo ) { pItemNormInfo->Type = eNUM_MIXEDFRACTION; } else { pItemNormInfo->Type = eNUM_FRACTION; } } else 
if ( pIntegerInfo ) { if ( pIntegerInfo->fOrdinal ) { ( (TTSNumberItemInfo*) pItemNormInfo )->pEndChar = pIntegerInfo->pEndChar + 2; pItemNormInfo->Type = eNUM_ORDINAL; } else { ( (TTSNumberItemInfo*) pItemNormInfo )->pEndChar = pIntegerInfo->pEndChar; pItemNormInfo->Type = eNUM_CARDINAL; } } } if ( SUCCEEDED( hr ) ) { ( (TTSNumberItemInfo*) pItemNormInfo )->fNegative = fNegative; ( (TTSNumberItemInfo*) pItemNormInfo )->pIntegerPart = pIntegerInfo; ( (TTSNumberItemInfo*) pItemNormInfo )->pDecimalPart = pDecimalInfo; ( (TTSNumberItemInfo*) pItemNormInfo )->pFractionalPart = pFractionInfo; ( (TTSNumberItemInfo*) pItemNormInfo )->pStartChar = m_pNextChar; ( (TTSNumberItemInfo*) pItemNormInfo )->pWordList = new CWordList; } } } else { hr = E_INVALIDARG; } //--- Expand Number into WordList if ( SUCCEEDED( hr ) && pItemNormInfo->Type != eDATE_YEAR ) { TTSWord Word; ZeroMemory( &Word, sizeof( TTSWord ) ); Word.pXmlState = pIntegerState; Word.eWordPartOfSpeech = MS_Unknown; //--- Insert "negative" if ( fNegative ) { Word.pWordText = g_negative.pStr; Word.ulWordLen = g_negative.Len; Word.pLemma = Word.pWordText; Word.ulLemmaLen = Word.ulWordLen; ( (TTSNumberItemInfo*) pItemNormInfo )->pWordList->AddTail( Word ); } //--- Expand Integral Part if ( pIntegerInfo ) { ExpandInteger( pIntegerInfo, Context, *( (TTSNumberItemInfo*) pItemNormInfo )->pWordList ); } //--- Expand Decimal Part if ( pDecimalInfo ) { //--- Insert "point" Word.pWordText = g_decimalpoint.pStr; Word.ulWordLen = g_decimalpoint.Len; Word.pLemma = Word.pWordText; Word.ulLemmaLen = Word.ulWordLen; ( (TTSNumberItemInfo*) pItemNormInfo )->pWordList->AddTail( Word ); ExpandDigits( pDecimalInfo, *( (TTSNumberItemInfo*) pItemNormInfo )->pWordList ); } //--- Expand Fractional Part if ( pFractionInfo ) { //--- Insert Post-Integer Non-Spoken XML States, if any while ( !PostIntegerList.IsEmpty() ) { ( (TTSNumberItemInfo*) pItemNormInfo )->pWordList->AddTail( ( PostIntegerList.RemoveHead() ).Words[0] ); } //--- Insert 
"and", if also an integer part if ( pIntegerInfo ) { Word.pXmlState = &m_pCurrFrag->State; Word.pWordText = g_And.pStr; Word.ulWordLen = g_And.Len; Word.pLemma = Word.pWordText; Word.ulLemmaLen = Word.ulWordLen; ( (TTSNumberItemInfo*) pItemNormInfo )->pWordList->AddTail( Word ); } hr = ExpandFraction( pFractionInfo, *( (TTSNumberItemInfo*) pItemNormInfo )->pWordList ); } } return hr; } /* IsNumber */ /*********************************************************************************************** * ExpandNumber * *--------------* * Description: * Expands Items previously determined to be of type NUM_CARDINAL, NUM_DECIMAL, or * NUM_ORDINAL by IsNumber. * * NOTE: This function does not do parameter validation. Assumed to be done by caller. ********************************************************************* AH **********************/ HRESULT CStdSentEnum::ExpandNumber( TTSNumberItemInfo* pItemInfo, CWordList& WordList ) { SPDBG_FUNC( "NumNorm ExpandNumber" ); HRESULT hr = S_OK; WordList.AddTail( pItemInfo->pWordList ); delete pItemInfo->pWordList; return hr; } /* ExpandNumber */ /*********************************************************************************************** * ExpandPercent * *---------------* * Description: * Expands Items previously determined to be of type NUM_PERCENT by IsNumber. * * NOTE: This function does not do parameter validation. Assumed to be done by caller. 
********************************************************************* AH **********************/
//  Appends the item's pre-built word list to WordList, releases it, and then appends the
//  word held in g_percent.
HRESULT CStdSentEnum::ExpandPercent( TTSNumberItemInfo* pItemInfo, CWordList& WordList )
{
    SPDBG_FUNC( "CStdSentEnum::ExpandPercent" );

    HRESULT hr = S_OK;

    WordList.AddTail( pItemInfo->pWordList );

    //--- Clear the pointer after releasing the list so the item does not keep a dangling pointer
    delete pItemInfo->pWordList;
    pItemInfo->pWordList = NULL;

    TTSWord Word;
    ZeroMemory( &Word, sizeof( TTSWord ) );

    Word.pXmlState          = &m_pCurrFrag->State;
    Word.eWordPartOfSpeech  = MS_Unknown;
    Word.pWordText          = g_percent.pStr;
    Word.ulWordLen          = g_percent.Len;
    Word.pLemma             = Word.pWordText;
    Word.ulLemmaLen         = Word.ulWordLen;
    WordList.AddTail( Word );

    return hr;
} /* ExpandPercent */

/***********************************************************************************************
* ExpandDegrees *
*---------------*
*   Description:
*       Expands Items previously determined to be of type NUM_DEGREES by IsNumber.
*
*   After the number's own words, one of three endings is appended:
*       - g_degree                 when the number is the single digit integer "1" with no
*                                  decimal or fractional part;
*       - g_of, g_a, g_degree      when there is no integer part and the fraction is flagged
*                                  as not standard (fIsStandard is false);
*       - g_degrees                in every other case.
*
*   NOTE: This function does not do parameter validation. Assumed to be done by caller.
********************************************************************* AH **********************/
HRESULT CStdSentEnum::ExpandDegrees( TTSNumberItemInfo* pItemInfo, CWordList& WordList )
{
    SPDBG_FUNC( "CStdSentEnum::ExpandDegrees" );

    HRESULT hr = S_OK;

    WordList.AddTail( pItemInfo->pWordList );

    //--- Clear the pointer after releasing the list so the item does not keep a dangling pointer
    delete pItemInfo->pWordList;
    pItemInfo->pWordList = NULL;

    TTSWord Word;
    ZeroMemory( &Word, sizeof( TTSWord ) );

    Word.pXmlState          = &m_pCurrFrag->State;
    Word.eWordPartOfSpeech  = MS_Unknown;

    if ( !pItemInfo->pDecimalPart       &&
         !pItemInfo->pFractionalPart    &&
         pItemInfo->pIntegerPart        &&
         pItemInfo->pIntegerPart->pEndChar - pItemInfo->pIntegerPart->pStartChar == 1 &&
         pItemInfo->pIntegerPart->pStartChar[0] == L'1' )
    {
        //--- Exactly "1" - singular
        Word.pWordText  = g_degree.pStr;
        Word.ulWordLen  = g_degree.Len;
        Word.pLemma     = Word.pWordText;
        Word.ulLemmaLen = Word.ulWordLen;
    }
    else if ( !pItemInfo->pIntegerPart      &&
              pItemInfo->pFractionalPart    &&
              !pItemInfo->pFractionalPart->fIsStandard )
    {
        //--- Pure fraction - "<fraction> of a degree"
        Word.pWordText  = g_of.pStr;
        Word.ulWordLen  = g_of.Len;
        Word.pLemma     = Word.pWordText;
        Word.ulLemmaLen = Word.ulWordLen;
        WordList.AddTail( Word );

        Word.pWordText  = g_a.pStr;
        Word.ulWordLen  = g_a.Len;
        Word.pLemma     = Word.pWordText;
        Word.ulLemmaLen = Word.ulWordLen;
        WordList.AddTail( Word );

        Word.pWordText  = g_degree.pStr;
        Word.ulWordLen  = g_degree.Len;
        Word.pLemma     = Word.pWordText;
        Word.ulLemmaLen = Word.ulWordLen;
    }
    else
    {
        //--- Everything else - plural
        Word.pWordText  = g_degrees.pStr;
        Word.ulWordLen  = g_degrees.Len;
        Word.pLemma     = Word.pWordText;
        Word.ulLemmaLen = Word.ulWordLen;
    }

    //--- The last word selected above has not been appended yet
    WordList.AddTail( Word );

    return hr;
} /* ExpandDegrees */

/***********************************************************************************************
* ExpandSquare *
*---------------*
*   Description:
*       Expands Items previously determined to be of type NUM_SQUARED by IsNumber.  Appends
*   the item's pre-built word list to WordList, releases it, and then appends the word held
*   in g_squared.
*
*   NOTE: This function does not do parameter validation. Assumed to be done by caller.
********************************************************************* AH **********************/
HRESULT CStdSentEnum::ExpandSquare( TTSNumberItemInfo* pItemInfo, CWordList& WordList )
{
    SPDBG_FUNC( "CStdSentEnum::ExpandSquare" );

    HRESULT hr = S_OK;

    WordList.AddTail( pItemInfo->pWordList );

    //--- Clear the pointer after releasing the list so the item does not keep a dangling pointer
    delete pItemInfo->pWordList;
    pItemInfo->pWordList = NULL;

    TTSWord Word;
    ZeroMemory( &Word, sizeof( TTSWord ) );

    Word.pXmlState          = &m_pCurrFrag->State;
    Word.eWordPartOfSpeech  = MS_Unknown;
    Word.pWordText          = g_squared.pStr;
    Word.ulWordLen          = g_squared.Len;
    Word.pLemma             = Word.pWordText;
    Word.ulLemmaLen         = Word.ulWordLen;
    WordList.AddTail( Word );

    return hr;
} /* ExpandSquare */

/***********************************************************************************************
* ExpandCube *
*---------------*
*   Description:
*       Expands Items previously determined to be of type NUM_CUBED by IsNumber.
*
*   NOTE: This function does not do parameter validation. Assumed to be done by caller.
********************************************************************* AH **********************/ HRESULT CStdSentEnum::ExpandCube( TTSNumberItemInfo* pItemInfo, CWordList& WordList ) { SPDBG_FUNC( "CStdSentEnum::ExpandCube" ); HRESULT hr = S_OK; WordList.AddTail( pItemInfo->pWordList ); delete pItemInfo->pWordList; TTSWord Word; ZeroMemory( &Word, sizeof( TTSWord ) ); Word.pXmlState = &m_pCurrFrag->State; Word.eWordPartOfSpeech = MS_Unknown; Word.pWordText = g_cubed.pStr; Word.ulWordLen = g_cubed.Len; Word.pLemma = Word.pWordText; Word.ulLemmaLen = Word.ulWordLen; WordList.AddTail( Word ); return hr; } /* ExpandCube */ /*********************************************************************************************** * IsInteger * *-----------* * Description: * Helper for IsNumber which matches the integer part... * * RegExp: * { d+ || d(1-3)[,ddd]+ } * ********************************************************************* AH **********************/ HRESULT CStdSentEnum::IsInteger( const WCHAR* pStartChar, TTSIntegerItemInfo*& pIntegerInfo, CSentItemMemory& MemoryManager ) { HRESULT hr = S_OK; ULONG ulOffset = 0, ulCount = 0, ulTokenLen = (ULONG)(m_pEndOfCurrItem - pStartChar); BOOL fSeparators = false, fDone = false; WCHAR wcSeparator, wcDecimalPoint; if ( m_eSeparatorAndDecimal == COMMA_PERIOD ) { wcSeparator = L','; wcDecimalPoint = L'.'; } else { wcSeparator = L'.'; wcDecimalPoint = L','; } //--- Check for first digit if ( !isdigit(pStartChar[ulOffset]) ) { hr = E_INVALIDARG; } else { ulCount++; ulOffset++; } //--- Check for separators ULONG i = ulOffset + 3; while ( SUCCEEDED( hr ) && ulOffset < i && ulOffset < ulTokenLen ) { if ( pStartChar[ulOffset] == wcSeparator ) { //--- Found a separator fSeparators = true; break; } else if ( !isdigit( pStartChar[ulOffset] ) && ( pStartChar[ulOffset] == wcDecimalPoint || pStartChar[ulOffset] == L'%' || pStartChar[ulOffset] == L'°' || pStartChar[ulOffset] == L'²' || pStartChar[ulOffset] == L'³' || pStartChar[ulOffset] == L'-' 
|| pStartChar[ulOffset] == L'¼' || pStartChar[ulOffset] == L'½' || pStartChar[ulOffset] == L'¾' || toupper( pStartChar[ulOffset] ) == L'S' || toupper( pStartChar[ulOffset] ) == L'N' || toupper( pStartChar[ulOffset] ) == L'R' || toupper( pStartChar[ulOffset] ) == L'T' ) ) { fDone = true; break; } else if ( isdigit( pStartChar[ulOffset] ) ) { //--- Just another digit ulCount++; ulOffset++; } else { hr = E_INVALIDARG; break; } } if ( SUCCEEDED( hr ) && !fDone && ulOffset < ulTokenLen ) { if ( !fSeparators ) { //--- No separators. Pattern must be {d+} if this is indeed a number, so just count digits. while ( isdigit( pStartChar[ulOffset] ) && ulOffset < ulTokenLen ) { ulCount++; ulOffset++; } if ( ulOffset != ulTokenLen && !( pStartChar[ulOffset] == wcDecimalPoint || pStartChar[ulOffset] == L'%' || pStartChar[ulOffset] == L'°' || pStartChar[ulOffset] == L'²' || pStartChar[ulOffset] == L'³' || pStartChar[ulOffset] == L'%' || pStartChar[ulOffset] == L'°' || pStartChar[ulOffset] == L'²' || pStartChar[ulOffset] == L'³' || pStartChar[ulOffset] == L'-' || pStartChar[ulOffset] == L'¼' || pStartChar[ulOffset] == L'½' || pStartChar[ulOffset] == L'¾' || toupper( pStartChar[ulOffset] ) == L'S' || toupper( pStartChar[ulOffset] ) == L'N' || toupper( pStartChar[ulOffset] ) == L'R' || toupper( pStartChar[ulOffset] ) == L'T' ) ) { hr = E_INVALIDARG; } } else { //--- Separators. Pattern must be { d(1-3)[,ddd]+ }, so make sure the separators match up while ( SUCCEEDED( hr ) && pStartChar[ulOffset] == wcSeparator && ( ulOffset + 3 ) < ulTokenLen) { ulOffset++; for ( i = ulOffset + 3; SUCCEEDED( hr ) && ulOffset < i; ulOffset++ ) { if ( isdigit( pStartChar[ulOffset] ) ) { ulCount++; } else // Some non-digit character found - abort! 
{ hr = E_INVALIDARG; } } } if ( ulOffset != ulTokenLen && !( pStartChar[ulOffset] == wcDecimalPoint || pStartChar[ulOffset] == L'%' || pStartChar[ulOffset] == L'°' || pStartChar[ulOffset] == L'²' || pStartChar[ulOffset] == L'³' || pStartChar[ulOffset] == L'-' || pStartChar[ulOffset] == L'¼' || pStartChar[ulOffset] == L'½' || pStartChar[ulOffset] == L'¾' || toupper( pStartChar[ulOffset] ) == L'S' || toupper( pStartChar[ulOffset] ) == L'N' || toupper( pStartChar[ulOffset] ) == L'R' || toupper( pStartChar[ulOffset] ) == L'T' ) ) { hr = E_INVALIDARG; } } } if ( SUCCEEDED( hr ) ) { pIntegerInfo = (TTSIntegerItemInfo*) MemoryManager.GetMemory( sizeof( TTSIntegerItemInfo ), &hr ); if ( SUCCEEDED( hr ) ) { ZeroMemory( pIntegerInfo, sizeof( TTSIntegerItemInfo ) ); pIntegerInfo->fSeparators = fSeparators; pIntegerInfo->lLeftOver = ulCount % 3; pIntegerInfo->lNumGroups = ( ulCount - 1 ) / 3; pIntegerInfo->pStartChar = pStartChar; pIntegerInfo->pEndChar = pStartChar + ulOffset; } } return hr; } /* IsInteger */ /*********************************************************************************************** * ExpandInteger * *---------------* * Description: * * * NOTE: This function does not do parameter validation. Assumed to be done by caller. ********************************************************************* AH **********************/ void CStdSentEnum::ExpandInteger( TTSIntegerItemInfo* pItemInfo, const WCHAR* Context, CWordList& WordList ) { SPDBG_FUNC( "CStdSentEnum::ExpandInteger" ); //--- Local variable declarations and initialization BOOL bFinished = false; const WCHAR *pStartChar = pItemInfo->pStartChar, *pEndChar = pItemInfo->pEndChar; ULONG ulOffset = 0, ulTokenLen = (ULONG)(pEndChar - pStartChar), ulTemp = (ULONG)(pItemInfo->lNumGroups + 1); TTSWord Word; ZeroMemory( &Word, sizeof(TTSWord) ); Word.pXmlState = &m_pCurrFrag->State; Word.eWordPartOfSpeech = MS_Unknown; //--- Out of range integer, or integer beginning with one or more zeroes... 
if ( pStartChar[0] == L'0' || ( Context && _wcsicmp( Context, L"NUMBER_DIGIT" ) == 0 ) || pItemInfo->lNumGroups >= sp_countof(g_quantifiers) ) { pItemInfo->fDigitByDigit = true; pItemInfo->ulNumDigits = 0; for ( ULONG i = 0; i < ulTokenLen; i++ ) { if ( isdigit( pStartChar[i] ) ) { ExpandDigit( pStartChar[i], pItemInfo->Groups[0], WordList ); pItemInfo->ulNumDigits++; } } } //--- Expanding a number < 1000 else if ( pItemInfo->lNumGroups == 0 ) { // 0th through 999th... if ( pItemInfo->fOrdinal ) { switch ( pItemInfo->lLeftOver ) { case 1: // 0th through 9th... ExpandDigitOrdinal( pStartChar[ulOffset], pItemInfo->Groups[0], WordList ); break; case 2: // 10th through 99th... ExpandTwoOrdinal( pStartChar + ulOffset, pItemInfo->Groups[0], WordList ); break; case 0: // 100th through 999th... ExpandThreeOrdinal( pStartChar + ulOffset, pItemInfo->Groups[0], WordList ); break; case -1: ulTemp = 0; pItemInfo->lLeftOver = 0; break; } } // 0 through 999... else { switch ( pItemInfo->lLeftOver ) { case 1: // 0 through 9... ExpandDigit( pStartChar[ulOffset], pItemInfo->Groups[0], WordList ); ulOffset += 1; break; case 2: // 10 through 99... ExpandTwoDigits( pStartChar + ulOffset, pItemInfo->Groups[0], WordList ); ulOffset += 2; break; case 0: // 100 through 999... ExpandThreeDigits( pStartChar + ulOffset, pItemInfo->Groups[0], WordList ); ulOffset += 3; break; case -1: ulTemp = 0; pItemInfo->lLeftOver = 0; break; } } } else { //--- 1000 through highest number covered, e.g. 1,234,567 //--- Expand first grouping, e.g. 
1 million //--- Expand digit group switch ( pItemInfo->lLeftOver ) { case 1: ExpandDigit( pStartChar[ulOffset], pItemInfo->Groups[pItemInfo->lNumGroups], WordList ); ulOffset += 1; break; case 2: ExpandTwoDigits( pStartChar + ulOffset, pItemInfo->Groups[pItemInfo->lNumGroups], WordList ); ulOffset += 2; break; case 0: ExpandThreeDigits( pStartChar + ulOffset, pItemInfo->Groups[pItemInfo->lNumGroups], WordList ); ulOffset += 3; break; } //--- Special Case: rare ordinal cases - e.g. 1,000,000th if ( pItemInfo->fOrdinal && Zeroes(pStartChar + ulOffset) ) { //--- Insert ordinal quantifier pItemInfo->Groups[pItemInfo->lNumGroups].fQuantifier = true; Word.pWordText = g_quantifiersOrdinal[pItemInfo->lNumGroups].pStr; Word.ulWordLen = g_quantifiersOrdinal[pItemInfo->lNumGroups--].Len; Word.pLemma = Word.pWordText; Word.ulLemmaLen = Word.ulWordLen; WordList.AddTail( Word ); bFinished = true; } //--- Default Case else { //--- Insert quantifier pItemInfo->Groups[pItemInfo->lNumGroups].fQuantifier = true; Word.pWordText = g_quantifiers[pItemInfo->lNumGroups].pStr; Word.ulWordLen = g_quantifiers[pItemInfo->lNumGroups--].Len; Word.pLemma = Word.pWordText; Word.ulLemmaLen = Word.ulWordLen; WordList.AddTail( Word ); } //--- Expand rest of groupings which need to be followed by a quantifier while ( pItemInfo->lNumGroups > 0 && !bFinished ) { if ( pItemInfo->fSeparators ) { ulOffset++; } //--- Expand digit group ExpandThreeDigits( pStartChar + ulOffset, pItemInfo->Groups[pItemInfo->lNumGroups], WordList ); ulOffset += 3; //--- Special case: rare ordinal cases, e.g. 
1,234,000th if ( pItemInfo->fOrdinal && Zeroes( pStartChar + ulOffset ) ) { //--- Insert ordinal quantifier pItemInfo->Groups[pItemInfo->lNumGroups].fQuantifier = true; Word.pWordText = g_quantifiersOrdinal[pItemInfo->lNumGroups].pStr; Word.ulWordLen = g_quantifiersOrdinal[pItemInfo->lNumGroups--].Len; Word.pLemma = Word.pWordText; Word.ulLemmaLen = Word.ulWordLen; WordList.AddTail( Word ); bFinished = true; } //--- Default Case else if ( !ThreeZeroes( pStartChar + ulOffset - 3 ) ) { //--- Insert quantifier pItemInfo->Groups[pItemInfo->lNumGroups].fQuantifier = true; Word.pWordText = g_quantifiers[pItemInfo->lNumGroups].pStr; Word.ulWordLen = g_quantifiers[pItemInfo->lNumGroups--].Len; Word.pLemma = Word.pWordText; Word.ulLemmaLen = Word.ulWordLen; WordList.AddTail( Word ); } //--- Special Case: this grouping is all zeroes, e.g. 1,000,567 else { pItemInfo->lNumGroups--; } } //--- Expand final grouping, which requires no quantifier if ( pItemInfo->fSeparators && !bFinished ) { ulOffset++; } if ( pItemInfo->fOrdinal && !bFinished ) { ExpandThreeOrdinal( pStartChar + ulOffset, pItemInfo->Groups[pItemInfo->lNumGroups], WordList ); ulOffset += 3; } else if ( !bFinished ) { ExpandThreeDigits( pStartChar + ulOffset, pItemInfo->Groups[pItemInfo->lNumGroups], WordList ); ulOffset += 3; } } pItemInfo->lNumGroups = (long) ulTemp; } /* ExpandInteger */ /*********************************************************************************************** * IsDigitString * *---------------* * Description: * Helper for IsNumber, IsPhoneNumber, etc. which matches a digit string... 
* * RegExp: * d+ * ********************************************************************* AH **********************/ HRESULT CStdSentEnum::IsDigitString( const WCHAR* pStartChar, TTSDigitsItemInfo*& pDigitsInfo, CSentItemMemory& MemoryManager ) { HRESULT hr = S_OK; ULONG ulOffset = 0; while ( pStartChar + ulOffset < m_pEndOfCurrItem && isdigit( pStartChar[ulOffset] ) ) { ulOffset++; } if ( ulOffset ) { pDigitsInfo = (TTSDigitsItemInfo*) MemoryManager.GetMemory( sizeof( TTSDigitsItemInfo ), &hr ); if ( SUCCEEDED( hr ) ) { ZeroMemory( pDigitsInfo, sizeof( pDigitsInfo ) ); pDigitsInfo->pFirstDigit = pStartChar; pDigitsInfo->ulNumDigits = ulOffset; } } else { hr = E_INVALIDARG; } return hr; } /* IsDigitString */ /*********************************************************************************************** * ExpandDigits * *--------------* * Description: * Expands a string of digits, digit by digit. * * Note: This function does not do parameter validation. Assumed to be done by caller. ********************************************************************* AH **********************/ void CStdSentEnum::ExpandDigits( TTSDigitsItemInfo* pItemInfo, CWordList& WordList ) { SPDBG_FUNC( "CStdSentEnum::ExpandDigits" ); for ( ULONG i = 0; i < pItemInfo->ulNumDigits; i++ ) { NumberGroup Garbage; ExpandDigit( pItemInfo->pFirstDigit[i], Garbage, WordList ); } } /* ExpandDigits */ /*********************************************************************************************** * IsFraction * *------------* * Description: * Helper for IsNumber which matches a fraction... 
* * RegExp: * { NUM_CARDINAL || NUM_DECIMAL } / { NUM_CARDINAL || NUM_DECIMAL } * ********************************************************************* AH **********************/ HRESULT CStdSentEnum::IsFraction( const WCHAR* pStartChar, TTSFractionItemInfo*& pFractionInfo, CSentItemMemory& MemoryManager ) { SPDBG_FUNC( "CStdSentEnum::IsFraction" ); HRESULT hr = S_OK; ULONG ulTokenLen = (ULONG)(m_pEndOfCurrItem - pStartChar); if ( ulTokenLen ) { //--- Check for Vulgar Fraction if ( pStartChar[0] == L'¼' || pStartChar[0] == L'½' || pStartChar[0] == L'¾' ) { pFractionInfo = (TTSFractionItemInfo*) MemoryManager.GetMemory( sizeof( TTSFractionItemInfo ), &hr ); if ( SUCCEEDED( hr ) ) { ZeroMemory( pFractionInfo, sizeof( TTSFractionItemInfo ) ); pFractionInfo->pVulgar = pStartChar; pFractionInfo->pNumerator = (TTSNumberItemInfo*) MemoryManager.GetMemory( sizeof( TTSNumberItemInfo ), &hr ); if ( SUCCEEDED( hr ) ) { ZeroMemory( pFractionInfo->pNumerator, sizeof( TTSNumberItemInfo ) ); pFractionInfo->pDenominator = (TTSNumberItemInfo*) MemoryManager.GetMemory( sizeof( TTSNumberItemInfo ), &hr ); if ( SUCCEEDED( hr ) ) { ZeroMemory( pFractionInfo->pDenominator, sizeof( TTSNumberItemInfo ) ); pFractionInfo->pNumerator->pIntegerPart = (TTSIntegerItemInfo*) MemoryManager.GetMemory( sizeof( TTSIntegerItemInfo ), &hr ); if ( SUCCEEDED( hr ) ) { ZeroMemory( pFractionInfo->pNumerator->pIntegerPart, sizeof( TTSIntegerItemInfo ) ); pFractionInfo->pDenominator->pIntegerPart = (TTSIntegerItemInfo*) MemoryManager.GetMemory( sizeof( TTSIntegerItemInfo ), &hr ); if ( SUCCEEDED( hr ) ) { ZeroMemory( pFractionInfo->pDenominator->pIntegerPart, sizeof( TTSIntegerItemInfo ) ); pFractionInfo->fIsStandard = false; pFractionInfo->pNumerator->pIntegerPart->lLeftOver = 1; pFractionInfo->pNumerator->pIntegerPart->lNumGroups = 1; pFractionInfo->pNumerator->pIntegerPart->Groups[0].fOnes = true; pFractionInfo->pDenominator->pIntegerPart->lLeftOver = 1; 
pFractionInfo->pDenominator->pIntegerPart->lNumGroups = 1; pFractionInfo->pDenominator->pIntegerPart->Groups[0].fOnes = true; } } } } } } //--- Check for multi-character fraction else { TTSItemInfo *pNumeratorInfo = NULL, *pDenominatorInfo = NULL; const WCHAR* pTempNextChar = m_pNextChar, *pTempEndOfCurrItem = m_pEndOfCurrItem; m_pNextChar = pStartChar; m_pEndOfCurrItem = wcschr( pStartChar, L'/' ); if ( !m_pEndOfCurrItem || m_pEndOfCurrItem >= pTempEndOfCurrItem ) { hr = E_INVALIDARG; } //--- Try to get numerator if ( SUCCEEDED( hr ) ) { hr = IsNumber( pNumeratorInfo, L"NUMBER", MemoryManager, false ); } if ( SUCCEEDED( hr ) && pNumeratorInfo->Type != eNUM_MIXEDFRACTION && pNumeratorInfo->Type != eNUM_FRACTION && pNumeratorInfo->Type != eNUM_ORDINAL ) { if ( ( (TTSNumberItemInfo*) pNumeratorInfo )->pIntegerPart ) { m_pNextChar += ( (TTSNumberItemInfo*) pNumeratorInfo )->pIntegerPart->pEndChar - ( (TTSNumberItemInfo*) pNumeratorInfo )->pIntegerPart->pStartChar; } if ( ( (TTSNumberItemInfo*) pNumeratorInfo )->pDecimalPart ) { m_pNextChar += ( (TTSNumberItemInfo*) pNumeratorInfo )->pDecimalPart->ulNumDigits + 1; } } else if ( SUCCEEDED( hr ) ) { delete ( (TTSNumberItemInfo*) pNumeratorInfo )->pWordList; hr = E_INVALIDARG; } m_pEndOfCurrItem = pTempEndOfCurrItem; //--- Try to get denominator if ( SUCCEEDED( hr ) && m_pNextChar[0] == L'/' ) { m_pNextChar++; hr = IsNumber( pDenominatorInfo, L"NUMBER", MemoryManager, false ); if ( SUCCEEDED( hr ) && pDenominatorInfo->Type != eNUM_MIXEDFRACTION && pDenominatorInfo->Type != eNUM_FRACTION && pDenominatorInfo->Type != eNUM_ORDINAL ) { pFractionInfo = ( TTSFractionItemInfo*) MemoryManager.GetMemory( sizeof( TTSFractionItemInfo ), &hr ); if ( SUCCEEDED( hr ) ) { ZeroMemory( pFractionInfo, sizeof( TTSFractionItemInfo ) ); pFractionInfo->pNumerator = (TTSNumberItemInfo*) pNumeratorInfo; pFractionInfo->pDenominator = (TTSNumberItemInfo*) pDenominatorInfo; pFractionInfo->pVulgar = NULL; pFractionInfo->fIsStandard = false; } } else 
if ( SUCCEEDED( hr ) ) { delete ( (TTSNumberItemInfo*) pNumeratorInfo )->pWordList; delete ( (TTSNumberItemInfo*) pDenominatorInfo )->pWordList; hr = E_INVALIDARG; } else { delete ( (TTSNumberItemInfo*) pNumeratorInfo )->pWordList; } } else if ( SUCCEEDED( hr ) ) { hr = E_INVALIDARG; delete ( (TTSNumberItemInfo*) pNumeratorInfo )->pWordList; } m_pNextChar = pTempNextChar; } } else { hr = E_INVALIDARG; } return hr; } /* IsFraction */ /*********************************************************************************************** * ExpandFraction * *----------------* * Description: * Expands Items previously determined to be of type NUM_FRACTION by IsFraction. * * NOTE: This function does not do parameter validation. Assumed to be done by caller. ********************************************************************* AH **********************/ HRESULT CStdSentEnum::ExpandFraction( TTSFractionItemInfo* pItemInfo, CWordList& WordList ) { SPDBG_FUNC( "CStdSentEnum::ExpandFraction" ); HRESULT hr = S_OK; TTSWord Word; ZeroMemory( &Word, sizeof(TTSWord) ); Word.pXmlState = &m_pCurrFrag->State; Word.eWordPartOfSpeech = MS_Unknown; //--- Special case - vulgar fractions ( ¼, ½, ¾ ) if ( pItemInfo->pVulgar ) { if ( pItemInfo->pVulgar[0] == L'¼' ) { Word.pWordText = g_ones[1].pStr; Word.ulWordLen = g_ones[1].Len; Word.pLemma = Word.pWordText; Word.ulLemmaLen = Word.ulWordLen; WordList.AddTail( Word ); Word.pWordText = g_onesOrdinal[4].pStr; Word.ulWordLen = g_onesOrdinal[4].Len; Word.pLemma = Word.pWordText; Word.ulLemmaLen = Word.ulWordLen; WordList.AddTail( Word ); } else if ( pItemInfo->pVulgar[0] == L'½' ) { Word.pWordText = g_ones[1].pStr; Word.ulWordLen = g_ones[1].Len; Word.pLemma = Word.pWordText; Word.ulLemmaLen = Word.ulWordLen; WordList.AddTail( Word ); Word.pWordText = g_Half.pStr; Word.ulWordLen = g_Half.Len; Word.pLemma = Word.pWordText; Word.ulLemmaLen = Word.ulWordLen; WordList.AddTail( Word ); } else { Word.pWordText = g_ones[3].pStr; Word.ulWordLen = 
g_ones[3].Len; Word.pLemma = Word.pWordText; Word.ulLemmaLen = Word.ulWordLen; WordList.AddTail( Word ); Word.pWordText = g_PluralDenominators[4].pStr; Word.ulWordLen = g_PluralDenominators[4].Len; Word.pLemma = Word.pWordText; Word.ulLemmaLen = Word.ulWordLen; WordList.AddTail( Word ); } } else { //--- Insert Numerator WordList WordList.AddTail( pItemInfo->pNumerator->pWordList ); delete pItemInfo->pNumerator->pWordList; //--- Expand denominator ---// //--- If no decimal part, must check for special cases ( x/2 - x/9, x/10, x/100 ) if ( !pItemInfo->pDenominator->pDecimalPart && !pItemInfo->pNumerator->pDecimalPart && !pItemInfo->pDenominator->fNegative ) { //--- Check for special cases - halves through ninths if ( ( pItemInfo->pDenominator->pEndChar - pItemInfo->pDenominator->pStartChar ) == 1 && pItemInfo->pDenominator->pStartChar[0] != L'1' ) { pItemInfo->fIsStandard = false; //--- Insert singular form of denominator if ( ( pItemInfo->pNumerator->pEndChar - pItemInfo->pNumerator->pStartChar ) == 1 && pItemInfo->pNumerator->pStartChar[0] == L'1' ) { if ( pItemInfo->pDenominator->pStartChar[0] == L'2' ) { Word.pWordText = g_Half.pStr; Word.ulWordLen = g_Half.Len; Word.pLemma = Word.pWordText; Word.ulLemmaLen = Word.ulWordLen; WordList.AddTail( Word ); } else { ExpandDigitOrdinal( pItemInfo->pDenominator->pStartChar[0], pItemInfo->pDenominator->pIntegerPart->Groups[0], WordList ); } } //--- Insert plural form of denominator else { ULONG index = pItemInfo->pDenominator->pStartChar[0] - L'0'; Word.pWordText = g_PluralDenominators[index].pStr; Word.ulWordLen = g_PluralDenominators[index].Len; Word.pLemma = Word.pWordText; Word.ulLemmaLen = Word.ulWordLen; WordList.AddTail( Word ); } } //--- Check for special case - tenths else if ( ( pItemInfo->pDenominator->pEndChar - pItemInfo->pDenominator->pStartChar ) == 2 && wcsncmp( pItemInfo->pDenominator->pStartChar, L"10", 2 ) == 0 ) { pItemInfo->fIsStandard = false; //--- Insert singular form of denominator if ( ( 
pItemInfo->pNumerator->pEndChar - pItemInfo->pNumerator->pStartChar ) == 1 && pItemInfo->pNumerator->pStartChar[0] == L'1' ) { ExpandTwoOrdinal( pItemInfo->pDenominator->pStartChar, pItemInfo->pDenominator->pIntegerPart->Groups[0], WordList ); } //--- Insert plural form denominator else { Word.pWordText = g_Tenths.pStr; Word.ulWordLen = g_Tenths.Len; Word.pLemma = Word.pWordText; Word.ulLemmaLen = Word.ulWordLen; WordList.AddTail( Word ); } } //--- Check for special case - sixteenths else if ( ( pItemInfo->pDenominator->pEndChar - pItemInfo->pDenominator->pStartChar ) == 2 && wcsncmp( pItemInfo->pDenominator->pStartChar, L"16", 2 ) == 0 ) { pItemInfo->fIsStandard = false; //--- Insert singular form of denominator if ( ( pItemInfo->pNumerator->pEndChar - pItemInfo->pNumerator->pStartChar ) == 1 && pItemInfo->pNumerator->pStartChar[0] == L'1' ) { ExpandTwoOrdinal( pItemInfo->pDenominator->pStartChar, pItemInfo->pDenominator->pIntegerPart->Groups[0], WordList ); } //--- Insert plural form denominator else { Word.pWordText = g_Sixteenths.pStr; Word.ulWordLen = g_Sixteenths.Len; Word.pLemma = Word.pWordText; Word.ulLemmaLen = Word.ulWordLen; WordList.AddTail( Word ); } } //--- Check for special case - hundredths else if ( ( pItemInfo->pDenominator->pEndChar - pItemInfo->pDenominator->pStartChar ) == 3 && wcsncmp( pItemInfo->pDenominator->pStartChar, L"100", 3 ) == 0 ) { pItemInfo->fIsStandard = false; //--- Insert singular form of denominator if ( ( pItemInfo->pNumerator->pEndChar - pItemInfo->pNumerator->pStartChar ) == 1 && pItemInfo->pNumerator->pStartChar[0] == L'1' ) { ExpandThreeOrdinal( pItemInfo->pDenominator->pStartChar, pItemInfo->pDenominator->pIntegerPart->Groups[0], WordList ); } //--- Insert plural form of denominator else { Word.pWordText = g_Hundredths.pStr; Word.ulWordLen = g_Hundredths.Len; Word.pLemma = Word.pWordText; Word.ulLemmaLen = Word.ulWordLen; WordList.AddTail( Word ); } } else { pItemInfo->fIsStandard = true; } } else { 
pItemInfo->fIsStandard = true; } //--- Default case - Numerator "over" Denominator if ( pItemInfo->fIsStandard ) { //--- Insert "over" Word.pWordText = g_Over.pStr; Word.ulWordLen = g_Over.Len; Word.pLemma = Word.pWordText; Word.ulLemmaLen = Word.ulWordLen; WordList.AddTail( Word ); //--- Insert denominator WordList WordList.AddTail( pItemInfo->pDenominator->pWordList ); } delete pItemInfo->pDenominator->pWordList; } return hr; } /* ExpandFraction */ /*********************************************************************************************** * ExpandDigit * *-------------* * Description: * Expands single digits into words, and inserts them into WordList * * Note: This function does not do parameter validation. Assumed to be done by caller. ********************************************************************* AH **********************/ void CStdSentEnum::ExpandDigit( const WCHAR Number, NumberGroup& NormGroupInfo, CWordList& WordList ) { SPDBG_FUNC( "CStdSentEnum::ExpandDigit" ); SPDBG_ASSERT( isdigit(Number) ); // 0-9 ULONG Index = Number - L'0'; TTSWord Word; ZeroMemory( &Word, sizeof(TTSWord) ); Word.pXmlState = &m_pCurrFrag->State; Word.pWordText = g_ones[Index].pStr; Word.ulWordLen = g_ones[Index].Len; Word.pLemma = Word.pWordText; Word.ulLemmaLen = Word.ulWordLen; Word.eWordPartOfSpeech = MS_Unknown; WordList.AddTail( Word ); NormGroupInfo.fOnes = true; } /* ExpandDigit */ /*********************************************************************************************** * ExpandTwo * *-----------* * Description: * Expands two digit strings into words, and inserts them into WordList. * * Note: This function does not do parameter validation. Assumed to be done by caller. 
********************************************************************* AH **********************/ void CStdSentEnum::ExpandTwoDigits( const WCHAR *NumberString, NumberGroup& NormGroupInfo, CWordList& WordList ) { SPDBG_FUNC( "CStdSentEnum::ExpandTwoDigits" ); SPDBG_ASSERT( NumberString && wcslen(NumberString) >= 2 && isdigit(NumberString[0]) && isdigit(NumberString[1]) ); // 10-99 TTSWord Word; ZeroMemory( &Word, sizeof(TTSWord) ); ULONG IndexOne = NumberString[0] - L'0'; ULONG IndexTwo = NumberString[1] - L'0'; Word.pXmlState = &m_pCurrFrag->State; Word.eWordPartOfSpeech = MS_Unknown; if ( IndexOne != 1 ) { // 20-99, or 00-09 if (IndexOne != 0) { Word.pWordText = g_tens[IndexOne].pStr; Word.ulWordLen = g_tens[IndexOne].Len; Word.pLemma = Word.pWordText; Word.ulLemmaLen = Word.ulWordLen; WordList.AddTail( Word ); NormGroupInfo.fTens = true; } if ( IndexTwo != 0 ) { ExpandDigit( NumberString[1], NormGroupInfo, WordList ); NormGroupInfo.fOnes = true; } } else { // 10-19 Word.pWordText = g_teens[IndexTwo].pStr; Word.ulWordLen = g_teens[IndexTwo].Len; Word.pLemma = Word.pWordText; Word.ulLemmaLen = Word.ulWordLen; WordList.AddTail( Word ); NormGroupInfo.fOnes = true; } } /* ExpandTwo */ /*********************************************************************************************** * ExpandThree * *-------------* * Description: * Expands three digit strings into words, and inserts them into WordList. * * Note: This function does not do parameter validation. Assumed to be done by caller. 
********************************************************************* AH **********************/ void CStdSentEnum::ExpandThreeDigits( const WCHAR *NumberString, NumberGroup& NormGroupInfo, CWordList& WordList ) { SPDBG_FUNC( "CStdSentEnum::ExpandThreeDigits" ); SPDBG_ASSERT( NumberString && wcslen(NumberString) >= 3 && isdigit(NumberString[0]) && isdigit(NumberString[1]) && isdigit(NumberString[2]) ); // 100-999 TTSWord Word; ZeroMemory( &Word, sizeof(TTSWord) ); ULONG IndexOne = NumberString[0] - L'0'; Word.pXmlState = &m_pCurrFrag->State; Word.eWordPartOfSpeech = MS_Unknown; if ( IndexOne != 0 ) { // Take care of hundreds... ExpandDigit( NumberString[0], NormGroupInfo, WordList ); Word.pWordText = g_quantifiers[0].pStr; Word.ulWordLen = g_quantifiers[0].Len; Word.pLemma = Word.pWordText; Word.ulLemmaLen = Word.ulWordLen; WordList.AddTail( Word ); NormGroupInfo.fHundreds = true; NormGroupInfo.fOnes = false; } // Take care of tens and ones... ExpandTwoDigits( NumberString + 1, NormGroupInfo, WordList ); } /* ExpandThree */ /*********************************************************************************************** * ExpandDigitOrdinal * *--------------------* * Description: * Expands single digit ordinal strings into words, and inserts them into WordList. * * Note: This function does not do parameter validation. Assumed to be done by caller. 
********************************************************************* AH **********************/ void CStdSentEnum::ExpandDigitOrdinal( const WCHAR Number, NumberGroup& NormGroupInfo, CWordList& WordList ) { SPDBG_FUNC( "CStdSentEnum::ExpandDigitOrdinal" ); SPDBG_ASSERT( isdigit(Number) ); // 0-9 ULONG Index = Number - L'0'; TTSWord Word; ZeroMemory( &Word, sizeof(TTSWord) ); Word.pXmlState = &m_pCurrFrag->State; Word.pWordText = g_onesOrdinal[Index].pStr; Word.ulWordLen = g_onesOrdinal[Index].Len; Word.pLemma = Word.pWordText; Word.ulLemmaLen = Word.ulWordLen; Word.eWordPartOfSpeech = MS_Unknown; WordList.AddTail( Word ); NormGroupInfo.fOnes = true; } /* ExpandDigitOrdinal */ /*********************************************************************************************** * ExpandTwoOrdinal * *------------------* * Description: * Expands two digit ordinal strings into words, and inserts them into WordList. * * Note: This function does not do parameter validation. Assumed to be done by caller. 
********************************************************************* AH **********************/ void CStdSentEnum::ExpandTwoOrdinal( const WCHAR *NumberString, NumberGroup& NormGroupInfo, CWordList& WordList ) { SPDBG_FUNC( "CStdSentEnum::ExpandTwoOrdinal" ); SPDBG_ASSERT( NumberString && wcslen(NumberString) >= 2 && isdigit(NumberString[0]) && isdigit(NumberString[1]) ); // 10-99 TTSWord Word; ZeroMemory( &Word, sizeof(TTSWord) ); ULONG IndexOne = NumberString[0] - L'0'; ULONG IndexTwo = NumberString[1] - L'0'; Word.pXmlState = &m_pCurrFrag->State; Word.eWordPartOfSpeech = MS_Unknown; if ( IndexOne != 1 ) { // 20-99, or 00-09 if (IndexOne != 0) { if ( IndexTwo != 0 ) { Word.pWordText = g_tens[IndexOne].pStr; Word.ulWordLen = g_tens[IndexOne].Len; Word.pLemma = Word.pWordText; Word.ulLemmaLen = Word.ulWordLen; WordList.AddTail( Word ); NormGroupInfo.fTens = true; ExpandDigitOrdinal( NumberString[1], NormGroupInfo, WordList ); NormGroupInfo.fOnes = true; } else { Word.pWordText = g_tensOrdinal[IndexOne].pStr; Word.ulWordLen = g_tensOrdinal[IndexOne].Len; Word.pLemma = Word.pWordText; Word.ulLemmaLen = Word.ulWordLen; WordList.AddTail( Word ); } } else { ExpandDigitOrdinal( NumberString[1], NormGroupInfo, WordList ); } } else { // 10-19 Word.pWordText = g_teensOrdinal[IndexTwo].pStr; Word.ulWordLen = g_teensOrdinal[IndexTwo].Len; Word.pLemma = Word.pWordText; Word.ulLemmaLen = Word.ulWordLen; WordList.AddTail( Word ); NormGroupInfo.fOnes = true; } } /* ExpandTwoOrdinal */ /*********************************************************************************************** * ExpandThreeOrdinal * *--------------------* * Description: * Expands three digit ordinal strings into words, and inserts them into WordList. * * Note: This function does not do parameter validation. Assumed to be done by caller. 
********************************************************************* AH **********************/ void CStdSentEnum::ExpandThreeOrdinal( const WCHAR *NumberString, NumberGroup& NormGroupInfo, CWordList& WordList ) { SPDBG_FUNC( "CStdSentEnum::ExpandThreeDigits" ); SPDBG_ASSERT( NumberString && wcslen(NumberString) >= 3 && isdigit(NumberString[0]) && isdigit(NumberString[1]) && isdigit(NumberString[2]) ); // 100-999 TTSWord Word; ZeroMemory( &Word, sizeof(TTSWord) ); ULONG IndexOne = NumberString[0] - L'0'; Word.pXmlState = &m_pCurrFrag->State; Word.eWordPartOfSpeech = MS_Unknown; if ( IndexOne != 0 ) { ExpandDigit( NumberString[0], NormGroupInfo, WordList ); //--- Special case - x hundredth if ( Zeroes( NumberString + 1 ) ) { Word.pWordText = g_quantifiersOrdinal[0].pStr; Word.ulWordLen = g_quantifiersOrdinal[0].Len; Word.pLemma = Word.pWordText; Word.ulLemmaLen = Word.ulWordLen; WordList.AddTail( Word ); NormGroupInfo.fHundreds = true; NormGroupInfo.fOnes = false; } //--- Default case - x hundred yth else { Word.pWordText = g_quantifiers[0].pStr; Word.ulWordLen = g_quantifiers[0].Len; Word.pLemma = Word.pWordText; Word.ulLemmaLen = Word.ulWordLen; WordList.AddTail( Word ); ExpandTwoOrdinal( NumberString + 1, NormGroupInfo, WordList ); NormGroupInfo.fHundreds = true; } } //--- Special case - no hundreds else { ExpandTwoOrdinal( NumberString + 1, NormGroupInfo, WordList ); } } /* ExpandThreeOrdinal */ /*********************************************************************************************** * MatchQuantifier * *-----------------* * Description: * Checks the incoming Item's text to determine whether or not it * is a numerical quantifier. 
********************************************************************* AH **********************/ int MatchQuantifier( const WCHAR*& pStartChar, const WCHAR*& pEndChar ) { int Index = -1; for (int i = 0; i < sp_countof(g_quantifiers); i++) { if ( pEndChar - pStartChar >= g_quantifiers[i].Len && wcsnicmp( pStartChar, g_quantifiers[i].pStr, g_quantifiers[i].Len ) == 0 ) { pStartChar += g_quantifiers[i].Len; Index = i; break; } } return Index; } /* MatchQuantifier */ /*********************************************************************************************** * IsCurrency * *------------* * Description: * Checks the incoming Item's text to determine whether or not it * is a currency. * * RegExp: * { [CurrencySign] { d+ || d(1-3)[,ddd]+ } { [.]d+ }? } { [whitespace] [quantifier] }? || * { { d+ || d(1-3)[,ddd]+ } { [.]d+ }? { [whitespace] [quantifier] }? [whitespace]? [CurrencySign] } * * Types assigned: * NUM_CURRENCY ********************************************************************* AH **********************/ HRESULT CStdSentEnum::IsCurrency( TTSItemInfo*& pItemNormInfo, CSentItemMemory& MemoryManager, CWordList& WordList ) { SPDBG_FUNC( "NumNorm IsCurrency" ); HRESULT hr = S_OK; const WCHAR *pTempNextChar = m_pNextChar, *pTempEndOfItem = m_pEndOfCurrItem, *pTempEndChar = m_pEndChar; const SPVTEXTFRAG* pTempFrag = m_pCurrFrag; const SPVSTATE *pNumberXMLState = NULL, *pSymbolXMLState = NULL, *pQuantifierXMLState = NULL; CItemList PostNumberList, PostSymbolList; int iSymbolIndex = -1, iQuantIndex = -1; TTSItemInfo* pNumberInfo = NULL; BOOL fDone = false, fNegative = false; WCHAR wcDecimalPoint = ( m_eSeparatorAndDecimal == COMMA_PERIOD ? L'.' 
: L',' ); //--- Try to match [CurrencySign] [Number] [Quantifier] NORM_POSITION ePosition = UNATTACHED; if ( m_pNextChar[0] == L'-' ) { fNegative = true; m_pNextChar++; } iSymbolIndex = MatchCurrencySign( m_pNextChar, m_pEndOfCurrItem, ePosition ); if ( iSymbolIndex >= 0 && ePosition == PRECEDING ) { pSymbolXMLState = &m_pCurrFrag->State; //--- Skip any whitespace in between the currency sign and the number... hr = SkipWhiteSpaceAndTags( m_pNextChar, m_pEndChar, m_pCurrFrag, MemoryManager, true, &PostSymbolList ); if ( !m_pNextChar ) { hr = E_INVALIDARG; } if ( SUCCEEDED( hr ) ) { m_pEndOfCurrItem = FindTokenEnd( m_pNextChar, m_pEndChar ); while ( IsMiscPunctuation( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED || IsGroupEnding( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED || IsQuotationMark( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED || IsEOSItem( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED ) { fDone = true; m_pEndOfCurrItem--; } } //--- Try to match a number string if ( SUCCEEDED( hr ) ) { hr = IsNumberCategory( pNumberInfo, L"NUMBER", MemoryManager ); if ( SUCCEEDED( hr ) ) { if ( pNumberInfo->Type != eNUM_CARDINAL && pNumberInfo->Type != eNUM_DECIMAL && pNumberInfo->Type != eNUM_FRACTION && pNumberInfo->Type != eNUM_MIXEDFRACTION ) { hr = E_INVALIDARG; } else { pNumberXMLState = &m_pCurrFrag->State; } } //--- Skip any whitespace in between the number and the quantifier... 
if ( !fDone && SUCCEEDED( hr ) ) { const WCHAR *pTempNextChar = m_pNextChar, *pTempEndChar = m_pEndChar; const WCHAR *pTempEndOfItem = m_pEndOfCurrItem; const SPVTEXTFRAG *pTempFrag = m_pCurrFrag; m_pNextChar = m_pEndOfCurrItem; hr = SkipWhiteSpaceAndTags( m_pNextChar, m_pEndChar, m_pCurrFrag, MemoryManager, true, &PostNumberList ); if ( m_pNextChar && SUCCEEDED( hr ) ) { m_pEndOfCurrItem = FindTokenEnd( m_pNextChar, m_pEndChar ); while ( IsMiscPunctuation( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED || IsGroupEnding( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED || IsQuotationMark( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED || IsEOSItem( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED ) { m_pEndOfCurrItem--; } //--- Try to match a quantifier iQuantIndex = MatchQuantifier( m_pNextChar, m_pEndOfCurrItem ); if ( iQuantIndex >= 0 ) { pQuantifierXMLState = &m_pCurrFrag->State; } else { m_pNextChar = pTempNextChar; m_pEndChar = pTempEndChar; m_pEndOfCurrItem = pTempEndOfItem; m_pCurrFrag = pTempFrag; } } else { m_pNextChar = pTempNextChar; m_pEndChar = pTempEndChar; m_pEndOfCurrItem = pTempEndOfItem; m_pCurrFrag = pTempFrag; } } } } //--- Try to match [Number] [CurrencySign] [Quantifier] else { //--- Try to match a number string hr = IsNumberCategory( pNumberInfo, L"NUMBER", MemoryManager ); if ( SUCCEEDED( hr ) ) { if ( pNumberInfo->Type != eNUM_CARDINAL && pNumberInfo->Type != eNUM_DECIMAL && pNumberInfo->Type != eNUM_FRACTION && pNumberInfo->Type != eNUM_MIXEDFRACTION ) { hr = E_INVALIDARG; } else { pNumberXMLState = &m_pCurrFrag->State; } } //--- Skip any whitespace and XML markup between the number and the currency sign if ( SUCCEEDED( hr ) ) { m_pNextChar = m_pEndOfCurrItem; hr = SkipWhiteSpaceAndTags( m_pNextChar, m_pEndChar, m_pCurrFrag, MemoryManager, true, &PostNumberList ); if ( !m_pNextChar ) { hr = E_INVALIDARG; } if ( SUCCEEDED( hr ) ) { m_pEndOfCurrItem = FindTokenEnd( m_pNextChar, m_pEndChar ); while ( IsMiscPunctuation( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED || 
IsGroupEnding( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED || IsQuotationMark( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED || IsEOSItem( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED ) { m_pEndOfCurrItem--; fDone = true; } } } //--- Try to match a Currency Sign if ( SUCCEEDED( hr ) ) { iSymbolIndex = MatchCurrencySign( m_pNextChar, m_pEndOfCurrItem, ePosition ); if ( iSymbolIndex >= 0 ) { pSymbolXMLState = &m_pCurrFrag->State; } //--- Skip any whitespace in between the currency sign and the quantifier if ( !fDone && iSymbolIndex >= 0 ) { const WCHAR *pTempNextChar = m_pNextChar, *pTempEndChar = m_pEndChar; const WCHAR *pTempEndOfItem = m_pEndOfCurrItem; const SPVTEXTFRAG *pTempFrag = m_pCurrFrag; hr = SkipWhiteSpaceAndTags( m_pNextChar, m_pEndChar, m_pCurrFrag, MemoryManager, true, &PostSymbolList ); if ( !m_pNextChar ) { m_pNextChar = pTempNextChar; m_pEndChar = pTempEndChar; m_pEndOfCurrItem = pTempEndOfItem; m_pCurrFrag = pTempFrag; fDone = true; } if ( !fDone && SUCCEEDED( hr ) ) { m_pEndOfCurrItem = FindTokenEnd( m_pNextChar, m_pEndChar ); while ( IsMiscPunctuation( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED || IsGroupEnding( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED || IsQuotationMark( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED || IsEOSItem( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED ) { fDone = true; m_pEndOfCurrItem--; } //--- Try to match quantifier iQuantIndex = MatchQuantifier( m_pNextChar, m_pEndOfCurrItem ); if ( iQuantIndex >= 0 ) { pQuantifierXMLState = &m_pCurrFrag->State; } else { m_pNextChar = pTempNextChar; m_pEndChar = pTempEndChar; m_pEndOfCurrItem = pTempEndOfItem; m_pCurrFrag = pTempFrag; } } } else if ( iSymbolIndex < 0 ) { hr = E_INVALIDARG; } } } //--- Successfully matched a currency! Now expand it and fill out pItemNormInfo. 
if ( SUCCEEDED( hr ) ) { TTSWord Word; ZeroMemory( &Word, sizeof(TTSWord) ); Word.eWordPartOfSpeech = MS_Unknown; pItemNormInfo = (TTSCurrencyItemInfo*) MemoryManager.GetMemory( sizeof(TTSCurrencyItemInfo), &hr ); if ( SUCCEEDED( hr ) ) { //--- Fill in known parts of pItemNormInfo ZeroMemory( pItemNormInfo, sizeof(TTSCurrencyItemInfo) ); pItemNormInfo->Type = eNUM_CURRENCY; ( (TTSCurrencyItemInfo*) pItemNormInfo )->fQuantifier = iQuantIndex >= 0 ? true : false; ( (TTSCurrencyItemInfo*) pItemNormInfo )->pPrimaryNumberPart = (TTSNumberItemInfo*) pNumberInfo; ( (TTSCurrencyItemInfo*) pItemNormInfo )->lNumPostNumberStates = PostNumberList.GetCount(); ( (TTSCurrencyItemInfo*) pItemNormInfo )->lNumPostSymbolStates = PostSymbolList.GetCount(); //--- Need to determine whether this currency will have a primary and secondary part //--- (e.g. "ten dollars and fifty cents") or just a primary part (e.g. "ten point //--- five zero cents", "one hundred dollars"). //--- First check whether the number is a cardinal, there is a quantifier present, or the //--- currency unit has no secondary (e.g. cents). In any of these cases, we need do no //--- further checking. if ( pNumberInfo->Type == eNUM_DECIMAL && iQuantIndex == -1 && g_CurrencySigns[iSymbolIndex].SecondaryUnit.Len > 0 ) { WCHAR *pDecimalPoint = wcschr( ( (TTSNumberItemInfo*) pNumberInfo )->pStartChar, wcDecimalPoint ); SPDBG_ASSERT( pDecimalPoint ); if ( pDecimalPoint && ( (TTSNumberItemInfo*) pNumberInfo )->pEndChar - pDecimalPoint == 3 ) { //--- We do have a secondary part! Fix up PrimaryNumberPart appropriately, //--- and fill in pSecondaryNumberPart. 
const WCHAR *pTempNextChar = m_pNextChar, *pTempEndOfItem = m_pEndOfCurrItem; const WCHAR *pTemp = ( (TTSNumberItemInfo*) pNumberInfo )->pEndChar; m_pNextChar = ( (TTSNumberItemInfo*) pNumberInfo )->pStartChar; m_pEndOfCurrItem = pDecimalPoint; delete ( (TTSNumberItemInfo*) pNumberInfo )->pWordList; //--- m_pNextChar == m_pEndOfCurrItem when integer part is empty and non-negative, e.g. $.50 //--- Other case is empty and negative, e.g. $-.50 if ( m_pNextChar != m_pEndOfCurrItem && !( *m_pNextChar == L'-' && m_pNextChar == m_pEndOfCurrItem - 1 ) ) { hr = IsNumber( pNumberInfo, L"NUMBER", MemoryManager, false ); } else { pNumberInfo = (TTSNumberItemInfo*) MemoryManager.GetMemory( sizeof( TTSNumberItemInfo ), &hr ); if ( SUCCEEDED( hr ) ) { ZeroMemory( pNumberInfo, sizeof( TTSNumberItemInfo ) ); if ( *m_pNextChar == L'-' ) { ( (TTSNumberItemInfo*) pNumberInfo )->fNegative = true; } else { ( (TTSNumberItemInfo*) pNumberInfo )->fNegative = false; } ( (TTSNumberItemInfo*) pNumberInfo )->pStartChar = NULL; ( (TTSNumberItemInfo*) pNumberInfo )->pEndChar = NULL; ( (TTSNumberItemInfo*) pNumberInfo )->pIntegerPart = (TTSIntegerItemInfo*) MemoryManager.GetMemory( sizeof( TTSIntegerItemInfo), &hr ); if ( SUCCEEDED( hr ) ) { ( (TTSNumberItemInfo*) pNumberInfo )->pIntegerPart->fDigitByDigit = true; ( (TTSNumberItemInfo*) pNumberInfo )->pIntegerPart->ulNumDigits = 1; ( (TTSNumberItemInfo*) pNumberInfo )->pWordList = new CWordList; if ( ( (TTSNumberItemInfo*) pNumberInfo )->fNegative ) { Word.pXmlState = pNumberXMLState; Word.pWordText = g_negative.pStr; Word.ulWordLen = g_negative.Len; Word.pLemma = Word.pWordText; Word.ulLemmaLen = Word.ulWordLen; ( (TTSNumberItemInfo*) pNumberInfo )->pWordList->AddTail( Word ); } Word.pWordText = g_ones[0].pStr; Word.ulWordLen = g_ones[0].Len; Word.pLemma = Word.pWordText; Word.ulLemmaLen = Word.ulWordLen; ( (TTSNumberItemInfo*) pNumberInfo )->pWordList->AddTail( Word ); } } } if ( SUCCEEDED( hr ) ) { ( (TTSCurrencyItemInfo*) pItemNormInfo 
)->pPrimaryNumberPart = (TTSNumberItemInfo*) pNumberInfo; m_pNextChar = m_pEndOfCurrItem + 1; m_pEndOfCurrItem = pTemp; //--- If zeroes, don't pronounce them... if ( m_pNextChar[0] != L'0' ) { hr = IsNumber( pNumberInfo, L"NUMBER", MemoryManager, false ); if ( SUCCEEDED( hr ) ) { ( (TTSCurrencyItemInfo*) pItemNormInfo )->pSecondaryNumberPart = (TTSNumberItemInfo*) pNumberInfo; } } else if ( m_pNextChar[1] != L'0' ) { m_pNextChar++; hr = IsNumber( pNumberInfo, L"NUMBER", MemoryManager, false ); if ( SUCCEEDED( hr ) ) { ( (TTSCurrencyItemInfo*) pItemNormInfo )->pSecondaryNumberPart = (TTSNumberItemInfo*) pNumberInfo; } } } m_pNextChar = pTempNextChar; m_pEndOfCurrItem = pTempEndOfItem; } } if ( SUCCEEDED( hr ) ) { //--- Expand Primary number part if ( fNegative ) { ( (TTSCurrencyItemInfo*) pItemNormInfo )->pPrimaryNumberPart->fNegative = true; Word.pXmlState = pNumberXMLState; Word.eWordPartOfSpeech = MS_Unknown; Word.pWordText = g_negative.pStr; Word.ulWordLen = g_negative.Len; Word.pLemma = Word.pWordText; Word.ulLemmaLen = Word.ulWordLen; WordList.AddTail( Word ); } hr = ExpandNumber( ( (TTSCurrencyItemInfo*) pItemNormInfo )->pPrimaryNumberPart, WordList ); } //--- Clean up Number XML States SPLISTPOS WordListPos; if ( SUCCEEDED( hr ) ) { WordListPos = WordList.GetHeadPosition(); while ( WordListPos ) { TTSWord& TempWord = WordList.GetNext( WordListPos ); TempWord.pXmlState = pNumberXMLState; } //--- Insert PostNumber XML States while ( !PostNumberList.IsEmpty() ) { WordList.AddTail( ( PostNumberList.RemoveHead() ).Words[0] ); } //--- If a quantifier is present, expand it if ( iQuantIndex >= 0 ) { Word.pXmlState = pQuantifierXMLState; Word.pWordText = g_quantifiers[iQuantIndex].pStr; Word.ulWordLen = g_quantifiers[iQuantIndex].Len; Word.pLemma = Word.pWordText; Word.ulLemmaLen = Word.ulWordLen; WordList.AddTail( Word ); } BOOL fFraction = false; //--- If a fractional unit with no quantifier, insert "of a" if ( iQuantIndex < 0 && !( (TTSCurrencyItemInfo*) 
pItemNormInfo )->pSecondaryNumberPart && !( (TTSCurrencyItemInfo*) pItemNormInfo )->pPrimaryNumberPart->pIntegerPart && ( (TTSCurrencyItemInfo*) pItemNormInfo )->pPrimaryNumberPart->pFractionalPart && !( (TTSCurrencyItemInfo*) pItemNormInfo )->pPrimaryNumberPart->pFractionalPart->fIsStandard ) { fFraction = true; Word.pXmlState = pNumberXMLState; Word.eWordPartOfSpeech = MS_Unknown; Word.pWordText = g_of.pStr; Word.ulWordLen = g_of.Len; Word.pLemma = Word.pWordText; Word.ulLemmaLen = Word.ulWordLen; WordList.AddTail( Word ); Word.pWordText = g_a.pStr; Word.ulWordLen = g_a.Len; Word.pLemma = Word.pWordText; Word.ulLemmaLen = Word.ulWordLen; WordList.AddTail( Word ); } //--- Insert Main Currency Unit //--- Plural if not a fraction and either a quantifier is present or the integral part is not one. if ( !fFraction && ( iQuantIndex >= 0 || ( ( ( ( (TTSCurrencyItemInfo*) pItemNormInfo )->pPrimaryNumberPart->pEndChar - ( (TTSCurrencyItemInfo*) pItemNormInfo )->pPrimaryNumberPart->pStartChar != 1 ) || ( (TTSCurrencyItemInfo*) pItemNormInfo )->pPrimaryNumberPart->pStartChar[0] != L'1' ) && ( ( ( (TTSCurrencyItemInfo*) pItemNormInfo )->pPrimaryNumberPart->pEndChar - ( (TTSCurrencyItemInfo*) pItemNormInfo )->pPrimaryNumberPart->pStartChar != 2 ) || ( (TTSCurrencyItemInfo*) pItemNormInfo )->pPrimaryNumberPart->pStartChar[0] != L'-' || ( (TTSCurrencyItemInfo*) pItemNormInfo )->pPrimaryNumberPart->pStartChar[1] != L'1' ) ) ) ) { Word.pXmlState = pSymbolXMLState; Word.pWordText = g_CurrencySigns[iSymbolIndex].MainUnit.pStr; Word.ulWordLen = g_CurrencySigns[iSymbolIndex].MainUnit.Len; Word.pLemma = Word.pWordText; Word.ulLemmaLen = Word.ulWordLen; WordList.AddTail( Word ); } //--- ONLY "one" or "negative one" should precede this... 
else { Word.pXmlState = pSymbolXMLState; Word.pWordText = g_SingularPrimaryCurrencySigns[iSymbolIndex].pStr; Word.ulWordLen = g_SingularPrimaryCurrencySigns[iSymbolIndex].Len; Word.pLemma = Word.pWordText; Word.ulLemmaLen = Word.ulWordLen; WordList.AddTail( Word ); } //--- Insert Post Symbol XML States while ( !PostSymbolList.IsEmpty() ) { WordList.AddTail( ( PostSymbolList.RemoveHead() ).Words[0] ); } //--- Insert Secondary number part if ( ( (TTSCurrencyItemInfo*) pItemNormInfo )->pSecondaryNumberPart ) { Word.pXmlState = pNumberXMLState; Word.pWordText = g_And.pStr; Word.ulWordLen = g_And.Len; Word.pLemma = Word.pWordText; Word.ulLemmaLen = Word.ulWordLen; WordList.AddTail( Word ); WordListPos = WordList.GetTailPosition(); hr = ExpandNumber( ( (TTSCurrencyItemInfo*) pItemNormInfo )->pSecondaryNumberPart, WordList ); //--- Clean up number XML State if ( SUCCEEDED( hr ) ) { while ( WordListPos ) { TTSWord& TempWord = WordList.GetNext( WordListPos ); TempWord.pXmlState = pNumberXMLState; } } //--- Insert secondary currency unit if ( SUCCEEDED( hr ) ) { if ( ( (TTSCurrencyItemInfo*) pItemNormInfo )->pSecondaryNumberPart->pEndChar - ( (TTSCurrencyItemInfo*) pItemNormInfo )->pSecondaryNumberPart->pStartChar == 1 && ( (TTSCurrencyItemInfo*) pItemNormInfo )->pSecondaryNumberPart->pStartChar[0] == L'1' ) { Word.pXmlState = pSymbolXMLState; Word.pWordText = g_SingularSecondaryCurrencySigns[iSymbolIndex].pStr; Word.ulWordLen = g_SingularSecondaryCurrencySigns[iSymbolIndex].Len; Word.pLemma = Word.pWordText; Word.ulLemmaLen = Word.ulWordLen; WordList.AddTail( Word ); } else { Word.pXmlState = pSymbolXMLState; Word.pWordText = g_CurrencySigns[iSymbolIndex].SecondaryUnit.pStr; Word.ulWordLen = g_CurrencySigns[iSymbolIndex].SecondaryUnit.Len; Word.pLemma = Word.pWordText; Word.ulLemmaLen = Word.ulWordLen; WordList.AddTail( Word ); } } } if ( SUCCEEDED( hr ) ) { m_pNextChar = pTempNextChar; } } } } else { if ( pNumberInfo ) { delete ( (TTSNumberItemInfo*) pNumberInfo 
)->pWordList; } m_pNextChar = pTempNextChar; m_pEndChar = pTempEndChar; m_pEndOfCurrItem = pTempEndOfItem; m_pCurrFrag = pTempFrag; } return hr; } /* IsCurrency */ /*********************************************************************************************** * IsRomanNumeral * *----------------* * Description: * Checks the incoming Item's text to determine whether or not it * is a fraction. * * RegExp: * [M](0-3) { [CM] || [CD] || { [D]?[C](0-3) } } { [XC] || [XL] || { [L]?[X](0-3) } } * { [IX] || [IV] || { [V]?[I](0-3) }} * * Types assigned: * NUM_ROMAN_NUMERAL ********************************************************************* AH **********************/ HRESULT CStdSentEnum::IsRomanNumeral( TTSItemInfo*& pItemNormInfo, const WCHAR* Context, CSentItemMemory& MemoryManager ) { SPDBG_FUNC( "NumNorm IsRomanNumeral" ); HRESULT hr = S_OK; ULONG ulValue = 0, ulIndex = 0, ulMaxOfThree = 0, ulTokenLen = (ULONG)(m_pEndOfCurrItem - m_pNextChar); //--- Match Thousands - M(0-3) while ( ulIndex < ulTokenLen && towupper( m_pNextChar[ulIndex] ) == L'M' && ulMaxOfThree < 3 ) { ulValue += 1000; ulMaxOfThree++; ulIndex++; } if ( ulMaxOfThree > 3 ) { hr = E_INVALIDARG; } //--- Match Hundreds - { [CM] || [CD] || { [D]?[C](0-3) } } if ( SUCCEEDED( hr ) ) { ulMaxOfThree = 0; //--- Matched C first if ( ulIndex < ulTokenLen && towupper( m_pNextChar[ulIndex] ) == L'C' ) { ulValue += 100; ulMaxOfThree++; ulIndex++; //--- Special Case - CM = 900 if ( ulIndex < ulTokenLen && towupper( m_pNextChar[ulIndex] ) == L'M' ) { ulValue += 800; ulIndex++; } //--- Special Case - CD = 400 else if ( ulIndex < ulTokenLen && towupper( m_pNextChar[ulIndex] ) == L'D' ) { ulValue += 300; ulIndex++; } //--- Default Case else { while ( ulIndex < ulTokenLen && towupper( m_pNextChar[ulIndex] ) == L'C' && ulMaxOfThree < 3 ) { ulValue += 100; ulMaxOfThree++; ulIndex++; } if ( ulMaxOfThree > 3 ) { hr = E_INVALIDARG; } } } //--- Matched D First else if ( ulIndex < ulTokenLen && towupper( m_pNextChar[ulIndex] ) == 
L'D' ) { ulValue += 500; ulIndex++; ulMaxOfThree = 0; //--- Match C's while ( ulIndex < ulTokenLen && towupper( m_pNextChar[ulIndex] ) == L'C' && ulMaxOfThree < 3 ) { ulValue += 100; ulIndex++; ulMaxOfThree++; } if ( ulMaxOfThree > 3 ) { hr = E_INVALIDARG; } } } //--- Match Tens - { [XC] || [XL] || { [L]?[X](0-3) } } if ( SUCCEEDED( hr ) ) { ulMaxOfThree = 0; //--- Matched X First if ( ulIndex < ulTokenLen && towupper( m_pNextChar[ulIndex] ) == L'X' ) { ulValue += 10; ulMaxOfThree++; ulIndex++; //--- Special Case - XC = 90 if ( ulIndex < ulTokenLen && towupper( m_pNextChar[ulIndex] ) == L'C' ) { ulValue += 80; ulIndex++; } //--- Special Case - XL = 40 else if ( ulIndex < ulTokenLen && towupper( m_pNextChar[ulIndex] ) == 'L' ) { ulValue += 30; ulIndex++; } //--- Default Case else { while ( ulIndex < ulTokenLen && towupper( m_pNextChar[ulIndex] ) == L'X' && ulMaxOfThree < 3 ) { ulValue += 10; ulMaxOfThree ++; ulIndex++; } if ( ulMaxOfThree > 3 ) { hr = E_INVALIDARG; } } } //--- Matched L First else if ( ulIndex < ulTokenLen && towupper( m_pNextChar[ulIndex] ) == L'L' ) { ulValue += 50; ulIndex++; //--- Match X's while ( ulIndex < ulTokenLen && towupper( m_pNextChar[ulIndex] ) == L'X' && ulMaxOfThree < 3 ) { ulValue += 10; ulMaxOfThree++; ulIndex++; } if ( ulMaxOfThree > 3 ) { hr = E_INVALIDARG; } } } //--- Match Ones - { [IX] || [IV] || { [V]?[I](0-3) } } if ( SUCCEEDED( hr ) ) { ulMaxOfThree = 0; //--- Matched I First if ( ulIndex < ulTokenLen && towupper( m_pNextChar[ulIndex] ) == L'I' ) { ulValue += 1; ulMaxOfThree++; ulIndex++; //--- Special Case - IX = 9 if ( ulIndex < ulTokenLen && towupper( m_pNextChar[ulIndex] ) == L'X' ) { ulValue += 8; ulIndex++; } //--- Special Case - IV = 4 else if ( ulIndex < ulTokenLen && towupper( m_pNextChar[ulIndex] ) == L'V' ) { ulValue += 3; ulIndex++; } //--- Default Case else { while ( ulIndex < ulTokenLen && towupper( m_pNextChar[ulIndex] ) == L'I' && ulMaxOfThree < 3 ) { ulValue += 1; ulMaxOfThree++; ulIndex++; } if ( 
ulMaxOfThree > 3 ) { hr = E_INVALIDARG; } } } //--- Matched V First else if ( ulIndex < ulTokenLen && towupper( m_pNextChar[ulIndex] ) == L'V' ) { ulValue += 5; ulIndex++; //--- Match I's while ( ulIndex < ulTokenLen && towupper( m_pNextChar[ulIndex] ) == L'I' && ulMaxOfThree < 3 ) { ulValue += 1; ulMaxOfThree++; ulIndex++; } if ( ulMaxOfThree > 3 ) { hr = E_INVALIDARG; } } } if ( ulIndex != ulTokenLen ) { hr = E_INVALIDARG; } else { //--- Successfully matched a roman numeral! WCHAR *tempNumberString; //--- Max value of ulValue is 3999, so the resultant string cannot be more than //--- four characters long (plus one for the comma, just in case) tempNumberString = (WCHAR*) MemoryManager.GetMemory( 6 * sizeof(WCHAR), &hr ); if ( SUCCEEDED( hr ) ) { TTSItemInfo *pNumberInfo = NULL; _ltow( (long) ulValue, tempNumberString, 10 ); const WCHAR *pTempNextChar = m_pNextChar, *pTempEndOfItem = m_pEndOfCurrItem; m_pNextChar = tempNumberString; m_pEndOfCurrItem = tempNumberString + wcslen( tempNumberString ); hr = IsNumber( pNumberInfo, Context, MemoryManager, false ); m_pNextChar = pTempNextChar; m_pEndOfCurrItem = pTempEndOfItem; if ( SUCCEEDED( hr ) ) { pItemNormInfo = (TTSRomanNumeralItemInfo*) MemoryManager.GetMemory( sizeof( TTSRomanNumeralItemInfo ), &hr ); if ( SUCCEEDED( hr ) ) { ( (TTSRomanNumeralItemInfo*) pItemNormInfo )->pNumberInfo = pNumberInfo; } pItemNormInfo->Type = eNUM_ROMAN_NUMERAL; } } } return hr; } /* IsRomanNumeral */ /*********************************************************************************************** * IsPhoneNumber * *---------------* * Description: * Checks the incoming Item's text to determine whether or not it * is a phone number. 
* * RegExp: * { ddd-dddd } || { ddd-ddd-dddd } * * Types assigned: * NUM_PHONENUMBER ********************************************************************* AH **********************/ HRESULT CStdSentEnum::IsPhoneNumber( TTSItemInfo*& pItemNormInfo, const WCHAR* Context, CSentItemMemory& MemoryManager, CWordList& WordList ) { SPDBG_FUNC( "CStdSentEnum::IsPhoneNumber" ); HRESULT hr = S_OK; const WCHAR *pCountryCode = NULL, *pAreaCode = NULL, *pGroups[4] = { NULL, NULL, NULL, NULL }; const WCHAR *pStartChar = m_pNextChar, *pEndChar = m_pEndChar, *pEndOfItem = m_pEndOfCurrItem; const SPVTEXTFRAG *pFrag = m_pCurrFrag; BOOL fMatchedLeftParen = false, fMatchedOne = false; ULONG ulCountryCodeLen = 0, ulAreaCodeLen = 0, ulNumGroups = 0, ulGroupLen[4] = { 0, 0, 0, 0 }; CItemList PostCountryCodeList, PostOneList, PostAreaCodeList, PostGroupLists[4]; const SPVSTATE *pCountryCodeState = NULL, *pOneState = NULL, *pAreaCodeState = NULL; const SPVSTATE *pGroupStates[4] = { NULL, NULL, NULL, NULL }; const WCHAR *pDelimiter = NULL; const WCHAR *pTempEndChar = NULL; const SPVTEXTFRAG *pTempFrag = NULL; ULONG i = 0; //--- Try to match Country Code if ( pStartChar[0] == L'+' ) { pStartChar++; i = 0; //--- Try to match d(1-3) while ( pEndOfItem > pStartChar + i && iswdigit( pStartChar[i] ) && i < 3 ) { i++; } pCountryCode = pStartChar; pCountryCodeState = &pFrag->State; ulCountryCodeLen = i; //--- Try to match delimiter if ( i >= 1 && pEndOfItem > pStartChar + i && MatchPhoneNumberDelimiter( pStartChar[i] ) ) { pDelimiter = pStartChar + i; pStartChar += i + 1; } //--- Try to advance in text - whitespace counts as a delimiter... 
else if ( i >= 1 && pEndOfItem == pStartChar + i ) { pStartChar += i; pCountryCodeState = &pFrag->State; hr = SkipWhiteSpaceAndTags( pStartChar, pEndChar, pFrag, MemoryManager, true, &PostCountryCodeList ); if ( !pStartChar && SUCCEEDED( hr ) ) { hr = E_INVALIDARG; } else if ( SUCCEEDED( hr ) ) { pEndOfItem = FindTokenEnd( pStartChar, pEndChar ); } } else { hr = E_INVALIDARG; } } //--- Try to match a "1" if ( SUCCEEDED( hr ) && !pCountryCode && pStartChar[0] == L'1' && !iswdigit( pStartChar[1] ) ) { pOneState = &pFrag->State; fMatchedOne = true; pStartChar++; if ( pEndOfItem > pStartChar && MatchPhoneNumberDelimiter( pStartChar[0] ) ) { //--- If we've already hit a delimiter, make sure all others agree if ( pDelimiter ) { if ( *pDelimiter != pStartChar[0] ) { hr = E_INVALIDARG; } } else { pDelimiter = pStartChar; } pStartChar++; } //--- Try to advance in text - whitespace counts as a delimiter... else if ( !pDelimiter && pEndOfItem == pStartChar ) { pOneState = &pFrag->State; hr = SkipWhiteSpaceAndTags( pStartChar, pEndChar, pFrag, MemoryManager, true, &PostOneList ); if ( !pStartChar && SUCCEEDED( hr ) ) { hr = E_INVALIDARG; } else if ( SUCCEEDED( hr ) ) { pEndOfItem = FindTokenEnd( pStartChar, pEndChar ); } } else { hr = E_INVALIDARG; } } //--- Try to match Area Code if ( SUCCEEDED( hr ) && pStartChar < pEndOfItem ) { i = 0; //--- Try to match a left parenthesis if ( ( pCountryCode || fMatchedOne ) && pStartChar[0] == L'(' ) { pStartChar++; fMatchedLeftParen = true; } else if ( !pCountryCode && !fMatchedOne && pStartChar > pFrag->pTextStart && *( pStartChar - 1 ) == L'(' ) { fMatchedLeftParen = true; } if ( fMatchedLeftParen ) { //--- Try to match ddd? 
while ( pEndOfItem > pStartChar + i && iswdigit( pStartChar[i] ) && i < 3 ) { i++; } pAreaCodeState = &pFrag->State; pAreaCode = pStartChar; ulAreaCodeLen = i; if ( i < 2 ) { //--- Failed to match at least two digits hr = E_INVALIDARG; } else { if ( pStartChar[i] != L')' ) { //--- Matched left parenthesis without corresponding right parenthesis hr = E_INVALIDARG; } else if ( ( !( pCountryCode || fMatchedOne ) && pEndOfItem > pStartChar + i ) || ( ( pCountryCode || fMatchedOne ) && pEndOfItem > pStartChar + i + 1 ) ) { i++; //--- Delimiter is optional with parentheses if ( MatchPhoneNumberDelimiter( pStartChar[i] ) ) { //--- If we've already hit a delimiter, make sure all others agree if ( pDelimiter ) { if ( *pDelimiter != pStartChar[i] ) { hr = E_INVALIDARG; } } else { pDelimiter = pStartChar + i; } i++; } pStartChar += i; } //--- Try to advance in text - whitespace counts as a delimiter... else if ( !pDelimiter ) { pStartChar += i + 1; pAreaCodeState = &pFrag->State; hr = SkipWhiteSpaceAndTags( pStartChar, pEndChar, pFrag, MemoryManager, true, &PostAreaCodeList ); if ( !pStartChar && SUCCEEDED( hr ) ) { hr = E_INVALIDARG; } else if ( SUCCEEDED( hr ) ) { pEndOfItem = FindTokenEnd( pStartChar, pEndChar ); } } else { hr = E_INVALIDARG; } } } } //--- Try to match main number part if ( SUCCEEDED( hr ) && pStartChar < pEndOfItem ) { //--- Try to match some groups of digits for ( int j = 0; SUCCEEDED( hr ) && j < 4; j++ ) { i = 0; //--- Try to match a digit string while ( pEndOfItem > pStartChar + i && iswdigit( pStartChar[i] ) && i < 4 ) { i++; } //--- Try to match a delimiter if ( i >= 2 ) { pGroupStates[j] = &pFrag->State; ulGroupLen[j] = i; pGroups[j] = pStartChar; pStartChar += i; if ( pEndOfItem > pStartChar + 1 && MatchPhoneNumberDelimiter( pStartChar[0] ) ) { //--- If we've already hit a delimiter, make sure all others agree if ( pDelimiter ) { if ( *pDelimiter != pStartChar[0] ) { hr = E_INVALIDARG; } } //--- Only allow a new delimiter to be matched on the 
first main number group... //--- e.g. "+45 35 32 90.89" should not all match... else if ( j == 0 ) { pDelimiter = pStartChar; } else { pEndChar = pTempEndChar; pFrag = pTempFrag; ulNumGroups = j; break; } pStartChar++; } //--- Try to advance in text - whitespace counts as a delimiter... else if ( !pDelimiter && pEndOfItem == pStartChar ) { pGroupStates[j] = &pFrag->State; pTempEndChar = pEndChar; pTempFrag = pFrag; hr = SkipWhiteSpaceAndTags( pStartChar, pEndChar, pFrag, MemoryManager, true, &PostGroupLists[j] ); if ( !pStartChar && SUCCEEDED( hr ) ) { pEndChar = pTempEndChar; pFrag = pTempFrag; ulNumGroups = j + 1; break; } else if ( SUCCEEDED( hr ) ) { pEndOfItem = FindTokenEnd( pStartChar, pEndChar ); } } else if ( pEndOfItem == pStartChar + 1 ) { if ( IsGroupEnding( *pStartChar ) != eUNMATCHED || IsQuotationMark( *pStartChar ) != eUNMATCHED || IsMiscPunctuation( *pStartChar ) != eUNMATCHED || IsEOSItem( *pStartChar ) != eUNMATCHED ) { pEndOfItem--; ulNumGroups = j + 1; break; } else { hr = E_INVALIDARG; } } else { while ( pEndOfItem != pStartChar ) { if ( IsGroupEnding( *pEndOfItem ) != eUNMATCHED || IsQuotationMark( *pEndOfItem ) != eUNMATCHED || IsMiscPunctuation( *pEndOfItem ) != eUNMATCHED || IsEOSItem( *pEndOfItem ) != eUNMATCHED ) { pEndOfItem--; } else { break; } } if ( pEndOfItem == pStartChar ) { ulNumGroups = j + 1; break; } else { hr = E_INVALIDARG; break; } } } //--- Matched something like 206.709.8286.1 - definitely bad else if ( pDelimiter ) { hr = E_INVALIDARG; } //--- Matched somethinge like 206 709 8286 1 - could be OK else { if ( pTempEndChar ) { pEndChar = pTempEndChar; pFrag = pTempFrag; } ulNumGroups = j; break; } } //--- Didn't hit either break statement if ( !ulNumGroups ) { ulNumGroups = j; } } //--- Check for appropriate formats if ( SUCCEEDED( hr ) ) { //--- Check for [1]?(ddd?)?ddddddd? OR ddddddd? 
if ( !pCountryCode && ulNumGroups == 2 && ulGroupLen[0] == 3 && ulGroupLen[1] >= 3 && !( fMatchedOne && !pAreaCode ) ) { if ( ( !Context || _wcsicmp( Context, L"phone_number" ) != 0 ) && !pCountryCode && !pAreaCode && !fMatchedOne && ( pDelimiter ? (*pDelimiter == L'.') : 0 ) ) { hr = E_INVALIDARG; } } //--- Check for [1]?ddd?ddddddd? else if ( !pCountryCode && !pAreaCode && ulNumGroups == 3 && ( ulGroupLen[0] == 2 || ulGroupLen[0] == 3 ) && ulGroupLen[1] == 3 && ulGroupLen[2] >= 3 ) { pAreaCode = pGroups[0]; ulAreaCodeLen = ulGroupLen[0]; pAreaCodeState = pGroupStates[0]; PostAreaCodeList.AddTail( &PostGroupLists[0] ); pGroups[0] = pGroups[1]; ulGroupLen[0] = ulGroupLen[1]; pGroupStates[0] = pGroupStates[1]; PostGroupLists[0].RemoveAll(); PostGroupLists[0].AddTail( &PostGroupLists[1] ); pGroups[1] = pGroups[2]; ulGroupLen[1] = ulGroupLen[2]; pGroupStates[1] = pGroupStates[2]; PostGroupLists[1].RemoveAll(); PostGroupLists[2].RemoveAll(); ulNumGroups--; } //--- Check for (ddd?)?ddd?ddddd?d? else if ( !pCountryCode && !fMatchedOne && pAreaCode && ulNumGroups == 3 && ( ulGroupLen[0] == 2 || ulGroupLen[0] == 3 ) && ulGroupLen[1] == 2 && ulGroupLen[2] >= 2 ) { NULL; } //--- Check for +dd?d?ddd?ddd?ddd?d?ddd?d? 
else if ( pCountryCode && !fMatchedOne && !pAreaCode && ulNumGroups == 4 && ( ulGroupLen[0] == 2 || ulGroupLen[0] == 3 ) && ( ulGroupLen[1] == 2 || ulGroupLen[1] == 3 ) && ulGroupLen[2] >= 2 && ulGroupLen[3] >= 2 ) { pAreaCode = pGroups[0]; ulAreaCodeLen = ulGroupLen[0]; pAreaCodeState = pGroupStates[0]; PostAreaCodeList.AddTail( &PostGroupLists[0] ); pGroups[0] = pGroups[1]; ulGroupLen[0] = ulGroupLen[1]; pGroupStates[0] = pGroupStates[1]; PostGroupLists[0].RemoveAll(); PostGroupLists[0].AddTail( &PostGroupLists[1] ); pGroups[1] = pGroups[2]; ulGroupLen[1] = ulGroupLen[2]; pGroupStates[1] = pGroupStates[2]; PostGroupLists[1].RemoveAll(); PostGroupLists[1].AddTail( &PostGroupLists[2] ); pGroups[2] = pGroups[3]; ulGroupLen[2] = ulGroupLen[3]; pGroupStates[2] = pGroupStates[3]; PostGroupLists[2].RemoveAll(); PostGroupLists[3].RemoveAll(); ulNumGroups--; } //--- Check for +dd?d?ddd?ddd?ddd?d? else if ( pCountryCode && !fMatchedOne && !pAreaCode && ulNumGroups == 3 && ( ulGroupLen[0] == 2 || ulGroupLen[0] == 3 ) && ( ulGroupLen[1] == 2 || ulGroupLen[1] == 3 ) && ulGroupLen[2] >= 2 ) { pAreaCode = pGroups[0]; ulAreaCodeLen = ulGroupLen[0]; pAreaCodeState = pGroupStates[0]; PostAreaCodeList.AddTail( &PostGroupLists[0] ); pGroups[0] = pGroups[1]; ulGroupLen[0] = ulGroupLen[1]; pGroupStates[0] = pGroupStates[1]; PostGroupLists[0].RemoveAll(); PostGroupLists[0].AddTail( &PostGroupLists[1] ); pGroups[1] = pGroups[2]; ulGroupLen[1] = ulGroupLen[2]; pGroupStates[1] = pGroupStates[2]; PostGroupLists[1].RemoveAll(); PostGroupLists[2].RemoveAll(); ulNumGroups--; } //--- Check for +dd?d?(ddd?)?ddd?ddd?d?ddd?d? else if ( pCountryCode && !fMatchedOne && pAreaCode && ulNumGroups == 3 && ( ulGroupLen[0] == 2 || ulGroupLen[0] == 3 ) && ulGroupLen[1] >= 2 && ulGroupLen[2] >= 2 ) { NULL; } //--- Check for +dd?d?(ddd?)?ddd?ddd?d? 
else if ( pCountryCode && !fMatchedOne && pAreaCode && ulNumGroups == 2 && ( ulGroupLen[0] == 2 || ulGroupLen[0] == 3 ) && ulGroupLen[1] >= 2 ) { NULL; } else { hr = E_INVALIDARG; } } //--- Fill in pItemNormInfo if ( SUCCEEDED(hr) ) { m_pEndOfCurrItem = pGroups[ulNumGroups-1] + ulGroupLen[ulNumGroups-1]; m_pEndChar = pEndChar; m_pCurrFrag = pFrag; pItemNormInfo = (TTSPhoneNumberItemInfo*) MemoryManager.GetMemory( sizeof(TTSPhoneNumberItemInfo), &hr ); if ( SUCCEEDED( hr ) ) { ZeroMemory( pItemNormInfo, sizeof(TTSPhoneNumberItemInfo) ); pItemNormInfo->Type = eNEWNUM_PHONENUMBER; //--- Fill in fOne if ( fMatchedOne ) { ( (TTSPhoneNumberItemInfo*) pItemNormInfo )->fOne = true; } //--- Fill in Country Code... if ( pCountryCode ) { TTSItemInfo* pCountryCodeInfo; const WCHAR *pTempNextChar = m_pNextChar, *pTempEndOfItem = m_pEndOfCurrItem; m_pNextChar = pCountryCode; m_pEndOfCurrItem = pCountryCode + ulCountryCodeLen; hr = IsNumber( pCountryCodeInfo, L"NUMBER", MemoryManager, false ); if ( SUCCEEDED( hr ) ) { ( (TTSPhoneNumberItemInfo*) pItemNormInfo )->pCountryCode = (TTSNumberItemInfo*) pCountryCodeInfo; } m_pNextChar = pTempNextChar; m_pEndOfCurrItem = pTempEndOfItem; } //--- Fill in Area Code... if ( SUCCEEDED( hr ) && pAreaCode ) { ( (TTSPhoneNumberItemInfo*) pItemNormInfo )->pAreaCode = (TTSDigitsItemInfo*) MemoryManager.GetMemory( sizeof( TTSDigitsItemInfo ), &hr ); if ( SUCCEEDED( hr ) ) { ( (TTSPhoneNumberItemInfo*) pItemNormInfo )->pAreaCode->ulNumDigits = ulAreaCodeLen; ( (TTSPhoneNumberItemInfo*) pItemNormInfo )->pAreaCode->pFirstDigit = pAreaCode; } } //--- Fill in Main Number... 
if ( SUCCEEDED( hr ) ) { ( (TTSPhoneNumberItemInfo*) pItemNormInfo )->ulNumGroups = ulNumGroups; ( (TTSPhoneNumberItemInfo*) pItemNormInfo )->ppGroups = (TTSDigitsItemInfo**) MemoryManager.GetMemory( ulNumGroups * sizeof(TTSDigitsItemInfo*), &hr ); for ( ULONG j = 0; SUCCEEDED( hr ) && j < ulNumGroups; j++ ) { ( (TTSPhoneNumberItemInfo*) pItemNormInfo )->ppGroups[j] = (TTSDigitsItemInfo*) MemoryManager.GetMemory( sizeof( TTSDigitsItemInfo ), &hr ); if ( SUCCEEDED( hr ) ) { ( (TTSPhoneNumberItemInfo*) pItemNormInfo )->ppGroups[j]->ulNumDigits = ulGroupLen[j]; ( (TTSPhoneNumberItemInfo*) pItemNormInfo )->ppGroups[j]->pFirstDigit = pGroups[j]; } } } } } //--- Expand Phone Number if ( SUCCEEDED( hr ) ) { TTSWord Word; ZeroMemory( &Word, sizeof( TTSWord ) ); Word.eWordPartOfSpeech = MS_Unknown; SPLISTPOS ListPos; if ( pCountryCode ) { //--- Insert "country" Word.pXmlState = pCountryCodeState; Word.pWordText = g_Country.pStr; Word.ulWordLen = g_Country.Len; Word.pLemma = Word.pWordText; Word.ulLemmaLen = Word.ulWordLen; WordList.AddTail( Word ); //--- Insert "code" Word.pWordText = g_Code.pStr; Word.ulWordLen = g_Code.Len; Word.pLemma = Word.pWordText; Word.ulLemmaLen = Word.ulWordLen; WordList.AddTail( Word ); ListPos = WordList.GetTailPosition(); //--- Expand Country Code ExpandNumber( ( (TTSPhoneNumberItemInfo*) pItemNormInfo )->pCountryCode, WordList ); //--- Clean up digits XML states... 
WordList.GetNext( ListPos ); while ( ListPos ) { TTSWord& TempWord = WordList.GetNext( ListPos ); TempWord.pXmlState = pCountryCodeState; } //--- Insert Post Symbol XML States while ( !PostCountryCodeList.IsEmpty() ) { WordList.AddTail( ( PostCountryCodeList.RemoveHead() ).Words[0] ); } } if ( fMatchedOne ) { //--- Insert "one" Word.pXmlState = pOneState; Word.pWordText = g_ones[1].pStr; Word.ulWordLen = g_ones[1].Len; Word.pLemma = Word.pWordText; Word.ulLemmaLen = Word.ulWordLen; WordList.AddTail( Word ); //--- Insert PostOne XML States while ( !PostOneList.IsEmpty() ) { WordList.AddTail( ( PostOneList.RemoveHead() ).Words[0] ); } } if ( pAreaCode ) { //--- Expand digits - 800 and 900 get expanded as one number, otherwise digit by digit if ( ( pAreaCode[0] == L'8' || pAreaCode[0] == L'9' ) && pAreaCode[1] == L'0' && pAreaCode[2] == L'0' ) { ( (TTSPhoneNumberItemInfo*) pItemNormInfo )->fIs800 = true; NumberGroup Garbage; ListPos = WordList.GetTailPosition(); ExpandThreeDigits( pAreaCode, Garbage, WordList ); //--- Clean up digits XML states... //--- List was possibly empty prior to inserting "eight hundred" or "nine hundred"... if ( !ListPos ) { ListPos = WordList.GetHeadPosition(); } WordList.GetNext( ListPos ); while ( ListPos ) { TTSWord& TempWord = WordList.GetNext( ListPos ); TempWord.pXmlState = pAreaCodeState; } } else { //--- Insert "area" Word.pXmlState = pAreaCodeState; Word.pWordText = g_Area.pStr; Word.ulWordLen = g_Area.Len; Word.pLemma = Word.pWordText; Word.ulLemmaLen = Word.ulWordLen; WordList.AddTail( Word ); //--- Insert "code" Word.pWordText = g_Code.pStr; Word.ulWordLen = g_Code.Len; Word.pLemma = Word.pWordText; Word.ulLemmaLen = Word.ulWordLen; WordList.AddTail( Word ); ListPos = WordList.GetTailPosition(); ExpandDigits( ( (TTSPhoneNumberItemInfo*) pItemNormInfo )->pAreaCode, WordList ); //--- Clean up digits XML states... 
WordList.GetNext( ListPos ); while ( ListPos ) { TTSWord& TempWord = WordList.GetNext( ListPos ); TempWord.pXmlState = pAreaCodeState; } } //--- Insert PostAreaCode XML States while ( !PostAreaCodeList.IsEmpty() ) { WordList.AddTail( ( PostAreaCodeList.RemoveHead() ).Words[0] ); } } for ( ULONG j = 0; j < ulNumGroups; j++ ) { ListPos = WordList.GetTailPosition(); ExpandDigits( ( (TTSPhoneNumberItemInfo*) pItemNormInfo )->ppGroups[j], WordList ); //--- Clean up digits XML states... //--- List was possibly empty prior to inserting "eight hundred" or "nine hundred"... if ( !ListPos ) { ListPos = WordList.GetHeadPosition(); } WordList.GetNext( ListPos ); while ( ListPos ) { TTSWord& TempWord = WordList.GetNext( ListPos ); TempWord.pXmlState = pGroupStates[j]; } //--- Insert Post Group XML States while ( !PostGroupLists[j].IsEmpty() ) { WordList.AddTail( ( PostGroupLists[j].RemoveHead() ).Words[0] ); } } } return hr; } /* IsPhoneNumber */ /*********************************************************************************************** * IsZipCode * *-----------* * Description: * Checks the incoming Item's text to determine whether or not it * is a zipcode. * * RegExp: * ddddd{-dddd}? 
* * Types assigned: * NUM_ZIPCODE ********************************************************************* AH **********************/ HRESULT CStdSentEnum::IsZipCode( TTSItemInfo*& pItemNormInfo, const WCHAR* Context, CSentItemMemory& MemoryManager ) { SPDBG_FUNC( "CStdSentEnum::IsZipCode" ); HRESULT hr = S_OK; ULONG ulTokenLen = (ULONG)(m_pEndOfCurrItem - m_pNextChar); BOOL fLastFour = false; //--- length must be 5 or 10 if ( ulTokenLen != 5 && ulTokenLen != 10 ) { hr = E_INVALIDARG; } else { //--- match 5 digits for ( ULONG i = 0; i < 5; i++ ) { if ( !iswdigit( m_pNextChar[i] ) ) { hr = E_INVALIDARG; break; } } if ( SUCCEEDED(hr) && i < ulTokenLen ) { //--- match dash if ( m_pNextChar[i] != L'-' ) { hr = E_INVALIDARG; } else { //--- match 4 digits for ( i = 0; i < 4; i++ ) { if ( !iswdigit( m_pNextChar[i] ) ) { hr = E_INVALIDARG; break; } } fLastFour = true; } } } if (SUCCEEDED(hr)) { pItemNormInfo = (TTSZipCodeItemInfo*) MemoryManager.GetMemory( sizeof(TTSZipCodeItemInfo), &hr ); if ( SUCCEEDED( hr ) ) { ZeroMemory( pItemNormInfo, sizeof(TTSZipCodeItemInfo) ); pItemNormInfo->Type = eNUM_ZIPCODE; ( (TTSZipCodeItemInfo*) pItemNormInfo )->pFirstFive = (TTSDigitsItemInfo*) MemoryManager.GetMemory( sizeof(TTSDigitsItemInfo), &hr ); if ( SUCCEEDED( hr ) ) { ( (TTSZipCodeItemInfo*) pItemNormInfo )->pFirstFive->ulNumDigits = 5; ( (TTSZipCodeItemInfo*) pItemNormInfo )->pFirstFive->pFirstDigit = m_pNextChar; if ( fLastFour ) { ( (TTSZipCodeItemInfo*) pItemNormInfo )->pLastFour = (TTSDigitsItemInfo*) MemoryManager.GetMemory( sizeof(TTSDigitsItemInfo), &hr ); if ( SUCCEEDED( hr ) ) { ( (TTSZipCodeItemInfo*) pItemNormInfo )->pLastFour->ulNumDigits = 4; ( (TTSZipCodeItemInfo*) pItemNormInfo )->pLastFour->pFirstDigit = m_pNextChar + 6; } } } } } return hr; } /* IsZipCode */ /*********************************************************************************************** * ExpandZipCode * *---------------* * Description: * Expands Items previously determined to be of type 
NUM_ZIPCODE by IsZipCode. * * NOTE: This function does not do parameter validation. Assumed to be done by caller. ********************************************************************* AH **********************/ HRESULT CStdSentEnum::ExpandZipCode( TTSZipCodeItemInfo* pItemInfo, CWordList& WordList ) { SPDBG_FUNC( "CStdSentEnum::ExpandZipCode" ); HRESULT hr = S_OK; ExpandDigits( pItemInfo->pFirstFive, WordList ); if ( pItemInfo->pLastFour ) { //--- Insert "dash" TTSWord Word; ZeroMemory( &Word, sizeof( TTSWord ) ); Word.pXmlState = &m_pCurrFrag->State; Word.eWordPartOfSpeech = MS_Unknown; Word.pWordText = g_dash.pStr; Word.ulWordLen = g_dash.Len; Word.pLemma = Word.pWordText; Word.ulLemmaLen = Word.ulWordLen; WordList.AddTail( Word ); ExpandDigits( pItemInfo->pLastFour, WordList ); } return hr; } /* ExpandZipCode */ /*********************************************************************************************** * IsNumberRange * *---------------* * Description: * Checks the incoming Item's text to determine whether or not it * is a number range. 
* * RegExp: * [Number]-[Number] * * Types assigned: * NUM_RANGE ********************************************************************* AH **********************/ HRESULT CStdSentEnum::IsNumberRange( TTSItemInfo*& pItemNormInfo, CSentItemMemory& MemoryManager ) { SPDBG_FUNC( "CStdSentEnum::IsNumberRange" ); HRESULT hr = S_OK; TTSItemInfo *pFirstNumberInfo = NULL, *pSecondNumberInfo = NULL; const WCHAR *pTempNextChar = m_pNextChar, *pTempEndOfItem = m_pEndOfCurrItem; const WCHAR *pHyphen = NULL; for ( pHyphen = m_pNextChar; pHyphen < m_pEndOfCurrItem; pHyphen++ ) { if ( *pHyphen == L'-' ) { break; } } if ( *pHyphen == L'-' && pHyphen > m_pNextChar && pHyphen < m_pEndOfCurrItem - 1 ) { m_pEndOfCurrItem = pHyphen; hr = IsNumber( pFirstNumberInfo, NULL, MemoryManager ); if ( SUCCEEDED( hr ) ) { m_pNextChar = pHyphen + 1; m_pEndOfCurrItem = pTempEndOfItem; hr = IsNumberCategory( pSecondNumberInfo, NULL, MemoryManager ); if ( SUCCEEDED( hr ) ) { //--- Matched a number range! pItemNormInfo = (TTSNumberRangeItemInfo*) MemoryManager.GetMemory( sizeof( TTSNumberRangeItemInfo ), &hr ); if ( SUCCEEDED( hr ) ) { pItemNormInfo->Type = eNUM_RANGE; ( (TTSNumberRangeItemInfo*) pItemNormInfo )->pFirstNumberInfo = pFirstNumberInfo; ( (TTSNumberRangeItemInfo*) pItemNormInfo )->pSecondNumberInfo = pSecondNumberInfo; } } else if ( pFirstNumberInfo->Type != eDATE_YEAR ) { delete ( (TTSNumberItemInfo*) pFirstNumberInfo )->pWordList; } } m_pNextChar = pTempNextChar; m_pEndOfCurrItem = pTempEndOfItem; } else { hr = E_INVALIDARG; } return hr; } /* IsNumberRange */ /*********************************************************************************************** * ExpandNumberRange * *-------------------* * Description: * Expands Items previously determined to be of type NUM_RANGE by IsNumberRange. * * NOTE: This function does not do parameter validation. Assumed to be done by caller. 
********************************************************************* AH **********************/ HRESULT CStdSentEnum::ExpandNumberRange( TTSNumberRangeItemInfo* pItemInfo, CWordList& WordList ) { SPDBG_FUNC( "CStdSentEnum::ExpandNumberRange" ); HRESULT hr = S_OK; //--- Expand first number (or year)... switch( pItemInfo->pFirstNumberInfo->Type ) { case eDATE_YEAR: hr = ExpandYear( (TTSYearItemInfo*) pItemInfo->pFirstNumberInfo, WordList ); break; default: hr = ExpandNumber( (TTSNumberItemInfo*) pItemInfo->pFirstNumberInfo, WordList ); break; } //--- Insert "to" if ( SUCCEEDED( hr ) ) { TTSWord Word; ZeroMemory( &Word, sizeof( TTSWord ) ); Word.pXmlState = &m_pCurrFrag->State; Word.eWordPartOfSpeech = MS_Unknown; Word.pWordText = g_to.pStr; Word.ulWordLen = g_to.Len; Word.pLemma = Word.pWordText; Word.ulLemmaLen = Word.ulWordLen; WordList.AddTail( Word ); } //--- Expand second number (or year)... if ( SUCCEEDED( hr ) ) { switch( pItemInfo->pSecondNumberInfo->Type ) { case eDATE_YEAR: hr = ExpandYear( (TTSYearItemInfo*) pItemInfo->pSecondNumberInfo, WordList ); break; case eNUM_PERCENT: hr = ExpandPercent( (TTSNumberItemInfo*) pItemInfo->pSecondNumberInfo, WordList ); break; case eNUM_DEGREES: hr = ExpandDegrees( (TTSNumberItemInfo*) pItemInfo->pSecondNumberInfo, WordList ); break; case eNUM_SQUARED: hr = ExpandSquare( (TTSNumberItemInfo*) pItemInfo->pSecondNumberInfo, WordList ); break; case eNUM_CUBED: hr = ExpandCube( (TTSNumberItemInfo*) pItemInfo->pSecondNumberInfo, WordList ); break; default: hr = ExpandNumber( (TTSNumberItemInfo*) pItemInfo->pSecondNumberInfo, WordList ); break; } } return hr; } /* ExpandNumberRange */ /*********************************************************************************************** * IsCurrencyRange * *-------------------* * Description: * Expands Items determined to be of type CURRENCY_RANGE * * NOTE: This function does not do parameter validation. Assumed to be done by caller. 
********************************************************************* AH **********************/
//--- Checks whether the current item is a hyphenated pair of currency amounts that share one
//--- currency sign (e.g. "$10-12") and, if so, expands it.
//---
//---   pItemInfo     - on success receives a TTSNumberRangeItemInfo (Type eNUM_CURRENCYRANGE)
//---                   holding the infos for both halves.
//---   MemoryManager - supplies the item info and the temporary strings.
//---   WordList      - receives "<first> to <second>"; words are collected in a temporary list
//---                   and only appended here once everything has succeeded.
//---
//--- Returns E_INVALIDARG when no currency sign is found, when there is no interior hyphen, or
//--- when the two halves carry different signs; otherwise the result of the calls made.
//--- The hyphen in the source text is overwritten with a space while the halves are analyzed
//--- and written back afterwards (note that this writes through a const pointer).
//--- m_pNextChar, m_pEndOfCurrItem and m_pEndChar are restored on every path.
HRESULT CStdSentEnum::IsCurrencyRange( TTSItemInfo*& pItemInfo, CSentItemMemory& MemoryManager, CWordList& WordList )
{
    SPDBG_FUNC( "CStdSentEnum::IsCurrencyRange" );

    HRESULT hr = S_OK;
    TTSItemInfo *pFirstNumberInfo = NULL, *pSecondNumberInfo = NULL;
    const WCHAR *pTempNextChar = m_pNextChar, *pTempEndOfItem = m_pEndOfCurrItem, *pTempEndChar = m_pEndChar;
    const WCHAR *pHyphen = NULL;
    CWordList TempWordList;
    NORM_POSITION ePosition = UNATTACHED;
    //for currency sign checking
    int iSymbolIndex, iTempSymbolIndex = -1;
    WCHAR *tempNumberString;

    //--- MatchCurrencySign takes both pointers by reference: a leading sign advances m_pNextChar,
    //--- a trailing sign pulls m_pEndOfCurrItem back.  ePosition records which one was found.
    iSymbolIndex = MatchCurrencySign( m_pNextChar, m_pEndOfCurrItem, ePosition );
    if(iSymbolIndex < 0)
    {
        hr = E_INVALIDARG;
    }
    else
    {
        //--- Find the first hyphen between the (sign-stripped) bounds
        for ( pHyphen = m_pNextChar; pHyphen < m_pEndOfCurrItem; pHyphen++ )
        {
            if ( *pHyphen == L'-' )
            {
                break;
            }
        }
        //--- The hyphen must exist and have at least one character on either side
        if ( !( *pHyphen == L'-' && pHyphen > m_pNextChar && pHyphen < m_pEndOfCurrItem - 1 ) )
        {
            hr = E_INVALIDARG;
        }
        else
        {
            *( (WCHAR*)pHyphen) = L' ';  // Token must break at hyphen, or IsCurrency() will not work
            //--- First half: from the original start of the item up to the hyphen
            m_pNextChar      = pTempNextChar;
            m_pEndOfCurrItem = pHyphen;
            NORM_POSITION temp = UNATTACHED;
            iTempSymbolIndex = MatchCurrencySign( m_pNextChar, m_pEndOfCurrItem, temp );
            if( iTempSymbolIndex >= 0 && iSymbolIndex != iTempSymbolIndex )
            {
                hr = E_INVALIDARG;
            }
            else //--- Get both NumberInfos
            {
                hr = IsNumber( pFirstNumberInfo, L"NUMBER", MemoryManager, false );
                if( SUCCEEDED ( hr ) )
                {
                    //--- Second half: from just after the hyphen to the original end of the item
                    m_pNextChar      = pHyphen + 1;
                    m_pEndOfCurrItem = pTempEndOfItem;
                    //--- NOTE(review): this overwrites the first half's symbol index with the
                    //--- second half's; the FOLLOWING branch below reads it as "symbol on first
                    //--- number item" - confirm which half is meant.
                    iTempSymbolIndex = MatchCurrencySign( m_pNextChar, m_pEndOfCurrItem, temp );
                    hr = IsNumber( pSecondNumberInfo, L"NUMBER", MemoryManager, false );
                }
            }
            if( SUCCEEDED ( hr ) )
            {
                //--- If both currency values are cardinal numbers, then the first number can be
                //--- expanded without saying its currency ("$10-12" -> "ten to twelve dollars")
                if( pFirstNumberInfo->Type == eNUM_CARDINAL && pSecondNumberInfo->Type == eNUM_CARDINAL )
                {
                    ExpandNumber( (TTSNumberItemInfo*) pFirstNumberInfo, TempWordList );
                }
                else // one or both values are non-cardinal numbers, so we must
                {    // expand the first value as a full currency.
                    m_pNextChar      = pTempNextChar;
                    m_pEndOfCurrItem = pHyphen;

                    if( ePosition == FOLLOWING )
                    {
                        if( iTempSymbolIndex < 0 ) // No symbol on first number item - need to fill a buffer
                        {                          // with currency symbol and value to pass to IsCurrency().
                            //--- room for the number text, the sign and a terminating null
                            ULONG ulNumChars = (long)(m_pEndOfCurrItem - m_pNextChar + g_CurrencySigns[iSymbolIndex].Sign.Len + 1);
                            tempNumberString = (WCHAR*) MemoryManager.GetMemory( (ulNumChars) * sizeof(WCHAR), &hr );
                            if ( SUCCEEDED( hr ) )
                            {
                                //--- buffer is zeroed first, so the wcsncpy result is terminated for wcscat
                                ZeroMemory( tempNumberString, ( ulNumChars ) * sizeof( WCHAR ) );
                                wcsncpy( tempNumberString, m_pNextChar, m_pEndOfCurrItem - m_pNextChar );
                                wcscat( tempNumberString, g_CurrencySigns[iSymbolIndex].Sign.pStr );
                                m_pNextChar      = tempNumberString;
                                m_pEndOfCurrItem = tempNumberString + wcslen( tempNumberString );
                                m_pEndChar       = m_pEndOfCurrItem;
                            }
                        }
                        else if( iTempSymbolIndex != iSymbolIndex ) // mismatched symbols
                        {
                            hr = E_INVALIDARG;
                        }
                    }
                    if ( SUCCEEDED ( hr ) )
                    {
                        hr = IsCurrency( pFirstNumberInfo, MemoryManager, TempWordList );
                        m_pEndChar = pTempEndChar;
                    }
                }
            }
            if ( SUCCEEDED ( hr ) )
            {
                //--- Insert "to"
                TTSWord Word;
                ZeroMemory( &Word, sizeof( TTSWord ) );
                Word.pXmlState          = &m_pCurrFrag->State;
                Word.eWordPartOfSpeech  = MS_Unknown;
                Word.pWordText          = g_to.pStr;
                Word.ulWordLen          = g_to.Len;
                Word.pLemma             = Word.pWordText;
                Word.ulLemmaLen         = Word.ulWordLen;
                TempWordList.AddTail( Word );

                m_pNextChar      = pHyphen + 1;
                m_pEndOfCurrItem = pTempEndOfItem;

                if( ePosition == PRECEDING )
                {
                    iTempSymbolIndex = MatchCurrencySign( m_pNextChar, m_pEndOfCurrItem, ePosition );
                    if( iTempSymbolIndex < 0 ) // No symbol on second number item
                    {   // create temporary string from first currency sign and second number item
                        ULONG ulNumChars = (long)(m_pEndOfCurrItem - m_pNextChar + g_CurrencySigns[iSymbolIndex].Sign.Len + 1);
                        tempNumberString = (WCHAR*) MemoryManager.GetMemory( (ulNumChars) * sizeof(WCHAR), &hr );
                        if ( SUCCEEDED( hr ) )
                        {
                            //--- zero-filled buffer keeps the result null-terminated for wcslen
                            ZeroMemory( tempNumberString, ( ulNumChars ) * sizeof( WCHAR ) );
                            wcsncpy( tempNumberString, g_CurrencySigns[iSymbolIndex].Sign.pStr, g_CurrencySigns[iSymbolIndex].Sign.Len );
                            wcsncpy( tempNumberString+g_CurrencySigns[iSymbolIndex].Sign.Len, m_pNextChar, m_pEndOfCurrItem - m_pNextChar );
                            m_pNextChar      = tempNumberString;
                            m_pEndOfCurrItem = tempNumberString + wcslen( tempNumberString );
                            m_pEndChar       = m_pEndOfCurrItem;
                        }
                    }
                    else if( iTempSymbolIndex == iSymbolIndex ) // matched leading symbol on second number item
                    {
                        //--- undo the pointer adjustment made by MatchCurrencySign so that
                        //--- IsCurrency sees the sign again
                        m_pNextChar      = pHyphen + 1;
                        m_pEndOfCurrItem = pTempEndOfItem;
                    }
                    else // mismatched symbol
                    {
                        hr = E_INVALIDARG;
                    }
                }
                if( SUCCEEDED(hr) )
                {
                    hr = IsCurrency( pSecondNumberInfo, MemoryManager, TempWordList );
                    if ( SUCCEEDED( hr ) )
                    {
                        //--- Matched a currency range!
                        pItemInfo = (TTSNumberRangeItemInfo*) MemoryManager.GetMemory( sizeof( TTSNumberRangeItemInfo ), &hr );
                        if ( SUCCEEDED( hr ) )
                        {
                            pItemInfo->Type = eNUM_CURRENCYRANGE;
                            ( (TTSNumberRangeItemInfo*) pItemInfo )->pFirstNumberInfo  = pFirstNumberInfo;
                            ( (TTSNumberRangeItemInfo*) pItemInfo )->pSecondNumberInfo = pSecondNumberInfo;
                            //--- Copy temp word list to real word list if everything has succeeded...
                            WordList.AddTail( &TempWordList );
                        }
                    }
                }
            }
            //--- Put the hyphen back into the source text
            *( (WCHAR*)pHyphen) = L'-';
        }
    }
    //Reset member variables regardless of failure or success
    m_pNextChar      = pTempNextChar;
    m_pEndOfCurrItem = pTempEndOfItem;
    m_pEndChar       = pTempEndChar;

    return hr;
} /* IsCurrencyRange */

/***********************************************************************************************
* MatchCurrencySign *
*-------------------*
*   Description:
*       Helper function which tries to match a currency sign at the beginning of a string or,
*       failing that, at its end.
********************************************************************* AH **********************/ int MatchCurrencySign( const WCHAR*& pStartChar, const WCHAR*& pEndChar, NORM_POSITION& ePosition ) { int Index = -1; for (int i = 0; i < sp_countof(g_CurrencySigns); i++) { if ( pEndChar - pStartChar >= g_CurrencySigns[i].Sign.Len && wcsnicmp( pStartChar, g_CurrencySigns[i].Sign.pStr, g_CurrencySigns[i].Sign.Len ) == 0 ) { Index = i; pStartChar += g_CurrencySigns[i].Sign.Len; ePosition = PRECEDING; break; } } if ( Index == -1 ) { for ( int i = 0; i < sp_countof(g_CurrencySigns); i++ ) { if ( pEndChar - pStartChar >= g_CurrencySigns[i].Sign.Len && wcsnicmp( pEndChar - g_CurrencySigns[i].Sign.Len, g_CurrencySigns[i].Sign.pStr, g_CurrencySigns[i].Sign.Len ) == 0 ) { Index = i; pEndChar -= g_CurrencySigns[i].Sign.Len; ePosition = FOLLOWING; break; } } } return Index; } /* MatchCurrencySign */ /*********************************************************************************************** * Zeroes * *--------* * Description: * A helper function which simply determines if a number string contains only zeroes... * Note: This function does not do parameter validation. Assumed to be done by caller. ********************************************************************* AH **********************/ bool CStdSentEnum::Zeroes(const WCHAR *NumberString) { bool bAllZeroes = true; for (ULONG i = 0; i < wcslen(NumberString); i++) { if (NumberString[i] != '0' && isdigit(NumberString[i]) ) { bAllZeroes = false; break; } else if ( !isdigit( NumberString[i] ) && NumberString[i] != ',' ) { break; } } return bAllZeroes; } /* Zeroes */ /*********************************************************************************************** * ThreeZeroes * *-------------* * Description: * A helper function which simply determines if a number string contains three zeroes... * Note: This function does not do parameter validation. Assumed to be done by caller. 
********************************************************************* AH **********************/
//--- Additional notes on ThreeZeroes:
//      Examines at most the first three characters of NumberString and returns false if any
//      of them is a digit '1' - '9'; returns true otherwise.  Characters which are not digits
//      at all do not cause a false result.
//
//      The scan stops early at a terminating NUL so that a string shorter than three
//      characters is never read past its end.
//
//      Digits are tested with an explicit ASCII range check rather than isdigit(), because
//      isdigit() is undefined for WCHAR values which are not representable as an unsigned
//      char and is locale dependent for the values which are.
//
//      NumberString must be non-NULL (no parameter validation is done here).
bool CStdSentEnum::ThreeZeroes(const WCHAR *NumberString)
{
    bool bThreeZeroes = true;

    for ( ULONG i = 0; i < 3 && NumberString[i] != L'\0'; i++ )
    {
        if ( NumberString[i] >= L'1' && NumberString[i] <= L'9' )
        {
            //--- Found a non-zero digit within the group
            bThreeZeroes = false;
            break;
        }
    }

    return bThreeZeroes;
} /* ThreeZeroes */

//-----------End Of File-------------------------------------------------------------------