|
|
/***********************************************************************************************
* TimeNorm.cpp * *-------------* * Description: * These functions normalize times of day and time measurements. *----------------------------------------------------------------------------------------------- * Created by AH August 3, 1999 * Copyright (C) 1999 Microsoft Corporation * All Rights Reserved * ***********************************************************************************************/
#include "stdafx.h"
#ifndef StdSentEnum_h
#include "stdsentenum.h"
#endif
#pragma warning (disable : 4296)
/***********************************************************************************************
* IsTimeOfDay *
*-------------*
*   Description:
*       Checks the incoming Item's text to determine whether or not it
*   is a time of day.
*
*   RegExp:
*       [01-09,1-12][:][00-09,10-59][TimeAbbreviation]?
*
*   Types assigned:
*       TIMEOFDAY
*
*   Parameters:
*       pItemNormInfo - [out] receives a TTSTimeOfDayItemInfo allocated from MemoryManager
*                       when a time of day is matched.
*       MemoryManager - allocator used for the item info and for the am/pm XML state copy.
*       WordList      - receives the expanded words (hours, minutes, "o'clock", A/P + M ...).
*       fMultiItem    - when true, the text after the current item is also examined for a
*                       whitespace-separated am/pm abbreviation ("10:30 pm").
*
*   Returns:
*       S_OK on a match, E_INVALIDARG when the text is not a time of day, or the failure
*   code reported by SkipWhiteSpaceAndTags / MemoryManager.GetMemory.
*
*   Side effects:
*       When the abbreviation was found in a following token, m_pCurrFrag, m_pEndChar and
*   m_pEndOfCurrItem are advanced to cover that token.  Every word already in WordList has
*   its pXmlState reset to the XML state of the fragment holding the time.
********************************************************************* AH **********************/
HRESULT CStdSentEnum::IsTimeOfDay( TTSItemInfo*& pItemNormInfo, CSentItemMemory& MemoryManager,
                                   CWordList& WordList, BOOL fMultiItem )
{
    SPDBG_FUNC( "CStdSentEnum::IsTimeOfDay" );

    HRESULT hr = S_OK;
    const WCHAR *pStartChar = m_pNextChar, *pEndOfItem = m_pEndOfCurrItem, *pEndChar = m_pEndChar;
    const SPVTEXTFRAG* pFrag = m_pCurrFrag;
    const SPVSTATE *pTimeXMLState = &pFrag->State, *pAbbreviationXMLState = NULL;
    CItemList PreAbbreviationList;
    BOOL fAdvancePointers = false;
    WCHAR *pHours = NULL, *pMinutes = NULL, *pAbbreviation = NULL;
    ULONG ulHours = 0, ulMinutes = 0;
    TIMEABBREVIATION TimeAbbreviation = UNDEFINED;
    TTSItemType ItemType = eUNMATCHED;

    //--- Max length of a string matching this regexp is 9 characters
    if ( pEndOfItem - pStartChar > 9 )
    {
        hr = E_INVALIDARG;
    }
    else
    {
        pHours = (WCHAR*) pStartChar;
        //--- Try to match a number for the hour of day - [01-09,1-12]
        ulHours = my_wcstoul( pHours, &pMinutes );
        if ( pHours != pMinutes && pMinutes - pHours <= 2 )
        {
            //--- Try to match the colon - [:]
            if ( *pMinutes == ':' )
            {
                pMinutes++;
                //--- Try to match a number for the minutes - [00-09,10-59]
                ulMinutes = my_wcstoul( pMinutes, &pAbbreviation );
                if ( pMinutes != pAbbreviation && pAbbreviation - pMinutes == 2 )
                {
                    //--- Verify that this is the end of the string
                    if ( pAbbreviation == pEndOfItem )
                    {
                        //--- May have gotten hours and minutes - validate values
                        if ( HOURMIN <= ulHours && ulHours <= HOURMAX &&
                             MINUTEMIN <= ulMinutes && ulMinutes <= MINUTEMAX )
                        {
                            //--- A successful match has been made, but peek ahead in text
                            //--- for a Time Abbreviation.  Not finding one is not an error.
                            if ( fMultiItem )
                            {
                                pStartChar = pEndOfItem;
                                hr = SkipWhiteSpaceAndTags( pStartChar, pEndChar, pFrag, MemoryManager,
                                                            true, &PreAbbreviationList );
                                if ( pStartChar && SUCCEEDED( hr ) )
                                {
                                    pEndOfItem = FindTokenEnd( pStartChar, pEndChar );

                                    //--- Strip trailing punctuation from the peeked token.  A period is
                                    //--- only stripped from "am." / "pm." so that "a.m." stays intact.
                                    //--- Never back up past the start of the token.
                                    while ( pEndOfItem > pStartChar &&
                                            ( IsMiscPunctuation( *(pEndOfItem - 1) ) != eUNMATCHED ||
                                              IsGroupEnding( *(pEndOfItem - 1) ) != eUNMATCHED ||
                                              IsQuotationMark( *(pEndOfItem - 1) ) != eUNMATCHED ||
                                              ( ( ItemType = IsEOSItem( *(pEndOfItem - 1) ) ) != eUNMATCHED &&
                                                ( ItemType != ePERIOD ||
                                                  ( _wcsnicmp( pStartChar, L"am.", 3 ) == 0 &&
                                                    pStartChar + 3 == pEndOfItem ) ||
                                                  ( _wcsnicmp( pStartChar, L"pm.", 3 ) == 0 &&
                                                    pStartChar + 3 == pEndOfItem ) ) ) ) )
                                    {
                                        pEndOfItem--;
                                    }
                                    pAbbreviation = (WCHAR*) pStartChar;

                                    if ( ( _wcsnicmp( pAbbreviation, L"am", 2 ) == 0 &&
                                           pAbbreviation + 2 == pEndOfItem ) ||
                                         ( _wcsnicmp( pAbbreviation, L"a.m.", 4 ) == 0 &&
                                           pAbbreviation + 4 == pEndOfItem ) )
                                    {
                                        //--- Found a valid Time Abbreviation - [Hours:Minutes] [whitespace] [Abbrev]
                                        TimeAbbreviation      = AM;
                                        pAbbreviationXMLState = &pFrag->State;
                                        fAdvancePointers      = true;
                                    }
                                    else if ( ( _wcsnicmp( pAbbreviation, L"pm", 2 ) == 0 &&
                                                pAbbreviation + 2 == pEndOfItem ) ||
                                              ( _wcsnicmp( pAbbreviation, L"p.m.", 4 ) == 0 &&
                                                pAbbreviation + 4 == pEndOfItem ) )
                                    {
                                        //--- Found a valid Time Abbreviation - [Hours:Minutes] [whitespace] [Abbrev]
                                        TimeAbbreviation      = PM;
                                        pAbbreviationXMLState = &pFrag->State;
                                        fAdvancePointers      = true;
                                    }
                                }
                            }
                        }
                        else // hours or minutes were out of range
                        {
                            hr = E_INVALIDARG;
                        }
                    }
                    //--- Check to see if the rest of the string is a time abbreviation - [TimeAbbreviation]
                    else if ( ( _wcsnicmp( pAbbreviation, L"am", 2 ) == 0 && pAbbreviation + 2 == pEndOfItem ) ||
                              ( _wcsnicmp( pAbbreviation, L"a.m.", 4 ) == 0 && pAbbreviation + 4 == pEndOfItem ) )
                    {
                        //--- May have gotten hours and minutes and time abbreviation - validate values
                        if ( HOURMIN <= ulHours && ulHours <= HOURMAX &&
                             MINUTEMIN <= ulMinutes && ulMinutes <= MINUTEMAX )
                        {
                            //--- A successful match has been made
                            TimeAbbreviation      = AM;
                            pAbbreviationXMLState = &pFrag->State;
                        }
                        else // hours or minutes were out of range
                        {
                            hr = E_INVALIDARG;
                        }
                    }
                    //--- Check to see if the rest of the string is a time abbreviation - [TimeAbbreviation]
                    else if ( ( _wcsnicmp( pAbbreviation, L"pm", 2 ) == 0 && pAbbreviation + 2 == pEndOfItem ) ||
                              ( _wcsnicmp( pAbbreviation, L"p.m.", 4 ) == 0 && pAbbreviation + 4 == pEndOfItem ) )
                    {
                        //--- May have gotten hours and minutes and time abbreviation - validate values
                        if ( HOURMIN <= ulHours && ulHours <= HOURMAX &&
                             MINUTEMIN <= ulMinutes && ulMinutes <= MINUTEMAX )
                        {
                            //--- A successful match has been made
                            pAbbreviationXMLState = &pFrag->State;
                            TimeAbbreviation      = PM;
                        }
                        else // hours or minutes were out of range
                        {
                            hr = E_INVALIDARG;
                        }
                    }
                    else // string ended in invalid characters
                    {
                        hr = E_INVALIDARG;
                    }
                }
                else // failed to match a valid minutes string
                {
                    hr = E_INVALIDARG;
                }
            }
            // failed to match the colon, could be just hours and a time abbreviation
            else if ( pMinutes < m_pEndOfCurrItem )
            {
                pAbbreviation = pMinutes;
                pMinutes      = NULL;
                //--- Check for TimeAbbreviation - [TimeAbbreviation]
                if ( ( _wcsnicmp( pAbbreviation, L"am", 2 ) == 0 && pAbbreviation + 2 == pEndOfItem ) ||
                     ( _wcsnicmp( pAbbreviation, L"a.m.", 4 ) == 0 && pAbbreviation + 4 == pEndOfItem ) )
                {
                    //--- A successful match has been made - Hour AM
                    pAbbreviationXMLState = &pFrag->State;
                    TimeAbbreviation      = AM;
                }
                else if ( ( _wcsnicmp( pAbbreviation, L"pm", 2 ) == 0 && pAbbreviation + 2 == pEndOfItem ) ||
                          ( _wcsnicmp( pAbbreviation, L"p.m.", 4 ) == 0 && pAbbreviation + 4 == pEndOfItem ) )
                {
                    //--- A successful match has been made - Hour PM
                    pAbbreviationXMLState = &pFrag->State;
                    TimeAbbreviation      = PM;
                }
                else // failed to match a valid time abbreviation
                {
                    hr = E_INVALIDARG;
                }
            }
            else if ( fMultiItem )
            {
                //--- Set pMinutes to NULL, so we know later that we've got no minutes string...
                pMinutes = NULL;

                //--- Peek ahead in text for a time abbreviation.  A bare hour is only a time
                //--- of day if an abbreviation follows, so failing to find one is an error here.
                pStartChar = pEndOfItem;
                hr = SkipWhiteSpaceAndTags( pStartChar, pEndChar, pFrag, MemoryManager,
                                            true, &PreAbbreviationList );
                if ( !pStartChar && SUCCEEDED( hr ) )
                {
                    hr = E_INVALIDARG;
                }
                else if ( pStartChar && SUCCEEDED( hr ) )
                {
                    pEndOfItem = FindTokenEnd( pStartChar, pEndChar );

                    //--- Strip trailing punctuation (but not periods) from the peeked token,
                    //--- never backing up past the start of the token.
                    while ( pEndOfItem > pStartChar &&
                            ( IsMiscPunctuation( *(pEndOfItem - 1) ) != eUNMATCHED ||
                              IsGroupEnding( *(pEndOfItem - 1) ) != eUNMATCHED ||
                              IsQuotationMark( *(pEndOfItem - 1) ) != eUNMATCHED ||
                              ( ( ItemType = IsEOSItem( *(pEndOfItem - 1) ) ) != eUNMATCHED &&
                                ItemType != ePERIOD ) ) )
                    {
                        pEndOfItem--;
                    }
                    pAbbreviation = (WCHAR*) pStartChar;

                    if ( ( _wcsnicmp( pAbbreviation, L"am", 2 ) == 0 && pAbbreviation + 2 == pEndOfItem ) ||
                         ( _wcsnicmp( pAbbreviation, L"a.m.", 4 ) == 0 && pAbbreviation + 4 == pEndOfItem ) )
                    {
                        //--- Found a valid Time Abbreviation - [Hours] [whitespace] [Abbrev]
                        TimeAbbreviation      = AM;
                        pAbbreviationXMLState = &pFrag->State;
                        fAdvancePointers      = true;
                    }
                    else if ( ( _wcsnicmp( pAbbreviation, L"pm", 2 ) == 0 && pAbbreviation + 2 == pEndOfItem ) ||
                              ( _wcsnicmp( pAbbreviation, L"p.m.", 4 ) == 0 && pAbbreviation + 4 == pEndOfItem ) )
                    {
                        //--- Found a valid Time Abbreviation - [Hours] [whitespace] [Abbrev]
                        TimeAbbreviation      = PM;
                        pAbbreviationXMLState = &pFrag->State;
                        fAdvancePointers      = true;
                    }
                    //--- Failed to match a valid Time Abbreviation
                    else
                    {
                        hr = E_INVALIDARG;
                    }
                }
            }
            else
            {
                hr = E_INVALIDARG;
            }
        }
        // failed to match a valid hours string
        else
        {
            hr = E_INVALIDARG;
        }

        //--- Successfully matched a Time Of Day!  Now expand it and fill out pItemNormInfo
        if ( SUCCEEDED( hr ) )
        {
            NumberGroup Garbage;
            TTSWord Word;
            ZeroMemory( &Word, sizeof(TTSWord) );
            Word.eWordPartOfSpeech = MS_Unknown;

            pItemNormInfo = (TTSTimeOfDayItemInfo*) MemoryManager.GetMemory( sizeof(TTSTimeOfDayItemInfo), &hr );
            if ( SUCCEEDED( hr ) )
            {
                //--- Fill out known parts of pItemNormInfo
                ZeroMemory( pItemNormInfo, sizeof(TTSTimeOfDayItemInfo) );
                pItemNormInfo->Type = eTIMEOFDAY;
                ( (TTSTimeOfDayItemInfo*) pItemNormInfo )->fMinutes          = pMinutes ? true : false;
                ( (TTSTimeOfDayItemInfo*) pItemNormInfo )->fTimeAbbreviation = TimeAbbreviation != UNDEFINED ? true : false;
                ( (TTSTimeOfDayItemInfo*) pItemNormInfo )->fTwentyFourHour   = false;

                //--- Expand the hours
                if ( !iswdigit( pHours[1] ) )
                {
                    ExpandDigit( pHours[0], Garbage, WordList );
                }
                else
                {
                    ExpandTwoDigits( pHours, Garbage, WordList );
                }

                //--- Expand the minutes
                if ( pMinutes )
                {
                    //--- Special case: A bare o'clock - 1:00, 2:00, etc.
                    if ( wcsncmp( pMinutes, L"00", 2 ) == 0 )
                    {
                        //--- ulHours still holds the value parsed from pHours above.
                        //--- Twelve and under is followed by "o'clock"
                        if ( ulHours <= 12 )
                        {
                            Word.pWordText  = g_OClock.pStr;
                            Word.ulWordLen  = g_OClock.Len;
                            Word.pLemma     = Word.pWordText;
                            Word.ulLemmaLen = Word.ulWordLen;
                            WordList.AddTail( Word );
                        }
                        //--- Over twelve is followed by "hundred hours"
                        else
                        {
                            ( (TTSTimeOfDayItemInfo*) pItemNormInfo )->fTwentyFourHour = true;

                            Word.pWordText  = g_hundred.pStr;
                            Word.ulWordLen  = g_hundred.Len;
                            Word.pLemma     = Word.pWordText;
                            Word.ulLemmaLen = Word.ulWordLen;
                            WordList.AddTail( Word );

                            Word.pWordText  = g_hours.pStr;
                            Word.ulWordLen  = g_hours.Len;
                            Word.pLemma     = Word.pWordText;
                            Word.ulLemmaLen = Word.ulWordLen;
                            WordList.AddTail( Word );
                        }
                    }
                    //--- Special Case: Minutes less than 10 - 1:05, 2:06, etc.
                    else if ( pMinutes[0] == L'0' )
                    {
                        Word.pWordText  = g_O.pStr;
                        Word.ulWordLen  = g_O.Len;
                        Word.pLemma     = Word.pWordText;
                        Word.ulLemmaLen = Word.ulWordLen;
                        WordList.AddTail( Word );

                        ExpandDigit( pMinutes[1], Garbage, WordList );
                    }
                    //--- Default Case
                    else
                    {
                        ExpandTwoDigits( pMinutes, Garbage, WordList );
                    }
                }

                //--- Clean up Time XML State: every word currently in the list is stamped
                //--- with the XML state of the fragment that held the hours/minutes text.
                SPLISTPOS WordListPos = WordList.GetHeadPosition();
                while ( WordListPos )
                {
                    TTSWord& TempWord  = WordList.GetNext( WordListPos );
                    TempWord.pXmlState = pTimeXMLState;
                }

                //--- Insert Pre-Abbreviation XML States
                while ( !PreAbbreviationList.IsEmpty() )
                {
                    WordList.AddTail( ( PreAbbreviationList.RemoveHead() ).Words[0] );
                }

                //--- Expand the Time Abbreviation
                //--- AM
                if ( TimeAbbreviation == AM )
                {
                    //--- Ensure the letters are pronounced as nouns...
                    SPVSTATE* pNewState = (SPVSTATE*) MemoryManager.GetMemory( sizeof( SPVSTATE ), &hr );
                    if ( SUCCEEDED( hr ) )
                    {
                        memcpy( pNewState, pAbbreviationXMLState, sizeof( SPVSTATE ) );
                        pNewState->ePartOfSpeech = SPPS_Noun;

                        Word.pXmlState  = pNewState;
                        Word.pWordText  = g_A.pStr;
                        Word.ulWordLen  = g_A.Len;
                        Word.pLemma     = Word.pWordText;
                        Word.ulLemmaLen = Word.ulWordLen;
                        WordList.AddTail( Word );

                        Word.pWordText  = g_M.pStr;
                        Word.ulWordLen  = g_M.Len;
                        Word.pLemma     = Word.pWordText;
                        Word.ulLemmaLen = Word.ulWordLen;
                        WordList.AddTail( Word );
                    }
                }
                //--- PM
                else if ( TimeAbbreviation == PM )
                {
                    //--- Ensure the letters are pronounced as nouns...
                    SPVSTATE* pNewState = (SPVSTATE*) MemoryManager.GetMemory( sizeof( SPVSTATE ), &hr );
                    if ( SUCCEEDED( hr ) )
                    {
                        memcpy( pNewState, pAbbreviationXMLState, sizeof( SPVSTATE ) );
                        pNewState->ePartOfSpeech = SPPS_Noun;

                        //--- Use the noun-tagged copy, exactly as the AM branch does; pointing
                        //--- at the unmodified state would discard the part-of-speech override.
                        Word.pXmlState  = pNewState;
                        Word.pWordText  = g_P.pStr;
                        Word.ulWordLen  = g_P.Len;
                        Word.pLemma     = Word.pWordText;
                        Word.ulLemmaLen = Word.ulWordLen;
                        WordList.AddTail( Word );

                        Word.pWordText  = g_M.pStr;
                        Word.ulWordLen  = g_M.Len;
                        Word.pLemma     = Word.pWordText;
                        Word.ulLemmaLen = Word.ulWordLen;
                        WordList.AddTail( Word );
                    }
                }

                //--- Update pointers, if necessary (abbreviation lived in a following token)
                if ( fAdvancePointers )
                {
                    m_pCurrFrag      = pFrag;
                    m_pEndChar       = pEndChar;
                    m_pEndOfCurrItem = pEndOfItem;
                }
            }
        }
    }

    return hr;
} /* IsTimeOfDay */
/***********************************************************************************************
* IsTime *
*--------*
*   Description:
*       Checks the incoming Item's text to determine whether or not it
*   is a time.
*
*   RegExp:
*       { d+ || d(1-3)[,ddd]+ }[:][00-09,10-59]{ [:][00-09,10-59] }?
*
*   Types assigned:
*       TIME_HRMIN, TIME_MINSEC, TIME_HRMINSEC
*
*   Parameters:
*       pItemNormInfo - [out] receives a TTSTimeItemInfo allocated from MemoryManager.
*       Context       - NULL or "TIME_MS" treats a two-part time as minutes:seconds,
*                       "TIME_HM" treats it as hours:minutes; any other value fails a
*                       two-part time.  A three-part time ignores Context.
*       MemoryManager - allocator used for the item info structures.
*
*   Returns:
*       S_OK on a match, E_INVALIDARG when the text is not a time, or a failure code from
*   MemoryManager.GetMemory / IsNumber.  m_pNextChar and m_pEndOfCurrItem are temporarily
*   narrowed while sub-numbers are parsed and are restored before returning.
********************************************************************* AH **********************/
HRESULT CStdSentEnum::IsTime( TTSItemInfo*& pItemNormInfo, const WCHAR* Context, CSentItemMemory& MemoryManager )
{
    SPDBG_FUNC( "TimeNorm IsTime" );

    HRESULT hr = S_OK;
    WCHAR *pFirstChunk = NULL, *pSecondChunk = NULL, *pThirdChunk = NULL, *pLeftOver = NULL;
    const WCHAR *pTempNextChar = m_pNextChar, *pTempEndOfItem = m_pEndOfCurrItem;
    ULONG ulSecond = 0, ulThird = 0;
    TTSItemInfo *pFirstChunkInfo = NULL;
    BOOL fNegative = false;

    pFirstChunk = (WCHAR*) m_pNextChar;

    //--- Try to match a number for the hours/minutes - { d+ }
    if ( *pFirstChunk == L'-' )
    {
        pFirstChunk++;
        fNegative = true;
    }

    //--- Skip leading zeroes, but keep one if the number is nothing but zeroes ("00:15").
    //--- Only step back when a zero was actually skipped - otherwise (":15") the pointer
    //--- would move in front of the number and pull in a character that is not part of it.
    WCHAR *pFirstDigit = pFirstChunk;
    while ( *pFirstChunk == L'0' )
    {
        pFirstChunk++;
    }
    if ( *pFirstChunk == L':' && pFirstChunk > pFirstDigit )
    {
        pFirstChunk--;
    }
    pSecondChunk = wcschr( pFirstChunk, L':' );

    if ( pSecondChunk && pFirstChunk < pSecondChunk && pSecondChunk < m_pEndOfCurrItem - 1 )
    {
        //--- Parse the text in front of the first colon as a number
        m_pNextChar      = pFirstChunk;
        m_pEndOfCurrItem = pSecondChunk;

        hr = IsNumberCategory( pFirstChunkInfo, L"NUMBER", MemoryManager );

        m_pNextChar      = pTempNextChar;
        m_pEndOfCurrItem = pTempEndOfItem;

        if ( SUCCEEDED( hr ) &&
             ( pFirstChunkInfo->Type == eNUM_DECIMAL || pFirstChunkInfo->Type == eNUM_CARDINAL ) )
        {
            if ( fNegative )
            {
                //--- The '-' was skipped before parsing; record it and speak "negative" first
                ( (TTSNumberItemInfo*) pFirstChunkInfo )->fNegative = true;
                TTSWord Word;
                ZeroMemory( &Word, sizeof( TTSWord ) );
                Word.eWordPartOfSpeech = MS_Unknown;
                Word.pXmlState         = &m_pCurrFrag->State;
                Word.pWordText         = g_negative.pStr;
                Word.ulWordLen         = g_negative.Len;
                Word.pLemma            = Word.pWordText;
                Word.ulLemmaLen        = Word.ulWordLen;
                ( (TTSNumberItemInfo*) pFirstChunkInfo )->pWordList->AddHead( Word );
            }

            pSecondChunk++;
            //--- Try to match a number for the minutes/seconds - [00-09,10-59]
            ulSecond = my_wcstoul( pSecondChunk, &pThirdChunk );
            if ( pSecondChunk != pThirdChunk && pThirdChunk - pSecondChunk == 2 )
            {
                //--- Verify that this is the end of the string
                if ( pThirdChunk == m_pEndOfCurrItem )
                {
                    //--- May have gotten hours and minutes or minutes and seconds - validate values
                    if ( MINUTEMIN <= ulSecond && ulSecond <= MINUTEMAX )
                    {
                        //--- A successful match has been made
                        //--- Default behavior here is to assume minutes and seconds
                        if ( Context == NULL || _wcsicmp( Context, L"TIME_MS" ) == 0 )
                        {
                            //--- Successfully matched minutes and seconds.
                            pItemNormInfo = (TTSTimeItemInfo*) MemoryManager.GetMemory( sizeof(TTSTimeItemInfo), &hr );
                            if ( SUCCEEDED( hr ) )
                            {
                                ZeroMemory( pItemNormInfo, sizeof(TTSTimeItemInfo) );
                                pItemNormInfo->Type = eTIME;

                                ( (TTSTimeItemInfo*) pItemNormInfo )->pMinutes = (TTSNumberItemInfo*) pFirstChunkInfo;
                                //--- Skip a leading zero so "05" is spoken as "five"
                                if ( *pSecondChunk != L'0' )
                                {
                                    ( (TTSTimeItemInfo*) pItemNormInfo )->pSeconds = pSecondChunk;
                                }
                                else
                                {
                                    ( (TTSTimeItemInfo*) pItemNormInfo )->pSeconds = pSecondChunk + 1;
                                }
                            }
                        }
                        //--- If context overrides, values represent hours and minutes
                        else if ( _wcsicmp( Context, L"TIME_HM" ) == 0 )
                        {
                            //--- Successfully matched hours and minutes.
                            pItemNormInfo = (TTSTimeItemInfo*) MemoryManager.GetMemory( sizeof(TTSTimeItemInfo), &hr );
                            if ( SUCCEEDED( hr ) )
                            {
                                ZeroMemory( pItemNormInfo, sizeof(TTSTimeItemInfo) );
                                pItemNormInfo->Type = eTIME;
                                ( (TTSTimeItemInfo*) pItemNormInfo )->pHours = (TTSNumberItemInfo*) pFirstChunkInfo;

                                TTSItemInfo* pMinutesInfo;

                                //--- Don't want "zero zero..." behavior of numbers - strip off beginning zeroes
                                if ( *pSecondChunk == L'0' )
                                {
                                    pSecondChunk++;
                                }

                                m_pNextChar      = pSecondChunk;
                                m_pEndOfCurrItem = pThirdChunk;

                                hr = IsNumber( pMinutesInfo, L"NUMBER", MemoryManager );

                                m_pNextChar      = pTempNextChar;
                                m_pEndOfCurrItem = pTempEndOfItem;

                                if ( SUCCEEDED( hr ) )
                                {
                                    ( (TTSTimeItemInfo*) pItemNormInfo )->pMinutes = (TTSNumberItemInfo*) pMinutesInfo;
                                }
                            }
                        }
                        else // unrecognized context
                        {
                            hr = E_INVALIDARG;
                        }
                    }
                    else // minutes or seconds were out of range
                    {
                        hr = E_INVALIDARG;
                    }
                }
                //--- Check for seconds - TIME_HRMINSEC
                else
                {
                    //--- Try to match the colon
                    if ( *pThirdChunk == L':' )
                    {
                        pThirdChunk++;
                        //--- Try to match a number for the seconds - [00-09,10-59]
                        ulThird = my_wcstoul( pThirdChunk, &pLeftOver );
                        if ( pThirdChunk != pLeftOver && pLeftOver - pThirdChunk == 2 )
                        {
                            //--- Verify that this is the end of the string
                            if ( pLeftOver == m_pEndOfCurrItem )
                            {
                                //--- May have gotten hours minutes and seconds - validate values
                                if ( MINUTEMIN <= ulSecond && ulSecond <= MINUTEMAX &&
                                     SECONDMIN <= ulThird && ulThird <= SECONDMAX )
                                {
                                    //--- Successfully matched hours, minutes, and seconds.
                                    pItemNormInfo = (TTSTimeItemInfo*) MemoryManager.GetMemory( sizeof(TTSTimeItemInfo), &hr );
                                    if ( SUCCEEDED( hr ) )
                                    {
                                        ZeroMemory( pItemNormInfo, sizeof(TTSTimeItemInfo) );
                                        pItemNormInfo->Type = eTIME;
                                        ( (TTSTimeItemInfo*) pItemNormInfo )->pHours = (TTSNumberItemInfo*) pFirstChunkInfo;

                                        TTSItemInfo* pMinutesInfo;

                                        //--- Don't want "zero zero..." behavior of numbers - strip off
                                        //--- beginning zeroes.  pThirdChunk - 1 is the second colon, i.e.
                                        //--- the end of the minutes text; back up from there by the number
                                        //--- of significant digits (ulSecond is already known to be <= 59,
                                        //--- and zero keeps a single '0').
                                        pSecondChunk = ( pThirdChunk - 1 ) - ( ( ulSecond >= 10 ) ? 2 : 1 );

                                        m_pNextChar      = pSecondChunk;
                                        m_pEndOfCurrItem = pThirdChunk - 1;

                                        hr = IsNumber( pMinutesInfo, L"NUMBER", MemoryManager );

                                        m_pNextChar      = pTempNextChar;
                                        m_pEndOfCurrItem = pTempEndOfItem;

                                        if ( SUCCEEDED( hr ) )
                                        {
                                            ( (TTSTimeItemInfo*) pItemNormInfo )->pMinutes = (TTSNumberItemInfo*) pMinutesInfo;
                                            //--- Skip a leading zero so "05" is spoken as "five"
                                            if ( *pThirdChunk != L'0' )
                                            {
                                                ( (TTSTimeItemInfo*) pItemNormInfo )->pSeconds = pThirdChunk;
                                            }
                                            else
                                            {
                                                ( (TTSTimeItemInfo*) pItemNormInfo )->pSeconds = pThirdChunk + 1;
                                            }
                                        }
                                    }
                                }
                                else // minutes or seconds were out of range
                                {
                                    hr = E_INVALIDARG;
                                }
                            }
                            else // extra junk at end of string
                            {
                                hr = E_INVALIDARG;
                            }
                        }
                        else // failed to match a two digit seconds string
                        {
                            hr = E_INVALIDARG;
                        }
                    }
                    else // failed to match the second colon
                    {
                        hr = E_INVALIDARG;
                    }
                }
            }
            else // failed to match a second number
            {
                hr = E_INVALIDARG;
            }
        }
        else // failed to match a first number
        {
            hr = E_INVALIDARG;
        }
    }
    else // failed to match a colon
    {
        hr = E_INVALIDARG;
    }

    //--- On failure, release the word list belonging to the first number (if one was produced)
    if ( FAILED( hr ) )
    {
        if ( pFirstChunkInfo )
        {
            delete ( (TTSNumberItemInfo*) pFirstChunkInfo )->pWordList;
        }
    }

    return hr;
} /* IsTime */
/***********************************************************************************************
* ExpandTime *
*------------*
*   Description:
*       Expands Items previously determined to be a time by IsTime: appends the spoken form
*   "<hours> hour(s) [and] <minutes> minute(s) [and] <seconds> second(s)" to WordList, using
*   whichever of pHours / pMinutes / pSeconds are set in pItemInfo.  "and" is placed in front
*   of the last component only.
*
*   Parameters:
*       pItemInfo - time item to expand (must not be NULL).
*       WordList  - receives the expanded words.
*
*   Returns:
*       S_OK, or the failure code returned by ExpandNumber.
*
*   NOTE: This function does not do parameter validation. Assumed to be done by caller.
********************************************************************* AH **********************/
HRESULT CStdSentEnum::ExpandTime( TTSTimeItemInfo* pItemInfo, CWordList& WordList )
{
    SPDBG_FUNC( "CStdSentEnum::ExpandTime" );

    HRESULT hr = S_OK;
    TTSWord Word;
    ZeroMemory( &Word, sizeof(TTSWord) );
    Word.pXmlState         = &m_pCurrFrag->State;
    Word.eWordPartOfSpeech = MS_Unknown;

    //-------------------
    // Expand the hours
    //-------------------
    if ( pItemInfo->pHours )
    {
        //--- Expand Number
        hr = ExpandNumber( pItemInfo->pHours, WordList );

        //--- Insert "hour" or "hours" (singular only for the single character "1")
        if ( SUCCEEDED( hr ) )
        {
            const BOOL fOneHour = ( pItemInfo->pHours->pEndChar - pItemInfo->pHours->pStartChar == 1 &&
                                    pItemInfo->pHours->pStartChar[0] == L'1' );

            Word.pWordText  = fOneHour ? g_hour.pStr : g_hours.pStr;
            Word.ulWordLen  = fOneHour ? g_hour.Len  : g_hours.Len;
            Word.pLemma     = Word.pWordText;
            Word.ulLemmaLen = Word.ulWordLen;
            WordList.AddTail( Word );
        }

        //--- Insert "and" when minutes are the final component.  pMinutes is checked for
        //--- NULL first: the minutes section below treats it as optional, so it must not
        //--- be dereferenced unconditionally here.
        if ( SUCCEEDED( hr ) &&
             pItemInfo->pMinutes && pItemInfo->pMinutes->pStartChar &&
             !pItemInfo->pSeconds )
        {
            Word.pWordText  = g_And.pStr;
            Word.ulWordLen  = g_And.Len;
            Word.pLemma     = Word.pWordText;
            Word.ulLemmaLen = Word.ulWordLen;
            WordList.AddTail( Word );
        }
    }

    //---------------------
    // Expand the minutes
    //---------------------
    if ( SUCCEEDED( hr ) && pItemInfo->pMinutes )
    {
        //--- Expand Number
        hr = ExpandNumber( pItemInfo->pMinutes, WordList );

        //--- Insert "minute" or "minutes" (singular only for the single character "1")
        if ( SUCCEEDED( hr ) )
        {
            const BOOL fOneMinute = ( pItemInfo->pMinutes->pEndChar - pItemInfo->pMinutes->pStartChar == 1 &&
                                      pItemInfo->pMinutes->pStartChar[0] == L'1' );

            Word.pWordText  = fOneMinute ? g_minute.pStr : g_minutes.pStr;
            Word.ulWordLen  = fOneMinute ? g_minute.Len  : g_minutes.Len;
            Word.pLemma     = Word.pWordText;
            Word.ulLemmaLen = Word.ulWordLen;
            WordList.AddTail( Word );
        }

        //--- Insert "and" when seconds follow
        if ( SUCCEEDED( hr ) && pItemInfo->pSeconds )
        {
            Word.pWordText  = g_And.pStr;
            Word.ulWordLen  = g_And.Len;
            Word.pLemma     = Word.pWordText;
            Word.ulLemmaLen = Word.ulWordLen;
            WordList.AddTail( Word );
        }
    }

    //---------------------
    // Expand the seconds
    //---------------------
    if ( SUCCEEDED( hr ) && pItemInfo->pSeconds )
    {
        //--- pSeconds points at one or two digits; a second digit decides which expander to use
        const BOOL fTwoDigits = iswdigit( pItemInfo->pSeconds[1] ) ? true : false;

        //--- Expand Number
        NumberGroup Garbage;
        if ( fTwoDigits )
        {
            ExpandTwoDigits( pItemInfo->pSeconds, Garbage, WordList );
        }
        else
        {
            ExpandDigit( pItemInfo->pSeconds[0], Garbage, WordList );
        }

        //--- Insert "second" or "seconds" (singular only for a lone "1")
        const BOOL fOneSecond = ( pItemInfo->pSeconds[0] == L'1' && !fTwoDigits );

        Word.pWordText  = fOneSecond ? g_second.pStr : g_seconds.pStr;
        Word.ulWordLen  = fOneSecond ? g_second.Len  : g_seconds.Len;
        Word.pLemma     = Word.pWordText;
        Word.ulLemmaLen = Word.ulWordLen;
        WordList.AddTail( Word );
    }

    return hr;
} /* ExpandTime */
/***********************************************************************************************
* IsTimeRange *
*-------------*
*   Description:
*       Checks the incoming Item's text to determine whether or not it
*   is a time range.
*
*   RegExp:
*       [TimeOfDay]-[TimeOfDay]
*
*   Types assigned:
*       TIME_RANGE
*
*   Parameters:
*       pItemNormInfo - [out] receives a TTSTimeRangeItemInfo allocated from MemoryManager.
*       MemoryManager - allocator used for item info structures.
*       WordList      - receives "<first time> to <second time>" only when the whole range
*                       matched; words are built in a temporary list until then.
*
*   Returns:
*       S_OK on a match; otherwise E_INVALIDARG or the failure code of a helper.  On failure
*   m_pNextChar, m_pEndChar, m_pEndOfCurrItem and m_pCurrFrag are restored to their values
*   on entry.
*
*   NOTE(review): when the hyphen directly follows a separate am/pm token ("10:30 am-11:30"),
*   the hyphen character in the text buffer is overwritten with a space so the abbreviation
*   tokenizes on its own.  It is put back only on the failure path - confirm that leaving the
*   space in place after a successful match is intended.
********************************************************************* AH **********************/
HRESULT CStdSentEnum::IsTimeRange( TTSItemInfo*& pItemNormInfo, CSentItemMemory& MemoryManager,
                                   CWordList& WordList )
{
    SPDBG_FUNC( "CStdSentEnum::IsTimeRange" );

    HRESULT hr = S_OK;
    CWordList TempWordList;
    TTSItemInfo *pFirstTimeInfo = NULL, *pSecondTimeInfo = NULL;
    const WCHAR *pHyphen = NULL;
    CItemList PreAbbreviationList;  // Needed for SkipWhitespace function calls
    BOOL fMultiItem = false;

    //--- Remember the scanner position so it can be restored
    const WCHAR *pTempNextChar = m_pNextChar, *pTempEndChar = m_pEndChar, *pTempEndOfCurrItem = m_pEndOfCurrItem;
    const SPVTEXTFRAG *pTempFrag = m_pCurrFrag;

    //--- Look for a hyphen inside the current item
    for ( pHyphen = m_pNextChar; pHyphen < m_pEndOfCurrItem; pHyphen++ )
    {
        if ( *pHyphen == L'-' )
        {
            break;
        }
    }

    //--- Might be whitespace and time suffix before hyphen
    if ( pHyphen == m_pEndOfCurrItem )
    {
        hr = SkipWhiteSpaceAndTags( pHyphen, m_pEndChar, m_pCurrFrag, MemoryManager, true, &PreAbbreviationList );
        if ( pHyphen && SUCCEEDED( hr ) )
        {
            if ( ( _wcsnicmp( pHyphen, L"am", 2 ) == 0 && pHyphen[2] == L'-' ) ||
                 ( _wcsnicmp( pHyphen, L"pm", 2 ) == 0 && pHyphen[2] == L'-' ) )
            {
                //--- Split "am-..." / "pm-..." into two tokens by blanking the hyphen
                pHyphen += 2;
                *( (WCHAR*) pHyphen ) = ' ';
                fMultiItem = true;
            }
            else if ( ( _wcsnicmp( pHyphen, L"a.m.", 4 ) == 0 && pHyphen[4] == L'-' ) ||
                      ( _wcsnicmp( pHyphen, L"p.m.", 4 ) == 0 && pHyphen[4] == L'-' ) )
            {
                //--- Split "a.m.-..." / "p.m.-..." into two tokens by blanking the hyphen
                pHyphen += 4;
                *( (WCHAR*) pHyphen ) = ' ';
                fMultiItem = true;
            }
            else
            {
                hr = E_INVALIDARG;
            }
        }
        else
        {
            hr = E_INVALIDARG;
        }
    }

    if ( SUCCEEDED( hr ) )
    {
        //--- Position m_pEndOfCurrItem so it is at the end of the first token, or at the hyphen,
        //--- whichever comes first (this is necessary for IsTimeOfDay to work).
        if ( ( m_pNextChar < pHyphen ) && ( pHyphen < m_pEndOfCurrItem ) )
        {
            m_pEndOfCurrItem = pHyphen;
        }

        //--- Check for time of day
        hr = IsTimeOfDay( pFirstTimeInfo, MemoryManager, TempWordList, fMultiItem );

        //--- Check for just a number (hour) - at most two characters before the hyphen
        if ( hr == E_INVALIDARG && ( pHyphen <= m_pNextChar + 2 ) )
        {
            WCHAR *pTemp = NULL;
            int iHours = my_wcstoul( m_pNextChar, &pTemp );

            if ( pTemp == pHyphen && HOURMIN <= iHours && iHours <= HOURMAX )
            {
                NumberGroup Garbage;
                if ( pTemp - m_pNextChar == 1 )
                {
                    ExpandDigit( m_pNextChar[0], Garbage, TempWordList );
                }
                else
                {
                    ExpandTwoDigits( m_pNextChar, Garbage, TempWordList );
                }
                hr = S_OK;
            }
        }

        if ( SUCCEEDED( hr ) )
        {
            //--- Insert "to"
            TTSWord Word;
            ZeroMemory( &Word, sizeof( TTSWord ) );

            Word.pXmlState         = &m_pCurrFrag->State;
            Word.eWordPartOfSpeech = MS_Unknown;
            Word.pWordText         = g_to.pStr;
            Word.ulWordLen         = g_to.Len;
            Word.pLemma            = Word.pWordText;
            Word.ulLemmaLen        = Word.ulWordLen;
            TempWordList.AddTail( Word );

            //--- The second time starts right after the hyphen
            m_pNextChar      = pHyphen + 1;
            m_pEndOfCurrItem = FindTokenEnd( m_pNextChar, m_pEndChar );

            //--- Move m_pEndOfCurrItem back from any punctuation ("4:30-5:30."), but never
            //--- in front of the start of the second token.
            while ( m_pEndOfCurrItem > m_pNextChar &&
                    ( IsMiscPunctuation( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED ||
                      IsGroupEnding( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED ||
                      IsQuotationMark( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED ||
                      IsEOSItem( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED ) )
            {
                m_pEndOfCurrItem--;
            }

            hr = IsTimeOfDay( pSecondTimeInfo, MemoryManager, TempWordList );

            if ( SUCCEEDED( hr ) )
            {
                //--- Matched a time range!  The item now spans from the original start to
                //--- the end of the second time (m_pEndOfCurrItem is left where it is).
                m_pNextChar = pTempNextChar;
                m_pEndChar  = pTempEndChar;

                pItemNormInfo = (TTSTimeRangeItemInfo*) MemoryManager.GetMemory( sizeof( TTSTimeRangeItemInfo ), &hr );
                if ( SUCCEEDED( hr ) )
                {
                    pItemNormInfo->Type = eTIME_RANGE;
                    ( (TTSTimeRangeItemInfo*) pItemNormInfo )->pFirstTimeInfo  = (TTSTimeOfDayItemInfo*) pFirstTimeInfo;
                    ( (TTSTimeRangeItemInfo*) pItemNormInfo )->pSecondTimeInfo = (TTSTimeOfDayItemInfo*) pSecondTimeInfo;
                    //--- Copy temp word list to real word list if everything has succeeded...
                    WordList.AddTail( &TempWordList );
                }
            }
        }
    }

    //--- Anything went wrong: put the scanner (and the blanked hyphen) back the way it was
    if ( FAILED( hr ) )
    {
        m_pNextChar      = pTempNextChar;
        m_pEndChar       = pTempEndChar;
        m_pEndOfCurrItem = pTempEndOfCurrItem;
        m_pCurrFrag      = pTempFrag;
        if ( fMultiItem )
        {
            *( (WCHAR*) pHyphen ) = L'-';
        }
    }

    return hr;
} /* IsTimeRange */

//-----------End Of File-------------------------------------------------------------------
|