You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1023 lines
43 KiB
1023 lines
43 KiB
/***********************************************************************************************
* TimeNorm.cpp *
*-------------*
*   Description:
*       These functions normalize times of day and time measurements.
*-----------------------------------------------------------------------------------------------
*   Created by AH                                                               August 3, 1999
*   Copyright (C) 1999 Microsoft Corporation
*   All Rights Reserved
*
***********************************************************************************************/
|
|
|
|
#include "stdafx.h"
|
|
|
|
#ifndef StdSentEnum_h
|
|
#include "stdsentenum.h"
|
|
#endif
|
|
|
|
#pragma warning (disable : 4296)
|
|
|
|
/***********************************************************************************************
|
|
* IsTimeOfDay *
|
|
*-------------*
|
|
* Description:
|
|
* Checks the incoming Item's text to determine whether or not it
|
|
* is a time of day.
|
|
*
|
|
* RegExp:
|
|
* [01-09,1-12][:][00-09,10-59][TimeAbbreviation]?
|
|
*
|
|
* Types assigned:
|
|
* TIMEOFDAY
|
|
********************************************************************* AH **********************/
|
|
HRESULT CStdSentEnum::IsTimeOfDay( TTSItemInfo*& pItemNormInfo, CSentItemMemory& MemoryManager,
|
|
CWordList& WordList, BOOL fMultiItem )
|
|
{
|
|
SPDBG_FUNC( "CStdSentEnum::IsTimeOfDay" );
|
|
|
|
HRESULT hr = S_OK;
|
|
const WCHAR *pStartChar = m_pNextChar, *pEndOfItem = m_pEndOfCurrItem, *pEndChar = m_pEndChar;
|
|
const SPVTEXTFRAG* pFrag = m_pCurrFrag;
|
|
const SPVSTATE *pTimeXMLState = &pFrag->State, *pAbbreviationXMLState = NULL;
|
|
CItemList PreAbbreviationList;
|
|
BOOL fAdvancePointers = false;
|
|
WCHAR *pHours = NULL, *pMinutes = NULL, *pAbbreviation = NULL;
|
|
ULONG ulHours = 0, ulMinutes = 0;
|
|
TIMEABBREVIATION TimeAbbreviation = UNDEFINED;
|
|
TTSItemType ItemType = eUNMATCHED;
|
|
|
|
//--- Max length of a string matching this regexp is 9 character
|
|
if ( pEndOfItem - pStartChar > 9 )
|
|
{
|
|
hr = E_INVALIDARG;
|
|
}
|
|
else
|
|
{
|
|
pHours = (WCHAR*) pStartChar;
|
|
|
|
//--- Try to match a number for the hour of day - [01-09,1-12]
|
|
ulHours = my_wcstoul( pHours, &pMinutes );
|
|
if ( pHours != pMinutes &&
|
|
pMinutes - pHours <= 2 )
|
|
{
|
|
//--- Try to match the colon - [:]
|
|
if ( *pMinutes == ':' )
|
|
{
|
|
pMinutes++;
|
|
//--- Try to match a number for the minutes - [00-09,10-59]
|
|
ulMinutes = my_wcstoul( pMinutes, &pAbbreviation );
|
|
if ( pMinutes != pAbbreviation &&
|
|
pAbbreviation - pMinutes == 2 )
|
|
{
|
|
//--- Verify that this is the end of the string
|
|
if ( pAbbreviation == pEndOfItem )
|
|
{
|
|
//--- May have gotten hours and minutes - validate values
|
|
if ( HOURMIN <= ulHours && ulHours <= HOURMAX &&
|
|
MINUTEMIN <= ulMinutes && ulMinutes <= MINUTEMAX )
|
|
{
|
|
//--- A successful match has been made, but peek ahead in text for Time Abbreviation
|
|
if ( fMultiItem )
|
|
{
|
|
pStartChar = pEndOfItem;
|
|
hr = SkipWhiteSpaceAndTags( pStartChar, pEndChar, pFrag, MemoryManager,
|
|
true, &PreAbbreviationList );
|
|
if ( pStartChar &&
|
|
SUCCEEDED( hr ) )
|
|
{
|
|
pEndOfItem = FindTokenEnd( pStartChar, pEndChar );
|
|
|
|
while ( IsMiscPunctuation( *(pEndOfItem - 1) ) != eUNMATCHED ||
|
|
IsGroupEnding( *(pEndOfItem - 1) ) != eUNMATCHED ||
|
|
IsQuotationMark( *(pEndOfItem - 1) ) != eUNMATCHED ||
|
|
( ( ItemType = IsEOSItem( *(pEndOfItem - 1) ) ) != eUNMATCHED &&
|
|
( ItemType != ePERIOD ||
|
|
( _wcsnicmp( pStartChar, L"am.", 3 ) == 0 &&
|
|
pStartChar + 3 == pEndOfItem ) ||
|
|
( _wcsnicmp( pStartChar, L"pm.", 3 ) == 0 &&
|
|
pStartChar + 3 == pEndOfItem ) ) ) )
|
|
{
|
|
pEndOfItem--;
|
|
}
|
|
pAbbreviation = (WCHAR*) pStartChar;
|
|
|
|
if ( ( _wcsnicmp( pAbbreviation, L"am", 2 ) == 0 &&
|
|
pAbbreviation + 2 == pEndOfItem ) ||
|
|
( _wcsnicmp( pAbbreviation, L"a.m.", 4 ) == 0 &&
|
|
pAbbreviation + 4 == pEndOfItem ) )
|
|
{
|
|
//--- Found a valid Time Abbreviation - [Hours:Minutes] [whitespace] [Abbrev]
|
|
TimeAbbreviation = AM;
|
|
pAbbreviationXMLState = &pFrag->State;
|
|
fAdvancePointers = true;
|
|
}
|
|
else if ( ( _wcsnicmp( pAbbreviation, L"pm", 2 ) == 0 &&
|
|
pAbbreviation + 2 == pEndOfItem ) ||
|
|
( _wcsnicmp( pAbbreviation, L"p.m.", 4 ) == 0 &&
|
|
pAbbreviation + 4 == pEndOfItem ) )
|
|
{
|
|
//--- Found a valid Time Abbreviation - [Hours:Minutes] [whitespace] [Abbrev]
|
|
TimeAbbreviation = PM;
|
|
pAbbreviationXMLState = &pFrag->State;
|
|
fAdvancePointers = true;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else // hours or minutes were out of range
|
|
{
|
|
hr = E_INVALIDARG;
|
|
}
|
|
}
|
|
//--- Check to see if the rest of the string is a time abbreviation - [TimeAbbreviation]
|
|
else if ( ( _wcsnicmp( pAbbreviation, L"am", 2 ) == 0 &&
|
|
pAbbreviation + 2 == pEndOfItem ) ||
|
|
( _wcsnicmp( pAbbreviation, L"a.m.", 4 ) == 0 &&
|
|
pAbbreviation + 4 == pEndOfItem ) )
|
|
{
|
|
//--- May have gotten hours and minutes and time abbreviation - validate values
|
|
if ( HOURMIN <= ulHours && ulHours <= HOURMAX &&
|
|
MINUTEMIN <= ulMinutes && ulMinutes <= MINUTEMAX )
|
|
{
|
|
//--- A successful match has been made
|
|
TimeAbbreviation = AM;
|
|
pAbbreviationXMLState = &pFrag->State;
|
|
}
|
|
else // hours or minutes were out of range
|
|
{
|
|
hr = E_INVALIDARG;
|
|
}
|
|
}
|
|
//--- Check to see if the rest of the string is a time abbreviation - [TimeAbbreviation]
|
|
else if ( ( _wcsnicmp( pAbbreviation, L"pm", 2 ) == 0 &&
|
|
pAbbreviation + 2 == pEndOfItem ) ||
|
|
( _wcsnicmp( pAbbreviation, L"p.m.", 4 ) == 0 &&
|
|
pAbbreviation + 4 == pEndOfItem ) )
|
|
{
|
|
//--- May have gotten hours and minutes and time abbreviation - validate values
|
|
if ( HOURMIN <= ulHours && ulHours <= HOURMAX &&
|
|
MINUTEMIN <= ulMinutes && ulMinutes <= MINUTEMAX )
|
|
{
|
|
//--- A successful match has been made
|
|
pAbbreviationXMLState = &pFrag->State;
|
|
TimeAbbreviation = PM;
|
|
}
|
|
else // hours or minutes were out of range
|
|
{
|
|
hr = E_INVALIDARG;
|
|
}
|
|
}
|
|
else // string ended in invalid characters
|
|
{
|
|
hr = E_INVALIDARG;
|
|
}
|
|
} // failed to match a valid minutes string
|
|
else
|
|
{
|
|
hr = E_INVALIDARG;
|
|
}
|
|
} // failed to match the colon, could be just hours and a time abbreviation
|
|
else if ( pMinutes < m_pEndOfCurrItem )
|
|
{
|
|
pAbbreviation = pMinutes;
|
|
pMinutes = NULL;
|
|
|
|
|
|
//--- Check for TimeAbbreviation - [TimeAbbreviation]
|
|
if ( ( _wcsnicmp( pAbbreviation, L"am", 2 ) == 0 &&
|
|
pAbbreviation + 2 == pEndOfItem ) ||
|
|
( _wcsnicmp( pAbbreviation, L"a.m.", 4 ) == 0 &&
|
|
pAbbreviation + 4 == pEndOfItem ) )
|
|
{
|
|
//--- A successful match has been made - Hour AM
|
|
pAbbreviationXMLState = &pFrag->State;
|
|
TimeAbbreviation = AM;
|
|
}
|
|
else if ( ( _wcsnicmp( pAbbreviation, L"pm", 2 ) == 0 &&
|
|
pAbbreviation + 2 == pEndOfItem ) ||
|
|
( _wcsnicmp( pAbbreviation, L"p.m.", 4 ) == 0 &&
|
|
pAbbreviation + 4 == pEndOfItem ) )
|
|
{
|
|
//--- A successful match has been made - Hour PM
|
|
pAbbreviationXMLState = &pFrag->State;
|
|
TimeAbbreviation = PM;
|
|
}
|
|
else // failed to match a valid time abbreviation
|
|
{
|
|
hr = E_INVALIDARG;
|
|
}
|
|
}
|
|
else if ( fMultiItem )
|
|
{
|
|
//--- Set pMinutes to NULL, so we know later that we've got no minutes string...
|
|
pMinutes = NULL;
|
|
|
|
//--- Peek ahead in text for a time abbreviation
|
|
pStartChar = pEndOfItem;
|
|
hr = SkipWhiteSpaceAndTags( pStartChar, pEndChar, pFrag, MemoryManager,
|
|
true, &PreAbbreviationList );
|
|
if ( !pStartChar &&
|
|
SUCCEEDED( hr ) )
|
|
{
|
|
hr = E_INVALIDARG;
|
|
}
|
|
else if ( pStartChar &&
|
|
SUCCEEDED( hr ) )
|
|
{
|
|
pEndOfItem = FindTokenEnd( pStartChar, pEndChar );
|
|
|
|
while ( IsMiscPunctuation( *(pEndOfItem - 1) ) != eUNMATCHED ||
|
|
IsGroupEnding( *(pEndOfItem - 1) ) != eUNMATCHED ||
|
|
IsQuotationMark( *(pEndOfItem - 1) ) != eUNMATCHED ||
|
|
( ( ItemType = IsEOSItem( *(pEndOfItem - 1) ) ) != eUNMATCHED &&
|
|
ItemType != ePERIOD ) )
|
|
{
|
|
pEndOfItem--;
|
|
}
|
|
pAbbreviation = (WCHAR*) pStartChar;
|
|
|
|
if ( ( _wcsnicmp( pAbbreviation, L"am", 2 ) == 0 &&
|
|
pAbbreviation + 2 == pEndOfItem ) ||
|
|
( _wcsnicmp( pAbbreviation, L"a.m.", 4 ) == 0 &&
|
|
pAbbreviation + 4 == pEndOfItem ) )
|
|
{
|
|
//--- Found a valid Time Abbreviation - [Hours:Minutes] [whitespace] [Abbrev]
|
|
TimeAbbreviation = AM;
|
|
pAbbreviationXMLState = &pFrag->State;
|
|
fAdvancePointers = true;
|
|
}
|
|
else if ( ( _wcsnicmp( pAbbreviation, L"pm", 2 ) == 0 &&
|
|
pAbbreviation + 2 == pEndOfItem ) ||
|
|
( _wcsnicmp( pAbbreviation, L"p.m.", 4 ) == 0 &&
|
|
pAbbreviation + 4 == pEndOfItem ) )
|
|
{
|
|
//--- Found a valid Time Abbreviation - [Hours:Minutes] [whitespace] [Abbrev]
|
|
TimeAbbreviation = PM;
|
|
pAbbreviationXMLState = &pFrag->State;
|
|
fAdvancePointers = true;
|
|
}
|
|
//--- Failed to match a valid Time Abbreviation
|
|
else
|
|
{
|
|
hr = E_INVALIDARG;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
hr = E_INVALIDARG;
|
|
}
|
|
} // failed to match a valid hours string
|
|
else
|
|
{
|
|
hr = E_INVALIDARG;
|
|
}
|
|
|
|
//--- Successfully matched a Time Of Day! Now expand it and fill out pItemNormInfo
|
|
if ( SUCCEEDED( hr ) )
|
|
{
|
|
NumberGroup Garbage;
|
|
TTSWord Word;
|
|
ZeroMemory( &Word, sizeof(TTSWord) );
|
|
Word.eWordPartOfSpeech = MS_Unknown;
|
|
|
|
pItemNormInfo = (TTSTimeOfDayItemInfo*) MemoryManager.GetMemory( sizeof(TTSTimeOfDayItemInfo), &hr );
|
|
if ( SUCCEEDED( hr ) )
|
|
{
|
|
//--- Fill out known parts of pItemNormInfo
|
|
ZeroMemory( pItemNormInfo, sizeof(TTSTimeOfDayItemInfo) );
|
|
pItemNormInfo->Type = eTIMEOFDAY;
|
|
( (TTSTimeOfDayItemInfo*) pItemNormInfo )->fMinutes = pMinutes ? true : false;
|
|
( (TTSTimeOfDayItemInfo*) pItemNormInfo )->fTimeAbbreviation = TimeAbbreviation != UNDEFINED ? true : false;
|
|
( (TTSTimeOfDayItemInfo*) pItemNormInfo )->fTwentyFourHour = false;
|
|
|
|
//--- Expand the hours
|
|
if ( !iswdigit( pHours[1] ) )
|
|
{
|
|
ExpandDigit( pHours[0], Garbage, WordList );
|
|
}
|
|
else
|
|
{
|
|
ExpandTwoDigits( pHours, Garbage, WordList );
|
|
}
|
|
|
|
//--- Expand the minutes
|
|
if ( pMinutes )
|
|
{
|
|
//--- Special case: A bare o'clock - 1:00, 2:00, etc.
|
|
if ( wcsncmp( pMinutes, L"00", 2 ) == 0 )
|
|
{
|
|
WCHAR *pGarbage;
|
|
ULONG ulHours = my_wcstoul( pHours, &pGarbage );
|
|
//--- Under twelve is followed by "o'clock"
|
|
if ( ulHours <= 12 )
|
|
{
|
|
Word.pWordText = g_OClock.pStr;
|
|
Word.ulWordLen = g_OClock.Len;
|
|
Word.pLemma = Word.pWordText;
|
|
Word.ulLemmaLen = Word.ulWordLen;
|
|
WordList.AddTail( Word );
|
|
}
|
|
//--- Over twelve is followed by "hundred hours"
|
|
else
|
|
{
|
|
( (TTSTimeOfDayItemInfo*) pItemNormInfo )->fTwentyFourHour = true;
|
|
|
|
Word.pWordText = g_hundred.pStr;
|
|
Word.ulWordLen = g_hundred.Len;
|
|
Word.pLemma = Word.pWordText;
|
|
Word.ulLemmaLen = Word.ulWordLen;
|
|
WordList.AddTail( Word );
|
|
|
|
Word.pWordText = g_hours.pStr;
|
|
Word.ulWordLen = g_hours.Len;
|
|
Word.pLemma = Word.pWordText;
|
|
Word.ulLemmaLen = Word.ulWordLen;
|
|
WordList.AddTail( Word );
|
|
}
|
|
}
|
|
//--- Special Case: Minutes less than 10 - 1:05, 2:06, etc.
|
|
else if ( pMinutes[0] == L'0' )
|
|
{
|
|
Word.pWordText = g_O.pStr;
|
|
Word.ulWordLen = g_O.Len;
|
|
Word.pLemma = Word.pWordText;
|
|
Word.ulLemmaLen = Word.ulWordLen;
|
|
WordList.AddTail( Word );
|
|
|
|
ExpandDigit( pMinutes[1], Garbage, WordList );
|
|
}
|
|
//--- Default Case
|
|
else
|
|
{
|
|
ExpandTwoDigits( pMinutes, Garbage, WordList );
|
|
}
|
|
}
|
|
|
|
//--- Clean up Time XML State
|
|
SPLISTPOS WordListPos = WordList.GetHeadPosition();
|
|
while ( WordListPos )
|
|
{
|
|
TTSWord& TempWord = WordList.GetNext( WordListPos );
|
|
TempWord.pXmlState = pTimeXMLState;
|
|
}
|
|
|
|
//--- Insert Pre-Abbreviation XML States
|
|
while ( !PreAbbreviationList.IsEmpty() )
|
|
{
|
|
WordList.AddTail( ( PreAbbreviationList.RemoveHead() ).Words[0] );
|
|
}
|
|
|
|
//--- Expand the Time Abbreviation
|
|
//--- AM
|
|
if ( TimeAbbreviation == AM )
|
|
{
|
|
//--- Ensure the letters are pronounced as nouns...
|
|
SPVSTATE* pNewState = (SPVSTATE*) MemoryManager.GetMemory( sizeof( SPVSTATE ), &hr );
|
|
if ( SUCCEEDED( hr ) )
|
|
{
|
|
memcpy( pNewState, pAbbreviationXMLState, sizeof( SPVSTATE ) );
|
|
pNewState->ePartOfSpeech = SPPS_Noun;
|
|
|
|
Word.pXmlState = pNewState;
|
|
Word.pWordText = g_A.pStr;
|
|
Word.ulWordLen = g_A.Len;
|
|
Word.pLemma = Word.pWordText;
|
|
Word.ulLemmaLen = Word.ulWordLen;
|
|
WordList.AddTail( Word );
|
|
|
|
Word.pWordText = g_M.pStr;
|
|
Word.ulWordLen = g_M.Len;
|
|
Word.pLemma = Word.pWordText;
|
|
Word.ulLemmaLen = Word.ulWordLen;
|
|
WordList.AddTail( Word );
|
|
}
|
|
}
|
|
//--- PM
|
|
else if ( TimeAbbreviation == PM )
|
|
{
|
|
//--- Ensure the letters are pronounced as nouns...
|
|
SPVSTATE* pNewState = (SPVSTATE*) MemoryManager.GetMemory( sizeof( SPVSTATE ), &hr );
|
|
if ( SUCCEEDED( hr ) )
|
|
{
|
|
memcpy( pNewState, pAbbreviationXMLState, sizeof( SPVSTATE ) );
|
|
pNewState->ePartOfSpeech = SPPS_Noun;
|
|
|
|
Word.pXmlState = pAbbreviationXMLState;
|
|
Word.pWordText = g_P.pStr;
|
|
Word.ulWordLen = g_P.Len;
|
|
Word.pLemma = Word.pWordText;
|
|
Word.ulLemmaLen = Word.ulWordLen;
|
|
WordList.AddTail( Word );
|
|
|
|
Word.pWordText = g_M.pStr;
|
|
Word.ulWordLen = g_M.Len;
|
|
Word.pLemma = Word.pWordText;
|
|
Word.ulLemmaLen = Word.ulWordLen;
|
|
WordList.AddTail( Word );
|
|
}
|
|
}
|
|
|
|
//--- Update pointers, if necessary
|
|
if ( fAdvancePointers )
|
|
{
|
|
m_pCurrFrag = pFrag;
|
|
m_pEndChar = pEndChar;
|
|
m_pEndOfCurrItem = pEndOfItem;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return hr;
|
|
} /* IsTimeOfDay */
|
|
|
|
/***********************************************************************************************
|
|
* IsTime *
|
|
*--------*
|
|
* Description:
|
|
* Checks the incoming Item's text to determine whether or not it
|
|
* is a time.
|
|
*
|
|
* RegExp:
|
|
* { d+ || d(1-3)[,ddd]+ }[:][00-09,10-59]{ [:][00-09,10-59] }?
|
|
*
|
|
* Types assigned:
|
|
* TIME_HRMIN, TIME_MINSEC, TIME_HRMINSEC
|
|
********************************************************************* AH **********************/
|
|
HRESULT CStdSentEnum::IsTime( TTSItemInfo*& pItemNormInfo, const WCHAR* Context, CSentItemMemory& MemoryManager )
|
|
{
|
|
SPDBG_FUNC( "TimeNorm IsTime" );
|
|
|
|
HRESULT hr = S_OK;
|
|
WCHAR *pFirstChunk = NULL, *pSecondChunk = NULL, *pThirdChunk = NULL, *pLeftOver = NULL;
|
|
const WCHAR *pTempNextChar = m_pNextChar, *pTempEndOfItem = m_pEndOfCurrItem;
|
|
ULONG ulSecond = 0, ulThird = 0;
|
|
TTSItemInfo *pFirstChunkInfo = NULL;
|
|
BOOL fNegative = false;
|
|
|
|
pFirstChunk = (WCHAR*) m_pNextChar;
|
|
|
|
//--- Try to match a number for the hours/minutes - { d+ }
|
|
if ( *pFirstChunk == L'-' )
|
|
{
|
|
pFirstChunk++;
|
|
fNegative = true;
|
|
}
|
|
while ( *pFirstChunk == L'0' )
|
|
{
|
|
pFirstChunk++;
|
|
}
|
|
if ( *pFirstChunk == L':' )
|
|
{
|
|
pFirstChunk--;
|
|
}
|
|
pSecondChunk = wcschr( pFirstChunk, L':' );
|
|
|
|
if ( pSecondChunk &&
|
|
pFirstChunk < pSecondChunk &&
|
|
pSecondChunk < m_pEndOfCurrItem - 1 )
|
|
{
|
|
m_pNextChar = pFirstChunk;
|
|
m_pEndOfCurrItem = pSecondChunk;
|
|
|
|
hr = IsNumberCategory( pFirstChunkInfo, L"NUMBER", MemoryManager );
|
|
|
|
m_pNextChar = pTempNextChar;
|
|
m_pEndOfCurrItem = pTempEndOfItem;
|
|
|
|
if ( SUCCEEDED( hr ) &&
|
|
( pFirstChunkInfo->Type == eNUM_DECIMAL ||
|
|
pFirstChunkInfo->Type == eNUM_CARDINAL ) )
|
|
{
|
|
if ( fNegative )
|
|
{
|
|
( (TTSNumberItemInfo*) pFirstChunkInfo )->fNegative = true;
|
|
TTSWord Word;
|
|
ZeroMemory( &Word, sizeof( TTSWord ) );
|
|
Word.eWordPartOfSpeech = MS_Unknown;
|
|
Word.pXmlState = &m_pCurrFrag->State;
|
|
Word.pWordText = g_negative.pStr;
|
|
Word.ulWordLen = g_negative.Len;
|
|
Word.pLemma = Word.pWordText;
|
|
Word.ulLemmaLen = Word.ulWordLen;
|
|
( (TTSNumberItemInfo*) pFirstChunkInfo )->pWordList->AddHead( Word );
|
|
}
|
|
|
|
pSecondChunk++;
|
|
//--- Try to match a number for the minutes/seconds - [00-09,10-59]
|
|
ulSecond = my_wcstoul( pSecondChunk, &pThirdChunk );
|
|
if ( pSecondChunk != pThirdChunk &&
|
|
pThirdChunk - pSecondChunk == 2 )
|
|
{
|
|
//--- Verify that this is the end of the string
|
|
if ( pThirdChunk == m_pEndOfCurrItem )
|
|
{
|
|
//--- May have gotten hours and minutes or minutes and seconds - validate values
|
|
if ( MINUTEMIN <= ulSecond && ulSecond <= MINUTEMAX )
|
|
{
|
|
//--- A successful match has been made
|
|
//--- Default behavior here is to assume minutes and seconds
|
|
if ( Context == NULL ||
|
|
_wcsicmp( Context, L"TIME_MS" ) == 0 )
|
|
{
|
|
//--- Successfully matched minutes and seconds.
|
|
pItemNormInfo = (TTSTimeItemInfo*) MemoryManager.GetMemory( sizeof(TTSTimeItemInfo),
|
|
&hr );
|
|
if ( SUCCEEDED( hr ) )
|
|
{
|
|
ZeroMemory( pItemNormInfo, sizeof(TTSTimeItemInfo) );
|
|
pItemNormInfo->Type = eTIME;
|
|
|
|
( (TTSTimeItemInfo*) pItemNormInfo )->pMinutes =
|
|
(TTSNumberItemInfo*) pFirstChunkInfo;
|
|
if ( *pSecondChunk != L'0' )
|
|
{
|
|
( (TTSTimeItemInfo*) pItemNormInfo )->pSeconds = pSecondChunk;
|
|
}
|
|
else
|
|
{
|
|
( (TTSTimeItemInfo*) pItemNormInfo )->pSeconds = pSecondChunk + 1;
|
|
}
|
|
}
|
|
}
|
|
//--- If context overrides, values represent hours and minutes
|
|
else if ( _wcsicmp( Context, L"TIME_HM" ) == 0 )
|
|
{
|
|
//--- Successfully matched hours and pMinutes->
|
|
pItemNormInfo = (TTSTimeItemInfo*) MemoryManager.GetMemory( sizeof(TTSTimeItemInfo),
|
|
&hr );
|
|
if ( SUCCEEDED( hr ) )
|
|
{
|
|
ZeroMemory( pItemNormInfo, sizeof(TTSTimeItemInfo) );
|
|
pItemNormInfo->Type = eTIME;
|
|
( (TTSTimeItemInfo*) pItemNormInfo )->pHours =
|
|
(TTSNumberItemInfo*) pFirstChunkInfo;
|
|
|
|
TTSItemInfo* pMinutesInfo;
|
|
|
|
//--- Don't want "zero zero..." behavior of numbers - strip off beginning zeroes
|
|
if ( *pSecondChunk == L'0' )
|
|
{
|
|
pSecondChunk++;
|
|
}
|
|
|
|
m_pNextChar = pSecondChunk;
|
|
m_pEndOfCurrItem = pThirdChunk;
|
|
|
|
hr = IsNumber( pMinutesInfo, L"NUMBER", MemoryManager );
|
|
|
|
m_pNextChar = pTempNextChar;
|
|
m_pEndOfCurrItem = pTempEndOfItem;
|
|
|
|
if ( SUCCEEDED( hr ) )
|
|
{
|
|
( (TTSTimeItemInfo*) pItemNormInfo )->pMinutes = (TTSNumberItemInfo*) pMinutesInfo;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
hr = E_INVALIDARG;
|
|
}
|
|
}
|
|
else // minutes or seconds were out of range
|
|
{
|
|
hr = E_INVALIDARG;
|
|
}
|
|
}
|
|
//--- Check for seconds - TIME_HRMINS
|
|
else
|
|
{
|
|
//--- Try to match the colon
|
|
if ( *pThirdChunk == L':' )
|
|
{
|
|
pThirdChunk++;
|
|
//--- Try to match a number for the seconds - [00-09,10-59]
|
|
ulThird = my_wcstoul( pThirdChunk, &pLeftOver );
|
|
if ( pThirdChunk != pLeftOver &&
|
|
pLeftOver - pThirdChunk == 2 )
|
|
{
|
|
//--- Verify that this is the end of the string
|
|
if ( pLeftOver == m_pEndOfCurrItem )
|
|
{
|
|
//--- May have gotten hours minutes and seconds - validate values
|
|
if ( MINUTEMIN <= ulSecond && ulSecond <= MINUTEMAX &&
|
|
SECONDMIN <= ulThird && ulThird <= SECONDMAX )
|
|
{
|
|
//--- Successfully matched hours, minutes, and seconds.
|
|
pItemNormInfo = (TTSTimeItemInfo*) MemoryManager.GetMemory( sizeof(TTSTimeItemInfo),
|
|
&hr );
|
|
if ( SUCCEEDED( hr ) )
|
|
{
|
|
ZeroMemory( pItemNormInfo, sizeof(TTSTimeItemInfo) );
|
|
pItemNormInfo->Type = eTIME;
|
|
( (TTSTimeItemInfo*) pItemNormInfo )->pHours =
|
|
(TTSNumberItemInfo*) pFirstChunkInfo;
|
|
|
|
if ( SUCCEEDED( hr ) )
|
|
{
|
|
TTSItemInfo* pMinutesInfo;
|
|
|
|
//--- Don't want "zero zero..." behavior of numbers - strip off beginning zeroes
|
|
if ( ulSecond != 0 )
|
|
{
|
|
pSecondChunk += ( ( pThirdChunk - 1 ) - pSecondChunk ) -
|
|
(ULONG)( log10( ulSecond ) + 1 );
|
|
}
|
|
else
|
|
{
|
|
pSecondChunk = pThirdChunk - 2;
|
|
}
|
|
|
|
m_pNextChar = pSecondChunk;
|
|
m_pEndOfCurrItem = pThirdChunk - 1;
|
|
|
|
hr = IsNumber( pMinutesInfo, L"NUMBER", MemoryManager );
|
|
|
|
m_pNextChar = pTempNextChar;
|
|
m_pEndOfCurrItem = pTempEndOfItem;
|
|
|
|
if ( SUCCEEDED( hr ) )
|
|
{
|
|
( (TTSTimeItemInfo*) pItemNormInfo )->pMinutes =
|
|
(TTSNumberItemInfo*) pMinutesInfo;
|
|
if ( *pThirdChunk != L'0' )
|
|
{
|
|
( (TTSTimeItemInfo*) pItemNormInfo )->pSeconds = pThirdChunk;
|
|
}
|
|
else
|
|
{
|
|
( (TTSTimeItemInfo*) pItemNormInfo )->pSeconds = pThirdChunk + 1;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else // minutes or seconds were out of range
|
|
{
|
|
hr = E_INVALIDARG;
|
|
}
|
|
}
|
|
else // extra junk at end of string
|
|
{
|
|
hr = E_INVALIDARG;
|
|
}
|
|
}
|
|
else // extra junk at end of string
|
|
{
|
|
hr = E_INVALIDARG;
|
|
}
|
|
}
|
|
else // failed to match a colon
|
|
{
|
|
hr = E_INVALIDARG;
|
|
}
|
|
}
|
|
}
|
|
else // failed to match a second number
|
|
{
|
|
hr = E_INVALIDARG;
|
|
}
|
|
}
|
|
else // failed to match a colon
|
|
{
|
|
hr = E_INVALIDARG;
|
|
}
|
|
}
|
|
else // failed to match a first number
|
|
{
|
|
hr = E_INVALIDARG;
|
|
}
|
|
|
|
if ( FAILED( hr ) )
|
|
{
|
|
if ( pFirstChunkInfo )
|
|
{
|
|
delete ( (TTSNumberItemInfo*) pFirstChunkInfo )->pWordList;
|
|
}
|
|
}
|
|
|
|
return hr;
|
|
} /* IsTime */
|
|
|
|
/***********************************************************************************************
|
|
* ExpandTime *
|
|
*------------*
|
|
* Description:
|
|
* Expands Items previously determined to be of type TIME_HRMINSEC by IsTime.
|
|
*
|
|
* NOTE: This function does not do parameter validation. Assumed to be done by caller.
|
|
********************************************************************* AH **********************/
|
|
HRESULT CStdSentEnum::ExpandTime( TTSTimeItemInfo* pItemInfo, CWordList& WordList )
|
|
{
|
|
SPDBG_FUNC( "CStdSentEnum::ExpandTime" );
|
|
|
|
HRESULT hr = S_OK;
|
|
TTSWord Word;
|
|
ZeroMemory( &Word, sizeof(TTSWord) );
|
|
Word.pXmlState = &m_pCurrFrag->State;
|
|
Word.eWordPartOfSpeech = MS_Unknown;
|
|
|
|
//-------------------
|
|
// Expand the hours
|
|
//-------------------
|
|
|
|
if ( pItemInfo->pHours )
|
|
{
|
|
//--- Expand Number
|
|
hr = ExpandNumber( pItemInfo->pHours, WordList );
|
|
|
|
//--- Insert "hour" or "hours"
|
|
if ( SUCCEEDED( hr ) )
|
|
{
|
|
if ( pItemInfo->pHours->pEndChar - pItemInfo->pHours->pStartChar == 1 &&
|
|
pItemInfo->pHours->pStartChar[0] == L'1' )
|
|
{
|
|
Word.pWordText = g_hour.pStr;
|
|
Word.ulWordLen = g_hour.Len;
|
|
Word.pLemma = Word.pWordText;
|
|
Word.ulLemmaLen = Word.ulWordLen;
|
|
WordList.AddTail( Word );
|
|
}
|
|
else
|
|
{
|
|
Word.pWordText = g_hours.pStr;
|
|
Word.ulWordLen = g_hours.Len;
|
|
Word.pLemma = Word.pWordText;
|
|
Word.ulLemmaLen = Word.ulWordLen;
|
|
WordList.AddTail( Word );
|
|
}
|
|
|
|
}
|
|
|
|
//--- Insert "and"
|
|
if ( SUCCEEDED( hr ) &&
|
|
pItemInfo->pMinutes->pStartChar &&
|
|
!pItemInfo->pSeconds )
|
|
{
|
|
Word.pWordText = g_And.pStr;
|
|
Word.ulWordLen = g_And.Len;
|
|
Word.pLemma = Word.pWordText;
|
|
Word.ulLemmaLen = Word.ulWordLen;
|
|
WordList.AddTail( Word );
|
|
}
|
|
}
|
|
|
|
//---------------------
|
|
// Expand the minutes
|
|
//---------------------
|
|
|
|
if ( SUCCEEDED( hr ) &&
|
|
pItemInfo->pMinutes )
|
|
{
|
|
//--- Expand Number
|
|
hr = ExpandNumber( pItemInfo->pMinutes, WordList );
|
|
|
|
//--- Insert "minutes"
|
|
if ( SUCCEEDED( hr ) )
|
|
{
|
|
if ( pItemInfo->pMinutes->pEndChar - pItemInfo->pMinutes->pStartChar == 1 &&
|
|
pItemInfo->pMinutes->pStartChar[0] == L'1' )
|
|
{
|
|
Word.pWordText = g_minute.pStr;
|
|
Word.ulWordLen = g_minute.Len;
|
|
Word.pLemma = Word.pWordText;
|
|
Word.ulLemmaLen = Word.ulWordLen;
|
|
WordList.AddTail( Word );
|
|
}
|
|
else
|
|
{
|
|
Word.pWordText = g_minutes.pStr;
|
|
Word.ulWordLen = g_minutes.Len;
|
|
Word.pLemma = Word.pWordText;
|
|
Word.ulLemmaLen = Word.ulWordLen;
|
|
WordList.AddTail( Word );
|
|
}
|
|
}
|
|
|
|
//--- Insert "and"
|
|
if ( SUCCEEDED( hr ) &&
|
|
pItemInfo->pSeconds )
|
|
{
|
|
Word.pWordText = g_And.pStr;
|
|
Word.ulWordLen = g_And.Len;
|
|
Word.pLemma = Word.pWordText;
|
|
Word.ulLemmaLen = Word.ulWordLen;
|
|
WordList.AddTail( Word );
|
|
}
|
|
}
|
|
|
|
//---------------------
|
|
// Expand the seconds
|
|
//---------------------
|
|
|
|
if ( SUCCEEDED( hr ) &&
|
|
pItemInfo->pSeconds )
|
|
{
|
|
//--- Expand Number
|
|
NumberGroup Garbage;
|
|
if ( iswdigit( pItemInfo->pSeconds[1] ) )
|
|
{
|
|
ExpandTwoDigits( pItemInfo->pSeconds, Garbage, WordList );
|
|
}
|
|
else
|
|
{
|
|
ExpandDigit( pItemInfo->pSeconds[0], Garbage, WordList );
|
|
}
|
|
|
|
//--- Insert "seconds"
|
|
if ( pItemInfo->pSeconds[0] == L'1' &&
|
|
!iswdigit( pItemInfo->pSeconds[1] ) )
|
|
{
|
|
Word.pWordText = g_second.pStr;
|
|
Word.ulWordLen = g_second.Len;
|
|
Word.pLemma = Word.pWordText;
|
|
Word.ulLemmaLen = Word.ulWordLen;
|
|
WordList.AddTail( Word );
|
|
}
|
|
else
|
|
{
|
|
Word.pWordText = g_seconds.pStr;
|
|
Word.ulWordLen = g_seconds.Len;
|
|
Word.pLemma = Word.pWordText;
|
|
Word.ulLemmaLen = Word.ulWordLen;
|
|
WordList.AddTail( Word );
|
|
}
|
|
}
|
|
|
|
return hr;
|
|
} /* ExpandTime */
|
|
|
|
/***********************************************************************************************
|
|
* IsTimeRange *
|
|
*-------------*
|
|
* Description:
|
|
* Checks the incoming Item's text to determine whether or not it
|
|
* is a time range.
|
|
*
|
|
* RegExp:
|
|
* [TimeOfDay]-[TimeOfDay]
|
|
*
|
|
* Types assigned:
|
|
* TIME_RANGE
|
|
********************************************************************* AH **********************/
|
|
HRESULT CStdSentEnum::IsTimeRange( TTSItemInfo*& pItemNormInfo, CSentItemMemory& MemoryManager,
|
|
CWordList& WordList )
|
|
{
|
|
SPDBG_FUNC( "CStdSentEnum::IsTimeRange" );
|
|
|
|
HRESULT hr = S_OK;
|
|
CWordList TempWordList;
|
|
TTSItemInfo *pFirstTimeInfo = NULL, *pSecondTimeInfo = NULL;
|
|
const WCHAR *pHyphen = NULL;
|
|
CItemList PreAbbreviationList; // Needed for SkipWhitespace function calls
|
|
BOOL fMultiItem = false;
|
|
|
|
const WCHAR *pTempNextChar = m_pNextChar, *pTempEndChar = m_pEndChar, *pTempEndOfCurrItem = m_pEndOfCurrItem;
|
|
const SPVTEXTFRAG *pTempFrag = m_pCurrFrag;
|
|
|
|
for ( pHyphen = m_pNextChar; pHyphen < m_pEndOfCurrItem; pHyphen++ )
|
|
{
|
|
if ( *pHyphen == L'-' )
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
|
|
//--- Might be whitespace and time suffix before hyphen
|
|
if( pHyphen == m_pEndOfCurrItem )
|
|
{
|
|
hr = SkipWhiteSpaceAndTags( pHyphen, m_pEndChar, m_pCurrFrag, MemoryManager,
|
|
true, &PreAbbreviationList );
|
|
if ( pHyphen && SUCCEEDED( hr ) )
|
|
{
|
|
if ( ( _wcsnicmp( pHyphen, L"am", 2 ) == 0 &&
|
|
pHyphen[2] == L'-' ) ||
|
|
( _wcsnicmp( pHyphen, L"pm", 2 ) == 0 &&
|
|
pHyphen[2] == L'-' ) )
|
|
{
|
|
pHyphen += 2;
|
|
*( (WCHAR*) pHyphen ) = ' ';
|
|
fMultiItem = true;
|
|
}
|
|
else if ( ( _wcsnicmp( pHyphen, L"a.m.", 4 ) == 0 &&
|
|
pHyphen[4] == L'-' ) ||
|
|
( _wcsnicmp( pHyphen, L"p.m.", 4 ) == 0 &&
|
|
pHyphen[4] == L'-' ) )
|
|
{
|
|
pHyphen +=4;
|
|
*( (WCHAR*) pHyphen ) = ' ';
|
|
fMultiItem = true;
|
|
}
|
|
else
|
|
{
|
|
hr = E_INVALIDARG;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
hr = E_INVALIDARG;
|
|
}
|
|
}
|
|
|
|
if ( SUCCEEDED( hr ) )
|
|
{
|
|
//--- Position m_pEndOfCurrItem so it is at the end of the first token, or at the hyphen,
|
|
//--- whichever comes first (this is necessary for IsTimeOfDay to work).
|
|
if( ( m_pNextChar < pHyphen ) && ( pHyphen < m_pEndOfCurrItem ) )
|
|
{
|
|
m_pEndOfCurrItem = pHyphen;
|
|
}
|
|
|
|
//--- Check for time of day
|
|
hr = IsTimeOfDay( pFirstTimeInfo, MemoryManager, TempWordList, fMultiItem );
|
|
|
|
//--- Check for just a number (hour)
|
|
if ( hr == E_INVALIDARG && ( pHyphen <= m_pNextChar + 2 ) )
|
|
{
|
|
WCHAR *pTemp = NULL;
|
|
int ulHours = my_wcstoul( m_pNextChar, &pTemp );
|
|
|
|
if ( pTemp == pHyphen &&
|
|
HOURMIN <= ulHours &&
|
|
ulHours <= HOURMAX )
|
|
{
|
|
NumberGroup Garbage;
|
|
if ( pTemp - m_pNextChar == 1 )
|
|
{
|
|
ExpandDigit( m_pNextChar[0], Garbage, TempWordList );
|
|
}
|
|
else
|
|
{
|
|
ExpandTwoDigits( m_pNextChar, Garbage, TempWordList );
|
|
}
|
|
hr = S_OK;
|
|
}
|
|
}
|
|
|
|
if ( SUCCEEDED( hr ) )
|
|
{
|
|
//--- Insert "to"
|
|
TTSWord Word;
|
|
ZeroMemory( &Word, sizeof( TTSWord ) );
|
|
|
|
Word.pXmlState = &m_pCurrFrag->State;
|
|
Word.eWordPartOfSpeech = MS_Unknown;
|
|
Word.pWordText = g_to.pStr;
|
|
Word.ulWordLen = g_to.Len;
|
|
Word.pLemma = Word.pWordText;
|
|
Word.ulLemmaLen = Word.ulWordLen;
|
|
TempWordList.AddTail( Word );
|
|
|
|
m_pNextChar = pHyphen + 1;
|
|
m_pEndOfCurrItem = FindTokenEnd( m_pNextChar, m_pEndChar );
|
|
|
|
//---Move m_pEndOfCurrItem back from any punctuation. ("4:30-5:30.")
|
|
while ( IsMiscPunctuation( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED ||
|
|
IsGroupEnding( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED ||
|
|
IsQuotationMark( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED ||
|
|
IsEOSItem( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED )
|
|
{
|
|
m_pEndOfCurrItem--;
|
|
}
|
|
|
|
hr = IsTimeOfDay( pSecondTimeInfo, MemoryManager, TempWordList );
|
|
|
|
if ( SUCCEEDED( hr ) )
|
|
{
|
|
//--- Matched a time range!
|
|
m_pNextChar = pTempNextChar;
|
|
m_pEndChar = pTempEndChar;
|
|
|
|
pItemNormInfo =
|
|
(TTSTimeRangeItemInfo*) MemoryManager.GetMemory( sizeof( TTSTimeRangeItemInfo ), &hr );
|
|
if ( SUCCEEDED( hr ) )
|
|
{
|
|
pItemNormInfo->Type = eTIME_RANGE;
|
|
( (TTSTimeRangeItemInfo*) pItemNormInfo )->pFirstTimeInfo =
|
|
(TTSTimeOfDayItemInfo*) pFirstTimeInfo;
|
|
( (TTSTimeRangeItemInfo*) pItemNormInfo )->pSecondTimeInfo =
|
|
(TTSTimeOfDayItemInfo*) pSecondTimeInfo;
|
|
//--- Copy temp word list to real word list if everything has succeeded...
|
|
WordList.AddTail( &TempWordList );
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if ( !SUCCEEDED( hr ) )
|
|
{
|
|
m_pNextChar = pTempNextChar;
|
|
m_pEndChar = pTempEndChar;
|
|
m_pEndOfCurrItem = pTempEndOfCurrItem;
|
|
m_pCurrFrag = pTempFrag;
|
|
if ( fMultiItem )
|
|
{
|
|
*( (WCHAR*) pHyphen ) = L'-';
|
|
}
|
|
}
|
|
|
|
return hr;
|
|
} /* IsTimeRange */
|
|
//-----------End Of File-------------------------------------------------------------------
|