Leaked source code of Windows Server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1022 lines
43 KiB

  1. /***********************************************************************************************
  2. * TimeNorm.cpp *
  3. *-------------*
  4. * Description:
  5. * These functions normalize times of day and time measurements.
  6. *-----------------------------------------------------------------------------------------------
  7. * Created by AH August 3, 1999
  8. * Copyright (C) 1999 Microsoft Corporation
  9. * All Rights Reserved
  10. *
  11. ***********************************************************************************************/
  12. #include "stdafx.h"
  13. #ifndef StdSentEnum_h
  14. #include "stdsentenum.h"
  15. #endif
  16. #pragma warning (disable : 4296)
  17. /***********************************************************************************************
  18. * IsTimeOfDay *
  19. *-------------*
  20. * Description:
  21. * Checks the incoming Item's text to determine whether or not it
  22. * is a time of day.
  23. *
  24. * RegExp:
  25. * [01-09,1-12][:][00-09,10-59][TimeAbbreviation]?
  26. *
  27. * Types assigned:
  28. * TIMEOFDAY
  29. ********************************************************************* AH **********************/
  30. HRESULT CStdSentEnum::IsTimeOfDay( TTSItemInfo*& pItemNormInfo, CSentItemMemory& MemoryManager,
  31. CWordList& WordList, BOOL fMultiItem )
  32. {
  33. SPDBG_FUNC( "CStdSentEnum::IsTimeOfDay" );
  34. HRESULT hr = S_OK;
  35. const WCHAR *pStartChar = m_pNextChar, *pEndOfItem = m_pEndOfCurrItem, *pEndChar = m_pEndChar;
  36. const SPVTEXTFRAG* pFrag = m_pCurrFrag;
  37. const SPVSTATE *pTimeXMLState = &pFrag->State, *pAbbreviationXMLState = NULL;
  38. CItemList PreAbbreviationList;
  39. BOOL fAdvancePointers = false;
  40. WCHAR *pHours = NULL, *pMinutes = NULL, *pAbbreviation = NULL;
  41. ULONG ulHours = 0, ulMinutes = 0;
  42. TIMEABBREVIATION TimeAbbreviation = UNDEFINED;
  43. TTSItemType ItemType = eUNMATCHED;
  44. //--- Max length of a string matching this regexp is 9 character
  45. if ( pEndOfItem - pStartChar > 9 )
  46. {
  47. hr = E_INVALIDARG;
  48. }
  49. else
  50. {
  51. pHours = (WCHAR*) pStartChar;
  52. //--- Try to match a number for the hour of day - [01-09,1-12]
  53. ulHours = my_wcstoul( pHours, &pMinutes );
  54. if ( pHours != pMinutes &&
  55. pMinutes - pHours <= 2 )
  56. {
  57. //--- Try to match the colon - [:]
  58. if ( *pMinutes == ':' )
  59. {
  60. pMinutes++;
  61. //--- Try to match a number for the minutes - [00-09,10-59]
  62. ulMinutes = my_wcstoul( pMinutes, &pAbbreviation );
  63. if ( pMinutes != pAbbreviation &&
  64. pAbbreviation - pMinutes == 2 )
  65. {
  66. //--- Verify that this is the end of the string
  67. if ( pAbbreviation == pEndOfItem )
  68. {
  69. //--- May have gotten hours and minutes - validate values
  70. if ( HOURMIN <= ulHours && ulHours <= HOURMAX &&
  71. MINUTEMIN <= ulMinutes && ulMinutes <= MINUTEMAX )
  72. {
  73. //--- A successful match has been made, but peek ahead in text for Time Abbreviation
  74. if ( fMultiItem )
  75. {
  76. pStartChar = pEndOfItem;
  77. hr = SkipWhiteSpaceAndTags( pStartChar, pEndChar, pFrag, MemoryManager,
  78. true, &PreAbbreviationList );
  79. if ( pStartChar &&
  80. SUCCEEDED( hr ) )
  81. {
  82. pEndOfItem = FindTokenEnd( pStartChar, pEndChar );
  83. while ( IsMiscPunctuation( *(pEndOfItem - 1) ) != eUNMATCHED ||
  84. IsGroupEnding( *(pEndOfItem - 1) ) != eUNMATCHED ||
  85. IsQuotationMark( *(pEndOfItem - 1) ) != eUNMATCHED ||
  86. ( ( ItemType = IsEOSItem( *(pEndOfItem - 1) ) ) != eUNMATCHED &&
  87. ( ItemType != ePERIOD ||
  88. ( _wcsnicmp( pStartChar, L"am.", 3 ) == 0 &&
  89. pStartChar + 3 == pEndOfItem ) ||
  90. ( _wcsnicmp( pStartChar, L"pm.", 3 ) == 0 &&
  91. pStartChar + 3 == pEndOfItem ) ) ) )
  92. {
  93. pEndOfItem--;
  94. }
  95. pAbbreviation = (WCHAR*) pStartChar;
  96. if ( ( _wcsnicmp( pAbbreviation, L"am", 2 ) == 0 &&
  97. pAbbreviation + 2 == pEndOfItem ) ||
  98. ( _wcsnicmp( pAbbreviation, L"a.m.", 4 ) == 0 &&
  99. pAbbreviation + 4 == pEndOfItem ) )
  100. {
  101. //--- Found a valid Time Abbreviation - [Hours:Minutes] [whitespace] [Abbrev]
  102. TimeAbbreviation = AM;
  103. pAbbreviationXMLState = &pFrag->State;
  104. fAdvancePointers = true;
  105. }
  106. else if ( ( _wcsnicmp( pAbbreviation, L"pm", 2 ) == 0 &&
  107. pAbbreviation + 2 == pEndOfItem ) ||
  108. ( _wcsnicmp( pAbbreviation, L"p.m.", 4 ) == 0 &&
  109. pAbbreviation + 4 == pEndOfItem ) )
  110. {
  111. //--- Found a valid Time Abbreviation - [Hours:Minutes] [whitespace] [Abbrev]
  112. TimeAbbreviation = PM;
  113. pAbbreviationXMLState = &pFrag->State;
  114. fAdvancePointers = true;
  115. }
  116. }
  117. }
  118. }
  119. else // hours or minutes were out of range
  120. {
  121. hr = E_INVALIDARG;
  122. }
  123. }
  124. //--- Check to see if the rest of the string is a time abbreviation - [TimeAbbreviation]
  125. else if ( ( _wcsnicmp( pAbbreviation, L"am", 2 ) == 0 &&
  126. pAbbreviation + 2 == pEndOfItem ) ||
  127. ( _wcsnicmp( pAbbreviation, L"a.m.", 4 ) == 0 &&
  128. pAbbreviation + 4 == pEndOfItem ) )
  129. {
  130. //--- May have gotten hours and minutes and time abbreviation - validate values
  131. if ( HOURMIN <= ulHours && ulHours <= HOURMAX &&
  132. MINUTEMIN <= ulMinutes && ulMinutes <= MINUTEMAX )
  133. {
  134. //--- A successful match has been made
  135. TimeAbbreviation = AM;
  136. pAbbreviationXMLState = &pFrag->State;
  137. }
  138. else // hours or minutes were out of range
  139. {
  140. hr = E_INVALIDARG;
  141. }
  142. }
  143. //--- Check to see if the rest of the string is a time abbreviation - [TimeAbbreviation]
  144. else if ( ( _wcsnicmp( pAbbreviation, L"pm", 2 ) == 0 &&
  145. pAbbreviation + 2 == pEndOfItem ) ||
  146. ( _wcsnicmp( pAbbreviation, L"p.m.", 4 ) == 0 &&
  147. pAbbreviation + 4 == pEndOfItem ) )
  148. {
  149. //--- May have gotten hours and minutes and time abbreviation - validate values
  150. if ( HOURMIN <= ulHours && ulHours <= HOURMAX &&
  151. MINUTEMIN <= ulMinutes && ulMinutes <= MINUTEMAX )
  152. {
  153. //--- A successful match has been made
  154. pAbbreviationXMLState = &pFrag->State;
  155. TimeAbbreviation = PM;
  156. }
  157. else // hours or minutes were out of range
  158. {
  159. hr = E_INVALIDARG;
  160. }
  161. }
  162. else // string ended in invalid characters
  163. {
  164. hr = E_INVALIDARG;
  165. }
  166. } // failed to match a valid minutes string
  167. else
  168. {
  169. hr = E_INVALIDARG;
  170. }
  171. } // failed to match the colon, could be just hours and a time abbreviation
  172. else if ( pMinutes < m_pEndOfCurrItem )
  173. {
  174. pAbbreviation = pMinutes;
  175. pMinutes = NULL;
  176. //--- Check for TimeAbbreviation - [TimeAbbreviation]
  177. if ( ( _wcsnicmp( pAbbreviation, L"am", 2 ) == 0 &&
  178. pAbbreviation + 2 == pEndOfItem ) ||
  179. ( _wcsnicmp( pAbbreviation, L"a.m.", 4 ) == 0 &&
  180. pAbbreviation + 4 == pEndOfItem ) )
  181. {
  182. //--- A successful match has been made - Hour AM
  183. pAbbreviationXMLState = &pFrag->State;
  184. TimeAbbreviation = AM;
  185. }
  186. else if ( ( _wcsnicmp( pAbbreviation, L"pm", 2 ) == 0 &&
  187. pAbbreviation + 2 == pEndOfItem ) ||
  188. ( _wcsnicmp( pAbbreviation, L"p.m.", 4 ) == 0 &&
  189. pAbbreviation + 4 == pEndOfItem ) )
  190. {
  191. //--- A successful match has been made - Hour PM
  192. pAbbreviationXMLState = &pFrag->State;
  193. TimeAbbreviation = PM;
  194. }
  195. else // failed to match a valid time abbreviation
  196. {
  197. hr = E_INVALIDARG;
  198. }
  199. }
  200. else if ( fMultiItem )
  201. {
  202. //--- Set pMinutes to NULL, so we know later that we've got no minutes string...
  203. pMinutes = NULL;
  204. //--- Peek ahead in text for a time abbreviation
  205. pStartChar = pEndOfItem;
  206. hr = SkipWhiteSpaceAndTags( pStartChar, pEndChar, pFrag, MemoryManager,
  207. true, &PreAbbreviationList );
  208. if ( !pStartChar &&
  209. SUCCEEDED( hr ) )
  210. {
  211. hr = E_INVALIDARG;
  212. }
  213. else if ( pStartChar &&
  214. SUCCEEDED( hr ) )
  215. {
  216. pEndOfItem = FindTokenEnd( pStartChar, pEndChar );
  217. while ( IsMiscPunctuation( *(pEndOfItem - 1) ) != eUNMATCHED ||
  218. IsGroupEnding( *(pEndOfItem - 1) ) != eUNMATCHED ||
  219. IsQuotationMark( *(pEndOfItem - 1) ) != eUNMATCHED ||
  220. ( ( ItemType = IsEOSItem( *(pEndOfItem - 1) ) ) != eUNMATCHED &&
  221. ItemType != ePERIOD ) )
  222. {
  223. pEndOfItem--;
  224. }
  225. pAbbreviation = (WCHAR*) pStartChar;
  226. if ( ( _wcsnicmp( pAbbreviation, L"am", 2 ) == 0 &&
  227. pAbbreviation + 2 == pEndOfItem ) ||
  228. ( _wcsnicmp( pAbbreviation, L"a.m.", 4 ) == 0 &&
  229. pAbbreviation + 4 == pEndOfItem ) )
  230. {
  231. //--- Found a valid Time Abbreviation - [Hours:Minutes] [whitespace] [Abbrev]
  232. TimeAbbreviation = AM;
  233. pAbbreviationXMLState = &pFrag->State;
  234. fAdvancePointers = true;
  235. }
  236. else if ( ( _wcsnicmp( pAbbreviation, L"pm", 2 ) == 0 &&
  237. pAbbreviation + 2 == pEndOfItem ) ||
  238. ( _wcsnicmp( pAbbreviation, L"p.m.", 4 ) == 0 &&
  239. pAbbreviation + 4 == pEndOfItem ) )
  240. {
  241. //--- Found a valid Time Abbreviation - [Hours:Minutes] [whitespace] [Abbrev]
  242. TimeAbbreviation = PM;
  243. pAbbreviationXMLState = &pFrag->State;
  244. fAdvancePointers = true;
  245. }
  246. //--- Failed to match a valid Time Abbreviation
  247. else
  248. {
  249. hr = E_INVALIDARG;
  250. }
  251. }
  252. }
  253. else
  254. {
  255. hr = E_INVALIDARG;
  256. }
  257. } // failed to match a valid hours string
  258. else
  259. {
  260. hr = E_INVALIDARG;
  261. }
  262. //--- Successfully matched a Time Of Day! Now expand it and fill out pItemNormInfo
  263. if ( SUCCEEDED( hr ) )
  264. {
  265. NumberGroup Garbage;
  266. TTSWord Word;
  267. ZeroMemory( &Word, sizeof(TTSWord) );
  268. Word.eWordPartOfSpeech = MS_Unknown;
  269. pItemNormInfo = (TTSTimeOfDayItemInfo*) MemoryManager.GetMemory( sizeof(TTSTimeOfDayItemInfo), &hr );
  270. if ( SUCCEEDED( hr ) )
  271. {
  272. //--- Fill out known parts of pItemNormInfo
  273. ZeroMemory( pItemNormInfo, sizeof(TTSTimeOfDayItemInfo) );
  274. pItemNormInfo->Type = eTIMEOFDAY;
  275. ( (TTSTimeOfDayItemInfo*) pItemNormInfo )->fMinutes = pMinutes ? true : false;
  276. ( (TTSTimeOfDayItemInfo*) pItemNormInfo )->fTimeAbbreviation = TimeAbbreviation != UNDEFINED ? true : false;
  277. ( (TTSTimeOfDayItemInfo*) pItemNormInfo )->fTwentyFourHour = false;
  278. //--- Expand the hours
  279. if ( !iswdigit( pHours[1] ) )
  280. {
  281. ExpandDigit( pHours[0], Garbage, WordList );
  282. }
  283. else
  284. {
  285. ExpandTwoDigits( pHours, Garbage, WordList );
  286. }
  287. //--- Expand the minutes
  288. if ( pMinutes )
  289. {
  290. //--- Special case: A bare o'clock - 1:00, 2:00, etc.
  291. if ( wcsncmp( pMinutes, L"00", 2 ) == 0 )
  292. {
  293. WCHAR *pGarbage;
  294. ULONG ulHours = my_wcstoul( pHours, &pGarbage );
  295. //--- Under twelve is followed by "o'clock"
  296. if ( ulHours <= 12 )
  297. {
  298. Word.pWordText = g_OClock.pStr;
  299. Word.ulWordLen = g_OClock.Len;
  300. Word.pLemma = Word.pWordText;
  301. Word.ulLemmaLen = Word.ulWordLen;
  302. WordList.AddTail( Word );
  303. }
  304. //--- Over twelve is followed by "hundred hours"
  305. else
  306. {
  307. ( (TTSTimeOfDayItemInfo*) pItemNormInfo )->fTwentyFourHour = true;
  308. Word.pWordText = g_hundred.pStr;
  309. Word.ulWordLen = g_hundred.Len;
  310. Word.pLemma = Word.pWordText;
  311. Word.ulLemmaLen = Word.ulWordLen;
  312. WordList.AddTail( Word );
  313. Word.pWordText = g_hours.pStr;
  314. Word.ulWordLen = g_hours.Len;
  315. Word.pLemma = Word.pWordText;
  316. Word.ulLemmaLen = Word.ulWordLen;
  317. WordList.AddTail( Word );
  318. }
  319. }
  320. //--- Special Case: Minutes less than 10 - 1:05, 2:06, etc.
  321. else if ( pMinutes[0] == L'0' )
  322. {
  323. Word.pWordText = g_O.pStr;
  324. Word.ulWordLen = g_O.Len;
  325. Word.pLemma = Word.pWordText;
  326. Word.ulLemmaLen = Word.ulWordLen;
  327. WordList.AddTail( Word );
  328. ExpandDigit( pMinutes[1], Garbage, WordList );
  329. }
  330. //--- Default Case
  331. else
  332. {
  333. ExpandTwoDigits( pMinutes, Garbage, WordList );
  334. }
  335. }
  336. //--- Clean up Time XML State
  337. SPLISTPOS WordListPos = WordList.GetHeadPosition();
  338. while ( WordListPos )
  339. {
  340. TTSWord& TempWord = WordList.GetNext( WordListPos );
  341. TempWord.pXmlState = pTimeXMLState;
  342. }
  343. //--- Insert Pre-Abbreviation XML States
  344. while ( !PreAbbreviationList.IsEmpty() )
  345. {
  346. WordList.AddTail( ( PreAbbreviationList.RemoveHead() ).Words[0] );
  347. }
  348. //--- Expand the Time Abbreviation
  349. //--- AM
  350. if ( TimeAbbreviation == AM )
  351. {
  352. //--- Ensure the letters are pronounced as nouns...
  353. SPVSTATE* pNewState = (SPVSTATE*) MemoryManager.GetMemory( sizeof( SPVSTATE ), &hr );
  354. if ( SUCCEEDED( hr ) )
  355. {
  356. memcpy( pNewState, pAbbreviationXMLState, sizeof( SPVSTATE ) );
  357. pNewState->ePartOfSpeech = SPPS_Noun;
  358. Word.pXmlState = pNewState;
  359. Word.pWordText = g_A.pStr;
  360. Word.ulWordLen = g_A.Len;
  361. Word.pLemma = Word.pWordText;
  362. Word.ulLemmaLen = Word.ulWordLen;
  363. WordList.AddTail( Word );
  364. Word.pWordText = g_M.pStr;
  365. Word.ulWordLen = g_M.Len;
  366. Word.pLemma = Word.pWordText;
  367. Word.ulLemmaLen = Word.ulWordLen;
  368. WordList.AddTail( Word );
  369. }
  370. }
  371. //--- PM
  372. else if ( TimeAbbreviation == PM )
  373. {
  374. //--- Ensure the letters are pronounced as nouns...
  375. SPVSTATE* pNewState = (SPVSTATE*) MemoryManager.GetMemory( sizeof( SPVSTATE ), &hr );
  376. if ( SUCCEEDED( hr ) )
  377. {
  378. memcpy( pNewState, pAbbreviationXMLState, sizeof( SPVSTATE ) );
  379. pNewState->ePartOfSpeech = SPPS_Noun;
  380. Word.pXmlState = pAbbreviationXMLState;
  381. Word.pWordText = g_P.pStr;
  382. Word.ulWordLen = g_P.Len;
  383. Word.pLemma = Word.pWordText;
  384. Word.ulLemmaLen = Word.ulWordLen;
  385. WordList.AddTail( Word );
  386. Word.pWordText = g_M.pStr;
  387. Word.ulWordLen = g_M.Len;
  388. Word.pLemma = Word.pWordText;
  389. Word.ulLemmaLen = Word.ulWordLen;
  390. WordList.AddTail( Word );
  391. }
  392. }
  393. //--- Update pointers, if necessary
  394. if ( fAdvancePointers )
  395. {
  396. m_pCurrFrag = pFrag;
  397. m_pEndChar = pEndChar;
  398. m_pEndOfCurrItem = pEndOfItem;
  399. }
  400. }
  401. }
  402. }
  403. return hr;
  404. } /* IsTimeOfDay */
  405. /***********************************************************************************************
  406. * IsTime *
  407. *--------*
  408. * Description:
  409. * Checks the incoming Item's text to determine whether or not it
  410. * is a time.
  411. *
  412. * RegExp:
  413. * { d+ || d(1-3)[,ddd]+ }[:][00-09,10-59]{ [:][00-09,10-59] }?
  414. *
  415. * Types assigned:
  416. * TIME_HRMIN, TIME_MINSEC, TIME_HRMINSEC
  417. ********************************************************************* AH **********************/
  418. HRESULT CStdSentEnum::IsTime( TTSItemInfo*& pItemNormInfo, const WCHAR* Context, CSentItemMemory& MemoryManager )
  419. {
  420. SPDBG_FUNC( "TimeNorm IsTime" );
  421. HRESULT hr = S_OK;
  422. WCHAR *pFirstChunk = NULL, *pSecondChunk = NULL, *pThirdChunk = NULL, *pLeftOver = NULL;
  423. const WCHAR *pTempNextChar = m_pNextChar, *pTempEndOfItem = m_pEndOfCurrItem;
  424. ULONG ulSecond = 0, ulThird = 0;
  425. TTSItemInfo *pFirstChunkInfo = NULL;
  426. BOOL fNegative = false;
  427. pFirstChunk = (WCHAR*) m_pNextChar;
  428. //--- Try to match a number for the hours/minutes - { d+ }
  429. if ( *pFirstChunk == L'-' )
  430. {
  431. pFirstChunk++;
  432. fNegative = true;
  433. }
  434. while ( *pFirstChunk == L'0' )
  435. {
  436. pFirstChunk++;
  437. }
  438. if ( *pFirstChunk == L':' )
  439. {
  440. pFirstChunk--;
  441. }
  442. pSecondChunk = wcschr( pFirstChunk, L':' );
  443. if ( pSecondChunk &&
  444. pFirstChunk < pSecondChunk &&
  445. pSecondChunk < m_pEndOfCurrItem - 1 )
  446. {
  447. m_pNextChar = pFirstChunk;
  448. m_pEndOfCurrItem = pSecondChunk;
  449. hr = IsNumberCategory( pFirstChunkInfo, L"NUMBER", MemoryManager );
  450. m_pNextChar = pTempNextChar;
  451. m_pEndOfCurrItem = pTempEndOfItem;
  452. if ( SUCCEEDED( hr ) &&
  453. ( pFirstChunkInfo->Type == eNUM_DECIMAL ||
  454. pFirstChunkInfo->Type == eNUM_CARDINAL ) )
  455. {
  456. if ( fNegative )
  457. {
  458. ( (TTSNumberItemInfo*) pFirstChunkInfo )->fNegative = true;
  459. TTSWord Word;
  460. ZeroMemory( &Word, sizeof( TTSWord ) );
  461. Word.eWordPartOfSpeech = MS_Unknown;
  462. Word.pXmlState = &m_pCurrFrag->State;
  463. Word.pWordText = g_negative.pStr;
  464. Word.ulWordLen = g_negative.Len;
  465. Word.pLemma = Word.pWordText;
  466. Word.ulLemmaLen = Word.ulWordLen;
  467. ( (TTSNumberItemInfo*) pFirstChunkInfo )->pWordList->AddHead( Word );
  468. }
  469. pSecondChunk++;
  470. //--- Try to match a number for the minutes/seconds - [00-09,10-59]
  471. ulSecond = my_wcstoul( pSecondChunk, &pThirdChunk );
  472. if ( pSecondChunk != pThirdChunk &&
  473. pThirdChunk - pSecondChunk == 2 )
  474. {
  475. //--- Verify that this is the end of the string
  476. if ( pThirdChunk == m_pEndOfCurrItem )
  477. {
  478. //--- May have gotten hours and minutes or minutes and seconds - validate values
  479. if ( MINUTEMIN <= ulSecond && ulSecond <= MINUTEMAX )
  480. {
  481. //--- A successful match has been made
  482. //--- Default behavior here is to assume minutes and seconds
  483. if ( Context == NULL ||
  484. _wcsicmp( Context, L"TIME_MS" ) == 0 )
  485. {
  486. //--- Successfully matched minutes and seconds.
  487. pItemNormInfo = (TTSTimeItemInfo*) MemoryManager.GetMemory( sizeof(TTSTimeItemInfo),
  488. &hr );
  489. if ( SUCCEEDED( hr ) )
  490. {
  491. ZeroMemory( pItemNormInfo, sizeof(TTSTimeItemInfo) );
  492. pItemNormInfo->Type = eTIME;
  493. ( (TTSTimeItemInfo*) pItemNormInfo )->pMinutes =
  494. (TTSNumberItemInfo*) pFirstChunkInfo;
  495. if ( *pSecondChunk != L'0' )
  496. {
  497. ( (TTSTimeItemInfo*) pItemNormInfo )->pSeconds = pSecondChunk;
  498. }
  499. else
  500. {
  501. ( (TTSTimeItemInfo*) pItemNormInfo )->pSeconds = pSecondChunk + 1;
  502. }
  503. }
  504. }
  505. //--- If context overrides, values represent hours and minutes
  506. else if ( _wcsicmp( Context, L"TIME_HM" ) == 0 )
  507. {
  508. //--- Successfully matched hours and pMinutes->
  509. pItemNormInfo = (TTSTimeItemInfo*) MemoryManager.GetMemory( sizeof(TTSTimeItemInfo),
  510. &hr );
  511. if ( SUCCEEDED( hr ) )
  512. {
  513. ZeroMemory( pItemNormInfo, sizeof(TTSTimeItemInfo) );
  514. pItemNormInfo->Type = eTIME;
  515. ( (TTSTimeItemInfo*) pItemNormInfo )->pHours =
  516. (TTSNumberItemInfo*) pFirstChunkInfo;
  517. TTSItemInfo* pMinutesInfo;
  518. //--- Don't want "zero zero..." behavior of numbers - strip off beginning zeroes
  519. if ( *pSecondChunk == L'0' )
  520. {
  521. pSecondChunk++;
  522. }
  523. m_pNextChar = pSecondChunk;
  524. m_pEndOfCurrItem = pThirdChunk;
  525. hr = IsNumber( pMinutesInfo, L"NUMBER", MemoryManager );
  526. m_pNextChar = pTempNextChar;
  527. m_pEndOfCurrItem = pTempEndOfItem;
  528. if ( SUCCEEDED( hr ) )
  529. {
  530. ( (TTSTimeItemInfo*) pItemNormInfo )->pMinutes = (TTSNumberItemInfo*) pMinutesInfo;
  531. }
  532. }
  533. }
  534. else
  535. {
  536. hr = E_INVALIDARG;
  537. }
  538. }
  539. else // minutes or seconds were out of range
  540. {
  541. hr = E_INVALIDARG;
  542. }
  543. }
  544. //--- Check for seconds - TIME_HRMINS
  545. else
  546. {
  547. //--- Try to match the colon
  548. if ( *pThirdChunk == L':' )
  549. {
  550. pThirdChunk++;
  551. //--- Try to match a number for the seconds - [00-09,10-59]
  552. ulThird = my_wcstoul( pThirdChunk, &pLeftOver );
  553. if ( pThirdChunk != pLeftOver &&
  554. pLeftOver - pThirdChunk == 2 )
  555. {
  556. //--- Verify that this is the end of the string
  557. if ( pLeftOver == m_pEndOfCurrItem )
  558. {
  559. //--- May have gotten hours minutes and seconds - validate values
  560. if ( MINUTEMIN <= ulSecond && ulSecond <= MINUTEMAX &&
  561. SECONDMIN <= ulThird && ulThird <= SECONDMAX )
  562. {
  563. //--- Successfully matched hours, minutes, and seconds.
  564. pItemNormInfo = (TTSTimeItemInfo*) MemoryManager.GetMemory( sizeof(TTSTimeItemInfo),
  565. &hr );
  566. if ( SUCCEEDED( hr ) )
  567. {
  568. ZeroMemory( pItemNormInfo, sizeof(TTSTimeItemInfo) );
  569. pItemNormInfo->Type = eTIME;
  570. ( (TTSTimeItemInfo*) pItemNormInfo )->pHours =
  571. (TTSNumberItemInfo*) pFirstChunkInfo;
  572. if ( SUCCEEDED( hr ) )
  573. {
  574. TTSItemInfo* pMinutesInfo;
  575. //--- Don't want "zero zero..." behavior of numbers - strip off beginning zeroes
  576. if ( ulSecond != 0 )
  577. {
  578. pSecondChunk += ( ( pThirdChunk - 1 ) - pSecondChunk ) -
  579. (ULONG)( log10( ulSecond ) + 1 );
  580. }
  581. else
  582. {
  583. pSecondChunk = pThirdChunk - 2;
  584. }
  585. m_pNextChar = pSecondChunk;
  586. m_pEndOfCurrItem = pThirdChunk - 1;
  587. hr = IsNumber( pMinutesInfo, L"NUMBER", MemoryManager );
  588. m_pNextChar = pTempNextChar;
  589. m_pEndOfCurrItem = pTempEndOfItem;
  590. if ( SUCCEEDED( hr ) )
  591. {
  592. ( (TTSTimeItemInfo*) pItemNormInfo )->pMinutes =
  593. (TTSNumberItemInfo*) pMinutesInfo;
  594. if ( *pThirdChunk != L'0' )
  595. {
  596. ( (TTSTimeItemInfo*) pItemNormInfo )->pSeconds = pThirdChunk;
  597. }
  598. else
  599. {
  600. ( (TTSTimeItemInfo*) pItemNormInfo )->pSeconds = pThirdChunk + 1;
  601. }
  602. }
  603. }
  604. }
  605. }
  606. else // minutes or seconds were out of range
  607. {
  608. hr = E_INVALIDARG;
  609. }
  610. }
  611. else // extra junk at end of string
  612. {
  613. hr = E_INVALIDARG;
  614. }
  615. }
  616. else // extra junk at end of string
  617. {
  618. hr = E_INVALIDARG;
  619. }
  620. }
  621. else // failed to match a colon
  622. {
  623. hr = E_INVALIDARG;
  624. }
  625. }
  626. }
  627. else // failed to match a second number
  628. {
  629. hr = E_INVALIDARG;
  630. }
  631. }
  632. else // failed to match a colon
  633. {
  634. hr = E_INVALIDARG;
  635. }
  636. }
  637. else // failed to match a first number
  638. {
  639. hr = E_INVALIDARG;
  640. }
  641. if ( FAILED( hr ) )
  642. {
  643. if ( pFirstChunkInfo )
  644. {
  645. delete ( (TTSNumberItemInfo*) pFirstChunkInfo )->pWordList;
  646. }
  647. }
  648. return hr;
  649. } /* IsTime */
  650. /***********************************************************************************************
  651. * ExpandTime *
  652. *------------*
  653. * Description:
  654. * Expands Items previously determined to be of type TIME_HRMINSEC by IsTime.
  655. *
  656. * NOTE: This function does not do parameter validation. Assumed to be done by caller.
  657. ********************************************************************* AH **********************/
  658. HRESULT CStdSentEnum::ExpandTime( TTSTimeItemInfo* pItemInfo, CWordList& WordList )
  659. {
  660. SPDBG_FUNC( "CStdSentEnum::ExpandTime" );
  661. HRESULT hr = S_OK;
  662. TTSWord Word;
  663. ZeroMemory( &Word, sizeof(TTSWord) );
  664. Word.pXmlState = &m_pCurrFrag->State;
  665. Word.eWordPartOfSpeech = MS_Unknown;
  666. //-------------------
  667. // Expand the hours
  668. //-------------------
  669. if ( pItemInfo->pHours )
  670. {
  671. //--- Expand Number
  672. hr = ExpandNumber( pItemInfo->pHours, WordList );
  673. //--- Insert "hour" or "hours"
  674. if ( SUCCEEDED( hr ) )
  675. {
  676. if ( pItemInfo->pHours->pEndChar - pItemInfo->pHours->pStartChar == 1 &&
  677. pItemInfo->pHours->pStartChar[0] == L'1' )
  678. {
  679. Word.pWordText = g_hour.pStr;
  680. Word.ulWordLen = g_hour.Len;
  681. Word.pLemma = Word.pWordText;
  682. Word.ulLemmaLen = Word.ulWordLen;
  683. WordList.AddTail( Word );
  684. }
  685. else
  686. {
  687. Word.pWordText = g_hours.pStr;
  688. Word.ulWordLen = g_hours.Len;
  689. Word.pLemma = Word.pWordText;
  690. Word.ulLemmaLen = Word.ulWordLen;
  691. WordList.AddTail( Word );
  692. }
  693. }
  694. //--- Insert "and"
  695. if ( SUCCEEDED( hr ) &&
  696. pItemInfo->pMinutes->pStartChar &&
  697. !pItemInfo->pSeconds )
  698. {
  699. Word.pWordText = g_And.pStr;
  700. Word.ulWordLen = g_And.Len;
  701. Word.pLemma = Word.pWordText;
  702. Word.ulLemmaLen = Word.ulWordLen;
  703. WordList.AddTail( Word );
  704. }
  705. }
  706. //---------------------
  707. // Expand the minutes
  708. //---------------------
  709. if ( SUCCEEDED( hr ) &&
  710. pItemInfo->pMinutes )
  711. {
  712. //--- Expand Number
  713. hr = ExpandNumber( pItemInfo->pMinutes, WordList );
  714. //--- Insert "minutes"
  715. if ( SUCCEEDED( hr ) )
  716. {
  717. if ( pItemInfo->pMinutes->pEndChar - pItemInfo->pMinutes->pStartChar == 1 &&
  718. pItemInfo->pMinutes->pStartChar[0] == L'1' )
  719. {
  720. Word.pWordText = g_minute.pStr;
  721. Word.ulWordLen = g_minute.Len;
  722. Word.pLemma = Word.pWordText;
  723. Word.ulLemmaLen = Word.ulWordLen;
  724. WordList.AddTail( Word );
  725. }
  726. else
  727. {
  728. Word.pWordText = g_minutes.pStr;
  729. Word.ulWordLen = g_minutes.Len;
  730. Word.pLemma = Word.pWordText;
  731. Word.ulLemmaLen = Word.ulWordLen;
  732. WordList.AddTail( Word );
  733. }
  734. }
  735. //--- Insert "and"
  736. if ( SUCCEEDED( hr ) &&
  737. pItemInfo->pSeconds )
  738. {
  739. Word.pWordText = g_And.pStr;
  740. Word.ulWordLen = g_And.Len;
  741. Word.pLemma = Word.pWordText;
  742. Word.ulLemmaLen = Word.ulWordLen;
  743. WordList.AddTail( Word );
  744. }
  745. }
  746. //---------------------
  747. // Expand the seconds
  748. //---------------------
  749. if ( SUCCEEDED( hr ) &&
  750. pItemInfo->pSeconds )
  751. {
  752. //--- Expand Number
  753. NumberGroup Garbage;
  754. if ( iswdigit( pItemInfo->pSeconds[1] ) )
  755. {
  756. ExpandTwoDigits( pItemInfo->pSeconds, Garbage, WordList );
  757. }
  758. else
  759. {
  760. ExpandDigit( pItemInfo->pSeconds[0], Garbage, WordList );
  761. }
  762. //--- Insert "seconds"
  763. if ( pItemInfo->pSeconds[0] == L'1' &&
  764. !iswdigit( pItemInfo->pSeconds[1] ) )
  765. {
  766. Word.pWordText = g_second.pStr;
  767. Word.ulWordLen = g_second.Len;
  768. Word.pLemma = Word.pWordText;
  769. Word.ulLemmaLen = Word.ulWordLen;
  770. WordList.AddTail( Word );
  771. }
  772. else
  773. {
  774. Word.pWordText = g_seconds.pStr;
  775. Word.ulWordLen = g_seconds.Len;
  776. Word.pLemma = Word.pWordText;
  777. Word.ulLemmaLen = Word.ulWordLen;
  778. WordList.AddTail( Word );
  779. }
  780. }
  781. return hr;
  782. } /* ExpandTime */
  783. /***********************************************************************************************
  784. * IsTimeRange *
  785. *-------------*
  786. * Description:
  787. * Checks the incoming Item's text to determine whether or not it
  788. * is a time range.
  789. *
  790. * RegExp:
  791. * [TimeOfDay]-[TimeOfDay]
  792. *
  793. * Types assigned:
  794. * TIME_RANGE
  795. ********************************************************************* AH **********************/
  796. HRESULT CStdSentEnum::IsTimeRange( TTSItemInfo*& pItemNormInfo, CSentItemMemory& MemoryManager,
  797. CWordList& WordList )
  798. {
  799. SPDBG_FUNC( "CStdSentEnum::IsTimeRange" );
  800. HRESULT hr = S_OK;
  801. CWordList TempWordList;
  802. TTSItemInfo *pFirstTimeInfo = NULL, *pSecondTimeInfo = NULL;
  803. const WCHAR *pHyphen = NULL;
  804. CItemList PreAbbreviationList; // Needed for SkipWhitespace function calls
  805. BOOL fMultiItem = false;
  806. const WCHAR *pTempNextChar = m_pNextChar, *pTempEndChar = m_pEndChar, *pTempEndOfCurrItem = m_pEndOfCurrItem;
  807. const SPVTEXTFRAG *pTempFrag = m_pCurrFrag;
  808. for ( pHyphen = m_pNextChar; pHyphen < m_pEndOfCurrItem; pHyphen++ )
  809. {
  810. if ( *pHyphen == L'-' )
  811. {
  812. break;
  813. }
  814. }
  815. //--- Might be whitespace and time suffix before hyphen
  816. if( pHyphen == m_pEndOfCurrItem )
  817. {
  818. hr = SkipWhiteSpaceAndTags( pHyphen, m_pEndChar, m_pCurrFrag, MemoryManager,
  819. true, &PreAbbreviationList );
  820. if ( pHyphen && SUCCEEDED( hr ) )
  821. {
  822. if ( ( _wcsnicmp( pHyphen, L"am", 2 ) == 0 &&
  823. pHyphen[2] == L'-' ) ||
  824. ( _wcsnicmp( pHyphen, L"pm", 2 ) == 0 &&
  825. pHyphen[2] == L'-' ) )
  826. {
  827. pHyphen += 2;
  828. *( (WCHAR*) pHyphen ) = ' ';
  829. fMultiItem = true;
  830. }
  831. else if ( ( _wcsnicmp( pHyphen, L"a.m.", 4 ) == 0 &&
  832. pHyphen[4] == L'-' ) ||
  833. ( _wcsnicmp( pHyphen, L"p.m.", 4 ) == 0 &&
  834. pHyphen[4] == L'-' ) )
  835. {
  836. pHyphen +=4;
  837. *( (WCHAR*) pHyphen ) = ' ';
  838. fMultiItem = true;
  839. }
  840. else
  841. {
  842. hr = E_INVALIDARG;
  843. }
  844. }
  845. else
  846. {
  847. hr = E_INVALIDARG;
  848. }
  849. }
  850. if ( SUCCEEDED( hr ) )
  851. {
  852. //--- Position m_pEndOfCurrItem so it is at the end of the first token, or at the hyphen,
  853. //--- whichever comes first (this is necessary for IsTimeOfDay to work).
  854. if( ( m_pNextChar < pHyphen ) && ( pHyphen < m_pEndOfCurrItem ) )
  855. {
  856. m_pEndOfCurrItem = pHyphen;
  857. }
  858. //--- Check for time of day
  859. hr = IsTimeOfDay( pFirstTimeInfo, MemoryManager, TempWordList, fMultiItem );
  860. //--- Check for just a number (hour)
  861. if ( hr == E_INVALIDARG && ( pHyphen <= m_pNextChar + 2 ) )
  862. {
  863. WCHAR *pTemp = NULL;
  864. int ulHours = my_wcstoul( m_pNextChar, &pTemp );
  865. if ( pTemp == pHyphen &&
  866. HOURMIN <= ulHours &&
  867. ulHours <= HOURMAX )
  868. {
  869. NumberGroup Garbage;
  870. if ( pTemp - m_pNextChar == 1 )
  871. {
  872. ExpandDigit( m_pNextChar[0], Garbage, TempWordList );
  873. }
  874. else
  875. {
  876. ExpandTwoDigits( m_pNextChar, Garbage, TempWordList );
  877. }
  878. hr = S_OK;
  879. }
  880. }
  881. if ( SUCCEEDED( hr ) )
  882. {
  883. //--- Insert "to"
  884. TTSWord Word;
  885. ZeroMemory( &Word, sizeof( TTSWord ) );
  886. Word.pXmlState = &m_pCurrFrag->State;
  887. Word.eWordPartOfSpeech = MS_Unknown;
  888. Word.pWordText = g_to.pStr;
  889. Word.ulWordLen = g_to.Len;
  890. Word.pLemma = Word.pWordText;
  891. Word.ulLemmaLen = Word.ulWordLen;
  892. TempWordList.AddTail( Word );
  893. m_pNextChar = pHyphen + 1;
  894. m_pEndOfCurrItem = FindTokenEnd( m_pNextChar, m_pEndChar );
  895. //---Move m_pEndOfCurrItem back from any punctuation. ("4:30-5:30.")
  896. while ( IsMiscPunctuation( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED ||
  897. IsGroupEnding( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED ||
  898. IsQuotationMark( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED ||
  899. IsEOSItem( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED )
  900. {
  901. m_pEndOfCurrItem--;
  902. }
  903. hr = IsTimeOfDay( pSecondTimeInfo, MemoryManager, TempWordList );
  904. if ( SUCCEEDED( hr ) )
  905. {
  906. //--- Matched a time range!
  907. m_pNextChar = pTempNextChar;
  908. m_pEndChar = pTempEndChar;
  909. pItemNormInfo =
  910. (TTSTimeRangeItemInfo*) MemoryManager.GetMemory( sizeof( TTSTimeRangeItemInfo ), &hr );
  911. if ( SUCCEEDED( hr ) )
  912. {
  913. pItemNormInfo->Type = eTIME_RANGE;
  914. ( (TTSTimeRangeItemInfo*) pItemNormInfo )->pFirstTimeInfo =
  915. (TTSTimeOfDayItemInfo*) pFirstTimeInfo;
  916. ( (TTSTimeRangeItemInfo*) pItemNormInfo )->pSecondTimeInfo =
  917. (TTSTimeOfDayItemInfo*) pSecondTimeInfo;
  918. //--- Copy temp word list to real word list if everything has succeeded...
  919. WordList.AddTail( &TempWordList );
  920. }
  921. }
  922. }
  923. }
  924. if ( !SUCCEEDED( hr ) )
  925. {
  926. m_pNextChar = pTempNextChar;
  927. m_pEndChar = pTempEndChar;
  928. m_pEndOfCurrItem = pTempEndOfCurrItem;
  929. m_pCurrFrag = pTempFrag;
  930. if ( fMultiItem )
  931. {
  932. *( (WCHAR*) pHyphen ) = L'-';
  933. }
  934. }
  935. return hr;
  936. } /* IsTimeRange */
  937. //-----------End Of File-------------------------------------------------------------------