Leaked source code of Windows Server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

3914 lines
149 KiB

  1. /***********************************************************************************************
  2. * NumNorm.cpp *
  3. *-------------*
  4. * Description:
  5. * These functions normalize ordinary ordinal and cardinal numbers
  6. *-----------------------------------------------------------------------------------------------
  7. * Created by AH August 3, 1999
  8. * Copyright (C) 1999 Microsoft Corporation
  9. * All Rights Reserved
  10. *
  11. ***********************************************************************************************/
  12. #include "stdafx.h"
  13. #ifndef StdSentEnum_h
  14. #include "stdsentenum.h"
  15. #endif
  16. /***********************************************************************************************
  17. * IsNumberCategory *
  18. *------------------*
  19. * Description:
  20. * Checks the next token in the text stream to determine if it is a number category -
  21. * percents, degrees, squared and cubed numbers, and plain old numbers get matched here.
  22. *
  23. ********************************************************************* AH **********************/
  24. HRESULT CStdSentEnum::IsNumberCategory( TTSItemInfo*& pItemNormInfo, const WCHAR* Context,
  25. CSentItemMemory& MemoryManager )
  26. {
  27. HRESULT hr = S_OK;
  28. const WCHAR *pTempNextChar = m_pNextChar, *pTempEndChar = m_pEndChar, *pTempEndOfItem = m_pEndOfCurrItem;
  29. const SPVTEXTFRAG *pTempFrag = m_pCurrFrag;
  30. TTSItemInfo *pNumberInfo = NULL;
  31. hr = IsNumber( pNumberInfo, Context, MemoryManager );
  32. if ( SUCCEEDED( hr ) &&
  33. pNumberInfo->Type != eDATE_YEAR &&
  34. ( (TTSNumberItemInfo*) pNumberInfo )->pEndChar == m_pEndOfCurrItem - 1 )
  35. {
  36. if ( *( ( (TTSNumberItemInfo*) pNumberInfo )->pEndChar ) == L'%' )
  37. {
  38. pItemNormInfo = pNumberInfo;
  39. pItemNormInfo->Type = eNUM_PERCENT;
  40. }
  41. else if ( *( ( (TTSNumberItemInfo*) pNumberInfo )->pEndChar ) == L'' )
  42. {
  43. pItemNormInfo = pNumberInfo;
  44. pItemNormInfo->Type = eNUM_DEGREES;
  45. }
  46. else if ( *( ( (TTSNumberItemInfo*) pNumberInfo )->pEndChar ) == L'' )
  47. {
  48. pItemNormInfo = pNumberInfo;
  49. pItemNormInfo->Type = eNUM_SQUARED;
  50. }
  51. else if ( *( ( (TTSNumberItemInfo*) pNumberInfo )->pEndChar ) == L'' )
  52. {
  53. pItemNormInfo = pNumberInfo;
  54. pItemNormInfo->Type = eNUM_CUBED;
  55. }
  56. else
  57. {
  58. hr = E_INVALIDARG;
  59. delete ( (TTSNumberItemInfo*) pNumberInfo )->pWordList;
  60. }
  61. }
  62. else if ( SUCCEEDED( hr ) &&
  63. ( pNumberInfo->Type == eDATE_YEAR ||
  64. ( (TTSNumberItemInfo*) pNumberInfo )->pEndChar == m_pEndOfCurrItem ) )
  65. {
  66. pItemNormInfo = pNumberInfo;
  67. }
  68. else if ( SUCCEEDED( hr ) )
  69. {
  70. hr = E_INVALIDARG;
  71. if ( pNumberInfo->Type != eDATE_YEAR )
  72. {
  73. delete ( (TTSNumberItemInfo*) pNumberInfo )->pWordList;
  74. }
  75. m_pNextChar = pTempNextChar;
  76. m_pEndChar = pTempEndChar;
  77. m_pEndOfCurrItem = pTempEndOfItem;
  78. m_pCurrFrag = pTempFrag;
  79. }
  80. return hr;
  81. } /* IsNumberCategory */
  82. /***********************************************************************************************
  83. * IsNumber *
  84. *----------*
  85. * Description:
  86. * Checks the next token in the text stream to determine if it is a number.
  87. *
  88. * RegExp:
  89. * [-]? { d+ || d(1-3)[,ddd]+ } { { .d+ } || { "st" || "nd" || "rd" || "th" } }?
  90. * It is actually a bit more complicated than this - for instance, the ordinal
  91. * strings may only follow certain digits (1st, 2nd, 3rd, 4-0th)...
  92. *
  93. ********************************************************************* AH **********************/
  94. HRESULT CStdSentEnum::IsNumber( TTSItemInfo*& pItemNormInfo, const WCHAR* Context,
  95. CSentItemMemory& MemoryManager, BOOL fMultiItem )
  96. {
  97. SPDBG_FUNC( "CStdSentEnum::IsNumber" );
  98. HRESULT hr = S_OK;
  99. bool fNegative = false;
  100. TTSIntegerItemInfo* pIntegerInfo = NULL;
  101. TTSDigitsItemInfo* pDecimalInfo = NULL;
  102. TTSFractionItemInfo* pFractionInfo = NULL;
  103. const SPVSTATE *pIntegerState = &m_pCurrFrag->State;
  104. CItemList PostIntegerList;
  105. ULONG ulOffset = 0, ulTokenLen = (ULONG)(m_pEndOfCurrItem - m_pNextChar);
  106. WCHAR wcDecimalPoint;
  107. const WCHAR *pTempNextChar = m_pNextChar, *pTempEndChar = m_pEndChar, *pTempEndOfItem = m_pEndOfCurrItem;
  108. const SPVTEXTFRAG *pTempFrag = m_pCurrFrag;
  109. if ( ulTokenLen )
  110. {
  111. //--- Set Separator and Decimal Point character preferences for this call
  112. if ( m_eSeparatorAndDecimal == COMMA_PERIOD )
  113. {
  114. wcDecimalPoint = L'.';
  115. }
  116. else
  117. {
  118. wcDecimalPoint = L',';
  119. }
  120. //--- Try to match the negative sign - [-]?
  121. if ( m_pNextChar[ulOffset] == L'-' )
  122. {
  123. fNegative = true;
  124. ulOffset++;
  125. }
  126. //--- Try to match the integral part
  127. hr = IsInteger( m_pNextChar + ulOffset, pIntegerInfo, MemoryManager );
  128. //--- Adjust ulOffset and hr...
  129. if ( SUCCEEDED( hr ) )
  130. {
  131. ulOffset += (ULONG)(pIntegerInfo->pEndChar - pIntegerInfo->pStartChar);
  132. }
  133. else if ( hr == E_INVALIDARG )
  134. {
  135. hr = S_OK;
  136. pIntegerInfo = NULL;
  137. }
  138. //--- Try to match a decimal part
  139. if ( ulOffset < ulTokenLen &&
  140. m_pNextChar[ulOffset] == wcDecimalPoint )
  141. {
  142. hr = IsDigitString( m_pNextChar + ulOffset + 1, pDecimalInfo, MemoryManager );
  143. if ( SUCCEEDED( hr ) )
  144. {
  145. ulOffset += pDecimalInfo->ulNumDigits + 1;
  146. //--- Check for special case - decimal number numerator...
  147. if ( ulOffset < ulTokenLen &&
  148. m_pNextChar[ulOffset] == L'/' )
  149. {
  150. pIntegerInfo = NULL;
  151. pDecimalInfo = NULL;
  152. fNegative ? ulOffset = 1 : ulOffset = 0;
  153. hr = IsFraction( m_pNextChar + ulOffset, pFractionInfo, MemoryManager );
  154. if ( SUCCEEDED( hr ) )
  155. {
  156. if ( pFractionInfo->pVulgar )
  157. {
  158. ulOffset++;
  159. }
  160. else
  161. {
  162. ulOffset += (ULONG)(pFractionInfo->pDenominator->pEndChar - pFractionInfo->pNumerator->pStartChar);
  163. }
  164. }
  165. else if ( hr == E_INVALIDARG )
  166. {
  167. hr = S_OK;
  168. }
  169. }
  170. }
  171. else if ( hr == E_INVALIDARG )
  172. {
  173. hr = S_OK;
  174. pDecimalInfo = NULL;
  175. }
  176. }
  177. //--- Try to match an ordinal string
  178. else if ( pIntegerInfo &&
  179. ulOffset < ulTokenLen &&
  180. isalpha( m_pNextChar[ulOffset] ) )
  181. {
  182. switch ( toupper( m_pNextChar[ulOffset] ) )
  183. {
  184. case 'S':
  185. //--- Must be of the form "...1st" but not "...11st"
  186. if ( toupper( m_pNextChar[ulOffset+1] ) == L'T' &&
  187. m_pNextChar[ulOffset-1] == L'1' &&
  188. (ulOffset + 2) == ulTokenLen &&
  189. ( ulOffset == 1 ||
  190. m_pNextChar[ulOffset-2] != L'1' ) )
  191. {
  192. ulOffset += 2;
  193. pIntegerInfo->fOrdinal = true;
  194. }
  195. break;
  196. case 'N':
  197. //--- Must be of the form "...2nd" but not "...12nd"
  198. if ( (ulOffset + 2) == ulTokenLen &&
  199. toupper(m_pNextChar[ulOffset+1]) == L'D' &&
  200. m_pNextChar[ulOffset-1] == L'2' &&
  201. ( ulOffset == 1 ||
  202. m_pNextChar[ulOffset-2] != L'1' ) )
  203. {
  204. ulOffset += 2;
  205. pIntegerInfo->fOrdinal = true;
  206. }
  207. break;
  208. case 'R':
  209. //--- Must be of the form "...3rd" but not "...13rd"
  210. if ( (ulOffset + 2) == ulTokenLen &&
  211. toupper(m_pNextChar[ulOffset+1]) == L'D' &&
  212. m_pNextChar[ulOffset-1] == L'3' &&
  213. ( ulOffset == 1 ||
  214. m_pNextChar[ulOffset-2] != L'1' ) )
  215. {
  216. ulOffset += 2;
  217. pIntegerInfo->fOrdinal = true;
  218. }
  219. break;
  220. case 'T':
  221. //--- Must be of the form "...[4-9]th" or "...[11-19]th" or "...[0]th"
  222. if ( (ulOffset + 2) == ulTokenLen &&
  223. toupper(m_pNextChar[ulOffset+1]) == L'H' &&
  224. ( ( m_pNextChar[ulOffset-1] <= L'9' && m_pNextChar[ulOffset-1] >= L'4') ||
  225. ( m_pNextChar[ulOffset-1] == L'0') ||
  226. ( ulOffset == 1 || m_pNextChar[ulOffset-2] == L'1') ) )
  227. {
  228. ulOffset += 2;
  229. pIntegerInfo->fOrdinal = true;
  230. }
  231. break;
  232. default:
  233. // Some invalid non-digit character found at the end of the string
  234. break;
  235. }
  236. }
  237. //--- Try to match a fraction
  238. else
  239. {
  240. //--- Try to match an attached fraction
  241. if ( ulOffset < ulTokenLen )
  242. {
  243. if ( m_pNextChar[ulOffset] == L'-' )
  244. {
  245. ulOffset++;
  246. }
  247. hr = IsFraction( m_pNextChar + ulOffset, pFractionInfo, MemoryManager );
  248. if ( SUCCEEDED( hr ) )
  249. {
  250. if ( pFractionInfo->pVulgar )
  251. {
  252. ulOffset++;
  253. }
  254. else
  255. {
  256. ulOffset += (ULONG)(pFractionInfo->pDenominator->pEndChar - pFractionInfo->pNumerator->pStartChar);
  257. }
  258. }
  259. else if ( hr == E_INVALIDARG )
  260. {
  261. hr = S_OK;
  262. }
  263. }
  264. //--- Try to match an unattached fraction
  265. else if ( fMultiItem )
  266. {
  267. pIntegerState = &m_pCurrFrag->State;
  268. //--- Advance in text
  269. m_pNextChar = m_pEndOfCurrItem;
  270. hr = SkipWhiteSpaceAndTags( m_pNextChar, m_pEndChar, m_pCurrFrag, MemoryManager,
  271. true, &PostIntegerList );
  272. if ( !m_pNextChar &&
  273. SUCCEEDED( hr ) )
  274. {
  275. m_pNextChar = pTempNextChar;
  276. m_pEndChar = pTempEndChar;
  277. m_pCurrFrag = pTempFrag;
  278. }
  279. else if ( m_pNextChar &&
  280. SUCCEEDED( hr ) )
  281. {
  282. m_pEndOfCurrItem = FindTokenEnd( m_pNextChar, m_pEndChar );
  283. while ( IsMiscPunctuation( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED ||
  284. IsGroupEnding( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED ||
  285. IsQuotationMark( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED ||
  286. IsEOSItem( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED )
  287. {
  288. m_pEndOfCurrItem--;
  289. }
  290. hr = IsFraction( m_pNextChar, pFractionInfo, MemoryManager );
  291. if ( FAILED( hr ) )
  292. {
  293. m_pNextChar = pTempNextChar;
  294. m_pEndChar = pTempEndChar;
  295. m_pEndOfCurrItem = pTempEndOfItem;
  296. m_pCurrFrag = pTempFrag;
  297. if ( hr == E_INVALIDARG )
  298. {
  299. hr = S_OK;
  300. }
  301. }
  302. else
  303. {
  304. ulTokenLen = (ULONG)(m_pEndOfCurrItem - m_pNextChar);
  305. if ( pFractionInfo->pVulgar )
  306. {
  307. ulOffset = 1;
  308. }
  309. else
  310. {
  311. ulOffset = (ULONG)(pFractionInfo->pDenominator->pEndChar -
  312. pFractionInfo->pNumerator->pStartChar);
  313. }
  314. }
  315. }
  316. }
  317. }
  318. }
  319. else
  320. {
  321. hr = E_INVALIDARG;
  322. }
  323. //--- If we haven't processed the whole item yet, and it isn't part of a larger item --
  324. //--- e.g. a percent, a degrees number, or a square or cube -- then fail to match it
  325. //--- as a number...
  326. if ( ulOffset != ulTokenLen &&
  327. !( ulTokenLen == ulOffset + 1 &&
  328. ( m_pNextChar[ulOffset] == L'%' ||
  329. m_pNextChar[ulOffset] == L'' ||
  330. m_pNextChar[ulOffset] == L'' ||
  331. m_pNextChar[ulOffset] == L'' ) ) )
  332. {
  333. m_pNextChar = pTempNextChar;
  334. m_pEndOfCurrItem = pTempEndOfItem;
  335. m_pEndChar = pTempEndChar;
  336. m_pCurrFrag = pTempFrag;
  337. hr = E_INVALIDARG;
  338. }
  339. //--- Fill out pItemNormInfo...
  340. if ( SUCCEEDED( hr ) &&
  341. ( pIntegerInfo ||
  342. pDecimalInfo ||
  343. pFractionInfo ) )
  344. {
  345. //--- Reset m_pNextChar to handle the Mixed Fraction case...
  346. m_pNextChar = pTempNextChar;
  347. if ( pIntegerInfo &&
  348. pIntegerInfo->pEndChar - pIntegerInfo->pStartChar == 4 &&
  349. !pIntegerInfo->fSeparators &&
  350. !pIntegerInfo->fOrdinal &&
  351. !pDecimalInfo &&
  352. !pFractionInfo &&
  353. !fNegative &&
  354. ulOffset == ulTokenLen &&
  355. ( !Context ||
  356. _wcsnicmp( Context, L"NUMBER", 6 ) != 0 ) )
  357. {
  358. pItemNormInfo = (TTSYearItemInfo*) MemoryManager.GetMemory( sizeof( TTSYearItemInfo ), &hr );
  359. if ( SUCCEEDED( hr ) )
  360. {
  361. pItemNormInfo->Type = eDATE_YEAR;
  362. ( (TTSYearItemInfo*) pItemNormInfo )->pYear = m_pNextChar;
  363. ( (TTSYearItemInfo*) pItemNormInfo )->ulNumDigits = 4;
  364. }
  365. }
  366. else
  367. {
  368. pItemNormInfo = (TTSNumberItemInfo*) MemoryManager.GetMemory( sizeof( TTSNumberItemInfo ), &hr );
  369. if ( SUCCEEDED( hr ) )
  370. {
  371. ZeroMemory( pItemNormInfo, sizeof( TTSNumberItemInfo ) );
  372. if ( pDecimalInfo )
  373. {
  374. pItemNormInfo->Type = eNUM_DECIMAL;
  375. if ( pIntegerInfo )
  376. {
  377. ( (TTSNumberItemInfo*) pItemNormInfo )->pEndChar = pIntegerInfo->pEndChar +
  378. pDecimalInfo->ulNumDigits + 1;
  379. }
  380. else
  381. {
  382. ( (TTSNumberItemInfo*) pItemNormInfo )->pEndChar = m_pNextChar + pDecimalInfo->ulNumDigits + 1;
  383. if ( fNegative )
  384. {
  385. ( (TTSNumberItemInfo*) pItemNormInfo )->pEndChar++;
  386. }
  387. }
  388. }
  389. else if ( pFractionInfo )
  390. {
  391. if ( pFractionInfo->pVulgar )
  392. {
  393. ( (TTSNumberItemInfo*) pItemNormInfo )->pEndChar = pFractionInfo->pVulgar + 1;
  394. }
  395. else
  396. {
  397. ( (TTSNumberItemInfo*) pItemNormInfo )->pEndChar =
  398. pFractionInfo->pDenominator->pEndChar;
  399. }
  400. if ( pIntegerInfo )
  401. {
  402. pItemNormInfo->Type = eNUM_MIXEDFRACTION;
  403. }
  404. else
  405. {
  406. pItemNormInfo->Type = eNUM_FRACTION;
  407. }
  408. }
  409. else if ( pIntegerInfo )
  410. {
  411. if ( pIntegerInfo->fOrdinal )
  412. {
  413. ( (TTSNumberItemInfo*) pItemNormInfo )->pEndChar = pIntegerInfo->pEndChar + 2;
  414. pItemNormInfo->Type = eNUM_ORDINAL;
  415. }
  416. else
  417. {
  418. ( (TTSNumberItemInfo*) pItemNormInfo )->pEndChar = pIntegerInfo->pEndChar;
  419. pItemNormInfo->Type = eNUM_CARDINAL;
  420. }
  421. }
  422. }
  423. if ( SUCCEEDED( hr ) )
  424. {
  425. ( (TTSNumberItemInfo*) pItemNormInfo )->fNegative = fNegative;
  426. ( (TTSNumberItemInfo*) pItemNormInfo )->pIntegerPart = pIntegerInfo;
  427. ( (TTSNumberItemInfo*) pItemNormInfo )->pDecimalPart = pDecimalInfo;
  428. ( (TTSNumberItemInfo*) pItemNormInfo )->pFractionalPart = pFractionInfo;
  429. ( (TTSNumberItemInfo*) pItemNormInfo )->pStartChar = m_pNextChar;
  430. ( (TTSNumberItemInfo*) pItemNormInfo )->pWordList = new CWordList;
  431. }
  432. }
  433. }
  434. else
  435. {
  436. hr = E_INVALIDARG;
  437. }
  438. //--- Expand Number into WordList
  439. if ( SUCCEEDED( hr ) &&
  440. pItemNormInfo->Type != eDATE_YEAR )
  441. {
  442. TTSWord Word;
  443. ZeroMemory( &Word, sizeof( TTSWord ) );
  444. Word.pXmlState = pIntegerState;
  445. Word.eWordPartOfSpeech = MS_Unknown;
  446. //--- Insert "negative"
  447. if ( fNegative )
  448. {
  449. Word.pWordText = g_negative.pStr;
  450. Word.ulWordLen = g_negative.Len;
  451. Word.pLemma = Word.pWordText;
  452. Word.ulLemmaLen = Word.ulWordLen;
  453. ( (TTSNumberItemInfo*) pItemNormInfo )->pWordList->AddTail( Word );
  454. }
  455. //--- Expand Integral Part
  456. if ( pIntegerInfo )
  457. {
  458. ExpandInteger( pIntegerInfo, Context, *( (TTSNumberItemInfo*) pItemNormInfo )->pWordList );
  459. }
  460. //--- Expand Decimal Part
  461. if ( pDecimalInfo )
  462. {
  463. //--- Insert "point"
  464. Word.pWordText = g_decimalpoint.pStr;
  465. Word.ulWordLen = g_decimalpoint.Len;
  466. Word.pLemma = Word.pWordText;
  467. Word.ulLemmaLen = Word.ulWordLen;
  468. ( (TTSNumberItemInfo*) pItemNormInfo )->pWordList->AddTail( Word );
  469. ExpandDigits( pDecimalInfo, *( (TTSNumberItemInfo*) pItemNormInfo )->pWordList );
  470. }
  471. //--- Expand Fractional Part
  472. if ( pFractionInfo )
  473. {
  474. //--- Insert Post-Integer Non-Spoken XML States, if any
  475. while ( !PostIntegerList.IsEmpty() )
  476. {
  477. ( (TTSNumberItemInfo*) pItemNormInfo )->pWordList->AddTail( ( PostIntegerList.RemoveHead() ).Words[0] );
  478. }
  479. //--- Insert "and", if also an integer part
  480. if ( pIntegerInfo )
  481. {
  482. Word.pXmlState = &m_pCurrFrag->State;
  483. Word.pWordText = g_And.pStr;
  484. Word.ulWordLen = g_And.Len;
  485. Word.pLemma = Word.pWordText;
  486. Word.ulLemmaLen = Word.ulWordLen;
  487. ( (TTSNumberItemInfo*) pItemNormInfo )->pWordList->AddTail( Word );
  488. }
  489. hr = ExpandFraction( pFractionInfo, *( (TTSNumberItemInfo*) pItemNormInfo )->pWordList );
  490. }
  491. }
  492. return hr;
  493. } /* IsNumber */
  494. /***********************************************************************************************
  495. * ExpandNumber *
  496. *--------------*
  497. * Description:
  498. * Expands Items previously determined to be of type NUM_CARDINAL, NUM_DECIMAL, or
  499. * NUM_ORDINAL by IsNumber.
  500. *
  501. * NOTE: This function does not do parameter validation. Assumed to be done by caller.
  502. ********************************************************************* AH **********************/
  503. HRESULT CStdSentEnum::ExpandNumber( TTSNumberItemInfo* pItemInfo, CWordList& WordList )
  504. {
  505. SPDBG_FUNC( "NumNorm ExpandNumber" );
  506. HRESULT hr = S_OK;
  507. WordList.AddTail( pItemInfo->pWordList );
  508. delete pItemInfo->pWordList;
  509. return hr;
  510. } /* ExpandNumber */
  511. /***********************************************************************************************
  512. * ExpandPercent *
  513. *---------------*
  514. * Description:
  515. * Expands Items previously determined to be of type NUM_PERCENT by IsNumber.
  516. *
  517. * NOTE: This function does not do parameter validation. Assumed to be done by caller.
  518. ********************************************************************* AH **********************/
  519. HRESULT CStdSentEnum::ExpandPercent( TTSNumberItemInfo* pItemInfo, CWordList& WordList )
  520. {
  521. SPDBG_FUNC( "CStdSentEnum::ExpandPercent" );
  522. HRESULT hr = S_OK;
  523. WordList.AddTail( pItemInfo->pWordList );
  524. delete pItemInfo->pWordList;
  525. TTSWord Word;
  526. ZeroMemory( &Word, sizeof( TTSWord ) );
  527. Word.pXmlState = &m_pCurrFrag->State;
  528. Word.eWordPartOfSpeech = MS_Unknown;
  529. Word.pWordText = g_percent.pStr;
  530. Word.ulWordLen = g_percent.Len;
  531. Word.pLemma = Word.pWordText;
  532. Word.ulLemmaLen = Word.ulWordLen;
  533. WordList.AddTail( Word );
  534. return hr;
  535. } /* ExpandPercent */
  536. /***********************************************************************************************
  537. * ExpandDegree *
  538. *---------------*
  539. * Description:
  540. * Expands Items previously determined to be of type NUM_DEGREES by IsNumber.
  541. *
  542. * NOTE: This function does not do parameter validation. Assumed to be done by caller.
  543. ********************************************************************* AH **********************/
  544. HRESULT CStdSentEnum::ExpandDegrees( TTSNumberItemInfo* pItemInfo, CWordList& WordList )
  545. {
  546. SPDBG_FUNC( "CStdSentEnum::ExpandDegrees" );
  547. HRESULT hr = S_OK;
  548. WordList.AddTail( pItemInfo->pWordList );
  549. delete pItemInfo->pWordList;
  550. TTSWord Word;
  551. ZeroMemory( &Word, sizeof( TTSWord ) );
  552. Word.pXmlState = &m_pCurrFrag->State;
  553. Word.eWordPartOfSpeech = MS_Unknown;
  554. if ( !pItemInfo->pDecimalPart &&
  555. !pItemInfo->pFractionalPart &&
  556. pItemInfo->pIntegerPart &&
  557. pItemInfo->pIntegerPart->pEndChar - pItemInfo->pIntegerPart->pStartChar == 1 &&
  558. pItemInfo->pIntegerPart->pStartChar[0] == L'1' )
  559. {
  560. Word.pWordText = g_degree.pStr;
  561. Word.ulWordLen = g_degree.Len;
  562. Word.pLemma = Word.pWordText;
  563. Word.ulLemmaLen = Word.ulWordLen;
  564. }
  565. else if ( !pItemInfo->pIntegerPart &&
  566. pItemInfo->pFractionalPart &&
  567. !pItemInfo->pFractionalPart->fIsStandard )
  568. {
  569. Word.pWordText = g_of.pStr;
  570. Word.ulWordLen = g_of.Len;
  571. Word.pLemma = Word.pWordText;
  572. Word.ulLemmaLen = Word.ulWordLen;
  573. WordList.AddTail( Word );
  574. Word.pWordText = g_a.pStr;
  575. Word.ulWordLen = g_a.Len;
  576. Word.pLemma = Word.pWordText;
  577. Word.ulLemmaLen = Word.ulWordLen;
  578. WordList.AddTail( Word );
  579. Word.pWordText = g_degree.pStr;
  580. Word.ulWordLen = g_degree.Len;
  581. Word.pLemma = Word.pWordText;
  582. Word.ulLemmaLen = Word.ulWordLen;
  583. }
  584. else
  585. {
  586. Word.pWordText = g_degrees.pStr;
  587. Word.ulWordLen = g_degrees.Len;
  588. Word.pLemma = Word.pWordText;
  589. Word.ulLemmaLen = Word.ulWordLen;
  590. }
  591. WordList.AddTail( Word );
  592. return hr;
  593. } /* ExpandDegrees */
  594. /***********************************************************************************************
  595. * ExpandSquare *
  596. *---------------*
  597. * Description:
  598. * Expands Items previously determined to be of type NUM_SQUARED by IsNumber.
  599. *
  600. * NOTE: This function does not do parameter validation. Assumed to be done by caller.
  601. ********************************************************************* AH **********************/
  602. HRESULT CStdSentEnum::ExpandSquare( TTSNumberItemInfo* pItemInfo, CWordList& WordList )
  603. {
  604. SPDBG_FUNC( "CStdSentEnum::ExpandSquare" );
  605. HRESULT hr = S_OK;
  606. WordList.AddTail( pItemInfo->pWordList );
  607. delete pItemInfo->pWordList;
  608. TTSWord Word;
  609. ZeroMemory( &Word, sizeof( TTSWord ) );
  610. Word.pXmlState = &m_pCurrFrag->State;
  611. Word.eWordPartOfSpeech = MS_Unknown;
  612. Word.pWordText = g_squared.pStr;
  613. Word.ulWordLen = g_squared.Len;
  614. Word.pLemma = Word.pWordText;
  615. Word.ulLemmaLen = Word.ulWordLen;
  616. WordList.AddTail( Word );
  617. return hr;
  618. } /* ExpandSquare */
  619. /***********************************************************************************************
  620. * ExpandCube *
  621. *---------------*
  622. * Description:
  623. * Expands Items previously determined to be of type NUM_CUBED by IsNumber.
  624. *
  625. * NOTE: This function does not do parameter validation. Assumed to be done by caller.
  626. ********************************************************************* AH **********************/
  627. HRESULT CStdSentEnum::ExpandCube( TTSNumberItemInfo* pItemInfo, CWordList& WordList )
  628. {
  629. SPDBG_FUNC( "CStdSentEnum::ExpandCube" );
  630. HRESULT hr = S_OK;
  631. WordList.AddTail( pItemInfo->pWordList );
  632. delete pItemInfo->pWordList;
  633. TTSWord Word;
  634. ZeroMemory( &Word, sizeof( TTSWord ) );
  635. Word.pXmlState = &m_pCurrFrag->State;
  636. Word.eWordPartOfSpeech = MS_Unknown;
  637. Word.pWordText = g_cubed.pStr;
  638. Word.ulWordLen = g_cubed.Len;
  639. Word.pLemma = Word.pWordText;
  640. Word.ulLemmaLen = Word.ulWordLen;
  641. WordList.AddTail( Word );
  642. return hr;
  643. } /* ExpandCube */
  644. /***********************************************************************************************
  645. * IsInteger *
  646. *-----------*
  647. * Description:
  648. * Helper for IsNumber which matches the integer part...
  649. *
  650. * RegExp:
  651. * { d+ || d(1-3)[,ddd]+ }
  652. *
  653. ********************************************************************* AH **********************/
  654. HRESULT CStdSentEnum::IsInteger( const WCHAR* pStartChar, TTSIntegerItemInfo*& pIntegerInfo,
  655. CSentItemMemory& MemoryManager )
  656. {
  657. HRESULT hr = S_OK;
  658. ULONG ulOffset = 0, ulCount = 0, ulTokenLen = (ULONG)(m_pEndOfCurrItem - pStartChar);
  659. BOOL fSeparators = false, fDone = false;
  660. WCHAR wcSeparator, wcDecimalPoint;
  661. if ( m_eSeparatorAndDecimal == COMMA_PERIOD )
  662. {
  663. wcSeparator = L',';
  664. wcDecimalPoint = L'.';
  665. }
  666. else
  667. {
  668. wcSeparator = L'.';
  669. wcDecimalPoint = L',';
  670. }
  671. //--- Check for first digit
  672. if ( !isdigit(pStartChar[ulOffset]) )
  673. {
  674. hr = E_INVALIDARG;
  675. }
  676. else
  677. {
  678. ulCount++;
  679. ulOffset++;
  680. }
  681. //--- Check for separators
  682. ULONG i = ulOffset + 3;
  683. while ( SUCCEEDED( hr ) &&
  684. ulOffset < i &&
  685. ulOffset < ulTokenLen )
  686. {
  687. if ( pStartChar[ulOffset] == wcSeparator )
  688. {
  689. //--- Found a separator
  690. fSeparators = true;
  691. break;
  692. }
  693. else if ( !isdigit( pStartChar[ulOffset] ) &&
  694. ( pStartChar[ulOffset] == wcDecimalPoint ||
  695. pStartChar[ulOffset] == L'%' ||
  696. pStartChar[ulOffset] == L'' ||
  697. pStartChar[ulOffset] == L'' ||
  698. pStartChar[ulOffset] == L'' ||
  699. pStartChar[ulOffset] == L'-' ||
  700. pStartChar[ulOffset] == L'' ||
  701. pStartChar[ulOffset] == L'' ||
  702. pStartChar[ulOffset] == L'' ||
  703. toupper( pStartChar[ulOffset] ) == L'S' ||
  704. toupper( pStartChar[ulOffset] ) == L'N' ||
  705. toupper( pStartChar[ulOffset] ) == L'R' ||
  706. toupper( pStartChar[ulOffset] ) == L'T' ) )
  707. {
  708. fDone = true;
  709. break;
  710. }
  711. else if ( isdigit( pStartChar[ulOffset] ) )
  712. {
  713. //--- Just another digit
  714. ulCount++;
  715. ulOffset++;
  716. }
  717. else
  718. {
  719. hr = E_INVALIDARG;
  720. break;
  721. }
  722. }
  723. if ( SUCCEEDED( hr ) &&
  724. !fDone &&
  725. ulOffset < ulTokenLen )
  726. {
  727. if ( !fSeparators )
  728. {
  729. //--- No separators. Pattern must be {d+} if this is indeed a number, so just count digits.
  730. while ( isdigit( pStartChar[ulOffset] ) &&
  731. ulOffset < ulTokenLen )
  732. {
  733. ulCount++;
  734. ulOffset++;
  735. }
  736. if ( ulOffset != ulTokenLen &&
  737. !( pStartChar[ulOffset] == wcDecimalPoint ||
  738. pStartChar[ulOffset] == L'%' ||
  739. pStartChar[ulOffset] == L'' ||
  740. pStartChar[ulOffset] == L'' ||
  741. pStartChar[ulOffset] == L'' ||
  742. pStartChar[ulOffset] == L'%' ||
  743. pStartChar[ulOffset] == L'' ||
  744. pStartChar[ulOffset] == L'' ||
  745. pStartChar[ulOffset] == L'' ||
  746. pStartChar[ulOffset] == L'-' ||
  747. pStartChar[ulOffset] == L'' ||
  748. pStartChar[ulOffset] == L'' ||
  749. pStartChar[ulOffset] == L'' ||
  750. toupper( pStartChar[ulOffset] ) == L'S' ||
  751. toupper( pStartChar[ulOffset] ) == L'N' ||
  752. toupper( pStartChar[ulOffset] ) == L'R' ||
  753. toupper( pStartChar[ulOffset] ) == L'T' ) )
  754. {
  755. hr = E_INVALIDARG;
  756. }
  757. }
  758. else
  759. {
  760. //--- Separators. Pattern must be { d(1-3)[,ddd]+ }, so make sure the separators match up
  761. while ( SUCCEEDED( hr ) &&
  762. pStartChar[ulOffset] == wcSeparator &&
  763. ( ulOffset + 3 ) < ulTokenLen)
  764. {
  765. ulOffset++;
  766. for ( i = ulOffset + 3; SUCCEEDED( hr ) && ulOffset < i; ulOffset++ )
  767. {
  768. if ( isdigit( pStartChar[ulOffset] ) )
  769. {
  770. ulCount++;
  771. }
  772. else // Some non-digit character found - abort!
  773. {
  774. hr = E_INVALIDARG;
  775. }
  776. }
  777. }
  778. if ( ulOffset != ulTokenLen &&
  779. !( pStartChar[ulOffset] == wcDecimalPoint ||
  780. pStartChar[ulOffset] == L'%' ||
  781. pStartChar[ulOffset] == L'' ||
  782. pStartChar[ulOffset] == L'' ||
  783. pStartChar[ulOffset] == L'' ||
  784. pStartChar[ulOffset] == L'-' ||
  785. pStartChar[ulOffset] == L'' ||
  786. pStartChar[ulOffset] == L'' ||
  787. pStartChar[ulOffset] == L'' ||
  788. toupper( pStartChar[ulOffset] ) == L'S' ||
  789. toupper( pStartChar[ulOffset] ) == L'N' ||
  790. toupper( pStartChar[ulOffset] ) == L'R' ||
  791. toupper( pStartChar[ulOffset] ) == L'T' ) )
  792. {
  793. hr = E_INVALIDARG;
  794. }
  795. }
  796. }
  797. if ( SUCCEEDED( hr ) )
  798. {
  799. pIntegerInfo = (TTSIntegerItemInfo*) MemoryManager.GetMemory( sizeof( TTSIntegerItemInfo ), &hr );
  800. if ( SUCCEEDED( hr ) )
  801. {
  802. ZeroMemory( pIntegerInfo, sizeof( TTSIntegerItemInfo ) );
  803. pIntegerInfo->fSeparators = fSeparators;
  804. pIntegerInfo->lLeftOver = ulCount % 3;
  805. pIntegerInfo->lNumGroups = ( ulCount - 1 ) / 3;
  806. pIntegerInfo->pStartChar = pStartChar;
  807. pIntegerInfo->pEndChar = pStartChar + ulOffset;
  808. }
  809. }
  810. return hr;
  811. } /* IsInteger */
  812. /***********************************************************************************************
  813. * ExpandInteger *
  814. *---------------*
  815. * Description:
  816. *
  817. *
  818. * NOTE: This function does not do parameter validation. Assumed to be done by caller.
  819. ********************************************************************* AH **********************/
  820. void CStdSentEnum::ExpandInteger( TTSIntegerItemInfo* pItemInfo, const WCHAR* Context, CWordList& WordList )
  821. {
  822. SPDBG_FUNC( "CStdSentEnum::ExpandInteger" );
  823. //--- Local variable declarations and initialization
  824. BOOL bFinished = false;
  825. const WCHAR *pStartChar = pItemInfo->pStartChar, *pEndChar = pItemInfo->pEndChar;
  826. ULONG ulOffset = 0, ulTokenLen = (ULONG)(pEndChar - pStartChar), ulTemp = (ULONG)(pItemInfo->lNumGroups + 1);
  827. TTSWord Word;
  828. ZeroMemory( &Word, sizeof(TTSWord) );
  829. Word.pXmlState = &m_pCurrFrag->State;
  830. Word.eWordPartOfSpeech = MS_Unknown;
  831. //--- Out of range integer, or integer beginning with one or more zeroes...
  832. if ( pStartChar[0] == L'0' ||
  833. ( Context &&
  834. _wcsicmp( Context, L"NUMBER_DIGIT" ) == 0 ) ||
  835. pItemInfo->lNumGroups >= sp_countof(g_quantifiers) )
  836. {
  837. pItemInfo->fDigitByDigit = true;
  838. pItemInfo->ulNumDigits = 0;
  839. for ( ULONG i = 0; i < ulTokenLen; i++ )
  840. {
  841. if ( isdigit( pStartChar[i] ) )
  842. {
  843. ExpandDigit( pStartChar[i], pItemInfo->Groups[0], WordList );
  844. pItemInfo->ulNumDigits++;
  845. }
  846. }
  847. }
  848. //--- Expanding a number < 1000
  849. else if ( pItemInfo->lNumGroups == 0 )
  850. {
  851. // 0th through 999th...
  852. if ( pItemInfo->fOrdinal )
  853. {
  854. switch ( pItemInfo->lLeftOver )
  855. {
  856. case 1:
  857. // 0th through 9th...
  858. ExpandDigitOrdinal( pStartChar[ulOffset], pItemInfo->Groups[0], WordList );
  859. break;
  860. case 2:
  861. // 10th through 99th...
  862. ExpandTwoOrdinal( pStartChar + ulOffset, pItemInfo->Groups[0], WordList );
  863. break;
  864. case 0:
  865. // 100th through 999th...
  866. ExpandThreeOrdinal( pStartChar + ulOffset, pItemInfo->Groups[0], WordList );
  867. break;
  868. case -1:
  869. ulTemp = 0;
  870. pItemInfo->lLeftOver = 0;
  871. break;
  872. }
  873. }
  874. // 0 through 999...
  875. else
  876. {
  877. switch ( pItemInfo->lLeftOver )
  878. {
  879. case 1:
  880. // 0 through 9...
  881. ExpandDigit( pStartChar[ulOffset], pItemInfo->Groups[0], WordList );
  882. ulOffset += 1;
  883. break;
  884. case 2:
  885. // 10 through 99...
  886. ExpandTwoDigits( pStartChar + ulOffset, pItemInfo->Groups[0], WordList );
  887. ulOffset += 2;
  888. break;
  889. case 0:
  890. // 100 through 999...
  891. ExpandThreeDigits( pStartChar + ulOffset, pItemInfo->Groups[0], WordList );
  892. ulOffset += 3;
  893. break;
  894. case -1:
  895. ulTemp = 0;
  896. pItemInfo->lLeftOver = 0;
  897. break;
  898. }
  899. }
  900. }
  901. else
  902. {
  903. //--- 1000 through highest number covered, e.g. 1,234,567
  904. //--- Expand first grouping, e.g. 1 million
  905. //--- Expand digit group
  906. switch ( pItemInfo->lLeftOver )
  907. {
  908. case 1:
  909. ExpandDigit( pStartChar[ulOffset], pItemInfo->Groups[pItemInfo->lNumGroups], WordList );
  910. ulOffset += 1;
  911. break;
  912. case 2:
  913. ExpandTwoDigits( pStartChar + ulOffset, pItemInfo->Groups[pItemInfo->lNumGroups], WordList );
  914. ulOffset += 2;
  915. break;
  916. case 0:
  917. ExpandThreeDigits( pStartChar + ulOffset, pItemInfo->Groups[pItemInfo->lNumGroups], WordList );
  918. ulOffset += 3;
  919. break;
  920. }
  921. //--- Special Case: rare ordinal cases - e.g. 1,000,000th
  922. if ( pItemInfo->fOrdinal &&
  923. Zeroes(pStartChar + ulOffset) )
  924. {
  925. //--- Insert ordinal quantifier
  926. pItemInfo->Groups[pItemInfo->lNumGroups].fQuantifier = true;
  927. Word.pWordText = g_quantifiersOrdinal[pItemInfo->lNumGroups].pStr;
  928. Word.ulWordLen = g_quantifiersOrdinal[pItemInfo->lNumGroups--].Len;
  929. Word.pLemma = Word.pWordText;
  930. Word.ulLemmaLen = Word.ulWordLen;
  931. WordList.AddTail( Word );
  932. bFinished = true;
  933. }
  934. //--- Default Case
  935. else
  936. {
  937. //--- Insert quantifier
  938. pItemInfo->Groups[pItemInfo->lNumGroups].fQuantifier = true;
  939. Word.pWordText = g_quantifiers[pItemInfo->lNumGroups].pStr;
  940. Word.ulWordLen = g_quantifiers[pItemInfo->lNumGroups--].Len;
  941. Word.pLemma = Word.pWordText;
  942. Word.ulLemmaLen = Word.ulWordLen;
  943. WordList.AddTail( Word );
  944. }
  945. //--- Expand rest of groupings which need to be followed by a quantifier
  946. while ( pItemInfo->lNumGroups > 0 &&
  947. !bFinished )
  948. {
  949. if ( pItemInfo->fSeparators )
  950. {
  951. ulOffset++;
  952. }
  953. //--- Expand digit group
  954. ExpandThreeDigits( pStartChar + ulOffset, pItemInfo->Groups[pItemInfo->lNumGroups], WordList );
  955. ulOffset += 3;
  956. //--- Special case: rare ordinal cases, e.g. 1,234,000th
  957. if ( pItemInfo->fOrdinal &&
  958. Zeroes( pStartChar + ulOffset ) )
  959. {
  960. //--- Insert ordinal quantifier
  961. pItemInfo->Groups[pItemInfo->lNumGroups].fQuantifier = true;
  962. Word.pWordText = g_quantifiersOrdinal[pItemInfo->lNumGroups].pStr;
  963. Word.ulWordLen = g_quantifiersOrdinal[pItemInfo->lNumGroups--].Len;
  964. Word.pLemma = Word.pWordText;
  965. Word.ulLemmaLen = Word.ulWordLen;
  966. WordList.AddTail( Word );
  967. bFinished = true;
  968. }
  969. //--- Default Case
  970. else if ( !ThreeZeroes( pStartChar + ulOffset - 3 ) )
  971. {
  972. //--- Insert quantifier
  973. pItemInfo->Groups[pItemInfo->lNumGroups].fQuantifier = true;
  974. Word.pWordText = g_quantifiers[pItemInfo->lNumGroups].pStr;
  975. Word.ulWordLen = g_quantifiers[pItemInfo->lNumGroups--].Len;
  976. Word.pLemma = Word.pWordText;
  977. Word.ulLemmaLen = Word.ulWordLen;
  978. WordList.AddTail( Word );
  979. }
  980. //--- Special Case: this grouping is all zeroes, e.g. 1,000,567
  981. else
  982. {
  983. pItemInfo->lNumGroups--;
  984. }
  985. }
  986. //--- Expand final grouping, which requires no quantifier
  987. if ( pItemInfo->fSeparators &&
  988. !bFinished )
  989. {
  990. ulOffset++;
  991. }
  992. if ( pItemInfo->fOrdinal &&
  993. !bFinished )
  994. {
  995. ExpandThreeOrdinal( pStartChar + ulOffset, pItemInfo->Groups[pItemInfo->lNumGroups], WordList );
  996. ulOffset += 3;
  997. }
  998. else if ( !bFinished )
  999. {
  1000. ExpandThreeDigits( pStartChar + ulOffset, pItemInfo->Groups[pItemInfo->lNumGroups], WordList );
  1001. ulOffset += 3;
  1002. }
  1003. }
  1004. pItemInfo->lNumGroups = (long) ulTemp;
  1005. } /* ExpandInteger */
  1006. /***********************************************************************************************
  1007. * IsDigitString *
  1008. *---------------*
  1009. * Description:
  1010. * Helper for IsNumber, IsPhoneNumber, etc. which matches a digit string...
  1011. *
  1012. * RegExp:
  1013. * d+
  1014. *
  1015. ********************************************************************* AH **********************/
  1016. HRESULT CStdSentEnum::IsDigitString( const WCHAR* pStartChar, TTSDigitsItemInfo*& pDigitsInfo,
  1017. CSentItemMemory& MemoryManager )
  1018. {
  1019. HRESULT hr = S_OK;
  1020. ULONG ulOffset = 0;
  1021. while ( pStartChar + ulOffset < m_pEndOfCurrItem &&
  1022. isdigit( pStartChar[ulOffset] ) )
  1023. {
  1024. ulOffset++;
  1025. }
  1026. if ( ulOffset )
  1027. {
  1028. pDigitsInfo = (TTSDigitsItemInfo*) MemoryManager.GetMemory( sizeof( TTSDigitsItemInfo ), &hr );
  1029. if ( SUCCEEDED( hr ) )
  1030. {
  1031. ZeroMemory( pDigitsInfo, sizeof( pDigitsInfo ) );
  1032. pDigitsInfo->pFirstDigit = pStartChar;
  1033. pDigitsInfo->ulNumDigits = ulOffset;
  1034. }
  1035. }
  1036. else
  1037. {
  1038. hr = E_INVALIDARG;
  1039. }
  1040. return hr;
  1041. } /* IsDigitString */
  1042. /***********************************************************************************************
  1043. * ExpandDigits *
  1044. *--------------*
  1045. * Description:
  1046. * Expands a string of digits, digit by digit.
  1047. *
  1048. * Note: This function does not do parameter validation. Assumed to be done by caller.
  1049. ********************************************************************* AH **********************/
  1050. void CStdSentEnum::ExpandDigits( TTSDigitsItemInfo* pItemInfo, CWordList& WordList )
  1051. {
  1052. SPDBG_FUNC( "CStdSentEnum::ExpandDigits" );
  1053. for ( ULONG i = 0; i < pItemInfo->ulNumDigits; i++ )
  1054. {
  1055. NumberGroup Garbage;
  1056. ExpandDigit( pItemInfo->pFirstDigit[i], Garbage, WordList );
  1057. }
  1058. } /* ExpandDigits */
  1059. /***********************************************************************************************
  1060. * IsFraction *
  1061. *------------*
  1062. * Description:
  1063. * Helper for IsNumber which matches a fraction...
  1064. *
  1065. * RegExp:
  1066. * { NUM_CARDINAL || NUM_DECIMAL } / { NUM_CARDINAL || NUM_DECIMAL }
  1067. *
  1068. ********************************************************************* AH **********************/
  1069. HRESULT CStdSentEnum::IsFraction( const WCHAR* pStartChar, TTSFractionItemInfo*& pFractionInfo,
  1070. CSentItemMemory& MemoryManager )
  1071. {
  1072. SPDBG_FUNC( "CStdSentEnum::IsFraction" );
  1073. HRESULT hr = S_OK;
  1074. ULONG ulTokenLen = (ULONG)(m_pEndOfCurrItem - pStartChar);
  1075. if ( ulTokenLen )
  1076. {
  1077. //--- Check for Vulgar Fraction
  1078. if ( pStartChar[0] == L'' ||
  1079. pStartChar[0] == L'' ||
  1080. pStartChar[0] == L'' )
  1081. {
  1082. pFractionInfo = (TTSFractionItemInfo*) MemoryManager.GetMemory( sizeof( TTSFractionItemInfo ), &hr );
  1083. if ( SUCCEEDED( hr ) )
  1084. {
  1085. ZeroMemory( pFractionInfo, sizeof( TTSFractionItemInfo ) );
  1086. pFractionInfo->pVulgar = pStartChar;
  1087. pFractionInfo->pNumerator =
  1088. (TTSNumberItemInfo*) MemoryManager.GetMemory( sizeof( TTSNumberItemInfo ), &hr );
  1089. if ( SUCCEEDED( hr ) )
  1090. {
  1091. ZeroMemory( pFractionInfo->pNumerator, sizeof( TTSNumberItemInfo ) );
  1092. pFractionInfo->pDenominator =
  1093. (TTSNumberItemInfo*) MemoryManager.GetMemory( sizeof( TTSNumberItemInfo ), &hr );
  1094. if ( SUCCEEDED( hr ) )
  1095. {
  1096. ZeroMemory( pFractionInfo->pDenominator, sizeof( TTSNumberItemInfo ) );
  1097. pFractionInfo->pNumerator->pIntegerPart =
  1098. (TTSIntegerItemInfo*) MemoryManager.GetMemory( sizeof( TTSIntegerItemInfo ), &hr );
  1099. if ( SUCCEEDED( hr ) )
  1100. {
  1101. ZeroMemory( pFractionInfo->pNumerator->pIntegerPart, sizeof( TTSIntegerItemInfo ) );
  1102. pFractionInfo->pDenominator->pIntegerPart =
  1103. (TTSIntegerItemInfo*) MemoryManager.GetMemory( sizeof( TTSIntegerItemInfo ), &hr );
  1104. if ( SUCCEEDED( hr ) )
  1105. {
  1106. ZeroMemory( pFractionInfo->pDenominator->pIntegerPart, sizeof( TTSIntegerItemInfo ) );
  1107. pFractionInfo->fIsStandard = false;
  1108. pFractionInfo->pNumerator->pIntegerPart->lLeftOver = 1;
  1109. pFractionInfo->pNumerator->pIntegerPart->lNumGroups = 1;
  1110. pFractionInfo->pNumerator->pIntegerPart->Groups[0].fOnes = true;
  1111. pFractionInfo->pDenominator->pIntegerPart->lLeftOver = 1;
  1112. pFractionInfo->pDenominator->pIntegerPart->lNumGroups = 1;
  1113. pFractionInfo->pDenominator->pIntegerPart->Groups[0].fOnes = true;
  1114. }
  1115. }
  1116. }
  1117. }
  1118. }
  1119. }
  1120. //--- Check for multi-character fraction
  1121. else
  1122. {
  1123. TTSItemInfo *pNumeratorInfo = NULL, *pDenominatorInfo = NULL;
  1124. const WCHAR* pTempNextChar = m_pNextChar, *pTempEndOfCurrItem = m_pEndOfCurrItem;
  1125. m_pNextChar = pStartChar;
  1126. m_pEndOfCurrItem = wcschr( pStartChar, L'/' );
  1127. if ( !m_pEndOfCurrItem ||
  1128. m_pEndOfCurrItem >= pTempEndOfCurrItem )
  1129. {
  1130. hr = E_INVALIDARG;
  1131. }
  1132. //--- Try to get numerator
  1133. if ( SUCCEEDED( hr ) )
  1134. {
  1135. hr = IsNumber( pNumeratorInfo, L"NUMBER", MemoryManager, false );
  1136. }
  1137. if ( SUCCEEDED( hr ) &&
  1138. pNumeratorInfo->Type != eNUM_MIXEDFRACTION &&
  1139. pNumeratorInfo->Type != eNUM_FRACTION &&
  1140. pNumeratorInfo->Type != eNUM_ORDINAL )
  1141. {
  1142. if ( ( (TTSNumberItemInfo*) pNumeratorInfo )->pIntegerPart )
  1143. {
  1144. m_pNextChar += ( (TTSNumberItemInfo*) pNumeratorInfo )->pIntegerPart->pEndChar -
  1145. ( (TTSNumberItemInfo*) pNumeratorInfo )->pIntegerPart->pStartChar;
  1146. }
  1147. if ( ( (TTSNumberItemInfo*) pNumeratorInfo )->pDecimalPart )
  1148. {
  1149. m_pNextChar += ( (TTSNumberItemInfo*) pNumeratorInfo )->pDecimalPart->ulNumDigits + 1;
  1150. }
  1151. }
  1152. else if ( SUCCEEDED( hr ) )
  1153. {
  1154. delete ( (TTSNumberItemInfo*) pNumeratorInfo )->pWordList;
  1155. hr = E_INVALIDARG;
  1156. }
  1157. m_pEndOfCurrItem = pTempEndOfCurrItem;
  1158. //--- Try to get denominator
  1159. if ( SUCCEEDED( hr ) &&
  1160. m_pNextChar[0] == L'/' )
  1161. {
  1162. m_pNextChar++;
  1163. hr = IsNumber( pDenominatorInfo, L"NUMBER", MemoryManager, false );
  1164. if ( SUCCEEDED( hr ) &&
  1165. pDenominatorInfo->Type != eNUM_MIXEDFRACTION &&
  1166. pDenominatorInfo->Type != eNUM_FRACTION &&
  1167. pDenominatorInfo->Type != eNUM_ORDINAL )
  1168. {
  1169. pFractionInfo =
  1170. ( TTSFractionItemInfo*) MemoryManager.GetMemory( sizeof( TTSFractionItemInfo ), &hr );
  1171. if ( SUCCEEDED( hr ) )
  1172. {
  1173. ZeroMemory( pFractionInfo, sizeof( TTSFractionItemInfo ) );
  1174. pFractionInfo->pNumerator = (TTSNumberItemInfo*) pNumeratorInfo;
  1175. pFractionInfo->pDenominator = (TTSNumberItemInfo*) pDenominatorInfo;
  1176. pFractionInfo->pVulgar = NULL;
  1177. pFractionInfo->fIsStandard = false;
  1178. }
  1179. }
  1180. else if ( SUCCEEDED( hr ) )
  1181. {
  1182. delete ( (TTSNumberItemInfo*) pNumeratorInfo )->pWordList;
  1183. delete ( (TTSNumberItemInfo*) pDenominatorInfo )->pWordList;
  1184. hr = E_INVALIDARG;
  1185. }
  1186. else
  1187. {
  1188. delete ( (TTSNumberItemInfo*) pNumeratorInfo )->pWordList;
  1189. }
  1190. }
  1191. else if ( SUCCEEDED( hr ) )
  1192. {
  1193. hr = E_INVALIDARG;
  1194. delete ( (TTSNumberItemInfo*) pNumeratorInfo )->pWordList;
  1195. }
  1196. m_pNextChar = pTempNextChar;
  1197. }
  1198. }
  1199. else
  1200. {
  1201. hr = E_INVALIDARG;
  1202. }
  1203. return hr;
  1204. } /* IsFraction */
  1205. /***********************************************************************************************
  1206. * ExpandFraction *
  1207. *----------------*
  1208. * Description:
  1209. * Expands Items previously determined to be of type NUM_FRACTION by IsFraction.
  1210. *
  1211. * NOTE: This function does not do parameter validation. Assumed to be done by caller.
  1212. ********************************************************************* AH **********************/
  1213. HRESULT CStdSentEnum::ExpandFraction( TTSFractionItemInfo* pItemInfo, CWordList& WordList )
  1214. {
  1215. SPDBG_FUNC( "CStdSentEnum::ExpandFraction" );
  1216. HRESULT hr = S_OK;
  1217. TTSWord Word;
  1218. ZeroMemory( &Word, sizeof(TTSWord) );
  1219. Word.pXmlState = &m_pCurrFrag->State;
  1220. Word.eWordPartOfSpeech = MS_Unknown;
  1221. //--- Special case - vulgar fractions ( �, �, � )
  1222. if ( pItemInfo->pVulgar )
  1223. {
  1224. if ( pItemInfo->pVulgar[0] == L'' )
  1225. {
  1226. Word.pWordText = g_ones[1].pStr;
  1227. Word.ulWordLen = g_ones[1].Len;
  1228. Word.pLemma = Word.pWordText;
  1229. Word.ulLemmaLen = Word.ulWordLen;
  1230. WordList.AddTail( Word );
  1231. Word.pWordText = g_onesOrdinal[4].pStr;
  1232. Word.ulWordLen = g_onesOrdinal[4].Len;
  1233. Word.pLemma = Word.pWordText;
  1234. Word.ulLemmaLen = Word.ulWordLen;
  1235. WordList.AddTail( Word );
  1236. }
  1237. else if ( pItemInfo->pVulgar[0] == L'' )
  1238. {
  1239. Word.pWordText = g_ones[1].pStr;
  1240. Word.ulWordLen = g_ones[1].Len;
  1241. Word.pLemma = Word.pWordText;
  1242. Word.ulLemmaLen = Word.ulWordLen;
  1243. WordList.AddTail( Word );
  1244. Word.pWordText = g_Half.pStr;
  1245. Word.ulWordLen = g_Half.Len;
  1246. Word.pLemma = Word.pWordText;
  1247. Word.ulLemmaLen = Word.ulWordLen;
  1248. WordList.AddTail( Word );
  1249. }
  1250. else
  1251. {
  1252. Word.pWordText = g_ones[3].pStr;
  1253. Word.ulWordLen = g_ones[3].Len;
  1254. Word.pLemma = Word.pWordText;
  1255. Word.ulLemmaLen = Word.ulWordLen;
  1256. WordList.AddTail( Word );
  1257. Word.pWordText = g_PluralDenominators[4].pStr;
  1258. Word.ulWordLen = g_PluralDenominators[4].Len;
  1259. Word.pLemma = Word.pWordText;
  1260. Word.ulLemmaLen = Word.ulWordLen;
  1261. WordList.AddTail( Word );
  1262. }
  1263. }
  1264. else
  1265. {
  1266. //--- Insert Numerator WordList
  1267. WordList.AddTail( pItemInfo->pNumerator->pWordList );
  1268. delete pItemInfo->pNumerator->pWordList;
  1269. //--- Expand denominator ---//
  1270. //--- If no decimal part, must check for special cases ( x/2 - x/9, x/10, x/100 )
  1271. if ( !pItemInfo->pDenominator->pDecimalPart &&
  1272. !pItemInfo->pNumerator->pDecimalPart &&
  1273. !pItemInfo->pDenominator->fNegative )
  1274. {
  1275. //--- Check for special cases - halves through ninths
  1276. if ( ( pItemInfo->pDenominator->pEndChar -
  1277. pItemInfo->pDenominator->pStartChar ) == 1 &&
  1278. pItemInfo->pDenominator->pStartChar[0] != L'1' )
  1279. {
  1280. pItemInfo->fIsStandard = false;
  1281. //--- Insert singular form of denominator
  1282. if ( ( pItemInfo->pNumerator->pEndChar -
  1283. pItemInfo->pNumerator->pStartChar ) == 1 &&
  1284. pItemInfo->pNumerator->pStartChar[0] == L'1' )
  1285. {
  1286. if ( pItemInfo->pDenominator->pStartChar[0] == L'2' )
  1287. {
  1288. Word.pWordText = g_Half.pStr;
  1289. Word.ulWordLen = g_Half.Len;
  1290. Word.pLemma = Word.pWordText;
  1291. Word.ulLemmaLen = Word.ulWordLen;
  1292. WordList.AddTail( Word );
  1293. }
  1294. else
  1295. {
  1296. ExpandDigitOrdinal( pItemInfo->pDenominator->pStartChar[0],
  1297. pItemInfo->pDenominator->pIntegerPart->Groups[0], WordList );
  1298. }
  1299. }
  1300. //--- Insert plural form of denominator
  1301. else
  1302. {
  1303. ULONG index = pItemInfo->pDenominator->pStartChar[0] - L'0';
  1304. Word.pWordText = g_PluralDenominators[index].pStr;
  1305. Word.ulWordLen = g_PluralDenominators[index].Len;
  1306. Word.pLemma = Word.pWordText;
  1307. Word.ulLemmaLen = Word.ulWordLen;
  1308. WordList.AddTail( Word );
  1309. }
  1310. }
  1311. //--- Check for special case - tenths
  1312. else if ( ( pItemInfo->pDenominator->pEndChar -
  1313. pItemInfo->pDenominator->pStartChar ) == 2 &&
  1314. wcsncmp( pItemInfo->pDenominator->pStartChar, L"10", 2 ) == 0 )
  1315. {
  1316. pItemInfo->fIsStandard = false;
  1317. //--- Insert singular form of denominator
  1318. if ( ( pItemInfo->pNumerator->pEndChar -
  1319. pItemInfo->pNumerator->pStartChar ) == 1 &&
  1320. pItemInfo->pNumerator->pStartChar[0] == L'1' )
  1321. {
  1322. ExpandTwoOrdinal( pItemInfo->pDenominator->pStartChar,
  1323. pItemInfo->pDenominator->pIntegerPart->Groups[0], WordList );
  1324. }
  1325. //--- Insert plural form denominator
  1326. else
  1327. {
  1328. Word.pWordText = g_Tenths.pStr;
  1329. Word.ulWordLen = g_Tenths.Len;
  1330. Word.pLemma = Word.pWordText;
  1331. Word.ulLemmaLen = Word.ulWordLen;
  1332. WordList.AddTail( Word );
  1333. }
  1334. }
  1335. //--- Check for special case - sixteenths
  1336. else if ( ( pItemInfo->pDenominator->pEndChar -
  1337. pItemInfo->pDenominator->pStartChar ) == 2 &&
  1338. wcsncmp( pItemInfo->pDenominator->pStartChar, L"16", 2 ) == 0 )
  1339. {
  1340. pItemInfo->fIsStandard = false;
  1341. //--- Insert singular form of denominator
  1342. if ( ( pItemInfo->pNumerator->pEndChar -
  1343. pItemInfo->pNumerator->pStartChar ) == 1 &&
  1344. pItemInfo->pNumerator->pStartChar[0] == L'1' )
  1345. {
  1346. ExpandTwoOrdinal( pItemInfo->pDenominator->pStartChar,
  1347. pItemInfo->pDenominator->pIntegerPart->Groups[0], WordList );
  1348. }
  1349. //--- Insert plural form denominator
  1350. else
  1351. {
  1352. Word.pWordText = g_Sixteenths.pStr;
  1353. Word.ulWordLen = g_Sixteenths.Len;
  1354. Word.pLemma = Word.pWordText;
  1355. Word.ulLemmaLen = Word.ulWordLen;
  1356. WordList.AddTail( Word );
  1357. }
  1358. }
  1359. //--- Check for special case - hundredths
  1360. else if ( ( pItemInfo->pDenominator->pEndChar -
  1361. pItemInfo->pDenominator->pStartChar ) == 3 &&
  1362. wcsncmp( pItemInfo->pDenominator->pStartChar, L"100", 3 ) == 0 )
  1363. {
  1364. pItemInfo->fIsStandard = false;
  1365. //--- Insert singular form of denominator
  1366. if ( ( pItemInfo->pNumerator->pEndChar -
  1367. pItemInfo->pNumerator->pStartChar ) == 1 &&
  1368. pItemInfo->pNumerator->pStartChar[0] == L'1' )
  1369. {
  1370. ExpandThreeOrdinal( pItemInfo->pDenominator->pStartChar,
  1371. pItemInfo->pDenominator->pIntegerPart->Groups[0], WordList );
  1372. }
  1373. //--- Insert plural form of denominator
  1374. else
  1375. {
  1376. Word.pWordText = g_Hundredths.pStr;
  1377. Word.ulWordLen = g_Hundredths.Len;
  1378. Word.pLemma = Word.pWordText;
  1379. Word.ulLemmaLen = Word.ulWordLen;
  1380. WordList.AddTail( Word );
  1381. }
  1382. }
  1383. else
  1384. {
  1385. pItemInfo->fIsStandard = true;
  1386. }
  1387. }
  1388. else
  1389. {
  1390. pItemInfo->fIsStandard = true;
  1391. }
  1392. //--- Default case - Numerator "over" Denominator
  1393. if ( pItemInfo->fIsStandard )
  1394. {
  1395. //--- Insert "over"
  1396. Word.pWordText = g_Over.pStr;
  1397. Word.ulWordLen = g_Over.Len;
  1398. Word.pLemma = Word.pWordText;
  1399. Word.ulLemmaLen = Word.ulWordLen;
  1400. WordList.AddTail( Word );
  1401. //--- Insert denominator WordList
  1402. WordList.AddTail( pItemInfo->pDenominator->pWordList );
  1403. }
  1404. delete pItemInfo->pDenominator->pWordList;
  1405. }
  1406. return hr;
  1407. } /* ExpandFraction */
  1408. /***********************************************************************************************
  1409. * ExpandDigit *
  1410. *-------------*
  1411. * Description:
  1412. * Expands single digits into words, and inserts them into WordList
  1413. *
  1414. * Note: This function does not do parameter validation. Assumed to be done by caller.
  1415. ********************************************************************* AH **********************/
  1416. void CStdSentEnum::ExpandDigit( const WCHAR Number, NumberGroup& NormGroupInfo, CWordList& WordList )
  1417. {
  1418. SPDBG_FUNC( "CStdSentEnum::ExpandDigit" );
  1419. SPDBG_ASSERT( isdigit(Number) );
  1420. // 0-9
  1421. ULONG Index = Number - L'0';
  1422. TTSWord Word;
  1423. ZeroMemory( &Word, sizeof(TTSWord) );
  1424. Word.pXmlState = &m_pCurrFrag->State;
  1425. Word.pWordText = g_ones[Index].pStr;
  1426. Word.ulWordLen = g_ones[Index].Len;
  1427. Word.pLemma = Word.pWordText;
  1428. Word.ulLemmaLen = Word.ulWordLen;
  1429. Word.eWordPartOfSpeech = MS_Unknown;
  1430. WordList.AddTail( Word );
  1431. NormGroupInfo.fOnes = true;
  1432. } /* ExpandDigit */
  1433. /***********************************************************************************************
  1434. * ExpandTwo *
  1435. *-----------*
  1436. * Description:
  1437. * Expands two digit strings into words, and inserts them into WordList.
  1438. *
  1439. * Note: This function does not do parameter validation. Assumed to be done by caller.
  1440. ********************************************************************* AH **********************/
  1441. void CStdSentEnum::ExpandTwoDigits( const WCHAR *NumberString, NumberGroup& NormGroupInfo, CWordList& WordList )
  1442. {
  1443. SPDBG_FUNC( "CStdSentEnum::ExpandTwoDigits" );
  1444. SPDBG_ASSERT( NumberString &&
  1445. wcslen(NumberString) >= 2 &&
  1446. isdigit(NumberString[0]) &&
  1447. isdigit(NumberString[1]) );
  1448. // 10-99
  1449. TTSWord Word;
  1450. ZeroMemory( &Word, sizeof(TTSWord) );
  1451. ULONG IndexOne = NumberString[0] - L'0';
  1452. ULONG IndexTwo = NumberString[1] - L'0';
  1453. Word.pXmlState = &m_pCurrFrag->State;
  1454. Word.eWordPartOfSpeech = MS_Unknown;
  1455. if ( IndexOne != 1 )
  1456. {
  1457. // 20-99, or 00-09
  1458. if (IndexOne != 0)
  1459. {
  1460. Word.pWordText = g_tens[IndexOne].pStr;
  1461. Word.ulWordLen = g_tens[IndexOne].Len;
  1462. Word.pLemma = Word.pWordText;
  1463. Word.ulLemmaLen = Word.ulWordLen;
  1464. WordList.AddTail( Word );
  1465. NormGroupInfo.fTens = true;
  1466. }
  1467. if ( IndexTwo != 0 )
  1468. {
  1469. ExpandDigit( NumberString[1], NormGroupInfo, WordList );
  1470. NormGroupInfo.fOnes = true;
  1471. }
  1472. }
  1473. else
  1474. {
  1475. // 10-19
  1476. Word.pWordText = g_teens[IndexTwo].pStr;
  1477. Word.ulWordLen = g_teens[IndexTwo].Len;
  1478. Word.pLemma = Word.pWordText;
  1479. Word.ulLemmaLen = Word.ulWordLen;
  1480. WordList.AddTail( Word );
  1481. NormGroupInfo.fOnes = true;
  1482. }
  1483. } /* ExpandTwo */
  1484. /***********************************************************************************************
  1485. * ExpandThree *
  1486. *-------------*
  1487. * Description:
  1488. * Expands three digit strings into words, and inserts them into WordList.
  1489. *
  1490. * Note: This function does not do parameter validation. Assumed to be done by caller.
  1491. ********************************************************************* AH **********************/
  1492. void CStdSentEnum::ExpandThreeDigits( const WCHAR *NumberString, NumberGroup& NormGroupInfo, CWordList& WordList )
  1493. {
  1494. SPDBG_FUNC( "CStdSentEnum::ExpandThreeDigits" );
  1495. SPDBG_ASSERT( NumberString &&
  1496. wcslen(NumberString) >= 3 &&
  1497. isdigit(NumberString[0]) &&
  1498. isdigit(NumberString[1]) &&
  1499. isdigit(NumberString[2]) );
  1500. // 100-999
  1501. TTSWord Word;
  1502. ZeroMemory( &Word, sizeof(TTSWord) );
  1503. ULONG IndexOne = NumberString[0] - L'0';
  1504. Word.pXmlState = &m_pCurrFrag->State;
  1505. Word.eWordPartOfSpeech = MS_Unknown;
  1506. if ( IndexOne != 0 )
  1507. {
  1508. // Take care of hundreds...
  1509. ExpandDigit( NumberString[0], NormGroupInfo, WordList );
  1510. Word.pWordText = g_quantifiers[0].pStr;
  1511. Word.ulWordLen = g_quantifiers[0].Len;
  1512. Word.pLemma = Word.pWordText;
  1513. Word.ulLemmaLen = Word.ulWordLen;
  1514. WordList.AddTail( Word );
  1515. NormGroupInfo.fHundreds = true;
  1516. NormGroupInfo.fOnes = false;
  1517. }
  1518. // Take care of tens and ones...
  1519. ExpandTwoDigits( NumberString + 1, NormGroupInfo, WordList );
  1520. } /* ExpandThree */
  1521. /***********************************************************************************************
  1522. * ExpandDigitOrdinal *
  1523. *--------------------*
  1524. * Description:
  1525. * Expands single digit ordinal strings into words, and inserts them into WordList.
  1526. *
  1527. * Note: This function does not do parameter validation. Assumed to be done by caller.
  1528. ********************************************************************* AH **********************/
  1529. void CStdSentEnum::ExpandDigitOrdinal( const WCHAR Number, NumberGroup& NormGroupInfo, CWordList& WordList )
  1530. {
  1531. SPDBG_FUNC( "CStdSentEnum::ExpandDigitOrdinal" );
  1532. SPDBG_ASSERT( isdigit(Number) );
  1533. // 0-9
  1534. ULONG Index = Number - L'0';
  1535. TTSWord Word;
  1536. ZeroMemory( &Word, sizeof(TTSWord) );
  1537. Word.pXmlState = &m_pCurrFrag->State;
  1538. Word.pWordText = g_onesOrdinal[Index].pStr;
  1539. Word.ulWordLen = g_onesOrdinal[Index].Len;
  1540. Word.pLemma = Word.pWordText;
  1541. Word.ulLemmaLen = Word.ulWordLen;
  1542. Word.eWordPartOfSpeech = MS_Unknown;
  1543. WordList.AddTail( Word );
  1544. NormGroupInfo.fOnes = true;
  1545. } /* ExpandDigitOrdinal */
  1546. /***********************************************************************************************
  1547. * ExpandTwoOrdinal *
  1548. *------------------*
  1549. * Description:
  1550. * Expands two digit ordinal strings into words, and inserts them into WordList.
  1551. *
  1552. * Note: This function does not do parameter validation. Assumed to be done by caller.
  1553. ********************************************************************* AH **********************/
  1554. void CStdSentEnum::ExpandTwoOrdinal( const WCHAR *NumberString, NumberGroup& NormGroupInfo, CWordList& WordList )
  1555. {
  1556. SPDBG_FUNC( "CStdSentEnum::ExpandTwoOrdinal" );
  1557. SPDBG_ASSERT( NumberString &&
  1558. wcslen(NumberString) >= 2 &&
  1559. isdigit(NumberString[0]) &&
  1560. isdigit(NumberString[1]) );
  1561. // 10-99
  1562. TTSWord Word;
  1563. ZeroMemory( &Word, sizeof(TTSWord) );
  1564. ULONG IndexOne = NumberString[0] - L'0';
  1565. ULONG IndexTwo = NumberString[1] - L'0';
  1566. Word.pXmlState = &m_pCurrFrag->State;
  1567. Word.eWordPartOfSpeech = MS_Unknown;
  1568. if ( IndexOne != 1 )
  1569. {
  1570. // 20-99, or 00-09
  1571. if (IndexOne != 0)
  1572. {
  1573. if ( IndexTwo != 0 )
  1574. {
  1575. Word.pWordText = g_tens[IndexOne].pStr;
  1576. Word.ulWordLen = g_tens[IndexOne].Len;
  1577. Word.pLemma = Word.pWordText;
  1578. Word.ulLemmaLen = Word.ulWordLen;
  1579. WordList.AddTail( Word );
  1580. NormGroupInfo.fTens = true;
  1581. ExpandDigitOrdinal( NumberString[1], NormGroupInfo, WordList );
  1582. NormGroupInfo.fOnes = true;
  1583. }
  1584. else
  1585. {
  1586. Word.pWordText = g_tensOrdinal[IndexOne].pStr;
  1587. Word.ulWordLen = g_tensOrdinal[IndexOne].Len;
  1588. Word.pLemma = Word.pWordText;
  1589. Word.ulLemmaLen = Word.ulWordLen;
  1590. WordList.AddTail( Word );
  1591. }
  1592. }
  1593. else
  1594. {
  1595. ExpandDigitOrdinal( NumberString[1], NormGroupInfo, WordList );
  1596. }
  1597. }
  1598. else
  1599. {
  1600. // 10-19
  1601. Word.pWordText = g_teensOrdinal[IndexTwo].pStr;
  1602. Word.ulWordLen = g_teensOrdinal[IndexTwo].Len;
  1603. Word.pLemma = Word.pWordText;
  1604. Word.ulLemmaLen = Word.ulWordLen;
  1605. WordList.AddTail( Word );
  1606. NormGroupInfo.fOnes = true;
  1607. }
  1608. } /* ExpandTwoOrdinal */
  1609. /***********************************************************************************************
  1610. * ExpandThreeOrdinal *
  1611. *--------------------*
  1612. * Description:
  1613. * Expands three digit ordinal strings into words, and inserts them into WordList.
  1614. *
  1615. * Note: This function does not do parameter validation. Assumed to be done by caller.
  1616. ********************************************************************* AH **********************/
  1617. void CStdSentEnum::ExpandThreeOrdinal( const WCHAR *NumberString, NumberGroup& NormGroupInfo, CWordList& WordList )
  1618. {
  1619. SPDBG_FUNC( "CStdSentEnum::ExpandThreeDigits" );
  1620. SPDBG_ASSERT( NumberString &&
  1621. wcslen(NumberString) >= 3 &&
  1622. isdigit(NumberString[0]) &&
  1623. isdigit(NumberString[1]) &&
  1624. isdigit(NumberString[2]) );
  1625. // 100-999
  1626. TTSWord Word;
  1627. ZeroMemory( &Word, sizeof(TTSWord) );
  1628. ULONG IndexOne = NumberString[0] - L'0';
  1629. Word.pXmlState = &m_pCurrFrag->State;
  1630. Word.eWordPartOfSpeech = MS_Unknown;
  1631. if ( IndexOne != 0 )
  1632. {
  1633. ExpandDigit( NumberString[0], NormGroupInfo, WordList );
  1634. //--- Special case - x hundredth
  1635. if ( Zeroes( NumberString + 1 ) )
  1636. {
  1637. Word.pWordText = g_quantifiersOrdinal[0].pStr;
  1638. Word.ulWordLen = g_quantifiersOrdinal[0].Len;
  1639. Word.pLemma = Word.pWordText;
  1640. Word.ulLemmaLen = Word.ulWordLen;
  1641. WordList.AddTail( Word );
  1642. NormGroupInfo.fHundreds = true;
  1643. NormGroupInfo.fOnes = false;
  1644. }
  1645. //--- Default case - x hundred yth
  1646. else
  1647. {
  1648. Word.pWordText = g_quantifiers[0].pStr;
  1649. Word.ulWordLen = g_quantifiers[0].Len;
  1650. Word.pLemma = Word.pWordText;
  1651. Word.ulLemmaLen = Word.ulWordLen;
  1652. WordList.AddTail( Word );
  1653. ExpandTwoOrdinal( NumberString + 1, NormGroupInfo, WordList );
  1654. NormGroupInfo.fHundreds = true;
  1655. }
  1656. }
  1657. //--- Special case - no hundreds
  1658. else
  1659. {
  1660. ExpandTwoOrdinal( NumberString + 1, NormGroupInfo, WordList );
  1661. }
  1662. } /* ExpandThreeOrdinal */
  1663. /***********************************************************************************************
  1664. * MatchQuantifier *
  1665. *-----------------*
  1666. * Description:
  1667. * Checks the incoming Item's text to determine whether or not it
  1668. * is a numerical quantifier.
  1669. ********************************************************************* AH **********************/
  1670. int MatchQuantifier( const WCHAR*& pStartChar, const WCHAR*& pEndChar )
  1671. {
  1672. int Index = -1;
  1673. for (int i = 0; i < sp_countof(g_quantifiers); i++)
  1674. {
  1675. if ( pEndChar - pStartChar >= g_quantifiers[i].Len &&
  1676. wcsnicmp( pStartChar, g_quantifiers[i].pStr, g_quantifiers[i].Len ) == 0 )
  1677. {
  1678. pStartChar += g_quantifiers[i].Len;
  1679. Index = i;
  1680. break;
  1681. }
  1682. }
  1683. return Index;
  1684. } /* MatchQuantifier */
  1685. /***********************************************************************************************
  1686. * IsCurrency *
  1687. *------------*
  1688. * Description:
  1689. * Checks the incoming Item's text to determine whether or not it
  1690. * is a currency.
  1691. *
  1692. * RegExp:
  1693. * { [CurrencySign] { d+ || d(1-3)[,ddd]+ } { [.]d+ }? } { [whitespace] [quantifier] }? ||
  1694. * { { d+ || d(1-3)[,ddd]+ } { [.]d+ }? { [whitespace] [quantifier] }? [whitespace]? [CurrencySign] }
  1695. *
  1696. * Types assigned:
  1697. * NUM_CURRENCY
  1698. ********************************************************************* AH **********************/
  1699. HRESULT CStdSentEnum::IsCurrency( TTSItemInfo*& pItemNormInfo, CSentItemMemory& MemoryManager,
  1700. CWordList& WordList )
  1701. {
        //--- Purpose: tries to match a currency expression starting at m_pNextChar, in either order
        //---   ( [-][CurrencySign][Number][Quantifier]  or  [Number][CurrencySign][Quantifier] ),
        //---   and, when it matches, expands it into words.
        //--- Parameters:
        //---   pItemNormInfo - [out] set to a TTSCurrencyItemInfo obtained from MemoryManager once the
        //---                   match stage has succeeded.
        //---   MemoryManager - supplies the memory (GetMemory) for the item info structures.
        //---   WordList      - receives the expanded words (number, quantifier, currency units).
        //--- Returns: a success HRESULT when a currency was matched and expanded; E_INVALIDARG when the
        //---   text does not fit the pattern; otherwise the failure reported by SkipWhiteSpaceAndTags,
        //---   IsNumberCategory, GetMemory, IsNumber or ExpandNumber.
        //--- State: when the match stage fails, m_pNextChar, m_pEndChar, m_pEndOfCurrItem and m_pCurrFrag
        //---   are put back to their entry values (see the final else of this function).
  1702. SPDBG_FUNC( "NumNorm IsCurrency" );
  1703. HRESULT hr = S_OK;
        //--- Function-level snapshot of the entry position; used for the rollback at the end.
  1704. const WCHAR *pTempNextChar = m_pNextChar, *pTempEndOfItem = m_pEndOfCurrItem, *pTempEndChar = m_pEndChar;
  1705. const SPVTEXTFRAG* pTempFrag = m_pCurrFrag;
  1706. const SPVSTATE *pNumberXMLState = NULL, *pSymbolXMLState = NULL, *pQuantifierXMLState = NULL;
  1707. CItemList PostNumberList, PostSymbolList;
  1708. int iSymbolIndex = -1, iQuantIndex = -1;
  1709. TTSItemInfo* pNumberInfo = NULL;
  1710. BOOL fDone = false, fNegative = false;
        //--- Decimal point is '.' when m_eSeparatorAndDecimal is COMMA_PERIOD, otherwise ','.
  1711. WCHAR wcDecimalPoint = ( m_eSeparatorAndDecimal == COMMA_PERIOD ? L'.' : L',' );
  1712. //--- Try to match [CurrencySign] [Number] [Quantifier]
  1713. NORM_POSITION ePosition = UNATTACHED;
        //--- A leading '-' is consumed here; the g_negative word is emitted during expansion below.
  1714. if ( m_pNextChar[0] == L'-' )
  1715. {
  1716. fNegative = true;
  1717. m_pNextChar++;
  1718. }
        //--- NOTE(review): MatchCurrencySign is defined elsewhere; it presumably advances m_pNextChar
        //--- past a matched sign (the whitespace skip below starts from m_pNextChar) - confirm.
  1719. iSymbolIndex = MatchCurrencySign( m_pNextChar, m_pEndOfCurrItem, ePosition );
  1720. if ( iSymbolIndex >= 0 &&
  1721. ePosition == PRECEDING )
  1722. {
  1723. pSymbolXMLState = &m_pCurrFrag->State;
  1724. //--- Skip any whitespace in between the currency sign and the number...
  1725. hr = SkipWhiteSpaceAndTags( m_pNextChar, m_pEndChar, m_pCurrFrag, MemoryManager, true, &PostSymbolList );
  1726. if ( !m_pNextChar )
  1727. {
  1728. hr = E_INVALIDARG;
  1729. }
  1730. if ( SUCCEEDED( hr ) )
  1731. {
  1732. m_pEndOfCurrItem = FindTokenEnd( m_pNextChar, m_pEndChar );
        //--- Trim trailing punctuation from the number token.  fDone records that punctuation was
        //--- trimmed, in which case no quantifier lookup is attempted afterwards.
        //--- NOTE(review): nothing stops this loop at m_pNextChar; it relies on the token holding at
        //--- least one character that is not punctuation - confirm.
  1733. while ( IsMiscPunctuation( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED ||
  1734. IsGroupEnding( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED ||
  1735. IsQuotationMark( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED ||
  1736. IsEOSItem( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED )
  1737. {
  1738. fDone = true;
  1739. m_pEndOfCurrItem--;
  1740. }
  1741. }
  1742. //--- Try to match a number string
  1743. if ( SUCCEEDED( hr ) )
  1744. {
  1745. hr = IsNumberCategory( pNumberInfo, L"NUMBER", MemoryManager );
  1746. if ( SUCCEEDED( hr ) )
  1747. {
        //--- Only cardinals, decimals, fractions and mixed fractions may carry a currency sign.
  1748. if ( pNumberInfo->Type != eNUM_CARDINAL &&
  1749. pNumberInfo->Type != eNUM_DECIMAL &&
  1750. pNumberInfo->Type != eNUM_FRACTION &&
  1751. pNumberInfo->Type != eNUM_MIXEDFRACTION )
  1752. {
  1753. hr = E_INVALIDARG;
  1754. }
  1755. else
  1756. {
  1757. pNumberXMLState = &m_pCurrFrag->State;
  1758. }
  1759. }
  1760. //--- Skip any whitespace in between the number and the quantifier...
  1761. if ( !fDone &&
  1762. SUCCEEDED( hr ) )
  1763. {
        //--- Local snapshot (shadows the function-level temporaries) so that a failed quantifier
        //--- lookup can be undone without giving up the number that was already matched.
  1764. const WCHAR *pTempNextChar = m_pNextChar, *pTempEndChar = m_pEndChar;
  1765. const WCHAR *pTempEndOfItem = m_pEndOfCurrItem;
  1766. const SPVTEXTFRAG *pTempFrag = m_pCurrFrag;
  1767. m_pNextChar = m_pEndOfCurrItem;
  1768. hr = SkipWhiteSpaceAndTags( m_pNextChar, m_pEndChar, m_pCurrFrag, MemoryManager, true, &PostNumberList );
  1769. if ( m_pNextChar &&
  1770. SUCCEEDED( hr ) )
  1771. {
  1772. m_pEndOfCurrItem = FindTokenEnd( m_pNextChar, m_pEndChar );
  1773. while ( IsMiscPunctuation( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED ||
  1774. IsGroupEnding( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED ||
  1775. IsQuotationMark( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED ||
  1776. IsEOSItem( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED )
  1777. {
  1778. m_pEndOfCurrItem--;
  1779. }
  1780. //--- Try to match a quantifier
  1781. iQuantIndex = MatchQuantifier( m_pNextChar, m_pEndOfCurrItem );
  1782. if ( iQuantIndex >= 0 )
  1783. {
  1784. pQuantifierXMLState = &m_pCurrFrag->State;
  1785. }
  1786. else
  1787. {
        //--- No quantifier: go back to the position just after the number.
        //--- NOTE(review): PostNumberList is not emptied on this rollback, so anything
        //--- SkipWhiteSpaceAndTags placed in it is still appended to WordList during expansion
        //--- and counted in lNumPostNumberStates - confirm this is intended.
  1788. m_pNextChar = pTempNextChar;
  1789. m_pEndChar = pTempEndChar;
  1790. m_pEndOfCurrItem = pTempEndOfItem;
  1791. m_pCurrFrag = pTempFrag;
  1792. }
  1793. }
  1794. else
  1795. {
  1796. m_pNextChar = pTempNextChar;
  1797. m_pEndChar = pTempEndChar;
  1798. m_pEndOfCurrItem = pTempEndOfItem;
  1799. m_pCurrFrag = pTempFrag;
  1800. }
  1801. }
  1802. }
  1803. }
  1804. //--- Try to match [Number] [CurrencySign] [Quantifier]
  1805. else
  1806. {
  1807. //--- Try to match a number string
  1808. hr = IsNumberCategory( pNumberInfo, L"NUMBER", MemoryManager );
  1809. if ( SUCCEEDED( hr ) )
  1810. {
  1811. if ( pNumberInfo->Type != eNUM_CARDINAL &&
  1812. pNumberInfo->Type != eNUM_DECIMAL &&
  1813. pNumberInfo->Type != eNUM_FRACTION &&
  1814. pNumberInfo->Type != eNUM_MIXEDFRACTION )
  1815. {
  1816. hr = E_INVALIDARG;
  1817. }
  1818. else
  1819. {
  1820. pNumberXMLState = &m_pCurrFrag->State;
  1821. }
  1822. }
  1823. //--- Skip any whitespace and XML markup between the number and the currency sign
  1824. if ( SUCCEEDED( hr ) )
  1825. {
  1826. m_pNextChar = m_pEndOfCurrItem;
  1827. hr = SkipWhiteSpaceAndTags( m_pNextChar, m_pEndChar, m_pCurrFrag, MemoryManager, true, &PostNumberList );
  1828. if ( !m_pNextChar )
  1829. {
  1830. hr = E_INVALIDARG;
  1831. }
  1832. if ( SUCCEEDED( hr ) )
  1833. {
  1834. m_pEndOfCurrItem = FindTokenEnd( m_pNextChar, m_pEndChar );
        //--- Trailing punctuation after the currency sign token ends the item (fDone), so no
        //--- quantifier lookup follows.
  1835. while ( IsMiscPunctuation( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED ||
  1836. IsGroupEnding( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED ||
  1837. IsQuotationMark( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED ||
  1838. IsEOSItem( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED )
  1839. {
  1840. m_pEndOfCurrItem--;
  1841. fDone = true;
  1842. }
  1843. }
  1844. }
  1845. //--- Try to match a Currency Sign
  1846. if ( SUCCEEDED( hr ) )
  1847. {
        //--- ePosition is not examined in this branch; any matched sign index is accepted.
  1848. iSymbolIndex = MatchCurrencySign( m_pNextChar, m_pEndOfCurrItem, ePosition );
  1849. if ( iSymbolIndex >= 0 )
  1850. {
  1851. pSymbolXMLState = &m_pCurrFrag->State;
  1852. }
  1853. //--- Skip any whitespace in between the currency sign and the quantifier
  1854. if ( !fDone &&
  1855. iSymbolIndex >= 0 )
  1856. {
        //--- Local snapshot (shadows the function-level temporaries) for undoing the quantifier lookup.
  1857. const WCHAR *pTempNextChar = m_pNextChar, *pTempEndChar = m_pEndChar;
  1858. const WCHAR *pTempEndOfItem = m_pEndOfCurrItem;
  1859. const SPVTEXTFRAG *pTempFrag = m_pCurrFrag;
  1860. hr = SkipWhiteSpaceAndTags( m_pNextChar, m_pEndChar, m_pCurrFrag, MemoryManager, true, &PostSymbolList );
        //--- Nothing follows the currency sign: undo the skip and stop looking for a quantifier.
  1861. if ( !m_pNextChar )
  1862. {
  1863. m_pNextChar = pTempNextChar;
  1864. m_pEndChar = pTempEndChar;
  1865. m_pEndOfCurrItem = pTempEndOfItem;
  1866. m_pCurrFrag = pTempFrag;
  1867. fDone = true;
  1868. }
  1869. if ( !fDone &&
  1870. SUCCEEDED( hr ) )
  1871. {
  1872. m_pEndOfCurrItem = FindTokenEnd( m_pNextChar, m_pEndChar );
  1873. while ( IsMiscPunctuation( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED ||
  1874. IsGroupEnding( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED ||
  1875. IsQuotationMark( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED ||
  1876. IsEOSItem( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED )
  1877. {
  1878. fDone = true;
  1879. m_pEndOfCurrItem--;
  1880. }
  1881. //--- Try to match quantifier
  1882. iQuantIndex = MatchQuantifier( m_pNextChar, m_pEndOfCurrItem );
  1883. if ( iQuantIndex >= 0 )
  1884. {
  1885. pQuantifierXMLState = &m_pCurrFrag->State;
  1886. }
  1887. else
  1888. {
        //--- No quantifier: go back to the position of the currency sign.
        //--- NOTE(review): PostSymbolList is not emptied on this rollback - confirm intended.
  1889. m_pNextChar = pTempNextChar;
  1890. m_pEndChar = pTempEndChar;
  1891. m_pEndOfCurrItem = pTempEndOfItem;
  1892. m_pCurrFrag = pTempFrag;
  1893. }
  1894. }
  1895. }
  1896. else if ( iSymbolIndex < 0 )
  1897. {
  1898. hr = E_INVALIDARG;
  1899. }
  1900. }
  1901. }
  1902. //--- Successfully matched a currency! Now expand it and fill out pItemNormInfo.
  1903. if ( SUCCEEDED( hr ) )
  1904. {
  1905. TTSWord Word;
  1906. ZeroMemory( &Word, sizeof(TTSWord) );
  1907. Word.eWordPartOfSpeech = MS_Unknown;
  1908. pItemNormInfo = (TTSCurrencyItemInfo*) MemoryManager.GetMemory( sizeof(TTSCurrencyItemInfo), &hr );
  1909. if ( SUCCEEDED( hr ) )
  1910. {
  1911. //--- Fill in known parts of pItemNormInfo
  1912. ZeroMemory( pItemNormInfo, sizeof(TTSCurrencyItemInfo) );
  1913. pItemNormInfo->Type = eNUM_CURRENCY;
  1914. ( (TTSCurrencyItemInfo*) pItemNormInfo )->fQuantifier = iQuantIndex >= 0 ? true : false;
  1915. ( (TTSCurrencyItemInfo*) pItemNormInfo )->pPrimaryNumberPart = (TTSNumberItemInfo*) pNumberInfo;
  1916. ( (TTSCurrencyItemInfo*) pItemNormInfo )->lNumPostNumberStates = PostNumberList.GetCount();
  1917. ( (TTSCurrencyItemInfo*) pItemNormInfo )->lNumPostSymbolStates = PostSymbolList.GetCount();
  1918. //--- Need to determine whether this currency will have a primary and secondary part
  1919. //--- (e.g. "ten dollars and fifty cents") or just a primary part (e.g. "ten point
  1920. //--- five zero cents", "one hundred dollars").
  1921. //--- First check whether the number is a cardinal, there is a quantifier present, or the
  1922. //--- currency unit has no secondary (e.g. cents). In any of these cases, we need do no
  1923. //--- further checking.
  1924. if ( pNumberInfo->Type == eNUM_DECIMAL &&
  1925. iQuantIndex == -1 &&
  1926. g_CurrencySigns[iSymbolIndex].SecondaryUnit.Len > 0 )
  1927. {
        //--- NOTE(review): wcschr searches up to the terminating null rather than stopping at
        //--- pEndChar; this relies on a decimal number containing wcDecimalPoint (asserted below).
  1928. WCHAR *pDecimalPoint = wcschr( ( (TTSNumberItemInfo*) pNumberInfo )->pStartChar, wcDecimalPoint );
  1929. SPDBG_ASSERT( pDecimalPoint );
        //--- A secondary part exists only when exactly two characters follow the decimal point.
  1930. if ( pDecimalPoint &&
  1931. ( (TTSNumberItemInfo*) pNumberInfo )->pEndChar - pDecimalPoint == 3 )
  1932. {
  1933. //--- We do have a secondary part! Fix up PrimaryNumberPart appropriately,
  1934. //--- and fill in pSecondaryNumberPart.
        //--- Local snapshot (shadows the function-level temporaries); restored at the end of this block.
  1935. const WCHAR *pTempNextChar = m_pNextChar, *pTempEndOfItem = m_pEndOfCurrItem;
  1936. const WCHAR *pTemp = ( (TTSNumberItemInfo*) pNumberInfo )->pEndChar;
  1937. m_pNextChar = ( (TTSNumberItemInfo*) pNumberInfo )->pStartChar;
  1938. m_pEndOfCurrItem = pDecimalPoint;
        //--- The word list built for the whole decimal is discarded; the integer and fractional
        //--- parts are parsed again separately below.
  1939. delete ( (TTSNumberItemInfo*) pNumberInfo )->pWordList;
  1940. //--- m_pNextChar == m_pEndOfCurrItem when integer part is empty and non-negative, e.g. $.50
  1941. //--- Other case is empty and negative, e.g. $-.50
  1942. if ( m_pNextChar != m_pEndOfCurrItem &&
  1943. !( *m_pNextChar == L'-' &&
  1944. m_pNextChar == m_pEndOfCurrItem - 1 ) )
  1945. {
  1946. hr = IsNumber( pNumberInfo, L"NUMBER", MemoryManager, false );
  1947. }
  1948. else
  1949. {
        //--- Empty integer part: build the number info by hand so that it reads as g_ones[0],
        //--- preceded by g_negative when the text starts with '-'.
  1950. pNumberInfo = (TTSNumberItemInfo*) MemoryManager.GetMemory( sizeof( TTSNumberItemInfo ), &hr );
  1951. if ( SUCCEEDED( hr ) )
  1952. {
  1953. ZeroMemory( pNumberInfo, sizeof( TTSNumberItemInfo ) );
  1954. if ( *m_pNextChar == L'-' )
  1955. {
  1956. ( (TTSNumberItemInfo*) pNumberInfo )->fNegative = true;
  1957. }
  1958. else
  1959. {
  1960. ( (TTSNumberItemInfo*) pNumberInfo )->fNegative = false;
  1961. }
  1962. ( (TTSNumberItemInfo*) pNumberInfo )->pStartChar = NULL;
  1963. ( (TTSNumberItemInfo*) pNumberInfo )->pEndChar = NULL;
  1964. ( (TTSNumberItemInfo*) pNumberInfo )->pIntegerPart =
  1965. (TTSIntegerItemInfo*) MemoryManager.GetMemory( sizeof( TTSIntegerItemInfo), &hr );
  1966. if ( SUCCEEDED( hr ) )
  1967. {
  1968. ( (TTSNumberItemInfo*) pNumberInfo )->pIntegerPart->fDigitByDigit = true;
  1969. ( (TTSNumberItemInfo*) pNumberInfo )->pIntegerPart->ulNumDigits = 1;
        //--- NOTE(review): the result of new is not checked before AddTail is called on it.
  1970. ( (TTSNumberItemInfo*) pNumberInfo )->pWordList = new CWordList;
  1971. if ( ( (TTSNumberItemInfo*) pNumberInfo )->fNegative )
  1972. {
  1973. Word.pXmlState = pNumberXMLState;
  1974. Word.pWordText = g_negative.pStr;
  1975. Word.ulWordLen = g_negative.Len;
  1976. Word.pLemma = Word.pWordText;
  1977. Word.ulLemmaLen = Word.ulWordLen;
  1978. ( (TTSNumberItemInfo*) pNumberInfo )->pWordList->AddTail( Word );
  1979. }
  1980. Word.pWordText = g_ones[0].pStr;
  1981. Word.ulWordLen = g_ones[0].Len;
  1982. Word.pLemma = Word.pWordText;
  1983. Word.ulLemmaLen = Word.ulWordLen;
  1984. ( (TTSNumberItemInfo*) pNumberInfo )->pWordList->AddTail( Word );
  1985. }
  1986. }
  1987. }
  1988. if ( SUCCEEDED( hr ) )
  1989. {
  1990. ( (TTSCurrencyItemInfo*) pItemNormInfo )->pPrimaryNumberPart =
  1991. (TTSNumberItemInfo*) pNumberInfo;
        //--- Point the current item at the two characters that follow the decimal point.
  1992. m_pNextChar = m_pEndOfCurrItem + 1;
  1993. m_pEndOfCurrItem = pTemp;
  1994. //--- If zeroes, don't pronounce them...
        //--- First digit non-zero: parse both digits.  First digit zero and second non-zero: parse
        //--- only the second digit.  Both zero: no secondary part is produced.
  1995. if ( m_pNextChar[0] != L'0' )
  1996. {
  1997. hr = IsNumber( pNumberInfo, L"NUMBER", MemoryManager, false );
  1998. if ( SUCCEEDED( hr ) )
  1999. {
  2000. ( (TTSCurrencyItemInfo*) pItemNormInfo )->pSecondaryNumberPart =
  2001. (TTSNumberItemInfo*) pNumberInfo;
  2002. }
  2003. }
  2004. else if ( m_pNextChar[1] != L'0' )
  2005. {
  2006. m_pNextChar++;
  2007. hr = IsNumber( pNumberInfo, L"NUMBER", MemoryManager, false );
  2008. if ( SUCCEEDED( hr ) )
  2009. {
  2010. ( (TTSCurrencyItemInfo*) pItemNormInfo )->pSecondaryNumberPart =
  2011. (TTSNumberItemInfo*) pNumberInfo;
  2012. }
  2013. }
  2014. }
        //--- Back to the position reached by the match stage (local snapshot of this block).
  2015. m_pNextChar = pTempNextChar;
  2016. m_pEndOfCurrItem = pTempEndOfItem;
  2017. }
  2018. }
  2019. if ( SUCCEEDED( hr ) )
  2020. {
  2021. //--- Expand Primary number part
  2022. if ( fNegative )
  2023. {
  2024. ( (TTSCurrencyItemInfo*) pItemNormInfo )->pPrimaryNumberPart->fNegative = true;
  2025. Word.pXmlState = pNumberXMLState;
  2026. Word.eWordPartOfSpeech = MS_Unknown;
  2027. Word.pWordText = g_negative.pStr;
  2028. Word.ulWordLen = g_negative.Len;
  2029. Word.pLemma = Word.pWordText;
  2030. Word.ulLemmaLen = Word.ulWordLen;
  2031. WordList.AddTail( Word );
  2032. }
  2033. hr = ExpandNumber( ( (TTSCurrencyItemInfo*) pItemNormInfo )->pPrimaryNumberPart, WordList );
  2034. }
  2035. //--- Clean up Number XML States
  2036. SPLISTPOS WordListPos;
  2037. if ( SUCCEEDED( hr ) )
  2038. {
        //--- Every word currently in WordList is tagged with the number's XML state.
  2039. WordListPos = WordList.GetHeadPosition();
  2040. while ( WordListPos )
  2041. {
  2042. TTSWord& TempWord = WordList.GetNext( WordListPos );
  2043. TempWord.pXmlState = pNumberXMLState;
  2044. }
  2045. //--- Insert PostNumber XML States
  2046. while ( !PostNumberList.IsEmpty() )
  2047. {
  2048. WordList.AddTail( ( PostNumberList.RemoveHead() ).Words[0] );
  2049. }
  2050. //--- If a quantifier is present, expand it
  2051. if ( iQuantIndex >= 0 )
  2052. {
  2053. Word.pXmlState = pQuantifierXMLState;
  2054. Word.pWordText = g_quantifiers[iQuantIndex].pStr;
  2055. Word.ulWordLen = g_quantifiers[iQuantIndex].Len;
  2056. Word.pLemma = Word.pWordText;
  2057. Word.ulLemmaLen = Word.ulWordLen;
  2058. WordList.AddTail( Word );
  2059. }
  2060. BOOL fFraction = false;
  2061. //--- If a fractional unit with no quantifier, insert "of a"
  2062. if ( iQuantIndex < 0 &&
  2063. !( (TTSCurrencyItemInfo*) pItemNormInfo )->pSecondaryNumberPart &&
  2064. !( (TTSCurrencyItemInfo*) pItemNormInfo )->pPrimaryNumberPart->pIntegerPart &&
  2065. ( (TTSCurrencyItemInfo*) pItemNormInfo )->pPrimaryNumberPart->pFractionalPart &&
  2066. !( (TTSCurrencyItemInfo*) pItemNormInfo )->pPrimaryNumberPart->pFractionalPart->fIsStandard )
  2067. {
  2068. fFraction = true;
  2069. Word.pXmlState = pNumberXMLState;
  2070. Word.eWordPartOfSpeech = MS_Unknown;
  2071. Word.pWordText = g_of.pStr;
  2072. Word.ulWordLen = g_of.Len;
  2073. Word.pLemma = Word.pWordText;
  2074. Word.ulLemmaLen = Word.ulWordLen;
  2075. WordList.AddTail( Word );
  2076. Word.pWordText = g_a.pStr;
  2077. Word.ulWordLen = g_a.Len;
  2078. Word.pLemma = Word.pWordText;
  2079. Word.ulLemmaLen = Word.ulWordLen;
  2080. WordList.AddTail( Word );
  2081. }
  2082. //--- Insert Main Currency Unit
  2083. //--- Plural if not a fraction and either a quantifier is present or the integral part is not one.
        //--- The length tests come first in each || pair, so pStartChar is only dereferenced when the
        //--- primary part is exactly one (or two) characters long; this keeps the hand-built number
        //--- info with NULL pStartChar/pEndChar (empty integer part) from being dereferenced.
  2084. if ( !fFraction &&
  2085. ( iQuantIndex >= 0 ||
  2086. ( ( ( ( (TTSCurrencyItemInfo*) pItemNormInfo )->pPrimaryNumberPart->pEndChar -
  2087. ( (TTSCurrencyItemInfo*) pItemNormInfo )->pPrimaryNumberPart->pStartChar != 1 ) ||
  2088. ( (TTSCurrencyItemInfo*) pItemNormInfo )->pPrimaryNumberPart->pStartChar[0] != L'1' ) &&
  2089. ( ( ( (TTSCurrencyItemInfo*) pItemNormInfo )->pPrimaryNumberPart->pEndChar -
  2090. ( (TTSCurrencyItemInfo*) pItemNormInfo )->pPrimaryNumberPart->pStartChar != 2 ) ||
  2091. ( (TTSCurrencyItemInfo*) pItemNormInfo )->pPrimaryNumberPart->pStartChar[0] != L'-' ||
  2092. ( (TTSCurrencyItemInfo*) pItemNormInfo )->pPrimaryNumberPart->pStartChar[1] != L'1' ) ) ) )
  2093. {
  2094. Word.pXmlState = pSymbolXMLState;
  2095. Word.pWordText = g_CurrencySigns[iSymbolIndex].MainUnit.pStr;
  2096. Word.ulWordLen = g_CurrencySigns[iSymbolIndex].MainUnit.Len;
  2097. Word.pLemma = Word.pWordText;
  2098. Word.ulLemmaLen = Word.ulWordLen;
  2099. WordList.AddTail( Word );
  2100. }
  2101. //--- ONLY "one" or "negative one" should precede this...
  2102. else
  2103. {
  2104. Word.pXmlState = pSymbolXMLState;
  2105. Word.pWordText = g_SingularPrimaryCurrencySigns[iSymbolIndex].pStr;
  2106. Word.ulWordLen = g_SingularPrimaryCurrencySigns[iSymbolIndex].Len;
  2107. Word.pLemma = Word.pWordText;
  2108. Word.ulLemmaLen = Word.ulWordLen;
  2109. WordList.AddTail( Word );
  2110. }
  2111. //--- Insert Post Symbol XML States
  2112. while ( !PostSymbolList.IsEmpty() )
  2113. {
  2114. WordList.AddTail( ( PostSymbolList.RemoveHead() ).Words[0] );
  2115. }
  2116. //--- Insert Secondary number part
  2117. if ( ( (TTSCurrencyItemInfo*) pItemNormInfo )->pSecondaryNumberPart )
  2118. {
  2119. Word.pXmlState = pNumberXMLState;
  2120. Word.pWordText = g_And.pStr;
  2121. Word.ulWordLen = g_And.Len;
  2122. Word.pLemma = Word.pWordText;
  2123. Word.ulLemmaLen = Word.ulWordLen;
  2124. WordList.AddTail( Word );
        //--- Remember the current tail (the g_And word just added) so that only the words from
        //--- there onwards are retagged after the secondary number has been expanded.
  2125. WordListPos = WordList.GetTailPosition();
  2126. hr = ExpandNumber( ( (TTSCurrencyItemInfo*) pItemNormInfo )->pSecondaryNumberPart, WordList );
  2127. //--- Clean up number XML State
  2128. if ( SUCCEEDED( hr ) )
  2129. {
  2130. while ( WordListPos )
  2131. {
  2132. TTSWord& TempWord = WordList.GetNext( WordListPos );
  2133. TempWord.pXmlState = pNumberXMLState;
  2134. }
  2135. }
  2136. //--- Insert secondary currency unit
  2137. if ( SUCCEEDED( hr ) )
  2138. {
        //--- Singular secondary unit only when the secondary part is the single character '1'.
  2139. if ( ( (TTSCurrencyItemInfo*) pItemNormInfo )->pSecondaryNumberPart->pEndChar -
  2140. ( (TTSCurrencyItemInfo*) pItemNormInfo )->pSecondaryNumberPart->pStartChar == 1 &&
  2141. ( (TTSCurrencyItemInfo*) pItemNormInfo )->pSecondaryNumberPart->pStartChar[0] == L'1' )
  2142. {
  2143. Word.pXmlState = pSymbolXMLState;
  2144. Word.pWordText = g_SingularSecondaryCurrencySigns[iSymbolIndex].pStr;
  2145. Word.ulWordLen = g_SingularSecondaryCurrencySigns[iSymbolIndex].Len;
  2146. Word.pLemma = Word.pWordText;
  2147. Word.ulLemmaLen = Word.ulWordLen;
  2148. WordList.AddTail( Word );
  2149. }
  2150. else
  2151. {
  2152. Word.pXmlState = pSymbolXMLState;
  2153. Word.pWordText = g_CurrencySigns[iSymbolIndex].SecondaryUnit.pStr;
  2154. Word.ulWordLen = g_CurrencySigns[iSymbolIndex].SecondaryUnit.Len;
  2155. Word.pLemma = Word.pWordText;
  2156. Word.ulLemmaLen = Word.ulWordLen;
  2157. WordList.AddTail( Word );
  2158. }
  2159. }
  2160. }
        //--- pTempNextChar here is the function-level snapshot: on success m_pNextChar goes back to
        //--- its entry value, while m_pEndChar, m_pEndOfCurrItem and m_pCurrFrag stay where the
        //--- match stage left them.
  2161. if ( SUCCEEDED( hr ) )
  2162. {
  2163. m_pNextChar = pTempNextChar;
  2164. }
  2165. }
  2166. }
  2167. }
        //--- The match stage failed: release the number's word list and restore the entry position.
        //--- NOTE(review): failures raised inside the expansion stage above (GetMemory, IsNumber,
        //--- ExpandNumber) do not reach this else, so they return with the position members and
        //--- WordList left as modified - confirm callers tolerate that.
  2168. else
  2169. {
  2170. if ( pNumberInfo )
  2171. {
  2172. delete ( (TTSNumberItemInfo*) pNumberInfo )->pWordList;
  2173. }
  2174. m_pNextChar = pTempNextChar;
  2175. m_pEndChar = pTempEndChar;
  2176. m_pEndOfCurrItem = pTempEndOfItem;
  2177. m_pCurrFrag = pTempFrag;
  2178. }
  2179. return hr;
  2180. } /* IsCurrency */
  2181. /***********************************************************************************************
  2182. * IsRomanNumeral *
  2183. *----------------*
  2184. * Description:
  2185. * Checks the incoming Item's text to determine whether or not it
  2186. * is a fraction.
  2187. *
  2188. * RegExp:
  2189. * [M](0-3) { [CM] || [CD] || { [D]?[C](0-3) } } { [XC] || [XL] || { [L]?[X](0-3) } }
  2190. * { [IX] || [IV] || { [V]?[I](0-3) }}
  2191. *
  2192. * Types assigned:
  2193. * NUM_ROMAN_NUMERAL
  2194. ********************************************************************* AH **********************/
  2195. HRESULT CStdSentEnum::IsRomanNumeral( TTSItemInfo*& pItemNormInfo, const WCHAR* Context,
  2196. CSentItemMemory& MemoryManager )
  2197. {
  2198. SPDBG_FUNC( "NumNorm IsRomanNumeral" );
  2199. HRESULT hr = S_OK;
  2200. ULONG ulValue = 0, ulIndex = 0, ulMaxOfThree = 0, ulTokenLen = (ULONG)(m_pEndOfCurrItem - m_pNextChar);
  2201. //--- Match Thousands - M(0-3)
  2202. while ( ulIndex < ulTokenLen &&
  2203. towupper( m_pNextChar[ulIndex] ) == L'M' &&
  2204. ulMaxOfThree < 3 )
  2205. {
  2206. ulValue += 1000;
  2207. ulMaxOfThree++;
  2208. ulIndex++;
  2209. }
  2210. if ( ulMaxOfThree > 3 )
  2211. {
  2212. hr = E_INVALIDARG;
  2213. }
  2214. //--- Match Hundreds - { [CM] || [CD] || { [D]?[C](0-3) } }
  2215. if ( SUCCEEDED( hr ) )
  2216. {
  2217. ulMaxOfThree = 0;
  2218. //--- Matched C first
  2219. if ( ulIndex < ulTokenLen &&
  2220. towupper( m_pNextChar[ulIndex] ) == L'C' )
  2221. {
  2222. ulValue += 100;
  2223. ulMaxOfThree++;
  2224. ulIndex++;
  2225. //--- Special Case - CM = 900
  2226. if ( ulIndex < ulTokenLen &&
  2227. towupper( m_pNextChar[ulIndex] ) == L'M' )
  2228. {
  2229. ulValue += 800;
  2230. ulIndex++;
  2231. }
  2232. //--- Special Case - CD = 400
  2233. else if ( ulIndex < ulTokenLen &&
  2234. towupper( m_pNextChar[ulIndex] ) == L'D' )
  2235. {
  2236. ulValue += 300;
  2237. ulIndex++;
  2238. }
  2239. //--- Default Case
  2240. else
  2241. {
  2242. while ( ulIndex < ulTokenLen &&
  2243. towupper( m_pNextChar[ulIndex] ) == L'C' &&
  2244. ulMaxOfThree < 3 )
  2245. {
  2246. ulValue += 100;
  2247. ulMaxOfThree++;
  2248. ulIndex++;
  2249. }
  2250. if ( ulMaxOfThree > 3 )
  2251. {
  2252. hr = E_INVALIDARG;
  2253. }
  2254. }
  2255. }
  2256. //--- Matched D First
  2257. else if ( ulIndex < ulTokenLen &&
  2258. towupper( m_pNextChar[ulIndex] ) == L'D' )
  2259. {
  2260. ulValue += 500;
  2261. ulIndex++;
  2262. ulMaxOfThree = 0;
  2263. //--- Match C's
  2264. while ( ulIndex < ulTokenLen &&
  2265. towupper( m_pNextChar[ulIndex] ) == L'C' &&
  2266. ulMaxOfThree < 3 )
  2267. {
  2268. ulValue += 100;
  2269. ulIndex++;
  2270. ulMaxOfThree++;
  2271. }
  2272. if ( ulMaxOfThree > 3 )
  2273. {
  2274. hr = E_INVALIDARG;
  2275. }
  2276. }
  2277. }
  2278. //--- Match Tens - { [XC] || [XL] || { [L]?[X](0-3) } }
  2279. if ( SUCCEEDED( hr ) )
  2280. {
  2281. ulMaxOfThree = 0;
  2282. //--- Matched X First
  2283. if ( ulIndex < ulTokenLen &&
  2284. towupper( m_pNextChar[ulIndex] ) == L'X' )
  2285. {
  2286. ulValue += 10;
  2287. ulMaxOfThree++;
  2288. ulIndex++;
  2289. //--- Special Case - XC = 90
  2290. if ( ulIndex < ulTokenLen &&
  2291. towupper( m_pNextChar[ulIndex] ) == L'C' )
  2292. {
  2293. ulValue += 80;
  2294. ulIndex++;
  2295. }
  2296. //--- Special Case - XL = 40
  2297. else if ( ulIndex < ulTokenLen &&
  2298. towupper( m_pNextChar[ulIndex] ) == 'L' )
  2299. {
  2300. ulValue += 30;
  2301. ulIndex++;
  2302. }
  2303. //--- Default Case
  2304. else
  2305. {
  2306. while ( ulIndex < ulTokenLen &&
  2307. towupper( m_pNextChar[ulIndex] ) == L'X' &&
  2308. ulMaxOfThree < 3 )
  2309. {
  2310. ulValue += 10;
  2311. ulMaxOfThree ++;
  2312. ulIndex++;
  2313. }
  2314. if ( ulMaxOfThree > 3 )
  2315. {
  2316. hr = E_INVALIDARG;
  2317. }
  2318. }
  2319. }
  2320. //--- Matched L First
  2321. else if ( ulIndex < ulTokenLen &&
  2322. towupper( m_pNextChar[ulIndex] ) == L'L' )
  2323. {
  2324. ulValue += 50;
  2325. ulIndex++;
  2326. //--- Match X's
  2327. while ( ulIndex < ulTokenLen &&
  2328. towupper( m_pNextChar[ulIndex] ) == L'X' &&
  2329. ulMaxOfThree < 3 )
  2330. {
  2331. ulValue += 10;
  2332. ulMaxOfThree++;
  2333. ulIndex++;
  2334. }
  2335. if ( ulMaxOfThree > 3 )
  2336. {
  2337. hr = E_INVALIDARG;
  2338. }
  2339. }
  2340. }
  2341. //--- Match Ones - { [IX] || [IV] || { [V]?[I](0-3) } }
  2342. if ( SUCCEEDED( hr ) )
  2343. {
  2344. ulMaxOfThree = 0;
  2345. //--- Matched I First
  2346. if ( ulIndex < ulTokenLen &&
  2347. towupper( m_pNextChar[ulIndex] ) == L'I' )
  2348. {
  2349. ulValue += 1;
  2350. ulMaxOfThree++;
  2351. ulIndex++;
  2352. //--- Special Case - IX = 9
  2353. if ( ulIndex < ulTokenLen &&
  2354. towupper( m_pNextChar[ulIndex] ) == L'X' )
  2355. {
  2356. ulValue += 8;
  2357. ulIndex++;
  2358. }
  2359. //--- Special Case - IV = 4
  2360. else if ( ulIndex < ulTokenLen &&
  2361. towupper( m_pNextChar[ulIndex] ) == L'V' )
  2362. {
  2363. ulValue += 3;
  2364. ulIndex++;
  2365. }
  2366. //--- Default Case
  2367. else
  2368. {
  2369. while ( ulIndex < ulTokenLen &&
  2370. towupper( m_pNextChar[ulIndex] ) == L'I' &&
  2371. ulMaxOfThree < 3 )
  2372. {
  2373. ulValue += 1;
  2374. ulMaxOfThree++;
  2375. ulIndex++;
  2376. }
  2377. if ( ulMaxOfThree > 3 )
  2378. {
  2379. hr = E_INVALIDARG;
  2380. }
  2381. }
  2382. }
  2383. //--- Matched V First
  2384. else if ( ulIndex < ulTokenLen &&
  2385. towupper( m_pNextChar[ulIndex] ) == L'V' )
  2386. {
  2387. ulValue += 5;
  2388. ulIndex++;
  2389. //--- Match I's
  2390. while ( ulIndex < ulTokenLen &&
  2391. towupper( m_pNextChar[ulIndex] ) == L'I' &&
  2392. ulMaxOfThree < 3 )
  2393. {
  2394. ulValue += 1;
  2395. ulMaxOfThree++;
  2396. ulIndex++;
  2397. }
  2398. if ( ulMaxOfThree > 3 )
  2399. {
  2400. hr = E_INVALIDARG;
  2401. }
  2402. }
  2403. }
  2404. if ( ulIndex != ulTokenLen )
  2405. {
  2406. hr = E_INVALIDARG;
  2407. }
  2408. else
  2409. {
  2410. //--- Successfully matched a roman numeral!
  2411. WCHAR *tempNumberString;
  2412. //--- Max value of ulValue is 3999, so the resultant string cannot be more than
  2413. //--- four characters long (plus one for the comma, just in case)
  2414. tempNumberString = (WCHAR*) MemoryManager.GetMemory( 6 * sizeof(WCHAR), &hr );
  2415. if ( SUCCEEDED( hr ) )
  2416. {
  2417. TTSItemInfo *pNumberInfo = NULL;
  2418. _ltow( (long) ulValue, tempNumberString, 10 );
  2419. const WCHAR *pTempNextChar = m_pNextChar, *pTempEndOfItem = m_pEndOfCurrItem;
  2420. m_pNextChar = tempNumberString;
  2421. m_pEndOfCurrItem = tempNumberString + wcslen( tempNumberString );
  2422. hr = IsNumber( pNumberInfo, Context, MemoryManager, false );
  2423. m_pNextChar = pTempNextChar;
  2424. m_pEndOfCurrItem = pTempEndOfItem;
  2425. if ( SUCCEEDED( hr ) )
  2426. {
  2427. pItemNormInfo =
  2428. (TTSRomanNumeralItemInfo*) MemoryManager.GetMemory( sizeof( TTSRomanNumeralItemInfo ), &hr );
  2429. if ( SUCCEEDED( hr ) )
  2430. {
  2431. ( (TTSRomanNumeralItemInfo*) pItemNormInfo )->pNumberInfo = pNumberInfo;
  2432. }
  2433. pItemNormInfo->Type = eNUM_ROMAN_NUMERAL;
  2434. }
  2435. }
  2436. }
  2437. return hr;
  2438. } /* IsRomanNumeral */
  2439. /***********************************************************************************************
  2440. * IsPhoneNumber *
  2441. *---------------*
  2442. * Description:
  2443. * Checks the incoming Item's text to determine whether or not it
  2444. * is a phone number.
  2445. *
  2446. * RegExp:
  2447. * { ddd-dddd } || { ddd-ddd-dddd }
  2448. *
  2449. * Types assigned:
  2450. * NUM_PHONENUMBER
  2451. ********************************************************************* AH **********************/
  2452. HRESULT CStdSentEnum::IsPhoneNumber( TTSItemInfo*& pItemNormInfo, const WCHAR* Context, CSentItemMemory& MemoryManager,
  2453. CWordList& WordList )
  2454. {
  2455. SPDBG_FUNC( "CStdSentEnum::IsPhoneNumber" );
  2456. HRESULT hr = S_OK;
  2457. const WCHAR *pCountryCode = NULL, *pAreaCode = NULL, *pGroups[4] = { NULL, NULL, NULL, NULL };
  2458. const WCHAR *pStartChar = m_pNextChar, *pEndChar = m_pEndChar, *pEndOfItem = m_pEndOfCurrItem;
  2459. const SPVTEXTFRAG *pFrag = m_pCurrFrag;
  2460. BOOL fMatchedLeftParen = false, fMatchedOne = false;
  2461. ULONG ulCountryCodeLen = 0, ulAreaCodeLen = 0, ulNumGroups = 0, ulGroupLen[4] = { 0, 0, 0, 0 };
  2462. CItemList PostCountryCodeList, PostOneList, PostAreaCodeList, PostGroupLists[4];
  2463. const SPVSTATE *pCountryCodeState = NULL, *pOneState = NULL, *pAreaCodeState = NULL;
  2464. const SPVSTATE *pGroupStates[4] = { NULL, NULL, NULL, NULL };
  2465. const WCHAR *pDelimiter = NULL;
  2466. const WCHAR *pTempEndChar = NULL;
  2467. const SPVTEXTFRAG *pTempFrag = NULL;
  2468. ULONG i = 0;
  2469. //--- Try to match Country Code
  2470. if ( pStartChar[0] == L'+' )
  2471. {
  2472. pStartChar++;
  2473. i = 0;
  2474. //--- Try to match d(1-3)
  2475. while ( pEndOfItem > pStartChar + i &&
  2476. iswdigit( pStartChar[i] ) &&
  2477. i < 3 )
  2478. {
  2479. i++;
  2480. }
  2481. pCountryCode = pStartChar;
  2482. pCountryCodeState = &pFrag->State;
  2483. ulCountryCodeLen = i;
  2484. //--- Try to match delimiter
  2485. if ( i >= 1 &&
  2486. pEndOfItem > pStartChar + i &&
  2487. MatchPhoneNumberDelimiter( pStartChar[i] ) )
  2488. {
  2489. pDelimiter = pStartChar + i;
  2490. pStartChar += i + 1;
  2491. }
  2492. //--- Try to advance in text - whitespace counts as a delimiter...
  2493. else if ( i >= 1 &&
  2494. pEndOfItem == pStartChar + i )
  2495. {
  2496. pStartChar += i;
  2497. pCountryCodeState = &pFrag->State;
  2498. hr = SkipWhiteSpaceAndTags( pStartChar, pEndChar, pFrag, MemoryManager, true,
  2499. &PostCountryCodeList );
  2500. if ( !pStartChar &&
  2501. SUCCEEDED( hr ) )
  2502. {
  2503. hr = E_INVALIDARG;
  2504. }
  2505. else if ( SUCCEEDED( hr ) )
  2506. {
  2507. pEndOfItem = FindTokenEnd( pStartChar, pEndChar );
  2508. }
  2509. }
  2510. else
  2511. {
  2512. hr = E_INVALIDARG;
  2513. }
  2514. }
  2515. //--- Try to match a "1"
  2516. if ( SUCCEEDED( hr ) &&
  2517. !pCountryCode &&
  2518. pStartChar[0] == L'1' &&
  2519. !iswdigit( pStartChar[1] ) )
  2520. {
  2521. pOneState = &pFrag->State;
  2522. fMatchedOne = true;
  2523. pStartChar++;
  2524. if ( pEndOfItem > pStartChar &&
  2525. MatchPhoneNumberDelimiter( pStartChar[0] ) )
  2526. {
  2527. //--- If we've already hit a delimiter, make sure all others agree
  2528. if ( pDelimiter )
  2529. {
  2530. if ( *pDelimiter != pStartChar[0] )
  2531. {
  2532. hr = E_INVALIDARG;
  2533. }
  2534. }
  2535. else
  2536. {
  2537. pDelimiter = pStartChar;
  2538. }
  2539. pStartChar++;
  2540. }
  2541. //--- Try to advance in text - whitespace counts as a delimiter...
  2542. else if ( !pDelimiter &&
  2543. pEndOfItem == pStartChar )
  2544. {
  2545. pOneState = &pFrag->State;
  2546. hr = SkipWhiteSpaceAndTags( pStartChar, pEndChar, pFrag, MemoryManager, true,
  2547. &PostOneList );
  2548. if ( !pStartChar &&
  2549. SUCCEEDED( hr ) )
  2550. {
  2551. hr = E_INVALIDARG;
  2552. }
  2553. else if ( SUCCEEDED( hr ) )
  2554. {
  2555. pEndOfItem = FindTokenEnd( pStartChar, pEndChar );
  2556. }
  2557. }
  2558. else
  2559. {
  2560. hr = E_INVALIDARG;
  2561. }
  2562. }
  2563. //--- Try to match Area Code
  2564. if ( SUCCEEDED( hr ) &&
  2565. pStartChar < pEndOfItem )
  2566. {
  2567. i = 0;
  2568. //--- Try to match a left parenthesis
  2569. if ( ( pCountryCode ||
  2570. fMatchedOne ) &&
  2571. pStartChar[0] == L'(' )
  2572. {
  2573. pStartChar++;
  2574. fMatchedLeftParen = true;
  2575. }
  2576. else if ( !pCountryCode &&
  2577. !fMatchedOne &&
  2578. pStartChar > pFrag->pTextStart &&
  2579. *( pStartChar - 1 ) == L'(' )
  2580. {
  2581. fMatchedLeftParen = true;
  2582. }
  2583. if ( fMatchedLeftParen )
  2584. {
  2585. //--- Try to match ddd?
  2586. while ( pEndOfItem > pStartChar + i &&
  2587. iswdigit( pStartChar[i] ) &&
  2588. i < 3 )
  2589. {
  2590. i++;
  2591. }
  2592. pAreaCodeState = &pFrag->State;
  2593. pAreaCode = pStartChar;
  2594. ulAreaCodeLen = i;
  2595. if ( i < 2 )
  2596. {
  2597. //--- Failed to match at least two digits
  2598. hr = E_INVALIDARG;
  2599. }
  2600. else
  2601. {
  2602. if ( pStartChar[i] != L')' )
  2603. {
  2604. //--- Matched left parenthesis without corresponding right parenthesis
  2605. hr = E_INVALIDARG;
  2606. }
  2607. else if ( ( !( pCountryCode || fMatchedOne ) &&
  2608. pEndOfItem > pStartChar + i ) ||
  2609. ( ( pCountryCode || fMatchedOne ) &&
  2610. pEndOfItem > pStartChar + i + 1 ) )
  2611. {
  2612. i++;
  2613. //--- Delimiter is optional with parentheses
  2614. if ( MatchPhoneNumberDelimiter( pStartChar[i] ) )
  2615. {
  2616. //--- If we've already hit a delimiter, make sure all others agree
  2617. if ( pDelimiter )
  2618. {
  2619. if ( *pDelimiter != pStartChar[i] )
  2620. {
  2621. hr = E_INVALIDARG;
  2622. }
  2623. }
  2624. else
  2625. {
  2626. pDelimiter = pStartChar + i;
  2627. }
  2628. i++;
  2629. }
  2630. pStartChar += i;
  2631. }
  2632. //--- Try to advance in text - whitespace counts as a delimiter...
  2633. else if ( !pDelimiter )
  2634. {
  2635. pStartChar += i + 1;
  2636. pAreaCodeState = &pFrag->State;
  2637. hr = SkipWhiteSpaceAndTags( pStartChar, pEndChar, pFrag, MemoryManager, true,
  2638. &PostAreaCodeList );
  2639. if ( !pStartChar &&
  2640. SUCCEEDED( hr ) )
  2641. {
  2642. hr = E_INVALIDARG;
  2643. }
  2644. else if ( SUCCEEDED( hr ) )
  2645. {
  2646. pEndOfItem = FindTokenEnd( pStartChar, pEndChar );
  2647. }
  2648. }
  2649. else
  2650. {
  2651. hr = E_INVALIDARG;
  2652. }
  2653. }
  2654. }
  2655. }
  2656. //--- Try to match main number part
  2657. if ( SUCCEEDED( hr ) &&
  2658. pStartChar < pEndOfItem )
  2659. {
  2660. //--- Try to match some groups of digits
  2661. for ( int j = 0; SUCCEEDED( hr ) && j < 4; j++ )
  2662. {
  2663. i = 0;
  2664. //--- Try to match a digit string
  2665. while ( pEndOfItem > pStartChar + i &&
  2666. iswdigit( pStartChar[i] ) &&
  2667. i < 4 )
  2668. {
  2669. i++;
  2670. }
  2671. //--- Try to match a delimiter
  2672. if ( i >= 2 )
  2673. {
  2674. pGroupStates[j] = &pFrag->State;
  2675. ulGroupLen[j] = i;
  2676. pGroups[j] = pStartChar;
  2677. pStartChar += i;
  2678. if ( pEndOfItem > pStartChar + 1 &&
  2679. MatchPhoneNumberDelimiter( pStartChar[0] ) )
  2680. {
  2681. //--- If we've already hit a delimiter, make sure all others agree
  2682. if ( pDelimiter )
  2683. {
  2684. if ( *pDelimiter != pStartChar[0] )
  2685. {
  2686. hr = E_INVALIDARG;
  2687. }
  2688. }
  2689. //--- Only allow a new delimiter to be matched on the first main number group...
  2690. //--- e.g. "+45 35 32 90.89" should not all match...
  2691. else if ( j == 0 )
  2692. {
  2693. pDelimiter = pStartChar;
  2694. }
  2695. else
  2696. {
  2697. pEndChar = pTempEndChar;
  2698. pFrag = pTempFrag;
  2699. ulNumGroups = j;
  2700. break;
  2701. }
  2702. pStartChar++;
  2703. }
  2704. //--- Try to advance in text - whitespace counts as a delimiter...
  2705. else if ( !pDelimiter &&
  2706. pEndOfItem == pStartChar )
  2707. {
  2708. pGroupStates[j] = &pFrag->State;
  2709. pTempEndChar = pEndChar;
  2710. pTempFrag = pFrag;
  2711. hr = SkipWhiteSpaceAndTags( pStartChar, pEndChar, pFrag, MemoryManager, true,
  2712. &PostGroupLists[j] );
  2713. if ( !pStartChar &&
  2714. SUCCEEDED( hr ) )
  2715. {
  2716. pEndChar = pTempEndChar;
  2717. pFrag = pTempFrag;
  2718. ulNumGroups = j + 1;
  2719. break;
  2720. }
  2721. else if ( SUCCEEDED( hr ) )
  2722. {
  2723. pEndOfItem = FindTokenEnd( pStartChar, pEndChar );
  2724. }
  2725. }
  2726. else if ( pEndOfItem == pStartChar + 1 )
  2727. {
  2728. if ( IsGroupEnding( *pStartChar ) != eUNMATCHED ||
  2729. IsQuotationMark( *pStartChar ) != eUNMATCHED ||
  2730. IsMiscPunctuation( *pStartChar ) != eUNMATCHED ||
  2731. IsEOSItem( *pStartChar ) != eUNMATCHED )
  2732. {
  2733. pEndOfItem--;
  2734. ulNumGroups = j + 1;
  2735. break;
  2736. }
  2737. else
  2738. {
  2739. hr = E_INVALIDARG;
  2740. }
  2741. }
  2742. else
  2743. {
  2744. while ( pEndOfItem != pStartChar )
  2745. {
  2746. if ( IsGroupEnding( *pEndOfItem ) != eUNMATCHED ||
  2747. IsQuotationMark( *pEndOfItem ) != eUNMATCHED ||
  2748. IsMiscPunctuation( *pEndOfItem ) != eUNMATCHED ||
  2749. IsEOSItem( *pEndOfItem ) != eUNMATCHED )
  2750. {
  2751. pEndOfItem--;
  2752. }
  2753. else
  2754. {
  2755. break;
  2756. }
  2757. }
  2758. if ( pEndOfItem == pStartChar )
  2759. {
  2760. ulNumGroups = j + 1;
  2761. break;
  2762. }
  2763. else
  2764. {
  2765. hr = E_INVALIDARG;
  2766. break;
  2767. }
  2768. }
  2769. }
  2770. //--- Matched something like 206.709.8286.1 - definitely bad
  2771. else if ( pDelimiter )
  2772. {
  2773. hr = E_INVALIDARG;
  2774. }
  2775. //--- Matched something like 206 709 8286 1 - could be OK
  2776. else
  2777. {
  2778. if ( pTempEndChar )
  2779. {
  2780. pEndChar = pTempEndChar;
  2781. pFrag = pTempFrag;
  2782. }
  2783. ulNumGroups = j;
  2784. break;
  2785. }
  2786. }
  2787. //--- Didn't hit either break statement
  2788. if ( !ulNumGroups )
  2789. {
  2790. ulNumGroups = j;
  2791. }
  2792. }
  2793. //--- Check for appropriate formats
  2794. if ( SUCCEEDED( hr ) )
  2795. {
  2796. //--- Check for [1<sep>]?(ddd?)<sep>?ddd<sep>dddd? OR ddd<sep>dddd?
  2797. if ( !pCountryCode &&
  2798. ulNumGroups == 2 &&
  2799. ulGroupLen[0] == 3 &&
  2800. ulGroupLen[1] >= 3 &&
  2801. !( fMatchedOne && !pAreaCode ) )
  2802. {
  2803. if ( ( !Context ||
  2804. _wcsicmp( Context, L"phone_number" ) != 0 ) &&
  2805. !pCountryCode &&
  2806. !pAreaCode &&
  2807. !fMatchedOne &&
  2808. ( pDelimiter ? (*pDelimiter == L'.') : 0 ) )
  2809. {
  2810. hr = E_INVALIDARG;
  2811. }
  2812. }
  2813. //--- Check for [1<sep>]?ddd?<sep>ddd<sep>dddd?
  2814. else if ( !pCountryCode &&
  2815. !pAreaCode &&
  2816. ulNumGroups == 3 &&
  2817. ( ulGroupLen[0] == 2 ||
  2818. ulGroupLen[0] == 3 ) &&
  2819. ulGroupLen[1] == 3 &&
  2820. ulGroupLen[2] >= 3 )
  2821. {
  2822. pAreaCode = pGroups[0];
  2823. ulAreaCodeLen = ulGroupLen[0];
  2824. pAreaCodeState = pGroupStates[0];
  2825. PostAreaCodeList.AddTail( &PostGroupLists[0] );
  2826. pGroups[0] = pGroups[1];
  2827. ulGroupLen[0] = ulGroupLen[1];
  2828. pGroupStates[0] = pGroupStates[1];
  2829. PostGroupLists[0].RemoveAll();
  2830. PostGroupLists[0].AddTail( &PostGroupLists[1] );
  2831. pGroups[1] = pGroups[2];
  2832. ulGroupLen[1] = ulGroupLen[2];
  2833. pGroupStates[1] = pGroupStates[2];
  2834. PostGroupLists[1].RemoveAll();
  2835. PostGroupLists[2].RemoveAll();
  2836. ulNumGroups--;
  2837. }
  2838. //--- Check for (ddd?)<sep>?ddd?<sep>dd<sep>ddd?d?
  2839. else if ( !pCountryCode &&
  2840. !fMatchedOne &&
  2841. pAreaCode &&
  2842. ulNumGroups == 3 &&
  2843. ( ulGroupLen[0] == 2 ||
  2844. ulGroupLen[0] == 3 ) &&
  2845. ulGroupLen[1] == 2 &&
  2846. ulGroupLen[2] >= 2 )
  2847. {
  2848. NULL;
  2849. }
  2850. //--- Check for +dd?d?<sep>ddd?<sep>ddd?<sep>ddd?d?<sep>ddd?d?
  2851. else if ( pCountryCode &&
  2852. !fMatchedOne &&
  2853. !pAreaCode &&
  2854. ulNumGroups == 4 &&
  2855. ( ulGroupLen[0] == 2 ||
  2856. ulGroupLen[0] == 3 ) &&
  2857. ( ulGroupLen[1] == 2 ||
  2858. ulGroupLen[1] == 3 ) &&
  2859. ulGroupLen[2] >= 2 &&
  2860. ulGroupLen[3] >= 2 )
  2861. {
  2862. pAreaCode = pGroups[0];
  2863. ulAreaCodeLen = ulGroupLen[0];
  2864. pAreaCodeState = pGroupStates[0];
  2865. PostAreaCodeList.AddTail( &PostGroupLists[0] );
  2866. pGroups[0] = pGroups[1];
  2867. ulGroupLen[0] = ulGroupLen[1];
  2868. pGroupStates[0] = pGroupStates[1];
  2869. PostGroupLists[0].RemoveAll();
  2870. PostGroupLists[0].AddTail( &PostGroupLists[1] );
  2871. pGroups[1] = pGroups[2];
  2872. ulGroupLen[1] = ulGroupLen[2];
  2873. pGroupStates[1] = pGroupStates[2];
  2874. PostGroupLists[1].RemoveAll();
  2875. PostGroupLists[1].AddTail( &PostGroupLists[2] );
  2876. pGroups[2] = pGroups[3];
  2877. ulGroupLen[2] = ulGroupLen[3];
  2878. pGroupStates[2] = pGroupStates[3];
  2879. PostGroupLists[2].RemoveAll();
  2880. PostGroupLists[3].RemoveAll();
  2881. ulNumGroups--;
  2882. }
  2883. //--- Check for +dd?d?<sep>ddd?<sep>ddd?<sep>ddd?d?
  2884. else if ( pCountryCode &&
  2885. !fMatchedOne &&
  2886. !pAreaCode &&
  2887. ulNumGroups == 3 &&
  2888. ( ulGroupLen[0] == 2 ||
  2889. ulGroupLen[0] == 3 ) &&
  2890. ( ulGroupLen[1] == 2 ||
  2891. ulGroupLen[1] == 3 ) &&
  2892. ulGroupLen[2] >= 2 )
  2893. {
  2894. pAreaCode = pGroups[0];
  2895. ulAreaCodeLen = ulGroupLen[0];
  2896. pAreaCodeState = pGroupStates[0];
  2897. PostAreaCodeList.AddTail( &PostGroupLists[0] );
  2898. pGroups[0] = pGroups[1];
  2899. ulGroupLen[0] = ulGroupLen[1];
  2900. pGroupStates[0] = pGroupStates[1];
  2901. PostGroupLists[0].RemoveAll();
  2902. PostGroupLists[0].AddTail( &PostGroupLists[1] );
  2903. pGroups[1] = pGroups[2];
  2904. ulGroupLen[1] = ulGroupLen[2];
  2905. pGroupStates[1] = pGroupStates[2];
  2906. PostGroupLists[1].RemoveAll();
  2907. PostGroupLists[2].RemoveAll();
  2908. ulNumGroups--;
  2909. }
  2910. //--- Check for +dd?d?<sep>(ddd?)<sep>?ddd?<sep>ddd?d?<sep>ddd?d?
  2911. else if ( pCountryCode &&
  2912. !fMatchedOne &&
  2913. pAreaCode &&
  2914. ulNumGroups == 3 &&
  2915. ( ulGroupLen[0] == 2 ||
  2916. ulGroupLen[0] == 3 ) &&
  2917. ulGroupLen[1] >= 2 &&
  2918. ulGroupLen[2] >= 2 )
  2919. {
  2920. NULL;
  2921. }
  2922. //--- Check for +dd?d?<sep>(ddd?)<sep>?ddd?<sep>ddd?d?
  2923. else if ( pCountryCode &&
  2924. !fMatchedOne &&
  2925. pAreaCode &&
  2926. ulNumGroups == 2 &&
  2927. ( ulGroupLen[0] == 2 ||
  2928. ulGroupLen[0] == 3 ) &&
  2929. ulGroupLen[1] >= 2 )
  2930. {
  2931. NULL;
  2932. }
  2933. else
  2934. {
  2935. hr = E_INVALIDARG;
  2936. }
  2937. }
  2938. //--- Fill in pItemNormInfo
  2939. if ( SUCCEEDED(hr) )
  2940. {
  2941. m_pEndOfCurrItem = pGroups[ulNumGroups-1] + ulGroupLen[ulNumGroups-1];
  2942. m_pEndChar = pEndChar;
  2943. m_pCurrFrag = pFrag;
  2944. pItemNormInfo = (TTSPhoneNumberItemInfo*) MemoryManager.GetMemory( sizeof(TTSPhoneNumberItemInfo),
  2945. &hr );
  2946. if ( SUCCEEDED( hr ) )
  2947. {
  2948. ZeroMemory( pItemNormInfo, sizeof(TTSPhoneNumberItemInfo) );
  2949. pItemNormInfo->Type = eNEWNUM_PHONENUMBER;
  2950. //--- Fill in fOne
  2951. if ( fMatchedOne )
  2952. {
  2953. ( (TTSPhoneNumberItemInfo*) pItemNormInfo )->fOne = true;
  2954. }
  2955. //--- Fill in Country Code...
  2956. if ( pCountryCode )
  2957. {
  2958. TTSItemInfo* pCountryCodeInfo;
  2959. const WCHAR *pTempNextChar = m_pNextChar, *pTempEndOfItem = m_pEndOfCurrItem;
  2960. m_pNextChar = pCountryCode;
  2961. m_pEndOfCurrItem = pCountryCode + ulCountryCodeLen;
  2962. hr = IsNumber( pCountryCodeInfo, L"NUMBER", MemoryManager, false );
  2963. if ( SUCCEEDED( hr ) )
  2964. {
  2965. ( (TTSPhoneNumberItemInfo*) pItemNormInfo )->pCountryCode = (TTSNumberItemInfo*) pCountryCodeInfo;
  2966. }
  2967. m_pNextChar = pTempNextChar;
  2968. m_pEndOfCurrItem = pTempEndOfItem;
  2969. }
  2970. //--- Fill in Area Code...
  2971. if ( SUCCEEDED( hr ) &&
  2972. pAreaCode )
  2973. {
  2974. ( (TTSPhoneNumberItemInfo*) pItemNormInfo )->pAreaCode =
  2975. (TTSDigitsItemInfo*) MemoryManager.GetMemory( sizeof( TTSDigitsItemInfo ), &hr );
  2976. if ( SUCCEEDED( hr ) )
  2977. {
  2978. ( (TTSPhoneNumberItemInfo*) pItemNormInfo )->pAreaCode->ulNumDigits = ulAreaCodeLen;
  2979. ( (TTSPhoneNumberItemInfo*) pItemNormInfo )->pAreaCode->pFirstDigit = pAreaCode;
  2980. }
  2981. }
  2982. //--- Fill in Main Number...
  2983. if ( SUCCEEDED( hr ) )
  2984. {
  2985. ( (TTSPhoneNumberItemInfo*) pItemNormInfo )->ulNumGroups = ulNumGroups;
  2986. ( (TTSPhoneNumberItemInfo*) pItemNormInfo )->ppGroups =
  2987. (TTSDigitsItemInfo**) MemoryManager.GetMemory( ulNumGroups * sizeof(TTSDigitsItemInfo*), &hr );
  2988. for ( ULONG j = 0; SUCCEEDED( hr ) && j < ulNumGroups; j++ )
  2989. {
  2990. ( (TTSPhoneNumberItemInfo*) pItemNormInfo )->ppGroups[j] =
  2991. (TTSDigitsItemInfo*) MemoryManager.GetMemory( sizeof( TTSDigitsItemInfo ), &hr );
  2992. if ( SUCCEEDED( hr ) )
  2993. {
  2994. ( (TTSPhoneNumberItemInfo*) pItemNormInfo )->ppGroups[j]->ulNumDigits = ulGroupLen[j];
  2995. ( (TTSPhoneNumberItemInfo*) pItemNormInfo )->ppGroups[j]->pFirstDigit = pGroups[j];
  2996. }
  2997. }
  2998. }
  2999. }
  3000. }
  3001. //--- Expand Phone Number
  3002. if ( SUCCEEDED( hr ) )
  3003. {
  3004. TTSWord Word;
  3005. ZeroMemory( &Word, sizeof( TTSWord ) );
  3006. Word.eWordPartOfSpeech = MS_Unknown;
  3007. SPLISTPOS ListPos;
  3008. if ( pCountryCode )
  3009. {
  3010. //--- Insert "country"
  3011. Word.pXmlState = pCountryCodeState;
  3012. Word.pWordText = g_Country.pStr;
  3013. Word.ulWordLen = g_Country.Len;
  3014. Word.pLemma = Word.pWordText;
  3015. Word.ulLemmaLen = Word.ulWordLen;
  3016. WordList.AddTail( Word );
  3017. //--- Insert "code"
  3018. Word.pWordText = g_Code.pStr;
  3019. Word.ulWordLen = g_Code.Len;
  3020. Word.pLemma = Word.pWordText;
  3021. Word.ulLemmaLen = Word.ulWordLen;
  3022. WordList.AddTail( Word );
  3023. ListPos = WordList.GetTailPosition();
  3024. //--- Expand Country Code
  3025. ExpandNumber( ( (TTSPhoneNumberItemInfo*) pItemNormInfo )->pCountryCode, WordList );
  3026. //--- Clean up digits XML states...
  3027. WordList.GetNext( ListPos );
  3028. while ( ListPos )
  3029. {
  3030. TTSWord& TempWord = WordList.GetNext( ListPos );
  3031. TempWord.pXmlState = pCountryCodeState;
  3032. }
  3033. //--- Insert Post Symbol XML States
  3034. while ( !PostCountryCodeList.IsEmpty() )
  3035. {
  3036. WordList.AddTail( ( PostCountryCodeList.RemoveHead() ).Words[0] );
  3037. }
  3038. }
  3039. if ( fMatchedOne )
  3040. {
  3041. //--- Insert "one"
  3042. Word.pXmlState = pOneState;
  3043. Word.pWordText = g_ones[1].pStr;
  3044. Word.ulWordLen = g_ones[1].Len;
  3045. Word.pLemma = Word.pWordText;
  3046. Word.ulLemmaLen = Word.ulWordLen;
  3047. WordList.AddTail( Word );
  3048. //--- Insert PostOne XML States
  3049. while ( !PostOneList.IsEmpty() )
  3050. {
  3051. WordList.AddTail( ( PostOneList.RemoveHead() ).Words[0] );
  3052. }
  3053. }
  3054. if ( pAreaCode )
  3055. {
  3056. //--- Expand digits - 800 and 900 get expanded as one number, otherwise digit by digit
  3057. if ( ( pAreaCode[0] == L'8' ||
  3058. pAreaCode[0] == L'9' ) &&
  3059. pAreaCode[1] == L'0' &&
  3060. pAreaCode[2] == L'0' )
  3061. {
  3062. ( (TTSPhoneNumberItemInfo*) pItemNormInfo )->fIs800 = true;
  3063. NumberGroup Garbage;
  3064. ListPos = WordList.GetTailPosition();
  3065. ExpandThreeDigits( pAreaCode, Garbage, WordList );
  3066. //--- Clean up digits XML states...
  3067. //--- List was possibly empty prior to inserting "eight hundred" or "nine hundred"...
  3068. if ( !ListPos )
  3069. {
  3070. ListPos = WordList.GetHeadPosition();
  3071. }
  3072. WordList.GetNext( ListPos );
  3073. while ( ListPos )
  3074. {
  3075. TTSWord& TempWord = WordList.GetNext( ListPos );
  3076. TempWord.pXmlState = pAreaCodeState;
  3077. }
  3078. }
  3079. else
  3080. {
  3081. //--- Insert "area"
  3082. Word.pXmlState = pAreaCodeState;
  3083. Word.pWordText = g_Area.pStr;
  3084. Word.ulWordLen = g_Area.Len;
  3085. Word.pLemma = Word.pWordText;
  3086. Word.ulLemmaLen = Word.ulWordLen;
  3087. WordList.AddTail( Word );
  3088. //--- Insert "code"
  3089. Word.pWordText = g_Code.pStr;
  3090. Word.ulWordLen = g_Code.Len;
  3091. Word.pLemma = Word.pWordText;
  3092. Word.ulLemmaLen = Word.ulWordLen;
  3093. WordList.AddTail( Word );
  3094. ListPos = WordList.GetTailPosition();
  3095. ExpandDigits( ( (TTSPhoneNumberItemInfo*) pItemNormInfo )->pAreaCode, WordList );
  3096. //--- Clean up digits XML states...
  3097. WordList.GetNext( ListPos );
  3098. while ( ListPos )
  3099. {
  3100. TTSWord& TempWord = WordList.GetNext( ListPos );
  3101. TempWord.pXmlState = pAreaCodeState;
  3102. }
  3103. }
  3104. //--- Insert PostAreaCode XML States
  3105. while ( !PostAreaCodeList.IsEmpty() )
  3106. {
  3107. WordList.AddTail( ( PostAreaCodeList.RemoveHead() ).Words[0] );
  3108. }
  3109. }
  3110. for ( ULONG j = 0; j < ulNumGroups; j++ )
  3111. {
  3112. ListPos = WordList.GetTailPosition();
  3113. ExpandDigits( ( (TTSPhoneNumberItemInfo*) pItemNormInfo )->ppGroups[j], WordList );
  3114. //--- Clean up digits XML states...
  3115. //--- List was possibly empty prior to inserting "eight hundred" or "nine hundred"...
  3116. if ( !ListPos )
  3117. {
  3118. ListPos = WordList.GetHeadPosition();
  3119. }
  3120. WordList.GetNext( ListPos );
  3121. while ( ListPos )
  3122. {
  3123. TTSWord& TempWord = WordList.GetNext( ListPos );
  3124. TempWord.pXmlState = pGroupStates[j];
  3125. }
  3126. //--- Insert Post Group XML States
  3127. while ( !PostGroupLists[j].IsEmpty() )
  3128. {
  3129. WordList.AddTail( ( PostGroupLists[j].RemoveHead() ).Words[0] );
  3130. }
  3131. }
  3132. }
  3133. return hr;
  3134. } /* IsPhoneNumber */
  3135. /***********************************************************************************************
  3136. * IsZipCode *
  3137. *-----------*
  3138. * Description:
  3139. * Checks the incoming Item's text to determine whether or not it
  3140. * is a zipcode.
  3141. *
  3142. * RegExp:
  3143. * ddddd{-dddd}?
  3144. *
  3145. * Types assigned:
  3146. * NUM_ZIPCODE
  3147. ********************************************************************* AH **********************/
  3148. HRESULT CStdSentEnum::IsZipCode( TTSItemInfo*& pItemNormInfo, const WCHAR* Context,
  3149. CSentItemMemory& MemoryManager )
  3150. {
  3151. SPDBG_FUNC( "CStdSentEnum::IsZipCode" );
  3152. HRESULT hr = S_OK;
  3153. ULONG ulTokenLen = (ULONG)(m_pEndOfCurrItem - m_pNextChar);
  3154. BOOL fLastFour = false;
  3155. //--- length must be 5 or 10
  3156. if ( ulTokenLen != 5 &&
  3157. ulTokenLen != 10 )
  3158. {
  3159. hr = E_INVALIDARG;
  3160. }
  3161. else
  3162. {
  3163. //--- match 5 digits
  3164. for ( ULONG i = 0; i < 5; i++ )
  3165. {
  3166. if ( !iswdigit( m_pNextChar[i] ) )
  3167. {
  3168. hr = E_INVALIDARG;
  3169. break;
  3170. }
  3171. }
  3172. if ( SUCCEEDED(hr) &&
  3173. i < ulTokenLen )
  3174. {
  3175. //--- match dash
  3176. if ( m_pNextChar[i] != L'-' )
  3177. {
  3178. hr = E_INVALIDARG;
  3179. }
  3180. else
  3181. {
  3182. //--- match 4 digits
  3183. for ( i = 0; i < 4; i++ )
  3184. {
  3185. if ( !iswdigit( m_pNextChar[i] ) )
  3186. {
  3187. hr = E_INVALIDARG;
  3188. break;
  3189. }
  3190. }
  3191. fLastFour = true;
  3192. }
  3193. }
  3194. }
  3195. if (SUCCEEDED(hr))
  3196. {
  3197. pItemNormInfo = (TTSZipCodeItemInfo*) MemoryManager.GetMemory( sizeof(TTSZipCodeItemInfo), &hr );
  3198. if ( SUCCEEDED( hr ) )
  3199. {
  3200. ZeroMemory( pItemNormInfo, sizeof(TTSZipCodeItemInfo) );
  3201. pItemNormInfo->Type = eNUM_ZIPCODE;
  3202. ( (TTSZipCodeItemInfo*) pItemNormInfo )->pFirstFive =
  3203. (TTSDigitsItemInfo*) MemoryManager.GetMemory( sizeof(TTSDigitsItemInfo), &hr );
  3204. if ( SUCCEEDED( hr ) )
  3205. {
  3206. ( (TTSZipCodeItemInfo*) pItemNormInfo )->pFirstFive->ulNumDigits = 5;
  3207. ( (TTSZipCodeItemInfo*) pItemNormInfo )->pFirstFive->pFirstDigit = m_pNextChar;
  3208. if ( fLastFour )
  3209. {
  3210. ( (TTSZipCodeItemInfo*) pItemNormInfo )->pLastFour =
  3211. (TTSDigitsItemInfo*) MemoryManager.GetMemory( sizeof(TTSDigitsItemInfo), &hr );
  3212. if ( SUCCEEDED( hr ) )
  3213. {
  3214. ( (TTSZipCodeItemInfo*) pItemNormInfo )->pLastFour->ulNumDigits = 4;
  3215. ( (TTSZipCodeItemInfo*) pItemNormInfo )->pLastFour->pFirstDigit = m_pNextChar + 6;
  3216. }
  3217. }
  3218. }
  3219. }
  3220. }
  3221. return hr;
  3222. } /* IsZipCode */
  3223. /***********************************************************************************************
  3224. * ExpandZipCode *
  3225. *---------------*
  3226. * Description:
  3227. * Expands Items previously determined to be of type NUM_ZIPCODE by IsZipCode.
  3228. *
  3229. * NOTE: This function does not do parameter validation. Assumed to be done by caller.
  3230. ********************************************************************* AH **********************/
  3231. HRESULT CStdSentEnum::ExpandZipCode( TTSZipCodeItemInfo* pItemInfo, CWordList& WordList )
  3232. {
  3233. SPDBG_FUNC( "CStdSentEnum::ExpandZipCode" );
  3234. HRESULT hr = S_OK;
  3235. ExpandDigits( pItemInfo->pFirstFive, WordList );
  3236. if ( pItemInfo->pLastFour )
  3237. {
  3238. //--- Insert "dash"
  3239. TTSWord Word;
  3240. ZeroMemory( &Word, sizeof( TTSWord ) );
  3241. Word.pXmlState = &m_pCurrFrag->State;
  3242. Word.eWordPartOfSpeech = MS_Unknown;
  3243. Word.pWordText = g_dash.pStr;
  3244. Word.ulWordLen = g_dash.Len;
  3245. Word.pLemma = Word.pWordText;
  3246. Word.ulLemmaLen = Word.ulWordLen;
  3247. WordList.AddTail( Word );
  3248. ExpandDigits( pItemInfo->pLastFour, WordList );
  3249. }
  3250. return hr;
  3251. } /* ExpandZipCode */
  3252. /***********************************************************************************************
  3253. * IsNumberRange *
  3254. *---------------*
  3255. * Description:
  3256. * Checks the incoming Item's text to determine whether or not it
  3257. * is a number range.
  3258. *
  3259. * RegExp:
  3260. * [Number]-[Number]
  3261. *
  3262. * Types assigned:
  3263. * NUM_RANGE
  3264. ********************************************************************* AH **********************/
  3265. HRESULT CStdSentEnum::IsNumberRange( TTSItemInfo*& pItemNormInfo, CSentItemMemory& MemoryManager )
  3266. {
  3267. SPDBG_FUNC( "CStdSentEnum::IsNumberRange" );
  3268. HRESULT hr = S_OK;
  3269. TTSItemInfo *pFirstNumberInfo = NULL, *pSecondNumberInfo = NULL;
  3270. const WCHAR *pTempNextChar = m_pNextChar, *pTempEndOfItem = m_pEndOfCurrItem;
  3271. const WCHAR *pHyphen = NULL;
  3272. for ( pHyphen = m_pNextChar; pHyphen < m_pEndOfCurrItem; pHyphen++ )
  3273. {
  3274. if ( *pHyphen == L'-' )
  3275. {
  3276. break;
  3277. }
  3278. }
  3279. if ( *pHyphen == L'-' &&
  3280. pHyphen > m_pNextChar &&
  3281. pHyphen < m_pEndOfCurrItem - 1 )
  3282. {
  3283. m_pEndOfCurrItem = pHyphen;
  3284. hr = IsNumber( pFirstNumberInfo, NULL, MemoryManager );
  3285. if ( SUCCEEDED( hr ) )
  3286. {
  3287. m_pNextChar = pHyphen + 1;
  3288. m_pEndOfCurrItem = pTempEndOfItem;
  3289. hr = IsNumberCategory( pSecondNumberInfo, NULL, MemoryManager );
  3290. if ( SUCCEEDED( hr ) )
  3291. {
  3292. //--- Matched a number range!
  3293. pItemNormInfo =
  3294. (TTSNumberRangeItemInfo*) MemoryManager.GetMemory( sizeof( TTSNumberRangeItemInfo ), &hr );
  3295. if ( SUCCEEDED( hr ) )
  3296. {
  3297. pItemNormInfo->Type = eNUM_RANGE;
  3298. ( (TTSNumberRangeItemInfo*) pItemNormInfo )->pFirstNumberInfo = pFirstNumberInfo;
  3299. ( (TTSNumberRangeItemInfo*) pItemNormInfo )->pSecondNumberInfo = pSecondNumberInfo;
  3300. }
  3301. }
  3302. else if ( pFirstNumberInfo->Type != eDATE_YEAR )
  3303. {
  3304. delete ( (TTSNumberItemInfo*) pFirstNumberInfo )->pWordList;
  3305. }
  3306. }
  3307. m_pNextChar = pTempNextChar;
  3308. m_pEndOfCurrItem = pTempEndOfItem;
  3309. }
  3310. else
  3311. {
  3312. hr = E_INVALIDARG;
  3313. }
  3314. return hr;
  3315. } /* IsNumberRange */
  3316. /***********************************************************************************************
  3317. * ExpandNumberRange *
  3318. *-------------------*
  3319. * Description:
  3320. * Expands Items previously determined to be of type NUM_RANGE by IsNumberRange.
  3321. *
  3322. * NOTE: This function does not do parameter validation. Assumed to be done by caller.
  3323. ********************************************************************* AH **********************/
  3324. HRESULT CStdSentEnum::ExpandNumberRange( TTSNumberRangeItemInfo* pItemInfo, CWordList& WordList )
  3325. {
  3326. SPDBG_FUNC( "CStdSentEnum::ExpandNumberRange" );
  3327. HRESULT hr = S_OK;
  3328. //--- Expand first number (or year)...
  3329. switch( pItemInfo->pFirstNumberInfo->Type )
  3330. {
  3331. case eDATE_YEAR:
  3332. hr = ExpandYear( (TTSYearItemInfo*) pItemInfo->pFirstNumberInfo, WordList );
  3333. break;
  3334. default:
  3335. hr = ExpandNumber( (TTSNumberItemInfo*) pItemInfo->pFirstNumberInfo, WordList );
  3336. break;
  3337. }
  3338. //--- Insert "to"
  3339. if ( SUCCEEDED( hr ) )
  3340. {
  3341. TTSWord Word;
  3342. ZeroMemory( &Word, sizeof( TTSWord ) );
  3343. Word.pXmlState = &m_pCurrFrag->State;
  3344. Word.eWordPartOfSpeech = MS_Unknown;
  3345. Word.pWordText = g_to.pStr;
  3346. Word.ulWordLen = g_to.Len;
  3347. Word.pLemma = Word.pWordText;
  3348. Word.ulLemmaLen = Word.ulWordLen;
  3349. WordList.AddTail( Word );
  3350. }
  3351. //--- Expand second number (or year)...
  3352. if ( SUCCEEDED( hr ) )
  3353. {
  3354. switch( pItemInfo->pSecondNumberInfo->Type )
  3355. {
  3356. case eDATE_YEAR:
  3357. hr = ExpandYear( (TTSYearItemInfo*) pItemInfo->pSecondNumberInfo, WordList );
  3358. break;
  3359. case eNUM_PERCENT:
  3360. hr = ExpandPercent( (TTSNumberItemInfo*) pItemInfo->pSecondNumberInfo, WordList );
  3361. break;
  3362. case eNUM_DEGREES:
  3363. hr = ExpandDegrees( (TTSNumberItemInfo*) pItemInfo->pSecondNumberInfo, WordList );
  3364. break;
  3365. case eNUM_SQUARED:
  3366. hr = ExpandSquare( (TTSNumberItemInfo*) pItemInfo->pSecondNumberInfo, WordList );
  3367. break;
  3368. case eNUM_CUBED:
  3369. hr = ExpandCube( (TTSNumberItemInfo*) pItemInfo->pSecondNumberInfo, WordList );
  3370. break;
  3371. default:
  3372. hr = ExpandNumber( (TTSNumberItemInfo*) pItemInfo->pSecondNumberInfo, WordList );
  3373. break;
  3374. }
  3375. }
  3376. return hr;
  3377. } /* ExpandNumberRange */
  3378. /***********************************************************************************************
  3379. * IsCurrencyRange *
  3380. *-------------------*
  3381. * Description:
  3382. * Expands Items determined to be of type CURRENCY_RANGE
  3383. *
  3384. * NOTE: This function does not do parameter validation. Assumed to be done by caller.
  3385. ********************************************************************* AH **********************/
  3386. HRESULT CStdSentEnum::IsCurrencyRange( TTSItemInfo*& pItemInfo, CSentItemMemory& MemoryManager, CWordList& WordList )
  3387. {
  3388. SPDBG_FUNC( "CStdSentEnum::IsCurrencyRange" );
  3389. HRESULT hr = S_OK;
  3390. TTSItemInfo *pFirstNumberInfo = NULL, *pSecondNumberInfo = NULL;
  3391. const WCHAR *pTempNextChar = m_pNextChar, *pTempEndOfItem = m_pEndOfCurrItem, *pTempEndChar = m_pEndChar;
  3392. const WCHAR *pHyphen = NULL;
  3393. CWordList TempWordList;
  3394. NORM_POSITION ePosition = UNATTACHED; //for currency sign checking
  3395. int iSymbolIndex, iTempSymbolIndex = -1;
  3396. WCHAR *tempNumberString;
  3397. iSymbolIndex = MatchCurrencySign( m_pNextChar, m_pEndOfCurrItem, ePosition );
  3398. if(iSymbolIndex < 0)
  3399. {
  3400. hr = E_INVALIDARG;
  3401. }
  3402. else
  3403. {
  3404. for ( pHyphen = m_pNextChar; pHyphen < m_pEndOfCurrItem; pHyphen++ )
  3405. {
  3406. if ( *pHyphen == L'-' )
  3407. {
  3408. break;
  3409. }
  3410. }
  3411. if ( !( *pHyphen == L'-' &&
  3412. pHyphen > m_pNextChar &&
  3413. pHyphen < m_pEndOfCurrItem - 1 ) )
  3414. {
  3415. hr = E_INVALIDARG;
  3416. }
  3417. else
  3418. {
  3419. *( (WCHAR*)pHyphen) = L' '; // Token must break at hyphen, or IsCurrency() will not work
  3420. m_pNextChar = pTempNextChar;
  3421. m_pEndOfCurrItem = pHyphen;
  3422. NORM_POSITION temp = UNATTACHED;
  3423. iTempSymbolIndex = MatchCurrencySign( m_pNextChar, m_pEndOfCurrItem, temp );
  3424. if( iTempSymbolIndex >= 0 && iSymbolIndex != iTempSymbolIndex )
  3425. {
  3426. hr = E_INVALIDARG;
  3427. }
  3428. else //--- Get both NumberInfos
  3429. {
  3430. hr = IsNumber( pFirstNumberInfo, L"NUMBER", MemoryManager, false );
  3431. if( SUCCEEDED ( hr ) )
  3432. {
  3433. m_pNextChar = pHyphen + 1;
  3434. m_pEndOfCurrItem = pTempEndOfItem;
  3435. iTempSymbolIndex = MatchCurrencySign( m_pNextChar, m_pEndOfCurrItem, temp );
  3436. hr = IsNumber( pSecondNumberInfo, L"NUMBER", MemoryManager, false );
  3437. }
  3438. }
  3439. if( SUCCEEDED ( hr ) )
  3440. {
  3441. //--- If both currency values are cardinal numbers, then the first number can be
  3442. //--- expanded without saying its currency ("$10-12" -> "ten to twelve dollars")
  3443. if( pFirstNumberInfo->Type == eNUM_CARDINAL && pSecondNumberInfo->Type == eNUM_CARDINAL )
  3444. {
  3445. ExpandNumber( (TTSNumberItemInfo*) pFirstNumberInfo, TempWordList );
  3446. }
  3447. else // one or both values are non-cardinal numbers, so we must
  3448. { // expand the first value as a full currency.
  3449. m_pNextChar = pTempNextChar;
  3450. m_pEndOfCurrItem = pHyphen;
  3451. if( ePosition == FOLLOWING )
  3452. {
  3453. if( iTempSymbolIndex < 0 ) // No symbol on first number item - need to fill a buffer
  3454. { // with currency symbol and value to pass to IsCurrency().
  3455. ULONG ulNumChars = (long)(m_pEndOfCurrItem - m_pNextChar + g_CurrencySigns[iSymbolIndex].Sign.Len + 1);
  3456. tempNumberString = (WCHAR*) MemoryManager.GetMemory( (ulNumChars) * sizeof(WCHAR), &hr );
  3457. if ( SUCCEEDED( hr ) )
  3458. {
  3459. ZeroMemory( tempNumberString, ( ulNumChars ) * sizeof( WCHAR ) );
  3460. wcsncpy( tempNumberString, m_pNextChar, m_pEndOfCurrItem - m_pNextChar );
  3461. wcscat( tempNumberString, g_CurrencySigns[iSymbolIndex].Sign.pStr );
  3462. m_pNextChar = tempNumberString;
  3463. m_pEndOfCurrItem = tempNumberString + wcslen( tempNumberString );
  3464. m_pEndChar = m_pEndOfCurrItem;
  3465. }
  3466. }
  3467. else if( iTempSymbolIndex != iSymbolIndex ) // mismatched symbols
  3468. {
  3469. hr = E_INVALIDARG;
  3470. }
  3471. }
  3472. if ( SUCCEEDED ( hr ) )
  3473. {
  3474. hr = IsCurrency( pFirstNumberInfo, MemoryManager, TempWordList );
  3475. m_pEndChar = pTempEndChar;
  3476. }
  3477. }
  3478. }
  3479. if ( SUCCEEDED ( hr ) )
  3480. {
  3481. TTSWord Word;
  3482. ZeroMemory( &Word, sizeof( TTSWord ) );
  3483. Word.pXmlState = &m_pCurrFrag->State;
  3484. Word.eWordPartOfSpeech = MS_Unknown;
  3485. Word.pWordText = g_to.pStr;
  3486. Word.ulWordLen = g_to.Len;
  3487. Word.pLemma = Word.pWordText;
  3488. Word.ulLemmaLen = Word.ulWordLen;
  3489. TempWordList.AddTail( Word );
  3490. m_pNextChar = pHyphen + 1;
  3491. m_pEndOfCurrItem = pTempEndOfItem;
  3492. if( ePosition == PRECEDING )
  3493. {
  3494. iTempSymbolIndex = MatchCurrencySign( m_pNextChar, m_pEndOfCurrItem, ePosition );
  3495. if( iTempSymbolIndex < 0 ) // No symbol on second number item
  3496. { // create temporary string from first currency sign and second number item
  3497. ULONG ulNumChars = (long)(m_pEndOfCurrItem - m_pNextChar + g_CurrencySigns[iSymbolIndex].Sign.Len + 1);
  3498. tempNumberString = (WCHAR*) MemoryManager.GetMemory( (ulNumChars) * sizeof(WCHAR), &hr );
  3499. if ( SUCCEEDED( hr ) )
  3500. {
  3501. ZeroMemory( tempNumberString, ( ulNumChars ) * sizeof( WCHAR ) );
  3502. wcsncpy( tempNumberString, g_CurrencySigns[iSymbolIndex].Sign.pStr, g_CurrencySigns[iSymbolIndex].Sign.Len );
  3503. wcsncpy( tempNumberString+g_CurrencySigns[iSymbolIndex].Sign.Len, m_pNextChar, m_pEndOfCurrItem - m_pNextChar );
  3504. m_pNextChar = tempNumberString;
  3505. m_pEndOfCurrItem = tempNumberString + wcslen( tempNumberString );
  3506. m_pEndChar = m_pEndOfCurrItem;
  3507. }
  3508. }
  3509. else if( iTempSymbolIndex == iSymbolIndex ) // matched leading symbol on second number item
  3510. {
  3511. m_pNextChar = pHyphen + 1;
  3512. m_pEndOfCurrItem = pTempEndOfItem;
  3513. }
  3514. else // mismatched symbol
  3515. {
  3516. hr = E_INVALIDARG;
  3517. }
  3518. }
  3519. if( SUCCEEDED(hr) )
  3520. {
  3521. hr = IsCurrency( pSecondNumberInfo, MemoryManager, TempWordList );
  3522. if ( SUCCEEDED( hr ) )
  3523. {
  3524. //--- Matched a currency range!
  3525. pItemInfo =
  3526. (TTSNumberRangeItemInfo*) MemoryManager.GetMemory( sizeof( TTSNumberRangeItemInfo ), &hr );
  3527. if ( SUCCEEDED( hr ) )
  3528. {
  3529. pItemInfo->Type = eNUM_CURRENCYRANGE;
  3530. ( (TTSNumberRangeItemInfo*) pItemInfo )->pFirstNumberInfo = pFirstNumberInfo;
  3531. ( (TTSNumberRangeItemInfo*) pItemInfo )->pSecondNumberInfo = pSecondNumberInfo;
  3532. //--- Copy temp word list to real word list if everything has succeeded...
  3533. WordList.AddTail( &TempWordList );
  3534. }
  3535. }
  3536. }
  3537. }
  3538. *( (WCHAR*)pHyphen) = L'-';
  3539. }
  3540. }
  3541. //Reset member variables regardless of failure or success
  3542. m_pNextChar = pTempNextChar;
  3543. m_pEndOfCurrItem = pTempEndOfItem;
  3544. m_pEndChar = pTempEndChar;
  3545. return hr;
  3546. } /* IsCurrencyRange */
  3547. /***********************************************************************************************
  3548. * MatchCurrencySign *
  3549. *-------------------*
  3550. * Description:
  3551. * Helper function which tries to match a currency sign at the beginning of a string.
  3552. ********************************************************************* AH **********************/
  3553. int MatchCurrencySign( const WCHAR*& pStartChar, const WCHAR*& pEndChar, NORM_POSITION& ePosition )
  3554. {
  3555. int Index = -1;
  3556. for (int i = 0; i < sp_countof(g_CurrencySigns); i++)
  3557. {
  3558. if ( pEndChar - pStartChar >= g_CurrencySigns[i].Sign.Len &&
  3559. wcsnicmp( pStartChar, g_CurrencySigns[i].Sign.pStr, g_CurrencySigns[i].Sign.Len ) == 0 )
  3560. {
  3561. Index = i;
  3562. pStartChar += g_CurrencySigns[i].Sign.Len;
  3563. ePosition = PRECEDING;
  3564. break;
  3565. }
  3566. }
  3567. if ( Index == -1 )
  3568. {
  3569. for ( int i = 0; i < sp_countof(g_CurrencySigns); i++ )
  3570. {
  3571. if ( pEndChar - pStartChar >= g_CurrencySigns[i].Sign.Len &&
  3572. wcsnicmp( pEndChar - g_CurrencySigns[i].Sign.Len, g_CurrencySigns[i].Sign.pStr, g_CurrencySigns[i].Sign.Len ) == 0 )
  3573. {
  3574. Index = i;
  3575. pEndChar -= g_CurrencySigns[i].Sign.Len;
  3576. ePosition = FOLLOWING;
  3577. break;
  3578. }
  3579. }
  3580. }
  3581. return Index;
  3582. } /* MatchCurrencySign */
  3583. /***********************************************************************************************
  3584. * Zeroes *
  3585. *--------*
  3586. * Description:
  3587. * A helper function which simply determines if a number string contains only zeroes...
  3588. * Note: This function does not do parameter validation. Assumed to be done by caller.
  3589. ********************************************************************* AH **********************/
  3590. bool CStdSentEnum::Zeroes(const WCHAR *NumberString)
  3591. {
  3592. bool bAllZeroes = true;
  3593. for (ULONG i = 0; i < wcslen(NumberString); i++)
  3594. {
  3595. if (NumberString[i] != '0' && isdigit(NumberString[i]) )
  3596. {
  3597. bAllZeroes = false;
  3598. break;
  3599. }
  3600. else if ( !isdigit( NumberString[i] ) && NumberString[i] != ',' )
  3601. {
  3602. break;
  3603. }
  3604. }
  3605. return bAllZeroes;
  3606. } /* Zeroes */
  3607. /***********************************************************************************************
  3608. * ThreeZeroes *
  3609. *-------------*
  3610. * Description:
  3611. * A helper function which simply determines if a number string contains three zeroes...
  3612. * Note: This function does not do parameter validation. Assumed to be done by caller.
  3613. ********************************************************************* AH **********************/
  3614. bool CStdSentEnum::ThreeZeroes(const WCHAR *NumberString)
  3615. {
  3616. bool bThreeZeroes = true;
  3617. for (ULONG i = 0; i < 3; i++)
  3618. {
  3619. if (NumberString[i] != '0' && isdigit(NumberString[i]))
  3620. {
  3621. bThreeZeroes = false;
  3622. break;
  3623. }
  3624. }
  3625. return bThreeZeroes;
  3626. } /* ThreeZeroes */
  3627. //-----------End Of File-------------------------------------------------------------------