Leaked source code of Windows Server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

504 lines
18 KiB

  1. /*******************************************************************************
  2. * MainNorm.cpp *
  3. *--------------*
  4. * Description:
  5. *
  6. *-------------------------------------------------------------------------------
  7. * Created By: AH Date: 01/18/2000
  8. * Copyright (C) 2000 Microsoft Corporation
  9. * All Rights Reserved
  10. *
  11. *******************************************************************************/
  12. //--- Additional includes
  13. #include "stdafx.h"
  14. #ifndef StdSentEnum_h
  15. #include "stdsentenum.h"
  16. #endif
  17. /*****************************************************************************
  18. * CStdSentEnum::Normalize *
  19. *-------------------------*
  20. *
  21. ********************************************************************** AH ***/
  22. HRESULT CStdSentEnum::Normalize( CItemList& ItemList, SPLISTPOS ListPos, CSentItemMemory& MemoryManager )
  23. {
  24. SPDBG_FUNC( "CStdSentEnum::Normalize" );
  25. HRESULT hr = S_OK;
  26. TTSItemInfo* pItemNormInfo = NULL;
  27. CWordList WordList;
  28. const SPVTEXTFRAG* pTempFrag = m_pCurrFrag;
  29. TTSSentItem& TempItem = ItemList.GetAt( ListPos );
  30. if ( TempItem.pItemInfo )
  31. {
  32. pItemNormInfo = TempItem.pItemInfo;
  33. }
  34. //--- Match the normalization category of the current token.
  35. if ( m_pCurrFrag->State.eAction == SPVA_Speak )
  36. {
  37. if ( !pItemNormInfo ||
  38. ( pItemNormInfo->Type != eABBREVIATION &&
  39. pItemNormInfo->Type != eINITIALISM ) )
  40. {
  41. hr = MatchCategory( pItemNormInfo, MemoryManager, WordList );
  42. }
  43. }
  44. //--- Action must be SPVA_SpellOut - assign eSPELLOUT as category
  45. else
  46. {
  47. pItemNormInfo = (TTSItemInfo*) MemoryManager.GetMemory( sizeof(TTSItemInfo), &hr );
  48. if ( SUCCEEDED( hr ) )
  49. {
  50. pItemNormInfo->Type = eSPELLOUT;
  51. }
  52. }
  53. if (SUCCEEDED(hr))
  54. {
  55. switch ( pItemNormInfo->Type )
  56. {
  57. //--- Alpha Word - just insert into the Item List.
  58. case eALPHA_WORD:
  59. {
  60. CSentItem Item;
  61. Item.pItemSrcText = m_pNextChar;
  62. Item.ulItemSrcLen = (ULONG)(m_pEndOfCurrItem - m_pNextChar);
  63. Item.ulItemSrcOffset = pTempFrag->ulTextSrcOffset +
  64. (ULONG)( m_pNextChar - pTempFrag->pTextStart );
  65. Item.ulNumWords = 1;
  66. Item.Words = (TTSWord*) MemoryManager.GetMemory( sizeof(TTSWord), &hr );
  67. if ( SUCCEEDED( hr ) )
  68. {
  69. ZeroMemory( Item.Words, sizeof(TTSWord) );
  70. Item.Words[0].pXmlState = &pTempFrag->State;
  71. Item.Words[0].pWordText = m_pNextChar;
  72. Item.Words[0].ulWordLen = Item.ulItemSrcLen;
  73. Item.Words[0].pLemma = Item.Words[0].pWordText;
  74. Item.Words[0].ulLemmaLen = Item.Words[0].ulWordLen;
  75. Item.Words[0].eWordPartOfSpeech = MS_Unknown;
  76. Item.eItemPartOfSpeech = MS_Unknown;
  77. Item.pItemInfo = (TTSItemInfo*) MemoryManager.GetMemory( sizeof(TTSItemInfo*), &hr );
  78. if ( SUCCEEDED( hr ) )
  79. {
  80. Item.pItemInfo->Type = eALPHA_WORD;
  81. ItemList.SetAt( ListPos, Item );
  82. }
  83. }
  84. }
  85. break;
  86. case eABBREVIATION:
  87. case eABBREVIATION_NORMALIZE:
  88. case eINITIALISM:
  89. break;
  90. //--- Multi-token categories have already been expanded into WordList, now just accumulate
  91. //--- words, and insert back into the Item List.
  92. case eNEWNUM_PHONENUMBER:
  93. //--- Special case - remove parentheses (of area code), if present in the item list
  94. {
  95. SPLISTPOS TempPos = ListPos;
  96. CSentItem Item = ItemList.GetPrev( TempPos );
  97. if ( TempPos )
  98. {
  99. SPLISTPOS RemovePos = TempPos;
  100. Item = ItemList.GetPrev( TempPos );
  101. if ( Item.pItemInfo->Type == eOPEN_PARENTHESIS &&
  102. ( (TTSPhoneNumberItemInfo*) pItemNormInfo )->pAreaCode )
  103. {
  104. ItemList.RemoveAt( RemovePos );
  105. m_pNextChar--;
  106. }
  107. }
  108. }
  109. case eNUM_CURRENCY:
  110. case eNUM_CURRENCYRANGE:
  111. case eTIMEOFDAY:
  112. case eDATE_LONGFORM:
  113. case eSTATE_AND_ZIPCODE:
  114. case eTIME_RANGE:
  115. {
  116. //--- Set Item data, and add to ItemList.
  117. if ( SUCCEEDED( hr ) )
  118. {
  119. CSentItem Item;
  120. Item.pItemSrcText = m_pNextChar;
  121. Item.ulItemSrcLen = (ULONG)(m_pEndOfCurrItem - m_pNextChar);
  122. Item.ulItemSrcOffset = pTempFrag->ulTextSrcOffset +
  123. (ULONG)( m_pNextChar - pTempFrag->pTextStart );
  124. hr = SetWordList( Item, WordList, MemoryManager );
  125. if ( SUCCEEDED( hr ) )
  126. {
  127. Item.pItemInfo = pItemNormInfo;
  128. ItemList.SetAt( ListPos, Item );
  129. }
  130. }
  131. }
  132. break;
  133. //--- Expand the single token, according to its normalization category.
  134. default:
  135. hr = ExpandCategory( pItemNormInfo, ItemList, ListPos, MemoryManager );
  136. break;
  137. }
  138. }
  139. return hr;
  140. } /* Normalize */
  141. /*****************************************************************************
  142. * CStdSentEnum::MatchCategory *
  143. *-----------------------------*
  144. *
  145. ********************************************************************** AH ***/
  146. HRESULT CStdSentEnum::MatchCategory( TTSItemInfo*& pItemNormInfo, CSentItemMemory& MemoryManager,
  147. CWordList& WordList )
  148. {
  149. SPDBG_FUNC( "CStdSentEnum::MatchCategory" );
  150. SPDBG_ASSERT( m_pNextChar );
  151. HRESULT hr = E_INVALIDARG;
  152. //--- Context has been specified
  153. if ( m_pCurrFrag->State.Context.pCategory )
  154. {
  155. if ( wcsicmp( m_pCurrFrag->State.Context.pCategory, L"ADDRESS" ) == 0 )
  156. {
  157. hr = IsZipCode( pItemNormInfo, m_pCurrFrag->State.Context.pCategory, MemoryManager );
  158. }
  159. else if ( wcsnicmp( m_pCurrFrag->State.Context.pCategory, L"DATE", 4 ) == 0 )
  160. {
  161. hr = IsNumericCompactDate( pItemNormInfo, m_pCurrFrag->State.Context.pCategory, MemoryManager );
  162. if ( hr == E_INVALIDARG )
  163. {
  164. hr = IsMonthStringCompactDate( pItemNormInfo, m_pCurrFrag->State.Context.pCategory, MemoryManager );
  165. }
  166. }
  167. else if ( wcsnicmp( m_pCurrFrag->State.Context.pCategory, L"TIME", 4 ) == 0 )
  168. {
  169. hr = IsTime( pItemNormInfo, m_pCurrFrag->State.Context.pCategory, MemoryManager );
  170. }
  171. else if ( wcsnicmp( m_pCurrFrag->State.Context.pCategory, L"NUM", 3 ) == 0 )
  172. {
  173. hr = IsNumberCategory( pItemNormInfo, m_pCurrFrag->State.Context.pCategory, MemoryManager );
  174. if ( hr == E_INVALIDARG )
  175. {
  176. hr = IsRomanNumeral( pItemNormInfo, m_pCurrFrag->State.Context.pCategory, MemoryManager );
  177. }
  178. }
  179. else if ( wcsicmp( m_pCurrFrag->State.Context.pCategory, L"PHONE_NUMBER" ) == 0 )
  180. {
  181. hr = IsPhoneNumber( pItemNormInfo, L"PHONE_NUMBER", MemoryManager, WordList );
  182. }
  183. }
  184. //--- Default Context
  185. if ( hr == E_INVALIDARG )
  186. {
  187. //--- Do ALPHA Normalization checks
  188. if ( hr == E_INVALIDARG )
  189. {
  190. hr = IsAlphaWord( m_pNextChar, m_pEndOfCurrItem, pItemNormInfo, MemoryManager );
  191. //--- Check ALPHA Exceptions
  192. if ( SUCCEEDED( hr ) )
  193. {
  194. hr = E_INVALIDARG;
  195. if ( hr == E_INVALIDARG )
  196. {
  197. hr = IsLongFormDate_DMDY( pItemNormInfo, MemoryManager, WordList );
  198. }
  199. if ( hr == E_INVALIDARG )
  200. {
  201. hr = IsLongFormDate_DDMY( pItemNormInfo, MemoryManager, WordList );
  202. }
  203. if ( hr == E_INVALIDARG )
  204. {
  205. hr = IsStateAndZipcode( pItemNormInfo, MemoryManager, WordList );
  206. }
  207. if ( hr == E_INVALIDARG )
  208. {
  209. hr = IsCurrency( pItemNormInfo, MemoryManager, WordList );
  210. }
  211. if ( hr == E_INVALIDARG )
  212. {
  213. hr = S_OK;
  214. }
  215. }
  216. }
  217. //--- Do Multi-Token Normalization checks
  218. if ( hr == E_INVALIDARG )
  219. {
  220. hr = IsLongFormDate_DMDY( pItemNormInfo, MemoryManager, WordList );
  221. }
  222. if ( hr == E_INVALIDARG )
  223. {
  224. hr = IsLongFormDate_DDMY( pItemNormInfo, MemoryManager, WordList );
  225. }
  226. if ( hr == E_INVALIDARG )
  227. {
  228. hr = IsCurrency( pItemNormInfo, MemoryManager, WordList );
  229. }
  230. //--- Do TIME Normalization check
  231. if ( hr == E_INVALIDARG )
  232. {
  233. hr = IsTimeRange( pItemNormInfo, MemoryManager, WordList );
  234. }
  235. if ( hr == E_INVALIDARG )
  236. {
  237. hr = IsTimeOfDay( pItemNormInfo, MemoryManager, WordList );
  238. }
  239. //--- Do NUMBER Normalization checks
  240. if ( hr == E_INVALIDARG )
  241. {
  242. hr = IsPhoneNumber( pItemNormInfo, NULL, MemoryManager, WordList );
  243. }
  244. if ( hr == E_INVALIDARG )
  245. {
  246. hr = IsNumberCategory( pItemNormInfo, NULL, MemoryManager );
  247. }
  248. if ( hr == E_INVALIDARG )
  249. {
  250. hr = IsNumberRange( pItemNormInfo, MemoryManager );
  251. }
  252. if ( hr == E_INVALIDARG )
  253. {
  254. hr = IsCurrencyRange( pItemNormInfo, MemoryManager, WordList );
  255. }
  256. //--- Do DATE Normalization checks
  257. if ( hr == E_INVALIDARG )
  258. {
  259. hr = IsNumericCompactDate( pItemNormInfo, NULL, MemoryManager );
  260. }
  261. if ( hr == E_INVALIDARG )
  262. {
  263. hr = IsMonthStringCompactDate( pItemNormInfo, NULL, MemoryManager );
  264. }
  265. if ( hr == E_INVALIDARG )
  266. {
  267. hr = IsDecade( pItemNormInfo, MemoryManager );
  268. }
  269. //--- Do TIME Normalization checks
  270. if ( hr == E_INVALIDARG )
  271. {
  272. hr = IsTime( pItemNormInfo, NULL, MemoryManager );
  273. }
  274. if ( hr == E_INVALIDARG )
  275. {
  276. hr = IsHyphenatedString( m_pNextChar, m_pEndOfCurrItem, pItemNormInfo, MemoryManager );
  277. }
  278. if ( hr == E_INVALIDARG )
  279. {
  280. hr = IsSuffix( m_pNextChar, m_pEndOfCurrItem, pItemNormInfo, MemoryManager );
  281. }
  282. }
  283. if ( hr == E_INVALIDARG &&
  284. !pItemNormInfo )
  285. {
  286. hr = S_OK;
  287. pItemNormInfo = (TTSItemInfo*) MemoryManager.GetMemory( sizeof(TTSItemInfo), &hr );
  288. if ( SUCCEEDED( hr ) )
  289. {
  290. pItemNormInfo->Type = eUNMATCHED;
  291. }
  292. }
  293. else if ( hr == E_INVALIDARG &&
  294. pItemNormInfo )
  295. {
  296. hr = S_OK;
  297. }
  298. return hr;
  299. } /* MatchCategory */
  300. /*****************************************************************************
  301. * CStdSentEnum::ExpandCategory *
  302. *------------------------------*
  303. * Expands previously matched items in the Item List into their normalized
  304. * forms.
  305. ********************************************************************** AH ***/
  306. HRESULT CStdSentEnum::ExpandCategory( TTSItemInfo*& pItemNormInfo, CItemList& ItemList, SPLISTPOS ListPos,
  307. CSentItemMemory& MemoryManager )
  308. {
  309. SPDBG_FUNC( "CStdSentEnum::ExpandCategory" );
  310. HRESULT hr = S_OK;
  311. CSentItem Item;
  312. CWordList WordList;
  313. Item.pItemSrcText = m_pNextChar;
  314. Item.ulItemSrcLen = (ULONG)(m_pEndOfCurrItem - m_pNextChar);
  315. Item.ulItemSrcOffset = m_pCurrFrag->ulTextSrcOffset +
  316. (ULONG)( m_pNextChar - m_pCurrFrag->pTextStart );
  317. switch ( pItemNormInfo->Type )
  318. {
  319. case eNUM_ROMAN_NUMERAL:
  320. switch ( ( (TTSRomanNumeralItemInfo*) pItemNormInfo )->pNumberInfo->Type )
  321. {
  322. case eDATE_YEAR:
  323. hr = ExpandYear( (TTSYearItemInfo*) ( (TTSRomanNumeralItemInfo*) pItemNormInfo )->pNumberInfo,
  324. WordList );
  325. break;
  326. default:
  327. hr = ExpandNumber( (TTSNumberItemInfo*) ( (TTSRomanNumeralItemInfo*) pItemNormInfo )->pNumberInfo,
  328. WordList );
  329. break;
  330. }
  331. break;
  332. case eNUM_CARDINAL:
  333. case eNUM_ORDINAL:
  334. case eNUM_DECIMAL:
  335. case eNUM_FRACTION:
  336. case eNUM_MIXEDFRACTION:
  337. hr = ExpandNumber( (TTSNumberItemInfo*) pItemNormInfo, WordList );
  338. break;
  339. case eNUM_PERCENT:
  340. hr = ExpandPercent( (TTSNumberItemInfo*) pItemNormInfo, WordList );
  341. break;
  342. case eNUM_DEGREES:
  343. hr = ExpandDegrees( (TTSNumberItemInfo*) pItemNormInfo, WordList );
  344. break;
  345. case eNUM_SQUARED:
  346. hr = ExpandSquare( (TTSNumberItemInfo*) pItemNormInfo, WordList );
  347. break;
  348. case eNUM_CUBED:
  349. hr = ExpandCube( (TTSNumberItemInfo*) pItemNormInfo, WordList );
  350. break;
  351. case eNUM_ZIPCODE:
  352. hr = ExpandZipCode( (TTSZipCodeItemInfo*) pItemNormInfo, WordList );
  353. break;
  354. case eNUM_RANGE:
  355. hr = ExpandNumberRange( (TTSNumberRangeItemInfo*) pItemNormInfo, WordList );
  356. break;
  357. case eDATE:
  358. hr = ExpandDate( (TTSDateItemInfo*) pItemNormInfo, WordList );
  359. break;
  360. case eDATE_YEAR:
  361. hr = ExpandYear( (TTSYearItemInfo*) pItemNormInfo, WordList );
  362. break;
  363. case eDECADE:
  364. hr = ExpandDecade( (TTSDecadeItemInfo*) pItemNormInfo, WordList );
  365. break;
  366. case eTIME:
  367. hr = ExpandTime( (TTSTimeItemInfo*) pItemNormInfo, WordList );
  368. break;
  369. case eHYPHENATED_STRING:
  370. hr = ExpandHyphenatedString( (TTSHyphenatedStringInfo*) pItemNormInfo, WordList );
  371. break;
  372. case eSUFFIX:
  373. hr = ExpandSuffix( (TTSSuffixItemInfo*) pItemNormInfo, WordList );
  374. break;
  375. case eSPELLOUT:
  376. hr = SpellOutString( WordList );
  377. break;
  378. case eUNMATCHED:
  379. default:
  380. hr = ExpandUnrecognizedString( WordList, MemoryManager );
  381. break;
  382. }
  383. //--- Set Item data, and add to ItemList.
  384. if ( SUCCEEDED( hr ) )
  385. {
  386. hr = SetWordList( Item, WordList, MemoryManager );
  387. if ( SUCCEEDED( hr ) )
  388. {
  389. Item.pItemInfo = pItemNormInfo;
  390. ItemList.SetAt( ListPos, Item );
  391. }
  392. }
  393. return hr;
  394. } /* ExpandCategory */
  395. /*****************************************************************************
  396. * CStdSentEnum::DoUnicodeToAsciiMap *
  397. *-----------------------------------*
  398. * Description:
  399. * Maps incoming strings to known values.
  400. ********************************************************************* AH ****/
  401. HRESULT CStdSentEnum::DoUnicodeToAsciiMap( const WCHAR *pUnicodeString, ULONG ulUnicodeStringLength,
  402. WCHAR *pConvertedString )
  403. {
  404. SPDBG_FUNC( "CSpVoice::DoUnicodeToAsciiMap" );
  405. HRESULT hr = S_OK;
  406. unsigned char *pBuffer = NULL;
  407. WCHAR *pWideCharBuffer = NULL;
  408. if ( pUnicodeString )
  409. {
  410. //--- Make copy of pUnicodeString
  411. pWideCharBuffer = new WCHAR[ulUnicodeStringLength+1];
  412. if ( !pWideCharBuffer )
  413. {
  414. hr = E_OUTOFMEMORY;
  415. }
  416. if ( SUCCEEDED( hr ) )
  417. {
  418. wcsncpy( pWideCharBuffer, pUnicodeString, ulUnicodeStringLength );
  419. pWideCharBuffer[ulUnicodeStringLength] = 0;
  420. pBuffer = new unsigned char[ulUnicodeStringLength+1];
  421. if ( !pBuffer || !pWideCharBuffer )
  422. {
  423. hr = E_OUTOFMEMORY;
  424. }
  425. if ( SUCCEEDED(hr) )
  426. {
  427. pBuffer[ulUnicodeStringLength] = 0;
  428. if ( ulUnicodeStringLength > 0 )
  429. {
  430. //--- Map WCHARs to ANSI chars
  431. if ( !WideCharToMultiByte( 1252, NULL, pWideCharBuffer, ulUnicodeStringLength, (char*) pBuffer,
  432. ulUnicodeStringLength, &g_pFlagCharacter, NULL ) )
  433. {
  434. hr = E_UNEXPECTED;
  435. }
  436. //--- Use internal table to map ANSI to ASCII
  437. for (ULONG i = 0; i < ulUnicodeStringLength && SUCCEEDED(hr); i++)
  438. {
  439. pBuffer[i] = g_AnsiToAscii[pBuffer[i]];
  440. }
  441. //--- Map back to WCHARs
  442. for ( i = 0; i < ulUnicodeStringLength && SUCCEEDED(hr); i++ )
  443. {
  444. pConvertedString[i] = pBuffer[i];
  445. }
  446. }
  447. }
  448. }
  449. }
  450. else
  451. {
  452. pConvertedString = NULL;
  453. }
  454. if (pBuffer)
  455. {
  456. delete [] pBuffer;
  457. }
  458. if (pWideCharBuffer)
  459. {
  460. delete [] pWideCharBuffer;
  461. }
  462. return hr;
  463. } /* CStdSentEnum::DoUnicodeToAsciiMap */