Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

628 lines
23 KiB

  1. /***********************************************************************************************
  2. * MiscNorm.cpp *
  3. *--------------*
  4. * Description:
  5. * These are miscellaneous functions used in normalization.
  6. *-----------------------------------------------------------------------------------------------
  7. * Created by AH August 3, 1999
  8. * Copyright (C) 1999 Microsoft Corporation
  9. * All Rights Reserved
  10. *
  11. ***********************************************************************************************/
  12. #include "stdafx.h"
  13. #ifndef StdSentEnum_h
  14. #include "stdsentenum.h"
  15. #endif
  16. /*****************************************************************************
  17. * IsStateAndZipcode *
  18. *-------------------*
  19. * This function checks to see if the next two tokens are a state
  20. * abbreviation and zipcode.
  21. *
  22. ********************************************************************* AH ****/
  23. HRESULT CStdSentEnum::IsStateAndZipcode( TTSItemInfo*& pItemNormInfo, CSentItemMemory& MemoryManager,
  24. CWordList& WordList )
  25. {
  26. SPDBG_FUNC( "CStdSentEnum::IsStateAndZipcode" );
  27. HRESULT hr = S_OK;
  28. const StateStruct *pState = NULL;
  29. const WCHAR temp = *m_pEndOfCurrItem;
  30. *( (WCHAR*) m_pEndOfCurrItem ) = 0;
  31. //--- Try to match a state abbreviation
  32. pState = (StateStruct*) bsearch( (void*) m_pNextChar, (void*) g_StateAbbreviations, sp_countof( g_StateAbbreviations),
  33. sizeof( StateStruct ), CompareStringAndStateStruct );
  34. if ( pState )
  35. {
  36. *( (WCHAR*) m_pEndOfCurrItem ) = temp;
  37. const WCHAR *pTempNextChar = m_pNextChar, *pTempEndChar = m_pEndChar, *pTempEndOfCurrItem = m_pEndOfCurrItem;
  38. const SPVTEXTFRAG *pTempFrag = m_pCurrFrag;
  39. CItemList PostStateList;
  40. TTSItemInfo *pZipCodeInfo;
  41. m_pNextChar = m_pEndOfCurrItem;
  42. if ( *m_pNextChar == L',' ||
  43. *m_pNextChar == L';' )
  44. {
  45. m_pNextChar++;
  46. }
  47. hr = SkipWhiteSpaceAndTags( m_pNextChar, m_pEndChar, m_pCurrFrag, MemoryManager, true, &PostStateList );
  48. if ( !m_pNextChar &&
  49. SUCCEEDED( hr ) )
  50. {
  51. hr = E_INVALIDARG;
  52. }
  53. else if ( SUCCEEDED( hr ) )
  54. {
  55. m_pEndOfCurrItem = FindTokenEnd( m_pNextChar, m_pEndChar );
  56. while ( IsMiscPunctuation( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED ||
  57. IsGroupEnding( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED ||
  58. IsQuotationMark( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED ||
  59. IsEOSItem( *(m_pEndOfCurrItem - 1) ) != eUNMATCHED )
  60. {
  61. m_pEndOfCurrItem--;
  62. }
  63. }
  64. if ( SUCCEEDED( hr ) )
  65. {
  66. hr = IsZipCode( pZipCodeInfo, L"ZIPCODE", MemoryManager );
  67. if ( SUCCEEDED( hr ) )
  68. {
  69. pItemNormInfo =
  70. (TTSStateAndZipCodeItemInfo*) MemoryManager.GetMemory( sizeof( TTSStateAndZipCodeItemInfo ),
  71. &hr );
  72. if ( SUCCEEDED( hr ) )
  73. {
  74. pItemNormInfo->Type = eSTATE_AND_ZIPCODE;
  75. ( (TTSStateAndZipCodeItemInfo*) pItemNormInfo )->pZipCode = (TTSZipCodeItemInfo*) pZipCodeInfo;
  76. TTSWord Word;
  77. ZeroMemory( &Word, sizeof( TTSWord ) );
  78. //--- Some states have multi-word names
  79. const WCHAR *pNextPointer = NULL, *pPrevPointer = NULL;
  80. ULONG ulLength = 0;
  81. pNextPointer = pState->FullName.pStr;
  82. do {
  83. pPrevPointer = pNextPointer;
  84. pNextPointer = wcschr(pPrevPointer, L' ');
  85. if (pNextPointer)
  86. {
  87. ulLength = (ULONG)(pNextPointer - pPrevPointer);
  88. pNextPointer++;
  89. }
  90. else
  91. {
  92. ulLength = wcslen(pPrevPointer);
  93. }
  94. Word.pXmlState = &pTempFrag->State;
  95. Word.pWordText = pPrevPointer;
  96. Word.ulWordLen = ulLength;
  97. Word.pLemma = pPrevPointer;
  98. Word.ulLemmaLen = ulLength;
  99. Word.eWordPartOfSpeech = MS_Unknown;
  100. WordList.AddTail( Word );
  101. } while ( pNextPointer );
  102. while( !PostStateList.IsEmpty() )
  103. {
  104. WordList.AddTail( ( PostStateList.RemoveHead() ).Words[0] );
  105. }
  106. hr = ExpandZipCode( (TTSZipCodeItemInfo*) pZipCodeInfo, WordList );
  107. }
  108. }
  109. else
  110. {
  111. m_pNextChar = pTempNextChar;
  112. m_pEndOfCurrItem = pTempEndOfCurrItem;
  113. m_pEndChar = pTempEndChar;
  114. m_pCurrFrag = pTempFrag;
  115. hr = E_INVALIDARG;
  116. }
  117. }
  118. m_pNextChar = pTempNextChar;
  119. }
  120. else
  121. {
  122. *( (WCHAR*) m_pEndOfCurrItem ) = temp;
  123. hr = E_INVALIDARG;
  124. }
  125. return hr;
  126. } /* IsStateAndZipcode */
  127. /*****************************************************************************
  128. * IsHyphenatedString *
  129. *--------------------*
  130. * This function checks to see if the next token is a hyphenated string
  131. * consisting of two alpha words or numbers, or one of these and another
  132. * hyphenated string.
  133. ********************************************************************* AH ****/
  134. HRESULT CStdSentEnum::IsHyphenatedString( const WCHAR* pStartChar, const WCHAR* pEndChar,
  135. TTSItemInfo*& pItemNormInfo, CSentItemMemory& MemoryManager )
  136. {
  137. SPDBG_FUNC( "CStdSentEnum::IsHyphenatedString" );
  138. HRESULT hr = S_OK;
  139. TTSItemInfo *pFirstChunkInfo = NULL, *pSecondChunkInfo = NULL;
  140. const WCHAR* pHyphen = NULL, *pTempNextChar = m_pNextChar, *pTempEndOfItem = m_pEndOfCurrItem;
  141. for ( pHyphen = pStartChar; pHyphen < pEndChar; pHyphen++ )
  142. {
  143. if ( *pHyphen == L'-' )
  144. {
  145. break;
  146. }
  147. }
  148. if ( *pHyphen == L'-' &&
  149. pHyphen > pStartChar &&
  150. pHyphen < pEndChar - 1 )
  151. {
  152. hr = IsAlphaWord( pStartChar, pHyphen, pFirstChunkInfo, MemoryManager );
  153. if ( hr == E_INVALIDARG )
  154. {
  155. m_pNextChar = pStartChar;
  156. m_pEndOfCurrItem = pHyphen;
  157. hr = IsNumberCategory( pFirstChunkInfo, L"NUMBER", MemoryManager );
  158. }
  159. if ( SUCCEEDED( hr ) )
  160. {
  161. hr = IsAlphaWord( pHyphen + 1, pEndChar, pSecondChunkInfo, MemoryManager );
  162. if ( hr == E_INVALIDARG )
  163. {
  164. m_pNextChar = pHyphen + 1;
  165. m_pEndOfCurrItem = pEndChar;
  166. hr = IsNumberCategory( pSecondChunkInfo, L"NUMBER", MemoryManager );
  167. }
  168. if ( hr == E_INVALIDARG )
  169. {
  170. hr = IsHyphenatedString( pHyphen + 1, pEndChar, pSecondChunkInfo, MemoryManager );
  171. }
  172. if ( hr == E_INVALIDARG )
  173. {
  174. if ( pFirstChunkInfo->Type != eALPHA_WORD )
  175. {
  176. delete ( (TTSNumberItemInfo*) pFirstChunkInfo )->pWordList;
  177. }
  178. }
  179. }
  180. m_pNextChar = pTempNextChar;
  181. m_pEndOfCurrItem = pTempEndOfItem;
  182. }
  183. else
  184. {
  185. hr = E_INVALIDARG;
  186. }
  187. if ( SUCCEEDED( hr ) )
  188. {
  189. pItemNormInfo = (TTSHyphenatedStringInfo*) MemoryManager.GetMemory( sizeof(TTSHyphenatedStringInfo), &hr );
  190. if ( SUCCEEDED( hr ) )
  191. {
  192. pItemNormInfo->Type = eHYPHENATED_STRING;
  193. ( (TTSHyphenatedStringInfo*) pItemNormInfo )->pFirstChunkInfo = pFirstChunkInfo;
  194. ( (TTSHyphenatedStringInfo*) pItemNormInfo )->pSecondChunkInfo = pSecondChunkInfo;
  195. ( (TTSHyphenatedStringInfo*) pItemNormInfo )->pFirstChunk = pStartChar;
  196. ( (TTSHyphenatedStringInfo*) pItemNormInfo )->pSecondChunk = pHyphen + 1;
  197. }
  198. }
  199. return hr;
  200. } /* IsHyphenatedString */
  201. /*****************************************************************************
  202. * ExpandHyphenatedString *
  203. *------------------------*
  204. * This function expands hyphenated strings.
  205. ********************************************************************* AH ****/
  206. HRESULT CStdSentEnum::ExpandHyphenatedString( TTSHyphenatedStringInfo* pItemInfo, CWordList& WordList )
  207. {
  208. SPDBG_FUNC( "CStdSentEnum::ExpandHyphenatedString" );
  209. HRESULT hr = S_OK;
  210. TTSWord Word;
  211. ZeroMemory( &Word, sizeof(TTSWord) );
  212. Word.pXmlState = &m_pCurrFrag->State;
  213. Word.eWordPartOfSpeech = MS_Unknown;
  214. if ( pItemInfo->pFirstChunkInfo->Type == eALPHA_WORD )
  215. {
  216. Word.pWordText = pItemInfo->pFirstChunk;
  217. Word.ulWordLen = (ULONG)(pItemInfo->pSecondChunk - pItemInfo->pFirstChunk - 1);
  218. Word.pLemma = Word.pWordText;
  219. Word.ulLemmaLen = Word.ulWordLen;
  220. WordList.AddTail( Word );
  221. }
  222. else
  223. {
  224. hr = ExpandNumber( (TTSNumberItemInfo*) pItemInfo->pFirstChunkInfo, WordList );
  225. }
  226. if ( SUCCEEDED( hr ) )
  227. {
  228. if ( pItemInfo->pSecondChunkInfo->Type == eALPHA_WORD )
  229. {
  230. Word.pWordText = pItemInfo->pSecondChunk;
  231. Word.ulWordLen = (ULONG)(m_pEndOfCurrItem - pItemInfo->pSecondChunk);
  232. Word.pLemma = Word.pWordText;
  233. Word.ulLemmaLen = Word.ulWordLen;
  234. WordList.AddTail( Word );
  235. }
  236. else if ( pItemInfo->pSecondChunkInfo->Type == eHYPHENATED_STRING )
  237. {
  238. hr = ExpandHyphenatedString( (TTSHyphenatedStringInfo*) pItemInfo->pSecondChunkInfo, WordList );
  239. }
  240. else
  241. {
  242. hr = ExpandNumber( (TTSNumberItemInfo*) pItemInfo->pSecondChunkInfo, WordList );
  243. }
  244. }
  245. return hr;
  246. } /* ExpandHyphenatedString */
  247. /*****************************************************************************
  248. * IsSuffix *
  249. *----------*
  250. * This function checks to see if the next token is a suffix string
  251. * consisting of a hyphen followed by alpha characters.
  252. *
  253. ********************************************************************* AH ****/
  254. HRESULT CStdSentEnum::IsSuffix( const WCHAR* pStartChar, const WCHAR* pEndChar,
  255. TTSItemInfo*& pItemNormInfo, CSentItemMemory& MemoryManager )
  256. {
  257. SPDBG_FUNC( "CStdSentEnum::IsSuffix" );
  258. HRESULT hr = S_OK;
  259. if ( *pStartChar == L'-' )
  260. {
  261. const WCHAR *pIterator = pStartChar + 1;
  262. while ( pIterator < pEndChar &&
  263. iswalpha( *pIterator ) )
  264. {
  265. pIterator++;
  266. }
  267. if ( pIterator == pEndChar &&
  268. pIterator != ( pStartChar + 1 ) )
  269. {
  270. pItemNormInfo = (TTSSuffixItemInfo*) MemoryManager.GetMemory( sizeof( TTSSuffixItemInfo), &hr );
  271. if ( SUCCEEDED( hr ) )
  272. {
  273. pItemNormInfo->Type = eSUFFIX;
  274. ( (TTSSuffixItemInfo*) pItemNormInfo )->pFirstChar = pStartChar + 1;
  275. ( (TTSSuffixItemInfo*) pItemNormInfo )->ulNumChars = (ULONG)( ( pEndChar - pStartChar ) - 1 );
  276. }
  277. }
  278. else
  279. {
  280. hr = E_INVALIDARG;
  281. }
  282. }
  283. else
  284. {
  285. hr = E_INVALIDARG;
  286. }
  287. return hr;
  288. } /* IsSuffix */
  289. /*****************************************************************************
  290. * ExpandSuffix *
  291. *--------------*
  292. * This function expands strings determined to by suffixes by IsSuffix
  293. *
  294. ********************************************************************* AH ****/
  295. HRESULT CStdSentEnum::ExpandSuffix( TTSSuffixItemInfo* pItemInfo, CWordList& WordList )
  296. {
  297. SPDBG_FUNC( "CStdSentEnum::ExpandSuffix" );
  298. HRESULT hr = S_OK;
  299. TTSWord Word;
  300. ZeroMemory( &Word, sizeof( TTSWord ) );
  301. Word.pXmlState = &m_pCurrFrag->State;
  302. Word.eWordPartOfSpeech = MS_Unknown;
  303. for ( ULONG i = 0; i < pItemInfo->ulNumChars; i++ )
  304. {
  305. Word.pWordText = g_ANSICharacterProns[ pItemInfo->pFirstChar[i] ].pStr;
  306. Word.ulWordLen = g_ANSICharacterProns[ pItemInfo->pFirstChar[i] ].Len;
  307. Word.pLemma = Word.pWordText;
  308. Word.ulLemmaLen = Word.ulWordLen;
  309. WordList.AddTail( Word );
  310. }
  311. return hr;
  312. } /* ExpandSuffix */
  313. /*****************************************************************************
  314. * ExpandPunctuation *
  315. *-------------------*
  316. * This function expands punctuation marks into words - e.g. '.' becomes
  317. * "period". It actually just uses the same table that
  318. * ExpandUnrecognizedString uses to look up string versions of characters.
  319. ********************************************************************* AH ****/
  320. void CStdSentEnum::ExpandPunctuation( CWordList& WordList, WCHAR wc )
  321. {
  322. const WCHAR *pPrevPointer = NULL, *pNextPointer = NULL;
  323. ULONG ulLength = 0;
  324. TTSWord Word;
  325. ZeroMemory( &Word, sizeof( TTSWord ) );
  326. Word.pXmlState = &m_pCurrFrag->State;
  327. Word.eWordPartOfSpeech = MS_Unknown;
  328. switch ( wc )
  329. {
  330. //--- Periods normally are pronounced as "dot", rather than "period".
  331. case L'.':
  332. Word.pWordText = g_periodString.pStr;
  333. Word.ulWordLen = g_periodString.Len;
  334. Word.pLemma = Word.pWordText;
  335. Word.ulLemmaLen = Word.ulWordLen;
  336. WordList.AddTail( Word );
  337. break;
  338. default:
  339. //--- Some characters have multi-word names
  340. pNextPointer = g_ANSICharacterProns[wc].pStr;
  341. do {
  342. pPrevPointer = pNextPointer;
  343. pNextPointer = wcschr(pPrevPointer, L' ');
  344. if (pNextPointer)
  345. {
  346. ulLength = (ULONG)(pNextPointer - pPrevPointer);
  347. pNextPointer++;
  348. }
  349. else
  350. {
  351. ulLength = wcslen(pPrevPointer);
  352. }
  353. Word.pXmlState = &m_pCurrFrag->State;
  354. Word.pWordText = pPrevPointer;
  355. Word.ulWordLen = ulLength;
  356. Word.pLemma = pPrevPointer;
  357. Word.ulLemmaLen = ulLength;
  358. Word.eWordPartOfSpeech = MS_Unknown;
  359. WordList.AddTail( Word );
  360. } while ( pNextPointer );
  361. break;
  362. }
  363. } /* ExpandPunctuation */
  364. /*****************************************************************************
  365. * ExpandUnrecognizedString *
  366. *--------------------------*
  367. * This function is where text ends up if it needs to be normalized,
  368. * and wasn't recognized as anything (e.g. a number or a date). Contiguous
  369. * alpha characters are grouped together for lookup, contiguous digits are
  370. * expanded as numbers, and all other characters are expanded by name (e.g.
  371. * '(' -> "left parenthesis").
  372. *
  373. ********************************************************************* AH ****/
  374. HRESULT CStdSentEnum::ExpandUnrecognizedString( CWordList& WordList, CSentItemMemory& MemoryManager )
  375. {
  376. SPDBG_FUNC( "CStdSentEnum::ExpandUnrecognizedString" );
  377. HRESULT hr = S_OK;
  378. TTSWord Word;
  379. ZeroMemory( &Word, sizeof(TTSWord) );
  380. const WCHAR *pCurr = m_pNextChar, *pPrev, *pEnd = m_pEndOfCurrItem;
  381. const WCHAR *pTempNextChar = m_pNextChar, *pTempEndOfItem = m_pEndOfCurrItem;
  382. const WCHAR *pPrevPointer = NULL, *pNextPointer = NULL;
  383. WCHAR Temp = 0;
  384. ULONG ulTempCount = 0;
  385. ULONG ulLength;
  386. bool bDone = false;
  387. //--- RAID 9143, 1/05/2001
  388. if ( _wcsnicmp( pCurr, L"AT&T", pEnd - pCurr ) == 0 )
  389. {
  390. //--- "A"
  391. Word.pXmlState = &m_pCurrFrag->State;
  392. Word.pWordText = pCurr;
  393. Word.ulWordLen = 1;
  394. Word.pLemma = Word.pWordText;
  395. Word.ulLemmaLen = Word.ulWordLen;
  396. Word.eWordPartOfSpeech = MS_Unknown;
  397. WordList.AddTail( Word );
  398. //--- "T"
  399. Word.pWordText = pCurr + 1;
  400. Word.pLemma = Word.pWordText;
  401. WordList.AddTail( Word );
  402. //--- "And"
  403. Word.pWordText = g_And.pStr;
  404. Word.ulWordLen = g_And.Len;
  405. Word.pLemma = Word.pWordText;
  406. Word.ulLemmaLen = Word.ulWordLen;
  407. WordList.AddTail( Word );
  408. //--- "T"
  409. Word.pWordText = pCurr + 3;
  410. Word.ulWordLen = 1;
  411. Word.pLemma = Word.pWordText;
  412. Word.ulLemmaLen = Word.ulWordLen;
  413. WordList.AddTail( Word );
  414. }
  415. else
  416. {
  417. while (pCurr < pEnd && SUCCEEDED(hr) && !bDone)
  418. {
  419. pPrev = pCurr;
  420. //--- Special Case: alpha characters
  421. if (iswalpha(*pCurr))
  422. {
  423. ulTempCount = 0;
  424. do {
  425. pCurr++;
  426. } while (pCurr < pEnd && iswalpha(*pCurr));
  427. Word.pXmlState = &m_pCurrFrag->State;
  428. Word.pWordText = pPrev;
  429. Word.ulWordLen = (ULONG)(pCurr - pPrev);
  430. Word.pLemma = Word.pWordText;
  431. Word.ulLemmaLen = Word.ulWordLen;
  432. Word.eWordPartOfSpeech = MS_Unknown;
  433. WordList.AddTail( Word );
  434. }
  435. //--- Special Case: digits
  436. else if (isdigit(*pCurr))
  437. {
  438. ulTempCount = 0;
  439. do {
  440. pCurr++;
  441. } while (pCurr < pEnd && isdigit(*pCurr));
  442. TTSItemInfo* pGarbage;
  443. m_pNextChar = pPrev;
  444. m_pEndOfCurrItem = pCurr;
  445. hr = IsNumber( pGarbage, L"NUMBER", MemoryManager, false );
  446. if ( SUCCEEDED( hr ) )
  447. {
  448. hr = ExpandNumber( (TTSNumberItemInfo*) pGarbage, WordList );
  449. }
  450. m_pNextChar = pTempNextChar;
  451. m_pEndOfCurrItem = pTempEndOfItem;
  452. }
  453. //--- Default Case
  454. else if (0 <= *pCurr && *pCurr <= sp_countof(g_ANSICharacterProns) &&
  455. g_ANSICharacterProns[*pCurr].Len != 0)
  456. {
  457. if ( ulTempCount == 0 )
  458. {
  459. Temp = *pCurr;
  460. ulTempCount++;
  461. }
  462. else if ( Temp == *pCurr )
  463. {
  464. ulTempCount++;
  465. }
  466. else
  467. {
  468. Temp = *pCurr;
  469. ulTempCount = 1;
  470. }
  471. if ( ulTempCount < 4 )
  472. {
  473. //--- Some characters have multi-word names
  474. pNextPointer = g_ANSICharacterProns[*pCurr].pStr;
  475. do {
  476. pPrevPointer = pNextPointer;
  477. pNextPointer = wcschr(pPrevPointer, L' ');
  478. if (pNextPointer)
  479. {
  480. ulLength = (ULONG )(pNextPointer - pPrevPointer);
  481. pNextPointer++;
  482. }
  483. else
  484. {
  485. ulLength = wcslen(pPrevPointer);
  486. }
  487. Word.pXmlState = &m_pCurrFrag->State;
  488. Word.pWordText = pPrevPointer;
  489. Word.ulWordLen = ulLength;
  490. Word.pLemma = pPrevPointer;
  491. Word.ulLemmaLen = ulLength;
  492. Word.eWordPartOfSpeech = MS_Unknown;
  493. WordList.AddTail( Word );
  494. } while (SUCCEEDED(hr) && pNextPointer);
  495. }
  496. pCurr++;
  497. }
  498. else // Character is not expandable
  499. {
  500. pCurr++;
  501. }
  502. }
  503. }
  504. return hr;
  505. } /* ExpandUnrecognizedString */
  506. /*****************************************************************************
  507. * SpellOutString *
  508. *----------------*
  509. * This function expands strings surrounded by the <SPElL> XML tag.
  510. * It uses the same table to look up character expansions as
  511. * ExpandUnrecognizedString, but ALL characters are expanded by name.
  512. ********************************************************************* AH ****/
  513. HRESULT CStdSentEnum::SpellOutString( CWordList& WordList )
  514. {
  515. SPDBG_FUNC( "CStdSentEnum::SpellOutString" );
  516. HRESULT hr = S_OK;
  517. TTSWord Word;
  518. ZeroMemory( &Word, sizeof(TTSWord) );
  519. const WCHAR *pCurr = m_pNextChar, *pPrev, *pEnd = m_pEndOfCurrItem;
  520. const WCHAR *pPrevPointer = NULL, *pNextPointer = NULL;
  521. ULONG ulLength;
  522. bool bDone = false;
  523. while (pCurr < pEnd && SUCCEEDED(hr) && !bDone)
  524. {
  525. pPrev = pCurr;
  526. if ( 0 <= *pCurr &&
  527. *pCurr <= sp_countof(g_ANSICharacterProns) &&
  528. g_ANSICharacterProns[*pCurr].Len != 0 )
  529. {
  530. //--- Some characters have multi-word names
  531. pNextPointer = g_ANSICharacterProns[*pCurr].pStr;
  532. do {
  533. pPrevPointer = pNextPointer;
  534. pNextPointer = wcschr(pPrevPointer, L' ');
  535. if (pNextPointer)
  536. {
  537. ulLength = (ULONG)(pNextPointer - pPrevPointer);
  538. pNextPointer++;
  539. }
  540. else
  541. {
  542. ulLength = wcslen(pPrevPointer);
  543. }
  544. Word.pXmlState = &m_pCurrFrag->State;
  545. Word.pWordText = pPrevPointer;
  546. Word.ulWordLen = ulLength;
  547. Word.pLemma = pPrevPointer;
  548. Word.ulLemmaLen = ulLength;
  549. Word.eWordPartOfSpeech = MS_Unknown;
  550. WordList.AddTail( Word );
  551. } while (SUCCEEDED(hr) && pNextPointer);
  552. pCurr++;
  553. }
  554. else // Character is not expandable
  555. {
  556. pCurr++;
  557. }
  558. }
  559. return hr;
  560. } /* SpellOutString */
  561. //-----------End Of File-------------------------------------------------------------------