Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

2009 lines
92 KiB

  1. /*******************************************************************************
  2. * Disambig.cpp *
  3. *--------------*
  4. * Description:
  5. * This module contains the methods to disambiguate part of speech and
  6. * select the correct pronunciation from the lexicon.
  7. *-------------------------------------------------------------------------------
  8. * Created By: EDC Date: 07/15/99
  9. * Copyright (C) 1999 Microsoft Corporation
  10. * All Rights Reserved
  11. *
  12. *******************************************************************************/
  13. //--- Additional includes
  14. #include "stdafx.h"
  15. #include "commonlx.h"
  16. #ifndef StdSentEnum_h
  17. #include "stdsentenum.h"
  18. #endif
  19. #include "spttsengdebug.h"
  20. /*****************************************************************************
  21. * TryPOSConversion *
  22. *------------------*
  23. *
  24. * Description:
  25. * Checks to see whether the argument PRONRECORD contains the argument
  26. * ENGPARTOFSPEECH as an option. If so, sets the PRONRECORD alternate
  27. * choice and part of speech choice, and returns true. If not, just returns
  28. * false without modifying the PRONRECORD at all.
  29. *
  30. ***************************************************************** AH *********/
  31. bool TryPOSConversion( PRONRECORD& pPron, ENGPARTOFSPEECH PartOfSpeech )
  32. {
  33. //--- Check first pronunciation
  34. for ( ULONG i = 0; i < pPron.pronArray[0].POScount; i++ )
  35. {
  36. if ( pPron.pronArray[0].POScode[i] == PartOfSpeech )
  37. {
  38. pPron.altChoice = 0;
  39. pPron.POSchoice = PartOfSpeech;
  40. return true;
  41. }
  42. }
  43. //--- Check second pronunciation
  44. if ( pPron.hasAlt )
  45. {
  46. for ( ULONG i = 0; i < pPron.pronArray[1].POScount; i++ )
  47. {
  48. if ( pPron.pronArray[1].POScode[i] == PartOfSpeech )
  49. {
  50. pPron.altChoice = 1;
  51. pPron.POSchoice = PartOfSpeech;
  52. return true;
  53. }
  54. }
  55. }
  56. return false;
  57. } /* TryPOS Conversion */
  58. /*****************************************************************************
  59. * DisambiguatePOS *
  60. *-----------------*
  61. *
  62. * Description:
  63. * Disambiguate parts of speech by applying patches in order... This
  64. * work is an implementation of Eric Brill's rule-based part of speech
  65. * tagger - see, for example:
  66. *
  67. * Brill, Eric. 1992. A simple rule-based part of speech tagger.
  68. * In Proceedings of the Third Conference on Applied Natural
  69. * Language Processing, ACL. Trento, Italy.
  70. *
  71. ***************************************************************** AH *********/
  72. void DisambiguatePOS( PRONRECORD *pProns, ULONG cNumOfWords )
  73. {
  74. SPDBG_FUNC( "DisambiguatePOS" );
  75. //--- Iterate over the patches, applying each (where applicable) to the
  76. //--- entire sentence. For each patch, iterate over each word in the
  77. //--- sentence to which the patch could apply (from left to right).
  78. for ( int i = 0; i < sp_countof( g_POSTaggerPatches ); i++ )
  79. {
  80. switch ( g_POSTaggerPatches[i].eTemplateType )
  81. {
  82. case PREV1T:
  83. {
  84. if ( cNumOfWords > 1 )
  85. {
  86. for ( ULONG j = 1; j < cNumOfWords; j++ )
  87. {
  88. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  89. {
  90. //--- If the current POS matches, and the previous POS matches, and
  91. //--- the conversion POS is a possibility for this word, convert the
  92. //--- POS.
  93. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  94. pProns[j - 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 )
  95. {
  96. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  97. }
  98. }
  99. }
  100. }
  101. }
  102. break;
  103. case NEXT1T:
  104. {
  105. if ( cNumOfWords > 1 )
  106. {
  107. for ( ULONG j = 0; j < cNumOfWords - 1; j++ )
  108. {
  109. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  110. {
  111. //--- If the current POS matches, and the next POS matches, and
  112. //--- the conversion POS is a possibility for this word, convert the
  113. //--- POS.
  114. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  115. pProns[j + 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 )
  116. {
  117. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  118. }
  119. }
  120. }
  121. }
  122. }
  123. break;
  124. case PREV2T:
  125. {
  126. if ( cNumOfWords > 2 )
  127. {
  128. for ( ULONG j = 2; j < cNumOfWords; j++ )
  129. {
  130. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  131. {
  132. //--- If the current POS matches, and the POS two previous matches, and
  133. //--- the conversion POS is a possibility for this word, convert the POS.
  134. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  135. pProns[j - 2].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 )
  136. {
  137. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  138. }
  139. }
  140. }
  141. }
  142. }
  143. break;
  144. case NEXT2T:
  145. {
  146. if ( cNumOfWords > 2 )
  147. {
  148. for ( ULONG j = 0; j < cNumOfWords - 2; j++ )
  149. {
  150. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  151. {
  152. //--- If the current POS matches, and the POS two after matches, and
  153. //--- the conversion POS is a possibility for this word, convert the
  154. //--- POS.
  155. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  156. pProns[j + 2].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 )
  157. {
  158. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  159. }
  160. }
  161. }
  162. }
  163. }
  164. break;
  165. case PREV1OR2T:
  166. {
  167. if ( cNumOfWords > 2 )
  168. {
  169. for ( ULONG j = 1; j < cNumOfWords; j++ )
  170. {
  171. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  172. {
  173. //--- If the current POS matches, and the previous POS matches OR the
  174. //--- POS two previous matches, and the conversion POS is a possibility
  175. //--- for this word, convert the POS.
  176. if ( ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  177. pProns[j - 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 ) ||
  178. ( j > 1 &&
  179. pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  180. pProns[j - 2].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 ) )
  181. {
  182. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  183. }
  184. }
  185. }
  186. }
  187. }
  188. break;
  189. case NEXT1OR2T:
  190. {
  191. if ( cNumOfWords > 2 )
  192. {
  193. for ( ULONG j = 0; j < cNumOfWords - 1; j++ )
  194. {
  195. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  196. {
  197. //--- If the current POS matches, and the next POS matches OR the POS
  198. //--- two after matches, and the conversion POS is a possibility for this
  199. //--- word, convert the POS.
  200. if ( ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  201. pProns[j + 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 ) ||
  202. ( j < cNumOfWords - 2 &&
  203. pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  204. pProns[j + 2].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 ) )
  205. {
  206. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  207. }
  208. }
  209. }
  210. }
  211. }
  212. break;
  213. case PREV1OR2OR3T:
  214. {
  215. if ( cNumOfWords > 3 )
  216. {
  217. for ( ULONG j = 1; j < cNumOfWords; j++ )
  218. {
  219. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  220. {
  221. //--- If the current POS matches, and the previous POS matches OR the
  222. //--- POS two previous matches OR the POS three previous matches, and
  223. //--- the conversion POS is a possibility for this word, convert the POS.
  224. if ( ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  225. pProns[j - 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 ) ||
  226. ( j > 1 &&
  227. pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  228. pProns[j - 2].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 ) ||
  229. ( j > 2 &&
  230. pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  231. pProns[j - 3].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 ) )
  232. {
  233. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  234. }
  235. }
  236. }
  237. }
  238. }
  239. break;
  240. case NEXT1OR2OR3T:
  241. {
  242. if ( cNumOfWords > 3 )
  243. {
  244. for ( ULONG j = 0; j < cNumOfWords - 1; j++ )
  245. {
  246. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  247. {
  248. //--- If the current POS matches, and the next POS matches OR the POS
  249. //--- two after matches OR the POS three after matches, and the conversion
  250. //--- POS is a possibility for this word, convert the POS.
  251. if ( ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  252. pProns[j + 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 ) ||
  253. ( j < cNumOfWords - 2 &&
  254. pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  255. pProns[j + 2].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 ) ||
  256. ( j < cNumOfWords - 3 &&
  257. pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  258. pProns[j + 3].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 ) )
  259. {
  260. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  261. }
  262. }
  263. }
  264. }
  265. }
  266. break;
  267. case PREV1TNEXT1T:
  268. {
  269. if ( cNumOfWords > 2 )
  270. {
  271. for ( ULONG j = 1; j < cNumOfWords - 1; j++ )
  272. {
  273. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  274. {
  275. //--- If the current POS matches, and the next POS matches, and the
  276. //--- previous POS matches, and the conversion POS is a possibility
  277. //--- for this word, convert the POS.
  278. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  279. pProns[j - 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 &&
  280. pProns[j + 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS2 )
  281. {
  282. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  283. }
  284. }
  285. }
  286. }
  287. }
  288. break;
  289. case PREV1TNEXT2T:
  290. {
  291. if ( cNumOfWords > 3 )
  292. {
  293. for ( ULONG j = 1; j < cNumOfWords - 2; j++ )
  294. {
  295. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  296. {
  297. //--- If the current POS matches, and the POS two after matches, and the
  298. //--- previous POS matches, and the conversion POS is a possibility
  299. //--- for this word, convert the POS.
  300. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  301. pProns[j - 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 &&
  302. pProns[j + 2].POSchoice == g_POSTaggerPatches[i].eTemplatePOS2 )
  303. {
  304. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  305. }
  306. }
  307. }
  308. }
  309. }
  310. break;
  311. case PREV2TNEXT1T:
  312. {
  313. if ( cNumOfWords > 3 )
  314. {
  315. for ( ULONG j = 2; j < cNumOfWords - 1; j++ )
  316. {
  317. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  318. {
  319. //--- If the current POS matches, and the next POS matches, and the
  320. //--- POS two previous matches, and the conversion POS is a possibility
  321. //--- for this word, convert the POS.
  322. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  323. pProns[j - 2].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 &&
  324. pProns[j + 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS2 )
  325. {
  326. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  327. }
  328. }
  329. }
  330. }
  331. }
  332. break;
  333. case CAP:
  334. {
  335. for ( ULONG j = 0; j < cNumOfWords; j++ )
  336. {
  337. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  338. {
  339. //--- If the current POS matches, and the word is capitalized, and the
  340. //--- conversion POS is a possibility for this word, convert the POS.
  341. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  342. iswupper( pProns[j].orthStr[0] ) )
  343. {
  344. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  345. }
  346. }
  347. }
  348. }
  349. break;
  350. case NOTCAP:
  351. {
  352. for ( ULONG j = 0; j < cNumOfWords; j++ )
  353. {
  354. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  355. {
  356. //--- If the current POS matches, and the word is not capitalized, and the
  357. //--- conversion POS is a possibility for this word, convert the POS.
  358. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  359. !iswupper( pProns[j].orthStr[0] ) )
  360. {
  361. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  362. }
  363. }
  364. }
  365. }
  366. break;
  367. case PREVCAP:
  368. {
  369. if ( cNumOfWords > 1 )
  370. {
  371. for ( ULONG j = 1; j < cNumOfWords; j++ )
  372. {
  373. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  374. {
  375. //--- If the current POS matches, and the previous word is capitalized,
  376. //--- and the conversion POS is a possibility for this word, convert the
  377. //--- POS.
  378. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  379. iswupper( pProns[j - 1].orthStr[0] ) )
  380. {
  381. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  382. }
  383. }
  384. }
  385. }
  386. }
  387. break;
  388. case PREVNOTCAP:
  389. {
  390. if ( cNumOfWords > 1 )
  391. {
  392. for ( ULONG j = 1; j < cNumOfWords; j++ )
  393. {
  394. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  395. {
  396. //--- If the current POS matches, and the word is capitalized, and the
  397. //--- conversion POS is a possibility for this word, convert the POS.
  398. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  399. !iswupper( pProns[j - 1].orthStr[0] ) )
  400. {
  401. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  402. }
  403. }
  404. }
  405. }
  406. }
  407. break;
  408. case PREV1W:
  409. {
  410. if ( cNumOfWords > 1 )
  411. {
  412. for ( ULONG j = 1; j < cNumOfWords; j++ )
  413. {
  414. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  415. {
  416. //--- If the current POS matches, and the previous word matches, and the
  417. //--- conversion POS is a possibility for this word, convert the POS.
  418. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  419. _wcsicmp( pProns[j - 1].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 )
  420. {
  421. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  422. }
  423. }
  424. }
  425. }
  426. }
  427. break;
  428. case NEXT1W:
  429. {
  430. if ( cNumOfWords > 1 )
  431. {
  432. for ( ULONG j = 0; j < cNumOfWords - 1; j++ )
  433. {
  434. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  435. {
  436. //--- If the current POS matches, and the next word matches, and the
  437. //--- conversion POS is a possibility for this word, convert the POS.
  438. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  439. _wcsicmp( pProns[j + 1].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 )
  440. {
  441. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  442. }
  443. }
  444. }
  445. }
  446. }
  447. break;
  448. case PREV2W:
  449. {
  450. if ( cNumOfWords > 2 )
  451. {
  452. for ( ULONG j = 2; j < cNumOfWords; j++ )
  453. {
  454. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  455. {
  456. //--- If the current POS matches, and the word two previous matches, and the
  457. //--- conversion POS is a possibility for this word, convert the POS.
  458. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  459. _wcsicmp( pProns[j - 2].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 )
  460. {
  461. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  462. }
  463. }
  464. }
  465. }
  466. }
  467. break;
  468. case NEXT2W:
  469. {
  470. if ( cNumOfWords > 2 )
  471. {
  472. for ( ULONG j = 0; j < cNumOfWords - 2; j++ )
  473. {
  474. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  475. {
  476. //--- If the current POS matches, and the word two after matches, and the
  477. //--- conversion POS is a possibility for this word, convert the POS.
  478. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  479. _wcsicmp( pProns[j + 2].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 )
  480. {
  481. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  482. }
  483. }
  484. }
  485. }
  486. }
  487. break;
  488. case PREV1OR2W:
  489. {
  490. if ( cNumOfWords > 2 )
  491. {
  492. for ( ULONG j = 0; j < cNumOfWords - 1; j++ )
  493. {
  494. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  495. {
  496. //--- If the current POS matches, and the previous word OR the word two
  497. //--- previous matches, and the conversion POS is a possibility for this word,
  498. //--- convert the POS.
  499. if ( ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  500. _wcsicmp( pProns[j - 1].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 ) ||
  501. ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  502. _wcsicmp( pProns[j - 2].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 ) )
  503. {
  504. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  505. }
  506. }
  507. }
  508. }
  509. }
  510. break;
  511. case NEXT1OR2W:
  512. {
  513. if ( cNumOfWords > 1 )
  514. {
  515. for ( ULONG j = 0; j < cNumOfWords - 1; j++ )
  516. {
  517. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  518. {
  519. //--- If the current POS matches, and the next word matches OR the word two after
  520. //--- matches, and the conversion POS is a possibility for this word, convert the
  521. //--- POS.
  522. if ( ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  523. _wcsicmp( pProns[j + 1].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 ) ||
  524. ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  525. _wcsicmp( pProns[j + 2].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 ) )
  526. {
  527. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  528. }
  529. }
  530. }
  531. }
  532. }
  533. break;
  534. case CURRWPREV1W:
  535. {
  536. if ( cNumOfWords > 1 )
  537. {
  538. for ( ULONG j = 1; j < cNumOfWords; j++ )
  539. {
  540. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  541. {
  542. //--- If the current POS matches, and the current word matches, and the previous
  543. //--- word matches, and the conversion POS is a possibility for this word, convert
  544. //--- the POS.
  545. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  546. _wcsicmp( pProns[j].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 &&
  547. _wcsicmp( pProns[j - 1].orthStr, g_POSTaggerPatches[i].pTemplateWord2 ) == 0 )
  548. {
  549. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  550. }
  551. }
  552. }
  553. }
  554. }
  555. break;
  556. case CURRWNEXT1W:
  557. {
  558. if ( cNumOfWords > 1 )
  559. {
  560. for ( ULONG j = 0; j < cNumOfWords - 1; j++ )
  561. {
  562. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  563. {
  564. //--- If the current POS matches, and the current word matches, and the next
  565. //--- word matches, and the conversion POS is a possibility for this word, convert
  566. //--- the POS.
  567. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  568. _wcsicmp( pProns[j].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 &&
  569. _wcsicmp( pProns[j + 1].orthStr, g_POSTaggerPatches[i].pTemplateWord2 ) == 0 )
  570. {
  571. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  572. }
  573. }
  574. }
  575. }
  576. }
  577. break;
  578. case CURRWPREV1T:
  579. {
  580. if ( cNumOfWords > 1 )
  581. {
  582. for ( ULONG j = 1; j < cNumOfWords; j++ )
  583. {
  584. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  585. {
  586. //--- If the current POS matches, and the current word matches, and the previous
  587. //--- POS matches, and the conversion POS is a possibility for this word, convert
  588. //--- the POS.
  589. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  590. _wcsicmp( pProns[j].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 &&
  591. pProns[j - 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 )
  592. {
  593. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  594. }
  595. }
  596. }
  597. }
  598. }
  599. break;
  600. case CURRWNEXT1T:
  601. {
  602. if ( cNumOfWords > 1 )
  603. {
  604. for ( ULONG j = 0; j < cNumOfWords - 1; j++ )
  605. {
  606. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  607. {
  608. //--- If the current POS matches, and the current word matches, and the next
  609. //--- POS matches, and the conversion POS is a possibility for this word, convert
  610. //--- the POS.
  611. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  612. _wcsicmp( pProns[j].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 &&
  613. pProns[j + 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 )
  614. {
  615. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  616. }
  617. }
  618. }
  619. }
  620. }
  621. break;
  622. case CURRW:
  623. {
  624. for ( ULONG j = 0; j < cNumOfWords; j++ )
  625. {
  626. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  627. {
  628. //--- If the current POS matches, and the current word matches, and the
  629. //--- conversion POS is a possibility for this word, convert the POS.
  630. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  631. _wcsicmp( pProns[j].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 )
  632. {
  633. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS ) ;
  634. }
  635. }
  636. }
  637. }
  638. break;
  639. case PREV1WT:
  640. {
  641. if ( cNumOfWords > 1 )
  642. {
  643. for ( ULONG j = 1; j < cNumOfWords; j++ )
  644. {
  645. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  646. {
  647. //--- If the current POS matches, and the previous word and POS match, and
  648. //--- the conversion POS is a possibility for this word, convert the POS.
  649. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  650. pProns[j - 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 &&
  651. _wcsicmp( pProns[j - 1].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 )
  652. {
  653. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  654. }
  655. }
  656. }
  657. }
  658. }
  659. break;
  660. case NEXT1WT:
  661. {
  662. if ( cNumOfWords > 1 )
  663. {
  664. for ( ULONG j = 0; j < cNumOfWords - 1; j++ )
  665. {
  666. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  667. {
  668. //--- If the current POS matches, and the next word and POS match, and
  669. //--- the conversion POS is a possibility for this word, convert the POS.
  670. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  671. pProns[j + 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 &&
  672. _wcsicmp( pProns[j + 1].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 )
  673. {
  674. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  675. }
  676. }
  677. }
  678. }
  679. }
  680. break;
  681. case CURRWPREV1WT:
  682. {
  683. if ( cNumOfWords > 1 )
  684. {
  685. for ( ULONG j = 1; j < cNumOfWords; j++ )
  686. {
  687. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  688. {
  689. //--- If the current POS matches, and the current words matches, and the
  690. //--- previous word and POS match, and the conversion POS is a possibility
  691. //--- for this word, convert the POS.
  692. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  693. _wcsicmp( pProns[j].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 &&
  694. pProns[j - 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 &&
  695. _wcsicmp( pProns[j - 1].orthStr, g_POSTaggerPatches[i].pTemplateWord2 ) == 0 )
  696. {
  697. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  698. }
  699. }
  700. }
  701. }
  702. }
  703. break;
  704. case CURRWNEXT1WT:
  705. {
  706. if ( cNumOfWords > 1 )
  707. {
  708. for ( ULONG j = 0; j < cNumOfWords - 1; j++ )
  709. {
  710. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  711. {
  712. //--- If the current POS matches, and the current words matches, and the
  713. //--- next word and POS match, and the conversion POS is a possibility
  714. //--- for this word, convert the POS.
  715. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  716. _wcsicmp( pProns[j].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 &&
  717. pProns[j + 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 &&
  718. _wcsicmp( pProns[j + 1].orthStr, g_POSTaggerPatches[i].pTemplateWord2 ) == 0 )
  719. {
  720. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  721. }
  722. }
  723. }
  724. }
  725. }
  726. break;
  727. }
  728. }
  729. } /* DisambiguatePOS */
  730. /*****************************************************************************
  731. * Pronounce *
  732. *-----------*
  733. * Description:
  734. * Get lexicon or letter-to-sound (LTS) pronunciations
  735. *
  736. ********************************************************************** MC ***/
  737. HRESULT CStdSentEnum::Pronounce( PRONRECORD *pPron )
  738. {
  739. SPDBG_FUNC( "Pronounce" );
  740. SPWORDPRONUNCIATIONLIST SPList;
  741. HRESULT hr = SPERR_NOT_IN_LEX;
  742. ULONG cPhonLen;
  743. DWORD dwFlags = eLEXTYPE_USER | eLEXTYPE_APP | eLEXTYPE_PRIVATE1 | eLEXTYPE_PRIVATE2;
  744. BOOL fPOSExists = false;
  745. ZeroMemory( &SPList, sizeof(SPWORDPRONUNCIATIONLIST) );
  746. //--- Special Case - XML Provided Part Of Speech. Search for exact match first...
  747. if ( pPron->XMLPartOfSpeech != MS_Unknown )
  748. {
  749. //--- Try User Lexicon
  750. hr = m_cpAggregateLexicon->GetPronunciations( pPron->orthStr, 1033, eLEXTYPE_USER, &SPList );
  751. if ( SUCCEEDED( hr ) &&
  752. SPList.pFirstWordPronunciation )
  753. {
  754. for ( SPWORDPRONUNCIATION *pPronunciation = SPList.pFirstWordPronunciation; pPronunciation;
  755. pPronunciation = pPronunciation->pNextWordPronunciation )
  756. {
  757. if ( pPronunciation->ePartOfSpeech == pPron->XMLPartOfSpeech )
  758. {
  759. fPOSExists = true;
  760. break;
  761. }
  762. }
  763. if ( !fPOSExists )
  764. {
  765. if ( SPList.pvBuffer )
  766. {
  767. ::CoTaskMemFree( SPList.pvBuffer );
  768. ZeroMemory( &SPList, sizeof(SPWORDPRONUNCIATIONLIST) );
  769. }
  770. }
  771. }
  772. //--- Handle empty pronunciation
  773. else if ( !SPList.pFirstWordPronunciation )
  774. {
  775. if ( SPList.pvBuffer )
  776. {
  777. ::CoTaskMemFree( SPList.pvBuffer );
  778. ZeroMemory( &SPList, sizeof(SPWORDPRONUNCIATIONLIST) );
  779. }
  780. hr = SPERR_NOT_IN_LEX;
  781. }
  782. //--- Try App Lexicon
  783. if ( !fPOSExists )
  784. {
  785. hr = m_cpAggregateLexicon->GetPronunciations( pPron->orthStr, 1033, eLEXTYPE_APP, &SPList );
  786. if ( SUCCEEDED( hr ) &&
  787. SPList.pFirstWordPronunciation )
  788. {
  789. for ( SPWORDPRONUNCIATION *pPronunciation = SPList.pFirstWordPronunciation; pPronunciation;
  790. pPronunciation = pPronunciation->pNextWordPronunciation )
  791. {
  792. if ( pPronunciation->ePartOfSpeech == pPron->XMLPartOfSpeech )
  793. {
  794. fPOSExists = true;
  795. break;
  796. }
  797. }
  798. if ( !fPOSExists )
  799. {
  800. if ( SPList.pvBuffer )
  801. {
  802. ::CoTaskMemFree( SPList.pvBuffer );
  803. ZeroMemory( &SPList, sizeof(SPWORDPRONUNCIATIONLIST) );
  804. }
  805. }
  806. }
  807. //--- Handle empty pronunciation
  808. else if ( !SPList.pFirstWordPronunciation )
  809. {
  810. if ( SPList.pvBuffer )
  811. {
  812. ::CoTaskMemFree( SPList.pvBuffer );
  813. ZeroMemory( &SPList, sizeof(SPWORDPRONUNCIATIONLIST) );
  814. }
  815. hr = SPERR_NOT_IN_LEX;
  816. }
  817. }
  818. //--- Try Vendor Lexicon
  819. if ( !fPOSExists )
  820. {
  821. hr = m_cpAggregateLexicon->GetPronunciations( pPron->orthStr, 1033, eLEXTYPE_PRIVATE1, &SPList );
  822. if ( SUCCEEDED( hr ) &&
  823. SPList.pFirstWordPronunciation )
  824. {
  825. for ( SPWORDPRONUNCIATION *pPronunciation = SPList.pFirstWordPronunciation; pPronunciation;
  826. pPronunciation = pPronunciation->pNextWordPronunciation )
  827. {
  828. if ( pPronunciation->ePartOfSpeech == pPron->XMLPartOfSpeech )
  829. {
  830. fPOSExists = true;
  831. break;
  832. }
  833. }
  834. if ( !fPOSExists )
  835. {
  836. if ( SPList.pvBuffer )
  837. {
  838. ::CoTaskMemFree( SPList.pvBuffer );
  839. ZeroMemory( &SPList, sizeof(SPWORDPRONUNCIATIONLIST) );
  840. }
  841. }
  842. }
  843. //--- Handle empty pronunciation
  844. else if ( !SPList.pFirstWordPronunciation )
  845. {
  846. if ( SPList.pvBuffer )
  847. {
  848. ::CoTaskMemFree( SPList.pvBuffer );
  849. ZeroMemory( &SPList, sizeof(SPWORDPRONUNCIATIONLIST) );
  850. }
  851. hr = SPERR_NOT_IN_LEX;
  852. }
  853. }
  854. //--- Try Morph Lexicon
  855. if ( !fPOSExists )
  856. {
  857. hr = m_pMorphLexicon->DoSuffixMorph( pPron->orthStr, pPron->lemmaStr, 1033, dwFlags, &SPList );
  858. if ( SUCCEEDED( hr ) &&
  859. SPList.pFirstWordPronunciation )
  860. {
  861. for ( SPWORDPRONUNCIATION *pPronunciation = SPList.pFirstWordPronunciation; pPronunciation;
  862. pPronunciation = pPronunciation->pNextWordPronunciation )
  863. {
  864. if ( pPronunciation->ePartOfSpeech == pPron->XMLPartOfSpeech )
  865. {
  866. fPOSExists = true;
  867. break;
  868. }
  869. }
  870. if ( !fPOSExists )
  871. {
  872. //--- Need to do this the last time, to make sure we hit the default code below...
  873. //--- RAID 5078
  874. hr = SPERR_NOT_IN_LEX;
  875. if ( SPList.pvBuffer )
  876. {
  877. ::CoTaskMemFree( SPList.pvBuffer );
  878. ZeroMemory( &SPList, sizeof(SPWORDPRONUNCIATIONLIST) );
  879. }
  880. }
  881. }
  882. //--- Handle empty pronunciation
  883. else if ( !SPList.pFirstWordPronunciation )
  884. {
  885. if ( SPList.pvBuffer )
  886. {
  887. ::CoTaskMemFree( SPList.pvBuffer );
  888. ZeroMemory( &SPList, sizeof(SPWORDPRONUNCIATIONLIST) );
  889. }
  890. hr = SPERR_NOT_IN_LEX;
  891. }
  892. }
  893. }
  894. //--- Default case - just look up orthography and go with first match.
  895. if ( hr == SPERR_NOT_IN_LEX )
  896. {
  897. hr = m_cpAggregateLexicon->GetPronunciations( pPron->orthStr, 1033, eLEXTYPE_USER, &SPList );
  898. //--- Handle empty pronunciation
  899. if ( SUCCEEDED( hr ) &&
  900. !SPList.pFirstWordPronunciation )
  901. {
  902. if ( SPList.pvBuffer )
  903. {
  904. ::CoTaskMemFree( SPList.pvBuffer );
  905. ZeroMemory( &SPList, sizeof(SPWORDPRONUNCIATIONLIST) );
  906. }
  907. hr = SPERR_NOT_IN_LEX;
  908. }
  909. }
  910. if ( hr == SPERR_NOT_IN_LEX )
  911. {
  912. hr = m_cpAggregateLexicon->GetPronunciations( pPron->orthStr, 1033, eLEXTYPE_APP, &SPList );
  913. //--- Handle empty pronunciation
  914. if ( SUCCEEDED( hr ) &&
  915. !SPList.pFirstWordPronunciation )
  916. {
  917. if ( SPList.pvBuffer )
  918. {
  919. ::CoTaskMemFree( SPList.pvBuffer );
  920. ZeroMemory( &SPList, sizeof(SPWORDPRONUNCIATIONLIST) );
  921. }
  922. hr = SPERR_NOT_IN_LEX;
  923. }
  924. }
  925. if ( hr == SPERR_NOT_IN_LEX )
  926. {
  927. hr = m_cpAggregateLexicon->GetPronunciations( pPron->orthStr, 1033, eLEXTYPE_PRIVATE1, &SPList );
  928. //--- Handle empty pronunciation
  929. if ( SUCCEEDED( hr ) &&
  930. !SPList.pFirstWordPronunciation )
  931. {
  932. if ( SPList.pvBuffer )
  933. {
  934. ::CoTaskMemFree( SPList.pvBuffer );
  935. ZeroMemory( &SPList, sizeof(SPWORDPRONUNCIATIONLIST) );
  936. }
  937. hr = SPERR_NOT_IN_LEX;
  938. }
  939. }
  940. if ( hr == SPERR_NOT_IN_LEX )
  941. {
  942. hr = m_pMorphLexicon->DoSuffixMorph( pPron->orthStr, pPron->lemmaStr, 1033,
  943. dwFlags, &SPList );
  944. //--- Handle empty pronunciation
  945. if ( SUCCEEDED( hr ) &&
  946. !SPList.pFirstWordPronunciation )
  947. {
  948. if ( SPList.pvBuffer )
  949. {
  950. ::CoTaskMemFree( SPList.pvBuffer );
  951. ZeroMemory( &SPList, sizeof(SPWORDPRONUNCIATIONLIST) );
  952. }
  953. hr = SPERR_NOT_IN_LEX;
  954. }
  955. }
  956. if ( hr == SPERR_NOT_IN_LEX )
  957. {
  958. if ( m_fHaveNamesLTS &&
  959. !wcscmp( pPron->CustomLtsToken, L"Names" ) )
  960. {
  961. hr = m_cpAggregateLexicon->GetPronunciations( pPron->orthStr, 1033, eLEXTYPE_PRIVATE3, &SPList );
  962. }
  963. else
  964. {
  965. hr = m_cpAggregateLexicon->GetPronunciations( pPron->orthStr, 1033, eLEXTYPE_PRIVATE2, &SPList );
  966. }
  967. //--- Make all LTS words Nouns...
  968. for ( SPWORDPRONUNCIATION *pPronunciation = SPList.pFirstWordPronunciation; pPronunciation;
  969. pPronunciation = pPronunciation->pNextWordPronunciation )
  970. {
  971. pPronunciation->ePartOfSpeech = SPPS_Noun;
  972. }
  973. }
  974. if (SUCCEEDED(hr))
  975. {
  976. //--- WARNING - this assumes pronunciations will only come from one type of lexicon, an assumption
  977. //--- which was true as of July, 2000
  978. pPron->pronType = SPList.pFirstWordPronunciation->eLexiconType;
  979. //------------------------------------------------------------
  980. // SAPI unrolls pronunciations from their POS.
  981. // So roll them back into the original collapsed array
  982. // of one or two candidates with sorted POS (argh...)
  983. //------------------------------------------------------------
  984. SPWORDPRONUNCIATION *firstPron, *pCurPron, *pNextPron;
  985. //------------------------------------------
  986. // Init pronunciation A
  987. //------------------------------------------
  988. pCurPron = firstPron = SPList.pFirstWordPronunciation;
  989. pPron->pronArray[PRON_A].POScount = 1;
  990. //----------------------------
  991. // Get phoneme length
  992. //----------------------------
  993. cPhonLen = wcslen( firstPron->szPronunciation ) + 1; // include delimiter
  994. //----------------------------
  995. // Clip phoneme string to max
  996. //----------------------------
  997. if( cPhonLen > SP_MAX_PRON_LENGTH )
  998. {
  999. cPhonLen = SP_MAX_PRON_LENGTH;
  1000. }
  1001. //----------------------------
  1002. // Copy unicode phoneme string
  1003. //----------------------------
  1004. memcpy( pPron->pronArray[PRON_A].phon_Str, firstPron->szPronunciation, cPhonLen * sizeof(WCHAR) );
  1005. pPron->pronArray[PRON_A].phon_Len = cPhonLen -1; // minus delimiter
  1006. pPron->pronArray[PRON_A].POScode[0] = (ENGPARTOFSPEECH)firstPron->ePartOfSpeech;
  1007. //------------------------------------------
  1008. // Init pronunciation B
  1009. //------------------------------------------
  1010. pPron->pronArray[PRON_B].POScount = 0;
  1011. pPron->pronArray[PRON_B].phon_Len = 0;
  1012. pNextPron = pCurPron->pNextWordPronunciation;
  1013. while( pNextPron )
  1014. {
  1015. int isDiff;
  1016. isDiff = wcscmp( firstPron->szPronunciation, pNextPron->szPronunciation );
  1017. if( isDiff )
  1018. {
  1019. //------------------------------------------------
  1020. // Next pronunciation is different from 1st
  1021. //------------------------------------------------
  1022. if( pPron->pronArray[PRON_B].POScount < POS_MAX )
  1023. {
  1024. //---------------------------------------
  1025. // Gather POS B into array
  1026. //---------------------------------------
  1027. pPron->pronArray[PRON_B].POScode[pPron->pronArray[PRON_B].POScount] =
  1028. (ENGPARTOFSPEECH)pNextPron->ePartOfSpeech;
  1029. pPron->pronArray[PRON_B].POScount++;
  1030. if( pPron->pronArray[PRON_B].phon_Len == 0 )
  1031. {
  1032. //-----------------------------------------
  1033. // If there's no B pron yet, make one
  1034. //-----------------------------------------
  1035. cPhonLen = wcslen( pNextPron->szPronunciation ) + 1; // include delimiter
  1036. //----------------------------
  1037. // Clip phoneme string to max
  1038. //----------------------------
  1039. if( cPhonLen > SP_MAX_PRON_LENGTH )
  1040. {
  1041. cPhonLen = SP_MAX_PRON_LENGTH;
  1042. }
  1043. //----------------------------
  1044. // Copy unicode phoneme string
  1045. //----------------------------
  1046. memcpy( pPron->pronArray[PRON_B].phon_Str,
  1047. pNextPron->szPronunciation,
  1048. cPhonLen * sizeof(WCHAR) );
  1049. pPron->pronArray[PRON_B].phon_Len = cPhonLen -1; // minus delimiter
  1050. pPron->hasAlt = true;
  1051. }
  1052. }
  1053. }
  1054. else
  1055. {
  1056. //------------------------------------------------
  1057. // Next pronunciation is same as 1st
  1058. //------------------------------------------------
  1059. if( pPron->pronArray[PRON_A].POScount < POS_MAX )
  1060. {
  1061. //---------------------------------------
  1062. // Gather POS A into array
  1063. //---------------------------------------
  1064. pPron->pronArray[PRON_A].POScode[pPron->pronArray[PRON_A].POScount] =
  1065. (ENGPARTOFSPEECH)pNextPron->ePartOfSpeech;
  1066. pPron->pronArray[PRON_A].POScount++;
  1067. }
  1068. }
  1069. pCurPron = pNextPron;
  1070. pNextPron = pCurPron->pNextWordPronunciation;
  1071. }
  1072. }
  1073. //--- If XML POS provided, set selection now as it won't be touched by the POS Tagger
  1074. if ( pPron->XMLPartOfSpeech != MS_Unknown )
  1075. {
  1076. BOOL fMadeMatch = false;
  1077. //--- Check first pronunciation
  1078. for ( ULONG i = 0; i < pPron->pronArray[0].POScount; i++ )
  1079. {
  1080. if ( pPron->pronArray[0].POScode[i] == pPron->XMLPartOfSpeech )
  1081. {
  1082. pPron->altChoice = 0;
  1083. pPron->POSchoice = pPron->XMLPartOfSpeech;
  1084. fMadeMatch = true;
  1085. }
  1086. }
  1087. //--- Check second pronunciation
  1088. if ( pPron->hasAlt )
  1089. {
  1090. for ( ULONG i = 0; i < pPron->pronArray[1].POScount; i++ )
  1091. {
  1092. if ( pPron->pronArray[1].POScode[i] == pPron->XMLPartOfSpeech )
  1093. {
  1094. pPron->altChoice = 1;
  1095. pPron->POSchoice = pPron->XMLPartOfSpeech;
  1096. fMadeMatch = true;
  1097. }
  1098. }
  1099. }
  1100. //--- If this POS didn't exist for the word, let POS Tagger do its thing
  1101. //--- to determine a pronunciation, and then reassign the POS later...
  1102. if ( !fMadeMatch )
  1103. {
  1104. pPron->XMLPartOfSpeech = MS_Unknown;
  1105. pPron->POSchoice = pPron->pronArray[PRON_A].POScode[0];
  1106. }
  1107. }
  1108. //--- Set default POS, for later refinement by POS Tagger
  1109. else
  1110. {
  1111. pPron->POSchoice = pPron->pronArray[PRON_A].POScode[0];
  1112. pPron->altChoice = PRON_A;
  1113. }
  1114. if( SPList.pvBuffer )
  1115. {
  1116. ::CoTaskMemFree( SPList.pvBuffer );
  1117. }
  1118. return hr;
  1119. } /* Pronounce */
  1120. /*****************************************************************************
  1121. * CStdSentEnum::DetermineProns *
  1122. *------------------------------*
  1123. * Description:
  1124. * This method determines POS and looks up the pronounciation
  1125. ********************************************************************* MC ****/
  1126. HRESULT CStdSentEnum::DetermineProns( CItemList& ItemList, CSentItemMemory& MemoryManager )
  1127. {
  1128. SPDBG_FUNC( "CStdSentEnum::DetermineProns" );
  1129. HRESULT hr = S_OK;
  1130. ULONG cNumOfProns, cPronIndex;
  1131. PRONRECORD* pProns = NULL;
  1132. //--- Count the total number of pronunciations needed
  1133. cNumOfProns = 0;
  1134. SPLISTPOS ListPos = ItemList.GetHeadPosition();
  1135. while( ListPos )
  1136. {
  1137. TTSSentItem& Item = ItemList.GetNext( ListPos );
  1138. for ( ULONG i = 0; i < Item.ulNumWords; i++ )
  1139. {
  1140. if( Item.Words[i].pWordText &&
  1141. ( Item.Words[i].pXmlState->eAction == SPVA_Speak ||
  1142. Item.Words[i].pXmlState->eAction == SPVA_SpellOut ||
  1143. Item.Words[i].pXmlState->eAction == SPVA_Pronounce ) )
  1144. {
  1145. ++cNumOfProns;
  1146. }
  1147. }
  1148. }
  1149. if ( cNumOfProns )
  1150. {
  1151. pProns = new PRONRECORD[cNumOfProns];
  1152. if( !pProns )
  1153. {
  1154. hr = E_OUTOFMEMORY;
  1155. }
  1156. else
  1157. {
  1158. //--- First, get item pronunciation(s)
  1159. ZeroMemory( pProns, cNumOfProns * sizeof(PRONRECORD) );
  1160. cPronIndex = 0;
  1161. ListPos = ItemList.GetHeadPosition();
  1162. //--- Iterate through ItemList
  1163. while( ListPos && SUCCEEDED( hr ) )
  1164. {
  1165. TTSSentItem& Item = ItemList.GetNext( ListPos );
  1166. //--- Iterate over Words
  1167. for ( ULONG i = 0; i < Item.ulNumWords; i++ )
  1168. {
  1169. //--- Get pronunciations and parts of speech for spoken items only
  1170. if ( Item.Words[i].pWordText &&
  1171. ( Item.Words[i].pXmlState->eAction == SPVA_Speak ||
  1172. Item.Words[i].pXmlState->eAction == SPVA_SpellOut ||
  1173. Item.Words[i].pXmlState->eAction == SPVA_Pronounce ) )
  1174. {
  1175. SPDBG_ASSERT( cPronIndex < cNumOfProns );
  1176. ULONG cItemLen = Item.Words[i].ulWordLen;
  1177. //--- Clip at max text length
  1178. if( cItemLen > ( SP_MAX_WORD_LENGTH-1 ) )
  1179. {
  1180. cItemLen = SP_MAX_WORD_LENGTH - 1;
  1181. }
  1182. //--- Copy item text
  1183. memcpy( pProns[cPronIndex].orthStr,
  1184. Item.Words[i].pWordText,
  1185. cItemLen * sizeof(WCHAR) );
  1186. pProns[cPronIndex].orthStr[cItemLen] = 0;
  1187. //--- Set Part of Speech, if given in XML
  1188. if ( Item.Words[i].pXmlState->ePartOfSpeech != MS_Unknown )
  1189. {
  1190. pProns[cPronIndex].XMLPartOfSpeech = (ENGPARTOFSPEECH)Item.Words[i].pXmlState->ePartOfSpeech;
  1191. }
  1192. //--- Copy CustomLtsToken string...
  1193. wcscpy( pProns[cPronIndex].CustomLtsToken, Item.CustomLtsToken );
  1194. //--- Do Lex Lookup, if necessary
  1195. if ( Item.Words[i].pXmlState->pPhoneIds == NULL ||
  1196. Item.Words[i].pXmlState->ePartOfSpeech == MS_Unknown )
  1197. {
  1198. //--- Special Case - Disambiguate Abbreviations
  1199. if ( Item.pItemInfo->Type == eABBREVIATION ||
  1200. Item.pItemInfo->Type == eABBREVIATION_NORMALIZE )
  1201. {
  1202. const AbbrevRecord *pAbbrevInfo =
  1203. ( (TTSAbbreviationInfo*) Item.pItemInfo )->pAbbreviation;
  1204. if ( pAbbrevInfo->iPronDisambig < 0 )
  1205. {
  1206. //--- Default case - just take the first (and only) pronunciation
  1207. pProns[cPronIndex].pronArray[PRON_A].POScount = 1;
  1208. wcscpy( pProns[cPronIndex].pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron1 );
  1209. pProns[cPronIndex].pronArray[PRON_A].phon_Len =
  1210. wcslen( pProns[cPronIndex].pronArray[PRON_A].phon_Str );
  1211. pProns[cPronIndex].pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS1;
  1212. pProns[cPronIndex].pronArray[PRON_B].POScount = 0;
  1213. pProns[cPronIndex].pronArray[PRON_B].phon_Len = 0;
  1214. pProns[cPronIndex].hasAlt = false;
  1215. pProns[cPronIndex].altChoice = PRON_A;
  1216. pProns[cPronIndex].POSchoice = pAbbrevInfo->POS1;
  1217. //--- Abbreviation table prons are basically just vendor lex prons...
  1218. pProns[cPronIndex].pronType = eLEXTYPE_PRIVATE1;
  1219. }
  1220. else
  1221. {
  1222. hr = ( this->*g_PronDisambigTable[pAbbrevInfo->iPronDisambig] )
  1223. ( pAbbrevInfo, &pProns[cPronIndex], ItemList, ListPos );
  1224. }
  1225. pProns[cPronIndex].fUsePron = true;
  1226. }
  1227. //--- Default case
  1228. else
  1229. {
  1230. //--- Check disambiguation list
  1231. const AbbrevRecord* pAbbrevRecord =
  1232. (AbbrevRecord*) bsearch( (void*) pProns[cPronIndex].orthStr, (void*) g_AmbiguousWordTable,
  1233. sp_countof( g_AmbiguousWordTable ), sizeof( AbbrevRecord ),
  1234. CompareStringAndAbbrevRecord );
  1235. if ( pAbbrevRecord )
  1236. {
  1237. hr = ( this->*g_AmbiguousWordDisambigTable[pAbbrevRecord->iPronDisambig] )
  1238. ( pAbbrevRecord, &pProns[cPronIndex], ItemList, ListPos );
  1239. pProns[cPronIndex].fUsePron = true;
  1240. }
  1241. //--- Do Lex Lookup, if necessary
  1242. else
  1243. {
  1244. hr = Pronounce( &pProns[cPronIndex] );
  1245. }
  1246. }
  1247. }
  1248. cPronIndex++;
  1249. }
  1250. }
  1251. }
  1252. if (SUCCEEDED(hr))
  1253. {
  1254. //--- Next, disambiguate part-of-speech
  1255. DisambiguatePOS( pProns, cNumOfProns );
  1256. //--- Output debugging information
  1257. TTSDBG_LOGPOSPOSSIBILITIES( pProns, cNumOfProns, STREAM_POSPOSSIBILITIES );
  1258. //--- Finally, copy selected pronunciation to 'ItemList'
  1259. PRONUNIT *selectedUnit;
  1260. cPronIndex = 0;
  1261. ListPos = ItemList.GetHeadPosition();
  1262. while( ListPos && SUCCEEDED(hr) )
  1263. {
  1264. TTSSentItem& Item = ItemList.GetNext( ListPos );
  1265. for ( ULONG i = 0; i < Item.ulNumWords; i++ )
  1266. {
  1267. //--- Set pronunciation and part-of-speech for spoken items only
  1268. if( Item.Words[i].pWordText &&
  1269. ( Item.Words[i].pXmlState->eAction == SPVA_Speak ||
  1270. Item.Words[i].pXmlState->eAction == SPVA_SpellOut ||
  1271. Item.Words[i].pXmlState->eAction == SPVA_Pronounce ) )
  1272. {
  1273. SPDBG_ASSERT( cPronIndex < cNumOfProns );
  1274. //--- Use XML specified pronunciation, if given.
  1275. if ( Item.Words[i].pXmlState->pPhoneIds )
  1276. {
  1277. Item.Words[i].pWordPron = Item.Words[i].pXmlState->pPhoneIds;
  1278. }
  1279. else
  1280. {
  1281. selectedUnit = &pProns[cPronIndex].pronArray[pProns[cPronIndex].altChoice];
  1282. Item.Words[i].pWordPron =
  1283. (SPPHONEID*) MemoryManager.GetMemory( (selectedUnit->phon_Len + 1) *
  1284. sizeof(SPPHONEID), &hr );
  1285. if ( SUCCEEDED( hr ) )
  1286. {
  1287. wcscpy( Item.Words[i].pWordPron, selectedUnit->phon_Str );
  1288. }
  1289. }
  1290. //--- Use XML specified part-of-speech, if given. This will override the case
  1291. //--- where the POS didn't exist as an option and the POS Tagger did its thing
  1292. //--- to find a pronunciation.
  1293. if ( Item.Words[i].pXmlState->ePartOfSpeech != MS_Unknown )
  1294. {
  1295. Item.Words[i].eWordPartOfSpeech = (ENGPARTOFSPEECH)Item.Words[i].pXmlState->ePartOfSpeech;
  1296. }
  1297. else
  1298. {
  1299. Item.Words[i].eWordPartOfSpeech = pProns[cPronIndex].POSchoice;
  1300. }
  1301. //--- Root word
  1302. if ( pProns[cPronIndex].lemmaStr[0] )
  1303. {
  1304. Item.Words[i].ulLemmaLen = wcslen( pProns[cPronIndex].lemmaStr );
  1305. Item.Words[i].pLemma =
  1306. (WCHAR*) MemoryManager.GetMemory( Item.Words[i].ulLemmaLen * sizeof(WCHAR), &hr );
  1307. if ( SUCCEEDED( hr ) )
  1308. {
  1309. wcsncpy( (WCHAR*) Item.Words[i].pLemma, pProns[cPronIndex].lemmaStr,
  1310. Item.Words[i].ulLemmaLen );
  1311. }
  1312. }
  1313. //--- Insert pron in text, if appropriate - RAID #4746
  1314. if ( pProns[cPronIndex].fUsePron )
  1315. {
  1316. ULONG ulNumChars = wcslen( Item.Words[i].pWordPron );
  1317. Item.Words[i].pWordText =
  1318. (WCHAR*) MemoryManager.GetMemory( ( ulNumChars + 3 ) * sizeof( WCHAR ), &hr );
  1319. if ( SUCCEEDED( hr ) )
  1320. {
  1321. ZeroMemory( (WCHAR*) Item.Words[i].pWordText, ( ulNumChars + 3 ) * sizeof( WCHAR ) );
  1322. (WCHAR) Item.Words[i].pWordText[0] = L'*';
  1323. wcscpy( ( (WCHAR*) Item.Words[i].pWordText + 1 ), Item.Words[i].pWordPron );
  1324. (WCHAR) Item.Words[i].pWordText[ ulNumChars + 1 ] = L'*';
  1325. Item.Words[i].ulWordLen = ulNumChars + 2;
  1326. }
  1327. }
  1328. cPronIndex++;
  1329. }
  1330. }
  1331. }
  1332. }
  1333. if ( SUCCEEDED( hr ) )
  1334. {
  1335. //--- Check Post POS disambiguation list
  1336. SPLISTPOS ListPos = ItemList.GetHeadPosition();
  1337. while ( ListPos && SUCCEEDED( hr ) )
  1338. {
  1339. TTSSentItem& Item = ItemList.GetNext( ListPos );
  1340. if ( Item.pItemInfo->Type == eALPHA_WORD ||
  1341. Item.pItemInfo->Type == eABBREVIATION ||
  1342. Item.pItemInfo->Type == eABBREVIATION_NORMALIZE )
  1343. {
  1344. WCHAR temp;
  1345. BOOL fPeriod = false;
  1346. if ( Item.pItemSrcText[Item.ulItemSrcLen - 1] == L'.' &&
  1347. Item.ulItemSrcLen > 1 )
  1348. {
  1349. temp = Item.pItemSrcText[Item.ulItemSrcLen - 1];
  1350. *( (WCHAR*) Item.pItemSrcText + Item.ulItemSrcLen - 1 ) = 0;
  1351. fPeriod = true;
  1352. }
  1353. else
  1354. {
  1355. temp = Item.pItemSrcText[Item.ulItemSrcLen];
  1356. *( (WCHAR*) Item.pItemSrcText + Item.ulItemSrcLen ) = 0;
  1357. }
  1358. const AbbrevRecord* pAbbrevRecord =
  1359. (AbbrevRecord*) bsearch( (void*) Item.pItemSrcText, (void*) g_PostLexLookupWordTable,
  1360. sp_countof( g_PostLexLookupWordTable ), sizeof( AbbrevRecord ),
  1361. CompareStringAndAbbrevRecord );
  1362. if ( pAbbrevRecord )
  1363. {
  1364. hr = ( this->*g_PostLexLookupDisambigTable[pAbbrevRecord->iPronDisambig] )
  1365. ( pAbbrevRecord, ItemList, ListPos, MemoryManager );
  1366. }
  1367. if ( fPeriod )
  1368. {
  1369. *( (WCHAR*) Item.pItemSrcText + Item.ulItemSrcLen - 1 ) = temp;
  1370. }
  1371. else
  1372. {
  1373. *( (WCHAR*) Item.pItemSrcText + Item.ulItemSrcLen ) = temp;
  1374. }
  1375. }
  1376. }
  1377. }
  1378. }
  1379. }
  1380. if (pProns)
  1381. {
  1382. delete [] pProns;
  1383. }
  1384. return hr;
  1385. } /* CStdSentEnum::DetermineProns */
  1386. /***********************************************************************************************
  1387. * MeasurementDisambig *
  1388. *---------------------*
  1389. * Description:
  1390. * This overrides initial pronunciations of measurement abbreviations when they are used
  1391. * as modifiers - e.g. "a 7 ft. pole" vs. "the pole was 7 ft. long"
  1392. *
  1393. ********************************************************************* AH **********************/
  1394. HRESULT CStdSentEnum::MeasurementDisambig( const AbbrevRecord* pAbbrevInfo, CItemList& ItemList,
  1395. SPLISTPOS ListPos, CSentItemMemory& MemoryManager )
  1396. {
  1397. SPDBG_FUNC( "CStdSentEnum::MeasurementDisambig" );
  1398. HRESULT hr = S_OK;
  1399. //--- Get previous two items
  1400. SPLISTPOS TempPos = ListPos;
  1401. if ( TempPos )
  1402. {
  1403. ItemList.GetPrev( TempPos );
  1404. if ( TempPos )
  1405. {
  1406. ItemList.GetPrev( TempPos );
  1407. if ( TempPos )
  1408. {
  1409. TTSSentItem TempItem = ItemList.GetPrev( TempPos );
  1410. //--- Previous must be a number
  1411. if ( TempItem.pItemInfo->Type == eNUM_CARDINAL )
  1412. {
  1413. //--- Get next item
  1414. TempPos = ListPos;
  1415. TempItem = ItemList.GetNext( TempPos );
  1416. //--- Next must be a noun or adj
  1417. if ( TempItem.eItemPartOfSpeech == MS_Noun )
  1418. {
  1419. //--- Matched a 7 ft. pole type example - go with singular
  1420. TempPos = ListPos;
  1421. ItemList.GetPrev( TempPos );
  1422. TTSSentItem& MeasurementItem = ItemList.GetPrev( TempPos );
  1423. //--- Singular will always be shorter than plural, so this should never overwrite
  1424. //--- anything...
  1425. wcscpy( MeasurementItem.Words[0].pWordPron, pAbbrevInfo->pPron1 );
  1426. //--- Insert pron into word text - RAID #4746
  1427. ULONG ulNumChars = wcslen( MeasurementItem.Words[0].pWordPron );
  1428. MeasurementItem.Words[0].pWordText =
  1429. (WCHAR*) MemoryManager.GetMemory( ( ulNumChars + 3 ) * sizeof( WCHAR ), &hr );
  1430. if ( SUCCEEDED( hr ) )
  1431. {
  1432. ZeroMemory( (WCHAR*) MeasurementItem.Words[0].pWordText, ( ulNumChars + 3 ) * sizeof( WCHAR ) );
  1433. (WCHAR) MeasurementItem.Words[0].pWordText[0] = L'*';
  1434. wcscpy( ( (WCHAR*) MeasurementItem.Words[0].pWordText + 1 ), MeasurementItem.Words[0].pWordPron );
  1435. (WCHAR) MeasurementItem.Words[0].pWordText[ ulNumChars + 1 ] = L'*';
  1436. MeasurementItem.Words[0].ulWordLen = ulNumChars + 2;
  1437. }
  1438. }
  1439. else if ( TempItem.eItemPartOfSpeech == MS_Adj &&
  1440. TempPos )
  1441. {
  1442. //--- Next must be a noun
  1443. TempItem = ItemList.GetNext( TempPos );
  1444. {
  1445. if ( TempItem.eItemPartOfSpeech == MS_Noun )
  1446. {
  1447. //--- Matched a 7 ft. pole type example - go with singular
  1448. TempPos = ListPos;
  1449. ItemList.GetPrev( TempPos );
  1450. TTSSentItem& MeasurementItem = ItemList.GetPrev( TempPos );
  1451. //--- Singular will always be shorter than plural, so this should never overwrite
  1452. //--- anything...
  1453. wcscpy( MeasurementItem.Words[0].pWordPron, pAbbrevInfo->pPron1 );
  1454. //--- Insert pron into word text - RAID #4746
  1455. ULONG ulNumChars = wcslen( MeasurementItem.Words[0].pWordPron );
  1456. MeasurementItem.Words[0].pWordText =
  1457. (WCHAR*) MemoryManager.GetMemory( ( ulNumChars + 3 ) * sizeof( WCHAR ), &hr );
  1458. if ( SUCCEEDED( hr ) )
  1459. {
  1460. ZeroMemory( (WCHAR*) MeasurementItem.Words[0].pWordText, ( ulNumChars + 3 ) * sizeof( WCHAR ) );
  1461. (WCHAR) MeasurementItem.Words[0].pWordText[0] = L'*';
  1462. wcscpy( ( (WCHAR*) MeasurementItem.Words[0].pWordText + 1 ), MeasurementItem.Words[0].pWordPron );
  1463. (WCHAR) MeasurementItem.Words[0].pWordText[ ulNumChars + 1 ] = L'*';
  1464. MeasurementItem.Words[0].ulWordLen = ulNumChars + 2;
  1465. }
  1466. }
  1467. }
  1468. }
  1469. }
  1470. }
  1471. }
  1472. }
  1473. return hr;
  1474. } /* MeasurementDisambig */
  1475. /***********************************************************************************************
  1476. * TheDisambig *
  1477. *-------------*
  1478. * Description:
  1479. * This function disambiguates the word the - before a vowel it becomes "thee", before a
  1480. * consonant it is "thuh"...
  1481. *
  1482. ********************************************************************* AH **********************/
  1483. HRESULT CStdSentEnum::TheDisambig( const AbbrevRecord* pAbbrevInfo, CItemList& ItemList,
  1484. SPLISTPOS ListPos, CSentItemMemory& MemoryManager )
  1485. {
  1486. SPDBG_FUNC( "CStdSentEnum::TheDisambig" );
  1487. HRESULT hr = S_OK;
  1488. //--- Get next item
  1489. SPLISTPOS TempPos = ListPos;
  1490. if ( TempPos )
  1491. {
  1492. TTSSentItem NextItem = ItemList.GetNext( TempPos );
  1493. if ( NextItem.Words[0].pWordPron &&
  1494. bsearch( (void*) NextItem.Words[0].pWordPron, (void*) g_Vowels, sp_countof( g_Vowels ),
  1495. sizeof( WCHAR ), CompareWCHARAndWCHAR ) )
  1496. {
  1497. //--- Matched a vowel - go with / DH IY 1 /
  1498. TempPos = ListPos;
  1499. ItemList.GetPrev( TempPos );
  1500. TTSSentItem& TheItem = ItemList.GetPrev( TempPos );
  1501. //--- The two pronunciations are exactly the same length, so this should never overwrite
  1502. //--- anything
  1503. wcscpy( TheItem.Words[0].pWordPron, pAbbrevInfo->pPron1 );
  1504. TheItem.Words[0].eWordPartOfSpeech = pAbbrevInfo->POS1;
  1505. //--- Insert pron into word text - RAID #4746
  1506. ULONG ulNumChars = wcslen( TheItem.Words[0].pWordPron );
  1507. TheItem.Words[0].pWordText =
  1508. (WCHAR*) MemoryManager.GetMemory( ( ulNumChars + 3 ) * sizeof( WCHAR ), &hr );
  1509. if ( SUCCEEDED( hr ) )
  1510. {
  1511. ZeroMemory( (WCHAR*) TheItem.Words[0].pWordText, ( ulNumChars + 3 ) * sizeof( WCHAR ) );
  1512. (WCHAR) TheItem.Words[0].pWordText[0] = L'*';
  1513. wcscpy( ( (WCHAR*) TheItem.Words[0].pWordText + 1 ), TheItem.Words[0].pWordPron );
  1514. (WCHAR) TheItem.Words[0].pWordText[ ulNumChars + 1 ] = L'*';
  1515. TheItem.Words[0].ulWordLen = ulNumChars + 2;
  1516. }
  1517. }
  1518. else
  1519. {
  1520. //--- Didn't match a vowel - go with / DH AX 1 /
  1521. TempPos = ListPos;
  1522. ItemList.GetPrev( TempPos );
  1523. TTSSentItem& TheItem = ItemList.GetPrev( TempPos );
  1524. //--- The two pronunciations are exactly the same length, so this should never overwrite
  1525. //--- anything
  1526. wcscpy( TheItem.Words[0].pWordPron, pAbbrevInfo->pPron2 );
  1527. TheItem.Words[0].eWordPartOfSpeech = pAbbrevInfo->POS2;
  1528. //--- Insert pron into word text - RAID #4746
  1529. ULONG ulNumChars = wcslen( TheItem.Words[0].pWordPron );
  1530. TheItem.Words[0].pWordText =
  1531. (WCHAR*) MemoryManager.GetMemory( ( ulNumChars + 3 ) * sizeof( WCHAR ), &hr );
  1532. if ( SUCCEEDED( hr ) )
  1533. {
  1534. ZeroMemory( (WCHAR*) TheItem.Words[0].pWordText, ( ulNumChars + 3 ) * sizeof( WCHAR ) );
  1535. (WCHAR) TheItem.Words[0].pWordText[0] = L'*';
  1536. wcscpy( ( (WCHAR*) TheItem.Words[0].pWordText + 1 ), TheItem.Words[0].pWordPron );
  1537. (WCHAR) TheItem.Words[0].pWordText[ ulNumChars + 1 ] = L'*';
  1538. TheItem.Words[0].ulWordLen = ulNumChars + 2;
  1539. }
  1540. }
  1541. }
  1542. return hr;
  1543. } /* TheDisambig */
  1544. /***********************************************************************************************
  1545. * ADisambig *
  1546. *-----------*
  1547. * Description:
  1548. * This function disambiguates the word "a" - / EY 1 - Noun / vs. / AX - Det /
  1549. *
  1550. ********************************************************************* AH **********************/
  1551. HRESULT CStdSentEnum::ADisambig( const AbbrevRecord* pAbbrevInfo, PRONRECORD* pPron, CItemList& ItemList,
  1552. SPLISTPOS ListPos )
  1553. {
  1554. SPDBG_FUNC( "CStdSentEnum::ADisambig" );
  1555. HRESULT hr = S_OK;
  1556. BOOL fNoun = false;
  1557. //--- Get Current Item...
  1558. SPLISTPOS TempPos = ListPos;
  1559. if ( TempPos )
  1560. {
  1561. ItemList.GetPrev( TempPos );
  1562. if ( TempPos )
  1563. {
  1564. TTSSentItem CurrentItem = ItemList.GetPrev( TempPos );
  1565. //--- If "a" is part of a multi-word item, use the Noun pronunciation...
  1566. //--- If "a" is not an AlphaWord, use the Noun pronunciation...
  1567. if ( CurrentItem.ulNumWords > 1 ||
  1568. CurrentItem.pItemInfo->Type != eALPHA_WORD )
  1569. {
  1570. fNoun = true;
  1571. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron1 );
  1572. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  1573. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS1;
  1574. pPron->POSchoice = pAbbrevInfo->POS1;
  1575. }
  1576. }
  1577. }
  1578. if ( !fNoun )
  1579. {
  1580. //--- Get Next Item...
  1581. TempPos = ListPos;
  1582. if ( TempPos )
  1583. {
  1584. TTSSentItem NextItem = ItemList.GetNext( TempPos );
  1585. //--- If "a" is followed by punctuation, use the Noun pronunciation...
  1586. if ( !( NextItem.pItemInfo->Type & eWORDLIST_IS_VALID ) )
  1587. {
  1588. fNoun = true;
  1589. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron1 );
  1590. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  1591. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS1;
  1592. pPron->POSchoice = pAbbrevInfo->POS1;
  1593. }
  1594. }
  1595. }
  1596. //--- Default - use the Determiner pronunciation (but include Noun pronunciation as well,
  1597. //--- so that POS tagger rules will work properly)...
  1598. if ( !fNoun )
  1599. {
  1600. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron2 );
  1601. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  1602. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS2;
  1603. pPron->pronArray[PRON_A].POScount = 1;
  1604. pPron->POSchoice = pAbbrevInfo->POS2;
  1605. wcscpy( pPron->pronArray[PRON_B].phon_Str, pAbbrevInfo->pPron1 );
  1606. pPron->pronArray[PRON_B].phon_Len = wcslen( pPron->pronArray[PRON_B].phon_Str );
  1607. pPron->pronArray[PRON_B].POScode[0] = pAbbrevInfo->POS1;
  1608. pPron->pronArray[PRON_B].POScount = 1;
  1609. pPron->hasAlt = true;
  1610. }
  1611. return hr;
  1612. } /* ADisambig */
  1613. /***********************************************************************************************
  1614. * PolishDisambig *
  1615. *----------------*
  1616. * Description:
  1617. * This function disambiguates the word "polish" - [p ow 1 l - ax sh - Noun] vs.
  1618. * [p ow 1 l - ax sh - Adj] vs. [p aa 1 l - ih sh - Verb] vs. [p aa 1 l - ih sh - Noun]
  1619. *
  1620. ********************************************************************* AH **********************/
  1621. HRESULT CStdSentEnum::PolishDisambig( const AbbrevRecord* pAbbrevInfo, PRONRECORD* pPron, CItemList& ItemList,
  1622. SPLISTPOS ListPos )
  1623. {
  1624. SPDBG_FUNC( "CStdSentEnum::PolishDisambig" );
  1625. HRESULT hr = S_OK;
  1626. BOOL fMatch = false;
  1627. //--- Get Current Item...
  1628. SPLISTPOS TempPos = ListPos;
  1629. if ( TempPos )
  1630. {
  1631. ItemList.GetPrev( TempPos );
  1632. if ( TempPos )
  1633. {
  1634. TTSSentItem CurrentItem = ItemList.GetPrev( TempPos );
  1635. //--- If "Polish" is capitalized and not sentence-initial, and not preceded immediately
  1636. //--- by an open double-quote or parenthesis, use Noun...
  1637. if ( iswupper( CurrentItem.pItemSrcText[0] ) )
  1638. {
  1639. BOOL fSentenceInitial = false;
  1640. if ( !TempPos )
  1641. {
  1642. fSentenceInitial = true;
  1643. }
  1644. else
  1645. {
  1646. TTSSentItem PrevItem = ItemList.GetPrev( TempPos );
  1647. if ( PrevItem.pItemInfo->Type == eOPEN_PARENTHESIS ||
  1648. PrevItem.pItemInfo->Type == eOPEN_BRACKET ||
  1649. PrevItem.pItemInfo->Type == eOPEN_BRACE ||
  1650. PrevItem.pItemInfo->Type == eSINGLE_QUOTE ||
  1651. PrevItem.pItemInfo->Type == eDOUBLE_QUOTE )
  1652. {
  1653. fSentenceInitial = true;
  1654. }
  1655. }
  1656. if ( fSentenceInitial )
  1657. {
  1658. fMatch = true;
  1659. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron2 );
  1660. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  1661. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS2;
  1662. pPron->POSchoice = pAbbrevInfo->POS2;
  1663. }
  1664. else
  1665. {
  1666. fMatch = true;
  1667. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron1 );
  1668. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  1669. pPron->pronArray[PRON_A].POScode[0] = MS_Noun;
  1670. pPron->POSchoice = MS_Noun;
  1671. }
  1672. }
  1673. }
  1674. }
  1675. //--- Default - use the Verb pronunciation (but include the others as well,
  1676. //--- so that POS tagger rules will work properly)...
  1677. if ( !fMatch )
  1678. {
  1679. //--- Verb, Noun
  1680. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron2 );
  1681. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  1682. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS2;
  1683. pPron->pronArray[PRON_A].POScode[1] = pAbbrevInfo->POS3;
  1684. pPron->pronArray[PRON_A].POScount = 2;
  1685. //--- Adj
  1686. wcscpy( pPron->pronArray[PRON_B].phon_Str, pAbbrevInfo->pPron1 );
  1687. pPron->pronArray[PRON_B].phon_Len = wcslen( pPron->pronArray[PRON_B].phon_Str );
  1688. pPron->pronArray[PRON_B].POScode[0] = pAbbrevInfo->POS1;
  1689. pPron->pronArray[PRON_B].POScount = 1;
  1690. //--- Set initial choice to Verb...
  1691. pPron->POSchoice = pAbbrevInfo->POS2;
  1692. pPron->hasAlt = true;
  1693. }
  1694. return hr;
  1695. } /* PolishDisambig */
  1696. /***********************************************************************************************
  1697. * ReadDisambig *
  1698. *--------------*
  1699. * Description:
  1700. * This function disambiguates the word Read - past tense vs. present...
  1701. *
  1702. ********************************************************************* AH **********************/
  1703. HRESULT CStdSentEnum::ReadDisambig( const AbbrevRecord* pAbbrevInfo, CItemList& ItemList,
  1704. SPLISTPOS ListPos, CSentItemMemory& MemoryManager )
  1705. {
  1706. SPDBG_FUNC( "CStdSentEnum::ReadDisambig" );
  1707. HRESULT hr = S_OK;
  1708. BOOL fMatch = false;
  1709. //--- Get prev item
  1710. SPLISTPOS TempPos = ListPos;
  1711. if ( TempPos )
  1712. {
  1713. ItemList.GetPrev( TempPos );
  1714. if ( TempPos )
  1715. {
  1716. ItemList.GetPrev( TempPos );
  1717. if ( TempPos )
  1718. {
  1719. TTSSentItem PrevItem = ItemList.GetPrev( TempPos );
  1720. //--- Check for closest auxiliary
  1721. while ( PrevItem.Words[0].eWordPartOfSpeech != MS_VAux &&
  1722. PrevItem.Words[0].eWordPartOfSpeech != MS_Contr &&
  1723. TempPos )
  1724. {
  1725. PrevItem = ItemList.GetPrev( TempPos );
  1726. }
  1727. if ( PrevItem.Words[0].eWordPartOfSpeech == MS_VAux )
  1728. {
  1729. fMatch = true;
  1730. if ( wcsnicmp( PrevItem.Words[0].pWordText, L"have", 4 ) == 0 ||
  1731. wcsnicmp( PrevItem.Words[0].pWordText, L"has", 3 ) == 0 ||
  1732. wcsnicmp( PrevItem.Words[0].pWordText, L"had", 3 ) == 0 ||
  1733. wcsnicmp( PrevItem.Words[0].pWordText, L"am", 2 ) == 0 ||
  1734. wcsnicmp( PrevItem.Words[0].pWordText, L"ain't", 5 ) == 0 ||
  1735. wcsnicmp( PrevItem.Words[0].pWordText, L"are", 3 ) == 0 ||
  1736. wcsnicmp( PrevItem.Words[0].pWordText, L"aren't", 6 ) == 0 ||
  1737. wcsnicmp( PrevItem.Words[0].pWordText, L"be", 2 ) == 0 ||
  1738. wcsnicmp( PrevItem.Words[0].pWordText, L"is", 2 ) == 0 ||
  1739. wcsnicmp( PrevItem.Words[0].pWordText, L"was", 3 ) == 0 ||
  1740. wcsnicmp( PrevItem.Words[0].pWordText, L"were", 4 ) == 0 )
  1741. {
  1742. //--- Matched have or haven't (has or hasn't, had or hadn't) - go with "red"
  1743. TempPos = ListPos;
  1744. ItemList.GetPrev( TempPos );
  1745. TTSSentItem& ReadItem = ItemList.GetPrev( TempPos );
  1746. //--- The two pronunciations are exactly the same length, so this should never overwrite
  1747. //--- anything
  1748. wcscpy( ReadItem.Words[0].pWordPron, pAbbrevInfo->pPron2 );
  1749. ReadItem.Words[0].eWordPartOfSpeech = pAbbrevInfo->POS2;
  1750. ReadItem.eItemPartOfSpeech = pAbbrevInfo->POS2;
  1751. //--- Insert pron into word text - RAID #4746
  1752. ULONG ulNumChars = wcslen( ReadItem.Words[0].pWordPron );
  1753. ReadItem.Words[0].pWordText =
  1754. (WCHAR*) MemoryManager.GetMemory( ( ulNumChars + 3 ) * sizeof( WCHAR ), &hr );
  1755. if ( SUCCEEDED( hr ) )
  1756. {
  1757. ZeroMemory( (WCHAR*) ReadItem.Words[0].pWordText, ( ulNumChars + 3 ) * sizeof( WCHAR ) );
  1758. (WCHAR) ReadItem.Words[0].pWordText[0] = L'*';
  1759. wcscpy( ( (WCHAR*) ReadItem.Words[0].pWordText + 1 ), ReadItem.Words[0].pWordPron );
  1760. (WCHAR) ReadItem.Words[0].pWordText[ ulNumChars + 1 ] = L'*';
  1761. ReadItem.Words[0].ulWordLen = ulNumChars + 2;
  1762. }
  1763. }
  1764. else
  1765. {
  1766. //--- Some other auxiliary - go with "reed"
  1767. TempPos = ListPos;
  1768. ItemList.GetPrev( TempPos );
  1769. TTSSentItem& ReadItem = ItemList.GetPrev( TempPos );
  1770. //--- The two pronunciations are exactly the same length, so this should never overwrite
  1771. //--- anything
  1772. wcscpy( ReadItem.Words[0].pWordPron, pAbbrevInfo->pPron1 );
  1773. ReadItem.Words[0].eWordPartOfSpeech = pAbbrevInfo->POS1;
  1774. ReadItem.eItemPartOfSpeech = pAbbrevInfo->POS1;
  1775. //--- Insert pron into word text - RAID #4746
  1776. ULONG ulNumChars = wcslen( ReadItem.Words[0].pWordPron );
  1777. ReadItem.Words[0].pWordText =
  1778. (WCHAR*) MemoryManager.GetMemory( ( ulNumChars + 3 ) * sizeof( WCHAR ), &hr );
  1779. if ( SUCCEEDED( hr ) )
  1780. {
  1781. ZeroMemory( (WCHAR*) ReadItem.Words[0].pWordText, ( ulNumChars + 3 ) * sizeof( WCHAR ) );
  1782. (WCHAR) ReadItem.Words[0].pWordText[0] = L'*';
  1783. wcscpy( ( (WCHAR*) ReadItem.Words[0].pWordText + 1 ), ReadItem.Words[0].pWordPron );
  1784. (WCHAR) ReadItem.Words[0].pWordText[ ulNumChars + 1 ] = L'*';
  1785. ReadItem.Words[0].ulWordLen = ulNumChars + 2;
  1786. }
  1787. }
  1788. }
  1789. //--- Check for pronoun aux contractions
  1790. else if ( PrevItem.Words[0].eWordPartOfSpeech == MS_Contr )
  1791. {
  1792. fMatch = true;
  1793. const WCHAR *pApostrophe = wcsstr( PrevItem.Words[0].pWordText, L"'" );
  1794. if ( pApostrophe &&
  1795. wcsnicmp( pApostrophe, L"'ll", 3 ) == 0 )
  1796. {
  1797. //--- Matched an 'll form - go with "reed"
  1798. TempPos = ListPos;
  1799. ItemList.GetPrev( TempPos );
  1800. TTSSentItem& ReadItem = ItemList.GetPrev( TempPos );
  1801. wcscpy( ReadItem.Words[0].pWordPron, pAbbrevInfo->pPron1 );
  1802. ReadItem.Words[0].eWordPartOfSpeech = pAbbrevInfo->POS1;
  1803. ReadItem.eItemPartOfSpeech = pAbbrevInfo->POS1;
  1804. //--- Insert pron into word text - RAID #4746
  1805. ULONG ulNumChars = wcslen( ReadItem.Words[0].pWordPron );
  1806. ReadItem.Words[0].pWordText =
  1807. (WCHAR*) MemoryManager.GetMemory( ( ulNumChars + 3 ) * sizeof( WCHAR ), &hr );
  1808. if ( SUCCEEDED( hr ) )
  1809. {
  1810. ZeroMemory( (WCHAR*) ReadItem.Words[0].pWordText, ( ulNumChars + 3 ) * sizeof( WCHAR ) );
  1811. (WCHAR) ReadItem.Words[0].pWordText[0] = L'*';
  1812. wcscpy( ( (WCHAR*) ReadItem.Words[0].pWordText + 1 ), ReadItem.Words[0].pWordPron );
  1813. (WCHAR) ReadItem.Words[0].pWordText[ ulNumChars + 1 ] = L'*';
  1814. ReadItem.Words[0].ulWordLen = ulNumChars + 2;
  1815. }
  1816. }
  1817. else
  1818. {
  1819. //--- Some other form - go with "red"
  1820. TempPos = ListPos;
  1821. ItemList.GetPrev( TempPos );
  1822. TTSSentItem& ReadItem = ItemList.GetPrev( TempPos );
  1823. wcscpy( ReadItem.Words[0].pWordPron, pAbbrevInfo->pPron2 );
  1824. ReadItem.Words[0].eWordPartOfSpeech = pAbbrevInfo->POS2;
  1825. ReadItem.eItemPartOfSpeech = pAbbrevInfo->POS2;
  1826. //--- Insert pron into word text - RAID #4746
  1827. ULONG ulNumChars = wcslen( ReadItem.Words[0].pWordPron );
  1828. ReadItem.Words[0].pWordText =
  1829. (WCHAR*) MemoryManager.GetMemory( ( ulNumChars + 3 ) * sizeof( WCHAR ), &hr );
  1830. if ( SUCCEEDED( hr ) )
  1831. {
  1832. ZeroMemory( (WCHAR*) ReadItem.Words[0].pWordText, ( ulNumChars + 3 ) * sizeof( WCHAR ) );
  1833. (WCHAR) ReadItem.Words[0].pWordText[0] = L'*';
  1834. wcscpy( ( (WCHAR*) ReadItem.Words[0].pWordText + 1 ), ReadItem.Words[0].pWordPron );
  1835. (WCHAR) ReadItem.Words[0].pWordText[ ulNumChars + 1 ] = L'*';
  1836. ReadItem.Words[0].ulWordLen = ulNumChars + 2;
  1837. }
  1838. }
  1839. }
  1840. //--- Check for infinitival form
  1841. else
  1842. {
  1843. TempPos = ListPos;
  1844. ItemList.GetPrev( TempPos );
  1845. ItemList.GetPrev( TempPos );
  1846. PrevItem = ItemList.GetPrev( TempPos );
  1847. if ( PrevItem.Words[0].ulWordLen == 2 &&
  1848. wcsnicmp( PrevItem.Words[0].pWordText, L"to", 2 ) == 0 )
  1849. {
  1850. fMatch = true;
  1851. //--- Matched infinitival form - go with "reed"
  1852. TempPos = ListPos;
  1853. ItemList.GetPrev( TempPos );
  1854. TTSSentItem& ReadItem = ItemList.GetPrev( TempPos );
  1855. wcscpy( ReadItem.Words[0].pWordPron, pAbbrevInfo->pPron1 );
  1856. ReadItem.Words[0].eWordPartOfSpeech = pAbbrevInfo->POS1;
  1857. ReadItem.eItemPartOfSpeech = pAbbrevInfo->POS1;
  1858. //--- Insert pron into word text - RAID #4746
  1859. ULONG ulNumChars = wcslen( ReadItem.Words[0].pWordPron );
  1860. ReadItem.Words[0].pWordText =
  1861. (WCHAR*) MemoryManager.GetMemory( ( ulNumChars + 3 ) * sizeof( WCHAR ), &hr );
  1862. if ( SUCCEEDED( hr ) )
  1863. {
  1864. ZeroMemory( (WCHAR*) ReadItem.Words[0].pWordText, ( ulNumChars + 3 ) * sizeof( WCHAR ) );
  1865. (WCHAR) ReadItem.Words[0].pWordText[0] = L'*';
  1866. wcscpy( ( (WCHAR*) ReadItem.Words[0].pWordText + 1 ), ReadItem.Words[0].pWordPron );
  1867. (WCHAR) ReadItem.Words[0].pWordText[ ulNumChars + 1 ] = L'*';
  1868. ReadItem.Words[0].ulWordLen = ulNumChars + 2;
  1869. }
  1870. }
  1871. }
  1872. }
  1873. //--- Sentence initial - go with "reed"
  1874. else
  1875. {
  1876. fMatch = true;
  1877. TempPos = ListPos;
  1878. ItemList.GetPrev( TempPos );
  1879. TTSSentItem& ReadItem = ItemList.GetPrev( TempPos );
  1880. wcscpy( ReadItem.Words[0].pWordPron, pAbbrevInfo->pPron1 );
  1881. ReadItem.Words[0].eWordPartOfSpeech = pAbbrevInfo->POS1;
  1882. ReadItem.eItemPartOfSpeech = pAbbrevInfo->POS1;
  1883. //--- Insert pron into word text - RAID #4746
  1884. ULONG ulNumChars = wcslen( ReadItem.Words[0].pWordPron );
  1885. ReadItem.Words[0].pWordText =
  1886. (WCHAR*) MemoryManager.GetMemory( ( ulNumChars + 3 ) * sizeof( WCHAR ), &hr );
  1887. if ( SUCCEEDED( hr ) )
  1888. {
  1889. ZeroMemory( (WCHAR*) ReadItem.Words[0].pWordText, ( ulNumChars + 3 ) * sizeof( WCHAR ) );
  1890. (WCHAR) ReadItem.Words[0].pWordText[0] = L'*';
  1891. wcscpy( ( (WCHAR*) ReadItem.Words[0].pWordText + 1 ), ReadItem.Words[0].pWordPron );
  1892. (WCHAR) ReadItem.Words[0].pWordText[ ulNumChars + 1 ] = L'*';
  1893. ReadItem.Words[0].ulWordLen = ulNumChars + 2;
  1894. }
  1895. }
  1896. }
  1897. }
  1898. if ( !fMatch )
  1899. {
  1900. TempPos = ListPos;
  1901. ItemList.GetPrev( TempPos );
  1902. TTSSentItem& ReadItem = ItemList.GetPrev( TempPos );
  1903. //--- Default - go with past tense...
  1904. wcscpy( ReadItem.Words[0].pWordPron, pAbbrevInfo->pPron2 );
  1905. ReadItem.Words[0].eWordPartOfSpeech = pAbbrevInfo->POS2;
  1906. ReadItem.eItemPartOfSpeech = pAbbrevInfo->POS2;
  1907. //--- Insert pron into word text - RAID #4746
  1908. ULONG ulNumChars = wcslen( ReadItem.Words[0].pWordPron );
  1909. ReadItem.Words[0].pWordText =
  1910. (WCHAR*) MemoryManager.GetMemory( ( ulNumChars + 3 ) * sizeof( WCHAR ), &hr );
  1911. if ( SUCCEEDED( hr ) )
  1912. {
  1913. ZeroMemory( (WCHAR*) ReadItem.Words[0].pWordText, ( ulNumChars + 3 ) * sizeof( WCHAR ) );
  1914. (WCHAR) ReadItem.Words[0].pWordText[0] = L'*';
  1915. wcscpy( ( (WCHAR*) ReadItem.Words[0].pWordText + 1 ), ReadItem.Words[0].pWordPron );
  1916. (WCHAR) ReadItem.Words[0].pWordText[ ulNumChars + 1 ] = L'*';
  1917. ReadItem.Words[0].ulWordLen = ulNumChars + 2;
  1918. }
  1919. }
  1920. return hr;
  1921. } /* ReadDisambig */