Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1996 lines
92 KiB

  1. /*******************************************************************************
  2. * Disambig.cpp *
  3. *--------------*
  4. * Description:
  5. * This module contains the methods to disambiguate part of speech and
  6. * select the correct pronunciation from the lexicon.
  7. *-------------------------------------------------------------------------------
  8. * Created By: EDC Date: 07/15/99
  9. * Copyright (C) 1999 Microsoft Corporation
  10. * All Rights Reserved
  11. *
  12. *******************************************************************************/
  13. //--- Additional includes
  14. #include "stdafx.h"
  15. #include "commonlx.h"
  16. #ifndef StdSentEnum_h
  17. #include "stdsentenum.h"
  18. #endif
  19. #include "spttsengdebug.h"
  20. /*****************************************************************************
  21. * TryPOSConversion *
  22. *------------------*
  23. *
  24. * Description:
  25. * Checks to see whether the argument PRONRECORD contains the argument
  26. * ENGPARTOFSPEECH as an option. If so, sets the PRONRECORD alternate
  27. * choice and part of speech choice, and returns true. If not, just returns
  28. * false without modifying the PRONRECORD at all.
  29. *
  30. ***************************************************************** AH *********/
  31. bool TryPOSConversion( PRONRECORD& pPron, ENGPARTOFSPEECH PartOfSpeech )
  32. {
  33. //--- Check first pronunciation
  34. for ( ULONG i = 0; i < pPron.pronArray[0].POScount; i++ )
  35. {
  36. if ( pPron.pronArray[0].POScode[i] == PartOfSpeech )
  37. {
  38. pPron.altChoice = 0;
  39. pPron.POSchoice = PartOfSpeech;
  40. return true;
  41. }
  42. }
  43. //--- Check second pronunciation
  44. if ( pPron.hasAlt )
  45. {
  46. for ( ULONG i = 0; i < pPron.pronArray[1].POScount; i++ )
  47. {
  48. if ( pPron.pronArray[1].POScode[i] == PartOfSpeech )
  49. {
  50. pPron.altChoice = 1;
  51. pPron.POSchoice = PartOfSpeech;
  52. return true;
  53. }
  54. }
  55. }
  56. return false;
  57. } /* TryPOS Conversion */
  58. /*****************************************************************************
  59. * DisambiguatePOS *
  60. *-----------------*
  61. *
  62. * Description:
  63. * Disambiguate parts of speech by applying patches in order... This
  64. * work is an implementation of Eric Brill's rule-based part of speech
  65. * tagger - see, for example:
  66. *
  67. * Brill, Eric. 1992. A simple rule-based part of speech tagger.
  68. * In Proceedings of the Third Conference on Applied Natural
  69. * Language Processing, ACL. Trento, Italy.
  70. *
  71. ***************************************************************** AH *********/
  72. void DisambiguatePOS( PRONRECORD *pProns, ULONG cNumOfWords )
  73. {
  74. SPDBG_FUNC( "DisambiguatePOS" );
  75. //--- Iterate over the patches, applying each (where applicable) to the
  76. //--- entire sentence. For each patch, iterate over each word in the
  77. //--- sentence to which the patch could apply (from left to right).
  78. for ( int i = 0; i < sp_countof( g_POSTaggerPatches ); i++ )
  79. {
  80. switch ( g_POSTaggerPatches[i].eTemplateType )
  81. {
  82. case PREV1T:
  83. {
  84. if ( cNumOfWords > 1 )
  85. {
  86. for ( ULONG j = 1; j < cNumOfWords; j++ )
  87. {
  88. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  89. {
  90. //--- If the current POS matches, and the previous POS matches, and
  91. //--- the conversion POS is a possibility for this word, convert the
  92. //--- POS.
  93. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  94. pProns[j - 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 )
  95. {
  96. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  97. }
  98. }
  99. }
  100. }
  101. }
  102. break;
  103. case NEXT1T:
  104. {
  105. if ( cNumOfWords > 1 )
  106. {
  107. for ( ULONG j = 0; j < cNumOfWords - 1; j++ )
  108. {
  109. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  110. {
  111. //--- If the current POS matches, and the next POS matches, and
  112. //--- the conversion POS is a possibility for this word, convert the
  113. //--- POS.
  114. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  115. pProns[j + 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 )
  116. {
  117. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  118. }
  119. }
  120. }
  121. }
  122. }
  123. break;
  124. case PREV2T:
  125. {
  126. if ( cNumOfWords > 2 )
  127. {
  128. for ( ULONG j = 2; j < cNumOfWords; j++ )
  129. {
  130. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  131. {
  132. //--- If the current POS matches, and the POS two previous matches, and
  133. //--- the conversion POS is a possibility for this word, convert the POS.
  134. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  135. pProns[j - 2].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 )
  136. {
  137. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  138. }
  139. }
  140. }
  141. }
  142. }
  143. break;
  144. case NEXT2T:
  145. {
  146. if ( cNumOfWords > 2 )
  147. {
  148. for ( ULONG j = 0; j < cNumOfWords - 2; j++ )
  149. {
  150. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  151. {
  152. //--- If the current POS matches, and the POS two after matches, and
  153. //--- the conversion POS is a possibility for this word, convert the
  154. //--- POS.
  155. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  156. pProns[j + 2].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 )
  157. {
  158. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  159. }
  160. }
  161. }
  162. }
  163. }
  164. break;
  165. case PREV1OR2T:
  166. {
  167. if ( cNumOfWords > 2 )
  168. {
  169. for ( ULONG j = 1; j < cNumOfWords; j++ )
  170. {
  171. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  172. {
  173. //--- If the current POS matches, and the previous POS matches OR the
  174. //--- POS two previous matches, and the conversion POS is a possibility
  175. //--- for this word, convert the POS.
  176. if ( ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  177. pProns[j - 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 ) ||
  178. ( j > 1 &&
  179. pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  180. pProns[j - 2].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 ) )
  181. {
  182. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  183. }
  184. }
  185. }
  186. }
  187. }
  188. break;
  189. case NEXT1OR2T:
  190. {
  191. if ( cNumOfWords > 2 )
  192. {
  193. for ( ULONG j = 0; j < cNumOfWords - 1; j++ )
  194. {
  195. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  196. {
  197. //--- If the current POS matches, and the next POS matches OR the POS
  198. //--- two after matches, and the conversion POS is a possibility for this
  199. //--- word, convert the POS.
  200. if ( ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  201. pProns[j + 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 ) ||
  202. ( j < cNumOfWords - 2 &&
  203. pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  204. pProns[j + 2].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 ) )
  205. {
  206. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  207. }
  208. }
  209. }
  210. }
  211. }
  212. break;
  213. case PREV1OR2OR3T:
  214. {
  215. if ( cNumOfWords > 3 )
  216. {
  217. for ( ULONG j = 1; j < cNumOfWords; j++ )
  218. {
  219. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  220. {
  221. //--- If the current POS matches, and the previous POS matches OR the
  222. //--- POS two previous matches OR the POS three previous matches, and
  223. //--- the conversion POS is a possibility for this word, convert the POS.
  224. if ( ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  225. pProns[j - 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 ) ||
  226. ( j > 1 &&
  227. pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  228. pProns[j - 2].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 ) ||
  229. ( j > 2 &&
  230. pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  231. pProns[j - 3].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 ) )
  232. {
  233. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  234. }
  235. }
  236. }
  237. }
  238. }
  239. break;
  240. case NEXT1OR2OR3T:
  241. {
  242. if ( cNumOfWords > 3 )
  243. {
  244. for ( ULONG j = 0; j < cNumOfWords - 1; j++ )
  245. {
  246. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  247. {
  248. //--- If the current POS matches, and the next POS matches OR the POS
  249. //--- two after matches OR the POS three after matches, and the conversion
  250. //--- POS is a possibility for this word, convert the POS.
  251. if ( ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  252. pProns[j + 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 ) ||
  253. ( j < cNumOfWords - 2 &&
  254. pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  255. pProns[j + 2].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 ) ||
  256. ( j < cNumOfWords - 3 &&
  257. pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  258. pProns[j + 3].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 ) )
  259. {
  260. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  261. }
  262. }
  263. }
  264. }
  265. }
  266. break;
  267. case PREV1TNEXT1T:
  268. {
  269. if ( cNumOfWords > 2 )
  270. {
  271. for ( ULONG j = 1; j < cNumOfWords - 1; j++ )
  272. {
  273. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  274. {
  275. //--- If the current POS matches, and the next POS matches, and the
  276. //--- previous POS matches, and the conversion POS is a possibility
  277. //--- for this word, convert the POS.
  278. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  279. pProns[j - 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 &&
  280. pProns[j + 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS2 )
  281. {
  282. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  283. }
  284. }
  285. }
  286. }
  287. }
  288. break;
  289. case PREV1TNEXT2T:
  290. {
  291. if ( cNumOfWords > 3 )
  292. {
  293. for ( ULONG j = 1; j < cNumOfWords - 2; j++ )
  294. {
  295. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  296. {
  297. //--- If the current POS matches, and the POS two after matches, and the
  298. //--- previous POS matches, and the conversion POS is a possibility
  299. //--- for this word, convert the POS.
  300. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  301. pProns[j - 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 &&
  302. pProns[j + 2].POSchoice == g_POSTaggerPatches[i].eTemplatePOS2 )
  303. {
  304. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  305. }
  306. }
  307. }
  308. }
  309. }
  310. break;
  311. case PREV2TNEXT1T:
  312. {
  313. if ( cNumOfWords > 3 )
  314. {
  315. for ( ULONG j = 2; j < cNumOfWords - 1; j++ )
  316. {
  317. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  318. {
  319. //--- If the current POS matches, and the next POS matches, and the
  320. //--- POS two previous matches, and the conversion POS is a possibility
  321. //--- for this word, convert the POS.
  322. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  323. pProns[j - 2].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 &&
  324. pProns[j + 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS2 )
  325. {
  326. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  327. }
  328. }
  329. }
  330. }
  331. }
  332. break;
  333. case CAP:
  334. {
  335. for ( ULONG j = 0; j < cNumOfWords; j++ )
  336. {
  337. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  338. {
  339. //--- If the current POS matches, and the word is capitalized, and the
  340. //--- conversion POS is a possibility for this word, convert the POS.
  341. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  342. iswupper( pProns[j].orthStr[0] ) )
  343. {
  344. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  345. }
  346. }
  347. }
  348. }
  349. break;
  350. case NOTCAP:
  351. {
  352. for ( ULONG j = 0; j < cNumOfWords; j++ )
  353. {
  354. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  355. {
  356. //--- If the current POS matches, and the word is not capitalized, and the
  357. //--- conversion POS is a possibility for this word, convert the POS.
  358. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  359. !iswupper( pProns[j].orthStr[0] ) )
  360. {
  361. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  362. }
  363. }
  364. }
  365. }
  366. break;
  367. case PREVCAP:
  368. {
  369. if ( cNumOfWords > 1 )
  370. {
  371. for ( ULONG j = 1; j < cNumOfWords; j++ )
  372. {
  373. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  374. {
  375. //--- If the current POS matches, and the previous word is capitalized,
  376. //--- and the conversion POS is a possibility for this word, convert the
  377. //--- POS.
  378. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  379. iswupper( pProns[j - 1].orthStr[0] ) )
  380. {
  381. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  382. }
  383. }
  384. }
  385. }
  386. }
  387. break;
  388. case PREVNOTCAP:
  389. {
  390. if ( cNumOfWords > 1 )
  391. {
  392. for ( ULONG j = 1; j < cNumOfWords; j++ )
  393. {
  394. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  395. {
  396. //--- If the current POS matches, and the word is capitalized, and the
  397. //--- conversion POS is a possibility for this word, convert the POS.
  398. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  399. !iswupper( pProns[j - 1].orthStr[0] ) )
  400. {
  401. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  402. }
  403. }
  404. }
  405. }
  406. }
  407. break;
  408. case PREV1W:
  409. {
  410. if ( cNumOfWords > 1 )
  411. {
  412. for ( ULONG j = 1; j < cNumOfWords; j++ )
  413. {
  414. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  415. {
  416. //--- If the current POS matches, and the previous word matches, and the
  417. //--- conversion POS is a possibility for this word, convert the POS.
  418. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  419. _wcsicmp( pProns[j - 1].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 )
  420. {
  421. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  422. }
  423. }
  424. }
  425. }
  426. }
  427. break;
  428. case NEXT1W:
  429. {
  430. if ( cNumOfWords > 1 )
  431. {
  432. for ( ULONG j = 0; j < cNumOfWords - 1; j++ )
  433. {
  434. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  435. {
  436. //--- If the current POS matches, and the next word matches, and the
  437. //--- conversion POS is a possibility for this word, convert the POS.
  438. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  439. _wcsicmp( pProns[j + 1].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 )
  440. {
  441. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  442. }
  443. }
  444. }
  445. }
  446. }
  447. break;
  448. case PREV2W:
  449. {
  450. if ( cNumOfWords > 2 )
  451. {
  452. for ( ULONG j = 2; j < cNumOfWords; j++ )
  453. {
  454. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  455. {
  456. //--- If the current POS matches, and the word two previous matches, and the
  457. //--- conversion POS is a possibility for this word, convert the POS.
  458. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  459. _wcsicmp( pProns[j - 2].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 )
  460. {
  461. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  462. }
  463. }
  464. }
  465. }
  466. }
  467. break;
  468. case NEXT2W:
  469. {
  470. if ( cNumOfWords > 2 )
  471. {
  472. for ( ULONG j = 0; j < cNumOfWords - 2; j++ )
  473. {
  474. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  475. {
  476. //--- If the current POS matches, and the word two after matches, and the
  477. //--- conversion POS is a possibility for this word, convert the POS.
  478. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  479. _wcsicmp( pProns[j + 2].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 )
  480. {
  481. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  482. }
  483. }
  484. }
  485. }
  486. }
  487. break;
  488. case PREV1OR2W:
  489. {
  490. if ( cNumOfWords > 2 )
  491. {
  492. for ( ULONG j = 0; j < cNumOfWords - 1; j++ )
  493. {
  494. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  495. {
  496. //--- If the current POS matches, and the previous word OR the word two
  497. //--- previous matches, and the conversion POS is a possibility for this word,
  498. //--- convert the POS.
  499. if ( ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  500. _wcsicmp( pProns[j - 1].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 ) ||
  501. ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  502. _wcsicmp( pProns[j - 2].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 ) )
  503. {
  504. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  505. }
  506. }
  507. }
  508. }
  509. }
  510. break;
  511. case NEXT1OR2W:
  512. {
  513. if ( cNumOfWords > 1 )
  514. {
  515. for ( ULONG j = 0; j < cNumOfWords - 1; j++ )
  516. {
  517. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  518. {
  519. //--- If the current POS matches, and the next word matches OR the word two after
  520. //--- matches, and the conversion POS is a possibility for this word, convert the
  521. //--- POS.
  522. if ( ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  523. _wcsicmp( pProns[j + 1].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 ) ||
  524. ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  525. _wcsicmp( pProns[j + 2].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 ) )
  526. {
  527. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  528. }
  529. }
  530. }
  531. }
  532. }
  533. break;
  534. case CURRWPREV1W:
  535. {
  536. if ( cNumOfWords > 1 )
  537. {
  538. for ( ULONG j = 1; j < cNumOfWords; j++ )
  539. {
  540. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  541. {
  542. //--- If the current POS matches, and the current word matches, and the previous
  543. //--- word matches, and the conversion POS is a possibility for this word, convert
  544. //--- the POS.
  545. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  546. _wcsicmp( pProns[j].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 &&
  547. _wcsicmp( pProns[j - 1].orthStr, g_POSTaggerPatches[i].pTemplateWord2 ) == 0 )
  548. {
  549. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  550. }
  551. }
  552. }
  553. }
  554. }
  555. break;
  556. case CURRWNEXT1W:
  557. {
  558. if ( cNumOfWords > 1 )
  559. {
  560. for ( ULONG j = 0; j < cNumOfWords - 1; j++ )
  561. {
  562. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  563. {
  564. //--- If the current POS matches, and the current word matches, and the next
  565. //--- word matches, and the conversion POS is a possibility for this word, convert
  566. //--- the POS.
  567. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  568. _wcsicmp( pProns[j].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 &&
  569. _wcsicmp( pProns[j + 1].orthStr, g_POSTaggerPatches[i].pTemplateWord2 ) == 0 )
  570. {
  571. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  572. }
  573. }
  574. }
  575. }
  576. }
  577. break;
  578. case CURRWPREV1T:
  579. {
  580. if ( cNumOfWords > 1 )
  581. {
  582. for ( ULONG j = 1; j < cNumOfWords; j++ )
  583. {
  584. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  585. {
  586. //--- If the current POS matches, and the current word matches, and the previous
  587. //--- POS matches, and the conversion POS is a possibility for this word, convert
  588. //--- the POS.
  589. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  590. _wcsicmp( pProns[j].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 &&
  591. pProns[j - 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 )
  592. {
  593. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  594. }
  595. }
  596. }
  597. }
  598. }
  599. break;
  600. case CURRWNEXT1T:
  601. {
  602. if ( cNumOfWords > 1 )
  603. {
  604. for ( ULONG j = 0; j < cNumOfWords - 1; j++ )
  605. {
  606. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  607. {
  608. //--- If the current POS matches, and the current word matches, and the next
  609. //--- POS matches, and the conversion POS is a possibility for this word, convert
  610. //--- the POS.
  611. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  612. _wcsicmp( pProns[j].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 &&
  613. pProns[j + 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 )
  614. {
  615. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  616. }
  617. }
  618. }
  619. }
  620. }
  621. break;
  622. case CURRW:
  623. {
  624. for ( ULONG j = 0; j < cNumOfWords; j++ )
  625. {
  626. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  627. {
  628. //--- If the current POS matches, and the current word matches, and the
  629. //--- conversion POS is a possibility for this word, convert the POS.
  630. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  631. _wcsicmp( pProns[j].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 )
  632. {
  633. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS ) ;
  634. }
  635. }
  636. }
  637. }
  638. break;
  639. case PREV1WT:
  640. {
  641. if ( cNumOfWords > 1 )
  642. {
  643. for ( ULONG j = 1; j < cNumOfWords; j++ )
  644. {
  645. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  646. {
  647. //--- If the current POS matches, and the previous word and POS match, and
  648. //--- the conversion POS is a possibility for this word, convert the POS.
  649. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  650. pProns[j - 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 &&
  651. _wcsicmp( pProns[j - 1].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 )
  652. {
  653. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  654. }
  655. }
  656. }
  657. }
  658. }
  659. break;
  660. case NEXT1WT:
  661. {
  662. if ( cNumOfWords > 1 )
  663. {
  664. for ( ULONG j = 0; j < cNumOfWords - 1; j++ )
  665. {
  666. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  667. {
  668. //--- If the current POS matches, and the next word and POS match, and
  669. //--- the conversion POS is a possibility for this word, convert the POS.
  670. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  671. pProns[j + 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 &&
  672. _wcsicmp( pProns[j + 1].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 )
  673. {
  674. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  675. }
  676. }
  677. }
  678. }
  679. }
  680. break;
  681. case CURRWPREV1WT:
  682. {
  683. if ( cNumOfWords > 1 )
  684. {
  685. for ( ULONG j = 1; j < cNumOfWords; j++ )
  686. {
  687. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  688. {
  689. //--- If the current POS matches, and the current words matches, and the
  690. //--- previous word and POS match, and the conversion POS is a possibility
  691. //--- for this word, convert the POS.
  692. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  693. _wcsicmp( pProns[j].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 &&
  694. pProns[j - 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 &&
  695. _wcsicmp( pProns[j - 1].orthStr, g_POSTaggerPatches[i].pTemplateWord2 ) == 0 )
  696. {
  697. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  698. }
  699. }
  700. }
  701. }
  702. }
  703. break;
  704. case CURRWNEXT1WT:
  705. {
  706. if ( cNumOfWords > 1 )
  707. {
  708. for ( ULONG j = 0; j < cNumOfWords - 1; j++ )
  709. {
  710. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  711. {
  712. //--- If the current POS matches, and the current words matches, and the
  713. //--- next word and POS match, and the conversion POS is a possibility
  714. //--- for this word, convert the POS.
  715. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  716. _wcsicmp( pProns[j].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 &&
  717. pProns[j + 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 &&
  718. _wcsicmp( pProns[j + 1].orthStr, g_POSTaggerPatches[i].pTemplateWord2 ) == 0 )
  719. {
  720. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  721. }
  722. }
  723. }
  724. }
  725. }
  726. break;
  727. }
  728. }
  729. } /* DisambiguatePOS */
  730. /*****************************************************************************
  731. * Pronounce *
  732. *-----------*
  733. * Description:
  734. * Get lexicon or letter-to-sound (LTS) pronunciations
  735. *
  736. ********************************************************************** MC ***/
  737. HRESULT CStdSentEnum::Pronounce( PRONRECORD *pPron )
  738. {
  739. SPDBG_FUNC( "Pronounce" );
  740. SPWORDPRONUNCIATIONLIST SPList;
  741. HRESULT hr = SPERR_NOT_IN_LEX;
  742. ULONG cPhonLen;
  743. DWORD dwFlags = eLEXTYPE_USER | eLEXTYPE_APP | eLEXTYPE_PRIVATE1 | eLEXTYPE_PRIVATE2;
  744. BOOL fPOSExists = false;
  745. ZeroMemory( &SPList, sizeof(SPWORDPRONUNCIATIONLIST) );
  746. //--- Special Case - XML Provided Part Of Speech. Search for exact match first...
  747. if ( pPron->XMLPartOfSpeech != MS_Unknown )
  748. {
  749. //--- Try User Lexicon
  750. hr = m_cpAggregateLexicon->GetPronunciations( pPron->orthStr, 1033, eLEXTYPE_USER, &SPList );
  751. if ( SUCCEEDED( hr ) &&
  752. SPList.pFirstWordPronunciation )
  753. {
  754. for ( SPWORDPRONUNCIATION *pPronunciation = SPList.pFirstWordPronunciation; pPronunciation;
  755. pPronunciation = pPronunciation->pNextWordPronunciation )
  756. {
  757. if ( pPronunciation->ePartOfSpeech == pPron->XMLPartOfSpeech )
  758. {
  759. fPOSExists = true;
  760. break;
  761. }
  762. }
  763. if ( !fPOSExists )
  764. {
  765. if ( SPList.pvBuffer )
  766. {
  767. ::CoTaskMemFree( SPList.pvBuffer );
  768. ZeroMemory( &SPList, sizeof(SPWORDPRONUNCIATIONLIST) );
  769. }
  770. }
  771. }
  772. //--- Handle empty pronunciation
  773. else if ( !SPList.pFirstWordPronunciation )
  774. {
  775. if ( SPList.pvBuffer )
  776. {
  777. ::CoTaskMemFree( SPList.pvBuffer );
  778. ZeroMemory( &SPList, sizeof(SPWORDPRONUNCIATIONLIST) );
  779. }
  780. hr = SPERR_NOT_IN_LEX;
  781. }
  782. //--- Try App Lexicon
  783. if ( !fPOSExists )
  784. {
  785. hr = m_cpAggregateLexicon->GetPronunciations( pPron->orthStr, 1033, eLEXTYPE_APP, &SPList );
  786. if ( SUCCEEDED( hr ) &&
  787. SPList.pFirstWordPronunciation )
  788. {
  789. for ( SPWORDPRONUNCIATION *pPronunciation = SPList.pFirstWordPronunciation; pPronunciation;
  790. pPronunciation = pPronunciation->pNextWordPronunciation )
  791. {
  792. if ( pPronunciation->ePartOfSpeech == pPron->XMLPartOfSpeech )
  793. {
  794. fPOSExists = true;
  795. break;
  796. }
  797. }
  798. if ( !fPOSExists )
  799. {
  800. if ( SPList.pvBuffer )
  801. {
  802. ::CoTaskMemFree( SPList.pvBuffer );
  803. ZeroMemory( &SPList, sizeof(SPWORDPRONUNCIATIONLIST) );
  804. }
  805. }
  806. }
  807. //--- Handle empty pronunciation
  808. else if ( !SPList.pFirstWordPronunciation )
  809. {
  810. if ( SPList.pvBuffer )
  811. {
  812. ::CoTaskMemFree( SPList.pvBuffer );
  813. ZeroMemory( &SPList, sizeof(SPWORDPRONUNCIATIONLIST) );
  814. }
  815. hr = SPERR_NOT_IN_LEX;
  816. }
  817. }
  818. //--- Try Vendor Lexicon
  819. if ( !fPOSExists )
  820. {
  821. hr = m_cpAggregateLexicon->GetPronunciations( pPron->orthStr, 1033, eLEXTYPE_PRIVATE1, &SPList );
  822. if ( SUCCEEDED( hr ) &&
  823. SPList.pFirstWordPronunciation )
  824. {
  825. for ( SPWORDPRONUNCIATION *pPronunciation = SPList.pFirstWordPronunciation; pPronunciation;
  826. pPronunciation = pPronunciation->pNextWordPronunciation )
  827. {
  828. if ( pPronunciation->ePartOfSpeech == pPron->XMLPartOfSpeech )
  829. {
  830. fPOSExists = true;
  831. break;
  832. }
  833. }
  834. if ( !fPOSExists )
  835. {
  836. if ( SPList.pvBuffer )
  837. {
  838. ::CoTaskMemFree( SPList.pvBuffer );
  839. ZeroMemory( &SPList, sizeof(SPWORDPRONUNCIATIONLIST) );
  840. }
  841. }
  842. }
  843. //--- Handle empty pronunciation
  844. else if ( !SPList.pFirstWordPronunciation )
  845. {
  846. if ( SPList.pvBuffer )
  847. {
  848. ::CoTaskMemFree( SPList.pvBuffer );
  849. ZeroMemory( &SPList, sizeof(SPWORDPRONUNCIATIONLIST) );
  850. }
  851. hr = SPERR_NOT_IN_LEX;
  852. }
  853. }
  854. //--- Try Morph Lexicon
  855. if ( !fPOSExists )
  856. {
  857. hr = m_pMorphLexicon->DoSuffixMorph( pPron->orthStr, pPron->lemmaStr, 1033, dwFlags, &SPList );
  858. if ( SUCCEEDED( hr ) &&
  859. SPList.pFirstWordPronunciation )
  860. {
  861. for ( SPWORDPRONUNCIATION *pPronunciation = SPList.pFirstWordPronunciation; pPronunciation;
  862. pPronunciation = pPronunciation->pNextWordPronunciation )
  863. {
  864. if ( pPronunciation->ePartOfSpeech == pPron->XMLPartOfSpeech )
  865. {
  866. fPOSExists = true;
  867. break;
  868. }
  869. }
  870. if ( !fPOSExists )
  871. {
  872. //--- Need to do this the last time, to make sure we hit the default code below...
  873. //--- RAID 5078
  874. hr = SPERR_NOT_IN_LEX;
  875. if ( SPList.pvBuffer )
  876. {
  877. ::CoTaskMemFree( SPList.pvBuffer );
  878. ZeroMemory( &SPList, sizeof(SPWORDPRONUNCIATIONLIST) );
  879. }
  880. }
  881. }
  882. //--- Handle empty pronunciation
  883. else if ( !SPList.pFirstWordPronunciation )
  884. {
  885. if ( SPList.pvBuffer )
  886. {
  887. ::CoTaskMemFree( SPList.pvBuffer );
  888. ZeroMemory( &SPList, sizeof(SPWORDPRONUNCIATIONLIST) );
  889. }
  890. hr = SPERR_NOT_IN_LEX;
  891. }
  892. }
  893. }
  894. //--- Default case - just look up orthography and go with first match.
  895. if ( hr == SPERR_NOT_IN_LEX )
  896. {
  897. hr = m_cpAggregateLexicon->GetPronunciations( pPron->orthStr, 1033, eLEXTYPE_USER, &SPList );
  898. //--- Handle empty pronunciation
  899. if ( SUCCEEDED( hr ) &&
  900. !SPList.pFirstWordPronunciation )
  901. {
  902. if ( SPList.pvBuffer )
  903. {
  904. ::CoTaskMemFree( SPList.pvBuffer );
  905. ZeroMemory( &SPList, sizeof(SPWORDPRONUNCIATIONLIST) );
  906. }
  907. hr = SPERR_NOT_IN_LEX;
  908. }
  909. }
  910. if ( hr == SPERR_NOT_IN_LEX )
  911. {
  912. hr = m_cpAggregateLexicon->GetPronunciations( pPron->orthStr, 1033, eLEXTYPE_APP, &SPList );
  913. //--- Handle empty pronunciation
  914. if ( SUCCEEDED( hr ) &&
  915. !SPList.pFirstWordPronunciation )
  916. {
  917. if ( SPList.pvBuffer )
  918. {
  919. ::CoTaskMemFree( SPList.pvBuffer );
  920. ZeroMemory( &SPList, sizeof(SPWORDPRONUNCIATIONLIST) );
  921. }
  922. hr = SPERR_NOT_IN_LEX;
  923. }
  924. }
  925. if ( hr == SPERR_NOT_IN_LEX )
  926. {
  927. hr = m_cpAggregateLexicon->GetPronunciations( pPron->orthStr, 1033, eLEXTYPE_PRIVATE1, &SPList );
  928. //--- Handle empty pronunciation
  929. if ( SUCCEEDED( hr ) &&
  930. !SPList.pFirstWordPronunciation )
  931. {
  932. if ( SPList.pvBuffer )
  933. {
  934. ::CoTaskMemFree( SPList.pvBuffer );
  935. ZeroMemory( &SPList, sizeof(SPWORDPRONUNCIATIONLIST) );
  936. }
  937. hr = SPERR_NOT_IN_LEX;
  938. }
  939. }
  940. if ( hr == SPERR_NOT_IN_LEX )
  941. {
  942. hr = m_pMorphLexicon->DoSuffixMorph( pPron->orthStr, pPron->lemmaStr, 1033,
  943. dwFlags, &SPList );
  944. //--- Handle empty pronunciation
  945. if ( SUCCEEDED( hr ) &&
  946. !SPList.pFirstWordPronunciation )
  947. {
  948. if ( SPList.pvBuffer )
  949. {
  950. ::CoTaskMemFree( SPList.pvBuffer );
  951. ZeroMemory( &SPList, sizeof(SPWORDPRONUNCIATIONLIST) );
  952. }
  953. hr = SPERR_NOT_IN_LEX;
  954. }
  955. }
  956. if ( hr == SPERR_NOT_IN_LEX )
  957. {
  958. hr = m_cpAggregateLexicon->GetPronunciations( pPron->orthStr, 1033, eLEXTYPE_PRIVATE2, &SPList );
  959. //--- Make all LTS words Nouns...
  960. for ( SPWORDPRONUNCIATION *pPronunciation = SPList.pFirstWordPronunciation; pPronunciation;
  961. pPronunciation = pPronunciation->pNextWordPronunciation )
  962. {
  963. pPronunciation->ePartOfSpeech = SPPS_Noun;
  964. }
  965. }
  966. if (SUCCEEDED(hr))
  967. {
  968. //--- WARNING - this assumes pronunciations will only come from one type of lexicon, an assumption
  969. //--- which was true as of July, 2000
  970. pPron->pronType = SPList.pFirstWordPronunciation->eLexiconType;
  971. //------------------------------------------------------------
  972. // SAPI unrolls pronunciations from their POS.
  973. // So roll them back into the original collapsed array
  974. // of one or two candidates with sorted POS (argh...)
  975. //------------------------------------------------------------
  976. SPWORDPRONUNCIATION *firstPron, *pCurPron, *pNextPron;
  977. //------------------------------------------
  978. // Init pronunciation A
  979. //------------------------------------------
  980. pCurPron = firstPron = SPList.pFirstWordPronunciation;
  981. pPron->pronArray[PRON_A].POScount = 1;
  982. //----------------------------
  983. // Get phoneme length
  984. //----------------------------
  985. cPhonLen = wcslen( firstPron->szPronunciation ) + 1; // include delimiter
  986. //----------------------------
  987. // Clip phoneme string to max
  988. //----------------------------
  989. if( cPhonLen > SP_MAX_PRON_LENGTH )
  990. {
  991. cPhonLen = SP_MAX_PRON_LENGTH;
  992. }
  993. //----------------------------
  994. // Copy unicode phoneme string
  995. //----------------------------
  996. memcpy( pPron->pronArray[PRON_A].phon_Str, firstPron->szPronunciation, cPhonLen * sizeof(WCHAR) );
  997. pPron->pronArray[PRON_A].phon_Len = cPhonLen -1; // minus delimiter
  998. pPron->pronArray[PRON_A].POScode[0] = (ENGPARTOFSPEECH)firstPron->ePartOfSpeech;
  999. //------------------------------------------
  1000. // Init pronunciation B
  1001. //------------------------------------------
  1002. pPron->pronArray[PRON_B].POScount = 0;
  1003. pPron->pronArray[PRON_B].phon_Len = 0;
  1004. pNextPron = pCurPron->pNextWordPronunciation;
  1005. while( pNextPron )
  1006. {
  1007. int isDiff;
  1008. isDiff = wcscmp( firstPron->szPronunciation, pNextPron->szPronunciation );
  1009. if( isDiff )
  1010. {
  1011. //------------------------------------------------
  1012. // Next pronunciation is different from 1st
  1013. //------------------------------------------------
  1014. if( pPron->pronArray[PRON_B].POScount < POS_MAX )
  1015. {
  1016. //---------------------------------------
  1017. // Gather POS B into array
  1018. //---------------------------------------
  1019. pPron->pronArray[PRON_B].POScode[pPron->pronArray[PRON_B].POScount] =
  1020. (ENGPARTOFSPEECH)pNextPron->ePartOfSpeech;
  1021. pPron->pronArray[PRON_B].POScount++;
  1022. if( pPron->pronArray[PRON_B].phon_Len == 0 )
  1023. {
  1024. //-----------------------------------------
  1025. // If there's no B pron yet, make one
  1026. //-----------------------------------------
  1027. cPhonLen = wcslen( pNextPron->szPronunciation ) + 1; // include delimiter
  1028. //----------------------------
  1029. // Clip phoneme string to max
  1030. //----------------------------
  1031. if( cPhonLen > SP_MAX_PRON_LENGTH )
  1032. {
  1033. cPhonLen = SP_MAX_PRON_LENGTH;
  1034. }
  1035. //----------------------------
  1036. // Copy unicode phoneme string
  1037. //----------------------------
  1038. memcpy( pPron->pronArray[PRON_B].phon_Str,
  1039. pNextPron->szPronunciation,
  1040. cPhonLen * sizeof(WCHAR) );
  1041. pPron->pronArray[PRON_B].phon_Len = cPhonLen -1; // minus delimiter
  1042. pPron->hasAlt = true;
  1043. }
  1044. }
  1045. }
  1046. else
  1047. {
  1048. //------------------------------------------------
  1049. // Next pronunciation is same as 1st
  1050. //------------------------------------------------
  1051. if( pPron->pronArray[PRON_A].POScount < POS_MAX )
  1052. {
  1053. //---------------------------------------
  1054. // Gather POS A into array
  1055. //---------------------------------------
  1056. pPron->pronArray[PRON_A].POScode[pPron->pronArray[PRON_A].POScount] =
  1057. (ENGPARTOFSPEECH)pNextPron->ePartOfSpeech;
  1058. pPron->pronArray[PRON_A].POScount++;
  1059. }
  1060. }
  1061. pCurPron = pNextPron;
  1062. pNextPron = pCurPron->pNextWordPronunciation;
  1063. }
  1064. }
  1065. //--- If XML POS provided, set selection now as it won't be touched by the POS Tagger
  1066. if ( pPron->XMLPartOfSpeech != MS_Unknown )
  1067. {
  1068. BOOL fMadeMatch = false;
  1069. //--- Check first pronunciation
  1070. for ( ULONG i = 0; i < pPron->pronArray[0].POScount; i++ )
  1071. {
  1072. if ( pPron->pronArray[0].POScode[i] == pPron->XMLPartOfSpeech )
  1073. {
  1074. pPron->altChoice = 0;
  1075. pPron->POSchoice = pPron->XMLPartOfSpeech;
  1076. fMadeMatch = true;
  1077. }
  1078. }
  1079. //--- Check second pronunciation
  1080. if ( pPron->hasAlt )
  1081. {
  1082. for ( ULONG i = 0; i < pPron->pronArray[1].POScount; i++ )
  1083. {
  1084. if ( pPron->pronArray[1].POScode[i] == pPron->XMLPartOfSpeech )
  1085. {
  1086. pPron->altChoice = 1;
  1087. pPron->POSchoice = pPron->XMLPartOfSpeech;
  1088. fMadeMatch = true;
  1089. }
  1090. }
  1091. }
  1092. //--- If this POS didn't exist for the word, let POS Tagger do its thing
  1093. //--- to determine a pronunciation, and then reassign the POS later...
  1094. if ( !fMadeMatch )
  1095. {
  1096. pPron->XMLPartOfSpeech = MS_Unknown;
  1097. pPron->POSchoice = pPron->pronArray[PRON_A].POScode[0];
  1098. }
  1099. }
  1100. //--- Set default POS, for later refinement by POS Tagger
  1101. else
  1102. {
  1103. pPron->POSchoice = pPron->pronArray[PRON_A].POScode[0];
  1104. pPron->altChoice = PRON_A;
  1105. }
  1106. if( SPList.pvBuffer )
  1107. {
  1108. ::CoTaskMemFree( SPList.pvBuffer );
  1109. }
  1110. return hr;
  1111. } /* Pronounce */
  1112. /*****************************************************************************
  1113. * CStdSentEnum::DetermineProns *
  1114. *------------------------------*
  1115. * Description:
  1116. * This method determines POS and looks up the pronounciation
  1117. ********************************************************************* MC ****/
  1118. HRESULT CStdSentEnum::DetermineProns( CItemList& ItemList, CSentItemMemory& MemoryManager )
  1119. {
  1120. SPDBG_FUNC( "CStdSentEnum::DetermineProns" );
  1121. HRESULT hr = S_OK;
  1122. ULONG cNumOfProns, cPronIndex;
  1123. PRONRECORD* pProns = NULL;
  1124. //--- Count the total number of pronunciations needed
  1125. cNumOfProns = 0;
  1126. SPLISTPOS ListPos = ItemList.GetHeadPosition();
  1127. while( ListPos )
  1128. {
  1129. TTSSentItem& Item = ItemList.GetNext( ListPos );
  1130. for ( ULONG i = 0; i < Item.ulNumWords; i++ )
  1131. {
  1132. if( Item.Words[i].pWordText &&
  1133. ( Item.Words[i].pXmlState->eAction == SPVA_Speak ||
  1134. Item.Words[i].pXmlState->eAction == SPVA_SpellOut ||
  1135. Item.Words[i].pXmlState->eAction == SPVA_Pronounce ) )
  1136. {
  1137. ++cNumOfProns;
  1138. }
  1139. }
  1140. }
  1141. if ( cNumOfProns )
  1142. {
  1143. pProns = new PRONRECORD[cNumOfProns];
  1144. if( !pProns )
  1145. {
  1146. hr = E_OUTOFMEMORY;
  1147. }
  1148. else
  1149. {
  1150. //--- First, get item pronunciation(s)
  1151. ZeroMemory( pProns, cNumOfProns * sizeof(PRONRECORD) );
  1152. cPronIndex = 0;
  1153. ListPos = ItemList.GetHeadPosition();
  1154. //--- Iterate through ItemList
  1155. while( ListPos && SUCCEEDED( hr ) )
  1156. {
  1157. TTSSentItem& Item = ItemList.GetNext( ListPos );
  1158. //--- Iterate over Words
  1159. for ( ULONG i = 0; i < Item.ulNumWords; i++ )
  1160. {
  1161. //--- Get pronunciations and parts of speech for spoken items only
  1162. if ( Item.Words[i].pWordText &&
  1163. ( Item.Words[i].pXmlState->eAction == SPVA_Speak ||
  1164. Item.Words[i].pXmlState->eAction == SPVA_SpellOut ||
  1165. Item.Words[i].pXmlState->eAction == SPVA_Pronounce ) )
  1166. {
  1167. SPDBG_ASSERT( cPronIndex < cNumOfProns );
  1168. ULONG cItemLen = Item.Words[i].ulWordLen;
  1169. //--- Clip at max text length
  1170. if( cItemLen > ( SP_MAX_WORD_LENGTH-1 ) )
  1171. {
  1172. cItemLen = SP_MAX_WORD_LENGTH - 1;
  1173. }
  1174. //--- Copy item text
  1175. memcpy( pProns[cPronIndex].orthStr,
  1176. Item.Words[i].pWordText,
  1177. cItemLen * sizeof(WCHAR) );
  1178. pProns[cPronIndex].orthStr[cItemLen] = 0;
  1179. //--- Set Part of Speech, if given in XML
  1180. if ( Item.Words[i].pXmlState->ePartOfSpeech != MS_Unknown )
  1181. {
  1182. pProns[cPronIndex].XMLPartOfSpeech = (ENGPARTOFSPEECH)Item.Words[i].pXmlState->ePartOfSpeech;
  1183. }
  1184. //--- Do Lex Lookup, if necessary
  1185. if ( Item.Words[i].pXmlState->pPhoneIds == NULL ||
  1186. Item.Words[i].pXmlState->ePartOfSpeech == MS_Unknown )
  1187. {
  1188. //--- Special Case - Disambiguate Abbreviations
  1189. if ( Item.pItemInfo->Type == eABBREVIATION ||
  1190. Item.pItemInfo->Type == eABBREVIATION_NORMALIZE )
  1191. {
  1192. const AbbrevRecord *pAbbrevInfo =
  1193. ( (TTSAbbreviationInfo*) Item.pItemInfo )->pAbbreviation;
  1194. if ( pAbbrevInfo->iPronDisambig < 0 )
  1195. {
  1196. //--- Default case - just take the first (and only) pronunciation
  1197. pProns[cPronIndex].pronArray[PRON_A].POScount = 1;
  1198. wcscpy( pProns[cPronIndex].pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron1 );
  1199. pProns[cPronIndex].pronArray[PRON_A].phon_Len =
  1200. wcslen( pProns[cPronIndex].pronArray[PRON_A].phon_Str );
  1201. pProns[cPronIndex].pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS1;
  1202. pProns[cPronIndex].pronArray[PRON_B].POScount = 0;
  1203. pProns[cPronIndex].pronArray[PRON_B].phon_Len = 0;
  1204. pProns[cPronIndex].hasAlt = false;
  1205. pProns[cPronIndex].altChoice = PRON_A;
  1206. pProns[cPronIndex].POSchoice = pAbbrevInfo->POS1;
  1207. //--- Abbreviation table prons are basically just vendor lex prons...
  1208. pProns[cPronIndex].pronType = eLEXTYPE_PRIVATE1;
  1209. }
  1210. else
  1211. {
  1212. hr = ( this->*g_PronDisambigTable[pAbbrevInfo->iPronDisambig] )
  1213. ( pAbbrevInfo, &pProns[cPronIndex], ItemList, ListPos );
  1214. }
  1215. pProns[cPronIndex].fUsePron = true;
  1216. }
  1217. //--- Default case
  1218. else
  1219. {
  1220. //--- Check disambiguation list
  1221. const AbbrevRecord* pAbbrevRecord =
  1222. (AbbrevRecord*) bsearch( (void*) pProns[cPronIndex].orthStr, (void*) g_AmbiguousWordTable,
  1223. sp_countof( g_AmbiguousWordTable ), sizeof( AbbrevRecord ),
  1224. CompareStringAndAbbrevRecord );
  1225. if ( pAbbrevRecord )
  1226. {
  1227. hr = ( this->*g_AmbiguousWordDisambigTable[pAbbrevRecord->iPronDisambig] )
  1228. ( pAbbrevRecord, &pProns[cPronIndex], ItemList, ListPos );
  1229. pProns[cPronIndex].fUsePron = true;
  1230. }
  1231. //--- Do Lex Lookup, if necessary
  1232. else
  1233. {
  1234. hr = Pronounce( &pProns[cPronIndex] );
  1235. }
  1236. }
  1237. }
  1238. cPronIndex++;
  1239. }
  1240. }
  1241. }
  1242. if (SUCCEEDED(hr))
  1243. {
  1244. //--- Next, disambiguate part-of-speech
  1245. DisambiguatePOS( pProns, cNumOfProns );
  1246. //--- Output debugging information
  1247. TTSDBG_LOGPOSPOSSIBILITIES( pProns, cNumOfProns, STREAM_POSPOSSIBILITIES );
  1248. //--- Finally, copy selected pronunciation to 'ItemList'
  1249. PRONUNIT *selectedUnit;
  1250. cPronIndex = 0;
  1251. ListPos = ItemList.GetHeadPosition();
  1252. while( ListPos && SUCCEEDED(hr) )
  1253. {
  1254. TTSSentItem& Item = ItemList.GetNext( ListPos );
  1255. for ( ULONG i = 0; i < Item.ulNumWords; i++ )
  1256. {
  1257. //--- Set pronunciation and part-of-speech for spoken items only
  1258. if( Item.Words[i].pWordText &&
  1259. ( Item.Words[i].pXmlState->eAction == SPVA_Speak ||
  1260. Item.Words[i].pXmlState->eAction == SPVA_SpellOut ||
  1261. Item.Words[i].pXmlState->eAction == SPVA_Pronounce ) )
  1262. {
  1263. SPDBG_ASSERT( cPronIndex < cNumOfProns );
  1264. //--- Use XML specified pronunciation, if given.
  1265. if ( Item.Words[i].pXmlState->pPhoneIds )
  1266. {
  1267. Item.Words[i].pWordPron = Item.Words[i].pXmlState->pPhoneIds;
  1268. }
  1269. else
  1270. {
  1271. selectedUnit = &pProns[cPronIndex].pronArray[pProns[cPronIndex].altChoice];
  1272. Item.Words[i].pWordPron =
  1273. (SPPHONEID*) MemoryManager.GetMemory( (selectedUnit->phon_Len + 1) *
  1274. sizeof(SPPHONEID), &hr );
  1275. if ( SUCCEEDED( hr ) )
  1276. {
  1277. wcscpy( Item.Words[i].pWordPron, selectedUnit->phon_Str );
  1278. }
  1279. }
  1280. //--- Use XML specified part-of-speech, if given. This will override the case
  1281. //--- where the POS didn't exist as an option and the POS Tagger did its thing
  1282. //--- to find a pronunciation.
  1283. if ( Item.Words[i].pXmlState->ePartOfSpeech != MS_Unknown )
  1284. {
  1285. Item.Words[i].eWordPartOfSpeech = (ENGPARTOFSPEECH)Item.Words[i].pXmlState->ePartOfSpeech;
  1286. }
  1287. else
  1288. {
  1289. Item.Words[i].eWordPartOfSpeech = pProns[cPronIndex].POSchoice;
  1290. }
  1291. //--- Root word
  1292. if ( pProns[cPronIndex].lemmaStr[0] )
  1293. {
  1294. Item.Words[i].ulLemmaLen = wcslen( pProns[cPronIndex].lemmaStr );
  1295. Item.Words[i].pLemma =
  1296. (WCHAR*) MemoryManager.GetMemory( Item.Words[i].ulLemmaLen * sizeof(WCHAR), &hr );
  1297. if ( SUCCEEDED( hr ) )
  1298. {
  1299. wcsncpy( (WCHAR*) Item.Words[i].pLemma, pProns[cPronIndex].lemmaStr,
  1300. Item.Words[i].ulLemmaLen );
  1301. }
  1302. }
  1303. //--- Insert pron in text, if appropriate - RAID #4746
  1304. if ( pProns[cPronIndex].fUsePron )
  1305. {
  1306. ULONG ulNumChars = wcslen( Item.Words[i].pWordPron );
  1307. Item.Words[i].pWordText =
  1308. (WCHAR*) MemoryManager.GetMemory( ( ulNumChars + 3 ) * sizeof( WCHAR ), &hr );
  1309. if ( SUCCEEDED( hr ) )
  1310. {
  1311. ZeroMemory( (WCHAR*) Item.Words[i].pWordText, ( ulNumChars + 3 ) * sizeof( WCHAR ) );
  1312. (WCHAR) Item.Words[i].pWordText[0] = L'*';
  1313. wcscpy( ( (WCHAR*) Item.Words[i].pWordText + 1 ), Item.Words[i].pWordPron );
  1314. (WCHAR) Item.Words[i].pWordText[ ulNumChars + 1 ] = L'*';
  1315. Item.Words[i].ulWordLen = ulNumChars + 2;
  1316. }
  1317. }
  1318. cPronIndex++;
  1319. }
  1320. }
  1321. }
  1322. }
  1323. if ( SUCCEEDED( hr ) )
  1324. {
  1325. //--- Check Post POS disambiguation list
  1326. SPLISTPOS ListPos = ItemList.GetHeadPosition();
  1327. while ( ListPos && SUCCEEDED( hr ) )
  1328. {
  1329. TTSSentItem& Item = ItemList.GetNext( ListPos );
  1330. if ( Item.pItemInfo->Type == eALPHA_WORD ||
  1331. Item.pItemInfo->Type == eABBREVIATION ||
  1332. Item.pItemInfo->Type == eABBREVIATION_NORMALIZE )
  1333. {
  1334. WCHAR temp;
  1335. BOOL fPeriod = false;
  1336. if ( Item.pItemSrcText[Item.ulItemSrcLen - 1] == L'.' &&
  1337. Item.ulItemSrcLen > 1 )
  1338. {
  1339. temp = Item.pItemSrcText[Item.ulItemSrcLen - 1];
  1340. *( (WCHAR*) Item.pItemSrcText + Item.ulItemSrcLen - 1 ) = 0;
  1341. fPeriod = true;
  1342. }
  1343. else
  1344. {
  1345. temp = Item.pItemSrcText[Item.ulItemSrcLen];
  1346. *( (WCHAR*) Item.pItemSrcText + Item.ulItemSrcLen ) = 0;
  1347. }
  1348. const AbbrevRecord* pAbbrevRecord =
  1349. (AbbrevRecord*) bsearch( (void*) Item.pItemSrcText, (void*) g_PostLexLookupWordTable,
  1350. sp_countof( g_PostLexLookupWordTable ), sizeof( AbbrevRecord ),
  1351. CompareStringAndAbbrevRecord );
  1352. if ( pAbbrevRecord )
  1353. {
  1354. hr = ( this->*g_PostLexLookupDisambigTable[pAbbrevRecord->iPronDisambig] )
  1355. ( pAbbrevRecord, ItemList, ListPos, MemoryManager );
  1356. }
  1357. if ( fPeriod )
  1358. {
  1359. *( (WCHAR*) Item.pItemSrcText + Item.ulItemSrcLen - 1 ) = temp;
  1360. }
  1361. else
  1362. {
  1363. *( (WCHAR*) Item.pItemSrcText + Item.ulItemSrcLen ) = temp;
  1364. }
  1365. }
  1366. }
  1367. }
  1368. }
  1369. }
  1370. if (pProns)
  1371. {
  1372. delete [] pProns;
  1373. }
  1374. return hr;
  1375. } /* CStdSentEnum::DetermineProns */
  1376. /***********************************************************************************************
  1377. * MeasurementDisambig *
  1378. *---------------------*
  1379. * Description:
  1380. * This overrides initial pronunciations of measurement abbreviations when they are used
  1381. * as modifiers - e.g. "a 7 ft. pole" vs. "the pole was 7 ft. long"
  1382. *
  1383. ********************************************************************* AH **********************/
  1384. HRESULT CStdSentEnum::MeasurementDisambig( const AbbrevRecord* pAbbrevInfo, CItemList& ItemList,
  1385. SPLISTPOS ListPos, CSentItemMemory& MemoryManager )
  1386. {
  1387. SPDBG_FUNC( "CStdSentEnum::MeasurementDisambig" );
  1388. HRESULT hr = S_OK;
  1389. //--- Get previous two items
  1390. SPLISTPOS TempPos = ListPos;
  1391. if ( TempPos )
  1392. {
  1393. ItemList.GetPrev( TempPos );
  1394. if ( TempPos )
  1395. {
  1396. ItemList.GetPrev( TempPos );
  1397. if ( TempPos )
  1398. {
  1399. TTSSentItem TempItem = ItemList.GetPrev( TempPos );
  1400. //--- Previous must be a number
  1401. if ( TempItem.pItemInfo->Type == eNUM_CARDINAL )
  1402. {
  1403. //--- Get next item
  1404. TempPos = ListPos;
  1405. TempItem = ItemList.GetNext( TempPos );
  1406. //--- Next must be a noun or adj
  1407. if ( TempItem.eItemPartOfSpeech == MS_Noun )
  1408. {
  1409. //--- Matched a 7 ft. pole type example - go with singular
  1410. TempPos = ListPos;
  1411. ItemList.GetPrev( TempPos );
  1412. TTSSentItem& MeasurementItem = ItemList.GetPrev( TempPos );
  1413. //--- Singular will always be shorter than plural, so this should never overwrite
  1414. //--- anything...
  1415. wcscpy( MeasurementItem.Words[0].pWordPron, pAbbrevInfo->pPron1 );
  1416. //--- Insert pron into word text - RAID #4746
  1417. ULONG ulNumChars = wcslen( MeasurementItem.Words[0].pWordPron );
  1418. MeasurementItem.Words[0].pWordText =
  1419. (WCHAR*) MemoryManager.GetMemory( ( ulNumChars + 3 ) * sizeof( WCHAR ), &hr );
  1420. if ( SUCCEEDED( hr ) )
  1421. {
  1422. ZeroMemory( (WCHAR*) MeasurementItem.Words[0].pWordText, ( ulNumChars + 3 ) * sizeof( WCHAR ) );
  1423. (WCHAR) MeasurementItem.Words[0].pWordText[0] = L'*';
  1424. wcscpy( ( (WCHAR*) MeasurementItem.Words[0].pWordText + 1 ), MeasurementItem.Words[0].pWordPron );
  1425. (WCHAR) MeasurementItem.Words[0].pWordText[ ulNumChars + 1 ] = L'*';
  1426. MeasurementItem.Words[0].ulWordLen = ulNumChars + 2;
  1427. }
  1428. }
  1429. else if ( TempItem.eItemPartOfSpeech == MS_Adj &&
  1430. TempPos )
  1431. {
  1432. //--- Next must be a noun
  1433. TempItem = ItemList.GetNext( TempPos );
  1434. {
  1435. if ( TempItem.eItemPartOfSpeech == MS_Noun )
  1436. {
  1437. //--- Matched a 7 ft. pole type example - go with singular
  1438. TempPos = ListPos;
  1439. ItemList.GetPrev( TempPos );
  1440. TTSSentItem& MeasurementItem = ItemList.GetPrev( TempPos );
  1441. //--- Singular will always be shorter than plural, so this should never overwrite
  1442. //--- anything...
  1443. wcscpy( MeasurementItem.Words[0].pWordPron, pAbbrevInfo->pPron1 );
  1444. //--- Insert pron into word text - RAID #4746
  1445. ULONG ulNumChars = wcslen( MeasurementItem.Words[0].pWordPron );
  1446. MeasurementItem.Words[0].pWordText =
  1447. (WCHAR*) MemoryManager.GetMemory( ( ulNumChars + 3 ) * sizeof( WCHAR ), &hr );
  1448. if ( SUCCEEDED( hr ) )
  1449. {
  1450. ZeroMemory( (WCHAR*) MeasurementItem.Words[0].pWordText, ( ulNumChars + 3 ) * sizeof( WCHAR ) );
  1451. (WCHAR) MeasurementItem.Words[0].pWordText[0] = L'*';
  1452. wcscpy( ( (WCHAR*) MeasurementItem.Words[0].pWordText + 1 ), MeasurementItem.Words[0].pWordPron );
  1453. (WCHAR) MeasurementItem.Words[0].pWordText[ ulNumChars + 1 ] = L'*';
  1454. MeasurementItem.Words[0].ulWordLen = ulNumChars + 2;
  1455. }
  1456. }
  1457. }
  1458. }
  1459. }
  1460. }
  1461. }
  1462. }
  1463. return hr;
  1464. } /* MeasurementDisambig */
  1465. /***********************************************************************************************
  1466. * TheDisambig *
  1467. *-------------*
  1468. * Description:
  1469. * This function disambiguates the word the - before a vowel it becomes "thee", before a
  1470. * consonant it is "thuh"...
  1471. *
  1472. ********************************************************************* AH **********************/
  1473. HRESULT CStdSentEnum::TheDisambig( const AbbrevRecord* pAbbrevInfo, CItemList& ItemList,
  1474. SPLISTPOS ListPos, CSentItemMemory& MemoryManager )
  1475. {
  1476. SPDBG_FUNC( "CStdSentEnum::TheDisambig" );
  1477. HRESULT hr = S_OK;
  1478. //--- Get next item
  1479. SPLISTPOS TempPos = ListPos;
  1480. if ( TempPos )
  1481. {
  1482. TTSSentItem NextItem = ItemList.GetNext( TempPos );
  1483. if ( NextItem.Words[0].pWordPron &&
  1484. bsearch( (void*) NextItem.Words[0].pWordPron, (void*) g_Vowels, sp_countof( g_Vowels ),
  1485. sizeof( WCHAR ), CompareWCHARAndWCHAR ) )
  1486. {
  1487. //--- Matched a vowel - go with / DH IY 1 /
  1488. TempPos = ListPos;
  1489. ItemList.GetPrev( TempPos );
  1490. TTSSentItem& TheItem = ItemList.GetPrev( TempPos );
  1491. //--- The two pronunciations are exactly the same length, so this should never overwrite
  1492. //--- anything
  1493. wcscpy( TheItem.Words[0].pWordPron, pAbbrevInfo->pPron1 );
  1494. TheItem.Words[0].eWordPartOfSpeech = pAbbrevInfo->POS1;
  1495. //--- Insert pron into word text - RAID #4746
  1496. ULONG ulNumChars = wcslen( TheItem.Words[0].pWordPron );
  1497. TheItem.Words[0].pWordText =
  1498. (WCHAR*) MemoryManager.GetMemory( ( ulNumChars + 3 ) * sizeof( WCHAR ), &hr );
  1499. if ( SUCCEEDED( hr ) )
  1500. {
  1501. ZeroMemory( (WCHAR*) TheItem.Words[0].pWordText, ( ulNumChars + 3 ) * sizeof( WCHAR ) );
  1502. (WCHAR) TheItem.Words[0].pWordText[0] = L'*';
  1503. wcscpy( ( (WCHAR*) TheItem.Words[0].pWordText + 1 ), TheItem.Words[0].pWordPron );
  1504. (WCHAR) TheItem.Words[0].pWordText[ ulNumChars + 1 ] = L'*';
  1505. TheItem.Words[0].ulWordLen = ulNumChars + 2;
  1506. }
  1507. }
  1508. else
  1509. {
  1510. //--- Didn't match a vowel - go with / DH AX 1 /
  1511. TempPos = ListPos;
  1512. ItemList.GetPrev( TempPos );
  1513. TTSSentItem& TheItem = ItemList.GetPrev( TempPos );
  1514. //--- The two pronunciations are exactly the same length, so this should never overwrite
  1515. //--- anything
  1516. wcscpy( TheItem.Words[0].pWordPron, pAbbrevInfo->pPron2 );
  1517. TheItem.Words[0].eWordPartOfSpeech = pAbbrevInfo->POS2;
  1518. //--- Insert pron into word text - RAID #4746
  1519. ULONG ulNumChars = wcslen( TheItem.Words[0].pWordPron );
  1520. TheItem.Words[0].pWordText =
  1521. (WCHAR*) MemoryManager.GetMemory( ( ulNumChars + 3 ) * sizeof( WCHAR ), &hr );
  1522. if ( SUCCEEDED( hr ) )
  1523. {
  1524. ZeroMemory( (WCHAR*) TheItem.Words[0].pWordText, ( ulNumChars + 3 ) * sizeof( WCHAR ) );
  1525. (WCHAR) TheItem.Words[0].pWordText[0] = L'*';
  1526. wcscpy( ( (WCHAR*) TheItem.Words[0].pWordText + 1 ), TheItem.Words[0].pWordPron );
  1527. (WCHAR) TheItem.Words[0].pWordText[ ulNumChars + 1 ] = L'*';
  1528. TheItem.Words[0].ulWordLen = ulNumChars + 2;
  1529. }
  1530. }
  1531. }
  1532. return hr;
  1533. } /* TheDisambig */
  1534. /***********************************************************************************************
  1535. * ADisambig *
  1536. *-----------*
  1537. * Description:
  1538. * This function disambiguates the word "a" - / EY 1 - Noun / vs. / AX - Det /
  1539. *
  1540. ********************************************************************* AH **********************/
  1541. HRESULT CStdSentEnum::ADisambig( const AbbrevRecord* pAbbrevInfo, PRONRECORD* pPron, CItemList& ItemList,
  1542. SPLISTPOS ListPos )
  1543. {
  1544. SPDBG_FUNC( "CStdSentEnum::ADisambig" );
  1545. HRESULT hr = S_OK;
  1546. BOOL fNoun = false;
  1547. //--- Get Current Item...
  1548. SPLISTPOS TempPos = ListPos;
  1549. if ( TempPos )
  1550. {
  1551. ItemList.GetPrev( TempPos );
  1552. if ( TempPos )
  1553. {
  1554. TTSSentItem CurrentItem = ItemList.GetPrev( TempPos );
  1555. //--- If "a" is part of a multi-word item, use the Noun pronunciation...
  1556. //--- If "a" is not an AlphaWord, use the Noun pronunciation...
  1557. if ( CurrentItem.ulNumWords > 1 ||
  1558. CurrentItem.pItemInfo->Type != eALPHA_WORD )
  1559. {
  1560. fNoun = true;
  1561. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron1 );
  1562. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  1563. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS1;
  1564. pPron->POSchoice = pAbbrevInfo->POS1;
  1565. }
  1566. }
  1567. }
  1568. if ( !fNoun )
  1569. {
  1570. //--- Get Next Item...
  1571. TempPos = ListPos;
  1572. if ( TempPos )
  1573. {
  1574. TTSSentItem NextItem = ItemList.GetNext( TempPos );
  1575. //--- If "a" is followed by punctuation, use the Noun pronunciation...
  1576. if ( !( NextItem.pItemInfo->Type & eWORDLIST_IS_VALID ) )
  1577. {
  1578. fNoun = true;
  1579. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron1 );
  1580. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  1581. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS1;
  1582. pPron->POSchoice = pAbbrevInfo->POS1;
  1583. }
  1584. }
  1585. }
  1586. //--- Default - use the Determiner pronunciation (but include Noun pronunciation as well,
  1587. //--- so that POS tagger rules will work properly)...
  1588. if ( !fNoun )
  1589. {
  1590. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron2 );
  1591. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  1592. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS2;
  1593. pPron->pronArray[PRON_A].POScount = 1;
  1594. pPron->POSchoice = pAbbrevInfo->POS2;
  1595. wcscpy( pPron->pronArray[PRON_B].phon_Str, pAbbrevInfo->pPron1 );
  1596. pPron->pronArray[PRON_B].phon_Len = wcslen( pPron->pronArray[PRON_B].phon_Str );
  1597. pPron->pronArray[PRON_B].POScode[0] = pAbbrevInfo->POS1;
  1598. pPron->pronArray[PRON_B].POScount = 1;
  1599. pPron->hasAlt = true;
  1600. }
  1601. return hr;
  1602. } /* ADisambig */
  1603. /***********************************************************************************************
  1604. * PolishDisambig *
  1605. *----------------*
  1606. * Description:
  1607. * This function disambiguates the word "polish" - [p ow 1 l - ax sh - Noun] vs.
  1608. * [p ow 1 l - ax sh - Adj] vs. [p aa 1 l - ih sh - Verb] vs. [p aa 1 l - ih sh - Noun]
  1609. *
  1610. ********************************************************************* AH **********************/
  1611. HRESULT CStdSentEnum::PolishDisambig( const AbbrevRecord* pAbbrevInfo, PRONRECORD* pPron, CItemList& ItemList,
  1612. SPLISTPOS ListPos )
  1613. {
  1614. SPDBG_FUNC( "CStdSentEnum::PolishDisambig" );
  1615. HRESULT hr = S_OK;
  1616. BOOL fMatch = false;
  1617. //--- Get Current Item...
  1618. SPLISTPOS TempPos = ListPos;
  1619. if ( TempPos )
  1620. {
  1621. ItemList.GetPrev( TempPos );
  1622. if ( TempPos )
  1623. {
  1624. TTSSentItem CurrentItem = ItemList.GetPrev( TempPos );
  1625. //--- If "Polish" is capitalized and not sentence-initial, and not preceded immediately
  1626. //--- by an open double-quote or parenthesis, use Noun...
  1627. if ( iswupper( CurrentItem.pItemSrcText[0] ) )
  1628. {
  1629. BOOL fSentenceInitial = false;
  1630. if ( !TempPos )
  1631. {
  1632. fSentenceInitial = true;
  1633. }
  1634. else
  1635. {
  1636. TTSSentItem PrevItem = ItemList.GetPrev( TempPos );
  1637. if ( PrevItem.pItemInfo->Type == eOPEN_PARENTHESIS ||
  1638. PrevItem.pItemInfo->Type == eOPEN_BRACKET ||
  1639. PrevItem.pItemInfo->Type == eOPEN_BRACE ||
  1640. PrevItem.pItemInfo->Type == eSINGLE_QUOTE ||
  1641. PrevItem.pItemInfo->Type == eDOUBLE_QUOTE )
  1642. {
  1643. fSentenceInitial = true;
  1644. }
  1645. }
  1646. if ( fSentenceInitial )
  1647. {
  1648. fMatch = true;
  1649. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron2 );
  1650. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  1651. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS2;
  1652. pPron->POSchoice = pAbbrevInfo->POS2;
  1653. }
  1654. else
  1655. {
  1656. fMatch = true;
  1657. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron1 );
  1658. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  1659. pPron->pronArray[PRON_A].POScode[0] = MS_Noun;
  1660. pPron->POSchoice = MS_Noun;
  1661. }
  1662. }
  1663. }
  1664. }
  1665. //--- Default - use the Verb pronunciation (but include the others as well,
  1666. //--- so that POS tagger rules will work properly)...
  1667. if ( !fMatch )
  1668. {
  1669. //--- Verb, Noun
  1670. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron2 );
  1671. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  1672. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS2;
  1673. pPron->pronArray[PRON_A].POScode[1] = pAbbrevInfo->POS3;
  1674. pPron->pronArray[PRON_A].POScount = 2;
  1675. //--- Adj
  1676. wcscpy( pPron->pronArray[PRON_B].phon_Str, pAbbrevInfo->pPron1 );
  1677. pPron->pronArray[PRON_B].phon_Len = wcslen( pPron->pronArray[PRON_B].phon_Str );
  1678. pPron->pronArray[PRON_B].POScode[0] = pAbbrevInfo->POS1;
  1679. pPron->pronArray[PRON_B].POScount = 1;
  1680. //--- Set initial choice to Verb...
  1681. pPron->POSchoice = pAbbrevInfo->POS2;
  1682. pPron->hasAlt = true;
  1683. }
  1684. return hr;
  1685. } /* PolishDisambig */
  1686. /***********************************************************************************************
  1687. * ReadDisambig *
  1688. *--------------*
  1689. * Description:
  1690. * This function disambiguates the word Read - past tense vs. present...
  1691. *
  1692. ********************************************************************* AH **********************/
  1693. HRESULT CStdSentEnum::ReadDisambig( const AbbrevRecord* pAbbrevInfo, CItemList& ItemList,
  1694. SPLISTPOS ListPos, CSentItemMemory& MemoryManager )
  1695. {
  1696. SPDBG_FUNC( "CStdSentEnum::ReadDisambig" );
  1697. HRESULT hr = S_OK;
  1698. BOOL fMatch = false;
  1699. //--- Get prev item
  1700. SPLISTPOS TempPos = ListPos;
  1701. if ( TempPos )
  1702. {
  1703. ItemList.GetPrev( TempPos );
  1704. if ( TempPos )
  1705. {
  1706. ItemList.GetPrev( TempPos );
  1707. if ( TempPos )
  1708. {
  1709. TTSSentItem PrevItem = ItemList.GetPrev( TempPos );
  1710. //--- Check for closest auxiliary
  1711. while ( PrevItem.Words[0].eWordPartOfSpeech != MS_VAux &&
  1712. PrevItem.Words[0].eWordPartOfSpeech != MS_Contr &&
  1713. TempPos )
  1714. {
  1715. PrevItem = ItemList.GetPrev( TempPos );
  1716. }
  1717. if ( PrevItem.Words[0].eWordPartOfSpeech == MS_VAux )
  1718. {
  1719. fMatch = true;
  1720. if ( wcsnicmp( PrevItem.Words[0].pWordText, L"have", 4 ) == 0 ||
  1721. wcsnicmp( PrevItem.Words[0].pWordText, L"has", 3 ) == 0 ||
  1722. wcsnicmp( PrevItem.Words[0].pWordText, L"had", 3 ) == 0 ||
  1723. wcsnicmp( PrevItem.Words[0].pWordText, L"am", 2 ) == 0 ||
  1724. wcsnicmp( PrevItem.Words[0].pWordText, L"ain't", 5 ) == 0 ||
  1725. wcsnicmp( PrevItem.Words[0].pWordText, L"are", 3 ) == 0 ||
  1726. wcsnicmp( PrevItem.Words[0].pWordText, L"aren't", 6 ) == 0 ||
  1727. wcsnicmp( PrevItem.Words[0].pWordText, L"be", 2 ) == 0 ||
  1728. wcsnicmp( PrevItem.Words[0].pWordText, L"is", 2 ) == 0 ||
  1729. wcsnicmp( PrevItem.Words[0].pWordText, L"was", 3 ) == 0 ||
  1730. wcsnicmp( PrevItem.Words[0].pWordText, L"were", 4 ) == 0 )
  1731. {
  1732. //--- Matched have or haven't (has or hasn't, had or hadn't) - go with "red"
  1733. TempPos = ListPos;
  1734. ItemList.GetPrev( TempPos );
  1735. TTSSentItem& ReadItem = ItemList.GetPrev( TempPos );
  1736. //--- The two pronunciations are exactly the same length, so this should never overwrite
  1737. //--- anything
  1738. wcscpy( ReadItem.Words[0].pWordPron, pAbbrevInfo->pPron2 );
  1739. ReadItem.Words[0].eWordPartOfSpeech = pAbbrevInfo->POS2;
  1740. ReadItem.eItemPartOfSpeech = pAbbrevInfo->POS2;
  1741. //--- Insert pron into word text - RAID #4746
  1742. ULONG ulNumChars = wcslen( ReadItem.Words[0].pWordPron );
  1743. ReadItem.Words[0].pWordText =
  1744. (WCHAR*) MemoryManager.GetMemory( ( ulNumChars + 3 ) * sizeof( WCHAR ), &hr );
  1745. if ( SUCCEEDED( hr ) )
  1746. {
  1747. ZeroMemory( (WCHAR*) ReadItem.Words[0].pWordText, ( ulNumChars + 3 ) * sizeof( WCHAR ) );
  1748. (WCHAR) ReadItem.Words[0].pWordText[0] = L'*';
  1749. wcscpy( ( (WCHAR*) ReadItem.Words[0].pWordText + 1 ), ReadItem.Words[0].pWordPron );
  1750. (WCHAR) ReadItem.Words[0].pWordText[ ulNumChars + 1 ] = L'*';
  1751. ReadItem.Words[0].ulWordLen = ulNumChars + 2;
  1752. }
  1753. }
  1754. else
  1755. {
  1756. //--- Some other auxiliary - go with "reed"
  1757. TempPos = ListPos;
  1758. ItemList.GetPrev( TempPos );
  1759. TTSSentItem& ReadItem = ItemList.GetPrev( TempPos );
  1760. //--- The two pronunciations are exactly the same length, so this should never overwrite
  1761. //--- anything
  1762. wcscpy( ReadItem.Words[0].pWordPron, pAbbrevInfo->pPron1 );
  1763. ReadItem.Words[0].eWordPartOfSpeech = pAbbrevInfo->POS1;
  1764. ReadItem.eItemPartOfSpeech = pAbbrevInfo->POS1;
  1765. //--- Insert pron into word text - RAID #4746
  1766. ULONG ulNumChars = wcslen( ReadItem.Words[0].pWordPron );
  1767. ReadItem.Words[0].pWordText =
  1768. (WCHAR*) MemoryManager.GetMemory( ( ulNumChars + 3 ) * sizeof( WCHAR ), &hr );
  1769. if ( SUCCEEDED( hr ) )
  1770. {
  1771. ZeroMemory( (WCHAR*) ReadItem.Words[0].pWordText, ( ulNumChars + 3 ) * sizeof( WCHAR ) );
  1772. (WCHAR) ReadItem.Words[0].pWordText[0] = L'*';
  1773. wcscpy( ( (WCHAR*) ReadItem.Words[0].pWordText + 1 ), ReadItem.Words[0].pWordPron );
  1774. (WCHAR) ReadItem.Words[0].pWordText[ ulNumChars + 1 ] = L'*';
  1775. ReadItem.Words[0].ulWordLen = ulNumChars + 2;
  1776. }
  1777. }
  1778. }
  1779. //--- Check for pronoun aux contractions
  1780. else if ( PrevItem.Words[0].eWordPartOfSpeech == MS_Contr )
  1781. {
  1782. fMatch = true;
  1783. const WCHAR *pApostrophe = wcsstr( PrevItem.Words[0].pWordText, L"'" );
  1784. if ( pApostrophe &&
  1785. wcsnicmp( pApostrophe, L"'ll", 3 ) == 0 )
  1786. {
  1787. //--- Matched an 'll form - go with "reed"
  1788. TempPos = ListPos;
  1789. ItemList.GetPrev( TempPos );
  1790. TTSSentItem& ReadItem = ItemList.GetPrev( TempPos );
  1791. wcscpy( ReadItem.Words[0].pWordPron, pAbbrevInfo->pPron1 );
  1792. ReadItem.Words[0].eWordPartOfSpeech = pAbbrevInfo->POS1;
  1793. ReadItem.eItemPartOfSpeech = pAbbrevInfo->POS1;
  1794. //--- Insert pron into word text - RAID #4746
  1795. ULONG ulNumChars = wcslen( ReadItem.Words[0].pWordPron );
  1796. ReadItem.Words[0].pWordText =
  1797. (WCHAR*) MemoryManager.GetMemory( ( ulNumChars + 3 ) * sizeof( WCHAR ), &hr );
  1798. if ( SUCCEEDED( hr ) )
  1799. {
  1800. ZeroMemory( (WCHAR*) ReadItem.Words[0].pWordText, ( ulNumChars + 3 ) * sizeof( WCHAR ) );
  1801. (WCHAR) ReadItem.Words[0].pWordText[0] = L'*';
  1802. wcscpy( ( (WCHAR*) ReadItem.Words[0].pWordText + 1 ), ReadItem.Words[0].pWordPron );
  1803. (WCHAR) ReadItem.Words[0].pWordText[ ulNumChars + 1 ] = L'*';
  1804. ReadItem.Words[0].ulWordLen = ulNumChars + 2;
  1805. }
  1806. }
  1807. else
  1808. {
  1809. //--- Some other form - go with "red"
  1810. TempPos = ListPos;
  1811. ItemList.GetPrev( TempPos );
  1812. TTSSentItem& ReadItem = ItemList.GetPrev( TempPos );
  1813. wcscpy( ReadItem.Words[0].pWordPron, pAbbrevInfo->pPron2 );
  1814. ReadItem.Words[0].eWordPartOfSpeech = pAbbrevInfo->POS2;
  1815. ReadItem.eItemPartOfSpeech = pAbbrevInfo->POS2;
  1816. //--- Insert pron into word text - RAID #4746
  1817. ULONG ulNumChars = wcslen( ReadItem.Words[0].pWordPron );
  1818. ReadItem.Words[0].pWordText =
  1819. (WCHAR*) MemoryManager.GetMemory( ( ulNumChars + 3 ) * sizeof( WCHAR ), &hr );
  1820. if ( SUCCEEDED( hr ) )
  1821. {
  1822. ZeroMemory( (WCHAR*) ReadItem.Words[0].pWordText, ( ulNumChars + 3 ) * sizeof( WCHAR ) );
  1823. (WCHAR) ReadItem.Words[0].pWordText[0] = L'*';
  1824. wcscpy( ( (WCHAR*) ReadItem.Words[0].pWordText + 1 ), ReadItem.Words[0].pWordPron );
  1825. (WCHAR) ReadItem.Words[0].pWordText[ ulNumChars + 1 ] = L'*';
  1826. ReadItem.Words[0].ulWordLen = ulNumChars + 2;
  1827. }
  1828. }
  1829. }
  1830. //--- Check for infinitival form
  1831. else
  1832. {
  1833. TempPos = ListPos;
  1834. ItemList.GetPrev( TempPos );
  1835. ItemList.GetPrev( TempPos );
  1836. PrevItem = ItemList.GetPrev( TempPos );
  1837. if ( PrevItem.Words[0].ulWordLen == 2 &&
  1838. wcsnicmp( PrevItem.Words[0].pWordText, L"to", 2 ) == 0 )
  1839. {
  1840. fMatch = true;
  1841. //--- Matched infinitival form - go with "reed"
  1842. TempPos = ListPos;
  1843. ItemList.GetPrev( TempPos );
  1844. TTSSentItem& ReadItem = ItemList.GetPrev( TempPos );
  1845. wcscpy( ReadItem.Words[0].pWordPron, pAbbrevInfo->pPron1 );
  1846. ReadItem.Words[0].eWordPartOfSpeech = pAbbrevInfo->POS1;
  1847. ReadItem.eItemPartOfSpeech = pAbbrevInfo->POS1;
  1848. //--- Insert pron into word text - RAID #4746
  1849. ULONG ulNumChars = wcslen( ReadItem.Words[0].pWordPron );
  1850. ReadItem.Words[0].pWordText =
  1851. (WCHAR*) MemoryManager.GetMemory( ( ulNumChars + 3 ) * sizeof( WCHAR ), &hr );
  1852. if ( SUCCEEDED( hr ) )
  1853. {
  1854. ZeroMemory( (WCHAR*) ReadItem.Words[0].pWordText, ( ulNumChars + 3 ) * sizeof( WCHAR ) );
  1855. (WCHAR) ReadItem.Words[0].pWordText[0] = L'*';
  1856. wcscpy( ( (WCHAR*) ReadItem.Words[0].pWordText + 1 ), ReadItem.Words[0].pWordPron );
  1857. (WCHAR) ReadItem.Words[0].pWordText[ ulNumChars + 1 ] = L'*';
  1858. ReadItem.Words[0].ulWordLen = ulNumChars + 2;
  1859. }
  1860. }
  1861. }
  1862. }
  1863. //--- Sentence initial - go with "reed"
  1864. else
  1865. {
  1866. fMatch = true;
  1867. TempPos = ListPos;
  1868. ItemList.GetPrev( TempPos );
  1869. TTSSentItem& ReadItem = ItemList.GetPrev( TempPos );
  1870. wcscpy( ReadItem.Words[0].pWordPron, pAbbrevInfo->pPron1 );
  1871. ReadItem.Words[0].eWordPartOfSpeech = pAbbrevInfo->POS1;
  1872. ReadItem.eItemPartOfSpeech = pAbbrevInfo->POS1;
  1873. //--- Insert pron into word text - RAID #4746
  1874. ULONG ulNumChars = wcslen( ReadItem.Words[0].pWordPron );
  1875. ReadItem.Words[0].pWordText =
  1876. (WCHAR*) MemoryManager.GetMemory( ( ulNumChars + 3 ) * sizeof( WCHAR ), &hr );
  1877. if ( SUCCEEDED( hr ) )
  1878. {
  1879. ZeroMemory( (WCHAR*) ReadItem.Words[0].pWordText, ( ulNumChars + 3 ) * sizeof( WCHAR ) );
  1880. (WCHAR) ReadItem.Words[0].pWordText[0] = L'*';
  1881. wcscpy( ( (WCHAR*) ReadItem.Words[0].pWordText + 1 ), ReadItem.Words[0].pWordPron );
  1882. (WCHAR) ReadItem.Words[0].pWordText[ ulNumChars + 1 ] = L'*';
  1883. ReadItem.Words[0].ulWordLen = ulNumChars + 2;
  1884. }
  1885. }
  1886. }
  1887. }
  1888. if ( !fMatch )
  1889. {
  1890. TempPos = ListPos;
  1891. ItemList.GetPrev( TempPos );
  1892. TTSSentItem& ReadItem = ItemList.GetPrev( TempPos );
  1893. //--- Default - go with past tense...
  1894. wcscpy( ReadItem.Words[0].pWordPron, pAbbrevInfo->pPron2 );
  1895. ReadItem.Words[0].eWordPartOfSpeech = pAbbrevInfo->POS2;
  1896. ReadItem.eItemPartOfSpeech = pAbbrevInfo->POS2;
  1897. //--- Insert pron into word text - RAID #4746
  1898. ULONG ulNumChars = wcslen( ReadItem.Words[0].pWordPron );
  1899. ReadItem.Words[0].pWordText =
  1900. (WCHAR*) MemoryManager.GetMemory( ( ulNumChars + 3 ) * sizeof( WCHAR ), &hr );
  1901. if ( SUCCEEDED( hr ) )
  1902. {
  1903. ZeroMemory( (WCHAR*) ReadItem.Words[0].pWordText, ( ulNumChars + 3 ) * sizeof( WCHAR ) );
  1904. (WCHAR) ReadItem.Words[0].pWordText[0] = L'*';
  1905. wcscpy( ( (WCHAR*) ReadItem.Words[0].pWordText + 1 ), ReadItem.Words[0].pWordPron );
  1906. (WCHAR) ReadItem.Words[0].pWordText[ ulNumChars + 1 ] = L'*';
  1907. ReadItem.Words[0].ulWordLen = ulNumChars + 2;
  1908. }
  1909. }
  1910. return hr;
  1911. } /* ReadDisambig */