Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

2000 lines
94 KiB

  1. /*******************************************************************************
  2. * Disambig.cpp *
  3. *--------------*
  4. * Description:
  5. * This module contains the methods to disambiguate part of speech and
  6. * select the correct pronunciation from the lexicon.
  7. *-------------------------------------------------------------------------------
  8. * Created By: EDC Date: 07/15/99
  9. * Copyright (C) 1999 Microsoft Corporation
  10. * All Rights Reserved
  11. *
  12. *******************************************************************************/
  13. //--- Additional includes
  14. #include "stdafx.h"
  15. #include "commonlx.h"
  16. #ifndef StdSentEnum_h
  17. #include "stdsentenum.h"
  18. #endif
  19. #include "spttsengdebug.h"
  20. /*****************************************************************************
  21. * TryPOSConversion *
  22. *------------------*
  23. *
  24. * Description:
  25. * Checks to see whether the argument PRONRECORD contains the argument
  26. * ENGPARTOFSPEECH as an option. If so, sets the PRONRECORD alternate
  27. * choice and part of speech choice, and returns true. If not, just returns
  28. * false without modifying the PRONRECORD at all.
  29. *
  30. ***************************************************************** AH *********/
  31. bool TryPOSConversion( PRONRECORD& pPron, ENGPARTOFSPEECH PartOfSpeech )
  32. {
  33. //--- Check first pronunciation
  34. for ( ULONG i = 0; i < pPron.pronArray[0].POScount; i++ )
  35. {
  36. if ( pPron.pronArray[0].POScode[i] == PartOfSpeech )
  37. {
  38. pPron.altChoice = 0;
  39. pPron.POSchoice = PartOfSpeech;
  40. return true;
  41. }
  42. }
  43. //--- Check second pronunciation
  44. if ( pPron.hasAlt )
  45. {
  46. for ( ULONG i = 0; i < pPron.pronArray[1].POScount; i++ )
  47. {
  48. if ( pPron.pronArray[1].POScode[i] == PartOfSpeech )
  49. {
  50. pPron.altChoice = 1;
  51. pPron.POSchoice = PartOfSpeech;
  52. return true;
  53. }
  54. }
  55. }
  56. return false;
  57. } /* TryPOS Conversion */
  58. /*****************************************************************************
  59. * DisambiguatePOS *
  60. *-----------------*
  61. *
  62. * Description:
  63. * Disambiguate parts of speech by applying patches in order... This
  64. * work is an implementation of Eric Brill's rule-based part of speech
  65. * tagger - see, for example:
  66. *
  67. * Brill, Eric. 1992. A simple rule-based part of speech tagger.
  68. * In Proceedings of the Third Conference on Applied Natural
  69. * Language Processing, ACL. Trento, Italy.
  70. *
  71. ***************************************************************** AH *********/
  72. void DisambiguatePOS( PRONRECORD *pProns, ULONG cNumOfWords )
  73. {
  74. SPDBG_FUNC( "DisambiguatePOS" );
  75. //--- Iterate over the patches, applying each (where applicable) to the
  76. //--- entire sentence. For each patch, iterate over each word in the
  77. //--- sentence to which the patch could apply (from left to right).
  78. for ( int i = 0; i < sp_countof( g_POSTaggerPatches ); i++ )
  79. {
  80. switch ( g_POSTaggerPatches[i].eTemplateType )
  81. {
  82. case PREV1T:
  83. {
  84. if ( cNumOfWords > 1 )
  85. {
  86. for ( ULONG j = 1; j < cNumOfWords; j++ )
  87. {
  88. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  89. {
  90. //--- If the current POS matches, and the previous POS matches, and
  91. //--- the conversion POS is a possibility for this word, convert the
  92. //--- POS.
  93. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  94. pProns[j - 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 )
  95. {
  96. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  97. }
  98. }
  99. }
  100. }
  101. }
  102. break;
  103. case NEXT1T:
  104. {
  105. if ( cNumOfWords > 1 )
  106. {
  107. for ( ULONG j = 0; j < cNumOfWords - 1; j++ )
  108. {
  109. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  110. {
  111. //--- If the current POS matches, and the next POS matches, and
  112. //--- the conversion POS is a possibility for this word, convert the
  113. //--- POS.
  114. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  115. pProns[j + 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 )
  116. {
  117. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  118. }
  119. }
  120. }
  121. }
  122. }
  123. break;
  124. case PREV2T:
  125. {
  126. if ( cNumOfWords > 2 )
  127. {
  128. for ( ULONG j = 2; j < cNumOfWords; j++ )
  129. {
  130. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  131. {
  132. //--- If the current POS matches, and the POS two previous matches, and
  133. //--- the conversion POS is a possibility for this word, convert the POS.
  134. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  135. pProns[j - 2].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 )
  136. {
  137. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  138. }
  139. }
  140. }
  141. }
  142. }
  143. break;
  144. case NEXT2T:
  145. {
  146. if ( cNumOfWords > 2 )
  147. {
  148. for ( ULONG j = 0; j < cNumOfWords - 2; j++ )
  149. {
  150. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  151. {
  152. //--- If the current POS matches, and the POS two after matches, and
  153. //--- the conversion POS is a possibility for this word, convert the
  154. //--- POS.
  155. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  156. pProns[j + 2].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 )
  157. {
  158. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  159. }
  160. }
  161. }
  162. }
  163. }
  164. break;
  165. case PREV1OR2T:
  166. {
  167. if ( cNumOfWords > 2 )
  168. {
  169. for ( ULONG j = 1; j < cNumOfWords; j++ )
  170. {
  171. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  172. {
  173. //--- If the current POS matches, and the previous POS matches OR the
  174. //--- POS two previous matches, and the conversion POS is a possibility
  175. //--- for this word, convert the POS.
  176. if ( ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  177. pProns[j - 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 ) ||
  178. ( j > 1 &&
  179. pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  180. pProns[j - 2].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 ) )
  181. {
  182. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  183. }
  184. }
  185. }
  186. }
  187. }
  188. break;
  189. case NEXT1OR2T:
  190. {
  191. if ( cNumOfWords > 2 )
  192. {
  193. for ( ULONG j = 0; j < cNumOfWords - 1; j++ )
  194. {
  195. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  196. {
  197. //--- If the current POS matches, and the next POS matches OR the POS
  198. //--- two after matches, and the conversion POS is a possibility for this
  199. //--- word, convert the POS.
  200. if ( ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  201. pProns[j + 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 ) ||
  202. ( j < cNumOfWords - 2 &&
  203. pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  204. pProns[j + 2].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 ) )
  205. {
  206. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  207. }
  208. }
  209. }
  210. }
  211. }
  212. break;
  213. case PREV1OR2OR3T:
  214. {
  215. if ( cNumOfWords > 3 )
  216. {
  217. for ( ULONG j = 1; j < cNumOfWords; j++ )
  218. {
  219. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  220. {
  221. //--- If the current POS matches, and the previous POS matches OR the
  222. //--- POS two previous matches OR the POS three previous matches, and
  223. //--- the conversion POS is a possibility for this word, convert the POS.
  224. if ( ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  225. pProns[j - 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 ) ||
  226. ( j > 1 &&
  227. pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  228. pProns[j - 2].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 ) ||
  229. ( j > 2 &&
  230. pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  231. pProns[j - 3].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 ) )
  232. {
  233. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  234. }
  235. }
  236. }
  237. }
  238. }
  239. break;
  240. case NEXT1OR2OR3T:
  241. {
  242. if ( cNumOfWords > 3 )
  243. {
  244. for ( ULONG j = 0; j < cNumOfWords - 1; j++ )
  245. {
  246. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  247. {
  248. //--- If the current POS matches, and the next POS matches OR the POS
  249. //--- two after matches OR the POS three after matches, and the conversion
  250. //--- POS is a possibility for this word, convert the POS.
  251. if ( ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  252. pProns[j + 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 ) ||
  253. ( j < cNumOfWords - 2 &&
  254. pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  255. pProns[j + 2].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 ) ||
  256. ( j < cNumOfWords - 3 &&
  257. pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  258. pProns[j + 3].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 ) )
  259. {
  260. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  261. }
  262. }
  263. }
  264. }
  265. }
  266. break;
  267. case PREV1TNEXT1T:
  268. {
  269. if ( cNumOfWords > 2 )
  270. {
  271. for ( ULONG j = 1; j < cNumOfWords - 1; j++ )
  272. {
  273. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  274. {
  275. //--- If the current POS matches, and the next POS matches, and the
  276. //--- previous POS matches, and the conversion POS is a possibility
  277. //--- for this word, convert the POS.
  278. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  279. pProns[j - 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 &&
  280. pProns[j + 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS2 )
  281. {
  282. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  283. }
  284. }
  285. }
  286. }
  287. }
  288. break;
  289. case PREV1TNEXT2T:
  290. {
  291. if ( cNumOfWords > 3 )
  292. {
  293. for ( ULONG j = 1; j < cNumOfWords - 2; j++ )
  294. {
  295. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  296. {
  297. //--- If the current POS matches, and the POS two after matches, and the
  298. //--- previous POS matches, and the conversion POS is a possibility
  299. //--- for this word, convert the POS.
  300. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  301. pProns[j - 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 &&
  302. pProns[j + 2].POSchoice == g_POSTaggerPatches[i].eTemplatePOS2 )
  303. {
  304. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  305. }
  306. }
  307. }
  308. }
  309. }
  310. break;
  311. case PREV2TNEXT1T:
  312. {
  313. if ( cNumOfWords > 3 )
  314. {
  315. for ( ULONG j = 2; j < cNumOfWords - 1; j++ )
  316. {
  317. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  318. {
  319. //--- If the current POS matches, and the next POS matches, and the
  320. //--- POS two previous matches, and the conversion POS is a possibility
  321. //--- for this word, convert the POS.
  322. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  323. pProns[j - 2].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 &&
  324. pProns[j + 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS2 )
  325. {
  326. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  327. }
  328. }
  329. }
  330. }
  331. }
  332. break;
  333. case CAP:
  334. {
  335. for ( ULONG j = 0; j < cNumOfWords; j++ )
  336. {
  337. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  338. {
  339. //--- If the current POS matches, and the word is capitalized, and the
  340. //--- conversion POS is a possibility for this word, convert the POS.
  341. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  342. iswupper( pProns[j].orthStr[0] ) )
  343. {
  344. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  345. }
  346. }
  347. }
  348. }
  349. break;
  350. case NOTCAP:
  351. {
  352. for ( ULONG j = 0; j < cNumOfWords; j++ )
  353. {
  354. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  355. {
  356. //--- If the current POS matches, and the word is not capitalized, and the
  357. //--- conversion POS is a possibility for this word, convert the POS.
  358. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  359. !iswupper( pProns[j].orthStr[0] ) )
  360. {
  361. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  362. }
  363. }
  364. }
  365. }
  366. break;
  367. case PREVCAP:
  368. {
  369. if ( cNumOfWords > 1 )
  370. {
  371. for ( ULONG j = 1; j < cNumOfWords; j++ )
  372. {
  373. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  374. {
  375. //--- If the current POS matches, and the previous word is capitalized,
  376. //--- and the conversion POS is a possibility for this word, convert the
  377. //--- POS.
  378. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  379. iswupper( pProns[j - 1].orthStr[0] ) )
  380. {
  381. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  382. }
  383. }
  384. }
  385. }
  386. }
  387. break;
  388. case PREVNOTCAP:
  389. {
  390. if ( cNumOfWords > 1 )
  391. {
  392. for ( ULONG j = 1; j < cNumOfWords; j++ )
  393. {
  394. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  395. {
  396. //--- If the current POS matches, and the word is capitalized, and the
  397. //--- conversion POS is a possibility for this word, convert the POS.
  398. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  399. !iswupper( pProns[j - 1].orthStr[0] ) )
  400. {
  401. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  402. }
  403. }
  404. }
  405. }
  406. }
  407. break;
  408. case PREV1W:
  409. {
  410. if ( cNumOfWords > 1 )
  411. {
  412. for ( ULONG j = 1; j < cNumOfWords; j++ )
  413. {
  414. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  415. {
  416. //--- If the current POS matches, and the previous word matches, and the
  417. //--- conversion POS is a possibility for this word, convert the POS.
  418. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  419. _wcsicmp( pProns[j - 1].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 )
  420. {
  421. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  422. }
  423. }
  424. }
  425. }
  426. }
  427. break;
  428. case NEXT1W:
  429. {
  430. if ( cNumOfWords > 1 )
  431. {
  432. for ( ULONG j = 0; j < cNumOfWords - 1; j++ )
  433. {
  434. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  435. {
  436. //--- If the current POS matches, and the next word matches, and the
  437. //--- conversion POS is a possibility for this word, convert the POS.
  438. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  439. _wcsicmp( pProns[j + 1].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 )
  440. {
  441. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  442. }
  443. }
  444. }
  445. }
  446. }
  447. break;
  448. case PREV2W:
  449. {
  450. if ( cNumOfWords > 2 )
  451. {
  452. for ( ULONG j = 2; j < cNumOfWords; j++ )
  453. {
  454. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  455. {
  456. //--- If the current POS matches, and the word two previous matches, and the
  457. //--- conversion POS is a possibility for this word, convert the POS.
  458. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  459. _wcsicmp( pProns[j - 2].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 )
  460. {
  461. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  462. }
  463. }
  464. }
  465. }
  466. }
  467. break;
  468. case NEXT2W:
  469. {
  470. if ( cNumOfWords > 2 )
  471. {
  472. for ( ULONG j = 0; j < cNumOfWords - 2; j++ )
  473. {
  474. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  475. {
  476. //--- If the current POS matches, and the word two after matches, and the
  477. //--- conversion POS is a possibility for this word, convert the POS.
  478. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  479. _wcsicmp( pProns[j + 2].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 )
  480. {
  481. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  482. }
  483. }
  484. }
  485. }
  486. }
  487. break;
  488. case PREV1OR2W:
  489. {
  490. if ( cNumOfWords > 2 )
  491. {
  492. for ( ULONG j = 0; j < cNumOfWords - 1; j++ )
  493. {
  494. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  495. {
  496. //--- If the current POS matches, and the previous word OR the word two
  497. //--- previous matches, and the conversion POS is a possibility for this word,
  498. //--- convert the POS.
  499. if ( ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  500. _wcsicmp( pProns[j - 1].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 ) ||
  501. ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  502. _wcsicmp( pProns[j - 2].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 ) )
  503. {
  504. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  505. }
  506. }
  507. }
  508. }
  509. }
  510. break;
  511. case NEXT1OR2W:
  512. {
  513. if ( cNumOfWords > 1 )
  514. {
  515. for ( ULONG j = 0; j < cNumOfWords - 1; j++ )
  516. {
  517. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  518. {
  519. //--- If the current POS matches, and the next word matches OR the word two after
  520. //--- matches, and the conversion POS is a possibility for this word, convert the
  521. //--- POS.
  522. if ( ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  523. _wcsicmp( pProns[j + 1].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 ) ||
  524. ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  525. _wcsicmp( pProns[j + 2].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 ) )
  526. {
  527. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  528. }
  529. }
  530. }
  531. }
  532. }
  533. break;
  534. case CURRWPREV1W:
  535. {
  536. if ( cNumOfWords > 1 )
  537. {
  538. for ( ULONG j = 1; j < cNumOfWords; j++ )
  539. {
  540. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  541. {
  542. //--- If the current POS matches, and the current word matches, and the previous
  543. //--- word matches, and the conversion POS is a possibility for this word, convert
  544. //--- the POS.
  545. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  546. _wcsicmp( pProns[j].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 &&
  547. _wcsicmp( pProns[j - 1].orthStr, g_POSTaggerPatches[i].pTemplateWord2 ) == 0 )
  548. {
  549. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  550. }
  551. }
  552. }
  553. }
  554. }
  555. break;
  556. case CURRWNEXT1W:
  557. {
  558. if ( cNumOfWords > 1 )
  559. {
  560. for ( ULONG j = 0; j < cNumOfWords - 1; j++ )
  561. {
  562. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  563. {
  564. //--- If the current POS matches, and the current word matches, and the next
  565. //--- word matches, and the conversion POS is a possibility for this word, convert
  566. //--- the POS.
  567. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  568. _wcsicmp( pProns[j].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 &&
  569. _wcsicmp( pProns[j + 1].orthStr, g_POSTaggerPatches[i].pTemplateWord2 ) == 0 )
  570. {
  571. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  572. }
  573. }
  574. }
  575. }
  576. }
  577. break;
  578. case CURRWPREV1T:
  579. {
  580. if ( cNumOfWords > 1 )
  581. {
  582. for ( ULONG j = 1; j < cNumOfWords; j++ )
  583. {
  584. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  585. {
  586. //--- If the current POS matches, and the current word matches, and the previous
  587. //--- POS matches, and the conversion POS is a possibility for this word, convert
  588. //--- the POS.
  589. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  590. _wcsicmp( pProns[j].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 &&
  591. pProns[j - 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 )
  592. {
  593. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  594. }
  595. }
  596. }
  597. }
  598. }
  599. break;
  600. case CURRWNEXT1T:
  601. {
  602. if ( cNumOfWords > 1 )
  603. {
  604. for ( ULONG j = 0; j < cNumOfWords - 1; j++ )
  605. {
  606. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  607. {
  608. //--- If the current POS matches, and the current word matches, and the next
  609. //--- POS matches, and the conversion POS is a possibility for this word, convert
  610. //--- the POS.
  611. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  612. _wcsicmp( pProns[j].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 &&
  613. pProns[j + 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 )
  614. {
  615. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  616. }
  617. }
  618. }
  619. }
  620. }
  621. break;
  622. case CURRW:
  623. {
  624. for ( ULONG j = 0; j < cNumOfWords; j++ )
  625. {
  626. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  627. {
  628. //--- If the current POS matches, and the current word matches, and the
  629. //--- conversion POS is a possibility for this word, convert the POS.
  630. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  631. _wcsicmp( pProns[j].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 )
  632. {
  633. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS ) ;
  634. }
  635. }
  636. }
  637. }
  638. break;
  639. case PREV1WT:
  640. {
  641. if ( cNumOfWords > 1 )
  642. {
  643. for ( ULONG j = 1; j < cNumOfWords; j++ )
  644. {
  645. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  646. {
  647. //--- If the current POS matches, and the previous word and POS match, and
  648. //--- the conversion POS is a possibility for this word, convert the POS.
  649. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  650. pProns[j - 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 &&
  651. _wcsicmp( pProns[j - 1].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 )
  652. {
  653. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  654. }
  655. }
  656. }
  657. }
  658. }
  659. break;
  660. case NEXT1WT:
  661. {
  662. if ( cNumOfWords > 1 )
  663. {
  664. for ( ULONG j = 0; j < cNumOfWords - 1; j++ )
  665. {
  666. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  667. {
  668. //--- If the current POS matches, and the next word and POS match, and
  669. //--- the conversion POS is a possibility for this word, convert the POS.
  670. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  671. pProns[j + 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 &&
  672. _wcsicmp( pProns[j + 1].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 )
  673. {
  674. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  675. }
  676. }
  677. }
  678. }
  679. }
  680. break;
  681. case CURRWPREV1WT:
  682. {
  683. if ( cNumOfWords > 1 )
  684. {
  685. for ( ULONG j = 1; j < cNumOfWords; j++ )
  686. {
  687. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  688. {
  689. //--- If the current POS matches, and the current words matches, and the
  690. //--- previous word and POS match, and the conversion POS is a possibility
  691. //--- for this word, convert the POS.
  692. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  693. _wcsicmp( pProns[j].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 &&
  694. pProns[j - 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 &&
  695. _wcsicmp( pProns[j - 1].orthStr, g_POSTaggerPatches[i].pTemplateWord2 ) == 0 )
  696. {
  697. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  698. }
  699. }
  700. }
  701. }
  702. }
  703. break;
  704. case CURRWNEXT1WT:
  705. {
  706. if ( cNumOfWords > 1 )
  707. {
  708. for ( ULONG j = 0; j < cNumOfWords - 1; j++ )
  709. {
  710. if ( pProns[j].XMLPartOfSpeech == MS_Unknown )
  711. {
  712. //--- If the current POS matches, and the current words matches, and the
  713. //--- next word and POS match, and the conversion POS is a possibility
  714. //--- for this word, convert the POS.
  715. if ( pProns[j].POSchoice == g_POSTaggerPatches[i].eCurrentPOS &&
  716. _wcsicmp( pProns[j].orthStr, g_POSTaggerPatches[i].pTemplateWord1 ) == 0 &&
  717. pProns[j + 1].POSchoice == g_POSTaggerPatches[i].eTemplatePOS1 &&
  718. _wcsicmp( pProns[j + 1].orthStr, g_POSTaggerPatches[i].pTemplateWord2 ) == 0 )
  719. {
  720. TryPOSConversion( pProns[j], g_POSTaggerPatches[i].eConvertToPOS );
  721. }
  722. }
  723. }
  724. }
  725. }
  726. break;
  727. }
  728. }
  729. } /* DisambiguatePOS */
  730. /*****************************************************************************
  731. * Pronounce *
  732. *-----------*
  733. * Description:
  734. * Get lexicon or letter-to-sound (LTS) pronunciations
  735. *
  736. ********************************************************************** MC ***/
  737. HRESULT CStdSentEnum::Pronounce( PRONRECORD *pPron )
  738. {
  739. SPDBG_FUNC( "Pronounce" );
  740. SPWORDPRONUNCIATIONLIST SPList;
  741. HRESULT hr = SPERR_NOT_IN_LEX;
  742. ULONG cPhonLen;
  743. DWORD dwFlags = eLEXTYPE_USER | eLEXTYPE_APP | eLEXTYPE_PRIVATE1 | eLEXTYPE_PRIVATE2;
  744. BOOL fPOSExists = false;
  745. ZeroMemory( &SPList, sizeof(SPWORDPRONUNCIATIONLIST) );
  746. //--- Special Case - XML Provided Part Of Speech. Search for exact match first...
  747. if ( pPron->XMLPartOfSpeech != MS_Unknown )
  748. {
  749. //--- Try User Lexicon
  750. hr = m_cpAggregateLexicon->GetPronunciations( pPron->orthStr, 1033, eLEXTYPE_USER, &SPList );
  751. if ( SUCCEEDED( hr ) &&
  752. SPList.pFirstWordPronunciation )
  753. {
  754. for ( SPWORDPRONUNCIATION *pPronunciation = SPList.pFirstWordPronunciation; pPronunciation;
  755. pPronunciation = pPronunciation->pNextWordPronunciation )
  756. {
  757. if ( pPronunciation->ePartOfSpeech == pPron->XMLPartOfSpeech )
  758. {
  759. fPOSExists = true;
  760. break;
  761. }
  762. }
  763. if ( !fPOSExists )
  764. {
  765. if ( SPList.pvBuffer )
  766. {
  767. ::CoTaskMemFree( SPList.pvBuffer );
  768. ZeroMemory( &SPList, sizeof(SPWORDPRONUNCIATIONLIST) );
  769. }
  770. }
  771. }
  772. //--- Handle empty pronunciation
  773. else if ( !SPList.pFirstWordPronunciation )
  774. {
  775. if ( SPList.pvBuffer )
  776. {
  777. ::CoTaskMemFree( SPList.pvBuffer );
  778. ZeroMemory( &SPList, sizeof(SPWORDPRONUNCIATIONLIST) );
  779. }
  780. hr = SPERR_NOT_IN_LEX;
  781. }
  782. //--- Try App Lexicon
  783. if ( !fPOSExists )
  784. {
  785. hr = m_cpAggregateLexicon->GetPronunciations( pPron->orthStr, 1033, eLEXTYPE_APP, &SPList );
  786. if ( SUCCEEDED( hr ) &&
  787. SPList.pFirstWordPronunciation )
  788. {
  789. for ( SPWORDPRONUNCIATION *pPronunciation = SPList.pFirstWordPronunciation; pPronunciation;
  790. pPronunciation = pPronunciation->pNextWordPronunciation )
  791. {
  792. if ( pPronunciation->ePartOfSpeech == pPron->XMLPartOfSpeech )
  793. {
  794. fPOSExists = true;
  795. break;
  796. }
  797. }
  798. if ( !fPOSExists )
  799. {
  800. if ( SPList.pvBuffer )
  801. {
  802. ::CoTaskMemFree( SPList.pvBuffer );
  803. ZeroMemory( &SPList, sizeof(SPWORDPRONUNCIATIONLIST) );
  804. }
  805. }
  806. }
  807. //--- Handle empty pronunciation
  808. else if ( !SPList.pFirstWordPronunciation )
  809. {
  810. if ( SPList.pvBuffer )
  811. {
  812. ::CoTaskMemFree( SPList.pvBuffer );
  813. ZeroMemory( &SPList, sizeof(SPWORDPRONUNCIATIONLIST) );
  814. }
  815. hr = SPERR_NOT_IN_LEX;
  816. }
  817. }
  818. //--- Try Vendor Lexicon
  819. if ( !fPOSExists )
  820. {
  821. hr = m_cpAggregateLexicon->GetPronunciations( pPron->orthStr, 1033, eLEXTYPE_PRIVATE1, &SPList );
  822. if ( SUCCEEDED( hr ) &&
  823. SPList.pFirstWordPronunciation )
  824. {
  825. for ( SPWORDPRONUNCIATION *pPronunciation = SPList.pFirstWordPronunciation; pPronunciation;
  826. pPronunciation = pPronunciation->pNextWordPronunciation )
  827. {
  828. if ( pPronunciation->ePartOfSpeech == pPron->XMLPartOfSpeech )
  829. {
  830. fPOSExists = true;
  831. break;
  832. }
  833. }
  834. if ( !fPOSExists )
  835. {
  836. if ( SPList.pvBuffer )
  837. {
  838. ::CoTaskMemFree( SPList.pvBuffer );
  839. ZeroMemory( &SPList, sizeof(SPWORDPRONUNCIATIONLIST) );
  840. }
  841. }
  842. }
  843. //--- Handle empty pronunciation
  844. else if ( !SPList.pFirstWordPronunciation )
  845. {
  846. if ( SPList.pvBuffer )
  847. {
  848. ::CoTaskMemFree( SPList.pvBuffer );
  849. ZeroMemory( &SPList, sizeof(SPWORDPRONUNCIATIONLIST) );
  850. }
  851. hr = SPERR_NOT_IN_LEX;
  852. }
  853. }
  854. //--- Try Morph Lexicon
  855. if ( !fPOSExists )
  856. {
  857. hr = m_pMorphLexicon->DoSuffixMorph( pPron->orthStr, pPron->lemmaStr, 1033, dwFlags, &SPList );
  858. if ( SUCCEEDED( hr ) &&
  859. SPList.pFirstWordPronunciation )
  860. {
  861. for ( SPWORDPRONUNCIATION *pPronunciation = SPList.pFirstWordPronunciation; pPronunciation;
  862. pPronunciation = pPronunciation->pNextWordPronunciation )
  863. {
  864. if ( pPronunciation->ePartOfSpeech == pPron->XMLPartOfSpeech )
  865. {
  866. fPOSExists = true;
  867. break;
  868. }
  869. }
  870. if ( !fPOSExists )
  871. {
  872. //--- Need to do this the last time, to make sure we hit the default code below...
  873. //--- RAID 5078
  874. hr = SPERR_NOT_IN_LEX;
  875. if ( SPList.pvBuffer )
  876. {
  877. ::CoTaskMemFree( SPList.pvBuffer );
  878. ZeroMemory( &SPList, sizeof(SPWORDPRONUNCIATIONLIST) );
  879. }
  880. }
  881. }
  882. //--- Handle empty pronunciation
  883. else if ( !SPList.pFirstWordPronunciation )
  884. {
  885. if ( SPList.pvBuffer )
  886. {
  887. ::CoTaskMemFree( SPList.pvBuffer );
  888. ZeroMemory( &SPList, sizeof(SPWORDPRONUNCIATIONLIST) );
  889. }
  890. hr = SPERR_NOT_IN_LEX;
  891. }
  892. }
  893. }
  894. //--- Default case - just look up orthography and go with first match.
  895. if ( hr == SPERR_NOT_IN_LEX )
  896. {
  897. hr = m_cpAggregateLexicon->GetPronunciations( pPron->orthStr, 1033, eLEXTYPE_USER, &SPList );
  898. //--- Handle empty pronunciation
  899. if ( SUCCEEDED( hr ) &&
  900. !SPList.pFirstWordPronunciation )
  901. {
  902. if ( SPList.pvBuffer )
  903. {
  904. ::CoTaskMemFree( SPList.pvBuffer );
  905. ZeroMemory( &SPList, sizeof(SPWORDPRONUNCIATIONLIST) );
  906. }
  907. hr = SPERR_NOT_IN_LEX;
  908. }
  909. }
  910. if ( hr == SPERR_NOT_IN_LEX )
  911. {
  912. hr = m_cpAggregateLexicon->GetPronunciations( pPron->orthStr, 1033, eLEXTYPE_APP, &SPList );
  913. //--- Handle empty pronunciation
  914. if ( SUCCEEDED( hr ) &&
  915. !SPList.pFirstWordPronunciation )
  916. {
  917. if ( SPList.pvBuffer )
  918. {
  919. ::CoTaskMemFree( SPList.pvBuffer );
  920. ZeroMemory( &SPList, sizeof(SPWORDPRONUNCIATIONLIST) );
  921. }
  922. hr = SPERR_NOT_IN_LEX;
  923. }
  924. }
  925. if ( hr == SPERR_NOT_IN_LEX )
  926. {
  927. hr = m_cpAggregateLexicon->GetPronunciations( pPron->orthStr, 1033, eLEXTYPE_PRIVATE1, &SPList );
  928. //--- Handle empty pronunciation
  929. if ( SUCCEEDED( hr ) &&
  930. !SPList.pFirstWordPronunciation )
  931. {
  932. if ( SPList.pvBuffer )
  933. {
  934. ::CoTaskMemFree( SPList.pvBuffer );
  935. ZeroMemory( &SPList, sizeof(SPWORDPRONUNCIATIONLIST) );
  936. }
  937. hr = SPERR_NOT_IN_LEX;
  938. }
  939. }
  940. if ( hr == SPERR_NOT_IN_LEX )
  941. {
  942. hr = m_pMorphLexicon->DoSuffixMorph( pPron->orthStr, pPron->lemmaStr, 1033,
  943. dwFlags, &SPList );
  944. //--- Handle empty pronunciation
  945. if ( SUCCEEDED( hr ) &&
  946. !SPList.pFirstWordPronunciation )
  947. {
  948. if ( SPList.pvBuffer )
  949. {
  950. ::CoTaskMemFree( SPList.pvBuffer );
  951. ZeroMemory( &SPList, sizeof(SPWORDPRONUNCIATIONLIST) );
  952. }
  953. hr = SPERR_NOT_IN_LEX;
  954. }
  955. }
  956. if ( hr == SPERR_NOT_IN_LEX )
  957. {
  958. hr = m_cpAggregateLexicon->GetPronunciations( pPron->orthStr, 1033, eLEXTYPE_PRIVATE2, &SPList );
  959. //--- Make all LTS words Nouns...
  960. for ( SPWORDPRONUNCIATION *pPronunciation = SPList.pFirstWordPronunciation; pPronunciation;
  961. pPronunciation = pPronunciation->pNextWordPronunciation )
  962. {
  963. pPronunciation->ePartOfSpeech = SPPS_Noun;
  964. }
  965. }
  966. if (SUCCEEDED(hr))
  967. {
  968. //--- WARNING - this assumes pronunciations will only come from one type of lexicon, an assumption
  969. //--- which was true as of July, 2000
  970. pPron->pronType = SPList.pFirstWordPronunciation->eLexiconType;
  971. //------------------------------------------------------------
  972. // SAPI unrolls pronunciations from their POS.
  973. // So roll them back into the original collapsed array
  974. // of one or two candidates with sorted POS (argh...)
  975. //------------------------------------------------------------
  976. SPWORDPRONUNCIATION *firstPron, *pCurPron, *pNextPron;
  977. //------------------------------------------
  978. // Init pronunciation A
  979. //------------------------------------------
  980. pCurPron = firstPron = SPList.pFirstWordPronunciation;
  981. pPron->pronArray[PRON_A].POScount = 1;
  982. //----------------------------
  983. // Get phoneme length
  984. //----------------------------
  985. cPhonLen = wcslen( firstPron->szPronunciation ) + 1; // include delimiter
  986. //----------------------------
  987. // Clip phoneme string to max
  988. //----------------------------
  989. if( cPhonLen > SP_MAX_PRON_LENGTH )
  990. {
  991. cPhonLen = SP_MAX_PRON_LENGTH;
  992. }
  993. //----------------------------
  994. // Copy unicode phoneme string
  995. //----------------------------
  996. memcpy( pPron->pronArray[PRON_A].phon_Str, firstPron->szPronunciation, cPhonLen * sizeof(WCHAR) );
  997. // RAID 21464 - null-terminate SP_MAX_PRON_LENGTH length pronunciations...
  998. pPron->pronArray[PRON_A].phon_Str[cPhonLen-1] = 0;
  999. pPron->pronArray[PRON_A].phon_Len = cPhonLen -1; // minus delimiter
  1000. pPron->pronArray[PRON_A].POScode[0] = (ENGPARTOFSPEECH)firstPron->ePartOfSpeech;
  1001. //------------------------------------------
  1002. // Init pronunciation B
  1003. //------------------------------------------
  1004. pPron->pronArray[PRON_B].POScount = 0;
  1005. pPron->pronArray[PRON_B].phon_Len = 0;
  1006. pNextPron = pCurPron->pNextWordPronunciation;
  1007. while( pNextPron )
  1008. {
  1009. int isDiff;
  1010. isDiff = wcscmp( firstPron->szPronunciation, pNextPron->szPronunciation );
  1011. if( isDiff )
  1012. {
  1013. //------------------------------------------------
  1014. // Next pronunciation is different from 1st
  1015. //------------------------------------------------
  1016. if( pPron->pronArray[PRON_B].POScount < POS_MAX )
  1017. {
  1018. //---------------------------------------
  1019. // Gather POS B into array
  1020. //---------------------------------------
  1021. pPron->pronArray[PRON_B].POScode[pPron->pronArray[PRON_B].POScount] =
  1022. (ENGPARTOFSPEECH)pNextPron->ePartOfSpeech;
  1023. pPron->pronArray[PRON_B].POScount++;
  1024. if( pPron->pronArray[PRON_B].phon_Len == 0 )
  1025. {
  1026. //-----------------------------------------
  1027. // If there's no B pron yet, make one
  1028. //-----------------------------------------
  1029. cPhonLen = wcslen( pNextPron->szPronunciation ) + 1; // include delimiter
  1030. //----------------------------
  1031. // Clip phoneme string to max
  1032. //----------------------------
  1033. if( cPhonLen > SP_MAX_PRON_LENGTH )
  1034. {
  1035. cPhonLen = SP_MAX_PRON_LENGTH;
  1036. }
  1037. //----------------------------
  1038. // Copy unicode phoneme string
  1039. //----------------------------
  1040. memcpy( pPron->pronArray[PRON_B].phon_Str,
  1041. pNextPron->szPronunciation,
  1042. cPhonLen * sizeof(WCHAR) );
  1043. // RAID 21464 - null-terminate SP_MAX_PRON_LENGTH length pronunciations...
  1044. pPron->pronArray[PRON_B].phon_Str[cPhonLen-1] = 0;
  1045. pPron->pronArray[PRON_B].phon_Len = cPhonLen -1; // minus delimiter
  1046. pPron->hasAlt = true;
  1047. }
  1048. }
  1049. }
  1050. else
  1051. {
  1052. //------------------------------------------------
  1053. // Next pronunciation is same as 1st
  1054. //------------------------------------------------
  1055. if( pPron->pronArray[PRON_A].POScount < POS_MAX )
  1056. {
  1057. //---------------------------------------
  1058. // Gather POS A into array
  1059. //---------------------------------------
  1060. pPron->pronArray[PRON_A].POScode[pPron->pronArray[PRON_A].POScount] =
  1061. (ENGPARTOFSPEECH)pNextPron->ePartOfSpeech;
  1062. pPron->pronArray[PRON_A].POScount++;
  1063. }
  1064. }
  1065. pCurPron = pNextPron;
  1066. pNextPron = pCurPron->pNextWordPronunciation;
  1067. }
  1068. }
  1069. //--- If XML POS provided, set selection now as it won't be touched by the POS Tagger
  1070. if ( pPron->XMLPartOfSpeech != MS_Unknown )
  1071. {
  1072. BOOL fMadeMatch = false;
  1073. //--- Check first pronunciation
  1074. for ( ULONG i = 0; i < pPron->pronArray[0].POScount; i++ )
  1075. {
  1076. if ( pPron->pronArray[0].POScode[i] == pPron->XMLPartOfSpeech )
  1077. {
  1078. pPron->altChoice = 0;
  1079. pPron->POSchoice = pPron->XMLPartOfSpeech;
  1080. fMadeMatch = true;
  1081. }
  1082. }
  1083. //--- Check second pronunciation
  1084. if ( pPron->hasAlt )
  1085. {
  1086. for ( ULONG i = 0; i < pPron->pronArray[1].POScount; i++ )
  1087. {
  1088. if ( pPron->pronArray[1].POScode[i] == pPron->XMLPartOfSpeech )
  1089. {
  1090. pPron->altChoice = 1;
  1091. pPron->POSchoice = pPron->XMLPartOfSpeech;
  1092. fMadeMatch = true;
  1093. }
  1094. }
  1095. }
  1096. //--- If this POS didn't exist for the word, let POS Tagger do its thing
  1097. //--- to determine a pronunciation, and then reassign the POS later...
  1098. if ( !fMadeMatch )
  1099. {
  1100. pPron->XMLPartOfSpeech = MS_Unknown;
  1101. pPron->POSchoice = pPron->pronArray[PRON_A].POScode[0];
  1102. }
  1103. }
  1104. //--- Set default POS, for later refinement by POS Tagger
  1105. else
  1106. {
  1107. pPron->POSchoice = pPron->pronArray[PRON_A].POScode[0];
  1108. pPron->altChoice = PRON_A;
  1109. }
  1110. if( SPList.pvBuffer )
  1111. {
  1112. ::CoTaskMemFree( SPList.pvBuffer );
  1113. }
  1114. return hr;
  1115. } /* Pronounce */
  1116. /*****************************************************************************
  1117. * CStdSentEnum::DetermineProns *
  1118. *------------------------------*
  1119. * Description:
  1120. * This method determines POS and looks up the pronounciation
  1121. ********************************************************************* MC ****/
  1122. HRESULT CStdSentEnum::DetermineProns( CItemList& ItemList, CSentItemMemory& MemoryManager )
  1123. {
  1124. SPDBG_FUNC( "CStdSentEnum::DetermineProns" );
  1125. HRESULT hr = S_OK;
  1126. ULONG cNumOfProns, cPronIndex;
  1127. PRONRECORD* pProns = NULL;
  1128. //--- Count the total number of pronunciations needed
  1129. cNumOfProns = 0;
  1130. SPLISTPOS ListPos = ItemList.GetHeadPosition();
  1131. while( ListPos )
  1132. {
  1133. TTSSentItem& Item = ItemList.GetNext( ListPos );
  1134. for ( ULONG i = 0; i < Item.ulNumWords; i++ )
  1135. {
  1136. if( Item.Words[i].pWordText &&
  1137. ( Item.Words[i].pXmlState->eAction == SPVA_Speak ||
  1138. Item.Words[i].pXmlState->eAction == SPVA_SpellOut ||
  1139. Item.Words[i].pXmlState->eAction == SPVA_Pronounce ) )
  1140. {
  1141. ++cNumOfProns;
  1142. }
  1143. }
  1144. }
  1145. if ( cNumOfProns )
  1146. {
  1147. pProns = new PRONRECORD[cNumOfProns];
  1148. if( !pProns )
  1149. {
  1150. hr = E_OUTOFMEMORY;
  1151. }
  1152. else
  1153. {
  1154. //--- First, get item pronunciation(s)
  1155. ZeroMemory( pProns, cNumOfProns * sizeof(PRONRECORD) );
  1156. cPronIndex = 0;
  1157. ListPos = ItemList.GetHeadPosition();
  1158. //--- Iterate through ItemList
  1159. while( ListPos && SUCCEEDED( hr ) )
  1160. {
  1161. TTSSentItem& Item = ItemList.GetNext( ListPos );
  1162. //--- Iterate over Words
  1163. for ( ULONG i = 0; i < Item.ulNumWords; i++ )
  1164. {
  1165. //--- Get pronunciations and parts of speech for spoken items only
  1166. if ( Item.Words[i].pWordText &&
  1167. ( Item.Words[i].pXmlState->eAction == SPVA_Speak ||
  1168. Item.Words[i].pXmlState->eAction == SPVA_SpellOut ||
  1169. Item.Words[i].pXmlState->eAction == SPVA_Pronounce ) )
  1170. {
  1171. SPDBG_ASSERT( cPronIndex < cNumOfProns );
  1172. ULONG cItemLen = Item.Words[i].ulWordLen;
  1173. //--- Clip at max text length
  1174. if( cItemLen > ( SP_MAX_WORD_LENGTH-1 ) )
  1175. {
  1176. cItemLen = SP_MAX_WORD_LENGTH - 1;
  1177. }
  1178. //--- Copy item text
  1179. memcpy( pProns[cPronIndex].orthStr,
  1180. Item.Words[i].pWordText,
  1181. cItemLen * sizeof(WCHAR) );
  1182. pProns[cPronIndex].orthStr[cItemLen] = 0;
  1183. //--- Set Part of Speech, if given in XML
  1184. if ( Item.Words[i].pXmlState->ePartOfSpeech != MS_Unknown )
  1185. {
  1186. pProns[cPronIndex].XMLPartOfSpeech = (ENGPARTOFSPEECH)Item.Words[i].pXmlState->ePartOfSpeech;
  1187. }
  1188. //--- Do Lex Lookup, if necessary
  1189. if ( Item.Words[i].pXmlState->pPhoneIds == NULL ||
  1190. Item.Words[i].pXmlState->ePartOfSpeech == MS_Unknown )
  1191. {
  1192. //--- Special Case - Disambiguate Abbreviations
  1193. if ( Item.pItemInfo->Type == eABBREVIATION ||
  1194. Item.pItemInfo->Type == eABBREVIATION_NORMALIZE )
  1195. {
  1196. const AbbrevRecord *pAbbrevInfo =
  1197. ( (TTSAbbreviationInfo*) Item.pItemInfo )->pAbbreviation;
  1198. if ( pAbbrevInfo->iPronDisambig < 0 )
  1199. {
  1200. //--- Default case - just take the first (and only) pronunciation
  1201. pProns[cPronIndex].pronArray[PRON_A].POScount = 1;
  1202. wcscpy( pProns[cPronIndex].pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron1 );
  1203. pProns[cPronIndex].pronArray[PRON_A].phon_Len =
  1204. wcslen( pProns[cPronIndex].pronArray[PRON_A].phon_Str );
  1205. pProns[cPronIndex].pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS1;
  1206. pProns[cPronIndex].pronArray[PRON_B].POScount = 0;
  1207. pProns[cPronIndex].pronArray[PRON_B].phon_Len = 0;
  1208. pProns[cPronIndex].hasAlt = false;
  1209. pProns[cPronIndex].altChoice = PRON_A;
  1210. pProns[cPronIndex].POSchoice = pAbbrevInfo->POS1;
  1211. //--- Abbreviation table prons are basically just vendor lex prons...
  1212. pProns[cPronIndex].pronType = eLEXTYPE_PRIVATE1;
  1213. }
  1214. else
  1215. {
  1216. hr = ( this->*g_PronDisambigTable[pAbbrevInfo->iPronDisambig] )
  1217. ( pAbbrevInfo, &pProns[cPronIndex], ItemList, ListPos );
  1218. }
  1219. pProns[cPronIndex].fUsePron = true;
  1220. }
  1221. //--- Default case
  1222. else
  1223. {
  1224. //--- Check disambiguation list
  1225. const AbbrevRecord* pAbbrevRecord =
  1226. (AbbrevRecord*) bsearch( (void*) pProns[cPronIndex].orthStr, (void*) g_AmbiguousWordTable,
  1227. sp_countof( g_AmbiguousWordTable ), sizeof( AbbrevRecord ),
  1228. CompareStringAndAbbrevRecord );
  1229. if ( pAbbrevRecord )
  1230. {
  1231. hr = ( this->*g_AmbiguousWordDisambigTable[pAbbrevRecord->iPronDisambig] )
  1232. ( pAbbrevRecord, &pProns[cPronIndex], ItemList, ListPos );
  1233. pProns[cPronIndex].fUsePron = true;
  1234. }
  1235. //--- Do Lex Lookup, if necessary
  1236. else
  1237. {
  1238. hr = Pronounce( &pProns[cPronIndex] );
  1239. }
  1240. }
  1241. }
  1242. cPronIndex++;
  1243. }
  1244. }
  1245. }
  1246. if (SUCCEEDED(hr))
  1247. {
  1248. //--- Next, disambiguate part-of-speech
  1249. DisambiguatePOS( pProns, cNumOfProns );
  1250. //--- Output debugging information
  1251. TTSDBG_LOGPOSPOSSIBILITIES( pProns, cNumOfProns, STREAM_POSPOSSIBILITIES );
  1252. //--- Finally, copy selected pronunciation to 'ItemList'
  1253. PRONUNIT *selectedUnit;
  1254. cPronIndex = 0;
  1255. ListPos = ItemList.GetHeadPosition();
  1256. while( ListPos && SUCCEEDED(hr) )
  1257. {
  1258. TTSSentItem& Item = ItemList.GetNext( ListPos );
  1259. for ( ULONG i = 0; i < Item.ulNumWords; i++ )
  1260. {
  1261. //--- Set pronunciation and part-of-speech for spoken items only
  1262. if( Item.Words[i].pWordText &&
  1263. ( Item.Words[i].pXmlState->eAction == SPVA_Speak ||
  1264. Item.Words[i].pXmlState->eAction == SPVA_SpellOut ||
  1265. Item.Words[i].pXmlState->eAction == SPVA_Pronounce ) )
  1266. {
  1267. SPDBG_ASSERT( cPronIndex < cNumOfProns );
  1268. //--- Use XML specified pronunciation, if given.
  1269. if ( Item.Words[i].pXmlState->pPhoneIds )
  1270. {
  1271. Item.Words[i].pWordPron = Item.Words[i].pXmlState->pPhoneIds;
  1272. }
  1273. else
  1274. {
  1275. selectedUnit = &pProns[cPronIndex].pronArray[pProns[cPronIndex].altChoice];
  1276. Item.Words[i].pWordPron =
  1277. (SPPHONEID*) MemoryManager.GetMemory( (selectedUnit->phon_Len + 1) *
  1278. sizeof(SPPHONEID), &hr );
  1279. if ( SUCCEEDED( hr ) )
  1280. {
  1281. wcscpy( Item.Words[i].pWordPron, selectedUnit->phon_Str );
  1282. }
  1283. }
  1284. //--- Use XML specified part-of-speech, if given. This will override the case
  1285. //--- where the POS didn't exist as an option and the POS Tagger did its thing
  1286. //--- to find a pronunciation.
  1287. if ( Item.Words[i].pXmlState->ePartOfSpeech != MS_Unknown )
  1288. {
  1289. Item.Words[i].eWordPartOfSpeech = (ENGPARTOFSPEECH)Item.Words[i].pXmlState->ePartOfSpeech;
  1290. }
  1291. else
  1292. {
  1293. Item.Words[i].eWordPartOfSpeech = pProns[cPronIndex].POSchoice;
  1294. }
  1295. //--- Root word
  1296. if ( pProns[cPronIndex].lemmaStr[0] )
  1297. {
  1298. Item.Words[i].ulLemmaLen = wcslen( pProns[cPronIndex].lemmaStr );
  1299. Item.Words[i].pLemma =
  1300. (WCHAR*) MemoryManager.GetMemory( Item.Words[i].ulLemmaLen * sizeof(WCHAR), &hr );
  1301. if ( SUCCEEDED( hr ) )
  1302. {
  1303. wcsncpy( (WCHAR*) Item.Words[i].pLemma, pProns[cPronIndex].lemmaStr,
  1304. Item.Words[i].ulLemmaLen );
  1305. }
  1306. }
  1307. //--- Insert pron in text, if appropriate - RAID #4746
  1308. if ( pProns[cPronIndex].fUsePron )
  1309. {
  1310. ULONG ulNumChars = wcslen( Item.Words[i].pWordPron );
  1311. Item.Words[i].pWordText =
  1312. (WCHAR*) MemoryManager.GetMemory( ( ulNumChars + 3 ) * sizeof( WCHAR ), &hr );
  1313. if ( SUCCEEDED( hr ) )
  1314. {
  1315. ZeroMemory( (WCHAR*) Item.Words[i].pWordText, ( ulNumChars + 3 ) * sizeof( WCHAR ) );
  1316. (WCHAR) Item.Words[i].pWordText[0] = L'*';
  1317. wcscpy( ( (WCHAR*) Item.Words[i].pWordText + 1 ), Item.Words[i].pWordPron );
  1318. (WCHAR) Item.Words[i].pWordText[ ulNumChars + 1 ] = L'*';
  1319. Item.Words[i].ulWordLen = ulNumChars + 2;
  1320. }
  1321. }
  1322. cPronIndex++;
  1323. }
  1324. }
  1325. }
  1326. }
  1327. if ( SUCCEEDED( hr ) )
  1328. {
  1329. //--- Check Post POS disambiguation list
  1330. SPLISTPOS ListPos = ItemList.GetHeadPosition();
  1331. while ( ListPos && SUCCEEDED( hr ) )
  1332. {
  1333. TTSSentItem& Item = ItemList.GetNext( ListPos );
  1334. if ( Item.pItemInfo->Type == eALPHA_WORD ||
  1335. Item.pItemInfo->Type == eABBREVIATION ||
  1336. Item.pItemInfo->Type == eABBREVIATION_NORMALIZE )
  1337. {
  1338. WCHAR temp;
  1339. BOOL fPeriod = false;
  1340. if ( Item.pItemSrcText[Item.ulItemSrcLen - 1] == L'.' &&
  1341. Item.ulItemSrcLen > 1 )
  1342. {
  1343. temp = Item.pItemSrcText[Item.ulItemSrcLen - 1];
  1344. *( (WCHAR*) Item.pItemSrcText + Item.ulItemSrcLen - 1 ) = 0;
  1345. fPeriod = true;
  1346. }
  1347. else
  1348. {
  1349. temp = Item.pItemSrcText[Item.ulItemSrcLen];
  1350. *( (WCHAR*) Item.pItemSrcText + Item.ulItemSrcLen ) = 0;
  1351. }
  1352. const AbbrevRecord* pAbbrevRecord =
  1353. (AbbrevRecord*) bsearch( (void*) Item.pItemSrcText, (void*) g_PostLexLookupWordTable,
  1354. sp_countof( g_PostLexLookupWordTable ), sizeof( AbbrevRecord ),
  1355. CompareStringAndAbbrevRecord );
  1356. if ( pAbbrevRecord )
  1357. {
  1358. hr = ( this->*g_PostLexLookupDisambigTable[pAbbrevRecord->iPronDisambig] )
  1359. ( pAbbrevRecord, ItemList, ListPos, MemoryManager );
  1360. }
  1361. if ( fPeriod )
  1362. {
  1363. *( (WCHAR*) Item.pItemSrcText + Item.ulItemSrcLen - 1 ) = temp;
  1364. }
  1365. else
  1366. {
  1367. *( (WCHAR*) Item.pItemSrcText + Item.ulItemSrcLen ) = temp;
  1368. }
  1369. }
  1370. }
  1371. }
  1372. }
  1373. }
  1374. if (pProns)
  1375. {
  1376. delete [] pProns;
  1377. }
  1378. return hr;
  1379. } /* CStdSentEnum::DetermineProns */
  1380. /***********************************************************************************************
  1381. * MeasurementDisambig *
  1382. *---------------------*
  1383. * Description:
  1384. * This overrides initial pronunciations of measurement abbreviations when they are used
  1385. * as modifiers - e.g. "a 7 ft. pole" vs. "the pole was 7 ft. long"
  1386. *
  1387. ********************************************************************* AH **********************/
  1388. HRESULT CStdSentEnum::MeasurementDisambig( const AbbrevRecord* pAbbrevInfo, CItemList& ItemList,
  1389. SPLISTPOS ListPos, CSentItemMemory& MemoryManager )
  1390. {
  1391. SPDBG_FUNC( "CStdSentEnum::MeasurementDisambig" );
  1392. HRESULT hr = S_OK;
  1393. //--- Get previous two items
  1394. SPLISTPOS TempPos = ListPos;
  1395. if ( TempPos )
  1396. {
  1397. ItemList.GetPrev( TempPos );
  1398. if ( TempPos )
  1399. {
  1400. ItemList.GetPrev( TempPos );
  1401. if ( TempPos )
  1402. {
  1403. TTSSentItem TempItem = ItemList.GetPrev( TempPos );
  1404. //--- Previous must be a number
  1405. if ( TempItem.pItemInfo->Type == eNUM_CARDINAL )
  1406. {
  1407. //--- Get next item
  1408. TempPos = ListPos;
  1409. TempItem = ItemList.GetNext( TempPos );
  1410. //--- Next must be a noun or adj
  1411. if ( TempItem.eItemPartOfSpeech == MS_Noun )
  1412. {
  1413. //--- Matched a 7 ft. pole type example - go with singular
  1414. TempPos = ListPos;
  1415. ItemList.GetPrev( TempPos );
  1416. TTSSentItem& MeasurementItem = ItemList.GetPrev( TempPos );
  1417. //--- Singular will always be shorter than plural, so this should never overwrite
  1418. //--- anything...
  1419. wcscpy( MeasurementItem.Words[0].pWordPron, pAbbrevInfo->pPron1 );
  1420. //--- Insert pron into word text - RAID #4746
  1421. ULONG ulNumChars = wcslen( MeasurementItem.Words[0].pWordPron );
  1422. MeasurementItem.Words[0].pWordText =
  1423. (WCHAR*) MemoryManager.GetMemory( ( ulNumChars + 3 ) * sizeof( WCHAR ), &hr );
  1424. if ( SUCCEEDED( hr ) )
  1425. {
  1426. ZeroMemory( (WCHAR*) MeasurementItem.Words[0].pWordText, ( ulNumChars + 3 ) * sizeof( WCHAR ) );
  1427. (WCHAR) MeasurementItem.Words[0].pWordText[0] = L'*';
  1428. wcscpy( ( (WCHAR*) MeasurementItem.Words[0].pWordText + 1 ), MeasurementItem.Words[0].pWordPron );
  1429. (WCHAR) MeasurementItem.Words[0].pWordText[ ulNumChars + 1 ] = L'*';
  1430. MeasurementItem.Words[0].ulWordLen = ulNumChars + 2;
  1431. }
  1432. }
  1433. else if ( TempItem.eItemPartOfSpeech == MS_Adj &&
  1434. TempPos )
  1435. {
  1436. //--- Next must be a noun
  1437. TempItem = ItemList.GetNext( TempPos );
  1438. {
  1439. if ( TempItem.eItemPartOfSpeech == MS_Noun )
  1440. {
  1441. //--- Matched a 7 ft. pole type example - go with singular
  1442. TempPos = ListPos;
  1443. ItemList.GetPrev( TempPos );
  1444. TTSSentItem& MeasurementItem = ItemList.GetPrev( TempPos );
  1445. //--- Singular will always be shorter than plural, so this should never overwrite
  1446. //--- anything...
  1447. wcscpy( MeasurementItem.Words[0].pWordPron, pAbbrevInfo->pPron1 );
  1448. //--- Insert pron into word text - RAID #4746
  1449. ULONG ulNumChars = wcslen( MeasurementItem.Words[0].pWordPron );
  1450. MeasurementItem.Words[0].pWordText =
  1451. (WCHAR*) MemoryManager.GetMemory( ( ulNumChars + 3 ) * sizeof( WCHAR ), &hr );
  1452. if ( SUCCEEDED( hr ) )
  1453. {
  1454. ZeroMemory( (WCHAR*) MeasurementItem.Words[0].pWordText, ( ulNumChars + 3 ) * sizeof( WCHAR ) );
  1455. (WCHAR) MeasurementItem.Words[0].pWordText[0] = L'*';
  1456. wcscpy( ( (WCHAR*) MeasurementItem.Words[0].pWordText + 1 ), MeasurementItem.Words[0].pWordPron );
  1457. (WCHAR) MeasurementItem.Words[0].pWordText[ ulNumChars + 1 ] = L'*';
  1458. MeasurementItem.Words[0].ulWordLen = ulNumChars + 2;
  1459. }
  1460. }
  1461. }
  1462. }
  1463. }
  1464. }
  1465. }
  1466. }
  1467. return hr;
  1468. } /* MeasurementDisambig */
  1469. /***********************************************************************************************
  1470. * TheDisambig *
  1471. *-------------*
  1472. * Description:
  1473. * This function disambiguates the word the - before a vowel it becomes "thee", before a
  1474. * consonant it is "thuh"...
  1475. *
  1476. ********************************************************************* AH **********************/
  1477. HRESULT CStdSentEnum::TheDisambig( const AbbrevRecord* pAbbrevInfo, CItemList& ItemList,
  1478. SPLISTPOS ListPos, CSentItemMemory& MemoryManager )
  1479. {
  1480. SPDBG_FUNC( "CStdSentEnum::TheDisambig" );
  1481. HRESULT hr = S_OK;
  1482. //--- Get next item
  1483. SPLISTPOS TempPos = ListPos;
  1484. if ( TempPos )
  1485. {
  1486. TTSSentItem NextItem = ItemList.GetNext( TempPos );
  1487. if ( NextItem.Words[0].pWordPron &&
  1488. bsearch( (void*) NextItem.Words[0].pWordPron, (void*) g_Vowels, sp_countof( g_Vowels ),
  1489. sizeof( WCHAR ), CompareWCHARAndWCHAR ) )
  1490. {
  1491. //--- Matched a vowel - go with / DH IY 1 /
  1492. TempPos = ListPos;
  1493. ItemList.GetPrev( TempPos );
  1494. TTSSentItem& TheItem = ItemList.GetPrev( TempPos );
  1495. //--- The two pronunciations are exactly the same length, so this should never overwrite
  1496. //--- anything
  1497. wcscpy( TheItem.Words[0].pWordPron, pAbbrevInfo->pPron1 );
  1498. TheItem.Words[0].eWordPartOfSpeech = pAbbrevInfo->POS1;
  1499. //--- Insert pron into word text - RAID #4746
  1500. ULONG ulNumChars = wcslen( TheItem.Words[0].pWordPron );
  1501. TheItem.Words[0].pWordText =
  1502. (WCHAR*) MemoryManager.GetMemory( ( ulNumChars + 3 ) * sizeof( WCHAR ), &hr );
  1503. if ( SUCCEEDED( hr ) )
  1504. {
  1505. ZeroMemory( (WCHAR*) TheItem.Words[0].pWordText, ( ulNumChars + 3 ) * sizeof( WCHAR ) );
  1506. (WCHAR) TheItem.Words[0].pWordText[0] = L'*';
  1507. wcscpy( ( (WCHAR*) TheItem.Words[0].pWordText + 1 ), TheItem.Words[0].pWordPron );
  1508. (WCHAR) TheItem.Words[0].pWordText[ ulNumChars + 1 ] = L'*';
  1509. TheItem.Words[0].ulWordLen = ulNumChars + 2;
  1510. }
  1511. }
  1512. else
  1513. {
  1514. //--- Didn't match a vowel - go with / DH AX 1 /
  1515. TempPos = ListPos;
  1516. ItemList.GetPrev( TempPos );
  1517. TTSSentItem& TheItem = ItemList.GetPrev( TempPos );
  1518. //--- The two pronunciations are exactly the same length, so this should never overwrite
  1519. //--- anything
  1520. wcscpy( TheItem.Words[0].pWordPron, pAbbrevInfo->pPron2 );
  1521. TheItem.Words[0].eWordPartOfSpeech = pAbbrevInfo->POS2;
  1522. //--- Insert pron into word text - RAID #4746
  1523. ULONG ulNumChars = wcslen( TheItem.Words[0].pWordPron );
  1524. TheItem.Words[0].pWordText =
  1525. (WCHAR*) MemoryManager.GetMemory( ( ulNumChars + 3 ) * sizeof( WCHAR ), &hr );
  1526. if ( SUCCEEDED( hr ) )
  1527. {
  1528. ZeroMemory( (WCHAR*) TheItem.Words[0].pWordText, ( ulNumChars + 3 ) * sizeof( WCHAR ) );
  1529. (WCHAR) TheItem.Words[0].pWordText[0] = L'*';
  1530. wcscpy( ( (WCHAR*) TheItem.Words[0].pWordText + 1 ), TheItem.Words[0].pWordPron );
  1531. (WCHAR) TheItem.Words[0].pWordText[ ulNumChars + 1 ] = L'*';
  1532. TheItem.Words[0].ulWordLen = ulNumChars + 2;
  1533. }
  1534. }
  1535. }
  1536. return hr;
  1537. } /* TheDisambig */
  1538. /***********************************************************************************************
  1539. * ADisambig *
  1540. *-----------*
  1541. * Description:
  1542. * This function disambiguates the word "a" - / EY 1 - Noun / vs. / AX - Det /
  1543. *
  1544. ********************************************************************* AH **********************/
  1545. HRESULT CStdSentEnum::ADisambig( const AbbrevRecord* pAbbrevInfo, PRONRECORD* pPron, CItemList& ItemList,
  1546. SPLISTPOS ListPos )
  1547. {
  1548. SPDBG_FUNC( "CStdSentEnum::ADisambig" );
  1549. HRESULT hr = S_OK;
  1550. BOOL fNoun = false;
  1551. //--- Get Current Item...
  1552. SPLISTPOS TempPos = ListPos;
  1553. if ( TempPos )
  1554. {
  1555. ItemList.GetPrev( TempPos );
  1556. if ( TempPos )
  1557. {
  1558. TTSSentItem CurrentItem = ItemList.GetPrev( TempPos );
  1559. //--- If "a" is part of a multi-word item, use the Noun pronunciation...
  1560. //--- If "a" is not an AlphaWord, use the Noun pronunciation...
  1561. if ( CurrentItem.ulNumWords > 1 ||
  1562. CurrentItem.pItemInfo->Type != eALPHA_WORD )
  1563. {
  1564. fNoun = true;
  1565. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron1 );
  1566. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  1567. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS1;
  1568. pPron->POSchoice = pAbbrevInfo->POS1;
  1569. }
  1570. }
  1571. }
  1572. if ( !fNoun )
  1573. {
  1574. //--- Get Next Item...
  1575. TempPos = ListPos;
  1576. if ( TempPos )
  1577. {
  1578. TTSSentItem NextItem = ItemList.GetNext( TempPos );
  1579. //--- If "a" is followed by punctuation, use the Noun pronunciation...
  1580. if ( !( NextItem.pItemInfo->Type & eWORDLIST_IS_VALID ) )
  1581. {
  1582. fNoun = true;
  1583. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron1 );
  1584. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  1585. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS1;
  1586. pPron->POSchoice = pAbbrevInfo->POS1;
  1587. }
  1588. }
  1589. }
  1590. //--- Default - use the Determiner pronunciation (but include Noun pronunciation as well,
  1591. //--- so that POS tagger rules will work properly)...
  1592. if ( !fNoun )
  1593. {
  1594. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron2 );
  1595. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  1596. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS2;
  1597. pPron->pronArray[PRON_A].POScount = 1;
  1598. pPron->POSchoice = pAbbrevInfo->POS2;
  1599. wcscpy( pPron->pronArray[PRON_B].phon_Str, pAbbrevInfo->pPron1 );
  1600. pPron->pronArray[PRON_B].phon_Len = wcslen( pPron->pronArray[PRON_B].phon_Str );
  1601. pPron->pronArray[PRON_B].POScode[0] = pAbbrevInfo->POS1;
  1602. pPron->pronArray[PRON_B].POScount = 1;
  1603. pPron->hasAlt = true;
  1604. }
  1605. return hr;
  1606. } /* ADisambig */
  1607. /***********************************************************************************************
  1608. * PolishDisambig *
  1609. *----------------*
  1610. * Description:
  1611. * This function disambiguates the word "polish" - [p ow 1 l - ax sh - Noun] vs.
  1612. * [p ow 1 l - ax sh - Adj] vs. [p aa 1 l - ih sh - Verb] vs. [p aa 1 l - ih sh - Noun]
  1613. *
  1614. ********************************************************************* AH **********************/
  1615. HRESULT CStdSentEnum::PolishDisambig( const AbbrevRecord* pAbbrevInfo, PRONRECORD* pPron, CItemList& ItemList,
  1616. SPLISTPOS ListPos )
  1617. {
  1618. SPDBG_FUNC( "CStdSentEnum::PolishDisambig" );
  1619. HRESULT hr = S_OK;
  1620. BOOL fMatch = false;
  1621. //--- Get Current Item...
  1622. SPLISTPOS TempPos = ListPos;
  1623. if ( TempPos )
  1624. {
  1625. ItemList.GetPrev( TempPos );
  1626. if ( TempPos )
  1627. {
  1628. TTSSentItem CurrentItem = ItemList.GetPrev( TempPos );
  1629. //--- If "Polish" is capitalized and not sentence-initial, and not preceded immediately
  1630. //--- by an open double-quote or parenthesis, use Noun...
  1631. if ( iswupper( CurrentItem.pItemSrcText[0] ) )
  1632. {
  1633. BOOL fSentenceInitial = false;
  1634. if ( !TempPos )
  1635. {
  1636. fSentenceInitial = true;
  1637. }
  1638. else
  1639. {
  1640. TTSSentItem PrevItem = ItemList.GetPrev( TempPos );
  1641. if ( PrevItem.pItemInfo->Type == eOPEN_PARENTHESIS ||
  1642. PrevItem.pItemInfo->Type == eOPEN_BRACKET ||
  1643. PrevItem.pItemInfo->Type == eOPEN_BRACE ||
  1644. PrevItem.pItemInfo->Type == eSINGLE_QUOTE ||
  1645. PrevItem.pItemInfo->Type == eDOUBLE_QUOTE )
  1646. {
  1647. fSentenceInitial = true;
  1648. }
  1649. }
  1650. if ( fSentenceInitial )
  1651. {
  1652. fMatch = true;
  1653. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron2 );
  1654. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  1655. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS2;
  1656. pPron->POSchoice = pAbbrevInfo->POS2;
  1657. }
  1658. else
  1659. {
  1660. fMatch = true;
  1661. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron1 );
  1662. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  1663. pPron->pronArray[PRON_A].POScode[0] = MS_Noun;
  1664. pPron->POSchoice = MS_Noun;
  1665. }
  1666. }
  1667. }
  1668. }
  1669. //--- Default - use the Verb pronunciation (but include the others as well,
  1670. //--- so that POS tagger rules will work properly)...
  1671. if ( !fMatch )
  1672. {
  1673. //--- Verb, Noun
  1674. wcscpy( pPron->pronArray[PRON_A].phon_Str, pAbbrevInfo->pPron2 );
  1675. pPron->pronArray[PRON_A].phon_Len = wcslen( pPron->pronArray[PRON_A].phon_Str );
  1676. pPron->pronArray[PRON_A].POScode[0] = pAbbrevInfo->POS2;
  1677. pPron->pronArray[PRON_A].POScode[1] = pAbbrevInfo->POS3;
  1678. pPron->pronArray[PRON_A].POScount = 2;
  1679. //--- Adj
  1680. wcscpy( pPron->pronArray[PRON_B].phon_Str, pAbbrevInfo->pPron1 );
  1681. pPron->pronArray[PRON_B].phon_Len = wcslen( pPron->pronArray[PRON_B].phon_Str );
  1682. pPron->pronArray[PRON_B].POScode[0] = pAbbrevInfo->POS1;
  1683. pPron->pronArray[PRON_B].POScount = 1;
  1684. //--- Set initial choice to Verb...
  1685. pPron->POSchoice = pAbbrevInfo->POS2;
  1686. pPron->hasAlt = true;
  1687. }
  1688. return hr;
  1689. } /* PolishDisambig */
  1690. /***********************************************************************************************
  1691. * ReadDisambig *
  1692. *--------------*
  1693. * Description:
  1694. * This function disambiguates the word Read - past tense vs. present...
  1695. *
  1696. ********************************************************************* AH **********************/
  1697. HRESULT CStdSentEnum::ReadDisambig( const AbbrevRecord* pAbbrevInfo, CItemList& ItemList,
  1698. SPLISTPOS ListPos, CSentItemMemory& MemoryManager )
  1699. {
  1700. SPDBG_FUNC( "CStdSentEnum::ReadDisambig" );
  1701. HRESULT hr = S_OK;
  1702. BOOL fMatch = false;
  1703. //--- Get prev item
  1704. SPLISTPOS TempPos = ListPos;
  1705. if ( TempPos )
  1706. {
  1707. ItemList.GetPrev( TempPos );
  1708. if ( TempPos )
  1709. {
  1710. ItemList.GetPrev( TempPos );
  1711. if ( TempPos )
  1712. {
  1713. TTSSentItem PrevItem = ItemList.GetPrev( TempPos );
  1714. //--- Check for closest auxiliary
  1715. while ( PrevItem.Words[0].eWordPartOfSpeech != MS_VAux &&
  1716. PrevItem.Words[0].eWordPartOfSpeech != MS_Contr &&
  1717. TempPos )
  1718. {
  1719. PrevItem = ItemList.GetPrev( TempPos );
  1720. }
  1721. if ( PrevItem.Words[0].eWordPartOfSpeech == MS_VAux )
  1722. {
  1723. fMatch = true;
  1724. if ( wcsnicmp( PrevItem.Words[0].pWordText, L"have", 4 ) == 0 ||
  1725. wcsnicmp( PrevItem.Words[0].pWordText, L"has", 3 ) == 0 ||
  1726. wcsnicmp( PrevItem.Words[0].pWordText, L"had", 3 ) == 0 ||
  1727. wcsnicmp( PrevItem.Words[0].pWordText, L"am", 2 ) == 0 ||
  1728. wcsnicmp( PrevItem.Words[0].pWordText, L"ain't", 5 ) == 0 ||
  1729. wcsnicmp( PrevItem.Words[0].pWordText, L"are", 3 ) == 0 ||
  1730. wcsnicmp( PrevItem.Words[0].pWordText, L"aren't", 6 ) == 0 ||
  1731. wcsnicmp( PrevItem.Words[0].pWordText, L"be", 2 ) == 0 ||
  1732. wcsnicmp( PrevItem.Words[0].pWordText, L"is", 2 ) == 0 ||
  1733. wcsnicmp( PrevItem.Words[0].pWordText, L"was", 3 ) == 0 ||
  1734. wcsnicmp( PrevItem.Words[0].pWordText, L"were", 4 ) == 0 )
  1735. {
  1736. //--- Matched have or haven't (has or hasn't, had or hadn't) - go with "red"
  1737. TempPos = ListPos;
  1738. ItemList.GetPrev( TempPos );
  1739. TTSSentItem& ReadItem = ItemList.GetPrev( TempPos );
  1740. //--- The two pronunciations are exactly the same length, so this should never overwrite
  1741. //--- anything
  1742. wcscpy( ReadItem.Words[0].pWordPron, pAbbrevInfo->pPron2 );
  1743. ReadItem.Words[0].eWordPartOfSpeech = pAbbrevInfo->POS2;
  1744. ReadItem.eItemPartOfSpeech = pAbbrevInfo->POS2;
  1745. //--- Insert pron into word text - RAID #4746
  1746. ULONG ulNumChars = wcslen( ReadItem.Words[0].pWordPron );
  1747. ReadItem.Words[0].pWordText =
  1748. (WCHAR*) MemoryManager.GetMemory( ( ulNumChars + 3 ) * sizeof( WCHAR ), &hr );
  1749. if ( SUCCEEDED( hr ) )
  1750. {
  1751. ZeroMemory( (WCHAR*) ReadItem.Words[0].pWordText, ( ulNumChars + 3 ) * sizeof( WCHAR ) );
  1752. (WCHAR) ReadItem.Words[0].pWordText[0] = L'*';
  1753. wcscpy( ( (WCHAR*) ReadItem.Words[0].pWordText + 1 ), ReadItem.Words[0].pWordPron );
  1754. (WCHAR) ReadItem.Words[0].pWordText[ ulNumChars + 1 ] = L'*';
  1755. ReadItem.Words[0].ulWordLen = ulNumChars + 2;
  1756. }
  1757. }
  1758. else
  1759. {
  1760. //--- Some other auxiliary - go with "reed"
  1761. TempPos = ListPos;
  1762. ItemList.GetPrev( TempPos );
  1763. TTSSentItem& ReadItem = ItemList.GetPrev( TempPos );
  1764. //--- The two pronunciations are exactly the same length, so this should never overwrite
  1765. //--- anything
  1766. wcscpy( ReadItem.Words[0].pWordPron, pAbbrevInfo->pPron1 );
  1767. ReadItem.Words[0].eWordPartOfSpeech = pAbbrevInfo->POS1;
  1768. ReadItem.eItemPartOfSpeech = pAbbrevInfo->POS1;
  1769. //--- Insert pron into word text - RAID #4746
  1770. ULONG ulNumChars = wcslen( ReadItem.Words[0].pWordPron );
  1771. ReadItem.Words[0].pWordText =
  1772. (WCHAR*) MemoryManager.GetMemory( ( ulNumChars + 3 ) * sizeof( WCHAR ), &hr );
  1773. if ( SUCCEEDED( hr ) )
  1774. {
  1775. ZeroMemory( (WCHAR*) ReadItem.Words[0].pWordText, ( ulNumChars + 3 ) * sizeof( WCHAR ) );
  1776. (WCHAR) ReadItem.Words[0].pWordText[0] = L'*';
  1777. wcscpy( ( (WCHAR*) ReadItem.Words[0].pWordText + 1 ), ReadItem.Words[0].pWordPron );
  1778. (WCHAR) ReadItem.Words[0].pWordText[ ulNumChars + 1 ] = L'*';
  1779. ReadItem.Words[0].ulWordLen = ulNumChars + 2;
  1780. }
  1781. }
  1782. }
  1783. //--- Check for pronoun aux contractions
  1784. else if ( PrevItem.Words[0].eWordPartOfSpeech == MS_Contr )
  1785. {
  1786. fMatch = true;
  1787. const WCHAR *pApostrophe = wcsstr( PrevItem.Words[0].pWordText, L"'" );
  1788. if ( pApostrophe &&
  1789. wcsnicmp( pApostrophe, L"'ll", 3 ) == 0 )
  1790. {
  1791. //--- Matched an 'll form - go with "reed"
  1792. TempPos = ListPos;
  1793. ItemList.GetPrev( TempPos );
  1794. TTSSentItem& ReadItem = ItemList.GetPrev( TempPos );
  1795. wcscpy( ReadItem.Words[0].pWordPron, pAbbrevInfo->pPron1 );
  1796. ReadItem.Words[0].eWordPartOfSpeech = pAbbrevInfo->POS1;
  1797. ReadItem.eItemPartOfSpeech = pAbbrevInfo->POS1;
  1798. //--- Insert pron into word text - RAID #4746
  1799. ULONG ulNumChars = wcslen( ReadItem.Words[0].pWordPron );
  1800. ReadItem.Words[0].pWordText =
  1801. (WCHAR*) MemoryManager.GetMemory( ( ulNumChars + 3 ) * sizeof( WCHAR ), &hr );
  1802. if ( SUCCEEDED( hr ) )
  1803. {
  1804. ZeroMemory( (WCHAR*) ReadItem.Words[0].pWordText, ( ulNumChars + 3 ) * sizeof( WCHAR ) );
  1805. (WCHAR) ReadItem.Words[0].pWordText[0] = L'*';
  1806. wcscpy( ( (WCHAR*) ReadItem.Words[0].pWordText + 1 ), ReadItem.Words[0].pWordPron );
  1807. (WCHAR) ReadItem.Words[0].pWordText[ ulNumChars + 1 ] = L'*';
  1808. ReadItem.Words[0].ulWordLen = ulNumChars + 2;
  1809. }
  1810. }
  1811. else
  1812. {
  1813. //--- Some other form - go with "red"
  1814. TempPos = ListPos;
  1815. ItemList.GetPrev( TempPos );
  1816. TTSSentItem& ReadItem = ItemList.GetPrev( TempPos );
  1817. wcscpy( ReadItem.Words[0].pWordPron, pAbbrevInfo->pPron2 );
  1818. ReadItem.Words[0].eWordPartOfSpeech = pAbbrevInfo->POS2;
  1819. ReadItem.eItemPartOfSpeech = pAbbrevInfo->POS2;
  1820. //--- Insert pron into word text - RAID #4746
  1821. ULONG ulNumChars = wcslen( ReadItem.Words[0].pWordPron );
  1822. ReadItem.Words[0].pWordText =
  1823. (WCHAR*) MemoryManager.GetMemory( ( ulNumChars + 3 ) * sizeof( WCHAR ), &hr );
  1824. if ( SUCCEEDED( hr ) )
  1825. {
  1826. ZeroMemory( (WCHAR*) ReadItem.Words[0].pWordText, ( ulNumChars + 3 ) * sizeof( WCHAR ) );
  1827. (WCHAR) ReadItem.Words[0].pWordText[0] = L'*';
  1828. wcscpy( ( (WCHAR*) ReadItem.Words[0].pWordText + 1 ), ReadItem.Words[0].pWordPron );
  1829. (WCHAR) ReadItem.Words[0].pWordText[ ulNumChars + 1 ] = L'*';
  1830. ReadItem.Words[0].ulWordLen = ulNumChars + 2;
  1831. }
  1832. }
  1833. }
  1834. //--- Check for infinitival form
  1835. else
  1836. {
  1837. TempPos = ListPos;
  1838. ItemList.GetPrev( TempPos );
  1839. ItemList.GetPrev( TempPos );
  1840. PrevItem = ItemList.GetPrev( TempPos );
  1841. if ( PrevItem.Words[0].ulWordLen == 2 &&
  1842. wcsnicmp( PrevItem.Words[0].pWordText, L"to", 2 ) == 0 )
  1843. {
  1844. fMatch = true;
  1845. //--- Matched infinitival form - go with "reed"
  1846. TempPos = ListPos;
  1847. ItemList.GetPrev( TempPos );
  1848. TTSSentItem& ReadItem = ItemList.GetPrev( TempPos );
  1849. wcscpy( ReadItem.Words[0].pWordPron, pAbbrevInfo->pPron1 );
  1850. ReadItem.Words[0].eWordPartOfSpeech = pAbbrevInfo->POS1;
  1851. ReadItem.eItemPartOfSpeech = pAbbrevInfo->POS1;
  1852. //--- Insert pron into word text - RAID #4746
  1853. ULONG ulNumChars = wcslen( ReadItem.Words[0].pWordPron );
  1854. ReadItem.Words[0].pWordText =
  1855. (WCHAR*) MemoryManager.GetMemory( ( ulNumChars + 3 ) * sizeof( WCHAR ), &hr );
  1856. if ( SUCCEEDED( hr ) )
  1857. {
  1858. ZeroMemory( (WCHAR*) ReadItem.Words[0].pWordText, ( ulNumChars + 3 ) * sizeof( WCHAR ) );
  1859. (WCHAR) ReadItem.Words[0].pWordText[0] = L'*';
  1860. wcscpy( ( (WCHAR*) ReadItem.Words[0].pWordText + 1 ), ReadItem.Words[0].pWordPron );
  1861. (WCHAR) ReadItem.Words[0].pWordText[ ulNumChars + 1 ] = L'*';
  1862. ReadItem.Words[0].ulWordLen = ulNumChars + 2;
  1863. }
  1864. }
  1865. }
  1866. }
  1867. //--- Sentence initial - go with "reed"
  1868. else
  1869. {
  1870. fMatch = true;
  1871. TempPos = ListPos;
  1872. ItemList.GetPrev( TempPos );
  1873. TTSSentItem& ReadItem = ItemList.GetPrev( TempPos );
  1874. wcscpy( ReadItem.Words[0].pWordPron, pAbbrevInfo->pPron1 );
  1875. ReadItem.Words[0].eWordPartOfSpeech = pAbbrevInfo->POS1;
  1876. ReadItem.eItemPartOfSpeech = pAbbrevInfo->POS1;
  1877. //--- Insert pron into word text - RAID #4746
  1878. ULONG ulNumChars = wcslen( ReadItem.Words[0].pWordPron );
  1879. ReadItem.Words[0].pWordText =
  1880. (WCHAR*) MemoryManager.GetMemory( ( ulNumChars + 3 ) * sizeof( WCHAR ), &hr );
  1881. if ( SUCCEEDED( hr ) )
  1882. {
  1883. ZeroMemory( (WCHAR*) ReadItem.Words[0].pWordText, ( ulNumChars + 3 ) * sizeof( WCHAR ) );
  1884. (WCHAR) ReadItem.Words[0].pWordText[0] = L'*';
  1885. wcscpy( ( (WCHAR*) ReadItem.Words[0].pWordText + 1 ), ReadItem.Words[0].pWordPron );
  1886. (WCHAR) ReadItem.Words[0].pWordText[ ulNumChars + 1 ] = L'*';
  1887. ReadItem.Words[0].ulWordLen = ulNumChars + 2;
  1888. }
  1889. }
  1890. }
  1891. }
  1892. if ( !fMatch )
  1893. {
  1894. TempPos = ListPos;
  1895. ItemList.GetPrev( TempPos );
  1896. TTSSentItem& ReadItem = ItemList.GetPrev( TempPos );
  1897. //--- Default - go with past tense...
  1898. wcscpy( ReadItem.Words[0].pWordPron, pAbbrevInfo->pPron2 );
  1899. ReadItem.Words[0].eWordPartOfSpeech = pAbbrevInfo->POS2;
  1900. ReadItem.eItemPartOfSpeech = pAbbrevInfo->POS2;
  1901. //--- Insert pron into word text - RAID #4746
  1902. ULONG ulNumChars = wcslen( ReadItem.Words[0].pWordPron );
  1903. ReadItem.Words[0].pWordText =
  1904. (WCHAR*) MemoryManager.GetMemory( ( ulNumChars + 3 ) * sizeof( WCHAR ), &hr );
  1905. if ( SUCCEEDED( hr ) )
  1906. {
  1907. ZeroMemory( (WCHAR*) ReadItem.Words[0].pWordText, ( ulNumChars + 3 ) * sizeof( WCHAR ) );
  1908. (WCHAR) ReadItem.Words[0].pWordText[0] = L'*';
  1909. wcscpy( ( (WCHAR*) ReadItem.Words[0].pWordText + 1 ), ReadItem.Words[0].pWordPron );
  1910. (WCHAR) ReadItem.Words[0].pWordText[ ulNumChars + 1 ] = L'*';
  1911. ReadItem.Words[0].ulWordLen = ulNumChars + 2;
  1912. }
  1913. }
  1914. return hr;
  1915. } /* ReadDisambig */