Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

247 lines
9.1 KiB

  1. #include <stdlib.h>
  2. #include "common.h"
  3. #include "factoid.h"
  4. #include "volcanop.h"
  /*
   * Recognizer language selectors.  Each language owns a single bit so that
   * a factoid table row can be tagged with any combination of languages and
   * tested against the active language with a bitwise AND.
   */
  #define JPN_LANG 0x01
  #define CHS_LANG 0x02
  #define CHT_LANG 0x04
  #define KOR_LANG 0x08

  /* Mask for rows that apply to every language listed above. */
  #define ALL_LANG (JPN_LANG | CHS_LANG | CHT_LANG | KOR_LANG)
  10. typedef struct FACTOID_DEF
  11. {
  12. DWORD dwFactoid;
  13. int iLang;
  14. ALC alc;
  15. wchar_t *wszChars;
  16. wchar_t *wszDense;
  17. } FACTOID_DEF;
  /*
   * Shared wide-character fragments.  They are spliced into the factoid
   * table by string-literal concatenation, so each one must remain a plain
   * wide string literal.
   */
  #define YEN              L"\x00a5"                /* U+00A5 */
  #define WON              L"\x20a9"                /* U+20A9 */
  #define CURRENCY_SYMBOLS L"\x0024\x00A3\x20AC"    /* U+0024, U+00A3, U+20AC */
  #define CHINESE_ZERO     L"\x96f6"                /* U+96F6 */
  #define IDEOGRAPHIC_ZERO L"\x3007"                /* U+3007 */
  /* Ten code points, U+4E00 (one) through U+5341 (ten). */
  #define KANJI_DIGITS     L"\x4e00\x4e8c\x4e09\x56db\x4e94\x516d\x4e03\x516b\x4e5d\x5341"
  /* Eight code points used by the Japanese date rows. */
  #define JPN_YEARS        L"\x660e\x6cbb\x5927\x6b63\x662d\x548c\x5e73\x6210"
  25. static FACTOID_DEF g_factoidTable[] =
  26. {
  27. { FACTOID_NONE, ALL_LANG, 0, NULL },
  28. { FACTOID_EMAIL, ALL_LANG, ALC_UCALPHA | ALC_LCALPHA, L"[email protected]+=_" },
  29. { FACTOID_WEB, ALL_LANG, ALC_ASCII, NULL },
  30. { FACTOID_ONECHAR, ALL_LANG, ALC_ASCII, NULL },
  31. { FACTOID_NUMBER, ALL_LANG, ALC_NUMERIC | ALC_NUMERIC_PUNC | ALC_MATH | ALC_MONETARY , NULL },
  32. { FACTOID_DIGITCHAR, ALL_LANG, ALC_NUMERIC, NULL },
  33. { FACTOID_NUMSIMPLE, ALL_LANG, ALC_NUMERIC, L",." },
  34. { FACTOID_NUMCURRENCY, JPN_LANG, 0,
  35. #if defined(WINCE) || defined(FAKE_WINCE)
  36. /* Symbols */ L"\x0024\x00A3\x20AC\x00A5"
  37. #else
  38. /* Symbols */ L"\x0024\x00A3\x20AC\x00A5\x20A9"
  39. #endif
  40. /* cent "L\x00A2" */
  41. /* Digits */ L"\x002C\x002E\x0030\x0031\x0032\x0033\x0034\x0035\x0036\x0037\x0038\x0039\x3007\x4E00\x4E8C\x4E09\x56DB\x4E94\x516D\x4E03\x516B\x4E5D\x5341\x58F1\x5F10\x53C2\x4F0D\x62FE"
  42. /* Units */ L"\x5341\x767E\x5343\x4E07\x842C\x5104\x5146\x4EDF\x9621"
  43. /* Kanji */ L"\x5186"
  44. },
  45. { FACTOID_NUMCURRENCY, CHS_LANG, 0,
  46. /* Symbols */ L"\x0024\x00A3\x20AC\x00A5"
  47. /* won L"\x20A9" cent L"\x00A2" */
  48. /* Digits */ L"\x002C\x002E\x0030\x0031\x0032\x0033\x0034\x0035\x0036\x0037\x0038\x0039\x96F6\x58F9\x8D30\x53C1\x8086\x4F0D\x9646\x67D2\x634C\x7396"
  49. /* Units */ L"\x62FE\x4F70\x4EDF\x4E07\x4EBF"
  50. /* Kanji */ L"\x5143\x89D2\x5206"
  51. },
  52. { FACTOID_NUMCURRENCY, CHT_LANG, 0,
  53. /* Symbols */ L"\x0024\x00A3\x20AC\x00A5\x20A9"
  54. /* cent L"\x00A2" */
  55. /* Digits */ L"\x002C\x002E\x0030\x0031\x0032\x0033\x0034\x0035\x0036\x0037\x0038\x0039\x96F6\x3007\x4E00\x4E8C\x4E09\x56DB\x4E94\x516D\x4E03\x516B\x4E5D\x5341\x96F6\x58F9\x8CB3\x53C3\x8086\x4F0D\x9678\x67D2\x634C\x7396\x62FE"
  56. /* Units */ L"\x5341\x767E\x5343\x842C\x5104\x62FE\x4F70\x4EDF"
  57. /* Kanji */ L"\x89D2\x5143\x5206\x6BDB\x584A\x9322\x5713"
  58. },
  59. { FACTOID_NUMCURRENCY, KOR_LANG, 0,
  60. /* Symbols */ L"\x0024\x00A3\x20AC\x20A9\x00A5"
  61. /* cent L"\x00A2" */
  62. /* Digits */ L"\x002C\x002E\x0030\x0031\x0032\x0033\x0034\x0035\x0036\x0037\x0038\x0039\x96F6\x58F9\x8CB3\x53C3\x8086\x4F0D\x9678\x67D2\x634C\x7396\x62FE"
  63. /* Units */ L"\x5341\x767E\x5343\x842C\x5104"
  64. /* Kanji */ L"\xC6D0"
  65. },
  66. { FACTOID_ZIP, ALL_LANG, ALC_NUMERIC, L"-" },
  67. { FACTOID_NUMPERCENT, ALL_LANG, ALC_NUMERIC, L".%" },
  68. { FACTOID_NUMDATE, JPN_LANG, ALC_NUMERIC,
  69. L"().'/-\x5e74\x6708\x65e5\x66dc\x706b\x6c34\x6728\x91d1\x571f\x5143\x5eff\x4e17" IDEOGRAPHIC_ZERO KANJI_DIGITS JPN_YEARS },
  70. { FACTOID_NUMDATE, CHS_LANG, ALC_NUMERIC,
  71. L"().'/-\x5e74\x6708\x65e5\x661f\x671f" CHINESE_ZERO KANJI_DIGITS },
  72. { FACTOID_NUMDATE, CHT_LANG, ALC_NUMERIC,
  73. L"().'/-\x5e74\x6708\x570b\x65e5\x5143\x6c11\x516c\x897f\x524d\x9031\x83ef\x661f\x671f\x4e2d" CHINESE_ZERO IDEOGRAPHIC_ZERO KANJI_DIGITS },
  74. { FACTOID_NUMDATE, KOR_LANG, ALC_NUMERIC,
  75. L"'/-\x5e74\x6708\x65e5\xb144\xc6d4\xc77c\xd654\xc218\xbaa9\xae08\xd1a0\xc6d4\xc694" KANJI_DIGITS CHINESE_ZERO },
  76. { FACTOID_NUMTIME, JPN_LANG, ALC_NUMERIC,
  77. L"apmAPM.:\x5348\x524d\x5f8c\x6642\x5206\x79d2" IDEOGRAPHIC_ZERO CHINESE_ZERO KANJI_DIGITS },
  78. { FACTOID_NUMTIME, CHS_LANG, ALC_NUMERIC,
  79. L"apmAPM.:\x4e0a\x5348\x4e0b\x65f6\x70b9\x5206\x79d2\x65e9\x591c\x4e2d\x665a" CHINESE_ZERO KANJI_DIGITS },
  80. { FACTOID_NUMTIME, CHT_LANG, ALC_NUMERIC,
  81. L"apmAPM.:\x4e0a\x5348\x4e0b\x9ede\x5206\x79d2\x65e9\x591c\x4e2d\x665a\x6642" CHINESE_ZERO KANJI_DIGITS },
  82. { FACTOID_NUMTIME, KOR_LANG, ALC_NUMERIC,
  83. L":apmAPM.\xc624\xc804\xd6c4\xc2dc\xbd84" KANJI_DIGITS CHINESE_ZERO },
  84. { FACTOID_NUMPHONE, JPN_LANG, ALC_NUMERIC | ALC_UCALPHA, L"#()-+.\x30fb\x5185\x7dda" },
  85. { FACTOID_NUMPHONE, CHS_LANG, ALC_NUMERIC | ALC_UCALPHA, L"()/-+.\x5185\x8f6c" CHINESE_ZERO KANJI_DIGITS },
  86. { FACTOID_NUMPHONE, CHT_LANG, ALC_NUMERIC | ALC_UCALPHA, L"()-+.EeXxt\x5206\x6a5f\x8f49" CHINESE_ZERO KANJI_DIGITS },
  87. { FACTOID_NUMPHONE, KOR_LANG, ALC_NUMERIC | ALC_UCALPHA, L"()-+.\xad50\xd658:" },
  88. { FACTOID_FILENAME, ALL_LANG, 0xFFFFFFFF, NULL },
  89. { FACTOID_UPPERCHAR, ALL_LANG, ALC_UCALPHA, NULL },
  90. { FACTOID_LOWERCHAR, ALL_LANG, ALC_LCALPHA, NULL },
  91. { FACTOID_PUNCCHAR, ALL_LANG, ALC_PUNC | ALC_MATH | ALC_NUMERIC_PUNC | ALC_MONETARY | ALC_OTHER, NULL },
  92. { FACTOID_JPN_COMMON, JPN_LANG, ALC_JPN_COMMON | ALC_EXTENDED_SYM, NULL },
  93. { FACTOID_CHS_COMMON, CHS_LANG, ALC_CHS_COMMON | ALC_EXTENDED_SYM, NULL },
  94. { FACTOID_CHT_COMMON, CHT_LANG, ALC_CHT_COMMON | ALC_EXTENDED_SYM, NULL },
  95. { FACTOID_KOR_COMMON, KOR_LANG, ALC_KOR_COMMON | ALC_EXTENDED_SYM, NULL },
  96. { FACTOID_HIRAGANA, JPN_LANG, ALC_HIRAGANA, NULL },
  97. { FACTOID_KATAKANA, JPN_LANG, ALC_KATAKANA, NULL },
  98. { FACTOID_KANJI_COMMON, ALL_LANG, ALC_KANJI_COMMON, NULL },
  99. { FACTOID_KANJI_RARE, ALL_LANG, ALC_KANJI_RARE, NULL },
  100. { FACTOID_HANGUL_COMMON,KOR_LANG, ALC_HANGUL_COMMON, NULL },
  101. { FACTOID_HANGUL_RARE, KOR_LANG, ALC_HANGUL_RARE, NULL },
  102. { FACTOID_JAMO, KOR_LANG, ALC_JAMO, NULL },
  103. { FACTOID_BOPOMOFO, CHT_LANG, ALC_BOPOMOFO, NULL },
  104. };
  105. static int g_iFactoidTableSize = sizeof(g_factoidTable) / sizeof(FACTOID_DEF);
  106. static int g_iRecognizerLanguage;
  107. BOOL FactoidTableConfig(LOCRUN_INFO *pLocRunInfo, wchar_t *wszRecognizerLanguage)
  108. {
  109. int iFactoid;
  110. if (wcsicmp(wszRecognizerLanguage, L"JPN") == 0)
  111. {
  112. g_iRecognizerLanguage = JPN_LANG;
  113. }
  114. else if (wcsicmp(wszRecognizerLanguage, L"CHS") == 0)
  115. {
  116. g_iRecognizerLanguage = CHS_LANG;
  117. }
  118. else if (wcsicmp(wszRecognizerLanguage, L"CHT") == 0)
  119. {
  120. g_iRecognizerLanguage = CHT_LANG;
  121. }
  122. else if (wcsicmp(wszRecognizerLanguage, L"KOR") == 0)
  123. {
  124. g_iRecognizerLanguage = KOR_LANG;
  125. }
  126. else
  127. {
  128. return FALSE;
  129. }
  130. for (iFactoid = 0; iFactoid < g_iFactoidTableSize; iFactoid++)
  131. {
  132. if ((g_factoidTable[iFactoid].iLang & g_iRecognizerLanguage) != 0 &&
  133. g_factoidTable[iFactoid].wszChars != NULL)
  134. {
  135. wchar_t *wsz = Externwcsdup(g_factoidTable[iFactoid].wszChars);
  136. if (wsz == NULL)
  137. {
  138. return FALSE;
  139. }
  140. g_factoidTable[iFactoid].wszDense = wsz;
  141. while (*wsz != 0)
  142. {
  143. wchar_t wch = LocRunUnicode2Dense(pLocRunInfo, *wsz);
  144. if (wch == LOC_TRAIN_NO_DENSE_CODE)
  145. {
  146. return FALSE;
  147. }
  148. *wsz = wch;
  149. wsz++;
  150. }
  151. }
  152. else
  153. {
  154. g_factoidTable[iFactoid].wszDense = NULL;
  155. }
  156. }
  157. return TRUE;
  158. }
  159. BOOL FactoidTableUnconfig()
  160. {
  161. int iFactoid;
  162. for (iFactoid = 0; iFactoid < g_iFactoidTableSize; iFactoid++)
  163. {
  164. ExternFree(g_factoidTable[iFactoid].wszDense);
  165. }
  166. return TRUE;
  167. }
  168. BOOL SetFactoidDefaultInternal(LATTICE *pLattice)
  169. {
  170. // Clear out the ALCs
  171. pLattice->recogSettings.alcValid = 0xFFFFFFFF;
  172. ExternFree(pLattice->recogSettings.pbAllowedChars);
  173. pLattice->recogSettings.pbAllowedChars = NULL;
  174. pLattice->recogSettings.alcPriority = 0;
  175. ExternFree(pLattice->recogSettings.pbPriorityChars);
  176. pLattice->recogSettings.pbPriorityChars = NULL;
  177. // Clear out any factoid setting to the default
  178. pLattice->fUseFactoid = TRUE;
  179. ExternFree(pLattice->pbFactoidChars);
  180. pLattice->pbFactoidChars = NULL;
  181. pLattice->alcFactoid = 0xFFFFFFFF;
  182. return TRUE;
  183. }
  184. BOOL IsSupportedFactoid(DWORD dwFactoid)
  185. {
  186. int iFactoid;
  187. for (iFactoid = 0; iFactoid < g_iFactoidTableSize; iFactoid++)
  188. {
  189. if (g_factoidTable[iFactoid].dwFactoid == dwFactoid &&
  190. (g_factoidTable[iFactoid].iLang & g_iRecognizerLanguage) != 0)
  191. {
  192. break;
  193. }
  194. }
  195. return (iFactoid < g_iFactoidTableSize);
  196. }
  197. BOOL SetFactoidInternal(LOCRUN_INFO *pLocRunInfo, LATTICE *pLattice, DWORD dwFactoid)
  198. {
  199. wchar_t *wsz;
  200. int iFactoid;
  201. for (iFactoid = 0; iFactoid < g_iFactoidTableSize; iFactoid++)
  202. {
  203. if (g_factoidTable[iFactoid].dwFactoid == dwFactoid &&
  204. (g_factoidTable[iFactoid].iLang & g_iRecognizerLanguage) != 0)
  205. {
  206. break;
  207. }
  208. }
  209. if (iFactoid == g_iFactoidTableSize)
  210. {
  211. return FALSE;
  212. }
  213. pLattice->alcFactoid |= g_factoidTable[iFactoid].alc;
  214. wsz = g_factoidTable[iFactoid].wszDense;
  215. if (wsz != NULL)
  216. {
  217. while (*wsz != 0)
  218. {
  219. SetAllowedChar(pLocRunInfo, &(pLattice->pbFactoidChars), *wsz);
  220. wsz++;
  221. }
  222. }
  223. return TRUE;
  224. }