Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

314 lines
8.7 KiB

  1. #include "base.h"
  2. #include "SpanishUtils.h"
  // Global holder for the shared CSpanishUtil instance; starts out empty (NULL).
  // NOTE(review): presumably CAutoClassPointer owns and releases the object it
  // holds -- confirm against its declaration, which is not visible in this file.
  3. CAutoClassPointer<CSpanishUtil> g_apSpanishUtil = NULL;
  // Table of Spanish suffixes that CSpanishSuffixDict loads into its suffix trie.
  //
  // Row layout: { reversed suffix, length, <third field>, TYPEn }
  //   - The string holds the suffix spelled right-to-left; the trailing comment
  //     on each row shows the same suffix in normal reading order.
  //   - The second field equals the number of characters in that string.
  //   - NOTE(review): the meaning of the third field and of the TYPEn constants
  //     is defined with CSuffixTerm and is not visible in this file -- confirm
  //     there before editing rows.
  //   - \x0e1 is a-acute (U+00E1) and \x0e9 is e-acute (U+00E9).
  //
  // The table ends with an empty-string row; the CSpanishSuffixDict constructor
  // stops inserting when it reaches it.
  4. const CSuffixTerm g_rSpanishSuffix[] =
  5. {
  6. {L"et" ,2, 2, TYPE1}, // te
  7. {L"es" ,2, 2, TYPE1}, // se
  8. {L"em" ,2, 2, TYPE1}, // me
  9. {L"son" ,3, 3, TYPE1}, // nos
  10. {L"sol" ,3, 3, TYPE1}, // los
  11. {L"sal" ,3, 3, TYPE1}, // las
  12. {L"sel" ,3, 3, TYPE1}, // les
  13. {L"ol" ,2, 2, TYPE1}, // lo
  14. {L"el" ,2, 2, TYPE1}, // le
  15. {L"al" ,2, 2, TYPE1}, // la
  16. {L"etes",4, 4, TYPE1}, // sete
  // The bare "ándo" row is only compiled in when DICT_GEN is defined.
  17. #ifdef DICT_GEN
  18. {L"odn\x0e1" ,4, 3, TYPE2}, // ándo
  19. #endif
  20. {L"etodn\x0e1" ,6, 5, TYPE2}, // ándote
  21. {L"esodn\x0e1" ,6, 5, TYPE2}, // ándose
  22. {L"emodn\x0e1" ,6, 5, TYPE2}, // ándome
  23. {L"olodn\x0e1" ,6, 5, TYPE2}, // ándolo
  24. {L"elodn\x0e1" ,6, 5, TYPE2}, // ándole
  25. {L"alodn\x0e1" ,6, 5, TYPE2}, // ándola
  26. {L"sonodn\x0e1",7, 6, TYPE2}, // ándonos
  27. {L"solodn\x0e1",7, 6, TYPE2}, // ándolos
  28. {L"salodn\x0e1",7, 6, TYPE2}, // ándolas
  29. {L"selodn\x0e1",7, 6, TYPE2}, // ándoles
  30. {L"etne" ,4, 3, TYPE3}, // ente
  31. {L"esne" ,4, 3, TYPE3}, // ense
  32. {L"emne" ,4, 3, TYPE3}, // enme
  33. {L"sonne" ,5, 4, TYPE3}, // ennos
  34. {L"solne" ,5, 4, TYPE3}, // enlos
  35. {L"salne" ,5, 4, TYPE3}, // enlas
  36. {L"selne" ,5, 4, TYPE3}, // enles
  37. {L"olne" ,4, 3, TYPE3}, // enlo
  38. {L"elne" ,4, 3, TYPE3}, // enle
  39. {L"alne" ,4, 3, TYPE3}, // enla
  40. {L"emetne",6, 5, TYPE3}, // enteme
  41. {L"etsom" ,5, 5, TYPE4}, // moste
  42. {L"essom" ,5, 5, TYPE4}, // mosse
  43. {L"emsom" ,5, 5, TYPE4}, // mosme
  44. {L"sonsom" ,6, 6, TYPE4}, // mosnos
  45. {L"solsom" ,6, 6, TYPE4}, // moslos
  46. {L"salsom" ,6, 6, TYPE4}, // moslas
  47. {L"selsom" ,6, 6, TYPE4}, // mosles
  48. {L"olsom" ,5, 5, TYPE4}, // moslo
  49. {L"elsom" ,5, 5, TYPE4}, // mosle
  50. {L"alsom" ,5, 5, TYPE4}, // mosla
  51. {L"etessom",7, 7, TYPE4}, // mossete
  52. {L"soetda",6, 5, TYPE5}, // adteos
  53. {L"emetda",6, 5, TYPE5}, // adteme
  54. {L"etda" ,4, 3, TYPE5}, // adte
  55. {L"esda" ,4, 3, TYPE5}, // adse
  56. {L"emda" ,4, 3, TYPE5}, // adme
  57. {L"sonda" ,5, 4, TYPE5}, // adnos
  58. {L"solda" ,5, 4, TYPE5}, // adlos
  59. {L"salda" ,5, 4, TYPE5}, // adlas
  60. {L"selda" ,5, 4, TYPE5}, // adles
  61. {L"olda" ,4, 3, TYPE5}, // adlo
  62. {L"elda" ,4, 3, TYPE5}, // adle
  63. {L"alda" ,4, 3, TYPE5}, // adla
  64. {L"etr\x0e1" ,4, 3, TYPE6}, // árte
  65. {L"esr\x0e1" ,4, 3, TYPE6}, // árse
  66. {L"emr\x0e1" ,4, 3, TYPE6}, // árme
  67. {L"sonr\x0e1",5, 4, TYPE6}, // árnos
  68. {L"solr\x0e1",5, 4, TYPE6}, // árlos
  69. {L"salr\x0e1",5, 4, TYPE6}, // árlas
  70. {L"selr\x0e1",5, 4, TYPE6}, // árles
  71. {L"olr\x0e1" ,4, 3, TYPE6}, // árlo
  72. {L"elr\x0e1" ,4, 3, TYPE6}, // árle
  73. {L"alr\x0e1" ,4, 3, TYPE6}, // árla
  74. {L"emes" ,4, 4, TYPE7}, // seme
  75. {L"sones",5, 5, TYPE7}, // senos
  76. {L"soles",5, 5, TYPE7}, // selos
  77. {L"oles" ,4, 4, TYPE7}, // selo
  78. {L"seles",5, 5, TYPE7}, // seles
  79. {L"eles" ,4, 4, TYPE7}, // sele
  80. {L"sales",5, 5, TYPE7}, // selas
  81. {L"ales" ,4, 4, TYPE7}, // sela
  // NOTE(review): the next two rows use TYPE16 while the remaining "me..." rows
  // use TYPE8 -- confirm this difference is intentional.
  82. {L"emem", 4, 4, TYPE16}, // meme
  83. {L"sonem",5, 5, TYPE16}, // menos
  84. {L"solem",5, 5, TYPE8}, // melos
  85. {L"olem" ,4, 4, TYPE8}, // melo
  86. {L"selem",5, 5, TYPE8}, // meles
  87. {L"elem" ,4, 4, TYPE8}, // mele
  88. {L"salem",5, 5, TYPE8}, // melas
  89. {L"alem" ,4, 4, TYPE8}, // mela
  90. {L"emet" ,4, 4, TYPE9}, // teme
  91. {L"sonet",5, 5, TYPE9}, // tenos
  92. {L"solet",5, 5, TYPE9}, // telos
  93. {L"olet" ,4, 4, TYPE9}, // telo
  94. {L"selet",5, 5, TYPE9}, // teles
  95. {L"elet" ,4, 4, TYPE9}, // tele
  96. {L"salet",5, 5, TYPE9}, // telas
  97. {L"alet" ,4, 4, TYPE9}, // tela
  98. {L"etsoets\x0e9",8, 4, TYPE10}, // ésteoste
  99. {L"soets\x0e9" ,6, 2, TYPE10}, // ésteos
  100. {L"sole",4, 0,TYPE11}, // elos
  101. {L"ole" ,3, 0,TYPE11}, // elo
  102. {L"eme" ,3, 0,TYPE11}, // eme
  103. {L"sele",4, 0,TYPE11}, // eles
  104. {L"ele" ,3, 0,TYPE11}, // ele
  105. {L"sale",4, 0,TYPE11}, // elas
  106. {L"ale" ,3, 0,TYPE11}, // ela
  107. {L"sona",4, 0,TYPE12}, // anos
  108. {L"ese",3, 0, TYPE13}, // ese
  109. {L"esa",3, 0, TYPE13}, // ase
  110. {L"sone",4, 0,TYPE14}, // enos
  111. {L"olner",5, 5, TYPE15}, // renlo
  112. {L"\0",0,0,0} // end-of-table sentinel (empty string)
  113. };
  114. CSpanishUtil::CSpanishUtil()
  115. {
  116. WCHAR wch;
  117. for (wch = 0; wch < 256; wch++)
  118. {
  119. m_rCharConvert[wch] = towupper(wch);
  120. m_rAccentConvert[wch] = 0;
  121. m_rCharCompress[wch] = 0;
  122. }
  123. memset(m_rReverseAccentConvert, 0, sizeof(char) * 16);
  124. m_rCharConvert[0xc0] = L'A';
  125. m_rCharConvert[0xc1] = L'A';
  126. m_rCharConvert[0xc2] = L'A';
  127. m_rCharConvert[0xc3] = L'A';
  128. m_rCharConvert[0xc4] = L'A';
  129. m_rCharConvert[0xc5] = L'A';
  130. m_rCharConvert[0xc8] = L'E';
  131. m_rCharConvert[0xc9] = L'E';
  132. m_rCharConvert[0xca] = L'E';
  133. m_rCharConvert[0xcb] = L'E';
  134. m_rCharConvert[0xcc] = L'I';
  135. m_rCharConvert[0xcd] = L'I';
  136. m_rCharConvert[0xce] = L'I';
  137. m_rCharConvert[0xcf] = L'I';
  138. m_rCharConvert[0xd2] = L'O';
  139. m_rCharConvert[0xd3] = L'O';
  140. m_rCharConvert[0xd4] = L'O';
  141. m_rCharConvert[0xd5] = L'O';
  142. m_rCharConvert[0xd6] = L'O';
  143. m_rCharConvert[0xd9] = L'U';
  144. m_rCharConvert[0xda] = L'U';
  145. m_rCharConvert[0xdb] = L'U';
  146. m_rCharConvert[0xdc] = L'U';
  147. m_rCharConvert[0xe0] = L'A';
  148. m_rCharConvert[0xe1] = L'A';
  149. m_rCharConvert[0xe2] = L'A';
  150. m_rCharConvert[0xe3] = L'A';
  151. m_rCharConvert[0xe4] = L'A';
  152. m_rCharConvert[0xe5] = L'A';
  153. m_rCharConvert[0xe8] = L'E';
  154. m_rCharConvert[0xe9] = L'E';
  155. m_rCharConvert[0xea] = L'E';
  156. m_rCharConvert[0xeb] = L'E';
  157. m_rCharConvert[0xec] = L'I';
  158. m_rCharConvert[0xed] = L'I';
  159. m_rCharConvert[0xee] = L'I';
  160. m_rCharConvert[0xef] = L'I';
  161. m_rCharConvert[0xf2] = L'O';
  162. m_rCharConvert[0xf3] = L'O';
  163. m_rCharConvert[0xf4] = L'O';
  164. m_rCharConvert[0xf5] = L'O';
  165. m_rCharConvert[0xf6] = L'O';
  166. m_rCharConvert[0xf9] = L'U';
  167. m_rCharConvert[0xfa] = L'U';
  168. m_rCharConvert[0xfb] = L'U';
  169. m_rCharConvert[0xfc] = L'U';
  170. for (wch = 0; wch < 256; wch++)
  171. {
  172. if (m_rCharConvert[wch] >= L'A' && m_rCharConvert[wch] <= L'Z')
  173. {
  174. m_rCharCompress[wch] = m_rCharConvert[wch] - L'A' + 1;
  175. }
  176. }
  177. m_rCharCompress[0xD1] = 28;
  178. m_rCharCompress[0xF1] = 28;
  179. m_rAccentConvert[0xe1] = 1;
  180. m_rAccentConvert[0xf3] = 2;
  181. m_rAccentConvert[0xcd] = 3;
  182. m_rAccentConvert[0xe9] = 4;
  183. m_rAccentConvert[0xfa] = 5;
  184. m_rAccentConvert[0xfc] = 6;
  185. m_rAccentConvert[0x61] = 7;
  186. m_rAccentConvert[0x6f] = 8;
  187. m_rAccentConvert[0x69] = 9;
  188. m_rAccentConvert[0x65] = 10;
  189. m_rAccentConvert[0x75] = 11;
  190. m_rReverseAccentConvert[1] = (WCHAR)0xe1;
  191. m_rReverseAccentConvert[2] = (WCHAR)0xf3;
  192. m_rReverseAccentConvert[3] = (WCHAR)0xcd;
  193. m_rReverseAccentConvert[4] = (WCHAR)0xe9;
  194. m_rReverseAccentConvert[5] = (WCHAR)0xfa;
  195. m_rReverseAccentConvert[6] = (WCHAR)0xfc;
  196. m_rReverseAccentConvert[7] = (WCHAR)0x61;
  197. m_rReverseAccentConvert[8] = (WCHAR)0x6f;
  198. m_rReverseAccentConvert[9] = (WCHAR)0x69;
  199. m_rReverseAccentConvert[10] = (WCHAR)0x65;
  200. m_rReverseAccentConvert[11] = (WCHAR)0x75;
  201. }
  202. int CSpanishUtil::aiWcscmp(const WCHAR* p, const WCHAR* t)
  203. {
  204. while (*p && *t && (m_rCharConvert[*p] == m_rCharConvert[*t]))
  205. {
  206. p++;
  207. t++;
  208. }
  209. if ((m_rCharConvert[*p] == m_rCharConvert[*t]))
  210. {
  211. return 0;
  212. }
  213. if ((m_rCharConvert[*p] > m_rCharConvert[*t]))
  214. {
  215. return 1;
  216. }
  217. return -1;
  218. }
  219. int CSpanishUtil::aiStrcmp(const unsigned char* p, const unsigned char* t)
  220. {
  221. while (*p && *t && (m_rCharConvert[*p] == m_rCharConvert[*t]))
  222. {
  223. p++;
  224. t++;
  225. }
  226. if (m_rCharConvert[*p] == m_rCharConvert[*t])
  227. {
  228. return 0;
  229. }
  230. if (m_rCharConvert[*p] > m_rCharConvert[*t])
  231. {
  232. return 1;
  233. }
  234. return -1;
  235. }
  236. int CSpanishUtil::aiWcsncmp(const WCHAR* p, const WCHAR* t, const int iLen)
  237. {
  238. int i = 0;
  239. while ((i < iLen) && *p && *t && (m_rCharConvert[*p] == m_rCharConvert[*t]))
  240. {
  241. p++;
  242. t++;
  243. i++;
  244. }
  245. if ((i == iLen) || (m_rCharConvert[*p] == m_rCharConvert[*t]))
  246. {
  247. return 0;
  248. }
  249. if (m_rCharConvert[*p] > m_rCharConvert[*t])
  250. {
  251. return 1;
  252. }
  253. return -1;
  254. }
  255. CSpanishSuffixDict::CSpanishSuffixDict()
  256. {
  257. WCHAR* pwcsCur;
  258. int i;
  259. DictStatus status;
  260. for (i = 0, pwcsCur = g_rSpanishSuffix[i].pwcs;
  261. *pwcsCur != L'\0';
  262. i++, pwcsCur = g_rSpanishSuffix[i].pwcs)
  263. {
  264. status = m_SuffixTrie.trie_Insert(
  265. pwcsCur,
  266. TRIE_IGNORECASE,
  267. const_cast<CSuffixTerm*>(&g_rSpanishSuffix[i]),
  268. NULL);
  269. Assert (DICT_SUCCESS == status);
  270. }
  271. }