Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

383 lines
9.7 KiB

  1. /***
  2. *xmbtowc.c - Convert multibyte char to wide char.
  3. *
  4. * Copyright (c) 1995-2001, Microsoft Corporation. All rights reserved.
  5. *
  6. *Purpose:
  7. * Convert a multibyte character into the equivalent wide character.
  8. *
  9. *Revision History:
  10. * 12-XX-95 PJP Created from mbtowc.c December 1995 by P.J. Plauger
  11. * 04-17-96 GJF Updated for current locale locking. Also, reformatted
  12. * and made several cosmetic changes.
  13. * 09-25-96 GJF Made mbrlen, mbrtowc and mbsrtowcs multithread safe.
  14. * 09-17-97 JWM Added "return MB_CUR_MAX" to "if (*pst != 0)" branch.
  15. *
  16. *******************************************************************************/
  17. #include <cruntime.h>
  18. #include <stdlib.h>
  19. #include <mtdll.h>
  20. #include <errno.h>
  21. #include <dbgint.h>
  22. #include <ctype.h>
  23. #include <limits.h> /* for INT_MAX */
  24. #include <stdio.h> /* for EOF */
  25. #include <xlocinfo.h> /* for _Cvtvec, _Mbrtowc */
  26. #ifdef _WIN32
  27. #include <internal.h>
  28. #include <locale.h>
  29. #include <setlocal.h>
  30. #endif /* _WIN32 */
  31. #ifndef _MT
  32. #define _Mbrtowc_lk _Mbrtowc
  33. #endif
  /***
  *int _Mbrtowc() - Convert multibyte char to wide character.
  *
  *Purpose:
  *       Convert a multibyte character into the equivalent wide character,
  *       according to the locale information passed in ploc, or according
  *       to the current locale when ploc is NULL. [ANSI].
  *
  *       NOTE: The locale-dependent conversion is compiled only under
  *       _WIN32. In other builds each byte is simply widened, as in the
  *       "C" locale.
  *
  *Entry:
  *       wchar_t *pwc        = pointer to destination wide character
  *       const char *s       = pointer to multibyte character
  *       size_t n            = maximum length of multibyte character to consider
  *       mbstate_t *pst      = pointer to state
  *       const _Cvtvec *ploc = pointer to locale info
  *
  *Exit:
  *       If s = NULL or n = 0, returns 0, indicating we only use
  *       state-independent character encodings.
  *       Otherwise returns:  0 (if *s = null char)
  *                          -1 (if the bytes examined are not a valid mbc;
  *                              errno is set to EILSEQ)
  *                          -2 (if only a lead byte was available; it is
  *                              remembered in *pst)
  *                          number of bytes comprising converted mbc
  *
  *Exceptions:
  *
  *******************************************************************************/

  #ifdef _MT

  /* The worker that does the conversion; defined further down in this file. */
  static int __cdecl _Mbrtowc_lk
      (
      wchar_t *pwc,
      const char *s,
      size_t n,
      mbstate_t *pst,
      const _Cvtvec *ploc
      );

  /*
   * Multithread entry point: forwards all arguments unchanged to
   * _Mbrtowc_lk, bracketing the call with _lock_locale/_unlock_locale,
   * and hands back the worker's result.
   */
  int _CRTIMP2 __cdecl _Mbrtowc(
      wchar_t *pwc,
      const char *s,
      size_t n,
      mbstate_t *pst,
      const _Cvtvec *ploc
      )
  {
      int local_lock_flag;    /* bookkeeping used by the lock macros */
      int retval;

      _lock_locale( local_lock_flag )
      retval = _Mbrtowc_lk(pwc, s, n, pst, ploc);
      _unlock_locale( local_lock_flag )

      return retval;
  }

  #endif  /* _MT */
  88. #ifdef _MT
  89. static int __cdecl _Mbrtowc_lk
  90. #else /* _MT */
  91. int _CRTIMP2 __cdecl _Mbrtowc
  92. #endif /* _MT */
  93. (
  94. wchar_t *pwc,
  95. const char *s,
  96. size_t n,
  97. mbstate_t *pst,
  98. const _Cvtvec *ploc
  99. )
  100. {
  101. _ASSERTE (MB_CUR_MAX == 1 || MB_CUR_MAX == 2);
  102. if ( !s || n == 0 )
  103. /* indicate do not have state-dependent encodings,
  104. handle zero length string */
  105. return 0;
  106. if ( !*s )
  107. {
  108. /* handle NULL char */
  109. if (pwc)
  110. *pwc = 0;
  111. return 0;
  112. }
  113. #ifdef _WIN32
  114. { /* perform locale-dependent parse */
  115. LCID handle;
  116. UINT codepage;
  117. if (ploc == 0)
  118. {
  119. handle = __lc_handle[LC_CTYPE];
  120. codepage = __lc_codepage;
  121. }
  122. else
  123. {
  124. handle = ploc->_Hand;
  125. codepage = ploc->_Page;
  126. }
  127. if ( handle == _CLOCALEHANDLE )
  128. {
  129. if (pwc)
  130. *pwc = (wchar_t)(unsigned char)*s;
  131. return sizeof(char);
  132. }
  133. if (*pst != 0)
  134. { /* complete two-byte multibyte character */
  135. ((char *)pst)[1] = *s;
  136. if (MB_CUR_MAX <= 1 || (MultiByteToWideChar(codepage,
  137. MB_PRECOMPOSED|MB_ERR_INVALID_CHARS,
  138. (char *)pst, 2, pwc, (pwc) ? 1 : 0) == 0))
  139. { /* translation failed */
  140. *pst = 0;
  141. errno = EILSEQ;
  142. return -1;
  143. }
  144. *pst = 0;
  145. return MB_CUR_MAX;
  146. }
  147. else if ( isleadbyte((unsigned char)*s) )
  148. {
  149. /* multi-byte char */
  150. if (n < MB_CUR_MAX)
  151. { /* save partial multibyte character */
  152. ((char *)pst)[0] = *s;
  153. return (-2);
  154. }
  155. else if ( MB_CUR_MAX <= 1 ||
  156. (MultiByteToWideChar( codepage,
  157. MB_PRECOMPOSED |
  158. MB_ERR_INVALID_CHARS,
  159. s,
  160. MB_CUR_MAX,
  161. pwc,
  162. (pwc) ? 1 : 0) == 0) )
  163. {
  164. /* validate high byte of mbcs char */
  165. if (!*(s+1))
  166. {
  167. *pst = 0;
  168. errno = EILSEQ;
  169. return -1;
  170. }
  171. /* else translation failed with no complaint? [pjp] */
  172. }
  173. return MB_CUR_MAX;
  174. }
  175. else {
  176. /* single byte char */
  177. if ( MultiByteToWideChar( codepage,
  178. MB_PRECOMPOSED|MB_ERR_INVALID_CHARS,
  179. s,
  180. 1,
  181. pwc,
  182. (pwc) ? 1 : 0) == 0 )
  183. {
  184. errno = EILSEQ;
  185. return -1;
  186. }
  187. return sizeof(char);
  188. }
  189. }
  190. #else /* _WIN32 */
  191. /* stuck the "C" locale again */
  192. if (pwc)
  193. *pwc = (wchar_t)(unsigned char)*s;
  194. return sizeof(char);
  195. #endif /* _WIN32 */
  196. }
  197. /***
  198. *wint_t btowc(c) - translate single byte to wide char
  199. *
  200. *Purpose:
  201. *
  202. *Entry:
  203. *
  204. *Exit:
  205. *
  206. *Exceptions:
  207. *
  208. *******************************************************************************/
  209. wint_t _CRTIMP2 __cdecl btowc (
  210. int c
  211. )
  212. {
  213. if (c == EOF)
  214. return (WEOF);
  215. else
  216. { /* convert as one-byte string */
  217. char ch = (char)c;
  218. mbstate_t mbst = 0;
  219. wchar_t wc;
  220. return (_Mbrtowc(&wc, &ch, 1, &mbst, 0) < 0 ? WEOF : wc);
  221. }
  222. }
  223. /***
  224. *size_t mbrlen(s, n, pst) - determine next multibyte code, restartably
  225. *
  226. *Purpose:
  227. *
  228. *Entry:
  229. *
  230. *Exit:
  231. *
  232. *Exceptions:
  233. *
  234. *******************************************************************************/
  235. size_t _CRTIMP2 __cdecl mbrlen (
  236. const char *s,
  237. size_t n,
  238. mbstate_t *pst
  239. )
  240. {
  241. #ifdef _MT
  242. int local_lock_flag;
  243. #endif
  244. size_t retval;
  245. static mbstate_t mbst = {0};
  246. _lock_locale( local_lock_flag )
  247. retval = _Mbrtowc_lk(0, s != 0 ? s : 0, n, pst ? pst : &mbst, 0);
  248. _unlock_locale( local_lock_flag )
  249. return retval;
  250. }
  251. /***
  252. *size_t mbrtowc(pwc, s, n, pst) - translate multibyte to wchar_t, restartably
  253. *
  254. *Purpose:
  255. *
  256. *Entry:
  257. *
  258. *Exit:
  259. *
  260. *Exceptions:
  261. *
  262. *******************************************************************************/
  263. size_t _CRTIMP2 __cdecl mbrtowc (
  264. wchar_t *pwc,
  265. const char *s,
  266. size_t n,
  267. mbstate_t *pst
  268. )
  269. {
  270. #ifdef _MT
  271. int local_lock_flag;
  272. #endif
  273. size_t retval;
  274. static mbstate_t mbst = {0};
  275. _lock_locale( local_lock_flag )
  276. retval = (s != 0) ? _Mbrtowc_lk(pwc, s, n, pst ? pst : &mbst, 0)
  277. : _Mbrtowc_lk(0, "", n, pst ? pst : &mbst, 0);
  278. _unlock_locale( local_lock_flag )
  279. return retval;
  280. }
  281. /***
  282. *size_t mbsrtowcs(wcs, ps, n, pst) - translate multibyte string to wide,
  283. * restartably
  284. *
  285. *Purpose:
  286. *
  287. *Entry:
  288. *
  289. *Exit:
  290. *
  291. *Exceptions:
  292. *
  293. *******************************************************************************/
  294. size_t _CRTIMP2 __cdecl mbsrtowcs (
  295. wchar_t *wcs,
  296. const char **ps,
  297. size_t n,
  298. mbstate_t *pst
  299. )
  300. {
  301. const char *s = *ps;
  302. int i;
  303. size_t nwc = 0;
  304. #ifdef _MT
  305. int local_lock_flag;
  306. #endif
  307. static mbstate_t mbst = {0};
  308. if (pst == 0)
  309. pst = &mbst;
  310. _lock_locale( local_lock_flag )
  311. if (wcs == 0)
  312. for (; ; ++nwc, s += i)
  313. { /* translate but don't store */
  314. wchar_t wc;
  315. if ((i = _Mbrtowc_lk(&wc, s, INT_MAX, pst, 0)) < 0) {
  316. _unlock_locale( local_lock_flag )
  317. return ((size_t)-1);
  318. }
  319. else if (i == 0) {
  320. _unlock_locale( local_lock_flag )
  321. return (nwc);
  322. }
  323. }
  324. for (; 0 < n; ++nwc, s += i, ++wcs, --n)
  325. { /* translate and store */
  326. if ((i = _Mbrtowc_lk(wcs, s, INT_MAX, pst, 0)) < 0)
  327. { /* encountered invalid sequence */
  328. nwc = (size_t)-1;
  329. break;
  330. }
  331. else if (i == 0)
  332. { /* encountered terminating null */
  333. s = 0;
  334. break;
  335. }
  336. }
  337. *ps = s;
  338. _unlock_locale( local_lock_flag )
  339. return (nwc);
  340. }