Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

568 lines
11 KiB

  1. /*++
  2. Copyright (c) 1996 Microsoft Corporation
  3. Module Name:
  4. unicode.c
  5. Abstract:
  6. Simplified Unicode-Ansi conversion functions.
  7. Externally exposed routines:
  8. In-Place Conversion:
  9. KnownSizeDbcsToUnicodeN
  10. KnownSizeUnicodeToDbcsN
  11. KnownSizeWtoA
  12. KnownSizeAtoW
  13. In-Place Conversion without nul checks:
  14. DirectDbcsToUnicodeN
  15. DirectUnicodeToDbcsN
  16. DirectAtoW
  17. DirectWtoA
  18. Length/pool options:
  19. DbcsToUnicodeN
  20. UnicodeToDbcsN
  21. DbcsToUnicode
  22. UnicodeToDbcs
  23. FreeConvertedPoolStr
  24. Simplified type conversions:
  25. ConvertWtoA
  26. ConvertAtoW
  27. FreeConvertedStr
  28. TCHAR routines that can be compiled both ways:
  29. CreateDbcs
  30. CreateUnicode
  31. DestroyDbcs
  32. DestroyUnicode
  33. Author:
  34. Jim Schmidt (jimschm) 04-Aug-1997
  35. Revision History:
  36. jimschm 15-Feb-1999 Eliminated MikeCo's routines, since they are
  37. broken on FE
  38. jimschm 23-Sep-1998 Added in-place routines
  39. --*/
  40. #include "pch.h"
  41. #include <locale.h>
  42. #include <mbctype.h>
  43. extern POOLHANDLE g_TextPool;   // defined elsewhere; the pool the Real*N routines below fall back to when the caller passes no pool
  44. extern DWORD g_MigutilWCToMBFlags;   // defined elsewhere; passed as the flags argument of the WideCharToMultiByte calls below
  45. WORD g_GlobalCodePage = CP_ACP;   // code page handed to the Win32 conversion calls below; starts as CP_ACP and is overwritten by SetGlobalCodePage
  46. typedef VOID(WINAPI SETACP)(WORD CodePage);   // signature of the routine SetGlobalCodePage looks up by name ("SetCPGlobal") in kernel32.dll
  47. typedef SETACP * PSETACP;   // pointer to a SETACP routine, as returned by GetProcAddress in SetGlobalCodePage
  48. VOID
  49. SetGlobalCodePage (
  50. IN WORD CodePage,
  51. IN LCID Locale
  52. )
  53. {
  54. PSETACP SetACP;
  55. HANDLE Lib;
  56. g_GlobalCodePage = CodePage;
  57. if (ISNT()) {
  58. Lib = LoadLibrary (TEXT("kernel32.dll"));
  59. if (Lib) {
  60. SetACP = (PSETACP) GetProcAddress (Lib, "SetCPGlobal");
  61. if (SetACP) {
  62. SetACP (CodePage);
  63. }
  64. FreeLibrary (Lib);
  65. }
  66. }
  67. SetThreadLocale (Locale);
  68. setlocale(LC_ALL,"");
  69. _setmbcp(CodePage);
  70. }
  71. VOID
  72. GetGlobalCodePage (
  73. OUT PWORD CodePage, OPTIONAL
  74. OUT PLCID Locale OPTIONAL
  75. )
  76. {
  77. if (CodePage) {
  78. if (g_GlobalCodePage == CP_ACP) {
  79. *CodePage = (WORD)GetACP();
  80. } else {
  81. *CodePage = g_GlobalCodePage;
  82. }
  83. }
  84. if (Locale) {
  85. *Locale = GetThreadLocale();
  86. }
  87. }
  88. PCSTR
  89. RealUnicodeToDbcsN (
  90. IN POOLHANDLE Pool, OPTIONAL
  91. IN PCWSTR StrIn,
  92. IN DWORD Chars
  93. )
  94. /*++
  95. Routine Description:
  96. Converts a UNICODE string to DBCS.
  97. WARNING: Currently supports the ANSI code page only. Later we can fix this.
  98. Arguments:
  99. Pool - Specifies the pool where memory is allocated from. If not specified,
  100. g_TextPool is used instead.
  101. StrIn - Specifies the inbound UNICODE string
  102. Chars - Specifies the number of characters, excluding the nul, to
  103. convert.
  104. Return Value:
  105. A pointer to the ANSI string, or NULL if an error occurred.
  106. --*/
  107. {
  108. PSTR DbcsStr;
  109. DWORD Size;
  110. DWORD rc;
  111. if (!Pool) {
  112. Pool = g_TextPool;
  113. }
  114. if (INVALID_CHAR_COUNT == Chars) {
  115. Chars = CharCountW (StrIn);
  116. }
  117. Size = (Chars + 1) * sizeof (WCHAR);
  118. DbcsStr = (PSTR) PoolMemGetAlignedMemory (Pool, Size);
  119. if (!DbcsStr) {
  120. DEBUGMSG ((DBG_ERROR, "UnicodeToDbcsN could not allocate string"));
  121. return NULL;
  122. }
  123. rc = WideCharToMultiByte (
  124. g_GlobalCodePage,
  125. g_MigutilWCToMBFlags,
  126. StrIn,
  127. Chars, // wc input count
  128. DbcsStr,
  129. Size,
  130. NULL,
  131. NULL
  132. );
  133. // Report error returns from WideCharToMultiByte
  134. if (!rc && Chars) {
  135. PushError();
  136. PoolMemReleaseMemory (Pool, DbcsStr);
  137. PopError();
  138. DEBUGMSG ((
  139. DBG_WARNING,
  140. "UnicodeToDbcsN error caused memory to be released in pool; may cause harmless PoolMem warnings."
  141. ));
  142. return NULL;
  143. }
  144. *CharCountToPointerA (DbcsStr, Chars) = 0;
  145. return DbcsStr;
  146. }
  147. PCWSTR
  148. RealDbcsToUnicodeN (
  149. IN POOLHANDLE Pool, OPTIONAL
  150. IN PCSTR StrIn,
  151. IN DWORD Chars
  152. )
  153. /*++
  154. Routine Description:
  155. Converts a DBCS string to UNICODE.
  156. WARNING: Currently supports the ANSI code page only. Later we can fix this.
  157. Arguments:
  158. Pool - Specifies pool to allocate UNICODE string from. If not specified,
  159. g_TextPool is used.
  160. StrIn - Specifies string to be converted
  161. Chars - Specifies the number of multibyte characters, excluding the nul,
  162. to convert. If -1, all of StrIn will be converted.
  163. Return Value:
  164. A pointer to the converted UNICODE string, or NULL if an error ocurred.
  165. --*/
  166. {
  167. PWSTR UnicodeStr;
  168. DWORD UnicodeStrBufLenBytes;
  169. DWORD WcharsConverted;
  170. DWORD StrInBytesToConvert;
  171. //
  172. // Find number of multi-byte characters to convert. Punt on case where
  173. // caller asks for more chars than available.
  174. //
  175. if (INVALID_CHAR_COUNT == Chars) {
  176. Chars = CharCountA (StrIn);
  177. }
  178. //
  179. // Count bytes to convert from the input string (excludes delimiter)
  180. //
  181. StrInBytesToConvert = CharCountToPointerA(StrIn, Chars) - StrIn;
  182. //
  183. // Get output buffer size, in bytes, including delimiter
  184. //
  185. UnicodeStrBufLenBytes = (Chars + 1) * sizeof (WCHAR);
  186. if (!Pool) {
  187. Pool = g_TextPool;
  188. }
  189. //
  190. // Get buffer
  191. //
  192. UnicodeStr = (PWSTR) PoolMemGetAlignedMemory (Pool, UnicodeStrBufLenBytes);
  193. if (!UnicodeStr) {
  194. DEBUGMSG ((DBG_ERROR, "DbcsToUnicodeN could not allocate string"));
  195. return NULL;
  196. }
  197. //
  198. // Convert
  199. //
  200. WcharsConverted = MultiByteToWideChar (
  201. g_GlobalCodePage,
  202. 0, // MB_ERR_INVALID_CHARS,
  203. StrIn,
  204. StrInBytesToConvert,
  205. UnicodeStr,
  206. UnicodeStrBufLenBytes
  207. );
  208. //
  209. // Check for conversion error (>0 chars in, 0 chars out)
  210. //
  211. if (0 == WcharsConverted && 0 != Chars) {
  212. PushError();
  213. PoolMemReleaseMemory (Pool, UnicodeStr);
  214. PopError();
  215. DEBUGMSG ((
  216. DBG_WARNING,
  217. "DbcsToUnicodeN error caused memory to be released in pool; may cause harmless PoolMem warnings."
  218. ));
  219. return NULL;
  220. }
  221. //
  222. // Write delimiter on the output string
  223. //
  224. UnicodeStr[WcharsConverted] = 0;
  225. return UnicodeStr;
  226. }
  227. VOID
  228. FreeConvertedPoolStr (
  229. IN POOLHANDLE Pool, OPTIONAL
  230. IN PVOID StrIn
  231. )
  232. /*++
  233. Routine Description:
  234. Frees the memory allocated by UnicodeToDbcsN or DbcsToUnicodeN.
  235. Arguments:
  236. Pool - Specifies pool to allocate UNICODE string from. If not specified,
  237. g_TextPool is used.
  238. StrIn - Specifies string that was returned by UnicodeToDebcsN or
  239. DbcsToUnicodeN.
  240. Return Value:
  241. none
  242. --*/
  243. {
  244. if (!StrIn) {
  245. return;
  246. }
  247. if (!Pool) {
  248. Pool = g_TextPool;
  249. }
  250. PoolMemReleaseMemory (Pool, (PVOID) StrIn);
  251. }
  252. PSTR
  253. KnownSizeUnicodeToDbcsN (
  254. OUT PSTR StrOut,
  255. IN PCWSTR StrIn,
  256. IN DWORD Chars
  257. )
  258. /*++
  259. Routine Description:
  260. KnownSizeUnicodeToDbcsN converts a UNICODE string to DBCS. The caller
  261. manages the outbound buffer.
  262. Arguments:
  263. StrOut - Receives the DBCS result.
  264. StrIn - Specifies the UNICODE string to convert.
  265. Chars - Specifies the character count of StrIn (not the byte count), or
  266. INVALID_CHAR_COUNT for the complete string.
  267. Return Value:
  268. Returns StrOut.
  269. --*/
  270. {
  271. DWORD rc;
  272. if (INVALID_CHAR_COUNT == Chars) {
  273. Chars = CharCountW (StrIn);
  274. }
  275. rc = WideCharToMultiByte (
  276. g_GlobalCodePage,
  277. g_MigutilWCToMBFlags,
  278. StrIn,
  279. Chars, // wc input count
  280. StrOut,
  281. Chars * 2,
  282. NULL,
  283. NULL
  284. );
  285. DEBUGMSG_IF ((
  286. !rc && Chars,
  287. DBG_WARNING,
  288. "KnownSizeUnicodeToDbcsN failed."
  289. ));
  290. StrOut[rc] = 0;
  291. return StrOut;
  292. }
  293. PWSTR
  294. KnownSizeDbcsToUnicodeN (
  295. OUT PWSTR StrOut,
  296. IN PCSTR StrIn,
  297. IN DWORD Chars
  298. )
  299. /*++
  300. Routine Description:
  301. KnownSizeDbcsToUnicodeN converts a DBCS string to UNICODE. The caller
  302. manages the outbound buffer.
  303. Arguments:
  304. StrOut - Receives the UNICODE result.
  305. StrIn - Specifies the DBCS string to convert.
  306. Chars - Specifies the character count of StrIn (not the byte count), or
  307. INVALID_CHAR_COUNT for the complete string.
  308. Return Value:
  309. Returns StrOut.
  310. --*/
  311. {
  312. DWORD rc;
  313. DWORD StrInBytesToConvert;
  314. if (INVALID_CHAR_COUNT == Chars) {
  315. StrInBytesToConvert = ByteCountA (StrIn);
  316. } else {
  317. StrInBytesToConvert = CharCountToPointerA (StrIn, Chars) - StrIn;
  318. }
  319. rc = MultiByteToWideChar (
  320. g_GlobalCodePage,
  321. 0, // MB_ERR_INVALID_CHARS,
  322. StrIn,
  323. StrInBytesToConvert,
  324. StrOut,
  325. StrInBytesToConvert * 2
  326. );
  327. DEBUGMSG_IF ((
  328. !rc && Chars,
  329. DBG_WARNING,
  330. "KnownSizeDbcsToUnicodeN failed."
  331. ));
  332. StrOut[rc] = 0;
  333. return StrOut;
  334. }
  335. PSTR
  336. DirectUnicodeToDbcsN (
  337. OUT PSTR StrOut,
  338. IN PCWSTR StrIn,
  339. IN DWORD Bytes
  340. )
  341. /*++
  342. Routine Description:
  343. DirectUnicodeToDbcsN converts a UNICODE string to DBCS. The caller
  344. manages the outbound buffer. This function does not check for nuls
  345. in StrIn when Bytes is non-zero, and it does not terminate the
  346. string.
  347. Arguments:
  348. StrOut - Receives the DBCS result.
  349. StrIn - Specifies the UNICODE string to convert.
  350. Bytes - Specifies the byte count of StrIn, or INVALID_CHAR_COUNT
  351. for the complete string.
  352. Return Value:
  353. Returns StrOut.
  354. --*/
  355. {
  356. DWORD rc;
  357. if (INVALID_CHAR_COUNT == Bytes) {
  358. Bytes = ByteCountW (StrIn);
  359. }
  360. rc = WideCharToMultiByte (
  361. g_GlobalCodePage,
  362. g_MigutilWCToMBFlags,
  363. StrIn,
  364. Bytes / sizeof (WCHAR),
  365. StrOut,
  366. Bytes,
  367. NULL,
  368. NULL
  369. );
  370. DEBUGMSG_IF ((
  371. !rc && Bytes,
  372. DBG_WARNING,
  373. "DirectUnicodeToDbcsN failed."
  374. ));
  375. return StrOut;
  376. }
  377. PWSTR
  378. DirectDbcsToUnicodeN (
  379. OUT PWSTR StrOut,
  380. IN PCSTR StrIn,
  381. IN DWORD Bytes
  382. )
  383. /*++
  384. Routine Description:
  385. DirectDbcsToUnicodeN converts a DBCS string to UNICODE. The caller
  386. manages the outbound buffer. This function does not check for nuls
  387. in StrIn when Bytes is non-zero, and it does not terminate the string.
  388. Arguments:
  389. StrOut - Receives the UNICODE result.
  390. StrIn - Specifies the DBCS string to convert.
  391. Bytes - Specifies the byte count of StrIn, or INVALID_CHAR_COUNT
  392. for the complete string.
  393. Return Value:
  394. Returns StrOut.
  395. --*/
  396. {
  397. DWORD rc;
  398. if (INVALID_CHAR_COUNT == Bytes) {
  399. Bytes = ByteCountA (StrIn);
  400. }
  401. rc = MultiByteToWideChar (
  402. g_GlobalCodePage,
  403. 0, // MB_ERR_INVALID_CHARS,
  404. StrIn,
  405. Bytes,
  406. StrOut,
  407. Bytes * 2
  408. );
  409. DEBUGMSG_IF ((
  410. !rc && Bytes,
  411. DBG_WARNING,
  412. "DirectDbcsToUnicodeN failed."
  413. ));
  414. return StrOut;
  415. }