Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

568 lines
12 KiB

  1. /*++
  2. Copyright (c) 1996 Microsoft Corporation
  3. Module Name:
  4. unicode.c
  5. Abstract:
  6. Simplified Unicode-Ansi conversion functions.
  7. Externally exposed routines:
  8. In-Place Conversion:
  9. KnownSizeDbcsToUnicodeN
  10. KnownSizeUnicodeToDbcsN
  11. KnownSizeWtoA
  12. KnownSizeAtoW
  13. In-Place Conversion without nul checks:
  14. DirectDbcsToUnicodeN
  15. DirectUnicodeToDbcsN
  16. DirectAtoW
  17. DirectWtoA
  18. Length/pool options:
  19. DbcsToUnicodeN
  20. UnicodeToDbcsN
  21. DbcsToUnicode
  22. UnicodeToDbcs
  23. FreeConvertedPoolStr
  24. Simplified type conversions:
  25. ConvertWtoA
  26. ConvertAtoW
  27. FreeConvertedStr
  28. TCHAR routines that can be compiled both ways:
  29. CreateDbcs
  30. CreateUnicode
  31. DestroyDbcs
  32. DestroyUnicode
  33. Author:
  34. Jim Schmidt (jimschm) 04-Aug-1997
  35. Revision History:
  36. marcw 2-Sep-1999 Moved over from Win9xUpg project.
  37. jimschm 15-Feb-1999 Eliminated MikeCo's routines, since they are
  38. broken on FE
  39. jimschm 23-Sep-1998 Added in-place routines
  40. --*/
  41. #include "pch.h"
  42. #include <locale.h>
  // Pool used by the conversion routines in this file whenever the caller
  // passes no pool of its own. Declared here; defined elsewhere.
  extern PMHANDLE g_TextPool;
  // Flags handed as dwFlags to every WideCharToMultiByte call in this file.
  // Declared here; defined elsewhere.
  extern DWORD g_MigutilWCToMBFlags;
  // Code page passed to every conversion call in this file. Stays CP_ACP
  // until SetGlobalCodePage stores a different value.
  WORD g_GlobalCodePage = CP_ACP;
  // Signature of the routine that SetGlobalCodePage resolves by name
  // ("SetCPGlobal") from kernel32.dll, and a pointer type for it.
  typedef VOID(WINAPI SETACP)(WORD CodePage);
  typedef SETACP * PSETACP;
  48. VOID
  49. SetGlobalCodePage (
  50. IN WORD CodePage,
  51. IN LCID Locale
  52. )
  53. {
  54. PSETACP SetACP;
  55. HANDLE Lib;
  56. g_GlobalCodePage = CodePage;
  57. if (ISNT()) {
  58. Lib = LoadLibrary (TEXT("kernel32.dll"));
  59. if (Lib) {
  60. SetACP = (PSETACP) GetProcAddress (Lib, "SetCPGlobal");
  61. if (SetACP) {
  62. SetACP (CodePage);
  63. }
  64. FreeLibrary (Lib);
  65. }
  66. }
  67. SetThreadLocale (Locale);
  68. setlocale(LC_ALL,"");
  69. }
  70. VOID
  71. GetGlobalCodePage (
  72. OUT PWORD CodePage, OPTIONAL
  73. OUT PLCID Locale OPTIONAL
  74. )
  75. {
  76. if (CodePage) {
  77. if (g_GlobalCodePage == CP_ACP) {
  78. *CodePage = (WORD) GetACP();
  79. } else {
  80. *CodePage = g_GlobalCodePage;
  81. }
  82. }
  83. if (Locale) {
  84. *Locale = GetThreadLocale();
  85. }
  86. }
  87. PCSTR
  88. RealUnicodeToDbcsN (
  89. IN PMHANDLE Pool, OPTIONAL
  90. IN PCWSTR StrIn,
  91. IN DWORD Chars
  92. )
  93. /*++
  94. Routine Description:
  95. Converts a UNICODE string to DBCS.
  96. BUGBUG: Currently supports the ANSI code page only. Later we can fix this.
  97. Arguments:
  98. Pool - Specifies the pool where memory is allocated from. If not specified,
  99. g_TextPool is used instead.
  100. StrIn - Specifies the inbound UNICODE string
  101. Chars - Specifies the number of characters, excluding the nul, to
  102. convert.
  103. Return Value:
  104. A pointer to the ANSI string, or NULL if an error occurred.
  105. --*/
  106. {
  107. PSTR DbcsStr;
  108. DWORD Size;
  109. DWORD rc;
  110. if (!Pool) {
  111. Pool = g_TextPool;
  112. }
  113. if (INVALID_CHAR_COUNT == Chars) {
  114. Chars = CharCountW (StrIn);
  115. }
  116. Size = (Chars + 1) * sizeof (WCHAR);
  117. DbcsStr = (PSTR) PmGetAlignedMemory (Pool, Size);
  118. if (!DbcsStr) {
  119. DEBUGMSG ((DBG_ERROR, "UnicodeToDbcsN could not allocate string"));
  120. return NULL;
  121. }
  122. rc = (DWORD) WideCharToMultiByte (
  123. g_GlobalCodePage,
  124. g_MigutilWCToMBFlags,
  125. StrIn,
  126. (INT) Chars, // wc input count
  127. DbcsStr,
  128. (INT) Size,
  129. NULL,
  130. NULL
  131. );
  132. // Report error returns from WideCharToMultiByte
  133. if (!rc && Chars) {
  134. PushError();
  135. PmReleaseMemory (Pool, DbcsStr);
  136. PopError();
  137. DEBUGMSG ((
  138. DBG_WARNING,
  139. "UnicodeToDbcsN error caused memory to be released in pool; may cause harmless PoolMem warnings."
  140. ));
  141. return NULL;
  142. }
  143. *CharCountToPointerA (DbcsStr, Chars) = 0;
  144. return DbcsStr;
  145. }
  146. PCWSTR
  147. RealDbcsToUnicodeN (
  148. IN PMHANDLE Pool, OPTIONAL
  149. IN PCSTR StrIn,
  150. IN DWORD Chars
  151. )
  152. /*++
  153. Routine Description:
  154. Converts a DBCS string to UNICODE.
  155. BUGBUG: Currently supports the ANSI code page only. Later we can fix this.
  156. Arguments:
  157. Pool - Specifies pool to allocate UNICODE string from. If not specified,
  158. g_TextPool is used.
  159. StrIn - Specifies string to be converted
  160. Chars - Specifies the number of multibyte characters, excluding the nul,
  161. to convert. If -1, all of StrIn will be converted.
  162. Return Value:
  163. A pointer to the converted UNICODE string, or NULL if an error ocurred.
  164. --*/
  165. {
  166. PWSTR UnicodeStr;
  167. DWORD UnicodeStrBufLenBytes;
  168. DWORD WcharsConverted;
  169. DWORD StrInBytesToConvert;
  170. //
  171. // Find number of multi-byte characters to convert. Punt on case where
  172. // caller asks for more chars than available.
  173. //
  174. if (INVALID_CHAR_COUNT == Chars) {
  175. Chars = CharCountA (StrIn);
  176. }
  177. //
  178. // Count bytes to convert from the input string (excludes delimiter)
  179. //
  180. StrInBytesToConvert = (DWORD)(CharCountToPointerA(StrIn, Chars) - StrIn);
  181. //
  182. // Get output buffer size, in bytes, including delimiter
  183. //
  184. UnicodeStrBufLenBytes = (Chars + 1) * sizeof (WCHAR);
  185. if (!Pool) {
  186. Pool = g_TextPool;
  187. }
  188. //
  189. // Get buffer
  190. //
  191. UnicodeStr = (PWSTR) PmGetAlignedMemory (Pool, UnicodeStrBufLenBytes);
  192. if (!UnicodeStr) {
  193. DEBUGMSG ((DBG_ERROR, "DbcsToUnicodeN could not allocate string"));
  194. return NULL;
  195. }
  196. //
  197. // Convert
  198. //
  199. WcharsConverted = (DWORD) MultiByteToWideChar (
  200. g_GlobalCodePage,
  201. 0,
  202. StrIn,
  203. (INT) StrInBytesToConvert,
  204. UnicodeStr,
  205. (INT) UnicodeStrBufLenBytes
  206. );
  207. //
  208. // Check for conversion error (>0 chars in, 0 chars out)
  209. //
  210. if (0 == WcharsConverted && 0 != Chars) {
  211. PushError();
  212. PmReleaseMemory (Pool, UnicodeStr);
  213. PopError();
  214. DEBUGMSG ((
  215. DBG_WARNING,
  216. "DbcsToUnicodeN error caused memory to be released in pool; may cause harmless warnings."
  217. ));
  218. return NULL;
  219. }
  220. //
  221. // Write delimiter on the output string
  222. //
  223. UnicodeStr[WcharsConverted] = 0;
  224. return UnicodeStr;
  225. }
  226. VOID
  227. FreeConvertedPoolStr (
  228. IN PMHANDLE Pool, OPTIONAL
  229. IN PVOID StrIn
  230. )
  231. /*++
  232. Routine Description:
  233. Frees the memory allocated by UnicodeToDbcsN or DbcsToUnicodeN.
  234. Arguments:
  235. Pool - Specifies pool to allocate UNICODE string from. If not specified,
  236. g_TextPool is used.
  237. StrIn - Specifies string that was returned by UnicodeToDebcsN or
  238. DbcsToUnicodeN.
  239. Return Value:
  240. none
  241. --*/
  242. {
  243. if (!StrIn) {
  244. return;
  245. }
  246. if (!Pool) {
  247. Pool = g_TextPool;
  248. }
  249. PmReleaseMemory (Pool, (PVOID) StrIn);
  250. }
  251. PSTR
  252. KnownSizeUnicodeToDbcsN (
  253. OUT PSTR StrOut,
  254. IN PCWSTR StrIn,
  255. IN DWORD Chars
  256. )
  257. /*++
  258. Routine Description:
  259. KnownSizeUnicodeToDbcsN converts a UNICODE string to DBCS. The caller
  260. manages the outbound buffer.
  261. Arguments:
  262. StrOut - Receives the DBCS result.
  263. StrIn - Specifies the UNICODE string to convert.
  264. Chars - Specifies the character count of StrIn (not the byte count), or
  265. INVALID_CHAR_COUNT for the complete string.
  266. Return Value:
  267. Returns StrOut.
  268. --*/
  269. {
  270. DWORD rc;
  271. if (INVALID_CHAR_COUNT == Chars) {
  272. Chars = CharCountW (StrIn);
  273. }
  274. rc = (DWORD) WideCharToMultiByte (
  275. g_GlobalCodePage,
  276. g_MigutilWCToMBFlags,
  277. StrIn,
  278. (INT) Chars, // wc input count
  279. StrOut,
  280. (INT) Chars * 2,
  281. NULL,
  282. NULL
  283. );
  284. DEBUGMSG_IF ((
  285. !rc && Chars,
  286. DBG_WARNING,
  287. "KnownSizeUnicodeToDbcsN failed."
  288. ));
  289. StrOut[rc] = 0;
  290. return StrOut;
  291. }
  292. PWSTR
  293. KnownSizeDbcsToUnicodeN (
  294. OUT PWSTR StrOut,
  295. IN PCSTR StrIn,
  296. IN DWORD Chars
  297. )
  298. /*++
  299. Routine Description:
  300. KnownSizeDbcsToUnicodeN converts a DBCS string to UNICODE. The caller
  301. manages the outbound buffer.
  302. Arguments:
  303. StrOut - Receives the UNICODE result.
  304. StrIn - Specifies the DBCS string to convert.
  305. Chars - Specifies the character count of StrIn (not the byte count), or
  306. INVALID_CHAR_COUNT for the complete string.
  307. Return Value:
  308. Returns StrOut.
  309. --*/
  310. {
  311. DWORD rc;
  312. DWORD StrInBytesToConvert;
  313. if (INVALID_CHAR_COUNT == Chars) {
  314. StrInBytesToConvert = ByteCountA (StrIn);
  315. } else {
  316. StrInBytesToConvert = (DWORD)(CharCountToPointerA (StrIn, Chars) - StrIn);
  317. }
  318. rc = (DWORD) MultiByteToWideChar (
  319. g_GlobalCodePage,
  320. 0, // MB_ERR_INVALID_CHARS,
  321. StrIn,
  322. (INT) StrInBytesToConvert,
  323. StrOut,
  324. (INT) StrInBytesToConvert * 2
  325. );
  326. DEBUGMSG_IF ((
  327. !rc && Chars,
  328. DBG_WARNING,
  329. "KnownSizeDbcsToUnicodeN failed."
  330. ));
  331. StrOut[rc] = 0;
  332. return StrOut;
  333. }
  334. PSTR
  335. DirectUnicodeToDbcsN (
  336. OUT PSTR StrOut,
  337. IN PCWSTR StrIn,
  338. IN DWORD Bytes
  339. )
  340. /*++
  341. Routine Description:
  342. DirectUnicodeToDbcsN converts a UNICODE string to DBCS. The caller
  343. manages the outbound buffer. This function does not check for nuls
  344. in StrIn when Bytes is non-zero, and it does not terminate the
  345. string.
  346. Arguments:
  347. StrOut - Receives the DBCS result.
  348. StrIn - Specifies the UNICODE string to convert.
  349. Bytes - Specifies the byte count of StrIn, or INVALID_CHAR_COUNT
  350. for the complete string.
  351. Return Value:
  352. Returns StrOut.
  353. --*/
  354. {
  355. DWORD rc;
  356. if (INVALID_CHAR_COUNT == Bytes) {
  357. Bytes = ByteCountW (StrIn);
  358. }
  359. rc = (DWORD) WideCharToMultiByte (
  360. g_GlobalCodePage,
  361. g_MigutilWCToMBFlags,
  362. StrIn,
  363. (INT) (Bytes / sizeof (WCHAR)),
  364. StrOut,
  365. (INT) Bytes,
  366. NULL,
  367. NULL
  368. );
  369. DEBUGMSG_IF ((
  370. !rc && Bytes,
  371. DBG_WARNING,
  372. "DirectUnicodeToDbcsN failed."
  373. ));
  374. return StrOut + rc;
  375. }
  376. PWSTR
  377. DirectDbcsToUnicodeN (
  378. OUT PWSTR StrOut,
  379. IN PCSTR StrIn,
  380. IN DWORD Bytes
  381. )
  382. /*++
  383. Routine Description:
  384. DirectDbcsToUnicodeN converts a DBCS string to UNICODE. The caller
  385. manages the outbound buffer. This function does not check for nuls
  386. in StrIn when Bytes is non-zero, and it does not terminate the string.
  387. Arguments:
  388. StrOut - Receives the UNICODE result.
  389. StrIn - Specifies the DBCS string to convert.
  390. Bytes - Specifies the byte count of StrIn, or INVALID_CHAR_COUNT
  391. for the complete string.
  392. Return Value:
  393. Returns StrOut.
  394. --*/
  395. {
  396. DWORD rc;
  397. if (INVALID_CHAR_COUNT == Bytes) {
  398. Bytes = ByteCountA (StrIn);
  399. }
  400. rc = (DWORD) MultiByteToWideChar (
  401. g_GlobalCodePage,
  402. 0, // MB_ERR_INVALID_CHARS,
  403. StrIn,
  404. (INT) Bytes,
  405. StrOut,
  406. (INT) Bytes * 2
  407. );
  408. DEBUGMSG_IF ((
  409. !rc && Bytes,
  410. DBG_WARNING,
  411. "DirectDbcsToUnicodeN failed."
  412. ));
  413. return StrOut + rc;
  414. }