Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

579 lines
12 KiB

  1. /*++
  2. Copyright (c) 1996 Microsoft Corporation
  3. Module Name:
  4. unicode.c
  5. Abstract:
  6. Simplified Unicode-Ansi conversion functions.
  7. Externally exposed routines:
  8. In-Place Conversion:
  9. KnownSizeDbcsToUnicodeN
  10. KnownSizeUnicodeToDbcsN
  11. KnownSizeWtoA
  12. KnownSizeAtoW
  13. In-Place Conversion without nul checks:
  14. DirectDbcsToUnicodeN
  15. DirectUnicodeToDbcsN
  16. DirectAtoW
  17. DirectWtoA
  18. Length/pool options:
  19. DbcsToUnicodeN
  20. UnicodeToDbcsN
  21. DbcsToUnicode
  22. UnicodeToDbcs
  23. FreeConvertedPoolStr
  24. Simplified type conversions:
  25. ConvertWtoA
  26. ConvertAtoW
  27. FreeConvertedStr
  28. TCHAR routines that can be compiled both ways:
  29. CreateDbcs
  30. CreateUnicode
  31. DestroyDbcs
  32. DestroyUnicode
  33. Author:
  34. Jim Schmidt (jimschm) 04-Aug-1997
  35. Revision History:
  36. marcw 2-Sep-1999 Moved over from Win9xUpg project.
  37. jimschm 15-Feb-1999 Eliminated MikeCo's routines, since they are
  38. broken on FE
  39. jimschm 23-Sep-1998 Added in-place routines
  40. --*/
  41. #include "pch.h"
  42. #include <locale.h>
  43. #include "utilsp.h"
  44. extern PMHANDLE g_TextPool;
  45. extern DWORD g_MigutilWCToMBFlags;
  46. WORD g_GlobalCodePage = CP_ACP;
  47. typedef VOID(WINAPI SETACP)(WORD CodePage);
  48. typedef SETACP * PSETACP;
  49. VOID
  50. SetGlobalCodePage (
  51. IN WORD CodePage,
  52. IN LCID Locale
  53. )
  54. {
  55. PSETACP SetACP;
  56. HANDLE Lib;
  57. g_GlobalCodePage = CodePage;
  58. if (ISNT()) {
  59. Lib = LoadLibrary (TEXT("kernel32.dll"));
  60. if (Lib) {
  61. SetACP = (PSETACP) GetProcAddress (Lib, "SetCPGlobal");
  62. if (SetACP) {
  63. SetACP (CodePage);
  64. }
  65. FreeLibrary (Lib);
  66. }
  67. }
  68. SetThreadLocale (Locale);
  69. setlocale(LC_ALL,"");
  70. InitLeadByteTable ();
  71. }
  72. WORD
  73. SetConversionCodePage (
  74. IN WORD CodePage
  75. )
  76. {
  77. WORD oldCodePage = g_GlobalCodePage;
  78. g_GlobalCodePage = CodePage;
  79. return oldCodePage;
  80. }
  81. VOID
  82. GetGlobalCodePage (
  83. OUT PWORD CodePage, OPTIONAL
  84. OUT PLCID Locale OPTIONAL
  85. )
  86. {
  87. if (CodePage) {
  88. if (g_GlobalCodePage == CP_ACP) {
  89. *CodePage = (WORD) GetACP();
  90. } else {
  91. *CodePage = g_GlobalCodePage;
  92. }
  93. }
  94. if (Locale) {
  95. *Locale = GetThreadLocale();
  96. }
  97. }
  98. PCSTR
  99. RealUnicodeToDbcsN (
  100. IN PMHANDLE Pool, OPTIONAL
  101. IN PCWSTR StrIn,
  102. IN DWORD Chars
  103. )
  104. /*++
  105. Routine Description:
  106. Converts a UNICODE string to DBCS.
  107. Arguments:
  108. Pool - Specifies the pool where memory is allocated from. If not specified,
  109. g_TextPool is used instead.
  110. StrIn - Specifies the inbound UNICODE string
  111. Chars - Specifies the number of characters, excluding the nul, to
  112. convert.
  113. Return Value:
  114. A pointer to the ANSI string, or NULL if an error occurred.
  115. --*/
  116. {
  117. PSTR DbcsStr;
  118. DWORD Size;
  119. DWORD rc;
  120. if (!Pool) {
  121. Pool = g_TextPool;
  122. }
  123. if (INVALID_CHAR_COUNT == Chars) {
  124. Chars = CharCountW (StrIn);
  125. }
  126. Size = (Chars + 1) * 3; // maximum for UTF8 encoding
  127. DbcsStr = (PSTR) PmGetAlignedMemory (Pool, Size);
  128. if (!DbcsStr) {
  129. DEBUGMSG ((DBG_ERROR, "UnicodeToDbcsN could not allocate string"));
  130. return NULL;
  131. }
  132. rc = (DWORD) WideCharToMultiByte (
  133. g_GlobalCodePage,
  134. (g_GlobalCodePage == CP_UTF8)?0:g_MigutilWCToMBFlags,
  135. StrIn,
  136. (INT) Chars, // wc input count
  137. DbcsStr,
  138. (INT) Size,
  139. NULL,
  140. NULL
  141. );
  142. // Report error returns from WideCharToMultiByte
  143. if (!rc && Chars) {
  144. PushError();
  145. PmReleaseMemory (Pool, DbcsStr);
  146. PopError();
  147. DEBUGMSG ((
  148. DBG_WARNING,
  149. "UnicodeToDbcsN error caused memory to be released in pool; may cause harmless PoolMem warnings."
  150. ));
  151. return NULL;
  152. }
  153. if (g_GlobalCodePage == CP_UTF8) {
  154. DbcsStr [rc] = 0;
  155. } else {
  156. *CharCountToPointerA (DbcsStr, Chars) = 0;
  157. }
  158. return DbcsStr;
  159. }
  160. PCWSTR
  161. RealDbcsToUnicodeN (
  162. IN PMHANDLE Pool, OPTIONAL
  163. IN PCSTR StrIn,
  164. IN DWORD Chars
  165. )
  166. /*++
  167. Routine Description:
  168. Converts a DBCS string to UNICODE.
  169. Arguments:
  170. Pool - Specifies pool to allocate UNICODE string from. If not specified,
  171. g_TextPool is used.
  172. StrIn - Specifies string to be converted
  173. Chars - Specifies the number of multibyte characters, excluding the nul,
  174. to convert. If -1, all of StrIn will be converted.
  175. Return Value:
  176. A pointer to the converted UNICODE string, or NULL if an error ocurred.
  177. --*/
  178. {
  179. PWSTR UnicodeStr;
  180. DWORD UnicodeStrBufLenBytes;
  181. DWORD WcharsConverted;
  182. DWORD StrInBytesToConvert;
  183. //
  184. // Find number of multi-byte characters to convert. Punt on case where
  185. // caller asks for more chars than available.
  186. //
  187. if (INVALID_CHAR_COUNT == Chars) {
  188. Chars = CharCountA (StrIn);
  189. }
  190. //
  191. // Count bytes to convert from the input string (excludes delimiter)
  192. //
  193. StrInBytesToConvert = (DWORD)(CharCountToPointerA(StrIn, Chars) - StrIn);
  194. //
  195. // Get output buffer size, in bytes, including delimiter
  196. //
  197. UnicodeStrBufLenBytes = (Chars + 1) * sizeof (WCHAR);
  198. if (!Pool) {
  199. Pool = g_TextPool;
  200. }
  201. //
  202. // Get buffer
  203. //
  204. UnicodeStr = (PWSTR) PmGetAlignedMemory (Pool, UnicodeStrBufLenBytes);
  205. if (!UnicodeStr) {
  206. DEBUGMSG ((DBG_ERROR, "DbcsToUnicodeN could not allocate string"));
  207. return NULL;
  208. }
  209. //
  210. // Convert
  211. //
  212. WcharsConverted = (DWORD) MultiByteToWideChar (
  213. g_GlobalCodePage,
  214. 0,
  215. StrIn,
  216. (INT) StrInBytesToConvert,
  217. UnicodeStr,
  218. (INT) UnicodeStrBufLenBytes
  219. );
  220. //
  221. // Check for conversion error (>0 chars in, 0 chars out)
  222. //
  223. if (0 == WcharsConverted && 0 != Chars) {
  224. PushError();
  225. PmReleaseMemory (Pool, UnicodeStr);
  226. PopError();
  227. DEBUGMSG ((
  228. DBG_WARNING,
  229. "DbcsToUnicodeN error caused memory to be released in pool; may cause harmless warnings."
  230. ));
  231. return NULL;
  232. }
  233. //
  234. // Write delimiter on the output string
  235. //
  236. UnicodeStr[WcharsConverted] = 0;
  237. return UnicodeStr;
  238. }
  239. VOID
  240. FreeConvertedPoolStr (
  241. IN PMHANDLE Pool, OPTIONAL
  242. IN PVOID StrIn
  243. )
  244. /*++
  245. Routine Description:
  246. Frees the memory allocated by UnicodeToDbcsN or DbcsToUnicodeN.
  247. Arguments:
  248. Pool - Specifies pool to allocate UNICODE string from. If not specified,
  249. g_TextPool is used.
  250. StrIn - Specifies string that was returned by UnicodeToDebcsN or
  251. DbcsToUnicodeN.
  252. Return Value:
  253. none
  254. --*/
  255. {
  256. if (!StrIn) {
  257. return;
  258. }
  259. if (!Pool) {
  260. Pool = g_TextPool;
  261. }
  262. PmReleaseMemory (Pool, (PVOID) StrIn);
  263. }
  264. PSTR
  265. KnownSizeUnicodeToDbcsN (
  266. OUT PSTR StrOut,
  267. IN PCWSTR StrIn,
  268. IN DWORD Chars
  269. )
  270. /*++
  271. Routine Description:
  272. KnownSizeUnicodeToDbcsN converts a UNICODE string to DBCS. The caller
  273. manages the outbound buffer.
  274. Arguments:
  275. StrOut - Receives the DBCS result.
  276. StrIn - Specifies the UNICODE string to convert.
  277. Chars - Specifies the character count of StrIn (not the byte count), or
  278. INVALID_CHAR_COUNT for the complete string.
  279. Return Value:
  280. Returns StrOut.
  281. --*/
  282. {
  283. DWORD rc;
  284. if (INVALID_CHAR_COUNT == Chars) {
  285. Chars = CharCountW (StrIn);
  286. }
  287. rc = (DWORD) WideCharToMultiByte (
  288. g_GlobalCodePage,
  289. g_MigutilWCToMBFlags,
  290. StrIn,
  291. (INT) Chars, // wc input count
  292. StrOut,
  293. (INT) Chars * 2,
  294. NULL,
  295. NULL
  296. );
  297. DEBUGMSG_IF ((
  298. !rc && Chars,
  299. DBG_WARNING,
  300. "KnownSizeUnicodeToDbcsN failed."
  301. ));
  302. StrOut[rc] = 0;
  303. return StrOut;
  304. }
  305. PWSTR
  306. KnownSizeDbcsToUnicodeN (
  307. OUT PWSTR StrOut,
  308. IN PCSTR StrIn,
  309. IN DWORD Chars
  310. )
  311. /*++
  312. Routine Description:
  313. KnownSizeDbcsToUnicodeN converts a DBCS string to UNICODE. The caller
  314. manages the outbound buffer.
  315. Arguments:
  316. StrOut - Receives the UNICODE result.
  317. StrIn - Specifies the DBCS string to convert.
  318. Chars - Specifies the character count of StrIn (not the byte count), or
  319. INVALID_CHAR_COUNT for the complete string.
  320. Return Value:
  321. Returns StrOut.
  322. --*/
  323. {
  324. DWORD rc;
  325. DWORD StrInBytesToConvert;
  326. if (INVALID_CHAR_COUNT == Chars) {
  327. StrInBytesToConvert = ByteCountA (StrIn);
  328. } else {
  329. StrInBytesToConvert = (DWORD)(CharCountToPointerA (StrIn, Chars) - StrIn);
  330. }
  331. rc = (DWORD) MultiByteToWideChar (
  332. g_GlobalCodePage,
  333. 0, // MB_ERR_INVALID_CHARS,
  334. StrIn,
  335. (INT) StrInBytesToConvert,
  336. StrOut,
  337. (INT) StrInBytesToConvert * 2
  338. );
  339. DEBUGMSG_IF ((
  340. !rc && Chars,
  341. DBG_WARNING,
  342. "KnownSizeDbcsToUnicodeN failed."
  343. ));
  344. StrOut[rc] = 0;
  345. return StrOut;
  346. }
  347. PSTR
  348. DirectUnicodeToDbcsN (
  349. OUT PSTR StrOut,
  350. IN PCWSTR StrIn,
  351. IN DWORD Bytes
  352. )
  353. /*++
  354. Routine Description:
  355. DirectUnicodeToDbcsN converts a UNICODE string to DBCS. The caller
  356. manages the outbound buffer. This function does not check for nuls
  357. in StrIn when Bytes is non-zero, and it does not terminate the
  358. string.
  359. Arguments:
  360. StrOut - Receives the DBCS result.
  361. StrIn - Specifies the UNICODE string to convert.
  362. Bytes - Specifies the byte count of StrIn, or INVALID_CHAR_COUNT
  363. for the complete string.
  364. Return Value:
  365. Returns StrOut.
  366. --*/
  367. {
  368. DWORD rc;
  369. if (INVALID_CHAR_COUNT == Bytes) {
  370. Bytes = ByteCountW (StrIn);
  371. }
  372. rc = (DWORD) WideCharToMultiByte (
  373. g_GlobalCodePage,
  374. g_MigutilWCToMBFlags,
  375. StrIn,
  376. (INT) (Bytes / sizeof (WCHAR)),
  377. StrOut,
  378. (INT) Bytes,
  379. NULL,
  380. NULL
  381. );
  382. DEBUGMSG_IF ((
  383. !rc && Bytes,
  384. DBG_WARNING,
  385. "DirectUnicodeToDbcsN failed."
  386. ));
  387. return StrOut + rc;
  388. }
  389. PWSTR
  390. DirectDbcsToUnicodeN (
  391. OUT PWSTR StrOut,
  392. IN PCSTR StrIn,
  393. IN DWORD Bytes
  394. )
  395. /*++
  396. Routine Description:
  397. DirectDbcsToUnicodeN converts a DBCS string to UNICODE. The caller
  398. manages the outbound buffer. This function does not check for nuls
  399. in StrIn when Bytes is non-zero, and it does not terminate the string.
  400. Arguments:
  401. StrOut - Receives the UNICODE result.
  402. StrIn - Specifies the DBCS string to convert.
  403. Bytes - Specifies the byte count of StrIn, or INVALID_CHAR_COUNT
  404. for the complete string.
  405. Return Value:
  406. Returns StrOut.
  407. --*/
  408. {
  409. DWORD rc;
  410. if (INVALID_CHAR_COUNT == Bytes) {
  411. Bytes = ByteCountA (StrIn);
  412. }
  413. rc = (DWORD) MultiByteToWideChar (
  414. g_GlobalCodePage,
  415. 0, // MB_ERR_INVALID_CHARS,
  416. StrIn,
  417. (INT) Bytes,
  418. StrOut,
  419. (INT) Bytes * 2
  420. );
  421. DEBUGMSG_IF ((
  422. !rc && Bytes,
  423. DBG_WARNING,
  424. "DirectDbcsToUnicodeN failed."
  425. ));
  426. return StrOut + rc;
  427. }