Leaked source code of Windows Server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

628 lines
19 KiB

  1. //============================================================================
  2. //
  3. // UNICODE and ANSI conversion functions
  4. //
  5. //============================================================================
  6. #include "stdafx.h"
  7. #include "uniansi.h"
  #define SIZEOF sizeof
  #ifdef UNICODE
  // SHTruncateString takes a BUFFER SIZE (in characters, terminator
  // included), so the terminator is stored at index (cch)-1 and the macro
  // evaluates to the resulting string length (cch)-1, or to 0 when cch is 0.
  //
  // Every use of cch is parenthesized so that expression arguments such as
  // "1 << 2" or "a ? b : c" are evaluated as a unit.  cch is still evaluated
  // more than once, so do not pass an expression with side effects.
  //
  #define SHTruncateString(wzStr, cch) ((cch) ? ((wzStr)[(cch)-1]=L'\0', ((cch)-1)) : 0)
  #else
  LWSTDAPI_(int) SHTruncateString(CHAR *sz, int cchBufferSize);
  #endif // UNICODE
  16. /*
  17. * @doc INTERNAL
  18. *
  19. * @func int | SHAnsiToUnicodeNativeCP |
  20. *
  21. * Convert an ANSI string to a UNICODE string via the
  22. * specified Windows code page. If the source string is too large
  23. * for the destination buffer, then as many characters as
  24. * possible are copied.
  25. *
  26. * The resulting output string is always null-terminated.
  27. *
  28. * @parm UINT | uiCP |
  29. *
  30. * The code page in which to perform the conversion.
  31. * This must be a Windows code page.
  32. *
  33. * @parm LPCSTR | pszSrc |
  34. *
  35. * Source buffer containing ANSI string to be converted.
  36. *
  37. * @parm int | cchSrc |
  38. *
  39. * Source buffer length, including terminating null.
  40. *
  41. * @parm LPWSTR | pwszDst |
  42. *
  43. * Destination buffer to receive converted UNICODE string.
  44. *
  45. * @parm int | cwchBuf |
  46. *
  47. * Size of the destination buffer in <t WCHAR>s.
  48. *
  49. * @returns
  50. *
  51. * On success, the number of characters copied to the output
  52. * buffer is returned, including the terminating null.
  53. */
  54. int
  55. SHAnsiToUnicodeNativeCP(UINT uiCP,
  56. LPCSTR pszSrc, int cchSrc,
  57. LPWSTR pwszDst, int cwchBuf)
  58. {
  59. int cwchRc = 0; /* Assume failure */
  60. /*
  61. * Checks the caller should've made.
  62. */
  63. ASSERT(IS_VALID_STRING_PTRA(pszSrc, -1));
  64. ASSERT(cchSrc == lstrlenA(pszSrc) + 1);
  65. ASSERT(IS_VALID_WRITE_BUFFER(pwszDst, WCHAR, cwchBuf));
  66. ASSERT(pszSrc != NULL);
  67. ASSERT(uiCP != 1200 && uiCP != 65000 && uiCP != 50000 && uiCP != 65001);
  68. ASSERT(pwszDst);
  69. ASSERT(cwchBuf);
  70. cwchRc = MultiByteToWideChar(uiCP, 0, pszSrc, cchSrc, pwszDst, cwchBuf);
  71. if (cwchRc) {
  72. /*
  73. * The output buffer was big enough; no double-buffering
  74. * needed.
  75. */
  76. } else if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
  77. /*
  78. * The output buffer wasn't big enough. Need to double-buffer.
  79. */
  80. int cwchNeeded = MultiByteToWideChar(uiCP, 0, pszSrc, cchSrc,
  81. NULL, 0);
  82. ASSERT(cwchRc == 0); /* In case we fail later */
  83. if (cwchNeeded) {
  84. LPWSTR pwsz = (LPWSTR)LocalAlloc(LMEM_FIXED,
  85. cwchNeeded * SIZEOF(WCHAR));
  86. if (pwsz) {
  87. cwchRc = MultiByteToWideChar(uiCP, 0, pszSrc, cchSrc,
  88. pwsz, cwchNeeded);
  89. if (cwchRc) {
  90. wcsncpy(pwszDst, pwsz, cwchBuf);
  91. cwchRc = cwchBuf;
  92. }
  93. LocalFree(pwsz);
  94. }
  95. }
  96. } else {
  97. /* Possibly unsupported code page */
  98. ASSERT(!"Unexpected error in MultiByteToWideChar");
  99. }
  100. return cwchRc;
  101. }
  102. /*
  103. * @doc EXTERNAL
  104. *
  105. * @func int | SHAnsiToUnicodeCP |
  106. *
  107. * Convert an ANSI string to a UNICODE string via the
  108. * specified code page, which can be either a native
  109. * Windows code page or an Internet code page.
  110. * If the source string is too large
  111. * for the destination buffer, then as many characters as
  112. * possible are copied.
  113. *
  114. * The resulting output string is always null-terminated.
  115. *
  116. * @parm UINT | uiCP |
  117. *
  118. * The code page in which to perform the conversion.
  119. *
  120. * @parm LPCSTR | pszSrc |
  121. *
  122. * Source buffer containing ANSI string to be converted.
  123. *
  124. * @parm LPWSTR | pwszDst |
  125. *
  126. * Destination buffer to receive converted UNICODE string.
  127. *
  128. * @parm int | cwchBuf |
  129. *
  130. * Size of the destination buffer in <t WCHAR>s.
  131. *
  132. * @returns
  133. *
  134. * On success, the number of characters copied to the output
  135. * buffer is returned, including the terminating null.
  136. */
  137. int
  138. SHAnsiToUnicodeCP(UINT uiCP, LPCSTR pszSrc, LPWSTR pwszDst, int cwchBuf)
  139. {
  140. int cwchRc = 0; /* Assume failure */
  141. ASSERT(IS_VALID_STRING_PTRA(pszSrc, -1));
  142. ASSERT(IS_VALID_WRITE_BUFFER(pwszDst, WCHAR, cwchBuf));
  143. /*
  144. * Sanity check - NULL source string is treated as a null string.
  145. */
  146. if (pszSrc == NULL) {
  147. pszSrc = "";
  148. }
  149. /*
  150. * Sanity check - Output buffer must be non-NULL and must be of
  151. * nonzero size.
  152. */
  153. if (pwszDst && cwchBuf) {
  154. int cchSrc;
  155. pwszDst[0] = 0; /* In case of error */
  156. cchSrc = lstrlenA(pszSrc) + 1;
  157. /*
  158. * Decide what kind of code page it is.
  159. */
  160. switch (uiCP) {
  161. case 1200: // UCS-2 (Unicode)
  162. uiCP = 65001;
  163. // Fall through
  164. case 50000: // "User Defined"
  165. case 65000: // UTF-7
  166. case 65001: // UTF-8
  167. //FIXFIX
  168. //cwchRc = SHAnsiToUnicodeInetCP(uiCP, pszSrc, cchSrc, pwszDst, cwchBuf);
  169. break;
  170. default:
  171. cwchRc = SHAnsiToUnicodeNativeCP(uiCP, pszSrc, cchSrc, pwszDst, cwchBuf);
  172. break;
  173. }
  174. }
  175. return cwchRc;
  176. }
  177. // This function exists to make sure SHAnsiToAnsi and SHUnicodeToAnsi
  178. // have the same return value. Callers use SHTCharToAnsi and don't know
  179. // when it callapses to SHAnsiToAnsi.
  180. int SHAnsiToAnsi(LPCSTR pszSrc, LPSTR pszDst, int cchBuf)
  181. {
  182. strncpy(pszDst, pszSrc, cchBuf);
  183. return (lstrlenA(pszDst) + 1); // size including terminator
  184. }
  185. // This function exists to make sure SHUnicodeToUnicode and SHUnicodeToAnsi
  186. // have the same return value. Callers use SHTCharToUnicode and don't know
  187. // when it callapses to SHUnicodeToUnicode.
  188. int SHUnicodeToUnicode(LPCWSTR pwzSrc, LPWSTR pwzDst, int cchBuf)
  189. {
  190. wcsncpy(pwzDst, pwzSrc, cchBuf);
  191. return (lstrlenW(pwzDst) + 1); // size including terminator
  192. }
  193. /*
  194. * @doc EXTERNAL
  195. *
  196. * @func int | SHAnsiToUnicode |
  197. *
  198. * Convert an ANSI string to a UNICODE string via the
  199. * <c CP_ACP> code page. If the source string is too large
  200. * for the destination buffer, then as many characters as
  201. * possible are copied.
  202. *
  203. * The resulting output string is always null-terminated.
  204. *
  205. * @parm LPCSTR | pszSrc |
  206. *
  207. * Source buffer containing ANSI string to be converted.
  208. *
  209. * @parm LPWSTR | pwszDst |
  210. *
  211. * Destination buffer to receive converted UNICODE string.
  212. *
  213. * @parm int | cwchBuf |
  214. *
  215. * Size of the destination buffer in <t WCHAR>s.
  216. *
  217. * @returns
  218. *
  219. * On success, the number of characters copied to the output
  220. * buffer is returned, including the terminating null.
  221. *
  222. */
  223. int
  224. SHAnsiToUnicode(LPCSTR pszSrc, LPWSTR pwszDst, int cwchBuf)
  225. {
  226. return SHAnsiToUnicodeCP(CP_ACP, pszSrc, pwszDst, cwchBuf);
  227. }
  228. /*
  229. * @doc INTERNAL
  230. *
  231. * @func int | SHUnicodeToAnsiNativeCP |
  232. *
  233. * Convert a UNICODE string to an ANSI string via the
  234. * specified Windows code page. If the source string is too large
  235. * for the destination buffer, then as many characters as
  236. * possible are copied. Care is taken not to break a double-byte
  237. * character.
  238. *
  239. * The resulting output string is always null-terminated.
  240. *
  241. * @parm UINT | uiCP |
  242. *
  243. * The code page in which to perform the conversion.
  244. * This must be a Windows code page.
  245. *
  246. * @parm LPCWSTR | pwszSrc |
  247. *
  248. * Source buffer containing UNICODE string to be converted.
  249. *
  250. * @parm int | cwchSrc |
  251. *
  252. * Number of characters in source buffer, including terminating
  253. * null.
  254. *
  255. * @parm LPSTR | pszDst |
  256. *
  257. * Destination buffer to receive converted ANSI string.
  258. *
  259. * @parm int | cchBuf |
  260. *
  261. * Size of the destination buffer in <t CHAR>s.
  262. *
  263. * @returns
  264. *
  265. * On success, the number of characters copied to the output
  266. * buffer is returned, including the terminating null.
  267. * (For the purpose of this function, a double-byte character
  268. * counts as two characters.)
  269. */
  270. int
  271. SHUnicodeToAnsiNativeCP(UINT uiCP,
  272. LPCWSTR pwszSrc, int cwchSrc,
  273. LPSTR pszDst, int cchBuf)
  274. {
  275. int cchRc = 0; /* Assume failure */
  276. #if DBG
  277. BOOL fVerify = TRUE;
  278. BOOL fLossy;
  279. if (uiCP == CP_ACPNOVALIDATE) {
  280. // -1 means use CP_ACP, but do *not* verify
  281. // kind of a hack, but it's DEBUG and leaves 99% of callers unchanged
  282. uiCP = CP_ACP;
  283. fVerify = FALSE;
  284. }
  285. #define USUALLY_NULL (&fLossy)
  286. #else
  287. #define USUALLY_NULL NULL
  288. #endif
  289. /*
  290. * Checks the caller should've made.
  291. */
  292. ASSERT(IS_VALID_STRING_PTRW(pwszSrc, -1));
  293. ASSERT(cwchSrc == lstrlenW(pwszSrc) + 1);
  294. ASSERT(IS_VALID_WRITE_BUFFER(pszDst, CHAR, cchBuf));
  295. ASSERT(uiCP != 1200 && uiCP != 65000 && uiCP != 50000 && uiCP != 65001);
  296. ASSERT(pwszSrc);
  297. ASSERT(pszDst);
  298. ASSERT(cchBuf);
  299. cchRc = WideCharToMultiByte(uiCP, 0, pwszSrc, cwchSrc, pszDst, cchBuf,
  300. NULL, USUALLY_NULL);
  301. if (cchRc) {
  302. /*
  303. * The output buffer was big enough; no double-buffering
  304. * needed.
  305. */
  306. } else if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
  307. /*
  308. * The output buffer wasn't big enough. Need to double-buffer.
  309. */
  310. int cchNeeded = WideCharToMultiByte(uiCP, 0, pwszSrc, cwchSrc,
  311. NULL, 0, NULL, NULL);
  312. ASSERT(cchRc == 0); /* In case we fail later */
  313. if (cchNeeded) {
  314. LPSTR psz = (LPSTR)LocalAlloc(LMEM_FIXED,
  315. cchNeeded * SIZEOF(CHAR));
  316. if (psz) {
  317. cchRc = WideCharToMultiByte(uiCP, 0, pwszSrc, cwchSrc,
  318. psz, cchNeeded, NULL, USUALLY_NULL);
  319. if (cchRc) {
  320. // lstrcpyn doesn't check if it's chopping a DBCS char
  321. // so we need to use SHTruncateString.
  322. //
  323. // Add 1 because SHTruncateString doesn't count
  324. // the trailing null but we do
  325. //
  326. // Assert that we meet the preconditions for
  327. // SHTruncateString to return a valid value.
  328. //
  329. ASSERT(cchRc > cchBuf);
  330. cchRc = SHTruncateString(psz, cchBuf) + 1;
  331. lstrcpynA(pszDst, psz, cchBuf);
  332. }
  333. LocalFree(psz);
  334. }
  335. }
  336. } else {
  337. /* Possibly unsupported code page */
  338. ASSERT(!"Unexpected error in WideCharToMultiByte");
  339. }
  340. #if DBG
  341. ASSERT(!fVerify || !fLossy);
  342. #endif
  343. return cchRc;
  344. }
  #if 0
  /*
   *  NOTE: this whole function is compiled out by the enclosing "#if 0".
   *  The only reference to it in this file is likewise inside a disabled
   *  "#if 0" section of SHUnicodeToAnsiCP.
   *
   *  @doc    INTERNAL
   *
   *  @func   int | SHUnicodeToAnsiInetCP |
   *
   *          Convert a UNICODE string to an ANSI string via the
   *          specified Internet code page.  If the source string is too large
   *          for the destination buffer, then as many characters as
   *          possible are copied.  Care is taken not to break a double-byte
   *          character.
   *
   *          The resulting output string is always null-terminated.
   *
   *  @parm   UINT | uiCP |
   *
   *          The code page in which to perform the conversion.
   *          This must be an Internet code page.
   *
   *  @parm   LPCWSTR | pwszSrc |
   *
   *          Source buffer containing UNICODE string to be converted.
   *
   *  @parm   int | cwchSrc |
   *
   *          Number of characters in source buffer, including terminating
   *          null.
   *
   *  @parm   LPSTR | pszDst |
   *
   *          Destination buffer to receive converted ANSI string.
   *
   *  @parm   int | cchBuf |
   *
   *          Size of the destination buffer in <t CHAR>s.
   *
   *  @returns
   *
   *          On success, the number of characters copied to the output
   *          buffer is returned, including the terminating null.
   *          (For the purpose of this function, a double-byte character
   *          counts as two characters.)
   *
   *          NOTE(review): as written, cchRc is only assigned on the
   *          truncation path, so the function returns 0 when the first
   *          conversion already fit in the buffer -- confirm before
   *          re-enabling.
   */
  int
  SHUnicodeToAnsiInetCP(UINT uiCP,
                        LPCWSTR pwszSrc, int cwchSrc,
                        LPSTR pszDst, int cchBuf)
  {
      int cwchSrcT, cchNeeded;
      int cchRc = 0;              /* Assume failure */
      HRESULT hres;
      /*
       *  Checks the caller should've made.
       */
      ASSERT(IS_VALID_STRING_PTRW(pwszSrc, -1));
      ASSERT(cwchSrc == lstrlenW(pwszSrc) + 1);
      ASSERT(IS_VALID_WRITE_BUFFER(pszDst, CHAR, cchBuf));
      /*
       *  NOTE(review): 50000 ("User Defined") is not accepted here although
       *  the disabled dispatch in SHUnicodeToAnsiCP lists it among the
       *  cases routed to this function.
       */
      ASSERT(uiCP == 1200 || uiCP == 65000 || uiCP == 65001);
      ASSERT(pwszSrc);
      ASSERT(pszDst);
      ASSERT(cchBuf);
      /*
       *  Both counts are passed by address; work on copies for the first
       *  attempt so the caller-supplied values stay available.
       */
      cwchSrcT = cwchSrc;
      cchNeeded = cchBuf;
      hres = ConvertINetUnicodeToMultiByte(NULL, uiCP, pwszSrc,
                                           &cwchSrcT, pszDst, &cchNeeded);
      if (SUCCEEDED(hres)) {
          if (cwchSrcT >= cwchSrc) {
              /*
               *  The output buffer was big enough; no double-buffering
               *  needed.
               */
          } else {
              /*
               *  The output buffer wasn't big enough.  Need to double-buffer.
               *
               *  NOTE(review): this presumes the first call left the full
               *  required size in cchNeeded; that depends on
               *  ConvertINetUnicodeToMultiByte semantics not visible here
               *  -- TODO confirm.
               */
              LPSTR psz = (LPSTR)LocalAlloc(LMEM_FIXED,
                                            cchNeeded * SIZEOF(CHAR));
              if (psz) {
                  hres = ConvertINetUnicodeToMultiByte(NULL, uiCP, pwszSrc,
                                                       &cwchSrc, psz, &cchNeeded);
                  if (SUCCEEDED(hres)) {
                      // lstrcpyn doesn't check if it's chopping a DBCS char
                      // so we need to use SHTruncateString.
                      //
                      // Add 1 because SHTruncateString doesn't count
                      // the trailing null but we do
                      //
                      // Assert that we meet the preconditions for
                      // SHTruncateString to return a valid value.
                      //
                      ASSERT(cchNeeded > cchBuf);
                      cchRc = SHTruncateString(psz, cchBuf) + 1;
                      lstrcpynA(pszDst, psz, cchBuf);
                  }
                  LocalFree(psz);
              }
          }
      } else {
          /* Possibly unsupported code page */
          ASSERT(!"Unexpected error in ConvertInetUnicodeToMultiByte");
      }
      return cchRc;
  }
  #endif
  449. /*
  450. * @doc EXTERNAL
  451. *
  452. * @func int | SHUnicodeToAnsiCP |
  453. *
  454. * Convert a UNICODE string to an ANSI string via the
  455. * specified code page, which can be either a native
  456. * Windows code page or an Internet code page.
  457. * If the source string is too large
  458. * for the destination buffer, then as many characters as
  459. * possible are copied. Care is taken not to break a double-byte
  460. * character.
  461. *
  462. * The resulting output string is always null-terminated.
  463. *
  464. * @parm UINT | uiCP |
  465. *
  466. * The code page in which to perform the conversion.
  467. *
  468. * @parm LPCWSTR | pwszSrc |
  469. *
  470. * Source buffer containing UNICODE string to be converted.
  471. *
  472. * @parm LPSTR | pszDst |
  473. *
  474. * Destination buffer to receive converted ANSI string.
  475. *
  476. * @parm int | cchBuf |
  477. *
  478. * Size of the destination buffer in <t CHAR>s.
  479. *
  480. * @returns
  481. *
  482. * On success, the number of characters copied to the output
  483. * buffer is returned, including the terminating null.
  484. * (For the purpose of this function, a double-byte character
  485. * counts as two characters.)
  486. *
  487. */
  488. int
  489. SHUnicodeToAnsiCP(UINT uiCP, LPCWSTR pwszSrc, LPSTR pszDst, int cchBuf)
  490. {
  491. int cchRc = 0; /* Assume failure */
  492. #if DBG
  493. #define GET_CP(uiCP) (((uiCP) == CP_ACPNOVALIDATE) ? CP_ACP : (uiCP))
  494. #else
  495. #define GET_CP(uiCP) uiCP
  496. #endif
  497. ASSERT(IS_VALID_STRING_PTRW(pwszSrc, -1));
  498. ASSERT(IS_VALID_WRITE_BUFFER(pszDst, CHAR, cchBuf));
  499. /*
  500. * Sanity check - NULL source string is treated as a null string.
  501. */
  502. if (pwszSrc == NULL) {
  503. pwszSrc = L"";
  504. }
  505. /*
  506. * Sanity check - Output buffer must be non-NULL and must be of
  507. * nonzero size.
  508. */
  509. if (pszDst && cchBuf) {
  510. int cwchSrc;
  511. pszDst[0] = 0; /* In case of error */
  512. cwchSrc = lstrlenW(pwszSrc) + 1; /* Yes, Win9x has lstrlenW */
  513. /*
  514. * Decide what kind of code page it is.
  515. */
  516. switch (GET_CP(uiCP)) {
  517. case 1200: // UCS-2 (Unicode)
  518. uiCP = 65001;
  519. // Fall through
  520. #if 0 //FIXIFX
  521. case 50000: // "User Defined"
  522. case 65000: // UTF-7
  523. case 65001: // UTF-8
  524. cchRc = SHUnicodeToAnsiInetCP(GET_CP(uiCP), pwszSrc, cwchSrc, pszDst, cchBuf);
  525. break;
  526. #endif
  527. default:
  528. cchRc = SHUnicodeToAnsiNativeCP(uiCP, pwszSrc, cwchSrc, pszDst, cchBuf);
  529. break;
  530. }
  531. }
  532. return cchRc;
  533. }
  534. /*
  535. * @doc EXTERNAL
  536. *
  537. * @func int | SHUnicodeToAnsi |
  538. *
  539. * Convert a UNICODE string to an ANSI string via the
  540. * <c CP_ACP> code page. If the source string is too large
  541. * for the destination buffer, then as many characters as
  542. * possible are copied. Care is taken not to break a double-byte
  543. * character.
  544. *
  545. * The resulting output string is always null-terminated.
  546. *
  547. * @parm LPCWSTR | pwszSrc |
  548. *
  549. * Source buffer containing UNICODE string to be converted.
  550. *
  551. * @parm LPSTR | pszDst |
  552. *
  553. * Destination buffer to receive converted ANSI string.
  554. *
  555. * @parm int | cchBuf |
  556. *
  557. * Size of the destination buffer in <t CHAR>s.
  558. *
  559. * @returns
  560. *
  561. * On success, the number of characters copied to the output
  562. * buffer is returned, including the terminating null.
  563. * (For the purpose of this function, a double-byte character
  564. * counts as two characters.)
  565. *
  566. */
  567. int
  568. SHUnicodeToAnsi(LPCWSTR pwszSrc, LPSTR pszDst, int cchBuf)
  569. {
  570. return SHUnicodeToAnsiCP(CP_ACP, pwszSrc, pszDst, cchBuf);
  571. }