//============================================================================ // // UNICODE and ANSI conversion functions // //============================================================================ #include "priv.h" #include /* * @doc INTERNAL * * @func int | SHAnsiToUnicodeNativeCP | * * Convert an ANSI string to a UNICODE string via the * specified Windows code page. If the source string is too large * for the destination buffer, then as many characters as * possible are copied. * * The resulting output string is always null-terminated. * * @parm UINT | uiCP | * * The code page in which to perform the conversion. * This must be a Windows code page. * * @parm LPCSTR | pszSrc | * * Source buffer containing ANSI string to be converted. * * @parm int | cchSrc | * * Source buffer length, including terminating null. * * @parm LPWSTR | pwszDst | * * Destination buffer to receive converted UNICODE string. * * @parm int | cwchBuf | * * Size of the destination buffer in s. * * @returns * * On success, the number of characters copied to the output * buffer is returned, including the terminating null. */ int SHAnsiToUnicodeNativeCP(UINT uiCP, LPCSTR pszSrc, int cchSrc, LPWSTR pwszDst, int cwchBuf) { int cwchRc = 0; /* Assume failure */ /* * Checks the caller should've made. */ ASSERT(IS_VALID_STRING_PTRA(pszSrc, -1)); ASSERT(cchSrc == lstrlenA(pszSrc) + 1); ASSERT(IS_VALID_WRITE_BUFFER(pwszDst, WCHAR, cwchBuf)); ASSERT(pszSrc != NULL); ASSERT(uiCP != 1200 && uiCP != 65000 && uiCP != 50000 && uiCP != 65001); ASSERT(pwszDst); ASSERT(cwchBuf); cwchRc = MultiByteToWideChar(uiCP, 0, pszSrc, cchSrc, pwszDst, cwchBuf); if (cwchRc) { /* * The output buffer was big enough; no double-buffering * needed. */ } else if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) { /* * The output buffer wasn't big enough. Need to double-buffer. */ int cwchNeeded = MultiByteToWideChar(uiCP, 0, pszSrc, cchSrc, NULL, 0); ASSERT(cwchRc == 0); /* In case we fail later */ if (cwchNeeded) { LPWSTR pwsz = (LPWSTR)LocalAlloc(LMEM_FIXED, cwchNeeded * SIZEOF(WCHAR)); if (pwsz) { cwchRc = MultiByteToWideChar(uiCP, 0, pszSrc, cchSrc, pwsz, cwchNeeded); if (cwchRc) { StrCpyNW(pwszDst, pwsz, cwchBuf); cwchRc = cwchBuf; } LocalFree(pwsz); } } } else { /* Possibly unsupported code page */ ASSERT(!"Unexpected error in MultiByteToWideChar"); } return cwchRc; } /* * @doc INTERNAL * * @func int | SHAnsiToUnicodeInetCP | * * Convert an ANSI string to a UNICODE string via the * specified Internet code page. If the source string is too large * for the destination buffer, then as many characters as * possible are copied. * * The resulting output string is always null-terminated. * * @parm UINT | uiCP | * * The code page in which to perform the conversion. * This must be an Internet code page. * * @parm LPCSTR | pszSrc | * * Source buffer containing ANSI string to be converted. * * @parm int | cchSrc | * * Source buffer length, including terminating null. * * @parm LPWSTR | pwszDst | * * Destination buffer to receive converted UNICODE string. * * @parm int | cwchBuf | * * Size of the destination buffer in s. * * @returns * * On success, the number of characters copied to the output * buffer is returned, including the terminating null. */ int SHAnsiToUnicodeInetCP(UINT uiCP, LPCSTR pszSrc, int cchSrc, LPWSTR pwszDst, int cwchBuf) { int cchSrcT, cwchNeeded; int cwchRc = 0; /* Assume failure */ HRESULT hres; DWORD dwMode; /* * Checks the caller should've made. */ ASSERT(IS_VALID_STRING_PTRA(pszSrc, -1)); ASSERT(cchSrc == lstrlenA(pszSrc) + 1); ASSERT(IS_VALID_WRITE_BUFFER(pwszDst, WCHAR, cwchBuf)); ASSERT(pszSrc != NULL); ASSERT(uiCP == 1200 || uiCP == 65000 || uiCP == 65001); ASSERT(pwszDst); ASSERT(cwchBuf); cchSrcT = cchSrc; cwchNeeded = cwchBuf; dwMode = 0; hres = ConvertINetMultiByteToUnicode(&dwMode, uiCP, pszSrc, &cchSrcT, pwszDst, &cwchNeeded); if (SUCCEEDED(hres)) { if (cchSrcT >= cchSrc) { /* * The output buffer was big enough; no double-buffering * needed. */ cwchRc = cwchNeeded; } else { /* * The output buffer wasn't big enough. Need to double-buffer. */ LPWSTR pwsz = (LPWSTR)LocalAlloc(LMEM_FIXED, cwchNeeded * SIZEOF(WCHAR)); if (pwsz) { dwMode = 0; hres = ConvertINetMultiByteToUnicode(&dwMode, uiCP, pszSrc, &cchSrc, pwsz, &cwchNeeded); if (SUCCEEDED(hres)) { StrCpyNW(pwszDst, pwsz, cwchBuf); cwchRc = cwchBuf; } LocalFree(pwsz); } } } else { /* Possibly unsupported code page */ ASSERT(!"Unexpected error in ConvertInetMultiByteToUnicode"); } return cwchRc; } /* * @doc EXTERNAL * * @func int | SHAnsiToUnicodeCP | * * Convert an ANSI string to a UNICODE string via the * specified code page, which can be either a native * Windows code page or an Internet code page. * If the source string is too large * for the destination buffer, then as many characters as * possible are copied. * * The resulting output string is always null-terminated. * * @parm UINT | uiCP | * * The code page in which to perform the conversion. * * @parm LPCSTR | pszSrc | * * Source buffer containing ANSI string to be converted. * * @parm LPWSTR | pwszDst | * * Destination buffer to receive converted UNICODE string. * * @parm int | cwchBuf | * * Size of the destination buffer in s. * * @returns * * On success, the number of characters copied to the output * buffer is returned, including the terminating null. */ int SHAnsiToUnicodeCP(UINT uiCP, LPCSTR pszSrc, LPWSTR pwszDst, int cwchBuf) { int cwchRc = 0; /* Assume failure */ RIPMSG(IS_VALID_STRING_PTRA(pszSrc, -1), "Caller of SHAnsiToUnicodeCP passed in a NULL pszSrc!"); ASSERT(IS_VALID_WRITE_BUFFER(pwszDst, WCHAR, cwchBuf)); /* * Sanity check - NULL source string is treated as a null string. */ if (pszSrc == NULL) { pszSrc = ""; } /* * Sanity check - Output buffer must be non-NULL and must be of * nonzero size. */ if (pwszDst && cwchBuf) { int cchSrc; pwszDst[0] = 0; /* In case of error */ cchSrc = lstrlenA(pszSrc) + 1; /* * Decide what kind of code page it is. */ switch (uiCP) { case 1200: // UCS-2 (Unicode) uiCP = 65001; // Fall through case 50000: // "User Defined" case 65000: // UTF-7 case 65001: // UTF-8 cwchRc = SHAnsiToUnicodeInetCP(uiCP, pszSrc, cchSrc, pwszDst, cwchBuf); break; default: cwchRc = SHAnsiToUnicodeNativeCP(uiCP, pszSrc, cchSrc, pwszDst, cwchBuf); break; } } return cwchRc; } // This function exists to make sure SHAnsiToAnsi and SHUnicodeToAnsi // have the same return value. Callers use SHTCharToAnsi and don't know // when it callapses to SHAnsiToAnsi. int SHAnsiToAnsi(LPCSTR pszSrc, LPSTR pszDst, int cchBuf) { int cchRc = 0; /* Assume failure */ if (cchBuf) { // APP COMPAT! WARNING! Sony PictureGear passes too-small buffers to // SHGetPathFromIDList (which uses SHAnsiToAnsi eventually), so we // must be careful to pass the actual buffer size to SHTruncateString // and not the theoretical maximum (rarely attained). LPSTR pszEnd = StrCpyNXA(pszDst, pszSrc, cchBuf); cchRc = (int)(pszEnd - pszDst) + 1; cchRc = SHTruncateString(pszDst, cchRc) + 1; } return cchRc; } // This function exists to make sure SHUnicodeToUnicode and SHUnicodeToAnsi // have the same return value. Callers use SHTCharToUnicode and don't know // when it callapses to SHUnicodeToUnicode. int SHUnicodeToUnicode(LPCWSTR pwzSrc, LPWSTR pwzDst, int cchBuf) { return (int) (StrCpyNXW(pwzDst, pwzSrc, cchBuf) - pwzDst + 1); // size including terminator } /* * @doc EXTERNAL * * @func int | SHAnsiToUnicode | * * Convert an ANSI string to a UNICODE string via the * code page. If the source string is too large * for the destination buffer, then as many characters as * possible are copied. * * The resulting output string is always null-terminated. * * @parm LPCSTR | pszSrc | * * Source buffer containing ANSI string to be converted. * * @parm LPWSTR | pwszDst | * * Destination buffer to receive converted UNICODE string. * * @parm int | cwchBuf | * * Size of the destination buffer in s. * * @returns * * On success, the number of characters copied to the output * buffer is returned, including the terminating null. * */ int SHAnsiToUnicode(LPCSTR pszSrc, LPWSTR pwszDst, int cwchBuf) { return SHAnsiToUnicodeCP(CP_ACP, pszSrc, pwszDst, cwchBuf); } /* * @doc INTERNAL * * @func int | SHUnicodeToAnsiNativeCP | * * Convert a UNICODE string to an ANSI string via the * specified Windows code page. If the source string is too large * for the destination buffer, then as many characters as * possible are copied. Care is taken not to break a double-byte * character. * * The resulting output string is always null-terminated. * * @parm UINT | uiCP | * * The code page in which to perform the conversion. * This must be a Windows code page. * * @parm LPCWSTR | pwszSrc | * * Source buffer containing UNICODE string to be converted. * * @parm int | cwchSrc | * * Number of characters in source buffer, including terminating * null. * * @parm LPSTR | pszDst | * * Destination buffer to receive converted ANSI string. * * @parm int | cchBuf | * * Size of the destination buffer in s. * * @returns * * On success, the number of characters copied to the output * buffer is returned, including the terminating null. * (For the purpose of this function, a double-byte character * counts as two characters.) */ int SHUnicodeToAnsiNativeCP(UINT uiCP, LPCWSTR pwszSrc, int cwchSrc, LPSTR pszDst, int cchBuf) { int cchRc = 0; /* Assume failure */ #ifdef DEBUG BOOL fVerify = TRUE; BOOL fLossy; if (uiCP == CP_ACPNOVALIDATE) { // -1 means use CP_ACP, but do *not* verify // kind of a hack, but it's DEBUG and leaves 99% of callers unchanged uiCP = CP_ACP; fVerify = FALSE; } #define USUALLY_NULL (&fLossy) #else #define USUALLY_NULL NULL #endif /* * Checks the caller should've made. */ ASSERT(IS_VALID_STRING_PTRW(pwszSrc, -1)); ASSERT(cwchSrc == lstrlenW(pwszSrc) + 1); ASSERT(IS_VALID_WRITE_BUFFER(pszDst, CHAR, cchBuf)); ASSERT(uiCP != 1200 && uiCP != 65000 && uiCP != 50000 && uiCP != 65001); ASSERT(pwszSrc); ASSERT(pszDst); ASSERT(cchBuf); cchRc = WideCharToMultiByte(uiCP, 0, pwszSrc, cwchSrc, pszDst, cchBuf, NULL, USUALLY_NULL); if (cchRc) { /* * The output buffer was big enough; no double-buffering * needed. */ } else if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) { /* * The output buffer wasn't big enough. Need to double-buffer. */ int cchNeeded = WideCharToMultiByte(uiCP, 0, pwszSrc, cwchSrc, NULL, 0, NULL, NULL); ASSERT(cchRc == 0); /* In case we fail later */ if (cchNeeded) { LPSTR psz = (LPSTR)LocalAlloc(LMEM_FIXED, cchNeeded * SIZEOF(CHAR)); if (psz) { cchRc = WideCharToMultiByte(uiCP, 0, pwszSrc, cwchSrc, psz, cchNeeded, NULL, USUALLY_NULL); if (cchRc) { // lstrcpyn doesn't check if it's chopping a DBCS char // so we need to use SHTruncateString. // // Add 1 because SHTruncateString doesn't count // the trailing null but we do // // Assert that we meet the preconditions for // SHTruncateString to return a valid value. // ASSERT(cchRc > cchBuf); cchRc = SHTruncateString(psz, cchBuf) + 1; lstrcpynA(pszDst, psz, cchBuf); } LocalFree(psz); } } } else { /* Possibly unsupported code page */ ASSERT(!"Unexpected error in WideCharToMultiByte"); } #ifdef DEBUG TBOOL(!fVerify || !fLossy); #endif return cchRc; } /* * @doc INTERNAL * * @func int | SHUnicodeToAnsiInetCP | * * Convert a UNICODE string to an ANSI string via the * specified Internet code page. If the source string is too large * for the destination buffer, then as many characters as * possible are copied. Care is taken not to break a double-byte * character. * * The resulting output string is always null-terminated. * * @parm UINT | uiCP | * * The code page in which to perform the conversion. * This must be an Internet code page. * * @parm LPCWSTR | pwszSrc | * * Source buffer containing UNICODE string to be converted. * * @parm int | cwchSrc | * * Number of characters in source buffer, including terminating * null. * * @parm LPSTR | pszDst | * * Destination buffer to receive converted ANSI string. * * @parm int | cchBuf | * * Size of the destination buffer in s. * * @returns * * On success, the number of characters copied to the output * buffer is returned, including the terminating null. * (For the purpose of this function, a double-byte character * counts as two characters.) */ int SHUnicodeToAnsiInetCP(UINT uiCP, LPCWSTR pwszSrc, int cwchSrc, LPSTR pszDst, int cchBuf) { int cwchSrcT, cchNeeded; int cchRc = 0; /* Assume failure */ DWORD dwMode; HRESULT hres; /* * Checks the caller should've made. */ ASSERT(IS_VALID_STRING_PTRW(pwszSrc, -1)); ASSERT(cwchSrc == lstrlenW(pwszSrc) + 1); ASSERT(IS_VALID_WRITE_BUFFER(pszDst, CHAR, cchBuf)); ASSERT(uiCP == 1200 || uiCP == 65000 || uiCP == 65001); ASSERT(pwszSrc); ASSERT(pszDst); ASSERT(cchBuf); /* * Note that not all encodings translate a null terminator into a null * terminator, so we have to save the NUL for last. */ cwchSrc--; /* Save the NUL for last */ cwchSrcT = cwchSrc; cchNeeded = cchBuf - 1; /* Save the NUL for last */ dwMode = 0; /* Start fresh */ hres = ConvertINetUnicodeToMultiByte(&dwMode, uiCP, pwszSrc, &cwchSrcT, pszDst, &cchNeeded); if (SUCCEEDED(hres)) { if (cwchSrcT >= cwchSrc) { /* * The output buffer was big enough; no double-buffering * needed. Translate the NUL manually. */ ASSERT(cchNeeded < cchBuf); pszDst[cchNeeded] = TEXT('\0'); cchRc = cchNeeded + 1; } else { /* * The output buffer wasn't big enough. Need to double-buffer. */ LPSTR psz = (LPSTR)LocalAlloc(LMEM_FIXED, cchNeeded * SIZEOF(CHAR)); if (psz) { dwMode = 0; /* Start fresh */ hres = ConvertINetUnicodeToMultiByte(&dwMode, uiCP, pwszSrc, &cwchSrc, psz, &cchNeeded); if (SUCCEEDED(hres)) { // lstrcpyn doesn't check if it's chopping a DBCS char // so we need to use SHTruncateString. // // Add 1 because SHTruncateString doesn't count // the trailing null but we do // // Assert that we meet the preconditions for // SHTruncateString to return a valid value. // ASSERT(cchNeeded > cchBuf); cchRc = SHTruncateString(psz, cchBuf) + 1; lstrcpynA(pszDst, psz, cchBuf); } LocalFree(psz); } } } else { /* Possibly unsupported code page */ ASSERT(!"Unexpected error in ConvertInetUnicodeToMultiByte"); } return cchRc; } /* * @doc EXTERNAL * * @func int | SHUnicodeToAnsiCP | * * Convert a UNICODE string to an ANSI string via the * specified code page, which can be either a native * Windows code page or an Internet code page. * If the source string is too large * for the destination buffer, then as many characters as * possible are copied. Care is taken not to break a double-byte * character. * * The resulting output string is always null-terminated. * * @parm UINT | uiCP | * * The code page in which to perform the conversion. * * @parm LPCWSTR | pwszSrc | * * Source buffer containing UNICODE string to be converted. * * @parm LPSTR | pszDst | * * Destination buffer to receive converted ANSI string. * * @parm int | cchBuf | * * Size of the destination buffer in s. * * @returns * * On success, the number of characters copied to the output * buffer is returned, including the terminating null. * (For the purpose of this function, a double-byte character * counts as two characters.) * */ int SHUnicodeToAnsiCP(UINT uiCP, LPCWSTR pwszSrc, LPSTR pszDst, int cchBuf) { int cchRc = 0; /* Assume failure */ #ifdef DEBUG #define GET_CP(uiCP) (((uiCP) == CP_ACPNOVALIDATE) ? CP_ACP : (uiCP)) #else #define GET_CP(uiCP) uiCP #endif RIPMSG(IS_VALID_STRING_PTRW(pwszSrc, -1), "Caller of SHUnicodeToAnsiCP passed in a NULL pwszSrc!"); ASSERT(IS_VALID_WRITE_BUFFER(pszDst, CHAR, cchBuf)); /* * Sanity check - NULL source string is treated as a null string. */ if (pwszSrc == NULL) { pwszSrc = L""; } /* * Sanity check - Output buffer must be non-NULL and must be of * nonzero size. */ if (pszDst && cchBuf) { int cwchSrc; pszDst[0] = 0; /* In case of error */ cwchSrc = lstrlenW(pwszSrc) + 1; /* Yes, Win9x has lstrlenW */ /* * Decide what kind of code page it is. */ switch (GET_CP(uiCP)) { case 1200: // UCS-2 (Unicode) uiCP = 65001; // Fall through case 50000: // "User Defined" case 65000: // UTF-7 case 65001: // UTF-8 cchRc = SHUnicodeToAnsiInetCP(GET_CP(uiCP), pwszSrc, cwchSrc, pszDst, cchBuf); break; default: cchRc = SHUnicodeToAnsiNativeCP(uiCP, pwszSrc, cwchSrc, pszDst, cchBuf); break; } } return cchRc; } /* * @doc EXTERNAL * * @func int | SHUnicodeToAnsi | * * Convert a UNICODE string to an ANSI string via the * code page. If the source string is too large * for the destination buffer, then as many characters as * possible are copied. Care is taken not to break a double-byte * character. * * The resulting output string is always null-terminated. * * @parm LPCWSTR | pwszSrc | * * Source buffer containing UNICODE string to be converted. * * @parm LPSTR | pszDst | * * Destination buffer to receive converted ANSI string. * * @parm int | cchBuf | * * Size of the destination buffer in s. * * @returns * * On success, the number of characters copied to the output * buffer is returned, including the terminating null. * (For the purpose of this function, a double-byte character * counts as two characters.) * */ int SHUnicodeToAnsi(LPCWSTR pwszSrc, LPSTR pszDst, int cchBuf) { return SHUnicodeToAnsiCP(CP_ACP, pwszSrc, pszDst, cchBuf); }