windows-server-2003/windows/core/ntuser/rtl/chartran.c

/****************************** Module Header ******************************\
* Module Name: chartran.c
*
* Copyright (c) 1985 - 1999, Microsoft Corporation
*
* This module contains the routines for translating ACP characters
* to Unicode and translating Unicode characters to ACP characters.
* NOTE: The ACP is the currently installed 8-bit code page.
*
*
* History:
* 08-01-91 GregoryW      Created.
* 05-14-92 GregoryW      Modified to use the Rtl translation routines.
\***************************************************************************/

extern __declspec(dllimport) USHORT NlsAnsiCodePage;

#define IS_ACP(cp) (((cp) == NlsAnsiCodePage) || ((cp) == CP_ACP))

/***************************************************************************\
* WCSToMBEx (API)
*
* Convert a wide-character (Unicode) string to MBCS (ANSI) string.
*
* nAnsiChar > 0 indicates the number of bytes to allocate to store the
*    ANSI string (if bAllocateMem == TRUE) or the size of the buffer
*    pointed to by *pAnsiString (bAllocateMem == FALSE).
*
* nAnsiChar == -1 indicates that the necessary number of bytes be allocated
*    to hold the translated string.  bAllocateMem must be set to TRUE in
*    this case.
*
* Return value
*   Success: number of characters in the output string
*        If bAllocateMem was TRUE, then FreeAnsiString() may be
*        used to free the allocated memory at *ppAnsiString.
*   Failure: 0 means failure
*        (Any buffers allocated by this routine are freed)
*
* History:
*  1992-??-?? GregoryW   Created
*  1993-01-07 IanJa      fix memory leak on error case.
\***************************************************************************/

int
WCSToMBEx(
    WORD wCodePage,
    LPCWSTR pUnicodeString,
    int cchUnicodeString,
    LPSTR *ppAnsiString,
    int nAnsiChar,
    BOOL bAllocateMem)
{
    ULONG nCharsInAnsiString;
#ifdef _USERK_
    INT iCharsInAnsiString;
#endif // _USERK_

    if (nAnsiChar == 0 || cchUnicodeString == 0 || pUnicodeString == NULL) {
        return 0;      // nothing to translate or nowhere to put it
    }

    /*
     * Adjust the cchUnicodeString value.  If cchUnicodeString == -1 then the
     * string pointed to by pUnicodeString is NUL terminated so we
     * count the number of bytes.  If cchUnicodeString < -1 this is an
     * illegal value so we return FALSE.  Otherwise, cchUnicodeString is
     * set and requires no adjustment.
     */
    if (cchUnicodeString == -1) {
        cchUnicodeString = (wcslen(pUnicodeString) + 1);
    } else if (cchUnicodeString < -1) {
        return 0;     // illegal value
    }

    /*
     * Adjust the nAnsiChar value.  If nAnsiChar == -1 then we pick a
     * value based on cchUnicodeString to hold the converted string.  If
     * nAnsiChar < -1 this is an illegal value so we return FALSE.
     * Otherwise, nAnsiChar is set and requires no adjustment.
     */
    if (nAnsiChar == -1) {
        if (bAllocateMem == FALSE) {
            return 0;  // no destination
        }
        nAnsiChar = cchUnicodeString * DBCS_CHARSIZE;
    } else if (nAnsiChar < -1) {
        return 0;     // illegal value
    }

    if (bAllocateMem) {
        /*
         * We need to allocate memory to hold the translated string.
         */
        *ppAnsiString = (LPSTR)UserRtlAllocMem(nAnsiChar);
        if (*ppAnsiString == NULL) {
            return 0;
        }
    }

    /*
     * translate Unicode string pointed to by pUnicodeString into
     * ANSI and store in location pointed to by pAnsiString.  We
     * stop translating when we fill up the ANSI buffer or reach
     * the end of the Unicode string.
     */

    /*
     * if the target multibyte codepage is eqaul to ACP, Call faster Rtl function.
     */
    if (IS_ACP(wCodePage)) {

        NTSTATUS Status;

        Status = RtlUnicodeToMultiByteN(
                        (PCH)*ppAnsiString,
                        nAnsiChar,
                        &nCharsInAnsiString,
                        (PWCH)pUnicodeString,
                        cchUnicodeString * sizeof(WCHAR));
        /*
         * If the ansi buffer is too small, RtlUnicodeToMultiByteN()
         * returns STATUS_BUFFER_OVERFLOW. In this case, the function
         * put as many ansi characters as specified in the buffer and
         *  returns the number by chacacters(in bytes) written. We would
         * like to return the actual byte  count written in the ansi
         * buffer rather than returnning 0 since callers of this function
         * don't expect to be returned 0 in most case.
         */

        if (!NT_SUCCESS(Status) && Status != STATUS_BUFFER_OVERFLOW) {
            if (bAllocateMem) {
                UserRtlFreeMem(*ppAnsiString);
            }
            return 0;   // translation failed
        }

        return (int)nCharsInAnsiString;

    } else {

#ifdef _USERK_
        /*
         * Call GRE to convert string to Unicode. (Kernel mode)
         */

        iCharsInAnsiString = EngWideCharToMultiByte(
                                 (UINT)wCodePage,
                                 (LPWSTR)pUnicodeString,
                                 cchUnicodeString * sizeof(WCHAR),
                                 (LPSTR)*ppAnsiString,
                                 nAnsiChar);

        nCharsInAnsiString = (iCharsInAnsiString == -1) ? 0 :
                                                          (ULONG) iCharsInAnsiString;

#else
        /*
         * Call NLS API (Kernel32) to convert string to Unicode. (User mode)
         */
        nCharsInAnsiString = WideCharToMultiByte(
                                 (UINT)wCodePage, 0,
                                 (LPCWSTR)pUnicodeString,
                                 cchUnicodeString,
                                 (LPSTR)*ppAnsiString,
                                 nAnsiChar,
                                 NULL, NULL);
#endif // _USERK_

        if (nCharsInAnsiString == 0) {
            if (bAllocateMem) {
                UserRtlFreeMem(*ppAnsiString);
            }
        }

        return (int)nCharsInAnsiString;
    }
}

// Returns number of character converted

int MBToWCSEx(
    WORD wCodePage,
    LPCSTR pAnsiString,
    int nAnsiChar,
    LPWSTR *ppUnicodeString,
    int cchUnicodeString,
    BOOL bAllocateMem)
{
    ULONG nBytesInUnicodeString;

    if (nAnsiChar == 0 || cchUnicodeString == 0 || pAnsiString == NULL) {
        return 0;      // nothing to translate or nowhere to put it
    }

    /*
     * Adjust the nAnsiChar value.  If nAnsiChar == -1 then the
     * string pointed to by pAnsiString is NUL terminated so we
     * count the number of bytes.  If nAnsiChar < -1 this is an
     * illegal value so we return FALSE.  Otherwise, nAnsiChar is
     * set and requires no adjustment.
     */

#ifdef _USERK_
    UserAssert(nAnsiChar >= USER_AWCONV_COUNTSTRINGSZ);
#endif
    if (nAnsiChar < 0) {

        /*
         *  Bug 268035 - joejo
         *  Need to fail if the count is a negative number less than -2!
         */
        if (nAnsiChar < USER_AWCONV_COUNTSTRINGSZ) {
            return 0;
        }

#if (USER_AWCONV_COUNTSTRING != -1 || USER_AWCONV_COUNTSTRINGSZ != -2)
#error USER_AWCONV_COUNTSTRING or USER_AWCONV_COUNTSTRINGSZ has unexpected value.
#endif
        /* HACK HACK HACK
         * If nAnsiChar is -1 (USER_AWCONV_COUNTSTRING), nAnsiChar length will be strlen() + 1,
         * to allocate the memory including trailing \0: this is compatible to the original code.
         * If nAnsiCahr is -2 (USER_AWCONV_COUNTSTRINGSZ), memory for trailing \0 will not be needed,
         * so memory allocation is optimized and the return value would be same as strlen().
         */
        nAnsiChar = strlen(pAnsiString) + 2 + nAnsiChar;   // don't forget the NUL if nAnsiChar == -1

        if (nAnsiChar == 0) {
            return 0;
        }
    }

    /*
     * Adjust the cchUnicodeString value.  If cchUnicodeString == -1 then we
     * pick a value based on nAnsiChar to hold the converted string.  If
     * cchUnicodeString < -1 this is an illegal value so we return FALSE.
     * Otherwise, cchUnicodeString is set and requires no adjustment.
     */
    if (cchUnicodeString == -1) {
        if (bAllocateMem == FALSE) {
            return 0;    // no destination
        }
        cchUnicodeString = nAnsiChar;
    } else if (cchUnicodeString < -1) {
        return 0;     // illegal value
    }

    if (bAllocateMem) {
        *ppUnicodeString = (LPWSTR)UserRtlAllocMem(cchUnicodeString*sizeof(WCHAR));
        if (*ppUnicodeString == NULL) {
            return 0;    // allocation failed
        }
    }

    /*
     * if codepage is CP_ACP, We will call faster RtlXXX function.
     */
    if (IS_ACP(wCodePage)) {
        /*
         * translate ANSI string pointed to by pAnsiString into Unicode
         * and store in location pointed to by pUnicodeString.  We
         * stop translating when we fill up the Unicode buffer or reach
         * the end of the ANSI string.
         */
        if (!NT_SUCCESS(RtlMultiByteToUnicodeN(
                            (PWCH)*ppUnicodeString,
                            cchUnicodeString * sizeof(WCHAR),
                            &nBytesInUnicodeString,
                            (PCH)pAnsiString,
                            nAnsiChar
                            ))) {
            if (bAllocateMem) {
                UserRtlFreeMem(*ppUnicodeString);
            }
            return 0;   // translation failed
        }

        return (int)(nBytesInUnicodeString / sizeof(WCHAR));

    } else {
        /*
         * if wCodePage is not ACP, Call NLS API.
         */
        ULONG nCharsInUnicodeString;

#ifdef _USERK_

        /*
         * I believe we will never hit this code which is why I am
         * adding this assert.  [gerritv] 5-21-96
         */
#define SHOULD_NOT_REACH_HERE   0
        UserAssert(SHOULD_NOT_REACH_HERE);
#undef  SHOULD_NOT_REACH_HERE
        return 0;

#if 0   // FYI: old code
        INT   iCharsInUnicodeString;

        /*
         * Call GRE to convert string to Unicode. (Kernel mode)
         * I believe we will never hit this code which is why I am
         * adding this assert.  [gerritv] 5-21-96
         */

        UserAssert(0);

        iCharsInUnicodeString = EngMultiByteToWideChar(
                                    (UINT)wCodePage,
                                    (LPWSTR)*ppUnicodeString,
                                    (int)cchUnicodeString * sizeof(WCHAR),
                                    (LPSTR)pAnsiString,
                                    (int)nAnsiChar);

        nCharsInUnicodeString = (iCharsInUnicodeString == -1) ? 0 :
                                                          (ULONG) iCharsInUnicodeString;
#endif

#else
        /*
         * Call NLS API (Kernel32) to convert string to Unicode. (User mode)
         */
        nCharsInUnicodeString = MultiByteToWideChar(
                                    (UINT)wCodePage, 0,
                                    (LPCSTR)pAnsiString,
                                    (int)nAnsiChar,
                                    (LPWSTR)*ppUnicodeString,
                                    (int)cchUnicodeString);
#endif // _USERK_

        if (nCharsInUnicodeString == 0) {
            if (bAllocateMem) {
                UserRtlFreeMem(*ppUnicodeString);
            }
        }

        return (int)nCharsInUnicodeString;
    }

}


/**************************************************************************\
* RtlWCSMessageWParmCharToMB
*
* Converts a Wide Character to a Multibyte character; in place
* Returns the number of characters converted or zero if failure
*
* 11-Feb-1992  JohnC    Created
\**************************************************************************/

BOOL RtlWCSMessageWParamCharToMB(DWORD msg, WPARAM *pWParam)
{
    DWORD dwAnsi;
    NTSTATUS Status;
    WORD CodePage;
    int nbWch;

#ifdef FE_SB // RtlWCSMessageWParamCharToMB()
    //
    // Format of *pWParam here...
    //
    // LOWORD(*pWParam) = Unicode CodePoint...
    // HIWORD(*pWParam) = Has some information for DBCS messaging
    //                    (ex. WPARAM_IR_DBCSCHAR)
    //
    // Then we need to convert ONLY loword of wParam to Unicode...
    //
#endif // FE_SB
#ifndef FE_SB
    // NtBug #3135 (Closed 02/04/93)
    // Publisher Posts WM_CHAR messages with wParam > 0xFF (not a valid ANSI char)!
    //
    // It does this to disable TranslateAccelerator for that char.
    // MSPub's winproc must get the non-ANSI 'character' value, so PostMessage must
    // translate *two* characters of wParam for character messages, and PeekMessage
    // must translate *two* Unicode chars of wParam for ANSI app.
#endif

    /*
     * Only these messages have CHARs: others are passed through
     */

    switch(msg) {
#ifdef FE_IME // RtlWCSMessageWParamCharToMB()
    case WM_IME_CHAR:
    case WM_IME_COMPOSITION:
#endif // FE_IME
    case WM_CHAR:
    case WM_CHARTOITEM:
    case EM_SETPASSWORDCHAR:
    case WM_DEADCHAR:
    case WM_SYSCHAR:
    case WM_SYSDEADCHAR:
    case WM_MENUCHAR:

        CodePage = THREAD_CODEPAGE();
        dwAnsi = 0;

        nbWch = IS_DBCS_ENABLED() ? 1 * sizeof(WCHAR) : 2 * sizeof(WCHAR);

        if (IS_ACP(CodePage)) {
            // HACK HACK HACK HACK (for NtBug #3135)
            // to allow applications that store data in high word of wParam
            // Jan/06/96 hiroyama
            Status = RtlUnicodeToMultiByteN((LPSTR)&dwAnsi, sizeof(dwAnsi),
                    NULL, (LPWSTR)pWParam, nbWch);
            if (!NT_SUCCESS(Status)) {
                // LATER IanJa: returning FALSE makes GetMessage fail, which
                // terminates the app.  We should use some default 'bad character'
                // I use 0x00 for now.
                *pWParam = 0x00;
                return TRUE;
            }
        } else {
            int cwch;
            // assuming little endian
#ifdef _USERK_
            cwch = EngWideCharToMultiByte(CodePage,
                    (LPWSTR)pWParam, nbWch,
                    (LPSTR)&dwAnsi, sizeof(dwAnsi));
#else
            cwch = WideCharToMultiByte(CodePage, 0,
                    (LPCWSTR)pWParam, nbWch / sizeof(WCHAR),
                    (LPSTR)&dwAnsi, sizeof(dwAnsi), NULL, NULL);
#endif // _USERK_
            // KdPrint(("0x%04x -> 0x%02x (%d)\n", *pWParam, dwAnsi, CodePage));
            if (cwch == 0) {
                *pWParam = 0x00;
                return TRUE;
            }
        }
        if (IS_DBCS_ENABLED()) {
            WORD wAnsi = LOWORD(dwAnsi);
            //
            // From:
            //   HIBYTE(wAnsi)            = Dbcs TrailingByte.
            //   LOBYTE(wAnsi)            = Dbcs LeadingByte or Sbcs character.
            //
            // To:
            //   HIWORD(*pWParam)         = Original Data (information for DBCS messgaing).
            //   HIBYTE(LOWORD(*pWParam)) = Dbcs LeadingByte Byte.
            //   LOBYTE(LOWORD(*pWParam)) = Dbcs TrailingByte or Sbcs character.
            //
            if (IS_DBCS_MESSAGE(wAnsi)) {
                //
                // It's a DBCS character.
                //
                *pWParam = MAKEWPARAM(MAKEWORD(HIBYTE(wAnsi),LOBYTE(wAnsi)),HIWORD(*pWParam));
            } else {
                //
                // It's a SBCS character.
                //
                *pWParam = MAKEWPARAM(MAKEWORD(LOBYTE(wAnsi),0),0);
            }
        } else {
#if DBG
            if ((dwAnsi == 0) || (dwAnsi > 0xFF)) {
                RIPMSG1(RIP_VERBOSE, "msgW -> msgA: char = 0x%.4lX\n", dwAnsi);
            }
#endif
            *pWParam = dwAnsi;
        }
        break;
    }

    return TRUE;
}


/**************************************************************************\
* RtlMBMessageCharToWCS
*
* Converts a Multibyte character to a Wide character; in place
* Returns the number of characters converted or zero if failure
*
* 11-Feb-1992  JohnC    Created
* 13-Jan-1993  IanJa    Translate 2 characters (Publisher posts these!)
\**************************************************************************/

BOOL RtlMBMessageWParamCharToWCS(DWORD msg, WPARAM *pWParam)
{
    DWORD dwUni;
    NTSTATUS Status;
    // FE_SB    (RtlMBMessageWParamCharToWCS)
    BOOL  bWmCrIrDbcsChar = FALSE;
    WORD  wAnsi = LOWORD(*pWParam);
    // end FE_SB    (RtlMBMessageWParamCharToWCS)
    WORD CodePage = THREAD_CODEPAGE();

    /*
     * Only these messages have CHARs: others are passed through
     */

    switch(msg) {
    // FE_SB    (RtlMBMessageWParamCharToWCS)
    case WM_CHAR:
        //
        // WM_CHAR's wParam format for WM_IME_REPORT:IR_DBCSCHAR
        //
        if (IS_DBCS_ENABLED() && (*pWParam & WMCR_IR_DBCSCHAR)) {
            //
            // Mark this message is sent as IR_DBCSCHAR format.
            //
            bWmCrIrDbcsChar = TRUE;
        }

        //
        // Fall through....
        //
#ifdef FE_IME
    case WM_IME_CHAR:
    case WM_IME_COMPOSITION:
        //
        // We need to re-align for Unicode convertsion..
        // WM_CHAR/WM_IME_CHAR/WM_IME_COMPOSITION's wParam format :
        //
        // ReAlign IR_DBCS char format to regular sequence.
        //
        // From:
        //
        //  HIWORD(wParam)         = 0;
        //  HIBYTE(LOWORD(wParam)) = DBCS LeadingByte.
        //  LOBYTE(LOWORD(wParan)) = DBCS TrailingByte or SBCS character.
        //
        // To:
        //  HIWORD(wParam)         = 0;
        //  HIBYTE(LOWORD(wParam)) = DBCS TrailingByte.
        //  LOBYTE(LOWORD(wParam)) = DBCS LeadingByte or SBCS character.
        //
        if (IS_DBCS_ENABLED()) {
            *pWParam = MAKE_WPARAM_DBCSCHAR(wAnsi);
        }
#endif
        //
        // Fall through...
        //
        // end FE_SB    (RtlMBMessageWParamCharToWCS)
    case WM_CHARTOITEM:
    case EM_SETPASSWORDCHAR:
    case WM_DEADCHAR:
    case WM_SYSCHAR:
    case WM_SYSDEADCHAR:
    case WM_MENUCHAR:

        dwUni = 0;

        if (IS_ACP(CodePage)) {
            Status = RtlMultiByteToUnicodeN((LPWSTR)&dwUni, sizeof(dwUni),
                    NULL, (LPSTR)pWParam, 2 * sizeof(CHAR));
            if (!NT_SUCCESS(Status))
                return FALSE;
        } else {
            int cwch;
#ifdef _USERK_
            cwch = EngMultiByteToWideChar(CodePage,
                    (LPWSTR)&dwUni, sizeof(dwUni),
                    (LPSTR)pWParam, 2);
#else
            cwch = MultiByteToWideChar(CodePage, 0,
                    (LPSTR)pWParam, 2,
                    (LPWSTR)&dwUni, sizeof(dwUni) / sizeof(WCHAR));
#endif // _USERK_
            // KdPrint(("0x%02x -> 0x%04x (%d)\n", *pWParam, dwUni, CodePage));
            if (cwch == 0) {
                return FALSE;
            }
        }

        // FE_SB    (RtlMBMessageWParamCharToWCS)
        //
        // if this character is sent for WM_IME_REPORT:IR_DBCSCHAR, we mark it.
        //
        if (bWmCrIrDbcsChar)
            dwUni |= WMCR_IR_DBCSCHAR;
        // else FE_SB (RtlMBMessageWParamCharToWCS)
#if DBG
        if ((dwUni == 0) || (dwUni > 0xFF)) {
            RIPMSG1(RIP_VERBOSE, "msgA -> msgW: wchar = 0x%lX\n", dwUni);
        }
#endif
        // end FE_SB
        *pWParam = dwUni;
        break;
    }

    return TRUE;
}

/**************************************************************************\
* RtlInitLargeAnsiString
*
* Captures a large ANSI string in the same manner as
* RtlInitAnsiString.
*
* 03-22-95 JimA         Created.
\**************************************************************************/

VOID RtlInitLargeAnsiString(
    PLARGE_ANSI_STRING plstr,
    LPCSTR psz,
    UINT cchLimit)
{
    ULONG Length;

    plstr->Buffer = (PSTR)psz;
    plstr->bAnsi = TRUE;
    if (ARGUMENT_PRESENT( psz )) {
        Length = strlen( psz );
        plstr->Length = min(Length, cchLimit);
        plstr->MaximumLength = min((Length + 1), cchLimit);
    } else {
        plstr->MaximumLength = 0;
        plstr->Length = 0;
    }
}

/**************************************************************************\
* RtlInitLargeUnicodeString
*
* Captures a large unicode string in the same manner as
* RtlInitUnicodeString.
*
* 03-22-95 JimA         Created.
\**************************************************************************/

VOID RtlInitLargeUnicodeString(
    PLARGE_UNICODE_STRING plstr,
    LPCWSTR psz,
    UINT cchLimit)
{
    ULONG Length;

    plstr->Buffer = (PWSTR)psz;
    plstr->bAnsi = FALSE;
    if (ARGUMENT_PRESENT( psz )) {
        Length = wcslen( psz ) * sizeof( WCHAR );
        plstr->Length = min(Length, cchLimit);
        plstr->MaximumLength = min((Length + sizeof(UNICODE_NULL)), cchLimit);
    } else {
        plstr->MaximumLength = 0;
        plstr->Length = 0;
    }
}