windows-server-2003/windows/core/ntuser/rtl/chartran.c


								/****************************** Module Header ******************************\

								* Module Name: chartran.c

								*

								* Copyright (c) 1985 - 1999, Microsoft Corporation

								*

								* This module contains the routines for translating ACP characters

								* to Unicode and translating Unicode characters to ACP characters.

								* NOTE: The ACP is the currently installed 8-bit code page.

								*

								*

								* History:

								* 08-01-91 GregoryW      Created.

								* 05-14-92 GregoryW      Modified to use the Rtl translation routines.

								\***************************************************************************/


								/*
								 * Code page number imported from another module; IS_ACP() compares
								 * against it to recognise the ANSI code page (ACP).
								 */
								extern __declspec(dllimport) USHORT NlsAnsiCodePage;


								/*
								 * TRUE when cp names the ANSI code page, either by its number
								 * (NlsAnsiCodePage) or through the CP_ACP constant.
								 * NOTE: cp is evaluated twice, so pass an expression without side effects.
								 */
								#define IS_ACP(cp) (((cp) == NlsAnsiCodePage) || ((cp) == CP_ACP))


								/***************************************************************************\
								* WCSToMBEx (API)
								*
								* Convert a wide-character (Unicode) string to an MBCS (ANSI) string in
								* code page wCodePage.
								*
								* cchUnicodeString > 0 is the number of WCHARs to convert.
								*
								* cchUnicodeString == -1 indicates that pUnicodeString is NUL terminated;
								*    wcslen() + 1 WCHARs are converted, so the terminator is included.
								*
								* nAnsiChar > 0 indicates the number of bytes to allocate to store the
								*    ANSI string (if bAllocateMem == TRUE) or the size of the buffer
								*    pointed to by *ppAnsiString (bAllocateMem == FALSE).
								*
								* nAnsiChar == -1 indicates that the necessary number of bytes be allocated
								*    to hold the translated string (cchUnicodeString * DBCS_CHARSIZE).
								*    bAllocateMem must be set to TRUE in this case.
								*
								* Return value
								*   Success: number of bytes stored in the output string
								*        If bAllocateMem was TRUE, then FreeAnsiString() may be
								*        used to free the allocated memory at *ppAnsiString.
								*   Failure: 0 means failure
								*        (A buffer allocated by this routine is freed and *ppAnsiString
								*        is reset to NULL so the caller never sees a stale pointer)
								*
								* History:
								*  1992-??-?? GregoryW   Created
								*  1993-01-07 IanJa      fix memory leak on error case.
								\***************************************************************************/

								int
								WCSToMBEx(
								    WORD wCodePage,
								    LPCWSTR pUnicodeString,
								    int cchUnicodeString,
								    LPSTR *ppAnsiString,
								    int nAnsiChar,
								    BOOL bAllocateMem)
								{
								    ULONG nCharsInAnsiString;
								#ifdef _USERK_
								    INT iCharsInAnsiString;
								#endif // _USERK_

								    if (nAnsiChar == 0 || cchUnicodeString == 0 || pUnicodeString == NULL) {
								        return 0;      // nothing to translate or nowhere to put it
								    }

								    /*
								     * Adjust the cchUnicodeString value.  If cchUnicodeString == -1 then the
								     * string pointed to by pUnicodeString is NUL terminated so we
								     * count its characters (terminator included).  If cchUnicodeString < -1
								     * this is an illegal value so we return 0.  Otherwise, cchUnicodeString
								     * is set and requires no adjustment.
								     */
								    if (cchUnicodeString == -1) {
								        cchUnicodeString = (wcslen(pUnicodeString) + 1);
								    } else if (cchUnicodeString < -1) {
								        return 0;     // illegal value
								    }

								    /*
								     * Adjust the nAnsiChar value.  If nAnsiChar == -1 then we pick a
								     * value based on cchUnicodeString to hold the converted string.  If
								     * nAnsiChar < -1 this is an illegal value so we return 0.
								     * Otherwise, nAnsiChar is set and requires no adjustment.
								     */
								    if (nAnsiChar == -1) {
								        if (bAllocateMem == FALSE) {
								            return 0;  // no destination
								        }
								        nAnsiChar = cchUnicodeString * DBCS_CHARSIZE;
								    } else if (nAnsiChar < -1) {
								        return 0;     // illegal value
								    }

								    if (bAllocateMem) {
								        /*
								         * We need to allocate memory to hold the translated string.
								         */
								        *ppAnsiString = (LPSTR)UserRtlAllocMem(nAnsiChar);
								        if (*ppAnsiString == NULL) {
								            return 0;
								        }
								    }

								    /*
								     * Translate the Unicode string pointed to by pUnicodeString into
								     * ANSI and store it in the location pointed to by *ppAnsiString.  We
								     * stop translating when we fill up the ANSI buffer or reach
								     * the end of the Unicode string.
								     */

								    /*
								     * If the target multibyte codepage is equal to ACP, call the faster
								     * Rtl function.
								     */
								    if (IS_ACP(wCodePage)) {

								        NTSTATUS Status;

								        Status = RtlUnicodeToMultiByteN(
								                        (PCH)*ppAnsiString,
								                        nAnsiChar,
								                        &nCharsInAnsiString,
								                        (PWCH)pUnicodeString,
								                        cchUnicodeString * sizeof(WCHAR));
								        /*
								         * If the ansi buffer is too small, RtlUnicodeToMultiByteN()
								         * returns STATUS_BUFFER_OVERFLOW. In this case, the function
								         * puts as many ansi characters as fit in the buffer and
								         * returns the number of characters (in bytes) written. We would
								         * like to return the actual byte count written to the ansi
								         * buffer rather than returning 0, since callers of this function
								         * don't expect to get 0 back in most cases.
								         */
								        if (!NT_SUCCESS(Status) && Status != STATUS_BUFFER_OVERFLOW) {
								            if (bAllocateMem) {
								                UserRtlFreeMem(*ppAnsiString);
								                *ppAnsiString = NULL;   // don't leave a dangling pointer behind
								            }
								            return 0;   // translation failed
								        }

								        return (int)nCharsInAnsiString;

								    } else {

								#ifdef _USERK_
								        /*
								         * Call GRE to convert the string to multibyte. (Kernel mode)
								         * The source length is passed in bytes.
								         */
								        iCharsInAnsiString = EngWideCharToMultiByte(
								                                 (UINT)wCodePage,
								                                 (LPWSTR)pUnicodeString,
								                                 cchUnicodeString * sizeof(WCHAR),
								                                 (LPSTR)*ppAnsiString,
								                                 nAnsiChar);

								        /*
								         * A result of -1 is treated as failure and normalized to 0.
								         */
								        nCharsInAnsiString = (iCharsInAnsiString == -1) ? 0 :
								                                                          (ULONG) iCharsInAnsiString;

								#else
								        /*
								         * Call NLS API (Kernel32) to convert the string to multibyte.
								         * (User mode)  The source length is passed in WCHARs.
								         */
								        nCharsInAnsiString = WideCharToMultiByte(
								                                 (UINT)wCodePage, 0,
								                                 (LPCWSTR)pUnicodeString,
								                                 cchUnicodeString,
								                                 (LPSTR)*ppAnsiString,
								                                 nAnsiChar,
								                                 NULL, NULL);
								#endif // _USERK_

								        if (nCharsInAnsiString == 0) {
								            if (bAllocateMem) {
								                UserRtlFreeMem(*ppAnsiString);
								                *ppAnsiString = NULL;   // don't leave a dangling pointer behind
								            }
								        }

								        return (int)nCharsInAnsiString;
								    }
								}


								/***************************************************************************\
								* MBToWCSEx
								*
								* Convert an MBCS (ANSI) string in code page wCodePage to a
								* wide-character (Unicode) string.
								*
								* nAnsiChar > 0 is the number of bytes of pAnsiString to convert.
								*
								* nAnsiChar == USER_AWCONV_COUNTSTRING (-1) indicates that pAnsiString is
								*    NUL terminated; strlen() + 1 bytes are converted (terminator included).
								*
								* nAnsiChar == USER_AWCONV_COUNTSTRINGSZ (-2) indicates that pAnsiString is
								*    NUL terminated; strlen() bytes are converted (terminator excluded).
								*
								* cchUnicodeString > 0 is the number of WCHARs to allocate
								*    (bAllocateMem == TRUE) or the size in WCHARs of the buffer pointed
								*    to by *ppUnicodeString (bAllocateMem == FALSE).
								*
								* cchUnicodeString == -1 indicates that nAnsiChar WCHARs be allocated.
								*    bAllocateMem must be set to TRUE in this case.
								*
								* Return value
								*   Success: number of wide characters stored in the output string
								*   Failure: 0 means failure
								*        (A buffer allocated by this routine is freed and
								*        *ppUnicodeString is reset to NULL)
								\***************************************************************************/

								int MBToWCSEx(
								    WORD wCodePage,
								    LPCSTR pAnsiString,
								    int nAnsiChar,
								    LPWSTR *ppUnicodeString,
								    int cchUnicodeString,
								    BOOL bAllocateMem)
								{
								    ULONG nBytesInUnicodeString;

								    if (nAnsiChar == 0 || cchUnicodeString == 0 || pAnsiString == NULL) {
								        return 0;      // nothing to translate or nowhere to put it
								    }

								    /*
								     * Adjust the nAnsiChar value.  If nAnsiChar is -1 or -2 then the
								     * string pointed to by pAnsiString is NUL terminated so we
								     * count the number of bytes.  If nAnsiChar < -2 this is an
								     * illegal value so we return 0.  Otherwise, nAnsiChar is
								     * set and requires no adjustment.
								     */

								#ifdef _USERK_
								    UserAssert(nAnsiChar >= USER_AWCONV_COUNTSTRINGSZ);
								#endif
								    if (nAnsiChar < 0) {

								        /*
								         *  Bug 268035 - joejo
								         *  Need to fail if the count is a negative number less than -2!
								         */
								        if (nAnsiChar < USER_AWCONV_COUNTSTRINGSZ) {
								            return 0;
								        }

								#if (USER_AWCONV_COUNTSTRING != -1 || USER_AWCONV_COUNTSTRINGSZ != -2)
								#error USER_AWCONV_COUNTSTRING or USER_AWCONV_COUNTSTRINGSZ has unexpected value.
								#endif
								        /* HACK HACK HACK
								         * If nAnsiChar is -1 (USER_AWCONV_COUNTSTRING), nAnsiChar length will be strlen() + 1,
								         * to allocate the memory including trailing \0: this is compatible to the original code.
								         * If nAnsiChar is -2 (USER_AWCONV_COUNTSTRINGSZ), memory for trailing \0 will not be needed,
								         * so memory allocation is optimized and the return value would be same as strlen().
								         */
								        nAnsiChar = strlen(pAnsiString) + 2 + nAnsiChar;   // don't forget the NUL if nAnsiChar == -1

								        if (nAnsiChar == 0) {
								            return 0;   // empty string with USER_AWCONV_COUNTSTRINGSZ
								        }
								    }

								    /*
								     * Adjust the cchUnicodeString value.  If cchUnicodeString == -1 then we
								     * pick a value based on nAnsiChar to hold the converted string.  If
								     * cchUnicodeString < -1 this is an illegal value so we return 0.
								     * Otherwise, cchUnicodeString is set and requires no adjustment.
								     */
								    if (cchUnicodeString == -1) {
								        if (bAllocateMem == FALSE) {
								            return 0;    // no destination
								        }
								        cchUnicodeString = nAnsiChar;
								    } else if (cchUnicodeString < -1) {
								        return 0;     // illegal value
								    }

								    if (bAllocateMem) {
								        *ppUnicodeString = (LPWSTR)UserRtlAllocMem(cchUnicodeString*sizeof(WCHAR));
								        if (*ppUnicodeString == NULL) {
								            return 0;    // allocation failed
								        }
								    }

								    /*
								     * If the codepage is the ACP, we call the faster RtlXXX function.
								     */
								    if (IS_ACP(wCodePage)) {
								        /*
								         * Translate the ANSI string pointed to by pAnsiString into Unicode
								         * and store it in the location pointed to by *ppUnicodeString.  We
								         * stop translating when we fill up the Unicode buffer or reach
								         * the end of the ANSI string.
								         */
								        if (!NT_SUCCESS(RtlMultiByteToUnicodeN(
								                            (PWCH)*ppUnicodeString,
								                            cchUnicodeString * sizeof(WCHAR),
								                            &nBytesInUnicodeString,
								                            (PCH)pAnsiString,
								                            nAnsiChar
								                            ))) {
								            if (bAllocateMem) {
								                UserRtlFreeMem(*ppUnicodeString);
								                *ppUnicodeString = NULL;   // don't leave a dangling pointer behind
								            }
								            return 0;   // translation failed
								        }

								        /*
								         * The Rtl routine reports bytes; callers want WCHARs.
								         */
								        return (int)(nBytesInUnicodeString / sizeof(WCHAR));

								    } else {
								        /*
								         * wCodePage is not the ACP.
								         */
								#ifdef _USERK_

								        /*
								         * I believe we will never hit this code which is why I am
								         * adding this assert.  [gerritv] 5-21-96
								         *
								         * Kernel mode performs no conversion here and reports failure.
								         */
								#define SHOULD_NOT_REACH_HERE   0
								        UserAssert(SHOULD_NOT_REACH_HERE);
								#undef  SHOULD_NOT_REACH_HERE

								        /*
								         * The destination may already have been allocated above;
								         * release it so the failure return does not leak.
								         */
								        if (bAllocateMem) {
								            UserRtlFreeMem(*ppUnicodeString);
								            *ppUnicodeString = NULL;
								        }
								        return 0;

								#else
								        ULONG nCharsInUnicodeString;

								        /*
								         * Call NLS API (Kernel32) to convert string to Unicode. (User mode)
								         */
								        nCharsInUnicodeString = MultiByteToWideChar(
								                                    (UINT)wCodePage, 0,
								                                    (LPCSTR)pAnsiString,
								                                    (int)nAnsiChar,
								                                    (LPWSTR)*ppUnicodeString,
								                                    (int)cchUnicodeString);

								        if (nCharsInUnicodeString == 0) {
								            if (bAllocateMem) {
								                UserRtlFreeMem(*ppUnicodeString);
								                *ppUnicodeString = NULL;   // don't leave a dangling pointer behind
								            }
								        }

								        return (int)nCharsInUnicodeString;
								#endif // _USERK_
								    }

								}


								/**************************************************************************\
								* RtlWCSMessageWParamCharToMB
								*
								* Converts the Wide Character(s) held in *pWParam to Multibyte, in place,
								* for the character-carrying messages listed in the switch below.
								* wParam of any other message is left untouched.
								*
								* Always returns TRUE.  If the conversion fails, *pWParam is set to 0x00
								* instead of reporting an error (see the LATER note in the body).
								*
								* 11-Feb-1992  JohnC    Created
								\**************************************************************************/

								BOOL RtlWCSMessageWParamCharToMB(DWORD msg, WPARAM *pWParam)
								{
								    DWORD dwAnsi;
								    NTSTATUS Status;
								    WORD CodePage;
								    int nbWch;

								#ifdef FE_SB // RtlWCSMessageWParamCharToMB()
								    //
								    // Format of *pWParam here...
								    //
								    // LOWORD(*pWParam) = Unicode CodePoint...
								    // HIWORD(*pWParam) = Has some information for DBCS messaging
								    //                    (ex. WPARAM_IR_DBCSCHAR)
								    //
								    // Then we need to convert ONLY loword of wParam to Unicode...
								    //
								#endif // FE_SB
								#ifndef FE_SB
								    // NtBug #3135 (Closed 02/04/93)
								    // Publisher Posts WM_CHAR messages with wParam > 0xFF (not a valid ANSI char)!
								    //
								    // It does this to disable TranslateAccelerator for that char.
								    // MSPub's winproc must get the non-ANSI 'character' value, so PostMessage must
								    // translate *two* characters of wParam for character messages, and PeekMessage
								    // must translate *two* Unicode chars of wParam for ANSI app.
								#endif

								    /*
								     * Only these messages have CHARs: others are passed through
								     */

								    switch(msg) {
								#ifdef FE_IME // RtlWCSMessageWParamCharToMB()
								    case WM_IME_CHAR:
								    case WM_IME_COMPOSITION:
								#endif // FE_IME
								    case WM_CHAR:
								    case WM_CHARTOITEM:
								    case EM_SETPASSWORDCHAR:
								    case WM_DEADCHAR:
								    case WM_SYSCHAR:
								    case WM_SYSDEADCHAR:
								    case WM_MENUCHAR:

								        CodePage = THREAD_CODEPAGE();
								        dwAnsi = 0;

								        /*
								         * Source length in bytes: one WCHAR when DBCS is enabled
								         * (only the low word is a character), otherwise two WCHARs.
								         */
								        nbWch = IS_DBCS_ENABLED() ? 1 * sizeof(WCHAR) : 2 * sizeof(WCHAR);

								        if (IS_ACP(CodePage)) {
								            // HACK HACK HACK HACK (for NtBug #3135)
								            // to allow applications that store data in high word of wParam
								            // Jan/06/96 hiroyama
								            Status = RtlUnicodeToMultiByteN((LPSTR)&dwAnsi, sizeof(dwAnsi),
								                    NULL, (LPWSTR)pWParam, nbWch);
								            if (!NT_SUCCESS(Status)) {
								                // LATER IanJa: returning FALSE makes GetMessage fail, which
								                // terminates the app.  We should use some default 'bad character'
								                // I use 0x00 for now.
								                *pWParam = 0x00;
								                return TRUE;
								            }
								        } else {
								            int cwch;
								            // assuming little endian
								#ifdef _USERK_
								            cwch = EngWideCharToMultiByte(CodePage,
								                    (LPWSTR)pWParam, nbWch,
								                    (LPSTR)&dwAnsi, sizeof(dwAnsi));
								#else
								            cwch = WideCharToMultiByte(CodePage, 0,
								                    (LPCWSTR)pWParam, nbWch / sizeof(WCHAR),
								                    (LPSTR)&dwAnsi, sizeof(dwAnsi), NULL, NULL);
								#endif // _USERK_
								            // KdPrint(("0x%04x -> 0x%02x (%d)\n", *pWParam, dwAnsi, CodePage));

								            /*
								             * WideCharToMultiByte reports failure with 0, while
								             * EngWideCharToMultiByte reports it with -1 (WCSToMBEx
								             * treats -1 the same way), so reject both.
								             */
								            if (cwch <= 0) {
								                *pWParam = 0x00;
								                return TRUE;
								            }
								        }
								        if (IS_DBCS_ENABLED()) {
								            WORD wAnsi = LOWORD(dwAnsi);
								            //
								            // From:
								            //   HIBYTE(wAnsi)            = Dbcs TrailingByte.
								            //   LOBYTE(wAnsi)            = Dbcs LeadingByte or Sbcs character.
								            //
								            // To:
								            //   HIWORD(*pWParam)         = Original Data (information for DBCS messaging).
								            //   HIBYTE(LOWORD(*pWParam)) = Dbcs LeadingByte Byte.
								            //   LOBYTE(LOWORD(*pWParam)) = Dbcs TrailingByte or Sbcs character.
								            //
								            if (IS_DBCS_MESSAGE(wAnsi)) {
								                //
								                // It's a DBCS character.
								                //
								                *pWParam = MAKEWPARAM(MAKEWORD(HIBYTE(wAnsi),LOBYTE(wAnsi)),HIWORD(*pWParam));
								            } else {
								                //
								                // It's a SBCS character.
								                //
								                *pWParam = MAKEWPARAM(MAKEWORD(LOBYTE(wAnsi),0),0);
								            }
								        } else {
								#if DBG
								            if ((dwAnsi == 0) || (dwAnsi > 0xFF)) {
								                RIPMSG1(RIP_VERBOSE, "msgW -> msgA: char = 0x%.4lX\n", dwAnsi);
								            }
								#endif
								            *pWParam = dwAnsi;
								        }
								        break;
								    }

								    return TRUE;
								}


								/**************************************************************************\
								* RtlMBMessageWParamCharToWCS
								*
								* Converts the Multibyte character(s) held in *pWParam to Wide characters,
								* in place, for the character-carrying messages listed in the switch
								* below.  wParam of any other message is left untouched.
								*
								* Returns TRUE on success (and for messages that are passed through),
								* FALSE if the conversion fails; *pWParam is not updated with a
								* converted value in that case.
								*
								* 11-Feb-1992  JohnC    Created
								* 13-Jan-1993  IanJa    Translate 2 characters (Publisher posts these!)
								\**************************************************************************/

								BOOL RtlMBMessageWParamCharToWCS(DWORD msg, WPARAM *pWParam)
								{
								    DWORD dwUni;
								    NTSTATUS Status;
								    // FE_SB    (RtlMBMessageWParamCharToWCS)
								    BOOL  bWmCrIrDbcsChar = FALSE;
								    WORD  wAnsi = LOWORD(*pWParam);
								    // end FE_SB    (RtlMBMessageWParamCharToWCS)
								    WORD CodePage = THREAD_CODEPAGE();

								    /*
								     * Only these messages have CHARs: others are passed through
								     */

								    switch(msg) {
								    // FE_SB    (RtlMBMessageWParamCharToWCS)
								    case WM_CHAR:
								        //
								        // WM_CHAR's wParam format for WM_IME_REPORT:IR_DBCSCHAR
								        //
								        if (IS_DBCS_ENABLED() && (*pWParam & WMCR_IR_DBCSCHAR)) {
								            //
								            // Mark this message is sent as IR_DBCSCHAR format.
								            //
								            bWmCrIrDbcsChar = TRUE;
								        }

								        //
								        // Fall through....
								        //
								#ifdef FE_IME
								    case WM_IME_CHAR:
								    case WM_IME_COMPOSITION:
								        //
								        // We need to re-align for Unicode conversion..
								        // WM_CHAR/WM_IME_CHAR/WM_IME_COMPOSITION's wParam format :
								        //
								        // ReAlign IR_DBCS char format to regular sequence.
								        //
								        // From:
								        //
								        //  HIWORD(wParam)         = 0;
								        //  HIBYTE(LOWORD(wParam)) = DBCS LeadingByte.
								        //  LOBYTE(LOWORD(wParam)) = DBCS TrailingByte or SBCS character.
								        //
								        // To:
								        //  HIWORD(wParam)         = 0;
								        //  HIBYTE(LOWORD(wParam)) = DBCS TrailingByte.
								        //  LOBYTE(LOWORD(wParam)) = DBCS LeadingByte or SBCS character.
								        //
								        if (IS_DBCS_ENABLED()) {
								            *pWParam = MAKE_WPARAM_DBCSCHAR(wAnsi);
								        }
								#endif
								        //
								        // Fall through...
								        //
								        // end FE_SB    (RtlMBMessageWParamCharToWCS)
								    case WM_CHARTOITEM:
								    case EM_SETPASSWORDCHAR:
								    case WM_DEADCHAR:
								    case WM_SYSCHAR:
								    case WM_SYSDEADCHAR:
								    case WM_MENUCHAR:

								        dwUni = 0;

								        /*
								         * Two bytes of wParam are always translated (see the 1993
								         * history entry above).
								         */
								        if (IS_ACP(CodePage)) {
								            Status = RtlMultiByteToUnicodeN((LPWSTR)&dwUni, sizeof(dwUni),
								                    NULL, (LPSTR)pWParam, 2 * sizeof(CHAR));
								            if (!NT_SUCCESS(Status))
								                return FALSE;
								        } else {
								            int cwch;
								#ifdef _USERK_
								            cwch = EngMultiByteToWideChar(CodePage,
								                    (LPWSTR)&dwUni, sizeof(dwUni),
								                    (LPSTR)pWParam, 2);
								#else
								            cwch = MultiByteToWideChar(CodePage, 0,
								                    (LPSTR)pWParam, 2,
								                    (LPWSTR)&dwUni, sizeof(dwUni) / sizeof(WCHAR));
								#endif // _USERK_
								            // KdPrint(("0x%02x -> 0x%04x (%d)\n", *pWParam, dwUni, CodePage));

								            /*
								             * MultiByteToWideChar reports failure with 0, while
								             * EngMultiByteToWideChar reports it with -1, so reject
								             * both to stay consistent with the ACP path above.
								             */
								            if (cwch <= 0) {
								                return FALSE;
								            }
								        }

								        // FE_SB    (RtlMBMessageWParamCharToWCS)
								        //
								        // if this character is sent for WM_IME_REPORT:IR_DBCSCHAR, we mark it.
								        //
								        if (bWmCrIrDbcsChar)
								            dwUni |= WMCR_IR_DBCSCHAR;
								        // else FE_SB (RtlMBMessageWParamCharToWCS)
								#if DBG
								        if ((dwUni == 0) || (dwUni > 0xFF)) {
								            RIPMSG1(RIP_VERBOSE, "msgA -> msgW: wchar = 0x%lX\n", dwUni);
								        }
								#endif
								        // end FE_SB
								        *pWParam = dwUni;
								        break;
								    }

								    return TRUE;
								}


								/**************************************************************************\
								* RtlInitLargeAnsiString
								*
								* Points a LARGE_ANSI_STRING at psz without copying it, in the same
								* manner as RtlInitAnsiString.
								*
								* Buffer is set to psz and bAnsi to TRUE.  When psz is present, Length is
								* strlen(psz) and MaximumLength is strlen(psz) + 1, each capped at
								* cchLimit.  When psz is absent both lengths are 0.
								*
								* 03-22-95 JimA         Created.
								\**************************************************************************/

								VOID RtlInitLargeAnsiString(
								    PLARGE_ANSI_STRING plstr,
								    LPCSTR psz,
								    UINT cchLimit)
								{
								    ULONG cbText;

								    plstr->Buffer = (PSTR)psz;
								    plstr->bAnsi = TRUE;

								    /*
								     * No source string: describe an empty string and stop.
								     */
								    if (!ARGUMENT_PRESENT(psz)) {
								        plstr->MaximumLength = 0;
								        plstr->Length = 0;
								        return;
								    }

								    /*
								     * Length excludes the terminator, MaximumLength includes it;
								     * neither may exceed the caller's limit.
								     */
								    cbText = strlen(psz);
								    plstr->Length = min(cbText, cchLimit);
								    plstr->MaximumLength = min((cbText + 1), cchLimit);
								}


								/**************************************************************************\
								* RtlInitLargeUnicodeString
								*
								* Points a LARGE_UNICODE_STRING at psz without copying it, in the same
								* manner as RtlInitUnicodeString.
								*
								* Buffer is set to psz and bAnsi to FALSE.  When psz is present, Length
								* is the string size in bytes (wcslen(psz) * sizeof(WCHAR)) and
								* MaximumLength adds sizeof(UNICODE_NULL), each capped at cchLimit.
								* Note that cchLimit is compared against these byte counts.  When psz
								* is absent both lengths are 0.
								*
								* 03-22-95 JimA         Created.
								\**************************************************************************/

								VOID RtlInitLargeUnicodeString(
								    PLARGE_UNICODE_STRING plstr,
								    LPCWSTR psz,
								    UINT cchLimit)
								{
								    ULONG cbText;

								    plstr->Buffer = (PWSTR)psz;
								    plstr->bAnsi = FALSE;

								    /*
								     * No source string: describe an empty string and stop.
								     */
								    if (!ARGUMENT_PRESENT(psz)) {
								        plstr->MaximumLength = 0;
								        plstr->Length = 0;
								        return;
								    }

								    /*
								     * Length excludes the terminator, MaximumLength includes it;
								     * neither may exceed the caller's limit.
								     */
								    cbText = wcslen(psz) * sizeof(WCHAR);
								    plstr->Length = min(cbText, cchLimit);
								    plstr->MaximumLength = min((cbText + sizeof(UNICODE_NULL)), cchLimit);
								}