/****************************** Module Header ******************************\ * Module Name: chartran.c * * Copyright (c) 1985 - 1999, Microsoft Corporation * * This module contains the routines for translating ACP characters * to Unicode and translating Unicode characters to ACP characters. * NOTE: The ACP is the currently installed 8-bit code page. * * * History: * 08-01-91 GregoryW Created. * 05-14-92 GregoryW Modified to use the Rtl translation routines. \***************************************************************************/ extern __declspec(dllimport) USHORT NlsAnsiCodePage; #define IS_ACP(cp) (((cp) == NlsAnsiCodePage) || ((cp) == CP_ACP)) /***************************************************************************\ * WCSToMBEx (API) * * Convert a wide-character (Unicode) string to MBCS (ANSI) string. * * nAnsiChar > 0 indicates the number of bytes to allocate to store the * ANSI string (if bAllocateMem == TRUE) or the size of the buffer * pointed to by *pAnsiString (bAllocateMem == FALSE). * * nAnsiChar == -1 indicates that the necessary number of bytes be allocated * to hold the translated string. bAllocateMem must be set to TRUE in * this case. * * Return value * Success: number of characters in the output string * If bAllocateMem was TRUE, then FreeAnsiString() may be * used to free the allocated memory at *ppAnsiString. * Failure: 0 means failure * (Any buffers allocated by this routine are freed) * * History: * 1992-??-?? GregoryW Created * 1993-01-07 IanJa fix memory leak on error case. \***************************************************************************/ int WCSToMBEx( WORD wCodePage, LPCWSTR pUnicodeString, int cchUnicodeString, LPSTR *ppAnsiString, int nAnsiChar, BOOL bAllocateMem) { ULONG nCharsInAnsiString; #ifdef _USERK_ INT iCharsInAnsiString; #endif // _USERK_ if (nAnsiChar == 0 || cchUnicodeString == 0 || pUnicodeString == NULL) { return 0; // nothing to translate or nowhere to put it } /* * Adjust the cchUnicodeString value. If cchUnicodeString == -1 then the * string pointed to by pUnicodeString is NUL terminated so we * count the number of bytes. If cchUnicodeString < -1 this is an * illegal value so we return FALSE. Otherwise, cchUnicodeString is * set and requires no adjustment. */ if (cchUnicodeString == -1) { cchUnicodeString = (wcslen(pUnicodeString) + 1); } else if (cchUnicodeString < -1) { return 0; // illegal value } /* * Adjust the nAnsiChar value. If nAnsiChar == -1 then we pick a * value based on cchUnicodeString to hold the converted string. If * nAnsiChar < -1 this is an illegal value so we return FALSE. * Otherwise, nAnsiChar is set and requires no adjustment. */ if (nAnsiChar == -1) { if (bAllocateMem == FALSE) { return 0; // no destination } nAnsiChar = cchUnicodeString * DBCS_CHARSIZE; } else if (nAnsiChar < -1) { return 0; // illegal value } if (bAllocateMem) { /* * We need to allocate memory to hold the translated string. */ *ppAnsiString = (LPSTR)UserRtlAllocMem(nAnsiChar); if (*ppAnsiString == NULL) { return 0; } } /* * translate Unicode string pointed to by pUnicodeString into * ANSI and store in location pointed to by pAnsiString. We * stop translating when we fill up the ANSI buffer or reach * the end of the Unicode string. */ /* * if the target multibyte codepage is eqaul to ACP, Call faster Rtl function. */ if (IS_ACP(wCodePage)) { NTSTATUS Status; Status = RtlUnicodeToMultiByteN( (PCH)*ppAnsiString, nAnsiChar, &nCharsInAnsiString, (PWCH)pUnicodeString, cchUnicodeString * sizeof(WCHAR)); /* * If the ansi buffer is too small, RtlUnicodeToMultiByteN() * returns STATUS_BUFFER_OVERFLOW. In this case, the function * put as many ansi characters as specified in the buffer and * returns the number by chacacters(in bytes) written. We would * like to return the actual byte count written in the ansi * buffer rather than returnning 0 since callers of this function * don't expect to be returned 0 in most case. */ if (!NT_SUCCESS(Status) && Status != STATUS_BUFFER_OVERFLOW) { if (bAllocateMem) { UserRtlFreeMem(*ppAnsiString); } return 0; // translation failed } return (int)nCharsInAnsiString; } else { #ifdef _USERK_ /* * Call GRE to convert string to Unicode. (Kernel mode) */ iCharsInAnsiString = EngWideCharToMultiByte( (UINT)wCodePage, (LPWSTR)pUnicodeString, cchUnicodeString * sizeof(WCHAR), (LPSTR)*ppAnsiString, nAnsiChar); nCharsInAnsiString = (iCharsInAnsiString == -1) ? 0 : (ULONG) iCharsInAnsiString; #else /* * Call NLS API (Kernel32) to convert string to Unicode. (User mode) */ nCharsInAnsiString = WideCharToMultiByte( (UINT)wCodePage, 0, (LPCWSTR)pUnicodeString, cchUnicodeString, (LPSTR)*ppAnsiString, nAnsiChar, NULL, NULL); #endif // _USERK_ if (nCharsInAnsiString == 0) { if (bAllocateMem) { UserRtlFreeMem(*ppAnsiString); } } return (int)nCharsInAnsiString; } } // Returns number of character converted int MBToWCSEx( WORD wCodePage, LPCSTR pAnsiString, int nAnsiChar, LPWSTR *ppUnicodeString, int cchUnicodeString, BOOL bAllocateMem) { ULONG nBytesInUnicodeString; if (nAnsiChar == 0 || cchUnicodeString == 0 || pAnsiString == NULL) { return 0; // nothing to translate or nowhere to put it } /* * Adjust the nAnsiChar value. If nAnsiChar == -1 then the * string pointed to by pAnsiString is NUL terminated so we * count the number of bytes. If nAnsiChar < -1 this is an * illegal value so we return FALSE. Otherwise, nAnsiChar is * set and requires no adjustment. */ #ifdef _USERK_ UserAssert(nAnsiChar >= USER_AWCONV_COUNTSTRINGSZ); #endif if (nAnsiChar < 0) { /* * Bug 268035 - joejo * Need to fail if the count is a negative number less than -2! */ if (nAnsiChar < USER_AWCONV_COUNTSTRINGSZ) { return 0; } #if (USER_AWCONV_COUNTSTRING != -1 || USER_AWCONV_COUNTSTRINGSZ != -2) #error USER_AWCONV_COUNTSTRING or USER_AWCONV_COUNTSTRINGSZ has unexpected value. #endif /* HACK HACK HACK * If nAnsiChar is -1 (USER_AWCONV_COUNTSTRING), nAnsiChar length will be strlen() + 1, * to allocate the memory including trailing \0: this is compatible to the original code. * If nAnsiCahr is -2 (USER_AWCONV_COUNTSTRINGSZ), memory for trailing \0 will not be needed, * so memory allocation is optimized and the return value would be same as strlen(). */ nAnsiChar = strlen(pAnsiString) + 2 + nAnsiChar; // don't forget the NUL if nAnsiChar == -1 if (nAnsiChar == 0) { return 0; } } /* * Adjust the cchUnicodeString value. If cchUnicodeString == -1 then we * pick a value based on nAnsiChar to hold the converted string. If * cchUnicodeString < -1 this is an illegal value so we return FALSE. * Otherwise, cchUnicodeString is set and requires no adjustment. */ if (cchUnicodeString == -1) { if (bAllocateMem == FALSE) { return 0; // no destination } cchUnicodeString = nAnsiChar; } else if (cchUnicodeString < -1) { return 0; // illegal value } if (bAllocateMem) { *ppUnicodeString = (LPWSTR)UserRtlAllocMem(cchUnicodeString*sizeof(WCHAR)); if (*ppUnicodeString == NULL) { return 0; // allocation failed } } /* * if codepage is CP_ACP, We will call faster RtlXXX function. */ if (IS_ACP(wCodePage)) { /* * translate ANSI string pointed to by pAnsiString into Unicode * and store in location pointed to by pUnicodeString. We * stop translating when we fill up the Unicode buffer or reach * the end of the ANSI string. */ if (!NT_SUCCESS(RtlMultiByteToUnicodeN( (PWCH)*ppUnicodeString, cchUnicodeString * sizeof(WCHAR), &nBytesInUnicodeString, (PCH)pAnsiString, nAnsiChar ))) { if (bAllocateMem) { UserRtlFreeMem(*ppUnicodeString); } return 0; // translation failed } return (int)(nBytesInUnicodeString / sizeof(WCHAR)); } else { /* * if wCodePage is not ACP, Call NLS API. */ ULONG nCharsInUnicodeString; #ifdef _USERK_ /* * I believe we will never hit this code which is why I am * adding this assert. [gerritv] 5-21-96 */ #define SHOULD_NOT_REACH_HERE 0 UserAssert(SHOULD_NOT_REACH_HERE); #undef SHOULD_NOT_REACH_HERE return 0; #if 0 // FYI: old code INT iCharsInUnicodeString; /* * Call GRE to convert string to Unicode. (Kernel mode) * I believe we will never hit this code which is why I am * adding this assert. [gerritv] 5-21-96 */ UserAssert(0); iCharsInUnicodeString = EngMultiByteToWideChar( (UINT)wCodePage, (LPWSTR)*ppUnicodeString, (int)cchUnicodeString * sizeof(WCHAR), (LPSTR)pAnsiString, (int)nAnsiChar); nCharsInUnicodeString = (iCharsInUnicodeString == -1) ? 0 : (ULONG) iCharsInUnicodeString; #endif #else /* * Call NLS API (Kernel32) to convert string to Unicode. (User mode) */ nCharsInUnicodeString = MultiByteToWideChar( (UINT)wCodePage, 0, (LPCSTR)pAnsiString, (int)nAnsiChar, (LPWSTR)*ppUnicodeString, (int)cchUnicodeString); #endif // _USERK_ if (nCharsInUnicodeString == 0) { if (bAllocateMem) { UserRtlFreeMem(*ppUnicodeString); } } return (int)nCharsInUnicodeString; } } /**************************************************************************\ * RtlWCSMessageWParmCharToMB * * Converts a Wide Character to a Multibyte character; in place * Returns the number of characters converted or zero if failure * * 11-Feb-1992 JohnC Created \**************************************************************************/ BOOL RtlWCSMessageWParamCharToMB(DWORD msg, WPARAM *pWParam) { DWORD dwAnsi; NTSTATUS Status; WORD CodePage; int nbWch; #ifdef FE_SB // RtlWCSMessageWParamCharToMB() // // Format of *pWParam here... // // LOWORD(*pWParam) = Unicode CodePoint... // HIWORD(*pWParam) = Has some information for DBCS messaging // (ex. WPARAM_IR_DBCSCHAR) // // Then we need to convert ONLY loword of wParam to Unicode... // #endif // FE_SB #ifndef FE_SB // NtBug #3135 (Closed 02/04/93) // Publisher Posts WM_CHAR messages with wParam > 0xFF (not a valid ANSI char)! // // It does this to disable TranslateAccelerator for that char. // MSPub's winproc must get the non-ANSI 'character' value, so PostMessage must // translate *two* characters of wParam for character messages, and PeekMessage // must translate *two* Unicode chars of wParam for ANSI app. #endif /* * Only these messages have CHARs: others are passed through */ switch(msg) { #ifdef FE_IME // RtlWCSMessageWParamCharToMB() case WM_IME_CHAR: case WM_IME_COMPOSITION: #endif // FE_IME case WM_CHAR: case WM_CHARTOITEM: case EM_SETPASSWORDCHAR: case WM_DEADCHAR: case WM_SYSCHAR: case WM_SYSDEADCHAR: case WM_MENUCHAR: CodePage = THREAD_CODEPAGE(); dwAnsi = 0; nbWch = IS_DBCS_ENABLED() ? 1 * sizeof(WCHAR) : 2 * sizeof(WCHAR); if (IS_ACP(CodePage)) { // HACK HACK HACK HACK (for NtBug #3135) // to allow applications that store data in high word of wParam // Jan/06/96 hiroyama Status = RtlUnicodeToMultiByteN((LPSTR)&dwAnsi, sizeof(dwAnsi), NULL, (LPWSTR)pWParam, nbWch); if (!NT_SUCCESS(Status)) { // LATER IanJa: returning FALSE makes GetMessage fail, which // terminates the app. We should use some default 'bad character' // I use 0x00 for now. *pWParam = 0x00; return TRUE; } } else { int cwch; // assuming little endian #ifdef _USERK_ cwch = EngWideCharToMultiByte(CodePage, (LPWSTR)pWParam, nbWch, (LPSTR)&dwAnsi, sizeof(dwAnsi)); #else cwch = WideCharToMultiByte(CodePage, 0, (LPCWSTR)pWParam, nbWch / sizeof(WCHAR), (LPSTR)&dwAnsi, sizeof(dwAnsi), NULL, NULL); #endif // _USERK_ // KdPrint(("0x%04x -> 0x%02x (%d)\n", *pWParam, dwAnsi, CodePage)); if (cwch == 0) { *pWParam = 0x00; return TRUE; } } if (IS_DBCS_ENABLED()) { WORD wAnsi = LOWORD(dwAnsi); // // From: // HIBYTE(wAnsi) = Dbcs TrailingByte. // LOBYTE(wAnsi) = Dbcs LeadingByte or Sbcs character. // // To: // HIWORD(*pWParam) = Original Data (information for DBCS messgaing). // HIBYTE(LOWORD(*pWParam)) = Dbcs LeadingByte Byte. // LOBYTE(LOWORD(*pWParam)) = Dbcs TrailingByte or Sbcs character. // if (IS_DBCS_MESSAGE(wAnsi)) { // // It's a DBCS character. // *pWParam = MAKEWPARAM(MAKEWORD(HIBYTE(wAnsi),LOBYTE(wAnsi)),HIWORD(*pWParam)); } else { // // It's a SBCS character. // *pWParam = MAKEWPARAM(MAKEWORD(LOBYTE(wAnsi),0),0); } } else { #if DBG if ((dwAnsi == 0) || (dwAnsi > 0xFF)) { RIPMSG1(RIP_VERBOSE, "msgW -> msgA: char = 0x%.4lX\n", dwAnsi); } #endif *pWParam = dwAnsi; } break; } return TRUE; } /**************************************************************************\ * RtlMBMessageCharToWCS * * Converts a Multibyte character to a Wide character; in place * Returns the number of characters converted or zero if failure * * 11-Feb-1992 JohnC Created * 13-Jan-1993 IanJa Translate 2 characters (Publisher posts these!) \**************************************************************************/ BOOL RtlMBMessageWParamCharToWCS(DWORD msg, WPARAM *pWParam) { DWORD dwUni; NTSTATUS Status; // FE_SB (RtlMBMessageWParamCharToWCS) BOOL bWmCrIrDbcsChar = FALSE; WORD wAnsi = LOWORD(*pWParam); // end FE_SB (RtlMBMessageWParamCharToWCS) WORD CodePage = THREAD_CODEPAGE(); /* * Only these messages have CHARs: others are passed through */ switch(msg) { // FE_SB (RtlMBMessageWParamCharToWCS) case WM_CHAR: // // WM_CHAR's wParam format for WM_IME_REPORT:IR_DBCSCHAR // if (IS_DBCS_ENABLED() && (*pWParam & WMCR_IR_DBCSCHAR)) { // // Mark this message is sent as IR_DBCSCHAR format. // bWmCrIrDbcsChar = TRUE; } // // Fall through.... // #ifdef FE_IME case WM_IME_CHAR: case WM_IME_COMPOSITION: // // We need to re-align for Unicode convertsion.. // WM_CHAR/WM_IME_CHAR/WM_IME_COMPOSITION's wParam format : // // ReAlign IR_DBCS char format to regular sequence. // // From: // // HIWORD(wParam) = 0; // HIBYTE(LOWORD(wParam)) = DBCS LeadingByte. // LOBYTE(LOWORD(wParan)) = DBCS TrailingByte or SBCS character. // // To: // HIWORD(wParam) = 0; // HIBYTE(LOWORD(wParam)) = DBCS TrailingByte. // LOBYTE(LOWORD(wParam)) = DBCS LeadingByte or SBCS character. // if (IS_DBCS_ENABLED()) { *pWParam = MAKE_WPARAM_DBCSCHAR(wAnsi); } #endif // // Fall through... // // end FE_SB (RtlMBMessageWParamCharToWCS) case WM_CHARTOITEM: case EM_SETPASSWORDCHAR: case WM_DEADCHAR: case WM_SYSCHAR: case WM_SYSDEADCHAR: case WM_MENUCHAR: dwUni = 0; if (IS_ACP(CodePage)) { Status = RtlMultiByteToUnicodeN((LPWSTR)&dwUni, sizeof(dwUni), NULL, (LPSTR)pWParam, 2 * sizeof(CHAR)); if (!NT_SUCCESS(Status)) return FALSE; } else { int cwch; #ifdef _USERK_ cwch = EngMultiByteToWideChar(CodePage, (LPWSTR)&dwUni, sizeof(dwUni), (LPSTR)pWParam, 2); #else cwch = MultiByteToWideChar(CodePage, 0, (LPSTR)pWParam, 2, (LPWSTR)&dwUni, sizeof(dwUni) / sizeof(WCHAR)); #endif // _USERK_ // KdPrint(("0x%02x -> 0x%04x (%d)\n", *pWParam, dwUni, CodePage)); if (cwch == 0) { return FALSE; } } // FE_SB (RtlMBMessageWParamCharToWCS) // // if this character is sent for WM_IME_REPORT:IR_DBCSCHAR, we mark it. // if (bWmCrIrDbcsChar) dwUni |= WMCR_IR_DBCSCHAR; // else FE_SB (RtlMBMessageWParamCharToWCS) #if DBG if ((dwUni == 0) || (dwUni > 0xFF)) { RIPMSG1(RIP_VERBOSE, "msgA -> msgW: wchar = 0x%lX\n", dwUni); } #endif // end FE_SB *pWParam = dwUni; break; } return TRUE; } /**************************************************************************\ * RtlInitLargeAnsiString * * Captures a large ANSI string in the same manner as * RtlInitAnsiString. * * 03-22-95 JimA Created. \**************************************************************************/ VOID RtlInitLargeAnsiString( PLARGE_ANSI_STRING plstr, LPCSTR psz, UINT cchLimit) { ULONG Length; plstr->Buffer = (PSTR)psz; plstr->bAnsi = TRUE; if (ARGUMENT_PRESENT( psz )) { Length = strlen( psz ); plstr->Length = min(Length, cchLimit); plstr->MaximumLength = min((Length + 1), cchLimit); } else { plstr->MaximumLength = 0; plstr->Length = 0; } } /**************************************************************************\ * RtlInitLargeUnicodeString * * Captures a large unicode string in the same manner as * RtlInitUnicodeString. * * 03-22-95 JimA Created. \**************************************************************************/ VOID RtlInitLargeUnicodeString( PLARGE_UNICODE_STRING plstr, LPCWSTR psz, UINT cchLimit) { ULONG Length; plstr->Buffer = (PWSTR)psz; plstr->bAnsi = FALSE; if (ARGUMENT_PRESENT( psz )) { Length = wcslen( psz ) * sizeof( WCHAR ); plstr->Length = min(Length, cchLimit); plstr->MaximumLength = min((Length + sizeof(UNICODE_NULL)), cchLimit); } else { plstr->MaximumLength = 0; plstr->Length = 0; } }