windows-xp/Source/XPSP1/NT/sdktools/restools/unicode/nlsxlat.c


								/****************************** Module Header ******************************\

								* Module Name: nlsxlat.c

								*

								* Copyright (c) 1985-91, Microsoft Corporation

								*

								* This modules contains the private routines for character translation:

								* 8-bit <=> Unicode.

								*

								* History:

								* 03-Jan-1992 gregoryw

								\***************************************************************************/


								#include <nt.h>

								#include <ntrtl.h>


								/*

								 * External declarations - these are temporary tables

								 */

								extern USHORT TmpUnicodeToAnsiTable[];

								extern WCHAR TmpAnsiToUnicodeTable[];

								#ifdef DBCS

								extern WCHAR sjtouni( USHORT );

								#define IsDBCSFirst(w) (((unsigned char)w >= 0x81 && (unsigned char)w <= 0x9f) || (((unsigned char)w >= 0xe0 && (unsigned char)w <= 0xfc)))

								#endif // DBCS


								/*

								 * Various defines for data access

								 */

								#define DBCS_TABLE_SIZE 256


								#define LONIBBLE(b)         ((UCHAR)((UCHAR)(b) & 0xF))

								#define HINIBBLE(b)         ((UCHAR)(((UCHAR)(b) >> 4) & 0xF))


								#define LOBYTE(w)           ((UCHAR)(w))

								#define HIBYTE(w)           ((UCHAR)(((USHORT)(w) >> 8) & 0xFF))


								/*

								 * Global data used by the translation routines.

								 *

								 */

								UCHAR    NlsLeadByteInfo[DBCS_TABLE_SIZE]; // Lead byte info. for ACP

								PUSHORT *NlsMbCodePageTables;         // Multibyte to Unicode translation tables

								PUSHORT  NlsAnsiToUnicodeData = TmpAnsiToUnicodeTable; // Ansi CP to Unicode translation table

								PUSHORT  NlsUnicodeToAnsiData = TmpUnicodeToAnsiTable; // Unicode to Ansi CP translation table


								NTSTATUS

								xxxRtlMultiByteToUnicodeN(

								    OUT PWCH UnicodeString,

								    OUT PULONG BytesInUnicodeString OPTIONAL,

								    IN PCH MultiByteString,

								    IN ULONG BytesInMultiByteString)


								/*++


								Routine Description:


								    This functions converts the specified ansi source string into a

								    Unicode string. The translation is done with respect to the

								    ANSI Code Page (ACP) installed at boot time.  Single byte characters

								    in the range 0x00 - 0x7f are simply zero extended as a performance

								    enhancement.  In some far eastern code pages 0x5c is defined as the

								    Yen sign.  For system translation we always want to consider 0x5c

								    to be the backslash character.  We get this for free by zero extending.


								    NOTE: This routine only supports precomposed Unicode characters.


								Arguments:


								    UnicodeString - Returns a unicode string that is equivalent to

								        the ansi source string.


								    BytesInUnicodeString - Returns the number of bytes in the returned

								        unicode string pointed to by UnicodeString.


								    MultiByteString - Supplies the ansi source string that is to be

								        converted to unicode.


								    BytesInMultiByteString - The number of bytes in the string pointed to

								        by MultiByteString.


								Return Value:


								    SUCCESS - The conversion was successful


								--*/


								{

								    UCHAR Entry;

								    PWCH UnicodeStringAnchor;

								    PUSHORT DBCSTable;


								    UnicodeStringAnchor = UnicodeString;


								#ifdef DBCS

								        while (BytesInMultiByteString--) {

								            if ( IsDBCSFirst( *MultiByteString ) ) {

								                if (!BytesInMultiByteString) {

								                    return STATUS_UNSUCCESSFUL;

								                }

								                *UnicodeString++ = sjtouni( (((USHORT)(*(PUCHAR)MultiByteString++)) << 8) +

								                                            (USHORT)(*(PUCHAR)MultiByteString++)

								                                          );

								                BytesInMultiByteString--;

								            } else {

								                *UnicodeString++ = sjtouni( *(PUCHAR)MultiByteString++ );

								            }

								        }

								#else

								    if (NlsMbCodePageTag) {

								        //

								        // The ACP is a multibyte code page.  Check each character

								        // to see if it is a lead byte before doing the translation.

								        //

								        while (BytesInMultiByteString--) {

								            if ( NlsLeadByteInfo[*MultiByteString]) {

								                //

								                // Lead byte - translate the trail byte using the table

								                // that corresponds to this lead byte.  NOTE: make sure

								                // we have a trail byte to convert.

								                //

								                if (!BytesInMultiByteString) {

								                    return STATUS_UNSUCCESSFUL;

								                }

								                Entry = NlsLeadByteInfo[*MultiByteString++];

								                DBCSTable = NlsMbCodePageTables[HINIBBLE(Entry)] + (LONIBBLE(Entry) * DBCS_TABLE_SIZE);

								                *UnicodeString++ = DBCSTable[*MultiByteString++];

								                BytesInMultiByteString--;

								            } else {

								                //

								                // Single byte character.

								                //

								                if (*MultiByteString & 0x80) {

								                    *UnicodeString++ = NlsAnsiToUnicodeData[*MultiByteString++];

								                } else {

								                    *UnicodeString++ = (WCHAR)*MultiByteString++;

								                }

								            }

								        }

								    } else {

								        //

								        // The ACP is a single byte code page.

								        //

								        while (BytesInMultiByteString--) {

								            if (*MultiByteString & 0x80) {

								                *UnicodeString++ = NlsAnsiToUnicodeData[*MultiByteString++];

								            } else {

								                *UnicodeString++ = (WCHAR)*MultiByteString++;

								            }

								        }

								    }

								#endif


								    if (ARGUMENT_PRESENT(BytesInUnicodeString)) {

								        *BytesInUnicodeString = (ULONG)((PCH)UnicodeString - (PCH)UnicodeStringAnchor);

								    }


								    return STATUS_SUCCESS;

								}


								NTSTATUS

								xxxRtlUnicodeToMultiByteN(

								    OUT PCH MultiByteString,

								    OUT PULONG BytesInMultiByteString OPTIONAL,

								    IN PWCH UnicodeString,

								    IN ULONG BytesInUnicodeString)


								/*++


								Routine Description:


								    This functions converts the specified unicode source string into an

								    ansi string. The translation is done with respect to the

								    ANSI Code Page (ACP) loaded at boot time.


								Arguments:


								    MultiByteString - Returns an ansi string that is equivalent to the

								        unicode source string.  If the translation can not be done

								        because a character in the unicode string does not map to an

								        ansi character in the ACP, an error is returned.


								    BytesInMultiByteString - Returns the number of bytes in the returned

								        ansi string pointed to by MultiByteString.


								    UnicodeString - Supplies the unicode source string that is to be

								        converted to ansi.


								    BytesInUnicodeString - The number of bytes in the the string pointed to by

								        UnicodeString.


								Return Value:


								    SUCCESS - The conversion was successful


								    !SUCCESS - The conversion failed.  A unicode character was encountered

								        that has no translation for the current ANSI Code Page (ACP).


								--*/


								{

								    USHORT Offset;

								    USHORT Entry;

								    ULONG CharsInUnicodeString;

								    PCH MultiByteStringAnchor;


								    MultiByteStringAnchor = MultiByteString;


								    /*

								     * convert from bytes to chars for easier loop handling.

								     */

								    CharsInUnicodeString = BytesInUnicodeString / sizeof(WCHAR);


								    while (CharsInUnicodeString--) {

								        Offset = NlsUnicodeToAnsiData[HIBYTE(*UnicodeString)];

								        if (Offset != 0) {

								            Offset = NlsUnicodeToAnsiData[Offset + HINIBBLE(*UnicodeString)];

								            if (Offset != 0) {

								                Entry = NlsUnicodeToAnsiData[Offset + LONIBBLE(*UnicodeString)];

								                if (HIBYTE(Entry) != 0) {

								                    *MultiByteString++ = HIBYTE(Entry);  // lead byte

								                }

								                *MultiByteString++ = LOBYTE(Entry);

								            } else {

								                //

								                // no translation for this Unicode character.  Return

								                // an error.

								                //

								#ifdef DBCS // RtlUnicodeToMultiByteN : temporary hack to avoid error return

								                if ( *UnicodeString <= (WCHAR)0xff )

								                    *MultiByteString++ = (UCHAR)*UnicodeString;

								                else

								                    *MultiByteString++ = '\x20';

								#else

								                return STATUS_UNSUCCESSFUL;

								#endif

								            }

								        } else {

								            //

								            // no translation for this Unicode character.  Return an error.

								            //

								#ifdef DBCS // RtlUnicodeToMultiByteN : temporary hack to avoid error return

								            if ( *UnicodeString <= (WCHAR)0xff )

								                *MultiByteString++ = (UCHAR)*UnicodeString;

								            else

								                *MultiByteString++ = '\x20';

								#else

								            return STATUS_UNSUCCESSFUL;

								#endif

								        }

								        UnicodeString++;

								    }


								    if (ARGUMENT_PRESENT(BytesInMultiByteString)) {

								        *BytesInMultiByteString = (ULONG)(MultiByteString - MultiByteStringAnchor);

								    }


								    return STATUS_SUCCESS;

								}