windows-server-2003/base/win32/winnls/data/dlls/gb18030/c_gb18030.cpp

/*++

Copyright (c) 1991-1999,  Microsoft Corporation  All rights reserved.

Module Name:

    c_gb18030.c

Abstract:

    This file contains functions to convert GB18030-2000 (code page 54936) into Unicode, and vice versa.
    The target module is c_g18030.dll.  This will be the external DLL used by WideCharToMultiByte()
    and MultiByteToWideChar() to perform the conversion for GB18030 codepage.

    External Routines in this file:
      DllEntry
      NlsDllCodePageTranslation

Notes:
    GB18030-2000 (aka GBK2K) is designed to be mostly compatible with GBK (codepage 936),
    while supporting the full range of Unicode code points (BMP + 16 supplementary planes).

    The structure for GB18030 is:
        * Single byte: 
            0x00 ~ 0x7f
        * Two-byte: 
            0x81 ~ 0xfe, 0x40 ~ 0x7e    (leading byte, trailing byte)
            0x81 ~ 0xfe, 0x80 ~ 0xfe    (leading byte, trailing byte)
        * Four-byte:
            0x81 ~ 0xfe, 0x30 ~ 0x39, 0x81 ~ 0xfe, 0x30 ~ 0x39.
            Surrogate pairs are encoded starting from 0x90, 0x30, 0x81, 0x30

    The BMP range is fully supported in GB18030 using 1-byte, 2-byte and 4-byte sequences.
    In valid 4-byte GB18030, there are two gaps that can not be mapped to Unicode characters.
        0x84, 0x31, 0xa5, 0x30 (just after the GB18030 bytes for U+FFFF(*)) ~ 0x8f, 0x39, 0xfe, 0x39 (just before the first GB18030 bytes for U+D800,U+DC00)
        0xe3, 0x32, 0x9a, 0x36 (just after the GB18030 bytes for U+DBFF U+DFFF(**)) ~ 0xfe, 0x39, 0xfe, 0x39
        

        Note1: U+FFFF = 0x84, 0x31, 0xa4, 0x39
        Note2: U+DBFF U+DFFF = 0xe3, 0x32, 0x9a, 0x35

    Tables used in c_g18030.dll:
        * From Unicode to bytes:
            * g_wUnicodeToGB:
                Used to convert Unicode character to 2-byte GBK, 2-byte GB18030, or 4-byte GB18030.
                The index is 0x0000 ~ 0xffff, for Unicode BMP range.
                When the values are:
                
                    Value       Meaning
                    ======      =======
                    0xffff      2-byte GB18030, which is compatible with GBK.  Call WC2MB(936,...) to convert.
                    0xfffe ~ [0xfffe - (ARRAYSIZE(g_wUnicodeToGBTwoBytes))+1]
                                2-byte GB18030, which is NOT compatible with GBK.  (0xfffe - Value) will be indexed into
                                a second table g_wUnicodeToGBTwoBytes, which contains the two-byte GB18030 values.
                                E.g. if the value is 0xfffe, the index into g_wUnicodeToGBTwoBytes is 0, so the two-byte
                                GB18030 will be 0xa8, 0xbf (which are stored in g_wUnicodeToGBTwoBytes[0],g_wUnicodeToGBTwoBytes[1])
                    0x0000 ~ 0x99fb
                                An offset value that can be used to convert to 4-byte GB18030
                                If the value is 0x0000, the 4-byte GB18030 is 0x81, 0x30, 0x81, 0x30.
                                
        * From bytes to Unicode
            * Two-byte GB18030 to Unicode:
                * g_wGBLeadByteOffset
                    The index into this table is lead byte 0x80 ~ 0xff (converted to index 0x00 ~ 0x7f).  
                    If the value is 0x0000, it means that this lead byte is compatible with GBK.  
                    Otherwise, the value can be:
                    0x0100  This is used to index into g_wUnicodeFromGBTwoBytes[0x0000 ~ 0x00ff].  
                            The value of g_wUnicodeFromGBTwoBytes is the Unicode value for this lead byte with the next valid trailing byte.
                    0x0200  This is used to index into g_wUnicodeFromGBTwoBytes[0x0100 ~ 0x01ff].  
                    0x0300  This is used to index into g_wUnicodeFromGBTwoBytes[0x0200 ~ 0x02ff].  
                    0x0400  This is used to index into g_wUnicodeFromGBTwoBytes[0x0300 ~ 0x03ff].  
                    
                    E.g. g_wGBLeadByteOffset[0x07] = 0x0000. It means that GB18030 two-byte lead byte 0x87 is compatible with GBK.
                    g_wGBLeadByteOffset[0x28] = 0x0200.  It means that GB18030 two-byte lead byte 0xa8 (0x28+0x80 = 0xa8) is NOT compatible with GBK.
                    The Unicode value for 0xa8, <trail byte> will be stored in g_wUnicodeFromGBTwoBytes[0x0100+<trail byte>]
                    
            * Four-byte GB18030 to Unicode:
                * g_wGBFourBytesToUnicode
                    The table is used to convert 4-byte GB18030 into a Unicode.
                    
                    The index value is the offset of the 4-byte GB18030.

                    4-byte GB18030      Index value
                    ==============      ===========
                    81,30,81,30         0
                    81,30,81,31         1
                    81,30,81,32         2
                    ...                 ...

                    The value of g_wGBFourBytesToUnicode contains the Unicode codepoint for the offset of the 
                    corresponding 4-byte GB18030.

                    E.g. g_wGBFourBytesToUnicode[0] = 0x0080.  This means that GB18030 0x81, 0x30, 0x81, 0x30 will be converted to Unicode U+0080.
    
Revision History:

    02-20-2001    YSLin    Created.
    
--*/


//
//  Include Files.
//

#include <share.h>
#include "c_gb18030.h"

//
//  Constant Declarations.
//


//
// Structure used in GetCPInfo().
//
CPINFO g_CPInfo =
{
    // MaxCharSize: a GB18030 character occupies at most four bytes.
    4,

    // DefaultChar[MAX_DEFAULTCHAR]: 0x3f followed by a zero byte.
    { 0x3f, 0x00 },

    // LeadByte[MAX_LEADBYTES]: every entry is zero.  Since GBK2K can have
    // up to 4 bytes, 0x81-0xfe are deliberately not returned as lead bytes.
    { 0x00 }
};

// Four-byte offset of 0x90, 0x30, 0x81, 0x30, the GB18030 sequence for the
// first surrogate pair (U+D800, U+DC00).
#define SURROGATE_OFFSET        GET_FOUR_BYTES_OFFSET_FROM_BYTES(0x90, 0x30, 0x81, 0x30)
// Four-byte offset of 0xe3, 0x32, 0x9a, 0x35, the GB18030 sequence for the
// last surrogate pair (U+DBFF, U+DFFF).
#define SURROGATE_MAX_OFFSET    GET_FOUR_BYTES_OFFSET_FROM_BYTES(0xe3, 0x32, 0x9a, 0x35)


//-------------------------------------------------------------------------//
//                            EXTERNAL ROUTINES                            //
//-------------------------------------------------------------------------//


////////////////////////////////////////////////////////////////////////////
//
//  NlsDllCodePageTranslation
//
//  This routine is the main exported procedure for the functionality in
//  this DLL.  All calls to this DLL must go through this function.
//
//  Parameters:
//      CodePage        Not examined by this routine.
//      dwFlags         Selects the operation:
//                        NLS_CP_CPINFO  copy g_CPInfo into lpCPInfo.
//                        NLS_CP_MBTOWC  convert lpMultiByteStr to lpWideCharStr.
//                        NLS_CP_WCTOMB  convert lpWideCharStr to lpMultiByteStr.
//      lpMultiByteStr  GB18030 byte string (source or destination).
//      cchMultiByte    Size of lpMultiByteStr in bytes.
//      lpWideCharStr   Unicode string (source or destination).
//      cchWideChar     Size of lpWideCharStr in characters.
//      lpCPInfo        Receives the code page information for NLS_CP_CPINFO.
//
//  Returns:
//      NLS_CP_CPINFO:  TRUE on success.
//      NLS_CP_MBTOWC:  the result of BytesToUnicode().
//      NLS_CP_WCTOMB:  the result of UnicodeToBytes().
//      0 with the last error set to ERROR_INVALID_PARAMETER if CODEPAGE_GBK
//      is not a valid code page, if dwFlags is not one of the values above,
//      or if lpCPInfo is NULL for NLS_CP_CPINFO.
//
//  02-20-2001    YSLin    Created.
////////////////////////////////////////////////////////////////////////////

STDAPI_(DWORD) NlsDllCodePageTranslation(
    DWORD CodePage,
    DWORD dwFlags,
    LPSTR lpMultiByteStr,
    int cchMultiByte,
    LPWSTR lpWideCharStr,
    int cchWideChar,
    LPCPINFO lpCPInfo)
{

    //
    //  The GBK compatible two-byte conversions are delegated to code page
    //  CODEPAGE_GBK, so error out if that code page is not available.
    //
    if (!IsValidCodePage(CODEPAGE_GBK))
    {
        SetLastError(ERROR_INVALID_PARAMETER);
        return (0);
    }

    switch (dwFlags)
    {
        case ( NLS_CP_CPINFO ) :
        {
            //
            //  Do not write through a NULL output pointer.
            //
            if (lpCPInfo == NULL)
            {
                SetLastError(ERROR_INVALID_PARAMETER);
                return (0);
            }
            memcpy(lpCPInfo, &g_CPInfo, sizeof(CPINFO));
            return (TRUE);
        }
        case ( NLS_CP_MBTOWC ) :
        {
            return (BytesToUnicode((BYTE*)lpMultiByteStr, cchMultiByte, NULL, lpWideCharStr, cchWideChar));
        }
        case ( NLS_CP_WCTOMB ) :
        {
            return (UnicodeToBytes(lpWideCharStr, cchWideChar, lpMultiByteStr, cchMultiByte));
        }
    }

    //
    //  This shouldn't happen since this gets called by the NLS APIs.
    //
    SetLastError(ERROR_INVALID_PARAMETER);
    return (0);
}

//-------------------------------------------------------------------------//
//                            INTERNAL ROUTINES                            //
//-------------------------------------------------------------------------//

////////////////////////////////////////////////////////////////////////////
//
//  GetBytesToUnicodeCount
//
//  Return the Unicode character count needed to convert the specified
//  GB18030 multi-byte string.
//
//  Parameters:
//      lpMultiByteStr  The multi-byte string to be converted.
//      cchMultiByte    The byte size of the multi-byte string to be converted.
//                      If -1, strlen() of the string is used.
//      bSupportEncoder If TRUE and the string ends with a lead byte whose
//                      sequence may be incomplete (fewer than four bytes
//                      are left and the next byte is not a two-byte
//                      trailing byte), those bytes are not counted.
//                      Otherwise, the lead byte is counted as one default
//                      character.
//
//  Returns:
//      The number of Unicode characters (a surrogate pair counts as two).
//
//  NOTE: The counting rules must stay in sync with BytesToUnicode().
//
//  02-21-2001    YSLin    Created.
////////////////////////////////////////////////////////////////////////////

DWORD GetBytesToUnicodeCount(BYTE* lpMultiByteStr, int cchMultiByte, BOOL bSupportEncoder)
{
    int i = 0;
    BYTE ch;
    DWORD cchWCCount = 0;
    BYTE offset1, offset2, offset3, offset4;
    DWORD dwFourBytesOffset;

    if (cchMultiByte == -1)
    {
        cchMultiByte = (int)strlen((LPSTR)lpMultiByteStr);
    }

    while (i < cchMultiByte)
    {
        ch = lpMultiByteStr[i];

        if (ch <= 0x7f)
        {
            //
            // Single byte 0x00 ~ 0x7f.
            //
            cchWCCount++;
            i++;
        }
        else if (!IS_GB_LEAD_BYTE(ch))
        {
            //
            // This byte is NOT between 0x00 ~ 0x7f, and not a lead byte.
            // Use the default character.
            //
            cchWCCount++;
            i++;
        }
        else if (i + 1 >= cchMultiByte)
        {
            //
            // We have a lead byte at the end of the string.  It is skipped
            // when bSupportEncoder is set; otherwise it becomes the default
            // Unicode char.
            //
            if (!bSupportEncoder)
            {
                cchWCCount++;
            }
            i++;
        }
        else if (IS_GB_TWO_BYTES_TRAILING(lpMultiByteStr[i+1]))
        {
            //
            // The trailing byte is a GB18030 two-byte.
            //
            cchWCCount++;
            i += 2;
        }
        else if (i + 3 < cchMultiByte)
        {
            //
            // At least four bytes are available.  Check if this is a
            // four-byte GB18030.
            //
            if (IS_GB_FOUR_BYTES_TRAILING(lpMultiByteStr[i+1]) &&
                IS_GB_LEAD_BYTE(lpMultiByteStr[i+2]) &&
                IS_GB_FOUR_BYTES_TRAILING(lpMultiByteStr[i+3]))
            {
                offset1 = ch - GBK2K_BYTE1_MIN;
                offset2 = lpMultiByteStr[i+1] - GBK2K_BYTE2_MIN;
                offset3 = lpMultiByteStr[i+2] - GBK2K_BYTE3_MIN;
                offset4 = lpMultiByteStr[i+3] - GBK2K_BYTE4_MIN;

                dwFourBytesOffset = GET_FOUR_BYTES_OFFSET(offset1, offset2, offset3, offset4);
                if (dwFourBytesOffset <= g_wMax4BytesOffset)
                {
                    //
                    // The Unicode will be in the BMP range.
                    //
                    cchWCCount++;
                }
                else if (dwFourBytesOffset >= SURROGATE_OFFSET && dwFourBytesOffset <= SURROGATE_MAX_OFFSET)
                {
                    //
                    // This will be converted to a surrogate pair.
                    //
                    cchWCCount += 2;
                }
                else
                {
                    //
                    // Valid GBK2K code point, but can not be mapped to Unicode.
                    // It becomes one default character.
                    //
                    cchWCCount++;
                }
                i += 4;
            }
            else
            {
                //
                // Enough bytes are present, so this is an invalid sequence
                // rather than an incomplete one.  BytesToUnicode() emits one
                // default character for the lead byte and continues with the
                // next byte regardless of the encoder mode, so count it the
                // same way here.
                //
                cchWCCount++;
                i++;
            }
        }
        else
        {
            //
            // Fewer than four bytes are left and the next byte is not a
            // two-byte trailing byte.
            //
            if (bSupportEncoder)
            {
                // The remaining bytes are left over.  Set i to cchMultiByte
                // so that we will bail out the while loop.
                i = cchMultiByte;
            }
            else
            {
                //
                // We have a lead byte, but do not have a valid trailing byte.
                // Use default Unicode char.
                //
                cchWCCount++;
                i++;
            }
        }
    }

    return (cchWCCount);
}

BOOL __forceinline PutDefaultCharacter(UINT* pCchWCCount, UINT cchWideChar, LPWSTR lpWideCharStr)
{
    //
    // Append GB18030_DEFAULT_UNICODE_CHAR at index *pCchWCCount and advance
    // the count.  If the buffer (cchWideChar characters) is already full,
    // set ERROR_INSUFFICIENT_BUFFER and leave the count untouched.
    //
    UINT uNext = *pCchWCCount;

    if (uNext < cchWideChar)
    {
        *pCchWCCount = uNext + 1;
        lpWideCharStr[uNext] = GB18030_DEFAULT_UNICODE_CHAR;
        return (TRUE);
    }

    SetLastError(ERROR_INSUFFICIENT_BUFFER);
    return (FALSE);
}

//
//  BytesToUnicode
//
//  Convert a GB18030 byte string into Unicode.
//
//  Parameters:
//      lpMultiByteStr      The GB18030 bytes to convert.
//      cchMultiByte        Number of bytes; -1 means use strlen().
//      pcchLeftOverBytes   Optional.  When not NULL, a trailing lead byte
//                          whose sequence may be incomplete is not converted
//                          and the number of such bytes is stored here.
//                          Not written in the count-only mode.
//      lpWideCharStr       Destination buffer.  If NULL (or cchWideChar is 0)
//                          only the required character count is returned.
//      cchWideChar         Size of lpWideCharStr in characters.
//
//  Returns:
//      The number of Unicode characters written (or required), or 0 on
//      failure (ERROR_INSUFFICIENT_BUFFER when the buffer is too small).
//
STDAPI_(DWORD) BytesToUnicode(
    BYTE* lpMultiByteStr,
    UINT cchMultiByte,
    UINT* pcchLeftOverBytes,
    LPWSTR lpWideCharStr,
    UINT cchWideChar)
{
    UINT iSrc = 0;
    UINT cchOut = 0;
    BYTE bLead;
    BYTE bSecond;
    BYTE idx1, idx2, idx3, idx4;
    WORD wTableBase;
    DWORD dwFourBytesOffset;

    //
    // Count-only mode.
    //
    if ((lpWideCharStr == NULL) || (cchWideChar == 0))
    {
        return (GetBytesToUnicodeCount(lpMultiByteStr, cchMultiByte, (pcchLeftOverBytes != NULL)));
    }

    if (cchMultiByte == -1)
    {
        cchMultiByte = strlen((LPSTR)lpMultiByteStr);
    }

    if (pcchLeftOverBytes != NULL)
    {
        *pcchLeftOverBytes = 0;
    }

    //
    // NOTENOTE YSLin:
    // If you make fix in the following code, remember to make the appropriate fix
    // in GetBytesToUnicodeCount() as well.
    //
    while (iSrc < cchMultiByte)
    {
        bLead = lpMultiByteStr[iSrc];

        //
        // Case 1: single byte 0x00 ~ 0x7f, copied as is.
        //
        if (bLead <= 0x7f)
        {
            if (cchOut >= cchWideChar)
            {
                SetLastError(ERROR_INSUFFICIENT_BUFFER);
                return (0);
            }
            lpWideCharStr[cchOut++] = bLead;
            iSrc++;
            continue;
        }

        //
        // Case 2: above 0x7f but not a lead byte.  Use the default character.
        //
        if (!IS_GB_LEAD_BYTE(bLead))
        {
            if (!PutDefaultCharacter(&cchOut, cchWideChar, lpWideCharStr))
            {
                return (0);
            }
            iSrc++;
            continue;
        }

        //
        // Case 3: a lead byte that is the very last byte of the input.
        //
        if (iSrc + 1 >= cchMultiByte)
        {
            if (pcchLeftOverBytes != NULL)
            {
                *pcchLeftOverBytes = 1;
            }
            else if (!PutDefaultCharacter(&cchOut, cchWideChar, lpWideCharStr))
            {
                return (0);
            }
            iSrc++;
            continue;
        }

        bSecond = lpMultiByteStr[iSrc + 1];

        //
        // Case 4: two-byte GB18030.
        //
        if (IS_GB_TWO_BYTES_TRAILING(bSecond))
        {
            if (cchOut == cchWideChar)
            {
                SetLastError(ERROR_INSUFFICIENT_BUFFER);
                return (0);
            }

            wTableBase = g_wGBLeadByteOffset[bLead - 0x80];
            if (wTableBase == 0x0000)
            {
                //
                // No table for this lead byte: it is GBK compatible, so let
                // the GBK code page do the conversion.
                //
                if (MultiByteToWideChar(CODEPAGE_GBK, 0, (LPCSTR)(lpMultiByteStr + iSrc), 2, lpWideCharStr + cchOut, 1) == 0)
                {
                    return (0);
                }
                cchOut++;
            }
            else
            {
                //
                // Not GBK compatible: look the character up in our own table.
                //
                wTableBase -= 0x0100;
                lpWideCharStr[cchOut++] = g_wUnicodeFromGBTwoBytes[wTableBase + bSecond];
            }
            iSrc += 2;
            continue;
        }

        //
        // Case 5: fewer than four bytes remain, so this can not be a
        // complete four-byte sequence.
        //
        if (!(iSrc + 3 < cchMultiByte))
        {
            if (pcchLeftOverBytes != NULL)
            {
                *pcchLeftOverBytes = cchMultiByte - iSrc;
                // Set iSrc to cchMultiByte so that we will bail out the while loop.
                iSrc = cchMultiByte;
            }
            else
            {
                // Lead byte without a valid trailing byte: default character.
                if (!PutDefaultCharacter(&cchOut, cchWideChar, lpWideCharStr))
                {
                    return (0);
                }
                iSrc++;
            }
            continue;
        }

        //
        // Case 6: four bytes are available but do not form a four-byte
        // GB18030.  Replace the lead byte with the default character.
        //
        if (!(IS_GB_FOUR_BYTES_TRAILING(bSecond) &&
              IS_GB_LEAD_BYTE(lpMultiByteStr[iSrc + 2]) &&
              IS_GB_FOUR_BYTES_TRAILING(lpMultiByteStr[iSrc + 3])))
        {
            if (!PutDefaultCharacter(&cchOut, cchWideChar, lpWideCharStr))
            {
                return (0);
            }
            iSrc++;
            continue;
        }

        //
        // Case 7: four-byte GB18030.
        //
        idx1 = bLead - GBK2K_BYTE1_MIN;
        idx2 = bSecond - GBK2K_BYTE2_MIN;
        idx3 = lpMultiByteStr[iSrc + 2] - GBK2K_BYTE3_MIN;
        idx4 = lpMultiByteStr[iSrc + 3] - GBK2K_BYTE4_MIN;

        dwFourBytesOffset = GET_FOUR_BYTES_OFFSET(idx1, idx2, idx3, idx4);
        if (dwFourBytesOffset <= g_wMax4BytesOffset)
        {
            //
            // The Unicode will be in the BMP range.
            //
            if (cchOut == cchWideChar)
            {
                SetLastError(ERROR_INSUFFICIENT_BUFFER);
                return (0);
            }
            lpWideCharStr[cchOut++] = g_wGBFourBytesToUnicode[dwFourBytesOffset];
        }
        else if (dwFourBytesOffset >= SURROGATE_OFFSET && dwFourBytesOffset <= SURROGATE_MAX_OFFSET)
        {
            //
            // This will be converted to a surrogate pair.
            //
            if (cchOut + 2 > cchWideChar)
            {
                SetLastError(ERROR_INSUFFICIENT_BUFFER);
                return (0);
            }
            dwFourBytesOffset -= SURROGATE_OFFSET;
            lpWideCharStr[cchOut++] = 0xd800 + (WORD)(dwFourBytesOffset / 0x400);
            lpWideCharStr[cchOut++] = 0xdc00 + (WORD)(dwFourBytesOffset % 0x400);
        }
        else
        {
            //
            // Valid GBK2K code point, but can not be mapped to Unicode.
            //
            if (!PutDefaultCharacter(&cchOut, cchWideChar, lpWideCharStr))
            {
                return (0);
            }
        }
        iSrc += 4;
    }

    return (cchOut);
}

//
//  GetUnicodeToBytesCount
//
//  Return the number of GB18030 bytes needed to convert the specified
//  Unicode string.  A cchWideChar of -1 means use wcslen() of the string.
//
DWORD GetUnicodeToBytesCount(LPWSTR lpWideCharStr, int cchWideChar)
{
    int iSrc = 0;
    int cbNeeded = 0;
    WORD wch;
    DWORD dwTableValue;

    if (cchWideChar == -1)
    {
        cchWideChar = wcslen(lpWideCharStr);
    }

    while (iSrc < cchWideChar)
    {
        wch = lpWideCharStr[iSrc];
        iSrc++;

        if (wch <= 0x7f)
        {
            // One-byte GB18030.
            cbNeeded += 1;
            continue;
        }

        if (IS_HIGH_SURROGATE(wch))
        {
            //
            // Look ahead one character to see if the next char is a low surrogate.
            //
            if ((iSrc < cchWideChar) && IS_LOW_SURROGATE(lpWideCharStr[iSrc]))
            {
                //
                // Found a surrogate pair.  This will be a four-byte GB18030.
                // Consume the low surrogate as well.
                //
                cbNeeded += 4;
                iSrc++;
            }
            else
            {
                //
                // A high surrogate that is not followed by a low surrogate
                // (either another character follows or the string ends).
                // It will be converted to a default character.
                //
                cbNeeded += 1;
            }
            continue;
        }

        if (IS_LOW_SURROGATE(wch))
        {
            //
            // A low surrogate without a leading high surrogate.
            // It will be converted to a default character.
            //
            cbNeeded += 1;
            continue;
        }

        //
        // Not a surrogate character.  The table tells whether this BMP
        // character becomes a two-byte or a four-byte GB18030:
        //   0xFFFF                    two-byte, GBK compatible
        //   <= g_wMax4BytesOffset     four-byte
        //   anything else             two-byte, not GBK compatible
        //
        dwTableValue = g_wUnicodeToGB[wch];
        if ((dwTableValue != 0xFFFF) && (dwTableValue <= g_wMax4BytesOffset))
        {
            cbNeeded += 4;
        }
        else
        {
            cbNeeded += 2;
        }
    }

    return (cbNeeded);
}

STDAPI_(DWORD) UnicodeToBytes(
    LPWSTR lpWideCharStr,
    UINT cchWideChar,
    LPSTR lpMultiByteStr,
    UINT cchMultiByte)
{
    UINT i;
    WORD wch;
    UINT cchMBCount = 0;
    CHAR MBTwoBytes[2];
    BYTE MBFourBytes[4];
    WORD wOffset;
    DWORD dwSurrogateOffset;
    int nResult;

    if ((lpMultiByteStr == NULL) || (cchMultiByte == 0))
    {
        return (GetUnicodeToBytesCount(lpWideCharStr, cchWideChar));
    }

    if (cchWideChar == -1)
    {
        cchWideChar = wcslen(lpWideCharStr);
    }
    //
    // NOTENOTE YSLin:
    // If you make fix in the following code, remember to make the appropriate fix
    // in GetUnicodeToBytesCount() as well.
    //
    for (i = 0; i < cchWideChar; i++) 
    {
        wch = lpWideCharStr[i];

        if (wch <= 0x7f)
        {
            if (cchMBCount == cchMultiByte)
            {
                SetLastError(ERROR_INSUFFICIENT_BUFFER);
                return (0);
            }
            lpMultiByteStr[cchMBCount++] = (BYTE)wch;
        } else if (IS_HIGH_SURROGATE(wch))
        {
            //
            // Look ahead one character to see if the next char is a low surrogate.
            //
            if (i + 1 < cchWideChar)
            {
                if (IS_LOW_SURROGATE(lpWideCharStr[ i+1 ]))
                {
                    if (cchMBCount + 4 > cchMultiByte)
                    {
                        SetLastError(ERROR_INSUFFICIENT_BUFFER);
                        return (0);
                    }
                
                    i++;
                    //
                    // A surrogate pair will be converted to GB 18030 four-byte from
                    // 0x90308130 ~ 0xe339fe39.
                    //                
                    dwSurrogateOffset = (wch - 0xd800) * 0x0400 + (lpWideCharStr[i] - 0xdc00);
                    lpMultiByteStr[cchMBCount+3] = (BYTE)(dwSurrogateOffset % GBK2K_BYTE4_RANGE) + GBK2K_BYTE4_MIN;
                    dwSurrogateOffset /= GBK2K_BYTE4_RANGE;
                    lpMultiByteStr[cchMBCount+2] = (BYTE)(dwSurrogateOffset % GBK2K_BYTE3_RANGE) + GBK2K_BYTE3_MIN;
                    dwSurrogateOffset /= GBK2K_BYTE3_RANGE;
                    lpMultiByteStr[cchMBCount+1] = (BYTE)(dwSurrogateOffset % GBK2K_BYTE2_RANGE) + GBK2K_BYTE2_MIN;
                    dwSurrogateOffset /= GBK2K_BYTE2_RANGE;
                    lpMultiByteStr[cchMBCount]   = (BYTE)(dwSurrogateOffset % GBK2K_BYTE1_RANGE) + 0x90;

                    cchMBCount += 4;   
                } else
                {
                    if (cchMBCount == cchMultiByte)
                    {
                        SetLastError(ERROR_INSUFFICIENT_BUFFER);
                        return (0);
                    }
                    //
                    // A High surrogate character is at the end of string.
                    // In this case, we will convert this character to a default character.  
                    //
                    lpMultiByteStr[cchMBCount++] = GB18030_DEFAULT_CHAR;
                }
            }else
            {
                if (cchMBCount >= cchMultiByte)
                {
                    SetLastError(ERROR_INSUFFICIENT_BUFFER);
                    return (0);
                }
                //
                // A High surrogate character without a valid trailing low surrogate character.
                // In this case, we will convert this character to a default character.  
                //
                lpMultiByteStr[cchMBCount++] = GB18030_DEFAULT_CHAR;
            }
        } else if (IS_LOW_SURROGATE(wch))
        {
            if (cchMBCount == cchMultiByte)
            {
                SetLastError(ERROR_INSUFFICIENT_BUFFER);
                return (0);
            }
            //
            // Only a low surrogate character without a leading high surrogate.
            // In this case, we will convert this character to a default character.  
            //
            lpMultiByteStr[cchMBCount++] = GB18030_DEFAULT_CHAR;
        } else
        {
            //
            // This character is not below 0x7f, not a surrogate character.
            // Check the table to see how this Unicode character should be
            // converted.  It could be:
            //  1. Two-byte GB18030, which is compatible with GBK.  (wOffset == 0xffff)
            //  2. Two-byte GB18030, which is NOT compatible with GBK. (wOffset = 0xfffe and below)
            //  3. Four-byte GB18030. (wOffset >= 0 && wOffset < g_wMax4BytesOffset)
            //
            wOffset = g_wUnicodeToGB[wch];

            if (wOffset == 0xffff)
            {
                // 
                // This Unicode character will be converted to the same two-byte GBK code, so use GBK table.
                //
                if (cchMBCount + 2 > cchMultiByte) 
                {
                    SetLastError(ERROR_INSUFFICIENT_BUFFER);
                    return (0);
                }
                nResult = WideCharToMultiByte(CODEPAGE_GBK, 0, lpWideCharStr+i, 1, lpMultiByteStr+cchMBCount, 2, NULL, NULL);
                if (nResult == 0) 
                {
                    return (0);
                }
                if (cchMBCount + nResult > cchMultiByte)
                {
                    SetLastError(ERROR_INSUFFICIENT_BUFFER);
                    return (0);
                }                
                cchMBCount += nResult;
            } else if (wOffset <= g_wMax4BytesOffset)
            {
                if (cchMBCount + 4 > cchMultiByte)
                {
                    SetLastError(ERROR_INSUFFICIENT_BUFFER);
                    return (0);
                }                
            
                //
                // This Unicode character will be converted to four-byte GB18030.
                //
                lpMultiByteStr[cchMBCount+3] = (wOffset % GBK2K_BYTE4_RANGE) + GBK2K_BYTE4_MIN;
                wOffset /= GBK2K_BYTE4_RANGE;
                lpMultiByteStr[cchMBCount+2] = (wOffset % GBK2K_BYTE3_RANGE) + GBK2K_BYTE3_MIN;
                wOffset /= GBK2K_BYTE3_RANGE;
                lpMultiByteStr[cchMBCount+1] = (wOffset % GBK2K_BYTE2_RANGE) + GBK2K_BYTE2_MIN;
                wOffset /= GBK2K_BYTE2_RANGE;
                lpMultiByteStr[cchMBCount]   = (wOffset % GBK2K_BYTE1_RANGE) + GBK2K_BYTE1_MIN;

                cchMBCount += 4;                    
            } else
            {
                if (cchMBCount + 2 > cchMultiByte)
                {
                    SetLastError(ERROR_INSUFFICIENT_BUFFER);
                    return (0);
                }             
                //
                // This Unicode character will be converted to two-byte GB18030, which is not compatible
                // with GBK.
                //
                wOffset = 0xfffe - wOffset;
                // We don't have to check the range of wOffset here, since the value of wOffset is coming from
                // g_wUnicodeToGB.
                CopyMemory(lpMultiByteStr+cchMBCount, &g_wUnicodeToGBTwoBytes[wOffset * 2], 2);
                // Copy two bytes (a WORD) into lpMultiByteStr[cchMBCount].
                // Instead od CompMemory(), This is probably faster:
                // *((LPWORD)lpMultiByteStr[cchMBCount]) = *((LPWORD)g_wUnicodeToGBTwoBytes[wOffset * 2]);
                cchMBCount += 2;
            }
        }                
    }
            
    return (cchMBCount);
}