windows-server-2003/enduser/msasn1/ms_utf8.c


								/* Copyright (C) Boris Nikolaus, Germany, 1996-1997. All rights reserved. */

								/* Copyright (C) Microsoft Corporation, 1997-1998. All rights reserved. */


								#include "precomp.h"


								#ifdef ENABLE_BER


								// Prototypes for the UTF-8 <-> wide character converters defined later in
								// this file.  Both take (source, source length, destination, destination
								// capacity) and return an ASN1int32_t count.  The BER routines below first
								// call them with a NULL destination and a capacity of 0 to learn the size
								// of the result, then call them again with a buffer of that size.
								extern ASN1int32_t _WideCharToUTF8(WCHAR *, ASN1int32_t, ASN1char_t *, ASN1int32_t);

								extern ASN1int32_t _UTF8ToWideChar(ASN1char_t *, ASN1int32_t, WCHAR *, ASN1int32_t);


								//
								//  ASN1BEREncUTF8String
								//
								//  Converts `length` wide characters at `value` to UTF-8 and hands the
								//  resulting bytes to ASN1BEREncOctetString under `tag`.
								//
								//  A NULL `value` or a zero `length` is encoded as an empty string.
								//  Returns the result of ASN1BEREncOctetString when encoding is reached.
								//  Returns 0 when the conversion yields no bytes (ASN1_ERR_UTF8 is
								//  recorded on `enc`) or when the temporary buffer cannot be allocated.
								//
								int ASN1BEREncUTF8String(ASN1encoding_t enc, ASN1uint32_t tag, ASN1uint32_t length, WCHAR *value)
								{
								    ASN1int32_t cbNeeded;
								    ASN1int32_t cbWritten;
								    ASN1char_t *pszUtf8;
								    int         rc;

								    // Nothing to convert: emit an empty string.
								    if (!value || !length)
								    {
								        return ASN1BEREncOctetString(enc, tag, 0, NULL);
								    }

								    // Sizing pass: with no destination only the byte count is computed.
								    cbNeeded = _WideCharToUTF8(value, length, NULL, 0);
								    if (!cbNeeded)
								    {
								        ASN1EncSetError(enc, ASN1_ERR_UTF8);
								        return 0;
								    }

								    pszUtf8 = (ASN1char_t *) EncMemAlloc(enc, cbNeeded);
								    if (!pszUtf8)
								    {
								        return 0;
								    }

								    // Conversion pass into the buffer sized above; both passes must agree.
								    cbWritten = _WideCharToUTF8(value, length, pszUtf8, cbNeeded);
								    EncAssert(enc, cbWritten);
								    EncAssert(enc, cbNeeded == cbWritten);

								    rc = ASN1BEREncOctetString(enc, tag, cbWritten, pszUtf8);

								    // The UTF-8 copy is only scratch space; release it whatever rc is.
								    EncMemFree(enc, pszUtf8);
								    return rc;
								}


								//
								//  ASN1BERDecUTF8String
								//
								//  Decodes an octet string carrying `tag` and converts its UTF-8 content
								//  into a newly allocated wide character string stored in `val`.
								//
								//  Returns 1 on success; an empty octet string yields length 0 and a NULL
								//  value.  Returns 0 when the octet string cannot be decoded, when the
								//  UTF-8 content converts to no characters (ASN1_ERR_UTF8 is recorded on
								//  `dec`), or when the output buffer cannot be allocated.
								//
								int ASN1BERDecUTF8String(ASN1decoding_t dec, ASN1uint32_t tag, ASN1wstring_t *val)
								{
								    ASN1octetstring_t utf8Bytes;
								    ASN1int32_t       cchNeeded;
								    int               fDecoded = 0;

								    if (!ASN1BERDecOctetString(dec, tag, &utf8Bytes))
								    {
								        return 0;
								    }

								    // Empty content maps to an empty wide string; nothing is converted.
								    if (!utf8Bytes.length)
								    {
								        val->length = 0;
								        val->value = NULL;
								        return 1;
								    }

								    // Sizing pass: with no destination only the character count is computed.
								    cchNeeded = _UTF8ToWideChar(utf8Bytes.value, utf8Bytes.length, NULL, 0);
								    if (!cchNeeded)
								    {
								        ASN1DecSetError(dec, ASN1_ERR_UTF8);
								    }
								    else
								    {
								        val->value = (WCHAR *) DecMemAlloc(dec, sizeof(WCHAR) * cchNeeded);
								        if (val->value)
								        {
								            // Conversion pass into the buffer sized above.
								            val->length = _UTF8ToWideChar(utf8Bytes.value, utf8Bytes.length, val->value, cchNeeded);
								            DecAssert(dec, val->length);
								            DecAssert(dec, cchNeeded == (ASN1int32_t) val->length);
								            fDecoded = 1;
								        }
								    }

								    // The raw UTF-8 bytes are no longer needed on either outcome.
								    ASN1octetstring_free(&utf8Bytes);
								    return fDecoded;
								}


								#if 1


								//

								//  Constant Declarations.

								//


								// Largest wide character value that is emitted as a single UTF-8 byte.
								#define ASCII                 0x007f


								// NOTE(review): SHIFT_IN / SHIFT_OUT are not referenced by the code visible
								// in this file - presumably left over from a UTF-7 converter; confirm
								// before removing.
								#define SHIFT_IN              '+'     // beginning of a shift sequence

								#define SHIFT_OUT             '-'     // end       of a shift sequence


								// Lead/trail byte markers that are OR-ed with the payload bits when a
								// multi-byte sequence is written.
								#define UTF8_2_MAX            0x07ff  // max UTF8 2-byte sequence (32 * 64 = 2048)

								#define UTF8_1ST_OF_2         0xc0    // 110x xxxx

								#define UTF8_1ST_OF_3         0xe0    // 1110 xxxx

								#define UTF8_1ST_OF_4         0xf0    // 1111 0xxx

								#define UTF8_TRAIL            0x80    // 10xx xxxx


								// Payload extractors for a 3-byte sequence.  Despite its name,
								// HIGHER_6_BIT yields everything from bit 12 upwards, i.e. the top 4 bits
								// of a 16-bit value; MIDDLE_6_BIT yields bits 6-11 and LOWER_6_BIT bits 0-5.
								#define HIGHER_6_BIT(u)       ((u) >> 12)

								#define MIDDLE_6_BIT(u)       (((u) & 0x0fc0) >> 6)

								#define LOWER_6_BIT(u)        ((u) & 0x003f)


								// Byte classification used by the decoder: BIT7 clear means ASCII, BIT7
								// set with BIT6 clear means a trail byte, both set means a lead byte.
								#define BIT7(a)               ((a) & 0x80)

								#define BIT6(a)               ((a) & 0x40)


								// Inclusive bounds of the UTF-16 high and low surrogate ranges.
								#define HIGH_SURROGATE_START  0xd800

								#define HIGH_SURROGATE_END    0xdbff

								#define LOW_SURROGATE_START   0xdc00

								#define LOW_SURROGATE_END     0xdfff


								////////////////////////////////////////////////////////////////////////////
								//
								//  _UTF8ToWideChar
								//
								//  Maps a UTF-8 character string to its wide character string counterpart.
								//
								//  lpSrcStr  - UTF-8 bytes to convert.
								//  cchSrc    - number of bytes at lpSrcStr.
								//  lpDestStr - receives the wide characters; not accessed when cchDest is 0.
								//  cchDest   - capacity of lpDestStr in WCHARs, or 0 to only count.
								//
								//  Returns the number of wide characters generated (with cchDest == 0, the
								//  number a conversion would generate).  A 4-byte sequence generates two
								//  wide characters (a surrogate pair).  When cchDest is nonzero and the
								//  buffer is too small, sets ERROR_INSUFFICIENT_BUFFER and returns 0.
								//
								//  Malformed input is tolerated rather than rejected:
								//    - a trail byte with no lead byte is skipped;
								//    - a lead byte that interrupts an unfinished sequence closes that
								//      sequence as one character and is itself discarded;
								//    - an ASCII byte that interrupts an unfinished sequence replaces it;
								//    - a sequence still unfinished at the end of the input generates
								//      nothing.
								//
								//  02-06-96    JulieB    Created.
								////////////////////////////////////////////////////////////////////////////

								ASN1int32_t _UTF8ToWideChar
								(
								    /* in */    ASN1char_t         *lpSrcStr,
								    /* in */    ASN1int32_t         cchSrc,
								    /* out */   WCHAR              *lpDestStr,
								    /* in */    ASN1int32_t         cchDest
								)
								{
								    int nTB = 0;                   // # trail bytes to follow
								    int cchWC = 0;                 // # of wide characters generated
								    LPCSTR pUTF8 = lpSrcStr;
								    DWORD dwSurrogateChar = 0;     // code point assembled from a 4-byte sequence
								    BOOL bSurrogatePair = FALSE;   // TRUE while collecting a 4-byte sequence
								    char UTF8;

								    while ((cchSrc--) && ((cchDest == 0) || (cchWC < cchDest)))
								    {
								        //
								        //  See if there are any trail bytes.
								        //
								        if (BIT7(*pUTF8) == 0)
								        {
								            //
								            //  Found ASCII.
								            //
								            if (cchDest)
								            {
								                lpDestStr[cchWC] = (WCHAR)*pUTF8;
								            }

								            //
								            //  Abandon any unfinished sequence.  Without this reset, trail
								            //  bytes arriving later would be shifted into the next output
								            //  slot, which has not been written yet.
								            //
								            nTB = 0;
								            bSurrogatePair = FALSE;
								            cchWC++;
								        }
								        else if (BIT6(*pUTF8) == 0)
								        {
								            //
								            //  Found a trail byte.
								            //  Note : Ignore the trail byte if there was no lead byte.
								            //
								            if (nTB != 0)
								            {
								                //
								                //  Decrement the trail byte counter.
								                //
								                nTB--;

								                if (bSurrogatePair)
								                {
								                    dwSurrogateChar <<= 6;
								                    dwSurrogateChar |= LOWER_6_BIT(*pUTF8);

								                    if (nTB == 0)
								                    {
								                        if (cchDest)
								                        {
								                            if ((cchWC + 1) < cchDest)
								                            {
								                                lpDestStr[cchWC]   = (WCHAR)
								                                                     (((dwSurrogateChar - 0x10000) >> 10) + HIGH_SURROGATE_START);

								                                lpDestStr[cchWC+1] = (WCHAR)
								                                                     ((dwSurrogateChar - 0x10000)%0x400 + LOW_SURROGATE_START);
								                            }
								                            else
								                            {
								                                //
								                                //  Error - no room for both halves of the
								                                //  pair.  Leave cchSrc non-negative so the
								                                //  check after the loop reports it.
								                                //
								                                cchSrc++;
								                                break;
								                            }
								                        }

								                        cchWC += 2;
								                        bSurrogatePair = FALSE;
								                    }
								                }
								                else
								                {
								                    //
								                    //  Make room for the trail byte and add the trail byte
								                    //  value.
								                    //
								                    if (cchDest)
								                    {
								                        lpDestStr[cchWC] <<= 6;
								                        lpDestStr[cchWC] |= LOWER_6_BIT(*pUTF8);
								                    }

								                    if (nTB == 0)
								                    {
								                        //
								                        //  End of sequence.  Advance the output counter.
								                        //
								                        cchWC++;
								                    }
								                }
								            }
								            else
								            {
								                // error - not expecting a trail byte
								                bSurrogatePair = FALSE;
								            }
								        }
								        else
								        {
								            //
								            //  Found a lead byte.
								            //
								            if (nTB > 0)
								            {
								                //
								                //  Error - previous sequence not finished.
								                //  The partial character counts as one output character
								                //  and this lead byte is dropped.
								                //
								                nTB = 0;
								                bSurrogatePair = FALSE;
								                cchWC++;
								            }
								            else
								            {
								                //
								                //  Calculate the number of bytes to follow.
								                //  Look for the first 0 from left to right.
								                //
								                UTF8 = *pUTF8;
								                while (BIT7(UTF8) != 0)
								                {
								                    UTF8 <<= 1;
								                    nTB++;
								                }

								                //
								                // If this is a surrogate unicode pair
								                //
								                if (nTB == 4)
								                {
								                    dwSurrogateChar = UTF8 >> nTB;
								                    bSurrogatePair = TRUE;
								                }

								                //
								                //  Store the value from the first byte and decrement
								                //  the number of bytes to follow.
								                //
								                if (cchDest)
								                {
								                    lpDestStr[cchWC] = UTF8 >> nTB;
								                }
								                nTB--;
								            }
								        }

								        pUTF8++;
								    }

								    //
								    //  Make sure the destination buffer was large enough.
								    //  cchSrc is -1 only when the loop consumed the whole input.
								    //
								    if (cchDest && (cchSrc >= 0))
								    {
								        SetLastError(ERROR_INSUFFICIENT_BUFFER);
								        return (0);
								    }

								    //
								    //  Return the number of Unicode characters written.
								    //
								    return (cchWC);
								}


								////////////////////////////////////////////////////////////////////////////
								//
								//  _WideCharToUTF8
								//
								//  Maps a Unicode character string to its UTF-8 string counterpart.
								//
								//  lpSrcStr  - wide characters to convert.
								//  cchSrc    - number of WCHARs at lpSrcStr.
								//  lpDestStr - receives the UTF-8 bytes; not accessed when cchDest is 0.
								//  cchDest   - capacity of lpDestStr in bytes, or 0 to only count.
								//
								//  Returns the number of UTF-8 bytes generated (with cchDest == 0, the
								//  number a conversion would generate).  When cchDest is nonzero and the
								//  buffer is too small, sets ERROR_INSUFFICIENT_BUFFER and returns 0.
								//
								//  A high surrogate followed by a low surrogate becomes one 4-byte
								//  sequence.  A high surrogate that is not followed by a low surrogate is
								//  encoded on its own as a 3-byte sequence.
								//
								//  02-06-96    JulieB    Created.
								////////////////////////////////////////////////////////////////////////////

								ASN1int32_t _WideCharToUTF8
								(
								    /* in */    WCHAR              *lpSrcStr,
								    /* in */    ASN1int32_t         cchSrc,
								    /* out */   ASN1char_t         *lpDestStr,
								    /* in */    ASN1int32_t         cchDest
								)
								{
								    LPCWSTR lpWC = lpSrcStr;
								    int     cchU8 = 0;                // # of UTF8 chars generated
								    DWORD   dwSurrogateChar;
								    WCHAR   wchHighSurrogate = 0;     // pending high surrogate, 0 if none
								    BOOL    bHandled;

								    while ((cchSrc--) && ((cchDest == 0) || (cchU8 < cchDest)))
								    {
								        bHandled = FALSE;

								        //
								        // Check if high surrogate is available
								        //
								        if ((*lpWC >= HIGH_SURROGATE_START) && (*lpWC <= HIGH_SURROGATE_END))
								        {
								            if (cchDest)
								            {
								                // Another high surrogate, then treat the 1st as normal
								                // Unicode character.
								                if (wchHighSurrogate)
								                {
								                    if ((cchU8 + 2) < cchDest)
								                    {
								                        lpDestStr[cchU8++] = UTF8_1ST_OF_3 | HIGHER_6_BIT(wchHighSurrogate);
								                        lpDestStr[cchU8++] = UTF8_TRAIL    | MIDDLE_6_BIT(wchHighSurrogate);
								                        lpDestStr[cchU8++] = UTF8_TRAIL    | LOWER_6_BIT(wchHighSurrogate);
								                    }
								                    else
								                    {
								                        // not enough buffer
								                        cchSrc++;
								                        break;
								                    }
								                }
								            }
								            else
								            {
								                // Counting only: reserve 3 bytes now; a matching low
								                // surrogate adds the 4th byte below.
								                cchU8 += 3;
								            }
								            wchHighSurrogate = *lpWC;
								            bHandled = TRUE;
								        }

								        if (!bHandled && wchHighSurrogate)
								        {
								            if ((*lpWC >= LOW_SURROGATE_START) && (*lpWC <= LOW_SURROGATE_END))
								            {
								                 // wheee, valid surrogate pairs

								                 if (cchDest)
								                 {
								                     if ((cchU8 + 3) < cchDest)
								                     {
								                         dwSurrogateChar = (((wchHighSurrogate-0xD800) << 10) + (*lpWC - 0xDC00) + 0x10000);

								                         lpDestStr[cchU8++] = (UTF8_1ST_OF_4 |
								                                               (unsigned char)(dwSurrogateChar >> 18));           // 3 bits from 1st byte

								                         lpDestStr[cchU8++] =  (UTF8_TRAIL |
								                                                (unsigned char)((dwSurrogateChar >> 12) & 0x3f)); // 6 bits from 2nd byte

								                         lpDestStr[cchU8++] = (UTF8_TRAIL |
								                                               (unsigned char)((dwSurrogateChar >> 6) & 0x3f));   // 6 bits from 3rd byte

								                         lpDestStr[cchU8++] = (UTF8_TRAIL |
								                                               (unsigned char)(0x3f & dwSurrogateChar));          // 6 bits from 4th byte
								                     }
								                     else
								                     {
								                        // not enough buffer
								                        cchSrc++;
								                        break;
								                     }
								                 }
								                 else
								                 {
								                     // we already counted 3 previously (in high surrogate)
								                     cchU8 += 1;
								                 }

								                 bHandled = TRUE;
								            }
								            else
								            {
								                 // Bad Surrogate pair : ERROR
								                 // Just process wchHighSurrogate , and the code below will
								                 // process the current code point
								                 if (cchDest)
								                 {
								                     if ((cchU8 + 2) < cchDest)
								                     {
								                        lpDestStr[cchU8++] = UTF8_1ST_OF_3 | HIGHER_6_BIT(wchHighSurrogate);
								                        lpDestStr[cchU8++] = UTF8_TRAIL    | MIDDLE_6_BIT(wchHighSurrogate);
								                        lpDestStr[cchU8++] = UTF8_TRAIL    | LOWER_6_BIT(wchHighSurrogate);
								                     }
								                     else
								                     {
								                        // not enough buffer
								                        cchSrc++;
								                        break;
								                     }
								                 }
								            }

								            wchHighSurrogate = 0;
								        }

								        if (!bHandled)
								        {
								            if (*lpWC <= ASCII)
								            {
								                //
								                //  Found ASCII.
								                //
								                if (cchDest)
								                {
								                    //
								                    //  The loop test only vouches for the start of this
								                    //  iteration; flushing a lone high surrogate above may
								                    //  have filled the buffer since then.
								                    //
								                    if (cchU8 < cchDest)
								                    {
								                        lpDestStr[cchU8] = (char)*lpWC;
								                    }
								                    else
								                    {
								                        //
								                        //  Error - buffer too small.
								                        //
								                        cchSrc++;
								                        break;
								                    }
								                }
								                cchU8++;
								            }
								            else if (*lpWC <= UTF8_2_MAX)
								            {
								                //
								                //  Found 2 byte sequence if < 0x07ff (11 bits).
								                //
								                if (cchDest)
								                {
								                    if ((cchU8 + 1) < cchDest)
								                    {
								                        //
								                        //  Use upper 5 bits in first byte.
								                        //  Use lower 6 bits in second byte.
								                        //
								                        lpDestStr[cchU8++] = UTF8_1ST_OF_2 | (*lpWC >> 6);
								                        lpDestStr[cchU8++] = UTF8_TRAIL    | LOWER_6_BIT(*lpWC);
								                    }
								                    else
								                    {
								                        //
								                        //  Error - buffer too small.
								                        //
								                        cchSrc++;
								                        break;
								                    }
								                }
								                else
								                {
								                    cchU8 += 2;
								                }
								            }
								            else
								            {
								                //
								                //  Found 3 byte sequence.
								                //
								                if (cchDest)
								                {
								                    if ((cchU8 + 2) < cchDest)
								                    {
								                        //
								                        //  Use upper  4 bits in first byte.
								                        //  Use middle 6 bits in second byte.
								                        //  Use lower  6 bits in third byte.
								                        //
								                        lpDestStr[cchU8++] = UTF8_1ST_OF_3 | HIGHER_6_BIT(*lpWC);
								                        lpDestStr[cchU8++] = UTF8_TRAIL    | MIDDLE_6_BIT(*lpWC);
								                        lpDestStr[cchU8++] = UTF8_TRAIL    | LOWER_6_BIT(*lpWC);
								                    }
								                    else
								                    {
								                        //
								                        //  Error - buffer too small.
								                        //
								                        cchSrc++;
								                        break;
								                    }
								                }
								                else
								                {
								                    cchU8 += 3;
								                }
								            }
								        }

								        lpWC++;
								    }

								    //
								    // If the last character was a high surrogate, then handle it as a normal
								    // unicode character.
								    //
								    if ((cchSrc < 0) && (wchHighSurrogate != 0))
								    {
								        if (cchDest)
								        {
								            if ((cchU8 + 2) < cchDest)
								            {
								                lpDestStr[cchU8++] = UTF8_1ST_OF_3 | HIGHER_6_BIT(wchHighSurrogate);
								                lpDestStr[cchU8++] = UTF8_TRAIL    | MIDDLE_6_BIT(wchHighSurrogate);
								                lpDestStr[cchU8++] = UTF8_TRAIL    | LOWER_6_BIT(wchHighSurrogate);
								            }
								            else
								            {
								                // Not enough room: make cchSrc non-negative so the
								                // check below reports the short buffer.
								                cchSrc++;
								            }
								        }
								    }

								    //
								    //  Make sure the destination buffer was large enough.
								    //  cchSrc is -1 only when the whole input was consumed and written.
								    //
								    if (cchDest && (cchSrc >= 0))
								    {
								        SetLastError(ERROR_INSUFFICIENT_BUFFER);
								        return (0);
								    }

								    //
								    //  Return the number of UTF-8 characters written.
								    //
								    return (cchU8);
								}


								#else


								//+-------------------------------------------------------------------------

								//

								//  Microsoft Windows

								//

								//  Copyright (C) Microsoft Corporation, 1995 - 1997

								//

								//  File:       utf8.cpp

								//

								//  Contents:   WideChar to/from UTF8 APIs

								//

								//  Functions:  WideCharToUTF8

								//              UTF8ToWideChar

								//

								//  History:    19-Feb-97   philh   created

								//--------------------------------------------------------------------------


								//+-------------------------------------------------------------------------
								//  Alternate implementation (compiled only when the "#if 1" above is
								//  turned off): encodes wide characters as UTF-8 without any surrogate
								//  pair handling.
								//
								//  Value range        Bytes   UTF-8 layout
								//  ----------------   -----   --------------------------
								//  0x0000 - 0x007F    1       0xxxxxxx
								//  0x0080 - 0x07FF    2       110xxxxx 10xxxxxx
								//  0x0800 - 0xFFFF    3       1110xxxx 10xxxxxx 10xxxxxx
								//
								//  cchWideChar < 0 means the length is taken from My_lstrlenW plus one.
								//  cchUTF8 == 0 requests a count only: the number of bytes required is
								//  returned and lpUTF8Str is not written.
								//
								//  Returns the number of bytes written, or 0 when cchUTF8 is negative or
								//  the (nonzero-sized) buffer is too small for the whole result.
								//--------------------------------------------------------------------------
								ASN1int32_t _WideCharToUTF8
								(
								    /* in */    WCHAR              *lpWideCharStr,
								    /* in */    ASN1int32_t         cchWideChar,
								    /* out */   ASN1char_t         *lpUTF8Str,
								    /* in */    ASN1int32_t         cchUTF8
								)
								{
								    ASN1int32_t  cbLeft;
								    WCHAR       *pwchIn = lpWideCharStr;
								    ASN1char_t  *pchOut = lpUTF8Str;

								    if (cchUTF8 < 0)
								    {
								        return 0;
								    }

								    // cbLeft goes negative as soon as the output no longer fits; from
								    // then on nothing is stored and -cbLeft tracks the shortfall.
								    cbLeft = cchUTF8;

								    if (cchWideChar < 0)
								    {
								        cchWideChar = My_lstrlenW(lpWideCharStr) + 1;
								    }

								    while (cchWideChar--)
								    {
								        WCHAR wch = *pwchIn++;

								        if (wch > 0x7FF)
								        {
								            // 16 bits, 3 bytes
								            cbLeft -= 3;
								            if (cbLeft >= 0)
								            {
								                *pchOut++ = (ASN1char_t) (0xE0 | ((wch >> 12) & 0x0F));
								                *pchOut++ = (ASN1char_t) (0x80 | ((wch >> 6) & 0x3F));
								                *pchOut++ = (ASN1char_t) (0x80 | (wch & 0x3F));
								            }
								        }
								        else if (wch > 0x7F)
								        {
								            // 11 bits, 2 bytes
								            cbLeft -= 2;
								            if (cbLeft >= 0)
								            {
								                *pchOut++ = (ASN1char_t) (0xC0 | ((wch >> 6) & 0x1F));
								                *pchOut++ = (ASN1char_t) (0x80 | (wch & 0x3F));
								            }
								        }
								        else
								        {
								            // 7 bits, 1 byte
								            cbLeft--;
								            if (cbLeft >= 0)
								            {
								                *pchOut++ = (ASN1char_t) wch;
								            }
								        }
								    }

								    if (cbLeft >= 0)
								    {
								        // Everything fit: report how much of the buffer was used.
								        return (cchUTF8 - cbLeft);
								    }

								    // Overflowed: a size query gets the required byte count, a real
								    // conversion fails.
								    return (cchUTF8 == 0) ? (-cbLeft) : 0;
								}


								//+-------------------------------------------------------------------------
								//  Alternate implementation (compiled only when the "#if 1" above is
								//  turned off): decodes 1-, 2- and 3-byte UTF-8 sequences into wide
								//  characters.
								//
								//  cchUTF8 < 0 means the length is taken from My_lstrlenA plus one.
								//  cchWideChar == 0 requests a count only: the number of wide characters
								//  required is returned and lpWideCharStr is not written.
								//
								//  Returns the number of wide characters written.  Returns 0 when
								//  cchWideChar is negative, when the (nonzero-sized) buffer is too small,
								//  or when the input is malformed: a lead byte that matches none of the
								//  three supported forms, a sequence cut short by the end of the input,
								//  or a continuation byte that is not of the form 10xxxxxx.
								//--------------------------------------------------------------------------
								ASN1int32_t _UTF8ToWideChar
								(
								    /* in */    ASN1char_t         *lpUTF8Str,
								    /* in */    ASN1int32_t         cchUTF8,
								    /* out */   WCHAR              *lpWideCharStr,
								    /* in */    ASN1int32_t         cchWideChar
								)
								{
								    ASN1int32_t  cchLeft;
								    ASN1char_t  *pchIn = lpUTF8Str;
								    WCHAR       *pwchOut = lpWideCharStr;

								    if (cchWideChar < 0)
								    {
								        return 0;
								    }

								    // cchLeft goes negative as soon as the output no longer fits; from
								    // then on nothing is stored and -cchLeft tracks the shortfall.
								    cchLeft = cchWideChar;

								    if (cchUTF8 < 0)
								    {
								        cchUTF8 = My_lstrlenA(lpUTF8Str) + 1;
								    }

								    while (cchUTF8--)
								    {
								        ASN1char_t chLead = *pchIn++;
								        ASN1char_t chTrail1, chTrail2;
								        WCHAR      wch;

								        if ((chLead & 0x80) == 0)
								        {
								            // 7 bits, 1 byte
								            wch = (WCHAR) chLead;
								        }
								        else if ((chLead & 0xE0) == 0xC0)
								        {
								            // 11 bits, 2 bytes
								            if (--cchUTF8 < 0)
								            {
								                // input ends inside the sequence
								                return 0;
								            }

								            chTrail1 = *pchIn++;
								            if ((chTrail1 & 0xC0) != 0x80)
								            {
								                return 0;
								            }

								            wch = (((WCHAR) chLead   & 0x1F) << 6) |
								                   ((WCHAR) chTrail1 & 0x3F);
								        }
								        else if ((chLead & 0xF0) == 0xE0)
								        {
								            // 16 bits, 3 bytes
								            cchUTF8 -= 2;
								            if (cchUTF8 < 0)
								            {
								                // input ends inside the sequence
								                return 0;
								            }

								            chTrail1 = *pchIn++;
								            chTrail2 = *pchIn++;
								            if ((chTrail1 & 0xC0) != 0x80 || (chTrail2 & 0xC0) != 0x80)
								            {
								                return 0;
								            }

								            wch = (((WCHAR) chLead   & 0x0F) << 12) |
								                  (((WCHAR) chTrail1 & 0x3F) <<  6) |
								                   ((WCHAR) chTrail2 & 0x3F);
								        }
								        else
								        {
								            // stray continuation byte or a lead byte for a longer form
								            return 0;
								        }

								        if (--cchLeft >= 0)
								        {
								            *pwchOut++ = wch;
								        }
								    }

								    if (cchLeft >= 0)
								    {
								        // Everything fit: report how much of the buffer was used.
								        return (cchWideChar - cchLeft);
								    }

								    // Overflowed: a size query gets the required character count, a real
								    // conversion fails.
								    return (cchWideChar == 0) ? (-cchLeft) : 0;
								}


								#endif // 1


								#endif // ENABLE_BER