windows-server-2003/net/ias/services/util/iasutf8.c


								///////////////////////////////////////////////////////////////////////////////

								//

								// Copyright (c) 1998, Microsoft Corp. All rights reserved.

								//

								// FILE

								//

								//    iasutf8.c

								//

								// SYNOPSIS

								//

								//    Defines functions for converting between UTF-8 and Unicode.

								//

								// MODIFICATION HISTORY

								//

								//    01/22/1999    Original version.

								//

								///////////////////////////////////////////////////////////////////////////////


								#include <windows.h>

								#include <iasutf8.h>


								/////////

								// Tests the validity of a UTF-8 trail byte. Must be of the form 10vvvvvv.

								/////////

								#define NOT_TRAIL_BYTE(b) (((BYTE)(b) & 0xC0) != 0x80)


								//////////

								// Returns the number of characters required to hold the converted string. The

								// source string may not contain nulls.  Returns -1 if 'src' is not a valid

								// UTF-8 string.

								//////////

								LONG

								WINAPI

								IASUtf8ToUnicodeLength(

								    PCSTR src,

								    DWORD srclen

								    )

								{

								   LONG nchar;

								   PCSTR end;


								   if (src == NULL) { return 0; }


								   // Number of characters needed.

								   nchar = 0;


								   // End of string to be converted.

								   end = src + srclen;


								   // Loop through the UTF-8 string.

								   while (src < end)

								   {

								      if (*src == 0)

								      {

								         // Do not allow embedded nulls.

								         return -1;

								      }

								      else if ((BYTE)*src < 0x80)

								      {

								         // 0vvvvvvv = 1 byte character.

								      }

								      else if ((BYTE)*src < 0xC0)

								      {

								         // 10vvvvvv = Invalid lead byte.

								         return -1;

								      }

								      else if ((BYTE)*src < 0xE0)

								      {

								         // 110vvvvv = 2 byte character.

								         if (NOT_TRAIL_BYTE(*++src)) { return -1; }

								      }

								      else if ((BYTE)*src < 0xF0)

								      {

								         // 1110vvvv = 3 byte character.

								         if (NOT_TRAIL_BYTE(*++src)) { return -1; }

								         if (NOT_TRAIL_BYTE(*++src)) { return -1; }

								      }

								      else

								      {

								         // In theory, UTF-8 supports 4-6 byte characters, but Windows uses

								         // 16-bit integers for Unicode, so we can't handle them.

								         return -1;

								      }


								      // We successfully parsed a UTF-8 character.

								      ++src;

								      ++nchar;

								   }


								   // Return the number of characters needed.

								   return nchar;

								}


								//////////

								// Returns the number of characters required to hold the converted string.

								//////////

								LONG

								WINAPI

								IASUnicodeToUtf8Length(

								    PCWSTR src,

								    DWORD srclen

								    )

								{

								   LONG nchar;

								   PCWSTR end;


								   if (src == NULL) { return 0; }


								   // Number of characters needed.

								   nchar = 0;


								   // End of string to be converted.

								   end = src + srclen;


								   // Loop through the Unicode string.

								   while (src < end)

								   {

								      if (*src < 0x80)

								      {

								         // 1 byte character.

								         nchar += 1;

								      }

								      else if (*src < 0x800)

								      {

								         // 2 byte character.

								         nchar += 2;

								      }

								      else

								      {

								         // 3 byte character.

								         nchar += 3;

								      }


								      // Advance to the next character in the string.

								      ++src;

								   }


								   // Return the number of characters needed.

								   return nchar;

								}


								/////////

								// Converts a UTF-8 string to Unicode.  Returns the number of characters in the

								// converted string. The source string may not contain nulls. Returns -1 if

								// 'src' is not a valid UTF-8 string.

								/////////

								LONG

								IASUtf8ToUnicode(

								    PCSTR src,

								    DWORD srclen,

								    PWSTR dst

								    )

								{

								   PCWSTR start;

								   PCSTR end;


								   if (!src || !dst) { return 0; }


								   // Remember where we started.

								   start = dst;


								   // End of the string to be converted.

								   end = src + srclen;


								   // Loop through the source UTF-8 string.

								   while (src < end)

								   {

								      if (*src == 0)

								      {

								         // Do not allow embedded nulls.

								         return -1;

								      }

								      else if ((BYTE)*src < 0x80)

								      {

								         // 1 byte character: 0vvvvvvv

								         *dst = *src;

								      }

								      else if ((BYTE)*src < 0xC0)

								      {

								         // Invalid lead byte: 10vvvvvv

								         return -1;

								      }

								      else if ((BYTE)*src < 0xE0)

								      {

								         // 2 byte character: 110vvvvv 10vvvvvv

								         *dst  = (*src & 0x1F) <<  6;

								         if (NOT_TRAIL_BYTE(*++src)) { return -1; }

								         *dst |= (*src & 0x3F);

								      }

								      else if ((BYTE)*src < 0xF0)

								      {

								         // 3 byte character: 1110vvvv 10vvvvvv 10vvvvvv

								         *dst  = (*src & 0x0F) << 12;

								         if (NOT_TRAIL_BYTE(*++src)) { return -1; }

								         *dst |= (*src & 0x3f) <<  6;

								         if (NOT_TRAIL_BYTE(*++src)) { return -1; }

								         *dst |= (*src & 0x3f);

								      }

								      else

								      {

								         // In theory, UTF-8 supports 4-6 byte characters, but Windows uses

								         // 16-bit integers for Unicode, so we can't handle them.

								         return -1;

								      }


								      // Advance to the next character.

								      ++src;

								      ++dst;

								   }


								   // Return the number of characters in the converted string.

								   return  (LONG)(dst - start);

								}


								/////////

								// Converts a Unicode string to UTF-8.  Returns the number of characters in the

								// converted string.

								/////////

								LONG

								IASUnicodeToUtf8(

								    PCWSTR src,

								    DWORD srclen,

								    PSTR dst

								    )

								{

								   PCSTR start;

								   PCWSTR end;


								   if (!src || !dst) { return 0; }


								   // Remember where we started.

								   start = dst;


								   // End of the string to be converted.

								   end = src + srclen;


								   // Loop through the source Unicode string.

								   while (src < end)

								   {

								      if (*src < 0x80)

								      {

								         // Pack as 0vvvvvvv

								         *dst++ = (CHAR)*src;

								      }

								      else if (*src < 0x800)

								      {

								         // Pack as 110vvvvv 10vvvvvv 10vvvvvv

								         *dst++ = (CHAR)(0xC0 | ((*src >>  6) & 0x3F));

								         *dst++ = (CHAR)(0x80 | ((*src      ) & 0x3F));

								      }

								      else

								      {

								         // Pack as 1110vvvv 10vvvvvv 10vvvvvv

								         *dst++ = (CHAR)(0xE0 | ((*src >> 12)       ));

								         *dst++ = (CHAR)(0x80 | ((*src >>  6) & 0x3F));

								         *dst++ = (CHAR)(0x80 | ((*src      ) & 0x3F));

								      }


								      // Advance to the next character.

								      ++src;

								   }


								   // Return the number of characters in the converted string.

								   return  (LONG)(dst - start);

								}