mirror of https://github.com/tongzx/nt5src
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
220 lines
6.8 KiB
220 lines
6.8 KiB
// UTF8.CPP -- Implementation of the Unicode to/from UTF8 conversion routines
|
|
|
|
#include "stdafx.h"
|
|
|
|
// FailWith -- Shared failure exit for the conversion routines in this file.
//
// Hands iError to SetLastError and then yields zero, so a caller can report
// a failure with a single "return FailWith(code);" statement.
inline INT FailWith(INT iError)
{
    const INT iFailureResult = 0;   // Zero is the failure result callers return.

    SetLastError(iError);

    return iFailureResult;
}
|
|
|
|
// WideCharToUTF8 -- Converts a run of 16-bit Unicode characters to UTF-8.
//
//   lpWideCharStr  -- Source characters.
//   cchWideChar    -- Number of source characters to convert, or -1 when the
//                     source is null terminated. In that case the count
//                     becomes wcsLen(lpWideCharStr) + 1 (presumably so that
//                     the terminator is converted too -- confirm against
//                     wcsLen).
//   lpMultiByteStr -- Destination buffer. Never written when cchMultiByte
//                     is zero.
//   cchMultiByte   -- Size of the destination in bytes. Zero requests a size
//                     computation only.
//
// Returns the number of UTF-8 bytes needed to represent the source (zero for
// an empty source). On failure the result is zero and FailWith records:
//
//   ERROR_INVALID_PARAMETER   -- source and destination are the same address,
//                                a count is out of range, or a pointer is
//                                NULL while its count is non-zero.
//   ERROR_INSUFFICIENT_BUFFER -- the destination filled up. Characters that
//                                fit before that point have already been
//                                stored.
//
// Every 16-bit character is converted on its own into one, two, or three
// bytes; surrogate halves are not combined into a four-byte sequence.
int WideCharToUTF8(LPCWSTR lpWideCharStr, int cchWideChar,
                   LPSTR lpMultiByteStr, int cchMultiByte
                  )
{
    if (   PBYTE(lpWideCharStr) == PBYTE(lpMultiByteStr)
        || cchWideChar  < -1
        || cchMultiByte <  0
        || (!lpWideCharStr  && cchWideChar  != 0)  // Would read through NULL
        || (!lpMultiByteStr && cchMultiByte != 0)  // Would write through NULL
       )
        return FailWith(ERROR_INVALID_PARAMETER);

    if (cchWideChar == -1) // -1 means lpWideCharStr is null terminated.
        cchWideChar = wcsLen(lpWideCharStr) + 1;

    int cbNecessary = 0; // Number of UTF8 bytes necessary
                         // to represent the Unicode string

    BOOL fStoring = cchMultiByte > 0; // FALSE: just measure, store nothing.

    for (; cchWideChar--; )
    {
        WCHAR wc = *lpWideCharStr++;

        // Length of the UTF-8 form of this character:
        //   0x0000 - 0x007F -> 1 byte, 0x0080 - 0x07FF -> 2, 0x0800 - 0xFFFF -> 3

        int cbChar = (wc < 0x0080) ? 1
                   : (wc < 0x0800) ? 2
                   :                 3;

        cbNecessary += cbChar;

        if (!fStoring)
            continue;

        if (cchMultiByte < cbChar)
            return FailWith(ERROR_INSUFFICIENT_BUFFER);

        cchMultiByte -= cbChar;

        if (cbChar == 1)
        {
            *lpMultiByteStr++ = BYTE(wc);
        }
        else if (cbChar == 2)
        {
            *lpMultiByteStr++ = 0xC0 | (wc >> 6);
            *lpMultiByteStr++ = 0x80 | (wc & 0x3F);
        }
        else
        {
            *lpMultiByteStr++ = 0xE0 | ( wc >> 12);
            *lpMultiByteStr++ = 0x80 | ((wc >>  6) & 0x3F);
            *lpMultiByteStr++ = 0x80 | ( wc        & 0x3F);
        }
    }

    return cbNecessary;
}
|
|
|
|
// UTF8ToWideChar -- Converts a UTF-8 byte sequence to 16-bit Unicode characters.
//
//   lpMultiByteStr -- Source UTF-8 bytes.
//   cchMultiByte   -- Number of source bytes to convert, or -1 when the source
//                     is null terminated. In that case the count becomes
//                     lstrlenA(lpMultiByteStr) + 1.
//   lpWideCharStr  -- Destination buffer. Never written when cchWideChar
//                     is zero.
//   cchWideChar    -- Size of the destination in characters. Zero requests a
//                     size computation only.
//
// Returns the number of Unicode characters the source decodes to (zero for an
// empty source). On failure the result is zero and FailWith records:
//
//   ERROR_INVALID_PARAMETER      -- source and destination are the same
//                                   address, a count is out of range, or a
//                                   pointer is NULL while its count is
//                                   non-zero.
//   ERROR_NO_UNICODE_TRANSLATION -- the source holds a stray trailing byte, a
//                                   truncated sequence, a trailing byte that
//                                   is not of the form 10xxxxxx, or a lead
//                                   byte of 0xF0 or above.
//   ERROR_INSUFFICIENT_BUFFER    -- the destination filled up. Characters
//                                   that fit before that point have already
//                                   been stored.
//
// Only one-, two-, and three-byte sequences are decoded, because a single
// 16-bit character is produced per sequence. The decoded value is not checked
// for being the shortest possible encoding.
int UTF8ToWideChar(LPCSTR lpMultiByteStr, int cchMultiByte,
                   LPWSTR lpWideCharStr, int cchWideChar
                  )
{
    if (   PBYTE(lpWideCharStr) == PBYTE(lpMultiByteStr)
        || cchMultiByte < -1
        || cchWideChar  <  0
        || (!lpMultiByteStr && cchMultiByte != 0)  // Would read through NULL
        || (!lpWideCharStr  && cchWideChar  != 0)  // Would write through NULL
       )
        return FailWith(ERROR_INVALID_PARAMETER);

    if (cchMultiByte == -1) // -1 means lpMultiByteStr is null terminated
        cchMultiByte = lstrlenA(lpMultiByteStr) + 1;

    int cwcNecessary = 0; // Number of Unicode characters necessary to
                          // represent the UTF8 sequence.

    BOOL fStoring = cchWideChar > 0; // FALSE: just measure, store nothing.

    for (; cchMultiByte--; cwcNecessary++)
    {
        BYTE  b = *lpMultiByteStr++;
        WCHAR wc;

        if (b < 0x80) // An ASCII character
        {
            wc = WCHAR(b);
        }
        else if (b < 0xC0) // Trailing character in a multibyte code
        {
            return FailWith(ERROR_NO_UNICODE_TRANSLATION);
        }
        else if (b < 0xE0) // First character of a two-byte code
        {
            if (cchMultiByte <= 0) // Do we have a second byte?
                return FailWith(ERROR_NO_UNICODE_TRANSLATION);

            cchMultiByte--;

            BYTE b2 = *lpMultiByteStr++;

            if ((b2 & 0xC0) != 0x80) // Trailing byte must
                                     // have the form 10xxxxxx
                return FailWith(ERROR_NO_UNICODE_TRANSLATION);

            wc = WCHAR(((b & 0x1F) << 6) | (b2 & 0x3F));
        }
        else if (b < 0xF0) // First character of a three-byte code
        {
            if (cchMultiByte <= 1) // Do we have two more bytes?
                return FailWith(ERROR_NO_UNICODE_TRANSLATION);

            cchMultiByte -= 2;

            BYTE b2 = *lpMultiByteStr++;
            BYTE b3 = *lpMultiByteStr++;

            if (   (b2 & 0xC0) != 0x80 // Trailing bytes must
                || (b3 & 0xC0) != 0x80 // have the form 10xxxxxx
               )
                return FailWith(ERROR_NO_UNICODE_TRANSLATION);

            wc = WCHAR(((b & 0x0F) << 12) | ((b2 & 0x3F) << 6) | (b3 & 0x3F));
        }
        else // 0xF0 - 0xFF: lead of a longer sequence, or never valid
        {
            // Such a byte has more than four leading one bits' worth of
            // prefix than the three-byte form allows; masking it with 0x0F
            // would silently produce a wrong character, so reject it here.

            return FailWith(ERROR_NO_UNICODE_TRANSLATION);
        }

        if (fStoring)
        {
            if (cchWideChar <= 0)
                return FailWith(ERROR_INSUFFICIENT_BUFFER);

            cchWideChar--;

            *lpWideCharStr++ = wc;
        }
    }

    return cwcNecessary;
}
|
|
|
|
UINT BuildAKey(const WCHAR *pwcImage, UINT cwcImage, PCHAR pchKeyBuffer, UINT cchKeyBuffer)
{
    // This routine constructs a key from a sequence of Unicode characters.
    // A key consists of a packed-32 length value followed by a UTF-8 representation
    // of the Unicode characters. The resulting key will be stored in the buffer
    // denoted by pchKeyBuffer and cchKeyBuffer. The cchKeyBuffer parameter defines
    // the size of the key buffer in bytes.
    //
    // The result value will always be the number of bytes required to hold the key.
    // So you can dynamically allocate the key buffer by first calling this routine
    // with pchKeyBuffer set to NULL, allocating from the heap, and calling a second
    // time to record the key string.
    //
    // When the buffer is smaller than the result value, only the leading part of
    // the key that fits is stored; the caller must compare the result against
    // cchKeyBuffer to detect that.

    // Measure the UTF-8 form first; its byte count is the length value.

    UINT cbKeyName = WideCharToUTF8(pwcImage, cwcImage, NULL, 0);

    PCHAR pchCursor = pchKeyBuffer;

    UINT cbSize = 0; // Bytes occupied by the length prefix.

    // Emit the length seven bits at a time, low-order group first. The final
    // byte holds a value below 0x80. This loop must also terminate when there
    // is no buffer, because that is how callers ask for the required size.

    for (UINT c = cbKeyName; ; c >>= 7)
    {
        cbSize++;

        // cbSize is now the 1-based position of the byte being produced, so
        // it fits in the buffer exactly when cbSize <= cchKeyBuffer.

        if (c < 0x80)
        {
            if (pchCursor && cbSize <= cchKeyBuffer)
                *pchCursor++ = CHAR(c);

            break;
        }

        // NOTE(review): 0x10 overlaps the seven payload bits, so a reader
        // cannot tell a marked byte from plain data. A marker of 0x80 looks
        // intended -- confirm against the code that reads these keys before
        // changing the stored format.

        if (pchCursor && cbSize <= cchKeyBuffer)
            *pchCursor++ = CHAR(c & 0x7F) | 0x10;
    }

    // Store the name only when room remains after the prefix. This also keeps
    // the unsigned subtraction below from wrapping around.

    if (pchCursor && cchKeyBuffer > cbSize)
        WideCharToUTF8(pwcImage, cwcImage, pchCursor, cchKeyBuffer - cbSize);

    return cbSize + cbKeyName;
}
|