windows-xp/Source/XPSP1/NT/enduser/stuff/itss/utf8.h


								// UTF8.h -- Interface definition for conversions between Unicode and the UTF8 representation


								#ifndef __UTF8_H__


								#define __UTF8_H__


								// UTF8 is a multibyte encoding of 16-bit Unicode characters. Its primary purpose

								// is to provide a transmission form to take Unicode text through host environments

								// that assume all text is ASCII text.In particular many of those environments will

								// interpret a zero byte as marking the end of a text string.

								//

								// The UTF8 encoding guarantees that the ASCII section of Unicode (0x0000 - 0x007F)

								// is represented by 8-bit ASCII codes (0x00 - 0x7F). Thus any environment which

								// expects to see ASCII characters will see no difference when those ASCII characters

								// appear in a UTF8 stream.

								//

								// Those are the only single-byte encodings in UTF8. All other Unicode values are

								// represented with two or three byte codes. In those encodings all the byte values

								// values have their high bit set. Thus the appearance of a byte in the range

								// 0x00-0x7F always represents an ASCII character.

								//

								// Values in the range 0x0080 through 0x07FF are encoded in two bytes, while values

								// in the range 0x0x0800 through 0xFFFF are encoded with three bytes. The first byte

								// in an encoding defines the length of the encoding by the number of high order bits

								// set to one. Thus a two byte code has a first byte value of the form 110xxxxx and

								// the first byte of a three byte code has the form 1110xxxx. Trailing bytes always

								// have the form 10xxxxxx so they won't be mistaken for ASCII characters.

								//

								// Note that two byte codes represent values that have zeroes in the five high-order

								// bit positions. That means they can be represented in 11 bits. So we store those

								// eleven bits with the high order five bits in the first encoding byte, and we store

								// the low order six bits in the second byte of the code.

								//

								// Similarly for a three-byte code we store the high-order four-bits in the first byte,

								// we put the next six bits in the second code, and we store the low order six bits

								// in the third code.


								#define MAX_UTF8_PATH   (MAX_PATH*3 - 2)  // Worst case expansion from Unicode

								                                          // path to UTF-8 encoded path.

								int WideCharToUTF8

								    (LPCWSTR lpWideCharStr,	// address of wide-character string

								     int cchWideChar,	    // number of characters in string

								     LPSTR lpMultiByteStr,	// address of buffer for new string

								     int cchMultiByte 	    // size of buffer

								    );


								int UTF8ToWideChar

								    (LPCSTR lpMultiByteStr,	// address of string to map

								     int cchMultiByte,	    // number of characters in string

								     LPWSTR lpWideCharStr,	// address of wide-character buffer

								     int cchWideChar    	// size of buffer

								    );


								UINT BuildAKey(const WCHAR *pwcImage, UINT cwcImage, PCHAR pchKeyBuffer, UINT cchKeyBuffer);


								#endif // __UTF8_H__