Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

56 lines
2.8 KiB

  1. // UTF8.h -- Interface definition for conversions between Unicode and the UTF8 representation
  2. #ifndef __UTF8_H__
  3. #define __UTF8_H__
  4. // UTF8 is a multibyte encoding of 16-bit Unicode characters. Its primary purpose
  5. // is to provide a transmission form to take Unicode text through host environments
  6. // that assume all text is ASCII text. In particular many of those environments will
  7. // interpret a zero byte as marking the end of a text string.
  8. //
  9. // The UTF8 encoding guarantees that the ASCII section of Unicode (0x0000 - 0x007F)
  10. // is represented by 8-bit ASCII codes (0x00 - 0x7F). Thus any environment which
  11. // expects to see ASCII characters will see no difference when those ASCII characters
  12. // appear in a UTF8 stream.
  13. //
  14. // Those are the only single-byte encodings in UTF8. All other Unicode values are
  15. // represented with two or three byte codes. In those encodings all the byte values
  16. // have their high bit set. Thus the appearance of a byte in the range
  17. // 0x00-0x7F always represents an ASCII character.
  18. //
  19. // Values in the range 0x0080 through 0x07FF are encoded in two bytes, while values
  20. // in the range 0x0800 through 0xFFFF are encoded with three bytes. The first byte
  21. // in an encoding defines the length of the encoding by the number of high order bits
  22. // set to one. Thus a two byte code has a first byte value of the form 110xxxxx and
  23. // the first byte of a three byte code has the form 1110xxxx. Trailing bytes always
  24. // have the form 10xxxxxx so they won't be mistaken for ASCII characters.
  25. //
  26. // Note that two byte codes represent values that have zeroes in the five high-order
  27. // bit positions. That means they can be represented in 11 bits. So we store those
  28. // eleven bits with the high order five bits in the first encoding byte, and we store
  29. // the low order six bits in the second byte of the code.
  30. //
  31. // Similarly for a three-byte code we store the high-order four bits in the first byte,
  32. // we put the next six bits in the second byte, and we store the low order six bits
  33. // in the third byte.
  34. #define MAX_UTF8_PATH (MAX_PATH*3 - 2) // Worst case expansion from Unicode
  35. // path to UTF-8 encoded path. NOTE(review): presumably (MAX_PATH - 1) characters at up to three bytes each plus one terminator byte, which equals MAX_PATH*3 - 2 -- confirm. MAX_PATH is not defined in the lines shown here and must come from an earlier include.
  36. int WideCharToUTF8 // Unicode (wide-character) to UTF-8 conversion; declaration only, the body is not in this header.
  37. (LPCWSTR lpWideCharStr, // address of the wide-character source string (read-only)
  38. int cchWideChar, // number of wide characters in lpWideCharStr
  39. LPSTR lpMultiByteStr, // address of the buffer that receives the new (UTF-8) string
  40. int cchMultiByte // size of the lpMultiByteStr buffer; NOTE(review): presumably counted in bytes -- confirm
  41. ); // NOTE(review): the meaning of the int result is not stated here; the parameter list resembles Win32 WideCharToMultiByte, so it is presumably a count of bytes produced -- confirm against the implementation.
  42. int UTF8ToWideChar // UTF-8 to Unicode (wide-character) conversion; declaration only, the body is not in this header.
  43. (LPCSTR lpMultiByteStr, // address of the UTF-8 source string to map (read-only)
  44. int cchMultiByte, // number of characters in string; NOTE(review): for UTF-8 input this is presumably a byte count, not a count of encoded characters -- confirm
  45. LPWSTR lpWideCharStr, // address of the wide-character buffer that receives the result
  46. int cchWideChar // size of the lpWideCharStr buffer; NOTE(review): presumably counted in wide characters -- confirm
  47. ); // NOTE(review): the meaning of the int result is not stated here; the parameter list resembles Win32 MultiByteToWideChar, so it is presumably a count of wide characters produced -- confirm against the implementation.
  48. UINT BuildAKey(const WCHAR *pwcImage, UINT cwcImage, PCHAR pchKeyBuffer, UINT cchKeyBuffer); // Declaration only. NOTE(review): judging by the names, this presumably writes a key derived from the cwcImage wide characters at pwcImage into pchKeyBuffer (capacity cchKeyBuffer) and returns a length -- none of this is shown in this header; confirm against the implementation.
  49. #endif // __UTF8_H__