Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

220 lines
6.8 KiB

  1. // UTF8.CPP -- Implementation of the Unicode to/from UTF8 conversion routines
  2. #include "stdafx.h"
  3. inline INT FailWith(INT iError)
  4. {
  5. SetLastError(iError);
  6. return 0;
  7. }
  8. int WideCharToUTF8(LPCWSTR lpWideCharStr, int cchWideChar,
  9. LPSTR lpMultiByteStr, int cchMultiByte
  10. )
  11. {
  12. if ( PBYTE(lpWideCharStr) == PBYTE(lpMultiByteStr)
  13. || cchWideChar < -1
  14. || cchMultiByte < 0
  15. )
  16. return FailWith(ERROR_INVALID_PARAMETER);
  17. if (cchWideChar == -1) // -1 means lpWideCharStr is null terminated.
  18. cchWideChar = wcsLen(lpWideCharStr) + 1;
  19. int cbNecessary = 0; // Number of UTF8 bytes necessary
  20. // to represent the Unicode string
  21. BOOL fStoring = cchMultiByte > 0;
  22. for (; cchWideChar--; )
  23. {
  24. WCHAR wc= *lpWideCharStr++;
  25. if (wc < 0x0080) // ASCII characters
  26. {
  27. cbNecessary++;
  28. if (fStoring)
  29. if (cchMultiByte > 0)
  30. {
  31. *lpMultiByteStr++ = BYTE(wc);
  32. --cchMultiByte;
  33. }
  34. else
  35. return FailWith(ERROR_INSUFFICIENT_BUFFER);
  36. }
  37. else
  38. if (wc < 0x0800) // 0x0080 - 0x07FF
  39. {
  40. cbNecessary += 2;
  41. if (fStoring)
  42. if (cchMultiByte > 1)
  43. {
  44. cchMultiByte -= 2;
  45. *lpMultiByteStr++ = 0xC0 | (wc >> 6);
  46. *lpMultiByteStr++ = 0x80 | (wc & 0x3F);
  47. }
  48. else
  49. return FailWith(ERROR_INSUFFICIENT_BUFFER);
  50. }
  51. else // 0x0800 - 0xFFFF
  52. {
  53. cbNecessary += 3;
  54. if (fStoring)
  55. if (cchMultiByte > 2)
  56. {
  57. cchMultiByte -= 3;
  58. *lpMultiByteStr++ = 0xE0 | ( wc >> 12);
  59. *lpMultiByteStr++ = 0x80 | ((wc >> 6) & 0x3F);
  60. *lpMultiByteStr++ = 0x80 | ( wc & 0x3F);
  61. }
  62. else
  63. return FailWith(ERROR_INSUFFICIENT_BUFFER);
  64. }
  65. }
  66. return cbNecessary;
  67. }
  68. int UTF8ToWideChar(LPCSTR lpMultiByteStr, int cchMultiByte,
  69. LPWSTR lpWideCharStr, int cchWideChar
  70. )
  71. {
  72. if ( PBYTE(lpWideCharStr) == PBYTE(lpMultiByteStr)
  73. || cchMultiByte < -1
  74. || cchWideChar < 0
  75. )
  76. return FailWith(ERROR_INVALID_PARAMETER);
  77. if (cchMultiByte == -1) // -1 means lpMultiByteStr is null terminated
  78. cchMultiByte = lstrlenA(lpMultiByteStr) + 1;
  79. int cwcNecessary = 0; // Number of Unicode characters necessary to
  80. // represent the UTF8 sequence.
  81. BOOL fStoring = cchWideChar > 0;
  82. for (; cchMultiByte--; cwcNecessary++)
  83. {
  84. BYTE b= *lpMultiByteStr++;
  85. if (b < 0x80) // An ASCII character
  86. {
  87. if (fStoring)
  88. if (cchWideChar > 0)
  89. {
  90. cchWideChar--;
  91. *lpWideCharStr++ = WCHAR(b);
  92. }
  93. else
  94. return FailWith(ERROR_INSUFFICIENT_BUFFER);
  95. }
  96. else
  97. if (b < 0xC0) // Trailing character in a multibyte code
  98. return FailWith(ERROR_NO_UNICODE_TRANSLATION);
  99. else
  100. if (b < 0xE0) // First character of a two-byte code
  101. {
  102. if (cchMultiByte <= 0) // Do we have a second byte?
  103. return FailWith(ERROR_NO_UNICODE_TRANSLATION);
  104. cchMultiByte--;
  105. BYTE b2 = *lpMultiByteStr++;
  106. if ((b2 & 0xC0) != 0x80) // Trailing byte must
  107. // have the form 10xxxxxx
  108. return FailWith(ERROR_NO_UNICODE_TRANSLATION);
  109. if (fStoring)
  110. if (cchWideChar > 0)
  111. {
  112. cchWideChar--;
  113. *lpWideCharStr++ = ((b & 0x1F) << 6) | (b2 & 0x3F);
  114. }
  115. else
  116. return FailWith(ERROR_INSUFFICIENT_BUFFER);
  117. }
  118. else // First character of a three-byte code
  119. {
  120. if (cchMultiByte <= 1) // Do we have two more bytes?
  121. return FailWith(ERROR_NO_UNICODE_TRANSLATION);
  122. cchMultiByte -= 2;
  123. BYTE b2 = *lpMultiByteStr++;
  124. BYTE b3 = *lpMultiByteStr++;
  125. if ( (b2 & 0xC0) != 0x80 // Trailing bytes must
  126. || (b3 & 0xC0) != 0x80 // have the form 10xxxxxx
  127. )
  128. return FailWith(ERROR_NO_UNICODE_TRANSLATION);
  129. if (fStoring)
  130. if (cchWideChar > 0)
  131. {
  132. cchWideChar--;
  133. *lpWideCharStr++ = ((b & 0x0F) << 12) | ((b2 & 0x3F) << 6)
  134. | (b3 & 0x3F);
  135. }
  136. else
  137. return FailWith(ERROR_INSUFFICIENT_BUFFER);
  138. }
  139. }
  140. return cwcNecessary;
  141. }
  142. UINT BuildAKey(const WCHAR *pwcImage, UINT cwcImage, PCHAR pchKeyBuffer, UINT cchKeyBuffer)
  143. {
  144. // This routine constructs a key from a sequence of Unicode characters.
  145. // A key consists of a packed-32 length value followed by a UTF-8 representation
  146. // of the Unicode characters. The resulting key will be stored in the buffer
  147. // denoted by pchKeyBuffer and cchKeyBuffer. The cchKeyBuffer parameter defines
  148. // the size of the key buffer in bytes.
  149. //
  150. // The result value will always be the number of byte required to hold the key.
  151. // So you can dyamically allocate the key buffer by first calling this routine
  152. // with pchKeyBuffer set to NULL, allocating from the heap, and calling a second
  153. // time to record the key string.
  154. UINT cbKeyName = WideCharToUTF8(pwcImage, cwcImage, NULL, 0);
  155. PCHAR pchCursor= pchKeyBuffer;
  156. UINT cbSize= 0;
  157. for (UINT c= cbKeyName; ; )
  158. {
  159. cbSize++;
  160. if (pchCursor)
  161. {
  162. if (c < 0x80)
  163. {
  164. if (cbSize < cchKeyBuffer)
  165. *pchCursor++ = CHAR(c);
  166. break;
  167. }
  168. if (cbSize < cchKeyBuffer)
  169. *pchCursor++ = CHAR(c & 0x7F) | 0x10;
  170. }
  171. c >>= 7;
  172. }
  173. if (pchCursor)
  174. WideCharToUTF8(pwcImage, cwcImage, pchCursor, cchKeyBuffer - cbSize);
  175. return cbSize + cbKeyName;
  176. }