Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

201 lines
4.7 KiB

  1. #include "precomp.h"
  2. #pragma hdrstop
  3. #include "utf8str.h"
  4. CUTF8String::~CUTF8String()
  5. {
  6. if (ALLOC_UNICODE & m_eAlloc)
  7. {
  8. delete m_pwszUnicode;
  9. }
  10. else if (ALLOC_UTF8 & m_eAlloc)
  11. {
  12. delete m_pszUTF8;
  13. }
  14. }
  15. CUTF8String::operator LPWSTR()
  16. {
  17. if ((NULL == m_pwszUnicode) && (NULL != m_pszUTF8))
  18. {
  19. DecodeUTF8();
  20. }
  21. return m_pwszUnicode;
  22. }
  23. CUTF8String::operator LPSTR()
  24. {
  25. if ((NULL == m_pszUTF8) && (NULL != m_pwszUnicode))
  26. {
  27. EncodeUTF8();
  28. }
  29. return m_pszUTF8;
  30. }
  31. VOID CUTF8String::EncodeUTF8()
  32. {
  33. m_hr = S_OK;
  34. int cchUTF8 = 1; // always include a NULL terminator
  35. // First make a pass to see how many characters we will be converting.
  36. LPWSTR pwsz = m_pwszUnicode;
  37. while (L'\0' != *pwsz)
  38. {
  39. WCHAR wch = *pwsz++;
  40. if (wch < 0x80)
  41. {
  42. cchUTF8 += 1;
  43. }
  44. else if (wch < 0x800)
  45. {
  46. cchUTF8 += 2;
  47. }
  48. else
  49. {
  50. cchUTF8 += 3;
  51. }
  52. }
  53. m_pszUTF8 = new CHAR[cchUTF8];
  54. if (NULL != m_pszUTF8)
  55. {
  56. m_eAlloc = ALLOC_UTF8;
  57. // Start encoding here:
  58. const BYTE cUtf8FirstSignal[4] = {0x00, 0x00, 0xC0, 0xE0};
  59. const BYTE cMask = 0xBF;
  60. const BYTE cSignal = 0x80;
  61. LPSTR pszStop = m_pszUTF8 + cchUTF8;
  62. LPSTR pszDst = m_pszUTF8;
  63. pwsz = m_pwszUnicode;
  64. while (pszDst < pszStop)
  65. {
  66. WCHAR wch = *pwsz++;
  67. #ifdef DEBUG
  68. if (L'\0' == wch)
  69. {
  70. }
  71. #endif // DEBUG
  72. int cchTotal;
  73. if (wch < 0x80)
  74. {
  75. cchTotal = 1;
  76. }
  77. else if (wch < 0x800)
  78. {
  79. cchTotal = 2;
  80. }
  81. else
  82. {
  83. cchTotal = 3;
  84. }
  85. pszDst += cchTotal;
  86. switch (cchTotal)
  87. {
  88. case 3:
  89. *--pszDst = (wch | cSignal) & cMask;
  90. wch >>= 6;
  91. // FALL THROUGH
  92. case 2:
  93. *--pszDst = (wch | cSignal) & cMask;
  94. wch >>= 6;
  95. // FALL THROUGH
  96. case 1:
  97. *--pszDst = (wch | cUtf8FirstSignal[cchTotal]);
  98. }
  99. pszDst += cchTotal;
  100. }
  101. m_hr = S_OK;
  102. }
  103. else
  104. {
  105. m_hr = E_OUTOFMEMORY;
  106. }
  107. }
  108. VOID CUTF8String::DecodeUTF8()
  109. {
  110. m_hr = S_OK;
  111. int cchUnicode = 1; // always include a NULL terminator
  112. LPSTR psz = m_pszUTF8;
  113. // First determine the destination size (cchUnicode)
  114. while ('\0' != *psz)
  115. {
  116. int cbChar = 0;
  117. BYTE bFirst = (BYTE) *psz;
  118. while (bFirst & 0x80)
  119. {
  120. cbChar++;
  121. bFirst <<= 1;
  122. }
  123. cbChar = max(1, cbChar);
  124. psz += cbChar;
  125. cchUnicode++;
  126. }
  127. m_pwszUnicode = new WCHAR[cchUnicode];
  128. if (NULL != m_pwszUnicode)
  129. {
  130. m_eAlloc = ALLOC_UNICODE;
  131. // Start decoding here:
  132. LPWSTR pwszStop = m_pwszUnicode + cchUnicode;
  133. LPWSTR pwszDst = m_pwszUnicode;
  134. psz = m_pszUTF8;
  135. while (pwszDst < pwszStop)
  136. {
  137. int cbChar = 0;
  138. BYTE bFirst = (BYTE) *psz;
  139. while (bFirst & 0x80)
  140. {
  141. cbChar++;
  142. bFirst <<= 1;
  143. }
  144. BOOL fValid = TRUE;
  145. WCHAR wch = L'\0';
  146. switch (cbChar)
  147. {
  148. case 6: psz++; // FALLTHROUGH // we don't handle
  149. case 5: psz++; // FALLTHROUGH // UCS-4; skip first
  150. case 4: psz++; // FALLTHROUGH // three bytes
  151. case 3:
  152. wch = WCHAR(*psz++ & 0x0f) << 12; // 0x0800 - 0xffff
  153. fValid = fValid && ((*psz & 0xc0) == 0x80);
  154. // FALLTHROUGH
  155. case 2:
  156. wch |= WCHAR(*psz++ & 0x3f) << 6; // 0x0080 - 0x07ff
  157. fValid = fValid && ((*psz & 0xc0) == 0x80);
  158. wch |= WCHAR(*psz++ & 0x3f);
  159. break;
  160. case 0:
  161. wch = WCHAR(*psz++); // 0x0000 - 0x007f
  162. break;
  163. default:
  164. wch = L'?';
  165. psz += cbChar;
  166. break;
  167. }
  168. if (FALSE == fValid)
  169. {
  170. *pwszDst = L'\0';
  171. m_hr = E_FAIL;
  172. break;
  173. }
  174. #ifdef DEBUG
  175. cchUnicode--;
  176. #endif // DEBUG
  177. *pwszDst++ = wch;
  178. }
  179. }
  180. else
  181. {
  182. m_hr = E_OUTOFMEMORY;
  183. }
  184. }