Team Fortress 2 Source Code as on 22/4/2020
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

274 lines
8.0 KiB

  1. //========= Copyright © 1996-2005, Valve Corporation, All rights reserved. ============//
  2. //
  3. // Functions for UCS/UTF/Unicode string operations. These functions are in vstdlib
  4. // instead of tier1, because on PS/3 they need to load and initialize a system module,
  5. // which is more frugal to do from a single place rather than multiple times in different PRX'es.
  6. // The functions themselves aren't supposed to be called frequently enough for the DLL/PRX boundary
  7. // marshalling, if any, to have any measurable impact on performance.
  8. //
  9. #ifndef VSTRTOOLS_HDR
  10. #define VSTRTOOLS_HDR
  11. #include "tier0/platform.h"
  12. #include "tier0/basetypes.h"
  13. #include "tier1/strtools.h"
  14. #ifdef STATIC_VSTDLIB
  15. #define VSTRTOOLS_INTERFACE
  16. #else
  17. #ifdef VSTDLIB_DLL_EXPORT
  18. #define VSTRTOOLS_INTERFACE DLL_EXPORT
  19. #else
  20. #define VSTRTOOLS_INTERFACE DLL_IMPORT
  21. #endif
  22. #endif
  23. // conversion functions wchar_t <-> char, returning the number of characters converted
// The prototypes below convert between UTF-8 (char), UCS-2 (ucs2) and wchar_t strings.
// Size parameters are named as byte counts (cubDestSizeInBytes / cubSrcInBytes), not
// element counts; CStrAutoEncode below passes "element count * sizeof( element )" for
// wide destinations accordingly.
// CStrAutoEncode treats a zero return from V_UnicodeToUTF8, V_UTF8ToUnicode and
// V_UTF8ToUCS2 as "conversion failed".
  24. VSTRTOOLS_INTERFACE int V_UTF8ToUnicode( const char *pUTF8, wchar_t *pwchDest, int cubDestSizeInBytes );
  25. VSTRTOOLS_INTERFACE int V_UnicodeToUTF8( const wchar_t *pUnicode, char *pUTF8, int cubDestSizeInBytes );
  26. VSTRTOOLS_INTERFACE int V_UCS2ToUnicode( const ucs2 *pUCS2, wchar_t *pUnicode, int cubDestSizeInBytes );
  27. VSTRTOOLS_INTERFACE int V_UCS2ToUTF8( const ucs2 *pUCS2, char *pUTF8, int cubDestSizeInBytes );
// NOTE(review): the destination here is typed char* while V_UTF8ToUCS2 writes through a
// ucs2* - confirm whether the mismatch is intentional before relying on it.
  28. VSTRTOOLS_INTERFACE int V_UnicodeToUCS2( const wchar_t *pUnicode, int cubSrcInBytes, char *pUCS2, int cubDestSizeInBytes );
  29. VSTRTOOLS_INTERFACE int V_UTF8ToUCS2( const char *pUTF8, int cubSrcInBytes, ucs2 *pUCS2, int cubDestSizeInBytes );
  30. // copy at most n bytes into destination, will not corrupt utf-8 multi-byte sequences
// NOTE(review): the meaning of the void* return value is not stated in this header;
// presumably it is pDest - confirm against the implementation.
  31. VSTRTOOLS_INTERFACE void * V_UTF8_strncpy( char *pDest, const char *pSrc, size_t nMaxBytes );
  32. //
  33. // This utility class is for performing UTF-8 <-> UTF-16 conversion.
  34. // It is intended for use with function/method parameters.
  35. //
  36. // For example, you can call
  37. // FunctionTakingUTF16( CStrAutoEncode( utf8_string ).ToWString() )
  38. // or
  39. // FunctionTakingUTF8( CStrAutoEncode( utf16_string ).ToString() )
  40. //
  41. // The converted string is allocated off the heap, and destroyed when
  42. // the object goes out of scope.
  43. //
  44. // if the string cannot be converted, NULL is returned.
  45. //
  46. // This class doesn't have any conversion operators; the intention is
  47. // to encourage the developer to get used to having to think about which
  48. // encoding is desired.
  49. //
  50. class CStrAutoEncode
  51. {
  52. public:
  53. // ctor
  54. explicit CStrAutoEncode( const char *pch )
  55. {
  56. m_pch = pch;
  57. m_pwch = NULL;
  58. #if !defined( WIN32 ) && !defined(_WIN32)
  59. m_pucs2 = NULL;
  60. m_bCreatedUCS2 = false;
  61. #endif
  62. m_bCreatedUTF16 = false;
  63. }
  64. // ctor
  65. explicit CStrAutoEncode( const wchar_t *pwch )
  66. {
  67. m_pch = NULL;
  68. m_pwch = pwch;
  69. #if !defined( WIN32 ) && !defined(_WIN32)
  70. m_pucs2 = NULL;
  71. m_bCreatedUCS2 = false;
  72. #endif
  73. m_bCreatedUTF16 = true;
  74. }
  75. #if !defined(WIN32) && !defined(_WINDOWS) && !defined(_WIN32) && !defined(_PS3)
  76. explicit CStrAutoEncode( const ucs2 *pwch )
  77. {
  78. m_pch = NULL;
  79. m_pwch = NULL;
  80. m_pucs2 = pwch;
  81. m_bCreatedUCS2 = true;
  82. m_bCreatedUTF16 = false;
  83. }
  84. #endif
  85. // returns the UTF-8 string, converting on the fly.
  86. const char* ToString()
  87. {
  88. PopulateUTF8();
  89. return m_pch;
  90. }
  91. // returns the UTF-8 string - a writable pointer.
  92. // only use this if you don't want to call const_cast
  93. // yourself. We need this for cases like CreateProcess.
  94. char* ToStringWritable()
  95. {
  96. PopulateUTF8();
  97. return const_cast< char* >( m_pch );
  98. }
  99. // returns the UTF-16 string, converting on the fly.
  100. const wchar_t* ToWString()
  101. {
  102. PopulateUTF16();
  103. return m_pwch;
  104. }
  105. #if !defined( WIN32 ) && !defined(_WIN32)
  106. // returns the UTF-16 string, converting on the fly.
  107. const ucs2* ToUCS2String()
  108. {
  109. PopulateUCS2();
  110. return m_pucs2;
  111. }
  112. #endif
  113. // returns the UTF-16 string - a writable pointer.
  114. // only use this if you don't want to call const_cast
  115. // yourself. We need this for cases like CreateProcess.
  116. wchar_t* ToWStringWritable()
  117. {
  118. PopulateUTF16();
  119. return const_cast< wchar_t* >( m_pwch );
  120. }
  121. // dtor
  122. ~CStrAutoEncode()
  123. {
  124. // if we're "native unicode" then the UTF-8 string is something we allocated,
  125. // and vice versa.
  126. if ( m_bCreatedUTF16 )
  127. {
  128. delete [] m_pch;
  129. }
  130. else
  131. {
  132. delete [] m_pwch;
  133. }
  134. #if !defined( WIN32 ) && !defined(_WIN32)
  135. if ( !m_bCreatedUCS2 && m_pucs2 )
  136. delete [] m_pucs2;
  137. #endif
  138. }
  139. private:
  140. // ensure we have done any conversion work required to farm out a
  141. // UTF-8 encoded string.
  142. //
  143. // We perform two heap allocs here; the first one is the worst-case
  144. // (four bytes per Unicode code point). This is usually quite pessimistic,
  145. // so we perform a second allocation that's just the size we need.
  146. void PopulateUTF8()
  147. {
  148. if ( !m_bCreatedUTF16 )
  149. return; // no work to do
  150. if ( m_pwch == NULL )
  151. return; // don't have a UTF-16 string to convert
  152. if ( m_pch != NULL )
  153. return; // already been converted to UTF-8; no work to do
  154. // each Unicode code point can expand to as many as four bytes in UTF-8; we
  155. // also need to leave room for the terminating NUL.
  156. uint32 cbMax = 4 * static_cast<uint32>( V_wcslen( m_pwch ) ) + 1;
  157. char *pchTemp = new char[ cbMax ];
  158. if ( V_UnicodeToUTF8( m_pwch, pchTemp, cbMax ) )
  159. {
  160. uint32 cchAlloc = static_cast<uint32>( V_strlen( pchTemp ) ) + 1;
  161. char *pchHeap = new char[ cchAlloc ];
  162. V_strncpy( pchHeap, pchTemp, cchAlloc );
  163. delete [] pchTemp;
  164. m_pch = pchHeap;
  165. }
  166. else
  167. {
  168. // do nothing, and leave the UTF-8 string NULL
  169. delete [] pchTemp;
  170. }
  171. }
  172. // ensure we have done any conversion work required to farm out a
  173. // UTF-16 encoded string.
  174. //
  175. // We perform two heap allocs here; the first one is the worst-case
  176. // (one code point per UTF-8 byte). This is sometimes pessimistic,
  177. // so we perform a second allocation that's just the size we need.
  178. void PopulateUTF16()
  179. {
  180. if ( m_bCreatedUTF16 )
  181. return; // no work to do
  182. if ( m_pch == NULL )
  183. return; // no UTF-8 string to convert
  184. if ( m_pwch != NULL )
  185. return; // already been converted to UTF-16; no work to do
  186. uint32 cchMax = static_cast<uint32>( V_strlen( m_pch ) ) + 1;
  187. wchar_t *pwchTemp = new wchar_t[ cchMax ];
  188. if ( V_UTF8ToUnicode( m_pch, pwchTemp, cchMax * sizeof( wchar_t ) ) )
  189. {
  190. uint32 cchAlloc = static_cast<uint32>( V_wcslen( pwchTemp ) ) + 1;
  191. wchar_t *pwchHeap = new wchar_t[ cchAlloc ];
  192. V_wcsncpy( pwchHeap, pwchTemp, cchAlloc * sizeof( wchar_t ) );
  193. delete [] pwchTemp;
  194. m_pwch = pwchHeap;
  195. }
  196. else
  197. {
  198. // do nothing, and leave the UTF-16 string NULL
  199. delete [] pwchTemp;
  200. }
  201. }
  202. #if !defined( WIN32 ) && !defined(_WIN32)
  203. // ensure we have done any conversion work required to farm out a
  204. // UTF-16 encoded string.
  205. //
  206. // We perform two heap allocs here; the first one is the worst-case
  207. // (one code point per UTF-8 byte). This is sometimes pessimistic,
  208. // so we perform a second allocation that's just the size we need.
  209. void PopulateUCS2()
  210. {
  211. if ( m_bCreatedUCS2 )
  212. return;
  213. if ( m_pch == NULL )
  214. return; // no UTF-8 string to convert
  215. if ( m_pucs2 != NULL )
  216. return; // already been converted to UTF-16; no work to do
  217. uint32 cchMax = static_cast<uint32>( V_strlen( m_pch ) ) + 1;
  218. ucs2 *pwchTemp = new ucs2[ cchMax ];
  219. if ( V_UTF8ToUCS2( m_pch, cchMax, pwchTemp, cchMax * sizeof( ucs2 ) ) )
  220. {
  221. uint32 cchAlloc = cchMax;
  222. ucs2 *pwchHeap = new ucs2[ cchAlloc ];
  223. memcpy( pwchHeap, pwchTemp, cchAlloc * sizeof( ucs2 ) );
  224. delete [] pwchTemp;
  225. m_pucs2 = pwchHeap;
  226. }
  227. else
  228. {
  229. // do nothing, and leave the UTF-16 string NULL
  230. delete [] pwchTemp;
  231. }
  232. }
  233. #endif
  234. // one of these pointers is an owned pointer; whichever
  235. // one is the encoding OTHER than the one we were initialized
  236. // with is the pointer we've allocated and must free.
  237. const char *m_pch;
  238. const wchar_t *m_pwch;
  239. #if !defined( WIN32 ) && !defined(_WIN32)
  240. const ucs2 *m_pucs2;
  241. bool m_bCreatedUCS2;
  242. #endif
  243. // "created as UTF-16", means our owned string is the UTF-8 string not the UTF-16 one.
  244. bool m_bCreatedUTF16;
  245. };
  246. #define V_UTF8ToUnicode V_UTF8ToUnicode
  247. #define V_UnicodeToUTF8 V_UnicodeToUTF8
  248. #endif