Counter Strike : Global Offensive Source Code
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

275 lines
8.2 KiB

  1. //========= Copyright © 1996-2005, Valve Corporation, All rights reserved. ============//
  2. //
  3. // Functions for UCS/UTF/Unicode string operations. These functions are in vstdlib
  4. // instead of tier1, because on PS/3 they need to load and initialize a system module,
  5. // which is more frugal to do from a single place rather than multiple times in different PRX'es.
  6. // The functions themselves aren't supposed to be called frequently enough for the DLL/PRX boundary
  7. // marshalling, if any, to have any measurable impact on performance.
  8. //
  9. #ifndef VSTRTOOLS_HDR
  10. #define VSTRTOOLS_HDR
  11. #include "tier0/platform.h"
  12. #include "tier0/basetypes.h"
  13. #include "tier1/strtools.h"
  14. #ifdef VSTDLIB_DLL_EXPORT
  15. #define VSTRTOOLS_INTERFACE DLL_EXPORT
  16. #else
  17. #define VSTRTOOLS_INTERFACE DLL_IMPORT
  18. #endif
  19. // Conversion functions between wchar_t, UTF-8 (char) and ucs2 strings, returning the number of characters converted.
  // Every cubDestSizeInBytes / cubSrcInBytes argument is a size in BYTES, not a count of characters;
  // for wide destinations CStrAutoEncode passes elementCount * sizeof( element ).
  // CStrAutoEncode treats a return value of 0 as "conversion failed".
  // NOTE(review): OUT_Z_BYTECAP presumably marks a zero-terminated output buffer with the given byte capacity -
  // confirm against its definition in the platform headers.
  20. VSTRTOOLS_INTERFACE int V_UTF8ToUnicode( const char *pUTF8, OUT_Z_BYTECAP(cubDestSizeInBytes) wchar_t *pwchDest, int cubDestSizeInBytes );
  21. VSTRTOOLS_INTERFACE int V_UnicodeToUTF8( const wchar_t *pUnicode, OUT_Z_BYTECAP(cubDestSizeInBytes) char *pUTF8, int cubDestSizeInBytes );
  22. VSTRTOOLS_INTERFACE int V_UCS2ToUnicode( const ucs2 *pUCS2, OUT_Z_BYTECAP(cubDestSizeInBytes) wchar_t *pUnicode, int cubDestSizeInBytes );
  23. VSTRTOOLS_INTERFACE int V_UCS2ToUTF8( const ucs2 *pUCS2, OUT_Z_BYTECAP(cubDestSizeInBytes) char *pUTF8, int cubDestSizeInBytes );
  // The next two also take the size of the source buffer in bytes (cubSrcInBytes).
  // NOTE(review): V_UnicodeToUCS2 declares its destination as char * rather than ucs2 * - confirm this is intended.
  24. VSTRTOOLS_INTERFACE int V_UnicodeToUCS2( const wchar_t *pUnicode, int cubSrcInBytes, OUT_Z_BYTECAP(cubDestSizeInBytes) char *pUCS2, int cubDestSizeInBytes );
  25. VSTRTOOLS_INTERFACE int V_UTF8ToUCS2( const char *pUTF8, int cubSrcInBytes, OUT_Z_BYTECAP(cubDestSizeInBytes) ucs2 *pUCS2, int cubDestSizeInBytes );
  26. // Copy at most nMaxBytes bytes from pSrc into pDest; will not corrupt UTF-8 multi-byte sequences.
  // NOTE(review): the meaning of the void * return value is not visible from this header - check the implementation.
  27. VSTRTOOLS_INTERFACE void * V_UTF8_strncpy( OUT_Z_BYTECAP(nMaxBytes) char *pDest, const char *pSrc, size_t nMaxBytes );
  28. //
  29. // This utility class is for performing UTF-8 <-> UTF-16 conversion.
  30. // It is intended for use with function/method parameters.
  31. //
  32. // For example, you can call
  33. // FunctionTakingUTF16( CStrAutoEncode( utf8_string ).ToWString() )
  34. // or
  35. // FunctionTakingUTF8( CStrAutoEncode( utf16_string ).ToString() )
  36. //
  37. // The converted string is allocated off the heap, and destroyed when
  38. // the object goes out of scope.
  39. //
  40. // if the string cannot be converted, NULL is returned.
  41. //
  42. // This class doesn't have any conversion operators; the intention is
  43. // to encourage the developer to get used to having to think about which
  44. // encoding is desired.
  45. //
  46. class CStrAutoEncode
  47. {
  48. public:
  49. // ctor
  50. explicit CStrAutoEncode( const char *pch )
  51. {
  52. m_pch = pch;
  53. m_pwch = NULL;
  54. #if !defined( WIN32 ) && !defined(_WIN32)
  55. m_pucs2 = NULL;
  56. m_bCreatedUCS2 = false;
  57. #endif
  58. m_bCreatedUTF16 = false;
  59. }
  60. // ctor
  61. explicit CStrAutoEncode( const wchar_t *pwch )
  62. {
  63. m_pch = NULL;
  64. m_pwch = pwch;
  65. #if !defined( WIN32 ) && !defined(_WIN32)
  66. m_pucs2 = NULL;
  67. m_bCreatedUCS2 = false;
  68. #endif
  69. m_bCreatedUTF16 = true;
  70. }
  71. #if !defined(WIN32) && !defined(_WINDOWS) && !defined(_WIN32) && !defined(_PS3)
  72. explicit CStrAutoEncode( const ucs2 *pwch )
  73. {
  74. m_pch = NULL;
  75. m_pwch = NULL;
  76. m_pucs2 = pwch;
  77. m_bCreatedUCS2 = true;
  78. m_bCreatedUTF16 = false;
  79. }
  80. #endif
  81. // returns the UTF-8 string, converting on the fly.
  82. const char* ToString()
  83. {
  84. PopulateUTF8();
  85. return m_pch;
  86. }
  87. // Same as ToString() but here to match Steam's interface for this class
  88. const char *ToUTF8() { return ToString(); }
  89. // returns the UTF-8 string - a writable pointer.
  90. // only use this if you don't want to call const_cast
  91. // yourself. We need this for cases like CreateProcess.
  92. char* ToStringWritable()
  93. {
  94. PopulateUTF8();
  95. return const_cast< char* >( m_pch );
  96. }
  97. // returns the UTF-16 string, converting on the fly.
  98. const wchar_t* ToWString()
  99. {
  100. PopulateUTF16();
  101. return m_pwch;
  102. }
  103. #if !defined( WIN32 ) && !defined(_WIN32)
  104. // returns the UTF-16 string, converting on the fly.
  105. const ucs2* ToUCS2String()
  106. {
  107. PopulateUCS2();
  108. return m_pucs2;
  109. }
  110. #endif
  111. // returns the UTF-16 string - a writable pointer.
  112. // only use this if you don't want to call const_cast
  113. // yourself. We need this for cases like CreateProcess.
  114. wchar_t* ToWStringWritable()
  115. {
  116. PopulateUTF16();
  117. return const_cast< wchar_t* >( m_pwch );
  118. }
  119. // dtor
  120. ~CStrAutoEncode()
  121. {
  122. // if we're "native unicode" then the UTF-8 string is something we allocated,
  123. // and vice versa.
  124. if ( m_bCreatedUTF16 )
  125. {
  126. delete [] m_pch;
  127. }
  128. else
  129. {
  130. delete [] m_pwch;
  131. }
  132. #if !defined( WIN32 ) && !defined(_WIN32)
  133. if ( !m_bCreatedUCS2 && m_pucs2 )
  134. delete [] m_pucs2;
  135. #endif
  136. }
  137. private:
  138. // ensure we have done any conversion work required to farm out a
  139. // UTF-8 encoded string.
  140. //
  141. // We perform two heap allocs here; the first one is the worst-case
  142. // (four bytes per Unicode code point). This is usually quite pessimistic,
  143. // so we perform a second allocation that's just the size we need.
  144. void PopulateUTF8()
  145. {
  146. if ( !m_bCreatedUTF16 )
  147. return; // no work to do
  148. if ( m_pwch == NULL )
  149. return; // don't have a UTF-16 string to convert
  150. if ( m_pch != NULL )
  151. return; // already been converted to UTF-8; no work to do
  152. // each Unicode code point can expand to as many as four bytes in UTF-8; we
  153. // also need to leave room for the terminating NUL.
  154. uint32 cbMax = 4 * static_cast<uint32>( V_wcslen( m_pwch ) ) + 1;
  155. char *pchTemp = new char[ cbMax ];
  156. if ( V_UnicodeToUTF8( m_pwch, pchTemp, cbMax ) )
  157. {
  158. uint32 cchAlloc = static_cast<uint32>( V_strlen( pchTemp ) ) + 1;
  159. char *pchHeap = new char[ cchAlloc ];
  160. V_strncpy( pchHeap, pchTemp, cchAlloc );
  161. delete [] pchTemp;
  162. m_pch = pchHeap;
  163. }
  164. else
  165. {
  166. // do nothing, and leave the UTF-8 string NULL
  167. delete [] pchTemp;
  168. }
  169. }
  170. // ensure we have done any conversion work required to farm out a
  171. // UTF-16 encoded string.
  172. //
  173. // We perform two heap allocs here; the first one is the worst-case
  174. // (one code point per UTF-8 byte). This is sometimes pessimistic,
  175. // so we perform a second allocation that's just the size we need.
  176. void PopulateUTF16()
  177. {
  178. if ( m_bCreatedUTF16 )
  179. return; // no work to do
  180. if ( m_pch == NULL )
  181. return; // no UTF-8 string to convert
  182. if ( m_pwch != NULL )
  183. return; // already been converted to UTF-16; no work to do
  184. uint32 cchMax = static_cast<uint32>( V_strlen( m_pch ) ) + 1;
  185. wchar_t *pwchTemp = new wchar_t[ cchMax ];
  186. if ( V_UTF8ToUnicode( m_pch, pwchTemp, cchMax * sizeof( wchar_t ) ) )
  187. {
  188. uint32 cchAlloc = static_cast<uint32>( V_wcslen( pwchTemp ) ) + 1;
  189. wchar_t *pwchHeap = new wchar_t[ cchAlloc ];
  190. V_wcsncpy( pwchHeap, pwchTemp, cchAlloc * sizeof( wchar_t ) );
  191. delete [] pwchTemp;
  192. m_pwch = pwchHeap;
  193. }
  194. else
  195. {
  196. // do nothing, and leave the UTF-16 string NULL
  197. delete [] pwchTemp;
  198. }
  199. }
  200. #if !defined( WIN32 ) && !defined(_WIN32)
  201. // ensure we have done any conversion work required to farm out a
  202. // UTF-16 encoded string.
  203. //
  204. // We perform two heap allocs here; the first one is the worst-case
  205. // (one code point per UTF-8 byte). This is sometimes pessimistic,
  206. // so we perform a second allocation that's just the size we need.
  207. void PopulateUCS2()
  208. {
  209. if ( m_bCreatedUCS2 )
  210. return;
  211. if ( m_pch == NULL )
  212. return; // no UTF-8 string to convert
  213. if ( m_pucs2 != NULL )
  214. return; // already been converted to UTF-16; no work to do
  215. uint32 cchMax = static_cast<uint32>( V_strlen( m_pch ) ) + 1;
  216. ucs2 *pwchTemp = new ucs2[ cchMax ];
  217. if ( V_UTF8ToUCS2( m_pch, cchMax, pwchTemp, cchMax * sizeof( ucs2 ) ) )
  218. {
  219. uint32 cchAlloc = cchMax;
  220. ucs2 *pwchHeap = new ucs2[ cchAlloc ];
  221. memcpy( pwchHeap, pwchTemp, cchAlloc * sizeof( ucs2 ) );
  222. delete [] pwchTemp;
  223. m_pucs2 = pwchHeap;
  224. }
  225. else
  226. {
  227. // do nothing, and leave the UTF-16 string NULL
  228. delete [] pwchTemp;
  229. }
  230. }
  231. #endif
  232. // one of these pointers is an owned pointer; whichever
  233. // one is the encoding OTHER than the one we were initialized
  234. // with is the pointer we've allocated and must free.
  235. const char *m_pch;
  236. const wchar_t *m_pwch;
  237. #if !defined( WIN32 ) && !defined(_WIN32)
  238. const ucs2 *m_pucs2;
  239. bool m_bCreatedUCS2;
  240. #endif
  241. // "created as UTF-16", means our owned string is the UTF-8 string not the UTF-16 one.
  242. bool m_bCreatedUTF16;
  243. };
  // Q_-prefixed aliases that expand to the corresponding V_ conversion functions.
  244. #define Q_UTF8ToUnicode V_UTF8ToUnicode
  245. #define Q_UnicodeToUTF8 V_UnicodeToUTF8
  246. #endif