Counter Strike : Global Offensive Source Code
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

319 lines
9.9 KiB

  1. #include "tier0/dbg.h"
  2. #include "vstdlib/vstrtools.h"
  3. #if defined( _WIN32 ) && !defined( _X360 )
  4. #include <windows.h>
  5. #endif
  6. #if defined(POSIX) && !defined(_PS3)
  7. #include <iconv.h>
  8. #endif
  9. #ifdef _PS3
  10. #include <cell/sysmodule.h>
  11. #include <cell/l10n.h>
  12. class DummyInitL10N
  13. {
  14. public:
  15. DummyInitL10N()
  16. {
  17. int ret = cellSysmoduleLoadModule( CELL_SYSMODULE_L10N );
  18. if( ret != CELL_OK )
  19. {
  20. Warning( "Cannot initialize l10n, unicode services will not work. Error %d\n", ret );
  21. }
  22. }
  23. ~DummyInitL10N()
  24. {
  25. cellSysmoduleUnloadModule( CELL_SYSMODULE_L10N );
  26. }
  27. }s_dummyInitL10N;
  28. #endif
  29. //-----------------------------------------------------------------------------
  30. // Purpose: Converts a UTF8 string into a unicode string
  31. //-----------------------------------------------------------------------------
  32. int V_UTF8ToUnicode( const char *pUTF8, wchar_t *pwchDest, int cubDestSizeInBytes )
  33. {
  34. if ( !pUTF8 )
  35. return 0;
  36. AssertValidStringPtr(pUTF8);
  37. AssertValidWritePtr(pwchDest);
  38. pwchDest[0] = 0;
  39. #ifdef _WIN32
  40. int cchResult = MultiByteToWideChar( CP_UTF8, 0, pUTF8, -1, pwchDest, cubDestSizeInBytes / sizeof(wchar_t) );
  41. #elif defined( _PS3 )
  42. size_t cchResult = cubDestSizeInBytes / sizeof( uint16 ), cchSrc = V_strlen( pUTF8 ) + 1;
  43. L10nResult result = UTF8stoUCS2s( ( const uint8 *) pUTF8, &cchSrc, ( uint16 * ) pwchDest, &cchResult );
  44. Assert( result == ConversionOK );
  45. cchResult *= sizeof( uint16 );
  46. #elif POSIX
  47. iconv_t conv_t = iconv_open( "UTF-32LE", "UTF-8" );
  48. int cchResult = -1;
  49. size_t nLenUnicde = cubDestSizeInBytes;
  50. size_t nMaxUTF8 = strlen(pUTF8) + 1;
  51. char *pIn = (char *)pUTF8;
  52. char *pOut = (char *)pwchDest;
  53. if ( conv_t > 0 )
  54. {
  55. cchResult = 0;
  56. size_t nInputCharCount = nMaxUTF8;
  57. cchResult = iconv( conv_t, &pIn, &nMaxUTF8, &pOut, &nLenUnicde );
  58. iconv_close( conv_t );
  59. if ( (int)cchResult < 0 )
  60. cchResult = 0;
  61. else
  62. cchResult = nInputCharCount - nMaxUTF8; // nMaxUTF8 is decremented for each converted character. We want to return the count of conversions to match windows.
  63. }
  64. #endif
  65. pwchDest[(cubDestSizeInBytes / sizeof(wchar_t)) - 1] = 0;
  66. return cchResult;
  67. }
  68. //-----------------------------------------------------------------------------
  69. // Purpose: Converts a unicode string into a UTF8 (standard) string
  70. //-----------------------------------------------------------------------------
  71. int V_UnicodeToUTF8( const wchar_t *pUnicode, char *pUTF8, int cubDestSizeInBytes )
  72. {
  73. AssertValidStringPtr(pUTF8, cubDestSizeInBytes);
  74. AssertValidReadPtr(pUnicode);
  75. if ( cubDestSizeInBytes > 0 )
  76. {
  77. pUTF8[0] = 0;
  78. }
  79. #ifdef _WIN32
  80. int cchResult = WideCharToMultiByte( CP_UTF8, 0, pUnicode, -1, pUTF8, cubDestSizeInBytes, NULL, NULL );
  81. #elif defined( _PS3 )
  82. size_t cchResult = cubDestSizeInBytes, cchSrc = V_wcslen( pUnicode ) + 1;
  83. L10nResult result = UCS2stoUTF8s( ( const uint16 *) pUnicode, &cchSrc, ( uint8 * ) pUTF8, &cchResult );
  84. Assert( result == ConversionOK );
  85. #elif POSIX
  86. int cchResult = 0;
  87. if ( pUnicode && pUTF8 )
  88. {
  89. iconv_t conv_t = iconv_open( "UTF-8", "UTF-32LE" );
  90. size_t nLenUnicde = ( wcslen(pUnicode) + 1 ) * sizeof(wchar_t); // 4 bytes per wchar vs. 1 byte for utf8 for simple english
  91. size_t nMaxUTF8 = cubDestSizeInBytes;
  92. char *pIn = (char *)pUnicode;
  93. char *pOut = (char *)pUTF8;
  94. if ( conv_t > 0 )
  95. {
  96. cchResult = iconv( conv_t, &pIn, &nLenUnicde, &pOut, &nMaxUTF8 );
  97. iconv_close( conv_t );
  98. if ( (int)cchResult < 0 )
  99. cchResult = 0;
  100. else
  101. cchResult = nMaxUTF8;
  102. }
  103. }
  104. #endif
  105. if ( cubDestSizeInBytes > 0 )
  106. {
  107. pUTF8[cubDestSizeInBytes - 1] = 0;
  108. }
  109. return cchResult;
  110. }
  111. //-----------------------------------------------------------------------------
  112. // Purpose: Converts a ucs2 string to a unicode (wchar_t) one, no-op on win32
  113. //-----------------------------------------------------------------------------
  114. int V_UCS2ToUnicode( const ucs2 *pUCS2, wchar_t *pUnicode, int cubDestSizeInBytes )
  115. {
  116. AssertValidWritePtr(pUnicode);
  117. AssertValidReadPtr(pUCS2);
  118. pUnicode[0] = 0;
  119. #if defined( _WIN32 ) || defined( _PS3 )
  120. int lenUCS2 = V_wcslen( pUCS2 );
  121. int cchResult = MIN( (lenUCS2+1)*( int )sizeof(ucs2), cubDestSizeInBytes );
  122. V_wcsncpy( (wchar_t*)pUCS2, pUnicode, cchResult );
  123. #else
  124. iconv_t conv_t = iconv_open( "UCS-4LE", "UCS-2LE" );
  125. int cchResult = -1;
  126. size_t nLenUnicde = cubDestSizeInBytes;
  127. size_t nMaxUTF8 = cubDestSizeInBytes;
  128. char *pIn = (char *)pUCS2;
  129. char *pOut = (char *)pUnicode;
  130. if ( conv_t > 0 )
  131. {
  132. cchResult = 0;
  133. cchResult = iconv( conv_t, &pIn, &nLenUnicde, &pOut, &nMaxUTF8 );
  134. iconv_close( conv_t );
  135. if ( (int)cchResult < 0 )
  136. cchResult = 0;
  137. else
  138. cchResult = nMaxUTF8;
  139. }
  140. #endif
  141. pUnicode[(cubDestSizeInBytes / sizeof(wchar_t)) - 1] = 0;
  142. return cchResult;
  143. }
  144. //-----------------------------------------------------------------------------
  145. // Purpose: Converts a wchar_t string into a UCS2 string -noop on windows
  146. //-----------------------------------------------------------------------------
  147. int V_UnicodeToUCS2( const wchar_t *pUnicode, int cubSrcInBytes, char *pUCS2, int cubDestSizeInBytes )
  148. {
  149. // TODO: MACMERGE: Figure out how to convert from 2-byte Win32 wchars to platform wchar_t type that can be 4 bytes
  150. #if defined( _WIN32 ) || defined( _PS3 )
  151. int cchResult = MIN( cubSrcInBytes, cubDestSizeInBytes );
  152. V_wcsncpy( (wchar_t*)pUCS2, pUnicode, cchResult );
  153. #elif defined (POSIX)
  154. iconv_t conv_t = iconv_open( "UCS-2LE", "UTF-32LE" );
  155. size_t cchResult = -1;
  156. size_t nLenUnicde = cubSrcInBytes;
  157. size_t nMaxUCS2 = cubDestSizeInBytes;
  158. char *pIn = (char*)pUnicode;
  159. char *pOut = pUCS2;
  160. if ( conv_t > 0 )
  161. {
  162. cchResult = 0;
  163. cchResult = iconv( conv_t, &pIn, &nLenUnicde, &pOut, &nMaxUCS2 );
  164. iconv_close( conv_t );
  165. if ( (int)cchResult < 0 )
  166. cchResult = 0;
  167. else
  168. cchResult = cubSrcInBytes / sizeof( wchar_t );
  169. }
  170. #endif
  171. return cchResult;
  172. }
  173. // UTF-8 encodes each character (code point) in 1 to 4 octets (8-bit bytes).
  174. // The first 128 characters of the Unicode character set (which correspond directly to the ASCII) use a single octet with the same binary value as in ASCII.
  175. // url:http://en.wikipedia.org/wiki/UTF-8
  176. #define MAX_UTF8_CHARACTER_BYTES 4
  177. //-----------------------------------------------------------------------------
  178. // Purpose: Converts a ucs-2 (windows wchar_t) string into a UTF8 (standard) string
  179. //-----------------------------------------------------------------------------
  180. VSTRTOOLS_INTERFACE int V_UCS2ToUTF8( const ucs2 *pUCS2, char *pUTF8, int cubDestSizeInBytes )
  181. {
  182. AssertValidStringPtr(pUTF8, cubDestSizeInBytes);
  183. AssertValidReadPtr(pUCS2);
  184. Assert( cubDestSizeInBytes >= 1 ); // must have at least 1 byte to write the terminator character
  185. pUTF8[0] = '\0';
  186. #ifdef _WIN32
  187. // under win32 wchar_t == ucs2, sigh
  188. int cchResult = WideCharToMultiByte( CP_UTF8, 0, pUCS2, -1, pUTF8, cubDestSizeInBytes, NULL, NULL );
  189. #elif defined( _PS3 )
  190. size_t cchResult = cubDestSizeInBytes, cchSrc = V_wcslen( pUCS2 ) + 1;
  191. L10nResult result = UCS2stoUTF8s( ( const uint16 *) pUCS2, &cchSrc, ( uint8 * ) pUTF8, &cchResult );
  192. Assert( result == ConversionOK );
  193. #elif defined(POSIX)
  194. iconv_t conv_t = iconv_open( "UTF-8", "UCS-2LE" );
  195. size_t cchResult = -1;
  196. size_t nLenUnicde = cubDestSizeInBytes;
  197. size_t nMaxUTF8 = cubDestSizeInBytes;
  198. char *pIn = (char *)pUCS2;
  199. char *pOut = (char *)pUTF8;
  200. if ( conv_t > 0 )
  201. {
  202. cchResult = 0;
  203. cchResult = iconv( conv_t, &pIn, &nLenUnicde, &pOut, &nMaxUTF8 );
  204. iconv_close( conv_t );
  205. if ( (int)cchResult < 0 )
  206. cchResult = 0;
  207. else
  208. cchResult = nMaxUTF8;
  209. }
  210. #endif
  211. pUTF8[cubDestSizeInBytes - 1] = '\0';
  212. return cchResult;
  213. }
  214. //-----------------------------------------------------------------------------
  215. // Purpose: Converts a UTF8 to ucs-2 (windows wchar_t)
  216. //-----------------------------------------------------------------------------
  217. VSTRTOOLS_INTERFACE int V_UTF8ToUCS2( const char *pUTF8, int cubSrcInBytes, ucs2 *pUCS2, int cubDestSizeInBytes )
  218. {
  219. AssertValidStringPtr(pUTF8, cubDestSizeInBytes);
  220. AssertValidReadPtr(pUCS2);
  221. pUCS2[0] = 0;
  222. #ifdef _WIN32
  223. // under win32 wchar_t == ucs2, sigh
  224. int cchResult = MultiByteToWideChar( CP_UTF8, 0, pUTF8, -1, pUCS2, cubDestSizeInBytes / sizeof(wchar_t) );
  225. #elif defined( _PS3 )
  226. size_t cchResult = cubDestSizeInBytes / sizeof( uint16 ), cchSrc = cubSrcInBytes;
  227. L10nResult result = UTF8stoUCS2s( ( const uint8 *) pUTF8, &cchSrc, ( uint16 * ) pUCS2, &cchResult );
  228. Assert( result == ConversionOK );
  229. cchResult *= sizeof( uint16 );
  230. #elif defined(POSIX)
  231. iconv_t conv_t = iconv_open( "UCS-2LE", "UTF-8" );
  232. size_t cchResult = -1;
  233. size_t nLenUnicde = cubSrcInBytes;
  234. size_t nMaxUTF8 = cubDestSizeInBytes;
  235. char *pIn = (char *)pUTF8;
  236. char *pOut = (char *)pUCS2;
  237. if ( conv_t > 0 )
  238. {
  239. cchResult = 0;
  240. cchResult = iconv( conv_t, &pIn, &nLenUnicde, &pOut, &nMaxUTF8 );
  241. iconv_close( conv_t );
  242. if ( (int)cchResult < 0 )
  243. cchResult = 0;
  244. else
  245. cchResult = cubSrcInBytes;
  246. }
  247. #endif
  248. pUCS2[ (cubDestSizeInBytes/sizeof(ucs2)) - 1] = 0;
  249. return cchResult;
  250. }
  //-----------------------------------------------------------------------------
  // Purpose: copies at most nMaxBytes of the UTF-8 input data into the destination,
  //          ensuring that a trailing multi-byte sequence isn't truncated.
  // Input  : pDest - destination buffer of at least nMaxBytes bytes
  //          pSrc - NUL-terminated UTF-8 source
  //          nMaxBytes - size of pDest in bytes, terminator included; when it is
  //                  0 nothing is read or written
  // Output : pDest.  The result is always NUL-terminated (for nMaxBytes > 0); if
  //          the copy was cut in the middle of a multi-byte sequence the string
  //          is ended at that sequence's lead byte instead.
  //-----------------------------------------------------------------------------
  VSTRTOOLS_INTERFACE void * V_UTF8_strncpy( char *pDest, const char *pSrc, size_t nMaxBytes )
  {
      // No room even for a terminator: pDest[nMaxBytes - 1] would be pDest[-1].
      if ( nMaxBytes == 0 )
          return pDest;

      strncpy( pDest, pSrc, nMaxBytes );

      // http://en.wikipedia.org/wiki/UTF-8
      // strncpy leaves the buffer unterminated when the source fills it.
      size_t end = nMaxBytes - 1;
      pDest[end] = 0;

      // walk backwards, ignoring nulls
      while ( end > 0 && pDest[end] == 0 )
          --end;

      // found a non-null - see if it's part of a multi-byte sequence
      size_t nBytesSeen = 0;
      while ( ( (unsigned char)pDest[end] & 0xC0 ) == 0x80 ) // utf8 multi-byte trailing characters begin with 10xxxxxx
      {
          ++nBytesSeen;
          if ( end == 0 )
          {
              // Nothing but trailing bytes all the way to the start: there is
              // no lead byte to inspect (stepping further would index pDest[-1]),
              // so drop the orphaned run.
              pDest[0] = 0;
              return pDest;
          }
          --end;
      }

      // The number of 1 bits following the top bit of the lead byte is the
      // number of trailing bytes the sequence should carry.
      size_t nBytesExpected = 0;
      const unsigned char uchLead = (unsigned char)pDest[end];
      if ( ( uchLead & 0xC0 ) == 0xC0 ) // utf8 multi-byte character sequences begin with 11xxxxxx
      {
          for ( int i = 6; i > 1; --i )
          {
              if ( ( uchLead >> i ) & 0x1 )
                  ++nBytesExpected;
              else
                  break;
          }
      }

      // Incomplete (or malformed) trailing sequence: end the string before it.
      if ( nBytesExpected != nBytesSeen )
          pDest[end] = 0;

      return pDest;
  }