Team Fortress 2 Source Code as on 22/4/2020
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

315 lines
9.6 KiB

  1. #include "tier0/dbg.h"
  2. #include "vstdlib/vstrtools.h"
  3. #if defined( _WIN32 ) && !defined( _X360 )
  4. #include <windows.h>
  5. #endif
  6. #if defined(POSIX) && !defined(_PS3)
  7. #include <iconv.h>
  8. #endif
  9. #ifdef _PS3
  10. #include <cell/sysmodule.h>
  11. #include <cell/l10n.h>
  12. class DummyInitL10N
  13. {
  14. public:
  15. DummyInitL10N()
  16. {
  17. int ret = cellSysmoduleLoadModule( CELL_SYSMODULE_L10N );
  18. if( ret != CELL_OK )
  19. {
  20. Warning( "Cannot initialize l10n, unicode services will not work. Error %d\n", ret );
  21. }
  22. }
  23. ~DummyInitL10N()
  24. {
  25. cellSysmoduleUnloadModule( CELL_SYSMODULE_L10N );
  26. }
  27. }s_dummyInitL10N;
  28. #endif
  29. //-----------------------------------------------------------------------------
  30. // Purpose: Converts a UTF8 string into a unicode string
  31. //-----------------------------------------------------------------------------
  32. int V_UTF8ToUnicode( const char *pUTF8, wchar_t *pwchDest, int cubDestSizeInBytes )
  33. {
  34. if ( !pUTF8 )
  35. return 0;
  36. AssertValidStringPtr(pUTF8);
  37. AssertValidWritePtr(pwchDest);
  38. pwchDest[0] = 0;
  39. #ifdef _WIN32
  40. int cchResult = MultiByteToWideChar( CP_UTF8, 0, pUTF8, -1, pwchDest, cubDestSizeInBytes / sizeof(wchar_t) );
  41. #elif defined( _PS3 )
  42. size_t cchResult = cubDestSizeInBytes / sizeof( uint16 ), cchSrc = V_strlen( pUTF8 ) + 1;
  43. L10nResult result = UTF8stoUCS2s( ( const uint8 *) pUTF8, &cchSrc, ( uint16 * ) pwchDest, &cchResult );
  44. Assert( result == ConversionOK );
  45. cchResult *= sizeof( uint16 );
  46. #elif POSIX
  47. iconv_t conv_t = iconv_open( "UTF-32LE", "UTF-8" );
  48. int cchResult = -1;
  49. size_t nLenUnicde = cubDestSizeInBytes;
  50. size_t nMaxUTF8 = strlen(pUTF8) + 1;
  51. char *pIn = (char *)pUTF8;
  52. char *pOut = (char *)pwchDest;
  53. if ( conv_t > 0 )
  54. {
  55. cchResult = 0;
  56. cchResult = iconv( conv_t, &pIn, &nMaxUTF8, &pOut, &nLenUnicde );
  57. iconv_close( conv_t );
  58. if ( (int)cchResult < 0 )
  59. cchResult = 0;
  60. else
  61. cchResult = nMaxUTF8;
  62. }
  63. #endif
  64. pwchDest[(cubDestSizeInBytes / sizeof(wchar_t)) - 1] = 0;
  65. return cchResult;
  66. }
  67. //-----------------------------------------------------------------------------
  68. // Purpose: Converts a unicode string into a UTF8 (standard) string
  69. //-----------------------------------------------------------------------------
  70. int V_UnicodeToUTF8( const wchar_t *pUnicode, char *pUTF8, int cubDestSizeInBytes )
  71. {
  72. AssertValidStringPtr(pUTF8, cubDestSizeInBytes);
  73. AssertValidReadPtr(pUnicode);
  74. if ( cubDestSizeInBytes > 0 )
  75. {
  76. pUTF8[0] = 0;
  77. }
  78. #ifdef _WIN32
  79. int cchResult = WideCharToMultiByte( CP_UTF8, 0, pUnicode, -1, pUTF8, cubDestSizeInBytes, NULL, NULL );
  80. #elif defined( _PS3 )
  81. size_t cchResult = cubDestSizeInBytes, cchSrc = V_wcslen( pUnicode ) + 1;
  82. L10nResult result = UCS2stoUTF8s( ( const uint16 *) pUnicode, &cchSrc, ( uint8 * ) pUTF8, &cchResult );
  83. Assert( result == ConversionOK );
  84. #elif POSIX
  85. int cchResult = 0;
  86. if ( pUnicode && pUTF8 )
  87. {
  88. iconv_t conv_t = iconv_open( "UTF-8", "UTF-32LE" );
  89. int cchResult = -1;
  90. size_t nLenUnicde = ( wcslen(pUnicode) + 1 ) * sizeof(wchar_t); // 4 bytes per wchar vs. 1 byte for utf8 for simple english
  91. size_t nMaxUTF8 = cubDestSizeInBytes;
  92. char *pIn = (char *)pUnicode;
  93. char *pOut = (char *)pUTF8;
  94. if ( conv_t > 0 )
  95. {
  96. cchResult = 0;
  97. cchResult = iconv( conv_t, &pIn, &nLenUnicde, &pOut, &nMaxUTF8 );
  98. iconv_close( conv_t );
  99. if ( (int)cchResult < 0 )
  100. cchResult = 0;
  101. else
  102. cchResult = nMaxUTF8;
  103. }
  104. }
  105. #endif
  106. if ( cubDestSizeInBytes > 0 )
  107. {
  108. pUTF8[cubDestSizeInBytes - 1] = 0;
  109. }
  110. return cchResult;
  111. }
  112. //-----------------------------------------------------------------------------
  113. // Purpose: Converts a ucs2 string to a unicode (wchar_t) one, no-op on win32
  114. //-----------------------------------------------------------------------------
  115. int V_UCS2ToUnicode( const ucs2 *pUCS2, wchar_t *pUnicode, int cubDestSizeInBytes )
  116. {
  117. AssertValidWritePtr(pUnicode);
  118. AssertValidReadPtr(pUCS2);
  119. pUnicode[0] = 0;
  120. #if defined( _WIN32 ) || defined( _PS3 )
  121. int lenUCS2 = V_wcslen( pUCS2 );
  122. int cchResult = MIN( (lenUCS2+1)*( int )sizeof(ucs2), cubDestSizeInBytes );
  123. V_wcsncpy( (wchar_t*)pUCS2, pUnicode, cchResult );
  124. #else
  125. iconv_t conv_t = iconv_open( "UCS-4LE", "UCS-2LE" );
  126. int cchResult = -1;
  127. size_t nLenUnicde = cubDestSizeInBytes;
  128. size_t nMaxUTF8 = cubDestSizeInBytes;
  129. char *pIn = (char *)pUCS2;
  130. char *pOut = (char *)pUnicode;
  131. if ( conv_t > 0 )
  132. {
  133. cchResult = 0;
  134. cchResult = iconv( conv_t, &pIn, &nLenUnicde, &pOut, &nMaxUTF8 );
  135. iconv_close( conv_t );
  136. if ( (int)cchResult < 0 )
  137. cchResult = 0;
  138. else
  139. cchResult = nMaxUTF8;
  140. }
  141. #endif
  142. pUnicode[(cubDestSizeInBytes / sizeof(wchar_t)) - 1] = 0;
  143. return cchResult;
  144. }
  145. //-----------------------------------------------------------------------------
  146. // Purpose: Converts a wchar_t string into a UCS2 string -noop on windows
  147. //-----------------------------------------------------------------------------
  148. int V_UnicodeToUCS2( const wchar_t *pUnicode, int cubSrcInBytes, char *pUCS2, int cubDestSizeInBytes )
  149. {
  150. // TODO: MACMERGE: Figure out how to convert from 2-byte Win32 wchars to platform wchar_t type that can be 4 bytes
  151. #if defined( _WIN32 ) || defined( _PS3 )
  152. int cchResult = MIN( cubSrcInBytes, cubDestSizeInBytes );
  153. V_wcsncpy( (wchar_t*)pUCS2, pUnicode, cchResult );
  154. #elif defined (POSIX)
  155. iconv_t conv_t = iconv_open( "UCS-2LE", "UTF-32LE" );
  156. size_t cchResult = -1;
  157. size_t nLenUnicde = cubSrcInBytes;
  158. size_t nMaxUCS2 = cubDestSizeInBytes;
  159. char *pIn = (char*)pUnicode;
  160. char *pOut = pUCS2;
  161. if ( conv_t > 0 )
  162. {
  163. cchResult = 0;
  164. cchResult = iconv( conv_t, &pIn, &nLenUnicde, &pOut, &nMaxUCS2 );
  165. iconv_close( conv_t );
  166. if ( (int)cchResult < 0 )
  167. cchResult = 0;
  168. else
  169. cchResult = cubSrcInBytes / sizeof( wchar_t );
  170. }
  171. #endif
  172. return cchResult;
  173. }
  174. // UTF-8 encodes each character (code point) in 1 to 4 octets (8-bit bytes).
  175. // The first 128 characters of the Unicode character set (which correspond directly to the ASCII) use a single octet with the same binary value as in ASCII.
  176. // url:http://en.wikipedia.org/wiki/UTF-8
  177. #define MAX_UTF8_CHARACTER_BYTES 4
  178. //-----------------------------------------------------------------------------
  179. // Purpose: Converts a ucs-2 (windows wchar_t) string into a UTF8 (standard) string
  180. //-----------------------------------------------------------------------------
  181. VSTRTOOLS_INTERFACE int V_UCS2ToUTF8( const ucs2 *pUCS2, char *pUTF8, int cubDestSizeInBytes )
  182. {
  183. AssertValidStringPtr(pUTF8, cubDestSizeInBytes);
  184. AssertValidReadPtr(pUCS2);
  185. Assert( cubDestSizeInBytes >= 1 ); // must have at least 1 byte to write the terminator character
  186. pUTF8[0] = '\0';
  187. #ifdef _WIN32
  188. // under win32 wchar_t == ucs2, sigh
  189. int cchResult = WideCharToMultiByte( CP_UTF8, 0, pUCS2, -1, pUTF8, cubDestSizeInBytes, NULL, NULL );
  190. #elif defined( _PS3 )
  191. size_t cchResult = cubDestSizeInBytes, cchSrc = V_wcslen( pUCS2 ) + 1;
  192. L10nResult result = UCS2stoUTF8s( ( const uint16 *) pUCS2, &cchSrc, ( uint8 * ) pUTF8, &cchResult );
  193. Assert( result == ConversionOK );
  194. #elif defined(POSIX)
  195. iconv_t conv_t = iconv_open( "UTF-8", "UCS-2LE" );
  196. size_t cchResult = -1;
  197. size_t nLenUnicde = cubDestSizeInBytes;
  198. size_t nMaxUTF8 = cubDestSizeInBytes;
  199. char *pIn = (char *)pUCS2;
  200. char *pOut = (char *)pUTF8;
  201. if ( conv_t > 0 )
  202. {
  203. cchResult = 0;
  204. cchResult = iconv( conv_t, &pIn, &nLenUnicde, &pOut, &nMaxUTF8 );
  205. iconv_close( conv_t );
  206. if ( (int)cchResult < 0 )
  207. cchResult = 0;
  208. else
  209. cchResult = nMaxUTF8;
  210. }
  211. #endif
  212. pUTF8[cubDestSizeInBytes - 1] = '\0';
  213. return cchResult;
  214. }
  215. //-----------------------------------------------------------------------------
  216. // Purpose: Converts a UTF8 to ucs-2 (windows wchar_t)
  217. //-----------------------------------------------------------------------------
  218. VSTRTOOLS_INTERFACE int V_UTF8ToUCS2( const char *pUTF8, int cubSrcInBytes, ucs2 *pUCS2, int cubDestSizeInBytes )
  219. {
  220. AssertValidStringPtr(pUTF8, cubDestSizeInBytes);
  221. AssertValidReadPtr(pUCS2);
  222. pUCS2[0] = 0;
  223. #ifdef _WIN32
  224. // under win32 wchar_t == ucs2, sigh
  225. int cchResult = MultiByteToWideChar( CP_UTF8, 0, pUTF8, -1, pUCS2, cubDestSizeInBytes / sizeof(wchar_t) );
  226. #elif defined( _PS3 )
  227. size_t cchResult = cubDestSizeInBytes / sizeof( uint16 ), cchSrc = cubSrcInBytes;
  228. L10nResult result = UTF8stoUCS2s( ( const uint8 *) pUTF8, &cchSrc, ( uint16 * ) pUCS2, &cchResult );
  229. Assert( result == ConversionOK );
  230. cchResult *= sizeof( uint16 );
  231. #elif defined(POSIX)
  232. iconv_t conv_t = iconv_open( "UCS-2LE", "UTF-8" );
  233. size_t cchResult = -1;
  234. size_t nLenUnicde = cubSrcInBytes;
  235. size_t nMaxUTF8 = cubDestSizeInBytes;
  236. char *pIn = (char *)pUTF8;
  237. char *pOut = (char *)pUCS2;
  238. if ( conv_t > 0 )
  239. {
  240. cchResult = 0;
  241. cchResult = iconv( conv_t, &pIn, &nLenUnicde, &pOut, &nMaxUTF8 );
  242. iconv_close( conv_t );
  243. if ( (int)cchResult < 0 )
  244. cchResult = 0;
  245. else
  246. cchResult = cubSrcInBytes;
  247. }
  248. #endif
  249. pUCS2[ (cubDestSizeInBytes/sizeof(ucs2)) - 1] = 0;
  250. return cchResult;
  251. }
  //-----------------------------------------------------------------------------
  // Purpose: copies at most nMaxBytes of the UTF-8 input data into the destination,
  //          ensuring that a trailing multi-byte sequence isn't truncated.
  // Input:   pDest     - destination buffer of at least nMaxBytes bytes
  //          pSrc      - NUL-terminated UTF-8 source
  //          nMaxBytes - destination capacity in bytes, terminator included
  // Output:  pDest. The result is always NUL-terminated when nMaxBytes > 0;
  //          with nMaxBytes == 0 nothing is written at all.
  //          If the copy ends in the middle of a multi-byte character, that
  //          partial character is removed. Trailing continuation bytes that do
  //          not follow a lead byte are removed as well.
  //-----------------------------------------------------------------------------
  VSTRTOOLS_INTERFACE void * V_UTF8_strncpy( char *pDest, const char *pSrc, size_t nMaxBytes )
  {
      // No room even for a terminator: writing pDest[nMaxBytes-1] would be out of bounds.
      if ( nMaxBytes == 0 )
          return pDest;

      strncpy( pDest, pSrc, nMaxBytes );
      pDest[nMaxBytes - 1] = 0;

      // http://en.wikipedia.org/wiki/UTF-8
      // Length of the copied text: walk backwards over the NUL padding, but
      // never past the start of the buffer (an empty source is all NULs).
      size_t nLen = nMaxBytes - 1;
      while ( nLen > 0 && pDest[nLen - 1] == 0 )
          --nLen;
      if ( nLen == 0 )
          return pDest;

      // Step back over trailing continuation bytes (10xxxxxx), counting them.
      size_t iLead = nLen - 1;
      int nBytesSeen = 0;
      while ( iLead > 0 && ( (unsigned char)pDest[iLead] & 0xC0 ) == 0x80 )
      {
          ++nBytesSeen;
          --iLead;
      }

      const unsigned char chLead = (unsigned char)pDest[iLead];

      if ( ( chLead & 0x80 ) == 0 )
      {
          // Plain ASCII byte: keep it, drop any stray continuation bytes after it.
          if ( nBytesSeen > 0 )
              pDest[iLead + 1] = 0;
          return pDest;
      }

      if ( ( chLead & 0xC0 ) == 0x80 )
      {
          // Reached the start of the buffer without finding a lead byte.
          pDest[iLead] = 0;
          return pDest;
      }

      // Lead byte (11xxxxxx): the run of 1 bits directly below the top bit is
      // the number of continuation bytes that must follow. Stop at the first
      // 0 bit - everything after it is payload, not length information.
      int nBytesExpected = 0;
      for ( unsigned int nMask = 0x40; nMask >= 0x04 && ( chLead & nMask ) != 0; nMask >>= 1 )
          ++nBytesExpected;

      // Incomplete (or malformed) final character: cut the string at its lead byte.
      if ( nBytesExpected != nBytesSeen )
          pDest[iLead] = 0;

      return pDest;
  }